diff --git a/doc/ChangeLog b/doc/ChangeLog index ff66e1d5..ba801e44 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -29,6 +29,8 @@ Changes from version 0.21.1 to 0.22 segment. (Reported by Stefan Brandl) - 0000757: Wrong ELSE statement in src/CMakeLists.txt. (Reported by Michael Hansen) + - 0000708: On Windows (MSVC and MSYS/MinGW builds), charset conversions + now use the respective Windows functions if iconv is not available. - 0000689: Support for Encapsulated PostScript (*.eps) files. (Michael Ulbrich, Volker Grabsch) - 0000439: The exiv2 library should be re-entrant. diff --git a/src/convert.cpp b/src/convert.cpp index 8e03d304..135fd0a9 100644 --- a/src/convert.cpp +++ b/src/convert.cpp @@ -40,6 +40,7 @@ EXIV2_RCSID("@(#) $Id$") #include "convert.hpp" // + standard includes +#include #include #include #include @@ -50,6 +51,10 @@ EXIV2_RCSID("@(#) $Id$") #endif #include +#if defined WIN32 && !defined __CYGWIN__ +# include +#endif + #ifdef EXV_HAVE_ICONV # include # include @@ -65,6 +70,14 @@ EXIV2_RCSID("@(#) $Id$") // ***************************************************************************** // local declarations namespace { +#if defined WIN32 && !defined __CYGWIN__ + // Convert string charset with Windows MSVC functions. + bool convertStringCharsetMsvc(std::string& str, const char* from, const char* to); +#endif +#if defined EXV_HAVE_ICONV + // Convert string charset with iconv. + bool convertStringCharsetIconv(std::string& str, const char* from, const char* to); +#endif /*! @brief Get the text value of an XmpDatum \em pos. 
@@ -1318,7 +1331,184 @@ namespace Exiv2 { bool convertStringCharset(std::string &str, const char* from, const char* to) { if (0 == strcmp(from, to)) return true; // nothing to do + bool ret = false; #if defined EXV_HAVE_ICONV + ret = convertStringCharsetIconv(str, from, to); +#elif defined WIN32 && !defined __CYGWIN__ + ret = convertStringCharsetMsvc(str, from, to); +#else +# ifndef SUPPRESS_WARNINGS + EXV_WARNING << "Charset conversion required but no character mapping functionality available.\n"; +# endif +#endif + return ret; + } +} // namespace Exiv2 + +// ***************************************************************************** +// local definitions +namespace { + + using namespace Exiv2; + +#if defined WIN32 && !defined __CYGWIN__ + bool swapBytes(std::string& str) + { + // Naive byte-swapping, I'm sure this can be done more efficiently + if (str.size() & 1) { +#ifdef DEBUG + EXV_DEBUG << "swapBytes: Size " << str.size() << " of input string is not even.\n"; +#endif + return false; + } + for (unsigned int i = 0; i < str.size() / 2; ++i) { + char t = str[2 * i]; + str[2 * i] = str[2 * i + 1]; + str[2 * i + 1] = t; + } + return true; + } + + bool mb2wc(UINT cp, std::string& str) + { + if (str.empty()) return true; + int len = MultiByteToWideChar(cp, 0, str.c_str(), str.size(), 0, 0); + if (len == 0) { +#ifdef DEBUG + EXV_DEBUG << "mb2wc: Failed to determine required size of output buffer.\n"; +#endif + return false; + } + std::vector<char> out; + out.resize(len * 2); + int ret = MultiByteToWideChar(cp, 0, str.c_str(), str.size(), (LPWSTR)&out[0], len); // cchWideChar counts wide characters, not bytes + if (ret == 0) { +#ifdef DEBUG + EXV_DEBUG << "mb2wc: Failed to convert the input string to a wide character string.\n"; +#endif + return false; + } + str.assign(out.begin(), out.end()); + return true; + } + + bool wc2mb(UINT cp, std::string& str) + { + if (str.empty()) return true; + if (str.size() & 1) { +#ifdef DEBUG + EXV_DEBUG << "wc2mb: Size " << str.size() << " of input string is not even.\n"; +#endif + 
return false; + } + int len = WideCharToMultiByte(cp, 0, (LPCWSTR)str.data(), str.size() / 2, 0, 0, 0, 0); + if (len == 0) { +#ifdef DEBUG + EXV_DEBUG << "wc2mb: Failed to determine required size of output buffer.\n"; +#endif + return false; + } + std::vector<char> out; + out.resize(len); + int ret = WideCharToMultiByte(cp, 0, (LPCWSTR)str.data(), str.size() / 2, (LPSTR)&out[0], len, 0, 0); + if (ret == 0) { +#ifdef DEBUG + EXV_DEBUG << "wc2mb: Failed to convert the input string to a multi byte string.\n"; +#endif + return false; + } + str.assign(out.begin(), out.end()); + return true; + } + + bool utf8ToUcs2be(std::string& str) + { + bool ret = mb2wc(CP_UTF8, str); + if (ret) ret = swapBytes(str); + return ret; + } + + bool utf8ToUcs2le(std::string& str) + { + return mb2wc(CP_UTF8, str); + } + + bool ucs2beToUtf8(std::string& str) + { + bool ret = swapBytes(str); + if (ret) ret = wc2mb(CP_UTF8, str); + return ret; + } + + bool ucs2beToUcs2le(std::string& str) + { + return swapBytes(str); + } + + bool ucs2leToUtf8(std::string& str) + { + return wc2mb(CP_UTF8, str); + } + + bool ucs2leToUcs2be(std::string& str) + { + return swapBytes(str); + } + + bool iso88591ToUtf8(std::string& str) + { + bool ret = mb2wc(28591, str); + if (ret) ret = wc2mb(CP_UTF8, str); + return ret; + } + + bool asciiToUtf8(std::string& /*str*/) + { + // nothing to do + return true; + } + + typedef bool (*ConvFct)(std::string& str); + + struct ConvFctList { + bool operator==(std::pair<const char*, const char*> fromTo) const + { return 0 == strcmp(from_, fromTo.first) && 0 == strcmp(to_, fromTo.second); } + const char* from_; + const char* to_; + ConvFct convFct_; + }; + + const ConvFctList convFctList[] = { + { "UTF-8", "UCS-2BE", utf8ToUcs2be }, + { "UTF-8", "UCS-2LE", utf8ToUcs2le }, + { "UCS-2BE", "UTF-8", ucs2beToUtf8 }, + { "UCS-2BE", "UCS-2LE", ucs2beToUcs2le }, + { "UCS-2LE", "UTF-8", ucs2leToUtf8 }, + { "UCS-2LE", "UCS-2BE", ucs2leToUcs2be }, + { "ISO-8859-1", "UTF-8", iso88591ToUtf8 }, + { "ASCII", "UTF-8", 
asciiToUtf8 } + // Update the convertStringCharset() documentation if you add more here! + }; + + bool convertStringCharsetMsvc(std::string& str, const char* from, const char* to) + { + bool ret = false; + const ConvFctList* p = find(convFctList, std::make_pair(from, to)); + if (p) ret = p->convFct_(str); +#ifndef SUPPRESS_WARNINGS + else { + EXV_WARNING << "No Windows function to map character string from " << from << " to " << to << " available.\n"; + } +#endif + return ret; + } + +#endif // defined WIN32 && !defined __CYGWIN__ +#if defined EXV_HAVE_ICONV + bool convertStringCharsetIconv(std::string& str, const char* from, const char* to) + { + if (0 == strcmp(from, to)) return true; // nothing to do + bool ret = true; iconv_t cd; cd = iconv_open(to, from); @@ -1329,18 +1519,18 @@ namespace Exiv2 { return false; } std::string outstr; - EXV_ICONV_CONST char *inptr = const_cast<char*>(str.c_str()); + EXV_ICONV_CONST char* inptr = const_cast<char*>(str.c_str()); size_t inbytesleft = str.length(); while (inbytesleft) { - char outbuf[100]; - char *outptr = outbuf; - size_t outbytesleft = sizeof(outbuf) - 1; + char outbuf[256]; + char* outptr = outbuf; + size_t outbytesleft = sizeof(outbuf); size_t rc = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft); - int outbytesProduced = sizeof(outbuf) - 1 - outbytesleft; + int outbytesProduced = sizeof(outbuf) - outbytesleft; if (rc == size_t(-1) && errno != E2BIG) { #ifndef SUPPRESS_WARNINGS EXV_WARNING << "iconv: " << strError() @@ -1349,7 +1539,6 @@ namespace Exiv2 { ret = false; break; } - *outptr = '\0'; outstr.append(std::string(outbuf, outbytesProduced)); } if (cd != (iconv_t)(-1)) { @@ -1358,20 +1547,12 @@ namespace Exiv2 { if (ret) str = outstr; return ret; -#else // !EXV_HAVE_ICONV - return false; -#endif // EXV_HAVE_ICONV } -} // namespace Exiv2 - -// ***************************************************************************** -// local definitions -namespace { - - bool getTextValue(std::string& value, const 
Exiv2::XmpData::iterator& pos) +#endif // EXV_HAVE_ICONV + bool getTextValue(std::string& value, const XmpData::iterator& pos) { - if (pos->typeId() == Exiv2::langAlt) { + if (pos->typeId() == langAlt) { // get the default language entry without x-default qualifier value = pos->toString(0); if (!pos->value().ok() && pos->count() == 1) { diff --git a/src/convert.hpp b/src/convert.hpp index a78dfd80..545723b8 100644 --- a/src/convert.hpp +++ b/src/convert.hpp @@ -79,7 +79,37 @@ namespace Exiv2 { //! Convert (move) XMP properties to IPTC tags, remove converted XMP properties. EXIV2API void moveXmpToIptc(XmpData& xmpData, IptcData& iptcData); - //! Convert string charset with iconv. + /*! + @brief Convert character encoding of \em str from \em from to \em to. + The string is modified and its size may change. + + This function uses the iconv library, if Exiv2 was compiled with iconv + support. Otherwise, on Windows, it uses MSVC functions to support a + limited number of conversions and issues a warning if an unsupported + conversion is attempted. The conversions supported on Windows without + iconv are: + + from: UTF-8 to: UCS-2BE
+ from: UTF-8 to: UCS-2LE
+ from: UCS-2BE to: UTF-8
+ from: UCS-2BE to: UCS-2LE
+ from: UCS-2LE to: UTF-8
+ from: UCS-2LE to: UCS-2BE
+ from: ISO-8859-1 to: UTF-8
+ from: ASCII to: UTF-8
+ + If the function is called but Exiv2 was not compiled with iconv support + and can't use Windows MSVC functions, it issues a warning. + + @param str The string to convert. It is updated to the converted string, + which may have a different size. If the function call fails, + the string may have been modified. + @param from Charset in which the input string is encoded as a name + understood by \c iconv_open(3). + @param to Charset to convert the string to as a name + understood by \c iconv_open(3). + @return Return \c true if the conversion was successful, else \c false. + */ EXIV2API bool convertStringCharset(std::string& str, const char* from, const char* to); } // namespace Exiv2 diff --git a/src/tags.cpp b/src/tags.cpp index 8b3bd562..1207fa49 100644 --- a/src/tags.cpp +++ b/src/tags.cpp @@ -38,6 +38,7 @@ EXIV2_RCSID("@(#) $Id$") #include "error.hpp" #include "futils.hpp" #include "value.hpp" +#include "convert.hpp" #include "i18n.h" // NLS support. #include "canonmn_int.hpp" @@ -60,14 +61,6 @@ EXIV2_RCSID("@(#) $Id$") #include #include -#ifdef EXV_HAVE_ICONV -# include -#endif - -#if defined WIN32 && !defined __CYGWIN__ -# include -#endif - // ***************************************************************************** // local declarations namespace { @@ -2308,78 +2301,18 @@ namespace Exiv2 { std::ostream& printUcs2(std::ostream& os, const Value& value, const ExifData*) { -#if defined WIN32 && !defined __CYGWIN__ - // in Windows the WideCharToMultiByte function can be used - if (value.typeId() == unsignedByte) { - DataBuf ib(value.size()); - value.copy(ib.pData_, invalidByteOrder); - int out_size = WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast(ib.pData_), - ib.size_ / sizeof(WCHAR), NULL, 0, NULL, NULL); - if (out_size >= 0) { - DataBuf ob(out_size + 1); - WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast(ib.pData_), - ib.size_ / sizeof(WCHAR), reinterpret_cast(ob.pData_), - ob.size_, NULL, NULL); - os << std::string(reinterpret_cast(ob.pData_)); - }
- else { - os << value; - } - } - return os; -#elif defined EXV_HAVE_ICONV // !(defined WIN32 && !defined __CYGWIN__) - bool go = true; - iconv_t cd = (iconv_t)(-1); - if (value.typeId() != unsignedByte) { - go = false; - } - if (go) { - cd = iconv_open("UTF-8", "UCS-2LE"); - if (cd == (iconv_t)(-1)) { -#ifndef SUPPRESS_WARNINGS - EXV_WARNING << "iconv_open: " << strError() << "\n"; -#endif - go = false; - } + bool cnv = false; + if (value.typeId() == unsignedByte && value.size() > 0) { + DataBuf buf(value.size()); + value.copy(buf.pData_, invalidByteOrder); + // Strip trailing UCS-2 0-character, if there is one (needs at least two bytes to inspect) + if (buf.size_ >= 2 && buf.pData_[buf.size_ - 1] == 0 && buf.pData_[buf.size_ - 2] == 0) buf.size_ -= 2; + std::string str((const char*)buf.pData_, buf.size_); + cnv = convertStringCharset(str, "UCS-2LE", "UTF-8"); + if (cnv) os << str; } - if (go) { - DataBuf ib(value.size()); - value.copy(ib.pData_, invalidByteOrder); - DataBuf ob(value.size()); - char* outptr = reinterpret_cast(ob.pData_); - const char* outbuf = outptr; - size_t outbytesleft = ob.size_; - EXV_ICONV_CONST char* inbuf - = reinterpret_cast(ib.pData_); - size_t inbytesleft = ib.size_; - size_t rc = iconv(cd, - &inbuf, - &inbytesleft, - &outptr, - &outbytesleft); - if (rc == size_t(-1)) { -#ifndef SUPPRESS_WARNINGS - EXV_WARNING << "iconv: " << strError() - << " inbytesleft = " << inbytesleft << "\n"; -#endif - go = false; - } - if (go) { - if (outptr > outbuf && *(outptr-1) == '\0') outptr--; - os << std::string(outbuf, outptr-outbuf); - } - } - if (cd != (iconv_t)(-1)) { - iconv_close(cd); - } - if (!go) { - os << value; - } - return os; -#else - os << value; + if (!cnv) os << value; return os; -#endif // EXV_HAVE_ICONV } // printUcs2 std::ostream& printExifUnit(std::ostream& os, const Value& value, const ExifData* metadata)