#708: Implemented charset conversions with native Windows functions if iconv is not available.

v0.27.3
Andreas Huggel 14 years ago
parent 389ea5fc27
commit 32e0ade243

@ -29,6 +29,8 @@ Changes from version 0.21.1 to 0.22
segment. (Reported by Stefan Brandl) segment. (Reported by Stefan Brandl)
- 0000757: Wrong ELSE statement in src/CMakeLists.txt. - 0000757: Wrong ELSE statement in src/CMakeLists.txt.
(Reported by Michael Hansen) (Reported by Michael Hansen)
- 0000708: On Windows (MSVC and MSYS/MinGW builds), charset conversions
now use the native Windows functions if iconv is not available.
- 0000689: Support for Encapsulated PostScript (*.eps) files. - 0000689: Support for Encapsulated PostScript (*.eps) files.
(Michael Ulbrich, Volker Grabsch) (Michael Ulbrich, Volker Grabsch)
- 0000439: The exiv2 library should be re-entrant. - 0000439: The exiv2 library should be re-entrant.

@ -40,6 +40,7 @@ EXIV2_RCSID("@(#) $Id$")
#include "convert.hpp" #include "convert.hpp"
// + standard includes // + standard includes
#include <utility>
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <ios> #include <ios>
@ -50,6 +51,10 @@ EXIV2_RCSID("@(#) $Id$")
#endif #endif
#include <cstring> #include <cstring>
#if defined WIN32 && !defined __CYGWIN__
# include <windows.h>
#endif
#ifdef EXV_HAVE_ICONV #ifdef EXV_HAVE_ICONV
# include <iconv.h> # include <iconv.h>
# include <errno.h> # include <errno.h>
@ -65,6 +70,14 @@ EXIV2_RCSID("@(#) $Id$")
// ***************************************************************************** // *****************************************************************************
// local declarations // local declarations
namespace { namespace {
#if defined WIN32 && !defined __CYGWIN__
// Convert string charset with Windows MSVC functions.
bool convertStringCharsetMsvc(std::string& str, const char* from, const char* to);
#endif
#if defined EXV_HAVE_ICONV
// Convert string charset with iconv.
bool convertStringCharsetIconv(std::string& str, const char* from, const char* to);
#endif
/*! /*!
@brief Get the text value of an XmpDatum \em pos. @brief Get the text value of an XmpDatum \em pos.
@ -1318,7 +1331,184 @@ namespace Exiv2 {
bool convertStringCharset(std::string &str, const char* from, const char* to) bool convertStringCharset(std::string &str, const char* from, const char* to)
{ {
if (0 == strcmp(from, to)) return true; // nothing to do if (0 == strcmp(from, to)) return true; // nothing to do
bool ret = false;
#if defined EXV_HAVE_ICONV #if defined EXV_HAVE_ICONV
ret = convertStringCharsetIconv(str, from, to);
#elif defined WIN32 && !defined __CYGWIN__
ret = convertStringCharsetMsvc(str, from, to);
#else
# ifndef SUPPRESS_WARNINGS
EXV_WARNING << "Charset conversion required but no character mapping functionality available.\n";
# endif
#endif
return ret;
}
} // namespace Exiv2
// *****************************************************************************
// local definitions
namespace {
using namespace Exiv2;
#if defined WIN32 && !defined __CYGWIN__
/*!
  @brief Swap the two bytes of every 2-byte unit of \em str in place.

  Used to switch a UCS-2 byte string between big- and little-endian order.

  @param str Byte string to modify. Its size must be even.
  @return \c true on success (an empty string is trivially successful),
          \c false if the size of \em str is odd; in that case \em str is
          left unchanged.
 */
bool swapBytes(std::string& str)
{
    const std::string::size_type size = str.size();
    if (size & 1) {
#ifdef DEBUG
        EXV_DEBUG << "swapBytes: Size " << str.size() << " of input string is not even.\n";
#endif
        return false;
    }
    // Index with the string's own size type (not unsigned int) so that the
    // loop bound and the counter always have the same width.
    for (std::string::size_type pos = 0; pos < size; pos += 2) {
        const char first = str[pos];
        str[pos] = str[pos + 1];
        str[pos + 1] = first;
    }
    return true;
}
/*!
  @brief Convert \em str from the multi-byte code page \em cp to Windows
         wide characters, in place.

  On success \em str holds the raw bytes of the resulting wide-character
  string (two bytes per WCHAR, in the byte order in which they are stored in
  memory), without a terminating 0-character.

  @param cp  Windows code page identifier of the input, e.g. \c CP_UTF8.
  @param str Input string; replaced by the converted bytes on success and
             left unchanged on failure.
  @return \c true on success (including for an empty input), \c false if the
          input is too large for the Windows API or the conversion failed.
 */
bool mb2wc(UINT cp, std::string& str)
{
    if (str.empty()) return true;
    // MultiByteToWideChar() takes the input length as an int: reject sizes
    // that do not survive the narrowing instead of passing a wrong length.
    const int inLen = static_cast<int>(str.size());
    if (inLen <= 0 || static_cast<std::string::size_type>(inLen) != str.size()) {
#ifdef DEBUG
        EXV_DEBUG << "mb2wc: Size " << str.size() << " of input string is too large.\n";
#endif
        return false;
    }
    // First call with a 0-sized output buffer only determines the required
    // number of wide characters.
    const int len = MultiByteToWideChar(cp, 0, str.c_str(), inLen, 0, 0);
    if (len <= 0) {
#ifdef DEBUG
        EXV_DEBUG << "mb2wc: Failed to determine required size of output buffer.\n";
#endif
        return false;
    }
    // Use a real wide-character buffer so that it is correctly aligned and
    // its capacity can be passed in wide characters, which is the unit the
    // last parameter of MultiByteToWideChar() expects (not bytes).
    std::vector<WCHAR> out(static_cast<std::vector<WCHAR>::size_type>(len));
    const int ret = MultiByteToWideChar(cp, 0, str.c_str(), inLen, &out[0], len);
    if (ret <= 0) {
#ifdef DEBUG
        EXV_DEBUG << "mb2wc: Failed to convert the input string to a wide character string.\n";
#endif
        return false;
    }
    // Hand the result back as a plain byte sequence.
    const char* bytes = reinterpret_cast<const char*>(&out[0]);
    str.assign(bytes, bytes + out.size() * sizeof(WCHAR));
    return true;
}
/*!
  @brief Convert \em str from Windows wide characters to the multi-byte code
         page \em cp, in place.

  The input is interpreted as the raw bytes of a wide-character string (two
  bytes per WCHAR, in the byte order in which they are stored in memory).

  @param cp  Windows code page identifier of the output, e.g. \c CP_UTF8.
  @param str Input byte string; replaced by the converted string on success
             and left unchanged on failure. Its size must be even.
  @return \c true on success (including for an empty input), \c false if the
          size of \em str is odd or too large for the Windows API, or if the
          conversion failed.
 */
bool wc2mb(UINT cp, std::string& str)
{
    if (str.empty()) return true;
    if (str.size() & 1) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Size " << str.size() << " of input string is not even.\n";
#endif
        return false;
    }
    // WideCharToMultiByte() takes the input length as an int: reject sizes
    // that do not survive the narrowing instead of passing a wrong length.
    const std::string::size_type wcCount = str.size() / 2;
    const int inLen = static_cast<int>(wcCount);
    if (inLen <= 0 || static_cast<std::string::size_type>(inLen) != wcCount) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Size " << str.size() << " of input string is too large.\n";
#endif
        return false;
    }
    // Copy the bytes into a real wide-character buffer rather than casting
    // the std::string storage, which is not guaranteed to be suitably
    // aligned for WCHAR access.
    std::vector<WCHAR> in(static_cast<std::vector<WCHAR>::size_type>(inLen));
    std::memcpy(&in[0], str.data(), str.size());
    // First call with a 0-sized output buffer only determines the required
    // number of bytes.
    const int len = WideCharToMultiByte(cp, 0, &in[0], inLen, 0, 0, 0, 0);
    if (len <= 0) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Failed to determine required size of output buffer.\n";
#endif
        return false;
    }
    std::vector<std::string::value_type> out(static_cast<std::vector<std::string::value_type>::size_type>(len));
    const int ret = WideCharToMultiByte(cp, 0, &in[0], inLen, &out[0], len, 0, 0);
    if (ret <= 0) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Failed to convert the input string to a multi byte string.\n";
#endif
        return false;
    }
    str.assign(out.begin(), out.end());
    return true;
}
//! Convert \em str in place from UTF-8 to big-endian 2-byte units: widen it
//! with mb2wc(), then swap the bytes of every unit. Returns \c false as soon
//! as one of the two steps fails.
bool utf8ToUcs2be(std::string& str)
{
    // Short-circuit evaluation: the byte swap only runs if widening succeeded.
    return mb2wc(CP_UTF8, str) && swapBytes(str);
}
//! Convert \em str in place from UTF-8 to little-endian 2-byte units. The
//! output of mb2wc() is used as is, without any byte swapping.
bool utf8ToUcs2le(std::string& str)
{
    const bool converted = mb2wc(CP_UTF8, str);
    return converted;
}
//! Convert \em str in place from big-endian 2-byte units to UTF-8: swap the
//! bytes of every unit, then narrow the result with wc2mb(). Returns
//! \c false as soon as one of the two steps fails.
bool ucs2beToUtf8(std::string& str)
{
    // Short-circuit evaluation: wc2mb() only runs if the byte swap succeeded.
    return swapBytes(str) && wc2mb(CP_UTF8, str);
}
//! Convert \em str in place from big-endian to little-endian 2-byte units,
//! which is a plain byte swap of every unit.
bool ucs2beToUcs2le(std::string& str)
{
    const bool swapped = swapBytes(str);
    return swapped;
}
//! Convert \em str in place from little-endian 2-byte units to UTF-8. The
//! input is passed to wc2mb() as is, without any byte swapping.
bool ucs2leToUtf8(std::string& str)
{
    const bool converted = wc2mb(CP_UTF8, str);
    return converted;
}
//! Convert \em str in place from little-endian to big-endian 2-byte units,
//! which is a plain byte swap of every unit.
bool ucs2leToUcs2be(std::string& str)
{
    const bool swapped = swapBytes(str);
    return swapped;
}
bool iso88591ToUtf8(std::string& str)
{
bool ret = mb2wc(28591, str);
if (ret) ret = wc2mb(CP_UTF8, str);
return ret;
}
//! "Convert" an ASCII string to UTF-8. The string is not touched and the
//! function always reports success.
bool asciiToUtf8(std::string&)
{
    // Nothing to convert: the input is returned unchanged.
    return true;
}
//! Signature of a function that converts the charset of a string in place
//! and returns \c true on success.
typedef bool (*ConvFct)(std::string& str);
//! Entry of the conversion table: maps a (from, to) pair of charset names
//! to the function that performs that conversion.
struct ConvFctList {
    //! Return \c true if this entry is for the conversion from charset
    //! \em fromTo.first to charset \em fromTo.second. Both names are
    //! compared with strcmp(), i.e., the match is exact and case-sensitive.
    bool operator==(std::pair<const char*, const char*> fromTo) const
    {
        if (0 != strcmp(from_, fromTo.first)) return false;
        return 0 == strcmp(to_, fromTo.second);
    }
    const char* from_;    //!< Name of the source charset
    const char* to_;      //!< Name of the target charset
    ConvFct convFct_;     //!< Function which performs the conversion
};
// Table of the charset conversions available through the Windows functions.
// Each entry is { source charset name, target charset name, conversion
// function }; the names are matched exactly (case-sensitively) by
// ConvFctList::operator==.
const ConvFctList convFctList[] = {
{ "UTF-8", "UCS-2BE", utf8ToUcs2be },
{ "UTF-8", "UCS-2LE", utf8ToUcs2le },
{ "UCS-2BE", "UTF-8", ucs2beToUtf8 },
{ "UCS-2BE", "UCS-2LE", ucs2beToUcs2le },
{ "UCS-2LE", "UTF-8", ucs2leToUtf8 },
{ "UCS-2LE", "UCS-2BE", ucs2leToUcs2be },
{ "ISO-8859-1", "UTF-8", iso88591ToUtf8 },
{ "ASCII", "UTF-8", asciiToUtf8 }
// Update the convertStringCharset() documentation if you add more here!
};
bool convertStringCharsetMsvc(std::string& str, const char* from, const char* to)
{
bool ret = false;
const ConvFctList* p = find(convFctList, std::make_pair(from, to));
if (p) ret = p->convFct_(str);
#ifndef SUPPRESS_WARNINGS
else {
EXV_WARNING << "No Windows function to map character string from " << from << " to " << to << " available.\n";
}
#endif
return ret;
}
#endif // defined WIN32 && !defined __CYGWIN__
#if defined EXV_HAVE_ICONV
bool convertStringCharsetIconv(std::string& str, const char* from, const char* to)
{
if (0 == strcmp(from, to)) return true; // nothing to do
bool ret = true; bool ret = true;
iconv_t cd; iconv_t cd;
cd = iconv_open(to, from); cd = iconv_open(to, from);
@ -1332,15 +1522,15 @@ namespace Exiv2 {
EXV_ICONV_CONST char* inptr = const_cast<char*>(str.c_str()); EXV_ICONV_CONST char* inptr = const_cast<char*>(str.c_str());
size_t inbytesleft = str.length(); size_t inbytesleft = str.length();
while (inbytesleft) { while (inbytesleft) {
char outbuf[100]; char outbuf[256];
char* outptr = outbuf; char* outptr = outbuf;
size_t outbytesleft = sizeof(outbuf) - 1; size_t outbytesleft = sizeof(outbuf);
size_t rc = iconv(cd, size_t rc = iconv(cd,
&inptr, &inptr,
&inbytesleft, &inbytesleft,
&outptr, &outptr,
&outbytesleft); &outbytesleft);
int outbytesProduced = sizeof(outbuf) - 1 - outbytesleft; int outbytesProduced = sizeof(outbuf) - outbytesleft;
if (rc == size_t(-1) && errno != E2BIG) { if (rc == size_t(-1) && errno != E2BIG) {
#ifndef SUPPRESS_WARNINGS #ifndef SUPPRESS_WARNINGS
EXV_WARNING << "iconv: " << strError() EXV_WARNING << "iconv: " << strError()
@ -1349,7 +1539,6 @@ namespace Exiv2 {
ret = false; ret = false;
break; break;
} }
*outptr = '\0';
outstr.append(std::string(outbuf, outbytesProduced)); outstr.append(std::string(outbuf, outbytesProduced));
} }
if (cd != (iconv_t)(-1)) { if (cd != (iconv_t)(-1)) {
@ -1358,20 +1547,12 @@ namespace Exiv2 {
if (ret) str = outstr; if (ret) str = outstr;
return ret; return ret;
#else // !EXV_HAVE_ICONV
return false;
#endif // EXV_HAVE_ICONV
} }
} // namespace Exiv2 #endif // EXV_HAVE_ICONV
bool getTextValue(std::string& value, const XmpData::iterator& pos)
// *****************************************************************************
// local definitions
namespace {
bool getTextValue(std::string& value, const Exiv2::XmpData::iterator& pos)
{ {
if (pos->typeId() == Exiv2::langAlt) { if (pos->typeId() == langAlt) {
// get the default language entry without x-default qualifier // get the default language entry without x-default qualifier
value = pos->toString(0); value = pos->toString(0);
if (!pos->value().ok() && pos->count() == 1) { if (!pos->value().ok() && pos->count() == 1) {

@ -79,7 +79,37 @@ namespace Exiv2 {
//! Convert (move) XMP properties to IPTC tags, remove converted XMP properties. //! Convert (move) XMP properties to IPTC tags, remove converted XMP properties.
EXIV2API void moveXmpToIptc(XmpData& xmpData, IptcData& iptcData); EXIV2API void moveXmpToIptc(XmpData& xmpData, IptcData& iptcData);
//! Convert string charset with iconv. /*!
@brief Convert character encoding of \em str from \em from to \em to.
The string is modified and its size may change.
This function uses the iconv library, if Exiv2 was compiled with iconv
support. Otherwise, on Windows, it uses MSVC functions to support a
limited number of conversions and issues a warning if an unsupported
conversion is attempted. The conversions supported on Windows without
iconv are:
from: UTF-8 to: UCS-2BE<br>
from: UTF-8 to: UCS-2LE<br>
from: UCS-2BE to: UTF-8<br>
from: UCS-2BE to: UCS-2LE<br>
from: UCS-2LE to: UTF-8<br>
from: UCS-2LE to: UCS-2BE<br>
from: ISO-8859-1 to: UTF-8<br>
from: ASCII to: UTF-8<br>
If the function is called but Exiv2 was not compiled with iconv support
and can't use Windows MSVC functions, it issues a warning and returns \c false.
@param str The string to convert. It is updated to the converted string,
which may have a different size. If the function call fails,
the string may have been modified.
@param from Charset in which the input string is encoded as a name
understood by \c iconv_open(3).
@param to Charset to convert the string to as a name
understood by \c iconv_open(3).
@return Return \c true if the conversion was successful, else \c false.
*/
EXIV2API bool convertStringCharset(std::string& str, const char* from, const char* to); EXIV2API bool convertStringCharset(std::string& str, const char* from, const char* to);
} // namespace Exiv2 } // namespace Exiv2

@ -38,6 +38,7 @@ EXIV2_RCSID("@(#) $Id$")
#include "error.hpp" #include "error.hpp"
#include "futils.hpp" #include "futils.hpp"
#include "value.hpp" #include "value.hpp"
#include "convert.hpp"
#include "i18n.h" // NLS support. #include "i18n.h" // NLS support.
#include "canonmn_int.hpp" #include "canonmn_int.hpp"
@ -60,14 +61,6 @@ EXIV2_RCSID("@(#) $Id$")
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#ifdef EXV_HAVE_ICONV
# include <iconv.h>
#endif
#if defined WIN32 && !defined __CYGWIN__
# include <windows.h>
#endif
// ***************************************************************************** // *****************************************************************************
// local declarations // local declarations
namespace { namespace {
@ -2308,78 +2301,18 @@ namespace Exiv2 {
std::ostream& printUcs2(std::ostream& os, const Value& value, const ExifData*) std::ostream& printUcs2(std::ostream& os, const Value& value, const ExifData*)
{ {
#if defined WIN32 && !defined __CYGWIN__ bool cnv = false;
// in Windows the WideCharToMultiByte function can be used if (value.typeId() == unsignedByte && value.size() > 0) {
if (value.typeId() == unsignedByte) { DataBuf buf(value.size());
DataBuf ib(value.size()); value.copy(buf.pData_, invalidByteOrder);
value.copy(ib.pData_, invalidByteOrder); // Strip trailing UCS-2 0-character, if there is one
int out_size = WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast<LPWSTR>(ib.pData_), if (buf.pData_[buf.size_ - 1] == 0 && buf.pData_[buf.size_ - 2] == 0) buf.size_ -= 2;
ib.size_ / sizeof(WCHAR), NULL, 0, NULL, NULL); std::string str((const char*)buf.pData_, buf.size_);
if (out_size >= 0) { cnv = convertStringCharset(str, "UCS-2LE", "UTF-8");
DataBuf ob(out_size + 1); if (cnv) os << str;
WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast<LPWSTR>(ib.pData_), }
ib.size_ / sizeof(WCHAR), reinterpret_cast<char*>(ob.pData_), if (!cnv) os << value;
ob.size_, NULL, NULL);
os << std::string(reinterpret_cast<char*>(ob.pData_));
}
else {
os << value;
}
}
return os;
#elif defined EXV_HAVE_ICONV // !(defined WIN32 && !defined __CYGWIN__)
bool go = true;
iconv_t cd = (iconv_t)(-1);
if (value.typeId() != unsignedByte) {
go = false;
}
if (go) {
cd = iconv_open("UTF-8", "UCS-2LE");
if (cd == (iconv_t)(-1)) {
#ifndef SUPPRESS_WARNINGS
EXV_WARNING << "iconv_open: " << strError() << "\n";
#endif
go = false;
}
}
if (go) {
DataBuf ib(value.size());
value.copy(ib.pData_, invalidByteOrder);
DataBuf ob(value.size());
char* outptr = reinterpret_cast<char*>(ob.pData_);
const char* outbuf = outptr;
size_t outbytesleft = ob.size_;
EXV_ICONV_CONST char* inbuf
= reinterpret_cast<EXV_ICONV_CONST char*>(ib.pData_);
size_t inbytesleft = ib.size_;
size_t rc = iconv(cd,
&inbuf,
&inbytesleft,
&outptr,
&outbytesleft);
if (rc == size_t(-1)) {
#ifndef SUPPRESS_WARNINGS
EXV_WARNING << "iconv: " << strError()
<< " inbytesleft = " << inbytesleft << "\n";
#endif
go = false;
}
if (go) {
if (outptr > outbuf && *(outptr-1) == '\0') outptr--;
os << std::string(outbuf, outptr-outbuf);
}
}
if (cd != (iconv_t)(-1)) {
iconv_close(cd);
}
if (!go) {
os << value;
}
return os;
#else
os << value;
return os; return os;
#endif // EXV_HAVE_ICONV
} // printUcs2 } // printUcs2
std::ostream& printExifUnit(std::ostream& os, const Value& value, const ExifData* metadata) std::ostream& printExifUnit(std::ostream& os, const Value& value, const ExifData* metadata)

Loading…
Cancel
Save