#708: Implemented charset conversions with native Windows functions if iconv is not available.

v0.27.3
Andreas Huggel 14 years ago
parent 389ea5fc27
commit 32e0ade243

@ -29,6 +29,8 @@ Changes from version 0.21.1 to 0.22
segment. (Reported by Stefan Brandl)
- 0000757: Wrong ELSE statement in src/CMakeLists.txt.
(Reported by Michael Hansen)
- 0000708: On Windows (MSVC and MSYS/MinGW builds), charset conversions
now use the respective Windows functions if iconv is not available.
- 0000689: Support for Encapsulated PostScript (*.eps) files.
(Michael Ulbrich, Volker Grabsch)
- 0000439: The exiv2 library should be re-entrant.

@ -40,6 +40,7 @@ EXIV2_RCSID("@(#) $Id$")
#include "convert.hpp"
// + standard includes
#include <utility>
#include <iostream>
#include <iomanip>
#include <ios>
@ -50,6 +51,10 @@ EXIV2_RCSID("@(#) $Id$")
#endif
#include <cstring>
#if defined WIN32 && !defined __CYGWIN__
# include <windows.h>
#endif
#ifdef EXV_HAVE_ICONV
# include <iconv.h>
# include <errno.h>
@ -65,6 +70,14 @@ EXIV2_RCSID("@(#) $Id$")
// *****************************************************************************
// local declarations
namespace {
#if defined WIN32 && !defined __CYGWIN__
// Convert the charset of str from `from` to `to` with native Windows
// functions. Only declared for native Windows builds (Cygwin is excluded).
// Returns true if the conversion succeeded.
bool convertStringCharsetMsvc(std::string& str, const char* from, const char* to);
#endif
#if defined EXV_HAVE_ICONV
// Convert the charset of str from `from` to `to` with iconv. Only declared
// if Exiv2 is built with iconv support (EXV_HAVE_ICONV).
// Returns true if the conversion succeeded.
bool convertStringCharsetIconv(std::string& str, const char* from, const char* to);
#endif
/*!
@brief Get the text value of an XmpDatum \em pos.
@ -1318,7 +1331,184 @@ namespace Exiv2 {
bool convertStringCharset(std::string &str, const char* from, const char* to)
{
    // Source and target charset names are identical: nothing to convert.
    if (strcmp(from, to) == 0) return true;

    // Pick the conversion backend at compile time: iconv is preferred, the
    // native Windows functions are the fallback, otherwise the call fails.
#if defined EXV_HAVE_ICONV
    return convertStringCharsetIconv(str, from, to);
#elif defined WIN32 && !defined __CYGWIN__
    return convertStringCharsetMsvc(str, from, to);
#else
# ifndef SUPPRESS_WARNINGS
    EXV_WARNING << "Charset conversion required but no character mapping functionality available.\n";
# endif
    return false;
#endif
}
} // namespace Exiv2
// *****************************************************************************
// local definitions
namespace {
using namespace Exiv2;
#if defined WIN32 && !defined __CYGWIN__
/*!
  @brief Swap the two bytes of every 16-bit unit in \em str, in place.

  Used by the UCS-2 converters below to switch between big-endian and
  little-endian byte order.

  @param str String holding an even number of bytes. It is left untouched
             if its size is odd.
  @return \c true on success (an empty string is a success), \c false if
          the size of \em str is odd.
 */
bool swapBytes(std::string& str)
{
    const std::string::size_type size = str.size();
    if (size % 2 != 0) {
#ifdef DEBUG
        EXV_DEBUG << "swapBytes: Size " << str.size() << " of input string is not even.\n";
#endif
        return false;
    }
    // Index with the string's own size type: a narrower unsigned int counter
    // could wrap around before reaching the end of a very large string.
    for (std::string::size_type i = 0; i < size; i += 2) {
        const char t = str[i];
        str[i] = str[i + 1];
        str[i + 1] = t;
    }
    return true;
}
/*!
  @brief Convert \em str from the multi-byte code page \em cp to a wide
         character string using the Windows function MultiByteToWideChar.

  On success \em str is replaced by the raw bytes of the resulting WCHAR
  buffer (two bytes per wide character, in the byte order of the platform).
  On failure \em str is left unchanged.

  @param cp  Windows code page identifier of the encoding of \em str.
  @param str Input string; receives the converted bytes on success.
  @return \c true on success (an empty string is a success), \c false if the
          string is too long for the Windows API or the conversion failed.
 */
bool mb2wc(UINT cp, std::string& str)
{
    if (str.empty()) return true;
    // MultiByteToWideChar takes the input length as an int. Reject input
    // whose size does not survive the conversion instead of truncating it.
    const int inLen = static_cast<int>(str.size());
    if (inLen <= 0 || static_cast<std::string::size_type>(inLen) != str.size()) {
#ifdef DEBUG
        EXV_DEBUG << "mb2wc: Failed to determine required size of output buffer.\n";
#endif
        return false;
    }
    // First call: query the required output size in wide characters.
    const int len = MultiByteToWideChar(cp, 0, str.data(), inLen, 0, 0);
    if (len <= 0) {
#ifdef DEBUG
        EXV_DEBUG << "mb2wc: Failed to determine required size of output buffer.\n";
#endif
        return false;
    }
    // Use a WCHAR buffer so that it is correctly aligned and its capacity is
    // expressed in wide characters, which is the unit the API expects for
    // the output buffer size (not bytes).
    std::vector<WCHAR> out(len);
    const int ret = MultiByteToWideChar(cp, 0, str.data(), inLen, &out[0], len);
    if (ret <= 0) {
#ifdef DEBUG
        EXV_DEBUG << "mb2wc: Failed to convert the input string to a wide character string.\n";
#endif
        return false;
    }
    // Hand the wide characters back as a plain byte sequence.
    str.assign(reinterpret_cast<const char*>(&out[0]),
               static_cast<std::string::size_type>(ret) * sizeof(WCHAR));
    return true;
}
/*!
  @brief Convert the wide character data held in \em str to the multi-byte
         code page \em cp using the Windows function WideCharToMultiByte.

  The bytes of \em str are interpreted as a sequence of two-byte wide
  characters. On success \em str is replaced by the multi-byte result; on
  failure it is left unchanged.

  @param cp  Windows code page identifier of the target encoding.
  @param str Input bytes; receives the converted string on success.
  @return \c true on success (an empty string is a success), \c false if the
          size of \em str is odd or the conversion failed.
 */
bool wc2mb(UINT cp, std::string& str)
{
    if (str.empty()) return true;
    if (str.size() % 2 != 0) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Size " << str.size() << " of input string is not even.\n";
#endif
        return false;
    }
    LPCWSTR wide = reinterpret_cast<LPCWSTR>(str.data());
    const int wideLen = str.size() / 2; // number of wide characters in the input

    // First call: query the required output size in bytes.
    int needed = WideCharToMultiByte(cp, 0, wide, wideLen, 0, 0, 0, 0);
    if (needed == 0) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Failed to determine required size of output buffer.\n";
#endif
        return false;
    }
    std::vector<std::string::value_type> buf(needed);
    // Second call: perform the actual conversion into the sized buffer.
    int written = WideCharToMultiByte(cp, 0, wide, wideLen, reinterpret_cast<LPSTR>(&buf[0]), needed, 0, 0);
    if (written == 0) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Failed to convert the input string to a multi byte string.\n";
#endif
        return false;
    }
    str.assign(buf.begin(), buf.end());
    return true;
}
//! Convert \em str from UTF-8 to UCS-2BE: widen with mb2wc(), then swap the
//! bytes of every 16-bit unit. The swap is skipped if the widening fails.
bool utf8ToUcs2be(std::string& str)
{
    return mb2wc(CP_UTF8, str) && swapBytes(str);
}
//! Convert \em str from UTF-8 to UCS-2LE. The output of mb2wc() is used as
//! is, without any byte swapping. Returns the result of mb2wc().
bool utf8ToUcs2le(std::string& str)
{
return mb2wc(CP_UTF8, str);
}
//! Convert \em str from UCS-2BE to UTF-8: swap the bytes of every 16-bit
//! unit, then narrow with wc2mb(). The narrowing is skipped if the swap
//! fails (odd input size).
bool ucs2beToUtf8(std::string& str)
{
    return swapBytes(str) && wc2mb(CP_UTF8, str);
}
//! Convert \em str from UCS-2BE to UCS-2LE by swapping the bytes of every
//! 16-bit unit. Returns the result of swapBytes().
bool ucs2beToUcs2le(std::string& str)
{
return swapBytes(str);
}
//! Convert \em str from UCS-2LE to UTF-8. The input is passed to wc2mb()
//! as is, without any byte swapping. Returns the result of wc2mb().
bool ucs2leToUtf8(std::string& str)
{
return wc2mb(CP_UTF8, str);
}
//! Convert \em str from UCS-2LE to UCS-2BE by swapping the bytes of every
//! 16-bit unit. Returns the result of swapBytes().
bool ucs2leToUcs2be(std::string& str)
{
return swapBytes(str);
}
bool iso88591ToUtf8(std::string& str)
{
bool ret = mb2wc(28591, str);
if (ret) ret = wc2mb(CP_UTF8, str);
return ret;
}
//! "Convert" a string from ASCII to UTF-8. The string is not inspected or
//! modified and the function always returns true.
bool asciiToUtf8(std::string& /*str*/)
{
// nothing to do
return true;
}
//! Signature of a conversion function: converts \em str in place and
//! returns true on success.
typedef bool (*ConvFct)(std::string& str);

//! Entry of the conversion table: a (from, to) pair of charset names and
//! the function that performs that conversion.
struct ConvFctList {
    //! An entry matches a (from, to) key if both charset names are equal
    //! (case-sensitive comparison).
    bool operator==(std::pair<const char*, const char*> fromTo) const
    {
        if (strcmp(from_, fromTo.first) != 0) return false;
        return strcmp(to_, fromTo.second) == 0;
    }
    const char* from_;   //!< Name of the source charset
    const char* to_;     //!< Name of the target charset
    ConvFct convFct_;    //!< Function converting from from_ to to_
};
// Table of the charset conversions available through the Windows functions.
// Each entry is { source charset, target charset, conversion function };
// convertStringCharsetMsvc() looks up the (from, to) pair in this table.
// The charset names are matched with strcmp(), i.e., case-sensitively.
const ConvFctList convFctList[] = {
{ "UTF-8", "UCS-2BE", utf8ToUcs2be },
{ "UTF-8", "UCS-2LE", utf8ToUcs2le },
{ "UCS-2BE", "UTF-8", ucs2beToUtf8 },
{ "UCS-2BE", "UCS-2LE", ucs2beToUcs2le },
{ "UCS-2LE", "UTF-8", ucs2leToUtf8 },
{ "UCS-2LE", "UCS-2BE", ucs2leToUcs2be },
{ "ISO-8859-1", "UTF-8", iso88591ToUtf8 },
{ "ASCII", "UTF-8", asciiToUtf8 }
// Update the convertStringCharset() documentation if you add more here!
};
bool convertStringCharsetMsvc(std::string& str, const char* from, const char* to)
{
bool ret = false;
const ConvFctList* p = find(convFctList, std::make_pair(from, to));
if (p) ret = p->convFct_(str);
#ifndef SUPPRESS_WARNINGS
else {
EXV_WARNING << "No Windows function to map character string from " << from << " to " << to << " available.\n";
}
#endif
return ret;
}
#endif // defined WIN32 && !defined __CYGWIN__
#if defined EXV_HAVE_ICONV
bool convertStringCharsetIconv(std::string& str, const char* from, const char* to)
{
if (0 == strcmp(from, to)) return true; // nothing to do
bool ret = true;
iconv_t cd;
cd = iconv_open(to, from);
@ -1329,18 +1519,18 @@ namespace Exiv2 {
return false;
}
std::string outstr;
EXV_ICONV_CONST char *inptr = const_cast<char *>(str.c_str());
EXV_ICONV_CONST char* inptr = const_cast<char*>(str.c_str());
size_t inbytesleft = str.length();
while (inbytesleft) {
char outbuf[100];
char *outptr = outbuf;
size_t outbytesleft = sizeof(outbuf) - 1;
char outbuf[256];
char* outptr = outbuf;
size_t outbytesleft = sizeof(outbuf);
size_t rc = iconv(cd,
&inptr,
&inbytesleft,
&outptr,
&outbytesleft);
int outbytesProduced = sizeof(outbuf) - 1 - outbytesleft;
int outbytesProduced = sizeof(outbuf) - outbytesleft;
if (rc == size_t(-1) && errno != E2BIG) {
#ifndef SUPPRESS_WARNINGS
EXV_WARNING << "iconv: " << strError()
@ -1349,7 +1539,6 @@ namespace Exiv2 {
ret = false;
break;
}
*outptr = '\0';
outstr.append(std::string(outbuf, outbytesProduced));
}
if (cd != (iconv_t)(-1)) {
@ -1358,20 +1547,12 @@ namespace Exiv2 {
if (ret) str = outstr;
return ret;
#else // !EXV_HAVE_ICONV
return false;
#endif // EXV_HAVE_ICONV
}
} // namespace Exiv2
// *****************************************************************************
// local definitions
namespace {
bool getTextValue(std::string& value, const Exiv2::XmpData::iterator& pos)
#endif // EXV_HAVE_ICONV
bool getTextValue(std::string& value, const XmpData::iterator& pos)
{
if (pos->typeId() == Exiv2::langAlt) {
if (pos->typeId() == langAlt) {
// get the default language entry without x-default qualifier
value = pos->toString(0);
if (!pos->value().ok() && pos->count() == 1) {

@ -79,7 +79,37 @@ namespace Exiv2 {
//! Convert (move) XMP properties to IPTC tags, remove converted XMP properties.
EXIV2API void moveXmpToIptc(XmpData& xmpData, IptcData& iptcData);
//! Convert string charset with iconv.
/*!
@brief Convert character encoding of \em str from \em from to \em to.
The string is modified and its size may change.
This function uses the iconv library, if Exiv2 was compiled with iconv
support. Otherwise, on Windows, it uses MSVC functions to support a
limited number of conversions and issues a warning if an unsupported
conversion is attempted. The conversions supported on Windows without
iconv are:
from: UTF-8 to: UCS-2BE<br>
from: UTF-8 to: UCS-2LE<br>
from: UCS-2BE to: UTF-8<br>
from: UCS-2BE to: UCS-2LE<br>
from: UCS-2LE to: UTF-8<br>
from: UCS-2LE to: UCS-2BE<br>
from: ISO-8859-1 to: UTF-8<br>
from: ASCII to: UTF-8<br>
If the function is called but Exiv2 was not compiled with iconv support
and can't use Windows MSVC functions, it issues a warning.
@param str The string to convert. It is updated to the converted string,
which may have a different size. If the function call fails,
the string may have been modified.
@param from Charset in which the input string is encoded as a name
understood by \c iconv_open(3).
@param to Charset to convert the string to as a name
understood by \c iconv_open(3).
@return Return \c true if the conversion was successful, else \c false.
*/
EXIV2API bool convertStringCharset(std::string& str, const char* from, const char* to);
} // namespace Exiv2

@ -38,6 +38,7 @@ EXIV2_RCSID("@(#) $Id$")
#include "error.hpp"
#include "futils.hpp"
#include "value.hpp"
#include "convert.hpp"
#include "i18n.h" // NLS support.
#include "canonmn_int.hpp"
@ -60,14 +61,6 @@ EXIV2_RCSID("@(#) $Id$")
#include <cmath>
#include <cstring>
#ifdef EXV_HAVE_ICONV
# include <iconv.h>
#endif
#if defined WIN32 && !defined __CYGWIN__
# include <windows.h>
#endif
// *****************************************************************************
// local declarations
namespace {
@ -2308,78 +2301,18 @@ namespace Exiv2 {
std::ostream& printUcs2(std::ostream& os, const Value& value, const ExifData*)
{
#if defined WIN32 && !defined __CYGWIN__
// in Windows the WideCharToMultiByte function can be used
if (value.typeId() == unsignedByte) {
DataBuf ib(value.size());
value.copy(ib.pData_, invalidByteOrder);
int out_size = WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast<LPWSTR>(ib.pData_),
ib.size_ / sizeof(WCHAR), NULL, 0, NULL, NULL);
if (out_size >= 0) {
DataBuf ob(out_size + 1);
WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast<LPWSTR>(ib.pData_),
ib.size_ / sizeof(WCHAR), reinterpret_cast<char*>(ob.pData_),
ob.size_, NULL, NULL);
os << std::string(reinterpret_cast<char*>(ob.pData_));
}
else {
os << value;
}
}
return os;
#elif defined EXV_HAVE_ICONV // !(defined WIN32 && !defined __CYGWIN__)
bool go = true;
iconv_t cd = (iconv_t)(-1);
if (value.typeId() != unsignedByte) {
go = false;
}
if (go) {
cd = iconv_open("UTF-8", "UCS-2LE");
if (cd == (iconv_t)(-1)) {
#ifndef SUPPRESS_WARNINGS
EXV_WARNING << "iconv_open: " << strError() << "\n";
#endif
go = false;
}
bool cnv = false;
if (value.typeId() == unsignedByte && value.size() > 0) {
DataBuf buf(value.size());
value.copy(buf.pData_, invalidByteOrder);
// Strip trailing UCS-2 0-character, if there is one
if (buf.pData_[buf.size_ - 1] == 0 && buf.pData_[buf.size_ - 2] == 0) buf.size_ -= 2;
std::string str((const char*)buf.pData_, buf.size_);
cnv = convertStringCharset(str, "UCS-2LE", "UTF-8");
if (cnv) os << str;
}
if (go) {
DataBuf ib(value.size());
value.copy(ib.pData_, invalidByteOrder);
DataBuf ob(value.size());
char* outptr = reinterpret_cast<char*>(ob.pData_);
const char* outbuf = outptr;
size_t outbytesleft = ob.size_;
EXV_ICONV_CONST char* inbuf
= reinterpret_cast<EXV_ICONV_CONST char*>(ib.pData_);
size_t inbytesleft = ib.size_;
size_t rc = iconv(cd,
&inbuf,
&inbytesleft,
&outptr,
&outbytesleft);
if (rc == size_t(-1)) {
#ifndef SUPPRESS_WARNINGS
EXV_WARNING << "iconv: " << strError()
<< " inbytesleft = " << inbytesleft << "\n";
#endif
go = false;
}
if (go) {
if (outptr > outbuf && *(outptr-1) == '\0') outptr--;
os << std::string(outbuf, outptr-outbuf);
}
}
if (cd != (iconv_t)(-1)) {
iconv_close(cd);
}
if (!go) {
os << value;
}
return os;
#else
os << value;
if (!cnv) os << value;
return os;
#endif // EXV_HAVE_ICONV
} // printUcs2
std::ostream& printExifUnit(std::ostream& os, const Value& value, const ExifData* metadata)

Loading…
Cancel
Save