Browse Source

Improve iconv error handling

- Change conversion errors to warnings
- Only print one warning if iconv_open() fails
- Add a fall-back converter
- Replace bad code sequences and try to resume on the next byte
coverity_scan
Daniel Scharrer 13 years ago
parent
commit
ebb2ec3ac8
  1. 78
      src/util/load.cpp

78
src/util/load.cpp

@ -32,6 +32,10 @@
namespace {
static const boost::uint32_t cp_utf8 = 65001;
static const boost::uint32_t cp_utf16 = 1200;
static const char replacement_char = '_';
std::map<boost::uint32_t, iconv_t> converters;
iconv_t get_converter(boost::uint32_t codepage) {
@ -43,7 +47,7 @@ iconv_t get_converter(boost::uint32_t codepage) {
}
std::ostringstream oss;
if(codepage == 1200) {
if(codepage == cp_utf16) {
// iconv's behavior for "UTF-16" is platform-dependent if there is no BOM.
// There never is any BOM in Inno Setup files and it's always little-endian,
// so we specify the exact encoding.
@ -52,7 +56,13 @@ iconv_t get_converter(boost::uint32_t codepage) {
oss << "CP" << codepage;
}
return converters[codepage] = iconv_open("UTF-8", oss.str().c_str());
iconv_t handle = iconv_open("UTF-8", oss.str().c_str());
if(handle == iconv_t(-1)) {
log_warning << "could not get " << oss.str() << " -> UTF-8 converter";
}
return converters[codepage] = handle;
}
};
@ -83,9 +93,40 @@ void encoded_string::load(std::istream & is, std::string & target, boost::uint32
to_utf8(temp, target, codepage);
}
//! Fallback conversion that will at least work for ASCII characters
/*!
 * Produces exactly one output byte per input code unit: the unit itself if it
 * encodes an ASCII character, or \c replacement_char otherwise.
 *
 * \param from     Raw encoded input bytes.
 * \param to       Receives the converted text; any previous contents are discarded.
 * \param codepage Source codepage. \c cp_utf16 input is read as 2-byte units with
 *                 the low byte first; every other codepage is read byte by byte.
 */
static void to_utf8_fallback(const std::string & from, std::string & to,
                             boost::uint32_t codepage) {
	
	const size_t skip = ((codepage == cp_utf16) ? 2 : 1);
	
	to.clear();
	to.reserve(ceildiv(from.size(), skip));
	
	for(size_t i = 0; i < from.size(); i += skip) {
		
		const unsigned char low = static_cast<unsigned char>(from[i]);
		
		bool ascii = (low <= 127);
		if(ascii && skip == 2) {
			// A 2-byte unit is only ASCII if its high byte is present and zero.
			// Checking just the low byte would turn e.g. U+0141 (0x41 0x01) into 'A',
			// and a truncated trailing byte is not a complete unit at all.
			ascii = (i + 1 < from.size() && from[i + 1] == '\0');
		}
		
		// copy ASCII characters, replace everything else with the replacement char
		to.push_back(ascii ? from[i] : replacement_char);
	}
}
void to_utf8(const std::string & from, std::string & to, boost::uint32_t codepage) {
if(codepage == cp_utf8) {
// copy UTF-8 directly
to = from;
return;
}
iconv_t converter = get_converter(codepage);
if(converter == iconv_t(-1)) {
to_utf8_fallback(from, to, codepage);
return;
}
/*
* Some iconv implementations declare the second parameter of iconv() as
@ -110,22 +151,45 @@ void to_utf8(const std::string & from, std::string & to, boost::uint32_t codepag
iconv(converter, NULL, NULL, NULL, NULL);
size_t skip = ((codepage == cp_utf16) ? 2 : 1);
bool warn = false;
while(insize) {
to.resize(outbase + insize + 4);
to.resize(outbase + ceildiv(insize, skip) + 4);
char * outbuf = &to[0] + outbase;
size_t outsize = to.size() - outbase;
size_t ret = iconv(converter, inbuf, &insize, &outbuf, &outsize);
if(ret == size_t(-1) && errno != E2BIG) {
log_error << "iconv error while converting from CP" << codepage << ": " << errno;
to.clear();
return;
if(ret == size_t(-1)) {
if(errno == E2BIG) {
// not enough output space - we'll allocate more in the next loop
} else if(/*errno == EILSEQ &&*/ insize >= 2) {
// invalid byte (sequence) - add a replacement char and try the next byte
if(outsize == 0) {
to.push_back(replacement_char);
} else {
*outbuf = replacement_char;
outsize--;
}
inbuf.buf += skip;
insize -= skip;
warn = true;
} else {
// something else went wrong - return what we have so far
insize = 0;
warn = true;
}
}
outbase = to.size() - outsize;
}
if(warn) {
log_warning << "unexpected data while converting from CP" << codepage << " to UTF-8";
}
to.resize(outbase);
}

Loading…
Cancel
Save