From 93361f8a1ac41c85da33df17e3a3c74bc73bbafb Mon Sep 17 00:00:00 2001
From: Daniel Scharrer
Date: Sun, 8 Jun 2014 08:47:23 +0200
Subject: [PATCH] Add support for building without iconv on Windows

Implements: issue #33
---
 CMakeLists.txt | 20 ++++++---
 README.md | 22 ++++-----
 src/configure.hpp.in | 1 +
 src/util/encoding.cpp | 102 ++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 126 insertions(+), 19 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f95b7d..4bee31b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,6 +6,7 @@ cmake_minimum_required(VERSION 2.8)
 
 # Define configuration options
 option(USE_LZMA "Build lzma decompression support." ON)
+option(USE_ICONV "Build against libiconv instead of native OS functions." OFF)
 option(DEBUG_EXTRA "Expensive debug options" OFF)
 option(SET_WARNING_FLAGS "Adjust compiler warning flags" ON)
 option(SET_OPTIMIZATION_FLAGS "Adjust compiler optimization flags" ON)
@@ -137,11 +138,16 @@ if(Boost_USE_STATIC_LIBS)
 endif()
 
 
-find_package(iconv REQUIRED)
-check_link_library(iconv iconv_LIBRARIES)
-list(APPEND LIBRARIES ${iconv_LIBRARIES})
-include_directories(SYSTEM ${iconv_INCLUDE_DIR})
-add_definitions(${iconv_DEFINITIONS})
+if(NOT WIN32 OR USE_ICONV)
+	find_package(iconv REQUIRED)
+	check_link_library(iconv iconv_LIBRARIES)
+	list(APPEND LIBRARIES ${iconv_LIBRARIES})
+	include_directories(SYSTEM ${iconv_INCLUDE_DIR})
+	add_definitions(${iconv_DEFINITIONS})
+	set(INNOEXTRACT_HAVE_ICONV 1)
+else()
+	set(INNOEXTRACT_HAVE_ICONV 0)
+endif()
 
 
 # Set compiler flags
@@ -365,4 +371,8 @@ print_configuration("File time precision" FIRST
 	INNOEXTRACT_HAVE_UTIMES "microseconds"
 	1 "seconds"
 )
+print_configuration("Charset conversion" FIRST
+	INNOEXTRACT_HAVE_ICONV "iconv" # also true on non-Windows builds, where USE_ICONV stays OFF
+	WIN32 "Win32"
+)
 message("")
diff --git a/README.md b/README.md
index f446262..d406a5f 100644
--- a/README.md
+++ b/README.md
@@ -43,21 +43,23 @@ Build options:
 | Option | Default | Description |
 |:------------------------ |:---------:|:----------- |
 | 
`USE_LZMA` | `ON` | Use `liblzma` if available. +| `USE_ICONV` | `OFF`^1 | Use `libiconv` instead of native OS functions. | `CMAKE_BUILD_TYPE` | `Release` | Set to `Debug` to enable debug output. -| `DEBUG` | `OFF`^1 | Enable debug output and runtime checks. +| `DEBUG` | `OFF`^2 | Enable debug output and runtime checks. | `DEBUG_EXTRA` | `OFF` | Expensive debug options. | `SET_WARNING_FLAGS` | `ON` | Adjust compiler warning flags. This should not affect the produced binaries but is useful to catch potential problems. | `SET_OPTIMIZATION_FLAGS` | `ON` | Adjust compiler optimization flags. For non-debug builds the only thing this does is instruct the linker to only link against libraries that are actually needed. | `USE_CXX11` | `ON` | Try to compile in C++11 mode if available. -| `USE_STATIC_LIBS` | `OFF`^2 | Turns on static linking for all libraries, including `-static-libgcc` and `-static-libstdc++`. You can also use the individual options below: -| `LZMA_USE_STATIC_LIBS` | `OFF`^3 | Statically link `liblzma`. -| `Boost_USE_STATIC_LIBS` | `OFF`^3 | Statically link Boost. See also `FindBoost.cmake` -| `ZLIB_USE_STATIC_LIBS` | `OFF`^3 | Statically link `libz`. (used via Boost) -| `BZip2_USE_STATIC_LIBS` | `OFF`^3 | Statically link `libbz2`. (used via Boost) -| `iconv_USE_STATIC_LIBS` | `OFF`^3 | Statically link `libiconv`. -1. Enabled automatically if `CMAKE_BUILD_TYPE` is set to `Debug`. -2. Under Windows, the default is `ON`. -3. Default is `ON` if `USE_STATIC_LIBS` is enabled. +| `USE_STATIC_LIBS` | `OFF`^3 | Turns on static linking for all libraries, including `-static-libgcc` and `-static-libstdc++`. You can also use the individual options below: +| `LZMA_USE_STATIC_LIBS` | `OFF`^4 | Statically link `liblzma`. +| `Boost_USE_STATIC_LIBS` | `OFF`^4 | Statically link Boost. See also `FindBoost.cmake` +| `ZLIB_USE_STATIC_LIBS` | `OFF`^4 | Statically link `libz`. (used via Boost) +| `BZip2_USE_STATIC_LIBS` | `OFF`^4 | Statically link `libbz2`. 
(used via Boost) +| `iconv_USE_STATIC_LIBS` | `OFF`^4 | Statically link `libiconv`. +1. Only has an effect on Windows; other platforms always use `libiconv`. +2. Enabled automatically if `CMAKE_BUILD_TYPE` is set to `Debug`. +3. Under Windows, the default is `ON`. +4. Default is `ON` if `USE_STATIC_LIBS` is enabled. Install options: diff --git a/src/configure.hpp.in b/src/configure.hpp.in index e72b631..54dc9e5 100644 --- a/src/configure.hpp.in +++ b/src/configure.hpp.in @@ -27,5 +27,6 @@ // Optional dependencies #cmakedefine01 INNOEXTRACT_HAVE_LZMA +#cmakedefine01 INNOEXTRACT_HAVE_ICONV #endif // INNOEXTRACT_CONFIGURE_HPP diff --git a/src/util/encoding.cpp b/src/util/encoding.cpp index 05aaf52..ff1b483 100644 --- a/src/util/encoding.cpp +++ b/src/util/encoding.cpp @@ -20,13 +20,24 @@ #include "util/encoding.hpp" -#include -#include +#include + #include #include +#include +#include +#include + +#include "configure.hpp" +#if INNOEXTRACT_HAVE_ICONV #include #include +#elif defined(_WIN32) +#include +#else +#error No charset conversion library available! 
+#endif
 
 #include
 #include
@@ -36,12 +47,15 @@
 
 namespace util {
 
-namespace {
-
 static const codepage_id cp_utf8 = 65001;
 static const codepage_id cp_ascii = 20127;
+
+#if INNOEXTRACT_HAVE_ICONV
+
 static const char replacement_char = '_';
 
+namespace {
+
 typedef boost::unordered_map converter_map;
 converter_map converters;
 
@@ -266,4 +280,102 @@ void to_utf8(const std::string & from, std::string & to, codepage_id codepage) {
 	to.resize(outbase);
 }
 
+#elif defined(_WIN32)
+
+static const codepage_id cp_utf16le = 1200;
+
+namespace {
+
+// Get a human-readable description for a Win32 error code.
+//
+// code: error code, e.g. as returned by GetLastError()
+// Returns the system message without its trailing line break, or "unknown" if
+// no message could be retrieved.
+std::string windows_error_string(DWORD code) {
+	char * error = NULL;
+	// IGNORE_INSERTS: we pass no argument list, so %1-style placeholders in a
+	// system message must be left alone instead of being expanded.
+	DWORD n = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER
+	                         | FORMAT_MESSAGE_IGNORE_INSERTS,
+	                         NULL, code, 0, reinterpret_cast<LPSTR>(&error), 0, NULL);
+	if(n == 0 || error == NULL) {
+		return "unknown";
+	}
+	std::string ret(error, size_t(n));
+	LocalFree(error);
+	// System messages end in "\r\n": strip both characters, not just the '\n'
+	while(!ret.empty() && (ret[ret.size() - 1] == '\n' || ret[ret.size() - 1] == '\r')) {
+		ret.resize(ret.size() - 1);
+	}
+	return ret;
+}
+
+} // anonymous namespace
+
+// Convert a string from the given codepage to UTF-8 using the Win32 API.
+//
+// from: input bytes encoded in codepage cp
+// to:   receives the UTF-8 result; cleared if from is empty or the conversion fails
+// cp:   codepage identifier of the input, as understood by MultiByteToWideChar()
+void to_utf8(const std::string & from, std::string & to, codepage_id cp) {
+
+	if(from.empty()) {
+		to.clear();
+		return;
+	}
+
+	if(cp == cp_utf8 || cp == cp_ascii) {
+		// copy UTF-8 directly
+		to = from;
+		return;
+	}
+
+	// Convert from the source codepage to UTF-16LE
+	const WCHAR * utf16;
+	int utf16_size;
+	std::vector<WCHAR> buffer;
+	if(cp == cp_utf16le) {
+		// Input is already UTF-16LE: use it in place (a trailing odd byte is ignored)
+		utf16 = reinterpret_cast<const WCHAR *>(from.data());
+		utf16_size = int(from.size()) / 2;
+	} else {
+		int ret = 0;
+		// The first call only queries the required buffer size
+		utf16_size = MultiByteToWideChar(cp, 0, from.data(), int(from.length()), NULL, 0);
+		if(utf16_size > 0) {
+			buffer.resize(size_t(utf16_size));
+			ret = MultiByteToWideChar(cp, 0, from.data(), int(from.length()),
+			                          &buffer.front(), utf16_size);
+		}
+		if(utf16_size <= 0 || ret <= 0) {
+			// Fetch the error code before anything else can overwrite it
+			DWORD error = GetLastError();
+			to.clear(); // don't hand stale data back to the caller
+			log_warning << "error while converting from CP" << cp << " to UTF-16: "
+			            << windows_error_string(error);
+			return;
+		}
+		utf16 = &buffer.front();
+	}
+
+	// Convert from UTF-16-LE to UTF-8
+	int ret = 0;
+	int utf8_size = WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_size, NULL, 0, NULL, NULL);
+	if(utf8_size > 0) {
+		to.resize(size_t(utf8_size));
+		ret = WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_size,
+		                          &to[0], utf8_size, NULL, NULL);
+	}
+	if(utf8_size <= 0 || ret <= 0) {
+		DWORD error = GetLastError();
+		to.clear(); // drop the partially written buffer
+		log_warning << "error while converting from UTF-16 to UTF-8: "
+		            << windows_error_string(error);
+		return;
+	}
+
+}
+
+#endif
+
 } // namespace util