|
|
|
|
@ -24,8 +24,8 @@
|
|
|
|
|
* occurs, this pointer will be a guess that depends on the particular |
|
|
|
|
* error, but it will always advance at least one byte. |
|
|
|
|
*/ |
|
|
|
|
inline void * |
|
|
|
|
utf8_decode(void *buf, uint32_t *c, int *e) |
|
|
|
|
inline const unsigned char * |
|
|
|
|
utf8_decode(const unsigned char *buf, uint32_t *c, int *e) |
|
|
|
|
{ |
|
|
|
|
static const char lengths[] = { |
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
|
|
|
|
@ -36,14 +36,14 @@ utf8_decode(void *buf, uint32_t *c, int *e)
|
|
|
|
|
static const int shiftc[] = {0, 18, 12, 6, 0}; |
|
|
|
|
static const int shifte[] = {0, 6, 4, 2, 0}; |
|
|
|
|
|
|
|
|
|
unsigned char *s = static_cast<unsigned char*>(buf); |
|
|
|
|
const unsigned char *s = buf; |
|
|
|
|
int len = lengths[s[0] >> 3]; |
|
|
|
|
|
|
|
|
|
/* Compute the pointer to the next character early so that the next
|
|
|
|
|
* iteration can start working on the next character. Neither Clang |
|
|
|
|
* nor GCC figure out this reordering on their own. |
|
|
|
|
*/ |
|
|
|
|
unsigned char *next = s + len + !len; |
|
|
|
|
const unsigned char *next = s + len + !len; |
|
|
|
|
|
|
|
|
|
/* Assume a four-byte character and load four bytes. Unused bits are
|
|
|
|
|
* shifted out. |
|
|
|
|
@ -71,7 +71,7 @@ inline std::string utf8_to_latin1(const char* in)
|
|
|
|
|
{ |
|
|
|
|
std::string instr(in); |
|
|
|
|
instr.resize(instr.size() + 4); |
|
|
|
|
const char* buf = instr.data(); |
|
|
|
|
const unsigned char* buf = reinterpret_cast<const unsigned char*>(instr.data()); |
|
|
|
|
std::string ret; |
|
|
|
|
uint32_t next; |
|
|
|
|
int error; |
|
|
|
|
|