You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
62 lines
1.5 KiB
62 lines
1.5 KiB
#pragma once |
|
|
|
#include <cstdint> |
|
#include <string> |
|
#include <utility> |
|
|
|
#include "utils/stdcompat/string_view.hpp" |
|
|
|
namespace devilution { |
|
|
|
/**
 * Sentinel value that the decoding functions in this header return on error.
 *
 * 0xD83F lies in the UTF-16 surrogate range (U+D800..U+DFFF), which
 * well-formed UTF-8 never encodes.
 * NOTE(review): presumably chosen so that it cannot collide with a
 * successfully decoded code point -- confirm that the decoder rejects
 * surrogates.
 */
constexpr char32_t Utf8DecodeError = 0xD83F;
|
|
|
/** |
|
* Decodes the first code point from UTF8-encoded input. |
|
* |
|
* Sets `len` to the length of the code point in bytes. |
|
* Returns `Utf8DecodeError` on error. |
|
*/ |
|
char32_t DecodeFirstUtf8CodePoint(string_view input, std::size_t *len); |
|
|
|
/** |
|
* Decodes and removes the first code point from UTF8-encoded input. |
|
*/ |
|
inline char32_t ConsumeFirstUtf8CodePoint(string_view *input) |
|
{ |
|
std::size_t len; |
|
const char32_t result = DecodeFirstUtf8CodePoint(*input, &len); |
|
input->remove_prefix(len); |
|
return result; |
|
} |
|
|
|
/**
 * @brief Returns true if this is a trailing byte in a UTF-8 code point encoding.
 *
 * A trailing (continuation) byte is any byte of a multi-byte sequence other
 * than its first byte. Such bytes have the bit pattern `10xxxxxx`, i.e. a
 * value in the range 0x80..0xBF.
 *
 * @param x The code unit to classify.
 * @return true for 0x80..0xBF, false for every other byte value.
 */
inline bool IsTrailUtf8CodeUnit(char x)
{
	// Mask the two top bits and compare against `10`. Going through
	// `unsigned char` keeps the result independent of whether plain `char`
	// is signed on the target platform.
	return (static_cast<unsigned char>(x) & 0xC0U) == 0x80U;
}
|
|
|
/** |
|
* Returns the start byte index of the last code point in a UTF-8 string. |
|
*/ |
|
inline std::size_t FindLastUtf8Symbols(string_view input) |
|
{ |
|
if (input.empty()) |
|
return 0; |
|
|
|
std::size_t pos = input.size() - 1; |
|
while (pos > 0 && IsTrailUtf8CodeUnit(input[pos])) |
|
--pos; |
|
return pos; |
|
} |
|
|
|
/** |
|
* @brief Copy up to a given number of bytes from a UTF8 string, and zero terminate string |
|
* @param bytes Max number of bytes to copy |
|
*/ |
|
void CopyUtf8(char *dest, string_view source, std::size_t bytes); |
|
|
|
} // namespace devilution
|
|
|