Browse Source

DrawString: Stop allocating

Switch to a state-machine UTF-8 decoder from the branchless one.
This allows us to avoid copying the string on every `DrawString` call.
pull/3415/head
Gleb Mazovetskiy 4 years ago committed by Anders Jenbo
parent
commit
e9a9daa794
  1. 3
      3rdParty/hoehrmann_utf8/CMakeLists.txt
  2. 61
      3rdParty/hoehrmann_utf8/hoehrmann_utf8.h
  3. 6
      CMakeLists.txt
  4. 2
      Source/DiabloUI/diabloui.cpp
  5. 2
      Source/control.cpp
  6. 49
      Source/engine/render/text_render.cpp
  7. 2
      Source/miniwin/misc_msg.cpp
  8. 27
      Source/utils/utf8.cpp
  9. 85
      Source/utils/utf8.h
  10. 56
      Source/utils/utf8.hpp

3
3rdParty/hoehrmann_utf8/CMakeLists.txt vendored

@ -0,0 +1,3 @@
# Header-only target: declares no sources of its own and only adds this
# directory to the include path of whatever links against `hoehrmann_utf8`.
add_library(hoehrmann_utf8 INTERFACE)
target_include_directories(hoehrmann_utf8 INTERFACE ${CMAKE_CURRENT_LIST_DIR})

61
3rdParty/hoehrmann_utf8/hoehrmann_utf8.h vendored

@ -0,0 +1,61 @@
/* Adapted from: https://github.com/hoehrmann/utf-8-misc/blob/449221e7a693a9c7b8938721cd4244eed4ca9320/utf8_branch.h */
/*-
* Copyright (c) 2014 Taylor R Campbell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
/* Decoder state in which utf8_decode_step() starts a fresh code point. */
#define UTF8_ACCEPT 0
/* Decoder state returned once the input has been rejected. */
#define UTF8_REJECT 0xf
/*
 * Character-class table: 16 words, each packing eight 4-bit class values.
 * utf8_decode_step() consults it only for octets with the high bit set,
 * selecting the word with (octet >> 3) & 0xf and the nibble with octet & 7.
 */
static const uint32_t utf8_classtab[0x10] = {
0x88888888UL,0x88888888UL,0x99999999UL,0x99999999UL,
0xaaaaaaaaUL,0xaaaaaaaaUL,0xaaaaaaaaUL,0xaaaaaaaaUL,
0x222222ffUL,0x22222222UL,0x22222222UL,0x22222222UL,
0x3333333bUL,0x33433333UL,0xfff5666cUL,0xffffffffUL,
};
/*
 * State-transition table: one word per character class, each packing eight
 * 4-bit next-state values. utf8_decode_step() selects the word by class and
 * the nibble with state & 7.
 */
static const uint32_t utf8_statetab[0x10] = {
0xfffffff0UL,0xffffffffUL,0xfffffff1UL,0xfffffff3UL,
0xfffffff4UL,0xfffffff7UL,0xfffffff6UL,0xffffffffUL,
0x33f11f0fUL,0xf3311f0fUL,0xf33f110fUL,0xfffffff2UL,
0xfffffff5UL,0xffffffffUL,0xffffffffUL,0xffffffffUL,
};
/*
 * Feeds a single octet into the UTF-8 decoder.
 *
 * state: value returned by the previous call, or UTF8_ACCEPT to begin a new
 *        code point.
 * octet: the next input byte.
 * cpp:   code point accumulator. It is written on every call (including calls
 *        that end up returning 0xf): overwritten when `state` is UTF8_ACCEPT,
 *        otherwise shifted left by six bits and merged with the low six bits
 *        of `octet`.
 *
 * Returns the next state as a 4-bit value. 0xf is returned unconditionally
 * when bit 3 of the incoming `state` is set.
 */
static inline uint8_t
utf8_decode_step(uint8_t state, uint8_t octet, uint32_t *cpp)
{
/* `reject` is non-zero for any incoming state >= 8; `nonascii` is the high bit of the octet. */
const uint8_t reject = (state >> 3), nonascii = (octet >> 7);
/* Octets below 0x80 are class 0; all others take their class from a nibble of utf8_classtab. */
const uint8_t klass = (!nonascii? 0 :
(0xf & (utf8_classtab[(octet >> 3) & 0xf] >> (4 * (octet & 7)))));
/* At the start of a sequence the class is also used as the number of high bits to mask off the octet. */
*cpp = (state == UTF8_ACCEPT
? (octet & (0xffU >> klass))
: ((octet & 0x3fU) | (*cpp << 6)));
/* Next state is the nibble of utf8_statetab[klass] selected by the low three bits of the state. */
return (reject? 0xf :
(0xf & (utf8_statetab[klass] >> (4 * (state & 7)))));
}

6
CMakeLists.txt

@ -355,6 +355,8 @@ add_subdirectory(3rdParty/simpleini)
add_subdirectory(3rdParty/libmpq)
add_subdirectory(3rdParty/hoehrmann_utf8)
add_library(PKWare STATIC
3rdParty/PKWare/explode.cpp
3rdParty/PKWare/implode.cpp)
@ -466,6 +468,7 @@ set(libdevilutionx_SRCS
Source/utils/sdl_bilinear_scale.cpp
Source/utils/sdl_rwops_file_wrapper.cpp
Source/utils/sdl_thread.cpp
Source/utils/utf8.cpp
Source/DiabloUI/art.cpp
Source/DiabloUI/art_draw.cpp
Source/DiabloUI/button.cpp
@ -861,7 +864,8 @@ target_link_libraries(libdevilutionx PUBLIC
PKWare
libmpq
smacker
simpleini)
simpleini
hoehrmann_utf8)
if(WIN32)
target_link_libraries(libdevilutionx PUBLIC find_steam_game)

2
Source/DiabloUI/diabloui.cpp

@ -24,7 +24,7 @@
#include "utils/sdl_wrap.h"
#include "utils/stubs.h"
#include "utils/language.h"
#include "utils/utf8.h"
#include "utils/utf8.hpp"
#ifdef __SWITCH__
// for virtual keyboard on Switch

2
Source/control.cpp

@ -38,7 +38,7 @@
#include "utils/language.h"
#include "utils/sdl_geometry.h"
#include "utils/stdcompat/optional.hpp"
#include "utils/utf8.h"
#include "utils/utf8.hpp"
#include "options.h"
#ifdef _DEBUG

49
Source/engine/render/text_render.cpp

@ -20,7 +20,7 @@
#include "palette.h"
#include "utils/display.h"
#include "utils/sdl_compat.h"
#include "utils/utf8.h"
#include "utils/utf8.hpp"
namespace devilution {
@ -196,20 +196,13 @@ int GetLineWidth(string_view text, GameFontTables size, int spacing, int *charac
{
int lineWidth = 0;
std::string textBuffer;
textBuffer.reserve(textBuffer.size() + 3); // Buffer must be padded before calling utf8_decode()
textBuffer.append(text.data(), text.size());
textBuffer.resize(textBuffer.size() + 3);
const char *textData = textBuffer.data();
uint32_t codepoints = 0;
uint32_t currentUnicodeRow = 0;
std::array<uint8_t, 256> *kerning = nullptr;
char32_t next;
int error;
while (*textData != '\0') {
textData = utf8_decode(textData, &next, &error);
if (error)
while (!text.empty()) {
next = ConsumeFirstUtf8CodePoint(&text);
if (next == Utf8DecodeError)
break;
if (next == ZWSP)
continue;
@ -249,13 +242,11 @@ std::string WordWrapString(string_view text, size_t width, GameFontTables size,
int lastBreakableLen;
char32_t lastBreakableCodePoint;
std::string input;
std::string input { text };
std::string output;
input.reserve(input.size() + 3); // Buffer must be padded before calling utf8_decode()
input.append(text.data(), text.size());
input.resize(input.size() + 3);
output.reserve(text.size());
const char *begin = input.data();
const char *end = input.data() + input.size();
const char *cur = begin;
const char *processedEnd = cur;
@ -263,10 +254,11 @@ std::string WordWrapString(string_view text, size_t width, GameFontTables size,
size_t lineWidth = 0;
std::array<uint8_t, 256> *kerning = nullptr;
char32_t next;
int error;
while (*cur != '\0') {
cur = utf8_decode(cur, &next, &error);
if (error != 0)
while (cur != end && *cur != '\0') {
uint8_t codepointLen;
next = DecodeFirstUtf8CodePoint(cur, &codepointLen);
cur += codepointLen;
if (next == Utf8DecodeError)
break;
if (next == U'\n') { // Existing line break, scan next line
@ -361,17 +353,12 @@ uint32_t DrawString(const Surface &out, string_view text, const Rectangle &rect,
Art *font = nullptr;
std::array<uint8_t, 256> *kerning = nullptr;
std::string textBuffer(text);
textBuffer.resize(textBuffer.size() + 4); // Buffer must be padded before calling utf8_decode()
const char *textData = textBuffer.data();
const char *previousPosition = textData;
char32_t next;
uint32_t currentUnicodeRow = 0;
int error;
for (; *textData != '\0'; previousPosition = textData) {
textData = utf8_decode(textData, &next, &error);
if (error)
string_view remaining = text;
while (!remaining.empty() && remaining[0] != '\0') {
next = ConsumeFirstUtf8CodePoint(&remaining);
if (next == Utf8DecodeError)
break;
if (next == ZWSP)
continue;
@ -392,8 +379,8 @@ uint32_t DrawString(const Surface &out, string_view text, const Rectangle &rect,
if (HasAnyOf(flags, (UiFlags::AlignCenter | UiFlags::AlignRight))) {
lineWidth = (*kerning)[frame];
if (*textData != '\0')
lineWidth += spacing + GetLineWidth(textData, size, spacing);
if (text[0] != '\0')
lineWidth += spacing + GetLineWidth(text, size, spacing);
}
if (HasAnyOf(flags, UiFlags::AlignCenter))
@ -415,7 +402,7 @@ uint32_t DrawString(const Surface &out, string_view text, const Rectangle &rect,
DrawArt(out, characterPosition, LoadFont(size, color, 0), '|');
}
return previousPosition - textBuffer.data();
return text.data() - remaining.data();
}
uint8_t PentSpn2Spin()

2
Source/miniwin/misc_msg.cpp

@ -27,7 +27,7 @@
#include "utils/log.hpp"
#include "utils/sdl_compat.h"
#include "utils/stubs.h"
#include "utils/utf8.h"
#include "utils/utf8.hpp"
#ifdef __vita__
#include "platform/vita/touch.h"

27
Source/utils/utf8.cpp

@ -0,0 +1,27 @@
#include "utils/utf8.hpp"
#include <cstddef>
#include <hoehrmann_utf8.h>
namespace devilution {
/**
 * Decodes the first code point from UTF8-encoded `input`.
 *
 * `*len` is always written and receives the number of bytes examined:
 *  - on success, the encoded length of the returned code point;
 *  - on an invalid sequence, the bytes up to and including the rejected one;
 *  - when `input` is empty or ends in the middle of a sequence, `input.size()`.
 *
 * @return the decoded code point, or `Utf8DecodeError` if no complete, valid
 *         code point could be decoded.
 */
char32_t DecodeFirstUtf8CodePoint(string_view input, uint8_t *len)
{
	uint32_t codepoint = 0;
	// Same width as the state type taken and returned by utf8_decode_step().
	uint8_t state = UTF8_ACCEPT;
	for (std::size_t i = 0; i < input.size(); ++i) {
		state = utf8_decode_step(state, static_cast<uint8_t>(input[i]), &codepoint);
		if (state == UTF8_ACCEPT) {
			*len = static_cast<uint8_t>(i + 1);
			return codepoint;
		}
		if (state == UTF8_REJECT) {
			*len = static_cast<uint8_t>(i + 1);
			return Utf8DecodeError;
		}
	}
	// The input ran out before a code point was completed (or was empty).
	// Report every remaining byte as consumed so callers never read an
	// unset `len` and never receive a partially accumulated code point.
	// A UTF-8 sequence is at most four bytes long, so the count fits in uint8_t.
	*len = static_cast<uint8_t>(input.size());
	return Utf8DecodeError;
}
} // namespace devilution

85
Source/utils/utf8.h

@ -1,85 +0,0 @@
#pragma once
#include <cstdint>
#include <string>
#include <utility>
/* Branchless UTF-8 decoder
*
* This is free and unencumbered software released into the public domain.
*/
/* Decode the next character, C, from BUF, reporting errors in E.
*
* Since this is a branchless decoder, four bytes will be read from the
* buffer regardless of the actual length of the next character. This
* means the buffer _must_ have at least three bytes of zero padding
* following the end of the data stream.
*
* Errors are reported in E, which will be non-zero if the parsed
* character was somehow invalid: invalid byte sequence, non-canonical
* encoding, or a surrogate half.
*
* The function returns a pointer to the next character. When an error
* occurs, this pointer will be a guess that depends on the particular
* error, but it will always advance at least one byte.
*/
inline const char *utf8_decode(const char *buf, char32_t *c, int *e)
{
	// Sequence length keyed by the top five bits of the lead byte (0 = invalid lead).
	static const char kSequenceLength[] = {
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0
	};
	// Per-length lookup tables, indexed by the sequence length (0..4).
	static const int kLeadMask[] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };
	static const uint32_t kMinCodePoint[] = { 4194304, 0, 128, 2048, 65536 };
	static const int kCodePointShift[] = { 0, 18, 12, 6, 0 };
	static const int kErrorShift[] = { 0, 6, 4, 2, 0 };

	const auto *bytes = reinterpret_cast<const unsigned char *>(buf);
	const int length = kSequenceLength[bytes[0] >> 3];

	// Always assemble four bytes as if this were a four-byte sequence, then
	// shift the bits that do not belong to this sequence back out.
	char32_t codePoint = static_cast<char32_t>((bytes[0] & kLeadMask[length]) << 18)
	    | static_cast<char32_t>((bytes[1] & 0x3f) << 12)
	    | static_cast<char32_t>((bytes[2] & 0x3f) << 6)
	    | static_cast<char32_t>((bytes[3] & 0x3f) << 0);
	codePoint >>= kCodePointShift[length];

	// Fold every error condition into a single integer.
	int error = (codePoint < kMinCodePoint[length]) << 6; // non-canonical encoding
	error |= ((codePoint >> 11) == 0x1b) << 7;            // surrogate half
	error |= (codePoint > 0x10FFFF) << 8;                 // out of range
	error |= (bytes[1] & 0xc0) >> 2;
	error |= (bytes[2] & 0xc0) >> 4;
	error |= bytes[3] >> 6;
	error ^= 0x2a; // clears the bits of tail bytes whose top two bits are 10
	error >>= kErrorShift[length];

	*c = codePoint;
	*e = error;

	// Always move forward by at least one byte, even for an invalid lead byte.
	return buf + (length != 0 ? length : 1);
}
/* Returns the byte offset at which the last decoded symbol of TEXT starts,
 * or 0 when TEXT is empty.
 */
inline int FindLastUtf8Symbols(const char *text)
{
	// utf8_decode() always reads four bytes, so decode from a zero-padded copy.
	std::string padded(text);
	padded.resize(padded.size() + 4);

	const char *const start = padded.data();
	const char *symbolStart = start;
	const char *cursor = start;
	while (*cursor != '\0') {
		char32_t codePoint;
		int decodeError;
		cursor = utf8_decode(cursor, &codePoint, &decodeError);
		// Reaching the terminator means the symbol just decoded was the last one.
		if (*cursor == '\0')
			return symbolStart - start;
		symbolStart = cursor;
	}
	return 0;
}

56
Source/utils/utf8.hpp

@ -0,0 +1,56 @@
#pragma once
#include <cstdint>
#include <string>
#include <utility>
#include "utils/stdcompat/string_view.hpp"
namespace devilution {
/**
 * Sentinel value returned by the decoding helpers in place of a code point
 * when decoding fails.
 *
 * 0xD83F lies in the UTF-16 surrogate range (U+D800..U+DFFF), which is not a
 * valid Unicode scalar value.
 * NOTE(review): presumably chosen so that it cannot collide with a successfully
 * decoded code point — confirm the decoder rejects encoded surrogates.
 */
constexpr char32_t Utf8DecodeError = 0xD83F;
/**
 * Decodes the first code point from UTF8-encoded input.
 *
 * Sets `len` to the number of input bytes the decoder examined: the encoded
 * length of the code point on success, or the bytes up to and including the
 * rejected one on a decode error.
 * Returns `Utf8DecodeError` on error.
 *
 * NOTE(review): the result for empty input, or for input that ends in the
 * middle of a multi-byte sequence, is determined by the definition in
 * utf8.cpp — confirm before relying on `len` in those cases.
 */
char32_t DecodeFirstUtf8CodePoint(string_view input, uint8_t *len);
/**
 * Decodes and removes the first code point from UTF8-encoded input.
 *
 * On return `*input` has been advanced past the bytes that were decoded.
 * If no bytes could be reported as decoded (empty input, or input that ends
 * in the middle of a multi-byte sequence), the rest of `*input` is discarded
 * and `Utf8DecodeError` is returned, so a caller looping until `*input` is
 * empty always makes progress.
 *
 * @return the decoded code point, or `Utf8DecodeError` on error.
 */
inline char32_t ConsumeFirstUtf8CodePoint(string_view *input)
{
	// Start from zero so the value is well-defined even if the decoder
	// returns without reporting a length.
	uint8_t len = 0;
	const char32_t result = DecodeFirstUtf8CodePoint(*input, &len);
	if (len == 0) {
		// Nothing was reported as consumed: drop what is left instead of
		// passing an unset length to remove_prefix().
		input->remove_prefix(input->size());
		return Utf8DecodeError;
	}
	input->remove_prefix(len);
	return result;
}
/**
 * Reports whether `x` is a UTF-8 continuation byte, i.e. a byte of the form
 * 10xxxxxx (0x80..0xBF).
 *
 * Every byte of an encoded code point other than its first one has this form.
 */
inline bool IsTrailUtf8CodeUnit(char x)
{
	const unsigned char byte = static_cast<unsigned char>(x);
	return (byte & 0xC0U) == 0x80U;
}
/**
 * Finds where the last code point of a UTF-8 string begins.
 *
 * Scans backwards from the end, skipping continuation bytes, and returns the
 * index of the first byte that is not one. Returns 0 for empty input and
 * when every byte after index 0 is a continuation byte.
 */
inline std::size_t FindLastUtf8Symbols(string_view input)
{
	for (std::size_t end = input.size(); end > 1; --end) {
		const std::size_t index = end - 1;
		if (!IsTrailUtf8CodeUnit(input[index]))
			return index;
	}
	return 0;
}
} // namespace devilution
Loading…
Cancel
Save