Add support for parsing fixed point decimal strings

3 years ago · 3fda0586bd
6 changed files with 337 additions and 5 deletions
--- a/Source/data/iterators.hpp
+++ b/Source/data/iterators.hpp
@ -6,6 +6,7 @@
 #include <expected.hpp>

 #include "parser.hpp"
+#include "utils/parse_int.hpp"

 namespace devilution {

@ -36,6 +37,18 @@ public:
 		}
 	}

+	/**
+	 * @brief Translates an integer parsing failure into the matching field error
+	 * @param ec failure code reported by the integer/fixed point parsing helpers
+	 * @return always an unexpected value: OutOfRange maps to Error::OutOfRange, ParseError maps to
+	 *         Error::NotANumber and any other code maps to Error::InvalidValue
+	 */
+	static tl::expected<void, Error> mapError(ParseIntError ec)
+	{
+		if (ec == ParseIntError::OutOfRange) {
+			return tl::unexpected { Error::OutOfRange };
+		}
+		if (ec == ParseIntError::ParseError) {
+			return tl::unexpected { Error::NotANumber };
+		}
+		// any code without a dedicated mapping is reported as a generic invalid value
+		return tl::unexpected { Error::InvalidValue };
+	}
+
 	DataFileField(GetFieldResult *state, const char *end, unsigned row, unsigned column)
 	    : state_(state)
 	    , end_(end)
@ -105,6 +118,47 @@ public:
 		return parseInt(value).map([value]() { return value; });
 	}

+	/**
+	 * @brief Reads the current field as a fixed point number that keeps 6 bits for the fraction
+	 *
+	 * Calls to this method can be mixed freely with operator*. On the first value access after
+	 * the last advance the field is scanned and stored, so operator* and further calls to
+	 * parseInt/parseFixed6 (including ones using a different type) work from the stored text.
+	 * @tparam T an Integral type supported by std::from_chars
+	 * @param destination receives the parsed value, it is only written when parsing succeeds
+	 * @return nothing on success, otherwise the Error that mapError produces for the ParseFixed6 failure
+	 */
+	template <typename T>
+	[[nodiscard]] tl::expected<void, Error> parseFixed6(T &destination)
+	{
+		const bool firstRead = state_->status == GetFieldResult::Status::ReadyToRead;
+		ParseIntResult<T> parsed;
+		if (!firstRead) {
+			// the field text is already stored, parse that
+			parsed = ParseFixed6<T>(state_->value);
+		} else {
+			const char *fieldStart = state_->next;
+			const auto remaining = static_cast<size_t>(end_ - fieldStart);
+			// parse straight from the buffer, ParseFixed6 records where it stopped in state_->next
+			parsed = ParseFixed6<T>({ fieldStart, remaining }, &state_->next);
+			// scan whatever is left of the field
+			*state_ = GetNextField(state_->next, end_);
+			// widen the stored value so it also covers the characters consumed while parsing
+			const auto fieldLength = (state_->value.data() - fieldStart) + state_->value.size();
+			state_->value = { fieldStart, fieldLength };
+		}
+
+		if (!parsed.has_value()) {
+			return mapError(parsed.error());
+		}
+		destination = parsed.value();
+		return {};
+	}
+
+	/**
+	 * @brief Parses the current field as a fixed point number with 6 fraction bits and returns it by value
+	 * @tparam T integral type to produce, forwarded to parseFixed6
+	 * @return the parsed value, or the Error reported by parseFixed6
+	 */
+	template <typename T>
+	[[nodiscard]] tl::expected<T, Error> asFixed6()
+	{
+		T parsed = 0;
+		tl::expected<void, Error> status = parseFixed6(parsed);
+		// the lambda is created after parseFixed6 has run, so it captures the parsed number
+		return status.map([parsed]() { return parsed; });
+	}
+
 	/**
 	 * Returns the current row number
 	 */
--- a/Source/utils/parse_int.hpp
+++ b/Source/utils/parse_int.hpp
@ -1,5 +1,6 @@
 #pragma once

+#include <algorithm>
 #include <charconv>
 #include <string_view>
 #include <system_error>
@ -19,10 +20,13 @@ using ParseIntResult = tl::expected<IntT, ParseIntError>;
 template <typename IntT>
 ParseIntResult<IntT> ParseInt(
    std::string_view str, IntT min = std::numeric_limits<IntT>::min(),
-    IntT max = std::numeric_limits<IntT>::max())
+    IntT max = std::numeric_limits<IntT>::max(), const char **endOfParse = nullptr)
 {
 	IntT value;
 	const std::from_chars_result result = std::from_chars(str.data(), str.data() + str.size(), value);
+	if (endOfParse != nullptr) {
+		*endOfParse = result.ptr;
+	}
 	if (result.ec == std::errc::invalid_argument)
 		return tl::unexpected(ParseIntError::ParseError);
 	if (result.ec == std::errc::result_out_of_range || value < min || value > max)
@ -32,4 +36,99 @@ ParseIntResult<IntT> ParseInt(
 	return value;
 }

+/**
+ * @brief Converts the digits that follow a decimal point into a count of 1/64 steps
+ *
+ * Reading stops at the first non-digit character. The value is rounded to the nearest step with
+ * exact halves rounding up, so the result ranges from 0 up to and including 64 (e.g. "9921875").
+ * @param str text immediately after the decimal point
+ * @param endOfParse if not null, receives a pointer just past the leading run of digits
+ * @return the fraction expressed in 64ths
+ */
+inline uint8_t ParseFixed6Fraction(std::string_view str, const char **endOfParse = nullptr)
+{
+	constexpr unsigned MaxDigits = 7;
+	const auto isDigit = [](char character) { return character >= '0' && character <= '9'; };
+
+	// Seven digits are enough to determine an exact rounding for a 6 bit fraction
+	uint32_t scaled = 0;
+	unsigned digitsRead = 0;
+	for (; digitsRead < MaxDigits && !str.empty() && isDigit(str.front()); ++digitsRead) {
+		scaled = scaled * 10 + str.front() - '0';
+		str.remove_prefix(1);
+	}
+	if (endOfParse != nullptr) {
+		// like std::from_chars, also consume any further digits of an overly precise value
+		*endOfParse = std::find_if_not(str.data(), str.data() + str.size(), isDigit);
+	}
+	// pad shorter inputs with zeroes so every value is expressed in seven decimal places
+	for (; digitsRead < MaxDigits; ++digitsRead) {
+		scaled *= 10;
+	}
+	// one step (1/64) is 156250 at this scale, adding half a step before truncating rounds to nearest
+	return (scaled + 78125) / 156250;
+}
+
+/**
+ * @brief Parses a decimal string such as "-1.25" into a fixed point number with 6 fraction bits
+ *
+ * The integer digits and the fraction digits are both optional but at least one digit is required,
+ * so "1.", ".5" and "-.25" are accepted while "", "." and "-." are rejected. The fraction is
+ * rounded to the nearest 1/64. A leading minus sign is only accepted for signed types.
+ * @tparam IntT integral type used to hold the fixed point value
+ * @param str text to parse
+ * @param endOfParse if not null, receives a pointer just past the last character consumed, or the
+ *                   start of str when no digits were found
+ * @return the value scaled by 64, ParseIntError::ParseError if str does not start with a number, or
+ *         ParseIntError::OutOfRange if the rounded value cannot be represented in IntT
+ */
+template <typename IntT>
+ParseIntResult<IntT> ParseFixed6(std::string_view str, const char **endOfParse = nullptr)
+{
+	if (endOfParse != nullptr) {
+		// To allow for early returns we set the end pointer to the start of the string, which is the common case for errors.
+		*endOfParse = str.data();
+	}
+
+	if (str.empty()) {
+		return tl::unexpected { ParseIntError::ParseError };
+	}
+
+	// Divide instead of shifting right: shifting a negative value is implementation-defined before C++20.
+	// Both limits lose nothing that matters: min() is an exact multiple of 64 and max() / 64 rounds down.
+	constexpr IntT minIntegerValue = std::numeric_limits<IntT>::min() / 64;
+	constexpr IntT maxIntegerValue = std::numeric_limits<IntT>::max() / 64;
+
+	const char *currentChar = str.data(); // updated by the call to ParseInt
+	ParseIntResult<IntT> integerParseResult = ParseInt(str, minIntegerValue, maxIntegerValue, &currentChar);
+
+	bool isNegative = std::is_signed_v<IntT> && str[0] == '-';
+	bool haveDigits = integerParseResult.has_value() || integerParseResult.error() == ParseIntError::OutOfRange;
+	if (haveDigits) {
+		str.remove_prefix(static_cast<size_t>(std::distance(str.data(), currentChar)));
+	} else if (isNegative) {
+		// no integer digits, step over the sign so a fraction like "-.5" can still be read
+		str.remove_prefix(1);
+	}
+
+	// if the string has no leading digits we still need to try parse the fraction part
+	uint8_t fractionPart = 0;
+	if (!str.empty() && str[0] == '.') {
+		// got a fractional part to read too
+		str.remove_prefix(1); // skip past the decimal point
+
+		fractionPart = ParseFixed6Fraction(str, &currentChar);
+		haveDigits = haveDigits || str.data() != currentChar;
+	}
+
+	if (!haveDigits) {
+		// early return in case we got a string like "-.abc", don't want to set the end pointer in this case
+		return tl::unexpected { ParseIntError::ParseError };
+	}
+
+	if (endOfParse != nullptr) {
+		*endOfParse = currentChar;
+	}
+
+	if (!integerParseResult.has_value() && integerParseResult.error() == ParseIntError::OutOfRange) {
+		// if the integer parsing gave us an out of range value then we've done a bit of unnecessary
+		//  work parsing the fraction part, but it saves duplicating code.
+		return integerParseResult;
+	}
+	// integerParseResult could be a ParseError at this point because of a string like ".123" or "-.1"
+	//  so we need to default to 0 (and use the result of the minus sign check when it's relevant)
+	IntT integerPart = integerParseResult.value_or(0);
+
+	// The fraction is in the range 0 to 64 inclusive (e.g. 0.993 rounds up to 1.0), check that combining it
+	//  with the integer part stays inside the range of IntT before doing any arithmetic.
+	bool overflows;
+	if (isNegative) {
+		// the most negative integer part already maps to the minimum of IntT, so any fraction at all
+		//  would push the value below it (e.g. "-2.5" for an int8_t)
+		overflows = integerPart <= minIntegerValue && fractionPart > 0;
+	} else {
+		// the largest integer part leaves room for fractions up to 63/64, only a carry overflows
+		overflows = integerPart >= maxIntegerValue && fractionPart >= 64;
+	}
+	if (overflows) {
+		return tl::unexpected { ParseIntError::OutOfRange };
+	}
+
+	// multiply instead of shifting left: shifting a negative value is undefined behaviour before C++20
+	IntT fixedValue = static_cast<IntT>(integerPart * 64);
+	if (isNegative) {
+		fixedValue -= fractionPart;
+	} else {
+		fixedValue += fractionPart;
+	}
+	return fixedValue;
+}
+
 } // namespace devilution
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -32,6 +32,7 @@ set(tests
  missiles_test
  pack_test
  path_test
+  parse_int_test
  player_test
  quests_test
  random_test
--- a/test/data_file_test.cpp
+++ b/test/data_file_test.cpp
@ -232,6 +232,12 @@ TEST(DataFileTest, ParseInt)
 		EXPECT_TRUE(parseIntResult.has_value()) << "Expected " << field << " to fit into a uint8_t variable";
 		EXPECT_EQ(shortVal, 145) << "Parsing should give the expected base 10 value";
 		EXPECT_EQ(*field, "145") << "Should be able to access the field value as a string even after parsing as an int";
+
+		int longVal = 1;
+		auto parseFixedResult = field.parseFixed6(longVal);
+		EXPECT_TRUE(parseFixedResult.has_value()) << "Expected " << field << " to be parsed as a fixed point integer wiith only the integer part";
+		EXPECT_EQ(longVal, 145 << 6) << "Parsing should give the expected fixed point base 10 value";
+
 		++fieldIt;

 		ASSERT_NE(fieldIt, end) << "sample.tsv must contain a third field to use as a test value for large ints";
@ -245,7 +251,7 @@ TEST(DataFileTest, ParseInt)
 			EXPECT_EQ(parseIntResult.error(), DataFileField::Error::OutOfRange) << "A value too large to fit into a uint8_t variable should report an error";
 		}
 		EXPECT_EQ(shortVal, 145) << "Value is not modified when parsing as uint8_t fails due to out of range value";
-		int longVal = 42;
+		longVal = 42;
 		parseIntResult = field.parseInt(longVal);
 		EXPECT_TRUE(parseIntResult.has_value()) << "Expected " << field << " to fit into an int variable";
 		EXPECT_EQ(longVal, 70322) << "Value is expected to be parsed into a larger type after an out of range failure";
@ -259,7 +265,28 @@ TEST(DataFileTest, ParseInt)
 		parseIntResult = field.parseInt(shortVal);
 		EXPECT_TRUE(parseIntResult.has_value()) << "Expected " << field << " to fit into a uint8_t variable (even though it's not really an int)";
 		EXPECT_EQ(shortVal, 6) << "Value is loaded as expected until the first non-digit character";
-		EXPECT_EQ(*field, "6.34") << "Should be able to access the field value as a string after failure";
+		EXPECT_EQ(*field, "6.34") << "Should be able to access the field value as a string after parsing as an int";
+		int fixedVal = 64;
+		parseFixedResult = field.parseFixed6(fixedVal);
+		EXPECT_TRUE(parseFixedResult.has_value()) << "Expected " << field << " to be parsed as a fixed point value";
+		// 6.34 is parsed as 384 (6<<6) + 22 (0.34 rounds to 0.34375, 22/64)
+		EXPECT_EQ(fixedVal, 406) << "Value is loaded as a fixed point number";
+
+		uint8_t shortFixedVal = 32;
+		parseFixedResult = field.parseFixed6(shortFixedVal);
+		EXPECT_FALSE(parseFixedResult.has_value()) << "Expected " << field << " to fail to parse into a 2.6 fixed point variable";
+		EXPECT_EQ(parseFixedResult.error(), DataFileField::Error::OutOfRange) << "A value too large to fit into a 2 bit integer part should report an error";
+		EXPECT_EQ(shortFixedVal, 32) << "The variiable should not be modified when parsing fails";
+
+		++fieldIt;
+
+		ASSERT_NE(fieldIt, end) << "sample.tsv must contain a fifth field to use as a test value for fixed point overflow";
+
+		field = *fieldIt;
+		parseFixedResult = field.parseFixed6(shortFixedVal);
+		EXPECT_FALSE(parseFixedResult.has_value()) << "Expected " << field << " to fail to parse into a 2.6 fixed point variable";
+		EXPECT_EQ(parseFixedResult.error(), DataFileField::Error::OutOfRange) << "A value that after rounding is too large to fit into a 2 bit integer part should report an error";
+		EXPECT_EQ(shortFixedVal, 32) << "The variiable should not be modified when parsing fails";
 	}
 }

--- a/test/fixtures/txtdata/sample.tsv
+++ b/test/fixtures/txtdata/sample.tsv
@ -1,2 +1,2 @@
-String	Byte	Int	Float
-Sample	145	70322	6.34
+String	Byte	Int	Float	FloatOverflow
+Sample	145	70322	6.34	3.999
--- a/test/parse_int_test.cpp
+++ b/test/parse_int_test.cpp
@ -0,0 +1,151 @@
+#include <gtest/gtest.h>
+
+#include "utils/parse_int.hpp"
+
+namespace devilution {
+// Checks how ParseInt reports empty, non-numeric, valid and out of range input
+TEST(ParseIntTest, ParseInt)
+{
+	const ParseIntResult<int> emptyResult = ParseInt<int>("");
+	ASSERT_FALSE(emptyResult.has_value());
+	EXPECT_EQ(emptyResult.error(), ParseIntError::ParseError);
+
+	const ParseIntResult<int> lettersResult = ParseInt<int>("abcd");
+	ASSERT_FALSE(lettersResult.has_value());
+	EXPECT_EQ(lettersResult.error(), ParseIntError::ParseError);
+
+	const ParseIntResult<int> validResult = ParseInt<int>("12");
+	ASSERT_TRUE(validResult.has_value());
+	EXPECT_EQ(validResult.value(), 12);
+
+	// the value fits in an int but lies outside the caller supplied limits
+	const ParseIntResult<int> limitedResult = ParseInt<int>("99999999", -5, 100);
+	ASSERT_FALSE(limitedResult.has_value());
+	EXPECT_EQ(limitedResult.error(), ParseIntError::OutOfRange);
+
+	// the value does not fit in the destination type at all
+	const ParseIntResult<int8_t> narrowResult = ParseInt<int8_t>("99999999");
+	ASSERT_FALSE(narrowResult.has_value());
+	EXPECT_EQ(narrowResult.error(), ParseIntError::OutOfRange);
+}
+
+TEST(ParseIntTest, ParseFixed6Fraction) // inputs are the digits after the decimal point, expected values are in 64ths
+{
+	EXPECT_EQ(ParseFixed6Fraction(""), 0);
+	EXPECT_EQ(ParseFixed6Fraction("0"), 0);
+	EXPECT_EQ(ParseFixed6Fraction("00781249"), 0); // just under half a step (0.0078125) rounds down
+	EXPECT_EQ(ParseFixed6Fraction("0078125"), 1); // exactly half a step rounds up
+	EXPECT_EQ(ParseFixed6Fraction("015625"), 1); // from here on each input is an exact multiple of 1/64
+	EXPECT_EQ(ParseFixed6Fraction("03125"), 2);
+	EXPECT_EQ(ParseFixed6Fraction("046875"), 3);
+	EXPECT_EQ(ParseFixed6Fraction("0625"), 4);
+	EXPECT_EQ(ParseFixed6Fraction("078125"), 5);
+	EXPECT_EQ(ParseFixed6Fraction("09375"), 6);
+	EXPECT_EQ(ParseFixed6Fraction("109375"), 7);
+	EXPECT_EQ(ParseFixed6Fraction("125"), 8);
+	EXPECT_EQ(ParseFixed6Fraction("140625"), 9);
+	EXPECT_EQ(ParseFixed6Fraction("15625"), 10);
+	EXPECT_EQ(ParseFixed6Fraction("171875"), 11);
+	EXPECT_EQ(ParseFixed6Fraction("1875"), 12);
+	EXPECT_EQ(ParseFixed6Fraction("203125"), 13);
+	EXPECT_EQ(ParseFixed6Fraction("21875"), 14);
+	EXPECT_EQ(ParseFixed6Fraction("234375"), 15);
+	EXPECT_EQ(ParseFixed6Fraction("25"), 16);
+	EXPECT_EQ(ParseFixed6Fraction("265625"), 17);
+	EXPECT_EQ(ParseFixed6Fraction("28125"), 18);
+	EXPECT_EQ(ParseFixed6Fraction("296875"), 19);
+	EXPECT_EQ(ParseFixed6Fraction("3125"), 20);
+	EXPECT_EQ(ParseFixed6Fraction("328125"), 21);
+	EXPECT_EQ(ParseFixed6Fraction("34375"), 22);
+	EXPECT_EQ(ParseFixed6Fraction("359375"), 23);
+	EXPECT_EQ(ParseFixed6Fraction("375"), 24);
+	EXPECT_EQ(ParseFixed6Fraction("390625"), 25);
+	EXPECT_EQ(ParseFixed6Fraction("40625"), 26);
+	EXPECT_EQ(ParseFixed6Fraction("421875"), 27);
+	EXPECT_EQ(ParseFixed6Fraction("4375"), 28);
+	EXPECT_EQ(ParseFixed6Fraction("453125"), 29);
+	EXPECT_EQ(ParseFixed6Fraction("46875"), 30);
+	EXPECT_EQ(ParseFixed6Fraction("484375"), 31);
+	EXPECT_EQ(ParseFixed6Fraction("5"), 32);
+	EXPECT_EQ(ParseFixed6Fraction("515625"), 33);
+	EXPECT_EQ(ParseFixed6Fraction("53125"), 34);
+	EXPECT_EQ(ParseFixed6Fraction("546875"), 35);
+	EXPECT_EQ(ParseFixed6Fraction("5625"), 36);
+	EXPECT_EQ(ParseFixed6Fraction("578125"), 37);
+	EXPECT_EQ(ParseFixed6Fraction("59375"), 38);
+	EXPECT_EQ(ParseFixed6Fraction("609375"), 39);
+	EXPECT_EQ(ParseFixed6Fraction("625"), 40);
+	EXPECT_EQ(ParseFixed6Fraction("640625"), 41);
+	EXPECT_EQ(ParseFixed6Fraction("65625"), 42);
+	EXPECT_EQ(ParseFixed6Fraction("671875"), 43);
+	EXPECT_EQ(ParseFixed6Fraction("6875"), 44);
+	EXPECT_EQ(ParseFixed6Fraction("703125"), 45);
+	EXPECT_EQ(ParseFixed6Fraction("71875"), 46);
+	EXPECT_EQ(ParseFixed6Fraction("734375"), 47);
+	EXPECT_EQ(ParseFixed6Fraction("75"), 48);
+	EXPECT_EQ(ParseFixed6Fraction("765625"), 49);
+	EXPECT_EQ(ParseFixed6Fraction("78125"), 50);
+	EXPECT_EQ(ParseFixed6Fraction("796875"), 51);
+	EXPECT_EQ(ParseFixed6Fraction("8125"), 52);
+	EXPECT_EQ(ParseFixed6Fraction("828125"), 53);
+	EXPECT_EQ(ParseFixed6Fraction("84375"), 54);
+	EXPECT_EQ(ParseFixed6Fraction("859375"), 55);
+	EXPECT_EQ(ParseFixed6Fraction("875"), 56);
+	EXPECT_EQ(ParseFixed6Fraction("890625"), 57);
+	EXPECT_EQ(ParseFixed6Fraction("90625"), 58);
+	EXPECT_EQ(ParseFixed6Fraction("921875"), 59);
+	EXPECT_EQ(ParseFixed6Fraction("9375"), 60);
+	EXPECT_EQ(ParseFixed6Fraction("953125"), 61);
+	EXPECT_EQ(ParseFixed6Fraction("96875"), 62);
+	EXPECT_EQ(ParseFixed6Fraction("984375"), 63);
+	EXPECT_EQ(ParseFixed6Fraction("99218749"), 63); // just under 63.5/64 rounds down
+	EXPECT_EQ(ParseFixed6Fraction("9921875"), 64); // 63.5/64 rounds up, so the result can be a whole unit (64)
+}
+
+// Checks which strings ParseFixed6 accepts and how the sign, integer part and fraction are combined.
+// Uses the same suite name as the other tests in this file.
+TEST(ParseIntTest, ParseFixed6)
+{
+	ParseIntResult<int> result = ParseFixed6<int>("");
+	ASSERT_FALSE(result.has_value()) << "Empty strings are not valid fixed point values.";
+	EXPECT_EQ(result.error(), ParseIntError::ParseError) << "ParseFixed6 should give a ParseError code when parsing an empty string.";
+
+	result = ParseFixed6<int>("abcd");
+	ASSERT_FALSE(result.has_value()) << "Non-numeric strings should not be parsed as a fixed-point value.";
+	EXPECT_EQ(result.error(), ParseIntError::ParseError) << "ParseFixed6 should give a ParseError code when parsing a non-numeric string.";
+
+	result = ParseFixed6<int>(".");
+	ASSERT_FALSE(result.has_value()) << "To match std::from_chars ParseFixed6 should fail to parse a decimal string with no digits.";
+	EXPECT_EQ(result.error(), ParseIntError::ParseError) << "Decimal strings with no digits are reported as ParseError codes.";
+
+	result = ParseFixed6<int>("1.");
+	ASSERT_TRUE(result.has_value()) << "A trailing decimal point is permitted for fixed point values";
+	EXPECT_EQ(result.value(), 1 << 6);
+
+	result = ParseFixed6<int>(".5");
+	ASSERT_TRUE(result.has_value()) << "A fixed point value with no integer part is accepted";
+	EXPECT_EQ(result.value(), 32);
+
+	std::string_view badString { "-." };
+	const char *endOfParse = nullptr;
+	result = ParseFixed6<int>(badString, &endOfParse);
+	ASSERT_FALSE(result.has_value()) << "To match std::from_chars ParseFixed6 should fail to parse a decimal string with no digits, even if it starts with a minus sign.";
+	EXPECT_EQ(result.error(), ParseIntError::ParseError) << "Decimal strings with no digits are reported as ParseError codes.";
+	EXPECT_EQ(endOfParse, badString.data()) << "Failed fixed point parsing should set the end pointer to match the start of the string even though it read multiple characters";
+
+	result = ParseFixed6<int>("-1.");
+	ASSERT_TRUE(result.has_value()) << "negative fixed point values are handled when reading into signed types";
+	// negate after shifting: left shifting a negative value is undefined behaviour before C++20
+	EXPECT_EQ(result.value(), -(1 << 6));
+
+	result = ParseFixed6<int>("-1.25");
+	ASSERT_TRUE(result.has_value()) << "negative fixed point values are handled when reading into signed types";
+	EXPECT_EQ(result.value(), -((1 << 6) + 16)) << "and the fraction part is combined with the integer part respecting the sign";
+
+	result = ParseFixed6<int>("-.25");
+	ASSERT_TRUE(result.has_value()) << "negative fixed point values with no integer digits are handled when reading into signed types";
+	EXPECT_EQ(result.value(), -16) << "and the fraction part is used respecting the sign";
+
+	result = ParseFixed6<int>("-0.25");
+	ASSERT_TRUE(result.has_value()) << "negative fixed point values with an explicit -0 integer part are handled when reading into signed types";
+	EXPECT_EQ(result.value(), -16) << "and the fraction part is used respecting the sign";
+
+	ParseIntResult<unsigned> unsignedResult = ParseFixed6<unsigned>("-1.");
+	ASSERT_FALSE(unsignedResult.has_value()) << "negative fixed point values are not permitted when reading into unsigned types";
+	EXPECT_EQ(unsignedResult.error(), ParseIntError::ParseError) << "Attempting to parse a negative value into an unsigned type is a ParseError, not an OutOfRange value";
+}
+} // namespace devilution