Browse Source

Refactor iterator parsing to allow more reuse

pull/6529/head
ephphatha 3 years ago committed by Anders Jenbo
parent
commit
5f6e5ae9fb
  1. 60
      Packaging/resources/assets/txtdata/Readme.md
  2. 101
      Source/data/file.cpp
  3. 98
      Source/data/file.hpp
  4. 473
      Source/data/iterators.hpp
  5. 41
      Source/data/parser.cpp
  6. 66
      Source/data/parser.hpp
  7. 176
      Source/playerdat.cpp
  8. 4
      test/Fixtures.cmake
  9. 291
      test/data_file_test.cpp
  10. 0
      test/fixtures/txtdata/empty.tsv
  11. 1
      test/fixtures/txtdata/empty_with_utf8_bom.tsv
  12. 4
      test/fixtures/txtdata/lf_no_trail.tsv
  13. 2
      test/fixtures/txtdata/sample.tsv

60
Packaging/resources/assets/txtdata/Readme.md

@ -21,28 +21,49 @@ A formal description of the format using
The first record in a file SHOULD be used as a header
Implementations are free to treat records however they want, using
multiple records as headers or none at all.
Files SHOULD contain at least one record.
An empty file (zero-length or containing only a UTF8 BOM sequence) is
valid and is typically interpreted as a header for a single unnamed column
with no records.
All files contain at least one record. The last record in a file SHOULD end
with a trailing newline, however some spreadsheet applications do not
output trailing newlines so we allow flexibility here. This means that an
empty file (zero-length or containing only a UTF8 BOM sequence) is valid
and is typically interpreted as a header for a single unnamed column with
no records.
Records SHOULD contain the same number of fields.
Files SHOULD end with a trailing newline.
Note that while a trailing newline is treated as if the file has a final
record containing a single empty field, in practice these records SHOULD
be discarded. This is mainly to handle the typical output from spreadsheet
applications like Excel.
*/
DXTxtFile ::= utf8bom? header ( recordsep, record )*
DXTxtFile ::= utf8bom? ( singleRecord | record+ finalRecord? )
utf8bom ::= #xEF #xBB #xBF
utf8bom ::= #xEF #xBB #xBF
header ::= record
/*
if parsing reaches EOF and the file has no record terminators then the file
MUST be treated as containing a single record with no terminator
*/
singleRecord ::= fields
/*
if for some reason you want to end a file with a record containing a single
empty field then the record MUST end with a valid terminator
*/
record ::= fields recordterm
/*
this means that the terminator is only truly optional for files where the
final record contains a single field with at least one character, or at least
two fields (as there will then be at least one field separator even if both
fields are zero-length)
*/
finalRecord ::= nonEmptyField | field fieldsep fields
/* an empty line is treated as a single empty field */
record ::= field ( fieldsep field )*
fields ::= field ( fieldsep field )*
/* fields MAY be zero length */
field ::= character*
field ::= character*
/*
unless the final record only consists of a single field, in which case it
MUST contain at least one character
*/
nonEmptyField ::= character+
/*
Any Char (see https://www.w3.org/TR/xml/#NT-Char) except Tab ("\t", 0x09),
@ -50,13 +71,16 @@ field ::= character*
field value. For maximum portability characters in the discouraged ranges
described in the linked section SHOULD NOT be used
*/
character ::= [^#x9#xA#xD]
character ::= [^#x9#xA#xD]
/* fields MUST be separated by tabs */
fieldsep ::= #x9
fieldsep ::= #x9
/* records MUST be separated by line feeds, a cr/lf pair MAY be used */
recordsep ::= #xD? #xA
/*
records (other than the final record or the only record in a single-record
file) MUST be terminated by line feeds, a cr/lf pair MAY be used
*/
recordterm ::= #xD? #xA
```
## File Descriptions

101
Source/data/file.cpp

@ -1,6 +1,9 @@
#include "file.hpp"
#include <fmt/format.h>
#include "engine/assets.hpp"
#include "utils/language.h"
namespace devilution {
tl::expected<DataFile, DataFile::Error> DataFile::load(std::string_view path)
@ -13,9 +16,103 @@ tl::expected<DataFile, DataFile::Error> DataFile::load(std::string_view path)
std::unique_ptr<char[]> data { new char[size] };
{
AssetHandle handle = OpenAsset(std::move(ref));
if (!handle.ok() || !handle.read(data.get(), size))
return tl::unexpected { Error::ReadError };
if (!handle.ok())
return tl::unexpected { Error::OpenFailed };
if (size > 0 && !handle.read(data.get(), size))
return tl::unexpected { Error::BadRead };
}
return DataFile { std::move(data), size };
}
void DataFile::reportFatalError(Error code, std::string_view fileName)
{
switch (code) {
case Error::NotFound:
case Error::OpenFailed:
case Error::BadRead:
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when a data file is missing or corrupt. Arguments are {file name} */
"Unable to load data from file {0}")),
fileName));
case Error::NoContent:
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when a data file is empty or only contains the header row. Arguments are {file name} */
"{0} is incomplete, please check the file contents.")),
fileName));
case Error::NotEnoughColumns:
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when a data file doesn't contain the expected columns. Arguments are {file name} */
"Your {0} file doesn't have the expected columns, please make sure it matches the documented format.")),
fileName));
}
}
void DataFile::reportFatalFieldError(std::errc code, std::string_view fileName, std::string_view fieldName, const DataFileField &field)
{
switch (code) {
case std::errc::invalid_argument:
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when parsing a data file and a text value is encountered when a number is expected. Arguments are {found value}, {column heading}, {file name}, {row/record number}, {column/field number} */
"Non-numeric value {0} for {1} in {2} at row {3} and column {4}")),
field.currentValue(), fieldName, fileName, field.row(), field.column()));
case std::errc::result_out_of_range:
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when parsing a data file and we find a number larger than expected. Arguments are {found value}, {column heading}, {file name}, {row/record number}, {column/field number} */
"Out of range value {0} for {1} in {2} at row {3} and column {4}")),
field.currentValue(), fieldName, fileName, field.row(), field.column()));
default:
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Fallback error message when an error occurs while parsing a data file and we can't determine the cause. Arguments are {file name}, {row/record number}, {column/field number} */
"Unexpected error while reading {0} at row {1} and column {2}")),
fileName, field.row(), field.column()));
}
}
tl::expected<void, DataFile::Error> DataFile::parseHeader(ColumnDefinition *begin, ColumnDefinition *end, tl::function_ref<tl::expected<uint8_t, ColumnDefinition::Error>(std::string_view)> mapper)
{
std::bitset<std::numeric_limits<uint8_t>::max()> seenColumns;
unsigned lastColumn = 0;
RecordIterator firstRecord { data(), data() + size(), false };
for (DataFileField field : *firstRecord) {
if (begin == end) {
// All key columns have been identified
break;
}
auto mapResult = mapper(*field);
if (!mapResult.has_value()) {
// not a key column
continue;
}
uint8_t columnType = mapResult.value();
if (seenColumns.test(columnType)) {
// Repeated column? unusual, maybe this should be an error
continue;
}
seenColumns.set(columnType);
unsigned skipColumns = 0;
if (field.column() > lastColumn)
skipColumns = field.column() - lastColumn - 1;
lastColumn = field.column();
*begin = { columnType, skipColumns };
++begin;
}
// Incrementing the iterator causes it to read to the end of the record in case we broke early (maybe there were extra columns)
++firstRecord;
if (firstRecord == this->end()) {
return tl::unexpected { Error::NoContent };
}
body_ = firstRecord.data();
if (begin != end) {
return tl::unexpected { Error::NotEnoughColumns };
}
return {};
}
} // namespace devilution

98
Source/data/file.hpp

@ -4,17 +4,59 @@
#include <string_view>
#include <expected.hpp>
#include <function_ref.hpp>
#include "iterators.hpp"
namespace devilution {
struct ColumnDefinition {
uint8_t type;
enum class Error {
UnknownColumn
};
// The number of fields between this column and the last one identified as important (or from start of the record if this is the first column we care about)
unsigned skipLength = 0;
ColumnDefinition()
: type(std::numeric_limits<uint8_t>::max())
{
}
ColumnDefinition(unsigned type)
: type(type)
{
}
ColumnDefinition(unsigned type, unsigned skipLength)
: type(type)
, skipLength(skipLength)
{
}
bool operator==(const ColumnDefinition &other) const
{
return type == other.type && skipLength == other.skipLength;
}
template <typename T>
explicit operator T() const
{
return static_cast<T>(type);
}
};
/**
* @brief Container for a tab-delimited file following the TSV-like format described in txtdata/Readme.md
*/
class DataFile {
std::unique_ptr<char[]> data_;
std::unique_ptr<const char[]> data_;
std::string_view content_;
const char *body_;
DataFile() = delete;
/**
@ -22,19 +64,24 @@ class DataFile {
* @param data pointer to the raw data backing the view (this container will take ownership to ensure the lifetime of the view)
* @param size total number of bytes/code units including the BOM if present
*/
DataFile(std::unique_ptr<char[]> &&data, size_t size)
DataFile(std::unique_ptr<const char[]> &&data, size_t size)
: data_(std::move(data))
, content_(data_.get(), size)
{
constexpr std::string_view utf8BOM = "\xef\xbb\xbf";
if (this->content_.starts_with(utf8BOM))
this->content_.remove_prefix(utf8BOM.size());
body_ = this->content_.data();
}
public:
enum class Error {
NotFound,
ReadError
OpenFailed,
BadRead,
NoContent,
NotEnoughColumns
};
/**
@ -46,19 +93,52 @@ public:
*/
static tl::expected<DataFile, Error> load(std::string_view path);
[[nodiscard]] RecordsRange records() const
static void reportFatalError(Error code, std::string_view fileName);
static void reportFatalFieldError(std::errc code, std::string_view fileName, std::string_view fieldName, const DataFileField &field);
void resetHeader()
{
return content_;
body_ = content_.data();
}
[[nodiscard]] const char *begin() const
/**
* @brief Attempts to parse the first row/record in the file, populating the range defined by [begin, end) using the provided mapping function
*
* This method will also set an internal marker so that future uses of the begin iterator skip the header line.
* @param begin Start of the destination range
* @param end End of the destination range
* @param mapper Function that maps from a string_view to a unique numeric identifier for the column
* @return If the file ends after the header or not enough columns were defined this function returns an error code describing the failure.
*/
[[nodiscard]] tl::expected<void, DataFile::Error> parseHeader(ColumnDefinition *begin, ColumnDefinition *end, tl::function_ref<tl::expected<uint8_t, ColumnDefinition::Error>(std::string_view)> mapper);
/**
* @brief Templated version of parseHeader(uint8_t) to allow using directly with enum definitions of columns
* @tparam T An enum or any type that defines operator uint8_t()
* @param begin Start of the destination range
* @param end End of the destination range
* @param typedMapper Function that maps from a string_view to a unique T value
* @return A void success result or an error code as described above
*/
template <typename T>
[[nodiscard]] tl::expected<void, DataFile::Error> parseHeader(ColumnDefinition *begin, ColumnDefinition *end, std::function<tl::expected<T, ColumnDefinition::Error>(std::string_view)> typedMapper)
{
return content_.data();
return parseHeader(begin, end, [typedMapper](std::string_view label) { return typedMapper(label).transform([](T value) { return static_cast<uint8_t>(value); }); });
}
[[nodiscard]] RecordIterator begin() const
{
return { body_, data() + size(), body_ != data() };
}
[[nodiscard]] const char *end() const
[[nodiscard]] RecordIterator end() const
{
return content_.data() + content_.size();
return {};
}
[[nodiscard]] const char *data() const
{
return content_.data();
}
[[nodiscard]] size_t size() const

473
Source/data/iterators.hpp

@ -1,234 +1,365 @@
#pragma once
#include <charconv>
#include <ostream>
#include <expected.hpp>
#include "parser.hpp"
namespace devilution {
class FieldsInRecordRange {
class DataFileField {
GetFieldResult *state_;
const char *const end_;
const char *end_;
unsigned row_;
unsigned column_;
public:
FieldsInRecordRange(GetFieldResult *state, const char *end)
DataFileField(GetFieldResult *state, const char *end, unsigned row, unsigned column)
: state_(state)
, end_(end)
, row_(row)
, column_(column)
{
}
class Iterator {
GetFieldResult *state_;
const char *const end_;
/**
* @brief Returns a view of the current field
*
* This method scans the current field if this is the first value access since the last
* advance. If you expect the field to contain a numeric value then calling parseInt first
* is more efficient, but calling the methods in either order is supported.
* @return The current field value (may be an empty string) or a zero length string_view
*/
[[nodiscard]] std::string_view value()
{
if (state_->status == GetFieldResult::Status::ReadyToRead) {
*state_ = GetNextField(state_->next, end_);
}
return state_->value;
}
public:
using iterator_category = std::input_iterator_tag;
using value_type = std::string_view;
/**
* Convenience function to let DataFileField instances be used like other single-value STL containers
*/
[[nodiscard]] std::string_view operator*()
{
return this->value();
}
Iterator()
: state_(nullptr)
, end_(nullptr)
{
/**
* @brief Attempts to parse the current field as a numeric value using std::from_chars
*
* You can freely interleave this method with calls to operator*. If this is the first value
* access since the last advance this will scan the current field and store it for later
* use with operator* or repeated calls to parseInt (even with different types).
* @tparam T an Integral type supported by std::from_chars
* @param destination value to store the result of successful parsing
* @return the error code from std::from_chars
*/
template <typename T>
[[nodiscard]] std::errc parseInt(T &destination)
{
std::from_chars_result result {};
if (state_->status == GetFieldResult::Status::ReadyToRead) {
const char *begin = state_->next;
result = std::from_chars(begin, end_, destination);
if (result.ec != std::errc::invalid_argument) {
// from_chars was able to consume at least one character, consume the rest of the field
*state_ = GetNextField(result.ptr, end_);
// and prepend what was already parsed
state_->value = { begin, (state_->value.data() - begin) + state_->value.size() };
}
} else {
result = std::from_chars(state_->value.data(), end_, destination);
}
return result.ec;
}
Iterator(GetFieldResult *state, const char *end)
: state_(state)
, end_(end)
{
state_->status = GetFieldResult::Status::ReadyToRead;
template <typename T>
[[nodiscard]] tl::expected<T, std::errc> asInt()
{
T value = 0;
auto parseIntResult = parseInt(value);
if (parseIntResult == std::errc()) {
return value;
}
return tl::unexpected { parseIntResult };
}
[[nodiscard]] bool operator==(const Iterator &rhs) const
{
if (state_ == nullptr && rhs.state_ == nullptr)
return true;
/**
* Returns the current row number
*/
[[nodiscard]] unsigned row() const
{
return row_;
}
return state_ != nullptr && rhs.state_ != nullptr && state_->next == rhs.state_->next;
}
/**
* Returns the current column/field number (from the start of the row/record)
*/
[[nodiscard]] unsigned column() const
{
return column_;
}
[[nodiscard]] bool operator!=(const Iterator &rhs) const
{
return !(*this == rhs);
}
/**
* Allows accessing the value of this field in a const context
*
* This requires an actual non-const value access to happen first before it returns
* any useful results, intended for use in error reporting (or test output).
*/
[[nodiscard]] std::string_view currentValue() const
{
return state_->value;
}
};
/**
* Advances to the next field in the current record
*/
Iterator &operator++()
{
return *this += 1;
}
/**
* @brief Show the field value along with the row/column number (mainly used in test failure messages)
* @param stream output stream, expected to have overloads for unsigned, std::string_view, and char*
* @param field Object to display
* @return the stream, to allow chaining
*/
inline std::ostream &operator<<(std::ostream &stream, const DataFileField &field)
{
return stream << "\"" << field.currentValue() << "\" (at row " << field.row() << ", column " << field.column() << ")";
}
class FieldIterator {
GetFieldResult *state_;
const char *const end_;
const unsigned row_;
unsigned column_ = 0;
/**
* @brief Advances by the specified number of fields
*
* if a non-zero increment is provided and advancing the iterator causes it to reach the end
* of the record the iterator is invalidated. It will compare equal to an end iterator and
* cannot be used for value access or any further parsing
* @param increment how many fields to advance (can be 0)
* @return self-reference
*/
Iterator &operator+=(unsigned increment)
{
if (state_->status == GetFieldResult::Status::ReadyToRead)
*state_ = DiscardMultipleFields(state_->next, end_, increment);
if (state_->endOfRecord()) {
state_ = nullptr;
} else {
// We use Status::ReadyToRead as a marker so we only read the next value on the next call to operator*
// this allows consumers to read from the input stream without using operator* if they want
state_->status = GetFieldResult::Status::ReadyToRead;
}
return *this;
}
public:
using iterator_category = std::input_iterator_tag;
using value_type = DataFileField;
/**
* @brief Returns a view of the current field
*
* This method scans the current field if this is the first value access since the last
* advance. You can repeatedly call operator* safely but you must not try to use parseInt
* after calling this method. If you calling parseInt and get an invalid_argument result
* back you can use this method to get the actual field value, but if you received any
* other result then parseInt has consumed the field and this method is no longer reliable.
* @return The current field value (may be an empty string) or a zero length string_view
*/
[[nodiscard]] value_type operator*()
{
if (state_->status == GetFieldResult::Status::ReadyToRead) {
*state_ = GetNextField(state_->next, end_);
}
return state_->value;
}
FieldIterator()
: state_(nullptr)
, end_(nullptr)
, row_(0)
{
}
/**
* @brief Attempts to parse a field as a numeric value using std::from_chars
*
* This method is NOT repeatable. In addition to the behaviour of std::from_chars please
* heed the following warnings. If the field contains a value larger than will fit into the
* given type parsing will fail with std::errc::out_of_range and the field will be
* discarded. If a field starts with some digits then is followed by other characters the
* remainder will also be discarded. You can only get a useful value out of operator* if
* the result code is std::errc::invalid_argument.
* @tparam T an Integral type supported by std::from_chars
* @param destination value to store the result of successful parsing
* @return the error code from std::from_chars
*/
template <typename T>
[[nodiscard]] std::errc parseInt(T &destination)
{
auto result = std::from_chars(state_->next, end_, destination);
if (result.ec != std::errc::invalid_argument) {
// from_chars was able to consume at least one character, so discard the rest of the field
*state_ = DiscardField(result.ptr, end_);
}
return result.ec;
FieldIterator(GetFieldResult *state, const char *end, unsigned row)
: state_(state)
, end_(end)
, row_(row)
{
state_->status = GetFieldResult::Status::ReadyToRead;
}
[[nodiscard]] bool operator==(const FieldIterator &rhs) const
{
if (state_ == nullptr && rhs.state_ == nullptr)
return true;
return state_ != nullptr && rhs.state_ != nullptr && state_->next == rhs.state_->next;
}
[[nodiscard]] bool operator!=(const FieldIterator &rhs) const
{
return !(*this == rhs);
}
/**
* Advances to the next field in the current record
*/
FieldIterator &operator++()
{
return *this += 1;
}
/**
* @brief Advances by the specified number of fields
*
* if a non-zero increment is provided and advancing the iterator causes it to reach the end
* of the record the iterator is invalidated. It will compare equal to an end iterator and
* cannot be used for value access or any further parsing
* @param increment how many fields to advance (can be 0)
* @return self-reference
*/
FieldIterator &operator+=(unsigned increment)
{
if (increment == 0)
return *this;
if (state_->status == GetFieldResult::Status::ReadyToRead) {
// We never read the value and no longer need it, discard it so that we end up
// advancing past the field delimiter (as if a value access had happened)
*state_ = DiscardField(state_->next, end_);
}
/**
* @brief Checks if this field is the last field in a file, not just in the record
*
* If you call this method before calling parseInt or operator* then this will trigger a
* read, meaning you can no longer use parseInt to try extract a numeric value. You
* probably want to use this after calling parseInt.
* @return true if this field is the last field in the file/RecordsRange
*/
[[nodiscard]] bool endOfFile() const
{
if (state_->status == GetFieldResult::Status::ReadyToRead) {
*state_ = GetNextField(state_->next, end_);
}
return state_->endOfFile();
if (state_->endOfRecord()) {
state_ = nullptr;
} else {
unsigned fieldsSkipped = 0;
// By this point we've already advanced past the end of this field (either because the
// last value access found the end of the field by necessity or we discarded it a few
// lines up), so we only need to advance further if an increment greater than 1 was
// provided.
*state_ = DiscardMultipleFields(state_->next, end_, increment - 1, &fieldsSkipped);
// As we've consumed the current field by this point we need to increment the internal
// column counter one extra time so we have an accurate value.
column_ += fieldsSkipped + 1;
// We use Status::ReadyToRead as a marker so we only read the next value on the next
// value access, this allows consumers to choose the most efficient method (e.g. if
// they want the value as an int) or even repeated advances without using a value.
state_->status = GetFieldResult::Status::ReadyToRead;
}
};
return *this;
}
[[nodiscard]] Iterator begin() const
/**
* @brief Returns a view of the current field
*
* The returned value is a thin wrapper over the current state of this iterator (or last
* successful read if incrementing this iterator would result in it reaching the end state).
*/
[[nodiscard]] value_type operator*()
{
return { state_, end_ };
return { state_, end_, row_, column_ };
}
[[nodiscard]] Iterator end() const
/**
* @brief Returns the current row number
*/
[[nodiscard]] unsigned row() const
{
return {};
return row_;
}
/**
* @brief Returns the current column/field number (from the start of the row/record)
*/
[[nodiscard]] unsigned column() const
{
return column_;
}
};
class RecordsRange {
const char *const begin_;
class DataFileRecord {
GetFieldResult *state_;
const char *const end_;
const unsigned row_;
public:
RecordsRange(std::string_view characters)
: begin_(characters.data())
, end_(characters.data() + characters.size())
DataFileRecord(GetFieldResult *state, const char *end, unsigned row)
: state_(state)
, end_(end)
, row_(row)
{
}
class Iterator {
GetFieldResult state_;
const char *const end_;
[[nodiscard]] FieldIterator begin()
{
return { state_, end_, row_ };
}
public:
using iterator_category = std::forward_iterator_tag;
using value_type = FieldsInRecordRange;
[[nodiscard]] FieldIterator end() const
{
return {};
}
Iterator()
: state_(nullptr, GetFieldResult::Status::EndOfFile)
, end_(nullptr)
{
}
[[nodiscard]] unsigned row() const
{
return row_;
}
};
Iterator(const char *begin, const char *end)
: state_(begin)
, end_(end)
{
}
class RecordIterator {
GetFieldResult state_;
const char *const end_;
unsigned row_ = 0;
[[nodiscard]] bool operator==(const Iterator &rhs) const
{
return state_.next == rhs.state_.next;
}
public:
using iterator_category = std::forward_iterator_tag;
using value_type = DataFileRecord;
[[nodiscard]] bool operator!=(const Iterator &rhs) const
{
return !(*this == rhs);
}
RecordIterator()
: state_(nullptr, GetFieldResult::Status::EndOfFile)
, end_(nullptr)
{
}
Iterator &operator++()
{
return *this += 1;
}
RecordIterator(const char *begin, const char *end, bool skippedHeader)
: state_(begin)
, end_(end)
, row_(skippedHeader ? 1 : 0)
{
}
Iterator &operator+=(unsigned increment)
{
if (!state_.endOfRecord())
state_ = DiscardRemainingFields(state_.next, end_);
if (state_.endOfFile()) {
state_.next = nullptr;
} else {
if (increment > 0)
state_ = DiscardMultipleRecords(state_.next, end_, increment - 1);
// We use Status::ReadyToRead as a marker in case the Field iterator is never
// used, so the next call to operator+= will advance past the current record
state_.status = GetFieldResult::Status::ReadyToRead;
}
[[nodiscard]] bool operator==(const RecordIterator &rhs) const
{
return state_.next == rhs.state_.next;
}
[[nodiscard]] bool operator!=(const RecordIterator &rhs) const
{
return !(*this == rhs);
}
RecordIterator &operator++()
{
return *this += 1;
}
RecordIterator &operator+=(unsigned increment)
{
if (increment == 0)
return *this;
if (!state_.endOfRecord()) {
// The field iterator either hasn't been used or hasn't consumed the entire record
state_ = DiscardRemainingFields(state_.next, end_);
}
[[nodiscard]] value_type operator*()
{
return { &state_, end_ };
if (state_.endOfFile()) {
state_.next = nullptr;
} else {
unsigned recordsSkipped = 0;
// By this point we've already advanced past the end of this record (either because the
// last value access found the end of the record by necessity or we discarded any
// leftovers a few lines up), so we only need to advance further if an increment
// greater than 1 was provided.
state_ = DiscardMultipleRecords(state_.next, end_, increment - 1, &recordsSkipped);
// As we've consumed the current record by this point we need to increment the internal
// row counter one extra time so we have an accurate value.
row_ += recordsSkipped + 1;
// We use Status::ReadyToRead as a marker in case the DataFileField iterator is never
// used, so the next call to operator+= will advance past the current record
state_.status = GetFieldResult::Status::ReadyToRead;
}
};
return *this;
}
[[nodiscard]] Iterator begin() const
[[nodiscard]] DataFileRecord operator*()
{
return { begin_, end_ };
return { &state_, end_, row_ };
}
[[nodiscard]] Iterator end() const
/**
* @brief Exposes the current location of this input iterator.
*
* This is only expected to be used internally so the DataFile instance knows where the header
* ends and the body begins. You probably don't want to use this directly.
*/
[[nodiscard]] const char *data() const
{
return {};
return state_.next;
}
/**
* @brief Returns the current row/record number (from the start of the file)
*
* The header row is always considered row 0, however if you've called DataFile.parseHeader()
* before calling DataFile.begin() then you'll get row 1 as the first record of the range.
*/
[[nodiscard]] unsigned row() const
{
return row_;
}
};
} // namespace devilution

41
Source/data/parser.cpp

@ -1,10 +1,10 @@
#include "parser.hpp"
namespace devilution {
GetFieldResult HandleRecordSeparator(const char *begin, const char *end)
GetFieldResult HandleRecordTerminator(const char *begin, const char *end)
{
if (begin == end) {
return { end, GetFieldResult::Status::EndOfFile };
return { end, GetFieldResult::Status::NoFinalTerminator };
}
if (*begin == '\r') {
@ -15,36 +15,49 @@ GetFieldResult HandleRecordSeparator(const char *begin, const char *end)
// carriage returns should be followed by a newline, so let's let the following checks handle it
}
if (*begin == '\n') {
return { begin + 1, GetFieldResult::Status::EndOfRecord };
++begin;
if (begin == end) {
return { end, GetFieldResult::Status::EndOfFile };
}
return { begin, GetFieldResult::Status::EndOfRecord };
}
return { begin, GetFieldResult::Status::BadRecordSeparator };
return { begin, GetFieldResult::Status::BadRecordTerminator };
}
GetFieldResult DiscardMultipleFields(const char *begin, const char *end, unsigned skipLength)
GetFieldResult DiscardMultipleFields(const char *begin, const char *end, unsigned skipLength, unsigned *fieldsSkipped)
{
GetFieldResult result { begin };
while (skipLength > 0) {
unsigned skipCount = 0;
while (skipCount < skipLength) {
++skipCount;
result = DiscardField(result.next, end);
if (result.endOfRecord()) {
// Found the end of record early, we can reuse the error code so just return it
return result;
// Found the end of record early
break;
}
--skipLength;
}
if (fieldsSkipped != nullptr) {
*fieldsSkipped = skipCount;
}
return result;
}
GetFieldResult DiscardMultipleRecords(const char *begin, const char *end, unsigned skipLength)
GetFieldResult DiscardMultipleRecords(const char *begin, const char *end, unsigned skipLength, unsigned *recordsSkipped)
{
GetFieldResult result { begin };
while (skipLength > 0) {
unsigned skipCount = 0;
while (skipCount < skipLength) {
++skipCount;
result = DiscardRemainingFields(result.next, end);
if (result.endOfFile()) {
// Found the end of file early, we can reuse the error code so just return it
return result;
// Found the end of file early
break;
}
--skipLength;
}
if (recordsSkipped != nullptr) {
*recordsSkipped = skipCount;
}
return result;
}

66
Source/data/parser.hpp

@ -16,9 +16,10 @@ struct GetFieldResult {
ReadyToRead,
EndOfField,
EndOfRecord,
BadRecordSeparator,
BadRecordTerminator,
EndOfFile,
FileTruncated
FileTruncated,
NoFinalTerminator
} status
= Status::ReadyToRead;
@ -53,7 +54,7 @@ struct GetFieldResult {
*/
[[nodiscard]] bool endOfRecord() const
{
return IsAnyOf(status, Status::EndOfRecord, Status::BadRecordSeparator) || endOfFile();
return IsAnyOf(status, Status::EndOfRecord, Status::BadRecordTerminator) || endOfFile();
}
/**
@ -61,16 +62,16 @@ struct GetFieldResult {
*/
[[nodiscard]] bool endOfFile() const
{
return IsAnyOf(status, Status::EndOfFile, Status::FileTruncated);
return IsAnyOf(status, Status::EndOfFile, Status::FileTruncated, Status::NoFinalTerminator);
}
};
/**
 * @brief Checks if this character is potentially part of a record terminator sequence
 * @param c character to check
 * @return true if it's a record terminator (lf) or carriage return (cr, accepted as the start of a crlf pair)
 */
constexpr bool IsRecordTerminator(char c)
{
	return c == '\r' || c == '\n';
}

/**
 * @brief Checks if this character is a field separator
 *
 * Note that record terminator sequences also act to separate fields
 * @param c character to check
 * @return true if it's a field separator (tab) or part of a record terminator sequence
 */
constexpr bool IsFieldSeparator(char c)
{
	return c == '\t' || IsRecordTerminator(c);
}
/**
 * @brief Consumes the current record terminator sequence and returns a result describing whether at least one more record is available.
 *
 * Assumes that begin points to a record terminator (lf or crlf) or the last read reached the end
 * of the final record (in which case the terminator is optional). If we reached the end of the
 * stream (`begin == end`) then `status` will compare equal to Status::NoFinalTerminator. If we
 * found a carriage return (cr) character just before the end of the stream then it's likely the
 * file was truncated, `status` will contain Status::FileTruncated. If we found a carriage return
 * that is followed by any character other than a newline (lf), or `begin` didn't point to a record
 * terminator, then `status` will be Status::BadRecordTerminator and the file has probably been
 * mangled. Otherwise `status` will be Status::EndOfRecord if at least one more record is available
 * or Status::EndOfFile if this was the end of the last record.
 * @param begin start of a stream (expected to be pointing to a record terminator)
 * @param end one past the last character of the stream
 * @return a struct containing a pointer to the start of the next record (if more characters are
 *     available) or a copy of end, and a status code describing the terminator.
 */
GetFieldResult HandleRecordTerminator(const char *begin, const char *end);
/**
* @brief Consumes the current field (or record) separator and returns a result describing whether at least one more field is available.
@ -113,7 +113,7 @@ GetFieldResult HandleRecordSeparator(const char *begin, const char *end);
* Assumes that begin points to a field or record separator (tab, cr, lf, or EOF). If there are
* more fields in the current record then the return value will have a pointer to the start of the
* next field and `status` will be GetFieldResult::Status::EndOfField. Otherwise refer to
* HandleRecordSeparator for a description of the different codes.
* HandleRecordTerminator for a description of the different codes.
* @param begin start of a stream (expected to be pointing to a record separator)
* @param end one past the last character of the stream
* @return a struct containing a pointer to the start of the next field (if more characters are
@ -126,7 +126,7 @@ inline GetFieldResult HandleFieldSeparator(const char *begin, const char *end)
return { begin + 1, GetFieldResult::Status::EndOfField };
}
return HandleRecordSeparator(begin, end);
return HandleRecordTerminator(begin, end);
}
/**
@ -148,20 +148,24 @@ inline GetFieldResult DiscardField(const char *begin, const char *end)
* @param begin first character of the stream
* @param end one past the last character in the stream
* @param skipLength how many fields to skip (specifying 0 will cause the method to return without advancing)
* @param fieldsSkipped optional output parameter, will be filled with a count of how many fields
* were skipped in case the end of record was reached early
* @return a GetFieldResult struct containing an empty value, a pointer to the start of the next
* field/record, and a status code describing what type of separator was found
*/
GetFieldResult DiscardMultipleFields(const char *begin, const char *end, unsigned skipLength);
GetFieldResult DiscardMultipleFields(const char *begin, const char *end, unsigned skipLength, unsigned *fieldsSkipped = nullptr);
/**
* @brief Advances by the specified number of records or until the end of the file, whichever occurs first
* @param begin first character of the stream
* @param end one past the last character in the stream
* @param skipLength how many records to skip (specifying 0 will cause the method to return without advancing)
* @param recordsSkipped optional output parameter, will be filled with a count of how many records
* were skipped in case the end of file was reached early
* @return a GetFieldResult struct containing an empty value, a pointer to the start of the next
* record, and a status code describing what type of separator was found
*/
GetFieldResult DiscardMultipleRecords(const char *begin, const char *end, unsigned skipLength);
GetFieldResult DiscardMultipleRecords(const char *begin, const char *end, unsigned skipLength, unsigned *recordsSkipped = nullptr);
/**
* @brief Discard any remaining fields in the current record
@ -172,9 +176,9 @@ GetFieldResult DiscardMultipleRecords(const char *begin, const char *end, unsign
*/
inline GetFieldResult DiscardRemainingFields(const char *begin, const char *end)
{
	// Skip everything up to the next record terminator (or end of stream), then consume it.
	const char *nextTerminator = std::find_if(begin, end, IsRecordTerminator);
	return HandleRecordTerminator(nextTerminator, end);
}
/**
@ -185,7 +189,7 @@ inline GetFieldResult DiscardRemainingFields(const char *begin, const char *end)
* `next` member of the returned type will be either the start of the next field/record or `end`.
* The `status` member contains additional information to distinguish between the end of a field
* and the end of a record. If there are additional fields in this record then `status` will be
* GetFieldResult::Status::EndOfField, otherwise refer to HandleRecordSeparator for the meanings
* GetFieldResult::Status::EndOfField, otherwise refer to HandleRecordTerminator for the meanings
* associated with the remaining codes.
* @param begin first character of the stream
* @param end one past the last character in the stream

176
Source/playerdat.cpp

@ -45,7 +45,7 @@ public:
[[nodiscard]] uint32_t getThresholdForLevel(unsigned level) const
{
if (level > 0)
return levelThresholds[std::min<size_t>(level - 1, getMaxLevel())];
return levelThresholds[std::min<unsigned>(level - 1, getMaxLevel())];
return 0;
}
@ -64,167 +64,76 @@ public:
}
} ExperienceData;
// Columns of interest in the Experience data file, in the order expected by the parser.
enum class ExperienceColumn {
	Level,
	Experience,
	LAST = Experience
};

/**
 * @brief Maps a header field name to the corresponding experience table column.
 * @param name header field value as read from the data file
 * @return the matching column, or ColumnDefinition::Error::UnknownColumn for unrecognised names
 */
tl::expected<ExperienceColumn, ColumnDefinition::Error> mapExperienceColumnFromName(std::string_view name)
{
	if (name == "Level") {
		return ExperienceColumn::Level;
	}
	if (name == "Experience") {
		return ExperienceColumn::Experience;
	}
	return tl::unexpected { ColumnDefinition::Error::UnknownColumn };
}
void ReloadExperienceData()
{
constexpr std::string_view filename = "txtdata\\Experience.tsv";
auto dataFileResult = DataFile::load(filename);
if (!dataFileResult.has_value()) {
app_fatal(fmt::format(fmt::runtime(_(/* TRANSLATORS: Error message when a data file is missing or corrupt */ "Unable to load player experience data from file {:s}")), filename));
DataFile::reportFatalError(dataFileResult.error(), filename);
}
const DataFile &dataFile = dataFileResult.value();
constexpr unsigned ExpectedColumnCount = enum_size<ExperienceColumnDefinition::ColumnType>::value;
StaticVector<ExperienceColumnDefinition, ExpectedColumnCount> columns;
std::bitset<ExpectedColumnCount> seenColumns;
unsigned currentColumn = 0;
unsigned lastColumn = 0;
RecordsRange records = dataFile.records();
auto record = records.begin();
auto endRecord = records.end();
for (std::string_view field : *record) {
if (columns.size() >= ExpectedColumnCount) {
// All key columns have been identified
break;
}
DataFile &dataFile = dataFileResult.value();
auto columnType = ExperienceColumnDefinition::mapNameToType(field);
if (columnType.has_value() && !seenColumns.test(static_cast<size_t>(columnType.value()))) {
seenColumns.set(static_cast<size_t>(columnType.value()));
unsigned skipColumns = 0;
if (currentColumn > lastColumn)
skipColumns = currentColumn - lastColumn - 1;
columns.emplace_back(columnType.value(), skipColumns);
lastColumn = currentColumn;
}
++currentColumn;
}
++record;
if (record == endRecord) {
// The data file ended after the header, since there's no data we can't proceed
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when a data file is empty or only contains the header row */
"{:s} is incomplete, please check the file contents.")),
filename));
}
constexpr unsigned ExpectedColumnCount = enum_size<ExperienceColumn>::value;
std::array<ColumnDefinition, ExpectedColumnCount> columns;
auto parseHeaderResult = dataFile.parseHeader<ExperienceColumn>(columns.data(), columns.data() + columns.size(), mapExperienceColumnFromName);
if (columns.size() < ExpectedColumnCount) {
// The data file doesn't have the required headers. Though we could potentially just allocate
// missing columns in the default order that's likely to lead to further corruption in saves
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when a data file doesn't contain the expected columns */
"Your {:s} file doesn't have the expected columns, please make sure it matches the documented format.")),
filename));
if (!parseHeaderResult.has_value()) {
DataFile::reportFatalError(parseHeaderResult.error(), filename);
}
ExperienceData.clear();
unsigned row = 1; // current line/record number for error messages
while (record != endRecord) {
for (DataFileRecord record : dataFile) {
uint8_t level = 0;
uint32_t experience = 0;
bool skipRecord = false;
FieldsInRecordRange fields = *record;
auto field = fields.begin();
auto endField = fields.end();
unsigned col = 0; // current field number for error messages
FieldIterator fieldIt = record.begin();
FieldIterator endField = record.end();
for (auto &column : columns) {
col += column.skipLength;
field += column.skipLength;
fieldIt += column.skipLength;
if (field == endField) {
// reached the end of record early, this could be from a trailing newline so don't throw an error
skipRecord = true;
break;
if (fieldIt == endField) {
DataFile::reportFatalError(DataFile::Error::NotEnoughColumns, filename);
}
switch (column.type) {
case ExperienceColumnDefinition::ColumnType::Level: {
DataFileField field = *fieldIt;
switch (static_cast<ExperienceColumn>(column)) {
case ExperienceColumn::Level: {
auto parseIntResult = field.parseInt(level);
if (parseIntResult == std::errc::invalid_argument) {
// not a signless numeric value, is this a trailing newline or the MaxLevel line?
if (field.endOfFile() || *field == "MaxLevel") {
if (parseIntResult != std::errc()) {
if (*field == "MaxLevel") {
skipRecord = true;
} else {
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when parsing the Experience data file and a text value is encountered in the Level column */
"Expected a positive numeric value for Level in {:s}, found {:s} at row {:d} and column {:d}")),
filename, *field, row, col));
DataFile::reportFatalFieldError(parseIntResult, filename, "Level", field);
}
} else if (parseIntResult == std::errc::result_out_of_range) {
// a level greater than 255 was provided
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when parsing the Experience data file and a text value is encountered in the Level column */
"Levels above {:d} are not supported, out of range value in {:s} at row {:d} and column {:d}")),
std::numeric_limits<uint8_t>::max(), filename, row, col));
}
} break;
case ExperienceColumnDefinition::ColumnType::Experience: {
case ExperienceColumn::Experience: {
auto parseIntResult = field.parseInt(experience);
if (parseIntResult == std::errc::invalid_argument) {
// not a signless numeric value, is this a trailing newline?
if (field.endOfFile()) {
skipRecord = true;
} else {
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when parsing the Experience data file and a text value is encountered in the Experience column */
"Expected a positive numeric value for Experience in {:s}, found {:s} at row {:d} and column {:d}")),
filename, *field, row, col));
}
} else if (parseIntResult == std::errc::result_out_of_range) {
// an experience threshold greater than 2^32-1 was provided
app_fatal(fmt::format(fmt::runtime(_(
/* TRANSLATORS: Error message when parsing the Experience data file and a text value is encountered in the Experience column */
"Experience thresholds above {:d} are not supported, out of range value in {:s} at row {:d} and column {:d}")),
std::numeric_limits<uint32_t>::max(), filename, row, col));
if (parseIntResult != std::errc()) {
DataFile::reportFatalFieldError(parseIntResult, filename, "Experience", field);
}
} break;
@ -235,14 +144,11 @@ void ReloadExperienceData()
if (skipRecord)
break;
++field;
++col;
++fieldIt;
}
if (!skipRecord)
ExperienceData.setThresholdForLevel(level, experience);
++record;
++row;
}
}

4
test/Fixtures.cmake

@ -86,7 +86,11 @@ set(devilutionx_fixtures
timedemo/WarriorLevel1to2/spawn_0.sv
txtdata/cr.tsv
txtdata/crlf.tsv
txtdata/empty.tsv
txtdata/empty_with_utf8_bom.tsv
txtdata/lf.tsv
txtdata/lf_no_trail.tsv
txtdata/sample.tsv
txtdata/utf8_bom.tsv
)

291
test/data_file_test.cpp

@ -22,14 +22,15 @@ void TestFileContents(
GetFieldResult::Status expectedEndOfRecordStatus = GetFieldResult::Status::EndOfRecord,
GetFieldResult::Status expectedEndOfFileStatus = GetFieldResult::Status::EndOfFile)
{
GetFieldResult result { dataFile.begin() };
GetFieldResult result { dataFile.data() };
const char *end = dataFile.data() + dataFile.size();
unsigned row = 0;
do {
ASSERT_LT(row, expectedContent.size()) << "Too many records";
unsigned col = 0;
do {
ASSERT_LT(col, expectedContent[row].size()) << "Too many fields in record " << row;
result = GetNextField(result.next, dataFile.end());
result = GetNextField(result.next, end);
EXPECT_EQ(result.value, expectedContent[row][col]) << "Unexpected value at record " << row << " and field " << col;
col++;
} while (!result.endOfRecord());
@ -65,10 +66,9 @@ TEST(DataFileTest, LoadCRFile)
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" }
// because the file does not end with a newline parsing stops at the previous record
};
TestFileContents(dataFile, expectedFields, GetFieldResult::Status::BadRecordSeparator, GetFieldResult::Status::FileTruncated);
TestFileContents(dataFile, expectedFields, GetFieldResult::Status::BadRecordTerminator, GetFieldResult::Status::FileTruncated);
}
TEST(DataFileTest, LoadWindowsFile)
@ -84,7 +84,6 @@ TEST(DataFileTest, LoadWindowsFile)
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
{ "" } // file ends with a newline, parsing returns a single empty field
};
TestFileContents(dataFile, expectedFields);
@ -103,12 +102,77 @@ TEST(DataFileTest, LoadTypicalFile)
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
{ "" } // file ends with a newline, parsing returns a single empty field
};
TestFileContents(dataFile, expectedFields);
}
// Files without a trailing newline must still parse: the final record is terminated by the end
// of the stream and the file-level status becomes NoFinalTerminator instead of EndOfFile.
TEST(DataFileTest, LoadFileWithNoTrailingNewline)
{
	auto result = LoadDataFile("txtdata\\lf_no_trail.tsv");
	ASSERT_TRUE(result.has_value()) << "Unable to load lf_no_trail.tsv";

	DataFile &dataFile = result.value();
	// 32 bytes: three LF-terminated records plus a final record with no terminator
	EXPECT_EQ(dataFile.size(), 32) << "File size should be reported in code units";

	std::vector<std::vector<std::string_view>> expectedFields {
		{ "Test", "Empty", "Values" },
		{ "", "2", "3" },
		{ "1", "2", "" },
		{ "1", "", "3" },
	};

	TestFileContents(dataFile, expectedFields, GetFieldResult::Status::EndOfRecord, GetFieldResult::Status::NoFinalTerminator);
}
std::string_view mapError(DataFile::Error error)
{
switch (error) {
case DataFile::Error::NotFound:
return "not found";
case DataFile::Error::OpenFailed:
return "cannot open";
case DataFile::Error::BadRead:
return "cannot read contents";
default:
return "unexpected error";
}
}
// A zero-length file is valid input: it parses as a single record holding one empty field, with
// NoFinalTerminator reported for both the record and the file.
TEST(DataFileTest, LoadEmptyFile)
{
	auto result = LoadDataFile("txtdata\\empty.tsv");
	if (!result.has_value()) {
		FAIL() << "Unable to load empty.tsv, error: " << mapError(result.error());
	}

	DataFile &dataFile = result.value();
	EXPECT_EQ(dataFile.size(), 0) << "File size should be reported in code units";

	std::vector<std::vector<std::string_view>> expectedFields {
		{ "" },
	};

	TestFileContents(dataFile, expectedFields, GetFieldResult::Status::NoFinalTerminator, GetFieldResult::Status::NoFinalTerminator);
}
// A file containing only a UTF8 byte order marker behaves like an empty file: the BOM is stripped
// on load, leaving a single record with one empty field.
TEST(DataFileTest, LoadEmptyFileWithBOM)
{
	auto result = LoadDataFile("txtdata\\empty_with_utf8_bom.tsv");
	if (!result.has_value()) {
		FAIL() << "Unable to load empty_with_utf8_bom.tsv, error: " << mapError(result.error());
	}

	DataFile &dataFile = result.value();
	EXPECT_EQ(dataFile.size(), 0) << "Loading a file containing a UTF8 byte order marker should strip that prefix";

	std::vector<std::vector<std::string_view>> expectedFields {
		{ "" },
	};

	TestFileContents(dataFile, expectedFields, GetFieldResult::Status::NoFinalTerminator, GetFieldResult::Status::NoFinalTerminator);
}
TEST(DataFileTest, LoadUtf8WithBOM)
{
auto result = LoadDataFile("txtdata\\utf8_bom.tsv");
@ -122,12 +186,74 @@ TEST(DataFileTest, LoadUtf8WithBOM)
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
{ "" } // file ends with a newline, parsing returns a single empty field
};
TestFileContents(dataFile, expectedFields);
}
// Exercises DataFileField::parseInt against the four columns of sample.tsv: a plain string, a
// value that fits uint8_t, a value needing a wider type, and a float-like value parsed prefix-wise.
// Also fixes the failure messages: "Expected " << field << "to fit..." was missing the space
// before "to", producing output like "Expected 145to fit".
TEST(DataFileTest, ParseInt)
{
	auto result = LoadDataFile("txtdata\\sample.tsv");
	if (!result.has_value()) {
		FAIL() << "Unable to load sample.tsv, error: " << mapError(result.error());
	}

	DataFile &dataFile = result.value();
	auto unused = dataFile.parseHeader(nullptr, nullptr, [](std::string_view) -> tl::expected<uint8_t, ColumnDefinition::Error> { return tl::unexpected { ColumnDefinition::Error::UnknownColumn }; });
	EXPECT_TRUE(unused.has_value()) << "Should be able to parse and discard the header from the sample.tsv file";

	for (DataFileRecord record : dataFile) {
		auto fieldIt = record.begin();
		auto end = record.end();
		ASSERT_NE(fieldIt, end) << "sample.tsv must contain at least one field to use as a test value for strings";

		// First field is a string that doesn't start with a digit or - character
		DataFileField field = *fieldIt;
		uint8_t shortVal = 5;
		auto parseIntResult = field.parseInt(shortVal);
		EXPECT_EQ(parseIntResult, std::errc::invalid_argument) << "Strings are not uint8_t values";
		EXPECT_EQ(shortVal, 5) << "Value is not modified when parsing as uint8_t fails due to non-numeric fields";
		EXPECT_EQ(*field, "Sample") << "Should be able to access the field value as a string after failure";

		++fieldIt;
		ASSERT_NE(fieldIt, end) << "sample.tsv must contain a second field to use as a test value for small ints";

		// Second field is a number that fits into an uint8_t value
		field = *fieldIt;
		shortVal = 5;
		parseIntResult = field.parseInt(shortVal);
		EXPECT_EQ(parseIntResult, std::errc()) << "Expected " << field << " to fit into a uint8_t variable";
		EXPECT_EQ(shortVal, 145) << "Parsing should give the expected base 10 value";
		EXPECT_EQ(*field, "145") << "Should be able to access the field value as a string even after parsing as an int";

		++fieldIt;
		ASSERT_NE(fieldIt, end) << "sample.tsv must contain a third field to use as a test value for large ints";

		// Third field is a number too large for a uint8_t but that fits into an int value
		field = *fieldIt;
		parseIntResult = field.parseInt(shortVal);
		EXPECT_EQ(parseIntResult, std::errc::result_out_of_range) << "A value too large to fit into a uint8_t variable should report an error";
		EXPECT_EQ(shortVal, 145) << "Value is not modified when parsing as uint8_t fails due to out of range value";
		int longVal = 42;
		parseIntResult = field.parseInt(longVal);
		EXPECT_EQ(parseIntResult, std::errc()) << "Expected " << field << " to fit into an int variable";
		EXPECT_EQ(longVal, 70322) << "Value is expected to be parsed into a larger type after an out of range failure";
		EXPECT_EQ(*field, "70322") << "Should be able to access the field value as a string after parsing as an int";

		++fieldIt;
		ASSERT_NE(fieldIt, end) << "sample.tsv must contain a fourth field to use as a test value for fields that look like ints";

		// Fourth field is not an integer, but a value that starts with one or more digits that fit into an uint8_t value
		field = *fieldIt;
		parseIntResult = field.parseInt(shortVal);
		EXPECT_EQ(parseIntResult, std::errc()) << "Expected " << field << " to fit into a uint8_t variable (even though it's not really an int)";
		EXPECT_EQ(shortVal, 6) << "Value is loaded as expected until the first non-digit character";
		EXPECT_EQ(*field, "6.34") << "Should be able to access the field value as a string after failure";
	}
}
TEST(DataFileTest, IterateOverRecords)
{
auto result = LoadDataFile("txtdata\\lf.tsv");
@ -140,16 +266,51 @@ TEST(DataFileTest, IterateOverRecords)
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
{ "" } // file ends with a newline, parsing returns a single empty field
};
unsigned row = 0;
for (FieldsInRecordRange record : dataFile.records()) {
for (DataFileRecord record : dataFile) {
ASSERT_LT(row, expectedFields.size()) << "Too many records";
unsigned col = 0;
for (DataFileField field : record) {
if (col < expectedFields[row].size())
EXPECT_EQ(*field, expectedFields[row][col]) << "Unexpected value at record " << row << " and field " << col;
else
ADD_FAILURE() << "Extra value '" << field << "' in record " << row << " at field " << col;
col++;
}
EXPECT_GE(col, expectedFields[row].size()) << "Parsing returned fewer fields than expected in record " << row;
row++;
}
EXPECT_EQ(row, expectedFields.size()) << "Parsing returned fewer records than expected";
}
TEST(DataFileTest, ParseHeaderThenIterateOverRecords)
{
auto result = LoadDataFile("txtdata\\lf.tsv");
ASSERT_TRUE(result.has_value()) << "Unable to load lf.tsv";
DataFile &dataFile = result.value();
std::vector<std::vector<std::string_view>> expectedFields {
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
};
auto parseHeaderResult = dataFile.parseHeader(nullptr, nullptr, [](std::string_view) -> tl::expected<uint8_t, ColumnDefinition::Error> { return tl::unexpected { ColumnDefinition::Error::UnknownColumn }; });
EXPECT_TRUE(parseHeaderResult.has_value()) << "Expected to be able to parse and discard the header record";
unsigned row = 0;
for (DataFileRecord record : dataFile) {
ASSERT_LT(row, expectedFields.size()) << "Too many records";
EXPECT_EQ(row + 1, record.row()) << "DataFileRecord (through iterator) should report a 1-indexed row after parsing the header record";
unsigned col = 0;
for (std::string_view field : record) {
for (DataFileField field : record) {
EXPECT_EQ(record.row(), field.row()) << "Field should report the same row as the current DataFileRecord";
EXPECT_EQ(col, field.column()) << "Field (through iterator) should report a 0-indexed column";
if (col < expectedFields[row].size())
EXPECT_EQ(field, expectedFields[row][col]) << "Unexpected value at record " << row << " and field " << col;
EXPECT_EQ(*field, expectedFields[row][col]) << "Unexpected value at record " << row << " and field " << col;
else
ADD_FAILURE() << "Extra value '" << field << "' in record " << row << " at field " << col;
col++;
@ -167,19 +328,25 @@ TEST(DataFileTest, DiscardAllAfterFirstField)
DataFile &dataFile = loadDataResult.value();
std::array<std::string_view, 5> expectedFields { "Test", "", "1", "1", "" };
std::vector<std::vector<std::string_view>> expectedFields {
{ "Test", "Empty", "Values" },
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
};
GetFieldResult result { dataFile.begin() };
GetFieldResult result { dataFile.data() };
const char *end = dataFile.data() + dataFile.size();
unsigned row = 0;
do {
ASSERT_LT(row, expectedFields.size()) << "Too many records";
result = GetNextField(result.next, dataFile.end());
EXPECT_EQ(result.value, expectedFields[row]) << "Unexpected first value at record " << row;
result = GetNextField(result.next, end);
EXPECT_EQ(result.value, expectedFields[row][0]) << "Unexpected first value at record " << row;
if (result.endOfRecord() && !result.endOfFile())
ADD_FAILURE() << "Parsing returned fewer fields than expected in record " << row;
else
result = DiscardRemainingFields(result.next, dataFile.end());
result = DiscardRemainingFields(result.next, end);
row++;
} while (!result.endOfFile());
@ -193,13 +360,18 @@ TEST(DataFileTest, DiscardAllAfterFirstFieldIterator)
DataFile &dataFile = result.value();
std::array<std::string_view, 5> expectedFields { "Test", "", "1", "1", "" };
std::vector<std::vector<std::string_view>> expectedFields {
{ "Test", "Empty", "Values" },
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
};
unsigned row = 0;
for (FieldsInRecordRange record : dataFile.records()) {
for (DataFileRecord record : dataFile) {
ASSERT_LT(row, expectedFields.size()) << "Too many records";
for (std::string_view field : record) {
EXPECT_EQ(field, expectedFields[row]) << "Unexpected first value at record " << row;
for (DataFileField field : record) {
EXPECT_EQ(*field, expectedFields[row][0]) << "Field with value " << field << " does not match the expected value for record " << row;
break;
}
row++;
@ -213,23 +385,29 @@ TEST(DataFileTest, DiscardAllUpToLastField)
DataFile &dataFile = loadDataResult.value();
std::array<std::string_view, 5> expectedFields { "Values", "3", "", "3", "" };
std::vector<std::vector<std::string_view>> expectedFields {
{ "Test", "Empty", "Values" },
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
};
GetFieldResult result { dataFile.begin() };
GetFieldResult result { dataFile.data() };
const char *const end = dataFile.data() + dataFile.size();
unsigned row = 0;
do {
ASSERT_LT(row, expectedFields.size()) << "Too many records";
result = DiscardMultipleFields(result.next, dataFile.end(), 2);
if (row < expectedFields.size() - 1) {
result = DiscardMultipleFields(result.next, end, 2);
if (row < expectedFields.size()) {
EXPECT_FALSE(result.endOfRecord()) << "Parsing returned fewer fields than expected in record " << row;
if (!result.endOfRecord())
result = GetNextField(result.next, dataFile.end());
result = GetNextField(result.next, end);
}
EXPECT_EQ(result.value, expectedFields[row]) << "Unexpected last value at record " << row;
EXPECT_EQ(result.value, expectedFields[row][2]) << "Unexpected last value at record " << row;
if (!result.endOfRecord()) {
ADD_FAILURE() << "Parsing returned fewer fields than expected in record " << row;
result = DiscardRemainingFields(result.next, dataFile.end());
result = DiscardRemainingFields(result.next, end);
}
row++;
} while (!result.endOfFile());
@ -244,32 +422,34 @@ TEST(DataFileTest, SkipFieldIterator)
DataFile &dataFile = loadDataResult.value();
// we don't actually test the last value as incrementing the fields-in-record iterator past
// the last field invalidates the value, we can't recover the same way the procedural interface does.
std::array<std::string_view, 5> expectedFields { "Values", "3", "", "3", "" };
std::vector<std::vector<std::string_view>> expectedFields {
{ "Test", "Empty", "Values" },
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
};
RecordsRange recordsRange = dataFile.records();
auto record = recordsRange.begin();
auto endRecord = recordsRange.end();
unsigned row = 0;
while (record != endRecord) {
for (DataFileRecord record : dataFile) {
ASSERT_LT(row, expectedFields.size()) << "Too many records";
FieldsInRecordRange fieldRange = *record;
auto field = fieldRange.begin();
auto endField = fieldRange.end();
field += 2;
if (row < expectedFields.size() - 1) {
if (field == endField) {
auto fieldIt = record.begin();
auto endField = record.end();
fieldIt += 0;
EXPECT_EQ((*fieldIt).value(), expectedFields[row][0]) << "Advancing a field iterator by 0 should not discard any values";
fieldIt += 2;
if (row < expectedFields.size()) {
if (fieldIt == endField) {
ADD_FAILURE() << "Parsing returned fewer fields than expected in record " << row;
} else {
EXPECT_EQ(*field, expectedFields[row]) << "Unexpected last value at record " << row;
++field;
EXPECT_EQ((*fieldIt).value(), expectedFields[row][2]) << "Unexpected last value at record " << row;
++fieldIt;
}
}
EXPECT_EQ(field, endField) << "Parsing returned more fields than expected in record " << row;
EXPECT_EQ(fieldIt, endField) << "Parsing returned more fields than expected in record " << row;
EXPECT_EQ(fieldIt.column(), expectedFields[row].size() - 1) << "Field iterator should report the index of the last column";
++row;
++record;
}
EXPECT_EQ(row, expectedFields.size()) << "Parsing returned fewer records than expected";
@ -283,30 +463,31 @@ TEST(DataFileTest, SkipRowIterator)
DataFile &dataFile = loadDataResult.value();
std::vector<std::vector<std::string_view>> expectedFields {
// skipping the first two lines
{ "Test", "Empty", "Values" },
{ "", "2", "3" },
{ "1", "2", "" },
{ "1", "", "3" },
{ "" } // file ends with a newline, parsing returns a single empty field
};
RecordsRange recordsRange = dataFile.records();
auto record = recordsRange.begin();
auto endRecord = recordsRange.end();
record += 2;
unsigned row = 0;
while (record != endRecord) {
auto recordIt = dataFile.begin();
auto endRecord = dataFile.end();
recordIt += 0;
EXPECT_EQ((*(*recordIt).begin()).value(), expectedFields[0][0]) << "Advancing a record iterator by 0 should not discard any values";
recordIt += 2;
unsigned row = 2;
while (recordIt != endRecord) {
ASSERT_LT(row, expectedFields.size()) << "Too many records";
unsigned col = 0;
for (std::string_view field : *record) {
for (DataFileField field : *recordIt) {
if (col < expectedFields[row].size())
EXPECT_EQ(field, expectedFields[row][col]) << "Unexpected value at record " << row << " and field " << col;
EXPECT_EQ(*field, expectedFields[row][col]) << "Unexpected value at record " << row << " and field " << col;
else
ADD_FAILURE() << "Extra value '" << field << "' in record " << row << " at field " << col;
col++;
}
EXPECT_GE(col, expectedFields[row].size()) << "Parsing returned fewer fields than expected in record " << row;
++row;
++record;
++recordIt;
}
EXPECT_EQ(row, expectedFields.size()) << "Parsing returned fewer records than expected";

0
test/fixtures/txtdata/empty.tsv vendored

1
test/fixtures/txtdata/empty_with_utf8_bom.tsv vendored

@ -0,0 +1 @@


4
test/fixtures/txtdata/lf_no_trail.tsv vendored

@ -0,0 +1,4 @@
Test Empty Values
2 3
1 2
1 3
1 Test Empty Values
2 2 3
3 1 2
4 1 3

2
test/fixtures/txtdata/sample.tsv vendored

@ -0,0 +1,2 @@
String Byte Int Float
Sample 145 70322 6.34
1 String Byte Int Float
2 Sample 145 70322 6.34
Loading…
Cancel
Save