13 changed files with 882 additions and 435 deletions
@ -1,234 +1,365 @@
|
||||
#pragma once |
||||
|
||||
#include <charconv> |
||||
#include <ostream> |
||||
|
||||
#include <expected.hpp> |
||||
|
||||
#include "parser.hpp" |
||||
|
||||
namespace devilution { |
||||
class FieldsInRecordRange { |
||||
|
||||
class DataFileField { |
||||
GetFieldResult *state_; |
||||
const char *const end_; |
||||
const char *end_; |
||||
unsigned row_; |
||||
unsigned column_; |
||||
|
||||
public: |
||||
FieldsInRecordRange(GetFieldResult *state, const char *end) |
||||
DataFileField(GetFieldResult *state, const char *end, unsigned row, unsigned column) |
||||
: state_(state) |
||||
, end_(end) |
||||
, row_(row) |
||||
, column_(column) |
||||
{ |
||||
} |
||||
|
||||
class Iterator { |
||||
GetFieldResult *state_; |
||||
const char *const end_; |
||||
/**
|
||||
* @brief Returns a view of the current field |
||||
* |
||||
* This method scans the current field if this is the first value access since the last |
||||
* advance. If you expect the field to contain a numeric value then calling parseInt first |
||||
* is more efficient, but calling the methods in either order is supported. |
||||
* @return The current field value (may be an empty string) or a zero length string_view |
||||
*/ |
||||
[[nodiscard]] std::string_view value() |
||||
{ |
||||
if (state_->status == GetFieldResult::Status::ReadyToRead) { |
||||
*state_ = GetNextField(state_->next, end_); |
||||
} |
||||
return state_->value; |
||||
} |
||||
|
||||
public: |
||||
using iterator_category = std::input_iterator_tag; |
||||
using value_type = std::string_view; |
||||
/**
|
||||
* Convenience function to let DataFileField instances be used like other single-value STL containers |
||||
*/ |
||||
[[nodiscard]] std::string_view operator*() |
||||
{ |
||||
return this->value(); |
||||
} |
||||
|
||||
Iterator() |
||||
: state_(nullptr) |
||||
, end_(nullptr) |
||||
{ |
||||
/**
|
||||
* @brief Attempts to parse the current field as a numeric value using std::from_chars |
||||
* |
||||
* You can freely interleave this method with calls to operator*. If this is the first value |
||||
* access since the last advance this will scan the current field and store it for later |
||||
* use with operator* or repeated calls to parseInt (even with different types). |
||||
* @tparam T an Integral type supported by std::from_chars |
||||
* @param destination value to store the result of successful parsing |
||||
* @return the error code from std::from_chars |
||||
*/ |
||||
template <typename T> |
||||
[[nodiscard]] std::errc parseInt(T &destination) |
||||
{ |
||||
std::from_chars_result result {}; |
||||
if (state_->status == GetFieldResult::Status::ReadyToRead) { |
||||
const char *begin = state_->next; |
||||
result = std::from_chars(begin, end_, destination); |
||||
if (result.ec != std::errc::invalid_argument) { |
||||
// from_chars was able to consume at least one character, consume the rest of the field
|
||||
*state_ = GetNextField(result.ptr, end_); |
||||
// and prepend what was already parsed
|
||||
state_->value = { begin, (state_->value.data() - begin) + state_->value.size() }; |
||||
} |
||||
} else { |
||||
result = std::from_chars(state_->value.data(), end_, destination); |
||||
} |
||||
return result.ec; |
||||
} |
||||
|
||||
Iterator(GetFieldResult *state, const char *end) |
||||
: state_(state) |
||||
, end_(end) |
||||
{ |
||||
state_->status = GetFieldResult::Status::ReadyToRead; |
||||
template <typename T> |
||||
[[nodiscard]] tl::expected<T, std::errc> asInt() |
||||
{ |
||||
T value = 0; |
||||
auto parseIntResult = parseInt(value); |
||||
if (parseIntResult == std::errc()) { |
||||
return value; |
||||
} |
||||
return tl::unexpected { parseIntResult }; |
||||
} |
||||
|
||||
[[nodiscard]] bool operator==(const Iterator &rhs) const |
||||
{ |
||||
if (state_ == nullptr && rhs.state_ == nullptr) |
||||
return true; |
||||
/**
|
||||
* Returns the current row number |
||||
*/ |
||||
[[nodiscard]] unsigned row() const |
||||
{ |
||||
return row_; |
||||
} |
||||
|
||||
return state_ != nullptr && rhs.state_ != nullptr && state_->next == rhs.state_->next; |
||||
} |
||||
/**
|
||||
* Returns the current column/field number (from the start of the row/record) |
||||
*/ |
||||
[[nodiscard]] unsigned column() const |
||||
{ |
||||
return column_; |
||||
} |
||||
|
||||
[[nodiscard]] bool operator!=(const Iterator &rhs) const |
||||
{ |
||||
return !(*this == rhs); |
||||
} |
||||
/**
|
||||
* Allows accessing the value of this field in a const context |
||||
* |
||||
* This requires an actual non-const value access to happen first before it returns |
||||
* any useful results, intended for use in error reporting (or test output). |
||||
*/ |
||||
[[nodiscard]] std::string_view currentValue() const |
||||
{ |
||||
return state_->value; |
||||
} |
||||
}; |
||||
|
||||
/**
|
||||
* Advances to the next field in the current record |
||||
*/ |
||||
Iterator &operator++() |
||||
{ |
||||
return *this += 1; |
||||
} |
||||
/**
|
||||
* @brief Show the field value along with the row/column number (mainly used in test failure messages) |
||||
* @param stream output stream, expected to have overloads for unsigned, std::string_view, and char* |
||||
* @param field Object to display |
||||
* @return the stream, to allow chaining |
||||
*/ |
||||
inline std::ostream &operator<<(std::ostream &stream, const DataFileField &field) |
||||
{ |
||||
return stream << "\"" << field.currentValue() << "\" (at row " << field.row() << ", column " << field.column() << ")"; |
||||
} |
||||
|
||||
class FieldIterator { |
||||
GetFieldResult *state_; |
||||
const char *const end_; |
||||
const unsigned row_; |
||||
unsigned column_ = 0; |
||||
|
||||
/**
|
||||
* @brief Advances by the specified number of fields |
||||
* |
||||
* if a non-zero increment is provided and advancing the iterator causes it to reach the end |
||||
* of the record the iterator is invalidated. It will compare equal to an end iterator and |
||||
* cannot be used for value access or any further parsing |
||||
* @param increment how many fields to advance (can be 0) |
||||
* @return self-reference |
||||
*/ |
||||
Iterator &operator+=(unsigned increment) |
||||
{ |
||||
if (state_->status == GetFieldResult::Status::ReadyToRead) |
||||
*state_ = DiscardMultipleFields(state_->next, end_, increment); |
||||
|
||||
if (state_->endOfRecord()) { |
||||
state_ = nullptr; |
||||
} else { |
||||
// We use Status::ReadyToRead as a marker so we only read the next value on the next call to operator*
|
||||
// this allows consumers to read from the input stream without using operator* if they want
|
||||
state_->status = GetFieldResult::Status::ReadyToRead; |
||||
} |
||||
return *this; |
||||
} |
||||
public: |
||||
using iterator_category = std::input_iterator_tag; |
||||
using value_type = DataFileField; |
||||
|
||||
/**
|
||||
* @brief Returns a view of the current field |
||||
* |
||||
* This method scans the current field if this is the first value access since the last |
||||
* advance. You can repeatedly call operator* safely but you must not try to use parseInt |
||||
* after calling this method. If you calling parseInt and get an invalid_argument result |
||||
* back you can use this method to get the actual field value, but if you received any |
||||
* other result then parseInt has consumed the field and this method is no longer reliable. |
||||
* @return The current field value (may be an empty string) or a zero length string_view |
||||
*/ |
||||
[[nodiscard]] value_type operator*() |
||||
{ |
||||
if (state_->status == GetFieldResult::Status::ReadyToRead) { |
||||
*state_ = GetNextField(state_->next, end_); |
||||
} |
||||
return state_->value; |
||||
} |
||||
FieldIterator() |
||||
: state_(nullptr) |
||||
, end_(nullptr) |
||||
, row_(0) |
||||
{ |
||||
} |
||||
|
||||
/**
|
||||
* @brief Attempts to parse a field as a numeric value using std::from_chars |
||||
* |
||||
* This method is NOT repeatable. In addition to the behaviour of std::from_chars please |
||||
* heed the following warnings. If the field contains a value larger than will fit into the |
||||
* given type parsing will fail with std::errc::out_of_range and the field will be |
||||
* discarded. If a field starts with some digits then is followed by other characters the |
||||
* remainder will also be discarded. You can only get a useful value out of operator* if |
||||
* the result code is std::errc::invalid_argument. |
||||
* @tparam T an Integral type supported by std::from_chars |
||||
* @param destination value to store the result of successful parsing |
||||
* @return the error code from std::from_chars |
||||
*/ |
||||
template <typename T> |
||||
[[nodiscard]] std::errc parseInt(T &destination) |
||||
{ |
||||
auto result = std::from_chars(state_->next, end_, destination); |
||||
if (result.ec != std::errc::invalid_argument) { |
||||
// from_chars was able to consume at least one character, so discard the rest of the field
|
||||
*state_ = DiscardField(result.ptr, end_); |
||||
} |
||||
return result.ec; |
||||
FieldIterator(GetFieldResult *state, const char *end, unsigned row) |
||||
: state_(state) |
||||
, end_(end) |
||||
, row_(row) |
||||
{ |
||||
state_->status = GetFieldResult::Status::ReadyToRead; |
||||
} |
||||
|
||||
[[nodiscard]] bool operator==(const FieldIterator &rhs) const |
||||
{ |
||||
if (state_ == nullptr && rhs.state_ == nullptr) |
||||
return true; |
||||
|
||||
return state_ != nullptr && rhs.state_ != nullptr && state_->next == rhs.state_->next; |
||||
} |
||||
|
||||
[[nodiscard]] bool operator!=(const FieldIterator &rhs) const |
||||
{ |
||||
return !(*this == rhs); |
||||
} |
||||
|
||||
/**
|
||||
* Advances to the next field in the current record |
||||
*/ |
||||
FieldIterator &operator++() |
||||
{ |
||||
return *this += 1; |
||||
} |
||||
|
||||
/**
|
||||
* @brief Advances by the specified number of fields |
||||
* |
||||
* if a non-zero increment is provided and advancing the iterator causes it to reach the end |
||||
* of the record the iterator is invalidated. It will compare equal to an end iterator and |
||||
* cannot be used for value access or any further parsing |
||||
* @param increment how many fields to advance (can be 0) |
||||
* @return self-reference |
||||
*/ |
||||
FieldIterator &operator+=(unsigned increment) |
||||
{ |
||||
if (increment == 0) |
||||
return *this; |
||||
|
||||
if (state_->status == GetFieldResult::Status::ReadyToRead) { |
||||
// We never read the value and no longer need it, discard it so that we end up
|
||||
// advancing past the field delimiter (as if a value access had happened)
|
||||
*state_ = DiscardField(state_->next, end_); |
||||
} |
||||
|
||||
/**
|
||||
* @brief Checks if this field is the last field in a file, not just in the record |
||||
* |
||||
* If you call this method before calling parseInt or operator* then this will trigger a |
||||
* read, meaning you can no longer use parseInt to try extract a numeric value. You |
||||
* probably want to use this after calling parseInt. |
||||
* @return true if this field is the last field in the file/RecordsRange |
||||
*/ |
||||
[[nodiscard]] bool endOfFile() const |
||||
{ |
||||
if (state_->status == GetFieldResult::Status::ReadyToRead) { |
||||
*state_ = GetNextField(state_->next, end_); |
||||
} |
||||
return state_->endOfFile(); |
||||
if (state_->endOfRecord()) { |
||||
state_ = nullptr; |
||||
} else { |
||||
unsigned fieldsSkipped = 0; |
||||
// By this point we've already advanced past the end of this field (either because the
|
||||
// last value access found the end of the field by necessity or we discarded it a few
|
||||
// lines up), so we only need to advance further if an increment greater than 1 was
|
||||
// provided.
|
||||
*state_ = DiscardMultipleFields(state_->next, end_, increment - 1, &fieldsSkipped); |
||||
// As we've consumed the current field by this point we need to increment the internal
|
||||
// column counter one extra time so we have an accurate value.
|
||||
column_ += fieldsSkipped + 1; |
||||
// We use Status::ReadyToRead as a marker so we only read the next value on the next
|
||||
// value access, this allows consumers to choose the most efficient method (e.g. if
|
||||
// they want the value as an int) or even repeated advances without using a value.
|
||||
state_->status = GetFieldResult::Status::ReadyToRead; |
||||
} |
||||
}; |
||||
return *this; |
||||
} |
||||
|
||||
[[nodiscard]] Iterator begin() const |
||||
/**
|
||||
* @brief Returns a view of the current field |
||||
* |
||||
* The returned value is a thin wrapper over the current state of this iterator (or last |
||||
* successful read if incrementing this iterator would result in it reaching the end state). |
||||
*/ |
||||
[[nodiscard]] value_type operator*() |
||||
{ |
||||
return { state_, end_ }; |
||||
return { state_, end_, row_, column_ }; |
||||
} |
||||
|
||||
[[nodiscard]] Iterator end() const |
||||
/**
|
||||
* @brief Returns the current row number |
||||
*/ |
||||
[[nodiscard]] unsigned row() const |
||||
{ |
||||
return {}; |
||||
return row_; |
||||
} |
||||
/**
|
||||
* @brief Returns the current column/field number (from the start of the row/record) |
||||
*/ |
||||
[[nodiscard]] unsigned column() const |
||||
{ |
||||
return column_; |
||||
} |
||||
}; |
||||
|
||||
class RecordsRange { |
||||
const char *const begin_; |
||||
class DataFileRecord { |
||||
GetFieldResult *state_; |
||||
const char *const end_; |
||||
const unsigned row_; |
||||
|
||||
public: |
||||
RecordsRange(std::string_view characters) |
||||
: begin_(characters.data()) |
||||
, end_(characters.data() + characters.size()) |
||||
DataFileRecord(GetFieldResult *state, const char *end, unsigned row) |
||||
: state_(state) |
||||
, end_(end) |
||||
, row_(row) |
||||
{ |
||||
} |
||||
|
||||
class Iterator { |
||||
GetFieldResult state_; |
||||
const char *const end_; |
||||
[[nodiscard]] FieldIterator begin() |
||||
{ |
||||
return { state_, end_, row_ }; |
||||
} |
||||
|
||||
public: |
||||
using iterator_category = std::forward_iterator_tag; |
||||
using value_type = FieldsInRecordRange; |
||||
[[nodiscard]] FieldIterator end() const |
||||
{ |
||||
return {}; |
||||
} |
||||
|
||||
Iterator() |
||||
: state_(nullptr, GetFieldResult::Status::EndOfFile) |
||||
, end_(nullptr) |
||||
{ |
||||
} |
||||
[[nodiscard]] unsigned row() const |
||||
{ |
||||
return row_; |
||||
} |
||||
}; |
||||
|
||||
Iterator(const char *begin, const char *end) |
||||
: state_(begin) |
||||
, end_(end) |
||||
{ |
||||
} |
||||
class RecordIterator { |
||||
GetFieldResult state_; |
||||
const char *const end_; |
||||
unsigned row_ = 0; |
||||
|
||||
[[nodiscard]] bool operator==(const Iterator &rhs) const |
||||
{ |
||||
return state_.next == rhs.state_.next; |
||||
} |
||||
public: |
||||
using iterator_category = std::forward_iterator_tag; |
||||
using value_type = DataFileRecord; |
||||
|
||||
[[nodiscard]] bool operator!=(const Iterator &rhs) const |
||||
{ |
||||
return !(*this == rhs); |
||||
} |
||||
RecordIterator() |
||||
: state_(nullptr, GetFieldResult::Status::EndOfFile) |
||||
, end_(nullptr) |
||||
{ |
||||
} |
||||
|
||||
Iterator &operator++() |
||||
{ |
||||
return *this += 1; |
||||
} |
||||
RecordIterator(const char *begin, const char *end, bool skippedHeader) |
||||
: state_(begin) |
||||
, end_(end) |
||||
, row_(skippedHeader ? 1 : 0) |
||||
{ |
||||
} |
||||
|
||||
Iterator &operator+=(unsigned increment) |
||||
{ |
||||
if (!state_.endOfRecord()) |
||||
state_ = DiscardRemainingFields(state_.next, end_); |
||||
|
||||
if (state_.endOfFile()) { |
||||
state_.next = nullptr; |
||||
} else { |
||||
if (increment > 0) |
||||
state_ = DiscardMultipleRecords(state_.next, end_, increment - 1); |
||||
// We use Status::ReadyToRead as a marker in case the Field iterator is never
|
||||
// used, so the next call to operator+= will advance past the current record
|
||||
state_.status = GetFieldResult::Status::ReadyToRead; |
||||
} |
||||
[[nodiscard]] bool operator==(const RecordIterator &rhs) const |
||||
{ |
||||
return state_.next == rhs.state_.next; |
||||
} |
||||
|
||||
[[nodiscard]] bool operator!=(const RecordIterator &rhs) const |
||||
{ |
||||
return !(*this == rhs); |
||||
} |
||||
|
||||
RecordIterator &operator++() |
||||
{ |
||||
return *this += 1; |
||||
} |
||||
|
||||
RecordIterator &operator+=(unsigned increment) |
||||
{ |
||||
if (increment == 0) |
||||
return *this; |
||||
|
||||
if (!state_.endOfRecord()) { |
||||
// The field iterator either hasn't been used or hasn't consumed the entire record
|
||||
state_ = DiscardRemainingFields(state_.next, end_); |
||||
} |
||||
|
||||
[[nodiscard]] value_type operator*() |
||||
{ |
||||
return { &state_, end_ }; |
||||
if (state_.endOfFile()) { |
||||
state_.next = nullptr; |
||||
} else { |
||||
unsigned recordsSkipped = 0; |
||||
// By this point we've already advanced past the end of this record (either because the
|
||||
// last value access found the end of the record by necessity or we discarded any
|
||||
// leftovers a few lines up), so we only need to advance further if an increment
|
||||
// greater than 1 was provided.
|
||||
state_ = DiscardMultipleRecords(state_.next, end_, increment - 1, &recordsSkipped); |
||||
// As we've consumed the current record by this point we need to increment the internal
|
||||
// row counter one extra time so we have an accurate value.
|
||||
row_ += recordsSkipped + 1; |
||||
// We use Status::ReadyToRead as a marker in case the DataFileField iterator is never
|
||||
// used, so the next call to operator+= will advance past the current record
|
||||
state_.status = GetFieldResult::Status::ReadyToRead; |
||||
} |
||||
}; |
||||
return *this; |
||||
} |
||||
|
||||
[[nodiscard]] Iterator begin() const |
||||
[[nodiscard]] DataFileRecord operator*() |
||||
{ |
||||
return { begin_, end_ }; |
||||
return { &state_, end_, row_ }; |
||||
} |
||||
|
||||
[[nodiscard]] Iterator end() const |
||||
/**
|
||||
* @brief Exposes the current location of this input iterator. |
||||
* |
||||
* This is only expected to be used internally so the DataFile instance knows where the header |
||||
* ends and the body begins. You probably don't want to use this directly. |
||||
*/ |
||||
[[nodiscard]] const char *data() const |
||||
{ |
||||
return {}; |
||||
return state_.next; |
||||
} |
||||
|
||||
/**
|
||||
* @brief Returns the current row/record number (from the start of the file) |
||||
* |
||||
* The header row is always considered row 0, however if you've called DataFile.parseHeader() |
||||
* before calling DataFile.begin() then you'll get row 1 as the first record of the range. |
||||
*/ |
||||
[[nodiscard]] unsigned row() const |
||||
{ |
||||
return row_; |
||||
} |
||||
}; |
||||
} // namespace devilution
|
||||
|
||||
|
Loading…
Reference in new issue