From 7c066151ff4cd2b9d9aee281a403c6eb756f6870 Mon Sep 17 00:00:00 2001 From: Snowdream Date: Mon, 25 Jul 2022 00:38:42 -0400 Subject: [PATCH] Add a function to parse CSV files to multidimensional arrays. --- binding/binding-mri.cpp | 31 + macos/mkxp-z.xcodeproj/project.pbxproj | 2 + src/util/rapidcsv.h | 1892 ++++++++++++++++++++++++ 3 files changed, 1925 insertions(+) create mode 100644 src/util/rapidcsv.h diff --git a/binding/binding-mri.cpp b/binding/binding-mri.cpp index 39d62d2..c3459df 100644 --- a/binding/binding-mri.cpp +++ b/binding/binding-mri.cpp @@ -41,6 +41,7 @@ #include "eventthread.h" #include +#include "util/rapidcsv.h" extern "C" { #include @@ -152,6 +153,8 @@ RB_METHOD(mkxpStringToUTF8Bang); VALUE json2rb(json5pp::value const &v); json5pp::value rb2json(VALUE v); +RB_METHOD(mkxpParseCSV); + static void mriBindingInit() { tableBindingInit(); etcBindingInit(); @@ -249,6 +252,8 @@ static void mriBindingInit() { _rb_define_module_function(mod, "default_font_family=", mkxpSetDefaultFontFamily); + _rb_define_module_function(mod, "parse_csv", mkxpParseCSV); + _rb_define_method(rb_cString, "to_utf8", mkxpStringToUTF8); _rb_define_method(rb_cString, "to_utf8!", mkxpStringToUTF8Bang); @@ -688,6 +693,32 @@ RB_METHOD_GUARD(mkxpLaunch) { } RB_METHOD_GUARD_END +RB_METHOD(mkxpParseCSV) { + RB_UNUSED_PARAM; + + VALUE str; + rb_scan_args(argc, argv, "1", &str); + SafeStringValue(str); + + VALUE ret = rb_ary_new(); + std::stringstream stream(RSTRING_PTR(str)); + try { + rapidcsv::Document doc(stream); + for (int r = 0; r < doc.GetRowCount(); r++) { + VALUE col = rb_ary_new(); + for (int c = 0; c < doc.GetColumnCount(); c++) { + std::string str = doc.GetCell(c, r); + rb_ary_push(col, rb_utf8_str_new(str.c_str(), str.length())); + } + rb_ary_push(ret, col); + } + } catch (std::exception &e) { + raiseRbExc(Exception(Exception::MKXPError, "Failed to parse CSV: %s", e.what())); + } + + return ret; +} + json5pp::value loadUserSettings() { json5pp::value ret; VALUE cpath = rb_utf8_str_new_cstr(shState->config().userConfPath.c_str()); diff --git a/macos/mkxp-z.xcodeproj/project.pbxproj b/macos/mkxp-z.xcodeproj/project.pbxproj index 399d498..01553d1 100644 --- a/macos/mkxp-z.xcodeproj/project.pbxproj +++ b/macos/mkxp-z.xcodeproj/project.pbxproj @@ -1141,6 +1141,7 @@ 96573E7A27913B46002C3E77 /* TouchBar.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; name = TouchBar.mm; path = views/TouchBar.mm; sourceTree = ""; }; 96573E7B27913B46002C3E77 /* TouchBar.h */ = {isa = PBXFileReference; explicitFileType = sourcecode.c.h; name = TouchBar.h; path = views/TouchBar.h; sourceTree = ""; }; 96573E80279152DC002C3E77 /* TouchBar.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; name = TouchBar.xcassets; path = views/TouchBar.xcassets; sourceTree = ""; }; + 968CB90E288E46F4008BC75D /* rapidcsv.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = rapidcsv.h; sourceTree = ""; }; 96D8EDD028728DCA00A331EA /* gamecontrollerdb.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = gamecontrollerdb.txt; path = ../assets/gamecontrollerdb.txt; sourceTree = ""; }; A8AA4AE49BB66C97EFAE3055 /* libbrotlicommon-static.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libbrotlicommon-static.a"; path = "Dependencies/build-macosx-universal/lib/libbrotlicommon-static.a"; sourceTree = ""; }; CBEA4C45BE737EE0FF5A8A4C /* bicubic.frag */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.glsl; name = bicubic.frag; path = ../shader/bicubic.frag; sourceTree = ""; }; @@ -1457,6 +1458,7 @@ 3B012198261544A0001E574A /* string-util.h */, 3B10ED402568E95D00372D13 /* util.h */, 3B10EE1F2569348E00372D13 /* json5pp.hpp */, + 968CB90E288E46F4008BC75D /* rapidcsv.h */, ); path = util; sourceTree = ""; diff --git a/src/util/rapidcsv.h b/src/util/rapidcsv.h new file mode 100644 index 0000000..cc603af --- /dev/null +++ b/src/util/rapidcsv.h @@ -0,0 +1,1892 @@ +/* + * rapidcsv.h + * + * URL: https://github.com/d99kris/rapidcsv + * Version: 8.80 + * + * Copyright (C) 2017-2023 Kristofer Berggren + * All rights reserved. + * + * rapidcsv is distributed under the BSD 3-Clause license, see LICENSE for details. + * + */ + +#pragma once + +#include +#include +#include +#ifdef HAS_CODECVT +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rapidcsv +{ +#if defined(_MSC_VER) + static const bool sPlatformHasCR = true; +#else + static const bool sPlatformHasCR = false; +#endif + static const std::vector s_Utf8BOM = { '\xef', '\xbb', '\xbf' }; + + /** + * @brief Datastructure holding parameters controlling how invalid numbers (including + * empty strings) should be handled. + */ + struct ConverterParams + { + /** + * @brief Constructor + * @param pHasDefaultConverter specifies if conversion of non-numerical strings shall be + * converted to a default numerical value, instead of causing + * an exception to be thrown (default). + * @param pDefaultFloat floating-point default value to represent invalid numbers. + * @param pDefaultInteger integer default value to represent invalid numbers. + * @param pNumericLocale specifies whether to honor LC_NUMERIC locale (default + * true). + */ + explicit ConverterParams(const bool pHasDefaultConverter = false, + const long double pDefaultFloat = std::numeric_limits::signaling_NaN(), + const long long pDefaultInteger = 0, + const bool pNumericLocale = true) + : mHasDefaultConverter(pHasDefaultConverter) + , mDefaultFloat(pDefaultFloat) + , mDefaultInteger(pDefaultInteger) + , mNumericLocale(pNumericLocale) + { + } + + /** + * @brief specifies if conversion of non-numerical strings shall be converted to a default + * numerical value, instead of causing an exception to be thrown (default). + */ + bool mHasDefaultConverter; + + /** + * @brief floating-point default value to represent invalid numbers. + */ + long double mDefaultFloat; + + /** + * @brief integer default value to represent invalid numbers. + */ + long long mDefaultInteger; + + /** + * @brief specifies whether to honor LC_NUMERIC locale. + */ + bool mNumericLocale; + }; + + /** + * @brief Exception thrown when attempting to access Document data in a datatype which + * is not supported by the Converter class. + */ + class no_converter : public std::exception + { + public: + /** + * @brief Provides details about the exception + * @returns an explanatory string + */ + const char* what() const throw() override + { + return "unsupported conversion datatype"; + } + }; + + /** + * @brief Class providing conversion to/from numerical datatypes and strings. Only + * intended for rapidcsv internal usage, but exposed externally to allow + * specialization for custom datatype conversions. + */ + template + class Converter + { + public: + /** + * @brief Constructor + * @param pConverterParams specifies how conversion of non-numerical values to + * numerical datatype shall be handled. + */ + Converter(const ConverterParams& pConverterParams) + : mConverterParams(pConverterParams) + { + } + + /** + * @brief Converts numerical value to string representation. + * @param pVal numerical value + * @param pStr output string + */ + void ToStr(const T& pVal, std::string& pStr) const + { + if (typeid(T) == typeid(int) || + typeid(T) == typeid(long) || + typeid(T) == typeid(long long) || + typeid(T) == typeid(unsigned) || + typeid(T) == typeid(unsigned long) || + typeid(T) == typeid(unsigned long long) || + typeid(T) == typeid(long double) || + typeid(T) == typeid(char)) + { + std::ostringstream out; + out << pVal; + pStr = out.str(); + } + else if (typeid(T) == typeid(float)) + { + std::ostringstream out; + out << std::setprecision(9) << pVal; + pStr = out.str(); + } + else if (typeid(T) == typeid(double)) + { + std::ostringstream out; + out << std::setprecision(17) << pVal; + pStr = out.str(); + } + else + { + throw no_converter(); + } + } + + /** + * @brief Converts string holding a numerical value to numerical datatype representation. + * @param pVal numerical value + * @param pStr output string + */ + void ToVal(const std::string& pStr, T& pVal) const + { + try + { + if (typeid(T) == typeid(int)) + { + pVal = static_cast(std::stoi(pStr)); + return; + } + else if (typeid(T) == typeid(long)) + { + pVal = static_cast(std::stol(pStr)); + return; + } + else if (typeid(T) == typeid(long long)) + { + pVal = static_cast(std::stoll(pStr)); + return; + } + else if (typeid(T) == typeid(unsigned)) + { + pVal = static_cast(std::stoul(pStr)); + return; + } + else if (typeid(T) == typeid(unsigned long)) + { + pVal = static_cast(std::stoul(pStr)); + return; + } + else if (typeid(T) == typeid(unsigned long long)) + { + pVal = static_cast(std::stoull(pStr)); + return; + } + } + catch (...) + { + if (!mConverterParams.mHasDefaultConverter) + { + throw; + } + else + { + pVal = static_cast(mConverterParams.mDefaultInteger); + return; + } + } + + try + { + if (mConverterParams.mNumericLocale) + { + if (typeid(T) == typeid(float)) + { + pVal = static_cast(std::stof(pStr)); + return; + } + else if (typeid(T) == typeid(double)) + { + pVal = static_cast(std::stod(pStr)); + return; + } + else if (typeid(T) == typeid(long double)) + { + pVal = static_cast(std::stold(pStr)); + return; + } + } + else + { + if ((typeid(T) == typeid(float)) || + (typeid(T) == typeid(double)) || + (typeid(T) == typeid(long double))) + { + std::istringstream iss(pStr); + iss.imbue(std::locale::classic()); + iss >> pVal; + if (iss.fail() || iss.bad() || !iss.eof()) + { + throw std::invalid_argument("istringstream: no conversion"); + } + return; + } + } + } + catch (...) + { + if (!mConverterParams.mHasDefaultConverter) + { + throw; + } + else + { + pVal = static_cast(mConverterParams.mDefaultFloat); + return; + } + } + + if (typeid(T) == typeid(char)) + { + pVal = static_cast(pStr[0]); + return; + } + else + { + throw no_converter(); + } + } + + private: + const ConverterParams& mConverterParams; + }; + + /** + * @brief Specialized implementation handling string to string conversion. + * @param pVal string + * @param pStr string + */ + template<> + inline void Converter::ToStr(const std::string& pVal, std::string& pStr) const + { + pStr = pVal; + } + + /** + * @brief Specialized implementation handling string to string conversion. + * @param pVal string + * @param pStr string + */ + template<> + inline void Converter::ToVal(const std::string& pStr, std::string& pVal) const + { + pVal = pStr; + } + + template + using ConvFunc = std::function; + + /** + * @brief Datastructure holding parameters controlling which row and column should be + * treated as labels. + */ + struct LabelParams + { + /** + * @brief Constructor + * @param pColumnNameIdx specifies the zero-based row index of the column labels, setting + * it to -1 prevents column lookup by label name, and gives access + * to all rows as document data. Default: 0 + * @param pRowNameIdx specifies the zero-based column index of the row labels, setting + * it to -1 prevents row lookup by label name, and gives access + * to all columns as document data. Default: -1 + */ + explicit LabelParams(const int pColumnNameIdx = 0, const int pRowNameIdx = -1) + : mColumnNameIdx(pColumnNameIdx) + , mRowNameIdx(pRowNameIdx) + { + if (mColumnNameIdx < -1) + { + const std::string errStr = "invalid column name index " + + std::to_string(mColumnNameIdx) + " < -1"; + throw std::out_of_range(errStr); + } + + if (mRowNameIdx < -1) + { + const std::string errStr = "invalid row name index " + + std::to_string(mRowNameIdx) + " < -1"; + throw std::out_of_range(errStr); + } + } + + /** + * @brief specifies the zero-based row index of the column labels. + */ + int mColumnNameIdx; + + /** + * @brief specifies the zero-based column index of the row labels. + */ + int mRowNameIdx; + }; + + /** + * @brief Datastructure holding parameters controlling how the CSV data fields are separated. + */ + struct SeparatorParams + { + /** + * @brief Constructor + * @param pSeparator specifies the column separator (default ','). + * @param pTrim specifies whether to trim leading and trailing spaces from + * cells read (default false). + * @param pHasCR specifies whether a new document (i.e. not an existing document read) + * should use CR/LF instead of only LF (default is to use standard + * behavior of underlying platforms - CR/LF for Win, and LF for others). + * @param pQuotedLinebreaks specifies whether to allow line breaks in quoted text (default false) + * @param pAutoQuote specifies whether to automatically dequote data during read, and add + * quotes during write (default true). + * @param pQuoteChar specifies the quote character (default '\"'). + */ + explicit SeparatorParams(const char pSeparator = ',', const bool pTrim = false, + const bool pHasCR = sPlatformHasCR, const bool pQuotedLinebreaks = false, + const bool pAutoQuote = true, const char pQuoteChar = '"') + : mSeparator(pSeparator) + , mTrim(pTrim) + , mHasCR(pHasCR) + , mQuotedLinebreaks(pQuotedLinebreaks) + , mAutoQuote(pAutoQuote) + , mQuoteChar(pQuoteChar) + { + } + + /** + * @brief specifies the column separator. + */ + char mSeparator; + + /** + * @brief specifies whether to trim leading and trailing spaces from cells read. + */ + bool mTrim; + + /** + * @brief specifies whether new documents should use CR/LF instead of LF. + */ + bool mHasCR; + + /** + * @brief specifies whether to allow line breaks in quoted text. + */ + bool mQuotedLinebreaks; + + /** + * @brief specifies whether to automatically dequote cell data. + */ + bool mAutoQuote; + + /** + * @brief specifies the quote character. + */ + char mQuoteChar; + }; + + /** + * @brief Datastructure holding parameters controlling how special line formats should be + * treated. + */ + struct LineReaderParams + { + /** + * @brief Constructor + * @param pSkipCommentLines specifies whether to skip lines prefixed with + * mCommentPrefix. Default: false + * @param pCommentPrefix specifies which prefix character to indicate a comment + * line. Default: # + * @param pSkipEmptyLines specifies whether to skip empty lines. Default: false + */ + explicit LineReaderParams(const bool pSkipCommentLines = false, + const char pCommentPrefix = '#', + const bool pSkipEmptyLines = false) + : mSkipCommentLines(pSkipCommentLines) + , mCommentPrefix(pCommentPrefix) + , mSkipEmptyLines(pSkipEmptyLines) + { + } + + /** + * @brief specifies whether to skip lines prefixed with mCommentPrefix. + */ + bool mSkipCommentLines; + + /** + * @brief specifies which prefix character to indicate a comment line. + */ + char mCommentPrefix; + + /** + * @brief specifies whether to skip empty lines. + */ + bool mSkipEmptyLines; + }; + + /** + * @brief Class representing a CSV document. + */ + class Document + { + public: + /** + * @brief Constructor + * @param pPath specifies the path of an existing CSV-file to populate the Document + * data with. + * @param pLabelParams specifies which row and column should be treated as labels. + * @param pSeparatorParams specifies which field and row separators should be used. + * @param pConverterParams specifies how invalid numbers (including empty strings) should be + * handled. + * @param pLineReaderParams specifies how special line formats should be treated. + */ + explicit Document(const std::string& pPath = std::string(), + const LabelParams& pLabelParams = LabelParams(), + const SeparatorParams& pSeparatorParams = SeparatorParams(), + const ConverterParams& pConverterParams = ConverterParams(), + const LineReaderParams& pLineReaderParams = LineReaderParams()) + : mPath(pPath) + , mLabelParams(pLabelParams) + , mSeparatorParams(pSeparatorParams) + , mConverterParams(pConverterParams) + , mLineReaderParams(pLineReaderParams) + , mData() + , mColumnNames() + , mRowNames() + { + if (!mPath.empty()) + { + ReadCsv(); + } + } + + /** + * @brief Constructor + * @param pStream specifies a binary input stream to read CSV data from. + * @param pLabelParams specifies which row and column should be treated as labels. + * @param pSeparatorParams specifies which field and row separators should be used. + * @param pConverterParams specifies how invalid numbers (including empty strings) should be + * handled. + * @param pLineReaderParams specifies how special line formats should be treated. + */ + explicit Document(std::istream& pStream, + const LabelParams& pLabelParams = LabelParams(), + const SeparatorParams& pSeparatorParams = SeparatorParams(), + const ConverterParams& pConverterParams = ConverterParams(), + const LineReaderParams& pLineReaderParams = LineReaderParams()) + : mPath() + , mLabelParams(pLabelParams) + , mSeparatorParams(pSeparatorParams) + , mConverterParams(pConverterParams) + , mLineReaderParams(pLineReaderParams) + , mData() + , mColumnNames() + , mRowNames() + { + ReadCsv(pStream); + } + + /** + * @brief Read Document data from file. + * @param pPath specifies the path of an existing CSV-file to populate the Document + * data with. + * @param pLabelParams specifies which row and column should be treated as labels. + * @param pSeparatorParams specifies which field and row separators should be used. + * @param pConverterParams specifies how invalid numbers (including empty strings) should be + * handled. + * @param pLineReaderParams specifies how special line formats should be treated. + */ + void Load(const std::string& pPath, + const LabelParams& pLabelParams = LabelParams(), + const SeparatorParams& pSeparatorParams = SeparatorParams(), + const ConverterParams& pConverterParams = ConverterParams(), + const LineReaderParams& pLineReaderParams = LineReaderParams()) + { + mPath = pPath; + mLabelParams = pLabelParams; + mSeparatorParams = pSeparatorParams; + mConverterParams = pConverterParams; + mLineReaderParams = pLineReaderParams; + ReadCsv(); + } + + /** + * @brief Read Document data from stream. + * @param pStream specifies a binary input stream to read CSV data from. + * @param pLabelParams specifies which row and column should be treated as labels. + * @param pSeparatorParams specifies which field and row separators should be used. + * @param pConverterParams specifies how invalid numbers (including empty strings) should be + * handled. + * @param pLineReaderParams specifies how special line formats should be treated. + */ + void Load(std::istream& pStream, + const LabelParams& pLabelParams = LabelParams(), + const SeparatorParams& pSeparatorParams = SeparatorParams(), + const ConverterParams& pConverterParams = ConverterParams(), + const LineReaderParams& pLineReaderParams = LineReaderParams()) + { + mPath = ""; + mLabelParams = pLabelParams; + mSeparatorParams = pSeparatorParams; + mConverterParams = pConverterParams; + mLineReaderParams = pLineReaderParams; + ReadCsv(pStream); + } + + /** + * @brief Write Document data to file. + * @param pPath optionally specifies the path where the CSV-file will be created + * (if not specified, the original path provided when creating or + * loading the Document data will be used). + */ + void Save(const std::string& pPath = std::string()) + { + if (!pPath.empty()) + { + mPath = pPath; + } + WriteCsv(); + } + + /** + * @brief Write Document data to stream. + * @param pStream specifies a binary output stream to write the data to. + */ + void Save(std::ostream& pStream) const + { + WriteCsv(pStream); + } + + /** + * @brief Clears loaded Document data. + * + */ + void Clear() + { + mData.clear(); + mColumnNames.clear(); + mRowNames.clear(); +#ifdef HAS_CODECVT + mIsUtf16 = false; + mIsLE = false; +#endif + mHasUtf8BOM = false; + } + + /** + * @brief Get column index by name. + * @param pColumnName column label name. + * @returns zero-based column index. + */ + int GetColumnIdx(const std::string& pColumnName) const + { + if (mLabelParams.mColumnNameIdx >= 0) + { + if (mColumnNames.find(pColumnName) != mColumnNames.end()) + { + return static_cast(mColumnNames.at(pColumnName)) - (mLabelParams.mRowNameIdx + 1); + } + } + return -1; + } + + /** + * @brief Get column by index. + * @param pColumnIdx zero-based column index. + * @returns vector of column data. + */ + template + std::vector GetColumn(const size_t pColumnIdx) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + std::vector column; + Converter converter(mConverterParams); + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) + { + if (dataColumnIdx < itRow->size()) + { + T val; + converter.ToVal(itRow->at(dataColumnIdx), val); + column.push_back(val); + } + else + { + const std::string errStr = "requested column index " + + std::to_string(pColumnIdx) + " >= " + + std::to_string(itRow->size() - GetDataColumnIndex(0)) + + " (number of columns on row index " + + std::to_string(std::distance(mData.begin(), itRow) - + (mLabelParams.mColumnNameIdx + 1)) + ")"; + throw std::out_of_range(errStr); + } + } + } + return column; + } + + /** + * @brief Get column by index. + * @param pColumnIdx zero-based column index. + * @param pToVal conversion function. + * @returns vector of column data. + */ + template + std::vector GetColumn(const size_t pColumnIdx, ConvFunc pToVal) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + std::vector column; + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) + { + T val; + pToVal(itRow->at(dataColumnIdx), val); + column.push_back(val); + } + } + return column; + } + + /** + * @brief Get column by name. + * @param pColumnName column label name. + * @returns vector of column data. + */ + template + std::vector GetColumn(const std::string& pColumnName) const + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + return GetColumn(static_cast(columnIdx)); + } + + /** + * @brief Get column by name. + * @param pColumnName column label name. + * @param pToVal conversion function. + * @returns vector of column data. + */ + template + std::vector GetColumn(const std::string& pColumnName, ConvFunc pToVal) const + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + return GetColumn(static_cast(columnIdx), pToVal); + } + + /** + * @brief Set column by index. + * @param pColumnIdx zero-based column index. + * @param pColumn vector of column data. + */ + template + void SetColumn(const size_t pColumnIdx, const std::vector& pColumn) + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + + while (GetDataRowIndex(pColumn.size()) > GetDataRowCount()) + { + std::vector row; + row.resize(GetDataColumnCount()); + mData.push_back(row); + } + + if ((dataColumnIdx + 1) > GetDataColumnCount()) + { + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) + { + itRow->resize(GetDataColumnIndex(dataColumnIdx + 1)); + } + } + } + + Converter converter(mConverterParams); + for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow) + { + std::string str; + converter.ToStr(*itRow, str); + mData.at(static_cast(std::distance(pColumn.begin(), itRow) + mLabelParams.mColumnNameIdx + 1)).at( + dataColumnIdx) = str; + } + } + + /** + * @brief Set column by name. + * @param pColumnName column label name. + * @param pColumn vector of column data. + */ + template + void SetColumn(const std::string& pColumnName, const std::vector& pColumn) + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + SetColumn(static_cast(columnIdx), pColumn); + } + + /** + * @brief Remove column by index. + * @param pColumnIdx zero-based column index. + */ + void RemoveColumn(const size_t pColumnIdx) + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) + { + itRow->erase(itRow->begin() + static_cast(dataColumnIdx)); + } + } + + UpdateColumnNames(); + } + + /** + * @brief Remove column by name. + * @param pColumnName column label name. + */ + void RemoveColumn(const std::string& pColumnName) + { + int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + RemoveColumn(static_cast(columnIdx)); + } + + /** + * @brief Insert column at specified index. + * @param pColumnIdx zero-based column index. + * @param pColumn vector of column data (optional argument). + * @param pColumnName column label name (optional argument). + */ + template + void InsertColumn(const size_t pColumnIdx, const std::vector& pColumn = std::vector(), + const std::string& pColumnName = std::string()) + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + + std::vector column; + if (pColumn.empty()) + { + column.resize(GetDataRowCount()); + } + else + { + column.resize(GetDataRowIndex(pColumn.size())); + Converter converter(mConverterParams); + for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow) + { + std::string str; + converter.ToStr(*itRow, str); + const size_t rowIdx = + static_cast(std::distance(pColumn.begin(), itRow) + (mLabelParams.mColumnNameIdx + 1)); + column.at(rowIdx) = str; + } + } + + while (column.size() > GetDataRowCount()) + { + std::vector row; + const size_t columnCount = std::max(static_cast(mLabelParams.mColumnNameIdx + 1), + GetDataColumnCount()); + row.resize(columnCount); + mData.push_back(row); + } + + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) + { + const size_t rowIdx = static_cast(std::distance(mData.begin(), itRow)); + itRow->insert(itRow->begin() + static_cast(dataColumnIdx), column.at(rowIdx)); + } + } + + if (!pColumnName.empty()) + { + SetColumnName(pColumnIdx, pColumnName); + } + + UpdateColumnNames(); + } + + /** + * @brief Get number of data columns (excluding label columns). + * @returns column count. + */ + size_t GetColumnCount() const + { + const int count = static_cast((mData.size() > 0) ? mData.at(0).size() : 0) - + (mLabelParams.mRowNameIdx + 1); + return (count >= 0) ? static_cast(count) : 0; + } + + /** + * @brief Get row index by name. + * @param pRowName row label name. + * @returns zero-based row index. + */ + int GetRowIdx(const std::string& pRowName) const + { + if (mLabelParams.mRowNameIdx >= 0) + { + if (mRowNames.find(pRowName) != mRowNames.end()) + { + return static_cast(mRowNames.at(pRowName)) - (mLabelParams.mColumnNameIdx + 1); + } + } + return -1; + } + + /** + * @brief Get row by index. + * @param pRowIdx zero-based row index. + * @returns vector of row data. + */ + template + std::vector GetRow(const size_t pRowIdx) const + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + std::vector row; + Converter converter(mConverterParams); + for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol) + { + if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx) + { + T val; + converter.ToVal(*itCol, val); + row.push_back(val); + } + } + return row; + } + + /** + * @brief Get row by index. + * @param pRowIdx zero-based row index. + * @param pToVal conversion function. + * @returns vector of row data. + */ + template + std::vector GetRow(const size_t pRowIdx, ConvFunc pToVal) const + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + std::vector row; + Converter converter(mConverterParams); + for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol) + { + if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx) + { + T val; + pToVal(*itCol, val); + row.push_back(val); + } + } + return row; + } + + /** + * @brief Get row by name. + * @param pRowName row label name. + * @returns vector of row data. + */ + template + std::vector GetRow(const std::string& pRowName) const + { + int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + return GetRow(static_cast(rowIdx)); + } + + /** + * @brief Get row by name. + * @param pRowName row label name. + * @param pToVal conversion function. + * @returns vector of row data. + */ + template + std::vector GetRow(const std::string& pRowName, ConvFunc pToVal) const + { + int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + return GetRow(static_cast(rowIdx), pToVal); + } + + /** + * @brief Set row by index. + * @param pRowIdx zero-based row index. + * @param pRow vector of row data. + */ + template + void SetRow(const size_t pRowIdx, const std::vector& pRow) + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + + while ((dataRowIdx + 1) > GetDataRowCount()) + { + std::vector row; + row.resize(GetDataColumnCount()); + mData.push_back(row); + } + + if (pRow.size() > GetDataColumnCount()) + { + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) + { + itRow->resize(GetDataColumnIndex(pRow.size())); + } + } + } + + Converter converter(mConverterParams); + for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol) + { + std::string str; + converter.ToStr(*itCol, str); + mData.at(dataRowIdx).at(static_cast(std::distance(pRow.begin(), + itCol) + mLabelParams.mRowNameIdx + 1)) = str; + } + } + + /** + * @brief Set row by name. + * @param pRowName row label name. + * @param pRow vector of row data. + */ + template + void SetRow(const std::string& pRowName, const std::vector& pRow) + { + int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + return SetRow(static_cast(rowIdx), pRow); + } + + /** + * @brief Remove row by index. + * @param pRowIdx zero-based row index. + */ + void RemoveRow(const size_t pRowIdx) + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + mData.erase(mData.begin() + static_cast(dataRowIdx)); + UpdateRowNames(); + } + + /** + * @brief Remove row by name. + * @param pRowName row label name. + */ + void RemoveRow(const std::string& pRowName) + { + int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + RemoveRow(static_cast(rowIdx)); + } + + /** + * @brief Insert row at specified index. + * @param pRowIdx zero-based row index. + * @param pRow vector of row data (optional argument). + * @param pRowName row label name (optional argument). + */ + template + void InsertRow(const size_t pRowIdx, const std::vector& pRow = std::vector(), + const std::string& pRowName = std::string()) + { + const size_t rowIdx = GetDataRowIndex(pRowIdx); + + std::vector row; + if (pRow.empty()) + { + row.resize(GetDataColumnCount()); + } + else + { + row.resize(GetDataColumnIndex(pRow.size())); + Converter converter(mConverterParams); + for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol) + { + std::string str; + converter.ToStr(*itCol, str); + row.at(static_cast(std::distance(pRow.begin(), itCol) + mLabelParams.mRowNameIdx + 1)) = str; + } + } + + while (rowIdx > GetDataRowCount()) + { + std::vector tempRow; + tempRow.resize(GetDataColumnCount()); + mData.push_back(tempRow); + } + + mData.insert(mData.begin() + static_cast(rowIdx), row); + + if (!pRowName.empty()) + { + SetRowName(pRowIdx, pRowName); + } + + UpdateRowNames(); + } + + /** + * @brief Get number of data rows (excluding label rows). + * @returns row count. + */ + size_t GetRowCount() const + { + const int count = static_cast(mData.size()) - (mLabelParams.mColumnNameIdx + 1); + return (count >= 0) ? static_cast(count) : 0; + } + + /** + * @brief Get cell by index. + * @param pColumnIdx zero-based column index. + * @param pRowIdx zero-based row index. + * @returns cell data. + */ + template + T GetCell(const size_t pColumnIdx, const size_t pRowIdx) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + + T val; + Converter converter(mConverterParams); + converter.ToVal(mData.at(dataRowIdx).at(dataColumnIdx), val); + return val; + } + + /** + * @brief Get cell by index. + * @param pColumnIdx zero-based column index. + * @param pRowIdx zero-based row index. + * @param pToVal conversion function. + * @returns cell data. + */ + template + T GetCell(const size_t pColumnIdx, const size_t pRowIdx, ConvFunc pToVal) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + + T val; + pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val); + return val; + } + + /** + * @brief Get cell by name. + * @param pColumnName column label name. + * @param pRowName row label name. + * @returns cell data. + */ + template + T GetCell(const std::string& pColumnName, const std::string& pRowName) const + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + const int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + return GetCell(static_cast(columnIdx), static_cast(rowIdx)); + } + + /** + * @brief Get cell by name. + * @param pColumnName column label name. + * @param pRowName row label name. + * @param pToVal conversion function. + * @returns cell data. + */ + template + T GetCell(const std::string& pColumnName, const std::string& pRowName, ConvFunc pToVal) const + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + const int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + return GetCell(static_cast(columnIdx), static_cast(rowIdx), pToVal); + } + + /** + * @brief Get cell by column name and row index. + * @param pColumnName column label name. + * @param pRowIdx zero-based row index. + * @returns cell data. + */ + template + T GetCell(const std::string& pColumnName, const size_t pRowIdx) const + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + return GetCell(static_cast(columnIdx), pRowIdx); + } + + /** + * @brief Get cell by column name and row index. + * @param pColumnName column label name. + * @param pRowIdx zero-based row index. + * @param pToVal conversion function. + * @returns cell data. + */ + template + T GetCell(const std::string& pColumnName, const size_t pRowIdx, ConvFunc pToVal) const + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + return GetCell(static_cast(columnIdx), pRowIdx, pToVal); + } + + /** + * @brief Get cell by column index and row name. + * @param pColumnIdx zero-based column index. + * @param pRowName row label name. + * @returns cell data. + */ + template + T GetCell(const size_t pColumnIdx, const std::string& pRowName) const + { + const int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + return GetCell(pColumnIdx, static_cast(rowIdx)); + } + + /** + * @brief Get cell by column index and row name. + * @param pColumnIdx zero-based column index. + * @param pRowName row label name. + * @param pToVal conversion function. + * @returns cell data. + */ + template + T GetCell(const size_t pColumnIdx, const std::string& pRowName, ConvFunc pToVal) const + { + const int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + return GetCell(pColumnIdx, static_cast(rowIdx), pToVal); + } + + /** + * @brief Set cell by index. + * @param pRowIdx zero-based row index. + * @param pColumnIdx zero-based column index. + * @param pCell cell data. + */ + template + void SetCell(const size_t pColumnIdx, const size_t pRowIdx, const T& pCell) + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + + while ((dataRowIdx + 1) > GetDataRowCount()) + { + std::vector row; + row.resize(GetDataColumnCount()); + mData.push_back(row); + } + + if ((dataColumnIdx + 1) > GetDataColumnCount()) + { + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) + { + itRow->resize(dataColumnIdx + 1); + } + } + } + + std::string str; + Converter converter(mConverterParams); + converter.ToStr(pCell, str); + mData.at(dataRowIdx).at(dataColumnIdx) = str; + } + + /** + * @brief Set cell by name. + * @param pColumnName column label name. + * @param pRowName row label name. + * @param pCell cell data. + */ + template + void SetCell(const std::string& pColumnName, const std::string& pRowName, const T& pCell) + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + const int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + SetCell(static_cast(columnIdx), static_cast(rowIdx), pCell); + } + + /** + * @brief Set cell by column index and row name. + * @param pColumnIdx zero-based column index. + * @param pRowName row label name. + * @param pCell cell data. + */ + template + void SetCell(const size_t pColumnIdx, const std::string& pRowName, const T& pCell) + { + const int rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + SetCell(pColumnIdx, static_cast(rowIdx), pCell); + } + + /** + * @brief Set cell by column name and row index. + * @param pColumnName column label name. + * @param pRowIdx zero-based row index. + * @param pCell cell data. + */ + template + void SetCell(const std::string& pColumnName, const size_t pRowIdx, const T& pCell) + { + const int columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + SetCell(static_cast(columnIdx), pRowIdx, pCell); + } + + /** + * @brief Get column name + * @param pColumnIdx zero-based column index. + * @returns column name. + */ + std::string GetColumnName(const size_t pColumnIdx) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + if (mLabelParams.mColumnNameIdx < 0) + { + throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx)); + } + + return mData.at(static_cast(mLabelParams.mColumnNameIdx)).at(dataColumnIdx); + } + + /** + * @brief Set column name + * @param pColumnIdx zero-based column index. + * @param pColumnName column name. + */ + void SetColumnName(size_t pColumnIdx, const std::string& pColumnName) + { + if (mLabelParams.mColumnNameIdx < 0) + { + throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx)); + } + + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + mColumnNames[pColumnName] = dataColumnIdx; + + // increase table size if necessary: + const size_t rowIdx = static_cast(mLabelParams.mColumnNameIdx); + if (rowIdx >= mData.size()) + { + mData.resize(rowIdx + 1); + } + auto& row = mData[rowIdx]; + if (dataColumnIdx >= row.size()) + { + row.resize(dataColumnIdx + 1); + } + + mData.at(static_cast(mLabelParams.mColumnNameIdx)).at(dataColumnIdx) = pColumnName; + } + + /** + * @brief Get column names + * @returns vector of column names. + */ + std::vector GetColumnNames() const + { + if (mLabelParams.mColumnNameIdx >= 0) + { + return std::vector(mData.at(static_cast(mLabelParams.mColumnNameIdx)).begin() + + (mLabelParams.mRowNameIdx + 1), + mData.at(static_cast(mLabelParams.mColumnNameIdx)).end()); + } + + return std::vector(); + } + + /** + * @brief Get row name + * @param pRowIdx zero-based column index. + * @returns row name. + */ + std::string GetRowName(const size_t pRowIdx) const + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + if (mLabelParams.mRowNameIdx < 0) + { + throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx)); + } + + return mData.at(dataRowIdx).at(static_cast(mLabelParams.mRowNameIdx)); + } + + /** + * @brief Set row name + * @param pRowIdx zero-based row index. + * @param pRowName row name. + */ + void SetRowName(size_t pRowIdx, const std::string& pRowName) + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + mRowNames[pRowName] = dataRowIdx; + if (mLabelParams.mRowNameIdx < 0) + { + throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx)); + } + + // increase table size if necessary: + if (dataRowIdx >= mData.size()) + { + mData.resize(dataRowIdx + 1); + } + auto& row = mData[dataRowIdx]; + if (mLabelParams.mRowNameIdx >= static_cast(row.size())) + { + row.resize(static_cast(mLabelParams.mRowNameIdx) + 1); + } + + mData.at(dataRowIdx).at(static_cast(mLabelParams.mRowNameIdx)) = pRowName; + } + + /** + * @brief Get row names + * @returns vector of row names. + */ + std::vector GetRowNames() const + { + std::vector rownames; + if (mLabelParams.mRowNameIdx >= 0) + { + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) + { + rownames.push_back(itRow->at(static_cast(mLabelParams.mRowNameIdx))); + } + } + } + return rownames; + } + + private: + void ReadCsv() + { + std::ifstream stream; + stream.exceptions(std::ifstream::failbit | std::ifstream::badbit); + stream.open(mPath, std::ios::binary); + ReadCsv(stream); + } + + void ReadCsv(std::istream& pStream) + { + Clear(); + pStream.seekg(0, std::ios::end); + std::streamsize length = pStream.tellg(); + pStream.seekg(0, std::ios::beg); + +#ifdef HAS_CODECVT + std::vector bom2b(2, '\0'); + if (length >= 2) + { + pStream.read(bom2b.data(), 2); + pStream.seekg(0, std::ios::beg); + } + + static const std::vector bomU16le = { '\xff', '\xfe' }; + static const std::vector bomU16be = { '\xfe', '\xff' }; + if ((bom2b == bomU16le) || (bom2b == bomU16be)) + { + mIsUtf16 = true; + mIsLE = (bom2b == bomU16le); + + std::wifstream wstream; + wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit); + wstream.open(mPath, std::ios::binary); + if (mIsLE) + { + wstream.imbue(std::locale(wstream.getloc(), + new std::codecvt_utf16(std::consume_header | + std::little_endian)>)); + } + else + { + wstream.imbue(std::locale(wstream.getloc(), + new std::codecvt_utf16)); + } + std::wstringstream wss; + wss << wstream.rdbuf(); + std::string utf8 = ToString(wss.str()); + std::stringstream ss(utf8); + ParseCsv(ss, static_cast(utf8.size())); + } + else +#endif + { + // check for UTF-8 Byte order mark and skip it when found + if (length >= 3) + { + std::vector bom3b(3, '\0'); + pStream.read(bom3b.data(), 3); + + if (bom3b != s_Utf8BOM) + { + // file does not start with a UTF-8 Byte order mark + pStream.seekg(0, std::ios::beg); + } + else + { + // file did start with a UTF-8 Byte order mark, simply skip it + length -= 3; + mHasUtf8BOM = true; + } + } + + ParseCsv(pStream, length); + } + } + + void ParseCsv(std::istream& pStream, std::streamsize p_FileLength) + { + const std::streamsize bufLength = 64 * 1024; + std::vector buffer(bufLength); + std::vector row; + std::string cell; + bool quoted = false; + int cr = 0; + int lf = 0; + + while (p_FileLength > 0) + { + const std::streamsize toReadLength = std::min(p_FileLength, bufLength); + pStream.read(buffer.data(), toReadLength); + + // With user-specified istream opened in non-binary mode on windows, we may have a + // data length mismatch, so ensure we don't parse outside actual data length read. + const std::streamsize readLength = pStream.gcount(); + if (readLength <= 0) + { + break; + } + + for (size_t i = 0; i < static_cast(readLength); ++i) + { + if (buffer[i] == mSeparatorParams.mQuoteChar) + { + if (cell.empty() || (cell[0] == mSeparatorParams.mQuoteChar)) + { + quoted = !quoted; + } + cell += buffer[i]; + } + else if (buffer[i] == mSeparatorParams.mSeparator) + { + if (!quoted) + { + row.push_back(Unquote(Trim(cell))); + cell.clear(); + } + else + { + cell += buffer[i]; + } + } + else if (buffer[i] == '\r') + { + if (mSeparatorParams.mQuotedLinebreaks && quoted) + { + cell += buffer[i]; + } + else + { + ++cr; + } + } + else if (buffer[i] == '\n') + { + if (mSeparatorParams.mQuotedLinebreaks && quoted) + { + cell += buffer[i]; + } + else + { + ++lf; + if (mLineReaderParams.mSkipEmptyLines && row.empty() && cell.empty()) + { + // skip empty line + } + else + { + row.push_back(Unquote(Trim(cell))); + + if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() && + (row.at(0)[0] == mLineReaderParams.mCommentPrefix)) + { + // skip comment line + } + else + { + mData.push_back(row); + } + + cell.clear(); + row.clear(); + quoted = false; + } + } + } + else + { + cell += buffer[i]; + } + } + p_FileLength -= readLength; + } + + // Handle last line without linebreak + if (!cell.empty() || !row.empty()) + { + row.push_back(Unquote(Trim(cell))); + cell.clear(); + mData.push_back(row); + row.clear(); + } + + // Assume CR/LF if at least half the linebreaks have CR + mSeparatorParams.mHasCR = (cr > (lf / 2)); + + // Set up column labels + UpdateColumnNames(); + + // Set up row labels + UpdateRowNames(); + } + + void WriteCsv() const + { +#ifdef HAS_CODECVT + if (mIsUtf16) + { + std::stringstream ss; + WriteCsv(ss); + std::string utf8 = ss.str(); + std::wstring wstr = ToWString(utf8); + + std::wofstream wstream; + wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit); + wstream.open(mPath, std::ios::binary | std::ios::trunc); + + if (mIsLE) + { + wstream.imbue(std::locale(wstream.getloc(), + new std::codecvt_utf16(std::little_endian)>)); + } + else + { + wstream.imbue(std::locale(wstream.getloc(), + new std::codecvt_utf16)); + } + + wstream << static_cast(0xfeff); + wstream << wstr; + } + else +#endif + { + std::ofstream stream; + stream.exceptions(std::ofstream::failbit | std::ofstream::badbit); + stream.open(mPath, std::ios::binary | std::ios::trunc); + if (mHasUtf8BOM) + { + stream.write(s_Utf8BOM.data(), 3); + } + + WriteCsv(stream); + } + } + + void WriteCsv(std::ostream& pStream) const + { + for (auto itr = mData.begin(); itr != mData.end(); ++itr) + { + for (auto itc = itr->begin(); itc != itr->end(); ++itc) + { + if (mSeparatorParams.mAutoQuote && + ((itc->find(mSeparatorParams.mSeparator) != std::string::npos) || + (itc->find(' ') != std::string::npos))) + { + // escape quotes in string + std::string str = *itc; + const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar); + ReplaceString(str, quoteCharStr, quoteCharStr + quoteCharStr); + + pStream << quoteCharStr << str << quoteCharStr; + } + else + { + pStream << *itc; + } + + if (std::distance(itc, itr->end()) > 1) + { + pStream << mSeparatorParams.mSeparator; + } + } + pStream << (mSeparatorParams.mHasCR ? "\r\n" : "\n"); + } + } + + size_t GetDataRowCount() const + { + return mData.size(); + } + + size_t GetDataColumnCount() const + { + const size_t firstDataRow = static_cast((mLabelParams.mColumnNameIdx >= 0) ? mLabelParams.mColumnNameIdx : 0); + return (mData.size() > firstDataRow) ? mData.at(firstDataRow).size() : 0; + } + + inline size_t GetDataRowIndex(const size_t pRowIdx) const + { + const size_t firstDataRow = static_cast((mLabelParams.mColumnNameIdx + 1 >= 0) ? mLabelParams.mColumnNameIdx + 1 : 0); + return pRowIdx + firstDataRow; + } + + inline size_t GetDataColumnIndex(const size_t pColumnIdx) const + { + const size_t firstDataColumn = static_cast((mLabelParams.mRowNameIdx + 1 >= 0) ? mLabelParams.mRowNameIdx + 1 : 0); + return pColumnIdx + firstDataColumn; + } + + std::string Trim(const std::string& pStr) const + { + if (mSeparatorParams.mTrim) + { + std::string str = pStr; + + // ltrim + str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int ch) { return !isspace(ch); })); + + // rtrim + str.erase(std::find_if(str.rbegin(), str.rend(), [](int ch) { return !isspace(ch); }).base(), str.end()); + + return str; + } + else + { + return pStr; + } + } + + std::string Unquote(const std::string& pStr) const + { + if (mSeparatorParams.mAutoQuote && (pStr.size() >= 2) && + (pStr.front() == mSeparatorParams.mQuoteChar) && + (pStr.back() == mSeparatorParams.mQuoteChar)) + { + // remove start/end quotes + std::string str = pStr.substr(1, pStr.size() - 2); + + // unescape quotes in string + const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar); + ReplaceString(str, quoteCharStr + quoteCharStr, quoteCharStr); + + return str; + } + else + { + return pStr; + } + } + + void UpdateColumnNames() + { + mColumnNames.clear(); + if ((mLabelParams.mColumnNameIdx >= 0) && + (static_cast(mData.size()) > mLabelParams.mColumnNameIdx)) + { + size_t i = 0; + for (auto& columnName : mData[static_cast(mLabelParams.mColumnNameIdx)]) + { + mColumnNames[columnName] = i++; + } + } + } + + void UpdateRowNames() + { + mRowNames.clear(); + if ((mLabelParams.mRowNameIdx >= 0) && + (static_cast(mData.size()) > + (mLabelParams.mColumnNameIdx + 1))) + { + size_t i = 0; + for (auto& dataRow : mData) + { + if (static_cast(dataRow.size()) > mLabelParams.mRowNameIdx) + { + mRowNames[dataRow[static_cast(mLabelParams.mRowNameIdx)]] = i++; + } + } + } + } + +#ifdef HAS_CODECVT +#if defined(_MSC_VER) +#pragma warning (push) +#pragma warning (disable: 4996) +#endif + static std::string ToString(const std::wstring& pWStr) + { + return std::wstring_convert, wchar_t>{ }.to_bytes(pWStr); + } + + static std::wstring ToWString(const std::string& pStr) + { + return std::wstring_convert, wchar_t>{ }.from_bytes(pStr); + } +#if defined(_MSC_VER) +#pragma warning (pop) +#endif +#endif + + static void ReplaceString(std::string& pStr, const std::string& pSearch, const std::string& pReplace) + { + size_t pos = 0; + + while ((pos = pStr.find(pSearch, pos)) != std::string::npos) + { + pStr.replace(pos, pSearch.size(), pReplace); + pos += pReplace.size(); + } + } + + private: + std::string mPath; + LabelParams mLabelParams; + SeparatorParams mSeparatorParams; + ConverterParams mConverterParams; + LineReaderParams mLineReaderParams; + std::vector> mData; + std::map mColumnNames; + std::map mRowNames; +#ifdef HAS_CODECVT + bool mIsUtf16 = false; + bool mIsLE = false; +#endif + bool mHasUtf8BOM = false; + }; +}