Skip to content

Commit

Permalink
Fix bug mentioned in #1445 (comment)
Browse files Browse the repository at this point in the history
  • Loading branch information
sjanel committed Nov 21, 2024
1 parent 4d4f63c commit 4c5724e
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 88 deletions.
152 changes: 72 additions & 80 deletions include/glaze/csv/read.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ namespace glz
}
}
};

// CSV spec: https://www.ietf.org/rfc/rfc4180.txt
// Quotes are escaped via double quotes

Expand Down Expand Up @@ -151,43 +151,34 @@ namespace glz
// Escaped quote
value.push_back('"');
++it;
} else {
}
else {
// Closing quote
break;
}
} else {
}
else {
value.push_back(*it);
++it;
}
}
// After closing quote, expect comma, newline, or end of input
if (it != end) {
if (*it == ',') {
++it; // Skip the comma
} else if (*it == '\n') {
// End of record, do nothing
} else if (it == end) {
// End of input
} else {
// Invalid character after closing quote
ctx.error = error_code::syntax_error;
return;
}
// A closing quote may only be followed by a comma, a newline, or the end of
// the input (the `it != end` guard covers the latter). Anything else is
// malformed. The cursor is left on the separator; it is not consumed here.
if (it != end && *it != ',' && *it != '\n') {
   // Invalid character after closing quote
   ctx.error = error_code::syntax_error;
   return;
}
} else {
}
else {
// Unquoted field
while (it != end && *it != ',' && *it != '\n') {
value.push_back(*it);
++it;
}
if (it != end && *it == ',') {
++it; // Skip the comma
}
}
}
};


template <bool_t T>
struct from<CSV, T>
{
Expand Down Expand Up @@ -220,8 +211,7 @@ namespace glz
/// Advances `it` to the next occurrence of `delim`, or to `end` if none is found.
///
/// The iterator is incremented before the first comparison, so the element it
/// refers to on entry is never examined: a delimiter under the cursor is stepped
/// over. Because of that pre-increment, `it` must not already equal `end` on entry.
///
/// @tparam delim  Character to stop on.
/// @param it      Cursor, advanced in place.
/// @param end     One-past-the-end sentinel for the scan.
template <char delim>
inline void goto_delim(auto&& it, auto&& end) noexcept
{
   // Single scan only: running the loop a second time would skip the delimiter
   // just found and could step past `end`.
   while (++it != end && *it != delim) {
      // Intentionally empty: the loop condition does all the work.
   }
}

inline auto read_column_wise_keys(auto&& ctx, auto&& it, auto&& end)
Expand Down Expand Up @@ -469,87 +459,89 @@ namespace glz
decode_hash_with_size<CSV, T, HashInfo, HashInfo.type>::op(key.data(), end, key.size());

if (index < N) [[likely]] {
visit<N>([&]<size_t I>() {
decltype(auto) member = [&]() -> decltype(auto) {
if constexpr (reflectable<T>) {
return get_member(value, get<I>(to_tuple(value)));
}
else {
return get_member(value, get<I>(reflect<T>::values));
}
}();

using M = std::decay_t<decltype(member)>;
if constexpr (fixed_array_value_t<M> && emplace_backable<M>) {
size_t col = 0;
while (it != end) {
if (col < member.size()) [[likely]] {
read<CSV>::op<Opts>(member[col][csv_index], ctx, it, end);
visit<N>(
[&]<size_t I>() {
decltype(auto) member = [&]() -> decltype(auto) {
if constexpr (reflectable<T>) {
return get_member(value, get<I>(to_tuple(value)));
}
else [[unlikely]] {
read<CSV>::op<Opts>(member.emplace_back()[csv_index], ctx, it, end);
else {
return get_member(value, get<I>(reflect<T>::values));
}
}();

using M = std::decay_t<decltype(member)>;
if constexpr (fixed_array_value_t<M> && emplace_backable<M>) {
size_t col = 0;
while (it != end) {
if (col < member.size()) [[likely]] {
read<CSV>::op<Opts>(member[col][csv_index], ctx, it, end);
}
else [[unlikely]] {
read<CSV>::op<Opts>(member.emplace_back()[csv_index], ctx, it, end);
}

if (*it == '\r') {
++it;
if (*it == '\n') {
if (*it == '\r') {
++it;
if (*it == '\n') {
++it;
break;
}
else [[unlikely]] {
ctx.error = error_code::syntax_error;
return;
}
}
else if (*it == '\n') {
++it;
break;
}
else if (it == end) {
return;
}

if (*it == ',') [[likely]] {
++it;
}
else [[unlikely]] {
ctx.error = error_code::syntax_error;
return;
}
}
else if (*it == '\n') {
++it;
break;
}
else if (it == end) {
return;
}

if (*it == ',') [[likely]] {
++it;
++col;
}
else [[unlikely]] {
ctx.error = error_code::syntax_error;
return;
}

++col;
}
}
else {
while (it != end) {
read<CSV>::op<Opts>(member, ctx, it, end);
else {
while (it != end) {
read<CSV>::op<Opts>(member, ctx, it, end);

if (*it == '\r') {
++it;
if (*it == '\n') {
if (*it == '\r') {
++it;
if (*it == '\n') {
++it;
break;
}
else [[unlikely]] {
ctx.error = error_code::syntax_error;
return;
}
}
else if (*it == '\n') {
++it;
break;
}

if (*it == ',') [[likely]] {
++it;
}
else [[unlikely]] {
ctx.error = error_code::syntax_error;
return;
}
}
else if (*it == '\n') {
++it;
break;
}

if (*it == ',') [[likely]] {
++it;
}
else [[unlikely]] {
ctx.error = error_code::syntax_error;
return;
}
}
}
}, index);
},
index);

if (bool(ctx.error)) [[unlikely]] {
return;
Expand Down
53 changes: 45 additions & 8 deletions tests/csv_test/csv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,21 +589,58 @@ suite fish_record = [] {
};
};

// Column-oriented target for the currency.csv fixture: one vector per CSV
// column. The tests below read it column-wise and expect element i of every
// vector to belong to the same record.
struct CurrencyCSV
{
   std::vector<std::string> Entity;
   std::vector<std::string> Currency;
   std::vector<std::string> AlphabeticCode;
   std::vector<std::string> NumericCode;
   std::vector<std::string> MinorUnit;
   std::vector<std::string> WithdrawalDate;
};

// Tests reading the currency.csv fixture into CurrencyCSV.
suite currency_csv_test = [] {
   // Row-wise read is currently disabled: the call and its check are commented
   // out, so this case only constructs the objects.
   "currency_row"_test = [] {
      CurrencyCSV obj{};
      std::string buffer{};
      // auto ec = glz::read_file_csv(obj, GLZ_TEST_DIRECTORY "/currency.csv", buffer);
      // expect(not ec) << glz::format_error(ec, buffer) << '\n';
   };

   // Column-wise read: every column must have the same number of entries, and
   // a few spot-checked records must match the fixture, including a quoted
   // field that contains a comma and records with empty cells.
   "currency_col"_test = [] {
      CurrencyCSV obj{};
      std::string buffer{};
      auto ec = glz::read_file_csv<glz::colwise>(obj, GLZ_TEST_DIRECTORY "/currency.csv", buffer);
      expect(not ec) << glz::format_error(ec, buffer) << '\n';

      // Unsigned to match the type returned by std::vector::size().
      constexpr std::size_t kExpectedSize = 445;

      expect(obj.Entity.size() == kExpectedSize);
      expect(obj.Currency.size() == kExpectedSize);
      expect(obj.AlphabeticCode.size() == kExpectedSize);
      expect(obj.NumericCode.size() == kExpectedSize);
      expect(obj.MinorUnit.size() == kExpectedSize);
      expect(obj.WithdrawalDate.size() == kExpectedSize);

      // First record; its trailing cell is empty.
      expect(obj.Entity[0] == "AFGHANISTAN");
      expect(obj.Currency[0] == "Afghani");
      expect(obj.AlphabeticCode[0] == "AFN");
      expect(obj.NumericCode[0] == "971");
      expect(obj.MinorUnit[0] == "2");
      expect(obj.WithdrawalDate[0] == "");

      // Entity contains a comma, so it must have been parsed as a quoted field.
      expect(obj.Entity[29] == "BONAIRE, SINT EUSTATIUS AND SABA");
      expect(obj.Currency[29] == "US Dollar");
      expect(obj.AlphabeticCode[29] == "USD");
      expect(obj.NumericCode[29] == "840");
      expect(obj.MinorUnit[29] == "2");
      expect(obj.WithdrawalDate[29] == "");

      // Record with an empty cell in the middle and a populated last column.
      expect(obj.Entity[324] == "EUROPEAN MONETARY CO-OPERATION FUND (EMCF)");
      expect(obj.Currency[324] == "European Currency Unit (E.C.U)");
      expect(obj.AlphabeticCode[324] == "XEU");
      expect(obj.NumericCode[324] == "954");
      expect(obj.MinorUnit[324] == "");
      expect(obj.WithdrawalDate[324] == "1999-01");
   };
};

Expand Down

0 comments on commit 4c5724e

Please sign in to comment.