Skip to content

Commit

Permalink
Spreadsheet: Make the XSV parser start with a preview parse
Browse files Browse the repository at this point in the history
Instead of parsing the whole document. That's really wasteful and
super slow.
  • Loading branch information
alimpfard committed Jun 16, 2021
1 parent 88b168f commit b11b3c2
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 7 deletions.
3 changes: 2 additions & 1 deletion Userland/Applications/Spreadsheet/ImportDialog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV>
if (should_trim_trailing)
behaviours = behaviours | Reader::ParserBehaviour::TrimTrailingFieldSpaces;

return Reader::XSV(m_csv, traits, behaviours);
return Reader::XSV(m_csv, move(traits), behaviours);
};

void CSVImportDialogPage::update_preview()
Expand Down Expand Up @@ -195,6 +195,7 @@ Result<NonnullRefPtrVector<Sheet>, String> ImportDialog::make_and_run_for(String
NonnullRefPtrVector<Sheet> sheets;

if (reader.has_value()) {
reader->parse();
if (reader.value().has_error())
return String::formatted("CSV Import failed: {}", reader.value().error_string());

Expand Down
6 changes: 6 additions & 0 deletions Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ TEST_CASE(should_parse_valid_data)
4, 5, 6
"""x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error());

EXPECT_EQ(csv[0]["Foo"], "1");
Expand All @@ -31,6 +32,7 @@ TEST_CASE(should_parse_valid_data)
4, "5 " , 6
"""x", y"z, 9 )~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error());

EXPECT_EQ(csv[0]["Foo"], "1");
Expand All @@ -46,6 +48,7 @@ TEST_CASE(should_fail_nicely)
auto data = R"~~~(Foo, Bar, Baz
x, y)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount);
}
Expand All @@ -54,6 +57,7 @@ TEST_CASE(should_fail_nicely)
auto data = R"~~~(Foo, Bar, Baz
x, y, "z)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure);
}
Expand All @@ -66,6 +70,7 @@ TEST_CASE(should_iterate_rows)
4, 5, 6
"""x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error());

bool ran = false;
Expand All @@ -82,6 +87,7 @@ BENCHMARK_CASE(fairly_big_data)

auto data = file_or_error.value()->read_all();
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders };
csv.parse();

EXPECT(!csv.has_error());
EXPECT_EQ(csv.size(), 100000u);
Expand Down
18 changes: 16 additions & 2 deletions Userland/Applications/Spreadsheet/Readers/XSV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ namespace Reader {

// Bitwise AND for ParserBehaviour: both operands are converted to their
// integer representation, combined with `&`, and cast back to ParserBehaviour.
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right)
{
// NOTE(review): two return statements appear because this text is a commit
// diff view; presumably the u32 cast is the removed line and the
// to_underlying() form is its replacement - confirm against the real file.
return static_cast<ParserBehaviour>(static_cast<u32>(left) & static_cast<u32>(right));
return static_cast<ParserBehaviour>(to_underlying(left) & to_underlying(right));
}

// Bitwise OR for ParserBehaviour: both operands are converted to their
// integer representation, combined with `|`, and cast back to ParserBehaviour.
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right)
{
// NOTE(review): two return statements appear because this text is a commit
// diff view; presumably the u32 cast is the removed line and the
// to_underlying() form is its replacement - confirm against the real file.
return static_cast<ParserBehaviour>(static_cast<u32>(left) | static_cast<u32>(right));
return static_cast<ParserBehaviour>(to_underlying(left) | to_underlying(right));
}

void XSV::set_error(ReadError error)
Expand All @@ -43,8 +43,22 @@ Vector<String> XSV::headers() const
return headers;
}

void XSV::parse_preview()
{
reset();
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
read_headers();

while (!has_error() && !m_lexer.is_eof()) {
if (m_rows.size() >= 10)
break;
m_rows.append(read_row());
}
}

void XSV::parse()
{
reset();
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
read_headers();

Expand Down
16 changes: 12 additions & 4 deletions Userland/Applications/Spreadsheet/Readers/XSV.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,18 @@ constexpr ParserBehaviour default_behaviours()

class XSV {
public:
// Constructs a reader over `source`: stores the source view, creates a lexer
// over it, and stores the given traits and behaviours. The constructor body
// then runs a parse immediately.
// NOTE(review): this text is a commit diff view, so both the old and the new
// signature line and both body calls are shown. Per the commit message the
// constructor now starts with a preview parse rather than parsing the whole
// document; presumably `const ParserTraits&` / `parse()` are the removed
// lines - confirm against the real header.
XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours())
XSV(StringView source, ParserTraits traits, ParserBehaviour behaviours = default_behaviours())
: m_source(source)
, m_lexer(m_source)
, m_traits(traits)
, m_behaviours(behaviours)
{
parse();
parse_preview();
}

virtual ~XSV() { }

void parse();
bool has_error() const { return m_error != ReadError::None; }
ReadError error() const { return m_error; }
String error_string() const
Expand Down Expand Up @@ -180,16 +181,23 @@ class XSV {
}
};
void set_error(ReadError error);
void parse();
void parse_preview();
void read_headers();
// Discards the results of any earlier parse and rewinds to the start of the
// source, so that parse() or parse_preview() can run from a clean state.
void reset()
{
    // Forget the recorded error and everything collected so far.
    m_error = ReadError::None;
    m_names.clear();
    m_rows.clear();

    // Rewind by replacing the lexer with a fresh one over the same source.
    m_lexer = GenericLexer { m_source };
}
Vector<Field> read_row(bool header_row = false);
Field read_one_field();
Field read_one_quoted_field();
Field read_one_unquoted_field();

StringView m_source;
GenericLexer m_lexer;
const ParserTraits& m_traits;
ParserTraits m_traits;
ParserBehaviour m_behaviours;
Vector<Field> m_names;
Vector<Vector<Field>> m_rows;
Expand Down

0 comments on commit b11b3c2

Please sign in to comment.