Skip to content

Commit

Permalink
remove ExprIndStr
Browse files Browse the repository at this point in the history
it can be replaced with StringToken if we add another bit if information to
StringToken, namely whether this string should take part in indentation scanning
or not. since all escaping terminates indentation scanning we need to set this
bit only for the non-escaped IND_STRING rule.

this improves performance by about 1%.

 before

  nix search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      8.880 s ±  0.048 s    [User: 6.809 s, System: 1.643 s]
    Range (min … max):    8.781 s …  8.993 s    20 runs

  nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     375.0 ms ±   2.2 ms    [User: 339.8 ms, System: 35.2 ms]
    Range (min … max):   371.5 ms … 379.3 ms    20 runs

  nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.831 s ±  0.040 s    [User: 2.536 s, System: 0.225 s]
    Range (min … max):    2.769 s …  2.912 s    20 runs

 after

  nix search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      8.832 s ±  0.048 s    [User: 6.757 s, System: 1.657 s]
    Range (min … max):    8.743 s …  8.921 s    20 runs

  nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     367.4 ms ±   3.2 ms    [User: 332.7 ms, System: 34.7 ms]
    Range (min … max):   364.6 ms … 374.6 ms    20 runs

  nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.810 s ±  0.030 s    [User: 2.517 s, System: 0.225 s]
    Range (min … max):    2.742 s …  2.854 s    20 runs
  • Loading branch information
pennae committed Jan 19, 2022
1 parent bc44351 commit 0a77466
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 54 deletions.
18 changes: 9 additions & 9 deletions src/libexpr/lexer.l
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)

// we make use of the fact that the parser receives a private copy of the input
// string and can munge around in it.
static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
{
char * result = s;
char * t = s;
Expand All @@ -89,7 +89,7 @@ static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
else *t = c;
t++;
}
return new ExprString(symbols.create({result, size_t(t - result)}));
return {result, size_t(t - result)};
}


Expand Down Expand Up @@ -176,7 +176,7 @@ or { return OR_KW; }
/* It is impossible to match strings ending with '$' with one
regex because trailing contexts are only valid at the end
of a rule. (A sane but undocumented limitation.) */
yylval->e = unescapeStr(data->symbols, yytext, yyleng);
yylval->str = unescapeStr(data->symbols, yytext, yyleng);
return STR;
}
<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
Expand All @@ -191,26 +191,26 @@ or { return OR_KW; }

\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
yylval->e = new ExprIndStr(yytext);
yylval->str = {yytext, (size_t) yyleng, true};
return IND_STR;
}
<IND_STRING>\'\'\$ |
<IND_STRING>\$ {
yylval->e = new ExprIndStr("$");
yylval->str = {"$", 1};
return IND_STR;
}
<IND_STRING>\'\'\' {
yylval->e = new ExprIndStr("''");
yylval->str = {"''", 2};
return IND_STR;
}
<IND_STRING>\'\'\\{ANY} {
yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
yylval->str = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
return IND_STR;
}
<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
<IND_STRING>\' {
yylval->e = new ExprIndStr("'");
yylval->str = {"'", 1};
return IND_STR;
}

Expand Down Expand Up @@ -264,7 +264,7 @@ or { return OR_KW; }
PUSH_STATE(INPATH_SLASH);
else
PUSH_STATE(INPATH);
yylval->e = new ExprString(data->symbols.create(string(yytext)));
yylval->str = {yytext, (size_t) yyleng};
return STR;
}
<INPATH>{ANY} |
Expand Down
7 changes: 0 additions & 7 deletions src/libexpr/nixexpr.hh
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,6 @@ struct ExprString : Expr
Value * maybeThunk(EvalState & state, Env & env);
};

/* Temporary class used during parsing of indented strings. */
struct ExprIndStr : Expr
{
string s;
ExprIndStr(const string & s) : s(s) { };
};

struct ExprPath : Expr
{
string s;
Expand Down
86 changes: 48 additions & 38 deletions src/libexpr/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#ifndef BISON_HEADER
#define BISON_HEADER

#include <variant>

#include "util.hh"

#include "nixexpr.hh"
Expand All @@ -41,6 +43,15 @@ namespace nix {

}

// using C a struct allows us to avoid having to define the special
// members that using string_view here would implicitly delete.
struct StringToken {
const char * p;
size_t l;
bool hasIndentation;
operator std::string_view() const { return {p, l}; }
};

#define YY_DECL int yylex \
(YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data)

Expand Down Expand Up @@ -152,7 +163,8 @@ static void addFormal(const Pos & pos, Formals * formals, const Formal & formal)
}


static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<std::pair<Pos, Expr *> > & es)
static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols,
vector<std::pair<Pos, std::variant<Expr *, StringToken> > > & es)
{
if (es.empty()) return new ExprString(symbols.create(""));

Expand All @@ -163,28 +175,28 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
size_t minIndent = 1000000;
size_t curIndent = 0;
for (auto & [i_pos, i] : es) {
ExprIndStr * e = dynamic_cast<ExprIndStr *>(i);
if (!e) {
/* Anti-quotations end the current start-of-line whitespace. */
auto * str = std::get_if<StringToken>(&i);
if (!str || !str->hasIndentation) {
/* Anti-quotations and escaped characters end the current start-of-line whitespace. */
if (atStartOfLine) {
atStartOfLine = false;
if (curIndent < minIndent) minIndent = curIndent;
}
continue;
}
for (size_t j = 0; j < e->s.size(); ++j) {
for (size_t j = 0; j < str->l; ++j) {
if (atStartOfLine) {
if (e->s[j] == ' ')
if (str->p[j] == ' ')
curIndent++;
else if (e->s[j] == '\n') {
else if (str->p[j] == '\n') {
/* Empty line, doesn't influence minimum
indentation. */
curIndent = 0;
} else {
atStartOfLine = false;
if (curIndent < minIndent) minIndent = curIndent;
}
} else if (e->s[j] == '\n') {
} else if (str->p[j] == '\n') {
atStartOfLine = true;
curIndent = 0;
}
Expand All @@ -196,33 +208,31 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
atStartOfLine = true;
size_t curDropped = 0;
size_t n = es.size();
for (vector<std::pair<Pos, Expr *> >::iterator i = es.begin(); i != es.end(); ++i, --n) {
ExprIndStr * e = dynamic_cast<ExprIndStr *>(i->second);
if (!e) {
atStartOfLine = false;
curDropped = 0;
es2->push_back(*i);
continue;
}

auto i = es.begin();
const auto trimExpr = [&] (Expr * e) {
atStartOfLine = false;
curDropped = 0;
es2->emplace_back(i->first, e);
};
const auto trimString = [&] (const StringToken & t) {
string s2;
for (size_t j = 0; j < e->s.size(); ++j) {
for (size_t j = 0; j < t.l; ++j) {
if (atStartOfLine) {
if (e->s[j] == ' ') {
if (t.p[j] == ' ') {
if (curDropped++ >= minIndent)
s2 += e->s[j];
s2 += t.p[j];
}
else if (e->s[j] == '\n') {
else if (t.p[j] == '\n') {
curDropped = 0;
s2 += e->s[j];
s2 += t.p[j];
} else {
atStartOfLine = false;
curDropped = 0;
s2 += e->s[j];
s2 += t.p[j];
}
} else {
s2 += e->s[j];
if (e->s[j] == '\n') atStartOfLine = true;
s2 += t.p[j];
if (t.p[j] == '\n') atStartOfLine = true;
}
}

Expand All @@ -235,6 +245,9 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
}

es2->emplace_back(i->first, new ExprString(symbols.create(s2)));
};
for (; i != es.end(); ++i, --n) {
std::visit(overloaded { trimExpr, trimString }, i->second);
}

/* If this is a single string, then don't do a concatenation. */
Expand Down Expand Up @@ -273,18 +286,13 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
nix::Formal * formal;
nix::NixInt n;
nix::NixFloat nf;
// using C a struct allows us to avoid having to define the special
// members that using string_view here would implicitly delete.
struct StringToken {
const char * p;
size_t l;
operator std::string_view() const { return {p, l}; }
};
StringToken id; // !!! -> Symbol
StringToken path;
StringToken uri;
StringToken str;
std::vector<nix::AttrName> * attrNames;
std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
std::vector<std::pair<nix::Pos, std::variant<nix::Expr *, StringToken> > > * ind_string_parts;
}

%type <e> start expr expr_function expr_if expr_op
Expand All @@ -294,11 +302,12 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
%type <formals> formals
%type <formal> formal
%type <attrNames> attrs attrpath
%type <string_parts> string_parts_interpolated ind_string_parts
%type <string_parts> string_parts_interpolated
%type <ind_string_parts> ind_string_parts
%type <e> path_start string_parts string_attr
%type <id> attr
%token <id> ID ATTRPATH
%token <e> STR IND_STR
%token <str> STR IND_STR
%token <n> INT
%token <nf> FLOAT
%token <path> PATH HPATH SPATH PATH_END
Expand Down Expand Up @@ -449,18 +458,19 @@ expr_simple
;

string_parts
: STR
: STR { $$ = new ExprString(data->symbols.create($1)); }
| string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); }
| { $$ = new ExprString(data->symbols.create("")); }
;

string_parts_interpolated
: string_parts_interpolated STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
: string_parts_interpolated STR
{ $$ = $1; $1->emplace_back(makeCurPos(@2, data), new ExprString(data->symbols.create($2))); }
| string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
| DOLLAR_CURLY expr '}' { $$ = new vector<std::pair<Pos, Expr *> >; $$->emplace_back(makeCurPos(@1, data), $2); }
| STR DOLLAR_CURLY expr '}' {
$$ = new vector<std::pair<Pos, Expr *> >;
$$->emplace_back(makeCurPos(@1, data), $1);
$$->emplace_back(makeCurPos(@1, data), new ExprString(data->symbols.create($1)));
$$->emplace_back(makeCurPos(@2, data), $3);
}
;
Expand All @@ -482,7 +492,7 @@ path_start
ind_string_parts
: ind_string_parts IND_STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
| ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
| { $$ = new vector<std::pair<Pos, Expr *> >; }
| { $$ = new vector<std::pair<Pos, std::variant<Expr *, StringToken> > >; }
;

binds
Expand Down

0 comments on commit 0a77466

Please sign in to comment.