forked from html5lib/html5lib-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
These tests are based on two things: some from WPT, and some based on Anolis
- Loading branch information
Showing
36 changed files
with
8,220 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import io | ||
import os | ||
import sys | ||
|
||
import pyperf | ||
|
||
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")] | ||
import html5lib # noqa: E402 | ||
|
||
|
||
def bench_parse(fh, treebuilder):
    """Parse the whole of *fh* once with html5lib.

    :arg fh: seekable binary file-like object holding the HTML input; it is
        rewound first so every call parses the input from the start
    :arg treebuilder: name of the html5lib treebuilder to build the tree with

    The parsed tree is discarded; only the work of parsing matters here.
    """
    fh.seek(0)
    html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)
|
||
|
||
def bench_serialize(loops, fh, treebuilder):
    """Time *loops* serializations of the document parsed from *fh*.

    :arg loops: number of times to serialize the document
    :arg fh: seekable binary file-like object holding the HTML input
    :arg treebuilder: name of the html5lib treebuilder; also passed as the
        ``tree`` argument when serializing

    :returns: elapsed time for all *loops* serializations, measured with
        ``pyperf.perf_counter()``
    """
    # Parse once, before the clock starts: only serialization is timed.
    fh.seek(0)
    doc = html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)

    range_it = range(loops)
    t0 = pyperf.perf_counter()

    # The loop index is unused; ``_`` avoids shadowing the ``loops`` parameter.
    for _ in range_it:
        html5lib.serialize(doc, tree=treebuilder, encoding="ascii", inject_meta_charset=False)

    return pyperf.perf_counter() - t0
|
||
|
||
# Names accepted for the optional positional ``benchmark`` argument.
BENCHMARKS = ["parse", "serialize"]
|
||
|
||
def add_cmdline_args(cmd, args):
    """Append the selected benchmark name, if any, to *cmd*.

    Passed to ``pyperf.Runner`` as its ``add_cmdline_args`` callback.

    :arg cmd: list of command-line arguments, modified in place
    :arg args: parsed arguments; only ``args.benchmark`` is read
    """
    if args.benchmark:
        cmd.append(args.benchmark)
|
||
|
||
if __name__ == "__main__":
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.metadata["description"] = "Run benchmarks based on Anolis"
    runner.argparser.add_argument("benchmark", nargs="?", choices=BENCHMARKS)

    # Run the one benchmark named on the command line, or all of them.
    args = runner.parse_args()
    if args.benchmark:
        benchmarks = (args.benchmark,)
    else:
        benchmarks = BENCHMARKS

    # Load the input once into an in-memory buffer; the benchmark functions
    # rewind it with seek(0) on every call.
    with open(os.path.join(os.path.dirname(__file__), "data", "html.html"), "rb") as fh:
        source = io.BytesIO(fh.read())

    # Both benchmarks are run once per treebuilder.
    treebuilders = ("etree", "dom", "lxml")

    if "parse" in benchmarks:
        for tb in treebuilders:
            runner.bench_func("html_parse_%s" % tb, bench_parse, source, tb)

    if "serialize" in benchmarks:
        # bench_serialize does its own timing, hence bench_time_func.
        for tb in treebuilders:
            runner.bench_time_func("html_serialize_%s" % tb, bench_serialize, source, tb)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import io | ||
import os | ||
import sys | ||
|
||
import pyperf | ||
|
||
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")] | ||
import html5lib # noqa: E402 | ||
|
||
|
||
def bench_html5lib(fh):
    """Parse the whole of *fh* once with html5lib's ``etree`` treebuilder.

    :arg fh: seekable binary file-like object holding the HTML input; it is
        rewound first so every call parses the input from the start

    The parsed tree is discarded; only the work of parsing matters here.
    """
    fh.seek(0)
    html5lib.parse(fh, treebuilder="etree", useChardet=False)
|
||
|
||
def add_cmdline_args(cmd, args):
    """Append the selected benchmark name, if any, to *cmd*.

    Passed to ``pyperf.Runner`` as its ``add_cmdline_args`` callback.

    :arg cmd: list of command-line arguments, modified in place
    :arg args: parsed arguments; only ``args.benchmark`` is read
    """
    if args.benchmark:
        cmd.append(args.benchmark)
|
||
|
||
# Maps benchmark name (file name without the ".html" suffix) to the path of
# every ".html" file found anywhere under "data/wpt" next to this script.
# Files sharing a base name in different directories overwrite one another.
BENCHMARKS = {}
_HTML_SUFFIX = ".html"
for root, _dirs, files in os.walk(os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "wpt")):
    for f in files:
        if f.endswith(_HTML_SUFFIX):
            BENCHMARKS[f[: -len(_HTML_SUFFIX)]] = os.path.join(root, f)
|
||
|
||
if __name__ == "__main__":
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.metadata["description"] = "Run parser benchmarks from WPT"
    runner.argparser.add_argument("benchmark", nargs="?", choices=sorted(BENCHMARKS))

    # Run the one benchmark named on the command line, or all of them in
    # sorted order.
    args = runner.parse_args()
    if args.benchmark:
        benchmarks = (args.benchmark,)
    else:
        benchmarks = sorted(BENCHMARKS)

    for bench in benchmarks:
        name = "wpt_%s" % bench
        path = BENCHMARKS[bench]
        # Load the input into an in-memory buffer; bench_html5lib rewinds it
        # with seek(0) on every call.
        with open(path, "rb") as fh:
            fh2 = io.BytesIO(fh.read())

        runner.bench_func(name, bench_html5lib, fh2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
The files in this directory are derived from: | ||
|
||
* `html.html`: from [html](https://github.com/whatwg/html), revision | ||
77db356a293f2b152b648c836b6989d17afe42bb. This is the first 5000 lines of `source`. (This is | ||
representative of the input to [Anolis](https://bitbucket.org/ms2ger/anolis/); first 5000 lines | ||
chosen to make it parse in a reasonable time.) | ||
|
||
* `wpt`: see `wpt/README.md`. |
Oops, something went wrong.