Skip to content

Commit

Permalink
small performance improvements
Browse files Browse the repository at this point in the history
For the most part, this switches `write_record` to `write_byte_record` where
possible, and also moves some uses of the `byte_records` iterator to the
manual `read_byte_record` method, which lets us reuse a single record
buffer across iterations and thereby amortize allocation.

This also removes the use of normal selection for `xsv search`, which
was causing a huge slowdown.

Also, bump to csv 1.0.0-beta.3.
  • Loading branch information
BurntSushi committed May 25, 2017
1 parent 0f58a98 commit b0da83b
Show file tree
Hide file tree
Showing 15 changed files with 86 additions and 78 deletions.
36 changes: 19 additions & 17 deletions BENCHMARKS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,25 @@ These benchmarks were run on an Intel i7-6900K (8 CPUs, 16 threads) with 64GB
of memory.

```
count 0.11 seconds 413.76 MB/sec
flatten 4.54 seconds 10.02 MB/sec
flatten_condensed 4.45 seconds 10.22 MB/sec
frequency 1.82 seconds 25.00 MB/sec
index 0.12 seconds 379.28 MB/sec
sample_10 0.18 seconds 252.85 MB/sec
sample_1000 0.18 seconds 252.85 MB/sec
sample_100000 0.29 seconds 156.94 MB/sec
search 0.27 seconds 168.56 MB/sec
select 0.14 seconds 325.09 MB/sec
sort 2.18 seconds 20.87 MB/sec
slice_one_middle 0.08 seconds 568.92 MB/sec
slice_one_middle_index 0.01 seconds 4551.36 MB/sec
stats 1.09 seconds 41.75 MB/sec
stats_index 0.15 seconds 303.42 MB/sec
stats_everything 1.94 seconds 23.46 MB/sec
stats_everything_index 0.93 seconds 48.93 MB/sec
count 0.11 seconds 413.76 MB/sec
flatten 4.54 seconds 10.02 MB/sec
flatten_condensed 4.45 seconds 10.22 MB/sec
frequency 1.82 seconds 25.00 MB/sec
index 0.12 seconds 379.28 MB/sec
sample_10 0.18 seconds 252.85 MB/sec
sample_1000 0.18 seconds 252.85 MB/sec
sample_100000 0.29 seconds 156.94 MB/sec
search 0.27 seconds 168.56 MB/sec
select 0.14 seconds 325.09 MB/sec
search 0.13 seconds 350.10 MB/sec
select 0.13 seconds 350.10 MB/sec
sort 2.18 seconds 20.87 MB/sec
slice_one_middle 0.08 seconds 568.92 MB/sec
slice_one_middle_index 0.01 seconds 4551.36 MB/sec
stats 1.09 seconds 41.75 MB/sec
stats_index 0.15 seconds 303.42 MB/sec
stats_everything 1.94 seconds 23.46 MB/sec
stats_everything_index 0.93 seconds 48.93 MB/sec
```

### Details
Expand Down
48 changes: 24 additions & 24 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@ name = "tests"

[profile.release]
opt-level = 3
debug = true

[profile.test]
opt-level = 3

[dependencies]
byteorder = "1"
chan = "0.1"
csv = "1.0.0-beta.1"
csv-index = "0.1"
csv = "1.0.0-beta.3"
csv-index = "0.1.2"
docopt = "0.7"
filetime = "0.1"
num_cpus = "1.4"
Expand Down
7 changes: 4 additions & 3 deletions src/cmd/cat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,15 @@ impl Args {
}

fn cat_rows(&self) -> CliResult<()> {
let mut row = csv::ByteRecord::new();
let mut wtr = Config::new(&self.flag_output).writer()?;
for (i, conf) in self.configs()?.into_iter().enumerate() {
let mut rdr = conf.reader()?;
if i == 0 {
conf.write_headers(&mut rdr, &mut wtr)?;
}
for r in rdr.byte_records() {
wtr.write_record(&r?)?;
while rdr.read_byte_record(&mut row)? {
wtr.write_byte_record(&row)?;
}
}
wtr.flush().map_err(From::from)
Expand Down Expand Up @@ -124,7 +125,7 @@ impl Args {
if num_done >= iters.len() {
break 'OUTER;
}
wtr.write_record(&record)?;
wtr.write_byte_record(&record)?;
}
wtr.flush().map_err(From::from)
}
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/fixlengths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
} else {
r.truncate(length);
}
wtr.write_record(&r)?;
wtr.write_byte_record(&r)?;
}
wtr.flush()?;
Ok(())
Expand Down
5 changes: 3 additions & 2 deletions src/cmd/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

let mut rdr = rconfig.reader()?;
let mut wtr = wconfig.writer()?;
for r in rdr.byte_records() {
wtr.write_record(&r?)?;
let mut r = csv::ByteRecord::new();
while rdr.read_byte_record(&mut r)? {
wtr.write_byte_record(&r)?;
}
wtr.flush()?;
Ok(())
Expand Down
7 changes: 5 additions & 2 deletions src/cmd/input.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use csv;

use CliResult;
use config::{Config, Delimiter};
use util;
Expand Down Expand Up @@ -48,8 +50,9 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

let mut rdr = rconfig.reader()?;
let mut wtr = wconfig.writer()?;
for r in rdr.byte_records() {
wtr.write_record(&r?)?;
let mut row = csv::ByteRecord::new();
while rdr.read_byte_record(&mut row)? {
wtr.write_record(&row)?;
}
wtr.flush()?;
Ok(())
Expand Down
7 changes: 3 additions & 4 deletions src/cmd/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,8 @@ impl Args {

let mut writers: HashMap<Vec<u8>, BoxedWriter> =
HashMap::new();
for row in rdr.byte_records() {
let row = row?;

let mut row = csv::ByteRecord::new();
while rdr.read_byte_record(&mut row)? {
// Decide what file to put this in.
let column = &row[key_col];
let key = match self.flag_prefix_length {
Expand All @@ -118,7 +117,7 @@ impl Args {
vacant.insert(wtr)
}
};
wtr.write_record(&row)?;
wtr.write_byte_record(&row)?;
}
Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/sample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}
};
for row in sampled.into_iter() {
wtr.write_record(&row)?;
wtr.write_byte_record(&row)?;
}
Ok(wtr.flush()?)
}
Expand Down
11 changes: 6 additions & 5 deletions src/cmd/search.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use csv;
use regex::bytes::RegexBuilder;

use CliResult;
Expand Down Expand Up @@ -60,19 +61,19 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let mut wtr = Config::new(&args.flag_output).writer()?;

let headers = rdr.byte_headers()?.clone();
let nsel = rconfig.normal_selection(&headers)?;
let sel = rconfig.selection(&headers)?;

if !rconfig.no_headers {
wtr.write_record(&headers)?;
}
for row in rdr.byte_records() {
let row = row?;
let mut m = nsel.select(row.iter()).any(|f| pattern.is_match(f));
let mut record = csv::ByteRecord::new();
while rdr.read_byte_record(&mut record)? {
let mut m = sel.select(&record).any(|f| pattern.is_match(f));
if args.flag_invert_match {
m = !m;
}
if m {
wtr.write_record(&row)?;
wtr.write_byte_record(&record)?;
}
}
Ok(wtr.flush()?)
Expand Down
4 changes: 2 additions & 2 deletions src/cmd/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl Args {

let (start, end) = self.range()?;
for r in rdr.byte_records().skip(start).take(end - start) {
wtr.write_record(&r?)?;
wtr.write_byte_record(&r?)?;
}
Ok(wtr.flush()?)
}
Expand All @@ -87,7 +87,7 @@ impl Args {
}
idx.seek(start as u64)?;
for r in idx.byte_records().take(end - start) {
wtr.write_record(&r?)?;
wtr.write_byte_record(&r?)?;
}
wtr.flush()?;
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

rconfig.write_headers(&mut rdr, &mut wtr)?;
for r in all.into_iter() {
wtr.write_record(&r)?;
wtr.write_byte_record(&r)?;
}
Ok(wtr.flush()?)
}
Expand Down
9 changes: 6 additions & 3 deletions src/cmd/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,15 @@ impl Args {
let headers = rdr.byte_headers()?.clone();

let mut wtr = self.new_writer(&headers, 0)?;
for (i, row) in rdr.byte_records().enumerate() {
let mut i = 0;
let mut row = csv::ByteRecord::new();
while rdr.read_byte_record(&mut row)? {
if i > 0 && i % self.flag_size == 0 {
wtr.flush()?;
wtr = self.new_writer(&headers, i)?;
}
wtr.write_record(&row?)?;
wtr.write_byte_record(&row)?;
i += 1;
}
wtr.flush()?;
Ok(())
Expand Down Expand Up @@ -111,7 +114,7 @@ impl Args {
idx.seek((i * args.flag_size) as u64).unwrap();
for row in idx.byte_records().take(args.flag_size) {
let row = row.unwrap();
wtr.write_record(row.into_iter()).unwrap();
wtr.write_byte_record(&row).unwrap();
}
wtr.flush().unwrap();
wg.done();
Expand Down
Loading

0 comments on commit b0da83b

Please sign in to comment.