Skip to content

Commit

Permalink
2018 edition
Browse files Browse the repository at this point in the history
implements #25
  • Loading branch information
jqnatividad committed Sep 17, 2021
1 parent e9b0533 commit 7e32f13
Show file tree
Hide file tree
Showing 51 changed files with 319 additions and 464 deletions.
280 changes: 86 additions & 194 deletions Cargo.lock

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ readme = "README.md"
keywords = ["csv", "tsv", "slice", "command"]
license = "Unlicense/MIT"
autotests = false
edition = "2018"

[[bin]]
name = "qsv"
Expand All @@ -31,20 +32,20 @@ opt-level = 3

[dependencies]
mimalloc = { version = "*", default-features = false }
byteorder = "1"
crossbeam-channel = "0.2.4"
byteorder = "1.4"
crossbeam-channel = "0.5"
csv = "1.1.0"
csv-index = "0.1.6"
docopt = "1"
filetime = "0.1"
num_cpus = "1.4"
rand = "0.5"
filetime = "0.2"
num_cpus = "1"
rand = "0.7"
regex = "1"
serde = { version = "1.0.130", features = ["derive"] }
streaming-stats = "0.2.3"
tabwriter = "1"
threadpool = "1.3"
threadpool = "1.8"

[dev-dependencies]
quickcheck = { version = "0.7", default-features = false }
quickcheck = { version = "0.9", default-features = false }
log = "0.4"
10 changes: 4 additions & 6 deletions src/cmd/cat.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use csv;

use CliResult;
use config::{Config, Delimiter};
use util;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::util;
use serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Concatenates CSV data by column or by row.
When concatenating by column, the columns will be written in the same order as
Expand Down
13 changes: 6 additions & 7 deletions src/cmd/count.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use csv;

use CliResult;
use config::{Delimiter, Config};
use util;
use crate::CliResult;
use crate::config::{Delimiter, Config};
use crate::util;
use serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Prints a count of the number of records in the CSV data.
Note that the count will not include the header row (unless --no-headers is
Expand Down Expand Up @@ -48,5 +46,6 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
count
}
};
Ok(println!("{}", count))
println!("{}", count);
Ok(())
}
14 changes: 6 additions & 8 deletions src/cmd/fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,13 @@ use std::io;
use std::iter;
use std::ops;

use csv;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::select::{SelectColumns, Selection};
use crate::util;
use crate::serde::Deserialize;

use CliResult;
use config::{Config, Delimiter};
use select::{SelectColumns, Selection};
use util;
use serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Fill empty fields in selected columns of a CSV.
This command fills empty fields in the selected column
Expand Down
20 changes: 8 additions & 12 deletions src/cmd/fixlengths.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
use std::cmp;

use csv;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::util;
use crate::serde::Deserialize;

use CliResult;
use config::{Config, Delimiter};
use util;
use serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Transforms CSV data so that all records have the same length. The length is
the length of the longest record in the data (not counting trailing empty fields,
but at least 1). Records with smaller lengths are padded with empty fields.
Expand Down Expand Up @@ -64,12 +62,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let mut rdr = config.reader()?;
let mut record = csv::ByteRecord::new();
while rdr.read_byte_record(&mut record)? {
let mut index = 0;
let mut nonempty_count = 0;
for field in &record {
index += 1;
if index == 1 || !field.is_empty() {
nonempty_count = index;
for (index, field) in record.iter().enumerate() {
if index == 0 || !field.is_empty() {
nonempty_count = index+1;
}
}
maxlen = cmp::max(maxlen, nonempty_count);
Expand Down
10 changes: 5 additions & 5 deletions src/cmd/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ use std::io::{self, Write};

use tabwriter::TabWriter;

use CliResult;
use config::{Config, Delimiter};
use util;
use serde::Deserialize;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::util;
use crate::serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Prints flattened records such that fields are labeled separated by a new line.
This mode is particularly useful for viewing one record at a time. Each
record is separated by a special '#' character (on a line by itself), which
Expand Down
12 changes: 5 additions & 7 deletions src/cmd/fmt.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use csv;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::util;
use crate::serde::Deserialize;

use CliResult;
use config::{Config, Delimiter};
use util;
use serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Formats CSV data with a custom delimiter or CRLF line endings.
Generally, all commands in qsv output CSV data in a default format, which is
Expand Down
26 changes: 11 additions & 15 deletions src/cmd/frequency.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
use std::fs;
use std::io;

use channel;
use csv;
use stats::{Frequencies, merge_all};
use threadpool::ThreadPool;

use CliResult;
use config::{Config, Delimiter};
use index::Indexed;
use select::{SelectColumns, Selection};
use util;
use serde::Deserialize;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::index::Indexed;
use crate::select::{SelectColumns, Selection};
use crate::util;
use crate::serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Compute a frequency table on CSV data.
The frequency table is formatted as CSV data:
Expand Down Expand Up @@ -157,11 +155,11 @@ impl Args {
let mut idx = args.rconfig().indexed().unwrap().unwrap();
idx.seek((i * chunk_size) as u64).unwrap();
let it = idx.byte_records().take(chunk_size);
send.send(args.ftables(&sel, it).unwrap());
send.send(args.ftables(&sel, it).unwrap()).unwrap();
});
}
drop(send);
Ok((headers, merge_all(recv).unwrap()))
Ok((headers, merge_all(recv.iter()).unwrap()))
}

fn ftables<I>(&self, sel: &Selection, it: I) -> CliResult<FTables>
Expand All @@ -176,10 +174,8 @@ impl Args {
let field = trim(field.to_vec());
if !field.is_empty() {
tabs[i].add(field);
} else {
if !self.flag_no_nulls {
tabs[i].add(null.clone());
}
} else if !self.flag_no_nulls {
tabs[i].add(null.clone());
}
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/cmd/headers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ use std::io;

use tabwriter::TabWriter;

use CliResult;
use config::Delimiter;
use util;
use serde::Deserialize;
use crate::CliResult;
use crate::config::Delimiter;
use crate::util;
use crate::serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Prints the fields of the first row in the CSV data.
These names can be used in commands like 'select' to refer to columns in the
Expand Down
10 changes: 5 additions & 5 deletions src/cmd/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ use std::path::{Path, PathBuf};

use csv_index::RandomAccessSimple;

use CliResult;
use config::{Config, Delimiter};
use util;
use serde::Deserialize;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::util;
use crate::serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Creates an index of the given CSV data, which can make other operations like
slicing, splitting and gathering statistics much faster.
Expand Down
12 changes: 5 additions & 7 deletions src/cmd/input.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use csv;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::util;
use crate::serde::Deserialize;

use CliResult;
use config::{Config, Delimiter};
use util;
use serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Read CSV data with special quoting rules.
Generally, all qsv commands support basic options like specifying the delimiter
Expand Down
25 changes: 12 additions & 13 deletions src/cmd/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,15 @@ use std::iter::repeat;
use std::str;

use byteorder::{WriteBytesExt, BigEndian};
use csv;

use CliResult;
use config::{Config, Delimiter};
use index::Indexed;
use select::{SelectColumns, Selection};
use util;
use serde::Deserialize;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::index::Indexed;
use crate::select::{SelectColumns, Selection};
use crate::util;
use crate::serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Joins two sets of CSV data on the specified columns.
The default join operation is an 'inner' join. This corresponds to the
Expand Down Expand Up @@ -295,10 +294,10 @@ impl Args {
&rconf1, &mut rdr1, &rconf2, &mut rdr2)?;
Ok(IoState {
wtr: Config::new(&self.flag_output).writer()?,
rdr1: rdr1,
sel1: sel1,
rdr2: rdr2,
sel2: sel2,
rdr1,
sel1,
rdr2,
sel2,
no_headers: rconf1.no_headers,
casei: self.flag_no_case,
nulls: self.flag_nulls,
Expand Down Expand Up @@ -390,7 +389,7 @@ impl<R: io::Read + io::Seek> ValueIndex<R> {
let idx = Indexed::open(rdr, io::Cursor::new(row_idx.into_inner()))?;
Ok(ValueIndex {
values: val_idx,
idx: idx,
idx,
num_rows: rowi,
})
}
Expand Down
15 changes: 7 additions & 8 deletions src/cmd/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,15 @@ use std::fs;
use std::io;
use std::path::Path;

use csv;
use regex::Regex;

use CliResult;
use config::{Config, Delimiter};
use select::SelectColumns;
use util::{self, FilenameTemplate};
use serde::Deserialize;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::select::SelectColumns;
use crate::util::{self, FilenameTemplate};
use crate::serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Partitions the given CSV data into chunks based on the value of a column
The files are written to the output directory with filenames based on the
Expand Down Expand Up @@ -149,7 +148,7 @@ struct WriterGenerator {
impl WriterGenerator {
fn new(template: FilenameTemplate) -> WriterGenerator {
WriterGenerator {
template: template,
template,
counter: 1,
used: HashSet::new(),
non_word_char: Regex::new(r"\W").unwrap(),
Expand Down
12 changes: 5 additions & 7 deletions src/cmd/rename.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use csv;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::util;
use crate::serde::Deserialize;

use CliResult;
use config::{Config, Delimiter};
use util;
use serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Rename the columns of CSV data efficiently.
This command lets you rename the columns in CSV data. You must specify
Expand Down
10 changes: 5 additions & 5 deletions src/cmd/reverse.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use CliResult;
use config::{Config, Delimiter};
use util;
use serde::Deserialize;
use crate::CliResult;
use crate::config::{Config, Delimiter};
use crate::util;
use crate::serde::Deserialize;

static USAGE: &'static str = "
static USAGE: &str = "
Reverses rows of CSV data.
Useful for cases when there is no column that can be used for sorting in reverse order,
Expand Down
Loading

0 comments on commit 7e32f13

Please sign in to comment.