# Cargo manifest for qsv.
# Three binaries (qsv, qsvlite, qsvdp) are declared explicitly below; each is
# gated by its own `required-features` entry.

[package]
name = "qsv"
version = "0.135.0" #:version
# NOTE(review): the author string ends with a stray space - an "<email>" part
# may have been lost; confirm before publishing.
authors = ["Joel Natividad "]
description = "A high performance CSV data-wrangling toolkit."
documentation = "https://github.com/jqnatividad/qsv#qsv-ultra-fast-csv-data-wrangling-toolkit"
homepage = "https://qsv.dathere.com"
repository = "https://github.com/jqnatividad/qsv"
readme = "README.md"
keywords = ["csv", "geocode", "data-engineering", "etl", "opendata"]
categories = ["command-line-utilities", "parser-implementations"]
license = "MIT OR Unlicense"
# test targets are declared explicitly in the [[test]] section
autotests = false
edition = "2021"
rust-version = "1.81"
# binary targets are declared explicitly in the [[bin]] sections
autobins = false
include = [
    "src/**/*",
    "LICENSE-MIT",
    "UNLICENSE",
    "README.md",
    "CHANGELOG.md",
    "resources/luau/vendor/luadate/date.lua",
]

[[bin]]
name = "qsv"
test = true
bench = false
doctest = false
path = "src/main.rs"
required-features = ["feature_capable"]

[[bin]]
name = "qsvlite"
test = true
bench = false
doctest = false
path = "src/mainlite.rs"
required-features = ["lite"]

[[bin]]
name = "qsvdp"
test = true
bench = false
doctest = false
path = "src/maindp.rs"
required-features = ["datapusher_plus"]

[[test]]
name = "tests"
path = "tests/tests.rs"

[profile.release]
codegen-units = 1
debug = false
lto = true
opt-level = 3
strip = true

# release settings, but with debug info kept and symbols not stripped
[profile.release-samply]
inherits = "release"
debug = true
strip = false

[profile.release-nightly]
inherits = "release"
panic = "abort"

[dependencies]
ahash = "0.8"
arboard = "3.4.1"
arrow = { version = "53", default-features = false, features = ["csv"], optional = true }
atoi_simd = "0.16"
base62 = { version = "2.0", optional = true }
byteorder = "1.5"
bytes = "1"
cached = { version = "0.53", features = [
    "disk_store",
    "redis_ahash",
], optional = true }
calamine = { version = "0.25", features = ["dates"] }
censor = { version = "0.3", optional = true }
chrono = { version = "0.4", default-features = false }
chrono-tz = "0.10"
console = { version = "0.15", optional = true }
cpc = { version = "1.9", optional = true }
crc32fast = { version = "1.4", optional = true }
crossbeam-channel = "0.5"
csv = "1.3"
csv-core = "0.1"
csv-diff = "0.1.0"
csv-index = "0.1"
csvlens = { version = "0.10", optional = true }
csvs_convert = { version = "0.8.14", default-features = false, features = [
    "converters",
], optional = true }
data-encoding = { version = "2.6", optional = true }
dotenvy = "0.15"
dynfmt = { version = "0.1", default-features = false, features = ["curly"] }
eudex = { version = "0.1", optional = true }
ext-sort = { version = "0.1", features = [
    "memory-limit",
], default-features = false }
flate2 = { version = "1", optional = true }
file-format = { version = "0.25", features = ["reader"] }
filetime = "0.2"
flexi_logger = { version = "0.29", features = [
    "async",
    "compress",
    "dont_minimize_extra_stacks",
], default-features = false }
futures = "0.3"
futures-util = "0.3"
gender_guesser = { version = "0.2", optional = true }
geosuggest-core = { version = "0.6", optional = true }
geosuggest-utils = { version = "0.6", optional = true }
governor = { version = "0.6", optional = true }
grex = { version = "1.4", default-features = false }
gzp = { version = "0.11", default-features = false, features = [
    "snappy_default",
] }
hashbrown = { version = "0.14", optional = true }
indexmap = "2.5"
indicatif = "0.17"
itertools = "0.13"
itoa = "1"
jaq-interpret = "1.5.0"
jaq-parse = "1.0.3"
jemallocator = { version = "0.5", optional = true }
json-objects-to-csv = "0.1.3"
jsonschema = { version = "0.20", features = [
    "resolve-file",
    "resolve-http",
], default-features = false }
jql-runner = { version = "7.2", default-features = false, optional = true }
local-encoding = { version = "0.2", optional = true }
localzone = { version = "0.3", features = ["auto_validation"] }
log = "0.4"
mimalloc = { version = "0.1", default-features = false, optional = true }
mlua = { version = "0.9", features = [
    "luau",
    "luau-jit",
    "serialize",
], optional = true }
num_cpus = "1"
odht = "0.3"
phf = { version = "0.11", features = ["macros"], optional = true }
polars = { version = "0.43", features = [
    "asof_join",
    "avro",
    "avx512",
    "aws",
    "binary_encoding",
    "cloud",
    "coalesce",
    "cross_join",
    "dtype-full",
    "extract_jsonpath",
    "ipc",
    "json",
    "lazy",
    "object",
    "parquet",
    "performant",
    "pivot",
    "semi_anti_join",
    "serde-lazy",
    "sql",
    "streaming",
    "timezones",
], optional = true }
pyo3 = { version = "0.22", features = [
    "auto-initialize",
    "gil-refs",
], optional = true }
qsv-dateparser = "0.12"
qsv_docopt = "1.8"
qsv-stats = "0.22"
qsv_currency = "0.7"
qsv-sniffer = { version = "0.10", default-features = false, features = [
    "runtime-dispatch-simd",
] }
qsv_vader_sentiment_analysis = { version = "0.2", optional = true }
rand = "0.8"
rand_hc = "0.3"
rand_xoshiro = "0.6"
rayon = "1.10"
redis = { version = "0.27", features = [
    "ahash",
    "tcp_nodelay",
], default-features = false, optional = true }
regex = "1"
reqwest = { version = "0.12", features = [
    "blocking",
    "brotli",
    "cookies",
    "deflate",
    "gzip",
    "http2",
    "json",
    "rustls-tls",
    "stream",
    "zstd",
], default-features = false }
rfd = { version = "0.15", optional = true }
rust_decimal = { version = "1.36", default-features = false }
ryu = "1"
sanitize-filename = { version = "0.5", optional = true }
simd-json = "0.14"
self_update = { version = "0.41", features = [
    "archive-zip",
    "compression-zip-deflate",
    "rustls",
    "signatures",
], default-features = false, optional = true }
semver = "1"
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1", features = ["preserve_order"] }
serde_stacker = { version = "0.1", optional = true }
serde_urlencoded = { version = "0.7", optional = true }
simple-expand-tilde = { version = "0.4.2", optional = true }
snap = "1"
strsim = { version = "0.11", optional = true }
strum = { version = "0.26", features = ["phf"] }
strum_macros = "0.26"
sysinfo = "0.31"
tabwriter = "1.4"
tempfile = "3"
thousands = { version = "0.2", optional = true }
threadpool = "1.8"
titlecase = { version = "3", optional = true }
tokio = { version = "1", features = ["rt-multi-thread"] }
uuid = { version = "1", features = ["v4", "v7"] }
url = "2.5"
whatlang = { version = "0.16", optional = true }
xlsxwriter = { version = "0.6", optional = true }
xxhash-rust = { version = "0.8", features = ["xxh3"] }

# enable parking_lot hardware lock elision on x86_64
[target.'cfg(target_arch = "x86_64")'.dependencies]
parking_lot = { version = "0.12", features = ["hardware-lock-elision"] }

# otherwise, on non-x86_64, use the default
[target.'cfg(not(target_arch = "x86_64"))'.dependencies]
parking_lot = "0.12"

[target.'cfg(not(target_arch = "aarch64"))'.dependencies]
simdutf8 = "0.1"

# use SIMD on aarch64 (Apple Silicon, Raspberry Pi 4, etc.)
[target.'cfg(target_arch = "aarch64")'.dependencies]
simdutf8 = { version = "0.1", features = ["aarch64_neon"] }

[dev-dependencies]
actix-governor = "0.6"
actix-web = { version = "4.9", default-features = false, features = [
    "compress-brotli",
    "compress-gzip",
] }
assert-json-diff = "2.0"
newline-converter = "0.3"
# disable these dev dependencies for testing `to` commands
# as they are expensive and slow down the build/CI tests
# postgres = "0.19"
# rusqlite = { version = "0.32", features = ["bundled"] }
quickcheck = { version = "1", default-features = false }
serial_test = { version = "3.1", features = ["file_locks"] }

[patch.crates-io]
# use our tweaked fork of the csv crate
# the csv crate underpins a lot of qsv's functionality, so every perf tweak helps
# the main feature of this csv fork is accelerated UTF-8 validation using the simdutf8 crate
csv = { git = "https://github.com/jqnatividad/rust-csv", branch = "qsv-optimized" }
csv-core = { git = "https://github.com/jqnatividad/rust-csv", branch = "qsv-optimized" }
csv-index = { git = "https://github.com/jqnatividad/rust-csv", branch = "qsv-optimized" }

# older lexical-core versions have soundness issues - https://rustsec.org/advisories/RUSTSEC-2023-0086
# use arrow 53 upstream with unreleased lexical-core fix & other fixes; used by csvlens
arrow = { git = "https://github.com/apache/arrow-rs", rev = "b809021" }

# use our csvlens fork with latest dependencies, including arrow 53 upstream, with unreleased lexical-core fix
csvlens = { git = "https://github.com/jqnatividad/csvlens", branch = "dependency-upgrades-lexical-core_fix" }

# upstream csvs_convert has some old dependencies
# csvs_convert = { git = "https://github.com/jqnatividad/csvs_convert", branch = "bump-more-dependencies" }

# needed as dynfmt doesn't work in release mode without this
# see https://github.com/jan-auer/dynfmt/pull/9
dynfmt = { git = "https://github.com/jqnatividad/dynfmt", branch = "2021-clippy_ptr_as_ptr-bumpdeps" }

# needed to get latest dependencies and unreleased fixes
grex = { git = "https://github.com/pemistahl/grex", rev = "ff8533d" }

# use calamine 0.25.0 with unreleased fixes and select clippy lints
calamine = { git = "https://github.com/tafia/calamine", rev = "247c080" }

# use modernized version of local_encoding
local-encoding = { git = "https://github.com/slonopotamus/local-encoding-rs", branch = "travis-madness" }

# use a fork of xlsxwriter with bumped bindgen dependency
xlsxwriter = { git = "https://github.com/jqnatividad/xlsxwriter-rs", branch = "bump-bindgen-to-0.70.1" }

# use serde_json with unreleased optimizations
serde_json = { git = "https://github.com/serde-rs/json", rev = "1faf3a1" }

# Polars has a much higher release tempo for its Python bindings compared
# to its underlying Rust library. See https://github.com/pola-rs/polars/releases
# It's qsv's policy to use the latest upstream of polars/py-polars
# to take advantage of Polars' latest unreleased fixes and features.
# Based on what's available at the time of qsv's release, we may need to pin polars to a py-polars tag
# or a specific commit if more revisions have been made since the latest polars/py-polars release.
# BUILD NOTE: Be sure to set QSV_POLARS_REV below to the latest commit short hash or tag
# of polars/py-polars before building qsv. This allows us to show the polars rev/tag in --version.
# If we are using a release version of Rust Polars, leave QSV_POLARS_REV empty.
# QSV_POLARS_REV=py-1.8.2
polars = { git = "https://github.com/pola-rs/polars", tag = "py-1.8.2" }
# polars = { git = "https://github.com/pola-rs/polars", rev = "506f927" }

[features]
default = ["mimalloc"]
distrib_features = [
    "feature_capable",
    "apply",
    "fetch",
    "foreach",
    "geocode",
    "lens",
    "luau",
    "polars",
    "prompt",
    "python",
    "to",
]
all_features = ["distrib_features", "self_update"]
apply = [
    "base62",
    "censor",
    "cpc",
    "crc32fast",
    "data-encoding",
    "eudex",
    "gender_guesser",
    "hashbrown",
    "qsv_vader_sentiment_analysis",
    "strsim",
    "thousands",
    "titlecase",
    # "vader_sentiment",
    "whatlang",
]
fetch = [
    "cached",
    "console",
    "flate2",
    "governor",
    "hashbrown",
    "jql-runner",
    "redis",
    "serde_stacker",
    "serde_urlencoded",
    "simple-expand-tilde",
]
foreach = ["local-encoding"]
geocode = [
    "cached",
    "geosuggest-core",
    "geosuggest-utils",
    "phf",
    "simple-expand-tilde",
]
luau = ["mlua", "sanitize-filename", "simple-expand-tilde"]
prompt = ["rfd"]
python = ["pyo3"]
to = ["csvs_convert", "xlsxwriter"]
lens = ["arrow", "csvlens"]
lite = []
datapusher_plus = ["self_update"]
feature_capable = []
# NOTE(review): the "dep/feature" form also activates the optional dependency
# it names (crc32fast, hashbrown, polars, pyo3). If that is not intended, the
# weak form "dep?/feature" would avoid it - confirm before changing.
nightly = [
    "crc32fast/nightly",
    "hashbrown/nightly",
    "polars/nightly",
    "polars/simd",
    "pyo3/nightly",
    "rand/nightly",
]

[package.metadata.deb]
# NOTE(review): maintainer and copyright end with a stray space - an "<email>"
# part may have been lost; confirm before packaging.
maintainer = "Konstantin Sivakov "
copyright = "2024, datHere Inc. "
extended-description = """A high performance CSV data-wrangling toolkit."""
depends = "$auto"
features = ["feature_capable"]
section = "utility"
priority = "optional"
assets = [["target/release/qsv", "/usr/local/bin/", "755"]]