Skip to content

Commit

Permalink
refactor and polish the source code of the library (#29)
Browse files Browse the repository at this point in the history
this PR is all about refactoring the *formatting* library and executable
:yum:

## details
> **Note**
> i've made this PR such that it's easy to [review commit by
commit](https://github.com/nushell/nufmt/pull/29/commits) and there are
explaination in all the commit bodies 👌

- rename the library to `nu_formatter`
- link to the README in the doc of `nufmt`
- make *Clippy* run on all the targets, e.g. including benchmarks
- remove (almost) all `print` in favor of `log`
- fix some "*hacks*" in the code (`cli.files`, `#[cfg(windows)]` and
exit codes)
- simplify the benchmarks
- rename some functions
- remove useless comments and simplify the documentation when possible
- relax *Clippy* on missing documentation
- do not make all items of `nu_formatter` public
  • Loading branch information
amtoine committed Jun 16, 2023
1 parent 478471d commit acd75e7
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 195 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ jobs:
- name: Clippy
run: |
cargo clippy \
--all-targets \
--no-deps \
--workspace \
-- \
-D warnings \
-D rustdoc::broken_intra_doc_links \
-W missing_docs \
-W clippy::missing_docs_in_private_items \
-W clippy::explicit_iter_loop \
-W clippy::explicit_into_iter_loop \
-W clippy::semicolon_if_nothing_returned \
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ jobs:
- name: Clippy
run: |
cargo clippy \
--all-targets \
--no-deps \
--workspace \
-- \
-D warnings \
-D rustdoc::broken_intra_doc_links \
-W missing_docs \
-W clippy::missing_docs_in_private_items \
-W clippy::explicit_iter_loop \
-W clippy::explicit_into_iter_loop \
-W clippy::semicolon_if_nothing_returned \
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ default = ["bin"]
bin = ["clap"]

[lib]
name = "nufmt"
name = "nu_formatter"
path = "src/lib.rs"

[[bin]]
Expand Down
13 changes: 5 additions & 8 deletions benches/file-format-bench.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
use criterion::{criterion_group, criterion_main, Criterion};
use nufmt::{config::Config, format_single_file};
use std::{io, path::PathBuf};

fn format_massive_nu(file: &PathBuf) -> io::Result<()> {
Ok(format_single_file(file, &Config::default()))
}
use nu_formatter::{config::Config, format_single_file};
use std::path::PathBuf;

fn criterion_benchmark(c: &mut Criterion) {
let file = PathBuf::from("./benches/example.nu");
c.bench_function("Format massive nu", |b| b.iter(|| format_massive_nu(&file)));
c.bench_function("Format massive nu", |b| {
b.iter(|| format_single_file(&PathBuf::from("./benches/example.nu"), &Config::default()));
});
}

criterion_group!(benches, criterion_benchmark);
Expand Down
17 changes: 1 addition & 16 deletions src/config.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,9 @@
//! Config module
//!
//! This keeps all the options, tweaks and dials of the configuration.

use anyhow::Result;
//! Keeps all the options, tweaks and dials of the configuration.

#[derive(Debug)]
/// Configurations available to the formatter
pub struct Config {
/// Number of spaces of indent.
pub tab_spaces: usize,
/// Maximum width of each line.
pub max_width: usize,
/// Number of lines bafore and after a custom command.
pub margin: usize,
}

Expand All @@ -26,8 +18,6 @@ impl Default for Config {
}

impl Config {
/// Creates a new config. You need to pass every field to create the config.
/// You cannot skip any field yet.
pub fn new(tab_spaces: usize, max_width: usize, margin: usize) -> Self {
Config {
tab_spaces,
Expand All @@ -36,8 +26,3 @@ impl Config {
}
}
}

/// Returns a default config.
pub fn load(/* file_path: Option<&Path> */) -> Result<Config> {
Ok(Config::default())
}
85 changes: 22 additions & 63 deletions src/formatting.rs
Original file line number Diff line number Diff line change
@@ -1,62 +1,50 @@
//! Formatting module
//!
//! In this module occurs most of the magic in `nufmt`.
//! It has functions to format slice of bytes and some help functions to separate concerns while doing the job.
//!
//! It has functions to format slice of bytes and some help functions to separate concerns while doing the job.
use crate::config::Config;
use log::trace;
use log::{info, trace};
use nu_parser::{flatten_block, parse, FlatShape};
use nu_protocol::{
ast::Block,
engine::{self, StateWorkingSet},
Span,
};

/// Format an array of bytes
/// format an array of bytes
///
/// Reading the file gives you a list of bytes
pub fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
// nice place to measure formatting time
// let mut timer = Timer::start();

// parsing starts
pub(crate) fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
let engine_state = engine::EngineState::new();
let mut working_set = StateWorkingSet::new(&engine_state);

let parsed_block = parse(&mut working_set, None, contents, false);
trace!("parsed block:\n{:?}", &parsed_block);

// check if the block has at least 1 pipeline
if !block_has_pipelines(&parsed_block) {
trace!("block has no pipelines!");
println!("File has no code to format.");
info!("File has no code to format.");
return contents.to_vec();
}
// flat is a list of (Span , Flatshape)
//
// Span is the piece of code. You can stringfy the contents.
// Flatshape is an enum of the type of token read by the AST.

let flat = flatten_block(&working_set, &parsed_block);
trace!("flattened block:\n{:?}", &flat);
// timer = timer.done_parsing()

// formatting starts
let mut out: Vec<u8> = vec![];

let mut start = 0;
let end_of_file = contents.len();

for (span, shape) in flat.clone() {
// check if span skipped some bytes before the current span
if span.start > start {
trace!(
"Span didn't started on the beginning! span {0}, start: {1}",
"Span does not start at the beginning! span {0}, start: {1}",
span.start,
start
);

let skipped_contents = &contents[start..span.start];
let printable = String::from_utf8_lossy(skipped_contents).to_string();
trace!("contents: {:?}", printable);

if skipped_contents.contains(&b'#') {
trace!("This have a comment. Writing.");
out.extend(trim_ascii_whitespace(skipped_contents));
Expand All @@ -66,13 +54,12 @@ pub fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
}
}

// get the span contents and format it
let mut c_bites = working_set.get_span_contents(span);
let content = String::from_utf8_lossy(c_bites).to_string();
trace!("shape is {shape}");
trace!("shape contents: {:?}", &content);

match shape {
// if its one of these types, just do nothing. Write it away.
FlatShape::String | FlatShape::Int | FlatShape::Nothing => out.extend(c_bites),
FlatShape::List | FlatShape::Record => {
c_bites = trim_ascii_whitespace(c_bites);
Expand All @@ -81,50 +68,31 @@ pub fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
out.extend(c_bites);
}
FlatShape::Pipe => {
// here you don't have to strip the whitespace.
// The pipe is just a pipe `|`.
//
// return the pipe AND a space after that
out.extend("| ".to_string().bytes());
}
FlatShape::External => {
// External are some key commands
//
// List of what I've found: seq, each, str,
out.extend(c_bites);
// It doen't have a space after it. You have to add it here.
out.extend([b' '].iter());
out.extend(b"| ");
}
FlatShape::ExternalArg => {
// This shape is the argument of an External command (see previous case).
//
// As a result, ExternalArg may be an entire expression.
// like: "{ |row|\r\n let row_data = (seq ... r\n}"
FlatShape::External | FlatShape::ExternalArg => {
out.extend(c_bites);
// It doen't have a space after it. You have to add it here.
out.extend([b' '].iter());
out.extend(b" ");
}
FlatShape::Garbage => {
// Garbage is not garbage at all
//
// IDK what is it. I groups a bunch of commands like let my_var = 3
out.extend(c_bites);
out = insert_newline(out);
}

_ => out.extend(c_bites),
}

// check if span skipped some bytes between the final spann and the end of file
if is_last_span(span, &flat) && span.end < end_of_file {
trace!(
"The last span doesn't end the file! span: {0}, end: {1}",
span.end,
end_of_file
);

let remaining_contents = &contents[span.end..end_of_file];
let printable = String::from_utf8_lossy(remaining_contents).to_string();
trace!("contents: {:?}", printable);

if remaining_contents.contains(&b'#') {
trace!("This have a comment. Writing.");
out.push(b'\n');
Expand All @@ -134,49 +102,40 @@ pub fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
}
}

// cleanup
start = span.end + 1;
}
// just before writing, check if a new line is needed.
out = add_newline_at_end_of_file(out);

// timer = timer.done_formatting()
out
}

/// A wrapper to insert a new line
///
/// It is used frequently in `nufmt`, so
/// we have a wrapper to improve readability of the code.
/// insert a newline at the end of a buffer
fn insert_newline(mut bytes: Vec<u8>) -> Vec<u8> {
// If I need cfg windows, then I need \r\n
// let newline = vec![b'\r', b'\n'];
let newline = vec![b'\n'];
bytes.extend(newline.iter());
bytes.extend(b"\n");
bytes
}

/// Checks if it missing a new line. If true, adds it.
/// make sure there is a newline at the end of a buffer
fn add_newline_at_end_of_file(out: Vec<u8>) -> Vec<u8> {
match out.last() {
Some(&b'\n') => out,
_ => insert_newline(out),
}
}

/// Given a slice of bytes, strip all spaces, new lines and tabs found within
/// strip all spaces, new lines and tabs found a sequence of bytes
///
/// Because you don't know how the incoming code is formatted,
/// the best way to format is to strip all the whitespace
/// and afterwards include the new lines and indentation correctly
/// according to the configuration
pub fn trim_ascii_whitespace(x: &[u8]) -> &[u8] {
fn trim_ascii_whitespace(x: &[u8]) -> &[u8] {
let Some(from) = x.iter().position(|x| !x.is_ascii_whitespace()) else { return &x[0..0] };
let to = x.iter().rposition(|x| !x.is_ascii_whitespace()).unwrap();
&x[from..=to]
}

/// Returns true if the Block has at least 1 Pipeline
/// return true if the Nushell block has at least 1 pipeline
///
/// This function exists because sometimes is passed to `nufmt` an empty String,
/// or a nu code which the parser can't identify something runnable
Expand All @@ -189,7 +148,7 @@ fn block_has_pipelines(block: &Block) -> bool {
!block.pipelines.is_empty()
}

/// Returns true if the `Span` is the last Span in the slice of `flat`
/// return true if the given span is the last one
fn is_last_span(span: Span, flat: &[(Span, FlatShape)]) -> bool {
span == flat.last().unwrap().0
}
14 changes: 4 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
//!
//! nufmt is a library for formatting nu.
//! `nu_formatter` is a library for formatting nu.
//!
//! It does not do anything more than that, which makes it so fast.

use config::Config;
use formatting::format_inner;
use log::{debug, trace};
Expand All @@ -11,23 +9,19 @@ use std::io::Write;
use std::path::PathBuf;

pub mod config;
pub mod formatting;
mod formatting;

/// Reads a file and format it. Then writes the file inplace.
/// format a Nushell file inplace
pub fn format_single_file(file: &PathBuf, config: &Config) {
// read the contents of the file
let contents = std::fs::read(file)
.unwrap_or_else(|_| panic!("something went wrong reading the file {}", file.display()));

// obtain the formatted file
let formatted_bytes = format_inner(&contents, config);

// compare the contents
if formatted_bytes == contents {
debug!("File is formatted correctly.");
}

// write down the file to path
let mut writer = File::create(file).unwrap();
let file_bites = formatted_bytes.as_slice();
writer
Expand All @@ -36,7 +30,7 @@ pub fn format_single_file(file: &PathBuf, config: &Config) {
trace!("written");
}

/// Take a `String` and format it. Then returns a new `String`
/// format a string of Nushell code
pub fn format_string(input_string: &String, config: &Config) -> String {
let contents = input_string.as_bytes();
let formatted_bytes = format_inner(contents, config);
Expand Down

0 comments on commit acd75e7

Please sign in to comment.