Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor and polish the source code of the library #29

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ jobs:
- name: Clippy
run: |
cargo clippy \
--all-targets \
--no-deps \
--workspace \
-- \
-D warnings \
-D rustdoc::broken_intra_doc_links \
-W missing_docs \
-W clippy::missing_docs_in_private_items \
-W clippy::explicit_iter_loop \
-W clippy::explicit_into_iter_loop \
-W clippy::semicolon_if_nothing_returned \
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ jobs:
- name: Clippy
run: |
cargo clippy \
--all-targets \
--no-deps \
--workspace \
-- \
-D warnings \
-D rustdoc::broken_intra_doc_links \
-W missing_docs \
-W clippy::missing_docs_in_private_items \
-W clippy::explicit_iter_loop \
-W clippy::explicit_into_iter_loop \
-W clippy::semicolon_if_nothing_returned \
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ default = ["bin"]
bin = ["clap"]

[lib]
name = "nufmt"
name = "nu_formatter"
path = "src/lib.rs"

[[bin]]
Expand Down
13 changes: 5 additions & 8 deletions benches/file-format-bench.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
use criterion::{criterion_group, criterion_main, Criterion};
use nufmt::{config::Config, format_single_file};
use std::{io, path::PathBuf};

fn format_massive_nu(file: &PathBuf) -> io::Result<()> {
Ok(format_single_file(file, &Config::default()))
}
use nu_formatter::{config::Config, format_single_file};
use std::path::PathBuf;

fn criterion_benchmark(c: &mut Criterion) {
let file = PathBuf::from("./benches/example.nu");
c.bench_function("Format massive nu", |b| b.iter(|| format_massive_nu(&file)));
c.bench_function("Format massive nu", |b| {
b.iter(|| format_single_file(&PathBuf::from("./benches/example.nu"), &Config::default()));
});
}

criterion_group!(benches, criterion_benchmark);
Expand Down
17 changes: 1 addition & 16 deletions src/config.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,9 @@
//! Config module
//!
//! This keeps all the options, tweaks and dials of the configuration.

use anyhow::Result;
//! Keeps all the options, tweaks and dials of the configuration.

#[derive(Debug)]
/// Configurations available to the formatter
pub struct Config {
/// Number of spaces of indent.
pub tab_spaces: usize,
/// Maximum width of each line.
pub max_width: usize,
/// Number of lines bafore and after a custom command.
pub margin: usize,
}

Expand All @@ -26,8 +18,6 @@ impl Default for Config {
}

impl Config {
/// Creates a new config. You need to pass every field to create the config.
/// You cannot skip any field yet.
pub fn new(tab_spaces: usize, max_width: usize, margin: usize) -> Self {
Config {
tab_spaces,
Expand All @@ -36,8 +26,3 @@ impl Config {
}
}
}

/// Returns a default config.
pub fn load(/* file_path: Option<&Path> */) -> Result<Config> {
Ok(Config::default())
}
85 changes: 22 additions & 63 deletions src/formatting.rs
Original file line number Diff line number Diff line change
@@ -1,62 +1,50 @@
//! Formatting module
//!
//! In this module occurs most of the magic in `nufmt`.
//! It has functions to format slice of bytes and some help functions to separate concerns while doing the job.
//!
//! It has functions to format slice of bytes and some help functions to separate concerns while doing the job.
use crate::config::Config;
use log::trace;
use log::{info, trace};
use nu_parser::{flatten_block, parse, FlatShape};
use nu_protocol::{
ast::Block,
engine::{self, StateWorkingSet},
Span,
};

/// Format an array of bytes
/// format an array of bytes
///
/// Reading the file gives you a list of bytes
pub fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
// nice place to measure formatting time
// let mut timer = Timer::start();

// parsing starts
pub(crate) fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
let engine_state = engine::EngineState::new();
let mut working_set = StateWorkingSet::new(&engine_state);

let parsed_block = parse(&mut working_set, None, contents, false);
trace!("parsed block:\n{:?}", &parsed_block);

// check if the block has at least 1 pipeline
if !block_has_pipelines(&parsed_block) {
trace!("block has no pipelines!");
println!("File has no code to format.");
info!("File has no code to format.");
return contents.to_vec();
}
// flat is a list of (Span , Flatshape)
//
// Span is the piece of code. You can stringfy the contents.
// Flatshape is an enum of the type of token read by the AST.

let flat = flatten_block(&working_set, &parsed_block);
trace!("flattened block:\n{:?}", &flat);
// timer = timer.done_parsing()

// formatting starts
let mut out: Vec<u8> = vec![];

let mut start = 0;
let end_of_file = contents.len();

for (span, shape) in flat.clone() {
// check if span skipped some bytes before the current span
if span.start > start {
trace!(
"Span didn't started on the beginning! span {0}, start: {1}",
"Span does not start at the beginning! span {0}, start: {1}",
span.start,
start
);

let skipped_contents = &contents[start..span.start];
let printable = String::from_utf8_lossy(skipped_contents).to_string();
trace!("contents: {:?}", printable);

if skipped_contents.contains(&b'#') {
trace!("This have a comment. Writing.");
out.extend(trim_ascii_whitespace(skipped_contents));
Expand All @@ -66,13 +54,12 @@ pub fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
}
}

// get the span contents and format it
let mut c_bites = working_set.get_span_contents(span);
let content = String::from_utf8_lossy(c_bites).to_string();
trace!("shape is {shape}");
trace!("shape contents: {:?}", &content);

match shape {
// if its one of these types, just do nothing. Write it away.
FlatShape::String | FlatShape::Int | FlatShape::Nothing => out.extend(c_bites),
FlatShape::List | FlatShape::Record => {
c_bites = trim_ascii_whitespace(c_bites);
Expand All @@ -81,50 +68,31 @@ pub fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
out.extend(c_bites);
}
FlatShape::Pipe => {
// here you don't have to strip the whitespace.
// The pipe is just a pipe `|`.
//
// return the pipe AND a space after that
out.extend("| ".to_string().bytes());
}
FlatShape::External => {
// External are some key commands
//
// List of what I've found: seq, each, str,
out.extend(c_bites);
// It doen't have a space after it. You have to add it here.
out.extend([b' '].iter());
out.extend(b"| ");
}
FlatShape::ExternalArg => {
// This shape is the argument of an External command (see previous case).
//
// As a result, ExternalArg may be an entire expression.
// like: "{ |row|\r\n let row_data = (seq ... r\n}"
FlatShape::External | FlatShape::ExternalArg => {
out.extend(c_bites);
// It doen't have a space after it. You have to add it here.
out.extend([b' '].iter());
out.extend(b" ");
}
FlatShape::Garbage => {
// Garbage is not garbage at all
//
// IDK what is it. I groups a bunch of commands like let my_var = 3
out.extend(c_bites);
out = insert_newline(out);
}

_ => out.extend(c_bites),
}

// check if span skipped some bytes between the final spann and the end of file
if is_last_span(span, &flat) && span.end < end_of_file {
trace!(
"The last span doesn't end the file! span: {0}, end: {1}",
span.end,
end_of_file
);

let remaining_contents = &contents[span.end..end_of_file];
let printable = String::from_utf8_lossy(remaining_contents).to_string();
trace!("contents: {:?}", printable);

if remaining_contents.contains(&b'#') {
trace!("This have a comment. Writing.");
out.push(b'\n');
Expand All @@ -134,49 +102,40 @@ pub fn format_inner(contents: &[u8], _config: &Config) -> Vec<u8> {
}
}

// cleanup
start = span.end + 1;
}
// just before writing, check if a new line is needed.
out = add_newline_at_end_of_file(out);

// timer = timer.done_formatting()
out
}

/// A wrapper to insert a new line
///
/// It is used frequently in `nufmt`, so
/// we have a wrapper to improve readability of the code.
/// insert a newline at the end of a buffer
fn insert_newline(mut bytes: Vec<u8>) -> Vec<u8> {
// If I need cfg windows, then I need \r\n
// let newline = vec![b'\r', b'\n'];
let newline = vec![b'\n'];
bytes.extend(newline.iter());
bytes.extend(b"\n");
bytes
}

/// Checks if it missing a new line. If true, adds it.
/// make sure there is a newline at the end of a buffer
fn add_newline_at_end_of_file(out: Vec<u8>) -> Vec<u8> {
match out.last() {
Some(&b'\n') => out,
_ => insert_newline(out),
}
}

/// Given a slice of bytes, strip all spaces, new lines and tabs found within
/// strip all spaces, new lines and tabs found a sequence of bytes
///
/// Because you don't know how the incoming code is formatted,
/// the best way to format is to strip all the whitespace
/// and afterwards include the new lines and indentation correctly
/// according to the configuration
pub fn trim_ascii_whitespace(x: &[u8]) -> &[u8] {
fn trim_ascii_whitespace(x: &[u8]) -> &[u8] {
let Some(from) = x.iter().position(|x| !x.is_ascii_whitespace()) else { return &x[0..0] };
let to = x.iter().rposition(|x| !x.is_ascii_whitespace()).unwrap();
&x[from..=to]
}

/// Returns true if the Block has at least 1 Pipeline
/// return true if the Nushell block has at least 1 pipeline
///
/// This function exists because sometimes is passed to `nufmt` an empty String,
/// or a nu code which the parser can't identify something runnable
Expand All @@ -189,7 +148,7 @@ fn block_has_pipelines(block: &Block) -> bool {
!block.pipelines.is_empty()
}

/// Returns true if the `Span` is the last Span in the slice of `flat`
/// return true if the given span is the last one
fn is_last_span(span: Span, flat: &[(Span, FlatShape)]) -> bool {
span == flat.last().unwrap().0
}
14 changes: 4 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
//!
//! nufmt is a library for formatting nu.
//! `nu_formatter` is a library for formatting nu.
//!
//! It does not do anything more than that, which makes it so fast.

use config::Config;
use formatting::format_inner;
use log::{debug, trace};
Expand All @@ -11,23 +9,19 @@ use std::io::Write;
use std::path::PathBuf;

pub mod config;
pub mod formatting;
mod formatting;

/// Reads a file and format it. Then writes the file inplace.
/// format a Nushell file inplace
pub fn format_single_file(file: &PathBuf, config: &Config) {
// read the contents of the file
let contents = std::fs::read(file)
.unwrap_or_else(|_| panic!("something went wrong reading the file {}", file.display()));

// obtain the formatted file
let formatted_bytes = format_inner(&contents, config);

// compare the contents
if formatted_bytes == contents {
debug!("File is formatted correctly.");
}

// write down the file to path
let mut writer = File::create(file).unwrap();
let file_bites = formatted_bytes.as_slice();
writer
Expand All @@ -36,7 +30,7 @@ pub fn format_single_file(file: &PathBuf, config: &Config) {
trace!("written");
}

/// Take a `String` and format it. Then returns a new `String`
/// format a string of Nushell code
pub fn format_string(input_string: &String, config: &Config) -> String {
let contents = input_string.as_bytes();
let formatted_bytes = format_inner(contents, config);
Expand Down
Loading