Skip to content

Commit

Permalink
fix(ext/node): lossy UTF-8 read node_modules files (denoland#24140)
Browse files Browse the repository at this point in the history
Previously various reads of files in `node_modules` would error on
invalid UTF-8. These were cases involving:

- reading package.json from Rust
- reading package.json from JS
- reading CommonJS files from JS
- reading CommonJS files from Rust (for ESM translation)
- reading ESM files from Rust
  • Loading branch information
lucacasonato committed Jun 8, 2024
1 parent 22d34f7 commit c1f23c5
Show file tree
Hide file tree
Showing 26 changed files with 112 additions and 33 deletions.
2 changes: 1 addition & 1 deletion cli/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ impl CjsCodeAnalyzer for CliCjsCodeAnalyzer {
None => {
self
.fs
.read_text_file_async(specifier.to_file_path().unwrap(), None)
.read_text_file_lossy_async(specifier.to_file_path().unwrap(), None)
.await?
}
};
Expand Down
7 changes: 6 additions & 1 deletion cli/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,12 @@ impl NpmModuleLoader {

let code = if self.cjs_resolutions.contains(specifier) {
// translate cjs to esm if it's cjs and inject node globals
let code = String::from_utf8(code)?;
let code = match String::from_utf8_lossy(&code) {
Cow::Owned(code) => code,
// SAFETY: `String::from_utf8_lossy` guarantees that the result is valid
// UTF-8 if `Cow::Borrowed` is returned.
Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(code) },
};
ModuleSourceCode::String(
self
.node_code_translator
Expand Down
2 changes: 1 addition & 1 deletion cli/util/gitignore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ impl GitIgnoreTree {
});
let current = self
.fs
.read_text_file_sync(&dir_path.join(".gitignore"), None)
.read_text_file_lossy_sync(&dir_path.join(".gitignore"), None)
.ok()
.and_then(|text| {
let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path);
Expand Down
25 changes: 17 additions & 8 deletions ext/fs/interface.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use std::borrow::Cow;
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
Expand Down Expand Up @@ -284,24 +285,32 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
self.stat_sync(path).is_ok()
}

fn read_text_file_sync(
fn read_text_file_lossy_sync(
&self,
path: &Path,
access_check: Option<AccessCheckCb>,
) -> FsResult<String> {
let buf = self.read_file_sync(path, access_check)?;
String::from_utf8(buf).map_err(|err| {
std::io::Error::new(std::io::ErrorKind::InvalidData, err).into()
})
Ok(string_from_utf8_lossy(buf))
}
async fn read_text_file_async<'a>(
async fn read_text_file_lossy_async<'a>(
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<String> {
let buf = self.read_file_async(path, access_check).await?;
String::from_utf8(buf).map_err(|err| {
std::io::Error::new(std::io::ErrorKind::InvalidData, err).into()
})
Ok(string_from_utf8_lossy(buf))
}
}

// Like String::from_utf8_lossy but operates on owned values
#[inline(always)]
fn string_from_utf8_lossy(buf: Vec<u8>) -> String {
match String::from_utf8_lossy(&buf) {
// buf contained non-utf8 chars than have been patched
Cow::Owned(s) => s,
// SAFETY: if Borrowed then the buf only contains utf8 chars,
// we do this instead of .into_owned() to avoid copying the input buf
Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) },
}
}
25 changes: 7 additions & 18 deletions ext/fs/ops.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use std::borrow::Cow;
use std::cell::RefCell;
use std::io;
use std::io::SeekFrom;
Expand Down Expand Up @@ -1333,11 +1332,11 @@ where
let fs = state.borrow::<FileSystemRc>().clone();
let mut access_check =
sync_permission_check::<P>(state.borrow_mut(), "Deno.readFileSync()");
let buf = fs
.read_file_sync(&path, Some(&mut access_check))
let str = fs
.read_text_file_lossy_sync(&path, Some(&mut access_check))
.map_err(|error| map_permission_error("readfile", error, &path))?;

Ok(string_from_utf8_lossy(buf))
Ok(str)
}

#[op2(async)]
Expand All @@ -1361,9 +1360,10 @@ where
(state.borrow::<FileSystemRc>().clone(), cancel_handle)
};

let fut = fs.read_file_async(path.clone(), Some(&mut access_check));
let fut =
fs.read_text_file_lossy_async(path.clone(), Some(&mut access_check));

let buf = if let Some(cancel_handle) = cancel_handle {
let str = if let Some(cancel_handle) = cancel_handle {
let res = fut.or_cancel(cancel_handle).await;

if let Some(cancel_rid) = cancel_rid {
Expand All @@ -1379,18 +1379,7 @@ where
.map_err(|error| map_permission_error("readfile", error, &path))?
};

Ok(string_from_utf8_lossy(buf))
}

// Like String::from_utf8_lossy but operates on owned values
fn string_from_utf8_lossy(buf: Vec<u8>) -> String {
match String::from_utf8_lossy(&buf) {
// buf contained non-utf8 chars than have been patched
Cow::Owned(s) => s,
// SAFETY: if Borrowed then the buf only contains utf8 chars,
// we do this instead of .into_owned() to avoid copying the input buf
Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) },
}
Ok(str)
}

fn to_seek_from(offset: i64, whence: i32) -> Result<SeekFrom, AnyError> {
Expand Down
2 changes: 1 addition & 1 deletion ext/node/ops/require.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ where
let file_path = PathBuf::from(file_path);
ensure_read_permission::<P>(state, &file_path)?;
let fs = state.borrow::<FileSystemRc>();
Ok(fs.read_text_file_sync(&file_path, None)?)
Ok(fs.read_text_file_lossy_sync(&file_path, None)?)
}

#[op2]
Expand Down
2 changes: 1 addition & 1 deletion ext/node/package_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ impl PackageJson {
return Ok(CACHE.with(|cache| cache.borrow()[&path].clone()));
}

let source = match fs.read_text_file_sync(&path, None) {
let source = match fs.read_text_file_lossy_sync(&path, None) {
Ok(source) => source,
Err(err) if err.kind() == ErrorKind::NotFound => {
return Ok(Rc::new(PackageJson::empty(path)));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export default '����';
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "@denotest/lossy-utf8-script",
"version": "1.0.0",
"type": "module",
"dependencies": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export default "hello";
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"name": "@denotest/lossy-utf8-package-json",
"version": "1.0.0",
"type": "module",
"dependencies": {},
"files": ["þþÿÿ"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module.exports = '����';
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "@denotest/lossy-utf8-script",
"version": "1.0.0",
"type": "commonjs",
"dependencies": {}
}
5 changes: 5 additions & 0 deletions tests/specs/npm/lossy_utf8_module/__test__.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"args": "run main.mjs",
"output": "main.out",
"exitCode": 0
}
3 changes: 3 additions & 0 deletions tests/specs/npm/lossy_utf8_module/main.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import mod from "npm:@denotest/[email protected]";

console.log(mod);
3 changes: 3 additions & 0 deletions tests/specs/npm/lossy_utf8_module/main.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Download http:https://localhost:4260/@denotest/lossy-utf8-module
Download http:https://localhost:4260/@denotest/lossy-utf8-module/1.0.0.tgz
����
5 changes: 5 additions & 0 deletions tests/specs/npm/lossy_utf8_package_json/__test__.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"args": "run main.mjs",
"output": "main.out",
"exitCode": 0
}
3 changes: 3 additions & 0 deletions tests/specs/npm/lossy_utf8_package_json/main.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import mod from "npm:@denotest/[email protected]";

console.log(mod);
3 changes: 3 additions & 0 deletions tests/specs/npm/lossy_utf8_package_json/main.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Download http:https://localhost:4260/@denotest/lossy-utf8-package-json
Download http:https://localhost:4260/@denotest/lossy-utf8-package-json/1.0.0.tgz
hello
5 changes: 5 additions & 0 deletions tests/specs/npm/lossy_utf8_script/__test__.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"args": "run main.mjs",
"output": "main.out",
"exitCode": 0
}
3 changes: 3 additions & 0 deletions tests/specs/npm/lossy_utf8_script/main.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import mod from "npm:@denotest/[email protected]";

console.log(mod);
3 changes: 3 additions & 0 deletions tests/specs/npm/lossy_utf8_script/main.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Download http:https://localhost:4260/@denotest/lossy-utf8-script
Download http:https://localhost:4260/@denotest/lossy-utf8-script/1.0.0.tgz
����
6 changes: 6 additions & 0 deletions tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"args": "run --node-modules-dir --allow-read main.mjs",
"output": "main.out",
"exitCode": 0,
"tempDir": true
}
10 changes: 10 additions & 0 deletions tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { createRequire } from "node:module";

// Import this so that deno_graph knows to download this file.
if (false) import("npm:@denotest/[email protected]");

const require = createRequire(import.meta.url);

const mod = require("@denotest/lossy-utf8-script");

console.log(mod);
4 changes: 4 additions & 0 deletions tests/specs/npm/lossy_utf8_script_from_cjs/main.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Download http:https://localhost:4260/@denotest/lossy-utf8-script
Download http:https://localhost:4260/@denotest/lossy-utf8-script/1.0.0.tgz
Initialize @denotest/[email protected]
����
5 changes: 3 additions & 2 deletions tests/util/server/src/npm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,11 @@ fn get_npm_package(

tarballs.insert(version.clone(), tarball_bytes);
let package_json_path = version_folder.join("package.json");
let package_json_text = fs::read_to_string(&package_json_path)
.with_context(|| {
let package_json_bytes =
fs::read(&package_json_path).with_context(|| {
format!("Error reading package.json at {}", package_json_path)
})?;
let package_json_text = String::from_utf8_lossy(&package_json_bytes);
let mut version_info: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(&package_json_text)?;
version_info.insert("dist".to_string(), dist.into());
Expand Down

0 comments on commit c1f23c5

Please sign in to comment.