From 4b7c6049ef9d40394eb823859c82cbf8d293430d Mon Sep 17 00:00:00 2001 From: David Sherret Date: Wed, 31 Jan 2024 22:15:22 -0500 Subject: [PATCH] refactor: load bytes in deno_graph (#22212) Upgrades deno_graph to 0.64 where deno_graph is now responsible for turning bytes into a string. This is in preparation for Wasm modules. --- Cargo.lock | 19 +- cli/Cargo.toml | 10 +- cli/args/import_map.rs | 8 +- cli/cache/mod.rs | 4 +- cli/cache/parsed_source.rs | 4 +- cli/emit.rs | 2 +- cli/file_fetcher.rs | 480 ++++-------------- cli/graph_util.rs | 6 +- cli/lsp/documents.rs | 42 +- cli/lsp/npm.rs | 6 +- cli/lsp/registries.rs | 10 +- cli/lsp/urls.rs | 10 +- cli/module_loader.rs | 17 +- cli/standalone/mod.rs | 17 +- cli/tests/integration/compile_tests.rs | 29 ++ cli/tests/integration/npm_tests.rs | 6 +- cli/tests/testdata/fmt/invalid_data.out | 3 + .../npm/cjs_require_esm_error/main.out | 2 +- .../esm/{my_esm_module.js => my_es_module.js} | 0 .../cjs-require-esm-error/1.0.0/index.js | 2 +- cli/tools/bundle.rs | 2 +- cli/tools/check.rs | 6 +- cli/tools/coverage/mod.rs | 23 +- cli/tools/fmt.rs | 19 +- cli/tools/info.rs | 10 +- cli/tools/registry/graph.rs | 8 +- cli/tools/registry/publish_order.rs | 2 +- cli/tools/repl/mod.rs | 6 +- cli/tools/run/mod.rs | 32 +- cli/tools/test/mod.rs | 14 +- cli/tools/vendor/build.rs | 22 +- cli/tools/vendor/import_map.rs | 11 +- cli/tools/vendor/mappings.rs | 4 +- cli/tools/vendor/test.rs | 12 +- cli/tsc/diagnostics.rs | 2 +- cli/tsc/mod.rs | 10 +- cli/util/text_encoding.rs | 98 ---- test_util/src/fs.rs | 4 + 38 files changed, 325 insertions(+), 637 deletions(-) rename cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/esm/{my_esm_module.js => my_es_module.js} (100%) diff --git a/Cargo.lock b/Cargo.lock index a9ce363eac62af..9981eef5b39b32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -996,7 +996,6 @@ dependencies = [ "console_static_text", "dashmap", "data-encoding", - "data-url", "deno_ast", "deno_bench_util", 
"deno_cache_dir", @@ -1017,7 +1016,6 @@ dependencies = [ "dprint-plugin-jupyter", "dprint-plugin-markdown", "dprint-plugin-typescript", - "encoding_rs", "env_logger", "eszip", "fancy-regex", @@ -1297,9 +1295,9 @@ dependencies = [ [[package]] name = "deno_doc" -version = "0.98.0" +version = "0.100.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "837af595f7a7bf3f96c54973a82174149edd4e94544890b619db3a7a8f11c407" +checksum = "d1df9ba70ba4335847c304a9a771da4833e4e0c219758b8b58db36c096061b7b" dependencies = [ "anyhow", "cfg-if", @@ -1321,9 +1319,9 @@ dependencies = [ [[package]] name = "deno_emit" -version = "0.34.0" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b568ac3d8f6fba012d7cb0d9fc792bfd7ee0ed70e1dfc0784030aeada0a8b31d" +checksum = "870bd633969034668194c6cdf7d6f8aa94296e26db554aba1ea5f074aa966c37" dependencies = [ "anyhow", "base64", @@ -1391,15 +1389,16 @@ dependencies = [ [[package]] name = "deno_graph" -version = "0.63.6" +version = "0.64.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3744708bc2efb969bd9a78a0ccb8d2267c4d49baaae1243a5b3af8e2fdedee" +checksum = "c0eb6ad784fa5885867ba00e0db8ddcb2d98a4a0234fe336d50a13092e268c44" dependencies = [ "anyhow", "async-trait", "data-url", "deno_ast", "deno_semver", + "encoding_rs", "futures", "import_map", "indexmap", @@ -2388,9 +2387,9 @@ dependencies = [ [[package]] name = "eszip" -version = "0.59.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c44cb502967cc1602a5a7ed1ae7022fc371f83ae709e95bfb9b510eb25d71a5a" +checksum = "a060f8bb81229bd98c26e1c0efc066be2460558ee9187e73e40a89bd2c949f06" dependencies = [ "anyhow", "base64", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 89d677f9dcd84c..41439991ef5bb9 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -57,16 +57,16 @@ deno_ast = { workspace = true, features = ["bundler", "cjs", "codegen", "dep_gra 
deno_cache_dir = "=0.6.1" deno_config = "=0.9.1" deno_core = { workspace = true, features = ["include_js_files_for_snapshotting"] } -deno_doc = { version = "=0.98.0", features = ["html"] } -deno_emit = "=0.34.0" -deno_graph = "=0.63.6" +deno_doc = { version = "=0.100.0", features = ["html"] } +deno_emit = "=0.35.0" +deno_graph = "=0.64.1" deno_lint = { version = "=0.55.0", features = ["docs"] } deno_lockfile.workspace = true deno_npm = "=0.16.0" deno_runtime = { workspace = true, features = ["include_js_files_for_snapshotting"] } deno_semver = "=0.5.4" deno_task_shell = "=0.14.3" -eszip = "=0.59.0" +eszip = "=0.60.0" napi_sym.workspace = true async-trait.workspace = true @@ -83,14 +83,12 @@ color-print = "0.3.5" console_static_text.workspace = true dashmap = "5.5.3" data-encoding.workspace = true -data-url.workspace = true dissimilar = "=1.0.4" dotenvy = "0.15.7" dprint-plugin-json = "=0.19.1" dprint-plugin-jupyter = "=0.1.2" dprint-plugin-markdown = "=0.16.3" dprint-plugin-typescript = "=0.88.10" -encoding_rs.workspace = true env_logger = "=0.10.0" fancy-regex = "=0.10.0" # If you disable the default __vendored_zlib_ng feature above, you _must_ be able to link against `-lz`. diff --git a/cli/args/import_map.rs b/cli/args/import_map.rs index b2ea14174d3fa1..da4f0eb85b646a 100644 --- a/cli/args/import_map.rs +++ b/cli/args/import_map.rs @@ -9,7 +9,6 @@ use import_map::ImportMapDiagnostic; use log::warn; use super::ConfigFile; -use crate::file_fetcher::get_source_from_data_url; use crate::file_fetcher::FileFetcher; pub async fn resolve_import_map_from_specifier( @@ -18,7 +17,9 @@ pub async fn resolve_import_map_from_specifier( file_fetcher: &FileFetcher, ) -> Result { let value: serde_json::Value = if specifier.scheme() == "data" { - serde_json::from_str(&get_source_from_data_url(specifier)?.0)? + let data_url_text = + deno_graph::source::RawDataUrl::parse(specifier)?.decode()?; + serde_json::from_str(&data_url_text)? 
} else { let import_map_config = maybe_config_file .as_ref() @@ -28,7 +29,8 @@ pub async fn resolve_import_map_from_specifier( None => { let file = file_fetcher .fetch(specifier, PermissionsContainer::allow_all()) - .await?; + .await? + .into_text_decoded()?; serde_json::from_str(&file.source)? } } diff --git a/cli/cache/mod.rs b/cli/cache/mod.rs index 13efc24ff65763..7004f11af733b6 100644 --- a/cli/cache/mod.rs +++ b/cli/cache/mod.rs @@ -279,10 +279,10 @@ impl Loader for FetchCacher { fn cache_module_info( &mut self, specifier: &ModuleSpecifier, - source: &str, + source: &Arc<[u8]>, module_info: &deno_graph::ModuleInfo, ) { - let source_hash = ModuleInfoCacheSourceHash::from_source(source.as_bytes()); + let source_hash = ModuleInfoCacheSourceHash::from_source(source); let result = self.module_info_cache.set_module_info( specifier, MediaType::from_specifier(specifier), diff --git a/cli/cache/parsed_source.rs b/cli/cache/parsed_source.rs index 77f2e59534ad17..7bb1a72a726c35 100644 --- a/cli/cache/parsed_source.rs +++ b/cli/cache/parsed_source.rs @@ -17,9 +17,9 @@ pub struct ParsedSourceCache { } impl ParsedSourceCache { - pub fn get_parsed_source_from_esm_module( + pub fn get_parsed_source_from_js_module( &self, - module: &deno_graph::EsmModule, + module: &deno_graph::JsModule, ) -> Result { self.get_or_parse_module( &module.specifier, diff --git a/cli/emit.rs b/cli/emit.rs index 6bec60a37b0b0d..2c267df6774753 100644 --- a/cli/emit.rs +++ b/cli/emit.rs @@ -40,7 +40,7 @@ impl Emitter { graph: &ModuleGraph, ) -> Result<(), AnyError> { for module in graph.modules() { - if let Module::Esm(module) = module { + if let Module::Js(module) = module { let is_emittable = matches!( module.media_type, MediaType::TypeScript diff --git a/cli/file_fetcher.rs b/cli/file_fetcher.rs index bbcdd3f84c60cd..5a7ca2b8409fde 100644 --- a/cli/file_fetcher.rs +++ b/cli/file_fetcher.rs @@ -12,10 +12,9 @@ use crate::http_util::HeadersMap; use crate::http_util::HttpClient; use 
crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::UpdateGuard; -use crate::util::text_encoding; -use data_url::DataUrl; use deno_ast::MediaType; +use deno_core::anyhow::Context; use deno_core::error::custom_error; use deno_core::error::generic_error; use deno_core::error::uri_error; @@ -45,21 +44,60 @@ use std::time::SystemTime; pub const SUPPORTED_SCHEMES: [&str; 5] = ["data", "blob", "file", "http", "https"]; -/// A structure representing a source file. #[derive(Debug, Clone, Eq, PartialEq)] -pub struct File { - /// For remote files, if there was an `X-TypeScript-Type` header, the parsed - /// out value of that header. - pub maybe_types: Option, - /// The resolved media type for the file. +pub struct TextDecodedFile { pub media_type: MediaType, - /// The source of the file as a string. - pub source: Arc, /// The _final_ specifier for the file. The requested specifier and the final /// specifier maybe different for remote files that have been redirected. pub specifier: ModuleSpecifier, + /// The source of the file. + pub source: Arc, +} +/// A structure representing a source file. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct File { + /// The _final_ specifier for the file. The requested specifier and the final + /// specifier maybe different for remote files that have been redirected. + pub specifier: ModuleSpecifier, pub maybe_headers: Option>, + /// The source of the file. + pub source: Arc<[u8]>, +} + +impl File { + pub fn resolve_media_type_and_charset(&self) -> (MediaType, Option<&str>) { + deno_graph::source::resolve_media_type_and_charset_from_headers( + &self.specifier, + self.maybe_headers.as_ref(), + ) + } + + /// Decodes the source bytes into a string handling any encoding rules + /// for local vs remote files and dealing with the charset. 
+ pub fn into_text_decoded(self) -> Result { + // lots of borrow checker fighting here + let (media_type, maybe_charset) = + deno_graph::source::resolve_media_type_and_charset_from_headers( + &self.specifier, + self.maybe_headers.as_ref(), + ); + let specifier = self.specifier; + match deno_graph::source::decode_source( + &specifier, + self.source, + maybe_charset, + ) { + Ok(source) => Ok(TextDecodedFile { + media_type, + specifier, + source, + }), + Err(err) => { + Err(err).with_context(|| format!("Failed decoding \"{}\".", specifier)) + } + } + } } /// Simple struct implementing in-process caching to prevent multiple @@ -85,49 +123,14 @@ fn fetch_local(specifier: &ModuleSpecifier) -> Result { uri_error(format!("Invalid file path.\n Specifier: {specifier}")) })?; let bytes = fs::read(local)?; - let charset = text_encoding::detect_charset(&bytes).to_string(); - let source = get_source_from_bytes(bytes, Some(charset))?; - let media_type = MediaType::from_specifier(specifier); Ok(File { - maybe_types: None, - media_type, - source: source.into(), specifier: specifier.clone(), maybe_headers: None, + source: bytes.into(), }) } -/// Returns the decoded body and content-type of a provided -/// data URL. -pub fn get_source_from_data_url( - specifier: &ModuleSpecifier, -) -> Result<(String, String), AnyError> { - let data_url = DataUrl::process(specifier.as_str()) - .map_err(|e| uri_error(format!("{e:?}")))?; - let mime = data_url.mime_type(); - let charset = mime.get_parameter("charset").map(|v| v.to_string()); - let (bytes, _) = data_url - .decode_to_vec() - .map_err(|e| uri_error(format!("{e:?}")))?; - Ok((get_source_from_bytes(bytes, charset)?, format!("{mime}"))) -} - -/// Given a vector of bytes and optionally a charset, decode the bytes to a -/// string. 
-pub fn get_source_from_bytes( - bytes: Vec, - maybe_charset: Option, -) -> Result { - let source = if let Some(charset) = maybe_charset { - text_encoding::convert_to_utf8(&bytes, &charset)?.to_string() - } else { - String::from_utf8(bytes)? - }; - - Ok(source) -} - /// Return a validated scheme for a given module specifier. fn get_validated_scheme( specifier: &ModuleSpecifier, @@ -142,27 +145,6 @@ fn get_validated_scheme( } } -/// Resolve a media type and optionally the charset from a module specifier and -/// the value of a content type header. -pub fn map_content_type( - specifier: &ModuleSpecifier, - maybe_content_type: Option<&String>, -) -> (MediaType, Option) { - if let Some(content_type) = maybe_content_type { - let mut content_types = content_type.split(';'); - let content_type = content_types.next().unwrap(); - let media_type = MediaType::from_content_type(specifier, content_type); - let charset = content_types - .map(str::trim) - .find_map(|s| s.strip_prefix("charset=")) - .map(String::from); - - (media_type, charset) - } else { - (MediaType::from_specifier(specifier), None) - } -} - pub struct FetchOptions<'a> { pub specifier: &'a ModuleSpecifier, pub permissions: PermissionsContainer, @@ -215,34 +197,6 @@ impl FileFetcher { self.download_log_level = level; } - /// Creates a `File` structure for a remote file. 
- fn build_remote_file( - &self, - specifier: &ModuleSpecifier, - bytes: Vec, - headers: &HashMap, - ) -> Result { - let maybe_content_type = headers.get("content-type"); - let (media_type, maybe_charset) = - map_content_type(specifier, maybe_content_type); - let source = get_source_from_bytes(bytes, maybe_charset)?; - let maybe_types = match media_type { - MediaType::JavaScript - | MediaType::Cjs - | MediaType::Mjs - | MediaType::Jsx => headers.get("x-typescript-types").cloned(), - _ => None, - }; - - Ok(File { - maybe_types, - media_type, - source: source.into(), - specifier: specifier.clone(), - maybe_headers: Some(headers.clone()), - }) - } - /// Fetch cached remote file. /// /// This is a recursive operation if source file has redirections. @@ -269,9 +223,12 @@ impl FileFetcher { let Some(bytes) = self.http_cache.read_file_bytes(&cache_key)? else { return Ok(None); }; - let file = self.build_remote_file(specifier, bytes, &headers)?; - Ok(Some(file)) + Ok(Some(File { + specifier: specifier.clone(), + maybe_headers: Some(headers), + source: Arc::from(bytes), + })) } /// Convert a data URL into a file, resulting in an error if the URL is @@ -281,16 +238,12 @@ impl FileFetcher { specifier: &ModuleSpecifier, ) -> Result { debug!("FileFetcher::fetch_data_url() - specifier: {}", specifier); - let (source, content_type) = get_source_from_data_url(specifier)?; - let (media_type, _) = map_content_type(specifier, Some(&content_type)); - let mut headers = HashMap::new(); - headers.insert("content-type".to_string(), content_type); + let data_url = deno_graph::source::RawDataUrl::parse(specifier)?; + let (bytes, headers) = data_url.into_bytes_and_headers(); Ok(File { - maybe_types: None, - media_type, - source: source.into(), specifier: specifier.clone(), maybe_headers: Some(headers), + source: Arc::from(bytes), }) } @@ -310,21 +263,14 @@ impl FileFetcher { ) })?; - let content_type = blob.media_type.clone(); let bytes = blob.read_all().await?; - - let (media_type, 
maybe_charset) = - map_content_type(specifier, Some(&content_type)); - let source = get_source_from_bytes(bytes, maybe_charset)?; - let mut headers = HashMap::new(); - headers.insert("content-type".to_string(), content_type); + let headers = + HashMap::from([("content-type".to_string(), blob.media_type.clone())]); Ok(File { - maybe_types: None, - media_type, - source: source.into(), specifier: specifier.clone(), maybe_headers: Some(headers), + source: Arc::from(bytes), }) } @@ -453,9 +399,11 @@ impl FileFetcher { file_fetcher .http_cache .set(&specifier, headers.clone(), &bytes)?; - let file = - file_fetcher.build_remote_file(&specifier, bytes, &headers)?; - Ok(file) + Ok(File { + specifier, + maybe_headers: Some(headers), + source: Arc::from(bytes), + }) } FetchOnceResult::RequestError(err) => { handle_request_or_server_error(&mut retried, &specifier, err) @@ -767,16 +715,6 @@ mod tests { (file_fetcher, temp_dir, blob_store) } - macro_rules! file_url { - ($path:expr) => { - if cfg!(target_os = "windows") { - concat!("file:///C:", $path) - } else { - concat!("file://", $path) - } - }; - } - async fn test_fetch(specifier: &ModuleSpecifier) -> (File, FileFetcher) { let (file_fetcher, _) = setup(CacheSetting::ReloadAll, None); let result = file_fetcher @@ -812,6 +750,9 @@ mod tests { ) } + // this test used to test how the file fetcher decoded strings, but + // now we're using it as a bit of an integration test with the functionality + // in deno_graph async fn test_fetch_remote_encoded( fixture: &str, charset: &str, @@ -820,8 +761,18 @@ mod tests { let url_str = format!("http://127.0.0.1:4545/encoding/{fixture}"); let specifier = resolve_url(&url_str).unwrap(); let (file, headers) = test_fetch_remote(&specifier).await; - assert_eq!(&*file.source, expected); - assert_eq!(file.media_type, MediaType::TypeScript); + let (media_type, maybe_charset) = + deno_graph::source::resolve_media_type_and_charset_from_headers( + &specifier, + Some(&headers), + ); + assert_eq!( + 
deno_graph::source::decode_source(&specifier, file.source, maybe_charset) + .unwrap() + .as_ref(), + expected + ); + assert_eq!(media_type, MediaType::TypeScript); assert_eq!( headers.get("content-type").unwrap(), &format!("application/typescript;charset={charset}") @@ -832,7 +783,12 @@ mod tests { let p = test_util::testdata_path().join(format!("encoding/{charset}.ts")); let specifier = ModuleSpecifier::from_file_path(p).unwrap(); let (file, _) = test_fetch(&specifier).await; - assert_eq!(&*file.source, expected); + assert_eq!( + deno_graph::source::decode_source(&specifier, file.source, None) + .unwrap() + .as_ref(), + expected + ); } #[test] @@ -857,192 +813,18 @@ mod tests { } } - #[test] - fn test_map_content_type() { - let fixtures = vec![ - // Extension only - (file_url!("/foo/bar.ts"), None, MediaType::TypeScript, None), - (file_url!("/foo/bar.tsx"), None, MediaType::Tsx, None), - (file_url!("/foo/bar.d.cts"), None, MediaType::Dcts, None), - (file_url!("/foo/bar.d.mts"), None, MediaType::Dmts, None), - (file_url!("/foo/bar.d.ts"), None, MediaType::Dts, None), - (file_url!("/foo/bar.js"), None, MediaType::JavaScript, None), - (file_url!("/foo/bar.jsx"), None, MediaType::Jsx, None), - (file_url!("/foo/bar.json"), None, MediaType::Json, None), - (file_url!("/foo/bar.wasm"), None, MediaType::Wasm, None), - (file_url!("/foo/bar.cjs"), None, MediaType::Cjs, None), - (file_url!("/foo/bar.mjs"), None, MediaType::Mjs, None), - (file_url!("/foo/bar.cts"), None, MediaType::Cts, None), - (file_url!("/foo/bar.mts"), None, MediaType::Mts, None), - (file_url!("/foo/bar"), None, MediaType::Unknown, None), - // Media type no extension - ( - "https://deno.land/x/mod", - Some("application/typescript".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/typescript".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("video/vnd.dlna.mpeg-tts".to_string()), - MediaType::TypeScript, - None, - ), 
- ( - "https://deno.land/x/mod", - Some("video/mp2t".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/x-typescript".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/javascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/javascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/ecmascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/ecmascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/x-javascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/node".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/jsx".to_string()), - MediaType::Jsx, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/tsx".to_string()), - MediaType::Tsx, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/json".to_string()), - MediaType::Json, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/json; charset=utf-8".to_string()), - MediaType::Json, - Some("utf-8".to_string()), - ), - // Extension with media type - ( - "https://deno.land/x/mod.ts", - Some("text/plain".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod.ts", - Some("foo/bar".to_string()), - MediaType::Unknown, - None, - ), - ( - "https://deno.land/x/mod.tsx", - Some("application/typescript".to_string()), - MediaType::Tsx, - None, - ), - ( - "https://deno.land/x/mod.tsx", - Some("application/javascript".to_string()), - MediaType::Tsx, - None, - ), - ( - "https://deno.land/x/mod.jsx", - Some("application/javascript".to_string()), - MediaType::Jsx, - None, - ), - ( - "https://deno.land/x/mod.jsx", - Some("application/x-typescript".to_string()), 
- MediaType::Jsx, - None, - ), - ( - "https://deno.land/x/mod.d.ts", - Some("application/javascript".to_string()), - MediaType::Dts, - None, - ), - ( - "https://deno.land/x/mod.d.ts", - Some("text/plain".to_string()), - MediaType::Dts, - None, - ), - ( - "https://deno.land/x/mod.d.ts", - Some("application/x-typescript".to_string()), - MediaType::Dts, - None, - ), - ]; - - for (specifier, maybe_content_type, media_type, maybe_charset) in fixtures { - let specifier = ModuleSpecifier::parse(specifier).unwrap(); - assert_eq!( - map_content_type(&specifier, maybe_content_type.as_ref()), - (media_type, maybe_charset) - ); - } - } - #[tokio::test] async fn test_insert_cached() { let (file_fetcher, temp_dir) = setup(CacheSetting::Use, None); let local = temp_dir.path().join("a.ts"); let specifier = ModuleSpecifier::from_file_path(&local).unwrap(); let file = File { - maybe_types: None, - media_type: MediaType::TypeScript, - source: "some source code".into(), + source: Arc::from("some source code".as_bytes()), specifier: specifier.clone(), - maybe_headers: None, + maybe_headers: Some(HashMap::from([( + "content-type".to_string(), + "application/javascript".to_string(), + )])), }; file_fetcher.insert_cached(file.clone()); @@ -1069,8 +851,8 @@ mod tests { let maybe_file = file_fetcher.get_source(&specifier); assert!(maybe_file.is_some()); - let file = maybe_file.unwrap(); - assert_eq!(&*file.source, "export const redirect = 1;\n"); + let file = maybe_file.unwrap().into_text_decoded().unwrap(); + assert_eq!(file.source.as_ref(), "export const redirect = 1;\n"); assert_eq!( file.specifier, resolve_url("http://localhost:4545/subdir/redirects/redirect1.js") @@ -1087,13 +869,12 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export const a = \"a\";\n\nexport enum A {\n A,\n B,\n C,\n}\n" ); 
assert_eq!(file.media_type, MediaType::TypeScript); - assert_eq!(file.maybe_types, None); assert_eq!(file.specifier, specifier); } @@ -1119,13 +900,12 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export const a = \"a\";\n\nexport enum A {\n A,\n B,\n C,\n}\n" ); assert_eq!(file.media_type, MediaType::TypeScript); - assert_eq!(file.maybe_types, None); assert_eq!(file.specifier, specifier); } @@ -1142,7 +922,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export { printHello } from \"./print_hello.ts\";\n" @@ -1169,7 +949,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export { printHello } from \"./print_hello.ts\";\n" @@ -1198,7 +978,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export { printHello } from \"./print_hello.ts\";\n" @@ -1223,7 +1003,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export { printHello } from \"./print_hello.ts\";\n" @@ -1634,7 +1414,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!(&*file.source, r#"console.log("hello deno");"#); 
fs::write(fixture_path, r#"console.log("goodbye deno");"#).unwrap(); @@ -1642,7 +1422,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!(&*file.source, r#"console.log("goodbye deno");"#); } @@ -1702,78 +1482,36 @@ mod tests { #[tokio::test] async fn test_fetch_local_utf_16be() { - let expected = String::from_utf8( - b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), - ) - .unwrap(); + let expected = + String::from_utf8(b"console.log(\"Hello World\");\x0A".to_vec()).unwrap(); test_fetch_local_encoded("utf-16be", expected).await; } #[tokio::test] async fn test_fetch_local_utf_16le() { - let expected = String::from_utf8( - b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), - ) - .unwrap(); + let expected = + String::from_utf8(b"console.log(\"Hello World\");\x0A".to_vec()).unwrap(); test_fetch_local_encoded("utf-16le", expected).await; } #[tokio::test] async fn test_fetch_local_utf8_with_bom() { - let expected = String::from_utf8( - b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), - ) - .unwrap(); + let expected = + String::from_utf8(b"console.log(\"Hello World\");\x0A".to_vec()).unwrap(); test_fetch_local_encoded("utf-8", expected).await; } - #[tokio::test] - async fn test_fetch_remote_javascript_with_types() { - let specifier = - ModuleSpecifier::parse("http://127.0.0.1:4545/xTypeScriptTypes.js") - .unwrap(); - let (file, _) = test_fetch_remote(&specifier).await; - assert_eq!( - file.maybe_types, - Some("./xTypeScriptTypes.d.ts".to_string()) - ); - } - - #[tokio::test] - async fn test_fetch_remote_jsx_with_types() { - let specifier = - ModuleSpecifier::parse("http://127.0.0.1:4545/xTypeScriptTypes.jsx") - .unwrap(); - let (file, _) = test_fetch_remote(&specifier).await; - assert_eq!(file.media_type, MediaType::Jsx,); - assert_eq!( - file.maybe_types, - 
Some("./xTypeScriptTypes.d.ts".to_string()) - ); - } - - #[tokio::test] - async fn test_fetch_remote_typescript_with_types() { - let specifier = - ModuleSpecifier::parse("http://127.0.0.1:4545/xTypeScriptTypes.ts") - .unwrap(); - let (file, _) = test_fetch_remote(&specifier).await; - assert_eq!(file.maybe_types, None); - } - #[tokio::test] async fn test_fetch_remote_utf16_le() { let expected = - std::str::from_utf8(b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A") - .unwrap(); + std::str::from_utf8(b"console.log(\"Hello World\");\x0A").unwrap(); test_fetch_remote_encoded("utf-16le.ts", "utf-16le", expected).await; } #[tokio::test] async fn test_fetch_remote_utf16_be() { let expected = - std::str::from_utf8(b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A") - .unwrap(); + std::str::from_utf8(b"console.log(\"Hello World\");\x0A").unwrap(); test_fetch_remote_encoded("utf-16be.ts", "utf-16be", expected).await; } diff --git a/cli/graph_util.rs b/cli/graph_util.rs index 342013e6cbb0ec..3633784b833516 100644 --- a/cli/graph_util.rs +++ b/cli/graph_util.rs @@ -169,8 +169,8 @@ pub fn graph_valid( pub fn graph_lock_or_exit(graph: &ModuleGraph, lockfile: &mut Lockfile) { for module in graph.modules() { let source = match module { - Module::Esm(module) if module.media_type.is_declaration() => continue, // skip declaration files - Module::Esm(module) => &module.source, + Module::Js(module) if module.media_type.is_declaration() => continue, // skip declaration files + Module::Js(module) => &module.source, Module::Json(module) => &module.source, Module::Node(_) | Module::Npm(_) | Module::External(_) => continue, }; @@ -558,7 +558,7 @@ pub fn error_for_any_npm_specifier( Module::Node(module) => { bail!("Node specifiers have not yet been implemented for this subcommand (https://github.com/denoland/deno/issues/15960). 
Found: node:{}", module.module_name) } - Module::Esm(_) | Module::Json(_) | Module::External(_) => {} + Module::Js(_) | Module::Json(_) | Module::External(_) => {} } } Ok(()) diff --git a/cli/lsp/documents.rs b/cli/lsp/documents.rs index 611d1d07fb529a..c758d341bc8c4c 100644 --- a/cli/lsp/documents.rs +++ b/cli/lsp/documents.rs @@ -13,9 +13,6 @@ use crate::args::ConfigFile; use crate::args::JsxImportSourceConfig; use crate::cache::FastInsecureHasher; use crate::cache::HttpCache; -use crate::file_fetcher::get_source_from_bytes; -use crate::file_fetcher::get_source_from_data_url; -use crate::file_fetcher::map_content_type; use crate::lsp::logging::lsp_warn; use crate::npm::CliNpmResolver; use crate::resolver::CliGraphResolver; @@ -24,7 +21,6 @@ use crate::resolver::SloppyImportsFsEntry; use crate::resolver::SloppyImportsResolution; use crate::resolver::SloppyImportsResolver; use crate::util::path::specifier_to_file_path; -use crate::util::text_encoding; use deno_ast::MediaType; use deno_ast::ParsedSource; @@ -278,7 +274,7 @@ impl DocumentDependencies { } } - pub fn from_module(module: &deno_graph::EsmModule) -> Self { + pub fn from_module(module: &deno_graph::JsModule) -> Self { Self { deps: module.dependencies.clone(), maybe_types_dependency: module.maybe_types_dependency.clone(), @@ -286,7 +282,7 @@ impl DocumentDependencies { } } -type ModuleResult = Result; +type ModuleResult = Result; type ParsedSourceResult = Result; #[derive(Debug)] @@ -593,7 +589,7 @@ impl Document { self.0.maybe_lsp_version } - fn maybe_esm_module(&self) -> Option<&ModuleResult> { + fn maybe_js_module(&self) -> Option<&ModuleResult> { self.0.maybe_module.as_ref() } @@ -632,7 +628,7 @@ impl Document { &self, position: &lsp::Position, ) -> Option<(String, deno_graph::Dependency, deno_graph::Range)> { - let module = self.maybe_esm_module()?.as_ref().ok()?; + let module = self.maybe_js_module()?.as_ref().ok()?; let position = deno_graph::Position { line: position.line as usize, character: 
position.character as usize, @@ -798,9 +794,8 @@ impl FileSystemDocuments { let path = specifier_to_file_path(specifier).ok()?; let fs_version = calculate_fs_version_at_path(&path)?; let bytes = fs::read(path).ok()?; - let maybe_charset = - Some(text_encoding::detect_charset(&bytes).to_string()); - let content = get_source_from_bytes(bytes, maybe_charset).ok()?; + let content = + deno_graph::source::decode_owned_source(specifier, bytes, None).ok()?; Document::new( specifier.clone(), fs_version, @@ -810,7 +805,10 @@ impl FileSystemDocuments { npm_resolver, ) } else if specifier.scheme() == "data" { - let (source, _) = get_source_from_data_url(specifier).ok()?; + let source = deno_graph::source::RawDataUrl::parse(specifier) + .ok()? + .decode() + .ok()?; Document::new( specifier.clone(), "1".to_string(), @@ -824,10 +822,18 @@ impl FileSystemDocuments { let cache_key = cache.cache_item_key(specifier).ok()?; let bytes = cache.read_file_bytes(&cache_key).ok()??; let specifier_metadata = cache.read_metadata(&cache_key).ok()??; - let maybe_content_type = specifier_metadata.headers.get("content-type"); - let (_, maybe_charset) = map_content_type(specifier, maybe_content_type); + let (_, maybe_charset) = + deno_graph::source::resolve_media_type_and_charset_from_headers( + specifier, + Some(&specifier_metadata.headers), + ); + let content = deno_graph::source::decode_owned_source( + specifier, + bytes, + maybe_charset, + ) + .ok()?; let maybe_headers = Some(specifier_metadata.headers); - let content = get_source_from_bytes(bytes, maybe_charset).ok()?; Document::new( specifier.clone(), fs_version, @@ -1681,7 +1687,7 @@ impl Documents { return node_resolve_npm_req_ref(npm_ref, maybe_npm, referrer); } let doc = self.get(specifier)?; - let maybe_module = doc.maybe_esm_module().and_then(|r| r.as_ref().ok()); + let maybe_module = doc.maybe_js_module().and_then(|r| r.as_ref().ok()); let maybe_types_dependency = maybe_module .and_then(|m| m.maybe_types_dependency.as_ref().map(|d| 
&d.dependency)); if let Some(specifier) = @@ -1752,7 +1758,7 @@ impl<'a> OpenDocumentsGraphLoader<'a> { if let Some(doc) = self.open_docs.get(specifier) { return Some( future::ready(Ok(Some(deno_graph::source::LoadResponse::Module { - content: doc.content(), + content: Arc::from(doc.content()), specifier: doc.specifier().clone(), maybe_headers: None, }))) @@ -1816,7 +1822,7 @@ impl<'a> deno_graph::source::Loader for OpenDocumentsGraphLoader<'a> { fn cache_module_info( &mut self, specifier: &deno_ast::ModuleSpecifier, - source: &str, + source: &Arc<[u8]>, module_info: &deno_graph::ModuleInfo, ) { self diff --git a/cli/lsp/npm.rs b/cli/lsp/npm.rs index c730c79902573e..613b7897e28ca8 100644 --- a/cli/lsp/npm.rs +++ b/cli/lsp/npm.rs @@ -62,7 +62,8 @@ impl NpmSearchApi for CliNpmSearchApi { let file = self .file_fetcher .fetch(&search_url, PermissionsContainer::allow_all()) - .await?; + .await? + .into_text_decoded()?; let names = Arc::new(parse_npm_search_response(&file.source)?); self .search_cache @@ -88,7 +89,8 @@ impl NpmSearchApi for CliNpmSearchApi { .file_fetcher .fetch(&info_url, PermissionsContainer::allow_all()) .await?; - let info = Arc::new(serde_json::from_str::(&file.source)?); + let info = + Arc::new(serde_json::from_slice::(&file.source)?); self .info_cache .lock() diff --git a/cli/lsp/registries.rs b/cli/lsp/registries.rs index 328b325ff0c254..f4a64c7ee7f1f3 100644 --- a/cli/lsp/registries.rs +++ b/cli/lsp/registries.rs @@ -528,7 +528,7 @@ impl ModuleRegistry { ); self.http_cache.set(specifier, headers_map, &[])?; } - let file = fetch_result?; + let file = fetch_result?.into_text_decoded()?; let config: RegistryConfigurationJson = serde_json::from_str(&file.source)?; validate_config(&config)?; Ok(config.registries) @@ -610,6 +610,8 @@ impl ModuleRegistry { .file_fetcher .fetch(&endpoint, PermissionsContainer::allow_all()) .await + .ok()? 
+ .into_text_decoded() .ok()?; let documentation: lsp::Documentation = serde_json::from_str(&file.source).ok()?; @@ -974,6 +976,8 @@ impl ModuleRegistry { .file_fetcher .fetch(&specifier, PermissionsContainer::allow_all()) .await + .ok()? + .into_text_decoded() .ok()?; serde_json::from_str(&file.source).ok() } @@ -1037,6 +1041,8 @@ impl ModuleRegistry { specifier, err ); }) + .ok()? + .into_text_decoded() .ok()?; let items: VariableItems = serde_json::from_str(&file.source) .map_err(|err| { @@ -1073,6 +1079,8 @@ impl ModuleRegistry { specifier, err ); }) + .ok()? + .into_text_decoded() .ok()?; let items: VariableItems = serde_json::from_str(&file.source) .map_err(|err| { diff --git a/cli/lsp/urls.rs b/cli/lsp/urls.rs index b8f3037ba73dff..f1e75c9ddc9b6b 100644 --- a/cli/lsp/urls.rs +++ b/cli/lsp/urls.rs @@ -1,11 +1,8 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. use crate::cache::LocalLspHttpCache; -use crate::file_fetcher::map_content_type; -use data_url::DataUrl; use deno_ast::MediaType; -use deno_core::error::uri_error; use deno_core::error::AnyError; use deno_core::parking_lot::Mutex; use deno_core::url::Position; @@ -191,11 +188,8 @@ impl LspUrlMap { let specifier_str = if specifier.scheme() == "asset" { format!("deno:/asset{}", specifier.path()) } else if specifier.scheme() == "data" { - let data_url = DataUrl::process(specifier.as_str()) - .map_err(|e| uri_error(format!("{e:?}")))?; - let mime = data_url.mime_type(); - let (media_type, _) = - map_content_type(specifier, Some(&format!("{mime}"))); + let data_url = deno_graph::source::RawDataUrl::parse(specifier)?; + let media_type = data_url.media_type(); let extension = if media_type == MediaType::Unknown { "" } else { diff --git a/cli/module_loader.rs b/cli/module_loader.rs index 3d80aeb5c4fb53..0058b9be07f2f4 100644 --- a/cli/module_loader.rs +++ b/cli/module_loader.rs @@ -47,7 +47,7 @@ use deno_core::ResolutionKind; use deno_core::SourceMapGetter; use 
deno_graph::source::ResolutionMode; use deno_graph::source::Resolver; -use deno_graph::EsmModule; +use deno_graph::JsModule; use deno_graph::JsonModule; use deno_graph::Module; use deno_graph::Resolution; @@ -296,7 +296,7 @@ impl PreparedModuleLoader { found_url: specifier.clone(), media_type: *media_type, }), - Some(deno_graph::Module::Esm(EsmModule { + Some(deno_graph::Module::Js(JsModule { source, media_type, specifier, @@ -335,7 +335,12 @@ impl PreparedModuleLoader { media_type: *media_type, }) } - _ => { + Some( + deno_graph::Module::External(_) + | deno_graph::Module::Node(_) + | deno_graph::Module::Npm(_), + ) + | None => { let mut msg = format!("Loading unprepared module: {specifier}"); if let Some(referrer) = maybe_referrer { msg = format!("{}, imported from: {}", msg, referrer.as_str()); @@ -542,7 +547,7 @@ impl ModuleLoader for CliModuleLoader { let graph = self.shared.graph_container.graph(); let maybe_resolved = match graph.get(referrer) { - Some(Module::Esm(module)) => { + Some(Module::Js(module)) => { module.dependencies.get(specifier).map(|d| &d.maybe_code) } _ => None, @@ -577,7 +582,7 @@ impl ModuleLoader for CliModuleLoader { }) } Some(Module::Node(module)) => Ok(module.specifier.clone()), - Some(Module::Esm(module)) => Ok(module.specifier.clone()), + Some(Module::Js(module)) => Ok(module.specifier.clone()), Some(Module::Json(module)) => Ok(module.specifier.clone()), Some(Module::External(module)) => { Ok(node::resolve_specifier_into_node_modules(&module.specifier)) @@ -715,7 +720,7 @@ impl SourceMapGetter for CliSourceMapGetter { ) -> Option { let graph = self.shared.graph_container.graph(); let code = match graph.get(&resolve_url(file_name).ok()?) 
{ - Some(deno_graph::Module::Esm(module)) => &module.source, + Some(deno_graph::Module::Js(module)) => &module.source, Some(deno_graph::Module::Json(module)) => &module.source, _ => return None, }; diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 0175b9c119c87b..4049d67092526f 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -9,7 +9,6 @@ use crate::args::StorageKeyResolver; use crate::cache::Caches; use crate::cache::DenoDirProvider; use crate::cache::NodeAnalysisCache; -use crate::file_fetcher::get_source_from_data_url; use crate::http_util::HttpClient; use crate::node::CliCjsCodeAnalyzer; use crate::npm::create_cli_npm_resolver; @@ -150,12 +149,22 @@ impl ModuleLoader for EmbeddedModuleLoader { is_dynamic: bool, _requested_module_type: RequestedModuleType, ) -> deno_core::ModuleLoadResponse { - let is_data_uri = get_source_from_data_url(original_specifier).ok(); - if let Some((source, _)) = is_data_uri { + if original_specifier.scheme() == "data" { + let data_url_text = + match deno_graph::source::RawDataUrl::parse(original_specifier) + .and_then(|url| url.decode().map_err(|err| err.into())) + { + Ok(response) => response, + Err(err) => { + return deno_core::ModuleLoadResponse::Sync(Err(type_error( + format!("{:#}", err), + ))); + } + }; return deno_core::ModuleLoadResponse::Sync(Ok( deno_core::ModuleSource::new( deno_core::ModuleType::JavaScript, - ModuleSourceCode::String(source.into()), + ModuleSourceCode::String(data_url_text.into()), original_specifier, ), )); diff --git a/cli/tests/integration/compile_tests.rs b/cli/tests/integration/compile_tests.rs index d6c7febd5be9ef..cf3bf023dbade4 100644 --- a/cli/tests/integration/compile_tests.rs +++ b/cli/tests/integration/compile_tests.rs @@ -1149,3 +1149,32 @@ fn granular_unstable_features() { output.assert_exit_code(0); output.assert_matches_text("Kv {}\n"); } + +#[test] +fn dynamic_import_bad_data_uri() { + let context = TestContextBuilder::new().build(); + let dir = 
context.temp_dir(); + let exe = if cfg!(windows) { + dir.path().join("app.exe") + } else { + dir.path().join("app") + }; + let file = dir.path().join("bad_data_uri.ts"); + file.write("await import('data:application/')"); + let output = context + .new_command() + .args_vec([ + "compile", + "--output", + &exe.to_string_lossy(), + &file.to_string_lossy(), + ]) + .run(); + output.assert_exit_code(0); + output.skip_output_check(); + let output = context.new_command().name(&exe).run(); + output.assert_exit_code(1); + output.assert_matches_text( + "[WILDCARD]TypeError: Unable to decode data url.[WILDCARD]", + ); +} diff --git a/cli/tests/integration/npm_tests.rs b/cli/tests/integration/npm_tests.rs index e3b1196b97bcca..a63253260ab922 100644 --- a/cli/tests/integration/npm_tests.rs +++ b/cli/tests/integration/npm_tests.rs @@ -12,14 +12,14 @@ use util::TestContextBuilder; // NOTE: See how to make test npm packages at ./testdata/npm/README.md -itest!(esm_module { +itest!(es_module { args: "run --allow-read --allow-env npm/esm/main.js", output: "npm/esm/main.out", envs: env_vars_for_npm_tests(), http_server: true, }); -itest!(esm_module_eval { +itest!(es_module_eval { args_vec: vec![ "eval", "import chalk from 'npm:chalk@5'; console.log(chalk.green('chalk esm loads'));", @@ -29,7 +29,7 @@ itest!(esm_module_eval { http_server: true, }); -itest!(esm_module_deno_test { +itest!(es_module_deno_test { args: "test --allow-read --allow-env npm/esm/test.js", output: "npm/esm/test.out", envs: env_vars_for_npm_tests(), diff --git a/cli/tests/testdata/fmt/invalid_data.out b/cli/tests/testdata/fmt/invalid_data.out index 7fd5046ee72045..dee00fcc5e696c 100644 --- a/cli/tests/testdata/fmt/invalid_data.out +++ b/cli/tests/testdata/fmt/invalid_data.out @@ -1 +1,4 @@ error: [WILDCARD] is not a valid UTF-8 file + +Caused by: + invalid data diff --git a/cli/tests/testdata/npm/cjs_require_esm_error/main.out b/cli/tests/testdata/npm/cjs_require_esm_error/main.out index 
f24675de93cf8d..b6ade69042fc97 100644 --- a/cli/tests/testdata/npm/cjs_require_esm_error/main.out +++ b/cli/tests/testdata/npm/cjs_require_esm_error/main.out @@ -1,2 +1,2 @@ -error: Uncaught (in promise) Error: require() of ES Module [WILDCARD]my_esm_module.js from [WILDCARD]index.js not supported. Instead change the require to a dynamic import() which is available in all CommonJS modules. +error: Uncaught (in promise) Error: require() of ES Module [WILDCARD]my_es_module.js from [WILDCARD]index.js not supported. Instead change the require to a dynamic import() which is available in all CommonJS modules. [WILDCARD] diff --git a/cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/esm/my_esm_module.js b/cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/esm/my_es_module.js similarity index 100% rename from cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/esm/my_esm_module.js rename to cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/esm/my_es_module.js diff --git a/cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/index.js b/cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/index.js index 6db336dba51319..ba630f93bb0a28 100644 --- a/cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/index.js +++ b/cli/tests/testdata/npm/registry/@denotest/cjs-require-esm-error/1.0.0/index.js @@ -1 +1 @@ -module.exports.Test = require("./esm/my_esm_module.js"); +module.exports.Test = require("./esm/my_es_module.js"); diff --git a/cli/tools/bundle.rs b/cli/tools/bundle.rs index 0e54e59f66a8c1..33ec78fc952de9 100644 --- a/cli/tools/bundle.rs +++ b/cli/tools/bundle.rs @@ -73,7 +73,7 @@ async fn bundle_action( .specifiers() .filter_map(|(_, r)| { r.ok().and_then(|module| match module { - Module::Esm(m) => m.specifier.to_file_path().ok(), + Module::Js(m) => m.specifier.to_file_path().ok(), Module::Json(m) => m.specifier.to_file_path().ok(), // nothing to 
watch Module::Node(_) | Module::Npm(_) | Module::External(_) => None, diff --git a/cli/tools/check.rs b/cli/tools/check.rs index 7ce9c578cde2d1..90a1d0299dbbad 100644 --- a/cli/tools/check.rs +++ b/cli/tools/check.rs @@ -243,7 +243,7 @@ fn get_check_hash( // this iterator of modules is already deterministic, so no need to sort it for module in graph.modules() { match module { - Module::Esm(module) => { + Module::Js(module) => { let ts_check = has_ts_check(module.media_type, &module.source); if ts_check { has_file_to_type_check = true; @@ -329,7 +329,7 @@ fn get_tsc_roots( check_js: bool, ) -> Option<(ModuleSpecifier, MediaType)> { match module { - Module::Esm(module) => match module.media_type { + Module::Js(module) => match module.media_type { MediaType::TypeScript | MediaType::Tsx | MediaType::Mts @@ -404,7 +404,7 @@ fn get_tsc_roots( if let Some(entry) = maybe_get_check_entry(module, check_js) { result.push(entry); } - if let Some(module) = module.esm() { + if let Some(module) = module.js() { let deps = module.dependencies_prefer_fast_check(); for dep in deps.values() { // walk both the code and type dependencies diff --git a/cli/tools/coverage/mod.rs b/cli/tools/coverage/mod.rs index 30d35878b1af59..16c9555768ab8b 100644 --- a/cli/tools/coverage/mod.rs +++ b/cli/tools/coverage/mod.rs @@ -530,24 +530,24 @@ pub async fn cover_files( Before generating coverage report, run `deno test --coverage` to ensure consistent state.", module_specifier ) - })?; + })?.into_text_decoded()?; - // Check if file was transpiled let original_source = file.source.clone(); - let transpiled_code: ModuleCodeString = match file.media_type { + // Check if file was transpiled + let transpiled_code = match file.media_type { MediaType::JavaScript | MediaType::Unknown | MediaType::Cjs | MediaType::Mjs - | MediaType::Json => file.source.clone().into(), - MediaType::Dts | MediaType::Dmts | MediaType::Dcts => Default::default(), + | MediaType::Json => None, + MediaType::Dts | MediaType::Dmts | 
MediaType::Dcts => Some(String::new()), MediaType::TypeScript | MediaType::Jsx | MediaType::Mts | MediaType::Cts | MediaType::Tsx => { - match emitter.maybe_cached_emit(&file.specifier, &file.source) { - Some(code) => code.into(), + Some(match emitter.maybe_cached_emit(&file.specifier, &file.source) { + Some(code) => code, None => { return Err(anyhow!( "Missing transpiled source code for: \"{}\". @@ -555,17 +555,20 @@ pub async fn cover_files( file.specifier, )) } - } + }) } MediaType::Wasm | MediaType::TsBuildInfo | MediaType::SourceMap => { unreachable!() } }; + let runtime_code: ModuleCodeString = transpiled_code + .map(|c| c.into()) + .unwrap_or_else(|| original_source.clone().into()); - let source_map = source_map_from_code(&transpiled_code); + let source_map = source_map_from_code(&runtime_code); let coverage_report = generate_coverage_report( &script_coverage, - transpiled_code.as_str().to_owned(), + runtime_code.as_str().to_owned(), &source_map, &out_mode, ); diff --git a/cli/tools/fmt.rs b/cli/tools/fmt.rs index ad35615a080393..86fc9700eea88b 100644 --- a/cli/tools/fmt.rs +++ b/cli/tools/fmt.rs @@ -20,7 +20,6 @@ use crate::util::file_watcher; use crate::util::fs::canonicalize_path; use crate::util::fs::FileCollector; use crate::util::path::get_extension; -use crate::util::text_encoding; use deno_ast::ParsedSource; use deno_config::glob::FilePatterns; use deno_core::anyhow::anyhow; @@ -607,28 +606,24 @@ struct FileContents { fn read_file_contents(file_path: &Path) -> Result { let file_bytes = fs::read(file_path) .with_context(|| format!("Error reading {}", file_path.display()))?; - let charset = text_encoding::detect_charset(&file_bytes); - let file_text = text_encoding::convert_to_utf8(&file_bytes, charset) - .map_err(|_| { + let had_bom = file_bytes.starts_with(&[0xEF, 0xBB, 0xBF]); + // will have the BOM stripped + let text = deno_graph::source::decode_owned_file_source(file_bytes) + .with_context(|| { anyhow!("{} is not a valid UTF-8 file", 
file_path.display()) })?; - let had_bom = file_text.starts_with(text_encoding::BOM_CHAR); - let text = if had_bom { - text_encoding::strip_bom(&file_text).to_string() - } else { - file_text.to_string() - }; Ok(FileContents { text, had_bom }) } fn write_file_contents( file_path: &Path, - file_contents: FileContents, + mut file_contents: FileContents, ) -> Result<(), AnyError> { let file_text = if file_contents.had_bom { // add back the BOM - format!("{}{}", text_encoding::BOM_CHAR, file_contents.text) + file_contents.text.insert(0, '\u{FEFF}'); + file_contents.text } else { file_contents.text }; diff --git a/cli/tools/info.rs b/cli/tools/info.rs index 2234c783d2684c..a9d6a8c59bece1 100644 --- a/cli/tools/info.rs +++ b/cli/tools/info.rs @@ -434,7 +434,7 @@ impl<'a> GraphDisplayContext<'a> { match self.graph.try_get(&root_specifier) { Ok(Some(root)) => { let maybe_cache_info = match root { - Module::Esm(module) => module.maybe_cache_info.as_ref(), + Module::Js(module) => module.maybe_cache_info.as_ref(), Module::Json(module) => module.maybe_cache_info.as_ref(), Module::Node(_) | Module::Npm(_) | Module::External(_) => None, }; @@ -464,7 +464,7 @@ impl<'a> GraphDisplayContext<'a> { )?; } } - if let Some(module) = root.esm() { + if let Some(module) = root.js() { writeln!(writer, "{} {}", colors::bold("type:"), module.media_type)?; } let total_modules_size = self @@ -472,7 +472,7 @@ impl<'a> GraphDisplayContext<'a> { .modules() .map(|m| { let size = match m { - Module::Esm(module) => module.size(), + Module::Js(module) => module.size(), Module::Json(module) => module.size(), Module::Node(_) | Module::Npm(_) | Module::External(_) => 0, }; @@ -571,7 +571,7 @@ impl<'a> GraphDisplayContext<'a> { self.npm_info.package_sizes.get(&package.id).copied() } Specifier(_) => match module { - Module::Esm(module) => Some(module.size() as u64), + Module::Js(module) => Some(module.size() as u64), Module::Json(module) => Some(module.size() as u64), Module::Node(_) | Module::Npm(_) | 
Module::External(_) => None, }, @@ -587,7 +587,7 @@ impl<'a> GraphDisplayContext<'a> { tree_node.children.extend(self.build_npm_deps(package)); } Specifier(_) => { - if let Some(module) = module.esm() { + if let Some(module) = module.js() { if let Some(types_dep) = &module.maybe_types_dependency { if let Some(child) = self.build_resolved_info(&types_dep.dependency, true) diff --git a/cli/tools/registry/graph.rs b/cli/tools/registry/graph.rs index 2a3b4cc17a63ac..0bee5fe8a99f0b 100644 --- a/cli/tools/registry/graph.rs +++ b/cli/tools/registry/graph.rs @@ -122,7 +122,7 @@ pub fn collect_invalid_external_imports( let ModuleEntryRef::Module(module) = entry else { continue; }; - let Some(module) = module.esm() else { + let Some(module) = module.js() else { continue; }; @@ -158,10 +158,10 @@ pub fn collect_fast_check_type_graph_diagnostics( let Ok(Some(module)) = graph.try_get_prefer_types(&specifier) else { continue; }; - let Some(esm_module) = module.esm() else { + let Some(es_module) = module.js() else { continue; }; - if let Some(diagnostic) = esm_module.fast_check_diagnostic() { + if let Some(diagnostic) = es_module.fast_check_diagnostic() { for diagnostic in diagnostic.flatten_multiple() { if !seen_diagnostics.insert(diagnostic.message_with_range_for_test()) { @@ -179,7 +179,7 @@ pub fn collect_fast_check_type_graph_diagnostics( } // analyze the next dependencies - for dep in esm_module.dependencies_prefer_fast_check().values() { + for dep in es_module.dependencies_prefer_fast_check().values() { let Some(specifier) = graph.resolve_dependency_from_dep(dep, true) else { continue; diff --git a/cli/tools/registry/publish_order.rs b/cli/tools/registry/publish_order.rs index 4071c42caa4513..bb423b2b5c8e8a 100644 --- a/cli/tools/registry/publish_order.rs +++ b/cli/tools/registry/publish_order.rs @@ -139,7 +139,7 @@ fn build_pkg_deps( let mut pending = VecDeque::new(); pending.extend(root.exports.clone()); while let Some(specifier) = pending.pop_front() { - let 
Some(module) = graph.get(&specifier).and_then(|m| m.esm()) else { + let Some(module) = graph.get(&specifier).and_then(|m| m.js()) else { continue; }; let mut dep_specifiers = diff --git a/cli/tools/repl/mod.rs b/cli/tools/repl/mod.rs index f1fef6d54ba76a..e40c6362a17ae0 100644 --- a/cli/tools/repl/mod.rs +++ b/cli/tools/repl/mod.rs @@ -1,5 +1,7 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use std::sync::Arc; + use crate::args::CliOptions; use crate::args::Flags; use crate::args::ReplFlags; @@ -140,7 +142,7 @@ async fn read_eval_file( cli_options: &CliOptions, file_fetcher: &FileFetcher, eval_file: &str, -) -> Result { +) -> Result, AnyError> { let specifier = deno_core::resolve_url_or_path(eval_file, cli_options.initial_cwd())?; @@ -148,7 +150,7 @@ async fn read_eval_file( .fetch(&specifier, PermissionsContainer::allow_all()) .await?; - Ok((*file.source).to_string()) + Ok(file.into_text_decoded()?.source) } pub async fn run(flags: Flags, repl_flags: ReplFlags) -> Result { diff --git a/cli/tools/run/mod.rs b/cli/tools/run/mod.rs index 05fd2ba36a2cce..ffb26f2eb10565 100644 --- a/cli/tools/run/mod.rs +++ b/cli/tools/run/mod.rs @@ -2,7 +2,6 @@ use std::io::Read; -use deno_ast::MediaType; use deno_core::error::AnyError; use deno_runtime::permissions::Permissions; use deno_runtime::permissions::PermissionsContainer; @@ -90,17 +89,13 @@ pub async fn run_from_stdin(flags: Flags) -> Result { )?); let mut source = Vec::new(); std::io::stdin().read_to_end(&mut source)?; - // Create a dummy source file. 
- let source_file = File { - maybe_types: None, - media_type: MediaType::TypeScript, - source: String::from_utf8(source)?.into(), + // Save a fake file into file fetcher cache + // to allow module access by TS compiler + file_fetcher.insert_cached(File { specifier: main_module.clone(), maybe_headers: None, - }; - // Save our fake file into file fetcher cache - // to allow module access by TS compiler - file_fetcher.insert_cached(source_file); + source: source.into(), + }); let mut worker = worker_factory .create_main_worker(main_module, permissions) @@ -175,20 +170,15 @@ pub async fn eval_command( format!("console.log({})", eval_flags.code) } else { eval_flags.code - } - .into_bytes(); - - let file = File { - maybe_types: None, - media_type: MediaType::Unknown, - source: String::from_utf8(source_code)?.into(), - specifier: main_module.clone(), - maybe_headers: None, }; - // Save our fake file into file fetcher cache + // Save a fake file into file fetcher cache // to allow module access by TS compiler. 
- file_fetcher.insert_cached(file); + file_fetcher.insert_cached(File { + specifier: main_module.clone(), + maybe_headers: None, + source: source_code.into_bytes().into(), + }); let permissions = PermissionsContainer::new(Permissions::from_options( &cli_options.permissions_options(), diff --git a/cli/tools/test/mod.rs b/cli/tools/test/mod.rs index d1dc76028ac2e3..7a5d633ec75d36 100644 --- a/cli/tools/test/mod.rs +++ b/cli/tools/test/mod.rs @@ -685,11 +685,9 @@ fn extract_files_from_regex_blocks( .unwrap_or(file_specifier); Some(File { - maybe_types: None, - media_type: file_media_type, - source: file_source.into(), specifier: file_specifier, maybe_headers: None, + source: file_source.into_bytes().into(), }) }) .collect(); @@ -769,7 +767,10 @@ async fn fetch_inline_files( let mut files = Vec::new(); for specifier in specifiers { let fetch_permissions = PermissionsContainer::allow_all(); - let file = file_fetcher.fetch(&specifier, fetch_permissions).await?; + let file = file_fetcher + .fetch(&specifier, fetch_permissions) + .await? 
+ .into_text_decoded()?; let inline_files = if file.media_type == MediaType::Unknown { extract_files_from_fenced_blocks( @@ -1177,9 +1178,8 @@ async fn fetch_specifiers_with_test_mode( .fetch(specifier, PermissionsContainer::allow_all()) .await?; - if file.media_type == MediaType::Unknown - || file.media_type == MediaType::Dts - { + let (media_type, _) = file.resolve_media_type_and_charset(); + if matches!(media_type, MediaType::Unknown | MediaType::Dts) { *mode = TestMode::Documentation } } diff --git a/cli/tools/vendor/build.rs b/cli/tools/vendor/build.rs index e2728aa46f56ba..1646a995929545 100644 --- a/cli/tools/vendor/build.rs +++ b/cli/tools/vendor/build.rs @@ -12,7 +12,7 @@ use deno_core::error::AnyError; use deno_core::futures::future::LocalBoxFuture; use deno_core::parking_lot::Mutex; use deno_graph::source::ResolutionMode; -use deno_graph::EsmModule; +use deno_graph::JsModule; use deno_graph::Module; use deno_graph::ModuleGraph; use deno_runtime::deno_fs; @@ -36,7 +36,7 @@ use super::specifiers::is_remote_specifier; pub trait VendorEnvironment { fn cwd(&self) -> Result; fn create_dir_all(&self, dir_path: &Path) -> Result<(), AnyError>; - fn write_file(&self, file_path: &Path, text: &str) -> Result<(), AnyError>; + fn write_file(&self, file_path: &Path, bytes: &[u8]) -> Result<(), AnyError>; fn path_exists(&self, path: &Path) -> bool; } @@ -51,8 +51,8 @@ impl VendorEnvironment for RealVendorEnvironment { Ok(std::fs::create_dir_all(dir_path)?) 
} - fn write_file(&self, file_path: &Path, text: &str) -> Result<(), AnyError> { - std::fs::write(file_path, text) + fn write_file(&self, file_path: &Path, bytes: &[u8]) -> Result<(), AnyError> { + std::fs::write(file_path, bytes) .with_context(|| format!("Failed writing {}", file_path.display())) } @@ -159,7 +159,7 @@ pub async fn build< // write out all the files for module in &remote_modules { let source = match module { - Module::Esm(module) => &module.source, + Module::Js(module) => &module.source, Module::Json(module) => &module.source, Module::Node(_) | Module::Npm(_) | Module::External(_) => continue, }; @@ -169,17 +169,17 @@ pub async fn build< .unwrap_or_else(|| mappings.local_path(specifier)); environment.create_dir_all(local_path.parent().unwrap())?; - environment.write_file(&local_path, source)?; + environment.write_file(&local_path, source.as_bytes())?; } // write out the proxies for (specifier, proxied_module) in mappings.proxied_modules() { let proxy_path = mappings.local_path(specifier); - let module = graph.get(specifier).unwrap().esm().unwrap(); + let module = graph.get(specifier).unwrap().js().unwrap(); let text = build_proxy_module_source(module, proxied_module, parsed_source_cache)?; - environment.write_file(&proxy_path, &text)?; + environment.write_file(&proxy_path, text.as_bytes())?; } // create the import map if necessary @@ -195,7 +195,7 @@ pub async fn build< resolver, parsed_source_cache, })?; - environment.write_file(&import_map_path, &import_map_text)?; + environment.write_file(&import_map_path, import_map_text.as_bytes())?; } Ok(BuildOutput { @@ -242,7 +242,7 @@ fn validate_original_import_map( } fn build_proxy_module_source( - module: &EsmModule, + module: &JsModule, proxied_module: &ProxiedModule, parsed_source_cache: &ParsedSourceCache, ) -> Result { @@ -269,7 +269,7 @@ fn build_proxy_module_source( // add a default export if one exists in the module let parsed_source = - 
parsed_source_cache.get_parsed_source_from_esm_module(module)?; + parsed_source_cache.get_parsed_source_from_js_module(module)?; if has_default_export(&parsed_source) { writeln!(text, "export {{ default }} from \"{relative_specifier}\";") .unwrap(); diff --git a/cli/tools/vendor/import_map.rs b/cli/tools/vendor/import_map.rs index c985dfb5fdf3b9..7f627f35e52c91 100644 --- a/cli/tools/vendor/import_map.rs +++ b/cli/tools/vendor/import_map.rs @@ -240,7 +240,7 @@ fn visit_modules( ) -> Result<(), AnyError> { for module in modules { let module = match module { - Module::Esm(module) => module, + Module::Js(module) => module, // skip visiting Json modules as they are leaves Module::Json(_) | Module::Npm(_) @@ -249,9 +249,8 @@ fn visit_modules( }; let parsed_source = - parsed_source_cache.get_parsed_source_from_esm_module(module)?; + parsed_source_cache.get_parsed_source_from_js_module(module)?; let text_info = parsed_source.text_info().clone(); - let source_text = &module.source; for dep in module.dependencies.values() { visit_resolution( @@ -261,7 +260,7 @@ fn visit_modules( &module.specifier, mappings, &text_info, - source_text, + &module.source, ); visit_resolution( &dep.maybe_type, @@ -270,7 +269,7 @@ fn visit_modules( &module.specifier, mappings, &text_info, - source_text, + &module.source, ); } @@ -282,7 +281,7 @@ fn visit_modules( &module.specifier, mappings, &text_info, - source_text, + &module.source, ); } } diff --git a/cli/tools/vendor/mappings.rs b/cli/tools/vendor/mappings.rs index 00d5055f7c891e..6d2722b89ca4a4 100644 --- a/cli/tools/vendor/mappings.rs +++ b/cli/tools/vendor/mappings.rs @@ -55,7 +55,7 @@ impl Mappings { for specifier in specifiers { let module = graph.get(&specifier).unwrap(); let media_type = match module { - Module::Esm(module) => module.media_type, + Module::Js(module) => module.media_type, Module::Json(_) => MediaType::Json, Module::Node(_) | Module::Npm(_) | Module::External(_) => continue, }; @@ -81,7 +81,7 @@ impl Mappings { // 
resolve all the "proxy" paths to use for when an x-typescript-types header is specified for module in remote_modules { - if let Some(module) = module.esm() { + if let Some(module) = module.js() { if let Some(resolved) = &module .maybe_types_dependency .as_ref() diff --git a/cli/tools/vendor/test.rs b/cli/tools/vendor/test.rs index bf6579d231946d..7910dcf2260511 100644 --- a/cli/tools/vendor/test.rs +++ b/cli/tools/vendor/test.rs @@ -122,7 +122,7 @@ impl Loader for TestLoader { let result = self.files.get(specifier).map(|result| match result { Ok(result) => Ok(LoadResponse::Module { specifier: specifier.clone(), - content: result.0.clone().into(), + content: result.0.clone().into_bytes().into(), maybe_headers: result.1.clone(), }), Err(err) => Err(err), @@ -160,15 +160,15 @@ impl VendorEnvironment for TestVendorEnvironment { Ok(()) } - fn write_file(&self, file_path: &Path, text: &str) -> Result<(), AnyError> { + fn write_file(&self, file_path: &Path, text: &[u8]) -> Result<(), AnyError> { let parent = file_path.parent().unwrap(); if !self.directories.borrow().contains(parent) { bail!("Directory not found: {}", parent.display()); } - self - .files - .borrow_mut() - .insert(file_path.to_path_buf(), text.to_string()); + self.files.borrow_mut().insert( + file_path.to_path_buf(), + String::from_utf8(text.to_vec()).unwrap(), + ); Ok(()) } diff --git a/cli/tsc/diagnostics.rs b/cli/tsc/diagnostics.rs index 56610106b3f292..2030b5ba2ffa4e 100644 --- a/cli/tsc/diagnostics.rs +++ b/cli/tsc/diagnostics.rs @@ -298,7 +298,7 @@ impl Diagnostics { { if let Ok(Some(module)) = graph.try_get_prefer_types(&specifier) { if let Some(fast_check_module) = - module.esm().and_then(|m| m.fast_check_module()) + module.js().and_then(|m| m.fast_check_module()) { // todo(dsherret): use a short lived cache to prevent parsing // source maps so often diff --git a/cli/tsc/mod.rs b/cli/tsc/mod.rs index db5d80a1f60f2a..18316b750eca29 100644 --- a/cli/tsc/mod.rs +++ b/cli/tsc/mod.rs @@ -492,7 +492,7 @@ 
fn op_load( }; let maybe_source = if let Some(module) = graph.get(specifier) { match module { - Module::Esm(module) => { + Module::Js(module) => { media_type = module.media_type; let source = module .fast_check_module() @@ -597,7 +597,7 @@ fn op_resolve( let graph = &state.graph; let resolved_dep = graph .get(&referrer) - .and_then(|m| m.esm()) + .and_then(|m| m.js()) .and_then(|m| m.dependencies_prefer_fast_check().get(&specifier)) .and_then(|d| d.maybe_type.ok().or_else(|| d.maybe_code.ok())); @@ -653,7 +653,7 @@ fn resolve_graph_specifier_types( let maybe_module = graph.get(specifier); // follow the types reference directive, which may be pointing at an npm package let maybe_module = match maybe_module { - Some(Module::Esm(module)) => { + Some(Module::Js(module)) => { let maybe_types_dep = module .maybe_types_dependency .as_ref() @@ -668,7 +668,7 @@ fn resolve_graph_specifier_types( // now get the types from the resolved module match maybe_module { - Some(Module::Esm(module)) => { + Some(Module::Js(module)) => { Ok(Some((module.specifier.clone(), module.media_type))) } Some(Module::Json(module)) => { @@ -913,7 +913,7 @@ mod tests { .replace("://", "_") .replace('/', "-"); let source_path = self.fixtures.join(specifier_text); - let response = source_path.read_to_string_if_exists().map(|c| { + let response = source_path.read_to_bytes_if_exists().map(|c| { Some(deno_graph::source::LoadResponse::Module { specifier: specifier.clone(), maybe_headers: None, diff --git a/cli/util/text_encoding.rs b/cli/util/text_encoding.rs index 56b02e7dee54cc..25d827eb64dfb9 100644 --- a/cli/util/text_encoding.rs +++ b/cli/util/text_encoding.rs @@ -3,58 +3,6 @@ use base64::prelude::BASE64_STANDARD; use base64::Engine; use deno_core::ModuleCodeString; -use encoding_rs::*; -use std::borrow::Cow; -use std::io::Error; -use std::io::ErrorKind; - -pub const BOM_CHAR: char = '\u{FEFF}'; - -/// Attempts to detect the character encoding of the provided bytes. 
-/// -/// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian. -pub fn detect_charset(bytes: &'_ [u8]) -> &'static str { - const UTF16_LE_BOM: &[u8] = b"\xFF\xFE"; - const UTF16_BE_BOM: &[u8] = b"\xFE\xFF"; - - if bytes.starts_with(UTF16_LE_BOM) { - "utf-16le" - } else if bytes.starts_with(UTF16_BE_BOM) { - "utf-16be" - } else { - // Assume everything else is utf-8 - "utf-8" - } -} - -/// Attempts to convert the provided bytes to a UTF-8 string. -/// -/// Supports all encodings supported by the encoding_rs crate, which includes -/// all encodings specified in the WHATWG Encoding Standard, and only those -/// encodings (see: <https://encoding.spec.whatwg.org>). -pub fn convert_to_utf8<'a>( - bytes: &'a [u8], - charset: &'_ str, -) -> Result<Cow<'a, str>, Error> { - match Encoding::for_label(charset.as_bytes()) { - Some(encoding) => encoding - .decode_without_bom_handling_and_without_replacement(bytes) - .ok_or_else(|| ErrorKind::InvalidData.into()), - None => Err(Error::new( - ErrorKind::InvalidInput, - format!("Unsupported charset: {charset}"), - )), - } -} - -/// Strips the byte order mark from the provided text if it exists. -pub fn strip_bom(text: &str) -> &str { - if text.starts_with(BOM_CHAR) { - &text[BOM_CHAR.len_utf8()..] - } else { - text - } -} static SOURCE_MAP_PREFIX: &[u8] = b"//# sourceMappingURL=data:application/json;base64,"; @@ -91,52 +39,6 @@ pub fn code_without_source_map(mut code: ModuleCodeString) -> ModuleCodeString { mod tests { use super::*; - fn test_detection(test_data: &[u8], expected_charset: &str) { - let detected_charset = detect_charset(test_data); - assert_eq!( - expected_charset.to_lowercase(), - detected_charset.to_lowercase() - ); - } - - #[test] - fn test_detection_utf8_no_bom() { - let test_data = "Hello UTF-8 it is \u{23F0} for Deno!" 
- .to_owned() - .into_bytes(); - test_detection(&test_data, "utf-8"); - } - - #[test] - fn test_detection_utf16_little_endian() { - let test_data = b"\xFF\xFEHello UTF-16LE".to_owned().to_vec(); - test_detection(&test_data, "utf-16le"); - } - - #[test] - fn test_detection_utf16_big_endian() { - let test_data = b"\xFE\xFFHello UTF-16BE".to_owned().to_vec(); - test_detection(&test_data, "utf-16be"); - } - - #[test] - fn test_decoding_unsupported_charset() { - let test_data = Vec::new(); - let result = convert_to_utf8(&test_data, "utf-32le"); - assert!(result.is_err()); - let err = result.expect_err("Err expected"); - assert!(err.kind() == ErrorKind::InvalidInput); - } - - #[test] - fn test_decoding_invalid_utf8() { - let test_data = b"\xFE\xFE\xFF\xFF".to_vec(); - let result = convert_to_utf8(&test_data, "utf-8"); - assert!(result.is_err()); - let err = result.expect_err("Err expected"); - assert!(err.kind() == ErrorKind::InvalidData); - } - #[test] fn test_source_without_source_map() { run_test("", ""); diff --git a/test_util/src/fs.rs b/test_util/src/fs.rs index 17620276bcdf83..0e47a750352e79 100644 --- a/test_util/src/fs.rs +++ b/test_util/src/fs.rs @@ -118,6 +118,10 @@ impl PathRef { .with_context(|| format!("Could not read file: {}", self)) } + pub fn read_to_bytes_if_exists(&self) -> Result<Vec<u8>, anyhow::Error> { + fs::read(self).with_context(|| format!("Could not read file: {}", self)) + } + pub fn read_json<TValue: DeserializeOwned>(&self) -> TValue { serde_json::from_str(&self.read_to_string()).unwrap() }