From c1429e2971f6f83a103bafb89fb2c3fa99e8d853 Mon Sep 17 00:00:00 2001 From: Khafra Date: Sun, 30 Apr 2023 10:02:38 -0400 Subject: [PATCH] mimesniff: fix many broken tests (#2103) * mimesniff: fix many broken tests * mimesniff: dont export this * mimesniff: undo small change * mimesniff: fix some bugs --- lib/fetch/dataURL.js | 95 ++++++++++++++++++++++++++++++-------- lib/fetch/response.js | 2 +- test/wpt/runner/runner.mjs | 3 +- 3 files changed, 79 insertions(+), 21 deletions(-) diff --git a/lib/fetch/dataURL.js b/lib/fetch/dataURL.js index beefad15482..6df4fcc8cc6 100644 --- a/lib/fetch/dataURL.js +++ b/lib/fetch/dataURL.js @@ -1,14 +1,18 @@ const assert = require('assert') const { atob } = require('buffer') -const { isValidHTTPToken, isomorphicDecode } = require('./util') +const { isomorphicDecode } = require('./util') const encoder = new TextEncoder() -// Regex -const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+-.^_|~A-z0-9]+$/ +/** + * @see https://mimesniff.spec.whatwg.org/#http-token-code-point + */ +const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+-.^_|~A-Za-z0-9]+$/ const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/ // eslint-disable-line -// https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point -const HTTP_QUOTED_STRING_TOKENS = /^(\u0009|\x{0020}-\x{007E}|\x{0080}-\x{00FF})+$/ // eslint-disable-line +/** + * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point + */ +const HTTP_QUOTED_STRING_TOKENS = /[\u0009|\u0020-\u007E|\u0080-\u00FF]/ // eslint-disable-line // https://fetch.spec.whatwg.org/#data-url-processor /** @param {URL} dataURL */ @@ -38,14 +42,12 @@ function dataURLProcessor (dataURL) { // 6. Strip leading and trailing ASCII whitespace // from mimeType. - // Note: This will only remove U+0020 SPACE code - // points, if any. // Undici implementation note: we need to store the // length because if the mimetype has spaces removed, // the wrong amount will be sliced from the input in // step #9 const mimeTypeLength = mimeType.length - mimeType = mimeType.replace(/^(\u0020)+|(\u0020)+$/g, '') + mimeType = removeASCIIWhitespace(mimeType, true, true) // 7. If position is past the end of input, then // return failure @@ -233,7 +235,7 @@ function percentDecode (input) { function parseMIMEType (input) { // 1. Remove any leading and trailing HTTP whitespace // from input. - input = input.trim() + input = removeHTTPWhitespace(input, true, true) // 2. Let position be a position variable for input, // initially pointing at the start of input. @@ -274,7 +276,7 @@ function parseMIMEType (input) { ) // 8. Remove any trailing HTTP whitespace from subtype. - subtype = subtype.trimEnd() + subtype = removeHTTPWhitespace(subtype, false, true) // 9. If subtype is the empty string or does not solely // contain HTTP token code points, then return failure. @@ -282,17 +284,20 @@ function parseMIMEType (input) { return 'failure' } + const typeLowercase = type.toLowerCase() + const subtypeLowercase = subtype.toLowerCase() + // 10. Let mimeType be a new MIME type record whose type // is type, in ASCII lowercase, and subtype is subtype, // in ASCII lowercase. // https://mimesniff.spec.whatwg.org/#mime-type const mimeType = { - type: type.toLowerCase(), - subtype: subtype.toLowerCase(), + type: typeLowercase, + subtype: subtypeLowercase, /** @type {Map} */ parameters: new Map(), // https://mimesniff.spec.whatwg.org/#mime-type-essence - essence: `${type}/${subtype}` + essence: `${typeLowercase}/${subtypeLowercase}` } // 11. While position is not past the end of input: @@ -370,8 +375,7 @@ function parseMIMEType (input) { ) // 2. Remove any trailing HTTP whitespace from parameterValue. - // Note: it says "trailing" whitespace; leading is fine. - parameterValue = parameterValue.trimEnd() + parameterValue = removeHTTPWhitespace(parameterValue, false, true) // 3. If parameterValue is the empty string, then continue. if (parameterValue.length === 0) { @@ -388,7 +392,7 @@ function parseMIMEType (input) { if ( parameterName.length !== 0 && HTTP_TOKEN_CODEPOINTS.test(parameterName) && - !HTTP_QUOTED_STRING_TOKENS.test(parameterValue) && + (parameterValue.length === 0 || HTTP_QUOTED_STRING_TOKENS.test(parameterValue)) && !mimeType.parameters.has(parameterName) ) { mimeType.parameters.set(parameterName, parameterValue) @@ -522,11 +526,11 @@ function collectAnHTTPQuotedString (input, position, extractValue) { */ function serializeAMimeType (mimeType) { assert(mimeType !== 'failure') - const { type, subtype, parameters } = mimeType + const { parameters, essence } = mimeType // 1. Let serialization be the concatenation of mimeType’s // type, U+002F (/), and mimeType’s subtype. - let serialization = `${type}/${subtype}` + let serialization = essence // 2. For each name → value of mimeType’s parameters: for (let [name, value] of parameters.entries()) { @@ -541,7 +545,7 @@ function serializeAMimeType (mimeType) { // 4. If value does not solely contain HTTP token code // points or value is the empty string, then: - if (!isValidHTTPToken(value)) { + if (!HTTP_TOKEN_CODEPOINTS.test(value)) { // 1. Precede each occurence of U+0022 (") or // U+005C (\) in value with U+005C (\). value = value.replace(/(\\|")/g, '\\$1') @@ -561,6 +565,59 @@ function serializeAMimeType (mimeType) { return serialization } +/** + * @see https://fetch.spec.whatwg.org/#http-whitespace + * @param {string} char + */ +function isHTTPWhiteSpace (char) { + return char === '\r' || char === '\n' || char === '\t' || char === ' ' +} + +/** + * @see https://fetch.spec.whatwg.org/#http-whitespace + * @param {string} str + */ +function removeHTTPWhitespace (str, leading = true, trailing = true) { + let lead = 0 + let trail = str.length - 1 + + if (leading) { + for (; lead < str.length && isHTTPWhiteSpace(str[lead]); lead++); + } + + if (trailing) { + for (; trail > 0 && isHTTPWhiteSpace(str[trail]); trail--); + } + + return str.slice(lead, trail + 1) +} + +/** + * @see https://infra.spec.whatwg.org/#ascii-whitespace + * @param {string} char + */ +function isASCIIWhitespace (char) { + return char === '\r' || char === '\n' || char === '\t' || char === '\f' || char === ' ' +} + +/** + * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace + */ +function removeASCIIWhitespace (str, leading = true, trailing = true) { + let lead = 0 + let trail = str.length - 1 + + if (leading) { + for (; lead < str.length && isASCIIWhitespace(str[lead]); lead++); + } + + if (trailing) { + for (; trail > 0 && isASCIIWhitespace(str[trail]); trail--); + } + + return str.slice(lead, trail + 1) +} + module.exports = { dataURLProcessor, URLSerializer, diff --git a/lib/fetch/response.js b/lib/fetch/response.js index 96cacbce157..1029dbef533 100644 --- a/lib/fetch/response.js +++ b/lib/fetch/response.js @@ -467,7 +467,7 @@ function initializeResponse (response, init, body) { // 5. If init["headers"] exists, then fill response’s headers with init["headers"]. if ('headers' in init && init.headers != null) { - fill(response[kState].headersList, init.headers) + fill(response[kHeaders], init.headers) } // 6. If body was given, then: diff --git a/test/wpt/runner/runner.mjs b/test/wpt/runner/runner.mjs index 5979730d70a..ec236454e74 100644 --- a/test/wpt/runner/runner.mjs +++ b/test/wpt/runner/runner.mjs @@ -150,8 +150,9 @@ export class WPTRunner extends EventEmitter { return [test, code, meta] }) + console.log('='.repeat(96)) + for (const [test, code, meta] of files) { - console.log('='.repeat(96)) console.log(`Started ${test}`) const status = resolveStatusPath(test, this.#status)