'use strict';

// Rationale for this particular test character sequence, which is
// used in filenames and also in file contents:
//
// - ABC~ ensures the string starts with something we can read to
//   ensure it is from the correct source; ~ is used because even
//   some 1-byte otherwise-ASCII-like parts of ISO-2022-JP
//   interpret it differently.
// - ‾¥ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; it is also the "simplest" case
//   needing substitution in ISO-2022-JP
// - ･ (U+FF65, halfwidth) is inside a single-byte range of
//   ISO-2022-JP in some variants and helps diagnose problems due to
//   filesystem encoding or locale; on the web it is distinct when
//   decoding but unified when encoding
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - • is inside Windows-1252 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ★ is inside ISO-2022-JP on a non-Kanji page and makes correct
//   output easier to spot
// - 星 is inside ISO-2022-JP on a Kanji page and makes correct
//   output easier to spot
// - 🌟 is outside the BMP and makes incorrect surrogate pair
//   substitution detectable and ensures substitutions work
//   correctly immediately after Kanji 2-byte ISO-2022-JP
// - 星 repeated here ensures the correct codec state is used
//   after a non-BMP substitution
// - ★ repeated here also makes correct output easier to spot
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes and also ensures
//   substitutions work correctly immediately after non-Kanji
//   2-byte ISO-2022-JP
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - • is inside Windows-1252 and again helps diagnose problems
//   due to filesystem encoding or locale
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - ･ (U+FF65, halfwidth) is inside a single-byte range of
//   ISO-2022-JP in some variants and helps diagnose problems due to
//   filesystem encoding or locale; on the web it is distinct when
//   decoding but unified when encoding
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; again it is a "simple"
//   substitution case
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¥‾ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ~XYZ ensures earlier errors don't lead to misencoding of
//   simple ASCII
//
// Overall the near-symmetry makes common I18N mistakes like
// off-by-1-after-non-BMP easier to spot. All the characters
// are also allowed in Windows Unicode filenames.
//
// NOTE: The two katakana middle dots are written as \u escapes on
// purpose: U+FF65 (halfwidth) and U+30FB look alike and are easily
// folded into one another by editors or normalization, which would
// silently drop the single-byte katakana case from the test.
const kTestChars =
    'ABC~‾¥≈¤\uFF65\u30FB•∙·☼★星🌟星★☼·∙•\u30FB\uFF65¤≈¥‾~XYZ';

// NOTE: The expected interpretation of ISO-2022-JP according to
// https://encoding.spec.whatwg.org/#iso-2022-jp-encoder unifies
// single-byte and double-byte katakana.
//
// Everything left outside 7-bit ASCII after spelling out the escape
// sequences is turned into a decimal numeric character reference.
const kTestFallbackIso2022jp =
    ('ABC~\x1B(J~\\≈¤\x1B$B!&!&\x1B(B•∙·☼\x1B$B!z@1\x1B(B🌟' +
     '\x1B$B@1!z\x1B(B☼·∙•\x1B$B!&!&\x1B(B¤≈\x1B(J\\~\x1B(B~XYZ')
        .replace(/[^\0-\x7F]/gu, x => `&#${x.codePointAt(0)};`);

// NOTE: \uFFFD is used here to replace Windows-1252 bytes to match
// how we will see them in the reflected POST bytes in a frame using
// UTF-8 byte interpretation. The bytes will actually be intact, but
// this code cannot tell and does not really care.
//
// First pass: code points above U+00FF become numeric character
// references. Second pass: the remaining U+0080..U+00FF characters
// become U+FFFD.
const kTestFallbackWindows1252 =
    'ABC~‾\xA5≈\xA4\uFF65\u30FB\x95∙\xB7☼★星🌟星★☼\xB7∙\x95\u30FB\uFF65\xA4≈\xA5‾~XYZ'
        .replace(/[^\0-\xFF]/gu, x => `&#${x.codePointAt(0)};`)
        .replace(/[\x80-\xFF]/g, '\uFFFD');

// Every character of kTestChars outside 7-bit ASCII is replaced by a
// decimal numeric character reference.
const kTestFallbackXUserDefined =
    kTestChars.replace(/[^\0-\x7F]/gu, x => `&#${x.codePointAt(0)};`);

// formPostFileUploadTest - verifies multipart upload structure and
// numeric character reference replacement for filenames, field names,
// and field values.
//
// Uses /fetch/api/resources/echo-content.py to echo the upload
// POST with UTF-8 byte interpretation, leading to the "UTF-8 goggles"
// behavior documented below for expectedEncodedBaseName when non-
// UTF-8-compatible byte sequences appear in the formEncoding-encoded
// uploaded data.
//
// Fields in the parameter object:
//
// - fileNameSource: purely explanatory and gives a clue about which
//   character encoding is the source for the non-7-bit-ASCII parts of
//   the fileBaseName, or Unicode if no smaller-than-Unicode source
//   contains all the characters. Used in the test name.
// - fileBaseName: the not-necessarily-just-7-bit-ASCII file basename
//   used for the constructed test file. Used in the test name.
// - formEncoding: the acceptCharset of the form used to submit the
//   test file. Used in the test name.
// - expectedEncodedBaseName: the expected formEncoding-encoded
//   version of fileBaseName with unencodable characters replaced by
//   numeric character references and non-7-bit-ASCII bytes seen
//   through UTF-8 goggles; subsequences not interpretable as UTF-8
//   have each byte represented here by \uFFFD REPLACEMENT CHARACTER.
//
// Registers one promise_test; nothing is returned.
const formPostFileUploadTest = ({
  fileNameSource,
  fileBaseName,
  formEncoding,
  expectedEncodedBaseName,
}) => {
  promise_test(async testCase => {
    // Wait for the window load event if the document has not finished
    // loading yet.
    if (document.readyState !== 'complete') {
      await new Promise(
          resolve => addEventListener('load', resolve, {once: true}));
    }

    // The form submits into this named iframe so that the echoed
    // request body can be read back from its document.
    const formTargetFrame = Object.assign(document.createElement('iframe'), {
      name: 'formtargetframe',
    });
    document.body.append(formTargetFrame);
    testCase.add_cleanup(() => {
      document.body.removeChild(formTargetFrame);
    });

    const form = Object.assign(document.createElement('form'), {
      acceptCharset: formEncoding,
      action: '/fetch/api/resources/echo-content.py',
      method: 'POST',
      enctype: 'multipart/form-data',
      target: formTargetFrame.name,
    });
    document.body.append(form);
    testCase.add_cleanup(() => {
      document.body.removeChild(form);
    });

    // Used to verify that the browser agrees with the test about
    // which form charset is used.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: '_charset_',
    }));

    // Used to verify that the browser agrees with the test about
    // field value replacement and encoding independently of file system
    // idiosyncrasies.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: 'filename',
      value: fileBaseName,
    }));

    // Same, but with name and value reversed to ensure field names
    // get the same treatment.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: fileBaseName,
      value: 'filename',
    }));

    const fileInput = Object.assign(document.createElement('input'), {
      type: 'file',
      name: 'file',
    });
    form.append(fileInput);

    // Removes c:\fakepath\ or other pseudofolder and returns just the
    // final component of filePath; allows both / and \ as segment
    // delimiters.
    const baseNameOfFilePath = filePath => filePath.split(/[\/\\]/).pop();

    await new Promise(resolve => {
      // Populate the file input with a synthetic file whose content is
      // kTestChars and whose name is fileBaseName.
      const dataTransfer = new DataTransfer();
      dataTransfer.items.add(
          new File([kTestChars], fileBaseName, {type: 'text/plain'}));
      fileInput.files = dataTransfer.files;

      // For historical reasons .value will be prefixed with
      // c:\fakepath\, but the basename should match the file name
      // exposed through the newer .files[0].name API. This check
      // verifies that assumption.
      assert_equals(
          fileInput.files[0].name,
          baseNameOfFilePath(fileInput.value),
          `The basename of the field's value should match its files[0].name`);

      // Attach the handler before submitting so it is in place by the
      // time the frame's load event fires.
      formTargetFrame.onload = resolve;
      form.submit();
    });

    const formDataText = formTargetFrame.contentDocument.body.textContent;
    const formDataLines = formDataText.split('\n');

    // Drop the empty final entry produced when the text ends with a
    // line break.
    if (formDataLines.length && !formDataLines[formDataLines.length - 1]) {
      formDataLines.pop();
    }

    assert_greater_than(
        formDataLines.length,
        2,
        `${fileBaseName}: multipart form data must have at least 3 lines: ${
             JSON.stringify(formDataText)
           }`);

    // The first line of the echoed body is taken as the boundary
    // delimiter; the last line must be the same delimiter followed by
    // "--".
    const boundary = formDataLines[0];
    assert_equals(
        formDataLines[formDataLines.length - 1],
        boundary + '--',
        `${fileBaseName}: multipart form data must end with ${boundary}--: ${
             JSON.stringify(formDataText)
           }`);

    // Expected body, one part per form control in the order the
    // controls were appended to the form.
    const expectedText = [
      boundary,
      'Content-Disposition: form-data; name="_charset_"',
      '',
      formEncoding,
      boundary,
      'Content-Disposition: form-data; name="filename"',
      '',
      expectedEncodedBaseName,
      boundary,
      `Content-Disposition: form-data; name="${expectedEncodedBaseName}"`,
      '',
      'filename',
      boundary,
      `Content-Disposition: form-data; name="file"; ` +
          `filename="${expectedEncodedBaseName}"`,
      'Content-Type: text/plain',
      '',
      kTestChars,
      boundary + '--',
    ].join('\n');

    // Only a prefix match is required, so any trailing line break in
    // the echoed text is tolerated.
    assert_true(
        formDataText.startsWith(expectedText),
        `Unexpected multipart-shaped form data received:\n${
             formDataText
           }\nExpected:\n${expectedText}`);
  }, `Upload ${fileBaseName} (${fileNameSource}) in ${formEncoding} form`);
};