diff --git a/src/handlers/main.ts b/src/handlers/main.ts
index 8ff4ab9..d3fa83a 100644
--- a/src/handlers/main.ts
+++ b/src/handlers/main.ts
@@ -12,6 +12,8 @@
 import isLocalResource from "../utils/islocal";
 import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main";
 import { HandlerInput } from "./handler-input";
+import { Readable } from "stream";
+import { decodeStream, parseEncodingName } from "../utils/http";
 
 export default async function handlePage(
   url: string, // remote URL
@@ -26,6 +28,7 @@ export default async function handlePage(
   }
 
   const response = await axios.get(url);
+  const data: Readable = response.data;
   const mime: string | undefined = response.headers["content-type"]?.toString();
 
   if (mime && mime.indexOf("text/html") === -1) {
@@ -34,7 +37,7 @@ export default async function handlePage(
 
   return getFallbackEngine(urlObj.hostname, engine)(
     new HandlerInput(
-      response.data,
+      await decodeStream(data, parseEncodingName(mime)),
       url,
       requestUrl,
       engine,
diff --git a/src/types/axios.ts b/src/types/axios.ts
index bbb8162..947ccfa 100644
--- a/src/types/axios.ts
+++ b/src/types/axios.ts
@@ -5,4 +5,5 @@ export default axios.create({
     "User-Agent":
       "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
   },
+  responseType: "stream",
 });
diff --git a/src/utils/http.ts b/src/utils/http.ts
new file mode 100644
index 0000000..86e436a
--- /dev/null
+++ b/src/utils/http.ts
@@ -0,0 +1,58 @@
+import { Readable } from "stream";
+import iconv from "iconv-lite";
+
+/** Charset used when none is declared or the declared one is unsupported. */
+const DEFAULT_CHARSET = "utf-8";
+
+/**
+ * Reads `data` to the end and decodes the collected bytes into a string.
+ *
+ * @param data - byte stream to drain, e.g. an axios stream response body
+ * @param charset - encoding label; one iconv-lite does not recognise falls back
+ *   to UTF-8 rather than letting `iconv.decodeStream` throw
+ * @returns the decoded text
+ * @throws rejects with the stream's error if reading or decoding fails
+ */
+export async function decodeStream(
+  data: Readable,
+  charset: string = DEFAULT_CHARSET,
+): Promise<string> {
+  const encoding = iconv.encodingExists(charset) ? charset : DEFAULT_CHARSET;
+  const decoder = iconv.decodeStream(encoding) as IconvStream;
+  return await new Promise<string>((resolve, reject) => {
+    // pipe() does not forward source errors to its destination; without this
+    // listener a failed download would leave the promise pending forever.
+    data.once("error", reject);
+    // collect() reports a decoder error as its first argument, null on success.
+    decoder.collect((err, body) => {
+      if (err) {
+        reject(err);
+      } else {
+        resolve(body);
+      }
+    });
+    data.pipe(decoder);
+  });
+}
+
+/**
+ * Extracts the charset label from a Content-Type header value.
+ *
+ * @param ctype - raw header value, e.g. `text/html; charset="ISO-8859-1"`
+ * @returns the declared charset, or "utf-8" when the header is missing or
+ *   carries no charset parameter
+ */
+export function parseEncodingName(ctype?: string): string {
+  // The parameter name is case-insensitive, its value may be quoted and other
+  // parameters may follow it, so the match must not be anchored to the end.
+  const match = ctype?.match(/charset\s*=\s*["']?([^\s;"']+)/i);
+  return match?.[1] ?? DEFAULT_CHARSET;
+}
+
+/**
+ * Decoder stream returned by `iconv.decodeStream`, typed locally so that
+ * `collect` can be called on it.
+ */
+interface IconvStream extends NodeJS.ReadWriteStream {
+  collect: (cb: (err: Error | null, body: string) => void) => void;
+}