Merge pull request #57 from TxtDot/encoding

Decode response stream manually with iconv-lite
This commit is contained in:
Artemy Egorov 2023-09-21 09:49:09 +03:00 committed by GitHub
commit 28bbcda513
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 1 deletions

View File

@ -12,6 +12,8 @@ import isLocalResource from "../utils/islocal";
import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main"; import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main";
import { HandlerInput } from "./handler-input"; import { HandlerInput } from "./handler-input";
import { Readable } from "stream";
import { decodeStream, parseEncodingName } from "../utils/http";
export default async function handlePage( export default async function handlePage(
url: string, // remote URL url: string, // remote URL
@ -26,6 +28,7 @@ export default async function handlePage(
} }
const response = await axios.get(url); const response = await axios.get(url);
const data: Readable = response.data;
const mime: string | undefined = response.headers["content-type"]?.toString(); const mime: string | undefined = response.headers["content-type"]?.toString();
if (mime && mime.indexOf("text/html") === -1) { if (mime && mime.indexOf("text/html") === -1) {
@ -34,7 +37,7 @@ export default async function handlePage(
return getFallbackEngine(urlObj.hostname, engine)( return getFallbackEngine(urlObj.hostname, engine)(
new HandlerInput( new HandlerInput(
response.data, await decodeStream(data, parseEncodingName(mime)),
url, url,
requestUrl, requestUrl,
engine, engine,

View File

@ -5,4 +5,5 @@ export default axios.create({
"User-Agent": "User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
}, },
responseType: "stream",
}); });

26
src/utils/http.ts Normal file
View File

@ -0,0 +1,26 @@
import { Readable } from "stream";
import iconv from "iconv-lite";
/**
 * Reads a byte stream to the end and returns its contents decoded to a string.
 *
 * @param data - Readable stream of raw bytes (e.g. an HTTP response body).
 * @param charset - Encoding label used to decode the bytes. Labels that
 *   iconv-lite does not recognize fall back to UTF-8 instead of throwing.
 * @returns The fully decoded text.
 * @throws Rejects if the source stream emits an error or decoding fails.
 */
export async function decodeStream(
  data: Readable,
  charset: string = "utf-8",
): Promise<string> {
  // iconv.decodeStream throws on unknown labels; servers do send bogus
  // charsets, so degrade to UTF-8 rather than failing the whole request.
  const encoding = iconv.encodingExists(charset) ? charset : "utf-8";
  const strm = data.pipe(iconv.decodeStream(encoding)) as IconvStream;

  return await new Promise<string>((resolve, reject) => {
    // pipe() does not forward errors from the source to the destination;
    // without this listener a failed download would leave the promise pending.
    data.once("error", reject);

    strm.collect((err: Error | null, body: string) => {
      if (err) {
        // Surface decoder failures instead of resolving with an undefined body.
        reject(err);
        return;
      }
      resolve(body);
    });
  });
}
/**
 * Extracts the charset label from a Content-Type header value.
 *
 * The parameter name is matched case-insensitively, the value may be quoted,
 * may be followed by further parameters or whitespace, and may contain any
 * characters other than whitespace, `;` and quotes (so labels such as
 * `Shift_JIS` are accepted).
 *
 * @param ctype - Raw Content-Type header value, if any.
 * @returns The charset label exactly as written in the header, or `"utf-8"`
 *   when the header is missing or carries no charset parameter.
 */
export function parseEncodingName(ctype?: string): string {
  const match = ctype?.match(/charset\s*=\s*["']?([^\s;"']+)/i);
  return match?.[1] ?? "utf-8";
}
/**
 * Local typing for the stream that results from piping into
 * `iconv.decodeStream(...)`: a read/write stream that additionally exposes
 * `collect`. Declared here because the piped result is cast to this shape.
 */
interface IconvStream extends NodeJS.ReadWriteStream {
// Invokes `cb` with an error or the decoded text.
// NOTE(review): presumably `err` is null/undefined on success and `body` is
// the whole decoded output — confirm against the iconv-lite implementation.
collect: (cb: (err: Error, body: string) => void) => void;
}