From 4994bd5e5427875a19da0baa995300f2416c0540 Mon Sep 17 00:00:00 2001
From: DarkCat09
Date: Fri, 22 Sep 2023 12:53:25 +0400
Subject: [PATCH] Refactor: move links replacement into utils, call it after Readability

---
Review notes (ignored by `git am`, not part of the commit message):

  * handlePage() now runs the selected engine first and rewrites links on
    the engine's output (a fresh JSDOM built from output.content), so
    HandlerInput no longer carries requestUrl / engine / redirectPath.
  * src/utils/replace-href.ts is the former HandlerInput link-rewriting code
    with the `this.` references replaced by function parameters; behaviour
    of the moved code is otherwise unchanged.
  * Limitation carried over unchanged: each srcset candidate is split on a
    single space, so a candidate written with a leading space (", url 2x")
    has an empty first part, and the empty string rather than the URL is
    what gets passed to generateProxyUrl().
  * Failures inside modifyLinks() and the srcset loop are still swallowed by
    empty catch blocks (best-effort rewriting).

 src/handlers/handler-input.ts | 84 -----------------------------------
 src/handlers/main.ts          | 17 +++++--
 src/utils/replace-href.ts     | 82 ++++++++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+), 88 deletions(-)
 create mode 100644 src/utils/replace-href.ts

diff --git a/src/handlers/handler-input.ts b/src/handlers/handler-input.ts
index 7877c84..ad510b9 100644
--- a/src/handlers/handler-input.ts
+++ b/src/handlers/handler-input.ts
@@ -1,27 +1,16 @@
 import { JSDOM } from "jsdom";
-import { generateParserUrl, generateProxyUrl } from "../utils/generate";
-import getConfig from "../config/main";
 
 export class HandlerInput {
   private data: string;
   private url: string;
-  private requestUrl: URL;
-  private engine?: string;
-  private redirectPath: string;
   private dom?: JSDOM;
 
   constructor(
     data: string,
     url: string,
-    requestUrl: URL,
-    engine?: string,
-    redirectPath: string = "get",
   ) {
     this.data = data;
     this.url = url;
-    this.requestUrl = requestUrl;
-    this.engine = engine;
-    this.redirectPath = redirectPath;
   }
 
   getUrl(): string {
@@ -34,79 +23,6 @@ export class HandlerInput {
     }
 
     this.dom = new JSDOM(this.data, { url: this.url });
-
-    const bytag =
-      (dom: JSDOM, tag: string) => dom.window.document.getElementsByTagName(tag);
-    const bycss =
-      (dom: JSDOM, css: string) => dom.window.document.querySelectorAll(css);
-
-    const parserUrl = (href: string) => generateParserUrl(
-      this.requestUrl,
-      href,
-      this.engine,
-      this.redirectPath,
-    );
-    const proxyUrl = (href: string) => generateProxyUrl(
-      this.requestUrl,
-      href,
-    );
-
-    this.modifyLinks(
-      bytag(this.dom, "a"),
-      "href",
-      parserUrl,
-    );
-    this.modifyLinks(
-      bycss(this.dom, "frame,iframe"),
-      "src",
-      parserUrl,
-    );
-
-    if (getConfig().proxy_res) {
-      this.modifyLinks(
-        bycss(this.dom, "img,image,video,audio,embed,track,source"),
-        "src",
-        proxyUrl,
-      );
-
-      this.modifyLinks(
-        bytag(this.dom, "object"),
-        "data",
-        proxyUrl,
-      );
-
-      const sources = bytag(this.dom, "source");
-      for (const source of sources) {
-        // split srcset by comma
-        // @ts-ignore
-        source.srcset = source.srcset.split(",").map(
-          (src: string) => {
-            // split src by space
-            const parts = src.split(" ");
-            try {
-              // first part is URL
-              parts[0] = proxyUrl(parts[0]);
-            } catch (_err) { }
-            // join by space after splitting
-            return parts.join(" ");
-          }
-        ).join(","); // join by comma
-      }
-    }
-
     return this.dom;
   }
-
-  private modifyLinks(
-    nodeList: NodeListOf<Element> | HTMLCollectionOf<Element>,
-    property: string,
-    generateLink: (value: string) => string,
-  ) {
-    for (const node of nodeList) {
-      try {
-        // @ts-ignore
-        node[property] = generateLink(node[property]);
-      } catch (_err) { }
-    }
-  }
 }
diff --git a/src/handlers/main.ts b/src/handlers/main.ts
index 27875a2..45e5e2f 100644
--- a/src/handlers/main.ts
+++ b/src/handlers/main.ts
@@ -4,6 +4,8 @@ import axios from "../types/axios";
 
 import micromatch from "micromatch";
 
+import { JSDOM } from "jsdom";
+
 import readability from "./readability";
 import google, { GoogleDomains } from "./google";
 import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main";
@@ -14,6 +16,7 @@ import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main";
 import { HandlerInput } from "./handler-input";
 import { Readable } from "stream";
 import { decodeStream, parseEncodingName } from "../utils/http";
+import replaceHref from "../utils/replace-href";
 
 export default async function handlePage(
   url: string, // remote URL
@@ -35,15 +38,21 @@ export default async function handlePage(
     throw new NotHtmlMimetypeError();
   }
 
-  return getFallbackEngine(urlObj.hostname, engine)(
+  const handler = getFallbackEngine(urlObj.hostname, engine);
+  const output = await handler(
     new HandlerInput(
       await decodeStream(data, parseEncodingName(mime)),
       url,
-      requestUrl,
-      engine,
-      redirectPath,
     )
   );
+
+  // post-process
+  const dom = new JSDOM(output.content, { url });
+  replaceHref(dom, requestUrl, engine, redirectPath);
+  output.content = dom.serialize();
+  // TODO: DomPurify
+
+  return output;
 }
 
 function getFallbackEngine(host: string, specified?: string): EngineFunction {
diff --git a/src/utils/replace-href.ts b/src/utils/replace-href.ts
new file mode 100644
index 0000000..bd16d33
--- /dev/null
+++ b/src/utils/replace-href.ts
@@ -0,0 +1,82 @@
+import { JSDOM } from "jsdom";
+import { generateParserUrl, generateProxyUrl } from "./generate";
+import getConfig from "../config/main";
+
+export default function replaceHref(
+  dom: JSDOM,
+  requestUrl: URL,
+  engine?: string,
+  redirectPath: string = "get",
+) {
+  const bytag =
+    (dom: JSDOM, tag: string) => dom.window.document.getElementsByTagName(tag);
+  const bycss =
+    (dom: JSDOM, css: string) => dom.window.document.querySelectorAll(css);
+
+  const parserUrl = (href: string) => generateParserUrl(
+    requestUrl,
+    href,
+    engine,
+    redirectPath,
+  );
+  const proxyUrl = (href: string) => generateProxyUrl(
+    requestUrl,
+    href,
+  );
+
+  modifyLinks(
+    bytag(dom, "a"),
+    "href",
+    parserUrl,
+  );
+  modifyLinks(
+    bycss(dom, "frame,iframe"),
+    "src",
+    parserUrl,
+  );
+
+  if (getConfig().proxy_res) {
+    modifyLinks(
+      bycss(dom, "img,image,video,audio,embed,track,source"),
+      "src",
+      proxyUrl,
+    );
+
+    modifyLinks(
+      bytag(dom, "object"),
+      "data",
+      proxyUrl,
+    );
+
+    const sources = bytag(dom, "source");
+    for (const source of sources) {
+      // split srcset by comma
+      // @ts-ignore
+      source.srcset = source.srcset.split(",").map(
+        (src: string) => {
+          // split src by space
+          const parts = src.split(" ");
+          try {
+            // first part is URL
+            parts[0] = proxyUrl(parts[0]);
+          } catch (_err) { }
+          // join by space after splitting
+          return parts.join(" ");
+        }
+      ).join(","); // join by comma
+    }
+  }
+}
+
+function modifyLinks(
+  nodeList: NodeListOf<Element> | HTMLCollectionOf<Element>,
+  property: string,
+  generateLink: (value: string) => string,
+) {
+  for (const node of nodeList) {
+    try {
+      // @ts-ignore
+      node[property] = generateLink(node[property]);
+    } catch (_err) { }
+  }
+}