Refactor: move link replacement into utils and call it after Readability

This commit is contained in:
DarkCat09 2023-09-22 12:53:25 +04:00
parent 64054ff627
commit 4994bd5e54
No known key found for this signature in database
GPG Key ID: 0A26CD5B3345D6E3
3 changed files with 95 additions and 88 deletions

View File

@ -1,27 +1,16 @@
import { JSDOM } from "jsdom";
import { generateParserUrl, generateProxyUrl } from "../utils/generate";
import getConfig from "../config/main";
export class HandlerInput {
private data: string;
private url: string;
private requestUrl: URL;
private engine?: string;
private redirectPath: string;
private dom?: JSDOM;
constructor(
data: string,
url: string,
requestUrl: URL,
engine?: string,
redirectPath: string = "get",
) {
this.data = data;
this.url = url;
this.requestUrl = requestUrl;
this.engine = engine;
this.redirectPath = redirectPath;
}
getUrl(): string {
@ -34,79 +23,6 @@ export class HandlerInput {
}
this.dom = new JSDOM(this.data, { url: this.url });
const bytag =
(dom: JSDOM, tag: string) => dom.window.document.getElementsByTagName(tag);
const bycss =
(dom: JSDOM, css: string) => dom.window.document.querySelectorAll(css);
const parserUrl = (href: string) => generateParserUrl(
this.requestUrl,
href,
this.engine,
this.redirectPath,
);
const proxyUrl = (href: string) => generateProxyUrl(
this.requestUrl,
href,
);
this.modifyLinks(
bytag(this.dom, "a"),
"href",
parserUrl,
);
this.modifyLinks(
bycss(this.dom, "frame,iframe"),
"src",
parserUrl,
);
if (getConfig().proxy_res) {
this.modifyLinks(
bycss(this.dom, "img,image,video,audio,embed,track,source"),
"src",
proxyUrl,
);
this.modifyLinks(
bytag(this.dom, "object"),
"data",
proxyUrl,
);
const sources = bytag(this.dom, "source");
for (const source of sources) {
// split srcset by comma
// @ts-ignore
source.srcset = source.srcset.split(",").map(
(src: string) => {
// split src by space
const parts = src.split(" ");
try {
// first part is URL
parts[0] = proxyUrl(parts[0]);
} catch (_err) { }
// join by space after splitting
return parts.join(" ");
}
).join(","); // join by comma
}
}
return this.dom;
}
private modifyLinks(
nodeList: NodeListOf<Element> | HTMLCollectionOf<Element>,
property: string,
generateLink: (value: string) => string,
) {
for (const node of nodeList) {
try {
// @ts-ignore
node[property] = generateLink(node[property]);
} catch (_err) { }
}
}
}

View File

@ -4,6 +4,8 @@ import axios from "../types/axios";
import micromatch from "micromatch";
import { JSDOM } from "jsdom";
import readability from "./readability";
import google, { GoogleDomains } from "./google";
import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main";
@ -14,6 +16,7 @@ import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main";
import { HandlerInput } from "./handler-input";
import { Readable } from "stream";
import { decodeStream, parseEncodingName } from "../utils/http";
import replaceHref from "../utils/replace-href";
export default async function handlePage(
url: string, // remote URL
@ -35,15 +38,21 @@ export default async function handlePage(
throw new NotHtmlMimetypeError();
}
return getFallbackEngine(urlObj.hostname, engine)(
const handler = getFallbackEngine(urlObj.hostname, engine);
const output = await handler(
new HandlerInput(
await decodeStream(data, parseEncodingName(mime)),
url,
requestUrl,
engine,
redirectPath,
)
);
// post-process
const dom = new JSDOM(output.content, { url });
replaceHref(dom, requestUrl, engine, redirectPath);
output.content = dom.serialize();
// TODO: DomPurify
return output;
}
function getFallbackEngine(host: string, specified?: string): EngineFunction {

82
src/utils/replace-href.ts Normal file
View File

@ -0,0 +1,82 @@
import { JSDOM } from "jsdom";
import { generateParserUrl, generateProxyUrl } from "./generate";
import getConfig from "../config/main";
/**
 * Rewrites URLs inside a parsed document, in place.
 *
 * - `href` of every `<a>` and `src` of every `<frame>`/`<iframe>` is replaced
 *   with the result of `generateParserUrl`.
 * - When the `proxy_res` config option is truthy, `src` of media-like
 *   elements, `data` of every `<object>` and each URL listed in the `srcset`
 *   of `<img>`/`<source>` is replaced with the result of `generateProxyUrl`.
 *
 * Known limitation: `srcset` is split on commas, so a candidate URL that
 * itself contains a comma (e.g. a `data:` URI) is not handled.
 *
 * @param dom - Document to mutate.
 * @param requestUrl - Forwarded as the first argument to both URL generators.
 * @param engine - Forwarded to `generateParserUrl`.
 * @param redirectPath - Forwarded to `generateParserUrl`; defaults to `"get"`.
 */
export default function replaceHref(
  dom: JSDOM,
  requestUrl: URL,
  engine?: string,
  redirectPath: string = "get",
) {
  const doc = dom.window.document;

  const parserUrl = (href: string) =>
    generateParserUrl(requestUrl, href, engine, redirectPath);
  const proxyUrl = (href: string) => generateProxyUrl(requestUrl, href);

  modifyLinks(doc.getElementsByTagName("a"), "href", parserUrl);
  modifyLinks(doc.querySelectorAll("frame,iframe"), "src", parserUrl);

  if (!getConfig().proxy_res) {
    return;
  }

  modifyLinks(
    doc.querySelectorAll("img,image,video,audio,embed,track,source"),
    "src",
    proxyUrl,
  );
  modifyLinks(doc.getElementsByTagName("object"), "data", proxyUrl);

  // Rewrites one already-trimmed "<url> [descriptor...]" srcset candidate.
  const rewriteCandidate = (candidate: string): string => {
    const [rawUrl, ...descriptors] = candidate.split(/\s+/);
    if (rawUrl === undefined) {
      return candidate;
    }
    try {
      // srcset is read as raw attribute text, so resolve the URL against the
      // document base before handing it to the proxy generator.
      const absolute = new URL(rawUrl, doc.baseURI).href;
      return [proxyUrl(absolute), ...descriptors].join(" ");
    } catch {
      // Best effort: an unparsable URL leaves this candidate untouched.
      return candidate;
    }
  };

  // <img> is included alongside <source> so responsive images do not bypass
  // the proxy while their plain `src` is proxied.
  for (const element of doc.querySelectorAll("img[srcset],source[srcset]")) {
    const candidates = (element.getAttribute("srcset") ?? "")
      .split(",")
      // Trim so "a.jpg 1x, b.jpg 2x" does not yield an empty leading token.
      .map((candidate) => candidate.trim())
      .filter((candidate) => candidate !== "");

    // A blank srcset stays as it is instead of receiving a generated URL.
    if (candidates.length === 0) {
      continue;
    }

    element.setAttribute(
      "srcset",
      candidates.map(rewriteCandidate).join(", "),
    );
  }
}
/**
 * Replaces one string-valued property on every element of a collection with
 * the value produced by `generateLink`, mutating the elements in place.
 *
 * Elements whose current value is empty or not a string are skipped, so an
 * element that has no such URL does not receive a generated one.
 *
 * @param nodeList - Elements to update.
 * @param property - Name of the property to read and overwrite (e.g. "href").
 * @param generateLink - Maps the current value to its replacement.
 */
function modifyLinks(
  nodeList: NodeListOf<Element> | HTMLCollectionOf<Element>,
  property: string,
  generateLink: (value: string) => string,
) {
  for (const node of nodeList) {
    // Reflect gives dynamic property access without suppressing type checks.
    const current: unknown = Reflect.get(node, property);
    if (typeof current !== "string" || current === "") {
      continue;
    }
    try {
      Reflect.set(node, property, generateLink(current));
    } catch {
      // Best effort: if the generator rejects this value, leave the element
      // unchanged and carry on with the remaining elements.
    }
  }
}