mirror of
https://github.com/txtdot/txtdot
synced 2024-10-18 14:40:19 +03:00
Refactor: move links replacement into utils, call it after Readability
This commit is contained in:
parent
64054ff627
commit
4994bd5e54
@ -1,27 +1,16 @@
|
||||
import { JSDOM } from "jsdom";
|
||||
import { generateParserUrl, generateProxyUrl } from "../utils/generate";
|
||||
import getConfig from "../config/main";
|
||||
|
||||
export class HandlerInput {
|
||||
private data: string;
|
||||
private url: string;
|
||||
private requestUrl: URL;
|
||||
private engine?: string;
|
||||
private redirectPath: string;
|
||||
private dom?: JSDOM;
|
||||
|
||||
constructor(
|
||||
data: string,
|
||||
url: string,
|
||||
requestUrl: URL,
|
||||
engine?: string,
|
||||
redirectPath: string = "get",
|
||||
) {
|
||||
this.data = data;
|
||||
this.url = url;
|
||||
this.requestUrl = requestUrl;
|
||||
this.engine = engine;
|
||||
this.redirectPath = redirectPath;
|
||||
}
|
||||
|
||||
getUrl(): string {
|
||||
@ -34,79 +23,6 @@ export class HandlerInput {
|
||||
}
|
||||
|
||||
this.dom = new JSDOM(this.data, { url: this.url });
|
||||
|
||||
const bytag =
|
||||
(dom: JSDOM, tag: string) => dom.window.document.getElementsByTagName(tag);
|
||||
const bycss =
|
||||
(dom: JSDOM, css: string) => dom.window.document.querySelectorAll(css);
|
||||
|
||||
const parserUrl = (href: string) => generateParserUrl(
|
||||
this.requestUrl,
|
||||
href,
|
||||
this.engine,
|
||||
this.redirectPath,
|
||||
);
|
||||
const proxyUrl = (href: string) => generateProxyUrl(
|
||||
this.requestUrl,
|
||||
href,
|
||||
);
|
||||
|
||||
this.modifyLinks(
|
||||
bytag(this.dom, "a"),
|
||||
"href",
|
||||
parserUrl,
|
||||
);
|
||||
this.modifyLinks(
|
||||
bycss(this.dom, "frame,iframe"),
|
||||
"src",
|
||||
parserUrl,
|
||||
);
|
||||
|
||||
if (getConfig().proxy_res) {
|
||||
this.modifyLinks(
|
||||
bycss(this.dom, "img,image,video,audio,embed,track,source"),
|
||||
"src",
|
||||
proxyUrl,
|
||||
);
|
||||
|
||||
this.modifyLinks(
|
||||
bytag(this.dom, "object"),
|
||||
"data",
|
||||
proxyUrl,
|
||||
);
|
||||
|
||||
const sources = bytag(this.dom, "source");
|
||||
for (const source of sources) {
|
||||
// split srcset by comma
|
||||
// @ts-ignore
|
||||
source.srcset = source.srcset.split(",").map(
|
||||
(src: string) => {
|
||||
// split src by space
|
||||
const parts = src.split(" ");
|
||||
try {
|
||||
// first part is URL
|
||||
parts[0] = proxyUrl(parts[0]);
|
||||
} catch (_err) { }
|
||||
// join by space after splitting
|
||||
return parts.join(" ");
|
||||
}
|
||||
).join(","); // join by comma
|
||||
}
|
||||
}
|
||||
|
||||
return this.dom;
|
||||
}
|
||||
|
||||
/**
 * Rewrites one property on every node of a collection, in place.
 *
 * Nodes whose current value for `property` is missing, empty, or not a
 * string are left untouched: there is no link to rewrite, and passing an
 * empty or `undefined` value to the generator could attach a bogus link
 * to an element that never had one.
 *
 * @param nodeList - Elements to process.
 * @param property - Name of the property to read and overwrite
 *   (the visible callers pass "href", "src" and "data").
 * @param generateLink - Maps the current value to its replacement.
 */
private modifyLinks(
  nodeList: NodeListOf<Element> | HTMLCollectionOf<Element>,
  property: string,
  generateLink: (value: string) => string,
) {
  for (const node of nodeList) {
    // Reflect gives typed dynamic property access without `@ts-ignore`.
    const current: unknown = Reflect.get(node, property);
    if (typeof current !== "string" || current === "") {
      continue;
    }

    try {
      Reflect.set(node, property, generateLink(current));
    } catch (_err) {
      // Best effort: if the generator rejects this value, keep the
      // original link rather than failing the whole page.
    }
  }
}
|
||||
}
|
||||
|
@ -4,6 +4,8 @@ import axios from "../types/axios";
|
||||
|
||||
import micromatch from "micromatch";
|
||||
|
||||
import { JSDOM } from "jsdom";
|
||||
|
||||
import readability from "./readability";
|
||||
import google, { GoogleDomains } from "./google";
|
||||
import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main";
|
||||
@ -14,6 +16,7 @@ import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main";
|
||||
import { HandlerInput } from "./handler-input";
|
||||
import { Readable } from "stream";
|
||||
import { decodeStream, parseEncodingName } from "../utils/http";
|
||||
import replaceHref from "../utils/replace-href";
|
||||
|
||||
export default async function handlePage(
|
||||
url: string, // remote URL
|
||||
@ -35,15 +38,21 @@ export default async function handlePage(
|
||||
throw new NotHtmlMimetypeError();
|
||||
}
|
||||
|
||||
return getFallbackEngine(urlObj.hostname, engine)(
|
||||
const handler = getFallbackEngine(urlObj.hostname, engine);
|
||||
const output = await handler(
|
||||
new HandlerInput(
|
||||
await decodeStream(data, parseEncodingName(mime)),
|
||||
url,
|
||||
requestUrl,
|
||||
engine,
|
||||
redirectPath,
|
||||
)
|
||||
);
|
||||
|
||||
// post-process
|
||||
const dom = new JSDOM(output.content, { url });
|
||||
replaceHref(dom, requestUrl, engine, redirectPath);
|
||||
output.content = dom.serialize();
|
||||
// TODO: DomPurify
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
function getFallbackEngine(host: string, specified?: string): EngineFunction {
|
||||
|
82
src/utils/replace-href.ts
Normal file
82
src/utils/replace-href.ts
Normal file
@ -0,0 +1,82 @@
|
||||
import { JSDOM } from "jsdom";
|
||||
import { generateParserUrl, generateProxyUrl } from "./generate";
|
||||
import getConfig from "../config/main";
|
||||
|
||||
/**
 * Rewrites the links of a parsed document in place.
 *
 * - `href` of `<a>` and `src` of `<frame>`/`<iframe>` are replaced by the
 *   result of `generateParserUrl(requestUrl, href, engine, redirectPath)`.
 * - When `getConfig().proxy_res` is truthy, resource links (`src` of media
 *   and embed elements, `data` of `<object>`, and every URL inside a
 *   `srcset` attribute of `<img>`/`<source>`) are replaced by the result
 *   of `generateProxyUrl(requestUrl, href)`.
 *
 * NOTE(review): the exact shape of the generated URLs is defined in
 * `./generate`, which is not visible here.
 *
 * @param dom - Document to modify; it is mutated, nothing is returned.
 * @param requestUrl - Forwarded to both URL generators.
 * @param engine - Optional engine name forwarded to `generateParserUrl`.
 * @param redirectPath - Forwarded to `generateParserUrl`; defaults to "get".
 */
export default function replaceHref(
  dom: JSDOM,
  requestUrl: URL,
  engine?: string,
  redirectPath: string = "get",
): void {
  const doc = dom.window.document;

  const parserUrl = (href: string) =>
    generateParserUrl(requestUrl, href, engine, redirectPath);
  const proxyUrl = (href: string) => generateProxyUrl(requestUrl, href);

  // Navigational links.
  modifyLinks(doc.getElementsByTagName("a"), "href", parserUrl);
  modifyLinks(doc.querySelectorAll("frame,iframe"), "src", parserUrl);

  if (!getConfig().proxy_res) {
    return;
  }

  // Embedded resources.
  modifyLinks(
    doc.querySelectorAll("img,image,video,audio,embed,track,source"),
    "src",
    proxyUrl,
  );
  modifyLinks(doc.getElementsByTagName("object"), "data", proxyUrl);

  // Only elements that actually carry a srcset are touched, so an element
  // without one is never given an attribute built from an empty string.
  // <img> is included because it accepts srcset just like <source>.
  for (const element of doc.querySelectorAll("img[srcset],source[srcset]")) {
    const srcset = element.getAttribute("srcset");
    if (srcset) {
      element.setAttribute("srcset", rewriteSrcset(srcset, proxyUrl));
    }
  }
}

/**
 * Applies `rewriteUrl` to the URL of every candidate in a `srcset` value.
 *
 * Candidates are separated on commas and trimmed before the URL is
 * extracted, so the common `"a.jpg 1x, b.jpg 2x"` form (whitespace after
 * the comma) is handled. Descriptors ("2x", "480w") are preserved.
 *
 * Limitation: a URL that itself contains a comma is split incorrectly.
 *
 * @param srcset - Raw attribute value.
 * @param rewriteUrl - Maps each candidate URL to its replacement; if it
 *   throws, that candidate keeps its original URL.
 * @returns The rebuilt attribute value, candidates joined by ", ".
 */
function rewriteSrcset(
  srcset: string,
  rewriteUrl: (url: string) => string,
): string {
  return srcset
    .split(",")
    .map((candidate) => candidate.trim())
    .filter((candidate) => candidate.length > 0)
    .map((candidate) => {
      // The first whitespace-delimited token is the URL, the rest are descriptors.
      const [url = "", ...descriptors] = candidate.split(/\s+/);
      let rewritten = url;
      try {
        rewritten = rewriteUrl(url);
      } catch (_err) {
        // Best effort: leave this candidate pointing at its original URL.
      }
      return [rewritten, ...descriptors].join(" ");
    })
    .join(", ");
}
|
||||
|
||||
/**
 * Rewrites one property on every node of a collection, in place.
 *
 * Nodes whose current value for `property` is missing, empty, or not a
 * string are left untouched: there is no link to rewrite, and passing an
 * empty or `undefined` value to the generator could attach a bogus link
 * to an element that never had one.
 *
 * @param nodeList - Elements to process.
 * @param property - Name of the property to read and overwrite
 *   (the visible callers pass "href", "src" and "data").
 * @param generateLink - Maps the current value to its replacement.
 */
function modifyLinks(
  nodeList: NodeListOf<Element> | HTMLCollectionOf<Element>,
  property: string,
  generateLink: (value: string) => string,
): void {
  for (const node of nodeList) {
    // Reflect gives typed dynamic property access without `@ts-ignore`.
    const current: unknown = Reflect.get(node, property);
    if (typeof current !== "string" || current === "") {
      continue;
    }

    try {
      Reflect.set(node, property, generateLink(current));
    } catch (_err) {
      // Best effort: if the generator rejects this value, keep the
      // original link rather than failing the whole page.
    }
  }
}
|
Loading…
Reference in New Issue
Block a user