mirror of
https://github.com/txtdot/txtdot
synced 2024-10-18 14:40:19 +03:00
Refactor: move links replacement into utils, call it after Readability
This commit is contained in:
parent
64054ff627
commit
4994bd5e54
@ -1,27 +1,16 @@
|
|||||||
import { JSDOM } from "jsdom";
|
import { JSDOM } from "jsdom";
|
||||||
import { generateParserUrl, generateProxyUrl } from "../utils/generate";
|
|
||||||
import getConfig from "../config/main";
|
|
||||||
|
|
||||||
export class HandlerInput {
|
export class HandlerInput {
|
||||||
private data: string;
|
private data: string;
|
||||||
private url: string;
|
private url: string;
|
||||||
private requestUrl: URL;
|
|
||||||
private engine?: string;
|
|
||||||
private redirectPath: string;
|
|
||||||
private dom?: JSDOM;
|
private dom?: JSDOM;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
data: string,
|
data: string,
|
||||||
url: string,
|
url: string,
|
||||||
requestUrl: URL,
|
|
||||||
engine?: string,
|
|
||||||
redirectPath: string = "get",
|
|
||||||
) {
|
) {
|
||||||
this.data = data;
|
this.data = data;
|
||||||
this.url = url;
|
this.url = url;
|
||||||
this.requestUrl = requestUrl;
|
|
||||||
this.engine = engine;
|
|
||||||
this.redirectPath = redirectPath;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getUrl(): string {
|
getUrl(): string {
|
||||||
@ -34,79 +23,6 @@ export class HandlerInput {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this.dom = new JSDOM(this.data, { url: this.url });
|
this.dom = new JSDOM(this.data, { url: this.url });
|
||||||
|
|
||||||
const bytag =
|
|
||||||
(dom: JSDOM, tag: string) => dom.window.document.getElementsByTagName(tag);
|
|
||||||
const bycss =
|
|
||||||
(dom: JSDOM, css: string) => dom.window.document.querySelectorAll(css);
|
|
||||||
|
|
||||||
const parserUrl = (href: string) => generateParserUrl(
|
|
||||||
this.requestUrl,
|
|
||||||
href,
|
|
||||||
this.engine,
|
|
||||||
this.redirectPath,
|
|
||||||
);
|
|
||||||
const proxyUrl = (href: string) => generateProxyUrl(
|
|
||||||
this.requestUrl,
|
|
||||||
href,
|
|
||||||
);
|
|
||||||
|
|
||||||
this.modifyLinks(
|
|
||||||
bytag(this.dom, "a"),
|
|
||||||
"href",
|
|
||||||
parserUrl,
|
|
||||||
);
|
|
||||||
this.modifyLinks(
|
|
||||||
bycss(this.dom, "frame,iframe"),
|
|
||||||
"src",
|
|
||||||
parserUrl,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (getConfig().proxy_res) {
|
|
||||||
this.modifyLinks(
|
|
||||||
bycss(this.dom, "img,image,video,audio,embed,track,source"),
|
|
||||||
"src",
|
|
||||||
proxyUrl,
|
|
||||||
);
|
|
||||||
|
|
||||||
this.modifyLinks(
|
|
||||||
bytag(this.dom, "object"),
|
|
||||||
"data",
|
|
||||||
proxyUrl,
|
|
||||||
);
|
|
||||||
|
|
||||||
const sources = bytag(this.dom, "source");
|
|
||||||
for (const source of sources) {
|
|
||||||
// split srcset by comma
|
|
||||||
// @ts-ignore
|
|
||||||
source.srcset = source.srcset.split(",").map(
|
|
||||||
(src: string) => {
|
|
||||||
// split src by space
|
|
||||||
const parts = src.split(" ");
|
|
||||||
try {
|
|
||||||
// first part is URL
|
|
||||||
parts[0] = proxyUrl(parts[0]);
|
|
||||||
} catch (_err) { }
|
|
||||||
// join by space after splitting
|
|
||||||
return parts.join(" ");
|
|
||||||
}
|
|
||||||
).join(","); // join by comma
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.dom;
|
return this.dom;
|
||||||
}
|
}
|
||||||
|
|
||||||
private modifyLinks(
|
|
||||||
nodeList: NodeListOf<Element> | HTMLCollectionOf<Element>,
|
|
||||||
property: string,
|
|
||||||
generateLink: (value: string) => string,
|
|
||||||
) {
|
|
||||||
for (const node of nodeList) {
|
|
||||||
try {
|
|
||||||
// @ts-ignore
|
|
||||||
node[property] = generateLink(node[property]);
|
|
||||||
} catch (_err) { }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,8 @@ import axios from "../types/axios";
|
|||||||
|
|
||||||
import micromatch from "micromatch";
|
import micromatch from "micromatch";
|
||||||
|
|
||||||
|
import { JSDOM } from "jsdom";
|
||||||
|
|
||||||
import readability from "./readability";
|
import readability from "./readability";
|
||||||
import google, { GoogleDomains } from "./google";
|
import google, { GoogleDomains } from "./google";
|
||||||
import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main";
|
import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main";
|
||||||
@ -14,6 +16,7 @@ import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main";
|
|||||||
import { HandlerInput } from "./handler-input";
|
import { HandlerInput } from "./handler-input";
|
||||||
import { Readable } from "stream";
|
import { Readable } from "stream";
|
||||||
import { decodeStream, parseEncodingName } from "../utils/http";
|
import { decodeStream, parseEncodingName } from "../utils/http";
|
||||||
|
import replaceHref from "../utils/replace-href";
|
||||||
|
|
||||||
export default async function handlePage(
|
export default async function handlePage(
|
||||||
url: string, // remote URL
|
url: string, // remote URL
|
||||||
@ -35,15 +38,21 @@ export default async function handlePage(
|
|||||||
throw new NotHtmlMimetypeError();
|
throw new NotHtmlMimetypeError();
|
||||||
}
|
}
|
||||||
|
|
||||||
return getFallbackEngine(urlObj.hostname, engine)(
|
const handler = getFallbackEngine(urlObj.hostname, engine);
|
||||||
|
const output = await handler(
|
||||||
new HandlerInput(
|
new HandlerInput(
|
||||||
await decodeStream(data, parseEncodingName(mime)),
|
await decodeStream(data, parseEncodingName(mime)),
|
||||||
url,
|
url,
|
||||||
requestUrl,
|
|
||||||
engine,
|
|
||||||
redirectPath,
|
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// post-process
|
||||||
|
const dom = new JSDOM(output.content, { url });
|
||||||
|
replaceHref(dom, requestUrl, engine, redirectPath);
|
||||||
|
output.content = dom.serialize();
|
||||||
|
// TODO: DomPurify
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getFallbackEngine(host: string, specified?: string): EngineFunction {
|
function getFallbackEngine(host: string, specified?: string): EngineFunction {
|
||||||
|
82
src/utils/replace-href.ts
Normal file
82
src/utils/replace-href.ts
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
import { JSDOM } from "jsdom";
|
||||||
|
import { generateParserUrl, generateProxyUrl } from "./generate";
|
||||||
|
import getConfig from "../config/main";
|
||||||
|
|
||||||
|
/**
 * Rewrites the URLs found in `dom` in place.
 *
 * - `href` of every `<a>` and `src` of every `<frame>`/`<iframe>` is replaced
 *   with the result of `generateParserUrl(requestUrl, value, engine, redirectPath)`.
 * - When `getConfig().proxy_res` is truthy, `src` of media-like elements,
 *   `data` of `<object>` and every URL inside a `srcset` attribute of
 *   `<source>`/`<img>` is replaced with the result of
 *   `generateProxyUrl(requestUrl, value)`.
 *
 * NOTE(review): the generators are defined in `./generate`; presumably they
 * produce URLs pointing back at this instance — confirm there.
 *
 * @param dom - Document to mutate; nothing is returned.
 * @param requestUrl - Forwarded unchanged to both URL generators.
 * @param engine - Forwarded unchanged to `generateParserUrl`.
 * @param redirectPath - Forwarded unchanged to `generateParserUrl`.
 */
export default function replaceHref(
  dom: JSDOM,
  requestUrl: URL,
  engine?: string,
  redirectPath: string = "get",
) {
  const doc = dom.window.document;

  const parserUrl = (href: string) =>
    generateParserUrl(requestUrl, href, engine, redirectPath);
  const proxyUrl = (href: string) => generateProxyUrl(requestUrl, href);

  modifyLinks(doc.getElementsByTagName("a"), "href", parserUrl);
  modifyLinks(doc.querySelectorAll("frame,iframe"), "src", parserUrl);

  if (!getConfig().proxy_res) {
    return;
  }

  modifyLinks(
    doc.querySelectorAll("img,image,video,audio,embed,track,source"),
    "src",
    proxyUrl,
  );
  modifyLinks(doc.getElementsByTagName("object"), "data", proxyUrl);

  // `srcset` holds a comma-separated list of "<url> <descriptor>" candidates.
  // Both <source> and <img> accept it, so both must be rewritten; otherwise a
  // browser that picks a srcset candidate would fetch it without the proxy.
  // NOTE(review): splitting on "," is a simplification; URLs that themselves
  // contain commas (e.g. data: URLs) are not handled.
  for (const element of doc.querySelectorAll("source,img")) {
    const srcset = element.getAttribute("srcset");
    if (!srcset) {
      // Nothing to rewrite; avoid adding an empty attribute.
      continue;
    }

    const rewritten = srcset
      .split(",")
      // Candidates are usually written ", "-separated; without trimming, the
      // leading space makes the first whitespace-separated part empty and
      // the real URL would be skipped.
      .map((candidate) => candidate.trim())
      .filter((candidate) => candidate !== "")
      .map((candidate) => {
        const [rawUrl, ...descriptors] = candidate.split(/\s+/);
        if (!rawUrl) {
          return candidate;
        }
        try {
          // Resolve against the document base so relative candidates are
          // proxied as absolute URLs, like the `src` values read through
          // DOM properties above.
          const absoluteUrl = new URL(rawUrl, doc.baseURI).href;
          return [proxyUrl(absoluteUrl), ...descriptors].join(" ");
        } catch {
          // Best effort: leave a candidate we cannot rewrite untouched.
          return candidate;
        }
      })
      .join(", ");

    element.setAttribute("srcset", rewritten);
  }
}
|
||||||
|
|
||||||
|
/**
 * Replaces `property` on every element of `nodeList` with
 * `generateLink(currentValue)`.
 *
 * Elements whose current value is not a non-empty string are skipped, so
 * elements lacking the attribute do not gain a generated link. The rewrite is
 * best effort: an error raised for one element (by the property accessors or
 * by `generateLink`) leaves that element unchanged and does not stop the
 * others from being processed.
 *
 * @param nodeList - Elements to update in place.
 * @param property - Name of the property to read and overwrite (e.g. "href").
 * @param generateLink - Maps the current value to its replacement.
 */
function modifyLinks(
  nodeList: NodeListOf<Element> | HTMLCollectionOf<Element>,
  property: string,
  generateLink: (value: string) => string,
) {
  for (const node of nodeList) {
    try {
      // Reflect gives dynamic property access without suppressing the
      // type checker for the whole statement.
      const current: unknown = Reflect.get(node, property);
      if (typeof current !== "string" || current === "") {
        continue;
      }
      Reflect.set(node, property, generateLink(current));
    } catch {
      // Deliberately ignored: a value that cannot be rewritten stays as is.
    }
  }
}
|
Loading…
Reference in New Issue
Block a user