mirror of
https://github.com/txtdot/txtdot
synced 2024-10-18 14:40:19 +03:00
Merge pull request #44 from TxtDot/new-match-system
feat: micromatch system to match domains
This commit is contained in:
commit
7d0d5f1888
28
package-lock.json
generated
28
package-lock.json
generated
@ -20,12 +20,14 @@
|
||||
"fastify": "^4.21.0",
|
||||
"ip-range-check": "^0.2.0",
|
||||
"jsdom": "^22.1.0",
|
||||
"json-schema-to-ts": "^2.9.2"
|
||||
"json-schema-to-ts": "^2.9.2",
|
||||
"micromatch": "^4.0.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/ejs": "^3.1.2",
|
||||
"@types/express": "^4.17.17",
|
||||
"@types/jsdom": "^21.1.1",
|
||||
"@types/micromatch": "^4.0.2",
|
||||
"@types/node": "^20.4.10",
|
||||
"@typescript-eslint/eslint-plugin": "^6.3.0",
|
||||
"@typescript-eslint/parser": "^6.3.0",
|
||||
@ -355,6 +357,12 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/braces": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/braces/-/braces-3.0.2.tgz",
|
||||
"integrity": "sha512-U5tlMYa0U/2eFTmJgKcPWQOEICP173sJDa6OjHbj5Tv+NVaYcrq2xmdWpNXOwWYGwJu+jER/pfTLdoQ31q8PzA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/connect": {
|
||||
"version": "3.4.35",
|
||||
"resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.35.tgz",
|
||||
@ -415,6 +423,15 @@
|
||||
"version": "7.0.12",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/micromatch": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/micromatch/-/micromatch-4.0.2.tgz",
|
||||
"integrity": "sha512-oqXqVb0ci19GtH0vOA/U2TmHTcRY9kuZl4mqUxe0QmJAlIW13kzhuK5pi1i9+ngav8FjpSb9FVS/GE00GLX1VA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@types/braces": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/mime": {
|
||||
"version": "1.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.2.tgz",
|
||||
@ -889,7 +906,6 @@
|
||||
},
|
||||
"node_modules/braces": {
|
||||
"version": "3.0.2",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fill-range": "^7.0.1"
|
||||
@ -1660,7 +1676,6 @@
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.0.1",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"to-regex-range": "^5.0.1"
|
||||
@ -2038,7 +2053,6 @@
|
||||
},
|
||||
"node_modules/is-number": {
|
||||
"version": "7.0.0",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.12.0"
|
||||
@ -2243,8 +2257,8 @@
|
||||
},
|
||||
"node_modules/micromatch": {
|
||||
"version": "4.0.5",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz",
|
||||
"integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==",
|
||||
"dependencies": {
|
||||
"braces": "^3.0.2",
|
||||
"picomatch": "^2.3.1"
|
||||
@ -2492,7 +2506,6 @@
|
||||
},
|
||||
"node_modules/picomatch": {
|
||||
"version": "2.3.1",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8.6"
|
||||
@ -3071,7 +3084,6 @@
|
||||
},
|
||||
"node_modules/to-regex-range": {
|
||||
"version": "5.0.1",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-number": "^7.0.0"
|
||||
|
@ -16,12 +16,14 @@
|
||||
"fastify": "^4.21.0",
|
||||
"ip-range-check": "^0.2.0",
|
||||
"jsdom": "^22.1.0",
|
||||
"json-schema-to-ts": "^2.9.2"
|
||||
"json-schema-to-ts": "^2.9.2",
|
||||
"micromatch": "^4.0.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/ejs": "^3.1.2",
|
||||
"@types/express": "^4.17.17",
|
||||
"@types/jsdom": "^21.1.1",
|
||||
"@types/micromatch": "^4.0.2",
|
||||
"@types/node": "^20.4.10",
|
||||
"@typescript-eslint/eslint-plugin": "^6.3.0",
|
||||
"@typescript-eslint/parser": "^6.3.0",
|
||||
|
@ -3,12 +3,18 @@ import { IHandlerOutput } from "./handler.interface";
|
||||
import { EngineParseError } from "../errors/main";
|
||||
|
||||
export default async function google(
|
||||
window: DOMWindow
|
||||
window: DOMWindow,
|
||||
): Promise<IHandlerOutput> {
|
||||
const googleAnchors = [
|
||||
...window.document.querySelectorAll("a[jsname=ACyKwe]"),
|
||||
] as HTMLAnchorElement[];
|
||||
|
||||
if (!googleAnchors) {
|
||||
throw new EngineParseError(
|
||||
"Failed to find anchors in search result [google]",
|
||||
);
|
||||
}
|
||||
|
||||
const results = googleAnchors
|
||||
.map((a: HTMLAnchorElement): GoogleProps => {
|
||||
const parsedHref = new URL(new URL(a.href).searchParams.get("url")!);
|
||||
@ -20,12 +26,6 @@ export default async function google(
|
||||
})
|
||||
.filter((a) => a.heading);
|
||||
|
||||
if (!googleAnchors) {
|
||||
throw new EngineParseError(
|
||||
"Failed to find anchors in search result [google]"
|
||||
);
|
||||
}
|
||||
|
||||
const convertToFormat = (result: GoogleProps, isHtml: boolean) => {
|
||||
return isHtml
|
||||
? `<p><a href="${result.href}">${result.siteName} - ${result.heading}</p>`
|
||||
@ -41,12 +41,12 @@ export default async function google(
|
||||
});
|
||||
|
||||
const search = window.document.getElementById(
|
||||
"APjFqb"
|
||||
"APjFqb",
|
||||
) as HTMLTextAreaElement;
|
||||
|
||||
const navLinks = [
|
||||
...window.document.querySelectorAll(
|
||||
"table[class=AaVjTc] > tbody > tr > td > a"
|
||||
"table[class=AaVjTc] > tbody > tr > td > a",
|
||||
),
|
||||
].map((l) => {
|
||||
const link = l as HTMLAnchorElement;
|
||||
@ -81,6 +81,15 @@ export default async function google(
|
||||
};
|
||||
}
|
||||
|
||||
export const GoogleDomains = [
|
||||
"google.*",
|
||||
"google.co.*",
|
||||
"google.com.*",
|
||||
"www.google.*",
|
||||
"www.google.co.*",
|
||||
"www.google.com.*",
|
||||
];
|
||||
|
||||
interface GoogleProps {
|
||||
href: string;
|
||||
siteName: string;
|
||||
|
@ -5,22 +5,21 @@ import { JSDOM } from "jsdom";
|
||||
import { DOMWindow } from "jsdom";
|
||||
|
||||
import readability from "./readability";
|
||||
import google from "./google";
|
||||
import stackoverflow from "./stackoverflow/main";
|
||||
import google, { GoogleDomains } from "./google";
|
||||
import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main";
|
||||
|
||||
import { generateProxyUrl } from "../utils/generate";
|
||||
import isLocalResource from "../utils/islocal";
|
||||
|
||||
import {
|
||||
LocalResourceError,
|
||||
NotHtmlMimetypeError,
|
||||
} from "../errors/main";
|
||||
import micromatch from "micromatch";
|
||||
|
||||
import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main";
|
||||
|
||||
export default async function handlePage(
|
||||
url: string, // remote URL
|
||||
requestUrl: URL, // proxy URL
|
||||
engine?: string,
|
||||
redirect_path: string = "get"
|
||||
redirect_path: string = "get",
|
||||
): Promise<IHandlerOutput> {
|
||||
const urlObj = new URL(url);
|
||||
|
||||
@ -39,7 +38,12 @@ export default async function handlePage(
|
||||
|
||||
[...window.document.getElementsByTagName("a")].forEach((link) => {
|
||||
try {
|
||||
link.href = generateProxyUrl(requestUrl, link.href, engine, redirect_path);
|
||||
link.href = generateProxyUrl(
|
||||
requestUrl,
|
||||
link.href,
|
||||
engine,
|
||||
redirect_path,
|
||||
);
|
||||
} catch (_err) {
|
||||
// ignore TypeError: Invalid URL
|
||||
}
|
||||
@ -49,25 +53,41 @@ export default async function handlePage(
|
||||
return engines[engine](window);
|
||||
}
|
||||
|
||||
return fallback[urlObj.host]?.(window) || fallback["*"](window);
|
||||
for (let match of fallback) {
|
||||
if (micromatch.isMatch(urlObj.hostname, match.pattern)) {
|
||||
return match.engine(window);
|
||||
}
|
||||
}
|
||||
|
||||
return engines.readability(window);
|
||||
}
|
||||
|
||||
interface Engines {
|
||||
[key: string]: EngineFunction;
|
||||
}
|
||||
|
||||
type EngineFunction = (window: DOMWindow) => Promise<IHandlerOutput>;
|
||||
|
||||
export const engines: Engines = {
|
||||
readability,
|
||||
google,
|
||||
stackoverflow,
|
||||
};
|
||||
|
||||
type EngineFunction = (window: DOMWindow) => Promise<IHandlerOutput>;
|
||||
export type EngineMatch = {
|
||||
pattern: string | string[];
|
||||
engine: EngineFunction;
|
||||
};
|
||||
export type EnginesMatch = EngineMatch[];
|
||||
|
||||
export const engineList: string[] = Object.keys(engines);
|
||||
|
||||
const fallback: Engines = {
|
||||
"stackoverflow.com": engines.stackoverflow,
|
||||
"www.google.com": engines.google,
|
||||
"*": engines.readability,
|
||||
};
|
||||
export const fallback: EnginesMatch = [
|
||||
{
|
||||
pattern: GoogleDomains,
|
||||
engine: engines.google,
|
||||
},
|
||||
{
|
||||
pattern: StackOverflowDomains,
|
||||
engine: engines.stackoverflow,
|
||||
},
|
||||
];
|
||||
|
@ -4,7 +4,7 @@ import { EngineParseError } from "../../errors/main";
|
||||
import qPostsHandler from "./questions-posts";
|
||||
|
||||
export default async function stackoverflow(
|
||||
window: DOMWindow
|
||||
window: DOMWindow,
|
||||
): Promise<IHandlerOutput> {
|
||||
const url = new URL(window.location.href);
|
||||
|
||||
@ -29,3 +29,14 @@ export default async function stackoverflow(
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
export const StackOverflowDomains = [
|
||||
"stackoverflow.com",
|
||||
"*.stackoverflow.com",
|
||||
"*.stackexchange.com",
|
||||
"askubuntu.com",
|
||||
"stackapps.com",
|
||||
"mathoverflow.net",
|
||||
"superuser.com",
|
||||
"serverfault.com",
|
||||
];
|
||||
|
Loading…
Reference in New Issue
Block a user