diff --git a/package-lock.json b/package-lock.json index c40fba8..fac306d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,12 +20,14 @@ "fastify": "^4.21.0", "ip-range-check": "^0.2.0", "jsdom": "^22.1.0", - "json-schema-to-ts": "^2.9.2" + "json-schema-to-ts": "^2.9.2", + "micromatch": "^4.0.5" }, "devDependencies": { "@types/ejs": "^3.1.2", "@types/express": "^4.17.17", "@types/jsdom": "^21.1.1", + "@types/micromatch": "^4.0.2", "@types/node": "^20.4.10", "@typescript-eslint/eslint-plugin": "^6.3.0", "@typescript-eslint/parser": "^6.3.0", @@ -355,6 +357,12 @@ "@types/node": "*" } }, + "node_modules/@types/braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-U5tlMYa0U/2eFTmJgKcPWQOEICP173sJDa6OjHbj5Tv+NVaYcrq2xmdWpNXOwWYGwJu+jER/pfTLdoQ31q8PzA==", + "dev": true + }, "node_modules/@types/connect": { "version": "3.4.35", "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.35.tgz", @@ -415,6 +423,15 @@ "version": "7.0.12", "license": "MIT" }, + "node_modules/@types/micromatch": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/micromatch/-/micromatch-4.0.2.tgz", + "integrity": "sha512-oqXqVb0ci19GtH0vOA/U2TmHTcRY9kuZl4mqUxe0QmJAlIW13kzhuK5pi1i9+ngav8FjpSb9FVS/GE00GLX1VA==", + "dev": true, + "dependencies": { + "@types/braces": "*" + } + }, "node_modules/@types/mime": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.2.tgz", @@ -889,7 +906,6 @@ }, "node_modules/braces": { "version": "3.0.2", - "dev": true, "license": "MIT", "dependencies": { "fill-range": "^7.0.1" @@ -1660,7 +1676,6 @@ }, "node_modules/fill-range": { "version": "7.0.1", - "dev": true, "license": "MIT", "dependencies": { "to-regex-range": "^5.0.1" @@ -2038,7 +2053,6 @@ }, "node_modules/is-number": { "version": "7.0.0", - "dev": true, "license": "MIT", "engines": { "node": ">=0.12.0" @@ -2243,8 +2257,8 @@ }, "node_modules/micromatch": { "version": "4.0.5", - "dev": true, - "license": "MIT", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", + "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", "dependencies": { "braces": "^3.0.2", "picomatch": "^2.3.1" @@ -2492,7 +2506,6 @@ }, "node_modules/picomatch": { "version": "2.3.1", - "dev": true, "license": "MIT", "engines": { "node": ">=8.6" @@ -3071,7 +3084,6 @@ }, "node_modules/to-regex-range": { "version": "5.0.1", - "dev": true, "license": "MIT", "dependencies": { "is-number": "^7.0.0" diff --git a/package.json b/package.json index 793d9ef..e926e61 100644 --- a/package.json +++ b/package.json @@ -16,12 +16,14 @@ "fastify": "^4.21.0", "ip-range-check": "^0.2.0", "jsdom": "^22.1.0", - "json-schema-to-ts": "^2.9.2" + "json-schema-to-ts": "^2.9.2", + "micromatch": "^4.0.5" }, "devDependencies": { "@types/ejs": "^3.1.2", "@types/express": "^4.17.17", "@types/jsdom": "^21.1.1", + "@types/micromatch": "^4.0.2", "@types/node": "^20.4.10", "@typescript-eslint/eslint-plugin": "^6.3.0", "@typescript-eslint/parser": "^6.3.0", diff --git a/src/handlers/google.ts b/src/handlers/google.ts index 9caccf8..2e50b6c 100644 --- a/src/handlers/google.ts +++ b/src/handlers/google.ts @@ -3,12 +3,18 @@ import { IHandlerOutput } from "./handler.interface"; import { EngineParseError } from "../errors/main"; export default async function google( - window: DOMWindow + window: DOMWindow, ): Promise { const googleAnchors = [ ...window.document.querySelectorAll("a[jsname=ACyKwe]"), ] as HTMLAnchorElement[]; + if (!googleAnchors) { + throw new EngineParseError( + "Failed to find anchors in search result [google]", + ); + } + const results = googleAnchors .map((a: HTMLAnchorElement): GoogleProps => { const parsedHref = new URL(new URL(a.href).searchParams.get("url")!); @@ -20,12 +26,6 @@ export default async function google( }) .filter((a) => a.heading); - if (!googleAnchors) { - throw new EngineParseError( - "Failed to find anchors in search result [google]" - ); - } - const convertToFormat = (result: GoogleProps, isHtml: boolean) => { return isHtml ? `

${result.siteName} - ${result.heading}

` @@ -41,12 +41,12 @@ export default async function google( }); const search = window.document.getElementById( - "APjFqb" + "APjFqb", ) as HTMLTextAreaElement; const navLinks = [ ...window.document.querySelectorAll( - "table[class=AaVjTc] > tbody > tr > td > a" + "table[class=AaVjTc] > tbody > tr > td > a", ), ].map((l) => { const link = l as HTMLAnchorElement; @@ -81,6 +81,15 @@ export default async function google( }; } +export const GoogleDomains = [ + "google.*", + "google.co.*", + "google.com.*", + "www.google.*", + "www.google.co.*", + "www.google.com.*", +]; + interface GoogleProps { href: string; siteName: string; diff --git a/src/handlers/main.ts b/src/handlers/main.ts index 46ce881..e16ed22 100644 --- a/src/handlers/main.ts +++ b/src/handlers/main.ts @@ -5,22 +5,21 @@ import { JSDOM } from "jsdom"; import { DOMWindow } from "jsdom"; import readability from "./readability"; -import google from "./google"; -import stackoverflow from "./stackoverflow/main"; +import google, { GoogleDomains } from "./google"; +import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main"; import { generateProxyUrl } from "../utils/generate"; import isLocalResource from "../utils/islocal"; -import { - LocalResourceError, - NotHtmlMimetypeError, -} from "../errors/main"; +import micromatch from "micromatch"; + +import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main"; export default async function handlePage( url: string, // remote URL requestUrl: URL, // proxy URL engine?: string, - redirect_path: string = "get" + redirect_path: string = "get", ): Promise { const urlObj = new URL(url); @@ -39,7 +38,12 @@ export default async function handlePage( [...window.document.getElementsByTagName("a")].forEach((link) => { try { - link.href = generateProxyUrl(requestUrl, link.href, engine, redirect_path); + link.href = generateProxyUrl( + requestUrl, + link.href, + engine, + redirect_path, + ); } catch (_err) { // ignore TypeError: Invalid URL } @@ -49,25 +53,41 @@ export default async function handlePage( return engines[engine](window); } - return fallback[urlObj.host]?.(window) || fallback["*"](window); + for (let match of fallback) { + if (micromatch.isMatch(urlObj.hostname, match.pattern)) { + return match.engine(window); + } + } + + return engines.readability(window); } interface Engines { [key: string]: EngineFunction; } -type EngineFunction = (window: DOMWindow) => Promise; - export const engines: Engines = { readability, google, stackoverflow, }; +type EngineFunction = (window: DOMWindow) => Promise; +export type EngineMatch = { + pattern: string | string[]; + engine: EngineFunction; +}; +export type EnginesMatch = EngineMatch[]; + export const engineList: string[] = Object.keys(engines); -const fallback: Engines = { - "stackoverflow.com": engines.stackoverflow, - "www.google.com": engines.google, - "*": engines.readability, -}; +export const fallback: EnginesMatch = [ + { + pattern: GoogleDomains, + engine: engines.google, + }, + { + pattern: StackOverflowDomains, + engine: engines.stackoverflow, + }, +]; diff --git a/src/handlers/stackoverflow/main.ts b/src/handlers/stackoverflow/main.ts index 4ed4878..9730bc8 100644 --- a/src/handlers/stackoverflow/main.ts +++ b/src/handlers/stackoverflow/main.ts @@ -4,7 +4,7 @@ import { EngineParseError } from "../../errors/main"; import qPostsHandler from "./questions-posts"; export default async function stackoverflow( - window: DOMWindow + window: DOMWindow, ): Promise { const url = new URL(window.location.href); @@ -29,3 +29,14 @@ export default async function stackoverflow( return result; } + +export const StackOverflowDomains = [ + "stackoverflow.com", + "*.stackoverflow.com", + "*.stackexchange.com", + "askubuntu.com", + "stackapps.com", + "mathoverflow.net", + "superuser.com", + "serverfault.com", +];