diff --git a/package-lock.json b/package-lock.json index c40fba8..fac306d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,12 +20,14 @@ "fastify": "^4.21.0", "ip-range-check": "^0.2.0", "jsdom": "^22.1.0", - "json-schema-to-ts": "^2.9.2" + "json-schema-to-ts": "^2.9.2", + "micromatch": "^4.0.5" }, "devDependencies": { "@types/ejs": "^3.1.2", "@types/express": "^4.17.17", "@types/jsdom": "^21.1.1", + "@types/micromatch": "^4.0.2", "@types/node": "^20.4.10", "@typescript-eslint/eslint-plugin": "^6.3.0", "@typescript-eslint/parser": "^6.3.0", @@ -355,6 +357,12 @@ "@types/node": "*" } }, + "node_modules/@types/braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-U5tlMYa0U/2eFTmJgKcPWQOEICP173sJDa6OjHbj5Tv+NVaYcrq2xmdWpNXOwWYGwJu+jER/pfTLdoQ31q8PzA==", + "dev": true + }, "node_modules/@types/connect": { "version": "3.4.35", "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.35.tgz", @@ -415,6 +423,15 @@ "version": "7.0.12", "license": "MIT" }, + "node_modules/@types/micromatch": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/micromatch/-/micromatch-4.0.2.tgz", + "integrity": "sha512-oqXqVb0ci19GtH0vOA/U2TmHTcRY9kuZl4mqUxe0QmJAlIW13kzhuK5pi1i9+ngav8FjpSb9FVS/GE00GLX1VA==", + "dev": true, + "dependencies": { + "@types/braces": "*" + } + }, "node_modules/@types/mime": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.2.tgz", @@ -889,7 +906,6 @@ }, "node_modules/braces": { "version": "3.0.2", - "dev": true, "license": "MIT", "dependencies": { "fill-range": "^7.0.1" @@ -1660,7 +1676,6 @@ }, "node_modules/fill-range": { "version": "7.0.1", - "dev": true, "license": "MIT", "dependencies": { "to-regex-range": "^5.0.1" @@ -2038,7 +2053,6 @@ }, "node_modules/is-number": { "version": "7.0.0", - "dev": true, "license": "MIT", "engines": { "node": ">=0.12.0" @@ -2243,8 +2257,8 @@ }, "node_modules/micromatch": { "version": "4.0.5", - "dev": true, - "license": "MIT", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", + "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", "dependencies": { "braces": "^3.0.2", "picomatch": "^2.3.1" @@ -2492,7 +2506,6 @@ }, "node_modules/picomatch": { "version": "2.3.1", - "dev": true, "license": "MIT", "engines": { "node": ">=8.6" @@ -3071,7 +3084,6 @@ }, "node_modules/to-regex-range": { "version": "5.0.1", - "dev": true, "license": "MIT", "dependencies": { "is-number": "^7.0.0" diff --git a/package.json b/package.json index 793d9ef..e926e61 100644 --- a/package.json +++ b/package.json @@ -16,12 +16,14 @@ "fastify": "^4.21.0", "ip-range-check": "^0.2.0", "jsdom": "^22.1.0", - "json-schema-to-ts": "^2.9.2" + "json-schema-to-ts": "^2.9.2", + "micromatch": "^4.0.5" }, "devDependencies": { "@types/ejs": "^3.1.2", "@types/express": "^4.17.17", "@types/jsdom": "^21.1.1", + "@types/micromatch": "^4.0.2", "@types/node": "^20.4.10", "@typescript-eslint/eslint-plugin": "^6.3.0", "@typescript-eslint/parser": "^6.3.0", diff --git a/src/handlers/google.ts b/src/handlers/google.ts index 9caccf8..24a404e 100644 --- a/src/handlers/google.ts +++ b/src/handlers/google.ts @@ -3,7 +3,7 @@ import { IHandlerOutput } from "./handler.interface"; import { EngineParseError } from "../errors/main"; export default async function google( - window: DOMWindow + window: DOMWindow, ): Promise { const googleAnchors = [ ...window.document.querySelectorAll("a[jsname=ACyKwe]"), @@ -22,7 +22,7 @@ export default async function google( if (!googleAnchors) { throw new EngineParseError( - "Failed to find anchors in search result [google]" + "Failed to find anchors in search result [google]", ); } @@ -41,12 +41,12 @@ export default async function google( }); const search = window.document.getElementById( - "APjFqb" + "APjFqb", ) as HTMLTextAreaElement; const navLinks = [ ...window.document.querySelectorAll( - "table[class=AaVjTc] > tbody > tr > td > a" + "table[class=AaVjTc] > tbody > tr > td > a", ), ].map((l) => { const link = l as HTMLAnchorElement; @@ -81,6 +81,383 @@ export default async function google( }; } +export const GoogleDomains = [ + "google.com", + "google.ad", + "google.ae", + "google.com.af", + "google.com.ag", + "google.al", + "google.am", + "google.co.ao", + "google.com.ar", + "google.as", + "google.at", + "google.com.au", + "google.az", + "google.ba", + "google.com.bd", + "google.be", + "google.bf", + "google.bg", + "google.com.bh", + "google.bi", + "google.bj", + "google.com.bn", + "google.com.bo", + "google.com.br", + "google.bs", + "google.bt", + "google.co.bw", + "google.by", + "google.com.bz", + "google.ca", + "google.cd", + "google.cf", + "google.cg", + "google.ch", + "google.ci", + "google.co.ck", + "google.cl", + "google.cm", + "google.cn", + "google.com.co", + "google.co.cr", + "google.com.cu", + "google.cv", + "google.com.cy", + "google.cz", + "google.de", + "google.dj", + "google.dk", + "google.dm", + "google.com.do", + "google.dz", + "google.com.ec", + "google.ee", + "google.com.eg", + "google.es", + "google.com.et", + "google.fi", + "google.com.fj", + "google.fm", + "google.fr", + "google.ga", + "google.ge", + "google.gg", + "google.com.gh", + "google.com.gi", + "google.gl", + "google.gm", + "google.gr", + "google.com.gt", + "google.gy", + "google.com.hk", + "google.hn", + "google.hr", + "google.ht", + "google.hu", + "google.co.id", + "google.ie", + "google.co.il", + "google.im", + "google.co.in", + "google.iq", + "google.is", + "google.it", + "google.je", + "google.com.jm", + "google.jo", + "google.co.jp", + "google.co.ke", + "google.com.kh", + "google.ki", + "google.kg", + "google.co.kr", + "google.com.kw", + "google.kz", + "google.la", + "google.com.lb", + "google.li", + "google.lk", + "google.co.ls", + "google.lt", + "google.lu", + "google.lv", + "google.com.ly", + "google.co.ma", + "google.md", + "google.me", + "google.mg", + "google.mk", + "google.ml", + "google.com.mm", + "google.mn", + "google.com.mt", + "google.mu", + "google.mv", + "google.mw", + "google.com.mx", + "google.com.my", + "google.co.mz", + "google.com.na", + "google.com.ng", + "google.com.ni", + "google.ne", + "google.nl", + "google.no", + "google.com.np", + "google.nr", + "google.nu", + "google.co.nz", + "google.com.om", + "google.com.pa", + "google.com.pe", + "google.com.pg", + "google.com.ph", + "google.com.pk", + "google.pl", + "google.pn", + "google.com.pr", + "google.ps", + "google.pt", + "google.com.py", + "google.com.qa", + "google.ro", + "google.ru", + "google.rw", + "google.com.sa", + "google.com.sb", + "google.sc", + "google.se", + "google.com.sg", + "google.sh", + "google.si", + "google.sk", + "google.com.sl", + "google.sn", + "google.so", + "google.sm", + "google.sr", + "google.st", + "google.com.sv", + "google.td", + "google.tg", + "google.co.th", + "google.com.tj", + "google.tl", + "google.tm", + "google.tn", + "google.to", + "google.com.tr", + "google.tt", + "google.com.tw", + "google.co.tz", + "google.com.ua", + "google.co.ug", + "google.co.uk", + "google.com.uy", + "google.co.uz", + "google.com.vc", + "google.co.ve", + "google.co.vi", + "google.com.vn", + "google.vu", + "google.ws", + "google.rs", + "google.co.za", + "google.co.zm", + "google.co.zw", + "google.cat", + "www.google.com", + "www.google.ad", + "www.google.ae", + "www.google.com.af", + "www.google.com.ag", + "www.google.al", + "www.google.am", + "www.google.co.ao", + "www.google.com.ar", + "www.google.as", + "www.google.at", + "www.google.com.au", + "www.google.az", + "www.google.ba", + "www.google.com.bd", + "www.google.be", + "www.google.bf", + "www.google.bg", + "www.google.com.bh", + "www.google.bi", + "www.google.bj", + "www.google.com.bn", + "www.google.com.bo", + "www.google.com.br", + "www.google.bs", + "www.google.bt", + "www.google.co.bw", + "www.google.by", + "www.google.com.bz", + "www.google.ca", + "www.google.cd", + "www.google.cf", + "www.google.cg", + "www.google.ch", + "www.google.ci", + "www.google.co.ck", + "www.google.cl", + "www.google.cm", + "www.google.cn", + "www.google.com.co", + "www.google.co.cr", + "www.google.com.cu", + "www.google.cv", + "www.google.com.cy", + "www.google.cz", + "www.google.de", + "www.google.dj", + "www.google.dk", + "www.google.dm", + "www.google.com.do", + "www.google.dz", + "www.google.com.ec", + "www.google.ee", + "www.google.com.eg", + "www.google.es", + "www.google.com.et", + "www.google.fi", + "www.google.com.fj", + "www.google.fm", + "www.google.fr", + "www.google.ga", + "www.google.ge", + "www.google.gg", + "www.google.com.gh", + "www.google.com.gi", + "www.google.gl", + "www.google.gm", + "www.google.gr", + "www.google.com.gt", + "www.google.gy", + "www.google.com.hk", + "www.google.hn", + "www.google.hr", + "www.google.ht", + "www.google.hu", + "www.google.co.id", + "www.google.ie", + "www.google.co.il", + "www.google.im", + "www.google.co.in", + "www.google.iq", + "www.google.is", + "www.google.it", + "www.google.je", + "www.google.com.jm", + "www.google.jo", + "www.google.co.jp", + "www.google.co.ke", + "www.google.com.kh", + "www.google.ki", + "www.google.kg", + "www.google.co.kr", + "www.google.com.kw", + "www.google.kz", + "www.google.la", + "www.google.com.lb", + "www.google.li", + "www.google.lk", + "www.google.co.ls", + "www.google.lt", + "www.google.lu", + "www.google.lv", + "www.google.com.ly", + "www.google.co.ma", + "www.google.md", + "www.google.me", + "www.google.mg", + "www.google.mk", + "www.google.ml", + "www.google.com.mm", + "www.google.mn", + "www.google.com.mt", + "www.google.mu", + "www.google.mv", + "www.google.mw", + "www.google.com.mx", + "www.google.com.my", + "www.google.co.mz", + "www.google.com.na", + "www.google.com.ng", + "www.google.com.ni", + "www.google.ne", + "www.google.nl", + "www.google.no", + "www.google.com.np", + "www.google.nr", + "www.google.nu", + "www.google.co.nz", + "www.google.com.om", + "www.google.com.pa", + "www.google.com.pe", + "www.google.com.pg", + "www.google.com.ph", + "www.google.com.pk", + "www.google.pl", + "www.google.pn", + "www.google.com.pr", + "www.google.ps", + "www.google.pt", + "www.google.com.py", + "www.google.com.qa", + "www.google.ro", + "www.google.ru", + "www.google.rw", + "www.google.com.sa", + "www.google.com.sb", + "www.google.sc", + "www.google.se", + "www.google.com.sg", + "www.google.sh", + "www.google.si", + "www.google.sk", + "www.google.com.sl", + "www.google.sn", + "www.google.so", + "www.google.sm", + "www.google.sr", + "www.google.st", + "www.google.com.sv", + "www.google.td", + "www.google.tg", + "www.google.co.th", + "www.google.com.tj", + "www.google.tl", + "www.google.tm", + "www.google.tn", + "www.google.to", + "www.google.com.tr", + "www.google.tt", + "www.google.com.tw", + "www.google.co.tz", + "www.google.com.ua", + "www.google.co.ug", + "www.google.co.uk", + "www.google.com.uy", + "www.google.co.uz", + "www.google.com.vc", + "www.google.co.ve", + "www.google.co.vi", + "www.google.com.vn", + "www.google.vu", + "www.google.ws", + "www.google.rs", + "www.google.co.za", + "www.google.co.zm", + "www.google.co.zw", + "www.google.cat", +]; + interface GoogleProps { href: string; siteName: string; diff --git a/src/handlers/main.ts b/src/handlers/main.ts index 46ce881..e16ed22 100644 --- a/src/handlers/main.ts +++ b/src/handlers/main.ts @@ -5,22 +5,21 @@ import { JSDOM } from "jsdom"; import { DOMWindow } from "jsdom"; import readability from "./readability"; -import google from "./google"; -import stackoverflow from "./stackoverflow/main"; +import google, { GoogleDomains } from "./google"; +import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main"; import { generateProxyUrl } from "../utils/generate"; import isLocalResource from "../utils/islocal"; -import { - LocalResourceError, - NotHtmlMimetypeError, -} from "../errors/main"; +import micromatch from "micromatch"; + +import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main"; export default async function handlePage( url: string, // remote URL requestUrl: URL, // proxy URL engine?: string, - redirect_path: string = "get" + redirect_path: string = "get", ): Promise { const urlObj = new URL(url); @@ -39,7 +38,12 @@ export default async function handlePage( [...window.document.getElementsByTagName("a")].forEach((link) => { try { - link.href = generateProxyUrl(requestUrl, link.href, engine, redirect_path); + link.href = generateProxyUrl( + requestUrl, + link.href, + engine, + redirect_path, + ); } catch (_err) { // ignore TypeError: Invalid URL } @@ -49,25 +53,41 @@ export default async function handlePage( return engines[engine](window); } - return fallback[urlObj.host]?.(window) || fallback["*"](window); + for (let match of fallback) { + if (micromatch.isMatch(urlObj.hostname, match.pattern)) { + return match.engine(window); + } + } + + return engines.readability(window); } interface Engines { [key: string]: EngineFunction; } -type EngineFunction = (window: DOMWindow) => Promise; - export const engines: Engines = { readability, google, stackoverflow, }; +type EngineFunction = (window: DOMWindow) => Promise; +export type EngineMatch = { + pattern: string | string[]; + engine: EngineFunction; +}; +export type EnginesMatch = EngineMatch[]; + export const engineList: string[] = Object.keys(engines); -const fallback: Engines = { - "stackoverflow.com": engines.stackoverflow, - "www.google.com": engines.google, - "*": engines.readability, -}; +export const fallback: EnginesMatch = [ + { + pattern: GoogleDomains, + engine: engines.google, + }, + { + pattern: StackOverflowDomains, + engine: engines.stackoverflow, + }, +]; diff --git a/src/handlers/stackoverflow/main.ts b/src/handlers/stackoverflow/main.ts index 4ed4878..9730bc8 100644 --- a/src/handlers/stackoverflow/main.ts +++ b/src/handlers/stackoverflow/main.ts @@ -4,7 +4,7 @@ import { EngineParseError } from "../../errors/main"; import qPostsHandler from "./questions-posts"; export default async function stackoverflow( - window: DOMWindow + window: DOMWindow, ): Promise { const url = new URL(window.location.href); @@ -29,3 +29,14 @@ export default async function stackoverflow( return result; } + +export const StackOverflowDomains = [ + "stackoverflow.com", + "*.stackoverflow.com", + "*.stackexchange.com", + "askubuntu.com", + "stackapps.com", + "mathoverflow.net", + "superuser.com", + "serverfault.com", +];