From 239994ac5e7f58e0a9fe5131a851ca386af192ad Mon Sep 17 00:00:00 2001 From: Artemy Date: Tue, 15 Aug 2023 11:18:08 +0300 Subject: [PATCH 1/4] refactor: change handlers argument to window --- src/handlers/main.ts | 17 ++++++++++------- src/handlers/readability.ts | 7 +++++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/handlers/main.ts b/src/handlers/main.ts index a082260..21b1409 100644 --- a/src/handlers/main.ts +++ b/src/handlers/main.ts @@ -1,9 +1,10 @@ -import axios from "../types/axios"; import { IHandlerOutput } from "./handler.interface"; -import { readability } from "./readability"; + +import axios from "../types/axios"; import { JSDOM } from "jsdom"; -type EngineFunction = (url: Document) => Promise; +import readability from "./readability"; +import { DOMWindow } from "jsdom"; export default async function handlePage( url: string, @@ -15,27 +16,29 @@ export default async function handlePage( } const response = await axios.get(url); - const document = new JSDOM(response.data, { url: url }).window.document; + const window = new JSDOM(response.data, { url: url }).window; const UrlParsed = new URL(originalUrl); - [...document.getElementsByTagName("a")].forEach((link) => { + [...window.document.getElementsByTagName("a")].forEach((link) => { link.href = `${UrlParsed.origin}/?url=${link.href}${ engine && `&engine=${engine}` }`; }); if (engine) { - return engines[engine](document); + return engines[engine](window); } const host = new URL(url).hostname; - return fallback[host](document) || fallback["*"](document); + return fallback[host](window) || fallback["*"](window); } interface Engines { [key: string]: EngineFunction; } +type EngineFunction = (window: DOMWindow) => Promise; + export const engines: Engines = { readability, }; diff --git a/src/handlers/readability.ts b/src/handlers/readability.ts index fc9a011..f4388ea 100644 --- a/src/handlers/readability.ts +++ b/src/handlers/readability.ts @@ -1,8 +1,11 @@ import { Readability } from "@mozilla/readability"; import { IHandlerOutput } from "./handler.interface"; +import { DOMWindow } from "jsdom"; -export async function readability(document: Document): Promise { - const reader = new Readability(document); +export default async function readability( + window: DOMWindow +): Promise { + const reader = new Readability(window.document); const parsed = reader.parse(); if (!parsed) { From 7000189d6d21077c120c6ee11326fd33ebe49b54 Mon Sep 17 00:00:00 2001 From: Artemy Date: Tue, 15 Aug 2023 13:05:06 +0300 Subject: [PATCH 2/4] feat: google parsing and search --- src/handlers/google.ts | 35 +++++++++++++++++++++++++++++++++++ src/handlers/main.ts | 10 +++++++--- src/routes/main.ts | 2 +- src/types/axios.ts | 3 ++- 4 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 src/handlers/google.ts diff --git a/src/handlers/google.ts b/src/handlers/google.ts new file mode 100644 index 0000000..b5fc05d --- /dev/null +++ b/src/handlers/google.ts @@ -0,0 +1,35 @@ +import { DOMWindow } from "jsdom"; +import { IHandlerOutput } from "./handler.interface"; + +export default async function google( + window: DOMWindow +): Promise { + const searchEl = window.document.querySelectorAll( + "#rso > div > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > a:nth-child(1)" + ); + + if (!searchEl) { + throw new Error("Failed to find search element [google]"); + } + const results = [...searchEl]; + + const content = results.map((result) => { + const anchor = result as HTMLAnchorElement; + const heading = anchor.childNodes[1] as HTMLHeadingElement; + return `

${heading.innerHTML}

`; + }); + + const searchForm = ` +
+ + +
+ `; + + return { + content: `${searchForm}${content.join("")}`, + textContent: "parsed.textContent", + title: window.document.title, + lang: "parsed.lang", + }; +} diff --git a/src/handlers/main.ts b/src/handlers/main.ts index 21b1409..d559c61 100644 --- a/src/handlers/main.ts +++ b/src/handlers/main.ts @@ -4,8 +4,8 @@ import axios from "../types/axios"; import { JSDOM } from "jsdom"; import readability from "./readability"; +import google from "./google"; import { DOMWindow } from "jsdom"; - export default async function handlePage( url: string, originalUrl: string, @@ -16,12 +16,13 @@ export default async function handlePage( } const response = await axios.get(url); + const window = new JSDOM(response.data, { url: url }).window; const UrlParsed = new URL(originalUrl); [...window.document.getElementsByTagName("a")].forEach((link) => { link.href = `${UrlParsed.origin}/?url=${link.href}${ - engine && `&engine=${engine}` + engine ? `&engine=${engine}` : "" }`; }); @@ -30,7 +31,8 @@ export default async function handlePage( } const host = new URL(url).hostname; - return fallback[host](window) || fallback["*"](window); + + return fallback[host]?.(window) || fallback["*"](window); } interface Engines { @@ -41,10 +43,12 @@ type EngineFunction = (window: DOMWindow) => Promise; export const engines: Engines = { readability, + google, }; export const engineList: string[] = Object.keys(engines); const fallback: Engines = { + "www.google.com": engines.google, "*": engines.readability, }; diff --git a/src/routes/main.ts b/src/routes/main.ts index 28e2205..0c7f388 100644 --- a/src/routes/main.ts +++ b/src/routes/main.ts @@ -7,7 +7,7 @@ import { generateOriginUrl } from "../utils"; export default async function mainRoute(fastify: FastifyInstance) { fastify.get("/", async (request: GetRequest, reply) => { const remoteUrl = request.query.url; - const engine = request.query.engine || "readability"; + const engine = request.query.engine; let format: string; diff --git a/src/types/axios.ts b/src/types/axios.ts index fbd3bd6..bbb8162 100644 --- a/src/types/axios.ts +++ b/src/types/axios.ts @@ -2,6 +2,7 @@ import axios from "axios"; export default axios.create({ headers: { - "User-Agent": "txtdot", + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", }, }); From 23a6484e20960071aa8c2871dcf543256cccd8af Mon Sep 17 00:00:00 2001 From: Artemy Date: Tue, 15 Aug 2023 13:11:09 +0300 Subject: [PATCH 3/4] feat: dynamic engine list in start page --- src/routes/start.ts | 3 ++- templates/start.ejs | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/routes/start.ts b/src/routes/start.ts index 06fc883..1bf780b 100644 --- a/src/routes/start.ts +++ b/src/routes/start.ts @@ -1,7 +1,8 @@ import { FastifyInstance } from "fastify"; +import { engineList } from "../handlers/main"; export default async function parseRoute(fastify: FastifyInstance) { fastify.get("/start", async (_, reply) => { - return reply.view("/templates/start.ejs"); + return reply.view("/templates/start.ejs", { engineList }); }); } diff --git a/templates/start.ejs b/templates/start.ejs index cfe0ee9..8fd9310 100644 --- a/templates/start.ejs +++ b/templates/start.ejs @@ -17,7 +17,12 @@

From 1630dbfa1753833634cbd50189a65ba6e59c673d Mon Sep 17 00:00:00 2001 From: Artemy Date: Tue, 15 Aug 2023 13:37:21 +0300 Subject: [PATCH 4/4] fix: lang, textContent --- src/handlers/google.ts | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/handlers/google.ts b/src/handlers/google.ts index b5fc05d..ef7729d 100644 --- a/src/handlers/google.ts +++ b/src/handlers/google.ts @@ -4,32 +4,42 @@ import { IHandlerOutput } from "./handler.interface"; export default async function google( window: DOMWindow ): Promise { - const searchEl = window.document.querySelectorAll( + const googleAnchors = window.document.querySelectorAll( "#rso > div > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > a:nth-child(1)" ); - if (!searchEl) { - throw new Error("Failed to find search element [google]"); + if (!googleAnchors) { + throw new Error("Failed to find anchors in search result [google]"); } - const results = [...searchEl]; + const results = [...googleAnchors]; - const content = results.map((result) => { + const convertToFormat = (result: Element, isHtml: boolean) => { const anchor = result as HTMLAnchorElement; const heading = anchor.childNodes[1] as HTMLHeadingElement; - return `

${heading.innerHTML}

`; + return isHtml + ? `

${heading.innerHTML}

` + : `${heading.innerHTML} > ${anchor.href}`; + }; + + const content = results.map((result) => { + return convertToFormat(result, true); + }); + + const textContent = results.map((result) => { + return convertToFormat(result, false); }); const searchForm = ` -
+
-
+ `; return { content: `${searchForm}${content.join("")}`, - textContent: "parsed.textContent", + textContent: textContent.join("\n"), title: window.document.title, - lang: "parsed.lang", + lang: window.document.documentElement.lang, }; }