From 87cba82ee69398801fa722b6047eb54da4560bab Mon Sep 17 00:00:00 2001 From: Artemy Date: Fri, 18 Aug 2023 11:09:02 +0300 Subject: [PATCH 1/3] fix: google results --- package-lock.json | 4 ++-- package.json | 2 +- src/handlers/google.ts | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index a18ba24..8ce5347 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "txtdot", - "version": "1.0.0", + "version": "1.1.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "txtdot", - "version": "1.0.0", + "version": "1.1.1", "license": "MIT", "dependencies": { "@fastify/static": "^6.10.2", diff --git a/package.json b/package.json index 6f3f68a..87583ee 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "txtdot", - "version": "1.0.0", + "version": "1.1.1", "private": true, "description": "", "main": "dist/app.js", diff --git a/src/handlers/google.ts b/src/handlers/google.ts index 17c6df2..8501652 100644 --- a/src/handlers/google.ts +++ b/src/handlers/google.ts @@ -10,15 +10,15 @@ export default async function google( ] as HTMLAnchorElement[]; const googleNames = [...window.document.querySelectorAll(".VuuXrf")]; - const results = googleAnchors.map( - (a: HTMLAnchorElement, i: number): GoogleProps => { + const results = googleAnchors + .map((a: HTMLAnchorElement, i: number): GoogleProps => { return { href: a.href!, siteName: googleNames[i].textContent!, - heading: a.childNodes[1].textContent!, + heading: a.childNodes[1]?.textContent, }; - } - ); + }) + .filter((a) => a.heading); if (!googleAnchors) { throw new EngineParseError( @@ -90,5 +90,5 @@ export default async function google( interface GoogleProps { href: string; siteName: string; - heading: string; + heading: string | null; } From 08a89190ed64457c004535dabf642407a83a238c Mon Sep 17 00:00:00 2001 From: Artemy Date: Fri, 18 Aug 2023 11:38:05 +0300 Subject: [PATCH 2/3] fix: proxy url --- src/app.ts | 10 +++++++--- src/config/config.service.ts | 5 ++++- src/handlers/main.ts | 3 ++- src/routes/get.ts | 11 ++++++++++- src/routes/parse.ts | 11 ++++++++++- src/routes/raw-html.ts | 12 +++++++++++- src/utils/generate.ts | 8 ++++++-- 7 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/app.ts b/src/app.ts index f92fc18..eb3df5c 100644 --- a/src/app.ts +++ b/src/app.ts @@ -27,6 +27,7 @@ class App { async init() { const fastify = Fastify({ logger: true, + trustProxy: this.config.reverse_proxy_enabled, }); fastify.register(fastifyStatic, { @@ -58,9 +59,12 @@ class App { fastify.setErrorHandler(errorHandler); - fastify.listen({ host: this.config.host, port: this.config.port }, (err) => { - err && console.log(err); - }); + fastify.listen( + { host: this.config.host, port: this.config.port }, + (err) => { + err && console.log(err); + } + ); } } diff --git a/src/config/config.service.ts b/src/config/config.service.ts index f88c60c..10b8bfc 100644 --- a/src/config/config.service.ts +++ b/src/config/config.service.ts @@ -3,6 +3,7 @@ import { config } from "dotenv"; export class ConfigService { public readonly host: string; public readonly port: number; + public readonly reverse_proxy_enabled: boolean; constructor() { const parsed = config().parsed; @@ -11,7 +12,9 @@ export class ConfigService { throw new Error("Invalid .env file"); } - this.host = process.env.HOST || 'localhost'; + this.host = process.env.HOST || "localhost"; this.port = Number(process.env.PORT) || 8080; + this.reverse_proxy_enabled = + Boolean(process.env.REVERSE_PROXY_ENABLED) || false; } } diff --git a/src/handlers/main.ts b/src/handlers/main.ts index 01d8983..6769283 100644 --- a/src/handlers/main.ts +++ b/src/handlers/main.ts @@ -18,6 +18,7 @@ import { export default async function handlePage( url: string, // remote URL + requestUrl: URL, // proxy URL engine?: string ): Promise { const urlObj = new URL(url); @@ -40,7 +41,7 @@ export default async function handlePage( const window = new JSDOM(response.data, { url }).window; [...window.document.getElementsByTagName("a")].forEach((link) => { - link.href = generateProxyUrl(link.href, engine); + link.href = generateProxyUrl(requestUrl, link.href, engine); }); if (engine) { diff --git a/src/routes/get.ts b/src/routes/get.ts index e6afe6b..dbd1e59 100644 --- a/src/routes/get.ts +++ b/src/routes/get.ts @@ -2,6 +2,7 @@ import { FastifyInstance } from "fastify"; import { GetSchema, IGetSchema } from "../types/requests"; import handlePage from "../handlers/main"; +import { generateRequestUrl } from "../utils/generate"; export default async function getRoute(fastify: FastifyInstance) { fastify.get( @@ -11,7 +12,15 @@ export default async function getRoute(fastify: FastifyInstance) { const remoteUrl = request.query.url; const engine = request.query.engine; - const parsed = await handlePage(remoteUrl, engine); + const parsed = await handlePage( + remoteUrl, + generateRequestUrl( + request.protocol, + request.hostname, + request.originalUrl + ), + engine + ); if (request.query.format === "text") { reply.type("text/plain; charset=utf-8"); diff --git a/src/routes/parse.ts b/src/routes/parse.ts index 09bb06a..bca6363 100644 --- a/src/routes/parse.ts +++ b/src/routes/parse.ts @@ -1,13 +1,22 @@ import { EngineRequest, IParseSchema, parseSchema } from "../types/requests"; import { FastifyInstance } from "fastify"; import handlePage from "../handlers/main"; +import { generateRequestUrl } from "../utils/generate"; export default async function parseRoute(fastify: FastifyInstance) { fastify.get( "/parse", { schema: parseSchema }, async (request: EngineRequest) => { - return await handlePage(request.query.url, request.query.engine); + return await handlePage( + request.query.url, + generateRequestUrl( + request.protocol, + request.hostname, + request.originalUrl + ), + request.query.engine + ); } ); } diff --git a/src/routes/raw-html.ts b/src/routes/raw-html.ts index 20bcae5..30c520f 100644 --- a/src/routes/raw-html.ts +++ b/src/routes/raw-html.ts @@ -2,13 +2,23 @@ import { FastifyInstance } from "fastify"; import { GetRequest, IParseSchema, rawHtmlSchema } from "../types/requests"; import handlePage from "../handlers/main"; +import { generateRequestUrl } from "../utils/generate"; export default async function rawHtml(fastify: FastifyInstance) { fastify.get( "/raw-html", { schema: rawHtmlSchema }, async (request: GetRequest) => { - return (await handlePage(request.query.url)).content; + return ( + await handlePage( + request.query.url, + generateRequestUrl( + request.protocol, + request.hostname, + request.originalUrl + ) + ) + ).content; } ); } diff --git a/src/utils/generate.ts b/src/utils/generate.ts index 4a0095a..08f5c90 100644 --- a/src/utils/generate.ts +++ b/src/utils/generate.ts @@ -6,8 +6,12 @@ export function generateRequestUrl( return new URL(`${protocol}://${host}${originalUrl}`); } -export function generateProxyUrl(href: string, engine?: string): string { +export function generateProxyUrl( + requestUrl: URL, + href: string, + engine?: string +): string { const urlParam = `?url=${encodeURIComponent(href)}`; const engineParam = engine ? `&engine=${engine}` : ""; - return `/get${urlParam}${engineParam}`; + return `${requestUrl.origin}/get${urlParam}${engineParam}`; } From 82a7fb96e6aeb191c64d98f10b422be0441af218 Mon Sep 17 00:00:00 2001 From: Artemy Date: Fri, 18 Aug 2023 11:38:30 +0300 Subject: [PATCH 3/3] fix: google name parsing --- src/handlers/google.ts | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/handlers/google.ts b/src/handlers/google.ts index 8501652..9caccf8 100644 --- a/src/handlers/google.ts +++ b/src/handlers/google.ts @@ -8,13 +8,13 @@ export default async function google( const googleAnchors = [ ...window.document.querySelectorAll("a[jsname=ACyKwe]"), ] as HTMLAnchorElement[]; - const googleNames = [...window.document.querySelectorAll(".VuuXrf")]; const results = googleAnchors - .map((a: HTMLAnchorElement, i: number): GoogleProps => { + .map((a: HTMLAnchorElement): GoogleProps => { + const parsedHref = new URL(new URL(a.href).searchParams.get("url")!); return { href: a.href!, - siteName: googleNames[i].textContent!, + siteName: parsedHref.hostname, heading: a.childNodes[1]?.textContent, }; }) @@ -26,12 +26,6 @@ export default async function google( ); } - if (!googleNames) { - throw new EngineParseError( - "Failed to find names in search result [google]" - ); - } - const convertToFormat = (result: GoogleProps, isHtml: boolean) => { return isHtml ? `

${result.siteName} - ${result.heading}

`