From 239994ac5e7f58e0a9fe5131a851ca386af192ad Mon Sep 17 00:00:00 2001
From: Artemy
Date: Tue, 15 Aug 2023 11:18:08 +0300
Subject: [PATCH 1/4] refactor: change handlers argument to window
---
src/handlers/main.ts | 17 ++++++++++-------
src/handlers/readability.ts | 7 +++++--
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/src/handlers/main.ts b/src/handlers/main.ts
index a082260..21b1409 100644
--- a/src/handlers/main.ts
+++ b/src/handlers/main.ts
@@ -1,9 +1,10 @@
-import axios from "../types/axios";
import { IHandlerOutput } from "./handler.interface";
-import { readability } from "./readability";
+
+import axios from "../types/axios";
import { JSDOM } from "jsdom";
-type EngineFunction = (url: Document) => Promise<IHandlerOutput>;
+import readability from "./readability";
+import { DOMWindow } from "jsdom";
export default async function handlePage(
url: string,
@@ -15,27 +16,29 @@ export default async function handlePage(
}
const response = await axios.get(url);
- const document = new JSDOM(response.data, { url: url }).window.document;
+ const window = new JSDOM(response.data, { url: url }).window;
const UrlParsed = new URL(originalUrl);
- [...document.getElementsByTagName("a")].forEach((link) => {
+ [...window.document.getElementsByTagName("a")].forEach((link) => {
link.href = `${UrlParsed.origin}/?url=${link.href}${
engine && `&engine=${engine}`
}`;
});
if (engine) {
- return engines[engine](document);
+ return engines[engine](window);
}
const host = new URL(url).hostname;
- return fallback[host](document) || fallback["*"](document);
+ return fallback[host](window) || fallback["*"](window);
}
interface Engines {
[key: string]: EngineFunction;
}
+type EngineFunction = (window: DOMWindow) => Promise<IHandlerOutput>;
+
export const engines: Engines = {
readability,
};
diff --git a/src/handlers/readability.ts b/src/handlers/readability.ts
index fc9a011..f4388ea 100644
--- a/src/handlers/readability.ts
+++ b/src/handlers/readability.ts
@@ -1,8 +1,11 @@
import { Readability } from "@mozilla/readability";
import { IHandlerOutput } from "./handler.interface";
+import { DOMWindow } from "jsdom";
-export async function readability(document: Document): Promise<IHandlerOutput> {
- const reader = new Readability(document);
+export default async function readability(
+ window: DOMWindow
+): Promise<IHandlerOutput> {
+ const reader = new Readability(window.document);
const parsed = reader.parse();
if (!parsed) {
From 7000189d6d21077c120c6ee11326fd33ebe49b54 Mon Sep 17 00:00:00 2001
From: Artemy
Date: Tue, 15 Aug 2023 13:05:06 +0300
Subject: [PATCH 2/4] feat: google parsing and search
---
src/handlers/google.ts | 35 +++++++++++++++++++++++++++++++++++
src/handlers/main.ts | 10 +++++++---
src/routes/main.ts | 2 +-
src/types/axios.ts | 3 ++-
4 files changed, 45 insertions(+), 5 deletions(-)
create mode 100644 src/handlers/google.ts
diff --git a/src/handlers/google.ts b/src/handlers/google.ts
new file mode 100644
index 0000000..b5fc05d
--- /dev/null
+++ b/src/handlers/google.ts
@@ -0,0 +1,35 @@
+import { DOMWindow } from "jsdom";
+import { IHandlerOutput } from "./handler.interface";
+
+export default async function google(
+ window: DOMWindow
+): Promise<IHandlerOutput> {
+ const searchEl = window.document.querySelectorAll(
+ "#rso > div > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > a:nth-child(1)"
+ );
+
+ if (!searchEl) {
+ throw new Error("Failed to find search element [google]");
+ }
+ const results = [...searchEl];
+
+ const content = results.map((result) => {
+ const anchor = result as HTMLAnchorElement;
+ const heading = anchor.childNodes[1] as HTMLHeadingElement;
+ return `${heading.innerHTML}
`;
+ });
+
+ const searchForm = `
+
+
+
+
+ `;
+
+ return {
+ content: `${searchForm}${content.join("")}`,
+ textContent: "parsed.textContent",
+ title: window.document.title,
+ lang: "parsed.lang",
+ };
+}
diff --git a/src/handlers/main.ts b/src/handlers/main.ts
index 21b1409..d559c61 100644
--- a/src/handlers/main.ts
+++ b/src/handlers/main.ts
@@ -4,8 +4,8 @@ import axios from "../types/axios";
import { JSDOM } from "jsdom";
import readability from "./readability";
+import google from "./google";
import { DOMWindow } from "jsdom";
-
export default async function handlePage(
url: string,
originalUrl: string,
@@ -16,12 +16,13 @@ export default async function handlePage(
}
const response = await axios.get(url);
+
const window = new JSDOM(response.data, { url: url }).window;
const UrlParsed = new URL(originalUrl);
[...window.document.getElementsByTagName("a")].forEach((link) => {
link.href = `${UrlParsed.origin}/?url=${link.href}${
- engine && `&engine=${engine}`
+ engine ? `&engine=${engine}` : ""
}`;
});
@@ -30,7 +31,8 @@ export default async function handlePage(
}
const host = new URL(url).hostname;
- return fallback[host](window) || fallback["*"](window);
+
+ return fallback[host]?.(window) || fallback["*"](window);
}
interface Engines {
@@ -41,10 +43,12 @@ type EngineFunction = (window: DOMWindow) => Promise<IHandlerOutput>;
export const engines: Engines = {
readability,
+ google,
};
export const engineList: string[] = Object.keys(engines);
const fallback: Engines = {
+ "www.google.com": engines.google,
"*": engines.readability,
};
diff --git a/src/routes/main.ts b/src/routes/main.ts
index 28e2205..0c7f388 100644
--- a/src/routes/main.ts
+++ b/src/routes/main.ts
@@ -7,7 +7,7 @@ import { generateOriginUrl } from "../utils";
export default async function mainRoute(fastify: FastifyInstance) {
fastify.get("/", async (request: GetRequest, reply) => {
const remoteUrl = request.query.url;
- const engine = request.query.engine || "readability";
+ const engine = request.query.engine;
let format: string;
diff --git a/src/types/axios.ts b/src/types/axios.ts
index fbd3bd6..bbb8162 100644
--- a/src/types/axios.ts
+++ b/src/types/axios.ts
@@ -2,6 +2,7 @@ import axios from "axios";
export default axios.create({
headers: {
- "User-Agent": "txtdot",
+ "User-Agent":
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
},
});
From 23a6484e20960071aa8c2871dcf543256cccd8af Mon Sep 17 00:00:00 2001
From: Artemy
Date: Tue, 15 Aug 2023 13:11:09 +0300
Subject: [PATCH 3/4] feat: dynamic engine list in start page
---
src/routes/start.ts | 3 ++-
templates/start.ejs | 7 ++++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/routes/start.ts b/src/routes/start.ts
index 06fc883..1bf780b 100644
--- a/src/routes/start.ts
+++ b/src/routes/start.ts
@@ -1,7 +1,8 @@
import { FastifyInstance } from "fastify";
+import { engineList } from "../handlers/main";
export default async function parseRoute(fastify: FastifyInstance) {
fastify.get("/start", async (_, reply) => {
- return reply.view("/templates/start.ejs");
+ return reply.view("/templates/start.ejs", { engineList });
});
}
diff --git a/templates/start.ejs b/templates/start.ejs
index cfe0ee9..8fd9310 100644
--- a/templates/start.ejs
+++ b/templates/start.ejs
@@ -17,7 +17,12 @@
From 1630dbfa1753833634cbd50189a65ba6e59c673d Mon Sep 17 00:00:00 2001
From: Artemy
Date: Tue, 15 Aug 2023 13:37:21 +0300
Subject: [PATCH 4/4] fix: lang, textContent
---
src/handlers/google.ts | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/src/handlers/google.ts b/src/handlers/google.ts
index b5fc05d..ef7729d 100644
--- a/src/handlers/google.ts
+++ b/src/handlers/google.ts
@@ -4,32 +4,42 @@ import { IHandlerOutput } from "./handler.interface";
export default async function google(
window: DOMWindow
): Promise<IHandlerOutput> {
- const searchEl = window.document.querySelectorAll(
+ const googleAnchors = window.document.querySelectorAll(
"#rso > div > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > a:nth-child(1)"
);
- if (!searchEl) {
- throw new Error("Failed to find search element [google]");
+ if (!googleAnchors) {
+ throw new Error("Failed to find anchors in search result [google]");
}
- const results = [...searchEl];
+ const results = [...googleAnchors];
- const content = results.map((result) => {
+ const convertToFormat = (result: Element, isHtml: boolean) => {
const anchor = result as HTMLAnchorElement;
const heading = anchor.childNodes[1] as HTMLHeadingElement;
- return `${heading.innerHTML}
`;
+ return isHtml
+ ? `${heading.innerHTML}
`
+ : `${heading.innerHTML} > ${anchor.href}`;
+ };
+
+ const content = results.map((result) => {
+ return convertToFormat(result, true);
+ });
+
+ const textContent = results.map((result) => {
+ return convertToFormat(result, false);
});
const searchForm = `
-
+
+
`;
return {
content: `${searchForm}${content.join("")}`,
- textContent: "parsed.textContent",
+ textContent: textContent.join("\n"),
title: window.document.title,
- lang: "parsed.lang",
+ lang: window.document.documentElement.lang,
};
}