Merge pull request #16 from TxtDot/non-html

Redirect in case of non-html content (using exceptions)
This commit is contained in:
Artemy Egorov 2023-08-16 12:23:38 +03:00 committed by GitHub
commit 3b82b1232b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 41 additions and 23 deletions

3
src/errors.ts Normal file
View File

@ -0,0 +1,3 @@
/**
 * Signals that a parsing engine could not extract content from a page.
 *
 * Thrown by the `google` handler when no result anchors are found and by the
 * `readability` handler when parsing produces no result.
 */
export class EngineParseError extends Error {
  /**
   * @param message - Human-readable description of what failed to parse.
   */
  constructor(message?: string) {
    super(message);
    // Without this, `name` stays "Error" and logs cannot tell error kinds apart.
    this.name = "EngineParseError";
    // Restore the prototype chain so `instanceof` keeps working if the project
    // is ever compiled to an ES5 target.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
/**
 * Signals that a caller-supplied parameter has an unsupported value.
 *
 * Thrown by `handlePage` when the requested engine is not in the engine list.
 */
export class InvalidParameterError extends Error {
  /**
   * @param message - Human-readable description of the rejected parameter.
   */
  constructor(message?: string) {
    super(message);
    // Without this, `name` stays "Error" and logs cannot tell error kinds apart.
    this.name = "InvalidParameterError";
    // Restore the prototype chain so `instanceof` keeps working if the project
    // is ever compiled to an ES5 target.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
/**
 * Signals that a fetched resource is not an HTML document.
 *
 * Thrown by `handlePage` when the response's `content-type` header is present
 * and does not contain `text/html`; the `/get` route catches it and redirects
 * the client to the remote URL instead of rendering it.
 */
export class NotHtmlMimetypeError extends Error {
  /**
   * @param message - Optional description; callers may construct this error
   *   with no arguments.
   */
  constructor(message?: string) {
    super(message);
    // Without this, `name` stays "Error" and logs cannot tell error kinds apart.
    this.name = "NotHtmlMimetypeError";
    // The route relies on `instanceof NotHtmlMimetypeError`; restoring the
    // prototype chain keeps that check valid even under an ES5 compile target.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

View File

@ -1,5 +1,6 @@
import { DOMWindow } from "jsdom"; import { DOMWindow } from "jsdom";
import { IHandlerOutput } from "./handler.interface"; import { IHandlerOutput } from "./handler.interface";
import { EngineParseError } from "../errors";
export default async function google( export default async function google(
window: DOMWindow window: DOMWindow
@ -9,7 +10,9 @@ export default async function google(
); );
if (!googleAnchors) { if (!googleAnchors) {
throw new Error("Failed to find anchors in search result [google]"); throw new EngineParseError(
"Failed to find anchors in search result [google]"
);
} }
const results = [...googleAnchors]; const results = [...googleAnchors];

View File

@ -8,6 +8,8 @@ import readability from "./readability";
import google from "./google"; import google from "./google";
import { generateProxyUrl } from "../utils"; import { generateProxyUrl } from "../utils";
import { InvalidParameterError, NotHtmlMimetypeError } from "../errors";
export default async function handlePage( export default async function handlePage(
url: string, url: string,
requestUrl: URL, requestUrl: URL,
@ -15,18 +17,24 @@ export default async function handlePage(
): Promise<IHandlerOutput> { ): Promise<IHandlerOutput> {
if (engine && engineList.indexOf(engine) === -1) { if (engine && engineList.indexOf(engine) === -1) {
throw new Error("Invalid engine"); throw new InvalidParameterError("Invalid engine");
} }
const response = await axios.get(url); const response = await axios.get(url);
const mime: string | undefined = (
response.headers["content-type"]?.toString()
);
if (mime && mime.indexOf("text/html") === -1) {
throw new NotHtmlMimetypeError();
}
const window = new JSDOM(response.data, { url: url }).window; const window = new JSDOM(response.data, { url: url }).window;
[...window.document.getElementsByTagName("a")].forEach((link) => { [...window.document.getElementsByTagName("a")].forEach((link) => {
link.href = generateProxyUrl(requestUrl, link.href, engine); link.href = generateProxyUrl(requestUrl, link.href, engine);
}); });
// maybe implement image proxy?
if (engine) { if (engine) {
return engines[engine](window); return engines[engine](window);
} }

View File

@ -1,6 +1,7 @@
import { Readability } from "@mozilla/readability"; import { Readability } from "@mozilla/readability";
import { IHandlerOutput } from "./handler.interface"; import { IHandlerOutput } from "./handler.interface";
import { DOMWindow } from "jsdom"; import { DOMWindow } from "jsdom";
import { EngineParseError } from "../errors";
export default async function readability( export default async function readability(
window: DOMWindow window: DOMWindow
@ -9,7 +10,7 @@ export default async function readability(
const parsed = reader.parse(); const parsed = reader.parse();
if (!parsed) { if (!parsed) {
throw new Error("Failed to parse [readability]"); throw new EngineParseError("Failed to parse [readability]");
} }
return { return {

View File

@ -4,6 +4,8 @@ import { GetSchema, IGetSchema } from "../types/requests";
import handlePage from "../handlers/main"; import handlePage from "../handlers/main";
import { generateRequestUrl } from "../utils"; import { generateRequestUrl } from "../utils";
import { NotHtmlMimetypeError } from "../errors";
export default async function getRoute(fastify: FastifyInstance) { export default async function getRoute(fastify: FastifyInstance) {
fastify.get<IGetSchema>( fastify.get<IGetSchema>(
"/get", "/get",
@ -12,29 +14,30 @@ export default async function getRoute(fastify: FastifyInstance) {
const remoteUrl = request.query.url; const remoteUrl = request.query.url;
const engine = request.query.engine; const engine = request.query.engine;
let format: string; let parsed;
try {
parsed = await handlePage(
remoteUrl,
generateRequestUrl(
request.protocol,
request.hostname,
request.originalUrl
),
engine
);
} catch (err) {
if (err instanceof NotHtmlMimetypeError) {
return reply.redirect(301, remoteUrl);
} else {
throw err;
}
}
if (request.query.format === "text") { if (request.query.format === "text") {
reply.type("text/plain; charset=utf-8"); reply.type("text/plain; charset=utf-8");
format = "text";
} else {
reply.type("text/html; charset=utf-8");
format = "html";
}
const parsed = await handlePage(
remoteUrl,
generateRequestUrl(
request.protocol,
request.hostname,
request.originalUrl
),
engine
);
if (format === "text") {
return parsed.textContent; return parsed.textContent;
} else { } else {
reply.type("text/html; charset=utf-8");
return reply.view("/templates/get.ejs", { parsed: parsed }); return reply.view("/templates/get.ejs", { parsed: parsed });
} }
} }