diff --git a/.env.example b/.env.example index 22b7787..5f3fb65 100644 --- a/.env.example +++ b/.env.example @@ -2,3 +2,6 @@ HOST=127.0.0.1 # 0.0.0.0 if txtdot is not behind reverse proxy PORT=8080 REVERSE_PROXY=true # only for reverse proxy; see docs + +PROXY_RES=true +SWAGGER=false # whether to add API docs route or not diff --git a/src/app.ts b/src/app.ts index 65f5a5d..70a935a 100644 --- a/src/app.ts +++ b/src/app.ts @@ -1,5 +1,3 @@ -import { ConfigService } from "./config/config.service"; - import path from "path"; import Fastify from "fastify"; @@ -9,25 +7,23 @@ import fastifySwagger from "@fastify/swagger"; import fastifySwaggerUi from "@fastify/swagger-ui"; import ejs from "ejs"; -import getRoute from "./routes/browser/get"; -import parseRoute from "./routes/api/parse"; import indexRoute from "./routes/browser/index"; +import getRoute from "./routes/browser/get"; +import proxyRoute from "./routes/browser/proxy"; +import parseRoute from "./routes/api/parse"; import rawHtml from "./routes/api/raw-html"; import publicConfig from "./publicConfig"; import errorHandler from "./errors/handler"; +import getConfig from "./config/main"; class App { - config: ConfigService; - - constructor() { - this.config = new ConfigService(); - } - async init() { + const config = getConfig(); + const fastify = Fastify({ logger: true, - trustProxy: this.config.reverse_proxy, + trustProxy: config.reverse_proxy, }); fastify.register(fastifyStatic, { @@ -41,26 +37,32 @@ class App { }, }); - await fastify.register(fastifySwagger, { - swagger: { - info: { - title: "TXTDot API", - description: publicConfig.description, - version: publicConfig.version, - }, - } - }); - await fastify.register(fastifySwaggerUi, { routePrefix: "/doc" }); + if (config.swagger) { + await fastify.register(fastifySwagger, { + swagger: { + info: { + title: "TXTDot API", + description: publicConfig.description, + version: publicConfig.version, + }, + } + }); + await fastify.register(fastifySwaggerUi, { routePrefix: "/doc" }); + } fastify.register(indexRoute); fastify.register(getRoute); + + if (config.proxy_res) + fastify.register(proxyRoute); + fastify.register(parseRoute); fastify.register(rawHtml); fastify.setErrorHandler(errorHandler); fastify.listen( - { host: this.config.host, port: this.config.port }, + { host: config.host, port: config.port }, (err) => { err && console.log(err); } diff --git a/src/config/config.service.ts b/src/config/config.service.ts index fa870ab..3ff93e3 100644 --- a/src/config/config.service.ts +++ b/src/config/config.service.ts @@ -4,6 +4,8 @@ export class ConfigService { public readonly host: string; public readonly port: number; public readonly reverse_proxy: boolean; + public readonly proxy_res: boolean; + public readonly swagger: boolean; constructor() { config(); @@ -11,6 +13,14 @@ export class ConfigService { this.host = process.env.HOST || "0.0.0.0"; this.port = Number(process.env.PORT) || 8080; - this.reverse_proxy = Boolean(process.env.REVERSE_PROXY) || false; + this.reverse_proxy = this.parseBool(process.env.REVERSE_PROXY, false); + + this.proxy_res = this.parseBool(process.env.PROXY_RES, true); + this.swagger = this.parseBool(process.env.SWAGGER, false); + } + + parseBool(value: string | undefined, def: boolean): boolean { + if (!value) return def; + return value === "true" || value === "1"; } } diff --git a/src/config/main.ts b/src/config/main.ts new file mode 100644 index 0000000..37dcad3 --- /dev/null +++ b/src/config/main.ts @@ -0,0 +1,12 @@ +import { ConfigService } from "./config.service"; + +let configSvc: ConfigService | undefined; + +export default function getConfig(): ConfigService { + if (configSvc) { + return configSvc; + } + + configSvc = new ConfigService(); + return configSvc; +} diff --git a/src/errors/handler.ts b/src/errors/handler.ts index 1b0e472..5d28722 100644 --- a/src/errors/handler.ts +++ b/src/errors/handler.ts @@ -3,6 +3,7 @@ import { NotHtmlMimetypeError, TxtDotError } from "./main"; import { getFastifyError } from "./validation"; import { IGetSchema } from "../types/requests/browser"; +import getConfig from "../config/main"; export default function errorHandler( error: Error, @@ -29,10 +30,6 @@ function apiErrorHandler(error: Error, reply: FastifyReply) { }); } - if (error instanceof NotHtmlMimetypeError) { - return generateResponse(501); - } - if (getFastifyError(error)?.statusCode === 400) { return generateResponse(400); } @@ -45,10 +42,6 @@ function apiErrorHandler(error: Error, reply: FastifyReply) { } function htmlErrorHandler(error: Error, reply: FastifyReply, url: string) { - if (error instanceof NotHtmlMimetypeError) { - return reply.redirect(301, error.url); - } - if (getFastifyError(error)?.statusCode === 400) { return reply.code(400).view("/templates/error.ejs", { url, @@ -62,6 +55,10 @@ function htmlErrorHandler(error: Error, reply: FastifyReply, url: string) { url, code: error.code, description: error.description, + proxyBtn: ( + error instanceof NotHtmlMimetypeError && + getConfig().proxy_res + ), }); } diff --git a/src/errors/main.ts b/src/errors/main.ts index 29190c9..81f983a 100644 --- a/src/errors/main.ts +++ b/src/errors/main.ts @@ -1,9 +1,15 @@ +import getConfig from "../config/main"; + export abstract class TxtDotError extends Error { code: number; name: string; description: string; - constructor(code: number, name: string, description: string) { + constructor( + code: number, + name: string, + description: string, + ) { super(description); this.code = code; this.name = name; @@ -13,22 +19,34 @@ export abstract class TxtDotError extends Error { export class EngineParseError extends TxtDotError { constructor(message: string) { - super(422, "EngineParseError", `Parse error: ${message}`); + super( + 422, + "EngineParseError", + `Parse error: ${message}`, + ); } } export class LocalResourceError extends TxtDotError { constructor() { - super(403, "LocalResourceError", "Proxying local resources is forbidden."); + super( + 403, + "LocalResourceError", + "Proxying local resources is forbidden.", + ); } } -export class NotHtmlMimetypeError extends Error { - name: string = "NotHtmlMimetypeError"; - url: string; - - constructor(url: string) { - super(); - this.url = url; +export class NotHtmlMimetypeError extends TxtDotError { + constructor() { + super( + 421, + "NotHtmlMimetypeError", + "Received non-HTML content, " + ( + getConfig().proxy_res ? + "use proxy instead of parser." : + "proxying is disabled by the instance admin." + ), + ); } } diff --git a/src/handlers/handler-input.ts b/src/handlers/handler-input.ts index 7c7693e..ad510b9 100644 --- a/src/handlers/handler-input.ts +++ b/src/handlers/handler-input.ts @@ -1,26 +1,20 @@ import { JSDOM } from "jsdom"; -import { generateProxyUrl } from "../utils/generate"; export class HandlerInput { private data: string; private url: string; - private requestUrl: URL; - private engine?: string; - private redirectPath: string; private dom?: JSDOM; constructor( data: string, url: string, - requestUrl: URL, - engine?: string, - redirectPath: string = "get", ) { this.data = data; this.url = url; - this.requestUrl = requestUrl; - this.engine = engine; - this.redirectPath = redirectPath; + } + + getUrl(): string { + return this.url; } parseDom(): JSDOM { @@ -29,25 +23,6 @@ export class HandlerInput { } this.dom = new JSDOM(this.data, { url: this.url }); - - const links = this.dom.window.document.getElementsByTagName("a"); - for (const link of links) { - try { - link.href = generateProxyUrl( - this.requestUrl, - link.href, - this.engine, - this.redirectPath, - ); - } catch (_err) { - // ignore TypeError: Invalid URL - } - } - return this.dom; } - - getUrl(): string { - return this.url; - } } diff --git a/src/handlers/main.ts b/src/handlers/main.ts index d3fa83a..45e5e2f 100644 --- a/src/handlers/main.ts +++ b/src/handlers/main.ts @@ -4,6 +4,8 @@ import axios from "../types/axios"; import micromatch from "micromatch"; +import { JSDOM } from "jsdom"; + import readability from "./readability"; import google, { GoogleDomains } from "./google"; import stackoverflow, { StackOverflowDomains } from "./stackoverflow/main"; @@ -14,6 +16,7 @@ import { LocalResourceError, NotHtmlMimetypeError } from "../errors/main"; import { HandlerInput } from "./handler-input"; import { Readable } from "stream"; import { decodeStream, parseEncodingName } from "../utils/http"; +import replaceHref from "../utils/replace-href"; export default async function handlePage( url: string, // remote URL @@ -32,18 +35,24 @@ export default async function handlePage( const mime: string | undefined = response.headers["content-type"]?.toString(); if (mime && mime.indexOf("text/html") === -1) { - throw new NotHtmlMimetypeError(url); + throw new NotHtmlMimetypeError(); } - return getFallbackEngine(urlObj.hostname, engine)( + const handler = getFallbackEngine(urlObj.hostname, engine); + const output = await handler( new HandlerInput( await decodeStream(data, parseEncodingName(mime)), url, - requestUrl, - engine, - redirectPath, ) ); + + // post-process + const dom = new JSDOM(output.content, { url }); + replaceHref(dom, requestUrl, engine, redirectPath); + output.content = dom.serialize(); + // TODO: DomPurify + + return output; } function getFallbackEngine(host: string, specified?: string): EngineFunction { diff --git a/src/routes/browser/proxy.ts b/src/routes/browser/proxy.ts new file mode 100644 index 0000000..4947abf --- /dev/null +++ b/src/routes/browser/proxy.ts @@ -0,0 +1,18 @@ +import { FastifyInstance } from "fastify"; +import { IProxySchema, ProxySchema } from "../../types/requests/browser"; +import axios from "../../types/axios"; + +export default async function proxyRoute(fastify: FastifyInstance) { + fastify.get( + "/proxy", + { schema: ProxySchema }, + async (request, reply) => { + const response = await axios.get(request.query.url); + const mime: string | undefined = response.headers["content-type"]?.toString(); + const clen: string | undefined = response.headers["content-length"]?.toString(); + mime && reply.header("Content-Type", mime); + clen && reply.header("Content-Length", Number(clen)); + return reply.send(response.data); + } + ); +} diff --git a/src/types/requests/browser.ts b/src/types/requests/browser.ts index e9891ed..4507b11 100644 --- a/src/types/requests/browser.ts +++ b/src/types/requests/browser.ts @@ -6,10 +6,9 @@ export interface IGetSchema { Querystring: IGetQuerySchema; } -export const indexSchema = { - produces: ["text/html"], - hide: true -}; +export interface IProxySchema { + Querystring: IProxyQuerySchema; +} export const getQuerySchema = { type: "object", @@ -32,9 +31,32 @@ export const getQuerySchema = { } as const; export type IGetQuerySchema = FromSchema; +export const proxyQuerySchema = { + type: "object", + required: ["url"], + properties: { + url: { + type: "string", + description: "URL", + }, + } +} as const; +export type IProxyQuerySchema = FromSchema; + +export const indexSchema = { + hide: true, + produces: ["text/html"], +}; + export const GetSchema: FastifySchema = { description: "Get page", hide: true, querystring: getQuerySchema, produces: ["text/html", "text/plain"], }; + +export const ProxySchema: FastifySchema = { + description: "Proxy resource", + hide: true, + querystring: proxyQuerySchema, +} diff --git a/src/utils/generate.ts b/src/utils/generate.ts index 4d5a006..a275585 100644 --- a/src/utils/generate.ts +++ b/src/utils/generate.ts @@ -6,7 +6,7 @@ export function generateRequestUrl( return new URL(`${protocol}://${host}${originalUrl}`); } -export function generateProxyUrl( +export function generateParserUrl( requestUrl: URL, href: string, engine?: string, @@ -22,3 +22,11 @@ export function generateProxyUrl( return `${requestUrl.origin}/${redirect_url}${urlParam}${engineParam}${hash}`; } + +export function generateProxyUrl( + requestUrl: URL, + href: string, +): string { + const urlParam = `?url=${encodeURIComponent(href)}`; + return `${requestUrl.origin}/proxy${urlParam}`; +} diff --git a/src/utils/replace-href.ts b/src/utils/replace-href.ts new file mode 100644 index 0000000..b964870 --- /dev/null +++ b/src/utils/replace-href.ts @@ -0,0 +1,85 @@ +import { JSDOM } from "jsdom"; +import { generateParserUrl, generateProxyUrl } from "./generate"; +import getConfig from "../config/main"; + +export default function replaceHref( + dom: JSDOM, + requestUrl: URL, + engine?: string, + redirectPath: string = "get", +) { + const doc = dom.window.document; + + const parserUrl = (href: string) => + href.startsWith("http") ? generateParserUrl( + requestUrl, + href, + engine, + redirectPath, + ) : href; + const proxyUrl = (href: string) => + href.startsWith("http") ? generateProxyUrl( + requestUrl, + href, + ) : href; + + modifyLinks( + doc.getElementsByTagName("a"), + "href", + parserUrl, + ); + modifyLinks( + doc.querySelectorAll("frame,iframe"), + "src", + parserUrl, + ); + + if (getConfig().proxy_res) { + modifyLinks( + doc.querySelectorAll("img,image,video,audio,embed,track,source"), + "src", + proxyUrl, + ); + + modifyLinks( + doc.getElementsByTagName("object"), + "data", + proxyUrl, + ); + + const sources = doc.querySelectorAll("source,img"); + for (const source of sources) { + // split srcset by comma + // @ts-ignore + if (!source.srcset) + continue; + // @ts-ignore + source.srcset = source.srcset.split(",").map( + (src: string) => { + // split src by space + const parts = src.trim().split(" "); + try { + // first part is URL + // (srcset="http 200w 1x,...") + parts[0] = proxyUrl(parts[0]); + } catch (_err) { } + // join by space after splitting + return parts.join(" "); + } + ).join(","); // join by comma + } + } +} + +function modifyLinks( + nodeList: NodeListOf | HTMLCollectionOf, + property: string, + generateLink: (value: string) => string, +) { + for (const node of nodeList) { + try { + // @ts-ignore + node[property] = generateLink(node[property]); + } catch (_err) { } + } +} diff --git a/templates/error.ejs b/templates/error.ejs index 127c7da..2bea449 100644 --- a/templates/error.ejs +++ b/templates/error.ejs @@ -15,6 +15,14 @@

txt.

<%= description %>