Mass refactoring and stackoverflow users parser (#83)

* refactor: public config

delete public config, replace with package.json. Update version to 1.6.0 for this pull request.

* fix: searx pagination

* refactor: type system for routes

* refactor: universal redirection

* fix: stackoverflow questions

add NoHandlerFoundError

* feat: stackoverflow users parser
This commit is contained in:
Artemy Egorov 2024-02-25 21:17:56 +03:00 committed by GitHub
parent c9f9e48acb
commit b78da40255
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 216 additions and 132 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{
"name": "txtdot",
"version": "1.5.3",
"version": "1.6.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "txtdot",
"version": "1.5.3",
"version": "1.6.0",
"license": "MIT",
"dependencies": {
"@fastify/static": "^6.12.0",

View File

@ -1,8 +1,8 @@
{
"name": "txtdot",
"version": "1.5.3",
"version": "1.6.0",
"private": true,
"description": "",
"description": "txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts",
"main": "dist/app.js",
"dependencies": {
"@fastify/static": "^6.12.0",

View File

@ -13,10 +13,10 @@ import proxyRoute from './routes/browser/proxy';
import parseRoute from './routes/api/parse';
import rawHtml from './routes/api/raw-html';
import publicConfig from './publicConfig';
import packageJSON from './package';
import errorHandler from './errors/handler';
import getConfig from './config/main';
import searchRoute from './routes/browser/search';
import redirectRoute from './routes/browser/redirect';
class App {
async init() {
@ -46,8 +46,8 @@ class App {
swagger: {
info: {
title: 'TXTDot API',
description: publicConfig.description,
version: publicConfig.version,
description: packageJSON.description,
version: packageJSON.version,
},
},
});
@ -58,7 +58,7 @@ class App {
fastify.register(getRoute);
if (config.search.enabled) {
fastify.register(searchRoute);
fastify.register(redirectRoute);
}
if (config.proxy_res) fastify.register(proxyRoute);

View File

@ -19,6 +19,12 @@ export class EngineParseError extends TxtDotError {
}
}
/**
 * Error raised when nothing can handle the requested target.
 *
 * Forwards `404` as the first argument of `TxtDotError` (presumably the HTTP
 * status code — confirm against `TxtDotError`) and prefixes the supplied
 * message with "No handler found for: ".
 */
export class NoHandlerFoundError extends TxtDotError {
  /**
   * @param message Description of the target that could not be handled.
   */
  constructor(message: string) {
    super(404, 'NoHandlerFoundError', 'No handler found for: ' + message);
  }
}
export class LocalResourceError extends TxtDotError {
constructor() {
super(403, 'LocalResourceError', 'Proxying local resources is forbidden.');

View File

@ -1,25 +1,29 @@
import Route from 'route-parser';
import { HandlerInput } from './handler-input';
import { IHandlerOutput } from './handler.interface';
import { EngineParseError } from '../errors/main';
import { EngineFunction } from '../types/handlers';
import { NoHandlerFoundError } from '../errors/main';
import { EngineFunction, RouteValues } from '../types/handlers';
interface IRoute {
interface IRoute<TParams extends RouteValues> {
route: Route;
handler: EngineFunction;
handler: EngineFunction<TParams>;
}
export class Engine {
name: string;
domains: string[];
routes: IRoute[] = [];
// eslint-disable-next-line @typescript-eslint/no-explicit-any
routes: IRoute<any>[] = [];
constructor(name: string, domains: string[] = []) {
this.domains = domains;
this.name = name;
}
route(path: string, handler: EngineFunction) {
this.routes.push({ route: new Route(path), handler: handler });
route<TParams extends RouteValues>(
path: string,
handler: EngineFunction<TParams>
) {
this.routes.push({ route: new Route<TParams>(path), handler });
}
async handle(input: HandlerInput): Promise<IHandlerOutput> {
@ -29,10 +33,13 @@ export class Engine {
const match = route.route.match(path);
if (match) {
return await route.handler(input, match);
return await route.handler(input, {
q: match,
reverse: (req) => route.route.reverse(req),
});
}
}
throw new EngineParseError(`No handler for ${path}. [${this.name}]`);
throw new NoHandlerFoundError(`${path}. [${this.name}]`);
}
}

View File

@ -5,14 +5,12 @@ import { Engine } from '../engine';
const ReadabilityEngine = new Engine('Readability');
ReadabilityEngine.route('*path', async (input, req) => {
ReadabilityEngine.route('*path', async (input, ro) => {
const reader = new Readability(input.parseDom().window.document);
const parsed = reader.parse();
if (!parsed) {
throw new EngineParseError(
`Parse error (${req.path}). [${ReadabilityEngine.name}]`
);
throw new EngineParseError(`(${ro.q.path}). [${ReadabilityEngine.name}]`);
}
return {

View File

@ -1,22 +1,22 @@
import { Route } from '../../types/handlers';
import { Engine } from '../engine';
import { HandlerInput } from '../handler-input';
const SearXEngine = new Engine('SearX', ['searx.*']);
SearXEngine.route('/search?q=:search', async (input, req) => {
async function search(
input: HandlerInput,
ro: Route<{ search: string; pageno?: string }>
) {
const document = input.parseDom().window.document;
const search = req.search;
const url = new URL(input.getUrl());
const page = parseInt(url.searchParams.get('pageno') || '1');
const search = ro.q.search;
const page = parseInt(ro.q.pageno || '1');
const page_footer = `${
page !== 1
? `<a href="${url.origin}${url.pathname}?q=${search}&pageno=${
page - 1
}">Previous </a>|`
? `<a href="${ro.reverse({ search, pageno: page - 1 })}">Previous </a>|`
: ''
}<a href="${url.origin}${url.pathname}?q=${search}&pageno=${
page + 1
}"> Next</a>`;
}<a href="${ro.reverse({ search, pageno: page + 1 })}"> Next</a>`;
const articles = Array.from(document.querySelectorAll('.result'));
const articles_parsed = articles.map((a) => {
@ -49,6 +49,9 @@ SearXEngine.route('/search?q=:search', async (input, req) => {
title: `${search} - Searx - Page ${page}`,
lang: document.documentElement.lang,
};
});
}
SearXEngine.route('/search?q=:search&pageno=:pageno', search);
SearXEngine.route('/search?q=:search', search);
export default SearXEngine;

View File

@ -1,45 +0,0 @@
import { Engine } from '../engine';
const SOE = new Engine('StackOverflow', [
'stackoverflow.com',
'*.stackoverflow.com',
'*.stackexchange.com',
'askubuntu.com',
'stackapps.com',
'mathoverflow.net',
'superuser.com',
'serverfault.com',
]);
SOE.route('/questions/:id/:slug', async (input, req) => {
const document = input.parseDom().window.document;
const questionEl = document.getElementById('question');
const question = postParser(questionEl);
const title = document.querySelector('.question-hyperlink')?.innerHTML || '';
const allAnswers = [...document.querySelectorAll('.answer')];
const answers = allAnswers.map((a) => postParser(a));
return {
content: `${question}<hr>${answers.length} answers <hr>${answers.join(
'<hr>'
)}`,
textContent: `${req.id}/${req.slug}\n`,
title,
lang: 'en',
};
});
function postParser(el: Element | null): string {
if (!el) {
return '';
}
const body = el.querySelector('.js-post-body')?.innerHTML || '';
const voteCount = el.querySelector('.js-vote-count')?.textContent || '';
return `<h3>${voteCount} votes</h3>${body}`;
}
export default SOE;

View File

@ -0,0 +1,18 @@
import { Engine } from '../../engine';
import questions from './questions';
import users from './users';
// Domain patterns served by the StackOverflow engine.
const stackOverflowDomains = [
  'stackoverflow.com',
  '*.stackoverflow.com',
  '*.stackexchange.com',
  'askubuntu.com',
  'stackapps.com',
  'mathoverflow.net',
  'superuser.com',
  'serverfault.com',
];

const soEngine = new Engine('StackOverflow', stackOverflowDomains);

// Route table: each path pattern is bound to the parser for that page type.
soEngine.route('/questions/:id/*slug', questions);
soEngine.route('/users/:id/*slug', users);
export default soEngine;

View File

@ -0,0 +1,49 @@
import { Route } from '../../../types/handlers';
import { HandlerInput } from '../../handler-input';
/**
 * Engine handler for question pages.
 *
 * Renders the element with id `question` followed by every `.answer` element
 * (each through `postParser`), separated by `<hr>` and an answer count.
 *
 * @param input Handler input whose parsed DOM is read.
 * @param ro Matched route; `ro.q.id` and `ro.q.slug` feed `textContent`.
 * @returns The rendered content, a placeholder text content, the title taken
 *   from `.question-hyperlink`, and the document language.
 */
async function questions(
  input: HandlerInput,
  ro: Route<{ id: string; slug: string }>
) {
  const { document } = input.parseDom().window;

  const heading = document.querySelector('.question-hyperlink');
  const title = heading?.innerHTML || '';

  const renderedQuestion = postParser(document.getElementById('question'));
  const renderedAnswers = Array.from(document.querySelectorAll('.answer')).map(
    (answer) => postParser(answer)
  );
  const joinedAnswers = renderedAnswers.join('<hr>');

  return {
    content: `${renderedQuestion}<hr>${renderedAnswers.length} answers <hr>${joinedAnswers}`,
    textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
    title,
    lang: document.documentElement.lang,
  };
}
/**
 * Escapes the characters that are significant in HTML text and in
 * double- or single-quoted attribute values.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Renders one post (question or answer) as an HTML fragment: a vote-count
 * heading, the post body, and one `<h4>` per `.post-signature` block.
 *
 * @param el The post element, or `null` when the page has none.
 * @returns The HTML fragment, or an empty string when `el` is `null`.
 */
function postParser(el: Element | null): string {
  if (!el) {
    return '';
  }

  // `innerHTML` is already serialized markup, so it is inserted unchanged.
  const body = el.querySelector('.js-post-body')?.innerHTML || '';
  const voteCount = el.querySelector('.js-vote-count')?.textContent || '';

  const footer = [...el.querySelectorAll('.post-signature')].map(
    (signature) => {
      // Look the profile link up once; it provides both the name and the URL.
      const userLink =
        signature.querySelector<HTMLAnchorElement>('.user-details a');
      const userName = userLink?.textContent || '';
      const userUrl = userLink?.href || '';
      const userTitle =
        signature.querySelector('.user-action-time')?.textContent || '';

      // `textContent` and `href` are decoded values: re-encode them so that
      // characters such as `<`, `&` or `"` cannot turn into markup.
      return `<h4>${escapeHtml(userTitle)}${
        userUrl
          ? ` by <a href="${escapeHtml(userUrl)}">${escapeHtml(userName)}</a>`
          : ''
      }</h4>`;
    }
  );

  return `<h3>${escapeHtml(voteCount)} votes</h3>${body}${footer.join('')}`;
}
export default questions;

View File

@ -0,0 +1,37 @@
import { Route } from '../../../types/handlers';
import { HandlerInput } from '../../handler-input';
/**
 * Escapes the characters that are significant in HTML text and in
 * double- or single-quoted attribute values.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Engine handler for user profile pages.
 *
 * Emits the text of the profile info block followed by a "Top Posts" list
 * built from the children of `#js-top-posts > div:nth-child(2)`.
 *
 * @param input Handler input whose parsed DOM is read.
 * @param ro Matched route; `ro.q.id` and `ro.q.slug` feed `textContent`.
 * @returns The rendered content, a placeholder text content, the page
 *   `<title>` text, and the document language.
 */
async function users(
  input: HandlerInput,
  ro: Route<{ id: string; slug: string }>
) {
  const document = input.parseDom().window.document;

  const userInfo =
    document.querySelector('.md\\:ai-start > div:nth-child(2)')?.textContent ||
    '';

  const topPosts = [
    ...(document.querySelector('#js-top-posts > div:nth-child(2)')?.children ||
      []),
  ]
    .map((el) => {
      // Look the post link up once; it provides both the title and the URL.
      const link = el.querySelector('a');
      const title = link?.textContent || '';
      const url = link?.href || '';
      const votes = el.querySelector('.s-badge__votes')?.textContent || '';
      const type =
        el.querySelector('.iconAnswer, .iconQuestion')?.textContent || '';

      // These are decoded text values: re-encode them before building markup
      // so a title such as "Why is a < b?" stays text instead of becoming a tag.
      return `<strong>${escapeHtml(type)} (${escapeHtml(
        votes
      )}) </strong><a href="${escapeHtml(url)}">${escapeHtml(title)}</a>`;
    })
    .join('<br/>');

  return {
    content: `${escapeHtml(userInfo)}<hr><h3>Top Posts</h3>${topPosts}`,
    textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
    title: document.querySelector('title')?.textContent || '',
    lang: document.documentElement.lang,
  };
}
export default users;

View File

@ -1,7 +1,7 @@
import { Distributor } from './distributor';
import Readability from './engines/readability';
import SearX from './engines/searx';
import StackOverflow from './engines/stackoverflow';
import StackOverflow from './engines/stackoverflow/main';
const distributor = new Distributor();

3
src/package.ts Normal file
View File

@ -0,0 +1,3 @@
import * as config from '../package.json';
export default config;

View File

@ -1,5 +0,0 @@
export default {
version: '1.5.3',
description:
'txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts',
};

View File

@ -1,6 +1,6 @@
import { FastifyInstance } from 'fastify';
import publicConfig from '../../publicConfig';
import packageJSON from '../../package';
import { engineList } from '../../handlers/main';
import { indexSchema } from '../../types/requests/browser';
@ -9,7 +9,7 @@ import getConfig from '../../config/main';
export default async function indexRoute(fastify: FastifyInstance) {
fastify.get('/', { schema: indexSchema }, async (_, reply) => {
return reply.view('/templates/index.ejs', {
publicConfig,
packageJSON,
engineList,
config: getConfig(),
});

View File

@ -0,0 +1,20 @@
import { FastifyInstance } from 'fastify';
import { redirectSchema, IRedirectSchema } from '../../types/requests/browser';
/**
 * Registers `GET /redirect`, the universal redirection endpoint.
 *
 * The `url` query parameter is the redirect target. Every other query
 * parameter is forwarded as the target's query string, and the client is
 * redirected to `/get?url=<encoded target>`.
 *
 * @param fastify Fastify instance the route is registered on.
 */
export default async function redirectRoute(fastify: FastifyInstance) {
  fastify.get<IRedirectSchema>(
    '/redirect',
    { schema: redirectSchema },
    async (request, reply) => {
      const { url } = request.query;

      const params = new URLSearchParams(request.query);
      params.delete('url');
      const forwarded = params.toString();

      // Append a query string only when there is something to forward, and
      // extend (rather than duplicate) a query string `url` already carries.
      const separator = url.includes('?') ? '&' : '?';
      const target = forwarded ? `${url}${separator}${forwarded}` : url;

      // Return the reply so the async handler resolves with the sent response.
      return reply.redirect(`/get?url=${encodeURIComponent(target)}`);
    }
  );
}

View File

@ -1,24 +0,0 @@
import { FastifyInstance } from 'fastify';
import { searchSchema, ISearchSchema } from '../../types/requests/browser';
import getConfig from '../../config/main';
export default async function searchRoute(fastify: FastifyInstance) {
fastify.get<ISearchSchema>(
'/search',
{ schema: searchSchema },
async (request, reply) => {
const query = request.query.q;
const config = getConfig();
if (config.search.enabled) {
const searchUrl = `${config.search.searx_url}/search?q=${query}`;
reply.redirect(`/get?url=${encodeURI(searchUrl)}`);
} else {
throw new Error('Search is not enabled');
}
}
);
}

View File

@ -1,3 +1,4 @@
// import Route from 'route-parser';
import { Engine } from '../handlers/engine';
import { HandlerInput } from '../handlers/handler-input';
import { IHandlerOutput } from '../handlers/handler.interface';
@ -6,17 +7,25 @@ export interface Engines {
[key: string]: Engine;
}
export type EngineMatch = {
export type EngineMatch<TParams extends RouteValues> = {
pattern: string | string[];
engine: EngineFunction;
engine: EngineFunction<TParams>;
};
export interface RouteValues {
[key: string]: string;
}
export type EngineFunction = (
export type EngineFunction<TParams extends RouteValues> = (
input: HandlerInput,
req: RouteValues
ro: Route<TParams>
) => Promise<IHandlerOutput>;
export type EnginesMatch = EngineMatch[];
export type EnginesMatch<TParams extends RouteValues> = EngineMatch<TParams>[];
/**
 * Route context handed to an engine handler as its second argument.
 *
 * @typeParam TParams Names of the route parameters, mapped to their values.
 */
export interface Route<TParams extends RouteValues> {
  /** Parameter values for the route. */
  q: TParams;
  /**
   * Takes one value per route parameter and returns a string, or `false`
   * when no string is produced.
   */
  reverse: (req: { [K in keyof TParams]: string | number | boolean }) =>
    | string
    | false;
}

View File

@ -10,21 +10,27 @@ export interface IProxySchema {
Querystring: IProxyQuerySchema;
}
export interface ISearchSchema {
Querystring: ISearchQuerySchema;
export interface IRedirectSchema {
Querystring: IRedirectQuerySchema;
}
export const searchQuerySchema = {
export const redirectQuerySchema = {
type: 'object',
required: ['q'],
required: ['url'],
properties: {
q: {
url: {
type: 'string',
description: 'Search query',
description: 'URL to redirect without querystring',
},
},
patternProperties: {
'^(?!url).*$': { type: 'string' },
},
} as const;
export type ISearchQuerySchema = FromSchema<typeof searchQuerySchema>;
export type IRedirectQuerySchema = {
url: string;
[key: string]: string;
};
export const getQuerySchema = {
type: 'object',
@ -64,10 +70,10 @@ export const indexSchema = {
produces: ['text/html'],
};
export const searchSchema: FastifySchema = {
description: 'Search redirection page',
export const redirectSchema: FastifySchema = {
description: 'Universal redirection page',
hide: true,
querystring: searchQuerySchema,
querystring: redirectQuerySchema,
};
export const GetSchema: FastifySchema = {

View File

@ -10,13 +10,14 @@
<span>Search</span>
</label>
<form action="/search" method="get" class="input-grid main-form-search">
<form action="/redirect" method="get" class="input-grid main-form-search">
<div class="input">
<input type="text" name="q" id="search" placeholder="Search">
</div>
<div class="input">
<input type="submit" id="submit" class="button" value="Go">
</div>
<input type="hidden" name="url" value="<%= config.search.searx_url %>/search"/>
</form>
<% } %>

View File

@ -4,9 +4,10 @@
<%
if (config.search.enabled) {
%>
<form class="form-search" action="/search" method="get">
<form class="form-search" action="/redirect" method="get">
<input type="text" name="q" id="search" placeholder="Search">
<input class="button" type="submit" value="Go"/>
<input type="hidden" name="url" value="<%= config.search.searx_url %>/search"/>
</form>
<%
}

View File

@ -4,7 +4,7 @@
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="description" content="<%= publicConfig.description %>">
<meta name="description" content="<%= packageJSON.description %>">
<title>txt. main page</title>
<link rel="stylesheet" href="/static/common.css">
<link rel="stylesheet" href="/static/index.css">
@ -16,11 +16,11 @@
<header>
<h1>txt<span class="dot">.</span></h1>
<div class="menu">
<a href="https://github.com/TxtDot/txtdot/releases/latest" class="button secondary">v<%= publicConfig.version %></a>
<a href="https://github.com/TxtDot/txtdot/releases/latest" class="button secondary">v<%= packageJSON.version %></a>
<a href="https://github.com/txtdot/txtdot" class="button secondary">GitHub</a>
<a href="https://txtdot.github.io/documentation" class="button secondary">Docs</a>
</div>
<p><%= publicConfig.description %></p>
<p><%= packageJSON.description %></p>
</header>
<%- include('./components/form-main.ejs') %>
</main>

View File

@ -39,7 +39,7 @@
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
// "resolveJsonModule": true, /* Enable importing .json files. */
"resolveJsonModule": true /* Enable importing .json files. */,
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
@ -55,7 +55,7 @@
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
"outDir": "./dist/src/" /* Specify an output folder for all emitted files. */,
"outDir": "./dist/" /* Specify an output folder for all emitted files. */,
// "removeComments": true, /* Disable emitting comments. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */