From d477de027a1349bc99d8d8855937f2c074f1d014 Mon Sep 17 00:00:00 2001 From: Artemy Date: Wed, 15 May 2024 16:09:01 +0300 Subject: [PATCH] feat: middlewares feat: highlighter --- packages/plugins/src/engines/readability.ts | 6 +- packages/plugins/src/engines/searx.tsx | 1 - packages/plugins/src/lib.ts | 7 +- .../plugins/src/middlewares/highlight.tsx | 39 ++++++++++ packages/plugins/src/middlewares/index.ts | 3 + packages/sdk/src/engine.ts | 2 +- packages/sdk/src/jsx.ts | 4 +- packages/sdk/src/lib.ts | 6 ++ packages/sdk/src/middleware.ts | 61 +++++++++++++++ packages/sdk/src/types/handler.ts | 32 +++++--- packages/server/src/config/pluginConfig.ts | 3 +- packages/server/src/distributor.ts | 77 ++++++++++++++----- packages/server/src/plugin_manager.ts | 6 +- .../src/routes/browser/configuration.ts | 2 +- packages/server/src/types/pluginConfig.ts | 6 +- 15 files changed, 209 insertions(+), 46 deletions(-) create mode 100644 packages/plugins/src/middlewares/highlight.tsx create mode 100644 packages/plugins/src/middlewares/index.ts create mode 100644 packages/sdk/src/middleware.ts diff --git a/packages/plugins/src/engines/readability.ts b/packages/plugins/src/engines/readability.ts index f35d8f3..2adcdf5 100644 --- a/packages/plugins/src/engines/readability.ts +++ b/packages/plugins/src/engines/readability.ts @@ -1,7 +1,5 @@ import { Readability as OReadability } from '@mozilla/readability'; - -import { Engine, EngineParseError } from '@txtdot/sdk'; -import { parseHTML } from 'linkedom'; +import { Engine, EngineParseError, Route } from '@txtdot/sdk'; const Readability = new Engine( 'Readability', @@ -9,7 +7,7 @@ const Readability = new Engine( ['*'] ); -Readability.route('*path', async (input, ro) => { +Readability.route('*path', async (input, ro: Route<{ path: string }>) => { const reader = new OReadability(input.document); const parsed = reader.parse(); diff --git a/packages/plugins/src/engines/searx.tsx b/packages/plugins/src/engines/searx.tsx index cb7c2d3..2bffe93 100644 --- a/packages/plugins/src/engines/searx.tsx +++ b/packages/plugins/src/engines/searx.tsx @@ -1,6 +1,5 @@ import { Engine, JSX } from '@txtdot/sdk'; import { HandlerInput, Route } from '@txtdot/sdk'; -import { parseHTML } from 'linkedom'; import { PageFooter, ResultItem } from '../components/searchers'; const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [ diff --git a/packages/plugins/src/lib.ts b/packages/plugins/src/lib.ts index a06217c..6093a0a 100644 --- a/packages/plugins/src/lib.ts +++ b/packages/plugins/src/lib.ts @@ -1,15 +1,16 @@ import * as engines from './engines'; - export { engines }; - export const engineList = [ engines.StackOverflow, engines.SearX, engines.Readability, ]; -import { compile } from 'html-to-text'; +import * as middlewares from './middlewares'; +export { middlewares }; +export const middlewareList = [middlewares.Highlight]; +import { compile } from 'html-to-text'; export const html2text = compile({ longWordSplit: { forceWrapOnLimit: true, diff --git a/packages/plugins/src/middlewares/highlight.tsx b/packages/plugins/src/middlewares/highlight.tsx new file mode 100644 index 0000000..72a5bca --- /dev/null +++ b/packages/plugins/src/middlewares/highlight.tsx @@ -0,0 +1,39 @@ +import { Middleware, JSX } from '@txtdot/sdk'; + +const Highlight = new Middleware( + 'highlight', + 'Highlights code with highlight.js', + ['*'] +); + +Highlight.use(async (input, ro, out) => { + if (out.content.indexOf(', + }; + + return out; +}); + +function Highlighter({ content }: { content: string }) { + return ( + <> + + + {content} + + ); +} + +export default Highlight; diff --git a/packages/plugins/src/middlewares/index.ts b/packages/plugins/src/middlewares/index.ts new file mode 100644 index 0000000..9b2b961 --- /dev/null +++ b/packages/plugins/src/middlewares/index.ts @@ -0,0 +1,3 @@ +import Highlight from './highlight'; + +export { Highlight }; diff --git a/packages/sdk/src/engine.ts b/packages/sdk/src/engine.ts index 601e647..e085945 100644 --- a/packages/sdk/src/engine.ts +++ b/packages/sdk/src/engine.ts @@ -34,7 +34,7 @@ export class Engine { } async handle(input: HandlerInput): Promise { - const url = new URL(input.getUrl()); + const url = new URL(input.url); const path = url.pathname + url.search + url.hash; for (const route of this.routes) { const match = route.route.match(path); diff --git a/packages/sdk/src/jsx.ts b/packages/sdk/src/jsx.ts index 81f921f..d5850ba 100644 --- a/packages/sdk/src/jsx.ts +++ b/packages/sdk/src/jsx.ts @@ -24,9 +24,7 @@ export function createElement( }) .join(' '); - return inner.length === 0 - ? `<${name} ${propsstr}/>` - : `<${name} ${propsstr}>${content}`; + return `<${name} ${propsstr}>${content}`; } else if (typeof name === 'function') { return name(props, content); } else { diff --git a/packages/sdk/src/lib.ts b/packages/sdk/src/lib.ts index 31909b6..3250c63 100644 --- a/packages/sdk/src/lib.ts +++ b/packages/sdk/src/lib.ts @@ -1,4 +1,5 @@ import { Engine } from './engine'; +import { Middleware } from './middleware'; import { EngineParseError, @@ -16,17 +17,22 @@ import { HandlerOutput, Route, handlerSchema, + EngineOutput, + MiddleFunction, } from './types/handler'; import * as JSX from './jsx'; export { Engine, + Middleware, EngineParseError, NoHandlerFoundError, TxtDotError, EngineFunction, + MiddleFunction, EngineMatch, + EngineOutput, Engines, RouteValues, EnginesMatch, diff --git a/packages/sdk/src/middleware.ts b/packages/sdk/src/middleware.ts new file mode 100644 index 0000000..6b5c627 --- /dev/null +++ b/packages/sdk/src/middleware.ts @@ -0,0 +1,61 @@ +import Route from 'route-parser'; + +import { + HandlerInput, + RouteValues, + EngineOutput, + MiddleFunction, +} from './types/handler'; + +interface IMiddle { + route: Route; + handler: MiddleFunction; +} + +export class Middleware { + name: string; + description: string; + domains: string[]; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + middles: IMiddle[] = []; + constructor(name: string, description: string, domains: string[] = []) { + this.domains = domains; + this.name = name; + this.description = description; + } + + route( + path: string, + handler: MiddleFunction + ) { + this.middles.push({ route: new Route(path), handler }); + } + + use(handler: MiddleFunction) { + this.middles.push({ route: new Route<{ path: string }>('*path'), handler }); + } + + async handle(input: HandlerInput, out: EngineOutput): Promise { + const url = new URL(input.url); + const path = url.pathname + url.search + url.hash; + + let processed_out = out; + + for (const middle of this.middles) { + const match = middle.route.match(path); + + if (match) { + processed_out = await middle.handler( + input, + { + q: match, + reverse: (req) => middle.route.reverse(req), + }, + out + ); + } + } + + return processed_out; + } +} diff --git a/packages/sdk/src/types/handler.ts b/packages/sdk/src/types/handler.ts index 30b2eb0..6e4fcd7 100644 --- a/packages/sdk/src/types/handler.ts +++ b/packages/sdk/src/types/handler.ts @@ -2,26 +2,30 @@ import { parseHTML } from 'linkedom'; import { Engine } from '../engine'; export class HandlerInput { - private data: string; - private url: string; - private window?: Window; + private _data: string; + private _url: string; + private _window?: Window; constructor(data: string, url: string) { - this.data = data; - this.url = url; + this._data = data; + this._url = url; } - getUrl(): string { - return this.url; + get url(): string { + return this._url; + } + + get data(): string { + return this._data; } get document(): Document { - if (this.window) { - return this.window.document; + if (this._window) { + return this._window.document; } - this.window = parseHTML(this.data); - return this.window.document; + this._window = parseHTML(this._data); + return this._window.document; } } @@ -75,6 +79,12 @@ export type EngineFunction = ( ro: Route ) => Promise; +export type MiddleFunction = ( + input: HandlerInput, + ro: Route, + out: EngineOutput +) => Promise; + export type EnginesMatch = EngineMatch[]; export interface Route { diff --git a/packages/server/src/config/pluginConfig.ts b/packages/server/src/config/pluginConfig.ts index 3fc3dcb..c426f6c 100644 --- a/packages/server/src/config/pluginConfig.ts +++ b/packages/server/src/config/pluginConfig.ts @@ -1,5 +1,5 @@ import { IAppConfig } from '../types/pluginConfig'; -import { engineList, html2text } from '@txtdot/plugins'; +import { engineList, middlewareList, html2text } from '@txtdot/plugins'; /** * Configuration of plugins @@ -7,6 +7,7 @@ import { engineList, html2text } from '@txtdot/plugins'; */ const plugin_config: IAppConfig = { engines: [...engineList], + middlewares: [...middlewareList], html2text, }; diff --git a/packages/server/src/distributor.ts b/packages/server/src/distributor.ts index bb1a037..dafdb4c 100644 --- a/packages/server/src/distributor.ts +++ b/packages/server/src/distributor.ts @@ -6,7 +6,7 @@ import { NotHtmlMimetypeError } from './errors/main'; import { decodeStream, parseEncodingName } from './utils/http'; import replaceHref from './utils/replace-href'; -import { Engine } from '@txtdot/sdk'; +import { Engine, EngineOutput, Middleware } from '@txtdot/sdk'; import { HandlerInput, HandlerOutput } from '@txtdot/sdk'; import config from './config'; import { parseHTML } from 'linkedom'; @@ -18,14 +18,25 @@ interface IEngineId { export class Distributor { engines_id: IEngineId = {}; - fallback: Engine[] = []; - list: string[] = []; + engines_fallback: Engine[] = []; + engines_list: string[] = []; + + middles_id: IEngineId = {}; + middles_fallback: Middleware[] = []; + middles_list: string[] = []; + constructor() {} engine(engine: Engine) { - this.engines_id[engine.name] = this.list.length; - this.fallback.push(engine); - this.list.push(engine.name); + this.engines_id[engine.name] = this.engines_list.length; + this.engines_fallback.push(engine); + this.engines_list.push(engine.name); + } + + middleware(middleware: Middleware) { + this.middles_id[middleware.name] = this.middles_list.length; + this.middles_fallback.push(middleware); + this.middles_list.push(middleware.name); } async handlePage( @@ -54,13 +65,13 @@ export class Distributor { const engine = this.getFallbackEngine(urlObj.hostname, engineName); - const output = await engine.handle( - new HandlerInput( - await decodeStream(data, parseEncodingName(mime)), - remoteUrl - ) + const input = new HandlerInput( + await decodeStream(data, parseEncodingName(mime)), + remoteUrl ); + const output = await engine.handle(input); + const dom = parseHTML(output.content); // Get text content before link replacement, because in text format we need original links @@ -77,15 +88,27 @@ export class Distributor { ); const purify = DOMPurify(dom); - const content = purify.sanitize(dom.document.toString()); - const title = output.title || dom.document.title; - const lang = output.lang || dom.document.documentElement.lang; + const purified_content = purify.sanitize(dom.document.toString()); + + const purified = { + ...output, + content: purified_content, + }; + + const processed = await this.processMiddlewares( + urlObj.hostname, + input, + purified + ); + + const title = processed.title || dom.document.title; + const lang = processed.lang || dom.document.documentElement.lang; const textContent = - html2text(stdTextContent, output, title) || + html2text(stdTextContent, processed, title) || 'Text output cannot be generated.'; return { - content, + content: processed.content, textContent, title, lang, @@ -94,15 +117,31 @@ export class Distributor { getFallbackEngine(host: string, specified?: string): Engine { if (specified) { - return this.fallback[this.engines_id[specified]]; + return this.engines_fallback[this.engines_id[specified]]; } - for (const engine of this.fallback) { + for (const engine of this.engines_fallback) { if (micromatch.isMatch(host, engine.domains)) { return engine; } } - return this.fallback[0]; + return this.engines_fallback[0]; + } + + async processMiddlewares( + host: string, + input: HandlerInput, + output: EngineOutput + ): Promise { + let processed_output = output; + + for (const middle of this.middles_fallback) { + if (micromatch.isMatch(host, middle.domains)) { + processed_output = await middle.handle(input, processed_output); + } + } + + return processed_output; } } diff --git a/packages/server/src/plugin_manager.ts b/packages/server/src/plugin_manager.ts index 7eee703..9d8473c 100644 --- a/packages/server/src/plugin_manager.ts +++ b/packages/server/src/plugin_manager.ts @@ -7,5 +7,9 @@ for (const engine of plugin_config.engines) { distributor.engine(engine); } -export const engineList = distributor.list; +for (const middleware of plugin_config.middlewares || []) { + distributor.middleware(middleware); +} + +export const engineList = distributor.engines_list; export { distributor }; diff --git a/packages/server/src/routes/browser/configuration.ts b/packages/server/src/routes/browser/configuration.ts index e832d53..bbb9bd9 100644 --- a/packages/server/src/routes/browser/configuration.ts +++ b/packages/server/src/routes/browser/configuration.ts @@ -8,7 +8,7 @@ import config from '../../config'; export default async function configurationRoute(fastify: FastifyInstance) { fastify.get('/configuration', { schema: indexSchema }, async (_, reply) => { return reply.view('/templates/configuration.ejs', { - engines: distributor.fallback, + engines: distributor.engines_fallback, config, }); }); diff --git a/packages/server/src/types/pluginConfig.ts b/packages/server/src/types/pluginConfig.ts index 634d48c..d2f0656 100644 --- a/packages/server/src/types/pluginConfig.ts +++ b/packages/server/src/types/pluginConfig.ts @@ -1,4 +1,4 @@ -import { Engine } from '@txtdot/sdk'; +import { Engine, Middleware } from '@txtdot/sdk'; type Html2TextConverter = (html: string) => string; @@ -7,6 +7,10 @@ export interface IAppConfig { * List of engines, ordered */ engines: Engine[]; + /** + * List of middlewares, ordered + */ + middlewares?: Middleware[]; /** * HTML to text converter, if engine doesn't support text */