Merge pull request #156 from TxtDot/engines-refactor

Add jsx support to engines plugins
This commit is contained in:
Artemy Egorov 2024-05-13 18:43:18 +03:00 committed by GitHub
commit e4402c5448
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 4102 additions and 3098 deletions

View File

@ -1,6 +1,6 @@
{ {
"name": "@txtdot/plugins", "name": "@txtdot/plugins",
"version": "1.1.1", "version": "2.0.0",
"description": "Official txtdot plugins", "description": "Official txtdot plugins",
"main": "dist/lib.js", "main": "dist/lib.js",
"types": "dist/lib.d.ts", "types": "dist/lib.d.ts",
@ -19,6 +19,7 @@
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@mozilla/readability": "^0.5.0", "@mozilla/readability": "^0.5.0",
"linkedom": "^0.18.0",
"@txtdot/sdk": "workspace:*" "@txtdot/sdk": "workspace:*"
}, },
"devDependencies": { "devDependencies": {

View File

@ -0,0 +1,41 @@
import { JSX } from '@txtdot/sdk';
/**
 * Renders the pagination footer for a search results page.
 *
 * Each link is rendered only when its target is an actual URL string, so a
 * `false` target never ends up as a literal `href=false` link.
 *
 * @param page - Current page number; the "Previous" link is never shown on page 1.
 * @param previous - URL of the previous page, or `false` when there is none.
 * @param next - URL of the next page, or `false` when there is none.
 * @returns The footer markup produced by the JSX factory.
 */
export function PageFooter({
  page,
  previous,
  next,
}: {
  page: number;
  previous: string | false;
  next: string | false;
}) {
  const previousLink =
    page !== 1 && previous !== false ? <a href={previous}>Previous </a> : '';
  const nextLink = next !== false ? <a href={next}> Next</a> : '';
  // The separator only makes sense between two rendered links.
  const separator = previousLink && nextLink ? ' |' : '';

  return (
    <>
      {previousLink}
      {separator}
      {nextLink}
    </>
  );
}
/**
 * Renders a single search result: a titled link followed by its snippet.
 *
 * @param props.url - Target of the result link.
 * @param props.title - Text shown inside the link.
 * @param props.content - Snippet shown in a paragraph below the link.
 * @returns The result markup produced by the JSX factory.
 */
export function ResultItem(props: {
  url: string;
  title: string;
  content: string;
}) {
  const { url, title, content } = props;
  const link = <a href={url}>{title}</a>;
  const snippet = <p>{content}</p>;

  return (
    <>
      {link}
      {snippet}
    </>
  );
}

View File

@ -1,7 +1,7 @@
import { Readability as OReadability } from '@mozilla/readability'; import { Readability as OReadability } from '@mozilla/readability';
import { EngineParseError } from '@txtdot/sdk/dist/types/errors';
import { Engine } from '@txtdot/sdk'; import { Engine, EngineParseError } from '@txtdot/sdk';
import { parseHTML } from 'linkedom';
const Readability = new Engine( const Readability = new Engine(
'Readability', 'Readability',
@ -10,7 +10,7 @@ const Readability = new Engine(
); );
Readability.route('*path', async (input, ro) => { Readability.route('*path', async (input, ro) => {
const reader = new OReadability(input.parseDom().window.document); const reader = new OReadability(input.document);
const parsed = reader.parse(); const parsed = reader.parse();
if (!parsed) { if (!parsed) {

View File

@ -1,5 +1,7 @@
import { Engine } from '@txtdot/sdk'; import { Engine, JSX } from '@txtdot/sdk';
import { HandlerInput, Route } from '@txtdot/sdk/dist/types/handler'; import { HandlerInput, Route } from '@txtdot/sdk';
import { parseHTML } from 'linkedom';
import { PageFooter, ResultItem } from '../components/searchers';
const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [ const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [
'searx.*', 'searx.*',
@ -9,17 +11,23 @@ async function search(
input: HandlerInput, input: HandlerInput,
ro: Route<{ search: string; pageno?: string }> ro: Route<{ search: string; pageno?: string }>
) { ) {
const document = input.parseDom().window.document; const document = input.document;
const search = ro.q.search; const search = ro.q.search;
const page = parseInt(ro.q.pageno || '1'); const page = parseInt(ro.q.pageno || '1');
const page_footer = `${ let previous: string | false;
page !== 1 let next: string | false;
? `<a href="${ro.reverse({ search, pageno: page - 1 })}">Previous </a>|`
: '' if (ro.q.pageno) {
}<a href="${ro.reverse({ search, pageno: page + 1 })}"> Next</a>`; previous = ro.reverse({ search, pageno: page - 1 });
next = ro.reverse({ search, pageno: page + 1 });
} else {
previous = false;
next = `/search?q=${search}&pageno=${page + 1}`;
}
const articles = Array.from(document.querySelectorAll('.result')); const articles = Array.from(document.querySelectorAll('.result'));
const articles_parsed = articles.map((a) => { const articles_parsed = articles.map((a) => {
const parsed = { const parsed = {
url: url:
@ -34,21 +42,24 @@ async function search(
}; };
return { return {
html: `<a href="${parsed.url}">${parsed.title}</a><p>${parsed.content}</p><hr>`, html: <ResultItem {...parsed} />,
text: `${parsed.title} (${parsed.url})\n${parsed.content}\n---\n\n`, text: `${parsed.title} (${parsed.url})\n${parsed.content}\n---\n\n`,
}; };
}); });
const content = `${articles_parsed const content = (
.map((a) => a.html) <>
.join('')}${page_footer}`; {articles_parsed.map((a) => a.html).join('')}
<PageFooter page={page} previous={previous} next={next} />
</>
);
const textContent = articles_parsed.map((a) => a.text).join(''); const textContent = articles_parsed.map((a) => a.text).join('');
return { return {
content, content: content,
textContent, textContent,
title: `${search} - Searx - Page ${page}`, title: `${search} - Searx - Page ${page}`,
lang: document.documentElement.lang,
}; };
} }

View File

@ -1,10 +1,11 @@
import { HandlerInput, Route } from '@txtdot/sdk/dist/types/handler'; import { HandlerInput, Route } from '@txtdot/sdk';
import { JSX } from '@txtdot/sdk';
async function questions( async function questions(
input: HandlerInput, input: HandlerInput,
ro: Route<{ id: string; slug: string }> ro: Route<{ id: string; slug: string }>
) { ) {
const document = input.parseDom().window.document; const document = input.document;
const questionEl = document.getElementById('question'); const questionEl = document.getElementById('question');
const question = postParser(questionEl); const question = postParser(questionEl);
@ -15,12 +16,15 @@ async function questions(
const answers = allAnswers.map((a) => postParser(a)); const answers = allAnswers.map((a) => postParser(a));
return { return {
content: `${question}<hr>${answers.length} answers <hr>${answers.join( content: (
'<hr>' <>
)}`, {question}
textContent: `${ro.q.id}/${ro.q.slug}\nText output not supported`, // TODO <hr />
{answers.length} answers <hr />
{answers.join(<hr />)}
</>
),
title, title,
lang: document.documentElement.lang,
}; };
} }
@ -37,12 +41,27 @@ function postParser(el: Element | null): string {
(el.querySelector('.user-details a') as HTMLAnchorElement)?.href || ''; (el.querySelector('.user-details a') as HTMLAnchorElement)?.href || '';
const userTitle = el.querySelector('.user-action-time')?.textContent || ''; const userTitle = el.querySelector('.user-action-time')?.textContent || '';
return `<h4>${userTitle}${ return (
userUrl ? ` by <a href="${userUrl}">${userName}</a>` : '' <h4>
}</h4>`; {userTitle}
{userUrl ? (
<>
by <a href={userUrl}>{userName}</a>
</>
) : (
<></>
)}
</h4>
);
}); });
return `<h3>${voteCount} votes</h3>${body}${footer.join('')}`; return (
<>
<h3>{voteCount} votes</h3>
{body}
{footer.join('')}
</>
);
} }
export default questions; export default questions;

View File

@ -1,10 +1,11 @@
import { HandlerInput, Route } from '@txtdot/sdk/dist/types/handler'; import { HandlerInput, Route } from '@txtdot/sdk';
import { JSX } from '@txtdot/sdk';
async function users( async function users(
input: HandlerInput, input: HandlerInput,
ro: Route<{ id: string; slug: string }> ro: Route<{ id: string; slug: string }>
) { ) {
const document = input.parseDom().window.document; const document = input.document;
const userInfo = const userInfo =
document.querySelector('.md\\:ai-start > div:nth-child(2)')?.textContent || document.querySelector('.md\\:ai-start > div:nth-child(2)')?.textContent ||
@ -21,15 +22,28 @@ async function users(
const type = const type =
el.querySelector('.iconAnswer, .iconQuestion')?.textContent || ''; el.querySelector('.iconAnswer, .iconQuestion')?.textContent || '';
return `<strong>${type} (${votes}) </strong><a href="${url}">${title}</a>`; return (
<>
<strong>
{type} ({votes}){' '}
</strong>
<a href={url}>{title}</a>
</>
);
}) })
.join('<br/>'); .join(<br />);
return { return {
content: `${userInfo}<hr><h3>Top Posts</h3>${topPosts}`, content: (
<>
{userInfo}
<hr />
<h3>Top Posts</h3>
{topPosts}
</>
),
textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
title: document.querySelector('title')?.textContent || '', title: document.querySelector('title')?.textContent || '',
lang: document.documentElement.lang,
}; };
} }

View File

@ -13,13 +13,13 @@
/* Language and Environment */ /* Language and Environment */
"target": "ES2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, "target": "ES2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */ "jsx": "react" /* Specify what JSX code is generated. */,
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ "reactNamespace": "JSX" /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */,
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */

View File

@ -1,6 +1,6 @@
{ {
"name": "@txtdot/sdk", "name": "@txtdot/sdk",
"version": "1.1.1", "version": "2.0.0",
"description": "SDK for creating plugins for TxtDot", "description": "SDK for creating plugins for TxtDot",
"main": "dist/lib.js", "main": "dist/lib.js",
"types": "dist/lib.d.ts", "types": "dist/lib.d.ts",

View File

@ -2,9 +2,9 @@ import Route from 'route-parser';
import { import {
HandlerInput, HandlerInput,
IHandlerOutput,
EngineFunction, EngineFunction,
RouteValues, RouteValues,
EngineOutput,
} from './types/handler'; } from './types/handler';
import { NoHandlerFoundError } from './types/errors'; import { NoHandlerFoundError } from './types/errors';
@ -33,7 +33,7 @@ export class Engine {
this.routes.push({ route: new Route<TParams>(path), handler }); this.routes.push({ route: new Route<TParams>(path), handler });
} }
async handle(input: HandlerInput): Promise<IHandlerOutput> { async handle(input: HandlerInput): Promise<EngineOutput> {
const url = new URL(input.getUrl()); const url = new URL(input.getUrl());
const path = url.pathname + url.search + url.hash; const path = url.pathname + url.search + url.hash;
for (const route of this.routes) { for (const route of this.routes) {

35
packages/sdk/src/jsx.ts Normal file
View File

@ -0,0 +1,35 @@
// Type declarations used by the TypeScript compiler when checking JSX
// expressions built with this module's `createElement` factory.
// eslint-disable-next-line @typescript-eslint/no-namespace
export namespace JSX {
// Every JSX expression has type `string`.
export type Element = string;
// Any tag name is accepted as an intrinsic element, with unchecked props.
export interface IntrinsicElements {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
[elemName: string]: any;
}
}
/**
 * JSX factory that renders elements directly to an HTML string.
 *
 * - String `name`: renders an HTML tag. Attribute values are double-quoted
 *   with `&` and `"` escaped, so values containing spaces or quotes cannot
 *   break the tag. `className` is emitted as `class`. Attributes whose value
 *   is `false`, `null` or `undefined` are omitted; `true` renders a bare
 *   attribute name. A tag with no children is rendered self-closing.
 * - Function `name`: treated as a component and called with the props
 *   (an empty object when the JSX had no attributes) followed by the children.
 * - Anything else (e.g. an undefined fragment factory): the children are
 *   concatenated without a wrapper.
 *
 * Children are inserted verbatim; they are expected to already be HTML.
 *
 * @param name - Tag name, component function, or another value for fragments.
 * @param props - Attributes/props; may be `null` when the JSX had none.
 * @param content - Already-rendered children.
 * @returns The rendered HTML string (or whatever a component returns).
 */
export function createElement(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  name: any,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  props: { [id: string]: any },
  ...content: string[]
) {
  if (typeof name === 'string') {
    const attributes = Object.entries(props || {})
      // An absent or false attribute must not be rendered as `key=false`.
      .filter(
        ([, value]) => value !== undefined && value !== null && value !== false
      )
      .map(([key, value]) => {
        const attribute = key === 'className' ? 'class' : key;
        if (value === true) return attribute;

        // Escape the characters that could terminate a double-quoted value.
        const escaped = String(value)
          .replace(/&/g, '&amp;')
          .replace(/"/g, '&quot;');
        return `${attribute}="${escaped}"`;
      })
      .join(' ');

    return content.length === 0
      ? `<${name} ${attributes}/>`
      : `<${name} ${attributes}>${content.join('')}</${name}>`;
  }

  if (typeof name === 'function') {
    // The compiler passes `null` for attribute-less tags; components that
    // destructure their props would throw on it.
    return name(props || {}, ...content);
  }

  return content.join('');
}

View File

@ -1,3 +1,38 @@
import { Engine } from './engine'; import { Engine } from './engine';
export { Engine }; import {
EngineParseError,
NoHandlerFoundError,
TxtDotError,
} from './types/errors';
import {
EngineFunction,
EngineMatch,
Engines,
RouteValues,
EnginesMatch,
HandlerInput,
HandlerOutput,
Route,
handlerSchema,
} from './types/handler';
import * as JSX from './jsx';
export {
Engine,
EngineParseError,
NoHandlerFoundError,
TxtDotError,
EngineFunction,
EngineMatch,
Engines,
RouteValues,
EnginesMatch,
HandlerInput,
HandlerOutput,
Route,
handlerSchema,
JSX,
};

View File

@ -4,7 +4,7 @@ import { Engine } from '../engine';
export class HandlerInput { export class HandlerInput {
private data: string; private data: string;
private url: string; private url: string;
private dom?: Window; private window?: Window;
constructor(data: string, url: string) { constructor(data: string, url: string) {
this.data = data; this.data = data;
@ -15,19 +15,26 @@ export class HandlerInput {
return this.url; return this.url;
} }
parseDom(): Window { get document(): Document {
if (this.dom) { if (this.window) {
return this.dom; return this.window.document;
} }
this.dom = parseHTML(this.data); this.window = parseHTML(this.data);
return this.dom; return this.window.document;
} }
} }
export interface IHandlerOutput { export interface HandlerOutput {
content: string; content: string;
textContent: string; textContent: string;
title: string;
lang: string;
}
export interface EngineOutput {
content: string;
textContent?: string;
title?: string; title?: string;
lang?: string; lang?: string;
} }
@ -66,7 +73,7 @@ export interface RouteValues {
export type EngineFunction<TParams extends RouteValues> = ( export type EngineFunction<TParams extends RouteValues> = (
input: HandlerInput, input: HandlerInput,
ro: Route<TParams> ro: Route<TParams>
) => Promise<IHandlerOutput>; ) => Promise<EngineOutput>;
export type EnginesMatch<TParams extends RouteValues> = EngineMatch<TParams>[]; export type EnginesMatch<TParams extends RouteValues> = EngineMatch<TParams>[];

View File

@ -1,6 +1,6 @@
{ {
"name": "@txtdot/server", "name": "@txtdot/server",
"version": "1.7.0", "version": "1.8.0",
"private": true, "private": true,
"description": "txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts", "description": "txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts",
"main": "dist/app.js", "main": "dist/app.js",

View File

@ -5,11 +5,11 @@ import { Readable } from 'stream';
import { NotHtmlMimetypeError } from './errors/main'; import { NotHtmlMimetypeError } from './errors/main';
import { decodeStream, parseEncodingName } from './utils/http'; import { decodeStream, parseEncodingName } from './utils/http';
import replaceHref from './utils/replace-href'; import replaceHref from './utils/replace-href';
import { parseHTML } from 'linkedom';
import { Engine } from '@txtdot/sdk'; import { Engine } from '@txtdot/sdk';
import { HandlerInput, IHandlerOutput } from '@txtdot/sdk/dist/types/handler'; import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
import config from './config'; import config from './config';
import { parseHTML } from 'linkedom';
interface IEngineId { interface IEngineId {
[key: string]: number; [key: string]: number;
@ -32,7 +32,7 @@ export class Distributor {
requestUrl: URL, // proxy URL requestUrl: URL, // proxy URL
engineName?: string, engineName?: string,
redirectPath: string = 'get' redirectPath: string = 'get'
): Promise<IHandlerOutput> { ): Promise<HandlerOutput> {
const urlObj = new URL(remoteUrl); const urlObj = new URL(remoteUrl);
const webder_url = config.env.third_party.webder_url; const webder_url = config.env.third_party.webder_url;
@ -52,6 +52,7 @@ export class Distributor {
} }
const engine = this.getFallbackEngine(urlObj.hostname, engineName); const engine = this.getFallbackEngine(urlObj.hostname, engineName);
const output = await engine.handle( const output = await engine.handle(
new HandlerInput( new HandlerInput(
await decodeStream(data, parseEncodingName(mime)), await decodeStream(data, parseEncodingName(mime)),
@ -59,15 +60,28 @@ export class Distributor {
) )
); );
const dom = parseHTML(output.content);
// post-process // post-process
// TODO: generate dom in handler and not parse here twice // TODO: generate dom in handler and not parse here twice
const dom = parseHTML(output.content); replaceHref(
replaceHref(dom, requestUrl, new URL(remoteUrl), engineName, redirectPath); dom.document,
requestUrl,
new URL(remoteUrl),
engineName,
redirectPath
);
const purify = DOMPurify(dom.window); const purify = DOMPurify(dom);
output.content = purify.sanitize(dom.document.toString()); const content = purify.sanitize(dom.document.toString());
return output; return {
content,
textContent:
output.textContent || dom.document.documentElement.textContent || '',
title: output.title || dom.document.title,
lang: output.lang || dom.document.documentElement.lang,
};
} }
getFallbackEngine(host: string, specified?: string): Engine { getFallbackEngine(host: string, specified?: string): Engine {

View File

@ -2,7 +2,7 @@ import { FastifyReply, FastifyRequest } from 'fastify';
import { NotHtmlMimetypeError } from './main'; import { NotHtmlMimetypeError } from './main';
import { getFastifyError } from './validation'; import { getFastifyError } from './validation';
import { TxtDotError } from '@txtdot/sdk/dist/types/errors'; import { TxtDotError } from '@txtdot/sdk';
import { IGetSchema } from '../types/requests/browser'; import { IGetSchema } from '../types/requests/browser';
import config from '../config'; import config from '../config';

View File

@ -1,5 +1,5 @@
import config from '../config'; import config from '../config';
import { TxtDotError } from '@txtdot/sdk/dist/types/errors'; import { TxtDotError } from '@txtdot/sdk';
export class LocalResourceError extends TxtDotError { export class LocalResourceError extends TxtDotError {
constructor() { constructor() {

View File

@ -2,7 +2,7 @@ import { FastifySchema, FastifyRequest } from 'fastify';
import { IApiError, errorResponseSchema } from '../../errors/api'; import { IApiError, errorResponseSchema } from '../../errors/api';
import { engineList } from '../../plugin_manager'; import { engineList } from '../../plugin_manager';
import { FromSchema } from 'json-schema-to-ts'; import { FromSchema } from 'json-schema-to-ts';
import { handlerSchema } from '@txtdot/sdk/dist/types/handler'; import { handlerSchema } from '@txtdot/sdk';
export interface IApiResponse<T> { export interface IApiResponse<T> {
data?: T; data?: T;

View File

@ -2,13 +2,12 @@ import config from '../config';
import { generateParserUrl, generateProxyUrl } from './generate'; import { generateParserUrl, generateProxyUrl } from './generate';
export default function replaceHref( export default function replaceHref(
dom: Window, doc: Document,
requestUrl: URL, requestUrl: URL,
remoteUrl: URL, remoteUrl: URL,
engine?: string, engine?: string,
redirectPath: string = 'get' redirectPath: string = 'get'
) { ) {
const doc: Document = dom.window.document;
const parserUrl = (href: string) => const parserUrl = (href: string) =>
generateParserUrl(requestUrl, remoteUrl, href, engine, redirectPath); generateParserUrl(requestUrl, remoteUrl, href, engine, redirectPath);

File diff suppressed because it is too large Load Diff