Merge pull request #156 from TxtDot/engines-refactor

Add jsx support to engines plugins
This commit is contained in:
Artemy Egorov 2024-05-13 18:43:18 +03:00 committed by GitHub
commit e4402c5448
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 4102 additions and 3098 deletions

View File

@ -1,6 +1,6 @@
{
"name": "@txtdot/plugins",
"version": "1.1.1",
"version": "2.0.0",
"description": "Official txtdot plugins",
"main": "dist/lib.js",
"types": "dist/lib.d.ts",
@ -19,6 +19,7 @@
"license": "MIT",
"dependencies": {
"@mozilla/readability": "^0.5.0",
"linkedom": "^0.18.0",
"@txtdot/sdk": "workspace:*"
},
"devDependencies": {

View File

@ -0,0 +1,41 @@
import { JSX } from '@txtdot/sdk';
/**
 * Pagination footer for search-result pages.
 *
 * Renders a "Previous" link followed by a `|` separator, then a "Next" link.
 * A link is emitted only when its target is an actual URL: both `previous`
 * and `next` may be `false` ("no such page"), and rendering them anyway would
 * produce an anchor whose href is the literal text `false`.
 *
 * @param page     Current page number; the "Previous" link is never shown on page 1.
 * @param previous URL of the previous page, or `false` when there is none.
 * @param next     URL of the next page, or `false` when there is none.
 * @returns The footer markup produced by the JSX factory.
 */
export function PageFooter({
  page,
  previous,
  next,
}: {
  page: number;
  previous: string | false;
  next: string | false;
}) {
  return (
    <>
      {page !== 1 && previous !== false ? (
        <>
          <a href={previous}>Previous </a> |
        </>
      ) : (
        <></>
      )}
      {next !== false ? <a href={next}> Next</a> : <></>}
    </>
  );
}
/**
 * A single search result: the title as a link to the result URL, followed by
 * a paragraph holding the result's content.
 *
 * @param props.url     Target of the title link.
 * @param props.title   Text placed inside the link.
 * @param props.content Text placed inside the paragraph.
 * @returns The link and paragraph markup, in that order, with nothing between them.
 */
export function ResultItem(props: {
  url: string;
  title: string;
  content: string;
}) {
  const { url, title, content } = props;

  const heading = <a href={url}>{title}</a>;
  const summary = <p>{content}</p>;

  return (
    <>
      {heading}
      {summary}
    </>
  );
}

View File

@ -1,7 +1,7 @@
import { Readability as OReadability } from '@mozilla/readability';
import { EngineParseError } from '@txtdot/sdk/dist/types/errors';
import { Engine } from '@txtdot/sdk';
import { Engine, EngineParseError } from '@txtdot/sdk';
import { parseHTML } from 'linkedom';
const Readability = new Engine(
'Readability',
@ -10,7 +10,7 @@ const Readability = new Engine(
);
Readability.route('*path', async (input, ro) => {
const reader = new OReadability(input.parseDom().window.document);
const reader = new OReadability(input.document);
const parsed = reader.parse();
if (!parsed) {

View File

@ -1,5 +1,7 @@
import { Engine } from '@txtdot/sdk';
import { HandlerInput, Route } from '@txtdot/sdk/dist/types/handler';
import { Engine, JSX } from '@txtdot/sdk';
import { HandlerInput, Route } from '@txtdot/sdk';
import { parseHTML } from 'linkedom';
import { PageFooter, ResultItem } from '../components/searchers';
const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [
'searx.*',
@ -9,17 +11,23 @@ async function search(
input: HandlerInput,
ro: Route<{ search: string; pageno?: string }>
) {
const document = input.parseDom().window.document;
const document = input.document;
const search = ro.q.search;
const page = parseInt(ro.q.pageno || '1');
const page_footer = `${
page !== 1
? `<a href="${ro.reverse({ search, pageno: page - 1 })}">Previous </a>|`
: ''
}<a href="${ro.reverse({ search, pageno: page + 1 })}"> Next</a>`;
let previous: string | false;
let next: string | false;
if (ro.q.pageno) {
previous = ro.reverse({ search, pageno: page - 1 });
next = ro.reverse({ search, pageno: page + 1 });
} else {
previous = false;
next = `/search?q=${search}&pageno=${page + 1}`;
}
const articles = Array.from(document.querySelectorAll('.result'));
const articles_parsed = articles.map((a) => {
const parsed = {
url:
@ -34,21 +42,24 @@ async function search(
};
return {
html: `<a href="${parsed.url}">${parsed.title}</a><p>${parsed.content}</p><hr>`,
html: <ResultItem {...parsed} />,
text: `${parsed.title} (${parsed.url})\n${parsed.content}\n---\n\n`,
};
});
const content = `${articles_parsed
.map((a) => a.html)
.join('')}${page_footer}`;
const content = (
<>
{articles_parsed.map((a) => a.html).join('')}
<PageFooter page={page} previous={previous} next={next} />
</>
);
const textContent = articles_parsed.map((a) => a.text).join('');
return {
content,
content: content,
textContent,
title: `${search} - Searx - Page ${page}`,
lang: document.documentElement.lang,
};
}

View File

@ -1,10 +1,11 @@
import { HandlerInput, Route } from '@txtdot/sdk/dist/types/handler';
import { HandlerInput, Route } from '@txtdot/sdk';
import { JSX } from '@txtdot/sdk';
async function questions(
input: HandlerInput,
ro: Route<{ id: string; slug: string }>
) {
const document = input.parseDom().window.document;
const document = input.document;
const questionEl = document.getElementById('question');
const question = postParser(questionEl);
@ -15,12 +16,15 @@ async function questions(
const answers = allAnswers.map((a) => postParser(a));
return {
content: `${question}<hr>${answers.length} answers <hr>${answers.join(
'<hr>'
)}`,
textContent: `${ro.q.id}/${ro.q.slug}\nText output not supported`, // TODO
content: (
<>
{question}
<hr />
{answers.length} answers <hr />
{answers.join(<hr />)}
</>
),
title,
lang: document.documentElement.lang,
};
}
@ -37,12 +41,27 @@ function postParser(el: Element | null): string {
(el.querySelector('.user-details a') as HTMLAnchorElement)?.href || '';
const userTitle = el.querySelector('.user-action-time')?.textContent || '';
return `<h4>${userTitle}${
userUrl ? ` by <a href="${userUrl}">${userName}</a>` : ''
}</h4>`;
return (
<h4>
{userTitle}
{userUrl ? (
<>
by <a href={userUrl}>{userName}</a>
</>
) : (
<></>
)}
</h4>
);
});
return `<h3>${voteCount} votes</h3>${body}${footer.join('')}`;
return (
<>
<h3>{voteCount} votes</h3>
{body}
{footer.join('')}
</>
);
}
export default questions;

View File

@ -1,10 +1,11 @@
import { HandlerInput, Route } from '@txtdot/sdk/dist/types/handler';
import { HandlerInput, Route } from '@txtdot/sdk';
import { JSX } from '@txtdot/sdk';
async function users(
input: HandlerInput,
ro: Route<{ id: string; slug: string }>
) {
const document = input.parseDom().window.document;
const document = input.document;
const userInfo =
document.querySelector('.md\\:ai-start > div:nth-child(2)')?.textContent ||
@ -21,15 +22,28 @@ async function users(
const type =
el.querySelector('.iconAnswer, .iconQuestion')?.textContent || '';
return `<strong>${type} (${votes}) </strong><a href="${url}">${title}</a>`;
return (
<>
<strong>
{type} ({votes}){' '}
</strong>
<a href={url}>{title}</a>
</>
);
})
.join('<br/>');
.join(<br />);
return {
content: `${userInfo}<hr><h3>Top Posts</h3>${topPosts}`,
content: (
<>
{userInfo}
<hr />
<h3>Top Posts</h3>
{topPosts}
</>
),
textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
title: document.querySelector('title')?.textContent || '',
lang: document.documentElement.lang,
};
}

View File

@ -13,13 +13,13 @@
/* Language and Environment */
"target": "ES2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
"jsx": "react" /* Specify what JSX code is generated. */,
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
"reactNamespace": "JSX" /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */,
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */

View File

@ -1,6 +1,6 @@
{
"name": "@txtdot/sdk",
"version": "1.1.1",
"version": "2.0.0",
"description": "SDK for creating plugins for TxtDot",
"main": "dist/lib.js",
"types": "dist/lib.d.ts",

View File

@ -2,9 +2,9 @@ import Route from 'route-parser';
import {
HandlerInput,
IHandlerOutput,
EngineFunction,
RouteValues,
EngineOutput,
} from './types/handler';
import { NoHandlerFoundError } from './types/errors';
@ -33,7 +33,7 @@ export class Engine {
this.routes.push({ route: new Route<TParams>(path), handler });
}
async handle(input: HandlerInput): Promise<IHandlerOutput> {
async handle(input: HandlerInput): Promise<EngineOutput> {
const url = new URL(input.getUrl());
const path = url.pathname + url.search + url.hash;
for (const route of this.routes) {

35
packages/sdk/src/jsx.ts Normal file
View File

@ -0,0 +1,35 @@
// Type declarations describing JSX expressions for this module's string-based
// JSX factory. NOTE(review): presumably this is the namespace the TypeScript
// compiler consults when type-checking JSX in plugin sources — confirm against
// the tsconfig `jsx` / `reactNamespace` settings.
// eslint-disable-next-line @typescript-eslint/no-namespace
export namespace JSX {
// A JSX expression is typed as a plain string; createElement in this file
// builds its results with string templates and joins.
export type Element = string;
// Any tag name is accepted, with unchecked attributes (hence the `any`).
export interface IntrinsicElements {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
[elemName: string]: any;
}
}
/**
 * Escapes a value for use inside a double-quoted HTML attribute.
 */
function escapeAttribute(value: unknown): string {
  const replacements: { [char: string]: string } = {
    '&': '&amp;',
    '"': '&quot;',
    '<': '&lt;',
    '>': '&gt;',
  };
  return String(value).replace(/[&"<>]/g, (char) => replacements[char] || char);
}

/**
 * Serialises a props object into an attribute string. Every emitted attribute
 * is prefixed with a single space, so the result can be appended directly
 * after the tag name.
 */
function renderAttributes(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  props: { [id: string]: any }
): string {
  return Object.keys(props)
    .map((key) => {
      const value = props[key];
      // Absent / disabled attributes are omitted instead of being written
      // out as the text "false", "null" or "undefined".
      if (value === undefined || value === null || value === false) return '';
      const attribute = key === 'className' ? 'class' : key;
      // `true` renders as a bare boolean attribute (e.g. `disabled`).
      if (value === true) return ` ${attribute}`;
      return ` ${attribute}="${escapeAttribute(value)}"`;
    })
    .join('');
}

/**
 * Concatenates JSX children into one HTML string. Nested arrays (e.g. the
 * result of `.map()` inside braces) are flattened rather than comma-joined,
 * and `null`, `undefined` and booleans render as nothing.
 */
function renderChildren(children: readonly unknown[]): string {
  let html = '';
  for (const child of children) {
    if (Array.isArray(child)) {
      html += renderChildren(child);
    } else if (child !== undefined && child !== null && typeof child !== 'boolean') {
      html += String(child);
    }
  }
  return html;
}

/**
 * JSX factory that renders straight to an HTML string.
 *
 * - String `name`: renders that tag. Attribute values are double-quoted and
 *   escaped, `className` is emitted as `class`, and a tag without children is
 *   self-closed. Children are treated as already-rendered HTML and are NOT
 *   escaped, so callers must not pass untrusted text as a child unescaped.
 * - Function `name`: called as a component with `(props, ...content)`.
 * - Anything else (for example an undefined fragment factory): renders only
 *   the concatenated children.
 *
 * @param name    Tag name, component function, or a non-string/non-function fragment marker.
 * @param props   Attributes / component props; `null` when the element has none.
 * @param content Child nodes, already rendered to strings.
 * @returns The rendered HTML string (or whatever a component function returns).
 */
export function createElement(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  name: any,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  props: { [id: string]: any } | null,
  ...content: string[]
): string {
  if (typeof name === 'string') {
    const attributes = renderAttributes(props || {});
    return content.length === 0
      ? `<${name}${attributes} />`
      : `<${name}${attributes}>${renderChildren(content)}</${name}>`;
  } else if (typeof name === 'function') {
    return name(props, ...content);
  } else {
    return renderChildren(content);
  }
}

View File

@ -1,3 +1,38 @@
import { Engine } from './engine';
export { Engine };
import {
EngineParseError,
NoHandlerFoundError,
TxtDotError,
} from './types/errors';
import {
EngineFunction,
EngineMatch,
Engines,
RouteValues,
EnginesMatch,
HandlerInput,
HandlerOutput,
Route,
handlerSchema,
} from './types/handler';
import * as JSX from './jsx';
export {
Engine,
EngineParseError,
NoHandlerFoundError,
TxtDotError,
EngineFunction,
EngineMatch,
Engines,
RouteValues,
EnginesMatch,
HandlerInput,
HandlerOutput,
Route,
handlerSchema,
JSX,
};

View File

@ -4,7 +4,7 @@ import { Engine } from '../engine';
export class HandlerInput {
private data: string;
private url: string;
private dom?: Window;
private window?: Window;
constructor(data: string, url: string) {
this.data = data;
@ -15,19 +15,26 @@ export class HandlerInput {
return this.url;
}
parseDom(): Window {
if (this.dom) {
return this.dom;
get document(): Document {
if (this.window) {
return this.window.document;
}
this.dom = parseHTML(this.data);
return this.dom;
this.window = parseHTML(this.data);
return this.window.document;
}
}
export interface IHandlerOutput {
export interface HandlerOutput {
content: string;
textContent: string;
title: string;
lang: string;
}
export interface EngineOutput {
content: string;
textContent?: string;
title?: string;
lang?: string;
}
@ -66,7 +73,7 @@ export interface RouteValues {
export type EngineFunction<TParams extends RouteValues> = (
input: HandlerInput,
ro: Route<TParams>
) => Promise<IHandlerOutput>;
) => Promise<EngineOutput>;
export type EnginesMatch<TParams extends RouteValues> = EngineMatch<TParams>[];

View File

@ -1,6 +1,6 @@
{
"name": "@txtdot/server",
"version": "1.7.0",
"version": "1.8.0",
"private": true,
"description": "txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts",
"main": "dist/app.js",

View File

@ -5,11 +5,11 @@ import { Readable } from 'stream';
import { NotHtmlMimetypeError } from './errors/main';
import { decodeStream, parseEncodingName } from './utils/http';
import replaceHref from './utils/replace-href';
import { parseHTML } from 'linkedom';
import { Engine } from '@txtdot/sdk';
import { HandlerInput, IHandlerOutput } from '@txtdot/sdk/dist/types/handler';
import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
import config from './config';
import { parseHTML } from 'linkedom';
interface IEngineId {
[key: string]: number;
@ -32,7 +32,7 @@ export class Distributor {
requestUrl: URL, // proxy URL
engineName?: string,
redirectPath: string = 'get'
): Promise<IHandlerOutput> {
): Promise<HandlerOutput> {
const urlObj = new URL(remoteUrl);
const webder_url = config.env.third_party.webder_url;
@ -52,6 +52,7 @@ export class Distributor {
}
const engine = this.getFallbackEngine(urlObj.hostname, engineName);
const output = await engine.handle(
new HandlerInput(
await decodeStream(data, parseEncodingName(mime)),
@ -59,15 +60,28 @@ export class Distributor {
)
);
const dom = parseHTML(output.content);
// post-process
// TODO: generate dom in handler and not parse here twice
const dom = parseHTML(output.content);
replaceHref(dom, requestUrl, new URL(remoteUrl), engineName, redirectPath);
replaceHref(
dom.document,
requestUrl,
new URL(remoteUrl),
engineName,
redirectPath
);
const purify = DOMPurify(dom.window);
output.content = purify.sanitize(dom.document.toString());
const purify = DOMPurify(dom);
const content = purify.sanitize(dom.document.toString());
return output;
return {
content,
textContent:
output.textContent || dom.document.documentElement.textContent || '',
title: output.title || dom.document.title,
lang: output.lang || dom.document.documentElement.lang,
};
}
getFallbackEngine(host: string, specified?: string): Engine {

View File

@ -2,7 +2,7 @@ import { FastifyReply, FastifyRequest } from 'fastify';
import { NotHtmlMimetypeError } from './main';
import { getFastifyError } from './validation';
import { TxtDotError } from '@txtdot/sdk/dist/types/errors';
import { TxtDotError } from '@txtdot/sdk';
import { IGetSchema } from '../types/requests/browser';
import config from '../config';

View File

@ -1,5 +1,5 @@
import config from '../config';
import { TxtDotError } from '@txtdot/sdk/dist/types/errors';
import { TxtDotError } from '@txtdot/sdk';
export class LocalResourceError extends TxtDotError {
constructor() {

View File

@ -2,7 +2,7 @@ import { FastifySchema, FastifyRequest } from 'fastify';
import { IApiError, errorResponseSchema } from '../../errors/api';
import { engineList } from '../../plugin_manager';
import { FromSchema } from 'json-schema-to-ts';
import { handlerSchema } from '@txtdot/sdk/dist/types/handler';
import { handlerSchema } from '@txtdot/sdk';
export interface IApiResponse<T> {
data?: T;

View File

@ -2,13 +2,12 @@ import config from '../config';
import { generateParserUrl, generateProxyUrl } from './generate';
export default function replaceHref(
dom: Window,
doc: Document,
requestUrl: URL,
remoteUrl: URL,
engine?: string,
redirectPath: string = 'get'
) {
const doc: Document = dom.window.document;
const parserUrl = (href: string) =>
generateParserUrl(requestUrl, remoteUrl, href, engine, redirectPath);

File diff suppressed because it is too large Load Diff