From 3cee45b591a71d6a3067d896548f8648523d3ecb Mon Sep 17 00:00:00 2001 From: Artemy Date: Mon, 13 May 2024 16:35:36 +0300 Subject: [PATCH] feat(sdk, plugins): jsx support --- packages/plugins/src/engines/habr.tsx | 13 ++++++++++ packages/plugins/src/engines/index.ts | 3 ++- packages/plugins/src/engines/readability.ts | 2 +- packages/plugins/src/engines/searx.ts | 2 +- .../src/engines/stackoverflow/questions.ts | 4 +--- .../src/engines/stackoverflow/users.ts | 3 +-- packages/plugins/tsconfig.json | 4 ++-- packages/sdk/src/jsx.ts | 24 +++++++++++++++++++ packages/sdk/src/lib.ts | 3 +++ packages/sdk/src/types/handler.ts | 2 +- packages/server/src/distributor.ts | 13 ++++++---- 11 files changed, 58 insertions(+), 15 deletions(-) create mode 100644 packages/plugins/src/engines/habr.tsx create mode 100644 packages/sdk/src/jsx.ts diff --git a/packages/plugins/src/engines/habr.tsx b/packages/plugins/src/engines/habr.tsx new file mode 100644 index 0000000..1d79441 --- /dev/null +++ b/packages/plugins/src/engines/habr.tsx @@ -0,0 +1,13 @@ +import { Engine } from '@txtdot/sdk'; + +import { JSX } from '@txtdot/sdk'; + +const Habr = new Engine('Habr', 'Habr parser', ['*']); + +Habr.route('*path', async (input, ro) => { + return { + content:
Test
, + }; +}); + +export default Habr; diff --git a/packages/plugins/src/engines/index.ts b/packages/plugins/src/engines/index.ts index b5f8527..ab279b6 100644 --- a/packages/plugins/src/engines/index.ts +++ b/packages/plugins/src/engines/index.ts @@ -1,5 +1,6 @@ import StackOverflow from './stackoverflow'; import Readability from './readability'; import SearX from './searx'; +import Habr from './habr'; -export { StackOverflow, Readability, SearX }; +export { StackOverflow, Readability, SearX, Habr }; diff --git a/packages/plugins/src/engines/readability.ts b/packages/plugins/src/engines/readability.ts index 6b064af..daf3f1c 100644 --- a/packages/plugins/src/engines/readability.ts +++ b/packages/plugins/src/engines/readability.ts @@ -18,7 +18,7 @@ Readability.route('*path', async (input, ro) => { } return { - document: parseHTML(parsed.content).document, + content: parsed.content, textContent: parsed.textContent, title: parsed.title, lang: parsed.lang, diff --git a/packages/plugins/src/engines/searx.ts b/packages/plugins/src/engines/searx.ts index 5446780..c947a1c 100644 --- a/packages/plugins/src/engines/searx.ts +++ b/packages/plugins/src/engines/searx.ts @@ -46,7 +46,7 @@ async function search( const textContent = articles_parsed.map((a) => a.text).join(''); return { - document: parseHTML(content).document, + content: content, textContent, title: `${search} - Searx - Page ${page}`, lang: document.documentElement.lang, diff --git a/packages/plugins/src/engines/stackoverflow/questions.ts b/packages/plugins/src/engines/stackoverflow/questions.ts index 03b1121..f80f378 100644 --- a/packages/plugins/src/engines/stackoverflow/questions.ts +++ b/packages/plugins/src/engines/stackoverflow/questions.ts @@ -16,9 +16,7 @@ async function questions( const answers = allAnswers.map((a) => postParser(a)); return { - document: parseHTML( - `${question}
${answers.length} answers
${answers.join('
')}` - ).document, + content: `${question}
${answers.length} answers
${answers.join('
')}`, textContent: `${ro.q.id}/${ro.q.slug}\nText output not supported`, // TODO title, lang: document.documentElement.lang, diff --git a/packages/plugins/src/engines/stackoverflow/users.ts b/packages/plugins/src/engines/stackoverflow/users.ts index e0d015a..180d313 100644 --- a/packages/plugins/src/engines/stackoverflow/users.ts +++ b/packages/plugins/src/engines/stackoverflow/users.ts @@ -27,8 +27,7 @@ async function users( .join('
'); return { - document: parseHTML(`${userInfo}

Top Posts

${topPosts}`) - .document, + content: `${userInfo}

Top Posts

${topPosts}`, textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO title: document.querySelector('title')?.textContent || '', lang: document.documentElement.lang, diff --git a/packages/plugins/tsconfig.json b/packages/plugins/tsconfig.json index 073c804..c810653 100644 --- a/packages/plugins/tsconfig.json +++ b/packages/plugins/tsconfig.json @@ -13,13 +13,13 @@ /* Language and Environment */ "target": "ES2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ - // "jsx": "preserve", /* Specify what JSX code is generated. */ + "jsx": "react" /* Specify what JSX code is generated. */, // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ - // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ + "reactNamespace": "JSX" /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */, // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ diff --git a/packages/sdk/src/jsx.ts b/packages/sdk/src/jsx.ts new file mode 100644 index 0000000..bc85416 --- /dev/null +++ b/packages/sdk/src/jsx.ts @@ -0,0 +1,24 @@ +// eslint-disable-next-line @typescript-eslint/no-namespace +export namespace JSX { + export type Element = string; + export interface IntrinsicElements { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + [elemName: string]: any; + } +} + +export function createElement( + name: string, + props: { [id: string]: string }, + ...content: string[] +) { + props = props || {}; + const propsstr = Object.keys(props) + .map((key) => { + const value = props[key]; + if (key === 'className') return `class=${value}`; + else return `${key}=${value}`; + }) + .join(' '); + return `<${name} ${propsstr}>${content.join('')}`; +} diff --git a/packages/sdk/src/lib.ts b/packages/sdk/src/lib.ts index 023984c..31909b6 100644 --- a/packages/sdk/src/lib.ts +++ b/packages/sdk/src/lib.ts @@ -18,6 +18,8 @@ import { handlerSchema, } from './types/handler'; +import * as JSX from './jsx'; + export { Engine, EngineParseError, @@ -32,4 +34,5 @@ export { HandlerOutput, Route, handlerSchema, + JSX, }; diff --git a/packages/sdk/src/types/handler.ts b/packages/sdk/src/types/handler.ts index 1d842bb..cf14a03 100644 --- a/packages/sdk/src/types/handler.ts +++ b/packages/sdk/src/types/handler.ts @@ -33,7 +33,7 @@ export interface HandlerOutput { } export interface EngineOutput { - document: Document; + content: string; textContent?: string; title?: string; lang?: string; diff --git a/packages/server/src/distributor.ts b/packages/server/src/distributor.ts index 59c8cbe..3fbd9c1 100644 --- a/packages/server/src/distributor.ts +++ b/packages/server/src/distributor.ts @@ -9,6 +9,7 @@ import replaceHref from './utils/replace-href'; import { Engine } from '@txtdot/sdk'; import { HandlerInput, HandlerOutput } from '@txtdot/sdk'; import config from './config'; +import { parseHTML } from 'linkedom'; interface IEngineId { [key: string]: number; @@ -51,6 +52,7 @@ export class Distributor { } const engine = this.getFallbackEngine(urlObj.hostname, engineName); + const output = await engine.handle( new HandlerInput( await decodeStream(data, parseEncodingName(mime)), @@ -58,22 +60,25 @@ export class Distributor { ) ); + const dom = parseHTML(output.content); + // post-process // TODO: generate dom in handler and not parse here twice replaceHref( - output.document, + dom.document, requestUrl, new URL(remoteUrl), engineName, redirectPath ); - const purify = DOMPurify(); - const content = purify.sanitize(output.document.toString()); + const purify = DOMPurify(dom); + const content = purify.sanitize(output.content); return { content, - textContent: output.textContent || output.document.textContent || '', + textContent: + output.textContent || dom.document.documentElement.textContent || '', title: output.title, lang: output.lang, };