diff --git a/packages/plugins/src/engines/habr.tsx b/packages/plugins/src/engines/habr.tsx
new file mode 100644
index 0000000..1d79441
--- /dev/null
+++ b/packages/plugins/src/engines/habr.tsx
@@ -0,0 +1,13 @@
+import { Engine } from '@txtdot/sdk';
+
+import { JSX } from '@txtdot/sdk';
+
+const Habr = new Engine('Habr', 'Habr parser', ['*']);
+
+Habr.route('*path', async (input, ro) => {
+ return {
+ content: <div>Test</div>,
+ };
+});
+
+export default Habr;
diff --git a/packages/plugins/src/engines/index.ts b/packages/plugins/src/engines/index.ts
index b5f8527..ab279b6 100644
--- a/packages/plugins/src/engines/index.ts
+++ b/packages/plugins/src/engines/index.ts
@@ -1,5 +1,6 @@
import StackOverflow from './stackoverflow';
import Readability from './readability';
import SearX from './searx';
+import Habr from './habr';
-export { StackOverflow, Readability, SearX };
+export { StackOverflow, Readability, SearX, Habr };
diff --git a/packages/plugins/src/engines/readability.ts b/packages/plugins/src/engines/readability.ts
index 6b064af..daf3f1c 100644
--- a/packages/plugins/src/engines/readability.ts
+++ b/packages/plugins/src/engines/readability.ts
@@ -18,7 +18,7 @@ Readability.route('*path', async (input, ro) => {
}
return {
- document: parseHTML(parsed.content).document,
+ content: parsed.content,
textContent: parsed.textContent,
title: parsed.title,
lang: parsed.lang,
diff --git a/packages/plugins/src/engines/searx.ts b/packages/plugins/src/engines/searx.ts
index 5446780..c947a1c 100644
--- a/packages/plugins/src/engines/searx.ts
+++ b/packages/plugins/src/engines/searx.ts
@@ -46,7 +46,7 @@ async function search(
const textContent = articles_parsed.map((a) => a.text).join('');
return {
- document: parseHTML(content).document,
+ content: content,
textContent,
title: `${search} - Searx - Page ${page}`,
lang: document.documentElement.lang,
diff --git a/packages/plugins/src/engines/stackoverflow/questions.ts b/packages/plugins/src/engines/stackoverflow/questions.ts
index 03b1121..f80f378 100644
--- a/packages/plugins/src/engines/stackoverflow/questions.ts
+++ b/packages/plugins/src/engines/stackoverflow/questions.ts
@@ -16,9 +16,7 @@ async function questions(
const answers = allAnswers.map((a) => postParser(a));
return {
- document: parseHTML(
- `${question}
${answers.length} answers
${answers.join('
')}`
- ).document,
+ content: `${question}
${answers.length} answers
${answers.join('
')}`,
textContent: `${ro.q.id}/${ro.q.slug}\nText output not supported`, // TODO
title,
lang: document.documentElement.lang,
diff --git a/packages/plugins/src/engines/stackoverflow/users.ts b/packages/plugins/src/engines/stackoverflow/users.ts
index e0d015a..180d313 100644
--- a/packages/plugins/src/engines/stackoverflow/users.ts
+++ b/packages/plugins/src/engines/stackoverflow/users.ts
@@ -27,8 +27,7 @@ async function users(
.join('
');
return {
- document: parseHTML(`${userInfo}
Top Posts
${topPosts}`)
- .document,
+ content: `${userInfo}
Top Posts
${topPosts}`,
textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
title: document.querySelector('title')?.textContent || '',
lang: document.documentElement.lang,
diff --git a/packages/plugins/tsconfig.json b/packages/plugins/tsconfig.json
index 073c804..c810653 100644
--- a/packages/plugins/tsconfig.json
+++ b/packages/plugins/tsconfig.json
@@ -13,13 +13,13 @@
/* Language and Environment */
"target": "ES2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
- // "jsx": "preserve", /* Specify what JSX code is generated. */
+ "jsx": "react" /* Specify what JSX code is generated. */,
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
- // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
+ "reactNamespace": "JSX" /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */,
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
diff --git a/packages/sdk/src/jsx.ts b/packages/sdk/src/jsx.ts
new file mode 100644
index 0000000..bc85416
--- /dev/null
+++ b/packages/sdk/src/jsx.ts
@@ -0,0 +1,37 @@
+// eslint-disable-next-line @typescript-eslint/no-namespace
+export namespace JSX {
+  // JSX expressions evaluate to plain HTML strings (see createElement).
+  export type Element = string;
+  export interface IntrinsicElements {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    [elemName: string]: any;
+  }
+}
+
+/**
+ * JSX factory: serializes one element to an HTML string.
+ *
+ * @param name - Tag name.
+ * @param props - Attributes, or `null` when the element has none;
+ *   `className` is emitted as `class`.
+ * @param content - Child HTML strings, concatenated as-is (not escaped).
+ * @returns The element as an HTML string.
+ */
+export function createElement(
+  name: string,
+  props: { [id: string]: string } | null,
+  ...content: string[]
+): string {
+  const attributes = Object.entries(props ?? {})
+    .map(([key, value]) => {
+      const attr = key === 'className' ? 'class' : key;
+      // Quote and escape the value so whitespace or quotes inside it cannot
+      // end the attribute early.
+      const escaped = String(value).replace(/&/g, '&amp;').replace(/"/g, '&quot;');
+      return ` ${attr}="${escaped}"`;
+    })
+    .join('');
+  // Each attribute carries its own leading space, so an element without
+  // attributes renders as `<p>` rather than `<p >`.
+  return `<${name}${attributes}>${content.join('')}</${name}>`;
+}
diff --git a/packages/sdk/src/lib.ts b/packages/sdk/src/lib.ts
index 023984c..31909b6 100644
--- a/packages/sdk/src/lib.ts
+++ b/packages/sdk/src/lib.ts
@@ -18,6 +18,8 @@ import {
handlerSchema,
} from './types/handler';
+import * as JSX from './jsx';
+
export {
Engine,
EngineParseError,
@@ -32,4 +34,5 @@ export {
HandlerOutput,
Route,
handlerSchema,
+ JSX,
};
diff --git a/packages/sdk/src/types/handler.ts b/packages/sdk/src/types/handler.ts
index 1d842bb..cf14a03 100644
--- a/packages/sdk/src/types/handler.ts
+++ b/packages/sdk/src/types/handler.ts
@@ -33,7 +33,7 @@ export interface HandlerOutput {
}
export interface EngineOutput {
- document: Document;
+ content: string;
textContent?: string;
title?: string;
lang?: string;
diff --git a/packages/server/src/distributor.ts b/packages/server/src/distributor.ts
index 59c8cbe..3fbd9c1 100644
--- a/packages/server/src/distributor.ts
+++ b/packages/server/src/distributor.ts
@@ -9,6 +9,7 @@ import replaceHref from './utils/replace-href';
import { Engine } from '@txtdot/sdk';
import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
import config from './config';
+import { parseHTML } from 'linkedom';
interface IEngineId {
[key: string]: number;
@@ -51,6 +52,7 @@ export class Distributor {
}
const engine = this.getFallbackEngine(urlObj.hostname, engineName);
+
const output = await engine.handle(
new HandlerInput(
await decodeStream(data, parseEncodingName(mime)),
@@ -58,22 +60,27 @@ export class Distributor {
)
);
+ const dom = parseHTML(output.content);
+
// post-process
// TODO: generate dom in handler and not parse here twice
replaceHref(
- output.document,
+ dom.document,
requestUrl,
new URL(remoteUrl),
engineName,
redirectPath
);
- const purify = DOMPurify();
- const content = purify.sanitize(output.document.toString());
+ // Sanitize the serialized DOM, not output.content: replaceHref above works
+ // on dom.document, so the engine's raw string would drop its changes.
+ const purify = DOMPurify(dom);
+ const content = purify.sanitize(dom.document.toString());
return {
content,
- textContent: output.textContent || output.document.textContent || '',
+ textContent:
+ output.textContent || dom.document.documentElement.textContent || '',
title: output.title,
lang: output.lang,
};