diff --git a/packages/plugins/package.json b/packages/plugins/package.json
index 2f1a8cd..8958ba5 100644
--- a/packages/plugins/package.json
+++ b/packages/plugins/package.json
@@ -1,6 +1,6 @@
{
"name": "@txtdot/plugins",
- "version": "1.1.1",
+ "version": "2.0.0",
"description": "Official txtdot plugins",
"main": "dist/lib.js",
"types": "dist/lib.d.ts",
@@ -19,6 +19,7 @@
"license": "MIT",
"dependencies": {
"@mozilla/readability": "^0.5.0",
+ "linkedom": "^0.18.0",
"@txtdot/sdk": "workspace:*"
},
"devDependencies": {
diff --git a/packages/plugins/src/engines/readability.ts b/packages/plugins/src/engines/readability.ts
index 28dcfe0..6b064af 100644
--- a/packages/plugins/src/engines/readability.ts
+++ b/packages/plugins/src/engines/readability.ts
@@ -1,6 +1,7 @@
import { Readability as OReadability } from '@mozilla/readability';
import { Engine, EngineParseError } from '@txtdot/sdk';
+import { parseHTML } from 'linkedom';
const Readability = new Engine(
'Readability',
@@ -9,7 +10,7 @@ const Readability = new Engine(
);
Readability.route('*path', async (input, ro) => {
- const reader = new OReadability(input.parseDom().window.document);
+ const reader = new OReadability(input.document);
const parsed = reader.parse();
if (!parsed) {
@@ -17,7 +18,7 @@ Readability.route('*path', async (input, ro) => {
}
return {
- content: parsed.content,
+ document: parseHTML(parsed.content).document,
textContent: parsed.textContent,
title: parsed.title,
lang: parsed.lang,
diff --git a/packages/plugins/src/engines/searx.ts b/packages/plugins/src/engines/searx.ts
index 8f9bcd2..5446780 100644
--- a/packages/plugins/src/engines/searx.ts
+++ b/packages/plugins/src/engines/searx.ts
@@ -1,5 +1,6 @@
import { Engine } from '@txtdot/sdk';
import { HandlerInput, Route } from '@txtdot/sdk';
+import { parseHTML } from 'linkedom';
const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [
'searx.*',
@@ -9,7 +10,7 @@ async function search(
input: HandlerInput,
ro: Route<{ search: string; pageno?: string }>
) {
- const document = input.parseDom().window.document;
+ const document = input.document;
const search = ro.q.search;
const page = parseInt(ro.q.pageno || '1');
@@ -45,7 +46,7 @@ async function search(
const textContent = articles_parsed.map((a) => a.text).join('');
return {
- content,
+ document: parseHTML(content).document,
textContent,
title: `${search} - Searx - Page ${page}`,
lang: document.documentElement.lang,
diff --git a/packages/plugins/src/engines/stackoverflow/questions.ts b/packages/plugins/src/engines/stackoverflow/questions.ts
index f648768..03b1121 100644
--- a/packages/plugins/src/engines/stackoverflow/questions.ts
+++ b/packages/plugins/src/engines/stackoverflow/questions.ts
@@ -1,10 +1,11 @@
import { HandlerInput, Route } from '@txtdot/sdk';
+import { parseHTML } from 'linkedom';
async function questions(
input: HandlerInput,
ro: Route<{ id: string; slug: string }>
) {
- const document = input.parseDom().window.document;
+ const document = input.document;
const questionEl = document.getElementById('question');
const question = postParser(questionEl);
@@ -15,9 +16,9 @@ async function questions(
const answers = allAnswers.map((a) => postParser(a));
return {
- content: `${question}
${answers.length} answers
${answers.join(
- '
'
- )}`,
+ document: parseHTML(
+ `${question}
${answers.length} answers
${answers.join('
')}`
+ ).document,
textContent: `${ro.q.id}/${ro.q.slug}\nText output not supported`, // TODO
title,
lang: document.documentElement.lang,
diff --git a/packages/plugins/src/engines/stackoverflow/users.ts b/packages/plugins/src/engines/stackoverflow/users.ts
index dc46521..e0d015a 100644
--- a/packages/plugins/src/engines/stackoverflow/users.ts
+++ b/packages/plugins/src/engines/stackoverflow/users.ts
@@ -1,10 +1,11 @@
import { HandlerInput, Route } from '@txtdot/sdk';
+import { parseHTML } from 'linkedom';
async function users(
input: HandlerInput,
ro: Route<{ id: string; slug: string }>
) {
- const document = input.parseDom().window.document;
+ const document = input.document;
const userInfo =
document.querySelector('.md\\:ai-start > div:nth-child(2)')?.textContent ||
@@ -26,7 +27,8 @@ async function users(
.join('
');
return {
- content: `${userInfo}
Top Posts
${topPosts}`,
+ document: parseHTML(`${userInfo}
Top Posts
${topPosts}`)
+ .document,
textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
title: document.querySelector('title')?.textContent || '',
lang: document.documentElement.lang,
diff --git a/packages/sdk/package.json b/packages/sdk/package.json
index 71f9b95..fbe73a0 100644
--- a/packages/sdk/package.json
+++ b/packages/sdk/package.json
@@ -1,6 +1,6 @@
{
"name": "@txtdot/sdk",
- "version": "1.1.2",
+ "version": "2.0.0",
"description": "SDK for creating plugins for TxtDot",
"main": "dist/lib.js",
"types": "dist/lib.d.ts",
diff --git a/packages/sdk/src/engine.ts b/packages/sdk/src/engine.ts
index 20a9a91..601e647 100644
--- a/packages/sdk/src/engine.ts
+++ b/packages/sdk/src/engine.ts
@@ -2,9 +2,9 @@ import Route from 'route-parser';
import {
HandlerInput,
- IHandlerOutput,
EngineFunction,
RouteValues,
+ EngineOutput,
} from './types/handler';
import { NoHandlerFoundError } from './types/errors';
@@ -33,7 +33,7 @@ export class Engine {
this.routes.push({ route: new Route(path), handler });
}
- async handle(input: HandlerInput): Promise<IHandlerOutput> {
+ async handle(input: HandlerInput): Promise<EngineOutput> {
const url = new URL(input.getUrl());
const path = url.pathname + url.search + url.hash;
for (const route of this.routes) {
diff --git a/packages/sdk/src/lib.ts b/packages/sdk/src/lib.ts
index 63198f1..023984c 100644
--- a/packages/sdk/src/lib.ts
+++ b/packages/sdk/src/lib.ts
@@ -13,7 +13,7 @@ import {
RouteValues,
EnginesMatch,
HandlerInput,
- IHandlerOutput,
+ HandlerOutput,
Route,
handlerSchema,
} from './types/handler';
@@ -29,7 +29,7 @@ export {
RouteValues,
EnginesMatch,
HandlerInput,
- IHandlerOutput,
+ HandlerOutput,
Route,
handlerSchema,
};
diff --git a/packages/sdk/src/types/handler.ts b/packages/sdk/src/types/handler.ts
index 9af088a..1d842bb 100644
--- a/packages/sdk/src/types/handler.ts
+++ b/packages/sdk/src/types/handler.ts
@@ -4,7 +4,7 @@ import { Engine } from '../engine';
export class HandlerInput {
private data: string;
private url: string;
- private dom?: Window;
+ private window?: Window;
constructor(data: string, url: string) {
this.data = data;
@@ -15,23 +15,30 @@ export class HandlerInput {
return this.url;
}
- parseDom(): Window {
- if (this.dom) {
- return this.dom;
+ get document(): Document {
+ if (this.window) {
+ return this.window.document;
}
- this.dom = parseHTML(this.data);
- return this.dom;
+ this.window = parseHTML(this.data);
+ return this.window.document;
}
}
-export interface IHandlerOutput {
+export interface HandlerOutput {
content: string;
textContent: string;
title?: string;
lang?: string;
}
+export interface EngineOutput {
+ document: Document;
+ textContent?: string;
+ title?: string;
+ lang?: string;
+}
+
export const handlerSchema = {
type: 'object',
properties: {
@@ -66,7 +73,7 @@ export interface RouteValues {
export type EngineFunction = (
input: HandlerInput,
ro: Route
-) => Promise<IHandlerOutput>;
+) => Promise<EngineOutput>;
export type EnginesMatch = EngineMatch[];
diff --git a/packages/server/package.json b/packages/server/package.json
index dbe48be..0d95386 100644
--- a/packages/server/package.json
+++ b/packages/server/package.json
@@ -1,6 +1,6 @@
{
"name": "@txtdot/server",
- "version": "1.7.0",
+ "version": "1.8.0",
"private": true,
"description": "txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts",
"main": "dist/app.js",
diff --git a/packages/server/src/distributor.ts b/packages/server/src/distributor.ts
index 60cc8e6..59c8cbe 100644
--- a/packages/server/src/distributor.ts
+++ b/packages/server/src/distributor.ts
@@ -5,10 +5,9 @@ import { Readable } from 'stream';
import { NotHtmlMimetypeError } from './errors/main';
import { decodeStream, parseEncodingName } from './utils/http';
import replaceHref from './utils/replace-href';
-import { parseHTML } from 'linkedom';
import { Engine } from '@txtdot/sdk';
-import { HandlerInput, IHandlerOutput } from '@txtdot/sdk';
+import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
import config from './config';
interface IEngineId {
@@ -32,7 +31,7 @@ export class Distributor {
requestUrl: URL, // proxy URL
engineName?: string,
redirectPath: string = 'get'
- ): Promise<IHandlerOutput> {
+ ): Promise<HandlerOutput> {
const urlObj = new URL(remoteUrl);
const webder_url = config.env.third_party.webder_url;
@@ -61,13 +60,32 @@ export class Distributor {
// post-process
- // TODO: generate dom in handler and not parse here twice
- const dom = parseHTML(output.content);
- replaceHref(dom, requestUrl, new URL(remoteUrl), engineName, redirectPath);
+ // Engines now return an already-parsed Document, so links are rewritten in
+ // place instead of re-parsing the engine's HTML string here.
+ replaceHref(
+ output.document,
+ requestUrl,
+ new URL(remoteUrl),
+ engineName,
+ redirectPath
+ );
- const purify = DOMPurify(dom.window);
- output.content = purify.sanitize(dom.document.toString());
+ // DOMPurify needs a window that carries a DOM. Called with no argument
+ // outside a browser it returns an unsupported instance, so hand it the
+ // window that owns the engine's document (as the old code did with dom.window).
+ const purify = DOMPurify(output.document.defaultView ?? undefined);
+ const content = purify.sanitize(output.document.toString());
- return output;
+ return {
+ content,
+ // Document.textContent is null per the DOM spec, so when the engine gave
+ // no text, fall back to the root element's text instead.
+ textContent:
+ output.textContent ||
+ output.document.documentElement?.textContent ||
+ '',
+ title: output.title,
+ lang: output.lang,
+ };
}
getFallbackEngine(host: string, specified?: string): Engine {
diff --git a/packages/server/src/utils/replace-href.ts b/packages/server/src/utils/replace-href.ts
index 4c38e66..0b2fd0e 100644
--- a/packages/server/src/utils/replace-href.ts
+++ b/packages/server/src/utils/replace-href.ts
@@ -2,13 +2,12 @@ import config from '../config';
import { generateParserUrl, generateProxyUrl } from './generate';
export default function replaceHref(
- dom: Window,
+ doc: Document,
requestUrl: URL,
remoteUrl: URL,
engine?: string,
redirectPath: string = 'get'
) {
- const doc: Document = dom.window.document;
const parserUrl = (href: string) =>
generateParserUrl(requestUrl, remoteUrl, href, engine, redirectPath);
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index bb8e7d0..5c881b6 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -13,7 +13,7 @@ importers:
version: 20.12.11
'@typescript-eslint/eslint-plugin':
specifier: ^7.7.0
- version: 7.8.0(@typescript-eslint/parser@7.8.0)(eslint@8.57.0)(typescript@5.4.5)
+ version: 7.8.0(@typescript-eslint/parser@7.8.0(eslint@8.57.0)(typescript@5.4.5))(eslint@8.57.0)(typescript@5.4.5)
'@typescript-eslint/parser':
specifier: ^7.7.0
version: 7.8.0(eslint@8.57.0)(typescript@5.4.5)
@@ -22,7 +22,7 @@ importers:
version: 8.57.0
lerna:
specifier: ^8.1.2
- version: 8.1.2
+ version: 8.1.2(encoding@0.1.13)
prettier:
specifier: ^3.1.1
version: 3.2.5
@@ -41,6 +41,9 @@ importers:
'@txtdot/sdk':
specifier: workspace:*
version: link:../sdk
+ linkedom:
+ specifier: ^0.18.0
+ version: 0.18.0
devDependencies:
typescript:
specifier: ^5.4.5
@@ -3378,12 +3381,12 @@ snapshots:
dependencies:
'@sinclair/typebox': 0.27.8
- '@lerna/create@8.1.2(typescript@5.4.5)':
+ '@lerna/create@8.1.2(encoding@0.1.13)(typescript@5.4.5)':
dependencies:
'@npmcli/run-script': 7.0.2
'@nx/devkit': 18.3.4(nx@18.3.4)
'@octokit/plugin-enterprise-rest': 6.0.1
- '@octokit/rest': 19.0.11
+ '@octokit/rest': 19.0.11(encoding@0.1.13)
byte-size: 8.1.1
chalk: 4.1.0
clone-deep: 4.0.1
@@ -3413,7 +3416,7 @@ snapshots:
make-dir: 4.0.0
minimatch: 3.0.5
multimatch: 5.0.0
- node-fetch: 2.6.7
+ node-fetch: 2.6.7(encoding@0.1.13)
npm-package-arg: 8.1.1
npm-packlist: 5.1.1
npm-registry-fetch: 14.0.5
@@ -3578,11 +3581,11 @@ snapshots:
'@octokit/auth-token@3.0.4': {}
- '@octokit/core@4.2.4':
+ '@octokit/core@4.2.4(encoding@0.1.13)':
dependencies:
'@octokit/auth-token': 3.0.4
- '@octokit/graphql': 5.0.6
- '@octokit/request': 6.2.8
+ '@octokit/graphql': 5.0.6(encoding@0.1.13)
+ '@octokit/request': 6.2.8(encoding@0.1.13)
'@octokit/request-error': 3.0.3
'@octokit/types': 9.3.2
before-after-hook: 2.2.3
@@ -3596,9 +3599,9 @@ snapshots:
is-plain-object: 5.0.0
universal-user-agent: 6.0.1
- '@octokit/graphql@5.0.6':
+ '@octokit/graphql@5.0.6(encoding@0.1.13)':
dependencies:
- '@octokit/request': 6.2.8
+ '@octokit/request': 6.2.8(encoding@0.1.13)
'@octokit/types': 9.3.2
universal-user-agent: 6.0.1
transitivePeerDependencies:
@@ -3608,19 +3611,19 @@ snapshots:
'@octokit/plugin-enterprise-rest@6.0.1': {}
- '@octokit/plugin-paginate-rest@6.1.2(@octokit/core@4.2.4)':
+ '@octokit/plugin-paginate-rest@6.1.2(@octokit/core@4.2.4(encoding@0.1.13))':
dependencies:
- '@octokit/core': 4.2.4
+ '@octokit/core': 4.2.4(encoding@0.1.13)
'@octokit/tsconfig': 1.0.2
'@octokit/types': 9.3.2
- '@octokit/plugin-request-log@1.0.4(@octokit/core@4.2.4)':
+ '@octokit/plugin-request-log@1.0.4(@octokit/core@4.2.4(encoding@0.1.13))':
dependencies:
- '@octokit/core': 4.2.4
+ '@octokit/core': 4.2.4(encoding@0.1.13)
- '@octokit/plugin-rest-endpoint-methods@7.2.3(@octokit/core@4.2.4)':
+ '@octokit/plugin-rest-endpoint-methods@7.2.3(@octokit/core@4.2.4(encoding@0.1.13))':
dependencies:
- '@octokit/core': 4.2.4
+ '@octokit/core': 4.2.4(encoding@0.1.13)
'@octokit/types': 10.0.0
'@octokit/request-error@3.0.3':
@@ -3629,23 +3632,23 @@ snapshots:
deprecation: 2.3.1
once: 1.4.0
- '@octokit/request@6.2.8':
+ '@octokit/request@6.2.8(encoding@0.1.13)':
dependencies:
'@octokit/endpoint': 7.0.6
'@octokit/request-error': 3.0.3
'@octokit/types': 9.3.2
is-plain-object: 5.0.0
- node-fetch: 2.6.7
+ node-fetch: 2.6.7(encoding@0.1.13)
universal-user-agent: 6.0.1
transitivePeerDependencies:
- encoding
- '@octokit/rest@19.0.11':
+ '@octokit/rest@19.0.11(encoding@0.1.13)':
dependencies:
- '@octokit/core': 4.2.4
- '@octokit/plugin-paginate-rest': 6.1.2(@octokit/core@4.2.4)
- '@octokit/plugin-request-log': 1.0.4(@octokit/core@4.2.4)
- '@octokit/plugin-rest-endpoint-methods': 7.2.3(@octokit/core@4.2.4)
+ '@octokit/core': 4.2.4(encoding@0.1.13)
+ '@octokit/plugin-paginate-rest': 6.1.2(@octokit/core@4.2.4(encoding@0.1.13))
+ '@octokit/plugin-request-log': 1.0.4(@octokit/core@4.2.4(encoding@0.1.13))
+ '@octokit/plugin-rest-endpoint-methods': 7.2.3(@octokit/core@4.2.4(encoding@0.1.13))
transitivePeerDependencies:
- encoding
@@ -3771,7 +3774,7 @@ snapshots:
'@types/trusted-types@2.0.7': {}
- '@typescript-eslint/eslint-plugin@7.8.0(@typescript-eslint/parser@7.8.0)(eslint@8.57.0)(typescript@5.4.5)':
+ '@typescript-eslint/eslint-plugin@7.8.0(@typescript-eslint/parser@7.8.0(eslint@8.57.0)(typescript@5.4.5))(eslint@8.57.0)(typescript@5.4.5)':
dependencies:
'@eslint-community/regexpp': 4.10.0
'@typescript-eslint/parser': 7.8.0(eslint@8.57.0)(typescript@5.4.5)
@@ -3786,6 +3789,7 @@ snapshots:
natural-compare: 1.4.0
semver: 7.6.2
ts-api-utils: 1.3.0(typescript@5.4.5)
+ optionalDependencies:
typescript: 5.4.5
transitivePeerDependencies:
- supports-color
@@ -3798,6 +3802,7 @@ snapshots:
'@typescript-eslint/visitor-keys': 7.8.0
debug: 4.3.4
eslint: 8.57.0
+ optionalDependencies:
typescript: 5.4.5
transitivePeerDependencies:
- supports-color
@@ -3814,6 +3819,7 @@ snapshots:
debug: 4.3.4
eslint: 8.57.0
ts-api-utils: 1.3.0(typescript@5.4.5)
+ optionalDependencies:
typescript: 5.4.5
transitivePeerDependencies:
- supports-color
@@ -3830,6 +3836,7 @@ snapshots:
minimatch: 9.0.4
semver: 7.6.2
ts-api-utils: 1.3.0(typescript@5.4.5)
+ optionalDependencies:
typescript: 5.4.5
transitivePeerDependencies:
- supports-color
@@ -3909,11 +3916,11 @@ snapshots:
indent-string: 4.0.0
ajv-formats@2.1.1(ajv@8.13.0):
- dependencies:
+ optionalDependencies:
ajv: 8.13.0
ajv-formats@3.0.1(ajv@8.13.0):
- dependencies:
+ optionalDependencies:
ajv: 8.13.0
ajv@6.12.6:
@@ -4300,6 +4307,7 @@ snapshots:
js-yaml: 4.1.0
parse-json: 5.2.0
path-type: 4.0.0
+ optionalDependencies:
typescript: 5.4.5
cross-spawn@7.0.3:
@@ -5138,13 +5146,13 @@ snapshots:
kind-of@6.0.3: {}
- lerna@8.1.2:
+ lerna@8.1.2(encoding@0.1.13):
dependencies:
- '@lerna/create': 8.1.2(typescript@5.4.5)
+ '@lerna/create': 8.1.2(encoding@0.1.13)(typescript@5.4.5)
'@npmcli/run-script': 7.0.2
'@nx/devkit': 18.3.4(nx@18.3.4)
'@octokit/plugin-enterprise-rest': 6.0.1
- '@octokit/rest': 19.0.11
+ '@octokit/rest': 19.0.11(encoding@0.1.13)
byte-size: 8.1.1
chalk: 4.1.0
clone-deep: 4.0.1
@@ -5180,7 +5188,7 @@ snapshots:
make-dir: 4.0.0
minimatch: 3.0.5
multimatch: 5.0.0
- node-fetch: 2.6.7
+ node-fetch: 2.6.7(encoding@0.1.13)
npm-package-arg: 8.1.1
npm-packlist: 5.1.1
npm-registry-fetch: 14.0.5
@@ -5506,9 +5514,11 @@ snapshots:
node-cleanup@2.1.2: {}
- node-fetch@2.6.7:
+ node-fetch@2.6.7(encoding@0.1.13):
dependencies:
whatwg-url: 5.0.0
+ optionalDependencies:
+ encoding: 0.1.13
node-gyp@10.1.0:
dependencies: