diff --git a/packages/plugins/package.json b/packages/plugins/package.json index 2f1a8cd..8958ba5 100644 --- a/packages/plugins/package.json +++ b/packages/plugins/package.json @@ -1,6 +1,6 @@ { "name": "@txtdot/plugins", - "version": "1.1.1", + "version": "2.0.0", "description": "Official txtdot plugins", "main": "dist/lib.js", "types": "dist/lib.d.ts", @@ -19,6 +19,7 @@ "license": "MIT", "dependencies": { "@mozilla/readability": "^0.5.0", + "linkedom": "^0.18.0", "@txtdot/sdk": "workspace:*" }, "devDependencies": { diff --git a/packages/plugins/src/engines/readability.ts b/packages/plugins/src/engines/readability.ts index 28dcfe0..6b064af 100644 --- a/packages/plugins/src/engines/readability.ts +++ b/packages/plugins/src/engines/readability.ts @@ -1,6 +1,7 @@ import { Readability as OReadability } from '@mozilla/readability'; import { Engine, EngineParseError } from '@txtdot/sdk'; +import { parseHTML } from 'linkedom'; const Readability = new Engine( 'Readability', @@ -9,7 +10,7 @@ const Readability = new Engine( ); Readability.route('*path', async (input, ro) => { - const reader = new OReadability(input.parseDom().window.document); + const reader = new OReadability(input.document); const parsed = reader.parse(); if (!parsed) { @@ -17,7 +18,7 @@ Readability.route('*path', async (input, ro) => { } return { - content: parsed.content, + document: parseHTML(parsed.content).document, textContent: parsed.textContent, title: parsed.title, lang: parsed.lang, diff --git a/packages/plugins/src/engines/searx.ts b/packages/plugins/src/engines/searx.ts index 8f9bcd2..5446780 100644 --- a/packages/plugins/src/engines/searx.ts +++ b/packages/plugins/src/engines/searx.ts @@ -1,5 +1,6 @@ import { Engine } from '@txtdot/sdk'; import { HandlerInput, Route } from '@txtdot/sdk'; +import { parseHTML } from 'linkedom'; const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [ 'searx.*', @@ -9,7 +10,7 @@ async function search( input: HandlerInput, ro: Route<{ search: string; pageno?: string }> ) { - const document = input.parseDom().window.document; + const document = input.document; const search = ro.q.search; const page = parseInt(ro.q.pageno || '1'); @@ -45,7 +46,7 @@ async function search( const textContent = articles_parsed.map((a) => a.text).join(''); return { - content, + document: parseHTML(content).document, textContent, title: `${search} - Searx - Page ${page}`, lang: document.documentElement.lang, diff --git a/packages/plugins/src/engines/stackoverflow/questions.ts b/packages/plugins/src/engines/stackoverflow/questions.ts index f648768..03b1121 100644 --- a/packages/plugins/src/engines/stackoverflow/questions.ts +++ b/packages/plugins/src/engines/stackoverflow/questions.ts @@ -1,10 +1,11 @@ import { HandlerInput, Route } from '@txtdot/sdk'; +import { parseHTML } from 'linkedom'; async function questions( input: HandlerInput, ro: Route<{ id: string; slug: string }> ) { - const document = input.parseDom().window.document; + const document = input.document; const questionEl = document.getElementById('question'); const question = postParser(questionEl); @@ -15,9 +16,9 @@ async function questions( const answers = allAnswers.map((a) => postParser(a)); return { - content: `${question}
${answers.length} answers
${answers.join( - '
' - )}`, + document: parseHTML( + `${question}
${answers.length} answers
${answers.join('
')}` + ).document, textContent: `${ro.q.id}/${ro.q.slug}\nText output not supported`, // TODO title, lang: document.documentElement.lang, diff --git a/packages/plugins/src/engines/stackoverflow/users.ts b/packages/plugins/src/engines/stackoverflow/users.ts index dc46521..e0d015a 100644 --- a/packages/plugins/src/engines/stackoverflow/users.ts +++ b/packages/plugins/src/engines/stackoverflow/users.ts @@ -1,10 +1,11 @@ import { HandlerInput, Route } from '@txtdot/sdk'; +import { parseHTML } from 'linkedom'; async function users( input: HandlerInput, ro: Route<{ id: string; slug: string }> ) { - const document = input.parseDom().window.document; + const document = input.document; const userInfo = document.querySelector('.md\\:ai-start > div:nth-child(2)')?.textContent || @@ -26,7 +27,8 @@ async function users( .join('
'); return { - content: `${userInfo}

Top Posts

${topPosts}`, + document: parseHTML(`${userInfo}

Top Posts

${topPosts}`) + .document, textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO title: document.querySelector('title')?.textContent || '', lang: document.documentElement.lang, diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 71f9b95..fbe73a0 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -1,6 +1,6 @@ { "name": "@txtdot/sdk", - "version": "1.1.2", + "version": "2.0.0", "description": "SDK for creating plugins for TxtDot", "main": "dist/lib.js", "types": "dist/lib.d.ts", diff --git a/packages/sdk/src/engine.ts b/packages/sdk/src/engine.ts index 20a9a91..601e647 100644 --- a/packages/sdk/src/engine.ts +++ b/packages/sdk/src/engine.ts @@ -2,9 +2,9 @@ import Route from 'route-parser'; import { HandlerInput, - IHandlerOutput, EngineFunction, RouteValues, + EngineOutput, } from './types/handler'; import { NoHandlerFoundError } from './types/errors'; @@ -33,7 +33,7 @@ export class Engine { this.routes.push({ route: new Route(path), handler }); } - async handle(input: HandlerInput): Promise { + async handle(input: HandlerInput): Promise { const url = new URL(input.getUrl()); const path = url.pathname + url.search + url.hash; for (const route of this.routes) { diff --git a/packages/sdk/src/lib.ts b/packages/sdk/src/lib.ts index 63198f1..023984c 100644 --- a/packages/sdk/src/lib.ts +++ b/packages/sdk/src/lib.ts @@ -13,7 +13,7 @@ import { RouteValues, EnginesMatch, HandlerInput, - IHandlerOutput, + HandlerOutput, Route, handlerSchema, } from './types/handler'; @@ -29,7 +29,7 @@ export { RouteValues, EnginesMatch, HandlerInput, - IHandlerOutput, + HandlerOutput, Route, handlerSchema, }; diff --git a/packages/sdk/src/types/handler.ts b/packages/sdk/src/types/handler.ts index 9af088a..1d842bb 100644 --- a/packages/sdk/src/types/handler.ts +++ b/packages/sdk/src/types/handler.ts @@ -4,7 +4,7 @@ import { Engine } from '../engine'; export class HandlerInput { private data: string; private url: string; - private dom?: Window; + private window?: Window; constructor(data: string, url: string) { this.data = data; @@ -15,23 +15,30 @@ export class HandlerInput { return this.url; } - parseDom(): Window { - if (this.dom) { - return this.dom; + get document(): Document { + if (this.window) { + return this.window.document; } - this.dom = parseHTML(this.data); - return this.dom; + this.window = parseHTML(this.data); + return this.window.document; } } -export interface IHandlerOutput { +export interface HandlerOutput { content: string; textContent: string; title?: string; lang?: string; } +export interface EngineOutput { + document: Document; + textContent?: string; + title?: string; + lang?: string; +} + export const handlerSchema = { type: 'object', properties: { @@ -66,7 +73,7 @@ export interface RouteValues { export type EngineFunction = ( input: HandlerInput, ro: Route -) => Promise; +) => Promise; export type EnginesMatch = EngineMatch[]; diff --git a/packages/server/package.json b/packages/server/package.json index dbe48be..0d95386 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -1,6 +1,6 @@ { "name": "@txtdot/server", - "version": "1.7.0", + "version": "1.8.0", "private": true, "description": "txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts", "main": "dist/app.js", diff --git a/packages/server/src/distributor.ts b/packages/server/src/distributor.ts index 60cc8e6..59c8cbe 100644 --- a/packages/server/src/distributor.ts +++ b/packages/server/src/distributor.ts @@ -5,10 +5,9 @@ import { Readable } from 'stream'; import { NotHtmlMimetypeError } from './errors/main'; import { decodeStream, parseEncodingName } from './utils/http'; import replaceHref from './utils/replace-href'; -import { parseHTML } from 'linkedom'; import { Engine } from '@txtdot/sdk'; -import { HandlerInput, IHandlerOutput } from '@txtdot/sdk'; +import { HandlerInput, HandlerOutput } from '@txtdot/sdk'; import config from './config'; interface IEngineId { @@ -32,7 +31,7 @@ export class Distributor { requestUrl: URL, // proxy URL engineName?: string, redirectPath: string = 'get' - ): Promise { + ): Promise { const urlObj = new URL(remoteUrl); const webder_url = config.env.third_party.webder_url; @@ -61,13 +60,23 @@ export class Distributor { // post-process // TODO: generate dom in handler and not parse here twice - const dom = parseHTML(output.content); - replaceHref(dom, requestUrl, new URL(remoteUrl), engineName, redirectPath); + replaceHref( + output.document, + requestUrl, + new URL(remoteUrl), + engineName, + redirectPath + ); - const purify = DOMPurify(dom.window); - output.content = purify.sanitize(dom.document.toString()); + const purify = DOMPurify(); + const content = purify.sanitize(output.document.toString()); - return output; + return { + content, + textContent: output.textContent || output.document.textContent || '', + title: output.title, + lang: output.lang, + }; } getFallbackEngine(host: string, specified?: string): Engine { diff --git a/packages/server/src/utils/replace-href.ts b/packages/server/src/utils/replace-href.ts index 4c38e66..0b2fd0e 100644 --- a/packages/server/src/utils/replace-href.ts +++ b/packages/server/src/utils/replace-href.ts @@ -2,13 +2,12 @@ import config from '../config'; import { generateParserUrl, generateProxyUrl } from './generate'; export default function replaceHref( - dom: Window, + doc: Document, requestUrl: URL, remoteUrl: URL, engine?: string, redirectPath: string = 'get' ) { - const doc: Document = dom.window.document; const parserUrl = (href: string) => generateParserUrl(requestUrl, remoteUrl, href, engine, redirectPath); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index bb8e7d0..5c881b6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -13,7 +13,7 @@ importers: version: 20.12.11 '@typescript-eslint/eslint-plugin': specifier: ^7.7.0 - version: 7.8.0(@typescript-eslint/parser@7.8.0)(eslint@8.57.0)(typescript@5.4.5) + version: 7.8.0(@typescript-eslint/parser@7.8.0(eslint@8.57.0)(typescript@5.4.5))(eslint@8.57.0)(typescript@5.4.5) '@typescript-eslint/parser': specifier: ^7.7.0 version: 7.8.0(eslint@8.57.0)(typescript@5.4.5) @@ -22,7 +22,7 @@ importers: version: 8.57.0 lerna: specifier: ^8.1.2 - version: 8.1.2 + version: 8.1.2(encoding@0.1.13) prettier: specifier: ^3.1.1 version: 3.2.5 @@ -41,6 +41,9 @@ importers: '@txtdot/sdk': specifier: workspace:* version: link:../sdk + linkedom: + specifier: ^0.18.0 + version: 0.18.0 devDependencies: typescript: specifier: ^5.4.5 @@ -3378,12 +3381,12 @@ snapshots: dependencies: '@sinclair/typebox': 0.27.8 - '@lerna/create@8.1.2(typescript@5.4.5)': + '@lerna/create@8.1.2(encoding@0.1.13)(typescript@5.4.5)': dependencies: '@npmcli/run-script': 7.0.2 '@nx/devkit': 18.3.4(nx@18.3.4) '@octokit/plugin-enterprise-rest': 6.0.1 - '@octokit/rest': 19.0.11 + '@octokit/rest': 19.0.11(encoding@0.1.13) byte-size: 8.1.1 chalk: 4.1.0 clone-deep: 4.0.1 @@ -3413,7 +3416,7 @@ snapshots: make-dir: 4.0.0 minimatch: 3.0.5 multimatch: 5.0.0 - node-fetch: 2.6.7 + node-fetch: 2.6.7(encoding@0.1.13) npm-package-arg: 8.1.1 npm-packlist: 5.1.1 npm-registry-fetch: 14.0.5 @@ -3578,11 +3581,11 @@ snapshots: '@octokit/auth-token@3.0.4': {} - '@octokit/core@4.2.4': + '@octokit/core@4.2.4(encoding@0.1.13)': dependencies: '@octokit/auth-token': 3.0.4 - '@octokit/graphql': 5.0.6 - '@octokit/request': 6.2.8 + '@octokit/graphql': 5.0.6(encoding@0.1.13) + '@octokit/request': 6.2.8(encoding@0.1.13) '@octokit/request-error': 3.0.3 '@octokit/types': 9.3.2 before-after-hook: 2.2.3 @@ -3596,9 +3599,9 @@ snapshots: is-plain-object: 5.0.0 universal-user-agent: 6.0.1 - '@octokit/graphql@5.0.6': + '@octokit/graphql@5.0.6(encoding@0.1.13)': dependencies: - '@octokit/request': 6.2.8 + '@octokit/request': 6.2.8(encoding@0.1.13) '@octokit/types': 9.3.2 universal-user-agent: 6.0.1 transitivePeerDependencies: @@ -3608,19 +3611,19 @@ snapshots: '@octokit/plugin-enterprise-rest@6.0.1': {} - '@octokit/plugin-paginate-rest@6.1.2(@octokit/core@4.2.4)': + '@octokit/plugin-paginate-rest@6.1.2(@octokit/core@4.2.4(encoding@0.1.13))': dependencies: - '@octokit/core': 4.2.4 + '@octokit/core': 4.2.4(encoding@0.1.13) '@octokit/tsconfig': 1.0.2 '@octokit/types': 9.3.2 - '@octokit/plugin-request-log@1.0.4(@octokit/core@4.2.4)': + '@octokit/plugin-request-log@1.0.4(@octokit/core@4.2.4(encoding@0.1.13))': dependencies: - '@octokit/core': 4.2.4 + '@octokit/core': 4.2.4(encoding@0.1.13) - '@octokit/plugin-rest-endpoint-methods@7.2.3(@octokit/core@4.2.4)': + '@octokit/plugin-rest-endpoint-methods@7.2.3(@octokit/core@4.2.4(encoding@0.1.13))': dependencies: - '@octokit/core': 4.2.4 + '@octokit/core': 4.2.4(encoding@0.1.13) '@octokit/types': 10.0.0 '@octokit/request-error@3.0.3': @@ -3629,23 +3632,23 @@ snapshots: deprecation: 2.3.1 once: 1.4.0 - '@octokit/request@6.2.8': + '@octokit/request@6.2.8(encoding@0.1.13)': dependencies: '@octokit/endpoint': 7.0.6 '@octokit/request-error': 3.0.3 '@octokit/types': 9.3.2 is-plain-object: 5.0.0 - node-fetch: 2.6.7 + node-fetch: 2.6.7(encoding@0.1.13) universal-user-agent: 6.0.1 transitivePeerDependencies: - encoding - '@octokit/rest@19.0.11': + '@octokit/rest@19.0.11(encoding@0.1.13)': dependencies: - '@octokit/core': 4.2.4 - '@octokit/plugin-paginate-rest': 6.1.2(@octokit/core@4.2.4) - '@octokit/plugin-request-log': 1.0.4(@octokit/core@4.2.4) - '@octokit/plugin-rest-endpoint-methods': 7.2.3(@octokit/core@4.2.4) + '@octokit/core': 4.2.4(encoding@0.1.13) + '@octokit/plugin-paginate-rest': 6.1.2(@octokit/core@4.2.4(encoding@0.1.13)) + '@octokit/plugin-request-log': 1.0.4(@octokit/core@4.2.4(encoding@0.1.13)) + '@octokit/plugin-rest-endpoint-methods': 7.2.3(@octokit/core@4.2.4(encoding@0.1.13)) transitivePeerDependencies: - encoding @@ -3771,7 +3774,7 @@ snapshots: '@types/trusted-types@2.0.7': {} - '@typescript-eslint/eslint-plugin@7.8.0(@typescript-eslint/parser@7.8.0)(eslint@8.57.0)(typescript@5.4.5)': + '@typescript-eslint/eslint-plugin@7.8.0(@typescript-eslint/parser@7.8.0(eslint@8.57.0)(typescript@5.4.5))(eslint@8.57.0)(typescript@5.4.5)': dependencies: '@eslint-community/regexpp': 4.10.0 '@typescript-eslint/parser': 7.8.0(eslint@8.57.0)(typescript@5.4.5) @@ -3786,6 +3789,7 @@ snapshots: natural-compare: 1.4.0 semver: 7.6.2 ts-api-utils: 1.3.0(typescript@5.4.5) + optionalDependencies: typescript: 5.4.5 transitivePeerDependencies: - supports-color @@ -3798,6 +3802,7 @@ snapshots: '@typescript-eslint/visitor-keys': 7.8.0 debug: 4.3.4 eslint: 8.57.0 + optionalDependencies: typescript: 5.4.5 transitivePeerDependencies: - supports-color @@ -3814,6 +3819,7 @@ snapshots: debug: 4.3.4 eslint: 8.57.0 ts-api-utils: 1.3.0(typescript@5.4.5) + optionalDependencies: typescript: 5.4.5 transitivePeerDependencies: - supports-color @@ -3830,6 +3836,7 @@ snapshots: minimatch: 9.0.4 semver: 7.6.2 ts-api-utils: 1.3.0(typescript@5.4.5) + optionalDependencies: typescript: 5.4.5 transitivePeerDependencies: - supports-color @@ -3909,11 +3916,11 @@ snapshots: indent-string: 4.0.0 ajv-formats@2.1.1(ajv@8.13.0): - dependencies: + optionalDependencies: ajv: 8.13.0 ajv-formats@3.0.1(ajv@8.13.0): - dependencies: + optionalDependencies: ajv: 8.13.0 ajv@6.12.6: @@ -4300,6 +4307,7 @@ snapshots: js-yaml: 4.1.0 parse-json: 5.2.0 path-type: 4.0.0 + optionalDependencies: typescript: 5.4.5 cross-spawn@7.0.3: @@ -5138,13 +5146,13 @@ snapshots: kind-of@6.0.3: {} - lerna@8.1.2: + lerna@8.1.2(encoding@0.1.13): dependencies: - '@lerna/create': 8.1.2(typescript@5.4.5) + '@lerna/create': 8.1.2(encoding@0.1.13)(typescript@5.4.5) '@npmcli/run-script': 7.0.2 '@nx/devkit': 18.3.4(nx@18.3.4) '@octokit/plugin-enterprise-rest': 6.0.1 - '@octokit/rest': 19.0.11 + '@octokit/rest': 19.0.11(encoding@0.1.13) byte-size: 8.1.1 chalk: 4.1.0 clone-deep: 4.0.1 @@ -5180,7 +5188,7 @@ snapshots: make-dir: 4.0.0 minimatch: 3.0.5 multimatch: 5.0.0 - node-fetch: 2.6.7 + node-fetch: 2.6.7(encoding@0.1.13) npm-package-arg: 8.1.1 npm-packlist: 5.1.1 npm-registry-fetch: 14.0.5 @@ -5506,9 +5514,11 @@ snapshots: node-cleanup@2.1.2: {} - node-fetch@2.6.7: + node-fetch@2.6.7(encoding@0.1.13): dependencies: whatwg-url: 5.0.0 + optionalDependencies: + encoding: 0.1.13 node-gyp@10.1.0: dependencies: