diff --git a/packages/server/src/distributor.ts b/packages/server/src/distributor.ts index 68065a3..bb1a037 100644 --- a/packages/server/src/distributor.ts +++ b/packages/server/src/distributor.ts @@ -63,6 +63,9 @@ export class Distributor { const dom = parseHTML(output.content); + // Get text content before link replacement, because in text format we need original links + const stdTextContent = dom.document.documentElement.textContent; + // post-process // TODO: generate dom in handler and not parse here twice replaceHref( @@ -78,7 +81,7 @@ export class Distributor { const title = output.title || dom.document.title; const lang = output.lang || dom.document.documentElement.lang; const textContent = - html2text(output, dom.document, title) || + html2text(stdTextContent, output, title) || 'Text output cannot be generated.'; return { diff --git a/packages/server/src/utils/html2text.ts b/packages/server/src/utils/html2text.ts index 270e279..d4d60c0 100644 --- a/packages/server/src/utils/html2text.ts +++ b/packages/server/src/utils/html2text.ts @@ -6,9 +6,13 @@ function setTitle(body: string | null, title: string) { return `${title.toUpperCase()}\n${'='.repeat(title.length)}\n\n${body}`; } -export function html2text(output: EngineOutput, doc: Document, title: string) { +export function html2text( + stdTextContent: string | null, + output: EngineOutput, + title: string +) { if (output.textContent) return output.textContent; else if (config.plugin.html2text) return setTitle(config.plugin.html2text(output.content), title); - else return setTitle(doc.documentElement.textContent, title); + else return setTitle(stdTextContent, title); }