feat: stackoverflow parser for questions page

This commit is contained in:
Artemy 2023-08-18 20:50:26 +03:00
parent fd6289b410
commit 50338ec07b
6 changed files with 77 additions and 3 deletions

View File

@ -15,6 +15,7 @@ import {
LocalResourceError,
NotHtmlMimetypeError,
} from "../errors/main";
import stackoverflow from "./stackoverflow/main";
export default async function handlePage(
url: string, // remote URL
@ -60,11 +61,13 @@ type EngineFunction = (window: DOMWindow) => Promise<IHandlerOutput>;
// Registry of the available parsing engines, keyed by engine name.
export const engines: Engines = {
  readability: readability,
  google: google,
  stackoverflow: stackoverflow,
};

// Engine names, in the order they are registered above.
export const engineList: string[] = Object.keys(engines);

// Engine to use per host name when none is chosen explicitly.
// NOTE(review): "*" is presumably the catch-all for hosts without their own
// entry — confirm against the lookup code in handlePage.
const fallback: Engines = {
  "stackoverflow.com": stackoverflow,
  "www.google.com": google,
  "*": readability,
};

View File

@ -0,0 +1,31 @@
import { IHandlerOutput } from "../handler.interface";
import { DOMWindow } from "jsdom";
import { EngineParseError } from "../../errors/main";
import qPostsHandler from "./questions-posts";
/**
 * Engine entry point for Stack Overflow pages.
 *
 * Dispatches on the non-empty segments of the page URL's path:
 * - `questions` plus two more segments: delegates to `qPostsHandler`;
 * - `questions` alone: returns placeholder output whose content is "questions";
 * - any other path starting with `questions`: rejects with `EngineParseError`;
 * - every other path: returns an output with all fields empty.
 *
 * @param window - DOM window of the page to parse
 * @returns the parsed page output
 * @throws EngineParseError for an unsupported `/questions/...` path shape
 */
export default async function stackoverflow(
  window: DOMWindow
): Promise<IHandlerOutput> {
  const segments = new URL(window.location.href).pathname
    .split("/")
    .filter((segment) => segment.length > 0);

  const emptyOutput: IHandlerOutput = {
    content: "",
    textContent: "",
    title: "",
    lang: "",
  };

  // Only the /questions section is handled; everything else yields empty output.
  if (segments[0] !== "questions") {
    return emptyOutput;
  }

  switch (segments.length) {
    case 3:
      return await qPostsHandler(window);
    case 1:
      return { ...emptyOutput, content: "questions" };
    default:
      throw new EngineParseError("Invalid URL [stackoverflow]");
  }
}

View File

@ -0,0 +1,9 @@
/**
 * Renders one post (question or answer) as an HTML fragment: an `<h3>` with
 * the vote count followed by the post body markup.
 *
 * @param el - element wrapping the post, or `null`
 * @returns the HTML fragment; an empty string when `el` is missing. A missing
 *   vote count or body is rendered as an empty string in its place.
 */
export default function postParser(el: Element | null): string {
  if (el == null) {
    return "";
  }

  const bodyHtml = el.querySelector(".js-post-body")?.innerHTML;
  const votes = el.querySelector(".js-vote-count")?.textContent;

  return "<h3>" + (votes ?? "") + " votes</h3>" + (bodyHtml ?? "");
}

View File

@ -0,0 +1,26 @@
import { DOMWindow } from "jsdom";
import { IHandlerOutput } from "../handler.interface";
import postParser from "./post-parser";
/**
 * Builds the handler output for a single question page: the question post,
 * an answer counter, and every answer, separated by `<hr>`.
 *
 * @param window - DOM window of the question page
 * @returns output whose `content` is the assembled HTML and whose `title` is
 *   the plain-text question title (empty string when the title link is absent)
 */
export default async function qPostsHandler(
  window: DOMWindow
): Promise<IHandlerOutput> {
  const { document } = window;

  const question = postParser(document.getElementById("question"));

  // Take the decoded text, not innerHTML: the page template prints the title
  // with an HTML-escaping tag, so markup or entities in it would be escaped a
  // second time and shown literally (e.g. "&amp;" instead of "&").
  const title =
    document.querySelector(".question-hyperlink")?.textContent?.trim() ?? "";

  const answers = Array.from(document.querySelectorAll(".answer"), (answer) =>
    postParser(answer)
  );

  return {
    content: `${question}<hr>${answers.length} answers <hr>${answers.join(
      "<hr>"
    )}`,
    // NOTE(review): hard-coded value, looks like a placeholder rather than the
    // page's plain text — confirm what consumers of textContent expect.
    textContent: "question",
    title,
    lang: "en",
  };
}

View File

@ -6,7 +6,8 @@
font-size: 0.9rem;
}
.title {
font-weight: 500;
font-weight: 1000;
margin: 1rem;
}
a {
@ -28,11 +29,14 @@ table {
overflow-x: auto;
}
img, picture, video {
img,
picture,
video {
max-width: 100%;
height: auto;
}
frame, iframe {
frame,
iframe {
max-width: 100%;
}

View File

@ -18,6 +18,7 @@
<div class="title">
<%= parsed.title %>
</div>
<hr>
<%- parsed.content %>
</main>
</body>