import type { DbEntry } from '../datas.ts'

/**
 * Download a page and parse it as an HTML document.
 *
 * The parser is created on demand rather than at module load, so importing
 * this module does not require `DOMParser` to exist.
 *
 * @param url - Address of the page to download.
 * @returns The parsed document.
 * @throws Error when the server answers with a non-2xx status.
 */
async function fetchDocument(url: string): Promise<Document> {
  // Default request mode: with `mode: 'no-cors'` a cross-origin response is
  // opaque and its body reads as an empty string, which silently produced an
  // empty db instead of an error.
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(
      `Failed to fetch ${url}: ${response.status} ${response.statusText}`,
    )
  }
  const raw = await response.text()
  return new DOMParser().parseFromString(raw, 'text/html')
}

/**
 * Construct db by scraping site.
 *
 * Every `#episodes>a` link of the index page becomes one entry: its id is
 * derived from the link target, its title is the link text, its content is
 * the link target, and its links are the ids found on the linked page.
 *
 * NOTE(review): `a.href` on a document produced by `DOMParser` is resolved
 * against the URL of the page running this code, not against `siteUrl` —
 * confirm the scraped pages use absolute links or share that base.
 *
 * @param siteUrl - Index page url
 * @returns One DbEntry per link of the index page, in document order.
 * @throws Error when a page cannot be fetched.
 */
export async function fetchDbFromSite(siteUrl: string): Promise<DbEntry[]> {
  // Index page
  const index = await fetchDocument(siteUrl)

  // Get nodes from index page links
  const nodes = Array.from(
    index.querySelectorAll<HTMLAnchorElement>('#episodes>a'),
  ).map((a) => ({ href: a.href, title: a.innerText }))

  // Fill db with nodes. Pages are fetched one at a time, in index order.
  const db: DbEntry[] = []
  for (const node of nodes) {
    const id = getNodeId(node)
    const title = node.title
    const content = node.href
    const links = await getNodeLinks(node)
    db.push({ id, title, content, links })
  }
  return db
}

/**
 * Determine id from node link href attribute.
 *
 * The id is read from the URL path after dropping its first character (the
 * leading `/`) and its last five characters (presumably a `.html` suffix —
 * confirm against the scraped site).
 *
 * @param node - Object carrying the absolute `href` of the node page.
 * @returns node id, or `NaN` when the path does not start with decimal digits.
 * @throws TypeError when `href` is not a valid absolute URL.
 */
function getNodeId({ href }: { href: string }): number {
  const { pathname } = new URL(href)
  // Explicit radix so a path such as `0x10.html` is never read as hexadecimal.
  return Number.parseInt(pathname.slice(1, -5), 10)
}

/**
 * Get ids of nodes related to current node by listing all `.cell-p>a` links
 * in current node web page.
 *
 * @param node - Object carrying the absolute `href` of the node page.
 * @returns List of ids related to current node.
 * @throws Error when the page cannot be fetched.
 */
async function getNodeLinks({ href }: { href: string }): Promise<number[]> {
  const page = await fetchDocument(href)
  return (
    Array.from(page.querySelectorAll<HTMLAnchorElement>('.cell-p>a'))
      // An anchor without an href exposes '', which `new URL` rejects.
      .filter((a) => a.href !== '')
      .map((a) => getNodeId(a))
      // Links that do not point at a numbered page yield NaN; drop them.
      .filter((id) => !Number.isNaN(id))
  )
}