59 lines
1.5 KiB
TypeScript
59 lines
1.5 KiB
TypeScript
import { DbEntry } from '../datas.ts'
|
|
|
|
// Single DOMParser instance, reused to turn each fetched HTML string into a Document.
const domParser = new DOMParser()
|
|
|
|
/**
 * Construct the db by scraping a site.
 *
 * Fetches the index page, treats every `#episodes>a` anchor as a node, then
 * fetches each node's own page to collect the ids it links to.
 *
 * @param siteUrl - Index page url
 * @returns One entry per index link, in index order. `content` holds the
 * node's href and `links` the ids found on the node's page.
 * @throws The returned promise rejects if any of the requests fails.
 */
export async function fetchDbFromSite(siteUrl: string): Promise<DbEntry[]> {
  // Index page
  // NOTE(review): with `mode: 'no-cors'` a cross-origin response is opaque and
  // its body reads as an empty string, so this presumably relies on the site
  // being same-origin (or proxied) — confirm.
  const response = await fetch(siteUrl, { mode: 'no-cors' })
  const index = domParser.parseFromString(await response.text(), 'text/html')

  // Get nodes from index page links
  const nodes = Array.from(
    index.querySelectorAll<HTMLAnchorElement>('#episodes>a'),
    (a) => ({ href: a.href, title: a.innerText }),
  )

  // Node pages are independent of one another, so request them concurrently
  // instead of one after another; Promise.all keeps the index order.
  return Promise.all(
    nodes.map(
      async (node): Promise<DbEntry> => ({
        id: getNodeId(node),
        title: node.title,
        content: node.href,
        links: await getNodeLinks(node),
      }),
    ),
  )
}
|
|
|
|
/**
 * Determine id from node link href attribute.
 *
 * The id is read from the URL path: the leading `/` and the last five
 * characters (presumably a `.html` suffix — TODO confirm) are dropped, and
 * the remainder is parsed as a base-10 integer.
 *
 * The single argument is any object exposing an absolute `href` string
 * (an anchor element qualifies).
 *
 * @returns node id, or `NaN` when the remaining path does not start with digits
 * @throws TypeError when `href` is not a valid absolute URL
 */
function getNodeId({ href }: { href: string }): number {
  const { pathname } = new URL(href)
  // Always pass the radix so a `0x`-prefixed path segment is not silently
  // parsed as hexadecimal.
  return Number.parseInt(pathname.slice(1, -5), 10)
}
|
|
|
|
/**
 * Collect the ids of the nodes that a given node refers to.
 *
 * Downloads the node's web page, parses it as HTML and converts every
 * `.cell-p>a` anchor found in it into a node id. The single argument is an
 * object exposing the `href` of the page to inspect.
 *
 * @returns List of ids related to current node.
 */
async function getNodeLinks({ href }: { href: string }): Promise<number[]> {
  const response = await fetch(href, { mode: 'no-cors' })
  const html = await response.text()
  const page = domParser.parseFromString(html, 'text/html')

  // Each anchor exposes `href`, which is all the id extraction needs.
  const anchors = page.querySelectorAll<HTMLAnchorElement>('.cell-p>a')
  const ids: number[] = []
  for (const anchor of Array.from(anchors)) {
    ids.push(getNodeId(anchor))
  }
  return ids
}
|