demo_network_visjs/client/fetch_db_from_site.ts

59 lines
1.5 KiB
TypeScript
Raw Permalink Normal View History

2024-01-23 14:13:26 +01:00
import { DbEntry } from '../datas.ts'
// Single DOMParser instance, reused to parse every page fetched by this module.
const domParser = new DOMParser()
2024-01-23 14:30:10 +01:00
/**
 * Construct the db by scraping a site.
 *
 * Fetches the index page at `siteUrl`, treats every `#episodes>a` link as a
 * node, then fetches each linked page to collect the ids of the nodes it
 * links to.
 *
 * @param siteUrl - Index page url
 * @returns One db entry per index link, in document order. The promise
 *   rejects if any fetch fails or any link href is not a valid URL.
 */
export async function fetchDbFromSite(siteUrl: string): Promise<DbEntry[]> {
  // Index page
  // NOTE(review): with `mode: 'no-cors'` a cross-origin response is opaque and
  // its body reads as empty, so this presumably targets a same-origin site —
  // confirm.
  const response = await fetch(siteUrl, { mode: 'no-cors' })
  const index = domParser.parseFromString(await response.text(), 'text/html')

  // Get nodes from index page links
  const nodes = Array.from(
    index.querySelectorAll<HTMLAnchorElement>('#episodes>a'),
    (a) => ({ href: a.href, title: a.innerText }),
  )

  // Fill db with nodes. Each node page is independent of the others, so they
  // are fetched concurrently; Promise.all keeps the entries in index order.
  return Promise.all(
    nodes.map(
      async (node): Promise<DbEntry> => ({
        id: getNodeId(node),
        title: node.title,
        content: node.href,
        links: await getNodeLinks(node),
      }),
    ),
  )
}
2024-01-23 14:30:10 +01:00
/**
 * Determine id from node link href attribute.
 *
 * Takes the URL pathname, drops the leading `/` and the last five characters
 * (presumably a `.html` suffix), and parses what remains as a base-10 integer,
 * e.g. `https://host/12.html` -> `12`.
 *
 * @param node - Object whose `href` is an absolute URL.
 * @returns node id, or `NaN` when the remaining path does not start with digits.
 * @throws TypeError when `href` is not a valid absolute URL.
 */
function getNodeId({ href }: { href: string }): number {
  const { pathname } = new URL(href)
  // Explicit radix: without it a name such as `0x10` would be parsed as hex.
  return Number.parseInt(pathname.slice(1, -5), 10)
}
2024-01-23 14:30:10 +01:00
/**
 * Get ids of nodes related to the current node by listing the links found in
 * the current node's web page.
 *
 * @param node - Object whose `href` is the url of the page to fetch.
 * @returns Ids parsed from every `.cell-p>a` link of the page, in document
 *   order. A link whose path does not yield a number contributes `NaN`.
 */
async function getNodeLinks({ href }: { href: string }): Promise<number[]> {
  const response = await fetch(href, { mode: 'no-cors' })
  const page = domParser.parseFromString(await response.text(), 'text/html')
  return Array.from(
    page.querySelectorAll<HTMLAnchorElement>('.cell-p>a'),
    // Call with a single argument so the map index is never forwarded.
    (anchor) => getNodeId(anchor),
  )
}