卡片召唤师
精华
|
战斗力 鹅
|
回帖 0
注册时间 2017-7-13
|
按 #24 楼思路和 Copilot 聊了聊,简单写了个 Node.js 脚本用来统计信息,姑且抛砖引玉(
- const fs = require('fs'); // 用于文件操作
- const http = require('http'); // 用于 API 调用
- const https = require('https'); // 用于 API 调用
- // 需要获取回帖内容的帖子请写在 forumUrl 中
- // 记录单集链接时请使用 https://bangumi.tv/ep/1296949 或 ep/1296949
- // 请勿频繁使用,以免对论坛服务器造成压力
/**
 * Scans every page of a forum thread for Bangumi episode links
 * (`https://bangumi.tv/ep/<id>` or `ep/<id>`), counts how often each episode
 * is mentioned, resolves episode and subject names through the Bangumi API
 * and writes the ranking (most mentioned first) to a CSV file.
 *
 * All options are optional; calling `main()` with no arguments uses the
 * built-in thread URL, writes `output.csv` and uses the real fetchers.
 *
 * @param {object} [options]
 * @param {string} [options.forumUrl] URL of the thread to scan.
 * @param {string} [options.outputPath] Path of the CSV file to write.
 * @param {(url: string) => Promise<string>} [options.fetchPages]
 *   Returns the concatenated HTML of all thread pages (defaults to `fetchAllPages`).
 * @param {(url: string) => Promise<object>} [options.fetchEntity]
 *   Fetches and parses a JSON API resource (defaults to `fetchJson`).
 * @returns {Promise<void>} Resolves once the CSV file has been written.
 */
async function main({
  forumUrl = 'https://bbs.saraba1st.com/2b/thread-2236399-1-1.html',
  outputPath = 'output.csv',
  fetchPages = fetchAllPages,
  fetchEntity = fetchJson,
} = {}) {
  // Step 1: download the replies.
  const threadHtml = await fetchPages(forumUrl);
  // Drop everything inside `blockcode` code blocks.
  const withoutCodeBlocks = threadHtml.replace(/<div class="blockcode">[\s\S]*?<\/div>/g, '');
  // Drop `href` attributes that contain `ep/<id>` so a link whose visible text
  // repeats its target is only counted once.
  const strippedContent = withoutCodeBlocks.replace(/href="[^"]*ep\/\d+[^"]*"/g, '');

  // Step 2: extract episode ids and count how often each one appears.
  const epRegex = /https?:\/\/bangumi\.tv\/ep\/(\d+)|ep\/(\d+)/g;
  const epCounts = {};
  for (const match of strippedContent.matchAll(epRegex)) {
    const epId = match[1] || match[2];
    epCounts[epId] = (epCounts[epId] || 0) + 1;
  }

  // Step 3: order by number of mentions, highest first.
  const sortedEpCounts = Object.entries(epCounts).sort((a, b) => b[1] - a[1]);

  // Step 4: look up the details of every episode. Many episodes share one
  // subject, so subject requests are cached to avoid hitting the API repeatedly.
  const subjectRequests = new Map();
  const fetchSubject = (subjectId) => {
    if (!subjectRequests.has(subjectId)) {
      subjectRequests.set(subjectId, fetchEntity(`https://api.bgm.tv/v0/subjects/${subjectId}`));
    }
    return subjectRequests.get(subjectId);
  };
  const epDetails = await Promise.all(sortedEpCounts.map(async ([epId, count]) => {
    const epData = await fetchEntity(`https://api.bgm.tv/v0/episodes/${epId}`);
    const subjectData = await fetchSubject(epData.subject_id);
    return {
      count,
      subject_name: subjectData.name,
      subject_name_cn: subjectData.name_cn,
      ep: epData.ep,
      epId,
      ep_name: epData.name,
      ep_name_cn: epData.name_cn,
    };
  }));

  // Step 5: write the result as CSV. Fields are quoted when needed so titles
  // containing commas, quotes or line breaks do not shift columns.
  const rows = [
    ['count', 'subject_name', 'subject_name_cn', 'ep', 'ep_name', 'ep_name_cn'],
    ...epDetails.map((detail) => [
      detail.count,
      detail.subject_name,
      detail.subject_name_cn,
      detail.ep,
      detail.ep_name,
      detail.ep_name_cn,
    ]),
  ];
  const csvContent = rows.map((row) => row.map(escapeCsvField).join(',')).join('\n');
  console.log(csvContent); // Show the content before writing the file.
  fs.writeFileSync(outputPath, csvContent);
  console.log('CSV file has been generated');
}

/**
 * Renders one value as a CSV field: null/undefined become an empty field, and
 * text containing a comma, double quote or line break is wrapped in double
 * quotes with embedded quotes doubled.
 */
function escapeCsvField(value) {
  const text = value == null ? '' : String(value);
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
}
/**
 * Performs a GET request and resolves with the response body parsed as JSON.
 *
 * The body is parsed regardless of the HTTP status code, so JSON error
 * payloads are returned to the caller rather than rejected.
 *
 * @param {string} url Resource to fetch.
 * @param {{get: Function}} [client] Object exposing a Node-style
 *   `get(url, options, callback)`; defaults to the built-in `https` module for
 *   URLs starting with "https" and `http` otherwise.
 * @returns {Promise<any>} The parsed JSON value.
 *   Rejects on a request/response stream error or when the body is not valid JSON.
 */
function fetchJson(url, client = url.startsWith('https') ? https : http) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
      },
    };
    client.get(url, options, (res) => {
      // Collect raw chunks and decode once at the end: decoding chunk by chunk
      // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
      const chunks = [];
      res.on('data', (chunk) => {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
      });
      res.on('error', reject);
      res.on('end', () => {
        const body = Buffer.concat(chunks).toString('utf8');
        try {
          resolve(JSON.parse(body));
        } catch (err) {
          // A throw inside this event handler would escape the promise and
          // crash the process, so turn it into a rejection instead.
          reject(new Error(
            `Invalid JSON from ${url} (HTTP ${res.statusCode}): ${err.message}`,
            { cause: err },
          ));
        }
      });
    }).on('error', reject);
  });
}
/**
 * Performs a GET request and resolves with the response body as a UTF-8 string.
 *
 * The HTTP status code is not inspected; whatever body the server sends is
 * returned.
 *
 * @param {string} url Page to fetch.
 * @param {{get: Function}} [client] Object exposing a Node-style
 *   `get(url, options, callback)`; defaults to the built-in `https` module for
 *   URLs starting with "https" and `http` otherwise.
 * @returns {Promise<string>} The response body.
 *   Rejects on a request or response stream error.
 */
function fetchHtml(url, client = url.startsWith('https') ? https : http) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
      },
    };
    client.get(url, options, (res) => {
      // Collect raw chunks and decode once at the end: decoding chunk by chunk
      // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
      const chunks = [];
      res.on('data', (chunk) => {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
      });
      res.on('error', reject);
      res.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
    }).on('error', reject);
  });
}
/**
 * Downloads every page of a forum thread, one request at a time, and returns
 * the HTML of all pages concatenated in page order.
 *
 * Page URLs are derived by rewriting the trailing `-<page>-<n>.html` of `url`.
 * The page count is read from the pager (`<div class="pg">`) of the first
 * page. If `url` does not end in that pattern, page URLs cannot be derived,
 * so only `url` itself is fetched once.
 *
 * @param {string} url URL of any page of the thread.
 * @param {(url: string) => Promise<string>} [fetchPage] Downloads a single
 *   page (defaults to `fetchHtml`).
 * @returns {Promise<string>} Concatenated HTML of all fetched pages.
 */
async function fetchAllPages(url, fetchPage = fetchHtml) {
  const pageSuffix = /-\d+-\d+\.html$/;
  // Without the suffix the rewrite below is a no-op, and looping over the
  // page count would download and append the same page again and again.
  const canPaginate = pageSuffix.test(url);
  const pageUrl = (page) => (canPaginate ? url.replace(pageSuffix, `-${page}-1.html`) : url);

  const firstPage = await fetchPage(pageUrl(1));
  const maxPage = canPaginate ? findMaxPage(firstPage) : 1;

  const pages = [firstPage];
  for (let page = 2; page <= maxPage; page++) {
    pages.push(await fetchPage(pageUrl(page)));
  }
  return pages.join('');
}

/**
 * Reads the highest page number from the first `<div class="pg">` pager in
 * `html`; returns 1 when there is no pager or it contains no page numbers.
 */
function findMaxPage(html) {
  const pagerMatch = html.match(/<div class="pg">([\s\S]*?)<\/div>/);
  if (!pagerMatch) {
    return 1;
  }
  const pagerHtml = pagerMatch[1];
  // Links whose text is a plain number.
  const pageNumbers = [...pagerHtml.matchAll(/<a [^>]*>(\d+)<\/a>/g)]
    .map((match) => Number.parseInt(match[1], 10));
  // The "... 49" style link pointing at the last page.
  const lastPageMatch = pagerHtml.match(/\.{3}\s*(\d+)<\/a>/);
  if (lastPageMatch) {
    pageNumbers.push(Number.parseInt(lastPageMatch[1], 10));
  }
  // The leading 1 keeps the result sane when no number was found.
  return Math.max(1, ...pageNumbers);
}
// Run the script. On failure, log the error and mark the process as failed so
// shells and schedulers see a non-zero exit status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
复制代码
|
评分
-
查看全部评分
|