Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ type ConfigEnvKeys =
| 'BITBUCKET_USERNAME'
| 'BITBUCKET_PASSWORD'
| 'BTBYR_HOST'
| 'BAIDU_COOKIE'
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
| 'BTBYR_COOKIE'
| 'BUPT_PORTAL_COOKIE'
| 'CAIXIN_COOKIE'
Expand Down Expand Up @@ -348,6 +349,9 @@ export type Config = {
};

// Route-specific Configurations
baidu: {
cookie?: string;
};
bilibili: {
cookies: Record<string, string | undefined>;
dmImgList?: string;
Expand Down Expand Up @@ -763,7 +767,7 @@ const calculateValue = () => {
type: envs.CACHE_TYPE || (envs.CACHE_TYPE === '' ? '' : 'memory'), // 缓存类型,支持 'memory' 和 'redis',设为空可以禁止缓存
requestTimeout: toInt(envs.CACHE_REQUEST_TIMEOUT, 60),
routeExpire: toInt(envs.CACHE_EXPIRE, 5 * 60), // 路由缓存时间,单位为秒
contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 1 * 60 * 60), // 不变内容缓存时间,单位为秒
contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 60 * 60), // 不变内容缓存时间,单位为秒
Comment thread
FlanChanXwO marked this conversation as resolved.
},
memory: {
max: toInt(envs.MEMORY_MAX, Math.pow(2, 8)), // The maximum number of items that remain in the cache. This must be a positive finite integer.
Expand Down Expand Up @@ -843,6 +847,9 @@ const calculateValue = () => {
},

// Route-specific Configurations
baidu: {
cookie: envs.BAIDU_COOKIE,
},
bilibili: {
cookies: bilibili_cookies,
dmImgList: envs.BILIBILI_DM_IMG_LIST,
Expand Down
122 changes: 122 additions & 0 deletions lib/routes/baidu/tieba/common.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import { config } from '@/config';
import ConfigNotFoundError from '@/errors/types/config-not-found';
import cache from '@/utils/cache';

/**
 * Parse a raw cookie string into the cookie objects passed to Puppeteer's `page.setCookie`.
 *
 * Each `;`-separated segment is split on its FIRST `=` only, so a value that itself
 * contains `=` is kept intact. A segment without any `=` becomes a cookie with an
 * empty value. Blank segments and segments whose name is empty (for example `=value`)
 * are skipped, because a nameless entry is not a usable cookie.
 *
 * @param cookieStr - Raw cookie string, e.g. `NAME1=value1; NAME2=value2`.
 * @returns One entry per named cookie, each scoped to the `.tieba.baidu.com` domain.
 */
export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> {
    const domain = '.tieba.baidu.com';
    const cookies: Array<{ name: string; value: string; domain: string }> = [];

    for (const segment of cookieStr.split(';')) {
        const pair = segment.trim();
        if (pair.length === 0) {
            continue;
        }

        const separatorIndex = pair.indexOf('=');
        const name = separatorIndex === -1 ? pair : pair.slice(0, separatorIndex).trim();
        const value = separatorIndex === -1 ? '' : pair.slice(separatorIndex + 1).trim();

        // A malformed segment such as "=abc" has no name; do not forward it to the browser.
        if (name.length === 0) {
            continue;
        }

        cookies.push({ name, value, domain });
    }

    return cookies;
}
Comment thread
FlanChanXwO marked this conversation as resolved.

/**
 * Guard that rejects HTML which looks like Baidu's security-verification page.
 *
 * @param html - Page HTML to inspect.
 * @throws Error when the HTML contains one of the verification marker phrases.
 */
export function checkSecurityVerification(html: string): void {
    // Marker phrases whose presence is treated as "verification required".
    const markers = ['安全验证', '百度安全验证'];
    const blocked = markers.some((marker) => html.includes(marker));

    if (!blocked) {
        return;
    }

    throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.');
}

/**
 * Fetch the rendered HTML of a Tieba page through Puppeteer, using the configured Baidu cookie.
 *
 * The HTML is cached under `cacheKey` for `config.cache.routeExpire`. On a cache miss the
 * page is loaded in a fresh Puppeteer page; when loading throws, the attempt is repeated
 * after a delay that grows linearly with the attempt number. The security-verification
 * check runs BEFORE the HTML is handed back to the cache, so a verification page is
 * reported as an error instead of being cached and re-served until the entry expires.
 *
 * @param url - Page to load.
 * @param cacheKey - Cache key under which the HTML is stored.
 * @param options.waitForSelector - Selector awaited after navigation; a timeout here is ignored.
 * @param options.timeout - Milliseconds to wait for `waitForSelector` (default 3000).
 * @param options.retries - Maximum number of load attempts (default 3).
 * @returns The page HTML.
 * @throws ConfigNotFoundError when `config.baidu.cookie` is not set.
 * @throws Error when the HTML is a security-verification page, or when every attempt failed.
 */
export async function getTiebaPageContent(
    url: string,
    cacheKey: string,
    options: {
        waitForSelector?: string;
        timeout?: number;
        retries?: number;
    } = {}
): Promise<string> {
    const cookie = config.baidu.cookie;

    if (!cookie) {
        throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of <a href="https://docs.rsshub.app/deploy/config#baidu">BAIDU_COOKIE</a>');
    }

    const { getPuppeteerPage } = await import('@/utils/puppeteer');
    const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000, retries = 3 } = options;

    const data = await cache.tryGet(
        cacheKey,
        async () => {
            let html: string | undefined;
            let lastError: unknown;

            /* eslint-disable no-await-in-loop -- Intentional sequential retry logic */
            for (let attempt = 0; attempt < retries; attempt++) {
                const { page, destroy } = await getPuppeteerPage(url, { noGoto: true });

                try {
                    // Set the cookies before navigating so only one navigation is needed.
                    await page.setCookie(...parseBaiduCookies(cookie));

                    // Navigate with a lenient wait condition.
                    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });

                    // Give the page a moment to settle.
                    await new Promise((resolve) => setTimeout(resolve, 2000));

                    // Best-effort wait for the content to appear.
                    try {
                        await page.waitForSelector(waitForSelector, { timeout });
                    } catch {
                        // Deliberately ignored: on timeout, return whatever has rendered so far.
                    }

                    html = await page.content();
                    break;
                } catch (error) {
                    lastError = error;
                    // Back off before the next attempt; after the final attempt there is nothing to wait for.
                    if (attempt < retries - 1) {
                        await new Promise((resolve) => setTimeout(resolve, 1000 * (attempt + 1)));
                    }
                } finally {
                    await destroy();
                }
            }
            /* eslint-enable no-await-in-loop */

            if (html === undefined) {
                throw lastError ?? new Error('Failed to fetch page content');
            }

            // Throwing here means no value is returned to the cache, so a verification
            // page is never stored under `cacheKey`.
            checkSecurityVerification(html);
            return html;
        },
        config.cache.routeExpire,
        // NOTE(review): presumably the cache helper's "refresh on hit" flag — confirm against @/utils/cache.
        false
    );

    const html = data as string;
    // Still validate on the way out: an entry cached before this check existed may hold a verification page.
    checkSecurityVerification(html);
    return html;
}

/**
 * Normalize a link found on a Tieba page into an absolute URL.
 *
 * - An empty `href` yields an empty string.
 * - An `http://` or `https://` URL is returned unchanged.
 * - A protocol-relative URL (`//host/path`) gets the scheme of `base` (or `https:` when
 *   `base` has none) instead of being appended to `base`.
 * - Anything else is treated as a path and joined to `base` with exactly one `/`.
 *
 * @param href - Absolute URL, protocol-relative URL, or path.
 * @param base - Origin used for relative paths; trailing slashes are ignored.
 * @returns The absolute URL, or `''` when `href` is empty.
 */
export function normalizeUrl(href: string, base: string = 'https://tieba.baidu.com'): string {
    if (!href) {
        return '';
    }

    // Require the full "http(s)://" prefix so that a relative path like "httpfoo" is not mistaken for a URL.
    if (/^https?:\/\//i.test(href)) {
        return href;
    }

    if (href.startsWith('//')) {
        const scheme = /^[a-z][a-z\d+.-]*:/i.exec(base)?.[0] ?? 'https:';
        return `${scheme}${href}`;
    }

    // Strip trailing slashes from the base to avoid "https://host//path".
    const origin = base.replace(/\/+$/, '');
    const path = href.startsWith('/') ? href : `/${href}`;
    return `${origin}${path}`;
}
120 changes: 68 additions & 52 deletions lib/routes/baidu/tieba/forum.tsx
Original file line number Diff line number Diff line change
@@ -1,85 +1,101 @@
import { load } from 'cheerio';
import { raw } from 'hono/html';
import { renderToString } from 'hono/jsx/dom/server';

import type { Route } from '@/types';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

import { getTiebaPageContent, normalizeUrl } from './common';
import { parseRelativeTime, parseThreads } from './utils';

/**
 * Route definition for Baidu Tieba forum listings.
 *
 * Registers two paths (the "good" listing with an optional category id, and the plain
 * forum listing), both served by `handler`. The route declares that it needs the
 * `BAIDU_COOKIE` setting and Puppeteer, and that the target site applies anti-crawler measures.
 *
 * Each key appears exactly once: the earlier `requireConfig` / `requirePuppeteer` /
 * `antiCrawler` / `maintainers` entries were shadowed by the later ones and are dropped.
 */
export const route: Route = {
    path: ['/tieba/forum/good/:kw/:cid?/:sortBy?', '/tieba/forum/:kw/:sortBy?'],
    categories: ['bbs'],
    example: '/baidu/tieba/forum/good/女图',
    parameters: { kw: '吧名', cid: '精品分类,默认为 `0`(全部分类),如果不传 `cid` 则获取全部分类', sortBy: '排序方式:`created`, `replied`。默认为 `created`' },
    features: {
        requireConfig: [
            {
                name: 'BAIDU_COOKIE',
                optional: false,
                description: '百度 cookie 值,用于需要登录的贴吧页面',
            },
        ],
        requirePuppeteer: true,
        antiCrawler: true,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    name: '精品帖子',
    maintainers: ['u3u', 'FlanChanXwO'],
    handler,
};

async function handler(ctx) {
// sortBy: created, replied
const { kw, cid = '0', sortBy = 'created' } = ctx.req.param();
const sortParam = sortBy === 'replied' ? '&sc=67108864' : '';

// PC端:https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}
// 移动端接口:https://tieba.baidu.com/mo/q/m?kw=${encodeURIComponent(kw)}&lp=5024&forum_recommend=1&lm=0&cid=0&has_url_param=1&pn=0&is_ajax=1
const params = { kw: encodeURIComponent(kw) };
ctx.req.path.includes('good') && (params.tab = 'good');
cid && (params.cid = cid);
const { data } = await got(`https://tieba.baidu.com/f`, {
headers: {
Referer: 'https://tieba.baidu.com/',
},
searchParams: params,
});
// 固定抓取3页,约30条帖子
const maxPages = 3;

// 并发获取所有页面
const pagePromises = [];
for (let pageNum = 0; pageNum < maxPages; pageNum++) {
const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=${pageNum * 50}${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${pageNum === 0 ? '' : '&ie=utf-8'}${sortParam}`;

const promise = getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}:page${pageNum}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 });
pagePromises.push(promise);
}

const threadListHTML = load(data)('code[id="pagelet_html_frs-list/pagelet/thread_list"]')
.contents()
.filter((e) => e.nodeType === '8');
Comment thread
FlanChanXwO marked this conversation as resolved.
// 等待所有页面获取完成
const pageResults = await Promise.all(pagePromises);

const $ = load(threadListHTML.prevObject[0].data);
const list = $('#thread_list > .j_thread_list[data-field]')
.toArray()
.map((element) => {
const item = $(element);
const { id, author_name } = item.data('field');
const time = sortBy === 'created' ? item.find('.is_show_create_time').text().trim() : item.find('.threadlist_reply_date').text().trim();
const title = item.find('a.j_th_tit').text().trim();
const details = item.find('.threadlist_abs').text().trim();
const medias = item
.find('.threadlist_media img')
.toArray()
.map((element) => {
const item = $(element);
return `<img src="${item.attr('bpic')}">`;
})
.join('');
// 解析所有页面数据并去重
const threadMap = new Map();
for (const html of pageResults) {
if (html && html.length > 0) {
const $ = load(html);
const threads = parseThreads($);
for (const thread of threads) {
// 使用帖子ID去重,只保留第一次出现的
if (!threadMap.has(thread.id)) {
threadMap.set(thread.id, thread);
}
}
}
}

return {
title,
description: renderToString(
<>
<p>{details}</p>
<p>{raw(medias)}</p>
<p>作者:{author_name}</p>
</>
),
pubDate: timezone(parseDate(time, ['HH:mm', 'M-D', 'YYYY-MM'], true), +8),
link: `https://tieba.baidu.com/p/${id}`,
};
});
const allThreads = [...threadMap.values()];

if (allThreads.length === 0) {
throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.');
}

const list = allThreads.map((thread) => {
const parsedDate = parseRelativeTime(thread.time);
return {
title: thread.title,
link: normalizeUrl(thread.link) || `https://tieba.baidu.com/p/${thread.id}`,
pubDate: parsedDate ? timezone(parsedDate, +8) : undefined,
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
author: thread.author,
description: renderToString(
<>
{thread.content ? <p>{thread.content}</p> : null}
{thread.images && thread.images.length > 0 ? (
<div>
{thread.images.map((img) => (
<img src={img} alt="" style={{ maxWidth: '100%', margin: '5px 0' }} />
))}
</div>
) : null}
</>
),
};
});

return {
title: `${kw}吧`,
description: load(data)('meta[name="description"]').attr('content'),
link: `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}`,
item: list,
};
Expand Down
Loading
Loading