Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,15 @@ type ConfigEnvKeys =
| 'FOLLOW_PRICE'
| 'FOLLOW_USER_LIMIT'
// Route-specific (dynamic cookies with prefixes)
| 'BAIDU_COOKIE'
| `BILIBILI_COOKIE_${string}`
| 'BILIBILI_DM_IMG_LIST'
| 'BILIBILI_DM_IMG_INTER'
| 'BILIBILI_EXCLUDE_SUBTITLES'
| 'BITBUCKET_USERNAME'
| 'BITBUCKET_PASSWORD'
| 'BTBYR_HOST'
| 'BAIDU_COOKIE'
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
| 'BTBYR_COOKIE'
| 'BUPT_PORTAL_COOKIE'
| 'CAIXIN_COOKIE'
Expand Down Expand Up @@ -348,6 +350,9 @@ export type Config = {
};

// Route-specific Configurations
baidu: {
cookie?: string;
};
bilibili: {
cookies: Record<string, string | undefined>;
dmImgList?: string;
Expand Down Expand Up @@ -763,7 +768,7 @@ const calculateValue = () => {
type: envs.CACHE_TYPE || (envs.CACHE_TYPE === '' ? '' : 'memory'), // 缓存类型,支持 'memory' 和 'redis',设为空可以禁止缓存
requestTimeout: toInt(envs.CACHE_REQUEST_TIMEOUT, 60),
routeExpire: toInt(envs.CACHE_EXPIRE, 5 * 60), // 路由缓存时间,单位为秒
contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 1 * 60 * 60), // 不变内容缓存时间,单位为秒
contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 60 * 60), // 不变内容缓存时间,单位为秒
Comment thread
FlanChanXwO marked this conversation as resolved.
},
memory: {
max: toInt(envs.MEMORY_MAX, Math.pow(2, 8)), // The maximum number of items that remain in the cache. This must be a positive finite integer.
Expand Down Expand Up @@ -843,6 +848,9 @@ const calculateValue = () => {
},

// Route-specific Configurations
baidu: {
cookie: envs.BAIDU_COOKIE,
},
bilibili: {
cookies: bilibili_cookies,
dmImgList: envs.BILIBILI_DM_IMG_LIST,
Expand Down
120 changes: 120 additions & 0 deletions lib/routes/baidu/tieba/common.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import { Cookie } from 'tough-cookie';

import { config } from '@/config';
import ConfigNotFoundError from '@/errors/types/config-not-found';
import cache from '@/utils/cache';
import { getPuppeteerPage } from '@/utils/puppeteer';

/**
 * Convert a raw Baidu `Cookie` header string into the cookie objects that
 * Puppeteer's `page.setCookie` accepts.
 *
 * The string is split on `;` and each trimmed segment is handed to
 * tough-cookie's `Cookie.parse`. Segments that fail to parse, or that yield an
 * empty key, are dropped. Per the original author's note, delegating to
 * `Cookie.parse` keeps values that themselves contain `=` intact.
 *
 * @param cookieStr - Semicolon-separated `name=value` pairs.
 * @returns One entry per successfully parsed pair, each scoped to `.tieba.baidu.com`.
 */
export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> {
    const result: Array<{ name: string; value: string; domain: string }> = [];

    for (const segment of cookieStr.split(';')) {
        const parsed = Cookie.parse(segment.trim());
        // Skip segments tough-cookie could not parse, or that have no cookie name.
        if (!parsed?.key) {
            continue;
        }
        result.push({
            name: parsed.key,
            value: parsed.value,
            domain: '.tieba.baidu.com',
        });
    }

    return result;
}
Comment thread
FlanChanXwO marked this conversation as resolved.

/**
 * Throw if the given HTML appears to be Baidu's security-verification page
 * instead of real Tieba content.
 *
 * Detection is a plain substring search for the marker `安全验证`. The longer
 * marker `百度安全验证` contains it, so one test covers both.
 *
 * NOTE(review): because this is a substring match on the whole document, a
 * regular page whose content merely mentions `安全验证` would also trigger the
 * error — confirm whether a more specific marker is available.
 *
 * @param html - Page HTML to inspect.
 * @throws Error when the marker is present.
 */
export function checkSecurityVerification(html: string): void {
    if (html.includes('安全验证')) {
        throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.');
    }
}

/**
 * Load a Tieba page in Puppeteer with the configured Baidu cookie and return
 * its rendered HTML, going through the route cache.
 *
 * Flow:
 * 1. Require `config.baidu.cookie`; without it the route cannot work.
 * 2. On a cache miss, open the page (cookies are installed before navigation),
 *    wait briefly for content, and read `page.content()`. The whole attempt,
 *    including page creation and navigation, is retried with a linearly growing
 *    back-off.
 * 3. Reject security-verification pages *before* they reach the cache, so a bad
 *    response is not served again for the full `routeExpire` window.
 *
 * @param url - Absolute Tieba URL to open.
 * @param cacheKey - Key under which the HTML is cached.
 * @param options.waitForSelector - Selector awaited after load; a timeout here is tolerated.
 * @param options.timeout - Milliseconds to wait for `waitForSelector` (default 3000).
 * @param options.retries - Maximum number of attempts (default 3).
 * @returns The page HTML.
 * @throws ConfigNotFoundError when `BAIDU_COOKIE` is not configured.
 * @throws Error when every attempt fails, or when the page is a security-verification page.
 */
export async function getTiebaPageContent(
    url: string,
    cacheKey: string,
    options: {
        waitForSelector?: string;
        timeout?: number;
        retries?: number;
    } = {}
): Promise<string> {
    const cookie = config.baidu.cookie;

    if (!cookie) {
        throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of <a href="https://docs.rsshub.app/deploy/config#baidu">BAIDU_COOKIE</a>');
    }

    const cookies = parseBaiduCookies(cookie);
    const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000, retries = 3 } = options;

    const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

    const data = await cache.tryGet(
        cacheKey,
        async () => {
            let lastError: Error | undefined;

            /* eslint-disable no-await-in-loop -- Intentional sequential retry logic */
            for (let attempt = 0; attempt < retries; attempt++) {
                try {
                    // Page creation/navigation lives inside the try so that its failures are retried too.
                    const { page, destroy } = await getPuppeteerPage(url, {
                        onBeforeLoad: async (newPage) => {
                            if (cookies.length > 0) {
                                await newPage.setCookie(...cookies);
                            }
                        },
                        gotoConfig: { waitUntil: 'domcontentloaded' },
                    });

                    let html: string;
                    try {
                        // Give the page a moment to settle after DOMContentLoaded.
                        await sleep(2000);

                        // Best-effort wait for the content to render; on timeout we still
                        // return whatever HTML is present.
                        try {
                            await page.waitForSelector(waitForSelector, { timeout });
                        } catch {
                            // Deliberately ignored: selector did not appear in time.
                        }

                        html = await page.content();
                    } finally {
                        // Always release the page, whether the attempt succeeded or not.
                        await destroy();
                    }

                    return html;
                } catch (error) {
                    lastError = error instanceof Error ? error : new Error(String(error));
                    // Out of attempts: surface the last failure.
                    if (attempt === retries - 1) {
                        throw lastError;
                    }
                    // Linear back-off before the next attempt.
                    await sleep(1000 * (attempt + 1));
                }
            }
            /* eslint-enable no-await-in-loop */

            // Reached only when `retries` is zero or negative.
            throw lastError || new Error('Failed to fetch page content');
        },
        config.cache.routeExpire,
        // NOTE(review): presumably tryGet's "refresh" flag — confirm against '@/utils/cache'.
        false
    ).then((fetched) => {
        // NOTE(review): whether a rejection here keeps the value out of the cache depends on
        // tryGet having already stored it; the guard below is what callers rely on.
        return fetched;
    });

    const html = data as string;
    // Validate both fresh and cached HTML so a verification page never reaches the parser.
    checkSecurityVerification(html);
    return html;
}

/**
 * Turn a possibly relative link into an absolute URL.
 *
 * - Empty input yields an empty string.
 * - `http://` / `https://` URLs (any letter case) are returned unchanged.
 * - Protocol-relative URLs (`//host/path`) get the scheme of `base`
 *   (falling back to `https:` if `base` has none).
 * - Anything else is treated as a path and appended to `base`, with exactly one
 *   `/` between them.
 *
 * @param href - Link as found in the page.
 * @param base - Origin used for relative links; defaults to the Tieba site.
 * @returns The absolute URL, or `''` when `href` is empty.
 */
export function normalizeUrl(href: string, base: string = 'https://tieba.baidu.com'): string {
    if (!href) {
        return '';
    }
    // Require the full "scheme://" prefix so relative paths that merely start with "http" are not mistaken for absolute URLs.
    if (/^https?:\/\//i.test(href)) {
        return href;
    }
    // Protocol-relative link: only the scheme is missing, the host is already there.
    if (href.startsWith('//')) {
        const scheme = /^([a-z][a-z0-9+.-]*:)\/\//i.exec(base)?.[1] ?? 'https:';
        return `${scheme}${href}`;
    }
    // Avoid a double slash when the caller passes a base that ends with '/'.
    const origin = base.endsWith('/') ? base.slice(0, -1) : base;
    const path = href.startsWith('/') ? href : `/${href}`;
    return `${origin}${path}`;
}
100 changes: 49 additions & 51 deletions lib/routes/baidu/tieba/forum.tsx
Original file line number Diff line number Diff line change
@@ -1,85 +1,83 @@
import { load } from 'cheerio';
import { raw } from 'hono/html';
import { renderToString } from 'hono/jsx/dom/server';

import type { Route } from '@/types';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

import { getTiebaPageContent, normalizeUrl } from './common';
import { parseRelativeTime, parseThreads } from './utils';

export const route: Route = {
path: ['/tieba/forum/good/:kw/:cid?/:sortBy?', '/tieba/forum/:kw/:sortBy?'],
categories: ['bbs'],
example: '/baidu/tieba/forum/good/女图',
parameters: { kw: '吧名', cid: '精品分类,默认为 `0`(全部分类),如果不传 `cid` 则获取全部分类', sortBy: '排序方式:`created`, `replied`。默认为 `created`' },
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
requireConfig: [
{
name: 'BAIDU_COOKIE',
optional: false,
description: '百度 cookie 值,用于需要登录的贴吧页面',
},
],
requirePuppeteer: true,
antiCrawler: true,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
name: '精品帖子',
maintainers: ['u3u'],
maintainers: ['u3u', 'FlanChanXwO'],
handler,
};

async function handler(ctx) {
// sortBy: created, replied
const { kw, cid = '0', sortBy = 'created' } = ctx.req.param();
const sortParam = sortBy === 'replied' ? '&sc=67108864' : '';

// PC端:https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}
// 移动端接口:https://tieba.baidu.com/mo/q/m?kw=${encodeURIComponent(kw)}&lp=5024&forum_recommend=1&lm=0&cid=0&has_url_param=1&pn=0&is_ajax=1
const params = { kw: encodeURIComponent(kw) };
ctx.req.path.includes('good') && (params.tab = 'good');
cid && (params.cid = cid);
const { data } = await got(`https://tieba.baidu.com/f`, {
headers: {
Referer: 'https://tieba.baidu.com/',
},
searchParams: params,
});
const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=0${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${sortParam}`;
const data = await getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 });
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated

const threadListHTML = load(data)('code[id="pagelet_html_frs-list/pagelet/thread_list"]')
const $ = load(data);
const threadListHTML = $('code[id="pagelet_html_frs-list/pagelet/thread_list"]')
.contents()
.filter((e) => e.nodeType === '8');
Comment thread
FlanChanXwO marked this conversation as resolved.
.filter((_, e) => e.type === 'comment' || (e as { nodeType?: number }).nodeType === 8)
.first()
.text()
.trim();

const $ = load(threadListHTML.prevObject[0].data);
const list = $('#thread_list > .j_thread_list[data-field]')
.toArray()
.map((element) => {
const item = $(element);
const { id, author_name } = item.data('field');
const time = sortBy === 'created' ? item.find('.is_show_create_time').text().trim() : item.find('.threadlist_reply_date').text().trim();
const title = item.find('a.j_th_tit').text().trim();
const details = item.find('.threadlist_abs').text().trim();
const medias = item
.find('.threadlist_media img')
.toArray()
.map((element) => {
const item = $(element);
return `<img src="${item.attr('bpic')}">`;
})
.join('');
const threadRoot = threadListHTML ? load(threadListHTML) : $;
const allThreads = parseThreads(threadRoot);

return {
title,
description: renderToString(
<>
<p>{details}</p>
<p>{raw(medias)}</p>
<p>作者:{author_name}</p>
</>
),
pubDate: timezone(parseDate(time, ['HH:mm', 'M-D', 'YYYY-MM'], true), +8),
link: `https://tieba.baidu.com/p/${id}`,
};
});
if (allThreads.length === 0) {
throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.');
}

const list = allThreads.map((thread) => {
const parsedDate = parseRelativeTime(thread.time);
return {
title: thread.title,
link: normalizeUrl(thread.link) || `https://tieba.baidu.com/p/${thread.id}`,
pubDate: parsedDate ? timezone(parsedDate, +8) : undefined,
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
author: thread.author,
description: renderToString(
<>
{thread.content ? <p>{thread.content}</p> : null}
{thread.images && thread.images.length > 0 ? (
<div>
{thread.images.map((img) => (
<img src={img} alt="" style={{ maxWidth: '100%', margin: '5px 0' }} />
))}
</div>
) : null}
</>
),
};
});

return {
title: `${kw}吧`,
description: load(data)('meta[name="description"]').attr('content'),
link: `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}`,
item: list,
};
Expand Down
Loading