Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ type ConfigEnvKeys =
| 'BITBUCKET_USERNAME'
| 'BITBUCKET_PASSWORD'
| 'BTBYR_HOST'
| 'BAIDU_COOKIE'
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
| 'BTBYR_COOKIE'
| 'BUPT_PORTAL_COOKIE'
| 'CAIXIN_COOKIE'
Expand Down Expand Up @@ -348,6 +349,9 @@ export type Config = {
};

// Route-specific Configurations
baidu: {
cookie?: string;
};
bilibili: {
cookies: Record<string, string | undefined>;
dmImgList?: string;
Expand Down Expand Up @@ -763,7 +767,7 @@ const calculateValue = () => {
type: envs.CACHE_TYPE || (envs.CACHE_TYPE === '' ? '' : 'memory'), // 缓存类型,支持 'memory' 和 'redis',设为空可以禁止缓存
requestTimeout: toInt(envs.CACHE_REQUEST_TIMEOUT, 60),
routeExpire: toInt(envs.CACHE_EXPIRE, 5 * 60), // 路由缓存时间,单位为秒
contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 1 * 60 * 60), // 不变内容缓存时间,单位为秒
contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 60 * 60), // 不变内容缓存时间,单位为秒
Comment thread
FlanChanXwO marked this conversation as resolved.
},
memory: {
max: toInt(envs.MEMORY_MAX, Math.pow(2, 8)), // The maximum number of items that remain in the cache. This must be a positive finite intger.
Expand Down Expand Up @@ -843,6 +847,9 @@ const calculateValue = () => {
},

// Route-specific Configurations
baidu: {
cookie: envs.BAIDU_COOKIE,
},
bilibili: {
cookies: bilibili_cookies,
dmImgList: envs.BILIBILI_DM_IMG_LIST,
Expand Down
122 changes: 122 additions & 0 deletions lib/routes/baidu/tieba/common.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import { config } from '@/config';
import ConfigNotFoundError from '@/errors/types/config-not-found';
import cache from '@/utils/cache';

/**
 * Convert a raw cookie string into the cookie objects passed to Puppeteer's `page.setCookie`.
 *
 * Pairs are separated by `;`. Only the first `=` of a pair separates the name from the value,
 * so values that themselves contain `=` are kept intact. A pair without any `=` becomes a
 * cookie with an empty value. Blank segments are ignored. Every cookie is scoped to
 * `.tieba.baidu.com`.
 */
export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> {
    const domain = '.tieba.baidu.com';
    const parsed: Array<{ name: string; value: string; domain: string }> = [];

    for (const segment of cookieStr.split(';')) {
        const pair = segment.trim();
        if (pair === '') {
            continue;
        }

        const separatorAt = pair.indexOf('=');
        if (separatorAt < 0) {
            // Bare token without a value.
            parsed.push({ name: pair, value: '', domain });
            continue;
        }

        parsed.push({
            name: pair.slice(0, separatorAt).trim(),
            value: pair.slice(separatorAt + 1).trim(),
            domain,
        });
    }

    return parsed;
}
Comment thread
FlanChanXwO marked this conversation as resolved.

/**
 * Throw if the given HTML looks like Baidu's security-verification page.
 *
 * The page is recognised by the presence of one of the verification marker strings.
 *
 * @throws Error when a marker is found in `html`.
 */
export function checkSecurityVerification(html: string): void {
    const markers = ['安全验证', '百度安全验证'];
    const blocked = markers.some((marker) => html.includes(marker));
    if (!blocked) {
        return;
    }
    throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.');
}

/**
 * Fetch the rendered HTML of a Tieba page through Puppeteer, with caching and retries.
 *
 * The configured Baidu cookie is installed on the page before navigation. Every attempt opens
 * a fresh page and always destroys it afterwards. A failed attempt is retried after a linearly
 * growing delay. The security-verification check runs inside the cache callback, after the
 * retry loop, so a verification page is never stored under `cacheKey` and is not retried.
 *
 * @param url - Page to load.
 * @param cacheKey - Key passed to `cache.tryGet`.
 * @param options - `waitForSelector`: selector awaited after load (best effort, a timeout is ignored);
 *   `timeout`: how long to wait for that selector (default 3000); `retries`: maximum number of
 *   attempts (default 3).
 * @returns The page HTML.
 * @throws ConfigNotFoundError when no Baidu cookie is configured.
 * @throws Error when every attempt fails, or when the loaded page is Baidu's security verification.
 */
export async function getTiebaPageContent(
    url: string,
    cacheKey: string,
    options: {
        waitForSelector?: string;
        timeout?: number;
        retries?: number;
    } = {}
): Promise<string> {
    const cookie = config.baidu.cookie;

    if (!cookie) {
        throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of <a href="https://docs.rsshub.app/deploy/config#baidu">BAIDU_COOKIE</a>');
    }

    const { getPuppeteerPage } = await import('@/utils/puppeteer');
    const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000, retries = 3 } = options;

    const data = await cache.tryGet(
        cacheKey,
        async () => {
            let lastError: Error | undefined;
            let html: string | undefined;

            /* eslint-disable no-await-in-loop -- Intentional sequential retry logic */
            for (let attempt = 0; attempt < retries && html === undefined; attempt++) {
                const { page, destroy } = await getPuppeteerPage(url, { noGoto: true });

                try {
                    // Install the cookies before navigating so no extra navigation is needed.
                    await page.setCookie(...parseBaiduCookies(cookie));

                    // Navigate with a lenient readiness condition.
                    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });

                    // Give the page a moment to settle.
                    await new Promise((resolve) => setTimeout(resolve, 2000));

                    // Best effort: wait for the content to render; on timeout use whatever is there.
                    try {
                        await page.waitForSelector(waitForSelector, { timeout });
                    } catch {
                        // Deliberately ignored, see above.
                    }

                    html = await page.content();
                } catch (error) {
                    lastError = error instanceof Error ? error : new Error(String(error));
                    // Back off before the next attempt; after the final attempt fall through and throw.
                    if (attempt < retries - 1) {
                        await new Promise((resolve) => setTimeout(resolve, 1000 * (attempt + 1)));
                    }
                } finally {
                    await destroy();
                }
            }
            /* eslint-enable no-await-in-loop */

            if (html === undefined) {
                throw lastError || new Error('Failed to fetch page content');
            }

            // Validate before returning so that a verification page is never cached.
            checkSecurityVerification(html);
            return html;
        },
        config.cache.routeExpire,
        false
    );

    return data as string;
}

/**
 * Turn a link found on a Tieba page into an absolute URL.
 *
 * - An empty `href` yields an empty string.
 * - An `href` that already starts with `http://` or `https://` is returned unchanged.
 * - A protocol-relative `href` (`//host/path`) gets the scheme of `base` (`https:` if `base` has none).
 * - Anything else is treated as a path and appended to `base` with exactly one `/` between them.
 *
 * @param href - Link as found in the document.
 * @param base - Origin used for relative links.
 * @returns The absolute URL, or `''` when `href` is empty.
 */
export function normalizeUrl(href: string, base: string = 'https://tieba.baidu.com'): string {
    if (!href) {
        return '';
    }
    // Require the full scheme separator so relative paths like "http-list/1" are not mistaken for absolute URLs.
    if (/^https?:\/\//i.test(href)) {
        return href;
    }
    if (href.startsWith('//')) {
        const scheme = /^[a-z][a-z\d+.-]*:/i.exec(base)?.[0] ?? 'https:';
        return `${scheme}${href}`;
    }
    // Drop trailing slashes from the base to avoid "//" at the join.
    const origin = base.replace(/\/+$/, '');
    const path = href.startsWith('/') ? href : `/${href}`;
    return `${origin}${path}`;
}
120 changes: 68 additions & 52 deletions lib/routes/baidu/tieba/forum.tsx
Original file line number Diff line number Diff line change
@@ -1,85 +1,101 @@
import { load } from 'cheerio';
import { raw } from 'hono/html';
import { renderToString } from 'hono/jsx/dom/server';

import type { Route } from '@/types';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

import { getTiebaPageContent, normalizeUrl } from './common';
import { parseRelativeTime, parseThreads } from './utils';

export const route: Route = {
path: ['/tieba/forum/good/:kw/:cid?/:sortBy?', '/tieba/forum/:kw/:sortBy?'],
categories: ['bbs'],
example: '/baidu/tieba/forum/good/女图',
parameters: { kw: '吧名', cid: '精品分类,默认为 `0`(全部分类),如果不传 `cid` 则获取全部分类', sortBy: '排序方式:`created`, `replied`。默认为 `created`' },
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
requireConfig: [
{
name: 'BAIDU_COOKIE',
optional: false,
description: '百度 cookie 值,用于需要登录的贴吧页面',
},
],
requirePuppeteer: true,
antiCrawler: true,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
name: '精品帖子',
maintainers: ['u3u'],
maintainers: ['u3u', 'FlanChanXwO'],
handler,
};

async function handler(ctx) {
// sortBy: created, replied
const { kw, cid = '0', sortBy = 'created' } = ctx.req.param();
const sortParam = sortBy === 'replied' ? '&sc=67108864' : '';

// PC端:https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}
// 移动端接口:https://tieba.baidu.com/mo/q/m?kw=${encodeURIComponent(kw)}&lp=5024&forum_recommend=1&lm=0&cid=0&has_url_param=1&pn=0&is_ajax=1
const params = { kw: encodeURIComponent(kw) };
ctx.req.path.includes('good') && (params.tab = 'good');
cid && (params.cid = cid);
const { data } = await got(`https://tieba.baidu.com/f`, {
headers: {
Referer: 'https://tieba.baidu.com/',
},
searchParams: params,
});
// 固定抓取3页,约30条帖子
const maxPages = 3;

// 并发获取所有页面
const pagePromises = [];
for (let pageNum = 0; pageNum < maxPages; pageNum++) {
const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=${pageNum * 50}${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${pageNum === 0 ? '' : '&ie=utf-8'}${sortParam}`;

const promise = getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}:page${pageNum}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 });
pagePromises.push(promise);
}

const threadListHTML = load(data)('code[id="pagelet_html_frs-list/pagelet/thread_list"]')
.contents()
.filter((e) => e.nodeType === '8');
Comment thread
FlanChanXwO marked this conversation as resolved.
// 等待所有页面获取完成
const pageResults = await Promise.all(pagePromises);

const $ = load(threadListHTML.prevObject[0].data);
const list = $('#thread_list > .j_thread_list[data-field]')
.toArray()
.map((element) => {
const item = $(element);
const { id, author_name } = item.data('field');
const time = sortBy === 'created' ? item.find('.is_show_create_time').text().trim() : item.find('.threadlist_reply_date').text().trim();
const title = item.find('a.j_th_tit').text().trim();
const details = item.find('.threadlist_abs').text().trim();
const medias = item
.find('.threadlist_media img')
.toArray()
.map((element) => {
const item = $(element);
return `<img src="${item.attr('bpic')}">`;
})
.join('');
// 解析所有页面数据并去重
const threadMap = new Map();
for (const html of pageResults) {
if (html && html.length > 0) {
const $ = load(html);
const threads = parseThreads($);
for (const thread of threads) {
// 使用帖子ID去重,只保留第一次出现的
if (!threadMap.has(thread.id)) {
threadMap.set(thread.id, thread);
}
}
}
}

return {
title,
description: renderToString(
<>
<p>{details}</p>
<p>{raw(medias)}</p>
<p>作者:{author_name}</p>
</>
),
pubDate: timezone(parseDate(time, ['HH:mm', 'M-D', 'YYYY-MM'], true), +8),
link: `https://tieba.baidu.com/p/${id}`,
};
});
const allThreads = [...threadMap.values()];

if (allThreads.length === 0) {
throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.');
}

const list = allThreads.map((thread) => {
const parsedDate = parseRelativeTime(thread.time);
return {
title: thread.title,
link: normalizeUrl(thread.link) || `https://tieba.baidu.com/p/${thread.id}`,
pubDate: parsedDate ? timezone(parsedDate, +8) : undefined,
Comment thread
FlanChanXwO marked this conversation as resolved.
Outdated
author: thread.author,
description: renderToString(
<>
{thread.content ? <p>{thread.content}</p> : null}
{thread.images && thread.images.length > 0 ? (
<div>
{thread.images.map((img) => (
<img src={img} alt="" style={{ maxWidth: '100%', margin: '5px 0' }} />
))}
</div>
) : null}
</>
),
};
});

return {
title: `${kw}吧`,
description: load(data)('meta[name="description"]').attr('content'),
link: `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}`,
item: list,
};
Expand Down
Loading
Loading