From d44d879f179d10824f004286f08bf126a8d79af9 Mon Sep 17 00:00:00 2001 From: Tony Date: Mon, 11 May 2026 21:32:58 +0800 Subject: [PATCH 01/10] refactor: use patchwright --- .github/workflows/issue-command.yml | 2 +- .github/workflows/test.yml | 4 +- Dockerfile | 10 +-- lib/utils/playwright.mock.test.ts | 77 ++++++---------- lib/utils/playwright.ts | 133 +++++++++------------------- lib/utils/playwright.worker.ts | 70 +-------------- package.json | 5 +- pnpm-lock.yaml | 38 ++++---- 8 files changed, 101 insertions(+), 238 deletions(-) diff --git a/.github/workflows/issue-command.yml b/.github/workflows/issue-command.yml index 1b65ce41f14e..27fc494ee88c 100644 --- a/.github/workflows/issue-command.yml +++ b/.github/workflows/issue-command.yml @@ -120,7 +120,7 @@ jobs: cache: 'pnpm' - name: Install dependencies (pnpm) - run: pnpm i && pnpm rb && pnpm exec playwright install chromium + run: pnpm i && pnpm rb && pnpm exec patchright install chromium - name: Fetch affected routes id: fetch-route diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dc623d56a581..26298f71d103 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,7 +40,7 @@ jobs: - name: Install dependencies (pnpm) run: pnpm i - name: Run postinstall script for dependencies - run: pnpm rb && pnpm exec playwright install chromium + run: pnpm rb && pnpm exec patchright install chromium - name: Build routes run: pnpm build - name: Build worker routes @@ -91,7 +91,7 @@ jobs: run: pnpm build - name: Install bundled Chromium if: ${{ matrix.chromium.dependency == '' }} - run: pnpm exec playwright install chromium + run: pnpm exec patchright install chromium - name: Install Chromium if: ${{ matrix.chromium.dependency != '' }} # 'chromium-browser' from Ubuntu APT repo is a dummy package. 
Its version (85.0.4183.83) means diff --git a/Dockerfile b/Dockerfile index 6683193b7ed6..0654acb9915a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,7 +40,7 @@ WORKDIR /ver COPY ./package.json /app/ RUN \ set -ex && \ - grep -Po '(?<="playwright": ")[^\s"]*(?=")' /app/package.json | tee /ver/.playwright_version && \ + grep -Po '(?<="patchright": ")[^\s"]*(?=")' /app/package.json | tee /ver/.patchright_version && \ grep -Po '(?<="@vercel/nft": ")[^\s"]*(?=")' /app/package.json | tee /ver/.nft_version && \ grep -Po '(?<="fs-extra": ")[^\s"]*(?=")' /app/package.json | tee /ver/.fs_extra_version @@ -88,12 +88,12 @@ FROM node:24-bookworm-slim AS chromium-downloader # Yeah, downloading Chromium never needs those dependencies below. WORKDIR /app -COPY --from=dep-version-parser /ver/.playwright_version /app/.playwright_version +COPY --from=dep-version-parser /ver/.patchright_version /app/.patchright_version ARG TARGETPLATFORM ARG USE_CHINA_NPM_REGISTRY=0 ARG PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 -# The official recommended way to use Playwright on x86(_64) is to use the bundled browser. +# The official recommended way to use Patchright on x86(_64) is to use the bundled browser. 
RUN \ set -ex ; \ if [ "$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD" = 0 ] && [ "$TARGETPLATFORM" = 'linux/amd64' ]; then \ @@ -106,9 +106,9 @@ RUN \ unset PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD && \ export PLAYWRIGHT_BROWSERS_PATH=/app/node_modules/.cache/ms-playwright && \ corepack enable pnpm && \ - pnpm --allow-build=playwright add playwright@$(cat /app/.playwright_version) --save-prod && \ + pnpm --allow-build=patchright --allow-build=patchright-core add patchright@$(cat /app/.patchright_version) --save-prod && \ pnpm rb && \ - pnpm exec playwright install chromium ; \ + pnpm exec patchright install chromium ; \ else \ mkdir -p /app/node_modules/.cache/ms-playwright ; \ fi; diff --git a/lib/utils/playwright.mock.test.ts b/lib/utils/playwright.mock.test.ts index f09351631f99..35f8cae1e1af 100644 --- a/lib/utils/playwright.mock.test.ts +++ b/lib/utils/playwright.mock.test.ts @@ -4,19 +4,6 @@ const connect = vi.fn(); const connectOverCDP = vi.fn(); const launch = vi.fn(); -const routeContinue = vi.fn(); -const routeAbort = vi.fn(); -const route = { - request: vi.fn(), - continue: routeContinue, - abort: routeAbort, -}; - -const request = { - resourceType: vi.fn(), - url: vi.fn(), -}; - let page: any; let context: any; let browser: any; @@ -26,9 +13,7 @@ const createBrowserMocks = () => { context: vi.fn(), goto: vi.fn(), on: vi.fn(), - route: vi.fn(), setExtraHTTPHeaders: vi.fn(), - unroute: vi.fn(), }; context = { @@ -53,7 +38,7 @@ const proxyMock = { getDispatcherForProxy: vi.fn(), }; -vi.mock('playwright', () => ({ +vi.mock('patchright', () => ({ chromium: { connect, connectOverCDP, @@ -83,11 +68,6 @@ const resetMocks = () => { connect.mockReset(); connectOverCDP.mockReset(); launch.mockReset(); - routeContinue.mockReset(); - routeAbort.mockReset(); - route.request.mockReset(); - request.resourceType.mockReset(); - request.url.mockReset(); proxyMock.multiProxy = undefined; proxyMock.getCurrentProxy.mockReset(); proxyMock.markProxyFailed.mockReset(); @@ -118,16 +98,38 @@ 
describe('getPlaywrightPage (mocked)', () => { const endpoint = connect.mock.calls[0][0] as string; expect(connectOverCDP).not.toHaveBeenCalled(); - expect(endpoint).toContain('launch-options='); - expect(endpoint).not.toContain('launch='); - const launchOptions = JSON.parse(new URL(endpoint).searchParams.get('launch-options') || '{}'); + expect(endpoint).toContain('launch='); + expect(endpoint).not.toContain('launch-options='); + const launchOptions = JSON.parse(new URL(endpoint).searchParams.get('launch') || '{}'); expect(launchOptions.args).not.toContainEqual(expect.stringContaining('--user-agent=')); + expect(launchOptions.executablePath).toBeUndefined(); + expect(launchOptions.acceptInsecureCerts).toBe(true); expect(onBeforeLoad).toHaveBeenCalled(); await result.destroy(); expect(close).toHaveBeenCalled(); }); + it('merges browserless launch options with existing ws endpoint launch param', async () => { + resetMocks(); + connect.mockResolvedValue(browser); + launch.mockResolvedValue(browser); + page.goto.mockResolvedValue(undefined); + browser.close.mockResolvedValue(undefined); + process.env.PLAYWRIGHT_WS_ENDPOINT = `ws://localhost:3000/?token=abc&launch=${encodeURIComponent(JSON.stringify({ stealth: true }))}`; + proxyMock.getCurrentProxy.mockReturnValue(null); + + const getPlaywrightPage = await loadPlaywright(); + const result = await getPlaywrightPage('https://example.com', { noGoto: true }); + + const endpoint = connect.mock.calls[0][0] as string; + const launchOptions = JSON.parse(new URL(endpoint).searchParams.get('launch') || '{}'); + expect(launchOptions.stealth).toBe(true); + expect(launchOptions.headless).toBe(true); + + await result.destroy(); + }); + it('does not override the browser user agent', async () => { resetMocks(); launch.mockResolvedValue(browser); @@ -238,31 +240,4 @@ describe('getPlaywrightPage (mocked)', () => { waitUntil: 'networkidle', }); }); - - it('keeps legacy request interception helpers', async () => { - resetMocks(); - 
launch.mockResolvedValue(browser); - page.goto.mockResolvedValue(undefined); - proxyMock.getCurrentProxy.mockReturnValue(null); - request.resourceType.mockReturnValue('image'); - request.url.mockReturnValue('https://example.com/logo.png'); - routeAbort.mockReturnValueOnce(new Promise((resolve) => setTimeout(resolve, 0))); - route.request.mockReturnValue(request); - - const getPlaywrightPage = await loadPlaywright(); - await getPlaywrightPage('https://example.com', { - onBeforeLoad: async (page) => { - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); - }); - }, - }); - - const routeHandler = page.route.mock.calls[0][1]; - await routeHandler(route); - - expect(routeAbort).toHaveBeenCalled(); - expect(routeContinue).not.toHaveBeenCalled(); - }); }); diff --git a/lib/utils/playwright.ts b/lib/utils/playwright.ts index 25b4a65da984..5e4f13f637b3 100644 --- a/lib/utils/playwright.ts +++ b/lib/utils/playwright.ts @@ -1,5 +1,5 @@ -import type { Browser as PlaywrightBrowser, BrowserContext, BrowserContextOptions, LaunchOptions, Page as PlaywrightPage, Request as PlaywrightRequest, Response as PlaywrightResponse, Route } from 'playwright'; -import { chromium } from 'playwright'; +import type { Browser as PlaywrightBrowser, BrowserContext, BrowserContextOptions, LaunchOptions, Page as PlaywrightPage, Request as PlaywrightRequest, Response as PlaywrightResponse } from 'patchright'; +import { chromium } from 'patchright'; import { config } from '@/config'; @@ -8,19 +8,10 @@ import proxy from './proxy'; type SetCookieParam = Parameters[0][number]; type Cookie = Awaited>[number]; -type GotoOptions = Parameters[1] & { - waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'networkidle0' | 'networkidle2'; -}; +type GotoOptions = Parameters[1]; type ProxyState = NonNullable>; -type RouteRequest = { - abort: (errorCode?: string) => Promise; - continue: (options?: 
Parameters[0]) => Promise; - resourceType: () => ReturnType; - url: () => string; -}; - type FinishedRequest = { response: () => { status: () => number; @@ -28,17 +19,13 @@ type FinishedRequest = { url: () => string; }; -type RequestHandler = (request: RouteRequest) => Promise | void; type RequestFinishedHandler = (request: FinishedRequest) => Promise | void; -type HandledRouteRequest = RouteRequest & { handled: boolean }; export type Page = PlaywrightPage & { authenticate: (credentials: { password?: string; username?: string }) => Promise; cookies: (urls?: string | string[]) => Promise; - goto: (url: string, options?: GotoOptions) => ReturnType; - on: ((event: 'request', handler: RequestHandler) => Page) & ((event: 'requestfinished', handler: RequestFinishedHandler) => Page) & PlaywrightPage['on']; + on: ((event: 'requestfinished', handler: RequestFinishedHandler) => Page) & PlaywrightPage['on']; setCookie: (...cookies: SetCookieParam[]) => Promise; - setRequestInterception: (enabled: boolean) => Promise; setUserAgent: (userAgent: string) => Promise; }; @@ -49,16 +36,6 @@ export type Browser = PlaywrightBrowser & { userAgent: () => string; }; -const normalizeWaitUntil = (waitUntil: GotoOptions['waitUntil']) => (waitUntil === 'networkidle0' || waitUntil === 'networkidle2' ? 'networkidle' : waitUntil); - -const normalizeGotoOptions = (options?: GotoOptions): Parameters[1] | undefined => - options - ? { - ...options, - waitUntil: normalizeWaitUntil(options.waitUntil), - } - : options; - const withDefaultCookiePath = (cookie: SetCookieParam): SetCookieParam => ('domain' in cookie && !('path' in cookie) ? 
{ ...cookie, path: '/' } : cookie); const proxyServerFromUrl = (proxyUrl: URL) => { @@ -95,43 +72,40 @@ const getProxyOptions = (currentProxy: ProxyState | null | undefined) => { } satisfies Pick; }; +const COMMON_LAUNCH_ARGS = ['--no-sandbox', '--disable-setuid-sandbox', '--window-position=0,0', '--ignore-certificate-errors', '--ignore-certificate-errors-spki-list']; + +// Patchright already patches playwright's default args (e.g. injects --disable-blink-features=AutomationControlled and strips --enable-automation), so we don't add those manually. const getLaunchOptions = (currentProxy?: ProxyState | null): LaunchOptions => ({ - args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-blink-features=AutomationControlled', '--window-position=0,0', '--ignore-certificate-errors', '--ignore-certificate-errors-spki-list'], + args: COMMON_LAUNCH_ARGS, executablePath: config.chromiumExecutablePath || undefined, headless: true, ...getProxyOptions(currentProxy), }); -const getContextOptions = (): BrowserContextOptions => ({ - ignoreHTTPSErrors: true, -}); - -const createRouteRequest = (route: Route): HandledRouteRequest => { - const request = route.request(); - const routeRequest = { - abort: async (errorCode) => { - routeRequest.handled = true; - await route.abort(errorCode); - }, - continue: async (options) => { - routeRequest.handled = true; - await route.continue(options); - }, - handled: false, - resourceType: () => request.resourceType(), - url: () => request.url(), - }; - return routeRequest; +// Browserless accepts launch options as a `launch` URL query parameter (URL-encoded JSON). +// (Patchright's own launch-server uses `launch-options` — RSSHub's WS_ENDPOINT targets browserless, so we emit `launch`.) +// The browserless schema also differs from patchright's LaunchOptions: no `executablePath`, and `ignoreHTTPSErrors` is renamed to `acceptInsecureCerts`. 
+type BrowserlessLaunchOptions = { + acceptInsecureCerts?: boolean; + args?: string[]; + headless?: boolean; + ignoreDefaultArgs?: boolean | string[]; + proxy?: LaunchOptions['proxy']; + slowMo?: number; + stealth?: boolean; }; -const runRequestHandlers = async (handlers: RequestHandler[], request: HandledRouteRequest, index = 0): Promise => { - if (request.handled || index >= handlers.length) { - return; - } +const toBrowserlessLaunchOptions = (currentProxy?: ProxyState | null): BrowserlessLaunchOptions => ({ + acceptInsecureCerts: true, + args: COMMON_LAUNCH_ARGS, + headless: true, + stealth: true, + ...getProxyOptions(currentProxy), +}); - await handlers[index](request); - await runRequestHandlers(handlers, request, index + 1); -}; +const getContextOptions = (): BrowserContextOptions => ({ + ignoreHTTPSErrors: true, +}); const createFinishedRequest = (request: PlaywrightRequest, response: PlaywrightResponse | null): FinishedRequest => ({ response: () => @@ -145,15 +119,8 @@ const createFinishedRequest = (request: PlaywrightRequest, response: PlaywrightR const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { const compatPage = page as Page; - const requestHandlers: RequestHandler[] = []; - const originalGoto = page.goto.bind(page); const originalOn = page.on.bind(page); - const originalRoute = page.route.bind(page); - const originalUnroute = page.unroute.bind(page); - let routeHandler: ((route: Route) => Promise) | undefined; - let requestInterceptionEnabled = false; - compatPage.goto = (url, options) => originalGoto(url, normalizeGotoOptions(options)); compatPage.cookies = (urls) => context.cookies(urls); compatPage.setCookie = async (...cookies) => { await context.addCookies(cookies.map((cookie) => withDefaultCookiePath(cookie))); @@ -176,31 +143,7 @@ const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { 'User-Agent': userAgent, }); }; - compatPage.setRequestInterception = async (enabled) => { - 
requestInterceptionEnabled = enabled; - if (enabled && !routeHandler) { - routeHandler = async (route) => { - const request = createRouteRequest(route); - await runRequestHandlers(requestHandlers, request); - if (request.handled) { - return; - } - await route.continue(); - }; - await originalRoute('**/*', routeHandler); - } else if (!enabled && routeHandler) { - await originalUnroute('**/*', routeHandler); - routeHandler = undefined; - } - }; compatPage.on = ((event: string, handler: (...args: any[]) => any) => { - if (event === 'request') { - requestHandlers.push(handler as RequestHandler); - if (!requestInterceptionEnabled) { - originalOn(event, handler); - } - return compatPage; - } if (event === 'requestfinished') { originalOn(event, async (request) => { let response: PlaywrightResponse | null = null; @@ -244,14 +187,24 @@ const createCompatBrowser = async (browser: PlaywrightBrowser, contextOptions: B }; const launchBrowser = async (currentProxy?: ProxyState | null) => { - const launchOptions = getLaunchOptions(currentProxy); - const browser = config.playwrightWSEndpoint ? await chromium.connect(getEndpointWithLaunchOptions(config.playwrightWSEndpoint, launchOptions)) : await chromium.launch(launchOptions); + const browser = config.playwrightWSEndpoint ? await chromium.connect(getBrowserlessEndpoint(config.playwrightWSEndpoint, toBrowserlessLaunchOptions(currentProxy))) : await chromium.launch(getLaunchOptions(currentProxy)); return createCompatBrowser(browser, getContextOptions()); }; -const getEndpointWithLaunchOptions = (endpoint: string, launchOptions: LaunchOptions) => { +// Merge our launch options into the existing `launch` query parameter so endpoint-level options +// (e.g. `?launch=%7B%22stealth%22%3Atrue%7D`) are preserved instead of being overwritten. 
+const getBrowserlessEndpoint = (endpoint: string, launchOptions: BrowserlessLaunchOptions) => { const endpointURL = new URL(endpoint); - endpointURL.searchParams.set('launch-options', JSON.stringify(launchOptions)); + const existing = endpointURL.searchParams.get('launch'); + let merged: BrowserlessLaunchOptions = launchOptions; + if (existing) { + try { + merged = { ...(JSON.parse(existing) as BrowserlessLaunchOptions), ...launchOptions }; + } catch { + // Existing value is not JSON (could be base64 or malformed); leave caller's options as the source of truth. + } + } + endpointURL.searchParams.set('launch', JSON.stringify(merged)); return endpointURL.toString(); }; diff --git a/lib/utils/playwright.worker.ts b/lib/utils/playwright.worker.ts index bdf49429ba3f..6be5bcca5609 100644 --- a/lib/utils/playwright.worker.ts +++ b/lib/utils/playwright.worker.ts @@ -1,5 +1,5 @@ // Worker-compatible Playwright using Cloudflare Browser Run. -import type { Browser as PlaywrightBrowser, BrowserContext, Page as PlaywrightPage, Request as PlaywrightRequest, Response as PlaywrightResponse, Route } from '@cloudflare/playwright'; +import type { Browser as PlaywrightBrowser, BrowserContext, Page as PlaywrightPage, Request as PlaywrightRequest, Response as PlaywrightResponse } from '@cloudflare/playwright'; import { launch } from '@cloudflare/playwright'; import { config } from '@/config'; @@ -12,13 +12,6 @@ type GotoOptions = Parameters[1] & { waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'networkidle0' | 'networkidle2'; }; -type RouteRequest = { - abort: (errorCode?: string) => Promise; - continue: (options?: Parameters[0]) => Promise; - resourceType: () => ReturnType; - url: () => string; -}; - type FinishedRequest = { response: () => { status: () => number; @@ -26,17 +19,14 @@ type FinishedRequest = { url: () => string; }; -type RequestHandler = (request: RouteRequest) => Promise | void; type RequestFinishedHandler = (request: FinishedRequest) => Promise | void; 
-type HandledRouteRequest = RouteRequest & { handled: boolean }; export type Page = PlaywrightPage & { authenticate: (credentials: { password?: string; username?: string }) => Promise; cookies: (urls?: string | string[]) => Promise; goto: (url: string, options?: GotoOptions) => ReturnType; - on: ((event: 'request', handler: RequestHandler) => Page) & ((event: 'requestfinished', handler: RequestFinishedHandler) => Page) & PlaywrightPage['on']; + on: ((event: 'requestfinished', handler: RequestFinishedHandler) => Page) & PlaywrightPage['on']; setCookie: (...cookies: SetCookieParam[]) => Promise; - setRequestInterception: (enabled: boolean) => Promise; setUserAgent: (userAgent: string) => Promise; }; @@ -72,33 +62,6 @@ const normalizeGotoOptions = (options?: GotoOptions): Parameters ('domain' in cookie && !('path' in cookie) ? { ...cookie, path: '/' } : cookie); -const createRouteRequest = (route: Route): HandledRouteRequest => { - const request = route.request(); - const routeRequest = { - abort: async (errorCode) => { - routeRequest.handled = true; - await route.abort(errorCode); - }, - continue: async (options) => { - routeRequest.handled = true; - await route.continue(options); - }, - handled: false, - resourceType: () => request.resourceType(), - url: () => request.url(), - }; - return routeRequest; -}; - -const runRequestHandlers = async (handlers: RequestHandler[], request: HandledRouteRequest, index = 0): Promise => { - if (request.handled || index >= handlers.length) { - return; - } - - await handlers[index](request); - await runRequestHandlers(handlers, request, index + 1); -}; - const createFinishedRequest = (request: PlaywrightRequest, response: PlaywrightResponse | null): FinishedRequest => ({ response: () => response @@ -111,13 +74,8 @@ const createFinishedRequest = (request: PlaywrightRequest, response: PlaywrightR const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { const compatPage = page as Page; - const requestHandlers: 
RequestHandler[] = []; const originalGoto = page.goto.bind(page); const originalOn = page.on.bind(page); - const originalRoute = page.route.bind(page); - const originalUnroute = page.unroute.bind(page); - let routeHandler: ((route: Route) => Promise) | undefined; - let requestInterceptionEnabled = false; compatPage.goto = (url, options) => originalGoto(url, normalizeGotoOptions(options)); compatPage.cookies = (urls) => context.cookies(urls); @@ -130,31 +88,7 @@ const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { 'User-Agent': userAgent, }); }; - compatPage.setRequestInterception = async (enabled) => { - requestInterceptionEnabled = enabled; - if (enabled && !routeHandler) { - routeHandler = async (route) => { - const request = createRouteRequest(route); - await runRequestHandlers(requestHandlers, request); - if (request.handled) { - return; - } - await route.continue(); - }; - await originalRoute('**/*', routeHandler); - } else if (!enabled && routeHandler) { - await originalUnroute('**/*', routeHandler); - routeHandler = undefined; - } - }; compatPage.on = ((event: string, handler: (...args: any[]) => any) => { - if (event === 'request') { - requestHandlers.push(handler as RequestHandler); - if (!requestInterceptionEnabled) { - originalOn(event, handler); - } - return compatPage; - } if (event === 'requestfinished') { originalOn(event, async (request) => { const response = await request.response(); diff --git a/package.json b/package.json index 35963dfbaa64..27a92dcb93a1 100644 --- a/package.json +++ b/package.json @@ -115,7 +115,7 @@ "otplib": "13.4.0", "p-map": "7.0.4", "pac-proxy-agent": "9.0.1", - "playwright": "1.59.1", + "patchright": "1.59.1", "query-string": "9.3.1", "rate-limiter-flexible": "11.1.0", "re2js": "2.6.1", @@ -235,7 +235,8 @@ "esbuild", "eslint-nibble", "msw", - "playwright", + "patchright", + "patchright-core", "protobufjs", "rolldown", "sharp", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 
1b4e2094b629..d2559957375a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -205,7 +205,7 @@ importers: pac-proxy-agent: specifier: 9.0.1 version: 9.0.1 - playwright: + patchright: specifier: 1.59.1 version: 1.59.1 query-string: @@ -5216,6 +5216,16 @@ packages: parseley@0.13.1: resolution: {integrity: sha512-uNBJZzmb60l6p6VWLTmevizNAGnE0xoSf1n0B4q3ntegDNzcS68NRCcBDZTcyXHxt2XhBChsCuqj4M+nChvE/A==} + patchright-core@1.59.1: + resolution: {integrity: sha512-VthHtavFwFgs5VRalZtbacmjw8E7SPXPhGjfOakvjPsrJcIHl/i7K9lgKYpevy4F90+/GQSJiO0Mt58JB4+9HQ==} + engines: {node: '>=18'} + hasBin: true + + patchright@1.59.1: + resolution: {integrity: sha512-nrWFE1/U3qu9ybrgNFJt75iTiszvZ23Dn4S6UDSydbXJtbyBXRVGenHowKq6n7hmArGYPVTfuDqiXVH7avPkiw==} + engines: {node: '>=18'} + hasBin: true + path-browserify@1.0.1: resolution: {integrity: sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==} @@ -5285,16 +5295,6 @@ packages: resolution: {integrity: sha512-r34yH/GlQpKZbU1BvFFqOjhISRo1MNx1tWYsYvmj6KIRHSPMT2+yHOEb1SG6NMvRoHRF0a07kCOox/9yakl1vg==} hasBin: true - playwright-core@1.59.1: - resolution: {integrity: sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==} - engines: {node: '>=18'} - hasBin: true - - playwright@1.59.1: - resolution: {integrity: sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==} - engines: {node: '>=18'} - hasBin: true - pluralize@8.0.0: resolution: {integrity: sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==} engines: {node: '>=4'} @@ -11137,6 +11137,14 @@ snapshots: leac: 0.7.0 peberminta: 0.10.0 + patchright-core@1.59.1: {} + + patchright@1.59.1: + dependencies: + patchright-core: 1.59.1 + optionalDependencies: + fsevents: 2.3.2 + path-browserify@1.0.1: {} path-exists@4.0.0: {} @@ -11204,14 +11212,6 @@ snapshots: sonic-boom: 4.2.1 thread-stream: 4.0.0 - playwright-core@1.59.1: {} - - 
playwright@1.59.1: - dependencies: - playwright-core: 1.59.1 - optionalDependencies: - fsevents: 2.3.2 - pluralize@8.0.0: {} postcss@8.5.10: From f9820b6519e854b0df1036fd8c4a5df622d224e0 Mon Sep 17 00:00:00 2001 From: Tony Date: Mon, 11 May 2026 21:44:25 +0800 Subject: [PATCH 02/10] refactor: migrate setRequestInterception and waitUntil --- lib/bilibili-video-route.test.ts | 6 +++--- lib/routes/alternativeto/utils.ts | 6 +++--- lib/routes/apkpure/versions.ts | 6 +++--- lib/routes/bilibili/video.ts | 6 +++--- lib/routes/bluestacks/release.ts | 12 ++++++------ lib/routes/ccac/news.ts | 6 +++--- lib/routes/chinatimes/index.ts | 6 +++--- lib/routes/cjlu/yjsy/index.ts | 8 ++++---- lib/routes/cmde/index.ts | 12 ++++++------ lib/routes/colamanga/manga.ts | 7 +++---- lib/routes/cw/utils.ts | 12 ++++++------ lib/routes/dailypush/utils.ts | 8 ++++---- lib/routes/dcard/section.ts | 6 +++--- lib/routes/dcard/utils.ts | 6 +++--- lib/routes/douyin/hashtag.ts | 8 ++++---- lib/routes/douyin/live.ts | 9 ++++----- lib/routes/douyin/user.ts | 9 ++++----- lib/routes/fortnite/news.ts | 9 ++++----- lib/routes/gov/customs/utils.ts | 6 +++--- lib/routes/gov/hangzhou/zjzwfw.ts | 10 +++++----- lib/routes/gov/pbc/goutongjiaoliu.ts | 12 ++++++------ lib/routes/gov/pbc/trade-announcement.ts | 12 ++++++------ lib/routes/hkushop/vinyl-or-picture-lp.ts | 6 +++--- lib/routes/hottoys/index.ts | 7 +++---- lib/routes/ielts/index.ts | 6 +++--- lib/routes/iqiyi/video.ts | 6 +++--- lib/routes/iwara/ranking.ts | 8 ++++---- lib/routes/javdb/utils.ts | 12 ++++++------ lib/routes/javtrailers/utils.ts | 6 +++--- lib/routes/kuaishou/profile.ts | 9 ++++----- lib/routes/linkedin/posts.ts | 7 +++---- lib/routes/njust/utils.ts | 8 ++++---- lib/routes/nuaa/utils/pypasswaf.ts | 8 ++++---- lib/routes/nytimes/utils.ts | 6 +++--- lib/routes/parliament.uk/commonslibrary.ts | 6 +++--- lib/routes/parliament.uk/lordslibrary.ts | 6 +++--- lib/routes/perplexity/blog.ts | 12 ++++++------ lib/routes/perplexity/changelog.ts | 
12 ++++++------ lib/routes/picnob/utils.ts | 6 +++--- lib/routes/picuki/profile.ts | 6 +++--- lib/routes/pincong/utils.ts | 6 +++--- lib/routes/researchgate/publications.ts | 12 ++++++------ lib/routes/science/blogs.ts | 7 +++---- lib/routes/sotwe/user.ts | 6 +++--- lib/routes/tiktok/user.ts | 8 ++++---- lib/routes/uchicago/current.ts | 12 ++++++------ lib/routes/uestc/auto.ts | 8 ++++---- lib/routes/uestc/cqe.ts | 8 ++++---- lib/routes/uestc/scse.ts | 8 ++++---- lib/routes/uestc/sice.ts | 8 ++++---- lib/routes/uestc/sise.ts | 8 ++++---- lib/routes/ups/track.ts | 10 ++++------ lib/routes/uraaka-joshi/uraaka-joshi-user.ts | 6 +++--- lib/routes/uraaka-joshi/uraaka-joshi.ts | 6 +++--- lib/routes/weibo/utils.ts | 11 +++++------ lib/routes/xiaohongshu/util.ts | 12 ++++++------ lib/routes/xsijishe/utils.ts | 6 +++--- lib/routes/xueqiu/cookies.ts | 6 +++--- lib/utils/playwright.mock.test.ts | 19 ------------------- lib/utils/playwright.worker.ts | 17 +---------------- 60 files changed, 229 insertions(+), 274 deletions(-) diff --git a/lib/bilibili-video-route.test.ts b/lib/bilibili-video-route.test.ts index 09f601237bd5..4cc71db1080c 100644 --- a/lib/bilibili-video-route.test.ts +++ b/lib/bilibili-video-route.test.ts @@ -11,15 +11,15 @@ const destroy = vi.fn(); const getPlaywrightPage = vi.fn(); const goto = vi.fn(); const on = vi.fn(); +const pageRoute = vi.fn(); const setCookie = vi.fn(); -const setRequestInterception = vi.fn(); const waitForResponse = vi.fn(); const page = { goto, on, + route: pageRoute, setCookie, - setRequestInterception, waitForResponse, }; @@ -66,8 +66,8 @@ describe('/bilibili/user/video/:uid', () => { getPlaywrightPage.mockReset(); goto.mockReset(); on.mockReset(); + pageRoute.mockReset(); setCookie.mockReset(); - setRequestInterception.mockReset(); waitForResponse.mockReset(); }); diff --git a/lib/routes/alternativeto/utils.ts b/lib/routes/alternativeto/utils.ts index 08fd66814fef..f7f74eddf774 100644 --- a/lib/routes/alternativeto/utils.ts +++ 
b/lib/routes/alternativeto/utils.ts @@ -6,9 +6,9 @@ const playwrightGet = (url, cache) => cache.tryGet(url, async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/apkpure/versions.ts b/lib/routes/apkpure/versions.ts index 911f05d2ae65..765dc6d66c0b 100644 --- a/lib/routes/apkpure/versions.ts +++ b/lib/routes/apkpure/versions.ts @@ -30,9 +30,9 @@ async function handler(ctx) { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); logger.http(`Requesting ${link}`); await page.goto(link, { diff --git a/lib/routes/bilibili/video.ts b/lib/routes/bilibili/video.ts index 9b2a66bdfae5..dee905fcd858 100644 --- a/lib/routes/bilibili/video.ts +++ b/lib/routes/bilibili/video.ts @@ -184,9 +184,9 @@ async function fetchVideoListFromBrowser(uid: string): Promise { await applyCookie(page, cookie); } - await page.setRequestInterception(true); - page.on('request', (request) => { - allowedBrowserRequestTypes.has(request.resourceType()) ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + allowedBrowserRequestTypes.has(request.resourceType()) ? 
route.continue() : route.abort(); }); }, gotoConfig: { waitUntil: 'domcontentloaded' }, diff --git a/lib/routes/bluestacks/release.ts b/lib/routes/bluestacks/release.ts index bf3c823ecf1e..c54ecf19f71f 100644 --- a/lib/routes/bluestacks/release.ts +++ b/lib/routes/bluestacks/release.ts @@ -34,9 +34,9 @@ export const route: Route = { async function handler() { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(pageUrl, { waitUntil: 'domcontentloaded', @@ -60,9 +60,9 @@ async function handler() { items.map((item) => cache.tryGet(item.link, async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(item.link, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/ccac/news.ts b/lib/routes/ccac/news.ts index cf893ee8e771..68bfaa99ade8 100644 --- a/lib/routes/ccac/news.ts +++ b/lib/routes/ccac/news.ts @@ -38,9 +38,9 @@ async function handler(ctx) { const BASE = utils.langBase(lang); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? 
request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(BASE, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/chinatimes/index.ts b/lib/routes/chinatimes/index.ts index bfeb05a87d93..2ebf7feb4682 100644 --- a/lib/routes/chinatimes/index.ts +++ b/lib/routes/chinatimes/index.ts @@ -67,9 +67,9 @@ async function handler(ctx) { list.map((item) => cache.tryGet(item.link, async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); logger.http(`Requesting ${item.link}`); await page.goto(item.link, { diff --git a/lib/routes/cjlu/yjsy/index.ts b/lib/routes/cjlu/yjsy/index.ts index 68804d6e1930..0524ccf6f928 100644 --- a/lib/routes/cjlu/yjsy/index.ts +++ b/lib/routes/cjlu/yjsy/index.ts @@ -90,12 +90,12 @@ async function handler(ctx) { onBeforeLoad: async (page) => { await page.setExtraHTTPHeaders(headers); await page.setUserAgent(headers['User-Agent']); - await page.setRequestInterception(true); - page.on('request', (request) => { - allowedResourceTypes.has(request.resourceType()) ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + allowedResourceTypes.has(request.resourceType()) ? 
route.continue() : route.abort(); }); }, - gotoConfig: { waitUntil: 'networkidle2' }, + gotoConfig: { waitUntil: 'networkidle' }, }); const cookies = await browser.cookies(); diff --git a/lib/routes/cmde/index.ts b/lib/routes/cmde/index.ts index 0f7776e45237..6aa6e6476ecc 100644 --- a/lib/routes/cmde/index.ts +++ b/lib/routes/cmde/index.ts @@ -21,9 +21,9 @@ async function handler(ctx) { const browser = await playwright(); const data = await cache.tryGet(url, async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', @@ -53,9 +53,9 @@ async function handler(ctx) { data.items.map((item) => cache.tryGet(item.link, async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(item.link, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/colamanga/manga.ts b/lib/routes/colamanga/manga.ts index 626f1b0d8067..9601e8e8809c 100644 --- a/lib/routes/colamanga/manga.ts +++ b/lib/routes/colamanga/manga.ts @@ -47,10 +47,9 @@ async function handler(ctx: Context) { const page = await browser.newPage(); - await page.setRequestInterception(true); - - page.on('request', (request) => { - request.resourceType() === 'document' ? 
request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); logger.http(`Requesting ${url}`); diff --git a/lib/routes/cw/utils.ts b/lib/routes/cw/utils.ts index f1f9a9859505..450886917408 100644 --- a/lib/routes/cw/utils.ts +++ b/lib/routes/cw/utils.ts @@ -33,9 +33,9 @@ const getCookie = async (browser, tryGet) => { if (!cookie) { cookie = await tryGet('cw:cookie', async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); logger.http(`Requesting ${baseUrl}/user/get/cookie-bar`); await page.goto(`${baseUrl}/user/get/cookie-bar`, { @@ -54,9 +54,9 @@ const parsePage = async (path, browser, ctx) => { const cookie = await getCookie(browser, cache.tryGet); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); }); await setCookies(page, cookie, 'cw.com.tw'); logger.http(`Requesting ${pageUrl}`); diff --git a/lib/routes/dailypush/utils.ts b/lib/routes/dailypush/utils.ts index 1fd24ec3ad85..a276c93fb5cc 100644 --- a/lib/routes/dailypush/utils.ts +++ b/lib/routes/dailypush/utils.ts @@ -23,14 +23,14 @@ export interface ArticleItem { const allowedRequestTypes = new Set(['document']); async function preparePage(page: Page) { - await page.setRequestInterception(true); - page.on('request', (request) => { + await page.route('**/*', (route) => { + const request = route.request(); if (allowedRequestTypes.has(request.resourceType())) { - request.continue(); + route.continue(); return; } - request.abort(); + route.abort(); }); } diff --git a/lib/routes/dcard/section.ts b/lib/routes/dcard/section.ts index a32f14582405..103ecbe81e89 100644 --- a/lib/routes/dcard/section.ts +++ b/lib/routes/dcard/section.ts @@ -49,9 +49,9 @@ async function handler(ctx) { } const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); }); await page.setExtraHTTPHeaders({ referer: `https://www.dcard.tw/f/${section}`, diff --git a/lib/routes/dcard/utils.ts b/lib/routes/dcard/utils.ts index 083c736d6dd6..8293e7d120c8 100644 --- a/lib/routes/dcard/utils.ts +++ b/lib/routes/dcard/utils.ts @@ -11,9 +11,9 @@ const ProcessFeed = async (items, cookies, browser, limit, cache) => { // try catch 处理被删除的帖子 try { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' || request.resourceType() === 'xhr' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' || request.resourceType() === 'xhr' ? route.continue() : route.abort(); }); await page.setExtraHTTPHeaders({ referer: `https://www.dcard.tw/f/${i.forumAlias}/p/${i.id}`, diff --git a/lib/routes/douyin/hashtag.ts b/lib/routes/douyin/hashtag.ts index 47f3f01fa9e3..557ea5330d7f 100644 --- a/lib/routes/douyin/hashtag.ts +++ b/lib/routes/douyin/hashtag.ts @@ -49,10 +49,10 @@ async function handler(ctx) { async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); let awemeList = ''; - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? 
route.continue() : route.abort(); }); page.on('response', async (response) => { const request = response.request(); @@ -61,7 +61,7 @@ async function handler(ctx) { } }); await page.goto(tagUrl, { - waitUntil: 'networkidle2', + waitUntil: 'networkidle', }); await page.waitForSelector('#RENDER_DATA'); const html = await page.evaluate(() => document.querySelector('#RENDER_DATA').textContent); diff --git a/lib/routes/douyin/live.ts b/lib/routes/douyin/live.ts index c1d3f7a438b4..ba517a3ba3bd 100644 --- a/lib/routes/douyin/live.ts +++ b/lib/routes/douyin/live.ts @@ -44,10 +44,9 @@ async function handler(ctx) { let roomInfo; const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'stylesheet' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'stylesheet' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? 
route.continue() : route.abort(); }); page.on('response', async (response) => { const request = response.request(); @@ -57,7 +56,7 @@ async function handler(ctx) { }); logger.http(`Requesting ${pageUrl}`); await page.goto(pageUrl, { - waitUntil: 'networkidle2', + waitUntil: 'networkidle', }); await browser.close(); diff --git a/lib/routes/douyin/user.ts b/lib/routes/douyin/user.ts index bbfbb8edc50f..38ae08da8db1 100644 --- a/lib/routes/douyin/user.ts +++ b/lib/routes/douyin/user.ts @@ -52,10 +52,9 @@ async function handler(ctx) { let postData; const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? route.continue() : route.abort(); }); page.on('response', async (response) => { const request = response.request(); @@ -66,7 +65,7 @@ async function handler(ctx) { logger.http(`Requesting ${pageUrl}`); await page.goto(pageUrl, { - waitUntil: 'networkidle2', + waitUntil: 'networkidle', }); await browser.close(); diff --git a/lib/routes/fortnite/news.ts b/lib/routes/fortnite/news.ts index a64faa80dc4d..00c352f65bca 100644 --- a/lib/routes/fortnite/news.ts +++ b/lib/routes/fortnite/news.ts @@ -43,11 +43,10 @@ async function handler(ctx) { const browser = await playwright(); const page = await browser.newPage(); - // intercept all requests - await page.setRequestInterception(true); // only document is allowed - page.on('request', (request) => { - request.resourceType() === 'document' ? 
request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); // log manually (necessary for Playwright) @@ -55,7 +54,7 @@ async function handler(ctx) { let data; try { const response = await page.goto(apiUrl, { - waitUntil: 'networkidle0', + waitUntil: 'networkidle', }); if (!response) { throw new Error(`No response received from ${apiUrl}`); diff --git a/lib/routes/gov/customs/utils.ts b/lib/routes/gov/customs/utils.ts index 798cdf0225d1..854e654e0cda 100644 --- a/lib/routes/gov/customs/utils.ts +++ b/lib/routes/gov/customs/utils.ts @@ -3,9 +3,9 @@ const host = 'http://www.customs.gov.cn'; const playwrightGet = async (url, browser) => { const page = await browser.newPage(); await page.setExtraHTTPHeaders({ referer: host }); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/gov/hangzhou/zjzwfw.ts b/lib/routes/gov/hangzhou/zjzwfw.ts index 24a01b5c0725..6c5076e7da8f 100644 --- a/lib/routes/gov/hangzhou/zjzwfw.ts +++ b/lib/routes/gov/hangzhou/zjzwfw.ts @@ -4,17 +4,17 @@ export async function crawler(item: any, browser: any): Promise { try { let response = ''; const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { + await page.route('**/*', (route) => { + const request = route.request(); const resourceType = request.resourceType(); if (['document', 'script', 'stylesheet', 'xhr'].includes(resourceType)) { - request.continue(); + route.continue(); } else { - request.abort(); + route.abort(); } }); await page.goto(item.link, { - waitUntil: 'networkidle0', + waitUntil: 'networkidle', timeout: 29000, }); const selector = '.item-left .item .title .button'; diff --git a/lib/routes/gov/pbc/goutongjiaoliu.ts b/lib/routes/gov/pbc/goutongjiaoliu.ts index c87ecd3e8fe7..e913d9342ab5 100644 --- a/lib/routes/gov/pbc/goutongjiaoliu.ts +++ b/lib/routes/gov/pbc/goutongjiaoliu.ts @@ -35,9 +35,9 @@ async function handler() { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); }); await page.goto(link, { waitUntil: 'domcontentloaded', @@ -60,9 +60,9 @@ async function handler() { list.map((item) => cache.tryGet(item.link, async () => { const detailPage = await browser.newPage(); - await detailPage.setRequestInterception(true); - detailPage.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await detailPage.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await detailPage.goto(item.link, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/gov/pbc/trade-announcement.ts b/lib/routes/gov/pbc/trade-announcement.ts index fbefde745579..323b17ebef97 100644 --- a/lib/routes/gov/pbc/trade-announcement.ts +++ b/lib/routes/gov/pbc/trade-announcement.ts @@ -29,9 +29,9 @@ async function handler() { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(link, { waitUntil: 'domcontentloaded', @@ -53,9 +53,9 @@ async function handler() { list.map((item) => cache.tryGet(item.link, async () => { const detailPage = await browser.newPage(); - await detailPage.setRequestInterception(true); - detailPage.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? 
request.continue() : request.abort(); + await detailPage.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await detailPage.goto(item.link, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/hkushop/vinyl-or-picture-lp.ts b/lib/routes/hkushop/vinyl-or-picture-lp.ts index 0f80c6f6ee8a..b8b88d33567b 100644 --- a/lib/routes/hkushop/vinyl-or-picture-lp.ts +++ b/lib/routes/hkushop/vinyl-or-picture-lp.ts @@ -43,9 +43,9 @@ async function handler(ctx) { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/hottoys/index.ts b/lib/routes/hottoys/index.ts index 5684067c9822..d7a4e0def8ce 100644 --- a/lib/routes/hottoys/index.ts +++ b/lib/routes/hottoys/index.ts @@ -29,11 +29,10 @@ async function handler() { // 打开一个新标签页 const page = await browser.newPage(); // 拦截所有请求 - await page.setRequestInterception(true); - - page.on('request', (request) => { + await page.route('**/*', (route) => { + const request = route.request(); // 在这次例子,我们只允许 HTML 请求 - request.resourceType() === 'document' ? request.continue() : request.abort(); + request.resourceType() === 'document' ? 
route.continue() : route.abort(); }); await page.goto(baseUrl, { diff --git a/lib/routes/ielts/index.ts b/lib/routes/ielts/index.ts index 6a347c3af307..f1a26b4a0eb6 100644 --- a/lib/routes/ielts/index.ts +++ b/lib/routes/ielts/index.ts @@ -30,9 +30,9 @@ async function handler() { async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(targetUrl, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/iqiyi/video.ts b/lib/routes/iqiyi/video.ts index f30d3a5a44da..582269b1cf49 100644 --- a/lib/routes/iqiyi/video.ts +++ b/lib/routes/iqiyi/video.ts @@ -42,9 +42,9 @@ async function handler(ctx) { link, async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); }); logger.http(`Requesting ${link}`); await page.goto(link, { diff --git a/lib/routes/iwara/ranking.ts b/lib/routes/iwara/ranking.ts index fe2aef5abb10..42b66d6c2afc 100644 --- a/lib/routes/iwara/ranking.ts +++ b/lib/routes/iwara/ranking.ts @@ -60,13 +60,13 @@ async function handler(ctx) { async () => { const { page, destroy } = await getPlaywrightPage(url, { onBeforeLoad: async (page) => { - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' || request.resourceType() === 'fetch' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' || request.resourceType() === 'fetch' ? route.continue() : route.abort(); }); }, gotoConfig: { - waitUntil: 'networkidle0', + waitUntil: 'networkidle', }, }); diff --git a/lib/routes/javdb/utils.ts b/lib/routes/javdb/utils.ts index eaccdbef7c1a..e51e9be8341d 100644 --- a/lib/routes/javdb/utils.ts +++ b/lib/routes/javdb/utils.ts @@ -27,9 +27,9 @@ const ProcessItems = async (ctx, currentUrl, title) => { path: '/', }); } - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); await page.goto(url.href, { waitUntil: 'domcontentloaded', @@ -57,9 +57,9 @@ const ProcessItems = async (ctx, currentUrl, title) => { items.map((item) => cache.tryGet(item.link, async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? 
request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); logger.http(`Requesting ${item.link}`); await page.goto(item.link, { diff --git a/lib/routes/javtrailers/utils.ts b/lib/routes/javtrailers/utils.ts index 5ddea1f3133f..7d6cdfed20b3 100644 --- a/lib/routes/javtrailers/utils.ts +++ b/lib/routes/javtrailers/utils.ts @@ -30,9 +30,9 @@ export const parseList = (videos) => export const playwrightFetch = async (url: string, browser) => { const page = await browser.newPage(); await page.setExtraHTTPHeaders(headers); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); logger.http(`Requesting ${url}`); diff --git a/lib/routes/kuaishou/profile.ts b/lib/routes/kuaishou/profile.ts index cfd25b5fcfd3..b4a5564fe7f2 100644 --- a/lib/routes/kuaishou/profile.ts +++ b/lib/routes/kuaishou/profile.ts @@ -34,13 +34,12 @@ async function handler(ctx) { const promise = new Promise((res) => { resolve = res; }); - await page.setRequestInterception(true); - page.on('request', (req) => { - const resourceType = req.resourceType(); + await page.route('**/*', (route) => { + const resourceType = route.request().resourceType(); if (resourceType === 'image' || resourceType === 'media' || resourceType === 'font' || resourceType === 'stylesheet' || resourceType === 'ping') { - req.abort(); + route.abort(); } else { - req.continue(); + route.continue(); } }); page.on('response', async (res) => { diff --git a/lib/routes/linkedin/posts.ts b/lib/routes/linkedin/posts.ts index 7dd01b650fe5..2fbe6c6d92d4 100644 --- a/lib/routes/linkedin/posts.ts +++ b/lib/routes/linkedin/posts.ts @@ -29,10 +29,9 @@ export const 
route: Route = { // Puppeteer setup const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); const url = new URL(`${BASE_URL}/company/${company_id}`); diff --git a/lib/routes/njust/utils.ts b/lib/routes/njust/utils.ts index 7db782c84fde..77ce149e34de 100644 --- a/lib/routes/njust/utils.ts +++ b/lib/routes/njust/utils.ts @@ -13,12 +13,12 @@ async function getContent(url, pptr = false) { // get: () => undefined, // }); // }); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(url, { - waitUntil: 'networkidle0', + waitUntil: 'networkidle', }); const content = await page.content(); return content; diff --git a/lib/routes/nuaa/utils/pypasswaf.ts b/lib/routes/nuaa/utils/pypasswaf.ts index ca34af3458e7..94d0c59ff27c 100644 --- a/lib/routes/nuaa/utils/pypasswaf.ts +++ b/lib/routes/nuaa/utils/pypasswaf.ts @@ -8,13 +8,13 @@ import { getCookies } from '@/utils/playwright-utils'; export default async function getCookie(host) { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? 
request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(host, { - waitUntil: 'networkidle0', + waitUntil: 'networkidle', }); const cookie = await getCookies(page); diff --git a/lib/routes/nytimes/utils.ts b/lib/routes/nytimes/utils.ts index 2db5209d5139..6c12f409d55a 100644 --- a/lib/routes/nytimes/utils.ts +++ b/lib/routes/nytimes/utils.ts @@ -18,9 +18,9 @@ const ProcessImage = ($, e) => { const PuppeterGetter = async (ctx, browser, link) => { const result = await cache.tryGet(`nyt: ${link}`, async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); await page.goto(link, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/parliament.uk/commonslibrary.ts b/lib/routes/parliament.uk/commonslibrary.ts index 1981f4eb2707..b382c3f54a90 100644 --- a/lib/routes/parliament.uk/commonslibrary.ts +++ b/lib/routes/parliament.uk/commonslibrary.ts @@ -28,9 +28,9 @@ async function handler(ctx) { const url = `${baseUrl}/type/${topic}/`; const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? 
route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/parliament.uk/lordslibrary.ts b/lib/routes/parliament.uk/lordslibrary.ts index 8f548d297a5b..dd85f0539c63 100644 --- a/lib/routes/parliament.uk/lordslibrary.ts +++ b/lib/routes/parliament.uk/lordslibrary.ts @@ -28,9 +28,9 @@ async function handler(ctx) { const url = `${baseUrl}/type/${topic}/`; const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/perplexity/blog.ts b/lib/routes/perplexity/blog.ts index 64acdeaf5220..a8f24d538daa 100644 --- a/lib/routes/perplexity/blog.ts +++ b/lib/routes/perplexity/blog.ts @@ -40,9 +40,9 @@ async function handler(ctx: Context) { const { page, destroy, browser } = await getPlaywrightPage(rootUrl, { onBeforeLoad: async (page) => { - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); }, }); @@ -114,9 +114,9 @@ async function handler(ctx: Context) { return (await cache.tryGet(item.link, async () => { const contentPage = await browser.newPage(); - await contentPage.setRequestInterception(true); - contentPage.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await contentPage.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? 
route.continue() : route.abort(); }); await contentPage.goto(item.link!, { diff --git a/lib/routes/perplexity/changelog.ts b/lib/routes/perplexity/changelog.ts index 2f0f0c6e1b11..ffdc1f93645d 100644 --- a/lib/routes/perplexity/changelog.ts +++ b/lib/routes/perplexity/changelog.ts @@ -18,9 +18,9 @@ export const handler = async (ctx: Context): Promise => { const { page, destroy, browser } = await getPlaywrightPage(targetUrl, { onBeforeLoad: async (page) => { - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); }, }); @@ -104,9 +104,9 @@ export const handler = async (ctx: Context): Promise => { const contentPage = await browser.newPage(); // Set request interception for this page - await contentPage.setRequestInterception(true); - contentPage.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await contentPage.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); // Navigate to the item link diff --git a/lib/routes/picnob/utils.ts b/lib/routes/picnob/utils.ts index 496c7a8412d9..76609b70f2c1 100644 --- a/lib/routes/picnob/utils.ts +++ b/lib/routes/picnob/utils.ts @@ -4,9 +4,9 @@ const playwrightGet = async (url) => { let data; const { destroy } = await getPlaywrightPage(url, { onBeforeLoad: async (page) => { - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? 
route.continue() : route.abort(); }); page.on('response', async (response) => { data = await (response.request().url().includes('/api/posts') ? response.json() : response.text()); diff --git a/lib/routes/picuki/profile.ts b/lib/routes/picuki/profile.ts index b556d54e3e35..b53e48f85726 100644 --- a/lib/routes/picuki/profile.ts +++ b/lib/routes/picuki/profile.ts @@ -88,9 +88,9 @@ async function handler(ctx) { const { page, destroy } = await getPlaywrightPage(profileUrl, { onBeforeLoad: async (page) => { const expectResourceTypes = new Set(['document', 'script', 'xhr', 'fetch']); - await page.setRequestInterception(true); - page.on('request', (request) => { - expectResourceTypes.has(request.resourceType()) ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + expectResourceTypes.has(request.resourceType()) ? route.continue() : route.abort(); }); }, }); diff --git a/lib/routes/pincong/utils.ts b/lib/routes/pincong/utils.ts index 256aea609207..0897e5b53838 100644 --- a/lib/routes/pincong/utils.ts +++ b/lib/routes/pincong/utils.ts @@ -6,9 +6,9 @@ const playwrightGet = (url, cache) => cache.tryGet(url, async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? 
route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/researchgate/publications.ts b/lib/routes/researchgate/publications.ts index 296640a034b3..89b21ed8e62e 100644 --- a/lib/routes/researchgate/publications.ts +++ b/lib/routes/researchgate/publications.ts @@ -25,9 +25,9 @@ async function handler(ctx) { const currentUrl = `${rootUrl}/profile/${id}`; const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(currentUrl); const response = await page.evaluate(() => document.documentElement.innerHTML); @@ -50,9 +50,9 @@ async function handler(ctx) { list.map((item) => cache.tryGet(item.link, async () => { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); }); await page.goto(item.link); const detailResponse = await page.evaluate(() => document.documentElement.innerHTML); diff --git a/lib/routes/science/blogs.ts b/lib/routes/science/blogs.ts index 37d64fcbbf98..9205b478e4a3 100644 --- a/lib/routes/science/blogs.ts +++ b/lib/routes/science/blogs.ts @@ -42,10 +42,9 @@ async function handler(ctx) { async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); await page.goto(link, { diff --git a/lib/routes/sotwe/user.ts b/lib/routes/sotwe/user.ts index 3507d7fad535..97a3e30276ad 100644 --- a/lib/routes/sotwe/user.ts +++ b/lib/routes/sotwe/user.ts @@ -63,9 +63,9 @@ async function handler(ctx) { async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - ['document', 'script', 'xhr', 'fetch'].includes(request.resourceType()) ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + ['document', 'script', 'xhr', 'fetch'].includes(request.resourceType()) ? 
route.continue() : route.abort(); }); const apiUrl = `${baseUrl}/api/v3/user/${id}/`; logger.http(`Requesting ${apiUrl}`); diff --git a/lib/routes/tiktok/user.ts b/lib/routes/tiktok/user.ts index 5d12b229fcca..f6b05d2beab5 100644 --- a/lib/routes/tiktok/user.ts +++ b/lib/routes/tiktok/user.ts @@ -45,10 +45,10 @@ async function handler(ctx) { async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); let itemList = { itemList: [] }; - page.on('request', (request) => { - ['document', 'script', 'xhr', 'fetch'].includes(request.resourceType()) ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + ['document', 'script', 'xhr', 'fetch'].includes(request.resourceType()) ? route.continue() : route.abort(); }); page.on('response', async (response) => { const request = response.request(); @@ -57,7 +57,7 @@ async function handler(ctx) { } }); await page.goto(`${baseUrl}/${user}`, { - waitUntil: 'networkidle0', + waitUntil: 'networkidle', }); const pageHtml = await page.content(); diff --git a/lib/routes/uchicago/current.ts b/lib/routes/uchicago/current.ts index d0f0595f17e7..7a600619c756 100644 --- a/lib/routes/uchicago/current.ts +++ b/lib/routes/uchicago/current.ts @@ -37,9 +37,9 @@ async function handler(ctx) { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? 
route.continue() : route.abort(); }); logger.http(`Requesting ${link}`); await page.goto(link, { @@ -59,9 +59,9 @@ async function handler(ctx) { cache.tryGet(item.link, async () => { const page = await browser.newPage(); await setCookies(page, cookies, 'journals.uchicago.edu'); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); logger.http(`Requesting ${item.link}`); await page.goto(item.link, { diff --git a/lib/routes/uestc/auto.ts b/lib/routes/uestc/auto.ts index 2b861f3760f1..0191c69d1c53 100644 --- a/lib/routes/uestc/auto.ts +++ b/lib/routes/uestc/auto.ts @@ -34,12 +34,12 @@ export const route: Route = { async function handler() { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(baseIndexUrl, { - waitUntil: 'networkidle2', + waitUntil: 'networkidle', }); const content = await page.content(); await browser.close(); diff --git a/lib/routes/uestc/cqe.ts b/lib/routes/uestc/cqe.ts index 4ba17e54f545..d1b50285e155 100644 --- a/lib/routes/uestc/cqe.ts +++ b/lib/routes/uestc/cqe.ts @@ -54,12 +54,12 @@ async function handler(ctx) { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? 
request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(baseUrl + pageUrl, { - waitUntil: 'networkidle2', + waitUntil: 'networkidle', }); const content = await page.content(); await browser.close(); diff --git a/lib/routes/uestc/scse.ts b/lib/routes/uestc/scse.ts index d50a94507b36..a693a275b224 100644 --- a/lib/routes/uestc/scse.ts +++ b/lib/routes/uestc/scse.ts @@ -48,12 +48,12 @@ export const route: Route = { async function handler() { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(baseIndexUrl, { - waitUntil: 'networkidle2', + waitUntil: 'networkidle', }); const content = await page.content(); await browser.close(); diff --git a/lib/routes/uestc/sice.ts b/lib/routes/uestc/sice.ts index 664b0decf091..d26ab8229db6 100644 --- a/lib/routes/uestc/sice.ts +++ b/lib/routes/uestc/sice.ts @@ -35,12 +35,12 @@ export const route: Route = { async function handler() { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); }); await page.goto(baseIndexUrl, { - waitUntil: 'networkidle2', + waitUntil: 'networkidle', }); const content = await page.content(); await browser.close(); diff --git a/lib/routes/uestc/sise.ts b/lib/routes/uestc/sise.ts index 688db6c63f52..1e2f3f66afe4 100644 --- a/lib/routes/uestc/sise.ts +++ b/lib/routes/uestc/sise.ts @@ -69,12 +69,12 @@ async function handler(ctx) { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(baseUrl, { - waitUntil: 'networkidle2', + waitUntil: 'networkidle', }); const content = await page.content(); await browser.close(); diff --git a/lib/routes/ups/track.ts b/lib/routes/ups/track.ts index 5421c6d30dde..9a995cda147c 100644 --- a/lib/routes/ups/track.ts +++ b/lib/routes/ups/track.ts @@ -29,14 +29,12 @@ async function handler(ctx) { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - // skip loading images, stylesheets, and fonts - page.on('request', (request) => { - if (['image', 'stylesheet', 'font', 'ping', 'fetch'].includes(request.resourceType())) { - request.abort(); + await page.route('**/*', (route) => { + if (['image', 'stylesheet', 'font', 'ping', 'fetch'].includes(route.request().resourceType())) { + route.abort(); } else { - request.continue(); + route.continue(); } }); diff --git a/lib/routes/uraaka-joshi/uraaka-joshi-user.ts b/lib/routes/uraaka-joshi/uraaka-joshi-user.ts index 799a3b8ad8c4..8d003e827852 100644 --- a/lib/routes/uraaka-joshi/uraaka-joshi-user.ts +++ 
b/lib/routes/uraaka-joshi/uraaka-joshi-user.ts @@ -40,9 +40,9 @@ async function handler(ctx) { async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' ? route.continue() : route.abort(); }); page.on('requestfinished', async (request) => { if (request.url() === link && request.response().status() === 403) { diff --git a/lib/routes/uraaka-joshi/uraaka-joshi.ts b/lib/routes/uraaka-joshi/uraaka-joshi.ts index e2e2df4bd246..b3577070f5c2 100644 --- a/lib/routes/uraaka-joshi/uraaka-joshi.ts +++ b/lib/routes/uraaka-joshi/uraaka-joshi.ts @@ -28,9 +28,9 @@ async function handler() { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' ? 
route.continue() : route.abort(); }); page.on('requestfinished', async (request) => { if (request.url() === link && request.response().status() === 403) { diff --git a/lib/routes/weibo/utils.ts b/lib/routes/weibo/utils.ts index c4df2ea2fd19..853034af8191 100644 --- a/lib/routes/weibo/utils.ts +++ b/lib/routes/weibo/utils.ts @@ -84,21 +84,20 @@ const weiboUtils = { onBeforeLoad: async (page) => { const expectResourceTypes = new Set(['document', 'script', 'xhr', 'fetch']); await page.setUserAgent(weiboUtils.apiHeaders['User-Agent']); - await page.setRequestInterception(true); - page.on('request', (request) => { + await page.route('**/*', (route) => { + const request = route.request(); // 1st: initial request, 302 to visitor.passport.weibo.cn; 2nd: auth ok if (!expectResourceTypes.has(request.resourceType()) || times >= 2) { - request.abort(); + route.abort(); return; } if (request.url().startsWith(url)) { times++; } - request.continue(); + route.continue(); }); }, - // networkidle2 returns too early if the connection is slow - gotoConfig: { waitUntil: 'networkidle0' }, + gotoConfig: { waitUntil: 'networkidle' }, }); const cookies: string = await getCookies(page, 'weibo.cn'); await destroy(); diff --git a/lib/routes/xiaohongshu/util.ts b/lib/routes/xiaohongshu/util.ts index 29d2573e936c..59e7fe2c4a27 100644 --- a/lib/routes/xiaohongshu/util.ts +++ b/lib/routes/xiaohongshu/util.ts @@ -65,9 +65,9 @@ const getUser = (url, cache) => // Use Playwright const { page, destroy } = await getPlaywrightPage(url, { onBeforeLoad: async (page) => { - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' || request.resourceType() === 'other' ? 
request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' || request.resourceType() === 'other' ? route.continue() : route.abort(); }); }, }); @@ -131,9 +131,9 @@ const getBoard = (url, cache) => const browser = await playwright(); try { const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? route.continue() : route.abort(); }); logger.http(`Requesting ${url}`); await page.goto(url); diff --git a/lib/routes/xsijishe/utils.ts b/lib/routes/xsijishe/utils.ts index f74a47a772aa..16757e769f01 100644 --- a/lib/routes/xsijishe/utils.ts +++ b/lib/routes/xsijishe/utils.ts @@ -19,9 +19,9 @@ const playwrightGet = async (url: string, browser: Browser, waitForSelector = '. await setCookies(page, options.cookie, 'xsijishe.com'); } - await page.setRequestInterception(true); - page.on('request', (request) => { - expectResourceTypes.has(request.resourceType()) ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + expectResourceTypes.has(request.resourceType()) ? 
route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/xueqiu/cookies.ts b/lib/routes/xueqiu/cookies.ts index 076310122616..d9a95b7320f8 100644 --- a/lib/routes/xueqiu/cookies.ts +++ b/lib/routes/xueqiu/cookies.ts @@ -9,9 +9,9 @@ export const parseToken = (link: string) => async () => { const browser = await playwright(); const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); await page.goto(link, { waitUntil: 'domcontentloaded', diff --git a/lib/utils/playwright.mock.test.ts b/lib/utils/playwright.mock.test.ts index 35f8cae1e1af..9fa401ca5c2d 100644 --- a/lib/utils/playwright.mock.test.ts +++ b/lib/utils/playwright.mock.test.ts @@ -221,23 +221,4 @@ describe('getPlaywrightPage (mocked)', () => { expect(proxyMock.markProxyFailed).toHaveBeenCalledWith(currentProxy.uri); }); - - it('maps legacy networkidle waits to playwright networkidle', async () => { - resetMocks(); - launch.mockResolvedValue(browser); - page.goto.mockResolvedValue(undefined); - proxyMock.getCurrentProxy.mockReturnValue(null); - - const getPlaywrightPage = await loadPlaywright(); - const goto = page.goto; - await getPlaywrightPage('https://example.com', { - gotoConfig: { - waitUntil: 'networkidle2', - }, - }); - - expect(goto).toHaveBeenCalledWith('https://example.com', { - waitUntil: 'networkidle', - }); - }); }); diff --git a/lib/utils/playwright.worker.ts b/lib/utils/playwright.worker.ts index 6be5bcca5609..90581455edf8 100644 --- a/lib/utils/playwright.worker.ts +++ b/lib/utils/playwright.worker.ts @@ -8,9 +8,7 @@ import logger from './logger'; type SetCookieParam = Parameters[0][number]; type Cookie = Awaited>[number]; -type GotoOptions = 
Parameters[1] & { - waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'networkidle0' | 'networkidle2'; -}; +type GotoOptions = Parameters[1]; type FinishedRequest = { response: () => { @@ -24,7 +22,6 @@ type RequestFinishedHandler = (request: FinishedRequest) => Promise | void export type Page = PlaywrightPage & { authenticate: (credentials: { password?: string; username?: string }) => Promise; cookies: (urls?: string | string[]) => Promise; - goto: (url: string, options?: GotoOptions) => ReturnType; on: ((event: 'requestfinished', handler: RequestFinishedHandler) => Page) & PlaywrightPage['on']; setCookie: (...cookies: SetCookieParam[]) => Promise; setUserAgent: (userAgent: string) => Promise; @@ -50,16 +47,6 @@ const getBrowserBinding = () => { return browserBinding; }; -const normalizeWaitUntil = (waitUntil: GotoOptions['waitUntil']) => (waitUntil === 'networkidle0' || waitUntil === 'networkidle2' ? 'networkidle' : waitUntil); - -const normalizeGotoOptions = (options?: GotoOptions): Parameters[1] | undefined => - options - ? { - ...options, - waitUntil: normalizeWaitUntil(options.waitUntil), - } - : options; - const withDefaultCookiePath = (cookie: SetCookieParam): SetCookieParam => ('domain' in cookie && !('path' in cookie) ? 
{ ...cookie, path: '/' } : cookie); const createFinishedRequest = (request: PlaywrightRequest, response: PlaywrightResponse | null): FinishedRequest => ({ @@ -74,10 +61,8 @@ const createFinishedRequest = (request: PlaywrightRequest, response: PlaywrightR const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { const compatPage = page as Page; - const originalGoto = page.goto.bind(page); const originalOn = page.on.bind(page); - compatPage.goto = (url, options) => originalGoto(url, normalizeGotoOptions(options)); compatPage.cookies = (urls) => context.cookies(urls); compatPage.setCookie = async (...cookies) => { await context.addCookies(cookies.map((cookie) => withDefaultCookiePath(cookie))); From 3ca7bec66529cdd328358d60d83e01ad9c2cc414 Mon Sep 17 00:00:00 2001 From: Tony Date: Tue, 12 May 2026 00:53:36 +0800 Subject: [PATCH 03/10] refactor: migrate to CloakBrowser --- .github/workflows/issue-command.yml | 2 +- .github/workflows/test.yml | 4 +- Dockerfile | 56 ++++++++++++------------- lib/utils/playwright.mock.test.ts | 7 +++- lib/utils/playwright.ts | 18 +++++--- package.json | 6 +-- pnpm-lock.yaml | 64 +++++++++++++++++------------ 7 files changed, 87 insertions(+), 70 deletions(-) diff --git a/.github/workflows/issue-command.yml b/.github/workflows/issue-command.yml index 27fc494ee88c..155452f852b3 100644 --- a/.github/workflows/issue-command.yml +++ b/.github/workflows/issue-command.yml @@ -120,7 +120,7 @@ jobs: cache: 'pnpm' - name: Install dependencies (pnpm) - run: pnpm i && pnpm rb && pnpm exec patchright install chromium + run: pnpm i && pnpm rb && pnpm exec cloakbrowser install - name: Fetch affected routes id: fetch-route diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 26298f71d103..fefb09f949a8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,7 +40,7 @@ jobs: - name: Install dependencies (pnpm) run: pnpm i - name: Run postinstall script for dependencies - run: pnpm rb && pnpm 
exec patchright install chromium + run: pnpm rb && pnpm exec cloakbrowser install - name: Build routes run: pnpm build - name: Build worker routes @@ -91,7 +91,7 @@ jobs: run: pnpm build - name: Install bundled Chromium if: ${{ matrix.chromium.dependency == '' }} - run: pnpm exec patchright install chromium + run: pnpm exec cloakbrowser install - name: Install Chromium if: ${{ matrix.chromium.dependency != '' }} # 'chromium-browser' from Ubuntu APT repo is a dummy package. Its version (85.0.4183.83) means diff --git a/Dockerfile b/Dockerfile index 0654acb9915a..a8e7ff05c37d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,7 +40,7 @@ WORKDIR /ver COPY ./package.json /app/ RUN \ set -ex && \ - grep -Po '(?<="patchright": ")[^\s"]*(?=")' /app/package.json | tee /ver/.patchright_version && \ + grep -Po '(?<="cloakbrowser": ")[^\s"]*(?=")' /app/package.json | tee /ver/.cloakbrowser_version && \ grep -Po '(?<="@vercel/nft": ")[^\s"]*(?=")' /app/package.json | tee /ver/.nft_version && \ grep -Po '(?<="fs-extra": ")[^\s"]*(?=")' /app/package.json | tee /ver/.fs_extra_version @@ -88,29 +88,29 @@ FROM node:24-bookworm-slim AS chromium-downloader # Yeah, downloading Chromium never needs those dependencies below. WORKDIR /app -COPY --from=dep-version-parser /ver/.patchright_version /app/.patchright_version +COPY --from=dep-version-parser /ver/.cloakbrowser_version /app/.cloakbrowser_version ARG TARGETPLATFORM ARG USE_CHINA_NPM_REGISTRY=0 ARG PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 -# The official recommended way to use Patchright on x86(_64) is to use the bundled browser. 
+# CloakBrowser publishes prebuilt patched Chromium for both linux/amd64 and linux/arm64, +ENV CLOAKBROWSER_CACHE_DIR=/app/node_modules/.cache/cloakbrowser RUN \ set -ex ; \ - if [ "$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD" = 0 ] && [ "$TARGETPLATFORM" = 'linux/amd64' ]; then \ + if [ "$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD" = 0 ]; then \ if [ "$USE_CHINA_NPM_REGISTRY" = 1 ]; then \ npm config set registry https://registry.npmmirror.com && \ yarn config set registry https://registry.npmmirror.com && \ pnpm config set registry https://registry.npmmirror.com ; \ fi; \ - echo 'Downloading Chromium...' && \ + echo 'Downloading CloakBrowser ...' && \ unset PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD && \ - export PLAYWRIGHT_BROWSERS_PATH=/app/node_modules/.cache/ms-playwright && \ corepack enable pnpm && \ - pnpm --allow-build=patchright --allow-build=patchright-core add patchright@$(cat /app/.patchright_version) --save-prod && \ + pnpm add cloakbrowser@$(cat /app/.cloakbrowser_version) --save-prod && \ pnpm rb && \ - pnpm exec patchright install chromium ; \ + pnpm exec cloakbrowser install ; \ else \ - mkdir -p /app/node_modules/.cache/ms-playwright ; \ + mkdir -p "$CLOAKBROWSER_CACHE_DIR" ; \ fi; # --------------------------------------------------------------------------------------------------------------------- @@ -121,6 +121,7 @@ LABEL org.opencontainers.image.authors="https://github.com/DIYgod/RSSHub" ENV NODE_ENV=production ENV TZ=Asia/Shanghai +ENV CLOAKBROWSER_AUTO_UPDATE=false WORKDIR /app @@ -129,7 +130,7 @@ ARG TARGETPLATFORM ARG PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 # https://playwright.dev/docs/docker#introduction # https://www.debian.org/releases/bookworm/amd64/release-notes/ch-information.en.html#noteworthy-obsolete-packages -# On arm/arm64, install Chromium from the distribution repositories. 
+# CloakBrowser ships prebuilt patched Chromium for both linux/amd64 and linux/arm64 RUN \ set -ex && \ apt-get update && \ @@ -137,32 +138,29 @@ RUN \ dumb-init git curl \ ; \ if [ "$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD" = 0 ]; then \ - if [ "$TARGETPLATFORM" = 'linux/amd64' ]; then \ - apt-get install -yq --no-install-recommends \ - ca-certificates fonts-liberation wget xdg-utils \ - libasound2 libatk-bridge2.0-0 libatk1.0-0 libatspi2.0-0 libcairo2 libcups2 libdbus-1-3 libdrm2 \ - libexpat1 libgbm1 libglib2.0-0 libnspr4 libnss3 libpango-1.0-0 libx11-6 libxcb1 libxcomposite1 \ - libxdamage1 libxext6 libxfixes3 libxkbcommon0 libxrandr2 \ - ; \ - else \ - apt-get install -yq --no-install-recommends \ - chromium \ - && \ - echo "CHROMIUM_EXECUTABLE_PATH=$(which chromium)" | tee /app/.env ; \ - fi; \ + apt-get install -yq --no-install-recommends \ + ca-certificates fonts-liberation wget xdg-utils \ + libasound2 libatk-bridge2.0-0 libatk1.0-0 libatspi2.0-0 libcairo2 libcups2 libdbus-1-3 libdrm2 \ + libexpat1 libgbm1 libglib2.0-0 libnspr4 libnss3 libpango-1.0-0 libx11-6 libxcb1 libxcomposite1 \ + libxdamage1 libxext6 libxfixes3 libxkbcommon0 libxrandr2 \ + ; \ fi; \ rm -rf /var/lib/apt/lists/* -COPY --from=chromium-downloader /app/node_modules/.cache/ms-playwright /app/node_modules/.cache/ms-playwright +ENV CLOAKBROWSER_CACHE_DIR=/app/node_modules/.cache/cloakbrowser +COPY --from=chromium-downloader /app/node_modules/.cache/cloakbrowser /app/node_modules/.cache/cloakbrowser RUN \ set -ex && \ - if [ "$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD" = 0 ] && [ "$TARGETPLATFORM" = 'linux/amd64' ]; then \ - echo 'Verifying Chromium installation...' && \ - _chrome_path=$(find /app/node_modules/.cache/ms-playwright/ -name chrome -xtype f -executable | head -n1) && \ - echo "CHROMIUM_EXECUTABLE_PATH=$_chrome_path" | tee /app/.env && \ + if [ "$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD" = 0 ]; then \ + echo 'Verifying CloakBrowser Chromium installation...' 
&& \ + _chrome_path=$(find /app/node_modules/.cache/cloakbrowser/ -name chrome -xtype f -executable | head -n1) && \ + if [ -z "$_chrome_path" ]; then \ + echo "!!! CloakBrowser binary not found !!!" && \ + exit 1 ; \ + fi; \ if ldd "$_chrome_path" | grep "not found"; then \ - echo "!!! Chromium has unmet shared libs !!!" && \ + echo "!!! CloakBrowser has unmet shared libs !!!" && \ exit 1 ; \ else \ echo "Awesome! All shared libs are met!" ; \ diff --git a/lib/utils/playwright.mock.test.ts b/lib/utils/playwright.mock.test.ts index 9fa401ca5c2d..761a1ca73d07 100644 --- a/lib/utils/playwright.mock.test.ts +++ b/lib/utils/playwright.mock.test.ts @@ -38,14 +38,17 @@ const proxyMock = { getDispatcherForProxy: vi.fn(), }; -vi.mock('patchright', () => ({ +vi.mock('playwright-core', () => ({ chromium: { connect, connectOverCDP, - launch, }, })); +vi.mock('cloakbrowser', () => ({ + launch, +})); + vi.mock('@/utils/proxy', () => ({ default: proxyMock, })); diff --git a/lib/utils/playwright.ts b/lib/utils/playwright.ts index 5e4f13f637b3..1f30fe84c82f 100644 --- a/lib/utils/playwright.ts +++ b/lib/utils/playwright.ts @@ -1,5 +1,6 @@ -import type { Browser as PlaywrightBrowser, BrowserContext, BrowserContextOptions, LaunchOptions, Page as PlaywrightPage, Request as PlaywrightRequest, Response as PlaywrightResponse } from 'patchright'; -import { chromium } from 'patchright'; +import { launch } from 'cloakbrowser'; +import type { Browser as PlaywrightBrowser, BrowserContext, BrowserContextOptions, LaunchOptions, Page as PlaywrightPage, Request as PlaywrightRequest, Response as PlaywrightResponse } from 'playwright-core'; +import { chromium } from 'playwright-core'; import { config } from '@/config'; @@ -74,7 +75,8 @@ const getProxyOptions = (currentProxy: ProxyState | null | undefined) => { const COMMON_LAUNCH_ARGS = ['--no-sandbox', '--disable-setuid-sandbox', '--window-position=0,0', '--ignore-certificate-errors', '--ignore-certificate-errors-spki-list']; -// Patchright 
already patches playwright's default args (e.g. injects --disable-blink-features=AutomationControlled and strips --enable-automation), so we don't add those manually. +// CloakBrowser auto-downloads its own patched Chromium (both linux/amd64 and linux/arm64 prebuilds) and applies anti-detection at the C++ source level. +// executablePath is still forwarded when CHROMIUM_EXECUTABLE_PATH is set so users can point at a custom binary — in that case CloakBrowser's C++ patches don't apply. const getLaunchOptions = (currentProxy?: ProxyState | null): LaunchOptions => ({ args: COMMON_LAUNCH_ARGS, executablePath: config.chromiumExecutablePath || undefined, @@ -83,8 +85,8 @@ const getLaunchOptions = (currentProxy?: ProxyState | null): LaunchOptions => ({ }); // Browserless accepts launch options as a `launch` URL query parameter (URL-encoded JSON). -// (Patchright's own launch-server uses `launch-options` — RSSHub's WS_ENDPOINT targets browserless, so we emit `launch`.) -// The browserless schema also differs from patchright's LaunchOptions: no `executablePath`, and `ignoreHTTPSErrors` is renamed to `acceptInsecureCerts`. +// (Playwright's own launch-server uses `launch-options` — RSSHub's WS_ENDPOINT targets browserless, so we emit `launch`.) +// The browserless schema differs from playwright LaunchOptions: no `executablePath`, and `ignoreHTTPSErrors` is renamed to `acceptInsecureCerts`. type BrowserlessLaunchOptions = { acceptInsecureCerts?: boolean; args?: string[]; @@ -187,7 +189,11 @@ const createCompatBrowser = async (browser: PlaywrightBrowser, contextOptions: B }; const launchBrowser = async (currentProxy?: ProxyState | null) => { - const browser = config.playwrightWSEndpoint ? 
await chromium.connect(getBrowserlessEndpoint(config.playwrightWSEndpoint, toBrowserlessLaunchOptions(currentProxy))) : await chromium.launch(getLaunchOptions(currentProxy)); + // When WS_ENDPOINT is set we connect to self-hosted browserless via playwright-core's CDP/WS; + // otherwise we launch CloakBrowser's bundled stealth Chromium locally. + const browser = config.playwrightWSEndpoint + ? await chromium.connect(getBrowserlessEndpoint(config.playwrightWSEndpoint, toBrowserlessLaunchOptions(currentProxy))) + : ((await launch(getLaunchOptions(currentProxy))) as PlaywrightBrowser); return createCompatBrowser(browser, getContextOptions()); }; diff --git a/package.json b/package.json index 27a92dcb93a1..8aa4477fa89f 100644 --- a/package.json +++ b/package.json @@ -78,6 +78,7 @@ "aes-js": "3.1.2", "cheerio": "1.2.0", "city-timezones": "1.3.4", + "cloakbrowser": "0.3.27", "cross-env": "10.1.0", "crypto-js": "4.2.0", "currency-symbol-map": "5.1.0", @@ -115,7 +116,7 @@ "otplib": "13.4.0", "p-map": "7.0.4", "pac-proxy-agent": "9.0.1", - "patchright": "1.59.1", + "playwright-core": "1.59.1", "query-string": "9.3.1", "rate-limiter-flexible": "11.1.0", "re2js": "2.6.1", @@ -235,8 +236,7 @@ "esbuild", "eslint-nibble", "msw", - "patchright", - "patchright-core", + "playwright-core", "protobufjs", "rolldown", "sharp", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d2559957375a..0ea147c2938a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -94,6 +94,9 @@ importers: city-timezones: specifier: 1.3.4 version: 1.3.4 + cloakbrowser: + specifier: 0.3.27 + version: 0.3.27(playwright-core@1.59.1)(socks-proxy-agent@10.0.0) cross-env: specifier: 10.1.0 version: 10.1.0 @@ -205,7 +208,7 @@ importers: pac-proxy-agent: specifier: 9.0.1 version: 9.0.1 - patchright: + playwright-core: specifier: 1.59.1 version: 1.59.1 query-string: @@ -3443,6 +3446,25 @@ packages: resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} 
engines: {node: '>=12'} + cloakbrowser@0.3.27: + resolution: {integrity: sha512-EjTI+Ux8XaCDHKDOLFOt6Tsv2g6AhQPQjIL1GqAazcxk+BAg8FfFxtSHYdVcvldsXSW8RpPEz8N3AX54vEjzBg==} + engines: {node: '>=20.0.0'} + hasBin: true + peerDependencies: + mmdb-lib: '>=2.0.0' + playwright-core: '>=1.40.0' + puppeteer-core: '>=21.0.0' + socks-proxy-agent: '>=10.0.0' + peerDependenciesMeta: + mmdb-lib: + optional: true + playwright-core: + optional: true + puppeteer-core: + optional: true + socks-proxy-agent: + optional: true + cluster-key-slot@1.1.2: resolution: {integrity: sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==} engines: {node: '>=0.10.0'} @@ -4113,11 +4135,6 @@ packages: resolution: {integrity: sha512-eKpRKAovdpZtR1WopLHxlBWvAgPny3c4gX1G5Jhwmmw4XJj0ifSD5qB5TOo8hmA0wlRKDAOAhEE1yVPgs6Fgcg==} engines: {node: '>=14.14'} - fsevents@2.3.2: - resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} - engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} - os: [darwin] - fsevents@2.3.3: resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} @@ -5216,16 +5233,6 @@ packages: parseley@0.13.1: resolution: {integrity: sha512-uNBJZzmb60l6p6VWLTmevizNAGnE0xoSf1n0B4q3ntegDNzcS68NRCcBDZTcyXHxt2XhBChsCuqj4M+nChvE/A==} - patchright-core@1.59.1: - resolution: {integrity: sha512-VthHtavFwFgs5VRalZtbacmjw8E7SPXPhGjfOakvjPsrJcIHl/i7K9lgKYpevy4F90+/GQSJiO0Mt58JB4+9HQ==} - engines: {node: '>=18'} - hasBin: true - - patchright@1.59.1: - resolution: {integrity: sha512-nrWFE1/U3qu9ybrgNFJt75iTiszvZ23Dn4S6UDSydbXJtbyBXRVGenHowKq6n7hmArGYPVTfuDqiXVH7avPkiw==} - engines: {node: '>=18'} - hasBin: true - path-browserify@1.0.1: resolution: {integrity: sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==} @@ -5295,6 +5302,11 @@ packages: resolution: 
{integrity: sha512-r34yH/GlQpKZbU1BvFFqOjhISRo1MNx1tWYsYvmj6KIRHSPMT2+yHOEb1SG6NMvRoHRF0a07kCOox/9yakl1vg==} hasBin: true + playwright-core@1.59.1: + resolution: {integrity: sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==} + engines: {node: '>=18'} + hasBin: true + pluralize@8.0.0: resolution: {integrity: sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==} engines: {node: '>=4'} @@ -8961,6 +8973,13 @@ snapshots: strip-ansi: 6.0.1 wrap-ansi: 7.0.0 + cloakbrowser@0.3.27(playwright-core@1.59.1)(socks-proxy-agent@10.0.0): + dependencies: + tar: 7.5.13 + optionalDependencies: + playwright-core: 1.59.1 + socks-proxy-agent: 10.0.0 + cluster-key-slot@1.1.2: {} color-convert@2.0.1: @@ -9716,9 +9735,6 @@ snapshots: jsonfile: 6.2.1 universalify: 2.0.1 - fsevents@2.3.2: - optional: true - fsevents@2.3.3: optional: true @@ -11137,14 +11153,6 @@ snapshots: leac: 0.7.0 peberminta: 0.10.0 - patchright-core@1.59.1: {} - - patchright@1.59.1: - dependencies: - patchright-core: 1.59.1 - optionalDependencies: - fsevents: 2.3.2 - path-browserify@1.0.1: {} path-exists@4.0.0: {} @@ -11212,6 +11220,8 @@ snapshots: sonic-boom: 4.2.1 thread-stream: 4.0.0 + playwright-core@1.59.1: {} + pluralize@8.0.0: {} postcss@8.5.10: From 8d3158bdc9bfdb09d41d4b577dafe40d5a5bff14 Mon Sep 17 00:00:00 2001 From: Tony Date: Tue, 12 May 2026 20:32:34 +0800 Subject: [PATCH 04/10] refactor: drop puppeteer cookie/requestfinished shims Migrate page.cookies/setCookie and browser.cookies/setCookie call sites to the native context APIs (page.context().cookies/addCookies), rewrite requestfinished handlers to await request.response() instead of consuming the synchronous puppeteer-shaped payload, and remove the now-redundant browser.close context-cleanup wrapper. Playwright already handles all of these natively; the shims existed only for the puppeteer -> playwright migration. 
--- lib/bilibili-video-route.test.ts | 3 - lib/routes/bilibili/cache.ts | 2 +- lib/routes/bilibili/video.ts | 2 +- lib/routes/cjlu/yjsy/index.ts | 4 +- lib/routes/dcard/section.ts | 2 +- lib/routes/dcard/utils.ts | 4 +- lib/routes/javdb/utils.ts | 14 +++-- lib/routes/twitter/api/web-api/login.ts | 2 +- lib/routes/uraaka-joshi/uraaka-joshi-user.ts | 6 +- lib/routes/uraaka-joshi/uraaka-joshi.ts | 6 +- lib/utils/playwright-utils.ts | 6 +- lib/utils/playwright.mock.test.ts | 27 --------- lib/utils/playwright.ts | 64 +------------------- lib/utils/playwright.worker.ts | 63 +------------------ 14 files changed, 33 insertions(+), 172 deletions(-) diff --git a/lib/bilibili-video-route.test.ts b/lib/bilibili-video-route.test.ts index 4cc71db1080c..1da126cda7ea 100644 --- a/lib/bilibili-video-route.test.ts +++ b/lib/bilibili-video-route.test.ts @@ -12,14 +12,12 @@ const getPlaywrightPage = vi.fn(); const goto = vi.fn(); const on = vi.fn(); const pageRoute = vi.fn(); -const setCookie = vi.fn(); const waitForResponse = vi.fn(); const page = { goto, on, route: pageRoute, - setCookie, waitForResponse, }; @@ -67,7 +65,6 @@ describe('/bilibili/user/video/:uid', () => { goto.mockReset(); on.mockReset(); pageRoute.mockReset(); - setCookie.mockReset(); waitForResponse.mockReset(); }); diff --git a/lib/routes/bilibili/cache.ts b/lib/routes/bilibili/cache.ts index 213727ec6635..c8bc573544bc 100644 --- a/lib/routes/bilibili/cache.ts +++ b/lib/routes/bilibili/cache.ts @@ -51,7 +51,7 @@ const getCookie = (disableConfig = false) => { waitForRequest = new Promise((resolve) => { page.on('requestfinished', async (request) => { if (request.url() === 'https://api.bilibili.com/x/web-interface/nav') { - const cookies = await page.cookies(); + const cookies = await page.context().cookies(); let cookieString = cookies.map((cookie) => `${cookie.name}=${cookie.value}`).join('; '); cookieString = cookieString.replace(/b_lsid=[0-9A-F]+_[0-9A-F]+/, `b_lsid=${utils.lsid()}`); resolve(cookieString); diff 
--git a/lib/routes/bilibili/video.ts b/lib/routes/bilibili/video.ts index dee905fcd858..c34a821c3b29 100644 --- a/lib/routes/bilibili/video.ts +++ b/lib/routes/bilibili/video.ts @@ -140,7 +140,7 @@ async function applyCookie(page: Page, cookie: string) { .filter((item) => item !== undefined); if (cookies.length > 0) { - await page.setCookie(...cookies); + await page.context().addCookies(cookies); } } diff --git a/lib/routes/cjlu/yjsy/index.ts b/lib/routes/cjlu/yjsy/index.ts index 0524ccf6f928..ed16abb69076 100644 --- a/lib/routes/cjlu/yjsy/index.ts +++ b/lib/routes/cjlu/yjsy/index.ts @@ -86,7 +86,7 @@ async function handler(ctx) { const limit = ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 10; const url = `${host}index/${cate}.htm`; - const { page, destroy, browser } = await getPlaywrightPage(url, { + const { page, destroy } = await getPlaywrightPage(url, { onBeforeLoad: async (page) => { await page.setExtraHTTPHeaders(headers); await page.setUserAgent(headers['User-Agent']); @@ -98,7 +98,7 @@ async function handler(ctx) { gotoConfig: { waitUntil: 'networkidle' }, }); - const cookies = await browser.cookies(); + const cookies = await page.context().cookies(); const cookieString = cookies.map((c) => `${c.name}=${c.value}`).join('; '); const response = await page.content(); diff --git a/lib/routes/dcard/section.ts b/lib/routes/dcard/section.ts index 103ecbe81e89..1de2b184dbc4 100644 --- a/lib/routes/dcard/section.ts +++ b/lib/routes/dcard/section.ts @@ -60,7 +60,7 @@ async function handler(ctx) { await page.goto(`${api}&limit=100`); await page.waitForSelector('body > pre'); const response = await page.evaluate(() => document.querySelector('body > pre').textContent); - const cookies = await cache.tryGet('dcard:cookies', () => page.cookies(), 3600, false); + const cookies = await cache.tryGet('dcard:cookies', () => page.context().cookies(), 3600, false); await page.close(); const data = JSON.parse(response); diff --git 
a/lib/routes/dcard/utils.ts b/lib/routes/dcard/utils.ts index 8293e7d120c8..be7f0b709069 100644 --- a/lib/routes/dcard/utils.ts +++ b/lib/routes/dcard/utils.ts @@ -18,11 +18,11 @@ const ProcessFeed = async (items, cookies, browser, limit, cache) => { await page.setExtraHTTPHeaders({ referer: `https://www.dcard.tw/f/${i.forumAlias}/p/${i.id}`, }); - await page.setCookie(...cookies); + await page.context().addCookies(cookies); await page.goto(url); await page.waitForSelector('body > pre'); response = await page.evaluate(() => document.querySelector('body > pre').textContent); - newCookies = await page.cookies(); + newCookies = await page.context().cookies(); await page.close(); const data = JSON.parse(response); diff --git a/lib/routes/javdb/utils.ts b/lib/routes/javdb/utils.ts index e51e9be8341d..fc72f927a794 100644 --- a/lib/routes/javdb/utils.ts +++ b/lib/routes/javdb/utils.ts @@ -20,12 +20,14 @@ const ProcessItems = async (ctx, currentUrl, title) => { const { page, destroy, browser } = await getPlaywrightPage('about:blank'); if (config.javdb.session) { - await browser.setCookie({ - name: '_jdb_session', - value: config.javdb.session, - domain, - path: '/', - }); + await page.context().addCookies([ + { + name: '_jdb_session', + value: config.javdb.session, + domain, + path: '/', + }, + ]); } await page.route('**/*', (route) => { const request = route.request(); diff --git a/lib/routes/twitter/api/web-api/login.ts b/lib/routes/twitter/api/web-api/login.ts index e3e001b76af7..468aa23f384d 100644 --- a/lib/routes/twitter/api/web-api/login.ts +++ b/lib/routes/twitter/api/web-api/login.ts @@ -54,7 +54,7 @@ async function login({ username, password, authenticationSecret }) { logger.error(`twitter debug: twitter username ${username} login failed: messageprompt-suspended-prompt`); resolve(''); } - const cookies = await page.cookies(); + const cookies = await page.context().cookies(); for (const cookie of cookies) { cookieJar.setCookieSync(`${cookie.name}=${cookie.value}`, 
'https://x.com'); } diff --git a/lib/routes/uraaka-joshi/uraaka-joshi-user.ts b/lib/routes/uraaka-joshi/uraaka-joshi-user.ts index 8d003e827852..b0cd4f074c64 100644 --- a/lib/routes/uraaka-joshi/uraaka-joshi-user.ts +++ b/lib/routes/uraaka-joshi/uraaka-joshi-user.ts @@ -45,7 +45,11 @@ async function handler(ctx) { request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' ? route.continue() : route.abort(); }); page.on('requestfinished', async (request) => { - if (request.url() === link && request.response().status() === 403) { + if (request.url() !== link) { + return; + } + const response = await request.response(); + if (response?.status() === 403) { await page.close(); } }); diff --git a/lib/routes/uraaka-joshi/uraaka-joshi.ts b/lib/routes/uraaka-joshi/uraaka-joshi.ts index b3577070f5c2..70c34a5320aa 100644 --- a/lib/routes/uraaka-joshi/uraaka-joshi.ts +++ b/lib/routes/uraaka-joshi/uraaka-joshi.ts @@ -33,7 +33,11 @@ async function handler() { request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' ? route.continue() : route.abort(); }); page.on('requestfinished', async (request) => { - if (request.url() === link && request.response().status() === 403) { + if (request.url() !== link) { + return; + } + const response = await request.response(); + if (response?.status() === 403) { await page.close(); } }); diff --git a/lib/utils/playwright-utils.ts b/lib/utils/playwright-utils.ts index 433c4bd67d6d..5451e1e66d18 100644 --- a/lib/utils/playwright-utils.ts +++ b/lib/utils/playwright-utils.ts @@ -27,7 +27,7 @@ const parseCookieArray = (cookies, domainFilter?: string | RegExp) => { const constructCookieArray = (cookieStr, domain) => cookieStr.split('; ').map((item) => { const [name, value] = item.split('='); - return value === undefined ? { name: '', value: name, domain } : { name, value, domain }; + return value === undefined ? 
{ name: '', value: name, domain, path: '/' } : { name, value, domain, path: '/' }; }); /** @@ -40,7 +40,7 @@ const constructCookieArray = (cookieStr, domain) => */ const setCookies = async (page, cookieStr, domain) => { const cookies = constructCookieArray(cookieStr, domain); - await page.setCookie(...cookies); + await page.context().addCookies(cookies); }; /** @@ -51,7 +51,7 @@ const setCookies = async (page, cookieStr, domain) => { * @return {Promise} Cookie-header-style cookie string */ const getCookies = async (page, domainFilter?: string) => { - const cookies = await page.cookies(); + const cookies = await page.context().cookies(); return parseCookieArray(cookies, domainFilter); }; diff --git a/lib/utils/playwright.mock.test.ts b/lib/utils/playwright.mock.test.ts index 761a1ca73d07..61f2e17f1d50 100644 --- a/lib/utils/playwright.mock.test.ts +++ b/lib/utils/playwright.mock.test.ts @@ -180,33 +180,6 @@ describe('getPlaywrightPage (mocked)', () => { } }); - it('handles requestfinished events after the remote page closes', async () => { - resetMocks(); - launch.mockResolvedValue(browser); - page.goto.mockResolvedValue(undefined); - proxyMock.getCurrentProxy.mockReturnValue(null); - - const getPlaywrightPage = await loadPlaywright(); - const handler = vi.fn(); - const rawOn = page.on; - await getPlaywrightPage('https://example.com', { - onBeforeLoad: (page) => { - page.on('requestfinished', handler); - }, - }); - - const finishedHandler = rawOn.mock.calls.find(([event]) => event === 'requestfinished')?.[1]; - await finishedHandler?.({ - response: vi.fn().mockRejectedValue(new Error('closed')), - url: () => 'https://example.com/api', - }); - - expect(handler).toHaveBeenCalledTimes(1); - const finishedRequest = handler.mock.calls[0][0]; - expect(finishedRequest.response()).toBeNull(); - expect(finishedRequest.url()).toBe('https://example.com/api'); - }); - it('marks proxy failed when navigation throws with multi-proxy', async () => { resetMocks(); 
launch.mockResolvedValue(browser); diff --git a/lib/utils/playwright.ts b/lib/utils/playwright.ts index 1f30fe84c82f..86856be7e8f5 100644 --- a/lib/utils/playwright.ts +++ b/lib/utils/playwright.ts @@ -1,5 +1,5 @@ import { launch } from 'cloakbrowser'; -import type { Browser as PlaywrightBrowser, BrowserContext, BrowserContextOptions, LaunchOptions, Page as PlaywrightPage, Request as PlaywrightRequest, Response as PlaywrightResponse } from 'playwright-core'; +import type { Browser as PlaywrightBrowser, BrowserContext, BrowserContextOptions, LaunchOptions, Page as PlaywrightPage } from 'playwright-core'; import { chromium } from 'playwright-core'; import { config } from '@/config'; @@ -7,38 +7,20 @@ import { config } from '@/config'; import logger from './logger'; import proxy from './proxy'; -type SetCookieParam = Parameters[0][number]; -type Cookie = Awaited>[number]; type GotoOptions = Parameters[1]; type ProxyState = NonNullable>; -type FinishedRequest = { - response: () => { - status: () => number; - } | null; - url: () => string; -}; - -type RequestFinishedHandler = (request: FinishedRequest) => Promise | void; - export type Page = PlaywrightPage & { authenticate: (credentials: { password?: string; username?: string }) => Promise; - cookies: (urls?: string | string[]) => Promise; - on: ((event: 'requestfinished', handler: RequestFinishedHandler) => Page) & PlaywrightPage['on']; - setCookie: (...cookies: SetCookieParam[]) => Promise; setUserAgent: (userAgent: string) => Promise; }; export type Browser = PlaywrightBrowser & { - cookies: (urls?: string | string[]) => Promise; newPage: () => Promise; - setCookie: (...cookies: SetCookieParam[]) => Promise; userAgent: () => string; }; -const withDefaultCookiePath = (cookie: SetCookieParam): SetCookieParam => ('domain' in cookie && !('path' in cookie) ? 
{ ...cookie, path: '/' } : cookie); - const proxyServerFromUrl = (proxyUrl: URL) => { const protocol = proxyUrl.protocol.replace('socks5h:', 'socks5:').replace('socks4a:', 'socks4:'); return `${protocol}//${proxyUrl.host}`; @@ -109,24 +91,9 @@ const getContextOptions = (): BrowserContextOptions => ({ ignoreHTTPSErrors: true, }); -const createFinishedRequest = (request: PlaywrightRequest, response: PlaywrightResponse | null): FinishedRequest => ({ - response: () => - response - ? { - status: () => response.status(), - } - : null, - url: () => request.url(), -}); - const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { const compatPage = page as Page; - const originalOn = page.on.bind(page); - compatPage.cookies = (urls) => context.cookies(urls); - compatPage.setCookie = async (...cookies) => { - await context.addCookies(cookies.map((cookie) => withDefaultCookiePath(cookie))); - }; compatPage.authenticate = async () => {}; compatPage.setUserAgent = async (userAgent) => { const contextWithCDP = context as BrowserContext & { @@ -145,22 +112,6 @@ const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { 'User-Agent': userAgent, }); }; - compatPage.on = ((event: string, handler: (...args: any[]) => any) => { - if (event === 'requestfinished') { - originalOn(event, async (request) => { - let response: PlaywrightResponse | null = null; - try { - response = await request.response(); - } catch { - // The remote browser may close before Playwright resolves the response. 
- } - await (handler as RequestFinishedHandler)(createFinishedRequest(request, response)); - }); - return compatPage; - } - originalOn(event, handler); - return compatPage; - }) as Page['on']; return compatPage; }; @@ -168,22 +119,9 @@ const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { const createCompatBrowser = async (browser: PlaywrightBrowser, contextOptions: BrowserContextOptions): Promise => { const context = await browser.newContext(contextOptions); const compatBrowser = browser as Browser; - const originalClose = browser.close.bind(browser); compatBrowser.newPage = async () => patchPage(await context.newPage(), context); - compatBrowser.setCookie = async (...cookies) => { - await context.addCookies(cookies.map((cookie) => withDefaultCookiePath(cookie))); - }; - compatBrowser.cookies = (urls) => context.cookies(urls); compatBrowser.userAgent = () => config.ua; - compatBrowser.close = async (options) => { - try { - await context.close(); - } catch { - // Ignore already-closed contexts. - } - await originalClose(options); - }; return compatBrowser; }; diff --git a/lib/utils/playwright.worker.ts b/lib/utils/playwright.worker.ts index 90581455edf8..b949ee6224eb 100644 --- a/lib/utils/playwright.worker.ts +++ b/lib/utils/playwright.worker.ts @@ -1,36 +1,20 @@ // Worker-compatible Playwright using Cloudflare Browser Run. 
-import type { Browser as PlaywrightBrowser, BrowserContext, Page as PlaywrightPage, Request as PlaywrightRequest, Response as PlaywrightResponse } from '@cloudflare/playwright'; +import type { Browser as PlaywrightBrowser, Page as PlaywrightPage } from '@cloudflare/playwright'; import { launch } from '@cloudflare/playwright'; import { config } from '@/config'; import logger from './logger'; -type SetCookieParam = Parameters[0][number]; -type Cookie = Awaited>[number]; type GotoOptions = Parameters[1]; -type FinishedRequest = { - response: () => { - status: () => number; - } | null; - url: () => string; -}; - -type RequestFinishedHandler = (request: FinishedRequest) => Promise | void; - export type Page = PlaywrightPage & { authenticate: (credentials: { password?: string; username?: string }) => Promise; - cookies: (urls?: string | string[]) => Promise; - on: ((event: 'requestfinished', handler: RequestFinishedHandler) => Page) & PlaywrightPage['on']; - setCookie: (...cookies: SetCookieParam[]) => Promise; setUserAgent: (userAgent: string) => Promise; }; export type Browser = PlaywrightBrowser & { - cookies: (urls?: string | string[]) => Promise; newPage: () => Promise; - setCookie: (...cookies: SetCookieParam[]) => Promise; userAgent: () => string; }; @@ -47,43 +31,15 @@ const getBrowserBinding = () => { return browserBinding; }; -const withDefaultCookiePath = (cookie: SetCookieParam): SetCookieParam => ('domain' in cookie && !('path' in cookie) ? { ...cookie, path: '/' } : cookie); - -const createFinishedRequest = (request: PlaywrightRequest, response: PlaywrightResponse | null): FinishedRequest => ({ - response: () => - response - ? 
{ - status: () => response.status(), - } - : null, - url: () => request.url(), -}); - -const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { +const patchPage = (page: PlaywrightPage): Page => { const compatPage = page as Page; - const originalOn = page.on.bind(page); - compatPage.cookies = (urls) => context.cookies(urls); - compatPage.setCookie = async (...cookies) => { - await context.addCookies(cookies.map((cookie) => withDefaultCookiePath(cookie))); - }; compatPage.authenticate = async () => {}; compatPage.setUserAgent = async (userAgent) => { await page.setExtraHTTPHeaders({ 'User-Agent': userAgent, }); }; - compatPage.on = ((event: string, handler: (...args: any[]) => any) => { - if (event === 'requestfinished') { - originalOn(event, async (request) => { - const response = await request.response(); - await (handler as RequestFinishedHandler)(createFinishedRequest(request, response)); - }); - return compatPage; - } - originalOn(event, handler); - return compatPage; - }) as Page['on']; return compatPage; }; @@ -93,22 +49,9 @@ const createCompatBrowser = async (browser: PlaywrightBrowser): Promise ignoreHTTPSErrors: true, }); const compatBrowser = browser as Browser; - const originalClose = browser.close.bind(browser); - compatBrowser.newPage = async () => patchPage(await context.newPage(), context); - compatBrowser.setCookie = async (...cookies) => { - await context.addCookies(cookies.map((cookie) => withDefaultCookiePath(cookie))); - }; - compatBrowser.cookies = (urls) => context.cookies(urls); + compatBrowser.newPage = async () => patchPage(await context.newPage()); compatBrowser.userAgent = () => config.ua; - compatBrowser.close = async (options) => { - try { - await context.close(); - } catch { - // Ignore already-closed contexts. 
- } - await originalClose(options); - }; return compatBrowser; }; From a7f24020a8676bc8821024895410a2f84067da90 Mon Sep 17 00:00:00 2001 From: Tony Date: Tue, 12 May 2026 21:09:14 +0800 Subject: [PATCH 05/10] refactor: remove page.authenticate, page.setUserAgent, browser.newPage(), browser.userAgent() shims update cookie utility tests --- lib/utils/playwright-utils.test.ts | 40 +++++++++--------- lib/utils/playwright.mock.test.ts | 4 +- lib/utils/playwright.test.ts | 31 +++++++------- lib/utils/playwright.ts | 68 ++++++------------------------ lib/utils/playwright.worker.ts | 57 ++++++------------------- lib/utils/puppeteer.ts | 2 +- lib/utils/puppeteer.worker.ts | 2 +- 7 files changed, 67 insertions(+), 137 deletions(-) diff --git a/lib/utils/playwright-utils.test.ts b/lib/utils/playwright-utils.test.ts index 4151605d5b8c..de7683a23fbf 100644 --- a/lib/utils/playwright-utils.test.ts +++ b/lib/utils/playwright-utils.test.ts @@ -1,15 +1,15 @@ +import type { BrowserContext } from 'playwright-core'; import { afterEach, describe, expect, it, vi } from 'vitest'; -import type { Browser } from '@/utils/playwright'; import playwright from '@/utils/playwright'; import { constructCookieArray, getCookies, parseCookieArray, setCookies } from '@/utils/playwright-utils'; -let browser: Browser | null = null; +let context: BrowserContext | null = null; afterEach(async () => { - if (browser) { - await browser.close(); - browser = null; + if (context) { + await context.close(); + context = null; } vi.resetModules(); @@ -17,19 +17,19 @@ afterEach(async () => { describe('browser cookie utils', () => { const cookieArrayExampleCom = [ - { name: 'foobar', value: '', domain: 'example.com' }, - { name: 'foo', value: 'bar', domain: 'example.com' }, - { name: 'baz', value: 'qux', domain: 'example.com' }, + { name: 'foobar', value: '', domain: 'example.com', path: '/' }, + { name: 'foo', value: 'bar', domain: 'example.com', path: '/' }, + { name: 'baz', value: 'qux', domain: 'example.com', 
path: '/' }, ]; const cookieArraySubExampleCom = [ - { name: 'barfoo', value: '', domain: 'sub.example.com' }, - { name: 'bar', value: 'foo', domain: 'sub.example.com' }, - { name: 'qux', value: 'baz', domain: 'sub.example.com' }, + { name: 'barfoo', value: '', domain: 'sub.example.com', path: '/' }, + { name: 'bar', value: 'foo', domain: 'sub.example.com', path: '/' }, + { name: 'qux', value: 'baz', domain: 'sub.example.com', path: '/' }, ]; const cookieArrayRsshubTest = [ - { name: '', value: 'rsshub', domain: 'rsshub.test' }, - { name: 'rsshub', value: '', domain: 'rsshub.test' }, - { name: 'test', value: 'rsshub', domain: 'rsshub.test' }, + { name: '', value: 'rsshub', domain: 'rsshub.test', path: '/' }, + { name: 'rsshub', value: '', domain: 'rsshub.test', path: '/' }, + { name: 'test', value: 'rsshub', domain: 'rsshub.test', path: '/' }, ]; const cookieArrayAll = [...cookieArrayExampleCom, ...cookieArraySubExampleCom, ...cookieArrayRsshubTest]; @@ -69,8 +69,8 @@ describe('browser cookie utils', () => { }); it('getCookies httpbingo', async () => { - browser = await playwright(); - const page = await browser.newPage(); + context = await playwright(); + const page = await context.newPage(); await page.goto('https://httpbingo.org/cookies/set?foo=bar&baz=qux', { waitUntil: 'domcontentloaded', }); @@ -78,8 +78,8 @@ describe('browser cookie utils', () => { }, 45000); it('setCookies httpbingo', async () => { - browser = await playwright(); - const page = await browser.newPage(); + context = await playwright(); + const page = await context.newPage(); // httpbingo.org cannot recognize cookies with empty name properly, so we cannot use cookieStrAll here await setCookies(page, cookieStrExampleCom, 'httpbingo.org'); await page.goto('https://httpbingo.org/cookies', { @@ -90,8 +90,8 @@ describe('browser cookie utils', () => { }, 45000); it('setCookies & getCookies example.org', async () => { - browser = await playwright(); - const page = await browser.newPage(); + context = 
await playwright(); + const page = await context.newPage(); // we can use cookieStrAll here! await setCookies(page, cookieStrAll, 'example.org'); await page.goto('https://example.org', { diff --git a/lib/utils/playwright.mock.test.ts b/lib/utils/playwright.mock.test.ts index 61f2e17f1d50..1b5ba86699a0 100644 --- a/lib/utils/playwright.mock.test.ts +++ b/lib/utils/playwright.mock.test.ts @@ -93,7 +93,7 @@ describe('getPlaywrightPage (mocked)', () => { const getPlaywrightPage = await loadPlaywright(); const onBeforeLoad = vi.fn(); - const close = browser.close; + const contextClose = context.close; const result = await getPlaywrightPage('https://example.com', { noGoto: true, onBeforeLoad, @@ -110,7 +110,7 @@ describe('getPlaywrightPage (mocked)', () => { expect(onBeforeLoad).toHaveBeenCalled(); await result.destroy(); - expect(close).toHaveBeenCalled(); + expect(contextClose).toHaveBeenCalled(); }); it('merges browserless launch options with existing ws endpoint launch param', async () => { diff --git a/lib/utils/playwright.test.ts b/lib/utils/playwright.test.ts index e5ec76855698..71ef1aba5622 100644 --- a/lib/utils/playwright.test.ts +++ b/lib/utils/playwright.test.ts @@ -1,15 +1,14 @@ +import type { BrowserContext } from 'playwright-core'; import { afterEach, describe, expect, it, vi } from 'vitest'; -import type { Browser } from '@/utils/playwright'; - import wait from './wait'; -let browser: Browser | null = null; +let context: BrowserContext | null = null; afterEach(async () => { - if (browser) { - await browser.close(); - browser = null; + if (context) { + await context.close(); + context = null; } delete process.env.PROXY_URI; @@ -25,9 +24,10 @@ afterEach(async () => { describe('playwright', () => { it('playwright run', async () => { const { default: playwright } = await import('./playwright'); - browser = await playwright(); + context = await playwright(); + const browser = context.browser(); const startTime = Date.now(); - const page = await 
browser.newPage(); + const page = await context.newPage(); await page.goto('https://www.google.com', { waitUntil: 'domcontentloaded', }); @@ -35,13 +35,13 @@ describe('playwright', () => { const html = await page.evaluate(() => document.body.innerHTML); expect(html.length).toBeGreaterThan(0); - expect(browser.isConnected()).toBe(true); + expect(browser?.isConnected()).toBe(true); const sleepTime = 31 * 1000 - (Date.now() - startTime); if (sleepTime > 0) { await wait(sleepTime); } - expect(browser.isConnected()).toBe(false); - browser = null; + expect(browser?.isConnected()).toBe(false); + context = null; }, 45000); }); @@ -50,18 +50,19 @@ describe('getPlaywrightPage', () => { const { getPlaywrightPage } = await import('./playwright'); const playwright = await getPlaywrightPage('https://www.google.com'); const page = playwright.page; - browser = playwright.browser; + context = playwright.context; + const browser = context.browser(); const startTime = Date.now(); const html = await page.evaluate(() => document.body.innerHTML); expect(html.length).toBeGreaterThan(0); - expect(browser.isConnected()).toBe(true); + expect(browser?.isConnected()).toBe(true); const sleepTime = 31 * 1000 - (Date.now() - startTime); if (sleepTime > 0) { await wait(sleepTime); } - expect(browser.isConnected()).toBe(false); - browser = null; + expect(browser?.isConnected()).toBe(false); + context = null; }, 45000); }); diff --git a/lib/utils/playwright.ts b/lib/utils/playwright.ts index 86856be7e8f5..4fed4c4678c5 100644 --- a/lib/utils/playwright.ts +++ b/lib/utils/playwright.ts @@ -11,15 +11,7 @@ type GotoOptions = Parameters[1]; type ProxyState = NonNullable>; -export type Page = PlaywrightPage & { - authenticate: (credentials: { password?: string; username?: string }) => Promise; - setUserAgent: (userAgent: string) => Promise; -}; - -export type Browser = PlaywrightBrowser & { - newPage: () => Promise; - userAgent: () => string; -}; +export type Page = PlaywrightPage; const 
proxyServerFromUrl = (proxyUrl: URL) => { const protocol = proxyUrl.protocol.replace('socks5h:', 'socks5:').replace('socks4a:', 'socks4:'); @@ -91,48 +83,14 @@ const getContextOptions = (): BrowserContextOptions => ({ ignoreHTTPSErrors: true, }); -const patchPage = (page: PlaywrightPage, context: BrowserContext): Page => { - const compatPage = page as Page; - - compatPage.authenticate = async () => {}; - compatPage.setUserAgent = async (userAgent) => { - const contextWithCDP = context as BrowserContext & { - newCDPSession?: (page: PlaywrightPage) => Promise<{ - detach: () => Promise; - send: (method: string, params?: Record) => Promise; - }>; - }; - if (contextWithCDP.newCDPSession) { - const session = await contextWithCDP.newCDPSession(page); - await session.send('Network.setUserAgentOverride', { userAgent }); - await session.detach(); - return; - } - await page.setExtraHTTPHeaders({ - 'User-Agent': userAgent, - }); - }; - - return compatPage; -}; - -const createCompatBrowser = async (browser: PlaywrightBrowser, contextOptions: BrowserContextOptions): Promise => { - const context = await browser.newContext(contextOptions); - const compatBrowser = browser as Browser; - - compatBrowser.newPage = async () => patchPage(await context.newPage(), context); - compatBrowser.userAgent = () => config.ua; - - return compatBrowser; -}; - const launchBrowser = async (currentProxy?: ProxyState | null) => { // When WS_ENDPOINT is set we connect to self-hosted browserless via playwright-core's CDP/WS; // otherwise we launch CloakBrowser's bundled stealth Chromium locally. const browser = config.playwrightWSEndpoint ? 
await chromium.connect(getBrowserlessEndpoint(config.playwrightWSEndpoint, toBrowserlessLaunchOptions(currentProxy))) : ((await launch(getLaunchOptions(currentProxy))) as PlaywrightBrowser); - return createCompatBrowser(browser, getContextOptions()); + const context = await browser.newContext(getContextOptions()); + return { browser, context }; }; // Merge our launch options into the existing `launch` query parameter so endpoint-level options @@ -152,20 +110,20 @@ const getBrowserlessEndpoint = (endpoint: string, launchOptions: BrowserlessLaun return endpointURL.toString(); }; -const scheduleClose = (browser: Browser, timeout = 30000) => { +const scheduleClose = (browser: PlaywrightBrowser, timeout = 30000) => { setTimeout(() => { void browser.close(); }, timeout); }; /** - * @returns Playwright browser + * @returns Playwright browser context (native `newPage()` shares state across calls) */ const outPlaywright = async () => { const currentProxy = proxy.getCurrentProxy(); - const browser = await launchBrowser(currentProxy && proxy.proxyObj.url_regex === '.*' ? currentProxy : null); + const { browser, context } = await launchBrowser(currentProxy && proxy.proxyObj.url_regex === '.*' ? currentProxy : null); scheduleClose(browser); - return browser; + return context; }; export default outPlaywright; @@ -182,7 +140,7 @@ export const getPlaywrightPage = async ( closeTimeout?: number; gotoConfig?: GotoOptions; noGoto?: boolean; - onBeforeLoad?: (page: Page, browser?: Browser) => Promise | void; + onBeforeLoad?: (page: Page, context?: BrowserContext) => Promise | void; } = {} ) => { let allowProxy = false; @@ -201,16 +159,16 @@ export const getPlaywrightPage = async ( const currentProxy = proxy.getCurrentProxy(); const currentProxyState = currentProxy && allowProxy ? 
currentProxy : null; const hasProxy = Boolean(getProxyOptions(currentProxyState).proxy); - const browser = await launchBrowser(currentProxyState); + const { browser, context } = await launchBrowser(currentProxyState); scheduleClose(browser, instanceOptions.closeTimeout); - const page = await browser.newPage(); + const page = await context.newPage(); if (hasProxy && currentProxyState) { logger.debug(`Proxying request in playwright via ${currentProxyState.uri}: ${url}`); } if (instanceOptions.onBeforeLoad) { - await instanceOptions.onBeforeLoad(page, browser); + await instanceOptions.onBeforeLoad(page, context); } if (!instanceOptions.noGoto) { @@ -227,9 +185,9 @@ export const getPlaywrightPage = async ( } return { - browser, + context, destroy: async () => { - await browser.close(); + await context.close(); }, page, }; diff --git a/lib/utils/playwright.worker.ts b/lib/utils/playwright.worker.ts index b949ee6224eb..b9413a3295aa 100644 --- a/lib/utils/playwright.worker.ts +++ b/lib/utils/playwright.worker.ts @@ -2,21 +2,11 @@ import type { Browser as PlaywrightBrowser, Page as PlaywrightPage } from '@cloudflare/playwright'; import { launch } from '@cloudflare/playwright'; -import { config } from '@/config'; - import logger from './logger'; type GotoOptions = Parameters[1]; -export type Page = PlaywrightPage & { - authenticate: (credentials: { password?: string; username?: string }) => Promise; - setUserAgent: (userAgent: string) => Promise; -}; - -export type Browser = PlaywrightBrowser & { - newPage: () => Promise; - userAgent: () => string; -}; +export type Page = PlaywrightPage; let browserBinding: any = null; @@ -31,46 +21,27 @@ const getBrowserBinding = () => { return browserBinding; }; -const patchPage = (page: PlaywrightPage): Page => { - const compatPage = page as Page; - - compatPage.authenticate = async () => {}; - compatPage.setUserAgent = async (userAgent) => { - await page.setExtraHTTPHeaders({ - 'User-Agent': userAgent, - }); - }; - - return compatPage; 
-}; - -const createCompatBrowser = async (browser: PlaywrightBrowser): Promise => { +const launchBrowser = async () => { + const browser = await launch(getBrowserBinding(), { keep_alive: 60000 }); const context = await browser.newContext({ ignoreHTTPSErrors: true, }); - const compatBrowser = browser as Browser; - - compatBrowser.newPage = async () => patchPage(await context.newPage()); - compatBrowser.userAgent = () => config.ua; - - return compatBrowser; + return { browser, context }; }; -const launchBrowser = async () => createCompatBrowser(await launch(getBrowserBinding(), { keep_alive: 60000 })); - -const scheduleClose = (browser: Browser) => { +const scheduleClose = (browser: PlaywrightBrowser) => { setTimeout(() => { void browser.close(); }, 30000); }; /** - * @returns Playwright browser + * @returns Playwright browser context (native `newPage()` shares state across calls) */ const outPlaywright = async () => { - const browser = await launchBrowser(); + const { browser, context } = await launchBrowser(); scheduleClose(browser); - return browser; + return context; }; export default outPlaywright; @@ -83,17 +54,17 @@ export const getPlaywrightPage = async ( instanceOptions: { gotoConfig?: GotoOptions; noGoto?: boolean; - onBeforeLoad?: (page: Page, browser?: Browser) => Promise | void; + onBeforeLoad?: (page: Page, context?: Awaited>['context']) => Promise | void; } = {} ) => { logger.debug(`Launching Cloudflare Browser for: ${url}`); - const browser = await launchBrowser(); + const { browser, context } = await launchBrowser(); scheduleClose(browser); - const page = await browser.newPage(); + const page = await context.newPage(); if (instanceOptions.onBeforeLoad) { - await instanceOptions.onBeforeLoad(page, browser); + await instanceOptions.onBeforeLoad(page, context); } if (!instanceOptions.noGoto) { @@ -106,9 +77,9 @@ export const getPlaywrightPage = async ( } return { - browser, + context, destroy: async () => { - await browser.close(); + await 
context.close(); }, page, }; diff --git a/lib/utils/puppeteer.ts b/lib/utils/puppeteer.ts index 117b4cd4de0f..7d93dd3d6709 100644 --- a/lib/utils/puppeteer.ts +++ b/lib/utils/puppeteer.ts @@ -1,2 +1,2 @@ -export type { Browser, Page } from './playwright'; +export type { Page } from './playwright'; export { default, getPlaywrightPage, getPlaywrightPage as getPuppeteerPage, setBrowserBinding } from './playwright'; diff --git a/lib/utils/puppeteer.worker.ts b/lib/utils/puppeteer.worker.ts index ca40b70ae6f1..56b623717721 100644 --- a/lib/utils/puppeteer.worker.ts +++ b/lib/utils/puppeteer.worker.ts @@ -1,2 +1,2 @@ -export type { Browser, Page } from './playwright.worker'; +export type { Page } from './playwright.worker'; export { default, getPlaywrightPage, getPlaywrightPage as getPuppeteerPage, setBrowserBinding } from './playwright.worker'; From 331bc10ceb9dca3d245c139d49bf57868797e8c3 Mon Sep 17 00:00:00 2001 From: Tony Date: Tue, 12 May 2026 21:10:19 +0800 Subject: [PATCH 06/10] refactor: replace browser instance with context in Playwright usage across multiple routes - Updated all instances of `browser` to `context` in Playwright-related functions for consistency and improved resource management. - Ensured proper closure of context after operations to prevent memory leaks. - Replaced Puppeteer-style request interception (`page.setRequestInterception(true)` plus `page.on('request', ...)`) with Playwright's `page.route('**/*', ...)` where it was still in use.
--- lib/bilibili-video-route.test.ts | 2 ++ lib/routes/acs/journal.tsx | 12 ++++++------ lib/routes/aip/journal-pupp.ts | 6 +++--- lib/routes/aip/utils.tsx | 10 +++++----- lib/routes/alternativeto/utils.ts | 6 +++--- lib/routes/apkpure/versions.ts | 6 +++--- lib/routes/bluestacks/release.ts | 8 ++++---- lib/routes/ccac/news.ts | 6 +++--- lib/routes/chinadegrees/province.tsx | 12 ++++++------ lib/routes/chinatimes/index.ts | 6 +++--- lib/routes/cjlu/yjsy/index.ts | 1 - lib/routes/cmde/index.ts | 8 ++++---- lib/routes/colamanga/manga.ts | 6 +++--- lib/routes/cw/author.ts | 6 +++--- lib/routes/cw/master.ts | 6 +++--- lib/routes/cw/sub.ts | 6 +++--- lib/routes/cw/today.ts | 6 +++--- lib/routes/cw/utils.ts | 19 ++++++++++--------- lib/routes/dailypush/all.ts | 8 ++++---- lib/routes/dailypush/tags.ts | 8 ++++---- lib/routes/dailypush/utils.ts | 13 +++++++------ lib/routes/dcard/section.ts | 8 ++++---- lib/routes/dcard/utils.ts | 4 ++-- lib/routes/douyin/hashtag.ts | 6 +++--- lib/routes/douyin/live.ts | 6 +++--- lib/routes/douyin/user.ts | 6 +++--- lib/routes/fortnite/news.ts | 6 +++--- lib/routes/gov/customs/list.ts | 8 ++++---- lib/routes/gov/customs/utils.ts | 4 ++-- lib/routes/gov/hangzhou/zjzwfw.ts | 4 ++-- lib/routes/gov/hangzhou/zwfw.tsx | 6 +++--- lib/routes/gov/pbc/goutongjiaoliu.ts | 8 ++++---- lib/routes/gov/pbc/trade-announcement.ts | 8 ++++---- lib/routes/hitcon/zeroday.tsx | 13 ++++++------- lib/routes/hkushop/vinyl-or-picture-lp.ts | 6 +++--- lib/routes/hottoys/index.ts | 6 +++--- lib/routes/ielts/index.ts | 6 +++--- lib/routes/iqiyi/video.ts | 6 +++--- lib/routes/javdb/utils.ts | 4 ++-- lib/routes/javtrailers/casts.ts | 8 ++++---- lib/routes/javtrailers/categories.ts | 8 ++++---- lib/routes/javtrailers/studios.ts | 8 ++++---- lib/routes/javtrailers/utils.ts | 8 ++++---- lib/routes/kuaishou/profile.ts | 6 +++--- lib/routes/linkedin/posts.ts | 6 +++--- lib/routes/missav/new.tsx | 12 ++++++------ lib/routes/nhentai/util.tsx | 6 +++--- 
lib/routes/njust/utils.ts | 6 +++--- lib/routes/nuaa/utils/pypasswaf.ts | 6 +++--- lib/routes/nytimes/index.ts | 6 +++--- lib/routes/nytimes/utils.ts | 4 ++-- lib/routes/oceanengine/arithmetic-index.tsx | 12 ++++++------ lib/routes/parliament.uk/commonslibrary.ts | 6 +++--- lib/routes/parliament.uk/lordslibrary.ts | 6 +++--- lib/routes/perplexity/blog.ts | 4 ++-- lib/routes/perplexity/changelog.ts | 4 ++-- lib/routes/pincong/utils.ts | 6 +++--- lib/routes/pnas/index.tsx | 12 ++++++------ lib/routes/researchgate/publications.ts | 8 ++++---- lib/routes/science/blogs.ts | 6 +++--- lib/routes/science/current.ts | 6 +++--- lib/routes/science/early.ts | 6 +++--- lib/routes/science/utils.tsx | 10 +++++----- lib/routes/sotwe/user.ts | 6 +++--- lib/routes/spankbang/new-videos.tsx | 12 ++++++------ lib/routes/tiktok/user.ts | 6 +++--- lib/routes/twitter/api/web-api/login.ts | 6 +++--- lib/routes/uchicago/current.ts | 8 ++++---- lib/routes/uestc/auto.ts | 6 +++--- lib/routes/uestc/cqe.ts | 6 +++--- lib/routes/uestc/scse.ts | 6 +++--- lib/routes/uestc/sice.ts | 6 +++--- lib/routes/uestc/sise.ts | 6 +++--- lib/routes/ups/track.ts | 6 +++--- lib/routes/uraaka-joshi/uraaka-joshi-user.ts | 6 +++--- lib/routes/uraaka-joshi/uraaka-joshi.ts | 6 +++--- lib/routes/weibo/utils.ts | 2 +- lib/routes/xiaohongshu/util.ts | 6 +++--- lib/routes/xsijishe/rank.ts | 6 +++--- lib/routes/xsijishe/utils.ts | 9 +++++---- lib/routes/xueqiu/cookies.ts | 4 ++-- lib/routes/xueqiu/user.ts | 6 +++--- 82 files changed, 284 insertions(+), 281 deletions(-) diff --git a/lib/bilibili-video-route.test.ts b/lib/bilibili-video-route.test.ts index 1da126cda7ea..39dd7ea1b034 100644 --- a/lib/bilibili-video-route.test.ts +++ b/lib/bilibili-video-route.test.ts @@ -106,6 +106,7 @@ describe('/bilibili/user/video/:uid', () => { getPlaywrightPage.mockImplementation(async (_url, options) => { await options.onBeforeLoad?.(page); return { + context: {}, destroy, page, }; @@ -168,6 +169,7 @@ 
describe('/bilibili/user/video/:uid', () => { getPlaywrightPage.mockImplementation(async (_url, options) => { await options.onBeforeLoad?.(page); return { + context: {}, destroy, page, }; diff --git a/lib/routes/acs/journal.tsx b/lib/routes/acs/journal.tsx index 4ad4d5587da1..d5f2b847817d 100644 --- a/lib/routes/acs/journal.tsx +++ b/lib/routes/acs/journal.tsx @@ -28,14 +28,14 @@ async function handler(ctx) { let title = ''; - const browser = await playwright(); + const context = await playwright(); const items = await cache.tryGet( currentUrl, async () => { - const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + const page = await context.newPage(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); }); await page.goto(currentUrl, { waitUntil: 'domcontentloaded', @@ -76,7 +76,7 @@ async function handler(ctx) { false ); - await browser.close(); + await context.close(); return { title, diff --git a/lib/routes/aip/journal-pupp.ts b/lib/routes/aip/journal-pupp.ts index 845661a89223..3e0412b5a127 100644 --- a/lib/routes/aip/journal-pupp.ts +++ b/lib/routes/aip/journal-pupp.ts @@ -18,12 +18,12 @@ const handler = async (ctx) => { } // use Playwright due to the obstacle by cloudflare challenge - const browser = await playwright(); + const context = await playwright(); const { jrnlName, list } = await cache.tryGet( jrnlUrl, async () => { - const response = await playwrightGet(jrnlUrl, browser); + const response = await playwrightGet(jrnlUrl, context); const $ = load(response); const jrnlName = $('.header-journal-title').text(); const list = $('.card') @@ -52,7 +52,7 @@ const handler = async (ctx) => { false ); - await browser.close(); + await 
context.close(); return { title: jrnlName, diff --git a/lib/routes/aip/utils.tsx b/lib/routes/aip/utils.tsx index ba592557f627..00826b987be3 100644 --- a/lib/routes/aip/utils.tsx +++ b/lib/routes/aip/utils.tsx @@ -1,11 +1,11 @@ import { renderToString } from 'hono/jsx/dom/server'; -const playwrightGet = async (url, browser) => { - const page = await browser.newPage(); +const playwrightGet = async (url, context) => { + const page = await context.newPage(); // await page.setExtraHTTPHeaders({ referer: host }); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' ? route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', diff --git a/lib/routes/alternativeto/utils.ts b/lib/routes/alternativeto/utils.ts index f7f74eddf774..8160f19c5471 100644 --- a/lib/routes/alternativeto/utils.ts +++ b/lib/routes/alternativeto/utils.ts @@ -4,8 +4,8 @@ const baseURL = 'https://alternativeto.net'; const playwrightGet = (url, cache) => cache.tryGet(url, async () => { - const browser = await playwright(); - const page = await browser.newPage(); + const context = await playwright(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' ? 
route.continue() : route.abort(); @@ -14,7 +14,7 @@ const playwrightGet = (url, cache) => waitUntil: 'domcontentloaded', }); const html = await page.evaluate(() => document.documentElement.innerHTML); - await browser.close(); + await context.close(); return html; }); diff --git a/lib/routes/apkpure/versions.ts b/lib/routes/apkpure/versions.ts index 765dc6d66c0b..bce6bb781d1e 100644 --- a/lib/routes/apkpure/versions.ts +++ b/lib/routes/apkpure/versions.ts @@ -28,8 +28,8 @@ async function handler(ctx) { const baseUrl = 'https://apkpure.com'; const link = `${baseUrl}/${region}/${pkg}/versions`; - const browser = await playwright(); - const page = await browser.newPage(); + const context = await playwright(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' ? route.continue() : route.abort(); @@ -40,7 +40,7 @@ async function handler(ctx) { }); const r = await page.evaluate(() => document.documentElement.innerHTML); - await browser.close(); + await context.close(); const $ = load(r); const img = new URL($('.ver-top img').attr('src')); diff --git a/lib/routes/bluestacks/release.ts b/lib/routes/bluestacks/release.ts index c54ecf19f71f..19fc5b164813 100644 --- a/lib/routes/bluestacks/release.ts +++ b/lib/routes/bluestacks/release.ts @@ -32,8 +32,8 @@ export const route: Route = { }; async function handler() { - const browser = await playwright(); - const page = await browser.newPage(); + const context = await playwright(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); @@ -59,7 +59,7 @@ async function handler() { await Promise.all( items.map((item) => cache.tryGet(item.link, async () => { - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); @@ -79,7 +79,7 @@ async function handler() { ) ); - await browser.close(); + await context.close(); return { title: $('.article__title').text().trim(), diff --git a/lib/routes/ccac/news.ts b/lib/routes/ccac/news.ts index 68bfaa99ade8..508c196ce03a 100644 --- a/lib/routes/ccac/news.ts +++ b/lib/routes/ccac/news.ts @@ -32,12 +32,12 @@ export const route: Route = { }; async function handler(ctx) { - const browser = await playwright(); + const context = await playwright(); const lang = ctx.req.param('lang') ?? 'sc'; const type = utils.TYPE[ctx.req.param('type')]; const BASE = utils.langBase(lang); - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); @@ -46,7 +46,7 @@ async function handler(ctx) { waitUntil: 'domcontentloaded', }); const articles = await page.evaluate(() => window.articles); - await browser.close(); + await context.close(); const list = utils .typeFilter(articles, type) diff --git a/lib/routes/chinadegrees/province.tsx b/lib/routes/chinadegrees/province.tsx index e32c38d7da5a..e129324fe2f2 100644 --- a/lib/routes/chinadegrees/province.tsx +++ b/lib/routes/chinadegrees/province.tsx @@ -82,11 +82,11 @@ async function handler(ctx) { const data = await cache.tryGet( url, async () => { - const browser = await playwright(); - const page = await browser.newPage(); - await page.setRequestInterception(true); - page.on('request', (request) => { - request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); + const context = await playwright(); + const page = await context.newPage(); + await page.route('**/*', (route) => { + const request = route.request(); + request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); }); await page.goto(url, { waitUntil: 'domcontentloaded', @@ -94,7 +94,7 @@ async function handler(ctx) { await page.waitForSelector('.datalist'); const html = await page.evaluate(() => document.documentElement.innerHTML); - await browser.close(); + await context.close(); const $ = load(html); return { diff --git a/lib/routes/chinatimes/index.ts b/lib/routes/chinatimes/index.ts index 2ebf7feb4682..20c9bdab7a1f 100644 --- a/lib/routes/chinatimes/index.ts +++ b/lib/routes/chinatimes/index.ts @@ -43,7 +43,7 @@ async function handler(ctx) { const response = await ofetch(link); const $ = load(response); - const browser = await playwright(); + const context = await playwright(); const list = $('.articlebox-compact') .toArray() @@ -66,7 +66,7 @@ async function handler(ctx) { const items = await Promise.all( list.map((item) => cache.tryGet(item.link, async () => { - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' ? route.continue() : route.abort(); @@ -98,7 +98,7 @@ async function handler(ctx) { ) ); - await browser.close(); + await context.close(); return { title: $('head title').text(), diff --git a/lib/routes/cjlu/yjsy/index.ts b/lib/routes/cjlu/yjsy/index.ts index ed16abb69076..8bf31f06a120 100644 --- a/lib/routes/cjlu/yjsy/index.ts +++ b/lib/routes/cjlu/yjsy/index.ts @@ -89,7 +89,6 @@ async function handler(ctx) { const { page, destroy } = await getPlaywrightPage(url, { onBeforeLoad: async (page) => { await page.setExtraHTTPHeaders(headers); - await page.setUserAgent(headers['User-Agent']); await page.route('**/*', (route) => { const request = route.request(); allowedResourceTypes.has(request.resourceType()) ? 
route.continue() : route.abort(); diff --git a/lib/routes/cmde/index.ts b/lib/routes/cmde/index.ts index 6aa6e6476ecc..6e3d8c1cfcb7 100644 --- a/lib/routes/cmde/index.ts +++ b/lib/routes/cmde/index.ts @@ -18,9 +18,9 @@ export const route: Route = { async function handler(ctx) { const cate = ctx.req.param('cate') ?? 'xwdt/zxyw'; const url = `${rootURL}/${cate}/`; - const browser = await playwright(); + const context = await playwright(); const data = await cache.tryGet(url, async () => { - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); @@ -52,7 +52,7 @@ async function handler(ctx) { const items = await Promise.all( data.items.map((item) => cache.tryGet(item.link, async () => { - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); @@ -72,7 +72,7 @@ async function handler(ctx) { ) ); - await browser.close(); + await context.close(); return { title: data.title, diff --git a/lib/routes/colamanga/manga.ts b/lib/routes/colamanga/manga.ts index 9601e8e8809c..bda4c200b721 100644 --- a/lib/routes/colamanga/manga.ts +++ b/lib/routes/colamanga/manga.ts @@ -43,9 +43,9 @@ async function handler(ctx: Context) { const id = ctx.req.param('id'); const url = `https://${domain}/${id}`; - const browser = await playwright(); + const context = await playwright(); - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); @@ -59,7 +59,7 @@ async function handler(ctx: Context) { }); const response = await page.content(); - await browser.close(); + await context.close(); const $ = load(response); diff --git a/lib/routes/cw/author.ts b/lib/routes/cw/author.ts index 2be67a126dfe..fda1501a0996 100644 --- a/lib/routes/cw/author.ts +++ b/lib/routes/cw/author.ts @@ -27,11 +27,11 @@ export const route: Route = { }; async function handler(ctx) { - const browser = await playwright(); + const context = await playwright(); - const { $, items } = await parsePage('author', browser, ctx); + const { $, items } = await parsePage('author', context, ctx); - await browser.close(); + await context.close(); return { title: $('head title').text(), diff --git a/lib/routes/cw/master.ts b/lib/routes/cw/master.ts index eaae855f3f21..239d6c22a334 100644 --- a/lib/routes/cw/master.ts +++ b/lib/routes/cw/master.ts @@ -37,11 +37,11 @@ export const route: Route = { }; async function handler(ctx) { - const browser = await playwright(); + const context = await playwright(); - const { $, items } = await parsePage('master', browser, ctx); + const { $, items } = await parsePage('master', context, ctx); - await browser.close(); + await context.close(); return { title: $('head title').text(), diff --git a/lib/routes/cw/sub.ts 
b/lib/routes/cw/sub.ts index 3f5abdb728bb..a374c6d1921c 100644 --- a/lib/routes/cw/sub.ts +++ b/lib/routes/cw/sub.ts @@ -22,11 +22,11 @@ export const route: Route = { }; async function handler(ctx) { - const browser = await playwright(); + const context = await playwright(); - const { $, items } = await parsePage('sub', browser, ctx); + const { $, items } = await parsePage('sub', context, ctx); - await browser.close(); + await context.close(); return { title: $('head title').text(), diff --git a/lib/routes/cw/today.ts b/lib/routes/cw/today.ts index dd34056332ab..73e4303f4c0e 100644 --- a/lib/routes/cw/today.ts +++ b/lib/routes/cw/today.ts @@ -28,11 +28,11 @@ export const route: Route = { }; async function handler(ctx) { - const browser = await playwright(); + const context = await playwright(); - const { $, items } = await parsePage('today', browser, ctx); + const { $, items } = await parsePage('today', context, ctx); - await browser.close(); + await context.close(); return { title: $('head title').text(), diff --git a/lib/routes/cw/utils.ts b/lib/routes/cw/utils.ts index 450886917408..be75a2aafa30 100644 --- a/lib/routes/cw/utils.ts +++ b/lib/routes/cw/utils.ts @@ -1,5 +1,6 @@ import { load } from 'cheerio'; +import { config } from '@/config'; import cache from '@/utils/cache'; import logger from '@/utils/logger'; import ofetch from '@/utils/ofetch'; @@ -29,10 +30,10 @@ const pathMap = { }, }; -const getCookie = async (browser, tryGet) => { +const getCookie = async (context, tryGet) => { if (!cookie) { cookie = await tryGet('cw:cookie', async () => { - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); @@ -49,11 +50,11 @@ const getCookie = async (browser, tryGet) => { return cookie; }; -const parsePage = async (path, browser, ctx) => { +const parsePage = async (path, context, ctx) => { const pageUrl = `${baseUrl}${pathMap[path].pageUrl(ctx.req.param('channel'))}`; - const cookie = await getCookie(browser, cache.tryGet); - const page = await browser.newPage(); + const cookie = await getCookie(context, cache.tryGet); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' ? route.continue() : route.abort(); @@ -70,7 +71,7 @@ const parsePage = async (path, browser, ctx) => { const $ = load(response); const list = parseList($, ctx.req.query('limit') ? Number(ctx.req.query('limit')) : pathMap[path].limit); - const items = await parseItems(list, browser, cache.tryGet); + const items = await parseItems(list, context, cache.tryGet); return { $, items }; }; @@ -88,14 +89,14 @@ const parseList = ($, limit) => }) .slice(0, limit); -const parseItems = (list, browser, tryGet) => +const parseItems = (list, context, tryGet) => Promise.all( list.map((item) => tryGet(item.link, async () => { const response = await ofetch(item.link, { headers: { - Cookie: await getCookie(browser, tryGet), - 'User-Agent': browser.userAgent(), + Cookie: await getCookie(context, tryGet), + 'User-Agent': config.ua, }, }); const $ = load(response); diff --git a/lib/routes/dailypush/all.ts b/lib/routes/dailypush/all.ts index 6a39fe133178..e92405045348 100644 --- a/lib/routes/dailypush/all.ts +++ b/lib/routes/dailypush/all.ts @@ -42,12 +42,12 @@ async function handler(ctx) { const { sort = '' } = ctx.req.param(); const url = sort ? 
`${BASE_URL}/${sort}` : BASE_URL; - const browser = await playwright(); + const context = await playwright(); try { - const html = await fetchPageHtml(browser, url, 'article'); + const html = await fetchPageHtml(context, url, 'article'); const $ = load(html); const list = parseArticles($, BASE_URL); - const items = await enhanceItemsWithSummaries(browser, list); + const items = await enhanceItemsWithSummaries(context, list); const pageTitle = $('title').text() || 'DailyPush - All'; @@ -57,6 +57,6 @@ async function handler(ctx) { item: items, }; } finally { - await browser.close(); + await context.close(); } } diff --git a/lib/routes/dailypush/tags.ts b/lib/routes/dailypush/tags.ts index c1430d29ae47..0cac69fc7345 100644 --- a/lib/routes/dailypush/tags.ts +++ b/lib/routes/dailypush/tags.ts @@ -43,12 +43,12 @@ async function handler(ctx) { const { tag, sort = 'trending' } = ctx.req.param(); const url = `${BASE_URL}/${tag}/${sort}`; - const browser = await playwright(); + const context = await playwright(); try { - const html = await fetchPageHtml(browser, url, 'article'); + const html = await fetchPageHtml(context, url, 'article'); const $ = load(html); const list = parseArticles($, BASE_URL); - const items = await enhanceItemsWithSummaries(browser, list); + const items = await enhanceItemsWithSummaries(context, list); const pageTitle = $('title').text() || `DailyPush - ${tag.charAt(0).toUpperCase() + tag.slice(1)}`; @@ -58,6 +58,6 @@ async function handler(ctx) { item: items, }; } finally { - await browser.close(); + await context.close(); } } diff --git a/lib/routes/dailypush/utils.ts b/lib/routes/dailypush/utils.ts index a276c93fb5cc..a19c71a7d848 100644 --- a/lib/routes/dailypush/utils.ts +++ b/lib/routes/dailypush/utils.ts @@ -1,11 +1,12 @@ import type { CheerioAPI } from 'cheerio'; import { load } from 'cheerio'; +import type { BrowserContext } from 'playwright-core'; import type { DataItem } from '@/types'; import cache from '@/utils/cache'; import logger from 
'@/utils/logger'; import { parseRelativeDate } from '@/utils/parse-date'; -import type { Browser, Page } from '@/utils/playwright'; +import type { Page } from '@/utils/playwright'; export const BASE_URL = 'https://www.dailypush.dev'; @@ -34,8 +35,8 @@ async function preparePage(page: Page) { }); } -export async function fetchPageHtml(browser: Browser, url: string, waitForSelector?: string): Promise { - const page = await browser.newPage(); +export async function fetchPageHtml(context: BrowserContext, url: string, waitForSelector?: string): Promise { + const page = await context.newPage(); await preparePage(page); try { @@ -259,9 +260,9 @@ export function parseArticles($: CheerioAPI, baseUrl: string): ArticleItem[] { /** * Enhance items with full summaries from dailypush article pages. - * Uses the provided browser; opens a new tab per URL (document requests only). Caller must close the browser. + * Uses the provided context; opens a new tab per URL (document requests only). Caller must close the context. 
*/ -export async function enhanceItemsWithSummaries(browser: Browser, items: ArticleItem[]): Promise { +export async function enhanceItemsWithSummaries(context: BrowserContext, items: ArticleItem[]): Promise { const itemsWithUrl = items.filter((item) => item.dailyPushUrl !== undefined); const itemsWithoutUrl: DataItem[] = items.filter((item) => item.dailyPushUrl === undefined); @@ -269,7 +270,7 @@ export async function enhanceItemsWithSummaries(browser: Browser, items: Article itemsWithUrl.map((item) => cache.tryGet(item.dailyPushUrl!, async () => { try { - const html = await fetchPageHtml(browser, item.dailyPushUrl!, 'p.font-ibm-plex-sans.leading-relaxed'); + const html = await fetchPageHtml(context, item.dailyPushUrl!, 'p.font-ibm-plex-sans.leading-relaxed'); const $ = load(html); const summary = $('p.font-ibm-plex-sans.leading-relaxed'); if (summary.length > 0 && summary.text().trim()) { diff --git a/lib/routes/dcard/section.ts b/lib/routes/dcard/section.ts index 1de2b184dbc4..9c30bc07c7fa 100644 --- a/lib/routes/dcard/section.ts +++ b/lib/routes/dcard/section.ts @@ -26,7 +26,7 @@ export const route: Route = { async function handler(ctx) { const { type = 'latest', section = 'posts' } = ctx.req.param(); const limit = ctx.req.query('limit') ? Number(ctx.req.query('limit')) : 30; - const browser = await playwright(); + const context = await playwright(); let link = 'https://www.dcard.tw/f'; let api = 'https://www.dcard.tw/service/api/v2'; @@ -48,7 +48,7 @@ async function handler(ctx) { title += '最新'; } - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' ? 
route.continue() : route.abort(); @@ -76,8 +76,8 @@ async function handler(ctx) { })); // parse fulltext for first `limit` items - const result = await utils.ProcessFeed(items, cookies, browser, limit, cache); - await browser.close(); + const result = await utils.ProcessFeed(items, cookies, context, limit, cache); + await context.close(); return { title, diff --git a/lib/routes/dcard/utils.ts b/lib/routes/dcard/utils.ts index be7f0b709069..0262350f7541 100644 --- a/lib/routes/dcard/utils.ts +++ b/lib/routes/dcard/utils.ts @@ -1,6 +1,6 @@ import pMap from 'p-map'; -const ProcessFeed = async (items, cookies, browser, limit, cache) => { +const ProcessFeed = async (items, cookies, context, limit, cache) => { let newCookies = []; const result = await pMap( items.slice(0, limit), @@ -10,7 +10,7 @@ const ProcessFeed = async (items, cookies, browser, limit, cache) => { let response; // try catch 处理被删除的帖子 try { - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' || request.resourceType() === 'xhr' ? 
route.continue() : route.abort(); diff --git a/lib/routes/douyin/hashtag.ts b/lib/routes/douyin/hashtag.ts index 557ea5330d7f..77541e05e406 100644 --- a/lib/routes/douyin/hashtag.ts +++ b/lib/routes/douyin/hashtag.ts @@ -47,8 +47,8 @@ async function handler(ctx) { const tagData = await cache.tryGet( `douyin:hashtag:${cid}`, async () => { - const browser = await playwright(); - const page = await browser.newPage(); + const context = await playwright(); + const page = await context.newPage(); let awemeList = ''; await page.route('**/*', (route) => { const request = route.request(); @@ -65,7 +65,7 @@ async function handler(ctx) { }); await page.waitForSelector('#RENDER_DATA'); const html = await page.evaluate(() => document.querySelector('#RENDER_DATA').textContent); - await browser.close(); + await context.close(); const renderData = JSON.parse(decodeURIComponent(html)); const dataKey = Object.keys(renderData).find((key) => renderData[key].topicDetail); diff --git a/lib/routes/douyin/live.ts b/lib/routes/douyin/live.ts index ba517a3ba3bd..93cb864a36f3 100644 --- a/lib/routes/douyin/live.ts +++ b/lib/routes/douyin/live.ts @@ -42,8 +42,8 @@ async function handler(ctx) { `douyin:live:${rid}`, async () => { let roomInfo; - const browser = await playwright(); - const page = await browser.newPage(); + const context = await playwright(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'stylesheet' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? 
route.continue() : route.abort(); @@ -58,7 +58,7 @@ async function handler(ctx) { await page.goto(pageUrl, { waitUntil: 'networkidle', }); - await browser.close(); + await context.close(); return roomInfo; }, diff --git a/lib/routes/douyin/user.ts b/lib/routes/douyin/user.ts index 38ae08da8db1..ee7994e93c3f 100644 --- a/lib/routes/douyin/user.ts +++ b/lib/routes/douyin/user.ts @@ -50,8 +50,8 @@ async function handler(ctx) { `douyin:user:${uid}`, async () => { let postData; - const browser = await playwright(); - const page = await browser.newPage(); + const context = await playwright(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? route.continue() : route.abort(); @@ -68,7 +68,7 @@ async function handler(ctx) { waitUntil: 'networkidle', }); - await browser.close(); + await context.close(); if (!postData) { throw new Error('Empty post data. The request may be filtered by WAF.'); diff --git a/lib/routes/fortnite/news.ts b/lib/routes/fortnite/news.ts index 00c352f65bca..2c2fad0058fe 100644 --- a/lib/routes/fortnite/news.ts +++ b/lib/routes/fortnite/news.ts @@ -40,8 +40,8 @@ async function handler(ctx) { const apiUrl = `https://www.fortnite.com/api/blog/getPosts?category=&postsPerPage=0&offset=0&locale=${language}&rootPageSlug=blog`; // Use Playwright instead of got, which may be blocked by anti-crawling scripts with response code 403. 
- const browser = await playwright(); - const page = await browser.newPage(); + const context = await playwright(); + const page = await context.newPage(); // only document is allowed await page.route('**/*', (route) => { @@ -72,7 +72,7 @@ async function handler(ctx) { data = await response.json(); } finally { await page.close(); - await browser.close(); + await context.close(); } const { blogList: list } = data; diff --git a/lib/routes/gov/customs/list.ts b/lib/routes/gov/customs/list.ts index 21ca8ee6891b..e177ce6fc76d 100644 --- a/lib/routes/gov/customs/list.ts +++ b/lib/routes/gov/customs/list.ts @@ -61,12 +61,12 @@ async function handler(ctx) { break; } - const browser = await playwright(); + const context = await playwright(); const list = await cache.tryGet( link, async () => { - const response = await playwrightGet(link, browser); + const response = await playwrightGet(link, context); const $ = load(response); const list = $('[class^="conList_ul"] li') .toArray() @@ -90,7 +90,7 @@ async function handler(ctx) { if (info.link.endsWith('.pdf') || info.link.endsWith('.doc')) { return info; } - const response = await playwrightGet(info.link, browser); + const response = await playwrightGet(info.link, context); const $ = load(response); let date; @@ -110,7 +110,7 @@ async function handler(ctx) { ) ); - await browser.close(); + await context.close(); return { title: `中国海关-${channelName}`, diff --git a/lib/routes/gov/customs/utils.ts b/lib/routes/gov/customs/utils.ts index 854e654e0cda..9a2d92359f13 100644 --- a/lib/routes/gov/customs/utils.ts +++ b/lib/routes/gov/customs/utils.ts @@ -1,7 +1,7 @@ const host = 'http://www.customs.gov.cn'; -const playwrightGet = async (url, browser) => { - const page = await browser.newPage(); +const playwrightGet = async (url, context) => { + const page = await context.newPage(); await page.setExtraHTTPHeaders({ referer: host }); await page.route('**/*', (route) => { const request = route.request(); diff --git 
a/lib/routes/gov/hangzhou/zjzwfw.ts b/lib/routes/gov/hangzhou/zjzwfw.ts index 6c5076e7da8f..949c9ac27e14 100644 --- a/lib/routes/gov/hangzhou/zjzwfw.ts +++ b/lib/routes/gov/hangzhou/zjzwfw.ts @@ -1,9 +1,9 @@ import logger from '@/utils/logger'; -export async function crawler(item: any, browser: any): Promise { +export async function crawler(item: any, context: any): Promise { try { let response = ''; - const page = await browser.newPage(); + const page = await context.newPage(); await page.route('**/*', (route) => { const request = route.request(); const resourceType = request.resourceType(); diff --git a/lib/routes/gov/hangzhou/zwfw.tsx b/lib/routes/gov/hangzhou/zwfw.tsx index 224d7e5555a9..f5cd4cdfe64f 100644 --- a/lib/routes/gov/hangzhou/zwfw.tsx +++ b/lib/routes/gov/hangzhou/zwfw.tsx @@ -216,7 +216,7 @@ async function handler() { const host = 'https://www.hangzhou.gov.cn/col/col1256349/index.html'; const response = await ofetch(host); - const browser = await playwright(); + const context = await playwright(); const link = host; const formatted = response .replace('