diff --git a/lib/routes/meta/ai-blog.ts b/lib/routes/meta/ai-blog.ts index ab308d5629e8..26512e762b8e 100644 --- a/lib/routes/meta/ai-blog.ts +++ b/lib/routes/meta/ai-blog.ts @@ -1,9 +1,9 @@ -import { load } from 'cheerio'; - import type { Route } from '@/types'; import ofetch from '@/utils/ofetch'; import { parseDate } from '@/utils/parse-date'; +import { buildGraphqlBody, getMetaServerContext, GRAPHQL_ENDPOINT, metaGraphqlHeaders } from './utils'; + export const route: Route = { path: '/ai/blog', categories: ['programming'], @@ -23,82 +23,17 @@ async function handler(ctx) { const limit = Number.parseInt(ctx.req.query('limit') || 12, 10); const link = 'https://ai.meta.com/blog/'; - const res = await ofetch(link, { - headers: { - // All these headers are required - 'sec-fetch-dest': 'document', - 'sec-fetch-mode': 'navigate', - 'sec-fetch-site': 'none', - 'sec-fetch-user': '?1', - }, - }); - const $ = load(res); - const script = $('script:contains("DTSGInitialData"):first').text(); - const serverJs = JSON.parse(script.match(/\(new ServerJS\(\)\)\.handle\((\{[\s\S]*?\})\);/)?.[1] || '{}'); - - type ServerData = { - LSD: { token: string }; - SiteData: { - haste_session: string; - hsi: string; - __spin_r: number; - __spin_b: string; - __spin_t: number; - }; - }; + /* Page fetch and ServerJS parsing now come from getMetaServerContext in ./utils. */ const { $, server } = await getMetaServerContext(link); + const friendlyName = 'MetaAIBlogRecentPostSearchQuery'; - const server: ServerData = { - LSD: { token: '' }, - SiteData: { - haste_session: '', - hsi: '', - __spin_r: 0, - __spin_b: 'trunk', - __spin_t: Date.now(), - }, - }; - - for (const obj of serverJs.define) { - const key = obj[0]; - const value = obj[2]; - server[key as keyof ServerData] = value; - } - - const data = await ofetch('https://ai.meta.com/api/graphql/', { + const data = await ofetch(GRAPHQL_ENDPOINT, { method: 'POST', - headers: { - 'content-type': 'application/x-www-form-urlencoded', - 'sec-fetch-dest': 'empty', - 'sec-fetch-mode': 'cors', - 'sec-fetch-site': 'same-origin', - 
'x-asbd-id': '359341', - 'x-fb-friendly-name': 'MetaAIBlogRecentPostSearchQuery', - 'x-fb-lsd': server.LSD.token, - }, - body: new URLSearchParams({ - av: '0', - __user: '0', - __a: '1', - __req: '1', - // __hs: server.SiteData.haste_session || '', - dpr: '1', - __ccg: 'EXCELLENT', - __rev: String(server.SiteData.__spin_r || ''), - // __s: '', - // __hsi: server.SiteData.hsi || '', - // __dyn: '', - // __hsdp: '', - // __hblp: '', - lsd: server.LSD.token, - // jazoest: '', - __spin_r: String(server.SiteData.__spin_r || ''), - __spin_b: String(server.SiteData.__spin_b || 'trunk'), - __spin_t: String(server.SiteData.__spin_t || Date.now()), - fb_api_caller_class: 'RelayModern', - fb_api_req_friendly_name: 'MetaAIBlogRecentPostSearchQuery', - variables: JSON.stringify({ input: { query: '', from: 0, limit, tags: [], excludeObjectIDs: ['27568536916124137'] } }), - server_timestamps: 'true', - doc_id: '9516719638450392', + headers: metaGraphqlHeaders(server, friendlyName), + body: buildGraphqlBody({ + server, + friendlyName, + docId: '9516719638450392', + variables: { input: { query: '', from: 0, limit, tags: [], excludeObjectIDs: ['27568536916124137'] } }, }), parseResponse: JSON.parse, }); diff --git a/lib/routes/meta/ai-global-search.ts b/lib/routes/meta/ai-global-search.ts new file mode 100644 index 000000000000..9f7c0fc538bb --- /dev/null +++ b/lib/routes/meta/ai-global-search.ts @@ -0,0 +1,151 @@ +import querystring from 'node:querystring'; + +import type { Route } from '@/types'; +import ofetch from '@/utils/ofetch'; +import { parseDate } from '@/utils/parse-date'; + +import { buildGraphqlBody, getMetaServerContext, GRAPHQL_ENDPOINT, metaGraphqlHeaders } from './utils'; + +/** Route metadata for /ai/global-search; the search filters arrive in the optional routeParams path segment. */ export const route: Route = { + path: '/ai/global-search/:routeParams?', + categories: ['programming'], + example: '/meta/ai/global-search/content_types=blog', + name: 'AI Global Search', + maintainers: ['shcheglovnd'], + url: 'ai.meta.com/global_search/', + parameters: { + routeParams: + 
'URL-encoded query string of filters (path-based so each combination caches independently). Supported keys: `q` (search query), `content_types` (comma-separated: `person`, `publication`, `blog`, `dataset`, `event`, `tool`), `research_areas` (e.g. `natural-language-processing,computer-vision`), `filter_tags` (`research`, `ml-applications`, `open-source`, `developer-tools`, `ar-vr`, `hardware`), `years` (e.g. `2024,2025`), `location_cities` (publication venues like `AAAI,ACL`), `alphabetical_filter` (single letter, pairs with `content_types=person`+`sort_by=ALPHABETICAL`), `sort_by` (`RELEVANCE`, `MOST_RECENT`, `ALPHABETICAL`, `RANDOM`, default `RELEVANCE`), `offset` (default `0`). Combine multiple filters by encoding `&` as `%26`.', + }, + description: 'Page size can be tuned with the `limit` query string parameter (default `36`).', + radar: [ + { + source: ['ai.meta.com/global_search/', 'ai.meta.com/global_search', 'ai.meta.com/results/'], + }, + ], + handler, +}; + +/** Splits a comma-separated value into trimmed, non-empty entries. Returns null for a falsy value or when no entries remain. */ const toList = (value: string | undefined): string[] | null => { + if (!value) { + return null; + } + const list = value + .split(',') + .map((s) => s.trim()) + .filter(Boolean); + return list.length ? list : null; +}; + +/** Returns the first element when value is an array, otherwise value itself. */ const firstString = (value: string | string[] | undefined): string | undefined => (Array.isArray(value) ? 
value[0] : value); + +/** Shape assumed for each entry of result_shapes in the GraphQL response; mapItem reads title, description, href, published_time, authors, type, tags, image_src and cmsid. */ type ResultShape = { + title: string; + description: string | null; + href: string; + image_src: string | null; + cmsid: string; + type: string; + authors: string | null; + tags: string[] | null; + location: string | null; + journal_number: string | null; + published_time: string | null; + year: string | null; +}; + +/** Object sent as the input field of the GraphQL variables. */ type SearchInput = { + alphabetical_filter: string | null; + content_types: string[] | null; + offset: number; + search_query: string; + sort_by: string; + filter_tags: string[] | null; + location_cities: string[] | null; + research_areas: string[] | null; + years: string[] | null; +}; + +/** Builds the SearchInput from the parsed route params. Only the first value of each key is used (firstString). List-valued keys go through toList; search_query falls back from q to search_query to an empty string; sort_by defaults to RELEVANCE; offset defaults to 0. NOTE(review): a non-numeric offset yields NaN from Number.parseInt, which JSON.stringify emits as null. */ const buildSearchInput = (params: querystring.ParsedUrlQuery): SearchInput => ({ + alphabetical_filter: firstString(params.alphabetical_filter) || null, + content_types: toList(firstString(params.content_types)), + offset: Number.parseInt(firstString(params.offset) ?? '0', 10), + search_query: firstString(params.q) ?? firstString(params.search_query) ?? '', + sort_by: firstString(params.sort_by) ?? 
'RELEVANCE', + filter_tags: toList(firstString(params.filter_tags)), + location_cities: toList(firstString(params.location_cities)), + research_areas: toList(firstString(params.research_areas)), + years: toList(firstString(params.years)), +}); + +/** Builds a readable key=value summary of the filters that are set, joined with a middle-dot separator. sort_by and offset are never included. */ const summarizeFilters = (input: SearchInput): string => { + const parts: string[] = []; + if (input.search_query) { + parts.push(`q=${input.search_query}`); + } + if (input.content_types) { + parts.push(`content_types=${input.content_types.join(',')}`); + } + if (input.research_areas) { + parts.push(`research_areas=${input.research_areas.join(',')}`); + } + if (input.filter_tags) { + parts.push(`filter_tags=${input.filter_tags.join(',')}`); + } + if (input.years) { + parts.push(`years=${input.years.join(',')}`); + } + if (input.location_cities) { + parts.push(`location_cities=${input.location_cities.join(',')}`); + } + if (input.alphabetical_filter) { + parts.push(`alphabetical_filter=${input.alphabetical_filter}`); + } + return parts.join(' · '); +}; + +/** Maps one ResultShape to a feed item. An href that does not start with http is prefixed with https://ai.meta.com. published_time is converted with Number(...) * 1000 before parseDate (presumably a timestamp in seconds; confirm against the API). category is type followed by tags, with falsy entries dropped. */ const mapItem = (item: ResultShape) => ({ + title: item.title, + description: item.description ?? '', + link: item.href?.startsWith('http') ? item.href : `https://ai.meta.com${item.href}`, + pubDate: item.published_time ? parseDate(Number(item.published_time) * 1000) : undefined, + author: item.authors || undefined, + category: [item.type, ...(item.tags ?? [])].filter(Boolean) as string[], + image: item.image_src || undefined, + guid: item.cmsid, +}); + +/** Route handler: loads the server context from the global_search page, posts the search described by routeParams to GRAPHQL_ENDPOINT, and returns at most limit mapped results. limit comes from the limit query parameter (default 36) and only truncates the response; it is not part of the GraphQL variables. */ async function handler(ctx) { + const link = 'https://ai.meta.com/global_search/'; + const { server } = await getMetaServerContext(link); + + const params = querystring.parse(ctx.req.param('routeParams') || ''); + const limit = Number.parseInt(ctx.req.query('limit') ?? 
'36', 10); + const input = buildSearchInput(params); + + const friendlyName = 'useFBAIGlobalSearchQuery'; + const data = await ofetch(GRAPHQL_ENDPOINT, { + method: 'POST', + headers: metaGraphqlHeaders(server, friendlyName), + body: buildGraphqlBody({ + server, + friendlyName, + docId: '9716930201759979', + variables: { input }, + }), + parseResponse: JSON.parse, + }); + + const result = data?.data?.result; + const shapes: ResultShape[] = result?.result_shapes ?? []; + const items = shapes.slice(0, limit).map((item) => mapItem(item)); + + const filterSummary = summarizeFilters(input); + const baseTitle = 'Meta AI Global Search'; + return { + title: filterSummary ? `${baseTitle} — ${filterSummary}` : baseTitle, + description: 'Search results from ai.meta.com/global_search/.', + link, + item: items, + }; +} diff --git a/lib/routes/meta/utils.ts b/lib/routes/meta/utils.ts new file mode 100644 index 000000000000..35adf8434dec --- /dev/null +++ b/lib/routes/meta/utils.ts @@ -0,0 +1,82 @@ +import { type CheerioAPI, load } from 'cheerio'; + +import ofetch from '@/utils/ofetch'; + +/** Server values consumed by buildGraphqlBody and metaGraphqlHeaders. */ export type ServerData = { + LSD: { token: string }; + SiteData: { + haste_session: string; + hsi: string; + __spin_r: number; + __spin_b: string; + __spin_t: number; + }; +}; + +/** Fetches link with navigation-style sec-fetch headers, loads the response with cheerio, and reads the first script containing DTSGInitialData. The object passed to (new ServerJS()).handle(...) is JSON-parsed, falling back to an empty object when the pattern is not found. Returns the cheerio handle and the resulting server values. */ export async function getMetaServerContext(link: string): Promise<{ $: CheerioAPI; server: ServerData }> { + const res = await ofetch(link, { + headers: { + 'sec-fetch-dest': 'document', + 'sec-fetch-mode': 'navigate', + 'sec-fetch-site': 'none', + 'sec-fetch-user': '?1', + }, + }); + const $ = load(res); + const script = $('script:contains("DTSGInitialData"):first').text(); + const serverJs = JSON.parse(script.match(/\(new ServerJS\(\)\)\.handle\((\{[\s\S]*?\})\);/)?.[1] || '{}'); + + const server: ServerData = { + LSD: { token: '' }, + SiteData: { + haste_session: '', + hsi: '', + __spin_r: 0, + __spin_b: 'trunk', + __spin_t: Date.now(), + }, + }; + + /* Each define entry is read as index 0 = key and index 2 = value; every key found overwrites or adds a property on server, not only LSD and SiteData. */ for (const obj of serverJs.define ?? 
[]) { + const key = obj[0]; + const value = obj[2]; + server[key as keyof ServerData] = value; + } + + return { $, server }; +} + +/** Builds the URLSearchParams body for a GraphQL POST. __rev and __spin_r both come from server.SiteData.__spin_r (empty string when falsy); __spin_b falls back to trunk and __spin_t to Date.now(); variables is serialized with JSON.stringify. */ export function buildGraphqlBody({ server, friendlyName, docId, variables }: { server: ServerData; friendlyName: string; docId: string; variables: unknown }) { + return new URLSearchParams({ + av: '0', + __user: '0', + __a: '1', + __req: '1', + dpr: '1', + __ccg: 'EXCELLENT', + __rev: String(server.SiteData.__spin_r || ''), + lsd: server.LSD.token, + __spin_r: String(server.SiteData.__spin_r || ''), + __spin_b: String(server.SiteData.__spin_b || 'trunk'), + __spin_t: String(server.SiteData.__spin_t || Date.now()), + fb_api_caller_class: 'RelayModern', + fb_api_req_friendly_name: friendlyName, + variables: JSON.stringify(variables), + server_timestamps: 'true', + doc_id: docId, + }); +} + +/** Returns the headers for a GraphQL POST, with friendlyName as x-fb-friendly-name and server.LSD.token as x-fb-lsd. */ export function metaGraphqlHeaders(server: ServerData, friendlyName: string) { + return { + 'content-type': 'application/x-www-form-urlencoded', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'x-asbd-id': '359341', + 'x-fb-friendly-name': friendlyName, + 'x-fb-lsd': server.LSD.token, + }; +} + +/** URL that both Meta AI route handlers in this patch POST their GraphQL queries to. */ export const GRAPHQL_ENDPOINT = 'https://ai.meta.com/api/graphql/';