Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 11 additions & 76 deletions lib/routes/meta/ai-blog.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { load } from 'cheerio';

import type { Route } from '@/types';
import ofetch from '@/utils/ofetch';
import { parseDate } from '@/utils/parse-date';

import { buildGraphqlBody, getMetaServerContext, GRAPHQL_ENDPOINT, metaGraphqlHeaders } from './utils';

export const route: Route = {
path: '/ai/blog',
categories: ['programming'],
Expand All @@ -23,82 +23,17 @@ async function handler(ctx) {
const limit = Number.parseInt(ctx.req.query('limit') || 12, 10);
const link = 'https://ai.meta.com/blog/';

const res = await ofetch(link, {
headers: {
// All these headers are required
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'none',
'sec-fetch-user': '?1',
},
});
const $ = load(res);
const script = $('script:contains("DTSGInitialData"):first').text();
const serverJs = JSON.parse(script.match(/\(new ServerJS\(\)\)\.handle\((\{[\s\S]*?\})\);/)?.[1] || '{}');

type ServerData = {
LSD: { token: string };
SiteData: {
haste_session: string;
hsi: string;
__spin_r: number;
__spin_b: string;
__spin_t: number;
};
};
const { $, server } = await getMetaServerContext(link);
const friendlyName = 'MetaAIBlogRecentPostSearchQuery';

const server: ServerData = {
LSD: { token: '' },
SiteData: {
haste_session: '',
hsi: '',
__spin_r: 0,
__spin_b: 'trunk',
__spin_t: Date.now(),
},
};

for (const obj of serverJs.define) {
const key = obj[0];
const value = obj[2];
server[key as keyof ServerData] = value;
}

const data = await ofetch('https://ai.meta.com/api/graphql/', {
const data = await ofetch(GRAPHQL_ENDPOINT, {
method: 'POST',
headers: {
'content-type': 'application/x-www-form-urlencoded',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'x-asbd-id': '359341',
'x-fb-friendly-name': 'MetaAIBlogRecentPostSearchQuery',
'x-fb-lsd': server.LSD.token,
},
body: new URLSearchParams({
av: '0',
__user: '0',
__a: '1',
__req: '1',
// __hs: server.SiteData.haste_session || '',
dpr: '1',
__ccg: 'EXCELLENT',
__rev: String(server.SiteData.__spin_r || ''),
// __s: '',
// __hsi: server.SiteData.hsi || '',
// __dyn: '',
// __hsdp: '',
// __hblp: '',
lsd: server.LSD.token,
// jazoest: '',
Comment on lines -83 to -93
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't remove those, as they may be needed later.

__spin_r: String(server.SiteData.__spin_r || ''),
__spin_b: String(server.SiteData.__spin_b || 'trunk'),
__spin_t: String(server.SiteData.__spin_t || Date.now()),
fb_api_caller_class: 'RelayModern',
fb_api_req_friendly_name: 'MetaAIBlogRecentPostSearchQuery',
variables: JSON.stringify({ input: { query: '', from: 0, limit, tags: [], excludeObjectIDs: ['27568536916124137'] } }),
server_timestamps: 'true',
doc_id: '9516719638450392',
headers: metaGraphqlHeaders(server, friendlyName),
body: buildGraphqlBody({
server,
friendlyName,
docId: '9516719638450392',
variables: { input: { query: '', from: 0, limit, tags: [], excludeObjectIDs: ['27568536916124137'] } },
}),
parseResponse: JSON.parse,
});
Expand Down
151 changes: 151 additions & 0 deletions lib/routes/meta/ai-global-search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import querystring from 'node:querystring';

import type { Route } from '@/types';
import ofetch from '@/utils/ofetch';
import { parseDate } from '@/utils/parse-date';

import { buildGraphqlBody, getMetaServerContext, GRAPHQL_ENDPOINT, metaGraphqlHeaders } from './utils';

/**
 * Route definition for the Meta AI global search feed.
 *
 * Filters are carried in the optional `routeParams` path segment as a
 * URL-encoded query string rather than in the real query string; per the
 * parameter description below, this is so that each filter combination is
 * cached independently. Only `limit` is read from the real query string.
 */
export const route: Route = {
path: '/ai/global-search/:routeParams?',
categories: ['programming'],
example: '/meta/ai/global-search/content_types=blog',
name: 'AI Global Search',
maintainers: ['shcheglovnd'],
url: 'ai.meta.com/global_search/',
parameters: {
routeParams:
'URL-encoded query string of filters (path-based so each combination caches independently). Supported keys: `q` (search query), `content_types` (comma-separated: `person`, `publication`, `blog`, `dataset`, `event`, `tool`), `research_areas` (e.g. `natural-language-processing,computer-vision`), `filter_tags` (`research`, `ml-applications`, `open-source`, `developer-tools`, `ar-vr`, `hardware`), `years` (e.g. `2024,2025`), `location_cities` (publication venues like `AAAI,ACL`), `alphabetical_filter` (single letter, pairs with `content_types=person`+`sort_by=ALPHABETICAL`), `sort_by` (`RELEVANCE`, `MOST_RECENT`, `ALPHABETICAL`, `RANDOM`, default `RELEVANCE`), `offset` (default `0`). Combine multiple filters by encoding `&` as `%26`.',
},
description: 'Page size can be tuned with the `limit` query string parameter (default `36`).',
radar: [
{
// Pages on ai.meta.com that this route is offered for.
source: ['ai.meta.com/global_search/', 'ai.meta.com/global_search', 'ai.meta.com/results/'],
},
],
handler,
};

/**
 * Splits a comma-separated filter value into its trimmed, non-empty entries.
 *
 * @param value Raw filter value, e.g. `"blog, tool"`.
 * @returns The entries in their original order, or `null` when the value is
 * missing or contains no usable entry.
 */
const toList = (value: string | undefined): string[] | null => {
    const entries: string[] = [];
    for (const piece of (value ?? '').split(',')) {
        const trimmed = piece.trim();
        if (trimmed) {
            entries.push(trimmed);
        }
    }
    return entries.length > 0 ? entries : null;
};

/**
 * Collapses a parsed query-string value to a single string.
 *
 * @param value A single value, a list of repeated values, or `undefined`.
 * @returns The first element for lists (`undefined` for an empty list),
 * otherwise the value unchanged.
 */
const firstString = (value: string | string[] | undefined): string | undefined => {
    if (Array.isArray(value)) {
        return value[0];
    }
    return value;
};

/**
 * One entry of `data.result.result_shapes` in the global-search GraphQL
 * response, as consumed by `mapItem`. Fields without a comment are declared
 * here but not read by the code in this file.
 */
type ResultShape = {
// Feed item title.
title: string;
// Feed item description; `null` is emitted as an empty string.
description: string | null;
// Either an absolute URL or a path that `mapItem` prefixes with https://ai.meta.com.
href: string;
// Feed item image, when present.
image_src: string | null;
// Used as the feed item guid.
cmsid: string;
// Emitted as the first category of the feed item.
type: string;
// Feed item author, when present.
authors: string | null;
// Appended to the categories after `type`.
tags: string[] | null;
location: string | null;
journal_number: string | null;
// Numeric string; `mapItem` multiplies it by 1000 before parsing, so presumably Unix seconds — TODO confirm.
published_time: string | null;
year: string | null;
};

/**
 * Shape of the `input` variable sent with the `useFBAIGlobalSearchQuery`
 * GraphQL request. List-valued filters are `null` when the filter is not set.
 */
type SearchInput = {
// Single letter; documented as pairing with `content_types=person` and `sort_by=ALPHABETICAL`.
alphabetical_filter: string | null;
// e.g. `person`, `publication`, `blog`, `dataset`, `event`, `tool`.
content_types: string[] | null;
// Result offset; defaults to 0.
offset: number;
// Free-text query, read from the `q` (or `search_query`) route parameter.
search_query: string;
// `RELEVANCE` (default), `MOST_RECENT`, `ALPHABETICAL` or `RANDOM`.
sort_by: string;
// e.g. `research`, `ml-applications`, `open-source`, `developer-tools`, `ar-vr`, `hardware`.
filter_tags: string[] | null;
// Publication venues such as `AAAI`, `ACL`.
location_cities: string[] | null;
// e.g. `natural-language-processing`, `computer-vision`.
research_areas: string[] | null;
// e.g. `2024`, `2025`.
years: string[] | null;
};

/**
 * Converts the parsed `routeParams` query string into the GraphQL `input`
 * variable for the global-search query.
 *
 * @param params Output of `querystring.parse` on the route parameter.
 * @returns A fully populated `SearchInput`; unset list filters are `null`.
 */
const buildSearchInput = (params: querystring.ParsedUrlQuery): SearchInput => {
    // `offset=` or `offset=abc` would otherwise parse to NaN, which
    // JSON.stringify serialises as `null`; fall back to the documented default.
    const parsedOffset = Number.parseInt(firstString(params.offset) ?? '', 10);
    const offset = parsedOffset >= 0 ? parsedOffset : 0;

    return {
        alphabetical_filter: firstString(params.alphabetical_filter) || null,
        content_types: toList(firstString(params.content_types)),
        offset,
        search_query: firstString(params.q) ?? firstString(params.search_query) ?? '',
        // `||` rather than `??` so that an empty `sort_by=` also gets the default.
        sort_by: firstString(params.sort_by) || 'RELEVANCE',
        filter_tags: toList(firstString(params.filter_tags)),
        location_cities: toList(firstString(params.location_cities)),
        research_areas: toList(firstString(params.research_areas)),
        years: toList(firstString(params.years)),
    };
};

/**
 * Renders the active filters as a human-readable `key=value` list joined by
 * ` · `, for use in the feed title.
 *
 * @param input The search input built from the route parameters.
 * @returns The summary, or an empty string when no filter is active.
 * `offset` and `sort_by` are never included.
 */
const summarizeFilters = (input: SearchInput): string => {
    const joined = (list: string[] | null): string | null => (list ? list.join(',') : null);

    // Order here is the order in which filters appear in the summary.
    const labelled: Array<[string, string | null]> = [
        ['q', input.search_query || null],
        ['content_types', joined(input.content_types)],
        ['research_areas', joined(input.research_areas)],
        ['filter_tags', joined(input.filter_tags)],
        ['years', joined(input.years)],
        ['location_cities', joined(input.location_cities)],
        ['alphabetical_filter', input.alphabetical_filter || null],
    ];

    return labelled
        .filter(([, value]) => value !== null)
        .map(([label, value]) => `${label}=${value}`)
        .join(' · ');
};
Comment on lines +81 to +105
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `Array.prototype.map` instead of repeated `push` calls.


/**
 * Converts one search result into a feed item.
 *
 * @param item A single entry of the response's `result_shapes` list.
 * @returns The feed item. `pubDate`, `author` and `image` are `undefined`
 * when the corresponding source field is empty or unusable.
 */
const mapItem = (item: ResultShape) => {
    const siteOrigin = 'https://ai.meta.com';

    // Tolerate a missing href (the original optional chaining implies it can
    // happen) instead of emitting "https://ai.meta.comundefined", and make
    // sure exactly one slash separates the origin from a relative path.
    const href = item.href ?? '';
    const link = href.startsWith('http') ? href : `${siteOrigin}${href.startsWith('/') ? '' : '/'}${href}`;

    // The timestamp is scaled by 1000 before parsing; a non-numeric value is
    // skipped rather than turned into an Invalid Date.
    const publishedMs = item.published_time ? Number(item.published_time) * 1000 : Number.NaN;

    return {
        title: item.title,
        description: item.description ?? '',
        link,
        pubDate: Number.isFinite(publishedMs) ? parseDate(publishedMs) : undefined,
        author: item.authors || undefined,
        category: [item.type, ...(item.tags ?? [])].filter(Boolean) as string[],
        image: item.image_src || undefined,
        guid: item.cmsid,
    };
};

/**
 * Route handler for the Meta AI global search feed.
 *
 * Loads the search page to obtain the server context, posts the
 * `useFBAIGlobalSearchQuery` GraphQL query built from the `routeParams`
 * filters, and maps the returned result shapes to feed items.
 *
 * @param ctx Request context; reads the `routeParams` path parameter and the
 * `limit` query parameter.
 * @returns Feed data with at most `limit` items (default 36).
 */
async function handler(ctx) {
    const link = 'https://ai.meta.com/global_search/';
    const defaultLimit = 36;
    const { server } = await getMetaServerContext(link);

    const params = querystring.parse(ctx.req.param('routeParams') || '');
    // A missing, empty, non-numeric or non-positive `limit` would make the
    // slice below return nothing (or drop trailing items), so use the default.
    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
    const limit = requestedLimit > 0 ? requestedLimit : defaultLimit;
    const input = buildSearchInput(params);

    const friendlyName = 'useFBAIGlobalSearchQuery';
    const data = await ofetch(GRAPHQL_ENDPOINT, {
        method: 'POST',
        headers: metaGraphqlHeaders(server, friendlyName),
        body: buildGraphqlBody({
            server,
            friendlyName,
            docId: '9716930201759979',
            variables: { input },
        }),
        parseResponse: JSON.parse,
    });

    // A response without results yields an empty feed rather than an error.
    const result = data?.data?.result;
    const shapes: ResultShape[] = result?.result_shapes ?? [];
    const items = shapes.slice(0, limit).map((item) => mapItem(item));

    const filterSummary = summarizeFilters(input);
    const baseTitle = 'Meta AI Global Search';
    return {
        title: filterSummary ? `${baseTitle} — ${filterSummary}` : baseTitle,
        description: 'Search results from ai.meta.com/global_search/.',
        link,
        item: items,
    };
}
82 changes: 82 additions & 0 deletions lib/routes/meta/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { type CheerioAPI, load } from 'cheerio';

import ofetch from '@/utils/ofetch';

/**
 * Server-provided values that `getMetaServerContext` extracts from the page's
 * ServerJS `define` entries and that the GraphQL helpers in this file send
 * back with each request.
 */
export type ServerData = {
// Sent as the `x-fb-lsd` header and the `lsd` form field.
LSD: { token: string };
SiteData: {
// Declared but not read by the helpers in this file.
haste_session: string;
// Declared but not read by the helpers in this file.
hsi: string;
// Sent as both the `__rev` and `__spin_r` form fields.
__spin_r: number;
// Sent as `__spin_b`; the helpers fall back to 'trunk' when empty.
__spin_b: string;
// Sent as `__spin_t`; the helpers fall back to the current time when empty.
__spin_t: number;
};
};

/**
 * Fetches a Meta AI page and extracts the server context needed to call the
 * site's GraphQL endpoint.
 *
 * The context is read from the `(new ServerJS()).handle({...})` call inside
 * the inline script that mentions `DTSGInitialData`. When the script or one
 * of the expected entries is missing, the defaults below are returned
 * unchanged instead of failing.
 *
 * @param link URL of the page to load.
 * @returns The parsed page (`$`) and the extracted `server` data.
 */
export async function getMetaServerContext(link: string): Promise<{ $: CheerioAPI; server: ServerData }> {
    const res = await ofetch(link, {
        headers: {
            // All these headers are required
            'sec-fetch-dest': 'document',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-site': 'none',
            'sec-fetch-user': '?1',
        },
    });
    const $ = load(res);
    const script = $('script:contains("DTSGInitialData"):first').text();
    const serverJs = JSON.parse(script.match(/\(new ServerJS\(\)\)\.handle\((\{[\s\S]*?\})\);/)?.[1] || '{}');

    const server: ServerData = {
        LSD: { token: '' },
        SiteData: {
            haste_session: '',
            hsi: '',
            __spin_r: 0,
            __spin_b: 'trunk',
            __spin_t: Date.now(),
        },
    };

    // Each `define` entry is indexed as [name, _, payload, ...]. Only the two
    // entries ServerData models are copied; merging over the defaults keeps a
    // null or partial payload from removing fields the callers dereference.
    const definitions = Array.isArray(serverJs.define) ? serverJs.define : [];
    for (const entry of definitions) {
        const key = entry?.[0];
        const value = entry?.[2];
        if (value === null || typeof value !== 'object' || Array.isArray(value)) {
            continue;
        }
        if (key === 'LSD') {
            server.LSD = { ...server.LSD, ...value };
        } else if (key === 'SiteData') {
            server.SiteData = { ...server.SiteData, ...value };
        }
    }

    return { $, server };
}

/**
 * Builds the form-encoded body of a Meta AI GraphQL request.
 *
 * @param server Server context obtained from the page.
 * @param friendlyName Query name, sent as `fb_api_req_friendly_name`.
 * @param docId Persisted query id, sent as `doc_id`.
 * @param variables Query variables; serialised with `JSON.stringify`.
 * @returns The request body as `URLSearchParams`, fields in a fixed order.
 */
export function buildGraphqlBody({ server, friendlyName, docId, variables }: { server: ServerData; friendlyName: string; docId: string; variables: unknown }) {
    const { LSD, SiteData } = server;

    // The revision is sent twice, as `__rev` and as `__spin_r`.
    const revision = String(SiteData.__spin_r || '');
    const branch = String(SiteData.__spin_b || 'trunk');
    const timestamp = String(SiteData.__spin_t || Date.now());

    const fields: Array<[string, string]> = [
        ['av', '0'],
        ['__user', '0'],
        ['__a', '1'],
        ['__req', '1'],
        ['dpr', '1'],
        ['__ccg', 'EXCELLENT'],
        ['__rev', revision],
        ['lsd', LSD.token],
        ['__spin_r', revision],
        ['__spin_b', branch],
        ['__spin_t', timestamp],
        ['fb_api_caller_class', 'RelayModern'],
        ['fb_api_req_friendly_name', friendlyName],
        ['variables', JSON.stringify(variables)],
        ['server_timestamps', 'true'],
        ['doc_id', docId],
    ];
    return new URLSearchParams(fields);
}

/**
 * Builds the HTTP headers for a Meta AI GraphQL request.
 *
 * @param server Server context; its LSD token is sent as `x-fb-lsd`.
 * @param friendlyName Query name, sent as `x-fb-friendly-name`.
 * @returns A plain header map for a form-encoded, same-origin CORS request.
 */
export function metaGraphqlHeaders(server: ServerData, friendlyName: string) {
    const lsdToken = server.LSD.token;
    const headers = {
        'content-type': 'application/x-www-form-urlencoded',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'x-asbd-id': '359341',
        'x-fb-friendly-name': friendlyName,
        'x-fb-lsd': lsdToken,
    };
    return headers;
}

/** GraphQL endpoint that the Meta AI routes POST their queries to. */
export const GRAPHQL_ENDPOINT = 'https://ai.meta.com/api/graphql/';
Loading