diff --git a/src/node/db/HistoricalAuthorDataCache.ts b/src/node/db/HistoricalAuthorDataCache.ts new file mode 100644 index 00000000000..63be8c8803a --- /dev/null +++ b/src/node/db/HistoricalAuthorDataCache.ts @@ -0,0 +1,96 @@ +// Per-pad cache for the `{authorId -> {name, colorId}}` map used by +// PadMessageHandler.handleClientReady to populate clientVars +// (#7756 connect-handshake investigation). +// +// At 200+ authors a burst of 50 simultaneous CLIENT_READY handshakes +// would otherwise each do Promise.all(authors.map(getAuthor)) = +// 50 * 200 = 10 000 ueberdb cache lookups inside the join hot path, +// competing for the event loop. This cache collapses that to one +// computation shared across the simultaneous joins. +// +// Extracted into its own module (rather than nested inside Pad) so it can +// be unit-tested without standing up the full pad / DB stack. + +export type AuthorRecord = {name: string; colorId: string}; +export type GetAuthorFn = (id: string) => Promise; +export type OnMissingAuthorFn = (id: string) => void; + +type CacheState = { + /** Resolved data. Empty `{}` until the first compute() resolves. */ + data: {[id: string]: AuthorRecord}; + /** Set iff a compute() is currently in flight. New callers await this same + * promise rather than starting a duplicate compute. Cleared on resolve. */ + promise?: Promise<{[id: string]: AuthorRecord}>; + /** Wall-clock time the current data was committed. Used for TTL only. */ + builtAt: number; +}; + +export class HistoricalAuthorDataCache { + private state: CacheState | null = null; + + constructor( + private readonly listAuthorIds: () => string[], + private readonly getAuthor: GetAuthorFn, + private readonly ttlMs: number = 5_000, + private readonly now: () => number = Date.now, + /** Called once per author id that the fetcher returns falsy for. + * Lets the consumer preserve the error log that lived in the + * previous inline Promise.all loop. Optional. */ + private readonly onMissingAuthor: OnMissingAuthorFn = () => {}, + ) {} + + async get(): Promise<{[id: string]: AuthorRecord}> { + const now = this.now(); + const s = this.state; + // In-flight compute: piggyback on it regardless of TTL — never start a + // second compute on top of a running one. The previous version could + // race two computes if the first ran past ttlMs, and the older + // resolution would clobber the newer cached value. + if (s?.promise) return cloneData(await s.promise); + if (s && now - s.builtAt < this.ttlMs) return cloneData(s.data); + return cloneData(await this.refresh(now)); + } + + /** Force the next get() to refetch. PadMessageHandler can call this when + * a new author commits, if we add hookable author-add events later. */ + invalidate(): void { this.state = null; } + + private refresh(now: number): Promise<{[id: string]: AuthorRecord}> { + const promise = this.compute(); + this.state = {data: {}, promise, builtAt: now}; + promise.then( + (data) => { + // Only commit if our promise is still the one the state references — + // covers the (unlikely) case where invalidate() ran during compute. + if (this.state?.promise === promise) { + this.state = {data, builtAt: this.now()}; + } + }, + () => { if (this.state?.promise === promise) this.state = null; }, + ); + return promise; + } + + private async compute(): Promise<{[id: string]: AuthorRecord}> { + const ids = this.listAuthorIds(); + const out: {[id: string]: AuthorRecord} = {}; + await Promise.all(ids.map(async (id) => { + const a = await this.getAuthor(id); + if (a) out[id] = {name: a.name, colorId: a.colorId}; + else this.onMissingAuthor(id); + })); + return out; + } +} + +// Defensive shallow copy on every get(). Callers (notably handleClientReady, +// which embeds the result in clientVars and exposes it to the clientVars +// hook) historically received a fresh object per call; preserving that +// here so a mutation by one join can't bleed into the next. +const cloneData = ( + src: {[id: string]: AuthorRecord}, +): {[id: string]: AuthorRecord} => { + const out: {[id: string]: AuthorRecord} = {}; + for (const k in src) out[k] = {name: src[k]!.name, colorId: src[k]!.colorId}; + return out; +}; diff --git a/src/node/db/Pad.ts b/src/node/db/Pad.ts index 0e4eaaac0e4..2ad936ed065 100644 --- a/src/node/db/Pad.ts +++ b/src/node/db/Pad.ts @@ -22,6 +22,9 @@ const padMessageHandler = require('../handler/PadMessageHandler'); const groupManager = require('./GroupManager'); const CustomError = require('../utils/customError'); import readOnlyManager from './ReadOnlyManager'; +import {HistoricalAuthorDataCache} from './HistoricalAuthorDataCache'; +import log4js from 'log4js'; +const padMessageLogger = log4js.getLogger('message'); import randomString from '../utils/randomstring'; const hooks = require('../../static/js/pluginfw/hooks'); import pad_utils from "../../static/js/pad_utils"; @@ -102,6 +105,10 @@ class Pad { private id: string; private savedRevisions: any[]; private padSettings: PadSettings; + // Per-pad cache for handleClientReady's historicalAuthorData map. Lazily + // initialised on first call so we don't touch authorManager during pad + // construction. See HistoricalAuthorDataCache for the rationale (#7756). + private historicalAuthorDataCache: HistoricalAuthorDataCache | null = null; /** * @param id * @param [database] - Database object to access this pad's records (and only this pad's records; @@ -326,6 +333,34 @@ class Pad { return authorIds; } + /** + * Returns the `{authorId -> {name, colorId}}` map used by handleClientReady + * to populate clientVars.collab_client_vars.historicalAuthorData. Cached + * per pad with a short TTL so a burst of simultaneous joins share one + * computation. Writes from `authorManager.setAuthorName` / + * `setAuthorColorId` become visible within at most the cache TTL (5s). + */ + async getHistoricalAuthorData(): Promise<{[authorId: string]: {name: string; colorId: string}}> { + if (this.historicalAuthorDataCache == null) { + this.historicalAuthorDataCache = new HistoricalAuthorDataCache( + () => this.getAllAuthors(), + (id: string) => authorManager.getAuthor(id), + 5_000, + Date.now, + (id: string) => { + // Preserves the explicit error log emitted by the previous inline + // Promise.all loop in handleClientReady before this cache landed. + // Don't drop missing-author logs silently — they point at + // https://github.com/ether/etherpad-lite/issues/2802. + padMessageLogger.error( + `There is no author for authorId: ${id}. ` + + 'This is possibly related to https://github.com/ether/etherpad-lite/issues/2802'); + }, + ); + } + return this.historicalAuthorDataCache.get(); + } + async getInternalRevisionAText(targetRev: number) { const keyRev = this.getKeyRevisionNumber(targetRev); const headRev = this.getHeadRevisionNumber(); diff --git a/src/node/handler/PadMessageHandler.ts b/src/node/handler/PadMessageHandler.ts index 3a27a7ac7be..de38313cca9 100644 --- a/src/node/handler/PadMessageHandler.ts +++ b/src/node/handler/PadMessageHandler.ts @@ -1084,27 +1084,19 @@ const handleClientReady = async (socket:any, message: ClientReadyMessage) => { await pad.saveToDatabase(); } - // these db requests all need the pad object (timestamp of latest revision, author data) - const authors = pad.getAllAuthors(); - // get timestamp of latest revision needed for timeslider const currentTime = await pad.getRevisionDate(pad.getHeadRevisionNumber()); - // get all author data out of the database (in parallel) - const historicalAuthorData:MapArrayType<{ + // Per-pad cached author lookup (#7756 connect-handshake cliff). At 200+ + // authors a fresh burst of 50 simultaneous joiners would otherwise do + // 50 * 200 = 10000 ueberdb cache lookups inside the join hot path, + // competing for the same event loop as the existing authors' USER_CHANGES + // traffic. The Pad-level cache collapses that to a single computation + // shared across the simultaneous joins (5-second TTL). + const historicalAuthorData: MapArrayType<{ name: string; colorId: string; - }> = {}; - await Promise.all(authors.map(async (authorId: string) => { - const author = await authorManager.getAuthor(authorId); - if (!author) { - messageLogger.error(`There is no author for authorId: ${authorId}. ` + - 'This is possibly related to https://github.com/ether/etherpad-lite/issues/2802'); - } else { - // Filter author attribs (e.g. don't send author's pads to all clients) - historicalAuthorData[authorId] = {name: author.name, colorId: author.colorId}; - } - })); + }> = await pad.getHistoricalAuthorData(); // glue the clientVars together, send them and tell the other clients that a new one is there diff --git a/src/tests/backend-new/specs/pad-historical-author-data.test.ts b/src/tests/backend-new/specs/pad-historical-author-data.test.ts new file mode 100644 index 00000000000..119c7c3d5c6 --- /dev/null +++ b/src/tests/backend-new/specs/pad-historical-author-data.test.ts @@ -0,0 +1,135 @@ +// HistoricalAuthorDataCache pins the per-pad author-data cache used by +// PadMessageHandler.handleClientReady. The cache exists to coalesce the +// Promise.all(authors.map(getAuthor)) work across simultaneous CLIENT_READY +// handshakes — see ether/etherpad#7756. +// +// The helper takes pure functions as input (no DB, no Pad), so this test +// exercises the real production code path without standing up the full +// pad / DB stack. + +import {describe, it, expect, vi, beforeEach} from 'vitest'; +import {HistoricalAuthorDataCache, type AuthorRecord} from '../../../node/db/HistoricalAuthorDataCache'; + +type Fetcher = (id: string) => Promise; +type OnMissing = (id: string) => void; + +const makeCache = ( + ids: string[], + fetcher: Fetcher, + ttlMs = 5_000, + now = () => Date.now(), + onMissing: OnMissing = () => {}, +) => new HistoricalAuthorDataCache(() => ids, fetcher, ttlMs, now, onMissing); + +describe('HistoricalAuthorDataCache', () => { + let getAuthorMock: ReturnType>; + + beforeEach(() => { + getAuthorMock = vi.fn(async (id: string) => ({name: `n-${id}`, colorId: `c-${id}`})); + }); + + it('returns one entry per author with {name, colorId}', async () => { + const cache = makeCache(['a.1', 'a.2', 'a.3'], getAuthorMock); + const data = await cache.get(); + expect(data).toEqual({ + 'a.1': {name: 'n-a.1', colorId: 'c-a.1'}, + 'a.2': {name: 'n-a.2', colorId: 'c-a.2'}, + 'a.3': {name: 'n-a.3', colorId: 'c-a.3'}, + }); + }); + + it('coalesces 50 simultaneous get() calls into 1 fetch per author', async () => { + const cache = makeCache(['a.1', 'a.2', 'a.3'], getAuthorMock); + const results = await Promise.all(Array.from({length: 50}, () => cache.get())); + expect(results).toHaveLength(50); + expect(getAuthorMock).toHaveBeenCalledTimes(3); + for (const r of results) { + expect(Object.keys(r).sort()).toEqual(['a.1', 'a.2', 'a.3']); + } + }); + + it('refetches once the TTL expires', async () => { + let clock = 0; + const cache = makeCache(['a.1'], getAuthorMock, 5_000, () => clock); + await cache.get(); + expect(getAuthorMock).toHaveBeenCalledTimes(1); + clock = 4_000; + await cache.get(); + expect(getAuthorMock).toHaveBeenCalledTimes(1); + clock = 6_000; + await cache.get(); + expect(getAuthorMock).toHaveBeenCalledTimes(2); + }); + + it('returns a fresh object on every get() — callers may safely mutate without bleeding into other joiners', async () => { + const cache = makeCache(['a.1'], getAuthorMock); + const first = await cache.get(); + const second = await cache.get(); + // Two distinct top-level objects and per-author records. + expect(first).not.toBe(second); + expect(first['a.1']).not.toBe(second['a.1']); + expect(first).toEqual(second); + // Mutating the returned object must not affect the next caller. + first['a.1']!.name = 'mutated'; + const third = await cache.get(); + expect(third).toEqual({'a.1': {name: 'n-a.1', colorId: 'c-a.1'}}); + }); + + it('a slow compute that runs past TTL still resolves callers; no duplicate fetch starts in flight', async () => { + // Compute that hangs on a gate; TTL is 10ms. Without the in-flight + // guard, the second get() after 10ms would start a duplicate compute, + // and the older resolution could clobber the newer cached value. + let release: () => void; + const gate = new Promise((r) => { release = r; }); + let calls = 0; + let clock = 0; + const fetcher = vi.fn(async (id: string) => { + calls++; + await gate; + return {name: `n-${id}`, colorId: `c-${id}`}; + }); + const cache = makeCache(['a.1'], fetcher, 10, () => clock); + const first = cache.get(); + clock = 50; // well past ttlMs + const second = cache.get(); + expect(calls).toBe(1); + release!(); + const [a, b] = await Promise.all([first, second]); + expect(a).toEqual(b); + expect(calls).toBe(1); + }); + + it('calls onMissingAuthor exactly once per id the fetcher returns falsy for', async () => { + const fetcher = vi.fn(async (id: string) => + id === 'a.gone' ? null : {name: `n-${id}`, colorId: 'c'}); + const onMissing = vi.fn(); + const cache = makeCache(['a.1', 'a.gone', 'a.2'], fetcher, 5_000, Date.now, onMissing); + const data = await cache.get(); + expect(Object.keys(data).sort()).toEqual(['a.1', 'a.2']); + expect(onMissing).toHaveBeenCalledTimes(1); + expect(onMissing).toHaveBeenCalledWith('a.gone'); + }); + + it('invalidate() forces the next call to refetch', async () => { + const cache = makeCache(['a.1'], getAuthorMock); + await cache.get(); + await cache.get(); + expect(getAuthorMock).toHaveBeenCalledTimes(1); + cache.invalidate(); + await cache.get(); + expect(getAuthorMock).toHaveBeenCalledTimes(2); + }); + + it('a failed fetch clears the cache so the next call retries', async () => { + let attempt = 0; + const flakyFetcher = vi.fn(async (id: string) => { + attempt++; + if (attempt === 1) throw new Error('first attempt fails'); + return {name: `n-${id}`, colorId: 'c'}; + }); + const cache = makeCache(['a.1'], flakyFetcher); + await expect(cache.get()).rejects.toThrow('first attempt fails'); + const data = await cache.get(); + expect(data).toEqual({'a.1': {name: 'n-a.1', colorId: 'c'}}); + }); +});