Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions src/node/db/HistoricalAuthorDataCache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Per-pad cache for the `{authorId -> {name, colorId}}` map used by
// PadMessageHandler.handleClientReady to populate clientVars
// (#7756 connect-handshake cliff investigation).
//
// At 200+ authors a burst of 50 simultaneous CLIENT_READY handshakes
// would otherwise each do Promise.all(authors.map(getAuthor)) =
// 50 * 200 = 10 000 ueberdb cache lookups inside the join hot path,
// competing for the event loop. This cache collapses that to one
// computation shared across the simultaneous joins.
//
// Extracted into its own module (rather than nested inside Pad) so it can
// be unit-tested without standing up the full pad / DB stack.

export type AuthorRecord = {name: string; colorId: string};
export type GetAuthorFn = (id: string) => Promise<AuthorRecord | null | undefined>;

export class HistoricalAuthorDataCache {
private cached: AuthorRecord extends never ? never : {
data: {[id: string]: AuthorRecord};
promise?: Promise<{[id: string]: AuthorRecord}>;
builtAt: number;
} | null = null;

constructor(
private readonly listAuthorIds: () => string[],
private readonly getAuthor: GetAuthorFn,
private readonly ttlMs: number = 5_000,
private readonly now: () => number = Date.now,
) {}

async get(): Promise<{[id: string]: AuthorRecord}> {
const now = this.now();
const cached = this.cached;
if (cached && now - cached.builtAt < this.ttlMs) {
return cached.promise ?? cached.data;
}
const promise = this.compute();
this.cached = {data: {}, promise, builtAt: now};
try {
const data = await promise;
this.cached = {data, builtAt: now};
return data;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Cache recompute race 🐞 Bug ☼ Reliability

HistoricalAuthorDataCache.get() can start a second compute() while a previous compute promise is
still in flight if the first run exceeds ttlMs, causing duplicate lookups and allowing an older
promise to overwrite a newer cached value.
Agent Prompt
### Issue description
`HistoricalAuthorDataCache.get()` uses `builtAt` (set before `compute()` resolves) for the TTL check, and it does not prioritize an in-flight `cached.promise`. If `compute()` takes longer than `ttlMs`, a subsequent `get()` will start a new `compute()`, and whichever promise resolves last will overwrite `this.cached`, potentially reverting the cache to older data.

### Issue Context
This cache is used on the `CLIENT_READY` join path, where slow author fetches under load are plausible; the race undermines the PR’s primary goal (coalescing work under concurrency).

### Fix Focus Areas
- src/node/db/HistoricalAuthorDataCache.ts[31-46]

### Suggested fix
- If `this.cached?.promise` exists, return it **regardless of TTL** (TTL should apply to completed `data`, not to in-flight work).
- Set `builtAt` when the compute finishes (e.g., `builtAt: this.now()` after `await promise`).
- Ensure only the “current” promise can commit results (compare stored promise identity or use a monotonically increasing generation counter):
  - Create `const promise = this.compute(); this.cached = {data: {}, promise, builtAt: now};`
  - After `await promise`, only assign `this.cached = {data, builtAt}` if `this.cached?.promise === promise`.
- Add a unit test that simulates a slow fetch exceeding TTL and asserts only one compute is used and the cache is not overwritten by an older completion.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

} catch (err) {
this.cached = null;
throw err;
}
}

/** Force the next get() to refetch. PadMessageHandler can call this when
* a new author commits, if we add hookable author-add events later. */
invalidate(): void { this.cached = null; }

private async compute(): Promise<{[id: string]: AuthorRecord}> {
const ids = this.listAuthorIds();
const out: {[id: string]: AuthorRecord} = {};
await Promise.all(ids.map(async (id) => {
const a = await this.getAuthor(id);
if (a) out[id] = {name: a.name, colorId: a.colorId};
}));
return out;
}
}
22 changes: 22 additions & 0 deletions src/node/db/Pad.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const padMessageHandler = require('../handler/PadMessageHandler');
const groupManager = require('./GroupManager');
const CustomError = require('../utils/customError');
import readOnlyManager from './ReadOnlyManager';
import {HistoricalAuthorDataCache} from './HistoricalAuthorDataCache';
import randomString from '../utils/randomstring';
const hooks = require('../../static/js/pluginfw/hooks');
import pad_utils from "../../static/js/pad_utils";
Expand Down Expand Up @@ -102,6 +103,10 @@ class Pad {
private id: string;
private savedRevisions: any[];
private padSettings: PadSettings;
// Per-pad cache for handleClientReady's historicalAuthorData map. Lazily
// initialised on first call so we don't touch authorManager during pad
// construction. See HistoricalAuthorDataCache for the rationale (#7756).
private historicalAuthorDataCache: HistoricalAuthorDataCache | null = null;
/**
* @param id
* @param [database] - Database object to access this pad's records (and only this pad's records;
Expand Down Expand Up @@ -326,6 +331,23 @@ class Pad {
return authorIds;
}

/**
* Returns the `{authorId -> {name, colorId}}` map used by handleClientReady
* to populate clientVars.collab_client_vars.historicalAuthorData. Cached
* per pad with a short TTL so a burst of simultaneous joins share one
* computation. Writes from `authorManager.setAuthorName` /
* `setAuthorColorId` become visible within at most the cache TTL (5s).
*/
async getHistoricalAuthorData(): Promise<{[authorId: string]: {name: string; colorId: string}}> {
if (this.historicalAuthorDataCache == null) {
this.historicalAuthorDataCache = new HistoricalAuthorDataCache(
() => this.getAllAuthors(),
(id: string) => authorManager.getAuthor(id),
);
}
return this.historicalAuthorDataCache.get();
}

async getInternalRevisionAText(targetRev: number) {
const keyRev = this.getKeyRevisionNumber(targetRev);
const headRev = this.getHeadRevisionNumber();
Expand Down
24 changes: 8 additions & 16 deletions src/node/handler/PadMessageHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1084,27 +1084,19 @@ const handleClientReady = async (socket:any, message: ClientReadyMessage) => {
await pad.saveToDatabase();
}

// these db requests all need the pad object (timestamp of latest revision, author data)
const authors = pad.getAllAuthors();

// get timestamp of latest revision needed for timeslider
const currentTime = await pad.getRevisionDate(pad.getHeadRevisionNumber());

// get all author data out of the database (in parallel)
const historicalAuthorData:MapArrayType<{
// Per-pad cached author lookup (#7756 connect-handshake cliff). At 200+
// authors a fresh burst of 50 simultaneous joiners would otherwise do
// 50 * 200 = 10000 ueberdb cache lookups inside the join hot path,
// competing for the same event loop as the existing authors' USER_CHANGES
// traffic. The Pad-level cache collapses that to a single computation
// shared across the simultaneous joins (5-second TTL).
const historicalAuthorData: MapArrayType<{
name: string;
colorId: string;
}> = {};
await Promise.all(authors.map(async (authorId: string) => {
const author = await authorManager.getAuthor(authorId);
if (!author) {
messageLogger.error(`There is no author for authorId: ${authorId}. ` +
'This is possibly related to https://github.com/ether/etherpad-lite/issues/2802');
} else {
// Filter author attribs (e.g. don't send author's pads to all clients)
historicalAuthorData[authorId] = {name: author.name, colorId: author.colorId};
}
}));
}> = await pad.getHistoricalAuthorData();

// glue the clientVars together, send them and tell the other clients that a new one is there

Expand Down
86 changes: 86 additions & 0 deletions src/tests/backend-new/specs/pad-historical-author-data.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// HistoricalAuthorDataCache pins the per-pad author-data cache used by
// PadMessageHandler.handleClientReady. The cache exists to coalesce the
// Promise.all(authors.map(getAuthor)) work across simultaneous CLIENT_READY
// handshakes — see ether/etherpad#7756.
//
// The helper takes pure functions as input (no DB, no Pad), so this test
// exercises the real production code path without standing up the full
// pad / DB stack.

import {describe, it, expect, vi, beforeEach} from 'vitest';
import {HistoricalAuthorDataCache, type AuthorRecord} from '../../../node/db/HistoricalAuthorDataCache';

const makeCache = (ids: string[], fetcher: (id: string) => Promise<AuthorRecord | null | undefined>, ttlMs = 5_000, now = () => Date.now()) =>
new HistoricalAuthorDataCache(() => ids, fetcher, ttlMs, now);

describe('HistoricalAuthorDataCache', () => {
let getAuthorMock: ReturnType<typeof vi.fn<(id: string) => Promise<AuthorRecord | null>>>;

beforeEach(() => {
getAuthorMock = vi.fn(async (id: string) => ({name: `n-${id}`, colorId: `c-${id}`}));
});

it('returns one entry per author with {name, colorId}', async () => {
const cache = makeCache(['a.1', 'a.2', 'a.3'], getAuthorMock);
const data = await cache.get();
expect(data).toEqual({
'a.1': {name: 'n-a.1', colorId: 'c-a.1'},
'a.2': {name: 'n-a.2', colorId: 'c-a.2'},
'a.3': {name: 'n-a.3', colorId: 'c-a.3'},
});
});

it('coalesces 50 simultaneous get() calls into 1 fetch per author', async () => {
const cache = makeCache(['a.1', 'a.2', 'a.3'], getAuthorMock);
const results = await Promise.all(Array.from({length: 50}, () => cache.get()));
expect(results).toHaveLength(50);
expect(getAuthorMock).toHaveBeenCalledTimes(3);
for (const r of results) {
expect(Object.keys(r).sort()).toEqual(['a.1', 'a.2', 'a.3']);
}
});

it('refetches once the TTL expires', async () => {
let clock = 0;
const cache = makeCache(['a.1'], getAuthorMock, 5_000, () => clock);
await cache.get();
expect(getAuthorMock).toHaveBeenCalledTimes(1);
clock = 4_000;
await cache.get();
expect(getAuthorMock).toHaveBeenCalledTimes(1);
clock = 6_000;
await cache.get();
expect(getAuthorMock).toHaveBeenCalledTimes(2);
});

it('omits authors the fetcher returns falsy for', async () => {
const fetcher = vi.fn(async (id: string) =>
id === 'a.gone' ? null : {name: `n-${id}`, colorId: 'c'});
const cache = makeCache(['a.1', 'a.gone', 'a.2'], fetcher);
const data = await cache.get();
expect(Object.keys(data).sort()).toEqual(['a.1', 'a.2']);
});

it('invalidate() forces the next call to refetch', async () => {
const cache = makeCache(['a.1'], getAuthorMock);
await cache.get();
await cache.get();
expect(getAuthorMock).toHaveBeenCalledTimes(1);
cache.invalidate();
await cache.get();
expect(getAuthorMock).toHaveBeenCalledTimes(2);
});

it('a failed fetch clears the cache so the next call retries', async () => {
let attempt = 0;
const flakyFetcher = vi.fn(async (id: string) => {
attempt++;
if (attempt === 1) throw new Error('first attempt fails');
return {name: `n-${id}`, colorId: 'c'};
});
const cache = makeCache(['a.1'], flakyFetcher);
await expect(cache.get()).rejects.toThrow('first attempt fails');
const data = await cache.get();
expect(data).toEqual({'a.1': {name: 'n-a.1', colorId: 'c'}});
});
});
Loading