ether · JohnMcLear · May 16, 2026 · May 16, 2026 · qodo-free-for-open-source-projects · May 16, 2026
diff --git a/src/node/db/HistoricalAuthorDataCache.ts b/src/node/db/HistoricalAuthorDataCache.ts
@@ -0,0 +1,62 @@
+// Per-pad cache for the `{authorId -> {name, colorId}}` map used by
+// PadMessageHandler.handleClientReady to populate clientVars
+// (#7756 connect-handshake cliff investigation).
+//
+// At 200+ authors a burst of 50 simultaneous CLIENT_READY handshakes
+// would otherwise each do Promise.all(authors.map(getAuthor)) =
+// 50 * 200 = 10 000 ueberdb cache lookups inside the join hot path,
+// competing for the event loop. This cache collapses that to one
+// computation shared across the simultaneous joins.
+//
+// Extracted into its own module (rather than nested inside Pad) so it can
+// be unit-tested without standing up the full pad / DB stack.
+
+export type AuthorRecord = {name: string; colorId: string};
+export type GetAuthorFn = (id: string) => Promise<AuthorRecord | null | undefined>;
+
+export class HistoricalAuthorDataCache {
+  private cached: AuthorRecord extends never ? never : {
+    data: {[id: string]: AuthorRecord};
+    promise?: Promise<{[id: string]: AuthorRecord}>;
+    builtAt: number;
+  } | null = null;
+
+  constructor(
+    private readonly listAuthorIds: () => string[],
+    private readonly getAuthor: GetAuthorFn,
+    private readonly ttlMs: number = 5_000,
+    private readonly now: () => number = Date.now,
+  ) {}
+
+  async get(): Promise<{[id: string]: AuthorRecord}> {
+    const now = this.now();
+    const cached = this.cached;
+    if (cached && now - cached.builtAt < this.ttlMs) {
+      return cached.promise ?? cached.data;
+    }
+    const promise = this.compute();
+    this.cached = {data: {}, promise, builtAt: now};
+    try {
+      const data = await promise;
+      this.cached = {data, builtAt: now};
+      return data;
+    } catch (err) {
+      this.cached = null;
+      throw err;
+    }
+  }
+
+  /** Force the next get() to refetch. PadMessageHandler can call this when
+   *  a new author commits, if we add hookable author-add events later. */
+  invalidate(): void { this.cached = null; }
+
+  private async compute(): Promise<{[id: string]: AuthorRecord}> {
+    const ids = this.listAuthorIds();
+    const out: {[id: string]: AuthorRecord} = {};
+    await Promise.all(ids.map(async (id) => {
+      const a = await this.getAuthor(id);
+      if (a) out[id] = {name: a.name, colorId: a.colorId};
+    }));
+    return out;
+  }
+}
diff --git a/src/node/db/Pad.ts b/src/node/db/Pad.ts
@@ -22,6 +22,7 @@ const padMessageHandler = require('../handler/PadMessageHandler');
 const groupManager = require('./GroupManager');
 const CustomError = require('../utils/customError');
 import readOnlyManager from './ReadOnlyManager';
+import {HistoricalAuthorDataCache} from './HistoricalAuthorDataCache';
 import randomString from '../utils/randomstring';
 const hooks = require('../../static/js/pluginfw/hooks');
 import pad_utils from "../../static/js/pad_utils";
@@ -102,6 +103,10 @@ class Pad {
     private id: string;
     private savedRevisions: any[];
     private padSettings: PadSettings;
+    // Per-pad cache for handleClientReady's historicalAuthorData map. Lazily
+    // initialised on first call so we don't touch authorManager during pad
+    // construction. See HistoricalAuthorDataCache for the rationale (#7756).
+    private historicalAuthorDataCache: HistoricalAuthorDataCache | null = null;
   /**
    * @param id
    * @param [database] - Database object to access this pad's records (and only this pad's records;
@@ -326,6 +331,23 @@ class Pad {
     return authorIds;
   }
 
+  /**
+   * Returns the `{authorId -> {name, colorId}}` map used by handleClientReady
+   * to populate clientVars.collab_client_vars.historicalAuthorData. Cached
+   * per pad with a short TTL so a burst of simultaneous joins share one
+   * computation. Writes from `authorManager.setAuthorName` /
+   * `setAuthorColorId` become visible within at most the cache TTL (5s).
+   */
+  async getHistoricalAuthorData(): Promise<{[authorId: string]: {name: string; colorId: string}}> {
+    if (this.historicalAuthorDataCache == null) {
+      this.historicalAuthorDataCache = new HistoricalAuthorDataCache(
+        () => this.getAllAuthors(),
+        (id: string) => authorManager.getAuthor(id),
+      );
+    }
+    return this.historicalAuthorDataCache.get();
+  }
+
   async getInternalRevisionAText(targetRev: number) {
     const keyRev = this.getKeyRevisionNumber(targetRev);
     const headRev = this.getHeadRevisionNumber();

diff --git a/src/node/handler/PadMessageHandler.ts b/src/node/handler/PadMessageHandler.ts
@@ -1084,27 +1084,19 @@ const handleClientReady = async (socket:any, message: ClientReadyMessage) => {
     await pad.saveToDatabase();
   }
 
-  // these db requests all need the pad object (timestamp of latest revision, author data)
-  const authors = pad.getAllAuthors();
-
   // get timestamp of latest revision needed for timeslider
   const currentTime = await pad.getRevisionDate(pad.getHeadRevisionNumber());
 
-  // get all author data out of the database (in parallel)
-  const historicalAuthorData:MapArrayType<{
+  // Per-pad cached author lookup (#7756 connect-handshake cliff). At 200+
+  // authors a fresh burst of 50 simultaneous joiners would otherwise do
+  // 50 * 200 = 10000 ueberdb cache lookups inside the join hot path,
+  // competing for the same event loop as the existing authors' USER_CHANGES
+  // traffic. The Pad-level cache collapses that to a single computation
+  // shared across the simultaneous joins (5-second TTL).
+  const historicalAuthorData: MapArrayType<{
     name: string;
     colorId: string;
-  }> = {};
-  await Promise.all(authors.map(async (authorId: string) => {
-    const author = await authorManager.getAuthor(authorId);
-    if (!author) {
-      messageLogger.error(`There is no author for authorId: ${authorId}. ` +
-          'This is possibly related to https://github.com/ether/etherpad-lite/issues/2802');
-    } else {
-      // Filter author attribs (e.g. don't send author's pads to all clients)
-      historicalAuthorData[authorId] = {name: author.name, colorId: author.colorId};
-    }
-  }));
+  }> = await pad.getHistoricalAuthorData();
 
   // glue the clientVars together, send them and tell the other clients that a new one is there
 

diff --git a/src/tests/backend-new/specs/pad-historical-author-data.test.ts b/src/tests/backend-new/specs/pad-historical-author-data.test.ts
@@ -0,0 +1,86 @@
+// HistoricalAuthorDataCache pins the per-pad author-data cache used by
+// PadMessageHandler.handleClientReady. The cache exists to coalesce the
+// Promise.all(authors.map(getAuthor)) work across simultaneous CLIENT_READY
+// handshakes — see ether/etherpad#7756.
+//
+// The helper takes pure functions as input (no DB, no Pad), so this test
+// exercises the real production code path without standing up the full
+// pad / DB stack.
+
+import {describe, it, expect, vi, beforeEach} from 'vitest';
+import {HistoricalAuthorDataCache, type AuthorRecord} from '../../../node/db/HistoricalAuthorDataCache';
+
+const makeCache = (ids: string[], fetcher: (id: string) => Promise<AuthorRecord | null | undefined>, ttlMs = 5_000, now = () => Date.now()) =>
+  new HistoricalAuthorDataCache(() => ids, fetcher, ttlMs, now);
+
+describe('HistoricalAuthorDataCache', () => {
+  let getAuthorMock: ReturnType<typeof vi.fn<(id: string) => Promise<AuthorRecord | null>>>;
+
+  beforeEach(() => {
+    getAuthorMock = vi.fn(async (id: string) => ({name: `n-${id}`, colorId: `c-${id}`}));
+  });
+
+  it('returns one entry per author with {name, colorId}', async () => {
+    const cache = makeCache(['a.1', 'a.2', 'a.3'], getAuthorMock);
+    const data = await cache.get();
+    expect(data).toEqual({
+      'a.1': {name: 'n-a.1', colorId: 'c-a.1'},
+      'a.2': {name: 'n-a.2', colorId: 'c-a.2'},
+      'a.3': {name: 'n-a.3', colorId: 'c-a.3'},
+    });
+  });
+
+  it('coalesces 50 simultaneous get() calls into 1 fetch per author', async () => {
+    const cache = makeCache(['a.1', 'a.2', 'a.3'], getAuthorMock);
+    const results = await Promise.all(Array.from({length: 50}, () => cache.get()));
+    expect(results).toHaveLength(50);
+    expect(getAuthorMock).toHaveBeenCalledTimes(3);
+    for (const r of results) {
+      expect(Object.keys(r).sort()).toEqual(['a.1', 'a.2', 'a.3']);
+    }
+  });
+
+  it('refetches once the TTL expires', async () => {
+    let clock = 0;
+    const cache = makeCache(['a.1'], getAuthorMock, 5_000, () => clock);
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(1);
+    clock = 4_000;
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(1);
+    clock = 6_000;
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(2);
+  });
+
+  it('omits authors the fetcher returns falsy for', async () => {
+    const fetcher = vi.fn(async (id: string) =>
+      id === 'a.gone' ? null : {name: `n-${id}`, colorId: 'c'});
+    const cache = makeCache(['a.1', 'a.gone', 'a.2'], fetcher);
+    const data = await cache.get();
+    expect(Object.keys(data).sort()).toEqual(['a.1', 'a.2']);
+  });
+
+  it('invalidate() forces the next call to refetch', async () => {
+    const cache = makeCache(['a.1'], getAuthorMock);
+    await cache.get();
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(1);
+    cache.invalidate();
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(2);
+  });
+
+  it('a failed fetch clears the cache so the next call retries', async () => {
+    let attempt = 0;
+    const flakyFetcher = vi.fn(async (id: string) => {
+      attempt++;
+      if (attempt === 1) throw new Error('first attempt fails');
+      return {name: `n-${id}`, colorId: 'c'};
+    });
+    const cache = makeCache(['a.1'], flakyFetcher);
+    await expect(cache.get()).rejects.toThrow('first attempt fails');
+    const data = await cache.get();
+    expect(data).toEqual({'a.1': {name: 'n-a.1', colorId: 'c'}});
+  });
+});