From 635f2b3a1366b8ef1fc09a974d5bf41e61833ca6 Mon Sep 17 00:00:00 2001 From: Travis Downs Date: Thu, 14 May 2026 19:04:14 -0400 Subject: [PATCH 1/2] [c++] avrogencpp: emit deterministic include guard CodeGen::guard() in avrogencpp.cc was suffixing the generated header's include guard with the output of boost::mt19937 seeded from ::time(nullptr). That produced a different guard on every avrogen invocation, e.g.: #ifndef FOO_AVROGEN_H_3350718792_H #ifndef FOO_AVROGEN_H_2362587291_H Two consequences: 1. Generated headers were non-deterministic. Repeated runs on the same schema produced different bytes. 2. Build systems that key their cache on input-content digests (e.g. Bazel's remote cache, the Nix store) saw every consumer of the generated header miss the cache on every build, even when the schema was byte-identical. In a hermetic two-output-base Bazel build of Redpanda this surfaced as a chain of cascade rebuilds starting at manifest_file.avrogen.h and propagating through every .cc that included it. headerFile_ is already guaranteed-unique per output. The random suffix doesn't add uniqueness, only entropy. Mirrors the same change being proposed upstream at apache/avro. --- lang/c++/impl/avrogencpp.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lang/c++/impl/avrogencpp.cc b/lang/c++/impl/avrogencpp.cc index 14351521590..1adff62bec4 100644 --- a/lang/c++/impl/avrogencpp.cc +++ b/lang/c++/impl/avrogencpp.cc @@ -770,7 +770,14 @@ void CodeGen::emitGeneratedWarning() { string CodeGen::guard() { string h = headerFile_; makeCanonical(h, true); - return h + "_" + lexical_cast(random_()) + "_H"; + // headerFile_ is already a unique-per-output path, so the canonicalised + // form is already a valid, unique include guard. Avoid mixing in a + // time-seeded RNG here so the generated output is byte-deterministic + // across invocations -- otherwise build systems that key their cache on + // input-content digests (e.g. Bazel remote cache, Nix store paths) end + // up rebuilding every downstream consumer on every invocation, even on + // byte-identical schemas. + return h + "_H"; } void CodeGen::generate(const ValidSchema &schema) { From 96c2f5141f0f1425c9f2bd168caaff6c47c8f1c4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 14 May 2026 23:30:11 +0000 Subject: [PATCH 2/2] Remove now-unused random_ member and its dead includes Agent-Logs-Url: https://github.com/redpanda-data/avro/sessions/b62d1929-ada4-43d6-9a02-d5dc98f08aaf Co-authored-by: travisdowns <2403521+travisdowns@users.noreply.github.com> --- lang/c++/impl/avrogencpp.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lang/c++/impl/avrogencpp.cc b/lang/c++/impl/avrogencpp.cc index 1adff62bec4..1c27ba4077f 100644 --- a/lang/c++/impl/avrogencpp.cc +++ b/lang/c++/impl/avrogencpp.cc @@ -17,9 +17,6 @@ */ #include -#ifndef _WIN32 -#include -#endif #include #include #include @@ -30,7 +27,6 @@ #include #include -#include #include #include "Compiler.hh" @@ -92,7 +88,6 @@ class CodeGen { const std::string includePrefix_; const bool noUnion_; const std::string guardString_; - boost::mt19937 random_; vector pendingGettersAndSetters; vector pendingConstructors; @@ -123,8 +118,7 @@ class CodeGen { std::string includePrefix, bool noUnion) : unionTracker_(schemaFile), os_(os), inNamespace_(false), ns_(std::move(ns)), schemaFile_(std::move(schemaFile)), headerFile_(std::move(headerFile)), includePrefix_(std::move(includePrefix)), noUnion_(noUnion), - guardString_(std::move(guardString)), - random_(static_cast(::time(nullptr))) { + guardString_(std::move(guardString)) { } void generate(const ValidSchema &schema);