Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 137 additions & 2 deletions .github/workflows/validate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
name: End Status
if: always()
runs-on: ubuntu-latest
needs: [discover-modules, test-modules, test-scenarios, validate-structure, test-charts]
needs: [discover-modules, test-modules, test-scenarios, validate-structure, test-charts, test-merge]
permissions:
contents: read
statuses: write
Expand Down Expand Up @@ -83,7 +83,7 @@ jobs:
- name: Install common dependencies
run: |
sudo apt-get update
sudo apt-get install -y jq curl wget
sudo apt-get install -y jq curl wget gawk

- name: Set up Go (for jplot module)
if: matrix.module == 'jplot'
Expand Down Expand Up @@ -185,6 +185,141 @@ jobs:

echo "✓ ${{ matrix.traffic }}/${{ matrix.scenario }} scenario executed successfully"

# Integration test for modules/vegeta/merge/merge.sh. Runs after test-modules and
# start-status: starts a local test server, fires four concurrent vegeta attacks at it,
# merges the four result files, and validates the merged per-second output.
test-merge:
name: Test Multi-Pod Merge
runs-on: ubuntu-latest
needs: [test-modules, start-status]
steps:
- name: Checkout code
uses: actions/checkout@v4

# gawk is needed by modules/vegeta/merge/merge.sh, which pipes its data through gawk.
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y jq curl wget gawk

# Runs the repository's own vegeta install script after making it executable.
- name: Install vegeta and jaggr
run: |
chmod +x modules/vegeta/install/install.sh
modules/vegeta/install/install.sh

# Starts the pinned test-server image on port 9998 and polls it with curl up to
# 10 times (2s apart); on timeout the container logs are printed and the step fails.
- name: Start test server
run: |
docker run -d \
--name merge-test-server \
-p 9998:9998 \
-e PORT=9998 \
Comment thread
meecethereese marked this conversation as resolved.
ghcr.io/azure/aks-traffic-ingress-competitive-testing:8aba95806ff611e9939257e2c3c9f53b3af5f7a2

SERVER_READY=false
for i in {1..10}; do
if curl -s http://localhost:9998 > /dev/null; then
echo "Test server ready"
SERVER_READY=true
break
fi
echo "Waiting for server... (attempt $i/10)"
sleep 2
done

if [ "$SERVER_READY" != "true" ]; then
echo "ERROR: Test server failed to start after 10 attempts"
docker logs merge-test-server 2>&1 || true
exit 1
fi

# Each of the four background attacks writes its own podN.bin; the step fails if any
# of those files ends up empty.
- name: Run 4 simultaneous vegeta attacks
run: |
mkdir -p results/merge-test

# Simulate 4 pods each attacking at 50 RPS for 15s (total: 200 RPS)
for pod in 0 1 2 3; do
echo "GET http://localhost:9998" | \
vegeta attack -rate=50 -duration=15s -workers=2 \
> "results/merge-test/pod${pod}.bin" &
done

echo "Waiting for all 4 attacks to complete..."
wait
echo "All attacks finished"

# Verify all .bin files are non-empty
for pod in 0 1 2 3; do
if [[ ! -s "results/merge-test/pod${pod}.bin" ]]; then
echo "ERROR: pod${pod}.bin is empty"
exit 1
fi
echo "pod${pod}.bin: $(wc -c < results/merge-test/pod${pod}.bin) bytes"
done

# Runs merge.sh on the four .bin files, then checks the merged file: it is non-empty,
# has at least 12 lines, averages at least 120 rps, every line's code-histogram sum
# equals its rps, and every line carries the expected JSON fields.
- name: Merge results and validate
run: |
chmod +x modules/vegeta/merge/merge.sh

modules/vegeta/merge/merge.sh \
--output-file results/merge-test/merged.json \
results/merge-test/pod0.bin \
results/merge-test/pod1.bin \
results/merge-test/pod2.bin \
results/merge-test/pod3.bin

echo "=== Merged output ==="
cat results/merge-test/merged.json
echo ""

# Verify the merged file is non-empty
if [[ ! -s results/merge-test/merged.json ]]; then
echo "ERROR: Merged output is empty"
exit 1
fi

# Verify per-second bucketing: 15s test should produce at least 12 lines
# (first and last second-buckets may be partial)
LINE_COUNT=$(wc -l < results/merge-test/merged.json)
echo "Merged output has ${LINE_COUNT} lines for a 15s test"
if [[ "${LINE_COUNT}" -lt 12 ]]; then
echo "ERROR: Expected at least 12 second-buckets for a 15s test, got ${LINE_COUNT}"
exit 1
fi

# Verify combined RPS is roughly 200 (4 pods x 50 RPS)
AVG_RPS=$(jq -r '.rps' results/merge-test/merged.json | awk '{s+=$1; n++} END {print int(s/n)}')
echo "Average merged RPS: ${AVG_RPS} (expected ~200)"
if [[ "${AVG_RPS}" -lt 120 ]]; then
echo "ERROR: Average RPS ${AVG_RPS} is too low (expected ~200 from 4x50 RPS)"
exit 1
fi

# Verify code histogram sums match reported rps for each line
while IFS= read -r line; do
RPS_VAL=$(echo "$line" | jq -r '.rps')
CODE_SUM=$(echo "$line" | jq -r '[.code.hist | to_entries[] | .value] | add // 0')
if [[ "${CODE_SUM}" -ne "${RPS_VAL}" ]]; then
echo "ERROR: Code histogram sum (${CODE_SUM}) does not match rps (${RPS_VAL})"
echo "Line: ${line}"
exit 1
fi
done < results/merge-test/merged.json

# Verify all lines have the expected JSON structure
while IFS= read -r line; do
for field in rps code.hist latency.p25 latency.p50 latency.p99 bytes_in.sum bytes_out.sum; do
if ! echo "$line" | jq -e ".${field}" > /dev/null 2>&1; then
echo "ERROR: Merged output line missing field '${field}'"
echo "Line: ${line}"
exit 1
fi
done
done < results/merge-test/merged.json

echo "✓ Multi-pod merge validation passed"

# Runs even when earlier steps failed; stops and removes the test-server container
# and ignores any error from either command.
- name: Cleanup
if: always()
run: |
docker stop merge-test-server > /dev/null 2>&1 || true
docker rm merge-test-server > /dev/null 2>&1 || true

validate-structure:
name: Validate Project Structure
runs-on: ubuntu-latest
Expand Down
5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \
wget \
git \
jq \
gawk \
unzip \
bash \
ca-certificates \
Expand Down Expand Up @@ -103,6 +104,7 @@ RUN printf '%s\n' \
' echo " install/<name> [args...] Run an install script (e.g. install/nginx)"' \
' echo " setup/<name> [args...] Run a setup script (e.g. setup/ingress)"' \
' echo " module/<name>/<action> [args...] Run a module script (e.g. module/vegeta/run)"' \
' echo " merge [args...] Merge vegeta .bin files (modules/vegeta/merge/merge.sh)"' \
' echo " server Start the HTTP server"' \
' echo " bash -c \"...\" Run a custom command"' \
' echo ""' \
Expand Down Expand Up @@ -171,6 +173,9 @@ RUN printf '%s\n' \
' done' \
' exit 1' \
' fi' \
'elif [ "$1" = "merge" ]; then' \
' shift' \
' exec bash /app/modules/vegeta/merge/merge.sh "$@"' \
'else' \
' exec "$@"' \
'fi' \
Expand Down
165 changes: 165 additions & 0 deletions modules/vegeta/merge/merge.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#!/bin/bash

# Script to merge multiple vegeta raw .bin files into jaggr-format per-second JSON output.
# Uses actual request timestamps for bucketing (not wall-clock time), so it works correctly
# on saved/replayed data and correctly interleaves results from pods that started at
# slightly different times.
#
# Note: The first and last second-buckets may be partial (vegeta doesn't start/stop exactly
# on second boundaries). Consumers should account for this when validating RPS values.

set -eo pipefail

# Print the help text (synopsis, options, examples) to stdout.
# $0 is expanded inside the here-document so the examples show the invoked script path.
show_usage() {
  cat <<EOF
Usage: $0 [--output-file FILE] <bin_file1> [bin_file2 ...]

Merges one or more raw vegeta .bin files into jaggr-format per-second JSON output.

Options:
 --output-file FILE Write output to FILE (default: stdout)
 -h, --help Show this help message

Examples:
 $0 results/pod0.bin results/pod1.bin
 $0 --output-file merged.json results/pod0.bin results/pod1.bin results/pod2.bin
 $0 results/single.bin
EOF
}

# Parse the command line: an optional "--output-file FILE" plus one or more .bin paths,
# then check that every input file exists and is non-empty.
# All diagnostics (including usage shown after an error) go to stderr, because stdout
# carries the merged JSON when no --output-file is given. Every error exits with status 1.
output_file=""
bin_files=()

while [[ $# -gt 0 ]]; do
  case "$1" in
    --output-file)
      # A trailing "--output-file" would otherwise make "shift 2" fail and, with
      # "set -e" active, end the script without any explanation.
      if [[ $# -lt 2 ]]; then
        echo "Error: --output-file requires a FILE argument" >&2
        show_usage >&2
        exit 1
      fi
      output_file="$2"
      shift 2
      ;;
    -h|--help)
      # Help was asked for explicitly, so it belongs on stdout with a success status.
      show_usage
      exit 0
      ;;
    -*)
      echo "Error: Unknown option: $1" >&2
      show_usage >&2
      exit 1
      ;;
    *)
      bin_files+=("$1")
      shift
      ;;
  esac
done

if [[ ${#bin_files[@]} -eq 0 ]]; then
  echo "Error: At least one .bin file is required" >&2
  show_usage >&2
  exit 1
fi

# Validate all input files exist and are non-empty
for f in "${bin_files[@]}"; do
  if [[ ! -f "$f" ]]; then
    echo "Error: File not found: $f" >&2
    exit 1
  fi
  if [[ ! -s "$f" ]]; then
    echo "Error: File is empty: $f" >&2
    exit 1
  fi
done

echo "Merging ${#bin_files[@]} .bin file(s)..." >&2

# Pipeline:
# 1. vegeta encode --to csv on all input files (vegeta round-robins through them)
# 2. Sort by timestamp column (column 1, nanoseconds since epoch)
# 3. gawk to bucket by second and compute per-bucket aggregates
# 4. Output one JSON line per second-bucket
# Decode every input .bin file to CSV, order all requests by timestamp, and write one
# jaggr-style JSON object per second-bucket to stdout.
#
# Globals:  bin_files (read) - array of vegeta .bin paths to merge
# Outputs:  one line per second that contains at least one request, shaped as
#           {"rps":N,"code":{"hist":{"<status>":count,...}},
#            "latency":{"p25":ns,"p50":ns,"p99":ns},
#            "bytes_in":{"sum":N},"bytes_out":{"sum":N}}
#           Status codes appear in ascending numeric order. Percentiles use the
#           nearest-rank rule (smallest sample with at least p percent at or below it).
# Requires: vegeta, sort, gawk (asort and PROCINFO["sorted_in"] are gawk extensions)
merge_results() {
  vegeta encode --to csv "${bin_files[@]}" \
    | LC_ALL=C sort -t, -k1,1n \
    | gawk -F, '
      # Nearest-rank position (1-based) of percentile pct among n sorted samples:
      # ceil(n * pct / 100) computed on integers, clamped to [1, n].
      function rank_index(n, pct,    idx) {
        idx = int((n * pct + 99) / 100)
        if (idx < 1) idx = 1
        if (idx > n) idx = n
        return idx
      }

      # Print the JSON line for the bucket collected so far (nothing if it is empty).
      function flush_bucket(    code, code_hist) {
        if (bucket_count == 0) return

        # asort copies latencies into sorted_lat in ascending order, indexed 1..n.
        asort(latencies, sorted_lat)

        # Build the status-code histogram as the inside of a JSON object.
        code_hist = ""
        for (code in code_counts) {
          if (code_hist != "") code_hist = code_hist ","
          code_hist = code_hist "\"" code "\":" code_counts[code]
        }

        printf "{\"rps\":%d,\"code\":{\"hist\":{%s}},\"latency\":{\"p25\":%d,\"p50\":%d,\"p99\":%d},\"bytes_in\":{\"sum\":%d},\"bytes_out\":{\"sum\":%d}}\n",
          bucket_count, code_hist,
          sorted_lat[rank_index(bucket_count, 25)],
          sorted_lat[rank_index(bucket_count, 50)],
          sorted_lat[rank_index(bucket_count, 99)],
          bytes_in_sum, bytes_out_sum
      }

      BEGIN {
        # Visit status codes in ascending numeric order so the output is reproducible.
        PROCINFO["sorted_in"] = "@ind_num_asc"
        current_second = -1
        bucket_count = 0
        bytes_in_sum = 0
        bytes_out_sum = 0
      }

      # Ignore blank or truncated rows instead of counting them as requests.
      NF < 5 { next }

      {
        # CSV columns: timestamp_ns, status_code, latency_ns, bytes_out, bytes_in, error
        # Whole seconds are the timestamp without its last nine digits. Slicing the text
        # keeps this exact: 19-digit nanosecond values exceed the precision of the
        # double-precision numbers awk computes with.
        this_second = (length($1) > 9) ? substr($1, 1, length($1) - 9) + 0 : 0

        if (current_second == -1) current_second = this_second

        if (this_second != current_second) {
          flush_bucket()

          # Reset for new bucket
          current_second = this_second
          bucket_count = 0
          bytes_in_sum = 0
          bytes_out_sum = 0
          delete code_counts
          delete latencies
        }

        bucket_count++
        latencies[bucket_count] = $3 + 0
        code_counts[$2] += 1
        bytes_out_sum += $4 + 0
        bytes_in_sum += $5 + 0
      }

      END {
        flush_bucket()
      }
    '
}

# Emit the merged JSON: straight to stdout when no --output-file was given, otherwise
# into that file followed by a confirmation on stderr.
if [[ -z "$output_file" ]]; then
  merge_results
else
  merge_results > "$output_file"
  echo "Merged output written to ${output_file}" >&2
fi
Loading
Loading