Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
6c7f7f1
feat: implement Waku API Health spec (WIP)
fcecin Jan 17, 2026
7a38194
Fix failing libwaku build
fcecin Jan 19, 2026
a441933
Improvements
fcecin Jan 23, 2026
60ee9b1
Introduce api/send
NagyZoltanPeter Nov 21, 2025
11c2ac3
Fix edge mode config and test added
NagyZoltanPeter Dec 14, 2025
e9df811
Fix some import issues, start and stop waku shall not throw exception…
NagyZoltanPeter Dec 15, 2025
7e68a66
Utilize sync RequestBroker, adapt to non-async broker usage and gcsafe…
NagyZoltanPeter Dec 15, 2025
e8bbf56
add api_example app to examples2
NagyZoltanPeter Dec 15, 2025
1f691ae
Adapt after merge from master
NagyZoltanPeter Dec 15, 2025
d27ce37
Adapt code for using broker context
NagyZoltanPeter Jan 12, 2026
36fdba3
Fix brokerCtx settings for all used brokers, cover locked node init
NagyZoltanPeter Jan 15, 2026
4b163ea
Various fixes upon test failures. Added initial of subscribe API and …
NagyZoltanPeter Jan 23, 2026
5af834b
More test added
NagyZoltanPeter Jan 24, 2026
fc8e9f6
Fix multi propagate event emit, fix fail send test case
NagyZoltanPeter Jan 25, 2026
3642646
Fix rebase
NagyZoltanPeter Jan 25, 2026
8b9e7b8
Rename
fcecin Jan 26, 2026
66ebce6
Fix PushMessageHandlers in tests
NagyZoltanPeter Jan 27, 2026
a97de82
adapt libwaku to api changes
NagyZoltanPeter Jan 27, 2026
31586f5
Merge branch 'feat-waku-api-send' into feat/lmn-health-api
fcecin Jan 27, 2026
2b14692
Merge branch 'master' into feat/lmn-health-api
fcecin Jan 27, 2026
4333f69
Connect with Lmn Health API (WIP)
fcecin Jan 28, 2026
a834abb
Implement Waku Health API (WIP)
fcecin Jan 28, 2026
8eee1ec
Improve topic health API
fcecin Jan 28, 2026
8595b1c
stricter API availability check proc
fcecin Jan 29, 2026
f5e2b97
Add EventWakuPeer emitted by PeerManager
fcecin Jan 29, 2026
baf68a8
Health brokering improvements
fcecin Feb 2, 2026
8d2bc13
Merge branch 'master' into feat/lmn-health-api
fcecin Feb 2, 2026
345616f
Add missing API elements and improvements
fcecin Feb 3, 2026
53830ae
Health API testing and fixes
fcecin Feb 7, 2026
765cd60
Simplify NodeHealthMonitor creation
fcecin Feb 9, 2026
2100e48
Fix var relay -> let relay
fcecin Feb 10, 2026
8c66356
Merge branch 'chore/fix-health-monitor-ctor' into feat/lmn-health-api
fcecin Feb 11, 2026
0cb2c33
Fixes from Ivan's review (partial, round 1)
fcecin Feb 11, 2026
2f6c59e
Fixes from Ivan's review (round 2/2)
fcecin Feb 11, 2026
4532b6a
Merge branch 'master' into feat/lmn-health-api
fcecin Feb 11, 2026
f0c1be7
fix lint
fcecin Feb 11, 2026
c28e4aa
Roll back passing brokerCtx in PeerManager and WakuRelay ctors
fcecin Feb 11, 2026
f999487
Add tests & wire RequestHealthReport
fcecin Feb 11, 2026
51019a7
Fix RequestContentTopicsHealth provider and tests
fcecin Feb 12, 2026
84e25e5
add RequestProtocolHealth edge mode smoke test
fcecin Feb 12, 2026
e658175
last review fixes
fcecin Feb 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions library/events/json_connection_status_change_event.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{.push raises: [].}

import system, std/json
import ./json_base_event
import ../../waku/api/types

type JsonConnectionStatusChangeEvent* = ref object of JsonEvent
  ## JSON event that carries a `ConnectionStatus` value in its `status` field.
  status*: ConnectionStatus

proc new*(
    T: type JsonConnectionStatusChangeEvent, status: ConnectionStatus
): T =
  ## Builds the event with `eventType` set to "node_health_change" and the
  ## supplied `status`.
  JsonConnectionStatusChangeEvent(eventType: "node_health_change", status: status)

method `$`*(event: JsonConnectionStatusChangeEvent): string =
  ## Renders the event as text by converting it to a JSON node with `%*` and
  ## stringifying that node.
  let node = %*event
  $node
8 changes: 8 additions & 0 deletions library/libwaku.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ import
./events/json_message_event,
./events/json_topic_health_change_event,
./events/json_connection_change_event,
./events/json_connection_status_change_event,
../waku/factory/app_callbacks,
waku/factory/waku,
waku/node/waku_node,
waku/node/health_monitor/health_status,
./declare_lib

################################################################################
Expand Down Expand Up @@ -61,10 +63,16 @@ proc waku_new(
callEventCallback(ctx, "onConnectionChange"):
$JsonConnectionChangeEvent.new($peerId, peerEvent)

proc onConnectionStatusChange(ctx: ptr FFIContext): ConnectionStatusChangeHandler =
  ## Returns an async handler bound to `ctx`. Each invocation forwards the
  ## received `status` to the "onConnectionStatusChange" event callback as the
  ## string form of a `JsonConnectionStatusChangeEvent`.
  result = proc(status: ConnectionStatus) {.async.} =
    callEventCallback(ctx, "onConnectionStatusChange"):
      $JsonConnectionStatusChangeEvent.new(status)

let appCallbacks = AppCallbacks(
relayHandler: onReceivedMessage(ctx),
topicHealthChangeHandler: onTopicHealthChange(ctx),
connectionChangeHandler: onConnectionChange(ctx),
connectionStatusChangeHandler: onConnectionStatusChange(ctx)
)

ffi.sendRequestToFFIThread(
Expand Down
3 changes: 2 additions & 1 deletion tests/node/test_all.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ import
./test_wakunode_peer_exchange,
./test_wakunode_store,
./test_wakunode_legacy_store,
./test_wakunode_peer_manager
./test_wakunode_peer_manager,
./test_wakunode_health_monitor
324 changes: 324 additions & 0 deletions tests/node/test_wakunode_health_monitor.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
{.used.}

import
std/[json, options, sequtils, strutils, tables], testutils/unittests, chronos, results

import
waku/[
waku_core,
common/waku_protocol,
node/waku_node,
node/peer_manager,
node/health_monitor/health_status,
node/health_monitor/connection_status,
node/health_monitor/protocol_health,
node/health_monitor/node_health_monitor,
node/kernel_api/relay,
node/kernel_api/store,
node/kernel_api/lightpush,
node/kernel_api/filter,
waku_archive,
]

import ../testlib/[wakunode, wakucore], ../waku_archive/archive_utils

proc p(kind: WakuProtocol, health: HealthStatus): ProtocolHealth =
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to use a longer name so it's easier to look up.

Suggested change
proc p(kind: WakuProtocol, health: HealthStatus): ProtocolHealth =
proc pLongerName(kind: WakuProtocol, health: HealthStatus): ProtocolHealth =

var ph = ProtocolHealth.init(kind)
if health == HealthStatus.READY:
return ph.ready()
else:
return ph.notReady("mock")

suite "Health Monitor - health state calculation":
test "Disconnected, zero peers":
let protocols =
@[
p(RelayProtocol, HealthStatus.NOT_READY),
p(StoreClientProtocol, HealthStatus.NOT_READY),
p(FilterClientProtocol, HealthStatus.NOT_READY),
p(LightpushClientProtocol, HealthStatus.NOT_READY),
]
let strength = initTable[WakuProtocol, int]()
let state =
calculateConnectionState(protocols, strength, DefaultRelayFailoverThreshold)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't it be DefaultRelayFailoverThreshold - 1 in this case?

I'd assume state == Connected when DefaultRelayFailoverThreshold

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is mocking basically everything and is just testing the math behind the ConnectionStatus calculation. Here DefaultRelayFailoverThreshold is just a stand-in for the DLow config parameter for GossipSub. Since GossipSub doesn't exist here, we are just saying it is the default value for when an actual DLow config doesn't exist for some reason. The logic in calculateConnectionState, which is otherwise just a static calculator, depends on what DLow is, which is externally-provided from the node and the network.

check state == ConnectionStatus.Disconnected

test "PartiallyConnected, weak relay":
  # Relay and store client both report READY, but relay is given a strength
  # one below the threshold handed to calculateConnectionState. For that
  # input the calculation is expected to yield PartiallyConnected.
  let
    belowThreshold = DefaultRelayFailoverThreshold - 1
    healths =
      @[
        p(RelayProtocol, HealthStatus.READY),
        p(StoreClientProtocol, HealthStatus.READY),
      ]
    peerStrength = {RelayProtocol: belowThreshold, StoreClientProtocol: 1}.toTable

  check calculateConnectionState(
    healths, peerStrength, DefaultRelayFailoverThreshold
  ) == ConnectionStatus.PartiallyConnected
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a comment explaining why "PartiallyConnected" would help.


test "Connected, robust relay":
let protocols =
@[
p(RelayProtocol, HealthStatus.READY), p(StoreClientProtocol, HealthStatus.READY)
]
var strength = initTable[WakuProtocol, int]()
strength[RelayProtocol] = DefaultRelayFailoverThreshold
strength[StoreClientProtocol] = FailoverThreshold
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe :)?

Suggested change
strength[StoreClientProtocol] = FailoverThreshold
strength[StoreClientProtocol] = HealthyThreshold

let state =
calculateConnectionState(protocols, strength, DefaultRelayFailoverThreshold)
check state == ConnectionStatus.Connected

test "Connected, robust edge":
  # Relay is reported as NOT_MOUNTED; the lightpush, filter and store clients
  # are all READY and each gets a strength equal to FailoverThreshold.
  let healths =
    @[
      p(RelayProtocol, HealthStatus.NOT_MOUNTED),
      p(LightpushClientProtocol, HealthStatus.READY),
      p(FilterClientProtocol, HealthStatus.READY),
      p(StoreClientProtocol, HealthStatus.READY),
    ]

  var peerStrength = initTable[WakuProtocol, int]()
  for proto in [LightpushClientProtocol, FilterClientProtocol, StoreClientProtocol]:
    peerStrength[proto] = FailoverThreshold

  check calculateConnectionState(
    healths, peerStrength, DefaultRelayFailoverThreshold
  ) == ConnectionStatus.Connected

test "Disconnected, edge missing store":
  # Lightpush and filter clients are READY at FailoverThreshold strength,
  # while the store client is NOT_READY with a strength of zero.
  let
    healths =
      @[
        p(LightpushClientProtocol, HealthStatus.READY),
        p(FilterClientProtocol, HealthStatus.READY),
        p(StoreClientProtocol, HealthStatus.NOT_READY),
      ]
    peerStrength = {
      LightpushClientProtocol: FailoverThreshold,
      FilterClientProtocol: FailoverThreshold,
      StoreClientProtocol: 0,
    }.toTable

  check calculateConnectionState(
    healths, peerStrength, DefaultRelayFailoverThreshold
  ) == ConnectionStatus.Disconnected

test "PartiallyConnected, edge meets minimum failover requirement":
  # All three client protocols are READY and share the same strength:
  # one below FailoverThreshold, but never less than 1.
  let
    reducedStrength = max(1, FailoverThreshold - 1)
    healths =
      @[
        p(LightpushClientProtocol, HealthStatus.READY),
        p(FilterClientProtocol, HealthStatus.READY),
        p(StoreClientProtocol, HealthStatus.READY),
      ]

  var peerStrength = initTable[WakuProtocol, int]()
  for proto in [LightpushClientProtocol, FilterClientProtocol, StoreClientProtocol]:
    peerStrength[proto] = reducedStrength

  check calculateConnectionState(
    healths, peerStrength, DefaultRelayFailoverThreshold
  ) == ConnectionStatus.PartiallyConnected

test "Connected, robust relay ignores store server":
  # Relay is READY at the threshold strength; the store (server) protocol is
  # READY with zero strength and the result is still expected to be Connected.
  let
    healths =
      @[p(RelayProtocol, HealthStatus.READY), p(StoreProtocol, HealthStatus.READY)]
    peerStrength =
      {RelayProtocol: DefaultRelayFailoverThreshold, StoreProtocol: 0}.toTable

  check calculateConnectionState(
    healths, peerStrength, DefaultRelayFailoverThreshold
  ) == ConnectionStatus.Connected

test "Connected, robust relay ignores store client":
  # Relay is READY at the threshold strength. Store server is READY and store
  # client is NOT_READY, both with zero strength; Connected is still expected.
  let
    healths =
      @[
        p(RelayProtocol, HealthStatus.READY),
        p(StoreProtocol, HealthStatus.READY),
        p(StoreClientProtocol, HealthStatus.NOT_READY),
      ]
    peerStrength = {
      RelayProtocol: DefaultRelayFailoverThreshold,
      StoreProtocol: 0,
      StoreClientProtocol: 0,
    }.toTable

  check calculateConnectionState(
    healths, peerStrength, DefaultRelayFailoverThreshold
  ) == ConnectionStatus.Connected

suite "Health Monitor - events":
asyncTest "Core (relay) health update":
let
nodeAKey = generateSecp256k1Key()
nodeA = newTestWakuNode(nodeAKey, parseIpAddress("127.0.0.1"), Port(0))

(await nodeA.mountRelay()).expect("Node A failed to mount Relay")

nodeA.mountStoreClient()
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not needed. That happens in node_factory.nim


await nodeA.start()

let monitorA = NodeHealthMonitor.new()
monitorA.setNodeToHealthMonitor(nodeA)

var
lastStatus = ConnectionStatus.Disconnected
callbackCount = 0
healthChangeSignal = newFuture[void]()

monitorA.onConnectionStatusChange = proc(status: ConnectionStatus) {.async.} =
lastStatus = status
callbackCount.inc()
if not healthChangeSignal.finished:
healthChangeSignal.complete()

monitorA.startHealthMonitor().expect("Health monitor failed to start")

let
nodeBKey = generateSecp256k1Key()
nodeB = newTestWakuNode(nodeBKey, parseIpAddress("127.0.0.1"), Port(0))

let driver = newSqliteArchiveDriver()
nodeB.mountArchive(driver).expect("Node B failed to mount archive")

(await nodeB.mountRelay()).expect("Node B failed to mount relay")
await nodeB.mountStore()

await nodeB.start()

await nodeA.connectToNodes(@[nodeB.switch.peerInfo.toRemotePeerInfo()])

proc dummyHandler(topic: PubsubTopic, msg: WakuMessage): Future[void] {.async.} =
discard

nodeA.subscribe((kind: PubsubSub, topic: DefaultPubsubTopic), dummyHandler).expect(
"Node A failed to subscribe"
)
nodeB.subscribe((kind: PubsubSub, topic: DefaultPubsubTopic), dummyHandler).expect(
"Node B failed to subscribe"
)
Comment on lines +177 to +182
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These might not be needed, as subscribeShards = @[DefaultShardId] happens when creating the node in the test.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taking these out breaks the test unfortunately. The newTestWakuNode helper just passes this to the "enrBuilder"; it doesn't seem to execute relay subscriptions with it, like setupAppCallbacks eventually does for real nodes.


let connectTimeLimit = Moment.now() + 10.seconds
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot wait so long :)
I'd also move that wait value into a const at the top of the test file.

Suggested change
let connectTimeLimit = Moment.now() + 10.seconds
let connectTimeLimit = Moment.now() + 3.seconds

var gotConnected = false

while Moment.now() < connectTimeLimit:
if lastStatus != ConnectionStatus.Disconnected:
gotConnected = true
break

if healthChangeSignal.finished:
healthChangeSignal = newFuture[void]()
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe not needed as it happens a few lines below

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed from void Future to AsyncEvent to make it cleaner and clearer


discard await healthChangeSignal.withTimeout(connectTimeLimit - Moment.now())

check:
gotConnected == true
callbackCount >= 1

if healthChangeSignal.finished:
healthChangeSignal = newFuture[void]()

await nodeB.stop()
await nodeA.disconnectNode(nodeB.switch.peerInfo.toRemotePeerInfo())

let disconnectTimeLimit = Moment.now() + 10.seconds
var gotDisconnected = false

while Moment.now() < disconnectTimeLimit:
if lastStatus == ConnectionStatus.Disconnected:
gotDisconnected = true
break

if healthChangeSignal.finished:
healthChangeSignal = newFuture[void]()

discard await healthChangeSignal.withTimeout(disconnectTimeLimit - Moment.now())

check:
gotDisconnected == true

await monitorA.stopHealthMonitor()
await nodeA.stop()

asyncTest "Edge (light client) health update":
let
nodeAKey = generateSecp256k1Key()
nodeA = newTestWakuNode(nodeAKey, parseIpAddress("127.0.0.1"), Port(0))

nodeA.mountLightpushClient()
await nodeA.mountFilterClient()
nodeA.mountStoreClient()

await nodeA.start()

let monitorA = NodeHealthMonitor.new()
monitorA.setNodeToHealthMonitor(nodeA)

var
lastStatus = ConnectionStatus.Disconnected
callbackCount = 0
healthChangeSignal = newFuture[void]()

monitorA.onConnectionStatusChange = proc(status: ConnectionStatus) {.async.} =
lastStatus = status
callbackCount.inc()
if not healthChangeSignal.finished:
healthChangeSignal.complete()

monitorA.startHealthMonitor().expect("Health monitor failed to start")

let
nodeBKey = generateSecp256k1Key()
nodeB = newTestWakuNode(nodeBKey, parseIpAddress("127.0.0.1"), Port(0))

let driver = newSqliteArchiveDriver()
nodeB.mountArchive(driver).expect("Node B failed to mount archive")

(await nodeB.mountRelay()).expect("Node B failed to mount relay")

(await nodeB.mountLightpush()).expect("Node B failed to mount lightpush")
await nodeB.mountFilter()
await nodeB.mountStore()

await nodeB.start()

await nodeA.connectToNodes(@[nodeB.switch.peerInfo.toRemotePeerInfo()])

let connectTimeLimit = Moment.now() + 10.seconds
var gotConnected = false

while Moment.now() < connectTimeLimit:
if lastStatus == ConnectionStatus.PartiallyConnected:
gotConnected = true
break

if healthChangeSignal.finished:
healthChangeSignal = newFuture[void]()

discard await healthChangeSignal.withTimeout(connectTimeLimit - Moment.now())

check:
gotConnected == true
callbackCount >= 1
lastStatus == ConnectionStatus.PartiallyConnected

if healthChangeSignal.finished:
healthChangeSignal = newFuture[void]()

await nodeB.stop()
await nodeA.disconnectNode(nodeB.switch.peerInfo.toRemotePeerInfo())

let disconnectTimeLimit = Moment.now() + 10.seconds
var gotDisconnected = false

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is interesting to confirm the opposite state happens before entering the loop.

Suggested change
assert lastStatus == ConnectionStatus.Connected, "Expected last status to be connected"

while Moment.now() < disconnectTimeLimit:
if lastStatus == ConnectionStatus.Disconnected:
gotDisconnected = true
break

if healthChangeSignal.finished:
healthChangeSignal = newFuture[void]()

discard await healthChangeSignal.withTimeout(disconnectTimeLimit - Moment.now())

check:
gotDisconnected == true
lastStatus == ConnectionStatus.Disconnected

await monitorA.stopHealthMonitor()
await nodeA.stop()

1 change: 1 addition & 0 deletions tests/waku_relay/utils.nim
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import
waku_core,
waku_node,
waku_rln_relay,
common/broker/broker_context,
],
../waku_store/store_utils,
../waku_archive/archive_utils,
Expand Down
Loading
Loading