From fd95cf6b29231b8dc58c4faac5db04e441c1729d Mon Sep 17 00:00:00 2001
From: Affaan Mustafa
Date: Tue, 28 Apr 2026 22:08:35 -0400
Subject: [PATCH] fix: retry observer wait after signal

---
 .../agents/observer-loop.sh         | 44 +++++++++++++-
 tests/hooks/observer-memory.test.js | 57 +++++++++++++++++++
 2 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/skills/continuous-learning-v2/agents/observer-loop.sh b/skills/continuous-learning-v2/agents/observer-loop.sh
index bedc7f67..f18d8ae5 100755
--- a/skills/continuous-learning-v2/agents/observer-loop.sh
+++ b/skills/continuous-learning-v2/agents/observer-loop.sh
@@ -83,6 +83,48 @@ exit_if_idle_without_sessions() {
   fi
 }
 
+# Wait for the Claude analysis child and report its real exit status.
+#
+# Arguments:
+#   $1 - PID of the background child to wait for.
+# Returns:
+#   The child's exit status. A wait that was merely interrupted by a trapped
+#   signal (status above 128 while the child is still alive) is retried.
+wait_for_claude_analysis() {
+  local child_pid="$1"
+  local wait_status=0
+  local reaped_status=0
+
+  while true; do
+    wait "$child_pid"
+    wait_status=$?
+
+    # Bash reports an interrupted wait with a status above 128, so anything
+    # at or below 128 is final and needs no liveness probe.
+    if [ "$wait_status" -le 128 ]; then
+      return "$wait_status"
+    fi
+
+    # SIGUSR1 can interrupt wait while the Claude child is still running.
+    # Re-wait in that case so a signal is not logged as a false child failure.
+    if kill -0 "$child_pid" 2>/dev/null; then
+      continue
+    fi
+
+    # The child can exit and be reaped between the interrupted wait and the
+    # probe above. Ask bash for the status it recorded so the interrupt
+    # status is not returned in place of the child's own result.
+    wait "$child_pid" 2>/dev/null
+    reaped_status=$?
+    if [ "$reaped_status" -ne 127 ]; then
+      return "$reaped_status"
+    fi
+
+    # 127 means bash no longer tracks the pid; keep the last status we saw.
+    return "$wait_status"
+  done
+}
+
 analyze_observations() {
   if [ ! -f "$OBSERVATIONS_FILE" ]; then
     return
@@ -217,7 +259,7 @@ PROMPT
   ) &
   watchdog_pid=$!
 
-  wait "$claude_pid"
+  wait_for_claude_analysis "$claude_pid"
   exit_code=$?
   kill "$watchdog_pid" 2>/dev/null || true
   rm -f "$analysis_file"
diff --git a/tests/hooks/observer-memory.test.js b/tests/hooks/observer-memory.test.js
index 892d025b..9436db67 100644
--- a/tests/hooks/observer-memory.test.js
+++ b/tests/hooks/observer-memory.test.js
@@ -205,6 +205,63 @@ test('prompt references analysis_file not full OBSERVATIONS_FILE', () => {
   assert.ok(promptSection.includes('${analysis_relpath}'), 'Prompt should point Claude at the sampled analysis file (via relative path), not the full observations file');
 });
 
+test('observer-loop wait helper retries SIGUSR1-interrupted waits while claude child is alive', () => {
+  if (process.platform === 'win32') { // no-op on Windows; the check below shells out to bash
+    return;
+  }
+
+  const content = fs.readFileSync(observerLoopPath, 'utf8');
+  const helperMatch = content.match(/wait_for_claude_analysis\(\) \{[\s\S]*?\n\}/); // lazy match: stops at the first "}" that starts a line
+  assert.ok(helperMatch, 'observer-loop.sh should define wait_for_claude_analysis helper');
+
+  const script = [
+    'set +e', // a nonzero wait status must not abort the script
+    helperMatch[0], // helper source lifted from observer-loop.sh
+    'trap ":" USR1', // no-op handler: USR1 interrupts wait without killing bash
+    '( sleep 0.35; exit 0 ) &', // stand-in claude child: still running when USR1 arrives, then exits 0
+    'claude_child=$!',
+    '( sleep 0.05; kill -USR1 $$ ) &', // $$ is the spawned bash, so the signal lands during the wait
+    'signaler=$!',
+    'wait_for_claude_analysis "$claude_child"',
+    'status=$?',
+    'wait "$signaler" 2>/dev/null || true', // reap the signaler; $status is already captured
+    'exit "$status"'
+  ].join('\n');
+
+  const result = spawnSync('bash', ['-c', script], {
+    encoding: 'utf8',
+    timeout: 5000 // ms; spawnSync kills bash if the helper never returns
+  });
+
+  assert.strictEqual(result.status, 0, `interrupted wait should return child exit 0, got ${result.status}; stderr: ${result.stderr}`);
+});
+
+test('observer-loop wait helper preserves real nonzero claude exits', () => {
+  if (process.platform === 'win32') { // no-op on Windows; the check below shells out to bash
+    return;
+  }
+
+  const content = fs.readFileSync(observerLoopPath, 'utf8');
+  const helperMatch = content.match(/wait_for_claude_analysis\(\) \{[\s\S]*?\n\}/); // lazy match: stops at the first "}" that starts a line
+  assert.ok(helperMatch, 'observer-loop.sh should define wait_for_claude_analysis helper');
+
+  const script = [
+    'set +e', // a nonzero wait status must not abort the script
+    helperMatch[0], // helper source lifted from observer-loop.sh
+    '( sleep 0.05; exit 7 ) &', // child fails on its own with a distinctive status
+    'claude_child=$!',
+    'wait_for_claude_analysis "$claude_child"',
+    'exit "$?"' // surface the helper's return value as bash's exit code
+  ].join('\n');
+
+  const result = spawnSync('bash', ['-c', script], {
+    encoding: 'utf8',
+    timeout: 5000 // ms; spawnSync kills bash if the helper never returns
+  });
+
+  assert.strictEqual(result.status, 7, `real child failure should be preserved, got ${result.status}; stderr: ${result.stderr}`);
+});
+
 // ──────────────────────────────────────────────────────
 // Test group 5: Signal counter file simulation
 // ──────────────────────────────────────────────────────