991f7ded34
This commit packages the standalone task-14 acceptance and task-15 fault-suite execution toolchain for downstream validation. It includes all runnable harness scripts, helper utilities, and generated evidence captures so downstream behavior can be reproduced and reviewed independently from docs and core implementation. Bundling these assets separately allows QA/automation workflows to validate runtime changes without dragging operational notes or release-gate documentation into the same review unit. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
436 lines
9.9 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Task-15 fault-suite runner: executes three fault-injection scenarios
# (torn_read, sink_stall, reset_storm) against the streamer pipeline and
# records evidence under .sisyphus/evidence.
#
# Usage: $0 [--mode baseline|degraded] | [--degraded]

# -u: error on unset variables; pipefail: pipelines fail if any stage fails.
# NOTE(review): -e is absent — the script captures $? of fallible commands
# directly (streamer/tester exit codes), which -e would preempt; confirm
# this is deliberate before adding it.
set -u -o pipefail

# Locations resolved relative to this script's own directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
STREAMER_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
WORKTREE_ROOT="$(cd "${STREAMER_ROOT}/../.." && pwd)"
BUILD_DIR="${STREAMER_ROOT}/build"

# Evidence output tree shared with other task scripts.
EVIDENCE_ROOT="${WORKTREE_ROOT}/.sisyphus/evidence"
TASK_EVIDENCE_DIR="${EVIDENCE_ROOT}/task-15-fault-suite"
# Python helper that aggregates the TSV manifest into summary.json.
SUMMARY_HELPER="${SCRIPT_DIR}/fault_summary_helper.py"
# Parse command-line options:
#   --mode <baseline|degraded>   select the fault-pressure profile
#   --degraded                   shorthand for --mode degraded
# Previously only the first argument was inspected, so any trailing
# arguments were silently ignored; the loop now rejects them (exit 2).
MODE="baseline"
while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode)
      if [[ $# -lt 2 ]]; then
        echo "missing value for --mode" >&2
        exit 2
      fi
      MODE="$2"
      shift 2
      ;;
    --degraded)
      MODE="degraded"
      shift
      ;;
    *)
      echo "unknown argument: $1" >&2
      exit 2
      ;;
  esac
done

# Reject any value of --mode outside the two supported profiles.
if [[ "${MODE}" != "baseline" && "${MODE}" != "degraded" ]]; then
  echo "invalid --mode '${MODE}' (expected: baseline|degraded)" >&2
  exit 2
fi
# Per-run state; populated by allocate_run_dir() before first use.
RUN_ID=""
RUN_DIR=""
# Initialized empty (previously "${RUN_DIR}/rows.tsv" with RUN_DIR empty,
# i.e. the misleading absolute paths "/rows.tsv" and "/summary.json"):
# any accidental use before allocation now fails loudly instead of
# touching the filesystem root.
MANIFEST_TSV=""
SUMMARY_JSON=""

# Mode-specific "latest" evidence pointers refreshed at the end of a run.
if [[ "${MODE}" == "baseline" ]]; then
  LATEST_SUMMARY_JSON="${EVIDENCE_ROOT}/task-15-fault-suite-summary.json"
  EVIDENCE_TEXT="${EVIDENCE_ROOT}/task-15-fault-suite.txt"
else
  LATEST_SUMMARY_JSON="${EVIDENCE_ROOT}/task-15-fault-suite-error-summary.json"
  EVIDENCE_TEXT="${EVIDENCE_ROOT}/task-15-fault-suite-error.txt"
fi

# UTC start timestamp recorded in the summary and evidence text.
STARTED_AT_UTC="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"

mkdir -p "${TASK_EVIDENCE_DIR}"
#######################################
# Create a unique per-run evidence directory under TASK_EVIDENCE_DIR.
# Uniqueness relies on mkdir's atomic create-or-fail; up to 50 attempts.
# Globals:  TASK_EVIDENCE_DIR, MODE (read);
#           RUN_ID, RUN_DIR, MANIFEST_TSV, SUMMARY_JSON (written)
# Returns:  0 on success, 1 if no unique directory could be created
#######################################
allocate_run_dir() {
  local try stamp dir
  for (( try = 0; try < 50; try++ )); do
    stamp="$(date +"%Y%m%dT%H%M%S")-$(date +"%N")-p$$-$RANDOM"
    dir="${TASK_EVIDENCE_DIR}/${stamp}-${MODE}"
    if mkdir "${dir}" 2>/dev/null; then
      RUN_ID="${stamp}"
      RUN_DIR="${dir}"
      MANIFEST_TSV="${RUN_DIR}/rows.tsv"
      SUMMARY_JSON="${RUN_DIR}/summary.json"
      return 0
    fi
    sleep 0.01
  done
  echo "failed to allocate unique fault-suite run directory" >&2
  return 1
}
# Abort immediately when no unique run directory could be created.
allocate_run_dir || exit 1

# Write the TSV header; one row per scenario is appended later by
# append_manifest_row. printf replaces the non-portable `echo -e`.
printf 'order\tscenario_id\tname\tstatus\treason\tduration_ms\tsim_rc\tstreamer_rc\ttester_rc\tsim_log\tstreamer_log\ttester_log\tsdp_path\n' > "${MANIFEST_TSV}"
# PIDs of background processes that must not outlive the script.
cleanup_pids=()

# Best-effort SIGTERM of every tracked background process still running.
# Safe under `set -u` even when the array is empty (the :- expansion).
cleanup_all() {
  local p
  for p in "${cleanup_pids[@]:-}"; do
    [[ -n "${p}" ]] || continue
    if kill -0 "${p}" 2>/dev/null; then
      kill "${p}" 2>/dev/null || true
    fi
  done
}

trap cleanup_all EXIT
# True iff the given path exists and is executable.
binary_exists() {
  [[ -x "$1" ]]
}
# Wait for a PID to exit, polling once per second for up to $2 seconds.
# Returns the process's exit status, or 124 after a forced kill on timeout
# (mirrors timeout(1)'s convention).
wait_pid() {
  local target="$1"
  local limit_s="$2"
  local waited=0

  while kill -0 "${target}" 2>/dev/null; do
    if (( waited >= limit_s )); then
      kill "${target}" 2>/dev/null || true
      wait "${target}" 2>/dev/null || true
      return 124
    fi
    sleep 1
    waited=$((waited + 1))
  done

  # Process is gone: propagate its stored exit status.
  wait "${target}" 2>/dev/null
}
# Append one TSV row describing a finished scenario to MANIFEST_TSV.
# Arguments (positional, all required):
#   $1 order         1-based scenario position in the suite
#   $2 scenario_id   machine id (torn_read | sink_stall | reset_storm)
#   $3 name          human-readable scenario label
#   $4 status        PASS | FAIL
#   $5 reason        short explanation string
#   $6 duration_ms   wall-clock duration of the row
#   $7-$9            sim / streamer / tester exit codes
#   $10-$12          sim / streamer / tester log paths
#   $13 sdp_path     SDP file written by the streamer
append_manifest_row() {
  local order="$1"
  local scenario_id="$2"
  local name="$3"
  local status="$4"
  local reason="$5"
  local duration_ms="$6"
  local sim_rc="$7"
  local streamer_rc="$8"
  local tester_rc="$9"
  local sim_log="${10}"
  local streamer_log="${11}"
  local tester_log="${12}"
  local sdp_path="${13}"

  # printf instead of `echo -e`: echo -e would re-interpret any backslash
  # sequences embedded in field values (reason strings, paths), corrupting
  # the manifest; printf keeps every field literal.
  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "${order}" "${scenario_id}" "${name}" "${status}" "${reason}" \
    "${duration_ms}" "${sim_rc}" "${streamer_rc}" "${tester_rc}" \
    "${sim_log}" "${streamer_log}" "${tester_log}" "${sdp_path}" \
    >> "${MANIFEST_TSV}"
}
# Print the RTP port for scenario $1 (1-based order). Baseline runs use
# the 52040+ range, degraded runs 52140+, stepping by 2 per scenario so
# consecutive port pairs never collide.
scenario_port() {
  local order="$1"
  local base=52140
  if [[ "${MODE}" == "baseline" ]]; then
    base=52040
  fi
  echo $(( base + (order - 1) * 2 ))
}
# Execute one fault-injection scenario end-to-end: start the RTP receiver
# (tester), then the frame producer (sim), then run the streamer under
# test in the foreground; finally reap the helpers and record a manifest
# row. A row PASSes only if all three processes exit 0.
# Arguments:
#   $1 - order: 1-based position in the suite (also selects the RTP port)
#   $2 - scenario_id: torn_read | sink_stall | reset_storm
#   $3 - name: human-readable label written to the manifest
run_fault_scenario() {
  local order="$1"
  local scenario_id="$2"
  local name="$3"

  # Per-row artifact directory under the run directory.
  local row_dir="${RUN_DIR}/${order}-${scenario_id}"
  mkdir -p "${row_dir}"

  local sim_log="${row_dir}/sim.log"
  local streamer_log="${row_dir}/streamer.log"
  local tester_log="${row_dir}/tester.log"
  local sdp_path="${row_dir}/stream.sdp"

  # Run-unique IPC names so parallel or stale runs cannot collide.
  local shm_name="fault_${MODE}_${scenario_id}_${RUN_ID}"
  local zmq_endpoint="ipc:///tmp/fault_${MODE}_${scenario_id}_${RUN_ID}.ipc"
  local sim_label="f${order}_${MODE:0:3}_${scenario_id:0:3}"

  # Defaults; overridden per scenario below.
  local sim_frames=360
  local sim_fps=200
  local reset_every=""
  local snapshot_delay_us=0
  local emit_stall_ms=0
  local ingest_max_frames=180

  # Scenario knobs: baseline applies mild fault pressure (expected to
  # PASS); degraded amplifies the same fault type.
  case "${scenario_id}" in
    torn_read)
      # Slow snapshot copies while the producer runs fast.
      if [[ "${MODE}" == "baseline" ]]; then
        snapshot_delay_us=2500
        sim_fps=240
      else
        snapshot_delay_us=25000
        sim_fps=320
      fi
      ;;
    sink_stall)
      # Artificial stalls injected into the emit path.
      if [[ "${MODE}" == "baseline" ]]; then
        emit_stall_ms=3
        ingest_max_frames=140
      else
        emit_stall_ms=60
        ingest_max_frames=160
      fi
      ;;
    reset_storm)
      # Producer emits periodic resets; degraded resets far more often.
      if [[ "${MODE}" == "baseline" ]]; then
        reset_every=20
        ingest_max_frames=120
      else
        reset_every=3
        ingest_max_frames=180
      fi
      ;;
    *)
      echo "unknown scenario_id=${scenario_id}" >&2
      return 1
      ;;
  esac

  local rtp_port
  rtp_port="$(scenario_port "${order}")"

  # Frame-producer command line.
  local sim_cmd=(
    "${BUILD_DIR}/cvmmap_sim"
    --shm-name "${shm_name}"
    --zmq-endpoint "${zmq_endpoint}"
    --label "${sim_label}"
    --frames "${sim_frames}"
    --fps "${sim_fps}"
    --width 640
    --height 360
  )
  # --emit-reset-every is only passed when the scenario sets it.
  if [[ -n "${reset_every}" ]]; then
    sim_cmd+=(--emit-reset-every "${reset_every}")
  fi

  # Streamer (system under test) command line.
  local streamer_cmd=(
    "${BUILD_DIR}/cvmmap_streamer"
    --run-mode pipeline
    --codec h264
    --shm-name "${shm_name}"
    --zmq-endpoint "${zmq_endpoint}"
    --queue-size 1
    --gop 30
    --b-frames 0
    --ingest-max-frames "${ingest_max_frames}"
    --ingest-idle-timeout-ms 8000
    --snapshot-copy-delay-us "${snapshot_delay_us}"
    --emit-stall-ms "${emit_stall_ms}"
    --rtp
    --rtp-endpoint "127.0.0.1:${rtp_port}"
    --rtp-payload-type 96
    --rtp-sdp "${sdp_path}"
  )

  # RTP receiver used to confirm packets actually arrive.
  local tester_cmd=(
    "${BUILD_DIR}/rtp_receiver_tester"
    --port "${rtp_port}"
    --expect-pt 96
    --packet-threshold 1
    --timeout-ms 15000
  )

  local row_start_ms row_end_ms duration_ms
  # NOTE(review): %3N (millisecond epoch) is GNU date; not portable to BSD/macOS.
  row_start_ms="$(date +%s%3N)"

  # Receiver is started first, presumably so the earliest RTP packets are
  # not missed; sim next so frames exist when the streamer attaches.
  "${tester_cmd[@]}" > "${tester_log}" 2>&1 &
  local tester_pid=$!
  cleanup_pids+=("${tester_pid}")

  sleep 1

  "${sim_cmd[@]}" > "${sim_log}" 2>&1 &
  local sim_pid=$!
  cleanup_pids+=("${sim_pid}")

  sleep 1

  # Streamer runs in the foreground; its exit ends the scenario.
  "${streamer_cmd[@]}" > "${streamer_log}" 2>&1
  local streamer_rc=$?

  # Reap the helpers (rc 124 indicates a forced kill after timeout).
  wait_pid "${tester_pid}" 25
  local tester_rc=$?

  wait_pid "${sim_pid}" 25
  local sim_rc=$?

  row_end_ms="$(date +%s%3N)"
  duration_ms=$((row_end_ms - row_start_ms))

  # PASS only when all three processes exited 0.
  local status="PASS"
  local reason="all-processes-ok"
  if (( sim_rc != 0 || streamer_rc != 0 || tester_rc != 0 )); then
    status="FAIL"
    reason="sim_rc=${sim_rc},streamer_rc=${streamer_rc},tester_rc=${tester_rc}"
  fi

  append_manifest_row \
    "${order}" \
    "${scenario_id}" \
    "${name}" \
    "${status}" \
    "${reason}" \
    "${duration_ms}" \
    "${sim_rc}" \
    "${streamer_rc}" \
    "${tester_rc}" \
    "${sim_log}" \
    "${streamer_log}" \
    "${tester_log}" \
    "${sdp_path}"

  printf "[%s] %s => %s (%s)\n" "${scenario_id}" "${name}" "${status}" "${reason}"
}
# Drive the whole suite: verify binaries exist, run the three scenarios,
# aggregate results via the Python helper, and write the evidence text.
# Exit codes:
#   0 - suite met this mode's expectation
#   1 - missing binaries, helper failure, or unexpected scenario results
#   2 - degraded mode produced no row failures (see NOTE near the end)
main() {
  local required=(
    "${BUILD_DIR}/cvmmap_sim"
    "${BUILD_DIR}/cvmmap_streamer"
    "${BUILD_DIR}/rtp_receiver_tester"
  )

  # Collect every missing/non-executable binary before running anything.
  local missing=()
  for bin in "${required[@]}"; do
    if ! binary_exists "${bin}"; then
      missing+=("${bin}")
    fi
  done

  if (( ${#missing[@]} > 0 )); then
    # Still emit an evidence file so downstream tooling sees why we bailed.
    {
      echo "task=15"
      echo "mode=${MODE}"
      echo "run_id=${RUN_ID}"
      echo "run_dir=${RUN_DIR}"
      echo "manifest=${MANIFEST_TSV}"
      echo "missing_binaries=${missing[*]}"
    } > "${EVIDENCE_TEXT}"
    echo "missing binaries: ${missing[*]}" >&2
    return 1
  fi

  run_fault_scenario 1 "torn_read" "fault:torn-read"
  run_fault_scenario 2 "sink_stall" "fault:sink-stall"
  run_fault_scenario 3 "reset_storm" "fault:reset-storm"

  local finished_at_utc
  finished_at_utc="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"

  # Aggregate the TSV manifest into summary.json.
  python3 "${SUMMARY_HELPER}" \
    --manifest "${MANIFEST_TSV}" \
    --output "${SUMMARY_JSON}" \
    --run-id "${RUN_ID}" \
    --run-dir "${RUN_DIR}" \
    --started-at "${STARTED_AT_UTC}" \
    --finished-at "${finished_at_utc}" \
    --mode "${MODE}"
  local summary_rc=$?

  # Refresh the mode-specific "latest" pointer; best-effort by design.
  cp -f "${SUMMARY_JSON}" "${LATEST_SUMMARY_JSON}" 2>/dev/null || true

  # Read individual fields back out of summary.json with small inline
  # Python programs (avoids a jq dependency). Heredoc bodies must stay
  # unindented: <<'PY' passes them literally.
  local total_count pass_count fail_count all_pass
  total_count="$(python3 - <<'PY' "${SUMMARY_JSON}"
import json
import sys
data = json.load(open(sys.argv[1], "r", encoding="utf-8"))
counts = data.get("counts", {})
print(counts.get("total", 0))
PY
)"
  pass_count="$(python3 - <<'PY' "${SUMMARY_JSON}"
import json
import sys
data = json.load(open(sys.argv[1], "r", encoding="utf-8"))
counts = data.get("counts", {})
print(counts.get("pass", 0))
PY
)"
  fail_count="$(python3 - <<'PY' "${SUMMARY_JSON}"
import json
import sys
data = json.load(open(sys.argv[1], "r", encoding="utf-8"))
counts = data.get("counts", {})
print(counts.get("fail", 0))
PY
)"
  all_pass="$(python3 - <<'PY' "${SUMMARY_JSON}"
import json
import sys
data = json.load(open(sys.argv[1], "r", encoding="utf-8"))
print("true" if data.get("all_pass", False) else "false")
PY
)"

  # One "scenario_id:violation" line per threshold violation.
  local violation_lines
  violation_lines="$(python3 - <<'PY' "${SUMMARY_JSON}"
import json
import sys

data = json.load(open(sys.argv[1], "r", encoding="utf-8"))
for scenario in data.get("scenarios", []):
    sid = scenario.get("id", "unknown")
    for violation in scenario.get("violations", []):
        print(f"{sid}:{violation}")
PY
)"

  # Human/machine-readable evidence file for this run.
  {
    echo "task=15"
    echo "mode=${MODE}"
    echo "run_id=${RUN_ID}"
    echo "run_dir=${RUN_DIR}"
    echo "manifest=${MANIFEST_TSV}"
    echo "summary_json=${SUMMARY_JSON}"
    echo "latest_summary_json=${LATEST_SUMMARY_JSON}"
    echo "started_at=${STARTED_AT_UTC}"
    echo "finished_at=${finished_at_utc}"
    echo "scenario_total=${total_count}"
    echo "scenario_pass=${pass_count}"
    echo "scenario_fail=${fail_count}"
    echo "all_pass=${all_pass}"
    echo "summary_helper_rc=${summary_rc}"
    echo "violated_thresholds_begin"
    if [[ -n "${violation_lines}" ]]; then
      echo "${violation_lines}"
    fi
    echo "violated_thresholds_end"
  } > "${EVIDENCE_TEXT}"

  # Helper failure is fatal, but only after the evidence file is written.
  if (( summary_rc != 0 )); then
    echo "summary helper failed with rc=${summary_rc}" >&2
    return 1
  fi

  echo "fault-suite mode=${MODE} total=${total_count} pass=${pass_count} fail=${fail_count}"
  echo "summary: ${SUMMARY_JSON}"

  # Baseline expectation: exactly 3 scenarios, all passing.
  if [[ "${MODE}" == "baseline" ]]; then
    if [[ "${total_count}" == "3" && "${pass_count}" == "3" && "${fail_count}" == "0" ]]; then
      return 0
    fi
    return 1
  fi

  # Degraded mode: row-level FAILs are treated as the expected outcome
  # signal (rc 1 to the caller).
  if [[ "${fail_count}" != "0" ]]; then
    return 1
  fi

  # NOTE(review): this returns 2 whenever no row failed, even if
  # violation_lines is non-empty — confirm whether threshold violations
  # alone should count as degraded mode "working".
  echo "degraded mode did not violate thresholds" >&2
  return 2
}
# Script entry point.
main "$@"