defmodule Symbiont.Heartbeat do
  @moduledoc """
  Periodic health snapshot and queue processor.

  Runs on a configurable interval (`:heartbeat_interval_ms` in the `:symbiont`
  application environment, default: 5 minutes). Each tick:

  1. Reads the health indicators (queue size and ledger stats)
  2. Takes pending tasks from the queue and dispatches them to
     `Symbiont.TaskSupervisor`
  3. Appends a health snapshot to `heartbeat.jsonl` in the data directory

  The `"status"` field of the snapshot is currently always `"healthy"`: no
  failure detection is performed beyond the calls above not raising.
  """

  use GenServer
  require Logger

  # Delay before the first tick, so that other services have time to start.
  @initial_delay_ms 5_000

  # -- Client API --

  @doc """
  Starts the heartbeat server, registered under the module name.

  `opts` is passed through to `init/1`, which ignores it; configuration is read
  from the `:symbiont` application environment.
  """
  @spec start_link(term()) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc "Trigger a heartbeat manually (useful for testing). Returns the new snapshot."
  @spec pulse() :: map()
  def pulse do
    GenServer.call(__MODULE__, :pulse, 60_000)
  end

  @doc "Get the last recorded health snapshot, or `nil` if no heartbeat has run yet."
  @spec last_snapshot() :: map() | nil
  def last_snapshot do
    GenServer.call(__MODULE__, :last_snapshot)
  end

  # -- Server Callbacks --

  @impl true
  def init(_opts) do
    interval = Application.get_env(:symbiont, :heartbeat_interval_ms, 300_000)
    data_dir = Application.get_env(:symbiont, :data_dir, "data")
    heartbeat_path = Path.join(data_dir, "heartbeat.jsonl")

    # The data directory may not exist yet (fresh install); without it the
    # write below and every later append would raise `File.Error`.
    File.mkdir_p!(data_dir)

    if not File.exists?(heartbeat_path), do: File.write!(heartbeat_path, "")

    Process.send_after(self(), :tick, @initial_delay_ms)

    state = %{
      interval: interval,
      heartbeat_path: heartbeat_path,
      last_snapshot: nil,
      started_at: DateTime.utc_now()
    }

    {:ok, state}
  end

  @impl true
  def handle_info(:tick, state) do
    snapshot = run_heartbeat(state)
    # Rescheduled only after the heartbeat returns, so ticks never overlap.
    schedule_next(state.interval)
    {:noreply, %{state | last_snapshot: snapshot}}
  end

  @impl true
  def handle_call(:pulse, _from, state) do
    snapshot = run_heartbeat(state)
    {:reply, snapshot, %{state | last_snapshot: snapshot}}
  end

  def handle_call(:last_snapshot, _from, state) do
    {:reply, state.last_snapshot, state}
  end

  # -- Private --

  # Runs one heartbeat: reads the indicators, dispatches queued tasks, appends
  # the snapshot to the heartbeat file and returns it.
  defp run_heartbeat(state) do
    Logger.info("Heartbeat: running health check")

    # 1. Read health indicators
    queue_size = Symbiont.Queue.size()
    ledger_stats = Symbiont.Ledger.stats()

    # 2. Process pending tasks
    max_batch = Application.get_env(:symbiont, :max_queue_batch, 5)
    tasks_processed = process_queue(max_batch)

    # 3. Build snapshot. A single clock reading keeps "timestamp" and
    # "uptime_seconds" consistent with each other.
    now = DateTime.utc_now()
    total_cost = ledger_stats["total_cost_estimated_usd"]

    snapshot = %{
      "timestamp" => DateTime.to_iso8601(now),
      "status" => "healthy",
      "queue_size" => queue_size,
      "tasks_processed" => tasks_processed,
      "total_calls" => ledger_stats["total_calls"],
      "total_cost" => total_cost,
      "uptime_seconds" => DateTime.diff(now, state.started_at, :second)
    }

    # 4. Log snapshot (one JSON document per line)
    line = Jason.encode!(snapshot) <> "\n"
    File.write!(state.heartbeat_path, line, [:append])

    Logger.info(
      "Heartbeat: queue=#{queue_size} processed=#{tasks_processed} " <>
        "total_cost=$#{total_cost}"
    )

    snapshot
  end

  # Takes up to `max_batch` tasks from the queue and dispatches each one.
  # Returns the number of tasks taken; they run asynchronously, so they are not
  # necessarily finished when this returns.
  defp process_queue(max_batch) do
    tasks = Symbiont.Queue.take(max_batch)
    Enum.each(tasks, &dispatch/1)
    length(tasks)
  end

  # Starts a supervised process for `task`. If the supervisor refuses the child,
  # the task has already been taken from the queue, so it is reported as failed
  # instead of being dropped silently.
  # NOTE(review): assumes `Symbiont.Queue.fail/2` is the right way to release a
  # taken task that never ran; confirm against the queue implementation.
  defp dispatch(task) do
    case Task.Supervisor.start_child(Symbiont.TaskSupervisor, fn -> execute(task) end) do
      {:error, reason} ->
        Logger.error(
          "Heartbeat: could not start task #{inspect(task["id"])}: #{inspect(reason)}"
        )

        Symbiont.Queue.fail(task["id"], inspect(reason))

      _started ->
        :ok
    end
  end

  # Routes and executes one task, then records the outcome on the queue.
  defp execute(task) do
    case Symbiont.Router.route_and_execute(task["task"]) do
      {:ok, result} ->
        Symbiont.Queue.complete(task["id"], result[:result])

      {:error, reason} ->
        Symbiont.Queue.fail(task["id"], inspect(reason))
    end
  end

  defp schedule_next(interval) do
    Process.send_after(self(), :tick, interval)
  end
end