symbiont_ex/lib/symbiont/heartbeat.ex

131 lines
3.4 KiB
Elixir

defmodule Symbiont.Heartbeat do
  @moduledoc """
  Periodic heartbeat and queue processor.

  Runs on a configurable interval (`:heartbeat_interval_ms` in the `:symbiont`
  application environment, default: 5 minutes). The first tick fires 5 seconds
  after start-up so that other services have time to come up.

  Each tick:

    1. Reads the current queue size and the ledger statistics.
    2. Takes up to `:max_queue_batch` (default: 5) pending tasks from the queue
       and starts each one under `Symbiont.TaskSupervisor`.
    3. Appends a JSON snapshot line to `heartbeat.jsonl` inside `:data_dir`
       (default: `"data"`).

  The snapshot's `"status"` field is currently always `"healthy"`; this module
  performs no further health probes.
  """

  use GenServer
  require Logger

  @default_interval_ms 300_000
  @first_tick_delay_ms 5_000
  @default_max_batch 5
  @pulse_timeout_ms 60_000

  @typedoc "Health snapshot as written to `heartbeat.jsonl` (string keys)."
  @type snapshot :: %{optional(String.t()) => term()}

  # -- Client API --

  @doc """
  Starts the heartbeat server, registered under the module name.

  `opts` is accepted for supervisor compatibility; configuration is read from
  the `:symbiont` application environment.
  """
  @spec start_link(term()) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Triggers a heartbeat manually (useful for testing).

  Runs the same work as a scheduled tick and returns the resulting snapshot.
  Does not reschedule or delay the periodic tick.
  """
  @spec pulse() :: snapshot()
  def pulse do
    GenServer.call(__MODULE__, :pulse, @pulse_timeout_ms)
  end

  @doc "Returns the last recorded health snapshot, or `nil` if none has run yet."
  @spec last_snapshot() :: snapshot() | nil
  def last_snapshot do
    GenServer.call(__MODULE__, :last_snapshot)
  end

  # -- Server Callbacks --

  @impl true
  def init(_opts) do
    interval = Application.get_env(:symbiont, :heartbeat_interval_ms, @default_interval_ms)
    data_dir = Application.get_env(:symbiont, :data_dir, "data")
    heartbeat_path = Path.join(data_dir, "heartbeat.jsonl")

    # The data directory may not exist yet (fresh install); without it the
    # file creation below raises and the server can never start.
    File.mkdir_p!(data_dir)
    if not File.exists?(heartbeat_path), do: File.write!(heartbeat_path, "")

    # Schedule the first heartbeat after a short delay (let other services start).
    Process.send_after(self(), :tick, @first_tick_delay_ms)

    state = %{
      interval: interval,
      heartbeat_path: heartbeat_path,
      last_snapshot: nil,
      started_at: DateTime.utc_now()
    }

    {:ok, state}
  end

  @impl true
  def handle_info(:tick, state) do
    snapshot = run_heartbeat(state)
    schedule_next(state.interval)
    {:noreply, %{state | last_snapshot: snapshot}}
  end

  # Defining handle_info/2 replaces the catch-all that `use GenServer` would
  # otherwise provide, so stray messages must be handled here or they crash
  # the server with a FunctionClauseError.
  def handle_info(message, state) do
    Logger.warning("Heartbeat: ignoring unexpected message #{inspect(message)}")
    {:noreply, state}
  end

  @impl true
  def handle_call(:pulse, _from, state) do
    snapshot = run_heartbeat(state)
    {:reply, snapshot, %{state | last_snapshot: snapshot}}
  end

  def handle_call(:last_snapshot, _from, state) do
    {:reply, state.last_snapshot, state}
  end

  # -- Private --

  # Runs one heartbeat: gathers stats, dispatches queued tasks, appends the
  # snapshot to the heartbeat file and returns it.
  defp run_heartbeat(state) do
    Logger.info("Heartbeat: running health check")

    # 1. Gather stats
    queue_size = Symbiont.Queue.size()
    ledger_stats = Symbiont.Ledger.stats()
    total_cost = ledger_stats["total_cost_estimated_usd"]

    # 2. Process pending tasks
    max_batch = Application.get_env(:symbiont, :max_queue_batch, @default_max_batch)
    tasks_processed = process_queue(max_batch)

    # 3. Build snapshot. A single clock reading keeps "timestamp" and
    #    "uptime_seconds" consistent with each other.
    now = DateTime.utc_now()

    snapshot = %{
      "timestamp" => DateTime.to_iso8601(now),
      "status" => "healthy",
      "queue_size" => queue_size,
      "tasks_processed" => tasks_processed,
      "total_calls" => ledger_stats["total_calls"],
      "total_cost" => total_cost,
      "uptime_seconds" => DateTime.diff(now, state.started_at, :second)
    }

    # 4. Log snapshot (one JSON object per line)
    line = Jason.encode!(snapshot) <> "\n"
    File.write!(state.heartbeat_path, line, [:append])

    Logger.info(
      "Heartbeat: queue=#{queue_size} processed=#{tasks_processed} " <>
        "total_cost=$#{total_cost}"
    )

    snapshot
  end

  # Takes up to `max_batch` tasks from the queue and starts each one under the
  # task supervisor. Returns the number of tasks taken from the queue (not the
  # number that have finished: execution is asynchronous).
  defp process_queue(max_batch) do
    tasks = Symbiont.Queue.take(max_batch)
    Enum.each(tasks, &start_task/1)
    length(tasks)
  end

  # Starts a single task. If the supervisor refuses to start it, the task is
  # marked as failed so it does not stay taken-but-never-resolved.
  defp start_task(task) do
    case Task.Supervisor.start_child(Symbiont.TaskSupervisor, fn -> execute_task(task) end) do
      {:ok, _pid} ->
        :ok

      {:ok, _pid, _info} ->
        :ok

      failure ->
        Logger.error(
          "Heartbeat: could not start task #{inspect(task["id"])}: #{inspect(failure)}"
        )

        Symbiont.Queue.fail(task["id"], "could not start task: " <> inspect(failure))
    end
  end

  # Routes and executes one task, recording the outcome on the queue.
  defp execute_task(task) do
    case Symbiont.Router.route_and_execute(task["task"]) do
      {:ok, result} ->
        Symbiont.Queue.complete(task["id"], result[:result])

      {:error, reason} ->
        Symbiont.Queue.fail(task["id"], inspect(reason))
    end
  rescue
    exception ->
      # Record the failure on the queue, then re-raise so the crash is still
      # reported with its original stacktrace.
      Symbiont.Queue.fail(task["id"], Exception.message(exception))
      reraise exception, __STACKTRACE__
  end

  defp schedule_next(interval) do
    Process.send_after(self(), :tick, interval)
  end
end