defmodule Symbiont.Dispatcher do
  @moduledoc """
  Wraps the Claude Code CLI to execute model calls.

  Handles:
  - CLI invocation via `System.cmd/3`, piping the prompt to the CLI's stdin
    through a small `sh` pipeline
  - Token counting and cost estimation
  - Ledger logging (every call gets an immutable entry, including failed and
    rate-limited attempts)
  - Rate-limit detection and tier escalation
  """

  require Logger

  # Tier atom -> value passed to the CLI's `--model` flag.
  @model_map %{
    haiku: "haiku",
    sonnet: "sonnet",
    opus: "opus"
  }

  # USD per token, keyed by tier.
  @cost_per_input_token %{
    haiku: 0.00000025,
    sonnet: 0.000003,
    opus: 0.000015
  }

  @cost_per_output_token %{
    haiku: 0.00000125,
    sonnet: 0.000015,
    opus: 0.000075
  }

  @typedoc """
  Ledger entry returned for a successful call.

  `model` is the requested tier rendered with `to_string/1`.
  """
  @type invoke_result :: %{
          model: String.t(),
          result: String.t(),
          input_tokens: non_neg_integer(),
          output_tokens: non_neg_integer(),
          estimated_cost_usd: float(),
          elapsed_seconds: float(),
          prompt_preview: String.t(),
          success: boolean()
        }

  @doc """
  Invokes the Claude CLI with the given tier and prompt.

  The CLI executable is read from the `:claude_cli` key of the `:symbiont`
  application environment (default `"claude"`). A tier that is not one of
  `:haiku`, `:sonnet` or `:opus` is run with the `"sonnet"` model.

  Returns:

    * `{:ok, entry}` - the CLI exited with status 0; `entry` is the map that
      was appended to the ledger.
    * `{:error, {:cli_failed, exit_code, output}}` - the CLI exited non-zero
      and the output does not look like a rate-limit response.
    * `{:error, :rate_limited_all_tiers}` - the CLI reported a rate limit and
      there is no further tier to escalate to.
    * `{:error, reason}` - the CLI could not be launched at all.

  When a rate limit is detected the call is retried on the next tier
  (`:haiku` -> `:sonnet` -> `:opus`) and the result of that retry is returned.

  Every attempt, successful or not, is appended to `Symbiont.Ledger`.
  """
  @spec invoke(atom(), String.t()) :: {:ok, invoke_result()} | {:error, term()}
  def invoke(tier, prompt) when is_atom(tier) and is_binary(prompt) do
    model = Map.get(@model_map, tier, "sonnet")
    cli = Application.get_env(:symbiont, :claude_cli, "claude")

    started_at = System.monotonic_time(:millisecond)
    outcome = run_cli(cli, model, prompt)
    elapsed_seconds = (System.monotonic_time(:millisecond) - started_at) / 1_000.0

    handle_outcome(outcome, tier, prompt, elapsed_seconds)
  end

  # -- Private --

  # Success: parse the CLI output, fill in token counts (estimated when the
  # CLI did not report them), record the call and return the ledger entry.
  defp handle_outcome({:ok, output}, tier, prompt, elapsed_seconds) do
    parsed = parse_cli_output(output)
    input_tokens = parsed[:input_tokens] || estimate_tokens(prompt)
    output_tokens = parsed[:output_tokens] || estimate_tokens(parsed[:result] || "")
    cost = estimate_cost(tier, input_tokens, output_tokens)

    entry = %{
      model: to_string(tier),
      success: true,
      elapsed_seconds: Float.round(elapsed_seconds, 2),
      input_tokens: input_tokens,
      output_tokens: output_tokens,
      estimated_cost_usd: Float.round(cost, 6),
      prompt_preview: String.slice(prompt, 0, 100),
      result: parsed[:result] || output
    }

    Symbiont.Ledger.append(entry)
    {:ok, entry}
  end

  # The CLI ran but exited non-zero. The attempt is always recorded; a
  # rate-limit response additionally triggers escalation to the next tier.
  defp handle_outcome({:error, {output, exit_code}}, tier, prompt, elapsed_seconds)
       when is_binary(output) do
    snippet = String.slice(output, 0, 200)
    Logger.warning("Claude CLI failed: exit=#{exit_code} output=#{snippet}")
    log_failure(tier, prompt, elapsed_seconds, "exit_code=#{exit_code}: #{snippet}")

    if rate_limited?(output) do
      handle_rate_limit(tier, prompt)
    else
      {:error, {:cli_failed, exit_code, output}}
    end
  end

  # The CLI could not be started (run_cli/3 rescued an exception).
  defp handle_outcome({:error, reason}, tier, prompt, elapsed_seconds) do
    Logger.warning("Claude CLI error: #{inspect(reason)}")
    log_failure(tier, prompt, elapsed_seconds, "cli_error: #{inspect(reason)}")
    {:error, reason}
  end

  # Appends a zero-cost failure entry to the ledger.
  defp log_failure(tier, prompt, elapsed_seconds, error) do
    Symbiont.Ledger.append(%{
      model: to_string(tier),
      success: false,
      elapsed_seconds: Float.round(elapsed_seconds, 2),
      input_tokens: 0,
      output_tokens: 0,
      estimated_cost_usd: 0.0,
      prompt_preview: String.slice(prompt, 0, 100),
      error: error
    })
  end

  # True when the CLI output looks like a rate-limit response. The pattern is
  # anchored on word boundaries so that words merely containing "rate"
  # ("generate", "operation", ...) or longer numbers containing "429" do not
  # trigger a tier escalation.
  defp rate_limited?(output) do
    Regex.match?(~r/\brate[\s_-]*(?:limit|exceeded)|\b429\b|too many requests/i, output)
  end

  # Runs `<cli> -p --model <model> --output-format json` with the prompt on
  # stdin and stderr merged into the captured output.
  #
  # The prompt is handed to `sh` as positional argument `$1` instead of being
  # interpolated into the script text, so its content is never parsed as shell
  # syntax. `cli` comes from application config and is interpolated as-is,
  # which keeps multi-word commands working; it must be trusted.
  #
  # NOTE: the prompt still travels as a process argument, so very large
  # prompts may run into the OS argument-size limit.
  defp run_cli(cli, model, prompt) do
    script = "printf '%s' \"$1\" | #{cli} -p --model #{model} --output-format json"

    case System.cmd("sh", ["-c", script, "sh", prompt], stderr_to_stdout: true) do
      {output, 0} -> {:ok, output}
      {output, exit_code} -> {:error, {output, exit_code}}
    end
  rescue
    # Boundary with the OS: a missing `sh` or an invalid argument (e.g. a NUL
    # byte in the prompt) raises; surface it as an error tuple instead.
    e -> {:error, Exception.message(e)}
  end

  # Decodes the CLI's JSON output into a map with a `:result` key and, when
  # the payload has a "result" field, `:input_tokens` / `:output_tokens` taken
  # from its "usage" object (nil when absent). Output that is not a JSON
  # object is returned trimmed as the result.
  defp parse_cli_output(output) do
    trimmed = String.trim(output)

    case Jason.decode(trimmed) do
      {:ok, %{"result" => result} = data} ->
        %{
          result: result,
          input_tokens: get_in(data, ["usage", "input_tokens"]),
          output_tokens: get_in(data, ["usage", "output_tokens"])
        }

      {:ok, data} when is_map(data) ->
        %{result: data["result"] || data["content"] || data["text"] || inspect(data)}

      _ ->
        %{result: trimmed}
    end
  end

  # Rough token estimate: one token per four characters.
  defp estimate_tokens(text) when is_binary(text), do: div(String.length(text), 4)

  # A decoded result may be a non-string JSON value (e.g. a list); estimate
  # from its printed form rather than crashing after a successful call.
  defp estimate_tokens(other), do: other |> inspect() |> estimate_tokens()

  # Cost in USD for the given token counts; unknown tiers use sonnet rates.
  defp estimate_cost(tier, input_tokens, output_tokens) do
    input_rate = Map.get(@cost_per_input_token, tier, 0.000003)
    output_rate = Map.get(@cost_per_output_token, tier, 0.000015)
    input_tokens * input_rate + output_tokens * output_rate
  end

  # Retries the prompt on the next tier, or gives up when there is none.
  defp handle_rate_limit(tier, prompt) do
    case next_tier(tier) do
      nil ->
        {:error, :rate_limited_all_tiers}

      next ->
        Logger.info("Rate limited on #{tier}, escalating to #{next}")
        invoke(next, prompt)
    end
  end

  # Escalation ladder. Unknown tiers are executed with the sonnet model
  # (see the @model_map default in invoke/2), so they escalate like :sonnet.
  defp next_tier(:haiku), do: :sonnet
  defp next_tier(:opus), do: nil
  defp next_tier(_sonnet_or_unknown), do: :opus
end