182 lines
5.0 KiB
Elixir
182 lines
5.0 KiB
Elixir
defmodule Symbiont.Dispatcher do
  @moduledoc """
  Wraps the Claude Code CLI to execute model calls.

  Handles:

    - CLI invocation through `sh`, with the prompt piped to the CLI on stdin
    - Token counting and cost estimation
    - Ledger logging (every call gets an immutable entry)
    - Rate-limit detection and tier escalation
  """

  require Logger

  # Tier atom -> value passed to the CLI's `--model` flag.
  @model_map %{
    haiku: "haiku",
    sonnet: "sonnet",
    opus: "opus"
  }

  # Per-token rates used by `estimate_cost/3` to fill `estimated_cost_usd`.
  @cost_per_input_token %{
    haiku: 0.00000025,
    sonnet: 0.000003,
    opus: 0.000015
  }

  @cost_per_output_token %{
    haiku: 0.00000125,
    sonnet: 0.000015,
    opus: 0.000075
  }

  # Lower-cased substrings that mark CLI output as a rate-limit failure.
  @rate_limit_markers ["rate limit", "rate_limit", "rate-limit", "ratelimit", "too many requests", "429"]

  @typedoc """
  The ledger entry returned on success.

  `model` holds the tier name as a string (`to_string(tier)`), not the atom.
  """
  @type invoke_result :: %{
          model: String.t(),
          result: String.t(),
          input_tokens: non_neg_integer(),
          output_tokens: non_neg_integer(),
          estimated_cost_usd: float(),
          elapsed_seconds: float(),
          success: boolean(),
          prompt_preview: String.t()
        }

  @doc """
  Invoke Claude CLI with the given tier and prompt.

  `tier` selects the model (`:haiku`, `:sonnet` or `:opus`; any other atom runs
  as `"sonnet"`). The CLI executable is read from the `:claude_cli` key of the
  `:symbiont` application environment and defaults to `"claude"`.

  Returns `{:ok, result_map}` or `{:error, reason}`:

    * `{:ok, entry}` - the CLI exited with status 0; `entry` is the ledger entry
    * `{:error, {:cli_failed, exit_code, output}}` - non-zero exit that does not
      look like a rate limit
    * `{:error, :rate_limited_all_tiers}` - rate limited with no tier left to
      escalate to
    * `{:error, reason}` - the shell could not be started

  When the output looks like a rate limit, the call is retried on the next tier
  (`:haiku` -> `:sonnet` -> `:opus`). Every attempt, successful or not, is
  appended to the ledger.
  """
  @spec invoke(atom(), String.t()) :: {:ok, invoke_result()} | {:error, term()}
  def invoke(tier, prompt) when is_atom(tier) and is_binary(prompt) do
    model = Map.get(@model_map, tier, "sonnet")
    cli = Application.get_env(:symbiont, :claude_cli, "claude")

    started_at = System.monotonic_time(:millisecond)
    outcome = run_cli(cli, model, prompt)
    elapsed_seconds = (System.monotonic_time(:millisecond) - started_at) / 1_000.0

    handle_outcome(outcome, tier, prompt, elapsed_seconds)
  end

  # -- Private --

  # Turns the raw `run_cli/3` outcome into the public return value and records
  # the attempt in the ledger.
  defp handle_outcome({:ok, output}, tier, prompt, elapsed_seconds) do
    parsed = parse_cli_output(output)

    # Prefer the usage numbers reported by the CLI; fall back to a length-based
    # estimate when they are absent.
    input_tokens = parsed[:input_tokens] || estimate_tokens(prompt)
    output_tokens = parsed[:output_tokens] || estimate_tokens(parsed[:result] || "")
    cost = estimate_cost(tier, input_tokens, output_tokens)

    entry = %{
      model: to_string(tier),
      success: true,
      elapsed_seconds: Float.round(elapsed_seconds, 2),
      input_tokens: input_tokens,
      output_tokens: output_tokens,
      estimated_cost_usd: Float.round(cost, 6),
      prompt_preview: String.slice(prompt, 0, 100),
      result: parsed[:result] || output
    }

    Symbiont.Ledger.append(entry)
    {:ok, entry}
  end

  defp handle_outcome({:error, {output, exit_code}}, tier, prompt, elapsed_seconds)
       when is_binary(output) do
    snippet = String.slice(output, 0, 200)
    Logger.warning("Claude CLI failed: exit=#{exit_code} output=#{snippet}")

    # Record the failed attempt before deciding whether to escalate, so that
    # rate-limited calls also leave a ledger entry.
    tier
    |> failure_entry(prompt, elapsed_seconds, "exit_code=#{exit_code}: #{snippet}")
    |> Symbiont.Ledger.append()

    if rate_limited?(output) do
      handle_rate_limit(tier, prompt)
    else
      {:error, {:cli_failed, exit_code, output}}
    end
  end

  defp handle_outcome({:error, reason}, tier, prompt, elapsed_seconds) do
    Logger.warning("Claude CLI error: #{inspect(reason)}")

    tier
    |> failure_entry(prompt, elapsed_seconds, "cli_error: #{inspect(reason)}")
    |> Symbiont.Ledger.append()

    {:error, reason}
  end

  # Builds the ledger entry for an attempt that produced no usable result.
  defp failure_entry(tier, prompt, elapsed_seconds, error) do
    %{
      model: to_string(tier),
      success: false,
      elapsed_seconds: Float.round(elapsed_seconds, 2),
      input_tokens: 0,
      output_tokens: 0,
      estimated_cost_usd: 0.0,
      prompt_preview: String.slice(prompt, 0, 100),
      error: error
    }
  end

  # True when the CLI output mentions a rate limit. The comparison is
  # case-insensitive and looks for whole rate-limit phrases rather than the
  # bare substring "rate", which also occurs in words such as "generate".
  defp rate_limited?(output) do
    output
    |> String.downcase()
    |> String.contains?(@rate_limit_markers)
  end

  # Runs the CLI through `sh -c`, feeding `prompt` on stdin.
  #
  # Returns `{:ok, output}` on exit status 0, `{:error, {output, exit_code}}` on
  # any other status, and `{:error, message}` when `System.cmd/3` raises.
  # stderr is merged into `output`.
  defp run_cli(cli, model, prompt) do
    script = "#{prompt_producer(prompt)} | #{cli} -p --model #{model} --output-format json"

    try do
      case System.cmd("sh", ["-c", script], stderr_to_stdout: true) do
        {output, 0} -> {:ok, output}
        {output, exit_code} -> {:error, {output, exit_code}}
      end
    rescue
      e ->
        {:error, Exception.message(e)}
    end
  end

  # Shell fragment that writes `text` byte-for-byte to stdout.
  #
  # The text travels as base64, whose alphabet contains no shell metacharacters,
  # and is decoded straight into the pipe. It is deliberately NOT expanded back
  # into a shell word (`echo $(...)`): the shell would field-split and
  # glob-expand the decoded prompt, collapsing newlines and expanding `*`.
  # NOTE(review): the encoded prompt is still part of the `sh -c` argument, so
  # very large prompts may hit the OS argument-size limit.
  defp prompt_producer(text) do
    "printf %s '#{Base.encode64(text)}' | base64 -d"
  end

  # Extracts the result text and token usage from the CLI's JSON output.
  #
  # Always returns a map with a `:result` key; `:input_tokens` and
  # `:output_tokens` are present only for JSON objects carrying a "result" key
  # and are `nil` when the usage data is missing or not a non-negative integer.
  # Output that is not a JSON object is returned trimmed, as-is.
  defp parse_cli_output(output) do
    trimmed = String.trim(output)

    case Jason.decode(trimmed) do
      {:ok, %{"result" => result} = data} ->
        %{
          result: stringify(result),
          input_tokens: token_count(data, "input_tokens"),
          output_tokens: token_count(data, "output_tokens")
        }

      {:ok, data} when is_map(data) ->
        # No "result" key here (the clause above would have matched).
        %{result: stringify(data["content"] || data["text"] || data)}

      _ ->
        %{result: trimmed}
    end
  end

  # Keeps strings and nil untouched; renders any other decoded JSON value with
  # `inspect/1` so callers can rely on `:result` being a string when present.
  defp stringify(nil), do: nil
  defp stringify(value) when is_binary(value), do: value
  defp stringify(value), do: inspect(value)

  # Reads `data["usage"][key]`, accepting only non-negative integers.
  defp token_count(%{"usage" => %{} = usage}, key) do
    case usage[key] do
      count when is_integer(count) and count >= 0 -> count
      _ -> nil
    end
  end

  defp token_count(_data, _key), do: nil

  # Rough token estimate: one token per four characters, rounded down.
  defp estimate_tokens(text) do
    text
    |> String.length()
    |> div(4)
  end

  # Cost of a call from the per-token rate tables; tiers missing from the
  # tables are charged at the sonnet rates.
  defp estimate_cost(tier, input_tokens, output_tokens) do
    input_rate = Map.get(@cost_per_input_token, tier, 0.000003)
    output_rate = Map.get(@cost_per_output_token, tier, 0.000015)

    input_tokens * input_rate + output_tokens * output_rate
  end

  # Retries the prompt on the next tier up, or gives up when there is none.
  defp handle_rate_limit(tier, prompt) do
    case next_tier(tier) do
      nil ->
        {:error, :rate_limited_all_tiers}

      escalated ->
        Logger.info("Rate limited on #{tier}, escalating to #{escalated}")
        invoke(escalated, prompt)
    end
  end

  # Escalation ladder. Tiers outside the ladder have no successor, so they
  # return nil instead of raising.
  defp next_tier(:haiku), do: :sonnet
  defp next_tier(:sonnet), do: :opus
  defp next_tier(_tier), do: nil
end
|