"""Backfill the Engram genesis session with log entries taken from the transcript.

One-off maintenance script. It looks up the earliest session whose summary
contains "Genesis session", deletes the log rows currently attached to it and
writes the transcript-derived entries in their place.

Run it directly (``python backfill_transcript.py``); importing the module has
no side effects.
"""
import sqlite3
from contextlib import closing
from datetime import datetime, timedelta

# Location of the Engram SQLite database on the host this script is run on.
DB_PATH = '/data/symbiont/engram.db'

# The 49 log entries extracted from the transcript, in chronological order.
GENESIS_ENTRIES = (
    "User opened with 'organism survival' framing: AI runs on tokens (money), can it be self-sustaining?",
    "Agreed: revenue > token cost is the core equation; discussed model tier costs (Haiku $0.25/MTok vs Opus $15/MTok)",
    "Identified three revenue streams: content-as-a-service, micro-SaaS APIs, subscription research reports",
    "Ownership structure settled: Michael owns all accounts (legal), ~50/50 revenue split after costs",
    "Tax/entity decision: single-member LLC (Wyoming/NM) as the 'virtual entity' for now",
    "Key insight: LLM router as Building Block #1 — metabolic efficiency before anything else",
    "Router design: Haiku classifies tasks (cheap), dispatches to cheapest capable tier",
    "Model tiers defined: Haiku (simple/extract), Sonnet (write/code), Opus (strategy/QA)",
    "Michael: use Claude Code CLI on Pro subscription to keep marginal cost at $0",
    "Rate-limit strategy: detect limits, back off, set systemd timer to self-wake",
    "Michael uploaded cortex SSH key; connected via paramiko to cortex.hydrascale.net",
    "Cortex environment: Ubuntu 24.04, 849GB free, 15GB RAM, git/python/node all present",
    "Installed Claude Code CLI v2.1.79 via npm; authenticated with Michael's Pro account",
    "Created /data/symbiont/ project structure with git init",
    "Built dispatcher.py: Claude Code CLI wrapper, pipes prompt via stdin, parses JSON output",
    "Built router.py: Haiku classifier + dispatch_with_fallback() with tier chains",
    "Built scheduler.py: JSONL task queue + systemd transient timer for self-wake",
    "Built api.py: FastAPI with /task, /queue, /status, /ledger, /ledger/stats",
    "Built wake.py: called by systemd when rate limit expires, drains queue",
    "Discovered --max-tokens flag doesn't exist in Claude Code CLI; fixed to use stdin piping",
    "Discovered --dangerously-skip-permissions blocked under root; removed it",
    "FIRST LIVE TEST: Haiku responded 'SYMBIONT ONLINE' in 1.3s; confirmed working",
    "FULL ROUTER TEST: Task 1 (email extract) → Haiku (confidence 0.98); Task 2 (content write) → Sonnet (0.85)",
    "Ledger entries confirmed: costs tracked per call with real token counts from CLI JSON output",
    "Project named 'Symbiont' — mutualistic relationship, both parties benefit",
    "Created systemd services: symbiont-api.service (always-on) + symbiont-heartbeat.timer (5min)",
    "Crash recovery tested: SIGKILL → auto-restart confirmed in ~11 seconds",
    "Both services enabled (survive reboot); git log shows 5 clean commits",
    "Built heartbeat.py: checks CLI auth, disk, API status, ledger stats, drains queue each tick",
    "Dendrite session introduced: headless Chromium on cortex at browser.hydrascale.net",
    "Dendrite health confirmed: status ok, uptime 278k+ seconds, fetch test successful",
    "Built symbiont/web.py: fetch_page, take_screenshot, execute_js, search_web, BrowserSession",
    "Integration test: Symbiont → web.py → Dendrite → fetched Herman Melville from httpbin",
    "Created /data/skills/ canonical skills repo with git; cortex-server and symbiont skills added",
    "package_all.sh packages .skill files to /data/skills/dist/; Caddy serves at /skills/ endpoint",
    "Heartbeat updated to auto-detect skill changes and commit/repackage every 5 minutes",
    "Dendrite skill added to canonical repo; symbiont skill updated with Dendrite integration docs",
    "Discussion: MCP vs CLI access — decided CLI + CLAUDE.md is better than predefined MCP tools",
    "Elixir/OTP chosen as long-term target language: supervisors, GenServers, hot reload, BEAM concurrency",
    "Built CLAUDE.md: bootstrap context auto-loaded by Claude Code in /data/symbiont/",
    "Built sessions.py (later renamed Engram): SQLite registry with sessions, logs, resource_locks tables",
    "WAL mode enabled; Engram handles 2-4 concurrent agents cleanly at this scale",
    "API endpoints /sitrep and /sessions added; task dispatch now logs to Engram",
    "Session naming: 'Engram' — the physical trace a memory leaves in neural tissue",
    "Muse ecosystem fully named: Cortex (infra), Dendrite (senses), Symbiont (orchestrator), Engram (memory)",
    "Genesis session registered with 22 log entries; tested sitrep showing active/completed sessions",
    "Cowork session JSONL found at .claude/projects/; other sessions not visible from this mount",
    "Harvester script written for Michael to run on Mac to backfill all past sessions into Engram",
    "Discussion of Fastmail component from previous session — needs to be extracted and catalogued",
)


def find_genesis_session_id(db):
    """Return the id of the earliest session whose summary mentions "Genesis session".

    Args:
        db: Open sqlite3 connection to a database that has a ``sessions`` table.

    Returns:
        The session id, or None when no session matches.
    """
    row = db.execute(
        "SELECT id FROM sessions WHERE summary LIKE '%Genesis session%' ORDER BY started_at LIMIT 1"
    ).fetchone()
    return row[0] if row else None


def replace_session_logs(db, session_id, entries, started_at=None):
    """Replace every log row of ``session_id`` with ``entries``, preserving their order.

    Each entry gets its own timestamp, one microsecond after the previous one,
    written with a fixed-width microsecond field. Giving every row the same
    timestamp would make any ``ORDER BY timestamp`` over these rows arbitrary.

    The delete and the inserts run in a single transaction: either the old rows
    are fully replaced or, if anything raises, they are left untouched.

    Args:
        db: Open sqlite3 connection to a database that has a ``session_logs`` table.
        session_id: Session whose log rows are replaced.
        entries: Log texts in chronological order.
        started_at: Timestamp of the first entry; defaults to the current local time.

    Returns:
        The number of rows written.
    """
    base = datetime.now() if started_at is None else started_at
    rows = [
        (
            session_id,
            (base + timedelta(microseconds=offset)).isoformat(timespec="microseconds"),
            entry,
        )
        for offset, entry in enumerate(entries)
    ]
    with db:  # commits on success, rolls back if any statement raises
        db.execute("DELETE FROM session_logs WHERE session_id=?", (session_id,))
        db.executemany(
            'INSERT INTO session_logs (session_id, timestamp, entry) VALUES (?,?,?)',
            rows,
        )
    return len(rows)


def main(db_path=DB_PATH):
    """Rewrite the genesis session's logs in the database at ``db_path``."""
    # closing() guarantees the connection is released even when a query fails.
    with closing(sqlite3.connect(db_path)) as db:
        sid = find_genesis_session_id(db)
        if sid is None:
            print("Genesis session not found")
            return
        written = replace_session_logs(db, sid, GENESIS_ENTRIES)
        print(f"Updated genesis session {sid} with {written} log entries from transcript")


if __name__ == "__main__":
    main()
"used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} {"timestamp": "2026-03-20T13:36:06.668406", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} {"timestamp": "2026-03-20T13:41:07.769148", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} +{"timestamp": "2026-03-20T13:46:10.125386", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} +{"timestamp": "2026-03-20T13:51:17.548996", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} +{"timestamp": "2026-03-20T13:56:23.569762", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": 
"849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} +{"timestamp": "2026-03-20T14:01:33.754549", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} +{"timestamp": "2026-03-20T14:07:02.043593", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} +{"timestamp": "2026-03-20T14:12:02.310931", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 0, "cost_today": 0.0}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} diff --git a/symbiont/engram.py b/symbiont/engram.py index 92d31fc..5c48cbb 100644 --- a/symbiont/engram.py +++ b/symbiont/engram.py @@ -8,12 +8,19 @@ building a shared memory of what's being worked on across the ecosystem. This lets each instance see what others are working on, avoid conflicts on shared resources, and pick up context from recently completed work. +Two-tier memory model: +- world_state: A singleton markdown document updated at the end of any session that + changes things. Read at the start of every session (fits safely in context). 
+- session_logs: Detailed logs available via get_session_logs() only when needed. + SQLite with WAL mode handles 2-4 concurrent readers cleanly. Each session writes only its own rows, so writer contention is minimal. Usage: - from symbiont.engram import Engram + from symbiont.engram import Engram, sitrep + # Start every session with: + print(sitrep()) eng = Engram() sid = eng.register("cowork", "Building the Elixir port of Symbiont") @@ -33,6 +40,9 @@ Usage: # Heartbeat (call periodically on long sessions) eng.heartbeat(sid, "Still working on dispatcher, 60% done") + # Update world state before completing (optional, only if your work changes the world) + eng.set_world_state("Updated world state content", updated_by=sid) + # Done eng.complete(sid, "Finished Elixir port of router + dispatcher. Tests passing.") """ @@ -65,16 +75,23 @@ class Engram: def _init_db(self): with self._connect() as conn: conn.executescript(""" + CREATE TABLE IF NOT EXISTS world_state ( + id INTEGER PRIMARY KEY CHECK (id = 1), + updated_at TEXT NOT NULL, + updated_by TEXT, + content TEXT NOT NULL + ); + CREATE TABLE IF NOT EXISTS sessions ( id TEXT PRIMARY KEY, - session_type TEXT NOT NULL, -- 'cowork', 'code', 'desktop', 'api' + session_type TEXT NOT NULL, summary TEXT NOT NULL, - status TEXT NOT NULL DEFAULT 'active', -- 'active', 'idle', 'completed' + status TEXT NOT NULL DEFAULT 'active', started_at TEXT NOT NULL, last_heartbeat TEXT NOT NULL, completed_at TEXT, completion_summary TEXT, - metadata TEXT -- JSON blob for extra context + metadata TEXT ); CREATE TABLE IF NOT EXISTS session_logs ( @@ -97,6 +114,23 @@ class Engram: CREATE INDEX IF NOT EXISTS idx_locks_resource ON resource_locks(resource); """) + def get_world_state(self) -> str: + """Get the current world state. 
def get_situation_report(self, stale_after_minutes: int = 30) -> str:
    """
    Build the compact situation report read at the start of a session.

    The report contains, in this order and only when present: the world-state
    document, one line per active session, and the resources locked by active
    sessions. Session logs are deliberately left out; fetch them with
    get_session_logs(session_id) when needed.

    Args:
        stale_after_minutes: An active session whose last heartbeat is older
            than this many minutes is marked "⚠ stale" instead of "●".

    Returns:
        Markdown text. When there is no world state, no active session and no
        lock, a single hint line explaining how to set the world state.
    """
    stale_after = timedelta(minutes=stale_after_minutes)
    lines = []

    # World state (the maintained truth)
    world = self.get_world_state()
    if world:
        lines.append(world)
        lines.append("")

    # Active sessions — one line each, nothing more
    active = self.get_active_sessions()
    if active:
        lines.append(f"**Active sessions ({len(active)}):**")
        for session in active:
            try:
                last = datetime.fromisoformat(session['last_heartbeat'])
                # now(tz) matches the heartbeat's awareness, so naive and
                # timezone-aware timestamps can both be subtracted safely.
                stale = (datetime.now(last.tzinfo) - last) > stale_after
            except (TypeError, ValueError):
                # A missing or malformed heartbeat must not abort the whole
                # report; a session we cannot date is reported as stale.
                stale = True
            marker = "⚠ stale" if stale else "●"
            # Collapse newlines/runs of whitespace so the entry stays on one line.
            summary = " ".join((session['summary'] or "").split())[:100]
            lines.append(f"- {marker} `{session['id']}` ({session['session_type']}): {summary}")
        lines.append("")

    # Resource locks — only those held by sessions that are still active
    with self._connect() as conn:
        locks = conn.execute(
            "SELECT rl.resource, rl.session_id FROM resource_locks rl "
            "JOIN sessions s ON rl.session_id = s.id WHERE s.status='active'"
        ).fetchall()
    if locks:
        lines.append("**Locked resources:**")
        for lock in locks:
            lines.append(f"- `{lock[0]}` by `{lock[1]}`")
        lines.append("")

    if not lines:
        lines.append("No world state set yet. Call `eng.set_world_state(...)` to initialize.")

    return "\n".join(lines)