From 9034536775ef84d8f861966cfc82b973c2092d16 Mon Sep 17 00:00:00 2001 From: Symbiont Date: Thu, 19 Mar 2026 20:14:16 +0000 Subject: [PATCH] Add web.py: Dendrite integration for web perception Provides fetch_page, take_screenshot, execute_js, search_web, and BrowserSession for multi-step interactions. Uses localhost for speed since Dendrite runs on the same box. Co-Authored-By: Claude Opus 4.6 --- heartbeat.jsonl | 2 + symbiont/web.py | 178 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 symbiont/web.py diff --git a/heartbeat.jsonl b/heartbeat.jsonl index 799a6a9..0b86cd8 100644 --- a/heartbeat.jsonl +++ b/heartbeat.jsonl @@ -5,3 +5,5 @@ {"timestamp": "2026-03-19T19:56:48.576386", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 4, "cost_today": 0.062}, "queue": {"processed": 0}, "health": "healthy"} {"timestamp": "2026-03-19T20:02:01.418535", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 4, "cost_today": 0.062}, "queue": {"processed": 0}, "health": "healthy"} {"timestamp": "2026-03-19T20:03:15.583444", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 4, "cost_today": 0.062}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} +{"timestamp": "2026-03-19T20:07:01.507403", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 4, "cost_today": 0.062}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} +{"timestamp": "2026-03-19T20:12:09.328179", "claude_cli": {"status": "ok", "detail": "authenticated"}, "disk": {"status": "ok", "total": "915G", "used": "20G", "available": "849G", "use_pct": "3%"}, "api_server": {"status": "ok", "detail": "active"}, "ledger": {"calls_today": 4, "cost_today": 0.062}, "queue": {"processed": 0}, "skills": {"status": "clean", "changes": 0}, "health": "healthy"} diff --git a/symbiont/web.py b/symbiont/web.py new file mode 100644 index 0000000..55b8cd4 --- /dev/null +++ b/symbiont/web.py @@ -0,0 +1,178 @@ +""" +Web perception via Dendrite. + +Thin wrapper around the Dendrite REST API running on cortex. +Dendrite provides headless Chromium browsing — full JS execution, +sessions, screenshots, and Readability content extraction. + +Usage: + from symbiont.web import fetch_page, take_screenshot, execute_js + + page = fetch_page("https://example.com") + print(page['content']) +""" + +import json +import logging +import urllib.request +import urllib.error +from typing import Optional + +logger = logging.getLogger(__name__) + +# Dendrite runs on the same box, so use localhost for speed +DENDRITE_URL = "http://localhost:3000" +DENDRITE_KEY = "8dc5e8f7a02745ee8db90c94b2481fd9e1deeea1e2ce74420f54047859ea7edf" + + +def _call(path: str, body: Optional[dict] = None, method: Optional[str] = None, timeout: int = 60) -> dict: + """Low-level Dendrite API call.""" + url = f"{DENDRITE_URL}{path}" + if body is not None: + data = json.dumps(body).encode() + req = urllib.request.Request(url, data=data, method=method or "POST") + req.add_header("Content-Type", "application/json") + else: + req = urllib.request.Request(url, method=method or "GET") + req.add_header("X-API-Key", DENDRITE_KEY) + + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.loads(resp.read()) + except urllib.error.HTTPError as e: + try: + err = json.loads(e.read()) + msg = err.get("error", e.reason) + except Exception: + msg = str(e.reason) + logger.error(f"Dendrite {path} failed ({e.code}): {msg}") + raise RuntimeError(f"Dendrite error {e.code}: {msg}") + + +def _call_raw(path: str, body: dict, timeout: int = 60) -> bytes: + """Low-level Dendrite API call returning raw bytes (for screenshots).""" + url = f"{DENDRITE_URL}{path}" + data = json.dumps(body).encode() + req = urllib.request.Request(url, data=data, method="POST") + req.add_header("Content-Type", "application/json") + req.add_header("X-API-Key", DENDRITE_KEY) + with urllib.request.urlopen(req, timeout=timeout) as resp: + return resp.read() + + +def health() -> dict: + """Check Dendrite health. No auth required.""" + return _call("/health") + + +def fetch_page( + url: str, + format: str = "markdown", + extract_main: bool = True, + wait_for: str = "domcontentloaded", + timeout_ms: int = 30000, +) -> dict: + """ + Fetch a URL and return structured content. + + Returns: {url, title, content, format} + """ + return _call("/fetch", { + "url": url, + "format": format, + "extractMain": extract_main, + "waitFor": wait_for, + "timeout": timeout_ms, + }) + + +def take_screenshot( + url: str, + full_page: bool = True, + selector: Optional[str] = None, +) -> bytes: + """Take a screenshot of a URL. Returns PNG bytes.""" + body = { + "url": url, + "fullPage": full_page, + "format": "png", + "waitFor": "networkidle", + } + if selector: + body["selector"] = selector + return _call_raw("/screenshot", body) + + +def execute_js(url: str, script: str) -> dict: + """Execute JavaScript in a page context. Returns {result, url, title}.""" + return _call("/execute", {"url": url, "script": script}) + + +def search_web(query: str, num_results: int = 5) -> list[dict]: + """ + Search the web using Dendrite. + Fetches Google search results and extracts links. + Returns list of {title, url, snippet}. + """ + import urllib.parse + search_url = f"https://www.google.com/search?q={urllib.parse.quote(query)}&num={num_results}" + + result = _call("/execute", { + "url": search_url, + "script": """ + return Array.from(document.querySelectorAll('div.g')).map(el => { + const a = el.querySelector('a'); + const title = el.querySelector('h3'); + const snippet = el.querySelector('.VwiC3b'); + return { + title: title ? title.textContent : '', + url: a ? a.href : '', + snippet: snippet ? snippet.textContent : '' + }; + }).filter(r => r.url && r.title); + """ + }) + + return result.get("result", []) + + +# Session management for multi-step interactions +class BrowserSession: + """Persistent browser session for multi-step interactions.""" + + def __init__(self, locale="en-US", timezone="America/Chicago"): + result = _call("/session", {"locale": locale, "timezone": timezone}) + self.id = result["id"] + logger.info(f"Browser session created: {self.id}") + + def fetch(self, url: Optional[str] = None, **kwargs) -> dict: + body = {"sessionId": self.id} + if url: + body["url"] = url + body.update(kwargs) + return _call("/fetch", body) + + def click(self, selector: str, timeout: int = 5000) -> dict: + return _call("/interact", { + "sessionId": self.id, "action": "click", + "selector": selector, "timeout": timeout, + }) + + def type(self, selector: str, value: str, submit: bool = False) -> dict: + return _call("/interact", { + "sessionId": self.id, "action": "type", + "selector": selector, "value": value, "submit": submit, + }) + + def screenshot(self) -> bytes: + return _call_raw("/screenshot", {"sessionId": self.id, "fullPage": True}) + + def close(self): + _call(f"/session/{self.id}", method="DELETE") + logger.info(f"Browser session closed: {self.id}") + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close()