From e8d7611e2be5f7706dbb90f159db27a49ca3105d Mon Sep 17 00:00:00 2001 From: Symbiont Date: Thu, 19 Mar 2026 20:12:41 +0000 Subject: [PATCH] Add dendrite skill to canonical repo Headless Chromium browser service for web browsing, scraping, and automation. Part of the Muse ecosystem: Symbiont orchestrates, Dendrite perceives. Co-Authored-By: Claude Opus 4.6 --- dendrite/SKILL.md | 393 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 393 insertions(+) create mode 100644 dendrite/SKILL.md diff --git a/dendrite/SKILL.md b/dendrite/SKILL.md new file mode 100644 index 0000000..04cc8ea --- /dev/null +++ b/dendrite/SKILL.md @@ -0,0 +1,393 @@ +--- +name: dendrite +description: > + Headless Chromium browser on cortex.hydrascale.net — full JS execution, sessions, screenshots, + Readability content extraction. Use for ANY web browsing: fetching pages, research, scraping, + screenshots, login flows, form filling, SPA rendering. Prefer over Claude in Chrome (faster, + runs on cortex). Trigger on: URLs in messages, "browse", "fetch", "scrape", "screenshot", + "read this page", "check this link", "log in to", "go to", web research, or any task needing + web access. Also use when WebFetch fails on JS-heavy pages. +--- + +# Dendrite: Sensory Extension for Muse + +## What It Is + +Dendrite is the nervous system's sensory arm — it reaches out into the web, perceives content +through full Chromium rendering, and carries structured information back to the system. Named +for the branching neural extensions that receive signals from the outside world. + +It runs as a Docker container on `cortex.hydrascale.net`, exposes a REST API behind Caddy +with auto-HTTPS, and includes an MCP server for native Claude integration. Full JavaScript +execution, persistent sessions, Mozilla Readability content extraction, ad blocking, and +minimal stealth patches. + +**Part of the Muse ecosystem:** Symbiont orchestrates, Dendrite perceives. 
import json
import urllib.error
import urllib.request

DENDRITE_URL = 'https://browser.hydrascale.net'
# NOTE(review): this key is stored in plain text in a committed document;
# consider rotating it and loading it from the environment instead.
DENDRITE_KEY = '8dc5e8f7a02745ee8db90c94b2481fd9e1deeea1e2ce74420f54047859ea7edf'


def _dendrite_error(exc):
    """Build the RuntimeError that reports a failed Dendrite call.

    Args:
        exc: The ``urllib.error.HTTPError`` raised by ``urlopen``.

    Returns:
        A ``RuntimeError`` whose message is
        ``"Dendrite error <status>: <detail>"``. ``<detail>`` is the ``error``
        field of a JSON-object response body when there is one, otherwise the
        HTTP reason phrase.
    """
    detail = exc.reason
    try:
        payload = json.loads(exc.read())
    except (ValueError, OSError):
        # The error body is not guaranteed to be JSON (it may be empty, or an
        # HTML page produced by something in front of the API). Keep the
        # reason phrase rather than masking the HTTP failure with a decode
        # error. JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        payload = None
    if isinstance(payload, dict):
        detail = payload.get('error', detail)
    return RuntimeError(f"Dendrite error {exc.code}: {detail}")


def dendrite(path, body=None, method=None):
    """Call the Dendrite API and return the parsed JSON response.

    Args:
        path: Endpoint path appended to ``DENDRITE_URL`` (e.g. ``'/fetch'``).
        body: Optional JSON-serialisable payload. When given, it is sent as
            the request body with a JSON content type.
        method: Optional HTTP verb. Defaults to ``'POST'`` when ``body`` is
            given and ``'GET'`` otherwise.

    Returns:
        The decoded JSON response.

    Raises:
        RuntimeError: The server answered with an HTTP error status. The
            original ``HTTPError`` is attached as ``__cause__``.
    """
    url = f'{DENDRITE_URL}{path}'
    if body is not None:
        data = json.dumps(body).encode()
        req = urllib.request.Request(url, data=data, method=method or 'POST')
        req.add_header('Content-Type', 'application/json')
    else:
        req = urllib.request.Request(url, method=method or 'GET')
    req.add_header('X-API-Key', DENDRITE_KEY)
    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        raise _dendrite_error(e) from e


def dendrite_screenshot(body):
    """Take a screenshot and return the raw image bytes.

    Args:
        body: JSON-serialisable options, POSTed to ``/screenshot``.

    Returns:
        The response body as ``bytes``, unparsed.

    Raises:
        urllib.error.HTTPError: The server answered with an HTTP error
            status. Unlike ``dendrite``, this helper does not convert the
            error to ``RuntimeError``.
    """
    url = f'{DENDRITE_URL}/screenshot'
    data = json.dumps(body).encode()
    req = urllib.request.Request(url, data=data, method='POST')
    req.add_header('Content-Type', 'application/json')
    req.add_header('X-API-Key', DENDRITE_KEY)
    with urllib.request.urlopen(req, timeout=60) as resp:
        return resp.read()
+ +```python +result = dendrite('/execute', { + 'url': 'https://example.com', + 'script': 'return document.querySelectorAll("a").length', +}) +print(result['result']) # e.g., 42 +``` + +### `POST /interact` + +Interact with elements in a session. See Sessions section below. + +```python +dendrite('/interact', { + 'sessionId': sid, + 'action': 'click', # 'click' | 'type' | 'select' | 'wait' | 'scroll' + 'selector': '#submit', + 'timeout': 5000, +}) +# Returns: { ok, title, url } +``` + +### `POST /session` / `DELETE /session/:id` / `GET /sessions` + +Session lifecycle management. See Sessions section. + +### `GET /health` + +No auth required. Returns: `{ status, sessions, activePages, uptime, timestamp }` + +--- + +## Sessions (multi-step interactions) + +Sessions maintain cookies, localStorage, and auth state across requests. Use them for +login flows, multi-page navigation, form filling, and any workflow requiring state. + +**Sessions auto-expire after 30 minutes of inactivity. Always close when done.** + +### Full session workflow example + +```python +# 1. Create +session = dendrite('/session', { + 'locale': 'en-US', + 'timezone': 'America/Chicago', + 'blockAds': True, +}) +sid = session['id'] + +# 2. Navigate to login +page = dendrite('/fetch', {'sessionId': sid, 'url': 'https://app.example.com/login'}) + +# 3. Type credentials +dendrite('/interact', { + 'sessionId': sid, 'action': 'type', + 'selector': '#email', 'value': 'user@example.com', +}) +dendrite('/interact', { + 'sessionId': sid, 'action': 'type', + 'selector': '#password', 'value': 'secret', 'submit': True, +}) + +# 4. Check where we landed +page = dendrite('/fetch', {'sessionId': sid}) # no url = get current page +print(page['title'], page['url']) + +# 5. Click around +dendrite('/interact', {'sessionId': sid, 'action': 'click', 'selector': 'nav a.dashboard'}) +page = dendrite('/fetch', {'sessionId': sid}) + +# 6. 
Always close +dendrite(f'/session/{sid}', method='DELETE') +``` + +### Interact actions reference + +| Action | Required | Optional | Description | +|--------|----------|----------|-------------| +| `click` | `selector` | `timeout` | Click element, wait for domcontentloaded | +| `type` | `selector`, `value` | `submit`, `timeout` | Fill input. `submit: true` presses Enter | +| `select` | `selector`, `value` | `timeout` | Select dropdown option by value | +| `wait` | `selector` | `timeout` | Wait for element to appear in DOM | +| `scroll` | — | `selector`, `timeout` | Scroll element into view, or page bottom if no selector | + +--- + +## Decision Guide + +| I need to... | Use | +|---|---| +| Read an article / docs page | `POST /fetch` (default settings) | +| Fetch a React/Vue SPA | `POST /fetch` with `waitFor: 'networkidle'` | +| Scrape links or structured data | `POST /fetch` with `extractMain: false, format: 'html'` then parse | +| Visually verify a page | `POST /screenshot` | +| Extract data via JS | `POST /execute` | +| Log in, fill forms, multi-step | Create session → interact → close | +| Quick check what's at a URL | `POST /fetch` — one line | + +### Dendrite vs WebFetch vs Claude in Chrome + +| Feature | Dendrite | WebFetch | Claude in Chrome | +|---------|----------|----------|-----------------| +| JavaScript execution | Full Chromium | None | Full Chrome | +| Speed | Fast (server-side) | Fastest (no browser) | Slow (screen recording) | +| SPAs (React, etc.) | Works | Fails | Works | +| Sessions/auth flows | Yes | No | Yes (manual) | +| Screenshots | Yes (API) | No | Yes (visual) | +| Runs on | cortex (16GB) | Cowork VM | Michael's MacBook | +| Best for | Research, scraping, automation | Simple static pages | Visual tasks, debugging | + +**Rule of thumb:** Try Dendrite first. Fall back to WebFetch for dead-simple pages where +you don't need JS. 
Use Claude in Chrome only when you truly need to see and interact with +the visual layout (drag-and-drop, complex visual UIs). + +--- + +## Error Handling + +```python +try: + result = dendrite('/fetch', {'url': 'https://example.com'}) +except RuntimeError as e: + print(f"Error: {e}") + # Common errors: + # 401 — Bad API key + # 404 — Session not found (expired after 30min idle) + # 429 — Too many concurrent pages (max 10), retry shortly + # 500 — Navigation timeout, page error, or unreachable site +``` + +**If a page times out:** Try with `waitFor: 'domcontentloaded'` (faster, may miss lazy content) +or increase `timeout` beyond the default 30s. + +**If content is empty/short:** The page may be JavaScript-rendered. Use `waitFor: 'networkidle'`. +If Readability returns too little, try `extractMain: false` and extract what you need manually. + +--- + +## Architecture + +``` +Internet cortex.hydrascale.net + │ │ + ▼ ▼ +[Caddy] ──HTTPS──▶ [Docker: muse-browser] + :443 :3000 + ┌─────────┐ + │ Fastify │ ← REST API + │ server │ + └────┬────┘ + │ + ┌────▼────┐ + │Playwright│ ← Single Chromium instance + │ + pool │ Multiple BrowserContexts (sessions) + └────┬────┘ + │ + ┌────▼─────┐ + │Readability│ ← Content extraction + │+ Turndown │ HTML → Markdown + └──────────┘ +``` + +### Stack +- **Runtime:** Node.js 20 on Debian Bookworm (Docker) +- **Browser:** Playwright + Chromium (headless, with stealth patches) +- **HTTP server:** Fastify v4 with CORS + API key auth +- **Content extraction:** Mozilla Readability + Turndown +- **MCP:** stdio transport (for Claude Desktop integration) +- **Reverse proxy:** Caddy (auto-HTTPS, gzip) + +### Key files on cortex +| Path | Purpose | +|------|---------| +| `/opt/muse-browser/` | Working directory (Docker build source) | +| `/opt/muse-browser/src/server.js` | Fastify entry point | +| `/opt/muse-browser/src/browser.js` | Chromium pool + sessions | +| `/opt/muse-browser/src/extract.js` | Readability + Turndown | +| 
`/opt/muse-browser/src/routes.js` | REST endpoints | +| `/opt/muse-browser/src/mcp-stdio.js` | MCP server (stdio) | +| `/opt/muse-browser/.env` | Secrets (API key, config) | +| `/data/repos/muse-browser.git` | Bare git repo (backed up nightly) | + +--- + +## Maintenance & Operations + +### Health check +```python +health = dendrite('/health', method='GET') +print(health) # { status: "ok", sessions, activePages, uptime, timestamp } +``` + +### From cortex SSH +```bash +# Container status +docker ps | grep muse-browser +docker logs muse-browser --tail=50 + +# Restart +docker compose -f /opt/muse-browser/docker-compose.yml restart + +# Full rebuild after code changes +cd /opt/muse-browser && docker compose down && docker compose build --no-cache && docker compose up -d +``` + +### Git deploy (from Michael's Mac) +```bash +# First time +git clone root@cortex.hydrascale.net:/data/repos/muse-browser.git +cd muse-browser + +# After making changes +git push origin main +# → post-receive hook auto-rebuilds container and restarts +``` + +### Caddy logs (if HTTPS issues) +```bash +journalctl -u caddy --no-pager -n 30 +``` + +--- + +## MCP Configuration (Claude Desktop) + +To use Dendrite tools natively in Claude Desktop, add to MCP config: + +```json +{ + "mcpServers": { + "dendrite": { + "command": "ssh", + "args": [ + "-o", "StrictHostKeyChecking=no", + "-i", "~/.ssh/cortex", + "root@cortex.hydrascale.net", + "docker exec -i muse-browser node src/mcp-stdio.js" + ] + } + } +} +``` + +### MCP tools available + +| Tool | Description | +|------|-------------| +| `fetch_page` | Fetch URL → markdown/html/text | +| `take_screenshot` | Screenshot URL or session → PNG | +| `run_javascript` | Execute JS in page context | +| `create_session` | Open persistent browser session | +| `close_session` | Destroy session | +| `navigate` | Session: go to URL, return content | +| `click` | Session: click element by selector | +| `type_text` | Session: type into input field | +| 
`get_page_content` | Session: get current page content | +| `get_page_screenshot` | Session: screenshot current page | + +--- + +## Relationship to Other Muse Components + +- **Symbiont** (orchestrator) can dispatch tasks that require web research → Dendrite fetches the content +- **Cortex** (infrastructure) hosts and runs Dendrite as a Docker service +- **Future components** can call Dendrite's REST API to perceive the web without their own browser