commit b84613270af4b25f53678ee22e58ef58a9ba8302 Author: deploy Date: Mon Mar 16 14:54:50 2026 +0000 Initial commit: muse-browser v0.1.0 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..6979a04 --- /dev/null +++ b/.env.example @@ -0,0 +1,7 @@ +API_KEY= +PORT=3000 +HOST=0.0.0.0 +LOG_LEVEL=info +SESSION_TTL_MS=1800000 +SESSION_CLEANUP_INTERVAL_MS=300000 +MAX_CONCURRENT_PAGES=10 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bd71946 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +node_modules/ +.env +*.log +downloads/ +.DS_Store diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d9ef726 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +FROM node:20-bookworm-slim + +WORKDIR /app + +# Install dependencies first (layer-cached until package.json changes) +COPY package*.json ./ +RUN npm ci --omit=dev + +# Install Chromium and all required system dependencies via Playwright's installer +RUN npx playwright install chromium --with-deps + +# Copy application source +COPY src/ ./src/ +COPY .env.example ./ + +ENV NODE_ENV=production + +EXPOSE 3000 + +HEALTHCHECK --interval=30s --timeout=10s --start-period=25s --retries=3 \ + CMD node -e "\ + const http = require('http'); \ + http.get('http://localhost:3000/health', (r) => { \ + process.exit(r.statusCode === 200 ? 0 : 1); \ + }).on('error', () => process.exit(1));" + +CMD ["node", "src/server.js"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..84de4c7 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,18 @@ +services: + muse-browser: + build: . 
+    image: muse-browser:latest
+    container_name: muse-browser
+    restart: unless-stopped
+    ports:
+      - "127.0.0.1:3000:3000"  # localhost only — Caddy proxies publicly
+    env_file:
+      - .env
+    shm_size: '2gb'  # Chrome uses /dev/shm; default 64MB causes crashes
+    security_opt:
+      - seccomp=unconfined  # needed alongside --no-sandbox in some kernel configs
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "50m"
+        max-file: "3"
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..ce52092
--- /dev/null
+++ b/package.json
@@ -0,0 +1,27 @@
+{
+  "name": "muse-browser",
+  "version": "0.1.0",
+  "private": true,
+  "description": "Full-capability headless browser service — Muse building block",
+  "main": "src/server.js",
+  "scripts": {
+    "start": "node src/server.js",
+    "mcp": "node src/mcp-stdio.js",
+    "dev": "node --watch src/server.js"
+  },
+  "dependencies": {
+    "fastify": "^4.28.0",
+    "@fastify/cors": "^9.0.1",
+    "playwright": "^1.44.0",
+    "@mozilla/readability": "^0.5.0",
+    "jsdom": "^24.1.0",
+    "turndown": "^7.2.0",
+    "@modelcontextprotocol/sdk": "^1.6.1",
+    "uuid": "^9.0.1",
+    "zod": "^3.23.8",
+    "dotenv": "^16.4.5"
+  },
+  "engines": {
+    "node": ">=20"
+  }
+}
diff --git a/src/browser.js b/src/browser.js
new file mode 100644
index 0000000..49a97f6
--- /dev/null
+++ b/src/browser.js
@@ -0,0 +1,172 @@
+'use strict';
+
+const { chromium } = require('playwright');
+const { v4: uuidv4 } = require('uuid');
+
+/**
+ * Read a positive integer setting from the environment.
+ *
+ * @param {string} name - Environment variable name
+ * @param {number} fallback - Value used when the variable is unset, not a number, or <= 0
+ * @returns {number}
+ */
+function envInt(name, fallback) {
+  const parsed = parseInt(process.env[name] || '', 10);
+  // A NaN limit would make `activePages >= MAX_PAGES` always false and hand
+  // setInterval a NaN delay, so anything unusable falls back to the default.
+  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
+}
+
+const MAX_PAGES = envInt('MAX_CONCURRENT_PAGES', 10);
+const SESSION_TTL = envInt('SESSION_TTL_MS', 1800000);
+const CLEANUP_INTERVAL = envInt('SESSION_CLEANUP_INTERVAL_MS', 300000);
+
+let _browser = null;
+let _launching = null; // in-flight chromium.launch() promise, shared by concurrent callers
+let activePages = 0;
+const sessions = new Map();
+
+// Ad/tracker domains to block
+const BLOCKED_DOMAINS = [
+  'doubleclick.net', 'googlesyndication.com', 'googleadservices.com',
+  'google-analytics.com', 'googletagmanager.com', 'facebook.net',
+  'connect.facebook.net', 'amazon-adsystem.com', 'outbrain.com',
+  'taboola.com', 'criteo.com', 'hotjar.com', 'intercom.io',
+  'advertising.com', 'moatads.com', 'scorecardresearch.com',
+  'quantserve.com', 'adsrvr.org', 'adsafeprotected.com',
+  'adnxs.com', 'rubiconproject.com', 'openx.net',
+];
+
+// Chromium flags required in Docker
+const BROWSER_ARGS = [
+  '--no-sandbox',
+  '--disable-setuid-sandbox',
+  '--disable-dev-shm-usage',
+  '--disable-accelerated-2d-canvas',
+  '--no-first-run',
+  '--no-zygote',
+  '--disable-gpu',
+  '--disable-background-networking',
+  '--disable-client-side-phishing-detection',
+  '--disable-default-apps',
+  '--disable-extensions',
+];
+
+// Minimal stealth — patches the most common bot-detection checks without extra deps
+const STEALTH_SCRIPT = `
+  Object.defineProperty(navigator, 'webdriver', { get: () => false });
+  Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
+  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
+  try { delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array; } catch(e) {}
+  try { delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise; } catch(e) {}
+  try { delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol; } catch(e) {}
+`;
+
+// --- Browser lifecycle ---
+
+/**
+ * Return the shared Chromium instance, launching it on first use or after a
+ * disconnect.
+ *
+ * Concurrent callers share one in-flight launch. Without that, two requests
+ * arriving before the first launch resolves would each start a browser and the
+ * first one would be overwritten and never closed.
+ *
+ * @returns {Promise<import('playwright').Browser>}
+ */
+async function getBrowser() {
+  if (_browser && _browser.isConnected()) return _browser;
+  if (!_launching) {
+    _launching = chromium
+      .launch({ headless: true, args: BROWSER_ARGS })
+      .then((launched) => {
+        _browser = launched;
+        launched.on('disconnected', () => {
+          // Only clear the slot if it still points at this instance
+          if (_browser === launched) _browser = null;
+        });
+        return launched;
+      })
+      .finally(() => {
+        _launching = null;
+      });
+  }
+  return _launching;
+}
+
+/**
+ * Create a browser context with the stealth init script and optional cookies.
+ *
+ * @param {object} [options]
+ * @param {string} [options.userAgent]
+ * @param {{width: number, height: number}} [options.viewport] - defaults to 1280x720
+ * @param {string} [options.locale] - defaults to 'en-US'
+ * @param {string} [options.timezone] - defaults to 'America/Chicago'
+ * @param {Object<string, string>} [options.headers] - extra HTTP headers
+ * @param {Array<object>} [options.cookies] - passed to context.addCookies()
+ * @returns {Promise<import('playwright').BrowserContext>}
+ */
+async function newContext(options = {}) {
+  const b = await getBrowser();
+  const ctx = await b.newContext({
+    userAgent: options.userAgent,
+    viewport: options.viewport || { width: 1280, height: 720 },
+    locale: options.locale || 'en-US',
+    timezoneId: options.timezone || 'America/Chicago',
+    extraHTTPHeaders: options.headers || {},
+  });
+  try {
+    await ctx.addInitScript(STEALTH_SCRIPT);
+    if (options.cookies && options.cookies.length > 0) {
+      await ctx.addCookies(options.cookies);
+    }
+  } catch (err) {
+    // Don't leak a half-configured context (e.g. malformed cookies)
+    await ctx.close().catch(() => {});
+    throw err;
+  }
+  return ctx;
+}
+
+/**
+ * True when `hostname` is one of BLOCKED_DOMAINS or a subdomain of one.
+ *
+ * @param {string} hostname
+ * @returns {boolean}
+ */
+function isBlockedHost(hostname) {
+  return BLOCKED_DOMAINS.some((d) => hostname === d || hostname.endsWith(`.${d}`));
+}
+
+/**
+ * Abort requests to known ad/tracker hosts on `page`; let everything else through.
+ *
+ * Matching is done on the request hostname, not on the whole URL string, so a
+ * page whose path or query merely mentions a blocked domain is not aborted.
+ *
+ * @param {import('playwright').Page} page
+ * @returns {Promise<void>} resolves once the route handler is installed
+ */
+function applyAdBlocking(page) {
+  return page.route('**/*', (route) => {
+    let hostname = '';
+    try {
+      hostname = new URL(route.request().url()).hostname;
+    } catch (_) {
+      // Unparseable URL — treat as not blocked
+    }
+    const handled = isBlockedHost(hostname) ? route.abort() : route.continue();
+    // The page can close while a request is in flight; that rejection is harmless
+    return handled.catch(() => {});
+  });
+}
+
+// --- One-shot page (ephemeral: context is closed after fn completes) ---
+
+/**
+ * Run `fn` against a fresh page in its own context, then dispose of the context.
+ *
+ * @param {object} [options] - forwarded to newContext(); `blockAds: false` disables ad blocking
+ * @param {(page: import('playwright').Page) => Promise<*>} fn
+ * @returns {Promise<*>} whatever `fn` resolves to
+ * @throws {Error} with `statusCode = 429` when MAX_PAGES pages are already active
+ */
+async function withPage(options = {}, fn) {
+  if (activePages >= MAX_PAGES) {
+    const err = new Error(`Too many concurrent pages (max ${MAX_PAGES}). Try again shortly.`);
+    err.statusCode = 429;
+    throw err;
+  }
+  activePages++;
+  let ctx;
+  try {
+    // Setup is inside the try so a failed launch/newPage still releases the slot
+    ctx = await newContext(options);
+    const page = await ctx.newPage();
+    if (options.blockAds !== false) await applyAdBlocking(page);
+    return await fn(page);
+  } finally {
+    activePages--;
+    if (ctx) await ctx.close().catch(() => {});
+  }
+}
+
+// --- Persistent sessions ---
+
+/**
+ * Create a long-lived context + page and register it in the session map.
+ *
+ * @param {object} [options] - forwarded to newContext(); `blockAds: false` disables ad blocking
+ * @returns {Promise<{id: string, createdAt: Date}>}
+ */
+async function createSession(options = {}) {
+  const id = uuidv4();
+  const ctx = await newContext(options);
+  let page;
+  try {
+    page = await ctx.newPage();
+    if (options.blockAds !== false) await applyAdBlocking(page);
+  } catch (err) {
+    // The session was never registered, so nothing else would close this context
+    await ctx.close().catch(() => {});
+    throw err;
+  }
+  const session = { id, context: ctx, page, createdAt: new Date(), lastUsedAt: new Date() };
+  sessions.set(id, session);
+  return { id, createdAt: session.createdAt };
+}
+
+/**
+ * Look up a session and mark it as just used (resets its TTL clock).
+ *
+ * @param {string} id
+ * @returns {object|null} the session record, or null if unknown
+ */
+function getSession(id) {
+  const s = sessions.get(id);
+  if (s) s.lastUsedAt = new Date();
+  return s || null;
+}
+
+/**
+ * Remove a session and close its context.
+ *
+ * @param {string} id
+ * @returns {Promise<boolean>} false if no such session existed
+ */
+async function closeSession(id) {
+  const s = sessions.get(id);
+  if (!s) return false;
+  // Delete first so concurrent lookups stop seeing a session that is closing
+  sessions.delete(id);
+  await s.context.close().catch(() => {});
+  return true;
+}
+
+/**
+ * @returns {Array<{id: string, createdAt: Date, lastUsedAt: Date}>} summary of live sessions
+ */
+function listSessions() {
+  return [...sessions.values()].map((s) => ({
+    id: s.id,
+    createdAt: s.createdAt,
+    lastUsedAt: s.lastUsedAt,
+  }));
+}
+
+// --- Shutdown ---
+
+/**
+ * Stop the TTL timer, close every session, then close the browser.
+ *
+ * @returns {Promise<void>}
+ */
+async function shutdown() {
+  clearInterval(_cleanupTimer);
+  for (const id of [...sessions.keys()]) await closeSession(id);
+  if (_browser) await _browser.close().catch(() => {});
+}
+
+// --- TTL cleanup ---
+
+// Every CLEANUP_INTERVAL ms, close sessions idle for longer than SESSION_TTL
+const _cleanupTimer = setInterval(async () => {
+  const now = Date.now();
+  for (const [id, s] of sessions) {
+    if (now - s.lastUsedAt.getTime() > SESSION_TTL) {
+      await closeSession(id);
+    }
+  }
+}, CLEANUP_INTERVAL);
+
+_cleanupTimer.unref(); // don't prevent process from exiting
+
+module.exports = {
+  withPage,
+  createSession,
+  getSession,
+  closeSession,
+  listSessions,
+  shutdown,
+  getActivePages: () => activePages,
+  getSessionCount: () => sessions.size,
+};
diff --git a/src/extract.js b/src/extract.js
new file mode 100644
index 0000000..6bdf6c0
--- /dev/null
+++ b/src/extract.js
@@ -0,0 +1,82 @@
+'use strict';
+
+const { Readability } = require('@mozilla/readability');
+const { JSDOM } = require('jsdom');
+const TurndownService = require('turndown');
+
+const td = new TurndownService({
+  headingStyle: 'atx',
+  codeBlockStyle: 'fenced',
+  bulletListMarker: '-',
+  hr: '---',
+});
+
+// Readability already strips nav/aside/footer; remove remaining noise from Turndown output
+td.remove(['script', 'style', 'iframe', 'noscript', 'svg', 'figure']);
+
+/**
+ * Extract the primary article content from raw HTML using Mozilla Readability.
+ * Falls back to the full <body> HTML if Readability can't identify a main article.
+ *
+ * @param {string} html - Raw page HTML
+ * @param {string} url  - Page URL (needed by Readability to resolve relative links)
+ * @returns {{ title: string, content: string }} title + clean HTML content
+ */
+function extractContent(html, url) {
+  try {
+    const dom = new JSDOM(html, { url: url || 'https://example.com' });
+    const reader = new Readability(dom.window.document, {
+      charThreshold: 20,   // lower threshold catches short but valid articles
+      keepClasses: false,  // strip class attributes for cleaner output
+    });
+    const article = reader.parse();
+    // Results of 100 characters or fewer are treated as a failed extraction
+    if (article && article.content && article.content.length > 100) {
+      return { title: article.title || '', content: article.content };
+    }
+  } catch (_) {
+    // Readability failed — fall through to body fallback
+  }
+
+  // Fallback: use full body HTML
+  try {
+    const dom = new JSDOM(html);
+    const doc = dom.window.document;
+    return {
+      title: doc.title || '',
+      content: doc.body ? doc.body.innerHTML : html,
+    };
+  } catch (_) {
+    // Even plain parsing failed — hand back the input untouched
+    return { title: '', content: html };
+  }
+}
+
+/**
+ * Convert HTML to Markdown.
+ * Falls back to plain-text stripping if Turndown throws.
+ * @param {string} html
+ * @returns {string}
+ */
+function toMarkdown(html) {
+  if (!html) return '';
+  try {
+    return td.turndown(html);
+  } catch (_) {
+    return toText(html);
+  }
+}
+
+/**
+ * Strip all HTML tags and collapse whitespace to plain text.
+ * Script and style elements are removed together with their contents first,
+ * so their source code does not end up in the text.
+ * @param {string} html
+ * @returns {string}
+ */
+function toText(html) {
+  if (!html) return '';
+  return html
+    .replace(/<script[\s\S]*?<\/script>/gi, '')
+    .replace(/<style[\s\S]*?<\/style>/gi, '')
+    .replace(/<[^>]+>/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
+}
+
+module.exports = { extractContent, toMarkdown, toText };
diff --git a/src/mcp-stdio.js b/src/mcp-stdio.js
new file mode 100644
index 0000000..1595d81
--- /dev/null
+++ b/src/mcp-stdio.js
@@ -0,0 +1,269 @@
+'use strict';
+
+/**
+ * muse-browser MCP server — stdio transport
+ *
+ * Runs inside the Docker container. Communicates with the Fastify HTTP server
+ * on localhost:PORT. Each tool call makes an internal HTTP request.
+ *
+ * Usage (from Claude Desktop config):
+ *   "command": "ssh",
+ *   "args": ["-i", "~/.ssh/cortex", "root@cortex.hydrascale.net",
+ *            "docker exec -i muse-browser node src/mcp-stdio.js"]
+ */
+
+require('dotenv').config();
+
+const { McpServer } = require('@modelcontextprotocol/sdk/server/mcp.js');
+const { StdioServerTransport } = require('@modelcontextprotocol/sdk/server/stdio.js');
+const { z } = require('zod');
+
+const PORT = process.env.PORT || 3000;
+const API_KEY = process.env.API_KEY || '';
+const BASE = `http://localhost:${PORT}`;
+
+// ── Internal HTTP helpers ────────────────────────────────────────────────────
+
+/**
+ * Send one request to the local HTTP server and return the raw Response.
+ *
+ * @param {string} path - Path beginning with '/'
+ * @param {string} method - HTTP method
+ * @param {object} [body] - JSON-serialisable payload
+ * @returns {Promise<Response>}
+ * @throws {Error} with the server's `error` message (or the status text) on a non-2xx reply
+ */
+async function request(path, method, body) {
+  const headers = { 'X-API-Key': API_KEY };
+  // Only declare a JSON body when one is actually sent (GET/DELETE carry none)
+  if (body) headers['Content-Type'] = 'application/json';
+  const res = await fetch(`${BASE}${path}`, {
+    method,
+    headers,
+    body: body ? JSON.stringify(body) : undefined,
+  });
+  if (!res.ok) {
+    // Error bodies are not guaranteed to be JSON; fall back to the status text
+    const data = await res.json().catch(() => ({}));
+    throw new Error(data.error || res.statusText);
+  }
+  return res;
+}
+
+/**
+ * Call a JSON endpoint and return the parsed body.
+ * @param {string} path
+ * @param {string} [method='GET']
+ * @param {object} [body]
+ * @returns {Promise<object>}
+ */
+async function api(path, method = 'GET', body) {
+  const res = await request(path, method, body);
+  return res.json();
+}
+
+/**
+ * Call an endpoint that returns a non-JSON body (e.g. an image) and return the Response.
+ * @param {string} path
+ * @param {string} method
+ * @param {object} [body]
+ * @returns {Promise<Response>}
+ */
+async function apiRaw(path, method, body) {
+  return request(path, method, body);
+}
+
+// ── MCP server ───────────────────────────────────────────────────────────────
+
+const server = new McpServer({ name: 'muse-browser', version: '0.1.0' });
+
+// ── fetch_page ───────────────────────────────────────────────────────────────
+
+server.tool(
+  'fetch_page',
+  'Fetch a web page and return its content as markdown, HTML, or plain text. ' +
+  'Runs full Chromium with JavaScript execution. ' +
+  'Use extractMain:true (default) to get clean article content without nav/ads.',
+  {
+    url: z.string().url().describe('URL to fetch'),
+    format: z.enum(['markdown', 'html', 'text']).default('markdown').describe('Output format'),
+    waitFor: z.enum(['networkidle', 'domcontentloaded', 'load']).default('domcontentloaded')
+      .describe('When to consider the page loaded. Use networkidle for SPAs.'),
+    blockAds: z.boolean().default(true).describe('Block ad and tracker requests'),
+    extractMain: z.boolean().default(true).describe('Extract main article content (removes nav, footer, ads)'),
+    timeout: z.number().default(30000).describe('Timeout in milliseconds'),
+    sessionId: z.string().optional().describe('Use an existing persistent session'),
+  },
+  async (args) => {
+    const data = await api('/fetch', 'POST', args);
+    return {
+      content: [{ type: 'text', text: `# ${data.title}\nURL: ${data.url}\n\n${data.content}` }],
+    };
+  }
+);
+
+// ── take_screenshot ──────────────────────────────────────────────────────────
+
+server.tool(
+  'take_screenshot',
+  'Take a screenshot of a web page. Returns a PNG image. ' +
+  'Useful for visually verifying page state or capturing charts/visual content.',
+  {
+    url: z.string().url().optional().describe('URL to screenshot (required if no sessionId)'),
+    fullPage: z.boolean().default(true).describe('Capture full scrollable page or just viewport'),
+    selector: z.string().optional().describe('CSS selector — screenshot just this element'),
+    waitFor: z.enum(['networkidle', 'domcontentloaded', 'load']).default('networkidle'),
+    sessionId: z.string().optional().describe('Use an existing persistent session'),
+    timeout: z.number().default(30000),
+  },
+  async (args) => {
+    const res = await apiRaw('/screenshot', 'POST', args);
+    const buffer = Buffer.from(await res.arrayBuffer());
+    return {
+      content: [{ type: 'image', data: buffer.toString('base64'), mimeType: 'image/png' }],
+    };
+  }
+);
+
+// ── run_javascript ───────────────────────────────────────────────────────────
+
+server.tool(
+  'run_javascript',
+  'Execute JavaScript in a page context and return the result. ' +
+  'Scripts are wrapped in an IIFE — use `return` to provide a value. ' +
+  'Example: `return document.querySelectorAll("a").length`',
+  {
+    url: z.string().url().optional().describe('URL to navigate to first (required if no sessionId)'),
+    script: z.string().describe('JavaScript to run in the page. Use return to get a value.'),
+    sessionId: z.string().optional().describe('Use an existing persistent session'),
+    timeout: z.number().default(30000),
+  },
+  async (args) => {
+    const data = await api('/execute', 'POST', args);
+    const result = typeof data.result === 'string'
+      ? data.result
+      : JSON.stringify(data.result, null, 2);
+    return { content: [{ type: 'text', text: result }] };
+  }
+);
+
+// ── create_session ───────────────────────────────────────────────────────────
+
+server.tool(
+  'create_session',
+  'Create a persistent browser session. ' +
+  'Use this when you need multi-step interactions: logging in, filling forms, navigating across pages. ' +
+  'Returns a session ID — pass it to other tools.',
+  {
+    userAgent: z.string().optional().describe('Custom user agent'),
+    locale: z.string().default('en-US'),
+    timezone: z.string().default('America/Chicago'),
+    blockAds: z.boolean().default(true),
+  },
+  async (args) => {
+    const data = await api('/session', 'POST', args);
+    return {
+      content: [{
+        type: 'text',
+        text: `Session created.\nSession ID: ${data.id}\nPass this ID to navigate, click, type_text, and other session tools.`,
+      }],
+    };
+  }
+);
+
+// ── close_session ────────────────────────────────────────────────────────────
+
+server.tool(
+  'close_session',
+  'Close and destroy a browser session, freeing its resources.',
+  {
+    sessionId: z.string().describe('Session ID to close'),
+  },
+  async ({ sessionId }) => {
+    // The id is caller-supplied text; escape it so it stays a single path segment
+    await api(`/session/${encodeURIComponent(sessionId)}`, 'DELETE');
+    return { content: [{ type: 'text', text: `Session ${sessionId} closed.` }] };
+  }
+);
+
+// ── navigate ─────────────────────────────────────────────────────────────────
+
+server.tool(
+  'navigate',
+  'Navigate a session to a URL and return the page content as markdown.',
+  {
+    sessionId: z.string().describe('Session ID'),
+    url: z.string().url().describe('URL to navigate to'),
+    format: z.enum(['markdown', 'html', 'text']).default('markdown'),
+    waitFor: z.enum(['networkidle', 'domcontentloaded', 'load']).default('domcontentloaded'),
+    extractMain: z.boolean().default(true),
+    timeout: z.number().default(30000),
+  },
+  async (args) => {
+    const data = await api('/fetch', 'POST', args);
+    return {
+      content: [{ type: 'text', text: `Navigated to: ${data.url}\n# ${data.title}\n\n${data.content}` }],
+    };
+  }
+);
+
+// ── click ────────────────────────────────────────────────────────────────────
+
+server.tool(
+  'click',
+  'Click an element in a browser session by CSS selector.',
+  {
+    sessionId: z.string().describe('Session ID'),
+    selector: z.string().describe('CSS selector of the element to click'),
+    timeout: z.number().default(5000),
+  },
+  async ({ sessionId, selector, timeout }) => {
+    const data = await api('/interact', 'POST', { sessionId, action: 'click', selector, timeout });
+    return {
+      content: [{ type: 'text', text: `Clicked "${selector}". Now on: ${data.title} (${data.url})` }],
+    };
+  }
+);
+
+// ── type_text ────────────────────────────────────────────────────────────────
+
+server.tool(
+  'type_text',
+  'Type text into an input field in a browser session.',
+  {
+    sessionId: z.string().describe('Session ID'),
+    selector: z.string().describe('CSS selector of the input field'),
+    text: z.string().describe('Text to type into the field'),
+    submit: z.boolean().default(false).describe('Press Enter after typing to submit'),
+    timeout: z.number().default(5000),
+  },
+  async ({ sessionId, selector, text, submit, timeout }) => {
+    const data = await api('/interact', 'POST', { sessionId, action: 'type', selector, value: text, submit, timeout });
+    return {
+      content: [{ type: 'text', text: `Typed into "${selector}"${submit ? ' and submitted' : ''}. Now on: ${data.title} (${data.url})` }],
+    };
+  }
+);
+
+// ── get_page_content ─────────────────────────────────────────────────────────
+
+server.tool(
+  'get_page_content',
+  'Get the content of the current page in a session without navigating.',
+  {
+    sessionId: z.string().describe('Session ID'),
+    format: z.enum(['markdown', 'html', 'text']).default('markdown'),
+    extractMain: z.boolean().default(true),
+  },
+  async (args) => {
+    // Fetch with sessionId and no url = return current page
+    const data = await api('/fetch', 'POST', { ...args, url: undefined });
+    return {
+      content: [{ type: 'text', text: `# ${data.title}\nURL: ${data.url}\n\n${data.content}` }],
+    };
+  }
+);
+
+// ── get_page_screenshot ──────────────────────────────────────────────────────
+
+server.tool(
+  'get_page_screenshot',
+  'Take a screenshot of the current page in a session.',
+  {
+    sessionId: z.string().describe('Session ID'),
+    fullPage: z.boolean().default(true),
+  },
+  async (args) => {
+    const res = await apiRaw('/screenshot', 'POST', args);
+    const buffer = Buffer.from(await res.arrayBuffer());
+    return {
+      content: [{ type: 'image', data: buffer.toString('base64'), mimeType: 'image/png' }],
+    };
+  }
+);
+
+// ── Start ────────────────────────────────────────────────────────────────────
+
+async function main() {
+  const transport = new StdioServerTransport();
+  await server.connect(transport);
+}
+
+main().catch((err) => {
+  process.stderr.write(`MCP server error: ${err.message}\n`);
+  process.exit(1);
+});
diff --git a/src/routes.js b/src/routes.js
new file mode 100644
index 0000000..be8fabc
--- /dev/null
+++ b/src/routes.js
@@ -0,0 +1,285 @@
+'use strict';
+
+const browser = require('./browser');
+const { extractContent, toMarkdown, toText } = require('./extract');
+
+// Actions of POST /interact that cannot run without a selector
+const SELECTOR_ACTIONS = ['click', 'type', 'select', 'wait'];
+
+/**
+ * Send a JSON error reply, honouring a status code attached to the error
+ * (browser.withPage throws with statusCode 429 when the page limit is hit).
+ *
+ * @param {object} reply - Fastify reply
+ * @param {Error} err
+ * @param {string} code - Machine-readable error code for the response body
+ */
+function sendError(reply, err, code) {
+  return reply.code(err.statusCode || 500).send({ error: err.message, code });
+}
+
+/**
+ * Register all HTTP routes on the given Fastify instance.
+ *
+ * @param {object} fastify - Fastify instance
+ * @returns {Promise<void>}
+ */
+async function registerRoutes(fastify) {
+
+  // ── Health ──────────────────────────────────────────────────────────────────
+
+  fastify.get('/health', async () => ({
+    status: 'ok',
+    sessions: browser.getSessionCount(),
+    activePages: browser.getActivePages(),
+    uptime: Math.round(process.uptime()),
+    timestamp: new Date().toISOString(),
+  }));
+
+  // ── Fetch ───────────────────────────────────────────────────────────────────
+
+  fastify.post('/fetch', {
+    schema: {
+      body: {
+        type: 'object',
+        // `url` is deliberately not required here: with a sessionId and no url
+        // the handler returns the session's current page. The handler rejects
+        // the one case that cannot work (neither url nor sessionId).
+        properties: {
+          url: { type: 'string' },
+          format: { type: 'string', enum: ['markdown', 'html', 'text'], default: 'markdown' },
+          waitFor: { type: 'string', enum: ['networkidle', 'domcontentloaded', 'load'], default: 'domcontentloaded' },
+          blockAds: { type: 'boolean', default: true },
+          extractMain: { type: 'boolean', default: true },
+          timeout: { type: 'number', default: 30000 },
+          sessionId: { type: 'string' },
+          headers: { type: 'object', additionalProperties: { type: 'string' } },
+          cookies: { type: 'array' },
+        },
+      },
+    },
+  }, async (req, reply) => {
+    const {
+      url, format = 'markdown', extractMain = true, sessionId,
+      waitFor = 'domcontentloaded', timeout = 30000,
+      blockAds = true, headers, cookies,
+    } = req.body;
+
+    try {
+      let html, title, finalUrl;
+
+      if (sessionId) {
+        // Use existing session — navigate if url provided, otherwise get current page
+        const session = browser.getSession(sessionId);
+        if (!session) return reply.code(404).send({ error: 'Session not found', code: 'SESSION_NOT_FOUND' });
+        if (url) {
+          await session.page.goto(url, { waitUntil: waitFor, timeout });
+        }
+        html = await session.page.content();
+        title = await session.page.title();
+        finalUrl = session.page.url();
+      } else {
+        if (!url) return reply.code(400).send({ error: 'url is required when no sessionId', code: 'MISSING_URL' });
+        // One-shot ephemeral page
+        await browser.withPage({ blockAds, headers, cookies }, async (page) => {
+          await page.goto(url, { waitUntil: waitFor, timeout });
+          html = await page.content();
+          title = await page.title();
+          finalUrl = page.url();
+        });
+      }
+
+      // Extract and convert
+      let content;
+      if (extractMain) {
+        const extracted = extractContent(html, finalUrl);
+        title = extracted.title || title;
+        content = formatContent(extracted.content, format);
+      } else {
+        content = formatContent(html, format);
+      }
+
+      return { url: finalUrl, title, content, format };
+    } catch (err) {
+      return sendError(reply, err, 'FETCH_ERROR');
+    }
+  });
+
+  // ── Screenshot ──────────────────────────────────────────────────────────────
+
+  fastify.post('/screenshot', {
+    schema: {
+      body: {
+        type: 'object',
+        properties: {
+          url: { type: 'string' },
+          fullPage: { type: 'boolean', default: true },
+          format: { type: 'string', enum: ['png', 'jpeg'], default: 'png' },
+          quality: { type: 'number', default: 80 },
+          selector: { type: 'string' },
+          waitFor: { type: 'string', enum: ['networkidle', 'domcontentloaded', 'load'], default: 'networkidle' },
+          timeout: { type: 'number', default: 30000 },
+          sessionId: { type: 'string' },
+        },
+      },
+    },
+  }, async (req, reply) => {
+    const {
+      url, fullPage = true, format = 'png', quality = 80,
+      selector, waitFor = 'networkidle', timeout = 30000, sessionId,
+    } = req.body;
+
+    try {
+      let imgBuffer;
+
+      const capture = async (page) => {
+        const opts = {
+          type: format,
+          // quality is only passed for jpeg
+          ...(format === 'jpeg' ? { quality } : {}),
+          // fullPage is forced off when a single element is captured
+          fullPage: selector ? false : fullPage,
+        };
+        if (selector) {
+          imgBuffer = await page.locator(selector).first().screenshot(opts);
+        } else {
+          imgBuffer = await page.screenshot(opts);
+        }
+      };
+
+      if (sessionId) {
+        const session = browser.getSession(sessionId);
+        if (!session) return reply.code(404).send({ error: 'Session not found', code: 'SESSION_NOT_FOUND' });
+        if (url) await session.page.goto(url, { waitUntil: waitFor, timeout });
+        await capture(session.page);
+      } else {
+        if (!url) return reply.code(400).send({ error: 'url is required when no sessionId', code: 'MISSING_URL' });
+        await browser.withPage({}, async (page) => {
+          await page.goto(url, { waitUntil: waitFor, timeout });
+          await capture(page);
+        });
+      }
+
+      // Return the reply so the async handler's result is tied to this send
+      return reply.type(format === 'jpeg' ? 'image/jpeg' : 'image/png').send(imgBuffer);
+    } catch (err) {
+      return sendError(reply, err, 'SCREENSHOT_ERROR');
+    }
+  });
+
+  // ── Execute JS ──────────────────────────────────────────────────────────────
+
+  fastify.post('/execute', {
+    schema: {
+      body: {
+        type: 'object',
+        required: ['script'],
+        properties: {
+          url: { type: 'string' },
+          script: { type: 'string' },
+          sessionId: { type: 'string' },
+          waitFor: { type: 'string', enum: ['networkidle', 'domcontentloaded', 'load'], default: 'domcontentloaded' },
+          timeout: { type: 'number', default: 30000 },
+        },
+      },
+    },
+  }, async (req, reply) => {
+    const { url, script, sessionId, waitFor = 'domcontentloaded', timeout = 30000 } = req.body;
+
+    // Always wrap in IIFE so callers can use `return` naturally
+    const wrappedScript = `(function() { ${script} })()`;
+
+    try {
+      let result;
+
+      const runScript = async (page) => {
+        result = await page.evaluate(wrappedScript);
+      };
+
+      if (sessionId) {
+        const session = browser.getSession(sessionId);
+        if (!session) return reply.code(404).send({ error: 'Session not found', code: 'SESSION_NOT_FOUND' });
+        if (url) await session.page.goto(url, { waitUntil: waitFor, timeout });
+        await runScript(session.page);
+      } else {
+        if (!url) return reply.code(400).send({ error: 'url is required when no sessionId', code: 'MISSING_URL' });
+        await browser.withPage({}, async (page) => {
+          await page.goto(url, { waitUntil: waitFor, timeout });
+          await runScript(page);
+        });
+      }
+
+      return { result };
+    } catch (err) {
+      return sendError(reply, err, 'EXECUTE_ERROR');
+    }
+  });
+
+  // ── Interact (click / type / select / wait / scroll) ────────────────────────
+
+  fastify.post('/interact', {
+    schema: {
+      body: {
+        type: 'object',
+        required: ['sessionId', 'action'],
+        properties: {
+          sessionId: { type: 'string' },
+          action: { type: 'string', enum: ['click', 'type', 'select', 'wait', 'scroll'] },
+          selector: { type: 'string' },
+          value: { type: 'string' },
+          submit: { type: 'boolean', default: false },
+          timeout: { type: 'number', default: 5000 },
+        },
+      },
+    },
+  }, async (req, reply) => {
+    const { sessionId, action, selector, value, submit = false, timeout = 5000 } = req.body;
+
+    const session = browser.getSession(sessionId);
+    if (!session) return reply.code(404).send({ error: 'Session not found', code: 'SESSION_NOT_FOUND' });
+
+    // A missing selector is a caller mistake, so answer 400 before touching the page
+    if (!selector && SELECTOR_ACTIONS.includes(action)) {
+      return reply.code(400).send({ error: `selector is required for action: ${action}`, code: 'MISSING_SELECTOR' });
+    }
+
+    const page = session.page;
+    try {
+      switch (action) {
+        case 'click':
+          await page.click(selector, { timeout });
+          // Best effort: a click may or may not trigger a navigation
+          await page.waitForLoadState('domcontentloaded').catch(() => {});
+          break;
+        case 'type':
+          await page.fill(selector, value || '', { timeout });
+          if (submit) {
+            await page.press(selector, 'Enter');
+            await page.waitForLoadState('domcontentloaded').catch(() => {});
+          }
+          break;
+        case 'select':
+          await page.selectOption(selector, value || '', { timeout });
+          break;
+        case 'wait':
+          await page.waitForSelector(selector, { timeout });
+          break;
+        case 'scroll':
+          if (selector) {
+            await page.locator(selector).scrollIntoViewIfNeeded({ timeout });
+          } else {
+            // No selector: scroll to the bottom of the document
+            await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
+          }
+          break;
+        default:
+          return reply.code(400).send({ error: `Unknown action: ${action}`, code: 'UNKNOWN_ACTION' });
+      }
+
+      const title = await page.title();
+      const url = page.url();
+      return { ok: true, title, url };
+    } catch (err) {
+      return sendError(reply, err, 'INTERACT_ERROR');
+    }
+  });
+
+  // ── Sessions ────────────────────────────────────────────────────────────────
+
+  fastify.post('/session', async (req, reply) => {
+    try {
+      const session = await browser.createSession(req.body || {});
+      return session;
+    } catch (err) {
+      return sendError(reply, err, 'SESSION_CREATE_ERROR');
+    }
+  });
+
+  fastify.delete('/session/:id', async (req, reply) => {
+    const closed = await browser.closeSession(req.params.id);
+    if (!closed) return reply.code(404).send({ error: 'Session not found', code: 'SESSION_NOT_FOUND' });
+    return { ok: true };
+  });
+
+  fastify.get('/sessions', async () => ({
+    sessions: browser.listSessions(),
+    count: browser.getSessionCount(),
+  }));
+}
+
+// ── Helper ──────────────────────────────────────────────────────────────────
+
+/**
+ * Render HTML in the requested output format.
+ * @param {string} html
+ * @param {string} format - 'markdown' or 'text'; any other value returns the HTML unchanged
+ * @returns {string}
+ */
+function formatContent(html, format) {
+  if (format === 'markdown') return toMarkdown(html);
+  if (format === 'text') return toText(html);
+  return html;
+}
+
+module.exports = { registerRoutes };
diff --git a/src/server.js b/src/server.js
new file mode 100644
index 0000000..3db6211
--- /dev/null
+++ b/src/server.js
@@ -0,0 +1,55 @@
+'use strict';
+
+require('dotenv').config();
+
+const { timingSafeEqual } = require('node:crypto');
+const Fastify = require('fastify');
+const cors = require('@fastify/cors');
+const browser = require('./browser');
+const { registerRoutes } = require('./routes');
+
+const PORT = parseInt(process.env.PORT || '3000', 10);
+const HOST = process.env.HOST || '0.0.0.0';
+const API_KEY = process.env.API_KEY || '';
+
+/**
+ * Compare a presented API key with the configured one in constant time.
+ *
+ * @param {string|undefined} presented - Value of the X-API-Key header
+ * @returns {boolean}
+ */
+function isValidKey(presented) {
+  const given = Buffer.from(String(presented || ''));
+  const expected = Buffer.from(API_KEY);
+  // timingSafeEqual throws on unequal lengths, so check length first
+  return given.length === expected.length && timingSafeEqual(given, expected);
+}
+
+/**
+ * Build the Fastify server, install auth and routes, and start listening.
+ * @returns {Promise<void>}
+ */
+async function start() {
+  const fastify = Fastify({
+    logger: { level: process.env.LOG_LEVEL || 'info' },
+  });
+
+  await fastify.register(cors, { origin: '*' });
+
+  // ── API key auth (all routes except GET /health) ─────────────────────────
+  if (API_KEY) {
+    fastify.addHook('onRequest', async (request, reply) => {
+      // Compare the path only, so /health?probe=1 is exempt as well
+      const path = request.url.split('?')[0];
+      if (path === '/health' || request.method === 'OPTIONS') return;
+      if (!isValidKey(request.headers['x-api-key'])) {
+        return reply.code(401).send({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
+      }
+    });
+  } else {
+    fastify.log.warn('API_KEY is not set — all endpoints are unauthenticated!');
+  }
+
+  await registerRoutes(fastify);
+
+  // ── Graceful shutdown ────────────────────────────────────────────────────
+  let shuttingDown = false;
+  const shutdown = async (signal) => {
+    // A second signal while closing must not start a second teardown
+    if (shuttingDown) return;
+    shuttingDown = true;
+    fastify.log.info(`${signal} received — shutting down gracefully`);
+    try {
+      await browser.shutdown();
+      await fastify.close();
+      process.exit(0);
+    } catch (err) {
+      fastify.log.error(err);
+      process.exit(1);
+    }
+  };
+
+  process.on('SIGTERM', () => shutdown('SIGTERM'));
+  process.on('SIGINT', () => shutdown('SIGINT'));
+
+  await fastify.listen({ port: PORT, host: HOST });
+  fastify.log.info(`muse-browser running on http://${HOST}:${PORT}`);
+  fastify.log.info(`MCP stdio: docker exec -i muse-browser node src/mcp-stdio.js`);
+}
+
+start().catch((err) => {
+  console.error('Fatal startup error:', err);
+  process.exit(1);
+});