Initial commit: muse-browser v0.1.0
This commit is contained in:
commit
b84613270a
7
.env.example
Normal file
7
.env.example
Normal file
@ -0,0 +1,7 @@
|
||||
API_KEY=
|
||||
PORT=3000
|
||||
HOST=0.0.0.0
|
||||
LOG_LEVEL=info
|
||||
SESSION_TTL_MS=1800000
|
||||
SESSION_CLEANUP_INTERVAL_MS=300000
|
||||
MAX_CONCURRENT_PAGES=10
|
||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
node_modules/
|
||||
.env
|
||||
*.log
|
||||
downloads/
|
||||
.DS_Store
|
||||
27
Dockerfile
Normal file
27
Dockerfile
Normal file
@ -0,0 +1,27 @@
|
||||
FROM node:20-bookworm-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install dependencies first (layer-cached until package.json changes)
|
||||
COPY package*.json ./
|
||||
RUN npm ci --omit=dev
|
||||
|
||||
# Install Chromium and all required system dependencies via Playwright's installer
|
||||
RUN npx playwright install chromium --with-deps
|
||||
|
||||
# Copy application source
|
||||
COPY src/ ./src/
|
||||
COPY .env.example ./
|
||||
|
||||
ENV NODE_ENV=production
|
||||
|
||||
EXPOSE 3000
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=25s --retries=3 \
|
||||
CMD node -e "\
|
||||
const http = require('http'); \
|
||||
http.get('http://localhost:3000/health', (r) => { \
|
||||
process.exit(r.statusCode === 200 ? 0 : 1); \
|
||||
}).on('error', () => process.exit(1));"
|
||||
|
||||
CMD ["node", "src/server.js"]
|
||||
18
docker-compose.yml
Normal file
18
docker-compose.yml
Normal file
@ -0,0 +1,18 @@
|
||||
services:
|
||||
muse-browser:
|
||||
build: .
|
||||
image: muse-browser:latest
|
||||
container_name: muse-browser
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "127.0.0.1:3000:3000" # localhost only — Caddy proxies publicly
|
||||
env_file:
|
||||
- .env
|
||||
shm_size: '2gb' # Chrome uses /dev/shm; default 64MB causes crashes
|
||||
security_opt:
|
||||
- seccomp=unconfined # needed alongside --no-sandbox in some kernel configs
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "3"
|
||||
27
package.json
Normal file
27
package.json
Normal file
@ -0,0 +1,27 @@
|
||||
{
|
||||
"name": "muse-browser",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"description": "Full-capability headless browser service — Muse building block",
|
||||
"main": "src/server.js",
|
||||
"scripts": {
|
||||
"start": "node src/server.js",
|
||||
"mcp": "node src/mcp-stdio.js",
|
||||
"dev": "node --watch src/server.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"fastify": "^4.28.0",
|
||||
"@fastify/cors": "^9.0.1",
|
||||
"playwright": "^1.44.0",
|
||||
"@mozilla/readability": "^0.5.0",
|
||||
"jsdom": "^24.1.0",
|
||||
"turndown": "^7.2.0",
|
||||
"@modelcontextprotocol/sdk": "^1.6.1",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"dotenv": "^16.4.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
}
|
||||
}
|
||||
172
src/browser.js
Normal file
172
src/browser.js
Normal file
@ -0,0 +1,172 @@
|
||||
'use strict';
|
||||
|
||||
const { chromium } = require('playwright');
|
||||
const { v4: uuidv4 } = require('uuid');
|
||||
|
||||
/**
 * Read a strictly positive integer setting from the environment.
 *
 * Falls back to `fallback` when the variable is unset, empty, non-numeric or
 * not greater than zero. Without this guard a value such as
 * `SESSION_CLEANUP_INTERVAL_MS=five` parses to NaN: `setInterval` would then
 * be scheduled with a NaN delay and `activePages >= MAX_PAGES` would never
 * be true, silently disabling the concurrency limit.
 *
 * @param {string} name - Environment variable name
 * @param {number} fallback - Value used when the variable is missing or invalid
 * @returns {number} The parsed value or `fallback`
 */
function readPositiveIntEnv(name, fallback) {
  const parsed = Number.parseInt(process.env[name] ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Maximum number of one-shot pages that may be open at the same time.
const MAX_PAGES = readPositiveIntEnv('MAX_CONCURRENT_PAGES', 10);
// Idle time (ms) after which a persistent session is closed by the cleanup timer.
const SESSION_TTL = readPositiveIntEnv('SESSION_TTL_MS', 1800000);
// How often (ms) the cleanup timer scans for expired sessions.
const CLEANUP_INTERVAL = readPositiveIntEnv('SESSION_CLEANUP_INTERVAL_MS', 300000);
|
||||
|
||||
let _browser = null;
|
||||
let activePages = 0;
|
||||
const sessions = new Map();
|
||||
|
||||
// Ad/tracker domains to block
|
||||
const BLOCKED_DOMAINS = [
|
||||
'doubleclick.net', 'googlesyndication.com', 'googleadservices.com',
|
||||
'google-analytics.com', 'googletagmanager.com', 'facebook.net',
|
||||
'connect.facebook.net', 'amazon-adsystem.com', 'outbrain.com',
|
||||
'taboola.com', 'criteo.com', 'hotjar.com', 'intercom.io',
|
||||
'advertising.com', 'moatads.com', 'scorecardresearch.com',
|
||||
'quantserve.com', 'adsrvr.org', 'adsafeprotected.com',
|
||||
'adnxs.com', 'rubiconproject.com', 'openx.net',
|
||||
];
|
||||
|
||||
// Chromium flags required in Docker
|
||||
const BROWSER_ARGS = [
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-accelerated-2d-canvas',
|
||||
'--no-first-run',
|
||||
'--no-zygote',
|
||||
'--disable-gpu',
|
||||
'--disable-background-networking',
|
||||
'--disable-client-side-phishing-detection',
|
||||
'--disable-default-apps',
|
||||
'--disable-extensions',
|
||||
];
|
||||
|
||||
// Minimal stealth — patches the most common bot-detection checks without extra deps
|
||||
const STEALTH_SCRIPT = `
|
||||
Object.defineProperty(navigator, 'webdriver', { get: () => false });
|
||||
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
|
||||
Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
|
||||
try { delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array; } catch(e) {}
|
||||
try { delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise; } catch(e) {}
|
||||
try { delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol; } catch(e) {}
|
||||
`;
|
||||
|
||||
// --- Browser lifecycle ---
|
||||
|
||||
// Launch currently in progress, shared by every caller that arrives while
// Chromium is still starting. Null when no launch is pending.
let _launching = null;

/**
 * Return the shared Chromium instance, launching it on first use or after a
 * disconnect.
 *
 * Concurrent callers share a single in-flight launch; previously each caller
 * that arrived before the first launch resolved started its own browser and
 * all but the last one were orphaned.
 *
 * @returns {Promise<import('playwright').Browser>} A connected browser
 * @throws Whatever `chromium.launch` rejects with; the next call retries.
 */
async function getBrowser() {
  if (_browser && _browser.isConnected()) return _browser;

  if (!_launching) {
    _launching = chromium
      .launch({ headless: true, args: BROWSER_ARGS })
      .then((launched) => {
        launched.on('disconnected', () => {
          // Only clear the slot if it still points at this instance, so a
          // late event from an old browser cannot discard a newer one.
          if (_browser === launched) _browser = null;
        });
        _browser = launched;
        return launched;
      })
      .finally(() => {
        // Success or failure, allow a fresh launch attempt next time.
        _launching = null;
      });
  }

  return _launching;
}
|
||||
|
||||
/**
 * Create a new browser context with the stealth script installed.
 *
 * @param {object} [options]
 * @param {string} [options.userAgent] - Passed through as the context user agent
 * @param {{width: number, height: number}} [options.viewport] - Defaults to 1280x720
 * @param {string} [options.locale] - Defaults to 'en-US'
 * @param {string} [options.timezone] - Defaults to 'America/Chicago'
 * @param {Object<string, string>} [options.headers] - Extra HTTP headers
 * @param {Array<object>} [options.cookies] - Cookies added to the context when non-empty
 * @returns {Promise<import('playwright').BrowserContext>}
 * @throws Rethrows any error from context setup after closing the context.
 */
async function newContext(options = {}) {
  const b = await getBrowser();
  const ctx = await b.newContext({
    userAgent: options.userAgent,
    viewport: options.viewport || { width: 1280, height: 720 },
    locale: options.locale || 'en-US',
    timezoneId: options.timezone || 'America/Chicago',
    extraHTTPHeaders: options.headers || {},
  });

  try {
    await ctx.addInitScript(STEALTH_SCRIPT);
    if (options.cookies && options.cookies.length > 0) {
      await ctx.addCookies(options.cookies);
    }
  } catch (err) {
    // Cookies come from the caller and may be malformed; do not leave a
    // half-configured context open when setup fails.
    await ctx.close().catch(() => {});
    throw err;
  }

  return ctx;
}
|
||||
|
||||
/**
 * Decide whether a request URL belongs to a blocked ad/tracker domain.
 *
 * Matches on the hostname only (exact domain or any subdomain). The previous
 * substring test on the full URL also blocked unrelated requests that merely
 * mentioned a blocked domain in their path or query string.
 *
 * @param {string} url - Absolute request URL
 * @returns {boolean} true when the request should be aborted
 */
function isBlockedUrl(url) {
  let host;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch (_) {
    return false; // unparsable URL: let the browser deal with it
  }
  return BLOCKED_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`));
}

/**
 * Install a catch-all route on `page` that aborts requests to blocked
 * domains and lets everything else through.
 *
 * @param {import('playwright').Page} page
 * @returns {Promise<void>} Settles once the route is installed; never rejects,
 *   so existing callers that ignore the return value stay safe.
 */
function applyAdBlocking(page) {
  const installed = page.route('**/*', async (route) => {
    try {
      if (isBlockedUrl(route.request().url())) {
        await route.abort();
      } else {
        await route.continue();
      }
    } catch (_) {
      // The page or context was closed while the request was in flight;
      // there is nothing left to abort or continue.
    }
  });
  return Promise.resolve(installed).then(() => {}, () => {});
}
|
||||
|
||||
// --- One-shot page (ephemeral: context is closed after fn completes) ---
|
||||
|
||||
/**
 * Run `fn` against a fresh, ephemeral page and close its context afterwards.
 *
 * @param {object} [options] - Forwarded to `newContext`; `blockAds: false`
 *   disables the ad-blocking route.
 * @param {(page: import('playwright').Page) => Promise<*>} fn - Work to run on the page
 * @returns {Promise<*>} Whatever `fn` resolves to
 * @throws {Error} With `statusCode = 429` when `MAX_PAGES` pages are already
 *   active; otherwise any error from context/page creation or from `fn`.
 */
async function withPage(options = {}, fn) {
  if (activePages >= MAX_PAGES) {
    const err = new Error(`Too many concurrent pages (max ${MAX_PAGES}). Try again shortly.`);
    err.statusCode = 429;
    throw err;
  }

  activePages++;
  let ctx;
  try {
    // Context and page creation happen inside the try block so that a failed
    // launch still releases the slot; previously the counter leaked and the
    // service eventually answered 429 to every request.
    ctx = await newContext(options);
    const page = await ctx.newPage();
    if (options.blockAds !== false) applyAdBlocking(page);
    return await fn(page);
  } finally {
    activePages--;
    if (ctx) await ctx.close().catch(() => {});
  }
}
|
||||
|
||||
// --- Persistent sessions ---
|
||||
|
||||
/**
 * Create a persistent session: a dedicated context plus one page, registered
 * in `sessions` under a new UUID.
 *
 * @param {object} [options] - Forwarded to `newContext`; `blockAds: false`
 *   disables the ad-blocking route.
 * @returns {Promise<{id: string, createdAt: Date}>}
 * @throws Rethrows page-creation errors after closing the new context.
 */
async function createSession(options = {}) {
  const id = uuidv4();
  const ctx = await newContext(options);

  let page;
  try {
    page = await ctx.newPage();
  } catch (err) {
    // The context is not registered anywhere yet, so nothing else would
    // ever close it if page creation fails.
    await ctx.close().catch(() => {});
    throw err;
  }

  if (options.blockAds !== false) applyAdBlocking(page);

  const session = { id, context: ctx, page, createdAt: new Date(), lastUsedAt: new Date() };
  sessions.set(id, session);
  return { id, createdAt: session.createdAt };
}
|
||||
|
||||
/**
 * Look up a session and mark it as just used.
 *
 * @param {string} id - Session id
 * @returns {object|null} The stored session record, or null when unknown
 */
function getSession(id) {
  const found = sessions.get(id);
  if (!found) {
    return null;
  }
  // Refreshing the timestamp keeps the session clear of the TTL sweep.
  found.lastUsedAt = new Date();
  return found;
}
|
||||
|
||||
/**
 * Remove a session from the registry and close its browser context.
 * Errors while closing the context are ignored (best-effort teardown).
 *
 * @param {string} id - Session id
 * @returns {Promise<boolean>} true if the session existed, false otherwise
 */
async function closeSession(id) {
  const entry = sessions.get(id);
  if (entry) {
    // Unregister first so the session is unreachable while it is closing.
    sessions.delete(id);
    await entry.context.close().catch(() => {});
  }
  return Boolean(entry);
}
|
||||
|
||||
/**
 * Summarise every open session without exposing its context or page.
 *
 * @returns {Array<{id: string, createdAt: Date, lastUsedAt: Date}>}
 */
function listSessions() {
  const summaries = [];
  for (const { id, createdAt, lastUsedAt } of sessions.values()) {
    summaries.push({ id, createdAt, lastUsedAt });
  }
  return summaries;
}
|
||||
|
||||
// --- Shutdown ---
|
||||
|
||||
/**
 * Tear the module down: stop the TTL sweep, close every session one after
 * another, then close the shared browser if one exists. Errors from closing
 * the browser are ignored.
 *
 * @returns {Promise<void>}
 */
async function shutdown() {
  clearInterval(_cleanupTimer);

  for (const id of sessions.keys()) {
    await closeSession(id);
  }

  if (!_browser) {
    return;
  }
  await _browser.close().catch(() => {});
}
|
||||
|
||||
// --- TTL cleanup ---
|
||||
|
||||
// Periodic sweep: every CLEANUP_INTERVAL ms, close each session whose last
// use is more than SESSION_TTL ms in the past.
const _cleanupTimer = setInterval(async () => {
  // Sessions last used before this instant have outlived their TTL.
  const cutoff = Date.now() - SESSION_TTL;
  for (const [id, session] of sessions) {
    const expired = session.lastUsedAt.getTime() < cutoff;
    if (expired) {
      await closeSession(id);
    }
  }
}, CLEANUP_INTERVAL);

// The sweep alone must not keep the process alive.
_cleanupTimer.unref();
|
||||
|
||||
module.exports = {
|
||||
withPage,
|
||||
createSession,
|
||||
getSession,
|
||||
closeSession,
|
||||
listSessions,
|
||||
shutdown,
|
||||
getActivePages: () => activePages,
|
||||
getSessionCount: () => sessions.size,
|
||||
};
|
||||
82
src/extract.js
Normal file
82
src/extract.js
Normal file
@ -0,0 +1,82 @@
|
||||
'use strict';
|
||||
|
||||
const { Readability } = require('@mozilla/readability');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const TurndownService = require('turndown');
|
||||
|
||||
const td = new TurndownService({
|
||||
headingStyle: 'atx',
|
||||
codeBlockStyle: 'fenced',
|
||||
bulletListMarker: '-',
|
||||
hr: '---',
|
||||
});
|
||||
|
||||
// Readability already strips nav/aside/footer; remove remaining noise from Turndown output
|
||||
td.remove(['script', 'style', 'iframe', 'noscript', 'svg', 'figure']);
|
||||
|
||||
/**
 * Parse HTML into a JSDOM instance, preferring the real page URL so relative
 * links can be resolved, and retrying without a URL if that fails (for
 * example because `url` is not a valid absolute URL).
 *
 * @param {string} html
 * @param {string} [url]
 * @returns {JSDOM}
 */
function parseHtmlDocument(html, url) {
  try {
    return new JSDOM(html, { url: url || 'https://example.com' });
  } catch (_) {
    return new JSDOM(html);
  }
}

/**
 * Run Readability on a copy of `document`.
 *
 * Readability mutates the document it is given, so it gets a clone and the
 * original stays intact for the <body> fallback.
 *
 * @param {Document} document
 * @returns {{ title: string, content: string }|null} null when no usable article was found
 */
function readMainArticle(document) {
  try {
    const reader = new Readability(document.cloneNode(true), {
      charThreshold: 20, // lower threshold catches short but valid articles
      keepClasses: false, // strip class attributes for cleaner output
    });
    const article = reader.parse();
    if (article && article.content && article.content.length > 100) {
      return { title: article.title || '', content: article.content };
    }
  } catch (_) {
    // Readability failed — the caller falls back to the full body
  }
  return null;
}

/**
 * Extract the primary article content from raw HTML using Mozilla Readability.
 * Falls back to the full <body> if Readability can't identify a main article,
 * and to the raw HTML if the document cannot be parsed at all.
 *
 * The HTML is parsed once and the JSDOM window is always closed afterwards,
 * so repeated calls do not accumulate live windows.
 *
 * @param {string} html - Raw page HTML
 * @param {string} url - Page URL (needed by Readability to resolve relative links)
 * @returns {{ title: string, content: string }} title + clean HTML content
 */
function extractContent(html, url) {
  let dom;
  try {
    dom = parseHtmlDocument(html, url);
  } catch (_) {
    return { title: '', content: html };
  }

  try {
    const doc = dom.window.document;
    const article = readMainArticle(doc);
    if (article) return article;

    // Fallback: use full body HTML
    return {
      title: doc.title || '',
      content: doc.body ? doc.body.innerHTML : html,
    };
  } catch (_) {
    return { title: '', content: html };
  } finally {
    dom.window.close();
  }
}
|
||||
|
||||
/**
 * Render an HTML fragment as Markdown via the shared Turndown instance.
 * If Turndown throws, the plain-text rendering is returned instead.
 *
 * @param {string} html
 * @returns {string} Markdown, or '' for empty input
 */
function toMarkdown(html) {
  if (!html) {
    return '';
  }

  let rendered;
  try {
    rendered = td.turndown(html);
  } catch (_) {
    rendered = toText(html);
  }
  return rendered;
}
|
||||
|
||||
// Character references decoded by toText, keyed by the lower-cased text
// between '&' and ';'.
const TEXT_ENTITIES = {
  nbsp: ' ',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  '#39': "'",
  amp: '&',
};

/**
 * Strip all HTML tags and collapse whitespace to plain text.
 *
 * <script> and <style> elements are removed together with their contents
 * (closing tags with trailing whitespace such as `</script >` included), and
 * the most common character references are decoded so the result reads as
 * text rather than markup. Decoding happens in a single pass after tag
 * removal, so `&amp;lt;` becomes the literal text `&lt;` and decoded angle
 * brackets are never mistaken for tags.
 *
 * @param {string} html
 * @returns {string} Plain text, or '' for empty input
 */
function toText(html) {
  if (!html) return '';
  return html
    .replace(/<script[\s\S]*?<\/script\s*>/gi, '')
    .replace(/<style[\s\S]*?<\/style\s*>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/&(nbsp|lt|gt|quot|apos|#39|amp);/gi, (_, name) => TEXT_ENTITIES[name.toLowerCase()])
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
module.exports = { extractContent, toMarkdown, toText };
|
||||
269
src/mcp-stdio.js
Normal file
269
src/mcp-stdio.js
Normal file
@ -0,0 +1,269 @@
|
||||
'use strict';
|
||||
|
||||
/**
|
||||
* muse-browser MCP server — stdio transport
|
||||
*
|
||||
* Runs inside the Docker container. Communicates with the Fastify HTTP server
|
||||
* on localhost:PORT. Each tool call makes an internal HTTP request.
|
||||
*
|
||||
* Usage (from Claude Desktop config):
|
||||
* "command": "ssh",
|
||||
* "args": ["-i", "~/.ssh/cortex", "root@cortex.hydrascale.net",
|
||||
* "docker exec -i muse-browser node src/mcp-stdio.js"]
|
||||
*/
|
||||
|
||||
require('dotenv').config();
|
||||
|
||||
const { McpServer } = require('@modelcontextprotocol/sdk/server/mcp.js');
|
||||
const { StdioServerTransport } = require('@modelcontextprotocol/sdk/server/stdio.js');
|
||||
const { z } = require('zod');
|
||||
|
||||
const PORT = process.env.PORT || 3000;
|
||||
const API_KEY = process.env.API_KEY || '';
|
||||
const BASE = `http://localhost:${PORT}`;
|
||||
|
||||
// ── Internal HTTP helpers ────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Call the local HTTP service and return its parsed JSON response.
 *
 * A non-JSON error body (for example a plain-text error page) no longer
 * surfaces as a JSON parse error: the HTTP status text is reported instead,
 * matching how `apiRaw` treats failed requests. The JSON content type is
 * only sent when a body is actually sent.
 *
 * @param {string} path - Route path, appended to BASE
 * @param {string} [method='GET'] - HTTP method
 * @param {object} [body] - JSON-serialisable request body
 * @returns {Promise<object>} Parsed response body
 * @throws {Error} With the server's `error` message (or the status text)
 *   on a non-2xx response, or when a 2xx response is not valid JSON.
 */
async function api(path, method = 'GET', body) {
  const headers = { 'X-API-Key': API_KEY };
  if (body) headers['Content-Type'] = 'application/json';

  const res = await fetch(`${BASE}${path}`, {
    method,
    headers,
    body: body ? JSON.stringify(body) : undefined,
  });

  let data;
  let parsed = true;
  try {
    data = await res.json();
  } catch (_) {
    parsed = false;
  }

  if (!res.ok) {
    throw new Error((data && data.error) || res.statusText || `HTTP ${res.status}`);
  }
  if (!parsed) {
    throw new Error(`Invalid JSON response from ${method} ${path}`);
  }
  return data;
}
|
||||
|
||||
/**
 * Call the local HTTP service and hand back the raw fetch Response, for
 * endpoints whose payload is not JSON (screenshots).
 *
 * @param {string} path - Route path, appended to BASE
 * @param {string} method - HTTP method
 * @param {object} [body] - JSON-serialisable request body
 * @returns {Promise<Response>} The successful response, body unread
 * @throws {Error} With the server's `error` message, or the status text when
 *   the error body is not JSON, on a non-2xx response.
 */
async function apiRaw(path, method, body) {
  const requestInit = {
    method,
    headers: { 'Content-Type': 'application/json', 'X-API-Key': API_KEY },
    body: body ? JSON.stringify(body) : undefined,
  };
  const response = await fetch(`${BASE}${path}`, requestInit);

  if (response.ok) {
    return response;
  }

  // Error responses are expected to be JSON; tolerate anything else.
  const failure = await response.json().catch(() => ({}));
  throw new Error(failure.error || response.statusText);
}
|
||||
|
||||
// ── MCP server ───────────────────────────────────────────────────────────────
|
||||
|
||||
const server = new McpServer({ name: 'muse-browser', version: '0.1.0' });
|
||||
|
||||
// ── fetch_page ───────────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: fetch_page — forwards its arguments to POST /fetch and returns the
// page title, URL and converted content as one text item.
server.tool(
  'fetch_page',
  [
    'Fetch a web page and return its content as markdown, HTML, or plain text. ',
    'Runs full Chromium with JavaScript execution. ',
    'Use extractMain:true (default) to get clean article content without nav/ads.',
  ].join(''),
  {
    url: z.string().url().describe('URL to fetch'),
    format: z.enum(['markdown', 'html', 'text']).default('markdown').describe('Output format'),
    waitFor: z
      .enum(['networkidle', 'domcontentloaded', 'load'])
      .default('domcontentloaded')
      .describe('When to consider the page loaded. Use networkidle for SPAs.'),
    blockAds: z.boolean().default(true).describe('Block ad and tracker requests'),
    extractMain: z.boolean().default(true).describe('Extract main article content (removes nav, footer, ads)'),
    timeout: z.number().default(30000).describe('Timeout in milliseconds'),
    sessionId: z.string().optional().describe('Use an existing persistent session'),
  },
  async (args) => {
    const { title, url, content } = await api('/fetch', 'POST', args);
    const text = `# ${title}\nURL: ${url}\n\n${content}`;
    return { content: [{ type: 'text', text }] };
  }
);
|
||||
|
||||
// ── take_screenshot ──────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: take_screenshot — forwards its arguments to POST /screenshot and
// returns the response bytes as a base64 PNG image item.
server.tool(
  'take_screenshot',
  [
    'Take a screenshot of a web page. Returns a PNG image. ',
    'Useful for visually verifying page state or capturing charts/visual content.',
  ].join(''),
  {
    url: z.string().url().optional().describe('URL to screenshot (required if no sessionId)'),
    fullPage: z.boolean().default(true).describe('Capture full scrollable page or just viewport'),
    selector: z.string().optional().describe('CSS selector — screenshot just this element'),
    waitFor: z.enum(['networkidle', 'domcontentloaded', 'load']).default('networkidle'),
    sessionId: z.string().optional().describe('Use an existing persistent session'),
    timeout: z.number().default(30000),
  },
  async (args) => {
    const response = await apiRaw('/screenshot', 'POST', args);
    const bytes = await response.arrayBuffer();
    const encoded = Buffer.from(bytes).toString('base64');
    return { content: [{ type: 'image', data: encoded, mimeType: 'image/png' }] };
  }
);
|
||||
|
||||
// ── run_javascript ───────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: run_javascript — forwards its arguments to POST /execute and returns
// the script result as text (strings verbatim, everything else as JSON).
server.tool(
  'run_javascript',
  'Execute JavaScript in a page context and return the result. ' +
    'Scripts are wrapped in an IIFE — use `return` to provide a value. ' +
    'Example: `return document.querySelectorAll("a").length`',
  {
    url: z.string().url().optional().describe('URL to navigate to first (required if no sessionId)'),
    script: z.string().describe('JavaScript to run in the page. Use return to get a value.'),
    sessionId: z.string().optional().describe('Use an existing persistent session'),
    timeout: z.number().default(30000),
  },
  async (args) => {
    const data = await api('/execute', 'POST', args);
    // A script that returns nothing leaves `result` out of the JSON response,
    // and JSON.stringify(undefined) is itself undefined. A text item must
    // carry a string, so spell the missing value out.
    const result = typeof data.result === 'string'
      ? data.result
      : JSON.stringify(data.result, null, 2) ?? 'undefined';
    return { content: [{ type: 'text', text: result }] };
  }
);
|
||||
|
||||
// ── create_session ───────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: create_session — forwards its arguments to POST /session and reports
// the new session id as text.
server.tool(
  'create_session',
  [
    'Create a persistent browser session. ',
    'Use this when you need multi-step interactions: logging in, filling forms, navigating across pages. ',
    'Returns a session ID — pass it to other tools.',
  ].join(''),
  {
    userAgent: z.string().optional().describe('Custom user agent'),
    locale: z.string().default('en-US'),
    timezone: z.string().default('America/Chicago'),
    blockAds: z.boolean().default(true),
  },
  async (args) => {
    const { id } = await api('/session', 'POST', args);
    const text = `Session created.\nSession ID: ${id}\nPass this ID to navigate, click, type_text, and other session tools.`;
    return { content: [{ type: 'text', text }] };
  }
);
|
||||
|
||||
// ── close_session ────────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: close_session — issues DELETE /session/:id for the given session.
server.tool(
  'close_session',
  'Close and destroy a browser session, freeing its resources.',
  {
    sessionId: z.string().describe('Session ID to close'),
  },
  async ({ sessionId }) => {
    // The id is an arbitrary string from the caller; escape it so characters
    // such as '/', '?' or '#' cannot change which route is hit.
    await api(`/session/${encodeURIComponent(sessionId)}`, 'DELETE');
    return { content: [{ type: 'text', text: `Session ${sessionId} closed.` }] };
  }
);
|
||||
|
||||
// ── navigate ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: navigate — forwards its arguments (session id, url and formatting
// options) to POST /fetch and returns the resulting page as text.
server.tool(
  'navigate',
  'Navigate a session to a URL and return the page content as markdown.',
  {
    sessionId: z.string().describe('Session ID'),
    url: z.string().url().describe('URL to navigate to'),
    format: z.enum(['markdown', 'html', 'text']).default('markdown'),
    waitFor: z.enum(['networkidle', 'domcontentloaded', 'load']).default('domcontentloaded'),
    extractMain: z.boolean().default(true),
    timeout: z.number().default(30000),
  },
  async (args) => {
    const { url, title, content } = await api('/fetch', 'POST', args);
    const text = `Navigated to: ${url}\n# ${title}\n\n${content}`;
    return { content: [{ type: 'text', text }] };
  }
);
|
||||
|
||||
// ── click ────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: click — sends a 'click' action to POST /interact and reports the
// title and URL returned by the service.
server.tool(
  'click',
  'Click an element in a browser session by CSS selector.',
  {
    sessionId: z.string().describe('Session ID'),
    selector: z.string().describe('CSS selector of the element to click'),
    timeout: z.number().default(5000),
  },
  async (args) => {
    const { sessionId, selector, timeout } = args;
    const request = { sessionId, action: 'click', selector, timeout };
    const { title, url } = await api('/interact', 'POST', request);
    const text = `Clicked "${selector}". Now on: ${title} (${url})`;
    return { content: [{ type: 'text', text }] };
  }
);
|
||||
|
||||
// ── type_text ────────────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: type_text — sends a 'type' action to POST /interact (the text goes in
// the `value` field) and reports the title and URL returned by the service.
server.tool(
  'type_text',
  'Type text into an input field in a browser session.',
  {
    sessionId: z.string().describe('Session ID'),
    selector: z.string().describe('CSS selector of the input field'),
    text: z.string().describe('Text to type into the field'),
    submit: z.boolean().default(false).describe('Press Enter after typing to submit'),
    timeout: z.number().default(5000),
  },
  async (args) => {
    const { sessionId, selector, text, submit, timeout } = args;
    const request = { sessionId, action: 'type', selector, value: text, submit, timeout };
    const { title, url } = await api('/interact', 'POST', request);
    const suffix = submit ? ' and submitted' : '';
    const message = `Typed into "${selector}"${suffix}. Now on: ${title} (${url})`;
    return { content: [{ type: 'text', text: message }] };
  }
);
|
||||
|
||||
// ── get_page_content ─────────────────────────────────────────────────────────
|
||||
|
||||
// Tool: get_page_content — posts the session id and formatting options to
// /fetch without a url and returns the response as text.
server.tool(
  'get_page_content',
  'Get the content of the current page in a session without navigating.',
  {
    sessionId: z.string().describe('Session ID'),
    format: z.enum(['markdown', 'html', 'text']).default('markdown'),
    extractMain: z.boolean().default(true),
  },
  async (args) => {
    // Drop any `url` so the request carries only the session id and the
    // formatting options.
    const { url: _unused, ...request } = args;
    const { title, url, content } = await api('/fetch', 'POST', request);
    const text = `# ${title}\nURL: ${url}\n\n${content}`;
    return { content: [{ type: 'text', text }] };
  }
);
|
||||
|
||||
// ── get_page_screenshot ──────────────────────────────────────────────────────
|
||||
|
||||
// Tool: get_page_screenshot — posts the session id and fullPage flag to
// /screenshot and returns the response bytes as a base64 PNG image item.
server.tool(
  'get_page_screenshot',
  'Take a screenshot of the current page in a session.',
  {
    sessionId: z.string().describe('Session ID'),
    fullPage: z.boolean().default(true),
  },
  async (args) => {
    const response = await apiRaw('/screenshot', 'POST', args);
    const bytes = await response.arrayBuffer();
    const encoded = Buffer.from(bytes).toString('base64');
    return { content: [{ type: 'image', data: encoded, mimeType: 'image/png' }] };
  }
);
|
||||
|
||||
// ── Start ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Connect the MCP server to a stdio transport.
 *
 * @returns {Promise<void>} Settles when `server.connect` does
 */
async function main() {
  const stdio = new StdioServerTransport();
  await server.connect(stdio);
}
|
||||
|
||||
main().catch((err) => {
|
||||
process.stderr.write(`MCP server error: ${err.message}\n`);
|
||||
process.exit(1);
|
||||
});
|
||||
285
src/routes.js
Normal file
285
src/routes.js
Normal file
@ -0,0 +1,285 @@
|
||||
'use strict';
|
||||
|
||||
const browser = require('./browser');
|
||||
const { extractContent, toMarkdown, toText } = require('./extract');
|
||||
|
||||
/**
 * Register every HTTP route of the service on a Fastify instance.
 *
 * Routes: GET /health, POST /fetch, POST /screenshot, POST /execute,
 * POST /interact, POST /session, DELETE /session/:id, GET /sessions.
 *
 * Error responses have the shape `{ error: string, code: string }`. Errors
 * that carry a `statusCode` (such as the 429 thrown by `browser.withPage`
 * when the page limit is reached) keep that status on every route; all
 * other failures are reported as 500.
 *
 * NOTE(review): `url` values are handed to `page.goto` as received; nothing
 * here rejects non-http(s) schemes such as `file://`. Confirm whether that
 * is intended for this deployment.
 *
 * @param {import('fastify').FastifyInstance} fastify
 * @returns {Promise<void>}
 */
async function registerRoutes(fastify) {
  // ── Shared reply helpers ────────────────────────────────────────────────────

  const sessionNotFound = (reply) =>
    reply.code(404).send({ error: 'Session not found', code: 'SESSION_NOT_FOUND' });

  const missingUrl = (reply) =>
    reply.code(400).send({ error: 'url is required when no sessionId', code: 'MISSING_URL' });

  const sendError = (reply, err, code) =>
    reply.code(err.statusCode || 500).send({ error: err.message, code });

  // Actions of /interact that cannot run without a target element.
  const SELECTOR_ACTIONS = ['click', 'type', 'select', 'wait'];

  // ── Health ──────────────────────────────────────────────────────────────────

  fastify.get('/health', async () => ({
    status: 'ok',
    sessions: browser.getSessionCount(),
    activePages: browser.getActivePages(),
    uptime: Math.round(process.uptime()),
    timestamp: new Date().toISOString(),
  }));

  // ── Fetch ───────────────────────────────────────────────────────────────────

  fastify.post('/fetch', {
    schema: {
      body: {
        type: 'object',
        // `url` is deliberately not required: with a sessionId and no url the
        // route returns the session's current page.
        properties: {
          url: { type: 'string' },
          format: { type: 'string', enum: ['markdown', 'html', 'text'], default: 'markdown' },
          waitFor: { type: 'string', enum: ['networkidle', 'domcontentloaded', 'load'], default: 'domcontentloaded' },
          blockAds: { type: 'boolean', default: true },
          extractMain: { type: 'boolean', default: true },
          timeout: { type: 'number', default: 30000 },
          sessionId: { type: 'string' },
          headers: { type: 'object', additionalProperties: { type: 'string' } },
          cookies: { type: 'array' },
        },
      },
    },
  }, async (req, reply) => {
    const {
      url, format = 'markdown', extractMain = true, sessionId,
      waitFor = 'domcontentloaded', timeout = 30000,
      blockAds = true, headers, cookies,
    } = req.body;

    if (!url && !sessionId) return missingUrl(reply);

    try {
      let html, title, finalUrl;

      if (sessionId) {
        // Use existing session — navigate if url provided, otherwise get current page
        const session = browser.getSession(sessionId);
        if (!session) return sessionNotFound(reply);
        if (url) {
          await session.page.goto(url, { waitUntil: waitFor, timeout });
        }
        html = await session.page.content();
        title = await session.page.title();
        finalUrl = session.page.url();
      } else {
        // One-shot ephemeral page
        await browser.withPage({ blockAds, headers, cookies }, async (page) => {
          await page.goto(url, { waitUntil: waitFor, timeout });
          html = await page.content();
          title = await page.title();
          finalUrl = page.url();
        });
      }

      // Extract and convert
      let content;
      if (extractMain) {
        const extracted = extractContent(html, finalUrl);
        title = extracted.title || title;
        content = formatContent(extracted.content, format);
      } else {
        content = formatContent(html, format);
      }

      return { url: finalUrl, title, content, format };
    } catch (err) {
      return sendError(reply, err, 'FETCH_ERROR');
    }
  });

  // ── Screenshot ──────────────────────────────────────────────────────────────

  fastify.post('/screenshot', {
    schema: {
      body: {
        type: 'object',
        properties: {
          url: { type: 'string' },
          fullPage: { type: 'boolean', default: true },
          format: { type: 'string', enum: ['png', 'jpeg'], default: 'png' },
          quality: { type: 'number', default: 80 },
          selector: { type: 'string' },
          waitFor: { type: 'string', enum: ['networkidle', 'domcontentloaded', 'load'], default: 'networkidle' },
          timeout: { type: 'number', default: 30000 },
          sessionId: { type: 'string' },
        },
      },
    },
  }, async (req, reply) => {
    const {
      url, fullPage = true, format = 'png', quality = 80,
      selector, waitFor = 'networkidle', timeout = 30000, sessionId,
    } = req.body;

    try {
      let imgBuffer;

      const capture = async (page) => {
        const opts = {
          type: format,
          ...(format === 'jpeg' ? { quality } : {}), // quality is only passed for JPEG
          fullPage: selector ? false : fullPage,
        };
        if (selector) {
          imgBuffer = await page.locator(selector).first().screenshot(opts);
        } else {
          imgBuffer = await page.screenshot(opts);
        }
      };

      if (sessionId) {
        const session = browser.getSession(sessionId);
        if (!session) return sessionNotFound(reply);
        if (url) await session.page.goto(url, { waitUntil: waitFor, timeout });
        await capture(session.page);
      } else {
        if (!url) return missingUrl(reply);
        await browser.withPage({}, async (page) => {
          await page.goto(url, { waitUntil: waitFor, timeout });
          await capture(page);
        });
      }

      // Return the reply so the async handler signals that the response has
      // been sent explicitly.
      return reply.type(format === 'jpeg' ? 'image/jpeg' : 'image/png').send(imgBuffer);
    } catch (err) {
      return sendError(reply, err, 'SCREENSHOT_ERROR');
    }
  });

  // ── Execute JS ──────────────────────────────────────────────────────────────

  fastify.post('/execute', {
    schema: {
      body: {
        type: 'object',
        required: ['script'],
        properties: {
          url: { type: 'string' },
          script: { type: 'string' },
          sessionId: { type: 'string' },
          waitFor: { type: 'string', enum: ['networkidle', 'domcontentloaded', 'load'], default: 'domcontentloaded' },
          timeout: { type: 'number', default: 30000 },
        },
      },
    },
  }, async (req, reply) => {
    const { url, script, sessionId, waitFor = 'domcontentloaded', timeout = 30000 } = req.body;

    // Always wrap in IIFE so callers can use `return` naturally
    const wrappedScript = `(function() { ${script} })()`;

    try {
      let result;

      const runScript = async (page) => {
        result = await page.evaluate(wrappedScript);
      };

      if (sessionId) {
        const session = browser.getSession(sessionId);
        if (!session) return sessionNotFound(reply);
        if (url) await session.page.goto(url, { waitUntil: waitFor, timeout });
        await runScript(session.page);
      } else {
        if (!url) return missingUrl(reply);
        await browser.withPage({}, async (page) => {
          await page.goto(url, { waitUntil: waitFor, timeout });
          await runScript(page);
        });
      }

      return { result };
    } catch (err) {
      return sendError(reply, err, 'EXECUTE_ERROR');
    }
  });

  // ── Interact (click / type / select / wait / scroll) ────────────────────────

  fastify.post('/interact', {
    schema: {
      body: {
        type: 'object',
        required: ['sessionId', 'action'],
        properties: {
          sessionId: { type: 'string' },
          action: { type: 'string', enum: ['click', 'type', 'select', 'wait', 'scroll'] },
          selector: { type: 'string' },
          value: { type: 'string' },
          submit: { type: 'boolean', default: false },
          timeout: { type: 'number', default: 5000 },
        },
      },
    },
  }, async (req, reply) => {
    const { sessionId, action, selector, value, submit = false, timeout = 5000 } = req.body;

    const session = browser.getSession(sessionId);
    if (!session) return sessionNotFound(reply);

    // A missing selector is a caller mistake, not a server failure.
    if (SELECTOR_ACTIONS.includes(action) && !selector) {
      return reply.code(400).send({
        error: `selector is required for action "${action}"`,
        code: 'MISSING_SELECTOR',
      });
    }

    const page = session.page;
    try {
      switch (action) {
        case 'click':
          await page.click(selector, { timeout });
          await page.waitForLoadState('domcontentloaded').catch(() => {});
          break;
        case 'type':
          await page.fill(selector, value || '', { timeout });
          if (submit) {
            await page.press(selector, 'Enter');
            await page.waitForLoadState('domcontentloaded').catch(() => {});
          }
          break;
        case 'select':
          await page.selectOption(selector, value || '', { timeout });
          break;
        case 'wait':
          await page.waitForSelector(selector, { timeout });
          break;
        case 'scroll':
          if (selector) {
            await page.locator(selector).scrollIntoViewIfNeeded({ timeout });
          } else {
            // No selector: scroll to the bottom of the page
            await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
          }
          break;
        default:
          return reply.code(400).send({ error: `Unknown action: ${action}`, code: 'UNKNOWN_ACTION' });
      }

      const title = await page.title();
      const url = page.url();
      return { ok: true, title, url };
    } catch (err) {
      return sendError(reply, err, 'INTERACT_ERROR');
    }
  });

  // ── Sessions ────────────────────────────────────────────────────────────────

  fastify.post('/session', async (req, reply) => {
    try {
      const session = await browser.createSession(req.body || {});
      return session;
    } catch (err) {
      return sendError(reply, err, 'SESSION_CREATE_ERROR');
    }
  });

  fastify.delete('/session/:id', async (req, reply) => {
    const closed = await browser.closeSession(req.params.id);
    if (!closed) return sessionNotFound(reply);
    return { ok: true };
  });

  fastify.get('/sessions', async () => ({
    sessions: browser.listSessions(),
    count: browser.getSessionCount(),
  }));
}
|
||||
|
||||
// ── Helper ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Convert an HTML string to the requested output format.
 *
 * @param {string} html - HTML to convert
 * @param {string} format - 'markdown', 'text', or anything else for raw HTML
 * @returns {string} Converted content; `html` unchanged for other formats
 */
function formatContent(html, format) {
  switch (format) {
    case 'markdown':
      return toMarkdown(html);
    case 'text':
      return toText(html);
    default:
      return html;
  }
}
|
||||
|
||||
module.exports = { registerRoutes };
|
||||
55
src/server.js
Normal file
55
src/server.js
Normal file
@ -0,0 +1,55 @@
|
||||
'use strict';
|
||||
|
||||
require('dotenv').config();
|
||||
|
||||
const Fastify = require('fastify');
|
||||
const cors = require('@fastify/cors');
|
||||
const browser = require('./browser');
|
||||
const { registerRoutes } = require('./routes');
|
||||
|
||||
const PORT = parseInt(process.env.PORT || '3000', 10);
|
||||
const HOST = process.env.HOST || '0.0.0.0';
|
||||
const API_KEY = process.env.API_KEY || '';
|
||||
|
||||
/**
 * Build the Fastify server, install CORS and API-key authentication,
 * register the routes, wire up signal handlers and start listening on
 * HOST:PORT.
 *
 * @returns {Promise<void>} Resolves once the server is listening
 * @throws Any error from plugin registration, route registration or `listen`
 */
async function start() {
  // Required locally so this function brings its own dependency into scope.
  const { timingSafeEqual } = require('node:crypto');

  const fastify = Fastify({
    logger: { level: process.env.LOG_LEVEL || 'info' },
  });

  await fastify.register(cors, { origin: '*' });

  // ── API key auth (all routes except /health) ─────────────────────────────
  if (API_KEY) {
    const expectedKey = Buffer.from(API_KEY);

    // Constant-time comparison: a plain `!==` can leak, through response
    // timing, how much of a guessed key is correct.
    const isValidKey = (provided) => {
      if (typeof provided !== 'string') return false;
      const candidate = Buffer.from(provided);
      return candidate.length === expectedKey.length && timingSafeEqual(candidate, expectedKey);
    };

    fastify.addHook('onRequest', async (request, reply) => {
      // Health checks and CORS preflight requests carry no API key.
      if (request.url === '/health' || request.method === 'OPTIONS') return;
      if (!isValidKey(request.headers['x-api-key'])) {
        return reply.code(401).send({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
      }
    });
  } else {
    fastify.log.warn('API_KEY is not set — all endpoints are unauthenticated!');
  }

  await registerRoutes(fastify);

  // ── Graceful shutdown ────────────────────────────────────────────────────
  let shuttingDown = false;
  const shutdown = async (signal) => {
    // A second signal while teardown is running must not start it again.
    if (shuttingDown) return;
    shuttingDown = true;

    fastify.log.info(`${signal} received — shutting down gracefully`);
    let exitCode = 0;
    try {
      await browser.shutdown();
      await fastify.close();
    } catch (err) {
      // Without this the rejection would go unhandled and the process would
      // never reach process.exit.
      fastify.log.error(err, 'Error during shutdown');
      exitCode = 1;
    }
    process.exit(exitCode);
  };

  process.on('SIGTERM', () => shutdown('SIGTERM'));
  process.on('SIGINT', () => shutdown('SIGINT'));

  await fastify.listen({ port: PORT, host: HOST });
  fastify.log.info(`muse-browser running on http://${HOST}:${PORT}`);
  fastify.log.info(`MCP stdio: docker exec -i muse-browser node src/mcp-stdio.js`);
}
|
||||
|
||||
start().catch((err) => {
|
||||
console.error('Fatal startup error:', err);
|
||||
process.exit(1);
|
||||
});
|
||||
Loading…
Reference in New Issue
Block a user