diff --git a/agent.py b/agent.py index a3daaf88..c70dab0e 100644 --- a/agent.py +++ b/agent.py @@ -144,6 +144,7 @@ async def run_autonomous_agent( agent_type: Optional[str] = None, testing_feature_id: Optional[int] = None, testing_feature_ids: Optional[list[int]] = None, + testing_mode: str = "full", ) -> None: """ Run the autonomous agent loop. @@ -158,6 +159,7 @@ async def run_autonomous_agent( agent_type: Type of agent: "initializer", "coding", "testing", or None (auto-detect) testing_feature_id: For testing agents, the pre-claimed feature ID to test (legacy single mode) testing_feature_ids: For testing agents, list of feature IDs to batch test + testing_mode: Testing mode - "full" or "smart" """ print("\n" + "=" * 70) print(" AUTONOMOUS CODING AGENT") @@ -250,7 +252,29 @@ async def run_autonomous_agent( agent_id = f"feature-{feature_id}" else: agent_id = None - client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_id=agent_id, agent_type=agent_type) + + # Get feature category for smart testing mode + feature_category = None + if feature_id and testing_mode == "smart": + try: + from api.database import Feature, get_session + session = get_session(str(project_dir)) + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if feature: + feature_category = feature.category + print(f" Feature category: {feature_category} (for smart testing)") + session.close() + except Exception as e: + print(f" Warning: Could not get feature category: {e}") + + client = create_client( + project_dir, model, + yolo_mode=yolo_mode, + agent_id=agent_id, + agent_type=agent_type, + testing_mode=testing_mode, + feature_category=feature_category, + ) # Choose prompt based on agent type if agent_type == "initializer": diff --git a/autonomous_agent_demo.py b/autonomous_agent_demo.py index 918b2c1b..29d8d83d 100644 --- a/autonomous_agent_demo.py +++ b/autonomous_agent_demo.py @@ -186,6 +186,14 @@ def parse_args() -> argparse.Namespace: help="Max features 
per coding agent batch (1-3, default: 3)", ) + parser.add_argument( + "--testing-mode", + type=str, + default="full", + choices=["full", "smart"], + help="Testing mode: full (always Playwright), smart (Playwright for UI only)", + ) + return parser.parse_args() @@ -269,6 +277,7 @@ def main() -> None: agent_type=args.agent_type, testing_feature_id=args.testing_feature_id, testing_feature_ids=testing_feature_ids, + testing_mode=args.testing_mode, ) ) else: @@ -300,6 +309,7 @@ def main() -> None: testing_agent_ratio=args.testing_ratio, testing_batch_size=args.testing_batch_size, batch_size=args.batch_size, + testing_mode=args.testing_mode, ) ) except KeyboardInterrupt: diff --git a/client.py b/client.py index a81a66db..cfe5af29 100644 --- a/client.py +++ b/client.py @@ -278,12 +278,45 @@ def get_extra_read_paths() -> list[Path]: ] +def should_use_playwright(testing_mode: str, feature_category: str | None, yolo_mode: bool) -> bool: + """ + Determine if Playwright tools should be included based on testing mode and feature category. 
+ + Args: + testing_mode: Testing mode - "full" or "smart" + feature_category: Category of the feature (e.g., "API", "UI", "Database") + yolo_mode: Whether YOLO mode is enabled (overrides everything) + + Returns: + True if Playwright tools should be included, False otherwise + """ + # YOLO mode always disables Playwright + if yolo_mode: + return False + + # "smart" mode only uses Playwright for UI features + if testing_mode == "smart": + if feature_category: + category_lower = feature_category.lower() + # Exclude for API/backend features + api_keywords = ["api", "backend", "database", "db", "server", "endpoint", "service"] + if any(kw in category_lower for kw in api_keywords): + return False + # Default: use Playwright (for UI features or unknown categories) + return True + + # "full" mode (default) always uses Playwright + return True + + def create_client( project_dir: Path, model: str, yolo_mode: bool = False, agent_id: str | None = None, agent_type: str = "coding", + testing_mode: str = "full", + feature_category: str | None = None, ): """ Create a Claude Agent SDK client with multi-layered security. @@ -296,6 +329,8 @@ def create_client( When provided, each agent gets its own browser profile. agent_type: One of "coding", "testing", or "initializer". Controls which MCP tools are exposed and the max_turns limit. + testing_mode: Testing mode - "full" (always Playwright), "smart" (UI only) + feature_category: Category of the feature being worked on (for smart mode) Returns: Configured ClaudeSDKClient (from claude_agent_sdk) @@ -327,10 +362,12 @@ def create_client( } max_turns = max_turns_map.get(agent_type, 300) + # Determine if Playwright should be used + use_playwright = should_use_playwright(testing_mode, feature_category, yolo_mode) + # Build allowed tools list based on mode and agent type. - # In YOLO mode, exclude Playwright tools for faster prototyping. 
allowed_tools = [*BUILTIN_TOOLS, *feature_tools] - if not yolo_mode: + if use_playwright: allowed_tools.extend(PLAYWRIGHT_TOOLS) # Build permissions list. @@ -363,8 +400,8 @@ def create_client( permissions_list.append(f"Glob({path}/**)") permissions_list.append(f"Grep({path}/**)") - if not yolo_mode: - # Allow Playwright MCP tools for browser automation (standard mode only) + if use_playwright: + # Allow Playwright MCP tools for browser automation permissions_list.extend(PLAYWRIGHT_TOOLS) # Create comprehensive security settings @@ -394,8 +431,11 @@ def create_client( if extra_read_paths: print(f" - Extra read paths (validated): {', '.join(str(p) for p in extra_read_paths)}") print(" - Bash commands restricted to allowlist (see security.py)") - if yolo_mode: - print(" - MCP servers: features (database) - YOLO MODE (no Playwright)") + if not use_playwright: + reason = "YOLO MODE" if yolo_mode else f"testing_mode={testing_mode}" + if testing_mode == "smart" and feature_category: + reason += f", category={feature_category}" + print(f" - MCP servers: features (database) - NO Playwright ({reason})") else: print(" - MCP servers: playwright (browser), features (database)") print(" - Project settings enabled (skills, commands, CLAUDE.md)") @@ -421,8 +461,8 @@ def create_client( }, }, } - if not yolo_mode: - # Include Playwright MCP server for browser automation (standard mode only) + if use_playwright: + # Include Playwright MCP server for browser automation # Browser and headless mode configurable via environment variables browser = get_playwright_browser() playwright_args = [ diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py index 856e33cb..13663230 100644 --- a/parallel_orchestrator.py +++ b/parallel_orchestrator.py @@ -155,6 +155,7 @@ def __init__( testing_agent_ratio: int = 1, testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE, batch_size: int = 3, + testing_mode: str = "full", on_output: Callable[[int, str], None] | None = None, on_status: 
Callable[[int, str], None] | None = None, ): @@ -170,6 +171,7 @@ def __init__( 0 = disabled, 1-3 = maintain that many testing agents running independently. testing_batch_size: Number of features to include per testing session (1-5). Each testing agent receives this many features to regression test. + testing_mode: Testing mode - full (always Playwright) or smart (UI only) on_output: Callback for agent output (feature_id, line) on_status: Callback for agent status changes (feature_id, status) """ @@ -178,6 +180,7 @@ def __init__( self.model = model self.yolo_mode = yolo_mode self.testing_agent_ratio = min(max(testing_agent_ratio, 0), 3) # Clamp 0-3 + self.testing_mode = testing_mode self.testing_batch_size = min(max(testing_batch_size, 1), 5) # Clamp 1-5 self.batch_size = min(max(batch_size, 1), 3) # Clamp 1-3 self.on_output = on_output @@ -828,6 +831,7 @@ def _spawn_coding_agent(self, feature_id: int) -> tuple[bool, str]: "--max-iterations", "1", "--agent-type", "coding", "--feature-id", str(feature_id), + "--testing-mode", self.testing_mode, ] if self.model: cmd.extend(["--model", self.model]) @@ -1651,6 +1655,7 @@ async def run_parallel_orchestrator( testing_agent_ratio: int = 1, testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE, batch_size: int = 3, + testing_mode: str = "full", ) -> None: """Run the unified orchestrator. 
@@ -1662,8 +1667,9 @@ async def run_parallel_orchestrator( testing_agent_ratio: Number of regression agents to maintain (0-3) testing_batch_size: Number of features per testing batch (1-5) batch_size: Max features per coding agent batch (1-3) + testing_mode: Testing mode - full or smart """ - print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}", flush=True) + print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}, testing_mode={testing_mode}", flush=True) orchestrator = ParallelOrchestrator( project_dir=project_dir, max_concurrency=max_concurrency, @@ -1672,6 +1678,7 @@ async def run_parallel_orchestrator( testing_agent_ratio=testing_agent_ratio, testing_batch_size=testing_batch_size, batch_size=batch_size, + testing_mode=testing_mode, ) # Set up cleanup to run on exit (handles normal exit, exceptions) @@ -1763,6 +1770,13 @@ def main(): default=3, help="Max features per coding agent batch (1-5, default: 3)", ) + parser.add_argument( + "--testing-mode", + type=str, + default="full", + choices=["full", "smart"], + help="Testing mode: full (always Playwright), smart (Playwright for UI only)", + ) args = parser.parse_args() @@ -1791,6 +1805,7 @@ def main(): testing_agent_ratio=args.testing_agent_ratio, testing_batch_size=args.testing_batch_size, batch_size=args.batch_size, + testing_mode=args.testing_mode, )) except KeyboardInterrupt: print("\n\nInterrupted by user", flush=True) diff --git a/server/routers/agent.py b/server/routers/agent.py index 26605e4b..039f6e67 100644 --- a/server/routers/agent.py +++ b/server/routers/agent.py @@ -17,11 +17,11 @@ from ..utils.validation import validate_project_name -def _get_settings_defaults() -> tuple[bool, str, int, bool, int]: +def _get_settings_defaults() -> tuple[bool, str, int, bool, int, str]: """Get defaults from global settings. 
Returns: - Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size) + Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_mode) """ import sys root = Path(__file__).parent.parent.parent @@ -47,7 +47,10 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int]: except (ValueError, TypeError): batch_size = 3 - return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size + # Get testing mode (full, smart) + testing_mode = settings.get("testing_mode", "full") + + return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_mode router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"]) @@ -84,6 +87,7 @@ async def get_agent_status(project_name: str): parallel_mode=manager.parallel_mode, max_concurrency=manager.max_concurrency, testing_agent_ratio=manager.testing_agent_ratio, + testing_mode=getattr(manager, 'testing_mode', 'full'), ) @@ -96,7 +100,7 @@ async def start_agent( manager = get_project_manager(project_name) # Get defaults from global settings if not provided in request - default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size = _get_settings_defaults() + default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size, default_testing_mode = _get_settings_defaults() yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo model = request.model if request.model else default_model @@ -104,6 +108,7 @@ async def start_agent( testing_agent_ratio = request.testing_agent_ratio if request.testing_agent_ratio is not None else default_testing_ratio batch_size = default_batch_size + testing_mode = request.testing_mode if request.testing_mode else default_testing_mode success, message = await manager.start( yolo_mode=yolo_mode, @@ -112,6 +117,7 @@ async def start_agent( testing_agent_ratio=testing_agent_ratio, playwright_headless=playwright_headless, 
batch_size=batch_size, + testing_mode=testing_mode, ) # Notify scheduler of manual start (to prevent auto-stop during scheduled window) diff --git a/server/routers/settings.py b/server/routers/settings.py index 6137c63c..e95078db 100644 --- a/server/routers/settings.py +++ b/server/routers/settings.py @@ -111,6 +111,7 @@ async def get_settings(): glm_mode=glm_mode, ollama_mode=ollama_mode, testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), + testing_mode=all_settings.get("testing_mode", "full"), playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), batch_size=_parse_int(all_settings.get("batch_size"), 3), api_provider=api_provider, @@ -138,6 +139,9 @@ async def update_settings(update: SettingsUpdate): if update.batch_size is not None: set_setting("batch_size", str(update.batch_size)) + if update.testing_mode is not None: + set_setting("testing_mode", update.testing_mode) + # API provider settings if update.api_provider is not None: old_provider = get_setting("api_provider", "claude") @@ -175,6 +179,7 @@ async def update_settings(update: SettingsUpdate): glm_mode=glm_mode, ollama_mode=ollama_mode, testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), + testing_mode=all_settings.get("testing_mode", "full"), playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), batch_size=_parse_int(all_settings.get("batch_size"), 3), api_provider=api_provider, diff --git a/server/schemas.py b/server/schemas.py index 5f546e2b..c6945f0c 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -20,6 +20,12 @@ from registry import DEFAULT_MODEL, VALID_MODELS +# Valid testing modes for Playwright browser control +# - "full": Always use Playwright for all features +# - "smart": Use Playwright only for UI features (skip for API features) +VALID_TESTING_MODES = {"full", "smart"} +DEFAULT_TESTING_MODE = "full" + # 
@field_validator('testing_mode')
@classmethod
def validate_testing_mode(cls, v: str | None) -> str | None:
    """Validate testing_mode is in the allowed list.

    Args:
        v: Requested testing mode, or None when the request omits it.

    Returns:
        The value unchanged when it is None or a member of VALID_TESTING_MODES.

    Raises:
        ValueError: If v is not None and not in VALID_TESTING_MODES.
    """
    if v is not None and v not in VALID_TESTING_MODES:
        # VALID_TESTING_MODES is a set; sort it so the message lists the modes
        # in the same order on every run.
        raise ValueError(f"Invalid testing_mode. Must be one of: {sorted(VALID_TESTING_MODES)}")
    return v
@field_validator('testing_mode')
@classmethod
def validate_testing_mode(cls, v: str | None) -> str | None:
    """Validate testing_mode is in the allowed list.

    Args:
        v: New testing mode, or None when the update leaves it unchanged.

    Returns:
        The value unchanged when it is None or a member of VALID_TESTING_MODES.

    Raises:
        ValueError: If v is not None and not in VALID_TESTING_MODES.
    """
    if v is not None and v not in VALID_TESTING_MODES:
        # VALID_TESTING_MODES is a set; sort it so the message lists the modes
        # in the same order on every run.
        raise ValueError(f"Invalid testing_mode. Must be one of: {sorted(VALID_TESTING_MODES)}")
    return v
@@ -351,6 +353,7 @@ async def start( max_concurrency: Max concurrent coding agents (1-5, default 1) testing_agent_ratio: Number of regression testing agents (0-3, default 1) playwright_headless: If True, run browser in headless mode + testing_mode: Testing mode (full, smart) Returns: Tuple of (success, message) @@ -370,6 +373,7 @@ async def start( self.parallel_mode = True # Always True now (unified orchestrator) self.max_concurrency = max_concurrency or 1 self.testing_agent_ratio = testing_agent_ratio + self.testing_mode = testing_mode # Build command - unified orchestrator with --concurrency cmd = [ @@ -397,6 +401,9 @@ async def start( # Add --batch-size flag for multi-feature batching cmd.extend(["--batch-size", str(batch_size)]) + # Add testing mode configuration + cmd.extend(["--testing-mode", testing_mode]) + try: # Start subprocess with piped stdout/stderr # Use project_dir as cwd so Claude SDK sandbox allows access to project files @@ -489,6 +496,7 @@ async def stop(self) -> tuple[bool, str]: self.parallel_mode = False # Reset parallel mode self.max_concurrency = None # Reset concurrency self.testing_agent_ratio = 1 # Reset testing ratio + self.testing_mode = "full" # Reset testing mode return True, "Agent stopped" except Exception as e: @@ -575,6 +583,7 @@ def get_status_dict(self) -> dict: "parallel_mode": self.parallel_mode, "max_concurrency": self.max_concurrency, "testing_agent_ratio": self.testing_agent_ratio, + "testing_mode": self.testing_mode, } diff --git a/ui/src/components/SettingsModal.tsx b/ui/src/components/SettingsModal.tsx index 0a2b9eec..195e5ebb 100644 --- a/ui/src/components/SettingsModal.tsx +++ b/ui/src/components/SettingsModal.tsx @@ -57,6 +57,12 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { } } + const handleTestingModeChange = (mode: string) => { + if (!updateSettings.isPending) { + updateSettings.mutate({ testing_mode: mode }) + } + } + const handleBatchSizeChange = (size: number) => { if 
(!updateSettings.isPending) { updateSettings.mutate({ batch_size: size }) @@ -373,6 +379,35 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { /> + {/* Browser Testing Mode */} +
+ {(settings.testing_mode || 'full') === 'smart' ? 'UI features only' : 'All features'} +
+