AutoForgeAI · maswa · Feb 7, 2026
diff --git a/agent.py b/agent.py
@@ -144,6 +144,7 @@ async def run_autonomous_agent(
     agent_type: Optional[str] = None,
     testing_feature_id: Optional[int] = None,
     testing_feature_ids: Optional[list[int]] = None,
+    testing_mode: str = "full",
 ) -> None:
     """
     Run the autonomous agent loop.
@@ -158,6 +159,7 @@ async def run_autonomous_agent(
         agent_type: Type of agent: "initializer", "coding", "testing", or None (auto-detect)
         testing_feature_id: For testing agents, the pre-claimed feature ID to test (legacy single mode)
         testing_feature_ids: For testing agents, list of feature IDs to batch test
+        testing_mode: Testing mode - "full" or "smart"
     """
     print("\n" + "=" * 70)
     print("  AUTONOMOUS CODING AGENT")
@@ -250,7 +252,29 @@ async def run_autonomous_agent(
             agent_id = f"feature-{feature_id}"
         else:
             agent_id = None
-        client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_id=agent_id, agent_type=agent_type)
+
+        # Get feature category for smart testing mode
+        feature_category = None
+        if feature_id and testing_mode == "smart":
+            try:
+                from api.database import Feature, get_session
+                session = get_session(str(project_dir))
+                feature = session.query(Feature).filter(Feature.id == feature_id).first()
+                if feature:
+                    feature_category = feature.category
+                    print(f"   Feature category: {feature_category} (for smart testing)")
+                session.close()
+            except Exception as e:
+                print(f"   Warning: Could not get feature category: {e}")
+
+        client = create_client(
+            project_dir, model,
+            yolo_mode=yolo_mode,
+            agent_id=agent_id,
+            agent_type=agent_type,
+            testing_mode=testing_mode,
+            feature_category=feature_category,
+        )
 
         # Choose prompt based on agent type
         if agent_type == "initializer":

diff --git a/autonomous_agent_demo.py b/autonomous_agent_demo.py
@@ -186,6 +186,14 @@ def parse_args() -> argparse.Namespace:
         help="Max features per coding agent batch (1-3, default: 3)",
     )
 
+    parser.add_argument(
+        "--testing-mode",
+        type=str,
+        default="full",
+        choices=["full", "smart"],
+        help="Testing mode: full (always Playwright), smart (Playwright for UI only)",
+    )
+
     return parser.parse_args()
 
 
@@ -269,6 +277,7 @@ def main() -> None:
                     agent_type=args.agent_type,
                     testing_feature_id=args.testing_feature_id,
                     testing_feature_ids=testing_feature_ids,
+                    testing_mode=args.testing_mode,
                 )
             )
         else:
@@ -300,6 +309,7 @@ def main() -> None:
                     testing_agent_ratio=args.testing_ratio,
                     testing_batch_size=args.testing_batch_size,
                     batch_size=args.batch_size,
+                    testing_mode=args.testing_mode,
                 )
             )
     except KeyboardInterrupt:

diff --git a/client.py b/client.py
@@ -278,12 +278,45 @@ def get_extra_read_paths() -> list[Path]:
 ]
 
 
+def should_use_playwright(testing_mode: str, feature_category: str | None, yolo_mode: bool) -> bool:
+    """
+    Determine if Playwright tools should be included based on testing mode and feature category.
+
+    Args:
+        testing_mode: Testing mode - "full" or "smart"
+        feature_category: Category of the feature (e.g., "API", "UI"); None/empty means unknown
+        yolo_mode: Whether YOLO mode is enabled (overrides everything)
+
+    Returns:
+        True if Playwright tools should be included, False otherwise
+    """
+    # YOLO mode always disables Playwright; every mode except "smart" always enables it
+    if yolo_mode:
+        return False
+    if testing_mode != "smart" or not feature_category:
+        return True
+
+    # "smart" mode skips Playwright only for API/backend categories. Keywords are
+    # matched against whole words (a trailing plural "s" is ignored), not raw
+    # substrings, so UI categories such as "Feedback" (contains "db") or
+    # "Rapid Entry" (contains "api") still get browser testing.
+    api_keywords = {"api", "backend", "database", "db", "server", "endpoint", "service"}
+    words = "".join(ch if ch.isalnum() else " " for ch in feature_category.lower()).split()
+    if any(word.rstrip("s") in api_keywords for word in words):
+        return False
+
+    # Default: use Playwright (for UI features or unknown categories)
+    return True
+
+
 def create_client(
     project_dir: Path,
     model: str,
     yolo_mode: bool = False,
     agent_id: str | None = None,
     agent_type: str = "coding",
+    testing_mode: str = "full",
+    feature_category: str | None = None,
 ):
     """
     Create a Claude Agent SDK client with multi-layered security.
@@ -296,6 +329,8 @@ def create_client(
                   When provided, each agent gets its own browser profile.
         agent_type: One of "coding", "testing", or "initializer". Controls which
                     MCP tools are exposed and the max_turns limit.
+        testing_mode: Testing mode - "full" (always Playwright), "smart" (UI only)
+        feature_category: Category of the feature being worked on (for smart mode)
 
     Returns:
         Configured ClaudeSDKClient (from claude_agent_sdk)
@@ -327,10 +362,12 @@ def create_client(
     }
     max_turns = max_turns_map.get(agent_type, 300)
 
+    # Determine if Playwright should be used
+    use_playwright = should_use_playwright(testing_mode, feature_category, yolo_mode)
+
     # Build allowed tools list based on mode and agent type.
-    # In YOLO mode, exclude Playwright tools for faster prototyping.
     allowed_tools = [*BUILTIN_TOOLS, *feature_tools]
-    if not yolo_mode:
+    if use_playwright:
         allowed_tools.extend(PLAYWRIGHT_TOOLS)
 
     # Build permissions list.
@@ -363,8 +400,8 @@ def create_client(
         permissions_list.append(f"Glob({path}/**)")
         permissions_list.append(f"Grep({path}/**)")
 
-    if not yolo_mode:
-        # Allow Playwright MCP tools for browser automation (standard mode only)
+    if use_playwright:
+        # Allow Playwright MCP tools for browser automation
         permissions_list.extend(PLAYWRIGHT_TOOLS)
 
     # Create comprehensive security settings
@@ -394,8 +431,11 @@ def create_client(
     if extra_read_paths:
         print(f"   - Extra read paths (validated): {', '.join(str(p) for p in extra_read_paths)}")
     print("   - Bash commands restricted to allowlist (see security.py)")
-    if yolo_mode:
-        print("   - MCP servers: features (database) - YOLO MODE (no Playwright)")
+    if not use_playwright:
+        reason = "YOLO MODE" if yolo_mode else f"testing_mode={testing_mode}"
+        if testing_mode == "smart" and feature_category:
+            reason += f", category={feature_category}"
+        print(f"   - MCP servers: features (database) - NO Playwright ({reason})")
     else:
         print("   - MCP servers: playwright (browser), features (database)")
     print("   - Project settings enabled (skills, commands, CLAUDE.md)")
@@ -421,8 +461,8 @@ def create_client(
             },
         },
     }
-    if not yolo_mode:
-        # Include Playwright MCP server for browser automation (standard mode only)
+    if use_playwright:
+        # Include Playwright MCP server for browser automation
         # Browser and headless mode configurable via environment variables
         browser = get_playwright_browser()
         playwright_args = [

diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py
@@ -155,6 +155,7 @@ def __init__(
         testing_agent_ratio: int = 1,
         testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE,
         batch_size: int = 3,
+        testing_mode: str = "full",
         on_output: Callable[[int, str], None] | None = None,
         on_status: Callable[[int, str], None] | None = None,
     ):
@@ -170,6 +171,7 @@ def __init__(
                 0 = disabled, 1-3 = maintain that many testing agents running independently.
             testing_batch_size: Number of features to include per testing session (1-5).
                 Each testing agent receives this many features to regression test.
+            testing_mode: Testing mode - full (always Playwright) or smart (UI only)
             on_output: Callback for agent output (feature_id, line)
             on_status: Callback for agent status changes (feature_id, status)
         """
@@ -178,6 +180,7 @@ def __init__(
         self.model = model
         self.yolo_mode = yolo_mode
         self.testing_agent_ratio = min(max(testing_agent_ratio, 0), 3)  # Clamp 0-3
+        self.testing_mode = testing_mode
         self.testing_batch_size = min(max(testing_batch_size, 1), 5)  # Clamp 1-5
         self.batch_size = min(max(batch_size, 1), 3)  # Clamp 1-3
         self.on_output = on_output
@@ -828,6 +831,7 @@ def _spawn_coding_agent(self, feature_id: int) -> tuple[bool, str]:
             "--max-iterations", "1",
             "--agent-type", "coding",
             "--feature-id", str(feature_id),
+            "--testing-mode", self.testing_mode,
         ]
         if self.model:
             cmd.extend(["--model", self.model])
@@ -1651,6 +1655,7 @@ async def run_parallel_orchestrator(
     testing_agent_ratio: int = 1,
     testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE,
     batch_size: int = 3,
+    testing_mode: str = "full",
 ) -> None:
     """Run the unified orchestrator.
 
@@ -1662,8 +1667,9 @@ async def run_parallel_orchestrator(
         testing_agent_ratio: Number of regression agents to maintain (0-3)
         testing_batch_size: Number of features per testing batch (1-5)
         batch_size: Max features per coding agent batch (1-3)
+        testing_mode: Testing mode - full or smart
     """
-    print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}", flush=True)
+    print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}, testing_mode={testing_mode}", flush=True)
     orchestrator = ParallelOrchestrator(
         project_dir=project_dir,
         max_concurrency=max_concurrency,
@@ -1672,6 +1678,7 @@ async def run_parallel_orchestrator(
         testing_agent_ratio=testing_agent_ratio,
         testing_batch_size=testing_batch_size,
         batch_size=batch_size,
+        testing_mode=testing_mode,
     )
 
     # Set up cleanup to run on exit (handles normal exit, exceptions)
@@ -1763,6 +1770,13 @@ def main():
         default=3,
         help="Max features per coding agent batch (1-5, default: 3)",
     )
+    parser.add_argument(
+        "--testing-mode",
+        type=str,
+        default="full",
+        choices=["full", "smart"],
+        help="Testing mode: full (always Playwright), smart (Playwright for UI only)",
+    )
 
     args = parser.parse_args()
 
@@ -1791,6 +1805,7 @@ def main():
             testing_agent_ratio=args.testing_agent_ratio,
             testing_batch_size=args.testing_batch_size,
             batch_size=args.batch_size,
+            testing_mode=args.testing_mode,
         ))
     except KeyboardInterrupt:
         print("\n\nInterrupted by user", flush=True)

diff --git a/server/routers/agent.py b/server/routers/agent.py
@@ -17,11 +17,11 @@
 from ..utils.validation import validate_project_name
 
 
-def _get_settings_defaults() -> tuple[bool, str, int, bool, int]:
+def _get_settings_defaults() -> tuple[bool, str, int, bool, int, str]:
     """Get defaults from global settings.
 
     Returns:
-        Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size)
+        Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_mode)
     """
     import sys
     root = Path(__file__).parent.parent.parent
@@ -47,7 +47,10 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int]:
     except (ValueError, TypeError):
         batch_size = 3
 
-    return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size
+    # Get testing mode (full, smart)
+    testing_mode = settings.get("testing_mode", "full")
+
+    return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_mode
 
 
 router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"])
@@ -84,6 +87,7 @@ async def get_agent_status(project_name: str):
         parallel_mode=manager.parallel_mode,
         max_concurrency=manager.max_concurrency,
         testing_agent_ratio=manager.testing_agent_ratio,
+        testing_mode=getattr(manager, 'testing_mode', 'full'),
     )
 
 
@@ -96,14 +100,15 @@ async def start_agent(
     manager = get_project_manager(project_name)
 
     # Get defaults from global settings if not provided in request
-    default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size = _get_settings_defaults()
+    default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size, default_testing_mode = _get_settings_defaults()
 
     yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo
     model = request.model if request.model else default_model
     max_concurrency = request.max_concurrency or 1
     testing_agent_ratio = request.testing_agent_ratio if request.testing_agent_ratio is not None else default_testing_ratio
 
     batch_size = default_batch_size
+    testing_mode = request.testing_mode if request.testing_mode else default_testing_mode
 
     success, message = await manager.start(
         yolo_mode=yolo_mode,
@@ -112,6 +117,7 @@ async def start_agent(
         testing_agent_ratio=testing_agent_ratio,
         playwright_headless=playwright_headless,
         batch_size=batch_size,
+        testing_mode=testing_mode,
     )
 
     # Notify scheduler of manual start (to prevent auto-stop during scheduled window)

diff --git a/server/routers/settings.py b/server/routers/settings.py
@@ -111,6 +111,7 @@ async def get_settings():
         glm_mode=glm_mode,
         ollama_mode=ollama_mode,
         testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
+        testing_mode=all_settings.get("testing_mode", "full"),
         playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
         batch_size=_parse_int(all_settings.get("batch_size"), 3),
         api_provider=api_provider,
@@ -138,6 +139,9 @@ async def update_settings(update: SettingsUpdate):
     if update.batch_size is not None:
         set_setting("batch_size", str(update.batch_size))
 
+    if update.testing_mode is not None:
+        set_setting("testing_mode", update.testing_mode)
+
     # API provider settings
     if update.api_provider is not None:
         old_provider = get_setting("api_provider", "claude")
@@ -175,6 +179,7 @@ async def update_settings(update: SettingsUpdate):
         glm_mode=glm_mode,
         ollama_mode=ollama_mode,
         testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
+        testing_mode=all_settings.get("testing_mode", "full"),
         playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
         batch_size=_parse_int(all_settings.get("batch_size"), 3),
         api_provider=api_provider,