Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ async def run_autonomous_agent(
agent_type: Optional[str] = None,
testing_feature_id: Optional[int] = None,
testing_feature_ids: Optional[list[int]] = None,
testing_mode: str = "full",
) -> None:
"""
Run the autonomous agent loop.
Expand All @@ -158,6 +159,7 @@ async def run_autonomous_agent(
agent_type: Type of agent: "initializer", "coding", "testing", or None (auto-detect)
testing_feature_id: For testing agents, the pre-claimed feature ID to test (legacy single mode)
testing_feature_ids: For testing agents, list of feature IDs to batch test
testing_mode: Testing mode - "full" or "smart"
"""
print("\n" + "=" * 70)
print(" AUTONOMOUS CODING AGENT")
Expand Down Expand Up @@ -250,7 +252,29 @@ async def run_autonomous_agent(
agent_id = f"feature-{feature_id}"
else:
agent_id = None
client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_id=agent_id, agent_type=agent_type)

# Get feature category for smart testing mode
feature_category = None
if feature_id and testing_mode == "smart":
try:
from api.database import Feature, get_session
session = get_session(str(project_dir))
feature = session.query(Feature).filter(Feature.id == feature_id).first()
if feature:
feature_category = feature.category
print(f" Feature category: {feature_category} (for smart testing)")
session.close()
except Exception as e:
print(f" Warning: Could not get feature category: {e}")

client = create_client(
project_dir, model,
yolo_mode=yolo_mode,
agent_id=agent_id,
agent_type=agent_type,
testing_mode=testing_mode,
feature_category=feature_category,
)

# Choose prompt based on agent type
if agent_type == "initializer":
Expand Down
10 changes: 10 additions & 0 deletions autonomous_agent_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@ def parse_args() -> argparse.Namespace:
help="Max features per coding agent batch (1-3, default: 3)",
)

parser.add_argument(
"--testing-mode",
type=str,
default="full",
choices=["full", "smart"],
help="Testing mode: full (always Playwright), smart (Playwright for UI only)",
)

return parser.parse_args()


Expand Down Expand Up @@ -269,6 +277,7 @@ def main() -> None:
agent_type=args.agent_type,
testing_feature_id=args.testing_feature_id,
testing_feature_ids=testing_feature_ids,
testing_mode=args.testing_mode,
)
)
else:
Expand Down Expand Up @@ -300,6 +309,7 @@ def main() -> None:
testing_agent_ratio=args.testing_ratio,
testing_batch_size=args.testing_batch_size,
batch_size=args.batch_size,
testing_mode=args.testing_mode,
)
)
except KeyboardInterrupt:
Expand Down
56 changes: 48 additions & 8 deletions client.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,12 +278,45 @@ def get_extra_read_paths() -> list[Path]:
]


def should_use_playwright(testing_mode: str, feature_category: str | None, yolo_mode: bool) -> bool:
"""
Determine if Playwright tools should be included based on testing mode and feature category.

Args:
testing_mode: Testing mode - "full" or "smart"
feature_category: Category of the feature (e.g., "API", "UI", "Database")
yolo_mode: Whether YOLO mode is enabled (overrides everything)

Returns:
True if Playwright tools should be included, False otherwise
"""
# YOLO mode always disables Playwright
if yolo_mode:
return False

# "smart" mode only uses Playwright for UI features
if testing_mode == "smart":
if feature_category:
category_lower = feature_category.lower()
# Exclude for API/backend features
api_keywords = ["api", "backend", "database", "db", "server", "endpoint", "service"]
if any(kw in category_lower for kw in api_keywords):
return False
# Default: use Playwright (for UI features or unknown categories)
return True

# "full" mode (default) always uses Playwright
return True


def create_client(
project_dir: Path,
model: str,
yolo_mode: bool = False,
agent_id: str | None = None,
agent_type: str = "coding",
testing_mode: str = "full",
feature_category: str | None = None,
):
"""
Create a Claude Agent SDK client with multi-layered security.
Expand All @@ -296,6 +329,8 @@ def create_client(
When provided, each agent gets its own browser profile.
agent_type: One of "coding", "testing", or "initializer". Controls which
MCP tools are exposed and the max_turns limit.
testing_mode: Testing mode - "full" (always Playwright), "smart" (UI only)
feature_category: Category of the feature being worked on (for smart mode)

Returns:
Configured ClaudeSDKClient (from claude_agent_sdk)
Expand Down Expand Up @@ -327,10 +362,12 @@ def create_client(
}
max_turns = max_turns_map.get(agent_type, 300)

# Determine if Playwright should be used
use_playwright = should_use_playwright(testing_mode, feature_category, yolo_mode)

# Build allowed tools list based on mode and agent type.
# In YOLO mode, exclude Playwright tools for faster prototyping.
allowed_tools = [*BUILTIN_TOOLS, *feature_tools]
if not yolo_mode:
if use_playwright:
allowed_tools.extend(PLAYWRIGHT_TOOLS)

# Build permissions list.
Expand Down Expand Up @@ -363,8 +400,8 @@ def create_client(
permissions_list.append(f"Glob({path}/**)")
permissions_list.append(f"Grep({path}/**)")

if not yolo_mode:
# Allow Playwright MCP tools for browser automation (standard mode only)
if use_playwright:
# Allow Playwright MCP tools for browser automation
permissions_list.extend(PLAYWRIGHT_TOOLS)

# Create comprehensive security settings
Expand Down Expand Up @@ -394,8 +431,11 @@ def create_client(
if extra_read_paths:
print(f" - Extra read paths (validated): {', '.join(str(p) for p in extra_read_paths)}")
print(" - Bash commands restricted to allowlist (see security.py)")
if yolo_mode:
print(" - MCP servers: features (database) - YOLO MODE (no Playwright)")
if not use_playwright:
reason = "YOLO MODE" if yolo_mode else f"testing_mode={testing_mode}"
if testing_mode == "smart" and feature_category:
reason += f", category={feature_category}"
print(f" - MCP servers: features (database) - NO Playwright ({reason})")
else:
print(" - MCP servers: playwright (browser), features (database)")
print(" - Project settings enabled (skills, commands, CLAUDE.md)")
Expand All @@ -421,8 +461,8 @@ def create_client(
},
},
}
if not yolo_mode:
# Include Playwright MCP server for browser automation (standard mode only)
if use_playwright:
# Include Playwright MCP server for browser automation
# Browser and headless mode configurable via environment variables
browser = get_playwright_browser()
playwright_args = [
Expand Down
17 changes: 16 additions & 1 deletion parallel_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def __init__(
testing_agent_ratio: int = 1,
testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE,
batch_size: int = 3,
testing_mode: str = "full",
on_output: Callable[[int, str], None] | None = None,
on_status: Callable[[int, str], None] | None = None,
):
Expand All @@ -170,6 +171,7 @@ def __init__(
0 = disabled, 1-3 = maintain that many testing agents running independently.
testing_batch_size: Number of features to include per testing session (1-5).
Each testing agent receives this many features to regression test.
testing_mode: Testing mode - full (always Playwright) or smart (UI only)
on_output: Callback for agent output (feature_id, line)
on_status: Callback for agent status changes (feature_id, status)
"""
Expand All @@ -178,6 +180,7 @@ def __init__(
self.model = model
self.yolo_mode = yolo_mode
self.testing_agent_ratio = min(max(testing_agent_ratio, 0), 3) # Clamp 0-3
self.testing_mode = testing_mode
self.testing_batch_size = min(max(testing_batch_size, 1), 5) # Clamp 1-5
self.batch_size = min(max(batch_size, 1), 3) # Clamp 1-3
self.on_output = on_output
Expand Down Expand Up @@ -828,6 +831,7 @@ def _spawn_coding_agent(self, feature_id: int) -> tuple[bool, str]:
"--max-iterations", "1",
"--agent-type", "coding",
"--feature-id", str(feature_id),
"--testing-mode", self.testing_mode,
]
if self.model:
cmd.extend(["--model", self.model])
Expand Down Expand Up @@ -1651,6 +1655,7 @@ async def run_parallel_orchestrator(
testing_agent_ratio: int = 1,
testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE,
batch_size: int = 3,
testing_mode: str = "full",
) -> None:
"""Run the unified orchestrator.

Expand All @@ -1662,8 +1667,9 @@ async def run_parallel_orchestrator(
testing_agent_ratio: Number of regression agents to maintain (0-3)
testing_batch_size: Number of features per testing batch (1-5)
batch_size: Max features per coding agent batch (1-3)
testing_mode: Testing mode - full or smart
"""
print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}", flush=True)
print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}, testing_mode={testing_mode}", flush=True)
orchestrator = ParallelOrchestrator(
project_dir=project_dir,
max_concurrency=max_concurrency,
Expand All @@ -1672,6 +1678,7 @@ async def run_parallel_orchestrator(
testing_agent_ratio=testing_agent_ratio,
testing_batch_size=testing_batch_size,
batch_size=batch_size,
testing_mode=testing_mode,
)

# Set up cleanup to run on exit (handles normal exit, exceptions)
Expand Down Expand Up @@ -1763,6 +1770,13 @@ def main():
default=3,
help="Max features per coding agent batch (1-5, default: 3)",
)
parser.add_argument(
"--testing-mode",
type=str,
default="full",
choices=["full", "smart"],
help="Testing mode: full (always Playwright), smart (Playwright for UI only)",
)

args = parser.parse_args()

Expand Down Expand Up @@ -1791,6 +1805,7 @@ def main():
testing_agent_ratio=args.testing_agent_ratio,
testing_batch_size=args.testing_batch_size,
batch_size=args.batch_size,
testing_mode=args.testing_mode,
))
except KeyboardInterrupt:
print("\n\nInterrupted by user", flush=True)
Expand Down
14 changes: 10 additions & 4 deletions server/routers/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
from ..utils.validation import validate_project_name


def _get_settings_defaults() -> tuple[bool, str, int, bool, int]:
def _get_settings_defaults() -> tuple[bool, str, int, bool, int, str]:
"""Get defaults from global settings.

Returns:
Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size)
Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_mode)
"""
import sys
root = Path(__file__).parent.parent.parent
Expand All @@ -47,7 +47,10 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int]:
except (ValueError, TypeError):
batch_size = 3

return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size
# Get testing mode (full, smart)
testing_mode = settings.get("testing_mode", "full")

return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_mode


router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"])
Expand Down Expand Up @@ -84,6 +87,7 @@ async def get_agent_status(project_name: str):
parallel_mode=manager.parallel_mode,
max_concurrency=manager.max_concurrency,
testing_agent_ratio=manager.testing_agent_ratio,
testing_mode=getattr(manager, 'testing_mode', 'full'),
)


Expand All @@ -96,14 +100,15 @@ async def start_agent(
manager = get_project_manager(project_name)

# Get defaults from global settings if not provided in request
default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size = _get_settings_defaults()
default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size, default_testing_mode = _get_settings_defaults()

yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo
model = request.model if request.model else default_model
max_concurrency = request.max_concurrency or 1
testing_agent_ratio = request.testing_agent_ratio if request.testing_agent_ratio is not None else default_testing_ratio

batch_size = default_batch_size
testing_mode = request.testing_mode if request.testing_mode else default_testing_mode

success, message = await manager.start(
yolo_mode=yolo_mode,
Expand All @@ -112,6 +117,7 @@ async def start_agent(
testing_agent_ratio=testing_agent_ratio,
playwright_headless=playwright_headless,
batch_size=batch_size,
testing_mode=testing_mode,
)

# Notify scheduler of manual start (to prevent auto-stop during scheduled window)
Expand Down
5 changes: 5 additions & 0 deletions server/routers/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ async def get_settings():
glm_mode=glm_mode,
ollama_mode=ollama_mode,
testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
testing_mode=all_settings.get("testing_mode", "full"),
playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
batch_size=_parse_int(all_settings.get("batch_size"), 3),
api_provider=api_provider,
Expand Down Expand Up @@ -138,6 +139,9 @@ async def update_settings(update: SettingsUpdate):
if update.batch_size is not None:
set_setting("batch_size", str(update.batch_size))

if update.testing_mode is not None:
set_setting("testing_mode", update.testing_mode)

# API provider settings
if update.api_provider is not None:
old_provider = get_setting("api_provider", "claude")
Expand Down Expand Up @@ -175,6 +179,7 @@ async def update_settings(update: SettingsUpdate):
glm_mode=glm_mode,
ollama_mode=ollama_mode,
testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
testing_mode=all_settings.get("testing_mode", "full"),
playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
batch_size=_parse_int(all_settings.get("batch_size"), 3),
api_provider=api_provider,
Expand Down
Loading