#!/usr/bin/env bash
# test-llm-extraction.sh - Validate LLM extraction functionality
# Part of the Comprehensive Vision UAT
#
# Usage: ./test-llm-extraction.sh
#
# Builds the release `aphoria` binary if it is not present, writes a small
# Python fixture project under the UAT fixtures directory, then runs five
# checks (4.1.1 - 4.1.5) against `aphoria eval run` and `aphoria scan`.
# Exits 1 if any check fails.

set -euo pipefail

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
readonly RED GREEN YELLOW NC

# Directory layout is derived from this script's own location by walking
# up the tree: script dir -> UAT dir -> Aphoria dir -> (two levels) repo root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
UAT_DIR="$(dirname "$SCRIPT_DIR")"
APHORIA_DIR="$(dirname "$UAT_DIR")"
STEMEDB_DIR="$(dirname "$(dirname "$APHORIA_DIR")")"
readonly SCRIPT_DIR UAT_DIR APHORIA_DIR STEMEDB_DIR

# Build Aphoria if needed
readonly APHORIA_BIN="${STEMEDB_DIR}/target/release/aphoria"
if [[ ! -f "$APHORIA_BIN" ]]; then
  echo "Building Aphoria..."
  cargo build --release --package aphoria --manifest-path "${STEMEDB_DIR}/Cargo.toml"
fi

# Test fixtures directory
readonly FIXTURES_DIR="${UAT_DIR}/fixtures/llm"
mkdir -p "$FIXTURES_DIR"

# LLM fixtures directory (existing)
readonly LLM_FIXTURES_DIR="${APHORIA_DIR}/tests/llm_fixtures"

# Result counters, updated by test_case / pass / fail / skip.
PASSED=0
FAILED=0
TOTAL=0

#######################################
# Announce a test case and count it towards the total.
# Globals:   TOTAL (written), YELLOW, NC (read)
# Arguments: $1 - test id (e.g. "4.1.1"), $2 - human-readable description
# Outputs:   header line on stdout
#######################################
test_case() {
  local id="$1"
  local description="$2"
  TOTAL=$((TOTAL + 1))
  echo -e "\n${YELLOW}[$id]${NC} $description"
}

#######################################
# Record the current test case as passed.
# Globals: PASSED (written), GREEN, NC (read)
# Outputs: PASS marker on stdout
#######################################
pass() {
  PASSED=$((PASSED + 1))
  echo -e " ${GREEN}✓ PASS${NC}"
}

#######################################
# Record the current test case as failed.
# Globals:   FAILED (written), RED, NC (read)
# Arguments: $1 - reason shown to the user
# Outputs:   FAIL marker on stdout
#######################################
fail() {
  local reason="$1"
  FAILED=$((FAILED + 1))
  echo -e " ${RED}✗ FAIL: $reason${NC}"
}

#######################################
# Record the current test case as skipped.
# Globals:   PASSED (written), YELLOW, NC (read)
# Arguments: $1 - reason shown to the user
# Outputs:   SKIPPED marker on stdout
#######################################
skip() {
  local reason="$1"
  PASSED=$((PASSED + 1)) # Count as pass since it's expected behavior
  echo -e " ${YELLOW}⊘ SKIPPED: $reason${NC}"
}

#######################################
# Case-insensitively test whether captured output matches a grep pattern.
#
# The text is fed through a here-string instead of `echo ... | grep -q`.
# Under `set -o pipefail`, `grep -q` exits at the first match; if `echo` is
# still writing it is killed by SIGPIPE and the pipeline reports failure even
# though the pattern matched. A here-string has no writer process to kill.
#
# Arguments: $1 - text to search, $2 - grep basic-regex pattern
# Returns:   0 if the pattern matches, non-zero otherwise
#######################################
output_matches() {
  local text="$1"
  local pattern="$2"
  grep -qi -- "$pattern" <<<"$text"
}

#######################################
# Print the first 20 lines of captured output as failure context.
# Arguments: $1 - captured output
# Outputs:   " Output: ..." on stdout
#######################################
show_output() {
  local text="$1"
  echo " Output: $(head -n 20 <<<"$text")"
}

#######################################
# Create test fixtures: a small Python project with files under auth/,
# crypto/ and utils/, plus a pyproject.toml.
# Globals: FIXTURES_DIR (read)
# Outputs: progress line on stdout; files written under FIXTURES_DIR
#######################################
create_fixtures() {
  echo "Creating LLM extraction test fixtures..."

  # High-value file (auth directory)
  mkdir -p "${FIXTURES_DIR}/auth"
  cat > "${FIXTURES_DIR}/auth/login.py" << 'EOF'
# Authentication module - high value file
from flask import Flask, request

app = Flask(__name__)

def authenticate(username, password):
    # Simplified auth for testing
    if username == "admin" and password == "admin123":
        return True
    return False
EOF

  # High-value file (crypto directory)
  mkdir -p "${FIXTURES_DIR}/crypto"
  cat > "${FIXTURES_DIR}/crypto/encrypt.py" << 'EOF'
# Cryptography module - high value file
import hashlib

def hash_password(password):
    # BAD: MD5 for password hashing
    return hashlib.md5(password.encode()).hexdigest()
EOF

  # Non-high-value file (regular code)
  mkdir -p "${FIXTURES_DIR}/utils"
  cat > "${FIXTURES_DIR}/utils/helpers.py" << 'EOF'
# Utility helpers - not high value
def format_date(date):
    return date.strftime("%Y-%m-%d")

def parse_int(value):
    try:
        return int(value)
    except ValueError:
        return 0
EOF

  # Project config
  cat > "${FIXTURES_DIR}/pyproject.toml" << 'EOF'
[project]
name = "llm-test"
version = "0.1.0"
EOF
}

#######################################
# Test 4.1.1: Mock mode runs without API key.
# Globals: APHORIA_BIN, LLM_FIXTURES_DIR (read)
#######################################
test_mock_mode() {
  test_case "4.1.1" "Mock mode runs without API key"

  # Blank out any API key and run in mock mode. `|| true` keeps a non-zero
  # exit from aborting the script under `set -e`; only the text is judged.
  local output
  output=$(GEMINI_API_KEY="" ANTHROPIC_API_KEY="" "$APHORIA_BIN" eval run \
    --fixtures "${LLM_FIXTURES_DIR}" \
    --mode mock \
    --max-fixtures 1 \
    2>&1 || true)

  # Mock mode should complete without errors about missing API key
  if output_matches "$output" 'error.*api.*key\|missing.*key'; then
    fail "Mock mode should not require API key"
    show_output "$output"
  else
    pass
  fi
}

#######################################
# Test 4.1.2: Cached mode uses cache.
# Globals: APHORIA_BIN, LLM_FIXTURES_DIR (read)
#######################################
test_cached_mode() {
  test_case "4.1.2" "Cached mode uses cache without API calls"

  # This test verifies that cached mode doesn't make API calls
  # We run mock first, then cached should use those (mock) results
  local output
  output=$(GEMINI_API_KEY="" ANTHROPIC_API_KEY="" "$APHORIA_BIN" eval run \
    --fixtures "${LLM_FIXTURES_DIR}" \
    --mode cached \
    --max-fixtures 1 \
    2>&1 || true)

  # Cached mode should complete (it falls back gracefully if no cache)
  if output_matches "$output" 'error\|panic'; then
    fail "Cached mode should not error"
    show_output "$output"
  else
    pass
  fi
}

#######################################
# Test 4.1.3: High-value file detection.
# Globals: APHORIA_BIN, FIXTURES_DIR (read)
#######################################
test_high_value_detection() {
  test_case "4.1.3" "High-value files in auth/, crypto/, config/ detected"

  # The auth and crypto directories should be flagged as high-value
  # We scan with debug mode to see detection logic
  local output
  output=$("$APHORIA_BIN" scan "${FIXTURES_DIR}" --format json --debug 2>&1 || true)

  # Check that auth/ or crypto/ files are processed (they exist in output)
  if output_matches "$output" 'auth\|crypto\|login\.py\|encrypt\.py'; then
    pass
  else
    fail "High-value files should be detected"
    show_output "$output"
  fi
}

#######################################
# Test 4.1.4: Non-high-value file handling.
# Globals: APHORIA_BIN, FIXTURES_DIR (read)
#######################################
test_non_high_value_files() {
  test_case "4.1.4" "Non-high-value files processed efficiently"

  # Helpers file should still be scanned but with lower priority
  local output
  output=$("$APHORIA_BIN" scan "${FIXTURES_DIR}" --format json 2>&1 || true)

  # The helpers.py should be scanned (exists in a project being scanned)
  # This is more about ensuring the scan completes
  if output_matches "$output" 'conflicts\|clean\|Conflicts\|Scan'; then
    pass
  else
    fail "Regular files should be processed"
    show_output "$output"
  fi
}

#######################################
# Test 4.1.5: Token budget tracking.
# Globals: APHORIA_BIN, LLM_FIXTURES_DIR (read)
#######################################
test_token_budget() {
  test_case "4.1.5" "Token budget tracking reported"

  # Run eval with mock mode and check for metrics output
  local output
  output=$(GEMINI_API_KEY="" ANTHROPIC_API_KEY="" "$APHORIA_BIN" eval run \
    --fixtures "${LLM_FIXTURES_DIR}" \
    --mode mock \
    --max-fixtures 2 \
    --format json \
    2>&1 || true)

  # The output should contain some form of metrics/stats
  if output_matches "$output" 'precision\|recall\|f1\|metrics\|fixtures\|completed\|finished\|run'; then
    pass
  else
    fail "Token budget or metrics should be reported"
    show_output "$output"
  fi
}

#######################################
# Run all tests and print a summary.
# Globals: LLM_FIXTURES_DIR, PASSED, TOTAL, FAILED, YELLOW, NC (read)
# Returns: exits 1 if any test failed
#######################################
main() {
  echo "========================================"
  echo "Aphoria LLM Extraction UAT"
  echo "========================================"

  create_fixtures

  # Check if LLM fixtures exist
  # NOTE(review): this warning is informational only - nothing in this script
  # calls skip(), so the eval-based tests still run when the directory is
  # missing. Confirm whether they should be skipped instead.
  if [[ ! -d "$LLM_FIXTURES_DIR" ]]; then
    echo -e "${YELLOW}Warning: LLM fixtures directory not found at ${LLM_FIXTURES_DIR}${NC}"
    echo "Some tests may be skipped."
  fi

  echo ""
  echo "Running LLM extraction tests..."
  echo "(Note: These tests use mock mode - no API key required)"

  test_mock_mode
  test_cached_mode
  test_high_value_detection
  test_non_high_value_files
  test_token_budget

  echo ""
  echo "========================================"
  echo "Results: $PASSED/$TOTAL passed, $FAILED failed"
  echo "========================================"

  if ((FAILED > 0)); then
    exit 1
  fi
}

main "$@"