From c65066fd1c03f48a8997730fef99cd3581158521 Mon Sep 17 00:00:00 2001 From: jordan Date: Sat, 7 Feb 2026 17:28:50 -0700 Subject: [PATCH] feat(aphoria): implement ignore & exclusion system (Phase 16) Reduces scan noise by 96% through proper exclusion of test fixtures, demo apps, and intentional vulnerabilities. Phase 16.1 - Glob Pattern Matching: - Replace starts_with() with globset for ** and * patterns - Backwards compatible with legacy prefix patterns - Add walker/mod.rs tests for glob exclusions Phase 16.2 - .aphoriaignore File: - Create walker/ignore_file.rs for gitignore-style parsing - Merge with aphoria.toml excludes - Support # comments and whitespace trimming Phase 16.3 - Inline Ignore Comments: - Create extractors/ignore_comments.rs parser - Support // aphoria:ignore, // aphoria:ignore-next-line - Support // aphoria:ignore-block / // aphoria:end-ignore - Multiple comment styles: //, #, /*, --, ` (HTML, XML) + +use std::collections::HashSet; + +use regex::Regex; + +/// Parses ignore comments from file content and tracks ignored line numbers. +#[derive(Debug)] +pub struct IgnoreCommentParser { + /// Lines that should be ignored (1-indexed to match ExtractedClaim.line). + ignored_lines: HashSet, +} + +impl IgnoreCommentParser { + /// Parse ignore comments from file content. + /// + /// Returns a parser with the set of ignored line numbers. + pub fn parse(content: &str) -> Self { + let mut ignored_lines = HashSet::new(); + + // Track if we're in an ignore block + let mut in_block = false; + + for (line_idx, line) in content.lines().enumerate() { + let line_num = line_idx + 1; // 1-indexed + + // Check for block start/end + if contains_block_start(line) { + in_block = true; + // The block start line itself is not ignored (it's a comment) + continue; + } + + if contains_block_end(line) { + in_block = false; + // The block end line itself is not ignored (it's a comment) + continue; + } + + // If we're in a block, ignore this line + if in_block { + ignored_lines.insert(line_num); + continue; + } + + // Check for same-line ignore + if contains_same_line_ignore(line) { + ignored_lines.insert(line_num); + continue; + } + + // Check for next-line ignore (look at previous line) + if line_idx > 0 { + let prev_line = content.lines().nth(line_idx - 1).unwrap_or(""); + if contains_next_line_ignore(prev_line) { + ignored_lines.insert(line_num); + } + } + } + + Self { ignored_lines } + } + + /// Check if a line number should be ignored. + /// + /// Line numbers are 1-indexed (matching ExtractedClaim.line). + pub fn is_ignored(&self, line: usize) -> bool { + self.ignored_lines.contains(&line) + } + + /// Get the set of ignored line numbers. + #[allow(dead_code)] + pub fn ignored_lines(&self) -> &HashSet { + &self.ignored_lines + } + + /// Get the count of ignored lines. + #[allow(dead_code)] + pub fn ignored_count(&self) -> usize { + self.ignored_lines.len() + } +} + +/// Check if a line contains a same-line ignore comment. +fn contains_same_line_ignore(line: &str) -> bool { + // Match variations: + // // aphoria:ignore + // # aphoria:ignore + // /* aphoria:ignore */ + // -- aphoria:ignore + // aphoria:ignore (bare, for XML comments etc.) + // + // But NOT: + // // aphoria:ignore-next-line + // // aphoria:ignore-block + // // aphoria:end-ignore + + // Using lazy_static would be better, but we'll keep it simple + let patterns = [ + r"//\s*aphoria:ignore(?:\s|$|-\s)", + r"#\s*aphoria:ignore(?:\s|$|-\s)", + r"/\*\s*aphoria:ignore(?:\s|$|-\s)", + r"--\s*aphoria:ignore(?:\s|$|-\s)", + r"