// logcheck_fluent_bit_filter/rules.rs

1use crate::regex_conversion;
2use regex::{Regex, RegexSet};
3use serde;
4use std::collections::HashMap;
5use std::fs;
6use std::path::{Path, PathBuf};
7
/// Classification buckets for logcheck rules, mirroring the traditional
/// logcheck directory layout (see `LogcheckDatabase::load_from_directory`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum RuleCategory {
    /// Attack patterns (loaded from `cracking.d`).
    Cracking,
    /// Exceptions to cracking rules (loaded from `cracking.ignore.d`).
    CrackingIgnore,
    /// Security violation patterns (loaded from `violations.d`).
    Violations,
    /// Exceptions to violation rules (loaded from `violations.ignore.d`).
    ViolationsIgnore,
    /// Routine system events to ignore (loaded from `ignore.d.paranoid`).
    SystemEvents,
    /// Workstation-level ignore rules (loaded from `ignore.d.workstation`).
    Workstation,
    /// Server-level ignore rules (loaded from `ignore.d.server`).
    Server,
    /// Site-local rules (loaded from `local.d`).
    Local,
}
19
/// A rule set that can be split into multiple chunks to avoid regex size limits
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct RuleSet {
    /// Which logcheck category these patterns belong to.
    pub category: RuleCategory,
    /// Regex pattern strings (POSIX classes are converted before being
    /// stored when loaded from rule files).
    pub patterns: Vec<String>,
    /// Compiled form of `patterns`; `None` until `compile()` has run.
    pub compiled: Option<CompiledRules>,
    /// Files the patterns were loaded from (kept for diagnostics).
    pub source_files: Vec<PathBuf>,
}
29
/// Compiled regex rules that may be split into multiple chunks.
///
/// Matching semantics are identical either way: a message matches if any
/// pattern in any set matches (see `RuleSet::matches`).
#[derive(Debug, Clone)]
pub enum CompiledRules {
    /// Single RegexSet (optimized, when all patterns fit)
    Single(RegexSet),
    /// Multiple RegexSets (used when patterns exceed size limit)
    Chunked(Vec<RegexSet>),
}
38
/// The complete set of logcheck rules, one `RuleSet` per category.
///
/// Precedence between categories is applied in `match_message`:
/// cracking first, then violations, then the ignore sets.
#[derive(Debug, Clone)]
pub struct LogcheckDatabase {
    /// Attack patterns (`cracking.d`).
    pub cracking_rules: RuleSet,
    /// Exceptions to cracking rules (`cracking.ignore.d`).
    pub cracking_ignore: RuleSet,
    /// Security violation patterns (`violations.d`).
    pub violations_rules: RuleSet,
    /// Exceptions to violation rules (`violations.ignore.d`).
    pub violations_ignore: RuleSet,
    /// Routine system events (`ignore.d.paranoid`).
    pub system_events: RuleSet,
    /// Workstation-level ignore rules (`ignore.d.workstation`).
    pub workstation: RuleSet,
    /// Server-level ignore rules (`ignore.d.server`).
    pub server: RuleSet,
    /// Site-local rules (`local.d`).
    pub local: RuleSet,
}
50
/// Errors produced while loading or compiling logcheck rules.
#[allow(dead_code)]
#[derive(Debug, thiserror::Error)]
pub enum RuleError {
    /// Reading a rule file or directory failed.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
    /// A pattern (or pattern set) failed to compile.
    #[error("Regex error: {0}")]
    RegexError(#[from] regex::Error),
    /// A rule line could not be interpreted.
    #[error("Invalid rule format: {0}")]
    InvalidRuleFormat(String),
    /// The configured rules base directory does not exist.
    #[error("Directory not found: {0}")]
    DirectoryNotFound(PathBuf),
}
63
64impl Default for LogcheckDatabase {
65    fn default() -> Self {
66        Self::new()
67    }
68}
69
70impl LogcheckDatabase {
71    /// Create a new empty logcheck database
72    pub fn new() -> Self {
73        Self {
74            cracking_rules: RuleSet::new(RuleCategory::Cracking),
75            cracking_ignore: RuleSet::new(RuleCategory::CrackingIgnore),
76            violations_rules: RuleSet::new(RuleCategory::Violations),
77            violations_ignore: RuleSet::new(RuleCategory::ViolationsIgnore),
78            system_events: RuleSet::new(RuleCategory::SystemEvents),
79            workstation: RuleSet::new(RuleCategory::Workstation),
80            server: RuleSet::new(RuleCategory::Server),
81            local: RuleSet::new(RuleCategory::Local),
82        }
83    }
84
85    /// Convert POSIX character classes to Rust regex equivalents
86    /// Logcheck rules use POSIX classes like \[\[:alnum:\]\], \[\[:digit:\]\], etc.
87    /// which are not supported by Rust's regex crate
88    ///
89    /// This now uses the centralized regex_conversion module
90    fn convert_posix_classes(pattern: &str) -> String {
91        regex_conversion::posix_to_rust(pattern)
92    }
93
94    /// Load logcheck database from traditional directory structure
95    #[allow(dead_code)]
96    pub fn load_from_directory<P: AsRef<Path>>(base_path: P) -> Result<Self, RuleError> {
97        let base_path = base_path.as_ref();
98
99        if !base_path.exists() {
100            return Err(RuleError::DirectoryNotFound(base_path.to_path_buf()));
101        }
102
103        let mut database = Self::new();
104
105        // Load rule categories in order
106        let categories = [
107            ("cracking.d", &mut database.cracking_rules),
108            ("cracking.ignore.d", &mut database.cracking_ignore),
109            ("violations.d", &mut database.violations_rules),
110            ("violations.ignore.d", &mut database.violations_ignore),
111            ("ignore.d.paranoid", &mut database.system_events),
112            ("ignore.d.workstation", &mut database.workstation),
113            ("ignore.d.server", &mut database.server),
114            ("local.d", &mut database.local),
115        ];
116
117        for (dir_name, rule_set) in categories {
118            let dir_path = base_path.join(dir_name);
119            if dir_path.exists() && dir_path.is_dir() {
120                Self::load_rule_directory(&dir_path, rule_set)?;
121            }
122        }
123
124        // Compile all rule sets
125        database.compile_all()?;
126
127        Ok(database)
128    }
129
130    /// Load all rule files from a directory
131    #[allow(dead_code)]
132    fn load_rule_directory(dir_path: &Path, rule_set: &mut RuleSet) -> Result<(), RuleError> {
133        let entries = fs::read_dir(dir_path)?;
134
135        for entry in entries {
136            let entry = entry?;
137            let path = entry.path();
138
139            // Only process regular files
140            if path.is_file() {
141                Self::load_rule_file(&path, rule_set)?;
142            }
143        }
144
145        Ok(())
146    }
147
148    /// Load a single rule file
149    #[allow(dead_code)]
150    fn load_rule_file(file_path: &Path, rule_set: &mut RuleSet) -> Result<(), RuleError> {
151        let content = fs::read_to_string(file_path)?;
152        let mut skipped = 0;
153
154        for (line_num, line) in content.lines().enumerate() {
155            let line = line.trim();
156
157            // Skip empty lines and comments
158            if line.is_empty() || line.starts_with('#') {
159                continue;
160            }
161
162            // Skip patterns with backreferences (not supported in Rust regex)
163            if line.contains("\\1") || line.contains("\\2") || line.contains("\\3") {
164                skipped += 1;
165                continue;
166            }
167
168            // Convert POSIX character classes to Rust regex equivalents
169            let converted_pattern = Self::convert_posix_classes(line);
170
171            // Validate regex pattern
172            if let Err(e) = Regex::new(&converted_pattern) {
173                eprintln!(
174                    "Warning: Skipping invalid regex in {}:{}: '{}'",
175                    file_path.display(),
176                    line_num + 1,
177                    line
178                );
179                eprintln!("  Error: {}", e);
180                skipped += 1;
181                continue; // Skip invalid rules instead of failing
182            }
183
184            rule_set.patterns.push(converted_pattern);
185        }
186
187        if skipped > 0 {
188            eprintln!(
189                "ℹ️  Skipped {} incompatible rules in {}",
190                skipped,
191                file_path.display()
192            );
193        }
194
195        rule_set.source_files.push(file_path.to_path_buf());
196        Ok(())
197    }
198
199    /// Compile all rule sets for efficient matching
200    pub fn compile_all(&mut self) -> Result<(), RuleError> {
201        self.cracking_rules.compile()?;
202        self.cracking_ignore.compile()?;
203        self.violations_rules.compile()?;
204        self.violations_ignore.compile()?;
205        self.system_events.compile()?;
206        self.workstation.compile()?;
207        self.server.compile()?;
208        self.local.compile()?;
209        Ok(())
210    }
211
212    /// Match a log message against logcheck rules
213    /// Returns the rule category if matched, following logcheck precedence
214    pub fn match_message(&self, message: &str) -> Option<RuleCategory> {
215        // Logcheck processing order: cracking -> violations -> ignore rules
216
217        // 1. Check for cracking attempts (highest priority)
218        if self.cracking_rules.matches(message) && !self.cracking_ignore.matches(message) {
219            return Some(RuleCategory::Cracking);
220        }
221
222        // 2. Check for violations (security events)
223        if self.violations_rules.matches(message) && !self.violations_ignore.matches(message) {
224            return Some(RuleCategory::Violations);
225        }
226
227        // 3. Check ignore rules (system events) - these are "normal" events to ignore
228        if self.system_events.matches(message)
229            || self.server.matches(message)
230            || self.workstation.matches(message)
231            || self.local.matches(message)
232        {
233            return Some(RuleCategory::SystemEvents);
234        }
235
236        // No match found - this is an unclassified/new event
237        None
238    }
239
240    /// Get statistics about loaded rules
241    #[allow(dead_code)]
242    pub fn get_stats(&self) -> HashMap<String, usize> {
243        let mut stats = HashMap::new();
244
245        stats.insert(
246            "cracking_rules".to_string(),
247            self.cracking_rules.patterns.len(),
248        );
249        stats.insert(
250            "cracking_ignore".to_string(),
251            self.cracking_ignore.patterns.len(),
252        );
253        stats.insert(
254            "violations_rules".to_string(),
255            self.violations_rules.patterns.len(),
256        );
257        stats.insert(
258            "violations_ignore".to_string(),
259            self.violations_ignore.patterns.len(),
260        );
261        stats.insert(
262            "system_events".to_string(),
263            self.system_events.patterns.len(),
264        );
265        stats.insert("workstation".to_string(), self.workstation.patterns.len());
266        stats.insert("server".to_string(), self.server.patterns.len());
267        stats.insert("local".to_string(), self.local.patterns.len());
268
269        let total = stats.values().sum();
270        stats.insert("total_rules".to_string(), total);
271
272        stats
273    }
274}
275
276impl RuleSet {
277    pub fn new(category: RuleCategory) -> Self {
278        Self {
279            category,
280            patterns: Vec::new(),
281            compiled: None,
282            source_files: Vec::new(),
283        }
284    }
285
286    /// Compile patterns into RegexSet(s), automatically chunking if size limit is exceeded
287    pub fn compile(&mut self) -> Result<(), RuleError> {
288        if self.patterns.is_empty() {
289            return Ok(());
290        }
291
292        // First, try to compile all patterns in a single RegexSet
293        match RegexSet::new(&self.patterns) {
294            Ok(regex_set) => {
295                // Success! Use single optimized RegexSet
296                self.compiled = Some(CompiledRules::Single(regex_set));
297                Ok(())
298            }
299            Err(e) => {
300                // Check if error is due to compiled size limit
301                let error_msg = e.to_string();
302
303                if error_msg.contains("size limit") || error_msg.contains("CompiledTooBig") {
304                    // Automatically chunk the patterns
305                    eprintln!(
306                        "⚠️  Category {:?} ({} patterns) exceeds regex size limit, splitting into chunks...",
307                        self.category,
308                        self.patterns.len()
309                    );
310                    self.compile_chunked()
311                } else {
312                    // Other regex error, propagate it
313                    Err(RuleError::RegexError(e))
314                }
315            }
316        }
317    }
318
319    /// Compile patterns into multiple chunks using adaptive sizing
320    fn compile_chunked(&mut self) -> Result<(), RuleError> {
321        let total_patterns = self.patterns.len();
322
323        // Start with an initial chunk size estimate
324        // We use binary search to find the optimal chunk size
325        let mut chunk_size = self.find_optimal_chunk_size()?;
326
327        eprintln!(
328            "   Splitting into {} chunks of ~{} patterns each",
329            total_patterns.div_ceil(chunk_size),
330            chunk_size
331        );
332
333        let mut chunks = Vec::new();
334        let mut retry_count = 0;
335        let max_retries = 3;
336
337        loop {
338            chunks.clear();
339            let mut failed = false;
340
341            for (chunk_idx, pattern_chunk) in self.patterns.chunks(chunk_size).enumerate() {
342                match RegexSet::new(pattern_chunk) {
343                    Ok(regex_set) => {
344                        chunks.push(regex_set);
345                    }
346                    Err(e) => {
347                        if e.to_string().contains("size limit") && retry_count < max_retries {
348                            // Some chunks are more complex than others
349                            // Reduce chunk size and retry
350                            chunk_size = (chunk_size * 3) / 4; // Reduce by 25%
351                            if chunk_size < 5 {
352                                chunk_size = 5;
353                            }
354                            eprintln!(
355                                "   Chunk {} too large, reducing chunk size to {} and retrying...",
356                                chunk_idx, chunk_size
357                            );
358                            retry_count += 1;
359                            failed = true;
360                            break;
361                        } else {
362                            eprintln!(
363                                "   Failed to compile chunk {} (size {}): {}",
364                                chunk_idx,
365                                pattern_chunk.len(),
366                                e
367                            );
368                            return Err(RuleError::RegexError(e));
369                        }
370                    }
371                }
372            }
373
374            if !failed {
375                // All chunks compiled successfully
376                break;
377            }
378        }
379
380        eprintln!(
381            "✅ Successfully compiled {:?} category into {} chunks",
382            self.category,
383            chunks.len()
384        );
385
386        self.compiled = Some(CompiledRules::Chunked(chunks));
387        Ok(())
388    }
389
390    /// Find optimal chunk size using binary search
391    /// Returns the largest chunk size that compiles successfully
392    fn find_optimal_chunk_size(&self) -> Result<usize, RuleError> {
393        let total = self.patterns.len();
394
395        // Start with a conservative estimate
396        // Typical safe size is around 50-100 patterns for normal rules
397        // For very complex patterns, we may need smaller chunks
398        let mut low = 10; // Minimum chunk size (very conservative)
399        let mut high = total.min(200); // Maximum chunk size to try
400        let mut best_size = low;
401
402        // Binary search for optimal chunk size
403        while low <= high {
404            let mid = (low + high) / 2;
405
406            // Try compiling a chunk of this size
407            let test_chunk = &self.patterns[0..mid.min(total)];
408
409            match RegexSet::new(test_chunk) {
410                Ok(_) => {
411                    // This size works, try larger
412                    best_size = mid;
413                    low = mid + 1;
414                }
415                Err(e) => {
416                    if e.to_string().contains("size limit")
417                        || e.to_string().contains("CompiledTooBig")
418                    {
419                        // Too large, try smaller
420                        high = mid - 1;
421                    } else {
422                        // Other error, this might be a regex syntax issue
423                        // Try smaller chunks anyway
424                        high = mid - 1;
425                    }
426                }
427            }
428        }
429
430        // Ensure we have a valid chunk size
431        if best_size < 5 {
432            best_size = 5; // Absolute minimum fallback
433        }
434
435        Ok(best_size)
436    }
437
438    /// Check if message matches any pattern in this rule set
439    pub fn matches(&self, message: &str) -> bool {
440        match &self.compiled {
441            Some(CompiledRules::Single(regex_set)) => regex_set.is_match(message),
442            Some(CompiledRules::Chunked(chunks)) => {
443                // Check all chunks, return true if any chunk matches
444                chunks.iter().any(|chunk| chunk.is_match(message))
445            }
446            None => false,
447        }
448    }
449
450    pub fn add_pattern(&mut self, pattern: String) -> Result<(), RuleError> {
451        // Validate regex before adding
452        Regex::new(&pattern)?;
453        self.patterns.push(pattern);
454        Ok(())
455    }
456}
457
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Build a throwaway logcheck tree holding two violation patterns and
    /// two server ignore patterns (plus comment lines that must be skipped).
    fn create_test_rules_directory() -> TempDir {
        let dir = TempDir::new().unwrap();

        // violations.d/security
        let violations_dir = dir.path().join("violations.d");
        fs::create_dir_all(&violations_dir).unwrap();
        fs::write(
            violations_dir.join("security"),
            "# Security violation rules\n\
             ^.*authentication failure.*$\n\
             ^.*failed password.*$\n",
        )
        .unwrap();

        // ignore.d.server/systemd
        let server_dir = dir.path().join("ignore.d.server");
        fs::create_dir_all(&server_dir).unwrap();
        fs::write(
            server_dir.join("systemd"),
            "# SystemD ignore rules\n\
             ^.*systemd.*: Started Session.*$\n\
             ^.*systemd.*: Stopped Session.*$\n",
        )
        .unwrap();

        dir
    }

    #[test]
    fn test_load_from_directory() {
        let dir = create_test_rules_directory();
        let db = LogcheckDatabase::load_from_directory(dir.path()).unwrap();

        // Comment lines must not be counted as patterns.
        assert_eq!(db.violations_rules.patterns.len(), 2);
        assert_eq!(db.server.patterns.len(), 2);

        let stats = db.get_stats();
        assert_eq!(stats["violations_rules"], 2);
        assert_eq!(stats["server"], 2);
        assert_eq!(stats["total_rules"], 4);
    }

    #[test]
    fn test_rule_matching() {
        let dir = create_test_rules_directory();
        let db = LogcheckDatabase::load_from_directory(dir.path()).unwrap();

        // An auth-failure line hits a violations.d rule.
        let violation_msg = "Jan 01 12:00:00 host sshd[1234]: authentication failure for user";
        assert_eq!(
            db.match_message(violation_msg),
            Some(RuleCategory::Violations)
        );

        // A server ignore rule maps to the SystemEvents category.
        let system_msg = "Jan 01 12:00:00 host systemd[1]: Started Session 123 of user alice";
        assert_eq!(
            db.match_message(system_msg),
            Some(RuleCategory::SystemEvents)
        );

        // Unrecognized messages stay unclassified.
        let unknown_msg = "Jan 01 12:00:00 host myapp[999]: Some custom message";
        assert_eq!(db.match_message(unknown_msg), None);
    }

    #[test]
    fn test_rule_validation() {
        let mut rule_set = RuleSet::new(RuleCategory::Local);

        // A well-formed pattern is accepted…
        assert!(rule_set.add_pattern("^.*valid.*$".to_string()).is_ok());

        // …and a malformed one is rejected.
        assert!(rule_set.add_pattern("[invalid regex(".to_string()).is_err());
    }
}
537}