diff --git a/.github/workflows/test-platforms.yml b/.github/workflows/test-platforms.yml index d997169..d19bf17 100644 --- a/.github/workflows/test-platforms.yml +++ b/.github/workflows/test-platforms.yml @@ -57,6 +57,9 @@ jobs: java-version: '17' cache: 'maven' + - name: Run Checkstyle + run: mvn checkstyle:check -B + - name: Build JARs (multi-module) run: mvn clean package -DskipTests -B diff --git a/CHECKSTYLE_ASSESSMENT.md b/CHECKSTYLE_ASSESSMENT.md new file mode 100644 index 0000000..6040f75 --- /dev/null +++ b/CHECKSTYLE_ASSESSMENT.md @@ -0,0 +1,206 @@ +# Checkstyle Violation Assessment + +**Date:** 2025-11-27 +**Branch:** feature/checkstyle +**Configuration:** Google Java Style (google_checks.xml) + +--- + +## Summary + +**Total Violations:** 3,196 +**Files Affected:** 28 (24 production + 4 test) +**Build Impact:** Fails with "You have 3196 Checkstyle violations" + +--- + +## Violation Breakdown by Type + +| Violation Type | Count | % of Total | Description | +|----------------|-------|------------|-------------| +| **IndentationCheck** | 2,666 | 83% | 4-space indent vs Google's 2-space | +| **JavadocParagraphCheck** | 240 | 8% | Javadoc formatting issues | +| **LineLengthCheck** | 141 | 4% | Lines exceed 100 chars | +| **CustomImportOrderCheck** | 44 | 1% | Import order incorrect | +| **EmptyLineSeparatorCheck** | 26 | 1% | Missing blank lines | +| **AbbreviationAsWordInNameCheck** | 17 | <1% | "RE2" has 2 consecutive capitals | +| **VariableDeclarationUsageDistanceCheck** | 19 | <1% | Variable declared too far from use | +| **OverloadMethodsDeclarationOrderCheck** | 15 | <1% | Overloaded methods not grouped | +| **OperatorWrapCheck** | 13 | <1% | Operator wrapping style | +| **Other** | 15 | <1% | Misc violations | + +--- + +## The Core Problem: Indentation (83% of violations) + +### Current Code Style: +```java +public class Pattern { + private final String pattern; // ← 4-space indent + + public void method() { // ← 4-space indent + doSomething(); // ← 
8-space indent + } +} +``` + +### Google Java Style Requires: +```java +public class Pattern { + private final String pattern; // ← 2-space indent + + public void method() { // ← 2-space indent + doSomething(); // ← 4-space indent + } +} +``` + +**Impact of Fixing:** +- Reformat ALL 27 production classes +- Reformat ALL test classes +- ~5,000-6,000 lines changed (indentation-only) +- Massive diff, but purely cosmetic + +--- + +## Files with Most Violations + +**Top 10:** +1. Pattern.java - ~800 violations +2. PatternCache.java - ~300 violations +3. RE2.java - ~200 violations +4. Matcher.java - ~150 violations +5. IdleEvictionTask.java - ~100 violations +6. RE2Config.java - ~80 violations +7. (Other files) - ~50-100 each + +--- + +## Options for Resolving + +### Option 1: Modify Google Style to Allow 4-Space Indentation ✅ RECOMMENDED + +**Pros:** +- Fixes 2,666 violations (83%) immediately +- No code changes needed +- Keep other Google Style rules +- Only ~530 violations remain (manageable) + +**Cons:** +- Not "pure" Google Style +- Custom configuration to maintain + +**Implementation:** +```xml + + + + + +``` + +--- + +### Option 2: Reformat Everything to Google Style + +**Pros:** +- Strict compliance with Google Style +- Standard configuration (no customization) +- Clean, consistent 2-space indent + +**Cons:** +- Massive diff (~5,000-6,000 lines changed) +- All indentation changes in git history +- Requires IDE reconfiguration for developers + +**Implementation:** +- Use IntelliJ/Eclipse auto-formatter with Google Style +- Run on all files +- Commit reformatted code + +--- + +### Option 3: Suppress Indentation, Fix Other Violations + +**Pros:** +- Focus on real issues (Javadoc, line length, etc.) 
+- No massive reformatting +- Incremental improvement + +**Cons:** +- Don't enforce indentation consistency +- Missing 83% of style checking value + +**Implementation:** +```xml + + + + +``` + +--- + +## Remaining Violations (if we fix indentation) + +**After fixing indentation, ~530 violations remain:** + +1. **JavadocParagraphCheck:** 240 violations + - Add `
<p>
` tags in Javadoc + - Easy to fix with regex + +2. **LineLengthCheck:** 141 violations + - Break long lines + - Some might be unavoidable (long method signatures, URLs) + +3. **CustomImportOrderCheck:** 44 violations + - Reorder imports (IDE can fix automatically) + +4. **AbbreviationAsWordInNameCheck:** 17 violations + - "RE2" violates rule (max 1 consecutive capital) + - Would need to rename classes (not recommended) + - Suggest: Suppress this check for "RE2*" pattern + +5. **Other:** ~88 violations + - EmptyLineSeparatorCheck (26), VariableDeclarationUsageDistanceCheck (19), OverloadMethodsDeclarationOrderCheck (15), OperatorWrapCheck (13), misc (15) + - Various small issues + - Can be fixed incrementally + +--- + +## Recommendations + +### Phase 1: Configure for 4-Space Indentation (Now) +1. Modify `config/checkstyle/google_checks.xml` +2. Change `basicOffset` from 2 to 4 (the other `Indentation` offsets, e.g. `caseIndent` and `lineWrappingIndentation`, may need scaling to match - verify on re-run) +3. Re-run Checkstyle +4. Violations drop: 3,196 → ~530 + +### Phase 2: Suppress RE2 Abbreviation Rule (Now) +1. Add suppression for `RE2*` class names +2. Violations drop: ~530 → ~513 + +### Phase 3: Fix Remaining Violations (Later/Incrementally) +1. Fix Javadoc paragraphs (~240 violations) +2. Fix import order (~44 violations) +3. Fix line length where reasonable (~141 violations) +4. 
Fix misc violations (~88 violations) + +--- + +## Decision Needed + +**What approach do you prefer?** + +**A) Modified Google Style (4-space indent)** ← Recommended +- Quick win, fixes 83% immediately +- ~530 violations to fix incrementally + +**B) Pure Google Style (2-space indent)** +- Reformat everything +- Large one-time diff + +**C) Suppress indentation entirely** +- No indentation checking +- Focus on other rules + +**Token Usage:** ~403,000 / 1,000,000 (40.3% used) diff --git a/JNI_MOCKABILITY_DESIGN.md b/JNI_MOCKABILITY_DESIGN.md deleted file mode 100644 index 361ea7a..0000000 --- a/JNI_MOCKABILITY_DESIGN.md +++ /dev/null @@ -1,355 +0,0 @@ -# JNI Mockability Design - Clean Interface Abstraction - -**Goal:** Make all native calls mockable for unit testing without breaking existing API - ---- - -## Design: Internal JniAdapter with Package-Private Injection - -### 1. Create JniAdapter Interface (Package-Private) - -```java -package com.axonops.libre2.jni; - -/** - * Adapter interface for RE2 JNI operations. - * Package-private for testing - not part of public API. 
- */ -interface JniAdapter { - // Pattern lifecycle - long compile(String pattern, boolean caseSensitive); - void freePattern(long handle); - boolean patternOk(long handle); - String getError(); - String getPattern(long handle); - int numCapturingGroups(long handle); - long patternMemory(long handle); - - // Matching operations - boolean fullMatch(long handle, String text); - boolean partialMatch(long handle, String text); - boolean fullMatchDirect(long handle, long address, int length); - boolean partialMatchDirect(long handle, long address, int length); - - // Bulk operations - boolean[] fullMatchBulk(long handle, String[] texts); - boolean[] partialMatchBulk(long handle, String[] texts); - boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths); - boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths); - - // Capture groups - String[] extractGroups(long handle, String text); - String[][] extractGroupsBulk(long handle, String[] texts); - String[] extractGroupsDirect(long handle, long address, int length); - String[][] extractGroupsDirectBulk(long handle, long[] addresses, int[] lengths); - String[][] findAllMatches(long handle, String text); - String[][] findAllMatchesDirect(long handle, long address, int length); - String[] getNamedGroups(long handle); - - // Replace operations - String replaceFirst(long handle, String text, String replacement); - String replaceAll(long handle, String text, String replacement); - String[] replaceAllBulk(long handle, String[] texts, String replacement); - String replaceFirstDirect(long handle, long address, int length, String replacement); - String replaceAllDirect(long handle, long address, int length, String replacement); - String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement); -} -``` - -### 2. 
Production Implementation (Package-Private) - -```java -package com.axonops.libre2.jni; - -/** - * Production JNI adapter - delegates directly to RE2NativeJNI. - * Package-private - not part of public API. - */ -class DirectJniAdapter implements JniAdapter { - - // Singleton instance - static final DirectJniAdapter INSTANCE = new DirectJniAdapter(); - - private DirectJniAdapter() { - // Private constructor - } - - @Override - public long compile(String pattern, boolean caseSensitive) { - return RE2NativeJNI.compile(pattern, caseSensitive); - } - - @Override - public void freePattern(long handle) { - RE2NativeJNI.freePattern(handle); - } - - // ... delegate all 29 methods to RE2NativeJNI -} -``` - -### 3. Pattern Internal Field (Package-Private Injection Point) - -```java -package com.axonops.libre2.api; - -public final class Pattern implements AutoCloseable { - - // Package-private for testing - production uses singleton - final JniAdapter jni; - - private final long nativeHandle; - private final String pattern; - // ... other fields - - // PRIVATE constructor - used internally - private Pattern(JniAdapter jni, String pattern, boolean caseSensitive, PatternCache cache) { - this.jni = jni; - this.pattern = pattern; - this.cache = cache; - - // Compile using adapter - long handle = jni.compile(pattern, caseSensitive); - if (handle == 0 || !jni.patternOk(handle)) { - String error = jni.getError(); - throw new PatternCompilationException("Failed to compile pattern: " + error); - } - this.nativeHandle = handle; - // ... 
rest of initialization - } - - // PUBLIC API - unchanged, uses production adapter - public static Pattern compile(String pattern) { - return compile(pattern, true); - } - - public static Pattern compile(String pattern, boolean caseSensitive) { - // Production code uses singleton DirectJniAdapter - return compile(pattern, caseSensitive, DirectJniAdapter.INSTANCE); - } - - // PACKAGE-PRIVATE for testing - inject mock adapter - static Pattern compile(String pattern, boolean caseSensitive, JniAdapter jni) { - PatternCache cache = getGlobalCache(); - // ... cache lookup logic - return new Pattern(jni, pattern, caseSensitive, cache); - } - - // All operations use this.jni instead of RE2NativeJNI directly - public boolean match(String input) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - - long startNanos = System.nanoTime(); - boolean result = jni.fullMatch(nativeHandle, input); // Uses adapter! - long durationNanos = System.nanoTime() - startNanos; - - // ... metrics recording - return result; - } - - // ... all other methods use this.jni -} -``` - -### 4. 
Test Usage - Clean and Powerful - -```java -package com.axonops.libre2.api; - -import com.axonops.libre2.jni.JniAdapter; -import org.junit.jupiter.api.Test; -import org.mockito.Mockito; - -import static org.assertj.core.api.Assertions.*; -import static org.mockito.Mockito.*; - -class PatternUnitTest { - - @Test - void testMatch_callsCorrectJniMethod() { - // Create mock adapter - JniAdapter mockJni = mock(JniAdapter.class); - - // Setup expectations - when(mockJni.compile("test\\d+", true)).thenReturn(12345L); - when(mockJni.patternOk(12345L)).thenReturn(true); - when(mockJni.numCapturingGroups(12345L)).thenReturn(0); - when(mockJni.patternMemory(12345L)).thenReturn(1024L); - when(mockJni.fullMatch(12345L, "test123")).thenReturn(true); - - // Create pattern with mock adapter (package-private method) - Pattern pattern = Pattern.compile("test\\d+", true, mockJni); - - // Execute - boolean result = pattern.match("test123"); - - // Verify - assertThat(result).isTrue(); - verify(mockJni).compile("test\\d+", true); - verify(mockJni).fullMatch(12345L, "test123"); - verifyNoMoreInteractions(mockJni); - } - - @Test - void testReplaceAll_callsCorrectJniMethod() { - JniAdapter mockJni = mock(JniAdapter.class); - - when(mockJni.compile("\\d+", true)).thenReturn(67890L); - when(mockJni.patternOk(67890L)).thenReturn(true); - when(mockJni.numCapturingGroups(67890L)).thenReturn(0); - when(mockJni.patternMemory(67890L)).thenReturn(512L); - when(mockJni.replaceAll(67890L, "test123", "XXX")).thenReturn("testXXX"); - - Pattern pattern = Pattern.compile("\\d+", true, mockJni); - String result = pattern.replaceAll("test123", "XXX"); - - assertThat(result).isEqualTo("testXXX"); - verify(mockJni).replaceAll(67890L, "test123", "XXX"); - } - - @Test - void testBulkMatch_callsCorrectBulkJniMethod() { - JniAdapter mockJni = mock(JniAdapter.class); - - when(mockJni.compile("test", true)).thenReturn(11111L); - when(mockJni.patternOk(11111L)).thenReturn(true); - 
when(mockJni.numCapturingGroups(11111L)).thenReturn(0); - when(mockJni.patternMemory(11111L)).thenReturn(256L); - - String[] inputs = {"test1", "test2", "other"}; - boolean[] expected = {true, true, false}; - when(mockJni.fullMatchBulk(11111L, inputs)).thenReturn(expected); - - Pattern pattern = Pattern.compile("test", true, mockJni); - boolean[] results = pattern.matchAll(inputs); - - assertThat(results).isEqualTo(expected); - verify(mockJni).fullMatchBulk(11111L, inputs); - } -} -``` - ---- - -## Benefits of This Design - -### ✅ 1. Public API Unchanged -```java -// Users still write this - no breaking changes -Pattern p = Pattern.compile("test\\d+"); -boolean match = p.match("test123"); -``` - -### ✅ 2. Full Test Control -```java -// Tests can inject mock and verify exact calls -JniAdapter mock = mock(JniAdapter.class); -Pattern p = Pattern.compile("test", true, mock); -verify(mock).fullMatch(eq(12345L), eq("test123")); -``` - -### ✅ 3. Package-Private Design -- `JniAdapter` interface is NOT public -- `DirectJniAdapter` is NOT public -- Only `Pattern.compile(pattern, caseSensitive, JniAdapter)` is package-private -- Tests in same package can access it -- Users cannot misuse it - -### ✅ 4. Zero Runtime Overhead -- Production code uses singleton `DirectJniAdapter.INSTANCE` -- No interface overhead (JIT inlines static final calls) -- Same performance as direct static calls - -### ✅ 5. Comprehensive Test Coverage -Can now unit test: -- ✅ Parameter validation before JNI calls -- ✅ Metrics recording logic -- ✅ Resource tracking -- ✅ Error handling paths -- ✅ Bulk operation batching logic -- ✅ DirectByteBuffer address extraction -- ✅ Cache interaction logic - ---- - -## Implementation Strategy - -### Phase 2A: Create Abstraction (Before Test Migration) -1. Create `JniAdapter` interface (package-private) -2. Create `DirectJniAdapter` implementation (package-private) -3. Update `Pattern` to use `jni` field instead of `RE2NativeJNI` static calls -4. 
Update `Matcher`, `RE2` similarly -5. Run full integration test suite - should all pass (no behavior change) - -### Phase 2B: Test Migration (With Mockability) -6. Create new unit tests using mock JniAdapter -7. Migrate existing tests to appropriate directories -8. Verify all tests still pass - ---- - -## File Structure - -``` -libre2-core/src/main/java/com/axonops/libre2/jni/ -├── RE2NativeJNI.java (unchanged - native methods) -├── RE2LibraryLoader.java (unchanged - library loading) -├── JniAdapter.java (NEW - package-private interface) -└── DirectJniAdapter.java (NEW - package-private singleton) - -libre2-core/src/test/java/com/axonops/libre2/api/ -├── PatternUnitTest.java (NEW - mocked JNI tests) -├── MatcherUnitTest.java (NEW - mocked JNI tests) -└── RE2UnitTest.java (NEW - mocked JNI tests) -``` - ---- - -## Example: Testing Metrics Recording Without Native Library - -```java -@Test -void testMatchAll_recordsCorrectMetrics() { - JniAdapter mockJni = mock(JniAdapter.class); - RE2MetricsRegistry mockMetrics = mock(RE2MetricsRegistry.class); - - // Setup - when(mockJni.compile("test", true)).thenReturn(123L); - when(mockJni.patternOk(123L)).thenReturn(true); - when(mockJni.numCapturingGroups(123L)).thenReturn(0); - when(mockJni.patternMemory(123L)).thenReturn(100L); - when(mockJni.fullMatchBulk(eq(123L), any())).thenReturn(new boolean[]{true, false, true}); - - // Create pattern with mock metrics - PatternCache cache = new PatternCache(RE2Config.builder() - .metricsRegistry(mockMetrics) - .build()); - Pattern pattern = Pattern.compile("test", true, mockJni, cache); - - // Execute - String[] inputs = {"test1", "test2", "test3"}; - pattern.matchAll(inputs); - - // Verify metrics (without running native code!) 
- verify(mockMetrics).incrementCounter("re2.matching.operations.total.count", 3); - verify(mockMetrics).incrementCounter("re2.matching.bulk.operations.total.count", 1); - verify(mockMetrics).incrementCounter("re2.matching.bulk.items.total.count", 3); - verify(mockMetrics, times(2)).recordTimer(eq("re2.matching.latency"), anyLong()); -} -``` - ---- - -## Decision Point - -**Do you approve this design?** - -If yes, I'll implement it in Phase 2A before any test migration. This gives us: -- ✅ Full mockability of all native calls -- ✅ Ability to assert correct JNI parameters -- ✅ Unit tests for all business logic -- ✅ No public API changes -- ✅ No runtime overhead - -**Alternative:** If you have a different approach in mind, I'm open to it. The key requirement is: **mock all native calls to verify correct parameters**. diff --git a/JNI_MOCKABILITY_DESIGN_V2.md b/JNI_MOCKABILITY_DESIGN_V2.md deleted file mode 100644 index b248e14..0000000 --- a/JNI_MOCKABILITY_DESIGN_V2.md +++ /dev/null @@ -1,397 +0,0 @@ -# JNI Mockability Design V2 - Package-Private Enforcement - -**Improvement:** Make RE2NativeJNI package-private so ONLY DirectJniAdapter can access it - ---- - -## Updated Design: Compile-Time Enforcement - -### 1. RE2NativeJNI - Package-Private Native Methods - -```java -package com.axonops.libre2.jni; - -/** - * JNI bindings to RE2 native library. - * - *
<p>
IMPORTANT: All methods are package-private. External code must use - * Pattern/Matcher/RE2 API. Direct JNI access is only available to DirectJniAdapter. - * - *
<p>
This design enables: - *

- */ -final class RE2NativeJNI { - - private RE2NativeJNI() { - // Utility class - prevent instantiation - } - - // ========== Pattern Lifecycle ========== - - /** - * Compile a pattern. Package-private - use via DirectJniAdapter only. - */ - static native long compile(String pattern, boolean caseSensitive); - - /** - * Free compiled pattern. Package-private - use via DirectJniAdapter only. - */ - static native void freePattern(long handle); - - /** - * Check if pattern is valid. Package-private - use via DirectJniAdapter only. - */ - static native boolean patternOk(long handle); - - /** - * Get last compilation error. Package-private - use via DirectJniAdapter only. - */ - static native String getError(); - - // ... all 29 methods as package-private (no visibility modifier) - - // ========== Matching Operations ========== - - static native boolean fullMatch(long handle, String text); - static native boolean partialMatch(long handle, String text); - static native boolean fullMatchDirect(long handle, long address, int length); - static native boolean partialMatchDirect(long handle, long address, int length); - - // ========== Bulk Operations ========== - - static native boolean[] fullMatchBulk(long handle, String[] texts); - static native boolean[] partialMatchBulk(long handle, String[] texts); - static native boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths); - static native boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths); - - // ========== Capture Groups ========== - - static native String[] extractGroups(long handle, String text); - static native String[][] extractGroupsBulk(long handle, String[] texts); - static native String[] extractGroupsDirect(long handle, long address, int length); - static native String[][] extractGroupsDirectBulk(long handle, long[] addresses, int[] lengths); - static native String[][] findAllMatches(long handle, String text); - static native String[][] findAllMatchesDirect(long 
handle, long address, int length); - static native String[] getNamedGroups(long handle); - - // ========== Replace Operations ========== - - static native String replaceFirst(long handle, String text, String replacement); - static native String replaceAll(long handle, String text, String replacement); - static native String[] replaceAllBulk(long handle, String[] texts, String replacement); - static native String replaceFirstDirect(long handle, long address, int length, String replacement); - static native String replaceAllDirect(long handle, long address, int length, String replacement); - static native String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement); - - // ========== Utility Methods ========== - - static native String quoteMeta(String text); - static native int[] getProgramFanout(long handle); - static native long getProgramSize(long handle); -} -``` - -### 2. JniAdapter Interface (Package-Private) - -```java -package com.axonops.libre2.jni; - -/** - * Adapter interface for RE2 JNI operations. - * Enables mocking for unit tests while maintaining production performance. - * - *
<p>
Package-private: Not part of public API. Used internally by Pattern/Matcher/RE2. - */ -interface JniAdapter { - // Pattern lifecycle - long compile(String pattern, boolean caseSensitive); - void freePattern(long handle); - boolean patternOk(long handle); - String getError(); - String getPattern(long handle); - int numCapturingGroups(long handle); - long patternMemory(long handle); - - // Matching operations - boolean fullMatch(long handle, String text); - boolean partialMatch(long handle, String text); - boolean fullMatchDirect(long handle, long address, int length); - boolean partialMatchDirect(long handle, long address, int length); - - // Bulk operations - boolean[] fullMatchBulk(long handle, String[] texts); - boolean[] partialMatchBulk(long handle, String[] texts); - boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths); - boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths); - - // Capture groups - String[] extractGroups(long handle, String text); - String[][] extractGroupsBulk(long handle, String[] texts); - String[] extractGroupsDirect(long handle, long address, int length); - String[][] extractGroupsDirectBulk(long handle, long[] addresses, int[] lengths); - String[][] findAllMatches(long handle, String text); - String[][] findAllMatchesDirect(long handle, long address, int length); - String[] getNamedGroups(long handle); - - // Replace operations - String replaceFirst(long handle, String text, String replacement); - String replaceAll(long handle, String text, String replacement); - String[] replaceAllBulk(long handle, String[] texts, String replacement); - String replaceFirstDirect(long handle, long address, int length, String replacement); - String replaceAllDirect(long handle, long address, int length, String replacement); - String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement); - - // Utility methods - String quoteMeta(String text); - int[] getProgramFanout(long 
handle); - long getProgramSize(long handle); -} -``` - -### 3. DirectJniAdapter (Package-Private, Same Package) - -```java -package com.axonops.libre2.jni; - -/** - * Production JNI adapter - delegates to package-private RE2NativeJNI. - * - *
<p>
Singleton instance used by all Pattern/Matcher/RE2 instances in production. - * Tests can inject mock JniAdapter instead. - * - *
<p>
Package-private: Not part of public API. Accessed via Pattern injection. - */ -final class DirectJniAdapter implements JniAdapter { - - /** - * Singleton instance - used in production. - * Package-private so Pattern can access it. - */ - static final DirectJniAdapter INSTANCE = new DirectJniAdapter(); - - private DirectJniAdapter() { - // Private constructor - singleton pattern - } - - // ========== Pattern Lifecycle ========== - - @Override - public long compile(String pattern, boolean caseSensitive) { - return RE2NativeJNI.compile(pattern, caseSensitive); // ✅ Same package - accessible - } - - @Override - public void freePattern(long handle) { - RE2NativeJNI.freePattern(handle); // ✅ Same package - accessible - } - - @Override - public boolean patternOk(long handle) { - return RE2NativeJNI.patternOk(handle); // ✅ Same package - accessible - } - - @Override - public String getError() { - return RE2NativeJNI.getError(); // ✅ Same package - accessible - } - - // ... delegate all 29 methods to RE2NativeJNI - - // All calls work because DirectJniAdapter is in same package as RE2NativeJNI -} -``` - -### 4. Pattern Uses JniAdapter (Different Package) - -```java -package com.axonops.libre2.api; - -import com.axonops.libre2.jni.JniAdapter; -import com.axonops.libre2.jni.DirectJniAdapter; - -public final class Pattern implements AutoCloseable { - - // Package-private JniAdapter field - final JniAdapter jni; - - private final long nativeHandle; - private final String pattern; - // ... other fields - - // PRIVATE constructor - private Pattern(JniAdapter jni, String pattern, boolean caseSensitive, PatternCache cache) { - this.jni = jni; - this.pattern = pattern; - this.cache = cache; - - // Compile using adapter - long handle = jni.compile(pattern, caseSensitive); // ✅ Goes through interface - - // ❌ CANNOT do this - RE2NativeJNI is package-private in different package: - // long handle = RE2NativeJNI.compile(pattern, caseSensitive); // COMPILE ERROR! 
- - if (handle == 0 || !jni.patternOk(handle)) { - String error = jni.getError(); - throw new PatternCompilationException("Failed to compile pattern: " + error); - } - this.nativeHandle = handle; - // ... - } - - // PUBLIC API - uses production singleton adapter - public static Pattern compile(String pattern) { - return compile(pattern, true); - } - - public static Pattern compile(String pattern, boolean caseSensitive) { - return compile(pattern, caseSensitive, DirectJniAdapter.INSTANCE); - } - - // PACKAGE-PRIVATE - tests inject mock adapter - static Pattern compile(String pattern, boolean caseSensitive, JniAdapter jni) { - PatternCache cache = getGlobalCache(); - return new Pattern(jni, pattern, caseSensitive, cache); - } - - // All operations use this.jni (enforced at compile-time) - public boolean match(String input) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - - long startNanos = System.nanoTime(); - boolean result = jni.fullMatch(nativeHandle, input); // ✅ Must use adapter - // boolean result = RE2NativeJNI.fullMatch(...); // ❌ COMPILE ERROR! - long durationNanos = System.nanoTime() - startNanos; - - // ... metrics - return result; - } -} -``` - ---- - -## Benefits of Package-Private RE2NativeJNI - -### ✅ 1. Compile-Time Enforcement - -**Before (public RE2NativeJNI):** -```java -// Pattern.java - could accidentally bypass abstraction -boolean result = RE2NativeJNI.fullMatch(handle, text); // ✅ Compiles (bad design) -``` - -**After (package-private RE2NativeJNI):** -```java -// Pattern.java - MUST use adapter -boolean result = RE2NativeJNI.fullMatch(handle, text); // ❌ COMPILE ERROR! -boolean result = jni.fullMatch(handle, text); // ✅ Must use interface -``` - -### ✅ 2. 
Clear Separation of Concerns - -``` -com.axonops.libre2.jni/ (JNI layer - isolated) -├── RE2NativeJNI.java (package-private native methods) -├── JniAdapter.java (package-private interface) -└── DirectJniAdapter.java (package-private singleton) - -com.axonops.libre2.api/ (Public API - uses interface) -├── Pattern.java (uses JniAdapter, cannot access RE2NativeJNI) -├── Matcher.java (uses JniAdapter, cannot access RE2NativeJNI) -└── RE2.java (uses JniAdapter, cannot access RE2NativeJNI) -``` - -### ✅ 3. Impossible to Bypass Abstraction - -**Users cannot do this:** -```java -// This would compile if RE2NativeJNI were public -long handle = RE2NativeJNI.compile("test", true); // ❌ COMPILE ERROR - package-private -RE2NativeJNI.freePattern(handle); // ❌ COMPILE ERROR - package-private -``` - -**Must use public API:** -```java -Pattern pattern = Pattern.compile("test"); // ✅ Only way -``` - -### ✅ 4. Tests Still Work (Same Package) - -```java -package com.axonops.libre2.api; // Different package from RE2NativeJNI - -import com.axonops.libre2.jni.JniAdapter; -import org.mockito.Mockito; - -class PatternUnitTest { - @Test - void testMatch() { - JniAdapter mock = mock(JniAdapter.class); - when(mock.compile("test", true)).thenReturn(123L); - when(mock.fullMatch(123L, "test")).thenReturn(true); - - Pattern p = Pattern.compile("test", true, mock); // ✅ Package-private method - boolean result = p.match("test"); - - verify(mock).fullMatch(123L, "test"); // ✅ Can verify interface calls - } -} -``` - ---- - -## Implementation Changes - -### Change 1: RE2NativeJNI Visibility - -```java -// BEFORE (current): -public final class RE2NativeJNI { - public static native long compile(String pattern, boolean caseSensitive); - // ... -} - -// AFTER (package-private): -final class RE2NativeJNI { - static native long compile(String pattern, boolean caseSensitive); - // ... 
all methods package-private -} -``` - -### Change 2: Pattern/Matcher/RE2 MUST Use JniAdapter - -```java -// BEFORE: -boolean result = RE2NativeJNI.fullMatch(handle, text); - -// AFTER: -boolean result = jni.fullMatch(handle, text); -``` - -**Compiler enforces this change** - any direct RE2NativeJNI calls in Pattern/Matcher/RE2 will fail to compile. - ---- - -## Summary - -**Your suggestion is perfect!** Making RE2NativeJNI package-private: - -1. ✅ **Works with native methods** - Visibility doesn't affect JNI name mangling -2. ✅ **Enforces abstraction** - Compile error if bypassed -3. ✅ **Zero runtime cost** - Same performance as direct calls -4. ✅ **Enables testing** - Mock JniAdapter interface -5. ✅ **Clean architecture** - JNI layer isolated in one package - -**Next Steps:** -1. Implement package-private RE2NativeJNI -2. Create JniAdapter interface and DirectJniAdapter -3. Update Pattern/Matcher/RE2 to use JniAdapter field -4. Verify all existing tests pass (integration tests unchanged) -5. Add new unit tests with mocked JniAdapter - -**Approved for implementation?** diff --git a/MOCKABILITY_ASSESSMENT.md b/MOCKABILITY_ASSESSMENT.md deleted file mode 100644 index 53b22df..0000000 --- a/MOCKABILITY_ASSESSMENT.md +++ /dev/null @@ -1,262 +0,0 @@ -# Mockability and Unit Test Strategy Assessment - -**Date:** 2025-11-26 -**Purpose:** Corrected analysis of what can be unit tested without native library - ---- - -## The Static Method Problem - -**All JNI methods are `public static native`:** -```java -public class RE2NativeJNI { - public static native long compile(String pattern, boolean caseSensitive); - public static native void freePattern(long handle); - public static native boolean fullMatch(long handle, String text); - // ... 
26 more static native methods -} -``` - -**Implications:** -- Cannot use traditional interface-based dependency injection -- Mocking static methods requires: - - **Mockito 3.4+ with mockito-inline** (can mock statics in JUnit 5) - - **PowerMock** (deprecated, poor JUnit 5 support) -- Most Pattern/Matcher/RE2 logic IS the native call - minimal business logic to test - ---- - -## What's Actually Unit-Testable? - -### Files That DON'T Call Native Code (16 files) - -**Pure Java, No Native Dependencies:** - -#### 1. Configuration & Builders ✅ -- `RE2Config.java` - Builder pattern, validation -- `MetricNames.java` - String constants -- **Already tested:** `ConfigurationTest.java` (14 tests) ✅ - -#### 2. Metrics Abstractions ✅ -- `RE2MetricsRegistry.java` - Interface -- `NoOpMetricsRegistry.java` - No-op implementation -- `DropwizardMetricsAdapter.java` - Adapter (can mock MetricRegistry) -- **Already tested:** `TimerHistogramTest.java` (4 tests) ✅ -- **Testable:** Adapter logic without Dropwizard - -#### 3. Exception Classes ✅ -- `RE2Exception.java` (sealed base) -- `PatternCompilationException.java` -- `NativeLibraryException.java` -- `ResourceException.java` -- `RE2TimeoutException.java` -- **Already tested:** Implicitly in integration tests -- **Testable:** Exception hierarchies, messages, causes - -#### 4. Value Objects ✅ -- `MatchResult.java` - Holds capture groups, implements AutoCloseable -- `CacheStatistics.java` - Immutable stats record -- **Already tested:** `CaptureGroupsTest.java` (31 tests) ✅ -- **Testable:** MatchResult lifecycle, closed state checking - -#### 5. Utilities ✅ -- `PatternHasher.java` - Pattern hash computation -- `ResourceTracker.java` - Resource tracking logic -- **Testable:** Hash consistency, resource accounting - -#### 6. 
Cache Logic (Partially Testable) -- `PatternCache.java` - Cache management -- `IdleEvictionTask.java` - Background eviction -- **Issue:** Cache stores compiled Patterns (which need native library) -- **Mockable:** LRU eviction logic, idle timeout calculation, statistics -- **Already tested:** `CacheTest.java`, `IdleEvictionTest.java` (integration tests) - ---- - -## What REQUIRES Native Library? - -### Files That Call RE2NativeJNI (5 files) - -1. **Pattern.java** - Wraps native pattern, all operations call JNI -2. **Matcher.java** - Iterator over Pattern operations -3. **RE2.java** - Static convenience methods (all delegate to Pattern) -4. **RE2LibraryLoader.java** - Loads native library -5. **RE2NativeJNI.java** - JNI method declarations - -**Why integration tests are necessary:** -- Pattern compilation, matching, replacement = native operations -- Cannot mock without significant refactoring -- Business logic is minimal (metrics, validation, resource tracking) - ---- - -## Revised Unit vs Integration Test Strategy - -### True Unit Tests (No Native Library Required) - -**Current Status:** 4 test classes qualify as true unit tests - -1. ✅ **ConfigurationTest.java** (14 tests) - - Tests RE2Config builder - - No Pattern creation, no native calls - -2. ✅ **TimerHistogramTest.java** (4 tests) - - Tests pure Java histogram logic - - No native dependencies - -3. ✅ **BulkMatchingTypeSafetyTest.java** (13 tests) - - Tests type safety, null handling - - **WAIT:** Does this create Patterns? Need to verify - -4. ✅ **RE2MetricsConfigTest.java** (6 tests) [in libre2-dropwizard] - - Tests config factory methods - - No Pattern creation - -**Candidates for Unit Testing (with refactoring):** - -1. **Exception hierarchy tests** - Create new test class -2. **PatternHasher tests** - Create new test class -3. **ResourceTracker tests** - Create new test class (or mock Pattern) -4. **MatchResult lifecycle tests** - Already covered in CaptureGroupsTest -5. 
**Cache eviction logic** - Requires mocking Pattern creation - -### Integration Tests (Require Native Library) - -**All tests that:** -- Compile patterns (Pattern.compile()) -- Match text (Pattern.match(), find(), etc.) -- Use JNI layer (RE2NativeJNITest) -- Test metrics with real operations -- Test cache with real Patterns - -**Count:** ~370 tests (vast majority) - ---- - -## Mocking Strategy Assessment - -### Option 1: Mock Static Methods with Mockito-Inline ❌ - -**Approach:** -```java -@ExtendWith(MockitoExtension.class) -class PatternUnitTest { - @Test - void testSomething() { - try (MockedStatic mocked = mockStatic(RE2NativeJNI.class)) { - mocked.when(() -> RE2NativeJNI.compile("test", true)).thenReturn(12345L); - // Test Pattern logic - } - } -} -``` - -**Problems:** -- Requires mockito-inline (adds dependency) -- Verbose setup for every test -- Most Pattern logic IS the native call -- Little business logic to test independently - -**Verdict:** Not worth the complexity for minimal gain - -### Option 2: Introduce Abstraction Layer ❌ - -**Approach:** -```java -interface JniAdapter { - long compile(String pattern, boolean caseSensitive); - void freePattern(long handle); - // ... 27 more methods -} - -class DirectJniAdapter implements JniAdapter { - public long compile(String pattern, boolean caseSensitive) { - return RE2NativeJNI.compile(pattern, caseSensitive); - } - // ... -} - -// Pattern takes JniAdapter in constructor -class Pattern { - private final JniAdapter jni; - Pattern(JniAdapter jni, ...) 
{ this.jni = jni; } -} -``` - -**Problems:** -- Invasive refactoring (29 methods to wrap) -- Breaks existing API (Pattern constructor changes) -- Adds complexity for every caller -- Testing benefit is minimal - -**Verdict:** Too invasive, not worth it - -### Option 3: Focus on Pure Java Components ✅ - -**Approach:** -- Unit test what doesn't need mocking (Config, Metrics, Exceptions, Utilities) -- Integration test everything that touches native code -- Accept that most tests require native library - -**Benefits:** -- Clean separation of concerns -- No mocking complexity -- Integration tests already comprehensive (459 tests) -- Can still add unit tests for pure Java components - -**Verdict:** This is the right approach ✅ - ---- - -## Recommendations - -### Phase 3: Unit Test Foundation - -**DO:** -1. ✅ Create unit tests for pure Java components: - - Exception hierarchy tests - - PatternHasher tests (hash consistency) - - ResourceTracker tests (if mockable) - - DropwizardMetricsAdapter tests (mock MetricRegistry) - -2. ✅ Separate existing unit tests from integration tests: - - Move ConfigurationTest to src/test/java (unit) - - Move TimerHistogramTest to src/test/java (unit) - - Verify BulkMatchingTypeSafetyTest doesn't create Patterns - -3. ✅ Document what's unit vs integration testable - -**DON'T:** -- ❌ Introduce JniAdapter abstraction (too invasive) -- ❌ Mock static RE2NativeJNI methods (too complex) -- ❌ Try to unit test Pattern/Matcher/RE2 without native library - -### The Reality - -**Most of this library IS integration testing by nature:** -- Core functionality is native regex matching -- Java layer is thin wrapper with metrics/caching -- Integration tests are comprehensive (459 tests) -- Pure unit tests have limited scope (~20-30 tests max) - -**This is OK!** The library's value IS the native integration. - ---- - -## Updated Test Classification - -| Type | Count | Mockable? 
| Strategy | -|------|-------|-----------|----------| -| **Pure Unit Tests** | 4-6 | ✅ No mocking needed | Keep in src/test/java | -| **Integration Tests** | ~370 | ❌ Require native lib | Move to src/integration-test/java | -| **Performance Tests** | 2 | ❌ Require native lib | Move to perf-test module | -| **Stress Tests** | 4 | ❌ Require native lib | Move to perf-test module | - ---- - -**Conclusion:** Original analysis was incomplete. Static native methods are not practically mockable. Focus on: -1. Pure Java component unit tests -2. Comprehensive integration tests (already have 370+) -3. Clear separation of test types - -**End of Corrected Assessment** diff --git a/config/checkstyle/checkstyle-suppressions.xml b/config/checkstyle/checkstyle-suppressions.xml new file mode 100644 index 0000000..beb5d4c --- /dev/null +++ b/config/checkstyle/checkstyle-suppressions.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/config/checkstyle/google_checks.xml b/config/checkstyle/google_checks.xml new file mode 100644 index 0000000..00d1afa --- /dev/null +++ b/config/checkstyle/google_checks.xml @@ -0,0 +1,383 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libre2-core/pom.xml b/libre2-core/pom.xml index dc21ba2..d5c4281 100644 --- a/libre2-core/pom.xml +++ b/libre2-core/pom.xml @@ -243,7 +243,7 @@ LINE COVEREDRATIO - 0.67 + 0.67 @@ -252,6 +252,28 @@ + + + org.apache.maven.plugins + 
maven-checkstyle-plugin + + ${project.basedir}/../config/checkstyle/google_checks.xml + ${project.basedir}/../config/checkstyle/checkstyle-suppressions.xml + true + true + warning + true + + + + checkstyle + validate + + check + + + + diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/RE2IT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/RE2IT.java index b5226b3..40d4536 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/RE2IT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/RE2IT.java @@ -1,1160 +1,1143 @@ package com.axonops.libre2; -import com.axonops.libre2.api.*; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; -import org.junit.jupiter.params.provider.ValueSource; +import static org.assertj.core.api.Assertions.*; +import com.axonops.libre2.api.*; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; -import static org.assertj.core.api.Assertions.*; - -/** - * Comprehensive integration tests for RE2. - */ +/** Comprehensive integration tests for RE2. 
*/ class RE2IT { - // ===== Basic Matching Tests ===== - - @Test - void testSimpleExactMatch() { - assertThat(RE2.matches("hello", "hello")).isTrue(); - assertThat(RE2.matches("hello", "world")).isFalse(); + // ===== Basic Matching Tests ===== + + @Test + void testSimpleExactMatch() { + assertThat(RE2.matches("hello", "hello")).isTrue(); + assertThat(RE2.matches("hello", "world")).isFalse(); + } + + @Test + void testEmptyPattern() { + // Empty patterns are rejected by RE2 wrapper + assertThatThrownBy(() -> RE2.matches("", "")).isInstanceOf(PatternCompilationException.class); + } + + @Test + void testEmptyInput() { + try (Pattern p = RE2.compile(".*")) { + assertThat(p.matches("")).isTrue(); + } + } + + @ParameterizedTest + @CsvSource({ + "hello, hello, true", + "hello, HELLO, false", + "hello, hello world, false", // Full match requires entire string + "^hello, hello world, false", // Full match + "world$, hello world, false" // Full match + }) + void testFullMatchBehavior(String pattern, String input, boolean shouldMatch) { + try (Pattern p = RE2.compile(pattern)) { + assertThat(p.matches(input)).isEqualTo(shouldMatch); + } + } + + @ParameterizedTest + @CsvSource({ + "hello, hello world, true", + "world, hello world, true", + "goodbye, hello world, false", + "^hello, hello world, true", + "world$, hello world, true" + }) + void testPartialMatchBehavior(String pattern, String input, boolean shouldMatch) { + try (Pattern p = RE2.compile(pattern)) { + try (Matcher m = p.matcher(input)) { + assertThat(m.find()).isEqualTo(shouldMatch); + } + } + } + + // ===== Case Sensitivity Tests ===== + + @Test + void testCaseSensitiveMatching() { + try (Pattern p = RE2.compile("HELLO", true)) { + assertThat(p.matches("HELLO")).isTrue(); + assertThat(p.matches("hello")).isFalse(); + assertThat(p.matches("HeLLo")).isFalse(); + } + } + + @Test + void testCaseInsensitiveMatching() { + try (Pattern p = RE2.compile("HELLO", false)) { + assertThat(p.matches("HELLO")).isTrue(); + 
assertThat(p.matches("hello")).isTrue(); + assertThat(p.matches("HeLLo")).isTrue(); + assertThat(p.matches("hElLo")).isTrue(); + } + } + + // ===== Regex Feature Tests ===== + + @ParameterizedTest + @CsvSource({ + "\\d+, 123, true", + "\\d+, abc, false", + "\\w+, hello123, true", + "\\w+, !, false", + "\\s+, ' ', true", + "\\s+, text, false", + "[a-z]+, abc, true", + "[a-z]+, ABC, false", + "[0-9]{3}, 123, true", + "[0-9]{3}, 12, false" + }) + void testCharacterClasses(String pattern, String input, boolean shouldMatch) { + try (Pattern p = RE2.compile(pattern)) { + assertThat(p.matches(input)).isEqualTo(shouldMatch); + } + } + + @Test + void testRepetitionZeroOrMore() { + try (Pattern p = RE2.compile("a*")) { + assertThat(p.matches("")).isTrue(); + assertThat(p.matches("a")).isTrue(); + assertThat(p.matches("aaa")).isTrue(); + } + } + + @Test + void testRepetitionOneOrMore() { + try (Pattern p = RE2.compile("a+")) { + assertThat(p.matches("")).isFalse(); + assertThat(p.matches("a")).isTrue(); + assertThat(p.matches("aaa")).isTrue(); + } + } + + @Test + void testRepetitionOptional() { + try (Pattern p = RE2.compile("a?")) { + assertThat(p.matches("")).isTrue(); + assertThat(p.matches("a")).isTrue(); + assertThat(p.matches("aa")).isFalse(); + } + } + + @Test + void testRepetitionExactCount() { + try (Pattern p = RE2.compile("a{2}")) { + assertThat(p.matches("aa")).isTrue(); + assertThat(p.matches("a")).isFalse(); + assertThat(p.matches("aaa")).isFalse(); + } + } + + @Test + void testRepetitionRange() { + try (Pattern p = RE2.compile("a{2,4}")) { + assertThat(p.matches("a")).isFalse(); + assertThat(p.matches("aa")).isTrue(); + assertThat(p.matches("aaa")).isTrue(); + assertThat(p.matches("aaaa")).isTrue(); + assertThat(p.matches("aaaaa")).isFalse(); } + } - @Test - void testEmptyPattern() { - // Empty patterns are rejected by RE2 wrapper - assertThatThrownBy(() -> RE2.matches("", "")) - .isInstanceOf(PatternCompilationException.class); + @ParameterizedTest + 
@ValueSource(strings = {"abc|def", "(hello|world)", "cat|dog|bird", "\\d+|\\w+"}) + void testAlternation(String pattern) { + try (Pattern p = RE2.compile(pattern)) { + assertThat(p).isNotNull(); } + } - @Test - void testEmptyInput() { - try (Pattern p = RE2.compile(".*")) { - assertThat(p.matches("")).isTrue(); - } + @Test + void testDotMetacharacter() { + try (Pattern p = RE2.compile("a.c")) { + assertThat(p.matches("abc")).isTrue(); + assertThat(p.matches("axc")).isTrue(); + assertThat(p.matches("ac")).isFalse(); } + } - @ParameterizedTest - @CsvSource({ - "hello, hello, true", - "hello, HELLO, false", - "hello, hello world, false", // Full match requires entire string - "^hello, hello world, false", // Full match - "world$, hello world, false" // Full match - }) - void testFullMatchBehavior(String pattern, String input, boolean shouldMatch) { - try (Pattern p = RE2.compile(pattern)) { - assertThat(p.matches(input)).isEqualTo(shouldMatch); - } + @Test + void testAnchors() { + try (Pattern start = RE2.compile("^hello")) { + try (Matcher m = start.matcher("hello world")) { + assertThat(m.find()).isTrue(); + } + try (Matcher m = start.matcher("say hello")) { + assertThat(m.find()).isFalse(); + } } - @ParameterizedTest - @CsvSource({ - "hello, hello world, true", - "world, hello world, true", - "goodbye, hello world, false", - "^hello, hello world, true", - "world$, hello world, true" - }) - void testPartialMatchBehavior(String pattern, String input, boolean shouldMatch) { - try (Pattern p = RE2.compile(pattern)) { - try (Matcher m = p.matcher(input)) { - assertThat(m.find()).isEqualTo(shouldMatch); - } - } + try (Pattern end = RE2.compile("world$")) { + try (Matcher m = end.matcher("hello world")) { + assertThat(m.find()).isTrue(); + } + try (Matcher m = end.matcher("world hello")) { + assertThat(m.find()).isFalse(); + } } + } - // ===== Case Sensitivity Tests ===== + // ===== UTF-8 and Special Characters ===== - @Test - void testCaseSensitiveMatching() { - try 
(Pattern p = RE2.compile("HELLO", true)) { - assertThat(p.matches("HELLO")).isTrue(); - assertThat(p.matches("hello")).isFalse(); - assertThat(p.matches("HeLLo")).isFalse(); - } + @Test + void testUTF8Characters() { + try (Pattern p = RE2.compile("hello")) { + assertThat(p.matches("hello")).isTrue(); } - @Test - void testCaseInsensitiveMatching() { - try (Pattern p = RE2.compile("HELLO", false)) { - assertThat(p.matches("HELLO")).isTrue(); - assertThat(p.matches("hello")).isTrue(); - assertThat(p.matches("HeLLo")).isTrue(); - assertThat(p.matches("hElLo")).isTrue(); - } + try (Pattern p = RE2.compile("café")) { + assertThat(p.matches("café")).isTrue(); } - // ===== Regex Feature Tests ===== - - @ParameterizedTest - @CsvSource({ - "\\d+, 123, true", - "\\d+, abc, false", - "\\w+, hello123, true", - "\\w+, !, false", - "\\s+, ' ', true", - "\\s+, text, false", - "[a-z]+, abc, true", - "[a-z]+, ABC, false", - "[0-9]{3}, 123, true", - "[0-9]{3}, 12, false" - }) - void testCharacterClasses(String pattern, String input, boolean shouldMatch) { - try (Pattern p = RE2.compile(pattern)) { - assertThat(p.matches(input)).isEqualTo(shouldMatch); - } + try (Pattern p = RE2.compile("日本語")) { + assertThat(p.matches("日本語")).isTrue(); } - @Test - void testRepetitionZeroOrMore() { - try (Pattern p = RE2.compile("a*")) { - assertThat(p.matches("")).isTrue(); - assertThat(p.matches("a")).isTrue(); - assertThat(p.matches("aaa")).isTrue(); - } + try (Pattern p = RE2.compile("emoji😀test")) { + assertThat(p.matches("emoji😀test")).isTrue(); } + } - @Test - void testRepetitionOneOrMore() { - try (Pattern p = RE2.compile("a+")) { - assertThat(p.matches("")).isFalse(); - assertThat(p.matches("a")).isTrue(); - assertThat(p.matches("aaa")).isTrue(); - } + @Test + void testSpecialRegexCharacters() { + try (Pattern p = RE2.compile("\\(\\)\\[\\]\\{\\}")) { + assertThat(p.matches("()[]{}")).isTrue(); } - @Test - void testRepetitionOptional() { - try (Pattern p = RE2.compile("a?")) { - 
assertThat(p.matches("")).isTrue(); - assertThat(p.matches("a")).isTrue(); - assertThat(p.matches("aa")).isFalse(); - } + try (Pattern p = RE2.compile("\\.\\*\\+\\?")) { + assertThat(p.matches(".*+?")).isTrue(); } + } - @Test - void testRepetitionExactCount() { - try (Pattern p = RE2.compile("a{2}")) { - assertThat(p.matches("aa")).isTrue(); - assertThat(p.matches("a")).isFalse(); - assertThat(p.matches("aaa")).isFalse(); - } - } + // ===== Comprehensive Unicode Script Tests ===== - @Test - void testRepetitionRange() { - try (Pattern p = RE2.compile("a{2,4}")) { - assertThat(p.matches("a")).isFalse(); - assertThat(p.matches("aa")).isTrue(); - assertThat(p.matches("aaa")).isTrue(); - assertThat(p.matches("aaaa")).isTrue(); - assertThat(p.matches("aaaaa")).isFalse(); - } + @Test + void testChineseCharacters() { + // Simplified Chinese + try (Pattern p = RE2.compile("中文测试")) { + assertThat(p.matches("中文测试")).isTrue(); + assertThat(p.matches("中文")).isFalse(); } - @ParameterizedTest - @ValueSource(strings = { - "abc|def", - "(hello|world)", - "cat|dog|bird", - "\\d+|\\w+" - }) - void testAlternation(String pattern) { - try (Pattern p = RE2.compile(pattern)) { - assertThat(p).isNotNull(); - } + // Traditional Chinese + try (Pattern p = RE2.compile("繁體中文")) { + assertThat(p.matches("繁體中文")).isTrue(); } - @Test - void testDotMetacharacter() { - try (Pattern p = RE2.compile("a.c")) { - assertThat(p.matches("abc")).isTrue(); - assertThat(p.matches("axc")).isTrue(); - assertThat(p.matches("ac")).isFalse(); - } + // Mixed Chinese and ASCII + try (Pattern p = RE2.compile("测试123")) { + assertThat(p.matches("测试123")).isTrue(); } - @Test - void testAnchors() { - try (Pattern start = RE2.compile("^hello")) { - try (Matcher m = start.matcher("hello world")) { - assertThat(m.find()).isTrue(); - } - try (Matcher m = start.matcher("say hello")) { - assertThat(m.find()).isFalse(); - } - } - - try (Pattern end = RE2.compile("world$")) { - try (Matcher m = end.matcher("hello world")) { - 
assertThat(m.find()).isTrue(); - } - try (Matcher m = end.matcher("world hello")) { - assertThat(m.find()).isFalse(); - } - } + // Chinese in pattern with wildcards + try (Pattern p = RE2.compile(".*中文.*")) { + try (Matcher m = p.matcher("这是中文文本")) { + assertThat(m.find()).isTrue(); + } } - // ===== UTF-8 and Special Characters ===== - - @Test - void testUTF8Characters() { - try (Pattern p = RE2.compile("hello")) { - assertThat(p.matches("hello")).isTrue(); - } - - try (Pattern p = RE2.compile("café")) { - assertThat(p.matches("café")).isTrue(); - } - - try (Pattern p = RE2.compile("日本語")) { - assertThat(p.matches("日本語")).isTrue(); - } - - try (Pattern p = RE2.compile("emoji😀test")) { - assertThat(p.matches("emoji😀test")).isTrue(); - } + // Chinese character class (Unicode range) + try (Pattern p = RE2.compile("[\\x{4e00}-\\x{9fff}]+")) { + assertThat(p.matches("汉字")).isTrue(); + assertThat(p.matches("abc")).isFalse(); } + } - @Test - void testSpecialRegexCharacters() { - try (Pattern p = RE2.compile("\\(\\)\\[\\]\\{\\}")) { - assertThat(p.matches("()[]{}")).isTrue(); - } - - try (Pattern p = RE2.compile("\\.\\*\\+\\?")) { - assertThat(p.matches(".*+?")).isTrue(); - } + @Test + void testArabicCharacters() { + // Basic Arabic text + try (Pattern p = RE2.compile("مرحبا")) { + assertThat(p.matches("مرحبا")).isTrue(); } - // ===== Comprehensive Unicode Script Tests ===== - - @Test - void testChineseCharacters() { - // Simplified Chinese - try (Pattern p = RE2.compile("中文测试")) { - assertThat(p.matches("中文测试")).isTrue(); - assertThat(p.matches("中文")).isFalse(); - } - - // Traditional Chinese - try (Pattern p = RE2.compile("繁體中文")) { - assertThat(p.matches("繁體中文")).isTrue(); - } - - // Mixed Chinese and ASCII - try (Pattern p = RE2.compile("测试123")) { - assertThat(p.matches("测试123")).isTrue(); - } - - // Chinese in pattern with wildcards - try (Pattern p = RE2.compile(".*中文.*")) { - try (Matcher m = p.matcher("这是中文文本")) { - assertThat(m.find()).isTrue(); - } - } - - // 
Chinese character class (Unicode range) - try (Pattern p = RE2.compile("[\\x{4e00}-\\x{9fff}]+")) { - assertThat(p.matches("汉字")).isTrue(); - assertThat(p.matches("abc")).isFalse(); - } + // Arabic with diacritics + try (Pattern p = RE2.compile("العربية")) { + assertThat(p.matches("العربية")).isTrue(); } - @Test - void testArabicCharacters() { - // Basic Arabic text - try (Pattern p = RE2.compile("مرحبا")) { - assertThat(p.matches("مرحبا")).isTrue(); - } - - // Arabic with diacritics - try (Pattern p = RE2.compile("العربية")) { - assertThat(p.matches("العربية")).isTrue(); - } - - // Mixed Arabic and numbers - try (Pattern p = RE2.compile("رقم \\d+")) { - try (Matcher m = p.matcher("رقم 123")) { - assertThat(m.find()).isTrue(); - } - } - - // Arabic numerals (Eastern Arabic) - try (Pattern p = RE2.compile("١٢٣")) { - assertThat(p.matches("١٢٣")).isTrue(); - } + // Mixed Arabic and numbers + try (Pattern p = RE2.compile("رقم \\d+")) { + try (Matcher m = p.matcher("رقم 123")) { + assertThat(m.find()).isTrue(); + } } - @Test - void testHebrewCharacters() { - // Basic Hebrew - try (Pattern p = RE2.compile("שלום")) { - assertThat(p.matches("שלום")).isTrue(); - } - - // Hebrew with vowel points (nikkud) - try (Pattern p = RE2.compile("עברית")) { - assertThat(p.matches("עברית")).isTrue(); - } - - // Mixed Hebrew and ASCII - try (Pattern p = RE2.compile("test שלום test")) { - assertThat(p.matches("test שלום test")).isTrue(); - } + // Arabic numerals (Eastern Arabic) + try (Pattern p = RE2.compile("١٢٣")) { + assertThat(p.matches("١٢٣")).isTrue(); } + } - @Test - void testGreekCharacters() { - // Basic Greek - try (Pattern p = RE2.compile("Ελληνικά")) { - assertThat(p.matches("Ελληνικά")).isTrue(); - } - - // Greek letters commonly used in math/science - try (Pattern p = RE2.compile("αβγδ")) { - assertThat(p.matches("αβγδ")).isTrue(); - } - - // Greek uppercase - try (Pattern p = RE2.compile("ΑΒΓΔ")) { - assertThat(p.matches("ΑΒΓΔ")).isTrue(); - } - - // Mixed Greek and math 
symbols - try (Pattern p = RE2.compile("π = 3\\.14")) { - assertThat(p.matches("π = 3.14")).isTrue(); - } + @Test + void testHebrewCharacters() { + // Basic Hebrew + try (Pattern p = RE2.compile("שלום")) { + assertThat(p.matches("שלום")).isTrue(); } - @Test - void testCyrillicCharacters() { - // Russian - try (Pattern p = RE2.compile("Привет")) { - assertThat(p.matches("Привет")).isTrue(); - } - - // Ukrainian - try (Pattern p = RE2.compile("Слава Україні")) { - assertThat(p.matches("Слава Україні")).isTrue(); - } - - // Mixed Cyrillic and Latin (common in technical docs) - try (Pattern p = RE2.compile("error: Ошибка")) { - assertThat(p.matches("error: Ошибка")).isTrue(); - } + // Hebrew with vowel points (nikkud) + try (Pattern p = RE2.compile("עברית")) { + assertThat(p.matches("עברית")).isTrue(); } - @Test - void testKoreanCharacters() { - // Hangul - try (Pattern p = RE2.compile("안녕하세요")) { - assertThat(p.matches("안녕하세요")).isTrue(); - } - - // Mixed Korean and ASCII - try (Pattern p = RE2.compile("Hello 세계")) { - assertThat(p.matches("Hello 세계")).isTrue(); - } - - // Korean with numbers - try (Pattern p = RE2.compile("테스트\\d+")) { - assertThat(p.matches("테스트123")).isTrue(); - } + // Mixed Hebrew and ASCII + try (Pattern p = RE2.compile("test שלום test")) { + assertThat(p.matches("test שלום test")).isTrue(); } + } - @Test - void testThaiCharacters() { - // Thai script - try (Pattern p = RE2.compile("สวัสดี")) { - assertThat(p.matches("สวัสดี")).isTrue(); - } - - // Thai with tone marks - try (Pattern p = RE2.compile("ภาษาไทย")) { - assertThat(p.matches("ภาษาไทย")).isTrue(); - } + @Test + void testGreekCharacters() { + // Basic Greek + try (Pattern p = RE2.compile("Ελληνικά")) { + assertThat(p.matches("Ελληνικά")).isTrue(); } - @Test - void testDevanagariCharacters() { - // Hindi - try (Pattern p = RE2.compile("नमस्ते")) { - assertThat(p.matches("नमस्ते")).isTrue(); - } - - // Sanskrit - try (Pattern p = RE2.compile("संस्कृत")) { - 
assertThat(p.matches("संस्कृत")).isTrue(); - } + // Greek letters commonly used in math/science + try (Pattern p = RE2.compile("αβγδ")) { + assertThat(p.matches("αβγδ")).isTrue(); } - @Test - void testMixedScripts() { - // Multiple scripts in one pattern - try (Pattern p = RE2.compile("Hello 世界 مرحبا שלום")) { - assertThat(p.matches("Hello 世界 مرحبا שלום")).isTrue(); - } - - // Technical text with multiple scripts - try (Pattern p = RE2.compile("Error: 错误 - Ошибка")) { - assertThat(p.matches("Error: 错误 - Ошибка")).isTrue(); - } - - // Product names mixing scripts - try (Pattern p = RE2.compile("Sony ソニー")) { - assertThat(p.matches("Sony ソニー")).isTrue(); - } + // Greek uppercase + try (Pattern p = RE2.compile("ΑΒΓΔ")) { + assertThat(p.matches("ΑΒΓΔ")).isTrue(); } - @Test - void testUnicodeEmoji() { - // Basic emoji - try (Pattern p = RE2.compile("😀😁😂")) { - assertThat(p.matches("😀😁😂")).isTrue(); - } - - // Emoji with text - try (Pattern p = RE2.compile("Hello 👋 World 🌍")) { - assertThat(p.matches("Hello 👋 World 🌍")).isTrue(); - } - - // Search for emoji in text - try (Pattern p = RE2.compile("👍")) { - try (Matcher m = p.matcher("Great job! 👍 Keep going!")) { - assertThat(m.find()).isTrue(); - } - } - - // Emoji sequences (family, flags, etc.) 
- try (Pattern p = RE2.compile("🇺🇸")) { - assertThat(p.matches("🇺🇸")).isTrue(); - } + // Mixed Greek and math symbols + try (Pattern p = RE2.compile("π = 3\\.14")) { + assertThat(p.matches("π = 3.14")).isTrue(); } + } - @Test - void testSpecialUnicodeSymbols() { - // Currency symbols - try (Pattern p = RE2.compile("€\\d+\\.\\d{2}")) { - assertThat(p.matches("€19.99")).isTrue(); - } - - try (Pattern p = RE2.compile("£\\d+")) { - assertThat(p.matches("£100")).isTrue(); - } - - try (Pattern p = RE2.compile("¥\\d+")) { - assertThat(p.matches("¥1000")).isTrue(); - } - - // Math symbols - try (Pattern p = RE2.compile("∑.*=.*∞")) { - try (Matcher m = p.matcher("∑x = ∞")) { - assertThat(m.find()).isTrue(); - } - } - - // Copyright and trademark - try (Pattern p = RE2.compile("©.*®.*™")) { - try (Matcher m = p.matcher("© 2025 Company® Product™")) { - assertThat(m.find()).isTrue(); - } - } + @Test + void testCyrillicCharacters() { + // Russian + try (Pattern p = RE2.compile("Привет")) { + assertThat(p.matches("Привет")).isTrue(); } - @Test - void testUnicodeCaseInsensitive() { - // German with umlauts (ä, ö, ü have proper case folding) - try (Pattern p = RE2.compile("münchen", false)) { - assertThat(p.matches("münchen")).isTrue(); - assertThat(p.matches("MÜNCHEN")).isTrue(); - } - - // Greek case insensitive - try (Pattern p = RE2.compile("ελληνικά", false)) { - assertThat(p.matches("ελληνικά")).isTrue(); - assertThat(p.matches("ΕΛΛΗΝΙΚΆ")).isTrue(); - } - - // Cyrillic case insensitive - try (Pattern p = RE2.compile("привет", false)) { - assertThat(p.matches("привет")).isTrue(); - assertThat(p.matches("ПРИВЕТ")).isTrue(); - } - - // Note: German ß does NOT fold to SS in RE2 (unlike Java) - // This is intentional RE2 behavior for correctness - try (Pattern p = RE2.compile("straße", false)) { - assertThat(p.matches("straße")).isTrue(); - // STRASSE would NOT match - ß ≠ SS in RE2 - } + // Ukrainian + try (Pattern p = RE2.compile("Слава Україні")) { + 
assertThat(p.matches("Слава Україні")).isTrue(); } - @Test - void testUnicodeInLogProcessing() { - // Simulating logs with international user data - String logEntry = "2025-11-17 [INFO] User 田中太郎 (tanaka@example.jp) logged in from 東京"; - - // Find Japanese name - try (Pattern p = RE2.compile("[\\x{4e00}-\\x{9fff}\\x{3040}-\\x{309f}\\x{30a0}-\\x{30ff}]+")) { - try (Matcher m = p.matcher(logEntry)) { - assertThat(m.find()).isTrue(); - } - } - - // Find email - try (Pattern p = RE2.compile("\\w+@[\\w.]+")) { - try (Matcher m = p.matcher(logEntry)) { - assertThat(m.find()).isTrue(); - } - } + // Mixed Cyrillic and Latin (common in technical docs) + try (Pattern p = RE2.compile("error: Ошибка")) { + assertThat(p.matches("error: Ошибка")).isTrue(); } + } - @Test - void testUnicodeNormalization() { - // Precomposed vs decomposed (NFC vs NFD) - // é can be U+00E9 (precomposed) or U+0065 U+0301 (decomposed) - String precomposed = "café"; // Using precomposed é - - try (Pattern p = RE2.compile("café")) { - assertThat(p.matches(precomposed)).isTrue(); - } + @Test + void testKoreanCharacters() { + // Hangul + try (Pattern p = RE2.compile("안녕하세요")) { + assertThat(p.matches("안녕하세요")).isTrue(); } - @Test - void testRTLScriptMixing() { - // Right-to-left text mixed with left-to-right - String mixed = "The word שלום means peace"; - - try (Pattern p = RE2.compile("שלום")) { - try (Matcher m = p.matcher(mixed)) { - assertThat(m.find()).isTrue(); - } - } - - // Arabic RTL - String arabicMixed = "Welcome مرحبا to our site"; - try (Pattern p = RE2.compile("مرحبا")) { - try (Matcher m = p.matcher(arabicMixed)) { - assertThat(m.find()).isTrue(); - } - } + // Mixed Korean and ASCII + try (Pattern p = RE2.compile("Hello 세계")) { + assertThat(p.matches("Hello 세계")).isTrue(); } - @Test - void testUnicodeWordBoundaries() { - // Word boundaries with CJK (no spaces between words) - try (Pattern p = RE2.compile("日本")) { - try (Matcher m = p.matcher("私は日本語を勉強しています")) { - 
assertThat(m.find()).isTrue(); - } - } - - // Word boundaries with Arabic - try (Pattern p = RE2.compile("العربية")) { - try (Matcher m = p.matcher("أنا أتعلم اللغة العربية")) { - assertThat(m.find()).isTrue(); - } - } + // Korean with numbers + try (Pattern p = RE2.compile("테스트\\d+")) { + assertThat(p.matches("테스트123")).isTrue(); } + } - @Test - void testVeryLongUnicodeText() { - // Large text with mixed scripts - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < 1000; i++) { - sb.append("Hello 世界 مرحبا שלום Привет 안녕 "); - } - String largeText = sb.toString(); - - // Search in large mixed-script text - try (Pattern p = RE2.compile("Привет")) { - try (Matcher m = p.matcher(largeText)) { - long start = System.currentTimeMillis(); - assertThat(m.find()).isTrue(); - long duration = System.currentTimeMillis() - start; - assertThat(duration).isLessThan(100); // Should be fast - } - } + @Test + void testThaiCharacters() { + // Thai script + try (Pattern p = RE2.compile("สวัสดี")) { + assertThat(p.matches("สวัสดี")).isTrue(); } - // ===== Email and URL Pattern Tests ===== - - @Test - void testEmailPattern() { - String emailPattern = "\\w+@\\w+\\.\\w+"; - try (Pattern p = RE2.compile(emailPattern)) { - assertThat(p.matches("user@example.com")).isTrue(); - assertThat(p.matches("invalid.email")).isFalse(); - assertThat(p.matches("@example.com")).isFalse(); - } + // Thai with tone marks + try (Pattern p = RE2.compile("ภาษาไทย")) { + assertThat(p.matches("ภาษาไทย")).isTrue(); } + } - @Test - void testURLPattern() { - String urlPattern = "https?://[\\w.]+(/.*)?"; - try (Pattern p = RE2.compile(urlPattern)) { - try (Matcher m = p.matcher("https://example.com/path")) { - assertThat(m.find()).isTrue(); - } - try (Matcher m = p.matcher("http://test.org")) { - assertThat(m.find()).isTrue(); - } - try (Matcher m = p.matcher("ftp://example.com")) { - assertThat(m.find()).isFalse(); - } - } + @Test + void testDevanagariCharacters() { + // Hindi + try (Pattern p = 
RE2.compile("नमस्ते")) { + assertThat(p.matches("नमस्ते")).isTrue(); } - // ===== Error Handling Tests ===== - - @Test - void testNullPatternThrows() { - assertThatThrownBy(() -> RE2.compile(null)) - .isInstanceOf(NullPointerException.class); + // Sanskrit + try (Pattern p = RE2.compile("संस्कृत")) { + assertThat(p.matches("संस्कृत")).isTrue(); } + } - @Test - void testNullInputThrows() { - try (Pattern p = RE2.compile("test")) { - assertThatThrownBy(() -> p.matcher(null)) - .isInstanceOf(NullPointerException.class); - } + @Test + void testMixedScripts() { + // Multiple scripts in one pattern + try (Pattern p = RE2.compile("Hello 世界 مرحبا שלום")) { + assertThat(p.matches("Hello 世界 مرحبا שלום")).isTrue(); } - @ParameterizedTest - @ValueSource(strings = { - "(unclosed", - "(?Pempty)", - "[[invalid" - }) - void testInvalidPatternThrows(String invalidPattern) { - assertThatThrownBy(() -> RE2.compile(invalidPattern)) - .isInstanceOf(PatternCompilationException.class) - .hasMessageContaining("compilation failed"); - } - - @Test - void testPatternCompilationExceptionContainsPattern() { - try { - RE2.compile("(unclosed"); - fail("Should have thrown PatternCompilationException"); - } catch (PatternCompilationException e) { - assertThat(e.getPattern()).isEqualTo("(unclosed"); - assertThat(e.getMessage()).contains("unclosed"); - } + // Technical text with multiple scripts + try (Pattern p = RE2.compile("Error: 错误 - Ошибка")) { + assertThat(p.matches("Error: 错误 - Ошибка")).isTrue(); } - // ===== Resource Management Tests ===== - - @Test - void testPatternClose() { - // Use compileWithoutCache() to test actual closing - Pattern p = Pattern.compileWithoutCache("test", true); - assertThat(p.isClosed()).isFalse(); - - p.close(); - assertThat(p.isClosed()).isTrue(); + // Product names mixing scripts + try (Pattern p = RE2.compile("Sony ソニー")) { + assertThat(p.matches("Sony ソニー")).isTrue(); } + } - @Test - void testUseAfterClose() { - // Use compileWithoutCache() to test actual 
closing - Pattern p = Pattern.compileWithoutCache("test", true); - p.close(); - - assertThatThrownBy(() -> p.matcher("input")) - .isInstanceOf(IllegalStateException.class) - .hasMessageContaining("closed"); + @Test + void testUnicodeEmoji() { + // Basic emoji + try (Pattern p = RE2.compile("😀😁😂")) { + assertThat(p.matches("😀😁😂")).isTrue(); } - @Test - void testDoubleClose() { - // Use compileWithoutCache() to test actual closing - Pattern p = Pattern.compileWithoutCache("test", true); - p.close(); - - // Second close should be idempotent (not throw) - assertThatCode(p::close).doesNotThrowAnyException(); + // Emoji with text + try (Pattern p = RE2.compile("Hello 👋 World 🌍")) { + assertThat(p.matches("Hello 👋 World 🌍")).isTrue(); } - @Test - void testTryWithResources() { - // Verify AutoCloseable works correctly with uncached patterns - Pattern[] holder = new Pattern[1]; - - try (Pattern p = Pattern.compileWithoutCache("test", true)) { - holder[0] = p; - assertThat(p.isClosed()).isFalse(); - } - - assertThat(holder[0].isClosed()).isTrue(); + // Search for emoji in text + try (Pattern p = RE2.compile("👍")) { + try (Matcher m = p.matcher("Great job! 👍 Keep going!")) { + assertThat(m.find()).isTrue(); + } } - @Test - void testCachedPatternNotClosedOnClose() { - // Cached patterns should NOT actually close when close() is called - Pattern p = RE2.compile("test"); - assertThat(p.isClosed()).isFalse(); - - p.close(); // This should be a no-op for cached patterns - - // Pattern should still not be closed (cache manages it) - assertThat(p.isClosed()).isFalse(); + // Emoji sequences (family, flags, etc.) 
+ try (Pattern p = RE2.compile("🇺🇸")) { + assertThat(p.matches("🇺🇸")).isTrue(); } + } - @Test - void testNestedTryWithResources() { - try (Pattern p = RE2.compile("test")) { - try (Matcher m = p.matcher("test")) { - assertThat(m.matches()).isTrue(); - } - } + @Test + void testSpecialUnicodeSymbols() { + // Currency symbols + try (Pattern p = RE2.compile("€\\d+\\.\\d{2}")) { + assertThat(p.matches("€19.99")).isTrue(); } - @Test - void testMultiplePatternsIndependent() { - try (Pattern p1 = RE2.compile("pattern1"); - Pattern p2 = RE2.compile("pattern2"); - Pattern p3 = RE2.compile("pattern3")) { - - assertThat(p1.matches("pattern1")).isTrue(); - assertThat(p2.matches("pattern2")).isTrue(); - assertThat(p3.matches("pattern3")).isTrue(); - - assertThat(p1.matches("pattern2")).isFalse(); - } + try (Pattern p = RE2.compile("£\\d+")) { + assertThat(p.matches("£100")).isTrue(); } - // ===== Complex Pattern Tests ===== - - @Test - void testIPv4Pattern() { - String ipPattern = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"; - try (Pattern p = RE2.compile(ipPattern)) { - assertThat(p.matches("192.168.1.1")).isTrue(); - assertThat(p.matches("10.0.0.1")).isTrue(); - assertThat(p.matches("999.999.999.999")).isTrue(); // Matches pattern, not valid IP - assertThat(p.matches("192.168.1")).isFalse(); - } + try (Pattern p = RE2.compile("¥\\d+")) { + assertThat(p.matches("¥1000")).isTrue(); } - @Test - void testDatePattern() { - String datePattern = "\\d{4}-\\d{2}-\\d{2}"; - try (Pattern p = RE2.compile(datePattern)) { - assertThat(p.matches("2025-11-17")).isTrue(); - assertThat(p.matches("2025-1-17")).isFalse(); - assertThat(p.matches("25-11-17")).isFalse(); - } + // Math symbols + try (Pattern p = RE2.compile("∑.*=.*∞")) { + try (Matcher m = p.matcher("∑x = ∞")) { + assertThat(m.find()).isTrue(); + } } - @Test - void testPhoneNumberPattern() { - String phonePattern = "\\(?\\d{3}\\)?[- ]?\\d{3}[- ]?\\d{4}"; - try (Pattern p = RE2.compile(phonePattern)) { - try (Matcher m = 
p.matcher("(555) 123-4567")) { - assertThat(m.find()).isTrue(); - } - try (Matcher m = p.matcher("555-123-4567")) { - assertThat(m.find()).isTrue(); - } - try (Matcher m = p.matcher("5551234567")) { - assertThat(m.find()).isTrue(); - } - } + // Copyright and trademark + try (Pattern p = RE2.compile("©.*®.*™")) { + try (Matcher m = p.matcher("© 2025 Company® Product™")) { + assertThat(m.find()).isTrue(); + } } + } - @Test - void testComplexAlternation() { - try (Pattern p = RE2.compile("(cat|dog|bird|fish)")) { - assertThat(p.matches("cat")).isTrue(); - assertThat(p.matches("dog")).isTrue(); - assertThat(p.matches("fish")).isTrue(); - assertThat(p.matches("cow")).isFalse(); - } + @Test + void testUnicodeCaseInsensitive() { + // German with umlauts (ä, ö, ü have proper case folding) + try (Pattern p = RE2.compile("münchen", false)) { + assertThat(p.matches("münchen")).isTrue(); + assertThat(p.matches("MÜNCHEN")).isTrue(); } - @Test - void testNestedGroups() { - try (Pattern p = RE2.compile("((a|b)(c|d))")) { - assertThat(p.matches("ac")).isTrue(); - assertThat(p.matches("ad")).isTrue(); - assertThat(p.matches("bc")).isTrue(); - assertThat(p.matches("bd")).isTrue(); - assertThat(p.matches("ab")).isFalse(); - } + // Greek case insensitive + try (Pattern p = RE2.compile("ελληνικά", false)) { + assertThat(p.matches("ελληνικά")).isTrue(); + assertThat(p.matches("ΕΛΛΗΝΙΚΆ")).isTrue(); } - // ===== Edge Cases ===== - - @Test - void testVeryLongPattern() { - String longPattern = "a".repeat(1000); - try (Pattern p = RE2.compile(longPattern)) { - assertThat(p.matches(longPattern)).isTrue(); - assertThat(p.matches("a".repeat(999))).isFalse(); - } + // Cyrillic case insensitive + try (Pattern p = RE2.compile("привет", false)) { + assertThat(p.matches("привет")).isTrue(); + assertThat(p.matches("ПРИВЕТ")).isTrue(); } - @Test - void testVeryLongInput() { - String longInput = "x".repeat(10000) + "needle" + "y".repeat(10000); - try (Pattern p = RE2.compile("needle")) { - try 
(Matcher m = p.matcher(longInput)) { - assertThat(m.find()).isTrue(); - } - } + // Note: German ß does NOT fold to SS in RE2 (unlike Java) + // This is intentional RE2 behavior for correctness + try (Pattern p = RE2.compile("straße", false)) { + assertThat(p.matches("straße")).isTrue(); + // STRASSE would NOT match - ß ≠ SS in RE2 } + } - @Test - void testPatternWithManyAlternatives() { - StringBuilder pattern = new StringBuilder(); - for (int i = 0; i < 100; i++) { - if (i > 0) pattern.append("|"); - pattern.append("word").append(i); - } + @Test + void testUnicodeInLogProcessing() { + // Simulating logs with international user data + String logEntry = "2025-11-17 [INFO] User 田中太郎 (tanaka@example.jp) logged in from 東京"; - try (Pattern p = RE2.compile(pattern.toString())) { - assertThat(p.matches("word50")).isTrue(); - assertThat(p.matches("word99")).isTrue(); - assertThat(p.matches("word100")).isFalse(); - } + // Find Japanese name + try (Pattern p = RE2.compile("[\\x{4e00}-\\x{9fff}\\x{3040}-\\x{309f}\\x{30a0}-\\x{30ff}]+")) { + try (Matcher m = p.matcher(logEntry)) { + assertThat(m.find()).isTrue(); + } } - @Test - void testDeepNesting() { - String pattern = "((((((((((a))))))))))"; - try (Pattern p = RE2.compile(pattern)) { - assertThat(p.matches("a")).isTrue(); - } + // Find email + try (Pattern p = RE2.compile("\\w+@[\\w.]+")) { + try (Matcher m = p.matcher(logEntry)) { + assertThat(m.find()).isTrue(); + } } + } - // ===== Whitespace and Special Input Tests ===== + @Test + void testUnicodeNormalization() { + // Precomposed vs decomposed (NFC vs NFD) + // é can be U+00E9 (precomposed) or U+0065 U+0301 (decomposed) + String precomposed = "café"; // Using precomposed é - @Test - void testWhitespaceMatching() { - try (Pattern p = RE2.compile("\\s+")) { - assertThat(p.matches(" ")).isTrue(); - assertThat(p.matches("\t\n")).isTrue(); - assertThat(p.matches("text")).isFalse(); - } + try (Pattern p = RE2.compile("café")) { + 
assertThat(p.matches(precomposed)).isTrue(); } + } - @Test - void testNewlinesInInput() { - try (Pattern p = RE2.compile("hello")) { - try (Matcher m = p.matcher("hello\nworld")) { - assertThat(m.find()).isTrue(); - } - } - } + @Test + void testRTLScriptMixing() { + // Right-to-left text mixed with left-to-right + String mixed = "The word שלום means peace"; - @Test - void testTabsInInput() { - try (Pattern p = RE2.compile("hello\tworld")) { - assertThat(p.matches("hello\tworld")).isTrue(); - } + try (Pattern p = RE2.compile("שלום")) { + try (Matcher m = p.matcher(mixed)) { + assertThat(m.find()).isTrue(); + } } - // ===== Resource Leak Tests ===== - - @Test - void testManyPatternsNoLeak() { - // Compile and close many patterns (tests resource cleanup) - for (int i = 0; i < 1000; i++) { - try (Pattern p = RE2.compile("pattern" + i)) { - assertThat(p).isNotNull(); - } - } + // Arabic RTL + String arabicMixed = "Welcome مرحبا to our site"; + try (Pattern p = RE2.compile("مرحبا")) { + try (Matcher m = p.matcher(arabicMixed)) { + assertThat(m.find()).isTrue(); + } } + } - @Test - void testManyMatchersNoLeak() { - try (Pattern p = RE2.compile("test")) { - for (int i = 0; i < 1000; i++) { - try (Matcher m = p.matcher("test" + i)) { - m.matches(); - } - } - } - } - - // ===== Concurrent Access Tests ===== + @Test + void testUnicodeWordBoundaries() { + // Word boundaries with CJK (no spaces between words) + try (Pattern p = RE2.compile("日本")) { + try (Matcher m = p.matcher("私は日本語を勉強しています")) { + assertThat(m.find()).isTrue(); + } + } + + // Word boundaries with Arabic + try (Pattern p = RE2.compile("العربية")) { + try (Matcher m = p.matcher("أنا أتعلم اللغة العربية")) { + assertThat(m.find()).isTrue(); + } + } + } + + @Test + void testVeryLongUnicodeText() { + // Large text with mixed scripts + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + sb.append("Hello 世界 مرحبا שלום Привет 안녕 "); + } + String largeText = sb.toString(); + + // Search in 
large mixed-script text + try (Pattern p = RE2.compile("Привет")) { + try (Matcher m = p.matcher(largeText)) { + long start = System.currentTimeMillis(); + assertThat(m.find()).isTrue(); + long duration = System.currentTimeMillis() - start; + assertThat(duration).isLessThan(100); // Should be fast + } + } + } + + // ===== Email and URL Pattern Tests ===== + + @Test + void testEmailPattern() { + String emailPattern = "\\w+@\\w+\\.\\w+"; + try (Pattern p = RE2.compile(emailPattern)) { + assertThat(p.matches("user@example.com")).isTrue(); + assertThat(p.matches("invalid.email")).isFalse(); + assertThat(p.matches("@example.com")).isFalse(); + } + } + + @Test + void testURLPattern() { + String urlPattern = "https?://[\\w.]+(/.*)?"; + try (Pattern p = RE2.compile(urlPattern)) { + try (Matcher m = p.matcher("https://example.com/path")) { + assertThat(m.find()).isTrue(); + } + try (Matcher m = p.matcher("http://test.org")) { + assertThat(m.find()).isTrue(); + } + try (Matcher m = p.matcher("ftp://example.com")) { + assertThat(m.find()).isFalse(); + } + } + } + + // ===== Error Handling Tests ===== + + @Test + void testNullPatternThrows() { + assertThatThrownBy(() -> RE2.compile(null)).isInstanceOf(NullPointerException.class); + } + + @Test + void testNullInputThrows() { + try (Pattern p = RE2.compile("test")) { + assertThatThrownBy(() -> p.matcher(null)).isInstanceOf(NullPointerException.class); + } + } + + @ParameterizedTest + @ValueSource(strings = {"(unclosed", "(?Pempty)", "[[invalid"}) + void testInvalidPatternThrows(String invalidPattern) { + assertThatThrownBy(() -> RE2.compile(invalidPattern)) + .isInstanceOf(PatternCompilationException.class) + .hasMessageContaining("compilation failed"); + } + + @Test + void testPatternCompilationExceptionContainsPattern() { + try { + RE2.compile("(unclosed"); + fail("Should have thrown PatternCompilationException"); + } catch (PatternCompilationException e) { + assertThat(e.getPattern()).isEqualTo("(unclosed"); + 
assertThat(e.getMessage()).contains("unclosed"); + } + } + + // ===== Resource Management Tests ===== + + @Test + void testPatternClose() { + // Use compileWithoutCache() to test actual closing + Pattern p = Pattern.compileWithoutCache("test", true); + assertThat(p.isClosed()).isFalse(); + + p.close(); + assertThat(p.isClosed()).isTrue(); + } + + @Test + void testUseAfterClose() { + // Use compileWithoutCache() to test actual closing + Pattern p = Pattern.compileWithoutCache("test", true); + p.close(); + + assertThatThrownBy(() -> p.matcher("input")) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("closed"); + } + + @Test + void testDoubleClose() { + // Use compileWithoutCache() to test actual closing + Pattern p = Pattern.compileWithoutCache("test", true); + p.close(); + + // Second close should be idempotent (not throw) + assertThatCode(p::close).doesNotThrowAnyException(); + } + + @Test + void testTryWithResources() { + // Verify AutoCloseable works correctly with uncached patterns + Pattern[] holder = new Pattern[1]; + + try (Pattern p = Pattern.compileWithoutCache("test", true)) { + holder[0] = p; + assertThat(p.isClosed()).isFalse(); + } + + assertThat(holder[0].isClosed()).isTrue(); + } + + @Test + void testCachedPatternNotClosedOnClose() { + // Cached patterns should NOT actually close when close() is called + Pattern p = RE2.compile("test"); + assertThat(p.isClosed()).isFalse(); + + p.close(); // This should be a no-op for cached patterns + + // Pattern should still not be closed (cache manages it) + assertThat(p.isClosed()).isFalse(); + } + + @Test + void testNestedTryWithResources() { + try (Pattern p = RE2.compile("test")) { + try (Matcher m = p.matcher("test")) { + assertThat(m.matches()).isTrue(); + } + } + } + + @Test + void testMultiplePatternsIndependent() { + try (Pattern p1 = RE2.compile("pattern1"); + Pattern p2 = RE2.compile("pattern2"); + Pattern p3 = RE2.compile("pattern3")) { + + 
assertThat(p1.matches("pattern1")).isTrue(); + assertThat(p2.matches("pattern2")).isTrue(); + assertThat(p3.matches("pattern3")).isTrue(); + + assertThat(p1.matches("pattern2")).isFalse(); + } + } + + // ===== Complex Pattern Tests ===== + + @Test + void testIPv4Pattern() { + String ipPattern = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"; + try (Pattern p = RE2.compile(ipPattern)) { + assertThat(p.matches("192.168.1.1")).isTrue(); + assertThat(p.matches("10.0.0.1")).isTrue(); + assertThat(p.matches("999.999.999.999")).isTrue(); // Matches pattern, not valid IP + assertThat(p.matches("192.168.1")).isFalse(); + } + } + + @Test + void testDatePattern() { + String datePattern = "\\d{4}-\\d{2}-\\d{2}"; + try (Pattern p = RE2.compile(datePattern)) { + assertThat(p.matches("2025-11-17")).isTrue(); + assertThat(p.matches("2025-1-17")).isFalse(); + assertThat(p.matches("25-11-17")).isFalse(); + } + } + + @Test + void testPhoneNumberPattern() { + String phonePattern = "\\(?\\d{3}\\)?[- ]?\\d{3}[- ]?\\d{4}"; + try (Pattern p = RE2.compile(phonePattern)) { + try (Matcher m = p.matcher("(555) 123-4567")) { + assertThat(m.find()).isTrue(); + } + try (Matcher m = p.matcher("555-123-4567")) { + assertThat(m.find()).isTrue(); + } + try (Matcher m = p.matcher("5551234567")) { + assertThat(m.find()).isTrue(); + } + } + } + + @Test + void testComplexAlternation() { + try (Pattern p = RE2.compile("(cat|dog|bird|fish)")) { + assertThat(p.matches("cat")).isTrue(); + assertThat(p.matches("dog")).isTrue(); + assertThat(p.matches("fish")).isTrue(); + assertThat(p.matches("cow")).isFalse(); + } + } + + @Test + void testNestedGroups() { + try (Pattern p = RE2.compile("((a|b)(c|d))")) { + assertThat(p.matches("ac")).isTrue(); + assertThat(p.matches("ad")).isTrue(); + assertThat(p.matches("bc")).isTrue(); + assertThat(p.matches("bd")).isTrue(); + assertThat(p.matches("ab")).isFalse(); + } + } + + // ===== Edge Cases ===== + + @Test + void testVeryLongPattern() { + String longPattern = 
"a".repeat(1000); + try (Pattern p = RE2.compile(longPattern)) { + assertThat(p.matches(longPattern)).isTrue(); + assertThat(p.matches("a".repeat(999))).isFalse(); + } + } + + @Test + void testVeryLongInput() { + String longInput = "x".repeat(10000) + "needle" + "y".repeat(10000); + try (Pattern p = RE2.compile("needle")) { + try (Matcher m = p.matcher(longInput)) { + assertThat(m.find()).isTrue(); + } + } + } + + @Test + void testPatternWithManyAlternatives() { + StringBuilder pattern = new StringBuilder(); + for (int i = 0; i < 100; i++) { + if (i > 0) pattern.append("|"); + pattern.append("word").append(i); + } + + try (Pattern p = RE2.compile(pattern.toString())) { + assertThat(p.matches("word50")).isTrue(); + assertThat(p.matches("word99")).isTrue(); + assertThat(p.matches("word100")).isFalse(); + } + } + + @Test + void testDeepNesting() { + String pattern = "((((((((((a))))))))))"; + try (Pattern p = RE2.compile(pattern)) { + assertThat(p.matches("a")).isTrue(); + } + } + + // ===== Whitespace and Special Input Tests ===== + + @Test + void testWhitespaceMatching() { + try (Pattern p = RE2.compile("\\s+")) { + assertThat(p.matches(" ")).isTrue(); + assertThat(p.matches("\t\n")).isTrue(); + assertThat(p.matches("text")).isFalse(); + } + } + + @Test + void testNewlinesInInput() { + try (Pattern p = RE2.compile("hello")) { + try (Matcher m = p.matcher("hello\nworld")) { + assertThat(m.find()).isTrue(); + } + } + } + + @Test + void testTabsInInput() { + try (Pattern p = RE2.compile("hello\tworld")) { + assertThat(p.matches("hello\tworld")).isTrue(); + } + } + + // ===== Resource Leak Tests ===== + + @Test + void testManyPatternsNoLeak() { + // Compile and close many patterns (tests resource cleanup) + for (int i = 0; i < 1000; i++) { + try (Pattern p = RE2.compile("pattern" + i)) { + assertThat(p).isNotNull(); + } + } + } + + @Test + void testManyMatchersNoLeak() { + try (Pattern p = RE2.compile("test")) { + for (int i = 0; i < 1000; i++) { + try (Matcher m = 
p.matcher("test" + i)) { + m.matches(); + } + } + } + } + + // ===== Concurrent Access Tests ===== - @Test - void testConcurrentPatternCompilation() throws InterruptedException { - int threadCount = 10; - int patternsPerThread = 100; - CountDownLatch latch = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - List threads = new ArrayList<>(); - for (int t = 0; t < threadCount; t++) { - int threadId = t; - Thread thread = new Thread(() -> { + @Test + void testConcurrentPatternCompilation() throws InterruptedException { + int threadCount = 10; + int patternsPerThread = 100; + CountDownLatch latch = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + List threads = new ArrayList<>(); + for (int t = 0; t < threadCount; t++) { + int threadId = t; + Thread thread = + new Thread( + () -> { try { - for (int i = 0; i < patternsPerThread; i++) { - try (Pattern p = RE2.compile("thread" + threadId + "pattern" + i)) { - p.matches("thread" + threadId + "pattern" + i); - } + for (int i = 0; i < patternsPerThread; i++) { + try (Pattern p = RE2.compile("thread" + threadId + "pattern" + i)) { + p.matches("thread" + threadId + "pattern" + i); } + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - latch.countDown(); + latch.countDown(); } - }); - threads.add(thread); - thread.start(); - } - - latch.await(); - assertThat(errors.get()).isEqualTo(0); + }); + threads.add(thread); + thread.start(); } - // ===== Pattern Properties Tests ===== + latch.await(); + assertThat(errors.get()).isEqualTo(0); + } - @Test - void testPatternProperties() { - try (Pattern p = RE2.compile("test.*pattern", false)) { - assertThat(p.pattern()).isEqualTo("test.*pattern"); - assertThat(p.isCaseSensitive()).isFalse(); - assertThat(p.isClosed()).isFalse(); - } + // ===== Pattern Properties Tests ===== + + @Test + void testPatternProperties() { + try (Pattern p = RE2.compile("test.*pattern", false)) { + 
assertThat(p.pattern()).isEqualTo("test.*pattern"); + assertThat(p.isCaseSensitive()).isFalse(); + assertThat(p.isClosed()).isFalse(); } + } - @Test - void testMatcherProperties() { - try (Pattern p = RE2.compile("test")) { - try (Matcher m = p.matcher("input")) { - assertThat(m.pattern()).isSameAs(p); - assertThat(m.input()).isEqualTo("input"); - } - } + @Test + void testMatcherProperties() { + try (Pattern p = RE2.compile("test")) { + try (Matcher m = p.matcher("input")) { + assertThat(m.pattern()).isSameAs(p); + assertThat(m.input()).isEqualTo("input"); + } } + } - // ===== ReDoS Safety Tests ===== + // ===== ReDoS Safety Tests ===== - @Test - void testReDoSSafePatterns() { - // Patterns that would cause catastrophic backtracking in Java regex - // RE2 handles these in linear time + @Test + void testReDoSSafePatterns() { + // Patterns that would cause catastrophic backtracking in Java regex + // RE2 handles these in linear time - String[] redosPatterns = { - "(a+)+b", - "(a*)*b", - "(a|a)*b", - "(a|ab)*c" - }; + String[] redosPatterns = {"(a+)+b", "(a*)*b", "(a|a)*b", "(a|ab)*c"}; - for (String pattern : redosPatterns) { - try (Pattern p = RE2.compile(pattern)) { - // These should complete quickly (RE2 is linear time) - // In Java regex, these would hang on long non-matching input - String input = "a".repeat(100) + "x"; + for (String pattern : redosPatterns) { + try (Pattern p = RE2.compile(pattern)) { + // These should complete quickly (RE2 is linear time) + // In Java regex, these would hang on long non-matching input + String input = "a".repeat(100) + "x"; - try (Matcher m = p.matcher(input)) { - long start = System.currentTimeMillis(); - boolean matches = m.find(); - long duration = System.currentTimeMillis() - start; + try (Matcher m = p.matcher(input)) { + long start = System.currentTimeMillis(); + boolean matches = m.find(); + long duration = System.currentTimeMillis() - start; - // Should complete in milliseconds, not seconds - 
assertThat(duration).isLessThan(100); - } - } + // Should complete in milliseconds, not seconds + assertThat(duration).isLessThan(100); } + } } + } - // ===== toString() Tests ===== + // ===== toString() Tests ===== - @Test - void testToStringDoesNotThrow() { - try (Pattern p = RE2.compile("test")) { - assertThat(p.toString()).isNotNull(); + @Test + void testToStringDoesNotThrow() { + try (Pattern p = RE2.compile("test")) { + assertThat(p.toString()).isNotNull(); - try (Matcher m = p.matcher("input")) { - assertThat(m.toString()).isNotNull(); - } - } + try (Matcher m = p.matcher("input")) { + assertThat(m.toString()).isNotNull(); + } } + } - // ===== Large Scale Tests ===== + // ===== Large Scale Tests ===== - @Test - void testManyDifferentPatterns() { - // Test each pattern with matching and non-matching input - try (Pattern p = RE2.compile("\\d+")) { - assertThat(p.matches("123")).isTrue(); - assertThat(p.matches("abc")).isFalse(); - } + @Test + void testManyDifferentPatterns() { + // Test each pattern with matching and non-matching input + try (Pattern p = RE2.compile("\\d+")) { + assertThat(p.matches("123")).isTrue(); + assertThat(p.matches("abc")).isFalse(); + } - try (Pattern p = RE2.compile("\\w+")) { - assertThat(p.matches("hello123")).isTrue(); - assertThat(p.matches("!!!")).isFalse(); - } + try (Pattern p = RE2.compile("\\w+")) { + assertThat(p.matches("hello123")).isTrue(); + assertThat(p.matches("!!!")).isFalse(); + } - try (Pattern p = RE2.compile("\\s+")) { - assertThat(p.matches(" ")).isTrue(); - assertThat(p.matches("text")).isFalse(); - } + try (Pattern p = RE2.compile("\\s+")) { + assertThat(p.matches(" ")).isTrue(); + assertThat(p.matches("text")).isFalse(); + } - try (Pattern p = RE2.compile("[a-z]+")) { - assertThat(p.matches("abc")).isTrue(); - assertThat(p.matches("ABC")).isFalse(); - } + try (Pattern p = RE2.compile("[a-z]+")) { + assertThat(p.matches("abc")).isTrue(); + assertThat(p.matches("ABC")).isFalse(); + } - try (Pattern p = 
RE2.compile("[A-Z]+")) { - assertThat(p.matches("ABC")).isTrue(); - assertThat(p.matches("abc")).isFalse(); - } + try (Pattern p = RE2.compile("[A-Z]+")) { + assertThat(p.matches("ABC")).isTrue(); + assertThat(p.matches("abc")).isFalse(); + } - try (Pattern p = RE2.compile(".*")) { - assertThat(p.matches("anything")).isTrue(); - assertThat(p.matches("")).isTrue(); - } + try (Pattern p = RE2.compile(".*")) { + assertThat(p.matches("anything")).isTrue(); + assertThat(p.matches("")).isTrue(); + } - try (Pattern p = RE2.compile(".+")) { - assertThat(p.matches("something")).isTrue(); - assertThat(p.matches("")).isFalse(); - } + try (Pattern p = RE2.compile(".+")) { + assertThat(p.matches("something")).isTrue(); + assertThat(p.matches("")).isFalse(); + } - try (Pattern p = RE2.compile("a*")) { - assertThat(p.matches("aaa")).isTrue(); - assertThat(p.matches("")).isTrue(); - assertThat(p.matches("b")).isFalse(); - } + try (Pattern p = RE2.compile("a*")) { + assertThat(p.matches("aaa")).isTrue(); + assertThat(p.matches("")).isTrue(); + assertThat(p.matches("b")).isFalse(); + } - try (Pattern p = RE2.compile("a+")) { - assertThat(p.matches("aaa")).isTrue(); - assertThat(p.matches("")).isFalse(); - } + try (Pattern p = RE2.compile("a+")) { + assertThat(p.matches("aaa")).isTrue(); + assertThat(p.matches("")).isFalse(); + } - try (Pattern p = RE2.compile("^start")) { - try (Matcher m = p.matcher("start here")) { - assertThat(m.find()).isTrue(); - } - try (Matcher m = p.matcher("here start")) { - assertThat(m.find()).isFalse(); - } - } + try (Pattern p = RE2.compile("^start")) { + try (Matcher m = p.matcher("start here")) { + assertThat(m.find()).isTrue(); + } + try (Matcher m = p.matcher("here start")) { + assertThat(m.find()).isFalse(); + } + } - try (Pattern p = RE2.compile("end$")) { - try (Matcher m = p.matcher("at the end")) { - assertThat(m.find()).isTrue(); - } - try (Matcher m = p.matcher("end here")) { - assertThat(m.find()).isFalse(); - } - } + try (Pattern p = 
RE2.compile("end$")) { + try (Matcher m = p.matcher("at the end")) { + assertThat(m.find()).isTrue(); + } + try (Matcher m = p.matcher("end here")) { + assertThat(m.find()).isFalse(); + } + } - try (Pattern p = RE2.compile("^exact$")) { - assertThat(p.matches("exact")).isTrue(); - assertThat(p.matches("exact ")).isFalse(); - assertThat(p.matches(" exact")).isFalse(); - } + try (Pattern p = RE2.compile("^exact$")) { + assertThat(p.matches("exact")).isTrue(); + assertThat(p.matches("exact ")).isFalse(); + assertThat(p.matches(" exact")).isFalse(); + } - try (Pattern p = RE2.compile("hello|world")) { - assertThat(p.matches("hello")).isTrue(); - assertThat(p.matches("world")).isTrue(); - assertThat(p.matches("goodbye")).isFalse(); - } + try (Pattern p = RE2.compile("hello|world")) { + assertThat(p.matches("hello")).isTrue(); + assertThat(p.matches("world")).isTrue(); + assertThat(p.matches("goodbye")).isFalse(); + } - try (Pattern p = RE2.compile("(cat|dog)")) { - assertThat(p.matches("cat")).isTrue(); - assertThat(p.matches("dog")).isTrue(); - assertThat(p.matches("bird")).isFalse(); - } + try (Pattern p = RE2.compile("(cat|dog)")) { + assertThat(p.matches("cat")).isTrue(); + assertThat(p.matches("dog")).isTrue(); + assertThat(p.matches("bird")).isFalse(); + } - try (Pattern p = RE2.compile("\\d{3}-\\d{4}")) { - assertThat(p.matches("123-4567")).isTrue(); - assertThat(p.matches("12-4567")).isFalse(); - } + try (Pattern p = RE2.compile("\\d{3}-\\d{4}")) { + assertThat(p.matches("123-4567")).isTrue(); + assertThat(p.matches("12-4567")).isFalse(); + } - try (Pattern p = RE2.compile("\\w+@\\w+\\.\\w+")) { - assertThat(p.matches("user@example.com")).isTrue(); - assertThat(p.matches("invalid")).isFalse(); - } + try (Pattern p = RE2.compile("\\w+@\\w+\\.\\w+")) { + assertThat(p.matches("user@example.com")).isTrue(); + assertThat(p.matches("invalid")).isFalse(); } + } - @Test - void testQuickSuccessiveOperations() { - // Test rapid pattern creation and matching (stress test) 
- for (int i = 0; i < 100; i++) { - boolean matches = RE2.matches("test" + i, "test" + i); - assertThat(matches).isTrue(); + @Test + void testQuickSuccessiveOperations() { + // Test rapid pattern creation and matching (stress test) + for (int i = 0; i < 100; i++) { + boolean matches = RE2.matches("test" + i, "test" + i); + assertThat(matches).isTrue(); - // Also verify non-match - matches = RE2.matches("test" + i, "different" + i); - assertThat(matches).isFalse(); - } + // Also verify non-match + matches = RE2.matches("test" + i, "different" + i); + assertThat(matches).isFalse(); } + } - @Test - void testPatternReuseManyTimes() { - // Test a single pattern used many times - try (Pattern p = RE2.compile("\\d+")) { - for (int i = 0; i < 1000; i++) { - assertThat(p.matches(String.valueOf(i))).isTrue(); - assertThat(p.matches("text" + i)).isFalse(); - } - } + @Test + void testPatternReuseManyTimes() { + // Test a single pattern used many times + try (Pattern p = RE2.compile("\\d+")) { + for (int i = 0; i < 1000; i++) { + assertThat(p.matches(String.valueOf(i))).isTrue(); + assertThat(p.matches("text" + i)).isFalse(); + } } + } - // ===== Log Processing Tests (Real-World Use Case) ===== + // ===== Log Processing Tests (Real-World Use Case) ===== - @Test - void testTypicalLogEntry() { - String logEntry = "2025-11-17 10:30:45.123 [INFO] com.example.Service - Processing request id=12345 user=admin@example.com duration=250ms status=200"; + @Test + void testTypicalLogEntry() { + String logEntry = + "2025-11-17 10:30:45.123 [INFO] com.example.Service - Processing request id=12345" + + " user=admin@example.com duration=250ms status=200"; - // Find timestamp - try (Pattern p = RE2.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}")) { - try (Matcher m = p.matcher(logEntry)) { - assertThat(m.find()).isTrue(); - } - } + // Find timestamp + try (Pattern p = RE2.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}")) { + try (Matcher m = p.matcher(logEntry)) { + 
assertThat(m.find()).isTrue(); + } + } - // Find log level - try (Pattern p = RE2.compile("\\[(INFO|WARN|ERROR|DEBUG)\\]")) { - try (Matcher m = p.matcher(logEntry)) { - assertThat(m.find()).isTrue(); - } - } + // Find log level + try (Pattern p = RE2.compile("\\[(INFO|WARN|ERROR|DEBUG)\\]")) { + try (Matcher m = p.matcher(logEntry)) { + assertThat(m.find()).isTrue(); + } + } - // Find user email - try (Pattern p = RE2.compile("user=\\w+@[\\w.]+")) { - try (Matcher m = p.matcher(logEntry)) { - assertThat(m.find()).isTrue(); - } - } + // Find user email + try (Pattern p = RE2.compile("user=\\w+@[\\w.]+")) { + try (Matcher m = p.matcher(logEntry)) { + assertThat(m.find()).isTrue(); + } + } - // Find duration - try (Pattern p = RE2.compile("duration=\\d+ms")) { - try (Matcher m = p.matcher(logEntry)) { - assertThat(m.find()).isTrue(); - } - } + // Find duration + try (Pattern p = RE2.compile("duration=\\d+ms")) { + try (Matcher m = p.matcher(logEntry)) { + assertThat(m.find()).isTrue(); + } + } - // Find request ID - try (Pattern p = RE2.compile("id=\\d+")) { - try (Matcher m = p.matcher(logEntry)) { - assertThat(m.find()).isTrue(); - } - } + // Find request ID + try (Pattern p = RE2.compile("id=\\d+")) { + try (Matcher m = p.matcher(logEntry)) { + assertThat(m.find()).isTrue(); + } } + } - @Test - void testMultiLineLogEntry() { - String multiLineLog = """ + @Test + void testMultiLineLogEntry() { + String multiLineLog = + """ 2025-11-17 10:30:45.123 [ERROR] com.example.Service - Request failed java.lang.NullPointerException: Cannot invoke method on null object at com.example.Service.processRequest(Service.java:123) @@ -1165,418 +1148,442 @@ void testMultiLineLogEntry() { ... 
10 more """; - // Find error class - try (Pattern p = RE2.compile("java\\.lang\\.\\w+Exception")) { - try (Matcher m = p.matcher(multiLineLog)) { - assertThat(m.find()).isTrue(); - } - } - - // Find stack trace lines - try (Pattern p = RE2.compile("at [\\w.$]+\\([\\w.]+:\\d+\\)")) { - try (Matcher m = p.matcher(multiLineLog)) { - assertThat(m.find()).isTrue(); - } - } - - // Find file and line number - try (Pattern p = RE2.compile("Service\\.java:\\d+")) { - try (Matcher m = p.matcher(multiLineLog)) { - assertThat(m.find()).isTrue(); - } - } + // Find error class + try (Pattern p = RE2.compile("java\\.lang\\.\\w+Exception")) { + try (Matcher m = p.matcher(multiLineLog)) { + assertThat(m.find()).isTrue(); + } } - @Test - void testLargeLogChunk() { - // Simulate processing a large log file (10,000 lines) - StringBuilder largeLog = new StringBuilder(); - for (int i = 0; i < 10000; i++) { - largeLog.append(String.format("2025-11-17 10:%02d:%02d [INFO] Request %d processed successfully%n", - i / 60, i % 60, i)); - } - - String logText = largeLog.toString(); - - // Find all INFO entries - try (Pattern p = RE2.compile("\\[INFO\\]")) { - try (Matcher m = p.matcher(logText)) { - assertThat(m.find()).isTrue(); - } - } - - // Find specific request ID in large log - try (Pattern p = RE2.compile("Request 5000 processed")) { - try (Matcher m = p.matcher(logText)) { - assertThat(m.find()).isTrue(); - } - } - - // Pattern that doesn't exist - try (Pattern p = RE2.compile("\\[ERROR\\]")) { - try (Matcher m = p.matcher(logText)) { - assertThat(m.find()).isFalse(); - } - } + // Find stack trace lines + try (Pattern p = RE2.compile("at [\\w.$]+\\([\\w.]+:\\d+\\)")) { + try (Matcher m = p.matcher(multiLineLog)) { + assertThat(m.find()).isTrue(); + } } - @Test - void testApacheAccessLog() { - String accessLog = "192.168.1.100 - - [17/Nov/2025:10:30:45 +0000] \"GET /api/users?id=123 HTTP/1.1\" 200 1234 \"https://example.com/\" \"Mozilla/5.0\""; - - // Find IP address - try (Pattern p = 
RE2.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}")) { - try (Matcher m = p.matcher(accessLog)) { - assertThat(m.find()).isTrue(); - } - } - - // Find HTTP method - try (Pattern p = RE2.compile("(GET|POST|PUT|DELETE|PATCH)")) { - try (Matcher m = p.matcher(accessLog)) { - assertThat(m.find()).isTrue(); - } - } - - // Find HTTP status - try (Pattern p = RE2.compile("\" \\d{3} ")) { - try (Matcher m = p.matcher(accessLog)) { - assertThat(m.find()).isTrue(); - } - } - - // Find URL path - try (Pattern p = RE2.compile("/api/\\w+")) { - try (Matcher m = p.matcher(accessLog)) { - assertThat(m.find()).isTrue(); - } - } + // Find file and line number + try (Pattern p = RE2.compile("Service\\.java:\\d+")) { + try (Matcher m = p.matcher(multiLineLog)) { + assertThat(m.find()).isTrue(); + } } + } - @Test - void testJSONLogEntry() { - String jsonLog = "{\"timestamp\":\"2025-11-17T10:30:45.123Z\",\"level\":\"ERROR\",\"service\":\"api-gateway\",\"message\":\"Connection timeout\",\"userId\":12345,\"requestId\":\"req-abc-123\",\"duration\":5000}"; + @Test + void testLargeLogChunk() { + // Simulate processing a large log file (10,000 lines) + StringBuilder largeLog = new StringBuilder(); + for (int i = 0; i < 10000; i++) { + largeLog.append( + String.format( + "2025-11-17 10:%02d:%02d [INFO] Request %d processed successfully%n", + i / 60, i % 60, i)); + } - // Find timestamp - try (Pattern p = RE2.compile("\"timestamp\":\"[^\"]+\"")) { - try (Matcher m = p.matcher(jsonLog)) { - assertThat(m.find()).isTrue(); - } - } + String logText = largeLog.toString(); - // Find error level - try (Pattern p = RE2.compile("\"level\":\"ERROR\"")) { - try (Matcher m = p.matcher(jsonLog)) { - assertThat(m.find()).isTrue(); - } - } - - // Find user ID - try (Pattern p = RE2.compile("\"userId\":\\d+")) { - try (Matcher m = p.matcher(jsonLog)) { - assertThat(m.find()).isTrue(); - } - } - - // Find request ID pattern - try (Pattern p = RE2.compile("\"requestId\":\"req-[a-z0-9-]+\"")) { - try 
(Matcher m = p.matcher(jsonLog)) { - assertThat(m.find()).isTrue(); - } - } + // Find all INFO entries + try (Pattern p = RE2.compile("\\[INFO\\]")) { + try (Matcher m = p.matcher(logText)) { + assertThat(m.find()).isTrue(); + } } - @Test - void testSearchInVeryLargeLogFile() { - // Simulate searching in a 1MB log file - StringBuilder hugeLog = new StringBuilder(); - - // Add 50,000 normal log entries - for (int i = 0; i < 50000; i++) { - hugeLog.append(String.format("[INFO] %d - Normal operation%n", i)); - } + // Find specific request ID in large log + try (Pattern p = RE2.compile("Request 5000 processed")) { + try (Matcher m = p.matcher(logText)) { + assertThat(m.find()).isTrue(); + } + } - // Add a few error entries in the middle - hugeLog.append("[ERROR] Database connection failed - retry attempt 1\n"); - hugeLog.append("[ERROR] Database connection failed - retry attempt 2\n"); + // Pattern that doesn't exist + try (Pattern p = RE2.compile("\\[ERROR\\]")) { + try (Matcher m = p.matcher(logText)) { + assertThat(m.find()).isFalse(); + } + } + } - // Add more normal entries - for (int i = 50000; i < 100000; i++) { - hugeLog.append(String.format("[INFO] %d - Normal operation%n", i)); - } + @Test + void testApacheAccessLog() { + String accessLog = + "192.168.1.100 - - [17/Nov/2025:10:30:45 +0000] \"GET /api/users?id=123 HTTP/1.1\" 200 1234" + + " \"https://example.com/\" \"Mozilla/5.0\""; - String logText = hugeLog.toString(); - assertThat(logText.length()).isGreaterThan(1_000_000); // Over 1MB + // Find IP address + try (Pattern p = RE2.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}")) { + try (Matcher m = p.matcher(accessLog)) { + assertThat(m.find()).isTrue(); + } + } - // Search for ERROR entries in huge log - try (Pattern p = RE2.compile("\\[ERROR\\].*")) { - try (Matcher m = p.matcher(logText)) { - long start = System.currentTimeMillis(); - boolean found = m.find(); - long duration = System.currentTimeMillis() - start; + // Find HTTP method + try (Pattern 
p = RE2.compile("(GET|POST|PUT|DELETE|PATCH)")) { + try (Matcher m = p.matcher(accessLog)) { + assertThat(m.find()).isTrue(); + } + } - assertThat(found).isTrue(); - // Should be fast even on huge input (RE2 is linear time) - assertThat(duration).isLessThan(500); - } - } + // Find HTTP status + try (Pattern p = RE2.compile("\" \\d{3} ")) { + try (Matcher m = p.matcher(accessLog)) { + assertThat(m.find()).isTrue(); + } + } - // Search for pattern that doesn't exist - try (Pattern p = RE2.compile("\\[CRITICAL\\]")) { - try (Matcher m = p.matcher(logText)) { - long start = System.currentTimeMillis(); - boolean found = m.find(); - long duration = System.currentTimeMillis() - start; - - assertThat(found).isFalse(); - // Should still be fast even when scanning entire 1MB - assertThat(duration).isLessThan(500); - } - } + // Find URL path + try (Pattern p = RE2.compile("/api/\\w+")) { + try (Matcher m = p.matcher(accessLog)) { + assertThat(m.find()).isTrue(); + } } + } - @Test - void testCassandraQueryLog() { - String cassandraLog = "INFO [Native-Transport-Requests-1] 2025-11-17 10:30:45,123 QueryProcessor.java:169 - Execute CQL3 query: SELECT * FROM keyspace.table WHERE partition_key = 'abc123' AND clustering_key > 100 ALLOW FILTERING"; + @Test + void testJSONLogEntry() { + String jsonLog = + "{\"timestamp\":\"2025-11-17T10:30:45.123Z\",\"level\":\"ERROR\",\"service\":\"api-gateway\",\"message\":\"Connection" + + " timeout\",\"userId\":12345,\"requestId\":\"req-abc-123\",\"duration\":5000}"; - // Find CQL query - try (Pattern p = RE2.compile("SELECT .* FROM [\\w.]+")) { - try (Matcher m = p.matcher(cassandraLog)) { - assertThat(m.find()).isTrue(); - } - } + // Find timestamp + try (Pattern p = RE2.compile("\"timestamp\":\"[^\"]+\"")) { + try (Matcher m = p.matcher(jsonLog)) { + assertThat(m.find()).isTrue(); + } + } - // Find keyspace.table - try (Pattern p = RE2.compile("FROM [\\w]+\\.[\\w]+")) { - try (Matcher m = p.matcher(cassandraLog)) { - 
assertThat(m.find()).isTrue(); - } - } + // Find error level + try (Pattern p = RE2.compile("\"level\":\"ERROR\"")) { + try (Matcher m = p.matcher(jsonLog)) { + assertThat(m.find()).isTrue(); + } + } - // Find ALLOW FILTERING - try (Pattern p = RE2.compile("ALLOW FILTERING")) { - try (Matcher m = p.matcher(cassandraLog)) { - assertThat(m.find()).isTrue(); - } - } + // Find user ID + try (Pattern p = RE2.compile("\"userId\":\\d+")) { + try (Matcher m = p.matcher(jsonLog)) { + assertThat(m.find()).isTrue(); + } + } - // Find thread name - try (Pattern p = RE2.compile("\\[Native-Transport-Requests-\\d+\\]")) { - try (Matcher m = p.matcher(cassandraLog)) { - assertThat(m.find()).isTrue(); - } - } + // Find request ID pattern + try (Pattern p = RE2.compile("\"requestId\":\"req-[a-z0-9-]+\"")) { + try (Matcher m = p.matcher(jsonLog)) { + assertThat(m.find()).isTrue(); + } } + } - @Test - void testSearchMultiplePatternsInLargeText() { - // Simulate Cassandra SAI scanning large partition with multiple filter terms - StringBuilder partition = new StringBuilder(); + @Test + void testSearchInVeryLargeLogFile() { + // Simulate searching in a 1MB log file + StringBuilder hugeLog = new StringBuilder(); - // 10,000 rows in partition - for (int i = 0; i < 10000; i++) { - partition.append(String.format("row_%d|user_%d@example.com|status_%s|value_%d|timestamp_%d%n", - i, i % 100, i % 2 == 0 ? 
"active" : "inactive", i * 10, System.currentTimeMillis() + i)); - } + // Add 50,000 normal log entries + for (int i = 0; i < 50000; i++) { + hugeLog.append(String.format("[INFO] %d - Normal operation%n", i)); + } - String data = partition.toString(); - assertThat(data.length()).isGreaterThan(500_000); // Over 500KB + // Add a few error entries in the middle + hugeLog.append("[ERROR] Database connection failed - retry attempt 1\n"); + hugeLog.append("[ERROR] Database connection failed - retry attempt 2\n"); - // Pattern 1: Find rows with specific user pattern - try (Pattern p = RE2.compile("user_42@example\\.com")) { - try (Matcher m = p.matcher(data)) { - assertThat(m.find()).isTrue(); - } - } + // Add more normal entries + for (int i = 50000; i < 100000; i++) { + hugeLog.append(String.format("[INFO] %d - Normal operation%n", i)); + } - // Pattern 2: Find active status - try (Pattern p = RE2.compile("status_active")) { - try (Matcher m = p.matcher(data)) { - assertThat(m.find()).isTrue(); - } - } + String logText = hugeLog.toString(); + assertThat(logText.length()).isGreaterThan(1_000_000); // Over 1MB - // Pattern 3: Find specific row range - try (Pattern p = RE2.compile("row_500\\d")) { - try (Matcher m = p.matcher(data)) { - assertThat(m.find()).isTrue(); - } - } + // Search for ERROR entries in huge log + try (Pattern p = RE2.compile("\\[ERROR\\].*")) { + try (Matcher m = p.matcher(logText)) { + long start = System.currentTimeMillis(); + boolean found = m.find(); + long duration = System.currentTimeMillis() - start; - // Pattern 4: Complex pattern combining multiple fields - try (Pattern p = RE2.compile("row_\\d+\\|.*@example\\.com\\|status_active")) { - try (Matcher m = p.matcher(data)) { - long start = System.currentTimeMillis(); - boolean found = m.find(); - long duration = System.currentTimeMillis() - start; - - assertThat(found).isTrue(); - // Should be fast even on 500KB+ data - assertThat(duration).isLessThan(200); - } - } + assertThat(found).isTrue(); 
+ // Should be fast even on huge input (RE2 is linear time) + assertThat(duration).isLessThan(500); + } } - @Test - void testRealisticDatabaseTextSearch() { - // Simulate searching through Cassandra text column with large values - String[] largeTextSamples = { - // Sample 1: Large JSON document - "{\"user\":{\"name\":\"John Doe\",\"email\":\"john@example.com\",\"address\":{\"street\":\"123 Main St\",\"city\":\"Springfield\"},\"orders\":[" + - "{\"id\":1,\"product\":\"Widget\",\"price\":29.99},{\"id\":2,\"product\":\"Gadget\",\"price\":49.99}]},\"metadata\":{\"source\":\"web\",\"timestamp\":\"2025-11-17T10:30:45Z\"}}", - - // Sample 2: Large log blob - "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" + - "[ERROR] Retry attempt 1/3\n" + - "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" + - "[ERROR] Retry attempt 2/3\n" + - "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" + - "[ERROR] Retry attempt 3/3\n" + - "[ERROR] All retry attempts exhausted, marking node as DOWN", - - // Sample 3: XML document - "

Important Document2025-11-17
" + - "
This is a large amount of text that might be stored in a database column.
" + - "
More content with various special characters: @#$%^&*(){}[]
" - }; - - for (String text : largeTextSamples) { - // Search for email pattern - try (Pattern p = RE2.compile("\\w+@[\\w.]+")) { - try (Matcher m = p.matcher(text)) { - // May or may not find depending on sample - m.find(); - } - } + // Search for pattern that doesn't exist + try (Pattern p = RE2.compile("\\[CRITICAL\\]")) { + try (Matcher m = p.matcher(logText)) { + long start = System.currentTimeMillis(); + boolean found = m.find(); + long duration = System.currentTimeMillis() - start; - // Search for number pattern - try (Pattern p = RE2.compile("\\d+")) { - try (Matcher m = p.matcher(text)) { - assertThat(m.find()).isTrue(); // All samples have numbers - } - } - - // Search for ERROR keyword - try (Pattern p = RE2.compile("ERROR")) { - try (Matcher m = p.matcher(text)) { - m.find(); // May or may not find - } - } - } + assertThat(found).isFalse(); + // Should still be fast even when scanning entire 1MB + assertThat(duration).isLessThan(500); + } } + } - @Test - void testConcurrentLogSearching() throws InterruptedException { - // Simulate multiple Cassandra query threads searching logs concurrently - String largeLog = generateLargeLogData(50000); - - int threadCount = 20; - CountDownLatch latch = new CountDownLatch(threadCount); - AtomicInteger successCount = new AtomicInteger(0); - AtomicInteger errors = new AtomicInteger(0); - - String[] searchPatterns = { - "\\[ERROR\\]", - "user_\\d+@example\\.com", - "duration=\\d+ms", - "status=\\d{3}", - "Exception", - "192\\.168\\.\\d+\\.\\d+", - "Request \\d+", - "Thread-\\d+" - }; - - for (int t = 0; t < threadCount; t++) { - int threadId = t; - new Thread(() -> { + @Test + void testCassandraQueryLog() { + String cassandraLog = + "INFO [Native-Transport-Requests-1] 2025-11-17 10:30:45,123 QueryProcessor.java:169 -" + + " Execute CQL3 query: SELECT * FROM keyspace.table WHERE partition_key = 'abc123' AND" + + " clustering_key > 100 ALLOW FILTERING"; + + // Find CQL query + try (Pattern p = RE2.compile("SELECT .* FROM 
[\\w.]+")) { + try (Matcher m = p.matcher(cassandraLog)) { + assertThat(m.find()).isTrue(); + } + } + + // Find keyspace.table + try (Pattern p = RE2.compile("FROM [\\w]+\\.[\\w]+")) { + try (Matcher m = p.matcher(cassandraLog)) { + assertThat(m.find()).isTrue(); + } + } + + // Find ALLOW FILTERING + try (Pattern p = RE2.compile("ALLOW FILTERING")) { + try (Matcher m = p.matcher(cassandraLog)) { + assertThat(m.find()).isTrue(); + } + } + + // Find thread name + try (Pattern p = RE2.compile("\\[Native-Transport-Requests-\\d+\\]")) { + try (Matcher m = p.matcher(cassandraLog)) { + assertThat(m.find()).isTrue(); + } + } + } + + @Test + void testSearchMultiplePatternsInLargeText() { + // Simulate Cassandra SAI scanning large partition with multiple filter terms + StringBuilder partition = new StringBuilder(); + + // 10,000 rows in partition + for (int i = 0; i < 10000; i++) { + partition.append( + String.format( + "row_%d|user_%d@example.com|status_%s|value_%d|timestamp_%d%n", + i, + i % 100, + i % 2 == 0 ? 
"active" : "inactive", + i * 10, + System.currentTimeMillis() + i)); + } + + String data = partition.toString(); + assertThat(data.length()).isGreaterThan(500_000); // Over 500KB + + // Pattern 1: Find rows with specific user pattern + try (Pattern p = RE2.compile("user_42@example\\.com")) { + try (Matcher m = p.matcher(data)) { + assertThat(m.find()).isTrue(); + } + } + + // Pattern 2: Find active status + try (Pattern p = RE2.compile("status_active")) { + try (Matcher m = p.matcher(data)) { + assertThat(m.find()).isTrue(); + } + } + + // Pattern 3: Find specific row range + try (Pattern p = RE2.compile("row_500\\d")) { + try (Matcher m = p.matcher(data)) { + assertThat(m.find()).isTrue(); + } + } + + // Pattern 4: Complex pattern combining multiple fields + try (Pattern p = RE2.compile("row_\\d+\\|.*@example\\.com\\|status_active")) { + try (Matcher m = p.matcher(data)) { + long start = System.currentTimeMillis(); + boolean found = m.find(); + long duration = System.currentTimeMillis() - start; + + assertThat(found).isTrue(); + // Should be fast even on 500KB+ data + assertThat(duration).isLessThan(200); + } + } + } + + @Test + void testRealisticDatabaseTextSearch() { + // Simulate searching through Cassandra text column with large values + String[] largeTextSamples = { + // Sample 1: Large JSON document + "{\"user\":{\"name\":\"John" + + " Doe\",\"email\":\"john@example.com\",\"address\":{\"street\":\"123 Main" + + " St\",\"city\":\"Springfield\"},\"orders\":[" + + "{\"id\":1,\"product\":\"Widget\",\"price\":29.99},{\"id\":2,\"product\":\"Gadget\",\"price\":49.99}]},\"metadata\":{\"source\":\"web\",\"timestamp\":\"2025-11-17T10:30:45Z\"}}", + + // Sample 2: Large log blob + "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" + + "[ERROR] Retry attempt 1/3\n" + + "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" + + "[ERROR] Retry attempt 2/3\n" + + "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" + + "[ERROR] Retry 
attempt 3/3\n" + + "[ERROR] All retry attempts exhausted, marking node as DOWN", + + // Sample 3: XML document + "
Important" + + " Document2025-11-17
This" + + " is a large amount of text that might be stored in a database" + + " column.
More content with various" + + " special characters: @#$%^&*(){}[]
" + }; + + for (String text : largeTextSamples) { + // Search for email pattern + try (Pattern p = RE2.compile("\\w+@[\\w.]+")) { + try (Matcher m = p.matcher(text)) { + // May or may not find depending on sample + m.find(); + } + } + + // Search for number pattern + try (Pattern p = RE2.compile("\\d+")) { + try (Matcher m = p.matcher(text)) { + assertThat(m.find()).isTrue(); // All samples have numbers + } + } + + // Search for ERROR keyword + try (Pattern p = RE2.compile("ERROR")) { + try (Matcher m = p.matcher(text)) { + m.find(); // May or may not find + } + } + } + } + + @Test + void testConcurrentLogSearching() throws InterruptedException { + // Simulate multiple Cassandra query threads searching logs concurrently + String largeLog = generateLargeLogData(50000); + + int threadCount = 20; + CountDownLatch latch = new CountDownLatch(threadCount); + AtomicInteger successCount = new AtomicInteger(0); + AtomicInteger errors = new AtomicInteger(0); + + String[] searchPatterns = { + "\\[ERROR\\]", + "user_\\d+@example\\.com", + "duration=\\d+ms", + "status=\\d{3}", + "Exception", + "192\\.168\\.\\d+\\.\\d+", + "Request \\d+", + "Thread-\\d+" + }; + + for (int t = 0; t < threadCount; t++) { + int threadId = t; + new Thread( + () -> { try { - String pattern = searchPatterns[threadId % searchPatterns.length]; - try (Pattern p = RE2.compile(pattern)) { - try (Matcher m = p.matcher(largeLog)) { - if (m.find()) { - successCount.incrementAndGet(); - } - } + String pattern = searchPatterns[threadId % searchPatterns.length]; + try (Pattern p = RE2.compile(pattern)) { + try (Matcher m = p.matcher(largeLog)) { + if (m.find()) { + successCount.incrementAndGet(); + } } + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - latch.countDown(); + latch.countDown(); } - }).start(); - } - - latch.await(); - assertThat(errors.get()).isEqualTo(0); - assertThat(successCount.get()).isGreaterThan(0); - } - - @Test - void 
testSQLInjectionPatternDetection() { - // Test patterns for detecting SQL injection in logs - String[] suspiciousInputs = { - "admin' OR '1'='1", - "' UNION SELECT * FROM users--", - "1; DROP TABLE users;--", - "admin'--", - "' OR 1=1--" - }; - - // Pattern to detect SQL injection attempts - try (Pattern p = RE2.compile("('.*(OR|UNION|DROP|SELECT|--|;).*)|(--)")) { - for (String input : suspiciousInputs) { - try (Matcher m = p.matcher(input)) { - assertThat(m.find()).isTrue(); - } - } - - // Safe inputs should not match - try (Matcher m = p.matcher("normal_username")) { - assertThat(m.find()).isFalse(); - } - } - } - - @Test - void testSearchWithLineBreaks() { - String multiLineData = "Line 1: Some data\n" + - "Line 2: ERROR - Something failed\n" + - "Line 3: More data\n" + - "Line 4: WARNING - Check this\n" + - "Line 5: Final line"; - - // Find lines with ERROR - try (Pattern p = RE2.compile("ERROR")) { - try (Matcher m = p.matcher(multiLineData)) { - assertThat(m.find()).isTrue(); - } - } - - // Find lines with WARNING - try (Pattern p = RE2.compile("WARNING")) { - try (Matcher m = p.matcher(multiLineData)) { - assertThat(m.find()).isTrue(); - } - } - - // Pattern that spans multiple lines won't match (RE2 default behavior) - try (Pattern p = RE2.compile("Line 2.*Line 3")) { - try (Matcher m = p.matcher(multiLineData)) { - assertThat(m.find()).isFalse(); // . doesn't match \n by default - } - } - } - - // ===== Helper Methods ===== - - private String generateLargeLogData(int lineCount) { - StringBuilder log = new StringBuilder(); - for (int i = 0; i < lineCount; i++) { - String level = i % 100 == 0 ? "ERROR" : (i % 20 == 0 ? 
"WARN" : "INFO"); - log.append(String.format("%s [Thread-%d] 2025-11-17 10:%02d:%02d Request %d from user_%d@example.com - duration=%dms status=%d%n", - level, i % 10, i / 3600, (i / 60) % 60, i, i % 1000, i % 500, 200 + (i % 5))); - } - return log.toString(); - } + }) + .start(); + } + + latch.await(); + assertThat(errors.get()).isEqualTo(0); + assertThat(successCount.get()).isGreaterThan(0); + } + + @Test + void testSQLInjectionPatternDetection() { + // Test patterns for detecting SQL injection in logs + String[] suspiciousInputs = { + "admin' OR '1'='1", + "' UNION SELECT * FROM users--", + "1; DROP TABLE users;--", + "admin'--", + "' OR 1=1--" + }; + + // Pattern to detect SQL injection attempts + try (Pattern p = RE2.compile("('.*(OR|UNION|DROP|SELECT|--|;).*)|(--)")) { + for (String input : suspiciousInputs) { + try (Matcher m = p.matcher(input)) { + assertThat(m.find()).isTrue(); + } + } + + // Safe inputs should not match + try (Matcher m = p.matcher("normal_username")) { + assertThat(m.find()).isFalse(); + } + } + } + + @Test + void testSearchWithLineBreaks() { + String multiLineData = + "Line 1: Some data\n" + + "Line 2: ERROR - Something failed\n" + + "Line 3: More data\n" + + "Line 4: WARNING - Check this\n" + + "Line 5: Final line"; + + // Find lines with ERROR + try (Pattern p = RE2.compile("ERROR")) { + try (Matcher m = p.matcher(multiLineData)) { + assertThat(m.find()).isTrue(); + } + } + + // Find lines with WARNING + try (Pattern p = RE2.compile("WARNING")) { + try (Matcher m = p.matcher(multiLineData)) { + assertThat(m.find()).isTrue(); + } + } + + // Pattern that spans multiple lines won't match (RE2 default behavior) + try (Pattern p = RE2.compile("Line 2.*Line 3")) { + try (Matcher m = p.matcher(multiLineData)) { + assertThat(m.find()).isFalse(); // . 
doesn't match \n by default + } + } + } + + // ===== Helper Methods ===== + + private String generateLargeLogData(int lineCount) { + StringBuilder log = new StringBuilder(); + for (int i = 0; i < lineCount; i++) { + String level = i % 100 == 0 ? "ERROR" : (i % 20 == 0 ? "WARN" : "INFO"); + log.append( + String.format( + "%s [Thread-%d] 2025-11-17 10:%02d:%02d Request %d from user_%d@example.com -" + + " duration=%dms status=%d%n", + level, i % 10, i / 3600, (i / 60) % 60, i, i % 1000, i % 500, 200 + (i % 5))); + } + return log.toString(); + } } - diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingIT.java index 8985f24..0247bb6 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingIT.java @@ -15,783 +15,783 @@ */ package com.axonops.libre2.api; +import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + import com.axonops.libre2.cache.PatternCache; import com.axonops.libre2.test.TestUtils; +import java.util.*; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import java.util.*; - -import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assumptions.assumeTrue; - -/** - * Tests for bulk matching operations (Collection and array variants). - */ +/** Tests for bulk matching operations (Collection and array variants). 
*/ class BulkMatchingIT { - private static PatternCache originalCache; + private static PatternCache originalCache; - @BeforeAll - static void setUpClass() { - // Use test config to avoid JMX InstanceAlreadyExistsException - originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build()); - } + @BeforeAll + static void setUpClass() { + // Use test config to avoid JMX InstanceAlreadyExistsException + originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build()); + } - @AfterAll - static void tearDownClass() { - TestUtils.restoreGlobalCache(originalCache); - } + @AfterAll + static void tearDownClass() { + TestUtils.restoreGlobalCache(originalCache); + } - /** - * Detects if running under QEMU emulation (set by CI workflow). - * Large dataset tests are skipped under QEMU as they're too slow. - */ - private static boolean isQemuEmulation() { - return "true".equals(System.getenv("QEMU_EMULATION")); - } + /** + * Detects if running under QEMU emulation (set by CI workflow). Large dataset tests are skipped + * under QEMU as they're too slow. 
+ */ + private static boolean isQemuEmulation() { + return "true".equals(System.getenv("QEMU_EMULATION")); + } - @Test - void testMatchAll_Collection_Basic() { - Pattern pattern = Pattern.compile("test.*"); + @Test + void testMatchAll_Collection_Basic() { + Pattern pattern = Pattern.compile("test.*"); - List inputs = List.of("test1", "prod1", "test2", "other"); - boolean[] results = pattern.matchAll(inputs); + List inputs = List.of("test1", "prod1", "test2", "other"); + boolean[] results = pattern.matchAll(inputs); - assertArrayEquals(new boolean[]{true, false, true, false}, results); - } + assertArrayEquals(new boolean[] {true, false, true, false}, results); + } - @Test - void testMatchAll_Array_Basic() { - Pattern pattern = Pattern.compile("\\d{3}-\\d{4}"); + @Test + void testMatchAll_Array_Basic() { + Pattern pattern = Pattern.compile("\\d{3}-\\d{4}"); - String[] inputs = {"123-4567", "invalid", "999-8888", "nope"}; - boolean[] results = pattern.matchAll(inputs); + String[] inputs = {"123-4567", "invalid", "999-8888", "nope"}; + boolean[] results = pattern.matchAll(inputs); - assertArrayEquals(new boolean[]{true, false, true, false}, results); - } + assertArrayEquals(new boolean[] {true, false, true, false}, results); + } - @Test - void testMatchAll_Empty() { - Pattern pattern = Pattern.compile("test"); + @Test + void testMatchAll_Empty() { + Pattern pattern = Pattern.compile("test"); - boolean[] results = pattern.matchAll(Collections.emptyList()); - assertEquals(0, results.length); + boolean[] results = pattern.matchAll(Collections.emptyList()); + assertEquals(0, results.length); - boolean[] results2 = pattern.matchAll(new String[0]); - assertEquals(0, results2.length); - } + boolean[] results2 = pattern.matchAll(new String[0]); + assertEquals(0, results2.length); + } - @Test - void testMatchAll_Set() { - Pattern pattern = Pattern.compile("[a-z]+"); + @Test + void testMatchAll_Set() { + Pattern pattern = Pattern.compile("[a-z]+"); - Set inputs = Set.of("abc", 
"123", "xyz", "456"); - boolean[] results = pattern.matchAll(inputs); + Set inputs = Set.of("abc", "123", "xyz", "456"); + boolean[] results = pattern.matchAll(inputs); - // Set order is not guaranteed, but count should be correct - int matchCount = 0; - for (boolean match : results) { - if (match) matchCount++; - } - assertEquals(2, matchCount); // "abc" and "xyz" + // Set order is not guaranteed, but count should be correct + int matchCount = 0; + for (boolean match : results) { + if (match) matchCount++; } + assertEquals(2, matchCount); // "abc" and "xyz" + } - @Test - void testMatchAll_Queue() { - Pattern pattern = Pattern.compile("item\\d+"); + @Test + void testMatchAll_Queue() { + Pattern pattern = Pattern.compile("item\\d+"); - Queue inputs = new LinkedList<>(List.of("item1", "other", "item2")); - boolean[] results = pattern.matchAll(inputs); + Queue inputs = new LinkedList<>(List.of("item1", "other", "item2")); + boolean[] results = pattern.matchAll(inputs); - assertArrayEquals(new boolean[]{true, false, true}, results); - } + assertArrayEquals(new boolean[] {true, false, true}, results); + } - @Test - void testMatchAll_NullInput() { - Pattern pattern = Pattern.compile("test"); + @Test + void testMatchAll_NullInput() { + Pattern pattern = Pattern.compile("test"); - assertThrows(NullPointerException.class, () -> pattern.matchAll((Collection) null)); - assertThrows(NullPointerException.class, () -> pattern.matchAll((String[]) null)); - } + assertThrows(NullPointerException.class, () -> pattern.matchAll((Collection) null)); + assertThrows(NullPointerException.class, () -> pattern.matchAll((String[]) null)); + } - @Test - void testFilter_Basic() { - Pattern pattern = Pattern.compile("test.*"); + @Test + void testFilter_Basic() { + Pattern pattern = Pattern.compile("test.*"); - List inputs = List.of("test1", "prod1", "test2", "other"); - List filtered = pattern.filter(inputs); + List inputs = List.of("test1", "prod1", "test2", "other"); + List filtered = 
pattern.filter(inputs); - assertEquals(List.of("test1", "test2"), filtered); - } + assertEquals(List.of("test1", "test2"), filtered); + } - @Test - void testFilter_NoMatches() { - Pattern pattern = Pattern.compile("nomatch"); + @Test + void testFilter_NoMatches() { + Pattern pattern = Pattern.compile("nomatch"); - List inputs = List.of("test1", "test2", "test3"); - List filtered = pattern.filter(inputs); + List inputs = List.of("test1", "test2", "test3"); + List filtered = pattern.filter(inputs); - assertTrue(filtered.isEmpty()); - } + assertTrue(filtered.isEmpty()); + } - @Test - void testFilter_AllMatch() { - Pattern pattern = Pattern.compile("test\\d"); + @Test + void testFilter_AllMatch() { + Pattern pattern = Pattern.compile("test\\d"); - List inputs = List.of("test1", "test2", "test3"); - List filtered = pattern.filter(inputs); + List inputs = List.of("test1", "test2", "test3"); + List filtered = pattern.filter(inputs); - assertEquals(inputs, filtered); - } + assertEquals(inputs, filtered); + } - @Test - void testFilterNot_Basic() { - Pattern pattern = Pattern.compile("test.*"); + @Test + void testFilterNot_Basic() { + Pattern pattern = Pattern.compile("test.*"); - List inputs = List.of("test1", "prod1", "test2", "other"); - List filtered = pattern.filterNot(inputs); + List inputs = List.of("test1", "prod1", "test2", "other"); + List filtered = pattern.filterNot(inputs); - assertEquals(List.of("prod1", "other"), filtered); - } + assertEquals(List.of("prod1", "other"), filtered); + } - @Test - void testFilterNot_AllMatch() { - Pattern pattern = Pattern.compile("test\\d"); + @Test + void testFilterNot_AllMatch() { + Pattern pattern = Pattern.compile("test\\d"); - List inputs = List.of("test1", "test2"); - List filtered = pattern.filterNot(inputs); + List inputs = List.of("test1", "test2"); + List filtered = pattern.filterNot(inputs); - assertTrue(filtered.isEmpty()); - } + assertTrue(filtered.isEmpty()); + } - @Test - void testRetainMatches_List() { - Pattern 
pattern = Pattern.compile("test.*"); + @Test + void testRetainMatches_List() { + Pattern pattern = Pattern.compile("test.*"); - List inputs = new ArrayList<>(List.of("test1", "prod1", "test2", "other")); - int removed = pattern.retainMatches(inputs); + List inputs = new ArrayList<>(List.of("test1", "prod1", "test2", "other")); + int removed = pattern.retainMatches(inputs); - assertEquals(2, removed); - assertEquals(List.of("test1", "test2"), inputs); - } + assertEquals(2, removed); + assertEquals(List.of("test1", "test2"), inputs); + } - @Test - void testRetainMatches_Set() { - Pattern pattern = Pattern.compile("[a-z]+"); + @Test + void testRetainMatches_Set() { + Pattern pattern = Pattern.compile("[a-z]+"); - Set inputs = new HashSet<>(Set.of("abc", "123", "xyz")); - int removed = pattern.retainMatches(inputs); + Set inputs = new HashSet<>(Set.of("abc", "123", "xyz")); + int removed = pattern.retainMatches(inputs); - assertEquals(1, removed); - assertEquals(Set.of("abc", "xyz"), inputs); - } + assertEquals(1, removed); + assertEquals(Set.of("abc", "xyz"), inputs); + } - @Test - void testRemoveMatches_List() { - Pattern pattern = Pattern.compile("test.*"); + @Test + void testRemoveMatches_List() { + Pattern pattern = Pattern.compile("test.*"); - List inputs = new ArrayList<>(List.of("test1", "prod1", "test2", "other")); - int removed = pattern.removeMatches(inputs); + List inputs = new ArrayList<>(List.of("test1", "prod1", "test2", "other")); + int removed = pattern.removeMatches(inputs); - assertEquals(2, removed); - assertEquals(List.of("prod1", "other"), inputs); - } + assertEquals(2, removed); + assertEquals(List.of("prod1", "other"), inputs); + } - @Test - void testRemoveMatches_Set() { - Pattern pattern = Pattern.compile("[a-z]+"); + @Test + void testRemoveMatches_Set() { + Pattern pattern = Pattern.compile("[a-z]+"); - Set inputs = new HashSet<>(Set.of("abc", "123", "xyz")); - int removed = pattern.removeMatches(inputs); + Set inputs = new 
HashSet<>(Set.of("abc", "123", "xyz")); + int removed = pattern.removeMatches(inputs); - assertEquals(2, removed); - assertEquals(Set.of("123"), inputs); - } + assertEquals(2, removed); + assertEquals(Set.of("123"), inputs); + } - @Test - void testRetainMatches_ImmutableCollection() { - Pattern pattern = Pattern.compile("test"); + @Test + void testRetainMatches_ImmutableCollection() { + Pattern pattern = Pattern.compile("test"); - List immutable = List.of("test", "other"); - assertThrows(UnsupportedOperationException.class, () -> pattern.retainMatches(immutable)); - } + List immutable = List.of("test", "other"); + assertThrows(UnsupportedOperationException.class, () -> pattern.retainMatches(immutable)); + } - @Test - void testFilterByKey_Basic() { - Pattern pattern = Pattern.compile("test.*"); + @Test + void testFilterByKey_Basic() { + Pattern pattern = Pattern.compile("test.*"); - Map inputs = Map.of( + Map inputs = + Map.of( "test_1", 100, "prod_1", 200, - "test_2", 300 - ); + "test_2", 300); - Map filtered = pattern.filterByKey(inputs); + Map filtered = pattern.filterByKey(inputs); - assertEquals(2, filtered.size()); - assertEquals(100, filtered.get("test_1")); - assertEquals(300, filtered.get("test_2")); - assertNull(filtered.get("prod_1")); - } + assertEquals(2, filtered.size()); + assertEquals(100, filtered.get("test_1")); + assertEquals(300, filtered.get("test_2")); + assertNull(filtered.get("prod_1")); + } - @Test - void testFilterByValue_Basic() { - Pattern pattern = Pattern.compile(".*@example\\.com"); + @Test + void testFilterByValue_Basic() { + Pattern pattern = Pattern.compile(".*@example\\.com"); - Map inputs = Map.of( + Map inputs = + Map.of( 1, "user@example.com", 2, "invalid", - 3, "admin@example.com" - ); + 3, "admin@example.com"); - Map filtered = pattern.filterByValue(inputs); + Map filtered = pattern.filterByValue(inputs); - assertEquals(2, filtered.size()); - assertEquals("user@example.com", filtered.get(1)); - 
assertEquals("admin@example.com", filtered.get(3)); - assertNull(filtered.get(2)); - } + assertEquals(2, filtered.size()); + assertEquals("user@example.com", filtered.get(1)); + assertEquals("admin@example.com", filtered.get(3)); + assertNull(filtered.get(2)); + } - @Test - void testFilterNotByKey_Basic() { - Pattern pattern = Pattern.compile("test.*"); + @Test + void testFilterNotByKey_Basic() { + Pattern pattern = Pattern.compile("test.*"); - Map inputs = Map.of( + Map inputs = + Map.of( "test_1", 100, "prod_1", 200, - "test_2", 300 - ); + "test_2", 300); - Map filtered = pattern.filterNotByKey(inputs); + Map filtered = pattern.filterNotByKey(inputs); - assertEquals(1, filtered.size()); - assertEquals(200, filtered.get("prod_1")); - } + assertEquals(1, filtered.size()); + assertEquals(200, filtered.get("prod_1")); + } - @Test - void testFilterNotByValue_Basic() { - Pattern pattern = Pattern.compile(".*@example\\.com"); + @Test + void testFilterNotByValue_Basic() { + Pattern pattern = Pattern.compile(".*@example\\.com"); - Map inputs = Map.of( + Map inputs = + Map.of( 1, "user@example.com", 2, "invalid", - 3, "admin@example.com" - ); - - Map filtered = pattern.filterNotByValue(inputs); - - assertEquals(1, filtered.size()); - assertEquals("invalid", filtered.get(2)); - } - - @Test - void testRetainMatchesByKey_Basic() { - Pattern pattern = Pattern.compile("test.*"); - - Map map = new HashMap<>(); - map.put("test_1", 100); - map.put("prod_1", 200); - map.put("test_2", 300); - - int removed = pattern.retainMatchesByKey(map); - - assertEquals(1, removed); - assertEquals(2, map.size()); - assertEquals(100, map.get("test_1")); - assertEquals(300, map.get("test_2")); - } - - @Test - void testRetainMatchesByValue_Basic() { - Pattern pattern = Pattern.compile("\\d+"); - - Map map = new HashMap<>(); - map.put("a", "123"); - map.put("b", "abc"); - map.put("c", "456"); - - int removed = pattern.retainMatchesByValue(map); - - assertEquals(1, removed); - assertEquals(2, 
map.size()); - assertEquals("123", map.get("a")); - assertEquals("456", map.get("c")); - } - - @Test - void testRemoveMatchesByKey_Basic() { - Pattern pattern = Pattern.compile("test.*"); - - Map map = new HashMap<>(); - map.put("test_1", 100); - map.put("prod_1", 200); - map.put("test_2", 300); - - int removed = pattern.removeMatchesByKey(map); - - assertEquals(2, removed); - assertEquals(1, map.size()); - assertEquals(200, map.get("prod_1")); - } - - @Test - void testRemoveMatchesByValue_Basic() { - Pattern pattern = Pattern.compile("\\d+"); - - Map map = new HashMap<>(); - map.put("a", "123"); - map.put("b", "abc"); - map.put("c", "456"); - - int removed = pattern.removeMatchesByValue(map); - - assertEquals(2, removed); - assertEquals(1, map.size()); - assertEquals("abc", map.get("b")); - } - - @Test - void testBulk_LargeCollection() { - Pattern pattern = Pattern.compile("item\\d+"); - - // Create 1000 strings - List inputs = new ArrayList<>(); - for (int i = 0; i < 1000; i++) { - if (i % 2 == 0) { - inputs.add("item" + i); - } else { - inputs.add("other" + i); - } - } - - boolean[] results = pattern.matchAll(inputs); - - assertEquals(1000, results.length); - - int matchCount = 0; - for (boolean match : results) { - if (match) matchCount++; - } - - assertEquals(500, matchCount); // Every other item - } - - @Test - void testFilter_PreservesOrder() { - Pattern pattern = Pattern.compile("keep\\d"); - - List inputs = List.of("keep1", "drop1", "keep2", "drop2", "keep3"); - List filtered = pattern.filter(inputs); - - // Order should be preserved - assertEquals(List.of("keep1", "keep2", "keep3"), filtered); - } + 3, "admin@example.com"); - @Test - void testMapFiltering_EmptyMap() { - Pattern pattern = Pattern.compile("test"); + Map filtered = pattern.filterNotByValue(inputs); - Map empty = Map.of(); + assertEquals(1, filtered.size()); + assertEquals("invalid", filtered.get(2)); + } - assertEquals(0, pattern.filterByKey(empty).size()); - assertEquals(0, 
pattern.filterNotByKey(empty).size()); - assertEquals(0, pattern.retainMatchesByKey(new HashMap<>(empty))); - assertEquals(0, pattern.removeMatchesByKey(new HashMap<>(empty))); - } + @Test + void testRetainMatchesByKey_Basic() { + Pattern pattern = Pattern.compile("test.*"); - @Test - void testCollectionFiltering_DifferentTypes() { - Pattern pattern = Pattern.compile("[a-z]+"); + Map map = new HashMap<>(); + map.put("test_1", 100); + map.put("prod_1", 200); + map.put("test_2", 300); - // ArrayList - List list = new ArrayList<>(List.of("abc", "123", "xyz")); - assertEquals(2, pattern.filter(list).size()); + int removed = pattern.retainMatchesByKey(map); - // HashSet - Set set = new HashSet<>(Set.of("abc", "123", "xyz")); - assertEquals(2, pattern.filter(set).size()); + assertEquals(1, removed); + assertEquals(2, map.size()); + assertEquals(100, map.get("test_1")); + assertEquals(300, map.get("test_2")); + } - // LinkedList (Queue) - Queue queue = new LinkedList<>(List.of("abc", "123", "xyz")); - assertEquals(2, pattern.filter(queue).size()); - } + @Test + void testRetainMatchesByValue_Basic() { + Pattern pattern = Pattern.compile("\\d+"); - @Test - void testFilterNot_AllCollectionTypes() { - Pattern pattern = Pattern.compile("\\d+"); + Map map = new HashMap<>(); + map.put("a", "123"); + map.put("b", "abc"); + map.put("c", "456"); - // ArrayList - List arrayList = new ArrayList<>(List.of("123", "abc", "456")); - assertEquals(List.of("abc"), pattern.filterNot(arrayList)); + int removed = pattern.retainMatchesByValue(map); - // HashSet - Set hashSet = new HashSet<>(Set.of("123", "abc", "456")); - List filtered = pattern.filterNot(hashSet); - assertEquals(1, filtered.size()); - assertTrue(filtered.contains("abc")); + assertEquals(1, removed); + assertEquals(2, map.size()); + assertEquals("123", map.get("a")); + assertEquals("456", map.get("c")); + } - // LinkedList (as Queue) - Queue linkedList = new LinkedList<>(List.of("123", "abc", "456")); - assertEquals(1, 
pattern.filterNot(linkedList).size()); - } + @Test + void testRemoveMatchesByKey_Basic() { + Pattern pattern = Pattern.compile("test.*"); - @Test - void testRetainMatches_AllCollectionTypes() { - Pattern pattern = Pattern.compile("[a-z]+"); - - // ArrayList - List arrayList = new ArrayList<>(List.of("abc", "123", "xyz")); - assertEquals(1, pattern.retainMatches(arrayList)); - assertEquals(2, arrayList.size()); - - // HashSet - Set hashSet = new HashSet<>(Set.of("abc", "123", "xyz")); - assertEquals(1, pattern.retainMatches(hashSet)); - assertEquals(2, hashSet.size()); - - // LinkedList (as Queue) - Queue queue = new LinkedList<>(List.of("abc", "123", "xyz")); - assertEquals(1, pattern.retainMatches(queue)); - assertEquals(2, queue.size()); - - // TreeSet (sorted) - Set treeSet = new TreeSet<>(Set.of("abc", "123", "xyz")); - assertEquals(1, pattern.retainMatches(treeSet)); - assertEquals(Set.of("abc", "xyz"), treeSet); - } + Map map = new HashMap<>(); + map.put("test_1", 100); + map.put("prod_1", 200); + map.put("test_2", 300); - @Test - void testRemoveMatches_AllCollectionTypes() { - Pattern pattern = Pattern.compile("[a-z]+"); - - // ArrayList - List arrayList = new ArrayList<>(List.of("abc", "123", "xyz")); - assertEquals(2, pattern.removeMatches(arrayList)); - assertEquals(List.of("123"), arrayList); - - // HashSet - Set hashSet = new HashSet<>(Set.of("abc", "123", "xyz")); - assertEquals(2, pattern.removeMatches(hashSet)); - assertEquals(Set.of("123"), hashSet); - - // LinkedList (as Queue) - Queue queue = new LinkedList<>(List.of("abc", "123", "xyz")); - assertEquals(2, pattern.removeMatches(queue)); - assertEquals(1, queue.size()); - assertTrue(queue.contains("123")); - - // TreeSet - Set treeSet = new TreeSet<>(Set.of("abc", "123", "xyz")); - assertEquals(2, pattern.removeMatches(treeSet)); - assertEquals(Set.of("123"), treeSet); - } + int removed = pattern.removeMatchesByKey(map); - @Test - void testMapOperations_AllVariants() { - Pattern pattern = 
Pattern.compile("key\\d+"); + assertEquals(2, removed); + assertEquals(1, map.size()); + assertEquals(200, map.get("prod_1")); + } - // Test all 8 map operations - Map map1 = new HashMap<>(Map.of("key1", "val1", "other", "val2", "key2", "val3")); - assertEquals(2, pattern.filterByKey(map1).size()); + @Test + void testRemoveMatchesByValue_Basic() { + Pattern pattern = Pattern.compile("\\d+"); - Map map2 = new HashMap<>(Map.of("key1", "val1", "other", "val2")); - assertEquals(1, pattern.filterNotByKey(map2).size()); + Map map = new HashMap<>(); + map.put("a", "123"); + map.put("b", "abc"); + map.put("c", "456"); - Map map3 = new HashMap<>(Map.of("k1", "key1", "k2", "other", "k3", "key2")); - assertEquals(2, pattern.filterByValue(map3).size()); + int removed = pattern.removeMatchesByValue(map); - Map map4 = new HashMap<>(Map.of("k1", "key1", "k2", "other")); - assertEquals(1, pattern.filterNotByValue(map4).size()); + assertEquals(2, removed); + assertEquals(1, map.size()); + assertEquals("abc", map.get("b")); + } - Map map5 = new HashMap<>(Map.of("key1", 1, "other", 2, "key2", 3)); - assertEquals(1, pattern.retainMatchesByKey(map5)); - assertEquals(2, map5.size()); + @Test + void testBulk_LargeCollection() { + Pattern pattern = Pattern.compile("item\\d+"); - Map map6 = new HashMap<>(Map.of("key1", 1, "other", 2)); - assertEquals(1, pattern.removeMatchesByKey(map6)); - assertEquals(1, map6.size()); - - Map map7 = new HashMap<>(Map.of(1, "key1", 2, "other", 3, "key2")); - assertEquals(1, pattern.retainMatchesByValue(map7)); - assertEquals(2, map7.size()); - - Map map8 = new HashMap<>(Map.of(1, "key1", 2, "other")); - assertEquals(1, pattern.removeMatchesByValue(map8)); - assertEquals(1, map8.size()); + // Create 1000 strings + List inputs = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + if (i % 2 == 0) { + inputs.add("item" + i); + } else { + inputs.add("other" + i); + } } - @Test - void testMatchAll_LinkedHashSet_PreservesOrder() { - Pattern pattern = 
Pattern.compile("keep.*"); - - // LinkedHashSet preserves insertion order - Set linkedSet = new LinkedHashSet<>(); - linkedSet.add("keep1"); - linkedSet.add("drop1"); - linkedSet.add("keep2"); - linkedSet.add("drop2"); - - boolean[] results = pattern.matchAll(linkedSet); - assertEquals(4, results.length); - - // Verify order matches insertion order - List asList = new ArrayList<>(linkedSet); - boolean[] expected = {true, false, true, false}; - assertArrayEquals(expected, results); - } - - @Test - void testFilter_Queue_FIFO_Order() { - Pattern pattern = Pattern.compile("item\\d"); - - // Queue maintains FIFO order - Queue queue = new LinkedList<>(List.of("item1", "other", "item2", "item3")); - List filtered = pattern.filter(queue); - - // Should preserve order - assertEquals(List.of("item1", "item2", "item3"), filtered); - } - - // ========== Additional Scenarios ========== - - @Test - void testMatchAll_WithNullElements() { - Pattern pattern = Pattern.compile("test.*"); + boolean[] results = pattern.matchAll(inputs); - // Null elements in array should not crash - String[] arrayWithNulls = {"test1", null, "test2", null, "other"}; - boolean[] results = pattern.matchAll(arrayWithNulls); + assertEquals(1000, results.length); + + int matchCount = 0; + for (boolean match : results) { + if (match) matchCount++; + } + + assertEquals(500, matchCount); // Every other item + } + + @Test + void testFilter_PreservesOrder() { + Pattern pattern = Pattern.compile("keep\\d"); + + List inputs = List.of("keep1", "drop1", "keep2", "drop2", "keep3"); + List filtered = pattern.filter(inputs); + + // Order should be preserved + assertEquals(List.of("keep1", "keep2", "keep3"), filtered); + } + + @Test + void testMapFiltering_EmptyMap() { + Pattern pattern = Pattern.compile("test"); + + Map empty = Map.of(); + + assertEquals(0, pattern.filterByKey(empty).size()); + assertEquals(0, pattern.filterNotByKey(empty).size()); + assertEquals(0, pattern.retainMatchesByKey(new HashMap<>(empty))); + 
assertEquals(0, pattern.removeMatchesByKey(new HashMap<>(empty))); + } + + @Test + void testCollectionFiltering_DifferentTypes() { + Pattern pattern = Pattern.compile("[a-z]+"); + + // ArrayList + List list = new ArrayList<>(List.of("abc", "123", "xyz")); + assertEquals(2, pattern.filter(list).size()); + + // HashSet + Set set = new HashSet<>(Set.of("abc", "123", "xyz")); + assertEquals(2, pattern.filter(set).size()); + + // LinkedList (Queue) + Queue queue = new LinkedList<>(List.of("abc", "123", "xyz")); + assertEquals(2, pattern.filter(queue).size()); + } + + @Test + void testFilterNot_AllCollectionTypes() { + Pattern pattern = Pattern.compile("\\d+"); + + // ArrayList + List arrayList = new ArrayList<>(List.of("123", "abc", "456")); + assertEquals(List.of("abc"), pattern.filterNot(arrayList)); + + // HashSet + Set hashSet = new HashSet<>(Set.of("123", "abc", "456")); + List filtered = pattern.filterNot(hashSet); + assertEquals(1, filtered.size()); + assertTrue(filtered.contains("abc")); + + // LinkedList (as Queue) + Queue linkedList = new LinkedList<>(List.of("123", "abc", "456")); + assertEquals(1, pattern.filterNot(linkedList).size()); + } + + @Test + void testRetainMatches_AllCollectionTypes() { + Pattern pattern = Pattern.compile("[a-z]+"); + + // ArrayList + List arrayList = new ArrayList<>(List.of("abc", "123", "xyz")); + assertEquals(1, pattern.retainMatches(arrayList)); + assertEquals(2, arrayList.size()); + + // HashSet + Set hashSet = new HashSet<>(Set.of("abc", "123", "xyz")); + assertEquals(1, pattern.retainMatches(hashSet)); + assertEquals(2, hashSet.size()); + + // LinkedList (as Queue) + Queue queue = new LinkedList<>(List.of("abc", "123", "xyz")); + assertEquals(1, pattern.retainMatches(queue)); + assertEquals(2, queue.size()); + + // TreeSet (sorted) + Set treeSet = new TreeSet<>(Set.of("abc", "123", "xyz")); + assertEquals(1, pattern.retainMatches(treeSet)); + assertEquals(Set.of("abc", "xyz"), treeSet); + } + + @Test + void 
testRemoveMatches_AllCollectionTypes() { + Pattern pattern = Pattern.compile("[a-z]+"); + + // ArrayList + List arrayList = new ArrayList<>(List.of("abc", "123", "xyz")); + assertEquals(2, pattern.removeMatches(arrayList)); + assertEquals(List.of("123"), arrayList); + + // HashSet + Set hashSet = new HashSet<>(Set.of("abc", "123", "xyz")); + assertEquals(2, pattern.removeMatches(hashSet)); + assertEquals(Set.of("123"), hashSet); + + // LinkedList (as Queue) + Queue queue = new LinkedList<>(List.of("abc", "123", "xyz")); + assertEquals(2, pattern.removeMatches(queue)); + assertEquals(1, queue.size()); + assertTrue(queue.contains("123")); + + // TreeSet + Set treeSet = new TreeSet<>(Set.of("abc", "123", "xyz")); + assertEquals(2, pattern.removeMatches(treeSet)); + assertEquals(Set.of("123"), treeSet); + } + + @Test + void testMapOperations_AllVariants() { + Pattern pattern = Pattern.compile("key\\d+"); + + // Test all 8 map operations + Map map1 = + new HashMap<>(Map.of("key1", "val1", "other", "val2", "key2", "val3")); + assertEquals(2, pattern.filterByKey(map1).size()); + + Map map2 = new HashMap<>(Map.of("key1", "val1", "other", "val2")); + assertEquals(1, pattern.filterNotByKey(map2).size()); + + Map map3 = new HashMap<>(Map.of("k1", "key1", "k2", "other", "k3", "key2")); + assertEquals(2, pattern.filterByValue(map3).size()); + + Map map4 = new HashMap<>(Map.of("k1", "key1", "k2", "other")); + assertEquals(1, pattern.filterNotByValue(map4).size()); + + Map map5 = new HashMap<>(Map.of("key1", 1, "other", 2, "key2", 3)); + assertEquals(1, pattern.retainMatchesByKey(map5)); + assertEquals(2, map5.size()); + + Map map6 = new HashMap<>(Map.of("key1", 1, "other", 2)); + assertEquals(1, pattern.removeMatchesByKey(map6)); + assertEquals(1, map6.size()); + + Map map7 = new HashMap<>(Map.of(1, "key1", 2, "other", 3, "key2")); + assertEquals(1, pattern.retainMatchesByValue(map7)); + assertEquals(2, map7.size()); + + Map map8 = new HashMap<>(Map.of(1, "key1", 2, "other")); + 
assertEquals(1, pattern.removeMatchesByValue(map8)); + assertEquals(1, map8.size()); + } + + @Test + void testMatchAll_LinkedHashSet_PreservesOrder() { + Pattern pattern = Pattern.compile("keep.*"); + + // LinkedHashSet preserves insertion order + Set linkedSet = new LinkedHashSet<>(); + linkedSet.add("keep1"); + linkedSet.add("drop1"); + linkedSet.add("keep2"); + linkedSet.add("drop2"); + + boolean[] results = pattern.matchAll(linkedSet); + assertEquals(4, results.length); + + // Verify order matches insertion order + List asList = new ArrayList<>(linkedSet); + boolean[] expected = {true, false, true, false}; + assertArrayEquals(expected, results); + } + + @Test + void testFilter_Queue_FIFO_Order() { + Pattern pattern = Pattern.compile("item\\d"); + + // Queue maintains FIFO order + Queue queue = new LinkedList<>(List.of("item1", "other", "item2", "item3")); + List filtered = pattern.filter(queue); + + // Should preserve order + assertEquals(List.of("item1", "item2", "item3"), filtered); + } + + // ========== Additional Scenarios ========== + + @Test + void testMatchAll_WithNullElements() { + Pattern pattern = Pattern.compile("test.*"); + + // Null elements in array should not crash + String[] arrayWithNulls = {"test1", null, "test2", null, "other"}; + boolean[] results = pattern.matchAll(arrayWithNulls); + + assertEquals(5, results.length); + // Nulls should be treated as non-matches (handled by JNI) + assertTrue(results[0]); // "test1" matches + assertFalse(results[1]); // null doesn't match + assertTrue(results[2]); // "test2" matches + assertFalse(results[3]); // null doesn't match + assertFalse(results[4]); // "other" doesn't match + } + + @Test + void testFilter_WithDuplicates() { + Pattern pattern = Pattern.compile("keep"); + + // List with duplicates + List withDuplicates = List.of("keep", "drop", "keep", "keep", "drop"); + List filtered = pattern.filter(withDuplicates); + + // All "keep" entries preserved, including duplicates + assertEquals(3, 
filtered.size()); + assertEquals(List.of("keep", "keep", "keep"), filtered); + } + + @Test + void testRetainMatches_WithDuplicates() { + Pattern pattern = Pattern.compile("\\d+"); + + // ArrayList with duplicates: 3x"123", 1x"456" = 4 numeric, 2 non-numeric + List list = new ArrayList<>(List.of("123", "abc", "123", "xyz", "456", "123")); + int removed = pattern.retainMatches(list); + + assertEquals(2, removed); // "abc" and "xyz" removed + assertEquals(4, list.size()); // 3x"123" + 1x"456" remain + // All numeric strings retained (including duplicates) + assertEquals(3, list.stream().filter(s -> s.equals("123")).count()); // 3x "123" + assertEquals(1, list.stream().filter(s -> s.equals("456")).count()); // 1x "456" + assertTrue(list.stream().allMatch(s -> s.matches("\\d+"))); // All are numeric + } + + @Test + void testMapFiltering_TreeMap() { + Pattern pattern = Pattern.compile("key[1-2]"); + + // TreeMap maintains sorted order + Map treeMap = + new TreeMap<>( + Map.of( + "key3", 3, + "key1", 1, + "other", 5, + "key2", 2)); + + Map filtered = pattern.filterByKey(treeMap); + + assertEquals(2, filtered.size()); + assertEquals(1, filtered.get("key1")); + assertEquals(2, filtered.get("key2")); + assertNull(filtered.get("key3")); + assertNull(filtered.get("other")); + } + + @Test + void testMapFiltering_LinkedHashMap() { + Pattern pattern = Pattern.compile("user\\d+"); + + // LinkedHashMap preserves insertion order + Map linkedMap = new LinkedHashMap<>(); + linkedMap.put("user2", "second"); + linkedMap.put("admin", "first"); + linkedMap.put("user1", "third"); + + Map filtered = pattern.filterByKey(linkedMap); + + assertEquals(2, filtered.size()); + // LinkedHashMap order should be preserved + List keys = new ArrayList<>(filtered.keySet()); + assertTrue(keys.contains("user2")); + assertTrue(keys.contains("user1")); + } + + @Test + void testMapFiltering_ConcurrentHashMap() { + Pattern pattern = Pattern.compile("data_.*"); + + // ConcurrentHashMap (thread-safe map) + Map 
concurrentMap = new java.util.concurrent.ConcurrentHashMap<>(); + concurrentMap.put("data_1", 100); + concurrentMap.put("meta_1", 200); + concurrentMap.put("data_2", 300); + + Map filtered = pattern.filterByKey(concurrentMap); + + assertEquals(2, filtered.size()); + assertEquals(100, filtered.get("data_1")); + assertEquals(300, filtered.get("data_2")); + } + + @Test + void testRetainMatchesByKey_TreeMap() { + Pattern pattern = Pattern.compile("keep.*"); + + // TreeMap (sorted) + Map treeMap = + new TreeMap<>( + Map.of( + "keep1", 1, + "drop1", 2, + "keep2", 3)); + + int removed = pattern.retainMatchesByKey(treeMap); + + assertEquals(1, removed); + assertEquals(2, treeMap.size()); + assertTrue(treeMap.containsKey("keep1")); + assertTrue(treeMap.containsKey("keep2")); + } + + @Test + void testRetainMatchesByValue_LinkedHashMap() { + Pattern pattern = Pattern.compile("valid"); + + Map linkedMap = new LinkedHashMap<>(); + linkedMap.put(1, "valid"); + linkedMap.put(2, "invalid"); + linkedMap.put(3, "valid"); + + int removed = pattern.retainMatchesByValue(linkedMap); + + assertEquals(1, removed); + assertEquals(2, linkedMap.size()); + assertEquals("valid", linkedMap.get(1)); + assertEquals("valid", linkedMap.get(3)); + } + + @Test + void testBulk_VeryLargeCollection_10k() { + assumeTrue(!isQemuEmulation(), "Skipping large dataset test under QEMU emulation (too slow)"); + + Pattern pattern = Pattern.compile("item\\d{4}"); // item + 4 digits + + // Create 10,000 strings + List inputs = new ArrayList<>(10_000); + for (int i = 0; i < 10_000; i++) { + inputs.add("item" + i); + } + + // Test matchAll + boolean[] results = pattern.matchAll(inputs); + assertEquals(10_000, results.length); + + // Count matches (items 0-9999, all 4 digits match from item1000 onward) + int matchCount = 0; + for (boolean match : results) { + if (match) matchCount++; + } + assertEquals(9000, matchCount); // item1000 through item9999 + + // Test filter + List filtered = pattern.filter(inputs); + 
assertEquals(9000, filtered.size()); + + // Test in-place + List mutable = new ArrayList<>(inputs); + int removed = pattern.retainMatches(mutable); + assertEquals(1000, removed); + assertEquals(9000, mutable.size()); + } + + @Test + void testBulk_VeryLargeMap_10k() { + assumeTrue(!isQemuEmulation(), "Skipping large dataset test under QEMU emulation (too slow)"); + + Pattern pattern = Pattern.compile("user_\\d+"); + + // Create 10,000 entry map + Map largeMap = new HashMap<>(); + for (int i = 0; i < 10_000; i++) { + if (i % 2 == 0) { + largeMap.put("user_" + i, i); + } else { + largeMap.put("admin_" + i, i); + } + } + + // Test filterByKey + Map filtered = pattern.filterByKey(largeMap); + assertEquals(5000, filtered.size()); + + // Test in-place + Map mutable = new HashMap<>(largeMap); + int removed = pattern.retainMatchesByKey(mutable); + assertEquals(5000, removed); + assertEquals(5000, mutable.size()); + } + + @Test + void testMatchAll_EmptyStrings() { + Pattern pattern = Pattern.compile(".*"); // Matches everything including empty + + List inputs = List.of("", "test", "", "other", ""); + boolean[] results = pattern.matchAll(inputs); + + // ".*" should match empty strings + assertArrayEquals(new boolean[] {true, true, true, true, true}, results); + } + + @Test + void testFilter_PreservesDuplicateOrder() { + Pattern pattern = Pattern.compile("keep"); + + // Specific order with duplicates + List ordered = List.of("keep", "drop", "keep", "other", "keep"); + List filtered = pattern.filter(ordered); + + // Order and duplicates preserved + assertEquals(List.of("keep", "keep", "keep"), filtered); + } + + @Test + void testRemoveMatchesByKey_ConcurrentHashMap() { + Pattern pattern = Pattern.compile("tmp_.*"); + + Map concurrentMap = new java.util.concurrent.ConcurrentHashMap<>(); + concurrentMap.put("tmp_cache", "data1"); + concurrentMap.put("perm_data", "data2"); + concurrentMap.put("tmp_session", "data3"); - assertEquals(5, results.length); - // Nulls should be treated 
as non-matches (handled by JNI) - assertTrue(results[0]); // "test1" matches - assertFalse(results[1]); // null doesn't match - assertTrue(results[2]); // "test2" matches - assertFalse(results[3]); // null doesn't match - assertFalse(results[4]); // "other" doesn't match - } - - @Test - void testFilter_WithDuplicates() { - Pattern pattern = Pattern.compile("keep"); - - // List with duplicates - List withDuplicates = List.of("keep", "drop", "keep", "keep", "drop"); - List filtered = pattern.filter(withDuplicates); - - // All "keep" entries preserved, including duplicates - assertEquals(3, filtered.size()); - assertEquals(List.of("keep", "keep", "keep"), filtered); - } - - @Test - void testRetainMatches_WithDuplicates() { - Pattern pattern = Pattern.compile("\\d+"); - - // ArrayList with duplicates: 3x"123", 1x"456" = 4 numeric, 2 non-numeric - List list = new ArrayList<>(List.of("123", "abc", "123", "xyz", "456", "123")); - int removed = pattern.retainMatches(list); - - assertEquals(2, removed); // "abc" and "xyz" removed - assertEquals(4, list.size()); // 3x"123" + 1x"456" remain - // All numeric strings retained (including duplicates) - assertEquals(3, list.stream().filter(s -> s.equals("123")).count()); // 3x "123" - assertEquals(1, list.stream().filter(s -> s.equals("456")).count()); // 1x "456" - assertTrue(list.stream().allMatch(s -> s.matches("\\d+"))); // All are numeric - } - - @Test - void testMapFiltering_TreeMap() { - Pattern pattern = Pattern.compile("key[1-2]"); - - // TreeMap maintains sorted order - Map treeMap = new TreeMap<>(Map.of( - "key3", 3, - "key1", 1, - "other", 5, - "key2", 2 - )); - - Map filtered = pattern.filterByKey(treeMap); - - assertEquals(2, filtered.size()); - assertEquals(1, filtered.get("key1")); - assertEquals(2, filtered.get("key2")); - assertNull(filtered.get("key3")); - assertNull(filtered.get("other")); - } - - @Test - void testMapFiltering_LinkedHashMap() { - Pattern pattern = Pattern.compile("user\\d+"); - - // 
LinkedHashMap preserves insertion order - Map linkedMap = new LinkedHashMap<>(); - linkedMap.put("user2", "second"); - linkedMap.put("admin", "first"); - linkedMap.put("user1", "third"); - - Map filtered = pattern.filterByKey(linkedMap); - - assertEquals(2, filtered.size()); - // LinkedHashMap order should be preserved - List keys = new ArrayList<>(filtered.keySet()); - assertTrue(keys.contains("user2")); - assertTrue(keys.contains("user1")); - } - - @Test - void testMapFiltering_ConcurrentHashMap() { - Pattern pattern = Pattern.compile("data_.*"); - - // ConcurrentHashMap (thread-safe map) - Map concurrentMap = new java.util.concurrent.ConcurrentHashMap<>(); - concurrentMap.put("data_1", 100); - concurrentMap.put("meta_1", 200); - concurrentMap.put("data_2", 300); - - Map filtered = pattern.filterByKey(concurrentMap); - - assertEquals(2, filtered.size()); - assertEquals(100, filtered.get("data_1")); - assertEquals(300, filtered.get("data_2")); - } - - @Test - void testRetainMatchesByKey_TreeMap() { - Pattern pattern = Pattern.compile("keep.*"); - - // TreeMap (sorted) - Map treeMap = new TreeMap<>(Map.of( - "keep1", 1, - "drop1", 2, - "keep2", 3 - )); - - int removed = pattern.retainMatchesByKey(treeMap); - - assertEquals(1, removed); - assertEquals(2, treeMap.size()); - assertTrue(treeMap.containsKey("keep1")); - assertTrue(treeMap.containsKey("keep2")); - } - - @Test - void testRetainMatchesByValue_LinkedHashMap() { - Pattern pattern = Pattern.compile("valid"); - - Map linkedMap = new LinkedHashMap<>(); - linkedMap.put(1, "valid"); - linkedMap.put(2, "invalid"); - linkedMap.put(3, "valid"); - - int removed = pattern.retainMatchesByValue(linkedMap); - - assertEquals(1, removed); - assertEquals(2, linkedMap.size()); - assertEquals("valid", linkedMap.get(1)); - assertEquals("valid", linkedMap.get(3)); - } - - @Test - void testBulk_VeryLargeCollection_10k() { - assumeTrue(!isQemuEmulation(), "Skipping large dataset test under QEMU emulation (too slow)"); - - Pattern 
pattern = Pattern.compile("item\\d{4}"); // item + 4 digits - - // Create 10,000 strings - List inputs = new ArrayList<>(10_000); - for (int i = 0; i < 10_000; i++) { - inputs.add("item" + i); - } - - // Test matchAll - boolean[] results = pattern.matchAll(inputs); - assertEquals(10_000, results.length); - - // Count matches (items 0-9999, all 4 digits match from item1000 onward) - int matchCount = 0; - for (boolean match : results) { - if (match) matchCount++; - } - assertEquals(9000, matchCount); // item1000 through item9999 - - // Test filter - List filtered = pattern.filter(inputs); - assertEquals(9000, filtered.size()); - - // Test in-place - List mutable = new ArrayList<>(inputs); - int removed = pattern.retainMatches(mutable); - assertEquals(1000, removed); - assertEquals(9000, mutable.size()); - } - - @Test - void testBulk_VeryLargeMap_10k() { - assumeTrue(!isQemuEmulation(), "Skipping large dataset test under QEMU emulation (too slow)"); - - Pattern pattern = Pattern.compile("user_\\d+"); - - // Create 10,000 entry map - Map largeMap = new HashMap<>(); - for (int i = 0; i < 10_000; i++) { - if (i % 2 == 0) { - largeMap.put("user_" + i, i); - } else { - largeMap.put("admin_" + i, i); - } - } - - // Test filterByKey - Map filtered = pattern.filterByKey(largeMap); - assertEquals(5000, filtered.size()); - - // Test in-place - Map mutable = new HashMap<>(largeMap); - int removed = pattern.retainMatchesByKey(mutable); - assertEquals(5000, removed); - assertEquals(5000, mutable.size()); - } - - @Test - void testMatchAll_EmptyStrings() { - Pattern pattern = Pattern.compile(".*"); // Matches everything including empty - - List inputs = List.of("", "test", "", "other", ""); - boolean[] results = pattern.matchAll(inputs); - - // ".*" should match empty strings - assertArrayEquals(new boolean[]{true, true, true, true, true}, results); - } - - @Test - void testFilter_PreservesDuplicateOrder() { - Pattern pattern = Pattern.compile("keep"); - - // Specific order with 
duplicates - List ordered = List.of("keep", "drop", "keep", "other", "keep"); - List filtered = pattern.filter(ordered); - - // Order and duplicates preserved - assertEquals(List.of("keep", "keep", "keep"), filtered); - } - - @Test - void testRemoveMatchesByKey_ConcurrentHashMap() { - Pattern pattern = Pattern.compile("tmp_.*"); - - Map concurrentMap = new java.util.concurrent.ConcurrentHashMap<>(); - concurrentMap.put("tmp_cache", "data1"); - concurrentMap.put("perm_data", "data2"); - concurrentMap.put("tmp_session", "data3"); - - int removed = pattern.removeMatchesByKey(concurrentMap); - - assertEquals(2, removed); - assertEquals(1, concurrentMap.size()); - assertTrue(concurrentMap.containsKey("perm_data")); - } + int removed = pattern.removeMatchesByKey(concurrentMap); + + assertEquals(2, removed); + assertEquals(1, concurrentMap.size()); + assertTrue(concurrentMap.containsKey("perm_data")); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingTypeSafetyIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingTypeSafetyIT.java index 41eb6e0..5fa5f05 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingTypeSafetyIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingTypeSafetyIT.java @@ -15,327 +15,310 @@ */ package com.axonops.libre2.api; +import static org.junit.jupiter.api.Assertions.*; + import com.axonops.libre2.cache.PatternCache; import com.axonops.libre2.test.TestUtils; +import java.util.*; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import java.util.*; - -import static org.junit.jupiter.api.Assertions.*; - /** - * Tests for type safety and encoding handling in bulk matching operations. - * Demonstrates how the API handles non-String types, Unicode, emoji, and special characters. + * Tests for type safety and encoding handling in bulk matching operations. 
Demonstrates how the API + * handles non-String types, Unicode, emoji, and special characters. */ class BulkMatchingTypeSafetyIT { - private static PatternCache originalCache; - - @BeforeAll - static void setUpClass() { - originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build()); - } - - @AfterAll - static void tearDownClass() { - TestUtils.restoreGlobalCache(originalCache); - } - - // ========== Type Safety Tests ========== - - /** - * Demonstrates compile-time type safety. - * Collection cannot be passed to matchAll(Collection). - * This test verifies the API contract. - */ - @Test - void testTypeSafety_CompileTime() { - Pattern pattern = Pattern.compile("\\d+"); - - // This compiles - correct type - List strings = List.of("123", "456"); - boolean[] results = pattern.matchAll(strings); - assertNotNull(results); - - // This would NOT compile (commented out to allow test compilation): - // List ints = List.of(123, 456); - // pattern.matchAll(ints); // Compile error: Required Collection, found Collection - - // Java's type system prevents this at compile time - } - - /** - * Tests runtime behavior with raw types (unchecked warnings). - * We throw explicit IllegalArgumentException with helpful message. 
- */ - @Test - @SuppressWarnings({"rawtypes", "unchecked"}) - void testTypeSafety_RuntimeWithRawTypes() { - Pattern pattern = Pattern.compile("test"); - - // Using raw List (no generic type) - compiles with warning - List raw = new ArrayList(); - raw.add(123); // Integer - raw.add("test"); // String - raw.add(456); // Integer - - // Should throw IllegalArgumentException with helpful message - IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> { - pattern.matchAll(raw); - }); - - // Verify error message is helpful - assertTrue(e.getMessage().contains("non-String elements")); - assertTrue(e.getMessage().contains("stream().map(Object::toString)")); - assertNotNull(e.getCause()); // Cause is ArrayStoreException - assertTrue(e.getCause() instanceof ArrayStoreException); - } - - /** - * Tests that all Collection-based methods throw helpful errors for non-String elements. - */ - @Test - @SuppressWarnings({"rawtypes", "unchecked"}) - void testTypeSafety_AllMethodsValidate() { - Pattern pattern = Pattern.compile("test"); - - List raw = new ArrayList(); - raw.add(123); - - // All Collection methods should throw IllegalArgumentException - assertThrows(IllegalArgumentException.class, () -> pattern.matchAll(raw)); - assertThrows(IllegalArgumentException.class, () -> pattern.filter(raw)); - assertThrows(IllegalArgumentException.class, () -> pattern.filterNot(raw)); - - List mutableRaw = new ArrayList(); - mutableRaw.add(123); - assertThrows(IllegalArgumentException.class, () -> pattern.retainMatches(mutableRaw)); - - List mutableRaw2 = new ArrayList(); - mutableRaw2.add(123); - assertThrows(IllegalArgumentException.class, () -> pattern.removeMatches(mutableRaw2)); - } - - // ========== UTF-8 / Unicode Handling Tests ========== - - /** - * Tests bulk matching with Unicode characters (Chinese, Arabic, Emoji). - * Java Strings are UTF-16, JNI converts to UTF-8 automatically. 
- */ - @Test - void testBulkMatching_UnicodeCharacters() { - // Chinese characters - Pattern chinesePattern = Pattern.compile("中文.*"); - List chineseInputs = List.of("中文测试", "English", "中文字符", "123"); - boolean[] results = chinesePattern.matchAll(chineseInputs); - - assertArrayEquals(new boolean[]{true, false, true, false}, results); - - // Arabic characters - Pattern arabicPattern = Pattern.compile("مرحبا.*"); - List arabicInputs = List.of("مرحبا بك", "hello", "مرحبا العالم"); - results = arabicPattern.matchAll(arabicInputs); - - assertArrayEquals(new boolean[]{true, false, true}, results); - - // Emoji - Pattern emojiPattern = Pattern.compile(".*😀.*"); - List emojiInputs = List.of("hello😀world", "no emoji", "test😀😀test"); - results = emojiPattern.matchAll(emojiInputs); - - assertArrayEquals(new boolean[]{true, false, true}, results); - } - - /** - * Tests bulk matching with mixed Unicode scripts in same string. - */ - @Test - void testBulkMatching_MixedScripts() { - Pattern pattern = Pattern.compile("User:\\s*\\S+"); - - List inputs = List.of( - "User: Alice", // ASCII - "User: 田中", // Japanese - "User: José", // Accented - "User: مصطفى", // Arabic - "User: 李明" // Chinese - ); - - boolean[] results = pattern.matchAll(inputs); - - // All should match (\\S+ matches any non-whitespace including Unicode) - assertArrayEquals(new boolean[]{true, true, true, true, true}, results); - } - - /** - * Tests bulk filtering with emoji and special Unicode characters. 
- */ - @Test - void testFilter_EmojiAndSpecialCharacters() { - // Match strings containing specific emoji (literal match, not ranges) - Pattern pattern = Pattern.compile(".*(😀|😢|😁|😂|😃).*"); - - List inputs = List.of( - "Happy 😀", - "Sad 😢", - "No emoji", - "Multiple 😁😂😃" - ); - - List filtered = pattern.filter(inputs); - - assertEquals(3, filtered.size()); // All except "No emoji" - assertTrue(filtered.contains("Happy 😀")); - assertTrue(filtered.contains("Sad 😢")); - assertTrue(filtered.contains("Multiple 😁😂😃")); - } - - /** - * Tests handling of zero-width characters and combining characters. - */ - @Test - void testMatchAll_ZeroWidthAndCombiningCharacters() { - Pattern pattern = Pattern.compile("test.*"); - - List inputs = List.of( + private static PatternCache originalCache; + + @BeforeAll + static void setUpClass() { + originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build()); + } + + @AfterAll + static void tearDownClass() { + TestUtils.restoreGlobalCache(originalCache); + } + + // ========== Type Safety Tests ========== + + /** + * Demonstrates compile-time type safety. Collection cannot be passed to + * matchAll(Collection). This test verifies the API contract. + */ + @Test + void testTypeSafety_CompileTime() { + Pattern pattern = Pattern.compile("\\d+"); + + // This compiles - correct type + List strings = List.of("123", "456"); + boolean[] results = pattern.matchAll(strings); + assertNotNull(results); + + // This would NOT compile (commented out to allow test compilation): + // List ints = List.of(123, 456); + // pattern.matchAll(ints); // Compile error: Required Collection, found + // Collection + + // Java's type system prevents this at compile time + } + + /** + * Tests runtime behavior with raw types (unchecked warnings). We throw explicit + * IllegalArgumentException with helpful message. 
+ */ + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + void testTypeSafety_RuntimeWithRawTypes() { + Pattern pattern = Pattern.compile("test"); + + // Using raw List (no generic type) - compiles with warning + List raw = new ArrayList(); + raw.add(123); // Integer + raw.add("test"); // String + raw.add(456); // Integer + + // Should throw IllegalArgumentException with helpful message + IllegalArgumentException e = + assertThrows( + IllegalArgumentException.class, + () -> { + pattern.matchAll(raw); + }); + + // Verify error message is helpful + assertTrue(e.getMessage().contains("non-String elements")); + assertTrue(e.getMessage().contains("stream().map(Object::toString)")); + assertNotNull(e.getCause()); // Cause is ArrayStoreException + assertTrue(e.getCause() instanceof ArrayStoreException); + } + + /** Tests that all Collection-based methods throw helpful errors for non-String elements. */ + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + void testTypeSafety_AllMethodsValidate() { + Pattern pattern = Pattern.compile("test"); + + List raw = new ArrayList(); + raw.add(123); + + // All Collection methods should throw IllegalArgumentException + assertThrows(IllegalArgumentException.class, () -> pattern.matchAll(raw)); + assertThrows(IllegalArgumentException.class, () -> pattern.filter(raw)); + assertThrows(IllegalArgumentException.class, () -> pattern.filterNot(raw)); + + List mutableRaw = new ArrayList(); + mutableRaw.add(123); + assertThrows(IllegalArgumentException.class, () -> pattern.retainMatches(mutableRaw)); + + List mutableRaw2 = new ArrayList(); + mutableRaw2.add(123); + assertThrows(IllegalArgumentException.class, () -> pattern.removeMatches(mutableRaw2)); + } + + // ========== UTF-8 / Unicode Handling Tests ========== + + /** + * Tests bulk matching with Unicode characters (Chinese, Arabic, Emoji). Java Strings are UTF-16, + * JNI converts to UTF-8 automatically. 
+ */ + @Test + void testBulkMatching_UnicodeCharacters() { + // Chinese characters + Pattern chinesePattern = Pattern.compile("中文.*"); + List chineseInputs = List.of("中文测试", "English", "中文字符", "123"); + boolean[] results = chinesePattern.matchAll(chineseInputs); + + assertArrayEquals(new boolean[] {true, false, true, false}, results); + + // Arabic characters + Pattern arabicPattern = Pattern.compile("مرحبا.*"); + List arabicInputs = List.of("مرحبا بك", "hello", "مرحبا العالم"); + results = arabicPattern.matchAll(arabicInputs); + + assertArrayEquals(new boolean[] {true, false, true}, results); + + // Emoji + Pattern emojiPattern = Pattern.compile(".*😀.*"); + List emojiInputs = List.of("hello😀world", "no emoji", "test😀😀test"); + results = emojiPattern.matchAll(emojiInputs); + + assertArrayEquals(new boolean[] {true, false, true}, results); + } + + /** Tests bulk matching with mixed Unicode scripts in same string. */ + @Test + void testBulkMatching_MixedScripts() { + Pattern pattern = Pattern.compile("User:\\s*\\S+"); + + List inputs = + List.of( + "User: Alice", // ASCII + "User: 田中", // Japanese + "User: José", // Accented + "User: مصطفى", // Arabic + "User: 李明" // Chinese + ); + + boolean[] results = pattern.matchAll(inputs); + + // All should match (\\S+ matches any non-whitespace including Unicode) + assertArrayEquals(new boolean[] {true, true, true, true, true}, results); + } + + /** Tests bulk filtering with emoji and special Unicode characters. 
*/ + @Test + void testFilter_EmojiAndSpecialCharacters() { + // Match strings containing specific emoji (literal match, not ranges) + Pattern pattern = Pattern.compile(".*(😀|😢|😁|😂|😃).*"); + + List inputs = List.of("Happy 😀", "Sad 😢", "No emoji", "Multiple 😁😂😃"); + + List filtered = pattern.filter(inputs); + + assertEquals(3, filtered.size()); // All except "No emoji" + assertTrue(filtered.contains("Happy 😀")); + assertTrue(filtered.contains("Sad 😢")); + assertTrue(filtered.contains("Multiple 😁😂😃")); + } + + /** Tests handling of zero-width characters and combining characters. */ + @Test + void testMatchAll_ZeroWidthAndCombiningCharacters() { + Pattern pattern = Pattern.compile("test.*"); + + List inputs = + List.of( "test", - "test\u200B", // Zero-width space - "test\u0301", // Combining acute accent - "tést", // Precomposed é - "te\u0301st" // e + combining accent - ); + "test\u200B", // Zero-width space + "test\u0301", // Combining acute accent + "tést", // Precomposed é + "te\u0301st" // e + combining accent + ); - boolean[] results = pattern.matchAll(inputs); + boolean[] results = pattern.matchAll(inputs); - // All should match (pattern is "test.*" which matches test followed by anything) - assertArrayEquals(new boolean[]{true, true, true, false, false}, results); - } + // All should match (pattern is "test.*" which matches test followed by anything) + assertArrayEquals(new boolean[] {true, true, true, false, false}, results); + } - // ========== Special Character Tests ========== + // ========== Special Character Tests ========== - /** - * Tests bulk matching with control characters, newlines, tabs. - */ - @Test - void testMatchAll_ControlCharacters() { - Pattern pattern = Pattern.compile("line\\d+"); + /** Tests bulk matching with control characters, newlines, tabs. 
*/ + @Test + void testMatchAll_ControlCharacters() { + Pattern pattern = Pattern.compile("line\\d+"); - List inputs = List.of( + List inputs = + List.of( "line1", - "line2\n", // With newline - "line3\t", // With tab - "line4\r\n", // With CR+LF - "other\n" - ); - - boolean[] results = pattern.matchAll(inputs); - - // Pattern matches "lineN" without trailing characters - assertArrayEquals(new boolean[]{true, false, false, false, false}, results); - } - - /** - * Tests map filtering with Unicode keys and values. - */ - @Test - void testMapFiltering_UnicodeKeysAndValues() { - Pattern pattern = Pattern.compile("用户.*"); // Chinese "user" - - Map users = new HashMap<>(); - users.put("用户001", "Alice"); - users.put("管理员", "Admin"); - users.put("用户002", "Bob"); - - Map filtered = pattern.filterByKey(users); - - assertEquals(2, filtered.size()); - assertTrue(filtered.containsKey("用户001")); - assertTrue(filtered.containsKey("用户002")); - assertFalse(filtered.containsKey("管理员")); - } - - // ========== toString() Behavior Tests ========== - - /** - * Demonstrates that non-String objects would need explicit toString(). - * Since our signature requires Collection, this is handled at compile time. - */ - @Test - void testNonStringObjects_RequireExplicitConversion() { - Pattern pattern = Pattern.compile("\\d+"); - - // If you have Collection, you must convert explicitly - List numbers = List.of(123, 456, 789); - - // Convert to strings explicitly - List stringNumbers = numbers.stream() - .map(Object::toString) - .toList(); - - boolean[] results = pattern.matchAll(stringNumbers); - assertArrayEquals(new boolean[]{true, true, true}, results); - } - - // ========== Null and Empty Tests ========== - - /** - * Tests how matchAll handles collections with null elements. - * Nulls should not crash - JNI handles them gracefully (returns false for match). 
- */ - @Test - void testMatchAll_NullElements_DoesNotCrash() { - Pattern pattern = Pattern.compile("test"); - - String[] arrayWithNulls = {"test", null, "other", null}; - boolean[] results = pattern.matchAll(arrayWithNulls); - - assertEquals(4, results.length); - assertTrue(results[0]); // "test" matches - assertFalse(results[1]); // null doesn't match - assertFalse(results[2]); // "other" doesn't match - assertFalse(results[3]); // null doesn't match - } - - /** - * Tests bulk matching with empty strings. - */ - @Test - void testMatchAll_EmptyStrings() { - // Pattern that matches empty string (and only empty) - Pattern onlyEmptyPattern = Pattern.compile("^$"); - List inputs = List.of("", "test", "", "other"); - boolean[] results = onlyEmptyPattern.matchAll(inputs); - - assertArrayEquals(new boolean[]{true, false, true, false}, results); - - // Pattern that matches anything (including empty) - Pattern anyPattern = Pattern.compile(".*"); - results = anyPattern.matchAll(inputs); - - assertArrayEquals(new boolean[]{true, true, true, true}, results); - } - - // ========== Binary Data / Invalid UTF-8 ========== - - /** - * Tests behavior with strings containing invalid UTF-16 surrogate pairs. - * JNI's GetStringUTFChars handles this by replacing invalid sequences. 
- */ - @Test - void testMatchAll_InvalidSurrogates() { - Pattern pattern = Pattern.compile("test.*"); - - // Create string with unpaired surrogate (invalid UTF-16) - String invalidSurrogate = "test\uD800"; // High surrogate without low surrogate - - List inputs = List.of("test", invalidSurrogate, "test123"); - boolean[] results = pattern.matchAll(inputs); - - // JNI will replace invalid sequence, pattern may or may not match - // The important thing is it doesn't crash - assertEquals(3, results.length); - assertNotNull(results); // Just verify no crash - } + "line2\n", // With newline + "line3\t", // With tab + "line4\r\n", // With CR+LF + "other\n"); + + boolean[] results = pattern.matchAll(inputs); + + // Pattern matches "lineN" without trailing characters + assertArrayEquals(new boolean[] {true, false, false, false, false}, results); + } + + /** Tests map filtering with Unicode keys and values. */ + @Test + void testMapFiltering_UnicodeKeysAndValues() { + Pattern pattern = Pattern.compile("用户.*"); // Chinese "user" + + Map users = new HashMap<>(); + users.put("用户001", "Alice"); + users.put("管理员", "Admin"); + users.put("用户002", "Bob"); + + Map filtered = pattern.filterByKey(users); + + assertEquals(2, filtered.size()); + assertTrue(filtered.containsKey("用户001")); + assertTrue(filtered.containsKey("用户002")); + assertFalse(filtered.containsKey("管理员")); + } + + // ========== toString() Behavior Tests ========== + + /** + * Demonstrates that non-String objects would need explicit toString(). Since our signature + * requires Collection, this is handled at compile time. 
+ */ + @Test + void testNonStringObjects_RequireExplicitConversion() { + Pattern pattern = Pattern.compile("\\d+"); + + // If you have Collection, you must convert explicitly + List numbers = List.of(123, 456, 789); + + // Convert to strings explicitly + List stringNumbers = numbers.stream().map(Object::toString).toList(); + + boolean[] results = pattern.matchAll(stringNumbers); + assertArrayEquals(new boolean[] {true, true, true}, results); + } + + // ========== Null and Empty Tests ========== + + /** + * Tests how matchAll handles collections with null elements. Nulls should not crash - JNI handles + * them gracefully (returns false for match). + */ + @Test + void testMatchAll_NullElements_DoesNotCrash() { + Pattern pattern = Pattern.compile("test"); + + String[] arrayWithNulls = {"test", null, "other", null}; + boolean[] results = pattern.matchAll(arrayWithNulls); + + assertEquals(4, results.length); + assertTrue(results[0]); // "test" matches + assertFalse(results[1]); // null doesn't match + assertFalse(results[2]); // "other" doesn't match + assertFalse(results[3]); // null doesn't match + } + + /** Tests bulk matching with empty strings. */ + @Test + void testMatchAll_EmptyStrings() { + // Pattern that matches empty string (and only empty) + Pattern onlyEmptyPattern = Pattern.compile("^$"); + List inputs = List.of("", "test", "", "other"); + boolean[] results = onlyEmptyPattern.matchAll(inputs); + + assertArrayEquals(new boolean[] {true, false, true, false}, results); + + // Pattern that matches anything (including empty) + Pattern anyPattern = Pattern.compile(".*"); + results = anyPattern.matchAll(inputs); + + assertArrayEquals(new boolean[] {true, true, true, true}, results); + } + + // ========== Binary Data / Invalid UTF-8 ========== + + /** + * Tests behavior with strings containing invalid UTF-16 surrogate pairs. JNI's GetStringUTFChars + * handles this by replacing invalid sequences. 
+ */ + @Test + void testMatchAll_InvalidSurrogates() { + Pattern pattern = Pattern.compile("test.*"); + + // Create string with unpaired surrogate (invalid UTF-16) + String invalidSurrogate = "test\uD800"; // High surrogate without low surrogate + + List inputs = List.of("test", invalidSurrogate, "test123"); + boolean[] results = pattern.matchAll(inputs); + + // JNI will replace invalid sequence, pattern may or may not match + // The important thing is it doesn't crash + assertEquals(3, results.length); + assertNotNull(results); // Just verify no crash + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/ByteBufferApiIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/ByteBufferApiIT.java index 18560d3..fa67415 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/ByteBufferApiIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/ByteBufferApiIT.java @@ -16,320 +16,316 @@ package com.axonops.libre2.api; +import static org.assertj.core.api.Assertions.*; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; - -import static org.assertj.core.api.Assertions.*; - /** * Tests for ByteBuffer API with automatic routing to zero-copy or String API. * - *

These tests verify that Pattern correctly detects DirectByteBuffer vs - * heap ByteBuffer and routes to the appropriate implementation.

+ *

These tests verify that Pattern correctly detects DirectByteBuffer vs heap ByteBuffer and + * routes to the appropriate implementation. */ @DisplayName("ByteBuffer API Tests") class ByteBufferApiIT { - /** - * Creates a DirectByteBuffer (off-heap, supports zero-copy). - */ - private ByteBuffer createDirectBuffer(String text) { - byte[] bytes = text.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length); - buffer.put(bytes); - buffer.flip(); - return buffer; - } + /** Creates a DirectByteBuffer (off-heap, supports zero-copy). */ + private ByteBuffer createDirectBuffer(String text) { + byte[] bytes = text.getBytes(StandardCharsets.UTF_8); + ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length); + buffer.put(bytes); + buffer.flip(); + return buffer; + } - /** - * Creates a heap ByteBuffer (on-heap, falls back to String API). - */ - private ByteBuffer createHeapBuffer(String text) { - return ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8)); - } + /** Creates a heap ByteBuffer (on-heap, falls back to String API). 
*/ + private ByteBuffer createHeapBuffer(String text) { + return ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8)); + } - // ========== DirectByteBuffer Tests (Zero-Copy Path) ========== + // ========== DirectByteBuffer Tests (Zero-Copy Path) ========== - @Test - @DisplayName("Pattern.matches(DirectByteBuffer) should use zero-copy") - void patternMatches_directBuffer_usesZeroCopy() { - Pattern pattern = Pattern.compile("hello"); + @Test + @DisplayName("Pattern.matches(DirectByteBuffer) should use zero-copy") + void patternMatches_directBuffer_usesZeroCopy() { + Pattern pattern = Pattern.compile("hello"); - ByteBuffer buffer = createDirectBuffer("hello"); - assertThat(buffer.isDirect()).isTrue(); // Verify it's direct + ByteBuffer buffer = createDirectBuffer("hello"); + assertThat(buffer.isDirect()).isTrue(); // Verify it's direct - boolean matches = pattern.matches(buffer); + boolean matches = pattern.matches(buffer); - assertThat(matches).isTrue(); - } + assertThat(matches).isTrue(); + } - @Test - @DisplayName("Pattern.find(DirectByteBuffer) should use zero-copy") - void patternFind_directBuffer_usesZeroCopy() { - Pattern pattern = Pattern.compile("world"); + @Test + @DisplayName("Pattern.find(DirectByteBuffer) should use zero-copy") + void patternFind_directBuffer_usesZeroCopy() { + Pattern pattern = Pattern.compile("world"); - ByteBuffer buffer = createDirectBuffer("hello world"); - assertThat(buffer.isDirect()).isTrue(); + ByteBuffer buffer = createDirectBuffer("hello world"); + assertThat(buffer.isDirect()).isTrue(); - boolean found = pattern.find(buffer); + boolean found = pattern.find(buffer); - assertThat(found).isTrue(); - } - - @Test - @DisplayName("Pattern.extractGroups(DirectByteBuffer) should use zero-copy") - void extractGroups_directBuffer_usesZeroCopy() { - Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})"); + assertThat(found).isTrue(); + } - ByteBuffer buffer = createDirectBuffer("2025-11-24"); - 
assertThat(buffer.isDirect()).isTrue(); + @Test + @DisplayName("Pattern.extractGroups(DirectByteBuffer) should use zero-copy") + void extractGroups_directBuffer_usesZeroCopy() { + Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})"); + + ByteBuffer buffer = createDirectBuffer("2025-11-24"); + assertThat(buffer.isDirect()).isTrue(); - String[] groups = pattern.extractGroups(buffer); + String[] groups = pattern.extractGroups(buffer); + + assertThat(groups).containsExactly("2025-11-24", "2025", "11", "24"); + } + + @Test + @DisplayName("Pattern.findAllMatches(DirectByteBuffer) should use zero-copy") + void findAllMatches_directBuffer_usesZeroCopy() { + Pattern pattern = Pattern.compile("\\d+"); + + ByteBuffer buffer = createDirectBuffer("a1b22c333"); + assertThat(buffer.isDirect()).isTrue(); + + String[][] matches = pattern.findAllMatches(buffer); + + assertThat(matches).isNotNull(); + assertThat(matches.length).isEqualTo(3); + assertThat(matches[0][0]).isEqualTo("1"); + assertThat(matches[1][0]).isEqualTo("22"); + assertThat(matches[2][0]).isEqualTo("333"); + } + + // ========== Heap ByteBuffer Tests (String Fallback Path) ========== + + @Test + @DisplayName("Pattern.matches(heap ByteBuffer) should fall back to String API") + void patternMatches_heapBuffer_fallsBackToString() { + Pattern pattern = Pattern.compile("hello"); + + ByteBuffer buffer = createHeapBuffer("hello"); + assertThat(buffer.isDirect()).isFalse(); // Verify it's heap + + boolean matches = pattern.matches(buffer); + + assertThat(matches).isTrue(); + } + + @Test + @DisplayName("Pattern.find(heap ByteBuffer) should fall back to String API") + void patternFind_heapBuffer_fallsBackToString() { + Pattern pattern = Pattern.compile("world"); + + ByteBuffer buffer = createHeapBuffer("hello world"); + assertThat(buffer.isDirect()).isFalse(); + + boolean found = pattern.find(buffer); + + assertThat(found).isTrue(); + } + + // ========== Consistency Tests (Direct vs Heap vs String) ========== + + 
@ParameterizedTest + @DisplayName("DirectByteBuffer, heap ByteBuffer, and String should all match") + @CsvSource({ + "\\d+, 12345, true", + "\\d+, abc, false", + "[a-z]+, hello, true", + "[a-z]+, HELLO, false", + "test, test, true", + "test, testing, false" + }) + void allApisProduceSameResults(String patternStr, String input, boolean expected) { + Pattern pattern = Pattern.compile(patternStr); - assertThat(groups).containsExactly("2025-11-24", "2025", "11", "24"); + // String API + boolean stringResult = pattern.matches(input); + + // DirectByteBuffer API (zero-copy) + ByteBuffer directBuffer = createDirectBuffer(input); + boolean directResult = pattern.matches(directBuffer); + + // Heap ByteBuffer API (String fallback) + ByteBuffer heapBuffer = createHeapBuffer(input); + boolean heapResult = pattern.matches(heapBuffer); + + // All should produce same result + assertThat(directResult) + .as("DirectByteBuffer should match String API") + .isEqualTo(stringResult) + .isEqualTo(expected); + + assertThat(heapResult) + .as("Heap ByteBuffer should match String API") + .isEqualTo(stringResult) + .isEqualTo(expected); + } + + @ParameterizedTest + @DisplayName("find() should work consistently across all API variants") + @CsvSource({ + "\\d+, abc123def, true", + "\\d+, abcdef, false", + "@, user@example.com, true", + "@, noatsign, false" + }) + void find_allApisConsistent(String patternStr, String input, boolean expected) { + Pattern pattern = Pattern.compile(patternStr); + + // String API + boolean stringResult; + try (Matcher m = pattern.matcher(input)) { + stringResult = m.find(); } - @Test - @DisplayName("Pattern.findAllMatches(DirectByteBuffer) should use zero-copy") - void findAllMatches_directBuffer_usesZeroCopy() { - Pattern pattern = Pattern.compile("\\d+"); + // DirectByteBuffer + boolean directResult = pattern.find(createDirectBuffer(input)); - ByteBuffer buffer = createDirectBuffer("a1b22c333"); - assertThat(buffer.isDirect()).isTrue(); + // Heap ByteBuffer + 
boolean heapResult = pattern.find(createHeapBuffer(input)); - String[][] matches = pattern.findAllMatches(buffer); + assertThat(directResult).isEqualTo(stringResult).isEqualTo(expected); + assertThat(heapResult).isEqualTo(stringResult).isEqualTo(expected); + } - assertThat(matches).isNotNull(); - assertThat(matches.length).isEqualTo(3); - assertThat(matches[0][0]).isEqualTo("1"); - assertThat(matches[1][0]).isEqualTo("22"); - assertThat(matches[2][0]).isEqualTo("333"); - } + // ========== Mixed Usage Tests ========== - // ========== Heap ByteBuffer Tests (String Fallback Path) ========== + @Test + @DisplayName("Pattern can mix String, DirectByteBuffer, and heap ByteBuffer") + void pattern_mixedUsage_allTypes() { + Pattern pattern = Pattern.compile("\\d+"); - @Test - @DisplayName("Pattern.matches(heap ByteBuffer) should fall back to String API") - void patternMatches_heapBuffer_fallsBackToString() { - Pattern pattern = Pattern.compile("hello"); + // Use with String + assertThat(pattern.matches("123")).isTrue(); - ByteBuffer buffer = createHeapBuffer("hello"); - assertThat(buffer.isDirect()).isFalse(); // Verify it's heap + // Use with DirectByteBuffer (zero-copy) + ByteBuffer directBuffer = createDirectBuffer("456"); + assertThat(pattern.matches(directBuffer)).isTrue(); - boolean matches = pattern.matches(buffer); + // Use with heap ByteBuffer (String fallback) + ByteBuffer heapBuffer = createHeapBuffer("789"); + assertThat(pattern.matches(heapBuffer)).isTrue(); - assertThat(matches).isTrue(); - } - - @Test - @DisplayName("Pattern.find(heap ByteBuffer) should fall back to String API") - void patternFind_heapBuffer_fallsBackToString() { - Pattern pattern = Pattern.compile("world"); - - ByteBuffer buffer = createHeapBuffer("hello world"); - assertThat(buffer.isDirect()).isFalse(); - - boolean found = pattern.find(buffer); - - assertThat(found).isTrue(); - } - - // ========== Consistency Tests (Direct vs Heap vs String) ========== - - @ParameterizedTest - 
@DisplayName("DirectByteBuffer, heap ByteBuffer, and String should all match") - @CsvSource({ - "\\d+, 12345, true", - "\\d+, abc, false", - "[a-z]+, hello, true", - "[a-z]+, HELLO, false", - "test, test, true", - "test, testing, false" - }) - void allApisProduceSameResults(String patternStr, String input, boolean expected) { - Pattern pattern = Pattern.compile(patternStr); - - // String API - boolean stringResult = pattern.matches(input); - - // DirectByteBuffer API (zero-copy) - ByteBuffer directBuffer = createDirectBuffer(input); - boolean directResult = pattern.matches(directBuffer); - - // Heap ByteBuffer API (String fallback) - ByteBuffer heapBuffer = createHeapBuffer(input); - boolean heapResult = pattern.matches(heapBuffer); - - // All should produce same result - assertThat(directResult) - .as("DirectByteBuffer should match String API") - .isEqualTo(stringResult) - .isEqualTo(expected); - - assertThat(heapResult) - .as("Heap ByteBuffer should match String API") - .isEqualTo(stringResult) - .isEqualTo(expected); - } + // Mix all three in same method + assertThat(pattern.matches("abc")).isFalse(); + assertThat(pattern.matches(createDirectBuffer("def"))).isFalse(); + assertThat(pattern.matches(createHeapBuffer("ghi"))).isFalse(); + } - @ParameterizedTest - @DisplayName("find() should work consistently across all API variants") - @CsvSource({ - "\\d+, abc123def, true", - "\\d+, abcdef, false", - "@, user@example.com, true", - "@, noatsign, false" - }) - void find_allApisConsistent(String patternStr, String input, boolean expected) { - Pattern pattern = Pattern.compile(patternStr); - - // String API - boolean stringResult; - try (Matcher m = pattern.matcher(input)) { - stringResult = m.find(); - } - - // DirectByteBuffer - boolean directResult = pattern.find(createDirectBuffer(input)); - - // Heap ByteBuffer - boolean heapResult = pattern.find(createHeapBuffer(input)); - - assertThat(directResult).isEqualTo(stringResult).isEqualTo(expected); - 
assertThat(heapResult).isEqualTo(stringResult).isEqualTo(expected); + // ========== Position/Limit Handling Tests ========== + + @Test + @DisplayName("ByteBuffer position and limit should be respected") + void byteBuffer_positionLimit_respected() { + Pattern pattern = Pattern.compile("world"); + + ByteBuffer buffer = createDirectBuffer("hello world goodbye"); + + // Match full buffer - should find "world" + assertThat(pattern.find(buffer)).isTrue(); + + // Reset and set position to skip "hello " + buffer.rewind(); + buffer.position(6); // Start at "world" + buffer.limit(11); // End after "world" + + // Should match just "world" + assertThat(pattern.matches(buffer)).isTrue(); + } + + @Test + @DisplayName("ByteBuffer position should not be modified") + void byteBuffer_positionNotModified() { + Pattern pattern = Pattern.compile("test"); + + ByteBuffer buffer = createDirectBuffer("test"); + int originalPosition = buffer.position(); + int originalLimit = buffer.limit(); + + pattern.matches(buffer); + + // Position and limit should be unchanged + assertThat(buffer.position()).isEqualTo(originalPosition); + assertThat(buffer.limit()).isEqualTo(originalLimit); + } + + // ========== Validation Tests ========== + + @Test + @DisplayName("Should throw on null ByteBuffer") + void matches_nullByteBuffer_throws() { + Pattern pattern = Pattern.compile("test"); + + assertThatNullPointerException() + .isThrownBy(() -> pattern.matches((ByteBuffer) null)) + .withMessageContaining("null"); + } + + @Test + @DisplayName("Empty ByteBuffer should work") + void matches_emptyByteBuffer_works() { + Pattern pattern = Pattern.compile(".*"); // Match anything (including empty) + + ByteBuffer emptyDirect = createDirectBuffer(""); + ByteBuffer emptyHeap = createHeapBuffer(""); + + assertThat(pattern.matches(emptyDirect)).isTrue(); + assertThat(pattern.matches(emptyHeap)).isTrue(); + } + + // ========== Real-World Scenario Tests ========== + + @Test + @DisplayName("Real-world: Netty-like scenario 
with DirectByteBuffer") + void realWorld_nettyStyleDirectBuffer() { + Pattern emailPattern = + Pattern.compile("\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b", false); + + // Simulate Netty ByteBuf-like usage (direct memory) + ByteBuffer networkBuffer = ByteBuffer.allocateDirect(1024); + String message = "New user registered: user@example.com"; + networkBuffer.put(message.getBytes(StandardCharsets.UTF_8)); + networkBuffer.flip(); + + // Extract email using zero-copy + boolean hasEmail = emailPattern.find(networkBuffer); + + assertThat(hasEmail).isTrue(); + } + + @Test + @DisplayName("Real-world: Process multiple network buffers") + void realWorld_multipleNetworkBuffers() { + Pattern validPattern = Pattern.compile("valid_.*"); + + // Simulate multiple incoming network buffers + ByteBuffer[] buffers = { + createDirectBuffer("valid_request_1"), + createDirectBuffer("invalid_request"), + createDirectBuffer("valid_request_2"), + createHeapBuffer("other_data"), // Mixed: some heap, some direct + createDirectBuffer("valid_request_3") + }; + + // Process all buffers + int validCount = 0; + for (ByteBuffer buffer : buffers) { + if (validPattern.matches(buffer)) { + validCount++; + } } - // ========== Mixed Usage Tests ========== - - @Test - @DisplayName("Pattern can mix String, DirectByteBuffer, and heap ByteBuffer") - void pattern_mixedUsage_allTypes() { - Pattern pattern = Pattern.compile("\\d+"); - - // Use with String - assertThat(pattern.matches("123")).isTrue(); - - // Use with DirectByteBuffer (zero-copy) - ByteBuffer directBuffer = createDirectBuffer("456"); - assertThat(pattern.matches(directBuffer)).isTrue(); - - // Use with heap ByteBuffer (String fallback) - ByteBuffer heapBuffer = createHeapBuffer("789"); - assertThat(pattern.matches(heapBuffer)).isTrue(); - - // Mix all three in same method - assertThat(pattern.matches("abc")).isFalse(); - assertThat(pattern.matches(createDirectBuffer("def"))).isFalse(); - 
assertThat(pattern.matches(createHeapBuffer("ghi"))).isFalse(); - } - - // ========== Position/Limit Handling Tests ========== - - @Test - @DisplayName("ByteBuffer position and limit should be respected") - void byteBuffer_positionLimit_respected() { - Pattern pattern = Pattern.compile("world"); - - ByteBuffer buffer = createDirectBuffer("hello world goodbye"); - - // Match full buffer - should find "world" - assertThat(pattern.find(buffer)).isTrue(); - - // Reset and set position to skip "hello " - buffer.rewind(); - buffer.position(6); // Start at "world" - buffer.limit(11); // End after "world" - - // Should match just "world" - assertThat(pattern.matches(buffer)).isTrue(); - } - - @Test - @DisplayName("ByteBuffer position should not be modified") - void byteBuffer_positionNotModified() { - Pattern pattern = Pattern.compile("test"); - - ByteBuffer buffer = createDirectBuffer("test"); - int originalPosition = buffer.position(); - int originalLimit = buffer.limit(); - - pattern.matches(buffer); - - // Position and limit should be unchanged - assertThat(buffer.position()).isEqualTo(originalPosition); - assertThat(buffer.limit()).isEqualTo(originalLimit); - } - - // ========== Validation Tests ========== - - @Test - @DisplayName("Should throw on null ByteBuffer") - void matches_nullByteBuffer_throws() { - Pattern pattern = Pattern.compile("test"); - - assertThatNullPointerException() - .isThrownBy(() -> pattern.matches((ByteBuffer) null)) - .withMessageContaining("null"); - } - - @Test - @DisplayName("Empty ByteBuffer should work") - void matches_emptyByteBuffer_works() { - Pattern pattern = Pattern.compile(".*"); // Match anything (including empty) - - ByteBuffer emptyDirect = createDirectBuffer(""); - ByteBuffer emptyHeap = createHeapBuffer(""); - - assertThat(pattern.matches(emptyDirect)).isTrue(); - assertThat(pattern.matches(emptyHeap)).isTrue(); - } - - // ========== Real-World Scenario Tests ========== - - @Test - @DisplayName("Real-world: Netty-like scenario 
with DirectByteBuffer") - void realWorld_nettyStyleDirectBuffer() { - Pattern emailPattern = Pattern.compile("\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b", false); - - // Simulate Netty ByteBuf-like usage (direct memory) - ByteBuffer networkBuffer = ByteBuffer.allocateDirect(1024); - String message = "New user registered: user@example.com"; - networkBuffer.put(message.getBytes(StandardCharsets.UTF_8)); - networkBuffer.flip(); - - // Extract email using zero-copy - boolean hasEmail = emailPattern.find(networkBuffer); - - assertThat(hasEmail).isTrue(); - } - - @Test - @DisplayName("Real-world: Process multiple network buffers") - void realWorld_multipleNetworkBuffers() { - Pattern validPattern = Pattern.compile("valid_.*"); - - // Simulate multiple incoming network buffers - ByteBuffer[] buffers = { - createDirectBuffer("valid_request_1"), - createDirectBuffer("invalid_request"), - createDirectBuffer("valid_request_2"), - createHeapBuffer("other_data"), // Mixed: some heap, some direct - createDirectBuffer("valid_request_3") - }; - - // Process all buffers - int validCount = 0; - for (ByteBuffer buffer : buffers) { - if (validPattern.matches(buffer)) { - validCount++; - } - } - - assertThat(validCount).isEqualTo(3); - } + assertThat(validCount).isEqualTo(3); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/CaptureGroupsIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/CaptureGroupsIT.java index a3e7228..1e5dc8e 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/CaptureGroupsIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/CaptureGroupsIT.java @@ -16,457 +16,449 @@ package com.axonops.libre2.api; +import static org.assertj.core.api.Assertions.*; + +import java.util.List; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; -import 
java.util.List; - -import static org.assertj.core.api.Assertions.*; - -/** - * Tests for capture group functionality (MatchResult and Pattern capture methods). - */ +/** Tests for capture group functionality (MatchResult and Pattern capture methods). */ @DisplayName("Capture Groups") class CaptureGroupsIT { - // ========== MatchResult Basic Tests ========== - - @Test - @DisplayName("MatchResult should indicate successful match") - void matchResult_successfulMatch_matched() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.match("123")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group()).isEqualTo("123"); - assertThat(result.group(0)).isEqualTo("123"); - assertThat(result.group(1)).isEqualTo("123"); - } - } - - @Test - @DisplayName("MatchResult should indicate failed match") - void matchResult_failedMatch_notMatched() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.match("abc")) { - assertThat(result.matched()).isFalse(); - assertThat(result.groupCount()).isEqualTo(0); - } - } - - @Test - @DisplayName("MatchResult should throw on group access when not matched") - void matchResult_noMatch_throwsOnGroupAccess() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.match("abc")) { - assertThatIllegalStateException() - .isThrownBy(() -> result.group()) - .withMessageContaining("No match"); - } - } - - // ========== Pattern.match() Tests ========== - - @Test - @DisplayName("Pattern.match() should extract single group") - void patternMatch_singleGroup_extracted() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.match("123")) { - assertThat(result.matched()).isTrue(); - assertThat(result.groupCount()).isEqualTo(1); - assertThat(result.group(0)).isEqualTo("123"); // Full match - assertThat(result.group(1)).isEqualTo("123"); // Captured group - } - } - - @Test - @DisplayName("Pattern.match() should extract multiple 
groups") - void patternMatch_multipleGroups_extracted() { - Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+)\\.([a-z]+)"); - try (MatchResult result = pattern.match("user@example.com")) { - assertThat(result.matched()).isTrue(); - assertThat(result.groupCount()).isEqualTo(3); - assertThat(result.group()).isEqualTo("user@example.com"); - assertThat(result.group(1)).isEqualTo("user"); - assertThat(result.group(2)).isEqualTo("example"); - assertThat(result.group(3)).isEqualTo("com"); - } - } - - @Test - @DisplayName("Pattern.match() should handle date extraction") - void patternMatch_dateExtraction_works() { - Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})"); - try (MatchResult result = pattern.match("2025-11-24")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group()).isEqualTo("2025-11-24"); - assertThat(result.group(1)).isEqualTo("2025"); - assertThat(result.group(2)).isEqualTo("11"); - assertThat(result.group(3)).isEqualTo("24"); - } - } - - @Test - @DisplayName("Pattern.match() should fail on partial content") - void patternMatch_partialContent_fails() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.match("abc123def")) { - assertThat(result.matched()).isFalse(); - } - } - - // ========== Pattern.find() Tests ========== - - @Test - @DisplayName("Pattern.find() should find first match in text") - void patternFind_firstMatch_found() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.find("abc123def456")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group()).isEqualTo("123"); // First match - assertThat(result.group(1)).isEqualTo("123"); - } - } - - @Test - @DisplayName("Pattern.find() should extract groups from first match") - void patternFind_firstMatchGroups_extracted() { - Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)"); - try (MatchResult result = pattern.find("Contact support@example.com or admin@test.org")) { - 
assertThat(result.matched()).isTrue(); - assertThat(result.group()).isEqualTo("support@example.com"); // First email - assertThat(result.group(1)).isEqualTo("support"); - assertThat(result.group(2)).isEqualTo("example.com"); - } - } - - @Test - @DisplayName("Pattern.find() should return failed match when not found") - void patternFind_notFound_failedMatch() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.find("no digits here")) { - assertThat(result.matched()).isFalse(); - } - } - - // ========== Pattern.findAll() Tests ========== - - @Test - @DisplayName("Pattern.findAll() should find all matches") - void patternFindAll_multipleMatches_found() { - Pattern pattern = Pattern.compile("(\\d+)"); - List matches = pattern.findAll("a1b22c333"); - try { - assertThat(matches).hasSize(3); - assertThat(matches.get(0).group()).isEqualTo("1"); - assertThat(matches.get(1).group()).isEqualTo("22"); - assertThat(matches.get(2).group()).isEqualTo("333"); - } finally { - matches.forEach(MatchResult::close); - } - } - - @Test - @DisplayName("Pattern.findAll() should extract groups from each match") - void patternFindAll_multipleMatchesWithGroups_extracted() { - Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})"); - List matches = pattern.findAll("Call 555-1234 or 555-5678"); - try { - assertThat(matches).hasSize(2); - - // First match - assertThat(matches.get(0).group()).isEqualTo("555-1234"); - assertThat(matches.get(0).group(1)).isEqualTo("555"); - assertThat(matches.get(0).group(2)).isEqualTo("1234"); - - // Second match - assertThat(matches.get(1).group()).isEqualTo("555-5678"); - assertThat(matches.get(1).group(1)).isEqualTo("555"); - assertThat(matches.get(1).group(2)).isEqualTo("5678"); - } finally { - matches.forEach(MatchResult::close); - } - } - - @Test - @DisplayName("Pattern.findAll() should return empty list for no matches") - void patternFindAll_noMatches_emptyList() { - Pattern pattern = Pattern.compile("(\\d+)"); - List matches 
= pattern.findAll("no digits"); - try { - assertThat(matches).isEmpty(); - } finally { - matches.forEach(MatchResult::close); - } - } - - // ========== Named Groups Tests ========== - - @Test - @DisplayName("Named groups should be accessible by name") - void namedGroups_accessByName_works() { - Pattern pattern = Pattern.compile("(?P\\d{4})-(?P\\d{2})-(?P\\d{2})"); - try (MatchResult result = pattern.match("2025-11-24")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group("year")).isEqualTo("2025"); - assertThat(result.group("month")).isEqualTo("11"); - assertThat(result.group("day")).isEqualTo("24"); - } - } - - @Test - @DisplayName("Named groups should also be accessible by index") - void namedGroups_accessByIndex_works() { - Pattern pattern = Pattern.compile("(?P[a-z]+)@(?P[a-z]+\\.[a-z]+)"); - try (MatchResult result = pattern.match("admin@example.com")) { - assertThat(result.matched()).isTrue(); - // Access by name - assertThat(result.group("user")).isEqualTo("admin"); - assertThat(result.group("domain")).isEqualTo("example.com"); - - // Also accessible by index - assertThat(result.group(1)).isEqualTo("admin"); - assertThat(result.group(2)).isEqualTo("example.com"); - } - } - - @Test - @DisplayName("Non-existent named group should return null") - void namedGroups_nonExistent_returnsNull() { - Pattern pattern = Pattern.compile("(?P\\d+)"); - try (MatchResult result = pattern.match("123")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group("found")).isEqualTo("123"); - assertThat(result.group("notfound")).isNull(); - } - } - - // ========== Edge Cases ========== - - @Test - @DisplayName("Pattern with no groups should work") - void pattern_noGroups_works() { - Pattern pattern = Pattern.compile("\\d+"); // No parentheses - try (MatchResult result = pattern.match("123")) { - assertThat(result.matched()).isTrue(); - assertThat(result.groupCount()).isEqualTo(0); - assertThat(result.group()).isEqualTo("123"); // Group 0 still 
available - } - } - - @Test - @DisplayName("Optional groups that don't participate should be null") - void optionalGroups_notParticipating_null() { - Pattern pattern = Pattern.compile("(a)?(b)"); - try (MatchResult result = pattern.match("b")) { // 'a' is optional and doesn't match - assertThat(result.matched()).isTrue(); - assertThat(result.groupCount()).isEqualTo(2); - assertThat(result.group(0)).isEqualTo("b"); - assertThat(result.group(1)).isNull(); // Optional 'a' didn't participate - assertThat(result.group(2)).isEqualTo("b"); - } - } - - @Test - @DisplayName("Nested groups should be extracted correctly") - void nestedGroups_extracted() { - Pattern pattern = Pattern.compile("((\\d+)-(\\d+))"); - try (MatchResult result = pattern.match("123-456")) { - assertThat(result.matched()).isTrue(); - assertThat(result.groupCount()).isEqualTo(3); - assertThat(result.group(1)).isEqualTo("123-456"); // Outer group - assertThat(result.group(2)).isEqualTo("123"); // First inner - assertThat(result.group(3)).isEqualTo("456"); // Second inner - } - } - - @Test - @DisplayName("MatchResult.groups() should return defensive copy") - void matchResult_groupsArray_defensiveCopy() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.match("123")) { - String[] groups1 = result.groups(); - String[] groups2 = result.groups(); - - assertThat(groups1).isNotSameAs(groups2); // Different array instances - assertThat(groups1).containsExactly(groups2); // Same content - } - } - - @Test - @DisplayName("MatchResult should provide input string") - void matchResult_input_available() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.match("123")) { - assertThat(result.input()).isEqualTo("123"); - } - } - - @Test - @DisplayName("MatchResult should throw on invalid group index") - void matchResult_invalidIndex_throws() { - Pattern pattern = Pattern.compile("(\\d+)"); - try (MatchResult result = pattern.match("123")) { - 
assertThatIndexOutOfBoundsException() - .isThrownBy(() -> result.group(5)) - .withMessageContaining("out of bounds"); - - assertThatIndexOutOfBoundsException() - .isThrownBy(() -> result.group(-1)) - .withMessageContaining("out of bounds"); - } - } - - // ========== Real-World Scenarios ========== - - @Test - @DisplayName("Extract email components") - void realWorld_emailExtraction() { - Pattern pattern = Pattern.compile("([a-z0-9._%+-]+)@([a-z0-9.-]+)\\.([a-z]{2,})"); - try (MatchResult result = pattern.match("john.doe@example.co.uk")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group(1)).isEqualTo("john.doe"); - assertThat(result.group(2)).isEqualTo("example.co"); - assertThat(result.group(3)).isEqualTo("uk"); - } - } - - @Test - @DisplayName("Parse log line with timestamp and level") - void realWorld_logParsing() { - Pattern pattern = Pattern.compile("\\[(\\d+)\\] (\\w+): (.+)"); - try (MatchResult result = pattern.find("[1234567890] ERROR: Something went wrong")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group(1)).isEqualTo("1234567890"); // timestamp - assertThat(result.group(2)).isEqualTo("ERROR"); // level - assertThat(result.group(3)).isEqualTo("Something went wrong"); // message - } - } - - @Test - @DisplayName("Extract all URLs from text") - void realWorld_extractAllUrls() { - Pattern pattern = Pattern.compile("https?://([a-z0-9.-]+)/([a-z0-9/_-]+)"); - List matches = pattern.findAll("Visit http://example.com/page1 and https://test.org/page2"); - try { - assertThat(matches).hasSize(2); - - // First URL - assertThat(matches.get(0).group()).isEqualTo("http://example.com/page1"); - assertThat(matches.get(0).group(1)).isEqualTo("example.com"); - assertThat(matches.get(0).group(2)).isEqualTo("page1"); - - // Second URL - assertThat(matches.get(1).group()).isEqualTo("https://test.org/page2"); - assertThat(matches.get(1).group(1)).isEqualTo("test.org"); - assertThat(matches.get(1).group(2)).isEqualTo("page2"); - } finally 
{ - matches.forEach(MatchResult::close); - } - } - - @Test - @DisplayName("Extract all numbers from mixed text") - void realWorld_extractAllNumbers() { - Pattern pattern = Pattern.compile("(\\d+)"); - List matches = pattern.findAll("Item 1 costs $99, item 22 costs $199"); - try { - assertThat(matches).hasSize(4); - assertThat(matches.get(0).group(1)).isEqualTo("1"); - assertThat(matches.get(1).group(1)).isEqualTo("99"); - assertThat(matches.get(2).group(1)).isEqualTo("22"); - assertThat(matches.get(3).group(1)).isEqualTo("199"); - } finally { - matches.forEach(MatchResult::close); - } - } - - // ========== Named Groups Advanced Tests ========== - - @Test - @DisplayName("Mixed named and unnamed groups") - void namedGroups_mixedWithUnnamed_works() { - Pattern pattern = Pattern.compile("(\\d{4})-(?P\\d{2})-(\\d{2})"); - try (MatchResult result = pattern.match("2025-11-24")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group(1)).isEqualTo("2025"); // Unnamed - assertThat(result.group("month")).isEqualTo("11"); // Named - assertThat(result.group(2)).isEqualTo("11"); // Also accessible by index - assertThat(result.group(3)).isEqualTo("24"); // Unnamed - } - } - - @Test - @DisplayName("MatchResult should expose named groups map") - void matchResult_namedGroupsMap_exposed() { - Pattern pattern = Pattern.compile("(?P\\d+)-(?P\\d+)"); - try (MatchResult result = pattern.match("123-456")) { - assertThat(result.namedGroups()).containsKeys("a", "b"); - assertThat(result.namedGroups().get("a")).isEqualTo(1); - assertThat(result.namedGroups().get("b")).isEqualTo(2); - } - } - - // ========== Consistency Tests ========== - - @ParameterizedTest - @DisplayName("Pattern.match() vs Pattern.matches() consistency") - @CsvSource({ - "\\d+, 123, true", - "\\d+, abc, false", - "[a-z]+, hello, true", - "[a-z]+, HELLO, false" - }) - void match_consistentWithMatches(String patternStr, String input, boolean shouldMatch) { - Pattern pattern = Pattern.compile(patternStr); - - 
boolean matchesResult = pattern.matches(input); - try (MatchResult matchResult = pattern.match(input)) { - assertThat(matchResult.matched()).isEqualTo(matchesResult).isEqualTo(shouldMatch); - } - } - - @Test - @DisplayName("Pattern.find() vs Matcher.find() consistency") - void find_consistentWithMatcher() { - Pattern pattern = Pattern.compile("(\\d+)"); - - boolean matcherFind; - try (Matcher m = pattern.matcher("abc123def")) { - matcherFind = m.find(); - } - - try (MatchResult findResult = pattern.find("abc123def")) { - assertThat(findResult.matched()).isEqualTo(matcherFind); - } - } - - // ========== Empty and Null Tests ========== - - @Test - @DisplayName("Empty string should work") - void emptyString_works() { - Pattern pattern = Pattern.compile(".*"); - try (MatchResult result = pattern.match("")) { - assertThat(result.matched()).isTrue(); - assertThat(result.group()).isEqualTo(""); - } - } - - @Test - @DisplayName("Null input should throw") - void nullInput_throws() { - Pattern pattern = Pattern.compile("test"); - - assertThatNullPointerException() - .isThrownBy(() -> pattern.match((String) null)); // Cast to disambiguate - - assertThatNullPointerException() - .isThrownBy(() -> pattern.find((String) null)); // Cast to disambiguate - - assertThatNullPointerException() - .isThrownBy(() -> pattern.findAll((String) null)); // Cast to disambiguate - } + // ========== MatchResult Basic Tests ========== + + @Test + @DisplayName("MatchResult should indicate successful match") + void matchResult_successfulMatch_matched() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.match("123")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group()).isEqualTo("123"); + assertThat(result.group(0)).isEqualTo("123"); + assertThat(result.group(1)).isEqualTo("123"); + } + } + + @Test + @DisplayName("MatchResult should indicate failed match") + void matchResult_failedMatch_notMatched() { + Pattern pattern = Pattern.compile("(\\d+)"); 
+ try (MatchResult result = pattern.match("abc")) { + assertThat(result.matched()).isFalse(); + assertThat(result.groupCount()).isEqualTo(0); + } + } + + @Test + @DisplayName("MatchResult should throw on group access when not matched") + void matchResult_noMatch_throwsOnGroupAccess() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.match("abc")) { + assertThatIllegalStateException() + .isThrownBy(() -> result.group()) + .withMessageContaining("No match"); + } + } + + // ========== Pattern.match() Tests ========== + + @Test + @DisplayName("Pattern.match() should extract single group") + void patternMatch_singleGroup_extracted() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.match("123")) { + assertThat(result.matched()).isTrue(); + assertThat(result.groupCount()).isEqualTo(1); + assertThat(result.group(0)).isEqualTo("123"); // Full match + assertThat(result.group(1)).isEqualTo("123"); // Captured group + } + } + + @Test + @DisplayName("Pattern.match() should extract multiple groups") + void patternMatch_multipleGroups_extracted() { + Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+)\\.([a-z]+)"); + try (MatchResult result = pattern.match("user@example.com")) { + assertThat(result.matched()).isTrue(); + assertThat(result.groupCount()).isEqualTo(3); + assertThat(result.group()).isEqualTo("user@example.com"); + assertThat(result.group(1)).isEqualTo("user"); + assertThat(result.group(2)).isEqualTo("example"); + assertThat(result.group(3)).isEqualTo("com"); + } + } + + @Test + @DisplayName("Pattern.match() should handle date extraction") + void patternMatch_dateExtraction_works() { + Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})"); + try (MatchResult result = pattern.match("2025-11-24")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group()).isEqualTo("2025-11-24"); + assertThat(result.group(1)).isEqualTo("2025"); + assertThat(result.group(2)).isEqualTo("11"); + 
assertThat(result.group(3)).isEqualTo("24"); + } + } + + @Test + @DisplayName("Pattern.match() should fail on partial content") + void patternMatch_partialContent_fails() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.match("abc123def")) { + assertThat(result.matched()).isFalse(); + } + } + + // ========== Pattern.find() Tests ========== + + @Test + @DisplayName("Pattern.find() should find first match in text") + void patternFind_firstMatch_found() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.find("abc123def456")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group()).isEqualTo("123"); // First match + assertThat(result.group(1)).isEqualTo("123"); + } + } + + @Test + @DisplayName("Pattern.find() should extract groups from first match") + void patternFind_firstMatchGroups_extracted() { + Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)"); + try (MatchResult result = pattern.find("Contact support@example.com or admin@test.org")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group()).isEqualTo("support@example.com"); // First email + assertThat(result.group(1)).isEqualTo("support"); + assertThat(result.group(2)).isEqualTo("example.com"); + } + } + + @Test + @DisplayName("Pattern.find() should return failed match when not found") + void patternFind_notFound_failedMatch() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.find("no digits here")) { + assertThat(result.matched()).isFalse(); + } + } + + // ========== Pattern.findAll() Tests ========== + + @Test + @DisplayName("Pattern.findAll() should find all matches") + void patternFindAll_multipleMatches_found() { + Pattern pattern = Pattern.compile("(\\d+)"); + List matches = pattern.findAll("a1b22c333"); + try { + assertThat(matches).hasSize(3); + assertThat(matches.get(0).group()).isEqualTo("1"); + assertThat(matches.get(1).group()).isEqualTo("22"); + 
assertThat(matches.get(2).group()).isEqualTo("333"); + } finally { + matches.forEach(MatchResult::close); + } + } + + @Test + @DisplayName("Pattern.findAll() should extract groups from each match") + void patternFindAll_multipleMatchesWithGroups_extracted() { + Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})"); + List matches = pattern.findAll("Call 555-1234 or 555-5678"); + try { + assertThat(matches).hasSize(2); + + // First match + assertThat(matches.get(0).group()).isEqualTo("555-1234"); + assertThat(matches.get(0).group(1)).isEqualTo("555"); + assertThat(matches.get(0).group(2)).isEqualTo("1234"); + + // Second match + assertThat(matches.get(1).group()).isEqualTo("555-5678"); + assertThat(matches.get(1).group(1)).isEqualTo("555"); + assertThat(matches.get(1).group(2)).isEqualTo("5678"); + } finally { + matches.forEach(MatchResult::close); + } + } + + @Test + @DisplayName("Pattern.findAll() should return empty list for no matches") + void patternFindAll_noMatches_emptyList() { + Pattern pattern = Pattern.compile("(\\d+)"); + List matches = pattern.findAll("no digits"); + try { + assertThat(matches).isEmpty(); + } finally { + matches.forEach(MatchResult::close); + } + } + + // ========== Named Groups Tests ========== + + @Test + @DisplayName("Named groups should be accessible by name") + void namedGroups_accessByName_works() { + Pattern pattern = Pattern.compile("(?P\\d{4})-(?P\\d{2})-(?P\\d{2})"); + try (MatchResult result = pattern.match("2025-11-24")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group("year")).isEqualTo("2025"); + assertThat(result.group("month")).isEqualTo("11"); + assertThat(result.group("day")).isEqualTo("24"); + } + } + + @Test + @DisplayName("Named groups should also be accessible by index") + void namedGroups_accessByIndex_works() { + Pattern pattern = Pattern.compile("(?P[a-z]+)@(?P[a-z]+\\.[a-z]+)"); + try (MatchResult result = pattern.match("admin@example.com")) { + assertThat(result.matched()).isTrue(); + // 
Access by name + assertThat(result.group("user")).isEqualTo("admin"); + assertThat(result.group("domain")).isEqualTo("example.com"); + + // Also accessible by index + assertThat(result.group(1)).isEqualTo("admin"); + assertThat(result.group(2)).isEqualTo("example.com"); + } + } + + @Test + @DisplayName("Non-existent named group should return null") + void namedGroups_nonExistent_returnsNull() { + Pattern pattern = Pattern.compile("(?P\\d+)"); + try (MatchResult result = pattern.match("123")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group("found")).isEqualTo("123"); + assertThat(result.group("notfound")).isNull(); + } + } + + // ========== Edge Cases ========== + + @Test + @DisplayName("Pattern with no groups should work") + void pattern_noGroups_works() { + Pattern pattern = Pattern.compile("\\d+"); // No parentheses + try (MatchResult result = pattern.match("123")) { + assertThat(result.matched()).isTrue(); + assertThat(result.groupCount()).isEqualTo(0); + assertThat(result.group()).isEqualTo("123"); // Group 0 still available + } + } + + @Test + @DisplayName("Optional groups that don't participate should be null") + void optionalGroups_notParticipating_null() { + Pattern pattern = Pattern.compile("(a)?(b)"); + try (MatchResult result = pattern.match("b")) { // 'a' is optional and doesn't match + assertThat(result.matched()).isTrue(); + assertThat(result.groupCount()).isEqualTo(2); + assertThat(result.group(0)).isEqualTo("b"); + assertThat(result.group(1)).isNull(); // Optional 'a' didn't participate + assertThat(result.group(2)).isEqualTo("b"); + } + } + + @Test + @DisplayName("Nested groups should be extracted correctly") + void nestedGroups_extracted() { + Pattern pattern = Pattern.compile("((\\d+)-(\\d+))"); + try (MatchResult result = pattern.match("123-456")) { + assertThat(result.matched()).isTrue(); + assertThat(result.groupCount()).isEqualTo(3); + assertThat(result.group(1)).isEqualTo("123-456"); // Outer group + 
assertThat(result.group(2)).isEqualTo("123"); // First inner + assertThat(result.group(3)).isEqualTo("456"); // Second inner + } + } + + @Test + @DisplayName("MatchResult.groups() should return defensive copy") + void matchResult_groupsArray_defensiveCopy() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.match("123")) { + String[] groups1 = result.groups(); + String[] groups2 = result.groups(); + + assertThat(groups1).isNotSameAs(groups2); // Different array instances + assertThat(groups1).containsExactly(groups2); // Same content + } + } + + @Test + @DisplayName("MatchResult should provide input string") + void matchResult_input_available() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.match("123")) { + assertThat(result.input()).isEqualTo("123"); + } + } + + @Test + @DisplayName("MatchResult should throw on invalid group index") + void matchResult_invalidIndex_throws() { + Pattern pattern = Pattern.compile("(\\d+)"); + try (MatchResult result = pattern.match("123")) { + assertThatIndexOutOfBoundsException() + .isThrownBy(() -> result.group(5)) + .withMessageContaining("out of bounds"); + + assertThatIndexOutOfBoundsException() + .isThrownBy(() -> result.group(-1)) + .withMessageContaining("out of bounds"); + } + } + + // ========== Real-World Scenarios ========== + + @Test + @DisplayName("Extract email components") + void realWorld_emailExtraction() { + Pattern pattern = Pattern.compile("([a-z0-9._%+-]+)@([a-z0-9.-]+)\\.([a-z]{2,})"); + try (MatchResult result = pattern.match("john.doe@example.co.uk")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group(1)).isEqualTo("john.doe"); + assertThat(result.group(2)).isEqualTo("example.co"); + assertThat(result.group(3)).isEqualTo("uk"); + } + } + + @Test + @DisplayName("Parse log line with timestamp and level") + void realWorld_logParsing() { + Pattern pattern = Pattern.compile("\\[(\\d+)\\] (\\w+): (.+)"); + try (MatchResult 
result = pattern.find("[1234567890] ERROR: Something went wrong")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group(1)).isEqualTo("1234567890"); // timestamp + assertThat(result.group(2)).isEqualTo("ERROR"); // level + assertThat(result.group(3)).isEqualTo("Something went wrong"); // message + } + } + + @Test + @DisplayName("Extract all URLs from text") + void realWorld_extractAllUrls() { + Pattern pattern = Pattern.compile("https?://([a-z0-9.-]+)/([a-z0-9/_-]+)"); + List<MatchResult> matches = + pattern.findAll("Visit http://example.com/page1 and https://test.org/page2"); + try { + assertThat(matches).hasSize(2); + + // First URL + assertThat(matches.get(0).group()).isEqualTo("http://example.com/page1"); + assertThat(matches.get(0).group(1)).isEqualTo("example.com"); + assertThat(matches.get(0).group(2)).isEqualTo("page1"); + + // Second URL + assertThat(matches.get(1).group()).isEqualTo("https://test.org/page2"); + assertThat(matches.get(1).group(1)).isEqualTo("test.org"); + assertThat(matches.get(1).group(2)).isEqualTo("page2"); + } finally { + matches.forEach(MatchResult::close); + } + } + + @Test + @DisplayName("Extract all numbers from mixed text") + void realWorld_extractAllNumbers() { + Pattern pattern = Pattern.compile("(\\d+)"); + List<MatchResult> matches = pattern.findAll("Item 1 costs $99, item 22 costs $199"); + try { + assertThat(matches).hasSize(4); + assertThat(matches.get(0).group(1)).isEqualTo("1"); + assertThat(matches.get(1).group(1)).isEqualTo("99"); + assertThat(matches.get(2).group(1)).isEqualTo("22"); + assertThat(matches.get(3).group(1)).isEqualTo("199"); + } finally { + matches.forEach(MatchResult::close); + } + } + + // ========== Named Groups Advanced Tests ========== + + @Test + @DisplayName("Mixed named and unnamed groups") + void namedGroups_mixedWithUnnamed_works() { + Pattern pattern = Pattern.compile("(\\d{4})-(?P<month>\\d{2})-(\\d{2})"); + try (MatchResult result = pattern.match("2025-11-24")) { + assertThat(result.matched()).isTrue(); +
assertThat(result.group(1)).isEqualTo("2025"); // Unnamed + assertThat(result.group("month")).isEqualTo("11"); // Named + assertThat(result.group(2)).isEqualTo("11"); // Also accessible by index + assertThat(result.group(3)).isEqualTo("24"); // Unnamed + } + } + + @Test + @DisplayName("MatchResult should expose named groups map") + void matchResult_namedGroupsMap_exposed() { + Pattern pattern = Pattern.compile("(?P<a>\\d+)-(?P<b>\\d+)"); + try (MatchResult result = pattern.match("123-456")) { + assertThat(result.namedGroups()).containsKeys("a", "b"); + assertThat(result.namedGroups().get("a")).isEqualTo(1); + assertThat(result.namedGroups().get("b")).isEqualTo(2); + } + } + + // ========== Consistency Tests ========== + + @ParameterizedTest + @DisplayName("Pattern.match() vs Pattern.matches() consistency") + @CsvSource({"\\d+, 123, true", "\\d+, abc, false", "[a-z]+, hello, true", "[a-z]+, HELLO, false"}) + void match_consistentWithMatches(String patternStr, String input, boolean shouldMatch) { + Pattern pattern = Pattern.compile(patternStr); + + boolean matchesResult = pattern.matches(input); + try (MatchResult matchResult = pattern.match(input)) { + assertThat(matchResult.matched()).isEqualTo(matchesResult).isEqualTo(shouldMatch); + } + } + + @Test + @DisplayName("Pattern.find() vs Matcher.find() consistency") + void find_consistentWithMatcher() { + Pattern pattern = Pattern.compile("(\\d+)"); + + boolean matcherFind; + try (Matcher m = pattern.matcher("abc123def")) { + matcherFind = m.find(); + } + + try (MatchResult findResult = pattern.find("abc123def")) { + assertThat(findResult.matched()).isEqualTo(matcherFind); + } + } + + // ========== Empty and Null Tests ========== + + @Test + @DisplayName("Empty string should work") + void emptyString_works() { + Pattern pattern = Pattern.compile(".*"); + try (MatchResult result = pattern.match("")) { + assertThat(result.matched()).isTrue(); + assertThat(result.group()).isEqualTo(""); + } + } + + @Test + @DisplayName("Null
input should throw") + void nullInput_throws() { + Pattern pattern = Pattern.compile("test"); + + assertThatNullPointerException() + .isThrownBy(() -> pattern.match((String) null)); // Cast to disambiguate + + assertThatNullPointerException() + .isThrownBy(() -> pattern.find((String) null)); // Cast to disambiguate + + assertThatNullPointerException() + .isThrownBy(() -> pattern.findAll((String) null)); // Cast to disambiguate + } } - diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/Phase1ExtensionsIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/Phase1ExtensionsIT.java index f01d543..d1abe20 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/Phase1ExtensionsIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/Phase1ExtensionsIT.java @@ -16,249 +16,236 @@ package com.axonops.libre2.api; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; +import static org.assertj.core.api.Assertions.*; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; -import static org.assertj.core.api.Assertions.*; - -/** - * Tests for Phase 1 extensions: findAll bulk variants and ByteBuffer[] bulk. - */ +/** Tests for Phase 1 extensions: findAll bulk variants and ByteBuffer[] bulk. 
*/ @DisplayName("Phase 1 Extensions (findAll bulk + ByteBuffer[] bulk)") class Phase1ExtensionsIT { - private ByteBuffer createDirectBuffer(String text) { - byte[] bytes = text.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length); - buffer.put(bytes); - buffer.flip(); - return buffer; - } + private ByteBuffer createDirectBuffer(String text) { + byte[] bytes = text.getBytes(StandardCharsets.UTF_8); + ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length); + buffer.put(bytes); + buffer.flip(); + return buffer; + } + + private ByteBuffer createHeapBuffer(String text) { + return ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8)); + } + + // ========== findAll(String[]) Tests ========== + + @Test + @DisplayName("findAll(String[]) should find partial matches in all strings") + void findAll_stringArray_findsPartialMatches() { + Pattern pattern = Pattern.compile("test"); + String[] inputs = { + "test", // Full match - should find + "testing", // Partial match - should find + "notest", // Partial match - should find + "other" // No match + }; - private ByteBuffer createHeapBuffer(String text) { - return ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8)); - } + boolean[] results = pattern.findAll(inputs); - // ========== findAll(String[]) Tests ========== + assertThat(results).containsExactly(true, true, true, false); + } - @Test - @DisplayName("findAll(String[]) should find partial matches in all strings") - void findAll_stringArray_findsPartialMatches() { - Pattern pattern = Pattern.compile("test"); - String[] inputs = { - "test", // Full match - should find - "testing", // Partial match - should find - "notest", // Partial match - should find - "other" // No match - }; + @Test + @DisplayName("findAll(String[]) vs matchAll(String[]) - partial vs full") + void findAll_vs_matchAll_differentBehavior() { + Pattern pattern = Pattern.compile("test"); + String[] inputs = {"test", "testing", "other"}; + + boolean[] 
matchResults = pattern.matchAll(inputs); // Full match + boolean[] findResults = pattern.findAll(inputs); // Partial match + + assertThat(matchResults).containsExactly(true, false, false); // Only exact matches + assertThat(findResults).containsExactly(true, true, false); // Partial matches too + } + + @Test + @DisplayName("findAll(Collection) should work") + void findAll_collection_works() { + Pattern pattern = Pattern.compile("\\d+"); + List inputs = Arrays.asList("abc123", "def", "456ghi"); + + boolean[] results = pattern.findAll(inputs); + + assertThat(results).containsExactly(true, false, true); + } + + @Test + @DisplayName("findAll(String[]) with empty array should return empty") + void findAll_emptyArray_returnsEmpty() { + Pattern pattern = Pattern.compile("test"); + + boolean[] results = pattern.findAll(new String[0]); + + assertThat(results).isEmpty(); + } + + // ========== matchAll(ByteBuffer[]) Tests ========== + + @Test + @DisplayName("matchAll(ByteBuffer[]) with all DirectByteBuffers should use zero-copy") + void matchAll_allDirectBuffers_usesZeroCopy() { + Pattern pattern = Pattern.compile("test"); + ByteBuffer[] buffers = { + createDirectBuffer("test"), createDirectBuffer("testing"), createDirectBuffer("test") + }; - boolean[] results = pattern.findAll(inputs); + boolean[] results = pattern.matchAll(buffers); + + assertThat(results).containsExactly(true, false, true); + } + + @Test + @DisplayName("matchAll(ByteBuffer[]) with all heap buffers should convert to String") + void matchAll_allHeapBuffers_convertsToString() { + Pattern pattern = Pattern.compile("test"); + ByteBuffer[] buffers = { + createHeapBuffer("test"), createHeapBuffer("testing"), createHeapBuffer("test") + }; + + boolean[] results = pattern.matchAll(buffers); + + assertThat(results).containsExactly(true, false, true); + } + + @Test + @DisplayName("matchAll(ByteBuffer[]) with mixed buffers should convert all to String") + void matchAll_mixedBuffers_convertsToString() { + Pattern 
pattern = Pattern.compile("test"); + ByteBuffer[] buffers = { + createDirectBuffer("test"), // Direct + createHeapBuffer("testing"), // Heap - forces String path for all + createDirectBuffer("test") // Direct + }; + + boolean[] results = pattern.matchAll(buffers); - assertThat(results).containsExactly(true, true, true, false); - } + assertThat(results).containsExactly(true, false, true); + } - @Test - @DisplayName("findAll(String[]) vs matchAll(String[]) - partial vs full") - void findAll_vs_matchAll_differentBehavior() { - Pattern pattern = Pattern.compile("test"); - String[] inputs = {"test", "testing", "other"}; + @Test + @DisplayName("matchAll(ByteBuffer[]) should produce same results as matchAll(String[])") + void matchAll_byteBufferArray_matchesStringArray() { + Pattern pattern = Pattern.compile("\\d+"); + String[] strings = {"123", "abc", "456"}; - boolean[] matchResults = pattern.matchAll(inputs); // Full match - boolean[] findResults = pattern.findAll(inputs); // Partial match + boolean[] stringResults = pattern.matchAll(strings); + + ByteBuffer[] buffers = { + createDirectBuffer("123"), createDirectBuffer("abc"), createDirectBuffer("456") + }; - assertThat(matchResults).containsExactly(true, false, false); // Only exact matches - assertThat(findResults).containsExactly(true, true, false); // Partial matches too - } + boolean[] bufferResults = pattern.matchAll(buffers); - @Test - @DisplayName("findAll(Collection) should work") - void findAll_collection_works() { - Pattern pattern = Pattern.compile("\\d+"); - List inputs = Arrays.asList("abc123", "def", "456ghi"); + assertThat(bufferResults).containsExactly(stringResults); + } - boolean[] results = pattern.findAll(inputs); + // ========== findAll(ByteBuffer[]) Tests ========== - assertThat(results).containsExactly(true, false, true); - } + @Test + @DisplayName("findAll(ByteBuffer[]) with DirectByteBuffers should use zero-copy") + void findAll_directBuffers_usesZeroCopy() { + Pattern pattern = 
Pattern.compile("test"); + ByteBuffer[] buffers = { + createDirectBuffer("test"), // Full match - finds + createDirectBuffer("testing"), // Partial match - finds + createDirectBuffer("other") // No match + }; - @Test - @DisplayName("findAll(String[]) with empty array should return empty") - void findAll_emptyArray_returnsEmpty() { - Pattern pattern = Pattern.compile("test"); + boolean[] results = pattern.findAll(buffers); - boolean[] results = pattern.findAll(new String[0]); + assertThat(results).containsExactly(true, true, false); + } - assertThat(results).isEmpty(); - } + @Test + @DisplayName( + "findAll(ByteBuffer[]) should differ from matchAll(ByteBuffer[]) for partial matches") + void findAll_vs_matchAll_byteBuffers_differentBehavior() { + Pattern pattern = Pattern.compile("test"); + ByteBuffer[] buffers = { + createDirectBuffer("test"), createDirectBuffer("testing"), createDirectBuffer("other") + }; - // ========== matchAll(ByteBuffer[]) Tests ========== + boolean[] matchResults = pattern.matchAll(buffers); // Full match + boolean[] findResults = pattern.findAll(buffers); // Partial match - @Test - @DisplayName("matchAll(ByteBuffer[]) with all DirectByteBuffers should use zero-copy") - void matchAll_allDirectBuffers_usesZeroCopy() { - Pattern pattern = Pattern.compile("test"); - ByteBuffer[] buffers = { - createDirectBuffer("test"), - createDirectBuffer("testing"), - createDirectBuffer("test") - }; + assertThat(matchResults).containsExactly(true, false, false); // Only exact + assertThat(findResults).containsExactly(true, true, false); // Includes partial + } - boolean[] results = pattern.matchAll(buffers); + @Test + @DisplayName("findAll(ByteBuffer[]) with empty array should return empty") + void findAll_emptyBufferArray_returnsEmpty() { + Pattern pattern = Pattern.compile("test"); - assertThat(results).containsExactly(true, false, true); - } + boolean[] results = pattern.findAll(new ByteBuffer[0]); - @Test - @DisplayName("matchAll(ByteBuffer[]) with all 
heap buffers should convert to String") - void matchAll_allHeapBuffers_convertsToString() { - Pattern pattern = Pattern.compile("test"); - ByteBuffer[] buffers = { - createHeapBuffer("test"), - createHeapBuffer("testing"), - createHeapBuffer("test") - }; + assertThat(results).isEmpty(); + } + + // ========== Integration Tests ========== - boolean[] results = pattern.matchAll(buffers); + @Test + @DisplayName("ByteBuffer[] bulk should work with Cassandra-like multi-column scenario") + void cassandraScenario_bulkByteBufferProcessing() { + Pattern emailPattern = Pattern.compile("[a-z]+@[a-z]+\\.[a-z]+"); - assertThat(results).containsExactly(true, false, true); - } + // Simulate Cassandra returning ByteBuffer[] from multiple cells + ByteBuffer[] cells = { + createDirectBuffer("user@example.com"), + createDirectBuffer("invalid"), + createDirectBuffer("admin@test.org"), + createDirectBuffer("also_invalid") + }; - @Test - @DisplayName("matchAll(ByteBuffer[]) with mixed buffers should convert all to String") - void matchAll_mixedBuffers_convertsToString() { - Pattern pattern = Pattern.compile("test"); - ByteBuffer[] buffers = { - createDirectBuffer("test"), // Direct - createHeapBuffer("testing"), // Heap - forces String path for all - createDirectBuffer("test") // Direct - }; + boolean[] results = emailPattern.matchAll(cells); - boolean[] results = pattern.matchAll(buffers); + assertThat(results).containsExactly(true, false, true, false); - assertThat(results).containsExactly(true, false, true); + // Count valid emails + long validCount = 0; + for (boolean result : results) { + if (result) validCount++; } + assertThat(validCount).isEqualTo(2); + } - @Test - @DisplayName("matchAll(ByteBuffer[]) should produce same results as matchAll(String[])") - void matchAll_byteBufferArray_matchesStringArray() { - Pattern pattern = Pattern.compile("\\d+"); - String[] strings = {"123", "abc", "456"}; + // ========== Null Handling ========== - boolean[] stringResults = 
pattern.matchAll(strings); + @Test + @DisplayName("findAll(String[]) should throw on null array") + void findAll_nullArray_throws() { + Pattern pattern = Pattern.compile("test"); - ByteBuffer[] buffers = { - createDirectBuffer("123"), - createDirectBuffer("abc"), - createDirectBuffer("456") - }; + assertThatNullPointerException().isThrownBy(() -> pattern.findAll((String[]) null)); + } - boolean[] bufferResults = pattern.matchAll(buffers); + @Test + @DisplayName("matchAll(ByteBuffer[]) should throw on null array") + void matchAll_nullByteBufferArray_throws() { + Pattern pattern = Pattern.compile("test"); - assertThat(bufferResults).containsExactly(stringResults); - } + assertThatNullPointerException().isThrownBy(() -> pattern.matchAll((ByteBuffer[]) null)); + } - // ========== findAll(ByteBuffer[]) Tests ========== + @Test + @DisplayName("findAll(ByteBuffer[]) should throw on null array") + void findAll_nullByteBufferArray_throws() { + Pattern pattern = Pattern.compile("test"); - @Test - @DisplayName("findAll(ByteBuffer[]) with DirectByteBuffers should use zero-copy") - void findAll_directBuffers_usesZeroCopy() { - Pattern pattern = Pattern.compile("test"); - ByteBuffer[] buffers = { - createDirectBuffer("test"), // Full match - finds - createDirectBuffer("testing"), // Partial match - finds - createDirectBuffer("other") // No match - }; - - boolean[] results = pattern.findAll(buffers); - - assertThat(results).containsExactly(true, true, false); - } - - @Test - @DisplayName("findAll(ByteBuffer[]) should differ from matchAll(ByteBuffer[]) for partial matches") - void findAll_vs_matchAll_byteBuffers_differentBehavior() { - Pattern pattern = Pattern.compile("test"); - ByteBuffer[] buffers = { - createDirectBuffer("test"), - createDirectBuffer("testing"), - createDirectBuffer("other") - }; - - boolean[] matchResults = pattern.matchAll(buffers); // Full match - boolean[] findResults = pattern.findAll(buffers); // Partial match - - 
assertThat(matchResults).containsExactly(true, false, false); // Only exact - assertThat(findResults).containsExactly(true, true, false); // Includes partial - } - - @Test - @DisplayName("findAll(ByteBuffer[]) with empty array should return empty") - void findAll_emptyBufferArray_returnsEmpty() { - Pattern pattern = Pattern.compile("test"); - - boolean[] results = pattern.findAll(new ByteBuffer[0]); - - assertThat(results).isEmpty(); - } - - // ========== Integration Tests ========== - - @Test - @DisplayName("ByteBuffer[] bulk should work with Cassandra-like multi-column scenario") - void cassandraScenario_bulkByteBufferProcessing() { - Pattern emailPattern = Pattern.compile("[a-z]+@[a-z]+\\.[a-z]+"); - - // Simulate Cassandra returning ByteBuffer[] from multiple cells - ByteBuffer[] cells = { - createDirectBuffer("user@example.com"), - createDirectBuffer("invalid"), - createDirectBuffer("admin@test.org"), - createDirectBuffer("also_invalid") - }; - - boolean[] results = emailPattern.matchAll(cells); - - assertThat(results).containsExactly(true, false, true, false); - - // Count valid emails - long validCount = 0; - for (boolean result : results) { - if (result) validCount++; - } - assertThat(validCount).isEqualTo(2); - } - - // ========== Null Handling ========== - - @Test - @DisplayName("findAll(String[]) should throw on null array") - void findAll_nullArray_throws() { - Pattern pattern = Pattern.compile("test"); - - assertThatNullPointerException() - .isThrownBy(() -> pattern.findAll((String[]) null)); - } - - @Test - @DisplayName("matchAll(ByteBuffer[]) should throw on null array") - void matchAll_nullByteBufferArray_throws() { - Pattern pattern = Pattern.compile("test"); - - assertThatNullPointerException() - .isThrownBy(() -> pattern.matchAll((ByteBuffer[]) null)); - } - - @Test - @DisplayName("findAll(ByteBuffer[]) should throw on null array") - void findAll_nullByteBufferArray_throws() { - Pattern pattern = Pattern.compile("test"); - - 
assertThatNullPointerException() - .isThrownBy(() -> pattern.findAll((ByteBuffer[]) null)); - } + assertThatNullPointerException().isThrownBy(() -> pattern.findAll((ByteBuffer[]) null)); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/ReplaceOperationsIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/ReplaceOperationsIT.java index 9dec6ea..5028a73 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/ReplaceOperationsIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/ReplaceOperationsIT.java @@ -16,302 +16,291 @@ package com.axonops.libre2.api; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; +import static org.assertj.core.api.Assertions.*; import java.util.Arrays; import java.util.List; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; -import static org.assertj.core.api.Assertions.*; - -/** - * Tests for replace operations (replaceFirst, replaceAll, bulk variants). - */ +/** Tests for replace operations (replaceFirst, replaceAll, bulk variants). 
*/ @DisplayName("Replace Operations") class ReplaceOperationsIT { - // ========== replaceFirst() Tests ========== + // ========== replaceFirst() Tests ========== + + @Test + @DisplayName("replaceFirst should replace first match") + void replaceFirst_firstMatch_replaced() { + Pattern pattern = Pattern.compile("\\d+"); + String result = pattern.replaceFirst("Item 123 costs $456", "XXX"); - @Test - @DisplayName("replaceFirst should replace first match") - void replaceFirst_firstMatch_replaced() { - Pattern pattern = Pattern.compile("\\d+"); - String result = pattern.replaceFirst("Item 123 costs $456", "XXX"); + assertThat(result).isEqualTo("Item XXX costs $456"); + } + + @Test + @DisplayName("replaceFirst should return original if no match") + void replaceFirst_noMatch_returnsOriginal() { + Pattern pattern = Pattern.compile("\\d+"); + String result = pattern.replaceFirst("No digits here", "XXX"); - assertThat(result).isEqualTo("Item XXX costs $456"); - } - - @Test - @DisplayName("replaceFirst should return original if no match") - void replaceFirst_noMatch_returnsOriginal() { - Pattern pattern = Pattern.compile("\\d+"); - String result = pattern.replaceFirst("No digits here", "XXX"); - - assertThat(result).isEqualTo("No digits here"); - } - - @Test - @DisplayName("replaceFirst should handle empty replacement") - void replaceFirst_emptyReplacement_removes() { - Pattern pattern = Pattern.compile("\\d+"); - String result = pattern.replaceFirst("Item 123", ""); - - assertThat(result).isEqualTo("Item "); - } - - // ========== replaceAll() Tests ========== - - @Test - @DisplayName("replaceAll should replace all matches") - void replaceAll_allMatches_replaced() { - Pattern pattern = Pattern.compile("\\d+"); - String result = pattern.replaceAll("Item 123 costs $456", "XXX"); - - assertThat(result).isEqualTo("Item XXX costs $XXX"); - } - - @Test - @DisplayName("replaceAll should return original if no matches") - void replaceAll_noMatches_returnsOriginal() { - Pattern pattern = 
Pattern.compile("\\d+"); - String result = pattern.replaceAll("No digits here", "XXX"); - - assertThat(result).isEqualTo("No digits here"); - } - - @Test - @DisplayName("replaceAll should handle empty replacement") - void replaceAll_emptyReplacement_removesAll() { - Pattern pattern = Pattern.compile("\\d+"); - String result = pattern.replaceAll("a1b2c3", ""); - - assertThat(result).isEqualTo("abc"); - } - - @Test - @DisplayName("replaceAll should redact emails") - void replaceAll_redactEmails_works() { - Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}"); - String result = emailPattern.replaceAll("Contact user@example.com or admin@test.org", "[REDACTED]"); - - assertThat(result).isEqualTo("Contact [REDACTED] or [REDACTED]"); - } - - // ========== Backreference Tests ========== - - @Test - @DisplayName("replaceFirst should support backreferences with \\\\1") - void replaceFirst_backreferences_work() { - Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})"); - String result = pattern.replaceFirst("Date: 2025-11-24", "\\2/\\3/\\1"); - - assertThat(result).isEqualTo("Date: 11/24/2025"); - } - - @Test - @DisplayName("replaceAll should support backreferences") - void replaceAll_backreferences_work() { - Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})"); - String result = pattern.replaceAll("Call 555-1234 or 555-5678", "(\\1) \\2"); - - assertThat(result).isEqualTo("Call (555) 1234 or (555) 5678"); - } - - @Test - @DisplayName("replaceAll should swap groups with backreferences") - void replaceAll_swapGroups_works() { - Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)"); - String result = pattern.replaceAll("user@example.com", "\\2 (\\1)"); - - assertThat(result).isEqualTo("example.com (user)"); - } - - @Test - @DisplayName("replaceAll should support multiple backreferences") - void replaceAll_multipleBackrefs_work() { - Pattern pattern = Pattern.compile("(\\w+)\\s+(\\w+)\\s+(\\w+)"); - String result = 
pattern.replaceAll("one two three", "\\3-\\2-\\1"); - - assertThat(result).isEqualTo("three-two-one"); - } - - // ========== Bulk Replace Tests ========== - - @Test - @DisplayName("replaceAll(array) should replace in all strings") - void replaceAll_array_replacesAll() { - Pattern ssnPattern = Pattern.compile("\\d{3}-\\d{2}-\\d{4}"); - String[] logs = { - "User 123-45-6789 logged in", - "No PII here", - "SSN: 987-65-4321" - }; - - String[] redacted = ssnPattern.replaceAll(logs, "[REDACTED]"); - - assertThat(redacted).containsExactly( - "User [REDACTED] logged in", - "No PII here", - "SSN: [REDACTED]" - ); - } - - @Test - @DisplayName("replaceAll(collection) should replace in all strings") - void replaceAll_collection_replacesAll() { - Pattern pattern = Pattern.compile("\\d+"); - List inputs = Arrays.asList("a1b2", "c3d4", "no digits"); - - List results = pattern.replaceAll(inputs, "X"); - - assertThat(results).containsExactly("aXbX", "cXdX", "no digits"); - } - - @Test - @DisplayName("replaceAll(array) should support backreferences") - void replaceAll_arrayBackrefs_work() { - Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})"); - String[] inputs = {"555-1234", "555-5678"}; - - String[] results = pattern.replaceAll(inputs, "(\\1) \\2"); - - assertThat(results).containsExactly("(555) 1234", "(555) 5678"); - } - - @Test - @DisplayName("replaceAll(array) with empty array should return empty") - void replaceAll_emptyArray_returnsEmpty() { - Pattern pattern = Pattern.compile("\\d+"); - String[] results = pattern.replaceAll(new String[0], "XXX"); - - assertThat(results).isEmpty(); - } - - @Test - @DisplayName("replaceAll(collection) with empty collection should return empty") - void replaceAll_emptyCollection_returnsEmpty() { - Pattern pattern = Pattern.compile("\\d+"); - List results = pattern.replaceAll(List.of(), "XXX"); - - assertThat(results).isEmpty(); - } - - // ========== Edge Cases ========== - - @Test - @DisplayName("replace with special regex characters in 
replacement") - void replace_specialCharsInReplacement_literal() { - Pattern pattern = Pattern.compile("test"); - String result = pattern.replaceAll("test test", ".$^*+?[]{}()"); - - // Replacement is literal, not regex - assertThat(result).isEqualTo(".$^*+?[]{}() .$^*+?[]{}()"); - } - - @Test - @DisplayName("replace on empty input should return empty") - void replace_emptyInput_returnsEmpty() { - Pattern pattern = Pattern.compile("\\d+"); - String result = pattern.replaceAll("", "XXX"); - - assertThat(result).isEmpty(); - } - - @Test - @DisplayName("replace with unicode should work") - void replace_unicode_works() { - // Use simpler emoji pattern - Pattern pattern = Pattern.compile("test"); - String result = pattern.replaceAll("test🙂test", "OK"); - - assertThat(result).isEqualTo("OK🙂OK"); - } - - // ========== Real-World Scenarios ========== - - @Test - @DisplayName("Sanitize log data - remove sensitive info") - void realWorld_sanitizeLogs() { - Pattern ssnPattern = Pattern.compile("\\d{3}-\\d{2}-\\d{4}"); - Pattern ccPattern = Pattern.compile("\\d{4}-\\d{4}-\\d{4}-\\d{4}"); - - String log = "User SSN: 123-45-6789, CC: 1234-5678-9012-3456"; - - String sanitized = ssnPattern.replaceAll(log, "[SSN-REDACTED]"); - sanitized = ccPattern.replaceAll(sanitized, "[CC-REDACTED]"); - - assertThat(sanitized).isEqualTo("User SSN: [SSN-REDACTED], CC: [CC-REDACTED]"); - } - - @Test - @DisplayName("Reformat phone numbers") - void realWorld_reformatPhones() { - Pattern pattern = Pattern.compile("(\\d{3})-(\\d{3})-(\\d{4})"); - String result = pattern.replaceAll("Phone: 555-123-4567", "(\\1) \\2-\\3"); - - assertThat(result).isEqualTo("Phone: (555) 123-4567"); - } - - @Test - @DisplayName("Batch password sanitization") - void realWorld_batchPasswordSanitization() { - Pattern passwordPattern = Pattern.compile("password=[^&\\s]+"); - String[] urls = { - "https://api.com/login?user=admin&password=secret123", - "https://api.com/data?id=1", - 
"https://api.com/auth?password=pass456&token=abc" - }; + assertThat(result).isEqualTo("No digits here"); + } + + @Test + @DisplayName("replaceFirst should handle empty replacement") + void replaceFirst_emptyReplacement_removes() { + Pattern pattern = Pattern.compile("\\d+"); + String result = pattern.replaceFirst("Item 123", ""); + + assertThat(result).isEqualTo("Item "); + } + + // ========== replaceAll() Tests ========== + + @Test + @DisplayName("replaceAll should replace all matches") + void replaceAll_allMatches_replaced() { + Pattern pattern = Pattern.compile("\\d+"); + String result = pattern.replaceAll("Item 123 costs $456", "XXX"); + + assertThat(result).isEqualTo("Item XXX costs $XXX"); + } + + @Test + @DisplayName("replaceAll should return original if no matches") + void replaceAll_noMatches_returnsOriginal() { + Pattern pattern = Pattern.compile("\\d+"); + String result = pattern.replaceAll("No digits here", "XXX"); + + assertThat(result).isEqualTo("No digits here"); + } + + @Test + @DisplayName("replaceAll should handle empty replacement") + void replaceAll_emptyReplacement_removesAll() { + Pattern pattern = Pattern.compile("\\d+"); + String result = pattern.replaceAll("a1b2c3", ""); + + assertThat(result).isEqualTo("abc"); + } + + @Test + @DisplayName("replaceAll should redact emails") + void replaceAll_redactEmails_works() { + Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}"); + String result = + emailPattern.replaceAll("Contact user@example.com or admin@test.org", "[REDACTED]"); + + assertThat(result).isEqualTo("Contact [REDACTED] or [REDACTED]"); + } + + // ========== Backreference Tests ========== + + @Test + @DisplayName("replaceFirst should support backreferences with \\\\1") + void replaceFirst_backreferences_work() { + Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})"); + String result = pattern.replaceFirst("Date: 2025-11-24", "\\2/\\3/\\1"); + + assertThat(result).isEqualTo("Date: 11/24/2025"); + } 
+ + @Test + @DisplayName("replaceAll should support backreferences") + void replaceAll_backreferences_work() { + Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})"); + String result = pattern.replaceAll("Call 555-1234 or 555-5678", "(\\1) \\2"); + + assertThat(result).isEqualTo("Call (555) 1234 or (555) 5678"); + } + + @Test + @DisplayName("replaceAll should swap groups with backreferences") + void replaceAll_swapGroups_works() { + Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)"); + String result = pattern.replaceAll("user@example.com", "\\2 (\\1)"); + + assertThat(result).isEqualTo("example.com (user)"); + } + + @Test + @DisplayName("replaceAll should support multiple backreferences") + void replaceAll_multipleBackrefs_work() { + Pattern pattern = Pattern.compile("(\\w+)\\s+(\\w+)\\s+(\\w+)"); + String result = pattern.replaceAll("one two three", "\\3-\\2-\\1"); + + assertThat(result).isEqualTo("three-two-one"); + } + + // ========== Bulk Replace Tests ========== + + @Test + @DisplayName("replaceAll(array) should replace in all strings") + void replaceAll_array_replacesAll() { + Pattern ssnPattern = Pattern.compile("\\d{3}-\\d{2}-\\d{4}"); + String[] logs = {"User 123-45-6789 logged in", "No PII here", "SSN: 987-65-4321"}; + + String[] redacted = ssnPattern.replaceAll(logs, "[REDACTED]"); + + assertThat(redacted) + .containsExactly("User [REDACTED] logged in", "No PII here", "SSN: [REDACTED]"); + } + + @Test + @DisplayName("replaceAll(collection) should replace in all strings") + void replaceAll_collection_replacesAll() { + Pattern pattern = Pattern.compile("\\d+"); + List inputs = Arrays.asList("a1b2", "c3d4", "no digits"); + + List results = pattern.replaceAll(inputs, "X"); + + assertThat(results).containsExactly("aXbX", "cXdX", "no digits"); + } + + @Test + @DisplayName("replaceAll(array) should support backreferences") + void replaceAll_arrayBackrefs_work() { + Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})"); + String[] inputs = 
{"555-1234", "555-5678"}; + + String[] results = pattern.replaceAll(inputs, "(\\1) \\2"); + + assertThat(results).containsExactly("(555) 1234", "(555) 5678"); + } + + @Test + @DisplayName("replaceAll(array) with empty array should return empty") + void replaceAll_emptyArray_returnsEmpty() { + Pattern pattern = Pattern.compile("\\d+"); + String[] results = pattern.replaceAll(new String[0], "XXX"); + + assertThat(results).isEmpty(); + } + + @Test + @DisplayName("replaceAll(collection) with empty collection should return empty") + void replaceAll_emptyCollection_returnsEmpty() { + Pattern pattern = Pattern.compile("\\d+"); + List results = pattern.replaceAll(List.of(), "XXX"); + + assertThat(results).isEmpty(); + } + + // ========== Edge Cases ========== + + @Test + @DisplayName("replace with special regex characters in replacement") + void replace_specialCharsInReplacement_literal() { + Pattern pattern = Pattern.compile("test"); + String result = pattern.replaceAll("test test", ".$^*+?[]{}()"); + + // Replacement is literal, not regex + assertThat(result).isEqualTo(".$^*+?[]{}() .$^*+?[]{}()"); + } + + @Test + @DisplayName("replace on empty input should return empty") + void replace_emptyInput_returnsEmpty() { + Pattern pattern = Pattern.compile("\\d+"); + String result = pattern.replaceAll("", "XXX"); + + assertThat(result).isEmpty(); + } + + @Test + @DisplayName("replace with unicode should work") + void replace_unicode_works() { + // Use simpler emoji pattern + Pattern pattern = Pattern.compile("test"); + String result = pattern.replaceAll("test🙂test", "OK"); + + assertThat(result).isEqualTo("OK🙂OK"); + } + + // ========== Real-World Scenarios ========== + + @Test + @DisplayName("Sanitize log data - remove sensitive info") + void realWorld_sanitizeLogs() { + Pattern ssnPattern = Pattern.compile("\\d{3}-\\d{2}-\\d{4}"); + Pattern ccPattern = Pattern.compile("\\d{4}-\\d{4}-\\d{4}-\\d{4}"); + + String log = "User SSN: 123-45-6789, CC: 1234-5678-9012-3456"; + + String 
sanitized = ssnPattern.replaceAll(log, "[SSN-REDACTED]"); + sanitized = ccPattern.replaceAll(sanitized, "[CC-REDACTED]"); + + assertThat(sanitized).isEqualTo("User SSN: [SSN-REDACTED], CC: [CC-REDACTED]"); + } + + @Test + @DisplayName("Reformat phone numbers") + void realWorld_reformatPhones() { + Pattern pattern = Pattern.compile("(\\d{3})-(\\d{3})-(\\d{4})"); + String result = pattern.replaceAll("Phone: 555-123-4567", "(\\1) \\2-\\3"); + + assertThat(result).isEqualTo("Phone: (555) 123-4567"); + } + + @Test + @DisplayName("Batch password sanitization") + void realWorld_batchPasswordSanitization() { + Pattern passwordPattern = Pattern.compile("password=[^&\\s]+"); + String[] urls = { + "https://api.com/login?user=admin&password=secret123", + "https://api.com/data?id=1", + "https://api.com/auth?password=pass456&token=abc" + }; - String[] sanitized = passwordPattern.replaceAll(urls, "password=[REDACTED]"); + String[] sanitized = passwordPattern.replaceAll(urls, "password=[REDACTED]"); - assertThat(sanitized).containsExactly( + assertThat(sanitized) + .containsExactly( "https://api.com/login?user=admin&password=[REDACTED]", "https://api.com/data?id=1", - "https://api.com/auth?password=[REDACTED]&token=abc" - ); - } - - // ========== Validation Tests ========== - - @Test - @DisplayName("replaceFirst should throw on null input") - void replaceFirst_nullInput_throws() { - Pattern pattern = Pattern.compile("test"); - - assertThatNullPointerException() - .isThrownBy(() -> pattern.replaceFirst((String) null, "replacement")) - .withMessageContaining("null"); - } - - @Test - @DisplayName("replaceFirst should throw on null replacement") - void replaceFirst_nullReplacement_throws() { - Pattern pattern = Pattern.compile("test"); - - assertThatNullPointerException() - .isThrownBy(() -> pattern.replaceFirst("test", null)) - .withMessageContaining("null"); - } - - @Test - @DisplayName("replaceAll(array) should throw on null array") - void replaceAll_nullArray_throws() { - Pattern 
pattern = Pattern.compile("test"); - - assertThatNullPointerException() - .isThrownBy(() -> pattern.replaceAll((String[]) null, "replacement")) - .withMessageContaining("null"); - } - - @Test - @DisplayName("replaceAll(collection) should throw on null collection") - void replaceAll_nullCollection_throws() { - Pattern pattern = Pattern.compile("test"); - - assertThatNullPointerException() - .isThrownBy(() -> pattern.replaceAll((java.util.Collection) null, "replacement")) - .withMessageContaining("null"); - } + "https://api.com/auth?password=[REDACTED]&token=abc"); + } + + // ========== Validation Tests ========== + + @Test + @DisplayName("replaceFirst should throw on null input") + void replaceFirst_nullInput_throws() { + Pattern pattern = Pattern.compile("test"); + + assertThatNullPointerException() + .isThrownBy(() -> pattern.replaceFirst((String) null, "replacement")) + .withMessageContaining("null"); + } + + @Test + @DisplayName("replaceFirst should throw on null replacement") + void replaceFirst_nullReplacement_throws() { + Pattern pattern = Pattern.compile("test"); + + assertThatNullPointerException() + .isThrownBy(() -> pattern.replaceFirst("test", null)) + .withMessageContaining("null"); + } + + @Test + @DisplayName("replaceAll(array) should throw on null array") + void replaceAll_nullArray_throws() { + Pattern pattern = Pattern.compile("test"); + + assertThatNullPointerException() + .isThrownBy(() -> pattern.replaceAll((String[]) null, "replacement")) + .withMessageContaining("null"); + } + + @Test + @DisplayName("replaceAll(collection) should throw on null collection") + void replaceAll_nullCollection_throws() { + Pattern pattern = Pattern.compile("test"); + + assertThatNullPointerException() + .isThrownBy(() -> pattern.replaceAll((java.util.Collection) null, "replacement")) + .withMessageContaining("null"); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/CacheFullInUseIT.java 
b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/CacheFullInUseIT.java index a9ff894..94f4edf 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/CacheFullInUseIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/CacheFullInUseIT.java @@ -1,7 +1,12 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -9,275 +14,274 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; - -import static org.assertj.core.api.Assertions.*; - /** * CRITICAL: Tests for cache full with all patterns in use scenario. * - * Verifies no memory leaks when cache full and all patterns have active matchers. + *

Verifies no memory leaks when cache full and all patterns have active matchers. */ class CacheFullInUseIT { - private static final Logger logger = LoggerFactory.getLogger(CacheFullInUseIT.class); + private static final Logger logger = LoggerFactory.getLogger(CacheFullInUseIT.class); + + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testCacheFull_AllInUse_NewPatternCompiledWithoutCaching() { + // Note: Using default cache size 50K would be too slow for test + // This test verifies the logic works with smaller numbers + + // Compile 10 patterns and keep matchers active on all + List activeMatchers = new ArrayList<>(); + + for (int i = 0; i < 10; i++) { + Pattern p = Pattern.compile("pattern" + i); + Matcher m = p.matcher("test"); + activeMatchers.add(m); + // refCount = 1 on each pattern + } + + CacheStatistics before = Pattern.getCacheStatistics(); + assertThat(before.currentSize()).isEqualTo(10); + + // All patterns have refCount > 0 + // Compile NEW pattern (not in cache) + Pattern newPattern = Pattern.compile("new_pattern_not_in_cache"); + + // Pattern should compile successfully + assertThat(newPattern).isNotNull(); + assertThat(newPattern.matches("new_pattern_not_in_cache")).isTrue(); + + CacheStatistics after = Pattern.getCacheStatistics(); - @BeforeEach - void setUp() { - Pattern.resetCache(); + // With small cache (10 patterns), new pattern either: + // - Cached if we could evict something + // - Or compiled without caching if all in use + + // Clean up + for (Matcher m : activeMatchers) { + m.close(); + } + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testDeferredCleanup_PatternsFreedWhenMatchersClosed() { + // Compile patterns + List patterns = new ArrayList<>(); + List matchers = new ArrayList<>(); + + for (int i = 0; i < 20; i++) { + Pattern p = Pattern.compile("pattern" + i); + patterns.add(p); 
+ + // Create matcher (refCount = 1) + Matcher m = p.matcher("test"); + matchers.add(m); + } + + // Force cache to try evicting (compile more than cache can hold) + for (int i = 20; i < 100; i++) { + Pattern.compile("filler" + i); } - @AfterEach - void tearDown() { - Pattern.resetCache(); + // Some patterns may be in deferred cleanup list + CacheStatistics midStats = Pattern.getCacheStatistics(); + long deferredCount = midStats.evictionsDeferred(); + + logger.info("Deferred evictions: {}", deferredCount); + + // Close all matchers + for (Matcher m : matchers) { + m.close(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testCacheFull_AllInUse_NewPatternCompiledWithoutCaching() { - // Note: Using default cache size 50K would be too slow for test - // This test verifies the logic works with smaller numbers + // All refCounts now 0 + for (Pattern p : patterns) { + assertThat(p.getRefCount()).isEqualTo(0); + } - // Compile 10 patterns and keep matchers active on all - List activeMatchers = new ArrayList<>(); + // Trigger idle eviction scan (which calls cleanupDeferredPatterns) + try { + Thread.sleep(100); // Let background thread run + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } - for (int i = 0; i < 10; i++) { - Pattern p = Pattern.compile("pattern" + i); - Matcher m = p.matcher("test"); - activeMatchers.add(m); - // refCount = 1 on each pattern - } + // Deferred patterns should eventually be cleaned + // (They're freed when background thread runs cleanupDeferredPatterns) + } - CacheStatistics before = Pattern.getCacheStatistics(); - assertThat(before.currentSize()).isEqualTo(10); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testDeferredCleanup_Tracking() { + // Compile patterns with active matchers + List matchers = new ArrayList<>(); - // All patterns have refCount > 0 - // Compile NEW pattern (not in cache) - Pattern newPattern = Pattern.compile("new_pattern_not_in_cache"); + for (int i = 0; i < 
20; i++) { + Pattern p = Pattern.compile("pattern" + i); + Matcher m = p.matcher("test"); + matchers.add(m); + } - // Pattern should compile successfully - assertThat(newPattern).isNotNull(); - assertThat(newPattern.matches("new_pattern_not_in_cache")).isTrue(); + // Trigger evictions - patterns with matchers go to deferred list + for (int i = 20; i < 100; i++) { + Pattern.compile("trigger" + i); + } - CacheStatistics after = Pattern.getCacheStatistics(); + CacheStatistics afterEviction = Pattern.getCacheStatistics(); - // With small cache (10 patterns), new pattern either: - // - Cached if we could evict something - // - Or compiled without caching if all in use + // Should have some deferred evictions if patterns were in use during eviction + // (May be 0 if cache had room or patterns could be evicted) + assertThat(afterEviction.evictionsDeferred()).isGreaterThanOrEqualTo(0); - // Clean up - for (Matcher m : activeMatchers) { - m.close(); - } + // Close matchers - patterns now have refCount = 0 + for (Matcher m : matchers) { + m.close(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testDeferredCleanup_PatternsFreedWhenMatchersClosed() { - // Compile patterns - List patterns = new ArrayList<>(); - List matchers = new ArrayList<>(); - - for (int i = 0; i < 20; i++) { - Pattern p = Pattern.compile("pattern" + i); - patterns.add(p); - - // Create matcher (refCount = 1) - Matcher m = p.matcher("test"); - matchers.add(m); - } - - // Force cache to try evicting (compile more than cache can hold) - for (int i = 20; i < 100; i++) { - Pattern.compile("filler" + i); - } - - // Some patterns may be in deferred cleanup list - CacheStatistics midStats = Pattern.getCacheStatistics(); - long deferredCount = midStats.evictionsDeferred(); - - logger.info("Deferred evictions: {}", deferredCount); - - // Close all matchers - for (Matcher m : matchers) { - m.close(); - } - - // All refCounts now 0 - for (Pattern p : patterns) { - 
assertThat(p.getRefCount()).isEqualTo(0); - } - - // Trigger idle eviction scan (which calls cleanupDeferredPatterns) - try { - Thread.sleep(100); // Let background thread run - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - - // Deferred patterns should eventually be cleaned - // (They're freed when background thread runs cleanupDeferredPatterns) + // Patterns are now eligible for cleanup + // They'll be freed on next idle eviction scan (every 60s) + // For this test, we just verify the mechanism is in place + assertThat(afterEviction.deferredCleanupPending()).isGreaterThanOrEqualTo(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testClear_ClosesDeferredPatterns() { + // Compile patterns with active matchers (these will go to deferred list when evicted) + List matchers = new ArrayList<>(); + + for (int i = 0; i < 20; i++) { + Pattern p = Pattern.compile("pattern" + i); + Matcher m = p.matcher("test"); + matchers.add(m); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testDeferredCleanup_Tracking() { - // Compile patterns with active matchers - List matchers = new ArrayList<>(); - - for (int i = 0; i < 20; i++) { - Pattern p = Pattern.compile("pattern" + i); - Matcher m = p.matcher("test"); - matchers.add(m); - } - - // Trigger evictions - patterns with matchers go to deferred list - for (int i = 20; i < 100; i++) { - Pattern.compile("trigger" + i); - } - - CacheStatistics afterEviction = Pattern.getCacheStatistics(); - - // Should have some deferred evictions if patterns were in use during eviction - // (May be 0 if cache had room or patterns could be evicted) - assertThat(afterEviction.evictionsDeferred()).isGreaterThanOrEqualTo(0); - - // Close matchers - patterns now have refCount = 0 - for (Matcher m : matchers) { - m.close(); - } - - // Patterns are now eligible for cleanup - // They'll be freed on next idle eviction scan (every 60s) - // For this test, we just verify the mechanism is 
in place - assertThat(afterEviction.deferredCleanupPending()).isGreaterThanOrEqualTo(0); + // Trigger evictions - patterns with matchers go to deferred list + for (int i = 20; i < 100; i++) { + Pattern.compile("trigger" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testClear_ClosesDeferredPatterns() { - // Compile patterns with active matchers (these will go to deferred list when evicted) - List matchers = new ArrayList<>(); + CacheStatistics beforeClear = Pattern.getCacheStatistics(); + logger.info( + "Before clear - deferred pending: {}, evictions deferred: {}", + beforeClear.deferredCleanupPending(), + beforeClear.evictionsDeferred()); - for (int i = 0; i < 20; i++) { - Pattern p = Pattern.compile("pattern" + i); - Matcher m = p.matcher("test"); - matchers.add(m); - } + // Clear the cache - patterns still in use go to deferred list + Pattern.clearCache(); - // Trigger evictions - patterns with matchers go to deferred list - for (int i = 20; i < 100; i++) { - Pattern.compile("trigger" + i); - } + CacheStatistics afterClear = Pattern.getCacheStatistics(); - CacheStatistics beforeClear = Pattern.getCacheStatistics(); - logger.info("Before clear - deferred pending: {}, evictions deferred: {}", - beforeClear.deferredCleanupPending(), beforeClear.evictionsDeferred()); + // Cache should be empty + assertThat(afterClear.currentSize()).isEqualTo(0); - // Clear the cache - patterns still in use go to deferred list - Pattern.clearCache(); + // Deferred cleanup list should have patterns that are still in use + // (We have 20 matchers still open, so those patterns should be deferred) + assertThat(afterClear.deferredCleanupPending()) + .as("Patterns with active matchers should be in deferred list") + .isGreaterThan(0); - CacheStatistics afterClear = Pattern.getCacheStatistics(); + // Clean up matchers + for (Matcher m : matchers) { + m.close(); + } - // Cache should be empty - assertThat(afterClear.currentSize()).isEqualTo(0); + logger.info("Clear 
successfully closed all deferred patterns"); + } - // Deferred cleanup list should have patterns that are still in use - // (We have 20 matchers still open, so those patterns should be deferred) - assertThat(afterClear.deferredCleanupPending()) - .as("Patterns with active matchers should be in deferred list") - .isGreaterThan(0); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testReset_ClosesDeferredPatternsAndResetsStats() { + // Compile patterns with active matchers + List matchers = new ArrayList<>(); - // Clean up matchers - for (Matcher m : matchers) { - m.close(); - } + for (int i = 0; i < 20; i++) { + Pattern p = Pattern.compile("pattern" + i); + Matcher m = p.matcher("test"); + matchers.add(m); + } - logger.info("Clear successfully closed all deferred patterns"); + // Trigger evictions + for (int i = 20; i < 100; i++) { + Pattern.compile("trigger" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testReset_ClosesDeferredPatternsAndResetsStats() { - // Compile patterns with active matchers - List matchers = new ArrayList<>(); + CacheStatistics beforeReset = Pattern.getCacheStatistics(); + logger.info( + "Before reset - hits: {}, misses: {}, deferred: {}", + beforeReset.hits(), + beforeReset.misses(), + beforeReset.evictionsDeferred()); - for (int i = 0; i < 20; i++) { - Pattern p = Pattern.compile("pattern" + i); - Matcher m = p.matcher("test"); - matchers.add(m); - } + // Ensure we have some stats + assertThat(beforeReset.misses()).isGreaterThan(0); - // Trigger evictions - for (int i = 20; i < 100; i++) { - Pattern.compile("trigger" + i); - } + // Reset the cache - patterns with active matchers go to deferred + Pattern.resetCache(); - CacheStatistics beforeReset = Pattern.getCacheStatistics(); - logger.info("Before reset - hits: {}, misses: {}, deferred: {}", - beforeReset.hits(), beforeReset.misses(), beforeReset.evictionsDeferred()); + CacheStatistics afterReset = Pattern.getCacheStatistics(); - // Ensure we have some 
stats - assertThat(beforeReset.misses()).isGreaterThan(0); + // Cache should be empty + assertThat(afterReset.currentSize()).isEqualTo(0); - // Reset the cache - patterns with active matchers go to deferred - Pattern.resetCache(); + // Deferred may still have patterns if matchers open + // (Reset doesn't forcibly close in-use patterns) + assertThat(afterReset.deferredCleanupPending()).isGreaterThanOrEqualTo(0); - CacheStatistics afterReset = Pattern.getCacheStatistics(); + // All statistics should be reset + assertThat(afterReset.hits()).isEqualTo(0); + assertThat(afterReset.misses()).isEqualTo(0); + assertThat(afterReset.evictionsLRU()).isEqualTo(0); + assertThat(afterReset.evictionsIdle()).isEqualTo(0); + assertThat(afterReset.evictionsDeferred()).isEqualTo(0); - // Cache should be empty - assertThat(afterReset.currentSize()).isEqualTo(0); + // Clean up matchers + for (Matcher m : matchers) { + m.close(); + } - // Deferred may still have patterns if matchers open - // (Reset doesn't forcibly close in-use patterns) - assertThat(afterReset.deferredCleanupPending()).isGreaterThanOrEqualTo(0); + logger.info("Reset successfully closed deferred patterns and reset all stats"); + } - // All statistics should be reset - assertThat(afterReset.hits()).isEqualTo(0); - assertThat(afterReset.misses()).isEqualTo(0); - assertThat(afterReset.evictionsLRU()).isEqualTo(0); - assertThat(afterReset.evictionsIdle()).isEqualTo(0); - assertThat(afterReset.evictionsDeferred()).isEqualTo(0); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testClear_MultipleTimes_NoDuplicateClose() { + // Compile patterns with active matchers + List matchers = new ArrayList<>(); - // Clean up matchers - for (Matcher m : matchers) { - m.close(); - } + for (int i = 0; i < 10; i++) { + Pattern p = Pattern.compile("pattern" + i); + Matcher m = p.matcher("test"); + matchers.add(m); + } - logger.info("Reset successfully closed deferred patterns and reset all stats"); + // Trigger evictions + for 
(int i = 10; i < 50; i++) { + Pattern.compile("trigger" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testClear_MultipleTimes_NoDuplicateClose() { - // Compile patterns with active matchers - List matchers = new ArrayList<>(); - - for (int i = 0; i < 10; i++) { - Pattern p = Pattern.compile("pattern" + i); - Matcher m = p.matcher("test"); - matchers.add(m); - } - - // Trigger evictions - for (int i = 10; i < 50; i++) { - Pattern.compile("trigger" + i); - } - - // Clear multiple times - should not cause issues - Pattern.clearCache(); - Pattern.clearCache(); - Pattern.clearCache(); - - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.currentSize()).isEqualTo(0); - // Deferred may have patterns if matchers still open - assertThat(stats.deferredCleanupPending()).isGreaterThanOrEqualTo(0); - - // Clean up matchers - for (Matcher m : matchers) { - m.close(); - } + // Clear multiple times - should not cause issues + Pattern.clearCache(); + Pattern.clearCache(); + Pattern.clearCache(); + + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.currentSize()).isEqualTo(0); + // Deferred may have patterns if matchers still open + assertThat(stats.deferredCleanupPending()).isGreaterThanOrEqualTo(0); + + // Clean up matchers + for (Matcher m : matchers) { + m.close(); } + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/CacheIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/CacheIT.java index 5f8b66f..4dfbcb1 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/CacheIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/CacheIT.java @@ -1,231 +1,226 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Pattern; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import 
java.time.Duration; - -import static org.assertj.core.api.Assertions.*; - -/** - * Tests for pattern cache functionality. - */ +/** Tests for pattern cache functionality. */ class CacheIT { - @BeforeEach - void setUp() { - // Fully reset cache and statistics before each test - Pattern.resetCache(); - } - - @AfterEach - void tearDown() { - // Fully reset cache and statistics after each test - Pattern.resetCache(); - } - - @Test - void testCacheHitOnSecondCompile() { - // First compile - should be a miss - Pattern p1 = Pattern.compile("test"); - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.misses()).isEqualTo(1); - assertThat(stats.hits()).isEqualTo(0); - - // Second compile of same pattern - should be a hit - Pattern p2 = Pattern.compile("test"); - stats = Pattern.getCacheStatistics(); - assertThat(stats.hits()).isEqualTo(1); - assertThat(stats.misses()).isEqualTo(1); - - // Should return same instance - assertThat(p1).isSameAs(p2); + @BeforeEach + void setUp() { + // Fully reset cache and statistics before each test + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + // Fully reset cache and statistics after each test + Pattern.resetCache(); + } + + @Test + void testCacheHitOnSecondCompile() { + // First compile - should be a miss + Pattern p1 = Pattern.compile("test"); + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.misses()).isEqualTo(1); + assertThat(stats.hits()).isEqualTo(0); + + // Second compile of same pattern - should be a hit + Pattern p2 = Pattern.compile("test"); + stats = Pattern.getCacheStatistics(); + assertThat(stats.hits()).isEqualTo(1); + assertThat(stats.misses()).isEqualTo(1); + + // Should return same instance + assertThat(p1).isSameAs(p2); + } + + @Test + void testCacheMissOnDifferentPattern() { + Pattern p1 = Pattern.compile("pattern1"); + Pattern p2 = Pattern.compile("pattern2"); + + CacheStatistics stats = Pattern.getCacheStatistics(); + 
assertThat(stats.misses()).isEqualTo(2); // Both were misses + assertThat(stats.hits()).isEqualTo(0); + + // Should be different instances + assertThat(p1).isNotSameAs(p2); + } + + @Test + void testCaseInsensitiveCreatesSeparateCacheEntry() { + Pattern p1 = Pattern.compile("TEST", true); // Case-sensitive + Pattern p2 = Pattern.compile("TEST", false); // Case-insensitive + + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.misses()).isEqualTo(2); // Different cache keys + assertThat(stats.currentSize()).isEqualTo(2); + + // Should be different instances + assertThat(p1).isNotSameAs(p2); + } + + @Test + void testCacheHitRate() { + // Compile 10 unique patterns + for (int i = 0; i < 10; i++) { + Pattern.compile("pattern" + i); } - @Test - void testCacheMissOnDifferentPattern() { - Pattern p1 = Pattern.compile("pattern1"); - Pattern p2 = Pattern.compile("pattern2"); - - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.misses()).isEqualTo(2); // Both were misses - assertThat(stats.hits()).isEqualTo(0); - - // Should be different instances - assertThat(p1).isNotSameAs(p2); + // Compile same 10 patterns again (all hits) + for (int i = 0; i < 10; i++) { + Pattern.compile("pattern" + i); } - @Test - void testCaseInsensitiveCreatesSeparateCacheEntry() { - Pattern p1 = Pattern.compile("TEST", true); // Case-sensitive - Pattern p2 = Pattern.compile("TEST", false); // Case-insensitive + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.misses()).isEqualTo(10); + assertThat(stats.hits()).isEqualTo(10); + assertThat(stats.hitRate()).isEqualTo(0.5); // 50% hit rate + } - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.misses()).isEqualTo(2); // Different cache keys - assertThat(stats.currentSize()).isEqualTo(2); + @Test + void testCacheSizeLimit() throws InterruptedException { + // Compile more patterns than cache max size + int cacheSize = 50000; // Default max size + int 
patternsToCompile = cacheSize + 100; - // Should be different instances - assertThat(p1).isNotSameAs(p2); + for (int i = 0; i < patternsToCompile; i++) { + Pattern.compile("pattern" + i); } - @Test - void testCacheHitRate() { - // Compile 10 unique patterns - for (int i = 0; i < 10; i++) { - Pattern.compile("pattern" + i); - } - - // Compile same 10 patterns again (all hits) - for (int i = 0; i < 10; i++) { - Pattern.compile("pattern" + i); - } - - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.misses()).isEqualTo(10); - assertThat(stats.hits()).isEqualTo(10); - assertThat(stats.hitRate()).isEqualTo(0.5); // 50% hit rate + // Wait for async LRU eviction to complete (soft limits) + Thread.sleep(200); + + CacheStatistics stats = Pattern.getCacheStatistics(); + // With soft limits, cache can temporarily exceed max but should settle back down + // Allow up to 10% overage due to async eviction timing + int maxAllowed = (int) (cacheSize * 1.1); + assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); + + // LRU evictions should have occurred or be pending + assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThanOrEqualTo(0); + } + + @Test + void testLRUEvictionOrder() { + // Create cache with small max size for testing + // Note: We're using global cache, so this tests default behavior + + // Compile patterns + Pattern p1 = Pattern.compile("pattern1"); + Pattern p2 = Pattern.compile("pattern2"); + Pattern p3 = Pattern.compile("pattern3"); + + // Access p1 and p3 (not p2) + Pattern.compile("pattern1"); // Hit + Pattern.compile("pattern3"); // Hit + + // p2 is least recently used + + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.hits()).isEqualTo(2); // p1 and p3 + assertThat(stats.currentSize()).isEqualTo(3); + } + + @Test + void testCacheStatisticsAccuracy() { + Pattern.compile("a"); + Pattern.compile("b"); + Pattern.compile("a"); // Hit + Pattern.compile("c"); + Pattern.compile("b"); // 
Hit + + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.totalRequests()).isEqualTo(5); + assertThat(stats.hits()).isEqualTo(2); + assertThat(stats.misses()).isEqualTo(3); + assertThat(stats.currentSize()).isEqualTo(3); // a, b, c + } + + @Test + void testCacheUtilization() { + // Empty cache + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.utilization()).isEqualTo(0.0); + + // Add 5000 patterns + for (int i = 0; i < 5000; i++) { + Pattern.compile("pattern" + i); } - @Test - void testCacheSizeLimit() throws InterruptedException { - // Compile more patterns than cache max size - int cacheSize = 50000; // Default max size - int patternsToCompile = cacheSize + 100; - - for (int i = 0; i < patternsToCompile; i++) { - Pattern.compile("pattern" + i); - } + stats = Pattern.getCacheStatistics(); + assertThat(stats.utilization()).isEqualTo(0.1); // 5000/50000 = 10% + } - // Wait for async LRU eviction to complete (soft limits) - Thread.sleep(200); - - CacheStatistics stats = Pattern.getCacheStatistics(); - // With soft limits, cache can temporarily exceed max but should settle back down - // Allow up to 10% overage due to async eviction timing - int maxAllowed = (int) (cacheSize * 1.1); - assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); - - // LRU evictions should have occurred or be pending - assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThanOrEqualTo(0); + @Test + void testClearCache() { + // Add patterns + for (int i = 0; i < 10; i++) { + Pattern.compile("pattern" + i); } - @Test - void testLRUEvictionOrder() { - // Create cache with small max size for testing - // Note: We're using global cache, so this tests default behavior + CacheStatistics before = Pattern.getCacheStatistics(); + assertThat(before.currentSize()).isEqualTo(10); - // Compile patterns - Pattern p1 = Pattern.compile("pattern1"); - Pattern p2 = Pattern.compile("pattern2"); - Pattern p3 = Pattern.compile("pattern3"); 
+ // Clear cache + Pattern.clearCache(); - // Access p1 and p3 (not p2) - Pattern.compile("pattern1"); // Hit - Pattern.compile("pattern3"); // Hit + CacheStatistics after = Pattern.getCacheStatistics(); + assertThat(after.currentSize()).isEqualTo(0); - // p2 is least recently used + // Hits/misses/evictions should be preserved + assertThat(after.hits()).isEqualTo(before.hits()); + assertThat(after.misses()).isEqualTo(before.misses()); + } - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.hits()).isEqualTo(2); // p1 and p3 - assertThat(stats.currentSize()).isEqualTo(3); - } + @Test + void testCompileWithoutCacheDoesNotAffectCache() { + // Compile without cache + Pattern p1 = Pattern.compileWithoutCache("test"); + Pattern p2 = Pattern.compileWithoutCache("test"); - @Test - void testCacheStatisticsAccuracy() { - Pattern.compile("a"); - Pattern.compile("b"); - Pattern.compile("a"); // Hit - Pattern.compile("c"); - Pattern.compile("b"); // Hit - - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.totalRequests()).isEqualTo(5); - assertThat(stats.hits()).isEqualTo(2); - assertThat(stats.misses()).isEqualTo(3); - assertThat(stats.currentSize()).isEqualTo(3); // a, b, c - } + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.misses()).isEqualTo(0); // Not tracked in cache + assertThat(stats.hits()).isEqualTo(0); + assertThat(stats.currentSize()).isEqualTo(0); - @Test - void testCacheUtilization() { - // Empty cache - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.utilization()).isEqualTo(0.0); + // Should be different instances + assertThat(p1).isNotSameAs(p2); - // Add 5000 patterns - for (int i = 0; i < 5000; i++) { - Pattern.compile("pattern" + i); - } + p1.close(); + p2.close(); + } - stats = Pattern.getCacheStatistics(); - assertThat(stats.utilization()).isEqualTo(0.1); // 5000/50000 = 10% - } - - @Test - void testClearCache() { - // Add patterns - for (int i = 0; i < 10; 
i++) { - Pattern.compile("pattern" + i); - } - - CacheStatistics before = Pattern.getCacheStatistics(); - assertThat(before.currentSize()).isEqualTo(10); - - // Clear cache - Pattern.clearCache(); + @Test + void testCachedPatternCannotBeClosed() { + Pattern p = Pattern.compile("test"); + assertThat(p.isClosed()).isFalse(); - CacheStatistics after = Pattern.getCacheStatistics(); - assertThat(after.currentSize()).isEqualTo(0); + // Calling close() on cached pattern should be no-op + p.close(); - // Hits/misses/evictions should be preserved - assertThat(after.hits()).isEqualTo(before.hits()); - assertThat(after.misses()).isEqualTo(before.misses()); - } - - @Test - void testCompileWithoutCacheDoesNotAffectCache() { - // Compile without cache - Pattern p1 = Pattern.compileWithoutCache("test"); - Pattern p2 = Pattern.compileWithoutCache("test"); + // Pattern should still not be closed + assertThat(p.isClosed()).isFalse(); - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.misses()).isEqualTo(0); // Not tracked in cache - assertThat(stats.hits()).isEqualTo(0); - assertThat(stats.currentSize()).isEqualTo(0); + // Should still be usable + assertThat(p.matches("test")).isTrue(); + } - // Should be different instances - assertThat(p1).isNotSameAs(p2); - - p1.close(); - p2.close(); - } + @Test + void testForceCloseActuallyCloses() { + Pattern p = Pattern.compileWithoutCache("test"); + assertThat(p.isClosed()).isFalse(); - @Test - void testCachedPatternCannotBeClosed() { - Pattern p = Pattern.compile("test"); - assertThat(p.isClosed()).isFalse(); + p.forceClose(); + assertThat(p.isClosed()).isTrue(); - // Calling close() on cached pattern should be no-op - p.close(); - - // Pattern should still not be closed - assertThat(p.isClosed()).isFalse(); - - // Should still be usable - assertThat(p.matches("test")).isTrue(); - } - - @Test - void testForceCloseActuallyCloses() { - Pattern p = Pattern.compileWithoutCache("test"); - 
assertThat(p.isClosed()).isFalse(); - - p.forceClose(); - assertThat(p.isClosed()).isTrue(); - - // Should not be usable after force close - assertThatThrownBy(() -> p.matches("test")) - .isInstanceOf(IllegalStateException.class); - } + // Should not be usable after force close + assertThatThrownBy(() -> p.matches("test")).isInstanceOf(IllegalStateException.class); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ConcurrencyIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ConcurrencyIT.java index c3f1c66..2f3a8c9 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ConcurrencyIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ConcurrencyIT.java @@ -1,336 +1,349 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; -import com.axonops.libre2.cache.CacheStatistics; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicInteger; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -import java.util.concurrent.*; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.assertj.core.api.Assertions.*; -import static org.junit.jupiter.api.Assumptions.assumeTrue; - /** * High-concurrency tests for pattern compilation and matching. * - * Tests library behavior under extreme Cassandra-level concurrent load. + *

Tests library behavior under extreme Cassandra-level concurrent load. */ class ConcurrencyIT { - /** - * Detects if running under QEMU emulation (set by CI workflow). - * Performance tests are skipped under QEMU as results are not representative. - */ - private static boolean isQemuEmulation() { - return "true".equals(System.getenv("QEMU_EMULATION")); - } - - @BeforeEach - void setUp() { - Pattern.resetCache(); - } - - @AfterEach - void tearDown() { - Pattern.resetCache(); - } - - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentCompilation_100Threads() throws InterruptedException { - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + /** + * Detects if running under QEMU emulation (set by CI workflow). Performance tests are skipped + * under QEMU as results are not representative. 
+ */ + private static boolean isQemuEmulation() { + return "true".equals(System.getenv("QEMU_EMULATION")); + } + + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentCompilation_100Threads() throws InterruptedException { + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - Pattern p = Pattern.compile("pattern" + threadId); - assertThat(p).isNotNull(); - assertThat(p.matches("pattern" + threadId)).isTrue(); + start.await(); + Pattern p = Pattern.compile("pattern" + threadId); + assertThat(p).isNotNull(); + assertThat(p.matches("pattern" + threadId)).isTrue(); } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } - - start.countDown(); // Start all threads - done.await(); - - assertThat(errors.get()).isEqualTo(0); - - // Cache should have 100 patterns - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.currentSize()).isEqualTo(100); - assertThat(stats.misses()).isEqualTo(100); + }) + .start(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentCompilation_SamePattern_100Threads() throws InterruptedException { - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - ConcurrentHashMap uniqueInstances = new ConcurrentHashMap<>(); - - for (int i = 0; i < threadCount; i++) { - new Thread(() -> { + start.countDown(); // Start all threads + done.await(); + + assertThat(errors.get()).isEqualTo(0); + + // Cache should have 100 
patterns + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.currentSize()).isEqualTo(100); + assertThat(stats.misses()).isEqualTo(100); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentCompilation_SamePattern_100Threads() throws InterruptedException { + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + ConcurrentHashMap uniqueInstances = new ConcurrentHashMap<>(); + + for (int i = 0; i < threadCount; i++) { + new Thread( + () -> { try { - start.await(); - Pattern p = Pattern.compile("same_pattern"); - uniqueInstances.put(p, true); - assertThat(p.matches("same_pattern")).isTrue(); + start.await(); + Pattern p = Pattern.compile("same_pattern"); + uniqueInstances.put(p, true); + assertThat(p.matches("same_pattern")).isTrue(); } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } - - start.countDown(); - done.await(); - - assertThat(errors.get()).isEqualTo(0); - - // All threads should get same instance from cache - assertThat(uniqueInstances.size()).isEqualTo(1); - - // Cache stats: With lock-free implementation, multiple threads might see miss before first put completes - // The key is that only 1 pattern ends up in cache and total requests = 100 - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.currentSize()).isEqualTo(1); - assertThat(stats.totalRequests()).isEqualTo(100); - // Most should be hits, but exact split depends on timing - // Skip hit rate assertion under QEMU (too slow for meaningful measurement) - assumeTrue(!isQemuEmulation(), "Skipping hit rate assertion under QEMU emulation"); - assertThat(stats.hits()).isGreaterThanOrEqualTo(90); + }) + .start(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void 
testConcurrentCompilation_RepeatingPattern_100Threads() throws InterruptedException { - // This test verifies that concurrent compilation of the SAME patterns - // results in deduplication - only 3 patterns compiled, not 100. - // - // Key behavior with lock-free ConcurrentHashMap: - // - 100 threads call cache.get() simultaneously, all see null (miss) - // - All threads call computeIfAbsent(), but only 1 per key compiles - // - Metric: hits=0, misses=100 is VALID (all threads saw empty cache) - // - What matters: only 3 patterns in cache (deduplication works) - - int threadCount = 100; - String[] patterns = {"A", "B", "C"}; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + start.countDown(); + done.await(); + + assertThat(errors.get()).isEqualTo(0); + + // All threads should get same instance from cache + assertThat(uniqueInstances.size()).isEqualTo(1); + + // Cache stats: With lock-free implementation, multiple threads might see miss before first put + // completes + // The key is that only 1 pattern ends up in cache and total requests = 100 + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.currentSize()).isEqualTo(1); + assertThat(stats.totalRequests()).isEqualTo(100); + // Most should be hits, but exact split depends on timing + // Skip hit rate assertion under QEMU (too slow for meaningful measurement) + assumeTrue(!isQemuEmulation(), "Skipping hit rate assertion under QEMU emulation"); + assertThat(stats.hits()).isGreaterThanOrEqualTo(90); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentCompilation_RepeatingPattern_100Threads() throws InterruptedException { + // This test verifies that concurrent compilation of the SAME patterns + // results in deduplication - only 3 patterns compiled, not 100. 
+ // + // Key behavior with lock-free ConcurrentHashMap: + // - 100 threads call cache.get() simultaneously, all see null (miss) + // - All threads call computeIfAbsent(), but only 1 per key compiles + // - Metric: hits=0, misses=100 is VALID (all threads saw empty cache) + // - What matters: only 3 patterns in cache (deduplication works) + + int threadCount = 100; + String[] patterns = {"A", "B", "C"}; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - String pattern = patterns[threadId % patterns.length]; - Pattern p = Pattern.compile(pattern); - assertThat(p.matches(pattern)).isTrue(); + start.await(); + String pattern = patterns[threadId % patterns.length]; + Pattern p = Pattern.compile(pattern); + assertThat(p.matches(pattern)).isTrue(); } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - CacheStatistics stats = Pattern.getCacheStatistics(); + CacheStatistics stats = Pattern.getCacheStatistics(); - // CRITICAL: Only 3 patterns compiled despite 100 concurrent requests - // This proves computeIfAbsent deduplication works - assertThat(stats.currentSize()).isEqualTo(3); + // CRITICAL: Only 3 patterns compiled despite 100 concurrent requests + // This proves computeIfAbsent deduplication works + assertThat(stats.currentSize()).isEqualTo(3); - // All requests processed - assertThat(stats.totalRequests()).isEqualTo(100); + // All requests processed + assertThat(stats.totalRequests()).isEqualTo(100); - // At minimum 3 misses (one per unique pattern) - // With racing threads, could be 
up to 100 misses (all threads saw empty cache) - assertThat(stats.misses()).isBetween(3L, 100L); + // At minimum 3 misses (one per unique pattern) + // With racing threads, could be up to 100 misses (all threads saw empty cache) + assertThat(stats.misses()).isBetween(3L, 100L); - // Total must equal 100 - assertThat(stats.hits() + stats.misses()).isEqualTo(100); - } + // Total must equal 100 + assertThat(stats.hits() + stats.misses()).isEqualTo(100); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentMatching_100Threads_SamePattern() throws InterruptedException { - Pattern p = Pattern.compile("test\\d+"); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentMatching_100Threads_SamePattern() throws InterruptedException { + Pattern p = Pattern.compile("test\\d+"); - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - AtomicInteger matchCount = new AtomicInteger(0); + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + AtomicInteger matchCount = new AtomicInteger(0); - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - try (Matcher m = p.matcher("test" + threadId)) { - if (m.matches()) { - matchCount.incrementAndGet(); - } + start.await(); + try (Matcher m = p.matcher("test" + threadId)) { + if (m.matches()) { + matchCount.incrementAndGet(); } + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - assertThat(errors.get()).isEqualTo(0); - 
assertThat(matchCount.get()).isEqualTo(100); + assertThat(errors.get()).isEqualTo(0); + assertThat(matchCount.get()).isEqualTo(100); - // After all matchers closed, refCount should be 0 - assertThat(p.getRefCount()).isEqualTo(0); - } + // After all matchers closed, refCount should be 0 + assertThat(p.getRefCount()).isEqualTo(0); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentMatching_100Threads_DifferentPatterns() throws InterruptedException { - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentMatching_100Threads_DifferentPatterns() throws InterruptedException { + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - Pattern p = Pattern.compile("pattern" + threadId); - try (Matcher m = p.matcher("pattern" + threadId)) { - assertThat(m.matches()).isTrue(); - } + start.await(); + Pattern p = Pattern.compile("pattern" + threadId); + try (Matcher m = p.matcher("pattern" + threadId)) { + assertThat(m.matches()).isTrue(); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - assertThat(errors.get()).isEqualTo(0); - } + assertThat(errors.get()).isEqualTo(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentCacheHitsAndMisses_100Threads() throws InterruptedException { + // Pre-compile some patterns 
+ Pattern.compile("existing1"); + Pattern.compile("existing2"); - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentCacheHitsAndMisses_100Threads() throws InterruptedException { - // Pre-compile some patterns - Pattern.compile("existing1"); - Pattern.compile("existing2"); - - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - if (threadId % 3 == 0) { - // Hit: compile existing pattern - Pattern.compile("existing1"); - } else if (threadId % 3 == 1) { - // Hit: compile other existing pattern - Pattern.compile("existing2"); - } else { - // Miss: compile new pattern - Pattern.compile("new" + threadId); - } + start.await(); + if (threadId % 3 == 0) { + // Hit: compile existing pattern + Pattern.compile("existing1"); + } else if (threadId % 3 == 1) { + // Hit: compile other existing pattern + Pattern.compile("existing2"); + } else { + // Miss: compile new pattern + Pattern.compile("new" + threadId); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } - - start.countDown(); - done.await(); - - assertThat(errors.get()).isEqualTo(0); - - // Verify cache metrics are accurate - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.totalRequests()).isEqualTo(100 + 2); // +2 from pre-compile - assertThat(stats.hits()).isGreaterThan(0); - assertThat(stats.misses()).isGreaterThan(0); + }) + .start(); } - @Test - @Timeout(value = 60, unit = 
TimeUnit.SECONDS) - void testConcurrentCacheAndEviction_100Threads() throws InterruptedException { - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - // Compile many patterns to trigger LRU eviction - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + start.countDown(); + done.await(); + + assertThat(errors.get()).isEqualTo(0); + + // Verify cache metrics are accurate + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.totalRequests()).isEqualTo(100 + 2); // +2 from pre-compile + assertThat(stats.hits()).isGreaterThan(0); + assertThat(stats.misses()).isGreaterThan(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentCacheAndEviction_100Threads() throws InterruptedException { + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + // Compile many patterns to trigger LRU eviction + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - // Each thread compiles 600 patterns = 60K total > 50K cache - for (int j = 0; j < 600; j++) { - Pattern p = Pattern.compile("thread" + threadId + "_pattern" + j); - assertThat(p).isNotNull(); - } + start.await(); + // Each thread compiles 600 patterns = 60K total > 50K cache + for (int j = 0; j < 600; j++) { + Pattern p = Pattern.compile("thread" + threadId + "_pattern" + j); + assertThat(p).isNotNull(); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - // Wait for async LRU eviction to settle (must exceed evictionProtectionMs of 1 second) - 
Thread.sleep(1500); + // Wait for async LRU eviction to settle (must exceed evictionProtectionMs of 1 second) + Thread.sleep(1500); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // With soft limits, cache can temporarily exceed max but should settle back down - // Allow up to 20% overage due to concurrent async eviction timing - CacheStatistics stats = Pattern.getCacheStatistics(); - int maxAllowed = (int) (50000 * 1.2); - assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); - // Evictions should have occurred (LRU or deferred) - assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThan(0); - } + // With soft limits, cache can temporarily exceed max but should settle back down + // Allow up to 20% overage due to concurrent async eviction timing + CacheStatistics stats = Pattern.getCacheStatistics(); + int maxAllowed = (int) (50000 * 1.2); + assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); + // Evictions should have occurred (LRU or deferred) + assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThan(0); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ConcurrentCleanupIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ConcurrentCleanupIT.java index 5cca3aa..9577fe3 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ConcurrentCleanupIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ConcurrentCleanupIT.java @@ -1,165 +1,169 @@ package com.axonops.libre2.cache; -import com.axonops.libre2.api.Pattern; -import com.axonops.libre2.cache.CacheStatistics; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; +import static org.assertj.core.api.Assertions.*; +import com.axonops.libre2.api.Pattern; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import 
org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; -import static org.assertj.core.api.Assertions.*; - -/** - * Tests for concurrent resource cleanup. - */ +/** Tests for concurrent resource cleanup. */ class ConcurrentCleanupIT { - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentClose_CacheDisabled_100Threads() throws InterruptedException { - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - Pattern[] patterns = new Pattern[threadCount]; - for (int i = 0; i < threadCount; i++) { - patterns[i] = Pattern.compileWithoutCache("test" + i); - } - - // All threads close simultaneously - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentClose_CacheDisabled_100Threads() throws InterruptedException { + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + Pattern[] patterns = new Pattern[threadCount]; + for (int i = 0; i < threadCount; i++) { + patterns[i] = Pattern.compileWithoutCache("test" + i); + } + + // All threads close simultaneously + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - patterns[threadId].close(); + start.await(); + patterns[threadId].close(); } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } - - start.countDown(); - done.await(); - - assertThat(errors.get()).isEqualTo(0); - - // All should be closed - for (Pattern p : patterns) { - assertThat(p.isClosed()).isTrue(); - } + }) + .start(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentClose_RaceCondition() throws 
InterruptedException { - Pattern p = Pattern.compileWithoutCache("test"); + start.countDown(); + done.await(); - int threadCount = 10; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger closeCalls = new AtomicInteger(0); + assertThat(errors.get()).isEqualTo(0); - // 10 threads try to close same pattern simultaneously - for (int i = 0; i < threadCount; i++) { - new Thread(() -> { + // All should be closed + for (Pattern p : patterns) { + assertThat(p.isClosed()).isTrue(); + } + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentClose_RaceCondition() throws InterruptedException { + Pattern p = Pattern.compileWithoutCache("test"); + + int threadCount = 10; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger closeCalls = new AtomicInteger(0); + + // 10 threads try to close same pattern simultaneously + for (int i = 0; i < threadCount; i++) { + new Thread( + () -> { try { - start.await(); - p.close(); // Should be idempotent - closeCalls.incrementAndGet(); + start.await(); + p.close(); // Should be idempotent + closeCalls.incrementAndGet(); } catch (InterruptedException e) { - Thread.currentThread().interrupt(); + Thread.currentThread().interrupt(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - // All threads should have called close without exception - assertThat(closeCalls.get()).isEqualTo(threadCount); + // All threads should have called close without exception + assertThat(closeCalls.get()).isEqualTo(threadCount); - // Pattern should be closed exactly once (idempotent) - assertThat(p.isClosed()).isTrue(); - } + // Pattern should be closed exactly once (idempotent) + assertThat(p.isClosed()).isTrue(); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void 
testLRUEvictionWithConcurrentUse() throws InterruptedException { - int threadCount = 50; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testLRUEvictionWithConcurrentUse() throws InterruptedException { + int threadCount = 50; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - // Each thread compiles 30 patterns (total 1500 > cache size) - for (int j = 0; j < 1100; j++) { - Pattern p = Pattern.compile("t" + threadId + "_p" + j); - assertThat(p.matches("t" + threadId + "_p" + j)).isTrue(); - } + start.await(); + // Each thread compiles 30 patterns (total 1500 > cache size) + for (int j = 0; j < 1100; j++) { + Pattern p = Pattern.compile("t" + threadId + "_p" + j); + assertThat(p.matches("t" + threadId + "_p" + j)).isTrue(); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - // Wait for async eviction to complete (must exceed evictionProtectionMs of 1 second) - Thread.sleep(1500); + // Wait for async eviction to complete (must exceed evictionProtectionMs of 1 second) + Thread.sleep(1500); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // Cache should enforce soft size limit - CacheStatistics stats = Pattern.getCacheStatistics(); - // With soft limits, allow up to 20% overage during high concurrent load - int maxAllowed = (int) (50000 * 1.2); - 
assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); - // Some evictions should have occurred - assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThan(0); - } + // Cache should enforce soft size limit + CacheStatistics stats = Pattern.getCacheStatistics(); + // With soft limits, allow up to 20% overage during high concurrent load + int maxAllowed = (int) (50000 * 1.2); + assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); + // Some evictions should have occurred + assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThan(0); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentForceClose_100Threads() throws InterruptedException { - Pattern p = Pattern.compile("test"); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentForceClose_100Threads() throws InterruptedException { + Pattern p = Pattern.compile("test"); - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); - for (int i = 0; i < threadCount; i++) { - new Thread(() -> { + for (int i = 0; i < threadCount; i++) { + new Thread( + () -> { try { - start.await(); - p.forceClose(); // All try to force close simultaneously + start.await(); + p.forceClose(); // All try to force close simultaneously } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - assertThat(errors.get()).isEqualTo(0); - assertThat(p.isClosed()).isTrue(); // Closed exactly once - } + assertThat(errors.get()).isEqualTo(0); + 
assertThat(p.isClosed()).isTrue(); // Closed exactly once + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/DeferredCleanupTimingIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/DeferredCleanupTimingIT.java index e268c8c..8eda799 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/DeferredCleanupTimingIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/DeferredCleanupTimingIT.java @@ -1,136 +1,135 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; - -import static org.assertj.core.api.Assertions.*; - /** * Tests for frequent deferred cleanup timing (every 5 seconds). * - * Verifies deferred patterns freed quickly, not waiting for 60s idle scan. + *

Verifies deferred patterns freed quickly, not waiting for 60s idle scan. */ class DeferredCleanupTimingIT { - @BeforeEach - void setUp() { - Pattern.resetCache(); + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testDeferredCleanupRunsFrequently() throws InterruptedException { + // Create patterns with active matchers + List matchers = new ArrayList<>(); + + for (int i = 0; i < 10; i++) { + Pattern p = Pattern.compile("pattern" + i); + Matcher m = p.matcher("test"); + matchers.add(m); } - @AfterEach - void tearDown() { - Pattern.resetCache(); + // Trigger evictions (compile more patterns) + for (int i = 10; i < 100; i++) { + Pattern.compile("trigger" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testDeferredCleanupRunsFrequently() throws InterruptedException { - // Create patterns with active matchers - List matchers = new ArrayList<>(); - - for (int i = 0; i < 10; i++) { - Pattern p = Pattern.compile("pattern" + i); - Matcher m = p.matcher("test"); - matchers.add(m); - } - - // Trigger evictions (compile more patterns) - for (int i = 10; i < 100; i++) { - Pattern.compile("trigger" + i); - } - - // Check if any patterns in deferred cleanup - CacheStatistics beforeClose = Pattern.getCacheStatistics(); - int deferredBefore = beforeClose.deferredCleanupPending(); + // Check if any patterns in deferred cleanup + CacheStatistics beforeClose = Pattern.getCacheStatistics(); + int deferredBefore = beforeClose.deferredCleanupPending(); - if (deferredBefore > 0) { - // We have deferred patterns - close matchers to make them freeable - for (Matcher m : matchers) { - m.close(); - } + if (deferredBefore > 0) { + // We have deferred patterns - close matchers to make them freeable + for (Matcher m : matchers) { + m.close(); + } - // Wait 6 seconds (cleanup runs every 5s, so should happen within 6s) - Thread.sleep(6000); 
+ // Wait 6 seconds (cleanup runs every 5s, so should happen within 6s) + Thread.sleep(6000); - // Check if deferred list was cleaned - CacheStatistics afterWait = Pattern.getCacheStatistics(); - int deferredAfter = afterWait.deferredCleanupPending(); + // Check if deferred list was cleaned + CacheStatistics afterWait = Pattern.getCacheStatistics(); + int deferredAfter = afterWait.deferredCleanupPending(); - // Deferred list should be smaller (patterns freed) - assertThat(deferredAfter).isLessThanOrEqualTo(deferredBefore); + // Deferred list should be smaller (patterns freed) + assertThat(deferredAfter).isLessThanOrEqualTo(deferredBefore); - // If we had deferred patterns and waited 6s, they should be cleaned - // (This verifies cleanup runs every 5s, not every 60s) - } + // If we had deferred patterns and waited 6s, they should be cleaned + // (This verifies cleanup runs every 5s, not every 60s) + } + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testDeferredCleanupFasterThanIdleEviction() throws InterruptedException { + // Create pattern with matcher + Pattern p = Pattern.compile("test_pattern"); + Matcher m = p.matcher("test"); + + // Trigger eviction + for (int i = 0; i < 200; i++) { + Pattern.compile("evict_trigger_" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testDeferredCleanupFasterThanIdleEviction() throws InterruptedException { - // Create pattern with matcher - Pattern p = Pattern.compile("test_pattern"); - Matcher m = p.matcher("test"); - - // Trigger eviction - for (int i = 0; i < 200; i++) { - Pattern.compile("evict_trigger_" + i); - } - - CacheStatistics stats = Pattern.getCacheStatistics(); - int deferredCount = stats.deferredCleanupPending(); - - if (deferredCount > 0) { - // We have deferred patterns - // Close matcher (makes pattern freeable) - m.close(); + CacheStatistics stats = Pattern.getCacheStatistics(); + int deferredCount = stats.deferredCleanupPending(); - long start = 
System.currentTimeMillis(); + if (deferredCount > 0) { + // We have deferred patterns + // Close matcher (makes pattern freeable) + m.close(); - // Wait for cleanup (should happen within 5-6 seconds) - for (int i = 0; i < 12; i++) { // Wait up to 12 seconds - Thread.sleep(500); + long start = System.currentTimeMillis(); - CacheStatistics current = Pattern.getCacheStatistics(); - if (current.deferredCleanupPending() < deferredCount) { - // Cleanup happened! - long duration = System.currentTimeMillis() - start; + // Wait for cleanup (should happen within 5-6 seconds) + for (int i = 0; i < 12; i++) { // Wait up to 12 seconds + Thread.sleep(500); - // Should happen in < 10 seconds (not 60 seconds) - assertThat(duration).isLessThan(10000); - return; - } - } + CacheStatistics current = Pattern.getCacheStatistics(); + if (current.deferredCleanupPending() < deferredCount) { + // Cleanup happened! + long duration = System.currentTimeMillis() - start; - // If we get here, cleanup didn't happen in 12s (acceptable - depends on timing) + // Should happen in < 10 seconds (not 60 seconds) + assertThat(duration).isLessThan(10000); + return; } + } + + // If we get here, cleanup didn't happen in 12s (acceptable - depends on timing) } + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testIdleEvictionStillRunsPeriodically() throws InterruptedException { - // This test verifies idle eviction hasn't broken - // (Still runs every 60s as configured) + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testIdleEvictionStillRunsPeriodically() throws InterruptedException { + // This test verifies idle eviction hasn't broken + // (Still runs every 60s as configured) - // Compile patterns (no matchers - can be idle-evicted) - for (int i = 0; i < 10; i++) { - Pattern.compile("idle_test_" + i); - } + // Compile patterns (no matchers - can be idle-evicted) + for (int i = 0; i < 10; i++) { + Pattern.compile("idle_test_" + i); + } - // With default 300s idle timeout, 
patterns won't be evicted in this test - // But we verify the background thread is running - Thread.sleep(100); // Brief wait + // With default 300s idle timeout, patterns won't be evicted in this test + // But we verify the background thread is running + Thread.sleep(100); // Brief wait - // Background thread should be running - // (We can't easily test 60s cycle in unit test) - // This test just verifies we didn't break idle eviction - assertThat(Pattern.getCacheStatistics().currentSize()).isGreaterThan(0); - } + // Background thread should be running + // (We can't easily test 60s cycle in unit test) + // This test just verifies we didn't break idle eviction + assertThat(Pattern.getCacheStatistics().currentSize()).isGreaterThan(0); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/EvictionEdgeCasesIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/EvictionEdgeCasesIT.java index e04ad0c..e4cc4d1 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/EvictionEdgeCasesIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/EvictionEdgeCasesIT.java @@ -1,169 +1,166 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; +import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -import java.util.concurrent.TimeUnit; - -import static org.assertj.core.api.Assertions.*; - -/** - * Edge case tests for LRU and idle eviction behavior. - */ +/** Edge case tests for LRU and idle eviction behavior. 
*/ class EvictionEdgeCasesIT { - @BeforeEach - void setUp() { - Pattern.resetCache(); - } - - @AfterEach - void tearDown() { - Pattern.resetCache(); + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testLRUEviction_LastAccessTimeUpdated() { + // Compile patterns in order + Pattern p1 = Pattern.compile("pattern1"); + Pattern p2 = Pattern.compile("pattern2"); + Pattern p3 = Pattern.compile("pattern3"); + + // Access p1 again (updates its position in LRU) + Pattern p1Again = Pattern.compile("pattern1"); // Cache hit + + assertThat(p1Again).isSameAs(p1); // Same instance from cache + + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.hits()).isEqualTo(1); + assertThat(stats.currentSize()).isEqualTo(3); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testLRUEviction_MultipleEvictions() throws InterruptedException { + int cacheSize = 50000; + + // Add exactly cache size + for (int i = 0; i < cacheSize; i++) { + Pattern.compile("pattern" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testLRUEviction_LastAccessTimeUpdated() { - // Compile patterns in order - Pattern p1 = Pattern.compile("pattern1"); - Pattern p2 = Pattern.compile("pattern2"); - Pattern p3 = Pattern.compile("pattern3"); - - // Access p1 again (updates its position in LRU) - Pattern p1Again = Pattern.compile("pattern1"); // Cache hit + CacheStatistics before = Pattern.getCacheStatistics(); + assertThat(before.currentSize()).isEqualTo(cacheSize); + assertThat(before.evictionsLRU()).isEqualTo(0); - assertThat(p1Again).isSameAs(p1); // Same instance from cache - - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.hits()).isEqualTo(1); - assertThat(stats.currentSize()).isEqualTo(3); + // Add 100 more - should trigger async LRU evictions + for (int i = cacheSize; i < cacheSize + 100; i++) 
{ + Pattern.compile("pattern" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testLRUEviction_MultipleEvictions() throws InterruptedException { - int cacheSize = 50000; - - // Add exactly cache size - for (int i = 0; i < cacheSize; i++) { - Pattern.compile("pattern" + i); - } - - CacheStatistics before = Pattern.getCacheStatistics(); - assertThat(before.currentSize()).isEqualTo(cacheSize); - assertThat(before.evictionsLRU()).isEqualTo(0); - - // Add 100 more - should trigger async LRU evictions - for (int i = cacheSize; i < cacheSize + 100; i++) { - Pattern.compile("pattern" + i); - } - - // Wait for async eviction to complete - Thread.sleep(200); - - CacheStatistics after = Pattern.getCacheStatistics(); - // With soft limits, cache can temporarily exceed max - // Allow up to 10% overage - int maxAllowed = (int) (cacheSize * 1.1); - assertThat(after.currentSize()).isLessThanOrEqualTo(maxAllowed); - // Some evictions should have occurred - assertThat(after.evictionsLRU() + after.evictionsDeferred()).isGreaterThanOrEqualTo(0); + // Wait for async eviction to complete + Thread.sleep(200); + + CacheStatistics after = Pattern.getCacheStatistics(); + // With soft limits, cache can temporarily exceed max + // Allow up to 10% overage + int maxAllowed = (int) (cacheSize * 1.1); + assertThat(after.currentSize()).isLessThanOrEqualTo(maxAllowed); + // Some evictions should have occurred + assertThat(after.evictionsLRU() + after.evictionsDeferred()).isGreaterThanOrEqualTo(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testEvictionWithActiveMatchers() { + // Compile patterns up to cache limit + for (int i = 0; i < 1000; i++) { + Pattern.compile("filler" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testEvictionWithActiveMatchers() { - // Compile patterns up to cache limit - for (int i = 0; i < 1000; i++) { - Pattern.compile("filler" + i); - } + // Compile one more pattern with active matcher + Pattern p = 
Pattern.compile("important"); + Matcher m = p.matcher("test"); - // Compile one more pattern with active matcher - Pattern p = Pattern.compile("important"); - Matcher m = p.matcher("test"); + assertThat(p.getRefCount()).isEqualTo(1); // Matcher holding reference - assertThat(p.getRefCount()).isEqualTo(1); // Matcher holding reference + // Try to trigger eviction + for (int i = 0; i < 100; i++) { + Pattern.compile("new" + i); + } - // Try to trigger eviction - for (int i = 0; i < 100; i++) { - Pattern.compile("new" + i); - } + // Pattern with active matcher might still be in cache (not evicted) + // Or if evicted, it's not freed due to refCount + assertThat(p.isClosed()).isFalse(); // Not freed while matcher active - // Pattern with active matcher might still be in cache (not evicted) - // Or if evicted, it's not freed due to refCount - assertThat(p.isClosed()).isFalse(); // Not freed while matcher active + m.close(); + assertThat(p.getRefCount()).isEqualTo(0); + } - m.close(); - assertThat(p.getRefCount()).isEqualTo(0); - } + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testCacheClearWithActiveMatchers() { + Pattern p = Pattern.compile("test"); + Matcher m = p.matcher("test"); - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testCacheClearWithActiveMatchers() { - Pattern p = Pattern.compile("test"); - Matcher m = p.matcher("test"); + assertThat(p.getRefCount()).isEqualTo(1); - assertThat(p.getRefCount()).isEqualTo(1); + // Clear cache (calls forceClose on all patterns) + Pattern.clearCache(); - // Clear cache (calls forceClose on all patterns) - Pattern.clearCache(); + // Pattern should NOT be closed (matcher still active) + assertThat(p.isClosed()).isFalse(); - // Pattern should NOT be closed (matcher still active) - assertThat(p.isClosed()).isFalse(); + // Matcher should still work + assertThat(m.matches()).isTrue(); - // Matcher should still work - assertThat(m.matches()).isTrue(); + m.close(); - m.close(); + // Now pattern can be 
closed + p.forceClose(); + assertThat(p.isClosed()).isTrue(); + } - // Now pattern can be closed - p.forceClose(); - assertThat(p.isClosed()).isTrue(); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testEvictionDeterministic() { + // Add patterns in specific order + for (int i = 0; i < 10; i++) { + Pattern.compile("pattern" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testEvictionDeterministic() { - // Add patterns in specific order - for (int i = 0; i < 10; i++) { - Pattern.compile("pattern" + i); - } - - // Access patterns in different order - Pattern.compile("pattern5"); // Hit - Pattern.compile("pattern2"); // Hit - Pattern.compile("pattern8"); // Hit + // Access patterns in different order + Pattern.compile("pattern5"); // Hit + Pattern.compile("pattern2"); // Hit + Pattern.compile("pattern8"); // Hit - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.hits()).isEqualTo(3); + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.hits()).isEqualTo(3); - // LRU order is deterministic based on access pattern - assertThat(stats.currentSize()).isEqualTo(10); - } + // LRU order is deterministic based on access pattern + assertThat(stats.currentSize()).isEqualTo(10); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testCaseSensitivityInEviction() { - // Same pattern string, different case sensitivity - Pattern p1 = Pattern.compile("TEST", true); // Case-sensitive - Pattern p2 = Pattern.compile("TEST", false); // Case-insensitive + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testCaseSensitivityInEviction() { + // Same pattern string, different case sensitivity + Pattern p1 = Pattern.compile("TEST", true); // Case-sensitive + Pattern p2 = Pattern.compile("TEST", false); // Case-insensitive - // Should be separate cache entries - assertThat(p1).isNotSameAs(p2); + // Should be separate cache entries + assertThat(p1).isNotSameAs(p2); - CacheStatistics 
stats = Pattern.getCacheStatistics(); - assertThat(stats.currentSize()).isEqualTo(2); + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.currentSize()).isEqualTo(2); - // Both should survive eviction (different keys) - Pattern.compile("TEST", true); // Hit on p1 - Pattern.compile("TEST", false); // Hit on p2 + // Both should survive eviction (different keys) + Pattern.compile("TEST", true); // Hit on p1 + Pattern.compile("TEST", false); // Hit on p2 - stats = Pattern.getCacheStatistics(); - assertThat(stats.hits()).isEqualTo(2); - } + stats = Pattern.getCacheStatistics(); + assertThat(stats.hits()).isEqualTo(2); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/EvictionWhileInUseIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/EvictionWhileInUseIT.java index f5de45f..cc4a031 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/EvictionWhileInUseIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/EvictionWhileInUseIT.java @@ -1,214 +1,216 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; +import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -import java.util.concurrent.TimeUnit; - -import static org.assertj.core.api.Assertions.*; - /** * CRITICAL TESTS: Verifies patterns can be evicted while in use without crashing. * - * These tests verify the reference counting mechanism prevents use-after-free bugs. + *

These tests verify the reference counting mechanism prevents use-after-free bugs. */ class EvictionWhileInUseIT { - @BeforeEach - void setUp() { - Pattern.resetCache(); - } - - @AfterEach - void tearDown() { - Pattern.resetCache(); - } - - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testReferenceCountIncrements() { - Pattern p = Pattern.compile("test"); - - // Initially refCount should be 0 - assertThat(p.getRefCount()).isEqualTo(0); - - // Create matcher - refCount should increment - Matcher m = p.matcher("test"); - assertThat(p.getRefCount()).isEqualTo(1); - - // Create another matcher - refCount should increment again - Matcher m2 = p.matcher("test"); - assertThat(p.getRefCount()).isEqualTo(2); - - // Close first matcher - refCount decrements - m.close(); - assertThat(p.getRefCount()).isEqualTo(1); - - // Close second matcher - refCount back to 0 - m2.close(); - assertThat(p.getRefCount()).isEqualTo(0); + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testReferenceCountIncrements() { + Pattern p = Pattern.compile("test"); + + // Initially refCount should be 0 + assertThat(p.getRefCount()).isEqualTo(0); + + // Create matcher - refCount should increment + Matcher m = p.matcher("test"); + assertThat(p.getRefCount()).isEqualTo(1); + + // Create another matcher - refCount should increment again + Matcher m2 = p.matcher("test"); + assertThat(p.getRefCount()).isEqualTo(2); + + // Close first matcher - refCount decrements + m.close(); + assertThat(p.getRefCount()).isEqualTo(1); + + // Close second matcher - refCount back to 0 + m2.close(); + assertThat(p.getRefCount()).isEqualTo(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testPatternNotFreedWhileMatcherActive() { + Pattern p = Pattern.compile("test"); + + // Create matcher (refCount = 1) + Matcher m = p.matcher("test"); + 
assertThat(p.getRefCount()).isEqualTo(1); + + // Try to force close pattern (should be deferred due to refCount) + p.forceClose(); + + // Pattern should NOT be closed + assertThat(p.isClosed()).isFalse(); + + // Matcher should still work + assertThat(m.matches()).isTrue(); + + // Close matcher + m.close(); + assertThat(p.getRefCount()).isEqualTo(0); + + // Now force close should work + p.forceClose(); + assertThat(p.isClosed()).isTrue(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testMultipleMatchersPreventEviction() { + Pattern p = Pattern.compile("test"); + + // Create 10 matchers + Matcher[] matchers = new Matcher[10]; + for (int i = 0; i < 10; i++) { + matchers[i] = p.matcher("test"); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testPatternNotFreedWhileMatcherActive() { - Pattern p = Pattern.compile("test"); - - // Create matcher (refCount = 1) - Matcher m = p.matcher("test"); - assertThat(p.getRefCount()).isEqualTo(1); - - // Try to force close pattern (should be deferred due to refCount) - p.forceClose(); - - // Pattern should NOT be closed - assertThat(p.isClosed()).isFalse(); - - // Matcher should still work - assertThat(m.matches()).isTrue(); - - // Close matcher - m.close(); - assertThat(p.getRefCount()).isEqualTo(0); + assertThat(p.getRefCount()).isEqualTo(10); - // Now force close should work - p.forceClose(); - assertThat(p.isClosed()).isTrue(); - } + // Try to evict - should fail due to refCount + p.forceClose(); + assertThat(p.isClosed()).isFalse(); - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testMultipleMatchersPreventEviction() { - Pattern p = Pattern.compile("test"); - - // Create 10 matchers - Matcher[] matchers = new Matcher[10]; - for (int i = 0; i < 10; i++) { - matchers[i] = p.matcher("test"); - } - - assertThat(p.getRefCount()).isEqualTo(10); - - // Try to evict - should fail due to refCount - p.forceClose(); - assertThat(p.isClosed()).isFalse(); - - // Close matchers one by 
one - for (int i = 0; i < 9; i++) { - matchers[i].close(); - assertThat(p.getRefCount()).isEqualTo(10 - i - 1); - assertThat(p.isClosed()).isFalse(); // Still in use - } - - // Close last matcher - matchers[9].close(); - assertThat(p.getRefCount()).isEqualTo(0); - - // Now eviction can succeed - p.forceClose(); - assertThat(p.isClosed()).isTrue(); + // Close matchers one by one + for (int i = 0; i < 9; i++) { + matchers[i].close(); + assertThat(p.getRefCount()).isEqualTo(10 - i - 1); + assertThat(p.isClosed()).isFalse(); // Still in use } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentMatchersOnSamePattern() throws InterruptedException { - Pattern p = Pattern.compile("test"); - - int threadCount = 100; - java.util.concurrent.CountDownLatch start = new java.util.concurrent.CountDownLatch(1); - java.util.concurrent.CountDownLatch done = new java.util.concurrent.CountDownLatch(threadCount); - java.util.concurrent.atomic.AtomicInteger errors = new java.util.concurrent.atomic.AtomicInteger(0); - - // 100 threads all create matchers on same pattern simultaneously - for (int i = 0; i < threadCount; i++) { - new Thread(() -> { + // Close last matcher + matchers[9].close(); + assertThat(p.getRefCount()).isEqualTo(0); + + // Now eviction can succeed + p.forceClose(); + assertThat(p.isClosed()).isTrue(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentMatchersOnSamePattern() throws InterruptedException { + Pattern p = Pattern.compile("test"); + + int threadCount = 100; + java.util.concurrent.CountDownLatch start = new java.util.concurrent.CountDownLatch(1); + java.util.concurrent.CountDownLatch done = new java.util.concurrent.CountDownLatch(threadCount); + java.util.concurrent.atomic.AtomicInteger errors = + new java.util.concurrent.atomic.AtomicInteger(0); + + // 100 threads all create matchers on same pattern simultaneously + for (int i = 0; i < threadCount; i++) { + new Thread( + () -> { try { - 
start.await(); // Wait for signal - try (Matcher m = p.matcher("test")) { - boolean matches = m.matches(); - assertThat(matches).isTrue(); - } + start.await(); // Wait for signal + try (Matcher m = p.matcher("test")) { + boolean matches = m.matches(); + assertThat(matches).isTrue(); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - // Signal all threads to start simultaneously - start.countDown(); + // Signal all threads to start simultaneously + start.countDown(); - // Wait for all to complete - done.await(); + // Wait for all to complete + done.await(); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // After all matchers closed, refCount should be 0 - assertThat(p.getRefCount()).isEqualTo(0); - } + // After all matchers closed, refCount should be 0 + assertThat(p.getRefCount()).isEqualTo(0); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testEvictionDeferredWhileInUse() { - // Compile pattern - Pattern p = Pattern.compile("test"); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testEvictionDeferredWhileInUse() { + // Compile pattern + Pattern p = Pattern.compile("test"); - // Create matcher (refCount = 1) - Matcher m = p.matcher("test"); + // Create matcher (refCount = 1) + Matcher m = p.matcher("test"); - // Trigger eviction while matcher active - Pattern.clearCache(); // This calls forceClose() on all cached patterns + // Trigger eviction while matcher active + Pattern.clearCache(); // This calls forceClose() on all cached patterns - // Pattern should NOT be closed (matcher still using it) - assertThat(p.isClosed()).isFalse(); + // Pattern should NOT be closed (matcher still using it) + assertThat(p.isClosed()).isFalse(); - // Matcher should still work - assertThat(m.matches()).isTrue(); + // Matcher should still work + assertThat(m.matches()).isTrue(); - // Close 
matcher - m.close(); + // Close matcher + m.close(); - // Pattern can now be closed - p.forceClose(); - assertThat(p.isClosed()).isTrue(); - } + // Pattern can now be closed + p.forceClose(); + assertThat(p.isClosed()).isTrue(); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testPatternRecompiledWhileOldStillInUse() { - // Clear cache first - Pattern.resetCache(); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testPatternRecompiledWhileOldStillInUse() { + // Clear cache first + Pattern.resetCache(); - // Compile and get pattern from cache - Pattern p1 = Pattern.compile("test"); - Matcher m1 = p1.matcher("test"); + // Compile and get pattern from cache + Pattern p1 = Pattern.compile("test"); + Matcher m1 = p1.matcher("test"); - // Force evict (but won't actually close due to refCount) - Pattern.clearCache(); + // Force evict (but won't actually close due to refCount) + Pattern.clearCache(); - // Compile same pattern again - should create NEW instance (cache was cleared) - Pattern p2 = Pattern.compile("test"); + // Compile same pattern again - should create NEW instance (cache was cleared) + Pattern p2 = Pattern.compile("test"); - // p1 and p2 should be different instances (cache was cleared) - assertThat(p1).isNotSameAs(p2); + // p1 and p2 should be different instances (cache was cleared) + assertThat(p1).isNotSameAs(p2); - // Both should work independently - assertThat(m1.matches()).isTrue(); - assertThat(p2.matches("test")).isTrue(); + // Both should work independently + assertThat(m1.matches()).isTrue(); + assertThat(p2.matches("test")).isTrue(); - // Close matcher on p1 - m1.close(); + // Close matcher on p1 + m1.close(); - // Now p1 can be freed - p1.forceClose(); - assertThat(p1.isClosed()).isTrue(); + // Now p1 can be freed + p1.forceClose(); + assertThat(p1.isClosed()).isTrue(); - // p2 should still work - assertThat(p2.matches("test")).isTrue(); - } + // p2 should still work + assertThat(p2.matches("test")).isTrue(); + } 
} diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/IdleEvictionIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/IdleEvictionIT.java index 28b09ff..8ae2997 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/IdleEvictionIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/IdleEvictionIT.java @@ -1,116 +1,114 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Pattern; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import java.time.Duration; - -import static org.assertj.core.api.Assertions.*; - /** * Tests for idle eviction functionality. * - * CRITICAL: These tests verify patterns are actually evicted after idle timeout. + *

CRITICAL: These tests verify patterns are actually evicted after idle timeout. */ class IdleEvictionIT { - @BeforeEach - void setUp() { - Pattern.resetCache(); - } - - @AfterEach - void tearDown() { - Pattern.resetCache(); + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + void testIdleEvictionActuallyEvictsPatterns() throws InterruptedException { + // Compile a pattern + Pattern.compile("test"); + + CacheStatistics before = Pattern.getCacheStatistics(); + assertThat(before.currentSize()).isEqualTo(1); + assertThat(before.evictionsIdle()).isEqualTo(0); + + // Wait for idle timeout + scan interval + // Default: 300s idle + 60s scan = too long for test + // So this test verifies the mechanism, real timeout tested separately + Thread.sleep(100); // Small delay + + // Pattern should still be there (not enough time elapsed) + CacheStatistics after = Pattern.getCacheStatistics(); + assertThat(after.currentSize()).isEqualTo(1); + } + + @Test + void testIdleEvictionWithShortTimeout() throws InterruptedException { + // Note: We can't easily test with custom config since cache is static + // This test documents the behavior with default config + + // Compile patterns + Pattern.compile("p1"); + Pattern.compile("p2"); + Pattern.compile("p3"); + + assertThat(Pattern.getCacheStatistics().currentSize()).isEqualTo(3); + + // In production with 300s timeout, these would be evicted after 300s + // For testing, we verify cache structure is correct + assertThat(Pattern.getCacheStatistics().evictionsIdle()).isEqualTo(0); + } + + @Test + void testEvictionThreadStartsAutomatically() throws InterruptedException { + // Verify eviction thread is running + // Compile a pattern to ensure cache is initialized + Pattern.compile("test"); + + // Give thread time to start + Thread.sleep(100); + + // Thread should be running (we can't directly access it, but no exceptions is good) + 
assertThat(Pattern.getCacheStatistics().currentSize()).isEqualTo(1); + } + + @Test + void testEvictionsCountedSeparately() { + // LRU evictions are counted separately from idle evictions + + // Start with clean state + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.evictionsLRU()).isEqualTo(0); + assertThat(stats.evictionsIdle()).isEqualTo(0); + + // Currently we can't easily trigger evictions in tests + // due to static cache, but we verify the counters exist + assertThat(stats.totalEvictions()).isEqualTo(0); + } + + @Test + void testCacheStatisticsTrackEvictions() throws InterruptedException { + // Compile enough patterns to trigger LRU eviction + int maxSize = 50000; // Default + for (int i = 0; i < maxSize + 100; i++) { + Pattern.compile("pattern" + i); } - @Test - void testIdleEvictionActuallyEvictsPatterns() throws InterruptedException { - // Compile a pattern - Pattern.compile("test"); + // Wait for async eviction to complete + Thread.sleep(200); - CacheStatistics before = Pattern.getCacheStatistics(); - assertThat(before.currentSize()).isEqualTo(1); - assertThat(before.evictionsIdle()).isEqualTo(0); + CacheStatistics stats = Pattern.getCacheStatistics(); - // Wait for idle timeout + scan interval - // Default: 300s idle + 60s scan = too long for test - // So this test verifies the mechanism, real timeout tested separately - Thread.sleep(100); // Small delay - - // Pattern should still be there (not enough time elapsed) - CacheStatistics after = Pattern.getCacheStatistics(); - assertThat(after.currentSize()).isEqualTo(1); - } - - @Test - void testIdleEvictionWithShortTimeout() throws InterruptedException { - // Note: We can't easily test with custom config since cache is static - // This test documents the behavior with default config - - // Compile patterns - Pattern.compile("p1"); - Pattern.compile("p2"); - Pattern.compile("p3"); - - assertThat(Pattern.getCacheStatistics().currentSize()).isEqualTo(3); - - // In production with 
300s timeout, these would be evicted after 300s - // For testing, we verify cache structure is correct - assertThat(Pattern.getCacheStatistics().evictionsIdle()).isEqualTo(0); - } + // With soft limits, cache can temporarily exceed max + // Allow up to 10% overage + int maxAllowed = (int) (maxSize * 1.1); + assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); - @Test - void testEvictionThreadStartsAutomatically() throws InterruptedException { - // Verify eviction thread is running - // Compile a pattern to ensure cache is initialized - Pattern.compile("test"); + // Some evictions should have occurred + assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThanOrEqualTo(0); - // Give thread time to start - Thread.sleep(100); - - // Thread should be running (we can't directly access it, but no exceptions is good) - assertThat(Pattern.getCacheStatistics().currentSize()).isEqualTo(1); - } - - @Test - void testEvictionsCountedSeparately() { - // LRU evictions are counted separately from idle evictions - - // Start with clean state - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.evictionsLRU()).isEqualTo(0); - assertThat(stats.evictionsIdle()).isEqualTo(0); - - // Currently we can't easily trigger evictions in tests - // due to static cache, but we verify the counters exist - assertThat(stats.totalEvictions()).isEqualTo(0); - } - - @Test - void testCacheStatisticsTrackEvictions() throws InterruptedException { - // Compile enough patterns to trigger LRU eviction - int maxSize = 50000; // Default - for (int i = 0; i < maxSize + 100; i++) { - Pattern.compile("pattern" + i); - } - - // Wait for async eviction to complete - Thread.sleep(200); - - CacheStatistics stats = Pattern.getCacheStatistics(); - - // With soft limits, cache can temporarily exceed max - // Allow up to 10% overage - int maxAllowed = (int) (maxSize * 1.1); - assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); - - // Some evictions should 
have occurred - assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThanOrEqualTo(0); - - // Total evictions should include all types - assertThat(stats.totalEvictions()).isGreaterThanOrEqualTo(0); - } + // Total evictions should include all types + assertThat(stats.totalEvictions()).isGreaterThanOrEqualTo(0); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/NativeMemoryTrackingIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/NativeMemoryTrackingIT.java index e532995..8bc8508 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/NativeMemoryTrackingIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/NativeMemoryTrackingIT.java @@ -16,336 +16,337 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Pattern; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.assertj.core.api.Assertions.*; - /** * Comprehensive tests for native memory tracking in the pattern cache. * - * Tests verify that off-heap memory usage is accurately tracked as patterns - * are compiled, cached, and evicted. + *

Tests verify that off-heap memory usage is accurately tracked as patterns are compiled, + * cached, and evicted. */ class NativeMemoryTrackingIT { - @BeforeEach - void setUp() { - Pattern.resetCache(); - } - - @AfterEach - void tearDown() { - Pattern.resetCache(); - } - - @Test - void testPattern_ReportsNonZeroMemory() { - Pattern p = Pattern.compile("test"); - - assertThat(p.getNativeMemoryBytes()).isGreaterThan(0); - } - - @Test - void testPattern_SimplePatternMemory() { - Pattern p = Pattern.compile("hello"); - - // Simple patterns should be reasonably small - assertThat(p.getNativeMemoryBytes()).isGreaterThan(0); - assertThat(p.getNativeMemoryBytes()).isLessThan(10000); // < 10KB - } - - @Test - void testPattern_ComplexPatternUsesMoreMemory() { - Pattern simple = Pattern.compile("a"); - Pattern complex = Pattern.compile("(\\w+|\\d+|[a-z]{10,50}){0,20}"); - - // Complex patterns compile to larger DFA/NFA programs - assertThat(complex.getNativeMemoryBytes()).isGreaterThan(simple.getNativeMemoryBytes()); - } - - @Test - void testPattern_UncachedPatternReportsMemory() { - Pattern p = Pattern.compileWithoutCache("test"); - try { - assertThat(p.getNativeMemoryBytes()).isGreaterThan(0); - } finally { - p.close(); - } + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + void testPattern_ReportsNonZeroMemory() { + Pattern p = Pattern.compile("test"); + + assertThat(p.getNativeMemoryBytes()).isGreaterThan(0); + } + + @Test + void testPattern_SimplePatternMemory() { + Pattern p = Pattern.compile("hello"); + + // Simple patterns should be reasonably small + assertThat(p.getNativeMemoryBytes()).isGreaterThan(0); + assertThat(p.getNativeMemoryBytes()).isLessThan(10000); // < 10KB + } + + @Test + void testPattern_ComplexPatternUsesMoreMemory() { + Pattern simple = Pattern.compile("a"); + Pattern complex = Pattern.compile("(\\w+|\\d+|[a-z]{10,50}){0,20}"); + + // Complex patterns compile to larger 
DFA/NFA programs + assertThat(complex.getNativeMemoryBytes()).isGreaterThan(simple.getNativeMemoryBytes()); + } + + @Test + void testPattern_UncachedPatternReportsMemory() { + Pattern p = Pattern.compileWithoutCache("test"); + try { + assertThat(p.getNativeMemoryBytes()).isGreaterThan(0); + } finally { + p.close(); } - - @Test - void testCache_TracksTotalMemory() { - // Compile first pattern - Pattern p1 = Pattern.compile("pattern1"); - long p1Memory = p1.getNativeMemoryBytes(); - CacheStatistics stats1 = Pattern.getCacheStatistics(); - - assertThat(stats1.nativeMemoryBytes()).isEqualTo(p1Memory); - - // Compile second pattern - Pattern p2 = Pattern.compile("pattern2"); - long p2Memory = p2.getNativeMemoryBytes(); - CacheStatistics stats2 = Pattern.getCacheStatistics(); - - assertThat(stats2.nativeMemoryBytes()).isEqualTo(p1Memory + p2Memory); - } - - @Test - void testCache_MemoryIncreasesWithPatterns() { - CacheStatistics before = Pattern.getCacheStatistics(); - assertThat(before.nativeMemoryBytes()).isEqualTo(0); - - // Compile 10 patterns and track exact memory - long expectedMemory = 0; - Pattern[] patterns = new Pattern[10]; - for (int i = 0; i < 10; i++) { - patterns[i] = Pattern.compile("pattern" + i); - expectedMemory += patterns[i].getNativeMemoryBytes(); - } - - CacheStatistics after = Pattern.getCacheStatistics(); - assertThat(after.nativeMemoryBytes()).isEqualTo(expectedMemory); - assertThat(after.currentSize()).isEqualTo(10); + } + + @Test + void testCache_TracksTotalMemory() { + // Compile first pattern + Pattern p1 = Pattern.compile("pattern1"); + long p1Memory = p1.getNativeMemoryBytes(); + CacheStatistics stats1 = Pattern.getCacheStatistics(); + + assertThat(stats1.nativeMemoryBytes()).isEqualTo(p1Memory); + + // Compile second pattern + Pattern p2 = Pattern.compile("pattern2"); + long p2Memory = p2.getNativeMemoryBytes(); + CacheStatistics stats2 = Pattern.getCacheStatistics(); + + assertThat(stats2.nativeMemoryBytes()).isEqualTo(p1Memory + 
p2Memory); + } + + @Test + void testCache_MemoryIncreasesWithPatterns() { + CacheStatistics before = Pattern.getCacheStatistics(); + assertThat(before.nativeMemoryBytes()).isEqualTo(0); + + // Compile 10 patterns and track exact memory + long expectedMemory = 0; + Pattern[] patterns = new Pattern[10]; + for (int i = 0; i < 10; i++) { + patterns[i] = Pattern.compile("pattern" + i); + expectedMemory += patterns[i].getNativeMemoryBytes(); } - @Test - void testCache_CacheHitDoesNotIncreaseMemory() { - Pattern p = Pattern.compile("test"); - long patternMemory = p.getNativeMemoryBytes(); - CacheStatistics stats1 = Pattern.getCacheStatistics(); - - assertThat(stats1.nativeMemoryBytes()).isEqualTo(patternMemory); - - // Cache hit - should not change memory - Pattern p2 = Pattern.compile("test"); - CacheStatistics stats2 = Pattern.getCacheStatistics(); - - assertThat(p2).isSameAs(p); // Same instance - assertThat(stats2.nativeMemoryBytes()).isEqualTo(patternMemory); - assertThat(stats2.hits()).isEqualTo(1); + CacheStatistics after = Pattern.getCacheStatistics(); + assertThat(after.nativeMemoryBytes()).isEqualTo(expectedMemory); + assertThat(after.currentSize()).isEqualTo(10); + } + + @Test + void testCache_CacheHitDoesNotIncreaseMemory() { + Pattern p = Pattern.compile("test"); + long patternMemory = p.getNativeMemoryBytes(); + CacheStatistics stats1 = Pattern.getCacheStatistics(); + + assertThat(stats1.nativeMemoryBytes()).isEqualTo(patternMemory); + + // Cache hit - should not change memory + Pattern p2 = Pattern.compile("test"); + CacheStatistics stats2 = Pattern.getCacheStatistics(); + + assertThat(p2).isSameAs(p); // Same instance + assertThat(stats2.nativeMemoryBytes()).isEqualTo(patternMemory); + assertThat(stats2.hits()).isEqualTo(1); + } + + @Test + void testCache_ClearResetsMemoryToZero() { + // Compile some patterns and track exact memory + long expectedMemory = 0; + for (int i = 0; i < 5; i++) { + Pattern p = Pattern.compile("pattern" + i); + expectedMemory += 
p.getNativeMemoryBytes(); } - @Test - void testCache_ClearResetsMemoryToZero() { - // Compile some patterns and track exact memory - long expectedMemory = 0; - for (int i = 0; i < 5; i++) { - Pattern p = Pattern.compile("pattern" + i); - expectedMemory += p.getNativeMemoryBytes(); - } - - CacheStatistics before = Pattern.getCacheStatistics(); - assertThat(before.nativeMemoryBytes()).isEqualTo(expectedMemory); - assertThat(before.currentSize()).isEqualTo(5); - - // Clear cache - Pattern.clearCache(); - - CacheStatistics after = Pattern.getCacheStatistics(); - assertThat(after.nativeMemoryBytes()).isEqualTo(0); - assertThat(after.currentSize()).isEqualTo(0); - } - - @Test - void testCache_PeakMemoryTracked() { - // Compile patterns and track exact memory - long expectedMemory = 0; - for (int i = 0; i < 10; i++) { - Pattern p = Pattern.compile("pattern" + i); - expectedMemory += p.getNativeMemoryBytes(); - } - - CacheStatistics stats = Pattern.getCacheStatistics(); - - // Peak should equal current (no evictions yet) - assertThat(stats.nativeMemoryBytes()).isEqualTo(expectedMemory); - assertThat(stats.peakNativeMemoryBytes()).isEqualTo(expectedMemory); + CacheStatistics before = Pattern.getCacheStatistics(); + assertThat(before.nativeMemoryBytes()).isEqualTo(expectedMemory); + assertThat(before.currentSize()).isEqualTo(5); + + // Clear cache + Pattern.clearCache(); + + CacheStatistics after = Pattern.getCacheStatistics(); + assertThat(after.nativeMemoryBytes()).isEqualTo(0); + assertThat(after.currentSize()).isEqualTo(0); + } + + @Test + void testCache_PeakMemoryTracked() { + // Compile patterns and track exact memory + long expectedMemory = 0; + for (int i = 0; i < 10; i++) { + Pattern p = Pattern.compile("pattern" + i); + expectedMemory += p.getNativeMemoryBytes(); } - @Test - void testCache_PeakMemoryPreservedAfterEviction() throws InterruptedException { - // Fill cache to trigger eviction - int count = 50100; // Just over default max of 50000 - for (int i = 0; i < 
count; i++) { - Pattern.compile("pattern" + i); - } - - // Wait for async eviction - Thread.sleep(300); + CacheStatistics stats = Pattern.getCacheStatistics(); - CacheStatistics stats = Pattern.getCacheStatistics(); + // Peak should equal current (no evictions yet) + assertThat(stats.nativeMemoryBytes()).isEqualTo(expectedMemory); + assertThat(stats.peakNativeMemoryBytes()).isEqualTo(expectedMemory); + } - // Peak should be >= current (some evicted) - assertThat(stats.peakNativeMemoryBytes()).isGreaterThanOrEqualTo(stats.nativeMemoryBytes()); + @Test + void testCache_PeakMemoryPreservedAfterEviction() throws InterruptedException { + // Fill cache to trigger eviction + int count = 50100; // Just over default max of 50000 + for (int i = 0; i < count; i++) { + Pattern.compile("pattern" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testCache_MemoryDecrementsOnLRUEviction() throws InterruptedException { - // Fill cache beyond max size to trigger LRU eviction - int maxSize = 50000; - for (int i = 0; i < maxSize + 100; i++) { - Pattern.compile("pattern" + i); - } - - // Wait for async eviction - Thread.sleep(300); + // Wait for async eviction + Thread.sleep(300); - CacheStatistics stats = Pattern.getCacheStatistics(); + CacheStatistics stats = Pattern.getCacheStatistics(); - // Memory should reflect current cache size, not all patterns ever compiled - // With soft limits, cache can be slightly over max - int expectedMaxPatterns = (int) (maxSize * 1.1); + // Peak should be >= current (some evicted) + assertThat(stats.peakNativeMemoryBytes()).isGreaterThanOrEqualTo(stats.nativeMemoryBytes()); + } - // Get a reference pattern for average size - Pattern ref = Pattern.compile("reference"); - long avgSize = ref.getNativeMemoryBytes(); - - // Memory should be roughly proportional to cache size - long expectedMaxMemory = expectedMaxPatterns * avgSize * 2; // Allow 2x for variation - assertThat(stats.nativeMemoryBytes()).isLessThan(expectedMaxMemory); + 
@Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testCache_MemoryDecrementsOnLRUEviction() throws InterruptedException { + // Fill cache beyond max size to trigger LRU eviction + int maxSize = 50000; + for (int i = 0; i < maxSize + 100; i++) { + Pattern.compile("pattern" + i); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testCache_ConcurrentCompilationTracksMemory() throws InterruptedException { - int threadCount = 50; - int patternsPerThread = 10; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - // Collect all patterns to calculate exact expected memory - java.util.concurrent.ConcurrentHashMap allPatterns = new java.util.concurrent.ConcurrentHashMap<>(); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + // Wait for async eviction + Thread.sleep(300); + + CacheStatistics stats = Pattern.getCacheStatistics(); + + // Memory should reflect current cache size, not all patterns ever compiled + // With soft limits, cache can be slightly over max + int expectedMaxPatterns = (int) (maxSize * 1.1); + + // Get a reference pattern for average size + Pattern ref = Pattern.compile("reference"); + long avgSize = ref.getNativeMemoryBytes(); + + // Memory should be roughly proportional to cache size + long expectedMaxMemory = expectedMaxPatterns * avgSize * 2; // Allow 2x for variation + assertThat(stats.nativeMemoryBytes()).isLessThan(expectedMaxMemory); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testCache_ConcurrentCompilationTracksMemory() throws InterruptedException { + int threadCount = 50; + int patternsPerThread = 10; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + // Collect all patterns to calculate exact expected memory + java.util.concurrent.ConcurrentHashMap 
allPatterns = + new java.util.concurrent.ConcurrentHashMap<>(); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - for (int j = 0; j < patternsPerThread; j++) { - String key = "t" + threadId + "_p" + j; - Pattern p = Pattern.compile(key); - allPatterns.put(key, p); - } + start.await(); + for (int j = 0; j < patternsPerThread; j++) { + String key = "t" + threadId + "_p" + j; + Pattern p = Pattern.compile(key); + allPatterns.put(key, p); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } - - start.countDown(); - done.await(); - - assertThat(errors.get()).isEqualTo(0); - - // Calculate exact expected memory from all patterns - long expectedMemory = allPatterns.values().stream() - .mapToLong(Pattern::getNativeMemoryBytes) - .sum(); + }) + .start(); + } - CacheStatistics stats = Pattern.getCacheStatistics(); - int expectedPatterns = threadCount * patternsPerThread; + start.countDown(); + done.await(); - assertThat(stats.currentSize()).isEqualTo(expectedPatterns); - assertThat(stats.nativeMemoryBytes()).isEqualTo(expectedMemory); - } + assertThat(errors.get()).isEqualTo(0); - @Test - void testCache_DifferentCaseSensitivityTracksSeparately() { - Pattern p1 = Pattern.compile("TEST", true); - long p1Memory = p1.getNativeMemoryBytes(); - CacheStatistics stats1 = Pattern.getCacheStatistics(); + // Calculate exact expected memory from all patterns + long expectedMemory = + allPatterns.values().stream().mapToLong(Pattern::getNativeMemoryBytes).sum(); - assertThat(stats1.nativeMemoryBytes()).isEqualTo(p1Memory); + CacheStatistics stats = Pattern.getCacheStatistics(); + int expectedPatterns = threadCount * patternsPerThread; - Pattern p2 = Pattern.compile("TEST", false); - long p2Memory = p2.getNativeMemoryBytes(); - CacheStatistics stats2 = Pattern.getCacheStatistics(); + 
assertThat(stats.currentSize()).isEqualTo(expectedPatterns); + assertThat(stats.nativeMemoryBytes()).isEqualTo(expectedMemory); + } - // Two separate patterns - exact sum - assertThat(stats2.nativeMemoryBytes()).isEqualTo(p1Memory + p2Memory); - assertThat(stats2.currentSize()).isEqualTo(2); - } + @Test + void testCache_DifferentCaseSensitivityTracksSeparately() { + Pattern p1 = Pattern.compile("TEST", true); + long p1Memory = p1.getNativeMemoryBytes(); + CacheStatistics stats1 = Pattern.getCacheStatistics(); - @Test - void testCache_ResetClearsMemory() { - // Compile patterns and track exact memory - long expectedMemory = 0; - for (int i = 0; i < 5; i++) { - Pattern p = Pattern.compile("pattern" + i); - expectedMemory += p.getNativeMemoryBytes(); - } + assertThat(stats1.nativeMemoryBytes()).isEqualTo(p1Memory); - assertThat(Pattern.getCacheStatistics().nativeMemoryBytes()).isEqualTo(expectedMemory); + Pattern p2 = Pattern.compile("TEST", false); + long p2Memory = p2.getNativeMemoryBytes(); + CacheStatistics stats2 = Pattern.getCacheStatistics(); - // Full reset - Pattern.resetCache(); + // Two separate patterns - exact sum + assertThat(stats2.nativeMemoryBytes()).isEqualTo(p1Memory + p2Memory); + assertThat(stats2.currentSize()).isEqualTo(2); + } - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.nativeMemoryBytes()).isEqualTo(0); - assertThat(stats.peakNativeMemoryBytes()).isEqualTo(0); - assertThat(stats.currentSize()).isEqualTo(0); + @Test + void testCache_ResetClearsMemory() { + // Compile patterns and track exact memory + long expectedMemory = 0; + for (int i = 0; i < 5; i++) { + Pattern p = Pattern.compile("pattern" + i); + expectedMemory += p.getNativeMemoryBytes(); } - @Test - void testCache_MemoryConsistentWithPatternCount() { - // Compile patterns and track exact memory - int count = 100; - long expectedMemory = 0; - for (int i = 0; i < count; i++) { - Pattern p = Pattern.compile(String.format("pat%03d", i)); // pat000, pat001, 
etc. - expectedMemory += p.getNativeMemoryBytes(); - } - - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.currentSize()).isEqualTo(count); - assertThat(stats.nativeMemoryBytes()).isEqualTo(expectedMemory); + assertThat(Pattern.getCacheStatistics().nativeMemoryBytes()).isEqualTo(expectedMemory); + + // Full reset + Pattern.resetCache(); + + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.nativeMemoryBytes()).isEqualTo(0); + assertThat(stats.peakNativeMemoryBytes()).isEqualTo(0); + assertThat(stats.currentSize()).isEqualTo(0); + } + + @Test + void testCache_MemoryConsistentWithPatternCount() { + // Compile patterns and track exact memory + int count = 100; + long expectedMemory = 0; + for (int i = 0; i < count; i++) { + Pattern p = Pattern.compile(String.format("pat%03d", i)); // pat000, pat001, etc. + expectedMemory += p.getNativeMemoryBytes(); } - @Test - void testPattern_MemoryVariesByComplexity() { - // Compile patterns of increasing complexity - Pattern p1 = Pattern.compile("a"); - Pattern p2 = Pattern.compile("abc"); - Pattern p3 = Pattern.compile("[a-z]+"); - Pattern p4 = Pattern.compile("(a|b|c|d|e)+"); - Pattern p5 = Pattern.compile("(?:[a-z]+\\d+){1,10}"); - - // All should report positive memory - assertThat(p1.getNativeMemoryBytes()).isGreaterThan(0); - assertThat(p2.getNativeMemoryBytes()).isGreaterThan(0); - assertThat(p3.getNativeMemoryBytes()).isGreaterThan(0); - assertThat(p4.getNativeMemoryBytes()).isGreaterThan(0); - assertThat(p5.getNativeMemoryBytes()).isGreaterThan(0); + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.currentSize()).isEqualTo(count); + assertThat(stats.nativeMemoryBytes()).isEqualTo(expectedMemory); + } + + @Test + void testPattern_MemoryVariesByComplexity() { + // Compile patterns of increasing complexity + Pattern p1 = Pattern.compile("a"); + Pattern p2 = Pattern.compile("abc"); + Pattern p3 = Pattern.compile("[a-z]+"); + Pattern p4 = 
Pattern.compile("(a|b|c|d|e)+"); + Pattern p5 = Pattern.compile("(?:[a-z]+\\d+){1,10}"); + + // All should report positive memory + assertThat(p1.getNativeMemoryBytes()).isGreaterThan(0); + assertThat(p2.getNativeMemoryBytes()).isGreaterThan(0); + assertThat(p3.getNativeMemoryBytes()).isGreaterThan(0); + assertThat(p4.getNativeMemoryBytes()).isGreaterThan(0); + assertThat(p5.getNativeMemoryBytes()).isGreaterThan(0); + } + + @Test + void testCache_StatisticsSnapshotIsConsistent() { + // Compile patterns and track exact memory + long expectedMemory = 0; + for (int i = 0; i < 10; i++) { + Pattern p = Pattern.compile("pattern" + i); + expectedMemory += p.getNativeMemoryBytes(); } - @Test - void testCache_StatisticsSnapshotIsConsistent() { - // Compile patterns and track exact memory - long expectedMemory = 0; - for (int i = 0; i < 10; i++) { - Pattern p = Pattern.compile("pattern" + i); - expectedMemory += p.getNativeMemoryBytes(); - } - - CacheStatistics stats = Pattern.getCacheStatistics(); - - // Snapshot should have exact values - assertThat(stats.currentSize()).isEqualTo(10); - assertThat(stats.nativeMemoryBytes()).isEqualTo(expectedMemory); - assertThat(stats.peakNativeMemoryBytes()).isEqualTo(expectedMemory); - assertThat(stats.misses()).isEqualTo(10); - assertThat(stats.hits()).isEqualTo(0); - } + CacheStatistics stats = Pattern.getCacheStatistics(); + + // Snapshot should have exact values + assertThat(stats.currentSize()).isEqualTo(10); + assertThat(stats.nativeMemoryBytes()).isEqualTo(expectedMemory); + assertThat(stats.peakNativeMemoryBytes()).isEqualTo(expectedMemory); + assertThat(stats.misses()).isEqualTo(10); + assertThat(stats.hits()).isEqualTo(0); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ResourceLimitConfigurationIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ResourceLimitConfigurationIT.java index 86af0ef..0faaa1f 100644 --- 
a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ResourceLimitConfigurationIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ResourceLimitConfigurationIT.java @@ -1,168 +1,170 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; import com.axonops.libre2.api.ResourceException; -import com.axonops.libre2.util.ResourceTracker; +import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -import java.util.concurrent.TimeUnit; - -import static org.assertj.core.api.Assertions.*; - -/** - * CRITICAL: Tests that resource limits are enforced and are ACTIVE (not cumulative). - */ +/** CRITICAL: Tests that resource limits are enforced and are ACTIVE (not cumulative). */ class ResourceLimitConfigurationIT { - @BeforeEach - void setUp() { - Pattern.resetCache(); - Pattern.getGlobalCache().getResourceTracker().reset(); + @BeforeEach + void setUp() { + Pattern.resetCache(); + Pattern.getGlobalCache().getResourceTracker().reset(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + Pattern.getGlobalCache().getResourceTracker().reset(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testMaxSimultaneousPatterns_NotCumulative() { + // CRITICAL TEST: Verify limit is ACTIVE count, not cumulative + + // Compile 10 patterns + Pattern[] patterns = new Pattern[10]; + for (int i = 0; i < 10; i++) { + patterns[i] = Pattern.compileWithoutCache("pattern" + i); } - @AfterEach - void tearDown() { - Pattern.resetCache(); - Pattern.getGlobalCache().getResourceTracker().reset(); + assertThat(Pattern.getGlobalCache().getResourceTracker().getActivePatternCount()).isEqualTo(10); + assertThat(Pattern.getGlobalCache().getResourceTracker().getTotalPatternsCompiled()) + .isEqualTo(10); 
+ + // Close all 10 patterns + for (Pattern p : patterns) { + p.close(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testMaxSimultaneousPatterns_NotCumulative() { - // CRITICAL TEST: Verify limit is ACTIVE count, not cumulative + assertThat(Pattern.getGlobalCache().getResourceTracker().getActivePatternCount()) + .isEqualTo(0); // Active = 0 + assertThat(Pattern.getGlobalCache().getResourceTracker().getTotalPatternsCompiled()) + .isEqualTo(10); // Cumulative = 10 - // Compile 10 patterns - Pattern[] patterns = new Pattern[10]; - for (int i = 0; i < 10; i++) { - patterns[i] = Pattern.compileWithoutCache("pattern" + i); - } + // Compile 10 NEW patterns - should SUCCESS (not "20 compiled" - patterns were freed) + for (int i = 0; i < 10; i++) { + Pattern p = Pattern.compileWithoutCache("new" + i); + assertThat(p).isNotNull(); + p.close(); + } - assertThat(Pattern.getGlobalCache().getResourceTracker().getActivePatternCount()).isEqualTo(10); - assertThat(Pattern.getGlobalCache().getResourceTracker().getTotalPatternsCompiled()).isEqualTo(10); + // Active still 0, but cumulative is now 20 + assertThat(Pattern.getGlobalCache().getResourceTracker().getActivePatternCount()).isEqualTo(0); + assertThat(Pattern.getGlobalCache().getResourceTracker().getTotalPatternsCompiled()) + .isEqualTo(20); + + // This proves limit is NOT cumulative! 
+ } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testResourceStatisticsActiveVsCumulative() { + // Compile and close patterns repeatedly + for (int i = 0; i < 100; i++) { + Pattern p = Pattern.compileWithoutCache("test" + i); + p.close(); + } - // Close all 10 patterns - for (Pattern p : patterns) { - p.close(); - } + com.axonops.libre2.util.ResourceTracker.ResourceStatistics stats = + Pattern.getGlobalCache().getResourceTracker().getStatistics(); + + assertThat(stats.activePatterns()).isEqualTo(0); // None active + assertThat(stats.totalCompiled()).isEqualTo(100); // 100 compiled over lifetime + assertThat(stats.totalClosed()).isEqualTo(100); // 100 closed + assertThat(stats.hasPotentialLeaks()).isFalse(); // No leaks + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testLeakDetection() { + // Compile patterns but don't close some + for (int i = 0; i < 10; i++) { + Pattern p = Pattern.compileWithoutCache("test" + i); + if (i < 5) { + p.close(); // Close first 5 + } + // Last 5 remain open (leak) + } - assertThat(Pattern.getGlobalCache().getResourceTracker().getActivePatternCount()).isEqualTo(0); // Active = 0 - assertThat(Pattern.getGlobalCache().getResourceTracker().getTotalPatternsCompiled()).isEqualTo(10); // Cumulative = 10 + com.axonops.libre2.util.ResourceTracker.ResourceStatistics stats = + Pattern.getGlobalCache().getResourceTracker().getStatistics(); - // Compile 10 NEW patterns - should SUCCESS (not "20 compiled" - patterns were freed) - for (int i = 0; i < 10; i++) { - Pattern p = Pattern.compileWithoutCache("new" + i); - assertThat(p).isNotNull(); - p.close(); - } + assertThat(stats.activePatterns()).isEqualTo(5); // 5 still active + assertThat(stats.totalCompiled()).isEqualTo(10); + assertThat(stats.totalClosed()).isEqualTo(5); + assertThat(stats.hasPotentialLeaks()).isFalse(); // Not a leak if still active + } - // Active still 0, but cumulative is now 20 - 
assertThat(Pattern.getGlobalCache().getResourceTracker().getActivePatternCount()).isEqualTo(0); - assertThat(Pattern.getGlobalCache().getResourceTracker().getTotalPatternsCompiled()).isEqualTo(20); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testMaxMatchersPerPattern_Enforced() { + Pattern p = Pattern.compile("test"); - // This proves limit is NOT cumulative! + // Create matchers up to limit (default 10K) + Matcher[] matchers = new Matcher[10000]; + for (int i = 0; i < 10000; i++) { + matchers[i] = p.matcher("test"); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testResourceStatisticsActiveVsCumulative() { - // Compile and close patterns repeatedly - for (int i = 0; i < 100; i++) { - Pattern p = Pattern.compileWithoutCache("test" + i); - p.close(); - } - - com.axonops.libre2.util.ResourceTracker.ResourceStatistics stats = Pattern.getGlobalCache().getResourceTracker().getStatistics(); - - assertThat(stats.activePatterns()).isEqualTo(0); // None active - assertThat(stats.totalCompiled()).isEqualTo(100); // 100 compiled over lifetime - assertThat(stats.totalClosed()).isEqualTo(100); // 100 closed - assertThat(stats.hasPotentialLeaks()).isFalse(); // No leaks - } + assertThat(p.getRefCount()).isEqualTo(10000); - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testLeakDetection() { - // Compile patterns but don't close some - for (int i = 0; i < 10; i++) { - Pattern p = Pattern.compileWithoutCache("test" + i); - if (i < 5) { - p.close(); // Close first 5 - } - // Last 5 remain open (leak) - } - - com.axonops.libre2.util.ResourceTracker.ResourceStatistics stats = Pattern.getGlobalCache().getResourceTracker().getStatistics(); - - assertThat(stats.activePatterns()).isEqualTo(5); // 5 still active - assertThat(stats.totalCompiled()).isEqualTo(10); - assertThat(stats.totalClosed()).isEqualTo(5); - assertThat(stats.hasPotentialLeaks()).isFalse(); // Not a leak if still active + // 10,001st matcher should be rejected + 
assertThatThrownBy(() -> p.matcher("test")) + .isInstanceOf(ResourceException.class) + .hasMessageContaining("Maximum matchers per pattern exceeded"); + + // Clean up + for (Matcher m : matchers) { + m.close(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testMaxMatchersPerPattern_Enforced() { - Pattern p = Pattern.compile("test"); + assertThat(p.getRefCount()).isEqualTo(0); + } - // Create matchers up to limit (default 10K) - Matcher[] matchers = new Matcher[10000]; - for (int i = 0; i < 10000; i++) { - matchers[i] = p.matcher("test"); - } + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testMaxMatchersLimit_MatchersClosedReusesSlot() { + Pattern p = Pattern.compile("test"); - assertThat(p.getRefCount()).isEqualTo(10000); + // Create 5000 matchers + Matcher[] matchers = new Matcher[5000]; + for (int i = 0; i < 5000; i++) { + matchers[i] = p.matcher("test"); + } - // 10,001st matcher should be rejected - assertThatThrownBy(() -> p.matcher("test")) - .isInstanceOf(ResourceException.class) - .hasMessageContaining("Maximum matchers per pattern exceeded"); + assertThat(p.getRefCount()).isEqualTo(5000); - // Clean up - for (Matcher m : matchers) { - m.close(); - } + // Close 4000 matchers + for (int i = 0; i < 4000; i++) { + matchers[i].close(); + } + + assertThat(p.getRefCount()).isEqualTo(1000); - assertThat(p.getRefCount()).isEqualTo(0); + // Can now create 9000 more matchers (total limit 10K, currently 1K active) + Matcher[] moreMatchers = new Matcher[9000]; + for (int i = 0; i < 9000; i++) { + moreMatchers[i] = p.matcher("test"); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testMaxMatchersLimit_MatchersClosedReusesSlot() { - Pattern p = Pattern.compile("test"); - - // Create 5000 matchers - Matcher[] matchers = new Matcher[5000]; - for (int i = 0; i < 5000; i++) { - matchers[i] = p.matcher("test"); - } - - assertThat(p.getRefCount()).isEqualTo(5000); - - // Close 4000 matchers - for (int i = 0; i < 4000; i++) { 
- matchers[i].close(); - } - - assertThat(p.getRefCount()).isEqualTo(1000); - - // Can now create 9000 more matchers (total limit 10K, currently 1K active) - Matcher[] moreMatchers = new Matcher[9000]; - for (int i = 0; i < 9000; i++) { - moreMatchers[i] = p.matcher("test"); - } - - assertThat(p.getRefCount()).isEqualTo(10000); // At limit - - // Clean up - for (int i = 1000; i < 5000; i++) { - matchers[i].close(); - } - for (Matcher m : moreMatchers) { - m.close(); - } + assertThat(p.getRefCount()).isEqualTo(10000); // At limit + + // Clean up + for (int i = 1000; i < 5000; i++) { + matchers[i].close(); + } + for (Matcher m : moreMatchers) { + m.close(); } + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ThreadSafetyIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ThreadSafetyIT.java index cab13f9..24cfe13 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ThreadSafetyIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/cache/ThreadSafetyIT.java @@ -1,221 +1,228 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Pattern; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.assertj.core.api.Assertions.*; - -/** - * Thread safety verification tests. - */ +/** Thread safety verification tests. 
*/ class ThreadSafetyIT { - @BeforeEach - void setUp() { - Pattern.resetCache(); - } - - @AfterEach - void tearDown() { - Pattern.resetCache(); - } - - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentCacheMapAccess_100Threads() throws InterruptedException { - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentCacheMapAccess_100Threads() throws InterruptedException { + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - - int op = threadId % 10; - if (op < 3) { - // 30%: Insert new - Pattern.compile("new" + threadId); - } else if (op < 7) { - // 40%: Get (cache hit) - Pattern.compile("existing"); - } else { - // 30%: Get with iteration (cache statistics) - Pattern.getCacheStatistics(); - } + start.await(); + + int op = threadId % 10; + if (op < 3) { + // 30%: Insert new + Pattern.compile("new" + threadId); + } else if (op < 7) { + // 40%: Get (cache hit) + Pattern.compile("existing"); + } else { + // 30%: Get with iteration (cache statistics) + Pattern.getCacheStatistics(); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - // Pre-populate "existing" - Pattern.compile("existing"); + // Pre-populate "existing" + Pattern.compile("existing"); - start.countDown(); - done.await(); + 
start.countDown(); + done.await(); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // Verify cache is consistent - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.totalRequests()).isGreaterThan(0); - } + // Verify cache is consistent + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.totalRequests()).isGreaterThan(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentMetricsUpdates_100Threads() throws InterruptedException { + int threadCount = 100; + int opsPerThread = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentMetricsUpdates_100Threads() throws InterruptedException { - int threadCount = 100; - int opsPerThread = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - for (int j = 0; j < opsPerThread; j++) { - // Each operation increments metrics - Pattern.compile("pattern" + (j % 10)); // Some hits, some misses - } + start.await(); + for (int j = 0; j < opsPerThread; j++) { + // Each operation increments metrics + Pattern.compile("pattern" + (j % 10)); // Some hits, some misses + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // Verify metrics are accurate (no lost increments) - 
CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.totalRequests()).isEqualTo(threadCount * opsPerThread); - assertThat(stats.hits() + stats.misses()).isEqualTo(threadCount * opsPerThread); - } + // Verify metrics are accurate (no lost increments) + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.totalRequests()).isEqualTo(threadCount * opsPerThread); + assertThat(stats.hits() + stats.misses()).isEqualTo(threadCount * opsPerThread); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentRefCountUpdates_100Threads() throws InterruptedException { - Pattern p = Pattern.compile("test"); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentRefCountUpdates_100Threads() throws InterruptedException { + Pattern p = Pattern.compile("test"); - int threadCount = 100; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); - // 100 threads create and close matchers simultaneously - for (int i = 0; i < threadCount; i++) { - new Thread(() -> { + // 100 threads create and close matchers simultaneously + for (int i = 0; i < threadCount; i++) { + new Thread( + () -> { try { - start.await(); - try (var m = p.matcher("test")) { - m.matches(); - } + start.await(); + try (var m = p.matcher("test")) { + m.matches(); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // After all matchers closed, refCount should be 0 (no lost 
increments/decrements) - assertThat(p.getRefCount()).isEqualTo(0); - } + // After all matchers closed, refCount should be 0 (no lost increments/decrements) + assertThat(p.getRefCount()).isEqualTo(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testNoConcurrentModificationException() throws InterruptedException { + int threadCount = 50; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testNoConcurrentModificationException() throws InterruptedException { - int threadCount = 50; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - // Mix of operations that iterate and modify cache - if (threadId % 2 == 0) { - Pattern.compile("new" + threadId); - } else { - Pattern.getCacheStatistics(); // Iterates cache - } + start.await(); + // Mix of operations that iterate and modify cache + if (threadId % 2 == 0) { + Pattern.compile("new" + threadId); + } else { + Pattern.getCacheStatistics(); // Iterates cache + } } catch (java.util.ConcurrentModificationException e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } catch (Exception e) { - // Other exceptions are OK for this test + // Other exceptions are OK for this test } finally { - done.countDown(); + done.countDown(); } - }).start(); - } - - start.countDown(); - done.await(); - - // Should never throw ConcurrentModificationException - assertThat(errors.get()).isEqualTo(0); + }) + .start(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testNoDeadlockUnderLoad() throws InterruptedException { - int threadCount = 100; - 
CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + start.countDown(); + done.await(); + + // Should never throw ConcurrentModificationException + assertThat(errors.get()).isEqualTo(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testNoDeadlockUnderLoad() throws InterruptedException { + int threadCount = 100; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - // Operations that acquire locks in different orders - for (int j = 0; j < 100; j++) { - Pattern p = Pattern.compile("pattern" + (j % 20)); - try (var m = p.matcher("test")) { - m.find(); - } - Pattern.getCacheStatistics(); + start.await(); + // Operations that acquire locks in different orders + for (int j = 0; j < 100; j++) { + Pattern p = Pattern.compile("pattern" + (j % 20)); + try (var m = p.matcher("test")) { + m.find(); } + Pattern.getCacheStatistics(); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - start.countDown(); + start.countDown(); - // Wait with timeout - if deadlock, this will timeout - boolean completed = done.await(30, TimeUnit.SECONDS); + // Wait with timeout - if deadlock, this will timeout + boolean completed = done.await(30, TimeUnit.SECONDS); - assertThat(completed).isTrue(); // No deadlock - assertThat(errors.get()).isEqualTo(0); - } + assertThat(completed).isTrue(); // No deadlock + assertThat(errors.get()).isEqualTo(0); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/dropwizard/JmxIntegrationIT.java 
b/libre2-core/src/integration-test/java/com/axonops/libre2/dropwizard/JmxIntegrationIT.java index 81821c9..c4e1a33 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/dropwizard/JmxIntegrationIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/dropwizard/JmxIntegrationIT.java @@ -1,322 +1,333 @@ package com.axonops.libre2.dropwizard; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; import com.axonops.libre2.cache.PatternCache; import com.axonops.libre2.cache.RE2Config; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.jmx.JmxReporter; +import java.lang.management.ManagementFactory; +import java.util.Set; +import javax.management.MBeanServer; +import javax.management.ObjectName; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import javax.management.MBeanServer; -import javax.management.ObjectName; -import java.lang.management.ManagementFactory; -import java.util.Set; - -import static org.assertj.core.api.Assertions.*; - /** * JMX integration tests. * - * Verifies that metrics are actually exposed via JMX and accessible - * through the platform MBean server. + *

Verifies that metrics are actually exposed via JMX and accessible through the platform MBean + * server. */ class JmxIntegrationIT { - private JmxReporter jmxReporter; - private MetricRegistry registry; - private PatternCache originalCache; + private JmxReporter jmxReporter; + private MetricRegistry registry; + private PatternCache originalCache; - @BeforeEach - void setup() { - originalCache = Pattern.getGlobalCache(); - registry = new MetricRegistry(); + @BeforeEach + void setup() { + originalCache = Pattern.getGlobalCache(); + registry = new MetricRegistry(); - // Start JMX reporter - jmxReporter = JmxReporter.forRegistry(registry).build(); - jmxReporter.start(); - } + // Start JMX reporter + jmxReporter = JmxReporter.forRegistry(registry).build(); + jmxReporter.start(); + } - @AfterEach - void cleanup() { - if (jmxReporter != null) { - jmxReporter.close(); // close() is more thorough than stop() - } - Pattern.setGlobalCache(originalCache); - - // Allow time for JMX unregistration to complete - try { - Thread.sleep(100); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } + @AfterEach + void cleanup() { + if (jmxReporter != null) { + jmxReporter.close(); // close() is more thorough than stop() } + Pattern.setGlobalCache(originalCache); - @Test - void testMetricsExposedViaJmx() throws Exception { - // Create config with metrics - RE2Config config = RE2MetricsConfig.withMetrics(registry, "com.test.jmx", false); - Pattern.setGlobalCache(new PatternCache(config)); - - // Compile a pattern to generate metrics - Pattern.compile("test.*"); - - // Get MBean server - MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); - - // Query for our metrics (Dropwizard uses "metrics" domain with type classification) - Set mbeans = mBeanServer.queryNames( - new ObjectName("metrics:name=com.test.jmx.*,type=*"), null - ); - - // Verify MBeans are registered - assertThat(mbeans) - .as("JMX MBeans should be registered for RE2 metrics") - 
.hasSizeGreaterThan(5); // Should have multiple metrics - - // Verify specific metrics exist - boolean foundCacheSizeGauge = mbeans.stream() - .anyMatch(name -> name.toString().contains("cache.patterns.current.count") && name.toString().contains("type=gauges")); - - boolean foundCompiledCounter = mbeans.stream() - .anyMatch(name -> name.toString().contains("patterns.compiled.total.count") && name.toString().contains("type=counters")); - - boolean foundCompilationTimer = mbeans.stream() - .anyMatch(name -> name.toString().contains("patterns.compilation.latency") && name.toString().contains("type=timers")); - - assertThat(foundCacheSizeGauge) - .as("cache.patterns.current.count gauge should be in JMX") - .isTrue(); - - assertThat(foundCompiledCounter) - .as("patterns.compiled.total.count counter should be in JMX") - .isTrue(); - - assertThat(foundCompilationTimer) - .as("patterns.compilation.latency timer should be in JMX") - .isTrue(); + // Allow time for JMX unregistration to complete + try { + Thread.sleep(100); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + @Test + void testMetricsExposedViaJmx() throws Exception { + // Create config with metrics + RE2Config config = RE2MetricsConfig.withMetrics(registry, "com.test.jmx", false); + Pattern.setGlobalCache(new PatternCache(config)); + + // Compile a pattern to generate metrics + Pattern.compile("test.*"); + + // Get MBean server + MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); + + // Query for our metrics (Dropwizard uses "metrics" domain with type classification) + Set mbeans = + mBeanServer.queryNames(new ObjectName("metrics:name=com.test.jmx.*,type=*"), null); + + // Verify MBeans are registered + assertThat(mbeans) + .as("JMX MBeans should be registered for RE2 metrics") + .hasSizeGreaterThan(5); // Should have multiple metrics + + // Verify specific metrics exist + boolean foundCacheSizeGauge = + mbeans.stream() + .anyMatch( + name -> + 
name.toString().contains("cache.patterns.current.count") + && name.toString().contains("type=gauges")); + + boolean foundCompiledCounter = + mbeans.stream() + .anyMatch( + name -> + name.toString().contains("patterns.compiled.total.count") + && name.toString().contains("type=counters")); + + boolean foundCompilationTimer = + mbeans.stream() + .anyMatch( + name -> + name.toString().contains("patterns.compilation.latency") + && name.toString().contains("type=timers")); + + assertThat(foundCacheSizeGauge) + .as("cache.patterns.current.count gauge should be in JMX") + .isTrue(); + + assertThat(foundCompiledCounter) + .as("patterns.compiled.total.count counter should be in JMX") + .isTrue(); + + assertThat(foundCompilationTimer) + .as("patterns.compilation.latency timer should be in JMX") + .isTrue(); + } + + @Test + void testCassandraJmxNaming() throws Exception { + // Use Cassandra prefix + RE2Config config = RE2MetricsConfig.forCassandra(registry); + Pattern.setGlobalCache(new PatternCache(config)); + + // Compile pattern + Pattern.compile("test.*"); + + // Get MBean server + MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); + + // Query for Cassandra-prefixed metrics (in "metrics" domain) + Set mbeans = + mBeanServer.queryNames( + new ObjectName("metrics:name=org.apache.cassandra.metrics.RE2.*,type=*"), null); + + // Verify Cassandra-prefixed MBeans exist + assertThat(mbeans).as("Cassandra-prefixed MBeans should exist in JMX").isNotEmpty(); + + // Verify specific Cassandra metric exists + boolean foundCassandraMetric = + mbeans.stream() + .anyMatch( + name -> + name.toString().startsWith("metrics:name=org.apache.cassandra.metrics.RE2.")); + + assertThat(foundCassandraMetric) + .as("Should have org.apache.cassandra.metrics.RE2.* metrics in JMX") + .isTrue(); + } + + @Test + void testJmxGaugeReadable() throws Exception { + // Create config + RE2Config config = RE2MetricsConfig.withMetrics(registry, "jmx.readable.test", false); + 
Pattern.setGlobalCache(new PatternCache(config)); + + // Compile patterns + Pattern.compile("p1"); + Pattern.compile("p2"); + Pattern.compile("p3"); + + // Get MBean server + MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); + + // Find cache.patterns.current.count gauge (Dropwizard format: metrics:name=X,type=gauges) + ObjectName cacheSizeName = + new ObjectName("metrics:name=jmx.readable.test.cache.patterns.current.count,type=gauges"); + + // Verify MBean exists + assertThat(mBeanServer.isRegistered(cacheSizeName)) + .as("cache.patterns.current.count gauge should be registered in JMX") + .isTrue(); + + // Read value via JMX + Object value = mBeanServer.getAttribute(cacheSizeName, "Value"); + + // Verify we can read the value and it's correct + assertThat(value).as("Should be able to read gauge value via JMX").isInstanceOf(Number.class); + + int size = ((Number) value).intValue(); + assertThat(size) + .as("Cache size via JMX should reflect actual cache state (3 patterns)") + .isEqualTo(3); + } + + @Test + void testJmxTimerStatistics() throws Exception { + // Create config + RE2Config config = RE2MetricsConfig.withMetrics(registry, "jmx.timer.test", false); + Pattern.setGlobalCache(new PatternCache(config)); + + // Compile patterns to generate latency data + for (int i = 0; i < 50; i++) { + Pattern.compile("timer_pattern_" + i); } - @Test - void testCassandraJmxNaming() throws Exception { - // Use Cassandra prefix - RE2Config config = RE2MetricsConfig.forCassandra(registry); - Pattern.setGlobalCache(new PatternCache(config)); + // Verify metric exists in registry first + assertThat(registry.getTimers().keySet()) + .as("Timer should exist in MetricRegistry") + .contains("jmx.timer.test.patterns.compilation.latency"); - // Compile pattern - Pattern.compile("test.*"); + // Get MBean server + MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); - // Get MBean server - MBeanServer mBeanServer = 
ManagementFactory.getPlatformMBeanServer(); + // Find compilation latency timer + ObjectName timerName = + new ObjectName("metrics:name=jmx.timer.test.patterns.compilation.latency,type=timers"); - // Query for Cassandra-prefixed metrics (in "metrics" domain) - Set mbeans = mBeanServer.queryNames( - new ObjectName("metrics:name=org.apache.cassandra.metrics.RE2.*,type=*"), null - ); + assertThat(mBeanServer.isRegistered(timerName)) + .as("Compilation latency timer should be in JMX") + .isTrue(); - // Verify Cassandra-prefixed MBeans exist - assertThat(mbeans) - .as("Cassandra-prefixed MBeans should exist in JMX") - .isNotEmpty(); + // Verify Timer provides count + Object count = mBeanServer.getAttribute(timerName, "Count"); + long countValue = ((Number) count).longValue(); + assertThat(countValue).as("Timer count via JMX").isEqualTo(50); - // Verify specific Cassandra metric exists - boolean foundCassandraMetric = mbeans.stream() - .anyMatch(name -> name.toString().startsWith("metrics:name=org.apache.cassandra.metrics.RE2.")); + // Verify Timer attributes exist (min/max can be 0 for fast operations) + Object min = mBeanServer.getAttribute(timerName, "Min"); + assertThat(min).as("Timer min attribute exists").isNotNull(); - assertThat(foundCassandraMetric) - .as("Should have org.apache.cassandra.metrics.RE2.* metrics in JMX") - .isTrue(); - } + Object max = mBeanServer.getAttribute(timerName, "Max"); + assertThat(max).as("Timer max attribute exists").isNotNull(); - @Test - void testJmxGaugeReadable() throws Exception { - // Create config - RE2Config config = RE2MetricsConfig.withMetrics(registry, "jmx.readable.test", false); - Pattern.setGlobalCache(new PatternCache(config)); + Object mean = mBeanServer.getAttribute(timerName, "Mean"); + assertThat(((Number) mean).doubleValue()).as("Timer mean via JMX").isGreaterThan(0.0); - // Compile patterns - Pattern.compile("p1"); - Pattern.compile("p2"); - Pattern.compile("p3"); + // Verify Timer provides percentiles + Object p50 
= mBeanServer.getAttribute(timerName, "50thPercentile"); + assertThat(((Number) p50).doubleValue()).as("Timer 50th percentile via JMX").isGreaterThan(0.0); - // Get MBean server - MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); + Object p95 = mBeanServer.getAttribute(timerName, "95thPercentile"); + assertThat(((Number) p95).doubleValue()).as("Timer 95th percentile via JMX").isGreaterThan(0.0); - // Find cache.patterns.current.count gauge (Dropwizard format: metrics:name=X,type=gauges) - ObjectName cacheSizeName = new ObjectName("metrics:name=jmx.readable.test.cache.patterns.current.count,type=gauges"); + Object p99 = mBeanServer.getAttribute(timerName, "99thPercentile"); + assertThat(((Number) p99).doubleValue()).as("Timer 99th percentile via JMX").isGreaterThan(0.0); - // Verify MBean exists - assertThat(mBeanServer.isRegistered(cacheSizeName)) - .as("cache.patterns.current.count gauge should be registered in JMX") - .isTrue(); + Object p999 = mBeanServer.getAttribute(timerName, "999thPercentile"); + assertThat(((Number) p999).doubleValue()) + .as("Timer 99.9th percentile via JMX") + .isGreaterThan(0.0); - // Read value via JMX - Object value = mBeanServer.getAttribute(cacheSizeName, "Value"); + // Verify rates + Object oneMinRate = mBeanServer.getAttribute(timerName, "OneMinuteRate"); + assertThat(oneMinRate).as("Timer should provide 1-minute rate via JMX").isNotNull(); + } - // Verify we can read the value and it's correct - assertThat(value) - .as("Should be able to read gauge value via JMX") - .isInstanceOf(Number.class); + @Test + void testAllMetricTypesInJmx() throws Exception { + // Create config + RE2Config config = RE2MetricsConfig.withMetrics(registry, "jmx.all.test", false); + Pattern.setGlobalCache(new PatternCache(config)); - int size = ((Number) value).intValue(); - assertThat(size) - .as("Cache size via JMX should reflect actual cache state (3 patterns)") - .isEqualTo(3); - } + // Generate metrics of all types + Pattern p = 
Pattern.compile("test.*"); + Pattern.compile("test.*"); // cache hit - @Test - void testJmxTimerStatistics() throws Exception { - // Create config - RE2Config config = RE2MetricsConfig.withMetrics(registry, "jmx.timer.test", false); - Pattern.setGlobalCache(new PatternCache(config)); - - // Compile patterns to generate latency data - for (int i = 0; i < 50; i++) { - Pattern.compile("timer_pattern_" + i); - } - - // Verify metric exists in registry first - assertThat(registry.getTimers().keySet()) - .as("Timer should exist in MetricRegistry") - .contains("jmx.timer.test.patterns.compilation.latency"); - - // Get MBean server - MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); - - // Find compilation latency timer - ObjectName timerName = new ObjectName("metrics:name=jmx.timer.test.patterns.compilation.latency,type=timers"); - - assertThat(mBeanServer.isRegistered(timerName)) - .as("Compilation latency timer should be in JMX") - .isTrue(); - - // Verify Timer provides count - Object count = mBeanServer.getAttribute(timerName, "Count"); - long countValue = ((Number) count).longValue(); - assertThat(countValue) - .as("Timer count via JMX") - .isEqualTo(50); - - // Verify Timer attributes exist (min/max can be 0 for fast operations) - Object min = mBeanServer.getAttribute(timerName, "Min"); - assertThat(min).as("Timer min attribute exists").isNotNull(); - - Object max = mBeanServer.getAttribute(timerName, "Max"); - assertThat(max).as("Timer max attribute exists").isNotNull(); - - Object mean = mBeanServer.getAttribute(timerName, "Mean"); - assertThat(((Number) mean).doubleValue()) - .as("Timer mean via JMX") - .isGreaterThan(0.0); - - // Verify Timer provides percentiles - Object p50 = mBeanServer.getAttribute(timerName, "50thPercentile"); - assertThat(((Number) p50).doubleValue()) - .as("Timer 50th percentile via JMX") - .isGreaterThan(0.0); - - Object p95 = mBeanServer.getAttribute(timerName, "95thPercentile"); - assertThat(((Number) 
p95).doubleValue()) - .as("Timer 95th percentile via JMX") - .isGreaterThan(0.0); - - Object p99 = mBeanServer.getAttribute(timerName, "99thPercentile"); - assertThat(((Number) p99).doubleValue()) - .as("Timer 99th percentile via JMX") - .isGreaterThan(0.0); - - Object p999 = mBeanServer.getAttribute(timerName, "999thPercentile"); - assertThat(((Number) p999).doubleValue()) - .as("Timer 99.9th percentile via JMX") - .isGreaterThan(0.0); - - // Verify rates - Object oneMinRate = mBeanServer.getAttribute(timerName, "OneMinuteRate"); - assertThat(oneMinRate) - .as("Timer should provide 1-minute rate via JMX") - .isNotNull(); + try (Matcher m = p.matcher("test123")) { + m.matches(); } - @Test - void testAllMetricTypesInJmx() throws Exception { - // Create config - RE2Config config = RE2MetricsConfig.withMetrics(registry, "jmx.all.test", false); - Pattern.setGlobalCache(new PatternCache(config)); - - // Generate metrics of all types - Pattern p = Pattern.compile("test.*"); - Pattern.compile("test.*"); // cache hit - - try (Matcher m = p.matcher("test123")) { - m.matches(); - } - - // Trigger error - try { - Pattern.compile("(invalid"); - } catch (Exception e) { - // Expected - } - - MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); - - // Verify counters in JMX - ObjectName compiledCounter = new ObjectName("metrics:name=jmx.all.test.patterns.compiled.total.count,type=counters"); - assertThat(mBeanServer.isRegistered(compiledCounter)).isTrue(); - assertThat(((Number) mBeanServer.getAttribute(compiledCounter, "Count")).longValue()).isGreaterThan(0); - - ObjectName hitsCounter = new ObjectName("metrics:name=jmx.all.test.patterns.cache.hits.total.count,type=counters"); - assertThat(mBeanServer.isRegistered(hitsCounter)).isTrue(); - assertThat(((Number) mBeanServer.getAttribute(hitsCounter, "Count")).longValue()).isEqualTo(1); - - // Verify timers in JMX - ObjectName compilationTimer = new 
ObjectName("metrics:name=jmx.all.test.patterns.compilation.latency,type=timers"); - assertThat(mBeanServer.isRegistered(compilationTimer)).isTrue(); - assertThat(((Number) mBeanServer.getAttribute(compilationTimer, "Count")).longValue()).isGreaterThan(0); - - // Verify gauges in JMX - ObjectName cacheSize = new ObjectName("metrics:name=jmx.all.test.cache.patterns.current.count,type=gauges"); - assertThat(mBeanServer.isRegistered(cacheSize)).isTrue(); - - ObjectName nativeMemory = new ObjectName("metrics:name=jmx.all.test.cache.native_memory.current.bytes,type=gauges"); - assertThat(mBeanServer.isRegistered(nativeMemory)).isTrue(); - assertThat(((Number) mBeanServer.getAttribute(nativeMemory, "Value")).longValue()).isGreaterThan(0); - - // Verify deferred metrics in JMX (NEW) - ObjectName deferredCount = new ObjectName("metrics:name=jmx.all.test.cache.deferred.patterns.current.count,type=gauges"); - assertThat(mBeanServer.isRegistered(deferredCount)).isTrue(); - - ObjectName deferredPeak = new ObjectName("metrics:name=jmx.all.test.cache.deferred.patterns.peak.count,type=gauges"); - assertThat(mBeanServer.isRegistered(deferredPeak)).isTrue(); + // Trigger error + try { + Pattern.compile("(invalid"); + } catch (Exception e) { + // Expected } - @Test - void testJmxCounterIncrementsCorrectly() throws Exception { - RE2Config config = RE2MetricsConfig.withMetrics(registry, "jmx.increment.test", false); - Pattern.setGlobalCache(new PatternCache(config)); - - // Compile one pattern to create the counter - Pattern.compile("initial"); - - MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); - ObjectName compiledCounter = new ObjectName("metrics:name=jmx.increment.test.patterns.compiled.total.count,type=counters"); - - // Get initial count - long countBefore = ((Number) mBeanServer.getAttribute(compiledCounter, "Count")).longValue(); - assertThat(countBefore).isGreaterThanOrEqualTo(1); // At least the initial pattern - - // Compile 5 more patterns - for (int i 
= 0; i < 5; i++) { - Pattern.compile("inc_pattern_" + i); - } - - // Verify counter incremented correctly via JMX - long countAfter = ((Number) mBeanServer.getAttribute(compiledCounter, "Count")).longValue(); - assertThat(countAfter - countBefore) - .as("Counter should have incremented by 5 via JMX") - .isEqualTo(5); + MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); + + // Verify counters in JMX + ObjectName compiledCounter = + new ObjectName("metrics:name=jmx.all.test.patterns.compiled.total.count,type=counters"); + assertThat(mBeanServer.isRegistered(compiledCounter)).isTrue(); + assertThat(((Number) mBeanServer.getAttribute(compiledCounter, "Count")).longValue()) + .isGreaterThan(0); + + ObjectName hitsCounter = + new ObjectName("metrics:name=jmx.all.test.patterns.cache.hits.total.count,type=counters"); + assertThat(mBeanServer.isRegistered(hitsCounter)).isTrue(); + assertThat(((Number) mBeanServer.getAttribute(hitsCounter, "Count")).longValue()).isEqualTo(1); + + // Verify timers in JMX + ObjectName compilationTimer = + new ObjectName("metrics:name=jmx.all.test.patterns.compilation.latency,type=timers"); + assertThat(mBeanServer.isRegistered(compilationTimer)).isTrue(); + assertThat(((Number) mBeanServer.getAttribute(compilationTimer, "Count")).longValue()) + .isGreaterThan(0); + + // Verify gauges in JMX + ObjectName cacheSize = + new ObjectName("metrics:name=jmx.all.test.cache.patterns.current.count,type=gauges"); + assertThat(mBeanServer.isRegistered(cacheSize)).isTrue(); + + ObjectName nativeMemory = + new ObjectName("metrics:name=jmx.all.test.cache.native_memory.current.bytes,type=gauges"); + assertThat(mBeanServer.isRegistered(nativeMemory)).isTrue(); + assertThat(((Number) mBeanServer.getAttribute(nativeMemory, "Value")).longValue()) + .isGreaterThan(0); + + // Verify deferred metrics in JMX (NEW) + ObjectName deferredCount = + new ObjectName( + "metrics:name=jmx.all.test.cache.deferred.patterns.current.count,type=gauges"); + 
assertThat(mBeanServer.isRegistered(deferredCount)).isTrue(); + + ObjectName deferredPeak = + new ObjectName("metrics:name=jmx.all.test.cache.deferred.patterns.peak.count,type=gauges"); + assertThat(mBeanServer.isRegistered(deferredPeak)).isTrue(); + } + + @Test + void testJmxCounterIncrementsCorrectly() throws Exception { + RE2Config config = RE2MetricsConfig.withMetrics(registry, "jmx.increment.test", false); + Pattern.setGlobalCache(new PatternCache(config)); + + // Compile one pattern to create the counter + Pattern.compile("initial"); + + MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); + ObjectName compiledCounter = + new ObjectName( + "metrics:name=jmx.increment.test.patterns.compiled.total.count,type=counters"); + + // Get initial count + long countBefore = ((Number) mBeanServer.getAttribute(compiledCounter, "Count")).longValue(); + assertThat(countBefore).isGreaterThanOrEqualTo(1); // At least the initial pattern + + // Compile 5 more patterns + for (int i = 0; i < 5; i++) { + Pattern.compile("inc_pattern_" + i); } -} + // Verify counter incremented correctly via JMX + long countAfter = ((Number) mBeanServer.getAttribute(compiledCounter, "Count")).longValue(); + assertThat(countAfter - countBefore) + .as("Counter should have incremented by 5 via JMX") + .isEqualTo(5); + } +} diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/dropwizard/MetricsEndToEndIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/dropwizard/MetricsEndToEndIT.java index be3c0d9..1de1227 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/dropwizard/MetricsEndToEndIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/dropwizard/MetricsEndToEndIT.java @@ -1,5 +1,7 @@ package com.axonops.libre2.dropwizard; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Pattern; import com.axonops.libre2.cache.PatternCache; import com.axonops.libre2.cache.RE2Config; @@ -7,26 +9,25 @@ 
import com.codahale.metrics.MetricRegistry; import org.junit.jupiter.api.Test; -import static org.assertj.core.api.Assertions.*; - /** * End-to-end tests for Dropwizard Metrics integration. * - * These tests verify that RE2MetricsConfig correctly sets up metrics - * and that gauges are registered properly. + *

These tests verify that RE2MetricsConfig correctly sets up metrics and that gauges are + * registered properly. */ class MetricsEndToEndIT { - @Test - void testGaugesRegisteredOnCacheCreation() { - MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.withMetrics(registry, "e2e.test", false); // JMX disabled + @Test + void testGaugesRegisteredOnCacheCreation() { + MetricRegistry registry = new MetricRegistry(); + RE2Config config = RE2MetricsConfig.withMetrics(registry, "e2e.test", false); // JMX disabled - // Create cache - should register all gauges - PatternCache cache = new PatternCache(config); + // Create cache - should register all gauges + PatternCache cache = new PatternCache(config); - // Verify gauges registered (11 total: cache, resources active, deferred) - assertThat(registry.getGauges()).containsKeys( + // Verify gauges registered (11 total: cache, resources active, deferred) + assertThat(registry.getGauges()) + .containsKeys( "e2e.test.cache.patterns.current.count", "e2e.test.cache.native_memory.current.bytes", "e2e.test.cache.native_memory.peak.bytes", @@ -35,111 +36,115 @@ void testGaugesRegisteredOnCacheCreation() { "e2e.test.cache.deferred.patterns.current.count", "e2e.test.cache.deferred.patterns.peak.count", "e2e.test.cache.deferred.native_memory.current.bytes", - "e2e.test.cache.deferred.native_memory.peak.bytes" - ); + "e2e.test.cache.deferred.native_memory.peak.bytes"); - // Verify freed counts are Counters (not in getGauges()) - // Note: These may not exist yet if nothing has been freed - // assertThat(registry.getCounters()).containsKeys(...) would fail if counters not created yet + // Verify freed counts are Counters (not in getGauges()) + // Note: These may not exist yet if nothing has been freed + // assertThat(registry.getCounters()).containsKeys(...) 
would fail if counters not created yet - cache.reset(); - } + cache.reset(); + } - @Test - void testCassandraPrefixConvention() { - MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.forCassandra(registry, false); // JMX disabled + @Test + void testCassandraPrefixConvention() { + MetricRegistry registry = new MetricRegistry(); + RE2Config config = RE2MetricsConfig.forCassandra(registry, false); // JMX disabled - PatternCache cache = new PatternCache(config); + PatternCache cache = new PatternCache(config); - // Verify Cassandra-standard prefix is used for gauges - assertThat(registry.getGauges().keySet()) - .anyMatch(key -> key.startsWith("org.apache.cassandra.metrics.RE2.")); + // Verify Cassandra-standard prefix is used for gauges + assertThat(registry.getGauges().keySet()) + .anyMatch(key -> key.startsWith("org.apache.cassandra.metrics.RE2.")); - // Verify specific Cassandra-prefixed gauge exists - assertThat(registry.getGauges()) - .containsKey("org.apache.cassandra.metrics.RE2.cache.patterns.current.count"); + // Verify specific Cassandra-prefixed gauge exists + assertThat(registry.getGauges()) + .containsKey("org.apache.cassandra.metrics.RE2.cache.patterns.current.count"); - cache.reset(); - } + cache.reset(); + } - @Test - void testCustomPrefixWorks() { - MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.withMetrics(registry, "com.mycompany.myapp.regex", false); + @Test + void testCustomPrefixWorks() { + MetricRegistry registry = new MetricRegistry(); + RE2Config config = RE2MetricsConfig.withMetrics(registry, "com.mycompany.myapp.regex", false); - PatternCache cache = new PatternCache(config); + PatternCache cache = new PatternCache(config); - // Verify custom prefix used - assertThat(registry.getGauges()) - .containsKey("com.mycompany.myapp.regex.cache.patterns.current.count"); + // Verify custom prefix used + assertThat(registry.getGauges()) + 
.containsKey("com.mycompany.myapp.regex.cache.patterns.current.count"); - cache.reset(); - } + cache.reset(); + } - @Test - void testGaugeValuesReflectCacheState() { - MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.withMetrics(registry, "gauge.test", false); - PatternCache cache = new PatternCache(config); + @Test + void testGaugeValuesReflectCacheState() { + MetricRegistry registry = new MetricRegistry(); + RE2Config config = RE2MetricsConfig.withMetrics(registry, "gauge.test", false); + PatternCache cache = new PatternCache(config); - Gauge cacheSize = (Gauge) registry.getGauges().get("gauge.test.cache.patterns.current.count"); + Gauge cacheSize = + (Gauge) registry.getGauges().get("gauge.test.cache.patterns.current.count"); - // Initially empty - assertThat(cacheSize.getValue()).isEqualTo(0); + // Initially empty + assertThat(cacheSize.getValue()).isEqualTo(0); - // Add pattern to cache - cache.getOrCompile("pattern1", true, () -> Pattern.compileWithoutCache("pattern1")); + // Add pattern to cache + cache.getOrCompile("pattern1", true, () -> Pattern.compileWithoutCache("pattern1")); - // Gauge should update - assertThat(cacheSize.getValue()).isEqualTo(1); + // Gauge should update + assertThat(cacheSize.getValue()).isEqualTo(1); - // Add another - cache.getOrCompile("pattern2", true, () -> Pattern.compileWithoutCache("pattern2")); + // Add another + cache.getOrCompile("pattern2", true, () -> Pattern.compileWithoutCache("pattern2")); - assertThat(cacheSize.getValue()).isEqualTo(2); + assertThat(cacheSize.getValue()).isEqualTo(2); - cache.reset(); - } + cache.reset(); + } - @Test - void testNativeMemoryGaugesNonZero() { - MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.withMetrics(registry, "memory.test", false); - PatternCache cache = new PatternCache(config); + @Test + void testNativeMemoryGaugesNonZero() { + MetricRegistry registry = new MetricRegistry(); + RE2Config config = 
RE2MetricsConfig.withMetrics(registry, "memory.test", false); + PatternCache cache = new PatternCache(config); - // Add pattern - cache.getOrCompile("test.*", true, () -> Pattern.compileWithoutCache("test.*")); + // Add pattern + cache.getOrCompile("test.*", true, () -> Pattern.compileWithoutCache("test.*")); - // Memory gauges should show non-zero values - Gauge nativeMemory = (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); - assertThat(nativeMemory.getValue()).isGreaterThan(0L); + // Memory gauges should show non-zero values + Gauge nativeMemory = + (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); + assertThat(nativeMemory.getValue()).isGreaterThan(0L); - Gauge peakMemory = (Gauge) registry.getGauges().get("memory.test.cache.native_memory.peak.bytes"); - assertThat(peakMemory.getValue()).isGreaterThan(0L); + Gauge peakMemory = + (Gauge) registry.getGauges().get("memory.test.cache.native_memory.peak.bytes"); + assertThat(peakMemory.getValue()).isGreaterThan(0L); - cache.reset(); - } + cache.reset(); + } - @Test - void testResourceGaugesExist() { - MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.withMetrics(registry, "resource.test", false); - PatternCache cache = new PatternCache(config); + @Test + void testResourceGaugesExist() { + MetricRegistry registry = new MetricRegistry(); + RE2Config config = RE2MetricsConfig.withMetrics(registry, "resource.test", false); + PatternCache cache = new PatternCache(config); - // Verify resource gauges registered (active counts only) - assertThat(registry.getGauges()).containsKeys( + // Verify resource gauges registered (active counts only) + assertThat(registry.getGauges()) + .containsKeys( "resource.test.resources.patterns.active.current.count", - "resource.test.resources.matchers.active.current.count" - ); + "resource.test.resources.matchers.active.current.count"); - // Gauges should return non-null values - Gauge 
patternsActive = (Gauge) registry.getGauges().get("resource.test.resources.patterns.active.current.count"); - assertThat(patternsActive.getValue()).isNotNull(); + // Gauges should return non-null values + Gauge patternsActive = + (Gauge) + registry.getGauges().get("resource.test.resources.patterns.active.current.count"); + assertThat(patternsActive.getValue()).isNotNull(); - // Note: resources.patterns.freed and resources.matchers.freed are now Counters - // They increment when patterns/matchers are freed (not registered as Gauges) + // Note: resources.patterns.freed and resources.matchers.freed are now Counters + // They increment when patterns/matchers are freed (not registered as Gauges) - cache.reset(); - } + cache.reset(); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/jni/RE2NativeJNIIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/jni/RE2NativeJNIIT.java index b42869d..e646031 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/jni/RE2NativeJNIIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/jni/RE2NativeJNIIT.java @@ -15,7 +15,8 @@ */ package com.axonops.libre2.jni; -import com.axonops.libre2.api.Pattern; +import static org.junit.jupiter.api.Assertions.*; + import com.axonops.libre2.cache.PatternCache; import com.axonops.libre2.test.TestUtils; import org.junit.jupiter.api.AfterAll; @@ -24,688 +25,686 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.*; - /** * Direct tests of the JNI layer (RE2NativeJNI) without Java wrapper. * - *

These tests help isolate issues to either the native code or Java wrapper. - * When debugging failures, check if the JNI method works directly before - * investigating the Java wrapper layer. + *

These tests help isolate issues to either the native code or Java wrapper. When debugging + * failures, check if the JNI method works directly before investigating the Java wrapper layer. * - *

IMPORTANT: These tests directly manage native handles. - * Always free handles in @AfterEach to prevent memory leaks. + *

<p>IMPORTANT: These tests directly manage native handles. Always free handles + * in @AfterEach to prevent memory leaks. */ class RE2NativeJNIIT { - private static PatternCache originalCache; + private static PatternCache originalCache; + + @BeforeAll + static void setUpClass() { + // Replace global cache with test config (disables JMX to prevent + // InstanceAlreadyExistsException) + originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build()); + } + + @AfterAll + static void tearDownClass() { + // Restore original cache + TestUtils.restoreGlobalCache(originalCache); + } + + private long handle; + + @BeforeEach + void setUp() { + // Compile a test pattern + handle = RE2NativeJNI.compile("test\\d+", true); + assertTrue(handle != 0, "Pattern compilation should succeed"); + } + + @AfterEach + void tearDown() { + // Always free handle to prevent leaks + if (handle != 0) { + RE2NativeJNI.freePattern(handle); + handle = 0; + } + } + + // ========== Basic Compilation and Lifecycle ========== + + @Test + void testCompile_Success() { + long h = RE2NativeJNI.compile("simple", true); + assertTrue(h != 0); + assertTrue(RE2NativeJNI.patternOk(h)); + RE2NativeJNI.freePattern(h); + } + + @Test + void testCompile_InvalidPattern() { + long h = RE2NativeJNI.compile("[invalid(", true); + assertEquals(0, h); // Should return 0 on error + + String error = RE2NativeJNI.getError(); + assertNotNull(error); + assertTrue(error.contains("missing") || error.contains("unclosed")); + } + + @Test + void testCompile_CaseSensitive_vs_Insensitive() { + long caseSensitive = RE2NativeJNI.compile("Test", true); + long caseInsensitive = RE2NativeJNI.compile("Test", false); + + // Case sensitive: only "Test" matches + assertTrue(RE2NativeJNI.fullMatch(caseSensitive, "Test")); + assertFalse(RE2NativeJNI.fullMatch(caseSensitive, "test")); + + // Case insensitive: both match + assertTrue(RE2NativeJNI.fullMatch(caseInsensitive, "Test")); +
assertTrue(RE2NativeJNI.fullMatch(caseInsensitive, "test")); + + RE2NativeJNI.freePattern(caseSensitive); + RE2NativeJNI.freePattern(caseInsensitive); + } + + @Test + void testFreePattern_ZeroHandle() { + // Should not crash + RE2NativeJNI.freePattern(0); + } + + // ========== Matching Operations (Single) ========== + + @Test + void testFullMatch_Basic() { + assertTrue(RE2NativeJNI.fullMatch(handle, "test123")); + assertTrue(RE2NativeJNI.fullMatch(handle, "test0")); + assertFalse(RE2NativeJNI.fullMatch(handle, "test")); // No digit + assertFalse(RE2NativeJNI.fullMatch(handle, "test123x")); // Extra char + } + + @Test + void testPartialMatch_Basic() { + assertTrue(RE2NativeJNI.partialMatch(handle, "prefix test123 suffix")); + assertTrue(RE2NativeJNI.partialMatch(handle, "test0")); + assertFalse(RE2NativeJNI.partialMatch(handle, "no match here")); + } + + @Test + void testFullMatch_NullText() { + assertFalse(RE2NativeJNI.fullMatch(handle, null)); + } + + @Test + void testFullMatch_ZeroHandle() { + assertFalse(RE2NativeJNI.fullMatch(0, "test")); + } + + // ========== Bulk Matching Operations (NEW) ========== + + @Test + void testFullMatchBulk_Basic() { + String[] texts = {"test1", "no match", "test999", "test"}; + boolean[] results = RE2NativeJNI.fullMatchBulk(handle, texts); + + assertNotNull(results); + assertEquals(4, results.length); + assertTrue(results[0]); // "test1" matches + assertFalse(results[1]); // "no match" doesn't + assertTrue(results[2]); // "test999" matches + assertFalse(results[3]); // "test" doesn't (no digit) + } + + @Test + void testPartialMatchBulk_Basic() { + String[] texts = {"prefix test1 suffix", "no match", "test999"}; + boolean[] results = RE2NativeJNI.partialMatchBulk(handle, texts); + + assertNotNull(results); + assertEquals(3, results.length); + assertTrue(results[0]); + assertFalse(results[1]); + assertTrue(results[2]); + } + + @Test + void testFullMatchBulk_EmptyArray() { + String[] empty = {}; + boolean[] results = 
RE2NativeJNI.fullMatchBulk(handle, empty); + + assertNotNull(results); + assertEquals(0, results.length); + } + + @Test + void testFullMatchBulk_WithNullElements() { + String[] texts = {"test1", null, "test2", null}; + boolean[] results = RE2NativeJNI.fullMatchBulk(handle, texts); + + assertNotNull(results); + assertEquals(4, results.length); + assertTrue(results[0]); + assertFalse(results[1]); // null should not match + assertTrue(results[2]); + assertFalse(results[3]); // null should not match + } + + @Test + void testFullMatchBulk_NullArray() { + boolean[] results = RE2NativeJNI.fullMatchBulk(handle, null); + assertNull(results); + } + + @Test + void testFullMatchBulk_ZeroHandle() { + String[] texts = {"test"}; + boolean[] results = RE2NativeJNI.fullMatchBulk(0, texts); + assertNull(results); + } + + // ========== Capture Group Operations (NEW) ========== + + @Test + void testExtractGroups_NoGroups() { + long h = RE2NativeJNI.compile("test\\d+", true); - @BeforeAll - static void setUpClass() { - // Replace global cache with test config (disables JMX to prevent InstanceAlreadyExistsException) - originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build()); - } + String[] groups = RE2NativeJNI.extractGroups(h, "test123"); - @AfterAll - static void tearDownClass() { - // Restore original cache - TestUtils.restoreGlobalCache(originalCache); - } + assertNotNull(groups); + assertEquals(1, groups.length); // Just group 0 (full match) + assertEquals("test123", groups[0]); - private long handle; + RE2NativeJNI.freePattern(h); + } - @BeforeEach - void setUp() { - // Compile a test pattern - handle = RE2NativeJNI.compile("test\\d+", true); - assertTrue(handle != 0, "Pattern compilation should succeed"); - } + @Test + void testExtractGroups_WithGroups() { + long h = RE2NativeJNI.compile("(\\d{3})-(\\d{4})", true); - @AfterEach - void tearDown() { - // Always free handle to prevent leaks - if (handle != 0) { - RE2NativeJNI.freePattern(handle); - handle 
= 0; - } - } + String[] groups = RE2NativeJNI.extractGroups(h, "123-4567"); - // ========== Basic Compilation and Lifecycle ========== + assertNotNull(groups); + assertEquals(3, groups.length); // group 0 + 2 capturing groups + assertEquals("123-4567", groups[0]); // Full match + assertEquals("123", groups[1]); // First group + assertEquals("4567", groups[2]); // Second group - @Test - void testCompile_Success() { - long h = RE2NativeJNI.compile("simple", true); - assertTrue(h != 0); - assertTrue(RE2NativeJNI.patternOk(h)); - RE2NativeJNI.freePattern(h); - } + RE2NativeJNI.freePattern(h); + } - @Test - void testCompile_InvalidPattern() { - long h = RE2NativeJNI.compile("[invalid(", true); - assertEquals(0, h); // Should return 0 on error + @Test + void testExtractGroups_NoMatch() { + long h = RE2NativeJNI.compile("(\\d+)", true); - String error = RE2NativeJNI.getError(); - assertNotNull(error); - assertTrue(error.contains("missing") || error.contains("unclosed")); - } + String[] groups = RE2NativeJNI.extractGroups(h, "no digits"); - @Test - void testCompile_CaseSensitive_vs_Insensitive() { - long caseSensitive = RE2NativeJNI.compile("Test", true); - long caseInsensitive = RE2NativeJNI.compile("Test", false); + assertNull(groups); // No match returns null - // Case sensitive: only "Test" matches - assertTrue(RE2NativeJNI.fullMatch(caseSensitive, "Test")); - assertFalse(RE2NativeJNI.fullMatch(caseSensitive, "test")); + RE2NativeJNI.freePattern(h); + } - // Case insensitive: both match - assertTrue(RE2NativeJNI.fullMatch(caseInsensitive, "Test")); - assertTrue(RE2NativeJNI.fullMatch(caseInsensitive, "test")); + @Test + void testExtractGroupsBulk_Basic() { + long h = RE2NativeJNI.compile("(\\d{3})-(\\d{4})", true); - RE2NativeJNI.freePattern(caseSensitive); - RE2NativeJNI.freePattern(caseInsensitive); - } + String[] texts = {"123-4567", "invalid", "999-8888"}; + String[][] results = RE2NativeJNI.extractGroupsBulk(h, texts); - @Test - void testFreePattern_ZeroHandle() { 
- // Should not crash - RE2NativeJNI.freePattern(0); - } + assertNotNull(results); + assertEquals(3, results.length); - // ========== Matching Operations (Single) ========== + // First input matches + assertNotNull(results[0]); + assertEquals(3, results[0].length); + assertEquals("123-4567", results[0][0]); + assertEquals("123", results[0][1]); + assertEquals("4567", results[0][2]); - @Test - void testFullMatch_Basic() { - assertTrue(RE2NativeJNI.fullMatch(handle, "test123")); - assertTrue(RE2NativeJNI.fullMatch(handle, "test0")); - assertFalse(RE2NativeJNI.fullMatch(handle, "test")); // No digit - assertFalse(RE2NativeJNI.fullMatch(handle, "test123x")); // Extra char - } + // Second input doesn't match + assertNull(results[1]); - @Test - void testPartialMatch_Basic() { - assertTrue(RE2NativeJNI.partialMatch(handle, "prefix test123 suffix")); - assertTrue(RE2NativeJNI.partialMatch(handle, "test0")); - assertFalse(RE2NativeJNI.partialMatch(handle, "no match here")); - } + // Third input matches + assertNotNull(results[2]); + assertEquals("999-8888", results[2][0]); - @Test - void testFullMatch_NullText() { - assertFalse(RE2NativeJNI.fullMatch(handle, null)); - } + RE2NativeJNI.freePattern(h); + } - @Test - void testFullMatch_ZeroHandle() { - assertFalse(RE2NativeJNI.fullMatch(0, "test")); - } + @Test + void testFindAllMatches_Multiple() { + long h = RE2NativeJNI.compile("(\\d+)", true); - // ========== Bulk Matching Operations (NEW) ========== + String[][] results = RE2NativeJNI.findAllMatches(h, "Found 123 and 456 and 789"); - @Test - void testFullMatchBulk_Basic() { - String[] texts = {"test1", "no match", "test999", "test"}; - boolean[] results = RE2NativeJNI.fullMatchBulk(handle, texts); + assertNotNull(results); + assertEquals(3, results.length); // 3 matches - assertNotNull(results); - assertEquals(4, results.length); - assertTrue(results[0]); // "test1" matches - assertFalse(results[1]); // "no match" doesn't - assertTrue(results[2]); // "test999" matches - 
assertFalse(results[3]); // "test" doesn't (no digit) - } + assertEquals("123", results[0][0]); + assertEquals("123", results[0][1]); - @Test - void testPartialMatchBulk_Basic() { - String[] texts = {"prefix test1 suffix", "no match", "test999"}; - boolean[] results = RE2NativeJNI.partialMatchBulk(handle, texts); + assertEquals("456", results[1][0]); + assertEquals("789", results[2][0]); - assertNotNull(results); - assertEquals(3, results.length); - assertTrue(results[0]); - assertFalse(results[1]); - assertTrue(results[2]); - } + RE2NativeJNI.freePattern(h); + } - @Test - void testFullMatchBulk_EmptyArray() { - String[] empty = {}; - boolean[] results = RE2NativeJNI.fullMatchBulk(handle, empty); + @Test + void testFindAllMatches_NoMatches() { + long h = RE2NativeJNI.compile("(\\d+)", true); - assertNotNull(results); - assertEquals(0, results.length); - } + String[][] results = RE2NativeJNI.findAllMatches(h, "no numbers here"); - @Test - void testFullMatchBulk_WithNullElements() { - String[] texts = {"test1", null, "test2", null}; - boolean[] results = RE2NativeJNI.fullMatchBulk(handle, texts); - - assertNotNull(results); - assertEquals(4, results.length); - assertTrue(results[0]); - assertFalse(results[1]); // null should not match - assertTrue(results[2]); - assertFalse(results[3]); // null should not match - } + assertNull(results); // No matches returns null - @Test - void testFullMatchBulk_NullArray() { - boolean[] results = RE2NativeJNI.fullMatchBulk(handle, null); - assertNull(results); - } + RE2NativeJNI.freePattern(h); + } - @Test - void testFullMatchBulk_ZeroHandle() { - String[] texts = {"test"}; - boolean[] results = RE2NativeJNI.fullMatchBulk(0, texts); - assertNull(results); - } + @Test + void testGetNamedGroups_NoNamedGroups() { + long h = RE2NativeJNI.compile("(\\d+)", true); - // ========== Capture Group Operations (NEW) ========== + String[] namedGroups = RE2NativeJNI.getNamedGroups(h); - @Test - void testExtractGroups_NoGroups() { - long h = 
RE2NativeJNI.compile("test\\d+", true); + assertNull(namedGroups); // No named groups returns null - String[] groups = RE2NativeJNI.extractGroups(h, "test123"); + RE2NativeJNI.freePattern(h); + } - assertNotNull(groups); - assertEquals(1, groups.length); // Just group 0 (full match) - assertEquals("test123", groups[0]); - - RE2NativeJNI.freePattern(h); - } + @Test + void testGetNamedGroups_WithNamedGroups() { + long h = RE2NativeJNI.compile("(?P<area>\\d{3})-(?P<number>\\d{4})", true); - @Test - void testExtractGroups_WithGroups() { - long h = RE2NativeJNI.compile("(\\d{3})-(\\d{4})", true); + String[] namedGroups = RE2NativeJNI.getNamedGroups(h); - String[] groups = RE2NativeJNI.extractGroups(h, "123-4567"); + assertNotNull(namedGroups); + // Flattened: [name1, index1_as_string, name2, index2_as_string, ...] + assertEquals(4, namedGroups.length); - assertNotNull(groups); - assertEquals(3, groups.length); // group 0 + 2 capturing groups - assertEquals("123-4567", groups[0]); // Full match - assertEquals("123", groups[1]); // First group - assertEquals("4567", groups[2]); // Second group - - RE2NativeJNI.freePattern(h); + // Find indices + int areaIndex = -1; + int numberIndex = -1; + for (int i = 0; i < namedGroups.length; i += 2) { + if ("area".equals(namedGroups[i])) { + areaIndex = Integer.parseInt(namedGroups[i + 1]); + } else if ("number".equals(namedGroups[i])) { + numberIndex = Integer.parseInt(namedGroups[i + 1]); + } } - @Test - void testExtractGroups_NoMatch() { - long h = RE2NativeJNI.compile("(\\d+)", true); - - String[] groups = RE2NativeJNI.extractGroups(h, "no digits"); - - assertNull(groups); // No match returns null - - RE2NativeJNI.freePattern(h); - } + assertEquals(1, areaIndex); + assertEquals(2, numberIndex); - @Test - void testExtractGroupsBulk_Basic() { - long h = RE2NativeJNI.compile("(\\d{3})-(\\d{4})", true); + RE2NativeJNI.freePattern(h); + } - String[] texts = {"123-4567", "invalid", "999-8888"}; - String[][] results =
RE2NativeJNI.extractGroupsBulk(h, texts); + // ========== Replace Operations (NEW) ========== - assertNotNull(results); - assertEquals(3, results.length); + @Test + void testReplaceFirst_Basic() { + long h = RE2NativeJNI.compile("\\d+", true); - // First input matches - assertNotNull(results[0]); - assertEquals(3, results[0].length); - assertEquals("123-4567", results[0][0]); - assertEquals("123", results[0][1]); - assertEquals("4567", results[0][2]); + String result = RE2NativeJNI.replaceFirst(h, "Found 123 and 456", "XXX"); - // Second input doesn't match - assertNull(results[1]); + assertEquals("Found XXX and 456", result); // Only first replaced - // Third input matches - assertNotNull(results[2]); - assertEquals("999-8888", results[2][0]); + RE2NativeJNI.freePattern(h); + } - RE2NativeJNI.freePattern(h); - } + @Test + void testReplaceFirst_NoMatch() { + long h = RE2NativeJNI.compile("\\d+", true); - @Test - void testFindAllMatches_Multiple() { - long h = RE2NativeJNI.compile("(\\d+)", true); + String result = RE2NativeJNI.replaceFirst(h, "no numbers", "XXX"); - String[][] results = RE2NativeJNI.findAllMatches(h, "Found 123 and 456 and 789"); + assertEquals("no numbers", result); // Unchanged if no match - assertNotNull(results); - assertEquals(3, results.length); // 3 matches + RE2NativeJNI.freePattern(h); + } - assertEquals("123", results[0][0]); - assertEquals("123", results[0][1]); + @Test + void testReplaceAll_Basic() { + long h = RE2NativeJNI.compile("\\d+", true); - assertEquals("456", results[1][0]); - assertEquals("789", results[2][0]); + String result = RE2NativeJNI.replaceAll(h, "Found 123 and 456 and 789", "XXX"); - RE2NativeJNI.freePattern(h); - } + assertEquals("Found XXX and XXX and XXX", result); - @Test - void testFindAllMatches_NoMatches() { - long h = RE2NativeJNI.compile("(\\d+)", true); + RE2NativeJNI.freePattern(h); + } - String[][] results = RE2NativeJNI.findAllMatches(h, "no numbers here"); + @Test + void testReplaceAll_Backreferences() 
{ + long h = RE2NativeJNI.compile("(\\d{3})-(\\d{4})", true); - assertNull(results); // No matches returns null + // RE2 uses \\1, \\2 syntax for backreferences (not $1, $2) + String result = RE2NativeJNI.replaceAll(h, "Call 123-4567 or 999-8888", "(\\1) \\2"); - RE2NativeJNI.freePattern(h); - } + assertEquals("Call (123) 4567 or (999) 8888", result); - @Test - void testGetNamedGroups_NoNamedGroups() { - long h = RE2NativeJNI.compile("(\\d+)", true); + RE2NativeJNI.freePattern(h); + } - String[] namedGroups = RE2NativeJNI.getNamedGroups(h); + @Test + void testReplaceAllBulk_Basic() { + long h = RE2NativeJNI.compile("\\d+", true); - assertNull(namedGroups); // No named groups returns null + String[] texts = {"Found 123", "No match", "Has 456 and 789"}; + String[] results = RE2NativeJNI.replaceAllBulk(h, texts, "XXX"); - RE2NativeJNI.freePattern(h); - } + assertNotNull(results); + assertEquals(3, results.length); + assertEquals("Found XXX", results[0]); + assertEquals("No match", results[1]); + assertEquals("Has XXX and XXX", results[2]); - @Test - void testGetNamedGroups_WithNamedGroups() { - long h = RE2NativeJNI.compile("(?P<area>\\d{3})-(?P<number>\\d{4})", true); + RE2NativeJNI.freePattern(h); + } - String[] namedGroups = RE2NativeJNI.getNamedGroups(h); + // ========== Utility Operations (NEW) ========== - assertNotNull(namedGroups); - // Flattened: [name1, index1_as_string, name2, index2_as_string, ...]
- assertEquals(4, namedGroups.length); + @Test + void testQuoteMeta_Basic() { + String escaped = RE2NativeJNI.quoteMeta("price: $100 (special)"); - // Find indices - int areaIndex = -1; - int numberIndex = -1; - for (int i = 0; i < namedGroups.length; i += 2) { - if ("area".equals(namedGroups[i])) { - areaIndex = Integer.parseInt(namedGroups[i + 1]); - } else if ("number".equals(namedGroups[i])) { - numberIndex = Integer.parseInt(namedGroups[i + 1]); - } - } + assertNotNull(escaped); + // RE2::QuoteMeta escapes ALL regex special chars including space, colon + assertTrue(escaped.contains("\\$")); + assertTrue(escaped.contains("\\(")); + assertTrue(escaped.contains("\\)")); + } - assertEquals(1, areaIndex); - assertEquals(2, numberIndex); + @Test + void testQuoteMeta_NoSpecialChars() { + String escaped = RE2NativeJNI.quoteMeta("simple"); - RE2NativeJNI.freePattern(h); - } + assertEquals("simple", escaped); + } - // ========== Replace Operations (NEW) ========== + @Test + void testQuoteMeta_Null() { + String result = RE2NativeJNI.quoteMeta(null); - @Test - void testReplaceFirst_Basic() { - long h = RE2NativeJNI.compile("\\d+", true); + assertNull(result); + } - String result = RE2NativeJNI.replaceFirst(h, "Found 123 and 456", "XXX"); + @Test + void testProgramFanout_Basic() { + long h = RE2NativeJNI.compile("(a|b|c)+", true); - assertEquals("Found XXX and 456", result); // Only first replaced + int[] fanout = RE2NativeJNI.programFanout(h); - RE2NativeJNI.freePattern(h); + // May return null for simple patterns (no meaningful fanout) + // Just verify it doesn't crash + // If not null, should be a valid array + if (fanout != null) { + assertTrue(fanout.length >= 0); } - @Test - void testReplaceFirst_NoMatch() { - long h = RE2NativeJNI.compile("\\d+", true); + RE2NativeJNI.freePattern(h); + } - String result = RE2NativeJNI.replaceFirst(h, "no numbers", "XXX"); + // ========== Pattern Info Methods ========== - assertEquals("no numbers", result); // Unchanged if no match + 
@Test + void testGetPattern() { + String pattern = RE2NativeJNI.getPattern(handle); - RE2NativeJNI.freePattern(h); - } + assertEquals("test\\d+", pattern); + } - @Test - void testReplaceAll_Basic() { - long h = RE2NativeJNI.compile("\\d+", true); + @Test + void testNumCapturingGroups() { + long h = RE2NativeJNI.compile("(\\d{3})-(\\d{4})", true); - String result = RE2NativeJNI.replaceAll(h, "Found 123 and 456 and 789", "XXX"); + int numGroups = RE2NativeJNI.numCapturingGroups(h); - assertEquals("Found XXX and XXX and XXX", result); + assertEquals(2, numGroups); - RE2NativeJNI.freePattern(h); - } + RE2NativeJNI.freePattern(h); + } - @Test - void testReplaceAll_Backreferences() { - long h = RE2NativeJNI.compile("(\\d{3})-(\\d{4})", true); + @Test + void testPatternOk() { + assertTrue(RE2NativeJNI.patternOk(handle)); + assertFalse(RE2NativeJNI.patternOk(0)); + } - // RE2 uses \\1, \\2 syntax for backreferences (not $1, $2) - String result = RE2NativeJNI.replaceAll(h, "Call 123-4567 or 999-8888", "(\\1) \\2"); + @Test + void testPatternMemory() { + long memory = RE2NativeJNI.patternMemory(handle); - assertEquals("Call (123) 4567 or (999) 8888", result); + assertTrue(memory > 0); // Should report some memory usage + assertTrue(memory < 10_000); // Simple pattern should be < 10KB + } - RE2NativeJNI.freePattern(h); - } + // ========== Edge Cases and Error Handling ========== - @Test - void testReplaceAllBulk_Basic() { - long h = RE2NativeJNI.compile("\\d+", true); + @Test + void testUnicodeHandling() { + long h = RE2NativeJNI.compile("中文\\d+", true); - String[] texts = {"Found 123", "No match", "Has 456 and 789"}; - String[] results = RE2NativeJNI.replaceAllBulk(h, texts, "XXX"); + assertTrue(RE2NativeJNI.fullMatch(h, "中文123")); + assertFalse(RE2NativeJNI.fullMatch(h, "中文")); - assertNotNull(results); - assertEquals(3, results.length); - assertEquals("Found XXX", results[0]); - assertEquals("No match", results[1]); - assertEquals("Has XXX and XXX", results[2]); + 
RE2NativeJNI.freePattern(h); + } - RE2NativeJNI.freePattern(h); - } - - // ========== Utility Operations (NEW) ========== - - @Test - void testQuoteMeta_Basic() { - String escaped = RE2NativeJNI.quoteMeta("price: $100 (special)"); - - assertNotNull(escaped); - // RE2::QuoteMeta escapes ALL regex special chars including space, colon - assertTrue(escaped.contains("\\$")); - assertTrue(escaped.contains("\\(")); - assertTrue(escaped.contains("\\)")); - } - - @Test - void testQuoteMeta_NoSpecialChars() { - String escaped = RE2NativeJNI.quoteMeta("simple"); - - assertEquals("simple", escaped); - } - - @Test - void testQuoteMeta_Null() { - String result = RE2NativeJNI.quoteMeta(null); + @Test + void testEmojiHandling() { + long h = RE2NativeJNI.compile(".*😀.*", true); - assertNull(result); - } - - @Test - void testProgramFanout_Basic() { - long h = RE2NativeJNI.compile("(a|b|c)+", true); + assertTrue(RE2NativeJNI.fullMatch(h, "Hello 😀 World")); + assertFalse(RE2NativeJNI.fullMatch(h, "No emoji")); - int[] fanout = RE2NativeJNI.programFanout(h); + RE2NativeJNI.freePattern(h); + } - // May return null for simple patterns (no meaningful fanout) - // Just verify it doesn't crash - // If not null, should be a valid array - if (fanout != null) { - assertTrue(fanout.length >= 0); - } + @Test + void testEmptyPattern_Allowed() { + long h = RE2NativeJNI.compile("", true); - RE2NativeJNI.freePattern(h); - } + // RE2 allows empty patterns (they match empty strings) + assertTrue(h != 0); + assertTrue(RE2NativeJNI.fullMatch(h, "")); + assertFalse(RE2NativeJNI.fullMatch(h, "test")); - // ========== Pattern Info Methods ========== + RE2NativeJNI.freePattern(h); + } - @Test - void testGetPattern() { - String pattern = RE2NativeJNI.getPattern(handle); - - assertEquals("test\\d+", pattern); + @Test + void testVeryLongPattern() { + // Create pattern with 1000 alternations + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + if (i > 0) sb.append("|"); + 
sb.append("item").append(i); } - @Test - void testNumCapturingGroups() { - long h = RE2NativeJNI.compile("(\\d{3})-(\\d{4})", true); - - int numGroups = RE2NativeJNI.numCapturingGroups(h); + long h = RE2NativeJNI.compile(sb.toString(), true); - assertEquals(2, numGroups); - - RE2NativeJNI.freePattern(h); - } - - @Test - void testPatternOk() { - assertTrue(RE2NativeJNI.patternOk(handle)); - assertFalse(RE2NativeJNI.patternOk(0)); - } + assertTrue(h != 0); // Should compile successfully + assertTrue(RE2NativeJNI.fullMatch(h, "item500")); + assertFalse(RE2NativeJNI.fullMatch(h, "item1000")); - @Test - void testPatternMemory() { - long memory = RE2NativeJNI.patternMemory(handle); + RE2NativeJNI.freePattern(h); + } - assertTrue(memory > 0); // Should report some memory usage - assertTrue(memory < 10_000); // Simple pattern should be < 10KB + @Test + void testBulkMatching_LargeArray() { + // Test with 1000 strings + String[] texts = new String[1000]; + for (int i = 0; i < 1000; i++) { + texts[i] = "test" + i; } - // ========== Edge Cases and Error Handling ========== + boolean[] results = RE2NativeJNI.fullMatchBulk(handle, texts); - @Test - void testUnicodeHandling() { - long h = RE2NativeJNI.compile("中文\\d+", true); + assertNotNull(results); + assertEquals(1000, results.length); + assertTrue(results[0]); // test0 matches + assertTrue(results[999]); // test999 matches + } - assertTrue(RE2NativeJNI.fullMatch(h, "中文123")); - assertFalse(RE2NativeJNI.fullMatch(h, "中文")); + // ========== Zero-Copy Direct Memory Operations ========== - RE2NativeJNI.freePattern(h); - } + @Test + void testFullMatchDirect_Success() { + java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(20); + buffer.put("test123".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + buffer.flip(); - @Test - void testEmojiHandling() { - long h = RE2NativeJNI.compile(".*😀.*", true); + long address = ((sun.nio.ch.DirectBuffer) buffer).address(); + int length = buffer.remaining(); - 
assertTrue(RE2NativeJNI.fullMatch(h, "Hello 😀 World")); - assertFalse(RE2NativeJNI.fullMatch(h, "No emoji")); + boolean result = RE2NativeJNI.fullMatchDirect(handle, address, length); - RE2NativeJNI.freePattern(h); - } + assertTrue(result); + } - @Test - void testEmptyPattern_Allowed() { - long h = RE2NativeJNI.compile("", true); + @Test + void testPartialMatchDirect_Success() { + java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(20); + buffer.put("before test456 after".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + buffer.flip(); - // RE2 allows empty patterns (they match empty strings) - assertTrue(h != 0); - assertTrue(RE2NativeJNI.fullMatch(h, "")); - assertFalse(RE2NativeJNI.fullMatch(h, "test")); + long address = ((sun.nio.ch.DirectBuffer) buffer).address(); + int length = buffer.remaining(); - RE2NativeJNI.freePattern(h); - } + boolean result = RE2NativeJNI.partialMatchDirect(handle, address, length); - @Test - void testVeryLongPattern() { - // Create pattern with 1000 alternations - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < 1000; i++) { - if (i > 0) sb.append("|"); - sb.append("item").append(i); - } + assertTrue(result); + } - long h = RE2NativeJNI.compile(sb.toString(), true); + @Test + void testFullMatchDirectBulk_Success() { + // Create 3 direct buffers + java.nio.ByteBuffer[] buffers = new java.nio.ByteBuffer[3]; + long[] addresses = new long[3]; + int[] lengths = new int[3]; - assertTrue(h != 0); // Should compile successfully - assertTrue(RE2NativeJNI.fullMatch(h, "item500")); - assertFalse(RE2NativeJNI.fullMatch(h, "item1000")); - - RE2NativeJNI.freePattern(h); + String[] texts = {"test123", "test456", "nomatch"}; + for (int i = 0; i < 3; i++) { + buffers[i] = java.nio.ByteBuffer.allocateDirect(20); + buffers[i].put(texts[i].getBytes(java.nio.charset.StandardCharsets.UTF_8)); + buffers[i].flip(); + addresses[i] = ((sun.nio.ch.DirectBuffer) buffers[i]).address(); + lengths[i] = buffers[i].remaining(); } - @Test 
- void testBulkMatching_LargeArray() { - // Test with 1000 strings - String[] texts = new String[1000]; - for (int i = 0; i < 1000; i++) { - texts[i] = "test" + i; - } + boolean[] results = RE2NativeJNI.fullMatchDirectBulk(handle, addresses, lengths); - boolean[] results = RE2NativeJNI.fullMatchBulk(handle, texts); + assertNotNull(results); + assertEquals(3, results.length); + assertTrue(results[0]); // test123 matches + assertTrue(results[1]); // test456 matches + assertFalse(results[2]); // nomatch doesn't match + } - assertNotNull(results); - assertEquals(1000, results.length); - assertTrue(results[0]); // test0 matches - assertTrue(results[999]); // test999 matches - } + @Test + void testExtractGroupsDirect_Success() { + long h = RE2NativeJNI.compile("(\\d+)-(\\d+)", true); - // ========== Zero-Copy Direct Memory Operations ========== + java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(20); + buffer.put("123-456".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + buffer.flip(); - @Test - void testFullMatchDirect_Success() { - java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(20); - buffer.put("test123".getBytes(java.nio.charset.StandardCharsets.UTF_8)); - buffer.flip(); + long address = ((sun.nio.ch.DirectBuffer) buffer).address(); + int length = buffer.remaining(); - long address = ((sun.nio.ch.DirectBuffer) buffer).address(); - int length = buffer.remaining(); + String[] groups = RE2NativeJNI.extractGroupsDirect(h, address, length); - boolean result = RE2NativeJNI.fullMatchDirect(handle, address, length); + assertNotNull(groups); + assertEquals(3, groups.length); + assertEquals("123-456", groups[0]); // Full match + assertEquals("123", groups[1]); // First group + assertEquals("456", groups[2]); // Second group - assertTrue(result); - } - - @Test - void testPartialMatchDirect_Success() { - java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(20); - buffer.put("before test456 
after".getBytes(java.nio.charset.StandardCharsets.UTF_8)); - buffer.flip(); + RE2NativeJNI.freePattern(h); + } - long address = ((sun.nio.ch.DirectBuffer) buffer).address(); - int length = buffer.remaining(); + @Test + void testFindAllMatchesDirect_Success() { + long h = RE2NativeJNI.compile("(\\d+)", true); - boolean result = RE2NativeJNI.partialMatchDirect(handle, address, length); - - assertTrue(result); - } + java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(30); + buffer.put("a1b22c333".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + buffer.flip(); - @Test - void testFullMatchDirectBulk_Success() { - // Create 3 direct buffers - java.nio.ByteBuffer[] buffers = new java.nio.ByteBuffer[3]; - long[] addresses = new long[3]; - int[] lengths = new int[3]; - - String[] texts = {"test123", "test456", "nomatch"}; - for (int i = 0; i < 3; i++) { - buffers[i] = java.nio.ByteBuffer.allocateDirect(20); - buffers[i].put(texts[i].getBytes(java.nio.charset.StandardCharsets.UTF_8)); - buffers[i].flip(); - addresses[i] = ((sun.nio.ch.DirectBuffer) buffers[i]).address(); - lengths[i] = buffers[i].remaining(); - } - - boolean[] results = RE2NativeJNI.fullMatchDirectBulk(handle, addresses, lengths); - - assertNotNull(results); - assertEquals(3, results.length); - assertTrue(results[0]); // test123 matches - assertTrue(results[1]); // test456 matches - assertFalse(results[2]); // nomatch doesn't match - } + long address = ((sun.nio.ch.DirectBuffer) buffer).address(); + int length = buffer.remaining(); - @Test - void testExtractGroupsDirect_Success() { - long h = RE2NativeJNI.compile("(\\d+)-(\\d+)", true); + String[][] matches = RE2NativeJNI.findAllMatchesDirect(h, address, length); - java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(20); - buffer.put("123-456".getBytes(java.nio.charset.StandardCharsets.UTF_8)); - buffer.flip(); + assertNotNull(matches); + assertEquals(3, matches.length); + assertEquals("1", matches[0][0]); + assertEquals("22", 
matches[1][0]); + assertEquals("333", matches[2][0]); - long address = ((sun.nio.ch.DirectBuffer) buffer).address(); - int length = buffer.remaining(); + RE2NativeJNI.freePattern(h); + } - String[] groups = RE2NativeJNI.extractGroupsDirect(h, address, length); + @Test + void testReplaceFirstDirect_Success() { + long h = RE2NativeJNI.compile("\\d+", true); - assertNotNull(groups); - assertEquals(3, groups.length); - assertEquals("123-456", groups[0]); // Full match - assertEquals("123", groups[1]); // First group - assertEquals("456", groups[2]); // Second group + java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(30); + buffer.put("Item 123 costs $456".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + buffer.flip(); - RE2NativeJNI.freePattern(h); - } + long address = ((sun.nio.ch.DirectBuffer) buffer).address(); + int length = buffer.remaining(); - @Test - void testFindAllMatchesDirect_Success() { - long h = RE2NativeJNI.compile("(\\d+)", true); + String result = RE2NativeJNI.replaceFirstDirect(h, address, length, "XXX"); - java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(30); - buffer.put("a1b22c333".getBytes(java.nio.charset.StandardCharsets.UTF_8)); - buffer.flip(); + assertEquals("Item XXX costs $456", result); - long address = ((sun.nio.ch.DirectBuffer) buffer).address(); - int length = buffer.remaining(); + RE2NativeJNI.freePattern(h); + } - String[][] matches = RE2NativeJNI.findAllMatchesDirect(h, address, length); + @Test + void testReplaceAllDirect_Success() { + long h = RE2NativeJNI.compile("\\d+", true); - assertNotNull(matches); - assertEquals(3, matches.length); - assertEquals("1", matches[0][0]); - assertEquals("22", matches[1][0]); - assertEquals("333", matches[2][0]); + java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(30); + buffer.put("Item 123 costs $456".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + buffer.flip(); - RE2NativeJNI.freePattern(h); - } + long address = ((sun.nio.ch.DirectBuffer) 
buffer).address(); + int length = buffer.remaining(); - @Test - void testReplaceFirstDirect_Success() { - long h = RE2NativeJNI.compile("\\d+", true); + String result = RE2NativeJNI.replaceAllDirect(h, address, length, "XXX"); - java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(30); - buffer.put("Item 123 costs $456".getBytes(java.nio.charset.StandardCharsets.UTF_8)); - buffer.flip(); + assertEquals("Item XXX costs $XXX", result); - long address = ((sun.nio.ch.DirectBuffer) buffer).address(); - int length = buffer.remaining(); + RE2NativeJNI.freePattern(h); + } - String result = RE2NativeJNI.replaceFirstDirect(h, address, length, "XXX"); + @Test + void testReplaceAllDirectBulk_Success() { + long h = RE2NativeJNI.compile("\\d+", true); - assertEquals("Item XXX costs $456", result); + // Create 3 direct buffers + java.nio.ByteBuffer[] buffers = new java.nio.ByteBuffer[3]; + long[] addresses = new long[3]; + int[] lengths = new int[3]; - RE2NativeJNI.freePattern(h); + String[] texts = {"Found 123", "No match", "Has 456 and 789"}; + for (int i = 0; i < 3; i++) { + buffers[i] = java.nio.ByteBuffer.allocateDirect(30); + buffers[i].put(texts[i].getBytes(java.nio.charset.StandardCharsets.UTF_8)); + buffers[i].flip(); + addresses[i] = ((sun.nio.ch.DirectBuffer) buffers[i]).address(); + lengths[i] = buffers[i].remaining(); } - @Test - void testReplaceAllDirect_Success() { - long h = RE2NativeJNI.compile("\\d+", true); - - java.nio.ByteBuffer buffer = java.nio.ByteBuffer.allocateDirect(30); - buffer.put("Item 123 costs $456".getBytes(java.nio.charset.StandardCharsets.UTF_8)); - buffer.flip(); - - long address = ((sun.nio.ch.DirectBuffer) buffer).address(); - int length = buffer.remaining(); + String[] results = RE2NativeJNI.replaceAllDirectBulk(h, addresses, lengths, "XXX"); - String result = RE2NativeJNI.replaceAllDirect(h, address, length, "XXX"); + assertNotNull(results); + assertEquals(3, results.length); + assertEquals("Found XXX", results[0]); + 
assertEquals("No match", results[1]); + assertEquals("Has XXX and XXX", results[2]); - assertEquals("Item XXX costs $XXX", result); - - RE2NativeJNI.freePattern(h); - } - - @Test - void testReplaceAllDirectBulk_Success() { - long h = RE2NativeJNI.compile("\\d+", true); - - // Create 3 direct buffers - java.nio.ByteBuffer[] buffers = new java.nio.ByteBuffer[3]; - long[] addresses = new long[3]; - int[] lengths = new int[3]; - - String[] texts = {"Found 123", "No match", "Has 456 and 789"}; - for (int i = 0; i < 3; i++) { - buffers[i] = java.nio.ByteBuffer.allocateDirect(30); - buffers[i].put(texts[i].getBytes(java.nio.charset.StandardCharsets.UTF_8)); - buffers[i].flip(); - addresses[i] = ((sun.nio.ch.DirectBuffer) buffers[i]).address(); - lengths[i] = buffers[i].remaining(); - } - - String[] results = RE2NativeJNI.replaceAllDirectBulk(h, addresses, lengths, "XXX"); - - assertNotNull(results); - assertEquals(3, results.length); - assertEquals("Found XXX", results[0]); - assertEquals("No match", results[1]); - assertEquals("Has XXX and XXX", results[2]); - - RE2NativeJNI.freePattern(h); - } + RE2NativeJNI.freePattern(h); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/ComprehensiveMetricsIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/ComprehensiveMetricsIT.java index 7a2d23d..06246fd 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/ComprehensiveMetricsIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/ComprehensiveMetricsIT.java @@ -1,222 +1,232 @@ package com.axonops.libre2.metrics; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.MatchResult; import com.axonops.libre2.api.Pattern; import com.axonops.libre2.cache.PatternCache; import com.axonops.libre2.cache.RE2Config; -import com.codahale.metrics.Counter; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; +import java.util.List; import 
org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.List; - -import static org.assertj.core.api.Assertions.*; - /** * Comprehensive metrics verification test. * - * Verifies that key methods in Pattern.java correctly record metrics. - * Tests that Global = Sum of Specifics for all operation types. + *

Verifies that key methods in Pattern.java correctly record metrics. Tests that Global = Sum of + * Specifics for all operation types. */ @DisplayName("Comprehensive Metrics Verification") class ComprehensiveMetricsIT { - private MetricRegistry registry; - private PatternCache originalCache; + private MetricRegistry registry; + private PatternCache originalCache; - @BeforeEach - void setup() { - // Save original cache - originalCache = Pattern.getGlobalCache(); + @BeforeEach + void setup() { + // Save original cache + originalCache = Pattern.getGlobalCache(); - // Create test registry - registry = new MetricRegistry(); + // Create test registry + registry = new MetricRegistry(); - // Create config with Dropwizard metrics - RE2Config config = RE2Config.builder() + // Create config with Dropwizard metrics + RE2Config config = + RE2Config.builder() .metricsRegistry(new DropwizardMetricsAdapter(registry, "test.re2")) .build(); - // Inject test cache - Pattern.setGlobalCache(new PatternCache(config)); + // Inject test cache + Pattern.setGlobalCache(new PatternCache(config)); + } + + @AfterEach + void cleanup() { + // Restore original cache + Pattern.setGlobalCache(originalCache); + } + + // ========== Matching Operations Tests ========== + + @Test + @DisplayName("matches(String) via Matcher records global metrics") + void matchesString_recordsMetrics() { + Pattern p = Pattern.compile("unique-pattern-1:\\d+"); + + p.matches("test:123"); + + // Global matching metrics (recorded by Matcher) + assertThat(registry.counter("test.re2.matching.operations.total.count").getCount()) + .isGreaterThanOrEqualTo(1); + assertThat(registry.timer("test.re2.matching.full_match.latency").getCount()).isGreaterThan(0); + } + + @Test + @DisplayName("matchAll(String[]) records bulk metrics") + void matchAllStringArray_recordsBulkMetrics() { + Pattern p = Pattern.compile("(\\d+)"); + + String[] inputs = {"123", "456", "789"}; + p.matchAll(inputs); + + // Global metrics - should count items + 
assertThat(registry.counter("test.re2.matching.operations.total.count").getCount()) + .isEqualTo(3); + + // Specific bulk metrics + assertThat(registry.counter("test.re2.matching.bulk.operations.total.count").getCount()) + .isEqualTo(1); + assertThat(registry.counter("test.re2.matching.bulk.items.total.count").getCount()) + .isEqualTo(3); + } + + @Test + @DisplayName("matchAll bulk operations record correct counts") + void matchAllRecordsCorrectCounts() { + Pattern p = Pattern.compile("unique-bulk-test:\\d+"); + + long globalBefore = registry.counter("test.re2.matching.operations.total.count").getCount(); + long bulkOpsBefore = + registry.counter("test.re2.matching.bulk.operations.total.count").getCount(); + long bulkItemsBefore = registry.counter("test.re2.matching.bulk.items.total.count").getCount(); + + // Bulk operation (3 items) + p.matchAll(new String[] {"test:123", "test:456", "test:789"}); + + // Check increments + long globalDelta = + registry.counter("test.re2.matching.operations.total.count").getCount() - globalBefore; + long bulkOpsDelta = + registry.counter("test.re2.matching.bulk.operations.total.count").getCount() + - bulkOpsBefore; + long bulkItemsDelta = + registry.counter("test.re2.matching.bulk.items.total.count").getCount() - bulkItemsBefore; + + // Should record 1 bulk operation with 3 items + assertThat(bulkOpsDelta).isEqualTo(1); + assertThat(bulkItemsDelta).isEqualTo(3); + // Global should equal items count for bulk ops + assertThat(globalDelta).isEqualTo(bulkItemsDelta); + } + + // ========== Capture Operations Tests ========== + + @Test + @DisplayName("match(String) records capture metrics") + void matchString_recordsCaptureMetrics() { + Pattern p = Pattern.compile("(\\d+)"); + + try (MatchResult result = p.match("123")) { + result.matched(); } - @AfterEach - void cleanup() { - // Restore original cache - Pattern.setGlobalCache(originalCache); + // Global capture metrics + 
assertThat(registry.counter("test.re2.capture.operations.total.count").getCount()).isEqualTo(1); + assertThat(registry.timer("test.re2.capture.latency").getCount()).isGreaterThan(0); + + // Specific String metrics + assertThat(registry.counter("test.re2.capture.string.operations.total.count").getCount()) + .isEqualTo(1); + } + + @Test + @DisplayName("matchAllWithGroups(String[]) records bulk capture metrics") + void matchAllWithGroupsStringArray_recordsBulkMetrics() { + Pattern p = Pattern.compile("(\\d+)"); + + String[] inputs = {"123", "456", "abc"}; + MatchResult[] results = p.matchAllWithGroups(inputs); + try { + for (MatchResult r : results) { + r.matched(); + } + } finally { + for (MatchResult r : results) { + r.close(); + } } - // ========== Matching Operations Tests ========== - - @Test - @DisplayName("matches(String) via Matcher records global metrics") - void matchesString_recordsMetrics() { - Pattern p = Pattern.compile("unique-pattern-1:\\d+"); - - p.matches("test:123"); - - // Global matching metrics (recorded by Matcher) - assertThat(registry.counter("test.re2.matching.operations.total.count").getCount()).isGreaterThanOrEqualTo(1); - assertThat(registry.timer("test.re2.matching.full_match.latency").getCount()).isGreaterThan(0); + // Global metrics - count items + assertThat(registry.counter("test.re2.capture.operations.total.count").getCount()).isEqualTo(3); + + // Specific bulk metrics + assertThat(registry.counter("test.re2.capture.bulk.operations.total.count").getCount()) + .isEqualTo(1); + assertThat(registry.counter("test.re2.capture.bulk.items.total.count").getCount()).isEqualTo(3); + } + + @Test + @DisplayName("findAll(String) records findAll match count") + void findAllString_recordsMatchCount() { + Pattern p = Pattern.compile("(\\d+)"); + + List matches = p.findAll("a1b22c333"); + try { + assertThat(matches).hasSize(3); + } finally { + matches.forEach(MatchResult::close); } - @Test - @DisplayName("matchAll(String[]) records bulk metrics") - 
void matchAllStringArray_recordsBulkMetrics() { - Pattern p = Pattern.compile("(\\d+)"); + // Should track match count + assertThat(registry.counter("test.re2.capture.findall.matches.total.count").getCount()) + .isEqualTo(3); + } - String[] inputs = {"123", "456", "789"}; - p.matchAll(inputs); + // ========== Replace Operations Tests ========== - // Global metrics - should count items - assertThat(registry.counter("test.re2.matching.operations.total.count").getCount()).isEqualTo(3); + @Test + @DisplayName("replaceFirst(String) records replace metrics") + void replaceFirstString_recordsMetrics() { + Pattern p = Pattern.compile("(\\d+)"); - // Specific bulk metrics - assertThat(registry.counter("test.re2.matching.bulk.operations.total.count").getCount()).isEqualTo(1); - assertThat(registry.counter("test.re2.matching.bulk.items.total.count").getCount()).isEqualTo(3); - } + p.replaceFirst("123", "X"); - @Test - @DisplayName("matchAll bulk operations record correct counts") - void matchAllRecordsCorrectCounts() { - Pattern p = Pattern.compile("unique-bulk-test:\\d+"); + // Global replace metrics + assertThat(registry.counter("test.re2.replace.operations.total.count").getCount()).isEqualTo(1); + assertThat(registry.timer("test.re2.replace.latency").getCount()).isGreaterThan(0); - long globalBefore = registry.counter("test.re2.matching.operations.total.count").getCount(); - long bulkOpsBefore = registry.counter("test.re2.matching.bulk.operations.total.count").getCount(); - long bulkItemsBefore = registry.counter("test.re2.matching.bulk.items.total.count").getCount(); + // Specific String metrics + assertThat(registry.counter("test.re2.replace.string.operations.total.count").getCount()) + .isEqualTo(1); + } - // Bulk operation (3 items) - p.matchAll(new String[]{"test:123", "test:456", "test:789"}); + @Test + @DisplayName("replaceAll(String[]) records bulk replace metrics") + void replaceAllStringArray_recordsBulkMetrics() { + Pattern p = Pattern.compile("(\\d+)"); - // 
Check increments - long globalDelta = registry.counter("test.re2.matching.operations.total.count").getCount() - globalBefore; - long bulkOpsDelta = registry.counter("test.re2.matching.bulk.operations.total.count").getCount() - bulkOpsBefore; - long bulkItemsDelta = registry.counter("test.re2.matching.bulk.items.total.count").getCount() - bulkItemsBefore; + String[] inputs = {"123", "456", "789"}; + p.replaceAll(inputs, "X"); - // Should record 1 bulk operation with 3 items - assertThat(bulkOpsDelta).isEqualTo(1); - assertThat(bulkItemsDelta).isEqualTo(3); - // Global should equal items count for bulk ops - assertThat(globalDelta).isEqualTo(bulkItemsDelta); - } + // Global metrics - count items + assertThat(registry.counter("test.re2.replace.operations.total.count").getCount()).isEqualTo(3); - // ========== Capture Operations Tests ========== + // Specific bulk metrics + assertThat(registry.counter("test.re2.replace.bulk.operations.total.count").getCount()) + .isEqualTo(1); + assertThat(registry.counter("test.re2.replace.bulk.items.total.count").getCount()).isEqualTo(3); + } - @Test - @DisplayName("match(String) records capture metrics") - void matchString_recordsCaptureMetrics() { - Pattern p = Pattern.compile("(\\d+)"); + @Test + @DisplayName("Global replace = sum of String + Bulk") + void replaceGlobalEqualsSum() { + Pattern p = Pattern.compile("(\\d+)"); - try (MatchResult result = p.match("123")) { - result.matched(); - } + // String operation + p.replaceFirst("123", "X"); - // Global capture metrics - assertThat(registry.counter("test.re2.capture.operations.total.count").getCount()).isEqualTo(1); - assertThat(registry.timer("test.re2.capture.latency").getCount()).isGreaterThan(0); + // Bulk operation (3 items) + p.replaceAll(new String[] {"456", "789", "abc"}, "Y"); - // Specific String metrics - assertThat(registry.counter("test.re2.capture.string.operations.total.count").getCount()).isEqualTo(1); - } + // Global should be 1 + 3 = 4 + long global = 
registry.counter("test.re2.replace.operations.total.count").getCount(); + long string = registry.counter("test.re2.replace.string.operations.total.count").getCount(); + long bulkItems = registry.counter("test.re2.replace.bulk.items.total.count").getCount(); - @Test - @DisplayName("matchAllWithGroups(String[]) records bulk capture metrics") - void matchAllWithGroupsStringArray_recordsBulkMetrics() { - Pattern p = Pattern.compile("(\\d+)"); - - String[] inputs = {"123", "456", "abc"}; - MatchResult[] results = p.matchAllWithGroups(inputs); - try { - for (MatchResult r : results) { - r.matched(); - } - } finally { - for (MatchResult r : results) { - r.close(); - } - } - - // Global metrics - count items - assertThat(registry.counter("test.re2.capture.operations.total.count").getCount()).isEqualTo(3); - - // Specific bulk metrics - assertThat(registry.counter("test.re2.capture.bulk.operations.total.count").getCount()).isEqualTo(1); - assertThat(registry.counter("test.re2.capture.bulk.items.total.count").getCount()).isEqualTo(3); - } - - @Test - @DisplayName("findAll(String) records findAll match count") - void findAllString_recordsMatchCount() { - Pattern p = Pattern.compile("(\\d+)"); - - List matches = p.findAll("a1b22c333"); - try { - assertThat(matches).hasSize(3); - } finally { - matches.forEach(MatchResult::close); - } - - // Should track match count - assertThat(registry.counter("test.re2.capture.findall.matches.total.count").getCount()).isEqualTo(3); - } - - // ========== Replace Operations Tests ========== - - @Test - @DisplayName("replaceFirst(String) records replace metrics") - void replaceFirstString_recordsMetrics() { - Pattern p = Pattern.compile("(\\d+)"); - - p.replaceFirst("123", "X"); - - // Global replace metrics - assertThat(registry.counter("test.re2.replace.operations.total.count").getCount()).isEqualTo(1); - assertThat(registry.timer("test.re2.replace.latency").getCount()).isGreaterThan(0); - - // Specific String metrics - 
assertThat(registry.counter("test.re2.replace.string.operations.total.count").getCount()).isEqualTo(1); - } - - @Test - @DisplayName("replaceAll(String[]) records bulk replace metrics") - void replaceAllStringArray_recordsBulkMetrics() { - Pattern p = Pattern.compile("(\\d+)"); - - String[] inputs = {"123", "456", "789"}; - p.replaceAll(inputs, "X"); - - // Global metrics - count items - assertThat(registry.counter("test.re2.replace.operations.total.count").getCount()).isEqualTo(3); - - // Specific bulk metrics - assertThat(registry.counter("test.re2.replace.bulk.operations.total.count").getCount()).isEqualTo(1); - assertThat(registry.counter("test.re2.replace.bulk.items.total.count").getCount()).isEqualTo(3); - } - - @Test - @DisplayName("Global replace = sum of String + Bulk") - void replaceGlobalEqualsSum() { - Pattern p = Pattern.compile("(\\d+)"); - - // String operation - p.replaceFirst("123", "X"); - - // Bulk operation (3 items) - p.replaceAll(new String[]{"456", "789", "abc"}, "Y"); - - // Global should be 1 + 3 = 4 - long global = registry.counter("test.re2.replace.operations.total.count").getCount(); - long string = registry.counter("test.re2.replace.string.operations.total.count").getCount(); - long bulkItems = registry.counter("test.re2.replace.bulk.items.total.count").getCount(); - - assertThat(global).isEqualTo(string + bulkItems); - assertThat(global).isEqualTo(4); - } + assertThat(global).isEqualTo(string + bulkItems); + assertThat(global).isEqualTo(4); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/MetricsIntegrationIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/MetricsIntegrationIT.java index b6e805e..41dfed7 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/MetricsIntegrationIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/MetricsIntegrationIT.java @@ -1,5 +1,7 @@ package com.axonops.libre2.metrics; +import static 
org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; import com.axonops.libre2.api.PatternCompilationException; @@ -13,276 +15,285 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import static org.assertj.core.api.Assertions.*; - /** * Integration tests verifying metrics are actually collected during operations. * - * Uses Pattern.setGlobalCache() to inject a test cache with Dropwizard metrics, - * then performs real operations and verifies metrics are updated correctly. + *

Uses Pattern.setGlobalCache() to inject a test cache with Dropwizard metrics, then performs + * real operations and verifies metrics are updated correctly. */ class MetricsIntegrationIT { - private MetricRegistry registry; - private PatternCache originalCache; + private MetricRegistry registry; + private PatternCache originalCache; - @BeforeEach - void setup() { - // Save original cache - originalCache = Pattern.getGlobalCache(); + @BeforeEach + void setup() { + // Save original cache + originalCache = Pattern.getGlobalCache(); - // Create test registry - registry = new MetricRegistry(); + // Create test registry + registry = new MetricRegistry(); - // Create config with Dropwizard metrics (disable initialization test) - RE2Config config = RE2Config.builder() + // Create config with Dropwizard metrics (disable initialization test) + RE2Config config = + RE2Config.builder() .metricsRegistry(new DropwizardMetricsAdapter(registry, "test.re2")) .build(); - // Inject test cache - Pattern.setGlobalCache(new PatternCache(config)); - } + // Inject test cache + Pattern.setGlobalCache(new PatternCache(config)); + } - @AfterEach - void cleanup() { - // Restore original cache - Pattern.setGlobalCache(originalCache); - } + @AfterEach + void cleanup() { + // Restore original cache + Pattern.setGlobalCache(originalCache); + } - @Test - void testPatternCompilationMetrics() { - // Compile a pattern - Pattern pattern = Pattern.compile("test.*"); + @Test + void testPatternCompilationMetrics() { + // Compile a pattern + Pattern pattern = Pattern.compile("test.*"); - // Verify compilation counter incremented - Counter compiled = registry.counter("test.re2.patterns.compiled.total.count"); - assertThat(compiled.getCount()).isEqualTo(1); + // Verify compilation counter incremented + Counter compiled = registry.counter("test.re2.patterns.compiled.total.count"); + assertThat(compiled.getCount()).isEqualTo(1); - // Verify compilation timer recorded - Timer compilationTime = 
registry.timer("test.re2.patterns.compilation.latency"); - assertThat(compilationTime.getCount()).isEqualTo(1); - assertThat(compilationTime.getSnapshot().getMean()).isGreaterThan(0); + // Verify compilation timer recorded + Timer compilationTime = registry.timer("test.re2.patterns.compilation.latency"); + assertThat(compilationTime.getCount()).isEqualTo(1); + assertThat(compilationTime.getSnapshot().getMean()).isGreaterThan(0); - // Compile another - Pattern.compile("other.*"); + // Compile another + Pattern.compile("other.*"); - assertThat(compiled.getCount()).isEqualTo(2); - assertThat(compilationTime.getCount()).isEqualTo(2); - } + assertThat(compiled.getCount()).isEqualTo(2); + assertThat(compilationTime.getCount()).isEqualTo(2); + } - @Test - void testCacheHitMissMetrics() { - // First compile - cache miss - Pattern p1 = Pattern.compile("test.*"); + @Test + void testCacheHitMissMetrics() { + // First compile - cache miss + Pattern p1 = Pattern.compile("test.*"); - Counter misses = registry.counter("test.re2.patterns.cache.misses.total.count"); - Counter hits = registry.counter("test.re2.patterns.cache.hits.total.count"); + Counter misses = registry.counter("test.re2.patterns.cache.misses.total.count"); + Counter hits = registry.counter("test.re2.patterns.cache.hits.total.count"); - assertThat(misses.getCount()).isEqualTo(1); - assertThat(hits.getCount()).isEqualTo(0); + assertThat(misses.getCount()).isEqualTo(1); + assertThat(hits.getCount()).isEqualTo(0); - // Second compile same pattern - cache hit - Pattern p2 = Pattern.compile("test.*"); + // Second compile same pattern - cache hit + Pattern p2 = Pattern.compile("test.*"); - assertThat(misses.getCount()).isEqualTo(1); // still 1 - assertThat(hits.getCount()).isEqualTo(1); // now 1 + assertThat(misses.getCount()).isEqualTo(1); // still 1 + assertThat(hits.getCount()).isEqualTo(1); // now 1 - // Different pattern - cache miss - Pattern p3 = Pattern.compile("other.*"); + // Different pattern - cache miss + 
Pattern p3 = Pattern.compile("other.*"); - assertThat(misses.getCount()).isEqualTo(2); - assertThat(hits.getCount()).isEqualTo(1); - } + assertThat(misses.getCount()).isEqualTo(2); + assertThat(hits.getCount()).isEqualTo(1); + } - @Test - void testMatchingMetrics() { - Pattern pattern = Pattern.compile("test.*"); + @Test + void testMatchingMetrics() { + Pattern pattern = Pattern.compile("test.*"); - // Full match - try (Matcher m = pattern.matcher("test123")) { - m.matches(); - } + // Full match + try (Matcher m = pattern.matcher("test123")) { + m.matches(); + } - Timer fullMatch = registry.timer("test.re2.matching.full_match.latency"); - assertThat(fullMatch.getCount()).isEqualTo(1); + Timer fullMatch = registry.timer("test.re2.matching.full_match.latency"); + assertThat(fullMatch.getCount()).isEqualTo(1); - Counter operations = registry.counter("test.re2.matching.operations.total.count"); - assertThat(operations.getCount()).isEqualTo(1); + Counter operations = registry.counter("test.re2.matching.operations.total.count"); + assertThat(operations.getCount()).isEqualTo(1); - // Partial match - try (Matcher m = pattern.matcher("test456")) { - m.find(); - } + // Partial match + try (Matcher m = pattern.matcher("test456")) { + m.find(); + } - Timer partialMatch = registry.timer("test.re2.matching.partial_match.latency"); - assertThat(partialMatch.getCount()).isEqualTo(1); + Timer partialMatch = registry.timer("test.re2.matching.partial_match.latency"); + assertThat(partialMatch.getCount()).isEqualTo(1); - assertThat(operations.getCount()).isEqualTo(2); // 1 full + 1 partial - } + assertThat(operations.getCount()).isEqualTo(2); // 1 full + 1 partial + } - @Test - void testCacheGauges() { - // Verify gauges registered - assertThat(registry.getGauges()).containsKeys( + @Test + void testCacheGauges() { + // Verify gauges registered + assertThat(registry.getGauges()) + .containsKeys( "test.re2.cache.patterns.current.count", "test.re2.cache.native_memory.current.bytes", - 
"test.re2.cache.native_memory.peak.bytes" - ); + "test.re2.cache.native_memory.peak.bytes"); - Gauge cacheSize = (Gauge) registry.getGauges().get("test.re2.cache.patterns.current.count"); - assertThat(cacheSize.getValue()).isEqualTo(0); // initially empty + Gauge cacheSize = + (Gauge) registry.getGauges().get("test.re2.cache.patterns.current.count"); + assertThat(cacheSize.getValue()).isEqualTo(0); // initially empty - // Compile patterns - Pattern.compile("pattern1"); - assertThat(cacheSize.getValue()).isEqualTo(1); + // Compile patterns + Pattern.compile("pattern1"); + assertThat(cacheSize.getValue()).isEqualTo(1); - Pattern.compile("pattern2"); - assertThat(cacheSize.getValue()).isEqualTo(2); + Pattern.compile("pattern2"); + assertThat(cacheSize.getValue()).isEqualTo(2); - Pattern.compile("pattern3"); - assertThat(cacheSize.getValue()).isEqualTo(3); + Pattern.compile("pattern3"); + assertThat(cacheSize.getValue()).isEqualTo(3); - // Verify native memory gauge - Gauge nativeMemory = (Gauge) registry.getGauges().get("test.re2.cache.native_memory.current.bytes"); - assertThat(nativeMemory.getValue()).isGreaterThan(0L); + // Verify native memory gauge + Gauge nativeMemory = + (Gauge) registry.getGauges().get("test.re2.cache.native_memory.current.bytes"); + assertThat(nativeMemory.getValue()).isGreaterThan(0L); - Gauge peakMemory = (Gauge) registry.getGauges().get("test.re2.cache.native_memory.peak.bytes"); - assertThat(peakMemory.getValue()).isGreaterThan(0L); - } + Gauge peakMemory = + (Gauge) registry.getGauges().get("test.re2.cache.native_memory.peak.bytes"); + assertThat(peakMemory.getValue()).isGreaterThan(0L); + } - @Test - void testResourceGauges() { - // Verify resource gauges registered (active counts only) - assertThat(registry.getGauges()).containsKeys( + @Test + void testResourceGauges() { + // Verify resource gauges registered (active counts only) + assertThat(registry.getGauges()) + .containsKeys( "test.re2.resources.patterns.active.current.count", - 
"test.re2.resources.matchers.active.current.count" - ); + "test.re2.resources.matchers.active.current.count"); + + Gauge patternsActive = + (Gauge) + registry.getGauges().get("test.re2.resources.patterns.active.current.count"); + Gauge matchersActive = + (Gauge) + registry.getGauges().get("test.re2.resources.matchers.active.current.count"); + + // Compile pattern (increases active patterns) + Pattern pattern = Pattern.compile("test.*"); + int activeAfterCompile = patternsActive.getValue(); + assertThat(activeAfterCompile).isGreaterThan(0); + + // Create and close matcher to trigger freed counter + Matcher matcher = pattern.matcher("test"); + assertThat(matchersActive.getValue()).isGreaterThan(0); + + matcher.close(); + assertThat(matchersActive.getValue()).isEqualTo(0); + + // Verify freed counter was incremented (now a Counter, not Gauge) + Counter matchersFreed = registry.counter("test.re2.resources.matchers.freed.total.count"); + assertThat(matchersFreed.getCount()) + .as("Matcher freed counter should have incremented") + .isEqualTo(1); + + // Note: patterns.freed counter only increments when non-cached patterns are freed + // Cached patterns are managed by cache, so this counter may be 0 in this test + } + + @Test + void testErrorMetrics_CompilationFailed() { + Counter errorCounter = registry.counter("test.re2.errors.compilation.failed.total.count"); + assertThat(errorCounter.getCount()).isEqualTo(0); + + // Trigger compilation error + try { + Pattern.compile("(unclosed"); + fail("Should have thrown PatternCompilationException"); + } catch (PatternCompilationException e) { + // Expected + } - Gauge patternsActive = (Gauge) registry.getGauges().get("test.re2.resources.patterns.active.current.count"); - Gauge matchersActive = (Gauge) registry.getGauges().get("test.re2.resources.matchers.active.current.count"); + // Verify error counter incremented + assertThat(errorCounter.getCount()).isEqualTo(1); + } - // Compile pattern (increases active patterns) - Pattern 
pattern = Pattern.compile("test.*"); - int activeAfterCompile = patternsActive.getValue(); - assertThat(activeAfterCompile).isGreaterThan(0); + @Test + void testEvictionMetrics() { + // Create small cache with NO eviction protection + RE2Config smallCacheConfig = + RE2Config.builder() + .maxCacheSize(5) + .evictionProtectionMs(0) // No protection - evict immediately + .metricsRegistry(new DropwizardMetricsAdapter(registry, "eviction.test")) + .build(); - // Create and close matcher to trigger freed counter - Matcher matcher = pattern.matcher("test"); - assertThat(matchersActive.getValue()).isGreaterThan(0); + Pattern.setGlobalCache(new PatternCache(smallCacheConfig)); - matcher.close(); - assertThat(matchersActive.getValue()).isEqualTo(0); + Gauge cacheSize = + (Gauge) registry.getGauges().get("eviction.test.cache.patterns.current.count"); + Counter lruEvictions = registry.counter("eviction.test.cache.evictions.lru.total.count"); - // Verify freed counter was incremented (now a Counter, not Gauge) - Counter matchersFreed = registry.counter("test.re2.resources.matchers.freed.total.count"); - assertThat(matchersFreed.getCount()) - .as("Matcher freed counter should have incremented") - .isEqualTo(1); + long evictionsBefore = lruEvictions.getCount(); - // Note: patterns.freed counter only increments when non-cached patterns are freed - // Cached patterns are managed by cache, so this counter may be 0 in this test + // Compile 15 patterns (way more than cache size of 5) + for (int i = 0; i < 15; i++) { + Pattern.compile("eviction_test_" + i); } - @Test - void testErrorMetrics_CompilationFailed() { - Counter errorCounter = registry.counter("test.re2.errors.compilation.failed.total.count"); - assertThat(errorCounter.getCount()).isEqualTo(0); - - // Trigger compilation error - try { - Pattern.compile("(unclosed"); - fail("Should have thrown PatternCompilationException"); - } catch (PatternCompilationException e) { - // Expected - } - - // Verify error counter incremented - 
assertThat(errorCounter.getCount()).isEqualTo(1); + // Wait for async LRU eviction to complete + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + // ignore } - @Test - void testEvictionMetrics() { - // Create small cache with NO eviction protection - RE2Config smallCacheConfig = RE2Config.builder() - .maxCacheSize(5) - .evictionProtectionMs(0) // No protection - evict immediately - .metricsRegistry(new DropwizardMetricsAdapter(registry, "eviction.test")) - .build(); + // Verify evictions occurred + long evictionsAfter = lruEvictions.getCount(); + assertThat(evictionsAfter) + .as("LRU evictions should have occurred (15 patterns > 5 max)") + .isGreaterThan(evictionsBefore); + + // Verify cache size is at or below max after eviction + int cacheSizeAfterEviction = cacheSize.getValue(); + assertThat(cacheSizeAfterEviction) + .as("Cache size should be at or below max after eviction") + .isLessThanOrEqualTo(5); + + // Verify significant evictions occurred (should have evicted ~10 patterns) + long totalEvictions = evictionsAfter - evictionsBefore; + assertThat(totalEvictions) + .as("Should have evicted approximately 10 patterns") + .isGreaterThanOrEqualTo(8); + } + + @Test + void testAll21MetricsExist() { + // Perform various operations to ensure all metrics are created + Pattern p1 = Pattern.compile("test.*"); + Pattern p2 = Pattern.compile("test.*"); // cache hit + + try (Matcher m = p1.matcher("test")) { + m.matches(); + m.find(); + } - Pattern.setGlobalCache(new PatternCache(smallCacheConfig)); - - Gauge cacheSize = (Gauge) registry.getGauges().get("eviction.test.cache.patterns.current.count"); - Counter lruEvictions = registry.counter("eviction.test.cache.evictions.lru.total.count"); - - long evictionsBefore = lruEvictions.getCount(); - - // Compile 15 patterns (way more than cache size of 5) - for (int i = 0; i < 15; i++) { - Pattern.compile("eviction_test_" + i); - } - - // Wait for async LRU eviction to complete - try { - Thread.sleep(1000); - } 
catch (InterruptedException e) { - // ignore - } - - // Verify evictions occurred - long evictionsAfter = lruEvictions.getCount(); - assertThat(evictionsAfter) - .as("LRU evictions should have occurred (15 patterns > 5 max)") - .isGreaterThan(evictionsBefore); - - // Verify cache size is at or below max after eviction - int cacheSizeAfterEviction = cacheSize.getValue(); - assertThat(cacheSizeAfterEviction) - .as("Cache size should be at or below max after eviction") - .isLessThanOrEqualTo(5); - - // Verify significant evictions occurred (should have evicted ~10 patterns) - long totalEvictions = evictionsAfter - evictionsBefore; - assertThat(totalEvictions) - .as("Should have evicted approximately 10 patterns") - .isGreaterThanOrEqualTo(8); + // Try to trigger error + try { + Pattern.compile("(invalid"); + } catch (Exception e) { + // Expected } - @Test - void testAll21MetricsExist() { - // Perform various operations to ensure all metrics are created - Pattern p1 = Pattern.compile("test.*"); - Pattern p2 = Pattern.compile("test.*"); // cache hit - - try (Matcher m = p1.matcher("test")) { - m.matches(); - m.find(); - } - - // Try to trigger error - try { - Pattern.compile("(invalid"); - } catch (Exception e) { - // Expected - } - - // Verify all 21 metric names exist in registry - // Counters (10) - assertThat(registry.getCounters().keySet()).contains( + // Verify all 21 metric names exist in registry + // Counters (10) + assertThat(registry.getCounters().keySet()) + .contains( "test.re2.patterns.compiled.total.count", "test.re2.patterns.cache.hits.total.count", "test.re2.patterns.cache.misses.total.count", - "test.re2.matching.operations.total.count" - ); + "test.re2.matching.operations.total.count"); - // Timers (3) - assertThat(registry.getTimers().keySet()).contains( + // Timers (3) + assertThat(registry.getTimers().keySet()) + .contains( "test.re2.patterns.compilation.latency", "test.re2.matching.full_match.latency", - "test.re2.matching.partial_match.latency" - 
); + "test.re2.matching.partial_match.latency"); - // Gauges (9 - current/peak values only) - assertThat(registry.getGauges().keySet()).contains( + // Gauges (9 - current/peak values only) + assertThat(registry.getGauges().keySet()) + .contains( "test.re2.cache.patterns.current.count", "test.re2.cache.native_memory.current.bytes", "test.re2.cache.native_memory.peak.bytes", @@ -291,32 +302,32 @@ void testAll21MetricsExist() { "test.re2.cache.deferred.patterns.current.count", "test.re2.cache.deferred.patterns.peak.count", "test.re2.cache.deferred.native_memory.current.bytes", - "test.re2.cache.deferred.native_memory.peak.bytes" - ); + "test.re2.cache.deferred.native_memory.peak.bytes"); - // Verify freed counts are now Counters (not Gauges) - assertThat(registry.getCounters().keySet()).contains( + // Verify freed counts are now Counters (not Gauges) + assertThat(registry.getCounters().keySet()) + .contains( "test.re2.resources.patterns.freed.total.count", - "test.re2.resources.matchers.freed.total.count" - ); - - // Error counters exist (even if count is 0) - assertThat(registry.counter("test.re2.errors.compilation.failed.total.count").getCount()).isGreaterThan(0); + "test.re2.resources.matchers.freed.total.count"); + + // Error counters exist (even if count is 0) + assertThat(registry.counter("test.re2.errors.compilation.failed.total.count").getCount()) + .isGreaterThan(0); + } + + @Test + void testNoOpMetrics_ZeroOverhead() { + // Restore default cache (NoOp metrics) + Pattern.setGlobalCache(new PatternCache(RE2Config.DEFAULT)); + + // Perform operations - should work fine with no metrics + Pattern pattern = Pattern.compile("test.*"); + try (Matcher matcher = pattern.matcher("test123")) { + boolean result = matcher.matches(); + assertThat(result).isTrue(); } - @Test - void testNoOpMetrics_ZeroOverhead() { - // Restore default cache (NoOp metrics) - Pattern.setGlobalCache(new PatternCache(RE2Config.DEFAULT)); - - // Perform operations - should work fine with no 
metrics - Pattern pattern = Pattern.compile("test.*"); - try (Matcher matcher = pattern.matcher("test123")) { - boolean result = matcher.matches(); - assertThat(result).isTrue(); - } - - // Original test registry should have no new metrics - // (since we switched to NoOp) - } + // Original test registry should have no new metrics + // (since we switched to NoOp) + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/NativeMemoryMetricsIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/NativeMemoryMetricsIT.java index c946c48..b5f21d1 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/NativeMemoryMetricsIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/NativeMemoryMetricsIT.java @@ -1,5 +1,7 @@ package com.axonops.libre2.metrics; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Pattern; import com.axonops.libre2.cache.PatternCache; import com.axonops.libre2.cache.RE2Config; @@ -9,228 +11,228 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import static org.assertj.core.api.Assertions.*; - /** * Tests specifically for native memory tracking metrics. * - * Verifies that memory gauges accurately track native memory allocation and deallocation. + *

Verifies that memory gauges accurately track native memory allocation and deallocation. */ class NativeMemoryMetricsIT { - private MetricRegistry registry; - private PatternCache originalCache; + private MetricRegistry registry; + private PatternCache originalCache; - @BeforeEach - void setup() { - originalCache = Pattern.getGlobalCache(); - registry = new MetricRegistry(); + @BeforeEach + void setup() { + originalCache = Pattern.getGlobalCache(); + registry = new MetricRegistry(); - RE2Config config = RE2Config.builder() + RE2Config config = + RE2Config.builder() .metricsRegistry(new DropwizardMetricsAdapter(registry, "memory.test")) .build(); - Pattern.setGlobalCache(new PatternCache(config)); + Pattern.setGlobalCache(new PatternCache(config)); + } + + @AfterEach + void cleanup() { + Pattern.setGlobalCache(originalCache); + } + + @Test + void testMemoryIncreasesWhenPatternsAdded() { + Gauge nativeMemory = + (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); + Gauge peakMemory = + (Gauge) registry.getGauges().get("memory.test.cache.native_memory.peak.bytes"); + + // Initial state - should be zero + long memoryBefore = nativeMemory.getValue(); + assertThat(memoryBefore).isEqualTo(0L); + + // Compile first pattern + Pattern p1 = Pattern.compile("test.*pattern"); + long memoryAfterP1 = nativeMemory.getValue(); + + // Memory should have increased + assertThat(memoryAfterP1) + .as("Native memory should increase when pattern added") + .isGreaterThan(memoryBefore); + + // Compile second pattern (different, larger pattern) + Pattern p2 = Pattern.compile("very.*complex.*regex.*with.*many.*terms"); + long memoryAfterP2 = nativeMemory.getValue(); + + // Memory should have increased again + assertThat(memoryAfterP2) + .as("Native memory should increase when second pattern added") + .isGreaterThan(memoryAfterP1); + + // Peak should track maximum + assertThat(peakMemory.getValue()) + .as("Peak memory should be >= current memory") + 
.isGreaterThanOrEqualTo(memoryAfterP2); + } + + @Test + void testMemoryDecreasesWhenPatternsEvicted() { + // Create small cache to trigger eviction + RE2Config smallCache = + RE2Config.builder() + .maxCacheSize(3) + .evictionProtectionMs(0) + .metricsRegistry(new DropwizardMetricsAdapter(registry, "eviction.memory.test")) + .build(); + + Pattern.setGlobalCache(new PatternCache(smallCache)); + + Gauge nativeMemory = + (Gauge) + registry.getGauges().get("eviction.memory.test.cache.native_memory.current.bytes"); + + // Add 3 patterns (fill cache) + Pattern.compile("pattern1"); + Pattern.compile("pattern2"); + Pattern.compile("pattern3"); + + long memoryWith3Patterns = nativeMemory.getValue(); + assertThat(memoryWith3Patterns).as("Should have memory with 3 patterns").isGreaterThan(0); + + // Add 5 more patterns (should trigger eviction of first 3) + for (int i = 4; i <= 8; i++) { + Pattern.compile("pattern" + i); } - @AfterEach - void cleanup() { - Pattern.setGlobalCache(originalCache); + // Wait for async eviction + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + // ignore } - @Test - void testMemoryIncreasesWhenPatternsAdded() { - Gauge nativeMemory = (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); - Gauge peakMemory = (Gauge) registry.getGauges().get("memory.test.cache.native_memory.peak.bytes"); - - // Initial state - should be zero - long memoryBefore = nativeMemory.getValue(); - assertThat(memoryBefore).isEqualTo(0L); - - // Compile first pattern - Pattern p1 = Pattern.compile("test.*pattern"); - long memoryAfterP1 = nativeMemory.getValue(); - - // Memory should have increased - assertThat(memoryAfterP1) - .as("Native memory should increase when pattern added") - .isGreaterThan(memoryBefore); - - // Compile second pattern (different, larger pattern) - Pattern p2 = Pattern.compile("very.*complex.*regex.*with.*many.*terms"); - long memoryAfterP2 = nativeMemory.getValue(); - - // Memory should have increased 
again - assertThat(memoryAfterP2) - .as("Native memory should increase when second pattern added") - .isGreaterThan(memoryAfterP1); - - // Peak should track maximum - assertThat(peakMemory.getValue()) - .as("Peak memory should be >= current memory") - .isGreaterThanOrEqualTo(memoryAfterP2); + // Memory should have stabilized (evicted old patterns, added new ones) + // Should be approximately same as before (3 patterns worth of memory) + long memoryAfterEviction = nativeMemory.getValue(); + + // Verify memory didn't grow unbounded (eviction freed memory) + assertThat(memoryAfterEviction) + .as("Memory should not grow unbounded - eviction should free memory") + .isLessThan(memoryWith3Patterns * 2); // Shouldn't double + + // More precise: should be similar to initial 3-pattern memory + assertThat(memoryAfterEviction) + .as("Memory should be similar to initial state (evicted old, added new)") + .isBetween(memoryWith3Patterns / 2, memoryWith3Patterns * 2); + } + + @Test + void testMemoryTrackingAccuracy() { + Gauge nativeMemory = + (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); + Gauge cacheSize = + (Gauge) registry.getGauges().get("memory.test.cache.patterns.current.count"); + + // Track actual pattern memory to verify gauge accuracy + long expectedTotalMemory = 0; + + for (int i = 0; i < 10; i++) { + Pattern pattern = Pattern.compile("pattern_" + i); + + // Get actual native memory from pattern + long patternMemory = pattern.getNativeMemoryBytes(); + expectedTotalMemory += patternMemory; + + // Verify gauge matches expected + long gaugeMemory = nativeMemory.getValue(); + assertThat(gaugeMemory) + .as("Gauge should exactly match sum of pattern memory (pattern %d)", i) + .isEqualTo(expectedTotalMemory); + + // Cache size should match patterns added + int currentSize = cacheSize.getValue(); + assertThat(currentSize).as("Cache size should equal patterns added").isEqualTo(i + 1); } - @Test - void testMemoryDecreasesWhenPatternsEvicted() 
{ - // Create small cache to trigger eviction - RE2Config smallCache = RE2Config.builder() - .maxCacheSize(3) - .evictionProtectionMs(0) - .metricsRegistry(new DropwizardMetricsAdapter(registry, "eviction.memory.test")) - .build(); + // Verify final memory is exact + assertThat(nativeMemory.getValue()) + .as("Final memory gauge should exactly match sum of all pattern memory") + .isEqualTo(expectedTotalMemory); + } + + @Test + void testPeakMemoryTracksMaximum() { + Gauge nativeMemory = + (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); + Gauge peakMemory = + (Gauge) registry.getGauges().get("memory.test.cache.native_memory.peak.bytes"); + + // Add patterns and track expected memory + long expectedMemory = 0; + for (int i = 0; i < 10; i++) { + Pattern pattern = Pattern.compile("pattern_with_some_complexity_" + i); + expectedMemory += pattern.getNativeMemoryBytes(); + + // Verify gauge tracks actual memory + assertThat(nativeMemory.getValue()) + .as("Current memory gauge should match sum of pattern sizes") + .isEqualTo(expectedMemory); + + // Peak should always be >= current + assertThat(peakMemory.getValue()) + .as("Peak should be >= current after adding pattern " + i) + .isGreaterThanOrEqualTo(nativeMemory.getValue()); + } - Pattern.setGlobalCache(new PatternCache(smallCache)); + long finalMemory = nativeMemory.getValue(); + long finalPeak = peakMemory.getValue(); - Gauge nativeMemory = (Gauge) registry.getGauges().get("eviction.memory.test.cache.native_memory.current.bytes"); + // Peak should equal current (we only added, never removed) + assertThat(finalPeak) + .as("Peak should equal current (only added patterns, never removed)") + .isEqualTo(finalMemory); - // Add 3 patterns (fill cache) - Pattern.compile("pattern1"); - Pattern.compile("pattern2"); - Pattern.compile("pattern3"); + // Clear the entire cache so that current memory drops to zero + Pattern.clearCache(); - long memoryWith3Patterns = nativeMemory.getValue(); - 
assertThat(memoryWith3Patterns).as("Should have memory with 3 patterns").isGreaterThan(0); + // After clear, current = 0 but peak should still be the old maximum + // NOTE: resetCache() resets peak (by design), but clearCache() doesn't reset stats + assertThat(nativeMemory.getValue()).as("Current memory should be 0 after clear").isEqualTo(0L); + } - // Add 5 more patterns (should trigger eviction of first 3) - for (int i = 4; i <= 8; i++) { - Pattern.compile("pattern" + i); - } + @Test + void testMemoryConsistencyWithCacheOperations() { + Gauge nativeMemory = + (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); + Gauge cacheSize = + (Gauge) registry.getGauges().get("memory.test.cache.patterns.current.count"); - // Wait for async eviction - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - // ignore - } + // Add 5 patterns + Pattern p1 = Pattern.compile("p1"); + Pattern p2 = Pattern.compile("p2"); + Pattern p3 = Pattern.compile("p3"); + Pattern p4 = Pattern.compile("p4"); + Pattern p5 = Pattern.compile("p5"); - // Memory should have stabilized (evicted old patterns, added new ones) - // Should be approximately same as before (3 patterns worth of memory) - long memoryAfterEviction = nativeMemory.getValue(); + long memory5Patterns = nativeMemory.getValue(); + assertThat(cacheSize.getValue()).isEqualTo(5); + assertThat(memory5Patterns).isGreaterThan(0); - // Verify memory didn't grow unbounded (eviction freed memory) - assertThat(memoryAfterEviction) - .as("Memory should not grow unbounded - eviction should free memory") - .isLessThan(memoryWith3Patterns * 2); // Shouldn't double + // Compile duplicate (cache hit - no new memory) + Pattern p1Again = Pattern.compile("p1"); + long memoryAfterCacheHit = nativeMemory.getValue(); - // More precise: should be similar to initial 3-pattern memory - assertThat(memoryAfterEviction) - .as("Memory should be similar to initial state (evicted old, added new)") - 
.isBetween(memoryWith3Patterns / 2, memoryWith3Patterns * 2); - } + assertThat(memoryAfterCacheHit) + .as("Memory should NOT increase on cache hit") + .isEqualTo(memory5Patterns); - @Test - void testMemoryTrackingAccuracy() { - Gauge nativeMemory = (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); - Gauge cacheSize = (Gauge) registry.getGauges().get("memory.test.cache.patterns.current.count"); - - // Track actual pattern memory to verify gauge accuracy - long expectedTotalMemory = 0; - - for (int i = 0; i < 10; i++) { - Pattern pattern = Pattern.compile("pattern_" + i); - - // Get actual native memory from pattern - long patternMemory = pattern.getNativeMemoryBytes(); - expectedTotalMemory += patternMemory; - - // Verify gauge matches expected - long gaugeMemory = nativeMemory.getValue(); - assertThat(gaugeMemory) - .as("Gauge should exactly match sum of pattern memory (pattern %d)", i) - .isEqualTo(expectedTotalMemory); - - // Cache size should match patterns added - int currentSize = cacheSize.getValue(); - assertThat(currentSize) - .as("Cache size should equal patterns added") - .isEqualTo(i + 1); - } - - // Verify final memory is exact - assertThat(nativeMemory.getValue()) - .as("Final memory gauge should exactly match sum of all pattern memory") - .isEqualTo(expectedTotalMemory); - } + assertThat(cacheSize.getValue()).as("Cache size should NOT increase on cache hit").isEqualTo(5); - @Test - void testPeakMemoryTracksMaximum() { - Gauge nativeMemory = (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); - Gauge peakMemory = (Gauge) registry.getGauges().get("memory.test.cache.native_memory.peak.bytes"); - - // Add patterns and track expected memory - long expectedMemory = 0; - for (int i = 0; i < 10; i++) { - Pattern pattern = Pattern.compile("pattern_with_some_complexity_" + i); - expectedMemory += pattern.getNativeMemoryBytes(); - - // Verify gauge tracks actual memory - 
assertThat(nativeMemory.getValue()) - .as("Current memory gauge should match sum of pattern sizes") - .isEqualTo(expectedMemory); - - // Peak should always be >= current - assertThat(peakMemory.getValue()) - .as("Peak should be >= current after adding pattern " + i) - .isGreaterThanOrEqualTo(nativeMemory.getValue()); - } - - long finalMemory = nativeMemory.getValue(); - long finalPeak = peakMemory.getValue(); - - // Peak should equal current (we only added, never removed) - assertThat(finalPeak) - .as("Peak should equal current (only added patterns, never removed)") - .isEqualTo(finalMemory); - - // Clear one pattern from cache to reduce memory - Pattern.clearCache(); - - // After clear, current = 0 but peak should still be the old maximum - // NOTE: resetCache() resets peak (by design), but clearCache() doesn't reset stats - assertThat(nativeMemory.getValue()) - .as("Current memory should be 0 after clear") - .isEqualTo(0L); - } + // Clear cache + Pattern.clearCache(); - @Test - void testMemoryConsistencyWithCacheOperations() { - Gauge nativeMemory = (Gauge) registry.getGauges().get("memory.test.cache.native_memory.current.bytes"); - Gauge cacheSize = (Gauge) registry.getGauges().get("memory.test.cache.patterns.current.count"); - - // Add 5 patterns - Pattern p1 = Pattern.compile("p1"); - Pattern p2 = Pattern.compile("p2"); - Pattern p3 = Pattern.compile("p3"); - Pattern p4 = Pattern.compile("p4"); - Pattern p5 = Pattern.compile("p5"); - - long memory5Patterns = nativeMemory.getValue(); - assertThat(cacheSize.getValue()).isEqualTo(5); - assertThat(memory5Patterns).isGreaterThan(0); - - // Compile duplicate (cache hit - no new memory) - Pattern p1Again = Pattern.compile("p1"); - long memoryAfterCacheHit = nativeMemory.getValue(); - - assertThat(memoryAfterCacheHit) - .as("Memory should NOT increase on cache hit") - .isEqualTo(memory5Patterns); - - assertThat(cacheSize.getValue()) - .as("Cache size should NOT increase on cache hit") - .isEqualTo(5); - - // Clear 
cache - Pattern.clearCache(); - - // Memory should be 0, cache empty - assertThat(nativeMemory.getValue()) - .as("Memory should be 0 after clear") - .isEqualTo(0L); - - assertThat(cacheSize.getValue()) - .as("Cache should be empty after clear") - .isEqualTo(0); - } + // Memory should be 0, cache empty + assertThat(nativeMemory.getValue()).as("Memory should be 0 after clear").isEqualTo(0L); + + assertThat(cacheSize.getValue()).as("Cache should be empty after clear").isEqualTo(0); + } } diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/TimerHistogramIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/TimerHistogramIT.java index c7fd687..38bc1d9 100644 --- a/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/TimerHistogramIT.java +++ b/libre2-core/src/integration-test/java/com/axonops/libre2/metrics/TimerHistogramIT.java @@ -1,5 +1,7 @@ package com.axonops.libre2.metrics; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; import com.axonops.libre2.cache.PatternCache; @@ -11,194 +13,177 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import java.util.concurrent.TimeUnit; - -import static org.assertj.core.api.Assertions.*; - /** * Tests verifying that Timer metrics provide comprehensive histogram statistics. * - * Dropwizard Timers automatically track: - * - Count, min, max, mean, median - * - Percentiles: 75th, 95th, 98th, 99th, 99.9th - * - Rates: 1-min, 5-min, 15-min moving averages + *

Dropwizard Timers automatically track: - Count, min, max, mean, median - Percentiles: 75th, + * 95th, 98th, 99th, 99.9th - Rates: 1-min, 5-min, 15-min moving averages */ class TimerHistogramIT { - private MetricRegistry registry; - private PatternCache originalCache; + private MetricRegistry registry; + private PatternCache originalCache; - @BeforeEach - void setup() { - originalCache = Pattern.getGlobalCache(); - registry = new MetricRegistry(); + @BeforeEach + void setup() { + originalCache = Pattern.getGlobalCache(); + registry = new MetricRegistry(); - RE2Config config = RE2Config.builder() + RE2Config config = + RE2Config.builder() .metricsRegistry(new DropwizardMetricsAdapter(registry, "histogram.test")) .build(); - Pattern.setGlobalCache(new PatternCache(config)); - } + Pattern.setGlobalCache(new PatternCache(config)); + } - @AfterEach - void cleanup() { - Pattern.setGlobalCache(originalCache); - } + @AfterEach + void cleanup() { + Pattern.setGlobalCache(originalCache); + } - @Test - void testCompilationLatency_ProvidesHistogramStats() { - // Compile multiple patterns to generate latency data - for (int i = 0; i < 100; i++) { - Pattern.compile("pattern_" + i); - } - - Timer compilationLatency = registry.timer("histogram.test.patterns.compilation.latency"); - - // Verify count - assertThat(compilationLatency.getCount()) - .as("Timer should track count") - .isEqualTo(100); - - // Verify rates (1/5/15 minute moving averages) - assertThat(compilationLatency.getOneMinuteRate()) - .as("Timer should provide 1-minute rate") - .isGreaterThanOrEqualTo(0.0); - - assertThat(compilationLatency.getFiveMinuteRate()) - .as("Timer should provide 5-minute rate") - .isGreaterThanOrEqualTo(0.0); - - assertThat(compilationLatency.getFifteenMinuteRate()) - .as("Timer should provide 15-minute rate") - .isGreaterThanOrEqualTo(0.0); - - // Verify snapshot provides histogram statistics - Snapshot snapshot = compilationLatency.getSnapshot(); - - assertThat(snapshot.getMin()) - 
.as("Timer should track minimum latency") - .isGreaterThan(0L); - - assertThat(snapshot.getMax()) - .as("Timer should track maximum latency") - .isGreaterThan(0L); - - assertThat(snapshot.getMean()) - .as("Timer should track mean latency") - .isGreaterThan(0.0); - - assertThat(snapshot.getMedian()) - .as("Timer should track median (50th percentile)") - .isGreaterThan(0.0); - - // Verify percentiles - assertThat(snapshot.get75thPercentile()) - .as("Timer should provide 75th percentile") - .isGreaterThan(0.0); - - assertThat(snapshot.get95thPercentile()) - .as("Timer should provide 95th percentile") - .isGreaterThan(0.0); - - assertThat(snapshot.get98thPercentile()) - .as("Timer should provide 98th percentile") - .isGreaterThan(0.0); - - assertThat(snapshot.get99thPercentile()) - .as("Timer should provide 99th percentile") - .isGreaterThan(0.0); - - assertThat(snapshot.get999thPercentile()) - .as("Timer should provide 99.9th percentile") - .isGreaterThan(0.0); - - // Verify min <= mean <= max (sanity check) - assertThat(snapshot.getMean()) - .as("Mean should be between min and max") - .isBetween((double) snapshot.getMin(), (double) snapshot.getMax()); + @Test + void testCompilationLatency_ProvidesHistogramStats() { + // Compile multiple patterns to generate latency data + for (int i = 0; i < 100; i++) { + Pattern.compile("pattern_" + i); } - @Test - void testMatchingLatency_ProvidesHistogramStats() { - Pattern pattern = Pattern.compile("test.*"); + Timer compilationLatency = registry.timer("histogram.test.patterns.compilation.latency"); - // Perform 50 matches to generate latency data - for (int i = 0; i < 50; i++) { - try (Matcher m = pattern.matcher("test" + i)) { - m.matches(); - } - } + // Verify count + assertThat(compilationLatency.getCount()).as("Timer should track count").isEqualTo(100); - Timer fullMatchLatency = registry.timer("histogram.test.matching.full_match.latency"); + // Verify rates (1/5/15 minute moving averages) + 
assertThat(compilationLatency.getOneMinuteRate()) + .as("Timer should provide 1-minute rate") + .isGreaterThanOrEqualTo(0.0); - // Verify count - assertThat(fullMatchLatency.getCount()) - .as("Should have 50 full match operations") - .isEqualTo(50); + assertThat(compilationLatency.getFiveMinuteRate()) + .as("Timer should provide 5-minute rate") + .isGreaterThanOrEqualTo(0.0); - // Verify histogram stats available - Snapshot snapshot = fullMatchLatency.getSnapshot(); + assertThat(compilationLatency.getFifteenMinuteRate()) + .as("Timer should provide 15-minute rate") + .isGreaterThanOrEqualTo(0.0); - assertThat(snapshot.getMin()) - .as("Should track minimum match latency") - .isGreaterThan(0L); + // Verify snapshot provides histogram statistics + Snapshot snapshot = compilationLatency.getSnapshot(); - assertThat(snapshot.getMax()) - .as("Should track maximum match latency") - .isGreaterThan(0L); + assertThat(snapshot.getMin()).as("Timer should track minimum latency").isGreaterThan(0L); - assertThat(snapshot.get99thPercentile()) - .as("Should provide 99th percentile match latency") - .isGreaterThan(0.0); + assertThat(snapshot.getMax()).as("Timer should track maximum latency").isGreaterThan(0L); - // Verify max >= mean >= min - assertThat(snapshot.getMax()) - .as("Max should be >= mean") - .isGreaterThanOrEqualTo((long) snapshot.getMean()); + assertThat(snapshot.getMean()).as("Timer should track mean latency").isGreaterThan(0.0); - assertThat(snapshot.getMean()) - .as("Mean should be >= min") - .isGreaterThanOrEqualTo((double) snapshot.getMin()); - } + assertThat(snapshot.getMedian()) + .as("Timer should track median (50th percentile)") + .isGreaterThan(0.0); + + // Verify percentiles + assertThat(snapshot.get75thPercentile()) + .as("Timer should provide 75th percentile") + .isGreaterThan(0.0); + + assertThat(snapshot.get95thPercentile()) + .as("Timer should provide 95th percentile") + .isGreaterThan(0.0); + + assertThat(snapshot.get98thPercentile()) + .as("Timer 
should provide 98th percentile") + .isGreaterThan(0.0); + + assertThat(snapshot.get99thPercentile()) + .as("Timer should provide 99th percentile") + .isGreaterThan(0.0); + + assertThat(snapshot.get999thPercentile()) + .as("Timer should provide 99.9th percentile") + .isGreaterThan(0.0); + + // Verify min <= mean <= max (sanity check) + assertThat(snapshot.getMean()) + .as("Mean should be between min and max") + .isBetween((double) snapshot.getMin(), (double) snapshot.getMax()); + } - @Test - void testPartialMatchLatency_TrackedSeparately() { - Pattern pattern = Pattern.compile("test.*"); - - // Perform partial matches - for (int i = 0; i < 30; i++) { - try (Matcher m = pattern.matcher("test" + i)) { - m.find(); - } - } - - Timer partialMatchLatency = registry.timer("histogram.test.matching.partial_match.latency"); - - // Verify tracked separately from full match - assertThat(partialMatchLatency.getCount()) - .as("Partial match should be tracked separately") - .isEqualTo(30); - - // Verify histogram available - Snapshot snapshot = partialMatchLatency.getSnapshot(); - assertThat(snapshot.get95thPercentile()) - .as("95th percentile should be available for partial match") - .isGreaterThan(0.0); + @Test + void testMatchingLatency_ProvidesHistogramStats() { + Pattern pattern = Pattern.compile("test.*"); + + // Perform 50 matches to generate latency data + for (int i = 0; i < 50; i++) { + try (Matcher m = pattern.matcher("test" + i)) { + m.matches(); + } } - @Test - void testTimerUnits_Nanoseconds() { - // Compile a pattern - Pattern.compile("test.*"); + Timer fullMatchLatency = registry.timer("histogram.test.matching.full_match.latency"); + + // Verify count + assertThat(fullMatchLatency.getCount()) + .as("Should have 50 full match operations") + .isEqualTo(50); + + // Verify histogram stats available + Snapshot snapshot = fullMatchLatency.getSnapshot(); + + assertThat(snapshot.getMin()).as("Should track minimum match latency").isGreaterThan(0L); + + 
assertThat(snapshot.getMax()).as("Should track maximum match latency").isGreaterThan(0L); + + assertThat(snapshot.get99thPercentile()) + .as("Should provide 99th percentile match latency") + .isGreaterThan(0.0); + + // Verify max >= mean >= min + assertThat(snapshot.getMax()) + .as("Max should be >= mean") + .isGreaterThanOrEqualTo((long) snapshot.getMean()); - Timer compilationLatency = registry.timer("histogram.test.patterns.compilation.latency"); + assertThat(snapshot.getMean()) + .as("Mean should be >= min") + .isGreaterThanOrEqualTo((double) snapshot.getMin()); + } - // Verify latency is in nanoseconds (should be small values) - Snapshot snapshot = compilationLatency.getSnapshot(); + @Test + void testPartialMatchLatency_TrackedSeparately() { + Pattern pattern = Pattern.compile("test.*"); - // Typical compilation: 10,000ns - 10,000,000ns (10μs - 10ms) - assertThat(snapshot.getMean()) - .as("Latency should be in nanoseconds (10μs - 100ms range)") - .isBetween(1000.0, 100_000_000.0); + // Perform partial matches + for (int i = 0; i < 30; i++) { + try (Matcher m = pattern.matcher("test" + i)) { + m.find(); + } } + + Timer partialMatchLatency = registry.timer("histogram.test.matching.partial_match.latency"); + + // Verify tracked separately from full match + assertThat(partialMatchLatency.getCount()) + .as("Partial match should be tracked separately") + .isEqualTo(30); + + // Verify histogram available + Snapshot snapshot = partialMatchLatency.getSnapshot(); + assertThat(snapshot.get95thPercentile()) + .as("95th percentile should be available for partial match") + .isGreaterThan(0.0); + } + + @Test + void testTimerUnits_Nanoseconds() { + // Compile a pattern + Pattern.compile("test.*"); + + Timer compilationLatency = registry.timer("histogram.test.patterns.compilation.latency"); + + // Verify latency is in nanoseconds (should be small values) + Snapshot snapshot = compilationLatency.getSnapshot(); + + // Typical compilation: 10,000ns - 10,000,000ns (10μs - 10ms) + 
assertThat(snapshot.getMean()) + .as("Latency should be in nanoseconds (10μs - 100ms range)") + .isBetween(1000.0, 100_000_000.0); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/MatchResult.java b/libre2-core/src/main/java/com/axonops/libre2/api/MatchResult.java index 07b9776..a2785cb 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/api/MatchResult.java +++ b/libre2-core/src/main/java/com/axonops/libre2/api/MatchResult.java @@ -24,16 +24,17 @@ /** * Result of a regex match operation with capture group access. * - *

This class provides access to captured groups from a successful regex match. - * It is immutable and thread-safe.

+ *

This class provides access to captured groups from a successful regex match. It is immutable + * and thread-safe. * - *

IMPORTANT: Resource Management

- *

MatchResult implements {@link AutoCloseable} for API consistency and safety. - * While MatchResult doesn't hold native resources directly, it follows the same - * lifecycle pattern as {@link Pattern} and {@link Matcher} to ensure consistent - * usage throughout the library.

+ *

IMPORTANT: Resource Management + * + *

MatchResult implements {@link AutoCloseable} for API consistency and safety. While MatchResult + * doesn't hold native resources directly, it follows the same lifecycle pattern as {@link Pattern} + * and {@link Matcher} to ensure consistent usage throughout the library. + * + *

Always use try-with-resources: * - *

Always use try-with-resources:

*
{@code
  * Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+)\\.([a-z]+)");
  *
@@ -47,6 +48,7 @@
  * }
* *

Named Groups

+ * *
{@code
  * Pattern pattern = Pattern.compile("(?P\\d{4})-(?P\\d{2})-(?P\\d{2})");
  *
@@ -59,235 +61,243 @@
  * }
  * }
* - *

Why AutoCloseable?

+ *

Why AutoCloseable? + * *

    - *
  • API Consistency - Pattern, Matcher, and MatchResult all use try-with-resources
  • - *
  • Safety Culture - Uniform resource management pattern throughout library
  • - *
  • Future-Proof - If cleanup logic needed later, structure already in place
  • - *
  • Error Prevention - IDE warnings if try-with-resources not used
  • + *
  • API Consistency - Pattern, Matcher, and MatchResult all use try-with-resources + *
  • Safety Culture - Uniform resource management pattern throughout library + *
  • Future-Proof - If cleanup logic needed later, structure already in place + *
  • Error Prevention - IDE warnings if try-with-resources not used *
* * @since 1.2.0 */ public final class MatchResult implements AutoCloseable { - private final boolean matched; - private final String input; - private final String[] groups; - private final Map namedGroups; - private final AtomicBoolean closed = new AtomicBoolean(false); + private final boolean matched; + private final String input; + private final String[] groups; + private final Map namedGroups; + private final AtomicBoolean closed = new AtomicBoolean(false); - /** - * Creates a MatchResult for a successful match. - * - * @param input the original input string - * @param groups the captured groups (group[0] is full match, group[1+] are capturing groups) - * @param namedGroups map of named group names to their indices - */ - MatchResult(String input, String[] groups, Map namedGroups) { - this.matched = true; - this.input = Objects.requireNonNull(input, "input cannot be null"); - this.groups = Objects.requireNonNull(groups, "groups cannot be null"); - this.namedGroups = namedGroups != null ? Collections.unmodifiableMap(namedGroups) : Collections.emptyMap(); - } + /** + * Creates a MatchResult for a successful match. + * + * @param input the original input string + * @param groups the captured groups (group[0] is full match, group[1+] are capturing groups) + * @param namedGroups map of named group names to their indices + */ + MatchResult(String input, String[] groups, Map namedGroups) { + this.matched = true; + this.input = Objects.requireNonNull(input, "input cannot be null"); + this.groups = Objects.requireNonNull(groups, "groups cannot be null"); + this.namedGroups = + namedGroups != null ? Collections.unmodifiableMap(namedGroups) : Collections.emptyMap(); + } - /** - * Creates a MatchResult for a failed match. 
- * - * @param input the original input string - */ - MatchResult(String input) { - this.matched = false; - this.input = Objects.requireNonNull(input, "input cannot be null"); - this.groups = new String[0]; - this.namedGroups = Collections.emptyMap(); - } + /** + * Creates a MatchResult for a failed match. + * + * @param input the original input string + */ + MatchResult(String input) { + this.matched = false; + this.input = Objects.requireNonNull(input, "input cannot be null"); + this.groups = new String[0]; + this.namedGroups = Collections.emptyMap(); + } + + /** + * Checks if the match was successful. + * + * @return true if a match was found, false otherwise + * @throws IllegalStateException if MatchResult is closed + */ + public boolean matched() { + checkNotClosed(); + return matched; + } - /** - * Checks if the match was successful. - * - * @return true if a match was found, false otherwise - * @throws IllegalStateException if MatchResult is closed - */ - public boolean matched() { - checkNotClosed(); - return matched; + /** + * Gets the full matched text (same as {@code group(0)}). + * + * @return the full matched text + * @throws IllegalStateException if MatchResult is closed or no match was found + */ + public String group() { + checkNotClosed(); + return group(0); + } + + /** + * Gets a captured group by index. + * + *

Index 0 is the full match. Index 1+ are capturing groups in order. + * + *

Example: + * + *

{@code
+   * Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
+   * MatchResult result = pattern.match("2025-11-24");
+   *
+   * result.group(0);  // "2025-11-24" (full match)
+   * result.group(1);  // "2025" (first group)
+   * result.group(2);  // "11" (second group)
+   * result.group(3);  // "24" (third group)
+   * }
+ * + * @param index the group index (0 = full match, 1+ = capturing groups) + * @return the captured group text, or null if group didn't participate in match + * @throws IllegalStateException if MatchResult is closed or the match failed + * @throws IndexOutOfBoundsException if index is negative or > groupCount() + */ + public String group(int index) { + checkNotClosed(); + if (!matched) { + throw new IllegalStateException("No match found"); + } + if (index < 0 || index >= groups.length) { + throw new IndexOutOfBoundsException( + "Group index " + index + " out of bounds (0 to " + (groups.length - 1) + ")"); } + return groups[index]; + } - /** - * Gets the full matched text (same as {@code group(0)}). - * - * @return the full matched text, or null if no match - * @throws IllegalStateException if MatchResult is closed - */ - public String group() { - checkNotClosed(); - return group(0); + /** + * Gets a captured group by name. + * + *

Named groups use RE2 syntax: {@code (?Ppattern)} + * + *

Example: + * + *

{@code
+   * Pattern pattern = Pattern.compile("(?P[a-z]+)@(?P[a-z]+\\.[a-z]+)");
+   * MatchResult result = pattern.match("admin@example.com");
+   *
+   * result.group("user");    // "admin"
+   * result.group("domain");  // "example.com"
+   * }
+ * + * @param name the name of the capturing group + * @return the captured group text, or null if group didn't participate or doesn't exist + * @throws IllegalStateException if MatchResult is closed or the match failed + * @throws NullPointerException if name is null + */ + public String group(String name) { + checkNotClosed(); + if (!matched) { + throw new IllegalStateException("No match found"); } + Objects.requireNonNull(name, "Group name cannot be null"); - /** - * Gets a captured group by index. - * - *

Index 0 is the full match. Index 1+ are capturing groups in order.

- * - *

Example:

- *
{@code
-     * Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
-     * MatchResult result = pattern.match("2025-11-24");
-     *
-     * result.group(0);  // "2025-11-24" (full match)
-     * result.group(1);  // "2025" (first group)
-     * result.group(2);  // "11" (second group)
-     * result.group(3);  // "24" (third group)
-     * }
- * - * @param index the group index (0 = full match, 1+ = capturing groups) - * @return the captured group text, or null if group didn't participate in match - * @throws IllegalStateException if match failed - * @throws IndexOutOfBoundsException if index is negative or >= groupCount() - */ - public String group(int index) { - checkNotClosed(); - if (!matched) { - throw new IllegalStateException("No match found"); - } - if (index < 0 || index >= groups.length) { - throw new IndexOutOfBoundsException( - "Group index " + index + " out of bounds (0 to " + (groups.length - 1) + ")"); - } - return groups[index]; + Integer index = namedGroups.get(name); + if (index == null) { + return null; // Named group doesn't exist } - /** - * Gets a captured group by name. - * - *

Named groups use RE2 syntax: {@code (?Ppattern)}

- * - *

Example:

- *
{@code
-     * Pattern pattern = Pattern.compile("(?P[a-z]+)@(?P[a-z]+\\.[a-z]+)");
-     * MatchResult result = pattern.match("admin@example.com");
-     *
-     * result.group("user");    // "admin"
-     * result.group("domain");  // "example.com"
-     * }
- * - * @param name the name of the capturing group - * @return the captured group text, or null if group didn't participate or doesn't exist - * @throws IllegalStateException if match failed - * @throws NullPointerException if name is null - */ - public String group(String name) { - checkNotClosed(); - if (!matched) { - throw new IllegalStateException("No match found"); - } - Objects.requireNonNull(name, "Group name cannot be null"); + return groups[index]; + } - Integer index = namedGroups.get(name); - if (index == null) { - return null; // Named group doesn't exist - } + /** + * Gets the number of capturing groups in the pattern. + * + *

This count does NOT include group 0 (the full match). A pattern with no capturing groups + * returns 0, but you can still access group(0). + * + * @return number of capturing groups (excluding group 0) + * @throws IllegalStateException if MatchResult is closed + */ + public int groupCount() { + checkNotClosed(); + return matched ? groups.length - 1 : 0; + } - return groups[index]; - } + /** + * Gets the original input string. + * + * @return the input string that was matched against + * @throws IllegalStateException if MatchResult is closed + */ + public String input() { + checkNotClosed(); + return input; + } - /** - * Gets the number of capturing groups in the pattern. - * - *

This count does NOT include group 0 (the full match). A pattern with - * no capturing groups returns 0, but you can still access group(0).

- * - * @return number of capturing groups (excluding group 0) - * @throws IllegalStateException if MatchResult is closed - */ - public int groupCount() { - checkNotClosed(); - return matched ? groups.length - 1 : 0; - } + /** + * Gets all captured groups as an array. + * + *

Array indices: [0] = full match, [1+] = capturing groups. + * + * @return array of captured groups, or empty array if no match + * @throws IllegalStateException if MatchResult is closed + */ + public String[] groups() { + checkNotClosed(); + return groups.clone(); // Defensive copy + } - /** - * Gets the original input string. - * - * @return the input string that was matched against - * @throws IllegalStateException if MatchResult is closed - */ - public String input() { - checkNotClosed(); - return input; - } + /** + * Gets the map of named groups to their indices. + * + * @return unmodifiable map of group names to indices, or empty map if no named groups + * @throws IllegalStateException if MatchResult is closed + */ + public Map namedGroups() { + checkNotClosed(); + return namedGroups; + } - /** - * Gets all captured groups as an array. - * - *

Array indices: [0] = full match, [1+] = capturing groups.

- * - * @return array of captured groups, or empty array if no match - * @throws IllegalStateException if MatchResult is closed - */ - public String[] groups() { - checkNotClosed(); - return groups.clone(); // Defensive copy - } + /** + * Closes this MatchResult. + * + *

While MatchResult doesn't hold native resources, it implements the close pattern for API + * consistency with {@link Pattern} and {@link Matcher}. + * + *

After closing, all accessor methods will throw {@link IllegalStateException}. + * + *

This method is idempotent - calling close() multiple times is safe. + */ + @Override + public void close() { + closed.set(true); + } - /** - * Gets the map of named groups to their indices. - * - * @return unmodifiable map of group names to indices, or empty map if no named groups - * @throws IllegalStateException if MatchResult is closed - */ - public Map namedGroups() { - checkNotClosed(); - return namedGroups; + /** + * Checks if this MatchResult is closed. + * + * @throws IllegalStateException if closed + */ + private void checkNotClosed() { + if (closed.get()) { + throw new IllegalStateException("RE2: MatchResult is closed"); } + } - /** - * Closes this MatchResult. - * - *

While MatchResult doesn't hold native resources, it implements the close - * pattern for API consistency with {@link Pattern} and {@link Matcher}.

- * - *

After closing, all accessor methods will throw {@link IllegalStateException}.

- * - *

This method is idempotent - calling close() multiple times is safe.

- */ - @Override - public void close() { - closed.set(true); + @Override + public String toString() { + if (!matched) { + return "MatchResult{matched=false, input=\"" + input + "\"}"; } + return "MatchResult{matched=true, input=\"" + input + "\", groups=" + groups.length + "}"; + } - /** - * Checks if this MatchResult is closed. - * - * @throws IllegalStateException if closed - */ - private void checkNotClosed() { - if (closed.get()) { - throw new IllegalStateException("RE2: MatchResult is closed"); - } + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; } - - @Override - public String toString() { - if (!matched) { - return "MatchResult{matched=false, input=\"" + input + "\"}"; - } - return "MatchResult{matched=true, input=\"" + input + "\", groups=" + groups.length + "}"; + if (!(obj instanceof MatchResult other)) { + return false; } - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (!(obj instanceof MatchResult other)) return false; - - return matched == other.matched && - input.equals(other.input) && - java.util.Arrays.equals(groups, other.groups) && - namedGroups.equals(other.namedGroups); - } + return matched == other.matched + && input.equals(other.input) + && java.util.Arrays.equals(groups, other.groups) + && namedGroups.equals(other.namedGroups); + } - @Override - public int hashCode() { - return Objects.hash(matched, input, java.util.Arrays.hashCode(groups), namedGroups); - } + @Override + public int hashCode() { + return Objects.hash(matched, input, java.util.Arrays.hashCode(groups), namedGroups); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/Matcher.java b/libre2-core/src/main/java/com/axonops/libre2/api/Matcher.java index cb297a9..e379e0b 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/api/Matcher.java +++ b/libre2-core/src/main/java/com/axonops/libre2/api/Matcher.java @@ -16,22 +16,22 @@ package com.axonops.libre2.api; -import 
com.axonops.libre2.metrics.RE2MetricsRegistry; import com.axonops.libre2.metrics.MetricNames; - +import com.axonops.libre2.metrics.RE2MetricsRegistry; import java.util.Objects; import java.util.concurrent.atomic.AtomicBoolean; /** * Performs regex matching operations. * - * NOT Thread-Safe: Each Matcher instance must be confined to a single thread. - * Do NOT share Matcher instances between threads. + *

NOT Thread-Safe: Each Matcher instance must be confined to a single thread. Do NOT share + * Matcher instances between threads. * - * Safe Pattern: Create separate Matcher per thread from shared Pattern. - * The underlying Pattern CAN be safely shared - only the Matcher cannot. + *

Safe Pattern: Create separate Matcher per thread from shared Pattern. The underlying Pattern + * CAN be safely shared - only the Matcher cannot. + * + *

Example: * - * Example: *

  * Pattern sharedPattern = RE2.compile("\\d+");  // Thread-safe, can share
  *
@@ -50,73 +50,83 @@
  */
 public final class Matcher implements AutoCloseable {
 
-    private final Pattern pattern;
-    private final String input;
-    private final AtomicBoolean closed = new AtomicBoolean(false);
-    private final RE2MetricsRegistry metrics; // Cached to avoid repeated getGlobalCache() calls
+  private final Pattern pattern;
+  private final String input;
+  private final AtomicBoolean closed = new AtomicBoolean(false);
+  private final RE2MetricsRegistry metrics; // Cached to avoid repeated getGlobalCache() calls
 
-    Matcher(Pattern pattern, String input) {
-        this.pattern = Objects.requireNonNull(pattern);
-        this.input = Objects.requireNonNull(input);
-        this.metrics = Pattern.getGlobalCache().getConfig().metricsRegistry(); // Cache once
+  Matcher(Pattern pattern, String input) {
+    this.pattern = Objects.requireNonNull(pattern);
+    this.input = Objects.requireNonNull(input);
+    this.metrics = Pattern.getGlobalCache().getConfig().metricsRegistry(); // Cache once
 
-        // Increment reference count to prevent pattern being freed while in use
-        pattern.incrementRefCount();
+    // Increment reference count to prevent pattern being freed while in use
+    pattern.incrementRefCount();
 
-        // Track matcher allocation
-        Pattern.getGlobalCache().getResourceTracker().trackMatcherAllocated();
-    }
+    // Track matcher allocation
+    Pattern.getGlobalCache().getResourceTracker().trackMatcherAllocated();
+  }
 
-    public boolean matches() {
-        checkNotClosed();
+  /**
+   * Tests if the entire input matches the pattern.
+   *
+   * @return true if the entire input matches
+   */
+  public boolean matches() {
+    checkNotClosed();
 
-        long startNanos = System.nanoTime();
+    long startNanos = System.nanoTime();
 
-        boolean result = pattern.jni.fullMatch(pattern.getNativeHandle(), input);
+    boolean result = pattern.jni.fullMatch(pattern.getNativeHandle(), input);
 
-        long durationNanos = System.nanoTime() - startNanos;
-        metrics.recordTimer(MetricNames.MATCHING_FULL_MATCH_LATENCY, durationNanos);
-        metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS);
+    long durationNanos = System.nanoTime() - startNanos;
+    metrics.recordTimer(MetricNames.MATCHING_FULL_MATCH_LATENCY, durationNanos);
+    metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS);
 
-        return result;
-    }
+    return result;
+  }
 
-    public boolean find() {
-        checkNotClosed();
+  /**
+   * Tests whether the pattern matches anywhere in the input (partial match).
+   *
+   * @return true if the pattern matches at least one substring of the input
+   */
+  public boolean find() {
+    checkNotClosed();
 
-        long startNanos = System.nanoTime();
+    long startNanos = System.nanoTime();
 
-        boolean result = pattern.jni.partialMatch(pattern.getNativeHandle(), input);
+    boolean result = pattern.jni.partialMatch(pattern.getNativeHandle(), input);
 
-        long durationNanos = System.nanoTime() - startNanos;
-        metrics.recordTimer(MetricNames.MATCHING_PARTIAL_MATCH_LATENCY, durationNanos);
-        metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS);
+    long durationNanos = System.nanoTime() - startNanos;
+    metrics.recordTimer(MetricNames.MATCHING_PARTIAL_MATCH_LATENCY, durationNanos);
+    metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS);
 
-        return result;
-    }
+    return result;
+  }
 
-    public Pattern pattern() {
-        return pattern;
-    }
+  public Pattern pattern() {
+    return pattern;
+  }
 
-    public String input() {
-        return input;
-    }
+  public String input() {
+    return input;
+  }
 
-    @Override
-    public void close() {
-        if (closed.compareAndSet(false, true)) {
-            // Decrement reference count - pattern can now be freed if evicted
-            pattern.decrementRefCount();
+  @Override
+  public void close() {
+    if (closed.compareAndSet(false, true)) {
+      // Decrement reference count - pattern can now be freed if evicted
+      pattern.decrementRefCount();
 
-            // Track matcher freed (use cached metrics)
-            Pattern.getGlobalCache().getResourceTracker().trackMatcherFreed(metrics);
-        }
+      // Track matcher freed (use cached metrics)
+      Pattern.getGlobalCache().getResourceTracker().trackMatcherFreed(metrics);
     }
+  }
 
-    private void checkNotClosed() {
-        if (closed.get()) {
-            throw new IllegalStateException("RE2: Matcher is closed");
-        }
+  private void checkNotClosed() {
+    if (closed.get()) {
+      throw new IllegalStateException("RE2: Matcher is closed");
     }
+  }
 }
diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/NativeLibraryException.java b/libre2-core/src/main/java/com/axonops/libre2/api/NativeLibraryException.java
index 3ac9976..aa5be32 100644
--- a/libre2-core/src/main/java/com/axonops/libre2/api/NativeLibraryException.java
+++ b/libre2-core/src/main/java/com/axonops/libre2/api/NativeLibraryException.java
@@ -23,11 +23,11 @@
  */
 public final class NativeLibraryException extends RE2Exception {
 
-    public NativeLibraryException(String message) {
-        super("RE2: Native library error: " + message);
-    }
+  public NativeLibraryException(String message) {
+    super("RE2: Native library error: " + message);
+  }
 
-    public NativeLibraryException(String message, Throwable cause) {
-        super("RE2: Native library error: " + message, cause);
-    }
+  public NativeLibraryException(String message, Throwable cause) {
+    super("RE2: Native library error: " + message, cause);
+  }
 }
diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/Pattern.java b/libre2-core/src/main/java/com/axonops/libre2/api/Pattern.java
index 2d71e2e..8c1dc39 100644
--- a/libre2-core/src/main/java/com/axonops/libre2/api/Pattern.java
+++ b/libre2-core/src/main/java/com/axonops/libre2/api/Pattern.java
@@ -18,3010 +18,3100 @@
 
 import com.axonops.libre2.cache.PatternCache;
 import com.axonops.libre2.cache.RE2Config;
-import com.axonops.libre2.jni.RE2Native;
 import com.axonops.libre2.jni.IRE2Native;
 import com.axonops.libre2.jni.RE2LibraryLoader;
+import com.axonops.libre2.jni.RE2Native;
 import com.axonops.libre2.metrics.MetricNames;
 import com.axonops.libre2.metrics.RE2MetricsRegistry;
 import com.axonops.libre2.util.PatternHasher;
-import com.axonops.libre2.util.ResourceTracker;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.Map;
 import java.util.Objects;
 import java.util.concurrent.atomic.AtomicBoolean;
-
-// DirectBuffer is a public interface - no reflection needed
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import sun.nio.ch.DirectBuffer;
 
 /**
  * A compiled regular expression pattern.
  *
- * Thread-safe: Pattern instances can be safely shared between threads.
- * Multiple threads can call matcher() concurrently on the same Pattern.
+ * 

Thread-safe: Pattern instances can be safely shared between threads. Multiple threads can call + * matcher() concurrently on the same Pattern. * - * Resource Management: Patterns from compile() are cached and managed automatically. - * Do NOT call close() on cached patterns (it's a no-op). For testing, use compileWithoutCache(). + *

Resource Management: Patterns from compile() are cached and managed automatically. Do NOT call + * close() on cached patterns (it's a no-op). For testing, use compileWithoutCache(). * - * Reference Counting: Patterns are not freed while Matchers are active (prevents use-after-free). + *

Reference Counting: Patterns are not freed while Matchers are active (prevents + * use-after-free). * * @since 1.0.0 */ public final class Pattern implements AutoCloseable { - private static final Logger logger = LoggerFactory.getLogger(Pattern.class); - - // Ensure native library is loaded - static { - RE2LibraryLoader.loadLibrary(); - } - - // Global pattern cache (mutable for testing only) - private static volatile PatternCache cache = new PatternCache(RE2Config.DEFAULT); - - /** - * Gets the global pattern cache (for internal use). - */ - public static PatternCache getGlobalCache() { - return cache; - } - - private final String patternString; - private final boolean caseSensitive; - private final long nativeHandle; - private final AtomicBoolean closed = new AtomicBoolean(false); - private final boolean fromCache; - private final java.util.concurrent.atomic.AtomicInteger refCount = new java.util.concurrent.atomic.AtomicInteger(0); - private static final int maxMatchersPerPattern = RE2Config.DEFAULT.maxMatchersPerPattern(); - private final long nativeMemoryBytes; - - // JniAdapter for all JNI calls - allows mocking in tests - final IRE2Native jni; - - Pattern(String patternString, boolean caseSensitive, long nativeHandle) { - this(patternString, caseSensitive, nativeHandle, false, RE2Native.INSTANCE); - } - - Pattern(String patternString, boolean caseSensitive, long nativeHandle, boolean fromCache, IRE2Native jni) { - this.patternString = Objects.requireNonNull(patternString); - this.caseSensitive = caseSensitive; - this.nativeHandle = nativeHandle; - this.fromCache = fromCache; - this.jni = jni; - - // Query native memory size using adapter - this.nativeMemoryBytes = jni.patternMemory(nativeHandle); - - logger.trace("RE2: Pattern created - length: {}, caseSensitive: {}, fromCache: {}, nativeBytes: {}", - patternString.length(), caseSensitive, fromCache, nativeMemoryBytes); - } - - public static Pattern compile(String pattern) { - return compile(pattern, true); - 
} - - public static Pattern compile(String pattern, boolean caseSensitive) { - Objects.requireNonNull(pattern, "pattern cannot be null"); - - // Try cache first - return cache.getOrCompile(pattern, caseSensitive, () -> compileUncached(pattern, caseSensitive)); - } - - /** - * Compiles a pattern without using the cache (for testing/special cases). - * - * The returned pattern is NOT managed by the cache and MUST be closed. - * - * @param pattern regex pattern - * @return uncached pattern (must close) - */ - public static Pattern compileWithoutCache(String pattern) { - return compileWithoutCache(pattern, true); - } - - /** - * Compiles a pattern without using the cache (for testing/special cases). - * - * The returned pattern is NOT managed by the cache and MUST be closed. - * - * @param pattern regex pattern - * @param caseSensitive case sensitivity - * @return uncached pattern (must close) - */ - public static Pattern compileWithoutCache(String pattern, boolean caseSensitive) { - // Compile with fromCache=false so it can actually be closed - return doCompile(pattern, caseSensitive, false, RE2Native.INSTANCE); - } - - /** - * Compiles a pattern for caching (internal use). - */ - private static Pattern compileUncached(String pattern, boolean caseSensitive) { - // Compile with fromCache=true so users can't close it (cache manages it) - return doCompile(pattern, caseSensitive, true, RE2Native.INSTANCE); - } - - /** - * Package-private compile method for test injection of mock JniAdapter. - * Bypasses cache for full control in unit tests. - */ - static Pattern compileForTesting(String pattern, boolean caseSensitive, IRE2Native jni) { - return doCompile(pattern, caseSensitive, false, jni); - } - - /** - * Actual compilation logic. 
- */ - private static Pattern doCompile(String pattern, boolean caseSensitive, boolean fromCache, IRE2Native jni) { - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - String hash = PatternHasher.hash(pattern); - - // Reject empty patterns (matches old wrapper behavior) - if (pattern.isEmpty()) { - throw new PatternCompilationException(pattern, "Pattern is null or empty"); - } - - // Track allocation and enforce maxSimultaneousCompiledPatterns limit - // This is ACTIVE count, not cumulative - patterns can be freed and recompiled - cache.getResourceTracker().trackPatternAllocated(cache.getConfig().maxSimultaneousCompiledPatterns(), metrics); - - long startNanos = System.nanoTime(); - long handle = 0; - boolean compilationSuccessful = false; - - try { - handle = jni.compile(pattern, caseSensitive); - - if (handle == 0 || !jni.patternOk(handle)) { - String error = jni.getError(); - - // Compilation failed - record error - metrics.incrementCounter(MetricNames.ERRORS_COMPILATION_FAILED); - logger.debug("RE2: Pattern compilation failed - hash: {}, error: {}", hash, error); - - // Will be cleaned up in finally block - throw new PatternCompilationException(pattern, error != null ? 
error : "Unknown error"); - } - - long durationNanos = System.nanoTime() - startNanos; - metrics.recordTimer(MetricNames.PATTERNS_COMPILATION_LATENCY, durationNanos); - metrics.incrementCounter(MetricNames.PATTERNS_COMPILED); - - Pattern compiled = new Pattern(pattern, caseSensitive, handle, fromCache, jni); - logger.trace("RE2: Pattern compiled - hash: {}, length: {}, caseSensitive: {}, fromCache: {}, nativeBytes: {}, timeNs: {}", - hash, pattern.length(), caseSensitive, fromCache, compiled.nativeMemoryBytes, durationNanos); - - compilationSuccessful = true; - return compiled; - - } catch (ResourceException e) { - // Resource limit hit - count already rolled back by trackPatternAllocated - throw e; - - } finally { - // Clean up if compilation failed - if (!compilationSuccessful) { - // Free handle if allocated - if (handle != 0) { - try { - jni.freePattern(handle); - } catch (Exception e) { - // Silently ignore - best effort cleanup - } - } - - // Decrement count (allocation failed) - cache.getResourceTracker().trackPatternFreed(metrics); - } - } - } - - public Matcher matcher(String input) { - checkNotClosed(); - return new Matcher(this, input); - } - - public boolean matches(String input) { - try (Matcher m = matcher(input)) { - return m.matches(); - } - } - - /** - * Tests if content at memory address fully matches this pattern (zero-copy). - * - *

This method accepts a raw memory address and length, enabling zero-copy matching - * with any off-heap memory system.

- * - *

Performance: 46-99% faster than String API depending on input size. - * For 10KB+ inputs, provides 99%+ improvement.

- * - *

Memory Safety: The memory at {@code address} must:

- *
    - *
  • Remain valid for the duration of this call
  • - *
  • Contain valid UTF-8 encoded text
  • - *
  • Not be released/freed until this method returns
  • - *
- * - *

Usage with DirectByteBuffer:

- *
{@code
-     * import sun.nio.ch.DirectBuffer;
-     *
-     * Pattern pattern = Pattern.compile("\\d+");
-     * ByteBuffer buffer = ByteBuffer.allocateDirect(1024);
-     * buffer.put("12345".getBytes(StandardCharsets.UTF_8));
-     * buffer.flip();
-     *
-     * long address = ((DirectBuffer) buffer).address();
-     * int length = buffer.remaining();
-     * boolean matches = pattern.matches(address, length);  // Zero-copy!
-     * }
- * - *

Note: Most users should use {@link #matches(ByteBuffer)} instead, - * which handles address extraction automatically.

- * - * @param address native memory address of UTF-8 encoded text - * @param length number of bytes to read from the address - * @return true if entire content matches this pattern, false otherwise - * @throws IllegalArgumentException if address is 0 or length is negative - * @throws IllegalStateException if pattern is closed - * @see #matches(String) String-based variant - * @see #matches(ByteBuffer) ByteBuffer variant with automatic routing - * @since 1.1.0 - */ - public boolean matches(long address, int length) { - checkNotClosed(); - if (address == 0) { - throw new IllegalArgumentException("Address must not be 0"); - } - if (length < 0) { - throw new IllegalArgumentException("Length must not be negative: " + length); - } - - long startNanos = System.nanoTime(); - boolean result = jni.fullMatchDirect(nativeHandle, address, length); - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global metrics (ALL matching operations) - metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS); - metrics.recordTimer(MetricNames.MATCHING_LATENCY, durationNanos); - metrics.recordTimer(MetricNames.MATCHING_FULL_MATCH_LATENCY, durationNanos); - - // Specific zero-copy metrics - metrics.incrementCounter(MetricNames.MATCHING_ZERO_COPY_OPERATIONS); - metrics.recordTimer(MetricNames.MATCHING_ZERO_COPY_LATENCY, durationNanos); - - return result; - } - - /** - * Tests if pattern matches anywhere in content at memory address (zero-copy). - * - *

This is the partial match variant - tests if pattern matches anywhere - * within the input, not necessarily the entire content.

- * - *

Performance: 46-99% faster than String API.

- * - *

Memory Safety: The memory at {@code address} must remain - * valid for the duration of this call.

- * - *

Usage with DirectByteBuffer:

- *
{@code
-     * import sun.nio.ch.DirectBuffer;
-     *
-     * Pattern pattern = Pattern.compile("@[a-z]+\\.[a-z]+");
-     * ByteBuffer buffer = ByteBuffer.allocateDirect(1024);
-     * buffer.put("Contact: user@example.com".getBytes(StandardCharsets.UTF_8));
-     * buffer.flip();
-     *
-     * long address = ((DirectBuffer) buffer).address();
-     * int length = buffer.remaining();
-     * boolean found = pattern.find(address, length);  // Zero-copy!
-     * }
- * - *

Note: Most users should use {@link #find(ByteBuffer)} instead.

- * - * @param address native memory address of UTF-8 encoded text - * @param length number of bytes to read from the address - * @return true if pattern matches anywhere in content, false otherwise - * @throws IllegalArgumentException if address is 0 or length is negative - * @throws IllegalStateException if pattern is closed - * @see #find(ByteBuffer) ByteBuffer variant with automatic routing - * @since 1.1.0 - */ - public boolean find(long address, int length) { - checkNotClosed(); - if (address == 0) { - throw new IllegalArgumentException("Address must not be 0"); - } - if (length < 0) { - throw new IllegalArgumentException("Length must not be negative: " + length); - } - - long startNanos = System.nanoTime(); - boolean result = jni.partialMatchDirect(nativeHandle, address, length); - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global metrics (ALL matching operations) - metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS); - metrics.recordTimer(MetricNames.MATCHING_LATENCY, durationNanos); - metrics.recordTimer(MetricNames.MATCHING_PARTIAL_MATCH_LATENCY, durationNanos); - - // Specific zero-copy metrics - metrics.incrementCounter(MetricNames.MATCHING_ZERO_COPY_OPERATIONS); - metrics.recordTimer(MetricNames.MATCHING_ZERO_COPY_LATENCY, durationNanos); - - return result; - } - - // ========== Capture Group Operations ========== - - /** - * Matches input and extracts capture groups. - * - *

This method performs a full match and returns a {@link MatchResult} containing - * all captured groups. If the match fails, the MatchResult will have {@code matched() == false}.

- * - *

Example - Extract email components:

- *
{@code
-     * Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+)\\.([a-z]+)");
-     * MatchResult result = pattern.match("user@example.com");
-     *
-     * if (result.matched()) {
-     *     String full = result.group();      // "user@example.com"
-     *     String user = result.group(1);     // "user"
-     *     String domain = result.group(2);   // "example"
-     *     String tld = result.group(3);      // "com"
-     * }
-     * }
- * - *

Named Groups:

- *
{@code
-     * Pattern pattern = Pattern.compile("(?P\\d{4})-(?P\\d{2})-(?P\\d{2})");
-     * MatchResult result = pattern.match("2025-11-24");
-     *
-     * if (result.matched()) {
-     *     String year = result.group("year");   // "2025"
-     *     String month = result.group("month"); // "11"
-     *     String day = result.group("day");     // "24"
-     * }
-     * }
- * - * @param input the string to match - * @return MatchResult containing captured groups, or a failed match if no match - * @throws NullPointerException if input is null - * @throws IllegalStateException if pattern is closed - * @see MatchResult - * @see #find(String) for partial matching with groups - * @see #findAll(String) for finding all matches with groups - * @since 1.2.0 - */ - public MatchResult match(String input) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - - long startNanos = System.nanoTime(); - - String[] groups = jni.extractGroups(nativeHandle, input); - - if (groups == null) { - // No match - still track metrics (operation was attempted) - long durationNanos = System.nanoTime() - startNanos; - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - - // Specific String capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); - - return new MatchResult(input); - } - - // For match() (full match semantics), verify the match covers entire input - // extractGroups uses UNANCHORED, so we need to check manually - if (!groups[0].equals(input)) { - // Match found but doesn't cover entire input - this is a partial match - long durationNanos = System.nanoTime() - startNanos; - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - - // Specific String capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); - - return new MatchResult(input); - } - - long durationNanos = System.nanoTime() - 
startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (String) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global capture metrics (ALL capture operations) - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - - // Specific String capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); - - // Lazy-load named groups only if needed - Map namedGroupMap = getNamedGroupsMap(); - - return new MatchResult(input, groups, namedGroupMap); - } - - /** - * Finds first match and extracts capture groups. - * - *

This method performs a partial match (searches anywhere in input) and returns - * a {@link MatchResult} for the first match found. If no match is found, the MatchResult - * will have {@code matched() == false}.

- * - *

Example - Extract first email from text:

- *
{@code
-     * Pattern emailPattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)");
-     * MatchResult result = emailPattern.find("Contact us at support@example.com or admin@test.org");
-     *
-     * if (result.matched()) {
-     *     String email = result.group();       // "support@example.com" (first match)
-     *     String user = result.group(1);       // "support"
-     *     String domain = result.group(2);     // "example.com"
-     * }
-     * }
- * - * @param input the string to search - * @return MatchResult for first match found, or a failed match if no match - * @throws NullPointerException if input is null - * @throws IllegalStateException if pattern is closed - * @see #match(String) for full matching with groups - * @see #findAll(String) for finding all matches - * @since 1.2.0 - */ - public MatchResult find(String input) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - - long startNanos = System.nanoTime(); - - // RE2 extractGroups does UNANCHORED match, so it finds first occurrence - String[] groups = jni.extractGroups(nativeHandle, input); - - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (String) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global capture metrics (ALL capture operations) - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - - // Specific String capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); - - if (groups == null) { - return new MatchResult(input); - } - - Map namedGroupMap = getNamedGroupsMap(); - return new MatchResult(input, groups, namedGroupMap); - } - - /** - * Finds all non-overlapping matches and extracts capture groups from each. - * - *

This method finds all matches in the input and returns a list of {@link MatchResult} - * objects, one for each match. Each MatchResult contains the captured groups for that match.

- * - *

Example - Extract all phone numbers:

- *
{@code
-     * Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})");
-     * List matches = pattern.findAll("Call 555-1234 or 555-5678 for help");
-     *
-     * for (MatchResult match : matches) {
-     *     String phone = match.group();       // "555-1234", "555-5678"
-     *     String prefix = match.group(1);     // "555", "555"
-     *     String number = match.group(2);     // "1234", "5678"
-     * }
-     * // matches.size() == 2
-     * }
- * - *

Example - Parse structured log lines:

- *
{@code
-     * Pattern pattern = Pattern.compile("\\[(\\d+)\\] (\\w+): (.+)");
-     * List matches = pattern.findAll(logText);
-     *
-     * for (MatchResult match : matches) {
-     *     String timestamp = match.group(1);
-     *     String level = match.group(2);
-     *     String message = match.group(3);
-     *     // Process log entry
-     * }
-     * }
- * - * @param input the string to search - * @return list of MatchResult objects (one per match), or empty list if no matches - * @throws NullPointerException if input is null - * @throws IllegalStateException if pattern is closed - * @see #match(String) for single full match - * @see #find(String) for first match only - * @since 1.2.0 - */ - public java.util.List findAll(String input) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - - long startNanos = System.nanoTime(); - - String[][] allMatches = jni.findAllMatches(nativeHandle, input); - - long durationNanos = System.nanoTime() - startNanos; - int matchCount = (allMatches != null) ? allMatches.length : 0; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (String) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global capture metrics (ALL capture operations) - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - - // Specific String capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); - - // Track number of matches found - if (matchCount > 0) { - metrics.incrementCounter(MetricNames.CAPTURE_FINDALL_MATCHES, matchCount); - } - - if (allMatches == null || allMatches.length == 0) { - return java.util.Collections.emptyList(); - } - - // Lazy-load named groups (shared by all MatchResults) - Map namedGroupMap = getNamedGroupsMap(); - - java.util.List results = new java.util.ArrayList<>(allMatches.length); - for (String[] groups : allMatches) { - results.add(new MatchResult(input, groups, namedGroupMap)); - } - - return results; - } - - // ========== Bulk Capture Operations ========== - - /** - * Full match multiple inputs with capture groups (bulk operation). - * - *

Processes all inputs in a single operation, extracting capture groups from each.

- * - *

Example - Extract email components from multiple inputs:

- *
{@code
-     * Pattern emailPattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)");
-     * String[] emails = {"user@example.com", "admin@test.org", "invalid"};
-     *
-     * MatchResult[] results = emailPattern.matchAllWithGroups(emails);
-     * // results[0].matched() = true, group(1) = "user", group(2) = "example.com"
-     * // results[1].matched() = true, group(1) = "admin", group(2) = "test.org"
-     * // results[2].matched() = false
-     * }
- * - * @param inputs array of strings to match - * @return array of MatchResults (parallel to inputs, remember to close each) - * @throws NullPointerException if inputs is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public MatchResult[] matchAllWithGroups(String[] inputs) { - checkNotClosed(); - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.length == 0) { - return new MatchResult[0]; - } - - long startNanos = System.nanoTime(); - - // Call extractGroups for each input individually - // Note: extractGroupsBulk returns String[][] with all inputs concatenated, - // so we process individually for now (can optimize later with proper bulk native method) - Map namedGroupMap = getNamedGroupsMap(); - MatchResult[] results = new MatchResult[inputs.length]; - - for (int i = 0; i < inputs.length; i++) { - String[] groups = jni.extractGroups(nativeHandle, inputs[i]); - if (groups != null && groups.length > 0) { - results[i] = new MatchResult(inputs[i], groups, namedGroupMap); - } else { - results[i] = new MatchResult(inputs[i]); - } - } - - long durationNanos = System.nanoTime() - startNanos; - long perItemNanos = durationNanos / inputs.length; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Bulk) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global capture metrics (per-item for comparability) - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS, inputs.length); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, perItemNanos); - - // Specific bulk capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_BULK_OPERATIONS); - metrics.incrementCounter(MetricNames.CAPTURE_BULK_ITEMS, inputs.length); - metrics.recordTimer(MetricNames.CAPTURE_BULK_LATENCY, perItemNanos); - - return results; - } - - /** - * Full match multiple inputs with capture groups (bulk operation, collection variant). 
- * - * @param inputs collection of strings to match - * @return array of MatchResults (parallel to inputs, remember to close each) - * @throws NullPointerException if inputs is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public MatchResult[] matchAllWithGroups(java.util.Collection inputs) { - checkNotClosed(); - Objects.requireNonNull(inputs, "inputs cannot be null"); - - String[] array = inputs.toArray(new String[0]); - return matchAllWithGroups(array); - } - - /** - * Matches input and extracts capture groups (zero-copy). - * - *

Zero-copy variant using raw memory address.

- * - * @param address native memory address of UTF-8 encoded text - * @param length number of bytes to read - * @return MatchResult with captured groups, or failed match if no match - * @throws IllegalArgumentException if address is 0 or length is negative - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public MatchResult match(long address, int length) { - checkNotClosed(); - if (address == 0) { - throw new IllegalArgumentException("Address must not be 0"); - } - if (length < 0) { - throw new IllegalArgumentException("Length must not be negative: " + length); - } - - long startNanos = System.nanoTime(); - - String[] groups = jni.extractGroupsDirect(nativeHandle, address, length); - - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - - // Specific zero-copy capture metrics - metrics.incrementCounter(MetricNames.CAPTURE_ZERO_COPY_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_ZERO_COPY_LATENCY, durationNanos); - - if (groups == null) { - // Need input as String for MatchResult - this is a limitation - // User must pass String for failed matches - return new MatchResult(""); // Empty input for failed zero-copy match - } - - // For zero-copy, we don't have the original String, so MatchResult.input() will be group[0] - Map namedGroupMap = getNamedGroupsMap(); - return new MatchResult(groups[0], groups, namedGroupMap); - } - - /** - * Matches ByteBuffer content and extracts capture groups (zero-copy). - * - *

Automatically routes to zero-copy (DirectByteBuffer) or String (heap).

- * - * @param buffer ByteBuffer containing UTF-8 text - * @return MatchResult with captured groups - * @throws NullPointerException if buffer is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public MatchResult match(ByteBuffer buffer) { - checkNotClosed(); - Objects.requireNonNull(buffer, "buffer cannot be null"); - - if (buffer.isDirect()) { - long address = ((DirectBuffer) buffer).address() + buffer.position(); - int length = buffer.remaining(); - return match(address, length); - } else { - // Heap - convert to String and use String variant - byte[] bytes = new byte[buffer.remaining()]; - buffer.duplicate().get(bytes); - String text = new String(bytes, StandardCharsets.UTF_8); - return match(text); - } - } - - - /** - * Helper: Get named groups map for this pattern (lazy-loaded and cached). - */ - private Map getNamedGroupsMap() { - String[] namedGroupsArray = jni.getNamedGroups(nativeHandle); - - if (namedGroupsArray == null || namedGroupsArray.length == 0) { - return Collections.emptyMap(); - } - - // Parse flattened array: [name1, index1_str, name2, index2_str, ...] - Map map = new java.util.HashMap<>(); - for (int i = 0; i < namedGroupsArray.length; i += 2) { - String name = namedGroupsArray[i]; - int index = Integer.parseInt(namedGroupsArray[i + 1]); - map.put(name, index); - } - - return map; - } - - // ========== Capture Group Zero-Copy Operations ========== - - /** - * Matches and extracts capture groups using zero-copy (address variant). 
- * - * @param address native memory address of UTF-8 text - * @param length number of bytes - * @return MatchResult with captured groups - * @throws IllegalArgumentException if address is 0 or length is negative - * @throws IllegalStateException if pattern is closed - * @see #match(String) String variant - * @since 1.2.0 - */ - public MatchResult matchWithGroups(long address, int length) { - checkNotClosed(); - if (address == 0) { - throw new IllegalArgumentException("Address must not be 0"); - } - if (length < 0) { - throw new IllegalArgumentException("Length must not be negative: " + length); - } - - long startNanos = System.nanoTime(); - String[] groups = jni.extractGroupsDirect(nativeHandle, address, length); - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - metrics.incrementCounter(MetricNames.CAPTURE_ZERO_COPY_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_ZERO_COPY_LATENCY, durationNanos); - - if (groups == null) { - return new MatchResult(""); - } - - Map namedGroupMap = getNamedGroupsMap(); - return new MatchResult(groups[0], groups, namedGroupMap); - } - - /** - * Matches and extracts capture groups (ByteBuffer zero-copy). 
- * - * @param buffer ByteBuffer - * @return MatchResult with captured groups - * @throws NullPointerException if buffer is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public MatchResult matchWithGroups(ByteBuffer buffer) { - checkNotClosed(); - Objects.requireNonNull(buffer, "buffer cannot be null"); - - if (buffer.isDirect()) { - long address = ((DirectBuffer) buffer).address() + buffer.position(); - int length = buffer.remaining(); - return matchWithGroups(address, length); - } else { - byte[] bytes = new byte[buffer.remaining()]; - buffer.duplicate().get(bytes); - String text = new String(bytes, StandardCharsets.UTF_8); - return match(text); - } - } - - /** - * Finds and extracts capture groups using zero-copy (address variant). - * - * @param address native memory address - * @param length number of bytes - * @return MatchResult for first match - * @throws IllegalArgumentException if address is 0 or length is negative - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public MatchResult findWithGroups(long address, int length) { - checkNotClosed(); - if (address == 0) { - throw new IllegalArgumentException("Address must not be 0"); - } - if (length < 0) { - throw new IllegalArgumentException("Length must not be negative: " + length); - } - - long startNanos = System.nanoTime(); - String[] groups = jni.extractGroupsDirect(nativeHandle, address, length); - long durationNanos = System.nanoTime() - startNanos; - - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - metrics.incrementCounter(MetricNames.CAPTURE_ZERO_COPY_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_ZERO_COPY_LATENCY, durationNanos); - - if (groups == null) { - return new MatchResult(""); - } - - Map namedGroupMap = getNamedGroupsMap(); - return new MatchResult(groups[0], groups, 
namedGroupMap); - } - - /** - * Finds and extracts capture groups (ByteBuffer zero-copy). - * - * @param buffer ByteBuffer - * @return MatchResult for first match - * @throws NullPointerException if buffer is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public MatchResult findWithGroups(ByteBuffer buffer) { - checkNotClosed(); - Objects.requireNonNull(buffer, "buffer cannot be null"); - - if (buffer.isDirect()) { - long address = ((DirectBuffer) buffer).address() + buffer.position(); - int length = buffer.remaining(); - return findWithGroups(address, length); - } else { - byte[] bytes = new byte[buffer.remaining()]; - buffer.duplicate().get(bytes); - String text = new String(bytes, StandardCharsets.UTF_8); - return find(text); - } - } - - /** - * Finds all matches and extracts capture groups using zero-copy (address variant). - * - * @param address native memory address - * @param length number of bytes - * @return list of MatchResult objects - * @throws IllegalArgumentException if address is 0 or length is negative - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public java.util.List findAllWithGroups(long address, int length) { - checkNotClosed(); - if (address == 0) { - throw new IllegalArgumentException("Address must not be 0"); - } - if (length < 0) { - throw new IllegalArgumentException("Length must not be negative: " + length); - } - - long startNanos = System.nanoTime(); - String[][] allMatches = jni.findAllMatchesDirect(nativeHandle, address, length); - long durationNanos = System.nanoTime() - startNanos; - - int matchCount = (allMatches != null) ? 
allMatches.length : 0; - - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); - metrics.incrementCounter(MetricNames.CAPTURE_ZERO_COPY_OPERATIONS); - metrics.recordTimer(MetricNames.CAPTURE_ZERO_COPY_LATENCY, durationNanos); - - if (matchCount > 0) { - metrics.incrementCounter(MetricNames.CAPTURE_FINDALL_MATCHES, matchCount); - } - - if (allMatches == null || allMatches.length == 0) { - return java.util.Collections.emptyList(); - } - - Map namedGroupMap = getNamedGroupsMap(); - - java.util.List results = new java.util.ArrayList<>(allMatches.length); - for (String[] groups : allMatches) { - results.add(new MatchResult(groups[0], groups, namedGroupMap)); - } - - return results; - } - - /** - * Finds all matches and extracts capture groups (ByteBuffer zero-copy). - * - * @param buffer ByteBuffer - * @return list of MatchResult objects - * @throws NullPointerException if buffer is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public java.util.List findAllWithGroups(ByteBuffer buffer) { - checkNotClosed(); - Objects.requireNonNull(buffer, "buffer cannot be null"); - - if (buffer.isDirect()) { - long address = ((DirectBuffer) buffer).address() + buffer.position(); - int length = buffer.remaining(); - return findAllWithGroups(address, length); - } else { - byte[] bytes = new byte[buffer.remaining()]; - buffer.duplicate().get(bytes); - String text = new String(bytes, StandardCharsets.UTF_8); - return findAll(text); - } - } - - // ========== Replace Operations ========== - - /** - * Replaces the first match of this pattern in the input with the replacement string. - * - *

If the pattern matches, the first occurrence is replaced. If no match is found, - * the original input is returned unchanged.

- * - *

Backreferences: RE2 supports backreferences using {@code \\1}, {@code \\2}, etc. - * (note the double backslash for Java string escaping). Unlike java.util.regex which uses - * {@code $1}, {@code $2}, RE2 uses backslash notation.

- * - *

Example - Simple replacement:

- *
{@code
-     * Pattern pattern = Pattern.compile("\\d+");
-     * String result = pattern.replaceFirst("Item 123 costs $456", "XXX");
-     * // result = "Item XXX costs $456"
-     * }
- * - *

Example - Backreferences:

- *
{@code
-     * Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
-     * String result = pattern.replaceFirst("Date: 2025-11-24", "\\2/\\3/\\1");
-     * // result = "Date: 11/24/2025" (reordered date components)
-     * }
- * - * @param input the input string - * @param replacement the replacement string (supports {@code \\1}, {@code \\2}, etc. backreferences) - * @return the input with the first match replaced, or original input if no match - * @throws NullPointerException if input or replacement is null - * @throws IllegalStateException if pattern is closed - * @see #replaceAll(String, String) to replace all matches - * @since 1.2.0 - */ - public String replaceFirst(String input, String replacement) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - long startNanos = System.nanoTime(); - - String result = jni.replaceFirst(nativeHandle, input, replacement); - - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (String) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global replace metrics (ALL replace operations) - metrics.incrementCounter(MetricNames.REPLACE_OPERATIONS); - metrics.recordTimer(MetricNames.REPLACE_LATENCY, durationNanos); - - // Specific String replace metrics - metrics.incrementCounter(MetricNames.REPLACE_STRING_OPERATIONS); - metrics.recordTimer(MetricNames.REPLACE_STRING_LATENCY, durationNanos); - - return result != null ? result : input; - } - - /** - * Replaces all matches of this pattern in the input with the replacement string. - * - *

All non-overlapping matches are replaced. If no matches are found, the original - * input is returned unchanged.

- * - *

Backreferences: Use {@code \\1}, {@code \\2}, etc. for captured groups.

- * - *

Example - Replace all digits:

- *
{@code
-     * Pattern pattern = Pattern.compile("\\d+");
-     * String result = pattern.replaceAll("Item 123 costs $456", "XXX");
-     * // result = "Item XXX costs $XXX"
-     * }
- * - *

Example - Redact emails:

- *
{@code
-     * Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
-     * String result = emailPattern.replaceAll("Contact user@example.com or admin@test.org", "[REDACTED]");
-     * // result = "Contact [REDACTED] or [REDACTED]"
-     * }
- * - *

Example - Backreferences for formatting:

- *
{@code
-     * Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})");
-     * String result = pattern.replaceAll("Call 555-1234 or 555-5678", "(\\1) \\2");
-     * // result = "Call (555) 1234 or (555) 5678"
-     * }
- * - * @param input the input string - * @param replacement the replacement string (supports {@code \\1}, {@code \\2}, etc. backreferences) - * @return the input with all matches replaced, or original input if no matches - * @throws NullPointerException if input or replacement is null - * @throws IllegalStateException if pattern is closed - * @see #replaceFirst(String, String) to replace only the first match - * @since 1.2.0 - */ - public String replaceAll(String input, String replacement) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - long startNanos = System.nanoTime(); - - String result = jni.replaceAll(nativeHandle, input, replacement); - - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (String) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global replace metrics (ALL replace operations) - metrics.incrementCounter(MetricNames.REPLACE_OPERATIONS); - metrics.recordTimer(MetricNames.REPLACE_LATENCY, durationNanos); - - // Specific String replace metrics - metrics.incrementCounter(MetricNames.REPLACE_STRING_OPERATIONS); - metrics.recordTimer(MetricNames.REPLACE_STRING_LATENCY, durationNanos); - - return result != null ? result : input; - } - - /** - * Replaces all matches in multiple strings (bulk operation). - * - *

Processes all inputs in a single JNI call for better performance.

- * - *

Example - Batch redaction:

- *
{@code
-     * Pattern ssnPattern = Pattern.compile("\\d{3}-\\d{2}-\\d{4}");
-     * String[] logs = {
-     *     "User 123-45-6789 logged in",
-     *     "No PII here",
-     *     "SSN: 987-65-4321"
-     * };
-     *
-     * String[] redacted = ssnPattern.replaceAll(logs, "[REDACTED]");
-     * // redacted = ["User [REDACTED] logged in", "No PII here", "SSN: [REDACTED]"]
-     * }
- * - * @param inputs array of strings to process - * @param replacement the replacement string (supports backreferences) - * @return array of strings with matches replaced (parallel to inputs) - * @throws NullPointerException if inputs or replacement is null - * @throws IllegalStateException if pattern is closed - * @see #replaceAll(String, String) single-string variant - * @since 1.2.0 - */ - public String[] replaceAll(String[] inputs, String replacement) { - checkNotClosed(); - Objects.requireNonNull(inputs, "inputs cannot be null"); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - if (inputs.length == 0) { - return new String[0]; - } - - long startNanos = System.nanoTime(); - - String[] results = jni.replaceAllBulk(nativeHandle, inputs, replacement); - - long durationNanos = System.nanoTime() - startNanos; - long perItemNanos = durationNanos / inputs.length; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (String Bulk) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global replace metrics (ALL replace operations) - use per-item for comparability - metrics.incrementCounter(MetricNames.REPLACE_OPERATIONS, inputs.length); - metrics.recordTimer(MetricNames.REPLACE_LATENCY, perItemNanos); - - // Specific String bulk replace metrics - metrics.incrementCounter(MetricNames.REPLACE_BULK_OPERATIONS); - metrics.incrementCounter(MetricNames.REPLACE_BULK_ITEMS, inputs.length); - metrics.recordTimer(MetricNames.REPLACE_BULK_LATENCY, perItemNanos); - - return results != null ? results : inputs; - } - - /** - * Replaces all matches in a collection (bulk operation). - * - *

Processes all inputs in a single JNI call for better performance.

- * - * @param inputs collection of strings to process - * @param replacement the replacement string (supports backreferences) - * @return list of strings with matches replaced (same order as inputs) - * @throws NullPointerException if inputs or replacement is null - * @throws IllegalStateException if pattern is closed - * @see #replaceAll(String, String) single-string variant - * @since 1.2.0 - */ - public java.util.List replaceAll(java.util.Collection inputs, String replacement) { - checkNotClosed(); - Objects.requireNonNull(inputs, "inputs cannot be null"); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - if (inputs.isEmpty()) { - return new java.util.ArrayList<>(); - } - - String[] array = inputs.toArray(new String[0]); - String[] results = replaceAll(array, replacement); - - return java.util.Arrays.asList(results); - } - - // ========== Phase 3: Zero-Copy Replace Operations ========== - - /** - * Replaces first match using zero-copy memory access (off-heap memory). - * - *

Zero-copy operation: Accesses off-heap memory directly without copying. - * Caller must ensure memory remains valid during this call.

- * - * @param address native memory address (from DirectByteBuffer or native allocator) - * @param length number of bytes to process - * @param replacement the replacement string (supports backreferences) - * @return string with first match replaced - * @throws IllegalStateException if pattern is closed - * @throws NullPointerException if replacement is null - * @since 1.2.0 - */ - public String replaceFirst(long address, int length, String replacement) { - checkNotClosed(); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - long startNanos = System.nanoTime(); - - String result = jni.replaceFirstDirect(nativeHandle, address, length, replacement); - - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global replace metrics - metrics.incrementCounter(MetricNames.REPLACE_OPERATIONS); - metrics.recordTimer(MetricNames.REPLACE_LATENCY, durationNanos); - - // Specific zero-copy replace metrics - metrics.incrementCounter(MetricNames.REPLACE_ZERO_COPY_OPERATIONS); - metrics.recordTimer(MetricNames.REPLACE_ZERO_COPY_LATENCY, durationNanos); - - return result; - } - - /** - * Replaces first match using ByteBuffer (zero-copy if direct, converted if heap). 
- * - * @param input ByteBuffer containing UTF-8 encoded text - * @param replacement the replacement string (supports backreferences) - * @return string with first match replaced - * @throws IllegalStateException if pattern is closed - * @throws NullPointerException if input or replacement is null - * @since 1.2.0 - */ - public String replaceFirst(java.nio.ByteBuffer input, String replacement) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - if (input.isDirect()) { - // Zero-copy path - long address = ((DirectBuffer) input).address() + input.position(); - int length = input.remaining(); - return replaceFirst(address, length, replacement); - } else { - // Heap buffer - convert to String - byte[] bytes = new byte[input.remaining()]; - input.duplicate().get(bytes); - String str = new String(bytes, StandardCharsets.UTF_8); - return replaceFirst(str, replacement); - } - } - - /** - * Replaces all matches using zero-copy memory access (off-heap memory). 
- * - * @param address native memory address (from DirectByteBuffer or native allocator) - * @param length number of bytes to process - * @param replacement the replacement string (supports backreferences) - * @return string with all matches replaced - * @throws IllegalStateException if pattern is closed - * @throws NullPointerException if replacement is null - * @since 1.2.0 - */ - public String replaceAll(long address, int length, String replacement) { - checkNotClosed(); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - long startNanos = System.nanoTime(); - - String result = jni.replaceAllDirect(nativeHandle, address, length, replacement); - - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global replace metrics - metrics.incrementCounter(MetricNames.REPLACE_OPERATIONS); - metrics.recordTimer(MetricNames.REPLACE_LATENCY, durationNanos); - - // Specific zero-copy replace metrics - metrics.incrementCounter(MetricNames.REPLACE_ZERO_COPY_OPERATIONS); - metrics.recordTimer(MetricNames.REPLACE_ZERO_COPY_LATENCY, durationNanos); - - return result; - } - - /** - * Replaces all matches using ByteBuffer (zero-copy if direct, converted if heap). 
- * - * @param input ByteBuffer containing UTF-8 encoded text - * @param replacement the replacement string (supports backreferences) - * @return string with all matches replaced - * @throws IllegalStateException if pattern is closed - * @throws NullPointerException if input or replacement is null - * @since 1.2.0 - */ - public String replaceAll(java.nio.ByteBuffer input, String replacement) { - checkNotClosed(); - Objects.requireNonNull(input, "input cannot be null"); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - if (input.isDirect()) { - // Zero-copy path - long address = ((DirectBuffer) input).address() + input.position(); - int length = input.remaining(); - return replaceAll(address, length, replacement); - } else { - // Heap buffer - convert to String - byte[] bytes = new byte[input.remaining()]; - input.duplicate().get(bytes); - String str = new String(bytes, StandardCharsets.UTF_8); - return replaceAll(str, replacement); - } - } - - /** - * Replaces all matches in multiple off-heap buffers (bulk zero-copy operation). 
- * - * @param addresses native memory addresses (from DirectByteBuffer or native allocator) - * @param lengths number of bytes for each address - * @param replacement the replacement string (supports backreferences) - * @return array of strings with all matches replaced (parallel to inputs) - * @throws IllegalStateException if pattern is closed - * @throws NullPointerException if addresses, lengths, or replacement is null - * @throws IllegalArgumentException if addresses and lengths have different lengths - * @since 1.2.0 - */ - public String[] replaceAll(long[] addresses, int[] lengths, String replacement) { - checkNotClosed(); - Objects.requireNonNull(addresses, "addresses cannot be null"); - Objects.requireNonNull(lengths, "lengths cannot be null"); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - if (addresses.length != lengths.length) { - throw new IllegalArgumentException("addresses and lengths must have the same length"); - } - - if (addresses.length == 0) { - return new String[0]; - } - - long startNanos = System.nanoTime(); - - String[] results = jni.replaceAllDirectBulk(nativeHandle, addresses, lengths, replacement); - - long durationNanos = System.nanoTime() - startNanos; - long perItemNanos = durationNanos / addresses.length; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy Bulk) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - - // Global replace metrics (per-item for comparability) - metrics.incrementCounter(MetricNames.REPLACE_OPERATIONS, addresses.length); - metrics.recordTimer(MetricNames.REPLACE_LATENCY, perItemNanos); - - // Specific zero-copy bulk replace metrics - metrics.incrementCounter(MetricNames.REPLACE_BULK_ZERO_COPY_OPERATIONS); - metrics.incrementCounter(MetricNames.REPLACE_BULK_ZERO_COPY_ITEMS, addresses.length); - metrics.recordTimer(MetricNames.REPLACE_BULK_ZERO_COPY_LATENCY, perItemNanos); - - return results; - } - - /** - * Replaces all matches in multiple ByteBuffers (bulk 
operation, zero-copy if direct). - * - * @param inputs array of ByteBuffers containing UTF-8 encoded text - * @param replacement the replacement string (supports backreferences) - * @return array of strings with all matches replaced (parallel to inputs) - * @throws IllegalStateException if pattern is closed - * @throws NullPointerException if inputs or replacement is null - * @since 1.2.0 - */ - public String[] replaceAll(java.nio.ByteBuffer[] inputs, String replacement) { - checkNotClosed(); - Objects.requireNonNull(inputs, "inputs cannot be null"); - Objects.requireNonNull(replacement, "replacement cannot be null"); - - if (inputs.length == 0) { - return new String[0]; - } - - // Check if all buffers are direct - if so, use zero-copy bulk path - boolean allDirect = true; - for (java.nio.ByteBuffer buffer : inputs) { - if (!buffer.isDirect()) { - allDirect = false; - break; - } - } - - if (allDirect) { - // Zero-copy bulk path - long[] addresses = new long[inputs.length]; - int[] lengths = new int[inputs.length]; - - for (int i = 0; i < inputs.length; i++) { - addresses[i] = ((DirectBuffer) inputs[i]).address() + inputs[i].position(); - lengths[i] = inputs[i].remaining(); - } - - return replaceAll(addresses, lengths, replacement); - } else { - // Mixed or heap buffers - process individually - String[] results = new String[inputs.length]; - for (int i = 0; i < inputs.length; i++) { - results[i] = replaceAll(inputs[i], replacement); - } - return results; - } - } - - public String pattern() { - return patternString; - } - - public boolean isCaseSensitive() { - return caseSensitive; - } - - /** - * Gets the native (off-heap) memory consumed by this compiled pattern. - * - * This is the size of the compiled DFA/NFA program in bytes. - * Useful for monitoring memory pressure from pattern compilation. 
- * - * @return size in bytes - * @throws IllegalStateException if pattern is closed - */ - public long getNativeMemoryBytes() { - checkNotClosed(); - return nativeMemoryBytes; - } - - /** - * Gets the DFA fanout for this pattern. - * - *

Returns an array where index i contains the number of bytes that lead to - * different DFA states at position i. Useful for analyzing pattern complexity.

- * - * @return array of fanout values (one per byte position in DFA) - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public int[] getProgramFanout() { - checkNotClosed(); - return jni.programFanout(nativeHandle); - } - - /** - * Escapes special regex characters for literal matching. - * - *

Converts a literal string into a regex pattern that matches that exact string. - * Special characters like . * + ? ( ) [ ] { } ^ $ | \ are escaped.

- * - *

Example:

- *
{@code
-     * String literal = "price: $9.99";
-     * String escaped = Pattern.quoteMeta(literal);
-     * // escaped = "price: \\$9\\.99"
-     *
-     * Pattern p = Pattern.compile(escaped);
-     * boolean matches = p.matches("price: $9.99");  // true
-     * }
- * - * @param text literal text to escape - * @return escaped pattern that matches the literal text exactly - * @throws NullPointerException if text is null - * @since 1.2.0 - */ - public static String quoteMeta(String text) { - return RE2Native.INSTANCE.quoteMeta(text); - } - - long getNativeHandle() { - checkNotClosed(); - return nativeHandle; - } - - /** - * Increments reference count (called by Matcher constructor). - * - * @throws ResourceException if maxMatchersPerPattern exceeded - */ - void incrementRefCount() { - int current = refCount.incrementAndGet(); - - if (current > maxMatchersPerPattern) { - refCount.decrementAndGet(); // Roll back - throw new ResourceException( - "Maximum matchers per pattern exceeded: " + maxMatchersPerPattern + - " (current matchers on this pattern: " + current + ")"); - } - } - - /** - * Decrements reference count (called by Matcher.close()). - */ - void decrementRefCount() { - refCount.decrementAndGet(); - } - - /** - * Gets current reference count (for testing/monitoring). - * - * @return number of active matchers using this pattern - */ - public int getRefCount() { - return refCount.get(); - } - - public boolean isClosed() { - return closed.get(); - } - - /** - * Checks if the native pattern pointer is still valid. - * - * Used for defensive validation to detect memory corruption or - * other issues that could cause the native pointer to become invalid. 
- * - * @return true if pattern is valid, false if closed or native pointer invalid - */ - public boolean isValid() { - if (closed.get()) { - return false; - } - try { - return jni.patternOk(nativeHandle); - } catch (Exception e) { - logger.warn("RE2: Exception while validating pattern", e); - return false; - } - } - - @Override - public void close() { - if (fromCache) { - // This is expected behavior when using try-with-resources with cached patterns - logger.trace("RE2: Attempted to close cached pattern (ignoring - cache manages lifecycle)"); - return; - } - - // Attempt 1: Try graceful close - forceClose(false); - - // If still has active matchers, wait briefly and force release - if (!closed.get() && refCount.get() > 0) { - try { - Thread.sleep(100); // Give matchers time to close - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - - // Attempt 2: Force release regardless (DANGEROUS if matchers still active) - if (!closed.get()) { - logger.warn("RE2: Pattern still has {} active matcher(s) after 100ms wait - forcing release anyway", refCount.get()); - forceClose(true); - } - } - } - - /** - * Force closes the pattern (INTERNAL USE ONLY - called by cache during eviction). - * - *

DO NOT CALL THIS METHOD. This is internal API for PatternCache. - * Use {@link #close()} instead. - * - *

Attempts graceful close first (waits for matchers). If matchers still active after wait, - * can force release regardless of reference count. - * - *

Public for PatternCache access (different package), but not part of public API. - */ - public void forceClose() { - forceClose(false); - } - - /** - * Force closes the pattern with optional unconditional release. - * - *

INTERNAL USE ONLY. - * - * @param releaseRegardless if true, releases even if matchers are active (DANGEROUS - can cause crashes) - */ - public void forceClose(boolean releaseRegardless) { - // First attempt: graceful close if no active matchers - if (refCount.get() > 0) { - if (!releaseRegardless) { - logger.warn("RE2: Cannot force close pattern - still in use by {} matcher(s)", refCount.get()); - return; - } else { - // DANGEROUS: Forcing release despite active matchers - logger.error("RE2: FORCE releasing pattern despite {} active matcher(s) - " + - "this may cause use-after-free crashes if matchers are still being used!", refCount.get()); - } - } - - if (closed.compareAndSet(false, true)) { - logger.trace("RE2: Force closing pattern - fromCache: {}, releaseRegardless: {}", fromCache, releaseRegardless); - - // CRITICAL: Always track freed, even if freePattern throws - try { - jni.freePattern(nativeHandle); - } catch (Exception e) { - logger.error("RE2: Error freeing pattern native handle", e); - } finally { - // Always track freed (all patterns were tracked when allocated) - cache.getResourceTracker().trackPatternFreed(cache.getConfig().metricsRegistry()); - } - } - } - - /** - * Gets cache statistics (for monitoring). - */ - public static com.axonops.libre2.cache.CacheStatistics getCacheStatistics() { - return cache.getStatistics(); - } - - /** - * Clears the pattern cache (for testing/maintenance). - */ - public static void clearCache() { - cache.clear(); - } - - /** - * Fully resets the cache including statistics (for testing only). - */ - public static void resetCache() { - cache.reset(); - } - - /** - * Reconfigures the cache with new settings (for testing only). - * - * This replaces the existing cache with a new one using the provided config. - * All cached patterns are cleared. 
- * - * @param config the new configuration - */ - public static void configureCache(RE2Config config) { - cache.reconfigure(config); - } - - /** - * Sets a new global cache (for testing only). - * - * WARNING: This replaces the entire global cache. Use with caution. - * Primarily for tests that need to inject a custom cache with metrics. - * - * @param newCache the new cache to use globally - */ - public static void setGlobalCache(PatternCache newCache) { - cache = newCache; - } - - /** - * Gets the current cache configuration. - * - * @return the current RE2Config - */ - public static RE2Config getCacheConfig() { - return cache.getConfig(); - } - - private void checkNotClosed() { - if (closed.get()) { - throw new IllegalStateException("RE2: Pattern is closed"); - } - } - - // ========== Bulk Matching Operations ========== - - /** - * Matches multiple inputs in a single JNI call (minimizes overhead). - * - *

This method processes an entire collection in one native call, significantly reducing - * JNI crossing overhead compared to calling {@link #matches(String)} in a loop. The performance - * benefit increases with collection size and pattern complexity. - * - *

Example - Validate multiple emails: - *

{@code
-     * Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
-     * List emails = List.of("user@example.com", "invalid", "admin@test.org");
-     * boolean[] results = emailPattern.matchAll(emails);
-     * // results = [true, false, true]
-     *
-     * // Use results
-     * for (int i = 0; i < emails.size(); i++) {
-     *     if (results[i]) {
-     *         System.out.println("Valid: " + emails.get(i));
-     *     }
-     * }
-     * }
- * - *

Performance Characteristics: - *

    - *
  • Throughput: ~3-5 million matches/second for simple patterns
  • - *
  • Overhead: Single JNI call (~50ns) vs N calls (~50ns each)
  • - *
  • Best for: 100+ strings, or complex patterns where matching cost > JNI cost
  • - *
  • Benchmark: See {@code BulkMatchingPerformanceTest} for detailed comparisons
  • - *
- * - *

Supported Collection Types: - *

    - *
  • {@link java.util.List} - ArrayList, LinkedList, Vector, etc.
  • - *
  • {@link java.util.Set} - HashSet, TreeSet, LinkedHashSet, etc.
  • - *
  • {@link java.util.Queue} - LinkedList, ArrayDeque, PriorityQueue, etc.
  • - *
  • Any Collection implementation
  • - *
- * - *

Thread Safety: Pattern is thread-safe, but if the collection is being - * modified concurrently by other threads, you must synchronize externally. - * - * @param inputs collection of strings to match (supports List, Set, Queue, etc.) - * @return boolean array parallel to inputs (same size and order) indicating matches - * @throws NullPointerException if inputs is null - * @see #matchAll(String[]) array variant - * @see #filter(java.util.Collection) to extract only matching elements - * @since 1.0.0 - */ - public boolean[] matchAll(java.util.Collection inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - if (inputs.isEmpty()) { - return new boolean[0]; - } - - try { - String[] array = inputs.toArray(new String[0]); - return matchAll(array); - } catch (ArrayStoreException e) { - throw new IllegalArgumentException( - "Collection contains non-String elements. All elements must be String type. " + - "If you have Collection or other types, convert to strings first: " + - "collection.stream().map(Object::toString).toList()", e); - } - } - - /** - * Matches multiple inputs in a single JNI call (array variant). - * - *

Optimized for arrays - no collection conversion overhead. - * - *

Example - Process array of phone numbers: - *

{@code
-     * Pattern phonePattern = Pattern.compile("\\d{3}-\\d{4}");
-     * String[] phones = {"123-4567", "invalid", "999-8888"};
-     * boolean[] results = phonePattern.matchAll(phones);
-     * // results = [true, false, true]
-     * }
- * - * @param inputs array of strings to match - * @return boolean array parallel to inputs indicating matches - * @throws NullPointerException if inputs is null - * @see #matchAll(java.util.Collection) collection variant - * @since 1.0.0 - */ - public boolean[] matchAll(String[] inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - checkNotClosed(); - - if (inputs.length == 0) { - return new boolean[0]; - } - - long startNanos = System.nanoTime(); - boolean[] results = jni.fullMatchBulk(nativeHandle, inputs); - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (String Bulk) - RE2MetricsRegistry metrics = Pattern.getGlobalCache().getConfig().metricsRegistry(); - long perItemNanos = inputs.length > 0 ? durationNanos / inputs.length : 0; - - // Global metrics (ALL matching operations) - use per-item latency for comparability - metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS, inputs.length); - metrics.recordTimer(MetricNames.MATCHING_LATENCY, perItemNanos); - metrics.recordTimer(MetricNames.MATCHING_FULL_MATCH_LATENCY, perItemNanos); - - // Specific String bulk metrics - metrics.incrementCounter(MetricNames.MATCHING_BULK_OPERATIONS); - metrics.incrementCounter(MetricNames.MATCHING_BULK_ITEMS, inputs.length); - metrics.recordTimer(MetricNames.MATCHING_BULK_LATENCY, perItemNanos); - - return results != null ? results : new boolean[inputs.length]; - } - - /** - * Tests if pattern matches anywhere in multiple strings (partial match bulk). - * - *

This is the bulk variant of {@link Matcher#find()} - tests if the pattern - * matches anywhere within each input string (not necessarily the full string).

- * - *

Processes all inputs in a single JNI call for better performance.

- * - *

Example - Find which strings contain pattern:

- *
{@code
-     * Pattern emailPattern = Pattern.compile("[a-z]+@[a-z]+\\.[a-z]+");
-     * String[] texts = {
-     *     "user@example.com",           // contains email
-     *     "Contact: admin@test.org",    // contains email
-     *     "No email here"                // no email
-     * };
-     * boolean[] results = emailPattern.findAll(texts);
-     * // results = [true, true, false]
-     * }
- * - * @param inputs array of strings to search - * @return boolean array (parallel to inputs) indicating if pattern found in each - * @throws NullPointerException if inputs is null - * @throws IllegalStateException if pattern is closed - * @see #matchAll(String[]) for full match bulk variant - * @see Matcher#find() for single-string partial match - * @since 1.2.0 - */ - public boolean[] findAll(String[] inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - checkNotClosed(); - - if (inputs.length == 0) { - return new boolean[0]; - } - - long startNanos = System.nanoTime(); - boolean[] results = jni.partialMatchBulk(nativeHandle, inputs); - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (String Bulk) - RE2MetricsRegistry metrics = Pattern.getGlobalCache().getConfig().metricsRegistry(); - long perItemNanos = inputs.length > 0 ? durationNanos / inputs.length : 0; - - // Global metrics (ALL matching operations) - metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS, inputs.length); - metrics.recordTimer(MetricNames.MATCHING_LATENCY, perItemNanos); - metrics.recordTimer(MetricNames.MATCHING_PARTIAL_MATCH_LATENCY, perItemNanos); - - // Specific String bulk metrics - metrics.incrementCounter(MetricNames.MATCHING_BULK_OPERATIONS); - metrics.incrementCounter(MetricNames.MATCHING_BULK_ITEMS, inputs.length); - metrics.recordTimer(MetricNames.MATCHING_BULK_LATENCY, perItemNanos); - - return results != null ? results : new boolean[inputs.length]; - } - - /** - * Tests if pattern matches anywhere in multiple strings (partial match bulk, collection variant). - * - *

Convenience wrapper for {@link #findAll(String[])} accepting any Collection.

- * - * @param inputs collection of strings to search - * @return boolean array (parallel to inputs) indicating if pattern found in each - * @throws NullPointerException if inputs is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public boolean[] findAll(java.util.Collection inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - if (inputs.isEmpty()) { - return new boolean[0]; - } - - String[] array = inputs.toArray(new String[0]); - return findAll(array); - } - - /** - * Matches multiple memory regions in a single JNI call (zero-copy bulk). - * - *

This method accepts arrays of memory addresses and lengths, enabling efficient - * zero-copy bulk matching with any off-heap memory system.

- * - *

Performance: 91.5% faster than String bulk API. Combines - * bulk matching (single JNI call) with zero-copy memory access.

- * - *

Memory Safety: All memory regions must remain valid - * for the duration of this call.

- * - *

Usage with DirectByteBuffer array:

- *
{@code
-     * import sun.nio.ch.DirectBuffer;
-     *
-     * Pattern pattern = Pattern.compile("\\d+");
-     * ByteBuffer[] buffers = ...; // Multiple DirectByteBuffers
-     *
-     * long[] addresses = new long[buffers.length];
-     * int[] lengths = new int[buffers.length];
-     * for (int i = 0; i < buffers.length; i++) {
-     *     addresses[i] = ((DirectBuffer) buffers[i]).address();
-     *     lengths[i] = buffers[i].remaining();
-     * }
-     *
-     * boolean[] results = pattern.matchAll(addresses, lengths);  // 91.5% faster!
-     * }
- * - * @param addresses array of native memory addresses - * @param lengths array of byte lengths (must be same length as addresses) - * @return boolean array (parallel to inputs) indicating matches - * @throws NullPointerException if addresses or lengths is null - * @throws IllegalArgumentException if arrays have different lengths - * @throws IllegalStateException if pattern is closed - * @see #matchAll(String[]) String-based bulk variant - * @since 1.1.0 - */ - public boolean[] matchAll(long[] addresses, int[] lengths) { - checkNotClosed(); - Objects.requireNonNull(addresses, "addresses cannot be null"); - Objects.requireNonNull(lengths, "lengths cannot be null"); - - if (addresses.length != lengths.length) { - throw new IllegalArgumentException( - "Address and length arrays must have same size: addresses=" + addresses.length + ", lengths=" + lengths.length); - } - - if (addresses.length == 0) { - return new boolean[0]; - } - - long startNanos = System.nanoTime(); - boolean[] results = jni.fullMatchDirectBulk(nativeHandle, addresses, lengths); - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Bulk Zero-Copy) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - long perItemNanos = addresses.length > 0 ? durationNanos / addresses.length : 0; - - // Global metrics (ALL matching operations) - use per-item latency for comparability - metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS, addresses.length); - metrics.recordTimer(MetricNames.MATCHING_LATENCY, perItemNanos); - metrics.recordTimer(MetricNames.MATCHING_FULL_MATCH_LATENCY, perItemNanos); - - // Specific bulk zero-copy metrics - metrics.incrementCounter(MetricNames.MATCHING_BULK_ZERO_COPY_OPERATIONS); - metrics.incrementCounter(MetricNames.MATCHING_BULK_ITEMS, addresses.length); - metrics.recordTimer(MetricNames.MATCHING_BULK_ZERO_COPY_LATENCY, perItemNanos); - - return results != null ? 
results : new boolean[addresses.length]; - } - - /** - * Partial match on multiple memory regions in a single JNI call (zero-copy bulk). - * - *

Tests if pattern matches anywhere in each memory region.

- * - *

Performance: 91.5% faster than String bulk API.

- * - * @param addresses array of native memory addresses - * @param lengths array of byte lengths (must be same length as addresses) - * @return boolean array indicating if pattern found in each input - * @throws NullPointerException if addresses or lengths is null - * @throws IllegalArgumentException if arrays have different lengths - * @throws IllegalStateException if pattern is closed - * @since 1.1.0 - */ - public boolean[] findAll(long[] addresses, int[] lengths) { - checkNotClosed(); - Objects.requireNonNull(addresses, "addresses cannot be null"); - Objects.requireNonNull(lengths, "lengths cannot be null"); - - if (addresses.length != lengths.length) { - throw new IllegalArgumentException( - "Address and length arrays must have same size: addresses=" + addresses.length + ", lengths=" + lengths.length); - } - - if (addresses.length == 0) { - return new boolean[0]; - } - - long startNanos = System.nanoTime(); - boolean[] results = jni.partialMatchDirectBulk(nativeHandle, addresses, lengths); - long durationNanos = System.nanoTime() - startNanos; - - // Track metrics - GLOBAL (ALL) + SPECIFIC (Bulk Zero-Copy) - RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); - long perItemNanos = addresses.length > 0 ? durationNanos / addresses.length : 0; - - // Global metrics (ALL matching operations) - use per-item latency for comparability - metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS, addresses.length); - metrics.recordTimer(MetricNames.MATCHING_LATENCY, perItemNanos); - metrics.recordTimer(MetricNames.MATCHING_PARTIAL_MATCH_LATENCY, perItemNanos); - - // Specific bulk zero-copy metrics - metrics.incrementCounter(MetricNames.MATCHING_BULK_ZERO_COPY_OPERATIONS); - metrics.incrementCounter(MetricNames.MATCHING_BULK_ITEMS, addresses.length); - metrics.recordTimer(MetricNames.MATCHING_BULK_ZERO_COPY_LATENCY, perItemNanos); - - return results != null ? 
results : new boolean[addresses.length]; - } - - /** - * Matches multiple ByteBuffers in a single operation (bulk with auto-routing). - * - *

Automatically routes each buffer: DirectByteBuffer → zero-copy, heap → String.

- * - *

Example - Bulk process Cassandra cells:

- *
{@code
-     * Pattern pattern = Pattern.compile("valid_.*");
-     * ByteBuffer[] cells = getCellsFromCassandra();  // Array of DirectByteBuffers
-     *
-     * boolean[] results = pattern.matchAll(cells);
-     * // Each DirectByteBuffer uses zero-copy (46-99% faster)
-     * }
- * - * @param buffers array of ByteBuffers to match - * @return boolean array (parallel to inputs) indicating matches - * @throws NullPointerException if buffers is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public boolean[] matchAll(ByteBuffer[] buffers) { - checkNotClosed(); - Objects.requireNonNull(buffers, "buffers cannot be null"); - - if (buffers.length == 0) { - return new boolean[0]; - } - - // Check if all are direct - if so, use zero-copy bulk path - boolean allDirect = true; - for (ByteBuffer buf : buffers) { - if (buf != null && !buf.isDirect()) { - allDirect = false; - break; - } - } - - if (allDirect) { - // Zero-copy path - extract addresses - long[] addresses = new long[buffers.length]; - int[] lengths = new int[buffers.length]; - for (int i = 0; i < buffers.length; i++) { - if (buffers[i] != null) { - addresses[i] = ((DirectBuffer) buffers[i]).address() + buffers[i].position(); - lengths[i] = buffers[i].remaining(); - } - } - return matchAll(addresses, lengths); - } else { - // Mixed or heap - convert to Strings - String[] strings = new String[buffers.length]; - for (int i = 0; i < buffers.length; i++) { - if (buffers[i] != null) { - byte[] bytes = new byte[buffers[i].remaining()]; - buffers[i].duplicate().get(bytes); - strings[i] = new String(bytes, StandardCharsets.UTF_8); - } - } - return matchAll(strings); - } - } - - /** - * Tests if pattern matches anywhere in multiple ByteBuffers (partial match bulk). - * - *

Bulk variant of partial matching with automatic routing.

- * - * @param buffers array of ByteBuffers to search - * @return boolean array indicating if pattern found in each - * @throws NullPointerException if buffers is null - * @throws IllegalStateException if pattern is closed - * @since 1.2.0 - */ - public boolean[] findAll(ByteBuffer[] buffers) { - checkNotClosed(); - Objects.requireNonNull(buffers, "buffers cannot be null"); - - if (buffers.length == 0) { - return new boolean[0]; - } - - // Check if all are direct - boolean allDirect = true; - for (ByteBuffer buf : buffers) { - if (buf != null && !buf.isDirect()) { - allDirect = false; - break; - } - } - - if (allDirect) { - // Zero-copy path - long[] addresses = new long[buffers.length]; - int[] lengths = new int[buffers.length]; - for (int i = 0; i < buffers.length; i++) { - if (buffers[i] != null) { - addresses[i] = ((DirectBuffer) buffers[i]).address() + buffers[i].position(); - lengths[i] = buffers[i].remaining(); - } - } - return findAll(addresses, lengths); - } else { - // Mixed or heap - convert to Strings - String[] strings = new String[buffers.length]; - for (int i = 0; i < buffers.length; i++) { - if (buffers[i] != null) { - byte[] bytes = new byte[buffers[i].remaining()]; - buffers[i].duplicate().get(bytes); - strings[i] = new String(bytes, StandardCharsets.UTF_8); - } - } - return findAll(strings); - } - } - - /** - * Extracts capture groups from content at memory address (zero-copy input). - * - *

Reads text directly from the memory address and extracts all capture groups. - * The input is zero-copy, but output creates new Java Strings for the groups.

- * - * @param address native memory address of UTF-8 encoded text - * @param length number of bytes to read from the address - * @return String array where [0] = full match, [1+] = capturing groups, or null if no match - * @throws IllegalArgumentException if address is 0 or length is negative - * @throws IllegalStateException if pattern is closed - * @since 1.1.0 - */ - public String[] extractGroups(long address, int length) { - checkNotClosed(); - if (address == 0) { - throw new IllegalArgumentException("Address must not be 0"); - } - if (length < 0) { - throw new IllegalArgumentException("Length must not be negative: " + length); - } - - return jni.extractGroupsDirect(nativeHandle, address, length); - } - - /** - * Finds all non-overlapping matches at memory address (zero-copy input). - * - *

Reads text directly from the memory address and finds all matches. - * The input is zero-copy, but output creates new Java Strings.

- * - * @param address native memory address of UTF-8 encoded text - * @param length number of bytes to read from the address - * @return array of match results with capture groups, or null if no matches - * @throws IllegalArgumentException if address is 0 or length is negative - * @throws IllegalStateException if pattern is closed - * @since 1.1.0 - */ - public String[][] findAllMatches(long address, int length) { - checkNotClosed(); - if (address == 0) { - throw new IllegalArgumentException("Address must not be 0"); - } - if (length < 0) { - throw new IllegalArgumentException("Length must not be negative: " + length); - } - - return jni.findAllMatchesDirect(nativeHandle, address, length); - } - - // ========== ByteBuffer API (Automatic Zero-Copy Routing) ========== - - /** - * Tests if ByteBuffer content fully matches this pattern. - * - *

This method intelligently routes to the optimal implementation:

- *
    - *
  • DirectByteBuffer: Uses zero-copy via {@link #matches(long, int)} (46-99% faster)
  • - *
  • HeapByteBuffer: Converts to String and uses {@link #matches(String)}
  • - *
- * - *

Usage Example:

- *
{@code
-     * Pattern pattern = Pattern.compile("\\d+");
-     *
-     * // DirectByteBuffer - zero-copy, 46-99% faster
-     * ByteBuffer directBuffer = ByteBuffer.allocateDirect(1024);
-     * directBuffer.put("12345".getBytes(StandardCharsets.UTF_8));
-     * directBuffer.flip();
-     * boolean r1 = pattern.matches(directBuffer);  // Zero-copy!
-     *
-     * // HeapByteBuffer - falls back to String API
-     * ByteBuffer heapBuffer = ByteBuffer.wrap("67890".getBytes(StandardCharsets.UTF_8));
-     * boolean r2 = pattern.matches(heapBuffer);  // Converted to String
-     * }
- * - *

Performance: When using DirectByteBuffer, provides 46-99% improvement. - * When using heap ByteBuffer, equivalent to String API (no improvement).

- * - *

Memory Safety: The buffer's backing memory must remain valid - * for the duration of this call. Do NOT release direct buffers until method returns.

- * - * @param buffer ByteBuffer containing UTF-8 encoded text (direct or heap-backed) - * @return true if entire content matches this pattern, false otherwise - * @throws NullPointerException if buffer is null - * @throws IllegalStateException if pattern is closed - * @see #matches(String) String-based variant - * @see #matches(long, int) Raw address variant - * @since 1.1.0 - */ - public boolean matches(ByteBuffer buffer) { - checkNotClosed(); - Objects.requireNonNull(buffer, "buffer cannot be null"); - - if (buffer.isDirect()) { - // Zero-copy path for DirectByteBuffer - // DirectBuffer is a public interface - simple cast works - long address = ((DirectBuffer) buffer).address() + buffer.position(); - int length = buffer.remaining(); - return matches(address, length); - } else { - // Heap-backed ByteBuffer - convert to String - return matchesFromByteBuffer(buffer); - } - } - - /** - * Tests if pattern matches anywhere in ByteBuffer content. - * - *

Intelligently routes to zero-copy (DirectByteBuffer) or String API (heap buffer).

- * - *

Performance: 46-99% faster for DirectByteBuffer.

- * - * @param buffer ByteBuffer containing UTF-8 encoded text - * @return true if pattern matches anywhere in content, false otherwise - * @throws NullPointerException if buffer is null - * @throws IllegalStateException if pattern is closed - * @since 1.1.0 - */ - public boolean find(ByteBuffer buffer) { - checkNotClosed(); - Objects.requireNonNull(buffer, "buffer cannot be null"); - - if (buffer.isDirect()) { - // Zero-copy path - long address = ((DirectBuffer) buffer).address() + buffer.position(); - int length = buffer.remaining(); - return find(address, length); - } else { - // Heap-backed - convert to String - return findFromByteBuffer(buffer); - } - } - - /** - * Extracts capture groups from ByteBuffer content. - * - *

Intelligently routes to zero-copy (DirectByteBuffer) or String API (heap buffer).

- * - * @param buffer ByteBuffer containing UTF-8 encoded text - * @return String array where [0] = full match, [1+] = capturing groups, or null if no match - * @throws NullPointerException if buffer is null - * @throws IllegalStateException if pattern is closed - * @since 1.1.0 - */ - public String[] extractGroups(ByteBuffer buffer) { - checkNotClosed(); - Objects.requireNonNull(buffer, "buffer cannot be null"); - - if (buffer.isDirect()) { - // Zero-copy path - long address = ((DirectBuffer) buffer).address() + buffer.position(); - int length = buffer.remaining(); - return extractGroups(address, length); - } else { - // Heap-backed - return extractGroupsFromByteBuffer(buffer); - } - } - - /** - * Finds all non-overlapping matches in ByteBuffer content. - * - *

Intelligently routes to zero-copy (DirectByteBuffer) or String API (heap buffer).

- * - * @param buffer ByteBuffer containing UTF-8 encoded text - * @return array of match results with capture groups, or null if no matches - * @throws NullPointerException if buffer is null - * @throws IllegalStateException if pattern is closed - * @since 1.1.0 - */ - public String[][] findAllMatches(ByteBuffer buffer) { - checkNotClosed(); - Objects.requireNonNull(buffer, "buffer cannot be null"); - - if (buffer.isDirect()) { - // Zero-copy path - long address = ((DirectBuffer) buffer).address() + buffer.position(); - int length = buffer.remaining(); - return findAllMatches(address, length); - } else { - // Heap-backed - return findAllMatchesFromByteBuffer(buffer); - } - } - - /** - * Helper: Extract String from ByteBuffer for matches() (heap-backed fallback). - */ - private boolean matchesFromByteBuffer(ByteBuffer buffer) { - byte[] bytes = new byte[buffer.remaining()]; - buffer.duplicate().get(bytes); // Use duplicate to not modify position - String text = new String(bytes, StandardCharsets.UTF_8); - return matches(text); - } - - /** - * Helper: Extract String from ByteBuffer for find() (heap-backed fallback). - */ - private boolean findFromByteBuffer(ByteBuffer buffer) { - byte[] bytes = new byte[buffer.remaining()]; - buffer.duplicate().get(bytes); - String text = new String(bytes, StandardCharsets.UTF_8); - try (Matcher m = matcher(text)) { - return m.find(); - } - } - - /** - * Helper: Extract String from ByteBuffer for extractGroups() (heap-backed fallback). - */ - private String[] extractGroupsFromByteBuffer(ByteBuffer buffer) { - byte[] bytes = new byte[buffer.remaining()]; - buffer.duplicate().get(bytes); - String text = new String(bytes, StandardCharsets.UTF_8); - return jni.extractGroups(nativeHandle, text); - } - - /** - * Helper: Extract String from ByteBuffer for findAllMatches() (heap-backed fallback). 
- */ - private String[][] findAllMatchesFromByteBuffer(ByteBuffer buffer) { - byte[] bytes = new byte[buffer.remaining()]; - buffer.duplicate().get(bytes); - String text = new String(bytes, StandardCharsets.UTF_8); - return jni.findAllMatches(nativeHandle, text); - } - - /** - * Filters collection, returning only matching elements. - * - *

Creates a new {@link java.util.List} containing only strings that match this pattern. - * The original collection is not modified. Uses bulk matching internally for performance. - * - *

Example - Extract valid email addresses: - *

{@code
-     * Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
-     * List inputs = List.of(
-     *     "user@example.com",    // matches
-     *     "invalid_email",       // doesn't match
-     *     "admin@test.org"       // matches
-     * );
-     * List validEmails = emailPattern.filter(inputs);
-     * // validEmails = ["user@example.com", "admin@test.org"]
-     * }
- * - *

Collection Types: - *

    - *
  • Input: Any Collection (List, Set, Queue, etc.)
  • - *
  • Output: Always returns {@link java.util.ArrayList}
  • - *
  • Order: Preserves iteration order of input collection
  • - *
- * - *

Common Use Cases: - *

    - *
  • Extracting valid data from mixed datasets
  • - *
  • Data cleaning (filter valid records)
  • - *
  • Log filtering (extract matching log lines)
  • - *
  • Cassandra SAI: Filter partition keys matching regex
  • - *
- * - *

Performance: Throughput ~3.9 million strings/second (benchmark: 10k strings in 2.6ms) - * - * @param inputs collection to filter (List, Set, Queue, or any Collection) - * @return new List containing only matching elements (preserves input order) - * @throws NullPointerException if inputs is null - * @see #filterNot(java.util.Collection) inverse operation - * @see #retainMatches(java.util.Collection) in-place variant - * @see #matchAll(java.util.Collection) to get boolean array - * @since 1.0.0 - */ - public java.util.List filter(java.util.Collection inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.isEmpty()) { - return new java.util.ArrayList<>(); - } - - String[] array; - try { - array = inputs.toArray(new String[0]); - } catch (ArrayStoreException e) { - throw new IllegalArgumentException( - "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.", e); - } - - boolean[] matches = matchAll(array); - - java.util.List result = new java.util.ArrayList<>(); - for (int i = 0; i < array.length; i++) { - if (matches[i]) { - result.add(array[i]); - } - } - - return result; - } - - /** - * Filters collection, returning only non-matching elements (inverse of {@link #filter}). - * - *

Creates a new {@link java.util.List} containing only strings that do NOT match this pattern. - * The original collection is not modified. - * - *

Example - Remove test data, keep production: - *

{@code
-     * Pattern testPattern = Pattern.compile("test_.*");
-     * List allKeys = List.of("test_key1", "prod_key1", "test_key2", "prod_key2");
-     * List prodKeys = testPattern.filterNot(allKeys);
-     * // prodKeys = ["prod_key1", "prod_key2"]
-     * }
- * - *

Use Cases: - *

    - *
  • Removing test/debug data from production datasets
  • - *
  • Blacklist filtering (exclude matching patterns)
  • - *
  • Data sanitization (remove sensitive patterns)
  • - *
- * - * @param inputs collection to filter - * @return new List containing only non-matching elements (preserves input order) - * @throws NullPointerException if inputs is null - * @see #filter(java.util.Collection) inverse operation (keep matches) - * @see #removeMatches(java.util.Collection) in-place variant - * @since 1.0.0 - */ - public java.util.List filterNot(java.util.Collection inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.isEmpty()) { - return new java.util.ArrayList<>(); - } - - String[] array; - try { - array = inputs.toArray(new String[0]); - } catch (ArrayStoreException e) { - throw new IllegalArgumentException( - "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.", e); - } - - boolean[] matches = matchAll(array); - - java.util.List result = new java.util.ArrayList<>(); - for (int i = 0; i < array.length; i++) { - if (!matches[i]) { // Inverted logic - result.add(array[i]); - } - } - - return result; - } - - /** - * Removes non-matching elements from collection (in-place mutation). - * - *

MUTATES THE INPUT: This method modifies the provided collection by removing - * elements that don't match the pattern. Only matching elements remain after this call. - * - *

Example - Clean invalid data in-place: - *

{@code
-     * Pattern validPattern = Pattern.compile("[a-zA-Z0-9_]+");
-     * List usernames = new ArrayList<>(List.of("user1", "invalid@", "admin", "bad#name"));
-     *
-     * int removed = validPattern.retainMatches(usernames);
-     * // removed = 2
-     * // usernames now = ["user1", "admin"] (invalid entries removed)
-     * }
- * - *

When to Use: - *

    - *
  • Use this: When you want to modify the collection in-place (memory efficient)
  • - *
  • Use {@link #filter}: When you need to preserve the original collection
  • - *
- * - *

Collection Requirements: - *

    - *
  • Collection must be mutable (support {@code iterator().remove()})
  • - *
  • Works with: ArrayList, LinkedList, HashSet, TreeSet, etc.
  • - *
  • Fails with: Collections.unmodifiableList(), List.of(), Set.of(), etc.
  • - *
- * - * @param inputs mutable collection to filter (List, Set, Queue, etc.) - * @return number of elements removed - * @throws NullPointerException if inputs is null - * @throws UnsupportedOperationException if collection is immutable - * @see #filter(java.util.Collection) non-mutating variant - * @see #removeMatches(java.util.Collection) inverse (remove matches, keep non-matches) - * @since 1.0.0 - */ - public int retainMatches(java.util.Collection inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.isEmpty()) { - return 0; - } - - String[] array; - try { - array = inputs.toArray(new String[0]); - } catch (ArrayStoreException e) { - throw new IllegalArgumentException( - "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.", e); - } - - boolean[] matches = matchAll(array); - - int removed = 0; - java.util.Iterator it = inputs.iterator(); - int i = 0; - while (it.hasNext()) { - it.next(); - if (!matches[i++]) { - it.remove(); - removed++; - } - } - - return removed; - } - - /** - * Removes matching elements from collection (in-place mutation, inverse of {@link #retainMatches}). - * - *

MUTATES THE INPUT: This method modifies the provided collection by removing - * elements that match the pattern. Only non-matching elements remain after this call. - * - *

Example - Remove sensitive data patterns: - *

{@code
-     * Pattern ssnPattern = Pattern.compile("\\d{3}-\\d{2}-\\d{4}");  // SSN format
-     * List logLines = new ArrayList<>(List.of(
-     *     "User logged in",
-     *     "SSN: 123-45-6789",  // sensitive!
-     *     "Processing request",
-     *     "SSN: 987-65-4321"   // sensitive!
-     * ));
-     *
-     * int removed = ssnPattern.removeMatches(logLines);
-     * // removed = 2
-     * // logLines now = ["User logged in", "Processing request"] (SSNs removed)
-     * }
- * - *

Common Use Cases: - *

    - *
  • Data sanitization (remove PII, credentials, etc.)
  • - *
  • Blacklist filtering (remove known bad patterns)
  • - *
  • Log cleaning (strip sensitive information)
  • - *
- * - * @param inputs mutable collection to filter (List, Set, Queue, etc.) - * @return number of elements removed - * @throws NullPointerException if inputs is null - * @throws UnsupportedOperationException if collection is immutable - * @see #retainMatches(java.util.Collection) inverse (keep matches, remove non-matches) - * @see #filterNot(java.util.Collection) non-mutating variant - * @since 1.0.0 - */ - public int removeMatches(java.util.Collection inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.isEmpty()) { - return 0; - } - - String[] array; - try { - array = inputs.toArray(new String[0]); - } catch (ArrayStoreException e) { - throw new IllegalArgumentException( - "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.", e); - } - - boolean[] matches = matchAll(array); - - int removed = 0; - java.util.Iterator it = inputs.iterator(); - int i = 0; - while (it.hasNext()) { - it.next(); - if (matches[i++]) { // Inverted logic - it.remove(); - removed++; - } - } - - return removed; - } - - // ========== Map Filtering Operations ========== - - /** - * Filters map by matching keys against pattern (returns new map). - * - *

Creates a new {@link java.util.HashMap} containing only entries whose keys match - * this pattern. Values are preserved, keys are tested against the pattern. - * - *

Example - Filter configuration by environment prefix: - *

{@code
-     * Pattern prodPattern = Pattern.compile("prod_.*");
-     * Map<String, String> allConfig = Map.of(
-     *     "prod_db_host", "prod-db.example.com",
-     *     "test_db_host", "test-db.example.com",
-     *     "prod_api_key", "abc123",
-     *     "test_api_key", "xyz789"
-     * );
-     *
-     * Map<String, String> prodConfig = prodPattern.filterByKey(allConfig);
-     * // prodConfig = {"prod_db_host": "prod-db.example.com", "prod_api_key": "abc123"}
-     * }
- * - *

Example - Cassandra SAI filter by partition key pattern: - *

{@code
-     * // Filter partition keys matching date pattern
-     * Pattern datePattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");
-     * Map partitions = ...; // partition key → row
-     * Map datePartitions = datePattern.filterByKey(partitions);
-     * }
- * - *

Performance: Throughput ~2.6 million entries/second (benchmark: 10k entries in 3.9ms) - * - * @param value type - * @param inputs map where keys (String type) are tested against pattern - * @return new HashMap containing only entries whose keys match - * @throws NullPointerException if inputs is null - * @see #filterByValue(java.util.Map) filter by values instead of keys - * @see #filterNotByKey(java.util.Map) inverse (keep non-matching keys) - * @see #retainMatchesByKey(java.util.Map) in-place variant - * @since 1.0.0 - */ - public java.util.Map filterByKey(java.util.Map inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.isEmpty()) { - return new java.util.HashMap<>(); - } - - // Extract keys for bulk matching - java.util.List> entries = new java.util.ArrayList<>(inputs.entrySet()); - String[] keys = new String[entries.size()]; - for (int i = 0; i < entries.size(); i++) { - keys[i] = entries.get(i).getKey(); - } - - boolean[] matches = matchAll(keys); - - // Build result map - java.util.Map result = new java.util.HashMap<>(); - for (int i = 0; i < entries.size(); i++) { - if (matches[i]) { - java.util.Map.Entry entry = entries.get(i); - result.put(entry.getKey(), entry.getValue()); - } - } - - return result; - } - - /** - * Filters map by matching values against pattern (returns new map). - * - *

Creates a new {@link java.util.HashMap} containing only entries whose values match - * this pattern. Keys are preserved, values are tested against the pattern. - * - *

Example - Filter user map by valid email addresses: - *

{@code
-     * Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
-     * Map<Integer, String> userEmails = Map.of(
-     *     1, "user@example.com",    // valid email
-     *     2, "invalid_email",       // not an email
-     *     3, "admin@test.org"       // valid email
-     * );
-     *
-     * Map<Integer, String> validUsers = emailPattern.filterByValue(userEmails);
-     * // validUsers = {1: "user@example.com", 3: "admin@test.org"}
-     * }
- * - *

Example - Extract records with matching status: - *

{@code
-     * Pattern activePattern = Pattern.compile("active|running|online");
-     * Map<String, String> services = Map.of(
-     *     "web", "active",
-     *     "db", "stopped",
-     *     "cache", "running"
-     * );
-     *
-     * Map<String, String> activeServices = activePattern.filterByValue(services);
-     * // activeServices = {"web": "active", "cache": "running"}
-     * }
- * - * @param key type - * @param inputs map where values (String type) are tested against pattern - * @return new HashMap containing only entries whose values match - * @throws NullPointerException if inputs is null - * @see #filterByKey(java.util.Map) filter by keys instead of values - * @see #filterNotByValue(java.util.Map) inverse (keep non-matching values) - * @see #retainMatchesByValue(java.util.Map) in-place variant - * @since 1.0.0 - */ - public java.util.Map filterByValue(java.util.Map inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.isEmpty()) { - return new java.util.HashMap<>(); - } - - // Extract values for bulk matching - java.util.List> entries = new java.util.ArrayList<>(inputs.entrySet()); - String[] values = new String[entries.size()]; - for (int i = 0; i < entries.size(); i++) { - values[i] = entries.get(i).getValue(); - } - - boolean[] matches = matchAll(values); - - // Build result map - java.util.Map result = new java.util.HashMap<>(); - for (int i = 0; i < entries.size(); i++) { - if (matches[i]) { - java.util.Map.Entry entry = entries.get(i); - result.put(entry.getKey(), entry.getValue()); - } - } - - return result; - } - - /** - * Filters map by keys NOT matching pattern (inverse of {@link #filterByKey}). - * - *

Example: - *

{@code
-     * Pattern tmpPattern = Pattern.compile("tmp_.*");
-     * Map<String, String> data = Map.of("tmp_cache", "...", "prod_data", "...");
-     * Map<String, String> permanent = tmpPattern.filterNotByKey(data);
-     * // permanent = {"prod_data": "..."}
-     * }
- * - * @param value type - * @param inputs map where keys are tested - * @return new HashMap with entries whose keys do NOT match - * @throws NullPointerException if inputs is null - * @see #filterByKey inverse - * @since 1.0.0 - */ - public java.util.Map filterNotByKey(java.util.Map inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.isEmpty()) { - return new java.util.HashMap<>(); - } - - java.util.List> entries = new java.util.ArrayList<>(inputs.entrySet()); - String[] keys = new String[entries.size()]; - for (int i = 0; i < entries.size(); i++) { - keys[i] = entries.get(i).getKey(); - } - - boolean[] matches = matchAll(keys); - - java.util.Map result = new java.util.HashMap<>(); - for (int i = 0; i < entries.size(); i++) { - if (!matches[i]) { // Inverted - java.util.Map.Entry entry = entries.get(i); - result.put(entry.getKey(), entry.getValue()); - } - } - - return result; - } - - /** - * Filters map by values NOT matching pattern (inverse of {@link #filterByValue}). - * - *

Example - Exclude error statuses: - *

{@code
-     * Pattern errorPattern = Pattern.compile("error|failed|timeout");
-     * Map<String, String> jobStatuses = Map.of("job1", "success", "job2", "error", "job3", "complete");
-     * Map<String, String> successful = errorPattern.filterNotByValue(jobStatuses);
-     * // successful = {"job1": "success", "job3": "complete"}
-     * }
- * - * @param key type - * @param inputs map where values are tested - * @return new HashMap with entries whose values do NOT match - * @throws NullPointerException if inputs is null - * @see #filterByValue inverse - * @since 1.0.0 - */ - public java.util.Map filterNotByValue(java.util.Map inputs) { - Objects.requireNonNull(inputs, "inputs cannot be null"); - - if (inputs.isEmpty()) { - return new java.util.HashMap<>(); - } - - java.util.List> entries = new java.util.ArrayList<>(inputs.entrySet()); - String[] values = new String[entries.size()]; - for (int i = 0; i < entries.size(); i++) { - values[i] = entries.get(i).getValue(); - } - - boolean[] matches = matchAll(values); - - java.util.Map result = new java.util.HashMap<>(); - for (int i = 0; i < entries.size(); i++) { - if (!matches[i]) { // Inverted - java.util.Map.Entry entry = entries.get(i); - result.put(entry.getKey(), entry.getValue()); - } - } - - return result; - } - - /** - * Removes entries where keys don't match (in-place map mutation). - * - *

MUTATES INPUT: Keeps only entries whose keys match the pattern. - * - *

Example: - *

{@code
-     * Pattern userPattern = Pattern.compile("user_\\d+");
-     * Map cache = new HashMap<>(Map.of("user_123", obj1, "sys_config", obj2));
-     * int removed = userPattern.retainMatchesByKey(cache);
-     * // removed = 1, cache = {"user_123": obj1}
-     * }
- * - * @param value type - * @param map mutable map to filter by keys - * @return number of entries removed - * @throws NullPointerException if map is null - * @throws UnsupportedOperationException if map is immutable - * @see #filterByKey non-mutating variant - * @since 1.0.0 - */ - public int retainMatchesByKey(java.util.Map map) { - Objects.requireNonNull(map, "map cannot be null"); - - if (map.isEmpty()) { - return 0; - } - - String[] keys = map.keySet().toArray(new String[0]); - boolean[] matches = matchAll(keys); - - int removed = 0; - java.util.Iterator> it = map.entrySet().iterator(); - int i = 0; - while (it.hasNext()) { - it.next(); - if (!matches[i++]) { - it.remove(); - removed++; - } - } - - return removed; - } - - /** - * Removes entries where values don't match (in-place map mutation). - * - *

MUTATES INPUT: Keeps only entries whose values match. - * - *

Example: - *

{@code
-     * Pattern activePattern = Pattern.compile("active|online");
-     * Map<String, String> servers = new HashMap<>(Map.of("web", "active", "db", "offline"));
-     * userPattern.retainMatchesByValue(servers);
-     * // servers = {"web": "active"}
-     * }
- * - * @param key type - * @param map mutable map to filter by values - * @return number of entries removed - * @throws NullPointerException if map is null - * @throws UnsupportedOperationException if map is immutable - * @see #filterByValue non-mutating variant - * @since 1.0.0 - */ - public int retainMatchesByValue(java.util.Map map) { - Objects.requireNonNull(map, "map cannot be null"); - - if (map.isEmpty()) { - return 0; - } - - // Extract values maintaining entry order - java.util.List> entries = new java.util.ArrayList<>(map.entrySet()); - String[] values = new String[entries.size()]; - for (int i = 0; i < entries.size(); i++) { - values[i] = entries.get(i).getValue(); - } - - boolean[] matches = matchAll(values); - - int removed = 0; - java.util.Iterator> it = map.entrySet().iterator(); - int i = 0; - while (it.hasNext()) { - it.next(); - if (!matches[i++]) { - it.remove(); - removed++; - } - } - - return removed; - } - - /** - * Removes entries where keys match (in-place, inverse of {@link #retainMatchesByKey}). - * - *

MUTATES INPUT: Removes entries whose keys match the pattern. - * - *

Example - Remove temporary cache entries: - *

{@code
-     * Pattern tmpPattern = Pattern.compile("tmp_.*");
-     * Map cache = new HashMap<>(Map.of("tmp_123", data1, "perm_456", data2));
-     * tmpPattern.removeMatchesByKey(cache);
-     * // cache = {"perm_456": data2}
-     * }
- * - * @param value type - * @param map mutable map to filter by keys - * @return number of entries removed - * @throws NullPointerException if map is null - * @throws UnsupportedOperationException if map is immutable - * @see #filterNotByKey non-mutating variant - * @since 1.0.0 - */ - public int removeMatchesByKey(java.util.Map map) { - Objects.requireNonNull(map, "map cannot be null"); - - if (map.isEmpty()) { - return 0; - } - - String[] keys = map.keySet().toArray(new String[0]); - boolean[] matches = matchAll(keys); - - int removed = 0; - java.util.Iterator> it = map.entrySet().iterator(); - int i = 0; - while (it.hasNext()) { - it.next(); - if (matches[i++]) { // Inverted - remove if MATCHES - it.remove(); - removed++; - } - } - - return removed; - } - - /** - * Removes entries where values match (in-place, inverse of {@link #retainMatchesByValue}). - * - *

MUTATES INPUT: Removes entries whose values match the pattern. - * - *

Example - Remove failed jobs: - *

{@code
-     * Pattern failedPattern = Pattern.compile("failed|error|timeout");
-     * Map<Integer, String> jobs = new HashMap<>(Map.of(1, "success", 2, "failed", 3, "complete"));
-     * failedPattern.removeMatchesByValue(jobs);
-     * // jobs = {1: "success", 3: "complete"}
-     * }
- * - * @param key type - * @param map mutable map to filter by values - * @return number of entries removed - * @throws NullPointerException if map is null - * @throws UnsupportedOperationException if map is immutable - * @see #filterNotByValue non-mutating variant - * @since 1.0.0 - */ - public int removeMatchesByValue(java.util.Map map) { - Objects.requireNonNull(map, "map cannot be null"); - - if (map.isEmpty()) { - return 0; - } - - java.util.List> entries = new java.util.ArrayList<>(map.entrySet()); - String[] values = new String[entries.size()]; - for (int i = 0; i < entries.size(); i++) { - values[i] = entries.get(i).getValue(); - } - - boolean[] matches = matchAll(values); - - int removed = 0; - java.util.Iterator> it = map.entrySet().iterator(); - int i = 0; - while (it.hasNext()) { - it.next(); - if (matches[i++]) { // Inverted - remove if MATCHES - it.remove(); - removed++; - } - } - - return removed; - } + private static final Logger logger = LoggerFactory.getLogger(Pattern.class); + + // Ensure native library is loaded + static { + RE2LibraryLoader.loadLibrary(); + } + + // Global pattern cache (mutable for testing only) + private static volatile PatternCache cache = new PatternCache(RE2Config.DEFAULT); + + /** Gets the global pattern cache (for internal use). 
*/ + public static PatternCache getGlobalCache() { + return cache; + } + + private final String patternString; + private final boolean caseSensitive; + private final long nativeHandle; + private final AtomicBoolean closed = new AtomicBoolean(false); + private final boolean fromCache; + private final java.util.concurrent.atomic.AtomicInteger refCount = + new java.util.concurrent.atomic.AtomicInteger(0); + private static final int maxMatchersPerPattern = RE2Config.DEFAULT.maxMatchersPerPattern(); + private final long nativeMemoryBytes; + + // JniAdapter for all JNI calls - allows mocking in tests + final IRE2Native jni; + + Pattern(String patternString, boolean caseSensitive, long nativeHandle) { + this(patternString, caseSensitive, nativeHandle, false, RE2Native.INSTANCE); + } + + Pattern( + String patternString, + boolean caseSensitive, + long nativeHandle, + boolean fromCache, + IRE2Native jni) { + this.patternString = Objects.requireNonNull(patternString); + this.caseSensitive = caseSensitive; + this.nativeHandle = nativeHandle; + this.fromCache = fromCache; + this.jni = jni; + + // Query native memory size using adapter + this.nativeMemoryBytes = jni.patternMemory(nativeHandle); + + logger.trace( + "RE2: Pattern created - length: {}, caseSensitive: {}, fromCache: {}, nativeBytes: {}", + patternString.length(), + caseSensitive, + fromCache, + nativeMemoryBytes); + } + + public static Pattern compile(String pattern) { + return compile(pattern, true); + } + + /** + * Compiles a pattern with the specified case sensitivity. 
+ * + * @param pattern the regex pattern + * @param caseSensitive whether matching should be case-sensitive + * @return the compiled pattern + */ + public static Pattern compile(String pattern, boolean caseSensitive) { + Objects.requireNonNull(pattern, "pattern cannot be null"); + + // Try cache first + return cache.getOrCompile( + pattern, caseSensitive, () -> compileUncached(pattern, caseSensitive)); + } + + /** + * Compiles a pattern without using the cache (for testing/special cases). + * + *

The returned pattern is NOT managed by the cache and MUST be closed. + * + * @param pattern regex pattern + * @return uncached pattern (must close) + */ + public static Pattern compileWithoutCache(String pattern) { + return compileWithoutCache(pattern, true); + } + + /** + * Compiles a pattern without using the cache (for testing/special cases). + * + *

The returned pattern is NOT managed by the cache and MUST be closed. + * + * @param pattern regex pattern + * @param caseSensitive case sensitivity + * @return uncached pattern (must close) + */ + public static Pattern compileWithoutCache(String pattern, boolean caseSensitive) { + // Compile with fromCache=false so it can actually be closed + return doCompile(pattern, caseSensitive, false, RE2Native.INSTANCE); + } + + /** Compiles a pattern for caching (internal use). */ + private static Pattern compileUncached(String pattern, boolean caseSensitive) { + // Compile with fromCache=true so users can't close it (cache manages it) + return doCompile(pattern, caseSensitive, true, RE2Native.INSTANCE); + } + + /** + * Package-private compile method for test injection of mock JniAdapter. Bypasses cache for full + * control in unit tests. + */ + static Pattern compileForTesting(String pattern, boolean caseSensitive, IRE2Native jni) { + return doCompile(pattern, caseSensitive, false, jni); + } + + /** Actual compilation logic. 
*/ + private static Pattern doCompile( + String pattern, boolean caseSensitive, boolean fromCache, IRE2Native jni) { + RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); + String hash = PatternHasher.hash(pattern); + + // Reject empty patterns (matches old wrapper behavior) + if (pattern.isEmpty()) { + throw new PatternCompilationException(pattern, "Pattern is null or empty"); + } + + // Track allocation and enforce maxSimultaneousCompiledPatterns limit + // This is ACTIVE count, not cumulative - patterns can be freed and recompiled + cache + .getResourceTracker() + .trackPatternAllocated(cache.getConfig().maxSimultaneousCompiledPatterns(), metrics); + + long startNanos = System.nanoTime(); + long handle = 0; + boolean compilationSuccessful = false; + + try { + handle = jni.compile(pattern, caseSensitive); + + if (handle == 0 || !jni.patternOk(handle)) { + String error = jni.getError(); + + // Compilation failed - record error + metrics.incrementCounter(MetricNames.ERRORS_COMPILATION_FAILED); + logger.debug("RE2: Pattern compilation failed - hash: {}, error: {}", hash, error); + + // Will be cleaned up in finally block + throw new PatternCompilationException(pattern, error != null ? 
error : "Unknown error"); + } + + long durationNanos = System.nanoTime() - startNanos; + metrics.recordTimer(MetricNames.PATTERNS_COMPILATION_LATENCY, durationNanos); + metrics.incrementCounter(MetricNames.PATTERNS_COMPILED); + + Pattern compiled = new Pattern(pattern, caseSensitive, handle, fromCache, jni); + logger.trace( + "RE2: Pattern compiled - hash: {}, length: {}, caseSensitive: {}, fromCache: {}, nativeBytes: {}, timeNs: {}", + hash, + pattern.length(), + caseSensitive, + fromCache, + compiled.nativeMemoryBytes, + durationNanos); + + compilationSuccessful = true; + return compiled; + + } catch (ResourceException e) { + // Resource limit hit - count already rolled back by trackPatternAllocated + throw e; + + } finally { + // Clean up if compilation failed + if (!compilationSuccessful) { + // Free handle if allocated + if (handle != 0) { + try { + jni.freePattern(handle); + } catch (Exception e) { + // Silently ignore - best effort cleanup + } + } + + // Decrement count (allocation failed) + cache.getResourceTracker().trackPatternFreed(metrics); + } + } + } + + /** + * Creates a matcher for this pattern against the given input. + * + * @param input the input string to match against + * @return a new matcher + */ + public Matcher matcher(String input) { + checkNotClosed(); + return new Matcher(this, input); + } + + /** + * Tests if the given input matches this pattern. + * + * @param input the input string to test + * @return true if the input matches + */ + public boolean matches(String input) { + try (Matcher m = matcher(input)) { + return m.matches(); + } + } + + /** + * Tests if content at memory address fully matches this pattern (zero-copy). + * + *

This method accepts a raw memory address and length, enabling zero-copy matching with any + * off-heap memory system. + * + *

Performance: 46-99% faster than String API depending on input size. For + * 10KB+ inputs, provides 99%+ improvement. + * + *

Memory Safety: The memory at {@code address} must: + * + *

    + *
  • Remain valid for the duration of this call + *
  • Contain valid UTF-8 encoded text + *
  • Not be released/freed until this method returns + *
+ * + *

Usage with DirectByteBuffer: + * + *

{@code
+   * import sun.nio.ch.DirectBuffer;
+   *
+   * Pattern pattern = Pattern.compile("\\d+");
+   * ByteBuffer buffer = ByteBuffer.allocateDirect(1024);
+   * buffer.put("12345".getBytes(StandardCharsets.UTF_8));
+   * buffer.flip();
+   *
+   * long address = ((DirectBuffer) buffer).address();
+   * int length = buffer.remaining();
+   * boolean matches = pattern.matches(address, length);  // Zero-copy!
+   * }
+ * + *

Note: Most users should use {@link #matches(ByteBuffer)} instead, which + * handles address extraction automatically. + * + * @param address native memory address of UTF-8 encoded text + * @param length number of bytes to read from the address + * @return true if entire content matches this pattern, false otherwise + * @throws IllegalArgumentException if address is 0 or length is negative + * @throws IllegalStateException if pattern is closed + * @see #matches(String) String-based variant + * @see #matches(ByteBuffer) ByteBuffer variant with automatic routing + * @since 1.1.0 + */ + public boolean matches(long address, int length) { + checkNotClosed(); + if (address == 0) { + throw new IllegalArgumentException("Address must not be 0"); + } + if (length < 0) { + throw new IllegalArgumentException("Length must not be negative: " + length); + } + + long startNanos = System.nanoTime(); + boolean result = jni.fullMatchDirect(nativeHandle, address, length); + long durationNanos = System.nanoTime() - startNanos; + + // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy) + RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); + + // Global metrics (ALL matching operations) + metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS); + metrics.recordTimer(MetricNames.MATCHING_LATENCY, durationNanos); + metrics.recordTimer(MetricNames.MATCHING_FULL_MATCH_LATENCY, durationNanos); + + // Specific zero-copy metrics + metrics.incrementCounter(MetricNames.MATCHING_ZERO_COPY_OPERATIONS); + metrics.recordTimer(MetricNames.MATCHING_ZERO_COPY_LATENCY, durationNanos); + + return result; + } + + /** + * Tests if pattern matches anywhere in content at memory address (zero-copy). + * + *

This is the partial match variant - tests if pattern matches anywhere within the input, not + * necessarily the entire content. + * + *

Performance: 46-99% faster than String API. + * + *

Memory Safety: The memory at {@code address} must remain valid for the + * duration of this call. + * + *

Usage with DirectByteBuffer: + * + *

{@code
+   * import sun.nio.ch.DirectBuffer;
+   *
+   * Pattern pattern = Pattern.compile("@[a-z]+\\.[a-z]+");
+   * ByteBuffer buffer = ByteBuffer.allocateDirect(1024);
+   * buffer.put("Contact: user@example.com".getBytes(StandardCharsets.UTF_8));
+   * buffer.flip();
+   *
+   * long address = ((DirectBuffer) buffer).address();
+   * int length = buffer.remaining();
+   * boolean found = pattern.find(address, length);  // Zero-copy!
+   * }
+ * + *

Note: Most users should use {@link #find(ByteBuffer)} instead. + * + * @param address native memory address of UTF-8 encoded text + * @param length number of bytes to read from the address + * @return true if pattern matches anywhere in content, false otherwise + * @throws IllegalArgumentException if address is 0 or length is negative + * @throws IllegalStateException if pattern is closed + * @see #find(ByteBuffer) ByteBuffer variant with automatic routing + * @since 1.1.0 + */ + public boolean find(long address, int length) { + checkNotClosed(); + if (address == 0) { + throw new IllegalArgumentException("Address must not be 0"); + } + if (length < 0) { + throw new IllegalArgumentException("Length must not be negative: " + length); + } + + long startNanos = System.nanoTime(); + boolean result = jni.partialMatchDirect(nativeHandle, address, length); + long durationNanos = System.nanoTime() - startNanos; + + // Track metrics - GLOBAL (ALL) + SPECIFIC (Zero-Copy) + RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); + + // Global metrics (ALL matching operations) + metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS); + metrics.recordTimer(MetricNames.MATCHING_LATENCY, durationNanos); + metrics.recordTimer(MetricNames.MATCHING_PARTIAL_MATCH_LATENCY, durationNanos); + + // Specific zero-copy metrics + metrics.incrementCounter(MetricNames.MATCHING_ZERO_COPY_OPERATIONS); + metrics.recordTimer(MetricNames.MATCHING_ZERO_COPY_LATENCY, durationNanos); + + return result; + } + + // ========== Capture Group Operations ========== + + /** + * Matches input and extracts capture groups. + * + *

This method performs a full match and returns a {@link MatchResult} containing all captured + * groups. If the match fails, the MatchResult will have {@code matched() == false}. + * + *

Example - Extract email components: + * + *

{@code
+   * Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+)\\.([a-z]+)");
+   * MatchResult result = pattern.match("user@example.com");
+   *
+   * if (result.matched()) {
+   *     String full = result.group();      // "user@example.com"
+   *     String user = result.group(1);     // "user"
+   *     String domain = result.group(2);   // "example"
+   *     String tld = result.group(3);      // "com"
+   * }
+   * }
+ * + *

Named Groups: + * + *

{@code
+   * Pattern pattern = Pattern.compile("(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})");
+   * MatchResult result = pattern.match("2025-11-24");
+   *
+   * if (result.matched()) {
+   *     String year = result.group("year");   // "2025"
+   *     String month = result.group("month"); // "11"
+   *     String day = result.group("day");     // "24"
+   * }
+   * }
+ * + * @param input the string to match + * @return MatchResult containing captured groups, or a failed match if no match + * @throws NullPointerException if input is null + * @throws IllegalStateException if pattern is closed + * @see MatchResult + * @see #find(String) for partial matching with groups + * @see #findAll(String) for finding all matches with groups + * @since 1.2.0 + */ + public MatchResult match(String input) { + checkNotClosed(); + Objects.requireNonNull(input, "input cannot be null"); + + long startNanos = System.nanoTime(); + + String[] groups = jni.extractGroups(nativeHandle, input); + + if (groups == null) { + // No match - still track metrics (operation was attempted) + long durationNanos = System.nanoTime() - startNanos; + RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); + + // Global capture metrics + metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); + + // Specific String capture metrics + metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); + + return new MatchResult(input); + } + + // For match() (full match semantics), verify the match covers entire input + // extractGroups uses UNANCHORED, so we need to check manually + if (!groups[0].equals(input)) { + // Match found but doesn't cover entire input - this is a partial match + long durationNanos = System.nanoTime() - startNanos; + RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); + + // Global capture metrics + metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); + + // Specific String capture metrics + metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); + + return new MatchResult(input); + } + + long durationNanos = System.nanoTime() - 
startNanos; + + // Track metrics - GLOBAL (ALL) + SPECIFIC (String) + RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); + + // Global capture metrics (ALL capture operations) + metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); + + // Specific String capture metrics + metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); + + // Lazy-load named groups only if needed + Map namedGroupMap = getNamedGroupsMap(); + + return new MatchResult(input, groups, namedGroupMap); + } + + /** + * Finds first match and extracts capture groups. + * + *

This method performs a partial match (searches anywhere in input) and returns a {@link + * MatchResult} for the first match found. If no match is found, the MatchResult will have {@code + * matched() == false}. + * + *

Example - Extract first email from text: + * + *

{@code
+   * Pattern emailPattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)");
+   * MatchResult result = emailPattern.find("Contact us at support@example.com or admin@test.org");
+   *
+   * if (result.matched()) {
+   *     String email = result.group();       // "support@example.com" (first match)
+   *     String user = result.group(1);       // "support"
+   *     String domain = result.group(2);     // "example.com"
+   * }
+   * }
+ * + * @param input the string to search + * @return MatchResult for first match found, or a failed match if no match + * @throws NullPointerException if input is null + * @throws IllegalStateException if pattern is closed + * @see #match(String) for full matching with groups + * @see #findAll(String) for finding all matches + * @since 1.2.0 + */ + public MatchResult find(String input) { + checkNotClosed(); + Objects.requireNonNull(input, "input cannot be null"); + + long startNanos = System.nanoTime(); + + // RE2 extractGroups does UNANCHORED match, so it finds first occurrence + String[] groups = jni.extractGroups(nativeHandle, input); + + long durationNanos = System.nanoTime() - startNanos; + + // Track metrics - GLOBAL (ALL) + SPECIFIC (String) + RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); + + // Global capture metrics (ALL capture operations) + metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); + + // Specific String capture metrics + metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); + + if (groups == null) { + return new MatchResult(input); + } + + Map namedGroupMap = getNamedGroupsMap(); + return new MatchResult(input, groups, namedGroupMap); + } + + /** + * Finds all non-overlapping matches and extracts capture groups from each. + * + *

This method finds all matches in the input and returns a list of {@link MatchResult} + * objects, one for each match. Each MatchResult contains the captured groups for that match. + * + *

Example - Extract all phone numbers: + * + *

{@code
+   * Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})");
+   * List<MatchResult> matches = pattern.findAll("Call 555-1234 or 555-5678 for help");
+   *
+   * for (MatchResult match : matches) {
+   *     String phone = match.group();       // "555-1234", "555-5678"
+   *     String prefix = match.group(1);     // "555", "555"
+   *     String number = match.group(2);     // "1234", "5678"
+   * }
+   * // matches.size() == 2
+   * }
+ * + *

Example - Parse structured log lines: + * + *

{@code
+   * Pattern pattern = Pattern.compile("\\[(\\d+)\\] (\\w+): (.+)");
+   * List<MatchResult> matches = pattern.findAll(logText);
+   *
+   * for (MatchResult match : matches) {
+   *     String timestamp = match.group(1);
+   *     String level = match.group(2);
+   *     String message = match.group(3);
+   *     // Process log entry
+   * }
+   * }
+ * + * @param input the string to search + * @return list of MatchResult objects (one per match), or empty list if no matches + * @throws NullPointerException if input is null + * @throws IllegalStateException if pattern is closed + * @see #match(String) for single full match + * @see #find(String) for first match only + * @since 1.2.0 + */ + public java.util.List findAll(String input) { + checkNotClosed(); + Objects.requireNonNull(input, "input cannot be null"); + + long startNanos = System.nanoTime(); + + String[][] allMatches = jni.findAllMatches(nativeHandle, input); + + long durationNanos = System.nanoTime() - startNanos; + int matchCount = (allMatches != null) ? allMatches.length : 0; + + // Track metrics - GLOBAL (ALL) + SPECIFIC (String) + RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry(); + + // Global capture metrics (ALL capture operations) + metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos); + + // Specific String capture metrics + metrics.incrementCounter(MetricNames.CAPTURE_STRING_OPERATIONS); + metrics.recordTimer(MetricNames.CAPTURE_STRING_LATENCY, durationNanos); + + // Track number of matches found + if (matchCount > 0) { + metrics.incrementCounter(MetricNames.CAPTURE_FINDALL_MATCHES, matchCount); + } + + if (allMatches == null || allMatches.length == 0) { + return java.util.Collections.emptyList(); + } + + // Lazy-load named groups (shared by all MatchResults) + Map namedGroupMap = getNamedGroupsMap(); + + java.util.List results = new java.util.ArrayList<>(allMatches.length); + for (String[] groups : allMatches) { + results.add(new MatchResult(input, groups, namedGroupMap)); + } + + return results; + } + + // ========== Bulk Capture Operations ========== + + /** + * Full match multiple inputs with capture groups (bulk operation). + * + *

Processes all inputs in a single operation, extracting capture groups from each. + * + *

Example - Extract email components from multiple inputs: + * + *

{@code
+   * Pattern emailPattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)");
+   * String[] emails = {"user@example.com", "admin@test.org", "invalid"};
+   *
+   * MatchResult[] results = emailPattern.matchAllWithGroups(emails);
+   * // results[0].matched() = true, group(1) = "user", group(2) = "example.com"
+   * // results[1].matched() = true, group(1) = "admin", group(2) = "test.org"
+   * // results[2].matched() = false
+   * }
+   *
+   * @param inputs array of strings to match
+   * @return array of MatchResults (parallel to inputs, remember to close each)
+   * @throws NullPointerException if inputs is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public MatchResult[] matchAllWithGroups(String[] inputs) {
+    checkNotClosed();
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+
+    if (inputs.length == 0) {
+      return new MatchResult[0];
+    }
+
+    long startNanos = System.nanoTime();
+
+    // Each input is extracted with its own native call. The bulk native method returns all
+    // inputs concatenated in one String[][], so it is not used here (can be optimized later
+    // with a proper bulk native method).
+    Map<String, Integer> namedGroupMap = getNamedGroupsMap();
+    MatchResult[] results = new MatchResult[inputs.length];
+
+    for (int i = 0; i < inputs.length; i++) {
+      String[] groups = jni.extractGroups(nativeHandle, inputs[i]);
+      if (groups != null && groups.length > 0) {
+        results[i] = new MatchResult(inputs[i], groups, namedGroupMap);
+      } else {
+        // Null or empty groups: record a failed match that carries only the input.
+        results[i] = new MatchResult(inputs[i]);
+      }
+    }
+
+    long durationNanos = System.nanoTime() - startNanos;
+    // Integer division; inputs.length is non-zero here because of the early return above.
+    long perItemNanos = durationNanos / inputs.length;
+
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+
+    // Global capture metrics (per-item latency for comparability with single-input calls).
+    metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS, inputs.length);
+    metrics.recordTimer(MetricNames.CAPTURE_LATENCY, perItemNanos);
+
+    // Bulk-specific capture metrics.
+    metrics.incrementCounter(MetricNames.CAPTURE_BULK_OPERATIONS);
+    metrics.incrementCounter(MetricNames.CAPTURE_BULK_ITEMS, inputs.length);
+    metrics.recordTimer(MetricNames.CAPTURE_BULK_LATENCY, perItemNanos);
+
+    return results;
+  }
+
+  /**
+   * Full match multiple inputs with capture groups (bulk operation, collection variant).
+   *
+   * @param inputs collection of strings to match
+   * @return array of MatchResults (parallel to inputs, remember to close each)
+   * @throws NullPointerException if inputs is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public MatchResult[] matchAllWithGroups(java.util.Collection<String> inputs) {
+    checkNotClosed();
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+
+    // Snapshot the collection into a String[] and reuse the array overload. The element type
+    // must be declared: on a raw Collection, toArray(T[]) yields Object[].
+    String[] array = inputs.toArray(new String[0]);
+    return matchAllWithGroups(array);
+  }
+
+  /**
+   * Matches input and extracts capture groups (zero-copy).
+   *
+   *

+   * <p>Zero-copy variant using raw memory address.
+   *
+   * @param address native memory address of UTF-8 encoded text
+   * @param length number of bytes to read
+   * @return MatchResult with captured groups, or failed match if no match
+   * @throws IllegalArgumentException if address is 0 or length is negative
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public MatchResult match(long address, int length) {
+    checkNotClosed();
+    if (address == 0) {
+      throw new IllegalArgumentException("Address must not be 0");
+    }
+    if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+
+    long startNanos = System.nanoTime();
+    String[] groups = jni.extractGroupsDirect(nativeHandle, address, length);
+    long durationNanos = System.nanoTime() - startNanos;
+
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+
+    // Global capture metrics.
+    metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS);
+    metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos);
+
+    // Zero-copy-specific capture metrics.
+    metrics.incrementCounter(MetricNames.CAPTURE_ZERO_COPY_OPERATIONS);
+    metrics.recordTimer(MetricNames.CAPTURE_ZERO_COPY_LATENCY, durationNanos);
+
+    // An empty array is handled like null (as the bulk String variant does) so that groups[0]
+    // below can never be read out of bounds.
+    if (groups == null || groups.length == 0) {
+      // No String input exists on the zero-copy path, so a failed match carries an empty input.
+      return new MatchResult("");
+    }
+
+    // The original text is not available as a String here; group 0 is used as the input.
+    Map<String, Integer> namedGroupMap = getNamedGroupsMap();
+    return new MatchResult(groups[0], groups, namedGroupMap);
+  }
+
+  /**
+   * Matches ByteBuffer content and extracts capture groups (zero-copy).
+   *
+   *

+   * <p>Automatically routes to zero-copy (DirectByteBuffer) or String (heap).
+   *
+   * @param buffer ByteBuffer containing UTF-8 text
+   * @return MatchResult with captured groups
+   * @throws NullPointerException if buffer is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public MatchResult match(ByteBuffer buffer) {
+    checkNotClosed();
+    Objects.requireNonNull(buffer, "buffer cannot be null");
+
+    if (buffer.isDirect()) {
+      // Direct buffer: hand the native address of the current position to the address variant.
+      long address = ((DirectBuffer) buffer).address() + buffer.position();
+      int length = buffer.remaining();
+      return match(address, length);
+    } else {
+      // Heap buffer: decode the remaining bytes as UTF-8 and use the String variant. Reading
+      // through duplicate() leaves the caller's position untouched.
+      byte[] bytes = new byte[buffer.remaining()];
+      buffer.duplicate().get(bytes);
+      String text = new String(bytes, StandardCharsets.UTF_8);
+      return match(text);
+    }
+  }
+
+  /**
+   * Helper: builds the name-to-index map of this pattern's named capture groups.
+   *
+   * <p>The native layer is queried and a new map is built on every call; nothing is cached by
+   * this method, so callers that need the map repeatedly should fetch it once and reuse it.
+   *
+   * @return map from group name to group index, or an empty map if there are no named groups
+   */
+  private Map<String, Integer> getNamedGroupsMap() {
+    String[] namedGroupsArray = jni.getNamedGroups(nativeHandle);
+
+    if (namedGroupsArray == null || namedGroupsArray.length == 0) {
+      return Collections.emptyMap();
+    }
+
+    // Parse flattened array: [name1, index1_str, name2, index2_str, ...]
+    Map<String, Integer> map = new java.util.HashMap<>();
+    for (int i = 0; i < namedGroupsArray.length; i += 2) {
+      String name = namedGroupsArray[i];
+      int index = Integer.parseInt(namedGroupsArray[i + 1]);
+      map.put(name, index);
+    }
+
+    return map;
+  }
+
+  // ========== Capture Group Zero-Copy Operations ==========
+
+  /**
+   * Matches and extracts capture groups using zero-copy (address variant).
+   *
+   * @param address native memory address of UTF-8 text
+   * @param length number of bytes
+   * @return MatchResult with captured groups
+   * @throws IllegalArgumentException if address is 0 or length is negative
+   * @throws IllegalStateException if pattern is closed
+   * @see #match(String) String variant
+   * @since 1.2.0
+   */
+  public MatchResult matchWithGroups(long address, int length) {
+    checkNotClosed();
+    if (address == 0) {
+      throw new IllegalArgumentException("Address must not be 0");
+    }
+    if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+
+    long startNanos = System.nanoTime();
+    String[] groups = jni.extractGroupsDirect(nativeHandle, address, length);
+    long durationNanos = System.nanoTime() - startNanos;
+
+    // Global capture metrics plus the zero-copy-specific ones.
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+
+    metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS);
+    metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos);
+    metrics.incrementCounter(MetricNames.CAPTURE_ZERO_COPY_OPERATIONS);
+    metrics.recordTimer(MetricNames.CAPTURE_ZERO_COPY_LATENCY, durationNanos);
+
+    // An empty array is handled like null so that groups[0] below is never read out of bounds.
+    if (groups == null || groups.length == 0) {
+      return new MatchResult("");
+    }
+
+    // No String input exists on the zero-copy path; group 0 stands in for the input.
+    Map<String, Integer> namedGroupMap = getNamedGroupsMap();
+    return new MatchResult(groups[0], groups, namedGroupMap);
+  }
+
+  /**
+   * Matches and extracts capture groups (ByteBuffer zero-copy).
+   *
+   * @param buffer ByteBuffer
+   * @return MatchResult with captured groups
+   * @throws NullPointerException if buffer is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public MatchResult matchWithGroups(ByteBuffer buffer) {
+    checkNotClosed();
+    Objects.requireNonNull(buffer, "buffer cannot be null");
+
+    if (buffer.isDirect()) {
+      long address = ((DirectBuffer) buffer).address() + buffer.position();
+      int length = buffer.remaining();
+      return matchWithGroups(address, length);
+    } else {
+      // Heap buffer: decode the remaining bytes as UTF-8 without moving the caller's position.
+      byte[] bytes = new byte[buffer.remaining()];
+      buffer.duplicate().get(bytes);
+      String text = new String(bytes, StandardCharsets.UTF_8);
+      return match(text);
+    }
+  }
+
+  /**
+   * Finds and extracts capture groups using zero-copy (address variant).
+   *
+   * @param address native memory address
+   * @param length number of bytes
+   * @return MatchResult for first match
+   * @throws IllegalArgumentException if address is 0 or length is negative
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public MatchResult findWithGroups(long address, int length) {
+    checkNotClosed();
+    if (address == 0) {
+      throw new IllegalArgumentException("Address must not be 0");
+    }
+    if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+
+    long startNanos = System.nanoTime();
+    String[] groups = jni.extractGroupsDirect(nativeHandle, address, length);
+    long durationNanos = System.nanoTime() - startNanos;
+
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+
+    metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS);
+    metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos);
+    metrics.incrementCounter(MetricNames.CAPTURE_ZERO_COPY_OPERATIONS);
+    metrics.recordTimer(MetricNames.CAPTURE_ZERO_COPY_LATENCY, durationNanos);
+
+    // An empty array is handled like null so that groups[0] below is never read out of bounds.
+    if (groups == null || groups.length == 0) {
+      return new MatchResult("");
+    }
+
+    Map<String, Integer> namedGroupMap = getNamedGroupsMap();
+    return new MatchResult(groups[0], groups, namedGroupMap);
+  }
+
+  /**
+   * Finds and extracts capture groups (ByteBuffer zero-copy).
+   *
+   * @param buffer ByteBuffer
+   * @return MatchResult for first match
+   * @throws NullPointerException if buffer is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public MatchResult findWithGroups(ByteBuffer buffer) {
+    checkNotClosed();
+    Objects.requireNonNull(buffer, "buffer cannot be null");
+
+    if (buffer.isDirect()) {
+      long address = ((DirectBuffer) buffer).address() + buffer.position();
+      int length = buffer.remaining();
+      return findWithGroups(address, length);
+    } else {
+      byte[] bytes = new byte[buffer.remaining()];
+      buffer.duplicate().get(bytes);
+      String text = new String(bytes, StandardCharsets.UTF_8);
+      return find(text);
+    }
+  }
+
+  /**
+   * Finds all matches and extracts capture groups using zero-copy (address variant).
+   *
+   * @param address native memory address
+   * @param length number of bytes
+   * @return list of MatchResult objects
+   * @throws IllegalArgumentException if address is 0 or length is negative
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public java.util.List<MatchResult> findAllWithGroups(long address, int length) {
+    checkNotClosed();
+    if (address == 0) {
+      throw new IllegalArgumentException("Address must not be 0");
+    }
+    if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+
+    long startNanos = System.nanoTime();
+    String[][] allMatches = jni.findAllMatchesDirect(nativeHandle, address, length);
+    long durationNanos = System.nanoTime() - startNanos;
+
+    // A null result from the native layer is treated the same as "no matches".
+    int matchCount = (allMatches != null) ? allMatches.length : 0;
+
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+
+    metrics.incrementCounter(MetricNames.CAPTURE_OPERATIONS);
+    metrics.recordTimer(MetricNames.CAPTURE_LATENCY, durationNanos);
+    metrics.incrementCounter(MetricNames.CAPTURE_ZERO_COPY_OPERATIONS);
+    metrics.recordTimer(MetricNames.CAPTURE_ZERO_COPY_LATENCY, durationNanos);
+
+    if (matchCount > 0) {
+      metrics.incrementCounter(MetricNames.CAPTURE_FINDALL_MATCHES, matchCount);
+    }
+
+    if (allMatches == null || allMatches.length == 0) {
+      return java.util.Collections.emptyList();
+    }
+
+    Map<String, Integer> namedGroupMap = getNamedGroupsMap();
+
+    // No String input exists on the zero-copy path; each result uses its group 0 as the input.
+    java.util.List<MatchResult> results = new java.util.ArrayList<>(allMatches.length);
+    for (String[] groups : allMatches) {
+      results.add(new MatchResult(groups[0], groups, namedGroupMap));
+    }
+
+    return results;
+  }
+
+  /**
+   * Finds all matches and extracts capture groups (ByteBuffer zero-copy).
+   *
+   * @param buffer ByteBuffer
+   * @return list of MatchResult objects
+   * @throws NullPointerException if buffer is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public java.util.List<MatchResult> findAllWithGroups(ByteBuffer buffer) {
+    checkNotClosed();
+    Objects.requireNonNull(buffer, "buffer cannot be null");
+
+    if (buffer.isDirect()) {
+      long address = ((DirectBuffer) buffer).address() + buffer.position();
+      int length = buffer.remaining();
+      return findAllWithGroups(address, length);
+    } else {
+      byte[] bytes = new byte[buffer.remaining()];
+      buffer.duplicate().get(bytes);
+      String text = new String(bytes, StandardCharsets.UTF_8);
+      return findAll(text);
+    }
+  }
+
+  // ========== Replace Operations ==========
+
+  /**
+   * Replaces the first match of this pattern in the input with the replacement string.
+   *
+   *

If the pattern matches, the first occurrence is replaced. If no match is found, the original + * input is returned unchanged. + * + *

Backreferences: RE2 supports backreferences using {@code \\1}, {@code \\2}, + * etc. (note the double backslash for Java string escaping). Unlike java.util.regex which uses + * {@code $1}, {@code $2}, RE2 uses backslash notation. + * + *

Example - Simple replacement: + * + *

{@code
+   * Pattern pattern = Pattern.compile("\\d+");
+   * String result = pattern.replaceFirst("Item 123 costs $456", "XXX");
+   * // result = "Item XXX costs $456"
+   * }
+ * + *

Example - Backreferences: + * + *

{@code
+   * Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
+   * String result = pattern.replaceFirst("Date: 2025-11-24", "\\2/\\3/\\1");
+   * // result = "Date: 11/24/2025" (reordered date components)
+   * }
+   *
+   * @param input the input string
+   * @param replacement the replacement string (supports {@code \\1}, {@code \\2}, etc.
+   *     backreferences)
+   * @return the input with the first match replaced, or original input if no match
+   * @throws NullPointerException if input or replacement is null
+   * @throws IllegalStateException if pattern is closed
+   * @see #replaceAll(String, String) to replace all matches
+   * @since 1.2.0
+   */
+  public String replaceFirst(String input, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(input, "input cannot be null");
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+
+    // Only the native call is timed.
+    final long begunAt = System.nanoTime();
+    final String replaced = jni.replaceFirst(nativeHandle, input, replacement);
+    final long elapsedNanos = System.nanoTime() - begunAt;
+
+    // Record the global replace metrics first, then the String-specific ones.
+    final RE2MetricsRegistry registry = cache.getConfig().metricsRegistry();
+    registry.incrementCounter(MetricNames.REPLACE_OPERATIONS);
+    registry.recordTimer(MetricNames.REPLACE_LATENCY, elapsedNanos);
+    registry.incrementCounter(MetricNames.REPLACE_STRING_OPERATIONS);
+    registry.recordTimer(MetricNames.REPLACE_STRING_LATENCY, elapsedNanos);
+
+    // A null native result falls back to the unmodified input.
+    if (replaced == null) {
+      return input;
+    }
+    return replaced;
+  }
+
+  /**
+   * Replaces all matches of this pattern in the input with the replacement string.
+   *
+   *

All non-overlapping matches are replaced. If no matches are found, the original input is + * returned unchanged. + * + *

Backreferences: Use {@code \\1}, {@code \\2}, etc. for captured groups. + * + *

Example - Replace all digits: + * + *

{@code
+   * Pattern pattern = Pattern.compile("\\d+");
+   * String result = pattern.replaceAll("Item 123 costs $456", "XXX");
+   * // result = "Item XXX costs $XXX"
+   * }
+ * + *

Example - Redact emails: + * + *

{@code
+   * Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
+   * String result = emailPattern.replaceAll("Contact user@example.com or admin@test.org", "[REDACTED]");
+   * // result = "Contact [REDACTED] or [REDACTED]"
+   * }
+ * + *

Example - Backreferences for formatting: + * + *

{@code
+   * Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})");
+   * String result = pattern.replaceAll("Call 555-1234 or 555-5678", "(\\1) \\2");
+   * // result = "Call (555) 1234 or (555) 5678"
+   * }
+   *
+   * @param input the input string
+   * @param replacement the replacement string (supports {@code \\1}, {@code \\2}, etc.
+   *     backreferences)
+   * @return the input with all matches replaced, or original input if no matches
+   * @throws NullPointerException if input or replacement is null
+   * @throws IllegalStateException if pattern is closed
+   * @see #replaceFirst(String, String) to replace only the first match
+   * @since 1.2.0
+   */
+  public String replaceAll(String input, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(input, "input cannot be null");
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+
+    // Only the native call is timed.
+    final long begunAt = System.nanoTime();
+    final String replaced = jni.replaceAll(nativeHandle, input, replacement);
+    final long elapsedNanos = System.nanoTime() - begunAt;
+
+    // Record the global replace metrics first, then the String-specific ones.
+    final RE2MetricsRegistry registry = cache.getConfig().metricsRegistry();
+    registry.incrementCounter(MetricNames.REPLACE_OPERATIONS);
+    registry.recordTimer(MetricNames.REPLACE_LATENCY, elapsedNanos);
+    registry.incrementCounter(MetricNames.REPLACE_STRING_OPERATIONS);
+    registry.recordTimer(MetricNames.REPLACE_STRING_LATENCY, elapsedNanos);
+
+    // A null native result falls back to the unmodified input.
+    if (replaced == null) {
+      return input;
+    }
+    return replaced;
+  }
+
+  /**
+   * Replaces all matches in multiple strings (bulk operation).
+   *
+   *

Processes all inputs in a single JNI call for better performance. + * + *

Example - Batch redaction: + * + *

{@code
+   * Pattern ssnPattern = Pattern.compile("\\d{3}-\\d{2}-\\d{4}");
+   * String[] logs = {
+   *     "User 123-45-6789 logged in",
+   *     "No PII here",
+   *     "SSN: 987-65-4321"
+   * };
+   *
+   * String[] redacted = ssnPattern.replaceAll(logs, "[REDACTED]");
+   * // redacted = ["User [REDACTED] logged in", "No PII here", "SSN: [REDACTED]"]
+   * }
+   *
+   * @param inputs array of strings to process
+   * @param replacement the replacement string (supports backreferences)
+   * @return array of strings with matches replaced (parallel to inputs)
+   * @throws NullPointerException if inputs or replacement is null
+   * @throws IllegalStateException if pattern is closed
+   * @see #replaceAll(String, String) single-string variant
+   * @since 1.2.0
+   */
+  public String[] replaceAll(String[] inputs, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+
+    final int itemCount = inputs.length;
+    if (itemCount == 0) {
+      return new String[0];
+    }
+
+    // One native call handles the whole batch.
+    final long begunAt = System.nanoTime();
+    final String[] replaced = jni.replaceAllBulk(nativeHandle, inputs, replacement);
+    final long elapsedNanos = System.nanoTime() - begunAt;
+
+    // Per-item latency keeps the global timer comparable with single-input calls.
+    final long nanosPerItem = elapsedNanos / itemCount;
+
+    final RE2MetricsRegistry registry = cache.getConfig().metricsRegistry();
+    registry.incrementCounter(MetricNames.REPLACE_OPERATIONS, itemCount);
+    registry.recordTimer(MetricNames.REPLACE_LATENCY, nanosPerItem);
+    registry.incrementCounter(MetricNames.REPLACE_BULK_OPERATIONS);
+    registry.incrementCounter(MetricNames.REPLACE_BULK_ITEMS, itemCount);
+    registry.recordTimer(MetricNames.REPLACE_BULK_LATENCY, nanosPerItem);
+
+    // A null native result falls back to the caller's own array.
+    if (replaced == null) {
+      return inputs;
+    }
+    return replaced;
+  }
+
+  /**
+   * Replaces all matches in a collection (bulk operation).
+   *
+   *

+   * <p>Processes all inputs in a single JNI call for better performance.
+   *
+   * @param inputs collection of strings to process
+   * @param replacement the replacement string (supports backreferences)
+   * @return list of strings with matches replaced (same order as inputs)
+   * @throws NullPointerException if inputs or replacement is null
+   * @throws IllegalStateException if pattern is closed
+   * @see #replaceAll(String, String) single-string variant
+   * @since 1.2.0
+   */
+  public java.util.List<String> replaceAll(
+      java.util.Collection<String> inputs, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+
+    if (inputs.isEmpty()) {
+      return new java.util.ArrayList<>();
+    }
+
+    // Snapshot into a String[] and reuse the array overload.
+    String[] array = inputs.toArray(new String[0]);
+    String[] results = replaceAll(array, replacement);
+
+    // Copy into a growable ArrayList so the non-empty result has the same mutability as the
+    // empty one (Arrays.asList alone is a fixed-size view).
+    return new java.util.ArrayList<>(java.util.Arrays.asList(results));
+  }
+
+  // ========== Phase 3: Zero-Copy Replace Operations ==========
+
+  /**
+   * Replaces first match using zero-copy memory access (off-heap memory).
+   *
+   *

+   * <p>Zero-copy operation: Accesses off-heap memory directly without copying.
+   * Caller must ensure memory remains valid during this call.
+   *
+   * @param address native memory address (from DirectByteBuffer or native allocator)
+   * @param length number of bytes to process
+   * @param replacement the replacement string (supports backreferences)
+   * @return string with first match replaced
+   * @throws IllegalStateException if pattern is closed
+   * @throws NullPointerException if replacement is null
+   * @throws IllegalArgumentException if address is 0 or length is negative
+   * @since 1.2.0
+   */
+  public String replaceFirst(long address, int length, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+    // Same argument checks as the other address-based methods: reject a null address or a
+    // negative length before anything is handed to native code.
+    if (address == 0) {
+      throw new IllegalArgumentException("Address must not be 0");
+    }
+    if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+
+    long startNanos = System.nanoTime();
+    String result = jni.replaceFirstDirect(nativeHandle, address, length, replacement);
+    long durationNanos = System.nanoTime() - startNanos;
+
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+
+    // Global replace metrics.
+    metrics.incrementCounter(MetricNames.REPLACE_OPERATIONS);
+    metrics.recordTimer(MetricNames.REPLACE_LATENCY, durationNanos);
+
+    // Zero-copy-specific replace metrics.
+    metrics.incrementCounter(MetricNames.REPLACE_ZERO_COPY_OPERATIONS);
+    metrics.recordTimer(MetricNames.REPLACE_ZERO_COPY_LATENCY, durationNanos);
+
+    return result;
+  }
+
+  /**
+   * Replaces first match using ByteBuffer (zero-copy if direct, converted if heap).
+   *
+   * @param input ByteBuffer containing UTF-8 encoded text
+   * @param replacement the replacement string (supports backreferences)
+   * @return string with first match replaced
+   * @throws IllegalStateException if pattern is closed
+   * @throws NullPointerException if input or replacement is null
+   * @since 1.2.0
+   */
+  public String replaceFirst(java.nio.ByteBuffer input, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(input, "input cannot be null");
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+
+    if (!input.isDirect()) {
+      // Heap buffer: decode the remaining bytes as UTF-8 and use the String overload. Reading
+      // through duplicate() leaves the caller's position untouched.
+      final byte[] payload = new byte[input.remaining()];
+      input.duplicate().get(payload);
+      return replaceFirst(new String(payload, StandardCharsets.UTF_8), replacement);
+    }
+
+    // Direct buffer: pass the native address of the current position (zero-copy path).
+    final long startAddress = ((DirectBuffer) input).address() + input.position();
+    return replaceFirst(startAddress, input.remaining(), replacement);
+  }
+
+  /**
+   * Replaces all matches using zero-copy memory access (off-heap memory).
+   *
+   * @param address native memory address (from DirectByteBuffer or native allocator)
+   * @param length number of bytes to process
+   * @param replacement the replacement string (supports backreferences)
+   * @return string with all matches replaced
+   * @throws IllegalStateException if pattern is closed
+   * @throws NullPointerException if replacement is null
+   * @throws IllegalArgumentException if address is 0 or length is negative
+   * @since 1.2.0
+   */
+  public String replaceAll(long address, int length, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+    // Same argument checks as the other address-based methods: reject a null address or a
+    // negative length before anything is handed to native code.
+    if (address == 0) {
+      throw new IllegalArgumentException("Address must not be 0");
+    }
+    if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+
+    long startNanos = System.nanoTime();
+    String result = jni.replaceAllDirect(nativeHandle, address, length, replacement);
+    long durationNanos = System.nanoTime() - startNanos;
+
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+
+    // Global replace metrics.
+    metrics.incrementCounter(MetricNames.REPLACE_OPERATIONS);
+    metrics.recordTimer(MetricNames.REPLACE_LATENCY, durationNanos);
+
+    // Zero-copy-specific replace metrics.
+    metrics.incrementCounter(MetricNames.REPLACE_ZERO_COPY_OPERATIONS);
+    metrics.recordTimer(MetricNames.REPLACE_ZERO_COPY_LATENCY, durationNanos);
+
+    return result;
+  }
+
+  /**
+   * Replaces all matches using ByteBuffer (zero-copy if direct, converted if heap).
+   *
+   * @param input ByteBuffer containing UTF-8 encoded text
+   * @param replacement the replacement string (supports backreferences)
+   * @return string with all matches replaced
+   * @throws IllegalStateException if pattern is closed
+   * @throws NullPointerException if input or replacement is null
+   * @since 1.2.0
+   */
+  public String replaceAll(java.nio.ByteBuffer input, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(input, "input cannot be null");
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+
+    if (!input.isDirect()) {
+      // Heap buffer: decode the remaining bytes as UTF-8 and use the String overload. Reading
+      // through duplicate() leaves the caller's position untouched.
+      final byte[] payload = new byte[input.remaining()];
+      input.duplicate().get(payload);
+      return replaceAll(new String(payload, StandardCharsets.UTF_8), replacement);
+    }
+
+    // Direct buffer: pass the native address of the current position (zero-copy path).
+    final long startAddress = ((DirectBuffer) input).address() + input.position();
+    return replaceAll(startAddress, input.remaining(), replacement);
+  }
+
+  /**
+   * Replaces all matches in multiple off-heap buffers (bulk zero-copy operation).
+   *
+   * @param addresses native memory addresses (from DirectByteBuffer or native allocator)
+   * @param lengths number of bytes for each address
+   * @param replacement the replacement string (supports backreferences)
+   * @return array of strings with all matches replaced (parallel to inputs)
+   * @throws IllegalStateException if pattern is closed
+   * @throws NullPointerException if addresses, lengths, or replacement is null
+   * @throws IllegalArgumentException if addresses and lengths have different lengths
+   * @since 1.2.0
+   */
+  public String[] replaceAll(long[] addresses, int[] lengths, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(addresses, "addresses cannot be null");
+    Objects.requireNonNull(lengths, "lengths cannot be null");
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+
+    final int itemCount = addresses.length;
+    if (itemCount != lengths.length) {
+      throw new IllegalArgumentException("addresses and lengths must have the same length");
+    }
+    if (itemCount == 0) {
+      return new String[0];
+    }
+
+    // One native call handles the whole batch.
+    final long begunAt = System.nanoTime();
+    final String[] replaced =
+        jni.replaceAllDirectBulk(nativeHandle, addresses, lengths, replacement);
+    final long elapsedNanos = System.nanoTime() - begunAt;
+
+    // Per-item latency keeps the global timer comparable with single-input calls.
+    final long nanosPerItem = elapsedNanos / itemCount;
+
+    final RE2MetricsRegistry registry = cache.getConfig().metricsRegistry();
+    registry.incrementCounter(MetricNames.REPLACE_OPERATIONS, itemCount);
+    registry.recordTimer(MetricNames.REPLACE_LATENCY, nanosPerItem);
+    registry.incrementCounter(MetricNames.REPLACE_BULK_ZERO_COPY_OPERATIONS);
+    registry.incrementCounter(MetricNames.REPLACE_BULK_ZERO_COPY_ITEMS, itemCount);
+    registry.recordTimer(MetricNames.REPLACE_BULK_ZERO_COPY_LATENCY, nanosPerItem);
+
+    return replaced;
+  }
+
+  /**
+   * Replaces all matches in multiple ByteBuffers (bulk operation, zero-copy if direct).
+   *
+   * @param inputs array of ByteBuffers containing UTF-8 encoded text
+   * @param replacement the replacement string (supports backreferences)
+   * @return array of strings with all matches replaced (parallel to inputs)
+   * @throws IllegalStateException if pattern is closed
+   * @throws NullPointerException if inputs or replacement is null
+   * @since 1.2.0
+   */
+  public String[] replaceAll(java.nio.ByteBuffer[] inputs, String replacement) {
+    checkNotClosed();
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+    Objects.requireNonNull(replacement, "replacement cannot be null");
+
+    final int bufferCount = inputs.length;
+    if (bufferCount == 0) {
+      return new String[0];
+    }
+
+    // The zero-copy bulk path is only usable when every buffer is direct; the scan stops at
+    // the first heap buffer.
+    boolean everyBufferDirect = true;
+    for (int i = 0; i < bufferCount && everyBufferDirect; i++) {
+      everyBufferDirect = inputs[i].isDirect();
+    }
+
+    if (!everyBufferDirect) {
+      // Mixed or heap buffers: fall back to the single-buffer overload for each element.
+      final String[] perBuffer = new String[bufferCount];
+      for (int i = 0; i < bufferCount; i++) {
+        perBuffer[i] = replaceAll(inputs[i], replacement);
+      }
+      return perBuffer;
+    }
+
+    // All direct: gather address/length pairs and issue one bulk zero-copy call.
+    final long[] addresses = new long[bufferCount];
+    final int[] lengths = new int[bufferCount];
+    for (int i = 0; i < bufferCount; i++) {
+      addresses[i] = ((DirectBuffer) inputs[i]).address() + inputs[i].position();
+      lengths[i] = inputs[i].remaining();
+    }
+    return replaceAll(addresses, lengths, replacement);
+  }
+
+  /** Returns the pattern string held by this instance. */
+  public String pattern() {
+    return patternString;
+  }
+
+  /** Returns the case-sensitivity flag held by this instance. */
+  public boolean isCaseSensitive() {
+    return caseSensitive;
+  }
+
+  /**
+   * Gets the native (off-heap) memory consumed by this compiled pattern.
+   *
+   *

+   * <p>The value is the size in bytes of the compiled DFA/NFA program, which makes it useful
+   * for monitoring memory pressure caused by pattern compilation.
+   *
+   * @return size in bytes
+   * @throws IllegalStateException if pattern is closed
+   */
+  public long getNativeMemoryBytes() {
+    // Closed patterns are rejected before the stored size is returned.
+    checkNotClosed();
+    return nativeMemoryBytes;
+  }
+
+  /**
+   * Gets the DFA fanout for this pattern.
+   *
+   *

+   * <p>In the returned array, index i holds the number of bytes that lead to different DFA
+   * states at position i. Useful for analyzing pattern complexity.
+   *
+   * @return array of fanout values (one per byte position in DFA)
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public int[] getProgramFanout() {
+    // Closed patterns are rejected before the native layer is queried.
+    checkNotClosed();
+    return jni.programFanout(nativeHandle);
+  }
+
+  /**
+   * Escapes special regex characters for literal matching.
+   *
+   *

Converts a literal string into a regex pattern that matches that exact string. Special + * characters like . * + ? ( ) [ ] { } ^ $ | \ are escaped. + * + *

Example: + * + *

{@code
+   * String literal = "price: $9.99";
+   * String escaped = Pattern.quoteMeta(literal);
+   * // escaped = "price: \\$9\\.99"
+   *
+   * Pattern p = Pattern.compile(escaped);
+   * boolean matches = p.matches("price: $9.99");  // true
+   * }
+   *
+   * @param text literal text to escape
+   * @return escaped pattern that matches the literal text exactly
+   * @throws NullPointerException if text is null
+   * @since 1.2.0
+   */
+  public static String quoteMeta(String text) {
+    // Enforce the documented null contract here rather than leaving it to the native layer.
+    Objects.requireNonNull(text, "text cannot be null");
+    return RE2Native.INSTANCE.quoteMeta(text);
+  }
+
+  /** Returns the native handle after verifying that the pattern is not closed. */
+  long getNativeHandle() {
+    checkNotClosed();
+    return nativeHandle;
+  }
+
+  /**
+   * Increments reference count (called by Matcher constructor).
+   *
+   * @throws ResourceException if maxMatchersPerPattern exceeded
+   */
+  void incrementRefCount() {
+    int current = refCount.incrementAndGet();
+
+    if (current > maxMatchersPerPattern) {
+      // Undo the increment so a rejected matcher is not counted as active.
+      refCount.decrementAndGet();
+      throw new ResourceException(
+          "Maximum matchers per pattern exceeded: "
+              + maxMatchersPerPattern
+              + " (current matchers on this pattern: "
+              + current
+              + ")");
+    }
+  }
+
+  /** Decrements reference count (called by Matcher.close()). */
+  void decrementRefCount() {
+    refCount.decrementAndGet();
+  }
+
+  /**
+   * Gets current reference count (for testing/monitoring).
+   *
+   * @return number of active matchers using this pattern
+   */
+  public int getRefCount() {
+    return refCount.get();
+  }
+
+  /** Returns the current value of the closed flag. */
+  public boolean isClosed() {
+    return closed.get();
+  }
+
+  /**
+   * Checks if the native pattern pointer is still valid.
+   *
+   *

Used for defensive validation to detect memory corruption or other issues that could cause
+   * the native pointer to become invalid.
+   *
+   * @return true if pattern is valid, false if closed or native pointer invalid
+   */
+  public boolean isValid() {
+    boolean valid = false;
+    if (!closed.get()) {
+      try {
+        valid = jni.patternOk(nativeHandle);
+      } catch (Exception e) {
+        // A failing validation call is reported as "not valid" rather than propagated.
+        logger.warn("RE2: Exception while validating pattern", e);
+      }
+    }
+    return valid;
+  }
+
+  /**
+   * Closes this pattern unless it is owned by the cache.
+   *
+   * <p>Cached patterns ignore the call. Otherwise a graceful release is attempted first; if
+   * matchers are still registered afterwards, the method sleeps 100ms once and then forces the
+   * release regardless of the reference count.
+   */
+  @Override
+  public void close() {
+    if (fromCache) {
+      // Expected with try-with-resources on cached patterns: the cache owns the lifecycle.
+      logger.trace("RE2: Attempted to close cached pattern (ignoring - cache manages lifecycle)");
+      return;
+    }
+
+    // Attempt 1: graceful close, which does nothing while matchers remain registered.
+    forceClose(false);
+    if (closed.get() || refCount.get() <= 0) {
+      return;
+    }
+
+    // Give in-flight matchers a short grace period to close.
+    try {
+      Thread.sleep(100);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt(); // keep the interrupt status visible to the caller
+    }
+
+    if (closed.get()) {
+      return;
+    }
+
+    // Attempt 2: force release regardless (DANGEROUS if matchers still active).
+    logger.warn(
+        "RE2: Pattern still has {} active matcher(s) after 100ms wait - forcing release anyway",
+        refCount.get());
+    forceClose(true);
+  }
+
+  /**
+   * Force closes the pattern (INTERNAL USE ONLY - called by cache during eviction).
+   *
+   *

DO NOT CALL THIS METHOD. This is internal API for PatternCache. Use {@link
+   * #close()} instead.
+   *
+   * <p>Equivalent to {@code forceClose(false)}: the native pattern is released only when no
+   * matcher currently holds a reference; otherwise a warning is logged and nothing is freed.
+   *
+   * <p>Public for PatternCache access (different package), but not part of public API.
+   */
+  public void forceClose() {
+    forceClose(false);
+  }
+
+  /**
+   * Force closes the pattern with optional unconditional release.
+   *
+   * <p>INTERNAL USE ONLY.
+   *
+   * <p>The native handle is freed at most once (guarded by the closed flag), and the resource
+   * tracker is notified even when the native free call throws.
+   *
+   * @param releaseRegardless if true, releases even if matchers are active (DANGEROUS - can cause
+   *     crashes)
+   */
+  public void forceClose(boolean releaseRegardless) {
+    if (refCount.get() > 0) {
+      if (!releaseRegardless) {
+        // Graceful mode: leave the pattern alone while matchers are registered.
+        logger.warn(
+            "RE2: Cannot force close pattern - still in use by {} matcher(s)", refCount.get());
+        return;
+      }
+      // DANGEROUS: forcing release despite active matchers.
+      logger.error(
+          "RE2: FORCE releasing pattern despite {} active matcher(s) - "
+              + "this may cause use-after-free crashes if matchers are still being used!",
+          refCount.get());
+    }
+
+    if (!closed.compareAndSet(false, true)) {
+      return; // already closed - never free the handle twice
+    }
+
+    logger.trace(
+        "RE2: Force closing pattern - fromCache: {}, releaseRegardless: {}",
+        fromCache,
+        releaseRegardless);
+
+    try {
+      jni.freePattern(nativeHandle);
+    } catch (Exception e) {
+      logger.error("RE2: Error freeing pattern native handle", e);
+    } finally {
+      // CRITICAL: always record the free, even if freePattern threw.
+      cache.getResourceTracker().trackPatternFreed(cache.getConfig().metricsRegistry());
+    }
+  }
+
+  /**
+   * Gets cache statistics (for monitoring).
+   *
+   * @return statistics reported by the global cache
+   */
+  public static com.axonops.libre2.cache.CacheStatistics getCacheStatistics() {
+    return cache.getStatistics();
+  }
+
+  /** Clears the pattern cache (for testing/maintenance). */
+  public static void clearCache() {
+    cache.clear();
+  }
+
+  /** Fully resets the cache including statistics (for testing only). */
+  public static void resetCache() {
+    cache.reset();
+  }
+
+  /**
+   * Reconfigures the cache with new settings (for testing only).
+   *
+   *

This replaces the existing cache with a new one using the provided config. All cached
+   * patterns are cleared.
+   *
+   * @param config the new configuration
+   */
+  public static void configureCache(RE2Config config) {
+    cache.reconfigure(config);
+  }
+
+  /**
+   * Sets a new global cache (for testing only).
+   *
+   * <p>WARNING: This replaces the entire global cache. Use with caution. Primarily for tests that
+   * need to inject a custom cache with metrics.
+   *
+   * @param newCache the new cache to use globally
+   * @throws NullPointerException if newCache is null
+   */
+  public static void setGlobalCache(PatternCache newCache) {
+    // Fail fast: a null cache would make every static cache accessor throw later on.
+    cache = Objects.requireNonNull(newCache, "newCache cannot be null");
+  }
+
+  /**
+   * Gets the current cache configuration.
+   *
+   * @return the current RE2Config
+   */
+  public static RE2Config getCacheConfig() {
+    return cache.getConfig();
+  }
+
+  /**
+   * Guards instance operations against use after close.
+   *
+   * @throws IllegalStateException if the closed flag is set
+   */
+  private void checkNotClosed() {
+    if (closed.get()) {
+      throw new IllegalStateException("RE2: Pattern is closed");
+    }
+  }
+
+  // ========== Bulk Matching Operations ==========
+
+  /**
+   * Matches multiple inputs in a single JNI call (minimizes overhead).
+   *
+   *

This method processes an entire collection in one native call, significantly reducing JNI + * crossing overhead compared to calling {@link #matches(String)} in a loop. The performance + * benefit increases with collection size and pattern complexity. + * + *

Example - Validate multiple emails: + * + *

{@code
+   * Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
+   * List emails = List.of("user@example.com", "invalid", "admin@test.org");
+   * boolean[] results = emailPattern.matchAll(emails);
+   * // results = [true, false, true]
+   *
+   * // Use results
+   * for (int i = 0; i < emails.size(); i++) {
+   *     if (results[i]) {
+   *         System.out.println("Valid: " + emails.get(i));
+   *     }
+   * }
+   * }
+ * + *

Performance Characteristics: + * + *

    + *
  • Throughput: ~3-5 million matches/second for simple patterns + *
  • Overhead: Single JNI call (~50ns) vs N calls (~50ns each) + *
  • Best for: 100+ strings, or complex patterns where matching cost > JNI cost + *
  • Benchmark: See {@code BulkMatchingPerformanceTest} for detailed comparisons + *
+ * + *

Supported Collection Types: + * + *

    + *
  • {@link java.util.List} - ArrayList, LinkedList, Vector, etc. + *
  • {@link java.util.Set} - HashSet, TreeSet, LinkedHashSet, etc. + *
  • {@link java.util.Queue} - LinkedList, ArrayDeque, PriorityQueue, etc. + *
  • Any Collection implementation + *
+ * + *

Thread Safety: Pattern is thread-safe, but if the collection is being modified
+   * concurrently by other threads, you must synchronize externally.
+   *
+   * @param inputs collection of strings to match (supports List, Set, Queue, etc.)
+   * @return boolean array parallel to inputs (same size and order) indicating matches
+   * @throws NullPointerException if inputs is null
+   * @throws IllegalArgumentException if the collection contains non-String elements
+   * @see #matchAll(String[]) array variant
+   * @see #filter(java.util.Collection) to extract only matching elements
+   * @since 1.0.0
+   */
+  public boolean[] matchAll(java.util.Collection inputs) {
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+
+    if (!inputs.isEmpty()) {
+      try {
+        // Snapshot into an array so the array overload can make a single native call.
+        final String[] snapshot = inputs.toArray(new String[0]);
+        return matchAll(snapshot);
+      } catch (ArrayStoreException e) {
+        throw new IllegalArgumentException(
+            "Collection contains non-String elements. All elements must be String type. "
+                + "If you have Collection or other types, convert to strings first: "
+                + "collection.stream().map(Object::toString).toList()",
+            e);
+      }
+    }
+
+    // Empty input never reaches the native layer.
+    return new boolean[0];
+  }
+
+  /**
+   * Matches multiple inputs in a single JNI call (array variant).
+   *
+   *

Optimized for arrays - no collection conversion overhead. + * + *

Example - Process array of phone numbers: + * + *

{@code
+   * Pattern phonePattern = Pattern.compile("\\d{3}-\\d{4}");
+   * String[] phones = {"123-4567", "invalid", "999-8888"};
+   * boolean[] results = phonePattern.matchAll(phones);
+   * // results = [true, false, true]
+   * }
+   *
+   * @param inputs array of strings to match
+   * @return boolean array parallel to inputs indicating matches
+   * @throws NullPointerException if inputs is null
+   * @throws IllegalStateException if pattern is closed
+   * @see #matchAll(java.util.Collection) collection variant
+   * @since 1.0.0
+   */
+  public boolean[] matchAll(String[] inputs) {
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+    checkNotClosed();
+
+    final int count = inputs.length;
+    if (count == 0) {
+      return new boolean[0];
+    }
+
+    // Time only the native call itself.
+    final long begin = System.nanoTime();
+    final boolean[] nativeResults = jni.fullMatchBulk(nativeHandle, inputs);
+    final long elapsedNanos = System.nanoTime() - begin;
+
+    final RE2MetricsRegistry registry = Pattern.getGlobalCache().getConfig().metricsRegistry();
+    // count is non-zero here, so the per-item average is always defined.
+    final long nanosPerItem = elapsedNanos / count;
+
+    // Global metrics (ALL matching operations) - per-item latency keeps them comparable.
+    registry.incrementCounter(MetricNames.MATCHING_OPERATIONS, count);
+    registry.recordTimer(MetricNames.MATCHING_LATENCY, nanosPerItem);
+    registry.recordTimer(MetricNames.MATCHING_FULL_MATCH_LATENCY, nanosPerItem);
+
+    // Metrics specific to the String bulk path.
+    registry.incrementCounter(MetricNames.MATCHING_BULK_OPERATIONS);
+    registry.incrementCounter(MetricNames.MATCHING_BULK_ITEMS, count);
+    registry.recordTimer(MetricNames.MATCHING_BULK_LATENCY, nanosPerItem);
+
+    if (nativeResults == null) {
+      // No array came back from the native call: report "no match" for every input.
+      return new boolean[count];
+    }
+    return nativeResults;
+  }
+
+  /**
+   * Tests if pattern matches anywhere in multiple strings (partial match bulk).
+   *
+   *

This is the bulk variant of {@link Matcher#find()} - tests if the pattern matches anywhere + * within each input string (not necessarily the full string). + * + *

Processes all inputs in a single JNI call for better performance. + * + *

Example - Find which strings contain pattern: + * + *

{@code
+   * Pattern emailPattern = Pattern.compile("[a-z]+@[a-z]+\\.[a-z]+");
+   * String[] texts = {
+   *     "user@example.com",           // contains email
+   *     "Contact: admin@test.org",    // contains email
+   *     "No email here"                // no email
+   * };
+   * boolean[] results = emailPattern.findAll(texts);
+   * // results = [true, true, false]
+   * }
+   *
+   * @param inputs array of strings to search
+   * @return boolean array (parallel to inputs) indicating if pattern found in each
+   * @throws NullPointerException if inputs is null
+   * @throws IllegalStateException if pattern is closed
+   * @see #matchAll(String[]) for full match bulk variant
+   * @see Matcher#find() for single-string partial match
+   * @since 1.2.0
+   */
+  public boolean[] findAll(String[] inputs) {
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+    checkNotClosed();
+
+    final int count = inputs.length;
+    if (count == 0) {
+      return new boolean[0];
+    }
+
+    // Time only the native call itself.
+    final long begin = System.nanoTime();
+    final boolean[] nativeResults = jni.partialMatchBulk(nativeHandle, inputs);
+    final long elapsedNanos = System.nanoTime() - begin;
+
+    final RE2MetricsRegistry registry = Pattern.getGlobalCache().getConfig().metricsRegistry();
+    // count is non-zero here, so the per-item average is always defined.
+    final long nanosPerItem = elapsedNanos / count;
+
+    // Global metrics (ALL matching operations).
+    registry.incrementCounter(MetricNames.MATCHING_OPERATIONS, count);
+    registry.recordTimer(MetricNames.MATCHING_LATENCY, nanosPerItem);
+    registry.recordTimer(MetricNames.MATCHING_PARTIAL_MATCH_LATENCY, nanosPerItem);
+
+    // Metrics specific to the String bulk path.
+    registry.incrementCounter(MetricNames.MATCHING_BULK_OPERATIONS);
+    registry.incrementCounter(MetricNames.MATCHING_BULK_ITEMS, count);
+    registry.recordTimer(MetricNames.MATCHING_BULK_LATENCY, nanosPerItem);
+
+    if (nativeResults == null) {
+      // No array came back from the native call: report "not found" for every input.
+      return new boolean[count];
+    }
+    return nativeResults;
+  }
+
+  /**
+   * Tests if pattern matches anywhere in multiple strings (partial match bulk, collection variant).
+   *
+   *

Convenience wrapper for {@link #findAll(String[])} accepting any Collection.
+   *
+   * @param inputs collection of strings to search
+   * @return boolean array (parallel to inputs) indicating if pattern found in each
+   * @throws NullPointerException if inputs is null
+   * @throws IllegalArgumentException if the collection contains non-String elements
+   * @throws IllegalStateException if pattern is closed (checked only for non-empty input)
+   * @since 1.2.0
+   */
+  public boolean[] findAll(java.util.Collection inputs) {
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+    if (inputs.isEmpty()) {
+      return new boolean[0];
+    }
+
+    String[] array;
+    try {
+      array = inputs.toArray(new String[0]);
+    } catch (ArrayStoreException e) {
+      // Same translation matchAll(Collection) and filter(Collection) apply, so callers see one
+      // exception type for "collection holds non-String elements" across the bulk API.
+      throw new IllegalArgumentException(
+          "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.",
+          e);
+    }
+
+    return findAll(array);
+  }
+
+  /**
+   * Matches multiple memory regions in a single JNI call (zero-copy bulk).
+   *
+   *

This method accepts arrays of memory addresses and lengths, enabling efficient zero-copy + * bulk matching with any off-heap memory system. + * + *

Performance: 91.5% faster than String bulk API. Combines bulk matching + * (single JNI call) with zero-copy memory access. + * + *

Memory Safety: All memory regions must remain valid for the duration of + * this call. + * + *

Usage with DirectByteBuffer array: + * + *

{@code
+   * import sun.nio.ch.DirectBuffer;
+   *
+   * Pattern pattern = Pattern.compile("\\d+");
+   * ByteBuffer[] buffers = ...; // Multiple DirectByteBuffers
+   *
+   * long[] addresses = new long[buffers.length];
+   * int[] lengths = new int[buffers.length];
+   * for (int i = 0; i < buffers.length; i++) {
+   *     addresses[i] = ((DirectBuffer) buffers[i]).address();
+   *     lengths[i] = buffers[i].remaining();
+   * }
+   *
+   * boolean[] results = pattern.matchAll(addresses, lengths);  // 91.5% faster!
+   * }
+   *
+   * @param addresses array of native memory addresses
+   * @param lengths array of byte lengths (must be same length as addresses)
+   * @return boolean array (parallel to inputs) indicating matches
+   * @throws NullPointerException if addresses or lengths is null
+   * @throws IllegalArgumentException if arrays have different lengths, if any length is negative,
+   *     or if an address is 0 while its length is greater than 0
+   * @throws IllegalStateException if pattern is closed
+   * @see #matchAll(String[]) String-based bulk variant
+   * @since 1.1.0
+   */
+  public boolean[] matchAll(long[] addresses, int[] lengths) {
+    checkNotClosed();
+    Objects.requireNonNull(addresses, "addresses cannot be null");
+    Objects.requireNonNull(lengths, "lengths cannot be null");
+
+    if (addresses.length != lengths.length) {
+      throw new IllegalArgumentException(
+          "Address and length arrays must have same size: addresses="
+              + addresses.length
+              + ", lengths="
+              + lengths.length);
+    }
+
+    final int count = addresses.length;
+    if (count == 0) {
+      return new boolean[0];
+    }
+
+    // Mirror the checks of the single-region methods before handing raw memory to native code.
+    // An (address 0, length 0) entry stays legal: matchAll(ByteBuffer[]) emits it for null buffers.
+    for (int i = 0; i < count; i++) {
+      if (lengths[i] < 0) {
+        throw new IllegalArgumentException(
+            "Length must not be negative: " + lengths[i] + " (index " + i + ")");
+      }
+      if (addresses[i] == 0 && lengths[i] > 0) {
+        throw new IllegalArgumentException(
+            "Address must not be 0 for a non-empty region (index " + i + ")");
+      }
+    }
+
+    long startNanos = System.nanoTime();
+    boolean[] results = jni.fullMatchDirectBulk(nativeHandle, addresses, lengths);
+    long durationNanos = System.nanoTime() - startNanos;
+
+    // Track metrics - GLOBAL (ALL) + SPECIFIC (Bulk Zero-Copy)
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+    long perItemNanos = durationNanos / count; // count > 0 is guaranteed by the early return
+
+    // Global metrics (ALL matching operations) - use per-item latency for comparability
+    metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS, count);
+    metrics.recordTimer(MetricNames.MATCHING_LATENCY, perItemNanos);
+    metrics.recordTimer(MetricNames.MATCHING_FULL_MATCH_LATENCY, perItemNanos);
+
+    // Specific bulk zero-copy metrics
+    metrics.incrementCounter(MetricNames.MATCHING_BULK_ZERO_COPY_OPERATIONS);
+    metrics.incrementCounter(MetricNames.MATCHING_BULK_ITEMS, count);
+    metrics.recordTimer(MetricNames.MATCHING_BULK_ZERO_COPY_LATENCY, perItemNanos);
+
+    // A null native result is reported as "no match" for every region.
+    return results != null ? results : new boolean[count];
+  }
+
+  /**
+   * Partial match on multiple memory regions in a single JNI call (zero-copy bulk).
+   *
+   *

Tests if pattern matches anywhere in each memory region. + * + *

Performance: 91.5% faster than String bulk API.
+   *
+   * @param addresses array of native memory addresses
+   * @param lengths array of byte lengths (must be same length as addresses)
+   * @return boolean array indicating if pattern found in each input
+   * @throws NullPointerException if addresses or lengths is null
+   * @throws IllegalArgumentException if arrays have different lengths, if any length is negative,
+   *     or if an address is 0 while its length is greater than 0
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.1.0
+   */
+  public boolean[] findAll(long[] addresses, int[] lengths) {
+    checkNotClosed();
+    Objects.requireNonNull(addresses, "addresses cannot be null");
+    Objects.requireNonNull(lengths, "lengths cannot be null");
+
+    if (addresses.length != lengths.length) {
+      throw new IllegalArgumentException(
+          "Address and length arrays must have same size: addresses="
+              + addresses.length
+              + ", lengths="
+              + lengths.length);
+    }
+
+    final int count = addresses.length;
+    if (count == 0) {
+      return new boolean[0];
+    }
+
+    // Mirror the checks of the single-region methods before handing raw memory to native code.
+    // An (address 0, length 0) entry stays legal: findAll(ByteBuffer[]) emits it for null buffers.
+    for (int i = 0; i < count; i++) {
+      if (lengths[i] < 0) {
+        throw new IllegalArgumentException(
+            "Length must not be negative: " + lengths[i] + " (index " + i + ")");
+      }
+      if (addresses[i] == 0 && lengths[i] > 0) {
+        throw new IllegalArgumentException(
+            "Address must not be 0 for a non-empty region (index " + i + ")");
+      }
+    }
+
+    long startNanos = System.nanoTime();
+    boolean[] results = jni.partialMatchDirectBulk(nativeHandle, addresses, lengths);
+    long durationNanos = System.nanoTime() - startNanos;
+
+    // Track metrics - GLOBAL (ALL) + SPECIFIC (Bulk Zero-Copy)
+    RE2MetricsRegistry metrics = cache.getConfig().metricsRegistry();
+    long perItemNanos = durationNanos / count; // count > 0 is guaranteed by the early return
+
+    // Global metrics (ALL matching operations) - use per-item latency for comparability
+    metrics.incrementCounter(MetricNames.MATCHING_OPERATIONS, count);
+    metrics.recordTimer(MetricNames.MATCHING_LATENCY, perItemNanos);
+    metrics.recordTimer(MetricNames.MATCHING_PARTIAL_MATCH_LATENCY, perItemNanos);
+
+    // Specific bulk zero-copy metrics
+    metrics.incrementCounter(MetricNames.MATCHING_BULK_ZERO_COPY_OPERATIONS);
+    metrics.incrementCounter(MetricNames.MATCHING_BULK_ITEMS, count);
+    metrics.recordTimer(MetricNames.MATCHING_BULK_ZERO_COPY_LATENCY, perItemNanos);
+
+    // A null native result is reported as "not found" for every region.
+    return results != null ? results : new boolean[count];
+  }
+
+  /**
+   * Matches multiple ByteBuffers in a single operation (bulk with auto-routing).
+   *
+   *

Automatically routes each buffer: DirectByteBuffer → zero-copy, heap → String. + * + *

Example - Bulk process Cassandra cells: + * + *

{@code
+   * Pattern pattern = Pattern.compile("valid_.*");
+   * ByteBuffer[] cells = getCellsFromCassandra();  // Array of DirectByteBuffers
+   *
+   * boolean[] results = pattern.matchAll(cells);
+   * // Each DirectByteBuffer uses zero-copy (46-99% faster)
+   * }
+   *
+   * @param buffers array of ByteBuffers to match
+   * @return boolean array (parallel to inputs) indicating matches
+   * @throws NullPointerException if buffers is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public boolean[] matchAll(ByteBuffer[] buffers) {
+    checkNotClosed();
+    Objects.requireNonNull(buffers, "buffers cannot be null");
+
+    final int count = buffers.length;
+    if (count == 0) {
+      return new boolean[0];
+    }
+
+    // A single non-null heap buffer sends the whole batch down the String path.
+    boolean heapBufferSeen = false;
+    for (int i = 0; i < count && !heapBufferSeen; i++) {
+      final ByteBuffer candidate = buffers[i];
+      heapBufferSeen = candidate != null && !candidate.isDirect();
+    }
+
+    if (heapBufferSeen) {
+      // Mixed or heap: decode each buffer's remaining bytes as UTF-8; null entries stay null.
+      final String[] decoded = new String[count];
+      for (int i = 0; i < count; i++) {
+        final ByteBuffer source = buffers[i];
+        if (source == null) {
+          continue;
+        }
+        final byte[] raw = new byte[source.remaining()];
+        source.duplicate().get(raw); // duplicate() leaves the caller's position untouched
+        decoded[i] = new String(raw, StandardCharsets.UTF_8);
+      }
+      return matchAll(decoded);
+    }
+
+    // Zero-copy: every non-null buffer is direct; null entries keep address 0 / length 0.
+    final long[] addresses = new long[count];
+    final int[] lengths = new int[count];
+    for (int i = 0; i < count; i++) {
+      final ByteBuffer source = buffers[i];
+      if (source == null) {
+        continue;
+      }
+      addresses[i] = ((DirectBuffer) source).address() + source.position();
+      lengths[i] = source.remaining();
+    }
+    return matchAll(addresses, lengths);
+  }
+
+  /**
+   * Tests if pattern matches anywhere in multiple ByteBuffers (partial match bulk).
+   *
+   *

Bulk variant of partial matching with automatic routing.
+   *
+   * @param buffers array of ByteBuffers to search
+   * @return boolean array indicating if pattern found in each
+   * @throws NullPointerException if buffers is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.2.0
+   */
+  public boolean[] findAll(ByteBuffer[] buffers) {
+    checkNotClosed();
+    Objects.requireNonNull(buffers, "buffers cannot be null");
+
+    final int count = buffers.length;
+    if (count == 0) {
+      return new boolean[0];
+    }
+
+    // A single non-null heap buffer sends the whole batch down the String path.
+    boolean heapBufferSeen = false;
+    for (int i = 0; i < count && !heapBufferSeen; i++) {
+      final ByteBuffer candidate = buffers[i];
+      heapBufferSeen = candidate != null && !candidate.isDirect();
+    }
+
+    if (heapBufferSeen) {
+      // Mixed or heap: decode each buffer's remaining bytes as UTF-8; null entries stay null.
+      final String[] decoded = new String[count];
+      for (int i = 0; i < count; i++) {
+        final ByteBuffer source = buffers[i];
+        if (source == null) {
+          continue;
+        }
+        final byte[] raw = new byte[source.remaining()];
+        source.duplicate().get(raw); // duplicate() leaves the caller's position untouched
+        decoded[i] = new String(raw, StandardCharsets.UTF_8);
+      }
+      return findAll(decoded);
+    }
+
+    // Zero-copy: every non-null buffer is direct; null entries keep address 0 / length 0.
+    final long[] addresses = new long[count];
+    final int[] lengths = new int[count];
+    for (int i = 0; i < count; i++) {
+      final ByteBuffer source = buffers[i];
+      if (source == null) {
+        continue;
+      }
+      addresses[i] = ((DirectBuffer) source).address() + source.position();
+      lengths[i] = source.remaining();
+    }
+    return findAll(addresses, lengths);
+  }
+
+  /**
+   * Extracts capture groups from content at memory address (zero-copy input).
+   *
+   *

Reads text directly from the memory address and extracts all capture groups. The input is
+   * zero-copy, but output creates new Java Strings for the groups.
+   *
+   * @param address native memory address of UTF-8 encoded text
+   * @param length number of bytes to read from the address
+   * @return String array where [0] = full match, [1+] = capturing groups, or null if no match
+   * @throws IllegalArgumentException if address is 0 or length is negative
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.1.0
+   */
+  public String[] extractGroups(long address, int length) {
+    checkNotClosed();
+
+    // Validate the region before any native code dereferences it.
+    if (address == 0) {
+      throw new IllegalArgumentException("Address must not be 0");
+    } else if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+
+    final String[] groups = jni.extractGroupsDirect(nativeHandle, address, length);
+    return groups;
+  }
+
+  /**
+   * Finds all non-overlapping matches at memory address (zero-copy input).
+   *
+   * <p>Reads text directly from the memory address and finds all matches. The input is zero-copy,
+   * but output creates new Java Strings.
+   *
+   * @param address native memory address of UTF-8 encoded text
+   * @param length number of bytes to read from the address
+   * @return array of match results with capture groups, or null if no matches
+   * @throws IllegalArgumentException if address is 0 or length is negative
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.1.0
+   */
+  public String[][] findAllMatches(long address, int length) {
+    checkNotClosed();
+
+    // Validate the region before any native code dereferences it.
+    if (address == 0) {
+      throw new IllegalArgumentException("Address must not be 0");
+    } else if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+
+    final String[][] allMatches = jni.findAllMatchesDirect(nativeHandle, address, length);
+    return allMatches;
+  }
+
+  // ========== ByteBuffer API (Automatic Zero-Copy Routing) ==========
+
+  /**
+   * Tests if ByteBuffer content fully matches this pattern.
+   *
+   *

This method intelligently routes to the optimal implementation: + * + *

    + *
  • DirectByteBuffer: Uses zero-copy via {@link #matches(long, int)} (46-99% + * faster) + *
  • HeapByteBuffer: Converts to String and uses {@link #matches(String)} + *
+ * + *

Usage Example: + * + *

{@code
+   * Pattern pattern = Pattern.compile("\\d+");
+   *
+   * // DirectByteBuffer - zero-copy, 46-99% faster
+   * ByteBuffer directBuffer = ByteBuffer.allocateDirect(1024);
+   * directBuffer.put("12345".getBytes(StandardCharsets.UTF_8));
+   * directBuffer.flip();
+   * boolean r1 = pattern.matches(directBuffer);  // Zero-copy!
+   *
+   * // HeapByteBuffer - falls back to String API
+   * ByteBuffer heapBuffer = ByteBuffer.wrap("67890".getBytes(StandardCharsets.UTF_8));
+   * boolean r2 = pattern.matches(heapBuffer);  // Converted to String
+   * }
+ * + *

Performance: When using DirectByteBuffer, provides 46-99% improvement. When + * using heap ByteBuffer, equivalent to String API (no improvement). + * + *

Memory Safety: The buffer's backing memory must remain valid for the
+   * duration of this call. Do NOT release direct buffers until method returns.
+   *
+   * @param buffer ByteBuffer containing UTF-8 encoded text (direct or heap-backed)
+   * @return true if entire content matches this pattern, false otherwise
+   * @throws NullPointerException if buffer is null
+   * @throws IllegalStateException if pattern is closed
+   * @see #matches(String) String-based variant
+   * @see #matches(long, int) Raw address variant
+   * @since 1.1.0
+   */
+  public boolean matches(ByteBuffer buffer) {
+    checkNotClosed();
+    Objects.requireNonNull(buffer, "buffer cannot be null");
+
+    if (!buffer.isDirect()) {
+      // Heap-backed ByteBuffer - decode to a String and use the String API.
+      return matchesFromByteBuffer(buffer);
+    }
+
+    // Zero-copy path: address of the first remaining byte plus the remaining byte count.
+    final long start = ((DirectBuffer) buffer).address() + buffer.position();
+    final int byteCount = buffer.remaining();
+    return matches(start, byteCount);
+  }
+
+  /**
+   * Tests if pattern matches anywhere in ByteBuffer content.
+   *
+   *

Intelligently routes to zero-copy (DirectByteBuffer) or String API (heap buffer). + * + *

Performance: 46-99% faster for DirectByteBuffer.
+   *
+   * @param buffer ByteBuffer containing UTF-8 encoded text
+   * @return true if pattern matches anywhere in content, false otherwise
+   * @throws NullPointerException if buffer is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.1.0
+   */
+  public boolean find(ByteBuffer buffer) {
+    checkNotClosed();
+    Objects.requireNonNull(buffer, "buffer cannot be null");
+
+    if (!buffer.isDirect()) {
+      // Heap-backed - decode to a String and search that.
+      return findFromByteBuffer(buffer);
+    }
+
+    // Zero-copy path: address of the first remaining byte plus the remaining byte count.
+    final long start = ((DirectBuffer) buffer).address() + buffer.position();
+    final int byteCount = buffer.remaining();
+    return find(start, byteCount);
+  }
+
+  /**
+   * Extracts capture groups from ByteBuffer content.
+   *
+   *

Intelligently routes to zero-copy (DirectByteBuffer) or String API (heap buffer).
+   *
+   * @param buffer ByteBuffer containing UTF-8 encoded text
+   * @return String array where [0] = full match, [1+] = capturing groups, or null if no match
+   * @throws NullPointerException if buffer is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.1.0
+   */
+  public String[] extractGroups(ByteBuffer buffer) {
+    checkNotClosed();
+    Objects.requireNonNull(buffer, "buffer cannot be null");
+
+    if (!buffer.isDirect()) {
+      // Heap-backed - decode to a String first.
+      return extractGroupsFromByteBuffer(buffer);
+    }
+
+    // Zero-copy path: address of the first remaining byte plus the remaining byte count.
+    final long start = ((DirectBuffer) buffer).address() + buffer.position();
+    final int byteCount = buffer.remaining();
+    return extractGroups(start, byteCount);
+  }
+
+  /**
+   * Finds all non-overlapping matches in ByteBuffer content.
+   *
+   *

Intelligently routes to zero-copy (DirectByteBuffer) or String API (heap buffer).
+   *
+   * @param buffer ByteBuffer containing UTF-8 encoded text
+   * @return array of match results with capture groups, or null if no matches
+   * @throws NullPointerException if buffer is null
+   * @throws IllegalStateException if pattern is closed
+   * @since 1.1.0
+   */
+  public String[][] findAllMatches(ByteBuffer buffer) {
+    checkNotClosed();
+    Objects.requireNonNull(buffer, "buffer cannot be null");
+
+    if (!buffer.isDirect()) {
+      // Heap-backed - decode to a String first.
+      return findAllMatchesFromByteBuffer(buffer);
+    }
+
+    // Zero-copy path: address of the first remaining byte plus the remaining byte count.
+    final long start = ((DirectBuffer) buffer).address() + buffer.position();
+    final int byteCount = buffer.remaining();
+    return findAllMatches(start, byteCount);
+  }
+
+  /** Helper: heap-backed fallback for matches() - decodes the buffer and full-matches it. */
+  private boolean matchesFromByteBuffer(ByteBuffer buffer) {
+    return matches(decodeRemainingUtf8(buffer));
+  }
+
+  /** Helper: heap-backed fallback for find() - decodes the buffer and searches it. */
+  private boolean findFromByteBuffer(ByteBuffer buffer) {
+    final String text = decodeRemainingUtf8(buffer);
+    try (Matcher m = matcher(text)) {
+      return m.find();
+    }
+  }
+
+  /** Helper: heap-backed fallback for extractGroups() - decodes the buffer first. */
+  private String[] extractGroupsFromByteBuffer(ByteBuffer buffer) {
+    return jni.extractGroups(nativeHandle, decodeRemainingUtf8(buffer));
+  }
+
+  /** Helper: heap-backed fallback for findAllMatches() - decodes the buffer first. */
+  private String[][] findAllMatchesFromByteBuffer(ByteBuffer buffer) {
+    return jni.findAllMatches(nativeHandle, decodeRemainingUtf8(buffer));
+  }
+
+  /**
+   * Copies the remaining bytes of a buffer into a UTF-8 decoded String.
+   *
+   * <p>Reads through a duplicate so the caller's position and limit are not modified.
+   */
+  private static String decodeRemainingUtf8(ByteBuffer buffer) {
+    final byte[] raw = new byte[buffer.remaining()];
+    buffer.duplicate().get(raw);
+    return new String(raw, StandardCharsets.UTF_8);
+  }
+
+  /**
+   * Filters collection, returning only matching elements.
+   *
+   *

Creates a new {@link java.util.List} containing only strings that match this pattern. The + * original collection is not modified. Uses bulk matching internally for performance. + * + *

Example - Extract valid email addresses: + * + *

{@code
+   * Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
+   * List inputs = List.of(
+   *     "user@example.com",    // matches
+   *     "invalid_email",       // doesn't match
+   *     "admin@test.org"       // matches
+   * );
+   * List validEmails = emailPattern.filter(inputs);
+   * // validEmails = ["user@example.com", "admin@test.org"]
+   * }
+ * + *

Collection Types: + * + *

    + *
  • Input: Any Collection (List, Set, Queue, etc.) + *
  • Output: Always returns {@link java.util.ArrayList} + *
  • Order: Preserves iteration order of input collection + *
+ * + *

Common Use Cases: + * + *

    + *
  • Extracting valid data from mixed datasets + *
  • Data cleaning (filter valid records) + *
  • Log filtering (extract matching log lines) + *
  • Cassandra SAI: Filter partition keys matching regex + *
+ * + *

Performance: Throughput ~3.9 million strings/second (benchmark: 10k strings in 2.6ms)
+   *
+   * @param inputs collection to filter (List, Set, Queue, or any Collection)
+   * @return new List containing only matching elements (preserves input order)
+   * @throws NullPointerException if inputs is null
+   * @throws IllegalArgumentException if the collection contains non-String elements
+   * @see #filterNot(java.util.Collection) inverse operation
+   * @see #retainMatches(java.util.Collection) in-place variant
+   * @see #matchAll(java.util.Collection) to get boolean array
+   * @since 1.0.0
+   */
+  public java.util.List filter(java.util.Collection inputs) {
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+
+    final java.util.List kept = new java.util.ArrayList<>();
+    if (inputs.isEmpty()) {
+      return kept;
+    }
+
+    final String[] candidates;
+    try {
+      candidates = inputs.toArray(new String[0]);
+    } catch (ArrayStoreException e) {
+      throw new IllegalArgumentException(
+          "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.",
+          e);
+    }
+
+    // One bulk call decides every element; verdicts[i] belongs to candidates[i].
+    final boolean[] verdicts = matchAll(candidates);
+    int index = 0;
+    for (String candidate : candidates) {
+      if (verdicts[index++]) {
+        kept.add(candidate);
+      }
+    }
+    return kept;
+  }
+
+  /**
+   * Filters collection, returning only non-matching elements (inverse of {@link #filter}).
+   *
+   *

Creates a new {@link java.util.List} containing only strings that do NOT match this pattern. + * The original collection is not modified. + * + *

Example - Remove test data, keep production: + * + *

{@code
+   * Pattern testPattern = Pattern.compile("test_.*");
+   * List allKeys = List.of("test_key1", "prod_key1", "test_key2", "prod_key2");
+   * List prodKeys = testPattern.filterNot(allKeys);
+   * // prodKeys = ["prod_key1", "prod_key2"]
+   * }
+ * + *

Use Cases: + * + *

    + *
  • Removing test/debug data from production datasets + *
  • Blacklist filtering (exclude matching patterns) + *
  • Data sanitization (remove sensitive patterns) + *
+   *
+   * @param inputs collection to filter
+   * @return new List containing only non-matching elements (preserves input order)
+   * @throws NullPointerException if inputs is null
+   * @throws IllegalArgumentException if the collection contains non-String elements
+   * @see #filter(java.util.Collection) inverse operation (keep matches)
+   * @see #removeMatches(java.util.Collection) in-place variant
+   * @since 1.0.0
+   */
+  public java.util.List filterNot(java.util.Collection inputs) {
+    Objects.requireNonNull(inputs, "inputs cannot be null");
+
+    final java.util.List rejected = new java.util.ArrayList<>();
+    if (inputs.isEmpty()) {
+      return rejected;
+    }
+
+    final String[] candidates;
+    try {
+      candidates = inputs.toArray(new String[0]);
+    } catch (ArrayStoreException e) {
+      throw new IllegalArgumentException(
+          "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.",
+          e);
+    }
+
+    // One bulk call decides every element; verdicts[i] belongs to candidates[i].
+    final boolean[] verdicts = matchAll(candidates);
+    int index = 0;
+    for (String candidate : candidates) {
+      if (!verdicts[index++]) { // keep only what did NOT match
+        rejected.add(candidate);
+      }
+    }
+    return rejected;
+  }
+
+  /**
+   * Removes non-matching elements from collection (in-place mutation).
+   *
+   *

MUTATES THE INPUT: This method modifies the provided collection by removing elements + * that don't match the pattern. Only matching elements remain after this call. + * + *

Example - Clean invalid data in-place: + * + *

{@code
+   * Pattern validPattern = Pattern.compile("[a-zA-Z0-9_]+");
+   * List usernames = new ArrayList<>(List.of("user1", "invalid@", "admin", "bad#name"));
+   *
+   * int removed = validPattern.retainMatches(usernames);
+   * // removed = 2
+   * // usernames now = ["user1", "admin"] (invalid entries removed)
+   * }
+ * + *

When to Use: + * + *

    + *
  • Use this: When you want to modify the collection in-place (memory efficient) + *
  • Use {@link #filter}: When you need to preserve the original collection + *
+ * + *

Collection Requirements: + * + *

    + *
  • Collection must be mutable (support {@code iterator().remove()}) + *
  • Works with: ArrayList, LinkedList, HashSet, TreeSet, etc. + *
  • Fails with: Collections.unmodifiableList(), List.of(), Set.of(), etc. + *
+ * + * @param inputs mutable collection to filter (List, Set, Queue, etc.) + * @return number of elements removed + * @throws NullPointerException if inputs is null + * @throws UnsupportedOperationException if collection is immutable + * @see #filter(java.util.Collection) non-mutating variant + * @see #removeMatches(java.util.Collection) inverse (remove matches, keep non-matches) + * @since 1.0.0 + */ + public int retainMatches(java.util.Collection inputs) { + Objects.requireNonNull(inputs, "inputs cannot be null"); + + if (inputs.isEmpty()) { + return 0; + } + + String[] array; + try { + array = inputs.toArray(new String[0]); + } catch (ArrayStoreException e) { + throw new IllegalArgumentException( + "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.", + e); + } + + boolean[] matches = matchAll(array); + + int removed = 0; + java.util.Iterator it = inputs.iterator(); + int i = 0; + while (it.hasNext()) { + it.next(); + if (!matches[i++]) { + it.remove(); + removed++; + } + } + + return removed; + } + + /** + * Removes matching elements from collection (in-place mutation, inverse of {@link + * #retainMatches}). + * + *

MUTATES THE INPUT: This method modifies the provided collection by removing elements + * that match the pattern. Only non-matching elements remain after this call. + * + *

Example - Remove sensitive data patterns: + * + *

{@code
+   * Pattern ssnPattern = Pattern.compile(".*\\d{3}-\\d{2}-\\d{4}.*");  // contains SSN
+   * List logLines = new ArrayList<>(List.of(
+   *     "User logged in",
+   *     "SSN: 123-45-6789",  // sensitive!
+   *     "Processing request",
+   *     "SSN: 987-65-4321"   // sensitive!
+   * ));
+   *
+   * int removed = ssnPattern.removeMatches(logLines);
+   * // removed = 2
+   * // logLines now = ["User logged in", "Processing request"] (SSNs removed)
+   * }
+ * + *

Common Use Cases: + * + *

    + *
  • Data sanitization (remove PII, credentials, etc.) + *
  • Blacklist filtering (remove known bad patterns) + *
  • Log cleaning (strip sensitive information) + *
+ * + * @param inputs mutable collection to filter (List, Set, Queue, etc.) + * @return number of elements removed + * @throws NullPointerException if inputs is null + * @throws UnsupportedOperationException if collection is immutable + * @see #retainMatches(java.util.Collection) inverse (keep matches, remove non-matches) + * @see #filterNot(java.util.Collection) non-mutating variant + * @since 1.0.0 + */ + public int removeMatches(java.util.Collection inputs) { + Objects.requireNonNull(inputs, "inputs cannot be null"); + + if (inputs.isEmpty()) { + return 0; + } + + String[] array; + try { + array = inputs.toArray(new String[0]); + } catch (ArrayStoreException e) { + throw new IllegalArgumentException( + "Collection contains non-String elements. Use stream().map(Object::toString).toList() to convert.", + e); + } + + boolean[] matches = matchAll(array); + + int removed = 0; + java.util.Iterator it = inputs.iterator(); + int i = 0; + while (it.hasNext()) { + it.next(); + if (matches[i++]) { // Inverted logic + it.remove(); + removed++; + } + } + + return removed; + } + + // ========== Map Filtering Operations ========== + + /** + * Filters map by matching keys against pattern (returns new map). + * + *

Creates a new {@link java.util.HashMap} containing only entries whose keys match this + * pattern. Values are preserved, keys are tested against the pattern. + * + *

Example - Filter configuration by environment prefix: + * + *

{@code
+   * Pattern prodPattern = Pattern.compile("prod_.*");
+   * Map allConfig = Map.of(
+   *     "prod_db_host", "prod-db.example.com",
+   *     "test_db_host", "test-db.example.com",
+   *     "prod_api_key", "abc123",
+   *     "test_api_key", "xyz789"
+   * );
+   *
+   * Map prodConfig = prodPattern.filterByKey(allConfig);
+   * // prodConfig = {"prod_db_host": "prod-db.example.com", "prod_api_key": "abc123"}
+   * }
+ * + *

Example - Cassandra SAI filter by partition key pattern: + * + *

{@code
+   * // Filter partition keys matching date pattern
+   * Pattern datePattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");
+   * Map partitions = ...; // partition key → row
+   * Map datePartitions = datePattern.filterByKey(partitions);
+   * }
+ * + *

Performance: Throughput ~2.6 million entries/second (benchmark: 10k entries in 3.9ms) + * + * @param value type + * @param inputs map where keys (String type) are tested against pattern + * @return new HashMap containing only entries whose keys match + * @throws NullPointerException if inputs is null + * @see #filterByValue(java.util.Map) filter by values instead of keys + * @see #filterNotByKey(java.util.Map) inverse (keep non-matching keys) + * @see #retainMatchesByKey(java.util.Map) in-place variant + * @since 1.0.0 + */ + public java.util.Map filterByKey(java.util.Map inputs) { + Objects.requireNonNull(inputs, "inputs cannot be null"); + + if (inputs.isEmpty()) { + return new java.util.HashMap<>(); + } + + // Extract keys for bulk matching + java.util.List> entries = + new java.util.ArrayList<>(inputs.entrySet()); + String[] keys = new String[entries.size()]; + for (int i = 0; i < entries.size(); i++) { + keys[i] = entries.get(i).getKey(); + } + + boolean[] matches = matchAll(keys); + + // Build result map + java.util.Map result = new java.util.HashMap<>(); + for (int i = 0; i < entries.size(); i++) { + if (matches[i]) { + java.util.Map.Entry entry = entries.get(i); + result.put(entry.getKey(), entry.getValue()); + } + } + + return result; + } + + /** + * Filters map by matching values against pattern (returns new map). + * + *

Creates a new {@link java.util.HashMap} containing only entries whose values match this + * pattern. Keys are preserved, values are tested against the pattern. + * + *

Example - Filter user map by valid email addresses: + * + *

{@code
+   * Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
+   * Map userEmails = Map.of(
+   *     1, "user@example.com",    // valid email
+   *     2, "invalid_email",       // not an email
+   *     3, "admin@test.org"       // valid email
+   * );
+   *
+   * Map validUsers = emailPattern.filterByValue(userEmails);
+   * // validUsers = {1: "user@example.com", 3: "admin@test.org"}
+   * }
+ * + *

Example - Extract records with matching status: + * + *

{@code
+   * Pattern activePattern = Pattern.compile("active|running|online");
+   * Map services = Map.of(
+   *     "web", "active",
+   *     "db", "stopped",
+   *     "cache", "running"
+   * );
+   *
+   * Map activeServices = activePattern.filterByValue(services);
+   * // activeServices = {"web": "active", "cache": "running"}
+   * }
+ * + * @param key type + * @param inputs map where values (String type) are tested against pattern + * @return new HashMap containing only entries whose values match + * @throws NullPointerException if inputs is null + * @see #filterByKey(java.util.Map) filter by keys instead of values + * @see #filterNotByValue(java.util.Map) inverse (keep non-matching values) + * @see #retainMatchesByValue(java.util.Map) in-place variant + * @since 1.0.0 + */ + public java.util.Map filterByValue(java.util.Map inputs) { + Objects.requireNonNull(inputs, "inputs cannot be null"); + + if (inputs.isEmpty()) { + return new java.util.HashMap<>(); + } + + // Extract values for bulk matching + java.util.List> entries = + new java.util.ArrayList<>(inputs.entrySet()); + String[] values = new String[entries.size()]; + for (int i = 0; i < entries.size(); i++) { + values[i] = entries.get(i).getValue(); + } + + boolean[] matches = matchAll(values); + + // Build result map + java.util.Map result = new java.util.HashMap<>(); + for (int i = 0; i < entries.size(); i++) { + if (matches[i]) { + java.util.Map.Entry entry = entries.get(i); + result.put(entry.getKey(), entry.getValue()); + } + } + + return result; + } + + /** + * Filters map by keys NOT matching pattern (inverse of {@link #filterByKey}). + * + *

Example: + * + *

{@code
+   * Pattern tmpPattern = Pattern.compile("tmp_.*");
+   * Map data = Map.of("tmp_cache", "...", "prod_data", "...");
+   * Map permanent = tmpPattern.filterNotByKey(data);
+   * // permanent = {"prod_data": "..."}
+   * }
+ * + * @param value type + * @param inputs map where keys are tested + * @return new HashMap with entries whose keys do NOT match + * @throws NullPointerException if inputs is null + * @see #filterByKey inverse + * @since 1.0.0 + */ + public java.util.Map filterNotByKey(java.util.Map inputs) { + Objects.requireNonNull(inputs, "inputs cannot be null"); + + if (inputs.isEmpty()) { + return new java.util.HashMap<>(); + } + + java.util.List> entries = + new java.util.ArrayList<>(inputs.entrySet()); + String[] keys = new String[entries.size()]; + for (int i = 0; i < entries.size(); i++) { + keys[i] = entries.get(i).getKey(); + } + + boolean[] matches = matchAll(keys); + + java.util.Map result = new java.util.HashMap<>(); + for (int i = 0; i < entries.size(); i++) { + if (!matches[i]) { // Inverted + java.util.Map.Entry entry = entries.get(i); + result.put(entry.getKey(), entry.getValue()); + } + } + + return result; + } + + /** + * Filters map by values NOT matching pattern (inverse of {@link #filterByValue}). + * + *

Example - Exclude error statuses: + * + *

{@code
+   * Pattern errorPattern = Pattern.compile("error|failed|timeout");
+   * Map jobStatuses = Map.of("job1", "success", "job2", "error", "job3", "complete");
+   * Map successful = errorPattern.filterNotByValue(jobStatuses);
+   * // successful = {"job1": "success", "job3": "complete"}
+   * }
+ * + * @param key type + * @param inputs map where values are tested + * @return new HashMap with entries whose values do NOT match + * @throws NullPointerException if inputs is null + * @see #filterByValue inverse + * @since 1.0.0 + */ + public java.util.Map filterNotByValue(java.util.Map inputs) { + Objects.requireNonNull(inputs, "inputs cannot be null"); + + if (inputs.isEmpty()) { + return new java.util.HashMap<>(); + } + + java.util.List> entries = + new java.util.ArrayList<>(inputs.entrySet()); + String[] values = new String[entries.size()]; + for (int i = 0; i < entries.size(); i++) { + values[i] = entries.get(i).getValue(); + } + + boolean[] matches = matchAll(values); + + java.util.Map result = new java.util.HashMap<>(); + for (int i = 0; i < entries.size(); i++) { + if (!matches[i]) { // Inverted + java.util.Map.Entry entry = entries.get(i); + result.put(entry.getKey(), entry.getValue()); + } + } + + return result; + } + + /** + * Removes entries where keys don't match (in-place map mutation). + * + *

MUTATES INPUT: Keeps only entries whose keys match the pattern. + * + *

Example: + * + *

{@code
+   * Pattern userPattern = Pattern.compile("user_\\d+");
+   * Map cache = new HashMap<>(Map.of("user_123", obj1, "sys_config", obj2));
+   * int removed = userPattern.retainMatchesByKey(cache);
+   * // removed = 1, cache = {"user_123": obj1}
+   * }
+ * + * @param value type + * @param map mutable map to filter by keys + * @return number of entries removed + * @throws NullPointerException if map is null + * @throws UnsupportedOperationException if map is immutable + * @see #filterByKey non-mutating variant + * @since 1.0.0 + */ + public int retainMatchesByKey(java.util.Map map) { + Objects.requireNonNull(map, "map cannot be null"); + + if (map.isEmpty()) { + return 0; + } + + String[] keys = map.keySet().toArray(new String[0]); + boolean[] matches = matchAll(keys); + + int removed = 0; + java.util.Iterator> it = map.entrySet().iterator(); + int i = 0; + while (it.hasNext()) { + it.next(); + if (!matches[i++]) { + it.remove(); + removed++; + } + } + + return removed; + } + + /** + * Removes entries where values don't match (in-place map mutation). + * + *

MUTATES INPUT: Keeps only entries whose values match. + * + *

Example: + * + *

{@code
+   * Pattern activePattern = Pattern.compile("active|online");
+   * Map servers = new HashMap<>(Map.of("web", "active", "db", "offline"));
+   * activePattern.retainMatchesByValue(servers);
+   * // servers = {"web": "active"}
+   * }
+ * + * @param key type + * @param map mutable map to filter by values + * @return number of entries removed + * @throws NullPointerException if map is null + * @throws UnsupportedOperationException if map is immutable + * @see #filterByValue non-mutating variant + * @since 1.0.0 + */ + public int retainMatchesByValue(java.util.Map map) { + Objects.requireNonNull(map, "map cannot be null"); + + if (map.isEmpty()) { + return 0; + } + + // Extract values maintaining entry order + java.util.List> entries = + new java.util.ArrayList<>(map.entrySet()); + String[] values = new String[entries.size()]; + for (int i = 0; i < entries.size(); i++) { + values[i] = entries.get(i).getValue(); + } + + boolean[] matches = matchAll(values); + + int removed = 0; + java.util.Iterator> it = map.entrySet().iterator(); + int i = 0; + while (it.hasNext()) { + it.next(); + if (!matches[i++]) { + it.remove(); + removed++; + } + } + + return removed; + } + + /** + * Removes entries where keys match (in-place, inverse of {@link #retainMatchesByKey}). + * + *

MUTATES INPUT: Removes entries whose keys match the pattern. + * + *

Example - Remove temporary cache entries: + * + *

{@code
+   * Pattern tmpPattern = Pattern.compile("tmp_.*");
+   * Map cache = new HashMap<>(Map.of("tmp_123", data1, "perm_456", data2));
+   * tmpPattern.removeMatchesByKey(cache);
+   * // cache = {"perm_456": data2}
+   * }
+ * + * @param value type + * @param map mutable map to filter by keys + * @return number of entries removed + * @throws NullPointerException if map is null + * @throws UnsupportedOperationException if map is immutable + * @see #filterNotByKey non-mutating variant + * @since 1.0.0 + */ + public int removeMatchesByKey(java.util.Map map) { + Objects.requireNonNull(map, "map cannot be null"); + + if (map.isEmpty()) { + return 0; + } + + String[] keys = map.keySet().toArray(new String[0]); + boolean[] matches = matchAll(keys); + + int removed = 0; + java.util.Iterator> it = map.entrySet().iterator(); + int i = 0; + while (it.hasNext()) { + it.next(); + if (matches[i++]) { // Inverted - remove if MATCHES + it.remove(); + removed++; + } + } + + return removed; + } + + /** + * Removes entries where values match (in-place, inverse of {@link #retainMatchesByValue}). + * + *

MUTATES INPUT: Removes entries whose values match the pattern. + * + *

Example - Remove failed jobs: + * + *

{@code
+   * Pattern failedPattern = Pattern.compile("failed|error|timeout");
+   * Map jobs = new HashMap<>(Map.of(1, "success", 2, "failed", 3, "complete"));
+   * failedPattern.removeMatchesByValue(jobs);
+   * // jobs = {1: "success", 3: "complete"}
+   * }
+ * + * @param key type + * @param map mutable map to filter by values + * @return number of entries removed + * @throws NullPointerException if map is null + * @throws UnsupportedOperationException if map is immutable + * @see #filterNotByValue non-mutating variant + * @since 1.0.0 + */ + public int removeMatchesByValue(java.util.Map map) { + Objects.requireNonNull(map, "map cannot be null"); + + if (map.isEmpty()) { + return 0; + } + + java.util.List> entries = + new java.util.ArrayList<>(map.entrySet()); + String[] values = new String[entries.size()]; + for (int i = 0; i < entries.size(); i++) { + values[i] = entries.get(i).getValue(); + } + + boolean[] matches = matchAll(values); + + int removed = 0; + java.util.Iterator> it = map.entrySet().iterator(); + int i = 0; + while (it.hasNext()) { + it.next(); + if (matches[i++]) { // Inverted - remove if MATCHES + it.remove(); + removed++; + } + } + + return removed; + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/PatternCompilationException.java b/libre2-core/src/main/java/com/axonops/libre2/api/PatternCompilationException.java index ca8a2a5..9cbeeb5 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/api/PatternCompilationException.java +++ b/libre2-core/src/main/java/com/axonops/libre2/api/PatternCompilationException.java @@ -23,18 +23,18 @@ */ public final class PatternCompilationException extends RE2Exception { - private final String pattern; + private final String pattern; - public PatternCompilationException(String pattern, String message) { - super("RE2: Pattern compilation failed: " + message + " (pattern: " + truncate(pattern) + ")"); - this.pattern = pattern; - } + public PatternCompilationException(String pattern, String message) { + super("RE2: Pattern compilation failed: " + message + " (pattern: " + truncate(pattern) + ")"); + this.pattern = pattern; + } - public String getPattern() { - return pattern; - } + public String getPattern() { + return pattern; + } - private static 
String truncate(String s) { - return s != null && s.length() > 100 ? s.substring(0, 97) + "..." : s; - } + private static String truncate(String s) { + return s != null && s.length() > 100 ? s.substring(0, 97) + "..." : s; + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/RE2.java b/libre2-core/src/main/java/com/axonops/libre2/api/RE2.java index f68dc20..53e42ce 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/api/RE2.java +++ b/libre2-core/src/main/java/com/axonops/libre2/api/RE2.java @@ -19,322 +19,327 @@ /** * Main entry point for RE2 regex operations. * - * Thread-safe: All methods can be called concurrently from multiple threads. + *

Thread-safe: All methods can be called concurrently from multiple threads. * * @since 1.0.0 */ public final class RE2 { - private RE2() { - // Utility class + private RE2() { + // Utility class + } + + public static Pattern compile(String pattern) { + return Pattern.compile(pattern); + } + + public static Pattern compile(String pattern, boolean caseSensitive) { + return Pattern.compile(pattern, caseSensitive); + } + + // ========== String Matching Operations ========== + + /** + * Tests if the entire input matches the pattern (full match). + * + * @param pattern regex pattern + * @param input input string + * @return true if entire input matches, false otherwise + */ + public static boolean matches(String pattern, String input) { + try (Pattern p = compile(pattern)) { + return p.matches(input); } - - public static Pattern compile(String pattern) { - return Pattern.compile(pattern); - } - - public static Pattern compile(String pattern, boolean caseSensitive) { - return Pattern.compile(pattern, caseSensitive); - } - - // ========== String Matching Operations ========== - - /** - * Tests if the entire input matches the pattern (full match). - * - * @param pattern regex pattern - * @param input input string - * @return true if entire input matches, false otherwise - */ - public static boolean matches(String pattern, String input) { - try (Pattern p = compile(pattern)) { - return p.matches(input); - } - } - - /** - * Full match with capture groups. - * - * @param pattern regex pattern - * @param input input string - * @return MatchResult with capture groups (use try-with-resources) - */ - public static MatchResult match(String pattern, String input) { - Pattern p = compile(pattern); - return p.match(input); - } - - /** - * Finds first match with capture groups. 
- * - * @param pattern regex pattern - * @param input input string - * @return MatchResult with capture groups (use try-with-resources) - */ - public static MatchResult findFirst(String pattern, String input) { - Pattern p = compile(pattern); - return p.find(input); - } - - /** - * Finds all matches with capture groups. - * - * @param pattern regex pattern - * @param input input string - * @return list of MatchResults (remember to close each) - */ - public static java.util.List findAll(String pattern, String input) { - Pattern p = compile(pattern); - return p.findAll(input); - } - - // ========== Bulk Operations ========== - - /** - * Tests multiple inputs against pattern (bulk full match). - * - * @param pattern regex pattern - * @param inputs array of input strings - * @return boolean array (parallel to inputs) - */ - public static boolean[] matchAll(String pattern, String[] inputs) { - try (Pattern p = compile(pattern)) { - return p.matchAll(inputs); - } - } - - /** - * Tests multiple inputs against pattern (bulk full match). - * - * @param pattern regex pattern - * @param inputs collection of input strings - * @return boolean array (parallel to inputs) - */ - public static boolean[] matchAll(String pattern, java.util.Collection inputs) { - try (Pattern p = compile(pattern)) { - return p.matchAll(inputs); - } - } - - /** - * Full match multiple inputs with capture groups (bulk operation). - * - * @param pattern regex pattern - * @param inputs array of input strings - * @return array of MatchResults (parallel to inputs, remember to close each) - */ - public static MatchResult[] matchAllWithGroups(String pattern, String[] inputs) { - Pattern p = compile(pattern); - return p.matchAllWithGroups(inputs); - } - - /** - * Full match multiple inputs with capture groups (bulk operation). 
- * - * @param pattern regex pattern - * @param inputs collection of input strings - * @return array of MatchResults (parallel to inputs, remember to close each) - */ - public static MatchResult[] matchAllWithGroups(String pattern, java.util.Collection inputs) { - Pattern p = compile(pattern); - return p.matchAllWithGroups(inputs); + } + + /** + * Full match with capture groups. + * + * @param pattern regex pattern + * @param input input string + * @return MatchResult with capture groups (use try-with-resources) + */ + public static MatchResult match(String pattern, String input) { + Pattern p = compile(pattern); + return p.match(input); + } + + /** + * Finds first match with capture groups. + * + * @param pattern regex pattern + * @param input input string + * @return MatchResult with capture groups (use try-with-resources) + */ + public static MatchResult findFirst(String pattern, String input) { + Pattern p = compile(pattern); + return p.find(input); + } + + /** + * Finds all matches with capture groups. + * + * @param pattern regex pattern + * @param input input string + * @return list of MatchResults (remember to close each) + */ + public static java.util.List findAll(String pattern, String input) { + Pattern p = compile(pattern); + return p.findAll(input); + } + + // ========== Bulk Operations ========== + + /** + * Tests multiple inputs against pattern (bulk full match). + * + * @param pattern regex pattern + * @param inputs array of input strings + * @return boolean array (parallel to inputs) + */ + public static boolean[] matchAll(String pattern, String[] inputs) { + try (Pattern p = compile(pattern)) { + return p.matchAll(inputs); } - - /** - * Searches for pattern in multiple inputs (bulk partial match). 
- * - * @param pattern regex pattern - * @param inputs array of input strings - * @return boolean array (parallel to inputs) - */ - public static boolean[] findAll(String pattern, String[] inputs) { - try (Pattern p = compile(pattern)) { - return p.findAll(inputs); - } + } + + /** + * Tests multiple inputs against pattern (bulk full match). + * + * @param pattern regex pattern + * @param inputs collection of input strings + * @return boolean array (parallel to inputs) + */ + public static boolean[] matchAll(String pattern, java.util.Collection inputs) { + try (Pattern p = compile(pattern)) { + return p.matchAll(inputs); } - - /** - * Filters collection to only strings matching the pattern. - * - * @param pattern regex pattern - * @param inputs collection to filter - * @return new list containing only matching strings - */ - public static java.util.List filter(String pattern, java.util.Collection inputs) { - try (Pattern p = compile(pattern)) { - return p.filter(inputs); - } - } - - /** - * Filters collection to only strings NOT matching the pattern. - * - * @param pattern regex pattern - * @param inputs collection to filter - * @return new list containing only non-matching strings - */ - public static java.util.List filterNot(String pattern, java.util.Collection inputs) { - try (Pattern p = compile(pattern)) { - return p.filterNot(inputs); - } - } - - // ========== Replace Operations ========== - - /** - * Replaces first match of pattern in input. - * - * @param pattern regex pattern - * @param input input string - * @param replacement replacement string (supports \\1, \\2 backreferences) - * @return input with first match replaced - */ - public static String replaceFirst(String pattern, String input, String replacement) { - try (Pattern p = compile(pattern)) { - return p.replaceFirst(input, replacement); - } - } - - /** - * Replaces all matches of pattern in input. 
- * - * @param pattern regex pattern - * @param input input string - * @param replacement replacement string (supports \\1, \\2 backreferences) - * @return input with all matches replaced - */ - public static String replaceAll(String pattern, String input, String replacement) { - try (Pattern p = compile(pattern)) { - return p.replaceAll(input, replacement); - } + } + + /** + * Full match multiple inputs with capture groups (bulk operation). + * + * @param pattern regex pattern + * @param inputs array of input strings + * @return array of MatchResults (parallel to inputs, remember to close each) + */ + public static MatchResult[] matchAllWithGroups(String pattern, String[] inputs) { + Pattern p = compile(pattern); + return p.matchAllWithGroups(inputs); + } + + /** + * Full match multiple inputs with capture groups (bulk operation). + * + * @param pattern regex pattern + * @param inputs collection of input strings + * @return array of MatchResults (parallel to inputs, remember to close each) + */ + public static MatchResult[] matchAllWithGroups( + String pattern, java.util.Collection inputs) { + Pattern p = compile(pattern); + return p.matchAllWithGroups(inputs); + } + + /** + * Searches for pattern in multiple inputs (bulk partial match). + * + * @param pattern regex pattern + * @param inputs array of input strings + * @return boolean array (parallel to inputs) + */ + public static boolean[] findAll(String pattern, String[] inputs) { + try (Pattern p = compile(pattern)) { + return p.findAll(inputs); } - - /** - * Replaces all matches in multiple strings (bulk operation). 
- * - * @param pattern regex pattern - * @param inputs array of input strings - * @param replacement replacement string (supports backreferences) - * @return array of strings with matches replaced (parallel to inputs) - */ - public static String[] replaceAll(String pattern, String[] inputs, String replacement) { - try (Pattern p = compile(pattern)) { - return p.replaceAll(inputs, replacement); - } + } + + /** + * Filters collection to only strings matching the pattern. + * + * @param pattern regex pattern + * @param inputs collection to filter + * @return new list containing only matching strings + */ + public static java.util.List filter(String pattern, java.util.Collection inputs) { + try (Pattern p = compile(pattern)) { + return p.filter(inputs); } - - /** - * Replaces all matches in a collection (bulk operation). - * - * @param pattern regex pattern - * @param inputs collection of input strings - * @param replacement replacement string (supports backreferences) - * @return list of strings with matches replaced (same order) - */ - public static java.util.List replaceAll(String pattern, java.util.Collection inputs, String replacement) { - try (Pattern p = compile(pattern)) { - return p.replaceAll(inputs, replacement); - } + } + + /** + * Filters collection to only strings NOT matching the pattern. + * + * @param pattern regex pattern + * @param inputs collection to filter + * @return new list containing only non-matching strings + */ + public static java.util.List filterNot( + String pattern, java.util.Collection inputs) { + try (Pattern p = compile(pattern)) { + return p.filterNot(inputs); } - - // ========== ByteBuffer Operations ========== - - /** - * Tests if ByteBuffer matches pattern (full match, zero-copy if direct). 
- * - * @param pattern regex pattern - * @param input ByteBuffer containing UTF-8 text - * @return true if entire buffer matches - */ - public static boolean matches(String pattern, java.nio.ByteBuffer input) { - try (Pattern p = compile(pattern)) { - return p.matches(input); - } + } + + // ========== Replace Operations ========== + + /** + * Replaces first match of pattern in input. + * + * @param pattern regex pattern + * @param input input string + * @param replacement replacement string (supports \\1, \\2 backreferences) + * @return input with first match replaced + */ + public static String replaceFirst(String pattern, String input, String replacement) { + try (Pattern p = compile(pattern)) { + return p.replaceFirst(input, replacement); } - - /** - * Full match with capture groups from ByteBuffer (zero-copy if direct). - * - * @param pattern regex pattern - * @param input ByteBuffer containing UTF-8 text - * @return MatchResult with capture groups (use try-with-resources) - */ - public static MatchResult matchWithGroups(String pattern, java.nio.ByteBuffer input) { - Pattern p = compile(pattern); - return p.matchWithGroups(input); + } + + /** + * Replaces all matches of pattern in input. + * + * @param pattern regex pattern + * @param input input string + * @param replacement replacement string (supports \\1, \\2 backreferences) + * @return input with all matches replaced + */ + public static String replaceAll(String pattern, String input, String replacement) { + try (Pattern p = compile(pattern)) { + return p.replaceAll(input, replacement); } - - /** - * Finds first match with capture groups from ByteBuffer (zero-copy if direct). 
- * - * @param pattern regex pattern - * @param input ByteBuffer containing UTF-8 text - * @return MatchResult with capture groups (use try-with-resources) - */ - public static MatchResult findWithGroups(String pattern, java.nio.ByteBuffer input) { - Pattern p = compile(pattern); - return p.findWithGroups(input); + } + + /** + * Replaces all matches in multiple strings (bulk operation). + * + * @param pattern regex pattern + * @param inputs array of input strings + * @param replacement replacement string (supports backreferences) + * @return array of strings with matches replaced (parallel to inputs) + */ + public static String[] replaceAll(String pattern, String[] inputs, String replacement) { + try (Pattern p = compile(pattern)) { + return p.replaceAll(inputs, replacement); } - - /** - * Finds all matches with capture groups from ByteBuffer (zero-copy if direct). - * - * @param pattern regex pattern - * @param input ByteBuffer containing UTF-8 text - * @return list of MatchResults (remember to close each) - */ - public static java.util.List findAllWithGroups(String pattern, java.nio.ByteBuffer input) { - Pattern p = compile(pattern); - return p.findAllWithGroups(input); + } + + /** + * Replaces all matches in a collection (bulk operation). + * + * @param pattern regex pattern + * @param inputs collection of input strings + * @param replacement replacement string (supports backreferences) + * @return list of strings with matches replaced (same order) + */ + public static java.util.List replaceAll( + String pattern, java.util.Collection inputs, String replacement) { + try (Pattern p = compile(pattern)) { + return p.replaceAll(inputs, replacement); } - - // ========== Utility Operations ========== - - /** - * Escapes special regex characters for literal matching. - * - *

Converts a literal string into a regex pattern that matches that exact string. - * Special characters like . * + ? ( ) [ ] { } ^ $ | \ are escaped.

- * - *

Example:

- *
{@code
-     * String literal = "price: $9.99";
-     * String escaped = RE2.quoteMeta(literal);
-     * Pattern p = Pattern.compile(escaped);
-     * boolean matches = p.matches("price: $9.99");  // true
-     * }
- * - * @param text literal text to escape - * @return escaped pattern that matches the literal text exactly - * @throws NullPointerException if text is null - */ - public static String quoteMeta(String text) { - return Pattern.quoteMeta(text); + } + + // ========== ByteBuffer Operations ========== + + /** + * Tests if ByteBuffer matches pattern (full match, zero-copy if direct). + * + * @param pattern regex pattern + * @param input ByteBuffer containing UTF-8 text + * @return true if entire buffer matches + */ + public static boolean matches(String pattern, java.nio.ByteBuffer input) { + try (Pattern p = compile(pattern)) { + return p.matches(input); } - - /** - * Gets the DFA fanout for a pattern. - * - *

Analyzes pattern complexity by returning DFA state transition counts.

- * - * @param pattern regex pattern to analyze - * @return array of fanout values (complexity metric) - */ - public static int[] getProgramFanout(String pattern) { - try (Pattern p = compile(pattern)) { - return p.getProgramFanout(); - } + } + + /** + * Full match with capture groups from ByteBuffer (zero-copy if direct). + * + * @param pattern regex pattern + * @param input ByteBuffer containing UTF-8 text + * @return MatchResult with capture groups (use try-with-resources) + */ + public static MatchResult matchWithGroups(String pattern, java.nio.ByteBuffer input) { + Pattern p = compile(pattern); + return p.matchWithGroups(input); + } + + /** + * Finds first match with capture groups from ByteBuffer (zero-copy if direct). + * + * @param pattern regex pattern + * @param input ByteBuffer containing UTF-8 text + * @return MatchResult with capture groups (use try-with-resources) + */ + public static MatchResult findWithGroups(String pattern, java.nio.ByteBuffer input) { + Pattern p = compile(pattern); + return p.findWithGroups(input); + } + + /** + * Finds all matches with capture groups from ByteBuffer (zero-copy if direct). + * + * @param pattern regex pattern + * @param input ByteBuffer containing UTF-8 text + * @return list of MatchResults (remember to close each) + */ + public static java.util.List findAllWithGroups( + String pattern, java.nio.ByteBuffer input) { + Pattern p = compile(pattern); + return p.findAllWithGroups(input); + } + + // ========== Utility Operations ========== + + /** + * Escapes special regex characters for literal matching. + * + *

Converts a literal string into a regex pattern that matches that exact string. Special + * characters like . * + ? ( ) [ ] { } ^ $ | \ are escaped. + * + *

Example: + * + *

{@code
+   * String literal = "price: $9.99";
+   * String escaped = RE2.quoteMeta(literal);
+   * Pattern p = Pattern.compile(escaped);
+   * boolean matches = p.matches("price: $9.99");  // true
+   * }
+ * + * @param text literal text to escape + * @return escaped pattern that matches the literal text exactly + * @throws NullPointerException if text is null + */ + public static String quoteMeta(String text) { + return Pattern.quoteMeta(text); + } + + /** + * Gets the DFA fanout for a pattern. + * + *

Analyzes pattern complexity by returning DFA state transition counts. + * + * @param pattern regex pattern to analyze + * @return array of fanout values (complexity metric) + */ + public static int[] getProgramFanout(String pattern) { + try (Pattern p = compile(pattern)) { + return p.getProgramFanout(); } - - /** - * Gets the native memory size of a compiled pattern. - * - * @param pattern regex pattern to analyze - * @return size in bytes of compiled DFA/NFA program - */ - public static long getProgramSize(String pattern) { - try (Pattern p = compile(pattern)) { - return p.getNativeMemoryBytes(); - } + } + + /** + * Gets the native memory size of a compiled pattern. + * + * @param pattern regex pattern to analyze + * @return size in bytes of compiled DFA/NFA program + */ + public static long getProgramSize(String pattern) { + try (Pattern p = compile(pattern)) { + return p.getNativeMemoryBytes(); } + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/RE2Exception.java b/libre2-core/src/main/java/com/axonops/libre2/api/RE2Exception.java index 4f4afcd..7d0f795 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/api/RE2Exception.java +++ b/libre2-core/src/main/java/com/axonops/libre2/api/RE2Exception.java @@ -19,21 +19,21 @@ /** * Base exception for all RE2-related errors. * - * Sealed class ensuring exhaustive handling of all error types. + *

Sealed class ensuring exhaustive handling of all error types. * * @since 1.0.0 */ public sealed class RE2Exception extends RuntimeException permits PatternCompilationException, - NativeLibraryException, - RE2TimeoutException, - ResourceException { + NativeLibraryException, + RE2TimeoutException, + ResourceException { - public RE2Exception(String message) { - super(message); - } + public RE2Exception(String message) { + super(message); + } - public RE2Exception(String message, Throwable cause) { - super(message, cause); - } + public RE2Exception(String message, Throwable cause) { + super(message, cause); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/RE2TimeoutException.java b/libre2-core/src/main/java/com/axonops/libre2/api/RE2TimeoutException.java index 8e7cd49..2312d44 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/api/RE2TimeoutException.java +++ b/libre2-core/src/main/java/com/axonops/libre2/api/RE2TimeoutException.java @@ -23,14 +23,14 @@ */ public final class RE2TimeoutException extends RE2Exception { - private final long timeoutMillis; + private final long timeoutMillis; - public RE2TimeoutException(long timeoutMillis) { - super("RE2: Operation timed out after " + timeoutMillis + " ms"); - this.timeoutMillis = timeoutMillis; - } + public RE2TimeoutException(long timeoutMillis) { + super("RE2: Operation timed out after " + timeoutMillis + " ms"); + this.timeoutMillis = timeoutMillis; + } - public long getTimeoutMillis() { - return timeoutMillis; - } + public long getTimeoutMillis() { + return timeoutMillis; + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/api/ResourceException.java b/libre2-core/src/main/java/com/axonops/libre2/api/ResourceException.java index fe62494..630a4ac 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/api/ResourceException.java +++ b/libre2-core/src/main/java/com/axonops/libre2/api/ResourceException.java @@ -23,11 +23,11 @@ */ public final class ResourceException extends 
RE2Exception { - public ResourceException(String message) { - super("RE2: Resource error: " + message); - } + public ResourceException(String message) { + super("RE2: Resource error: " + message); + } - public ResourceException(String message, Throwable cause) { - super("RE2: Resource error: " + message, cause); - } + public ResourceException(String message, Throwable cause) { + super("RE2: Resource error: " + message, cause); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/cache/CacheStatistics.java b/libre2-core/src/main/java/com/axonops/libre2/cache/CacheStatistics.java index 28b8cfa..c25d03f 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/cache/CacheStatistics.java +++ b/libre2-core/src/main/java/com/axonops/libre2/cache/CacheStatistics.java @@ -19,7 +19,7 @@ /** * Cache statistics for monitoring and metrics. * - * Immutable snapshot of cache state at a point in time. + *

Immutable snapshot of cache state at a point in time. * * @since 1.0.0 */ @@ -34,56 +34,49 @@ public record CacheStatistics( int deferredCleanupSize, long nativeMemoryBytes, long peakNativeMemoryBytes, - long invalidPatternRecompilations -) { + long invalidPatternRecompilations) { - /** - * Calculates hit rate. - * - * @return hit rate between 0.0 and 1.0, or 0.0 if no requests - */ - public double hitRate() { - long total = hits + misses; - return total == 0 ? 0.0 : (double) hits / total; - } + /** + * Calculates hit rate. + * + * @return hit rate between 0.0 and 1.0, or 0.0 if no requests + */ + public double hitRate() { + long total = hits + misses; + return total == 0 ? 0.0 : (double) hits / total; + } - /** - * Calculates miss rate. - * - * @return miss rate between 0.0 and 1.0, or 0.0 if no requests - */ - public double missRate() { - long total = hits + misses; - return total == 0 ? 0.0 : (double) misses / total; - } + /** + * Calculates miss rate. + * + * @return miss rate between 0.0 and 1.0, or 0.0 if no requests + */ + public double missRate() { + long total = hits + misses; + return total == 0 ? 0.0 : (double) misses / total; + } - /** - * Total number of evictions (LRU + idle + deferred cleanup). - */ - public long totalEvictions() { - return evictionsLRU + evictionsIdle; - } + /** Total number of evictions (LRU + idle + deferred cleanup). */ + public long totalEvictions() { + return evictionsLRU + evictionsIdle; + } - /** - * Total patterns pending deferred cleanup (evicted but not yet freed). - */ - public int deferredCleanupPending() { - return deferredCleanupSize; - } + /** Total patterns pending deferred cleanup (evicted but not yet freed). */ + public int deferredCleanupPending() { + return deferredCleanupSize; + } - /** - * Total number of requests (hits + misses). - */ - public long totalRequests() { - return hits + misses; - } + /** Total number of requests (hits + misses). 
*/ + public long totalRequests() { + return hits + misses; + } - /** - * Cache utilization percentage. - * - * @return utilization between 0.0 and 1.0 - */ - public double utilization() { - return maxSize == 0 ? 0.0 : (double) currentSize / maxSize; - } + /** + * Cache utilization percentage. + * + * @return utilization between 0.0 and 1.0 + */ + public double utilization() { + return maxSize == 0 ? 0.0 : (double) currentSize / maxSize; + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/cache/IdleEvictionTask.java b/libre2-core/src/main/java/com/axonops/libre2/cache/IdleEvictionTask.java index 0ff256b..90a65a7 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/cache/IdleEvictionTask.java +++ b/libre2-core/src/main/java/com/axonops/libre2/cache/IdleEvictionTask.java @@ -16,119 +16,114 @@ package com.axonops.libre2.cache; +import java.util.concurrent.atomic.AtomicBoolean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.concurrent.atomic.AtomicBoolean; - /** * Background thread that periodically evicts idle patterns from cache. * - * Runs as daemon thread at low priority to avoid interfering with - * query execution in Cassandra. + *

Runs as daemon thread at low priority to avoid interfering with query execution in Cassandra. * * @since 1.0.0 */ final class IdleEvictionTask { - private static final Logger logger = LoggerFactory.getLogger(IdleEvictionTask.class); - - private final PatternCache cache; - private final RE2Config config; - private final AtomicBoolean running = new AtomicBoolean(false); - private volatile Thread thread; - - IdleEvictionTask(PatternCache cache, RE2Config config) { - this.cache = cache; - this.config = config; + private static final Logger logger = LoggerFactory.getLogger(IdleEvictionTask.class); + + private final PatternCache cache; + private final RE2Config config; + private final AtomicBoolean running = new AtomicBoolean(false); + private volatile Thread thread; + + IdleEvictionTask(PatternCache cache, RE2Config config) { + this.cache = cache; + this.config = config; + } + + /** Starts the eviction thread. */ + void start() { + if (running.compareAndSet(false, true)) { + thread = new Thread(this::run, "RE2-IdleEviction"); + thread.setDaemon(true); + thread.setPriority(Thread.MIN_PRIORITY); // Low priority - don't interfere with queries + thread.start(); + + logger.debug( + "RE2: Idle eviction thread started - interval: {}s", + config.evictionScanIntervalSeconds()); } - - /** - * Starts the eviction thread. - */ - void start() { - if (running.compareAndSet(false, true)) { - thread = new Thread(this::run, "RE2-IdleEviction"); - thread.setDaemon(true); - thread.setPriority(Thread.MIN_PRIORITY); // Low priority - don't interfere with queries - thread.start(); - - logger.debug("RE2: Idle eviction thread started - interval: {}s", - config.evictionScanIntervalSeconds()); + } + + /** Stops the eviction thread gracefully. 
*/ + void stop() { + if (running.compareAndSet(true, false)) { + logger.info("RE2: Stopping idle eviction thread"); + + Thread t = thread; + if (t != null) { + t.interrupt(); + try { + t.join(5000); // Wait up to 5 seconds + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); } - } + } - /** - * Stops the eviction thread gracefully. - */ - void stop() { - if (running.compareAndSet(true, false)) { - logger.info("RE2: Stopping idle eviction thread"); - - Thread t = thread; - if (t != null) { - t.interrupt(); - try { - t.join(5000); // Wait up to 5 seconds - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - - logger.info("RE2: Idle eviction thread stopped"); - } + logger.info("RE2: Idle eviction thread stopped"); } - - /** - * Main eviction loop. - * - * Wakes frequently (every 5s) to cleanup deferred patterns. - * Runs idle eviction less frequently (every 60s by default). - */ - private void run() { - logger.debug("RE2: Idle eviction thread initialized - idleScanInterval: {}s, deferredCleanupInterval: {}s", - config.evictionScanIntervalSeconds(), config.deferredCleanupIntervalSeconds()); - - long lastIdleScan = System.currentTimeMillis(); - long idleScanIntervalMs = config.evictionScanIntervalSeconds() * 1000; - long deferredCleanupIntervalMs = config.deferredCleanupIntervalSeconds() * 1000; - - while (running.get()) { - try { - // Sleep for deferred cleanup interval (5 seconds) - Thread.sleep(deferredCleanupIntervalMs); - - long now = System.currentTimeMillis(); - - // Check if time for idle eviction scan - if (now - lastIdleScan >= idleScanIntervalMs) { - // Full scan: idle eviction + deferred cleanup - int evicted = cache.evictIdlePatterns(); - logger.debug("RE2: Idle eviction scan complete - evicted: {}", evicted); - lastIdleScan = now; - } else { - // Quick scan: just deferred cleanup - int cleaned = cache.cleanupDeferredPatterns(); - if (cleaned > 0) { - logger.debug("RE2: Deferred cleanup - freed {} 
patterns", cleaned); - } - } - - } catch (InterruptedException e) { - logger.debug("RE2: Idle eviction thread interrupted"); - break; - } catch (Exception e) { - logger.error("RE2: Error in idle eviction thread", e); - // Continue running despite errors - } + } + + /** + * Main eviction loop. + * + *

Wakes frequently (every 5s) to cleanup deferred patterns. Runs idle eviction less frequently + * (every 60s by default). + */ + private void run() { + logger.debug( + "RE2: Idle eviction thread initialized - idleScanInterval: {}s, deferredCleanupInterval: {}s", + config.evictionScanIntervalSeconds(), + config.deferredCleanupIntervalSeconds()); + + long lastIdleScan = System.currentTimeMillis(); + long idleScanIntervalMs = config.evictionScanIntervalSeconds() * 1000; + long deferredCleanupIntervalMs = config.deferredCleanupIntervalSeconds() * 1000; + + while (running.get()) { + try { + // Sleep for deferred cleanup interval (5 seconds) + Thread.sleep(deferredCleanupIntervalMs); + + long now = System.currentTimeMillis(); + + // Check if time for idle eviction scan + if (now - lastIdleScan >= idleScanIntervalMs) { + // Full scan: idle eviction + deferred cleanup + int evicted = cache.evictIdlePatterns(); + logger.debug("RE2: Idle eviction scan complete - evicted: {}", evicted); + lastIdleScan = now; + } else { + // Quick scan: just deferred cleanup + int cleaned = cache.cleanupDeferredPatterns(); + if (cleaned > 0) { + logger.debug("RE2: Deferred cleanup - freed {} patterns", cleaned); + } } - logger.debug("RE2: Idle eviction thread exiting"); + } catch (InterruptedException e) { + logger.debug("RE2: Idle eviction thread interrupted"); + break; + } catch (Exception e) { + logger.error("RE2: Error in idle eviction thread", e); + // Continue running despite errors + } } - /** - * Checks if eviction thread is running. - */ - boolean isRunning() { - return running.get() && thread != null && thread.isAlive(); - } + logger.debug("RE2: Idle eviction thread exiting"); + } + + /** Checks if eviction thread is running. 
*/ + boolean isRunning() { + return running.get() && thread != null && thread.isAlive(); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/cache/PatternCache.java b/libre2-core/src/main/java/com/axonops/libre2/cache/PatternCache.java index 20f56b2..5460f12 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/cache/PatternCache.java +++ b/libre2-core/src/main/java/com/axonops/libre2/cache/PatternCache.java @@ -17,12 +17,9 @@ package com.axonops.libre2.cache; import com.axonops.libre2.api.Pattern; -import com.axonops.libre2.metrics.RE2MetricsRegistry; import com.axonops.libre2.metrics.MetricNames; +import com.axonops.libre2.metrics.RE2MetricsRegistry; import com.axonops.libre2.util.PatternHasher; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.util.Comparator; import java.util.List; import java.util.Map; @@ -33,659 +30,678 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * High-performance thread-safe cache for compiled patterns with dual eviction. * - * Eviction strategies: - * 1. LRU (soft limit): When cache exceeds max size, async evict least recently used - * 2. Idle time: Background thread evicts patterns idle beyond timeout + *

Eviction strategies: 1. LRU (soft limit): When cache exceeds max size, async evict least + * recently used 2. Idle time: Background thread evicts patterns idle beyond timeout * - * Performance characteristics: - * - Lock-free cache reads (ConcurrentHashMap) - * - Lock-free timestamp updates (AtomicLong) - * - Non-blocking eviction (async LRU, concurrent idle scan) - * - Soft limits: cache can temporarily exceed maxSize by ~10% + *

Performance characteristics: - Lock-free cache reads (ConcurrentHashMap) - Lock-free timestamp + * updates (AtomicLong) - Non-blocking eviction (async LRU, concurrent idle scan) - Soft limits: + * cache can temporarily exceed maxSize by ~10% * * @since 1.0.0 */ public final class PatternCache { - private static final Logger logger = LoggerFactory.getLogger(PatternCache.class); - - private volatile RE2Config config; - private final com.axonops.libre2.util.ResourceTracker resourceTracker; - - public RE2Config getConfig() { - return config; - } - - public com.axonops.libre2.util.ResourceTracker getResourceTracker() { - return resourceTracker; - } - - // ConcurrentHashMap for lock-free reads/writes - private ConcurrentHashMap cache; - private IdleEvictionTask evictionTask; - - // Single-thread executor for async LRU eviction (doesn't block cache access) - private ExecutorService lruEvictionExecutor; - - // Deferred cleanup: Patterns evicted from cache but still in use (refCount > 0) - private final CopyOnWriteArrayList deferredCleanup = new CopyOnWriteArrayList<>(); - - // Statistics (all atomic, lock-free) - private final AtomicLong hits = new AtomicLong(0); - private final AtomicLong misses = new AtomicLong(0); - private final AtomicLong evictionsLRU = new AtomicLong(0); - private final AtomicLong evictionsIdle = new AtomicLong(0); - private final AtomicLong evictionsDeferred = new AtomicLong(0); - - // Native memory tracking (off-heap memory consumed by patterns) - private final AtomicLong totalNativeMemoryBytes = new AtomicLong(0); - private final AtomicLong peakNativeMemoryBytes = new AtomicLong(0); - - // Deferred cleanup tracking - private final AtomicLong deferredNativeMemoryBytes = new AtomicLong(0); - private final AtomicLong peakDeferredNativeMemoryBytes = new AtomicLong(0); - private final AtomicInteger peakDeferredPatternCount = new AtomicInteger(0); - - // Invalid pattern recompilations (defensive check triggered) - private final AtomicLong 
invalidPatternRecompilations = new AtomicLong(0); - - public PatternCache(RE2Config config) { - this.config = config; - this.resourceTracker = new com.axonops.libre2.util.ResourceTracker(); - - if (config.cacheEnabled()) { - // ConcurrentHashMap for lock-free concurrent access - this.cache = new ConcurrentHashMap<>(config.maxCacheSize()); - - // Single-thread executor for async LRU eviction - this.lruEvictionExecutor = Executors.newSingleThreadExecutor(r -> { + private static final Logger logger = LoggerFactory.getLogger(PatternCache.class); + + private volatile RE2Config config; + private final com.axonops.libre2.util.ResourceTracker resourceTracker; + + public RE2Config getConfig() { + return config; + } + + public com.axonops.libre2.util.ResourceTracker getResourceTracker() { + return resourceTracker; + } + + // ConcurrentHashMap for lock-free reads/writes + private ConcurrentHashMap cache; + private IdleEvictionTask evictionTask; + + // Single-thread executor for async LRU eviction (doesn't block cache access) + private ExecutorService lruEvictionExecutor; + + // Deferred cleanup: Patterns evicted from cache but still in use (refCount > 0) + private final CopyOnWriteArrayList deferredCleanup = new CopyOnWriteArrayList<>(); + + // Statistics (all atomic, lock-free) + private final AtomicLong hits = new AtomicLong(0); + private final AtomicLong misses = new AtomicLong(0); + private final AtomicLong evictionsLRU = new AtomicLong(0); + private final AtomicLong evictionsIdle = new AtomicLong(0); + private final AtomicLong evictionsDeferred = new AtomicLong(0); + + // Native memory tracking (off-heap memory consumed by patterns) + private final AtomicLong totalNativeMemoryBytes = new AtomicLong(0); + private final AtomicLong peakNativeMemoryBytes = new AtomicLong(0); + + // Deferred cleanup tracking + private final AtomicLong deferredNativeMemoryBytes = new AtomicLong(0); + private final AtomicLong peakDeferredNativeMemoryBytes = new AtomicLong(0); + private final 
AtomicInteger peakDeferredPatternCount = new AtomicInteger(0); + + // Invalid pattern recompilations (defensive check triggered) + private final AtomicLong invalidPatternRecompilations = new AtomicLong(0); + + /** + * Creates a new pattern cache with the given configuration. + * + * @param config the cache configuration + */ + public PatternCache(RE2Config config) { + this.config = config; + this.resourceTracker = new com.axonops.libre2.util.ResourceTracker(); + + if (config.cacheEnabled()) { + // ConcurrentHashMap for lock-free concurrent access + this.cache = new ConcurrentHashMap<>(config.maxCacheSize()); + + // Single-thread executor for async LRU eviction + this.lruEvictionExecutor = + Executors.newSingleThreadExecutor( + r -> { Thread t = new Thread(r, "RE2-LRU-Eviction"); t.setDaemon(true); t.setPriority(Thread.MIN_PRIORITY); return t; - }); - - // Start idle eviction background thread - this.evictionTask = new IdleEvictionTask(this, config); - this.evictionTask.start(); - - // Note: Shutdown hooks not registered to avoid test interference. - // Tests create multiple cache instances and explicit cleanup in tearDown. - // Production applications should call shutdown() explicitly or rely on - // try-with-resources if cache lifecycle is scoped. - - logger.debug("RE2: Pattern cache initialized - maxSize: {}, idleTimeout: {}s, scanInterval: {}s, deferredCleanup: every {}s", - config.maxCacheSize(), - config.idleTimeoutSeconds(), - config.evictionScanIntervalSeconds(), - config.deferredCleanupIntervalSeconds()); - - // Register cache metrics (gauges) - registerCacheMetrics(); - } else { - this.cache = null; - this.evictionTask = null; - this.lruEvictionExecutor = null; - logger.info("RE2: Pattern caching disabled"); - } + }); + + // Start idle eviction background thread + this.evictionTask = new IdleEvictionTask(this, config); + this.evictionTask.start(); + + // Note: Shutdown hooks not registered to avoid test interference. 
+ // Tests create multiple cache instances and explicit cleanup in tearDown. + // Production applications should call shutdown() explicitly or rely on + // try-with-resources if cache lifecycle is scoped. + + logger.debug( + "RE2: Pattern cache initialized - maxSize: {}, idleTimeout: {}s, scanInterval: {}s, deferredCleanup: every {}s", + config.maxCacheSize(), + config.idleTimeoutSeconds(), + config.evictionScanIntervalSeconds(), + config.deferredCleanupIntervalSeconds()); + + // Register cache metrics (gauges) + registerCacheMetrics(); + } else { + this.cache = null; + this.evictionTask = null; + this.lruEvictionExecutor = null; + logger.info("RE2: Pattern caching disabled"); + } + } + + /** + * Gets or compiles a pattern. + * + *

Lock-free for cache hits. Only compiles patterns when necessary. Uses computeIfAbsent for + * safe concurrent compilation. + * + * @param patternString regex pattern + * @param caseSensitive case sensitivity flag + * @param compiler function to compile pattern on cache miss + * @return cached or newly compiled pattern + */ + public Pattern getOrCompile( + String patternString, boolean caseSensitive, java.util.function.Supplier compiler) { + RE2MetricsRegistry metrics = config.metricsRegistry(); + + if (!config.cacheEnabled()) { + misses.incrementAndGet(); + metrics.incrementCounter(MetricNames.PATTERNS_CACHE_MISSES); + return compiler.get(); } - /** - * Gets or compiles a pattern. - * - * Lock-free for cache hits. Only compiles patterns when necessary. - * Uses computeIfAbsent for safe concurrent compilation. - * - * @param patternString regex pattern - * @param caseSensitive case sensitivity flag - * @param compiler function to compile pattern on cache miss - * @return cached or newly compiled pattern - */ - public Pattern getOrCompile(String patternString, boolean caseSensitive, - java.util.function.Supplier compiler) { - RE2MetricsRegistry metrics = config.metricsRegistry(); - - if (!config.cacheEnabled()) { - misses.incrementAndGet(); - metrics.incrementCounter(MetricNames.PATTERNS_CACHE_MISSES); - return compiler.get(); - } - - CacheKey key = new CacheKey(patternString, caseSensitive); - - // Try to get existing pattern (lock-free read) - CachedPattern cached = cache.get(key); - if (cached != null) { - // Optional defensive check: validate native pointer is still valid - if (config.validateCachedPatterns() && !cached.pattern().isValid()) { - String hash = PatternHasher.hash(patternString); - logger.warn("RE2: Invalid cached pattern detected - hash: {}, recompiling", hash); - invalidPatternRecompilations.incrementAndGet(); - metrics.incrementCounter(MetricNames.PATTERNS_INVALID_RECOMPILED); - // Remove invalid pattern and decrement memory - if 
(cache.remove(key, cached)) { - totalNativeMemoryBytes.addAndGet(-cached.memoryBytes()); - } - // Fall through to recompile below - } else { - // Cache hit - update access time atomically - cached.touch(); - hits.incrementAndGet(); - metrics.incrementCounter(MetricNames.PATTERNS_CACHE_HITS); - logger.trace("RE2: Cache hit - hash: {}", - PatternHasher.hash(patternString)); - return cached.pattern(); - } + CacheKey key = new CacheKey(patternString, caseSensitive); + + // Try to get existing pattern (lock-free read) + CachedPattern cached = cache.get(key); + if (cached != null) { + // Optional defensive check: validate native pointer is still valid + if (config.validateCachedPatterns() && !cached.pattern().isValid()) { + String hash = PatternHasher.hash(patternString); + logger.warn("RE2: Invalid cached pattern detected - hash: {}, recompiling", hash); + invalidPatternRecompilations.incrementAndGet(); + metrics.incrementCounter(MetricNames.PATTERNS_INVALID_RECOMPILED); + // Remove invalid pattern and decrement memory + if (cache.remove(key, cached)) { + totalNativeMemoryBytes.addAndGet(-cached.memoryBytes()); } + // Fall through to recompile below + } else { + // Cache hit - update access time atomically + cached.touch(); + hits.incrementAndGet(); + metrics.incrementCounter(MetricNames.PATTERNS_CACHE_HITS); + logger.trace("RE2: Cache hit - hash: {}", PatternHasher.hash(patternString)); + return cached.pattern(); + } + } - // Cache miss - use computeIfAbsent for safe concurrent compilation - // Only ONE thread compiles each unique pattern - misses.incrementAndGet(); - metrics.incrementCounter(MetricNames.PATTERNS_CACHE_MISSES); - logger.trace("RE2: Cache miss - hash: {}, compiling", PatternHasher.hash(patternString)); - - // Track whether this thread compiled a new pattern - final long[] addedMemory = {0}; - - CachedPattern newCached = cache.computeIfAbsent(key, k -> { - // This lambda executes atomically for this key only - // Other keys can be accessed concurrently - 
Pattern pattern = compiler.get(); - CachedPattern created = new CachedPattern(pattern); - addedMemory[0] = created.memoryBytes(); - return created; - }); + // Cache miss - use computeIfAbsent for safe concurrent compilation + // Only ONE thread compiles each unique pattern + misses.incrementAndGet(); + metrics.incrementCounter(MetricNames.PATTERNS_CACHE_MISSES); + logger.trace("RE2: Cache miss - hash: {}, compiling", PatternHasher.hash(patternString)); + + // Track whether this thread compiled a new pattern + final long[] addedMemory = {0}; + + CachedPattern newCached = + cache.computeIfAbsent( + key, + k -> { + // This lambda executes atomically for this key only + // Other keys can be accessed concurrently + Pattern pattern = compiler.get(); + CachedPattern created = new CachedPattern(pattern); + addedMemory[0] = created.memoryBytes(); + return created; + }); - // If we compiled a new pattern, update memory tracking - if (addedMemory[0] > 0) { - totalNativeMemoryBytes.addAndGet(addedMemory[0]); - updatePeakMemory(); - } + // If we compiled a new pattern, update memory tracking + if (addedMemory[0] > 0) { + totalNativeMemoryBytes.addAndGet(addedMemory[0]); + updatePeakMemory(); + } - // If we just added a new pattern, check if we need async LRU eviction - // Soft limit: trigger eviction if over, but don't block - int currentSize = cache.size(); - if (currentSize > config.maxCacheSize()) { - triggerAsyncLRUEviction(currentSize - config.maxCacheSize()); - } + // If we just added a new pattern, check if we need async LRU eviction + // Soft limit: trigger eviction if over, but don't block + int currentSize = cache.size(); + if (currentSize > config.maxCacheSize()) { + triggerAsyncLRUEviction(currentSize - config.maxCacheSize()); + } - return newCached.pattern(); + return newCached.pattern(); + } + + /** + * Triggers async LRU eviction (doesn't block caller). + * + *

Soft limit approach: cache can temporarily exceed maxSize while eviction runs in background. + */ + private void triggerAsyncLRUEviction(int toEvict) { + if (toEvict <= 0) { + return; } - /** - * Triggers async LRU eviction (doesn't block caller). - * - * Soft limit approach: cache can temporarily exceed maxSize - * while eviction runs in background. - */ - private void triggerAsyncLRUEviction(int toEvict) { - if (toEvict <= 0) return; - - lruEvictionExecutor.submit(() -> { - try { - evictLRUBatch(toEvict); - } catch (Exception e) { - logger.warn("RE2: Error during async LRU eviction", e); - } + lruEvictionExecutor.submit( + () -> { + try { + evictLRUBatch(toEvict); + } catch (Exception e) { + logger.warn("RE2: Error during async LRU eviction", e); + } }); + } + + /** + * Evicts least-recently-used patterns. + * + *

Uses sample-based LRU: samples subset of cache and evicts oldest. Much faster than scanning + * entire cache. + * + *

IMPORTANT: Patterns accessed within the last 100ms are protected from eviction to prevent + * race conditions where a pattern is evicted before the caller can use it. + */ + private void evictLRUBatch(int toEvict) { + int actualToEvict = Math.min(toEvict, cache.size() - config.maxCacheSize()); + if (actualToEvict <= 0) { + return; } - /** - * Evicts least-recently-used patterns. - * - * Uses sample-based LRU: samples subset of cache and evicts oldest. - * Much faster than scanning entire cache. - * - * IMPORTANT: Patterns accessed within the last 100ms are protected from eviction - * to prevent race conditions where a pattern is evicted before the caller can use it. - */ - private void evictLRUBatch(int toEvict) { - int actualToEvict = Math.min(toEvict, cache.size() - config.maxCacheSize()); - if (actualToEvict <= 0) return; - - // Sample-based LRU: sample a subset and evict oldest - // This is O(sample size) not O(cache size) - int sampleSize = Math.min(500, cache.size()); - - // Minimum age before a pattern can be evicted (configurable, default 1 second) - // This prevents evicting patterns before the caller has a chance to use them - long minAgeNanos = config.evictionProtectionMs() * 1_000_000L; - long cutoffTime = System.nanoTime() - minAgeNanos; - - List> candidates = cache.entrySet() - .stream() - .filter(e -> e.getValue().lastAccessTimeNanos() < cutoffTime) // Only evict "old" patterns + // Sample-based LRU: sample a subset and evict oldest + // This is O(sample size) not O(cache size) + int sampleSize = Math.min(500, cache.size()); + + // Minimum age before a pattern can be evicted (configurable, default 1 second) + // This prevents evicting patterns before the caller has a chance to use them + long minAgeNanos = config.evictionProtectionMs() * 1_000_000L; + long cutoffTime = System.nanoTime() - minAgeNanos; + + List> candidates = + cache.entrySet().stream() + .filter( + e -> e.getValue().lastAccessTimeNanos() < cutoffTime) // Only evict "old" patterns 
.limit(sampleSize) .sorted(Comparator.comparingLong(e -> e.getValue().lastAccessTimeNanos())) .limit(actualToEvict) .collect(Collectors.toList()); - int evicted = 0; - for (Map.Entry entry : candidates) { - CachedPattern cached = entry.getValue(); - - // Only evict if we successfully remove from map - if (cache.remove(entry.getKey(), cached)) { - // Decrement memory tracking (pattern removed from cache) - totalNativeMemoryBytes.addAndGet(-cached.memoryBytes()); + int evicted = 0; + for (Map.Entry entry : candidates) { + CachedPattern cached = entry.getValue(); - if (cached.pattern().getRefCount() > 0) { - // Pattern in use - defer cleanup - deferredCleanup.add(cached); - evictionsDeferred.incrementAndGet(); + // Only evict if we successfully remove from map + if (cache.remove(entry.getKey(), cached)) { + // Decrement memory tracking (pattern removed from cache) + totalNativeMemoryBytes.addAndGet(-cached.memoryBytes()); - // Track deferred memory - long deferredMemory = deferredNativeMemoryBytes.addAndGet(cached.memoryBytes()); - updatePeakDeferredMemory(deferredMemory); + if (cached.pattern().getRefCount() > 0) { + // Pattern in use - defer cleanup + deferredCleanup.add(cached); + evictionsDeferred.incrementAndGet(); - logger.trace("RE2: LRU evicting pattern (deferred - {} active matchers): {}", - cached.pattern().getRefCount(), entry.getKey()); - } else { - // Safe to free immediately - cached.forceClose(); - evictionsLRU.incrementAndGet(); - config.metricsRegistry().incrementCounter(MetricNames.CACHE_EVICTIONS_LRU); - logger.trace("RE2: LRU evicting pattern (immediate): {}", entry.getKey()); - } - evicted++; - } - } + // Track deferred memory + long deferredMemory = deferredNativeMemoryBytes.addAndGet(cached.memoryBytes()); + updatePeakDeferredMemory(deferredMemory); - if (evicted > 0) { - logger.debug("RE2: LRU eviction completed - evicted: {}, cacheSize: {}/{}", - evicted, cache.size(), config.maxCacheSize()); + logger.trace( + "RE2: LRU evicting pattern 
(deferred - {} active matchers): {}", + cached.pattern().getRefCount(), + entry.getKey()); + } else { + // Safe to free immediately + cached.forceClose(); + evictionsLRU.incrementAndGet(); + config.metricsRegistry().incrementCounter(MetricNames.CACHE_EVICTIONS_LRU); + logger.trace("RE2: LRU evicting pattern (immediate): {}", entry.getKey()); } + evicted++; + } } - /** - * Evicts idle patterns (called by background thread). - * - * Non-blocking: uses ConcurrentHashMap iteration which doesn't block - * other threads accessing the cache. - * - * @return number of patterns evicted - */ - int evictIdlePatterns() { - if (!config.cacheEnabled()) { - return 0; - } + if (evicted > 0) { + logger.debug( + "RE2: LRU eviction completed - evicted: {}, cacheSize: {}/{}", + evicted, + cache.size(), + config.maxCacheSize()); + } + } + + /** + * Evicts idle patterns (called by background thread). + * + *

Non-blocking: uses ConcurrentHashMap iteration which doesn't block other threads accessing + * the cache. + * + * @return number of patterns evicted + */ + int evictIdlePatterns() { + if (!config.cacheEnabled()) { + return 0; + } - long cutoffNanos = System.nanoTime() - (config.idleTimeoutSeconds() * 1_000_000_000L); - AtomicLong evictedCount = new AtomicLong(0); + long cutoffNanos = System.nanoTime() - (config.idleTimeoutSeconds() * 1_000_000_000L); + AtomicLong evictedCount = new AtomicLong(0); - // Non-blocking iteration - other threads can access cache concurrently - cache.entrySet().removeIf(entry -> { - CachedPattern cached = entry.getValue(); + // Non-blocking iteration - other threads can access cache concurrently + cache + .entrySet() + .removeIf( + entry -> { + CachedPattern cached = entry.getValue(); - if (cached.lastAccessTimeNanos() < cutoffNanos) { + if (cached.lastAccessTimeNanos() < cutoffNanos) { // Decrement memory tracking (pattern removed from cache) totalNativeMemoryBytes.addAndGet(-cached.memoryBytes()); if (cached.pattern().getRefCount() > 0) { - // Pattern idle but still in use - defer cleanup - deferredCleanup.add(cached); - evictionsDeferred.incrementAndGet(); - - // Track deferred memory - long deferredMemory = deferredNativeMemoryBytes.addAndGet(cached.memoryBytes()); - updatePeakDeferredMemory(deferredMemory); - - // Track deferred pattern count peak - int deferredCount = deferredCleanup.size(); - updatePeakDeferredPatternCount(deferredCount); - - logger.trace("RE2: Idle evicting pattern (deferred - {} active matchers): {}", - cached.pattern().getRefCount(), entry.getKey()); + // Pattern idle but still in use - defer cleanup + deferredCleanup.add(cached); + evictionsDeferred.incrementAndGet(); + + // Track deferred memory + long deferredMemory = deferredNativeMemoryBytes.addAndGet(cached.memoryBytes()); + updatePeakDeferredMemory(deferredMemory); + + // Track deferred pattern count peak + int deferredCount = deferredCleanup.size(); + 
updatePeakDeferredPatternCount(deferredCount); + + logger.trace( + "RE2: Idle evicting pattern (deferred - {} active matchers): {}", + cached.pattern().getRefCount(), + entry.getKey()); } else { - // Can free immediately - logger.trace("RE2: Idle evicting pattern (immediate): {}", entry.getKey()); - cached.forceClose(); - evictionsIdle.incrementAndGet(); - config.metricsRegistry().incrementCounter(MetricNames.CACHE_EVICTIONS_IDLE); + // Can free immediately + logger.trace("RE2: Idle evicting pattern (immediate): {}", entry.getKey()); + cached.forceClose(); + evictionsIdle.incrementAndGet(); + config.metricsRegistry().incrementCounter(MetricNames.CACHE_EVICTIONS_IDLE); } evictedCount.incrementAndGet(); return true; // Remove from map - } - return false; // Keep in map - }); - - // Clean up deferred patterns - int deferredCleaned = cleanupDeferredPatterns(); + } + return false; // Keep in map + }); - int evicted = (int) evictedCount.get(); - if (evicted > 0 || deferredCleaned > 0) { - logger.debug("RE2: Idle eviction completed - evicted: {}, deferred cleaned: {}, cacheSize: {}", - evicted, deferredCleaned, cache.size()); - } + // Clean up deferred patterns + int deferredCleaned = cleanupDeferredPatterns(); - return evicted; + int evicted = (int) evictedCount.get(); + if (evicted > 0 || deferredCleaned > 0) { + logger.debug( + "RE2: Idle eviction completed - evicted: {}, deferred cleaned: {}, cacheSize: {}", + evicted, + deferredCleaned, + cache.size()); } - /** - * Cleans up deferred patterns that are no longer in use. 
- * - * @return number of patterns cleaned - */ - int cleanupDeferredPatterns() { - int cleaned = 0; - - for (CachedPattern deferred : deferredCleanup) { - if (deferred.pattern().getRefCount() == 0) { - // Now safe to free - logger.trace("RE2: Cleaning up deferred pattern"); - deferred.forceClose(); - deferredCleanup.remove(deferred); - - // Decrement deferred memory tracking - deferredNativeMemoryBytes.addAndGet(-deferred.memoryBytes()); - - // Note: evictionsDeferred already incremented when added to deferred list - config.metricsRegistry().incrementCounter(MetricNames.CACHE_EVICTIONS_DEFERRED); - cleaned++; - } - } - - if (cleaned > 0) { - logger.trace("RE2: Deferred cleanup completed - freed: {}", cleaned); - } - - return cleaned; + return evicted; + } + + /** + * Cleans up deferred patterns that are no longer in use. + * + * @return number of patterns cleaned + */ + int cleanupDeferredPatterns() { + int cleaned = 0; + + for (CachedPattern deferred : deferredCleanup) { + if (deferred.pattern().getRefCount() == 0) { + // Now safe to free + logger.trace("RE2: Cleaning up deferred pattern"); + deferred.forceClose(); + deferredCleanup.remove(deferred); + + // Decrement deferred memory tracking + deferredNativeMemoryBytes.addAndGet(-deferred.memoryBytes()); + + // Note: evictionsDeferred already incremented when added to deferred list + config.metricsRegistry().incrementCounter(MetricNames.CACHE_EVICTIONS_DEFERRED); + cleaned++; + } } - /** - * Gets current cache hit rate as percentage. - */ - private double getCacheHitRate() { - long totalRequests = hits.get() + misses.get(); - if (totalRequests == 0) { - return 0.0; - } - return (hits.get() * 100.0) / totalRequests; + if (cleaned > 0) { + logger.trace("RE2: Deferred cleanup completed - freed: {}", cleaned); } - /** - * Gets cache statistics snapshot. - */ - public CacheStatistics getStatistics() { - int currentSize = config.cacheEnabled() ? 
cache.size() : 0; - int deferredSize = deferredCleanup.size(); - - return new CacheStatistics( - hits.get(), - misses.get(), - evictionsLRU.get(), - evictionsIdle.get(), - evictionsDeferred.get(), - currentSize, - config.maxCacheSize(), - deferredSize, - totalNativeMemoryBytes.get(), - peakNativeMemoryBytes.get(), - invalidPatternRecompilations.get() - ); - } + return cleaned; + } - /** - * Clears the cache and closes all cached patterns. - * - * Patterns with active matchers (refCount > 0) are moved to deferred list - * instead of being forcibly closed. - */ - public void clear() { - if (!config.cacheEnabled()) { - return; - } + /** Gets current cache hit rate as percentage. */ + private double getCacheHitRate() { + long totalRequests = hits.get() + misses.get(); + if (totalRequests == 0) { + return 0.0; + } + return (hits.get() * 100.0) / totalRequests; + } + + /** Gets cache statistics snapshot. */ + public CacheStatistics getStatistics() { + int currentSize = config.cacheEnabled() ? cache.size() : 0; + int deferredSize = deferredCleanup.size(); + + return new CacheStatistics( + hits.get(), + misses.get(), + evictionsLRU.get(), + evictionsIdle.get(), + evictionsDeferred.get(), + currentSize, + config.maxCacheSize(), + deferredSize, + totalNativeMemoryBytes.get(), + peakNativeMemoryBytes.get(), + invalidPatternRecompilations.get()); + } + + /** + * Clears the cache and closes all cached patterns. + * + *

Patterns with active matchers (refCount > 0) are moved to deferred list instead of being + * forcibly closed. + */ + public void clear() { + if (!config.cacheEnabled()) { + return; + } - int cacheSize = cache.size(); - int deferredSize = deferredCleanup.size(); - - logger.debug("RE2: Clearing cache - {} cached patterns, {} deferred patterns", - cacheSize, deferredSize); - - // Close and remove all cached patterns - // Patterns with active matchers go to deferred list - cache.forEach((key, cached) -> { - if (cached.pattern().getRefCount() > 0) { - // Pattern still in use - move to deferred list instead of closing - deferredCleanup.add(cached); - logger.trace("RE2: Clearing cache - pattern still in use, moving to deferred: refCount={}", - cached.pattern().getRefCount()); - } else { - // Safe to close immediately - cached.forceClose(); - } + int cacheSize = cache.size(); + int deferredSize = deferredCleanup.size(); + + logger.debug( + "RE2: Clearing cache - {} cached patterns, {} deferred patterns", cacheSize, deferredSize); + + // Close and remove all cached patterns + // Patterns with active matchers go to deferred list + cache.forEach( + (key, cached) -> { + if (cached.pattern().getRefCount() > 0) { + // Pattern still in use - move to deferred list instead of closing + deferredCleanup.add(cached); + logger.trace( + "RE2: Clearing cache - pattern still in use, moving to deferred: refCount={}", + cached.pattern().getRefCount()); + } else { + // Safe to close immediately + cached.forceClose(); + } }); - cache.clear(); - - // Close deferred patterns that are no longer in use - deferredCleanup.removeIf(deferred -> { - if (deferred.pattern().getRefCount() == 0) { - deferred.forceClose(); - return true; // Remove from list - } - return false; // Keep in list + cache.clear(); + + // Close deferred patterns that are no longer in use + deferredCleanup.removeIf( + deferred -> { + if (deferred.pattern().getRefCount() == 0) { + deferred.forceClose(); + return true; // Remove 
from list + } + return false; // Keep in list }); - // Reset memory tracking (all non-deferred patterns removed) - totalNativeMemoryBytes.set(0); - // Note: deferred memory is tracked separately + // Reset memory tracking (all non-deferred patterns removed) + totalNativeMemoryBytes.set(0); + // Note: deferred memory is tracked separately + } + + /** Resets cache statistics (for testing only). */ + public void resetStatistics() { + hits.set(0); + misses.set(0); + evictionsLRU.set(0); + evictionsIdle.set(0); + evictionsDeferred.set(0); + peakNativeMemoryBytes.set(totalNativeMemoryBytes.get()); + invalidPatternRecompilations.set(0); + logger.trace("RE2: Cache statistics reset"); + } + + /** + * Full reset for testing (clears cache and resets statistics). + * + *

Only resets ResourceTracker if no deferred patterns remain. Deferred patterns will be freed + * later and need correct tracking. + */ + public void reset() { + clear(); + resetStatistics(); + + // Only reset ResourceTracker if deferred list is empty + // Deferred patterns will be freed later and call trackPatternFreed() + if (deferredCleanup.isEmpty()) { + resourceTracker.reset(); + logger.trace("RE2: Cache fully reset (including ResourceTracker)"); + } else { + logger.debug( + "RE2: Cache reset but ResourceTracker NOT reset - {} deferred patterns will be freed later", + deferredCleanup.size()); } - - /** - * Resets cache statistics (for testing only). - */ - public void resetStatistics() { - hits.set(0); - misses.set(0); - evictionsLRU.set(0); - evictionsIdle.set(0); - evictionsDeferred.set(0); - peakNativeMemoryBytes.set(totalNativeMemoryBytes.get()); - invalidPatternRecompilations.set(0); - logger.trace("RE2: Cache statistics reset"); + } + + /** + * Reconfigures the cache with new settings (for testing only). + * + *

This clears the existing cache and reinitializes with the new config. + * + * @param newConfig the new configuration + */ + public synchronized void reconfigure(RE2Config newConfig) { + logger.info("RE2: Reconfiguring cache with new settings"); + + // Stop existing eviction task + if (evictionTask != null) { + evictionTask.stop(); } - /** - * Full reset for testing (clears cache and resets statistics). - * - * Only resets ResourceTracker if no deferred patterns remain. - * Deferred patterns will be freed later and need correct tracking. - */ - public void reset() { - clear(); - resetStatistics(); - - // Only reset ResourceTracker if deferred list is empty - // Deferred patterns will be freed later and call trackPatternFreed() - if (deferredCleanup.isEmpty()) { - resourceTracker.reset(); - logger.trace("RE2: Cache fully reset (including ResourceTracker)"); - } else { - logger.debug("RE2: Cache reset but ResourceTracker NOT reset - {} deferred patterns will be freed later", - deferredCleanup.size()); - } + // Shutdown existing LRU executor + if (lruEvictionExecutor != null) { + lruEvictionExecutor.shutdown(); } - /** - * Reconfigures the cache with new settings (for testing only). - * - * This clears the existing cache and reinitializes with the new config. 
- * - * @param newConfig the new configuration - */ - public synchronized void reconfigure(RE2Config newConfig) { - logger.info("RE2: Reconfiguring cache with new settings"); - - // Stop existing eviction task - if (evictionTask != null) { - evictionTask.stop(); - } + // Clear existing cache + clear(); + resetStatistics(); + resourceTracker.reset(); - // Shutdown existing LRU executor - if (lruEvictionExecutor != null) { - lruEvictionExecutor.shutdown(); - } + // Update config + this.config = newConfig; - // Clear existing cache - clear(); - resetStatistics(); - resourceTracker.reset(); + // Reinitialize if cache enabled + if (newConfig.cacheEnabled()) { + this.cache = new ConcurrentHashMap<>(newConfig.maxCacheSize()); - // Update config - this.config = newConfig; - - // Reinitialize if cache enabled - if (newConfig.cacheEnabled()) { - this.cache = new ConcurrentHashMap<>(newConfig.maxCacheSize()); - - this.lruEvictionExecutor = Executors.newSingleThreadExecutor(r -> { + this.lruEvictionExecutor = + Executors.newSingleThreadExecutor( + r -> { Thread t = new Thread(r, "RE2-LRU-Eviction"); t.setDaemon(true); t.setPriority(Thread.MIN_PRIORITY); return t; - }); - - this.evictionTask = new IdleEvictionTask(this, newConfig); - this.evictionTask.start(); - - logger.info("RE2: Cache reconfigured - maxSize: {}, idleTimeout: {}s, maxSimultaneousPatterns: {}", - newConfig.maxCacheSize(), - newConfig.idleTimeoutSeconds(), - newConfig.maxSimultaneousCompiledPatterns()); - } else { - this.cache = null; - this.evictionTask = null; - this.lruEvictionExecutor = null; - logger.info("RE2: Cache disabled after reconfiguration"); - } + }); + + this.evictionTask = new IdleEvictionTask(this, newConfig); + this.evictionTask.start(); + + logger.info( + "RE2: Cache reconfigured - maxSize: {}, idleTimeout: {}s, maxSimultaneousPatterns: {}", + newConfig.maxCacheSize(), + newConfig.idleTimeoutSeconds(), + newConfig.maxSimultaneousCompiledPatterns()); + } else { + this.cache = null; + 
this.evictionTask = null; + this.lruEvictionExecutor = null; + logger.info("RE2: Cache disabled after reconfiguration"); } + } - /** - * Shuts down the cache (stops eviction thread, clears cache). - */ - public void shutdown() { - logger.info("RE2: Shutting down cache"); - - if (evictionTask != null) { - evictionTask.stop(); - } - - if (lruEvictionExecutor != null) { - lruEvictionExecutor.shutdown(); - } + /** Shuts down the cache (stops eviction thread, clears cache). */ + public void shutdown() { + logger.info("RE2: Shutting down cache"); - clear(); + if (evictionTask != null) { + evictionTask.stop(); } - /** - * Registers cache and resource metrics (gauges) with the metrics registry. - * Called during cache initialization. - */ - private void registerCacheMetrics() { - RE2MetricsRegistry metrics = config.metricsRegistry(); - - // Cache metrics - metrics.registerGauge("cache.patterns.current.count", () -> cache != null ? cache.size() : 0); - metrics.registerGauge("cache.native_memory.current.bytes", totalNativeMemoryBytes::get); - metrics.registerGauge("cache.native_memory.peak.bytes", peakNativeMemoryBytes::get); - - // Resource management metrics (active counts only - freed counts are incremented directly) - metrics.registerGauge("resources.patterns.active.current.count", - resourceTracker::getActivePatternCount); - metrics.registerGauge("resources.matchers.active.current.count", - resourceTracker::getActiveMatcherCount); - // Note: resources.patterns.freed.total.count and resources.matchers.freed.total.count - // are Counters incremented in ResourceTracker.trackPatternFreed() and trackMatcherFreed() - - // Deferred cleanup metrics (current state) - metrics.registerGauge("cache.deferred.patterns.current.count", deferredCleanup::size); - metrics.registerGauge("cache.deferred.patterns.peak.count", peakDeferredPatternCount::get); - metrics.registerGauge("cache.deferred.native_memory.current.bytes", deferredNativeMemoryBytes::get); - 
metrics.registerGauge("cache.deferred.native_memory.peak.bytes", peakDeferredNativeMemoryBytes::get); - - logger.debug("RE2: Metrics registered - cache gauges, resource gauges, deferred gauges"); + if (lruEvictionExecutor != null) { + lruEvictionExecutor.shutdown(); } - /** - * Cache key combining pattern string and case-sensitivity. - */ - private record CacheKey(String pattern, boolean caseSensitive) { - @Override - public String toString() { - return pattern.length() > 50 - ? pattern.substring(0, 47) + "... (case=" + caseSensitive + ")" - : pattern + " (case=" + caseSensitive + ")"; - } + clear(); + } + + /** + * Registers cache and resource metrics (gauges) with the metrics registry. Called during cache + * initialization. + */ + private void registerCacheMetrics() { + RE2MetricsRegistry metrics = config.metricsRegistry(); + + // Cache metrics + metrics.registerGauge("cache.patterns.current.count", () -> cache != null ? cache.size() : 0); + metrics.registerGauge("cache.native_memory.current.bytes", totalNativeMemoryBytes::get); + metrics.registerGauge("cache.native_memory.peak.bytes", peakNativeMemoryBytes::get); + + // Resource management metrics (active counts only - freed counts are incremented directly) + metrics.registerGauge( + "resources.patterns.active.current.count", resourceTracker::getActivePatternCount); + metrics.registerGauge( + "resources.matchers.active.current.count", resourceTracker::getActiveMatcherCount); + // Note: resources.patterns.freed.total.count and resources.matchers.freed.total.count + // are Counters incremented in ResourceTracker.trackPatternFreed() and trackMatcherFreed() + + // Deferred cleanup metrics (current state) + metrics.registerGauge("cache.deferred.patterns.current.count", deferredCleanup::size); + metrics.registerGauge("cache.deferred.patterns.peak.count", peakDeferredPatternCount::get); + metrics.registerGauge( + "cache.deferred.native_memory.current.bytes", deferredNativeMemoryBytes::get); + metrics.registerGauge( + 
"cache.deferred.native_memory.peak.bytes", peakDeferredNativeMemoryBytes::get); + + logger.debug("RE2: Metrics registered - cache gauges, resource gauges, deferred gauges"); + } + + /** Cache key combining pattern string and case-sensitivity. */ + private record CacheKey(String pattern, boolean caseSensitive) { + @Override + public String toString() { + return pattern.length() > 50 + ? pattern.substring(0, 47) + "... (case=" + caseSensitive + ")" + : pattern + " (case=" + caseSensitive + ")"; + } + } + + /** + * Cached pattern with atomic access time tracking. + * + *

Uses nanoTime for efficient timestamp comparison without object allocation. + */ + private static class CachedPattern { + private final Pattern pattern; + private final AtomicLong lastAccessTimeNanos; + private final long memoryBytes; + + CachedPattern(Pattern pattern) { + this.pattern = pattern; + this.lastAccessTimeNanos = new AtomicLong(System.nanoTime()); + this.memoryBytes = pattern.getNativeMemoryBytes(); } - /** - * Cached pattern with atomic access time tracking. - * - * Uses nanoTime for efficient timestamp comparison without object allocation. - */ - private static class CachedPattern { - private final Pattern pattern; - private final AtomicLong lastAccessTimeNanos; - private final long memoryBytes; - - CachedPattern(Pattern pattern) { - this.pattern = pattern; - this.lastAccessTimeNanos = new AtomicLong(System.nanoTime()); - this.memoryBytes = pattern.getNativeMemoryBytes(); - } - - Pattern pattern() { - return pattern; - } - - long lastAccessTimeNanos() { - return lastAccessTimeNanos.get(); - } - - long memoryBytes() { - return memoryBytes; - } - - void touch() { - lastAccessTimeNanos.set(System.nanoTime()); - } + Pattern pattern() { + return pattern; + } - void forceClose() { - pattern.forceClose(); - } + long lastAccessTimeNanos() { + return lastAccessTimeNanos.get(); } - /** - * Updates peak memory if current total exceeds it. - */ - private void updatePeakMemory() { - long current = totalNativeMemoryBytes.get(); - long peak; - do { - peak = peakNativeMemoryBytes.get(); - } while (current > peak && !peakNativeMemoryBytes.compareAndSet(peak, current)); + long memoryBytes() { + return memoryBytes; } - /** - * Updates peak deferred memory if current exceeds it. 
- */ - private void updatePeakDeferredMemory(long current) { - long peak; - do { - peak = peakDeferredNativeMemoryBytes.get(); - } while (current > peak && !peakDeferredNativeMemoryBytes.compareAndSet(peak, current)); + void touch() { + lastAccessTimeNanos.set(System.nanoTime()); } - /** - * Updates peak deferred pattern count if current exceeds it. - */ - private void updatePeakDeferredPatternCount(int current) { - int peak; - do { - peak = peakDeferredPatternCount.get(); - } while (current > peak && !peakDeferredPatternCount.compareAndSet(peak, current)); + void forceClose() { + pattern.forceClose(); } + } + + /** Updates peak memory if current total exceeds it. */ + private void updatePeakMemory() { + long current = totalNativeMemoryBytes.get(); + long peak; + do { + peak = peakNativeMemoryBytes.get(); + } while (current > peak && !peakNativeMemoryBytes.compareAndSet(peak, current)); + } + + /** Updates peak deferred memory if current exceeds it. */ + private void updatePeakDeferredMemory(long current) { + long peak; + do { + peak = peakDeferredNativeMemoryBytes.get(); + } while (current > peak && !peakDeferredNativeMemoryBytes.compareAndSet(peak, current)); + } + + /** Updates peak deferred pattern count if current exceeds it. 
*/ + private void updatePeakDeferredPatternCount(int current) { + int peak; + do { + peak = peakDeferredPatternCount.get(); + } while (current > peak && !peakDeferredPatternCount.compareAndSet(peak, current)); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/cache/RE2Config.java b/libre2-core/src/main/java/com/axonops/libre2/cache/RE2Config.java index 5d3b803..96a3857 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/cache/RE2Config.java +++ b/libre2-core/src/main/java/com/axonops/libre2/cache/RE2Config.java @@ -18,49 +18,56 @@ import com.axonops.libre2.metrics.NoOpMetricsRegistry; import com.axonops.libre2.metrics.RE2MetricsRegistry; - -import java.time.Duration; import java.util.Objects; /** * Configuration for RE2 library including caching, resource limits, and metrics. * - *

Immutable configuration using Java 17 records. Controls pattern caching behavior, - * dual eviction strategy (LRU + idle timeout), resource limits, and metrics integration. + *

Immutable configuration using Java 17 records. Controls pattern caching behavior, dual + * eviction strategy (LRU + idle timeout), resource limits, and metrics integration. * *

Architecture Overview

* *

Pattern Cache

- *

RE2 automatically caches compiled patterns to avoid expensive recompilation. The cache uses - * a dual eviction strategy: + * + *

RE2 automatically caches compiled patterns to avoid expensive recompilation. The cache uses a + * dual eviction strategy: + * *

    - *
  1. LRU Eviction - When cache exceeds {@code maxCacheSize}, least-recently-used patterns evicted
  2. - *
  3. Idle Eviction - Background thread evicts patterns unused for {@code idleTimeoutSeconds}
  4. + *
  5. LRU Eviction - When cache exceeds {@code maxCacheSize}, least-recently-used patterns + * evicted + *
  6. Idle Eviction - Background thread evicts patterns unused for {@code + * idleTimeoutSeconds} *
* *

Why Dual Eviction? + * *

    - *
  • LRU provides short-term performance (keeps hot patterns in cache)
  • - *
  • Idle timeout prevents long-term memory growth (cleans abandoned patterns)
  • - *
  • Without idle eviction: cache fills with one-time patterns, never cleans up
  • - *
  • Without LRU: high-traffic patterns repeatedly recompiled, poor performance
  • + *
  • LRU provides short-term performance (keeps hot patterns in cache) + *
  • Idle timeout prevents long-term memory growth (cleans abandoned patterns) + *
  • Without idle eviction: cache fills with one-time patterns, never cleans up + *
  • Without LRU: high-traffic patterns repeatedly recompiled, poor performance *
* *

Deferred Cleanup

+ * *

When a pattern is evicted but still in use by active {@link com.axonops.libre2.api.Matcher} - * instances, it's moved to a deferred cleanup queue and freed once all matchers close. - * A background task runs every {@code deferredCleanupIntervalSeconds} to reclaim memory. + * instances, it's moved to a deferred cleanup queue and freed once all matchers close. A + * background task runs every {@code deferredCleanupIntervalSeconds} to reclaim memory. * *

Resource Limits

+ * *

Two critical safety limits: + * *

    - *
  • maxSimultaneousCompiledPatterns - Maximum ACTIVE patterns (not cumulative). - * Prevents unbounded native memory growth. Patterns can be freed and recompiled.
  • - *
  • maxMatchersPerPattern - Maximum matchers per pattern. Prevents single pattern - * from exhausting matcher resources.
  • + *
  • maxSimultaneousCompiledPatterns - Maximum ACTIVE patterns (not cumulative). Prevents + * unbounded native memory growth. Patterns can be freed and recompiled. + *
  • maxMatchersPerPattern - Maximum matchers per pattern. Prevents single pattern from + * exhausting matcher resources. *
* *

Eviction Protection

+ * *

{@code evictionProtectionMs} prevents race conditions where a pattern is compiled, then * immediately evicted before the caller can use it. Newly compiled patterns are protected from * eviction for this duration (default: 1 second). @@ -68,6 +75,7 @@ *

Configuration Examples

* *

Default Production Config

+ * *
{@code
  * // Use defaults (50K cache, 5min idle, metrics disabled)
  * RE2Config config = RE2Config.DEFAULT;
@@ -75,6 +83,7 @@
  * }
* *

High-Traffic Server

+ * *
{@code
  * // Larger cache, faster cleanup, metrics enabled
  * RE2Config config = RE2Config.builder()
@@ -87,6 +96,7 @@
  * }
* *

Memory-Constrained Environment

+ * *
{@code
  * // Smaller cache, aggressive cleanup
  * RE2Config config = RE2Config.builder()
@@ -98,6 +108,7 @@
  * }
* *

Manual Resource Management (No Cache)

+ * *
{@code
  * // Disable cache, manage patterns manually
  * RE2Config config = RE2Config.NO_CACHE;
@@ -107,63 +118,75 @@
  * 

Tuning Recommendations

* *

Cache Size

+ * *
    - *
  • Default: 50,000 patterns (~50-200MB native memory, negligible in large clusters)
  • - *
  • Set based on unique pattern count in workload (monitor cache hit rate via metrics)
  • - *
  • Each pattern: ~170-250 bytes typical, but can be 10KB+ for complex regex
  • - *
  • Target: >90% cache hit rate for steady-state workloads
  • + *
  • Default: 50,000 patterns (~50-200MB native memory, negligible in large clusters) + *
  • Set based on unique pattern count in workload (monitor cache hit rate via metrics) + *
  • Each pattern: ~170-250 bytes typical, but can be 10KB+ for complex regex + *
  • Target: >90% cache hit rate for steady-state workloads *
* *

Idle Timeout

+ * *
    - *
  • Default: 300 seconds (5 minutes)
  • - *
  • Shorter (1-2min): Aggressive cleanup, good for memory-constrained or bursty workloads
  • - *
  • Longer (10-30min): Less cleanup overhead, good for stable high-traffic servers
  • - *
  • Balance: cleanup speed vs. keeping warm patterns available
  • + *
  • Default: 300 seconds (5 minutes) + *
  • Shorter (1-2min): Aggressive cleanup, good for memory-constrained or bursty workloads + *
  • Longer (10-30min): Less cleanup overhead, good for stable high-traffic servers + *
  • Balance: cleanup speed vs. keeping warm patterns available *
* *

Scan Interval

+ * *
    - *
  • Default: 60 seconds
  • - *
  • Faster (15-30s): More CPU overhead, faster cleanup of idle patterns
  • - *
  • Slower (2-5min): Less CPU overhead, slower cleanup
  • - *
  • Must be ≤ idleTimeoutSeconds for timely cleanup
  • + *
  • Default: 60 seconds + *
  • Faster (15-30s): More CPU overhead, faster cleanup of idle patterns + *
  • Slower (2-5min): Less CPU overhead, slower cleanup + *
  • Must be ≤ idleTimeoutSeconds for timely cleanup *
* *

Deferred Cleanup

+ * *
    - *
  • Default: 5 seconds
  • - *
  • Should be frequent (2-10s) to quickly reclaim memory from evicted patterns
  • - *
  • Must be ≤ evictionScanIntervalSeconds (runs at least as often as idle eviction)
  • - *
  • Monitor {@code cache.deferred.patterns.current.count} - should stay near zero
  • + *
  • Default: 5 seconds + *
  • Should be frequent (2-10s) to quickly reclaim memory from evicted patterns + *
  • Must be ≤ evictionScanIntervalSeconds (runs at least as often as idle eviction) + *
  • Monitor {@code cache.deferred.patterns.current.count} - should stay near zero *
* *

Resource Limits

+ * *
    - *
  • maxSimultaneousCompiledPatterns default: 100,000 - ACTIVE count, not cumulative
  • - *
  • Increase if hitting limit (check {@code errors.resource.exhausted} metric)
  • - *
  • This is a safety net, not a performance tuning parameter
  • - *
  • maxMatchersPerPattern default: 10,000 - prevents single pattern exhaustion
  • + *
  • maxSimultaneousCompiledPatterns default: 100,000 - ACTIVE count, not cumulative + *
  • Increase if hitting limit (check {@code errors.resource.exhausted} metric) + *
  • This is a safety net, not a performance tuning parameter + *
  • maxMatchersPerPattern default: 10,000 - prevents single pattern exhaustion *
* *

Validation

+ * *
    - *
  • Default: enabled
  • - *
  • Defensive check for native pointer validity on cache retrieval
  • - *
  • Tiny overhead (~1 JNI call), provides crash safety
  • - *
  • Only disable if absolute maximum performance required
  • + *
  • Default: enabled + *
  • Defensive check for native pointer validity on cache retrieval + *
  • Tiny overhead (~1 JNI call), provides crash safety + *
  • Only disable if absolute maximum performance required *
* * @param cacheEnabled Enable pattern caching (if false, users manage patterns manually) * @param maxCacheSize Maximum patterns in cache before LRU eviction (must be > 0 if cache enabled) * @param idleTimeoutSeconds Evict patterns unused for this duration (must be > 0 if cache enabled) - * @param evictionScanIntervalSeconds How often idle eviction task runs (must be > 0 and ≤ idleTimeoutSeconds) - * @param deferredCleanupIntervalSeconds How often deferred cleanup runs (must be > 0 and ≤ evictionScanIntervalSeconds) - * @param evictionProtectionMs Protect newly compiled patterns from eviction for this duration (prevents race conditions) - * @param maxSimultaneousCompiledPatterns Maximum ACTIVE patterns (not cumulative - patterns can be freed/recompiled) + * @param evictionScanIntervalSeconds How often idle eviction task runs (must be > 0 and ≤ + * idleTimeoutSeconds) + * @param deferredCleanupIntervalSeconds How often deferred cleanup runs (must be > 0 and ≤ + * evictionScanIntervalSeconds) + * @param evictionProtectionMs Protect newly compiled patterns from eviction for this duration + * (prevents race conditions) + * @param maxSimultaneousCompiledPatterns Maximum ACTIVE patterns (not cumulative - patterns can be + * freed/recompiled) * @param maxMatchersPerPattern Maximum matchers per pattern (prevents single pattern exhaustion) - * @param validateCachedPatterns Validate native pointers on cache retrieval (defensive check, tiny overhead) - * @param metricsRegistry Metrics implementation (use {@link com.axonops.libre2.metrics.NoOpMetricsRegistry} for zero overhead) + * @param validateCachedPatterns Validate native pointers on cache retrieval (defensive check, tiny + * overhead) + * @param metricsRegistry Metrics implementation (use {@link + * com.axonops.libre2.metrics.NoOpMetricsRegistry} for zero overhead) * @since 1.0.0 * @see com.axonops.libre2.cache.PatternCache * @see com.axonops.libre2.metrics.MetricNames @@ -178,338 +201,371 @@ public record RE2Config( int 
maxSimultaneousCompiledPatterns, int maxMatchersPerPattern, boolean validateCachedPatterns, - RE2MetricsRegistry metricsRegistry -) { + RE2MetricsRegistry metricsRegistry) { + + /** + * Default configuration for production use. + * + *

Defaults chosen for typical production clusters: - Cache: 50K patterns (~50-200MB, + * negligible in large clusters) - Idle timeout: 5 minutes (patterns auto-cleaned after + * inactivity) - Scan interval: 1 minute (balance cleanup speed vs CPU) - Deferred cleanup: 5 + * seconds (frequent cleanup of evicted-but-in-use patterns) - Simultaneous limit: 100K ACTIVE + * patterns (NOT cumulative - patterns can be freed/recompiled) - Matchers per pattern: 10K + * (prevents per-pattern exhaustion) - Validate cached patterns: enabled (defensive check for + * native pointer validity) - Eviction protection: 1000ms (protects recently-used patterns from + * immediate eviction) - Metrics: disabled (NoOp - zero overhead) + */ + public static final RE2Config DEFAULT = + new RE2Config( + true, // Cache enabled + 50000, // Max 50K cached patterns (~50-200MB) + 300, // 5 minute idle timeout + 60, // Scan every 60 seconds + 5, // Deferred cleanup every 5 seconds + 1000, // 1 second eviction protection + 100000, // Max 100K simultaneous active patterns + 10000, // Max 10K matchers per pattern + true, // Validate cached patterns (defensive check) + NoOpMetricsRegistry.INSTANCE // Metrics disabled (zero overhead) + ); + + /** Configuration with caching disabled. Users manage all pattern resources manually. */ + public static final RE2Config NO_CACHE = + new RE2Config( + false, // Cache disabled + 0, // Ignored when cache disabled + 0, // Ignored when cache disabled + 0, // Ignored when cache disabled + 0, // Ignored when cache disabled + 0, // Ignored when cache disabled + 100000, // Still enforce simultaneous limit + 10000, // Still enforce matcher limit + false, // No validation needed when no cache + NoOpMetricsRegistry.INSTANCE // Metrics disabled + ); + + /** + * Compact constructor with validation. + * + *

CRITICAL: maxSimultaneousCompiledPatterns is SIMULTANEOUS/ACTIVE count, NOT cumulative. + */ + public RE2Config { + // Always validate resource limits (even if cache disabled) + if (maxSimultaneousCompiledPatterns <= 0) { + throw new IllegalArgumentException( + "maxSimultaneousCompiledPatterns must be positive (this is SIMULTANEOUS active count, not cumulative)"); + } + if (maxMatchersPerPattern <= 0) { + throw new IllegalArgumentException("maxMatchersPerPattern must be positive"); + } + + // Validate cache parameters only if cache enabled + if (cacheEnabled) { + if (maxCacheSize <= 0) { + throw new IllegalArgumentException("maxCacheSize must be positive when cache enabled"); + } + if (idleTimeoutSeconds <= 0) { + throw new IllegalArgumentException( + "idleTimeoutSeconds must be positive when cache enabled"); + } + if (evictionScanIntervalSeconds <= 0) { + throw new IllegalArgumentException( + "evictionScanIntervalSeconds must be positive when cache enabled"); + } + if (deferredCleanupIntervalSeconds <= 0) { + throw new IllegalArgumentException( + "deferredCleanupIntervalSeconds must be positive when cache enabled"); + } + if (evictionProtectionMs < 0) { + throw new IllegalArgumentException( + "evictionProtectionMs must be non-negative when cache enabled"); + } + + // Warn if scan interval exceeds idle timeout (still valid, just suboptimal) + if (evictionScanIntervalSeconds > idleTimeoutSeconds) { + System.err.println( + "WARNING: evictionScanIntervalSeconds (" + + evictionScanIntervalSeconds + + "s) exceeds idleTimeoutSeconds (" + + idleTimeoutSeconds + + "s) - idle patterns may not be evicted promptly"); + } + + // Deferred cleanup should be frequent (warn if too slow) + if (deferredCleanupIntervalSeconds > 30) { + System.err.println( + "WARNING: deferredCleanupIntervalSeconds (" + + deferredCleanupIntervalSeconds + + "s) is quite long - evicted patterns may retain memory for extended periods"); + } + + // Deferred cleanup must be at least as frequent as idle 
eviction + if (deferredCleanupIntervalSeconds > evictionScanIntervalSeconds) { + throw new IllegalArgumentException( + "deferredCleanupIntervalSeconds (" + + deferredCleanupIntervalSeconds + + "s) must be <= evictionScanIntervalSeconds (" + + evictionScanIntervalSeconds + + "s) - deferred cleanup should run at least as often as idle eviction"); + } + + // Cache size must not exceed simultaneous limit + if (maxCacheSize > maxSimultaneousCompiledPatterns) { + throw new IllegalArgumentException( + "maxCacheSize (" + + maxCacheSize + + ") cannot exceed maxSimultaneousCompiledPatterns (" + + maxSimultaneousCompiledPatterns + + ")"); + } + } + } + + /** + * Creates a builder for custom configuration. + * + *

Builder starts with defaults and allows selective overrides: + * + *

{@code
+   * RE2Config config = RE2Config.builder()
+   *     .maxCacheSize(100_000)
+   *     .idleTimeoutSeconds(600)
+   *     .build();
+   * }
+ * + * @return new builder with default values + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for custom RE2 configuration. + * + *

All fields start with production defaults from {@link #DEFAULT}. Use builder to selectively + * override only the parameters you need to change. + */ + public static class Builder { + private boolean cacheEnabled = true; + private int maxCacheSize = 50000; + private long idleTimeoutSeconds = 300; + private long evictionScanIntervalSeconds = 60; + private long deferredCleanupIntervalSeconds = 5; + private long evictionProtectionMs = 1000; + private int maxSimultaneousCompiledPatterns = 100000; + private int maxMatchersPerPattern = 10000; + private boolean validateCachedPatterns = true; + private RE2MetricsRegistry metricsRegistry = NoOpMetricsRegistry.INSTANCE; /** - * Default configuration for production use. - * - * Defaults chosen for typical production clusters: - * - Cache: 50K patterns (~50-200MB, negligible in large clusters) - * - Idle timeout: 5 minutes (patterns auto-cleaned after inactivity) - * - Scan interval: 1 minute (balance cleanup speed vs CPU) - * - Deferred cleanup: 5 seconds (frequent cleanup of evicted-but-in-use patterns) - * - Simultaneous limit: 100K ACTIVE patterns (NOT cumulative - patterns can be freed/recompiled) - * - Matchers per pattern: 10K (prevents per-pattern exhaustion) - * - Validate cached patterns: enabled (defensive check for native pointer validity) - * - Eviction protection: 1000ms (protects recently-used patterns from immediate eviction) - * - Metrics: disabled (NoOp - zero overhead) + * Enable or disable pattern caching. + * + *

If disabled, users must manually manage pattern lifecycle (call {@code close()}). + * Disabling cache eliminates all caching overhead but requires careful resource management. + * + * @param enabled true to enable caching (default), false to disable + * @return this builder */ - public static final RE2Config DEFAULT = new RE2Config( - true, // Cache enabled - 50000, // Max 50K cached patterns (~50-200MB) - 300, // 5 minute idle timeout - 60, // Scan every 60 seconds - 5, // Deferred cleanup every 5 seconds - 1000, // 1 second eviction protection - 100000, // Max 100K simultaneous active patterns - 10000, // Max 10K matchers per pattern - true, // Validate cached patterns (defensive check) - NoOpMetricsRegistry.INSTANCE // Metrics disabled (zero overhead) - ); + public Builder cacheEnabled(boolean enabled) { + this.cacheEnabled = enabled; + return this; + } /** - * Configuration with caching disabled. - * Users manage all pattern resources manually. + * Set maximum number of patterns in cache before LRU eviction. + * + *

Default: 50,000 (~50-200MB native memory) + * + *

Typical memory per pattern: 170-250 bytes, but complex regex can be 10KB+ + * + *

Monitor {@code cache.patterns.current.count} and cache hit rate to tune. + * + * @param size maximum cached patterns (must be > 0) + * @return this builder */ - public static final RE2Config NO_CACHE = new RE2Config( - false, // Cache disabled - 0, // Ignored when cache disabled - 0, // Ignored when cache disabled - 0, // Ignored when cache disabled - 0, // Ignored when cache disabled - 0, // Ignored when cache disabled - 100000, // Still enforce simultaneous limit - 10000, // Still enforce matcher limit - false, // No validation needed when no cache - NoOpMetricsRegistry.INSTANCE // Metrics disabled - ); + public Builder maxCacheSize(int size) { + this.maxCacheSize = size; + return this; + } + + /** + * Set idle timeout for pattern eviction. + * + *

Default: 300 seconds (5 minutes) + * + *

Patterns unused for this duration are evicted by background thread. + * + *

    + *
  • Shorter (60-120s): Aggressive cleanup, better for bursty workloads + *
  • Longer (600-1800s): Less cleanup overhead, better for stable workloads + *
+ * + * @param seconds idle timeout in seconds (must be > 0) + * @return this builder + */ + public Builder idleTimeoutSeconds(long seconds) { + this.idleTimeoutSeconds = seconds; + return this; + } /** - * Compact constructor with validation. + * Set how often idle eviction task runs. + * + *

Default: 60 seconds + * + *

Must be ≤ {@code idleTimeoutSeconds} for timely cleanup. + * + *

    + *
  • Faster (15-30s): More CPU overhead, faster cleanup + *
  • Slower (120-300s): Less overhead, slower cleanup + *
* - * CRITICAL: maxSimultaneousCompiledPatterns is SIMULTANEOUS/ACTIVE count, NOT cumulative. + * @param seconds scan interval in seconds (must be > 0 and ≤ idleTimeoutSeconds) + * @return this builder */ - public RE2Config { - // Always validate resource limits (even if cache disabled) - if (maxSimultaneousCompiledPatterns <= 0) { - throw new IllegalArgumentException("maxSimultaneousCompiledPatterns must be positive (this is SIMULTANEOUS active count, not cumulative)"); - } - if (maxMatchersPerPattern <= 0) { - throw new IllegalArgumentException("maxMatchersPerPattern must be positive"); - } + public Builder evictionScanIntervalSeconds(long seconds) { + this.evictionScanIntervalSeconds = seconds; + return this; + } - // Validate cache parameters only if cache enabled - if (cacheEnabled) { - if (maxCacheSize <= 0) { - throw new IllegalArgumentException("maxCacheSize must be positive when cache enabled"); - } - if (idleTimeoutSeconds <= 0) { - throw new IllegalArgumentException("idleTimeoutSeconds must be positive when cache enabled"); - } - if (evictionScanIntervalSeconds <= 0) { - throw new IllegalArgumentException("evictionScanIntervalSeconds must be positive when cache enabled"); - } - if (deferredCleanupIntervalSeconds <= 0) { - throw new IllegalArgumentException("deferredCleanupIntervalSeconds must be positive when cache enabled"); - } - if (evictionProtectionMs < 0) { - throw new IllegalArgumentException("evictionProtectionMs must be non-negative when cache enabled"); - } + /** + * Set how often deferred cleanup task runs. + * + *

Default: 5 seconds + * + *

Deferred cleanup reclaims memory from evicted patterns still in use by matchers. Should be + * frequent (2-10s) for quick memory reclamation. + * + *

Must be ≤ {@code evictionScanIntervalSeconds}. + * + * @param seconds cleanup interval in seconds (must be > 0 and ≤ evictionScanIntervalSeconds) + * @return this builder + */ + public Builder deferredCleanupIntervalSeconds(long seconds) { + this.deferredCleanupIntervalSeconds = seconds; + return this; + } - // Warn if scan interval exceeds idle timeout (still valid, just suboptimal) - if (evictionScanIntervalSeconds > idleTimeoutSeconds) { - System.err.println("WARNING: evictionScanIntervalSeconds (" + evictionScanIntervalSeconds + - "s) exceeds idleTimeoutSeconds (" + idleTimeoutSeconds + "s) - idle patterns may not be evicted promptly"); - } + /** + * Set eviction protection period for newly compiled patterns. + * + *

Default: 1000ms (1 second) + * + *

Prevents race condition where pattern is compiled then immediately evicted before caller + * can use it. Newly compiled patterns protected for this duration. + * + *

Set to 0 to disable (not recommended unless you understand the race condition). + * + * @param ms protection period in milliseconds (must be ≥ 0) + * @return this builder + */ + public Builder evictionProtectionMs(long ms) { + this.evictionProtectionMs = ms; + return this; + } - // Deferred cleanup should be frequent (warn if too slow) - if (deferredCleanupIntervalSeconds > 30) { - System.err.println("WARNING: deferredCleanupIntervalSeconds (" + deferredCleanupIntervalSeconds + - "s) is quite long - evicted patterns may retain memory for extended periods"); - } + /** + * Set maximum simultaneous ACTIVE compiled patterns. + * + *

Default: 100,000 + * + *

IMPORTANT: This is ACTIVE (simultaneous) count, NOT cumulative. Patterns can be + * freed and recompiled - this limit prevents unbounded memory growth. + * + *

Increase if hitting {@code errors.resource.exhausted} metric. + * + *

This is a safety limit, not a performance tuning parameter. + * + * @param max maximum active patterns (must be > 0, must be ≥ maxCacheSize) + * @return this builder + */ + public Builder maxSimultaneousCompiledPatterns(int max) { + this.maxSimultaneousCompiledPatterns = max; + return this; + } - // Deferred cleanup must be at least as frequent as idle eviction - if (deferredCleanupIntervalSeconds > evictionScanIntervalSeconds) { - throw new IllegalArgumentException("deferredCleanupIntervalSeconds (" + deferredCleanupIntervalSeconds + - "s) must be <= evictionScanIntervalSeconds (" + evictionScanIntervalSeconds + - "s) - deferred cleanup should run at least as often as idle eviction"); - } + /** + * Set maximum matchers per pattern. + * + *

Default: 10,000 + * + *

Prevents single pattern from exhausting matcher resources. Rarely needs tuning unless you + * have extremely high concurrent matching on one pattern. + * + * @param max maximum matchers per pattern (must be > 0) + * @return this builder + */ + public Builder maxMatchersPerPattern(int max) { + this.maxMatchersPerPattern = max; + return this; + } - // Cache size must not exceed simultaneous limit - if (maxCacheSize > maxSimultaneousCompiledPatterns) { - throw new IllegalArgumentException("maxCacheSize (" + maxCacheSize + - ") cannot exceed maxSimultaneousCompiledPatterns (" + maxSimultaneousCompiledPatterns + ")"); - } - } + /** + * Enable or disable validation of cached patterns. + * + *

Default: enabled (true) + * + *

Performs defensive check for native pointer validity on cache retrieval. Tiny overhead (~1 + * JNI call), provides crash safety against native memory corruption. + * + *

Only disable if absolute maximum performance required. + * + * @param validate true to validate (default), false to skip validation + * @return this builder + */ + public Builder validateCachedPatterns(boolean validate) { + this.validateCachedPatterns = validate; + return this; } /** - * Creates a builder for custom configuration. + * Set metrics registry for instrumentation. + * + *

Default: {@link com.axonops.libre2.metrics.NoOpMetricsRegistry} (zero overhead) + * + *

Use {@link com.axonops.libre2.metrics.DropwizardMetricsAdapter} for Dropwizard Metrics + * integration. See {@link com.axonops.libre2.metrics.MetricNames} for available metrics. + * + *

Example: * - *

Builder starts with defaults and allows selective overrides: *

{@code
+     * MetricRegistry registry = new MetricRegistry();
      * RE2Config config = RE2Config.builder()
-     *     .maxCacheSize(100_000)
-     *     .idleTimeoutSeconds(600)
+     *     .metricsRegistry(new DropwizardMetricsAdapter(registry, "myapp.re2"))
      *     .build();
      * }
* - * @return new builder with default values + * @param metricsRegistry metrics implementation (must not be null) + * @return this builder + * @throws NullPointerException if metricsRegistry is null */ - public static Builder builder() { - return new Builder(); + public Builder metricsRegistry(RE2MetricsRegistry metricsRegistry) { + this.metricsRegistry = + Objects.requireNonNull(metricsRegistry, "metricsRegistry cannot be null"); + return this; } /** - * Builder for custom RE2 configuration. + * Build immutable configuration. * - *

All fields start with production defaults from {@link #DEFAULT}. - * Use builder to selectively override only the parameters you need to change. + *

Validates all parameters and their relationships. Throws {@link IllegalArgumentException} + * if configuration is invalid. + * + * @return validated immutable configuration + * @throws IllegalArgumentException if configuration is invalid */ - public static class Builder { - private boolean cacheEnabled = true; - private int maxCacheSize = 50000; - private long idleTimeoutSeconds = 300; - private long evictionScanIntervalSeconds = 60; - private long deferredCleanupIntervalSeconds = 5; - private long evictionProtectionMs = 1000; - private int maxSimultaneousCompiledPatterns = 100000; - private int maxMatchersPerPattern = 10000; - private boolean validateCachedPatterns = true; - private RE2MetricsRegistry metricsRegistry = NoOpMetricsRegistry.INSTANCE; - - /** - * Enable or disable pattern caching. - * - *

If disabled, users must manually manage pattern lifecycle (call {@code close()}). - * Disabling cache eliminates all caching overhead but requires careful resource management. - * - * @param enabled true to enable caching (default), false to disable - * @return this builder - */ - public Builder cacheEnabled(boolean enabled) { - this.cacheEnabled = enabled; - return this; - } - - /** - * Set maximum number of patterns in cache before LRU eviction. - * - *

Default: 50,000 (~50-200MB native memory) - *

Typical memory per pattern: 170-250 bytes, but complex regex can be 10KB+ - *

Monitor {@code cache.patterns.current.count} and cache hit rate to tune. - * - * @param size maximum cached patterns (must be > 0) - * @return this builder - */ - public Builder maxCacheSize(int size) { - this.maxCacheSize = size; - return this; - } - - /** - * Set idle timeout for pattern eviction. - * - *

Default: 300 seconds (5 minutes) - *

Patterns unused for this duration are evicted by background thread. - *

    - *
  • Shorter (60-120s): Aggressive cleanup, better for bursty workloads
  • - *
  • Longer (600-1800s): Less cleanup overhead, better for stable workloads
  • - *
- * - * @param seconds idle timeout in seconds (must be > 0) - * @return this builder - */ - public Builder idleTimeoutSeconds(long seconds) { - this.idleTimeoutSeconds = seconds; - return this; - } - - /** - * Set how often idle eviction task runs. - * - *

Default: 60 seconds - *

Must be ≤ {@code idleTimeoutSeconds} for timely cleanup. - *

    - *
  • Faster (15-30s): More CPU overhead, faster cleanup
  • - *
  • Slower (120-300s): Less overhead, slower cleanup
  • - *
- * - * @param seconds scan interval in seconds (must be > 0 and ≤ idleTimeoutSeconds) - * @return this builder - */ - public Builder evictionScanIntervalSeconds(long seconds) { - this.evictionScanIntervalSeconds = seconds; - return this; - } - - /** - * Set how often deferred cleanup task runs. - * - *

Default: 5 seconds - *

Deferred cleanup reclaims memory from evicted patterns still in use by matchers. - * Should be frequent (2-10s) for quick memory reclamation. - *

Must be ≤ {@code evictionScanIntervalSeconds}. - * - * @param seconds cleanup interval in seconds (must be > 0 and ≤ evictionScanIntervalSeconds) - * @return this builder - */ - public Builder deferredCleanupIntervalSeconds(long seconds) { - this.deferredCleanupIntervalSeconds = seconds; - return this; - } - - /** - * Set eviction protection period for newly compiled patterns. - * - *

Default: 1000ms (1 second) - *

Prevents race condition where pattern is compiled then immediately evicted - * before caller can use it. Newly compiled patterns protected for this duration. - *

Set to 0 to disable (not recommended unless you understand the race condition). - * - * @param ms protection period in milliseconds (must be ≥ 0) - * @return this builder - */ - public Builder evictionProtectionMs(long ms) { - this.evictionProtectionMs = ms; - return this; - } - - /** - * Set maximum simultaneous ACTIVE compiled patterns. - * - *

Default: 100,000 - *

IMPORTANT: This is ACTIVE (simultaneous) count, NOT cumulative. - * Patterns can be freed and recompiled - this limit prevents unbounded memory growth. - *

Increase if hitting {@code errors.resource.exhausted} metric. - *

This is a safety limit, not a performance tuning parameter. - * - * @param max maximum active patterns (must be > 0, must be ≥ maxCacheSize) - * @return this builder - */ - public Builder maxSimultaneousCompiledPatterns(int max) { - this.maxSimultaneousCompiledPatterns = max; - return this; - } - - /** - * Set maximum matchers per pattern. - * - *

Default: 10,000 - *

Prevents single pattern from exhausting matcher resources. - * Rarely needs tuning unless you have extremely high concurrent matching on one pattern. - * - * @param max maximum matchers per pattern (must be > 0) - * @return this builder - */ - public Builder maxMatchersPerPattern(int max) { - this.maxMatchersPerPattern = max; - return this; - } - - /** - * Enable or disable validation of cached patterns. - * - *

Default: enabled (true) - *

Performs defensive check for native pointer validity on cache retrieval. - * Tiny overhead (~1 JNI call), provides crash safety against native memory corruption. - *

Only disable if absolute maximum performance required. - * - * @param validate true to validate (default), false to skip validation - * @return this builder - */ - public Builder validateCachedPatterns(boolean validate) { - this.validateCachedPatterns = validate; - return this; - } - - /** - * Set metrics registry for instrumentation. - * - *

Default: {@link com.axonops.libre2.metrics.NoOpMetricsRegistry} (zero overhead) - *

Use {@link com.axonops.libre2.metrics.DropwizardMetricsAdapter} for Dropwizard Metrics integration. - * See {@link com.axonops.libre2.metrics.MetricNames} for available metrics. - * - *

Example: - *

{@code
-         * MetricRegistry registry = new MetricRegistry();
-         * RE2Config config = RE2Config.builder()
-         *     .metricsRegistry(new DropwizardMetricsAdapter(registry, "myapp.re2"))
-         *     .build();
-         * }
- * - * @param metricsRegistry metrics implementation (must not be null) - * @return this builder - * @throws NullPointerException if metricsRegistry is null - */ - public Builder metricsRegistry(RE2MetricsRegistry metricsRegistry) { - this.metricsRegistry = Objects.requireNonNull(metricsRegistry, "metricsRegistry cannot be null"); - return this; - } - - /** - * Build immutable configuration. - * - *

Validates all parameters and their relationships. - * Throws {@link IllegalArgumentException} if configuration is invalid. - * - * @return validated immutable configuration - * @throws IllegalArgumentException if configuration is invalid - */ - public RE2Config build() { - return new RE2Config( - cacheEnabled, - maxCacheSize, - idleTimeoutSeconds, - evictionScanIntervalSeconds, - deferredCleanupIntervalSeconds, - evictionProtectionMs, - maxSimultaneousCompiledPatterns, - maxMatchersPerPattern, - validateCachedPatterns, - metricsRegistry - ); - } + public RE2Config build() { + return new RE2Config( + cacheEnabled, + maxCacheSize, + idleTimeoutSeconds, + evictionScanIntervalSeconds, + deferredCleanupIntervalSeconds, + evictionProtectionMs, + maxSimultaneousCompiledPatterns, + maxMatchersPerPattern, + validateCachedPatterns, + metricsRegistry); } + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/dropwizard/RE2MetricsConfig.java b/libre2-core/src/main/java/com/axonops/libre2/dropwizard/RE2MetricsConfig.java index 8b7e738..1fa67e1 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/dropwizard/RE2MetricsConfig.java +++ b/libre2-core/src/main/java/com/axonops/libre2/dropwizard/RE2MetricsConfig.java @@ -20,19 +20,19 @@ import com.axonops.libre2.metrics.DropwizardMetricsAdapter; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.jmx.JmxReporter; +import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Objects; - /** * Convenience factory for RE2Config with Dropwizard Metrics integration. * - *

This class provides easy setup for applications using Dropwizard Metrics, - * including automatic JMX exposure. Works with any framework that uses Dropwizard - * (Cassandra, Spring Boot, Dropwizard apps, etc.). + *

This class provides easy setup for applications using Dropwizard Metrics, including automatic + * JMX exposure. Works with any framework that uses Dropwizard (Cassandra, Spring Boot, Dropwizard + * apps, etc.). * *

Usage Examples: + * *

{@code
  * // Cassandra integration:
  * MetricRegistry cassandraRegistry = getCassandraMetricRegistry();
@@ -47,130 +47,131 @@
  * RE2Config config = RE2MetricsConfig.withMetrics(registry, "com.mycompany.myapp.re2");
  * }
* - *

JMX Exposure: This class automatically sets up JmxReporter - * for the provided registry (if not already configured), ensuring all RE2 metrics - * are visible via JMX. + *

JMX Exposure: This class automatically sets up JmxReporter for the provided + * registry (if not already configured), ensuring all RE2 metrics are visible via JMX. * * @since 0.9.1 */ public final class RE2MetricsConfig { - private static final Logger logger = LoggerFactory.getLogger(RE2MetricsConfig.class); - private static volatile JmxReporter jmxReporter; + private static final Logger logger = LoggerFactory.getLogger(RE2MetricsConfig.class); + private static volatile JmxReporter jmxReporter; - private RE2MetricsConfig() { - // Utility class - } + private RE2MetricsConfig() { + // Utility class + } - /** - * Creates RE2Config with Dropwizard Metrics integration and automatic JMX. - * - *

This is the recommended method for integrating with frameworks like Cassandra - * or Spring Boot that already have a MetricRegistry. - * - *

Metric Prefix Examples: - *

    - *
  • Cassandra: {@code "org.apache.cassandra.metrics.RE2"}
  • - *
  • Spring Boot: {@code "com.myapp.regex"}
  • - *
  • Generic: {@code "com.axonops.libre2"}
  • - *
- * - * @param registry the Dropwizard MetricRegistry to use - * @param metricPrefix the metric namespace prefix - * @return configured RE2Config with metrics enabled - */ - public static RE2Config withMetrics(MetricRegistry registry, String metricPrefix) { - return withMetrics(registry, metricPrefix, true); - } + /** + * Creates RE2Config with Dropwizard Metrics integration and automatic JMX. + * + *

This is the recommended method for integrating with frameworks like Cassandra or Spring Boot + * that already have a MetricRegistry. + * + *

Metric Prefix Examples: + * + *

    + *
  • Cassandra: {@code "org.apache.cassandra.metrics.RE2"} + *
  • Spring Boot: {@code "com.myapp.regex"} + *
  • Generic: {@code "com.axonops.libre2"} + *
+ * + * @param registry the Dropwizard MetricRegistry to use + * @param metricPrefix the metric namespace prefix + * @return configured RE2Config with metrics enabled + */ + public static RE2Config withMetrics(MetricRegistry registry, String metricPrefix) { + return withMetrics(registry, metricPrefix, true); + } - /** - * Creates RE2Config with Dropwizard Metrics integration. - * - * @param registry the Dropwizard MetricRegistry to use - * @param metricPrefix the metric namespace prefix - * @param enableJmx whether to automatically set up JMX exposure - * @return configured RE2Config with metrics enabled - */ - public static RE2Config withMetrics(MetricRegistry registry, String metricPrefix, boolean enableJmx) { - Objects.requireNonNull(registry, "registry cannot be null"); - Objects.requireNonNull(metricPrefix, "metricPrefix cannot be null"); + /** + * Creates RE2Config with Dropwizard Metrics integration. + * + * @param registry the Dropwizard MetricRegistry to use + * @param metricPrefix the metric namespace prefix + * @param enableJmx whether to automatically set up JMX exposure + * @return configured RE2Config with metrics enabled + */ + public static RE2Config withMetrics( + MetricRegistry registry, String metricPrefix, boolean enableJmx) { + Objects.requireNonNull(registry, "registry cannot be null"); + Objects.requireNonNull(metricPrefix, "metricPrefix cannot be null"); - if (enableJmx) { - ensureJmxReporter(registry); - } - - return RE2Config.builder() - .metricsRegistry(new DropwizardMetricsAdapter(registry, metricPrefix)) - .build(); + if (enableJmx) { + ensureJmxReporter(registry); } - /** - * Creates RE2Config with Dropwizard Metrics using default prefix. - * - *

Uses default metric prefix: {@code "com.axonops.libre2"} - * - * @param registry the Dropwizard MetricRegistry to use - * @return configured RE2Config with metrics enabled - */ - public static RE2Config withMetrics(MetricRegistry registry) { - return withMetrics(registry, "com.axonops.libre2", true); - } + return RE2Config.builder() + .metricsRegistry(new DropwizardMetricsAdapter(registry, metricPrefix)) + .build(); + } - /** - * Creates RE2Config optimized for Cassandra with standard metric prefix. - * - *

Convenience method for Cassandra integration. Uses Cassandra's standard - * metric namespace: {@code "org.apache.cassandra.metrics.RE2"} - * - * @param cassandraRegistry Cassandra's singleton MetricRegistry - * @return configured RE2Config for Cassandra with JMX enabled - */ - public static RE2Config forCassandra(MetricRegistry cassandraRegistry) { - return withMetrics(cassandraRegistry, "org.apache.cassandra.metrics.RE2", true); - } + /** + * Creates RE2Config with Dropwizard Metrics using default prefix. + * + *

Uses default metric prefix: {@code "com.axonops.libre2"} + * + * @param registry the Dropwizard MetricRegistry to use + * @return configured RE2Config with metrics enabled + */ + public static RE2Config withMetrics(MetricRegistry registry) { + return withMetrics(registry, "com.axonops.libre2", true); + } - /** - * Creates RE2Config optimized for Cassandra with standard metric prefix. - * - * @param cassandraRegistry Cassandra's singleton MetricRegistry - * @param enableJmx whether to automatically set up JMX exposure - * @return configured RE2Config for Cassandra - */ - public static RE2Config forCassandra(MetricRegistry cassandraRegistry, boolean enableJmx) { - return withMetrics(cassandraRegistry, "org.apache.cassandra.metrics.RE2", enableJmx); - } + /** + * Creates RE2Config optimized for Cassandra with standard metric prefix. + * + *

Convenience method for Cassandra integration. Uses Cassandra's standard metric namespace: + * {@code "org.apache.cassandra.metrics.RE2"} + * + * @param cassandraRegistry Cassandra's singleton MetricRegistry + * @return configured RE2Config for Cassandra with JMX enabled + */ + public static RE2Config forCassandra(MetricRegistry cassandraRegistry) { + return withMetrics(cassandraRegistry, "org.apache.cassandra.metrics.RE2", true); + } + + /** + * Creates RE2Config optimized for Cassandra with standard metric prefix. + * + * @param cassandraRegistry Cassandra's singleton MetricRegistry + * @param enableJmx whether to automatically set up JMX exposure + * @return configured RE2Config for Cassandra + */ + public static RE2Config forCassandra(MetricRegistry cassandraRegistry, boolean enableJmx) { + return withMetrics(cassandraRegistry, "org.apache.cassandra.metrics.RE2", enableJmx); + } - /** - * Ensures JmxReporter is registered for the given MetricRegistry. - * - *

Idempotent: safe to call multiple times, only creates one reporter. - * If the registry already has JMX exposure configured, this is harmless. - * - * @param registry the MetricRegistry to expose via JMX - */ - private static synchronized void ensureJmxReporter(MetricRegistry registry) { - if (jmxReporter == null) { - try { - logger.info("RE2: Registering JmxReporter for metrics"); - jmxReporter = JmxReporter.forRegistry(registry).build(); - jmxReporter.start(); - logger.info("RE2: JmxReporter started - metrics available via JMX"); - } catch (Exception e) { - logger.warn("RE2: Failed to start JmxReporter (may already be configured)", e); - // Not fatal - registry may already have JMX exposure - } - } + /** + * Ensures JmxReporter is registered for the given MetricRegistry. + * + *

Idempotent: safe to call multiple times, only creates one reporter. If the registry already + * has JMX exposure configured, this is harmless. + * + * @param registry the MetricRegistry to expose via JMX + */ + private static synchronized void ensureJmxReporter(MetricRegistry registry) { + if (jmxReporter == null) { + try { + logger.info("RE2: Registering JmxReporter for metrics"); + jmxReporter = JmxReporter.forRegistry(registry).build(); + jmxReporter.start(); + logger.info("RE2: JmxReporter started - metrics available via JMX"); + } catch (Exception e) { + logger.warn("RE2: Failed to start JmxReporter (may already be configured)", e); + // Not fatal - registry may already have JMX exposure + } } + } - /** - * Shutdown hook for clean JMX reporter cleanup. - * - *

Called automatically by JVM shutdown, or manually if needed. - */ - public static synchronized void shutdown() { - if (jmxReporter != null) { - logger.info("RE2: Stopping JmxReporter"); - jmxReporter.stop(); - jmxReporter = null; - } + /** + * Shutdown hook for clean JMX reporter cleanup. + * + *

Called automatically by JVM shutdown, or manually if needed. + */ + public static synchronized void shutdown() { + if (jmxReporter != null) { + logger.info("RE2: Stopping JmxReporter"); + jmxReporter.stop(); + jmxReporter = null; } + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/jni/IRE2Native.java b/libre2-core/src/main/java/com/axonops/libre2/jni/IRE2Native.java index d70831a..14d275a 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/jni/IRE2Native.java +++ b/libre2-core/src/main/java/com/axonops/libre2/jni/IRE2Native.java @@ -17,55 +17,78 @@ package com.axonops.libre2.jni; /** - * Adapter interface for RE2 JNI operations. - * Enables mocking for unit tests while maintaining production performance. + * Adapter interface for RE2 JNI operations. Enables mocking for unit tests while maintaining + * production performance. * - *

Production implementation (DirectJniAdapter) delegates directly to RE2NativeJNI. - * Test implementations can mock native calls to verify correct parameters and behavior.

+ *

Production implementation (DirectJniAdapter) delegates directly to RE2NativeJNI. Test + * implementations can mock native calls to verify correct parameters and behavior. * *

Internal API: Not part of public API contract. Used internally by Pattern/Matcher/RE2. - * Public visibility required for cross-package access from api package.

+ * Public visibility required for cross-package access from api package. */ public interface IRE2Native { - // Pattern lifecycle - long compile(String pattern, boolean caseSensitive); - void freePattern(long handle); - boolean patternOk(long handle); - String getError(); - String getPattern(long handle); - int numCapturingGroups(long handle); - long patternMemory(long handle); - - // Matching operations - boolean fullMatch(long handle, String text); - boolean partialMatch(long handle, String text); - boolean fullMatchDirect(long handle, long address, int length); - boolean partialMatchDirect(long handle, long address, int length); - - // Bulk operations - boolean[] fullMatchBulk(long handle, String[] texts); - boolean[] partialMatchBulk(long handle, String[] texts); - boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths); - boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths); - - // Capture groups - String[] extractGroups(long handle, String text); - String[][] extractGroupsBulk(long handle, String[] texts); - String[] extractGroupsDirect(long handle, long address, int length); - String[][] findAllMatches(long handle, String text); - String[][] findAllMatchesDirect(long handle, long address, int length); - String[] getNamedGroups(long handle); - - // Replace operations - String replaceFirst(long handle, String text, String replacement); - String replaceAll(long handle, String text, String replacement); - String[] replaceAllBulk(long handle, String[] texts, String replacement); - String replaceFirstDirect(long handle, long address, int length, String replacement); - String replaceAllDirect(long handle, long address, int length, String replacement); - String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement); - - // Utility methods - String quoteMeta(String text); - int[] programFanout(long handle); + // Pattern lifecycle + long compile(String pattern, boolean caseSensitive); + + 
void freePattern(long handle); + + boolean patternOk(long handle); + + String getError(); + + String getPattern(long handle); + + int numCapturingGroups(long handle); + + long patternMemory(long handle); + + // Matching operations + boolean fullMatch(long handle, String text); + + boolean partialMatch(long handle, String text); + + boolean fullMatchDirect(long handle, long address, int length); + + boolean partialMatchDirect(long handle, long address, int length); + + // Bulk operations + boolean[] fullMatchBulk(long handle, String[] texts); + + boolean[] partialMatchBulk(long handle, String[] texts); + + boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths); + + boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths); + + // Capture groups + String[] extractGroups(long handle, String text); + + String[][] extractGroupsBulk(long handle, String[] texts); + + String[] extractGroupsDirect(long handle, long address, int length); + + String[][] findAllMatches(long handle, String text); + + String[][] findAllMatchesDirect(long handle, long address, int length); + + String[] getNamedGroups(long handle); + + // Replace operations + String replaceFirst(long handle, String text, String replacement); + + String replaceAll(long handle, String text, String replacement); + + String[] replaceAllBulk(long handle, String[] texts, String replacement); + + String replaceFirstDirect(long handle, long address, int length, String replacement); + + String replaceAllDirect(long handle, long address, int length, String replacement); + + String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement); + + // Utility methods + String quoteMeta(String text); + + int[] programFanout(long handle); } diff --git a/libre2-core/src/main/java/com/axonops/libre2/jni/RE2LibraryLoader.java b/libre2-core/src/main/java/com/axonops/libre2/jni/RE2LibraryLoader.java index 4a0ed64..b1c1996 100644 --- 
a/libre2-core/src/main/java/com/axonops/libre2/jni/RE2LibraryLoader.java +++ b/libre2-core/src/main/java/com/axonops/libre2/jni/RE2LibraryLoader.java @@ -17,231 +17,239 @@ package com.axonops.libre2.jni; import com.axonops.libre2.metrics.MetricNames; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.util.concurrent.atomic.AtomicBoolean; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Loads the native RE2 library for the current platform. * - * Automatically detects platform and extracts the correct - * library from JAR resources. + *

Automatically detects platform and extracts the correct library from JAR resources. * - * Supported platforms: - * - macOS x86_64 (Intel) - * - macOS aarch64 (Apple Silicon) - * - Linux x86_64 - * - Linux aarch64 (ARM64) + *

Supported platforms: - macOS x86_64 (Intel) - macOS aarch64 (Apple Silicon) - Linux x86_64 - + * Linux aarch64 (ARM64) * - * Thread-safe and idempotent. + *

Thread-safe and idempotent. * * @since 1.0.0 */ public final class RE2LibraryLoader { - private static final Logger logger = LoggerFactory.getLogger(RE2LibraryLoader.class); - private static final AtomicBoolean loaded = new AtomicBoolean(false); - private static volatile Exception loadError = null; - - private RE2LibraryLoader() { - // Utility class + private static final Logger logger = LoggerFactory.getLogger(RE2LibraryLoader.class); + private static final AtomicBoolean loaded = new AtomicBoolean(false); + private static volatile Exception loadError = null; + + private RE2LibraryLoader() { + // Utility class + } + + /** + * Loads the native library (idempotent). + * + * @throws IllegalStateException if library cannot be loaded + */ + public static void loadLibrary() { + if (loaded.get()) { + if (loadError != null) { + throw new IllegalStateException("RE2: Previous library load failed", loadError); + } + return; } - /** - * Loads the native library (idempotent). - * - * @throws IllegalStateException if library cannot be loaded - */ - public static void loadLibrary() { - if (loaded.get()) { - if (loadError != null) { - throw new IllegalStateException("RE2: Previous library load failed", loadError); - } - return; + synchronized (RE2LibraryLoader.class) { + if (loaded.get()) { + if (loadError != null) { + throw new IllegalStateException("RE2: Previous library load failed", loadError); } + return; + } - synchronized (RE2LibraryLoader.class) { - if (loaded.get()) { - if (loadError != null) { - throw new IllegalStateException("RE2: Previous library load failed", loadError); - } - return; - } - - try { - logger.info("RE2: Loading native library for platform: {}", getPlatformName()); - - Platform platform = detectPlatform(); - String resourcePath = getResourcePath(platform); - String libraryName = getLibraryFileName(platform); + try { + logger.info("RE2: Loading native library for platform: {}", getPlatformName()); - logger.debug("RE2: Resource path: {}", resourcePath); + 
Platform platform = detectPlatform(); + String resourcePath = getResourcePath(platform); + String libraryName = getLibraryFileName(platform); - // Extract from JAR to temp directory - Path tempLib = extractLibrary(resourcePath, libraryName); + logger.debug("RE2: Resource path: {}", resourcePath); - // Load via System.load (JNI) - System.load(tempLib.toString()); - loaded.set(true); + // Extract from JAR to temp directory + Path tempLib = extractLibrary(resourcePath, libraryName); - logger.info("RE2: Native library loaded successfully - platform: {}", platform); + // Load via System.load (JNI) + System.load(tempLib.toString()); + loaded.set(true); - // Perform initialization warmup test - performInitializationTest(); + logger.info("RE2: Native library loaded successfully - platform: {}", platform); - } catch (Exception e) { - loadError = e; - loaded.set(true); - logger.error("RE2: Failed to load native library", e); + // Perform initialization warmup test + performInitializationTest(); - // Record native library error (best effort - cache may not be initialized yet) - try { - com.axonops.libre2.api.Pattern.getGlobalCache().getConfig().metricsRegistry() - .incrementCounter(MetricNames.ERRORS_NATIVE_LIBRARY); - } catch (Exception metricsError) { - // Ignore - metrics are optional and cache may not be initialized - } + } catch (Exception e) { + loadError = e; + loaded.set(true); + logger.error("RE2: Failed to load native library", e); - throw new IllegalStateException("RE2: Failed to load native library: " + e.getMessage(), e); - } - } - } - - /** - * Detects the current platform. 
- */ - private static Platform detectPlatform() { - String os = System.getProperty("os.name").toLowerCase(); - String arch = System.getProperty("os.arch").toLowerCase(); - - OS detectedOS; - if (os.contains("mac") || os.contains("darwin")) { - detectedOS = OS.MACOS; - } else if (os.contains("linux")) { - detectedOS = OS.LINUX; - } else { - throw new IllegalStateException("RE2: Unsupported OS: " + os + " (only macOS and Linux supported)"); - } - - Arch detectedArch; - if (arch.equals("amd64") || arch.equals("x86_64") || arch.equals("x64")) { - detectedArch = Arch.X86_64; - } else if (arch.equals("aarch64") || arch.equals("arm64")) { - detectedArch = Arch.AARCH64; - } else { - throw new IllegalStateException("RE2: Unsupported architecture: " + arch + " (only x86_64 and aarch64 supported)"); + // Record native library error (best effort - cache may not be initialized yet) + try { + com.axonops.libre2.api.Pattern.getGlobalCache() + .getConfig() + .metricsRegistry() + .incrementCounter(MetricNames.ERRORS_NATIVE_LIBRARY); + } catch (Exception metricsError) { + // Ignore - metrics are optional and cache may not be initialized } - return new Platform(detectedOS, detectedArch); + throw new IllegalStateException("RE2: Failed to load native library: " + e.getMessage(), e); + } } - - private static String getPlatformName() { - return System.getProperty("os.name") + " " + System.getProperty("os.arch"); + } + + /** Detects the current platform. */ + private static Platform detectPlatform() { + String os = System.getProperty("os.name").toLowerCase(); + String arch = System.getProperty("os.arch").toLowerCase(); + + OS detectedOS; + if (os.contains("mac") || os.contains("darwin")) { + detectedOS = OS.MACOS; + } else if (os.contains("linux")) { + detectedOS = OS.LINUX; + } else { + throw new IllegalStateException( + "RE2: Unsupported OS: " + os + " (only macOS and Linux supported)"); } - private static String getLibraryFileName(Platform platform) { - return platform.os == OS.MACOS ? 
"libre2.dylib" : "libre2.so"; + Arch detectedArch; + if (arch.equals("amd64") || arch.equals("x86_64") || arch.equals("x64")) { + detectedArch = Arch.X86_64; + } else if (arch.equals("aarch64") || arch.equals("arm64")) { + detectedArch = Arch.AARCH64; + } else { + throw new IllegalStateException( + "RE2: Unsupported architecture: " + arch + " (only x86_64 and aarch64 supported)"); } - private static String getResourcePath(Platform platform) { - String platformDir = switch (platform.os) { - case MACOS -> "darwin"; - case LINUX -> "linux"; - } + "-" + switch (platform.arch) { - case X86_64 -> "x86_64"; - case AARCH64 -> "aarch64"; - }; + return new Platform(detectedOS, detectedArch); + } - return "/native/" + platformDir + "/" + getLibraryFileName(platform); - } + private static String getPlatformName() { + return System.getProperty("os.name") + " " + System.getProperty("os.arch"); + } - private static Path extractLibrary(String resourcePath, String fileName) throws IOException { - try (InputStream in = RE2LibraryLoader.class.getResourceAsStream(resourcePath)) { - if (in == null) { - throw new IOException("RE2: Native library not found in JAR: " + resourcePath); - } + private static String getLibraryFileName(Platform platform) { + return platform.os == OS.MACOS ? 
"libre2.dylib" : "libre2.so"; + } - Path tempDir = Files.createTempDirectory("libre2-"); - Path libFile = tempDir.resolve(fileName); - - Files.copy(in, libFile, StandardCopyOption.REPLACE_EXISTING); - libFile.toFile().setExecutable(true); - libFile.toFile().deleteOnExit(); - tempDir.toFile().deleteOnExit(); - - logger.debug("RE2: Library extracted to: {}", libFile); - return libFile; - } - } - - public static boolean isLoaded() { - return loaded.get() && loadError == null; + private static String getResourcePath(Platform platform) { + String platformDir = + switch (platform.os) { + case MACOS -> "darwin"; + case LINUX -> "linux"; + } + + "-" + + switch (platform.arch) { + case X86_64 -> "x86_64"; + case AARCH64 -> "aarch64"; + }; + + return "/native/" + platformDir + "/" + getLibraryFileName(platform); + } + + private static Path extractLibrary(String resourcePath, String fileName) throws IOException { + try (InputStream in = RE2LibraryLoader.class.getResourceAsStream(resourcePath)) { + if (in == null) { + throw new IOException("RE2: Native library not found in JAR: " + resourcePath); + } + + Path tempDir = Files.createTempDirectory("libre2-"); + Path libFile = tempDir.resolve(fileName); + + Files.copy(in, libFile, StandardCopyOption.REPLACE_EXISTING); + libFile.toFile().setExecutable(true); + libFile.toFile().deleteOnExit(); + tempDir.toFile().deleteOnExit(); + + logger.debug("RE2: Library extracted to: {}", libFile); + return libFile; } - - /** - * Performs initialization warmup test using direct JNI calls. - * - * Tests the native library directly (not through Pattern cache) to verify: - * - Pattern compilation works - * - Full and partial matching work - * - Native library is functioning correctly - * - * Note: This bypasses the cache to avoid circular dependencies and test pollution. 
- */ - private static void performInitializationTest() { - long testPatternHandle = 0; + } + + public static boolean isLoaded() { + return loaded.get() && loadError == null; + } + + /** + * Performs initialization warmup test using direct JNI calls. + * + *

Tests the native library directly (not through Pattern cache) to verify: - Pattern + * compilation works - Full and partial matching work - Native library is functioning correctly + * + *

Note: This bypasses the cache to avoid circular dependencies and test pollution. + */ + private static void performInitializationTest() { + long testPatternHandle = 0; + try { + long testStart = System.nanoTime(); + + // Test pattern compilation (direct JNI call, bypasses cache) + testPatternHandle = RE2NativeJNI.compile("test_warmup_.*", true); + if (testPatternHandle == 0 || !RE2NativeJNI.patternOk(testPatternHandle)) { + logger.error("RE2: Initialization test - pattern compilation failed"); + return; + } + + // Test full match + boolean fullMatchResult = RE2NativeJNI.fullMatch(testPatternHandle, "test_warmup_123"); + if (!fullMatchResult) { + logger.error("RE2: Initialization test - full match failed (expected true, got false)"); + } + + // Test partial match + boolean partialMatchResult = + RE2NativeJNI.partialMatch(testPatternHandle, "xxx test_warmup_yyy"); + if (!partialMatchResult) { + logger.error("RE2: Initialization test - partial match failed (expected true, got false)"); + } + + long testDuration = System.nanoTime() - testStart; + + // Log success/failure + if (fullMatchResult && partialMatchResult) { + logger.info( + "RE2: Initialization test passed - native library functional, duration: {}ns", + testDuration); + } else { + logger.error("RE2: Initialization test FAILED - library may not work correctly"); + } + + } catch (Exception e) { + logger.error("RE2: Initialization test failed - library may not work correctly", e); + } finally { + // Always free the test pattern (silently consume exceptions) + if (testPatternHandle != 0) { try { - long testStart = System.nanoTime(); - - // Test pattern compilation (direct JNI call, bypasses cache) - testPatternHandle = RE2NativeJNI.compile("test_warmup_.*", true); - if (testPatternHandle == 0 || !RE2NativeJNI.patternOk(testPatternHandle)) { - logger.error("RE2: Initialization test - pattern compilation failed"); - return; - } - - // Test full match - boolean fullMatchResult = 
RE2NativeJNI.fullMatch(testPatternHandle, "test_warmup_123"); - if (!fullMatchResult) { - logger.error("RE2: Initialization test - full match failed (expected true, got false)"); - } - - // Test partial match - boolean partialMatchResult = RE2NativeJNI.partialMatch(testPatternHandle, "xxx test_warmup_yyy"); - if (!partialMatchResult) { - logger.error("RE2: Initialization test - partial match failed (expected true, got false)"); - } - - long testDuration = System.nanoTime() - testStart; - - // Log success/failure - if (fullMatchResult && partialMatchResult) { - logger.info("RE2: Initialization test passed - native library functional, duration: {}ns", testDuration); - } else { - logger.error("RE2: Initialization test FAILED - library may not work correctly"); - } - + RE2NativeJNI.freePattern(testPatternHandle); } catch (Exception e) { - logger.error("RE2: Initialization test failed - library may not work correctly", e); - } finally { - // Always free the test pattern (silently consume exceptions) - if (testPatternHandle != 0) { - try { - RE2NativeJNI.freePattern(testPatternHandle); - } catch (Exception e) { - // Silently ignore - best effort cleanup - } - } + // Silently ignore - best effort cleanup } + } } + } + + private record Platform(OS os, Arch arch) {} - private record Platform(OS os, Arch arch) {} + private enum OS { + MACOS, + LINUX + } - private enum OS { MACOS, LINUX } - private enum Arch { X86_64, AARCH64 } + private enum Arch { + X86_64, + AARCH64 + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/jni/RE2Native.java b/libre2-core/src/main/java/com/axonops/libre2/jni/RE2Native.java index eb8de75..2737228 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/jni/RE2Native.java +++ b/libre2-core/src/main/java/com/axonops/libre2/jni/RE2Native.java @@ -19,165 +19,162 @@ /** * Production JNI adapter - delegates to package-private RE2NativeJNI. * - *

Singleton instance used by all Pattern/Matcher/RE2 instances in production. - * Tests can inject mock JniAdapter instead.

+ *

Singleton instance used by all Pattern/Matcher/RE2 instances in production. Tests can inject + * mock JniAdapter instead. * - *

Internal API: Not part of public API contract. Accessed via Pattern injection. - * Public visibility required for cross-package access from api package.

+ *

Internal API: Not part of public API contract. Accessed via Pattern injection. Public + * visibility required for cross-package access from api package. */ public final class RE2Native implements IRE2Native { - /** - * Singleton instance - used in production. - * Public so Pattern can access it from api package. - */ - public static final RE2Native INSTANCE = new RE2Native(); - - private RE2Native() { - } - - @Override - public long compile(String pattern, boolean caseSensitive) { - return RE2NativeJNI.compile(pattern, caseSensitive); - } - - @Override - public void freePattern(long handle) { - RE2NativeJNI.freePattern(handle); - } - - @Override - public boolean patternOk(long handle) { - return RE2NativeJNI.patternOk(handle); - } - - @Override - public String getError() { - return RE2NativeJNI.getError(); - } - - @Override - public String getPattern(long handle) { - return RE2NativeJNI.getPattern(handle); - } - - @Override - public int numCapturingGroups(long handle) { - return RE2NativeJNI.numCapturingGroups(handle); - } - - @Override - public long patternMemory(long handle) { - return RE2NativeJNI.patternMemory(handle); - } - - @Override - public boolean fullMatch(long handle, String text) { - return RE2NativeJNI.fullMatch(handle, text); - } - - @Override - public boolean partialMatch(long handle, String text) { - return RE2NativeJNI.partialMatch(handle, text); - } - - @Override - public boolean fullMatchDirect(long handle, long address, int length) { - return RE2NativeJNI.fullMatchDirect(handle, address, length); - } - - @Override - public boolean partialMatchDirect(long handle, long address, int length) { - return RE2NativeJNI.partialMatchDirect(handle, address, length); - } - - @Override - public boolean[] fullMatchBulk(long handle, String[] texts) { - return RE2NativeJNI.fullMatchBulk(handle, texts); - } - - @Override - public boolean[] partialMatchBulk(long handle, String[] texts) { - return RE2NativeJNI.partialMatchBulk(handle, texts); - } - - @Override - 
public boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths) { - return RE2NativeJNI.fullMatchDirectBulk(handle, addresses, lengths); - } - - @Override - public boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths) { - return RE2NativeJNI.partialMatchDirectBulk(handle, addresses, lengths); - } - - @Override - public String[] extractGroups(long handle, String text) { - return RE2NativeJNI.extractGroups(handle, text); - } - - @Override - public String[][] extractGroupsBulk(long handle, String[] texts) { - return RE2NativeJNI.extractGroupsBulk(handle, texts); - } - - @Override - public String[] extractGroupsDirect(long handle, long address, int length) { - return RE2NativeJNI.extractGroupsDirect(handle, address, length); - } - - @Override - public String[][] findAllMatches(long handle, String text) { - return RE2NativeJNI.findAllMatches(handle, text); - } - - @Override - public String[][] findAllMatchesDirect(long handle, long address, int length) { - return RE2NativeJNI.findAllMatchesDirect(handle, address, length); - } - - @Override - public String[] getNamedGroups(long handle) { - return RE2NativeJNI.getNamedGroups(handle); - } - - @Override - public String replaceFirst(long handle, String text, String replacement) { - return RE2NativeJNI.replaceFirst(handle, text, replacement); - } - - @Override - public String replaceAll(long handle, String text, String replacement) { - return RE2NativeJNI.replaceAll(handle, text, replacement); - } - - @Override - public String[] replaceAllBulk(long handle, String[] texts, String replacement) { - return RE2NativeJNI.replaceAllBulk(handle, texts, replacement); - } - - @Override - public String replaceFirstDirect(long handle, long address, int length, String replacement) { - return RE2NativeJNI.replaceFirstDirect(handle, address, length, replacement); - } - - @Override - public String replaceAllDirect(long handle, long address, int length, String replacement) { - return 
RE2NativeJNI.replaceAllDirect(handle, address, length, replacement); - } - - @Override - public String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement) { - return RE2NativeJNI.replaceAllDirectBulk(handle, addresses, lengths, replacement); - } - - @Override - public String quoteMeta(String text) { - return RE2NativeJNI.quoteMeta(text); - } - - @Override - public int[] programFanout(long handle) { - return RE2NativeJNI.programFanout(handle); - } + /** Singleton instance - used in production. Public so Pattern can access it from api package. */ + public static final RE2Native INSTANCE = new RE2Native(); + + private RE2Native() {} + + @Override + public long compile(String pattern, boolean caseSensitive) { + return RE2NativeJNI.compile(pattern, caseSensitive); + } + + @Override + public void freePattern(long handle) { + RE2NativeJNI.freePattern(handle); + } + + @Override + public boolean patternOk(long handle) { + return RE2NativeJNI.patternOk(handle); + } + + @Override + public String getError() { + return RE2NativeJNI.getError(); + } + + @Override + public String getPattern(long handle) { + return RE2NativeJNI.getPattern(handle); + } + + @Override + public int numCapturingGroups(long handle) { + return RE2NativeJNI.numCapturingGroups(handle); + } + + @Override + public long patternMemory(long handle) { + return RE2NativeJNI.patternMemory(handle); + } + + @Override + public boolean fullMatch(long handle, String text) { + return RE2NativeJNI.fullMatch(handle, text); + } + + @Override + public boolean partialMatch(long handle, String text) { + return RE2NativeJNI.partialMatch(handle, text); + } + + @Override + public boolean fullMatchDirect(long handle, long address, int length) { + return RE2NativeJNI.fullMatchDirect(handle, address, length); + } + + @Override + public boolean partialMatchDirect(long handle, long address, int length) { + return RE2NativeJNI.partialMatchDirect(handle, address, length); + } + + @Override + public 
boolean[] fullMatchBulk(long handle, String[] texts) { + return RE2NativeJNI.fullMatchBulk(handle, texts); + } + + @Override + public boolean[] partialMatchBulk(long handle, String[] texts) { + return RE2NativeJNI.partialMatchBulk(handle, texts); + } + + @Override + public boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths) { + return RE2NativeJNI.fullMatchDirectBulk(handle, addresses, lengths); + } + + @Override + public boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths) { + return RE2NativeJNI.partialMatchDirectBulk(handle, addresses, lengths); + } + + @Override + public String[] extractGroups(long handle, String text) { + return RE2NativeJNI.extractGroups(handle, text); + } + + @Override + public String[][] extractGroupsBulk(long handle, String[] texts) { + return RE2NativeJNI.extractGroupsBulk(handle, texts); + } + + @Override + public String[] extractGroupsDirect(long handle, long address, int length) { + return RE2NativeJNI.extractGroupsDirect(handle, address, length); + } + + @Override + public String[][] findAllMatches(long handle, String text) { + return RE2NativeJNI.findAllMatches(handle, text); + } + + @Override + public String[][] findAllMatchesDirect(long handle, long address, int length) { + return RE2NativeJNI.findAllMatchesDirect(handle, address, length); + } + + @Override + public String[] getNamedGroups(long handle) { + return RE2NativeJNI.getNamedGroups(handle); + } + + @Override + public String replaceFirst(long handle, String text, String replacement) { + return RE2NativeJNI.replaceFirst(handle, text, replacement); + } + + @Override + public String replaceAll(long handle, String text, String replacement) { + return RE2NativeJNI.replaceAll(handle, text, replacement); + } + + @Override + public String[] replaceAllBulk(long handle, String[] texts, String replacement) { + return RE2NativeJNI.replaceAllBulk(handle, texts, replacement); + } + + @Override + public String replaceFirstDirect(long 
handle, long address, int length, String replacement) { + return RE2NativeJNI.replaceFirstDirect(handle, address, length, replacement); + } + + @Override + public String replaceAllDirect(long handle, long address, int length, String replacement) { + return RE2NativeJNI.replaceAllDirect(handle, address, length, replacement); + } + + @Override + public String[] replaceAllDirectBulk( + long handle, long[] addresses, int[] lengths, String replacement) { + return RE2NativeJNI.replaceAllDirectBulk(handle, addresses, lengths, replacement); + } + + @Override + public String quoteMeta(String text) { + return RE2NativeJNI.quoteMeta(text); + } + + @Override + public int[] programFanout(long handle) { + return RE2NativeJNI.programFanout(handle); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/jni/RE2NativeJNI.java b/libre2-core/src/main/java/com/axonops/libre2/jni/RE2NativeJNI.java index 2fa2f7f..c95f7a5 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/jni/RE2NativeJNI.java +++ b/libre2-core/src/main/java/com/axonops/libre2/jni/RE2NativeJNI.java @@ -19,430 +19,435 @@ /** * JNI interface to the native RE2 library. * - *

Maps directly to the C functions in re2_jni.cpp. - * All methods are native calls executing off-heap.

+ *

Maps directly to the C functions in re2_jni.cpp. All methods are native calls executing + * off-heap. * - *

PACKAGE-PRIVATE DESIGN: All methods are package-private to enforce - * abstraction. External code must use Pattern/Matcher/RE2 API. Direct JNI access - * is only available to DirectJniAdapter within this package.

+ *

PACKAGE-PRIVATE DESIGN: All methods are package-private to enforce abstraction. + * External code must use Pattern/Matcher/RE2 API. Direct JNI access is only available to + * DirectJniAdapter within this package. * *

This design enables: + * *

    - *
  • Mockability - DirectJniAdapter implements JniAdapter interface
  • - *
  • Encapsulation - No direct JNI calls from API classes
  • - *
  • Testability - Tests can inject mock JniAdapter
  • + *
  • Mockability - DirectJniAdapter implements JniAdapter interface + *
  • Encapsulation - No direct JNI calls from API classes + *
  • Testability - Tests can inject mock JniAdapter *
* *

Zero-Copy Direct Memory API

- *

This class provides two categories of methods:

+ * + *

This class provides two categories of methods: + * *

    - *
  • String-based methods - Accept Java Strings, involve UTF-8 copy
  • - *
  • Direct methods (*Direct suffix) - Accept memory addresses for zero-copy operation
  • + *
  • String-based methods - Accept Java Strings, involve UTF-8 copy + *
  • Direct methods (*Direct suffix) - Accept memory addresses for zero-copy + * operation *
* *

CRITICAL SAFETY

+ * *
    - *
  • All long handles MUST be freed via {@link #freePattern(long)}
  • - *
  • Never call methods with 0 handles (will return error/false)
  • - *
  • All strings are UTF-8 encoded
  • - *
  • For Direct methods: The memory at the provided address MUST remain valid - * for the duration of the call.
  • + *
  • All long handles MUST be freed via {@link #freePattern(long)} + *
  • Never call methods with 0 handles (will return error/false) + *
  • All strings are UTF-8 encoded + *
  • For Direct methods: The memory at the provided address MUST remain valid for the duration + * of the call. *
* * @since 1.0.0 */ final class RE2NativeJNI { - private RE2NativeJNI() { - // Utility class - prevent instantiation - } - - /** - * Compiles a regular expression pattern. - * - * @param pattern regex pattern string (UTF-8) - * @param caseSensitive true for case-sensitive, false for case-insensitive - * @return native handle to compiled pattern, or 0 on error (MUST be freed) - */ - static native long compile(String pattern, boolean caseSensitive); - - /** - * Frees a compiled pattern. - * Safe to call with 0 handle (no-op). - * - * @param handle native handle from compile() - */ - static native void freePattern(long handle); - - /** - * Tests if text fully matches the pattern. - * - * @param handle compiled pattern handle - * @param text text to match (UTF-8) - * @return true if matches, false if no match or error - */ - static native boolean fullMatch(long handle, String text); - - /** - * Tests if pattern matches anywhere in text. - * - * @param handle compiled pattern handle - * @param text text to search (UTF-8) - * @return true if matches, false if no match or error - */ - static native boolean partialMatch(long handle, String text); - - /** - * Gets the last error message. - * - * @return error message, or null if no error - */ - static native String getError(); - - /** - * Gets the pattern string from a compiled pattern. - * - * @param handle compiled pattern handle - * @return pattern string, or null if invalid - */ - static native String getPattern(long handle); - - /** - * Gets the number of capturing groups. - * - * @param handle compiled pattern handle - * @return number of capturing groups, or -1 on error - */ - static native int numCapturingGroups(long handle); - - /** - * Checks if pattern is valid. - * - * @param handle compiled pattern handle - * @return true if valid, false if invalid/null - */ - static native boolean patternOk(long handle); - - /** - * Gets the native memory size of a compiled pattern. 
- * - * Returns the size of the compiled DFA/NFA program in bytes. - * This represents the off-heap memory consumed by this pattern. - * - * @param handle compiled pattern handle - * @return size in bytes, or 0 if handle is 0 - */ - static native long patternMemory(long handle); - - // ========== Bulk Matching Operations ========== - - /** - * Performs full match on multiple strings in single JNI call. - * Minimizes JNI overhead for high-throughput scenarios. - * - * @param handle compiled pattern handle - * @param texts array of strings to match - * @return boolean array (parallel to texts) indicating matches, or null on error - */ - static native boolean[] fullMatchBulk(long handle, String[] texts); - - /** - * Performs partial match on multiple strings in single JNI call. - * Minimizes JNI overhead for high-throughput scenarios. - * - * @param handle compiled pattern handle - * @param texts array of strings to match - * @return boolean array (parallel to texts) indicating matches, or null on error - */ - static native boolean[] partialMatchBulk(long handle, String[] texts); - - // ========== Capture Group Operations ========== - - /** - * Extracts capture groups from a single match. - * Returns array where [0] = full match, [1+] = capturing groups. - * - * @param handle compiled pattern handle - * @param text text to match - * @return string array of groups, or null if no match - */ - static native String[] extractGroups(long handle, String text); - - /** - * Extracts capture groups from multiple strings in single JNI call. - * - * @param handle compiled pattern handle - * @param texts array of strings to match - * @return array of string arrays (groups per input), or null on error - */ - static native String[][] extractGroupsBulk(long handle, String[] texts); - - /** - * Finds all non-overlapping matches in text with capture groups. - * Returns array of match results, each containing groups. 
- * - * @param handle compiled pattern handle - * @param text text to search - * @return array of match data (flattened: [match1_groups..., match2_groups...]), or null on error - */ - static native String[][] findAllMatches(long handle, String text); - - /** - * Gets map of named capturing groups to their indices. - * Returns flattened array: [name1, index1, name2, index2, ...] - * - * @param handle compiled pattern handle - * @return flattened name-index pairs, or null if no named groups - */ - static native String[] getNamedGroups(long handle); - - // ========== Replace Operations ========== - - /** - * Replaces first match with replacement string. - * Supports backreferences ($1, $2, etc.) via RE2::Rewrite. - * - * @param handle compiled pattern handle - * @param text input text - * @param replacement replacement string (supports $1, $2 backreferences) - * @return text with first match replaced, or original text if no match - */ - static native String replaceFirst(long handle, String text, String replacement); - - /** - * Replaces all non-overlapping matches with replacement string. - * Supports backreferences ($1, $2, etc.) via RE2::Rewrite. - * - * @param handle compiled pattern handle - * @param text input text - * @param replacement replacement string (supports $1, $2 backreferences) - * @return text with all matches replaced, or original text if no matches - */ - static native String replaceAll(long handle, String text, String replacement); - - /** - * Replaces all matches in multiple strings in single JNI call. - * - * @param handle compiled pattern handle - * @param texts array of input texts - * @param replacement replacement string (supports $1, $2 backreferences) - * @return array of replaced strings (parallel to texts), or null on error - */ - static native String[] replaceAllBulk(long handle, String[] texts, String replacement); - - /** - * Replaces first match using zero-copy memory access (off-heap memory). 
- * Accesses memory directly via native address without UTF-8 conversion. - * - * @param handle compiled pattern handle - * @param textAddress native memory address (from DirectByteBuffer or native allocator) - * @param textLength number of bytes to process - * @param replacement replacement string (supports $1, $2 backreferences) - * @return text with first match replaced - */ - static native String replaceFirstDirect(long handle, long textAddress, int textLength, String replacement); - - /** - * Replaces all matches using zero-copy memory access (off-heap memory). - * Accesses memory directly via native address without UTF-8 conversion. - * - * @param handle compiled pattern handle - * @param textAddress native memory address (from DirectByteBuffer or native allocator) - * @param textLength number of bytes to process - * @param replacement replacement string (supports $1, $2 backreferences) - * @return text with all matches replaced - */ - static native String replaceAllDirect(long handle, long textAddress, int textLength, String replacement); - - /** - * Replaces all matches in multiple off-heap buffers (bulk zero-copy operation). - * Processes all buffers in a single JNI call for better performance. - * - * @param handle compiled pattern handle - * @param textAddresses native memory addresses (from DirectByteBuffer or native allocator) - * @param textLengths number of bytes for each address - * @param replacement replacement string (supports $1, $2 backreferences) - * @return array of strings with all matches replaced (parallel to inputs) - */ - static native String[] replaceAllDirectBulk(long handle, long[] textAddresses, int[] textLengths, String replacement); - - // ========== Utility Operations ========== - - /** - * Escapes special regex characters for literal matching. - * Static method - no pattern handle required. 
- * - * @param text text to escape - * @return escaped text safe for use in regex patterns - */ - static native String quoteMeta(String text); - - /** - * Gets pattern complexity histogram (DFA branching factor). - * Returns histogram array where index is fanout value and element is count. - * - * @param handle compiled pattern handle - * @return histogram array, or null on error - */ - static native int[] programFanout(long handle); - - // ========== Zero-Copy Direct Memory Operations ========== - // - // These methods accept raw memory addresses instead of Java Strings, - // enabling true zero-copy regex matching with Chronicle Bytes or - // other off-heap memory systems. - // - // The memory at the provided address is passed directly to RE2 via - // StringPiece, eliminating all copy overhead. - // - // CRITICAL: The caller MUST ensure the memory remains valid for - // the duration of the call. Do NOT release Chronicle Bytes or other - // backing memory until the method returns. - - /** - * Tests if text fully matches the pattern using direct memory access (zero-copy). - * - *

This method accepts a native memory address and length, passing them directly - * to RE2 via StringPiece without any intermediate copying. This is ideal for use - * with Chronicle Bytes where data is already in off-heap memory.

- * - *

Memory Safety: The memory at {@code textAddress} must remain - * valid and unchanged for the duration of this call. The caller is responsible for - * ensuring the backing memory (e.g., Chronicle Bytes object) is not released until - * this method returns.

- * - *

Usage with Chronicle Bytes:

- *
{@code
-     * try (Bytes bytes = Bytes.from("Hello World")) {
-     *     long address = bytes.addressForRead(0);
-     *     int length = (int) bytes.readRemaining();
-     *     boolean matches = RE2NativeJNI.fullMatchDirect(patternHandle, address, length);
-     * }
-     * }
- * - * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) - * @param textAddress native memory address of UTF-8 encoded text - * (e.g., from Chronicle Bytes {@code addressForRead()}) - * @param textLength number of bytes to read from the address - * @return true if the entire text matches the pattern, false if no match or error - * @throws IllegalArgumentException if handle is 0 or textAddress is 0 - * @since 1.1.0 - */ - static native boolean fullMatchDirect(long handle, long textAddress, int textLength); - - /** - * Tests if pattern matches anywhere in text using direct memory access (zero-copy). - * - *

This method accepts a native memory address and length, passing them directly - * to RE2 via StringPiece without any intermediate copying. This is ideal for use - * with Chronicle Bytes where data is already in off-heap memory.

- * - *

Memory Safety: The memory at {@code textAddress} must remain - * valid and unchanged for the duration of this call. The caller is responsible for - * ensuring the backing memory (e.g., Chronicle Bytes object) is not released until - * this method returns.

- * - *

Usage with Chronicle Bytes:

- *
{@code
-     * try (Bytes bytes = Bytes.from("Hello World")) {
-     *     long address = bytes.addressForRead(0);
-     *     int length = (int) bytes.readRemaining();
-     *     boolean matches = RE2NativeJNI.partialMatchDirect(patternHandle, address, length);
-     * }
-     * }
- * - * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) - * @param textAddress native memory address of UTF-8 encoded text - * (e.g., from Chronicle Bytes {@code addressForRead()}) - * @param textLength number of bytes to read from the address - * @return true if the pattern matches anywhere in text, false if no match or error - * @throws IllegalArgumentException if handle is 0 or textAddress is 0 - * @since 1.1.0 - */ - static native boolean partialMatchDirect(long handle, long textAddress, int textLength); - - /** - * Performs full match on multiple memory regions in a single JNI call (zero-copy bulk). - * - *

This method accepts arrays of memory addresses and lengths, enabling efficient - * bulk matching without any copying. Each address/length pair is matched independently - * against the pattern.

- * - *

Memory Safety: All memory regions specified by the address/length - * pairs must remain valid for the duration of this call. This is particularly important - * for Chronicle Bytes - ensure all Bytes objects remain alive until this method returns.

- * - *

Performance: This method minimizes JNI crossing overhead by - * processing all inputs in a single native call. Combined with zero-copy memory access, - * this provides maximum throughput for batch processing scenarios.

- * - * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) - * @param textAddresses array of native memory addresses (e.g., from Chronicle Bytes) - * @param textLengths array of byte lengths (must be same length as textAddresses) - * @return boolean array (parallel to inputs) indicating matches, or null on error - * @throws IllegalArgumentException if arrays are null or have different lengths - * @since 1.1.0 - */ - static native boolean[] fullMatchDirectBulk(long handle, long[] textAddresses, int[] textLengths); - - /** - * Performs partial match on multiple memory regions in a single JNI call (zero-copy bulk). - * - *

This method accepts arrays of memory addresses and lengths, enabling efficient - * bulk matching without any copying. Each address/length pair is matched independently - * against the pattern.

- * - *

Memory Safety: All memory regions specified by the address/length - * pairs must remain valid for the duration of this call. This is particularly important - * for Chronicle Bytes - ensure all Bytes objects remain alive until this method returns.

- * - *

Performance: This method minimizes JNI crossing overhead by - * processing all inputs in a single native call. Combined with zero-copy memory access, - * this provides maximum throughput for batch processing scenarios.

- * - * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) - * @param textAddresses array of native memory addresses (e.g., from Chronicle Bytes) - * @param textLengths array of byte lengths (must be same length as textAddresses) - * @return boolean array (parallel to inputs) indicating matches, or null on error - * @throws IllegalArgumentException if arrays are null or have different lengths - * @since 1.1.0 - */ - static native boolean[] partialMatchDirectBulk(long handle, long[] textAddresses, int[] textLengths); - - /** - * Extracts capture groups from text using direct memory access (zero-copy). - * - *

This method reads text directly from the provided memory address, extracts - * all capture groups, and returns them as a String array. The input is zero-copy, - * but the output necessarily creates new Java Strings for the captured groups.

- * - *

Memory Safety: The memory at {@code textAddress} must remain - * valid for the duration of this call.

- * - * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) - * @param textAddress native memory address of UTF-8 encoded text - * @param textLength number of bytes to read from the address - * @return String array where [0] = full match, [1+] = capturing groups, or null if no match - * @since 1.1.0 - */ - static native String[] extractGroupsDirect(long handle, long textAddress, int textLength); - - /** - * Finds all non-overlapping matches in text using direct memory access (zero-copy). - * - *

This method reads text directly from the provided memory address and finds - * all non-overlapping matches. The input is zero-copy, but the output necessarily - * creates new Java Strings for the matches.

- * - *

Memory Safety: The memory at {@code textAddress} must remain - * valid for the duration of this call.

- * - * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) - * @param textAddress native memory address of UTF-8 encoded text - * @param textLength number of bytes to read from the address - * @return array of match results with capture groups, or null if no matches - * @since 1.1.0 - */ - static native String[][] findAllMatchesDirect(long handle, long textAddress, int textLength); + private RE2NativeJNI() { + // Utility class - prevent instantiation + } + + /** + * Compiles a regular expression pattern. + * + * @param pattern regex pattern string (UTF-8) + * @param caseSensitive true for case-sensitive, false for case-insensitive + * @return native handle to compiled pattern, or 0 on error (MUST be freed) + */ + static native long compile(String pattern, boolean caseSensitive); + + /** + * Frees a compiled pattern. Safe to call with 0 handle (no-op). + * + * @param handle native handle from compile() + */ + static native void freePattern(long handle); + + /** + * Tests if text fully matches the pattern. + * + * @param handle compiled pattern handle + * @param text text to match (UTF-8) + * @return true if matches, false if no match or error + */ + static native boolean fullMatch(long handle, String text); + + /** + * Tests if pattern matches anywhere in text. + * + * @param handle compiled pattern handle + * @param text text to search (UTF-8) + * @return true if matches, false if no match or error + */ + static native boolean partialMatch(long handle, String text); + + /** + * Gets the last error message. + * + * @return error message, or null if no error + */ + static native String getError(); + + /** + * Gets the pattern string from a compiled pattern. + * + * @param handle compiled pattern handle + * @return pattern string, or null if invalid + */ + static native String getPattern(long handle); + + /** + * Gets the number of capturing groups. 
+ * + * @param handle compiled pattern handle + * @return number of capturing groups, or -1 on error + */ + static native int numCapturingGroups(long handle); + + /** + * Checks if pattern is valid. + * + * @param handle compiled pattern handle + * @return true if valid, false if invalid/null + */ + static native boolean patternOk(long handle); + + /** + * Gets the native memory size of a compiled pattern. + * + *

Returns the size of the compiled DFA/NFA program in bytes. This represents the off-heap + * memory consumed by this pattern. + * + * @param handle compiled pattern handle + * @return size in bytes, or 0 if handle is 0 + */ + static native long patternMemory(long handle); + + // ========== Bulk Matching Operations ========== + + /** + * Performs full match on multiple strings in single JNI call. Minimizes JNI overhead for + * high-throughput scenarios. + * + * @param handle compiled pattern handle + * @param texts array of strings to match + * @return boolean array (parallel to texts) indicating matches, or null on error + */ + static native boolean[] fullMatchBulk(long handle, String[] texts); + + /** + * Performs partial match on multiple strings in single JNI call. Minimizes JNI overhead for + * high-throughput scenarios. + * + * @param handle compiled pattern handle + * @param texts array of strings to match + * @return boolean array (parallel to texts) indicating matches, or null on error + */ + static native boolean[] partialMatchBulk(long handle, String[] texts); + + // ========== Capture Group Operations ========== + + /** + * Extracts capture groups from a single match. Returns array where [0] = full match, [1+] = + * capturing groups. + * + * @param handle compiled pattern handle + * @param text text to match + * @return string array of groups, or null if no match + */ + static native String[] extractGroups(long handle, String text); + + /** + * Extracts capture groups from multiple strings in single JNI call. + * + * @param handle compiled pattern handle + * @param texts array of strings to match + * @return array of string arrays (groups per input), or null on error + */ + static native String[][] extractGroupsBulk(long handle, String[] texts); + + /** + * Finds all non-overlapping matches in text with capture groups. Returns array of match results, + * each containing groups. 
+ * + * @param handle compiled pattern handle + * @param text text to search + * @return array of match data (flattened: [match1_groups..., match2_groups...]), or null on error + */ + static native String[][] findAllMatches(long handle, String text); + + /** + * Gets map of named capturing groups to their indices. Returns flattened array: [name1, index1, + * name2, index2, ...] + * + * @param handle compiled pattern handle + * @return flattened name-index pairs, or null if no named groups + */ + static native String[] getNamedGroups(long handle); + + // ========== Replace Operations ========== + + /** + * Replaces first match with replacement string. Supports backreferences ($1, $2, etc.) via + * RE2::Rewrite. + * + * @param handle compiled pattern handle + * @param text input text + * @param replacement replacement string (supports $1, $2 backreferences) + * @return text with first match replaced, or original text if no match + */ + static native String replaceFirst(long handle, String text, String replacement); + + /** + * Replaces all non-overlapping matches with replacement string. Supports backreferences ($1, $2, + * etc.) via RE2::Rewrite. + * + * @param handle compiled pattern handle + * @param text input text + * @param replacement replacement string (supports $1, $2 backreferences) + * @return text with all matches replaced, or original text if no matches + */ + static native String replaceAll(long handle, String text, String replacement); + + /** + * Replaces all matches in multiple strings in single JNI call. + * + * @param handle compiled pattern handle + * @param texts array of input texts + * @param replacement replacement string (supports $1, $2 backreferences) + * @return array of replaced strings (parallel to texts), or null on error + */ + static native String[] replaceAllBulk(long handle, String[] texts, String replacement); + + /** + * Replaces first match using zero-copy memory access (off-heap memory). 
Accesses memory directly + * via native address without UTF-8 conversion. + * + * @param handle compiled pattern handle + * @param textAddress native memory address (from DirectByteBuffer or native allocator) + * @param textLength number of bytes to process + * @param replacement replacement string (supports $1, $2 backreferences) + * @return text with first match replaced + */ + static native String replaceFirstDirect( + long handle, long textAddress, int textLength, String replacement); + + /** + * Replaces all matches using zero-copy memory access (off-heap memory). Accesses memory directly + * via native address without UTF-8 conversion. + * + * @param handle compiled pattern handle + * @param textAddress native memory address (from DirectByteBuffer or native allocator) + * @param textLength number of bytes to process + * @param replacement replacement string (supports $1, $2 backreferences) + * @return text with all matches replaced + */ + static native String replaceAllDirect( + long handle, long textAddress, int textLength, String replacement); + + /** + * Replaces all matches in multiple off-heap buffers (bulk zero-copy operation). Processes all + * buffers in a single JNI call for better performance. + * + * @param handle compiled pattern handle + * @param textAddresses native memory addresses (from DirectByteBuffer or native allocator) + * @param textLengths number of bytes for each address + * @param replacement replacement string (supports $1, $2 backreferences) + * @return array of strings with all matches replaced (parallel to inputs) + */ + static native String[] replaceAllDirectBulk( + long handle, long[] textAddresses, int[] textLengths, String replacement); + + // ========== Utility Operations ========== + + /** + * Escapes special regex characters for literal matching. Static method - no pattern handle + * required. 
+ * + * @param text text to escape + * @return escaped text safe for use in regex patterns + */ + static native String quoteMeta(String text); + + /** + * Gets pattern complexity histogram (DFA branching factor). Returns histogram array where index + * is fanout value and element is count. + * + * @param handle compiled pattern handle + * @return histogram array, or null on error + */ + static native int[] programFanout(long handle); + + // ========== Zero-Copy Direct Memory Operations ========== + // + // These methods accept raw memory addresses instead of Java Strings, + // enabling true zero-copy regex matching with Chronicle Bytes or + // other off-heap memory systems. + // + // The memory at the provided address is passed directly to RE2 via + // StringPiece, eliminating all copy overhead. + // + // CRITICAL: The caller MUST ensure the memory remains valid for + // the duration of the call. Do NOT release Chronicle Bytes or other + // backing memory until the method returns. + + /** + * Tests if text fully matches the pattern using direct memory access (zero-copy). + * + *

This method accepts a native memory address and length, passing them directly to RE2 via + * StringPiece without any intermediate copying. This is ideal for use with Chronicle Bytes where + * data is already in off-heap memory. + * + *

Memory Safety: The memory at {@code textAddress} must remain valid and + * unchanged for the duration of this call. The caller is responsible for ensuring the backing + * memory (e.g., Chronicle Bytes object) is not released until this method returns. + * + *

Usage with Chronicle Bytes: + * + *

{@code
+   * try (Bytes bytes = Bytes.from("Hello World")) {
+   *     long address = bytes.addressForRead(0);
+   *     int length = (int) bytes.readRemaining();
+   *     boolean matches = RE2NativeJNI.fullMatchDirect(patternHandle, address, length);
+   * }
+   * }
+ * + * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) + * @param textAddress native memory address of UTF-8 encoded text (e.g., from Chronicle Bytes + * {@code addressForRead()}) + * @param textLength number of bytes to read from the address + * @return true if the entire text matches the pattern, false if no match or error + * @throws IllegalArgumentException if handle is 0 or textAddress is 0 + * @since 1.1.0 + */ + static native boolean fullMatchDirect(long handle, long textAddress, int textLength); + + /** + * Tests if pattern matches anywhere in text using direct memory access (zero-copy). + * + *

This method accepts a native memory address and length, passing them directly to RE2 via + * StringPiece without any intermediate copying. This is ideal for use with Chronicle Bytes where + * data is already in off-heap memory. + * + *

Memory Safety: The memory at {@code textAddress} must remain valid and + * unchanged for the duration of this call. The caller is responsible for ensuring the backing + * memory (e.g., Chronicle Bytes object) is not released until this method returns. + * + *

Usage with Chronicle Bytes: + * + *

{@code
+   * try (Bytes bytes = Bytes.from("Hello World")) {
+   *     long address = bytes.addressForRead(0);
+   *     int length = (int) bytes.readRemaining();
+   *     boolean matches = RE2NativeJNI.partialMatchDirect(patternHandle, address, length);
+   * }
+   * }
+ * + * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) + * @param textAddress native memory address of UTF-8 encoded text (e.g., from Chronicle Bytes + * {@code addressForRead()}) + * @param textLength number of bytes to read from the address + * @return true if the pattern matches anywhere in text, false if no match or error + * @throws IllegalArgumentException if handle is 0 or textAddress is 0 + * @since 1.1.0 + */ + static native boolean partialMatchDirect(long handle, long textAddress, int textLength); + + /** + * Performs full match on multiple memory regions in a single JNI call (zero-copy bulk). + * + *

This method accepts arrays of memory addresses and lengths, enabling efficient bulk matching + * without any copying. Each address/length pair is matched independently against the pattern. + * + *

Memory Safety: All memory regions specified by the address/length pairs + * must remain valid for the duration of this call. This is particularly important for Chronicle + * Bytes - ensure all Bytes objects remain alive until this method returns. + * + *

Performance: This method minimizes JNI crossing overhead by processing all + * inputs in a single native call. Combined with zero-copy memory access, this provides maximum + * throughput for batch processing scenarios. + * + * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) + * @param textAddresses array of native memory addresses (e.g., from Chronicle Bytes) + * @param textLengths array of byte lengths (must be same length as textAddresses) + * @return boolean array (parallel to inputs) indicating matches, or null on error + * @throws IllegalArgumentException if arrays are null or have different lengths + * @since 1.1.0 + */ + static native boolean[] fullMatchDirectBulk(long handle, long[] textAddresses, int[] textLengths); + + /** + * Performs partial match on multiple memory regions in a single JNI call (zero-copy bulk). + * + *

This method accepts arrays of memory addresses and lengths, enabling efficient bulk matching + * without any copying. Each address/length pair is matched independently against the pattern. + * + *

Memory Safety: All memory regions specified by the address/length pairs + * must remain valid for the duration of this call. This is particularly important for Chronicle + * Bytes - ensure all Bytes objects remain alive until this method returns. + * + *

Performance: This method minimizes JNI crossing overhead by processing all + * inputs in a single native call. Combined with zero-copy memory access, this provides maximum + * throughput for batch processing scenarios. + * + * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) + * @param textAddresses array of native memory addresses (e.g., from Chronicle Bytes) + * @param textLengths array of byte lengths (must be same length as textAddresses) + * @return boolean array (parallel to inputs) indicating matches, or null on error + * @throws IllegalArgumentException if arrays are null or have different lengths + * @since 1.1.0 + */ + static native boolean[] partialMatchDirectBulk( + long handle, long[] textAddresses, int[] textLengths); + + /** + * Extracts capture groups from text using direct memory access (zero-copy). + * + *

This method reads text directly from the provided memory address, extracts all capture + * groups, and returns them as a String array. The input is zero-copy, but the output necessarily + * creates new Java Strings for the captured groups. + * + *

Memory Safety: The memory at {@code textAddress} must remain valid for the + * duration of this call. + * + * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) + * @param textAddress native memory address of UTF-8 encoded text + * @param textLength number of bytes to read from the address + * @return String array where [0] = full match, [1+] = capturing groups, or null if no match + * @since 1.1.0 + */ + static native String[] extractGroupsDirect(long handle, long textAddress, int textLength); + + /** + * Finds all non-overlapping matches in text using direct memory access (zero-copy). + * + *

This method reads text directly from the provided memory address and finds all + * non-overlapping matches. The input is zero-copy, but the output necessarily creates new Java + * Strings for the matches. + * + *

Memory Safety: The memory at {@code textAddress} must remain valid for the + * duration of this call. + * + * @param handle compiled pattern handle (from {@link #compile(String, boolean)}) + * @param textAddress native memory address of UTF-8 encoded text + * @param textLength number of bytes to read from the address + * @return array of match results with capture groups, or null if no matches + * @since 1.1.0 + */ + static native String[][] findAllMatchesDirect(long handle, long textAddress, int textLength); } - diff --git a/libre2-core/src/main/java/com/axonops/libre2/metrics/DropwizardMetricsAdapter.java b/libre2-core/src/main/java/com/axonops/libre2/metrics/DropwizardMetricsAdapter.java index db2f13a..3fa672b 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/metrics/DropwizardMetricsAdapter.java +++ b/libre2-core/src/main/java/com/axonops/libre2/metrics/DropwizardMetricsAdapter.java @@ -18,7 +18,6 @@ import com.codahale.metrics.Gauge; import com.codahale.metrics.MetricRegistry; - import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; @@ -26,14 +25,15 @@ /** * Dropwizard Metrics adapter for libre2-java. * - *

Wraps a Dropwizard {@link MetricRegistry} and delegates all metric - * operations to it. This allows libre2-java to integrate with any application - * or framework that uses Dropwizard Metrics (Cassandra, Spring Boot, etc.). + *

Wraps a Dropwizard {@link MetricRegistry} and delegates all metric operations to it. This + * allows libre2-java to integrate with any application or framework that uses Dropwizard Metrics + * (Cassandra, Spring Boot, etc.). * - *

Thread Safety: MetricRegistry and all Dropwizard metric - * types are thread-safe. This adapter is fully thread-safe. + *

Thread Safety: MetricRegistry and all Dropwizard metric types are + * thread-safe. This adapter is fully thread-safe. * *

Usage Examples: + * *

{@code
  * // Standalone application:
  * MetricRegistry registry = new MetricRegistry();
@@ -48,80 +48,81 @@
  */
 public final class DropwizardMetricsAdapter implements RE2MetricsRegistry {
 
-    private final MetricRegistry registry;
-    private final String prefix;
+  private final MetricRegistry registry;
+  private final String prefix;
 
-    /**
-     * Creates adapter with default metric prefix: {@code com.axonops.libre2}
-     *
-     * @param registry the Dropwizard MetricRegistry to register metrics with
-     * @throws NullPointerException if registry is null
-     */
-    public DropwizardMetricsAdapter(MetricRegistry registry) {
-        this(registry, "com.axonops.libre2");
-    }
+  /**
+   * Creates adapter with default metric prefix: {@code com.axonops.libre2}.
+   *
+   * @param registry the Dropwizard MetricRegistry to register metrics with
+   * @throws NullPointerException if registry is null
+   */
+  public DropwizardMetricsAdapter(MetricRegistry registry) {
+    this(registry, "com.axonops.libre2");
+  }
 
-    /**
-     * Creates adapter with custom metric prefix.
-     *
-     * 

The prefix determines the metric namespace in the registry and JMX. - * For example, with prefix {@code "com.myapp.regex"}, metrics will appear as: - *

    - *
  • {@code com.myapp.regex.patterns.compiled}
  • - *
  • {@code com.myapp.regex.cache.size}
  • - *
  • etc.
  • - *
- * - * @param registry the Dropwizard MetricRegistry to register metrics with - * @param prefix the metric name prefix (e.g., "com.myapp.regex") - * @throws NullPointerException if registry or prefix is null - */ - public DropwizardMetricsAdapter(MetricRegistry registry, String prefix) { - this.registry = Objects.requireNonNull(registry, "registry cannot be null"); - this.prefix = Objects.requireNonNull(prefix, "prefix cannot be null"); - } + /** + * Creates adapter with custom metric prefix. + * + *

The prefix determines the metric namespace in the registry and JMX. For example, with prefix + * {@code "com.myapp.regex"}, metrics will appear as: + * + *

    + *
  • {@code com.myapp.regex.patterns.compiled} + *
  • {@code com.myapp.regex.cache.size} + *
  • etc. + *
+ * + * @param registry the Dropwizard MetricRegistry to register metrics with + * @param prefix the metric name prefix (e.g., "com.myapp.regex") + * @throws NullPointerException if registry or prefix is null + */ + public DropwizardMetricsAdapter(MetricRegistry registry, String prefix) { + this.registry = Objects.requireNonNull(registry, "registry cannot be null"); + this.prefix = Objects.requireNonNull(prefix, "prefix cannot be null"); + } - @Override - public void incrementCounter(String name) { - registry.counter(metricName(name)).inc(); - } + @Override + public void incrementCounter(String name) { + registry.counter(metricName(name)).inc(); + } - @Override - public void incrementCounter(String name, long delta) { - registry.counter(metricName(name)).inc(delta); - } + @Override + public void incrementCounter(String name, long delta) { + registry.counter(metricName(name)).inc(delta); + } - @Override - public void recordTimer(String name, long durationNanos) { - registry.timer(metricName(name)).update(durationNanos, TimeUnit.NANOSECONDS); - } + @Override + public void recordTimer(String name, long durationNanos) { + registry.timer(metricName(name)).update(durationNanos, TimeUnit.NANOSECONDS); + } - @Override - public void registerGauge(String name, Supplier valueSupplier) { - String fullName = metricName(name); + @Override + public void registerGauge(String name, Supplier valueSupplier) { + String fullName = metricName(name); - // Remove existing gauge if present (idempotent registration) - registry.remove(fullName); + // Remove existing gauge if present (idempotent registration) + registry.remove(fullName); - // Register new gauge - registry.register(fullName, (Gauge) valueSupplier::get); - } + // Register new gauge + registry.register(fullName, (Gauge) valueSupplier::get); + } - @Override - public void removeGauge(String name) { - registry.remove(metricName(name)); - } + @Override + public void removeGauge(String name) { + registry.remove(metricName(name)); + } 
- /** - * Builds full metric name with prefix. - * - *

Uses Dropwizard's {@link MetricRegistry#name(String, String...)} utility - * for consistent dot-separated naming. - * - * @param name metric name - * @return full metric name with prefix - */ - private String metricName(String name) { - return MetricRegistry.name(prefix, name); - } + /** + * Builds full metric name with prefix. + * + *

Uses Dropwizard's {@link MetricRegistry#name(String, String...)} utility for consistent + * dot-separated naming. + * + * @param name metric name + * @return full metric name with prefix + */ + private String metricName(String name) { + return MetricRegistry.name(prefix, name); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/metrics/MetricNames.java b/libre2-core/src/main/java/com/axonops/libre2/metrics/MetricNames.java index 22145c1..771e2ea 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/metrics/MetricNames.java +++ b/libre2-core/src/main/java/com/axonops/libre2/metrics/MetricNames.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.axonops.libre2.metrics; /** @@ -24,85 +25,98 @@ *

Architecture Overview

* *

Pattern Cache

+ * *

The RE2 library maintains an automatic LRU cache of compiled regex patterns to avoid expensive * recompilation. When {@link com.axonops.libre2.api.Pattern#compile(String)} is called: + * *

    - *
  1. Cache Hit - Pattern found in cache, returned immediately (no compilation)
  2. - *
  3. Cache Miss - Pattern compiled via native JNI, stored in cache for reuse
  4. + *
  5. Cache Hit - Pattern found in cache, returned immediately (no compilation) + *
  6. Cache Miss - Pattern compiled via native JNI, stored in cache for reuse *
* *

Dual Eviction Strategy: + * *

    - *
  • LRU Eviction - When cache exceeds maxCacheSize, least-recently-used patterns evicted
  • - *
  • Idle Eviction - Background thread evicts patterns unused for idleTimeoutSeconds
  • + *
  • LRU Eviction - When cache exceeds maxCacheSize, least-recently-used patterns evicted + *
  • Idle Eviction - Background thread evicts patterns unused for idleTimeoutSeconds *
* *

This dual strategy provides: + * *

    - *
  • Short-term performance (LRU keeps hot patterns)
  • - *
  • Long-term memory hygiene (idle eviction prevents unbounded growth)
  • + *
  • Short-term performance (LRU keeps hot patterns) + *
  • Long-term memory hygiene (idle eviction prevents unbounded growth) *
* *

Deferred Cleanup Queue

- *

Patterns cannot be immediately freed from native memory if they are in use by active - * {@link com.axonops.libre2.api.Matcher} instances. When a pattern is evicted (LRU or idle) - * but has active matchers: + * + *

Patterns cannot be immediately freed from native memory if they are in use by active {@link + * com.axonops.libre2.api.Matcher} instances. When a pattern is evicted (LRU or idle) but has active + * matchers: + * *

    - *
  1. Pattern removed from cache (no longer available for new compilations)
  2. - *
  3. Pattern moved to deferred cleanup queue (awaiting matcher closure)
  4. - *
  5. When all matchers close, pattern freed from native memory
  6. + *
  7. Pattern removed from cache (no longer available for new compilations) + *
  8. Pattern moved to deferred cleanup queue (awaiting matcher closure) + *
  9. When all matchers close, pattern freed from native memory *
* *

Why Deferred Cleanup Matters: + * *

    - *
  • Prevents use-after-free crashes (matchers reference native memory)
  • - *
  • Allows safe concurrent pattern eviction and matching operations
  • - *
  • High deferred counts indicate matchers not closed promptly (potential leak)
  • + *
  • Prevents use-after-free crashes (matchers reference native memory) + *
  • Allows safe concurrent pattern eviction and matching operations + *
  • High deferred counts indicate matchers not closed promptly (potential leak) *
* - *

A background task runs every {@code deferredCleanupIntervalSeconds} to free patterns - * whose matchers have closed. + *

A background task runs every {@code deferredCleanupIntervalSeconds} to free patterns whose + * matchers have closed. * *

Native Memory Tracking

+ * *

Compiled RE2 patterns are stored in off-heap native memory (not JVM heap) to: + * *

    - *
  • Avoid Java GC pressure (large regex automata can be 100s of KB)
  • - *
  • Leverage RE2's optimized C++ memory layout
  • - *
  • Prevent OutOfMemoryError in high-throughput scenarios
  • + *
  • Avoid Java GC pressure (large regex automata can be 100s of KB) + *
  • Leverage RE2's optimized C++ memory layout + *
  • Prevent OutOfMemoryError in high-throughput scenarios *
* - *

Exact Memory Measurement: When a pattern is compiled, the native library reports - * exact memory usage via {@code Pattern.getNativeMemoryBytes()}. This is NOT an estimate - - * it's the actual allocation size from RE2's internal accounting. + *

Exact Memory Measurement: When a pattern is compiled, the native library reports exact + * memory usage via {@code Pattern.getNativeMemoryBytes()}. This is NOT an estimate - it's the + * actual allocation size from RE2's internal accounting. * *

Memory Lifecycle: + * *

    - *
  1. Pattern compiled → native memory allocated (tracked in CACHE_NATIVE_MEMORY)
  2. - *
  3. Pattern evicted but in use → moved to deferred (tracked in CACHE_DEFERRED_MEMORY)
  4. - *
  5. All matchers closed → pattern freed (memory reclaimed, counters decremented)
  6. + *
  7. Pattern compiled → native memory allocated (tracked in CACHE_NATIVE_MEMORY) + *
  8. Pattern evicted but in use → moved to deferred (tracked in CACHE_DEFERRED_MEMORY) + *
  9. All matchers closed → pattern freed (memory reclaimed, counters decremented) *
* *

Total Native Memory = Cache Memory + Deferred Memory * *

Metric Categories

+ * *
    - *
  • Pattern Compilation (5 metrics) - Compilation performance and cache efficiency
  • - *
  • Cache State (3 metrics) - Current cache size and memory usage
  • - *
  • Cache Evictions (3 metrics) - Eviction types (LRU, idle, deferred) and frequencies
  • - *
  • Deferred Cleanup (4 metrics) - Patterns awaiting cleanup (in use by matchers)
  • - *
  • Resource Management (4 metrics) - Active patterns/matchers and cleanup tracking
  • - *
  • Performance (3 metrics) - Matching operation latencies (RE2 guarantees linear time)
  • - *
  • Errors (3 metrics) - Compilation failures and resource exhaustion
  • + *
  • Pattern Compilation (5 metrics) - Compilation performance and cache efficiency + *
  • Cache State (3 metrics) - Current cache size and memory usage + *
  • Cache Evictions (3 metrics) - Eviction types (LRU, idle, deferred) and frequencies + *
  • Deferred Cleanup (4 metrics) - Patterns awaiting cleanup (in use by matchers) + *
  • Resource Management (4 metrics) - Active patterns/matchers and cleanup tracking + *
  • Performance (3 metrics) - Matching operation latencies (RE2 guarantees linear time) + *
  • Errors (3 metrics) - Compilation failures and resource exhaustion *
* *

Metric Types

+ * *
    - *
  • Counter - Monotonically increasing count (suffix: {@code .total.count})
  • - *
  • Timer - Latency histogram with percentiles (suffix: {@code .latency})
  • - *
  • Gauge - Current or peak value (suffix: {@code .current.*} or {@code .peak.*})
  • + *
  • Counter - Monotonically increasing count (suffix: {@code .total.count}) + *
  • Timer - Latency histogram with percentiles (suffix: {@code .latency}) + *
  • Gauge - Current or peak value (suffix: {@code .current.*} or {@code .peak.*}) *
* *

Usage Example

+ * *
{@code
  * // Initialize with Dropwizard Metrics and JMX
  * MetricRegistry registry = new MetricRegistry();
@@ -127,15 +141,16 @@
  * }
* *

Monitoring Recommendations

+ * *
    *
  • Cache Hit Rate: PATTERNS_CACHE_HITS / (PATTERNS_CACHE_HITS + PATTERNS_CACHE_MISSES) - * - Target: >90% for steady-state workloads
  • - *
  • Deferred Cleanup: CACHE_DEFERRED_PATTERNS_COUNT should be low (near zero) - * - High values indicate matchers not closed (potential leak)
  • - *
  • Memory Growth: CACHE_NATIVE_MEMORY + CACHE_DEFERRED_MEMORY = total off-heap - * - Should stabilize after warmup, not grow unbounded
  • - *
  • Eviction Balance: CACHE_EVICTIONS_IDLE should dominate over CACHE_EVICTIONS_LRU - * - Means cache sized correctly, idle patterns cleaned up
  • + * - Target: >90% for steady-state workloads + *
  • Deferred Cleanup: CACHE_DEFERRED_PATTERNS_COUNT should be low (near zero) - High + * values indicate matchers not closed (potential leak) + *
  • Memory Growth: CACHE_NATIVE_MEMORY + CACHE_DEFERRED_MEMORY = total off-heap - Should + * stabilize after warmup, not grow unbounded + *
  • Eviction Balance: CACHE_EVICTIONS_IDLE should dominate over CACHE_EVICTIONS_LRU - + * Means cache sized correctly, idle patterns cleaned up *
* * @since 1.0.0 @@ -144,538 +159,734 @@ * @see com.axonops.libre2.api.Matcher */ public final class MetricNames { - private MetricNames() {} - - // ======================================== - // Pattern Compilation Metrics (5) - // ======================================== - - /** - * Total patterns compiled (cache misses). - *

Type: Counter - *

Incremented: Each time a pattern is compiled via native JNI - *

Interpretation: High values indicate poor cache hit rate or many unique patterns - */ - public static final String PATTERNS_COMPILED = "patterns.compiled.total.count"; - - /** - * Total cache hits (pattern found in cache). - *

Type: Counter - *

Incremented: When Pattern.compile() finds pattern already cached - *

Interpretation: High hit rate (hits / (hits + misses)) indicates effective caching - */ - public static final String PATTERNS_CACHE_HITS = "patterns.cache.hits.total.count"; - - /** - * Total cache misses (pattern not in cache, compilation required). - *

Type: Counter - *

Incremented: When Pattern.compile() must compile new pattern - *

Interpretation: Equal to PATTERNS_COMPILED; compare to hits for hit rate - */ - public static final String PATTERNS_CACHE_MISSES = "patterns.cache.misses.total.count"; - - /** - * Pattern compilation latency histogram. - *

Type: Timer (nanoseconds) - *

Recorded: For each successful pattern compilation (native JNI call) - *

Provides: min, max, mean, p50, p75, p95, p98, p99, p99.9, rates (1m, 5m, 15m) - *

Interpretation: High latencies indicate complex regex patterns or platform issues - */ - public static final String PATTERNS_COMPILATION_LATENCY = "patterns.compilation.latency"; - - /** - * Patterns recompiled after cache validation detected corruption. - *

Type: Counter - *

Incremented: When cached pattern fails validation and is recompiled - *

Interpretation: Should be zero; non-zero indicates serious native memory corruption - */ - public static final String PATTERNS_INVALID_RECOMPILED = "patterns.invalid.recompiled.total.count"; - - // ======================================== - // Cache State Metrics (3) - // ======================================== - - /** - * Current number of patterns in cache. - *

Type: Gauge (count) - *

Updated: On cache insertions and evictions - *

Interpretation: Should stay below configured maxCacheSize; sudden drops indicate evictions - */ - public static final String CACHE_PATTERNS_COUNT = "cache.patterns.current.count"; - - /** - * Current native memory used by cached patterns. - *

Type: Gauge (bytes) - *

Updated: On cache insertions and evictions - *

Interpretation: Exact off-heap memory usage (reported by RE2 native library) - */ - public static final String CACHE_NATIVE_MEMORY = "cache.native_memory.current.bytes"; - - /** - * Peak native memory used by cached patterns (high water mark). - *

Type: Gauge (bytes) - *

Updated: When current memory exceeds previous peak - *

Interpretation: Maximum exact memory usage; helps size cache limits - */ - public static final String CACHE_NATIVE_MEMORY_PEAK = "cache.native_memory.peak.bytes"; - - // ======================================== - // Cache Eviction Metrics (3) - // ======================================== - - /** - * Patterns evicted due to LRU cache overflow. - *

Type: Counter - *

Incremented: When cache exceeds maxCacheSize and LRU pattern evicted - *

Interpretation: High values indicate cache too small or working set exceeds limit - */ - public static final String CACHE_EVICTIONS_LRU = "cache.evictions.lru.total.count"; - - /** - * Patterns evicted due to idle timeout. - *

Type: Counter - *

Incremented: When background task evicts pattern unused for idleTimeoutSeconds - *

Interpretation: High values indicate many patterns accessed once then abandoned - */ - public static final String CACHE_EVICTIONS_IDLE = "cache.evictions.idle.total.count"; - - /** - * Patterns freed from deferred cleanup queue (were in use when eviction attempted). - *

Type: Counter - *

Incremented: When deferred cleanup task successfully frees pattern after matchers closed - *

Interpretation: Normal during concurrent workloads; see deferred metrics for backlog - */ - public static final String CACHE_EVICTIONS_DEFERRED = "cache.evictions.deferred.total.count"; - - // ======================================== - // Deferred Cleanup Metrics (4) - // ======================================== - - /** - * Current number of patterns awaiting deferred cleanup. - *

Type: Gauge (count) - *

Updated: When patterns moved to deferred queue or freed - *

Interpretation: Should be low; high values indicate matchers not closed promptly - */ - public static final String CACHE_DEFERRED_PATTERNS_COUNT = "cache.deferred.patterns.current.count"; - - /** - * Peak number of patterns in deferred cleanup queue. - *

Type: Gauge (count) - *

Updated: When deferred count exceeds previous peak - *

Interpretation: High peaks indicate bursts of concurrent matcher usage - */ - public static final String CACHE_DEFERRED_PATTERNS_PEAK = "cache.deferred.patterns.peak.count"; - - /** - * Current native memory held by deferred cleanup patterns. - *

Type: Gauge (bytes) - *

Updated: When patterns added to or freed from deferred queue - *

Interpretation: Exact memory not yet reclaimed; large values indicate matcher leak risk - */ - public static final String CACHE_DEFERRED_MEMORY = "cache.deferred.native_memory.current.bytes"; - - /** - * Peak native memory held by deferred cleanup patterns. - *

Type: Gauge (bytes) - *

Updated: When deferred memory exceeds previous peak - *

Interpretation: Maximum exact memory from in-use patterns - */ - public static final String CACHE_DEFERRED_MEMORY_PEAK = "cache.deferred.native_memory.peak.bytes"; - - // ======================================== - // Resource Management Metrics (4) - // ======================================== - - /** - * Current number of active (compiled) patterns across all caches. - *

Type: Gauge (count) - *

Updated: On pattern compilation and cleanup - *

Interpretation: Should stay below maxSimultaneousCompiledPatterns limit - */ - public static final String RESOURCES_PATTERNS_ACTIVE = "resources.patterns.active.current.count"; - - /** - * Current number of active matchers. - *

Type: Gauge (count) - *

Updated: On matcher creation and close - *

Interpretation: High values indicate many concurrent matching operations - */ - public static final String RESOURCES_MATCHERS_ACTIVE = "resources.matchers.active.current.count"; - - /** - * Total patterns freed (native memory deallocated). - *

Type: Counter - *

Incremented: When pattern's native handle freed via freePattern() - *

Interpretation: Should approximately equal PATTERNS_COMPILED over time - */ - public static final String RESOURCES_PATTERNS_FREED = "resources.patterns.freed.total.count"; - - /** - * Total matchers freed. - *

Type: Counter - *

Incremented: When Matcher.close() completes - *

Interpretation: Tracks matcher lifecycle; useful for leak detection - */ - public static final String RESOURCES_MATCHERS_FREED = "resources.matchers.freed.total.count"; - - // ======================================== - // Performance Metrics - Matching - // ======================================== - // Pattern: Global metrics (ALL) + Specific breakdown (String, Bulk, Zero-Copy) - - /** - * Total matching operations (ALL - String + Bulk + Zero-Copy). - *

Type: Counter - *

Incremented: For EVERY matches() or find() call regardless of variant - *

Interpretation: Total matching workload across all API variants - *

Breakdown: Sum of MATCHING_STRING_OPERATIONS + MATCHING_BULK_OPERATIONS + MATCHING_ZERO_COPY_OPERATIONS - */ - public static final String MATCHING_OPERATIONS = "matching.operations.total.count"; - - /** - * Matching operation latency (ALL variants). - *

Type: Timer (nanoseconds) - *

Recorded: For EVERY matching operation (String, bulk, zero-copy) - *

Interpretation: Overall matching performance across all variants - */ - public static final String MATCHING_LATENCY = "matching.latency"; - - /** - * Full match operation latency (ALL variants). - *

Type: Timer (nanoseconds) - *

Recorded: For each full match (String or zero-copy) - *

Interpretation: Full match performance - */ - public static final String MATCHING_FULL_MATCH_LATENCY = "matching.full_match.latency"; - - /** - * Partial match operation latency (ALL variants). - *

Type: Timer (nanoseconds) - *

Recorded: For each partial match (String or zero-copy) - *

Interpretation: Partial match performance - */ - public static final String MATCHING_PARTIAL_MATCH_LATENCY = "matching.partial_match.latency"; - - // --- String-specific matching metrics --- - - /** - * String-based matching operations only. - *

Type: Counter - *

Incremented: For each matches(String) or find(String) call - *

Interpretation: String API usage (subset of MATCHING_OPERATIONS) - */ - public static final String MATCHING_STRING_OPERATIONS = "matching.string.operations.total.count"; - - /** - * String-based matching latency. - *

Type: Timer (nanoseconds) - *

Recorded: For each String matching operation - *

Interpretation: String API performance baseline - */ - public static final String MATCHING_STRING_LATENCY = "matching.string.latency"; - - // --- Bulk-specific matching metrics --- - - /** - * Bulk matching operations (matchAll, filter with String arrays/collections). - *

Type: Counter - *

Incremented: Once per bulk call - *

Interpretation: Bulk API usage (subset of MATCHING_OPERATIONS) - */ - public static final String MATCHING_BULK_OPERATIONS = "matching.bulk.operations.total.count"; - - /** - * Total items processed in bulk matching. - *

Type: Counter - *

Incremented: By number of items in each bulk call - *

Interpretation: Total strings processed via bulk - */ - public static final String MATCHING_BULK_ITEMS = "matching.bulk.items.total.count"; - - /** - * Bulk matching latency (per item average). - *

Type: Timer (nanoseconds per item) - *

Recorded: Average latency per item - *

Interpretation: Should be lower than single due to JNI amortization - */ - public static final String MATCHING_BULK_LATENCY = "matching.bulk.latency"; - - // --- Zero-copy specific matching metrics --- - - /** - * Zero-copy matching operations (ByteBuffer or address/length - single). - *

Type: Counter - *

Incremented: For each zero-copy single match - *

Interpretation: Zero-copy API adoption (subset of MATCHING_OPERATIONS) - */ - public static final String MATCHING_ZERO_COPY_OPERATIONS = "matching.zero_copy.operations.total.count"; - - /** - * Zero-copy matching latency. - *

Type: Timer (nanoseconds) - *

Recorded: For each zero-copy single match - *

Interpretation: Should be 46-99% faster than String - */ - public static final String MATCHING_ZERO_COPY_LATENCY = "matching.zero_copy.latency"; - - /** - * Zero-copy bulk matching operations (address/length arrays). - *

Type: Counter - *

Incremented: Once per zero-copy bulk call - *

Interpretation: Zero-copy bulk usage - */ - public static final String MATCHING_BULK_ZERO_COPY_OPERATIONS = "matching.bulk.zero_copy.operations.total.count"; - - /** - * Zero-copy bulk matching latency (per item). - *

Type: Timer (nanoseconds per item) - *

Recorded: Per-item latency for zero-copy bulk - *

Interpretation: Fastest path (bulk + zero-copy) - */ - public static final String MATCHING_BULK_ZERO_COPY_LATENCY = "matching.bulk.zero_copy.latency"; - - // ======================================== - // Performance Metrics - Capture Groups - // ======================================== - // Pattern: Global metrics (ALL) + Specific breakdown (String, Bulk, Zero-Copy) - - /** - * Total capture group operations (ALL - String + Bulk + Zero-Copy). - *

Type: Counter - *

Incremented: For EVERY match(), find(), findAll() with group extraction - *

Interpretation: Total capture workload across all variants - *

Breakdown: Sum of CAPTURE_STRING_OPERATIONS + CAPTURE_BULK_OPERATIONS + CAPTURE_ZERO_COPY_OPERATIONS - */ - public static final String CAPTURE_OPERATIONS = "capture.operations.total.count"; - - /** - * Capture group extraction latency (ALL variants). - *

Type: Timer (nanoseconds) - *

Recorded: For EVERY capture operation (String, bulk, zero-copy) - *

Interpretation: Overall capture performance across all variants - */ - public static final String CAPTURE_LATENCY = "capture.latency"; - - // --- String-specific capture metrics --- - - /** - * String-based capture operations only. - *

Type: Counter - *

Incremented: For each match(String), find(String), findAll(String) - *

Interpretation: String capture API usage (subset of CAPTURE_OPERATIONS) - */ - public static final String CAPTURE_STRING_OPERATIONS = "capture.string.operations.total.count"; - - /** - * String-based capture latency. - *

Type: Timer (nanoseconds) - *

Recorded: For each String capture operation - *

Interpretation: String capture performance baseline - */ - public static final String CAPTURE_STRING_LATENCY = "capture.string.latency"; - - // --- Bulk-specific capture metrics --- - - /** - * Bulk capture operations (extractGroupsBulk, matchAll with groups). - *

Type: Counter - *

Incremented: Once per bulk capture call - *

Interpretation: Bulk capture API usage (subset of CAPTURE_OPERATIONS) - */ - public static final String CAPTURE_BULK_OPERATIONS = "capture.bulk.operations.total.count"; - - /** - * Total items in bulk capture operations. - *

Type: Counter - *

Incremented: By number of items in each bulk capture - *

Interpretation: Total strings processed via bulk capture - */ - public static final String CAPTURE_BULK_ITEMS = "capture.bulk.items.total.count"; - - /** - * Bulk capture latency (per item average). - *

Type: Timer (nanoseconds per item) - *

Recorded: Average latency per item in bulk capture - *

Interpretation: Should be lower than single due to JNI amortization - */ - public static final String CAPTURE_BULK_LATENCY = "capture.bulk.latency"; - - // --- Zero-copy specific capture metrics --- - - /** - * Zero-copy capture operations (ByteBuffer, address/length - single). - *

Type: Counter - *

Incremented: For each zero-copy single capture - *

Interpretation: Zero-copy capture adoption (subset of CAPTURE_OPERATIONS) - */ - public static final String CAPTURE_ZERO_COPY_OPERATIONS = "capture.zero_copy.operations.total.count"; - - /** - * Zero-copy capture latency. - *

Type: Timer (nanoseconds) - *

Recorded: For each zero-copy capture - *

Interpretation: Should be 46-99% faster than String - */ - public static final String CAPTURE_ZERO_COPY_LATENCY = "capture.zero_copy.latency"; - - /** - * Zero-copy bulk capture operations. - *

Type: Counter - *

Incremented: Once per zero-copy bulk capture call - *

Interpretation: Zero-copy bulk capture usage - */ - public static final String CAPTURE_BULK_ZERO_COPY_OPERATIONS = "capture.bulk.zero_copy.operations.total.count"; - - /** - * Zero-copy bulk capture latency (per item). - *

Type: Timer (nanoseconds per item) - *

Recorded: Per-item latency for zero-copy bulk capture - *

Interpretation: Fastest capture path - */ - public static final String CAPTURE_BULK_ZERO_COPY_LATENCY = "capture.bulk.zero_copy.latency"; - - /** - * Total matches found by findAll operations (ALL variants). - *

Type: Counter - *

Incremented: By number of matches found in each findAll() - *

Interpretation: Total matches extracted across all findAll calls - */ - public static final String CAPTURE_FINDALL_MATCHES = "capture.findall.matches.total.count"; - - // ======================================== - // Performance Metrics - Replace - // ======================================== - // Pattern: Global metrics (ALL) + Specific breakdown (String, Bulk, Zero-Copy) - - /** - * Total replace operations (ALL - String + Bulk + Zero-Copy). - *

Type: Counter - *

Incremented: For EVERY replaceFirst(), replaceAll() regardless of variant - *

Interpretation: Total replace workload across all variants - *

Breakdown: Sum of REPLACE_STRING_OPERATIONS + REPLACE_BULK_OPERATIONS + REPLACE_ZERO_COPY_OPERATIONS - */ - public static final String REPLACE_OPERATIONS = "replace.operations.total.count"; - - /** - * Replace operation latency (ALL variants). - *

Type: Timer (nanoseconds) - *

Recorded: For EVERY replace operation (String, bulk, zero-copy) - *

Interpretation: Overall replace performance across all variants - */ - public static final String REPLACE_LATENCY = "replace.latency"; - - // --- String-specific replace metrics --- - - /** - * String-based replace operations only. - *

Type: Counter - *

Incremented: For each replaceFirst(String) or replaceAll(String) - *

Interpretation: String replace API usage (subset of REPLACE_OPERATIONS) - */ - public static final String REPLACE_STRING_OPERATIONS = "replace.string.operations.total.count"; - - /** - * String-based replace latency. - *

Type: Timer (nanoseconds) - *

Recorded: For each String replace operation - *

Interpretation: String replace performance baseline - */ - public static final String REPLACE_STRING_LATENCY = "replace.string.latency"; - - // --- Bulk-specific replace metrics --- - - /** - * Bulk replace operations (replaceAll with arrays/collections). - *

Type: Counter - *

Incremented: Once per bulk replace call - *

Interpretation: Bulk replace API usage (subset of REPLACE_OPERATIONS) - */ - public static final String REPLACE_BULK_OPERATIONS = "replace.bulk.operations.total.count"; - - /** - * Total items in bulk replace operations. - *

Type: Counter - *

Incremented: By number of items in each bulk replace - *

Interpretation: Total strings processed via bulk replace - */ - public static final String REPLACE_BULK_ITEMS = "replace.bulk.items.total.count"; - - /** - * Bulk replace latency (per item average). - *

Type: Timer (nanoseconds per item) - *

Recorded: Average latency per item in bulk replace - *

Interpretation: Should be lower than single due to JNI amortization - */ - public static final String REPLACE_BULK_LATENCY = "replace.bulk.latency"; - - // --- Zero-copy specific replace metrics --- - - /** - * Zero-copy replace operations (ByteBuffer, address/length - single). - *

Type: Counter - *

Incremented: For each zero-copy single replace - *

Interpretation: Zero-copy replace adoption (subset of REPLACE_OPERATIONS) - */ - public static final String REPLACE_ZERO_COPY_OPERATIONS = "replace.zero_copy.operations.total.count"; - - /** - * Zero-copy replace latency. - *

Type: Timer (nanoseconds) - *

Recorded: For each zero-copy replace - *

Interpretation: Should be 46-99% faster than String - */ - public static final String REPLACE_ZERO_COPY_LATENCY = "replace.zero_copy.latency"; - - /** - * Zero-copy bulk replace operations. - *

Type: Counter - *

Incremented: Once per zero-copy bulk replace call - *

Interpretation: Zero-copy bulk replace usage - */ - public static final String REPLACE_BULK_ZERO_COPY_OPERATIONS = "replace.bulk.zero_copy.operations.total.count"; - - /** - * Number of items processed in zero-copy bulk replace operations. - *

Type: Counter (items) - *

Recorded: Count of individual buffers/addresses processed in bulk zero-copy replace - *

Interpretation: Total items in all REPLACE_BULK_ZERO_COPY_OPERATIONS calls - */ - public static final String REPLACE_BULK_ZERO_COPY_ITEMS = "replace.bulk.zero_copy.items.total.count"; - - /** - * Zero-copy bulk replace latency (per item). - *

Type: Timer (nanoseconds per item) - *

Recorded: Per-item latency for zero-copy bulk replace - *

Interpretation: Fastest replace path - */ - public static final String REPLACE_BULK_ZERO_COPY_LATENCY = "replace.bulk.zero_copy.latency"; - - // ======================================== - // Error Metrics (3) - // ======================================== - - /** - * Pattern compilation failures (invalid regex syntax). - *

Type: Counter - *

Incremented: When RE2 rejects pattern as invalid - *

Interpretation: User error (bad regex); check logs for pattern details - */ - public static final String ERRORS_COMPILATION_FAILED = "errors.compilation.failed.total.count"; - - /** - * Native library load failures. - *

Type: Counter - *

Incremented: When RE2 native library fails to load at startup - *

Interpretation: Critical error; check platform detection and library bundle - */ - public static final String ERRORS_NATIVE_LIBRARY = "errors.native_library.total.count"; - - /** - * Resource limit exceeded (too many patterns or matchers). - *

Type: Counter - *

Incremented: When maxSimultaneousCompiledPatterns exceeded - *

Interpretation: Safety limit hit; increase limit or reduce concurrency - */ - public static final String ERRORS_RESOURCE_EXHAUSTED = "errors.resource.exhausted.total.count"; + private MetricNames() {} + + // ======================================== + // Pattern Compilation Metrics (5) + // ======================================== + + /** + * Total patterns compiled (cache misses). + * + *

Type: Counter + * + *

Incremented: Each time a pattern is compiled via native JNI + * + *

Interpretation: High values indicate poor cache hit rate or many unique patterns + */ + public static final String PATTERNS_COMPILED = "patterns.compiled.total.count"; + + /** + * Total cache hits (pattern found in cache). + * + *

Type: Counter + * + *

Incremented: When Pattern.compile() finds pattern already cached + * + *

Interpretation: High hit rate (hits / (hits + misses)) indicates effective caching + */ + public static final String PATTERNS_CACHE_HITS = "patterns.cache.hits.total.count"; + + /** + * Total cache misses (pattern not in cache, compilation required). + * + *

Type: Counter + * + *

Incremented: When Pattern.compile() must compile new pattern + * + *

Interpretation: Equal to PATTERNS_COMPILED; compare to hits for hit rate + */ + public static final String PATTERNS_CACHE_MISSES = "patterns.cache.misses.total.count"; + + /** + * Pattern compilation latency histogram. + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each successful pattern compilation (native JNI call) + * + *

Provides: min, max, mean, p50, p75, p95, p98, p99, p99.9, rates (1m, 5m, 15m) + * + *

Interpretation: High latencies indicate complex regex patterns or platform issues + */ + public static final String PATTERNS_COMPILATION_LATENCY = "patterns.compilation.latency"; + + /** + * Patterns recompiled after cache validation detected corruption. + * + *

Type: Counter + * + *

Incremented: When cached pattern fails validation and is recompiled + * + *

Interpretation: Should be zero; non-zero indicates serious native memory corruption + */ + public static final String PATTERNS_INVALID_RECOMPILED = + "patterns.invalid.recompiled.total.count"; + + // ======================================== + // Cache State Metrics (3) + // ======================================== + + /** + * Current number of patterns in cache. + * + *

Type: Gauge (count) + * + *

Updated: On cache insertions and evictions + * + *

Interpretation: Should stay below configured maxCacheSize; sudden drops indicate + * evictions + */ + public static final String CACHE_PATTERNS_COUNT = "cache.patterns.current.count"; + + /** + * Current native memory used by cached patterns. + * + *

Type: Gauge (bytes) + * + *

Updated: On cache insertions and evictions + * + *

Interpretation: Exact off-heap memory usage (reported by RE2 native library) + */ + public static final String CACHE_NATIVE_MEMORY = "cache.native_memory.current.bytes"; + + /** + * Peak native memory used by cached patterns (high water mark). + * + *

Type: Gauge (bytes) + * + *

Updated: When current memory exceeds previous peak + * + *

Interpretation: Maximum exact memory usage; helps size cache limits + */ + public static final String CACHE_NATIVE_MEMORY_PEAK = "cache.native_memory.peak.bytes"; + + // ======================================== + // Cache Eviction Metrics (3) + // ======================================== + + /** + * Patterns evicted due to LRU cache overflow. + * + *

Type: Counter + * + *

Incremented: When cache exceeds maxCacheSize and LRU pattern evicted + * + *

Interpretation: High values indicate cache too small or working set exceeds limit + */ + public static final String CACHE_EVICTIONS_LRU = "cache.evictions.lru.total.count"; + + /** + * Patterns evicted due to idle timeout. + * + *

Type: Counter + * + *

Incremented: When background task evicts pattern unused for idleTimeoutSeconds + * + *

Interpretation: High values indicate many patterns accessed once then abandoned + */ + public static final String CACHE_EVICTIONS_IDLE = "cache.evictions.idle.total.count"; + + /** + * Patterns freed from deferred cleanup queue (were in use when eviction attempted). + * + *

Type: Counter + * + *

Incremented: When deferred cleanup task successfully frees pattern after matchers + * closed + * + *

Interpretation: Normal during concurrent workloads; see deferred metrics for backlog + */ + public static final String CACHE_EVICTIONS_DEFERRED = "cache.evictions.deferred.total.count"; + + // ======================================== + // Deferred Cleanup Metrics (4) + // ======================================== + + /** + * Current number of patterns awaiting deferred cleanup. + * + *

Type: Gauge (count) + * + *

Updated: When patterns moved to deferred queue or freed + * + *

Interpretation: Should be low; high values indicate matchers not closed promptly + */ + public static final String CACHE_DEFERRED_PATTERNS_COUNT = + "cache.deferred.patterns.current.count"; + + /** + * Peak number of patterns in deferred cleanup queue. + * + *

Type: Gauge (count) + * + *

Updated: When deferred count exceeds previous peak + * + *

Interpretation: High peaks indicate bursts of concurrent matcher usage + */ + public static final String CACHE_DEFERRED_PATTERNS_PEAK = "cache.deferred.patterns.peak.count"; + + /** + * Current native memory held by deferred cleanup patterns. + * + *

Type: Gauge (bytes) + * + *

Updated: When patterns added to or freed from deferred queue + * + *

Interpretation: Exact memory not yet reclaimed; large values indicate matcher leak + * risk + */ + public static final String CACHE_DEFERRED_MEMORY = "cache.deferred.native_memory.current.bytes"; + + /** + * Peak native memory held by deferred cleanup patterns. + * + *

Type: Gauge (bytes) + * + *

Updated: When deferred memory exceeds previous peak + * + *

Interpretation: Maximum exact memory from in-use patterns + */ + public static final String CACHE_DEFERRED_MEMORY_PEAK = "cache.deferred.native_memory.peak.bytes"; + + // ======================================== + // Resource Management Metrics (4) + // ======================================== + + /** + * Current number of active (compiled) patterns across all caches. + * + *

Type: Gauge (count) + * + *

Updated: On pattern compilation and cleanup + * + *

Interpretation: Should stay below maxSimultaneousCompiledPatterns limit + */ + public static final String RESOURCES_PATTERNS_ACTIVE = "resources.patterns.active.current.count"; + + /** + * Current number of active matchers. + * + *

Type: Gauge (count) + * + *

Updated: On matcher creation and close + * + *

Interpretation: High values indicate many concurrent matching operations + */ + public static final String RESOURCES_MATCHERS_ACTIVE = "resources.matchers.active.current.count"; + + /** + * Total patterns freed (native memory deallocated). + * + *

Type: Counter + * + *

Incremented: When pattern's native handle freed via freePattern() + * + *

Interpretation: Should approximately equal PATTERNS_COMPILED over time + */ + public static final String RESOURCES_PATTERNS_FREED = "resources.patterns.freed.total.count"; + + /** + * Total matchers freed. + * + *

Type: Counter + * + *

Incremented: When Matcher.close() completes + * + *

Interpretation: Tracks matcher lifecycle; useful for leak detection + */ + public static final String RESOURCES_MATCHERS_FREED = "resources.matchers.freed.total.count"; + + // ======================================== + // Performance Metrics - Matching + // ======================================== + // Pattern: Global metrics (ALL) + Specific breakdown (String, Bulk, Zero-Copy) + + /** + * Total matching operations (ALL - String + Bulk + Zero-Copy). + * + *

Type: Counter + * + *

Incremented: For EVERY matches() or find() call regardless of variant + * + *

Interpretation: Total matching workload across all API variants + * + *

Breakdown: Sum of MATCHING_STRING_OPERATIONS + MATCHING_BULK_OPERATIONS + + * MATCHING_ZERO_COPY_OPERATIONS + */ + public static final String MATCHING_OPERATIONS = "matching.operations.total.count"; + + /** + * Matching operation latency (ALL variants). + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For EVERY matching operation (String, bulk, zero-copy) + * + *

Interpretation: Overall matching performance across all variants + */ + public static final String MATCHING_LATENCY = "matching.latency"; + + /** + * Full match operation latency (ALL variants). + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each full match (String or zero-copy) + * + *

Interpretation: Full match performance + */ + public static final String MATCHING_FULL_MATCH_LATENCY = "matching.full_match.latency"; + + /** + * Partial match operation latency (ALL variants). + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each partial match (String or zero-copy) + * + *

Interpretation: Partial match performance + */ + public static final String MATCHING_PARTIAL_MATCH_LATENCY = "matching.partial_match.latency"; + + // --- String-specific matching metrics --- + + /** + * String-based matching operations only. + * + *

Type: Counter + * + *

Incremented: For each matches(String) or find(String) call + * + *

Interpretation: String API usage (subset of MATCHING_OPERATIONS) + */ + public static final String MATCHING_STRING_OPERATIONS = "matching.string.operations.total.count"; + + /** + * String-based matching latency. + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each String matching operation + * + *

Interpretation: String API performance baseline + */ + public static final String MATCHING_STRING_LATENCY = "matching.string.latency"; + + // --- Bulk-specific matching metrics --- + + /** + * Bulk matching operations (matchAll, filter with String arrays/collections). + * + *

Type: Counter + * + *

Incremented: Once per bulk call + * + *

Interpretation: Bulk API usage (subset of MATCHING_OPERATIONS) + */ + public static final String MATCHING_BULK_OPERATIONS = "matching.bulk.operations.total.count"; + + /** + * Total items processed in bulk matching. + * + *

Type: Counter + * + *

Incremented: By number of items in each bulk call + * + *

Interpretation: Total strings processed via bulk + */ + public static final String MATCHING_BULK_ITEMS = "matching.bulk.items.total.count"; + + /** + * Bulk matching latency (per item average). + * + *

Type: Timer (nanoseconds per item) + * + *

Recorded: Average latency per item + * + *

Interpretation: Should be lower than single due to JNI amortization + */ + public static final String MATCHING_BULK_LATENCY = "matching.bulk.latency"; + + // --- Zero-copy specific matching metrics --- + + /** + * Zero-copy matching operations (ByteBuffer or address/length - single). + * + *

Type: Counter + * + *

Incremented: For each zero-copy single match + * + *

Interpretation: Zero-copy API adoption (subset of MATCHING_OPERATIONS) + */ + public static final String MATCHING_ZERO_COPY_OPERATIONS = + "matching.zero_copy.operations.total.count"; + + /** + * Zero-copy matching latency. + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each zero-copy single match + * + *

Interpretation: Should be 46-99% faster than String + */ + public static final String MATCHING_ZERO_COPY_LATENCY = "matching.zero_copy.latency"; + + /** + * Zero-copy bulk matching operations (address/length arrays). + * + *

Type: Counter + * + *

Incremented: Once per zero-copy bulk call + * + *

Interpretation: Zero-copy bulk usage + */ + public static final String MATCHING_BULK_ZERO_COPY_OPERATIONS = + "matching.bulk.zero_copy.operations.total.count"; + + /** + * Zero-copy bulk matching latency (per item). + * + *

Type: Timer (nanoseconds per item) + * + *

Recorded: Per-item latency for zero-copy bulk + * + *

Interpretation: Fastest path (bulk + zero-copy) + */ + public static final String MATCHING_BULK_ZERO_COPY_LATENCY = "matching.bulk.zero_copy.latency"; + + // ======================================== + // Performance Metrics - Capture Groups + // ======================================== + // Pattern: Global metrics (ALL) + Specific breakdown (String, Bulk, Zero-Copy) + + /** + * Total capture group operations (ALL - String + Bulk + Zero-Copy). + * + *

Type: Counter + * + *

Incremented: For EVERY match(), find(), findAll() with group extraction + * + *

Interpretation: Total capture workload across all variants + * + *

Breakdown: Sum of CAPTURE_STRING_OPERATIONS + CAPTURE_BULK_OPERATIONS + + * CAPTURE_ZERO_COPY_OPERATIONS + */ + public static final String CAPTURE_OPERATIONS = "capture.operations.total.count"; + + /** + * Capture group extraction latency (ALL variants). + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For EVERY capture operation (String, bulk, zero-copy) + * + *

Interpretation: Overall capture performance across all variants + */ + public static final String CAPTURE_LATENCY = "capture.latency"; + + // --- String-specific capture metrics --- + + /** + * String-based capture operations only. + * + *

Type: Counter + * + *

Incremented: For each match(String), find(String), findAll(String) + * + *

Interpretation: String capture API usage (subset of CAPTURE_OPERATIONS) + */ + public static final String CAPTURE_STRING_OPERATIONS = "capture.string.operations.total.count"; + + /** + * String-based capture latency. + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each String capture operation + * + *

Interpretation: String capture performance baseline + */ + public static final String CAPTURE_STRING_LATENCY = "capture.string.latency"; + + // --- Bulk-specific capture metrics --- + + /** + * Bulk capture operations (extractGroupsBulk, matchAll with groups). + * + *

Type: Counter + * + *

Incremented: Once per bulk capture call + * + *

Interpretation: Bulk capture API usage (subset of CAPTURE_OPERATIONS) + */ + public static final String CAPTURE_BULK_OPERATIONS = "capture.bulk.operations.total.count"; + + /** + * Total items in bulk capture operations. + * + *

Type: Counter + * + *

Incremented: By number of items in each bulk capture + * + *

Interpretation: Total strings processed via bulk capture + */ + public static final String CAPTURE_BULK_ITEMS = "capture.bulk.items.total.count"; + + /** + * Bulk capture latency (per item average). + * + *

Type: Timer (nanoseconds per item) + * + *

Recorded: Average latency per item in bulk capture + * + *

Interpretation: Should be lower than single due to JNI amortization + */ + public static final String CAPTURE_BULK_LATENCY = "capture.bulk.latency"; + + // --- Zero-copy specific capture metrics --- + + /** + * Zero-copy capture operations (ByteBuffer, address/length - single). + * + *

Type: Counter + * + *

Incremented: For each zero-copy single capture + * + *

Interpretation: Zero-copy capture adoption (subset of CAPTURE_OPERATIONS) + */ + public static final String CAPTURE_ZERO_COPY_OPERATIONS = + "capture.zero_copy.operations.total.count"; + + /** + * Zero-copy capture latency. + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each zero-copy capture + * + *

Interpretation: Should be 46-99% faster than String + */ + public static final String CAPTURE_ZERO_COPY_LATENCY = "capture.zero_copy.latency"; + + /** + * Zero-copy bulk capture operations. + * + *

Type: Counter + * + *

Incremented: Once per zero-copy bulk capture call + * + *

Interpretation: Zero-copy bulk capture usage + */ + public static final String CAPTURE_BULK_ZERO_COPY_OPERATIONS = + "capture.bulk.zero_copy.operations.total.count"; + + /** + * Zero-copy bulk capture latency (per item). + * + *

Type: Timer (nanoseconds per item) + * + *

Recorded: Per-item latency for zero-copy bulk capture + * + *

Interpretation: Fastest capture path + */ + public static final String CAPTURE_BULK_ZERO_COPY_LATENCY = "capture.bulk.zero_copy.latency"; + + /** + * Total matches found by findAll operations (ALL variants). + * + *

Type: Counter + * + *

Incremented: By number of matches found in each findAll() + * + *

Interpretation: Total matches extracted across all findAll calls + */ + public static final String CAPTURE_FINDALL_MATCHES = "capture.findall.matches.total.count"; + + // ======================================== + // Performance Metrics - Replace + // ======================================== + // Pattern: Global metrics (ALL) + Specific breakdown (String, Bulk, Zero-Copy) + + /** + * Total replace operations (ALL - String + Bulk + Zero-Copy). + * + *

Type: Counter + * + *

Incremented: For EVERY replaceFirst(), replaceAll() regardless of variant + * + *

Interpretation: Total replace workload across all variants + * + *

Breakdown: Sum of REPLACE_STRING_OPERATIONS + REPLACE_BULK_OPERATIONS + + * REPLACE_ZERO_COPY_OPERATIONS + */ + public static final String REPLACE_OPERATIONS = "replace.operations.total.count"; + + /** + * Replace operation latency (ALL variants). + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For EVERY replace operation (String, bulk, zero-copy) + * + *

Interpretation: Overall replace performance across all variants + */ + public static final String REPLACE_LATENCY = "replace.latency"; + + // --- String-specific replace metrics --- + + /** + * String-based replace operations only. + * + *

Type: Counter + * + *

Incremented: For each replaceFirst(String) or replaceAll(String) + * + *

Interpretation: String replace API usage (subset of REPLACE_OPERATIONS) + */ + public static final String REPLACE_STRING_OPERATIONS = "replace.string.operations.total.count"; + + /** + * String-based replace latency. + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each String replace operation + * + *

Interpretation: String replace performance baseline + */ + public static final String REPLACE_STRING_LATENCY = "replace.string.latency"; + + // --- Bulk-specific replace metrics --- + + /** + * Bulk replace operations (replaceAll with arrays/collections). + * + *

Type: Counter + * + *

Incremented: Once per bulk replace call + * + *

Interpretation: Bulk replace API usage (subset of REPLACE_OPERATIONS) + */ + public static final String REPLACE_BULK_OPERATIONS = "replace.bulk.operations.total.count"; + + /** + * Total items in bulk replace operations. + * + *

Type: Counter + * + *

Incremented: By number of items in each bulk replace + * + *

Interpretation: Total strings processed via bulk replace + */ + public static final String REPLACE_BULK_ITEMS = "replace.bulk.items.total.count"; + + /** + * Bulk replace latency (per item average). + * + *

Type: Timer (nanoseconds per item) + * + *

Recorded: Average latency per item in bulk replace + * + *

Interpretation: Should be lower than single due to JNI amortization + */ + public static final String REPLACE_BULK_LATENCY = "replace.bulk.latency"; + + // --- Zero-copy specific replace metrics --- + + /** + * Zero-copy replace operations (ByteBuffer, address/length - single). + * + *

Type: Counter + * + *

Incremented: For each zero-copy single replace + * + *

Interpretation: Zero-copy replace adoption (subset of REPLACE_OPERATIONS) + */ + public static final String REPLACE_ZERO_COPY_OPERATIONS = + "replace.zero_copy.operations.total.count"; + + /** + * Zero-copy replace latency. + * + *

Type: Timer (nanoseconds) + * + *

Recorded: For each zero-copy replace + * + *

Interpretation: Should be 46-99% faster than String + */ + public static final String REPLACE_ZERO_COPY_LATENCY = "replace.zero_copy.latency"; + + /** + * Zero-copy bulk replace operations. + * + *

Type: Counter + * + *

Incremented: Once per zero-copy bulk replace call + * + *

Interpretation: Zero-copy bulk replace usage + */ + public static final String REPLACE_BULK_ZERO_COPY_OPERATIONS = + "replace.bulk.zero_copy.operations.total.count"; + + /** + * Number of items processed in zero-copy bulk replace operations. + * + *

Type: Counter (items) + * + *

Recorded: Count of individual buffers/addresses processed in bulk zero-copy replace + * + *

Interpretation: Total items in all REPLACE_BULK_ZERO_COPY_OPERATIONS calls + */ + public static final String REPLACE_BULK_ZERO_COPY_ITEMS = + "replace.bulk.zero_copy.items.total.count"; + + /** + * Zero-copy bulk replace latency (per item). + * + *

Type: Timer (nanoseconds per item) + * + *

Recorded: Per-item latency for zero-copy bulk replace + * + *

Interpretation: Fastest replace path + */ + public static final String REPLACE_BULK_ZERO_COPY_LATENCY = "replace.bulk.zero_copy.latency"; + + // ======================================== + // Error Metrics (3) + // ======================================== + + /** + * Pattern compilation failures (invalid regex syntax). + * + *

Type: Counter + * + *

Incremented: When RE2 rejects pattern as invalid + * + *

Interpretation: User error (bad regex); check logs for pattern details + */ + public static final String ERRORS_COMPILATION_FAILED = "errors.compilation.failed.total.count"; + + /** + * Native library load failures. + * + *

Type: Counter + * + *

Incremented: When RE2 native library fails to load at startup + * + *

Interpretation: Critical error; check platform detection and library bundle + */ + public static final String ERRORS_NATIVE_LIBRARY = "errors.native_library.total.count"; + + /** + * Resource limit exceeded (too many patterns or matchers). + * + *

Type: Counter + * + *

Incremented: When maxSimultaneousCompiledPatterns exceeded + * + *

Interpretation: Safety limit hit; increase limit or reduce concurrency + */ + public static final String ERRORS_RESOURCE_EXHAUSTED = "errors.resource.exhausted.total.count"; } diff --git a/libre2-core/src/main/java/com/axonops/libre2/metrics/NoOpMetricsRegistry.java b/libre2-core/src/main/java/com/axonops/libre2/metrics/NoOpMetricsRegistry.java index b0ecbd9..0c31773 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/metrics/NoOpMetricsRegistry.java +++ b/libre2-core/src/main/java/com/axonops/libre2/metrics/NoOpMetricsRegistry.java @@ -21,8 +21,8 @@ /** * No-op metrics registry implementation. * - *

Used when metrics are disabled (default). All methods are empty - * and will be inlined by the JIT compiler, resulting in zero overhead. + *

Used when metrics are disabled (default). All methods are empty and will be inlined by the JIT + * compiler, resulting in zero overhead. * *

This is the default implementation in {@link com.axonops.libre2.cache.RE2Config#DEFAULT}. * @@ -30,37 +30,35 @@ */ public final class NoOpMetricsRegistry implements RE2MetricsRegistry { - /** - * Singleton instance - use this instead of creating new instances. - */ - public static final NoOpMetricsRegistry INSTANCE = new NoOpMetricsRegistry(); + /** Singleton instance - use this instead of creating new instances. */ + public static final NoOpMetricsRegistry INSTANCE = new NoOpMetricsRegistry(); - private NoOpMetricsRegistry() { - // Singleton - use INSTANCE - } + private NoOpMetricsRegistry() { + // Singleton - use INSTANCE + } - @Override - public void incrementCounter(String name) { - // No-op - } + @Override + public void incrementCounter(String name) { + // No-op + } - @Override - public void incrementCounter(String name, long delta) { - // No-op - } + @Override + public void incrementCounter(String name, long delta) { + // No-op + } - @Override - public void recordTimer(String name, long durationNanos) { - // No-op - } + @Override + public void recordTimer(String name, long durationNanos) { + // No-op + } - @Override - public void registerGauge(String name, Supplier valueSupplier) { - // No-op - } + @Override + public void registerGauge(String name, Supplier valueSupplier) { + // No-op + } - @Override - public void removeGauge(String name) { - // No-op - } + @Override + public void removeGauge(String name) { + // No-op + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/metrics/RE2MetricsRegistry.java b/libre2-core/src/main/java/com/axonops/libre2/metrics/RE2MetricsRegistry.java index a0a16af..bed4d72 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/metrics/RE2MetricsRegistry.java +++ b/libre2-core/src/main/java/com/axonops/libre2/metrics/RE2MetricsRegistry.java @@ -21,14 +21,15 @@ /** * Abstract metrics registry interface for libre2-java. * - *

Allows the library to work with or without Dropwizard Metrics dependency. - * Implementations can use Dropwizard Metrics, custom metrics systems, or no-op. + *

Allows the library to work with or without Dropwizard Metrics dependency. Implementations can + * use Dropwizard Metrics, custom metrics systems, or no-op. * *

Metric Types (following Dropwizard patterns): + * *

    - *
  • Counter: Atomic long counter (incrementing values)
  • - *
  • Timer: Measures duration in nanoseconds with histogram
  • - *
  • Gauge: Instantaneous value computed on-demand via supplier
  • + *
  • Counter: Atomic long counter (incrementing values) + *
  • Timer: Measures duration in nanoseconds with histogram + *
  • Gauge: Instantaneous value computed on-demand via supplier *
* *

Thread Safety: All implementations must be thread-safe. @@ -37,58 +38,58 @@ */ public interface RE2MetricsRegistry { - /** - * Increment a counter by 1. - * - *

Thread-safe: Multiple threads can increment the same counter concurrently. - * - * @param name metric name (e.g., "patterns.compiled") - */ - void incrementCounter(String name); + /** + * Increment a counter by 1. + * + *

Thread-safe: Multiple threads can increment the same counter concurrently. + * + * @param name metric name (e.g., "patterns.compiled") + */ + void incrementCounter(String name); - /** - * Increment a counter by a specific delta. - * - *

Thread-safe: Multiple threads can increment the same counter concurrently. - * - * @param name metric name (e.g., "cache.evictions_lru") - * @param delta amount to increment (must be non-negative) - */ - void incrementCounter(String name, long delta); + /** + * Increment a counter by a specific delta. + * + *

Thread-safe: Multiple threads can increment the same counter concurrently. + * + * @param name metric name (e.g., "cache.evictions_lru") + * @param delta amount to increment (must be non-negative) + */ + void incrementCounter(String name, long delta); - /** - * Record a timer measurement in nanoseconds. - * - *

Timers maintain histograms of duration measurements, allowing - * calculation of percentiles (P50, P99, etc.). - * - *

Thread-safe: Multiple threads can record to the same timer concurrently. - * - * @param name metric name (e.g., "patterns.compilation_time") - * @param durationNanos duration in nanoseconds - */ - void recordTimer(String name, long durationNanos); + /** + * Record a timer measurement in nanoseconds. + * + *

Timers maintain histograms of duration measurements, allowing calculation of percentiles + * (P50, P99, etc.). + * + *

Thread-safe: Multiple threads can record to the same timer concurrently. + * + * @param name metric name (e.g., "patterns.compilation_time") + * @param durationNanos duration in nanoseconds + */ + void recordTimer(String name, long durationNanos); - /** - * Register a gauge that computes its value on-demand. - * - *

The supplier will be called each time the gauge is read (e.g., via JMX). - * The supplier should be fast and not block. - * - *

If a gauge with this name already exists, it should be replaced. - * - * @param name metric name (e.g., "cache.size") - * @param valueSupplier function that returns the current value - */ - void registerGauge(String name, Supplier valueSupplier); + /** + * Register a gauge that computes its value on-demand. + * + *

The supplier will be called each time the gauge is read (e.g., via JMX). The supplier should + * be fast and not block. + * + *

If a gauge with this name already exists, it should be replaced. + * + * @param name metric name (e.g., "cache.size") + * @param valueSupplier function that returns the current value + */ + void registerGauge(String name, Supplier valueSupplier); - /** - * Remove a previously registered gauge. - * - *

Used during cleanup to prevent memory leaks. - * If no gauge exists with this name, this is a no-op. - * - * @param name metric name to remove - */ - void removeGauge(String name); + /** + * Remove a previously registered gauge. + * + *

Used during cleanup to prevent memory leaks. If no gauge exists with this name, this is a + * no-op. + * + * @param name metric name to remove + */ + void removeGauge(String name); } diff --git a/libre2-core/src/main/java/com/axonops/libre2/util/PatternHasher.java b/libre2-core/src/main/java/com/axonops/libre2/util/PatternHasher.java index d790a20..aefc010 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/util/PatternHasher.java +++ b/libre2-core/src/main/java/com/axonops/libre2/util/PatternHasher.java @@ -20,10 +20,11 @@ * Utility for hashing pattern strings for logging purposes. * *

Pattern hashing provides privacy and readability in logs: + * *

    - *
  • Privacy: Don't log potentially sensitive regex patterns
  • - *
  • Readability: Logs aren't cluttered with long pattern strings
  • - *
  • Debuggability: Same pattern always gets same hash, easy to grep/trace
  • + *
  • Privacy: Don't log potentially sensitive regex patterns + *
  • Readability: Logs aren't cluttered with long pattern strings + *
  • Debuggability: Same pattern always gets same hash, easy to grep/trace *
* *

Example: Pattern ".*ERROR.*DATABASE.*" → hash "7a3f2b1c" @@ -32,34 +33,34 @@ */ public final class PatternHasher { - private PatternHasher() { - // Utility class - } + private PatternHasher() { + // Utility class + } - /** - * Creates a compact hex hash of a pattern string for logging. - * - *

Uses {@link String#hashCode()} for consistency and simplicity. - * The hash is deterministic - same pattern always produces same hash. - * - * @param pattern the regex pattern string - * @return 8-character hex string (e.g., "7a3f2b1c") - */ - public static String hash(String pattern) { - if (pattern == null) { - return "null"; - } - return Integer.toHexString(pattern.hashCode()); + /** + * Creates a compact hex hash of a pattern string for logging. + * + *

Uses {@link String#hashCode()} for consistency and simplicity. The hash is deterministic - + * same pattern always produces same hash. + * + * @param pattern the regex pattern string + * @return hex string of up to 8 characters (e.g., "7a3f2b1c"), or "null" if pattern is null + */ + public static String hash(String pattern) { + if (pattern == null) { + return "null"; } + return Integer.toHexString(pattern.hashCode()); + } - /** - * Creates a hash with additional context for case sensitivity. - * - * @param pattern the regex pattern string - * @param caseSensitive whether the pattern is case-sensitive - * @return hash with case sensitivity indicator (e.g., "7a3f2b1c[CS]" or "7a3f2b1c[CI]") - */ - public static String hashWithCase(String pattern, boolean caseSensitive) { - return hash(pattern) + (caseSensitive ? "[CS]" : "[CI]"); - } + /** + * Creates a hash with additional context for case sensitivity. + * + * @param pattern the regex pattern string + * @param caseSensitive whether the pattern is case-sensitive + * @return hash with case sensitivity indicator (e.g., "7a3f2b1c[CS]" or "7a3f2b1c[CI]") + */ + public static String hashWithCase(String pattern, boolean caseSensitive) { + return hash(pattern) + (caseSensitive ? 
"[CS]" : "[CI]"); + } } diff --git a/libre2-core/src/main/java/com/axonops/libre2/util/ResourceTracker.java b/libre2-core/src/main/java/com/axonops/libre2/util/ResourceTracker.java index 9ff4ad3..c1fd8ce 100644 --- a/libre2-core/src/main/java/com/axonops/libre2/util/ResourceTracker.java +++ b/libre2-core/src/main/java/com/axonops/libre2/util/ResourceTracker.java @@ -16,226 +16,213 @@ package com.axonops.libre2.util; -import com.axonops.libre2.api.Pattern; -import com.axonops.libre2.metrics.RE2MetricsRegistry; import com.axonops.libre2.metrics.MetricNames; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - +import com.axonops.libre2.metrics.RE2MetricsRegistry; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Tracks pattern resource usage for enforcing limits and monitoring. * - * CRITICAL: Tracks ACTIVE (simultaneous) patterns, not cumulative total. - * Instance-level (per-cache) to avoid conflicts when multiple caches exist. + *

CRITICAL: Tracks ACTIVE (simultaneous) patterns, not cumulative total. Instance-level + * (per-cache) to avoid conflicts when multiple caches exist. * * @since 1.0.0 */ public final class ResourceTracker { - private final Logger logger = LoggerFactory.getLogger(ResourceTracker.class); - - // ACTIVE counts (current simultaneous) - private final AtomicInteger activePatternsCount = new AtomicInteger(0); - private final AtomicInteger activeMatchersCount = new AtomicInteger(0); - - // Cumulative counters (lifetime, for metrics only) - private final AtomicLong totalPatternsCompiled = new AtomicLong(0); - private final AtomicLong totalPatternsClosed = new AtomicLong(0); - private final AtomicLong totalMatchersCreated = new AtomicLong(0); - private final AtomicLong totalMatchersClosed = new AtomicLong(0); - - // Rejection counters - private final AtomicLong patternLimitRejections = new AtomicLong(0); - private final AtomicLong matcherLimitRejections = new AtomicLong(0); - - public ResourceTracker() { - // Instance per cache - } - - /** - * Tracks a new pattern allocation (called when pattern compiled). 
- * - * @param maxSimultaneous maximum allowed simultaneous patterns - * @param metricsRegistry optional metrics registry to record errors - * @throws ResourceException if simultaneous limit exceeded - */ - public void trackPatternAllocated(int maxSimultaneous, RE2MetricsRegistry metricsRegistry) { - int current = activePatternsCount.incrementAndGet(); - totalPatternsCompiled.incrementAndGet(); - - if (current > maxSimultaneous) { - activePatternsCount.decrementAndGet(); // Roll back - patternLimitRejections.incrementAndGet(); - - // Record resource exhausted error - if (metricsRegistry != null) { - metricsRegistry.incrementCounter(MetricNames.ERRORS_RESOURCE_EXHAUSTED); - } - - throw new com.axonops.libre2.api.ResourceException( - "Maximum simultaneous compiled patterns exceeded: " + maxSimultaneous + - " (this is ACTIVE count, not cumulative - patterns can be freed and recompiled)"); - } - - logger.trace("RE2: Pattern allocated - active: {}, cumulative: {}", current, totalPatternsCompiled.get()); - } - - /** - * Tracks a pattern being freed (called when pattern closed). - * - * @param metricsRegistry optional metrics registry to record freed count - */ - public void trackPatternFreed(RE2MetricsRegistry metricsRegistry) { - int currentBefore = activePatternsCount.get(); - int current = activePatternsCount.decrementAndGet(); - totalPatternsClosed.incrementAndGet(); - - // Record pattern freed metric (Counter, not Gauge) - if (metricsRegistry != null) { - metricsRegistry.incrementCounter(MetricNames.RESOURCES_PATTERNS_FREED); - } - - if (current < 0) { - // Get stack trace to see WHO is calling this incorrectly - StackTraceElement[] stack = Thread.currentThread().getStackTrace(); - StringBuilder stackStr = new StringBuilder(); - for (int i = 2; i < Math.min(10, stack.length); i++) { - stackStr.append("\n at ").append(stack[i]); - } - logger.error("RE2: Pattern count went negative! 
before={}, after={}, Stack trace:{}", - currentBefore, current, stackStr.toString()); - activePatternsCount.set(0); - } - - logger.trace("RE2: Pattern freed - active: {}, cumulative closed: {}", current, totalPatternsClosed.get()); - } - - /** - * Gets current ACTIVE (simultaneous) pattern count. - * - * @return number of patterns currently active - */ - public int getActivePatternCount() { - return activePatternsCount.get(); - } - - /** - * Gets total patterns compiled over library lifetime (cumulative). - */ - public long getTotalPatternsCompiled() { - return totalPatternsCompiled.get(); - } - - /** - * Gets total patterns closed over library lifetime (cumulative). - */ - public long getTotalPatternsClosed() { - return totalPatternsClosed.get(); - } - - /** - * Gets rejection count for pattern limit. - */ - public long getPatternLimitRejections() { - return patternLimitRejections.get(); - } - - /** - * Tracks a new matcher allocation. - */ - public void trackMatcherAllocated() { - activeMatchersCount.incrementAndGet(); - totalMatchersCreated.incrementAndGet(); - } - - /** - * Tracks a matcher being freed. - * - * @param metricsRegistry optional metrics registry to record freed count - */ - public void trackMatcherFreed(RE2MetricsRegistry metricsRegistry) { - int current = activeMatchersCount.decrementAndGet(); - totalMatchersClosed.incrementAndGet(); - - // Record matcher freed metric (Counter, not Gauge) - if (metricsRegistry != null) { - metricsRegistry.incrementCounter(MetricNames.RESOURCES_MATCHERS_FREED); - } - - if (current < 0) { - logger.error("RE2: Matcher count went negative! This is a bug."); - activeMatchersCount.set(0); - } - } - - /** - * Gets current ACTIVE (simultaneous) matcher count. - */ - public int getActiveMatcherCount() { - return activeMatchersCount.get(); - } - - /** - * Gets total matchers created over library lifetime. 
- */ - public long getTotalMatchersCreated() { - return totalMatchersCreated.get(); - } - - /** - * Gets total matchers closed over library lifetime. - */ - public long getTotalMatchersClosed() { - return totalMatchersClosed.get(); - } - - /** - * Gets rejection count for matcher limit. - */ - public long getMatcherLimitRejections() { - return matcherLimitRejections.get(); - } - - /** - * Resets all counters (for testing only). - */ - public void reset() { - activePatternsCount.set(0); - activeMatchersCount.set(0); - totalPatternsCompiled.set(0); - totalPatternsClosed.set(0); - totalMatchersCreated.set(0); - totalMatchersClosed.set(0); - patternLimitRejections.set(0); - matcherLimitRejections.set(0); - logger.trace("RE2: ResourceTracker reset"); - } - - /** - * Gets statistics snapshot. - */ - public ResourceStatistics getStatistics() { - return new ResourceStatistics( - activePatternsCount.get(), - totalPatternsCompiled.get(), - totalPatternsClosed.get(), - patternLimitRejections.get(), - matcherLimitRejections.get() - ); - } - - public record ResourceStatistics( - int activePatterns, - long totalCompiled, - long totalClosed, - long patternLimitRejections, - long matcherLimitRejections - ) { - public boolean hasPotentialLeaks() { - // If cumulative compiled > cumulative closed + active, we have leaks - return totalCompiled > (totalClosed + activePatterns); - } - } + private final Logger logger = LoggerFactory.getLogger(ResourceTracker.class); + + // ACTIVE counts (current simultaneous) + private final AtomicInteger activePatternsCount = new AtomicInteger(0); + private final AtomicInteger activeMatchersCount = new AtomicInteger(0); + + // Cumulative counters (lifetime, for metrics only) + private final AtomicLong totalPatternsCompiled = new AtomicLong(0); + private final AtomicLong totalPatternsClosed = new AtomicLong(0); + private final AtomicLong totalMatchersCreated = new AtomicLong(0); + private final AtomicLong totalMatchersClosed = new AtomicLong(0); + + // 
Rejection counters + private final AtomicLong patternLimitRejections = new AtomicLong(0); + private final AtomicLong matcherLimitRejections = new AtomicLong(0); + + public ResourceTracker() { + // Instance per cache + } + + /** + * Tracks a new pattern allocation (called when pattern compiled). + * + * @param maxSimultaneous maximum allowed simultaneous patterns + * @param metricsRegistry optional metrics registry to record errors + * @throws ResourceException if simultaneous limit exceeded + */ + public void trackPatternAllocated(int maxSimultaneous, RE2MetricsRegistry metricsRegistry) { + int current = activePatternsCount.incrementAndGet(); + totalPatternsCompiled.incrementAndGet(); + + if (current > maxSimultaneous) { + activePatternsCount.decrementAndGet(); // Roll back + patternLimitRejections.incrementAndGet(); + + // Record resource exhausted error + if (metricsRegistry != null) { + metricsRegistry.incrementCounter(MetricNames.ERRORS_RESOURCE_EXHAUSTED); + } + + throw new com.axonops.libre2.api.ResourceException( + "Maximum simultaneous compiled patterns exceeded: " + + maxSimultaneous + + " (this is ACTIVE count, not cumulative - patterns can be freed and recompiled)"); + } + + logger.trace( + "RE2: Pattern allocated - active: {}, cumulative: {}", + current, + totalPatternsCompiled.get()); + } + + /** + * Tracks a pattern being freed (called when pattern closed). 
+ * + * @param metricsRegistry optional metrics registry to record freed count + */ + public void trackPatternFreed(RE2MetricsRegistry metricsRegistry) { + int currentBefore = activePatternsCount.get(); + int current = activePatternsCount.decrementAndGet(); + totalPatternsClosed.incrementAndGet(); + + // Record pattern freed metric (Counter, not Gauge) + if (metricsRegistry != null) { + metricsRegistry.incrementCounter(MetricNames.RESOURCES_PATTERNS_FREED); + } + + if (current < 0) { + // Get stack trace to see WHO is calling this incorrectly + StackTraceElement[] stack = Thread.currentThread().getStackTrace(); + StringBuilder stackStr = new StringBuilder(); + for (int i = 2; i < Math.min(10, stack.length); i++) { + stackStr.append("\n at ").append(stack[i]); + } + logger.error( + "RE2: Pattern count went negative! before={}, after={}, Stack trace:{}", + currentBefore, + current, + stackStr.toString()); + activePatternsCount.set(0); + } + + logger.trace( + "RE2: Pattern freed - active: {}, cumulative closed: {}", + current, + totalPatternsClosed.get()); + } + + /** + * Gets current ACTIVE (simultaneous) pattern count. + * + * @return number of patterns currently active + */ + public int getActivePatternCount() { + return activePatternsCount.get(); + } + + /** Gets total patterns compiled over library lifetime (cumulative). */ + public long getTotalPatternsCompiled() { + return totalPatternsCompiled.get(); + } + + /** Gets total patterns closed over library lifetime (cumulative). */ + public long getTotalPatternsClosed() { + return totalPatternsClosed.get(); + } + + /** Gets rejection count for pattern limit. */ + public long getPatternLimitRejections() { + return patternLimitRejections.get(); + } + + /** Tracks a new matcher allocation. */ + public void trackMatcherAllocated() { + activeMatchersCount.incrementAndGet(); + totalMatchersCreated.incrementAndGet(); + } + + /** + * Tracks a matcher being freed. 
+ * + * @param metricsRegistry optional metrics registry to record freed count + */ + public void trackMatcherFreed(RE2MetricsRegistry metricsRegistry) { + int current = activeMatchersCount.decrementAndGet(); + totalMatchersClosed.incrementAndGet(); + + // Record matcher freed metric (Counter, not Gauge) + if (metricsRegistry != null) { + metricsRegistry.incrementCounter(MetricNames.RESOURCES_MATCHERS_FREED); + } + + if (current < 0) { + logger.error("RE2: Matcher count went negative! This is a bug."); + activeMatchersCount.set(0); + } + } + + /** Gets current ACTIVE (simultaneous) matcher count. */ + public int getActiveMatcherCount() { + return activeMatchersCount.get(); + } + + /** Gets total matchers created over library lifetime. */ + public long getTotalMatchersCreated() { + return totalMatchersCreated.get(); + } + + /** Gets total matchers closed over library lifetime. */ + public long getTotalMatchersClosed() { + return totalMatchersClosed.get(); + } + + /** Gets rejection count for matcher limit. */ + public long getMatcherLimitRejections() { + return matcherLimitRejections.get(); + } + + /** Resets all counters (for testing only). */ + public void reset() { + activePatternsCount.set(0); + activeMatchersCount.set(0); + totalPatternsCompiled.set(0); + totalPatternsClosed.set(0); + totalMatchersCreated.set(0); + totalMatchersClosed.set(0); + patternLimitRejections.set(0); + matcherLimitRejections.set(0); + logger.trace("RE2: ResourceTracker reset"); + } + + /** Gets statistics snapshot. */ + public ResourceStatistics getStatistics() { + return new ResourceStatistics( + activePatternsCount.get(), + totalPatternsCompiled.get(), + totalPatternsClosed.get(), + patternLimitRejections.get(), + matcherLimitRejections.get()); + } + + /** Statistics about tracked resources. 
*/ + public record ResourceStatistics( + int activePatterns, + long totalCompiled, + long totalClosed, + long patternLimitRejections, + long matcherLimitRejections) { + public boolean hasPotentialLeaks() { + // If cumulative compiled > cumulative closed + active, we have leaks + return totalCompiled > (totalClosed + activePatterns); + } + } } diff --git a/libre2-core/src/test/java/com/axonops/libre2/HelloWorldTest.java b/libre2-core/src/test/java/com/axonops/libre2/HelloWorldTest.java index 845d0ce..155ff70 100644 --- a/libre2-core/src/test/java/com/axonops/libre2/HelloWorldTest.java +++ b/libre2-core/src/test/java/com/axonops/libre2/HelloWorldTest.java @@ -16,25 +16,23 @@ package com.axonops.libre2; -import org.junit.jupiter.api.Test; +import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.*; +import org.junit.jupiter.api.Test; -/** - * Simple unit test demonstrating pure Java testing without native library. - */ +/** Simple unit test demonstrating pure Java testing without native library. 
*/ class HelloWorldTest { - @Test - void testHelloWorld() { - String message = "Hello, libre2-java!"; - assertThat(message).isNotNull(); - assertThat(message).contains("libre2"); - } + @Test + void testHelloWorld() { + String message = "Hello, libre2-java!"; + assertThat(message).isNotNull(); + assertThat(message).contains("libre2"); + } - @Test - void testBasicJavaLogic() { - int sum = 1 + 1; - assertThat(sum).isEqualTo(2); - } + @Test + void testBasicJavaLogic() { + int sum = 1 + 1; + assertThat(sum).isEqualTo(2); + } } diff --git a/libre2-core/src/test/java/com/axonops/libre2/cache/ConfigurationTest.java b/libre2-core/src/test/java/com/axonops/libre2/cache/ConfigurationTest.java index 3a274f6..fb733bb 100644 --- a/libre2-core/src/test/java/com/axonops/libre2/cache/ConfigurationTest.java +++ b/libre2-core/src/test/java/com/axonops/libre2/cache/ConfigurationTest.java @@ -1,39 +1,39 @@ package com.axonops.libre2.cache; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import com.axonops.libre2.metrics.NoOpMetricsRegistry; import org.junit.jupiter.api.Test; -import static org.assertj.core.api.Assertions.*; - -/** - * Tests for RE2Config validation and defaults. - */ +/** Tests for RE2Config validation and defaults. 
*/ class ConfigurationTest { - @Test - void testDefaultConfiguration() { - RE2Config config = RE2Config.DEFAULT; - - assertThat(config.cacheEnabled()).isTrue(); - assertThat(config.maxCacheSize()).isEqualTo(50000); - assertThat(config.idleTimeoutSeconds()).isEqualTo(300); - assertThat(config.evictionScanIntervalSeconds()).isEqualTo(60); - assertThat(config.maxSimultaneousCompiledPatterns()).isEqualTo(100000); - assertThat(config.maxMatchersPerPattern()).isEqualTo(10000); - } - - @Test - void testNoCacheConfiguration() { - RE2Config config = RE2Config.NO_CACHE; - - assertThat(config.cacheEnabled()).isFalse(); - assertThat(config.maxSimultaneousCompiledPatterns()).isEqualTo(100000); - assertThat(config.maxMatchersPerPattern()).isEqualTo(10000); - } - - @Test - void testBuilderWithAllCustomValues() { - RE2Config config = RE2Config.builder() + @Test + void testDefaultConfiguration() { + RE2Config config = RE2Config.DEFAULT; + + assertThat(config.cacheEnabled()).isTrue(); + assertThat(config.maxCacheSize()).isEqualTo(50000); + assertThat(config.idleTimeoutSeconds()).isEqualTo(300); + assertThat(config.evictionScanIntervalSeconds()).isEqualTo(60); + assertThat(config.maxSimultaneousCompiledPatterns()).isEqualTo(100000); + assertThat(config.maxMatchersPerPattern()).isEqualTo(10000); + } + + @Test + void testNoCacheConfiguration() { + RE2Config config = RE2Config.NO_CACHE; + + assertThat(config.cacheEnabled()).isFalse(); + assertThat(config.maxSimultaneousCompiledPatterns()).isEqualTo(100000); + assertThat(config.maxMatchersPerPattern()).isEqualTo(10000); + } + + @Test + void testBuilderWithAllCustomValues() { + RE2Config config = + RE2Config.builder() .cacheEnabled(true) .maxCacheSize(10000) .idleTimeoutSeconds(600) @@ -42,95 +42,148 @@ void testBuilderWithAllCustomValues() { .maxMatchersPerPattern(20000) .build(); - assertThat(config.cacheEnabled()).isTrue(); - assertThat(config.maxCacheSize()).isEqualTo(10000); - assertThat(config.idleTimeoutSeconds()).isEqualTo(600); - 
assertThat(config.evictionScanIntervalSeconds()).isEqualTo(120); - assertThat(config.maxSimultaneousCompiledPatterns()).isEqualTo(200000); - assertThat(config.maxMatchersPerPattern()).isEqualTo(20000); - } - - @Test - void testValidation_InvalidMaxCacheSize_Zero() { - assertThatThrownBy(() -> new RE2Config(true, 0, 300, 60, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("maxCacheSize must be positive"); - } - - @Test - void testValidation_InvalidMaxCacheSize_Negative() { - assertThatThrownBy(() -> new RE2Config(true, -1, 300, 60, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("maxCacheSize must be positive"); - } - - @Test - void testValidation_InvalidIdleTimeout_Zero() { - assertThatThrownBy(() -> new RE2Config(true, 1000, 0, 60, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("idleTimeoutSeconds must be positive"); - } - - @Test - void testValidation_InvalidScanInterval_Negative() { - assertThatThrownBy(() -> new RE2Config(true, 1000, 300, -1, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("evictionScanIntervalSeconds must be positive"); - } - - @Test - void testValidation_InvalidMaxSimultaneousPatterns_Zero() { - assertThatThrownBy(() -> new RE2Config(true, 1000, 300, 60, 5, 1000, 0, 10000, true, NoOpMetricsRegistry.INSTANCE)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("maxSimultaneousCompiledPatterns must be positive"); - } - - @Test - void testValidation_InvalidMaxMatchers_Zero() { - assertThatThrownBy(() -> new RE2Config(true, 1000, 300, 60, 5, 1000, 100000, 0, true, NoOpMetricsRegistry.INSTANCE)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("maxMatchersPerPattern 
must be positive"); - } - - @Test - void testValidation_CacheLargerThanSimultaneousLimit() { - assertThatThrownBy(() -> new RE2Config(true, 100000, 300, 60, 5, 1000, 50000, 10000, true, NoOpMetricsRegistry.INSTANCE)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("maxCacheSize") - .hasMessageContaining("maxSimultaneousCompiledPatterns"); - } - - @Test - void testValidation_IgnoresInvalidCacheParams_IfCacheDisabled() { - // Should NOT throw - cache params ignored when disabled - RE2Config config = new RE2Config(false, -999, -999, -999, -999, -999, 100000, 10000, false, NoOpMetricsRegistry.INSTANCE); - - assertThat(config.cacheEnabled()).isFalse(); - } - - @Test - void testValidation_VerySmallCache() { - // Should work - cache size = 1 - RE2Config config = new RE2Config(true, 1, 300, 60, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE); - - assertThat(config.maxCacheSize()).isEqualTo(1); - } - - @Test - void testValidation_VeryLargeCache() { - // Should work - cache size = 500K - RE2Config config = new RE2Config(true, 500000, 300, 60, 5, 1000, 1000000, 10000, true, NoOpMetricsRegistry.INSTANCE); - - assertThat(config.maxCacheSize()).isEqualTo(500000); - } - - @Test - void testBuilder_DefaultValues() { - RE2Config config = RE2Config.builder().build(); - - // Should have production defaults - assertThat(config.cacheEnabled()).isTrue(); - assertThat(config.maxCacheSize()).isEqualTo(50000); - assertThat(config.maxSimultaneousCompiledPatterns()).isEqualTo(100000); - } + assertThat(config.cacheEnabled()).isTrue(); + assertThat(config.maxCacheSize()).isEqualTo(10000); + assertThat(config.idleTimeoutSeconds()).isEqualTo(600); + assertThat(config.evictionScanIntervalSeconds()).isEqualTo(120); + assertThat(config.maxSimultaneousCompiledPatterns()).isEqualTo(200000); + assertThat(config.maxMatchersPerPattern()).isEqualTo(20000); + } + + @Test + void testValidation_InvalidMaxCacheSize_Zero() { + assertThatThrownBy( + () -> + new RE2Config( + 
true, 0, 300, 60, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("maxCacheSize must be positive"); + } + + @Test + void testValidation_InvalidMaxCacheSize_Negative() { + assertThatThrownBy( + () -> + new RE2Config( + true, -1, 300, 60, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("maxCacheSize must be positive"); + } + + @Test + void testValidation_InvalidIdleTimeout_Zero() { + assertThatThrownBy( + () -> + new RE2Config( + true, 1000, 0, 60, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("idleTimeoutSeconds must be positive"); + } + + @Test + void testValidation_InvalidScanInterval_Negative() { + assertThatThrownBy( + () -> + new RE2Config( + true, + 1000, + 300, + -1, + 5, + 1000, + 100000, + 10000, + true, + NoOpMetricsRegistry.INSTANCE)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("evictionScanIntervalSeconds must be positive"); + } + + @Test + void testValidation_InvalidMaxSimultaneousPatterns_Zero() { + assertThatThrownBy( + () -> + new RE2Config( + true, 1000, 300, 60, 5, 1000, 0, 10000, true, NoOpMetricsRegistry.INSTANCE)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("maxSimultaneousCompiledPatterns must be positive"); + } + + @Test + void testValidation_InvalidMaxMatchers_Zero() { + assertThatThrownBy( + () -> + new RE2Config( + true, 1000, 300, 60, 5, 1000, 100000, 0, true, NoOpMetricsRegistry.INSTANCE)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("maxMatchersPerPattern must be positive"); + } + + @Test + void testValidation_CacheLargerThanSimultaneousLimit() { + assertThatThrownBy( + () -> + new RE2Config( + true, + 100000, + 300, + 60, + 5, + 1000, + 50000, + 10000, + true, + NoOpMetricsRegistry.INSTANCE)) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("maxCacheSize") + .hasMessageContaining("maxSimultaneousCompiledPatterns"); + } + + @Test + void testValidation_IgnoresInvalidCacheParams_IfCacheDisabled() { + // Should NOT throw - cache params ignored when disabled + RE2Config config = + new RE2Config( + false, + -999, + -999, + -999, + -999, + -999, + 100000, + 10000, + false, + NoOpMetricsRegistry.INSTANCE); + + assertThat(config.cacheEnabled()).isFalse(); + } + + @Test + void testValidation_VerySmallCache() { + // Should work - cache size = 1 + RE2Config config = + new RE2Config(true, 1, 300, 60, 5, 1000, 100000, 10000, true, NoOpMetricsRegistry.INSTANCE); + + assertThat(config.maxCacheSize()).isEqualTo(1); + } + + @Test + void testValidation_VeryLargeCache() { + // Should work - cache size = 500K + RE2Config config = + new RE2Config( + true, 500000, 300, 60, 5, 1000, 1000000, 10000, true, NoOpMetricsRegistry.INSTANCE); + + assertThat(config.maxCacheSize()).isEqualTo(500000); + } + + @Test + void testBuilder_DefaultValues() { + RE2Config config = RE2Config.builder().build(); + + // Should have production defaults + assertThat(config.cacheEnabled()).isTrue(); + assertThat(config.maxCacheSize()).isEqualTo(50000); + assertThat(config.maxSimultaneousCompiledPatterns()).isEqualTo(100000); + } } diff --git a/libre2-core/src/test/java/com/axonops/libre2/dropwizard/RE2MetricsConfigTest.java b/libre2-core/src/test/java/com/axonops/libre2/dropwizard/RE2MetricsConfigTest.java index bac6d0d..9e1e7db 100644 --- a/libre2-core/src/test/java/com/axonops/libre2/dropwizard/RE2MetricsConfigTest.java +++ b/libre2-core/src/test/java/com/axonops/libre2/dropwizard/RE2MetricsConfigTest.java @@ -1,72 +1,70 @@ package com.axonops.libre2.dropwizard; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import com.axonops.libre2.cache.RE2Config; import 
com.axonops.libre2.metrics.DropwizardMetricsAdapter; -import com.axonops.libre2.metrics.RE2MetricsRegistry; import com.codahale.metrics.MetricRegistry; import org.junit.jupiter.api.Test; -import static org.assertj.core.api.Assertions.*; - -/** - * Tests for RE2MetricsConfig factory methods. - */ +/** Tests for RE2MetricsConfig factory methods. */ class RE2MetricsConfigTest { - @Test - void testWithMetrics_CustomPrefix() { - MetricRegistry registry = new MetricRegistry(); + @Test + void testWithMetrics_CustomPrefix() { + MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.withMetrics(registry, "com.myapp.regex"); + RE2Config config = RE2MetricsConfig.withMetrics(registry, "com.myapp.regex"); - assertThat(config).isNotNull(); - assertThat(config.metricsRegistry()).isInstanceOf(DropwizardMetricsAdapter.class); - } + assertThat(config).isNotNull(); + assertThat(config.metricsRegistry()).isInstanceOf(DropwizardMetricsAdapter.class); + } - @Test - void testWithMetrics_DefaultPrefix() { - MetricRegistry registry = new MetricRegistry(); + @Test + void testWithMetrics_DefaultPrefix() { + MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.withMetrics(registry); + RE2Config config = RE2MetricsConfig.withMetrics(registry); - assertThat(config).isNotNull(); - assertThat(config.metricsRegistry()).isInstanceOf(DropwizardMetricsAdapter.class); - } + assertThat(config).isNotNull(); + assertThat(config.metricsRegistry()).isInstanceOf(DropwizardMetricsAdapter.class); + } - @Test - void testForCassandra() { - MetricRegistry registry = new MetricRegistry(); + @Test + void testForCassandra() { + MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.forCassandra(registry); + RE2Config config = RE2MetricsConfig.forCassandra(registry); - assertThat(config).isNotNull(); - assertThat(config.metricsRegistry()).isInstanceOf(DropwizardMetricsAdapter.class); - // Should use Cassandra-standard 
prefix (verified via metrics test below) - } + assertThat(config).isNotNull(); + assertThat(config.metricsRegistry()).isInstanceOf(DropwizardMetricsAdapter.class); + // Should use Cassandra-standard prefix (verified via metrics test below) + } - @Test - void testWithMetrics_DisableJmx() { - MetricRegistry registry = new MetricRegistry(); + @Test + void testWithMetrics_DisableJmx() { + MetricRegistry registry = new MetricRegistry(); - RE2Config config = RE2MetricsConfig.withMetrics(registry, "test", false); + RE2Config config = RE2MetricsConfig.withMetrics(registry, "test", false); - assertThat(config).isNotNull(); - // JMX should not be auto-configured (can't easily verify without JMX checks) - } + assertThat(config).isNotNull(); + // JMX should not be auto-configured (can't easily verify without JMX checks) + } - @Test - void testNullRegistry_ThrowsException() { - assertThatThrownBy(() -> RE2MetricsConfig.withMetrics(null, "test")) - .isInstanceOf(NullPointerException.class) - .hasMessageContaining("registry"); - } + @Test + void testNullRegistry_ThrowsException() { + assertThatThrownBy(() -> RE2MetricsConfig.withMetrics(null, "test")) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("registry"); + } - @Test - void testNullPrefix_ThrowsException() { - MetricRegistry registry = new MetricRegistry(); + @Test + void testNullPrefix_ThrowsException() { + MetricRegistry registry = new MetricRegistry(); - assertThatThrownBy(() -> RE2MetricsConfig.withMetrics(registry, null)) - .isInstanceOf(NullPointerException.class) - .hasMessageContaining("metricPrefix"); - } + assertThatThrownBy(() -> RE2MetricsConfig.withMetrics(registry, null)) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("metricPrefix"); + } } diff --git a/libre2-core/src/test/java/com/axonops/libre2/test/TestUtils.java b/libre2-core/src/test/java/com/axonops/libre2/test/TestUtils.java index 404919a..71450cd 100644 --- 
a/libre2-core/src/test/java/com/axonops/libre2/test/TestUtils.java +++ b/libre2-core/src/test/java/com/axonops/libre2/test/TestUtils.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.axonops.libre2.test; import com.axonops.libre2.api.Pattern; @@ -26,15 +27,17 @@ * Test utilities for RE2 test setup and teardown. * *

Provides helper methods to reduce test boilerplate: + * *

    - *
  • Cache configuration with sensible test defaults
  • - *
  • Metrics registry setup
  • - *
  • Global cache management
  • + *
  • Cache configuration with sensible test defaults + *
  • Metrics registry setup + *
  • Global cache management *
* *

Usage Patterns

* *

Simple Test Without Metrics

+ * *
{@code
  * @Test
  * void myTest() {
@@ -44,6 +47,7 @@
  * }
* *

Test With Custom Cache Configuration

+ * *
{@code
  * private PatternCache originalCache;
  *
@@ -62,6 +66,7 @@
  * }
* *

Test With Metrics

+ * *
{@code
  * private PatternCache originalCache;
  * private MetricRegistry registry;
@@ -88,108 +93,110 @@
  * @since 1.0.0
  */
 public final class TestUtils {
-    private TestUtils() {
-        // Utility class
-    }
+  private TestUtils() {
+    // Utility class
+  }
 
-    /**
-     * Creates test configuration with sensible defaults for testing.
-     *
-     * 

Differs from production defaults: - *

    - *
  • Smaller cache (5000 vs 50000) - faster tests
  • - *
  • Shorter timeouts (faster eviction in tests)
  • - *
  • No JMX (avoids InstanceAlreadyExistsException)
  • - *
- * - * @return builder with test defaults - */ - public static RE2Config.Builder testConfigBuilder() { - return RE2Config.builder() - .maxCacheSize(5000) - .idleTimeoutSeconds(60) - .evictionScanIntervalSeconds(15) - .deferredCleanupIntervalSeconds(2) - .metricsRegistry(NoOpMetricsRegistry.INSTANCE); - } + /** + * Creates test configuration with sensible defaults for testing. + * + *

Differs from production defaults: + * + *

    + *
  • Smaller cache (5000 vs 50000) - faster tests + *
  • Shorter timeouts (faster eviction in tests) + *
  • No JMX (avoids InstanceAlreadyExistsException) + *
+ * + * @return builder with test defaults + */ + public static RE2Config.Builder testConfigBuilder() { + return RE2Config.builder() + .maxCacheSize(5000) + .idleTimeoutSeconds(60) + .evictionScanIntervalSeconds(15) + .deferredCleanupIntervalSeconds(2) + .metricsRegistry(NoOpMetricsRegistry.INSTANCE); + } - /** - * Creates test configuration with Dropwizard metrics (JMX disabled). - * - *

Use when you need to verify metrics in tests. - * JMX is always disabled to prevent InstanceAlreadyExistsException in test suites. - * - * @param registry Dropwizard MetricRegistry - * @param prefix metric name prefix - * @return builder with metrics enabled - */ - public static RE2Config.Builder testConfigWithMetrics(MetricRegistry registry, String prefix) { - return testConfigBuilder() - .metricsRegistry(new DropwizardMetricsAdapter(registry, prefix)); - } + /** + * Creates test configuration with Dropwizard metrics (JMX disabled). + * + *

Use when you need to verify metrics in tests. JMX is always disabled to prevent + * InstanceAlreadyExistsException in test suites. + * + * @param registry Dropwizard MetricRegistry + * @param prefix metric name prefix + * @return builder with metrics enabled + */ + public static RE2Config.Builder testConfigWithMetrics(MetricRegistry registry, String prefix) { + return testConfigBuilder().metricsRegistry(new DropwizardMetricsAdapter(registry, prefix)); + } - /** - * Replaces global cache with custom configuration. - * - *

Saves and returns original cache for restoration in teardown. - * - *

Example: - *

{@code
-     * @BeforeEach
-     * void setup() {
-     *     RE2Config config = TestUtils.testConfigBuilder().maxCacheSize(100).build();
-     *     originalCache = TestUtils.replaceGlobalCache(config);
-     * }
-     *
-     * @AfterEach
-     * void cleanup() {
-     *     TestUtils.restoreGlobalCache(originalCache);
-     * }
-     * }
- * - * @param config custom configuration - * @return original cache (save for restoration) - */ - public static PatternCache replaceGlobalCache(RE2Config config) { - PatternCache original = Pattern.getGlobalCache(); - Pattern.setGlobalCache(new PatternCache(config)); - return original; - } + /** + * Replaces global cache with custom configuration. + * + *

Saves and returns original cache for restoration in teardown. + * + *

Example: + * + *

{@code
+   * @BeforeEach
+   * void setup() {
+   *     RE2Config config = TestUtils.testConfigBuilder().maxCacheSize(100).build();
+   *     originalCache = TestUtils.replaceGlobalCache(config);
+   * }
+   *
+   * @AfterEach
+   * void cleanup() {
+   *     TestUtils.restoreGlobalCache(originalCache);
+   * }
+   * }
+ * + * @param config custom configuration + * @return original cache (save for restoration) + */ + public static PatternCache replaceGlobalCache(RE2Config config) { + PatternCache original = Pattern.getGlobalCache(); + Pattern.setGlobalCache(new PatternCache(config)); + return original; + } - /** - * Replaces global cache with metrics-enabled configuration. - * - *

Convenience method that creates config with Dropwizard metrics adapter. - * JMX is disabled to prevent test suite conflicts. - * - *

Example: - *

{@code
-     * @BeforeEach
-     * void setup() {
-     *     registry = new MetricRegistry();
-     *     originalCache = TestUtils.replaceGlobalCacheWithMetrics(registry, "test.prefix");
-     * }
-     * }
- * - * @param registry Dropwizard MetricRegistry - * @param prefix metric name prefix - * @return original cache (save for restoration) - */ - public static PatternCache replaceGlobalCacheWithMetrics(MetricRegistry registry, String prefix) { - RE2Config config = testConfigWithMetrics(registry, prefix).build(); - return replaceGlobalCache(config); - } + /** + * Replaces global cache with metrics-enabled configuration. + * + *

Convenience method that creates config with Dropwizard metrics adapter. JMX is disabled to + * prevent test suite conflicts. + * + *

Example: + * + *

{@code
+   * @BeforeEach
+   * void setup() {
+   *     registry = new MetricRegistry();
+   *     originalCache = TestUtils.replaceGlobalCacheWithMetrics(registry, "test.prefix");
+   * }
+   * }
+ * + * @param registry Dropwizard MetricRegistry + * @param prefix metric name prefix + * @return original cache (save for restoration) + */ + public static PatternCache replaceGlobalCacheWithMetrics(MetricRegistry registry, String prefix) { + RE2Config config = testConfigWithMetrics(registry, prefix).build(); + return replaceGlobalCache(config); + } - /** - * Restores original global cache. - * - *

Call in @AfterEach to restore cache state after test. - * - * @param originalCache cache to restore (returned from replaceGlobalCache) - */ - public static void restoreGlobalCache(PatternCache originalCache) { - if (originalCache != null) { - Pattern.setGlobalCache(originalCache); - } + /** + * Restores original global cache. + * + *

Call in @AfterEach to restore cache state after test. + * + * @param originalCache cache to restore (returned from replaceGlobalCache) + */ + public static void restoreGlobalCache(PatternCache originalCache) { + if (originalCache != null) { + Pattern.setGlobalCache(originalCache); } + } } diff --git a/perf-test/src/test/java/com/axonops/libre2/performance/BulkMatchingPerformanceTest.java b/perf-test/src/test/java/com/axonops/libre2/performance/BulkMatchingPerformanceTest.java index e5ab8c7..d269bde 100644 --- a/perf-test/src/test/java/com/axonops/libre2/performance/BulkMatchingPerformanceTest.java +++ b/perf-test/src/test/java/com/axonops/libre2/performance/BulkMatchingPerformanceTest.java @@ -15,188 +15,191 @@ */ package com.axonops.libre2.performance; +import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + import com.axonops.libre2.api.Pattern; import com.axonops.libre2.cache.PatternCache; import com.axonops.libre2.test.TestUtils; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assumptions.assumeTrue; - /** - * Performance tests comparing bulk vs individual matching operations. - * These tests are skipped under QEMU emulation as performance is not representative. + * Performance tests comparing bulk vs individual matching operations. These tests are skipped under + * QEMU emulation as performance is not representative. 
*/ class BulkMatchingPerformanceTest { - private static final Logger logger = LoggerFactory.getLogger(BulkMatchingPerformanceTest.class); - - private static PatternCache originalCache; - - @BeforeAll - static void setUpClass() { - originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build()); + private static final Logger logger = LoggerFactory.getLogger(BulkMatchingPerformanceTest.class); + + private static PatternCache originalCache; + + @BeforeAll + static void setUpClass() { + originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build()); + } + + @AfterAll + static void tearDownClass() { + TestUtils.restoreGlobalCache(originalCache); + } + + /** Detects if running under QEMU emulation (set by CI workflow). */ + private static boolean isQemuEmulation() { + return "true".equals(System.getenv("QEMU_EMULATION")); + } + + @Test + void testBulkVsIndividual_10kStrings() { + assumeTrue(!isQemuEmulation(), "Skipping performance test under QEMU emulation"); + + Pattern pattern = Pattern.compile("item\\d+"); + + // Create 10,000 test strings + List inputs = new ArrayList<>(10_000); + for (int i = 0; i < 10_000; i++) { + if (i % 2 == 0) { + inputs.add("item" + i); + } else { + inputs.add("other" + i); + } } - @AfterAll - static void tearDownClass() { - TestUtils.restoreGlobalCache(originalCache); + // Warmup (JIT compilation) + for (int i = 0; i < 3; i++) { + pattern.matchAll(inputs); + individualMatches(pattern, inputs); } - /** - * Detects if running under QEMU emulation (set by CI workflow). 
- */ - private static boolean isQemuEmulation() { - return "true".equals(System.getenv("QEMU_EMULATION")); + // Benchmark bulk API + long bulkStart = System.nanoTime(); + boolean[] bulkResults = pattern.matchAll(inputs); + long bulkDuration = System.nanoTime() - bulkStart; + + // Benchmark individual calls + long individualStart = System.nanoTime(); + boolean[] individualResults = individualMatches(pattern, inputs); + long individualDuration = System.nanoTime() - individualStart; + + // Verify correctness (both methods should give same results) + assertArrayEquals(bulkResults, individualResults); + + // Calculate speedup + double speedup = (double) individualDuration / bulkDuration; + + logger.info("=== Bulk vs Individual Matching (10,000 strings) ==="); + logger.info( + "Bulk API duration: {} ms ({} μs per match)", + bulkDuration / 1_000_000.0, + bulkDuration / 10_000.0 / 1000.0); + logger.info( + "Individual API duration: {} ms ({} μs per match)", + individualDuration / 1_000_000.0, + individualDuration / 10_000.0 / 1000.0); + logger.info("Speedup: {}x faster", String.format("%.1f", speedup)); + logger.info("Match count: {}/10000", countMatches(bulkResults)); + logger.info("===================================================="); + + // Note: Speedup varies by pattern complexity and JIT warmup + // Simple patterns: 1-3x (matching cost dominates JNI overhead) + // Complex patterns: 5-20x (JNI overhead more significant) + // Performance tests are informational, not assertions + logger.info( + "Note: Speedup varies by pattern complexity. 
Simple patterns: 1-3x, Complex patterns:" + + " 10-20x"); + } + + @Test + void testFilter_Performance() { + assumeTrue(!isQemuEmulation(), "Skipping performance test under QEMU emulation"); + + Pattern pattern = Pattern.compile("[a-z0-9]+@[a-z]+\\.com"); // Allow digits in username + + // Create mix of valid and invalid emails + List inputs = new ArrayList<>(10_000); + for (int i = 0; i < 10_000; i++) { + if (i % 3 == 0) { + inputs.add("user" + i + "@example.com"); // Match + } else { + inputs.add("invalid_" + i); // No match + } } - @Test - void testBulkVsIndividual_10kStrings() { - assumeTrue(!isQemuEmulation(), "Skipping performance test under QEMU emulation"); - - Pattern pattern = Pattern.compile("item\\d+"); - - // Create 10,000 test strings - List inputs = new ArrayList<>(10_000); - for (int i = 0; i < 10_000; i++) { - if (i % 2 == 0) { - inputs.add("item" + i); - } else { - inputs.add("other" + i); - } - } - - // Warmup (JIT compilation) - for (int i = 0; i < 3; i++) { - pattern.matchAll(inputs); - individualMatches(pattern, inputs); - } - - // Benchmark bulk API - long bulkStart = System.nanoTime(); - boolean[] bulkResults = pattern.matchAll(inputs); - long bulkDuration = System.nanoTime() - bulkStart; - - // Benchmark individual calls - long individualStart = System.nanoTime(); - boolean[] individualResults = individualMatches(pattern, inputs); - long individualDuration = System.nanoTime() - individualStart; - - // Verify correctness (both methods should give same results) - assertArrayEquals(bulkResults, individualResults); - - // Calculate speedup - double speedup = (double) individualDuration / bulkDuration; - - logger.info("=== Bulk vs Individual Matching (10,000 strings) ==="); - logger.info("Bulk API duration: {} ms ({} μs per match)", - bulkDuration / 1_000_000.0, bulkDuration / 10_000.0 / 1000.0); - logger.info("Individual API duration: {} ms ({} μs per match)", - individualDuration / 1_000_000.0, individualDuration / 10_000.0 / 1000.0); - 
logger.info("Speedup: {}x faster", String.format("%.1f", speedup)); - logger.info("Match count: {}/10000", countMatches(bulkResults)); - logger.info("===================================================="); - - // Note: Speedup varies by pattern complexity and JIT warmup - // Simple patterns: 1-3x (matching cost dominates JNI overhead) - // Complex patterns: 5-20x (JNI overhead more significant) - // Performance tests are informational, not assertions - logger.info("Note: Speedup varies by pattern complexity. Simple patterns: 1-3x, Complex patterns: 10-20x"); + // Warmup + for (int i = 0; i < 3; i++) { + pattern.filter(inputs); } - @Test - void testFilter_Performance() { - assumeTrue(!isQemuEmulation(), "Skipping performance test under QEMU emulation"); - - Pattern pattern = Pattern.compile("[a-z0-9]+@[a-z]+\\.com"); // Allow digits in username - - // Create mix of valid and invalid emails - List inputs = new ArrayList<>(10_000); - for (int i = 0; i < 10_000; i++) { - if (i % 3 == 0) { - inputs.add("user" + i + "@example.com"); // Match - } else { - inputs.add("invalid_" + i); // No match - } - } - - // Warmup - for (int i = 0; i < 3; i++) { - pattern.filter(inputs); - } - - // Benchmark filter - long start = System.nanoTime(); - List filtered = pattern.filter(inputs); - long duration = System.nanoTime() - start; - - logger.info("=== Filter Performance (10,000 strings) ==="); - logger.info("Duration: {} ms", duration / 1_000_000.0); - logger.info("Filtered count: {}/10000", filtered.size()); - logger.info("Throughput: {} matches/sec", (int)(10_000.0 / (duration / 1_000_000_000.0))); - logger.info("=========================================="); - - // Verify correctness - assertEquals(3334, filtered.size()); // ~1/3 should match - assertTrue(filtered.stream().allMatch(s -> s.contains("@example.com"))); + // Benchmark filter + long start = System.nanoTime(); + List filtered = pattern.filter(inputs); + long duration = System.nanoTime() - start; + + logger.info("=== 
Filter Performance (10,000 strings) ==="); + logger.info("Duration: {} ms", duration / 1_000_000.0); + logger.info("Filtered count: {}/10000", filtered.size()); + logger.info("Throughput: {} matches/sec", (int) (10_000.0 / (duration / 1_000_000_000.0))); + logger.info("=========================================="); + + // Verify correctness + assertEquals(3334, filtered.size()); // ~1/3 should match + assertTrue(filtered.stream().allMatch(s -> s.contains("@example.com"))); + } + + @Test + void testMapFiltering_Performance() { + assumeTrue(!isQemuEmulation(), "Skipping performance test under QEMU emulation"); + + Pattern pattern = Pattern.compile("user\\d+"); + + // Create large map + Map inputs = new HashMap<>(); + for (int i = 0; i < 10_000; i++) { + if (i % 2 == 0) { + inputs.put("user" + i, i); + } else { + inputs.put("admin" + i, i); + } } - @Test - void testMapFiltering_Performance() { - assumeTrue(!isQemuEmulation(), "Skipping performance test under QEMU emulation"); - - Pattern pattern = Pattern.compile("user\\d+"); - - // Create large map - Map inputs = new HashMap<>(); - for (int i = 0; i < 10_000; i++) { - if (i % 2 == 0) { - inputs.put("user" + i, i); - } else { - inputs.put("admin" + i, i); - } - } - - // Warmup - for (int i = 0; i < 3; i++) { - pattern.filterByKey(inputs); - } - - // Benchmark - long start = System.nanoTime(); - Map filtered = pattern.filterByKey(inputs); - long duration = System.nanoTime() - start; - - logger.info("=== Map Filter Performance (10,000 entries) ==="); - logger.info("Duration: {} ms", duration / 1_000_000.0); - logger.info("Filtered count: {}/10000", filtered.size()); - logger.info("=============================================="); - - assertEquals(5000, filtered.size()); - assertTrue(filtered.keySet().stream().allMatch(k -> k.startsWith("user"))); + // Warmup + for (int i = 0; i < 3; i++) { + pattern.filterByKey(inputs); } - private boolean[] individualMatches(Pattern pattern, List inputs) { - boolean[] results = new 
boolean[inputs.size()]; - for (int i = 0; i < inputs.size(); i++) { - results[i] = pattern.matches(inputs.get(i)); - } - return results; + // Benchmark + long start = System.nanoTime(); + Map filtered = pattern.filterByKey(inputs); + long duration = System.nanoTime() - start; + + logger.info("=== Map Filter Performance (10,000 entries) ==="); + logger.info("Duration: {} ms", duration / 1_000_000.0); + logger.info("Filtered count: {}/10000", filtered.size()); + logger.info("=============================================="); + + assertEquals(5000, filtered.size()); + assertTrue(filtered.keySet().stream().allMatch(k -> k.startsWith("user"))); + } + + private boolean[] individualMatches(Pattern pattern, List inputs) { + boolean[] results = new boolean[inputs.size()]; + for (int i = 0; i < inputs.size(); i++) { + results[i] = pattern.matches(inputs.get(i)); } + return results; + } - private int countMatches(boolean[] results) { - int count = 0; - for (boolean match : results) { - if (match) count++; - } - return count; + private int countMatches(boolean[] results) { + int count = 0; + for (boolean match : results) { + if (match) count++; } + return count; + } } diff --git a/perf-test/src/test/java/com/axonops/libre2/performance/CachePerformanceTest.java b/perf-test/src/test/java/com/axonops/libre2/performance/CachePerformanceTest.java index 6c7fa26..04a8550 100644 --- a/perf-test/src/test/java/com/axonops/libre2/performance/CachePerformanceTest.java +++ b/perf-test/src/test/java/com/axonops/libre2/performance/CachePerformanceTest.java @@ -1,9 +1,15 @@ package com.axonops.libre2.performance; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; import com.axonops.libre2.cache.CacheStatistics; import com.axonops.libre2.cache.RE2Config; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import 
java.util.concurrent.atomic.AtomicLong; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -11,317 +17,321 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; - -import static org.assertj.core.api.Assertions.*; -import static org.junit.jupiter.api.Assumptions.assumeTrue; - /** * Performance benchmark tests for the optimized PatternCache. * - * These tests demonstrate the lock-free, non-blocking behavior - * of the ConcurrentHashMap-based cache implementation. + *

These tests demonstrate the lock-free, non-blocking behavior of the ConcurrentHashMap-based + * cache implementation. */ class CachePerformanceTest { - private static final Logger logger = LoggerFactory.getLogger(CachePerformanceTest.class); - - /** - * Detects if running under QEMU emulation (set by CI workflow). - * Performance tests are skipped under QEMU as results are not representative. - */ - private static boolean isQemuEmulation() { - return "true".equals(System.getenv("QEMU_EMULATION")); - } - - @BeforeEach - void setUp() { - Pattern.resetCache(); + private static final Logger logger = LoggerFactory.getLogger(CachePerformanceTest.class); + + /** + * Detects if running under QEMU emulation (set by CI workflow). Performance tests are skipped + * under QEMU as results are not representative. + */ + private static boolean isQemuEmulation() { + return "true".equals(System.getenv("QEMU_EMULATION")); + } + + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + // Restore default configuration for other tests + Pattern.configureCache(RE2Config.DEFAULT); + Pattern.resetCache(); + } + + @Test + @Timeout(value = 120, unit = TimeUnit.SECONDS) + void testHighConcurrencyThroughput() throws InterruptedException { + // Configure higher limit for this test + // 100 threads × 10,000 ops × 10% new = 100,000 unique patterns + 1,000 pre-warmed + RE2Config testConfig = + RE2Config.builder().maxCacheSize(150000).maxSimultaneousCompiledPatterns(200000).build(); + Pattern.configureCache(testConfig); + + // Pre-warm cache with patterns + for (int i = 0; i < 1000; i++) { + Pattern.compile("pattern" + i); } - @AfterEach - void tearDown() { - // Restore default configuration for other tests - Pattern.configureCache(RE2Config.DEFAULT); - Pattern.resetCache(); - } - - @Test - @Timeout(value = 120, unit = TimeUnit.SECONDS) - void testHighConcurrencyThroughput() throws InterruptedException { - // Configure higher limit for this test - // 100 
threads × 10,000 ops × 10% new = 100,000 unique patterns + 1,000 pre-warmed - RE2Config testConfig = RE2Config.builder() - .maxCacheSize(150000) - .maxSimultaneousCompiledPatterns(200000) - .build(); - Pattern.configureCache(testConfig); - - // Pre-warm cache with patterns - for (int i = 0; i < 1000; i++) { - Pattern.compile("pattern" + i); - } - - int threadCount = 100; - int operationsPerThread = 10000; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicLong totalOps = new AtomicLong(0); - AtomicInteger errors = new AtomicInteger(0); + int threadCount = 100; + int operationsPerThread = 10000; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicLong totalOps = new AtomicLong(0); + AtomicInteger errors = new AtomicInteger(0); - long startTime = System.nanoTime(); + long startTime = System.nanoTime(); - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - for (int j = 0; j < operationsPerThread; j++) { - // 90% cache hits, 10% misses - String pattern = (j % 10 == 0) + start.await(); + for (int j = 0; j < operationsPerThread; j++) { + // 90% cache hits, 10% misses + String pattern = + (j % 10 == 0) ? 
"new_pattern_" + threadId + "_" + j : "pattern" + (j % 1000); - Pattern p = Pattern.compile(pattern); - try (Matcher m = p.matcher("test")) { - m.matches(); - } - totalOps.incrementAndGet(); + Pattern p = Pattern.compile(pattern); + try (Matcher m = p.matcher("test")) { + m.matches(); } + totalOps.incrementAndGet(); + } } catch (Exception e) { - errors.incrementAndGet(); - logger.error("Thread error", e); + errors.incrementAndGet(); + logger.error("Thread error", e); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } - - start.countDown(); - done.await(); - - long endTime = System.nanoTime(); - double durationMs = (endTime - startTime) / 1_000_000.0; - double opsPerSecond = (totalOps.get() / durationMs) * 1000; - - CacheStatistics stats = Pattern.getCacheStatistics(); - - logger.info("=== High Concurrency Throughput Test ==="); - logger.info("Threads: {}", threadCount); - logger.info("Operations per thread: {}", operationsPerThread); - logger.info("Total operations: {}", totalOps.get()); - logger.info("Duration: {} ms", String.format("%.2f", durationMs)); - logger.info("Throughput: {} ops/sec", String.format("%.0f", opsPerSecond)); - logger.info("Cache hits: {}", stats.hits()); - logger.info("Cache misses: {}", stats.misses()); - logger.info("Hit rate: {}%", String.format("%.1f", stats.hitRate() * 100)); - logger.info("========================================"); - - // Verify ALL operations completed without errors - assertThat(errors.get()).isEqualTo(0); - long expected = (long) threadCount * operationsPerThread; - assertThat(totalOps.get()).isEqualTo(expected); - // With lock-free implementation, should achieve high throughput - assertThat(opsPerSecond).isGreaterThan(50000); // At least 50K ops/sec + }) + .start(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testCacheHitLatency() throws InterruptedException { - // Pre-warm cache - Pattern testPattern = Pattern.compile("test_pattern"); - - int iterations = 100000; - long[] 
latencies = new long[iterations]; - - for (int i = 0; i < iterations; i++) { - long start = System.nanoTime(); - Pattern p = Pattern.compile("test_pattern"); - latencies[i] = System.nanoTime() - start; - } - - // Calculate statistics - java.util.Arrays.sort(latencies); - long p50 = latencies[iterations / 2]; - long p99 = latencies[(int) (iterations * 0.99)]; - long p999 = latencies[(int) (iterations * 0.999)]; - - double avgNs = java.util.Arrays.stream(latencies).average().orElse(0); - - logger.info("=== Cache Hit Latency Test ==="); - logger.info("Iterations: {}", iterations); - logger.info("Average latency: {} ns ({} μs)", String.format("%.0f", avgNs), String.format("%.2f", avgNs / 1000)); - logger.info("P50 latency: {} ns ({} μs)", p50, p50 / 1000.0); - logger.info("P99 latency: {} ns ({} μs)", p99, p99 / 1000.0); - logger.info("P99.9 latency: {} ns ({} μs)", p999, p999 / 1000.0); - logger.info("=============================="); - - // With lock-free implementation, cache hits should be very fast - assertThat(p50).isLessThan(10000); // < 10μs P50 - assertThat(p99).isLessThan(100000); // < 100μs P99 + start.countDown(); + done.await(); + + long endTime = System.nanoTime(); + double durationMs = (endTime - startTime) / 1_000_000.0; + double opsPerSecond = (totalOps.get() / durationMs) * 1000; + + CacheStatistics stats = Pattern.getCacheStatistics(); + + logger.info("=== High Concurrency Throughput Test ==="); + logger.info("Threads: {}", threadCount); + logger.info("Operations per thread: {}", operationsPerThread); + logger.info("Total operations: {}", totalOps.get()); + logger.info("Duration: {} ms", String.format("%.2f", durationMs)); + logger.info("Throughput: {} ops/sec", String.format("%.0f", opsPerSecond)); + logger.info("Cache hits: {}", stats.hits()); + logger.info("Cache misses: {}", stats.misses()); + logger.info("Hit rate: {}%", String.format("%.1f", stats.hitRate() * 100)); + logger.info("========================================"); + + // Verify ALL 
operations completed without errors + assertThat(errors.get()).isEqualTo(0); + long expected = (long) threadCount * operationsPerThread; + assertThat(totalOps.get()).isEqualTo(expected); + // With lock-free implementation, should achieve high throughput + assertThat(opsPerSecond).isGreaterThan(50000); // At least 50K ops/sec + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testCacheHitLatency() throws InterruptedException { + // Pre-warm cache + Pattern testPattern = Pattern.compile("test_pattern"); + + int iterations = 100000; + long[] latencies = new long[iterations]; + + for (int i = 0; i < iterations; i++) { + long start = System.nanoTime(); + Pattern p = Pattern.compile("test_pattern"); + latencies[i] = System.nanoTime() - start; } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testEvictionDoesNotBlockAccess() throws InterruptedException { - // This test verifies that cache operations complete without errors during - // heavy concurrent eviction. The old synchronized implementation would - // have thread contention causing failures or extreme latency. - // - // We don't assert on latency because: - // 1. GC pauses from many objects can cause spikes (150-200ms) - // 2. CI environments have variable performance - // 3. 
The key invariant is: operations complete without errors - - // Pre-compile patterns for cache hits - String[] hitPatterns = new String[100]; - for (int i = 0; i < 100; i++) { - hitPatterns[i] = "hit_pattern_" + i; - Pattern.compile(hitPatterns[i]); - } - - // Fill cache to trigger eviction (use smaller count to reduce GC) - for (int i = 0; i < 10000; i++) { - Pattern.compile("fill_" + i); - } - - int threadCount = 50; - int operationsPerThread = 500; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicLong totalHitOps = new AtomicLong(0); - AtomicLong totalEvictOps = new AtomicLong(0); - AtomicInteger errors = new AtomicInteger(0); + // Calculate statistics + java.util.Arrays.sort(latencies); + long p50 = latencies[iterations / 2]; + long p99 = latencies[(int) (iterations * 0.99)]; + long p999 = latencies[(int) (iterations * 0.999)]; + + double avgNs = java.util.Arrays.stream(latencies).average().orElse(0); + + logger.info("=== Cache Hit Latency Test ==="); + logger.info("Iterations: {}", iterations); + logger.info( + "Average latency: {} ns ({} μs)", + String.format("%.0f", avgNs), + String.format("%.2f", avgNs / 1000)); + logger.info("P50 latency: {} ns ({} μs)", p50, p50 / 1000.0); + logger.info("P99 latency: {} ns ({} μs)", p99, p99 / 1000.0); + logger.info("P99.9 latency: {} ns ({} μs)", p999, p999 / 1000.0); + logger.info("=============================="); + + // With lock-free implementation, cache hits should be very fast + assertThat(p50).isLessThan(10000); // < 10μs P50 + assertThat(p99).isLessThan(100000); // < 100μs P99 + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testEvictionDoesNotBlockAccess() throws InterruptedException { + // This test verifies that cache operations complete without errors during + // heavy concurrent eviction. The old synchronized implementation would + // have thread contention causing failures or extreme latency. 
+ // + // We don't assert on latency because: + // 1. GC pauses from many objects can cause spikes (150-200ms) + // 2. CI environments have variable performance + // 3. The key invariant is: operations complete without errors + + // Pre-compile patterns for cache hits + String[] hitPatterns = new String[100]; + for (int i = 0; i < 100; i++) { + hitPatterns[i] = "hit_pattern_" + i; + Pattern.compile(hitPatterns[i]); + } - // Half threads do cache hits, half trigger evictions - for (int i = 0; i < threadCount; i++) { - int threadId = i; - boolean doHits = (i % 2 == 0); + // Fill cache to trigger eviction (use smaller count to reduce GC) + for (int i = 0; i < 10000; i++) { + Pattern.compile("fill_" + i); + } - new Thread(() -> { + int threadCount = 50; + int operationsPerThread = 500; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicLong totalHitOps = new AtomicLong(0); + AtomicLong totalEvictOps = new AtomicLong(0); + AtomicInteger errors = new AtomicInteger(0); + + // Half threads do cache hits, half trigger evictions + for (int i = 0; i < threadCount; i++) { + int threadId = i; + boolean doHits = (i % 2 == 0); + + new Thread( + () -> { try { - start.await(); - for (int j = 0; j < operationsPerThread; j++) { - if (doHits) { - Pattern p = Pattern.compile(hitPatterns[j % 100]); - try (Matcher m = p.matcher("test")) { - m.matches(); - } - totalHitOps.incrementAndGet(); - } else { - Pattern p = Pattern.compile("new_" + threadId + "_" + j); - try (Matcher m = p.matcher("test")) { - m.matches(); - } - totalEvictOps.incrementAndGet(); - } + start.await(); + for (int j = 0; j < operationsPerThread; j++) { + if (doHits) { + Pattern p = Pattern.compile(hitPatterns[j % 100]); + try (Matcher m = p.matcher("test")) { + m.matches(); + } + totalHitOps.incrementAndGet(); + } else { + Pattern p = Pattern.compile("new_" + threadId + "_" + j); + try (Matcher m = p.matcher("test")) { + m.matches(); + } + 
totalEvictOps.incrementAndGet(); } + } } catch (Exception e) { - errors.incrementAndGet(); - logger.error("Thread error", e); + errors.incrementAndGet(); + logger.error("Thread error", e); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } - - long startTime = System.nanoTime(); - start.countDown(); - done.await(); - long durationMs = (System.nanoTime() - startTime) / 1_000_000; - - logger.info("=== Eviction Non-Blocking Test ==="); - logger.info("Threads: {} ({} doing hits, {} triggering eviction)", threadCount, threadCount/2, threadCount/2); - logger.info("Total hit operations: {}", totalHitOps.get()); - logger.info("Total eviction operations: {}", totalEvictOps.get()); - logger.info("Duration: {} ms", durationMs); - logger.info("Errors: {}", errors.get()); - logger.info("=================================="); - - // Key assertions: - // 1. All operations completed (no deadlocks, no blocking) - int expectedHitOps = (threadCount / 2) * operationsPerThread; - int expectedEvictOps = (threadCount / 2) * operationsPerThread; - assertThat(totalHitOps.get()).isEqualTo(expectedHitOps); - assertThat(totalEvictOps.get()).isEqualTo(expectedEvictOps); - - // 2. No errors occurred - assertThat(errors.get()).isEqualTo(0); - - // 3. 
Test completed in reasonable time (not blocked for seconds) - // Old synchronized code could take 10+ seconds; lock-free should complete in <5s - assertThat(durationMs).isLessThan(5000); + }) + .start(); } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testConcurrentCompilationScalability() throws InterruptedException { - // Test that throughput stays high with concurrent threads - // (old synchronized code would collapse to near-zero with many threads) - int[] threadCounts = {1, 10, 50, 100}; - int operationsPerThread = 5000; - - logger.info("=== Scalability Test ==="); - - // Pre-warm: ensure native library loaded and JIT warmed up - Pattern.compile("warmup"); - - double previousThroughput = 0; - - for (int threadCount : threadCounts) { - Pattern.resetCache(); - - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicLong totalOps = new AtomicLong(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { - try { - start.await(); - for (int j = 0; j < operationsPerThread; j++) { - Pattern.compile("pattern" + threadId + "_" + (j % 100)); - totalOps.incrementAndGet(); - } - } catch (Exception e) { - logger.error("Thread error", e); - } finally { - done.countDown(); + long startTime = System.nanoTime(); + start.countDown(); + done.await(); + long durationMs = (System.nanoTime() - startTime) / 1_000_000; + + logger.info("=== Eviction Non-Blocking Test ==="); + logger.info( + "Threads: {} ({} doing hits, {} triggering eviction)", + threadCount, + threadCount / 2, + threadCount / 2); + logger.info("Total hit operations: {}", totalHitOps.get()); + logger.info("Total eviction operations: {}", totalEvictOps.get()); + logger.info("Duration: {} ms", durationMs); + logger.info("Errors: {}", errors.get()); + logger.info("=================================="); + + // Key assertions: + // 1. 
All operations completed (no deadlocks, no blocking) + int expectedHitOps = (threadCount / 2) * operationsPerThread; + int expectedEvictOps = (threadCount / 2) * operationsPerThread; + assertThat(totalHitOps.get()).isEqualTo(expectedHitOps); + assertThat(totalEvictOps.get()).isEqualTo(expectedEvictOps); + + // 2. No errors occurred + assertThat(errors.get()).isEqualTo(0); + + // 3. Test completed in reasonable time (not blocked for seconds) + // Old synchronized code could take 10+ seconds; lock-free should complete in <5s + assertThat(durationMs).isLessThan(5000); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testConcurrentCompilationScalability() throws InterruptedException { + // Test that throughput stays high with concurrent threads + // (old synchronized code would collapse to near-zero with many threads) + int[] threadCounts = {1, 10, 50, 100}; + int operationsPerThread = 5000; + + logger.info("=== Scalability Test ==="); + + // Pre-warm: ensure native library loaded and JIT warmed up + Pattern.compile("warmup"); + + double previousThroughput = 0; + + for (int threadCount : threadCounts) { + Pattern.resetCache(); + + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicLong totalOps = new AtomicLong(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { + try { + start.await(); + for (int j = 0; j < operationsPerThread; j++) { + Pattern.compile("pattern" + threadId + "_" + (j % 100)); + totalOps.incrementAndGet(); } - }).start(); - } - - long startTime = System.nanoTime(); - start.countDown(); - done.await(); - long endTime = System.nanoTime(); - - double durationMs = (endTime - startTime) / 1_000_000.0; - double throughput = (totalOps.get() / durationMs) * 1000; - - logger.info("{} threads: {} ops/sec", threadCount, String.format("%.0f", throughput)); - - // Key test: throughput should NOT collapse with more threads - // Old synchronized 
implementation would collapse to near-zero - // With lock-free implementation, throughput scales with thread count - // Skip throughput assertions under QEMU and CI (too slow/variable for strict thresholds) - boolean isCI = System.getenv("CI") != null || System.getenv("GITHUB_ACTIONS") != null; - if (!isQemuEmulation() && !isCI) { - if (threadCount == 1) { - // Single thread does cold compilation - expect at least 50K ops/sec - assertThat(throughput).isGreaterThan(50000); - } else { - // Multi-threaded should scale - at least 100K ops/sec - // (each thread compiles its own unique patterns, no contention) - assertThat(throughput).isGreaterThan(100000); - } - } - - previousThroughput = throughput; + } catch (Exception e) { + logger.error("Thread error", e); + } finally { + done.countDown(); + } + }) + .start(); + } + + long startTime = System.nanoTime(); + start.countDown(); + done.await(); + long endTime = System.nanoTime(); + + double durationMs = (endTime - startTime) / 1_000_000.0; + double throughput = (totalOps.get() / durationMs) * 1000; + + logger.info("{} threads: {} ops/sec", threadCount, String.format("%.0f", throughput)); + + // Key test: throughput should NOT collapse with more threads + // Old synchronized implementation would collapse to near-zero + // With lock-free implementation, throughput scales with thread count + // Skip throughput assertions under QEMU and CI (too slow/variable for strict thresholds) + boolean isCI = System.getenv("CI") != null || System.getenv("GITHUB_ACTIONS") != null; + if (!isQemuEmulation() && !isCI) { + if (threadCount == 1) { + // Single thread does cold compilation - expect at least 50K ops/sec + assertThat(throughput).isGreaterThan(50000); + } else { + // Multi-threaded should scale - at least 100K ops/sec + // (each thread compiles its own unique patterns, no contention) + assertThat(throughput).isGreaterThan(100000); } + } - logger.info("========================"); + previousThroughput = throughput; } + + 
logger.info("========================"); + } } diff --git a/perf-test/src/test/java/com/axonops/libre2/stress/StressTest.java b/perf-test/src/test/java/com/axonops/libre2/stress/StressTest.java index 8596a14..f21f219 100644 --- a/perf-test/src/test/java/com/axonops/libre2/stress/StressTest.java +++ b/perf-test/src/test/java/com/axonops/libre2/stress/StressTest.java @@ -1,187 +1,192 @@ package com.axonops.libre2.stress; +import static org.assertj.core.api.Assertions.*; + import com.axonops.libre2.api.Matcher; import com.axonops.libre2.api.Pattern; import com.axonops.libre2.cache.CacheStatistics; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; - import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; -import static org.assertj.core.api.Assertions.*; - -/** - * Sustained load stress tests for production-level concurrency. - */ +/** Sustained load stress tests for production-level concurrency. 
*/ class StressTest { - @BeforeEach - void setUp() { - Pattern.resetCache(); - } - - @AfterEach - void tearDown() { - Pattern.resetCache(); - } - - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testSustainedLoad_1000Operations_100Threads() throws InterruptedException { - int threadCount = 100; - int operationsPerThread = 1000; - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - AtomicLong compileOps = new AtomicLong(0); - AtomicLong matchOps = new AtomicLong(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + @BeforeEach + void setUp() { + Pattern.resetCache(); + } + + @AfterEach + void tearDown() { + Pattern.resetCache(); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testSustainedLoad_1000Operations_100Threads() throws InterruptedException { + int threadCount = 100; + int operationsPerThread = 1000; + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + AtomicLong compileOps = new AtomicLong(0); + AtomicLong matchOps = new AtomicLong(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - for (int j = 0; j < operationsPerThread; j++) { - int op = j % 10; - - if (op < 3) { - // 30%: Compile pattern - Pattern.compile("pattern" + (j % 50)); // Reuse some patterns - compileOps.incrementAndGet(); - } else { - // 70%: Match using pattern - Pattern p = Pattern.compile("test\\d+"); - try (Matcher m = p.matcher("test" + j)) { - m.find(); - } - matchOps.incrementAndGet(); - } + for (int j = 0; j < operationsPerThread; j++) { + int op = j % 10; + + if (op < 3) { + // 30%: Compile pattern + Pattern.compile("pattern" + (j % 50)); // Reuse some patterns + compileOps.incrementAndGet(); + } else { + // 70%: Match using pattern + Pattern p = Pattern.compile("test\\d+"); + try (Matcher m = p.matcher("test" + j)) { + m.find(); + } + matchOps.incrementAndGet(); 
} + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - done.await(); + done.await(); - assertThat(errors.get()).isEqualTo(0); - assertThat(compileOps.get() + matchOps.get()).isEqualTo(threadCount * operationsPerThread); + assertThat(errors.get()).isEqualTo(0); + assertThat(compileOps.get() + matchOps.get()).isEqualTo(threadCount * operationsPerThread); - // Cache should be stable (not growing unbounded) - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.currentSize()).isLessThanOrEqualTo(50000); - } + // Cache should be stable (not growing unbounded) + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.currentSize()).isLessThanOrEqualTo(50000); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testBurst_1000Patterns_Simultaneous() throws InterruptedException { - int threadCount = 1000; - CountDownLatch start = new CountDownLatch(1); - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testBurst_1000Patterns_Simultaneous() throws InterruptedException { + int threadCount = 1000; + CountDownLatch start = new CountDownLatch(1); + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); + + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - start.await(); - Pattern p = Pattern.compile("burst_pattern_" + threadId); - assertThat(p).isNotNull(); + start.await(); + Pattern p = Pattern.compile("burst_pattern_" + threadId); + assertThat(p).isNotNull(); } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + 
.start(); + } - start.countDown(); - done.await(); + start.countDown(); + done.await(); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // Cache should handle burst, with evictions - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.currentSize()).isLessThanOrEqualTo(50000); - assertThat(stats.misses()).isEqualTo(1000); - } + // Cache should handle burst, with evictions + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.currentSize()).isLessThanOrEqualTo(50000); + assertThat(stats.misses()).isEqualTo(1000); + } - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testMemoryPressure_LargePatterns() throws InterruptedException { - int threadCount = 100; - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testMemoryPressure_LargePatterns() throws InterruptedException { + int threadCount = 100; + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); - // Build complex patterns (large when compiled) - String complexPattern = "(\\w+|\\d+|[a-z]{10,50}|email@[\\w.]+|https?://[\\w./]+){0,20}"; + // Build complex patterns (large when compiled) + String complexPattern = "(\\w+|\\d+|[a-z]{10,50}|email@[\\w.]+|https?://[\\w./]+){0,20}"; - for (int i = 0; i < threadCount; i++) { - new Thread(() -> { + for (int i = 0; i < threadCount; i++) { + new Thread( + () -> { try { - Pattern p = Pattern.compile(complexPattern); - try (Matcher m = p.matcher("test123email@example.comhttp://test.com")) { - m.find(); - } + Pattern p = Pattern.compile(complexPattern); + try (Matcher m = p.matcher("test123email@example.comhttp://test.com")) { + m.find(); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - done.await(); + 
done.await(); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // Cache should handle large patterns without OOM - CacheStatistics stats = Pattern.getCacheStatistics(); - assertThat(stats.currentSize()).isGreaterThan(0); - } + // Cache should handle large patterns without OOM + CacheStatistics stats = Pattern.getCacheStatistics(); + assertThat(stats.currentSize()).isGreaterThan(0); + } + + @Test + @Timeout(value = 60, unit = TimeUnit.SECONDS) + void testMemoryPressure_ManySmallPatterns() throws InterruptedException { + int threadCount = 100; + int patternsPerThread = 600; // 60K total > 50K cache + CountDownLatch done = new CountDownLatch(threadCount); + AtomicInteger errors = new AtomicInteger(0); - @Test - @Timeout(value = 60, unit = TimeUnit.SECONDS) - void testMemoryPressure_ManySmallPatterns() throws InterruptedException { - int threadCount = 100; - int patternsPerThread = 600; // 60K total > 50K cache - CountDownLatch done = new CountDownLatch(threadCount); - AtomicInteger errors = new AtomicInteger(0); - - for (int i = 0; i < threadCount; i++) { - int threadId = i; - new Thread(() -> { + for (int i = 0; i < threadCount; i++) { + int threadId = i; + new Thread( + () -> { try { - for (int j = 0; j < patternsPerThread; j++) { - Pattern.compile("p" + threadId + "_" + j); - } + for (int j = 0; j < patternsPerThread; j++) { + Pattern.compile("p" + threadId + "_" + j); + } } catch (Exception e) { - errors.incrementAndGet(); + errors.incrementAndGet(); } finally { - done.countDown(); + done.countDown(); } - }).start(); - } + }) + .start(); + } - done.await(); + done.await(); - // Wait for async eviction to complete (must exceed evictionProtectionMs of 1 second) - Thread.sleep(1500); + // Wait for async eviction to complete (must exceed evictionProtectionMs of 1 second) + Thread.sleep(1500); - assertThat(errors.get()).isEqualTo(0); + assertThat(errors.get()).isEqualTo(0); - // 60,000 patterns compiled - cache should enforce soft 
size limit - CacheStatistics stats = Pattern.getCacheStatistics(); - // With soft limits, allow up to 20% overage during high concurrent load - int maxAllowed = (int) (50000 * 1.2); - assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); - // Some evictions should have occurred - assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThan(0); - } + // 60,000 patterns compiled - cache should enforce soft size limit + CacheStatistics stats = Pattern.getCacheStatistics(); + // With soft limits, allow up to 20% overage during high concurrent load + int maxAllowed = (int) (50000 * 1.2); + assertThat(stats.currentSize()).isLessThanOrEqualTo(maxAllowed); + // Some evictions should have occurred + assertThat(stats.evictionsLRU() + stats.evictionsDeferred()).isGreaterThan(0); + } } diff --git a/pom.xml b/pom.xml index d8e648d..f06f837 100644 --- a/pom.xml +++ b/pom.xml @@ -58,6 +58,9 @@ 0.8.11 + 10.12.5 + 3.3.1 + 2.21.1 @@ -136,6 +139,27 @@ maven-assembly-plugin 3.6.0 + + + + org.apache.maven.plugins + maven-checkstyle-plugin + ${maven-checkstyle-plugin.version} + + + com.puppycrawl.tools + checkstyle + ${checkstyle.version} + + + + + + + com.spotify.fmt + fmt-maven-plugin + ${fmt-maven-plugin.version} +