diff --git a/.github/workflows/test-platforms.yml b/.github/workflows/test-platforms.yml
index d997169..d19bf17 100644
--- a/.github/workflows/test-platforms.yml
+++ b/.github/workflows/test-platforms.yml
@@ -57,6 +57,9 @@ jobs:
java-version: '17'
cache: 'maven'
+ - name: Run Checkstyle
+ run: mvn checkstyle:check -B
+
- name: Build JARs (multi-module)
run: mvn clean package -DskipTests -B
diff --git a/CHECKSTYLE_ASSESSMENT.md b/CHECKSTYLE_ASSESSMENT.md
new file mode 100644
index 0000000..6040f75
--- /dev/null
+++ b/CHECKSTYLE_ASSESSMENT.md
@@ -0,0 +1,206 @@
+# Checkstyle Violation Assessment
+
+**Date:** 2025-11-27
+**Branch:** feature/checkstyle
+**Configuration:** Google Java Style (google_checks.xml)
+
+---
+
+## Summary
+
+**Total Violations:** 3,196
+**Files Affected:** 28 (24 production + 4 test)
+**Build Impact:** Fails with "You have 3196 Checkstyle violations"
+
+---
+
+## Violation Breakdown by Type
+
+| Violation Type | Count | % of Total | Description |
+|----------------|-------|------------|-------------|
+| **IndentationCheck** | 2,666 | 83% | 4-space indent vs Google's 2-space |
+| **JavadocParagraphCheck** | 240 | 8% | Javadoc formatting issues |
+| **LineLengthCheck** | 141 | 4% | Lines exceed 100 chars |
+| **CustomImportOrderCheck** | 44 | 1% | Import order incorrect |
+| **EmptyLineSeparatorCheck** | 26 | 1% | Missing blank lines |
+| **AbbreviationAsWordInNameCheck** | 17 | <1% | "RE2" has 2 consecutive capitals |
+| **VariableDeclarationUsageDistanceCheck** | 19 | <1% | Variable declared too far from use |
+| **OverloadMethodsDeclarationOrderCheck** | 15 | <1% | Overloaded methods not grouped |
+| **OperatorWrapCheck** | 13 | <1% | Operator wrapping style |
+| **Other** | 15 | <1% | Misc violations |
+
+---
+
+## The Core Problem: Indentation (83% of violations)
+
+### Current Code Style:
+```java
+public class Pattern {
+ private final String pattern; // ← 4-space indent
+
+ public void method() { // ← 4-space indent
+ doSomething(); // ← 8-space indent
+ }
+}
+```
+
+### Google Java Style Requires:
+```java
+public class Pattern {
+ private final String pattern; // ← 2-space indent
+
+ public void method() { // ← 2-space indent
+ doSomething(); // ← 4-space indent
+ }
+}
+```
+
+**Impact of Fixing:**
+- Reformat ALL 27 production classes
+- Reformat ALL test classes
+- ~5,000-6,000 lines changed (indentation-only)
+- Massive diff, but purely cosmetic
+
+---
+
+## Files with Most Violations
+
+**Top files by violation count (approximate):**
+1. Pattern.java - ~800 violations
+2. PatternCache.java - ~300 violations
+3. RE2.java - ~200 violations
+4. Matcher.java - ~150 violations
+5. IdleEvictionTask.java - ~100 violations
+6. RE2Config.java - ~80 violations
+7. (Other files) - ~50-100 each
+
+---
+
+## Options for Resolving
+
+### Option 1: Modify Google Style to Allow 4-Space Indentation ✅ RECOMMENDED
+
+**Pros:**
+- Fixes 2,666 violations (83%) immediately
+- No code changes needed
+- Keep other Google Style rules
+- Only ~530 violations remain (manageable)
+
+**Cons:**
+- Not "pure" Google Style
+- Custom configuration to maintain
+
+**Implementation:**
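+```xml
+<module name="Indentation">
+  <property name="basicOffset" value="4"/>
+  <property name="caseIndent" value="4"/>
+  <property name="lineWrappingIndentation" value="8"/>
+</module>
+```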
+
+---
+
+### Option 2: Reformat Everything to Google Style
+
+**Pros:**
+- Strict compliance with Google Style
+- Standard configuration (no customization)
+- Clean, consistent 2-space indent
+
+**Cons:**
+- Massive diff (~5,000-6,000 lines changed)
+- All indentation changes in git history
+- Requires IDE reconfiguration for developers
+
+**Implementation:**
+- Use IntelliJ/Eclipse auto-formatter with Google Style
+- Run on all files
+- Commit reformatted code
+
+---
+
+### Option 3: Suppress Indentation, Fix Other Violations
+
+**Pros:**
+- Focus on real issues (Javadoc, line length, etc.)
+- No massive reformatting
+- Incremental improvement
+
+**Cons:**
+- Don't enforce indentation consistency
+- Leaves the check responsible for 83% of current violations unenforced
+
+**Implementation:**
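+```xml
+<module name="SuppressionSingleFilter">
+  <property name="checks" value="Indentation"/>
+  <property name="files" value=".*\.java"/>
+</module>
+```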
+
+---
+
+## Remaining Violations (if we fix indentation)
+
+**After fixing indentation, ~530 violations remain:**
+
+1. **JavadocParagraphCheck:** 240 violations
+ - Add `<p>` tags in Javadoc
+ - Easy to fix with regex
+
+2. **LineLengthCheck:** 141 violations
+ - Break long lines
+ - Some might be unavoidable (long method signatures, URLs)
+
+3. **CustomImportOrderCheck:** 44 violations
+ - Reorder imports (IDE can fix automatically)
+
+4. **AbbreviationAsWordInNameCheck:** 17 violations
+ - "RE2" violates rule (max 1 consecutive capital)
+ - Would need to rename classes (not recommended)
+ - Suggest: Suppress this check for "RE2*" pattern
+
+5. **Other:** ~88 violations
+ - Various small issues
+ - Can be fixed incrementally
+
+---
+
+## Recommendations
+
+### Phase 1: Configure for 4-Space Indentation (Now)
+1. Modify `config/checkstyle/google_checks.xml`
+2. Change `basicOffset` from 2 to 4
+3. Re-run Checkstyle
+4. Violations drop: 3,196 → ~530
+
+### Phase 2: Suppress RE2 Abbreviation Rule (Now)
+1. Add suppression for `RE2*` class names
+2. Violations drop: ~530 → ~513
+
+### Phase 3: Fix Remaining Violations (Later/Incrementally)
+1. Fix Javadoc paragraphs (~240 violations)
+2. Fix import order (~44 violations)
+3. Fix line length where reasonable (~141 violations)
+4. Fix misc violations (~88 violations)
+
+---
+
+## Decision Needed
+
+**What approach do you prefer?**
+
+**A) Modified Google Style (4-space indent)** ← Recommended
+- Quick win, fixes 83% immediately
+- ~530 violations to fix incrementally
+
+**B) Pure Google Style (2-space indent)**
+- Reformat everything
+- Large one-time diff
+
+**C) Suppress indentation entirely**
+- No indentation checking
+- Focus on other rules
+
+**Token Usage:** ~403,000 / 1,000,000 (40.3% used)
diff --git a/JNI_MOCKABILITY_DESIGN.md b/JNI_MOCKABILITY_DESIGN.md
deleted file mode 100644
index 361ea7a..0000000
--- a/JNI_MOCKABILITY_DESIGN.md
+++ /dev/null
@@ -1,355 +0,0 @@
-# JNI Mockability Design - Clean Interface Abstraction
-
-**Goal:** Make all native calls mockable for unit testing without breaking existing API
-
----
-
-## Design: Internal JniAdapter with Package-Private Injection
-
-### 1. Create JniAdapter Interface (Package-Private)
-
-```java
-package com.axonops.libre2.jni;
-
-/**
- * Adapter interface for RE2 JNI operations.
- * Package-private for testing - not part of public API.
- */
-interface JniAdapter {
- // Pattern lifecycle
- long compile(String pattern, boolean caseSensitive);
- void freePattern(long handle);
- boolean patternOk(long handle);
- String getError();
- String getPattern(long handle);
- int numCapturingGroups(long handle);
- long patternMemory(long handle);
-
- // Matching operations
- boolean fullMatch(long handle, String text);
- boolean partialMatch(long handle, String text);
- boolean fullMatchDirect(long handle, long address, int length);
- boolean partialMatchDirect(long handle, long address, int length);
-
- // Bulk operations
- boolean[] fullMatchBulk(long handle, String[] texts);
- boolean[] partialMatchBulk(long handle, String[] texts);
- boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths);
- boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths);
-
- // Capture groups
- String[] extractGroups(long handle, String text);
- String[][] extractGroupsBulk(long handle, String[] texts);
- String[] extractGroupsDirect(long handle, long address, int length);
- String[][] extractGroupsDirectBulk(long handle, long[] addresses, int[] lengths);
- String[][] findAllMatches(long handle, String text);
- String[][] findAllMatchesDirect(long handle, long address, int length);
- String[] getNamedGroups(long handle);
-
- // Replace operations
- String replaceFirst(long handle, String text, String replacement);
- String replaceAll(long handle, String text, String replacement);
- String[] replaceAllBulk(long handle, String[] texts, String replacement);
- String replaceFirstDirect(long handle, long address, int length, String replacement);
- String replaceAllDirect(long handle, long address, int length, String replacement);
- String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement);
-}
-```
-
-### 2. Production Implementation (Package-Private)
-
-```java
-package com.axonops.libre2.jni;
-
-/**
- * Production JNI adapter - delegates directly to RE2NativeJNI.
- * Package-private - not part of public API.
- */
-class DirectJniAdapter implements JniAdapter {
-
- // Singleton instance
- static final DirectJniAdapter INSTANCE = new DirectJniAdapter();
-
- private DirectJniAdapter() {
- // Private constructor
- }
-
- @Override
- public long compile(String pattern, boolean caseSensitive) {
- return RE2NativeJNI.compile(pattern, caseSensitive);
- }
-
- @Override
- public void freePattern(long handle) {
- RE2NativeJNI.freePattern(handle);
- }
-
- // ... delegate all 29 methods to RE2NativeJNI
-}
-```
-
-### 3. Pattern Internal Field (Package-Private Injection Point)
-
-```java
-package com.axonops.libre2.api;
-
-public final class Pattern implements AutoCloseable {
-
- // Package-private for testing - production uses singleton
- final JniAdapter jni;
-
- private final long nativeHandle;
- private final String pattern;
- // ... other fields
-
- // PRIVATE constructor - used internally
- private Pattern(JniAdapter jni, String pattern, boolean caseSensitive, PatternCache cache) {
- this.jni = jni;
- this.pattern = pattern;
- this.cache = cache;
-
- // Compile using adapter
- long handle = jni.compile(pattern, caseSensitive);
- if (handle == 0 || !jni.patternOk(handle)) {
- String error = jni.getError();
- throw new PatternCompilationException("Failed to compile pattern: " + error);
- }
- this.nativeHandle = handle;
- // ... rest of initialization
- }
-
- // PUBLIC API - unchanged, uses production adapter
- public static Pattern compile(String pattern) {
- return compile(pattern, true);
- }
-
- public static Pattern compile(String pattern, boolean caseSensitive) {
- // Production code uses singleton DirectJniAdapter
- return compile(pattern, caseSensitive, DirectJniAdapter.INSTANCE);
- }
-
- // PACKAGE-PRIVATE for testing - inject mock adapter
- static Pattern compile(String pattern, boolean caseSensitive, JniAdapter jni) {
- PatternCache cache = getGlobalCache();
- // ... cache lookup logic
- return new Pattern(jni, pattern, caseSensitive, cache);
- }
-
- // All operations use this.jni instead of RE2NativeJNI directly
- public boolean match(String input) {
- checkNotClosed();
- Objects.requireNonNull(input, "input cannot be null");
-
- long startNanos = System.nanoTime();
- boolean result = jni.fullMatch(nativeHandle, input); // Uses adapter!
- long durationNanos = System.nanoTime() - startNanos;
-
- // ... metrics recording
- return result;
- }
-
- // ... all other methods use this.jni
-}
-```
-
-### 4. Test Usage - Clean and Powerful
-
-```java
-package com.axonops.libre2.api;
-
-import com.axonops.libre2.jni.JniAdapter;
-import org.junit.jupiter.api.Test;
-import org.mockito.Mockito;
-
-import static org.assertj.core.api.Assertions.*;
-import static org.mockito.Mockito.*;
-
-class PatternUnitTest {
-
- @Test
- void testMatch_callsCorrectJniMethod() {
- // Create mock adapter
- JniAdapter mockJni = mock(JniAdapter.class);
-
- // Setup expectations
- when(mockJni.compile("test\\d+", true)).thenReturn(12345L);
- when(mockJni.patternOk(12345L)).thenReturn(true);
- when(mockJni.numCapturingGroups(12345L)).thenReturn(0);
- when(mockJni.patternMemory(12345L)).thenReturn(1024L);
- when(mockJni.fullMatch(12345L, "test123")).thenReturn(true);
-
- // Create pattern with mock adapter (package-private method)
- Pattern pattern = Pattern.compile("test\\d+", true, mockJni);
-
- // Execute
- boolean result = pattern.match("test123");
-
- // Verify
- assertThat(result).isTrue();
- verify(mockJni).compile("test\\d+", true);
- verify(mockJni).fullMatch(12345L, "test123");
- verifyNoMoreInteractions(mockJni);
- }
-
- @Test
- void testReplaceAll_callsCorrectJniMethod() {
- JniAdapter mockJni = mock(JniAdapter.class);
-
- when(mockJni.compile("\\d+", true)).thenReturn(67890L);
- when(mockJni.patternOk(67890L)).thenReturn(true);
- when(mockJni.numCapturingGroups(67890L)).thenReturn(0);
- when(mockJni.patternMemory(67890L)).thenReturn(512L);
- when(mockJni.replaceAll(67890L, "test123", "XXX")).thenReturn("testXXX");
-
- Pattern pattern = Pattern.compile("\\d+", true, mockJni);
- String result = pattern.replaceAll("test123", "XXX");
-
- assertThat(result).isEqualTo("testXXX");
- verify(mockJni).replaceAll(67890L, "test123", "XXX");
- }
-
- @Test
- void testBulkMatch_callsCorrectBulkJniMethod() {
- JniAdapter mockJni = mock(JniAdapter.class);
-
- when(mockJni.compile("test", true)).thenReturn(11111L);
- when(mockJni.patternOk(11111L)).thenReturn(true);
- when(mockJni.numCapturingGroups(11111L)).thenReturn(0);
- when(mockJni.patternMemory(11111L)).thenReturn(256L);
-
- String[] inputs = {"test1", "test2", "other"};
- boolean[] expected = {true, true, false};
- when(mockJni.fullMatchBulk(11111L, inputs)).thenReturn(expected);
-
- Pattern pattern = Pattern.compile("test", true, mockJni);
- boolean[] results = pattern.matchAll(inputs);
-
- assertThat(results).isEqualTo(expected);
- verify(mockJni).fullMatchBulk(11111L, inputs);
- }
-}
-```
-
----
-
-## Benefits of This Design
-
-### ✅ 1. Public API Unchanged
-```java
-// Users still write this - no breaking changes
-Pattern p = Pattern.compile("test\\d+");
-boolean match = p.match("test123");
-```
-
-### ✅ 2. Full Test Control
-```java
-// Tests can inject mock and verify exact calls
-JniAdapter mock = mock(JniAdapter.class);
-Pattern p = Pattern.compile("test", true, mock);
-verify(mock).fullMatch(eq(12345L), eq("test123"));
-```
-
-### ✅ 3. Package-Private Design
-- `JniAdapter` interface is NOT public
-- `DirectJniAdapter` is NOT public
-- Only `Pattern.compile(pattern, caseSensitive, JniAdapter)` is package-private
-- Tests in same package can access it
-- Users cannot misuse it
-
-### ✅ 4. Zero Runtime Overhead
-- Production code uses singleton `DirectJniAdapter.INSTANCE`
-- Negligible interface overhead (with a single loaded implementation, the JIT can devirtualize and inline calls made through the `static final` singleton)
-- Same performance as direct static calls
-
-### ✅ 5. Comprehensive Test Coverage
-Can now unit test:
-- ✅ Parameter validation before JNI calls
-- ✅ Metrics recording logic
-- ✅ Resource tracking
-- ✅ Error handling paths
-- ✅ Bulk operation batching logic
-- ✅ DirectByteBuffer address extraction
-- ✅ Cache interaction logic
-
----
-
-## Implementation Strategy
-
-### Phase 2A: Create Abstraction (Before Test Migration)
-1. Create `JniAdapter` interface (package-private)
-2. Create `DirectJniAdapter` implementation (package-private)
-3. Update `Pattern` to use `jni` field instead of `RE2NativeJNI` static calls
-4. Update `Matcher`, `RE2` similarly
-5. Run full integration test suite - should all pass (no behavior change)
-
-### Phase 2B: Test Migration (With Mockability)
-6. Create new unit tests using mock JniAdapter
-7. Migrate existing tests to appropriate directories
-8. Verify all tests still pass
-
----
-
-## File Structure
-
-```
-libre2-core/src/main/java/com/axonops/libre2/jni/
-├── RE2NativeJNI.java (unchanged - native methods)
-├── RE2LibraryLoader.java (unchanged - library loading)
-├── JniAdapter.java (NEW - package-private interface)
-└── DirectJniAdapter.java (NEW - package-private singleton)
-
-libre2-core/src/test/java/com/axonops/libre2/api/
-├── PatternUnitTest.java (NEW - mocked JNI tests)
-├── MatcherUnitTest.java (NEW - mocked JNI tests)
-└── RE2UnitTest.java (NEW - mocked JNI tests)
-```
-
----
-
-## Example: Testing Metrics Recording Without Native Library
-
-```java
-@Test
-void testMatchAll_recordsCorrectMetrics() {
- JniAdapter mockJni = mock(JniAdapter.class);
- RE2MetricsRegistry mockMetrics = mock(RE2MetricsRegistry.class);
-
- // Setup
- when(mockJni.compile("test", true)).thenReturn(123L);
- when(mockJni.patternOk(123L)).thenReturn(true);
- when(mockJni.numCapturingGroups(123L)).thenReturn(0);
- when(mockJni.patternMemory(123L)).thenReturn(100L);
- when(mockJni.fullMatchBulk(eq(123L), any())).thenReturn(new boolean[]{true, false, true});
-
- // Create pattern with mock metrics
- PatternCache cache = new PatternCache(RE2Config.builder()
- .metricsRegistry(mockMetrics)
- .build());
- Pattern pattern = Pattern.compile("test", true, mockJni, cache);
-
- // Execute
- String[] inputs = {"test1", "test2", "test3"};
- pattern.matchAll(inputs);
-
- // Verify metrics (without running native code!)
- verify(mockMetrics).incrementCounter("re2.matching.operations.total.count", 3);
- verify(mockMetrics).incrementCounter("re2.matching.bulk.operations.total.count", 1);
- verify(mockMetrics).incrementCounter("re2.matching.bulk.items.total.count", 3);
- verify(mockMetrics, times(2)).recordTimer(eq("re2.matching.latency"), anyLong());
-}
-```
-
----
-
-## Decision Point
-
-**Do you approve this design?**
-
-If yes, I'll implement it in Phase 2A before any test migration. This gives us:
-- ✅ Full mockability of all native calls
-- ✅ Ability to assert correct JNI parameters
-- ✅ Unit tests for all business logic
-- ✅ No public API changes
-- ✅ No runtime overhead
-
-**Alternative:** If you have a different approach in mind, I'm open to it. The key requirement is: **mock all native calls to verify correct parameters**.
diff --git a/JNI_MOCKABILITY_DESIGN_V2.md b/JNI_MOCKABILITY_DESIGN_V2.md
deleted file mode 100644
index b248e14..0000000
--- a/JNI_MOCKABILITY_DESIGN_V2.md
+++ /dev/null
@@ -1,397 +0,0 @@
-# JNI Mockability Design V2 - Package-Private Enforcement
-
-**Improvement:** Make RE2NativeJNI package-private so ONLY DirectJniAdapter can access it
-
----
-
-## Updated Design: Compile-Time Enforcement
-
-### 1. RE2NativeJNI - Package-Private Native Methods
-
-```java
-package com.axonops.libre2.jni;
-
-/**
- * JNI bindings to RE2 native library.
- *
- * <p>IMPORTANT: All methods are package-private. External code must use
- * Pattern/Matcher/RE2 API. Direct JNI access is only available to DirectJniAdapter.
- *
- * <p>This design enables:
- * <ul>
- * <li>Mockability - DirectJniAdapter implements JniAdapter interface</li>
- * <li>Encapsulation - No direct JNI calls from API classes</li>
- * <li>Testability - Tests can inject mock JniAdapter</li>
- * </ul>
- */
-final class RE2NativeJNI {
-
- private RE2NativeJNI() {
- // Utility class - prevent instantiation
- }
-
- // ========== Pattern Lifecycle ==========
-
- /**
- * Compile a pattern. Package-private - use via DirectJniAdapter only.
- */
- static native long compile(String pattern, boolean caseSensitive);
-
- /**
- * Free compiled pattern. Package-private - use via DirectJniAdapter only.
- */
- static native void freePattern(long handle);
-
- /**
- * Check if pattern is valid. Package-private - use via DirectJniAdapter only.
- */
- static native boolean patternOk(long handle);
-
- /**
- * Get last compilation error. Package-private - use via DirectJniAdapter only.
- */
- static native String getError();
-
- // ... all 29 methods as package-private (no visibility modifier)
-
- // ========== Matching Operations ==========
-
- static native boolean fullMatch(long handle, String text);
- static native boolean partialMatch(long handle, String text);
- static native boolean fullMatchDirect(long handle, long address, int length);
- static native boolean partialMatchDirect(long handle, long address, int length);
-
- // ========== Bulk Operations ==========
-
- static native boolean[] fullMatchBulk(long handle, String[] texts);
- static native boolean[] partialMatchBulk(long handle, String[] texts);
- static native boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths);
- static native boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths);
-
- // ========== Capture Groups ==========
-
- static native String[] extractGroups(long handle, String text);
- static native String[][] extractGroupsBulk(long handle, String[] texts);
- static native String[] extractGroupsDirect(long handle, long address, int length);
- static native String[][] extractGroupsDirectBulk(long handle, long[] addresses, int[] lengths);
- static native String[][] findAllMatches(long handle, String text);
- static native String[][] findAllMatchesDirect(long handle, long address, int length);
- static native String[] getNamedGroups(long handle);
-
- // ========== Replace Operations ==========
-
- static native String replaceFirst(long handle, String text, String replacement);
- static native String replaceAll(long handle, String text, String replacement);
- static native String[] replaceAllBulk(long handle, String[] texts, String replacement);
- static native String replaceFirstDirect(long handle, long address, int length, String replacement);
- static native String replaceAllDirect(long handle, long address, int length, String replacement);
- static native String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement);
-
- // ========== Utility Methods ==========
-
- static native String quoteMeta(String text);
- static native int[] getProgramFanout(long handle);
- static native long getProgramSize(long handle);
-}
-```
-
-### 2. JniAdapter Interface (Package-Private)
-
-```java
-package com.axonops.libre2.jni;
-
-/**
- * Adapter interface for RE2 JNI operations.
- * Enables mocking for unit tests while maintaining production performance.
- *
- * Package-private: Not part of public API. Used internally by Pattern/Matcher/RE2.
- */
-interface JniAdapter {
- // Pattern lifecycle
- long compile(String pattern, boolean caseSensitive);
- void freePattern(long handle);
- boolean patternOk(long handle);
- String getError();
- String getPattern(long handle);
- int numCapturingGroups(long handle);
- long patternMemory(long handle);
-
- // Matching operations
- boolean fullMatch(long handle, String text);
- boolean partialMatch(long handle, String text);
- boolean fullMatchDirect(long handle, long address, int length);
- boolean partialMatchDirect(long handle, long address, int length);
-
- // Bulk operations
- boolean[] fullMatchBulk(long handle, String[] texts);
- boolean[] partialMatchBulk(long handle, String[] texts);
- boolean[] fullMatchDirectBulk(long handle, long[] addresses, int[] lengths);
- boolean[] partialMatchDirectBulk(long handle, long[] addresses, int[] lengths);
-
- // Capture groups
- String[] extractGroups(long handle, String text);
- String[][] extractGroupsBulk(long handle, String[] texts);
- String[] extractGroupsDirect(long handle, long address, int length);
- String[][] extractGroupsDirectBulk(long handle, long[] addresses, int[] lengths);
- String[][] findAllMatches(long handle, String text);
- String[][] findAllMatchesDirect(long handle, long address, int length);
- String[] getNamedGroups(long handle);
-
- // Replace operations
- String replaceFirst(long handle, String text, String replacement);
- String replaceAll(long handle, String text, String replacement);
- String[] replaceAllBulk(long handle, String[] texts, String replacement);
- String replaceFirstDirect(long handle, long address, int length, String replacement);
- String replaceAllDirect(long handle, long address, int length, String replacement);
- String[] replaceAllDirectBulk(long handle, long[] addresses, int[] lengths, String replacement);
-
- // Utility methods
- String quoteMeta(String text);
- int[] getProgramFanout(long handle);
- long getProgramSize(long handle);
-}
-```
-
-### 3. DirectJniAdapter (Package-Private, Same Package)
-
-```java
-package com.axonops.libre2.jni;
-
-/**
- * Production JNI adapter - delegates to package-private RE2NativeJNI.
- *
- * <p>Singleton instance used by all Pattern/Matcher/RE2 instances in production.
- * Tests can inject mock JniAdapter instead.
- *
- * <p>Package-private: Not part of public API. Accessed via Pattern injection.
- */
-final class DirectJniAdapter implements JniAdapter {
-
- /**
- * Singleton instance - used in production.
- * Package-private so Pattern can access it.
- */
- static final DirectJniAdapter INSTANCE = new DirectJniAdapter();
-
- private DirectJniAdapter() {
- // Private constructor - singleton pattern
- }
-
- // ========== Pattern Lifecycle ==========
-
- @Override
- public long compile(String pattern, boolean caseSensitive) {
- return RE2NativeJNI.compile(pattern, caseSensitive); // ✅ Same package - accessible
- }
-
- @Override
- public void freePattern(long handle) {
- RE2NativeJNI.freePattern(handle); // ✅ Same package - accessible
- }
-
- @Override
- public boolean patternOk(long handle) {
- return RE2NativeJNI.patternOk(handle); // ✅ Same package - accessible
- }
-
- @Override
- public String getError() {
- return RE2NativeJNI.getError(); // ✅ Same package - accessible
- }
-
- // ... delegate all 29 methods to RE2NativeJNI
-
- // All calls work because DirectJniAdapter is in same package as RE2NativeJNI
-}
-```
-
-### 4. Pattern Uses JniAdapter (Different Package)
-
-```java
-package com.axonops.libre2.api;
-
-import com.axonops.libre2.jni.JniAdapter;
-import com.axonops.libre2.jni.DirectJniAdapter;
-
-public final class Pattern implements AutoCloseable {
-
- // Package-private JniAdapter field
- final JniAdapter jni;
-
- private final long nativeHandle;
- private final String pattern;
- // ... other fields
-
- // PRIVATE constructor
- private Pattern(JniAdapter jni, String pattern, boolean caseSensitive, PatternCache cache) {
- this.jni = jni;
- this.pattern = pattern;
- this.cache = cache;
-
- // Compile using adapter
- long handle = jni.compile(pattern, caseSensitive); // ✅ Goes through interface
-
- // ❌ CANNOT do this - RE2NativeJNI is package-private in different package:
- // long handle = RE2NativeJNI.compile(pattern, caseSensitive); // COMPILE ERROR!
-
- if (handle == 0 || !jni.patternOk(handle)) {
- String error = jni.getError();
- throw new PatternCompilationException("Failed to compile pattern: " + error);
- }
- this.nativeHandle = handle;
- // ...
- }
-
- // PUBLIC API - uses production singleton adapter
- public static Pattern compile(String pattern) {
- return compile(pattern, true);
- }
-
- public static Pattern compile(String pattern, boolean caseSensitive) {
- return compile(pattern, caseSensitive, DirectJniAdapter.INSTANCE);
- }
-
- // PACKAGE-PRIVATE - tests inject mock adapter
- static Pattern compile(String pattern, boolean caseSensitive, JniAdapter jni) {
- PatternCache cache = getGlobalCache();
- return new Pattern(jni, pattern, caseSensitive, cache);
- }
-
- // All operations use this.jni (enforced at compile-time)
- public boolean match(String input) {
- checkNotClosed();
- Objects.requireNonNull(input, "input cannot be null");
-
- long startNanos = System.nanoTime();
- boolean result = jni.fullMatch(nativeHandle, input); // ✅ Must use adapter
- // boolean result = RE2NativeJNI.fullMatch(...); // ❌ COMPILE ERROR!
- long durationNanos = System.nanoTime() - startNanos;
-
- // ... metrics
- return result;
- }
-}
-```
-
----
-
-## Benefits of Package-Private RE2NativeJNI
-
-### ✅ 1. Compile-Time Enforcement
-
-**Before (public RE2NativeJNI):**
-```java
-// Pattern.java - could accidentally bypass abstraction
-boolean result = RE2NativeJNI.fullMatch(handle, text); // ✅ Compiles (bad design)
-```
-
-**After (package-private RE2NativeJNI):**
-```java
-// Pattern.java - MUST use adapter
-boolean result = RE2NativeJNI.fullMatch(handle, text); // ❌ COMPILE ERROR!
-boolean result = jni.fullMatch(handle, text); // ✅ Must use interface
-```
-
-### ✅ 2. Clear Separation of Concerns
-
-```
-com.axonops.libre2.jni/ (JNI layer - isolated)
-├── RE2NativeJNI.java (package-private native methods)
-├── JniAdapter.java (package-private interface)
-└── DirectJniAdapter.java (package-private singleton)
-
-com.axonops.libre2.api/ (Public API - uses interface)
-├── Pattern.java (uses JniAdapter, cannot access RE2NativeJNI)
-├── Matcher.java (uses JniAdapter, cannot access RE2NativeJNI)
-└── RE2.java (uses JniAdapter, cannot access RE2NativeJNI)
-```
-
-### ✅ 3. Impossible to Bypass Abstraction
-
-**Users cannot do this:**
-```java
-// This would compile if RE2NativeJNI were public
-long handle = RE2NativeJNI.compile("test", true); // ❌ COMPILE ERROR - package-private
-RE2NativeJNI.freePattern(handle); // ❌ COMPILE ERROR - package-private
-```
-
-**Must use public API:**
-```java
-Pattern pattern = Pattern.compile("test"); // ✅ Only way
-```
-
-### ✅ 4. Tests Still Work (Same Package as Pattern)
-
-```java
-package com.axonops.libre2.api; // Different package from RE2NativeJNI
-
-import com.axonops.libre2.jni.JniAdapter;
-import org.mockito.Mockito;
-
-class PatternUnitTest {
- @Test
- void testMatch() {
- JniAdapter mock = mock(JniAdapter.class);
- when(mock.compile("test", true)).thenReturn(123L);
- when(mock.fullMatch(123L, "test")).thenReturn(true);
-
- Pattern p = Pattern.compile("test", true, mock); // ✅ Package-private method
- boolean result = p.match("test");
-
- verify(mock).fullMatch(123L, "test"); // ✅ Can verify interface calls
- }
-}
-```
-
----
-
-## Implementation Changes
-
-### Change 1: RE2NativeJNI Visibility
-
-```java
-// BEFORE (current):
-public final class RE2NativeJNI {
- public static native long compile(String pattern, boolean caseSensitive);
- // ...
-}
-
-// AFTER (package-private):
-final class RE2NativeJNI {
- static native long compile(String pattern, boolean caseSensitive);
- // ... all methods package-private
-}
-```
-
-### Change 2: Pattern/Matcher/RE2 MUST Use JniAdapter
-
-```java
-// BEFORE:
-boolean result = RE2NativeJNI.fullMatch(handle, text);
-
-// AFTER:
-boolean result = jni.fullMatch(handle, text);
-```
-
-**Compiler enforces this change** - any direct RE2NativeJNI calls in Pattern/Matcher/RE2 will fail to compile.
-
----
-
-## Summary
-
-**Your suggestion is perfect!** Making RE2NativeJNI package-private:
-
-1. ✅ **Works with native methods** - Visibility doesn't affect JNI name mangling
-2. ✅ **Enforces abstraction** - Compile error if bypassed
-3. ✅ **Zero runtime cost** - Same performance as direct calls
-4. ✅ **Enables testing** - Mock JniAdapter interface
-5. ✅ **Clean architecture** - JNI layer isolated in one package
-
-**Next Steps:**
-1. Implement package-private RE2NativeJNI
-2. Create JniAdapter interface and DirectJniAdapter
-3. Update Pattern/Matcher/RE2 to use JniAdapter field
-4. Verify all existing tests pass (integration tests unchanged)
-5. Add new unit tests with mocked JniAdapter
-
-**Approved for implementation?**
diff --git a/MOCKABILITY_ASSESSMENT.md b/MOCKABILITY_ASSESSMENT.md
deleted file mode 100644
index 53b22df..0000000
--- a/MOCKABILITY_ASSESSMENT.md
+++ /dev/null
@@ -1,262 +0,0 @@
-# Mockability and Unit Test Strategy Assessment
-
-**Date:** 2025-11-26
-**Purpose:** Corrected analysis of what can be unit tested without native library
-
----
-
-## The Static Method Problem
-
-**All JNI methods are `public static native`:**
-```java
-public class RE2NativeJNI {
- public static native long compile(String pattern, boolean caseSensitive);
- public static native void freePattern(long handle);
- public static native boolean fullMatch(long handle, String text);
- // ... 26 more static native methods
-}
-```
-
-**Implications:**
-- Cannot use traditional interface-based dependency injection
-- Mocking static methods requires:
- - **Mockito 3.4+ with mockito-inline** (can mock statics in JUnit 5)
- - **PowerMock** (deprecated, poor JUnit 5 support)
-- Most Pattern/Matcher/RE2 logic IS the native call - minimal business logic to test
-
----
-
-## What's Actually Unit-Testable?
-
-### Files That DON'T Call Native Code (16 files)
-
-**Pure Java, No Native Dependencies:**
-
-#### 1. Configuration & Builders ✅
-- `RE2Config.java` - Builder pattern, validation
-- `MetricNames.java` - String constants
-- **Already tested:** `ConfigurationTest.java` (14 tests) ✅
-
-#### 2. Metrics Abstractions ✅
-- `RE2MetricsRegistry.java` - Interface
-- `NoOpMetricsRegistry.java` - No-op implementation
-- `DropwizardMetricsAdapter.java` - Adapter (can mock MetricRegistry)
-- **Already tested:** `TimerHistogramTest.java` (4 tests) ✅
-- **Testable:** Adapter logic without Dropwizard
-
-#### 3. Exception Classes ✅
-- `RE2Exception.java` (sealed base)
-- `PatternCompilationException.java`
-- `NativeLibraryException.java`
-- `ResourceException.java`
-- `RE2TimeoutException.java`
-- **Already tested:** Implicitly in integration tests
-- **Testable:** Exception hierarchies, messages, causes
-
-#### 4. Value Objects ✅
-- `MatchResult.java` - Holds capture groups, implements AutoCloseable
-- `CacheStatistics.java` - Immutable stats record
-- **Already tested:** `CaptureGroupsTest.java` (31 tests) ✅
-- **Testable:** MatchResult lifecycle, closed state checking
-
-#### 5. Utilities ✅
-- `PatternHasher.java` - Pattern hash computation
-- `ResourceTracker.java` - Resource tracking logic
-- **Testable:** Hash consistency, resource accounting
-
-#### 6. Cache Logic (Partially Testable)
-- `PatternCache.java` - Cache management
-- `IdleEvictionTask.java` - Background eviction
-- **Issue:** Cache stores compiled Patterns (which need native library)
-- **Mockable:** LRU eviction logic, idle timeout calculation, statistics
-- **Already tested:** `CacheTest.java`, `IdleEvictionTest.java` (integration tests)
-
----
-
-## What REQUIRES Native Library?
-
-### Files That Call RE2NativeJNI (5 files)
-
-1. **Pattern.java** - Wraps native pattern, all operations call JNI
-2. **Matcher.java** - Iterator over Pattern operations
-3. **RE2.java** - Static convenience methods (all delegate to Pattern)
-4. **RE2LibraryLoader.java** - Loads native library
-5. **RE2NativeJNI.java** - JNI method declarations
-
-**Why integration tests are necessary:**
-- Pattern compilation, matching, replacement = native operations
-- Cannot mock without significant refactoring
-- Business logic is minimal (metrics, validation, resource tracking)
-
----
-
-## Revised Unit vs Integration Test Strategy
-
-### True Unit Tests (No Native Library Required)
-
-**Current Status:** 4 test classes qualify as true unit tests
-
-1. ✅ **ConfigurationTest.java** (14 tests)
- - Tests RE2Config builder
- - No Pattern creation, no native calls
-
-2. ✅ **TimerHistogramTest.java** (4 tests)
- - Tests pure Java histogram logic
- - No native dependencies
-
-3. ✅ **BulkMatchingTypeSafetyTest.java** (13 tests)
- - Tests type safety, null handling
- - **WAIT:** Does this create Patterns? Need to verify
-
-4. ✅ **RE2MetricsConfigTest.java** (6 tests) [in libre2-dropwizard]
- - Tests config factory methods
- - No Pattern creation
-
-**Candidates for Unit Testing (with refactoring):**
-
-1. **Exception hierarchy tests** - Create new test class
-2. **PatternHasher tests** - Create new test class
-3. **ResourceTracker tests** - Create new test class (or mock Pattern)
-4. **MatchResult lifecycle tests** - Already covered in CaptureGroupsTest
-5. **Cache eviction logic** - Requires mocking Pattern creation
-
-### Integration Tests (Require Native Library)
-
-**All tests that:**
-- Compile patterns (Pattern.compile())
-- Match text (Pattern.match(), find(), etc.)
-- Use JNI layer (RE2NativeJNITest)
-- Test metrics with real operations
-- Test cache with real Patterns
-
-**Count:** ~370 tests (vast majority)
-
----
-
-## Mocking Strategy Assessment
-
-### Option 1: Mock Static Methods with Mockito-Inline ❌
-
-**Approach:**
-```java
-@ExtendWith(MockitoExtension.class)
-class PatternUnitTest {
- @Test
- void testSomething() {
- try (MockedStatic<RE2NativeJNI> mocked = mockStatic(RE2NativeJNI.class)) {
- mocked.when(() -> RE2NativeJNI.compile("test", true)).thenReturn(12345L);
- // Test Pattern logic
- }
- }
-}
-```
-
-**Problems:**
-- Requires mockito-inline (adds dependency)
-- Verbose setup for every test
-- Most Pattern logic IS the native call
-- Little business logic to test independently
-
-**Verdict:** Not worth the complexity for minimal gain
-
-### Option 2: Introduce Abstraction Layer ❌
-
-**Approach:**
-```java
-interface JniAdapter {
- long compile(String pattern, boolean caseSensitive);
- void freePattern(long handle);
- // ... 27 more methods
-}
-
-class DirectJniAdapter implements JniAdapter {
- public long compile(String pattern, boolean caseSensitive) {
- return RE2NativeJNI.compile(pattern, caseSensitive);
- }
- // ...
-}
-
-// Pattern takes JniAdapter in constructor
-class Pattern {
- private final JniAdapter jni;
- Pattern(JniAdapter jni, ...) { this.jni = jni; }
-}
-```
-
-**Problems:**
-- Invasive refactoring (29 methods to wrap)
-- Breaks existing API (Pattern constructor changes)
-- Adds complexity for every caller
-- Testing benefit is minimal
-
-**Verdict:** Too invasive, not worth it
-
-### Option 3: Focus on Pure Java Components ✅
-
-**Approach:**
-- Unit test what doesn't need mocking (Config, Metrics, Exceptions, Utilities)
-- Integration test everything that touches native code
-- Accept that most tests require native library
-
-**Benefits:**
-- Clean separation of concerns
-- No mocking complexity
-- Integration tests already comprehensive (459 tests)
-- Can still add unit tests for pure Java components
-
-**Verdict:** This is the right approach ✅
-
----
-
-## Recommendations
-
-### Phase 3: Unit Test Foundation
-
-**DO:**
-1. ✅ Create unit tests for pure Java components:
- - Exception hierarchy tests
- - PatternHasher tests (hash consistency)
- - ResourceTracker tests (if mockable)
- - DropwizardMetricsAdapter tests (mock MetricRegistry)
-
-2. ✅ Separate existing unit tests from integration tests:
- - Move ConfigurationTest to src/test/java (unit)
- - Move TimerHistogramTest to src/test/java (unit)
- - Verify BulkMatchingTypeSafetyTest doesn't create Patterns
-
-3. ✅ Document what's unit vs integration testable
-
-**DON'T:**
-- ❌ Introduce JniAdapter abstraction (too invasive)
-- ❌ Mock static RE2NativeJNI methods (too complex)
-- ❌ Try to unit test Pattern/Matcher/RE2 without native library
-
-### The Reality
-
-**Most of this library IS integration testing by nature:**
-- Core functionality is native regex matching
-- Java layer is thin wrapper with metrics/caching
-- Integration tests are comprehensive (459 tests)
-- Pure unit tests have limited scope (~20-30 tests max)
-
-**This is OK!** The library's value IS the native integration.
-
----
-
-## Updated Test Classification
-
-| Type | Count | Mockable? | Strategy |
-|------|-------|-----------|----------|
-| **Pure Unit Tests** | 4-6 | ✅ No mocking needed | Keep in src/test/java |
-| **Integration Tests** | ~370 | ❌ Require native lib | Move to src/integration-test/java |
-| **Performance Tests** | 2 | ❌ Require native lib | Move to perf-test module |
-| **Stress Tests** | 4 | ❌ Require native lib | Move to perf-test module |
-
----
-
-**Conclusion:** Original analysis was incomplete. Static native methods are not practically mockable. Focus on:
-1. Pure Java component unit tests
-2. Comprehensive integration tests (already have 370+)
-3. Clear separation of test types
-
-**End of Corrected Assessment**
diff --git a/config/checkstyle/checkstyle-suppressions.xml b/config/checkstyle/checkstyle-suppressions.xml
new file mode 100644
index 0000000..beb5d4c
--- /dev/null
+++ b/config/checkstyle/checkstyle-suppressions.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/config/checkstyle/google_checks.xml b/config/checkstyle/google_checks.xml
new file mode 100644
index 0000000..00d1afa
--- /dev/null
+++ b/config/checkstyle/google_checks.xml
@@ -0,0 +1,383 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/libre2-core/pom.xml b/libre2-core/pom.xml
index dc21ba2..d5c4281 100644
--- a/libre2-core/pom.xml
+++ b/libre2-core/pom.xml
@@ -243,7 +243,7 @@
LINE
COVEREDRATIO
- 0.67
+ 0.67
@@ -252,6 +252,28 @@
+
+
+ org.apache.maven.plugins
+ maven-checkstyle-plugin
+
+ ${project.basedir}/../config/checkstyle/google_checks.xml
+ ${project.basedir}/../config/checkstyle/checkstyle-suppressions.xml
+ true
+ true
+ warning
+ true
+
+
+
+ checkstyle
+ validate
+
+ check
+
+
+
+
diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/RE2IT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/RE2IT.java
index b5226b3..40d4536 100644
--- a/libre2-core/src/integration-test/java/com/axonops/libre2/RE2IT.java
+++ b/libre2-core/src/integration-test/java/com/axonops/libre2/RE2IT.java
@@ -1,1160 +1,1143 @@
package com.axonops.libre2;
-import com.axonops.libre2.api.*;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.CsvSource;
-import org.junit.jupiter.params.provider.ValueSource;
+import static org.assertj.core.api.Assertions.*;
+import com.axonops.libre2.api.*;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.ValueSource;
-import static org.assertj.core.api.Assertions.*;
-
-/**
- * Comprehensive integration tests for RE2.
- */
+/** Comprehensive integration tests for RE2. */
class RE2IT {
- // ===== Basic Matching Tests =====
-
- @Test
- void testSimpleExactMatch() {
- assertThat(RE2.matches("hello", "hello")).isTrue();
- assertThat(RE2.matches("hello", "world")).isFalse();
+ // ===== Basic Matching Tests =====
+
+ @Test
+ void testSimpleExactMatch() {
+ assertThat(RE2.matches("hello", "hello")).isTrue();
+ assertThat(RE2.matches("hello", "world")).isFalse();
+ }
+
+ @Test
+ void testEmptyPattern() {
+ // Empty patterns are rejected by RE2 wrapper
+ assertThatThrownBy(() -> RE2.matches("", "")).isInstanceOf(PatternCompilationException.class);
+ }
+
+ @Test
+ void testEmptyInput() {
+ try (Pattern p = RE2.compile(".*")) {
+ assertThat(p.matches("")).isTrue();
+ }
+ }
+
+ @ParameterizedTest
+ @CsvSource({
+ "hello, hello, true",
+ "hello, HELLO, false",
+ "hello, hello world, false", // Full match requires entire string
+ "^hello, hello world, false", // Full match
+ "world$, hello world, false" // Full match
+ })
+ void testFullMatchBehavior(String pattern, String input, boolean shouldMatch) {
+ try (Pattern p = RE2.compile(pattern)) {
+ assertThat(p.matches(input)).isEqualTo(shouldMatch);
+ }
+ }
+
+ @ParameterizedTest
+ @CsvSource({
+ "hello, hello world, true",
+ "world, hello world, true",
+ "goodbye, hello world, false",
+ "^hello, hello world, true",
+ "world$, hello world, true"
+ })
+ void testPartialMatchBehavior(String pattern, String input, boolean shouldMatch) {
+ try (Pattern p = RE2.compile(pattern)) {
+ try (Matcher m = p.matcher(input)) {
+ assertThat(m.find()).isEqualTo(shouldMatch);
+ }
+ }
+ }
+
+ // ===== Case Sensitivity Tests =====
+
+ @Test
+ void testCaseSensitiveMatching() {
+ try (Pattern p = RE2.compile("HELLO", true)) {
+ assertThat(p.matches("HELLO")).isTrue();
+ assertThat(p.matches("hello")).isFalse();
+ assertThat(p.matches("HeLLo")).isFalse();
+ }
+ }
+
+ @Test
+ void testCaseInsensitiveMatching() {
+ try (Pattern p = RE2.compile("HELLO", false)) {
+ assertThat(p.matches("HELLO")).isTrue();
+ assertThat(p.matches("hello")).isTrue();
+ assertThat(p.matches("HeLLo")).isTrue();
+ assertThat(p.matches("hElLo")).isTrue();
+ }
+ }
+
+ // ===== Regex Feature Tests =====
+
+ @ParameterizedTest
+ @CsvSource({
+ "\\d+, 123, true",
+ "\\d+, abc, false",
+ "\\w+, hello123, true",
+ "\\w+, !, false",
+ "\\s+, ' ', true",
+ "\\s+, text, false",
+ "[a-z]+, abc, true",
+ "[a-z]+, ABC, false",
+ "[0-9]{3}, 123, true",
+ "[0-9]{3}, 12, false"
+ })
+ void testCharacterClasses(String pattern, String input, boolean shouldMatch) {
+ try (Pattern p = RE2.compile(pattern)) {
+ assertThat(p.matches(input)).isEqualTo(shouldMatch);
+ }
+ }
+
+ @Test
+ void testRepetitionZeroOrMore() {
+ try (Pattern p = RE2.compile("a*")) {
+ assertThat(p.matches("")).isTrue();
+ assertThat(p.matches("a")).isTrue();
+ assertThat(p.matches("aaa")).isTrue();
+ }
+ }
+
+ @Test
+ void testRepetitionOneOrMore() {
+ try (Pattern p = RE2.compile("a+")) {
+ assertThat(p.matches("")).isFalse();
+ assertThat(p.matches("a")).isTrue();
+ assertThat(p.matches("aaa")).isTrue();
+ }
+ }
+
+ @Test
+ void testRepetitionOptional() {
+ try (Pattern p = RE2.compile("a?")) {
+ assertThat(p.matches("")).isTrue();
+ assertThat(p.matches("a")).isTrue();
+ assertThat(p.matches("aa")).isFalse();
+ }
+ }
+
+ @Test
+ void testRepetitionExactCount() {
+ try (Pattern p = RE2.compile("a{2}")) {
+ assertThat(p.matches("aa")).isTrue();
+ assertThat(p.matches("a")).isFalse();
+ assertThat(p.matches("aaa")).isFalse();
+ }
+ }
+
+ @Test
+ void testRepetitionRange() {
+ try (Pattern p = RE2.compile("a{2,4}")) {
+ assertThat(p.matches("a")).isFalse();
+ assertThat(p.matches("aa")).isTrue();
+ assertThat(p.matches("aaa")).isTrue();
+ assertThat(p.matches("aaaa")).isTrue();
+ assertThat(p.matches("aaaaa")).isFalse();
}
+ }
- @Test
- void testEmptyPattern() {
- // Empty patterns are rejected by RE2 wrapper
- assertThatThrownBy(() -> RE2.matches("", ""))
- .isInstanceOf(PatternCompilationException.class);
+ @ParameterizedTest
+ @ValueSource(strings = {"abc|def", "(hello|world)", "cat|dog|bird", "\\d+|\\w+"})
+ void testAlternation(String pattern) {
+ try (Pattern p = RE2.compile(pattern)) {
+ assertThat(p).isNotNull();
}
+ }
- @Test
- void testEmptyInput() {
- try (Pattern p = RE2.compile(".*")) {
- assertThat(p.matches("")).isTrue();
- }
+ @Test
+ void testDotMetacharacter() {
+ try (Pattern p = RE2.compile("a.c")) {
+ assertThat(p.matches("abc")).isTrue();
+ assertThat(p.matches("axc")).isTrue();
+ assertThat(p.matches("ac")).isFalse();
}
+ }
- @ParameterizedTest
- @CsvSource({
- "hello, hello, true",
- "hello, HELLO, false",
- "hello, hello world, false", // Full match requires entire string
- "^hello, hello world, false", // Full match
- "world$, hello world, false" // Full match
- })
- void testFullMatchBehavior(String pattern, String input, boolean shouldMatch) {
- try (Pattern p = RE2.compile(pattern)) {
- assertThat(p.matches(input)).isEqualTo(shouldMatch);
- }
+ @Test
+ void testAnchors() {
+ try (Pattern start = RE2.compile("^hello")) {
+ try (Matcher m = start.matcher("hello world")) {
+ assertThat(m.find()).isTrue();
+ }
+ try (Matcher m = start.matcher("say hello")) {
+ assertThat(m.find()).isFalse();
+ }
}
- @ParameterizedTest
- @CsvSource({
- "hello, hello world, true",
- "world, hello world, true",
- "goodbye, hello world, false",
- "^hello, hello world, true",
- "world$, hello world, true"
- })
- void testPartialMatchBehavior(String pattern, String input, boolean shouldMatch) {
- try (Pattern p = RE2.compile(pattern)) {
- try (Matcher m = p.matcher(input)) {
- assertThat(m.find()).isEqualTo(shouldMatch);
- }
- }
+ try (Pattern end = RE2.compile("world$")) {
+ try (Matcher m = end.matcher("hello world")) {
+ assertThat(m.find()).isTrue();
+ }
+ try (Matcher m = end.matcher("world hello")) {
+ assertThat(m.find()).isFalse();
+ }
}
+ }
- // ===== Case Sensitivity Tests =====
+ // ===== UTF-8 and Special Characters =====
- @Test
- void testCaseSensitiveMatching() {
- try (Pattern p = RE2.compile("HELLO", true)) {
- assertThat(p.matches("HELLO")).isTrue();
- assertThat(p.matches("hello")).isFalse();
- assertThat(p.matches("HeLLo")).isFalse();
- }
+ @Test
+ void testUTF8Characters() {
+ try (Pattern p = RE2.compile("hello")) {
+ assertThat(p.matches("hello")).isTrue();
}
- @Test
- void testCaseInsensitiveMatching() {
- try (Pattern p = RE2.compile("HELLO", false)) {
- assertThat(p.matches("HELLO")).isTrue();
- assertThat(p.matches("hello")).isTrue();
- assertThat(p.matches("HeLLo")).isTrue();
- assertThat(p.matches("hElLo")).isTrue();
- }
+ try (Pattern p = RE2.compile("café")) {
+ assertThat(p.matches("café")).isTrue();
}
- // ===== Regex Feature Tests =====
-
- @ParameterizedTest
- @CsvSource({
- "\\d+, 123, true",
- "\\d+, abc, false",
- "\\w+, hello123, true",
- "\\w+, !, false",
- "\\s+, ' ', true",
- "\\s+, text, false",
- "[a-z]+, abc, true",
- "[a-z]+, ABC, false",
- "[0-9]{3}, 123, true",
- "[0-9]{3}, 12, false"
- })
- void testCharacterClasses(String pattern, String input, boolean shouldMatch) {
- try (Pattern p = RE2.compile(pattern)) {
- assertThat(p.matches(input)).isEqualTo(shouldMatch);
- }
+ try (Pattern p = RE2.compile("日本語")) {
+ assertThat(p.matches("日本語")).isTrue();
}
- @Test
- void testRepetitionZeroOrMore() {
- try (Pattern p = RE2.compile("a*")) {
- assertThat(p.matches("")).isTrue();
- assertThat(p.matches("a")).isTrue();
- assertThat(p.matches("aaa")).isTrue();
- }
+ try (Pattern p = RE2.compile("emoji😀test")) {
+ assertThat(p.matches("emoji😀test")).isTrue();
}
+ }
- @Test
- void testRepetitionOneOrMore() {
- try (Pattern p = RE2.compile("a+")) {
- assertThat(p.matches("")).isFalse();
- assertThat(p.matches("a")).isTrue();
- assertThat(p.matches("aaa")).isTrue();
- }
+ @Test
+ void testSpecialRegexCharacters() {
+ try (Pattern p = RE2.compile("\\(\\)\\[\\]\\{\\}")) {
+ assertThat(p.matches("()[]{}")).isTrue();
}
- @Test
- void testRepetitionOptional() {
- try (Pattern p = RE2.compile("a?")) {
- assertThat(p.matches("")).isTrue();
- assertThat(p.matches("a")).isTrue();
- assertThat(p.matches("aa")).isFalse();
- }
+ try (Pattern p = RE2.compile("\\.\\*\\+\\?")) {
+ assertThat(p.matches(".*+?")).isTrue();
}
+ }
- @Test
- void testRepetitionExactCount() {
- try (Pattern p = RE2.compile("a{2}")) {
- assertThat(p.matches("aa")).isTrue();
- assertThat(p.matches("a")).isFalse();
- assertThat(p.matches("aaa")).isFalse();
- }
- }
+ // ===== Comprehensive Unicode Script Tests =====
- @Test
- void testRepetitionRange() {
- try (Pattern p = RE2.compile("a{2,4}")) {
- assertThat(p.matches("a")).isFalse();
- assertThat(p.matches("aa")).isTrue();
- assertThat(p.matches("aaa")).isTrue();
- assertThat(p.matches("aaaa")).isTrue();
- assertThat(p.matches("aaaaa")).isFalse();
- }
+ @Test
+ void testChineseCharacters() {
+ // Simplified Chinese
+ try (Pattern p = RE2.compile("中文测试")) {
+ assertThat(p.matches("中文测试")).isTrue();
+ assertThat(p.matches("中文")).isFalse();
}
- @ParameterizedTest
- @ValueSource(strings = {
- "abc|def",
- "(hello|world)",
- "cat|dog|bird",
- "\\d+|\\w+"
- })
- void testAlternation(String pattern) {
- try (Pattern p = RE2.compile(pattern)) {
- assertThat(p).isNotNull();
- }
+ // Traditional Chinese
+ try (Pattern p = RE2.compile("繁體中文")) {
+ assertThat(p.matches("繁體中文")).isTrue();
}
- @Test
- void testDotMetacharacter() {
- try (Pattern p = RE2.compile("a.c")) {
- assertThat(p.matches("abc")).isTrue();
- assertThat(p.matches("axc")).isTrue();
- assertThat(p.matches("ac")).isFalse();
- }
+ // Mixed Chinese and ASCII
+ try (Pattern p = RE2.compile("测试123")) {
+ assertThat(p.matches("测试123")).isTrue();
}
- @Test
- void testAnchors() {
- try (Pattern start = RE2.compile("^hello")) {
- try (Matcher m = start.matcher("hello world")) {
- assertThat(m.find()).isTrue();
- }
- try (Matcher m = start.matcher("say hello")) {
- assertThat(m.find()).isFalse();
- }
- }
-
- try (Pattern end = RE2.compile("world$")) {
- try (Matcher m = end.matcher("hello world")) {
- assertThat(m.find()).isTrue();
- }
- try (Matcher m = end.matcher("world hello")) {
- assertThat(m.find()).isFalse();
- }
- }
+ // Chinese in pattern with wildcards
+ try (Pattern p = RE2.compile(".*中文.*")) {
+ try (Matcher m = p.matcher("这是中文文本")) {
+ assertThat(m.find()).isTrue();
+ }
}
- // ===== UTF-8 and Special Characters =====
-
- @Test
- void testUTF8Characters() {
- try (Pattern p = RE2.compile("hello")) {
- assertThat(p.matches("hello")).isTrue();
- }
-
- try (Pattern p = RE2.compile("café")) {
- assertThat(p.matches("café")).isTrue();
- }
-
- try (Pattern p = RE2.compile("日本語")) {
- assertThat(p.matches("日本語")).isTrue();
- }
-
- try (Pattern p = RE2.compile("emoji😀test")) {
- assertThat(p.matches("emoji😀test")).isTrue();
- }
+ // Chinese character class (Unicode range)
+ try (Pattern p = RE2.compile("[\\x{4e00}-\\x{9fff}]+")) {
+ assertThat(p.matches("汉字")).isTrue();
+ assertThat(p.matches("abc")).isFalse();
}
+ }
- @Test
- void testSpecialRegexCharacters() {
- try (Pattern p = RE2.compile("\\(\\)\\[\\]\\{\\}")) {
- assertThat(p.matches("()[]{}")).isTrue();
- }
-
- try (Pattern p = RE2.compile("\\.\\*\\+\\?")) {
- assertThat(p.matches(".*+?")).isTrue();
- }
+ @Test
+ void testArabicCharacters() {
+ // Basic Arabic text
+ try (Pattern p = RE2.compile("مرحبا")) {
+ assertThat(p.matches("مرحبا")).isTrue();
}
- // ===== Comprehensive Unicode Script Tests =====
-
- @Test
- void testChineseCharacters() {
- // Simplified Chinese
- try (Pattern p = RE2.compile("中文测试")) {
- assertThat(p.matches("中文测试")).isTrue();
- assertThat(p.matches("中文")).isFalse();
- }
-
- // Traditional Chinese
- try (Pattern p = RE2.compile("繁體中文")) {
- assertThat(p.matches("繁體中文")).isTrue();
- }
-
- // Mixed Chinese and ASCII
- try (Pattern p = RE2.compile("测试123")) {
- assertThat(p.matches("测试123")).isTrue();
- }
-
- // Chinese in pattern with wildcards
- try (Pattern p = RE2.compile(".*中文.*")) {
- try (Matcher m = p.matcher("这是中文文本")) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Chinese character class (Unicode range)
- try (Pattern p = RE2.compile("[\\x{4e00}-\\x{9fff}]+")) {
- assertThat(p.matches("汉字")).isTrue();
- assertThat(p.matches("abc")).isFalse();
- }
+ // Arabic word with the definite article (this literal carries no diacritic marks)
+ try (Pattern p = RE2.compile("العربية")) {
+ assertThat(p.matches("العربية")).isTrue();
}
- @Test
- void testArabicCharacters() {
- // Basic Arabic text
- try (Pattern p = RE2.compile("مرحبا")) {
- assertThat(p.matches("مرحبا")).isTrue();
- }
-
- // Arabic with diacritics
- try (Pattern p = RE2.compile("العربية")) {
- assertThat(p.matches("العربية")).isTrue();
- }
-
- // Mixed Arabic and numbers
- try (Pattern p = RE2.compile("رقم \\d+")) {
- try (Matcher m = p.matcher("رقم 123")) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Arabic numerals (Eastern Arabic)
- try (Pattern p = RE2.compile("١٢٣")) {
- assertThat(p.matches("١٢٣")).isTrue();
- }
+ // Mixed Arabic and numbers
+ try (Pattern p = RE2.compile("رقم \\d+")) {
+ try (Matcher m = p.matcher("رقم 123")) {
+ assertThat(m.find()).isTrue();
+ }
}
- @Test
- void testHebrewCharacters() {
- // Basic Hebrew
- try (Pattern p = RE2.compile("שלום")) {
- assertThat(p.matches("שלום")).isTrue();
- }
-
- // Hebrew with vowel points (nikkud)
- try (Pattern p = RE2.compile("עברית")) {
- assertThat(p.matches("עברית")).isTrue();
- }
-
- // Mixed Hebrew and ASCII
- try (Pattern p = RE2.compile("test שלום test")) {
- assertThat(p.matches("test שלום test")).isTrue();
- }
+ // Arabic numerals (Eastern Arabic)
+ try (Pattern p = RE2.compile("١٢٣")) {
+ assertThat(p.matches("١٢٣")).isTrue();
}
+ }
- @Test
- void testGreekCharacters() {
- // Basic Greek
- try (Pattern p = RE2.compile("Ελληνικά")) {
- assertThat(p.matches("Ελληνικά")).isTrue();
- }
-
- // Greek letters commonly used in math/science
- try (Pattern p = RE2.compile("αβγδ")) {
- assertThat(p.matches("αβγδ")).isTrue();
- }
-
- // Greek uppercase
- try (Pattern p = RE2.compile("ΑΒΓΔ")) {
- assertThat(p.matches("ΑΒΓΔ")).isTrue();
- }
-
- // Mixed Greek and math symbols
- try (Pattern p = RE2.compile("π = 3\\.14")) {
- assertThat(p.matches("π = 3.14")).isTrue();
- }
+ @Test
+ void testHebrewCharacters() {
+ // Basic Hebrew
+ try (Pattern p = RE2.compile("שלום")) {
+ assertThat(p.matches("שלום")).isTrue();
}
- @Test
- void testCyrillicCharacters() {
- // Russian
- try (Pattern p = RE2.compile("Привет")) {
- assertThat(p.matches("Привет")).isTrue();
- }
-
- // Ukrainian
- try (Pattern p = RE2.compile("Слава Україні")) {
- assertThat(p.matches("Слава Україні")).isTrue();
- }
-
- // Mixed Cyrillic and Latin (common in technical docs)
- try (Pattern p = RE2.compile("error: Ошибка")) {
- assertThat(p.matches("error: Ошибка")).isTrue();
- }
+ // Hebrew word, unpointed (this literal carries no nikkud vowel points)
+ try (Pattern p = RE2.compile("עברית")) {
+ assertThat(p.matches("עברית")).isTrue();
}
- @Test
- void testKoreanCharacters() {
- // Hangul
- try (Pattern p = RE2.compile("안녕하세요")) {
- assertThat(p.matches("안녕하세요")).isTrue();
- }
-
- // Mixed Korean and ASCII
- try (Pattern p = RE2.compile("Hello 세계")) {
- assertThat(p.matches("Hello 세계")).isTrue();
- }
-
- // Korean with numbers
- try (Pattern p = RE2.compile("테스트\\d+")) {
- assertThat(p.matches("테스트123")).isTrue();
- }
+ // Mixed Hebrew and ASCII
+ try (Pattern p = RE2.compile("test שלום test")) {
+ assertThat(p.matches("test שלום test")).isTrue();
}
+ }
- @Test
- void testThaiCharacters() {
- // Thai script
- try (Pattern p = RE2.compile("สวัสดี")) {
- assertThat(p.matches("สวัสดี")).isTrue();
- }
-
- // Thai with tone marks
- try (Pattern p = RE2.compile("ภาษาไทย")) {
- assertThat(p.matches("ภาษาไทย")).isTrue();
- }
+ @Test
+ void testGreekCharacters() {
+ // Basic Greek
+ try (Pattern p = RE2.compile("Ελληνικά")) {
+ assertThat(p.matches("Ελληνικά")).isTrue();
}
- @Test
- void testDevanagariCharacters() {
- // Hindi
- try (Pattern p = RE2.compile("नमस्ते")) {
- assertThat(p.matches("नमस्ते")).isTrue();
- }
-
- // Sanskrit
- try (Pattern p = RE2.compile("संस्कृत")) {
- assertThat(p.matches("संस्कृत")).isTrue();
- }
+ // Greek letters commonly used in math/science
+ try (Pattern p = RE2.compile("αβγδ")) {
+ assertThat(p.matches("αβγδ")).isTrue();
}
- @Test
- void testMixedScripts() {
- // Multiple scripts in one pattern
- try (Pattern p = RE2.compile("Hello 世界 مرحبا שלום")) {
- assertThat(p.matches("Hello 世界 مرحبا שלום")).isTrue();
- }
-
- // Technical text with multiple scripts
- try (Pattern p = RE2.compile("Error: 错误 - Ошибка")) {
- assertThat(p.matches("Error: 错误 - Ошибка")).isTrue();
- }
-
- // Product names mixing scripts
- try (Pattern p = RE2.compile("Sony ソニー")) {
- assertThat(p.matches("Sony ソニー")).isTrue();
- }
+ // Greek uppercase
+ try (Pattern p = RE2.compile("ΑΒΓΔ")) {
+ assertThat(p.matches("ΑΒΓΔ")).isTrue();
}
- @Test
- void testUnicodeEmoji() {
- // Basic emoji
- try (Pattern p = RE2.compile("😀😁😂")) {
- assertThat(p.matches("😀😁😂")).isTrue();
- }
-
- // Emoji with text
- try (Pattern p = RE2.compile("Hello 👋 World 🌍")) {
- assertThat(p.matches("Hello 👋 World 🌍")).isTrue();
- }
-
- // Search for emoji in text
- try (Pattern p = RE2.compile("👍")) {
- try (Matcher m = p.matcher("Great job! 👍 Keep going!")) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Emoji sequences (family, flags, etc.)
- try (Pattern p = RE2.compile("🇺🇸")) {
- assertThat(p.matches("🇺🇸")).isTrue();
- }
+ // Mixed Greek and math symbols
+ try (Pattern p = RE2.compile("π = 3\\.14")) {
+ assertThat(p.matches("π = 3.14")).isTrue();
}
+ }
- @Test
- void testSpecialUnicodeSymbols() {
- // Currency symbols
- try (Pattern p = RE2.compile("€\\d+\\.\\d{2}")) {
- assertThat(p.matches("€19.99")).isTrue();
- }
-
- try (Pattern p = RE2.compile("£\\d+")) {
- assertThat(p.matches("£100")).isTrue();
- }
-
- try (Pattern p = RE2.compile("¥\\d+")) {
- assertThat(p.matches("¥1000")).isTrue();
- }
-
- // Math symbols
- try (Pattern p = RE2.compile("∑.*=.*∞")) {
- try (Matcher m = p.matcher("∑x = ∞")) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Copyright and trademark
- try (Pattern p = RE2.compile("©.*®.*™")) {
- try (Matcher m = p.matcher("© 2025 Company® Product™")) {
- assertThat(m.find()).isTrue();
- }
- }
+ @Test
+ void testCyrillicCharacters() {
+ // Russian
+ try (Pattern p = RE2.compile("Привет")) {
+ assertThat(p.matches("Привет")).isTrue();
}
- @Test
- void testUnicodeCaseInsensitive() {
- // German with umlauts (ä, ö, ü have proper case folding)
- try (Pattern p = RE2.compile("münchen", false)) {
- assertThat(p.matches("münchen")).isTrue();
- assertThat(p.matches("MÜNCHEN")).isTrue();
- }
-
- // Greek case insensitive
- try (Pattern p = RE2.compile("ελληνικά", false)) {
- assertThat(p.matches("ελληνικά")).isTrue();
- assertThat(p.matches("ΕΛΛΗΝΙΚΆ")).isTrue();
- }
-
- // Cyrillic case insensitive
- try (Pattern p = RE2.compile("привет", false)) {
- assertThat(p.matches("привет")).isTrue();
- assertThat(p.matches("ПРИВЕТ")).isTrue();
- }
-
- // Note: German ß does NOT fold to SS in RE2 (unlike Java)
- // This is intentional RE2 behavior for correctness
- try (Pattern p = RE2.compile("straße", false)) {
- assertThat(p.matches("straße")).isTrue();
- // STRASSE would NOT match - ß ≠ SS in RE2
- }
+ // Ukrainian
+ try (Pattern p = RE2.compile("Слава Україні")) {
+ assertThat(p.matches("Слава Україні")).isTrue();
}
- @Test
- void testUnicodeInLogProcessing() {
- // Simulating logs with international user data
- String logEntry = "2025-11-17 [INFO] User 田中太郎 (tanaka@example.jp) logged in from 東京";
-
- // Find Japanese name
- try (Pattern p = RE2.compile("[\\x{4e00}-\\x{9fff}\\x{3040}-\\x{309f}\\x{30a0}-\\x{30ff}]+")) {
- try (Matcher m = p.matcher(logEntry)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find email
- try (Pattern p = RE2.compile("\\w+@[\\w.]+")) {
- try (Matcher m = p.matcher(logEntry)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Mixed Cyrillic and Latin (common in technical docs)
+ try (Pattern p = RE2.compile("error: Ошибка")) {
+ assertThat(p.matches("error: Ошибка")).isTrue();
}
+ }
- @Test
- void testUnicodeNormalization() {
- // Precomposed vs decomposed (NFC vs NFD)
- // é can be U+00E9 (precomposed) or U+0065 U+0301 (decomposed)
- String precomposed = "café"; // Using precomposed é
-
- try (Pattern p = RE2.compile("café")) {
- assertThat(p.matches(precomposed)).isTrue();
- }
+ @Test
+ void testKoreanCharacters() {
+ // Hangul
+ try (Pattern p = RE2.compile("안녕하세요")) {
+ assertThat(p.matches("안녕하세요")).isTrue();
}
- @Test
- void testRTLScriptMixing() {
- // Right-to-left text mixed with left-to-right
- String mixed = "The word שלום means peace";
-
- try (Pattern p = RE2.compile("שלום")) {
- try (Matcher m = p.matcher(mixed)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Arabic RTL
- String arabicMixed = "Welcome مرحبا to our site";
- try (Pattern p = RE2.compile("مرحبا")) {
- try (Matcher m = p.matcher(arabicMixed)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Mixed Korean and ASCII
+ try (Pattern p = RE2.compile("Hello 세계")) {
+ assertThat(p.matches("Hello 세계")).isTrue();
}
- @Test
- void testUnicodeWordBoundaries() {
- // Word boundaries with CJK (no spaces between words)
- try (Pattern p = RE2.compile("日本")) {
- try (Matcher m = p.matcher("私は日本語を勉強しています")) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Word boundaries with Arabic
- try (Pattern p = RE2.compile("العربية")) {
- try (Matcher m = p.matcher("أنا أتعلم اللغة العربية")) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Korean with numbers
+ try (Pattern p = RE2.compile("테스트\\d+")) {
+ assertThat(p.matches("테스트123")).isTrue();
}
+ }
- @Test
- void testVeryLongUnicodeText() {
- // Large text with mixed scripts
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < 1000; i++) {
- sb.append("Hello 世界 مرحبا שלום Привет 안녕 ");
- }
- String largeText = sb.toString();
-
- // Search in large mixed-script text
- try (Pattern p = RE2.compile("Привет")) {
- try (Matcher m = p.matcher(largeText)) {
- long start = System.currentTimeMillis();
- assertThat(m.find()).isTrue();
- long duration = System.currentTimeMillis() - start;
- assertThat(duration).isLessThan(100); // Should be fast
- }
- }
+ @Test
+ void testThaiCharacters() {
+ // Thai script
+ try (Pattern p = RE2.compile("สวัสดี")) {
+ assertThat(p.matches("สวัสดี")).isTrue();
}
- // ===== Email and URL Pattern Tests =====
-
- @Test
- void testEmailPattern() {
- String emailPattern = "\\w+@\\w+\\.\\w+";
- try (Pattern p = RE2.compile(emailPattern)) {
- assertThat(p.matches("user@example.com")).isTrue();
- assertThat(p.matches("invalid.email")).isFalse();
- assertThat(p.matches("@example.com")).isFalse();
- }
+ // Thai with tone marks
+ try (Pattern p = RE2.compile("ภาษาไทย")) {
+ assertThat(p.matches("ภาษาไทย")).isTrue();
}
+ }
- @Test
- void testURLPattern() {
- String urlPattern = "https?://[\\w.]+(/.*)?";
- try (Pattern p = RE2.compile(urlPattern)) {
- try (Matcher m = p.matcher("https://example.com/path")) {
- assertThat(m.find()).isTrue();
- }
- try (Matcher m = p.matcher("http://test.org")) {
- assertThat(m.find()).isTrue();
- }
- try (Matcher m = p.matcher("ftp://example.com")) {
- assertThat(m.find()).isFalse();
- }
- }
+ @Test
+ void testDevanagariCharacters() {
+ // Hindi
+ try (Pattern p = RE2.compile("नमस्ते")) {
+ assertThat(p.matches("नमस्ते")).isTrue();
}
- // ===== Error Handling Tests =====
-
- @Test
- void testNullPatternThrows() {
- assertThatThrownBy(() -> RE2.compile(null))
- .isInstanceOf(NullPointerException.class);
+ // Sanskrit
+ try (Pattern p = RE2.compile("संस्कृत")) {
+ assertThat(p.matches("संस्कृत")).isTrue();
}
+ }
- @Test
- void testNullInputThrows() {
- try (Pattern p = RE2.compile("test")) {
- assertThatThrownBy(() -> p.matcher(null))
- .isInstanceOf(NullPointerException.class);
- }
+ @Test
+ void testMixedScripts() {
+ // Multiple scripts in one pattern
+ try (Pattern p = RE2.compile("Hello 世界 مرحبا שלום")) {
+ assertThat(p.matches("Hello 世界 مرحبا שלום")).isTrue();
}
- @ParameterizedTest
- @ValueSource(strings = {
- "(unclosed",
- "(?Pempty)",
- "[[invalid"
- })
- void testInvalidPatternThrows(String invalidPattern) {
- assertThatThrownBy(() -> RE2.compile(invalidPattern))
- .isInstanceOf(PatternCompilationException.class)
- .hasMessageContaining("compilation failed");
- }
-
- @Test
- void testPatternCompilationExceptionContainsPattern() {
- try {
- RE2.compile("(unclosed");
- fail("Should have thrown PatternCompilationException");
- } catch (PatternCompilationException e) {
- assertThat(e.getPattern()).isEqualTo("(unclosed");
- assertThat(e.getMessage()).contains("unclosed");
- }
+ // Technical text with multiple scripts
+ try (Pattern p = RE2.compile("Error: 错误 - Ошибка")) {
+ assertThat(p.matches("Error: 错误 - Ошибка")).isTrue();
}
- // ===== Resource Management Tests =====
-
- @Test
- void testPatternClose() {
- // Use compileWithoutCache() to test actual closing
- Pattern p = Pattern.compileWithoutCache("test", true);
- assertThat(p.isClosed()).isFalse();
-
- p.close();
- assertThat(p.isClosed()).isTrue();
+ // Product names mixing scripts
+ try (Pattern p = RE2.compile("Sony ソニー")) {
+ assertThat(p.matches("Sony ソニー")).isTrue();
}
+ }
- @Test
- void testUseAfterClose() {
- // Use compileWithoutCache() to test actual closing
- Pattern p = Pattern.compileWithoutCache("test", true);
- p.close();
-
- assertThatThrownBy(() -> p.matcher("input"))
- .isInstanceOf(IllegalStateException.class)
- .hasMessageContaining("closed");
+ @Test
+ void testUnicodeEmoji() {
+ // Basic emoji
+ try (Pattern p = RE2.compile("😀😁😂")) {
+ assertThat(p.matches("😀😁😂")).isTrue();
}
- @Test
- void testDoubleClose() {
- // Use compileWithoutCache() to test actual closing
- Pattern p = Pattern.compileWithoutCache("test", true);
- p.close();
-
- // Second close should be idempotent (not throw)
- assertThatCode(p::close).doesNotThrowAnyException();
+ // Emoji with text
+ try (Pattern p = RE2.compile("Hello 👋 World 🌍")) {
+ assertThat(p.matches("Hello 👋 World 🌍")).isTrue();
}
- @Test
- void testTryWithResources() {
- // Verify AutoCloseable works correctly with uncached patterns
- Pattern[] holder = new Pattern[1];
-
- try (Pattern p = Pattern.compileWithoutCache("test", true)) {
- holder[0] = p;
- assertThat(p.isClosed()).isFalse();
- }
-
- assertThat(holder[0].isClosed()).isTrue();
+ // Search for emoji in text
+ try (Pattern p = RE2.compile("👍")) {
+ try (Matcher m = p.matcher("Great job! 👍 Keep going!")) {
+ assertThat(m.find()).isTrue();
+ }
}
- @Test
- void testCachedPatternNotClosedOnClose() {
- // Cached patterns should NOT actually close when close() is called
- Pattern p = RE2.compile("test");
- assertThat(p.isClosed()).isFalse();
-
- p.close(); // This should be a no-op for cached patterns
-
- // Pattern should still not be closed (cache manages it)
- assertThat(p.isClosed()).isFalse();
+ // Emoji sequences (family, flags, etc.)
+ try (Pattern p = RE2.compile("🇺🇸")) {
+ assertThat(p.matches("🇺🇸")).isTrue();
}
+ }
- @Test
- void testNestedTryWithResources() {
- try (Pattern p = RE2.compile("test")) {
- try (Matcher m = p.matcher("test")) {
- assertThat(m.matches()).isTrue();
- }
- }
+ @Test
+ void testSpecialUnicodeSymbols() {
+ // Currency symbols
+ try (Pattern p = RE2.compile("€\\d+\\.\\d{2}")) {
+ assertThat(p.matches("€19.99")).isTrue();
}
- @Test
- void testMultiplePatternsIndependent() {
- try (Pattern p1 = RE2.compile("pattern1");
- Pattern p2 = RE2.compile("pattern2");
- Pattern p3 = RE2.compile("pattern3")) {
-
- assertThat(p1.matches("pattern1")).isTrue();
- assertThat(p2.matches("pattern2")).isTrue();
- assertThat(p3.matches("pattern3")).isTrue();
-
- assertThat(p1.matches("pattern2")).isFalse();
- }
+ try (Pattern p = RE2.compile("£\\d+")) {
+ assertThat(p.matches("£100")).isTrue();
}
- // ===== Complex Pattern Tests =====
-
- @Test
- void testIPv4Pattern() {
- String ipPattern = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}";
- try (Pattern p = RE2.compile(ipPattern)) {
- assertThat(p.matches("192.168.1.1")).isTrue();
- assertThat(p.matches("10.0.0.1")).isTrue();
- assertThat(p.matches("999.999.999.999")).isTrue(); // Matches pattern, not valid IP
- assertThat(p.matches("192.168.1")).isFalse();
- }
+ try (Pattern p = RE2.compile("¥\\d+")) {
+ assertThat(p.matches("¥1000")).isTrue();
}
- @Test
- void testDatePattern() {
- String datePattern = "\\d{4}-\\d{2}-\\d{2}";
- try (Pattern p = RE2.compile(datePattern)) {
- assertThat(p.matches("2025-11-17")).isTrue();
- assertThat(p.matches("2025-1-17")).isFalse();
- assertThat(p.matches("25-11-17")).isFalse();
- }
+ // Math symbols
+ try (Pattern p = RE2.compile("∑.*=.*∞")) {
+ try (Matcher m = p.matcher("∑x = ∞")) {
+ assertThat(m.find()).isTrue();
+ }
}
- @Test
- void testPhoneNumberPattern() {
- String phonePattern = "\\(?\\d{3}\\)?[- ]?\\d{3}[- ]?\\d{4}";
- try (Pattern p = RE2.compile(phonePattern)) {
- try (Matcher m = p.matcher("(555) 123-4567")) {
- assertThat(m.find()).isTrue();
- }
- try (Matcher m = p.matcher("555-123-4567")) {
- assertThat(m.find()).isTrue();
- }
- try (Matcher m = p.matcher("5551234567")) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Copyright and trademark
+ try (Pattern p = RE2.compile("©.*®.*™")) {
+ try (Matcher m = p.matcher("© 2025 Company® Product™")) {
+ assertThat(m.find()).isTrue();
+ }
}
+ }
- @Test
- void testComplexAlternation() {
- try (Pattern p = RE2.compile("(cat|dog|bird|fish)")) {
- assertThat(p.matches("cat")).isTrue();
- assertThat(p.matches("dog")).isTrue();
- assertThat(p.matches("fish")).isTrue();
- assertThat(p.matches("cow")).isFalse();
- }
+ @Test
+ void testUnicodeCaseInsensitive() {
+ // German with umlauts (ä, ö, ü have proper case folding)
+ try (Pattern p = RE2.compile("münchen", false)) {
+ assertThat(p.matches("münchen")).isTrue();
+ assertThat(p.matches("MÜNCHEN")).isTrue();
}
- @Test
- void testNestedGroups() {
- try (Pattern p = RE2.compile("((a|b)(c|d))")) {
- assertThat(p.matches("ac")).isTrue();
- assertThat(p.matches("ad")).isTrue();
- assertThat(p.matches("bc")).isTrue();
- assertThat(p.matches("bd")).isTrue();
- assertThat(p.matches("ab")).isFalse();
- }
+ // Greek case insensitive
+ try (Pattern p = RE2.compile("ελληνικά", false)) {
+ assertThat(p.matches("ελληνικά")).isTrue();
+ assertThat(p.matches("ΕΛΛΗΝΙΚΆ")).isTrue();
}
- // ===== Edge Cases =====
-
- @Test
- void testVeryLongPattern() {
- String longPattern = "a".repeat(1000);
- try (Pattern p = RE2.compile(longPattern)) {
- assertThat(p.matches(longPattern)).isTrue();
- assertThat(p.matches("a".repeat(999))).isFalse();
- }
+ // Cyrillic case insensitive
+ try (Pattern p = RE2.compile("привет", false)) {
+ assertThat(p.matches("привет")).isTrue();
+ assertThat(p.matches("ПРИВЕТ")).isTrue();
}
- @Test
- void testVeryLongInput() {
- String longInput = "x".repeat(10000) + "needle" + "y".repeat(10000);
- try (Pattern p = RE2.compile("needle")) {
- try (Matcher m = p.matcher(longInput)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Note: German ß does NOT fold to SS in RE2 (unlike Java)
+ // This is intentional RE2 behavior for correctness
+ try (Pattern p = RE2.compile("straße", false)) {
+ assertThat(p.matches("straße")).isTrue();
+ // STRASSE would NOT match - ß ≠ SS in RE2
}
+ }
- @Test
- void testPatternWithManyAlternatives() {
- StringBuilder pattern = new StringBuilder();
- for (int i = 0; i < 100; i++) {
- if (i > 0) pattern.append("|");
- pattern.append("word").append(i);
- }
+ @Test
+ void testUnicodeInLogProcessing() {
+ // Simulating logs with international user data
+ String logEntry = "2025-11-17 [INFO] User 田中太郎 (tanaka@example.jp) logged in from 東京";
- try (Pattern p = RE2.compile(pattern.toString())) {
- assertThat(p.matches("word50")).isTrue();
- assertThat(p.matches("word99")).isTrue();
- assertThat(p.matches("word100")).isFalse();
- }
+ // Find Japanese name
+ try (Pattern p = RE2.compile("[\\x{4e00}-\\x{9fff}\\x{3040}-\\x{309f}\\x{30a0}-\\x{30ff}]+")) {
+ try (Matcher m = p.matcher(logEntry)) {
+ assertThat(m.find()).isTrue();
+ }
}
- @Test
- void testDeepNesting() {
- String pattern = "((((((((((a))))))))))";
- try (Pattern p = RE2.compile(pattern)) {
- assertThat(p.matches("a")).isTrue();
- }
+ // Find email
+ try (Pattern p = RE2.compile("\\w+@[\\w.]+")) {
+ try (Matcher m = p.matcher(logEntry)) {
+ assertThat(m.find()).isTrue();
+ }
}
+ }
- // ===== Whitespace and Special Input Tests =====
+ @Test
+ void testUnicodeNormalization() {
+ // Precomposed vs decomposed (NFC vs NFD)
+ // é can be U+00E9 (precomposed) or U+0065 U+0301 (decomposed)
+ String precomposed = "café"; // Using precomposed é
- @Test
- void testWhitespaceMatching() {
- try (Pattern p = RE2.compile("\\s+")) {
- assertThat(p.matches(" ")).isTrue();
- assertThat(p.matches("\t\n")).isTrue();
- assertThat(p.matches("text")).isFalse();
- }
+ try (Pattern p = RE2.compile("café")) {
+ assertThat(p.matches(precomposed)).isTrue();
}
+ }
- @Test
- void testNewlinesInInput() {
- try (Pattern p = RE2.compile("hello")) {
- try (Matcher m = p.matcher("hello\nworld")) {
- assertThat(m.find()).isTrue();
- }
- }
- }
+ @Test
+ void testRTLScriptMixing() {
+ // Right-to-left text mixed with left-to-right
+ String mixed = "The word שלום means peace";
- @Test
- void testTabsInInput() {
- try (Pattern p = RE2.compile("hello\tworld")) {
- assertThat(p.matches("hello\tworld")).isTrue();
- }
+ try (Pattern p = RE2.compile("שלום")) {
+ try (Matcher m = p.matcher(mixed)) {
+ assertThat(m.find()).isTrue();
+ }
}
- // ===== Resource Leak Tests =====
-
- @Test
- void testManyPatternsNoLeak() {
- // Compile and close many patterns (tests resource cleanup)
- for (int i = 0; i < 1000; i++) {
- try (Pattern p = RE2.compile("pattern" + i)) {
- assertThat(p).isNotNull();
- }
- }
+ // Arabic RTL
+ String arabicMixed = "Welcome مرحبا to our site";
+ try (Pattern p = RE2.compile("مرحبا")) {
+ try (Matcher m = p.matcher(arabicMixed)) {
+ assertThat(m.find()).isTrue();
+ }
}
+ }
- @Test
- void testManyMatchersNoLeak() {
- try (Pattern p = RE2.compile("test")) {
- for (int i = 0; i < 1000; i++) {
- try (Matcher m = p.matcher("test" + i)) {
- m.matches();
- }
- }
- }
- }
-
- // ===== Concurrent Access Tests =====
+ @Test
+ void testUnicodeWordBoundaries() {
+ // Word boundaries with CJK (no spaces between words)
+ try (Pattern p = RE2.compile("日本")) {
+ try (Matcher m = p.matcher("私は日本語を勉強しています")) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Word boundaries with Arabic
+ try (Pattern p = RE2.compile("العربية")) {
+ try (Matcher m = p.matcher("أنا أتعلم اللغة العربية")) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+ }
+
+ @Test
+ void testVeryLongUnicodeText() {
+ // Large text with mixed scripts
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < 1000; i++) {
+ sb.append("Hello 世界 مرحبا שלום Привет 안녕 ");
+ }
+ String largeText = sb.toString();
+
+ // Search in large mixed-script text
+ try (Pattern p = RE2.compile("Привет")) {
+ try (Matcher m = p.matcher(largeText)) {
+ long start = System.currentTimeMillis();
+ assertThat(m.find()).isTrue();
+ long duration = System.currentTimeMillis() - start;
+ assertThat(duration).isLessThan(100); // Should be fast
+ }
+ }
+ }
+
+ // ===== Email and URL Pattern Tests =====
+
+ @Test
+ void testEmailPattern() {
+ String emailPattern = "\\w+@\\w+\\.\\w+";
+ try (Pattern p = RE2.compile(emailPattern)) {
+ assertThat(p.matches("user@example.com")).isTrue();
+ assertThat(p.matches("invalid.email")).isFalse();
+ assertThat(p.matches("@example.com")).isFalse();
+ }
+ }
+
+ @Test
+ void testURLPattern() {
+ String urlPattern = "https?://[\\w.]+(/.*)?";
+ try (Pattern p = RE2.compile(urlPattern)) {
+ try (Matcher m = p.matcher("https://example.com/path")) {
+ assertThat(m.find()).isTrue();
+ }
+ try (Matcher m = p.matcher("http://test.org")) {
+ assertThat(m.find()).isTrue();
+ }
+ try (Matcher m = p.matcher("ftp://example.com")) {
+ assertThat(m.find()).isFalse();
+ }
+ }
+ }
+
+ // ===== Error Handling Tests =====
+
+ @Test
+ void testNullPatternThrows() {
+ assertThatThrownBy(() -> RE2.compile(null)).isInstanceOf(NullPointerException.class);
+ }
+
+ @Test
+ void testNullInputThrows() {
+ try (Pattern p = RE2.compile("test")) {
+ assertThatThrownBy(() -> p.matcher(null)).isInstanceOf(NullPointerException.class);
+ }
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"(unclosed", "(?Pempty)", "[[invalid"})
+ void testInvalidPatternThrows(String invalidPattern) {
+ assertThatThrownBy(() -> RE2.compile(invalidPattern))
+ .isInstanceOf(PatternCompilationException.class)
+ .hasMessageContaining("compilation failed");
+ }
+
+ @Test
+ void testPatternCompilationExceptionContainsPattern() {
+ try {
+ RE2.compile("(unclosed");
+ fail("Should have thrown PatternCompilationException");
+ } catch (PatternCompilationException e) {
+ assertThat(e.getPattern()).isEqualTo("(unclosed");
+ assertThat(e.getMessage()).contains("unclosed");
+ }
+ }
+
+ // ===== Resource Management Tests =====
+
+ @Test
+ void testPatternClose() {
+ // Use compileWithoutCache() to test actual closing
+ Pattern p = Pattern.compileWithoutCache("test", true);
+ assertThat(p.isClosed()).isFalse();
+
+ p.close();
+ assertThat(p.isClosed()).isTrue();
+ }
+
+ @Test
+ void testUseAfterClose() {
+ // Use compileWithoutCache() to test actual closing
+ Pattern p = Pattern.compileWithoutCache("test", true);
+ p.close();
+
+ assertThatThrownBy(() -> p.matcher("input"))
+ .isInstanceOf(IllegalStateException.class)
+ .hasMessageContaining("closed");
+ }
+
+ @Test
+ void testDoubleClose() {
+ // Use compileWithoutCache() to test actual closing
+ Pattern p = Pattern.compileWithoutCache("test", true);
+ p.close();
+
+ // Second close should be idempotent (not throw)
+ assertThatCode(p::close).doesNotThrowAnyException();
+ }
+
+ @Test
+ void testTryWithResources() {
+ // Verify AutoCloseable works correctly with uncached patterns
+ Pattern[] holder = new Pattern[1];
+
+ try (Pattern p = Pattern.compileWithoutCache("test", true)) {
+ holder[0] = p;
+ assertThat(p.isClosed()).isFalse();
+ }
+
+ assertThat(holder[0].isClosed()).isTrue();
+ }
+
+ @Test
+ void testCachedPatternNotClosedOnClose() {
+ // Cached patterns should NOT actually close when close() is called
+ Pattern p = RE2.compile("test");
+ assertThat(p.isClosed()).isFalse();
+
+ p.close(); // This should be a no-op for cached patterns
+
+ // Pattern should still not be closed (cache manages it)
+ assertThat(p.isClosed()).isFalse();
+ }
+
+ @Test
+ void testNestedTryWithResources() {
+ try (Pattern p = RE2.compile("test")) {
+ try (Matcher m = p.matcher("test")) {
+ assertThat(m.matches()).isTrue();
+ }
+ }
+ }
+
+ @Test
+ void testMultiplePatternsIndependent() {
+ try (Pattern p1 = RE2.compile("pattern1");
+ Pattern p2 = RE2.compile("pattern2");
+ Pattern p3 = RE2.compile("pattern3")) {
+
+ assertThat(p1.matches("pattern1")).isTrue();
+ assertThat(p2.matches("pattern2")).isTrue();
+ assertThat(p3.matches("pattern3")).isTrue();
+
+ assertThat(p1.matches("pattern2")).isFalse();
+ }
+ }
+
+ // ===== Complex Pattern Tests =====
+
+ @Test
+ void testIPv4Pattern() {
+ String ipPattern = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}";
+ try (Pattern p = RE2.compile(ipPattern)) {
+ assertThat(p.matches("192.168.1.1")).isTrue();
+ assertThat(p.matches("10.0.0.1")).isTrue();
+ assertThat(p.matches("999.999.999.999")).isTrue(); // Matches pattern, not valid IP
+ assertThat(p.matches("192.168.1")).isFalse();
+ }
+ }
+
+ @Test
+ void testDatePattern() {
+ String datePattern = "\\d{4}-\\d{2}-\\d{2}";
+ try (Pattern p = RE2.compile(datePattern)) {
+ assertThat(p.matches("2025-11-17")).isTrue();
+ assertThat(p.matches("2025-1-17")).isFalse();
+ assertThat(p.matches("25-11-17")).isFalse();
+ }
+ }
+
+ @Test
+ void testPhoneNumberPattern() {
+ String phonePattern = "\\(?\\d{3}\\)?[- ]?\\d{3}[- ]?\\d{4}";
+ try (Pattern p = RE2.compile(phonePattern)) {
+ try (Matcher m = p.matcher("(555) 123-4567")) {
+ assertThat(m.find()).isTrue();
+ }
+ try (Matcher m = p.matcher("555-123-4567")) {
+ assertThat(m.find()).isTrue();
+ }
+ try (Matcher m = p.matcher("5551234567")) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+ }
+
+ @Test
+ void testComplexAlternation() {
+ try (Pattern p = RE2.compile("(cat|dog|bird|fish)")) {
+ assertThat(p.matches("cat")).isTrue();
+ assertThat(p.matches("dog")).isTrue();
+ assertThat(p.matches("fish")).isTrue();
+ assertThat(p.matches("cow")).isFalse();
+ }
+ }
+
+ @Test
+ void testNestedGroups() {
+ try (Pattern p = RE2.compile("((a|b)(c|d))")) {
+ assertThat(p.matches("ac")).isTrue();
+ assertThat(p.matches("ad")).isTrue();
+ assertThat(p.matches("bc")).isTrue();
+ assertThat(p.matches("bd")).isTrue();
+ assertThat(p.matches("ab")).isFalse();
+ }
+ }
+
+ // ===== Edge Cases =====
+
+ @Test
+ void testVeryLongPattern() {
+ String longPattern = "a".repeat(1000);
+ try (Pattern p = RE2.compile(longPattern)) {
+ assertThat(p.matches(longPattern)).isTrue();
+ assertThat(p.matches("a".repeat(999))).isFalse();
+ }
+ }
+
+ @Test
+ void testVeryLongInput() {
+ String longInput = "x".repeat(10000) + "needle" + "y".repeat(10000);
+ try (Pattern p = RE2.compile("needle")) {
+ try (Matcher m = p.matcher(longInput)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+ }
+
+ @Test
+ void testPatternWithManyAlternatives() {
+ StringBuilder pattern = new StringBuilder();
+ for (int i = 0; i < 100; i++) {
+ if (i > 0) pattern.append("|");
+ pattern.append("word").append(i);
+ }
+
+ try (Pattern p = RE2.compile(pattern.toString())) {
+ assertThat(p.matches("word50")).isTrue();
+ assertThat(p.matches("word99")).isTrue();
+ assertThat(p.matches("word100")).isFalse();
+ }
+ }
+
+ @Test
+ void testDeepNesting() {
+ String pattern = "((((((((((a))))))))))";
+ try (Pattern p = RE2.compile(pattern)) {
+ assertThat(p.matches("a")).isTrue();
+ }
+ }
+
+ // ===== Whitespace and Special Input Tests =====
+
+ @Test
+ void testWhitespaceMatching() {
+ try (Pattern p = RE2.compile("\\s+")) {
+ assertThat(p.matches(" ")).isTrue();
+ assertThat(p.matches("\t\n")).isTrue();
+ assertThat(p.matches("text")).isFalse();
+ }
+ }
+
+ @Test
+ void testNewlinesInInput() {
+ try (Pattern p = RE2.compile("hello")) {
+ try (Matcher m = p.matcher("hello\nworld")) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+ }
+
+ @Test
+ void testTabsInInput() {
+ try (Pattern p = RE2.compile("hello\tworld")) {
+ assertThat(p.matches("hello\tworld")).isTrue();
+ }
+ }
+
+ // ===== Resource Leak Tests =====
+
+ @Test
+ void testManyPatternsNoLeak() {
+ // Compile and close many patterns (tests resource cleanup)
+ for (int i = 0; i < 1000; i++) {
+ try (Pattern p = RE2.compile("pattern" + i)) {
+ assertThat(p).isNotNull();
+ }
+ }
+ }
+
+ @Test
+ void testManyMatchersNoLeak() {
+ try (Pattern p = RE2.compile("test")) {
+ for (int i = 0; i < 1000; i++) {
+ try (Matcher m = p.matcher("test" + i)) {
+ m.matches();
+ }
+ }
+ }
+ }
+
+ // ===== Concurrent Access Tests =====
- @Test
- void testConcurrentPatternCompilation() throws InterruptedException {
- int threadCount = 10;
- int patternsPerThread = 100;
- CountDownLatch latch = new CountDownLatch(threadCount);
- AtomicInteger errors = new AtomicInteger(0);
-
- List threads = new ArrayList<>();
- for (int t = 0; t < threadCount; t++) {
- int threadId = t;
- Thread thread = new Thread(() -> {
+ @Test
+ void testConcurrentPatternCompilation() throws InterruptedException {
+ int threadCount = 10;
+ int patternsPerThread = 100;
+ CountDownLatch latch = new CountDownLatch(threadCount);
+ AtomicInteger errors = new AtomicInteger(0);
+
+ List threads = new ArrayList<>();
+ for (int t = 0; t < threadCount; t++) {
+ int threadId = t;
+ Thread thread =
+ new Thread(
+ () -> {
try {
- for (int i = 0; i < patternsPerThread; i++) {
- try (Pattern p = RE2.compile("thread" + threadId + "pattern" + i)) {
- p.matches("thread" + threadId + "pattern" + i);
- }
+ for (int i = 0; i < patternsPerThread; i++) {
+ try (Pattern p = RE2.compile("thread" + threadId + "pattern" + i)) {
+ p.matches("thread" + threadId + "pattern" + i);
}
+ }
} catch (Exception e) {
- errors.incrementAndGet();
+ errors.incrementAndGet();
} finally {
- latch.countDown();
+ latch.countDown();
}
- });
- threads.add(thread);
- thread.start();
- }
-
- latch.await();
- assertThat(errors.get()).isEqualTo(0);
+ });
+ threads.add(thread);
+ thread.start();
}
- // ===== Pattern Properties Tests =====
+ latch.await();
+ assertThat(errors.get()).isEqualTo(0);
+ }
- @Test
- void testPatternProperties() {
- try (Pattern p = RE2.compile("test.*pattern", false)) {
- assertThat(p.pattern()).isEqualTo("test.*pattern");
- assertThat(p.isCaseSensitive()).isFalse();
- assertThat(p.isClosed()).isFalse();
- }
+ // ===== Pattern Properties Tests =====
+
+ @Test
+ void testPatternProperties() {
+ try (Pattern p = RE2.compile("test.*pattern", false)) {
+ assertThat(p.pattern()).isEqualTo("test.*pattern");
+ assertThat(p.isCaseSensitive()).isFalse();
+ assertThat(p.isClosed()).isFalse();
}
+ }
- @Test
- void testMatcherProperties() {
- try (Pattern p = RE2.compile("test")) {
- try (Matcher m = p.matcher("input")) {
- assertThat(m.pattern()).isSameAs(p);
- assertThat(m.input()).isEqualTo("input");
- }
- }
+ @Test
+ void testMatcherProperties() {
+ try (Pattern p = RE2.compile("test")) {
+ try (Matcher m = p.matcher("input")) {
+ assertThat(m.pattern()).isSameAs(p);
+ assertThat(m.input()).isEqualTo("input");
+ }
}
+ }
- // ===== ReDoS Safety Tests =====
+ // ===== ReDoS Safety Tests =====
- @Test
- void testReDoSSafePatterns() {
- // Patterns that would cause catastrophic backtracking in Java regex
- // RE2 handles these in linear time
+ @Test
+ void testReDoSSafePatterns() {
+ // Patterns that would cause catastrophic backtracking in Java regex
+ // RE2 handles these in linear time
- String[] redosPatterns = {
- "(a+)+b",
- "(a*)*b",
- "(a|a)*b",
- "(a|ab)*c"
- };
+ String[] redosPatterns = {"(a+)+b", "(a*)*b", "(a|a)*b", "(a|ab)*c"};
- for (String pattern : redosPatterns) {
- try (Pattern p = RE2.compile(pattern)) {
- // These should complete quickly (RE2 is linear time)
- // In Java regex, these would hang on long non-matching input
- String input = "a".repeat(100) + "x";
+ for (String pattern : redosPatterns) {
+ try (Pattern p = RE2.compile(pattern)) {
+ // These should complete quickly (RE2 is linear time)
+ // In Java regex, these would hang on long non-matching input
+ String input = "a".repeat(100) + "x";
- try (Matcher m = p.matcher(input)) {
- long start = System.currentTimeMillis();
- boolean matches = m.find();
- long duration = System.currentTimeMillis() - start;
+ try (Matcher m = p.matcher(input)) {
+ long start = System.currentTimeMillis();
+ boolean matches = m.find();
+ long duration = System.currentTimeMillis() - start;
- // Should complete in milliseconds, not seconds
- assertThat(duration).isLessThan(100);
- }
- }
+ // Should complete in milliseconds, not seconds
+ assertThat(duration).isLessThan(100);
}
+ }
}
+ }
- // ===== toString() Tests =====
+ // ===== toString() Tests =====
- @Test
- void testToStringDoesNotThrow() {
- try (Pattern p = RE2.compile("test")) {
- assertThat(p.toString()).isNotNull();
+ @Test
+ void testToStringDoesNotThrow() {
+ try (Pattern p = RE2.compile("test")) {
+ assertThat(p.toString()).isNotNull();
- try (Matcher m = p.matcher("input")) {
- assertThat(m.toString()).isNotNull();
- }
- }
+ try (Matcher m = p.matcher("input")) {
+ assertThat(m.toString()).isNotNull();
+ }
}
+ }
- // ===== Large Scale Tests =====
+ // ===== Large Scale Tests =====
- @Test
- void testManyDifferentPatterns() {
- // Test each pattern with matching and non-matching input
- try (Pattern p = RE2.compile("\\d+")) {
- assertThat(p.matches("123")).isTrue();
- assertThat(p.matches("abc")).isFalse();
- }
+ @Test
+ void testManyDifferentPatterns() {
+ // Test each pattern with matching and non-matching input
+ try (Pattern p = RE2.compile("\\d+")) {
+ assertThat(p.matches("123")).isTrue();
+ assertThat(p.matches("abc")).isFalse();
+ }
- try (Pattern p = RE2.compile("\\w+")) {
- assertThat(p.matches("hello123")).isTrue();
- assertThat(p.matches("!!!")).isFalse();
- }
+ try (Pattern p = RE2.compile("\\w+")) {
+ assertThat(p.matches("hello123")).isTrue();
+ assertThat(p.matches("!!!")).isFalse();
+ }
- try (Pattern p = RE2.compile("\\s+")) {
- assertThat(p.matches(" ")).isTrue();
- assertThat(p.matches("text")).isFalse();
- }
+ try (Pattern p = RE2.compile("\\s+")) {
+ assertThat(p.matches(" ")).isTrue();
+ assertThat(p.matches("text")).isFalse();
+ }
- try (Pattern p = RE2.compile("[a-z]+")) {
- assertThat(p.matches("abc")).isTrue();
- assertThat(p.matches("ABC")).isFalse();
- }
+ try (Pattern p = RE2.compile("[a-z]+")) {
+ assertThat(p.matches("abc")).isTrue();
+ assertThat(p.matches("ABC")).isFalse();
+ }
- try (Pattern p = RE2.compile("[A-Z]+")) {
- assertThat(p.matches("ABC")).isTrue();
- assertThat(p.matches("abc")).isFalse();
- }
+ try (Pattern p = RE2.compile("[A-Z]+")) {
+ assertThat(p.matches("ABC")).isTrue();
+ assertThat(p.matches("abc")).isFalse();
+ }
- try (Pattern p = RE2.compile(".*")) {
- assertThat(p.matches("anything")).isTrue();
- assertThat(p.matches("")).isTrue();
- }
+ try (Pattern p = RE2.compile(".*")) {
+ assertThat(p.matches("anything")).isTrue();
+ assertThat(p.matches("")).isTrue();
+ }
- try (Pattern p = RE2.compile(".+")) {
- assertThat(p.matches("something")).isTrue();
- assertThat(p.matches("")).isFalse();
- }
+ try (Pattern p = RE2.compile(".+")) {
+ assertThat(p.matches("something")).isTrue();
+ assertThat(p.matches("")).isFalse();
+ }
- try (Pattern p = RE2.compile("a*")) {
- assertThat(p.matches("aaa")).isTrue();
- assertThat(p.matches("")).isTrue();
- assertThat(p.matches("b")).isFalse();
- }
+ try (Pattern p = RE2.compile("a*")) {
+ assertThat(p.matches("aaa")).isTrue();
+ assertThat(p.matches("")).isTrue();
+ assertThat(p.matches("b")).isFalse();
+ }
- try (Pattern p = RE2.compile("a+")) {
- assertThat(p.matches("aaa")).isTrue();
- assertThat(p.matches("")).isFalse();
- }
+ try (Pattern p = RE2.compile("a+")) {
+ assertThat(p.matches("aaa")).isTrue();
+ assertThat(p.matches("")).isFalse();
+ }
- try (Pattern p = RE2.compile("^start")) {
- try (Matcher m = p.matcher("start here")) {
- assertThat(m.find()).isTrue();
- }
- try (Matcher m = p.matcher("here start")) {
- assertThat(m.find()).isFalse();
- }
- }
+ try (Pattern p = RE2.compile("^start")) {
+ try (Matcher m = p.matcher("start here")) {
+ assertThat(m.find()).isTrue();
+ }
+ try (Matcher m = p.matcher("here start")) {
+ assertThat(m.find()).isFalse();
+ }
+ }
- try (Pattern p = RE2.compile("end$")) {
- try (Matcher m = p.matcher("at the end")) {
- assertThat(m.find()).isTrue();
- }
- try (Matcher m = p.matcher("end here")) {
- assertThat(m.find()).isFalse();
- }
- }
+ try (Pattern p = RE2.compile("end$")) {
+ try (Matcher m = p.matcher("at the end")) {
+ assertThat(m.find()).isTrue();
+ }
+ try (Matcher m = p.matcher("end here")) {
+ assertThat(m.find()).isFalse();
+ }
+ }
- try (Pattern p = RE2.compile("^exact$")) {
- assertThat(p.matches("exact")).isTrue();
- assertThat(p.matches("exact ")).isFalse();
- assertThat(p.matches(" exact")).isFalse();
- }
+ try (Pattern p = RE2.compile("^exact$")) {
+ assertThat(p.matches("exact")).isTrue();
+ assertThat(p.matches("exact ")).isFalse();
+ assertThat(p.matches(" exact")).isFalse();
+ }
- try (Pattern p = RE2.compile("hello|world")) {
- assertThat(p.matches("hello")).isTrue();
- assertThat(p.matches("world")).isTrue();
- assertThat(p.matches("goodbye")).isFalse();
- }
+ try (Pattern p = RE2.compile("hello|world")) {
+ assertThat(p.matches("hello")).isTrue();
+ assertThat(p.matches("world")).isTrue();
+ assertThat(p.matches("goodbye")).isFalse();
+ }
- try (Pattern p = RE2.compile("(cat|dog)")) {
- assertThat(p.matches("cat")).isTrue();
- assertThat(p.matches("dog")).isTrue();
- assertThat(p.matches("bird")).isFalse();
- }
+ try (Pattern p = RE2.compile("(cat|dog)")) {
+ assertThat(p.matches("cat")).isTrue();
+ assertThat(p.matches("dog")).isTrue();
+ assertThat(p.matches("bird")).isFalse();
+ }
- try (Pattern p = RE2.compile("\\d{3}-\\d{4}")) {
- assertThat(p.matches("123-4567")).isTrue();
- assertThat(p.matches("12-4567")).isFalse();
- }
+ try (Pattern p = RE2.compile("\\d{3}-\\d{4}")) {
+ assertThat(p.matches("123-4567")).isTrue();
+ assertThat(p.matches("12-4567")).isFalse();
+ }
- try (Pattern p = RE2.compile("\\w+@\\w+\\.\\w+")) {
- assertThat(p.matches("user@example.com")).isTrue();
- assertThat(p.matches("invalid")).isFalse();
- }
+ try (Pattern p = RE2.compile("\\w+@\\w+\\.\\w+")) {
+ assertThat(p.matches("user@example.com")).isTrue();
+ assertThat(p.matches("invalid")).isFalse();
}
+ }
- @Test
- void testQuickSuccessiveOperations() {
- // Test rapid pattern creation and matching (stress test)
- for (int i = 0; i < 100; i++) {
- boolean matches = RE2.matches("test" + i, "test" + i);
- assertThat(matches).isTrue();
+ @Test
+ void testQuickSuccessiveOperations() {
+ // Test rapid pattern creation and matching (stress test)
+ for (int i = 0; i < 100; i++) {
+ boolean matches = RE2.matches("test" + i, "test" + i);
+ assertThat(matches).isTrue();
- // Also verify non-match
- matches = RE2.matches("test" + i, "different" + i);
- assertThat(matches).isFalse();
- }
+ // Also verify non-match
+ matches = RE2.matches("test" + i, "different" + i);
+ assertThat(matches).isFalse();
}
+ }
- @Test
- void testPatternReuseManyTimes() {
- // Test a single pattern used many times
- try (Pattern p = RE2.compile("\\d+")) {
- for (int i = 0; i < 1000; i++) {
- assertThat(p.matches(String.valueOf(i))).isTrue();
- assertThat(p.matches("text" + i)).isFalse();
- }
- }
+ @Test
+ void testPatternReuseManyTimes() {
+ // Test a single pattern used many times
+ try (Pattern p = RE2.compile("\\d+")) {
+ for (int i = 0; i < 1000; i++) {
+ assertThat(p.matches(String.valueOf(i))).isTrue();
+ assertThat(p.matches("text" + i)).isFalse();
+ }
}
+ }
- // ===== Log Processing Tests (Real-World Use Case) =====
+ // ===== Log Processing Tests (Real-World Use Case) =====
- @Test
- void testTypicalLogEntry() {
- String logEntry = "2025-11-17 10:30:45.123 [INFO] com.example.Service - Processing request id=12345 user=admin@example.com duration=250ms status=200";
+ @Test
+ void testTypicalLogEntry() {
+ String logEntry =
+ "2025-11-17 10:30:45.123 [INFO] com.example.Service - Processing request id=12345"
+ + " user=admin@example.com duration=250ms status=200";
- // Find timestamp
- try (Pattern p = RE2.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}")) {
- try (Matcher m = p.matcher(logEntry)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find timestamp
+ try (Pattern p = RE2.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}")) {
+ try (Matcher m = p.matcher(logEntry)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Find log level
- try (Pattern p = RE2.compile("\\[(INFO|WARN|ERROR|DEBUG)\\]")) {
- try (Matcher m = p.matcher(logEntry)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find log level
+ try (Pattern p = RE2.compile("\\[(INFO|WARN|ERROR|DEBUG)\\]")) {
+ try (Matcher m = p.matcher(logEntry)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Find user email
- try (Pattern p = RE2.compile("user=\\w+@[\\w.]+")) {
- try (Matcher m = p.matcher(logEntry)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find user email
+ try (Pattern p = RE2.compile("user=\\w+@[\\w.]+")) {
+ try (Matcher m = p.matcher(logEntry)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Find duration
- try (Pattern p = RE2.compile("duration=\\d+ms")) {
- try (Matcher m = p.matcher(logEntry)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find duration
+ try (Pattern p = RE2.compile("duration=\\d+ms")) {
+ try (Matcher m = p.matcher(logEntry)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Find request ID
- try (Pattern p = RE2.compile("id=\\d+")) {
- try (Matcher m = p.matcher(logEntry)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find request ID
+ try (Pattern p = RE2.compile("id=\\d+")) {
+ try (Matcher m = p.matcher(logEntry)) {
+ assertThat(m.find()).isTrue();
+ }
}
+ }
- @Test
- void testMultiLineLogEntry() {
- String multiLineLog = """
+ @Test
+ void testMultiLineLogEntry() {
+ String multiLineLog =
+ """
2025-11-17 10:30:45.123 [ERROR] com.example.Service - Request failed
java.lang.NullPointerException: Cannot invoke method on null object
at com.example.Service.processRequest(Service.java:123)
@@ -1165,418 +1148,442 @@ void testMultiLineLogEntry() {
... 10 more
""";
- // Find error class
- try (Pattern p = RE2.compile("java\\.lang\\.\\w+Exception")) {
- try (Matcher m = p.matcher(multiLineLog)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find stack trace lines
- try (Pattern p = RE2.compile("at [\\w.$]+\\([\\w.]+:\\d+\\)")) {
- try (Matcher m = p.matcher(multiLineLog)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find file and line number
- try (Pattern p = RE2.compile("Service\\.java:\\d+")) {
- try (Matcher m = p.matcher(multiLineLog)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find error class
+ try (Pattern p = RE2.compile("java\\.lang\\.\\w+Exception")) {
+ try (Matcher m = p.matcher(multiLineLog)) {
+ assertThat(m.find()).isTrue();
+ }
}
- @Test
- void testLargeLogChunk() {
- // Simulate processing a large log file (10,000 lines)
- StringBuilder largeLog = new StringBuilder();
- for (int i = 0; i < 10000; i++) {
- largeLog.append(String.format("2025-11-17 10:%02d:%02d [INFO] Request %d processed successfully%n",
- i / 60, i % 60, i));
- }
-
- String logText = largeLog.toString();
-
- // Find all INFO entries
- try (Pattern p = RE2.compile("\\[INFO\\]")) {
- try (Matcher m = p.matcher(logText)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find specific request ID in large log
- try (Pattern p = RE2.compile("Request 5000 processed")) {
- try (Matcher m = p.matcher(logText)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Pattern that doesn't exist
- try (Pattern p = RE2.compile("\\[ERROR\\]")) {
- try (Matcher m = p.matcher(logText)) {
- assertThat(m.find()).isFalse();
- }
- }
+ // Find stack trace lines
+ try (Pattern p = RE2.compile("at [\\w.$]+\\([\\w.]+:\\d+\\)")) {
+ try (Matcher m = p.matcher(multiLineLog)) {
+ assertThat(m.find()).isTrue();
+ }
}
- @Test
- void testApacheAccessLog() {
- String accessLog = "192.168.1.100 - - [17/Nov/2025:10:30:45 +0000] \"GET /api/users?id=123 HTTP/1.1\" 200 1234 \"https://example.com/\" \"Mozilla/5.0\"";
-
- // Find IP address
- try (Pattern p = RE2.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}")) {
- try (Matcher m = p.matcher(accessLog)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find HTTP method
- try (Pattern p = RE2.compile("(GET|POST|PUT|DELETE|PATCH)")) {
- try (Matcher m = p.matcher(accessLog)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find HTTP status
- try (Pattern p = RE2.compile("\" \\d{3} ")) {
- try (Matcher m = p.matcher(accessLog)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find URL path
- try (Pattern p = RE2.compile("/api/\\w+")) {
- try (Matcher m = p.matcher(accessLog)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find file and line number
+ try (Pattern p = RE2.compile("Service\\.java:\\d+")) {
+ try (Matcher m = p.matcher(multiLineLog)) {
+ assertThat(m.find()).isTrue();
+ }
}
+ }
- @Test
- void testJSONLogEntry() {
- String jsonLog = "{\"timestamp\":\"2025-11-17T10:30:45.123Z\",\"level\":\"ERROR\",\"service\":\"api-gateway\",\"message\":\"Connection timeout\",\"userId\":12345,\"requestId\":\"req-abc-123\",\"duration\":5000}";
+ @Test
+ void testLargeLogChunk() {
+ // Simulate processing a large log file (10,000 lines)
+ StringBuilder largeLog = new StringBuilder();
+ for (int i = 0; i < 10000; i++) {
+ largeLog.append(
+ String.format(
+ "2025-11-17 10:%02d:%02d [INFO] Request %d processed successfully%n",
+ i / 60, i % 60, i));
+ }
- // Find timestamp
- try (Pattern p = RE2.compile("\"timestamp\":\"[^\"]+\"")) {
- try (Matcher m = p.matcher(jsonLog)) {
- assertThat(m.find()).isTrue();
- }
- }
+ String logText = largeLog.toString();
- // Find error level
- try (Pattern p = RE2.compile("\"level\":\"ERROR\"")) {
- try (Matcher m = p.matcher(jsonLog)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find user ID
- try (Pattern p = RE2.compile("\"userId\":\\d+")) {
- try (Matcher m = p.matcher(jsonLog)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find request ID pattern
- try (Pattern p = RE2.compile("\"requestId\":\"req-[a-z0-9-]+\"")) {
- try (Matcher m = p.matcher(jsonLog)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find all INFO entries
+ try (Pattern p = RE2.compile("\\[INFO\\]")) {
+ try (Matcher m = p.matcher(logText)) {
+ assertThat(m.find()).isTrue();
+ }
}
- @Test
- void testSearchInVeryLargeLogFile() {
- // Simulate searching in a 1MB log file
- StringBuilder hugeLog = new StringBuilder();
-
- // Add 50,000 normal log entries
- for (int i = 0; i < 50000; i++) {
- hugeLog.append(String.format("[INFO] %d - Normal operation%n", i));
- }
+ // Find specific request ID in large log
+ try (Pattern p = RE2.compile("Request 5000 processed")) {
+ try (Matcher m = p.matcher(logText)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Add a few error entries in the middle
- hugeLog.append("[ERROR] Database connection failed - retry attempt 1\n");
- hugeLog.append("[ERROR] Database connection failed - retry attempt 2\n");
+ // Pattern that doesn't exist
+ try (Pattern p = RE2.compile("\\[ERROR\\]")) {
+ try (Matcher m = p.matcher(logText)) {
+ assertThat(m.find()).isFalse();
+ }
+ }
+ }
- // Add more normal entries
- for (int i = 50000; i < 100000; i++) {
- hugeLog.append(String.format("[INFO] %d - Normal operation%n", i));
- }
+ @Test
+ void testApacheAccessLog() {
+ String accessLog =
+ "192.168.1.100 - - [17/Nov/2025:10:30:45 +0000] \"GET /api/users?id=123 HTTP/1.1\" 200 1234"
+ + " \"https://example.com/\" \"Mozilla/5.0\"";
- String logText = hugeLog.toString();
- assertThat(logText.length()).isGreaterThan(1_000_000); // Over 1MB
+ // Find IP address
+ try (Pattern p = RE2.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}")) {
+ try (Matcher m = p.matcher(accessLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Search for ERROR entries in huge log
- try (Pattern p = RE2.compile("\\[ERROR\\].*")) {
- try (Matcher m = p.matcher(logText)) {
- long start = System.currentTimeMillis();
- boolean found = m.find();
- long duration = System.currentTimeMillis() - start;
+ // Find HTTP method
+ try (Pattern p = RE2.compile("(GET|POST|PUT|DELETE|PATCH)")) {
+ try (Matcher m = p.matcher(accessLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- assertThat(found).isTrue();
- // Should be fast even on huge input (RE2 is linear time)
- assertThat(duration).isLessThan(500);
- }
- }
+ // Find HTTP status
+ try (Pattern p = RE2.compile("\" \\d{3} ")) {
+ try (Matcher m = p.matcher(accessLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Search for pattern that doesn't exist
- try (Pattern p = RE2.compile("\\[CRITICAL\\]")) {
- try (Matcher m = p.matcher(logText)) {
- long start = System.currentTimeMillis();
- boolean found = m.find();
- long duration = System.currentTimeMillis() - start;
-
- assertThat(found).isFalse();
- // Should still be fast even when scanning entire 1MB
- assertThat(duration).isLessThan(500);
- }
- }
+ // Find URL path
+ try (Pattern p = RE2.compile("/api/\\w+")) {
+ try (Matcher m = p.matcher(accessLog)) {
+ assertThat(m.find()).isTrue();
+ }
}
+ }
- @Test
- void testCassandraQueryLog() {
- String cassandraLog = "INFO [Native-Transport-Requests-1] 2025-11-17 10:30:45,123 QueryProcessor.java:169 - Execute CQL3 query: SELECT * FROM keyspace.table WHERE partition_key = 'abc123' AND clustering_key > 100 ALLOW FILTERING";
+ @Test
+ void testJSONLogEntry() {
+ String jsonLog =
+ "{\"timestamp\":\"2025-11-17T10:30:45.123Z\",\"level\":\"ERROR\",\"service\":\"api-gateway\",\"message\":\"Connection"
+ + " timeout\",\"userId\":12345,\"requestId\":\"req-abc-123\",\"duration\":5000}";
- // Find CQL query
- try (Pattern p = RE2.compile("SELECT .* FROM [\\w.]+")) {
- try (Matcher m = p.matcher(cassandraLog)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find timestamp
+ try (Pattern p = RE2.compile("\"timestamp\":\"[^\"]+\"")) {
+ try (Matcher m = p.matcher(jsonLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Find keyspace.table
- try (Pattern p = RE2.compile("FROM [\\w]+\\.[\\w]+")) {
- try (Matcher m = p.matcher(cassandraLog)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find error level
+ try (Pattern p = RE2.compile("\"level\":\"ERROR\"")) {
+ try (Matcher m = p.matcher(jsonLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Find ALLOW FILTERING
- try (Pattern p = RE2.compile("ALLOW FILTERING")) {
- try (Matcher m = p.matcher(cassandraLog)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find user ID
+ try (Pattern p = RE2.compile("\"userId\":\\d+")) {
+ try (Matcher m = p.matcher(jsonLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
- // Find thread name
- try (Pattern p = RE2.compile("\\[Native-Transport-Requests-\\d+\\]")) {
- try (Matcher m = p.matcher(cassandraLog)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Find request ID pattern
+ try (Pattern p = RE2.compile("\"requestId\":\"req-[a-z0-9-]+\"")) {
+ try (Matcher m = p.matcher(jsonLog)) {
+ assertThat(m.find()).isTrue();
+ }
}
+ }
- @Test
- void testSearchMultiplePatternsInLargeText() {
- // Simulate Cassandra SAI scanning large partition with multiple filter terms
- StringBuilder partition = new StringBuilder();
+ @Test
+ void testSearchInVeryLargeLogFile() {
+ // Simulate searching in a 1MB log file
+ StringBuilder hugeLog = new StringBuilder();
- // 10,000 rows in partition
- for (int i = 0; i < 10000; i++) {
- partition.append(String.format("row_%d|user_%d@example.com|status_%s|value_%d|timestamp_%d%n",
- i, i % 100, i % 2 == 0 ? "active" : "inactive", i * 10, System.currentTimeMillis() + i));
- }
+ // Add 50,000 normal log entries
+ for (int i = 0; i < 50000; i++) {
+ hugeLog.append(String.format("[INFO] %d - Normal operation%n", i));
+ }
- String data = partition.toString();
- assertThat(data.length()).isGreaterThan(500_000); // Over 500KB
+ // Add a few error entries in the middle
+ hugeLog.append("[ERROR] Database connection failed - retry attempt 1\n");
+ hugeLog.append("[ERROR] Database connection failed - retry attempt 2\n");
- // Pattern 1: Find rows with specific user pattern
- try (Pattern p = RE2.compile("user_42@example\\.com")) {
- try (Matcher m = p.matcher(data)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Add more normal entries
+ for (int i = 50000; i < 100000; i++) {
+ hugeLog.append(String.format("[INFO] %d - Normal operation%n", i));
+ }
- // Pattern 2: Find active status
- try (Pattern p = RE2.compile("status_active")) {
- try (Matcher m = p.matcher(data)) {
- assertThat(m.find()).isTrue();
- }
- }
+ String logText = hugeLog.toString();
+ assertThat(logText.length()).isGreaterThan(1_000_000); // Over 1MB
- // Pattern 3: Find specific row range
- try (Pattern p = RE2.compile("row_500\\d")) {
- try (Matcher m = p.matcher(data)) {
- assertThat(m.find()).isTrue();
- }
- }
+ // Search for ERROR entries in huge log
+ try (Pattern p = RE2.compile("\\[ERROR\\].*")) {
+ try (Matcher m = p.matcher(logText)) {
+ long start = System.currentTimeMillis();
+ boolean found = m.find();
+ long duration = System.currentTimeMillis() - start;
- // Pattern 4: Complex pattern combining multiple fields
- try (Pattern p = RE2.compile("row_\\d+\\|.*@example\\.com\\|status_active")) {
- try (Matcher m = p.matcher(data)) {
- long start = System.currentTimeMillis();
- boolean found = m.find();
- long duration = System.currentTimeMillis() - start;
-
- assertThat(found).isTrue();
- // Should be fast even on 500KB+ data
- assertThat(duration).isLessThan(200);
- }
- }
+ assertThat(found).isTrue();
+ // Should be fast even on huge input (RE2 is linear time)
+ assertThat(duration).isLessThan(500);
+ }
}
- @Test
- void testRealisticDatabaseTextSearch() {
- // Simulate searching through Cassandra text column with large values
- String[] largeTextSamples = {
- // Sample 1: Large JSON document
- "{\"user\":{\"name\":\"John Doe\",\"email\":\"john@example.com\",\"address\":{\"street\":\"123 Main St\",\"city\":\"Springfield\"},\"orders\":[" +
- "{\"id\":1,\"product\":\"Widget\",\"price\":29.99},{\"id\":2,\"product\":\"Gadget\",\"price\":49.99}]},\"metadata\":{\"source\":\"web\",\"timestamp\":\"2025-11-17T10:30:45Z\"}}",
-
- // Sample 2: Large log blob
- "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" +
- "[ERROR] Retry attempt 1/3\n" +
- "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" +
- "[ERROR] Retry attempt 2/3\n" +
- "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n" +
- "[ERROR] Retry attempt 3/3\n" +
- "[ERROR] All retry attempts exhausted, marking node as DOWN",
-
- // Sample 3: XML document
- "Important Document2025-11-17" +
- "This is a large amount of text that might be stored in a database column." +
- "More content with various special characters: @#$%^&*(){}[]"
- };
-
- for (String text : largeTextSamples) {
- // Search for email pattern
- try (Pattern p = RE2.compile("\\w+@[\\w.]+")) {
- try (Matcher m = p.matcher(text)) {
- // May or may not find depending on sample
- m.find();
- }
- }
+ // Search for pattern that doesn't exist
+ try (Pattern p = RE2.compile("\\[CRITICAL\\]")) {
+ try (Matcher m = p.matcher(logText)) {
+ long start = System.currentTimeMillis();
+ boolean found = m.find();
+ long duration = System.currentTimeMillis() - start;
- // Search for number pattern
- try (Pattern p = RE2.compile("\\d+")) {
- try (Matcher m = p.matcher(text)) {
- assertThat(m.find()).isTrue(); // All samples have numbers
- }
- }
-
- // Search for ERROR keyword
- try (Pattern p = RE2.compile("ERROR")) {
- try (Matcher m = p.matcher(text)) {
- m.find(); // May or may not find
- }
- }
- }
+ assertThat(found).isFalse();
+ // Should still be fast even when scanning entire 1MB
+ assertThat(duration).isLessThan(500);
+ }
}
+ }
- @Test
- void testConcurrentLogSearching() throws InterruptedException {
- // Simulate multiple Cassandra query threads searching logs concurrently
- String largeLog = generateLargeLogData(50000);
-
- int threadCount = 20;
- CountDownLatch latch = new CountDownLatch(threadCount);
- AtomicInteger successCount = new AtomicInteger(0);
- AtomicInteger errors = new AtomicInteger(0);
-
- String[] searchPatterns = {
- "\\[ERROR\\]",
- "user_\\d+@example\\.com",
- "duration=\\d+ms",
- "status=\\d{3}",
- "Exception",
- "192\\.168\\.\\d+\\.\\d+",
- "Request \\d+",
- "Thread-\\d+"
- };
-
- for (int t = 0; t < threadCount; t++) {
- int threadId = t;
- new Thread(() -> {
+ @Test
+ void testCassandraQueryLog() {
+ String cassandraLog =
+ "INFO [Native-Transport-Requests-1] 2025-11-17 10:30:45,123 QueryProcessor.java:169 -"
+ + " Execute CQL3 query: SELECT * FROM keyspace.table WHERE partition_key = 'abc123' AND"
+ + " clustering_key > 100 ALLOW FILTERING";
+
+ // Find CQL query
+ try (Pattern p = RE2.compile("SELECT .* FROM [\\w.]+")) {
+ try (Matcher m = p.matcher(cassandraLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Find keyspace.table
+ try (Pattern p = RE2.compile("FROM [\\w]+\\.[\\w]+")) {
+ try (Matcher m = p.matcher(cassandraLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Find ALLOW FILTERING
+ try (Pattern p = RE2.compile("ALLOW FILTERING")) {
+ try (Matcher m = p.matcher(cassandraLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Find thread name
+ try (Pattern p = RE2.compile("\\[Native-Transport-Requests-\\d+\\]")) {
+ try (Matcher m = p.matcher(cassandraLog)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+ }
+
+ @Test
+ void testSearchMultiplePatternsInLargeText() {
+ // Simulate Cassandra SAI scanning large partition with multiple filter terms
+ StringBuilder partition = new StringBuilder();
+
+ // 10,000 rows in partition
+ for (int i = 0; i < 10000; i++) {
+ partition.append(
+ String.format(
+ "row_%d|user_%d@example.com|status_%s|value_%d|timestamp_%d%n",
+ i,
+ i % 100,
+ i % 2 == 0 ? "active" : "inactive",
+ i * 10,
+ System.currentTimeMillis() + i));
+ }
+
+ String data = partition.toString();
+ assertThat(data.length()).isGreaterThan(500_000); // Over 500KB
+
+ // Pattern 1: Find rows with specific user pattern
+ try (Pattern p = RE2.compile("user_42@example\\.com")) {
+ try (Matcher m = p.matcher(data)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Pattern 2: Find active status
+ try (Pattern p = RE2.compile("status_active")) {
+ try (Matcher m = p.matcher(data)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Pattern 3: Find specific row range
+ try (Pattern p = RE2.compile("row_500\\d")) {
+ try (Matcher m = p.matcher(data)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Pattern 4: Complex pattern combining multiple fields
+ try (Pattern p = RE2.compile("row_\\d+\\|.*@example\\.com\\|status_active")) {
+ try (Matcher m = p.matcher(data)) {
+ long start = System.currentTimeMillis();
+ boolean found = m.find();
+ long duration = System.currentTimeMillis() - start;
+
+ assertThat(found).isTrue();
+ // Should be fast even on 500KB+ data
+ assertThat(duration).isLessThan(200);
+ }
+ }
+ }
+
+ @Test
+ void testRealisticDatabaseTextSearch() {
+ // Simulate searching through Cassandra text column with large values
+ String[] largeTextSamples = {
+ // Sample 1: Large JSON document
+ "{\"user\":{\"name\":\"John"
+ + " Doe\",\"email\":\"john@example.com\",\"address\":{\"street\":\"123 Main"
+ + " St\",\"city\":\"Springfield\"},\"orders\":["
+ + "{\"id\":1,\"product\":\"Widget\",\"price\":29.99},{\"id\":2,\"product\":\"Gadget\",\"price\":49.99}]},\"metadata\":{\"source\":\"web\",\"timestamp\":\"2025-11-17T10:30:45Z\"}}",
+
+ // Sample 2: Large log blob
+ "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n"
+ + "[ERROR] Retry attempt 1/3\n"
+ + "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n"
+ + "[ERROR] Retry attempt 2/3\n"
+ + "[ERROR] Connection timeout to 192.168.1.100:9042 after 5000ms\n"
+ + "[ERROR] Retry attempt 3/3\n"
+ + "[ERROR] All retry attempts exhausted, marking node as DOWN",
+
+ // Sample 3: XML document
+ "Important"
+ + " Document2025-11-17This"
+ + " is a large amount of text that might be stored in a database"
+ + " column.More content with various"
+ + " special characters: @#$%^&*(){}[]"
+ };
+
+ for (String text : largeTextSamples) {
+ // Search for email pattern
+ try (Pattern p = RE2.compile("\\w+@[\\w.]+")) {
+ try (Matcher m = p.matcher(text)) {
+ // May or may not find depending on sample
+ m.find();
+ }
+ }
+
+ // Search for number pattern
+ try (Pattern p = RE2.compile("\\d+")) {
+ try (Matcher m = p.matcher(text)) {
+ assertThat(m.find()).isTrue(); // All samples have numbers
+ }
+ }
+
+ // Search for ERROR keyword
+ try (Pattern p = RE2.compile("ERROR")) {
+ try (Matcher m = p.matcher(text)) {
+ m.find(); // May or may not find
+ }
+ }
+ }
+ }
+
+ @Test
+ void testConcurrentLogSearching() throws InterruptedException {
+ // Simulate multiple Cassandra query threads searching logs concurrently
+ String largeLog = generateLargeLogData(50000);
+
+ int threadCount = 20;
+ CountDownLatch latch = new CountDownLatch(threadCount);
+ AtomicInteger successCount = new AtomicInteger(0);
+ AtomicInteger errors = new AtomicInteger(0);
+
+ String[] searchPatterns = {
+ "\\[ERROR\\]",
+ "user_\\d+@example\\.com",
+ "duration=\\d+ms",
+ "status=\\d{3}",
+ "Exception",
+ "192\\.168\\.\\d+\\.\\d+",
+ "Request \\d+",
+ "Thread-\\d+"
+ };
+
+ for (int t = 0; t < threadCount; t++) {
+ int threadId = t;
+ new Thread(
+ () -> {
try {
- String pattern = searchPatterns[threadId % searchPatterns.length];
- try (Pattern p = RE2.compile(pattern)) {
- try (Matcher m = p.matcher(largeLog)) {
- if (m.find()) {
- successCount.incrementAndGet();
- }
- }
+ String pattern = searchPatterns[threadId % searchPatterns.length];
+ try (Pattern p = RE2.compile(pattern)) {
+ try (Matcher m = p.matcher(largeLog)) {
+ if (m.find()) {
+ successCount.incrementAndGet();
+ }
}
+ }
} catch (Exception e) {
- errors.incrementAndGet();
+ errors.incrementAndGet();
} finally {
- latch.countDown();
+ latch.countDown();
}
- }).start();
- }
-
- latch.await();
- assertThat(errors.get()).isEqualTo(0);
- assertThat(successCount.get()).isGreaterThan(0);
- }
-
- @Test
- void testSQLInjectionPatternDetection() {
- // Test patterns for detecting SQL injection in logs
- String[] suspiciousInputs = {
- "admin' OR '1'='1",
- "' UNION SELECT * FROM users--",
- "1; DROP TABLE users;--",
- "admin'--",
- "' OR 1=1--"
- };
-
- // Pattern to detect SQL injection attempts
- try (Pattern p = RE2.compile("('.*(OR|UNION|DROP|SELECT|--|;).*)|(--)")) {
- for (String input : suspiciousInputs) {
- try (Matcher m = p.matcher(input)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Safe inputs should not match
- try (Matcher m = p.matcher("normal_username")) {
- assertThat(m.find()).isFalse();
- }
- }
- }
-
- @Test
- void testSearchWithLineBreaks() {
- String multiLineData = "Line 1: Some data\n" +
- "Line 2: ERROR - Something failed\n" +
- "Line 3: More data\n" +
- "Line 4: WARNING - Check this\n" +
- "Line 5: Final line";
-
- // Find lines with ERROR
- try (Pattern p = RE2.compile("ERROR")) {
- try (Matcher m = p.matcher(multiLineData)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Find lines with WARNING
- try (Pattern p = RE2.compile("WARNING")) {
- try (Matcher m = p.matcher(multiLineData)) {
- assertThat(m.find()).isTrue();
- }
- }
-
- // Pattern that spans multiple lines won't match (RE2 default behavior)
- try (Pattern p = RE2.compile("Line 2.*Line 3")) {
- try (Matcher m = p.matcher(multiLineData)) {
- assertThat(m.find()).isFalse(); // . doesn't match \n by default
- }
- }
- }
-
- // ===== Helper Methods =====
-
- private String generateLargeLogData(int lineCount) {
- StringBuilder log = new StringBuilder();
- for (int i = 0; i < lineCount; i++) {
- String level = i % 100 == 0 ? "ERROR" : (i % 20 == 0 ? "WARN" : "INFO");
- log.append(String.format("%s [Thread-%d] 2025-11-17 10:%02d:%02d Request %d from user_%d@example.com - duration=%dms status=%d%n",
- level, i % 10, i / 3600, (i / 60) % 60, i, i % 1000, i % 500, 200 + (i % 5)));
- }
- return log.toString();
- }
+ })
+ .start();
+ }
+
+ latch.await();
+ assertThat(errors.get()).isEqualTo(0);
+ assertThat(successCount.get()).isGreaterThan(0);
+ }
+
+ @Test
+ void testSQLInjectionPatternDetection() {
+ // Test patterns for detecting SQL injection in logs
+ String[] suspiciousInputs = {
+ "admin' OR '1'='1",
+ "' UNION SELECT * FROM users--",
+ "1; DROP TABLE users;--",
+ "admin'--",
+ "' OR 1=1--"
+ };
+
+ // Pattern to detect SQL injection attempts
+ try (Pattern p = RE2.compile("('.*(OR|UNION|DROP|SELECT|--|;).*)|(--)")) {
+ for (String input : suspiciousInputs) {
+ try (Matcher m = p.matcher(input)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Safe inputs should not match
+ try (Matcher m = p.matcher("normal_username")) {
+ assertThat(m.find()).isFalse();
+ }
+ }
+ }
+
+ @Test
+ void testSearchWithLineBreaks() {
+ String multiLineData =
+ "Line 1: Some data\n"
+ + "Line 2: ERROR - Something failed\n"
+ + "Line 3: More data\n"
+ + "Line 4: WARNING - Check this\n"
+ + "Line 5: Final line";
+
+ // Find lines with ERROR
+ try (Pattern p = RE2.compile("ERROR")) {
+ try (Matcher m = p.matcher(multiLineData)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Find lines with WARNING
+ try (Pattern p = RE2.compile("WARNING")) {
+ try (Matcher m = p.matcher(multiLineData)) {
+ assertThat(m.find()).isTrue();
+ }
+ }
+
+ // Pattern that spans multiple lines won't match (RE2 default behavior)
+ try (Pattern p = RE2.compile("Line 2.*Line 3")) {
+ try (Matcher m = p.matcher(multiLineData)) {
+ assertThat(m.find()).isFalse(); // . doesn't match \n by default
+ }
+ }
+ }
+
+ // ===== Helper Methods =====
+
+ private String generateLargeLogData(int lineCount) {
+ StringBuilder log = new StringBuilder();
+ for (int i = 0; i < lineCount; i++) {
+ String level = i % 100 == 0 ? "ERROR" : (i % 20 == 0 ? "WARN" : "INFO");
+ log.append(
+ String.format(
+ "%s [Thread-%d] 2025-11-17 10:%02d:%02d Request %d from user_%d@example.com -"
+ + " duration=%dms status=%d%n",
+ level, i % 10, i / 3600, (i / 60) % 60, i, i % 1000, i % 500, 200 + (i % 5)));
+ }
+ return log.toString();
+ }
}
-
diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingIT.java
index 8985f24..0247bb6 100644
--- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingIT.java
+++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingIT.java
@@ -15,783 +15,783 @@
*/
package com.axonops.libre2.api;
+import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
+
import com.axonops.libre2.cache.PatternCache;
import com.axonops.libre2.test.TestUtils;
+import java.util.*;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
-import java.util.*;
-
-import static org.junit.jupiter.api.Assertions.*;
-import static org.junit.jupiter.api.Assumptions.assumeTrue;
-
-/**
- * Tests for bulk matching operations (Collection and array variants).
- */
+/** Tests for bulk matching operations (Collection and array variants). */
class BulkMatchingIT {
- private static PatternCache originalCache;
+ private static PatternCache originalCache;
- @BeforeAll
- static void setUpClass() {
- // Use test config to avoid JMX InstanceAlreadyExistsException
- originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build());
- }
+ @BeforeAll
+ static void setUpClass() {
+ // Use test config to avoid JMX InstanceAlreadyExistsException
+ originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build());
+ }
- @AfterAll
- static void tearDownClass() {
- TestUtils.restoreGlobalCache(originalCache);
- }
+ @AfterAll
+ static void tearDownClass() {
+ TestUtils.restoreGlobalCache(originalCache);
+ }
- /**
- * Detects if running under QEMU emulation (set by CI workflow).
- * Large dataset tests are skipped under QEMU as they're too slow.
- */
- private static boolean isQemuEmulation() {
- return "true".equals(System.getenv("QEMU_EMULATION"));
- }
+ /**
+ * Detects if running under QEMU emulation (set by CI workflow). Large dataset tests are skipped
+ * under QEMU as they're too slow.
+ */
+ private static boolean isQemuEmulation() {
+ return "true".equals(System.getenv("QEMU_EMULATION"));
+ }
- @Test
- void testMatchAll_Collection_Basic() {
- Pattern pattern = Pattern.compile("test.*");
+ @Test
+ void testMatchAll_Collection_Basic() {
+ Pattern pattern = Pattern.compile("test.*");
- List inputs = List.of("test1", "prod1", "test2", "other");
- boolean[] results = pattern.matchAll(inputs);
+ List inputs = List.of("test1", "prod1", "test2", "other");
+ boolean[] results = pattern.matchAll(inputs);
- assertArrayEquals(new boolean[]{true, false, true, false}, results);
- }
+ assertArrayEquals(new boolean[] {true, false, true, false}, results);
+ }
- @Test
- void testMatchAll_Array_Basic() {
- Pattern pattern = Pattern.compile("\\d{3}-\\d{4}");
+ @Test
+ void testMatchAll_Array_Basic() {
+ Pattern pattern = Pattern.compile("\\d{3}-\\d{4}");
- String[] inputs = {"123-4567", "invalid", "999-8888", "nope"};
- boolean[] results = pattern.matchAll(inputs);
+ String[] inputs = {"123-4567", "invalid", "999-8888", "nope"};
+ boolean[] results = pattern.matchAll(inputs);
- assertArrayEquals(new boolean[]{true, false, true, false}, results);
- }
+ assertArrayEquals(new boolean[] {true, false, true, false}, results);
+ }
- @Test
- void testMatchAll_Empty() {
- Pattern pattern = Pattern.compile("test");
+ @Test
+ void testMatchAll_Empty() {
+ Pattern pattern = Pattern.compile("test");
- boolean[] results = pattern.matchAll(Collections.emptyList());
- assertEquals(0, results.length);
+ boolean[] results = pattern.matchAll(Collections.emptyList());
+ assertEquals(0, results.length);
- boolean[] results2 = pattern.matchAll(new String[0]);
- assertEquals(0, results2.length);
- }
+ boolean[] results2 = pattern.matchAll(new String[0]);
+ assertEquals(0, results2.length);
+ }
- @Test
- void testMatchAll_Set() {
- Pattern pattern = Pattern.compile("[a-z]+");
+ @Test
+ void testMatchAll_Set() {
+ Pattern pattern = Pattern.compile("[a-z]+");
- Set inputs = Set.of("abc", "123", "xyz", "456");
- boolean[] results = pattern.matchAll(inputs);
+ Set inputs = Set.of("abc", "123", "xyz", "456");
+ boolean[] results = pattern.matchAll(inputs);
- // Set order is not guaranteed, but count should be correct
- int matchCount = 0;
- for (boolean match : results) {
- if (match) matchCount++;
- }
- assertEquals(2, matchCount); // "abc" and "xyz"
+ // Set order is not guaranteed, but count should be correct
+ int matchCount = 0;
+ for (boolean match : results) {
+ if (match) matchCount++;
}
+ assertEquals(2, matchCount); // "abc" and "xyz"
+ }
- @Test
- void testMatchAll_Queue() {
- Pattern pattern = Pattern.compile("item\\d+");
+ @Test
+ void testMatchAll_Queue() {
+ Pattern pattern = Pattern.compile("item\\d+");
- Queue inputs = new LinkedList<>(List.of("item1", "other", "item2"));
- boolean[] results = pattern.matchAll(inputs);
+ Queue inputs = new LinkedList<>(List.of("item1", "other", "item2"));
+ boolean[] results = pattern.matchAll(inputs);
- assertArrayEquals(new boolean[]{true, false, true}, results);
- }
+ assertArrayEquals(new boolean[] {true, false, true}, results);
+ }
- @Test
- void testMatchAll_NullInput() {
- Pattern pattern = Pattern.compile("test");
+ @Test
+ void testMatchAll_NullInput() {
+ Pattern pattern = Pattern.compile("test");
- assertThrows(NullPointerException.class, () -> pattern.matchAll((Collection) null));
- assertThrows(NullPointerException.class, () -> pattern.matchAll((String[]) null));
- }
+ assertThrows(NullPointerException.class, () -> pattern.matchAll((Collection) null));
+ assertThrows(NullPointerException.class, () -> pattern.matchAll((String[]) null));
+ }
- @Test
- void testFilter_Basic() {
- Pattern pattern = Pattern.compile("test.*");
+ @Test
+ void testFilter_Basic() {
+ Pattern pattern = Pattern.compile("test.*");
- List inputs = List.of("test1", "prod1", "test2", "other");
- List filtered = pattern.filter(inputs);
+ List inputs = List.of("test1", "prod1", "test2", "other");
+ List filtered = pattern.filter(inputs);
- assertEquals(List.of("test1", "test2"), filtered);
- }
+ assertEquals(List.of("test1", "test2"), filtered);
+ }
- @Test
- void testFilter_NoMatches() {
- Pattern pattern = Pattern.compile("nomatch");
+ @Test
+ void testFilter_NoMatches() {
+ Pattern pattern = Pattern.compile("nomatch");
- List inputs = List.of("test1", "test2", "test3");
- List filtered = pattern.filter(inputs);
+ List inputs = List.of("test1", "test2", "test3");
+ List filtered = pattern.filter(inputs);
- assertTrue(filtered.isEmpty());
- }
+ assertTrue(filtered.isEmpty());
+ }
- @Test
- void testFilter_AllMatch() {
- Pattern pattern = Pattern.compile("test\\d");
+ @Test
+ void testFilter_AllMatch() {
+ Pattern pattern = Pattern.compile("test\\d");
- List inputs = List.of("test1", "test2", "test3");
- List filtered = pattern.filter(inputs);
+ List inputs = List.of("test1", "test2", "test3");
+ List filtered = pattern.filter(inputs);
- assertEquals(inputs, filtered);
- }
+ assertEquals(inputs, filtered);
+ }
- @Test
- void testFilterNot_Basic() {
- Pattern pattern = Pattern.compile("test.*");
+ @Test
+ void testFilterNot_Basic() {
+ Pattern pattern = Pattern.compile("test.*");
- List inputs = List.of("test1", "prod1", "test2", "other");
- List filtered = pattern.filterNot(inputs);
+ List inputs = List.of("test1", "prod1", "test2", "other");
+ List filtered = pattern.filterNot(inputs);
- assertEquals(List.of("prod1", "other"), filtered);
- }
+ assertEquals(List.of("prod1", "other"), filtered);
+ }
- @Test
- void testFilterNot_AllMatch() {
- Pattern pattern = Pattern.compile("test\\d");
+ @Test
+ void testFilterNot_AllMatch() {
+ Pattern pattern = Pattern.compile("test\\d");
- List inputs = List.of("test1", "test2");
- List filtered = pattern.filterNot(inputs);
+ List inputs = List.of("test1", "test2");
+ List filtered = pattern.filterNot(inputs);
- assertTrue(filtered.isEmpty());
- }
+ assertTrue(filtered.isEmpty());
+ }
- @Test
- void testRetainMatches_List() {
- Pattern pattern = Pattern.compile("test.*");
+ @Test
+ void testRetainMatches_List() {
+ Pattern pattern = Pattern.compile("test.*");
- List inputs = new ArrayList<>(List.of("test1", "prod1", "test2", "other"));
- int removed = pattern.retainMatches(inputs);
+ List inputs = new ArrayList<>(List.of("test1", "prod1", "test2", "other"));
+ int removed = pattern.retainMatches(inputs);
- assertEquals(2, removed);
- assertEquals(List.of("test1", "test2"), inputs);
- }
+ assertEquals(2, removed);
+ assertEquals(List.of("test1", "test2"), inputs);
+ }
- @Test
- void testRetainMatches_Set() {
- Pattern pattern = Pattern.compile("[a-z]+");
+ @Test
+ void testRetainMatches_Set() {
+ Pattern pattern = Pattern.compile("[a-z]+");
- Set inputs = new HashSet<>(Set.of("abc", "123", "xyz"));
- int removed = pattern.retainMatches(inputs);
+ Set inputs = new HashSet<>(Set.of("abc", "123", "xyz"));
+ int removed = pattern.retainMatches(inputs);
- assertEquals(1, removed);
- assertEquals(Set.of("abc", "xyz"), inputs);
- }
+ assertEquals(1, removed);
+ assertEquals(Set.of("abc", "xyz"), inputs);
+ }
- @Test
- void testRemoveMatches_List() {
- Pattern pattern = Pattern.compile("test.*");
+ @Test
+ void testRemoveMatches_List() {
+ Pattern pattern = Pattern.compile("test.*");
- List inputs = new ArrayList<>(List.of("test1", "prod1", "test2", "other"));
- int removed = pattern.removeMatches(inputs);
+ List inputs = new ArrayList<>(List.of("test1", "prod1", "test2", "other"));
+ int removed = pattern.removeMatches(inputs);
- assertEquals(2, removed);
- assertEquals(List.of("prod1", "other"), inputs);
- }
+ assertEquals(2, removed);
+ assertEquals(List.of("prod1", "other"), inputs);
+ }
- @Test
- void testRemoveMatches_Set() {
- Pattern pattern = Pattern.compile("[a-z]+");
+ @Test
+ void testRemoveMatches_Set() {
+ Pattern pattern = Pattern.compile("[a-z]+");
- Set inputs = new HashSet<>(Set.of("abc", "123", "xyz"));
- int removed = pattern.removeMatches(inputs);
+ Set inputs = new HashSet<>(Set.of("abc", "123", "xyz"));
+ int removed = pattern.removeMatches(inputs);
- assertEquals(2, removed);
- assertEquals(Set.of("123"), inputs);
- }
+ assertEquals(2, removed);
+ assertEquals(Set.of("123"), inputs);
+ }
- @Test
- void testRetainMatches_ImmutableCollection() {
- Pattern pattern = Pattern.compile("test");
+ @Test
+ void testRetainMatches_ImmutableCollection() {
+ Pattern pattern = Pattern.compile("test");
- List immutable = List.of("test", "other");
- assertThrows(UnsupportedOperationException.class, () -> pattern.retainMatches(immutable));
- }
+ List immutable = List.of("test", "other");
+ assertThrows(UnsupportedOperationException.class, () -> pattern.retainMatches(immutable));
+ }
- @Test
- void testFilterByKey_Basic() {
- Pattern pattern = Pattern.compile("test.*");
+ @Test
+ void testFilterByKey_Basic() {
+ Pattern pattern = Pattern.compile("test.*");
- Map inputs = Map.of(
+ Map inputs =
+ Map.of(
"test_1", 100,
"prod_1", 200,
- "test_2", 300
- );
+ "test_2", 300);
- Map filtered = pattern.filterByKey(inputs);
+ Map filtered = pattern.filterByKey(inputs);
- assertEquals(2, filtered.size());
- assertEquals(100, filtered.get("test_1"));
- assertEquals(300, filtered.get("test_2"));
- assertNull(filtered.get("prod_1"));
- }
+ assertEquals(2, filtered.size());
+ assertEquals(100, filtered.get("test_1"));
+ assertEquals(300, filtered.get("test_2"));
+ assertNull(filtered.get("prod_1"));
+ }
- @Test
- void testFilterByValue_Basic() {
- Pattern pattern = Pattern.compile(".*@example\\.com");
+ @Test
+ void testFilterByValue_Basic() {
+ Pattern pattern = Pattern.compile(".*@example\\.com");
- Map inputs = Map.of(
+ Map inputs =
+ Map.of(
1, "user@example.com",
2, "invalid",
- 3, "admin@example.com"
- );
+ 3, "admin@example.com");
- Map filtered = pattern.filterByValue(inputs);
+ Map filtered = pattern.filterByValue(inputs);
- assertEquals(2, filtered.size());
- assertEquals("user@example.com", filtered.get(1));
- assertEquals("admin@example.com", filtered.get(3));
- assertNull(filtered.get(2));
- }
+ assertEquals(2, filtered.size());
+ assertEquals("user@example.com", filtered.get(1));
+ assertEquals("admin@example.com", filtered.get(3));
+ assertNull(filtered.get(2));
+ }
- @Test
- void testFilterNotByKey_Basic() {
- Pattern pattern = Pattern.compile("test.*");
+ @Test
+ void testFilterNotByKey_Basic() {
+ Pattern pattern = Pattern.compile("test.*");
- Map inputs = Map.of(
+ Map inputs =
+ Map.of(
"test_1", 100,
"prod_1", 200,
- "test_2", 300
- );
+ "test_2", 300);
- Map filtered = pattern.filterNotByKey(inputs);
+ Map filtered = pattern.filterNotByKey(inputs);
- assertEquals(1, filtered.size());
- assertEquals(200, filtered.get("prod_1"));
- }
+ assertEquals(1, filtered.size());
+ assertEquals(200, filtered.get("prod_1"));
+ }
- @Test
- void testFilterNotByValue_Basic() {
- Pattern pattern = Pattern.compile(".*@example\\.com");
+ @Test
+ void testFilterNotByValue_Basic() {
+ Pattern pattern = Pattern.compile(".*@example\\.com");
- Map inputs = Map.of(
+ Map inputs =
+ Map.of(
1, "user@example.com",
2, "invalid",
- 3, "admin@example.com"
- );
-
- Map filtered = pattern.filterNotByValue(inputs);
-
- assertEquals(1, filtered.size());
- assertEquals("invalid", filtered.get(2));
- }
-
- @Test
- void testRetainMatchesByKey_Basic() {
- Pattern pattern = Pattern.compile("test.*");
-
- Map map = new HashMap<>();
- map.put("test_1", 100);
- map.put("prod_1", 200);
- map.put("test_2", 300);
-
- int removed = pattern.retainMatchesByKey(map);
-
- assertEquals(1, removed);
- assertEquals(2, map.size());
- assertEquals(100, map.get("test_1"));
- assertEquals(300, map.get("test_2"));
- }
-
- @Test
- void testRetainMatchesByValue_Basic() {
- Pattern pattern = Pattern.compile("\\d+");
-
- Map map = new HashMap<>();
- map.put("a", "123");
- map.put("b", "abc");
- map.put("c", "456");
-
- int removed = pattern.retainMatchesByValue(map);
-
- assertEquals(1, removed);
- assertEquals(2, map.size());
- assertEquals("123", map.get("a"));
- assertEquals("456", map.get("c"));
- }
-
- @Test
- void testRemoveMatchesByKey_Basic() {
- Pattern pattern = Pattern.compile("test.*");
-
- Map map = new HashMap<>();
- map.put("test_1", 100);
- map.put("prod_1", 200);
- map.put("test_2", 300);
-
- int removed = pattern.removeMatchesByKey(map);
-
- assertEquals(2, removed);
- assertEquals(1, map.size());
- assertEquals(200, map.get("prod_1"));
- }
-
- @Test
- void testRemoveMatchesByValue_Basic() {
- Pattern pattern = Pattern.compile("\\d+");
-
- Map map = new HashMap<>();
- map.put("a", "123");
- map.put("b", "abc");
- map.put("c", "456");
-
- int removed = pattern.removeMatchesByValue(map);
-
- assertEquals(2, removed);
- assertEquals(1, map.size());
- assertEquals("abc", map.get("b"));
- }
-
- @Test
- void testBulk_LargeCollection() {
- Pattern pattern = Pattern.compile("item\\d+");
-
- // Create 1000 strings
- List inputs = new ArrayList<>();
- for (int i = 0; i < 1000; i++) {
- if (i % 2 == 0) {
- inputs.add("item" + i);
- } else {
- inputs.add("other" + i);
- }
- }
-
- boolean[] results = pattern.matchAll(inputs);
-
- assertEquals(1000, results.length);
-
- int matchCount = 0;
- for (boolean match : results) {
- if (match) matchCount++;
- }
-
- assertEquals(500, matchCount); // Every other item
- }
-
- @Test
- void testFilter_PreservesOrder() {
- Pattern pattern = Pattern.compile("keep\\d");
-
- List inputs = List.of("keep1", "drop1", "keep2", "drop2", "keep3");
- List filtered = pattern.filter(inputs);
-
- // Order should be preserved
- assertEquals(List.of("keep1", "keep2", "keep3"), filtered);
- }
+ 3, "admin@example.com");
- @Test
- void testMapFiltering_EmptyMap() {
- Pattern pattern = Pattern.compile("test");
+ Map filtered = pattern.filterNotByValue(inputs);
- Map empty = Map.of();
+ assertEquals(1, filtered.size());
+ assertEquals("invalid", filtered.get(2));
+ }
- assertEquals(0, pattern.filterByKey(empty).size());
- assertEquals(0, pattern.filterNotByKey(empty).size());
- assertEquals(0, pattern.retainMatchesByKey(new HashMap<>(empty)));
- assertEquals(0, pattern.removeMatchesByKey(new HashMap<>(empty)));
- }
+ @Test
+ void testRetainMatchesByKey_Basic() {
+ Pattern pattern = Pattern.compile("test.*");
- @Test
- void testCollectionFiltering_DifferentTypes() {
- Pattern pattern = Pattern.compile("[a-z]+");
+ Map map = new HashMap<>();
+ map.put("test_1", 100);
+ map.put("prod_1", 200);
+ map.put("test_2", 300);
- // ArrayList
- List list = new ArrayList<>(List.of("abc", "123", "xyz"));
- assertEquals(2, pattern.filter(list).size());
+ int removed = pattern.retainMatchesByKey(map);
- // HashSet
- Set set = new HashSet<>(Set.of("abc", "123", "xyz"));
- assertEquals(2, pattern.filter(set).size());
+ assertEquals(1, removed);
+ assertEquals(2, map.size());
+ assertEquals(100, map.get("test_1"));
+ assertEquals(300, map.get("test_2"));
+ }
- // LinkedList (Queue)
- Queue queue = new LinkedList<>(List.of("abc", "123", "xyz"));
- assertEquals(2, pattern.filter(queue).size());
- }
+ @Test
+ void testRetainMatchesByValue_Basic() {
+ Pattern pattern = Pattern.compile("\\d+");
- @Test
- void testFilterNot_AllCollectionTypes() {
- Pattern pattern = Pattern.compile("\\d+");
+ Map map = new HashMap<>();
+ map.put("a", "123");
+ map.put("b", "abc");
+ map.put("c", "456");
- // ArrayList
- List arrayList = new ArrayList<>(List.of("123", "abc", "456"));
- assertEquals(List.of("abc"), pattern.filterNot(arrayList));
+ int removed = pattern.retainMatchesByValue(map);
- // HashSet
- Set hashSet = new HashSet<>(Set.of("123", "abc", "456"));
- List filtered = pattern.filterNot(hashSet);
- assertEquals(1, filtered.size());
- assertTrue(filtered.contains("abc"));
+ assertEquals(1, removed);
+ assertEquals(2, map.size());
+ assertEquals("123", map.get("a"));
+ assertEquals("456", map.get("c"));
+ }
- // LinkedList (as Queue)
- Queue linkedList = new LinkedList<>(List.of("123", "abc", "456"));
- assertEquals(1, pattern.filterNot(linkedList).size());
- }
+ @Test
+ void testRemoveMatchesByKey_Basic() {
+ Pattern pattern = Pattern.compile("test.*");
- @Test
- void testRetainMatches_AllCollectionTypes() {
- Pattern pattern = Pattern.compile("[a-z]+");
-
- // ArrayList
- List arrayList = new ArrayList<>(List.of("abc", "123", "xyz"));
- assertEquals(1, pattern.retainMatches(arrayList));
- assertEquals(2, arrayList.size());
-
- // HashSet
- Set hashSet = new HashSet<>(Set.of("abc", "123", "xyz"));
- assertEquals(1, pattern.retainMatches(hashSet));
- assertEquals(2, hashSet.size());
-
- // LinkedList (as Queue)
- Queue queue = new LinkedList<>(List.of("abc", "123", "xyz"));
- assertEquals(1, pattern.retainMatches(queue));
- assertEquals(2, queue.size());
-
- // TreeSet (sorted)
- Set treeSet = new TreeSet<>(Set.of("abc", "123", "xyz"));
- assertEquals(1, pattern.retainMatches(treeSet));
- assertEquals(Set.of("abc", "xyz"), treeSet);
- }
+ Map map = new HashMap<>();
+ map.put("test_1", 100);
+ map.put("prod_1", 200);
+ map.put("test_2", 300);
- @Test
- void testRemoveMatches_AllCollectionTypes() {
- Pattern pattern = Pattern.compile("[a-z]+");
-
- // ArrayList
- List arrayList = new ArrayList<>(List.of("abc", "123", "xyz"));
- assertEquals(2, pattern.removeMatches(arrayList));
- assertEquals(List.of("123"), arrayList);
-
- // HashSet
- Set hashSet = new HashSet<>(Set.of("abc", "123", "xyz"));
- assertEquals(2, pattern.removeMatches(hashSet));
- assertEquals(Set.of("123"), hashSet);
-
- // LinkedList (as Queue)
- Queue queue = new LinkedList<>(List.of("abc", "123", "xyz"));
- assertEquals(2, pattern.removeMatches(queue));
- assertEquals(1, queue.size());
- assertTrue(queue.contains("123"));
-
- // TreeSet
- Set treeSet = new TreeSet<>(Set.of("abc", "123", "xyz"));
- assertEquals(2, pattern.removeMatches(treeSet));
- assertEquals(Set.of("123"), treeSet);
- }
+ int removed = pattern.removeMatchesByKey(map);
- @Test
- void testMapOperations_AllVariants() {
- Pattern pattern = Pattern.compile("key\\d+");
+ assertEquals(2, removed);
+ assertEquals(1, map.size());
+ assertEquals(200, map.get("prod_1"));
+ }
- // Test all 8 map operations
- Map map1 = new HashMap<>(Map.of("key1", "val1", "other", "val2", "key2", "val3"));
- assertEquals(2, pattern.filterByKey(map1).size());
+ @Test
+ void testRemoveMatchesByValue_Basic() {
+ Pattern pattern = Pattern.compile("\\d+");
- Map map2 = new HashMap<>(Map.of("key1", "val1", "other", "val2"));
- assertEquals(1, pattern.filterNotByKey(map2).size());
+ Map map = new HashMap<>();
+ map.put("a", "123");
+ map.put("b", "abc");
+ map.put("c", "456");
- Map map3 = new HashMap<>(Map.of("k1", "key1", "k2", "other", "k3", "key2"));
- assertEquals(2, pattern.filterByValue(map3).size());
+ int removed = pattern.removeMatchesByValue(map);
- Map map4 = new HashMap<>(Map.of("k1", "key1", "k2", "other"));
- assertEquals(1, pattern.filterNotByValue(map4).size());
+ assertEquals(2, removed);
+ assertEquals(1, map.size());
+ assertEquals("abc", map.get("b"));
+ }
- Map map5 = new HashMap<>(Map.of("key1", 1, "other", 2, "key2", 3));
- assertEquals(1, pattern.retainMatchesByKey(map5));
- assertEquals(2, map5.size());
+ @Test
+ void testBulk_LargeCollection() {
+ Pattern pattern = Pattern.compile("item\\d+");
- Map map6 = new HashMap<>(Map.of("key1", 1, "other", 2));
- assertEquals(1, pattern.removeMatchesByKey(map6));
- assertEquals(1, map6.size());
-
- Map map7 = new HashMap<>(Map.of(1, "key1", 2, "other", 3, "key2"));
- assertEquals(1, pattern.retainMatchesByValue(map7));
- assertEquals(2, map7.size());
-
- Map map8 = new HashMap<>(Map.of(1, "key1", 2, "other"));
- assertEquals(1, pattern.removeMatchesByValue(map8));
- assertEquals(1, map8.size());
+ // Create 1000 strings
+ List inputs = new ArrayList<>();
+ for (int i = 0; i < 1000; i++) {
+ if (i % 2 == 0) {
+ inputs.add("item" + i);
+ } else {
+ inputs.add("other" + i);
+ }
}
- @Test
- void testMatchAll_LinkedHashSet_PreservesOrder() {
- Pattern pattern = Pattern.compile("keep.*");
-
- // LinkedHashSet preserves insertion order
- Set linkedSet = new LinkedHashSet<>();
- linkedSet.add("keep1");
- linkedSet.add("drop1");
- linkedSet.add("keep2");
- linkedSet.add("drop2");
-
- boolean[] results = pattern.matchAll(linkedSet);
- assertEquals(4, results.length);
-
- // Verify order matches insertion order
- List asList = new ArrayList<>(linkedSet);
- boolean[] expected = {true, false, true, false};
- assertArrayEquals(expected, results);
- }
-
- @Test
- void testFilter_Queue_FIFO_Order() {
- Pattern pattern = Pattern.compile("item\\d");
-
- // Queue maintains FIFO order
- Queue queue = new LinkedList<>(List.of("item1", "other", "item2", "item3"));
- List filtered = pattern.filter(queue);
-
- // Should preserve order
- assertEquals(List.of("item1", "item2", "item3"), filtered);
- }
-
- // ========== Additional Scenarios ==========
-
- @Test
- void testMatchAll_WithNullElements() {
- Pattern pattern = Pattern.compile("test.*");
+ boolean[] results = pattern.matchAll(inputs);
- // Null elements in array should not crash
- String[] arrayWithNulls = {"test1", null, "test2", null, "other"};
- boolean[] results = pattern.matchAll(arrayWithNulls);
+ assertEquals(1000, results.length);
+
+ int matchCount = 0;
+ for (boolean match : results) {
+ if (match) matchCount++;
+ }
+
+ assertEquals(500, matchCount); // Every other item
+ }
+
+  /** Verifies that {@code filter} returns the matching elements in their original order. */
+  @Test
+  void testFilter_PreservesOrder() {
+    Pattern pattern = Pattern.compile("keep\\d");
+
+    List<String> inputs = List.of("keep1", "drop1", "keep2", "drop2", "keep3");
+    List<String> filtered = pattern.filter(inputs);
+
+    // Order should be preserved
+    assertEquals(List.of("keep1", "keep2", "keep3"), filtered);
+  }
+
+  /** Verifies that the key-based map operations return or remove nothing for an empty map. */
+  @Test
+  void testMapFiltering_EmptyMap() {
+    Pattern pattern = Pattern.compile("test");
+
+    // NOTE(review): the value type does not affect these assertions; Integer is an assumption.
+    Map<String, Integer> empty = Map.of();
+
+    assertEquals(0, pattern.filterByKey(empty).size());
+    assertEquals(0, pattern.filterNotByKey(empty).size());
+    // The in-place variants need a mutable copy because Map.of() is immutable.
+    assertEquals(0, pattern.retainMatchesByKey(new HashMap<>(empty)));
+    assertEquals(0, pattern.removeMatchesByKey(new HashMap<>(empty)));
+  }
+
+  /** Verifies that {@code filter} finds both lowercase words in a list, a set and a queue. */
+  @Test
+  void testCollectionFiltering_DifferentTypes() {
+    Pattern pattern = Pattern.compile("[a-z]+");
+
+    // ArrayList
+    List<String> list = new ArrayList<>(List.of("abc", "123", "xyz"));
+    assertEquals(2, pattern.filter(list).size());
+
+    // HashSet
+    Set<String> set = new HashSet<>(Set.of("abc", "123", "xyz"));
+    assertEquals(2, pattern.filter(set).size());
+
+    // LinkedList (Queue)
+    Queue<String> queue = new LinkedList<>(List.of("abc", "123", "xyz"));
+    assertEquals(2, pattern.filter(queue).size());
+  }
+
+  /** Verifies that {@code filterNot} keeps only the non-numeric entry for each collection type. */
+  @Test
+  void testFilterNot_AllCollectionTypes() {
+    Pattern pattern = Pattern.compile("\\d+");
+
+    // ArrayList
+    List<String> arrayList = new ArrayList<>(List.of("123", "abc", "456"));
+    assertEquals(List.of("abc"), pattern.filterNot(arrayList));
+
+    // HashSet
+    Set<String> hashSet = new HashSet<>(Set.of("123", "abc", "456"));
+    List<String> filtered = pattern.filterNot(hashSet);
+    assertEquals(1, filtered.size());
+    assertTrue(filtered.contains("abc"));
+
+    // LinkedList (as Queue)
+    Queue<String> linkedList = new LinkedList<>(List.of("123", "abc", "456"));
+    assertEquals(1, pattern.filterNot(linkedList).size());
+  }
+
+  /**
+   * Verifies that {@code retainMatches} removes the single non-matching entry in place and
+   * reports a removal count of one for each mutable collection type.
+   */
+  @Test
+  void testRetainMatches_AllCollectionTypes() {
+    Pattern pattern = Pattern.compile("[a-z]+");
+
+    // ArrayList
+    List<String> arrayList = new ArrayList<>(List.of("abc", "123", "xyz"));
+    assertEquals(1, pattern.retainMatches(arrayList));
+    assertEquals(2, arrayList.size());
+
+    // HashSet
+    Set<String> hashSet = new HashSet<>(Set.of("abc", "123", "xyz"));
+    assertEquals(1, pattern.retainMatches(hashSet));
+    assertEquals(2, hashSet.size());
+
+    // LinkedList (as Queue)
+    Queue<String> queue = new LinkedList<>(List.of("abc", "123", "xyz"));
+    assertEquals(1, pattern.retainMatches(queue));
+    assertEquals(2, queue.size());
+
+    // TreeSet (sorted)
+    Set<String> treeSet = new TreeSet<>(Set.of("abc", "123", "xyz"));
+    assertEquals(1, pattern.retainMatches(treeSet));
+    assertEquals(Set.of("abc", "xyz"), treeSet);
+  }
+
+  /**
+   * Verifies that {@code removeMatches} removes both lowercase words in place, leaving only
+   * {@code "123"}, for each mutable collection type.
+   */
+  @Test
+  void testRemoveMatches_AllCollectionTypes() {
+    Pattern pattern = Pattern.compile("[a-z]+");
+
+    // ArrayList
+    List<String> arrayList = new ArrayList<>(List.of("abc", "123", "xyz"));
+    assertEquals(2, pattern.removeMatches(arrayList));
+    assertEquals(List.of("123"), arrayList);
+
+    // HashSet
+    Set<String> hashSet = new HashSet<>(Set.of("abc", "123", "xyz"));
+    assertEquals(2, pattern.removeMatches(hashSet));
+    assertEquals(Set.of("123"), hashSet);
+
+    // LinkedList (as Queue)
+    Queue<String> queue = new LinkedList<>(List.of("abc", "123", "xyz"));
+    assertEquals(2, pattern.removeMatches(queue));
+    assertEquals(1, queue.size());
+    assertTrue(queue.contains("123"));
+
+    // TreeSet
+    Set<String> treeSet = new TreeSet<>(Set.of("abc", "123", "xyz"));
+    assertEquals(2, pattern.removeMatches(treeSet));
+    assertEquals(Set.of("123"), treeSet);
+  }
+
+  /**
+   * Exercises all eight map operations (filter / filterNot / retainMatches / removeMatches, each
+   * by key and by value) once against small maps and checks the resulting sizes or removal counts.
+   */
+  @Test
+  void testMapOperations_AllVariants() {
+    Pattern pattern = Pattern.compile("key\\d+");
+
+    // Test all 8 map operations
+    Map<String, String> map1 =
+        new HashMap<>(Map.of("key1", "val1", "other", "val2", "key2", "val3"));
+    assertEquals(2, pattern.filterByKey(map1).size());
+
+    Map<String, String> map2 = new HashMap<>(Map.of("key1", "val1", "other", "val2"));
+    assertEquals(1, pattern.filterNotByKey(map2).size());
+
+    Map<String, String> map3 = new HashMap<>(Map.of("k1", "key1", "k2", "other", "k3", "key2"));
+    assertEquals(2, pattern.filterByValue(map3).size());
+
+    Map<String, String> map4 = new HashMap<>(Map.of("k1", "key1", "k2", "other"));
+    assertEquals(1, pattern.filterNotByValue(map4).size());
+
+    Map<String, Integer> map5 = new HashMap<>(Map.of("key1", 1, "other", 2, "key2", 3));
+    assertEquals(1, pattern.retainMatchesByKey(map5));
+    assertEquals(2, map5.size());
+
+    Map<String, Integer> map6 = new HashMap<>(Map.of("key1", 1, "other", 2));
+    assertEquals(1, pattern.removeMatchesByKey(map6));
+    assertEquals(1, map6.size());
+
+    Map<Integer, String> map7 = new HashMap<>(Map.of(1, "key1", 2, "other", 3, "key2"));
+    assertEquals(1, pattern.retainMatchesByValue(map7));
+    assertEquals(2, map7.size());
+
+    Map<Integer, String> map8 = new HashMap<>(Map.of(1, "key1", 2, "other"));
+    assertEquals(1, pattern.removeMatchesByValue(map8));
+    assertEquals(1, map8.size());
+  }
+
+  /**
+   * Verifies that {@code matchAll} on a {@link LinkedHashSet} reports results in the set's
+   * insertion order.
+   */
+  @Test
+  void testMatchAll_LinkedHashSet_PreservesOrder() {
+    Pattern pattern = Pattern.compile("keep.*");
+
+    // LinkedHashSet preserves insertion order
+    Set<String> linkedSet = new LinkedHashSet<>();
+    linkedSet.add("keep1");
+    linkedSet.add("drop1");
+    linkedSet.add("keep2");
+    linkedSet.add("drop2");
+
+    boolean[] results = pattern.matchAll(linkedSet);
+    assertEquals(4, results.length);
+
+    // Verify order matches insertion order: keep1, drop1, keep2, drop2
+    boolean[] expected = {true, false, true, false};
+    assertArrayEquals(expected, results);
+  }
+
+  /** Verifies that {@code filter} on a queue returns the matches in the queue's iteration order. */
+  @Test
+  void testFilter_Queue_FIFO_Order() {
+    Pattern pattern = Pattern.compile("item\\d");
+
+    // Queue maintains FIFO order
+    Queue<String> queue = new LinkedList<>(List.of("item1", "other", "item2", "item3"));
+    List<String> filtered = pattern.filter(queue);
+
+    // Should preserve order
+    assertEquals(List.of("item1", "item2", "item3"), filtered);
+  }
+
+ // ========== Additional Scenarios ==========
+
+  /**
+   * Checks that {@code matchAll} accepts an array containing {@code null} entries, returns one
+   * result per element, and reports the {@code null} slots as non-matches.
+   */
+  @Test
+  void testMatchAll_WithNullElements() {
+    // Null elements in array should not crash
+    String[] arrayWithNulls = {"test1", null, "test2", null, "other"};
+
+    Pattern pattern = Pattern.compile("test.*");
+    boolean[] results = pattern.matchAll(arrayWithNulls);
+
+    assertEquals(5, results.length);
+    // Nulls should be treated as non-matches (handled by JNI)
+    boolean[] expected = {
+      true, // "test1" matches
+      false, // null doesn't match
+      true, // "test2" matches
+      false, // null doesn't match
+      false // "other" doesn't match
+    };
+    assertArrayEquals(expected, results);
+  }
+
+  /** Verifies that {@code filter} keeps every matching element, including repeated values. */
+  @Test
+  void testFilter_WithDuplicates() {
+    Pattern pattern = Pattern.compile("keep");
+
+    // List with duplicates
+    List<String> withDuplicates = List.of("keep", "drop", "keep", "keep", "drop");
+    List<String> filtered = pattern.filter(withDuplicates);
+
+    // All "keep" entries preserved, including duplicates
+    assertEquals(3, filtered.size());
+    assertEquals(List.of("keep", "keep", "keep"), filtered);
+  }
+
+  /**
+   * Verifies that {@code retainMatches} drops only the non-numeric entries and keeps every
+   * numeric entry, including repeated values.
+   */
+  @Test
+  void testRetainMatches_WithDuplicates() {
+    Pattern pattern = Pattern.compile("\\d+");
+
+    // ArrayList with duplicates: 3x"123", 1x"456" = 4 numeric, 2 non-numeric
+    // The element type must be String: the lambda below calls String.matches on each element.
+    List<String> list = new ArrayList<>(List.of("123", "abc", "123", "xyz", "456", "123"));
+    int removed = pattern.retainMatches(list);
+
+    assertEquals(2, removed); // "abc" and "xyz" removed
+    assertEquals(4, list.size()); // 3x"123" + 1x"456" remain
+    // All numeric strings retained (including duplicates)
+    assertEquals(3, list.stream().filter("123"::equals).count()); // 3x "123"
+    assertEquals(1, list.stream().filter("456"::equals).count()); // 1x "456"
+    assertTrue(list.stream().allMatch(s -> s.matches("\\d+"))); // All are numeric
+  }
+
+  /** Verifies that {@code filterByKey} on a {@link TreeMap} returns only the matching keys. */
+  @Test
+  void testMapFiltering_TreeMap() {
+    Pattern pattern = Pattern.compile("key[1-2]");
+
+    // TreeMap maintains sorted order
+    Map<String, Integer> treeMap =
+        new TreeMap<>(
+            Map.of(
+                "key3", 3,
+                "key1", 1,
+                "other", 5,
+                "key2", 2));
+
+    Map<String, Integer> filtered = pattern.filterByKey(treeMap);
+
+    assertEquals(2, filtered.size());
+    assertEquals(1, filtered.get("key1"));
+    assertEquals(2, filtered.get("key2"));
+    assertNull(filtered.get("key3"));
+    assertNull(filtered.get("other"));
+  }
+
+  /** Verifies that {@code filterByKey} on a {@link LinkedHashMap} returns both matching keys. */
+  @Test
+  void testMapFiltering_LinkedHashMap() {
+    Pattern pattern = Pattern.compile("user\\d+");
+
+    // LinkedHashMap preserves insertion order
+    Map<String, String> linkedMap = new LinkedHashMap<>();
+    linkedMap.put("user2", "second");
+    linkedMap.put("admin", "first");
+    linkedMap.put("user1", "third");
+
+    Map<String, String> filtered = pattern.filterByKey(linkedMap);
+
+    assertEquals(2, filtered.size());
+    // Only key membership is asserted here; the ordering of the returned map is not checked.
+    List<String> keys = new ArrayList<>(filtered.keySet());
+    assertTrue(keys.contains("user2"));
+    assertTrue(keys.contains("user1"));
+  }
+
+  /** Verifies that {@code filterByKey} accepts a {@code ConcurrentHashMap} as its input map. */
+  @Test
+  void testMapFiltering_ConcurrentHashMap() {
+    Pattern pattern = Pattern.compile("data_.*");
+
+    // ConcurrentHashMap (thread-safe map)
+    Map<String, Integer> concurrentMap = new java.util.concurrent.ConcurrentHashMap<>();
+    concurrentMap.put("data_1", 100);
+    concurrentMap.put("meta_1", 200);
+    concurrentMap.put("data_2", 300);
+
+    Map<String, Integer> filtered = pattern.filterByKey(concurrentMap);
+
+    assertEquals(2, filtered.size());
+    assertEquals(100, filtered.get("data_1"));
+    assertEquals(300, filtered.get("data_2"));
+  }
+
+  /**
+   * Verifies that {@code retainMatchesByKey} removes the non-matching key from a {@link TreeMap}
+   * in place and reports one removal.
+   */
+  @Test
+  void testRetainMatchesByKey_TreeMap() {
+    Pattern pattern = Pattern.compile("keep.*");
+
+    // TreeMap (sorted)
+    Map<String, Integer> treeMap =
+        new TreeMap<>(
+            Map.of(
+                "keep1", 1,
+                "drop1", 2,
+                "keep2", 3));
+
+    int removed = pattern.retainMatchesByKey(treeMap);
+
+    assertEquals(1, removed);
+    assertEquals(2, treeMap.size());
+    assertTrue(treeMap.containsKey("keep1"));
+    assertTrue(treeMap.containsKey("keep2"));
+  }
+
+  /**
+   * Verifies that {@code retainMatchesByValue} removes the entry whose value does not match from
+   * a {@link LinkedHashMap} in place and reports one removal.
+   */
+  @Test
+  void testRetainMatchesByValue_LinkedHashMap() {
+    Pattern pattern = Pattern.compile("valid");
+
+    Map<Integer, String> linkedMap = new LinkedHashMap<>();
+    linkedMap.put(1, "valid");
+    linkedMap.put(2, "invalid");
+    linkedMap.put(3, "valid");
+
+    int removed = pattern.retainMatchesByValue(linkedMap);
+
+    assertEquals(1, removed);
+    assertEquals(2, linkedMap.size());
+    assertEquals("valid", linkedMap.get(1));
+    assertEquals("valid", linkedMap.get(3));
+  }
+
+ @Test
+ void testBulk_VeryLargeCollection_10k() {
+ assumeTrue(!isQemuEmulation(), "Skipping large dataset test under QEMU emulation (too slow)");
+
+ Pattern pattern = Pattern.compile("item\\d{4}"); // item + 4 digits
+
+ // Create 10,000 strings
+ List inputs = new ArrayList<>(10_000);
+ for (int i = 0; i < 10_000; i++) {
+ inputs.add("item" + i);
+ }
+
+ // Test matchAll
+ boolean[] results = pattern.matchAll(inputs);
+ assertEquals(10_000, results.length);
+
+ // Count matches: inputs are item0..item9999, and only item1000..item9999 have 4 digits
+ int matchCount = 0;
+ for (boolean match : results) {
+ if (match) matchCount++;
+ }
+ assertEquals(9000, matchCount); // item1000 through item9999
+
+ // Test filter
+ List filtered = pattern.filter(inputs);
+ assertEquals(9000, filtered.size());
+
+ // Test in-place
+ List mutable = new ArrayList<>(inputs);
+ int removed = pattern.retainMatches(mutable);
+ assertEquals(1000, removed);
+ assertEquals(9000, mutable.size());
+ }
+
+ @Test
+ void testBulk_VeryLargeMap_10k() {
+ assumeTrue(!isQemuEmulation(), "Skipping large dataset test under QEMU emulation (too slow)");
+
+ Pattern pattern = Pattern.compile("user_\\d+");
+
+ // Create 10,000 entry map
+ Map largeMap = new HashMap<>();
+ for (int i = 0; i < 10_000; i++) {
+ if (i % 2 == 0) {
+ largeMap.put("user_" + i, i);
+ } else {
+ largeMap.put("admin_" + i, i);
+ }
+ }
+
+ // Test filterByKey
+ Map filtered = pattern.filterByKey(largeMap);
+ assertEquals(5000, filtered.size());
+
+ // Test in-place
+ Map mutable = new HashMap<>(largeMap);
+ int removed = pattern.retainMatchesByKey(mutable);
+ assertEquals(5000, removed);
+ assertEquals(5000, mutable.size());
+ }
+
+ @Test
+ void testMatchAll_EmptyStrings() {
+ Pattern pattern = Pattern.compile(".*"); // Matches everything including empty
+
+ List inputs = List.of("", "test", "", "other", "");
+ boolean[] results = pattern.matchAll(inputs);
+
+ // ".*" should match empty strings
+ assertArrayEquals(new boolean[] {true, true, true, true, true}, results);
+ }
+
+ @Test
+ void testFilter_PreservesDuplicateOrder() {
+ Pattern pattern = Pattern.compile("keep");
+
+ // Specific order with duplicates
+ List ordered = List.of("keep", "drop", "keep", "other", "keep");
+ List filtered = pattern.filter(ordered);
+
+ // Order and duplicates preserved
+ assertEquals(List.of("keep", "keep", "keep"), filtered);
+ }
+
+ @Test
+ void testRemoveMatchesByKey_ConcurrentHashMap() {
+ Pattern pattern = Pattern.compile("tmp_.*");
+
+ Map concurrentMap = new java.util.concurrent.ConcurrentHashMap<>();
+ concurrentMap.put("tmp_cache", "data1");
+ concurrentMap.put("perm_data", "data2");
+ concurrentMap.put("tmp_session", "data3");
- assertEquals(5, results.length);
- // Nulls should be treated as non-matches (handled by JNI)
- assertTrue(results[0]); // "test1" matches
- assertFalse(results[1]); // null doesn't match
- assertTrue(results[2]); // "test2" matches
- assertFalse(results[3]); // null doesn't match
- assertFalse(results[4]); // "other" doesn't match
- }
-
- @Test
- void testFilter_WithDuplicates() {
- Pattern pattern = Pattern.compile("keep");
-
- // List with duplicates
- List withDuplicates = List.of("keep", "drop", "keep", "keep", "drop");
- List filtered = pattern.filter(withDuplicates);
-
- // All "keep" entries preserved, including duplicates
- assertEquals(3, filtered.size());
- assertEquals(List.of("keep", "keep", "keep"), filtered);
- }
-
- @Test
- void testRetainMatches_WithDuplicates() {
- Pattern pattern = Pattern.compile("\\d+");
-
- // ArrayList with duplicates: 3x"123", 1x"456" = 4 numeric, 2 non-numeric
- List list = new ArrayList<>(List.of("123", "abc", "123", "xyz", "456", "123"));
- int removed = pattern.retainMatches(list);
-
- assertEquals(2, removed); // "abc" and "xyz" removed
- assertEquals(4, list.size()); // 3x"123" + 1x"456" remain
- // All numeric strings retained (including duplicates)
- assertEquals(3, list.stream().filter(s -> s.equals("123")).count()); // 3x "123"
- assertEquals(1, list.stream().filter(s -> s.equals("456")).count()); // 1x "456"
- assertTrue(list.stream().allMatch(s -> s.matches("\\d+"))); // All are numeric
- }
-
- @Test
- void testMapFiltering_TreeMap() {
- Pattern pattern = Pattern.compile("key[1-2]");
-
- // TreeMap maintains sorted order
- Map treeMap = new TreeMap<>(Map.of(
- "key3", 3,
- "key1", 1,
- "other", 5,
- "key2", 2
- ));
-
- Map filtered = pattern.filterByKey(treeMap);
-
- assertEquals(2, filtered.size());
- assertEquals(1, filtered.get("key1"));
- assertEquals(2, filtered.get("key2"));
- assertNull(filtered.get("key3"));
- assertNull(filtered.get("other"));
- }
-
- @Test
- void testMapFiltering_LinkedHashMap() {
- Pattern pattern = Pattern.compile("user\\d+");
-
- // LinkedHashMap preserves insertion order
- Map linkedMap = new LinkedHashMap<>();
- linkedMap.put("user2", "second");
- linkedMap.put("admin", "first");
- linkedMap.put("user1", "third");
-
- Map filtered = pattern.filterByKey(linkedMap);
-
- assertEquals(2, filtered.size());
- // LinkedHashMap order should be preserved
- List keys = new ArrayList<>(filtered.keySet());
- assertTrue(keys.contains("user2"));
- assertTrue(keys.contains("user1"));
- }
-
- @Test
- void testMapFiltering_ConcurrentHashMap() {
- Pattern pattern = Pattern.compile("data_.*");
-
- // ConcurrentHashMap (thread-safe map)
- Map concurrentMap = new java.util.concurrent.ConcurrentHashMap<>();
- concurrentMap.put("data_1", 100);
- concurrentMap.put("meta_1", 200);
- concurrentMap.put("data_2", 300);
-
- Map filtered = pattern.filterByKey(concurrentMap);
-
- assertEquals(2, filtered.size());
- assertEquals(100, filtered.get("data_1"));
- assertEquals(300, filtered.get("data_2"));
- }
-
- @Test
- void testRetainMatchesByKey_TreeMap() {
- Pattern pattern = Pattern.compile("keep.*");
-
- // TreeMap (sorted)
- Map treeMap = new TreeMap<>(Map.of(
- "keep1", 1,
- "drop1", 2,
- "keep2", 3
- ));
-
- int removed = pattern.retainMatchesByKey(treeMap);
-
- assertEquals(1, removed);
- assertEquals(2, treeMap.size());
- assertTrue(treeMap.containsKey("keep1"));
- assertTrue(treeMap.containsKey("keep2"));
- }
-
- @Test
- void testRetainMatchesByValue_LinkedHashMap() {
- Pattern pattern = Pattern.compile("valid");
-
- Map linkedMap = new LinkedHashMap<>();
- linkedMap.put(1, "valid");
- linkedMap.put(2, "invalid");
- linkedMap.put(3, "valid");
-
- int removed = pattern.retainMatchesByValue(linkedMap);
-
- assertEquals(1, removed);
- assertEquals(2, linkedMap.size());
- assertEquals("valid", linkedMap.get(1));
- assertEquals("valid", linkedMap.get(3));
- }
-
- @Test
- void testBulk_VeryLargeCollection_10k() {
- assumeTrue(!isQemuEmulation(), "Skipping large dataset test under QEMU emulation (too slow)");
-
- Pattern pattern = Pattern.compile("item\\d{4}"); // item + 4 digits
-
- // Create 10,000 strings
- List inputs = new ArrayList<>(10_000);
- for (int i = 0; i < 10_000; i++) {
- inputs.add("item" + i);
- }
-
- // Test matchAll
- boolean[] results = pattern.matchAll(inputs);
- assertEquals(10_000, results.length);
-
- // Count matches (items 0-9999, all 4 digits match from item1000 onward)
- int matchCount = 0;
- for (boolean match : results) {
- if (match) matchCount++;
- }
- assertEquals(9000, matchCount); // item1000 through item9999
-
- // Test filter
- List filtered = pattern.filter(inputs);
- assertEquals(9000, filtered.size());
-
- // Test in-place
- List mutable = new ArrayList<>(inputs);
- int removed = pattern.retainMatches(mutable);
- assertEquals(1000, removed);
- assertEquals(9000, mutable.size());
- }
-
- @Test
- void testBulk_VeryLargeMap_10k() {
- assumeTrue(!isQemuEmulation(), "Skipping large dataset test under QEMU emulation (too slow)");
-
- Pattern pattern = Pattern.compile("user_\\d+");
-
- // Create 10,000 entry map
- Map largeMap = new HashMap<>();
- for (int i = 0; i < 10_000; i++) {
- if (i % 2 == 0) {
- largeMap.put("user_" + i, i);
- } else {
- largeMap.put("admin_" + i, i);
- }
- }
-
- // Test filterByKey
- Map filtered = pattern.filterByKey(largeMap);
- assertEquals(5000, filtered.size());
-
- // Test in-place
- Map mutable = new HashMap<>(largeMap);
- int removed = pattern.retainMatchesByKey(mutable);
- assertEquals(5000, removed);
- assertEquals(5000, mutable.size());
- }
-
- @Test
- void testMatchAll_EmptyStrings() {
- Pattern pattern = Pattern.compile(".*"); // Matches everything including empty
-
- List inputs = List.of("", "test", "", "other", "");
- boolean[] results = pattern.matchAll(inputs);
-
- // ".*" should match empty strings
- assertArrayEquals(new boolean[]{true, true, true, true, true}, results);
- }
-
- @Test
- void testFilter_PreservesDuplicateOrder() {
- Pattern pattern = Pattern.compile("keep");
-
- // Specific order with duplicates
- List ordered = List.of("keep", "drop", "keep", "other", "keep");
- List filtered = pattern.filter(ordered);
-
- // Order and duplicates preserved
- assertEquals(List.of("keep", "keep", "keep"), filtered);
- }
-
- @Test
- void testRemoveMatchesByKey_ConcurrentHashMap() {
- Pattern pattern = Pattern.compile("tmp_.*");
-
- Map concurrentMap = new java.util.concurrent.ConcurrentHashMap<>();
- concurrentMap.put("tmp_cache", "data1");
- concurrentMap.put("perm_data", "data2");
- concurrentMap.put("tmp_session", "data3");
-
- int removed = pattern.removeMatchesByKey(concurrentMap);
-
- assertEquals(2, removed);
- assertEquals(1, concurrentMap.size());
- assertTrue(concurrentMap.containsKey("perm_data"));
- }
+ int removed = pattern.removeMatchesByKey(concurrentMap);
+
+ assertEquals(2, removed);
+ assertEquals(1, concurrentMap.size());
+ assertTrue(concurrentMap.containsKey("perm_data"));
+ }
}
diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingTypeSafetyIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingTypeSafetyIT.java
index 41eb6e0..5fa5f05 100644
--- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingTypeSafetyIT.java
+++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/BulkMatchingTypeSafetyIT.java
@@ -15,327 +15,310 @@
*/
package com.axonops.libre2.api;
+import static org.junit.jupiter.api.Assertions.*;
+
import com.axonops.libre2.cache.PatternCache;
import com.axonops.libre2.test.TestUtils;
+import java.util.*;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
-import java.util.*;
-
-import static org.junit.jupiter.api.Assertions.*;
-
/**
- * Tests for type safety and encoding handling in bulk matching operations.
- * Demonstrates how the API handles non-String types, Unicode, emoji, and special characters.
+ * Tests for type safety and encoding handling in bulk matching operations. Demonstrates how the API
+ * handles non-String types, Unicode, emoji, and special characters.
*/
class BulkMatchingTypeSafetyIT {
- private static PatternCache originalCache;
-
- @BeforeAll
- static void setUpClass() {
- originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build());
- }
-
- @AfterAll
- static void tearDownClass() {
- TestUtils.restoreGlobalCache(originalCache);
- }
-
- // ========== Type Safety Tests ==========
-
- /**
- * Demonstrates compile-time type safety.
- * Collection cannot be passed to matchAll(Collection).
- * This test verifies the API contract.
- */
- @Test
- void testTypeSafety_CompileTime() {
- Pattern pattern = Pattern.compile("\\d+");
-
- // This compiles - correct type
- List strings = List.of("123", "456");
- boolean[] results = pattern.matchAll(strings);
- assertNotNull(results);
-
- // This would NOT compile (commented out to allow test compilation):
- // List ints = List.of(123, 456);
- // pattern.matchAll(ints); // Compile error: Required Collection, found Collection
-
- // Java's type system prevents this at compile time
- }
-
- /**
- * Tests runtime behavior with raw types (unchecked warnings).
- * We throw explicit IllegalArgumentException with helpful message.
- */
- @Test
- @SuppressWarnings({"rawtypes", "unchecked"})
- void testTypeSafety_RuntimeWithRawTypes() {
- Pattern pattern = Pattern.compile("test");
-
- // Using raw List (no generic type) - compiles with warning
- List raw = new ArrayList();
- raw.add(123); // Integer
- raw.add("test"); // String
- raw.add(456); // Integer
-
- // Should throw IllegalArgumentException with helpful message
- IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> {
- pattern.matchAll(raw);
- });
-
- // Verify error message is helpful
- assertTrue(e.getMessage().contains("non-String elements"));
- assertTrue(e.getMessage().contains("stream().map(Object::toString)"));
- assertNotNull(e.getCause()); // Cause is ArrayStoreException
- assertTrue(e.getCause() instanceof ArrayStoreException);
- }
-
- /**
- * Tests that all Collection-based methods throw helpful errors for non-String elements.
- */
- @Test
- @SuppressWarnings({"rawtypes", "unchecked"})
- void testTypeSafety_AllMethodsValidate() {
- Pattern pattern = Pattern.compile("test");
-
- List raw = new ArrayList();
- raw.add(123);
-
- // All Collection methods should throw IllegalArgumentException
- assertThrows(IllegalArgumentException.class, () -> pattern.matchAll(raw));
- assertThrows(IllegalArgumentException.class, () -> pattern.filter(raw));
- assertThrows(IllegalArgumentException.class, () -> pattern.filterNot(raw));
-
- List mutableRaw = new ArrayList();
- mutableRaw.add(123);
- assertThrows(IllegalArgumentException.class, () -> pattern.retainMatches(mutableRaw));
-
- List mutableRaw2 = new ArrayList();
- mutableRaw2.add(123);
- assertThrows(IllegalArgumentException.class, () -> pattern.removeMatches(mutableRaw2));
- }
-
- // ========== UTF-8 / Unicode Handling Tests ==========
-
- /**
- * Tests bulk matching with Unicode characters (Chinese, Arabic, Emoji).
- * Java Strings are UTF-16, JNI converts to UTF-8 automatically.
- */
- @Test
- void testBulkMatching_UnicodeCharacters() {
- // Chinese characters
- Pattern chinesePattern = Pattern.compile("中文.*");
- List chineseInputs = List.of("中文测试", "English", "中文字符", "123");
- boolean[] results = chinesePattern.matchAll(chineseInputs);
-
- assertArrayEquals(new boolean[]{true, false, true, false}, results);
-
- // Arabic characters
- Pattern arabicPattern = Pattern.compile("مرحبا.*");
- List arabicInputs = List.of("مرحبا بك", "hello", "مرحبا العالم");
- results = arabicPattern.matchAll(arabicInputs);
-
- assertArrayEquals(new boolean[]{true, false, true}, results);
-
- // Emoji
- Pattern emojiPattern = Pattern.compile(".*😀.*");
- List emojiInputs = List.of("hello😀world", "no emoji", "test😀😀test");
- results = emojiPattern.matchAll(emojiInputs);
-
- assertArrayEquals(new boolean[]{true, false, true}, results);
- }
-
- /**
- * Tests bulk matching with mixed Unicode scripts in same string.
- */
- @Test
- void testBulkMatching_MixedScripts() {
- Pattern pattern = Pattern.compile("User:\\s*\\S+");
-
- List inputs = List.of(
- "User: Alice", // ASCII
- "User: 田中", // Japanese
- "User: José", // Accented
- "User: مصطفى", // Arabic
- "User: 李明" // Chinese
- );
-
- boolean[] results = pattern.matchAll(inputs);
-
- // All should match (\\S+ matches any non-whitespace including Unicode)
- assertArrayEquals(new boolean[]{true, true, true, true, true}, results);
- }
-
- /**
- * Tests bulk filtering with emoji and special Unicode characters.
- */
- @Test
- void testFilter_EmojiAndSpecialCharacters() {
- // Match strings containing specific emoji (literal match, not ranges)
- Pattern pattern = Pattern.compile(".*(😀|😢|😁|😂|😃).*");
-
- List inputs = List.of(
- "Happy 😀",
- "Sad 😢",
- "No emoji",
- "Multiple 😁😂😃"
- );
-
- List filtered = pattern.filter(inputs);
-
- assertEquals(3, filtered.size()); // All except "No emoji"
- assertTrue(filtered.contains("Happy 😀"));
- assertTrue(filtered.contains("Sad 😢"));
- assertTrue(filtered.contains("Multiple 😁😂😃"));
- }
-
- /**
- * Tests handling of zero-width characters and combining characters.
- */
- @Test
- void testMatchAll_ZeroWidthAndCombiningCharacters() {
- Pattern pattern = Pattern.compile("test.*");
-
- List inputs = List.of(
+ private static PatternCache originalCache;
+
+ @BeforeAll
+ static void setUpClass() {
+ originalCache = TestUtils.replaceGlobalCache(TestUtils.testConfigBuilder().build());
+ }
+
+ @AfterAll
+ static void tearDownClass() {
+ TestUtils.restoreGlobalCache(originalCache);
+ }
+
+ // ========== Type Safety Tests ==========
+
+ /**
+ * Demonstrates compile-time type safety. A {@code Collection<Integer>} cannot be passed to
+ * {@code matchAll(Collection<String>)}. This test verifies the API contract.
+ */
+ @Test
+ void testTypeSafety_CompileTime() {
+ Pattern pattern = Pattern.compile("\\d+");
+
+ // This compiles - correct type
+ List strings = List.of("123", "456");
+ boolean[] results = pattern.matchAll(strings);
+ assertNotNull(results);
+
+ // This would NOT compile (commented out to allow test compilation):
+ // List<Integer> ints = List.of(123, 456);
+ // pattern.matchAll(ints); // Compile error: required Collection<String>, found
+ // List<Integer>
+
+ // Java's type system prevents this at compile time
+ }
+
+ /**
+ * Tests runtime behavior with raw types (unchecked warnings). We throw explicit
+ * IllegalArgumentException with helpful message.
+ */
+ @Test
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ void testTypeSafety_RuntimeWithRawTypes() {
+ Pattern pattern = Pattern.compile("test");
+
+ // Using raw List (no generic type) - compiles with warning
+ List raw = new ArrayList();
+ raw.add(123); // Integer
+ raw.add("test"); // String
+ raw.add(456); // Integer
+
+ // Should throw IllegalArgumentException with helpful message
+ IllegalArgumentException e =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ pattern.matchAll(raw);
+ });
+
+ // Verify error message is helpful
+ assertTrue(e.getMessage().contains("non-String elements"));
+ assertTrue(e.getMessage().contains("stream().map(Object::toString)"));
+ assertNotNull(e.getCause()); // Cause is ArrayStoreException
+ assertTrue(e.getCause() instanceof ArrayStoreException);
+ }
+
+ /** Tests that all Collection-based methods throw helpful errors for non-String elements. */
+ @Test
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ void testTypeSafety_AllMethodsValidate() {
+ Pattern pattern = Pattern.compile("test");
+
+ List raw = new ArrayList();
+ raw.add(123);
+
+ // All Collection methods should throw IllegalArgumentException
+ assertThrows(IllegalArgumentException.class, () -> pattern.matchAll(raw));
+ assertThrows(IllegalArgumentException.class, () -> pattern.filter(raw));
+ assertThrows(IllegalArgumentException.class, () -> pattern.filterNot(raw));
+
+ List mutableRaw = new ArrayList();
+ mutableRaw.add(123);
+ assertThrows(IllegalArgumentException.class, () -> pattern.retainMatches(mutableRaw));
+
+ List mutableRaw2 = new ArrayList();
+ mutableRaw2.add(123);
+ assertThrows(IllegalArgumentException.class, () -> pattern.removeMatches(mutableRaw2));
+ }
+
+ // ========== UTF-8 / Unicode Handling Tests ==========
+
+ /**
+ * Tests bulk matching with Unicode characters (Chinese, Arabic, Emoji). Java Strings are UTF-16,
+ * JNI converts to UTF-8 automatically.
+ */
+ @Test
+ void testBulkMatching_UnicodeCharacters() {
+ // Chinese characters
+ Pattern chinesePattern = Pattern.compile("中文.*");
+ List chineseInputs = List.of("中文测试", "English", "中文字符", "123");
+ boolean[] results = chinesePattern.matchAll(chineseInputs);
+
+ assertArrayEquals(new boolean[] {true, false, true, false}, results);
+
+ // Arabic characters
+ Pattern arabicPattern = Pattern.compile("مرحبا.*");
+ List arabicInputs = List.of("مرحبا بك", "hello", "مرحبا العالم");
+ results = arabicPattern.matchAll(arabicInputs);
+
+ assertArrayEquals(new boolean[] {true, false, true}, results);
+
+ // Emoji
+ Pattern emojiPattern = Pattern.compile(".*😀.*");
+ List emojiInputs = List.of("hello😀world", "no emoji", "test😀😀test");
+ results = emojiPattern.matchAll(emojiInputs);
+
+ assertArrayEquals(new boolean[] {true, false, true}, results);
+ }
+
+ /** Tests bulk matching with mixed Unicode scripts in same string. */
+ @Test
+ void testBulkMatching_MixedScripts() {
+ Pattern pattern = Pattern.compile("User:\\s*\\S+");
+
+ List inputs =
+ List.of(
+ "User: Alice", // ASCII
+ "User: 田中", // Japanese
+ "User: José", // Accented
+ "User: مصطفى", // Arabic
+ "User: 李明" // Chinese
+ );
+
+ boolean[] results = pattern.matchAll(inputs);
+
+ // All should match (\\S+ matches any non-whitespace including Unicode)
+ assertArrayEquals(new boolean[] {true, true, true, true, true}, results);
+ }
+
+ /** Tests bulk filtering with emoji and special Unicode characters. */
+ @Test
+ void testFilter_EmojiAndSpecialCharacters() {
+ // Match strings containing specific emoji (literal match, not ranges)
+ Pattern pattern = Pattern.compile(".*(😀|😢|😁|😂|😃).*");
+
+ List inputs = List.of("Happy 😀", "Sad 😢", "No emoji", "Multiple 😁😂😃");
+
+ List filtered = pattern.filter(inputs);
+
+ assertEquals(3, filtered.size()); // All except "No emoji"
+ assertTrue(filtered.contains("Happy 😀"));
+ assertTrue(filtered.contains("Sad 😢"));
+ assertTrue(filtered.contains("Multiple 😁😂😃"));
+ }
+
+ /** Tests handling of zero-width characters and combining characters. */
+ @Test
+ void testMatchAll_ZeroWidthAndCombiningCharacters() {
+ Pattern pattern = Pattern.compile("test.*");
+
+ List inputs =
+ List.of(
"test",
- "test\u200B", // Zero-width space
- "test\u0301", // Combining acute accent
- "tést", // Precomposed é
- "te\u0301st" // e + combining accent
- );
+ "test\u200B", // Zero-width space
+ "test\u0301", // Combining acute accent
+ "tést", // Precomposed é
+ "te\u0301st" // e + combining accent
+ );
- boolean[] results = pattern.matchAll(inputs);
+ boolean[] results = pattern.matchAll(inputs);
- // All should match (pattern is "test.*" which matches test followed by anything)
- assertArrayEquals(new boolean[]{true, true, true, false, false}, results);
- }
+ // Only the first three inputs begin with the literal "test"; the accented variants do not
+ assertArrayEquals(new boolean[] {true, true, true, false, false}, results);
+ }
- // ========== Special Character Tests ==========
+ // ========== Special Character Tests ==========
- /**
- * Tests bulk matching with control characters, newlines, tabs.
- */
- @Test
- void testMatchAll_ControlCharacters() {
- Pattern pattern = Pattern.compile("line\\d+");
+ /** Tests bulk matching with control characters, newlines, tabs. */
+ @Test
+ void testMatchAll_ControlCharacters() {
+ Pattern pattern = Pattern.compile("line\\d+");
- List inputs = List.of(
+ List inputs =
+ List.of(
"line1",
- "line2\n", // With newline
- "line3\t", // With tab
- "line4\r\n", // With CR+LF
- "other\n"
- );
-
- boolean[] results = pattern.matchAll(inputs);
-
- // Pattern matches "lineN" without trailing characters
- assertArrayEquals(new boolean[]{true, false, false, false, false}, results);
- }
-
- /**
- * Tests map filtering with Unicode keys and values.
- */
- @Test
- void testMapFiltering_UnicodeKeysAndValues() {
- Pattern pattern = Pattern.compile("用户.*"); // Chinese "user"
-
- Map users = new HashMap<>();
- users.put("用户001", "Alice");
- users.put("管理员", "Admin");
- users.put("用户002", "Bob");
-
- Map filtered = pattern.filterByKey(users);
-
- assertEquals(2, filtered.size());
- assertTrue(filtered.containsKey("用户001"));
- assertTrue(filtered.containsKey("用户002"));
- assertFalse(filtered.containsKey("管理员"));
- }
-
- // ========== toString() Behavior Tests ==========
-
- /**
- * Demonstrates that non-String objects would need explicit toString().
- * Since our signature requires Collection, this is handled at compile time.
- */
- @Test
- void testNonStringObjects_RequireExplicitConversion() {
- Pattern pattern = Pattern.compile("\\d+");
-
- // If you have Collection, you must convert explicitly
- List numbers = List.of(123, 456, 789);
-
- // Convert to strings explicitly
- List stringNumbers = numbers.stream()
- .map(Object::toString)
- .toList();
-
- boolean[] results = pattern.matchAll(stringNumbers);
- assertArrayEquals(new boolean[]{true, true, true}, results);
- }
-
- // ========== Null and Empty Tests ==========
-
- /**
- * Tests how matchAll handles collections with null elements.
- * Nulls should not crash - JNI handles them gracefully (returns false for match).
- */
- @Test
- void testMatchAll_NullElements_DoesNotCrash() {
- Pattern pattern = Pattern.compile("test");
-
- String[] arrayWithNulls = {"test", null, "other", null};
- boolean[] results = pattern.matchAll(arrayWithNulls);
-
- assertEquals(4, results.length);
- assertTrue(results[0]); // "test" matches
- assertFalse(results[1]); // null doesn't match
- assertFalse(results[2]); // "other" doesn't match
- assertFalse(results[3]); // null doesn't match
- }
-
- /**
- * Tests bulk matching with empty strings.
- */
- @Test
- void testMatchAll_EmptyStrings() {
- // Pattern that matches empty string (and only empty)
- Pattern onlyEmptyPattern = Pattern.compile("^$");
- List inputs = List.of("", "test", "", "other");
- boolean[] results = onlyEmptyPattern.matchAll(inputs);
-
- assertArrayEquals(new boolean[]{true, false, true, false}, results);
-
- // Pattern that matches anything (including empty)
- Pattern anyPattern = Pattern.compile(".*");
- results = anyPattern.matchAll(inputs);
-
- assertArrayEquals(new boolean[]{true, true, true, true}, results);
- }
-
- // ========== Binary Data / Invalid UTF-8 ==========
-
- /**
- * Tests behavior with strings containing invalid UTF-16 surrogate pairs.
- * JNI's GetStringUTFChars handles this by replacing invalid sequences.
- */
- @Test
- void testMatchAll_InvalidSurrogates() {
- Pattern pattern = Pattern.compile("test.*");
-
- // Create string with unpaired surrogate (invalid UTF-16)
- String invalidSurrogate = "test\uD800"; // High surrogate without low surrogate
-
- List inputs = List.of("test", invalidSurrogate, "test123");
- boolean[] results = pattern.matchAll(inputs);
-
- // JNI will replace invalid sequence, pattern may or may not match
- // The important thing is it doesn't crash
- assertEquals(3, results.length);
- assertNotNull(results); // Just verify no crash
- }
+ "line2\n", // With newline
+ "line3\t", // With tab
+ "line4\r\n", // With CR+LF
+ "other\n");
+
+ boolean[] results = pattern.matchAll(inputs);
+
+ // Pattern matches "lineN" without trailing characters
+ assertArrayEquals(new boolean[] {true, false, false, false, false}, results);
+ }
+
+ /** Tests map filtering with Unicode keys and values. */
+ @Test
+ void testMapFiltering_UnicodeKeysAndValues() {
+ Pattern pattern = Pattern.compile("用户.*"); // Chinese "user"
+
+ Map users = new HashMap<>();
+ users.put("用户001", "Alice");
+ users.put("管理员", "Admin");
+ users.put("用户002", "Bob");
+
+ Map filtered = pattern.filterByKey(users);
+
+ assertEquals(2, filtered.size());
+ assertTrue(filtered.containsKey("用户001"));
+ assertTrue(filtered.containsKey("用户002"));
+ assertFalse(filtered.containsKey("管理员"));
+ }
+
+ // ========== toString() Behavior Tests ==========
+
+ /**
+ * Demonstrates that non-String objects would need explicit toString(). Since our signature
+ * requires {@code Collection<String>}, this is handled at compile time.
+ */
+ @Test
+ void testNonStringObjects_RequireExplicitConversion() {
+ Pattern pattern = Pattern.compile("\\d+");
+
+ // If you have a Collection<Integer>, you must convert explicitly
+ List numbers = List.of(123, 456, 789);
+
+ // Convert to strings explicitly
+ List stringNumbers = numbers.stream().map(Object::toString).toList();
+
+ boolean[] results = pattern.matchAll(stringNumbers);
+ assertArrayEquals(new boolean[] {true, true, true}, results);
+ }
+
+ // ========== Null and Empty Tests ==========
+
+ /**
+ * Tests how matchAll handles a String array with null elements. Nulls should not crash - JNI
+ * handles them gracefully (returns false for match).
+ */
+ @Test
+ void testMatchAll_NullElements_DoesNotCrash() {
+ Pattern pattern = Pattern.compile("test");
+
+ String[] arrayWithNulls = {"test", null, "other", null};
+ boolean[] results = pattern.matchAll(arrayWithNulls);
+
+ assertEquals(4, results.length);
+ assertTrue(results[0]); // "test" matches
+ assertFalse(results[1]); // null doesn't match
+ assertFalse(results[2]); // "other" doesn't match
+ assertFalse(results[3]); // null doesn't match
+ }
+
+ /** Tests bulk matching with empty strings. */
+ @Test
+ void testMatchAll_EmptyStrings() {
+ // Pattern that matches empty string (and only empty)
+ Pattern onlyEmptyPattern = Pattern.compile("^$");
+ List inputs = List.of("", "test", "", "other");
+ boolean[] results = onlyEmptyPattern.matchAll(inputs);
+
+ assertArrayEquals(new boolean[] {true, false, true, false}, results);
+
+ // Pattern that matches anything (including empty)
+ Pattern anyPattern = Pattern.compile(".*");
+ results = anyPattern.matchAll(inputs);
+
+ assertArrayEquals(new boolean[] {true, true, true, true}, results);
+ }
+
+ // ========== Binary Data / Invalid UTF-8 ==========
+
+ /**
+ * Tests behavior with strings containing invalid UTF-16 surrogate pairs. JNI's GetStringUTFChars
+ * handles this by replacing invalid sequences.
+ */
+ @Test
+ void testMatchAll_InvalidSurrogates() {
+ Pattern pattern = Pattern.compile("test.*");
+
+ // Create string with unpaired surrogate (invalid UTF-16)
+ String invalidSurrogate = "test\uD800"; // High surrogate without low surrogate
+
+ List inputs = List.of("test", invalidSurrogate, "test123");
+ boolean[] results = pattern.matchAll(inputs);
+
+ // JNI will replace invalid sequence, pattern may or may not match
+ // The important thing is it doesn't crash
+ assertEquals(3, results.length);
+ assertNotNull(results); // Just verify no crash
+ }
}
diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/ByteBufferApiIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/ByteBufferApiIT.java
index 18560d3..fa67415 100644
--- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/ByteBufferApiIT.java
+++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/ByteBufferApiIT.java
@@ -16,320 +16,316 @@
package com.axonops.libre2.api;
+import static org.assertj.core.api.Assertions.*;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
-import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
-
-import static org.assertj.core.api.Assertions.*;
-
/**
* Tests for ByteBuffer API with automatic routing to zero-copy or String API.
*
- * These tests verify that Pattern correctly detects DirectByteBuffer vs
- * heap ByteBuffer and routes to the appropriate implementation.
+ * <p>These tests verify that Pattern correctly detects DirectByteBuffer vs heap ByteBuffer and
+ * routes to the appropriate implementation.
*/
@DisplayName("ByteBuffer API Tests")
class ByteBufferApiIT {
- /**
- * Creates a DirectByteBuffer (off-heap, supports zero-copy).
- */
- private ByteBuffer createDirectBuffer(String text) {
- byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
- ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length);
- buffer.put(bytes);
- buffer.flip();
- return buffer;
- }
+ /** Creates a DirectByteBuffer (off-heap, supports zero-copy). */
+ private ByteBuffer createDirectBuffer(String text) {
+ byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
+ ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ return buffer;
+ }
- /**
- * Creates a heap ByteBuffer (on-heap, falls back to String API).
- */
- private ByteBuffer createHeapBuffer(String text) {
- return ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8));
- }
+ /** Creates a heap ByteBuffer (on-heap, falls back to String API). */
+ private ByteBuffer createHeapBuffer(String text) {
+ return ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8));
+ }
- // ========== DirectByteBuffer Tests (Zero-Copy Path) ==========
+ // ========== DirectByteBuffer Tests (Zero-Copy Path) ==========
- @Test
- @DisplayName("Pattern.matches(DirectByteBuffer) should use zero-copy")
- void patternMatches_directBuffer_usesZeroCopy() {
- Pattern pattern = Pattern.compile("hello");
+ @Test
+ @DisplayName("Pattern.matches(DirectByteBuffer) should use zero-copy")
+ void patternMatches_directBuffer_usesZeroCopy() {
+ Pattern pattern = Pattern.compile("hello");
- ByteBuffer buffer = createDirectBuffer("hello");
- assertThat(buffer.isDirect()).isTrue(); // Verify it's direct
+ ByteBuffer buffer = createDirectBuffer("hello");
+ assertThat(buffer.isDirect()).isTrue(); // Verify it's direct
- boolean matches = pattern.matches(buffer);
+ boolean matches = pattern.matches(buffer);
- assertThat(matches).isTrue();
- }
+ assertThat(matches).isTrue();
+ }
- @Test
- @DisplayName("Pattern.find(DirectByteBuffer) should use zero-copy")
- void patternFind_directBuffer_usesZeroCopy() {
- Pattern pattern = Pattern.compile("world");
+ @Test
+ @DisplayName("Pattern.find(DirectByteBuffer) should use zero-copy")
+ void patternFind_directBuffer_usesZeroCopy() {
+ Pattern pattern = Pattern.compile("world");
- ByteBuffer buffer = createDirectBuffer("hello world");
- assertThat(buffer.isDirect()).isTrue();
+ ByteBuffer buffer = createDirectBuffer("hello world");
+ assertThat(buffer.isDirect()).isTrue();
- boolean found = pattern.find(buffer);
+ boolean found = pattern.find(buffer);
- assertThat(found).isTrue();
- }
-
- @Test
- @DisplayName("Pattern.extractGroups(DirectByteBuffer) should use zero-copy")
- void extractGroups_directBuffer_usesZeroCopy() {
- Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
+ assertThat(found).isTrue();
+ }
- ByteBuffer buffer = createDirectBuffer("2025-11-24");
- assertThat(buffer.isDirect()).isTrue();
+ @Test
+ @DisplayName("Pattern.extractGroups(DirectByteBuffer) should use zero-copy")
+ void extractGroups_directBuffer_usesZeroCopy() {
+ Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
+
+ ByteBuffer buffer = createDirectBuffer("2025-11-24");
+ assertThat(buffer.isDirect()).isTrue();
- String[] groups = pattern.extractGroups(buffer);
+ String[] groups = pattern.extractGroups(buffer);
+
+ assertThat(groups).containsExactly("2025-11-24", "2025", "11", "24");
+ }
+
+ @Test
+ @DisplayName("Pattern.findAllMatches(DirectByteBuffer) should use zero-copy")
+ void findAllMatches_directBuffer_usesZeroCopy() {
+ Pattern pattern = Pattern.compile("\\d+");
+
+ ByteBuffer buffer = createDirectBuffer("a1b22c333");
+ assertThat(buffer.isDirect()).isTrue();
+
+ String[][] matches = pattern.findAllMatches(buffer);
+
+ assertThat(matches).isNotNull();
+ assertThat(matches.length).isEqualTo(3);
+ assertThat(matches[0][0]).isEqualTo("1");
+ assertThat(matches[1][0]).isEqualTo("22");
+ assertThat(matches[2][0]).isEqualTo("333");
+ }
+
+ // ========== Heap ByteBuffer Tests (String Fallback Path) ==========
+
+ @Test
+ @DisplayName("Pattern.matches(heap ByteBuffer) should fall back to String API")
+ void patternMatches_heapBuffer_fallsBackToString() {
+ Pattern pattern = Pattern.compile("hello");
+
+ ByteBuffer buffer = createHeapBuffer("hello");
+ assertThat(buffer.isDirect()).isFalse(); // Verify it's heap
+
+ boolean matches = pattern.matches(buffer);
+
+ assertThat(matches).isTrue();
+ }
+
+ @Test
+ @DisplayName("Pattern.find(heap ByteBuffer) should fall back to String API")
+ void patternFind_heapBuffer_fallsBackToString() {
+ Pattern pattern = Pattern.compile("world");
+
+ ByteBuffer buffer = createHeapBuffer("hello world");
+ assertThat(buffer.isDirect()).isFalse();
+
+ boolean found = pattern.find(buffer);
+
+ assertThat(found).isTrue();
+ }
+
+ // ========== Consistency Tests (Direct vs Heap vs String) ==========
+
+ @ParameterizedTest
+ @DisplayName("DirectByteBuffer, heap ByteBuffer, and String should all match")
+ @CsvSource({
+ "\\d+, 12345, true",
+ "\\d+, abc, false",
+ "[a-z]+, hello, true",
+ "[a-z]+, HELLO, false",
+ "test, test, true",
+ "test, testing, false"
+ })
+ void allApisProduceSameResults(String patternStr, String input, boolean expected) {
+ Pattern pattern = Pattern.compile(patternStr);
- assertThat(groups).containsExactly("2025-11-24", "2025", "11", "24");
+ // String API
+ boolean stringResult = pattern.matches(input);
+
+ // DirectByteBuffer API (zero-copy)
+ ByteBuffer directBuffer = createDirectBuffer(input);
+ boolean directResult = pattern.matches(directBuffer);
+
+ // Heap ByteBuffer API (String fallback)
+ ByteBuffer heapBuffer = createHeapBuffer(input);
+ boolean heapResult = pattern.matches(heapBuffer);
+
+ // All should produce same result
+ assertThat(directResult)
+ .as("DirectByteBuffer should match String API")
+ .isEqualTo(stringResult)
+ .isEqualTo(expected);
+
+ assertThat(heapResult)
+ .as("Heap ByteBuffer should match String API")
+ .isEqualTo(stringResult)
+ .isEqualTo(expected);
+ }
+
+ @ParameterizedTest
+ @DisplayName("find() should work consistently across all API variants")
+ @CsvSource({
+ "\\d+, abc123def, true",
+ "\\d+, abcdef, false",
+ "@, user@example.com, true",
+ "@, noatsign, false"
+ })
+ void find_allApisConsistent(String patternStr, String input, boolean expected) {
+ Pattern pattern = Pattern.compile(patternStr);
+
+ // String API
+ boolean stringResult;
+ try (Matcher m = pattern.matcher(input)) {
+ stringResult = m.find();
}
- @Test
- @DisplayName("Pattern.findAllMatches(DirectByteBuffer) should use zero-copy")
- void findAllMatches_directBuffer_usesZeroCopy() {
- Pattern pattern = Pattern.compile("\\d+");
+ // DirectByteBuffer
+ boolean directResult = pattern.find(createDirectBuffer(input));
- ByteBuffer buffer = createDirectBuffer("a1b22c333");
- assertThat(buffer.isDirect()).isTrue();
+ // Heap ByteBuffer
+ boolean heapResult = pattern.find(createHeapBuffer(input));
- String[][] matches = pattern.findAllMatches(buffer);
+ assertThat(directResult).isEqualTo(stringResult).isEqualTo(expected);
+ assertThat(heapResult).isEqualTo(stringResult).isEqualTo(expected);
+ }
- assertThat(matches).isNotNull();
- assertThat(matches.length).isEqualTo(3);
- assertThat(matches[0][0]).isEqualTo("1");
- assertThat(matches[1][0]).isEqualTo("22");
- assertThat(matches[2][0]).isEqualTo("333");
- }
+ // ========== Mixed Usage Tests ==========
- // ========== Heap ByteBuffer Tests (String Fallback Path) ==========
+ @Test
+ @DisplayName("Pattern can mix String, DirectByteBuffer, and heap ByteBuffer")
+ void pattern_mixedUsage_allTypes() {
+ Pattern pattern = Pattern.compile("\\d+");
- @Test
- @DisplayName("Pattern.matches(heap ByteBuffer) should fall back to String API")
- void patternMatches_heapBuffer_fallsBackToString() {
- Pattern pattern = Pattern.compile("hello");
+ // Use with String
+ assertThat(pattern.matches("123")).isTrue();
- ByteBuffer buffer = createHeapBuffer("hello");
- assertThat(buffer.isDirect()).isFalse(); // Verify it's heap
+ // Use with DirectByteBuffer (zero-copy)
+ ByteBuffer directBuffer = createDirectBuffer("456");
+ assertThat(pattern.matches(directBuffer)).isTrue();
- boolean matches = pattern.matches(buffer);
+ // Use with heap ByteBuffer (String fallback)
+ ByteBuffer heapBuffer = createHeapBuffer("789");
+ assertThat(pattern.matches(heapBuffer)).isTrue();
- assertThat(matches).isTrue();
- }
-
- @Test
- @DisplayName("Pattern.find(heap ByteBuffer) should fall back to String API")
- void patternFind_heapBuffer_fallsBackToString() {
- Pattern pattern = Pattern.compile("world");
-
- ByteBuffer buffer = createHeapBuffer("hello world");
- assertThat(buffer.isDirect()).isFalse();
-
- boolean found = pattern.find(buffer);
-
- assertThat(found).isTrue();
- }
-
- // ========== Consistency Tests (Direct vs Heap vs String) ==========
-
- @ParameterizedTest
- @DisplayName("DirectByteBuffer, heap ByteBuffer, and String should all match")
- @CsvSource({
- "\\d+, 12345, true",
- "\\d+, abc, false",
- "[a-z]+, hello, true",
- "[a-z]+, HELLO, false",
- "test, test, true",
- "test, testing, false"
- })
- void allApisProduceSameResults(String patternStr, String input, boolean expected) {
- Pattern pattern = Pattern.compile(patternStr);
-
- // String API
- boolean stringResult = pattern.matches(input);
-
- // DirectByteBuffer API (zero-copy)
- ByteBuffer directBuffer = createDirectBuffer(input);
- boolean directResult = pattern.matches(directBuffer);
-
- // Heap ByteBuffer API (String fallback)
- ByteBuffer heapBuffer = createHeapBuffer(input);
- boolean heapResult = pattern.matches(heapBuffer);
-
- // All should produce same result
- assertThat(directResult)
- .as("DirectByteBuffer should match String API")
- .isEqualTo(stringResult)
- .isEqualTo(expected);
-
- assertThat(heapResult)
- .as("Heap ByteBuffer should match String API")
- .isEqualTo(stringResult)
- .isEqualTo(expected);
- }
+ // Mix all three in same method
+ assertThat(pattern.matches("abc")).isFalse();
+ assertThat(pattern.matches(createDirectBuffer("def"))).isFalse();
+ assertThat(pattern.matches(createHeapBuffer("ghi"))).isFalse();
+ }
- @ParameterizedTest
- @DisplayName("find() should work consistently across all API variants")
- @CsvSource({
- "\\d+, abc123def, true",
- "\\d+, abcdef, false",
- "@, user@example.com, true",
- "@, noatsign, false"
- })
- void find_allApisConsistent(String patternStr, String input, boolean expected) {
- Pattern pattern = Pattern.compile(patternStr);
-
- // String API
- boolean stringResult;
- try (Matcher m = pattern.matcher(input)) {
- stringResult = m.find();
- }
-
- // DirectByteBuffer
- boolean directResult = pattern.find(createDirectBuffer(input));
-
- // Heap ByteBuffer
- boolean heapResult = pattern.find(createHeapBuffer(input));
-
- assertThat(directResult).isEqualTo(stringResult).isEqualTo(expected);
- assertThat(heapResult).isEqualTo(stringResult).isEqualTo(expected);
+ // ========== Position/Limit Handling Tests ==========
+
+ @Test
+ @DisplayName("ByteBuffer position and limit should be respected")
+ void byteBuffer_positionLimit_respected() {
+ Pattern pattern = Pattern.compile("world");
+
+ ByteBuffer buffer = createDirectBuffer("hello world goodbye");
+
+ // Match full buffer - should find "world"
+ assertThat(pattern.find(buffer)).isTrue();
+
+ // Reset and set position to skip "hello "
+ buffer.rewind();
+ buffer.position(6); // Start at "world"
+ buffer.limit(11); // End after "world"
+
+ // Should match just "world"
+ assertThat(pattern.matches(buffer)).isTrue();
+ }
+
+ @Test
+ @DisplayName("ByteBuffer position should not be modified")
+ void byteBuffer_positionNotModified() {
+ Pattern pattern = Pattern.compile("test");
+
+ ByteBuffer buffer = createDirectBuffer("test");
+ int originalPosition = buffer.position();
+ int originalLimit = buffer.limit();
+
+ pattern.matches(buffer);
+
+ // Position and limit should be unchanged
+ assertThat(buffer.position()).isEqualTo(originalPosition);
+ assertThat(buffer.limit()).isEqualTo(originalLimit);
+ }
+
+ // ========== Validation Tests ==========
+
+ @Test
+ @DisplayName("Should throw on null ByteBuffer")
+ void matches_nullByteBuffer_throws() {
+ Pattern pattern = Pattern.compile("test");
+
+ assertThatNullPointerException()
+ .isThrownBy(() -> pattern.matches((ByteBuffer) null))
+ .withMessageContaining("null");
+ }
+
+ @Test
+ @DisplayName("Empty ByteBuffer should work")
+ void matches_emptyByteBuffer_works() {
+ Pattern pattern = Pattern.compile(".*"); // Match anything (including empty)
+
+ ByteBuffer emptyDirect = createDirectBuffer("");
+ ByteBuffer emptyHeap = createHeapBuffer("");
+
+ assertThat(pattern.matches(emptyDirect)).isTrue();
+ assertThat(pattern.matches(emptyHeap)).isTrue();
+ }
+
+ // ========== Real-World Scenario Tests ==========
+
+ @Test
+ @DisplayName("Real-world: Netty-like scenario with DirectByteBuffer")
+ void realWorld_nettyStyleDirectBuffer() {
+ Pattern emailPattern =
+ Pattern.compile("\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b", false);
+
+ // Simulate Netty ByteBuf-like usage (direct memory)
+ ByteBuffer networkBuffer = ByteBuffer.allocateDirect(1024);
+ String message = "New user registered: user@example.com";
+ networkBuffer.put(message.getBytes(StandardCharsets.UTF_8));
+ networkBuffer.flip();
+
+ // Search the direct buffer for an email address (find only; nothing is extracted)
+ boolean hasEmail = emailPattern.find(networkBuffer);
+
+ assertThat(hasEmail).isTrue();
+ }
+
+ @Test
+ @DisplayName("Real-world: Process multiple network buffers")
+ void realWorld_multipleNetworkBuffers() {
+ Pattern validPattern = Pattern.compile("valid_.*");
+
+ // Simulate multiple incoming network buffers
+ ByteBuffer[] buffers = {
+ createDirectBuffer("valid_request_1"),
+ createDirectBuffer("invalid_request"),
+ createDirectBuffer("valid_request_2"),
+ createHeapBuffer("other_data"), // Mixed: some heap, some direct
+ createDirectBuffer("valid_request_3")
+ };
+
+ // Process all buffers
+ int validCount = 0;
+ for (ByteBuffer buffer : buffers) {
+ if (validPattern.matches(buffer)) {
+ validCount++;
+ }
}
- // ========== Mixed Usage Tests ==========
-
- @Test
- @DisplayName("Pattern can mix String, DirectByteBuffer, and heap ByteBuffer")
- void pattern_mixedUsage_allTypes() {
- Pattern pattern = Pattern.compile("\\d+");
-
- // Use with String
- assertThat(pattern.matches("123")).isTrue();
-
- // Use with DirectByteBuffer (zero-copy)
- ByteBuffer directBuffer = createDirectBuffer("456");
- assertThat(pattern.matches(directBuffer)).isTrue();
-
- // Use with heap ByteBuffer (String fallback)
- ByteBuffer heapBuffer = createHeapBuffer("789");
- assertThat(pattern.matches(heapBuffer)).isTrue();
-
- // Mix all three in same method
- assertThat(pattern.matches("abc")).isFalse();
- assertThat(pattern.matches(createDirectBuffer("def"))).isFalse();
- assertThat(pattern.matches(createHeapBuffer("ghi"))).isFalse();
- }
-
- // ========== Position/Limit Handling Tests ==========
-
- @Test
- @DisplayName("ByteBuffer position and limit should be respected")
- void byteBuffer_positionLimit_respected() {
- Pattern pattern = Pattern.compile("world");
-
- ByteBuffer buffer = createDirectBuffer("hello world goodbye");
-
- // Match full buffer - should find "world"
- assertThat(pattern.find(buffer)).isTrue();
-
- // Reset and set position to skip "hello "
- buffer.rewind();
- buffer.position(6); // Start at "world"
- buffer.limit(11); // End after "world"
-
- // Should match just "world"
- assertThat(pattern.matches(buffer)).isTrue();
- }
-
- @Test
- @DisplayName("ByteBuffer position should not be modified")
- void byteBuffer_positionNotModified() {
- Pattern pattern = Pattern.compile("test");
-
- ByteBuffer buffer = createDirectBuffer("test");
- int originalPosition = buffer.position();
- int originalLimit = buffer.limit();
-
- pattern.matches(buffer);
-
- // Position and limit should be unchanged
- assertThat(buffer.position()).isEqualTo(originalPosition);
- assertThat(buffer.limit()).isEqualTo(originalLimit);
- }
-
- // ========== Validation Tests ==========
-
- @Test
- @DisplayName("Should throw on null ByteBuffer")
- void matches_nullByteBuffer_throws() {
- Pattern pattern = Pattern.compile("test");
-
- assertThatNullPointerException()
- .isThrownBy(() -> pattern.matches((ByteBuffer) null))
- .withMessageContaining("null");
- }
-
- @Test
- @DisplayName("Empty ByteBuffer should work")
- void matches_emptyByteBuffer_works() {
- Pattern pattern = Pattern.compile(".*"); // Match anything (including empty)
-
- ByteBuffer emptyDirect = createDirectBuffer("");
- ByteBuffer emptyHeap = createHeapBuffer("");
-
- assertThat(pattern.matches(emptyDirect)).isTrue();
- assertThat(pattern.matches(emptyHeap)).isTrue();
- }
-
- // ========== Real-World Scenario Tests ==========
-
- @Test
- @DisplayName("Real-world: Netty-like scenario with DirectByteBuffer")
- void realWorld_nettyStyleDirectBuffer() {
- Pattern emailPattern = Pattern.compile("\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b", false);
-
- // Simulate Netty ByteBuf-like usage (direct memory)
- ByteBuffer networkBuffer = ByteBuffer.allocateDirect(1024);
- String message = "New user registered: user@example.com";
- networkBuffer.put(message.getBytes(StandardCharsets.UTF_8));
- networkBuffer.flip();
-
- // Extract email using zero-copy
- boolean hasEmail = emailPattern.find(networkBuffer);
-
- assertThat(hasEmail).isTrue();
- }
-
- @Test
- @DisplayName("Real-world: Process multiple network buffers")
- void realWorld_multipleNetworkBuffers() {
- Pattern validPattern = Pattern.compile("valid_.*");
-
- // Simulate multiple incoming network buffers
- ByteBuffer[] buffers = {
- createDirectBuffer("valid_request_1"),
- createDirectBuffer("invalid_request"),
- createDirectBuffer("valid_request_2"),
- createHeapBuffer("other_data"), // Mixed: some heap, some direct
- createDirectBuffer("valid_request_3")
- };
-
- // Process all buffers
- int validCount = 0;
- for (ByteBuffer buffer : buffers) {
- if (validPattern.matches(buffer)) {
- validCount++;
- }
- }
-
- assertThat(validCount).isEqualTo(3);
- }
+ assertThat(validCount).isEqualTo(3);
+ }
}
diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/CaptureGroupsIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/CaptureGroupsIT.java
index a3e7228..1e5dc8e 100644
--- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/CaptureGroupsIT.java
+++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/CaptureGroupsIT.java
@@ -16,457 +16,449 @@
package com.axonops.libre2.api;
+import static org.assertj.core.api.Assertions.*;
+
+import java.util.List;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
-import java.util.List;
-
-import static org.assertj.core.api.Assertions.*;
-
-/**
- * Tests for capture group functionality (MatchResult and Pattern capture methods).
- */
+/** Tests for capture group functionality (MatchResult and Pattern capture methods). */
@DisplayName("Capture Groups")
class CaptureGroupsIT {
- // ========== MatchResult Basic Tests ==========
-
- @Test
- @DisplayName("MatchResult should indicate successful match")
- void matchResult_successfulMatch_matched() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.match("123")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group()).isEqualTo("123");
- assertThat(result.group(0)).isEqualTo("123");
- assertThat(result.group(1)).isEqualTo("123");
- }
- }
-
- @Test
- @DisplayName("MatchResult should indicate failed match")
- void matchResult_failedMatch_notMatched() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.match("abc")) {
- assertThat(result.matched()).isFalse();
- assertThat(result.groupCount()).isEqualTo(0);
- }
- }
-
- @Test
- @DisplayName("MatchResult should throw on group access when not matched")
- void matchResult_noMatch_throwsOnGroupAccess() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.match("abc")) {
- assertThatIllegalStateException()
- .isThrownBy(() -> result.group())
- .withMessageContaining("No match");
- }
- }
-
- // ========== Pattern.match() Tests ==========
-
- @Test
- @DisplayName("Pattern.match() should extract single group")
- void patternMatch_singleGroup_extracted() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.match("123")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.groupCount()).isEqualTo(1);
- assertThat(result.group(0)).isEqualTo("123"); // Full match
- assertThat(result.group(1)).isEqualTo("123"); // Captured group
- }
- }
-
- @Test
- @DisplayName("Pattern.match() should extract multiple groups")
- void patternMatch_multipleGroups_extracted() {
- Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+)\\.([a-z]+)");
- try (MatchResult result = pattern.match("user@example.com")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.groupCount()).isEqualTo(3);
- assertThat(result.group()).isEqualTo("user@example.com");
- assertThat(result.group(1)).isEqualTo("user");
- assertThat(result.group(2)).isEqualTo("example");
- assertThat(result.group(3)).isEqualTo("com");
- }
- }
-
- @Test
- @DisplayName("Pattern.match() should handle date extraction")
- void patternMatch_dateExtraction_works() {
- Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
- try (MatchResult result = pattern.match("2025-11-24")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group()).isEqualTo("2025-11-24");
- assertThat(result.group(1)).isEqualTo("2025");
- assertThat(result.group(2)).isEqualTo("11");
- assertThat(result.group(3)).isEqualTo("24");
- }
- }
-
- @Test
- @DisplayName("Pattern.match() should fail on partial content")
- void patternMatch_partialContent_fails() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.match("abc123def")) {
- assertThat(result.matched()).isFalse();
- }
- }
-
- // ========== Pattern.find() Tests ==========
-
- @Test
- @DisplayName("Pattern.find() should find first match in text")
- void patternFind_firstMatch_found() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.find("abc123def456")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group()).isEqualTo("123"); // First match
- assertThat(result.group(1)).isEqualTo("123");
- }
- }
-
- @Test
- @DisplayName("Pattern.find() should extract groups from first match")
- void patternFind_firstMatchGroups_extracted() {
- Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)");
- try (MatchResult result = pattern.find("Contact support@example.com or admin@test.org")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group()).isEqualTo("support@example.com"); // First email
- assertThat(result.group(1)).isEqualTo("support");
- assertThat(result.group(2)).isEqualTo("example.com");
- }
- }
-
- @Test
- @DisplayName("Pattern.find() should return failed match when not found")
- void patternFind_notFound_failedMatch() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.find("no digits here")) {
- assertThat(result.matched()).isFalse();
- }
- }
-
- // ========== Pattern.findAll() Tests ==========
-
- @Test
- @DisplayName("Pattern.findAll() should find all matches")
- void patternFindAll_multipleMatches_found() {
- Pattern pattern = Pattern.compile("(\\d+)");
- List<MatchResult> matches = pattern.findAll("a1b22c333");
- try {
- assertThat(matches).hasSize(3);
- assertThat(matches.get(0).group()).isEqualTo("1");
- assertThat(matches.get(1).group()).isEqualTo("22");
- assertThat(matches.get(2).group()).isEqualTo("333");
- } finally {
- matches.forEach(MatchResult::close);
- }
- }
-
- @Test
- @DisplayName("Pattern.findAll() should extract groups from each match")
- void patternFindAll_multipleMatchesWithGroups_extracted() {
- Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})");
- List<MatchResult> matches = pattern.findAll("Call 555-1234 or 555-5678");
- try {
- assertThat(matches).hasSize(2);
-
- // First match
- assertThat(matches.get(0).group()).isEqualTo("555-1234");
- assertThat(matches.get(0).group(1)).isEqualTo("555");
- assertThat(matches.get(0).group(2)).isEqualTo("1234");
-
- // Second match
- assertThat(matches.get(1).group()).isEqualTo("555-5678");
- assertThat(matches.get(1).group(1)).isEqualTo("555");
- assertThat(matches.get(1).group(2)).isEqualTo("5678");
- } finally {
- matches.forEach(MatchResult::close);
- }
- }
-
- @Test
- @DisplayName("Pattern.findAll() should return empty list for no matches")
- void patternFindAll_noMatches_emptyList() {
- Pattern pattern = Pattern.compile("(\\d+)");
- List<MatchResult> matches = pattern.findAll("no digits");
- try {
- assertThat(matches).isEmpty();
- } finally {
- matches.forEach(MatchResult::close);
- }
- }
-
- // ========== Named Groups Tests ==========
-
- @Test
- @DisplayName("Named groups should be accessible by name")
- void namedGroups_accessByName_works() {
- Pattern pattern = Pattern.compile("(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})");
- try (MatchResult result = pattern.match("2025-11-24")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group("year")).isEqualTo("2025");
- assertThat(result.group("month")).isEqualTo("11");
- assertThat(result.group("day")).isEqualTo("24");
- }
- }
-
- @Test
- @DisplayName("Named groups should also be accessible by index")
- void namedGroups_accessByIndex_works() {
- Pattern pattern = Pattern.compile("(?P<user>[a-z]+)@(?P<domain>[a-z]+\\.[a-z]+)");
- try (MatchResult result = pattern.match("admin@example.com")) {
- assertThat(result.matched()).isTrue();
- // Access by name
- assertThat(result.group("user")).isEqualTo("admin");
- assertThat(result.group("domain")).isEqualTo("example.com");
-
- // Also accessible by index
- assertThat(result.group(1)).isEqualTo("admin");
- assertThat(result.group(2)).isEqualTo("example.com");
- }
- }
-
- @Test
- @DisplayName("Non-existent named group should return null")
- void namedGroups_nonExistent_returnsNull() {
- Pattern pattern = Pattern.compile("(?P<found>\\d+)");
- try (MatchResult result = pattern.match("123")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group("found")).isEqualTo("123");
- assertThat(result.group("notfound")).isNull();
- }
- }
-
- // ========== Edge Cases ==========
-
- @Test
- @DisplayName("Pattern with no groups should work")
- void pattern_noGroups_works() {
- Pattern pattern = Pattern.compile("\\d+"); // No parentheses
- try (MatchResult result = pattern.match("123")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.groupCount()).isEqualTo(0);
- assertThat(result.group()).isEqualTo("123"); // Group 0 still available
- }
- }
-
- @Test
- @DisplayName("Optional groups that don't participate should be null")
- void optionalGroups_notParticipating_null() {
- Pattern pattern = Pattern.compile("(a)?(b)");
- try (MatchResult result = pattern.match("b")) { // 'a' is optional and doesn't match
- assertThat(result.matched()).isTrue();
- assertThat(result.groupCount()).isEqualTo(2);
- assertThat(result.group(0)).isEqualTo("b");
- assertThat(result.group(1)).isNull(); // Optional 'a' didn't participate
- assertThat(result.group(2)).isEqualTo("b");
- }
- }
-
- @Test
- @DisplayName("Nested groups should be extracted correctly")
- void nestedGroups_extracted() {
- Pattern pattern = Pattern.compile("((\\d+)-(\\d+))");
- try (MatchResult result = pattern.match("123-456")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.groupCount()).isEqualTo(3);
- assertThat(result.group(1)).isEqualTo("123-456"); // Outer group
- assertThat(result.group(2)).isEqualTo("123"); // First inner
- assertThat(result.group(3)).isEqualTo("456"); // Second inner
- }
- }
-
- @Test
- @DisplayName("MatchResult.groups() should return defensive copy")
- void matchResult_groupsArray_defensiveCopy() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.match("123")) {
- String[] groups1 = result.groups();
- String[] groups2 = result.groups();
-
- assertThat(groups1).isNotSameAs(groups2); // Different array instances
- assertThat(groups1).containsExactly(groups2); // Same content
- }
- }
-
- @Test
- @DisplayName("MatchResult should provide input string")
- void matchResult_input_available() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.match("123")) {
- assertThat(result.input()).isEqualTo("123");
- }
- }
-
- @Test
- @DisplayName("MatchResult should throw on invalid group index")
- void matchResult_invalidIndex_throws() {
- Pattern pattern = Pattern.compile("(\\d+)");
- try (MatchResult result = pattern.match("123")) {
- assertThatIndexOutOfBoundsException()
- .isThrownBy(() -> result.group(5))
- .withMessageContaining("out of bounds");
-
- assertThatIndexOutOfBoundsException()
- .isThrownBy(() -> result.group(-1))
- .withMessageContaining("out of bounds");
- }
- }
-
- // ========== Real-World Scenarios ==========
-
- @Test
- @DisplayName("Extract email components")
- void realWorld_emailExtraction() {
- Pattern pattern = Pattern.compile("([a-z0-9._%+-]+)@([a-z0-9.-]+)\\.([a-z]{2,})");
- try (MatchResult result = pattern.match("john.doe@example.co.uk")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group(1)).isEqualTo("john.doe");
- assertThat(result.group(2)).isEqualTo("example.co");
- assertThat(result.group(3)).isEqualTo("uk");
- }
- }
-
- @Test
- @DisplayName("Parse log line with timestamp and level")
- void realWorld_logParsing() {
- Pattern pattern = Pattern.compile("\\[(\\d+)\\] (\\w+): (.+)");
- try (MatchResult result = pattern.find("[1234567890] ERROR: Something went wrong")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group(1)).isEqualTo("1234567890"); // timestamp
- assertThat(result.group(2)).isEqualTo("ERROR"); // level
- assertThat(result.group(3)).isEqualTo("Something went wrong"); // message
- }
- }
-
- @Test
- @DisplayName("Extract all URLs from text")
- void realWorld_extractAllUrls() {
- Pattern pattern = Pattern.compile("https?://([a-z0-9.-]+)/([a-z0-9/_-]+)");
- List<MatchResult> matches = pattern.findAll("Visit http://example.com/page1 and https://test.org/page2");
- try {
- assertThat(matches).hasSize(2);
-
- // First URL
- assertThat(matches.get(0).group()).isEqualTo("http://example.com/page1");
- assertThat(matches.get(0).group(1)).isEqualTo("example.com");
- assertThat(matches.get(0).group(2)).isEqualTo("page1");
-
- // Second URL
- assertThat(matches.get(1).group()).isEqualTo("https://test.org/page2");
- assertThat(matches.get(1).group(1)).isEqualTo("test.org");
- assertThat(matches.get(1).group(2)).isEqualTo("page2");
- } finally {
- matches.forEach(MatchResult::close);
- }
- }
-
- @Test
- @DisplayName("Extract all numbers from mixed text")
- void realWorld_extractAllNumbers() {
- Pattern pattern = Pattern.compile("(\\d+)");
- List<MatchResult> matches = pattern.findAll("Item 1 costs $99, item 22 costs $199");
- try {
- assertThat(matches).hasSize(4);
- assertThat(matches.get(0).group(1)).isEqualTo("1");
- assertThat(matches.get(1).group(1)).isEqualTo("99");
- assertThat(matches.get(2).group(1)).isEqualTo("22");
- assertThat(matches.get(3).group(1)).isEqualTo("199");
- } finally {
- matches.forEach(MatchResult::close);
- }
- }
-
- // ========== Named Groups Advanced Tests ==========
-
- @Test
- @DisplayName("Mixed named and unnamed groups")
- void namedGroups_mixedWithUnnamed_works() {
- Pattern pattern = Pattern.compile("(\\d{4})-(?P<month>\\d{2})-(\\d{2})");
- try (MatchResult result = pattern.match("2025-11-24")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group(1)).isEqualTo("2025"); // Unnamed
- assertThat(result.group("month")).isEqualTo("11"); // Named
- assertThat(result.group(2)).isEqualTo("11"); // Also accessible by index
- assertThat(result.group(3)).isEqualTo("24"); // Unnamed
- }
- }
-
- @Test
- @DisplayName("MatchResult should expose named groups map")
- void matchResult_namedGroupsMap_exposed() {
- Pattern pattern = Pattern.compile("(?P<a>\\d+)-(?P<b>\\d+)");
- try (MatchResult result = pattern.match("123-456")) {
- assertThat(result.namedGroups()).containsKeys("a", "b");
- assertThat(result.namedGroups().get("a")).isEqualTo(1);
- assertThat(result.namedGroups().get("b")).isEqualTo(2);
- }
- }
-
- // ========== Consistency Tests ==========
-
- @ParameterizedTest
- @DisplayName("Pattern.match() vs Pattern.matches() consistency")
- @CsvSource({
- "\\d+, 123, true",
- "\\d+, abc, false",
- "[a-z]+, hello, true",
- "[a-z]+, HELLO, false"
- })
- void match_consistentWithMatches(String patternStr, String input, boolean shouldMatch) {
- Pattern pattern = Pattern.compile(patternStr);
-
- boolean matchesResult = pattern.matches(input);
- try (MatchResult matchResult = pattern.match(input)) {
- assertThat(matchResult.matched()).isEqualTo(matchesResult).isEqualTo(shouldMatch);
- }
- }
-
- @Test
- @DisplayName("Pattern.find() vs Matcher.find() consistency")
- void find_consistentWithMatcher() {
- Pattern pattern = Pattern.compile("(\\d+)");
-
- boolean matcherFind;
- try (Matcher m = pattern.matcher("abc123def")) {
- matcherFind = m.find();
- }
-
- try (MatchResult findResult = pattern.find("abc123def")) {
- assertThat(findResult.matched()).isEqualTo(matcherFind);
- }
- }
-
- // ========== Empty and Null Tests ==========
-
- @Test
- @DisplayName("Empty string should work")
- void emptyString_works() {
- Pattern pattern = Pattern.compile(".*");
- try (MatchResult result = pattern.match("")) {
- assertThat(result.matched()).isTrue();
- assertThat(result.group()).isEqualTo("");
- }
- }
-
- @Test
- @DisplayName("Null input should throw")
- void nullInput_throws() {
- Pattern pattern = Pattern.compile("test");
-
- assertThatNullPointerException()
- .isThrownBy(() -> pattern.match((String) null)); // Cast to disambiguate
-
- assertThatNullPointerException()
- .isThrownBy(() -> pattern.find((String) null)); // Cast to disambiguate
-
- assertThatNullPointerException()
- .isThrownBy(() -> pattern.findAll((String) null)); // Cast to disambiguate
- }
+ // ========== MatchResult Basic Tests ==========
+
+ @Test
+ @DisplayName("MatchResult should indicate successful match")
+ void matchResult_successfulMatch_matched() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.match("123")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group()).isEqualTo("123");
+ assertThat(result.group(0)).isEqualTo("123");
+ assertThat(result.group(1)).isEqualTo("123");
+ }
+ }
+
+ @Test
+ @DisplayName("MatchResult should indicate failed match")
+ void matchResult_failedMatch_notMatched() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.match("abc")) {
+ assertThat(result.matched()).isFalse();
+ assertThat(result.groupCount()).isEqualTo(0);
+ }
+ }
+
+ @Test
+ @DisplayName("MatchResult should throw on group access when not matched")
+ void matchResult_noMatch_throwsOnGroupAccess() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.match("abc")) {
+ assertThatIllegalStateException()
+ .isThrownBy(() -> result.group())
+ .withMessageContaining("No match");
+ }
+ }
+
+ // ========== Pattern.match() Tests ==========
+
+ @Test
+ @DisplayName("Pattern.match() should extract single group")
+ void patternMatch_singleGroup_extracted() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.match("123")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.groupCount()).isEqualTo(1);
+ assertThat(result.group(0)).isEqualTo("123"); // Full match
+ assertThat(result.group(1)).isEqualTo("123"); // Captured group
+ }
+ }
+
+ @Test
+ @DisplayName("Pattern.match() should extract multiple groups")
+ void patternMatch_multipleGroups_extracted() {
+ Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+)\\.([a-z]+)");
+ try (MatchResult result = pattern.match("user@example.com")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.groupCount()).isEqualTo(3);
+ assertThat(result.group()).isEqualTo("user@example.com");
+ assertThat(result.group(1)).isEqualTo("user");
+ assertThat(result.group(2)).isEqualTo("example");
+ assertThat(result.group(3)).isEqualTo("com");
+ }
+ }
+
+ @Test
+ @DisplayName("Pattern.match() should handle date extraction")
+ void patternMatch_dateExtraction_works() {
+ Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
+ try (MatchResult result = pattern.match("2025-11-24")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group()).isEqualTo("2025-11-24");
+ assertThat(result.group(1)).isEqualTo("2025");
+ assertThat(result.group(2)).isEqualTo("11");
+ assertThat(result.group(3)).isEqualTo("24");
+ }
+ }
+
+ @Test
+ @DisplayName("Pattern.match() should fail on partial content")
+ void patternMatch_partialContent_fails() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.match("abc123def")) {
+ assertThat(result.matched()).isFalse();
+ }
+ }
+
+ // ========== Pattern.find() Tests ==========
+
+ @Test
+ @DisplayName("Pattern.find() should find first match in text")
+ void patternFind_firstMatch_found() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.find("abc123def456")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group()).isEqualTo("123"); // First match
+ assertThat(result.group(1)).isEqualTo("123");
+ }
+ }
+
+ @Test
+ @DisplayName("Pattern.find() should extract groups from first match")
+ void patternFind_firstMatchGroups_extracted() {
+ Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)");
+ try (MatchResult result = pattern.find("Contact support@example.com or admin@test.org")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group()).isEqualTo("support@example.com"); // First email
+ assertThat(result.group(1)).isEqualTo("support");
+ assertThat(result.group(2)).isEqualTo("example.com");
+ }
+ }
+
+ @Test
+ @DisplayName("Pattern.find() should return failed match when not found")
+ void patternFind_notFound_failedMatch() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.find("no digits here")) {
+ assertThat(result.matched()).isFalse();
+ }
+ }
+
+ // ========== Pattern.findAll() Tests ==========
+
+ @Test
+ @DisplayName("Pattern.findAll() should find all matches")
+ void patternFindAll_multipleMatches_found() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ List<MatchResult> matches = pattern.findAll("a1b22c333");
+ try {
+ assertThat(matches).hasSize(3);
+ assertThat(matches.get(0).group()).isEqualTo("1");
+ assertThat(matches.get(1).group()).isEqualTo("22");
+ assertThat(matches.get(2).group()).isEqualTo("333");
+ } finally {
+ matches.forEach(MatchResult::close);
+ }
+ }
+
+ @Test
+ @DisplayName("Pattern.findAll() should extract groups from each match")
+ void patternFindAll_multipleMatchesWithGroups_extracted() {
+ Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})");
+ List<MatchResult> matches = pattern.findAll("Call 555-1234 or 555-5678");
+ try {
+ assertThat(matches).hasSize(2);
+
+ // First match
+ assertThat(matches.get(0).group()).isEqualTo("555-1234");
+ assertThat(matches.get(0).group(1)).isEqualTo("555");
+ assertThat(matches.get(0).group(2)).isEqualTo("1234");
+
+ // Second match
+ assertThat(matches.get(1).group()).isEqualTo("555-5678");
+ assertThat(matches.get(1).group(1)).isEqualTo("555");
+ assertThat(matches.get(1).group(2)).isEqualTo("5678");
+ } finally {
+ matches.forEach(MatchResult::close);
+ }
+ }
+
+ @Test
+ @DisplayName("Pattern.findAll() should return empty list for no matches")
+ void patternFindAll_noMatches_emptyList() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ List<MatchResult> matches = pattern.findAll("no digits");
+ try {
+ assertThat(matches).isEmpty();
+ } finally {
+ matches.forEach(MatchResult::close);
+ }
+ }
+
+ // ========== Named Groups Tests ==========
+
+ @Test
+ @DisplayName("Named groups should be accessible by name")
+ void namedGroups_accessByName_works() {
+ Pattern pattern = Pattern.compile("(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})");
+ try (MatchResult result = pattern.match("2025-11-24")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group("year")).isEqualTo("2025");
+ assertThat(result.group("month")).isEqualTo("11");
+ assertThat(result.group("day")).isEqualTo("24");
+ }
+ }
+
+ @Test
+ @DisplayName("Named groups should also be accessible by index")
+ void namedGroups_accessByIndex_works() {
+ Pattern pattern = Pattern.compile("(?P<user>[a-z]+)@(?P<domain>[a-z]+\\.[a-z]+)");
+ try (MatchResult result = pattern.match("admin@example.com")) {
+ assertThat(result.matched()).isTrue();
+ // Access by name
+ assertThat(result.group("user")).isEqualTo("admin");
+ assertThat(result.group("domain")).isEqualTo("example.com");
+
+ // Also accessible by index
+ assertThat(result.group(1)).isEqualTo("admin");
+ assertThat(result.group(2)).isEqualTo("example.com");
+ }
+ }
+
+ @Test
+ @DisplayName("Non-existent named group should return null")
+ void namedGroups_nonExistent_returnsNull() {
+ Pattern pattern = Pattern.compile("(?P<found>\\d+)");
+ try (MatchResult result = pattern.match("123")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group("found")).isEqualTo("123");
+ assertThat(result.group("notfound")).isNull();
+ }
+ }
+
+ // ========== Edge Cases ==========
+
+ @Test
+ @DisplayName("Pattern with no groups should work")
+ void pattern_noGroups_works() {
+ Pattern pattern = Pattern.compile("\\d+"); // No parentheses
+ try (MatchResult result = pattern.match("123")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.groupCount()).isEqualTo(0);
+ assertThat(result.group()).isEqualTo("123"); // Group 0 still available
+ }
+ }
+
+ @Test
+ @DisplayName("Optional groups that don't participate should be null")
+ void optionalGroups_notParticipating_null() {
+ Pattern pattern = Pattern.compile("(a)?(b)");
+ try (MatchResult result = pattern.match("b")) { // 'a' is optional and doesn't match
+ assertThat(result.matched()).isTrue();
+ assertThat(result.groupCount()).isEqualTo(2);
+ assertThat(result.group(0)).isEqualTo("b");
+ assertThat(result.group(1)).isNull(); // Optional 'a' didn't participate
+ assertThat(result.group(2)).isEqualTo("b");
+ }
+ }
+
+ @Test
+ @DisplayName("Nested groups should be extracted correctly")
+ void nestedGroups_extracted() {
+ Pattern pattern = Pattern.compile("((\\d+)-(\\d+))");
+ try (MatchResult result = pattern.match("123-456")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.groupCount()).isEqualTo(3);
+ assertThat(result.group(1)).isEqualTo("123-456"); // Outer group
+ assertThat(result.group(2)).isEqualTo("123"); // First inner
+ assertThat(result.group(3)).isEqualTo("456"); // Second inner
+ }
+ }
+
+ @Test
+ @DisplayName("MatchResult.groups() should return defensive copy")
+ void matchResult_groupsArray_defensiveCopy() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.match("123")) {
+ String[] groups1 = result.groups();
+ String[] groups2 = result.groups();
+
+ assertThat(groups1).isNotSameAs(groups2); // Different array instances
+ assertThat(groups1).containsExactly(groups2); // Same content
+ }
+ }
+
+ @Test
+ @DisplayName("MatchResult should provide input string")
+ void matchResult_input_available() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.match("123")) {
+ assertThat(result.input()).isEqualTo("123");
+ }
+ }
+
+ @Test
+ @DisplayName("MatchResult should throw on invalid group index")
+ void matchResult_invalidIndex_throws() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ try (MatchResult result = pattern.match("123")) {
+ assertThatIndexOutOfBoundsException()
+ .isThrownBy(() -> result.group(5))
+ .withMessageContaining("out of bounds");
+
+ assertThatIndexOutOfBoundsException()
+ .isThrownBy(() -> result.group(-1))
+ .withMessageContaining("out of bounds");
+ }
+ }
+
+ // ========== Real-World Scenarios ==========
+
+ @Test
+ @DisplayName("Extract email components")
+ void realWorld_emailExtraction() {
+ Pattern pattern = Pattern.compile("([a-z0-9._%+-]+)@([a-z0-9.-]+)\\.([a-z]{2,})");
+ try (MatchResult result = pattern.match("john.doe@example.co.uk")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group(1)).isEqualTo("john.doe");
+ assertThat(result.group(2)).isEqualTo("example.co");
+ assertThat(result.group(3)).isEqualTo("uk");
+ }
+ }
+
+ @Test
+ @DisplayName("Parse log line with timestamp and level")
+ void realWorld_logParsing() {
+ Pattern pattern = Pattern.compile("\\[(\\d+)\\] (\\w+): (.+)");
+ try (MatchResult result = pattern.find("[1234567890] ERROR: Something went wrong")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group(1)).isEqualTo("1234567890"); // timestamp
+ assertThat(result.group(2)).isEqualTo("ERROR"); // level
+ assertThat(result.group(3)).isEqualTo("Something went wrong"); // message
+ }
+ }
+
+ @Test
+ @DisplayName("Extract all URLs from text")
+ void realWorld_extractAllUrls() {
+ Pattern pattern = Pattern.compile("https?://([a-z0-9.-]+)/([a-z0-9/_-]+)");
+ List<MatchResult> matches =
+ pattern.findAll("Visit http://example.com/page1 and https://test.org/page2");
+ try {
+ assertThat(matches).hasSize(2);
+
+ // First URL
+ assertThat(matches.get(0).group()).isEqualTo("http://example.com/page1");
+ assertThat(matches.get(0).group(1)).isEqualTo("example.com");
+ assertThat(matches.get(0).group(2)).isEqualTo("page1");
+
+ // Second URL
+ assertThat(matches.get(1).group()).isEqualTo("https://test.org/page2");
+ assertThat(matches.get(1).group(1)).isEqualTo("test.org");
+ assertThat(matches.get(1).group(2)).isEqualTo("page2");
+ } finally {
+ matches.forEach(MatchResult::close);
+ }
+ }
+
+ @Test
+ @DisplayName("Extract all numbers from mixed text")
+ void realWorld_extractAllNumbers() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+ List<MatchResult> matches = pattern.findAll("Item 1 costs $99, item 22 costs $199");
+ try {
+ assertThat(matches).hasSize(4);
+ assertThat(matches.get(0).group(1)).isEqualTo("1");
+ assertThat(matches.get(1).group(1)).isEqualTo("99");
+ assertThat(matches.get(2).group(1)).isEqualTo("22");
+ assertThat(matches.get(3).group(1)).isEqualTo("199");
+ } finally {
+ matches.forEach(MatchResult::close);
+ }
+ }
+
+ // ========== Named Groups Advanced Tests ==========
+
+ @Test
+ @DisplayName("Mixed named and unnamed groups")
+ void namedGroups_mixedWithUnnamed_works() {
+ Pattern pattern = Pattern.compile("(\\d{4})-(?P<month>\\d{2})-(\\d{2})");
+ try (MatchResult result = pattern.match("2025-11-24")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group(1)).isEqualTo("2025"); // Unnamed
+ assertThat(result.group("month")).isEqualTo("11"); // Named
+ assertThat(result.group(2)).isEqualTo("11"); // Also accessible by index
+ assertThat(result.group(3)).isEqualTo("24"); // Unnamed
+ }
+ }
+
+ @Test
+ @DisplayName("MatchResult should expose named groups map")
+ void matchResult_namedGroupsMap_exposed() {
+ Pattern pattern = Pattern.compile("(?P<a>\\d+)-(?P<b>\\d+)");
+ try (MatchResult result = pattern.match("123-456")) {
+ assertThat(result.namedGroups()).containsKeys("a", "b");
+ assertThat(result.namedGroups().get("a")).isEqualTo(1);
+ assertThat(result.namedGroups().get("b")).isEqualTo(2);
+ }
+ }
+
+ // ========== Consistency Tests ==========
+
+ @ParameterizedTest
+ @DisplayName("Pattern.match() vs Pattern.matches() consistency")
+ @CsvSource({"\\d+, 123, true", "\\d+, abc, false", "[a-z]+, hello, true", "[a-z]+, HELLO, false"})
+ void match_consistentWithMatches(String patternStr, String input, boolean shouldMatch) {
+ Pattern pattern = Pattern.compile(patternStr);
+
+ boolean matchesResult = pattern.matches(input);
+ try (MatchResult matchResult = pattern.match(input)) {
+ assertThat(matchResult.matched()).isEqualTo(matchesResult).isEqualTo(shouldMatch);
+ }
+ }
+
+ @Test
+ @DisplayName("Pattern.find() vs Matcher.find() consistency")
+ void find_consistentWithMatcher() {
+ Pattern pattern = Pattern.compile("(\\d+)");
+
+ boolean matcherFind;
+ try (Matcher m = pattern.matcher("abc123def")) {
+ matcherFind = m.find();
+ }
+
+ try (MatchResult findResult = pattern.find("abc123def")) {
+ assertThat(findResult.matched()).isEqualTo(matcherFind);
+ }
+ }
+
+ // ========== Empty and Null Tests ==========
+
+ @Test
+ @DisplayName("Empty string should work")
+ void emptyString_works() {
+ Pattern pattern = Pattern.compile(".*");
+ try (MatchResult result = pattern.match("")) {
+ assertThat(result.matched()).isTrue();
+ assertThat(result.group()).isEqualTo("");
+ }
+ }
+
+ @Test
+ @DisplayName("Null input should throw")
+ void nullInput_throws() {
+ Pattern pattern = Pattern.compile("test");
+
+ assertThatNullPointerException()
+ .isThrownBy(() -> pattern.match((String) null)); // Cast to disambiguate
+
+ assertThatNullPointerException()
+ .isThrownBy(() -> pattern.find((String) null)); // Cast to disambiguate
+
+ assertThatNullPointerException()
+ .isThrownBy(() -> pattern.findAll((String) null)); // Cast to disambiguate
+ }
}
-
diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/Phase1ExtensionsIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/Phase1ExtensionsIT.java
index f01d543..d1abe20 100644
--- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/Phase1ExtensionsIT.java
+++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/Phase1ExtensionsIT.java
@@ -16,249 +16,236 @@
package com.axonops.libre2.api;
-import org.junit.jupiter.api.DisplayName;
-import org.junit.jupiter.api.Test;
+import static org.assertj.core.api.Assertions.*;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
-import static org.assertj.core.api.Assertions.*;
-
-/**
- * Tests for Phase 1 extensions: findAll bulk variants and ByteBuffer[] bulk.
- */
+/** Tests for Phase 1 extensions: findAll bulk variants and ByteBuffer[] bulk. */
@DisplayName("Phase 1 Extensions (findAll bulk + ByteBuffer[] bulk)")
class Phase1ExtensionsIT {
- private ByteBuffer createDirectBuffer(String text) {
- byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
- ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length);
- buffer.put(bytes);
- buffer.flip();
- return buffer;
- }
+ private ByteBuffer createDirectBuffer(String text) {
+ byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
+ ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ return buffer;
+ }
+
+ private ByteBuffer createHeapBuffer(String text) {
+ return ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8));
+ }
+
+ // ========== findAll(String[]) Tests ==========
+
+ @Test
+ @DisplayName("findAll(String[]) should find partial matches in all strings")
+ void findAll_stringArray_findsPartialMatches() {
+ Pattern pattern = Pattern.compile("test");
+ String[] inputs = {
+ "test", // Full match - should find
+ "testing", // Partial match - should find
+ "notest", // Partial match - should find
+ "other" // No match
+ };
- private ByteBuffer createHeapBuffer(String text) {
- return ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8));
- }
+ boolean[] results = pattern.findAll(inputs);
- // ========== findAll(String[]) Tests ==========
+ assertThat(results).containsExactly(true, true, true, false);
+ }
- @Test
- @DisplayName("findAll(String[]) should find partial matches in all strings")
- void findAll_stringArray_findsPartialMatches() {
- Pattern pattern = Pattern.compile("test");
- String[] inputs = {
- "test", // Full match - should find
- "testing", // Partial match - should find
- "notest", // Partial match - should find
- "other" // No match
- };
+ @Test
+ @DisplayName("findAll(String[]) vs matchAll(String[]) - partial vs full")
+ void findAll_vs_matchAll_differentBehavior() {
+ Pattern pattern = Pattern.compile("test");
+ String[] inputs = {"test", "testing", "other"};
+
+ boolean[] matchResults = pattern.matchAll(inputs); // Full match
+ boolean[] findResults = pattern.findAll(inputs); // Partial match
+
+ assertThat(matchResults).containsExactly(true, false, false); // Only exact matches
+ assertThat(findResults).containsExactly(true, true, false); // Partial matches too
+ }
+
+ @Test
+ @DisplayName("findAll(Collection) should work")
+ void findAll_collection_works() {
+ Pattern pattern = Pattern.compile("\\d+");
+ List<String> inputs = Arrays.asList("abc123", "def", "456ghi");
+
+ boolean[] results = pattern.findAll(inputs);
+
+ assertThat(results).containsExactly(true, false, true);
+ }
+
+ @Test
+ @DisplayName("findAll(String[]) with empty array should return empty")
+ void findAll_emptyArray_returnsEmpty() {
+ Pattern pattern = Pattern.compile("test");
+
+ boolean[] results = pattern.findAll(new String[0]);
+
+ assertThat(results).isEmpty();
+ }
+
+ // ========== matchAll(ByteBuffer[]) Tests ==========
+
+ @Test
+ @DisplayName("matchAll(ByteBuffer[]) with all DirectByteBuffers should use zero-copy")
+ void matchAll_allDirectBuffers_usesZeroCopy() {
+ Pattern pattern = Pattern.compile("test");
+ ByteBuffer[] buffers = {
+ createDirectBuffer("test"), createDirectBuffer("testing"), createDirectBuffer("test")
+ };
- boolean[] results = pattern.findAll(inputs);
+ boolean[] results = pattern.matchAll(buffers);
+
+ assertThat(results).containsExactly(true, false, true);
+ }
+
+ @Test
+ @DisplayName("matchAll(ByteBuffer[]) with all heap buffers should convert to String")
+ void matchAll_allHeapBuffers_convertsToString() {
+ Pattern pattern = Pattern.compile("test");
+ ByteBuffer[] buffers = {
+ createHeapBuffer("test"), createHeapBuffer("testing"), createHeapBuffer("test")
+ };
+
+ boolean[] results = pattern.matchAll(buffers);
+
+ assertThat(results).containsExactly(true, false, true);
+ }
+
+ @Test
+ @DisplayName("matchAll(ByteBuffer[]) with mixed buffers should convert all to String")
+ void matchAll_mixedBuffers_convertsToString() {
+ Pattern pattern = Pattern.compile("test");
+ ByteBuffer[] buffers = {
+ createDirectBuffer("test"), // Direct
+ createHeapBuffer("testing"), // Heap - forces String path for all
+ createDirectBuffer("test") // Direct
+ };
+
+ boolean[] results = pattern.matchAll(buffers);
- assertThat(results).containsExactly(true, true, true, false);
- }
+ assertThat(results).containsExactly(true, false, true);
+ }
- @Test
- @DisplayName("findAll(String[]) vs matchAll(String[]) - partial vs full")
- void findAll_vs_matchAll_differentBehavior() {
- Pattern pattern = Pattern.compile("test");
- String[] inputs = {"test", "testing", "other"};
+ @Test
+ @DisplayName("matchAll(ByteBuffer[]) should produce same results as matchAll(String[])")
+ void matchAll_byteBufferArray_matchesStringArray() {
+ Pattern pattern = Pattern.compile("\\d+");
+ String[] strings = {"123", "abc", "456"};
- boolean[] matchResults = pattern.matchAll(inputs); // Full match
- boolean[] findResults = pattern.findAll(inputs); // Partial match
+ boolean[] stringResults = pattern.matchAll(strings);
+
+ ByteBuffer[] buffers = {
+ createDirectBuffer("123"), createDirectBuffer("abc"), createDirectBuffer("456")
+ };
- assertThat(matchResults).containsExactly(true, false, false); // Only exact matches
- assertThat(findResults).containsExactly(true, true, false); // Partial matches too
- }
+ boolean[] bufferResults = pattern.matchAll(buffers);
- @Test
- @DisplayName("findAll(Collection) should work")
- void findAll_collection_works() {
- Pattern pattern = Pattern.compile("\\d+");
- List<String> inputs = Arrays.asList("abc123", "def", "456ghi");
+ assertThat(bufferResults).containsExactly(stringResults);
+ }
- boolean[] results = pattern.findAll(inputs);
+ // ========== findAll(ByteBuffer[]) Tests ==========
- assertThat(results).containsExactly(true, false, true);
- }
+ @Test
+ @DisplayName("findAll(ByteBuffer[]) with DirectByteBuffers should use zero-copy")
+ void findAll_directBuffers_usesZeroCopy() {
+ Pattern pattern = Pattern.compile("test");
+ ByteBuffer[] buffers = {
+ createDirectBuffer("test"), // Full match - finds
+ createDirectBuffer("testing"), // Partial match - finds
+ createDirectBuffer("other") // No match
+ };
- @Test
- @DisplayName("findAll(String[]) with empty array should return empty")
- void findAll_emptyArray_returnsEmpty() {
- Pattern pattern = Pattern.compile("test");
+ boolean[] results = pattern.findAll(buffers);
- boolean[] results = pattern.findAll(new String[0]);
+ assertThat(results).containsExactly(true, true, false);
+ }
- assertThat(results).isEmpty();
- }
+ @Test
+ @DisplayName(
+ "findAll(ByteBuffer[]) should differ from matchAll(ByteBuffer[]) for partial matches")
+ void findAll_vs_matchAll_byteBuffers_differentBehavior() {
+ Pattern pattern = Pattern.compile("test");
+ ByteBuffer[] buffers = {
+ createDirectBuffer("test"), createDirectBuffer("testing"), createDirectBuffer("other")
+ };
- // ========== matchAll(ByteBuffer[]) Tests ==========
+ boolean[] matchResults = pattern.matchAll(buffers); // Full match
+ boolean[] findResults = pattern.findAll(buffers); // Partial match
- @Test
- @DisplayName("matchAll(ByteBuffer[]) with all DirectByteBuffers should use zero-copy")
- void matchAll_allDirectBuffers_usesZeroCopy() {
- Pattern pattern = Pattern.compile("test");
- ByteBuffer[] buffers = {
- createDirectBuffer("test"),
- createDirectBuffer("testing"),
- createDirectBuffer("test")
- };
+ assertThat(matchResults).containsExactly(true, false, false); // Only exact
+ assertThat(findResults).containsExactly(true, true, false); // Includes partial
+ }
- boolean[] results = pattern.matchAll(buffers);
+ @Test
+ @DisplayName("findAll(ByteBuffer[]) with empty array should return empty")
+ void findAll_emptyBufferArray_returnsEmpty() {
+ Pattern pattern = Pattern.compile("test");
- assertThat(results).containsExactly(true, false, true);
- }
+ boolean[] results = pattern.findAll(new ByteBuffer[0]);
- @Test
- @DisplayName("matchAll(ByteBuffer[]) with all heap buffers should convert to String")
- void matchAll_allHeapBuffers_convertsToString() {
- Pattern pattern = Pattern.compile("test");
- ByteBuffer[] buffers = {
- createHeapBuffer("test"),
- createHeapBuffer("testing"),
- createHeapBuffer("test")
- };
+ assertThat(results).isEmpty();
+ }
+
+ // ========== Integration Tests ==========
- boolean[] results = pattern.matchAll(buffers);
+ @Test
+ @DisplayName("ByteBuffer[] bulk should work with Cassandra-like multi-column scenario")
+ void cassandraScenario_bulkByteBufferProcessing() {
+ Pattern emailPattern = Pattern.compile("[a-z]+@[a-z]+\\.[a-z]+");
- assertThat(results).containsExactly(true, false, true);
- }
+ // Simulate Cassandra returning ByteBuffer[] from multiple cells
+ ByteBuffer[] cells = {
+ createDirectBuffer("user@example.com"),
+ createDirectBuffer("invalid"),
+ createDirectBuffer("admin@test.org"),
+ createDirectBuffer("also_invalid")
+ };
- @Test
- @DisplayName("matchAll(ByteBuffer[]) with mixed buffers should convert all to String")
- void matchAll_mixedBuffers_convertsToString() {
- Pattern pattern = Pattern.compile("test");
- ByteBuffer[] buffers = {
- createDirectBuffer("test"), // Direct
- createHeapBuffer("testing"), // Heap - forces String path for all
- createDirectBuffer("test") // Direct
- };
+ boolean[] results = emailPattern.matchAll(cells);
- boolean[] results = pattern.matchAll(buffers);
+ assertThat(results).containsExactly(true, false, true, false);
- assertThat(results).containsExactly(true, false, true);
+ // Count valid emails
+ long validCount = 0;
+ for (boolean result : results) {
+ if (result) validCount++;
}
+ assertThat(validCount).isEqualTo(2);
+ }
- @Test
- @DisplayName("matchAll(ByteBuffer[]) should produce same results as matchAll(String[])")
- void matchAll_byteBufferArray_matchesStringArray() {
- Pattern pattern = Pattern.compile("\\d+");
- String[] strings = {"123", "abc", "456"};
+ // ========== Null Handling ==========
- boolean[] stringResults = pattern.matchAll(strings);
+ @Test
+ @DisplayName("findAll(String[]) should throw on null array")
+ void findAll_nullArray_throws() {
+ Pattern pattern = Pattern.compile("test");
- ByteBuffer[] buffers = {
- createDirectBuffer("123"),
- createDirectBuffer("abc"),
- createDirectBuffer("456")
- };
+ assertThatNullPointerException().isThrownBy(() -> pattern.findAll((String[]) null));
+ }
- boolean[] bufferResults = pattern.matchAll(buffers);
+ @Test
+ @DisplayName("matchAll(ByteBuffer[]) should throw on null array")
+ void matchAll_nullByteBufferArray_throws() {
+ Pattern pattern = Pattern.compile("test");
- assertThat(bufferResults).containsExactly(stringResults);
- }
+ assertThatNullPointerException().isThrownBy(() -> pattern.matchAll((ByteBuffer[]) null));
+ }
- // ========== findAll(ByteBuffer[]) Tests ==========
+ @Test
+ @DisplayName("findAll(ByteBuffer[]) should throw on null array")
+ void findAll_nullByteBufferArray_throws() {
+ Pattern pattern = Pattern.compile("test");
- @Test
- @DisplayName("findAll(ByteBuffer[]) with DirectByteBuffers should use zero-copy")
- void findAll_directBuffers_usesZeroCopy() {
- Pattern pattern = Pattern.compile("test");
- ByteBuffer[] buffers = {
- createDirectBuffer("test"), // Full match - finds
- createDirectBuffer("testing"), // Partial match - finds
- createDirectBuffer("other") // No match
- };
-
- boolean[] results = pattern.findAll(buffers);
-
- assertThat(results).containsExactly(true, true, false);
- }
-
- @Test
- @DisplayName("findAll(ByteBuffer[]) should differ from matchAll(ByteBuffer[]) for partial matches")
- void findAll_vs_matchAll_byteBuffers_differentBehavior() {
- Pattern pattern = Pattern.compile("test");
- ByteBuffer[] buffers = {
- createDirectBuffer("test"),
- createDirectBuffer("testing"),
- createDirectBuffer("other")
- };
-
- boolean[] matchResults = pattern.matchAll(buffers); // Full match
- boolean[] findResults = pattern.findAll(buffers); // Partial match
-
- assertThat(matchResults).containsExactly(true, false, false); // Only exact
- assertThat(findResults).containsExactly(true, true, false); // Includes partial
- }
-
- @Test
- @DisplayName("findAll(ByteBuffer[]) with empty array should return empty")
- void findAll_emptyBufferArray_returnsEmpty() {
- Pattern pattern = Pattern.compile("test");
-
- boolean[] results = pattern.findAll(new ByteBuffer[0]);
-
- assertThat(results).isEmpty();
- }
-
- // ========== Integration Tests ==========
-
- @Test
- @DisplayName("ByteBuffer[] bulk should work with Cassandra-like multi-column scenario")
- void cassandraScenario_bulkByteBufferProcessing() {
- Pattern emailPattern = Pattern.compile("[a-z]+@[a-z]+\\.[a-z]+");
-
- // Simulate Cassandra returning ByteBuffer[] from multiple cells
- ByteBuffer[] cells = {
- createDirectBuffer("user@example.com"),
- createDirectBuffer("invalid"),
- createDirectBuffer("admin@test.org"),
- createDirectBuffer("also_invalid")
- };
-
- boolean[] results = emailPattern.matchAll(cells);
-
- assertThat(results).containsExactly(true, false, true, false);
-
- // Count valid emails
- long validCount = 0;
- for (boolean result : results) {
- if (result) validCount++;
- }
- assertThat(validCount).isEqualTo(2);
- }
-
- // ========== Null Handling ==========
-
- @Test
- @DisplayName("findAll(String[]) should throw on null array")
- void findAll_nullArray_throws() {
- Pattern pattern = Pattern.compile("test");
-
- assertThatNullPointerException()
- .isThrownBy(() -> pattern.findAll((String[]) null));
- }
-
- @Test
- @DisplayName("matchAll(ByteBuffer[]) should throw on null array")
- void matchAll_nullByteBufferArray_throws() {
- Pattern pattern = Pattern.compile("test");
-
- assertThatNullPointerException()
- .isThrownBy(() -> pattern.matchAll((ByteBuffer[]) null));
- }
-
- @Test
- @DisplayName("findAll(ByteBuffer[]) should throw on null array")
- void findAll_nullByteBufferArray_throws() {
- Pattern pattern = Pattern.compile("test");
-
- assertThatNullPointerException()
- .isThrownBy(() -> pattern.findAll((ByteBuffer[]) null));
- }
+ assertThatNullPointerException().isThrownBy(() -> pattern.findAll((ByteBuffer[]) null));
+ }
}
diff --git a/libre2-core/src/integration-test/java/com/axonops/libre2/api/ReplaceOperationsIT.java b/libre2-core/src/integration-test/java/com/axonops/libre2/api/ReplaceOperationsIT.java
index 9dec6ea..5028a73 100644
--- a/libre2-core/src/integration-test/java/com/axonops/libre2/api/ReplaceOperationsIT.java
+++ b/libre2-core/src/integration-test/java/com/axonops/libre2/api/ReplaceOperationsIT.java
@@ -16,302 +16,291 @@
package com.axonops.libre2.api;
-import org.junit.jupiter.api.DisplayName;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.CsvSource;
+import static org.assertj.core.api.Assertions.*;
import java.util.Arrays;
import java.util.List;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
-import static org.assertj.core.api.Assertions.*;
-
-/**
- * Tests for replace operations (replaceFirst, replaceAll, bulk variants).
- */
+/** Tests for replace operations (replaceFirst, replaceAll, bulk variants). */
@DisplayName("Replace Operations")
class ReplaceOperationsIT {
- // ========== replaceFirst() Tests ==========
+ // ========== replaceFirst() Tests ==========
+
+ @Test
+ @DisplayName("replaceFirst should replace first match")
+ void replaceFirst_firstMatch_replaced() {
+ Pattern pattern = Pattern.compile("\\d+");
+ String result = pattern.replaceFirst("Item 123 costs $456", "XXX");
- @Test
- @DisplayName("replaceFirst should replace first match")
- void replaceFirst_firstMatch_replaced() {
- Pattern pattern = Pattern.compile("\\d+");
- String result = pattern.replaceFirst("Item 123 costs $456", "XXX");
+ assertThat(result).isEqualTo("Item XXX costs $456");
+ }
+
+ @Test
+ @DisplayName("replaceFirst should return original if no match")
+ void replaceFirst_noMatch_returnsOriginal() {
+ Pattern pattern = Pattern.compile("\\d+");
+ String result = pattern.replaceFirst("No digits here", "XXX");
- assertThat(result).isEqualTo("Item XXX costs $456");
- }
-
- @Test
- @DisplayName("replaceFirst should return original if no match")
- void replaceFirst_noMatch_returnsOriginal() {
- Pattern pattern = Pattern.compile("\\d+");
- String result = pattern.replaceFirst("No digits here", "XXX");
-
- assertThat(result).isEqualTo("No digits here");
- }
-
- @Test
- @DisplayName("replaceFirst should handle empty replacement")
- void replaceFirst_emptyReplacement_removes() {
- Pattern pattern = Pattern.compile("\\d+");
- String result = pattern.replaceFirst("Item 123", "");
-
- assertThat(result).isEqualTo("Item ");
- }
-
- // ========== replaceAll() Tests ==========
-
- @Test
- @DisplayName("replaceAll should replace all matches")
- void replaceAll_allMatches_replaced() {
- Pattern pattern = Pattern.compile("\\d+");
- String result = pattern.replaceAll("Item 123 costs $456", "XXX");
-
- assertThat(result).isEqualTo("Item XXX costs $XXX");
- }
-
- @Test
- @DisplayName("replaceAll should return original if no matches")
- void replaceAll_noMatches_returnsOriginal() {
- Pattern pattern = Pattern.compile("\\d+");
- String result = pattern.replaceAll("No digits here", "XXX");
-
- assertThat(result).isEqualTo("No digits here");
- }
-
- @Test
- @DisplayName("replaceAll should handle empty replacement")
- void replaceAll_emptyReplacement_removesAll() {
- Pattern pattern = Pattern.compile("\\d+");
- String result = pattern.replaceAll("a1b2c3", "");
-
- assertThat(result).isEqualTo("abc");
- }
-
- @Test
- @DisplayName("replaceAll should redact emails")
- void replaceAll_redactEmails_works() {
- Pattern emailPattern = Pattern.compile("[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}");
- String result = emailPattern.replaceAll("Contact user@example.com or admin@test.org", "[REDACTED]");
-
- assertThat(result).isEqualTo("Contact [REDACTED] or [REDACTED]");
- }
-
- // ========== Backreference Tests ==========
-
- @Test
- @DisplayName("replaceFirst should support backreferences with \\\\1")
- void replaceFirst_backreferences_work() {
- Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
- String result = pattern.replaceFirst("Date: 2025-11-24", "\\2/\\3/\\1");
-
- assertThat(result).isEqualTo("Date: 11/24/2025");
- }
-
- @Test
- @DisplayName("replaceAll should support backreferences")
- void replaceAll_backreferences_work() {
- Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})");
- String result = pattern.replaceAll("Call 555-1234 or 555-5678", "(\\1) \\2");
-
- assertThat(result).isEqualTo("Call (555) 1234 or (555) 5678");
- }
-
- @Test
- @DisplayName("replaceAll should swap groups with backreferences")
- void replaceAll_swapGroups_works() {
- Pattern pattern = Pattern.compile("([a-z]+)@([a-z]+\\.[a-z]+)");
- String result = pattern.replaceAll("user@example.com", "\\2 (\\1)");
-
- assertThat(result).isEqualTo("example.com (user)");
- }
-
- @Test
- @DisplayName("replaceAll should support multiple backreferences")
- void replaceAll_multipleBackrefs_work() {
- Pattern pattern = Pattern.compile("(\\w+)\\s+(\\w+)\\s+(\\w+)");
- String result = pattern.replaceAll("one two three", "\\3-\\2-\\1");
-
- assertThat(result).isEqualTo("three-two-one");
- }
-
- // ========== Bulk Replace Tests ==========
-
- @Test
- @DisplayName("replaceAll(array) should replace in all strings")
- void replaceAll_array_replacesAll() {
- Pattern ssnPattern = Pattern.compile("\\d{3}-\\d{2}-\\d{4}");
- String[] logs = {
- "User 123-45-6789 logged in",
- "No PII here",
- "SSN: 987-65-4321"
- };
-
- String[] redacted = ssnPattern.replaceAll(logs, "[REDACTED]");
-
- assertThat(redacted).containsExactly(
- "User [REDACTED] logged in",
- "No PII here",
- "SSN: [REDACTED]"
- );
- }
-
- @Test
- @DisplayName("replaceAll(collection) should replace in all strings")
- void replaceAll_collection_replacesAll() {
- Pattern pattern = Pattern.compile("\\d+");
- List inputs = Arrays.asList("a1b2", "c3d4", "no digits");
-
- List results = pattern.replaceAll(inputs, "X");
-
- assertThat(results).containsExactly("aXbX", "cXdX", "no digits");
- }
-
- @Test
- @DisplayName("replaceAll(array) should support backreferences")
- void replaceAll_arrayBackrefs_work() {
- Pattern pattern = Pattern.compile("(\\d{3})-(\\d{4})");
- String[] inputs = {"555-1234", "555-5678"};
-
- String[] results = pattern.replaceAll(inputs, "(\\1) \\2");
-
- assertThat(results).containsExactly("(555) 1234", "(555) 5678");
- }
-
- @Test
- @DisplayName("replaceAll(array) with empty array should return empty")
- void replaceAll_emptyArray_returnsEmpty() {
- Pattern pattern = Pattern.compile("\\d+");
- String[] results = pattern.replaceAll(new String[0], "XXX");
-
- assertThat(results).isEmpty();
- }
-
- @Test
- @DisplayName("replaceAll(collection) with empty collection should return empty")
- void replaceAll_emptyCollection_returnsEmpty() {
- Pattern pattern = Pattern.compile("\\d+");
- List