diff --git a/src/Database/Adapter.php b/src/Database/Adapter.php index 62a8eb7fe..811539aef 100644 --- a/src/Database/Adapter.php +++ b/src/Database/Adapter.php @@ -1442,4 +1442,38 @@ public function enableAlterLocks(bool $enable): self return $this; } + + /** + * Does the adapter support trigram index? + * + * @return bool + */ + abstract public function getSupportForTrigramIndex(): bool; + + /** + * Is PCRE regex supported? + * PCRE (Perl Compatible Regular Expressions) supports \b for word boundaries + * + * @return bool + */ + abstract public function getSupportForPCRERegex(): bool; + + /** + * Is POSIX regex supported? + * POSIX regex uses \y for word boundaries instead of \b + * + * @return bool + */ + abstract public function getSupportForPOSIXRegex(): bool; + + /** + * Is regex supported at all? + * Returns true if either PCRE or POSIX regex is supported + * + * @return bool + */ + public function getSupportForRegex(): bool + { + return $this->getSupportForPCRERegex() || $this->getSupportForPOSIXRegex(); + } } diff --git a/src/Database/Adapter/MariaDB.php b/src/Database/Adapter/MariaDB.php index 2876139f7..2201ecc09 100644 --- a/src/Database/Adapter/MariaDB.php +++ b/src/Database/Adapter/MariaDB.php @@ -2230,4 +2230,19 @@ public function getSupportForAlterLocks(): bool { return true; } + + public function getSupportForTrigramIndex(): bool + { + return false; + } + + public function getSupportForPCRERegex(): bool + { + return true; + } + + public function getSupportForPOSIXRegex(): bool + { + return false; + } } diff --git a/src/Database/Adapter/Mongo.php b/src/Database/Adapter/Mongo.php index 18554f87c..bf31e668a 100644 --- a/src/Database/Adapter/Mongo.php +++ b/src/Database/Adapter/Mongo.php @@ -2476,7 +2476,8 @@ protected function getQueryOperator(string $operator): string Query::TYPE_STARTS_WITH, Query::TYPE_NOT_STARTS_WITH, Query::TYPE_ENDS_WITH, - Query::TYPE_NOT_ENDS_WITH => '$regex', + Query::TYPE_NOT_ENDS_WITH, + Query::TYPE_REGEX => '$regex', 
Query::TYPE_OR => '$or', Query::TYPE_AND => '$and', Query::TYPE_EXISTS, @@ -2749,6 +2750,26 @@ public function getSupportForGetConnectionId(): bool return false; } + /** + * Is PCRE regex supported? + * + * @return bool + */ + public function getSupportForPCRERegex(): bool + { + return true; + } + + /** + * Is POSIX regex supported? + * + * @return bool + */ + public function getSupportForPOSIXRegex(): bool + { + return false; + } + /** * Is cache fallback supported? * @@ -3230,4 +3251,9 @@ public function getSupportForAlterLocks(): bool { return false; } + + public function getSupportForTrigramIndex(): bool + { + return false; + } } diff --git a/src/Database/Adapter/MySQL.php b/src/Database/Adapter/MySQL.php index 2ff77e9a0..9db1516eb 100644 --- a/src/Database/Adapter/MySQL.php +++ b/src/Database/Adapter/MySQL.php @@ -31,6 +31,9 @@ public function setTimeout(int $milliseconds, string $event = Database::EVENT_AL $this->timeout = $milliseconds; + $pdo = $this->getPDO(); + $pdo->exec("SET GLOBAL regexp_time_limit = {$milliseconds}"); + $this->before($event, 'timeout', function ($sql) use ($milliseconds) { return \preg_replace( pattern: '/SELECT/', @@ -152,6 +155,11 @@ protected function processException(PDOException $e): \Exception return new TimeoutException('Query timed out', $e->getCode(), $e); } + // Regex timeout + if ($e->getCode() === 'HY000' && isset($e->errorInfo[1]) && $e->errorInfo[1] === 3699) { + return new TimeoutException('Query timed out', $e->getCode(), $e); + } + // Functional index dependency if ($e->getCode() === 'HY000' && isset($e->errorInfo[1]) && $e->errorInfo[1] === 3837) { return new DependencyException('Attribute cannot be deleted because it is used in an index', $e->getCode(), $e); diff --git a/src/Database/Adapter/Pool.php b/src/Database/Adapter/Pool.php index 76c98e8b2..1e61004a9 100644 --- a/src/Database/Adapter/Pool.php +++ b/src/Database/Adapter/Pool.php @@ -365,6 +365,21 @@ public function getSupportForFulltextWildcardIndex(): bool 
return $this->delegate(__FUNCTION__, \func_get_args()); } + public function getSupportForPCRERegex(): bool + { + return $this->delegate(__FUNCTION__, \func_get_args()); + } + + public function getSupportForPOSIXRegex(): bool + { + return $this->delegate(__FUNCTION__, \func_get_args()); + } + + public function getSupportForTrigramIndex(): bool + { + return $this->delegate(__FUNCTION__, \func_get_args()); + } + public function getSupportForCasting(): bool { return $this->delegate(__FUNCTION__, \func_get_args()); diff --git a/src/Database/Adapter/Postgres.php b/src/Database/Adapter/Postgres.php index 86da09a58..e3bf04da4 100644 --- a/src/Database/Adapter/Postgres.php +++ b/src/Database/Adapter/Postgres.php @@ -154,6 +154,7 @@ public function create(string $name): bool // Enable extensions $this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS postgis')->execute(); $this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS vector')->execute(); + $this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS pg_trgm')->execute(); $collation = " CREATE COLLATION IF NOT EXISTS utf8_ci_ai ( @@ -899,9 +900,10 @@ public function createIndex(string $collection, string $id, string $type, array Database::INDEX_SPATIAL, Database::INDEX_HNSW_EUCLIDEAN, Database::INDEX_HNSW_COSINE, - Database::INDEX_HNSW_DOT => 'INDEX', + Database::INDEX_HNSW_DOT, + Database::INDEX_OBJECT, + Database::INDEX_TRIGRAM => 'INDEX', Database::INDEX_UNIQUE => 'UNIQUE INDEX', - Database::INDEX_OBJECT => 'INDEX', default => throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL . ', ' . Database::INDEX_OBJECT . ', ' . Database::INDEX_HNSW_EUCLIDEAN . ', ' . Database::INDEX_HNSW_COSINE . ', ' . 
Database::INDEX_HNSW_DOT), }; @@ -922,6 +924,11 @@ public function createIndex(string $collection, string $id, string $type, array Database::INDEX_HNSW_COSINE => " USING HNSW ({$attributes} vector_cosine_ops)", Database::INDEX_HNSW_DOT => " USING HNSW ({$attributes} vector_ip_ops)", Database::INDEX_OBJECT => " USING GIN ({$attributes})", + Database::INDEX_TRIGRAM => + " USING GIN (" . implode(', ', array_map( + fn ($attr) => "$attr gin_trgm_ops", + array_map(fn ($attr) => trim($attr), explode(',', $attributes)) + )) . ")", default => " ({$attributes})", }; @@ -2112,6 +2119,21 @@ public function getSupportForVectors(): bool return true; } + public function getSupportForPCRERegex(): bool + { + return false; + } + + public function getSupportForPOSIXRegex(): bool + { + return true; + } + + public function getSupportForTrigramIndex(): bool + { + return true; + } + /** * @return string */ @@ -2120,6 +2142,14 @@ public function getLikeOperator(): string return 'ILIKE'; } + /** + * @return string + */ + public function getRegexOperator(): string + { + return '~'; + } + protected function processException(PDOException $e): \Exception { // Timeout diff --git a/src/Database/Adapter/SQL.php b/src/Database/Adapter/SQL.php index dfd1565ba..26cee75ed 100644 --- a/src/Database/Adapter/SQL.php +++ b/src/Database/Adapter/SQL.php @@ -1794,6 +1794,8 @@ protected function getSQLOperator(string $method): string case Query::TYPE_NOT_ENDS_WITH: case Query::TYPE_NOT_CONTAINS: return $this->getLikeOperator(); + case Query::TYPE_REGEX: + return $this->getRegexOperator(); case Query::TYPE_VECTOR_DOT: case Query::TYPE_VECTOR_COSINE: case Query::TYPE_VECTOR_EUCLIDEAN: @@ -2287,6 +2289,14 @@ public function getLikeOperator(): string return 'LIKE'; } + /** + * @return string + */ + public function getRegexOperator(): string + { + return 'REGEXP'; + } + public function getInternalIndexesKeys(): array { return []; diff --git a/src/Database/Adapter/SQLite.php b/src/Database/Adapter/SQLite.php index 
a3d31db68..6d00bb90a 100644 --- a/src/Database/Adapter/SQLite.php +++ b/src/Database/Adapter/SQLite.php @@ -1876,4 +1876,26 @@ public function getSupportForAlterLocks(): bool { return false; } + + /** + * Is PCRE regex supported? + * SQLite does not have native REGEXP support - it requires compile-time option or user-defined function + * + * @return bool + */ + public function getSupportForPCRERegex(): bool + { + return false; + } + + /** + * Is POSIX regex supported? + * SQLite does not have native REGEXP support - it requires compile-time option or user-defined function + * + * @return bool + */ + public function getSupportForPOSIXRegex(): bool + { + return false; + } } diff --git a/src/Database/Database.php b/src/Database/Database.php index d5595df38..4f0269021 100644 --- a/src/Database/Database.php +++ b/src/Database/Database.php @@ -85,6 +85,7 @@ class Database public const INDEX_HNSW_EUCLIDEAN = 'hnsw_euclidean'; public const INDEX_HNSW_COSINE = 'hnsw_cosine'; public const INDEX_HNSW_DOT = 'hnsw_dot'; + public const INDEX_TRIGRAM = 'trigram'; // Max limits public const MAX_INT = 2147483647; @@ -1641,6 +1642,11 @@ public function createCollection(string $id, array $attributes = [], array $inde $this->adapter->getSupportForMultipleFulltextIndexes(), $this->adapter->getSupportForIdenticalIndexes(), $this->adapter->getSupportForObject(), + $this->adapter->getSupportForTrigramIndex(), + $this->adapter->getSupportForSpatialAttributes(), + $this->adapter->getSupportForIndex(), + $this->adapter->getSupportForUniqueIndex(), + $this->adapter->getSupportForFulltextIndex(), ); foreach ($indexes as $index) { if (!$validator->isValid($index)) { @@ -2785,7 +2791,12 @@ public function updateAttribute(string $collection, string $id, ?string $type = $this->adapter->getSupportForAttributes(), $this->adapter->getSupportForMultipleFulltextIndexes(), $this->adapter->getSupportForIdenticalIndexes(), - $this->adapter->getSupportForObject() + $this->adapter->getSupportForObject(), + 
$this->adapter->getSupportForTrigramIndex(), + $this->adapter->getSupportForSpatialAttributes(), + $this->adapter->getSupportForIndex(), + $this->adapter->getSupportForUniqueIndex(), + $this->adapter->getSupportForFulltextIndex(), ); foreach ($indexes as $index) { @@ -3623,52 +3634,6 @@ public function createIndex(string $collection, string $id, string $type, array throw new LimitException('Index limit reached. Cannot create new index.'); } - switch ($type) { - case self::INDEX_KEY: - if (!$this->adapter->getSupportForIndex()) { - throw new DatabaseException('Key index is not supported'); - } - break; - - case self::INDEX_UNIQUE: - if (!$this->adapter->getSupportForUniqueIndex()) { - throw new DatabaseException('Unique index is not supported'); - } - break; - - case self::INDEX_FULLTEXT: - if (!$this->adapter->getSupportForFulltextIndex()) { - throw new DatabaseException('Fulltext index is not supported'); - } - break; - - case self::INDEX_SPATIAL: - if (!$this->adapter->getSupportForSpatialAttributes()) { - throw new DatabaseException('Spatial indexes are not supported'); - } - if (!empty($orders) && !$this->adapter->getSupportForSpatialIndexOrder()) { - throw new DatabaseException('Spatial indexes with explicit orders are not supported. Remove the orders to create this index.'); - } - break; - - case Database::INDEX_HNSW_EUCLIDEAN: - case Database::INDEX_HNSW_COSINE: - case Database::INDEX_HNSW_DOT: - if (!$this->adapter->getSupportForVectors()) { - throw new DatabaseException('Vector indexes are not supported'); - } - break; - - case self::INDEX_OBJECT: - if (!$this->adapter->getSupportForObject()) { - throw new DatabaseException('Object indexes are not supported'); - } - break; - - default: - throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL . ', ' . Database::INDEX_OBJECT . ', ' . 
Database::INDEX_HNSW_EUCLIDEAN . ', ' . Database::INDEX_HNSW_COSINE . ', ' . Database::INDEX_HNSW_DOT); - } - /** @var array $collectionAttributes */ $collectionAttributes = $collection->getAttribute('attributes', []); $indexAttributesWithTypes = []; @@ -3722,6 +3687,11 @@ public function createIndex(string $collection, string $id, string $type, array $this->adapter->getSupportForMultipleFulltextIndexes(), $this->adapter->getSupportForIdenticalIndexes(), $this->adapter->getSupportForObject(), + $this->adapter->getSupportForTrigramIndex(), + $this->adapter->getSupportForSpatialAttributes(), + $this->adapter->getSupportForIndex(), + $this->adapter->getSupportForUniqueIndex(), + $this->adapter->getSupportForFulltextIndex(), ); if (!$validator->isValid($index)) { throw new IndexException($validator->getDescription()); diff --git a/src/Database/Query.php b/src/Database/Query.php index e8ccdcaa3..c813cd348 100644 --- a/src/Database/Query.php +++ b/src/Database/Query.php @@ -26,6 +26,7 @@ class Query public const TYPE_NOT_STARTS_WITH = 'notStartsWith'; public const TYPE_ENDS_WITH = 'endsWith'; public const TYPE_NOT_ENDS_WITH = 'notEndsWith'; + public const TYPE_REGEX = 'regex'; public const TYPE_EXISTS = 'exists'; public const TYPE_NOT_EXISTS = 'notExists'; @@ -113,6 +114,7 @@ class Query self::TYPE_CURSOR_BEFORE, self::TYPE_AND, self::TYPE_OR, + self::TYPE_REGEX ]; public const VECTOR_TYPES = [ @@ -1185,6 +1187,18 @@ public static function vectorEuclidean(string $attribute, array $vector): self return new self(self::TYPE_VECTOR_EUCLIDEAN, $attribute, [$vector]); } + /** + * Helper method to create Query with regex method + * + * @param string $attribute + * @param string $pattern + * @return Query + */ + public static function regex(string $attribute, string $pattern): self + { + return new self(self::TYPE_REGEX, $attribute, [$pattern]); + } + /** * Helper method to create Query with exists method * diff --git a/src/Database/Validator/Index.php 
b/src/Database/Validator/Index.php index 33648feeb..e2fc70a0b 100644 --- a/src/Database/Validator/Index.php +++ b/src/Database/Validator/Index.php @@ -29,6 +29,11 @@ class Index extends Validator * @param bool $supportForMultipleFulltextIndexes * @param bool $supportForIdenticalIndexes * @param bool $supportForObjectIndexes + * @param bool $supportForTrigramIndexes + * @param bool $supportForSpatialIndexes + * @param bool $supportForKeyIndexes + * @param bool $supportForUniqueIndexes + * @param bool $supportForFulltextIndexes * @throws DatabaseException */ public function __construct( @@ -43,7 +48,12 @@ public function __construct( protected bool $supportForAttributes = true, protected bool $supportForMultipleFulltextIndexes = true, protected bool $supportForIdenticalIndexes = true, - protected bool $supportForObjectIndexes = false + protected bool $supportForObjectIndexes = false, + protected bool $supportForTrigramIndexes = false, + protected bool $supportForSpatialIndexes = false, + protected bool $supportForKeyIndexes = true, + protected bool $supportForUniqueIndexes = true, + protected bool $supportForFulltextIndexes = true, ) { foreach ($attributes as $attribute) { $key = \strtolower($attribute->getAttribute('key', $attribute->getAttribute('$id'))); @@ -98,6 +108,9 @@ public function isArray(): bool */ public function isValid($value): bool { + if (!$this->checkValidIndex($value)) { + return false; + } if (!$this->checkValidAttributes($value)) { return false; } @@ -137,6 +150,82 @@ public function isValid($value): bool if (!$this->checkObjectIndexes($value)) { return false; } + if (!$this->checkTrigramIndexes($value)) { + return false; + } + if (!$this->checkKeyUniqueFulltextSupport($value)) { + return false; + } + return true; + } + + /** + * @param Document $index + * @return bool + */ + public function checkValidIndex(Document $index): bool + { + $type = $index->getAttribute('type'); + switch ($type) { + case Database::INDEX_KEY: + if 
(!$this->supportForKeyIndexes) { + $this->message = 'Key index is not supported'; + return false; + } + break; + + case Database::INDEX_UNIQUE: + if (!$this->supportForUniqueIndexes) { + $this->message = 'Unique index is not supported'; + return false; + } + break; + + case Database::INDEX_FULLTEXT: + if (!$this->supportForFulltextIndexes) { + $this->message = 'Fulltext index is not supported'; + return false; + } + break; + + case Database::INDEX_SPATIAL: + if (!$this->supportForSpatialIndexes) { + $this->message = 'Spatial indexes are not supported'; + return false; + } + if (!empty($index->getAttribute('orders')) && !$this->supportForSpatialIndexOrder) { + $this->message = 'Spatial indexes with explicit orders are not supported. Remove the orders to create this index.'; + return false; + } + break; + + case Database::INDEX_HNSW_EUCLIDEAN: + case Database::INDEX_HNSW_COSINE: + case Database::INDEX_HNSW_DOT: + if (!$this->supportForVectorIndexes) { + $this->message = 'Vector indexes are not supported'; + return false; + } + break; + + case Database::INDEX_OBJECT: + if (!$this->supportForObjectIndexes) { + $this->message = 'Object indexes are not supported'; + return false; + } + break; + + case Database::INDEX_TRIGRAM: + if (!$this->supportForTrigramIndexes) { + $this->message = 'Trigram indexes are not supported'; + return false; + } + break; + + default: + $this->message = 'Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL . ', ' . Database::INDEX_OBJECT . ', ' . Database::INDEX_HNSW_EUCLIDEAN . ', ' . Database::INDEX_HNSW_COSINE . ', ' . Database::INDEX_HNSW_DOT . 
', '.Database::INDEX_TRIGRAM; + return false; + } return true; } @@ -357,6 +446,11 @@ public function checkSpatialIndexes(Document $index): bool return true; } + if ($this->supportForSpatialIndexes === false) { + $this->message = 'Spatial indexes are not supported'; + return false; + } + $attributes = $index->getAttribute('attributes', []); $orders = $index->getAttribute('orders', []); @@ -462,6 +556,74 @@ public function checkVectorIndexes(Document $index): bool return true; } + /** + * Validate a trigram index: requires trigram support, string attributes only, and no orders or lengths. + * + * @param Document $index + * @return bool + * @throws DatabaseException + */ + public function checkTrigramIndexes(Document $index): bool + { + $type = $index->getAttribute('type'); + + if ($type !== Database::INDEX_TRIGRAM) { + return true; + } + + if ($this->supportForTrigramIndexes === false) { + $this->message = 'Trigram indexes are not supported'; + return false; + } + + $attributes = $index->getAttribute('attributes', []); + + foreach ($attributes as $attributeName) { + $attribute = $this->attributes[\strtolower($attributeName)] ?? 
new Document(); + if ($attribute->getAttribute('type', '') !== Database::VAR_STRING) { + $this->message = 'Trigram index can only be created on string type attributes'; + return false; + } + } + + $orders = $index->getAttribute('orders', []); + $lengths = $index->getAttribute('lengths', []); + if (!empty($orders) || \count(\array_filter($lengths)) > 0) { + $this->message = 'Trigram indexes do not support orders or lengths'; + return false; + } + + return true; + } + + /** + * Reject key, unique and fulltext indexes whose support flag is disabled; other index types pass. + * + * @param Document $index + * @return bool + */ + public function checkKeyUniqueFulltextSupport(Document $index): bool + { + $type = $index->getAttribute('type'); + + if ($type === Database::INDEX_KEY && $this->supportForKeyIndexes === false) { + $this->message = 'Key index is not supported'; + return false; + } + + if ($type === Database::INDEX_UNIQUE && $this->supportForUniqueIndexes === false) { + $this->message = 'Unique index is not supported'; + return false; + } + + if ($type === Database::INDEX_FULLTEXT && $this->supportForFulltextIndexes === false) { + $this->message = 'Fulltext index is not supported'; + return false; + } + + return true; + } + /** * @param Document $index * @return bool diff --git a/src/Database/Validator/Queries.php b/src/Database/Validator/Queries.php index 22017692a..79e4a62ab 100644 --- a/src/Database/Validator/Queries.php +++ b/src/Database/Validator/Queries.php @@ -122,6 +122,7 @@ public function isValid($value): bool Query::TYPE_VECTOR_DOT, Query::TYPE_VECTOR_COSINE, Query::TYPE_VECTOR_EUCLIDEAN, + Query::TYPE_REGEX, Query::TYPE_EXISTS, Query::TYPE_NOT_EXISTS => Base::METHOD_TYPE_FILTER, default => '', diff --git a/src/Database/Validator/Query/Filter.php b/src/Database/Validator/Query/Filter.php index e62fc3913..74dfa999d 100644 --- a/src/Database/Validator/Query/Filter.php +++ b/src/Database/Validator/Query/Filter.php @@ -342,6 +342,7 @@ public function isValid($value): bool case Query::TYPE_NOT_STARTS_WITH: case Query::TYPE_ENDS_WITH: case Query::TYPE_NOT_ENDS_WITH: + case Query::TYPE_REGEX: if (count($value->getValues()) != 1) { $this->message = \ucfirst($method) . 
' queries require exactly one value.'; return false; diff --git a/tests/e2e/Adapter/Scopes/DocumentTests.php b/tests/e2e/Adapter/Scopes/DocumentTests.php index 151a5ae26..df31b9595 100644 --- a/tests/e2e/Adapter/Scopes/DocumentTests.php +++ b/tests/e2e/Adapter/Scopes/DocumentTests.php @@ -3,6 +3,7 @@ namespace Tests\E2E\Adapter\Scopes; use Exception; +use PDOException; use Throwable; use Utopia\Database\Adapter\SQL; use Utopia\Database\Database; @@ -14,6 +15,7 @@ use Utopia\Database\Exception\Duplicate as DuplicateException; use Utopia\Database\Exception\Limit as LimitException; use Utopia\Database\Exception\Structure as StructureException; +use Utopia\Database\Exception\Timeout as TimeoutException; use Utopia\Database\Exception\Type as TypeException; use Utopia\Database\Helpers\ID; use Utopia\Database\Helpers\Permission; @@ -6549,4 +6551,880 @@ public function testUpsertWithJSONFilters(): void // Cleanup $database->deleteCollection($collection); } + + public function testFindRegex(): void + { + Authorization::setRole(Role::any()->toString()); + + /** @var Database $database */ + $database = static::getDatabase(); + + // Skip test if regex is not supported + if (!$database->getAdapter()->getSupportForRegex()) { + $this->expectNotToPerformAssertions(); + return; + } + + // Determine regex support type + $supportsPCRE = $database->getAdapter()->getSupportForPCRERegex(); + $supportsPOSIX = $database->getAdapter()->getSupportForPOSIXRegex(); + + // Determine word boundary pattern based on support + $wordBoundaryPattern = null; + $wordBoundaryPatternPHP = null; + if ($supportsPCRE) { + $wordBoundaryPattern = '\\b'; // PCRE uses \b + $wordBoundaryPatternPHP = '\\b'; // PHP preg_match uses \b + } elseif ($supportsPOSIX) { + $wordBoundaryPattern = '\\y'; // POSIX uses \y + $wordBoundaryPatternPHP = '\\b'; // PHP preg_match still uses \b for verification + } + + $database->createCollection('moviesRegex', permissions: [ + Permission::create(Role::any()), + 
Permission::read(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ]); + + if ($database->getAdapter()->getSupportForAttributes()) { + $this->assertEquals(true, $database->createAttribute('moviesRegex', 'name', Database::VAR_STRING, 128, true)); + $this->assertEquals(true, $database->createAttribute('moviesRegex', 'director', Database::VAR_STRING, 128, true)); + $this->assertEquals(true, $database->createAttribute('moviesRegex', 'year', Database::VAR_INTEGER, 0, true)); + } + + if ($database->getAdapter()->getSupportForTrigramIndex()) { + $database->createIndex('moviesRegex', 'trigram_name', Database::INDEX_TRIGRAM, ['name']); + $database->createIndex('moviesRegex', 'trigram_director', Database::INDEX_TRIGRAM, ['director']); + } + + // Create test documents + $database->createDocuments('moviesRegex', [ + new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Frozen', + 'director' => 'Chris Buck & Jennifer Lee', + 'year' => 2013, + ]), + new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Frozen II', + 'director' => 'Chris Buck & Jennifer Lee', + 'year' => 2019, + ]), + new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Captain America: The First Avenger', + 'director' => 'Joe Johnston', + 'year' => 2011, + ]), + new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Captain Marvel', + 'director' => 'Anna Boden & Ryan Fleck', + 'year' => 2019, + ]), + new Document([ + '$permissions' 
=> [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Work in Progress', + 'director' => 'TBD', + 'year' => 2025, + ]), + new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Work in Progress 2', + 'director' => 'TBD', + 'year' => 2026, + ]), + ]); + + // Helper function to verify regex query completeness + $verifyRegexQuery = function (string $attribute, string $regexPattern, array $queryResults) use ($database) { + // Convert database regex pattern to PHP regex format. + // POSIX-style word boundary (\y) is not supported by PHP PCRE, so map it to \b. + $normalizedPattern = str_replace('\y', '\b', $regexPattern); + $phpPattern = '/' . str_replace('/', '\/', $normalizedPattern) . '/'; + + // Get all documents to manually verify + $allDocuments = $database->find('moviesRegex'); + + // Manually filter documents that match the pattern + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $value = $doc->getAttribute($attribute); + if (preg_match($phpPattern, $value)) { + $expectedMatches[] = $doc->getId(); + } + } + + // Get IDs from query results + $actualMatches = array_map(fn ($doc) => $doc->getId(), $queryResults); + + // Verify no extra documents are returned + foreach ($queryResults as $doc) { + $value = $doc->getAttribute($attribute); + $this->assertTrue( + (bool) preg_match($phpPattern, $value), + "Document '{$doc->getId()}' with {$attribute}='{$value}' should match pattern '{$regexPattern}'" + ); + } + + // Verify all expected documents are returned (no missing) + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching pattern '{$regexPattern}' on attribute '{$attribute}'" + ); + }; + + // Test basic 
regex pattern - match movies starting with 'Captain' + // Note: Pattern format may vary by adapter (MongoDB uses regex strings, SQL uses REGEXP) + $pattern = '/^Captain/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '^Captain'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', '^Captain', $documents); + + // Verify expected documents are included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Captain America: The First Avenger', $names)); + $this->assertTrue(in_array('Captain Marvel', $names)); + + // Test regex pattern - match movies containing 'Frozen' + $pattern = '/Frozen/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', 'Frozen'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', 'Frozen', $documents); + + // Test regex pattern - match exact title 'Frozen' + $exactFrozenDocuments = $database->find('moviesRegex', [ + Query::regex('name', '^Frozen$'), + ]); + $verifyRegexQuery('name', '^Frozen$', $exactFrozenDocuments); + $this->assertCount(1, $exactFrozenDocuments, 'Exact ^Frozen$ regex should return only one document'); + // Verify expected documents are included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Frozen', $names)); + $this->assertTrue(in_array('Frozen II', $names)); + + // Test regex pattern - match movies ending with 'Marvel' + $pattern = '/Marvel$/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', 'Marvel$'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', 'Marvel$', $documents); + + $this->assertEquals(1, count($documents)); // Only Captain Marvel + $this->assertEquals('Captain Marvel', $documents[0]->getAttribute('name')); + + // Test regex pattern - match movies 
with 'Work' in the name + $pattern = '/.*Work.*/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '.*Work.*'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', '.*Work.*', $documents); + + // Verify expected documents are included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Work in Progress', $names)); + $this->assertTrue(in_array('Work in Progress 2', $names)); + + // Test regex pattern - match movies with 'Buck' in director + $pattern = '/.*Buck.*/'; + $documents = $database->find('moviesRegex', [ + Query::regex('director', '.*Buck.*'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('director', '.*Buck.*', $documents); + + // Verify expected documents are included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Frozen', $names)); + $this->assertTrue(in_array('Frozen II', $names)); + + // Test regex with case pattern - adapters may be case-sensitive or case-insensitive + // MySQL/MariaDB REGEXP is case-insensitive by default, MongoDB is case-sensitive + $patternCaseSensitive = '/captain/'; + $patternCaseInsensitive = '/captain/i'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', 'captain'), // lowercase + ]); + + // Verify all returned documents match the pattern (case-insensitive check for verification) + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + // Verify that returned documents contain 'captain' (case-insensitive check) + $this->assertTrue( + (bool) preg_match($patternCaseInsensitive, $name), + "Document '{$name}' should match pattern 'captain' (case-insensitive check)" + ); + } + + // Verify completeness: Check what the database actually returns + // Some adapters (MongoDB) are case-sensitive, others (MySQL/MariaDB) are case-insensitive + // We'll 
determine expected matches based on case-sensitive matching (pure regex behavior) + // If the adapter is case-insensitive, it will return more documents, which is fine + $allDocuments = $database->find('moviesRegex'); + $expectedMatchesCaseSensitive = []; + $expectedMatchesCaseInsensitive = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($patternCaseSensitive, $name)) { + $expectedMatchesCaseSensitive[] = $doc->getId(); + } + if (preg_match($patternCaseInsensitive, $name)) { + $expectedMatchesCaseInsensitive[] = $doc->getId(); + } + } + + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + sort($actualMatches); + + // The database might be case-sensitive (MongoDB) or case-insensitive (MySQL/MariaDB) + // Check which one matches the actual results + sort($expectedMatchesCaseSensitive); + sort($expectedMatchesCaseInsensitive); + + // Verify that actual results match either case-sensitive or case-insensitive expectations + $matchesCaseSensitive = ($expectedMatchesCaseSensitive === $actualMatches); + $matchesCaseInsensitive = ($expectedMatchesCaseInsensitive === $actualMatches); + + $this->assertTrue( + $matchesCaseSensitive || $matchesCaseInsensitive, + "Query results should match either case-sensitive (" . count($expectedMatchesCaseSensitive) . " docs) or case-insensitive (" . count($expectedMatchesCaseInsensitive) . " docs) expectations. Got " . count($actualMatches) . " documents." 
+ ); + + // Test regex with case-insensitive pattern (if adapter supports it via flags) + // Test with uppercase to verify case sensitivity + $pattern = '/Captain/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', 'Captain'), // uppercase + ]); + + // Verify all returned documents match the pattern + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $this->assertTrue( + (bool) preg_match($pattern, $name), + "Document '{$name}' should match pattern 'Captain'" + ); + } + + // Verify completeness + $allDocuments = $database->find('moviesRegex'); + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($pattern, $name)) { + $expectedMatches[] = $doc->getId(); + } + } + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching pattern 'Captain'" + ); + + // Test regex combined with other queries + $pattern = '/^Captain/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '^Captain'), + Query::greaterThan('year', 2010), + ]); + + // Verify all returned documents match both conditions + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $year = $doc->getAttribute('year'); + $this->assertTrue( + (bool) preg_match($pattern, $name), + "Document '{$name}' should match pattern '{$pattern}'" + ); + $this->assertGreaterThan(2010, $year, "Document '{$name}' should have year > 2010"); + } + + // Verify completeness: manually check all documents that match both conditions + $allDocuments = $database->find('moviesRegex'); + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + $year = $doc->getAttribute('year'); + if (preg_match($pattern, $name) && $year > 2010) { + $expectedMatches[] = $doc->getId(); + } + } + $actualMatches 
= array_map(fn ($doc) => $doc->getId(), $documents); + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching both regex '^Captain' and year > 2010" + ); + + // Test regex with limit + $pattern = '/.*/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '.*'), // Match all + Query::limit(3), + ]); + + $this->assertEquals(3, count($documents)); + + // Verify all returned documents match the pattern (should match all) + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $this->assertTrue( + (bool) preg_match($pattern, $name), + "Document '{$name}' should match pattern '{$pattern}'" + ); + } + + // Note: With limit, we can't verify completeness, but we can verify all returned match + + // Test regex with non-matching pattern + $pattern = '/^NonExistentPattern$/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '^NonExistentPattern$'), + ]); + + $this->assertEquals(0, count($documents)); + + // Verify no documents match (double-check by getting all and filtering) + $allDocuments = $database->find('moviesRegex'); + $matchingCount = 0; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($pattern, $name)) { + $matchingCount++; + } + } + $this->assertEquals(0, $matchingCount, "No documents should match pattern '{$pattern}'"); + + // Verify completeness: no documents should be returned + $this->assertEquals([], array_map(fn ($doc) => $doc->getId(), $documents)); + + // Test regex with special characters (should be escaped or handled properly) + $pattern = '/.*:.*/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '.*:.*'), // Match movies with colon + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', '.*:.*', $documents); + + // Verify expected document is included + $names = 
array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Captain America: The First Avenger', $names)); + + // ReDOS safety: ensure pathological patterns respond quickly and do not hang + $catastrophicPattern = '(a+)+$'; + $start = microtime(true); + $redosDocs = $database->find('moviesRegex', [ + Query::regex('name', $catastrophicPattern), + ]); + $elapsed = microtime(true) - $start; + $this->assertLessThan(1.0, $elapsed, 'Regex evaluation should not be slow or vulnerable to ReDOS'); + $verifyRegexQuery('name', $catastrophicPattern, $redosDocs); + $this->assertCount(0, $redosDocs, 'Pathological regex should not match any movie titles'); + + // Test regex search pattern - match movies with word boundaries + // Only test if word boundaries are supported (PCRE or POSIX) + if ($wordBoundaryPattern !== null) { + $dbPattern = $wordBoundaryPattern . 'Work' . $wordBoundaryPattern; + $phpPattern = '/' . $wordBoundaryPatternPHP . 'Work' . $wordBoundaryPatternPHP . 
'/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', $dbPattern), + ]); + + // Verify all returned documents match the pattern + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $this->assertTrue( + (bool) preg_match($phpPattern, $name), + "Document '{$name}' should match pattern '{$dbPattern}'" + ); + } + + // Verify completeness: manually check all documents + $allDocuments = $database->find('moviesRegex'); + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($phpPattern, $name)) { + $expectedMatches[] = $doc->getId(); + } + } + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching pattern '{$dbPattern}'" + ); + } + + // Test regex search with multiple patterns - match movies containing 'Captain' or 'Frozen' + $pattern1 = '/Captain/'; + $pattern2 = '/Frozen/'; + $documents = $database->find('moviesRegex', [ + Query::or([ + Query::regex('name', 'Captain'), + Query::regex('name', 'Frozen'), + ]), + ]); + + // Verify all returned documents match at least one pattern + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $matchesPattern1 = (bool) preg_match($pattern1, $name); + $matchesPattern2 = (bool) preg_match($pattern2, $name); + $this->assertTrue( + $matchesPattern1 || $matchesPattern2, + "Document '{$name}' should match either pattern 'Captain' or 'Frozen'" + ); + } + + // Verify completeness: manually check all documents + $allDocuments = $database->find('moviesRegex'); + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($pattern1, $name) || preg_match($pattern2, $name)) { + $expectedMatches[] = $doc->getId(); + } + } + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + 
sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching pattern 'Captain' OR 'Frozen'" + ); + $database->deleteCollection('moviesRegex'); + }
+
+    /**
+     * Regex patterns carrying SQL / MongoDB injection payloads must be evaluated purely
+     * as patterns: every document returned for such a pattern has to genuinely match it.
+     */
+    public function testRegexInjection(): void
+    {
+        Authorization::setRole(Role::any()->toString());
+
+        /** @var Database $database */
+        $database = static::getDatabase();
+
+        // Skip test if regex is not supported
+        if (!$database->getAdapter()->getSupportForRegex()) {
+            $this->expectNotToPerformAssertions();
+            return;
+        }
+
+        $collectionName = 'injectionTest';
+        $database->createCollection($collectionName, permissions: [
+            Permission::create(Role::any()),
+            Permission::read(Role::any()),
+            Permission::update(Role::any()),
+            Permission::delete(Role::any()),
+        ]);
+
+        try {
+            if ($database->getAdapter()->getSupportForAttributes()) {
+                $this->assertEquals(true, $database->createAttribute($collectionName, 'text', Database::VAR_STRING, 1000, true));
+            }
+
+            // Create test documents - one that should match, one that shouldn't
+            foreach (['target', 'other'] as $text) {
+                $database->createDocument($collectionName, new Document([
+                    '$permissions' => [
+                        Permission::read(Role::any()),
+                        Permission::create(Role::any()),
+                        Permission::update(Role::any()),
+                        Permission::delete(Role::any()),
+                    ],
+                    'text' => $text,
+                ]));
+            }
+
+            // Reference check with PHP's own regex engine: 1 = match, 0 = no match,
+            // false = the pattern could not be evaluated. Warnings are suppressed on
+            // purpose because some payloads below are deliberately malformed.
+            $referenceMatch = static fn (string $pattern, string $text): int|false =>
+                @preg_match('/' . str_replace('/', '\/', $pattern) . '/', $text);
+
+            // SQL injection attempts - these should NOT return the "other" document
+            $sqlInjectionPatterns = [
+                "target') OR '1'='1", // SQL injection attempt
+                "target' OR 1=1--", // SQL injection with comment
+                "target' OR 'x'='x", // SQL injection attempt
+                "target' UNION SELECT *--", // SQL UNION injection
+            ];
+
+            // MongoDB injection attempts - these should NOT return the "other" document
+            $mongoInjectionPatterns = [
+                'target" || "1"=="1', // MongoDB injection attempt
+                'target" || true', // MongoDB boolean injection
+                'target"} || {"text": "other"}', // MongoDB operator injection
+            ];
+
+            foreach (array_merge($sqlInjectionPatterns, $mongoInjectionPatterns) as $pattern) {
+                // Only find() is guarded. PHPUnit assertion failures are \Exception
+                // subclasses, so asserting inside this try block would silently swallow
+                // every detected injection.
+                try {
+                    $results = $database->find($collectionName, [
+                        Query::regex('text', $pattern),
+                    ]);
+                } catch (\Exception) {
+                    // Acceptable: the malicious pattern was rejected or caused an error
+                    continue;
+                }
+
+                foreach ($results as $doc) {
+                    $text = $doc->getAttribute('text');
+                    $matches = $referenceMatch($pattern, $text);
+
+                    // "other" was returned although it does not match the pattern as a
+                    // regex (or the pattern is not even a valid regex)
+                    if ($text === 'other' && ($matches === 0 || $matches === false)) {
+                        $this->fail(
+                            "Potential injection detected: Pattern '{$pattern}' returned document 'other' " .
+                            "which doesn't match the pattern. This suggests SQL/MongoDB injection may have succeeded."
+                        );
+                    }
+
+                    // If pattern is invalid, skip validation
+                    if ($matches === false) {
+                        continue;
+                    }
+
+                    // If document doesn't match but was returned, it's suspicious
+                    if ($matches === 0) {
+                        $this->fail(
+                            "Potential injection: Document '{$text}' was returned for pattern '{$pattern}' " .
+                            "but doesn't match the regex pattern."
+                        );
+                    }
+                }
+            }
+
+            // Legitimate regex patterns mapped to the document text each one must return
+            $legitimatePatterns = [
+                'target' => 'target',
+                '^target' => 'target', // anchored
+                'other' => 'other',
+            ];
+
+            foreach ($legitimatePatterns as $pattern => $expectedText) {
+                try {
+                    $results = $database->find($collectionName, [
+                        Query::regex('text', $pattern),
+                    ]);
+                } catch (\Exception $e) {
+                    $this->fail("Legitimate pattern '{$pattern}' should not throw exception: " . $e->getMessage());
+                }
+
+                $this->assertIsArray($results);
+
+                // Verify each result actually matches
+                $returnedTexts = [];
+                foreach ($results as $doc) {
+                    $text = $doc->getAttribute('text');
+                    $returnedTexts[] = $text;
+
+                    $matches = $referenceMatch($pattern, $text);
+                    if ($matches !== false) {
+                        $this->assertEquals(
+                            1,
+                            $matches,
+                            "Document '{$text}' should match pattern '{$pattern}'"
+                        );
+                    }
+                }
+
+                // Without this the check above passes vacuously on an empty result set
+                $this->assertContains(
+                    $expectedText,
+                    $returnedTexts,
+                    "Pattern '{$pattern}' should return document '{$expectedText}'"
+                );
+            }
+        } finally {
+            // Cleanup runs even when an assertion fails
+            $database->deleteCollection($collectionName);
+        }
+    }
+ + /** + * Test ReDoS (Regular Expression Denial of Service) with timeout protection + * This test verifies that ReDoS patterns either timeout properly or complete quickly, + * preventing denial of service attacks.
+     */
+    public function testRegexRedos(): void
+    {
+        Authorization::setRole(Role::any()->toString());
+
+        /** @var Database $database */
+        $database = static::getDatabase();
+
+        // Skip test if regex is not supported
+        if (!$database->getAdapter()->getSupportForRegex()) {
+            $this->expectNotToPerformAssertions();
+            return;
+        }
+
+        $collectionName = 'redosTimeoutTest';
+        $database->createCollection($collectionName, permissions: [
+            Permission::create(Role::any()),
+            Permission::read(Role::any()),
+            Permission::update(Role::any()),
+            Permission::delete(Role::any()),
+        ]);
+
+        $supportsTimeout = $database->getAdapter()->getSupportForTimeouts();
+        $timeoutApplied = false;
+
+        // True when the exception reports that the running query was interrupted:
+        // either the message says so, or it is a PDOException carrying driver error
+        // 1317 / SQLSTATE 70100.
+        $isQueryInterruption = static function (\Exception $e): bool {
+            if (str_contains($e->getMessage(), 'interrupted')) {
+                return true;
+            }
+
+            if ($e instanceof PDOException) {
+                $errorInfo = $e->errorInfo ?? [];
+
+                return (isset($errorInfo[1]) && $errorInfo[1] === 1317) || $e->getCode() === '70100';
+            }
+
+            return false;
+        };
+
+        try {
+            if ($database->getAdapter()->getSupportForAttributes()) {
+                $this->assertEquals(true, $database->createAttribute($collectionName, 'text', Database::VAR_STRING, 1000, true));
+            }
+
+            // Strings designed to trigger ReDoS: many 'a's followed by 'c' instead of 'b',
+            // which causes catastrophic backtracking with patterns like (a+)+b
+            $texts = [];
+            for ($i = 15; $i <= 35; $i += 5) {
+                $texts[] = str_repeat('a', $i) . 'c';
+            }
+
+            // Also add some normal strings
+            array_push($texts, 'normal text', 'another string', 'test123', 'valid data');
+
+            $documents = array_map(
+                static fn (string $text): Document => new Document([
+                    '$permissions' => [
+                        Permission::read(Role::any()),
+                        Permission::create(Role::any()),
+                        Permission::update(Role::any()),
+                        Permission::delete(Role::any()),
+                    ],
+                    'text' => $text,
+                ]),
+                $texts
+            );
+
+            $database->createDocuments($collectionName, $documents);
+
+            // ReDoS patterns that cause exponential backtracking
+            $redosPatterns = [
+                '(a+)+b', // Classic ReDoS: nested quantifiers
+                '(a|a)*b', // Alternation with quantifier
+                '(a+)+$', // Anchored pattern
+                '(a*)*b', // Nested star quantifiers
+                '(a+)+b+', // Multiple nested quantifiers
+                '(.+)+b', // Generic nested quantifiers
+                '(.*)+b', // Generic nested quantifiers
+            ];
+
+            if ($supportsTimeout) {
+                $database->setTimeout(2000);
+                $timeoutApplied = true;
+            }
+
+            foreach ($redosPatterns as $pattern) {
+                $startTime = microtime(true);
+
+                // Only find() is guarded so that assertion failures below are reported
+                // as such instead of being re-thrown as "Unexpected exception".
+                try {
+                    $results = $database->find($collectionName, [
+                        Query::regex('text', $pattern),
+                    ]);
+                } catch (TimeoutException $e) {
+                    // A timeout is an accepted outcome for a ReDoS pattern. No lower bound
+                    // is asserted: a fast abort means the engine protected itself quickly.
+                    if ($supportsTimeout) {
+                        $this->assertLessThan(
+                            5.0,
+                            microtime(true) - $startTime,
+                            "Timeout should occur within reasonable time (before 5 seconds)"
+                        );
+                    }
+                    continue;
+                } catch (\Exception $e) {
+                    if ($isQueryInterruption($e)) {
+                        // Effectively a timeout - the query was interrupted
+                        continue;
+                    }
+
+                    // Other exceptions are unexpected
+                    $this->fail("Unexpected exception for pattern '{$pattern}': " . get_class($e) . " - " . $e->getMessage());
+                }
+
+                $elapsed = microtime(true) - $startTime;
+
+                if ($supportsTimeout) {
+                    // We got here without a timeout, so it should have completed quickly
+                    $this->assertLessThan(
+                        3.0,
+                        $elapsed,
+                        "Regex pattern '{$pattern}' should complete quickly or timeout. Took {$elapsed}s"
+                    );
+                } else {
+                    // Without timeout support, just check it doesn't hang forever
+                    $this->assertLessThan(
+                        15.0,
+                        $elapsed,
+                        "Regex pattern '{$pattern}' should not cause excessive delay. Took {$elapsed}s"
+                    );
+                }
+
+                // Every returned document must genuinely match the pattern; documents the
+                // reference regex engine cannot evaluate (false) are skipped
+                foreach ($results as $doc) {
+                    $text = $doc->getAttribute('text');
+                    $matches = @preg_match('/' . str_replace('/', '\/', $pattern) . '/', $text);
+                    if ($matches !== false) {
+                        $this->assertEquals(
+                            1,
+                            $matches,
+                            "Document with text '{$text}' should actually match pattern '{$pattern}'"
+                        );
+                    }
+                }
+            }
+
+            // Test with a pattern that should match quickly (not ReDoS)
+            $safePattern = 'normal';
+            $startTime = microtime(true);
+            $results = $database->find($collectionName, [
+                Query::regex('text', $safePattern),
+            ]);
+            $elapsed = microtime(true) - $startTime;
+
+            // Safe patterns should complete very quickly
+            $this->assertLessThan(1.0, $elapsed, 'Safe regex pattern should complete quickly');
+            $this->assertGreaterThan(0, count($results), 'Safe pattern should match some documents');
+
+            // Verify safe pattern results are correct
+            foreach ($results as $doc) {
+                $text = $doc->getAttribute('text');
+                $this->assertStringContainsString('normal', $text, "Document '{$text}' should contain 'normal'");
+            }
+        } finally {
+            // Cleanup runs even when an assertion fails, so neither the timeout nor the
+            // collection leaks into other tests using the same database
+            if ($timeoutApplied) {
+                $database->clearTimeout();
+            }
+            $database->deleteCollection($collectionName);
+        }
+    }
} diff --git a/tests/e2e/Adapter/Scopes/IndexTests.php b/tests/e2e/Adapter/Scopes/IndexTests.php index 77f276cd6..e5eda16d0 100644 --- a/tests/e2e/Adapter/Scopes/IndexTests.php +++ b/tests/e2e/Adapter/Scopes/IndexTests.php @@ -173,7 +173,13 @@ public function testIndexValidation(): void $database->getAdapter()->getSupportForVectors(), $database->getAdapter()->getSupportForAttributes(), $database->getAdapter()->getSupportForMultipleFulltextIndexes(), - $database->getAdapter()->getSupportForIdenticalIndexes() + $database->getAdapter()->getSupportForIdenticalIndexes(), + $database->getAdapter()->getSupportForObject(), + $database->getAdapter()->getSupportForTrigramIndex(), + $database->getAdapter()->getSupportForSpatialAttributes(), + $database->getAdapter()->getSupportForIndex(), + $database->getAdapter()->getSupportForUniqueIndex(), + $database->getAdapter()->getSupportForFulltextIndex() ); if ($database->getAdapter()->getSupportForIdenticalIndexes()) { $errorMessage = 'Index length 701 is larger than the size for title1: 700"'; @@ -264,12 +270,20 @@ public function testIndexValidation(): void
$database->getAdapter()->getSupportForVectors(), $database->getAdapter()->getSupportForAttributes(), $database->getAdapter()->getSupportForMultipleFulltextIndexes(), - $database->getAdapter()->getSupportForIdenticalIndexes() + $database->getAdapter()->getSupportForIdenticalIndexes(), + $database->getAdapter()->getSupportForObject(), + $database->getAdapter()->getSupportForTrigramIndex(), + $database->getAdapter()->getSupportForSpatialAttributes(), + $database->getAdapter()->getSupportForIndex(), + $database->getAdapter()->getSupportForUniqueIndex(), + $database->getAdapter()->getSupportForFulltextIndex() ); $this->assertFalse($validator->isValid($newIndex)); - if (!$database->getAdapter()->getSupportForMultipleFulltextIndexes()) { + if (!$database->getAdapter()->getSupportForFulltextIndex()) { + $this->assertEquals('Fulltext index is not supported', $validator->getDescription()); + } elseif (!$database->getAdapter()->getSupportForMultipleFulltextIndexes()) { $this->assertEquals('There is already a fulltext index in the collection', $validator->getDescription()); } elseif ($database->getAdapter()->getSupportForAttributes()) { $this->assertEquals('Attribute "integer" cannot be part of a fulltext index, must be of type string', $validator->getDescription()); @@ -281,7 +295,11 @@ public function testIndexValidation(): void $this->fail('Failed to throw exception'); } } catch (Exception $e) { - $this->assertEquals('Attribute "integer" cannot be part of a fulltext index, must be of type string', $e->getMessage()); + if (!$database->getAdapter()->getSupportForFulltextIndex()) { + $this->assertEquals('Fulltext index is not supported', $e->getMessage()); + } else { + $this->assertEquals('Attribute "integer" cannot be part of a fulltext index, must be of type string', $e->getMessage()); + } } @@ -644,4 +662,140 @@ public function testIdenticalIndexValidation(): void $database->deleteCollection($collectionId); } }
+
+    /**
+     * Trigram indexes can be created on string attributes, show up in the collection's
+     * index list, and can be deleted again.
+     */
+    public function testTrigramIndex(): void
+    {
+        /** @var Database $database */
+        $database = static::getDatabase();
+
+        // Nothing to verify on adapters without trigram index support
+        if (!$database->getAdapter()->getSupportForTrigramIndex()) {
+            $this->expectNotToPerformAssertions();
+            return;
+        }
+
+        $collectionId = 'trigram_test';
+        try {
+            $database->createCollection($collectionId);
+
+            $database->createAttribute($collectionId, 'name', Database::VAR_STRING, 256, false);
+            $database->createAttribute($collectionId, 'description', Database::VAR_STRING, 512, false);
+
+            // Create trigram index on name attribute
+            $this->assertEquals(true, $database->createIndex($collectionId, 'trigram_name', Database::INDEX_TRIGRAM, ['name']));
+
+            $indexes = $database->getCollection($collectionId)->getAttribute('indexes');
+            $this->assertCount(1, $indexes);
+            $this->assertEquals('trigram_name', $indexes[0]['$id']);
+            $this->assertEquals(Database::INDEX_TRIGRAM, $indexes[0]['type']);
+            $this->assertEquals(['name'], $indexes[0]['attributes']);
+
+            // Create another trigram index on description
+            $this->assertEquals(true, $database->createIndex($collectionId, 'trigram_description', Database::INDEX_TRIGRAM, ['description']));
+            $this->assertCount(2, $database->getCollection($collectionId)->getAttribute('indexes'));
+
+            // Test that trigram index can be deleted
+            $this->assertEquals(true, $database->deleteIndex($collectionId, 'trigram_name'));
+            $this->assertEquals(true, $database->deleteIndex($collectionId, 'trigram_description'));
+            $this->assertCount(0, $database->getCollection($collectionId)->getAttribute('indexes'));
+        } finally {
+            // Clean up
+            $database->deleteCollection($collectionId);
+        }
+    }
+
+    /**
+     * Trigram indexes must reject non-string attributes as well as explicit orders and lengths.
+     */
+    public function testTrigramIndexValidation(): void
+    {
+        /** @var Database $database */
+        $database = static::getDatabase();
+
+        // Nothing to verify on adapters without trigram index support
+        if (!$database->getAdapter()->getSupportForTrigramIndex()) {
+            $this->expectNotToPerformAssertions();
+            return;
+        }
+
+        $collectionId = 'trigram_validation_test';
+
+        // Tries to create a trigram index that must be rejected with $expectedMessage.
+        // fail() is called outside the try block so the catch clause can never pick it
+        // up and report it with a misleading message.
+        $assertRejected = function (string $indexId, array $attributes, array $lengthsAndOrders, string $expectedMessage, string $failureMessage) use ($database, $collectionId): void {
+            try {
+                $database->createIndex($collectionId, $indexId, Database::INDEX_TRIGRAM, $attributes, ...$lengthsAndOrders);
+            } catch (Exception $e) {
+                $this->assertStringContainsString($expectedMessage, $e->getMessage());
+                return;
+            }
+            $this->fail($failureMessage);
+        };
+
+        try {
+            $database->createCollection($collectionId);
+
+            $database->createAttribute($collectionId, 'name', Database::VAR_STRING, 256, false);
+            $database->createAttribute($collectionId, 'description', Database::VAR_STRING, 412, false);
+            $database->createAttribute($collectionId, 'age', Database::VAR_INTEGER, 8, false);
+
+            // Test: Trigram index on non-string attribute should fail
+            $assertRejected(
+                'trigram_invalid',
+                ['age'],
+                [],
+                'Trigram index can only be created on string type attributes',
+                'Expected exception when creating trigram index on non-string attribute'
+            );
+
+            // Test: Trigram index with multiple string attributes should succeed
+            $this->assertEquals(true, $database->createIndex($collectionId, 'trigram_multi', Database::INDEX_TRIGRAM, ['name', 'description']));
+
+            $trigramMultiIndex = null;
+            foreach ($database->getCollection($collectionId)->getAttribute('indexes') as $idx) {
+                if ($idx['$id'] === 'trigram_multi') {
+                    $trigramMultiIndex = $idx;
+                    break;
+                }
+            }
+            $this->assertNotNull($trigramMultiIndex);
+            $this->assertEquals(Database::INDEX_TRIGRAM, $trigramMultiIndex['type']);
+            $this->assertEquals(['name', 'description'], $trigramMultiIndex['attributes']);
+
+            // Test: Trigram index with mixed string and non-string attributes should fail
+            $assertRejected(
+                'trigram_mixed',
+                ['name', 'age'],
+                [],
+                'Trigram index can only be created on string type attributes',
+                'Expected exception when creating trigram index with mixed attribute types'
+            );
+
+            // Test: Trigram index with orders should fail (empty lengths, one order)
+            $assertRejected(
+                'trigram_order',
+                ['name'],
+                [[], [Database::ORDER_ASC]],
+                'Trigram indexes do not support orders or lengths',
+                'Expected exception when creating trigram index with orders'
+            );
+
+            // Test: Trigram index with lengths should fail
+            $assertRejected(
+                'trigram_length',
+                ['name'],
+                [[128]],
+                'Trigram indexes do not support orders or lengths',
+                'Expected exception when creating trigram index with lengths'
+            );
+        } finally {
+            // Clean up
+            $database->deleteCollection($collectionId);
+        }
+    }
} diff --git a/tests/unit/Validator/IndexTest.php b/tests/unit/Validator/IndexTest.php index 608a65d2b..5dfe80e4e 100644 --- a/tests/unit/Validator/IndexTest.php +++ b/tests/unit/Validator/IndexTest.php @@ -477,4 +477,79 @@ public function testIndexWithNoAttributeSupport(): void $index = $collection->getAttribute('indexes')[0]; $this->assertTrue($validator->isValid($index)); }
+
+    /**
+     * Trigram indexes are valid on string attributes only, accept neither orders nor
+     * lengths, and are refused when the validator is built without trigram support.
+     *
+     * @throws Exception
+     */
+    public function testTrigramIndexValidation(): void
+    {
+        // Attribute definition; the three attributes differ only in id, type and size
+        $attribute = static fn (string $id, string $type, int $size): Document => new Document([
+            '$id' => ID::custom($id),
+            'type' => $type,
+            'format' => '',
+            'size' => $size,
+            'signed' => true,
+            'required' => false,
+            'default' => null,
+            'array' => false,
+            'filters' => [],
+        ]);
+
+        // Trigram index definition over the given attribute ids
+        $trigramIndex = static fn (string $id, array $attributeIds, array $lengths = [], array $orders = []): Document => new Document([
+            '$id' => ID::custom($id),
+            'type' => Database::INDEX_TRIGRAM,
+            'attributes' => $attributeIds,
+            'lengths' => $lengths,
+            'orders' => $orders,
+        ]);
+
+        $collection = new Document([
+            '$id' => ID::custom('test'),
+            'name' => 'test',
+            'attributes' => [
+                $attribute('name', Database::VAR_STRING, 255),
+                $attribute('description', Database::VAR_STRING, 512),
+                $attribute('age', Database::VAR_INTEGER, 0),
+            ],
+            'indexes' => []
+        ]);
+        $attributes = $collection->getAttribute('attributes');
+        $indexes = $collection->getAttribute('indexes', []);
+
+        // Validator with supportForTrigramIndexes enabled
+        $validator = new Index($attributes, $indexes, 768, [], false, false, false, false, false, false, false, false, supportForTrigramIndexes: true);
+
+        // Valid: Trigram index on single VAR_STRING attribute
+        $validIndex = $trigramIndex('idx_trigram_valid', ['name']);
+        $this->assertTrue($validator->isValid($validIndex));
+
+        // Valid: Trigram index on multiple string attributes
+        $this->assertTrue($validator->isValid($trigramIndex('idx_trigram_multi_valid', ['name', 'description'])));
+
+        // Invalid: Trigram index on non-string attribute
+        $this->assertFalse($validator->isValid($trigramIndex('idx_trigram_invalid_type', ['age'])));
+        $this->assertStringContainsString('Trigram index can only be created on string type attributes', $validator->getDescription());
+
+        // Invalid: Trigram index with mixed string and non-string attributes
+        $this->assertFalse($validator->isValid($trigramIndex('idx_trigram_mixed', ['name', 'age'])));
+        $this->assertStringContainsString('Trigram index can only be created on string type attributes', $validator->getDescription());
+
+        // Invalid: Trigram index with orders
+        $this->assertFalse($validator->isValid($trigramIndex('idx_trigram_order', ['name'], orders: ['asc'])));
+        $this->assertStringContainsString('Trigram indexes do not support orders or lengths', $validator->getDescription());
+
+        // Invalid: Trigram index with lengths
+        $this->assertFalse($validator->isValid($trigramIndex('idx_trigram_length', ['name'], lengths: [128])));
+        $this->assertStringContainsString('Trigram indexes do not support orders or lengths', $validator->getDescription());
+
+        // Validator with supportForTrigramIndexes disabled should reject trigram
+        $validatorNoSupport = new Index($attributes, $indexes, 768, [], false, false, false, false, false, false, false, false, false);
+        $this->assertFalse($validatorNoSupport->isValid($validIndex));
+        $this->assertEquals('Trigram indexes are not supported', $validatorNoSupport->getDescription());
+    }
}