From 060c3720d2752fdd968b801e2b5e6b69e5feaf2e Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 6 Jan 2026 08:08:09 +0000 Subject: [PATCH] Fix parsing of regex literals --- CHANGELOG.md | 6 ++++++ docs/syntax.md | 13 +++++++++++++ jsonpath/__about__.py | 2 +- jsonpath/lex.py | 2 +- tests/cts | 2 +- tests/regex_operator.json | 26 +++++++++++++++++++++++++- tests/test_issues.py | 16 ++++++++++++++++ 7 files changed, 63 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 686f0a0..5ea2795 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Python JSONPath Change Log +## Version 2.0.2 (unreleased) + +**Fixes** + +- Fixed parsing of non-standard JSONPath regular expression literals containing an escaped solidus (`/`). This affected queries using the regex operator `=~`, like `$.some[?(@.thing =~ /fo\/[a-z]/)]`, not standard `match` and `search` functions. See [#124](https://github.com/jg-rp/python-jsonpath/issues/124). + ## Version 2.0.1 **Fixes** diff --git a/docs/syntax.md b/docs/syntax.md index 50ba773..9ebf749 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -616,6 +616,19 @@ list-literal = "[" S literal *(S "," S literal) S "]" $..products[?(@.description =~ /.*trainers/i)] ``` +You can escape a solidus (`/`) with a reverse solidus (`\`). + +``` +$.some[?(@.thing =~ /fo\/[a-z]/)] +``` + +As a Python string literal, you'd need to double escape the reverse solidus or use a raw string literal. + +```python +query = r"$.some[?(@.thing =~ /fo\/[a-z]/)]" +query = "$.some[?(@.thing =~ /fo\\/[a-z]/)]" +``` + ### Union and intersection operators The union or concatenation operator, `|`, combines matches from two or more paths. 
diff --git a/jsonpath/__about__.py b/jsonpath/__about__.py index 2658d58..e856d07 100644 --- a/jsonpath/__about__.py +++ b/jsonpath/__about__.py @@ -1,4 +1,4 @@ # SPDX-FileCopyrightText: 2023-present James Prior # # SPDX-License-Identifier: MIT -__version__ = "2.0.1" +__version__ = "2.0.2" diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 7737c90..75f48a7 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -113,7 +113,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: ) # /pattern/ or /pattern/flags - self.re_pattern = r"/(?P<G_RE>.+?)/(?P<G_RE_FLAGS>[aims]*)" + self.re_pattern = r"/(?P<G_RE>(?:(?!(?<!\\)/).)*)/(?P<G_RE_FLAGS>[aims]*)" # func( self.function_pattern = r"(?P<G_FUNC>[a-z][a-z_0-9]+)(?P<G_FUNC_PAREN>\()" diff --git a/tests/cts b/tests/cts index 13b60f1..b9d7153 160000 --- a/tests/cts +++ b/tests/cts @@ -1 +1 @@ -Subproject commit 13b60f1749e49b591dbbcf62fed8cd67f9aee13d +Subproject commit b9d7153e58711ad38bb8e35ece69c13f4b2f7d63 diff --git a/tests/regex_operator.json b/tests/regex_operator.json index 4816304..196cfc4 100644 --- a/tests/regex_operator.json +++ b/tests/regex_operator.json @@ -25,12 +25,36 @@ "tags": ["extra"] }, { - "name": "regex literal, escaped slash", + "name": "regex literal, escaped backslash", "selector": "$.some[?(@.thing =~ /fo\\\\[a-z]/)]", "document": { "some": [{ "thing": "fo\\b" }] }, "result": [{ "thing": "fo\\b" }], "result_paths": ["$['some'][0]"], "tags": ["extra"] + }, + { + "name": "regex literal, escaped slash", + "selector": "$.some[?(@.thing =~ /fo\\/[a-z]/)]", + "document": { "some": [{ "thing": "fo/b" }] }, + "result": [{ "thing": "fo/b" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] + }, + { + "name": "regex literal, escaped asterisk", + "selector": "$.some[?(@.thing =~ /fo\\*[a-z]/)]", + "document": { "some": [{ "thing": "fo*b" }] }, + "result": [{ "thing": "fo*b" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] + }, + { + "name": "regex literal, escaped dot", + "selector": "$.some[?(@.thing =~ /fo\\.[a-z]/)]", + "document": { "some": [{ 
"thing": "fo.b" }] }, + "result": [{ "thing": "fo.b" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] } ] } diff --git a/tests/test_issues.py b/tests/test_issues.py index c88f7f4..cfc2ad0 100644 --- a/tests/test_issues.py +++ b/tests/test_issues.py @@ -115,3 +115,19 @@ def test_issue_117() -> None: data = {"foo": ["bar", "baz"]} with pytest.raises(JSONPatchError): patch.apply(data) + + +def test_issue_124() -> None: + query_raw = r"$[?@type =~ /studio\/material\/.*/]" + query = "$[?@type =~ /studio\\/material\\/.*/]" + + data = [ + {"type": "studio/material/a"}, + {"type": "studio/material/b"}, + {"type": "studio foo"}, + ] + + want = [{"type": "studio/material/a"}, {"type": "studio/material/b"}] + + assert findall(query, data) == want + assert findall(query_raw, data) == want