From 83ea6b295019048c9e8169852cf9efd0e2449294 Mon Sep 17 00:00:00 2001 From: Varun Chawla Date: Sun, 15 Feb 2026 01:50:08 -0800 Subject: [PATCH 1/2] Fix unbounded TypeAdapter cache causing memory leak in multi-threaded usage The lru_cache wrapping pydantic.TypeAdapter was set to maxsize=None (unbounded). In multi-threaded contexts, pydantic regenerates parameterized generic types with different identities on each call, so the cache grows without bound. This is especially problematic in webserver environments using responses.parse. Setting maxsize=128 bounds the cache and prevents the memory leak while still providing effective caching for the most recently used types. Fixes #2672 --- src/openai/_models.py | 2 +- tests/test_models.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/openai/_models.py b/src/openai/_models.py index 5cca20c6f9..3007fe999f 100644 --- a/src/openai/_models.py +++ b/src/openai/_models.py @@ -799,7 +799,7 @@ class GenericModel(BaseGenericModel, BaseModel): if not PYDANTIC_V1: from pydantic import TypeAdapter as _TypeAdapter - _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=None)(_TypeAdapter)) + _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=128)(_TypeAdapter)) if TYPE_CHECKING: from pydantic import TypeAdapter diff --git a/tests/test_models.py b/tests/test_models.py index 588869ee35..a3f52fb9bc 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -961,3 +961,18 @@ def __getattr__(self, attr: str) -> Item: ... 
assert model.a.prop == 1 assert isinstance(model.a, Item) assert model.other == "foo" + + +@pytest.mark.skipif(PYDANTIC_V1, reason="TypeAdapter cache is only used in Pydantic v2") +def test_type_adapter_cache_is_bounded() -> None: + """Regression test for https://github.com/openai/openai-python/issues/2672 + + The TypeAdapter cache must have a bounded maxsize to prevent memory leaks + in multi-threaded environments where parameterized generic types get + regenerated with different identities on each call. + """ + from openai._models import TypeAdapter + + cache_info = TypeAdapter.cache_info() + assert cache_info.maxsize is not None, "TypeAdapter cache maxsize must not be None (unbounded)" + assert cache_info.maxsize > 0, "TypeAdapter cache maxsize must be positive" From 38cb1b024820cd7ed328ddae0019ae79ee151772 Mon Sep 17 00:00:00 2001 From: Varun Chawla Date: Thu, 19 Feb 2026 21:59:40 -0800 Subject: [PATCH 2/2] Replace LRU cache with thread-local storage for TypeAdapter caching The LRU cache with maxsize=128 was redundant (default value) and ineffective in multi-threaded environments because parameterized generic types are regenerated with different identities on each call, so lookups rarely hit the shared cache. Switch to threading.local() which naturally prevents memory leaks (cleaned up on thread exit) while providing actual caching benefit within each thread.
Co-Authored-By: Claude Opus 4.6 --- src/openai/_models.py | 15 ++++++++++++--- tests/test_models.py | 22 ++++++++++++++-------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/openai/_models.py b/src/openai/_models.py index 3007fe999f..7c6d521197 100644 --- a/src/openai/_models.py +++ b/src/openai/_models.py @@ -3,6 +3,7 @@ import os import inspect import weakref +import threading from typing import ( IO, TYPE_CHECKING, @@ -54,7 +55,6 @@ is_list, is_given, json_safe, - lru_cache, is_mapping, parse_date, coerce_boolean, @@ -799,12 +799,21 @@ class GenericModel(BaseGenericModel, BaseModel): if not PYDANTIC_V1: from pydantic import TypeAdapter as _TypeAdapter - _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=128)(_TypeAdapter)) + _type_adapter_cache: threading.local = threading.local() + + def _get_cached_type_adapter(type_: type[_T]) -> _TypeAdapter[_T]: + cache: dict[type[Any], _TypeAdapter[Any]] = getattr(_type_adapter_cache, "adapters", None) or {} + _type_adapter_cache.adapters = cache + adapter = cache.get(type_) + if adapter is None: + adapter = _TypeAdapter(type_) + cache[type_] = adapter + return adapter if TYPE_CHECKING: from pydantic import TypeAdapter else: - TypeAdapter = _CachedTypeAdapter + TypeAdapter = _get_cached_type_adapter def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: return TypeAdapter(type_).validate_python(value) diff --git a/tests/test_models.py b/tests/test_models.py index a3f52fb9bc..567346171b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -964,15 +964,21 @@ def __getattr__(self, attr: str) -> Item: ... 
@pytest.mark.skipif(PYDANTIC_V1, reason="TypeAdapter cache is only used in Pydantic v2") -def test_type_adapter_cache_is_bounded() -> None: +def test_type_adapter_cache_is_thread_local() -> None: """Regression test for https://github.com/openai/openai-python/issues/2672 - The TypeAdapter cache must have a bounded maxsize to prevent memory leaks - in multi-threaded environments where parameterized generic types get - regenerated with different identities on each call. + The TypeAdapter cache uses threading.local() to prevent memory leaks + in multi-threaded environments. Each thread maintains its own cache that + is cleaned up when the thread exits. """ - from openai._models import TypeAdapter + import threading - cache_info = TypeAdapter.cache_info() - assert cache_info.maxsize is not None, "TypeAdapter cache maxsize must not be None (unbounded)" - assert cache_info.maxsize > 0, "TypeAdapter cache maxsize must be positive" + from openai._models import TypeAdapter, _type_adapter_cache + + # Verify the cache is thread-local + assert isinstance(_type_adapter_cache, threading.local) + + # Verify TypeAdapter returns a cached instance for the same type + adapter1 = TypeAdapter(int) + adapter2 = TypeAdapter(int) + assert adapter1 is adapter2, "TypeAdapter should return cached instances for the same type"