From 83ea6b295019048c9e8169852cf9efd0e2449294 Mon Sep 17 00:00:00 2001 From: Varun Chawla Date: Sun, 15 Feb 2026 01:50:08 -0800 Subject: [PATCH 1/2] Fix unbounded TypeAdapter cache causing memory leak in multi-threaded usage The lru_cache wrapping pydantic.TypeAdapter was set to maxsize=None (unbounded). In multi-threaded contexts, pydantic regenerates parameterized generic types with different identities on each call, so the cache grows without bound. This is especially problematic in webserver environments using responses.parse. Setting maxsize=128 bounds the cache and prevents the memory leak while still providing effective caching for the most recently used types. Fixes #2672 --- src/openai/_models.py | 2 +- tests/test_models.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/openai/_models.py b/src/openai/_models.py index 5cca20c6f9..3007fe999f 100644 --- a/src/openai/_models.py +++ b/src/openai/_models.py @@ -799,7 +799,7 @@ class GenericModel(BaseGenericModel, BaseModel): if not PYDANTIC_V1: from pydantic import TypeAdapter as _TypeAdapter - _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=None)(_TypeAdapter)) + _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=128)(_TypeAdapter)) if TYPE_CHECKING: from pydantic import TypeAdapter diff --git a/tests/test_models.py b/tests/test_models.py index 588869ee35..a3f52fb9bc 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -961,3 +961,18 @@ def __getattr__(self, attr: str) -> Item: ... 
assert model.a.prop == 1 assert isinstance(model.a, Item) assert model.other == "foo" + + +@pytest.mark.skipif(PYDANTIC_V1, reason="TypeAdapter cache is only used in Pydantic v2") +def test_type_adapter_cache_is_bounded() -> None: + """Regression test for https://github.com/openai/openai-python/issues/2672 + + The TypeAdapter cache must have a bounded maxsize to prevent memory leaks + in multi-threaded environments where parameterized generic types get + regenerated with different identities on each call. + """ + from openai._models import TypeAdapter + + cache_info = TypeAdapter.cache_info() + assert cache_info.maxsize is not None, "TypeAdapter cache maxsize must not be None (unbounded)" + assert cache_info.maxsize > 0, "TypeAdapter cache maxsize must be positive" From 38cb1b024820cd7ed328ddae0019ae79ee151772 Mon Sep 17 00:00:00 2001 From: Varun Chawla Date: Thu, 19 Feb 2026 21:59:40 -0800 Subject: [PATCH 2/2] Replace LRU cache with thread-local storage for TypeAdapter caching The LRU cache with maxsize=128 was redundant (default value) and ineffective in multi-threaded environments because parameterized generic types are regenerated with different identities on each call, so lookups rarely hit the shared cache. Switch to threading.local() which naturally prevents memory leaks (cleaned up on thread exit) while providing actual caching benefit within each thread.
Co-Authored-By: Claude Opus 4.6 --- src/openai/_models.py | 15 ++++++++++++--- tests/test_models.py | 22 ++++++++++++++-------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/openai/_models.py b/src/openai/_models.py index 3007fe999f..7c6d521197 100644 --- a/src/openai/_models.py +++ b/src/openai/_models.py @@ -3,6 +3,7 @@ import os import inspect import weakref +import threading from typing import ( IO, TYPE_CHECKING, @@ -54,7 +55,6 @@ is_list, is_given, json_safe, - lru_cache, is_mapping, parse_date, coerce_boolean, @@ -799,12 +799,21 @@ class GenericModel(BaseGenericModel, BaseModel): if not PYDANTIC_V1: from pydantic import TypeAdapter as _TypeAdapter - _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=128)(_TypeAdapter)) + _type_adapter_cache: threading.local = threading.local() + + def _get_cached_type_adapter(type_: type[_T]) -> _TypeAdapter[_T]: + cache: dict[type[Any], _TypeAdapter[Any]] = getattr(_type_adapter_cache, "adapters", None) or {} + _type_adapter_cache.adapters = cache + adapter = cache.get(type_) + if adapter is None: + adapter = _TypeAdapter(type_) + cache[type_] = adapter + return adapter if TYPE_CHECKING: from pydantic import TypeAdapter else: - TypeAdapter = _CachedTypeAdapter + TypeAdapter = _get_cached_type_adapter def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: return TypeAdapter(type_).validate_python(value) diff --git a/tests/test_models.py b/tests/test_models.py index a3f52fb9bc..567346171b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -964,15 +964,21 @@ def __getattr__(self, attr: str) -> Item: ... 
@pytest.mark.skipif(PYDANTIC_V1, reason="TypeAdapter cache is only used in Pydantic v2") -def test_type_adapter_cache_is_bounded() -> None: +def test_type_adapter_cache_is_thread_local() -> None: """Regression test for https://github.com/openai/openai-python/issues/2672 - The TypeAdapter cache must have a bounded maxsize to prevent memory leaks - in multi-threaded environments where parameterized generic types get - regenerated with different identities on each call. + The TypeAdapter cache uses threading.local() to prevent memory leaks + in multi-threaded environments. Each thread maintains its own cache that + is cleaned up when the thread exits. """ - from openai._models import TypeAdapter + import threading - cache_info = TypeAdapter.cache_info() - assert cache_info.maxsize is not None, "TypeAdapter cache maxsize must not be None (unbounded)" - assert cache_info.maxsize > 0, "TypeAdapter cache maxsize must be positive" + from openai._models import TypeAdapter, _type_adapter_cache + + # Verify the cache is thread-local + assert isinstance(_type_adapter_cache, threading.local) + + # Verify TypeAdapter returns a cached instance for the same type + adapter1 = TypeAdapter(int) + adapter2 = TypeAdapter(int) + assert adapter1 is adapter2, "TypeAdapter should return cached instances for the same type"