@@ -75,6 +75,21 @@ class ListResponse(TypedDict):
7575 has_more : bool
7676
7777
78+ # Default cap, in bytes of JSON-encoded data, on what paginate_all accumulates (prevents context bloat)
79+ DEFAULT_PAGINATION_MAX_BYTES = 20_000
80+
81+ # Default HTTP timeout in seconds (to handle slow pagination)
82+ DEFAULT_HTTP_TIMEOUT = 20
83+
84+
85+ class PaginatedResult (TypedDict ):
86+ """Result from paginate_all with size limit protection."""
87+
88+ data : list [dict [str , Any ]] # Items accumulated from the pages that were kept
89+ cursor : str | None # Cursor to pass back to fetch more results when has_more is True; None when no cursor remains
90+ has_more : bool # True if pagination stopped at the size limit OR a cursor is still set
91+
92+
7893def is_oauth_enabled () -> bool :
7994 """
8095 Check if OAuth authentication is enabled via environment variable.
@@ -432,7 +447,7 @@ async def _request(self, method: str, endpoint: str, **kwargs) -> Any:
432447
433448 while retry_count <= max_retries :
434449 try :
435- async with httpx .AsyncClient (follow_redirects = True ) as client :
450+ async with httpx .AsyncClient (follow_redirects = True , timeout = DEFAULT_HTTP_TIMEOUT ) as client :
436451 logger .debug (f"Sending { method } request to { url } " )
437452 response = await client .request (method , url , headers = headers , ** kwargs )
438453
@@ -652,7 +667,7 @@ async def _request_list(self, endpoint: str, **kwargs) -> ListResponse:
652667 }
653668 headers .update (kwargs .pop ("headers" , {}))
654669
655- async with httpx .AsyncClient (follow_redirects = True ) as client :
670+ async with httpx .AsyncClient (follow_redirects = True , timeout = DEFAULT_HTTP_TIMEOUT ) as client :
656671 response = await client .get (url , headers = headers , ** kwargs )
657672 response .raise_for_status ()
658673
@@ -677,19 +692,31 @@ async def _request_list(self, endpoint: str, **kwargs) -> ListResponse:
677692 "has_more" : cursor is not None ,
678693 }
679694
680- async def paginate_all (self , endpoint : str , params : dict [str , Any ] | None = None ) -> list [dict [str , Any ]]:
695+ async def paginate_all (
696+ self ,
697+ endpoint : str ,
698+ params : dict [str , Any ] | None = None ,
699+ max_bytes : int = DEFAULT_PAGINATION_MAX_BYTES ,
700+ ) -> PaginatedResult :
681701 """Fetch all pages of results using cursor-based pagination.
682702
703+ Pagination stops when either all data is fetched or the size limit is reached.
704+
683705 Args:
684706 endpoint: API endpoint path
685707 params: Query parameters to include in the request
708+ max_bytes: Maximum total bytes of JSON data to accumulate (default: 20KB).
709+ When adding a page would exceed this limit, pagination stops and has_more=True is returned with a cursor; the first page is always kept.
686710
687711 Returns:
688- List of all items from all pages
712+ PaginatedResult with data, cursor, and has_more
689713 """
690714 params = params or {}
691- all_items = []
692- cursor = None
715+ all_items : list [dict [str , Any ]] = []
716+ total_bytes = 0
717+ cursor : str | None = None
718+ truncated = False
719+ has_more = False
693720
694721 logger .debug (f"Starting pagination for endpoint '{ endpoint } ' with initial params: { params } " )
695722
@@ -724,9 +751,26 @@ async def paginate_all(self, endpoint: str, params: dict[str, Any] | None = None
724751 logger .debug ("Received empty response data, stopping pagination" )
725752 break
726753
727- logger .debug (f"Received page with { len (response ['data' ])} items" )
754+ # Calculate size of this page's data
755+ page_bytes = len (json .dumps (response ["data" ]).encode ("utf-8" ))
756+
757+ # Check if adding this page would exceed the limit
758+ if total_bytes + page_bytes > max_bytes and all_items :
759+ # We already have some data, stop here to avoid exceeding limit
760+ logger .warning (
761+ f"Pagination stopped due to size limit: { total_bytes } bytes accumulated, "
762+ f"next page would add { page_bytes } bytes (limit: { max_bytes } bytes)"
763+ )
764+ truncated = True
765+ has_more = True
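766+ # NOTE(review): response["cursor"] is the next-page cursor, but this page was not added, so resuming from it appears to skip this page's items - confirm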
767+ cursor = response ["cursor" ]
768+ break
769+
770+ logger .debug (f"Received page with { len (response ['data' ])} items ({ page_bytes } bytes)" )
728771 all_items .extend (response ["data" ])
729- logger .debug (f"Total items collected so far: { len (all_items )} " )
772+ total_bytes += page_bytes
773+ logger .debug (f"Total items collected so far: { len (all_items )} ({ total_bytes } bytes)" )
730774
731775 # Check for next cursor
732776 cursor = response ["cursor" ]
@@ -736,8 +780,15 @@ async def paginate_all(self, endpoint: str, params: dict[str, Any] | None = None
736780 logger .debug ("No next cursor found, pagination complete" )
737781 break
738782
739- logger .info (f"Pagination complete for { endpoint } : collected { len (all_items )} total items" )
740- return all_items
783+ logger .info (
784+ f"Pagination complete for { endpoint } : collected { len (all_items )} items "
785+ f"({ total_bytes } bytes, capped={ truncated } )"
786+ )
787+ return {
788+ "data" : all_items ,
789+ "cursor" : cursor ,
790+ "has_more" : has_more or (cursor is not None ),
791+ }
741792
742793 async def create_honeytoken (
743794 self , name : str , description : str = "" , custom_tags : list | None = None
@@ -908,9 +959,8 @@ async def list_incidents(
908959 endpoint = "/incidents/secrets"
909960
910961 if get_all :
911- # When get_all=True, return all items without cursor
912- all_items = await self .paginate_all (endpoint , params )
913- return {"data" : all_items , "cursor" : None , "has_more" : False }
962+ # When get_all=True, return paginated result with truncation metadata
963+ return await self .paginate_all (endpoint , params )
914964
915965 query_string = "&" .join ([f"{ k } ={ v } " for k , v in params .items ()])
916966 if query_string :
@@ -1043,9 +1093,8 @@ async def list_honeytokens(
10431093 endpoint = "/honeytokens"
10441094
10451095 if get_all :
1046- # When get_all=True, return all items without cursor
1047- all_items = await self .paginate_all (endpoint , params )
1048- return {"data" : all_items , "cursor" : None , "has_more" : False }
1096+ # When get_all=True, return paginated result with truncation metadata
1097+ return await self .paginate_all (endpoint , params )
10491098
10501099 query_string = "&" .join ([f"{ k } ={ v } " for k , v in params .items ()])
10511100 if query_string :
@@ -1499,11 +1548,10 @@ async def list_occurrences(
14991548 if with_sources is not None :
15001549 params ["with_sources" ] = str (with_sources ).lower ()
15011550
1502- # If get_all is True, use paginate_all to get all results
1551+ # If get_all is True, use paginate_all to get all results with truncation metadata
15031552 if get_all :
15041553 logger .info ("Getting all occurrences using cursor-based pagination" )
1505- all_items = await self .paginate_all ("occurrences/secrets" , params )
1506- return {"data" : all_items , "cursor" : None , "has_more" : False }
1554+ return await self .paginate_all ("occurrences/secrets" , params )
15071555
15081556 # Otherwise, get a single page
15091557 logger .info (f"Getting occurrences with params: { params } " )
@@ -1613,9 +1661,8 @@ async def list_sources(
16131661 endpoint = "/sources"
16141662
16151663 if get_all :
1616- # When get_all=True, return all items without cursor
1617- all_items = await self .paginate_all (endpoint , params )
1618- return {"data" : all_items , "cursor" : None , "has_more" : False }
1664+ # When get_all=True, return paginated result with truncation metadata
1665+ return await self .paginate_all (endpoint , params )
16191666
16201667 return await self ._request_list (endpoint , params = params )
16211668
0 commit comments