6 changes: 3 additions & 3 deletions CHANGELOGS.rst
@@ -1,10 +1,10 @@
Change Logs
===========

0.8.12
++++++

0.9.0
+++++

* :pr:`403`: update the serialization of SlidingWindowCache to include parameter sliding_window, patch for sdpa_mask
* :pr:`400`, :pr:`401`, :pr:`402`: improve InputObserver (investigations), add it to the documentation
* :pr:`399`: update CI
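
A minimal sketch (not part of the diff) of the per-layer sliding_window support exercised by the updated tests below; the import paths are assumed from this repository's layout, and transformers>=4.57 is needed for DynamicSlidingWindowLayer:

import torch
import transformers
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
from onnx_diagnostic.torch_export_patches import torch_export_patches

# Build a cache mixing layer types; cls_kwargs passes extra constructor
# arguments to each layer, here the sliding_window of the second one.
cache = make_dynamic_cache(
    [
        (torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
        (torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
    ],
    cls_layers=[
        transformers.cache_utils.DynamicLayer,
        transformers.cache_utils.DynamicSlidingWindowLayer,
    ],
    cls_kwargs=[{}, dict(sliding_window=12)],
)
assert cache.layers[1].sliding_window == 12

# The updated serialization keeps sliding_window through flatten/unflatten.
with torch_export_patches(patch_transformers=True):
    flat, spec = torch.utils._pytree.tree_flatten(cache)
    cache2 = torch.utils._pytree.tree_unflatten(flat, spec)
assert cache2.layers[1].sliding_window == 12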

22 changes: 17 additions & 5 deletions _doc/final/plot_export_gemma3_tiny_input_observer.py
@@ -9,6 +9,7 @@
"""

import pandas
import torch
from onnx_diagnostic import doc
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.export.api import to_onnx
@@ -25,9 +26,10 @@
pipe = pipeline(
"image-text-to-text",
model=model_id,
device="cuda",
device="cpu",
trust_remote_code=True,
max_new_tokens=3,
dtype=torch.float16,
)
messages = [
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
@@ -50,7 +52,9 @@

# %%
# Captures inputs and outputs for the model.
observer = InputObserver()
observer = InputObserver(
missing=dict(pixel_values=torch.empty((0, 3, 896, 896), dtype=torch.float16))
)
with (
register_additional_serialization_functions(patch_transformers=True),
observer(pipe.model),
@@ -76,7 +80,7 @@


filename = "plot_export_gemma3_tiny_input_observer.onnx"
with torch_export_patches(patch_transformers=True):
with torch_export_patches(patch_transformers=True, patch_torch=True, stop_if_static=2):
to_onnx(
pipe.model,
args=(),
@@ -88,8 +92,16 @@

# %%
# Let's measure the discrepancies.
data = observer.check_discrepancies(filename, progress_bar=True)
print(pandas.DataFrame(data))
data = observer.check_discrepancies(filename, progress_bar=True, atol=1e-2, include_io=True)
df = pandas.DataFrame(data)
df.to_excel("plot_export_gemma3_tiny_input_observer.xlsx")
print(df)

# %%
# Let's show the errors.
for row in data:
if not row["SUCCESS"] and "error" in row:
print(row["error"])


# %%
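
For reference, a minimal sketch (not part of the diff) of the observer workflow used above, reduced to a toy model: pixel_values only appears in the first call, so the new missing argument supplies a placeholder for the later iterations. The import path of InputObserver is an assumption based on the test layout in this PR.

import torch
from onnx_diagnostic.investigate import InputObserver  # assumed import path


class TinyModel(torch.nn.Module):
    def forward(self, input_ids=None, pixel_values=None, attention_mask=None):
        return input_ids


model = TinyModel()
observer = InputObserver(missing=dict(pixel_values=torch.empty((0, 3, 896, 896))))
with observer(model):
    # first call: pixel_values is present
    model(
        input_ids=torch.ones((1, 8), dtype=torch.int64),
        pixel_values=torch.ones((1, 3, 896, 896)),
        attention_mask=torch.ones((1, 8), dtype=torch.int64),
    )
    # next call: pixel_values is absent, the observer falls back to the placeholder
    model(
        input_ids=torch.ones((1, 1), dtype=torch.int64),
        attention_mask=torch.ones((1, 9), dtype=torch.int64),
    )

# Dynamic shapes inferred from the captured calls, batch dimension included.
shapes = observer.infer_dynamic_shapes(set_batch_dimension_for=True)
print(shapes)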
2 changes: 1 addition & 1 deletion _doc/index.rst
@@ -240,7 +240,7 @@ The function replaces dynamic dimensions defined as strings by
Older versions
==============

* `0.8.12 <../v0.8.12/index.html>`_
* `0.9.0 <../v0.9.0/index.html>`_
* `0.8.11 <../v0.8.11/index.html>`_
* `0.7.16 <../v0.7.16/index.html>`_
* `0.6.3 <../v0.6.3/index.html>`_
22 changes: 22 additions & 0 deletions _unittests/ut_helpers/test_cache_helper.py
@@ -373,6 +373,28 @@ def test_make_dynamic_cache_2_types(self):
)
self.assertEqual(0, max_diff(cache, cache)["abs"])

@requires_transformers("4.57")
def test_make_dynamic_cache_2_types_kwargs(self):
cache = make_dynamic_cache(
[
(torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
(torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
],
cls_layers=[
transformers.cache_utils.DynamicLayer,
transformers.cache_utils.DynamicSlidingWindowLayer,
],
cls_kwargs=[{}, dict(sliding_window=12)],
)
text = self.string_type(cache, with_shape=True)
self.assertEqual(
"DynamicCache(DynamicLayer(T1s4x5x6x7, T1s4x5x6x7), "
"DynamicSlidingWindowLayer(T1s4x5x6x7, T1s4x5x6x7))",
text,
)
self.assertEqual(0, max_diff(cache, cache)["abs"])
self.assertEqual(cache.layers[1].sliding_window, 12)

@requires_transformers("4.57")
def test_unflatten_flatten_mixed_layers(self):
with torch_export_patches(patch_transformers=True):
80 changes: 60 additions & 20 deletions _unittests/ut_investigate/test_input_observer.py
@@ -695,7 +695,7 @@ def forward(self, x, y, z=None, w=None):
exporter="custom",
filename=proto_name,
)
data = observer.check_discrepancies(proto_name, progress_bar=False)
data = observer.check_discrepancies(proto_name, progress_bar=False, include_io=True)
df = pandas.DataFrame(data)
self.assertLess(df["abs"].max(), 1e-5)

@@ -878,25 +878,65 @@ def forward(self, x=None, y=None):
# self.assertEqual(2, len(args))
# self.assertEqual(len([v for v in args.values() if v is not None]), 2)

def test_infer_dynamic_shapes_exception(self):
"""
dict(input_ids:T7s1x282,
pixel_values:T1s1x3x896x896,
attention_mask:T7s1x282,
position_ids:T7s1x282,
token_type_ids:T7s1x282,cache_position:T7s282
)
dict(input_ids:T7s1x1,attention_mask:T7s1x283,position_ids:T7s1x1,
past_key_values:DynamicCache(
DynamicSlidingWindowLayer(T16s1x1x282x32, T16s1x1x282x32),
DynamicLayer(T16s1x1x282x32, T16s1x1x282x32)),
token_type_ids:T7s1x1,cache_position:T7s1)
dict(input_ids:T7s1x1,attention_mask:T7s1x284,position_ids:T7s1x1,
past_key_values:DynamicCache(
DynamicSlidingWindowLayer(T16s1x1x283x32, T16s1x1x283x32),
DynamicLayer(T16s1x1x283x32, T16s1x1x283x32)),
token_type_ids:T7s1x1,cache_position:T7s1)
"""
def test_infer_dynamic_shapes_missing(self):
class Model(torch.nn.Module):
def forward(
self,
input_ids=None,
pixel_values=None,
attention_mask=None,
position_ids=None,
past_key_values=None,
token_type_ids=None,
cache_position=None,
):
return input_ids

inputs = [
dict(
input_ids=torch.ones((1, 282), dtype=torch.int64),
pixel_values=torch.ones((1, 3, 896, 896), dtype=torch.int64),
attention_mask=torch.ones((1, 282), dtype=torch.int64),
position_ids=torch.ones((1, 282), dtype=torch.int64),
token_type_ids=torch.ones((1, 282), dtype=torch.int64),
cache_position=torch.ones((282,), dtype=torch.int64),
),
dict(
input_ids=torch.ones((1, 1), dtype=torch.int64),
attention_mask=torch.ones((1, 283), dtype=torch.int64),
position_ids=torch.ones((1, 1), dtype=torch.int64),
past_key_values=torch.rand((1, 1, 282, 32)),
token_type_ids=torch.ones((1, 1), dtype=torch.int64),
cache_position=torch.ones((1,), dtype=torch.int64),
),
dict(
input_ids=torch.ones((1, 1), dtype=torch.int64),
attention_mask=torch.ones((1, 284), dtype=torch.int64),
position_ids=torch.ones((1, 1), dtype=torch.int64),
past_key_values=torch.rand((1, 1, 283, 32)),
token_type_ids=torch.ones((1, 1), dtype=torch.int64),
cache_position=torch.ones((1,), dtype=torch.int64),
),
]

model = Model()
observer = InputObserver(missing=dict(pixel_values=torch.empty((0, 3, 896, 896))))
with observer(model):
for kwargs in inputs:
model(**kwargs)

shapes = observer.infer_dynamic_shapes(set_batch_dimension_for=True)
cst = torch.export.Dim.DYNAMIC
expected = {
"input_ids": {0: cst, 1: cst},
"pixel_values": {0: cst},
"attention_mask": {0: cst, 1: cst},
"position_ids": {0: cst, 1: cst},
"past_key_values": {0: cst, 2: cst},
"token_type_ids": {0: cst, 1: cst},
"cache_position": {0: cst},
}
self.assertEqual(expected, shapes)


if __name__ == "__main__":
89 changes: 89 additions & 0 deletions _unittests/ut_investigate/test_input_observer_transformers.py
@@ -216,6 +216,95 @@ def forward(self, cache):
args["cache"].cross_attention_cache.layers[0].keys.shape, (1, 6, 1500, 64)
)

@requires_transformers("4.57")
def test_infer_dynamic_shapes_missing_pixels(self):
import transformers

class Model(torch.nn.Module):
def forward(
self,
input_ids=None,
pixel_values=None,
attention_mask=None,
position_ids=None,
past_key_values=None,
token_type_ids=None,
cache_position=None,
):
return input_ids

inputs = [
dict(
input_ids=torch.ones((1, 282), dtype=torch.int64),
pixel_values=torch.ones((1, 3, 896, 896), dtype=torch.int64),
attention_mask=torch.ones((1, 282), dtype=torch.int64),
position_ids=torch.ones((1, 282), dtype=torch.int64),
token_type_ids=torch.ones((1, 282), dtype=torch.int64),
cache_position=torch.ones((282,), dtype=torch.int64),
),
dict(
input_ids=torch.ones((1, 1), dtype=torch.int64),
attention_mask=torch.ones((1, 283), dtype=torch.int64),
position_ids=torch.ones((1, 1), dtype=torch.int64),
past_key_values=make_dynamic_cache(
[
(torch.rand((1, 1, 282, 32)), torch.rand((1, 1, 282, 32))),
(torch.rand((1, 1, 282, 32)), torch.rand((1, 1, 282, 32))),
],
cls_layers=[
transformers.cache_utils.DynamicSlidingWindowLayer,
transformers.cache_utils.DynamicLayer,
],
),
token_type_ids=torch.ones((1, 1), dtype=torch.int64),
cache_position=torch.ones((1,), dtype=torch.int64),
),
dict(
input_ids=torch.ones((1, 1), dtype=torch.int64),
attention_mask=torch.ones((1, 284), dtype=torch.int64),
position_ids=torch.ones((1, 1), dtype=torch.int64),
past_key_values=make_dynamic_cache(
[
(torch.rand((1, 1, 283, 32)), torch.rand((1, 1, 283, 32))),
(torch.rand((1, 1, 283, 32)), torch.rand((1, 1, 283, 32))),
],
cls_layers=[
transformers.cache_utils.DynamicSlidingWindowLayer,
transformers.cache_utils.DynamicLayer,
],
),
token_type_ids=torch.ones((1, 1), dtype=torch.int64),
cache_position=torch.ones((1,), dtype=torch.int64),
),
]

model = Model()
observer = InputObserver(missing=dict(pixel_values=torch.empty((0, 3, 896, 896))))
with (
register_additional_serialization_functions(patch_transformers=True),
observer(model),
):
for kwargs in inputs:
model(**kwargs)

shapes = observer.infer_dynamic_shapes(set_batch_dimension_for=True)
cst = torch.export.Dim.DYNAMIC
expected = {
"input_ids": {0: cst, 1: cst},
"pixel_values": {0: cst},
"attention_mask": {0: cst, 1: cst},
"position_ids": {0: cst, 1: cst},
"past_key_values": [
{0: cst, 2: cst},
{0: cst, 2: cst},
{0: cst, 2: cst},
{0: cst, 2: cst},
],
"token_type_ids": {0: cst, 1: cst},
"cache_position": {0: cst},
}
self.assertEqual(expected, shapes)


if __name__ == "__main__":
unittest.main(verbosity=2)
@@ -275,13 +275,19 @@ def test_sliding_window_cache_flatten(self):
@unittest.skipIf(make_sliding_window_cache, "transformers<5")
def test_sliding_window_cache_flatten5(self):
cache = make_dynamic_cache(
[(torch.rand((4, 4, 4, 4)), torch.rand((4, 4, 4, 4)))],
[
(torch.rand((4, 4, 4, 4)), torch.rand((4, 4, 4, 4))),
(torch.rand((4, 4, 4, 4)), torch.rand((4, 4, 4, 4))),
],
cls_layers="DynamicSlidingWindowLayer",
cls_kwargs=[dict(sliding_window=11), dict(sliding_window=12)],
)
self.assertEqual(cache.layers[0].sliding_window, 11)
self.assertEqual(cache.layers[1].sliding_window, 12)
with torch_export_patches(patch_transformers=True):
flat, _spec = torch.utils._pytree.tree_flatten(cache)
self.assertEqual(
"#2[T1s4x4x4x4,T1s4x4x4x4]",
"#4[T1s4x4x4x4,T1s4x4x4x4,T1s4x4x4x4,T1s4x4x4x4]",
self.string_type(flat, with_shape=True),
)
cache2 = torch.utils._pytree.tree_unflatten(flat, _spec)
Expand All @@ -292,6 +298,8 @@ def test_sliding_window_cache_flatten5(self):
self.assertEqual(
[type(lay) for lay in cache.layers], [type(lay) for lay in cache2.layers]
)
self.assertEqual(cache2.layers[0].sliding_window, 11)
self.assertEqual(cache2.layers[1].sliding_window, 12)

@ignore_warnings(UserWarning)
@requires_torch("2.7.99")
34 changes: 34 additions & 0 deletions _unittests/ut_torch_export_patches/test_patch_transformers.py
@@ -65,6 +65,40 @@ def test_sdpa_mask_recent_torch(self):
got = patched_sdpa_mask_recent_torch(**kwargs)
self.assertEqualArray(expected, got)

@requires_transformers("4.99")
def test_sdpa_mask_patched(self):
sdpa_mask = transformers.masking_utils.sdpa_mask
patched_sdpa_mask = patch_transformers.patched_sdpa_mask
kwargs = {
"batch_size": 1,
"cache_position": torch.tensor([3], dtype=torch.int64),
"kv_length": 4,
"kv_offset": 0,
"mask_function": transformers.masking_utils.causal_mask_function,
"attention_mask": torch.tensor([[True, True, True, True]]),
"local_size": None,
"allow_is_causal_skip": True,
"allow_is_bidirectional_skip": False,
}
expected = sdpa_mask(**kwargs)
got = patched_sdpa_mask(**kwargs)
self.assertEqual(expected, got)

kwargs = {
"batch_size": 1,
"cache_position": torch.tensor([3], dtype=torch.int64),
"kv_length": 4,
"kv_offset": 0,
"mask_function": transformers.masking_utils.causal_mask_function,
"attention_mask": torch.tensor([[True, True, True, True]]),
"local_size": None,
"allow_is_causal_skip": False,
"allow_is_bidirectional_skip": False,
}
expected = sdpa_mask(**kwargs)
got = patched_sdpa_mask(**kwargs)
self.assertEqualArray(expected, got)

@requires_transformers("4.99")
def test_sdpa_mask_recent_torch_is_running(self):
def _copy_vmap_for_bhqkv(mask_function, bh_indices=True):
2 changes: 1 addition & 1 deletion onnx_diagnostic/__init__.py
@@ -3,5 +3,5 @@
Functions, classes to dig into a model when this one is right, slow, wrong...
"""

__version__ = "0.8.12"
__version__ = "0.9.0"
__author__ = "Xavier Dupré"