@@ -73,6 +73,14 @@ def validate_email(
7373 display_name , local_part , domain_part , is_quoted_local_part \
7474 = split_email (email )
7575
76+ if display_name :
77+ # UTS #39 3.3 Email Security Profiles for Identifiers requires
78+ # display names (incorrectly called "quoted-string-part" there)
79+ # to be NFC normalized. Since these are not a part of what we
80+ # are really validating, we won't check that the input was NFC
81+ # normalized, but we'll normalize in output.
82+ display_name = unicodedata .normalize ("NFC" , display_name )
83+
7684 # Collect return values in this instance.
7785 ret = ValidatedEmail ()
7886 ret .original = ((local_part if not is_quoted_local_part
@@ -95,6 +103,15 @@ def validate_email(
95103 # RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
96104 # so we'll return the NFC-normalized local part. Since the caller may use that
97105 # string in place of the original string, ensure it is also valid.
106+ #
107+ # UTS #39 3.3 Email Security Profiles for Identifiers requires local parts
108+ # to be NFKC normalized, which loses some information in characters that can
109+ # be decomposed. We might want to consider applying NFKC normalization, but
110+ # we can't make the change easily because it would break database lookups
111+ # for any caller that stored a normalized address from a previous version of
112+ # this library. (UTS #39 seems to require that the *input* be NFKC normalized
113+ # and has other requirements that are hard to check without additional Unicode
114+ # data, and I don't know whether the rules really apply in the wild.)
98115 normalized_local_part = unicodedata .normalize ("NFC" , ret .local_part )
99116 if normalized_local_part != ret .local_part :
100117 try :
0 commit comments