Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 27 additions & 21 deletions babel/messages/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from babel.core import Locale, UnknownLocaleError
from babel.dates import format_datetime
from babel.messages.plurals import get_plural
from babel.util import LOCALTZ, _cmp, distinct
from babel.util import LOCALTZ, _cmp

if TYPE_CHECKING:
from typing_extensions import TypeAlias
Expand Down Expand Up @@ -164,7 +164,7 @@ def __init__(
if not string and self.pluralizable:
string = ('', '')
self.string = string
self.locations = list(distinct(locations))
self.locations = list(dict.fromkeys(locations)) if locations else []
self.flags = set(flags)
if id and self.python_format:
self.flags.add('python-format')
Expand All @@ -174,12 +174,15 @@ def __init__(
self.flags.add('python-brace-format')
else:
self.flags.discard('python-brace-format')
self.auto_comments = list(distinct(auto_comments))
self.user_comments = list(distinct(user_comments))
if isinstance(previous_id, str):
self.previous_id = [previous_id]
self.auto_comments = list(dict.fromkeys(auto_comments)) if auto_comments else []
self.user_comments = list(dict.fromkeys(user_comments)) if user_comments else []
if previous_id:
if isinstance(previous_id, str):
self.previous_id = [previous_id]
else:
self.previous_id = list(previous_id)
else:
self.previous_id = list(previous_id)
self.previous_id = []
self.lineno = lineno
self.context = context

Expand Down Expand Up @@ -289,9 +292,12 @@ def python_format(self) -> bool:

:type: `bool`"""
ids = self.id
if not isinstance(ids, (list, tuple)):
ids = [ids]
return any(PYTHON_FORMAT.search(id) for id in ids)
if isinstance(ids, (list, tuple)):
for id in ids: # Explicit loop for performance reasons.
if PYTHON_FORMAT.search(id):
return True
return False
return bool(PYTHON_FORMAT.search(ids))

@property
def python_brace_format(self) -> bool:
Expand All @@ -304,9 +310,12 @@ def python_brace_format(self) -> bool:

:type: `bool`"""
ids = self.id
if not isinstance(ids, (list, tuple)):
ids = [ids]
return any(_has_python_brace_format(id) for id in ids)
if isinstance(ids, (list, tuple)):
for id in ids: # Explicit loop for performance reasons.
if _has_python_brace_format(id):
return True
return False
return _has_python_brace_format(ids)


class TranslationError(Exception):
Expand Down Expand Up @@ -729,12 +738,9 @@ def __setitem__(self, id: _MessageID, message: Message) -> None:
# The new message adds pluralization
current.id = message.id
current.string = message.string
current.locations = list(distinct(current.locations +
message.locations))
current.auto_comments = list(distinct(current.auto_comments +
message.auto_comments))
current.user_comments = list(distinct(current.user_comments +
message.user_comments))
current.locations = list(dict.fromkeys([*current.locations, *message.locations]))
current.auto_comments = list(dict.fromkeys([*current.auto_comments, *message.auto_comments]))
current.user_comments = list(dict.fromkeys([*current.user_comments, *message.user_comments]))
current.flags |= message.flags
elif id == '':
# special treatment for the header message
Expand Down Expand Up @@ -916,8 +922,8 @@ def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, s
assert oldmsg is not None
message.string = oldmsg.string

if keep_user_comments:
message.user_comments = list(distinct(oldmsg.user_comments))
if keep_user_comments and oldmsg.user_comments:
message.user_comments = list(dict.fromkeys(oldmsg.user_comments))

if isinstance(message.id, (list, tuple)):
if not isinstance(message.string, (list, tuple)):
Expand Down
169 changes: 70 additions & 99 deletions babel/messages/pofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@

from babel.core import Locale
from babel.messages.catalog import Catalog, Message
from babel.util import TextWrapper, _cmp
from babel.util import TextWrapper

if TYPE_CHECKING:
from typing import IO, AnyStr

from _typeshed import SupportsWrite


_unescape_re = re.compile(r'\\([\\trn"])')


def unescape(string: str) -> str:
r"""Reverse `escape` the given string.

Expand All @@ -45,7 +48,10 @@
return '\r'
# m is \ or "
return m
return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1])

if "\\" not in string: # Fast path: there's nothing to unescape
return string[1:-1]
return _unescape_re.sub(replace_escapes, string[1:-1])


def denormalize(string: str) -> str:
Expand Down Expand Up @@ -73,7 +79,7 @@
escaped_lines = string.splitlines()
if string.startswith('""'):
escaped_lines = escaped_lines[1:]
return ''.join(unescape(line) for line in escaped_lines)
return ''.join(map(unescape, escaped_lines))
else:
return unescape(string)

Expand Down Expand Up @@ -132,48 +138,14 @@
self.lineno = lineno


class _NormalizedString:

class _NormalizedString(list):
def __init__(self, *args: str) -> None:
self._strs: list[str] = []
for arg in args:
self.append(arg)

def append(self, s: str) -> None:
self._strs.append(s.strip())
super().__init__(map(str.strip, args))

def denormalize(self) -> str:
return ''.join(unescape(s) for s in self._strs)

def __bool__(self) -> bool:
return bool(self._strs)

def __repr__(self) -> str:
return os.linesep.join(self._strs)

def __cmp__(self, other: object) -> int:
if not other:
return 1

return _cmp(str(self), str(other))

def __gt__(self, other: object) -> bool:
return self.__cmp__(other) > 0

def __lt__(self, other: object) -> bool:
return self.__cmp__(other) < 0

def __ge__(self, other: object) -> bool:
return self.__cmp__(other) >= 0

def __le__(self, other: object) -> bool:
return self.__cmp__(other) <= 0

def __eq__(self, other: object) -> bool:
return self.__cmp__(other) == 0

def __ne__(self, other: object) -> bool:
return self.__cmp__(other) != 0
if not self:
return ""
return ''.join(map(unescape, self))


class PoFileParser:
Expand All @@ -183,13 +155,6 @@
See `read_po` for simple cases.
"""

_keywords = [
'msgid',
'msgstr',
'msgctxt',
'msgid_plural',
]

def __init__(self, catalog: Catalog, ignore_obsolete: bool = False, abort_invalid: bool = False) -> None:
self.catalog = catalog
self.ignore_obsolete = ignore_obsolete
Expand All @@ -216,23 +181,20 @@
Add a message to the catalog based on the current parser state and
clear the state ready to process the next message.
"""
self.translations.sort()
if len(self.messages) > 1:
msgid = tuple(m.denormalize() for m in self.messages)
else:
msgid = self.messages[0].denormalize()
if isinstance(msgid, (list, tuple)):
string = ['' for _ in range(self.catalog.num_plurals)]
for idx, translation in self.translations:
for idx, translation in sorted(self.translations):
if idx >= self.catalog.num_plurals:
self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog")
continue
string[idx] = translation.denormalize()
string = tuple(string)
else:
msgid = self.messages[0].denormalize()
string = self.translations[0][1].denormalize()
msgctxt = self.context.denormalize() if self.context else None
message = Message(msgid, string, list(self.locations), set(self.flags),
message = Message(msgid, string, self.locations, self.flags,
self.auto_comments, self.user_comments, lineno=self.offset + 1,
context=msgctxt)
if self.obsolete:
Expand All @@ -247,27 +209,19 @@
if self.messages:
if not self.translations:
self._invalid_pofile("", self.offset, f"missing msgstr for msgid '{self.messages[0].denormalize()}'")
self.translations.append([0, _NormalizedString("")])
self.translations.append([0, _NormalizedString()])
self._add_message()

def _process_message_line(self, lineno, line, obsolete=False) -> None:
if line.startswith('"'):
if not line:
return

Check warning on line 217 in babel/messages/pofile.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/pofile.py#L217

Added line #L217 was not covered by tests
if line[0] == '"':
self._process_string_continuation_line(line, lineno)
else:
self._process_keyword_line(lineno, line, obsolete)

def _process_keyword_line(self, lineno, line, obsolete=False) -> None:

for keyword in self._keywords:
try:
if line.startswith(keyword) and line[len(keyword)] in [' ', '[']:
arg = line[len(keyword):]
break
except IndexError:
self._invalid_pofile(line, lineno, "Keyword must be followed by a string")
else:
self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
return
keyword, _, arg = line.partition(' ')

if keyword in ['msgid', 'msgctxt']:
self._finish_current_message()
Expand All @@ -283,19 +237,23 @@
self.in_msgctxt = False
self.in_msgid = True
self.messages.append(_NormalizedString(arg))
return

elif keyword == 'msgstr':
if keyword == 'msgctxt':
self.in_msgctxt = True
self.context = _NormalizedString(arg)
return

if keyword == 'msgstr' or keyword.startswith('msgstr['):
self.in_msgid = False
self.in_msgstr = True
if arg.startswith('['):
idx, msg = arg[1:].split(']', 1)
self.translations.append([int(idx), _NormalizedString(msg)])
else:
self.translations.append([0, _NormalizedString(arg)])
kwarg, has_bracket, idxarg = keyword.partition('[')
idx = int(idxarg[:-1]) if has_bracket else 0
s = _NormalizedString(arg) if arg != '""' else _NormalizedString()
self.translations.append([idx, s])
return

elif keyword == 'msgctxt':
self.in_msgctxt = True
self.context = _NormalizedString(arg)
self._invalid_pofile(line, lineno, "Unknown or misformatted keyword")

def _process_string_continuation_line(self, line, lineno) -> None:
if self.in_msgid:
Expand All @@ -307,49 +265,62 @@
else:
self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
return
s.append(line)
s.append(line.strip()) # For performance reasons, `NormalizedString` doesn't strip internally

def _process_comment(self, line) -> None:

self._finish_current_message()

if line[1:].startswith(':'):
prefix = line[:2]
if prefix == '#:':
for location in _extract_locations(line[2:]):
pos = location.rfind(':')
if pos >= 0:
a, colon, b = location.rpartition(':')
if colon:
try:
lineno = int(location[pos + 1:])
self.locations.append((a, int(b)))
except ValueError:
continue
self.locations.append((location[:pos], lineno))
else:
else: # No line number specified
self.locations.append((location, None))
elif line[1:].startswith(','):
for flag in line[2:].lstrip().split(','):
self.flags.append(flag.strip())
elif line[1:].startswith('.'):
return

if prefix == '#,':
self.flags.extend(flag.strip() for flag in line[2:].lstrip().split(','))
return

if prefix == '#.':
# These are called auto-comments
comment = line[2:].strip()
if comment: # Just check that we're not adding empty comments
self.auto_comments.append(comment)
else:
# These are called user comments
self.user_comments.append(line[1:].strip())
return

# These are called user comments
self.user_comments.append(line[1:].strip())

def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
"""
Reads from the file-like object `fileobj` and adds any po file
units found in it to the `Catalog` supplied to the constructor.
Reads from the file-like object (or iterable of string-likes) `fileobj`
and adds any po file units found in it to the `Catalog`
supplied to the constructor.

All of the items in the iterable must be the same type; either `str`
or `bytes` (decoded with the catalog charset), but not a mixture.
"""
needs_decode = None

for lineno, line in enumerate(fileobj):
line = line.strip()
if not isinstance(line, str):
line = line.decode(self.catalog.charset)
if needs_decode is None:
# If we don't yet know whether we need to decode,
# let's find out now.
needs_decode = not isinstance(line, str)
if not line:
continue
if line.startswith('#'):
if line[1:].startswith('~'):
if needs_decode:
line = line.decode(self.catalog.charset)
if line[0] == '#':
if line[:2] == '#~':
self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
else:
try:
Expand All @@ -364,8 +335,8 @@
# No actual messages found, but there was some info in comments, from which
# we'll construct an empty header message
if not self.counter and (self.flags or self.user_comments or self.auto_comments):
self.messages.append(_NormalizedString('""'))
self.translations.append([0, _NormalizedString('""')])
self.messages.append(_NormalizedString())
self.translations.append([0, _NormalizedString()])
self._add_message()

def _invalid_pofile(self, line, lineno, msg) -> None:
Expand Down
Loading
Loading