Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
A Dissect module implementing parsers for various database formats, including:

- Berkeley DB, used for example in older RPM databases
- Microsofts Extensible Storage Engine (ESE), used for example in Active Directory, Exchange and Windows Update
- Microsoft's Extensible Storage Engine (ESE), used for example in Active Directory, Exchange and Windows Update
- Google's LevelDB, used by browsers to store LocalStorage, SessionStorage and serialized IndexedDB databases
- SQLite3, commonly used by applications to store configuration data

For more information, please see [the documentation](https://docs.dissect.tools/en/latest/projects/dissect.database/index.html).
Expand Down
8 changes: 8 additions & 0 deletions dissect/database/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
from __future__ import annotations

from dissect.database.bsd.db import DB
from dissect.database.chromium.localstorage.localstorage import LocalStorage
from dissect.database.chromium.sessionstorage.sessionstorage import SessionStorage
from dissect.database.ese.ese import ESE
from dissect.database.exception import Error
from dissect.database.indexeddb.indexeddb import IndexedDB
from dissect.database.leveldb.leveldb import LevelDB
from dissect.database.sqlite3.sqlite3 import SQLite3

# Public names of the top-level ``dissect.database`` package; every entry is bound by
# one of the imports above.
__all__ = [
"DB",
"ESE",
"Error",
"IndexedDB",
"LevelDB",
"LocalStorage",
"SQLite3",
"SessionStorage",
]
9 changes: 9 additions & 0 deletions dissect/database/chromium/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

from dissect.database.chromium.localstorage.localstorage import LocalStorage
from dissect.database.chromium.sessionstorage.sessionstorage import SessionStorage

# Public names of the ``chromium`` package; both are re-exported from their sub-packages.
__all__ = [
"LocalStorage",
"SessionStorage",
]
12 changes: 12 additions & 0 deletions dissect/database/chromium/localstorage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from __future__ import annotations

from dissect.database.chromium.localstorage.c_localstorage import c_localstorage
from dissect.database.chromium.localstorage.localstorage import Key, LocalStorage, MetaKey, Store

# Public names of the ``localstorage`` package: the parser classes plus the cstruct
# instance holding the metadata structure definitions.
__all__ = [
"Key",
"LocalStorage",
"MetaKey",
"Store",
"c_localstorage",
]
28 changes: 28 additions & 0 deletions dissect/database/chromium/localstorage/c_localstorage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

from dissect.cstruct import cstruct

from dissect.database.util.protobuf import ProtobufVarint, ProtobufVarint32

# References:
# - https://github.com/chromium/chromium/blob/main/components/services/storage/dom_storage/local_storage_database.proto
# cstruct source for the two metadata structures. Every payload field is a varint that is
# preceded by a one-byte ``*_type`` field.
# NOTE(review): the ``*_type`` bytes are presumably the protobuf field tags of the message
# described in the reference above -- confirm against the .proto before relying on them.
localstorage_def = """
struct LocalStorageAreaWriteMetaData {
uint8 lm_type;
varint last_modified;

uint8 sb_type;
varint size_bytes;
};

struct LocalStorageAreaAccessMetaData {
uint8 la_type;
varint last_accessed;
};
"""

# The custom varint types have to be registered before the definition is loaded, because
# the structures above use ``varint``. ``varint64`` and ``varint32`` are registered as well
# but are not used by the definition string in this module.
c_localstorage = cstruct()
c_localstorage.add_custom_type("varint", ProtobufVarint, size=None, alignment=1, signed=False)
c_localstorage.add_custom_type("varint64", ProtobufVarint, size=None, alignment=1, signed=False)
c_localstorage.add_custom_type("varint32", ProtobufVarint32, size=None, alignment=1, signed=False)
c_localstorage.load(localstorage_def)
235 changes: 235 additions & 0 deletions dissect/database/chromium/localstorage/localstorage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING

from dissect.util.ts import webkittimestamp

from dissect.database.chromium.localstorage import c_localstorage
from dissect.database.leveldb.c_leveldb import c_leveldb
from dissect.database.leveldb.leveldb import LevelDB

if TYPE_CHECKING:
from collections.abc import Iterator
from pathlib import Path


class LocalStorage:
    """Google LocalStorage implementation.

    Opens the LevelDB database stored in a directory and groups its live ``META:`` and
    ``METAACCESS:`` records into one :class:`Store` per decoded meta key.

    References:
        - https://www.cclsolutionsgroup.com/post/chromium-session-storage-and-local-storage

    Args:
        path: Directory that holds the LevelDB files of the LocalStorage database.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        NotADirectoryError: If ``path`` exists but is not a directory.
    """

    stores: list[Store]

    def __init__(self, path: Path):
        if not path.exists():
            raise FileNotFoundError(f"Provided path does not exist: {path!r}")

        if not path.is_dir():
            raise NotADirectoryError(f"Provided path is not a directory: {path!r}")

        self._leveldb = LevelDB(path)

        self.path = path
        self.stores = list(self._get_stores())

    def __repr__(self) -> str:
        return f"<LocalStorage path='{self.path!s}' stores={len(self.stores)!r}>"

    def _get_stores(self) -> Iterator[Store]:
        """Iterate over LevelDB records for store meta information.

        Only records in the ``LIVE`` state are considered. All meta keys that decode to the
        same key end up in the same :class:`Store`.

        Yields:
            One :class:`Store` per distinct meta key, in order of first appearance.
        """
        meta_keys: dict[str, list[MetaKey]] = {}

        for record in self._leveldb.records:
            if record.state != c_leveldb.RecordState.LIVE:
                continue

            # One call covers both prefixes; anything else is not store metadata.
            if not record.key.startswith((b"META:", b"METAACCESS:")):
                continue

            # ``METAACCESS:`` does not start with ``META:`` (its fifth byte is ``A``), so
            # this test cleanly separates the two kinds of meta key.
            cls = MetaKey if record.key.startswith(b"META:") else MetaAccessKey
            meta_key = cls(record.key, record.value, record.state, record.sequence)
            meta_keys.setdefault(meta_key.key, []).append(meta_key)

        for meta in meta_keys.values():
            yield Store(self, meta)

    def store(self, key: str) -> Store | None:
        """Get a single store by host name.

        Args:
            key: Host name to look for; compared against :attr:`Store.host`.

        Returns:
            The first matching :class:`Store`, or ``None`` if there is none.
        """
        for store in self.stores:
            if store.host == key:
                return store
        return None


class Store:
    """Represents a single store of keys.

    A store bundles the meta keys that share one decoded key (the host) and gives access
    to the record keys that are stored under that host.

    Args:
        local_storage: The :class:`LocalStorage` this store belongs to.
        meta: The meta keys of this store; must contain at least one entry.
    """

    host: str
    meta: list[MetaKey]

    def __init__(self, local_storage: LocalStorage, meta: list[MetaKey]):
        self._local_storage = local_storage
        # Filled by ``records`` once a complete pass over the database has finished.
        self._records: list[RecordKey] = []

        self.host = meta[0].key
        # Kept in sequence order because ``RecordKey.meta`` stops at the first newer entry.
        self.meta = sorted(meta, key=lambda m: m.sequence)

    def __repr__(self) -> str:
        return f"<Store host={self.host!r} records={len(self._records)!r}>"

    @property
    def records(self) -> Iterator[RecordKey]:
        """Yield all records related to this store.

        The first complete iteration scans the LevelDB records and caches the result;
        later iterations are served from that cache. The cache is only published after a
        full pass, so an iteration that is abandoned halfway never leaves a truncated
        cache behind.

        Yields:
            One :class:`RecordKey` per LevelDB record whose key starts with this store's prefix.
        """
        if self._records:
            yield from self._records
            # Stop here: falling through would scan the database again and yield every
            # record a second time.
            return

        # e.g. with "_https://google.com\x00\x01MyKey", the prefix would be "_https://google.com\x00"
        prefix = RecordKey.prefix + self.host.encode("iso-8859-1") + b"\x00"

        found = []
        for record in self._local_storage._leveldb.records:
            if record.key.startswith(prefix):
                key = RecordKey(self, record.key, record.value, record.state, record.sequence)
                found.append(key)
                yield key

        # Only reached when the caller exhausted the generator.
        self._records = found

    def get(self, key: str) -> RecordKey | None:
        """Get a single :class:`RecordKey` by the given string identifier.

        Args:
            key: The decoded key to look for.

        Returns:
            The first record whose ``key`` equals ``key``, or ``None`` if there is none.
        """
        for record in self.records:
            if record.key == key:
                return record
        return None


class Key:
    """Abstract LocalStorage key class.

    Keeps the raw key and value of a LevelDB record together with its state and sequence
    number, verifies the key prefix and then lets the subclass decode ``key`` and ``value``.

    Subclasses provide ``prefix`` and implement ``_decode_key`` and ``_decode_value``.

    Args:
        raw_key: The undecoded record key.
        raw_value: The undecoded record value.
        state: The LevelDB state of the record.
        sequence: The LevelDB sequence number of the record.

    Raises:
        ValueError: If ``raw_key`` does not start with the ``prefix`` of the class.
    """

    prefix: bytes
    state: c_leveldb.RecordState
    sequence: int
    key: str
    value: str

    def __init__(self, raw_key: bytes, raw_value: bytes, state: c_leveldb.RecordState, sequence: int):
        self._raw_key = raw_key
        self._raw_value = raw_value

        self.state = state
        self.sequence = sequence

        has_expected_prefix = raw_key.startswith(self.prefix)
        if not has_expected_prefix:
            raise ValueError(
                f"Invalid key prefix {raw_key[: len(self.prefix)]!r} for {self.__class__.__name__}: expected {self.prefix!r}"  # noqa: E501
            )

        # The raw attributes are set above, so the decoders can read them.
        self._decode_key()
        self._decode_value()

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} state={self.state!r} sequence={self.sequence!r} key={self.key!r} value={self.value!r}>"  # noqa: E501

    def _decode_key(self) -> None:
        """Set ``self.key`` from ``self._raw_key``; implemented by subclasses."""
        raise NotImplementedError

    def _decode_value(self) -> None:
        """Set ``self.value`` from ``self._raw_value``; implemented by subclasses."""
        raise NotImplementedError


class MetaKey(Key):
    """Represents a LocalStorage meta key.

    The decoded ``key`` is the raw key without the ``META:`` prefix; the ``value`` is the
    raw value parsed as a ``LocalStorageAreaWriteMetaData`` structure.
    """

    prefix: bytes = b"META:"
    value: c_localstorage.LocalStorageAreaWriteMetaData

    def _decode_key(self) -> None:
        """Strip the class prefix from the raw key and decode the rest as ISO-8859-1."""
        without_prefix = self._raw_key.removeprefix(self.prefix)
        self.key = without_prefix.decode("iso-8859-1")

    def _decode_value(self) -> None:
        """Parse the raw value as write metadata."""
        structure = c_localstorage.LocalStorageAreaWriteMetaData
        self.value = structure(self._raw_value)


class MetaAccessKey(MetaKey):
    """Represents a LocalStorage meta access key.

    Key decoding is inherited from :class:`MetaKey` and uses the ``METAACCESS:`` prefix of
    this class; the ``value`` is parsed as a ``LocalStorageAreaAccessMetaData`` structure.

    References:
        - https://chromium-review.googlesource.com/c/chromium/src/+/5585301
    """

    prefix: bytes = b"METAACCESS:"
    value: c_localstorage.LocalStorageAreaAccessMetaData

    def _decode_value(self) -> None:
        """Parse the raw value as access metadata."""
        structure = c_localstorage.LocalStorageAreaAccessMetaData
        self.value = structure(self._raw_value)


class RecordKey(Key):
    """Represents a LocalStorage record key.

    The raw key consists of the ``_`` prefix, the host, a NUL byte and an encoded string.
    The raw value is an encoded string. An encoded string starts with a type byte that
    selects the codec of the bytes that follow (``0x00``: UTF-16-LE, ``0x01``: ISO-8859-1).

    ``key`` and ``value`` are always assigned. They are ``None`` when there is nothing to
    decode or when the type byte is not one of the two known values, so such a record can
    still be printed and compared; the undecoded bytes stay available in ``_raw_key`` and
    ``_raw_value``.

    Args:
        store: The :class:`Store` this record belongs to.
        raw_key: The undecoded record key.
        raw_value: The undecoded record value.
        state: The LevelDB state of the record.
        sequence: The LevelDB sequence number of the record.

    Raises:
        ValueError: If ``raw_key`` does not start with ``prefix`` (raised by :class:`Key`).
    """

    prefix: bytes = b"_"
    key: str | None
    value: str | None

    # Type byte of an encoded string mapped to the codec of the bytes that follow it.
    _encodings = {0x00: "utf-16-le", 0x01: "iso-8859-1"}

    def __init__(self, store: Store, raw_key: bytes, raw_value: bytes, state: c_leveldb.RecordState, sequence: int):
        super().__init__(raw_key, raw_value, state, sequence)
        self.store = store

    @cached_property
    def meta(self) -> dict:
        """Calculate the metadata that likely belongs to this key.

        In a batch write action, meta keys are written first, followed by the records belonging to that batch.
        To identify a candidate meta key for this record key, we iterate over the meta keys for the store that
        this record key belongs to and choose the meta key(s) with the closest sequence number that is lower than
        the record key sequence number. This introduces a possible inaccuracy for the matched timestamp(s).

        The accuracy of these timestamps should be taken with a grain of salt when interpreting them. A latency of
        5 to 60 seconds was observed between a script requesting a write and the key data ending up on disk. The
        latency depends on several factors, such as how many write actions are happening at the time of write and
        the amount of writes per host (website) happening (this is limited to 60 per hour).

        The reader (you!) is invited to invent a smarter method to efficiently find metadata belonging to a record key.

        References:
            - local_storage_impl.cc

        Returns:
            A dictionary with the keys ``created``, ``last_modified`` and ``last_accessed``; a value is ``None``
            when no matching meta key provided it.
        """
        meta = {"created": None, "last_modified": None, "last_accessed": None}

        # ``store.meta`` is sorted by sequence number, so the loop can stop at the first newer meta key.
        for meta_key in self.store.meta:
            if meta_key.sequence < self.sequence:
                if hasattr(meta_key.value, "last_modified"):
                    meta["last_modified"] = webkittimestamp(meta_key.value.last_modified)
                    if not meta["created"]:
                        meta["created"] = meta["last_modified"]
                if hasattr(meta_key.value, "last_accessed"):
                    meta["last_accessed"] = webkittimestamp(meta_key.value.last_accessed)
                    if not meta["created"] or meta["created"] > meta["last_accessed"]:
                        meta["created"] = meta["last_accessed"]

            elif meta_key.sequence > self.sequence:
                break

        return meta

    @classmethod
    def _decode_string(cls, buf: bytes) -> str | None:
        """Decode a string that is preceded by a type byte.

        Args:
            buf: The type byte followed by the encoded characters.

        Returns:
            The decoded string, or ``None`` if ``buf`` is empty or its type byte is unknown.
        """
        if not buf:
            return None

        encoding = cls._encodings.get(buf[0])
        if encoding is None:
            return None

        return buf[1:].decode(encoding)

    def _decode_key(self) -> None:
        """Decode the part of the raw key that follows the first NUL byte into ``key``."""
        # Everything up to the first NUL byte is the host; the encoded key follows it.
        _, _, buf = self._raw_key.removeprefix(self.prefix).partition(b"\x00")
        self.key = self._decode_string(buf)

    def _decode_value(self) -> None:
        """Decode the raw value into ``value``."""
        self.value = self._decode_string(self._raw_value)
9 changes: 9 additions & 0 deletions dissect/database/chromium/sessionstorage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

from dissect.database.chromium.sessionstorage.sessionstorage import Namespace, Record, SessionStorage

# Public names of the ``sessionstorage`` package; all three come from the import above.
__all__ = [
"Namespace",
"Record",
"SessionStorage",
]
Loading