Source code for plaso.parsers.firefox_cache

# -*- coding: utf-8 -*-
"""Implements a parser for Firefox cache 1 and 2 files."""

from __future__ import unicode_literals

import collections
import re
import os

from dfdatetime import posix_time as dfdatetime_posix_time

from plaso.containers import events
from plaso.containers import time_events
from plaso.lib import errors
from plaso.lib import definitions
from plaso.parsers import dtfabric_parser
from plaso.parsers import logger
from plaso.parsers import manager


[docs]class FirefoxCacheEventData(events.EventData): """Firefox cache event data. Attributes: data_size (int): size of the cached data. fetch_count (int): number of times the cache entry was fetched. frequency (int): ??? info_size (int): size of the metadata. location (str): ??? request_method (str): HTTP request method. request_size (int): HTTP request byte size. response_code (int): HTTP response code. url (str): URL of original content. version (int): cache format version. """ DATA_TYPE = 'firefox:cache:record' def __init__(self): """Initializes event data.""" super(FirefoxCacheEventData, self).__init__(data_type=self.DATA_TYPE) self.data_size = None self.fetch_count = None self.frequency = None self.info_size = None self.location = None self.request_method = None self.request_size = None self.response_code = None self.url = None self.version = None
[docs]class BaseFirefoxCacheParser(dtfabric_parser.DtFabricBaseParser): """Parses Firefox cache files.""" # pylint: disable=abstract-method _MAXIMUM_URL_LENGTH = 65536 _REQUEST_METHODS = frozenset([ 'CONNECT', 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PATCH', 'POST', 'PUT', 'TRACE']) _CACHE_ENTRY_HEADER_SIZE = 36 def _ParseHTTPHeaders(self, header_data, offset, display_name): """Extract relevant information from HTTP header. Args: header_data (bytes): HTTP header data. offset (int): offset of the cache record, relative to the start of the Firefox cache file. display_name (str): display name of the Firefox cache file. Returns: tuple: containing: str: HTTP request method or None if the value cannot be extracted. str: HTTP response code or None if the value cannot be extracted. """ header_string = header_data.decode('ascii', errors='replace') try: http_header_start = header_string.index('request-method') except ValueError: logger.debug('No request method in header: "{0:s}"'.format(header_string)) return None, None # HTTP request and response headers. http_headers = header_string[http_header_start::] header_parts = http_headers.split('\x00') # TODO: check len(header_parts). request_method = header_parts[1] if request_method not in self._REQUEST_METHODS: logger.debug(( '[{0:s}] {1:s}:{2:d}: Unknown HTTP method \'{3:s}\'. Response ' 'headers: \'{4:s}\'').format( self.NAME, display_name, offset, request_method, header_string)) try: response_head_start = http_headers.index('response-head') except ValueError: logger.debug('No response head in header: "{0:s}"'.format(header_string)) return request_method, None # HTTP response headers. response_head = http_headers[response_head_start::] response_head_parts = response_head.split('\x00') # Response code, followed by other response header key-value pairs, # separated by newline. # TODO: check len(response_head_parts). response_head_text = response_head_parts[1] response_head_text_parts = response_head_text.split('\r\n') # The first line contains response code. # TODO: check len(response_head_text_parts). response_code = response_head_text_parts[0] if not response_code.startswith('HTTP'): logger.debug(( '[{0:s}] {1:s}:{2:d}: Could not determine HTTP response code. ' 'Response headers: \'{3:s}\'.').format( self.NAME, display_name, offset, header_string)) return request_method, response_code
[docs]class FirefoxCacheParser(BaseFirefoxCacheParser): """Parses Firefox cache version 1 files (Firefox 31 or earlier).""" NAME = 'firefox_cache' DESCRIPTION = ( 'Parser for Firefox Cache version 1 files (Firefox 31 or earlier).') _DEFINITION_FILE = 'firefox_cache.yaml' # Initial size of Firefox 4 and later cache files. _INITIAL_CACHE_FILE_SIZE = 4 * 1024 * 1024 # Smallest possible block size in Firefox cache files. _MINIMUM_BLOCK_SIZE = 256 # Name of a cache data file that contains metadata. _CACHE_FILENAME_RE = re.compile(r'^[0-9A-Fa-f]{5}m[0-9]{2}$') FIREFOX_CACHE_CONFIG = collections.namedtuple( 'firefox_cache_config', 'block_size first_record_offset') def _GetFirefoxConfig(self, file_object, display_name): """Determine cache file block size. Args: file_object (dfvfs.FileIO): a file-like object. display_name (str): display name. Returns: firefox_cache_config: namedtuple containing the block size and first record offset. Raises: UnableToParseFile: if no valid cache record could be found. """ # There ought to be a valid record within the first 4 MiB. We use this # limit to prevent reading large invalid files. to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE) while file_object.get_offset() < to_read: offset = file_object.get_offset() try: cache_entry, _ = self._ReadCacheEntry( file_object, display_name, self._MINIMUM_BLOCK_SIZE) # We have not yet determined the block size, so we use the smallest # possible size. record_size = ( self._CACHE_ENTRY_HEADER_SIZE + cache_entry.request_size + cache_entry.information_size) if record_size >= 4096: # _CACHE_003_ block_size = 4096 elif record_size >= 1024: # _CACHE_002_ block_size = 1024 else: # _CACHE_001_ block_size = 256 return self.FIREFOX_CACHE_CONFIG(block_size, offset) except IOError: logger.debug('[{0:s}] {1:s}:{2:d}: Invalid record.'.format( self.NAME, display_name, offset)) raise errors.UnableToParseFile( 'Could not find a valid cache record. Not a Firefox cache file.') def _ParseCacheEntry( self, parser_mediator, file_object, display_name, block_size): """Parses a cache entry. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. display_name (str): display name. block_size (int): block size. """ cache_entry, event_data = self._ReadCacheEntry( file_object, display_name, block_size) date_time = dfdatetime_posix_time.PosixTime( timestamp=cache_entry.last_fetched_time) event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_LAST_VISITED) parser_mediator.ProduceEventWithEventData(event, event_data) if cache_entry.last_modified_time: date_time = dfdatetime_posix_time.PosixTime( timestamp=cache_entry.last_modified_time) event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_WRITTEN) parser_mediator.ProduceEventWithEventData(event, event_data) if cache_entry.expiration_time: date_time = dfdatetime_posix_time.PosixTime( timestamp=cache_entry.expiration_time) event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_EXPIRATION) parser_mediator.ProduceEventWithEventData(event, event_data) def _ReadCacheEntry(self, file_object, display_name, block_size): """Reads a cache entry. Args: file_object (dfvfs.FileIO): a file-like object. display_name (str): display name. block_size (int): block size. Returns: tuple: containing: firefox_cache1_entry_header: cache record header structure. FirefoxCacheEventData: event data. Raises: IOError: if the cache record header cannot be validated. OSError: if the cache record header cannot be validated. ParseError: if the cache record header cannot be parsed. """ file_offset = file_object.get_offset() # TODO: merge reading the cache entry header and body by having dtFabric # implement the sanity checks done in _ValidateCacheEntryHeader. # Seeing that this parser tries to read each block for a possible # cache entry, we read the fixed-size values first. cache_entry_header_map = self._GetDataTypeMap('firefox_cache1_entry_header') try: cache_entry_header, header_data_size = self._ReadStructureFromFileObject( file_object, file_offset, cache_entry_header_map) except (ValueError, errors.ParseError) as exception: raise errors.ParseError( 'Unable to parse Firefox cache entry header with error: {0!s}'.format( exception)) if not self._ValidateCacheEntryHeader(cache_entry_header): # Skip to the next block potentially containing a cache entry. file_offset = block_size - header_data_size file_object.seek(file_offset, os.SEEK_CUR) raise IOError('Not a valid Firefox cache record.') body_data_size = ( cache_entry_header.request_size + cache_entry_header.information_size) cache_entry_body_data = self._ReadData( file_object, file_offset + header_data_size, body_data_size) url = cache_entry_body_data[:cache_entry_header.request_size].decode( 'ascii').rstrip('\x00') request_method, response_code = self._ParseHTTPHeaders( cache_entry_body_data[cache_entry_header.request_size:], file_offset, display_name) # A request can span multiple blocks, so we use modulo. cache_entry_data_size = header_data_size + body_data_size _, remaining_data_size = divmod(cache_entry_data_size, block_size) if remaining_data_size > 0: file_object.seek(block_size - remaining_data_size, os.SEEK_CUR) event_data = FirefoxCacheEventData() event_data.data_size = cache_entry_header.cached_data_size event_data.fetch_count = cache_entry_header.fetch_count event_data.info_size = cache_entry_header.information_size event_data.location = cache_entry_header.location event_data.request_method = request_method event_data.request_size = cache_entry_header.request_size event_data.response_code = response_code event_data.url = url event_data.version = '{0:d}.{1:d}'.format( cache_entry_header.major_format_version, cache_entry_header.minor_format_version) return cache_entry_header, event_data def _ValidateCacheEntryHeader(self, cache_entry_header): """Determines whether the values in the cache entry header are valid. Args: cache_entry_header (firefox_cache1_entry_header): cache entry header. Returns: bool: True if the cache entry header is valid. """ return ( cache_entry_header.request_size > 0 and cache_entry_header.request_size < self._MAXIMUM_URL_LENGTH and cache_entry_header.major_format_version == 1 and cache_entry_header.last_fetched_time > 0 and cache_entry_header.fetch_count > 0)
[docs] def ParseFileObject(self, parser_mediator, file_object): """Parses a Firefox cache file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ filename = parser_mediator.GetFilename() if (not self._CACHE_FILENAME_RE.match(filename) and not filename.startswith('_CACHE_00')): raise errors.UnableToParseFile('Not a Firefox cache1 file.') display_name = parser_mediator.GetDisplayName() firefox_config = self._GetFirefoxConfig(file_object, display_name) file_object.seek(firefox_config.first_record_offset) while file_object.get_offset() < file_object.get_size(): try: self._ParseCacheEntry( parser_mediator, file_object, display_name, firefox_config.block_size) except IOError: file_offset = file_object.get_offset() - self._MINIMUM_BLOCK_SIZE logger.debug(( '[{0:s}] Invalid cache record in file: {1:s} at offset: ' '{2:d}.').format(self.NAME, display_name, file_offset))
[docs]class FirefoxCache2Parser(BaseFirefoxCacheParser): """Parses Firefox cache version 2 files (Firefox 32 or later).""" NAME = 'firefox_cache2' DESCRIPTION = ( 'Parser for Firefox Cache version 2 files (Firefox 32 or later).') _DEFINITION_FILE = 'firefox_cache.yaml' _CACHE_VERSION = 2 # Cache version 2 filenames are SHA-1 hex digests. _CACHE_FILENAME_RE = re.compile(r'^[0-9A-Fa-f]{40}$') _CHUNK_SIZE = 512 * 1024 def _GetCacheFileMetadataHeaderOffset(self, file_object): """Determines the offset of the cache file metadata header. This method is inspired by the work of James Habben: https://github.com/JamesHabben/FirefoxCache2 Args: file_object (dfvfs.FileIO): a file-like object. Returns: int: offset of the file cache metadata header relative to the start of the file. Raises: IOError: if the start of the cache file metadata could not be determined. """ file_object.seek(-4, os.SEEK_END) file_offset = file_object.tell() metadata_size_map = self._GetDataTypeMap('uint32be') try: metadata_size, _ = self._ReadStructureFromFileObject( file_object, file_offset, metadata_size_map) except (ValueError, errors.ParseError) as exception: raise errors.UnableToParseFile( 'Unable to parse cache file metadata size with error: {0!s}'.format( exception)) # Firefox splits the content into chunks. number_of_chunks, remainder = divmod(metadata_size, self._CHUNK_SIZE) if remainder != 0: number_of_chunks += 1 # Each chunk in the cached record is padded with two bytes. # Skip the first 4 bytes which contains a hash value of the cached content. return metadata_size + (number_of_chunks * 2) + 4 def _ValidateCacheFileMetadataHeader(self, cache_file_metadata_header): """Determines whether the cache file metadata header is valid. Args: cache_file_metadata_header (firefox_cache2_file_metadata_header): cache file metadata header. Returns: bool: True if the cache file metadata header is valid. """ # TODO: add support for format version 2 and 3 return ( cache_file_metadata_header.key_size > 0 and cache_file_metadata_header.key_size < self._MAXIMUM_URL_LENGTH and cache_file_metadata_header.format_version == 1 and cache_file_metadata_header.last_fetched_time > 0 and cache_file_metadata_header.fetch_count > 0)
[docs] def ParseFileObject(self, parser_mediator, file_object): """Parses a Firefox cache file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ filename = parser_mediator.GetFilename() if not self._CACHE_FILENAME_RE.match(filename): raise errors.UnableToParseFile('Not a Firefox cache2 file.') # The file needs to be at least 36 bytes in size for it to contain # a cache2 file metadata header and a 4-byte offset that points to its # location in the file. file_size = file_object.get_size() if file_size < 36: raise errors.UnableToParseFile( 'File size too small for Firefox cache2 file.') file_offset = self._GetCacheFileMetadataHeaderOffset(file_object) file_metadata_header_map = self._GetDataTypeMap( 'firefox_cache2_file_metadata_header') try: file_metadata_header, _ = self._ReadStructureFromFileObject( file_object, file_offset, file_metadata_header_map) except (ValueError, errors.ParseError) as exception: raise errors.UnableToParseFile(( 'Unable to parse Firefox cache2 file metadata header with error: ' '{0!s}').format(exception)) if not self._ValidateCacheFileMetadataHeader(file_metadata_header): raise errors.UnableToParseFile('Not a valid Firefox cache2 record.') url = file_object.read(file_metadata_header.key_size) header_data = file_object.read() display_name = parser_mediator.GetDisplayName() request_method, response_code = self._ParseHTTPHeaders( header_data[:-4], file_offset, display_name) event_data = FirefoxCacheEventData() event_data.fetch_count = file_metadata_header.fetch_count event_data.frequency = file_metadata_header.frequency event_data.request_method = request_method event_data.request_size = file_metadata_header.key_size event_data.response_code = response_code event_data.version = self._CACHE_VERSION event_data.url = url.decode('ascii', errors='replace') date_time = dfdatetime_posix_time.PosixTime( timestamp=file_metadata_header.last_fetched_time) event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_LAST_VISITED) parser_mediator.ProduceEventWithEventData(event, event_data) if file_metadata_header.last_modified_time: date_time = dfdatetime_posix_time.PosixTime( timestamp=file_metadata_header.last_modified_time) event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_WRITTEN) parser_mediator.ProduceEventWithEventData(event, event_data) if file_metadata_header.expiration_time: date_time = dfdatetime_posix_time.PosixTime( timestamp=file_metadata_header.expiration_time) event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_EXPIRATION) parser_mediator.ProduceEventWithEventData(event, event_data)
manager.ParsersManager.RegisterParsers([ FirefoxCacheParser, FirefoxCache2Parser])