# Source code for plaso.parsers.docker

# -*- coding: utf-8 -*-
"""Parser for Docker configuration and log files."""

from __future__ import unicode_literals

import codecs
import json
import os

from dfvfs.helpers import text_file

from plaso.containers import events
from plaso.containers import time_events
from plaso.lib import errors
from plaso.lib import definitions
from plaso.lib import timelib
from plaso.parsers import manager
from plaso.parsers import interface


class DockerJSONContainerLogEventData(events.EventData):
  """Event data for one line of a Docker container's JSON log.

  Attributes:
    container_id (str): identifier of the container (sha256).
    log_line (str): log line.
    log_source (str): log source.
  """

  DATA_TYPE = 'docker:json:container:log'

  def __init__(self):
    """Initializes event data."""
    super(DockerJSONContainerLogEventData, self).__init__(
        data_type=self.DATA_TYPE)
    # All attributes start unset; the parser fills them in per log line.
    self.container_id = None
    self.log_line = None
    self.log_source = None
class DockerJSONContainerEventData(events.EventData):
  """Event data extracted from a Docker container configuration file.

  Attributes:
    action (str): whether the container was created, started, or finished.
    container_id (str): identifier of the container (SHA256).
    container_name (str): name of the container.
  """

  DATA_TYPE = 'docker:json:container'

  def __init__(self):
    """Initializes event data."""
    super(DockerJSONContainerEventData, self).__init__(
        data_type=self.DATA_TYPE)
    # All attributes start unset; the parser fills them in per event.
    self.action = None
    self.container_id = None
    self.container_name = None
class DockerJSONLayerEventData(events.EventData):
  """Event data extracted from a Docker filesystem layer configuration file.

  Attributes:
    command: the command used which made Docker create a new layer
    layer_id: the identifier of the current Docker layer (sha1)
  """

  DATA_TYPE = 'docker:json:layer'

  def __init__(self):
    """Initializes event data."""
    super(DockerJSONLayerEventData, self).__init__(
        data_type=self.DATA_TYPE)
    # Both attributes start unset; the parser fills them in per layer.
    self.command = None
    self.layer_id = None
class DockerJSONParser(interface.FileObjectParser):
  """Generates various events from Docker json config and log files.

  This handles :
  * Per container config file
    DOCKER_DIR/containers/<container_id>/config.json
  * Per container stdout/stderr output log
    DOCKER_DIR/containers/<container_id>/<container_id>-json.log
  * Filesystem layer config files
    DOCKER_DIR/graph/<layer_id>/json
  """

  NAME = 'dockerjson'
  DESCRIPTION = 'Parser for JSON Docker files.'

  _ENCODING = 'utf-8'

  def _GetIdentifierFromPath(self, parser_mediator):
    """Extracts a container or a graph ID from a JSON file's path.

    The identifier is the name of the directory containing the JSON file,
    hence the second-to-last path segment.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.

    Returns:
      str: container or graph identifier.
    """
    file_entry = parser_mediator.GetFileEntry()
    path = file_entry.path_spec.location
    file_system = file_entry.GetFileSystem()
    path_segments = file_system.SplitPath(path)
    return path_segments[-2]

  def _ParseLayerConfigJSON(self, parser_mediator, file_object):
    """Extracts events from a Docker filesystem layer configuration file.

    The path of each filesystem layer config file is:
    DOCKER_DIR/graph/<layer_id>/json

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file is not a valid layer config file.
    """
    file_content = file_object.read()
    file_content = codecs.decode(file_content, self._ENCODING)

    json_dict = json.loads(file_content)

    if 'docker_version' not in json_dict:
      raise errors.UnableToParseFile(
          'not a valid Docker layer configuration file, missing '
          '\'docker_version\' key.')

    if 'created' in json_dict:
      # NOTE(review): assumes 'container_config' and 'Cmd' are present when
      # 'created' is — a malformed file would raise KeyError here.
      layer_creation_command_array = [
          x.strip() for x in json_dict['container_config']['Cmd']]
      layer_creation_command = ' '.join(layer_creation_command_array).replace(
          '\t', '')

      event_data = DockerJSONLayerEventData()
      event_data.command = layer_creation_command
      event_data.layer_id = self._GetIdentifierFromPath(parser_mediator)

      timestamp = timelib.Timestamp.FromTimeString(json_dict['created'])
      event = time_events.TimestampEvent(
          timestamp, definitions.TIME_DESCRIPTION_ADDED)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ParseContainerConfigJSON(self, parser_mediator, file_object):
    """Extracts events from a Docker container configuration file.

    The path of each container config file is:
    DOCKER_DIR/containers/<container_id>/config.json

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file is not a valid container config file.
    """
    file_content = file_object.read()
    file_content = codecs.decode(file_content, self._ENCODING)

    json_dict = json.loads(file_content)

    if 'Driver' not in json_dict:
      raise errors.UnableToParseFile(
          'not a valid Docker container configuration file, missing '
          '\'Driver\' key.')

    container_id_from_path = self._GetIdentifierFromPath(parser_mediator)
    container_id_from_json = json_dict.get('ID', None)
    if not container_id_from_json:
      raise errors.UnableToParseFile(
          'not a valid Docker layer configuration file, the \'ID\' key is '
          'missing from the JSON dict (should be {0:s})'.format(
              container_id_from_path))

    # The identifier embedded in the JSON must match the directory name,
    # otherwise the file was moved or is not a genuine container config.
    if container_id_from_json != container_id_from_path:
      raise errors.UnableToParseFile(
          'not a valid Docker container configuration file. The \'ID\' key of '
          'the JSON dict ({0:s}) is different from the layer ID taken from the'
          ' path to the file ({1:s}) JSON file.)'.format(
              container_id_from_json, container_id_from_path))

    if 'Config' in json_dict and 'Hostname' in json_dict['Config']:
      container_name = json_dict['Config']['Hostname']
    else:
      container_name = 'Unknown container name'

    event_data = DockerJSONContainerEventData()
    event_data.container_id = container_id_from_path
    event_data.container_name = container_name

    if 'State' in json_dict:
      if 'StartedAt' in json_dict['State']:
        event_data.action = 'Container Started'

        timestamp = timelib.Timestamp.FromTimeString(
            json_dict['State']['StartedAt'])
        event = time_events.TimestampEvent(
            timestamp, definitions.TIME_DESCRIPTION_START)
        parser_mediator.ProduceEventWithEventData(event, event_data)

      if 'FinishedAt' in json_dict['State']:
        if json_dict['State']['FinishedAt'] != '0001-01-01T00:00:00Z':
          # If the timestamp is 0001-01-01T00:00:00Z, the container
          # is still running, so we don't generate a Finished event.
          event_data.action = 'Container Finished'

          timestamp = timelib.Timestamp.FromTimeString(
              json_dict['State']['FinishedAt'])
          event = time_events.TimestampEvent(
              timestamp, definitions.TIME_DESCRIPTION_END)
          parser_mediator.ProduceEventWithEventData(event, event_data)

    created_time = json_dict.get('Created', None)
    if created_time:
      event_data.action = 'Container Created'

      timestamp = timelib.Timestamp.FromTimeString(created_time)
      event = time_events.TimestampEvent(
          timestamp, definitions.TIME_DESCRIPTION_ADDED)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ParseContainerLogJSON(self, parser_mediator, file_object):
    """Extracts events from a Docker container log file.

    The format is one JSON formatted log message per line.

    The path of each container log file (which logs the container stdout and
    stderr) is:
    DOCKER_DIR/containers/<container_id>/<container_id>-json.log

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
    """
    container_id = self._GetIdentifierFromPath(parser_mediator)

    text_file_object = text_file.TextFile(file_object)
    for log_line in text_file_object:
      json_log_line = json.loads(log_line)

      time = json_log_line.get('time', None)
      if not time:
        # A log line without a timestamp cannot produce an event.
        continue

      event_data = DockerJSONContainerLogEventData()
      event_data.container_id = container_id
      event_data.log_line = json_log_line.get('log', None)
      event_data.log_source = json_log_line.get('stream', None)
      # TODO: pass line number to offset or remove.
      event_data.offset = 0

      timestamp = timelib.Timestamp.FromTimeString(time)
      event = time_events.TimestampEvent(
          timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseFileObject(self, parser_mediator, file_object):
    """Parses various Docker configuration and log files in JSON format.

    This methods checks whether the file_object points to a docker JSON config
    or log file, and calls the corresponding _Parse* function to generate
    Events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
      ValueError: if the JSON file cannot be decoded.
    """
    # Trivial JSON format check: first character must be an open brace.
    if file_object.read(1) != b'{':
      raise errors.UnableToParseFile(
          'is not a valid JSON file, missing opening brace.')

    file_object.seek(0, os.SEEK_SET)

    file_entry = parser_mediator.GetFileEntry()
    file_system = file_entry.GetFileSystem()

    json_file_path = parser_mediator.GetDisplayName()
    split_path = file_system.SplitPath(json_file_path)
    try:
      # Dispatch on the file's location within the Docker directory layout.
      if 'containers' in split_path:
        if 'config.json' in split_path:
          self._ParseContainerConfigJSON(parser_mediator, file_object)
        if json_file_path.endswith('-json.log'):
          self._ParseContainerLogJSON(parser_mediator, file_object)
      elif 'graph' in split_path:
        if 'json' in split_path:
          self._ParseLayerConfigJSON(parser_mediator, file_object)

    except ValueError as exception:
      # Bug fix: the original code compared the exception *object* against a
      # string (`exception == '...'`), which is always False, so JSON decode
      # errors leaked out as raw ValueError instead of UnableToParseFile.
      # Compare the exception message instead.
      # NOTE(review): on Python 3, json raises JSONDecodeError with a
      # different message ('Expecting value: ...') — TODO confirm and cover.
      if 'No JSON object could be decoded' in str(exception):
        raise errors.UnableToParseFile(exception)
      raise
# Register this parser with the Plaso parsers manager so the engine can
# discover it by its NAME ('dockerjson').
manager.ParsersManager.RegisterParser(DockerJSONParser)