# Source code for plaso.parsers.trendmicroav

# -*- coding: utf-8 -*-
"""Parser for Trend Micro Antivirus logs.

Trend Micro uses two log files to track the scans (both manual/scheduled and
real-time) and the web reputation (network scan/filtering).

Currently only the first log is supported.
"""

from __future__ import unicode_literals

import codecs

from dfdatetime import definitions as dfdatetime_definitions
from dfdatetime import posix_time as dfdatetime_posix_time
from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.containers import time_events
from plaso.lib import errors
from plaso.lib import definitions
from plaso.lib import py2to3
from plaso.formatters import trendmicroav as formatter
from plaso.parsers import dsv_parser
from plaso.parsers import manager


class TrendMicroAVEventData(events.EventData):
  """Trend Micro AV Log event data.

  Attributes:
    action (str): action.
    filename (str): filename.
    path (str): path.
    scan_type (str): scan_type.
    threat (str): threat.
  """

  DATA_TYPE = 'av:trendmicro:scan'

  def __init__(self):
    """Initializes event data."""
    super(TrendMicroAVEventData, self).__init__(data_type=self.DATA_TYPE)
    # All attributes start out unset; ParseRow fills them in per log line.
    for attribute_name in (
        'action', 'filename', 'path', 'scan_type', 'threat'):
      setattr(self, attribute_name, None)
# pylint: disable=abstract-method
class TrendMicroBaseParser(dsv_parser.DSVParser):
  """Common code for parsing Trend Micro log files.

  The file format is reminiscent of CSV, but is not quite the same; the
  delimiter is a three-character sequence and there is no provision for
  quoting or escaping.
  """

  DELIMITER = '<;>'

  # Subclasses must define an integer MIN_COLUMNS value.
  MIN_COLUMNS = None

  # Subclasses must define a list of field names.
  COLUMNS = ()

  def __init__(self, encoding='cp1252'):
    """Initializes a Trend Micro log file parser.

    Args:
      encoding (Optional[str]): encoding used in the DSV file, where None
          indicates the codepage of the parser mediator should be used.
    """
    super(TrendMicroBaseParser, self).__init__(encoding=encoding)

  def _CreateDictReader(self, line_reader):
    """Iterates over the log lines and provides a reader for the values.

    Args:
      line_reader (iter): yields each line in the log file.

    Yields:
      dict[str, str]: column values keyed by column header.

    Raises:
      UnableToParseFile: if a line cannot be decoded or the number of values
          does not fall between MIN_COLUMNS and len(COLUMNS).
    """
    for line in line_reader:
      if isinstance(line, py2to3.BYTES_TYPE):
        try:
          line = codecs.decode(line, self._encoding)
        except UnicodeDecodeError as exception:
          raise errors.UnableToParseFile(
              'Unable to decode line with error: {0!s}'.format(exception))

      stripped_line = line.strip()
      values = stripped_line.split(self.DELIMITER)

      number_of_values = len(values)
      number_of_columns = len(self.COLUMNS)

      if number_of_values < self.MIN_COLUMNS:
        raise errors.UnableToParseFile(
            'Expected at least {0:d} values, found {1:d}'.format(
                self.MIN_COLUMNS, number_of_values))

      if number_of_values > number_of_columns:
        raise errors.UnableToParseFile(
            'Expected at most {0:d} values, found {1:d}'.format(
                number_of_columns, number_of_values))

      yield dict(zip(self.COLUMNS, values))

  def _ParseTimestamp(self, parser_mediator, row):
    """Provides a timestamp for the given row.

    If the Trend Micro log comes from a version that provides a POSIX
    timestamp, use that directly; it provides the advantages of UTC and of
    second precision. Otherwise fall back onto the local-timezone date and
    time.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      dfdatetime.interface.DateTimeValue: date and time value or None if
          no usable timestamp could be determined.
    """
    timestamp = row.get('timestamp', None)
    if timestamp is not None:
      try:
        timestamp = int(timestamp, 10)
        return dfdatetime_posix_time.PosixTime(timestamp=timestamp)
      except (ValueError, TypeError):
        # Do not wrap the unconverted value in a PosixTime; fall through to
        # the local date and time fields instead.
        parser_mediator.ProduceExtractionWarning(
            'Unable to parse timestamp value: {0!s}'.format(timestamp))

    # The timestamp is not available; parse the local date and time instead.
    try:
      return self._ConvertToTimestamp(row['date'], row['time'])
    except ValueError as exception:
      parser_mediator.ProduceExtractionWarning((
          'Unable to parse time string: "{0:s} {1:s}" with error: '
          '{2!s}').format(repr(row['date']), repr(row['time']), exception))

    return None

  def _ConvertToTimestamp(self, date, time):
    """Converts date and time strings into a timestamp.

    Recent versions of Office Scan write a log field with a Unix timestamp.
    Older versions may not write this field; their logs only provide a date
    and a time expressed in the local time zone. This function handles the
    latter case.

    Args:
      date (str): date as an 8-character string in the YYYYMMDD format.
      time (str): time as a 3 or 4-character string in the [H]HMM format or a
          6-character string in the HHMMSS format.

    Returns:
      dfdatetime_time_elements.TimeElements: the parsed timestamp, marked as
          local time.

    Raises:
      ValueError: if the date and time values cannot be parsed.
    """
    # Check that the strings have the correct length.
    if len(date) != 8:
      raise ValueError(
          'Unsupported length of date string: {0!s}'.format(repr(date)))

    if len(time) not in (3, 4, 6):
      raise ValueError(
          'Unsupported length of time string: {0!s}'.format(repr(time)))

    # Extract the date.
    try:
      year = int(date[:4], 10)
      month = int(date[4:6], 10)
      day = int(date[6:8], 10)
    except (TypeError, ValueError):
      raise ValueError('Unable to parse date string: {0!s}'.format(repr(date)))

    # Extract the time. Note that a single-digit hour value has no leading
    # zero; a 6-character value additionally carries seconds.
    try:
      if len(time) == 6:
        hour = int(time[:2], 10)
        minutes = int(time[2:4], 10)
        seconds = int(time[4:6], 10)
      else:
        hour = int(time[:-2], 10)
        minutes = int(time[-2:], 10)
        seconds = 0
    except (TypeError, ValueError):
      raise ValueError('Unable to parse time string: {0!s}'.format(repr(time)))

    time_elements_tuple = (year, month, day, hour, minutes, seconds)
    date_time = dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple)
    date_time.is_local_time = True
    if len(time) != 6:
      # The seconds value was not present in the log.
      # TODO: add functionality to dfdatetime to control precision.
      date_time._precision = dfdatetime_definitions.PRECISION_1_MINUTE  # pylint: disable=protected-access

    return date_time
class OfficeScanVirusDetectionParser(TrendMicroBaseParser):
  """Parses the Trend Micro Office Scan Virus Detection Log."""

  NAME = 'trendmicro_vd'
  DESCRIPTION = 'Parser for Trend Micro Office Scan Virus Detection log files.'

  COLUMNS = [
      'date', 'time', 'threat', 'action', 'scan_type', 'unused1',
      'path', 'filename', 'unused2', 'timestamp', 'unused3', 'unused4']
  MIN_COLUMNS = 8

  def ParseRow(self, parser_mediator, row_offset, row):
    """Parses a line of the log file and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row_offset (int): line number of the row.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
    """
    date_time = self._ParseTimestamp(parser_mediator, row)
    if date_time is None:
      return

    event_data = TrendMicroAVEventData()
    event_data.offset = row_offset

    # Copy the string fields verbatim.
    event_data.filename = row['filename']
    event_data.path = row['path']
    event_data.threat = row['threat']

    # Numeric codes that fail to convert are stored as None.
    for field in ('action', 'scan_type'):
      try:
        value = int(row[field], 10)
      except (ValueError, TypeError):
        value = None
      setattr(event_data, field, value)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyRow(self, parser_mediator, row):
    """Verifies if a line of the file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    if len(row) < self.MIN_COLUMNS:
      return False

    # The date and time fields must parse; otherwise this is not a
    # Trend Micro AV log.
    try:
      date_time = self._ConvertToTimestamp(row['date'], row['time'])
    except (ValueError, TypeError):
      return False

    if date_time is None:
      return False

    # The action value must be a known scan result to be plausible.
    try:
      action = int(row['action'], 10)
    except (ValueError, TypeError):
      return False

    return action in formatter.SCAN_RESULTS
class TrendMicroUrlEventData(events.EventData):
  """Trend Micro Web Reputation Log event data.

  Attributes:
    block_mode (str): operation mode.
    url (str): accessed URL.
    group_code (str): group code.
    group_name (str): group name.
    credibility_rating (int): credibility rating.
    credibility_score (int): credibility score.
    policy_identifier (int): policy identifier.
    application_name (str): application name.
    ip (str): IP address.
    threshold (int): threshold value.
  """

  DATA_TYPE = 'av:trendmicro:webrep'

  def __init__(self):
    """Initializes event data."""
    super(TrendMicroUrlEventData, self).__init__(data_type=self.DATA_TYPE)
    # All attributes start out unset; ParseRow fills them in per log line.
    for attribute_name in (
        'application_name', 'block_mode', 'credibility_rating',
        'credibility_score', 'group_code', 'group_name', 'ip',
        'policy_identifier', 'threshold', 'url'):
      setattr(self, attribute_name, None)
class OfficeScanWebReputationParser(TrendMicroBaseParser):
  """Parses the Trend Micro Office Scan Web Reputation detection log."""

  NAME = 'trendmicro_url'
  DESCRIPTION = 'Parser for Trend Micro Office Web Reputation log files.'

  COLUMNS = (
      'date', 'time', 'block_mode', 'url', 'group_code', 'group_name',
      'credibility_rating', 'policy_identifier', 'application_name',
      'credibility_score', 'ip', 'threshold', 'timestamp', 'unused')

  MIN_COLUMNS = 12

  def ParseRow(self, parser_mediator, row_offset, row):
    """Parses a line of the log file and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row_offset (int): line number of the row.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
    """
    timestamp = self._ParseTimestamp(parser_mediator, row)
    if timestamp is None:
      return

    event_data = TrendMicroUrlEventData()
    event_data.offset = row_offset

    # Convert and store integer values; values that fail to convert are
    # stored as None.
    for field in (
        'credibility_rating', 'credibility_score', 'policy_identifier',
        'threshold', 'block_mode'):
      try:
        value = int(row[field], 10)
      except (ValueError, TypeError):
        value = None
      setattr(event_data, field, value)

    # Store string values.
    for field in ('url', 'group_name', 'group_code', 'application_name', 'ip'):
      setattr(event_data, field, row[field])

    event = time_events.DateTimeValuesEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyRow(self, parser_mediator, row):
    """Verifies if a line of the file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    if len(row) < self.MIN_COLUMNS:
      return False

    # Check the date format!
    # If it doesn't parse, then this isn't a Trend Micro AV log.
    # Catch TypeError in addition to ValueError for consistency with
    # OfficeScanVirusDetectionParser.VerifyRow.
    try:
      timestamp = self._ConvertToTimestamp(row['date'], row['time'])
    except (ValueError, TypeError):
      return False

    if timestamp is None:
      return False

    # Check that the block mode value is plausible.
    try:
      block_mode = int(row['block_mode'], 10)
    except (ValueError, TypeError):
      return False

    if block_mode not in formatter.BLOCK_MODES:
      return False

    return True
# Register both Trend Micro parsers with the parsers manager so plaso can
# discover them by their NAME attributes.
manager.ParsersManager.RegisterParsers([
    OfficeScanVirusDetectionParser, OfficeScanWebReputationParser])