# Source code for plaso.parsers.trendmicroav

# -*- coding: utf-8 -*-
"""Parser for Trend Micro Antivirus logs.

Trend Micro uses two log files to track the scans (both manual/scheduled and
real-time) and the web reputation (network scan/filtering).

Currently only the first log is supported.
"""

from __future__ import unicode_literals

import codecs

from dfdatetime import definitions as dfdatetime_definitions
from dfdatetime import posix_time as dfdatetime_posix_time
from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.containers import time_events
from plaso.lib import errors
from plaso.lib import definitions
from plaso.lib import py2to3
from plaso.formatters import trendmicroav as formatter
from plaso.parsers import dsv_parser
from plaso.parsers import manager


class TrendMicroAVEventData(events.EventData):
  """Trend Micro AV Log event data.

  Attributes:
    action (str): action.
    filename (str): filename.
    path (str): path.
    scan_type (str): scan_type.
    threat (str): threat.
  """

  DATA_TYPE = 'av:trendmicro:scan'

  def __init__(self):
    """Initializes event data."""
    super(TrendMicroAVEventData, self).__init__(data_type=self.DATA_TYPE)
    # All attributes start out unset; ParseRow fills them in per log line.
    for attribute_name in (
        'action', 'filename', 'path', 'scan_type', 'threat'):
      setattr(self, attribute_name, None)
# pylint: disable=abstract-method
class TrendMicroBaseParser(dsv_parser.DSVParser):
  """Common code for parsing Trend Micro log files.

  The file format is reminiscent of CSV, but is not quite the same; the
  delimiter is a three-character sequence and there is no provision for
  quoting or escaping.
  """

  DELIMITER = '<;>'

  # Subclasses must define an integer MIN_COLUMNS value.
  MIN_COLUMNS = None

  # Subclasses must define a list of field names.
  COLUMNS = ()

  def __init__(self, encoding='cp1252'):
    """Initializes a Trend Micro log file parser.

    Args:
      encoding (Optional[str]): encoding used in the DSV file, where None
          indicates the codepage of the parser mediator should be used.
    """
    super(TrendMicroBaseParser, self).__init__(encoding=encoding)

  def _CreateDictReader(self, line_reader):
    """Iterates over the log lines and provides a reader for the values.

    Args:
      line_reader (iter): yields each line in the log file.

    Yields:
      dict[str, str]: column values keyed by column header.

    Raises:
      UnableToParseFile: if a line cannot be decoded or the number of values
          does not fall between MIN_COLUMNS and len(COLUMNS).
    """
    for line in line_reader:
      if isinstance(line, py2to3.BYTES_TYPE):
        try:
          line = codecs.decode(line, self._encoding)
        except UnicodeDecodeError as exception:
          raise errors.UnableToParseFile(
              'Unable to decode line with error: {0!s}'.format(exception))

      stripped_line = line.strip()
      values = stripped_line.split(self.DELIMITER)

      number_of_values = len(values)
      number_of_columns = len(self.COLUMNS)

      if number_of_values < self.MIN_COLUMNS:
        raise errors.UnableToParseFile(
            'Expected at least {0:d} values, found {1:d}'.format(
                self.MIN_COLUMNS, number_of_values))

      if number_of_values > number_of_columns:
        raise errors.UnableToParseFile(
            'Expected at most {0:d} values, found {1:d}'.format(
                number_of_columns, number_of_values))

      yield dict(zip(self.COLUMNS, values))

  def _ParseTimestamp(self, parser_mediator, row):
    """Provides a timestamp for the given row.

    If the Trend Micro log comes from a version that provides a POSIX
    timestamp, use that directly; it provides the advantages of UTC and of
    second precision. Otherwise fall back onto the local-timezone date and
    time.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      dfdatetime.interface.DateTimeValue: date and time value or None if
          no usable timestamp could be determined.
    """
    timestamp = row.get('timestamp', None)
    if timestamp is not None:
      try:
        timestamp = int(timestamp, 10)
        return dfdatetime_posix_time.PosixTime(timestamp=timestamp)
      except (ValueError, TypeError):
        # Do not wrap the unconverted value in a PosixTime; fall through to
        # the local date and time fields instead.
        parser_mediator.ProduceExtractionWarning(
            'Unable to parse timestamp value: {0!s}'.format(timestamp))

    # The timestamp is not available; parse the local date and time instead.
    try:
      return self._ConvertToTimestamp(row['date'], row['time'])
    except ValueError as exception:
      parser_mediator.ProduceExtractionWarning((
          'Unable to parse time string: "{0:s} {1:s}" with error: '
          '{2!s}').format(repr(row['date']), repr(row['time']), exception))

    return None

  def _ConvertToTimestamp(self, date, time):
    """Converts date and time strings into a timestamp.

    Recent versions of Office Scan write a log field with a Unix timestamp.
    Older versions may not write this field; their logs only provide a date
    and a time expressed in the local time zone. This function handles the
    latter case.

    Args:
      date (str): date as an 8-character string in the YYYYMMDD format.
      time (str): time as a 3 or 4-character string in the [H]HMM format or a
          6-character string in the HHMMSS format.

    Returns:
      dfdatetime_time_elements.TimeElements: the parsed timestamp, marked as
          local time.

    Raises:
      ValueError: if the date and time values cannot be parsed.
    """
    # Check that the strings have the correct length.
    if len(date) != 8:
      raise ValueError(
          'Unsupported length of date string: {0!s}'.format(repr(date)))

    if len(time) not in (3, 4, 6):
      raise ValueError(
          'Unsupported length of time string: {0!s}'.format(repr(time)))

    # Extract the date.
    try:
      year = int(date[:4], 10)
      month = int(date[4:6], 10)
      day = int(date[6:8], 10)
    except (TypeError, ValueError):
      raise ValueError('Unable to parse date string: {0!s}'.format(repr(date)))

    # Extract the time. Note that a single-digit hour value has no leading
    # zero; a 6-character value additionally carries seconds.
    try:
      if len(time) == 6:
        hour = int(time[:2], 10)
        minutes = int(time[2:4], 10)
        seconds = int(time[4:6], 10)
      else:
        hour = int(time[:-2], 10)
        minutes = int(time[-2:], 10)
        seconds = 0
    except (TypeError, ValueError):
      raise ValueError('Unable to parse time string: {0!s}'.format(repr(time)))

    time_elements_tuple = (year, month, day, hour, minutes, seconds)
    date_time = dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple)
    date_time.is_local_time = True
    if len(time) != 6:
      # The seconds value was not present in the log.
      # TODO: add functionality to dfdatetime to control precision.
      date_time._precision = dfdatetime_definitions.PRECISION_1_MINUTE  # pylint: disable=protected-access

    return date_time
class OfficeScanVirusDetectionParser(TrendMicroBaseParser):
  """Parses the Trend Micro Office Scan Virus Detection Log."""

  NAME = 'trendmicro_vd'
  DESCRIPTION = 'Parser for Trend Micro Office Scan Virus Detection log files.'

  COLUMNS = [
      'date', 'time', 'threat', 'action', 'scan_type', 'unused1',
      'path', 'filename', 'unused2', 'timestamp', 'unused3', 'unused4']
  MIN_COLUMNS = 8

  def ParseRow(self, parser_mediator, row_offset, row):
    """Parses a line of the log file and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row_offset (int): line number of the row.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
    """
    date_time = self._ParseTimestamp(parser_mediator, row)
    if date_time is None:
      return

    event_data = TrendMicroAVEventData()
    event_data.offset = row_offset

    # Copy the string fields verbatim.
    event_data.filename = row['filename']
    event_data.path = row['path']
    event_data.threat = row['threat']

    # Numeric codes that fail to convert are stored as None.
    for field in ('action', 'scan_type'):
      try:
        value = int(row[field], 10)
      except (ValueError, TypeError):
        value = None
      setattr(event_data, field, value)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyRow(self, parser_mediator, row):
    """Verifies if a line of the file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    if len(row) < self.MIN_COLUMNS:
      return False

    # The date and time fields must parse; otherwise this is not a
    # Trend Micro AV log.
    try:
      date_time = self._ConvertToTimestamp(row['date'], row['time'])
    except (ValueError, TypeError):
      return False

    if date_time is None:
      return False

    # The action value must be a known scan result to be plausible.
    try:
      action = int(row['action'], 10)
    except (ValueError, TypeError):
      return False

    return action in formatter.SCAN_RESULTS
class TrendMicroUrlEventData(events.EventData):
  """Trend Micro Web Reputation Log event data.

  Attributes:
    block_mode (str): operation mode.
    url (str): accessed URL.
    group_code (str): group code.
    group_name (str): group name.
    credibility_rating (int): credibility rating.
    credibility_score (int): credibility score.
    policy_identifier (int): policy identifier.
    application_name (str): application name.
    ip (str): IP address.
    threshold (int): threshold value.
  """

  DATA_TYPE = 'av:trendmicro:webrep'

  def __init__(self):
    """Initializes event data."""
    super(TrendMicroUrlEventData, self).__init__(data_type=self.DATA_TYPE)
    # All attributes start out unset; ParseRow fills them in per log line.
    for attribute_name in (
        'application_name', 'block_mode', 'credibility_rating',
        'credibility_score', 'group_code', 'group_name', 'ip',
        'policy_identifier', 'threshold', 'url'):
      setattr(self, attribute_name, None)
class OfficeScanWebReputationParser(TrendMicroBaseParser):
  """Parses the Trend Micro Office Scan Web Reputation detection log."""

  NAME = 'trendmicro_url'
  DESCRIPTION = 'Parser for Trend Micro Office Web Reputation log files.'

  COLUMNS = (
      'date', 'time', 'block_mode', 'url', 'group_code', 'group_name',
      'credibility_rating', 'policy_identifier', 'application_name',
      'credibility_score', 'ip', 'threshold', 'timestamp', 'unused')

  MIN_COLUMNS = 12

  def ParseRow(self, parser_mediator, row_offset, row):
    """Parses a line of the log file and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row_offset (int): line number of the row.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
    """
    timestamp = self._ParseTimestamp(parser_mediator, row)
    if timestamp is None:
      return

    event_data = TrendMicroUrlEventData()
    event_data.offset = row_offset

    # Convert and store integer values; values that fail to convert are
    # stored as None.
    for field in (
        'credibility_rating', 'credibility_score', 'policy_identifier',
        'threshold', 'block_mode'):
      try:
        value = int(row[field], 10)
      except (ValueError, TypeError):
        value = None
      setattr(event_data, field, value)

    # Store string values.
    for field in ('url', 'group_name', 'group_code', 'application_name', 'ip'):
      setattr(event_data, field, row[field])

    event = time_events.DateTimeValuesEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyRow(self, parser_mediator, row):
    """Verifies if a line of the file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    if len(row) < self.MIN_COLUMNS:
      return False

    # Check the date format!
    # If it doesn't parse, then this isn't a Trend Micro AV log.
    # Catch TypeError in addition to ValueError for consistency with
    # OfficeScanVirusDetectionParser.VerifyRow.
    try:
      timestamp = self._ConvertToTimestamp(row['date'], row['time'])
    except (ValueError, TypeError):
      return False

    if timestamp is None:
      return False

    # Check that the block mode value is plausible.
    try:
      block_mode = int(row['block_mode'], 10)
    except (ValueError, TypeError):
      return False

    if block_mode not in formatter.BLOCK_MODES:
      return False

    return True
# Register both Trend Micro parsers with the parsers manager so plaso can
# discover them by their NAME attributes.
manager.ParsersManager.RegisterParsers([
    OfficeScanVirusDetectionParser, OfficeScanWebReputationParser])