# Source code for plaso.parsers.iis

# -*- coding: utf-8 -*-
"""Parser for Windows IIS Log file.

More documentation on fields can be found here:
https://msdn.microsoft.com/en-us/library/ms525807(v=vs.90).aspx
"""

from __future__ import unicode_literals

import pyparsing

from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.containers import time_events
from plaso.lib import definitions
from plaso.lib import errors
from plaso.parsers import manager
from plaso.parsers import text_parser


class IISEventData(events.EventData):
  """Event data of a single line of an IIS log file.

  No attributes are declared here; the parser in this module assigns them
  with setattr(), one per named field of a parsed log line.
  """

  DATA_TYPE = 'iis:log:line'

  def __init__(self):
    """Initializes the event data with the IIS log line data type."""
    data_type = self.DATA_TYPE
    super(IISEventData, self).__init__(data_type=data_type)
class WinIISParser(text_parser.PyparsingSingleLineTextParser):
  """Parses a Microsoft IIS log file.

  Log lines are initially parsed with the IIS 6.0 common format. When a
  "#Fields:" comment is encountered the log line definition is rebuilt to
  match the fields it lists. A "#Date:" comment supplies the date used for
  log lines that only carry a time.
  """

  NAME = 'winiis'
  DESCRIPTION = 'Parser for Microsoft IIS log files.'

  # Common Fields (6.0: date time s-sitename s-ip cs-method cs-uri-stem
  # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status
  # sc-substatus sc-win32-status.
  # Common Fields (7.5): date time s-ip cs-method cs-uri-stem cs-uri-query
  # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus
  # sc-win32-status time-taken

  # A single hyphen is accepted by every field type below as the "no value"
  # marker.
  BLANK = pyparsing.Literal('-')
  WORD = pyparsing.Word(pyparsing.alphanums + '-') | BLANK

  INTEGER = (
      pyparsing.Word(pyparsing.nums, min=1).setParseAction(
          text_parser.ConvertTokenToInteger) | BLANK)

  IP_ADDRESS = (
      text_parser.PyparsingConstants.IPV4_ADDRESS |
      text_parser.PyparsingConstants.IPV6_ADDRESS | BLANK)

  PORT = (
      pyparsing.Word(pyparsing.nums, min=1, max=6).setParseAction(
          text_parser.ConvertTokenToInteger) | BLANK)

  _URI_SAFE_CHARACTERS = '/.?&+;_=()-:,%'
  _URI_UNSAFE_CHARACTERS = '{}|\\^~[]`'

  URI = pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS) | BLANK

  # Per https://blogs.iis.net/nazim/use-of-special-characters-like-in-an-iis-url
  # IIS does not require that a query comply with the RFC1738 restrictions on
  # valid URI characters.
  QUERY = (
      pyparsing.Word(
          pyparsing.alphanums + _URI_SAFE_CHARACTERS +
          _URI_UNSAFE_CHARACTERS) | BLANK)

  DATE_TIME = (
      text_parser.PyparsingConstants.DATE_ELEMENTS +
      text_parser.PyparsingConstants.TIME_ELEMENTS)

  DATE_METADATA = (
      pyparsing.Literal('Date:') + DATE_TIME.setResultsName('date_time'))

  FIELDS_METADATA = (
      pyparsing.Literal('Fields:') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('fields'))

  COMMENT = pyparsing.Literal('#') + (
      DATE_METADATA | FIELDS_METADATA | pyparsing.SkipTo(pyparsing.LineEnd()))

  # NOTE(review): this default structure names its results 'cs_uri_stem' and
  # 'sc_status' whereas _LOG_LINE_STRUCTURES below uses 'requested_uri_stem'
  # and 'http_status' for the same fields, and it parses cs-uri-query with URI
  # rather than QUERY. Confirm whether the difference is intentional before
  # aligning them, since the result names become event data attribute names.
  LOG_LINE_6_0 = (
      DATE_TIME.setResultsName('date_time') +
      URI.setResultsName('s_sitename') +
      IP_ADDRESS.setResultsName('dest_ip') +
      WORD.setResultsName('http_method') +
      URI.setResultsName('cs_uri_stem') +
      URI.setResultsName('cs_uri_query') +
      PORT.setResultsName('dest_port') +
      WORD.setResultsName('cs_username') +
      IP_ADDRESS.setResultsName('source_ip') +
      URI.setResultsName('user_agent') +
      INTEGER.setResultsName('sc_status') +
      INTEGER.setResultsName('sc_substatus') +
      INTEGER.setResultsName('sc_win32_status'))

  # Maps a field name, as listed in a "#Fields:" comment, to the expression
  # used to parse it. Set results name with underscores, not hyphens because
  # regex will not pick them up.
  _LOG_LINE_STRUCTURES = {
      # Common fields.
      'date': text_parser.PyparsingConstants.DATE.setResultsName('date'),
      'time': text_parser.PyparsingConstants.TIME.setResultsName('time'),
      's-sitename': URI.setResultsName('s_sitename'),
      's-ip': IP_ADDRESS.setResultsName('dest_ip'),
      'cs-method': WORD.setResultsName('http_method'),
      'cs-uri-stem': URI.setResultsName('requested_uri_stem'),
      'cs-uri-query': QUERY.setResultsName('cs_uri_query'),
      's-port': PORT.setResultsName('dest_port'),
      'cs-username': WORD.setResultsName('cs_username'),
      'c-ip': IP_ADDRESS.setResultsName('source_ip'),
      'cs(User-Agent)': URI.setResultsName('user_agent'),
      'sc-status': INTEGER.setResultsName('http_status'),
      'sc-substatus': INTEGER.setResultsName('sc_substatus'),
      'sc-win32-status': INTEGER.setResultsName('sc_win32_status'),
      # Less common fields.
      's-computername': URI.setResultsName('s_computername'),
      'sc-bytes': INTEGER.setResultsName('sent_bytes'),
      'cs-bytes': INTEGER.setResultsName('received_bytes'),
      'time-taken': INTEGER.setResultsName('time_taken'),
      'cs-version': URI.setResultsName('protocol_version'),
      'cs-host': URI.setResultsName('cs_host'),
      'cs(Cookie)': URI.setResultsName('cs_cookie'),
      'cs(Referrer)': URI.setResultsName('cs_referrer'),
      'cs(Referer)': URI.setResultsName('cs_referrer')}

  # Define the available log line structures. Default to the IIS v. 6.0
  # common format.
  LINE_STRUCTURES = [
      ('comment', COMMENT),
      ('logline', LOG_LINE_6_0)]

  # Define a signature value for the log file.
  _SIGNATURE = '#Software: Microsoft Internet Information Services'

  # Per https://msdn.microsoft.com/en-us/library/ms525807(v=vs.90).aspx:
  # "log file format(s) are all ASCII text formats (unless UTF-8 is enabled for
  # your Web sites)
  _ENCODING = 'utf-8'

  def __init__(self):
    """Initializes a parser object."""
    super(WinIISParser, self).__init__()
    # Date taken from the most recent "#Date:" comment; used for log lines
    # that have a time but no date of their own.
    self._day_of_month = None
    self._month = None
    self._year = None

  def _ParseComment(self, structure):
    """Parses a comment.

    A "Date:" comment updates the stored year, month and day of month. A
    "Fields:" comment rebuilds the log line definition. Any other comment is
    ignored.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    # TODO: refactor. Why is this method named _ParseComment when it extracts
    # the date and time?
    # structure[0] is the '#' literal, structure[1] the token that follows it.
    comment_type = structure[1]

    if comment_type == 'Date:':
      time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')
      self._year, self._month, self._day_of_month, _, _, _ = time_elements_tuple

    elif comment_type == 'Fields:':
      self._ParseFieldsMetadata(structure)

  def _ParseFieldsMetadata(self, structure):
    """Parses the fields metadata and updates the log line definition to match.

    Field names without a known definition are parsed as a URI.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    fields = self._GetValueFromStructure(structure, 'fields', default_value='')
    # Split on any run of whitespace so that repeated or trailing spaces do
    # not yield empty field names, which would each add a spurious token to
    # the log line definition.
    field_names = fields.split()

    log_line_structure = pyparsing.Empty()
    # Slicing instead of indexing keeps a header with fewer than two fields
    # from raising IndexError.
    if field_names[:2] == ['date', 'time']:
      log_line_structure += self.DATE_TIME.setResultsName('date_time')
      field_names = field_names[2:]

    for field_name in field_names:
      log_line_structure += self._LOG_LINE_STRUCTURES.get(field_name, self.URI)

    # Keep every structure except the previous log line definition and append
    # the new one.
    updated_structures = [
        line_structure for line_structure in self._line_structures
        if line_structure[0] != 'logline']
    updated_structures.append(('logline', log_line_structure))

    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = updated_structures

  def _ParseLogLine(self, parser_mediator, structure):
    """Parse a single log line and produce an event object.

    An extraction warning is produced, and no event, when the log line has no
    time values or when its date and time values cannot be converted.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')
    if not time_elements_tuple:
      # The date and time were not parsed as one value; combine the separate
      # time with either the date of the log line or the stored comment date.
      time_tuple = self._GetValueFromStructure(structure, 'time')
      if not time_tuple:
        parser_mediator.ProduceExtractionWarning('missing time values')
        return

      date_tuple = self._GetValueFromStructure(structure, 'date')
      if not date_tuple:
        time_elements_tuple = (
            self._year, self._month, self._day_of_month, time_tuple[0],
            time_tuple[1], time_tuple[2])
      else:
        time_elements_tuple = (
            date_tuple[0], date_tuple[1], date_tuple[2], time_tuple[0],
            time_tuple[1], time_tuple[2])

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except (TypeError, ValueError):
      # TypeError is caught as well because the tuple can contain None when
      # no "Date:" comment has been parsed yet.
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_tuple))
      return

    event_data = IISEventData()

    for key, value in structure.items():
      # Date and time values end up in the event timestamp and '-' marks a
      # field without a value.
      if key in ('date', 'date_time', 'time') or value == '-':
        continue

      if isinstance(value, pyparsing.ParseResults):
        value = ''.join(value)

      setattr(event_data, key, value)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in ('comment', 'logline'):
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == 'logline':
      self._ParseLogLine(parser_mediator, structure)

    elif key == 'comment':
      self._ParseComment(structure)

  # pylint: disable=unused-argument
  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is an IIS log file.

    Also resets the log line definition to the default and clears the stored
    comment date.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line contains the IIS log file signature.
    """
    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = self.LINE_STRUCTURES

    self._day_of_month = None
    self._month = None
    self._year = None

    # TODO: Examine other versions of the file format and if this parser should
    # support them. For now just checking if it contains the IIS header.
    return self._SIGNATURE in line
# Register the parser class with the parsers manager.
manager.ParsersManager.RegisterParser(WinIISParser)