# -*- coding: utf-8 -*-
"""Parser for syslog formatted log files"""
from __future__ import unicode_literals
import re
from dfdatetime import time_elements as dfdatetime_time_elements
import pyparsing
from plaso.containers import events
from plaso.containers import time_events
from plaso.lib import errors
from plaso.lib import definitions
from plaso.lib import timelib
from plaso.parsers import manager
from plaso.parsers import text_parser
class SyslogLineEventData(events.EventData):
  """Syslog line event data.

  Attributes:
    body (str): message body.
    hostname (str): hostname of the reporter.
    pid (str): process identifier of the reporter.
    reporter (str): reporter.
    severity (str): severity.
  """

  DATA_TYPE = 'syslog:line'

  def __init__(self, data_type=DATA_TYPE):
    """Initializes an event data attribute container.

    Args:
      data_type (Optional[str]): event data type indicator.
    """
    super(SyslogLineEventData, self).__init__(data_type=data_type)
    # All values start out unset and are filled in by the parser.
    self.body = None
    self.hostname = None
    self.pid = None
    self.reporter = None
    self.severity = None
class SyslogCommentEventData(events.EventData):
  """Syslog comment event data.

  SyslogParser.ParseRecord instantiates this container for 'syslog_comment'
  structures, so it has to exist in this module.

  Attributes:
    body (str): message body.
  """

  # NOTE(review): data type follows the 'syslog:line' naming used by
  # SyslogLineEventData; confirm it matches the registered formatter.
  DATA_TYPE = 'syslog:comment'

  def __init__(self):
    """Initializes event data."""
    super(SyslogCommentEventData, self).__init__(data_type=self.DATA_TYPE)
    self.body = None


class SyslogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses syslog formatted log files.

  Three line layouts are recognized: traditional syslog lines
  ("Mon DD HH:MM:SS hostname reporter[pid]: body"), syslog comment lines
  ("Mon DD HH:MM:SS: --- body ---") and Chrome OS syslog lines that start
  with an ISO 8601 timestamp followed by a severity.
  """

  NAME = 'syslog'

  DESCRIPTION = 'Syslog Parser'

  _ENCODING = 'utf-8'

  _plugin_classes = {}

  # The reporter and facility fields can contain any printable character, but
  # to allow for processing of syslog formats that delimit the reporter and
  # facility with printable characters, we remove certain common delimiters
  # from the set of printable characters.
  _REPORTER_CHARACTERS = ''.join(
      [c for c in pyparsing.printables if c not in [':', '[', '<']])
  _FACILITY_CHARACTERS = ''.join(
      [c for c in pyparsing.printables if c not in [':', '>']])

  _SYSLOG_SEVERITY = [
      'EMERG',
      'ALERT',
      'CRIT',
      'ERR',
      'WARNING',
      'NOTICE',
      'INFO',
      'DEBUG']

  # The body runs (lazily) up to the end of the input or up to a newline that
  # is immediately followed by the start of the next entry, which is either
  # a traditional syslog date or a Chrome OS ISO 8601 date and time.
  # The UTC offset sign is matched with [+-]; a character class must not
  # contain "|" since that would be matched as a literal character.
  _BODY_CONTENT = (
      r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|'
      r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}'
      r'[+-]\d{2}:\d{2}\s))')

  _VERIFICATION_REGEX = re.compile(
      r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' + _BODY_CONTENT)

  # The Chrome OS syslog messages are of a format beginning with an
  # ISO 8601 combined date and time expression with timezone designator:
  #   2016-10-25T12:37:23.297265-07:00
  #
  # This will then be followed by the SYSLOG Severity which will be one of:
  #   EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG
  #
  # 2016-10-25T12:37:23.297265-07:00 INFO
  _CHROMEOS_VERIFICATION_REGEX = re.compile(
      r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
      r'\d{6}[+-]\d{2}:\d{2}\s'
      r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' + _BODY_CONTENT)

  # Named building blocks that the line grammars below are composed from.
  _PYPARSING_COMPONENTS = {
      'year': text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName(
          'year'),
      'two_digit_month': (
          text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
              'two_digit_month')),
      'month': text_parser.PyparsingConstants.MONTH.setResultsName('month'),
      'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
          'day'),
      'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'hour'),
      'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'minute'),
      'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'second'),
      'fractional_seconds': pyparsing.Word(pyparsing.nums).setResultsName(
          'fractional_seconds'),
      'hostname': pyparsing.Word(pyparsing.printables).setResultsName(
          'hostname'),
      'reporter': pyparsing.Word(_REPORTER_CHARACTERS).setResultsName(
          'reporter'),
      'pid': text_parser.PyparsingConstants.PID.setResultsName('pid'),
      'facility': pyparsing.Word(_FACILITY_CHARACTERS).setResultsName(
          'facility'),
      'severity': pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName('severity'),
      'body': pyparsing.Regex(_BODY_CONTENT, re.DOTALL).setResultsName('body'),
      'comment_body': pyparsing.SkipTo(' ---').setResultsName('body')
  }

  # Traditional syslog date: month name, day and time of day with optional
  # fractional seconds. Note that it does not contain a year.
  _PYPARSING_COMPONENTS['date'] = (
      _PYPARSING_COMPONENTS['month'] +
      _PYPARSING_COMPONENTS['day'] +
      _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['second'] + pyparsing.Optional(
          pyparsing.Suppress('.') +
          _PYPARSING_COMPONENTS['fractional_seconds']))

  # Chrome OS date and time: the ISO 8601 expression is combined into a single
  # string token, for example "2016-10-25T12:37:23.297265-07:00".
  _PYPARSING_COMPONENTS['chromeos_date'] = pyparsing.Combine(
      pyparsing.Word(pyparsing.nums, exact=4) + pyparsing.Literal('-') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('-') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('T') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal(':') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal(':') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('.') +
      pyparsing.Word(pyparsing.nums, exact=6) + pyparsing.oneOf(['-', '+']) +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Optional(
          pyparsing.Literal(':') + pyparsing.Word(pyparsing.nums, exact=2)),
      joinString='', adjacent=True)

  _CHROMEOS_SYSLOG_LINE = (
      _PYPARSING_COMPONENTS['chromeos_date'].setResultsName('chromeos_date') +
      _PYPARSING_COMPONENTS['severity'] +
      _PYPARSING_COMPONENTS['reporter'] +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      pyparsing.Optional(
          pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
          pyparsing.Suppress(']')) +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  _SYSLOG_LINE = (
      _PYPARSING_COMPONENTS['date'] +
      _PYPARSING_COMPONENTS['hostname'] +
      _PYPARSING_COMPONENTS['reporter'] +
      pyparsing.Optional(
          pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
          pyparsing.Suppress(']')) +
      pyparsing.Optional(
          pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
          pyparsing.Suppress('>')) +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  _SYSLOG_COMMENT = (
      _PYPARSING_COMPONENTS['date'] + pyparsing.Suppress(':') +
      pyparsing.Suppress('---') + _PYPARSING_COMPONENTS['comment_body'] +
      pyparsing.Suppress('---') + pyparsing.LineEnd())

  _KERNEL_SYSLOG_LINE = (
      _PYPARSING_COMPONENTS['date'] +
      pyparsing.Literal('kernel').setResultsName('reporter') +
      pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['body'] +
      pyparsing.lineEnd())

  # Pairs of (structure name, grammar); the structure name is what
  # ParseRecord receives as its key argument.
  LINE_STRUCTURES = [
      ('syslog_line', _SYSLOG_LINE),
      ('syslog_line', _KERNEL_SYSLOG_LINE),
      ('syslog_comment', _SYSLOG_COMMENT),
      ('chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  def __init__(self):
    """Initializes a parser."""
    super(SyslogParser, self).__init__()
    # A value of 0 means "not determined yet", see _UpdateYear.
    self._last_month = 0
    self._maximum_year = 0
    self._plugin_by_reporter = {}
    self._year_use = 0

  def _UpdateYear(self, mediator, month):
    """Updates the year to use for events, based on last observed month.

    The year and the maximum year are requested from the mediator the first
    time they are needed. The year is incremented when the observed month
    moves backwards by more than one month, but never beyond the maximum
    year.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      month (int): month observed by the parser, where January is 1.
    """
    if not self._year_use:
      self._year_use = mediator.GetEstimatedYear()
    if not self._maximum_year:
      self._maximum_year = mediator.GetLatestYear()

    if not self._last_month:
      # First month seen; there is nothing to compare against yet.
      self._last_month = month
      return

    # Some syslog daemons allow out-of-order sequences, so allow some leeway
    # to not cause Apr->May->Apr to cause the year to increment.
    # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
    if self._last_month > (month + 1):
      if self._year_use != self._maximum_year:
        self._year_use += 1
    self._last_month = month

  def EnablePlugins(self, plugin_includes):
    """Enables parser plugins.

    Besides enabling the plugins, this rebuilds the lookup table that maps
    a reporter name to the plugin that handles it.

    Args:
      plugin_includes (list[str]): names of the plugins to enable, where None
          or an empty list represents all plugins. Note that the default plugin
          is handled separately.
    """
    super(SyslogParser, self).EnablePlugins(plugin_includes)

    self._plugin_by_reporter = {}
    for plugin in self._plugins:
      self._plugin_by_reporter[plugin.REPORTER] = plugin

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a matching entry.

    Determines the date and time of the entry, builds the event data and
    either hands the entry to the plugin registered for its reporter or
    produces a generic syslog event. An extraction warning is produced and
    the entry is skipped when its date and time values are invalid.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == 'chromeos_syslog_line':
      date_time = dfdatetime_time_elements.TimeElementsInMicroseconds()
      iso8601_string = self._GetValueFromStructure(structure, 'chromeos_date')

      try:
        date_time.CopyFromStringISO8601(iso8601_string)
      except ValueError:
        # Use !s so that formatting the warning cannot fail on a non-string
        # value, consistent with the other warnings in this method.
        parser_mediator.ProduceExtractionWarning(
            'invalid date time value: {0!s}'.format(iso8601_string))
        return

    else:
      # TODO: add support for fractional seconds.

      month = self._GetValueFromStructure(structure, 'month')
      try:
        # Unknown month names map to 0, which is rejected further below when
        # the date and time values are validated.
        month = timelib.MONTH_DICT.get(month.lower(), 0)
      except AttributeError:
        # The month value is not a string, for example None.
        parser_mediator.ProduceExtractionWarning(
            'invalid month value: {0!s}'.format(month))
        return

      if month != 0:
        self._UpdateYear(parser_mediator, month)

      day = self._GetValueFromStructure(structure, 'day')
      hours = self._GetValueFromStructure(structure, 'hour')
      minutes = self._GetValueFromStructure(structure, 'minute')
      seconds = self._GetValueFromStructure(structure, 'second')

      # The syslog date has no year, hence the tracked year is used.
      time_elements_tuple = (
          self._year_use, month, day, hours, minutes, seconds)

      try:
        date_time = dfdatetime_time_elements.TimeElements(
            time_elements_tuple=time_elements_tuple)
        # The syslog date has no time zone designator.
        date_time.is_local_time = True
      except ValueError:
        parser_mediator.ProduceExtractionWarning(
            'invalid date time value: {0!s}'.format(time_elements_tuple))
        return

    plugin = None
    if key == 'syslog_comment':
      event_data = SyslogCommentEventData()
      event_data.body = self._GetValueFromStructure(structure, 'body')
      # TODO: pass line number to offset or remove.
      event_data.offset = 0

    else:
      event_data = SyslogLineEventData()
      event_data.body = self._GetValueFromStructure(structure, 'body')
      event_data.hostname = self._GetValueFromStructure(structure, 'hostname')
      # TODO: pass line number to offset or remove.
      event_data.offset = 0
      event_data.pid = self._GetValueFromStructure(structure, 'pid')
      event_data.reporter = self._GetValueFromStructure(structure, 'reporter')
      event_data.severity = self._GetValueFromStructure(structure, 'severity')

      plugin = self._plugin_by_reporter.get(event_data.reporter, None)
      if plugin:
        attributes = {
            'body': event_data.body,
            'hostname': event_data.hostname,
            'pid': event_data.pid,
            'reporter': event_data.reporter,
            'severity': event_data.severity}

        try:
          # TODO: pass event_data instead of attributes.
          plugin.Process(parser_mediator, date_time, attributes)

        except errors.WrongPlugin:
          # The plugin does not handle this entry; fall back to producing
          # a generic syslog event below.
          plugin = None

    if not plugin:
      event = time_events.DateTimeValuesEvent(
          date_time, definitions.TIME_DESCRIPTION_WRITTEN)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, lines):
    """Verifies that this is a syslog-formatted file.

    The lines are accepted when they start with either a traditional syslog
    entry or a Chrome OS syslog entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    return (
        self._VERIFICATION_REGEX.match(lines) is not None or
        self._CHROMEOS_VERIFICATION_REGEX.match(lines) is not None)
# Register the syslog parser with the parsers manager when this module is
# imported.
manager.ParsersManager.RegisterParser(SyslogParser)