#!/usr/bin/python

"""Summarize the contents of a syslog log file.

The syslog(3) service writes system log messages in a certain format:

	Jan 17 19:21:50 zeus kernel: klogd 1.3-3, log source = /proc/kmsg started.

This program summarizes the contents of such a file, by displaying each
unique (except for the time) line once, and also the number of times such
a line occurs in the input. The lines are displayed in the order they occur
in the input.

Lars Wirzenius <liw@iki.fi>"""

# Default path of the file holding the ignore patterns, one per line.
# main() replaces it with the argument of -i/--ignore when that is given.
IGNORE_FILENAME = "/etc/syslog-summary/ignore"
# Set to 1 by -q/--quiet.  When set, summarize() prints its header and
# totals only for files that produced at least one summary line.
QUIET = 0

import sys, regex, regsub, getopt

# Matches the timestamp prefix of a log line, e.g. "Jan 17 19:21:50 ".
# The pattern is written for the old `regex' module (backslashed
# parentheses and bars for grouping and alternation); summarize() applies
# it to the first `datelen' characters of each line.
datepat = regex.compile("^\(Jan\|Feb\|Mar\|Apr\|May\|Jun\|Jul\|Aug\|Sep\|Oct\|Nov\|Dec\) [ 0-9][0-9] [ 0-9][0-9]:[0-9][0-9]:[0-9][0-9] $")
# Number of leading characters summarize() treats as the timestamp
# (the 15-character date and time plus the space that follows it).
datelen=16
# Matches a "word word[digits]: " tag such as "zeus named[123]: ";
# summarize() uses it to decide whether to strip the bracketed number.
pidpat = regex.compile("[^ ]* [^ ]*\[[0-9][0-9]*\]: ")

# Compiled ignore patterns; main() fills this in from IGNORE_FILENAME
# and should_be_ignored() reads it.
ignore_pats = []

def read_patterns(filename):
	"""Return the compiled ignore patterns listed in `filename'.

	Each non-empty line of the file is compiled as one pattern, after
	its trailing newline has been removed.  Empty lines are skipped:
	an empty pattern would match every log line and so would cause
	the whole input to be ignored.

	If the file cannot be opened, an empty list is returned.  An error
	raised while compiling a pattern is passed on to the caller; the
	file is closed in either case.
	"""
	try:
		f = open(filename, "r")
	except IOError:
		# A missing or unreadable ignore file just means "ignore nothing".
		return []
	pats = []
	try:
		for line in f.readlines():
			if line[-1:] == "\n":
				line = line[:-1]
			if not line:
				continue
			pats.append(regex.compile(line))
	finally:
		# Close the file even when a malformed pattern raises.
		f.close()
	return pats

def should_be_ignored(line):
	"""Return 1 if `line' matches any pattern in ignore_pats, else 0.

	The patterns are tried in order and the search stops at the first
	one that matches.
	"""
	for candidate in ignore_pats:
		position = candidate.search(line)
		if position < 0:
			# No match for this pattern; try the next one.
			continue
		return 1
	return 0

def summarize(filename):
	counts = {}
	order = []
	ignored_count = 0
	if not QUIET:
		print "Summarizing %s" % filename
	file = open(filename, "r")
	line = file.readline()
	while line:
		date, rest = line[:datelen], line[datelen:]
		if datepat.search(date) == -1:
			rest = line
			print "bad date", "<" + date + ">"
		elif pidpat.search(rest) >= 0:
			rest = regsub.sub("\[[0-9]*\]:", ":", rest)
		if should_be_ignored(rest):
			ignored_count = ignored_count + 1
		else:
			if rest in order:
				counts[rest] = counts[rest] + 1
			else:
				counts[rest] = 1
				order.append(rest)
		line = file.readline()
	file.close()
	if QUIET and order:
		print "Summarizing %s" % filename
	if not QUIET or order:
		print "%8d Patterns to ignore" % len(ignore_pats)
		print "%8d Ignored lines" % ignored_count
	for rest in order:
		print "%8d %s" % (counts[rest], rest),
	if not QUIET or order:
		print

def main():
	"""Parse the command line and summarize each file named on it.

	Options:
	  -i FILE, --ignore=FILE   read the ignore patterns from FILE
	  -q, --quiet              set QUIET

	The remaining arguments are passed to summarize() one at a time,
	after the ignore patterns have been loaded.
	"""
	global ignore_pats, IGNORE_FILENAME, QUIET

	short_opts = "i:q"
	long_opts = ["ignore=", "quiet"]
	options, filenames = getopt.getopt(sys.argv[1:], short_opts, long_opts)

	for flag, value in options:
		if flag in ("-i", "--ignore"):
			IGNORE_FILENAME = value
		elif flag in ("-q", "--quiet"):
			QUIET = 1

	# Load the patterns only after the options have been processed, so
	# that -i/--ignore takes effect.
	ignore_pats = read_patterns(IGNORE_FILENAME)
	for name in filenames:
		summarize(name)

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
	main()
