#
# http://www.fabiankeil.de/sourcecode/privoxy-log-parser/
#
-# $Id: privoxy-log-parser.pl,v 1.70 2009/12/31 11:55:27 fabiankeil Exp $
+# $Id: privoxy-log-parser.pl,v 1.234 2010/07/21 16:05:44 fk Exp $
#
# TODO:
# - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting
use Getopt::Long;
use constant {
- PRIVOXY_LOG_PARSER_VERSION => '0.5',
+ PRIVOXY_LOG_PARSER_VERSION => '0.6',
# Feel free to mess with these ...
DEFAULT_BACKGROUND => 'black', # Choose registered colour (like 'black')
DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black')
CLI_OPTION_NO_EMBEDDED_CSS => 0,
CLI_OPTION_NO_MSECS => 0,
CLI_OPTION_NO_SYNTAX_HIGHLIGHTING => 0,
- CLI_OPTION_ERROR_LOG_FILE => '/var/log/privoxy-log.log',
+ CLI_OPTION_SHORTEN_THREAD_IDS => 0,
CLI_OPTION_SHOW_INEFFECTIVE_FILTERS => 0,
CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0,
CLI_OPTION_STATISTICS => 0,
PUNISH_MISSING_HIGHLIGHT_KNOWLEDGE_WITH_DEATH => 1,
LOG_UNPARSED_LINES_TO_EXTRA_FILE => 0,
+ ERROR_LOG_FILE => '/var/log/privoxy-log-parser',
# You better leave these alone unless you know what you're doing.
COLOUR_RESET => "\033[0;0m",
my $html_output_mode;
my $no_msecs_mode; # XXX: should probably be removed
+my $shorten_thread_ids;
my $line_end;
sub prepare_our_stuff () {
} elsif ($c =~ m/^New host is: ([^\s]*)\./) {
- # New host is: trac.vidalia-project.net. Crunching Referer: http://www.vidalia-project.net/
- $c = highlight_matched_host($c, '(?<=New host is: )[^\s]+');
- $c = highlight_matched_url($c, '(?<=Crunching Referer: )[^\s]+');
+ # New host is: trac.vidalia-project.net. Crunching Referer: http://www.vidalia-project.net/!
+ $c = highlight_matched_host($c, '(?<=New host is: )[^\s]+(?=\.)');
+ $c = highlight_matched_url($c, '(?<=Crunching Referer: )[^\s!]+');
} elsif ($c =~ m/^Text mode enabled by force. (Take cover)!/) {
or $c =~ m/^Appended client IP address to/
or $c =~ m/^Removing 'Connection: close' to imply keep-alive./
or $c =~ m/^keep-alive support is disabled/
+ or $c =~ m/^Continue hack in da house/
)
{
# XXX: Some of these may need highlighting
# Appended client IP address to X-Forwarded-For: 10.0.0.2, 10.0.0.1
# Removing 'Connection: close' to imply keep-alive.
# keep-alive support is disabled. Crunching: Keep-Alive: 300.
+ # Continue hack in da house.
} elsif ($c =~ m/^scanning headers for:/) {
# Waiting for up to 4999 bytes from the client.
$c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
- } elsif ($c =~ m/^Looks like we rea/ or
+ } elsif ($c =~ m/^Looks like we / or
$c =~ m/^Unsetting keep-alive flag/ or
$c =~ m/^No connections to wait/ or
$c =~ m/^Complete client request received/ or
# Looks like we reached the end of the last chunk. We better stop reading.
# Looks like we read the end of the last chunk together with the server \
# headers. We better stop reading.
+ # Looks like we got the last chunk together with the server headers. \
+ # We better stop reading.
# Unsetting keep-alive flag.
# No connections to wait for left.
# Client request arrived in time or the client closed the connection.
# Reloading configuration file '/usr/local/etc/privoxy/config'
$c =~ s@(?<=loading configuration file \')([^\']*)@$h{'file'}$1$h{'Standard'}@;
+ } elsif ($c =~ m/^Loading (actions|filter) file: /) {
+
+ # Loading actions file: /usr/local/etc/privoxy/default.action
+ # Loading filter file: /usr/local/etc/privoxy/default.filter
+ $c =~ s@(?<= file: )(.*)$@$h{'file'}$1$h{'Standard'}@;
+
} elsif ($c =~ m/^exiting by signal/) {
# exiting by signal 15 .. bye
$c =~ m/^Malformerd HTTP headers detected and MS IIS5 hack enabled/ or
$c =~ m/^Invalid \"chunked\" transfer/ or
$c =~ m/^Support for/ or
- $c =~ m/^Flushing header and buffers/
+ $c =~ m/^Flushing header and buffers/ or
+ $c =~ m/^Can not resolve/
) {
# No logfile configured. Please enable it before reporting any problems.
# Support for 'Connection: keep-alive' is experimental, incomplete and\
# known not to work properly in some situations.
# Flushing header and buffers. Stepping back from filtering.
+ # Can not resolve doesnotexist: hostname nor servname provided, or not known
} else {
sub gather_loglevel_connect_stats ($$) {
- my $c = shift;
- my $thread = shift;
+ my ($c, $thread) = @_;
our %thread_data;
our %stats;
}
}
-sub gather_loglevel_header_stats ($) {
+sub gather_loglevel_header_stats ($$) {
- my $c = shift;
- my $thread = shift;
+ my ($c, $thread) = @_;
our %stats;
if ($c =~ m/^A HTTP\/1\.1 response without/ or
. $line_end;
}
+sub shorten_thread_id ($) {
+ # Return the short alias for the given thread id, assigning a new one the first time an id is seen.
+ my $thread_id = shift;
+
+ our %short_thread_ids; # package-level map: original thread id -> short alias
+ our $max_threadid; # package-level counter supplying the next alias; initialisation is not visible here
+
+ unless (defined $short_thread_ids{$thread_id}) {
+ $short_thread_ids{$thread_id} = sprintf "%.3d", $max_threadid++; # "%.3d" zero-pads to at least three digits
+ }
+
+ return $short_thread_ids{$thread_id}
+}
+
sub parse_loop () {
my ($day, $time_stamp, $thread, $log_level, $content, $c, $msecs);
while (<>) {
if (m/^(\w{3} \d{2}) (\d\d:\d\d:\d\d)\.?(\d+)? (?:Privoxy\()?([^\)\s]*)[\)]? ([\w -]*): (.*?)\r?$/) {
- $thread = $t = $4;
+ $thread = $t = ($shorten_thread_ids) ? shorten_thread_id($4) : $4;
$req{$t}{'day'} = $day = $1;
$req{$t}{'time-stamp'} = $time_stamp = $2;
$req{$t}{'msecs'} = $msecs = $3 ? $3 : 0; # Only the cool kids have micro second resolution;
sub stats_loop () {
- my ($day, $time_stamp, $thread, $log_level, $content, $c, $msecs);
+ my ($day, $time_stamp, $msecs, $thread, $log_level, $content);
my %log_level_handlers = (
'Re-Filter' => \&handle_loglevel_ignore,
'Header' => \&gather_loglevel_header_stats,
$day = $1;
$time_stamp = $2;
$msecs = $3 ? $3 : 0;
- $log_level = $5;
- $content = $c = $6;
$thread = $4;
+ $log_level = $5;
+ $content = $6;
if (defined($log_level_handlers{$log_level})) {
'no-syntax-highlighting' => CLI_OPTION_NO_SYNTAX_HIGHLIGHTING,
'no-embedded-css' => CLI_OPTION_NO_EMBEDDED_CSS,
'no-msecs' => CLI_OPTION_NO_MSECS,
+ 'shorten-thread-ids' => CLI_OPTION_SHORTEN_THREAD_IDS,
'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS,
'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES,
'statistics' => CLI_OPTION_STATISTICS,
'no-syntax-highlighting' => \$cli_options{'no-syntax-highlighting'},
'no-embedded-css' => \$cli_options{'no-embedded-css'},
'no-msecs' => \$cli_options{'no-msecs'},
+ 'shorten-thread-ids' => \$cli_options{'shorten-thread-ids'},
'show-ineffective-filters' => \$cli_options{'show-ineffective-filters'},
'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'},
'statistics' => \$cli_options{'statistics'},
$html_output_mode = cli_option_is_set('html-output');
$no_msecs_mode = cli_option_is_set('no-msecs');
+ $shorten_thread_ids = cli_option_is_set('shorten-thread-ids');
$line_end = get_line_end();
}
[--no-embedded-css]
[--no-msecs]
[--no-syntax-highlighting]
+ [--shorten-thread-ids]
[--show-ineffective-filters]
[--statistics]
[--title $cli_options{'title'}]
=head1 SYNOPSIS
B<privoxy-log-parser> [B<--accept-unknown-messages>] [B<--html-output>]
-[B<--no-msecs>] [B<--no-syntax-higlighting>] [B<--show-ineffective-filters>]
-[B<--version>]
+[B<--no-msecs>] [B<--no-syntax-highlighting>] [B<--shorten-thread-ids>]
+[B<--show-ineffective-filters>] [B<--version>]
=head1 DESCRIPTION
codes don't work, or if the terminal itself doesn't support the control
codes.
+[B<--shorten-thread-ids>] Shorten the thread ids to a three-digit decimal number.
+Note that the mapping from thread ids to shortened ids is created at
+run-time and thus varies with the input.
+
[B<--show-ineffective-filters>] Don't suppress log lines for filters
that didn't modify the content.
[B<--statistics>] Gather various statistics instead of syntax highlighting
log messages. This is an experimental feature, if the results look wrong
-they very well might be. Also note that the results a pretty much guaranteed
+they very well might be. Also note that the results are pretty much guaranteed
to be incorrect if Privoxy and Privoxy-Log-Parser aren't in sync.
[B<--version>] Print version and exit.