X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=tools%2Fprivoxy-log-parser.pl;h=c58726c269c56bbd3ff92c7df784b6984d3f2d62;hb=2e054979f96d3378ed6ded4d1fc2091e2bfa7d82;hp=1a8b8db54e3e967001bbabcf5215ba6862e7e67f;hpb=f3d0f652e9ce1ba3c0adc34863dae4a006ae875e;p=privoxy.git diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index 1a8b8db5..c58726c2 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -23,7 +23,7 @@ # hash key as input. # - Add --compress and --decompress options. # -# Copyright (c) 2007-2021 Fabian Keil +# Copyright (c) 2007-2022 Fabian Keil # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -43,7 +43,7 @@ use warnings; use Getopt::Long; use constant { - PRIVOXY_LOG_PARSER_VERSION => '0.9.4', + PRIVOXY_LOG_PARSER_VERSION => '0.9.5', # Feel free to mess with these ... DEFAULT_BACKGROUND => 'black', # Choose registered colour (like 'black') DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black') @@ -186,6 +186,8 @@ sub prepare_our_stuff() { 'configuration-line' => 'red', 'content-type' => 'yellow', 'HOST' => HEADER_DEFAULT_COLOUR, + 'tls-version' => 'pink', + 'cipher-suite' => 'light_cyan', ); %h_colours = %h; @@ -1458,7 +1460,8 @@ sub handle_loglevel_crunch($) { # Highlight crunch reason foreach my $reason (keys %reason_colours) { - $content =~ s@($reason)@$reason_colours{$reason}$1$h{'Standard'}@g; + # Crunch: Blocked: https://capture.condenastdigital.com/track?_o=cne&[...]&dim2=%7B%22adBlocked%[...] + $content =~ s@($reason)@$reason_colours{$reason}$1$h{'Standard'}@; } if ($content =~ m/\[too long, truncated\]$/) { @@ -1625,9 +1628,10 @@ sub handle_loglevel_connect($) { $c = highlight_matched_host($c, '(?<=for )[^\s]+'); $c =~ s@(?<=in slot )(\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^Socket/) { + } elsif ($c =~ m/^Socket \d+ (already|closed)/) { # Socket 16 already forgotten or never remembered. + # Socket 9 closed while waiting for client headers $c =~ s@(?<=Socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^The connection to/) { @@ -1644,7 +1648,7 @@ sub handle_loglevel_connect($) { $c =~ s@(?<=Assumed latency: )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^Stopped waiting for the request line/ or - $c =~ m/^No request line on socket \d received in time/ or + $c =~ m/^No request line on socket \d+ received in time/ or $c =~ m/^The client side of the connection on socket \d/) { # Stopped waiting for the request line. Timeout: 121. @@ -1801,8 +1805,13 @@ sub handle_loglevel_connect($) { } elsif ($c =~ m/^Optimistically sending /) { # Optimistically sending 318 bytes of client headers intended for www.privoxy.org + # Optimistically sending 318 bytes of client headers intended for www.privoxy.org. $c =~ s@(?<=sending )(\d+)@$h{'Number'}$1$h{'Standard'}@; - $c = highlight_matched_host($c, '(?<=for )[^\s]+'); + if ($c =~ /\.$/) { + $c = highlight_matched_host($c, '[^\s]+(?=\.)'); + } else { + $c = highlight_matched_host($c, '(?<=for )[^\s]+'); + } } elsif ($c =~ m/^Stopping to watch the client socket/) { @@ -1903,6 +1912,53 @@ sub handle_loglevel_connect($) { # Client socket 7 is no longer usable. The server socket has been closed. $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Socket timeout \d+ reached/) { + + # Socket timeout 3 reached: http://127.0.0.1:20000/no-filter/chunked-content/36 + $c =~ s@(?<=timeout )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c = highlight_matched_url($c, "(?<=reached: ).*") + + } elsif ($c =~ m/^Prepared to read up to /) { + + # Prepared to read up to 157 bytes of encrypted request body from the client. + $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Forwarding \d+ bytes /) { + + # Forwarding 157 bytes of encrypted request body. + $c =~ s@(?<=Forwarding )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Buffering encrypted client body/) { + + # Buffering encrypted client body. Prepared to read up to 2236 bytes. + $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^The last \d+ bytes of the encrypted request body have been read/) { + + # The last 6945 bytes of the encrypted request body have been read. + $c =~ s@(?<=The last )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Reducing the chunk offset from/) { + + # Reducing the chunk offset from 1096654 to 32704 after discarding 1063950 bytes to make room in the buffer. + # Reducing the chunk offset from 16219 to 128 after flushing 16091 bytes. + $c =~ s@(?<=\d to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=offset from )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=after discarding )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=after flushing )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Client socket \d+ is no longer usable/) { + + # Client socket 21 is no longer usable. The server socket has been closed. + $c =~ s@(?<=Client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^(Client|Server) successfully connected over/) { + + # Server successfully connected over TLSv1.3 (TLS_AES_256_GCM_SHA384). + # Client successfully connected over TLSv1.3 (TLS_AES_128_GCM_SHA256). + $c =~ s@(?<=connected over )(TLSv\d\.\d)@$h{'tls-version'}$1$h{'Standard'}@; + $c =~ s@(?<=\()([^)]+)@$h{'cipher-suite'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Looks like we / or $c =~ m/^Unsetting keep-alive flag/ or $c =~ m/^No connections to wait/ or @@ -2149,6 +2205,10 @@ sub handle_loglevel_error($) { $c =~ s@(?<=digested )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=of )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^The socks connection timed out after/) { + + # The socks connection timed out after 60 seconds. + $c =~ s@(?<=after )(\d+)@$h{'Number'}$1$h{'Standard'}@; } # XXX: There are probably more messages that deserve highlighting. @@ -2156,6 +2216,41 @@ sub handle_loglevel_error($) { return $c; } +sub handle_loglevel_received($) { + + my $c = shift; + + if ($c =~ m/^TLS from socket/) { + # TLS from socket 3: \x16\xda\xe2\xa2;\x0d\x0a + + $c =~ s@(?<=TLS from socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^from socket/) { + # from socket 3: HEAD http://p.p/ HTTP/1.1\x0d\x0aHost: p.p\x0d\x0aUser-Agent: curl/7.85.0\x0d\x0aAccept: */*\x0d\x0aProxy-Connection: Keep-Alive\x0d\x0a\x0d\x0a + + $c =~ s@(?<=from socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } + + return $c; +} + +sub handle_loglevel_writing($) { + + my $c = shift; + + if ($c =~ m/^to socket/) { + # to socket 11: HTTP/1.1 200 Connection established\x0d\x0a\x0d\x0a + + $c =~ s@(?<=to socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^TLS on socket /) { + # TLS on socket 9: o~\xfcS[\xfa\x8f\xd6\x96\xe6_\xc7$\x1b[...] + + $c =~ s@(?<=TLS on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } + + return $c; +} sub handle_loglevel_ignore($) { return shift; @@ -2614,8 +2709,8 @@ sub parse_loop() { 'Force' => \&handle_loglevel_force, 'Error' => \&handle_loglevel_error, 'Fatal error' => \&handle_loglevel_ignore, - 'Writing' => \&handle_loglevel_ignore, - 'Received' => \&handle_loglevel_ignore, + 'Writing' => \&handle_loglevel_writing, + 'Received' => \&handle_loglevel_received, 'Tagging' => \&handle_loglevel_tagging, 'Actions' => \&handle_loglevel_ignore, 'Unknown log level' => \&handle_loglevel_ignore, @@ -2741,7 +2836,7 @@ sub stats_loop() { } -# Convert a timestamp like 18:07:28.733 into miliseconds +# Convert a timestamp like 18:07:28.733 into milliseconds sub time_stamp_to_msecs($) { my $time_stamp = shift; @@ -2760,24 +2855,27 @@ sub time_stamp_to_msecs($) { sub inactivity_detection_loop() { our %cli_options; - my ($time_stamp, $thread, $log_level, $content); + my ($date, $time_stamp, $thread, $log_level, $content); my ($msecs, $previous_msecs, $inactivity); my $inactivity_threshold = $cli_options{'inactivity-threshold'}; - my $previous_message; - my $log_message_out_of_order = 0; + my $previous_date; + my $log_messages_out_of_order = 0; while (<>) { - (undef, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5); + ($date, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5); next if (not defined($log_level)); next if ($time_stamp eq "-"); $msecs = time_stamp_to_msecs($time_stamp); unless (defined $msecs) { - print "Failed to convert $time_stamp into miliseconds\n"; + print "Failed to convert $time_stamp into milliseconds\n"; + print "$_"; next; } unless (defined $previous_msecs) { $previous_msecs = $msecs; + $previous_date = $date; + print "$_"; next; } $inactivity = $msecs - $previous_msecs; @@ -2786,18 +2884,23 @@ sub inactivity_detection_loop() { # a Privoxy thread may be moved off schedule between # getting the timestamp for the log message and actually # writing it. - $log_message_out_of_order++; + $log_messages_out_of_order++; } if ($inactivity > $inactivity_threshold) { - #print "$previous_message"; - print "Detected inactivity: $inactivity msecs\n"; + if ($previous_date eq $date) { + print "Detected inactivity: $inactivity msecs\n"; + } else { + # While we could include the date in the timestamp + # we currently don't. + print "Detected date change. Timestamp difference ignored.\n"; + } } print "$_"; $previous_msecs = $msecs; - $previous_message = $_; + $previous_date = $date; } - if ($log_message_out_of_order) { - print "At least $log_message_out_of_order messages were written out of the chronological order.\n"; + if ($log_messages_out_of_order) { + print "At least $log_messages_out_of_order messages were written out of the chronological order.\n"; print "This can result in false positives. Consider sorting the log first.\n"; } } @@ -2976,8 +3079,9 @@ will hide the "filter foo caused 0 hits" message. =head1 OPTIONS -[B<--detect-inactivity>] Instead of syntax highlighting, detect inactivities -of more than B miliseconds. Mainly useful for debugging. +[B<--detect-inactivity>] Instead of syntax highlighting, detect periods +of log inactivity of more than the amount of milliseconds specified with +the B<--inactivity-threshold> option. Mainly useful for debugging. [B<--host-statistics-threshold>] Only show the request count for a host if it's above or equal to the given threshold. If the threshold is 0, host @@ -2988,8 +3092,9 @@ omitted, ANSI escape sequences are used unless B<--no-syntax-highlighting> is ac This option is only intended to make embedding log excerpts in web pages easier. It does not escape any input! -[B<--inactivity-threshold>] Specifies the number of miliseconds between log -messages to consider inactivity when running in [B<--detect-inactivity>] mode. +[B<--inactivity-threshold msecs>] Specifies the number of milliseconds between +log messages to consider inactivity when running in [B<--detect-inactivity>] +mode. [B<--keep-date>] Don't remove the date when printing highlighted log messages. Useful when parsing multiple log files at once.