From 7ad18083646e96b9ff493d8581fdccbae989d78e Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 11 Dec 2010 15:40:29 +0000 Subject: [PATCH] Implement a --unbreak-lines-only option to try to revert MUA breakage. --- tools/privoxy-log-parser.pl | 38 +++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index b3a2e501..f4b1309c 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,7 +8,7 @@ # # http://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.105 2010/11/13 20:37:39 fabiankeil Exp $ +# $Id: privoxy-log-parser.pl,v 1.106 2010/12/11 15:36:47 fabiankeil Exp $ # # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting @@ -61,6 +61,7 @@ use constant { CLI_OPTION_SHOW_INEFFECTIVE_FILTERS => 0, CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0, CLI_OPTION_STATISTICS => 0, + CLI_OPTION_UNBREAK_LINES_ONLY => 0, CLI_OPTION_URL_STATISTICS_THRESHOLD => 0, CLI_OPTION_HOST_STATISTICS_THRESHOLD => 0, @@ -2282,6 +2283,27 @@ sub stats_loop () { } +sub unbreak_lines_only_loop() { + my $log_messages_reached = 0; + while (<>) { + chomp; + + # Log level other than LOG_LEVEL_CLF? + if (m/^(\w{3} \d{2}) (\d\d:\d\d:\d\d)\.?(\d+)? (?:Privoxy\()?([^\)\s]*)[\)]? ([\w -]*): (.*?)\r?$/ or + # LOG_LEVEL_CLF? + m/^((?:\d+\.\d+\.\d+\.\d+|[:\d]+)) - - \[(.*)\] "(.*)" (\d+) (\d+)/) { + $log_messages_reached = 1; + print "\n"; + + } else { + # Wrapped message + } + s@
$@@; + print; + print "\n" unless $log_messages_reached; + } +} + sub VersionMessage { my $version_message; @@ -2305,6 +2327,7 @@ sub get_cli_options () { 'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES, 'statistics' => CLI_OPTION_STATISTICS, 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD, + 'unbreak-lines-only' => CLI_OPTION_UNBREAK_LINES_ONLY, 'host-statistics-threshold'=> CLI_OPTION_HOST_STATISTICS_THRESHOLD, ); @@ -2318,6 +2341,7 @@ sub get_cli_options () { 'show-ineffective-filters' => \$cli_options{'show-ineffective-filters'}, 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'}, 'statistics' => \$cli_options{'statistics'}, + 'unbreak-lines-only' => \$cli_options{'unbreak-lines-only'}, 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'}, 'host-statistics-threshold=s'=> \$cli_options{'host-statistics-threshold'}, 'version' => sub { VersionMessage && exit(0) }, @@ -2348,6 +2372,7 @@ Options and their default values if they have any: [--shorten-thread-ids] [--show-ineffective-filters] [--statistics] + [--unbreak-lines-only] [--url-statistics-threshold $cli_options{'url-statistics-threshold'}] [--title $cli_options{'title'}] [--version] @@ -2368,7 +2393,10 @@ sub main () { print_intro(); - if (cli_option_is_set('statistics')) { + # XXX: should explicitly reject incompatible argument combinations + if (cli_option_is_set('unbreak-lines-only')) { + unbreak_lines_only_loop(); + } elsif (cli_option_is_set('statistics')) { stats_loop(); } else { parse_loop(); @@ -2443,6 +2471,12 @@ log messages. This is an experimental feature, if the results look wrong they very well might be. Also note that the results are pretty much guaranteed to be incorrect if Privoxy and Privoxy-Log-Parser aren't in sync. +[B<--unbreak-lines-only>] Tries to fix lines that got messed up by a broken or +interestingly configured mail client and thus are no longer recognized properly. 
+Only fixes some breakage, but may be good enough or at least better than nothing. +Doesn't do anything else, so you probably want to pipe the output into +B<privoxy-log-parser> again. + [B<--url-statistics-threshold>] Only show the request count for a ressource if it's above or equal to the given threshold. If the threshold is 0, URL statistics are disabled. -- 2.39.2