1#!/usr/bin/perl
2use IO::File ();
3use File::Find qw(find);
4use Text::Wrap qw(wrap);
5use Getopt::Long qw(GetOptions);
6use Pod::Usage qw(pod2usage);
7use Cwd qw(cwd);
8use File::Spec;
9use strict;
10
11my %opt = (
12  frames  => 3,
13  lines   => 0,
14  tests   => 0,
15  top     => 0,
16  verbose => 0,
17);
18
19GetOptions(\%opt, qw(
20            dir=s
21            frames=i
22            hide=s@
23            lines!
24            output-file=s
25            tests!
26            top=i
27            verbose+
28          )) or pod2usage(2);
29
30# Setup the directory to process
31if (exists $opt{dir}) {
32  $opt{dir} = File::Spec->canonpath($opt{dir});
33}
34else {
35  # Check if we're in 't'
36  $opt{dir} = cwd =~ /\/t$/ ? '..' : '.';
37
38  # Check if we're in the right directory
39  -d "$opt{dir}/$_" or die "$0: must be run from the perl source directory"
40                         . " when --dir is not given\n"
41      for qw(t lib ext);
42}
43
44# Assemble regex for functions whose leaks should be hidden
45# (no, a hash won't be significantly faster)
46my $hidden = do { local $"='|'; $opt{hide} ? qr/^(?:@{$opt{hide}})$/o : '' };
47
48# Setup our output file handle
49# (do it early, as it may fail)
50my $fh = \*STDOUT;
51if (exists $opt{'output-file'}) {
52  $fh = new IO::File ">$opt{'output-file'}"
53        or die "$0: cannot open $opt{'output-file'} ($!)\n";
54}
55
56# These hashes will receive the error and leak summary data:
57#
58# %error = (
59#   error_name => {
60#                   stack_frame => {
61#                                    test_script => occurences
62#                                  }
63#                 }
64# );
65#
66# %leak = (
67#   leak_type => {
68#                  stack_frames => {
69#                                    test_script => occurences
70#                                  }
71#                } # stack frames are separated by '<'s
72# );
73my(%error, %leak);
74
75# Collect summary data
76find({wanted => \&filter, no_chdir => 1}, $opt{dir});
77
78# Format the output nicely
79$Text::Wrap::columns = 80;
80$Text::Wrap::unexpand = 0;
81
82# Write summary
83summary($fh, \%error, \%leak);
84
85exit 0;
86
87sub summary {
88  my($fh, $error, $leak) = @_;
89  my(%ne, %nl, %top);
90
91  # Prepare the data
92
93  for my $e (keys %$error) {
94    for my $f (keys %{$error->{$e}}) {
95      my($func, $file, $line) = split /:/, $f;
96      my $nf = $opt{lines} ? "$func ($file:$line)" : "$func ($file)";
97      $ne{$e}{$nf}{count}++;
98      while (my($k,$v) = each %{$error->{$e}{$f}}) {
99        $ne{$e}{$nf}{tests}{$k} += $v;
100        $top{$k}{error}++;
101      }
102    }
103  }
104
105  for my $l (keys %$leak) {
106    for my $s (keys %{$leak->{$l}}) {
107      my $ns = join '<', map {
108                 my($func, $file, $line) = split /:/;
109                 /:/ ? $opt{lines}
110                       ? "$func ($file:$line)" : "$func ($file)"
111                     : $_
112               } split /</, $s;
113      $nl{$l}{$ns}{count}++;
114      while (my($k,$v) = each %{$leak->{$l}{$s}}) {
115        $nl{$l}{$ns}{tests}{$k} += $v;
116        $top{$k}{leak}++;
117      }
118    }
119  }
120
121  # Print the Top N
122
123  if ($opt{top}) {
124    for my $what (qw(error leak)) {
125      my @t = sort { $top{$b}{$what} <=> $top{$a}{$what} or $a cmp $b }
126              grep $top{$_}{$what}, keys %top;
127      @t > $opt{top} and splice @t, $opt{top};
128      my $n = @t;
129      my $s = $n > 1 ? 's' : '';
130      my $prev = 0;
131      print $fh "Top $n test scripts for ${what}s:\n\n";
132      for my $i (1 .. $n) {
133        $n = $top{$t[$i-1]}{$what};
134        $s = $n > 1 ? 's' : '';
135        printf $fh "    %3s %-40s %3d $what$s\n",
136                   $n != $prev ? "$i." : '', $t[$i-1], $n;
137        $prev = $n;
138      }
139      print $fh "\n";
140    }
141  }
142
143  # Print the real summary
144
145  print $fh "MEMORY ACCESS ERRORS\n\n";
146
147  for my $e (sort keys %ne) {
148    print $fh qq("$e"\n);
149    for my $frame (sort keys %{$ne{$e}}) {
150      my $data = $ne{$e}{$frame};
151      my $count = $data->{count} > 1 ? " [$data->{count} paths]" : '';
152      print $fh ' 'x4, "$frame$count\n",
153                format_tests($data->{tests}), "\n";
154    }
155    print $fh "\n";
156  }
157
158  print $fh "\nMEMORY LEAKS\n\n";
159
160  for my $l (sort keys %nl) {
161    print $fh qq("$l"\n);
162    for my $frames (sort keys %{$nl{$l}}) {
163      my $data = $nl{$l}{$frames};
164      my @stack = split /</, $frames;
165      $data->{count} > 1 and $stack[-1] .= " [$data->{count} paths]";
166      print $fh join('', map { ' 'x4 . "$_:$stack[$_]\n" } 0 .. $#stack ),
167                format_tests($data->{tests}), "\n\n";
168    }
169  }
170}
171
172sub format_tests {
173  my $tests = shift;
174  my $indent = ' 'x8;
175
176  if ($opt{tests}) {
177    return wrap($indent, $indent, join ', ', sort keys %$tests);
178  }
179  else {
180    my $count = keys %$tests;
181    my $s = $count > 1 ? 's' : '';
182    return $indent . "triggered by $count test$s";
183  }
184}
185
186sub filter {
187  debug(2, "$File::Find::name\n");
188
189  # Only process '*.t.valgrind' files
190  /(.*)\.t\.valgrind$/ or return;
191
192  # Strip all unnecessary stuff from the test name
193  my $test = $1;
194  $test =~ s/^(?:(?:\Q$opt{dir}\E|[.t])\/)+//;
195
196  debug(1, "processing $test ($_)\n");
197
198  # Get all the valgrind output lines
199  my @l = do {
200    my $fh = new IO::File $_ or die "$0: cannot open $_ ($!)\n";
201    # Process outputs can interrupt each other, so sort by pid first
202    my %pid; local $_;
203    while (<$fh>) {
204      chomp;
205      s/^==(\d+)==\s?// and push @{$pid{$1}}, $_;
206    }
207    map @$_, values %pid;
208  };
209
210  # Setup some useful regexes
211  my $hexaddr  = '0x[[:xdigit:]]+';
212  my $topframe = qr/^\s+at $hexaddr:\s+/;
213  my $address  = qr/^\s+Address $hexaddr is \d+ bytes (?:before|inside|after) a block of size \d+/;
214  my $leak     = qr/^\s*\d+ bytes in \d+ blocks are (still reachable|(?:definite|possib)ly lost)/;
215
216  for my $i (0 .. $#l) {
217    $l[$i]   =~ $topframe or next; # Match on any topmost frame...
218    $l[$i-1] =~ $address and next; # ...but not if it's only address details
219    my $line = $l[$i-1]; # The error / leak description line
220    my $j    = $i;
221
222    if ($line =~ $leak) {
223      debug(2, "LEAK: $line\n");
224
225      my $type   = $1;     # Type of leak (still reachable, ...)
226      my $inperl = 0;      # Are we inside the perl source? (And how deep?)
227      my @stack;           # Call stack
228
229      while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(\w+)\s+\((?:([^:]+):(\d+)|[^)]+)\)/o) {
230        my($func, $file, $lineno) = ($1, $2, $3);
231
232        # If the stack frame is inside perl => increment $inperl
233        # If we've already been inside perl, but are no longer => leave
234        defined $file && ++$inperl or $inperl && last;
235
236        # A function that should be hidden? => clear stack and leave
237        $hidden && $func =~ $hidden and @stack = (), last;
238
239        # Add stack frame if it's within our threshold
240        if ($inperl <= $opt{frames}) {
241          push @stack, $inperl ? "$func:$file:$lineno" : $func;
242        }
243      }
244
245      # If there's something on the stack and we've seen perl code,
246      # add this memory leak to the summary data
247      @stack and $inperl and $leak{$type}{join '<', @stack}{$test}++;
248    } else {
249      debug(1, "ERROR: $line\n");
250
251      # Simply find the topmost frame in the call stack within
252      # the perl source code
253      while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(?:(\w+)\s+\(([^:]+):(\d+)\))?/o) {
254        if (defined $1) {
255          $error{$line}{"$1:$2:$3"}{$test}++;
256          last;
257        }
258      }
259    }
260  }
261}
262
263sub debug {
264  my $level = shift;
265  $opt{verbose} >= $level and print STDERR @_;
266}
267
268__END__
269
270=head1 NAME
271
272valgrindpp.pl - A post processor for make test.valgrind
273
274=head1 SYNOPSIS
275
276valgrindpp.pl [B<--dir>=I<dir>] [B<--frames>=I<number>]
277[B<--hide>=I<identifier>] [B<--lines>]
278[B<--output-file>=I<file>] [B<--tests>]
279[B<--top>=I<number>] [B<--verbose>]
280
281=head1 DESCRIPTION
282
283B<valgrindpp.pl> is a post processor for I<.valgrind> files
284created during I<make test.valgrind>. It collects all these
285files, extracts most of the information and produces a
286significantly shorter summary of all detected memory access
287errors and memory leaks.
288
289=head1 OPTIONS
290
291=over 4
292
293=item B<--dir>=I<dir>
294
295Recursively process I<.valgrind> files in I<dir>. If this
296options is not given, B<valgrindpp.pl> must be run from
297either the perl source or the I<t> directory and will process
298all I<.valgrind> files within the distribution.
299
300=item B<--frames>=I<number>
301
302Number of stack frames within the perl source code to
303consider when distinguishing between memory leak sources.
304Increasing this value will give you a longer backtrace,
305while decreasing the number will show you fewer sources
306for memory leaks. The default is 3 frames.
307
308=item B<--hide>=I<identifier>
309
310Hide all memory leaks that have I<identifier> in their backtrace.
311Useful if you want to hide leaks from functions that are known to
312have lots of memory leaks. I<identifier> can also be a regular
313expression, in which case all leaks with symbols matching the
314expression are hidden. Can be given multiple times.
315
316=item B<--lines>
317
318Show line numbers for stack frames. This is useful for further
319increasing the error/leak resolution, but makes it harder to
320compare different reports using I<diff>.
321
322=item B<--output-file>=I<file>
323
324Redirect the output into I<file>. If this option is not
325given, the output goes to I<stdout>.
326
327=item B<--tests>
328
329List all tests that trigger memory access errors or memory
330leaks explicitly instead of only printing a count.
331
332=item B<--top>=I<number>
333
334List the top I<number> test scripts for memory access errors
335and memory leaks. Set to C<0> for no top-I<n> statistics.
336
337=item B<--verbose>
338
339Increase verbosity level. Can be given multiple times.
340
341=back
342
343=head1 COPYRIGHT
344
345Copyright 2003 by Marcus Holland-Moritz <mhx@cpan.org>.
346
347This program is free software; you may redistribute it
348and/or modify it under the same terms as Perl itself.
349
350=cut
351