1package B::Xref;
2
3our $VERSION = '1.01';
4
5=head1 NAME
6
7B::Xref - Generates cross reference reports for Perl programs
8
9=head1 SYNOPSIS
10
11perl -MO=Xref[,OPTIONS] foo.pl
12
13=head1 DESCRIPTION
14
15The B::Xref module is used to generate a cross reference listing of all
16definitions and uses of variables, subroutines and formats in a Perl program.
17It is implemented as a backend for the Perl compiler.
18
19The report generated is in the following format:
20
21    File filename1
22      Subroutine subname1
23	Package package1
24	  object1        line numbers
25	  object2        line numbers
26	  ...
27	Package package2
28	...
29
30Each B<File> section reports on a single file. Each B<Subroutine> section
31reports on a single subroutine apart from the special cases
32"(definitions)" and "(main)". These report, respectively, on subroutine
33definitions found by the initial symbol table walk and on the main part of
34the program or module external to all subroutines.
35
36The report is then grouped by the B<Package> of each variable,
37subroutine or format with the special case "(lexicals)" meaning
38lexical variables. Each B<object> name (implicitly qualified by its
39containing B<Package>) includes its type character(s) at the beginning
40where possible. Lexical variables are easier to track and even
41included dereferencing information where possible.
42
43The C<line numbers> are a comma separated list of line numbers (some
44preceded by code letters) where that object is used in some way.
45Simple uses aren't preceded by a code letter. Introductions (such as
46where a lexical is first defined with C<my>) are indicated with the
47letter "i". Subroutine and method calls are indicated by the character
48"&".  Subroutine definitions are indicated by "s" and format
49definitions by "f".
50
51=head1 OPTIONS
52
53Option words are separated by commas (not whitespace) and follow the
54usual conventions of compiler backend options.
55
56=over 8
57
58=item C<-oFILENAME>
59
60Directs output to C<FILENAME> instead of standard output.
61
62=item C<-r>
63
64Raw output. Instead of producing a human-readable report, outputs a line
65in machine-readable form for each definition/use of a variable/sub/format.
66
67=item C<-d>
68
69Don't output the "(definitions)" sections.
70
71=item C<-D[tO]>
72
73(Internal) debug options, probably only useful if C<-r> included.
74The C<t> option prints the object on the top of the stack as it's
75being tracked. The C<O> option prints each operator as it's being
76processed in the execution order of the program.
77
78=back
79
80=head1 BUGS
81
82Non-lexical variables are quite difficult to track through a program.
83Sometimes the type of a non-lexical variable's use is impossible to
84determine. Introductions of non-lexical non-scalars don't seem to be
85reported properly.
86
87=head1 AUTHOR
88
89Malcolm Beattie, mbeattie@sable.ox.ac.uk.
90
91=cut
92
93use strict;
94use Config;
95use B qw(peekop class comppadlist main_start svref_2object walksymtable
96         OPpLVAL_INTRO SVf_POK OPpOUR_INTRO cstring
97        );
98
99sub UNKNOWN { ["?", "?", "?"] }
100
101my @pad;			# lexicals in current pad
102				# as ["(lexical)", type, name]
103my %done;			# keyed by $$op: set when each $op is done
104my $top = UNKNOWN;		# shadows top element of stack as
105				# [pack, type, name] (pack can be "(lexical)")
106my $file;			# shadows current filename
107my $line;			# shadows current line number
108my $subname;			# shadows current sub name
109my %table;			# Multi-level hash to record all uses etc.
110my @todo = ();			# List of CVs that need processing
111
112my %code = (intro => "i", used => "",
113	    subdef => "s", subused => "&",
114	    formdef => "f", meth => "->");
115
116
117# Options
118my ($debug_op, $debug_top, $nodefs, $raw);
119
120sub process {
121    my ($var, $event) = @_;
122    my ($pack, $type, $name) = @$var;
123    if ($type eq "*") {
124	if ($event eq "used") {
125	    return;
126	} elsif ($event eq "subused") {
127	    $type = "&";
128	}
129    }
130    $type =~ s/(.)\*$/$1/g;
131    if ($raw) {
132	printf "%-16s %-12s %5d %-12s %4s %-16s %s\n",
133	    $file, $subname, $line, $pack, $type, $name, $event;
134    } else {
135	# Wheee
136	push(@{$table{$file}->{$subname}->{$pack}->{$type.$name}->{$event}},
137	    $line);
138    }
139}
140
141sub load_pad {
142    my $padlist = shift;
143    my ($namelistav, $vallistav, @namelist, $ix);
144    @pad = ();
145    return if class($padlist) eq "SPECIAL";
146    ($namelistav,$vallistav) = $padlist->ARRAY;
147    @namelist = $namelistav->ARRAY;
148    for ($ix = 1; $ix < @namelist; $ix++) {
149	my $namesv = $namelist[$ix];
150	next if class($namesv) eq "SPECIAL";
151	my ($type, $name) = $namesv->PV =~ /^(.)([^\0]*)(\0.*)?$/;
152	$pad[$ix] = ["(lexical)", $type || '?', $name || '?'];
153    }
154    if ($Config{useithreads}) {
155	my (@vallist);
156	@vallist = $vallistav->ARRAY;
157	for ($ix = 1; $ix < @vallist; $ix++) {
158	    my $valsv = $vallist[$ix];
159	    next unless class($valsv) eq "GV";
160	    # these pad GVs don't have corresponding names, so same @pad
161	    # array can be used without collisions
162	    $pad[$ix] = [$valsv->STASH->NAME, "*", $valsv->NAME];
163	}
164    }
165}
166
167sub xref {
168    my $start = shift;
169    my $op;
170    for ($op = $start; $$op; $op = $op->next) {
171	last if $done{$$op}++;
172	warn sprintf("top = [%s, %s, %s]\n", @$top) if $debug_top;
173	warn peekop($op), "\n" if $debug_op;
174	my $opname = $op->name;
175	if ($opname =~ /^(or|and|mapwhile|grepwhile|range|cond_expr)$/) {
176	    xref($op->other);
177	} elsif ($opname eq "match" || $opname eq "subst") {
178	    xref($op->pmreplstart);
179	} elsif ($opname eq "substcont") {
180	    xref($op->other->pmreplstart);
181	    $op = $op->other;
182	    redo;
183	} elsif ($opname eq "enterloop") {
184	    xref($op->redoop);
185	    xref($op->nextop);
186	    xref($op->lastop);
187	} elsif ($opname eq "subst") {
188	    xref($op->pmreplstart);
189	} else {
190	    no strict 'refs';
191	    my $ppname = "pp_$opname";
192	    &$ppname($op) if defined(&$ppname);
193	}
194    }
195}
196
197sub xref_cv {
198    my $cv = shift;
199    my $pack = $cv->GV->STASH->NAME;
200    $subname = ($pack eq "main" ? "" : "$pack\::") . $cv->GV->NAME;
201    load_pad($cv->PADLIST);
202    xref($cv->START);
203    $subname = "(main)";
204}
205
206sub xref_object {
207    my $cvref = shift;
208    xref_cv(svref_2object($cvref));
209}
210
211sub xref_main {
212    $subname = "(main)";
213    load_pad(comppadlist);
214    xref(main_start);
215    while (@todo) {
216	xref_cv(shift @todo);
217    }
218}
219
220sub pp_nextstate {
221    my $op = shift;
222    $file = $op->file;
223    $line = $op->line;
224    $top = UNKNOWN;
225}
226
227sub pp_padsv {
228    my $op = shift;
229    $top = $pad[$op->targ];
230    process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used");
231}
232
233sub pp_padav { pp_padsv(@_) }
234sub pp_padhv { pp_padsv(@_) }
235
236sub deref {
237    my ($op, $var, $as) = @_;
238    $var->[1] = $as . $var->[1];
239    process($var, $op->private & OPpOUR_INTRO ? "intro" : "used");
240}
241
242sub pp_rv2cv { deref(shift, $top, "&"); }
243sub pp_rv2hv { deref(shift, $top, "%"); }
244sub pp_rv2sv { deref(shift, $top, "\$"); }
245sub pp_rv2av { deref(shift, $top, "\@"); }
246sub pp_rv2gv { deref(shift, $top, "*"); }
247
248sub pp_gvsv {
249    my $op = shift;
250    my $gv;
251    if ($Config{useithreads}) {
252	$top = $pad[$op->padix];
253	$top = UNKNOWN unless $top;
254	$top->[1] = '$';
255    }
256    else {
257	$gv = $op->gv;
258	$top = [$gv->STASH->NAME, '$', $gv->SAFENAME];
259    }
260    process($top, $op->private & OPpLVAL_INTRO ||
261                  $op->private & OPpOUR_INTRO   ? "intro" : "used");
262}
263
264sub pp_gv {
265    my $op = shift;
266    my $gv;
267    if ($Config{useithreads}) {
268	$top = $pad[$op->padix];
269	$top = UNKNOWN unless $top;
270	$top->[1] = '*';
271    }
272    else {
273	$gv = $op->gv;
274	$top = [$gv->STASH->NAME, "*", $gv->SAFENAME];
275    }
276    process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used");
277}
278
279sub pp_const {
280    my $op = shift;
281    my $sv = $op->sv;
282    # constant could be in the pad (under useithreads)
283    if ($$sv) {
284	$top = ["?", "",
285		(class($sv) ne "SPECIAL" && $sv->FLAGS & SVf_POK)
286		? cstring($sv->PV) : "?"];
287    }
288    else {
289	$top = $pad[$op->targ];
290	$top = UNKNOWN unless $top;
291    }
292}
293
294sub pp_method {
295    my $op = shift;
296    $top = ["(method)", "->".$top->[1], $top->[2]];
297}
298
299sub pp_entersub {
300    my $op = shift;
301    if ($top->[1] eq "m") {
302	process($top, "meth");
303    } else {
304	process($top, "subused");
305    }
306    $top = UNKNOWN;
307}
308
309#
310# Stuff for cross referencing definitions of variables and subs
311#
312
313sub B::GV::xref {
314    my $gv = shift;
315    my $cv = $gv->CV;
316    if ($$cv) {
317	#return if $done{$$cv}++;
318	$file = $gv->FILE;
319	$line = $gv->LINE;
320	process([$gv->STASH->NAME, "&", $gv->NAME], "subdef");
321	push(@todo, $cv);
322    }
323    my $form = $gv->FORM;
324    if ($$form) {
325	return if $done{$$form}++;
326	$file = $gv->FILE;
327	$line = $gv->LINE;
328	process([$gv->STASH->NAME, "", $gv->NAME], "formdef");
329    }
330}
331
332sub xref_definitions {
333    my ($pack, %exclude);
334    return if $nodefs;
335    $subname = "(definitions)";
336    foreach $pack (qw(B O AutoLoader DynaLoader XSLoader Config DB VMS
337		      strict vars FileHandle Exporter Carp PerlIO::Layer
338		      attributes utf8 warnings)) {
339        $exclude{$pack."::"} = 1;
340    }
341    no strict qw(vars refs);
342    walksymtable(\%{"main::"}, "xref", sub { !defined($exclude{$_[0]}) });
343}
344
345sub output {
346    return if $raw;
347    my ($file, $subname, $pack, $name, $ev, $perfile, $persubname,
348	$perpack, $pername, $perev);
349    foreach $file (sort(keys(%table))) {
350	$perfile = $table{$file};
351	print "File $file\n";
352	foreach $subname (sort(keys(%$perfile))) {
353	    $persubname = $perfile->{$subname};
354	    print "  Subroutine $subname\n";
355	    foreach $pack (sort(keys(%$persubname))) {
356		$perpack = $persubname->{$pack};
357		print "    Package $pack\n";
358		foreach $name (sort(keys(%$perpack))) {
359		    $pername = $perpack->{$name};
360		    my @lines;
361		    foreach $ev (qw(intro formdef subdef meth subused used)) {
362			$perev = $pername->{$ev};
363			if (defined($perev) && @$perev) {
364			    my $code = $code{$ev};
365			    push(@lines, map("$code$_", @$perev));
366			}
367		    }
368		    printf "      %-16s  %s\n", $name, join(", ", @lines);
369		}
370	    }
371	}
372    }
373}
374
375sub compile {
376    my @options = @_;
377    my ($option, $opt, $arg);
378  OPTION:
379    while ($option = shift @options) {
380	if ($option =~ /^-(.)(.*)/) {
381	    $opt = $1;
382	    $arg = $2;
383	} else {
384	    unshift @options, $option;
385	    last OPTION;
386	}
387	if ($opt eq "-" && $arg eq "-") {
388	    shift @options;
389	    last OPTION;
390	} elsif ($opt eq "o") {
391	    $arg ||= shift @options;
392	    open(STDOUT, ">$arg") or return "$arg: $!\n";
393	} elsif ($opt eq "d") {
394	    $nodefs = 1;
395	} elsif ($opt eq "r") {
396	    $raw = 1;
397	} elsif ($opt eq "D") {
398            $arg ||= shift @options;
399	    foreach $arg (split(//, $arg)) {
400		if ($arg eq "o") {
401		    B->debug(1);
402		} elsif ($arg eq "O") {
403		    $debug_op = 1;
404		} elsif ($arg eq "t") {
405		    $debug_top = 1;
406		}
407	    }
408	}
409    }
410    if (@options) {
411	return sub {
412	    my $objname;
413	    xref_definitions();
414	    foreach $objname (@options) {
415		$objname = "main::$objname" unless $objname =~ /::/;
416		eval "xref_object(\\&$objname)";
417		die "xref_object(\\&$objname) failed: $@" if $@;
418	    }
419	    output();
420	}
421    } else {
422	return sub {
423	    xref_definitions();
424	    xref_main();
425	    output();
426	}
427    }
428}
429
4301;
431