1#############################################################################
2# Pod/Find.pm -- finds files containing POD documentation
3#
4# Author: Marek Rouchal <marekr@cpan.org>
5#
6# Copyright (C) 1999-2000 by Marek Rouchal (and borrowing code
7# from Nick Ing-Simmon's PodToHtml). All rights reserved.
8# This file is part of "PodParser". Pod::Find is free software;
9# you can redistribute it and/or modify it under the same terms
10# as Perl itself.
11#############################################################################
12
13package Pod::Find;
14
15use vars qw($VERSION);
16$VERSION = 1.34;   ## Current version of this package
17require  5.005;   ## requires this Perl version or later
18use Carp;
19
20#############################################################################
21
22=head1 NAME
23
24Pod::Find - find POD documents in directory trees
25
26=head1 SYNOPSIS
27
28  use Pod::Find qw(pod_find simplify_name);
29  my %pods = pod_find({ -verbose => 1, -inc => 1 });
30  foreach(keys %pods) {
31     print "found library POD `$pods{$_}' in $_\n";
32  }
33
34  print "podname=",simplify_name('a/b/c/mymodule.pod'),"\n";
35
36  $location = pod_where( { -inc => 1 }, "Pod::Find" );
37
38=head1 DESCRIPTION
39
40B<Pod::Find> provides a set of functions to locate POD files.  Note that
41no function is exported by default to avoid pollution of your namespace,
42so be sure to specify them in the B<use> statement if you need them:
43
44  use Pod::Find qw(pod_find);
45
46From this version on the typical SCM (software configuration management)
47files/directories like RCS, CVS, SCCS, .svn are ignored.
48
49=cut
50
51use strict;
52#use diagnostics;
53use Exporter;
54use File::Spec;
55use File::Find;
56use Cwd;
57
58use vars qw(@ISA @EXPORT_OK $VERSION);
59@ISA = qw(Exporter);
60@EXPORT_OK = qw(&pod_find &simplify_name &pod_where &contains_pod);
61
62# package global variables
63my $SIMPLIFY_RX;
64
65=head2 C<pod_find( { %opts } , @directories )>
66
67The function B<pod_find> searches for POD documents in a given set of
68files and/or directories. It returns a hash with the file names as keys
69and the POD name as value. The POD name is derived from the file name
70and its position in the directory tree.
71
72E.g. when searching in F<$HOME/perl5lib>, the file
73F<$HOME/perl5lib/MyModule.pm> would get the POD name I<MyModule>,
74whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be
75I<Myclass::Subclass>. The name information can be used for POD
76translators.
77
78Only text files containing at least one valid POD command are found.
79
80A warning is printed if more than one POD file with the same POD name
81is found, e.g. F<CPAN.pm> in different directories. This usually
82indicates duplicate occurrences of modules in the I<@INC> search path.
83
84B<OPTIONS> The first argument for B<pod_find> may be a hash reference
85with options. The rest are either directories that are searched
86recursively or files.  The POD names of files are the plain basenames
87with any Perl-like extension (.pm, .pl, .pod) stripped.
88
89=over 4
90
91=item C<-verbose =E<gt> 1>
92
93Print progress information while scanning.
94
95=item C<-perl =E<gt> 1>
96
97Apply Perl-specific heuristics to find the correct PODs. This includes
98stripping Perl-like extensions, omitting subdirectories that are numeric
99but do I<not> match the current Perl interpreter's version id, suppressing
100F<site_perl> as a module hierarchy name etc.
101
102=item C<-script =E<gt> 1>
103
104Search for PODs in the current Perl interpreter's installation
105B<scriptdir>. This is taken from the local L<Config|Config> module.
106
107=item C<-inc =E<gt> 1>
108
109Search for PODs in the current Perl interpreter's I<@INC> paths. This
110automatically considers paths specified in the C<PERL5LIB> environment
111as this is prepended to I<@INC> by the Perl interpreter itself.
112
113=back
114
115=cut
116
117# return a hash of the POD files found
118# first argument may be a hashref (options),
119# rest is a list of directories to search recursively
120sub pod_find
121{
122    my %opts;
123    if(ref $_[0]) {
124        %opts = %{shift()};
125    }
126
127    $opts{-verbose} ||= 0;
128    $opts{-perl}    ||= 0;
129
130    my (@search) = @_;
131
132    if($opts{-script}) {
133        require Config;
134        push(@search, $Config::Config{scriptdir})
135            if -d $Config::Config{scriptdir};
136        $opts{-perl} = 1;
137    }
138
139    if($opts{-inc}) {
140        if ($^O eq 'MacOS') {
141            # tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS
142            my @new_INC = @INC;
143            for (@new_INC) {
144                if ( $_ eq '.' ) {
145                    $_ = ':';
146                } elsif ( $_ =~ s|^((?:\.\./)+)|':' x (length($1)/3)|e ) {
147                    $_ = ':'. $_;
148                } else {
149                    $_ =~ s|^\./|:|;
150                }
151            }
152            push(@search, grep($_ ne File::Spec->curdir, @new_INC));
153        } else {
154            push(@search, grep($_ ne File::Spec->curdir, @INC));
155        }
156
157        $opts{-perl} = 1;
158    }
159
160    if($opts{-perl}) {
161        require Config;
162        # this code simplifies the POD name for Perl modules:
163        # * remove "site_perl"
164        # * remove e.g. "i586-linux" (from 'archname')
165        # * remove e.g. 5.00503
166        # * remove pod/ if followed by *.pod (e.g. in pod/perlfunc.pod)
167
168        # Mac OS:
169        # * remove ":?site_perl:"
170        # * remove :?pod: if followed by *.pod (e.g. in :pod:perlfunc.pod)
171
172        if ($^O eq 'MacOS') {
173            $SIMPLIFY_RX =
174              qq!^(?i:\:?site_perl\:|\:?pod\:(?=.*?\\.pod\\z))*!;
175        } else {
176            $SIMPLIFY_RX =
177              qq!^(?i:site(_perl)?/|\Q$Config::Config{archname}\E/|\\d+\\.\\d+([_.]?\\d+)?/|pod/(?=.*?\\.pod\\z))*!;
178        }
179    }
180
181    my %dirs_visited;
182    my %pods;
183    my %names;
184    my $pwd = cwd();
185
186    foreach my $try (@search) {
187        unless(File::Spec->file_name_is_absolute($try)) {
188            # make path absolute
189            $try = File::Spec->catfile($pwd,$try);
190        }
191        # simplify path
192        # on VMS canonpath will vmsify:[the.path], but File::Find::find
193        # wants /unixy/paths
194        $try = File::Spec->canonpath($try) if ($^O ne 'VMS');
195        $try = VMS::Filespec::unixify($try) if ($^O eq 'VMS');
196        my $name;
197        if(-f $try) {
198            if($name = _check_and_extract_name($try, $opts{-verbose})) {
199                _check_for_duplicates($try, $name, \%names, \%pods);
200            }
201            next;
202        }
203        my $root_rx = $^O eq 'MacOS' ? qq!^\Q$try\E! : qq!^\Q$try\E/!;
204        File::Find::find( sub {
205            my $item = $File::Find::name;
206            if(-d) {
207                if($item =~ m{/(?:RCS|CVS|SCCS|\.svn)$}) {
208                    $File::Find::prune = 1;
209                    return;
210                }
211                elsif($dirs_visited{$item}) {
212                    warn "Directory '$item' already seen, skipping.\n"
213                        if($opts{-verbose});
214                    $File::Find::prune = 1;
215                    return;
216                }
217                else {
218                    $dirs_visited{$item} = 1;
219                }
220                if($opts{-perl} && /^(\d+\.[\d_]+)\z/s && eval "$1" != $]) {
221                    $File::Find::prune = 1;
222                    warn "Perl $] version mismatch on $_, skipping.\n"
223                        if($opts{-verbose});
224                }
225                return;
226            }
227            if($name = _check_and_extract_name($item, $opts{-verbose}, $root_rx)) {
228                _check_for_duplicates($item, $name, \%names, \%pods);
229            }
230        }, $try); # end of File::Find::find
231    }
232    chdir $pwd;
233    %pods;
234}
235
236sub _check_for_duplicates {
237    my ($file, $name, $names_ref, $pods_ref) = @_;
238    if($$names_ref{$name}) {
239        warn "Duplicate POD found (shadowing?): $name ($file)\n";
240        warn "    Already seen in ",
241            join(' ', grep($$pods_ref{$_} eq $name, keys %$pods_ref)),"\n";
242    }
243    else {
244        $$names_ref{$name} = 1;
245    }
246    $$pods_ref{$file} = $name;
247}
248
249sub _check_and_extract_name {
250    my ($file, $verbose, $root_rx) = @_;
251
252    # check extension or executable flag
253    # this involves testing the .bat extension on Win32!
254    unless(-f $file && -T $file && ($file =~ /\.(pod|pm|plx?)\z/i || -x $file )) {
255      return undef;
256    }
257
258    return undef unless contains_pod($file,$verbose);
259
260    # strip non-significant path components
261    # TODO what happens on e.g. Win32?
262    my $name = $file;
263    if(defined $root_rx) {
264        $name =~ s!$root_rx!!s;
265        $name =~ s!$SIMPLIFY_RX!!os if(defined $SIMPLIFY_RX);
266    }
267    else {
268        if ($^O eq 'MacOS') {
269            $name =~ s/^.*://s;
270        } else {
271            $name =~ s:^.*/::s;
272        }
273    }
274    _simplify($name);
275    $name =~ s!/+!::!g; #/
276    if ($^O eq 'MacOS') {
277        $name =~ s!:+!::!g; # : -> ::
278    } else {
279        $name =~ s!/+!::!g; # / -> ::
280    }
281    $name;
282}
283
284=head2 C<simplify_name( $str )>
285
286The function B<simplify_name> is equivalent to B<basename>, but also
287strips Perl-like extensions (.pm, .pl, .pod) and extensions like
288F<.bat>, F<.cmd> on Win32 and OS/2, or F<.com> on VMS, respectively.
289
290=cut
291
292# basic simplification of the POD name:
293# basename & strip extension
294sub simplify_name {
295    my ($str) = @_;
296    # remove all path components
297    if ($^O eq 'MacOS') {
298        $str =~ s/^.*://s;
299    } else {
300        $str =~ s:^.*/::s;
301    }
302    _simplify($str);
303    $str;
304}
305
306# internal sub only
307sub _simplify {
308    # strip Perl's own extensions
309    $_[0] =~ s/\.(pod|pm|plx?)\z//i;
310    # strip meaningless extensions on Win32 and OS/2
311    $_[0] =~ s/\.(bat|exe|cmd)\z//i if($^O =~ /mswin|os2/i);
312    # strip meaningless extensions on VMS
313    $_[0] =~ s/\.(com)\z//i if($^O eq 'VMS');
314}
315
316# contribution from Tim Jenness <t.jenness@jach.hawaii.edu>
317
318=head2 C<pod_where( { %opts }, $pod )>
319
320Returns the location of a pod document given a search directory
321and a module (e.g. C<File::Find>) or script (e.g. C<perldoc>) name.
322
323Options:
324
325=over 4
326
327=item C<-inc =E<gt> 1>
328
329Search @INC for the pod and also the C<scriptdir> defined in the
330L<Config|Config> module.
331
332=item C<-dirs =E<gt> [ $dir1, $dir2, ... ]>
333
334Reference to an array of search directories. These are searched in order
335before looking in C<@INC> (if B<-inc>). Current directory is used if
336none are specified.
337
338=item C<-verbose =E<gt> 1>
339
340List directories as they are searched
341
342=back
343
344Returns the full path of the first occurrence to the file.
345Package names (eg 'A::B') are automatically converted to directory
346names in the selected directory. (eg on unix 'A::B' is converted to
347'A/B'). Additionally, '.pm', '.pl' and '.pod' are appended to the
348search automatically if required.
349
350A subdirectory F<pod/> is also checked if it exists in any of the given
351search directories. This ensures that e.g. L<perlfunc|perlfunc> is
352found.
353
354It is assumed that if a module name is supplied, that that name
355matches the file name. Pods are not opened to check for the 'NAME'
356entry.
357
358A check is made to make sure that the file that is found does
359contain some pod documentation.
360
361=cut
362
363sub pod_where {
364
365  # default options
366  my %options = (
367         '-inc' => 0,
368         '-verbose' => 0,
369         '-dirs' => [ File::Spec->curdir ],
370        );
371
372  # Check for an options hash as first argument
373  if (defined $_[0] && ref($_[0]) eq 'HASH') {
374    my $opt = shift;
375
376    # Merge default options with supplied options
377    %options = (%options, %$opt);
378  }
379
380  # Check usage
381  carp 'Usage: pod_where({options}, $pod)' unless (scalar(@_));
382
383  # Read argument
384  my $pod = shift;
385
386  # Split on :: and then join the name together using File::Spec
387  my @parts = split (/::/, $pod);
388
389  # Get full directory list
390  my @search_dirs = @{ $options{'-dirs'} };
391
392  if ($options{'-inc'}) {
393
394    require Config;
395
396    # Add @INC
397    if ($^O eq 'MacOS' && $options{'-inc'}) {
398        # tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS
399        my @new_INC = @INC;
400        for (@new_INC) {
401            if ( $_ eq '.' ) {
402                $_ = ':';
403            } elsif ( $_ =~ s|^((?:\.\./)+)|':' x (length($1)/3)|e ) {
404                $_ = ':'. $_;
405            } else {
406                $_ =~ s|^\./|:|;
407            }
408        }
409        push (@search_dirs, @new_INC);
410    } elsif ($options{'-inc'}) {
411        push (@search_dirs, @INC);
412    }
413
414    # Add location of pod documentation for perl man pages (eg perlfunc)
415    # This is a pod directory in the private install tree
416    #my $perlpoddir = File::Spec->catdir($Config::Config{'installprivlib'},
417    #					'pod');
418    #push (@search_dirs, $perlpoddir)
419    #  if -d $perlpoddir;
420
421    # Add location of binaries such as pod2text
422    push (@search_dirs, $Config::Config{'scriptdir'})
423      if -d $Config::Config{'scriptdir'};
424  }
425
426  warn "Search path is: ".join(' ', @search_dirs)."\n"
427        if $options{'-verbose'};
428
429  # Loop over directories
430  Dir: foreach my $dir ( @search_dirs ) {
431
432    # Don't bother if can't find the directory
433    if (-d $dir) {
434      warn "Looking in directory $dir\n"
435        if $options{'-verbose'};
436
437      # Now concatenate this directory with the pod we are searching for
438      my $fullname = File::Spec->catfile($dir, @parts);
439      warn "Filename is now $fullname\n"
440        if $options{'-verbose'};
441
442      # Loop over possible extensions
443      foreach my $ext ('', '.pod', '.pm', '.pl') {
444        my $fullext = $fullname . $ext;
445        if (-f $fullext &&
446         contains_pod($fullext, $options{'-verbose'}) ) {
447          warn "FOUND: $fullext\n" if $options{'-verbose'};
448          return $fullext;
449        }
450      }
451    } else {
452      warn "Directory $dir does not exist\n"
453        if $options{'-verbose'};
454      next Dir;
455    }
456    # for some strange reason the path on MacOS/darwin/cygwin is
457    # 'pods' not 'pod'
458    # this could be the case also for other systems that
459    # have a case-tolerant file system, but File::Spec
460    # does not recognize 'darwin' yet. And cygwin also has "pods",
461    # but is not case tolerant. Oh well...
462    if((File::Spec->case_tolerant || $^O =~ /macos|darwin|cygwin/i)
463     && -d File::Spec->catdir($dir,'pods')) {
464      $dir = File::Spec->catdir($dir,'pods');
465      redo Dir;
466    }
467    if(-d File::Spec->catdir($dir,'pod')) {
468      $dir = File::Spec->catdir($dir,'pod');
469      redo Dir;
470    }
471  }
472  # No match;
473  return undef;
474}
475
476=head2 C<contains_pod( $file , $verbose )>
477
478Returns true if the supplied filename (not POD module) contains some pod
479information.
480
481=cut
482
483sub contains_pod {
484  my $file = shift;
485  my $verbose = 0;
486  $verbose = shift if @_;
487
488  # check for one line of POD
489  unless(open(POD,"<$file")) {
490    warn "Error: $file is unreadable: $!\n";
491    return undef;
492  }
493
494  local $/ = undef;
495  my $pod = <POD>;
496  close(POD) || die "Error closing $file: $!\n";
497  unless($pod =~ /^=(head\d|pod|over|item)\b/m) {
498    warn "No POD in $file, skipping.\n"
499      if($verbose);
500    return 0;
501  }
502
503  return 1;
504}
505
506=head1 AUTHOR
507
508Please report bugs using L<http://rt.cpan.org>.
509
510Marek Rouchal E<lt>marekr@cpan.orgE<gt>,
511heavily borrowing code from Nick Ing-Simmons' PodToHtml.
512
513Tim Jenness E<lt>t.jenness@jach.hawaii.eduE<gt> provided
514C<pod_where> and C<contains_pod>.
515
516=head1 SEE ALSO
517
518L<Pod::Parser>, L<Pod::Checker>, L<perldoc>
519
520=cut
521
5221;
523
524