1############################################################################# 2# Pod/Select.pm -- function to select portions of POD docs 3# 4# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. 5# This file is part of "PodParser". PodParser is free software; 6# you can redistribute it and/or modify it under the same terms 7# as Perl itself. 8############################################################################# 9 10package Pod::Select; 11 12use vars qw($VERSION); 13$VERSION = 1.30; ## Current version of this package 14require 5.005; ## requires this Perl version or later 15 16############################################################################# 17 18=head1 NAME 19 20Pod::Select, podselect() - extract selected sections of POD from input 21 22=head1 SYNOPSIS 23 24 use Pod::Select; 25 26 ## Select all the POD sections for each file in @filelist 27 ## and print the result on standard output. 28 podselect(@filelist); 29 30 ## Same as above, but write to tmp.out 31 podselect({-output => "tmp.out"}, @filelist): 32 33 ## Select from the given filelist, only those POD sections that are 34 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. 35 podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist): 36 37 ## Select the "DESCRIPTION" section of the PODs from STDIN and write 38 ## the result to STDERR. 39 podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN); 40 41or 42 43 use Pod::Select; 44 45 ## Create a parser object for selecting POD sections from the input 46 $parser = new Pod::Select(); 47 48 ## Select all the POD sections for each file in @filelist 49 ## and print the result to tmp.out. 50 $parser->parse_from_file("<&STDIN", "tmp.out"); 51 52 ## Select from the given filelist, only those POD sections that are 53 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. 54 $parser->select("NAME|SYNOPSIS", "OPTIONS"); 55 for (@filelist) { $parser->parse_from_file($_); } 56 57 ## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from 58 ## STDIN and write the result to STDERR. 59 $parser->select("DESCRIPTION"); 60 $parser->add_selection("SEE ALSO"); 61 $parser->parse_from_filehandle(\*STDIN, \*STDERR); 62 63=head1 REQUIRES 64 65perl5.005, Pod::Parser, Exporter, Carp 66 67=head1 EXPORTS 68 69podselect() 70 71=head1 DESCRIPTION 72 73B<podselect()> is a function which will extract specified sections of 74pod documentation from an input stream. This ability is provided by the 75B<Pod::Select> module which is a subclass of B<Pod::Parser>. 76B<Pod::Select> provides a method named B<select()> to specify the set of 77POD sections to select for processing/printing. B<podselect()> merely 78creates a B<Pod::Select> object and then invokes the B<podselect()> 79followed by B<parse_from_file()>. 80 81=head1 SECTION SPECIFICATIONS 82 83B<podselect()> and B<Pod::Select::select()> may be given one or more 84"section specifications" to restrict the text processed to only the 85desired set of sections and their corresponding subsections. A section 86specification is a string containing one or more Perl-style regular 87expressions separated by forward slashes ("/"). If you need to use a 88forward slash literally within a section title you can escape it with a 89backslash ("\/"). 90 91The formal syntax of a section specification is: 92 93=over 4 94 95=item * 96 97I<head1-title-regex>/I<head2-title-regex>/... 98 99=back 100 101Any omitted or empty regular expressions will default to ".*". 102Please note that each regular expression given is implicitly 103anchored by adding "^" and "$" to the beginning and end. Also, if a 104given regular expression starts with a "!" character, then the 105expression is I<negated> (so C<!foo> would match anything I<except> 106C<foo>). 107 108Some example section specifications follow. 109 110=over 4 111 112=item * 113 114Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections: 115 116C<NAME|SYNOPSIS> 117 118=item * 119 120Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION> 121section: 122 123C<DESCRIPTION/Question|Answer> 124 125=item * 126 127Match the C<Comments> subsection of I<all> sections: 128 129C</Comments> 130 131=item * 132 133Match all subsections of C<DESCRIPTION> I<except> for C<Comments>: 134 135C<DESCRIPTION/!Comments> 136 137=item * 138 139Match the C<DESCRIPTION> section but do I<not> match any of its subsections: 140 141C<DESCRIPTION/!.+> 142 143=item * 144 145Match all top level sections but none of their subsections: 146 147C</!.+> 148 149=back 150 151=begin _NOT_IMPLEMENTED_ 152 153=head1 RANGE SPECIFICATIONS 154 155B<podselect()> and B<Pod::Select::select()> may be given one or more 156"range specifications" to restrict the text processed to only the 157desired ranges of paragraphs in the desired set of sections. A range 158specification is a string containing a single Perl-style regular 159expression (a regex), or else two Perl-style regular expressions 160(regexs) separated by a ".." (Perl's "range" operator is ".."). 161The regexs in a range specification are delimited by forward slashes 162("/"). If you need to use a forward slash literally within a regex you 163can escape it with a backslash ("\/"). 164 165The formal syntax of a range specification is: 166 167=over 4 168 169=item * 170 171/I<start-range-regex>/[../I<end-range-regex>/] 172 173=back 174 175Where each the item inside square brackets (the ".." followed by the 176end-range-regex) is optional. Each "range-regex" is of the form: 177 178 =cmd-expr text-expr 179 180Where I<cmd-expr> is intended to match the name of one or more POD 181commands, and I<text-expr> is intended to match the paragraph text for 182the command. If a range-regex is supposed to match a POD command, then 183the first character of the regex (the one after the initial '/') 184absolutely I<must> be a single '=' character; it may not be anything 185else (not even a regex meta-character) if it is supposed to match 186against the name of a POD command. 187 188If no I<=cmd-expr> is given then the text-expr will be matched against 189plain textblocks unless it is preceded by a space, in which case it is 190matched against verbatim text-blocks. If no I<text-expr> is given then 191only the command-portion of the paragraph is matched against. 192 193Note that these two expressions are each implicitly anchored. This 194means that when matching against the command-name, there will be an 195implicit '^' and '$' around the given I<=cmd-expr>; and when matching 196against the paragraph text there will be an implicit '\A' and '\Z' 197around the given I<text-expr>. 198 199Unlike with section-specs, the '!' character does I<not> have any special 200meaning (negation or otherwise) at the beginning of a range-spec! 201 202Some example range specifications follow. 203 204=over 4 205 206=item 207Match all C<=for html> paragraphs: 208 209C</=for html/> 210 211=item 212Match all paragraphs between C<=begin html> and C<=end html> 213(note that this will I<not> work correctly if such sections 214are nested): 215 216C</=begin html/../=end html/> 217 218=item 219Match all paragraphs between the given C<=item> name until the end of the 220current section: 221 222C</=item mine/../=head\d/> 223 224=item 225Match all paragraphs between the given C<=item> until the next item, or 226until the end of the itemized list (note that this will I<not> work as 227desired if the item contains an itemized list nested within it): 228 229C</=item mine/../=(item|back)/> 230 231=back 232 233=end _NOT_IMPLEMENTED_ 234 235=cut 236 237############################################################################# 238 239use strict; 240#use diagnostics; 241use Carp; 242use Pod::Parser 1.04; 243use vars qw(@ISA @EXPORT $MAX_HEADING_LEVEL); 244 245@ISA = qw(Pod::Parser); 246@EXPORT = qw(&podselect); 247 248## Maximum number of heading levels supported for '=headN' directives 249*MAX_HEADING_LEVEL = \3; 250 251############################################################################# 252 253=head1 OBJECT METHODS 254 255The following methods are provided in this module. Each one takes a 256reference to the object itself as an implicit first parameter. 257 258=cut 259 260##--------------------------------------------------------------------------- 261 262## =begin _PRIVATE_ 263## 264## =head1 B<_init_headings()> 265## 266## Initialize the current set of active section headings. 267## 268## =cut 269## 270## =end _PRIVATE_ 271 272use vars qw(%myData @section_headings); 273 274sub _init_headings { 275 my $self = shift; 276 local *myData = $self; 277 278 ## Initialize current section heading titles if necessary 279 unless (defined $myData{_SECTION_HEADINGS}) { 280 local *section_headings = $myData{_SECTION_HEADINGS} = []; 281 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 282 $section_headings[$i] = ''; 283 } 284 } 285} 286 287##--------------------------------------------------------------------------- 288 289=head1 B<curr_headings()> 290 291 ($head1, $head2, $head3, ...) = $parser->curr_headings(); 292 $head1 = $parser->curr_headings(1); 293 294This method returns a list of the currently active section headings and 295subheadings in the document being parsed. The list of headings returned 296corresponds to the most recently parsed paragraph of the input. 297 298If an argument is given, it must correspond to the desired section 299heading number, in which case only the specified section heading is 300returned. If there is no current section heading at the specified 301level, then C<undef> is returned. 302 303=cut 304 305sub curr_headings { 306 my $self = shift; 307 $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS}); 308 my @headings = @{ $self->{_SECTION_HEADINGS} }; 309 return (@_ > 0 and $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings; 310} 311 312##--------------------------------------------------------------------------- 313 314=head1 B<select()> 315 316 $parser->select($section_spec1,$section_spec2,...); 317 318This method is used to select the particular sections and subsections of 319POD documentation that are to be printed and/or processed. The existing 320set of selected sections is I<replaced> with the given set of sections. 321See B<add_selection()> for adding to the current set of selected 322sections. 323 324Each of the C<$section_spec> arguments should be a section specification 325as described in L<"SECTION SPECIFICATIONS">. The section specifications 326are parsed by this method and the resulting regular expressions are 327stored in the invoking object. 328 329If no C<$section_spec> arguments are given, then the existing set of 330selected sections is cleared out (which means C<all> sections will be 331processed). 332 333This method should I<not> normally be overridden by subclasses. 334 335=cut 336 337use vars qw(@selected_sections); 338 339sub select { 340 my $self = shift; 341 my @sections = @_; 342 local *myData = $self; 343 local $_; 344 345### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?) 346 347 ##--------------------------------------------------------------------- 348 ## The following is a blatant hack for backward compatibility, and for 349 ## implementing add_selection(). If the *first* *argument* is the 350 ## string "+", then the remaining section specifications are *added* 351 ## to the current set of selections; otherwise the given section 352 ## specifications will *replace* the current set of selections. 353 ## 354 ## This should probably be fixed someday, but for the present time, 355 ## it seems incredibly unlikely that "+" would ever correspond to 356 ## a legitimate section heading 357 ##--------------------------------------------------------------------- 358 my $add = ($sections[0] eq "+") ? shift(@sections) : ""; 359 360 ## Reset the set of sections to use 361 unless (@sections > 0) { 362 delete $myData{_SELECTED_SECTIONS} unless ($add); 363 return; 364 } 365 $myData{_SELECTED_SECTIONS} = [] 366 unless ($add && exists $myData{_SELECTED_SECTIONS}); 367 local *selected_sections = $myData{_SELECTED_SECTIONS}; 368 369 ## Compile each spec 370 my $spec; 371 for $spec (@sections) { 372 if ( defined($_ = &_compile_section_spec($spec)) ) { 373 ## Store them in our sections array 374 push(@selected_sections, $_); 375 } 376 else { 377 carp "Ignoring section spec \"$spec\"!\n"; 378 } 379 } 380} 381 382##--------------------------------------------------------------------------- 383 384=head1 B<add_selection()> 385 386 $parser->add_selection($section_spec1,$section_spec2,...); 387 388This method is used to add to the currently selected sections and 389subsections of POD documentation that are to be printed and/or 390processed. See <select()> for replacing the currently selected sections. 391 392Each of the C<$section_spec> arguments should be a section specification 393as described in L<"SECTION SPECIFICATIONS">. The section specifications 394are parsed by this method and the resulting regular expressions are 395stored in the invoking object. 396 397This method should I<not> normally be overridden by subclasses. 398 399=cut 400 401sub add_selection { 402 my $self = shift; 403 $self->select("+", @_); 404} 405 406##--------------------------------------------------------------------------- 407 408=head1 B<clear_selections()> 409 410 $parser->clear_selections(); 411 412This method takes no arguments, it has the exact same effect as invoking 413<select()> with no arguments. 414 415=cut 416 417sub clear_selections { 418 my $self = shift; 419 $self->select(); 420} 421 422##--------------------------------------------------------------------------- 423 424=head1 B<match_section()> 425 426 $boolean = $parser->match_section($heading1,$heading2,...); 427 428Returns a value of true if the given section and subsection heading 429titles match any of the currently selected section specifications in 430effect from prior calls to B<select()> and B<add_selection()> (or if 431there are no explictly selected/deselected sections). 432 433The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of 434the corresponding sections, subsections, etc. to try and match. If 435C<$headingN> is omitted then it defaults to the current corresponding 436section heading title in the input. 437 438This method should I<not> normally be overridden by subclasses. 439 440=cut 441 442sub match_section { 443 my $self = shift; 444 my (@headings) = @_; 445 local *myData = $self; 446 447 ## Return true if no restrictions were explicitly specified 448 my $selections = (exists $myData{_SELECTED_SECTIONS}) 449 ? $myData{_SELECTED_SECTIONS} : undef; 450 return 1 unless ((defined $selections) && (@{$selections} > 0)); 451 452 ## Default any unspecified sections to the current one 453 my @current_headings = $self->curr_headings(); 454 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 455 (defined $headings[$i]) or $headings[$i] = $current_headings[$i]; 456 } 457 458 ## Look for a match against the specified section expressions 459 my ($section_spec, $regex, $negated, $match); 460 for $section_spec ( @{$selections} ) { 461 ##------------------------------------------------------ 462 ## Each portion of this spec must match in order for 463 ## the spec to be matched. So we will start with a 464 ## match-value of 'true' and logically 'and' it with 465 ## the results of matching a given element of the spec. 466 ##------------------------------------------------------ 467 $match = 1; 468 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 469 $regex = $section_spec->[$i]; 470 $negated = ($regex =~ s/^\!//); 471 $match &= ($negated ? ($headings[$i] !~ /${regex}/) 472 : ($headings[$i] =~ /${regex}/)); 473 last unless ($match); 474 } 475 return 1 if ($match); 476 } 477 return 0; ## no match 478} 479 480##--------------------------------------------------------------------------- 481 482=head1 B<is_selected()> 483 484 $boolean = $parser->is_selected($paragraph); 485 486This method is used to determine if the block of text given in 487C<$paragraph> falls within the currently selected set of POD sections 488and subsections to be printed or processed. This method is also 489responsible for keeping track of the current input section and 490subsections. It is assumed that C<$paragraph> is the most recently read 491(but not yet processed) input paragraph. 492 493The value returned will be true if the C<$paragraph> and the rest of the 494text in the same section as C<$paragraph> should be selected (included) 495for processing; otherwise a false value is returned. 496 497=cut 498 499sub is_selected { 500 my ($self, $paragraph) = @_; 501 local $_; 502 local *myData = $self; 503 504 $self->_init_headings() unless (defined $myData{_SECTION_HEADINGS}); 505 506 ## Keep track of current sections levels and headings 507 $_ = $paragraph; 508 if (/^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*)\s*$/) 509 { 510 ## This is a section heading command 511 my ($level, $heading) = ($2, $3); 512 $level = 1 + (length($1) / 3) if ((! length $level) || (length $1)); 513 ## Reset the current section heading at this level 514 $myData{_SECTION_HEADINGS}->[$level - 1] = $heading; 515 ## Reset subsection headings of this one to empty 516 for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) { 517 $myData{_SECTION_HEADINGS}->[$i] = ''; 518 } 519 } 520 521 return $self->match_section(); 522} 523 524############################################################################# 525 526=head1 EXPORTED FUNCTIONS 527 528The following functions are exported by this module. Please note that 529these are functions (not methods) and therefore C<do not> take an 530implicit first argument. 531 532=cut 533 534##--------------------------------------------------------------------------- 535 536=head1 B<podselect()> 537 538 podselect(\%options,@filelist); 539 540B<podselect> will print the raw (untranslated) POD paragraphs of all 541POD sections in the given input files specified by C<@filelist> 542according to the given options. 543 544If any argument to B<podselect> is a reference to a hash 545(associative array) then the values with the following keys are 546processed as follows: 547 548=over 4 549 550=item B<-output> 551 552A string corresponding to the desired output file (or ">&STDOUT" 553or ">&STDERR"). The default is to use standard output. 554 555=item B<-sections> 556 557A reference to an array of sections specifications (as described in 558L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD 559sections and subsections to be selected from input. If no section 560specifications are given, then all sections of the PODs are used. 561 562=begin _NOT_IMPLEMENTED_ 563 564=item B<-ranges> 565 566A reference to an array of range specifications (as described in 567L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD 568paragraphs to be selected from the desired input sections. If no range 569specifications are given, then all paragraphs of the desired sections 570are used. 571 572=end _NOT_IMPLEMENTED_ 573 574=back 575 576All other arguments should correspond to the names of input files 577containing POD sections. A file name of "-" or "<&STDIN" will 578be interpeted to mean standard input (which is the default if no 579filenames are given). 580 581=cut 582 583sub podselect { 584 my(@argv) = @_; 585 my %defaults = (); 586 my $pod_parser = new Pod::Select(%defaults); 587 my $num_inputs = 0; 588 my $output = ">&STDOUT"; 589 my %opts; 590 local $_; 591 for (@argv) { 592 if (ref($_)) { 593 next unless (ref($_) eq 'HASH'); 594 %opts = (%defaults, %{$_}); 595 596 ##------------------------------------------------------------- 597 ## Need this for backward compatibility since we formerly used 598 ## options that were all uppercase words rather than ones that 599 ## looked like Unix command-line options. 600 ## to be uppercase keywords) 601 ##------------------------------------------------------------- 602 %opts = map { 603 my ($key, $val) = (lc $_, $opts{$_}); 604 $key =~ s/^(?=\w)/-/; 605 $key =~ /^-se[cl]/ and $key = '-sections'; 606 #! $key eq '-range' and $key .= 's'; 607 ($key => $val); 608 } (keys %opts); 609 610 ## Process the options 611 (exists $opts{'-output'}) and $output = $opts{'-output'}; 612 613 ## Select the desired sections 614 $pod_parser->select(@{ $opts{'-sections'} }) 615 if ( (defined $opts{'-sections'}) 616 && ((ref $opts{'-sections'}) eq 'ARRAY') ); 617 618 #! ## Select the desired paragraph ranges 619 #! $pod_parser->select(@{ $opts{'-ranges'} }) 620 #! if ( (defined $opts{'-ranges'}) 621 #! && ((ref $opts{'-ranges'}) eq 'ARRAY') ); 622 } 623 else { 624 $pod_parser->parse_from_file($_, $output); 625 ++$num_inputs; 626 } 627 } 628 $pod_parser->parse_from_file("-") unless ($num_inputs > 0); 629} 630 631############################################################################# 632 633=head1 PRIVATE METHODS AND DATA 634 635B<Pod::Select> makes uses a number of internal methods and data fields 636which clients should not need to see or use. For the sake of avoiding 637name collisions with client data and methods, these methods and fields 638are briefly discussed here. Determined hackers may obtain further 639information about them by reading the B<Pod::Select> source code. 640 641Private data fields are stored in the hash-object whose reference is 642returned by the B<new()> constructor for this class. The names of all 643private methods and data-fields used by B<Pod::Select> begin with a 644prefix of "_" and match the regular expression C</^_\w+$/>. 645 646=cut 647 648##--------------------------------------------------------------------------- 649 650=begin _PRIVATE_ 651 652=head1 B<_compile_section_spec()> 653 654 $listref = $parser->_compile_section_spec($section_spec); 655 656This function (note it is a function and I<not> a method) takes a 657section specification (as described in L<"SECTION SPECIFICATIONS">) 658given in C<$section_sepc>, and compiles it into a list of regular 659expressions. If C<$section_spec> has no syntax errors, then a reference 660to the list (array) of corresponding regular expressions is returned; 661otherwise C<undef> is returned and an error message is printed (using 662B<carp>) for each invalid regex. 663 664=end _PRIVATE_ 665 666=cut 667 668sub _compile_section_spec { 669 my ($section_spec) = @_; 670 my (@regexs, $negated); 671 672 ## Compile the spec into a list of regexs 673 local $_ = $section_spec; 674 s|\\\\|\001|g; ## handle escaped backward slashes 675 s|\\/|\002|g; ## handle escaped forward slashes 676 677 ## Parse the regexs for the heading titles 678 @regexs = split('/', $_, $MAX_HEADING_LEVEL); 679 680 ## Set default regex for ommitted levels 681 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 682 $regexs[$i] = '.*' unless ((defined $regexs[$i]) 683 && (length $regexs[$i])); 684 } 685 ## Modify the regexs as needed and validate their syntax 686 my $bad_regexs = 0; 687 for (@regexs) { 688 $_ .= '.+' if ($_ eq '!'); 689 s|\001|\\\\|g; ## restore escaped backward slashes 690 s|\002|\\/|g; ## restore escaped forward slashes 691 $negated = s/^\!//; ## check for negation 692 eval "/$_/"; ## check regex syntax 693 if ($@) { 694 ++$bad_regexs; 695 carp "Bad regular expression /$_/ in \"$section_spec\": $@\n"; 696 } 697 else { 698 ## Add the forward and rear anchors (and put the negator back) 699 $_ = '^' . $_ unless (/^\^/); 700 $_ = $_ . '$' unless (/\$$/); 701 $_ = '!' . $_ if ($negated); 702 } 703 } 704 return (! $bad_regexs) ? [ @regexs ] : undef; 705} 706 707##--------------------------------------------------------------------------- 708 709=begin _PRIVATE_ 710 711=head2 $self->{_SECTION_HEADINGS} 712 713A reference to an array of the current section heading titles for each 714heading level (note that the first heading level title is at index 0). 715 716=end _PRIVATE_ 717 718=cut 719 720##--------------------------------------------------------------------------- 721 722=begin _PRIVATE_ 723 724=head2 $self->{_SELECTED_SECTIONS} 725 726A reference to an array of references to arrays. Each subarray is a list 727of anchored regular expressions (preceded by a "!" if the expression is to 728be negated). The index of the expression in the subarray should correspond 729to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}> 730that it is to be matched against. 731 732=end _PRIVATE_ 733 734=cut 735 736############################################################################# 737 738=head1 SEE ALSO 739 740L<Pod::Parser> 741 742=head1 AUTHOR 743 744Please report bugs using L<http://rt.cpan.org>. 745 746Brad Appleton E<lt>bradapp@enteract.comE<gt> 747 748Based on code for B<pod2text> written by 749Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> 750 751=cut 752 7531; 754# vim: ts=4 sw=4 et 755