1############################################################################# 2# Pod/Checker.pm -- check pod documents for syntax errors 3# 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved. 5# This file is part of "PodParser". PodParser is free software; 6# you can redistribute it and/or modify it under the same terms 7# as Perl itself. 8############################################################################# 9 10package Pod::Checker; 11 12use vars qw($VERSION); 13$VERSION = 1.43; ## Current version of this package 14require 5.005; ## requires this Perl version or later 15 16use Pod::ParseUtils; ## for hyperlinks and lists 17 18=head1 NAME 19 20Pod::Checker, podchecker() - check pod documents for syntax errors 21 22=head1 SYNOPSIS 23 24 use Pod::Checker; 25 26 $syntax_okay = podchecker($filepath, $outputpath, %options); 27 28 my $checker = new Pod::Checker %options; 29 $checker->parse_from_file($filepath, \*STDERR); 30 31=head1 OPTIONS/ARGUMENTS 32 33C<$filepath> is the input POD to read and C<$outputpath> is 34where to write POD syntax error messages. Either argument may be a scalar 35indicating a file-path, or else a reference to an open filehandle. 36If unspecified, the input-file it defaults to C<\*STDIN>, and 37the output-file defaults to C<\*STDERR>. 38 39=head2 podchecker() 40 41This function can take a hash of options: 42 43=over 4 44 45=item B<-warnings> =E<gt> I<val> 46 47Turn warnings on/off. I<val> is usually 1 for on, but higher values 48trigger additional warnings. See L<"Warnings">. 49 50=back 51 52=head1 DESCRIPTION 53 54B<podchecker> will perform syntax checking of Perl5 POD format documentation. 55 56Curious/ambitious users are welcome to propose additional features they wish 57to see in B<Pod::Checker> and B<podchecker> and verify that the checks are 58consistent with L<perlpod>. 59 60The following checks are currently performed: 61 62=over 4 63 64=item * 65 66Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences, 67and unterminated interior sequences. 68 69=item * 70 71Check for proper balancing of C<=begin> and C<=end>. The contents of such 72a block are generally ignored, i.e. no syntax checks are performed. 73 74=item * 75 76Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>. 77 78=item * 79 80Check for same nested interior-sequences (e.g. 81C<LE<lt>...LE<lt>...E<gt>...E<gt>>). 82 83=item * 84 85Check for malformed or nonexisting entities C<EE<lt>...E<gt>>. 86 87=item * 88 89Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod> 90for details. 91 92=item * 93 94Check for unresolved document-internal links. This check may also reveal 95misspelled links that seem to be internal links but should be links 96to something else. 97 98=back 99 100=head1 DIAGNOSTICS 101 102=head2 Errors 103 104=over 4 105 106=item * empty =headn 107 108A heading (C<=head1> or C<=head2>) without any text? That ain't no 109heading! 110 111=item * =over on line I<N> without closing =back 112 113The C<=over> command does not have a corresponding C<=back> before the 114next heading (C<=head1> or C<=head2>) or the end of the file. 115 116=item * =item without previous =over 117 118=item * =back without previous =over 119 120An C<=item> or C<=back> command has been found outside a 121C<=over>/C<=back> block. 122 123=item * No argument for =begin 124 125A C<=begin> command was found that is not followed by the formatter 126specification. 127 128=item * =end without =begin 129 130A standalone C<=end> command was found. 131 132=item * Nested =begin's 133 134There were at least two consecutive C<=begin> commands without 135the corresponding C<=end>. Only one C<=begin> may be active at 136a time. 137 138=item * =for without formatter specification 139 140There is no specification of the formatter after the C<=for> command. 141 142=item * unresolved internal link I<NAME> 143 144The given link to I<NAME> does not have a matching node in the current 145POD. This also happend when a single word node name is not enclosed in 146C<"">. 147 148=item * Unknown command "I<CMD>" 149 150An invalid POD command has been found. Valid are C<=head1>, C<=head2>, 151C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, 152C<=for>, C<=pod>, C<=cut> 153 154=item * Unknown interior-sequence "I<SEQ>" 155 156An invalid markup command has been encountered. Valid are: 157C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>, 158C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>, 159C<ZE<lt>E<gt>> 160 161=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt> 162 163Two nested identical markup commands have been found. Generally this 164does not make sense. 165 166=item * garbled entity I<STRING> 167 168The I<STRING> found cannot be interpreted as a character entity. 169 170=item * Entity number out of range 171 172An entity specified by number (dec, hex, oct) is out of range (1-255). 173 174=item * malformed link LE<lt>E<gt> 175 176The link found cannot be parsed because it does not conform to the 177syntax described in L<perlpod>. 178 179=item * nonempty ZE<lt>E<gt> 180 181The C<ZE<lt>E<gt>> sequence is supposed to be empty. 182 183=item * empty XE<lt>E<gt> 184 185The index entry specified contains nothing but whitespace. 186 187=item * Spurious text after =pod / =cut 188 189The commands C<=pod> and C<=cut> do not take any arguments. 190 191=item * Spurious character(s) after =back 192 193The C<=back> command does not take any arguments. 194 195=back 196 197=head2 Warnings 198 199These may not necessarily cause trouble, but indicate mediocre style. 200 201=over 4 202 203=item * multiple occurrence of link target I<name> 204 205The POD file has some C<=item> and/or C<=head> commands that have 206the same text. Potential hyperlinks to such a text cannot be unique then. 207This warning is printed only with warning level greater than one. 208 209=item * line containing nothing but whitespace in paragraph 210 211There is some whitespace on a seemingly empty line. POD is very sensitive 212to such things, so this is flagged. B<vi> users switch on the B<list> 213option to avoid this problem. 214 215=begin _disabled_ 216 217=item * file does not start with =head 218 219The file starts with a different POD directive than head. 220This is most probably something you do not want. 221 222=end _disabled_ 223 224=item * previous =item has no contents 225 226There is a list C<=item> right above the flagged line that has no 227text contents. You probably want to delete empty items. 228 229=item * preceding non-item paragraph(s) 230 231A list introduced by C<=over> starts with a text or verbatim paragraph, 232but continues with C<=item>s. Move the non-item paragraph out of the 233C<=over>/C<=back> block. 234 235=item * =item type mismatch (I<one> vs. I<two>) 236 237A list started with e.g. a bulletted C<=item> and continued with a 238numbered one. This is obviously inconsistent. For most translators the 239type of the I<first> C<=item> determines the type of the list. 240 241=item * I<N> unescaped C<E<lt>E<gt>> in paragraph 242 243Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>> 244can potentially cause errors as they could be misinterpreted as 245markup commands. This is only printed when the -warnings level is 246greater than 1. 247 248=item * Unknown entity 249 250A character entity was found that does not belong to the standard 251ISO set or the POD specials C<verbar> and C<sol>. 252 253=item * No items in =over 254 255The list opened with C<=over> does not contain any items. 256 257=item * No argument for =item 258 259C<=item> without any parameters is deprecated. It should either be followed 260by C<*> to indicate an unordered list, by a number (optionally followed 261by a dot) to indicate an ordered (numbered) list or simple text for a 262definition list. 263 264=item * empty section in previous paragraph 265 266The previous section (introduced by a C<=head> command) does not contain 267any text. This usually indicates that something is missing. Note: A 268C<=head1> followed immediately by C<=head2> does not trigger this warning. 269 270=item * Verbatim paragraph in NAME section 271 272The NAME section (C<=head1 NAME>) should consist of a single paragraph 273with the script/module name, followed by a dash `-' and a very short 274description of what the thing is good for. 275 276=item * =headI<n> without preceding higher level 277 278For example if there is a C<=head2> in the POD file prior to a 279C<=head1>. 280 281=back 282 283=head2 Hyperlinks 284 285There are some warnings wrt. malformed hyperlinks. 286 287=over 4 288 289=item * ignoring leading/trailing whitespace in link 290 291There is whitespace at the beginning or the end of the contents of 292LE<lt>...E<gt>. 293 294=item * (section) in '$page' deprecated 295 296There is a section detected in the page name of LE<lt>...E<gt>, e.g. 297C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only. 298Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able 299to expand this to appropriate code. For links to (builtin) functions, 300please say C<LE<lt>perlfunc/mkdirE<gt>>, without (). 301 302=item * alternative text/node '%s' contains non-escaped | or / 303 304The characters C<|> and C</> are special in the LE<lt>...E<gt> context. 305Although the hyperlink parser does its best to determine which "/" is 306text and which is a delimiter in case of doubt, one ought to escape 307these literal characters like this: 308 309 / E<sol> 310 | E<verbar> 311 312=back 313 314=head1 RETURN VALUE 315 316B<podchecker> returns the number of POD syntax errors found or -1 if 317there were no POD commands at all found in the file. 318 319=head1 EXAMPLES 320 321See L</SYNOPSIS> 322 323=head1 INTERFACE 324 325While checking, this module collects document properties, e.g. the nodes 326for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>). 327POD translators can use this feature to syntax-check and get the nodes in 328a first pass before actually starting to convert. This is expensive in terms 329of execution time, but allows for very robust conversions. 330 331Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror> 332method to print errors and warnings. The summary output (e.g. 333"Pod syntax OK") has been dropped from the module and has been included in 334B<podchecker> (the script). This allows users of B<Pod::Checker> to 335control completely the output behaviour. Users of B<podchecker> (the script) 336get the well-known behaviour. 337 338=cut 339 340############################################################################# 341 342use strict; 343#use diagnostics; 344use Carp; 345use Exporter; 346use Pod::Parser; 347 348use vars qw(@ISA @EXPORT); 349@ISA = qw(Pod::Parser); 350@EXPORT = qw(&podchecker); 351 352use vars qw(%VALID_COMMANDS %VALID_SEQUENCES); 353 354my %VALID_COMMANDS = ( 355 'pod' => 1, 356 'cut' => 1, 357 'head1' => 1, 358 'head2' => 1, 359 'head3' => 1, 360 'head4' => 1, 361 'over' => 1, 362 'back' => 1, 363 'item' => 1, 364 'for' => 1, 365 'begin' => 1, 366 'end' => 1, 367); 368 369my %VALID_SEQUENCES = ( 370 'I' => 1, 371 'B' => 1, 372 'S' => 1, 373 'C' => 1, 374 'L' => 1, 375 'F' => 1, 376 'X' => 1, 377 'Z' => 1, 378 'E' => 1, 379); 380 381# stolen from HTML::Entities 382my %ENTITIES = ( 383 # Some normal chars that have special meaning in SGML context 384 amp => '&', # ampersand 385'gt' => '>', # greater than 386'lt' => '<', # less than 387 quot => '"', # double quote 388 389 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML 390 AElig => '�', # capital AE diphthong (ligature) 391 Aacute => '�', # capital A, acute accent 392 Acirc => '�', # capital A, circumflex accent 393 Agrave => '�', # capital A, grave accent 394 Aring => '�', # capital A, ring 395 Atilde => '�', # capital A, tilde 396 Auml => '�', # capital A, dieresis or umlaut mark 397 Ccedil => '�', # capital C, cedilla 398 ETH => '�', # capital Eth, Icelandic 399 Eacute => '�', # capital E, acute accent 400 Ecirc => '�', # capital E, circumflex accent 401 Egrave => '�', # capital E, grave accent 402 Euml => '�', # capital E, dieresis or umlaut mark 403 Iacute => '�', # capital I, acute accent 404 Icirc => '�', # capital I, circumflex accent 405 Igrave => '�', # capital I, grave accent 406 Iuml => '�', # capital I, dieresis or umlaut mark 407 Ntilde => '�', # capital N, tilde 408 Oacute => '�', # capital O, acute accent 409 Ocirc => '�', # capital O, circumflex accent 410 Ograve => '�', # capital O, grave accent 411 Oslash => '�', # capital O, slash 412 Otilde => '�', # capital O, tilde 413 Ouml => '�', # capital O, dieresis or umlaut mark 414 THORN => '�', # capital THORN, Icelandic 415 Uacute => '�', # capital U, acute accent 416 Ucirc => '�', # capital U, circumflex accent 417 Ugrave => '�', # capital U, grave accent 418 Uuml => '�', # capital U, dieresis or umlaut mark 419 Yacute => '�', # capital Y, acute accent 420 aacute => '�', # small a, acute accent 421 acirc => '�', # small a, circumflex accent 422 aelig => '�', # small ae diphthong (ligature) 423 agrave => '�', # small a, grave accent 424 aring => '�', # small a, ring 425 atilde => '�', # small a, tilde 426 auml => '�', # small a, dieresis or umlaut mark 427 ccedil => '�', # small c, cedilla 428 eacute => '�', # small e, acute accent 429 ecirc => '�', # small e, circumflex accent 430 egrave => '�', # small e, grave accent 431 eth => '�', # small eth, Icelandic 432 euml => '�', # small e, dieresis or umlaut mark 433 iacute => '�', # small i, acute accent 434 icirc => '�', # small i, circumflex accent 435 igrave => '�', # small i, grave accent 436 iuml => '�', # small i, dieresis or umlaut mark 437 ntilde => '�', # small n, tilde 438 oacute => '�', # small o, acute accent 439 ocirc => '�', # small o, circumflex accent 440 ograve => '�', # small o, grave accent 441 oslash => '�', # small o, slash 442 otilde => '�', # small o, tilde 443 ouml => '�', # small o, dieresis or umlaut mark 444 szlig => '�', # small sharp s, German (sz ligature) 445 thorn => '�', # small thorn, Icelandic 446 uacute => '�', # small u, acute accent 447 ucirc => '�', # small u, circumflex accent 448 ugrave => '�', # small u, grave accent 449 uuml => '�', # small u, dieresis or umlaut mark 450 yacute => '�', # small y, acute accent 451 yuml => '�', # small y, dieresis or umlaut mark 452 453 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) 454 copy => '�', # copyright sign 455 reg => '�', # registered sign 456 nbsp => "\240", # non breaking space 457 458 # Additional ISO-8859/1 entities listed in rfc1866 (section 14) 459 iexcl => '�', 460 cent => '�', 461 pound => '�', 462 curren => '�', 463 yen => '�', 464 brvbar => '�', 465 sect => '�', 466 uml => '�', 467 ordf => '�', 468 laquo => '�', 469'not' => '�', # not is a keyword in perl 470 shy => '�', 471 macr => '�', 472 deg => '�', 473 plusmn => '�', 474 sup1 => '�', 475 sup2 => '�', 476 sup3 => '�', 477 acute => '�', 478 micro => '�', 479 para => '�', 480 middot => '�', 481 cedil => '�', 482 ordm => '�', 483 raquo => '�', 484 frac14 => '�', 485 frac12 => '�', 486 frac34 => '�', 487 iquest => '�', 488'times' => '�', # times is a keyword in perl 489 divide => '�', 490 491# some POD special entities 492 verbar => '|', 493 sol => '/' 494); 495 496##--------------------------------------------------------------------------- 497 498##--------------------------------- 499## Function definitions begin here 500##--------------------------------- 501 502sub podchecker( $ ; $ % ) { 503 my ($infile, $outfile, %options) = @_; 504 local $_; 505 506 ## Set defaults 507 $infile ||= \*STDIN; 508 $outfile ||= \*STDERR; 509 510 ## Now create a pod checker 511 my $checker = new Pod::Checker(%options); 512 513 ## Now check the pod document for errors 514 $checker->parse_from_file($infile, $outfile); 515 516 ## Return the number of errors found 517 return $checker->num_errors(); 518} 519 520##--------------------------------------------------------------------------- 521 522##------------------------------- 523## Method definitions begin here 524##------------------------------- 525 526################################## 527 528=over 4 529 530=item C<Pod::Checker-E<gt>new( %options )> 531 532Return a reference to a new Pod::Checker object that inherits from 533Pod::Parser and is used for calling the required methods later. The 534following options are recognized: 535 536C<-warnings =E<gt> num> 537 Print warnings if C<num> is true. The higher the value of C<num>, 538the more warnings are printed. Currently there are only levels 1 and 2. 539 540C<-quiet =E<gt> num> 541 If C<num> is true, do not print any errors/warnings. This is useful 542when Pod::Checker is used to munge POD code into plain text from within 543POD formatters. 544 545=cut 546 547## sub new { 548## my $this = shift; 549## my $class = ref($this) || $this; 550## my %params = @_; 551## my $self = {%params}; 552## bless $self, $class; 553## $self->initialize(); 554## return $self; 555## } 556 557sub initialize { 558 my $self = shift; 559 ## Initialize number of errors, and setup an error function to 560 ## increment this number and then print to the designated output. 561 $self->{_NUM_ERRORS} = 0; 562 $self->{_NUM_WARNINGS} = 0; 563 $self->{-quiet} ||= 0; 564 # set the error handling subroutine 565 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror'); 566 $self->{_commands} = 0; # total number of POD commands encountered 567 $self->{_list_stack} = []; # stack for nested lists 568 $self->{_have_begin} = ''; # stores =begin 569 $self->{_links} = []; # stack for internal hyperlinks 570 $self->{_nodes} = []; # stack for =head/=item nodes 571 $self->{_index} = []; # text in X<> 572 # print warnings? 573 $self->{-warnings} = 1 unless(defined $self->{-warnings}); 574 $self->{_current_head1} = ''; # the current =head1 block 575 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings}); 576} 577 578################################## 579 580=item C<$checker-E<gt>poderror( @args )> 581 582=item C<$checker-E<gt>poderror( {%opts}, @args )> 583 584Internal method for printing errors and warnings. If no options are 585given, simply prints "@_". The following options are recognized and used 586to form the output: 587 588 -msg 589 590A message to print prior to C<@args>. 591 592 -line 593 594The line number the error occurred in. 595 596 -file 597 598The file (name) the error occurred in. 599 600 -severity 601 602The error level, should be 'WARNING' or 'ERROR'. 603 604=cut 605 606# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args ) 607sub poderror { 608 my $self = shift; 609 my %opts = (ref $_[0]) ? %{shift()} : (); 610 611 ## Retrieve options 612 chomp( my $msg = ($opts{-msg} || "")."@_" ); 613 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : ""; 614 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : ""; 615 unless (exists $opts{-severity}) { 616 ## See if can find severity in message prefix 617 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// ); 618 } 619 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : ""; 620 621 ## Increment error count and print message " 622 ++($self->{_NUM_ERRORS}) 623 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR')); 624 ++($self->{_NUM_WARNINGS}) 625 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING')); 626 unless($self->{-quiet}) { 627 my $out_fh = $self->output_handle() || \*STDERR; 628 print $out_fh ($severity, $msg, $line, $file, "\n") 629 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING'); 630 } 631} 632 633################################## 634 635=item C<$checker-E<gt>num_errors()> 636 637Set (if argument specified) and retrieve the number of errors found. 638 639=cut 640 641sub num_errors { 642 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS}; 643} 644 645################################## 646 647=item C<$checker-E<gt>num_warnings()> 648 649Set (if argument specified) and retrieve the number of warnings found. 650 651=cut 652 653sub num_warnings { 654 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS}; 655} 656 657################################## 658 659=item C<$checker-E<gt>name()> 660 661Set (if argument specified) and retrieve the canonical name of POD as 662found in the C<=head1 NAME> section. 663 664=cut 665 666sub name { 667 return (@_ > 1 && $_[1]) ? 668 ($_[0]->{-name} = $_[1]) : $_[0]->{-name}; 669} 670 671################################## 672 673=item C<$checker-E<gt>node()> 674 675Add (if argument specified) and retrieve the nodes (as defined by C<=headX> 676and C<=item>) of the current POD. The nodes are returned in the order of 677their occurrence. They consist of plain text, each piece of whitespace is 678collapsed to a single blank. 679 680=cut 681 682sub node { 683 my ($self,$text) = @_; 684 if(defined $text) { 685 $text =~ s/\s+$//s; # strip trailing whitespace 686 $text =~ s/\s+/ /gs; # collapse whitespace 687 # add node, order important! 688 push(@{$self->{_nodes}}, $text); 689 # keep also a uniqueness counter 690 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); 691 return $text; 692 } 693 @{$self->{_nodes}}; 694} 695 696################################## 697 698=item C<$checker-E<gt>idx()> 699 700Add (if argument specified) and retrieve the index entries (as defined by 701C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece 702of whitespace is collapsed to a single blank. 703 704=cut 705 706# set/return index entries of current POD 707sub idx { 708 my ($self,$text) = @_; 709 if(defined $text) { 710 $text =~ s/\s+$//s; # strip trailing whitespace 711 $text =~ s/\s+/ /gs; # collapse whitespace 712 # add node, order important! 713 push(@{$self->{_index}}, $text); 714 # keep also a uniqueness counter 715 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); 716 return $text; 717 } 718 @{$self->{_index}}; 719} 720 721################################## 722 723=item C<$checker-E<gt>hyperlink()> 724 725Add (if argument specified) and retrieve the hyperlinks (as defined by 726C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line 727number and C<Pod::Hyperlink> object. 728 729=back 730 731=cut 732 733# set/return hyperlinks of the current POD 734sub hyperlink { 735 my $self = shift; 736 if($_[0]) { 737 push(@{$self->{_links}}, $_[0]); 738 return $_[0]; 739 } 740 @{$self->{_links}}; 741} 742 743## overrides for Pod::Parser 744 745sub end_pod { 746 ## Do some final checks and 747 ## print the number of errors found 748 my $self = shift; 749 my $infile = $self->input_file(); 750 751 if(@{$self->{_list_stack}}) { 752 my $list; 753 while(($list = $self->_close_list('EOF',$infile)) && 754 $list->indent() ne 'auto') { 755 $self->poderror({ -line => 'EOF', -file => $infile, 756 -severity => 'ERROR', -msg => "=over on line " . 757 $list->start() . " without closing =back" }); #" 758 } 759 } 760 761 # check validity of document internal hyperlinks 762 # first build the node names from the paragraph text 763 my %nodes; 764 foreach($self->node()) { 765 $nodes{$_} = 1; 766 if(/^(\S+)\s+\S/) { 767 # we have more than one word. Use the first as a node, too. 768 # This is used heavily in perlfunc.pod 769 $nodes{$1} ||= 2; # derived node 770 } 771 } 772 foreach($self->idx()) { 773 $nodes{$_} = 3; # index node 774 } 775 foreach($self->hyperlink()) { 776 my ($line,$link) = @$_; 777 # _TODO_ what if there is a link to the page itself by the name, 778 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION"> 779 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') { 780 my $node = $self->_check_ptree($self->parse_text($link->node(), 781 $line), $line, $infile, 'L'); 782 if($node && !$nodes{$node}) { 783 $self->poderror({ -line => $line || '', -file => $infile, 784 -severity => 'ERROR', 785 -msg => "unresolved internal link '$node'"}); 786 } 787 } 788 } 789 790 # check the internal nodes for uniqueness. This pertains to 791 # =headX, =item and X<...> 792 if($self->{-warnings} && $self->{-warnings}>1) { 793 foreach(grep($self->{_unique_nodes}->{$_} > 1, 794 keys %{$self->{_unique_nodes}})) { 795 $self->poderror({ -line => '-', -file => $infile, 796 -severity => 'WARNING', 797 -msg => "multiple occurrence of link target '$_'"}); 798 } 799 } 800 801 # no POD found here 802 $self->num_errors(-1) if($self->{_commands} == 0); 803} 804 805# check a POD command directive 806sub command { 807 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_; 808 my ($file, $line) = $pod_para->file_line; 809 ## Check the command syntax 810 my $arg; # this will hold the command argument 811 if (! $VALID_COMMANDS{$cmd}) { 812 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', 813 -msg => "Unknown command '$cmd'" }); 814 } 815 else { # found a valid command 816 $self->{_commands}++; # delete this line if below is enabled again 817 818 ##### following check disabled due to strong request 819 #if(!$self->{_commands}++ && $cmd !~ /^head/) { 820 # $self->poderror({ -line => $line, -file => $file, 821 # -severity => 'WARNING', 822 # -msg => "file does not start with =head" }); 823 #} 824 825 # check syntax of particular command 826 if($cmd eq 'over') { 827 # check for argument 828 $arg = $self->interpolate_and_check($paragraph, $line,$file); 829 my $indent = 4; # default 830 if($arg && $arg =~ /^\s*(\d+)\s*$/) { 831 $indent = $1; 832 } 833 # start a new list 834 $self->_open_list($indent,$line,$file); 835 } 836 elsif($cmd eq 'item') { 837 # are we in a list? 838 unless(@{$self->{_list_stack}}) { 839 $self->poderror({ -line => $line, -file => $file, 840 -severity => 'ERROR', 841 -msg => "=item without previous =over" }); 842 # auto-open in case we encounter many more 843 $self->_open_list('auto',$line,$file); 844 } 845 my $list = $self->{_list_stack}->[0]; 846 # check whether the previous item had some contents 847 if(defined $self->{_list_item_contents} && 848 $self->{_list_item_contents} == 0) { 849 $self->poderror({ -line => $line, -file => $file, 850 -severity => 'WARNING', 851 -msg => "previous =item has no contents" }); 852 } 853 if($list->{_has_par}) { 854 $self->poderror({ -line => $line, -file => $file, 855 -severity => 'WARNING', 856 -msg => "preceding non-item paragraph(s)" }); 857 delete $list->{_has_par}; 858 } 859 # check for argument 860 $arg = $self->interpolate_and_check($paragraph, $line, $file); 861 if($arg && $arg =~ /(\S+)/) { 862 $arg =~ s/[\s\n]+$//; 863 my $type; 864 if($arg =~ /^[*]\s*(\S*.*)/) { 865 $type = 'bullet'; 866 $self->{_list_item_contents} = $1 ? 1 : 0; 867 $arg = $1; 868 } 869 elsif($arg =~ /^\d+\.?\s*(\S*)/) { 870 $type = 'number'; 871 $self->{_list_item_contents} = $1 ? 1 : 0; 872 $arg = $1; 873 } 874 else { 875 $type = 'definition'; 876 $self->{_list_item_contents} = 1; 877 } 878 my $first = $list->type(); 879 if($first && $first ne $type) { 880 $self->poderror({ -line => $line, -file => $file, 881 -severity => 'WARNING', 882 -msg => "=item type mismatch ('$first' vs. '$type')"}); 883 } 884 else { # first item 885 $list->type($type); 886 } 887 } 888 else { 889 $self->poderror({ -line => $line, -file => $file, 890 -severity => 'WARNING', 891 -msg => "No argument for =item" }); 892 $arg = ' '; # empty 893 $self->{_list_item_contents} = 0; 894 } 895 # add this item 896 $list->item($arg); 897 # remember this node 898 $self->node($arg); 899 } 900 elsif($cmd eq 'back') { 901 # check if we have an open list 902 unless(@{$self->{_list_stack}}) { 903 $self->poderror({ -line => $line, -file => $file, 904 -severity => 'ERROR', 905 -msg => "=back without previous =over" }); 906 } 907 else { 908 # check for spurious characters 909 $arg = $self->interpolate_and_check($paragraph, $line,$file); 910 if($arg && $arg =~ /\S/) { 911 $self->poderror({ -line => $line, -file => $file, 912 -severity => 'ERROR', 913 -msg => "Spurious character(s) after =back" }); 914 } 915 # close list 916 my $list = $self->_close_list($line,$file); 917 # check for empty lists 918 if(!$list->item() && $self->{-warnings}) { 919 $self->poderror({ -line => $line, -file => $file, 920 -severity => 'WARNING', 921 -msg => "No items in =over (at line " . 922 $list->start() . ") / =back list"}); #" 923 } 924 } 925 } 926 elsif($cmd =~ /^head(\d+)/) { 927 my $hnum = $1; 928 $self->{"_have_head_$hnum"}++; # count head types 929 if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) { 930 $self->poderror({ -line => $line, -file => $file, 931 -severity => 'WARNING', 932 -msg => "=head$hnum without preceding higher level"}); 933 } 934 # check whether the previous =head section had some contents 935 if(defined $self->{_commands_in_head} && 936 $self->{_commands_in_head} == 0 && 937 defined $self->{_last_head} && 938 $self->{_last_head} >= $hnum) { 939 $self->poderror({ -line => $line, -file => $file, 940 -severity => 'WARNING', 941 -msg => "empty section in previous paragraph"}); 942 } 943 $self->{_commands_in_head} = -1; 944 $self->{_last_head} = $hnum; 945 # check if there is an open list 946 if(@{$self->{_list_stack}}) { 947 my $list; 948 while(($list = $self->_close_list($line,$file)) && 949 $list->indent() ne 'auto') { 950 $self->poderror({ -line => $line, -file => $file, 951 -severity => 'ERROR', 952 -msg => "=over on line ". $list->start() . 953 " without closing =back (at $cmd)" }); 954 } 955 } 956 # remember this node 957 $arg = $self->interpolate_and_check($paragraph, $line,$file); 958 $arg =~ s/[\s\n]+$//s; 959 $self->node($arg); 960 unless(length($arg)) { 961 $self->poderror({ -line => $line, -file => $file, 962 -severity => 'ERROR', 963 -msg => "empty =$cmd"}); 964 } 965 if($cmd eq 'head1') { 966 $self->{_current_head1} = $arg; 967 } else { 968 $self->{_current_head1} = ''; 969 } 970 } 971 elsif($cmd eq 'begin') { 972 if($self->{_have_begin}) { 973 # already have a begin 974 $self->poderror({ -line => $line, -file => $file, 975 -severity => 'ERROR', 976 -msg => "Nested =begin's (first at line " . 977 $self->{_have_begin} . ")"}); 978 } 979 else { 980 # check for argument 981 $arg = $self->interpolate_and_check($paragraph, $line,$file); 982 unless($arg && $arg =~ /(\S+)/) { 983 $self->poderror({ -line => $line, -file => $file, 984 -severity => 'ERROR', 985 -msg => "No argument for =begin"}); 986 } 987 # remember the =begin 988 $self->{_have_begin} = "$line:$1"; 989 } 990 } 991 elsif($cmd eq 'end') { 992 if($self->{_have_begin}) { 993 # close the existing =begin 994 $self->{_have_begin} = ''; 995 # check for spurious characters 996 $arg = $self->interpolate_and_check($paragraph, $line,$file); 997 # the closing argument is optional 998 #if($arg && $arg =~ /\S/) { 999 # $self->poderror({ -line => $line, -file => $file, 1000 # -severity => 'WARNING', 1001 # -msg => "Spurious character(s) after =end" }); 1002 #} 1003 } 1004 else { 1005 # don't have a matching =begin 1006 $self->poderror({ -line => $line, -file => $file, 1007 -severity => 'ERROR', 1008 -msg => "=end without =begin" }); 1009 } 1010 } 1011 elsif($cmd eq 'for') { 1012 unless($paragraph =~ /\s*(\S+)\s*/) { 1013 $self->poderror({ -line => $line, -file => $file, 1014 -severity => 'ERROR', 1015 -msg => "=for without formatter specification" }); 1016 } 1017 $arg = ''; # do not expand paragraph below 1018 } 1019 elsif($cmd =~ /^(pod|cut)$/) { 1020 # check for argument 1021 $arg = $self->interpolate_and_check($paragraph, $line,$file); 1022 if($arg && $arg =~ /(\S+)/) { 1023 $self->poderror({ -line => $line, -file => $file, 1024 -severity => 'ERROR', 1025 -msg => "Spurious text after =$cmd"}); 1026 } 1027 } 1028 $self->{_commands_in_head}++; 1029 ## Check the interior sequences in the command-text 1030 $self->interpolate_and_check($paragraph, $line,$file) 1031 unless(defined $arg); 1032 } 1033} 1034 1035sub _open_list 1036{ 1037 my ($self,$indent,$line,$file) = @_; 1038 my $list = Pod::List->new( 1039 -indent => $indent, 1040 -start => $line, 1041 -file => $file); 1042 unshift(@{$self->{_list_stack}}, $list); 1043 undef $self->{_list_item_contents}; 1044 $list; 1045} 1046 1047sub _close_list 1048{ 1049 my ($self,$line,$file) = @_; 1050 my $list = shift(@{$self->{_list_stack}}); 1051 if(defined $self->{_list_item_contents} && 1052 $self->{_list_item_contents} == 0) { 1053 $self->poderror({ -line => $line, -file => $file, 1054 -severity => 'WARNING', 1055 -msg => "previous =item has no contents" }); 1056 } 1057 undef $self->{_list_item_contents}; 1058 $list; 1059} 1060 1061# process a block of some text 1062sub interpolate_and_check { 1063 my ($self, $paragraph, $line, $file) = @_; 1064 ## Check the interior sequences in the command-text 1065 # and return the text 1066 $self->_check_ptree( 1067 $self->parse_text($paragraph,$line), $line, $file, ''); 1068} 1069 1070sub _check_ptree { 1071 my ($self,$ptree,$line,$file,$nestlist) = @_; 1072 local($_); 1073 my $text = ''; 1074 # process each node in the parse tree 1075 foreach(@$ptree) { 1076 # regular text chunk 1077 unless(ref) { 1078 # count the unescaped angle brackets 1079 # complain only when warning level is greater than 1 1080 if($self->{-warnings} && $self->{-warnings}>1) { 1081 my $count; 1082 if($count = tr/<>/<>/) { 1083 $self->poderror({ -line => $line, -file => $file, 1084 -severity => 'WARNING', 1085 -msg => "$count unescaped <> in paragraph" }); 1086 } 1087 } 1088 $text .= $_; 1089 next; 1090 } 1091 # have an interior sequence 1092 my $cmd = $_->cmd_name(); 1093 my $contents = $_->parse_tree(); 1094 ($file,$line) = $_->file_line(); 1095 # check for valid tag 1096 if (! $VALID_SEQUENCES{$cmd}) { 1097 $self->poderror({ -line => $line, -file => $file, 1098 -severity => 'ERROR', 1099 -msg => qq(Unknown interior-sequence '$cmd')}); 1100 # expand it anyway 1101 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1102 next; 1103 } 1104 if($nestlist =~ /$cmd/) { 1105 $self->poderror({ -line => $line, -file => $file, 1106 -severity => 'WARNING', 1107 -msg => "nested commands $cmd<...$cmd<...>...>"}); 1108 # _TODO_ should we add the contents anyway? 1109 # expand it anyway, see below 1110 } 1111 if($cmd eq 'E') { 1112 # preserve entities 1113 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) { 1114 $self->poderror({ -line => $line, -file => $file, 1115 -severity => 'ERROR', 1116 -msg => "garbled entity " . $_->raw_text()}); 1117 next; 1118 } 1119 my $ent = $$contents[0]; 1120 my $val; 1121 if($ent =~ /^0x[0-9a-f]+$/i) { 1122 # hexadec entity 1123 $val = hex($ent); 1124 } 1125 elsif($ent =~ /^0\d+$/) { 1126 # octal 1127 $val = oct($ent); 1128 } 1129 elsif($ent =~ /^\d+$/) { 1130 # numeric entity 1131 $val = $ent; 1132 } 1133 if(defined $val) { 1134 if($val>0 && $val<256) { 1135 $text .= chr($val); 1136 } 1137 else { 1138 $self->poderror({ -line => $line, -file => $file, 1139 -severity => 'ERROR', 1140 -msg => "Entity number out of range " . $_->raw_text()}); 1141 } 1142 } 1143 elsif($ENTITIES{$ent}) { 1144 # known ISO entity 1145 $text .= $ENTITIES{$ent}; 1146 } 1147 else { 1148 $self->poderror({ -line => $line, -file => $file, 1149 -severity => 'WARNING', 1150 -msg => "Unknown entity " . $_->raw_text()}); 1151 $text .= "E<$ent>"; 1152 } 1153 } 1154 elsif($cmd eq 'L') { 1155 # try to parse the hyperlink 1156 my $link = Pod::Hyperlink->new($contents->raw_text()); 1157 unless(defined $link) { 1158 $self->poderror({ -line => $line, -file => $file, 1159 -severity => 'ERROR', 1160 -msg => "malformed link " . $_->raw_text() ." : $@"}); 1161 next; 1162 } 1163 $link->line($line); # remember line 1164 if($self->{-warnings}) { 1165 foreach my $w ($link->warning()) { 1166 $self->poderror({ -line => $line, -file => $file, 1167 -severity => 'WARNING', 1168 -msg => $w }); 1169 } 1170 } 1171 # check the link text 1172 $text .= $self->_check_ptree($self->parse_text($link->text(), 1173 $line), $line, $file, "$nestlist$cmd"); 1174 # remember link 1175 $self->hyperlink([$line,$link]); 1176 } 1177 elsif($cmd =~ /[BCFIS]/) { 1178 # add the guts 1179 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1180 } 1181 elsif($cmd eq 'Z') { 1182 if(length($contents->raw_text())) { 1183 $self->poderror({ -line => $line, -file => $file, 1184 -severity => 'ERROR', 1185 -msg => "Nonempty Z<>"}); 1186 } 1187 } 1188 elsif($cmd eq 'X') { 1189 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1190 if($idx =~ /^\s*$/s) { 1191 $self->poderror({ -line => $line, -file => $file, 1192 -severity => 'ERROR', 1193 -msg => "Empty X<>"}); 1194 } 1195 else { 1196 # remember this node 1197 $self->idx($idx); 1198 } 1199 } 1200 else { 1201 # not reached 1202 die "internal error"; 1203 } 1204 } 1205 $text; 1206} 1207 1208# process a block of verbatim text 1209sub verbatim { 1210 ## Nothing particular to check 1211 my ($self, $paragraph, $line_num, $pod_para) = @_; 1212 1213 $self->_preproc_par($paragraph); 1214 1215 if($self->{_current_head1} eq 'NAME') { 1216 my ($file, $line) = $pod_para->file_line; 1217 $self->poderror({ -line => $line, -file => $file, 1218 -severity => 'WARNING', 1219 -msg => 'Verbatim paragraph in NAME section' }); 1220 } 1221} 1222 1223# process a block of regular text 1224sub textblock { 1225 my ($self, $paragraph, $line_num, $pod_para) = @_; 1226 my ($file, $line) = $pod_para->file_line; 1227 1228 $self->_preproc_par($paragraph); 1229 1230 # skip this paragraph if in a =begin block 1231 unless($self->{_have_begin}) { 1232 my $block = $self->interpolate_and_check($paragraph, $line,$file); 1233 if($self->{_current_head1} eq 'NAME') { 1234 if($block =~ /^\s*(\S+?)\s*[,-]/) { 1235 # this is the canonical name 1236 $self->{-name} = $1 unless(defined $self->{-name}); 1237 } 1238 } 1239 } 1240} 1241 1242sub _preproc_par 1243{ 1244 my $self = shift; 1245 $_[0] =~ s/[\s\n]+$//; 1246 if($_[0]) { 1247 $self->{_commands_in_head}++; 1248 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents}); 1249 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) { 1250 $self->{_list_stack}->[0]->{_has_par} = 1; 1251 } 1252 } 1253} 1254 12551; 1256 1257__END__ 1258 1259=head1 AUTHOR 1260 1261Please report bugs using L<http://rt.cpan.org>. 1262 1263Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version), 1264Marek Rouchal E<lt>marekr@cpan.orgE<gt> 1265 1266Based on code for B<Pod::Text::pod2text()> written by 1267Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> 1268 1269=cut 1270 1271