1# Pod::Man -- Convert POD data to formatted *roff input.
2# $Id: Man.pm,v 1.10 2006/11/16 21:56:24 espie Exp $
3#
4# Copyright 1999, 2000, 2001, 2002, 2003 by Russ Allbery <rra@stanford.edu>
5#
6# This program is free software; you may redistribute it and/or modify it
7# under the same terms as Perl itself.
8#
9# This module translates POD documentation into *roff markup using the man
10# macro set, and is intended for converting POD documents written as Unix
11# manual pages to manual pages that can be read by the man(1) command.  It is
12# a replacement for the pod2man command distributed with versions of Perl
13# prior to 5.6.
14#
15# Perl core hackers, please note that this module is also separately
16# maintained outside of the Perl core as part of the podlators.  Please send
17# me any patches at the address above in addition to sending them to the
18# standard Perl mailing lists.
19
20##############################################################################
21# Modules and declarations
22##############################################################################
23
24package Pod::Man;
25
26require 5.005;
27
28use Carp qw(carp croak);
29use Pod::ParseLink qw(parselink);
30use Pod::Parser ();
31
32use strict;
33use subs qw(makespace);
34use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
35
36@ISA = qw(Pod::Parser);
37
38# Don't use the CVS revision as the version, since this module is also in Perl
39# core and too many things could munge CVS magic revision strings.  This
40# number should ideally be the same as the CVS revision in podlators, however.
41$VERSION = 1.37;
42
43
44##############################################################################
45# Preamble and *roff output tables
46##############################################################################
47
48# The following is the static preamble which starts all *roff output we
49# generate.  It's completely static except for the font to use as a
50# fixed-width font, which is designed by @CFONT@, and the left and right
51# quotes to use for C<> text, designated by @LQOUTE@ and @RQUOTE@.  $PREAMBLE
52# should therefore be run through s/\@CFONT\@/<font>/g before output.
53$PREAMBLE = <<'----END OF PREAMBLE----';
54.de Sh \" Subsection heading
55.br
56.if t .Sp
57.ne 5
58.PP
59\fB\\$1\fR
60.PP
61..
62.de Sp \" Vertical space (when we can't use .PP)
63.if t .sp .5v
64.if n .sp
65..
66.de Vb \" Begin verbatim text
67.ft @CFONT@
68.nf
69.ne \\$1
70..
71.de Ve \" End verbatim text
72.ft R
73.fi
74..
75.\" Set up some character translations and predefined strings.  \*(-- will
76.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
77.\" double quote, and \*(R" will give a right double quote.  | will give a
78.\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
79.\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
80.\" expand to `' in nroff, nothing in troff, for use with C<>.
81.tr \(*W-|\(bv\*(Tr
82.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
83.ie n \{\
84.    ds -- \(*W-
85.    ds PI pi
86.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
87.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
88.    ds L" ""
89.    ds R" ""
90.    ds C` @LQUOTE@
91.    ds C' @RQUOTE@
92'br\}
93.el\{\
94.    ds -- \|\(em\|
95.    ds PI \(*p
96.    ds L" ``
97.    ds R" ''
98'br\}
99.\"
100.\" If the F register is turned on, we'll generate index entries on stderr for
101.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
102.\" entries marked with X<> in POD.  Of course, you'll have to process the
103.\" output yourself in some meaningful fashion.
104.if \nF \{\
105.    de IX
106.    tm Index:\\$1\t\\n%\t"\\$2"
107..
108.    nr % 0
109.    rr F
110.\}
111.\"
112.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
113.\" way too many mistakes in technical documents.
114.hy 0
115.if n .na
116.\"
117.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
118.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
119.    \" fudge factors for nroff and troff
120.if n \{\
121.    ds #H 0
122.    ds #V .8m
123.    ds #F .3m
124.    ds #[ \f1
125.    ds #] \fP
126.\}
127.if t \{\
128.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
129.    ds #V .6m
130.    ds #F 0
131.    ds #[ \&
132.    ds #] \&
133.\}
134.    \" simple accents for nroff and troff
135.if n \{\
136.    ds ' \&
137.    ds ` \&
138.    ds ^ \&
139.    ds , \&
140.    ds ~ ~
141.    ds /
142.\}
143.if t \{\
144.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
145.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
146.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
147.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
148.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
149.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
150.\}
151.    \" troff and (daisy-wheel) nroff accents
152.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
153.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
154.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
155.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
156.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
157.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
158.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
159.ds ae a\h'-(\w'a'u*4/10)'e
160.ds Ae A\h'-(\w'A'u*4/10)'E
161.    \" corrections for vroff
162.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
163.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
164.    \" for low resolution devices (crt and lpr)
165.if \n(.H>23 .if \n(.V>19 \
166\{\
167.    ds : e
168.    ds 8 ss
169.    ds o a
170.    ds d- d\h'-1'\(ga
171.    ds D- D\h'-1'\(hy
172.    ds th \o'bp'
173.    ds Th \o'LP'
174.    ds ae ae
175.    ds Ae AE
176.\}
177.rm #[ #] #H #V #F C
178----END OF PREAMBLE----
179#`# for cperl-mode
180
181# This table is taken nearly verbatim from Tom Christiansen's pod2man.  It
182# assumes that the standard preamble has already been printed, since that's
183# what defines all of the accent marks.  Note that some of these are quoted
184# with double quotes since they contain embedded single quotes, so use \\
185# uniformly for backslash for readability.
186%ESCAPES = (
187    'amp'       =>    '&',      # ampersand
188    'apos'      =>    "'",      # apostrophe
189    'lt'        =>    '<',      # left chevron, less-than
190    'gt'        =>    '>',      # right chevron, greater-than
191    'quot'      =>    '"',      # double quote
192    'sol'       =>    '/',      # solidus (forward slash)
193    'verbar'    =>    '|',      # vertical bar
194
195    'Aacute'    =>    "A\\*'",  # capital A, acute accent
196    'aacute'    =>    "a\\*'",  # small a, acute accent
197    'Acirc'     =>    'A\\*^',  # capital A, circumflex accent
198    'acirc'     =>    'a\\*^',  # small a, circumflex accent
199    'AElig'     =>    '\*(AE',  # capital AE diphthong (ligature)
200    'aelig'     =>    '\*(ae',  # small ae diphthong (ligature)
201    'Agrave'    =>    "A\\*`",  # capital A, grave accent
202    'agrave'    =>    "A\\*`",  # small a, grave accent
203    'Aring'     =>    'A\\*o',  # capital A, ring
204    'aring'     =>    'a\\*o',  # small a, ring
205    'Atilde'    =>    'A\\*~',  # capital A, tilde
206    'atilde'    =>    'a\\*~',  # small a, tilde
207    'Auml'      =>    'A\\*:',  # capital A, dieresis or umlaut mark
208    'auml'      =>    'a\\*:',  # small a, dieresis or umlaut mark
209    'Ccedil'    =>    'C\\*,',  # capital C, cedilla
210    'ccedil'    =>    'c\\*,',  # small c, cedilla
211    'Eacute'    =>    "E\\*'",  # capital E, acute accent
212    'eacute'    =>    "e\\*'",  # small e, acute accent
213    'Ecirc'     =>    'E\\*^',  # capital E, circumflex accent
214    'ecirc'     =>    'e\\*^',  # small e, circumflex accent
215    'Egrave'    =>    'E\\*`',  # capital E, grave accent
216    'egrave'    =>    'e\\*`',  # small e, grave accent
217    'ETH'       =>    '\\*(D-', # capital Eth, Icelandic
218    'eth'       =>    '\\*(d-', # small eth, Icelandic
219    'Euml'      =>    'E\\*:',  # capital E, dieresis or umlaut mark
220    'euml'      =>    'e\\*:',  # small e, dieresis or umlaut mark
221    'Iacute'    =>    "I\\*'",  # capital I, acute accent
222    'iacute'    =>    "i\\*'",  # small i, acute accent
223    'Icirc'     =>    'I\\*^',  # capital I, circumflex accent
224    'icirc'     =>    'i\\*^',  # small i, circumflex accent
225    'Igrave'    =>    'I\\*`',  # capital I, grave accent
226    'igrave'    =>    'i\\*`',  # small i, grave accent
227    'Iuml'      =>    'I\\*:',  # capital I, dieresis or umlaut mark
228    'iuml'      =>    'i\\*:',  # small i, dieresis or umlaut mark
229    'Ntilde'    =>    'N\*~',   # capital N, tilde
230    'ntilde'    =>    'n\*~',   # small n, tilde
231    'Oacute'    =>    "O\\*'",  # capital O, acute accent
232    'oacute'    =>    "o\\*'",  # small o, acute accent
233    'Ocirc'     =>    'O\\*^',  # capital O, circumflex accent
234    'ocirc'     =>    'o\\*^',  # small o, circumflex accent
235    'Ograve'    =>    'O\\*`',  # capital O, grave accent
236    'ograve'    =>    'o\\*`',  # small o, grave accent
237    'Oslash'    =>    'O\\*/',  # capital O, slash
238    'oslash'    =>    'o\\*/',  # small o, slash
239    'Otilde'    =>    'O\\*~',  # capital O, tilde
240    'otilde'    =>    'o\\*~',  # small o, tilde
241    'Ouml'      =>    'O\\*:',  # capital O, dieresis or umlaut mark
242    'ouml'      =>    'o\\*:',  # small o, dieresis or umlaut mark
243    'szlig'     =>    '\*8',    # small sharp s, German (sz ligature)
244    'THORN'     =>    '\\*(Th', # capital THORN, Icelandic
245    'thorn'     =>    '\\*(th', # small thorn, Icelandic
246    'Uacute'    =>    "U\\*'",  # capital U, acute accent
247    'uacute'    =>    "u\\*'",  # small u, acute accent
248    'Ucirc'     =>    'U\\*^',  # capital U, circumflex accent
249    'ucirc'     =>    'u\\*^',  # small u, circumflex accent
250    'Ugrave'    =>    'U\\*`',  # capital U, grave accent
251    'ugrave'    =>    'u\\*`',  # small u, grave accent
252    'Uuml'      =>    'U\\*:',  # capital U, dieresis or umlaut mark
253    'uuml'      =>    'u\\*:',  # small u, dieresis or umlaut mark
254    'Yacute'    =>    "Y\\*'",  # capital Y, acute accent
255    'yacute'    =>    "y\\*'",  # small y, acute accent
256    'yuml'      =>    'y\\*:',  # small y, dieresis or umlaut mark
257
258    'nbsp'      =>    '\\ ',    # non-breaking space
259    'shy'       =>    '',       # soft (discretionary) hyphen
260    'copy'	=>    '\(co',	# copyright
261);
262
263
264##############################################################################
265# Static helper functions
266##############################################################################
267
268# Protect leading quotes and periods against interpretation as commands.  Also
269# protect anything starting with a backslash, since it could expand or hide
270# something that *roff would interpret as a command.  This is overkill, but
271# it's much simpler than trying to parse *roff here.
272sub protect {
273    local $_ = shift;
274    s/^([.\'\\])/\\&$1/mg;
275    $_;
276}
277
278# Translate a font string into an escape.
279sub toescape { (length ($_[0]) > 1 ? '\f(' : '\f') . $_[0] }
280
281
282##############################################################################
283# Initialization
284##############################################################################
285
286# Initialize the object.  Here, we also process any additional options passed
287# to the constructor or set up defaults if none were given.  center is the
288# centered title, release is the version number, and date is the date for the
289# documentation.  Note that we can't know what file name we're processing due
290# to the architecture of Pod::Parser, so that *has* to either be passed to the
291# constructor or set separately with Pod::Man::name().
292sub initialize {
293    my $self = shift;
294
295    # Figure out the fixed-width font.  If user-supplied, make sure that they
296    # are the right length.
297    for (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
298        if (defined $$self{$_}) {
299            if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) {
300                croak qq(roff font should be 1 or 2 chars,)
301                    . qq( not "$$self{$_}");
302            }
303        } else {
304            $$self{$_} = '';
305        }
306    }
307
308    # Set the default fonts.  We can't be sure what fixed bold-italic is going
309    # to be called, so default to just bold.
310    $$self{fixed}           ||= 'CW';
311    $$self{fixedbold}       ||= 'CB';
312    $$self{fixeditalic}     ||= 'CI';
313    $$self{fixedbolditalic} ||= 'CB';
314
315    # Set up a table of font escapes.  First number is fixed-width, second is
316    # bold, third is italic.
317    $$self{FONTS} = { '000' => '\fR', '001' => '\fI',
318                      '010' => '\fB', '011' => '\f(BI',
319                      '100' => toescape ($$self{fixed}),
320                      '101' => toescape ($$self{fixeditalic}),
321                      '110' => toescape ($$self{fixedbold}),
322                      '111' => toescape ($$self{fixedbolditalic})};
323
324    # Extra stuff for page titles.
325    $$self{center} = 'User Contributed Perl Documentation'
326        unless defined $$self{center};
327    $$self{indent} = 4 unless defined $$self{indent};
328
329    # We used to try first to get the version number from a local binary, but
330    # we shouldn't need that any more.  Get the version from the running Perl.
331    # Work a little magic to handle subversions correctly under both the
332    # pre-5.6 and the post-5.6 version numbering schemes.
333    if (!defined $$self{release}) {
334        my @version = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
335        $version[2] ||= 0;
336        $version[2] *= 10 ** (3 - length $version[2]);
337        for (@version) { $_ += 0 }
338        $$self{release} = 'perl v' . join ('.', @version);
339    }
340
341    # Double quotes in things that will be quoted.
342    for (qw/center date release/) {
343        $$self{$_} =~ s/\"/\"\"/g if $$self{$_};
344    }
345
346    # Figure out what quotes we'll be using for C<> text.
347    $$self{quotes} ||= '"';
348    if ($$self{quotes} eq 'none') {
349        $$self{LQUOTE} = $$self{RQUOTE} = '';
350    } elsif (length ($$self{quotes}) == 1) {
351        $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
352    } elsif ($$self{quotes} =~ /^(.)(.)$/
353             || $$self{quotes} =~ /^(..)(..)$/) {
354        $$self{LQUOTE} = $1;
355        $$self{RQUOTE} = $2;
356    } else {
357        croak qq(Invalid quote specification "$$self{quotes}");
358    }
359
360    # Double the first quote; note that this should not be s///g as two double
361    # quotes is represented in *roff as three double quotes, not four.  Weird,
362    # I know.
363    $$self{LQUOTE} =~ s/\"/\"\"/;
364    $$self{RQUOTE} =~ s/\"/\"\"/;
365
366    $self->SUPER::initialize;
367}
368
369# For each document we process, output the preamble first.
370sub begin_pod {
371    my $self = shift;
372
373    # Try to figure out the name and section from the file name.
374    my $section = $$self{section} || 1;
375    my $name = $$self{name};
376    if (!defined $name) {
377        $name = $self->input_file;
378        $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i);
379        $name =~ s/\.p(od|[lm])\z//i;
380        if ($section !~ /^3/) {
381            require File::Basename;
382            $name = uc File::Basename::basename ($name);
383        } else {
384            # Assume that we're dealing with a module.  We want to figure out
385            # the full module name from the path to the file, but we don't
386            # want to include too much of the path into the module name.  Lose
387            # everything up to the first of:
388            #
389            #     */lib/*perl*/         standard or site_perl module
390            #     */*perl*/lib/         from -Dprefix=/opt/perl
391            #     */*perl*/             random module hierarchy
392            #
393            # which works.  Also strip off a leading site or site_perl
394            # component, any OS-specific component, and any version number
395            # component, and strip off an initial component of "lib" or
396            # "blib/lib" since that's what ExtUtils::MakeMaker creates.
397            # splitdir requires at least File::Spec 0.8.
398            require File::Spec;
399            my ($volume, $dirs, $file) = File::Spec->splitpath ($name);
400            my @dirs = File::Spec->splitdir ($dirs);
401            my $cut = 0;
402            my $i;
403            for ($i = 0; $i < scalar @dirs; $i++) {
404                if ($dirs[$i] eq 'lib' && $dirs[$i + 1] =~ /perl/) {
405                    $cut = $i + 2;
406                    last;
407                } elsif ($dirs[$i] =~ /perl/) {
408                    $cut = $i + 1;
409                    $cut++ if $dirs[$i + 1] eq 'lib';
410                    last;
411                }
412            }
413            if ($cut > 0) {
414                splice (@dirs, 0, $cut);
415                shift @dirs if ($dirs[0] =~ /^site(_perl)?$/);
416                shift @dirs if ($dirs[0] =~ /^[\d.]+$/);
417                shift @dirs if ($dirs[0] =~ /^(.*-$^O|$^O-.*|$^O)$/);
418            }
419            shift @dirs if $dirs[0] eq 'lib';
420            splice (@dirs, 0, 2) if ($dirs[0] eq 'blib' && $dirs[1] eq 'lib');
421
422            # Remove empty directories when building the module name; they
423            # occur too easily on Unix by doubling slashes.
424            $name = join ('::', (grep { $_ ? $_ : () } @dirs), $file);
425        }
426    }
427
428    # If $name contains spaces, quote it; this mostly comes up in the case of
429    # input from stdin.
430    $name = '"' . $name . '"' if ($name =~ /\s/);
431
432    # Modification date header.  Try to use the modification time of our
433    # input.
434    if (!defined $$self{date}) {
435        my $time = (stat $self->input_file)[9] || time;
436        my ($day, $month, $year) = (localtime $time)[3,4,5];
437        $month++;
438        $year += 1900;
439        $$self{date} = sprintf ('%4d-%02d-%02d', $year, $month, $day);
440    }
441
442    # Now, print out the preamble and the title.  The meaning of the arguments
443    # to .TH unfortunately vary by system; some systems consider the fourth
444    # argument to be a "source" and others use it as a version number.
445    # Generally it's just presented as the left-side footer, though, so it
446    # doesn't matter too much if a particular system gives it another
447    # interpretation.
448    #
449    # The order of date and release used to be reversed in older versions of
450    # this module, but this order is correct for both Solaris and Linux.
451    local $_ = $PREAMBLE;
452    s/\@CFONT\@/$$self{fixed}/;
453    s/\@LQUOTE\@/$$self{LQUOTE}/;
454    s/\@RQUOTE\@/$$self{RQUOTE}/;
455    chomp $_;
456    my $pversion = $Pod::Parser::VERSION;
457    print { $self->output_handle } <<"----END OF HEADER----";
458.\\" Automatically generated by Pod::Man v$VERSION, Pod::Parser v$pversion
459.\\"
460.\\" Standard preamble:
461.\\" ========================================================================
462$_
463.\\" ========================================================================
464.\\"
465.IX Title "$name $section"
466.TH $name $section "$$self{date}" "$$self{release}" "$$self{center}"
467----END OF HEADER----
468
469    # Initialize a few per-file variables.
470    $$self{INDENT}    = 0;      # Current indentation level.
471    $$self{INDENTS}   = [];     # Stack of indentations.
472    $$self{INDEX}     = [];     # Index keys waiting to be printed.
473    $$self{IN_NAME}   = 0;      # Whether processing the NAME section.
474    $$self{ITEMS}     = 0;      # The number of consecutive =items.
475    $$self{ITEMTYPES} = [];     # Stack of =item types, one per list.
476    $$self{SHIFTWAIT} = 0;      # Whether there is a shift waiting.
477    $$self{SHIFTS}    = [];     # Stack of .RS shifts.
478    $$self{EXCLUDE}   = 0;
479    $$self{VERBATIM}  = 0;
480}
481
482
483##############################################################################
484# Core overrides
485##############################################################################
486
487# Called for each command paragraph.  Gets the command, the associated
488# paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
489# the command to a method named the same as the command.  =cut is handled
490# internally by Pod::Parser.
491sub command {
492    my $self = shift;
493    my $command = shift;
494    return if $command eq 'pod';
495    return if ($$self{EXCLUDE} && $command ne 'end');
496    if ($self->can ('cmd_' . $command)) {
497        $command = 'cmd_' . $command;
498        $self->$command (@_);
499    } else {
500        my ($text, $line, $paragraph) = @_;
501        my $file;
502        ($file, $line) = $paragraph->file_line;
503        $text =~ s/\n+\z//;
504        $text = " $text" if ($text =~ /^\S/);
505        warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
506        return;
507    }
508}
509
510# Called for a verbatim paragraph.  Gets the paragraph, the line number, and a
511# Pod::Paragraph object.  Rofficate backslashes, untabify, put a zero-width
512# character at the beginning of each line to protect against commands, and
513# wrap in .Vb/.Ve.
514sub verbatim {
515    my $self = shift;
516    return if $$self{EXCLUDE};
517    local $_ = shift;
518    return if /^\s+$/;
519    s/\s+$/\n/;
520    my $lines = tr/\n/\n/;
521    1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
522    s/\\/\\e/g;
523    s/^(\s*\S)/'\&' . $1/gme;
524    $self->makespace;
525    $self->output (".Vb $lines\n$_.Ve\n");
526    $$self{NEEDSPACE} = 1;
527}
528
529# Called for a regular text block.  Gets the paragraph, the line number, and a
530# Pod::Paragraph object.  Perform interpolation and output the results.
531sub textblock {
532    my $self = shift;
533    return if $$self{EXCLUDE};
534    $self->output ($_[0]), return if $$self{VERBATIM};
535
536    # Parse the tree.  collapse knows about references to scalars as well as
537    # scalars and does the right thing with them.  Tidy up any trailing
538    # whitespace.
539    my $text = shift;
540    $text = $self->parse ($text, @_);
541    $text =~ s/\n\s*$/\n/;
542
543    # Output the paragraph.  We also have to handle =over without =item.  If
544    # there's an =over without =item, SHIFTWAIT will be set, and we need to
545    # handle creation of the indent here.  Add the shift to SHIFTS so that it
546    # will be cleaned up on =back.
547    $self->makespace;
548    if ($$self{SHIFTWAIT}) {
549        $self->output (".RS $$self{INDENT}\n");
550        push (@{ $$self{SHIFTS} }, $$self{INDENT});
551        $$self{SHIFTWAIT} = 0;
552    }
553    $self->output (protect $self->textmapfonts ($text));
554    $self->outindex;
555    $$self{NEEDSPACE} = 1;
556}
557
558# Called for a formatting code.  Takes a Pod::InteriorSequence object and
559# returns a reference to a scalar.  This scalar is the final formatted text.
560# It's returned as a reference to an array so that other formatting codes
561# above us know that the text has already been processed.
562sub sequence {
563    my ($self, $seq) = @_;
564    my $command = $seq->cmd_name;
565
566    # We have to defer processing of the inside of an L<> formatting code.  If
567    # this code is nested inside an L<> code, return the literal raw text of
568    # it.
569    my $parent = $seq->nested;
570    while (defined $parent) {
571        return $seq->raw_text if ($parent->cmd_name eq 'L');
572        $parent = $parent->nested;
573    }
574
575    # Zero-width characters.
576    return [ '\&' ] if ($command eq 'Z');
577
578    # C<>, L<>, X<>, and E<> don't apply guesswork to their contents.  C<>
579    # needs some additional special handling.
580    my $literal = ($command =~ /^[CELX]$/);
581    local $_ = $self->collapse ($seq->parse_tree, $literal, $command eq 'C');
582
583    # Handle E<> escapes.  Numeric escapes that match one of the supported ISO
584    # 8859-1 characters don't work at present.
585    if ($command eq 'E') {
586        if (/^\d+$/) {
587            return [ chr ($_) ];
588        } elsif (exists $ESCAPES{$_}) {
589            return [ $ESCAPES{$_} ];
590        } else {
591            my ($file, $line) = $seq->file_line;
592            warn "$file:$line: Unknown escape E<$_>\n";
593            return [ "E<$_>" ];
594        }
595    }
596
597    # For all the other codes, empty content produces no output.
598    return '' if $_ eq '';
599
600    # Handle simple formatting codes.
601    if ($command eq 'B') {
602        return [ '\f(BS' . $_ . '\f(BE' ];
603    } elsif ($command eq 'F' || $command eq 'I') {
604        return [ '\f(IS' . $_ . '\f(IE' ];
605    } elsif ($command eq 'C') {
606        return [ $self->quote_literal ($_) ];
607    }
608
609    # Handle links.
610    if ($command eq 'L') {
611        my ($text, $type) = (parselink ($_))[1,4];
612        return '' unless $text;
613        my ($file, $line) = $seq->file_line;
614        $text = $self->parse ($text, $line);
615        $text = '<' . $text . '>' if $type eq 'url';
616        return [ $text ];
617    }
618
619    # Whitespace protection replaces whitespace with "\ ".
620    if ($command eq 'S') {
621        s/\s+/\\ /g;
622        return [ $_ ];
623    }
624
625    # Add an index entry to the list of ones waiting to be output.
626    if ($command eq 'X') {
627        push (@{ $$self{INDEX} }, $_);
628        return '';
629    }
630
631    # Anything else is unknown.
632    my ($file, $line) = $seq->file_line;
633    warn "$file:$line: Unknown formatting code $command<$_>\n";
634}
635
636
637##############################################################################
638# Command paragraphs
639##############################################################################
640
641# All command paragraphs take the paragraph and the line number.
642
643# First level heading.  We can't output .IX in the NAME section due to a bug
644# in some versions of catman, so don't output a .IX for that section.  .SH
645# already uses small caps, so remove \s1 and \s-1.  Maintain IN_NAME as
646# appropriate, but don't leave it set while calling parse() so as to not
647# override guesswork on section headings after NAME.
648sub cmd_head1 {
649    my $self = shift;
650    $$self{IN_NAME} = 0;
651    local $_ = $self->parse (@_);
652    s/\s+$//;
653    s/\\s-?\d//g;
654    s/\s*\n\s*/ /g;
655    if ($$self{ITEMS} > 1) {
656        $$self{ITEMS} = 0;
657        $self->output (".PD\n");
658    }
659    $self->output ($self->switchquotes ('.SH', $self->mapfonts ($_)));
660    $self->outindex (($_ eq 'NAME') ? () : ('Header', $_));
661    $$self{NEEDSPACE} = 0;
662    $$self{IN_NAME} = ($_ eq 'NAME');
663}
664
665# Second level heading.
666sub cmd_head2 {
667    my $self = shift;
668    local $_ = $self->parse (@_);
669    s/\s+$//;
670    s/\s*\n\s*/ /g;
671    if ($$self{ITEMS} > 1) {
672        $$self{ITEMS} = 0;
673        $self->output (".PD\n");
674    }
675    $self->output ($self->switchquotes ('.Sh', $self->mapfonts ($_)));
676    $self->outindex ('Subsection', $_);
677    $$self{NEEDSPACE} = 0;
678}
679
680# Third level heading.
681sub cmd_head3 {
682    my $self = shift;
683    local $_ = $self->parse (@_);
684    s/\s+$//;
685    s/\s*\n\s*/ /g;
686    if ($$self{ITEMS} > 1) {
687        $$self{ITEMS} = 0;
688        $self->output (".PD\n");
689    }
690    $self->makespace;
691    $self->output ($self->textmapfonts ('\f(IS' . $_ . '\f(IE') . "\n");
692    $self->outindex ('Subsection', $_);
693    $$self{NEEDSPACE} = 1;
694}
695
696# Fourth level heading.
697sub cmd_head4 {
698    my $self = shift;
699    local $_ = $self->parse (@_);
700    s/\s+$//;
701    s/\s*\n\s*/ /g;
702    if ($$self{ITEMS} > 1) {
703        $$self{ITEMS} = 0;
704        $self->output (".PD\n");
705    }
706    $self->makespace;
707    $self->output ($self->textmapfonts ($_) . "\n");
708    $self->outindex ('Subsection', $_);
709    $$self{NEEDSPACE} = 1;
710}
711
712# Start a list.  For indents after the first, wrap the outside indent in .RS
713# so that hanging paragraph tags will be correct.
714sub cmd_over {
715    my $self = shift;
716    local $_ = shift;
717    unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
718    if (@{ $$self{SHIFTS} } < @{ $$self{INDENTS} }) {
719        $self->output (".RS $$self{INDENT}\n");
720        push (@{ $$self{SHIFTS} }, $$self{INDENT});
721    }
722    push (@{ $$self{INDENTS} }, $$self{INDENT});
723    push (@{ $$self{ITEMTYPES} }, 'unknown');
724    $$self{INDENT} = ($_ + 0);
725    $$self{SHIFTWAIT} = 1;
726}
727
728# End a list.  If we've closed an embedded indent, we've mangled the hanging
729# paragraph indent, so temporarily replace it with .RS and set WEIRDINDENT.
730# We'll close that .RS at the next =back or =item.
731sub cmd_back {
732    my $self = shift;
733    $$self{INDENT} = pop @{ $$self{INDENTS} };
734    if (defined $$self{INDENT}) {
735        pop @{ $$self{ITEMTYPES} };
736    } else {
737        my ($file, $line, $paragraph) = @_;
738        ($file, $line) = $paragraph->file_line;
739        warn "$file:$line: Unmatched =back\n";
740        $$self{INDENT} = 0;
741    }
742    if (@{ $$self{SHIFTS} } > @{ $$self{INDENTS} }) {
743        $self->output (".RE\n");
744        pop @{ $$self{SHIFTS} };
745    }
746    if (@{ $$self{INDENTS} } > 0) {
747        $self->output (".RE\n");
748        $self->output (".RS $$self{INDENT}\n");
749    }
750    $$self{NEEDSPACE} = 1;
751    $$self{SHIFTWAIT} = 0;
752}
753
754# An individual list item.  Emit an index entry for anything that's
755# interesting, but don't emit index entries for things like bullets and
756# numbers.  rofficate bullets too while we're at it (so for nice output, use *
757# for your lists rather than o or . or - or some other thing).  Newlines in an
758# item title are turned into spaces since *roff can't handle them embedded.
759sub cmd_item {
760    my $self = shift;
761    local $_ = $self->parse (@_);
762    s/\s+$//;
763    s/\s*\n\s*/ /g;
764    my $index;
765    if (/\w/ && !/^\w[.\)]\s*$/) {
766        $index = $_;
767        $index =~ s/^\s*[-*+o.]?(?:\s+|\Z)//;
768    }
769    $_ = '*' unless length ($_) > 0;
770    my $type = $$self{ITEMTYPES}[0];
771    unless (defined $type) {
772        my ($file, $line, $paragraph) = @_;
773        ($file, $line) = $paragraph->file_line;
774        $type = 'unknown';
775    }
776    if ($type eq 'unknown') {
777        $type = /^\*\s*\Z/ ? 'bullet' : 'text';
778        $$self{ITEMTYPES}[0] = $type if $$self{ITEMTYPES}[0];
779    }
780    s/^\*\s*\Z/\\\(bu/ if $type eq 'bullet';
781    if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) {
782        $self->output (".RE\n");
783        pop @{ $$self{SHIFTS} };
784    }
785    $_ = $self->textmapfonts ($_);
786    $self->output (".PD 0\n") if ($$self{ITEMS} == 1);
787    $self->output ($self->switchquotes ('.IP', $_, $$self{INDENT}));
788    $self->outindex ($index ? ('Item', $index) : ());
789    $$self{NEEDSPACE} = 0;
790    $$self{ITEMS}++;
791    $$self{SHIFTWAIT} = 0;
792}
793
794# Begin a block for a particular translator.  Setting VERBATIM triggers
795# special handling in textblock().
796sub cmd_begin {
797    my $self = shift;
798    local $_ = shift;
799    my ($kind) = /^(\S+)/ or return;
800    if ($kind eq 'man' || $kind eq 'roff') {
801        $$self{VERBATIM} = 1;
802    } else {
803        $$self{EXCLUDE} = 1;
804    }
805}
806
807# End a block for a particular translator.  We assume that all =begin/=end
808# pairs are properly closed.
809sub cmd_end {
810    my $self = shift;
811    $$self{EXCLUDE} = 0;
812    $$self{VERBATIM} = 0;
813}
814
815# One paragraph for a particular translator.  Ignore it unless it's intended
816# for man or roff, in which case we output it verbatim.
817sub cmd_for {
818    my $self = shift;
819    local $_ = shift;
820    return unless s/^(?:man|roff)\b[ \t]*\n?//;
821    $self->output ($_);
822}
823
824
825##############################################################################
826# Escaping and fontification
827##############################################################################
828
829# At this point, we'll have embedded font codes of the form \f(<font>[SE]
830# where <font> is one of B, I, or F.  Turn those into the right font start or
831# end codes.  The old pod2man didn't get B<someI<thing> else> right; after I<>
832# it switched back to normal text rather than bold.  We take care of this by
833# using variables as a combined pointer to our current font sequence, and set
834# each to the number of current nestings of start tags for that font.  Use
835# them as a vector to look up what font sequence to use.
836#
837# \fP changes to the previous font, but only one previous font is kept.  We
838# don't know what the outside level font is; normally it's R, but if we're
839# inside a heading it could be something else.  So arrange things so that the
840# outside font is always the "previous" font and end with \fP instead of \fR.
841# Idea from Zack Weinberg.
842sub mapfonts {
843    my $self = shift;
844    local $_ = shift;
845
846    my ($fixed, $bold, $italic) = (0, 0, 0);
847    my %magic = (F => \$fixed, B => \$bold, I => \$italic);
848    my $last = '\fR';
849    s { \\f\((.)(.) } {
850        my $sequence = '';
851        my $f;
852        if ($last ne '\fR') { $sequence = '\fP' }
853        ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
854        $f = $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};
855        if ($f eq $last) {
856            '';
857        } else {
858            if ($f ne '\fR') { $sequence .= $f }
859            $last = $f;
860            $sequence;
861        }
862    }gxe;
863    $_;
864}
865
866# Unfortunately, there is a bug in Solaris 2.6 nroff (not present in GNU
867# groff) where the sequence \fB\fP\f(CW\fP leaves the font set to B rather
868# than R, presumably because \f(CW doesn't actually do a font change.  To work
869# around this, use a separate textmapfonts for text blocks where the default
870# font is always R and only use the smart mapfonts for headings.
871sub textmapfonts {
872    my $self = shift;
873    local $_ = shift;
874
875    my ($fixed, $bold, $italic) = (0, 0, 0);
876    my %magic = (F => \$fixed, B => \$bold, I => \$italic);
877    s { \\f\((.)(.) } {
878        ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
879        $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};
880    }gxe;
881    $_;
882}
883
884
885##############################################################################
886# *roff-specific parsing and magic
887##############################################################################
888
889# Called instead of parse_text, calls parse_text with the right flags.
890sub parse {
891    my $self = shift;
892    $self->parse_text ({ -expand_seq   => 'sequence',
893                         -expand_ptree => 'collapse' }, @_);
894}
895
896# Takes a parse tree, a flag saying whether or not to treat it as literal text
897# (not call guesswork on it), and a flag saying whether or not to clean some
898# things up for *roff, and returns the concatenation of all of the text
899# strings in that parse tree.  If the literal flag isn't true, guesswork()
900# will be called on all plain scalars in the parse tree.  Otherwise, if
901# collapse is being called on a C<> code, $cleanup should be set to true and
902# some additional cleanup will be done.  Assumes that everything in the parse
903# tree is either a scalar or a reference to a scalar.
904sub collapse {
905    my ($self, $ptree, $literal, $cleanup) = @_;
906
907    # If we're processing the NAME section, don't do normal guesswork.  This
908    # is because NAME lines are often extracted by utilities like catman that
909    # require plain text and don't understand *roff markup.  We still need to
910    # escape backslashes and hyphens for *roff (and catman expects \- instead
911    # of -).
912    if ($$self{IN_NAME}) {
913        $literal = 1;
914        $cleanup = 1;
915    }
916
917    # Do the collapse of the parse tree as described above.
918    return join ('', map {
919        if (ref $_) {
920            join ('', @$_);
921        } elsif ($literal) {
922            if ($cleanup) {
923                s/\\/\\e/g;
924                s/-/\\-/g;
925                s/__/_\\|_/g;
926            }
927            $_;
928        } else {
929            $self->guesswork ($_);
930        }
931    } $ptree->children);
932}
933
934# Takes a text block to perform guesswork on; this is guaranteed not to
935# contain any formatting codes.  Returns the text block with remapping done.
936sub guesswork {
937    my $self = shift;
938    local $_ = shift;
939
940    # rofficate backslashes.
941    s/\\/\\e/g;
942
943    # Ensure double underbars have a tiny space between them.
944    s/__/_\\|_/g;
945
946    # Leave hyphens only if they're part of regular words and there is only
947    # one dash at a time.  Leave a dash after the first character as a regular
948    # non-breaking dash, but don't let it mark the rest of the word invalid
949    # for hyphenation.
950    s/-/\\-/g;
951    s{
952      ( (?:\G|^|\s) [a-zA-Z] ) ( \\- )?
953      ( (?: [a-zA-Z]+ \\-)+ )
954      ( [a-zA-Z]+ ) (?=\s|\Z)
955      \b
956     } {
957         my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4);
958         $hyphen ||= '';
959         $main =~ s/\\-/-/g;
960         $prefix . $hyphen . $main . $suffix;
961    }egx;
962
963    # Translate -- into a real em dash if it's used like one.
964    s{ (\s) \\-\\- (\s) }                         { $1 . '\*(--' . $2 }egx;
965    s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx;
966
967    # Make all caps a little smaller.  Be careful here, since we don't want to
968    # make @ARGV into small caps, nor do we want to fix the MIME in
969    # MIME-Version, since it looks weird with the full-height V.
970    s{
971        ( ^ | [\s\(\"\'\`\[\{<>] )
972        ( [A-Z] [A-Z] (?: [/A-Z+:\d_\$&] | \\- )* )
973        (?= [\s>\}\]\(\)\'\".?!,;] | \\*\(-- | $ )
974    } { $1 . '\s-1' . $2 . '\s0' }egx;
975
976    # Italize functions in the form func().
977    s{
978        ( \b | \\s-1 )
979        (
980            [A-Za-z_] ([:\w]|\\s-?[01])+ \(\)
981        )
982    } { $1 . '\f(IS' . $2 . '\f(IE' }egx;
983
984    # func(n) is a reference to a manual page.  Make it \fIfunc\fR\|(n).
985    s{
986        ( \b | \\s-1 )
987        ( [A-Za-z_] (?:[.:\w]|\\-|\\s-?[01])+ )
988        (
989            \( \d [a-z]* \)
990        )
991    } { $1 . '\f(IS' . $2 . '\f(IE\|' . $3 }egx;
992
993    # Convert simple Perl variable references to a fixed-width font.
994    s{
995        ( \s+ )
996        ( [\$\@%] [\w:]+ )
997        (?! \( )
998    } { $1 . '\f(FS' . $2 . '\f(FE'}egx;
999
1000    # Fix up double quotes.
1001    s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;
1002
1003    # Make C++ into \*(C+, which is a squinched version.
1004    s{ \b C\+\+ } {\\*\(C+}gx;
1005
1006    # All done.
1007    $_;
1008}
1009
1010# Handles C<> text, deciding whether to put \*C` around it or not.  This is a
1011# whole bunch of messy heuristics to try to avoid overquoting, originally from
1012# Barrie Slaymaker.  This largely duplicates similar code in Pod::Text.
1013sub quote_literal {
1014    my $self = shift;
1015    local $_ = shift;
1016
1017    # A regex that matches the portion of a variable reference that's the
1018    # array or hash index, separated out just because we want to use it in
1019    # several places in the following regex.
1020    my $index = '(?: \[.*\] | \{.*\} )?';
1021
1022    # Check for things that we don't want to quote, and if we find any of
1023    # them, return the string with just a font change and no quoting.
1024    m{
1025      ^\s*
1026      (?:
1027         ( [\'\`\"] ) .* \1                             # already quoted
1028       | \` .* \'                                       # `quoted'
1029       | \$+ [\#^]? \S $index                           # special ($^Foo, $")
1030       | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
1031       | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
1032       | [+-]? ( \d[\d.]* | \.\d+ ) (?: [eE][+-]?\d+ )? # a number
1033       | 0x [a-fA-F\d]+                                 # a hex constant
1034      )
1035      \s*\z
1036     }xo && return '\f(FS' . $_ . '\f(FE';
1037
1038    # If we didn't return, go ahead and quote the text.
1039    return '\f(FS\*(C`' . $_ . "\\*(C'\\f(FE";
1040}
1041
1042
1043##############################################################################
1044# Output formatting
1045##############################################################################
1046
1047# Make vertical whitespace.
1048sub makespace {
1049    my $self = shift;
1050    $self->output (".PD\n") if ($$self{ITEMS} > 1);
1051    $$self{ITEMS} = 0;
1052    $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n")
1053        if $$self{NEEDSPACE};
1054}
1055
1056# Output any pending index entries, and optionally an index entry given as an
1057# argument.  Support multiple index entries in X<> separated by slashes, and
1058# strip special escapes from index entries.
1059sub outindex {
1060    my ($self, $section, $index) = @_;
1061    my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} };
1062    return unless ($section || @entries);
1063    $$self{INDEX} = [];
1064    my @output;
1065    if (@entries) {
1066        push (@output, [ 'Xref', join (' ', @entries) ]);
1067    }
1068    if ($section) {
1069        $index =~ s/\\-/-/g;
1070        $index =~ s/\\(?:s-?\d|.\(..|.)//g;
1071        push (@output, [ $section, $index ]);
1072    }
1073    for (@output) {
1074        my ($type, $entry) = @$_;
1075        $entry =~ s/\"/\"\"/g;
1076        $self->output (".IX $type " . '"' . $entry . '"' . "\n");
1077    }
1078}
1079
1080# Output text to the output device.
1081sub output { print { $_[0]->output_handle } $_[1] }
1082
1083# Given a command and a single argument that may or may not contain double
1084# quotes, handle double-quote formatting for it.  If there are no double
1085# quotes, just return the command followed by the argument in double quotes.
1086# If there are double quotes, use an if statement to test for nroff, and for
1087# nroff output the command followed by the argument in double quotes with
1088# embedded double quotes doubled.  For other formatters, remap paired double
1089# quotes to LQUOTE and RQUOTE.
1090sub switchquotes {
1091    my $self = shift;
1092    my $command = shift;
1093    local $_ = shift;
1094    my $extra = shift;
1095    s/\\\*\([LR]\"/\"/g;
1096
1097    # We also have to deal with \*C` and \*C', which are used to add the
1098    # quotes around C<> text, since they may expand to " and if they do this
1099    # confuses the .SH macros and the like no end.  Expand them ourselves.
1100    # Also separate troff from nroff if there are any fixed-width fonts in use
1101    # to work around problems with Solaris nroff.
1102    my $c_is_quote = ($$self{LQUOTE} =~ /\"/) || ($$self{RQUOTE} =~ /\"/);
1103    my $fixedpat = join ('|', @{ $$self{FONTS} }{'100', '101', '110', '111'});
1104    $fixedpat =~ s/\\/\\\\/g;
1105    $fixedpat =~ s/\(/\\\(/g;
1106    if (/\"/ || /$fixedpat/) {
1107        s/\"/\"\"/g;
1108        my $nroff = $_;
1109        my $troff = $_;
1110        $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
1111        if ($c_is_quote && /\\\*\(C[\'\`]/) {
1112            $nroff =~ s/\\\*\(C\`/$$self{LQUOTE}/g;
1113            $nroff =~ s/\\\*\(C\'/$$self{RQUOTE}/g;
1114            $troff =~ s/\\\*\(C[\'\`]//g;
1115        }
1116        $nroff = qq("$nroff") . ($extra ? " $extra" : '');
1117        $troff = qq("$troff") . ($extra ? " $extra" : '');
1118
1119        # Work around the Solaris nroff bug where \f(CW\fP leaves the font set
1120        # to Roman rather than the actual previous font when used in headings.
1121        # troff output may still be broken, but at least we can fix nroff by
1122        # just switching the font changes to the non-fixed versions.
1123        $nroff =~ s/\Q$$self{FONTS}{100}\E(.*)\\f[PR]/$1/g;
1124        $nroff =~ s/\Q$$self{FONTS}{101}\E(.*)\\f([PR])/\\fI$1\\f$2/g;
1125        $nroff =~ s/\Q$$self{FONTS}{110}\E(.*)\\f([PR])/\\fB$1\\f$2/g;
1126        $nroff =~ s/\Q$$self{FONTS}{111}\E(.*)\\f([PR])/\\f\(BI$1\\f$2/g;
1127
1128        # Now finally output the command.  Only bother with .ie if the nroff
1129        # and troff output isn't the same.
1130        if ($nroff ne $troff) {
1131            return ".ie n $command $nroff\n.el $command $troff\n";
1132        } else {
1133            return "$command $nroff\n";
1134        }
1135    } else {
1136        $_ = qq("$_") . ($extra ? " $extra" : '');
1137        return "$command $_\n";
1138    }
1139}
1140
1141##############################################################################
1142# Module return value and documentation
1143##############################################################################
1144
11451;
1146__END__
1147
1148=head1 NAME
1149
1150Pod::Man - Convert POD data to formatted *roff input
1151
1152=head1 SYNOPSIS
1153
1154    use Pod::Man;
1155    my $parser = Pod::Man->new (release => $VERSION, section => 8);
1156
1157    # Read POD from STDIN and write to STDOUT.
1158    $parser->parse_from_filehandle;
1159
1160    # Read POD from file.pod and write to file.1.
1161    $parser->parse_from_file ('file.pod', 'file.1');
1162
1163=head1 DESCRIPTION
1164
1165Pod::Man is a module to convert documentation in the POD format (the
1166preferred language for documenting Perl) into *roff input using the man
1167macro set.  The resulting *roff code is suitable for display on a terminal
1168using L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>.
1169It is conventionally invoked using the driver script B<pod2man>, but it can
1170also be used directly.
1171
1172As a derived class from Pod::Parser, Pod::Man supports the same methods and
1173interfaces.  See L<Pod::Parser> for all the details; briefly, one creates a
1174new parser with C<< Pod::Man->new() >> and then calls either
1175parse_from_filehandle() or parse_from_file().
1176
1177new() can take options, in the form of key/value pairs that control the
1178behavior of the parser.  See below for details.
1179
1180If no options are given, Pod::Man uses the name of the input file with any
1181trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to
1182section 1 unless the file ended in C<.pm> in which case it defaults to
1183section 3, to a centered title of "User Contributed Perl Documentation", to
1184a centered footer of the Perl version it is run with, and to a left-hand
1185footer of the modification date of its input (or the current date if given
1186STDIN for input).
1187
1188Pod::Man assumes that your *roff formatters have a fixed-width font named
1189CW.  If yours is called something else (like CR), use the C<fixed> option to
1190specify it.  This generally only matters for troff output for printing.
1191Similarly, you can set the fonts used for bold, italic, and bold italic
1192fixed-width output.
1193
1194Besides the obvious pod conversions, Pod::Man also takes care of formatting
1195func(), func(3), and simple variable references like $foo or @bar so you
1196don't have to use code escapes for them; complex expressions like
1197C<$fred{'stuff'}> will still need to be escaped, though.  It also translates
1198dashes that aren't used as hyphens into en dashes, makes long dashes--like
1199this--into proper em dashes, fixes "paired quotes," makes C++ look right,
1200puts a little space between double underbars, makes ALLCAPS a teeny bit
1201smaller in B<troff>, and escapes stuff that *roff treats as special so that
1202you don't have to.
1203
1204The recognized options to new() are as follows.  All options take a single
1205argument.
1206
1207=over 4
1208
1209=item center
1210
1211Sets the centered page header to use instead of "User Contributed Perl
1212Documentation".
1213
1214=item date
1215
1216Sets the left-hand footer.  By default, the modification date of the input
1217file will be used, or the current date if stat() can't find that file (the
1218case if the input is from STDIN), and the date will be formatted as
1219YYYY-MM-DD.
1220
1221=item fixed
1222
1223The fixed-width font to use for vertabim text and code.  Defaults to CW.
1224Some systems may want CR instead.  Only matters for B<troff> output.
1225
1226=item fixedbold
1227
1228Bold version of the fixed-width font.  Defaults to CB.  Only matters for
1229B<troff> output.
1230
1231=item fixeditalic
1232
1233Italic version of the fixed-width font (actually, something of a misnomer,
1234since most fixed-width fonts only have an oblique version, not an italic
1235version).  Defaults to CI.  Only matters for B<troff> output.
1236
1237=item fixedbolditalic
1238
1239Bold italic (probably actually oblique) version of the fixed-width font.
1240Pod::Man doesn't assume you have this, and defaults to CB.  Some systems
1241(such as Solaris) have this font available as CX.  Only matters for B<troff>
1242output.
1243
1244=item name
1245
1246Set the name of the manual page.  Without this option, the manual name is
1247set to the uppercased base name of the file being converted unless the
1248manual section is 3, in which case the path is parsed to see if it is a Perl
1249module path.  If it is, a path like C<.../lib/Pod/Man.pm> is converted into
1250a name like C<Pod::Man>.  This option, if given, overrides any automatic
1251determination of the name.
1252
1253=item quotes
1254
1255Sets the quote marks used to surround CE<lt>> text.  If the value is a
1256single character, it is used as both the left and right quote; if it is two
1257characters, the first character is used as the left quote and the second as
1258the right quoted; and if it is four characters, the first two are used as
1259the left quote and the second two as the right quote.
1260
1261This may also be set to the special value C<none>, in which case no quote
1262marks are added around CE<lt>> text (but the font is still changed for troff
1263output).
1264
1265=item release
1266
1267Set the centered footer.  By default, this is the version of Perl you run
1268Pod::Man under.  Note that some system an macro sets assume that the
1269centered footer will be a modification date and will prepend something like
1270"Last modified: "; if this is the case, you may want to set C<release> to
1271the last modified date and C<date> to the version number.
1272
1273=item section
1274
1275Set the section for the C<.TH> macro.  The standard section numbering
1276convention is to use 1 for user commands, 2 for system calls, 3 for
1277functions, 4 for devices, 5 for file formats, 6 for games, 7 for
1278miscellaneous information, and 8 for administrator commands.  There is a lot
1279of variation here, however; some systems (like Solaris) use 4 for file
1280formats, 5 for miscellaneous information, and 7 for devices.  Still others
1281use 1m instead of 8, or some mix of both.  About the only section numbers
1282that are reliably consistent are 1, 2, and 3.
1283
1284By default, section 1 will be used unless the file ends in .pm in which case
1285section 3 will be selected.
1286
1287=back
1288
1289The standard Pod::Parser method parse_from_filehandle() takes up to two
1290arguments, the first being the file handle to read POD from and the second
1291being the file handle to write the formatted output to.  The first defaults
1292to STDIN if not given, and the second defaults to STDOUT.  The method
1293parse_from_file() is almost identical, except that its two arguments are the
1294input and output disk files instead.  See L<Pod::Parser> for the specific
1295details.
1296
1297=head1 DIAGNOSTICS
1298
1299=over 4
1300
1301=item roff font should be 1 or 2 chars, not "%s"
1302
1303(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
1304wasn't either one or two characters.  Pod::Man doesn't support *roff fonts
1305longer than two characters, although some *roff extensions do (the canonical
1306versions of B<nroff> and B<troff> don't either).
1307
1308=item Invalid link %s
1309
1310(W) The POD source contained a C<LE<lt>E<gt>> formatting code that
1311Pod::Man was unable to parse.  You should never see this error message; it
1312probably indicates a bug in Pod::Man.
1313
1314=item Invalid quote specification "%s"
1315
1316(F) The quote specification given (the quotes option to the constructor) was
1317invalid.  A quote specification must be one, two, or four characters long.
1318
1319=item %s:%d: Unknown command paragraph "%s".
1320
1321(W) The POD source contained a non-standard command paragraph (something of
1322the form C<=command args>) that Pod::Man didn't know about.  It was ignored.
1323
1324=item %s:%d: Unknown escape EE<lt>%sE<gt>
1325
1326(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
1327know about.  C<EE<lt>%sE<gt>> was printed verbatim in the output.
1328
1329=item %s:%d: Unknown formatting code %s
1330
1331(W) The POD source contained a non-standard formatting code (something of
1332the form C<XE<lt>E<gt>>) that Pod::Man didn't know about.  It was ignored.
1333
1334=item %s:%d: Unmatched =back
1335
1336(W) Pod::Man encountered a C<=back> command that didn't correspond to an
1337C<=over> command.
1338
1339=back
1340
1341=head1 BUGS
1342
1343Eight-bit input data isn't handled at all well at present.  The correct
1344approach would be to map EE<lt>E<gt> escapes to the appropriate UTF-8
1345characters and then do a translation pass on the output according to the
1346user-specified output character set.  Unfortunately, we can't send eight-bit
1347data directly to the output unless the user says this is okay, since some
1348vendor *roff implementations can't handle eight-bit data.  If the *roff
1349implementation can, however, that's far superior to the current hacked
1350characters that only work under troff.
1351
1352There is currently no way to turn off the guesswork that tries to format
1353unmarked text appropriately, and sometimes it isn't wanted (particularly
1354when using POD to document something other than Perl).
1355
1356The NAME section should be recognized specially and index entries emitted
1357for everything in that section.  This would have to be deferred until the
1358next section, since extraneous things in NAME tends to confuse various man
1359page processors.
1360
1361Pod::Man doesn't handle font names longer than two characters.  Neither do
1362most B<troff> implementations, but GNU troff does as an extension.  It would
1363be nice to support as an option for those who want to use it.
1364
1365The preamble added to each output file is rather verbose, and most of it is
1366only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
1367characters.  It would ideally be nice if all of those definitions were only
1368output if needed, perhaps on the fly as the characters are used.
1369
1370Pod::Man is excessively slow.
1371
1372=head1 CAVEATS
1373
1374The handling of hyphens and em dashes is somewhat fragile, and one may get
1375the wrong one under some circumstances.  This should only matter for
1376B<troff> output.
1377
1378When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
1379necessarily get it right.
1380
1381=head1 SEE ALSO
1382
1383L<Pod::Parser>, L<perlpod(1)>, L<pod2man(1)>, L<nroff(1)>, L<troff(1)>,
1384L<man(1)>, L<man(7)>
1385
1386Ossanna, Joseph F., and Brian W. Kernighan.  "Troff User's Manual,"
1387Computing Science Technical Report No. 54, AT&T Bell Laboratories.  This is
1388the best documentation of standard B<nroff> and B<troff>.  At the time of
1389this writing, it's available at
1390L<http://www.cs.bell-labs.com/cm/cs/cstr.html>.
1391
1392The man page documenting the man macro set may be L<man(5)> instead of
1393L<man(7)> on your system.  Also, please see L<pod2man(1)> for extensive
1394documentation on writing manual pages if you've not done it before and
1395aren't familiar with the conventions.
1396
1397The current version of this module is always available from its web site at
1398L<http://www.eyrie.org/~eagle/software/podlators/>.  It is also part of the
1399Perl core distribution as of 5.6.0.
1400
1401=head1 AUTHOR
1402
1403Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
1404B<pod2man> by Tom Christiansen <tchrist@mox.perl.com>.
1405
1406=head1 COPYRIGHT AND LICENSE
1407
1408Copyright 1999, 2000, 2001, 2002, 2003 by Russ Allbery <rra@stanford.edu>.
1409
1410This program is free software; you may redistribute it and/or modify it
1411under the same terms as Perl itself.
1412
1413=cut
1414