1# ex:ts=8 sw=4:
2# $OpenBSD: Unformated.pm,v 1.3 2005/01/13 11:22:24 espie Exp $
3# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org>
4#
5# Permission to use, copy, modify, and distribute this software for any
6# purpose with or without fee is hereby granted, provided that the above
7# copyright notice and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17use strict;
18use warnings;
19package OpenBSD::Makewhatis::Unformated;
20
21# add_unformated_subject($lines, $toadd, $section, $filename, $toexpand):
22#
23#   build subject from list of $toadd lines, and add it to the list
24#   of current subjects as section $section
25#
26sub add_unformated_subject
27{
28    my ($subjects, $toadd, $section, $filename, $toexpand, $picky) = @_;
29
30    my $exp = sub {
31    	if (defined $toexpand->{$_[0]}) {
32		return $toexpand->{$_[0]};
33	} else {
34		print STDERR "$filename: can't expand $_[0]\n";
35		return "";
36	}
37    };
38
39    local $_ = join(' ', @$toadd);
40	# do interpolations
41    s/\\\*\((..)/&$exp($1)/ge;
42    s/\\\*\[(.*?)\]/&$exp($1)/ge;
43
44	# horizontal space adjustments
45    while (s/\\s[-+]?\d+//g)
46    	{}
47	# unbreakable spaces
48    s/\\\s+/ /g;
49    	# unbreakable em dashes
50    s/\\\|\\\(em\\\|/-/g;
51	# em dashes
52    s/\\\(em\s+/- /g;
53    	# em dashes in the middle of lines
54    s/\\\(em/-/g;
55    s/\\\*[LO]//g;
56    s/\\\(tm/(tm)/g;
57	# font changes
58    s/\\f[BIRP]//g;
59    s/\\f\(..//g;
60    	# fine space adjustments
61    while (s/\\[vh]\'.*?\'//g)
62    	{}
63    unless (s/\s+\\-\s+/ ($section) - / || s/\s*\\\-/ ($section) -/ ||
64    	s/\s-\s/ ($section) - /) {
65	print STDERR "Weird subject line in $filename:\n$_\n" if $picky;
66	    # Try guessing where the separation falls...
67	s/\s+\:\s+/ ($section) - / || s/\S+\s+/$& ($section) - / || s/\s*$/ ($section) - (empty subject)/;
68    }
69	# other dashes
70    s/\\-/-/g;
71	# escaped characters
72    s/\\\&(.)/$1/g;
73    s/\\\|/|/g;
74	# gremlins...
75    s/\\c//g;
76	# sequence of spaces
77    s/\s+$//;
78    s/^\s+//;
79    s/\s+/ /g;
80    	# some damage control
81    if (m/^\Q($section) - \E/) {
82    	print STDERR "Rejecting non-subject line from $filename:\n$_\n"
83	    if $picky;
84	return;
85    }
86    push(@$subjects, $_);
87}
88
89# $lines = handle($file, $filename, $picky)
90#
91#   handle an unformated manpage in $file
92#
93#   may return several subjects, perl(3p) do !
94#
95sub handle
96{
97    my ($f, $filename, $picky) = @_;
98    my @lines = ();
99    my %toexpand = ();
100    my $so_found = 0;
101    my $found_th = 0;
102    my $found_old = 0;
103    my $found_dt = 0;
104    my $found_new = 0;
105    # subject/keep is the only way to deal with Nm/Nd pairs
106    my @subject = ();
107    my @keep = ();
108    my $nd_seen = 0;
109    local $_;
110	# retrieve basename of file
111    my ($name, $section) = $filename =~ m|(?:.*/)?(.*)\.([\w\d]+)|;
112	# scan until macro
113    while (<$f>) {
114	next unless m/^\./ || $found_old || $found_new;
115	next if m/^\.\\\"/;
116	next if m/^\.if\s+t\s+/;
117	s/^\.if\s+n\s+//;
118	if (m/^\.\s*de/) {
119	    while (<$f>) {
120		last if m/^\.\s*\./;
121	    }
122	    next;
123	}
124	if (m/^\.\s*ds\s+(\S+)\s+/) {
125	    chomp($toexpand{$1} = $');
126	    next;
127	}
128	    # Some cross-refs just link to another manpage
129	$so_found = 1 if m/^\.\s*so/;
130	if (m/^\.\s*TH/ || m/^\.\s*th/) {
131		# in pricky mode, we should try to match these
132	    # ($name2, $section2) = m/^\.(?:TH|th)\s+(\S+)\s+(\S+)/;
133	    	# scan until first section
134	    $found_th = 1;
135	    next;
136	}
137	if ($found_th && !$found_old && (m/^\.\s*SH/ || m/^\.\s*sh/)) {
138		$found_old = 1;
139		next;
140	}
141	if (m/^\.\s*Dt/) {
142	    $section .= "/$1" if (m/^\.\s*Dt\s+\S+\s+\d\S*\s+(\S+)/);
143	    $found_dt = 1;
144	    next;
145    	}
146	if ($found_dt && !$found_new && m/^\.\s*Sh/) {
147		$found_new = 1;
148		next;
149	}
150	if ($found_old) {
151		last if m/^\.\s*(?:SH|sh|SS|ss|nf|LI)/;
152		    # several subjects in one manpage
153		if (m/^\.\s*(?:PP|Pp|br|PD|LP|sp)/) {
154		    add_unformated_subject(\@lines, \@subject,
155			$section, $filename, \%toexpand, $picky)
156			    if @subject != 0;
157		    @subject = ();
158		    next;
159		}
160		next if m/^\'/ || m/^\.\s*tr\s+/ || m/^\.\s*\\\"/ ||
161		    m/^\.\s*sv/ || m/^\.\s*Vb\s+/ || m/\.\s*HP\s+/;
162		# Motif index entries, don't do anything for now.
163		next if m/^\.\s*iX/;
164		# Some other index (cook)
165		next if m/^\.\s*XX/;
166		chomp;
167		s/\.\s*(?:B|I|IR|SM|BR)\s+//;
168		if (m/^\.\s*(\S\S)/) {
169		    print STDERR "$filename: not grokking $_\n"
170			if $picky;
171		    next;
172		}
173		push(@subject, $_) unless m/^\s*$/;
174		next;
175	}
176	if ($found_new) {
177		last if m/^\.\s*Sh/;
178		s/\s,/,/g;
179		if (s/^\.\s*(\S\S)\s+//) {
180		    my $macro = $1;
181		    next if $macro eq "\\\"";
182		    s/\"(.*?)\"/$1/g;
183		    s/\\-/-/g;
184		    $macro eq 'Xr' and s/^(\S+)\s+(\d\S*)/$1 ($2)/;
185		    $macro eq 'Ox' and s/^/OpenBSD /;
186		    $macro eq 'Nx' and s/^/NetBSD /;
187		    if ($macro eq 'Nd') {
188			if (@keep != 0) {
189			    add_unformated_subject(\@lines, \@keep,
190				$section, $filename, \%toexpand, $picky);
191			    @keep = ();
192			}
193			push(@subject, "\\-");
194			$nd_seen = 1;
195		    }
196		    if ($nd_seen && $macro eq 'Nm') {
197			@keep = @subject;
198			@subject = ();
199			$nd_seen = 0;
200		    }
201		}
202		push(@subject, $_) unless m/^\s*$/;
203	}
204    }
205    if ($found_th && !$found_old) {
206	    print STDERR "Couldn't find subject in old manpage $filename\n";
207    }
208    if ($found_dt && !$found_new) {
209	    print STDERR "Couldn't find subject in new manpage $filename\n";
210    }
211    unshift(@subject, @keep) if @keep != 0;
212    add_unformated_subject(\@lines, \@subject, $section,
213	$filename, \%toexpand, $picky) if @subject != 0;
214    if (!$so_found && !$found_old && !$found_new) {
215	print STDERR "Unknown manpage type $filename\n";
216    }
217    return \@lines;
218}
219
2201;
221