1# ex:ts=8 sw=4: 2# $OpenBSD: Unformated.pm,v 1.3 2005/01/13 11:22:24 espie Exp $ 3# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org> 4# 5# Permission to use, copy, modify, and distribute this software for any 6# purpose with or without fee is hereby granted, provided that the above 7# copyright notice and this permission notice appear in all copies. 8# 9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 17use strict; 18use warnings; 19package OpenBSD::Makewhatis::Unformated; 20 21# add_unformated_subject($lines, $toadd, $section, $filename, $toexpand): 22# 23# build subject from list of $toadd lines, and add it to the list 24# of current subjects as section $section 25# 26sub add_unformated_subject 27{ 28 my ($subjects, $toadd, $section, $filename, $toexpand, $picky) = @_; 29 30 my $exp = sub { 31 if (defined $toexpand->{$_[0]}) { 32 return $toexpand->{$_[0]}; 33 } else { 34 print STDERR "$filename: can't expand $_[0]\n"; 35 return ""; 36 } 37 }; 38 39 local $_ = join(' ', @$toadd); 40 # do interpolations 41 s/\\\*\((..)/&$exp($1)/ge; 42 s/\\\*\[(.*?)\]/&$exp($1)/ge; 43 44 # horizontal space adjustments 45 while (s/\\s[-+]?\d+//g) 46 {} 47 # unbreakable spaces 48 s/\\\s+/ /g; 49 # unbreakable em dashes 50 s/\\\|\\\(em\\\|/-/g; 51 # em dashes 52 s/\\\(em\s+/- /g; 53 # em dashes in the middle of lines 54 s/\\\(em/-/g; 55 s/\\\*[LO]//g; 56 s/\\\(tm/(tm)/g; 57 # font changes 58 s/\\f[BIRP]//g; 59 s/\\f\(..//g; 60 # fine space adjustments 61 while (s/\\[vh]\'.*?\'//g) 62 {} 63 unless (s/\s+\\-\s+/ ($section) - / || s/\s*\\\-/ ($section) -/ || 64 s/\s-\s/ ($section) - /) { 65 print STDERR "Weird subject line in $filename:\n$_\n" if $picky; 66 # Try guessing where the separation falls... 67 s/\s+\:\s+/ ($section) - / || s/\S+\s+/$& ($section) - / || s/\s*$/ ($section) - (empty subject)/; 68 } 69 # other dashes 70 s/\\-/-/g; 71 # escaped characters 72 s/\\\&(.)/$1/g; 73 s/\\\|/|/g; 74 # gremlins... 75 s/\\c//g; 76 # sequence of spaces 77 s/\s+$//; 78 s/^\s+//; 79 s/\s+/ /g; 80 # some damage control 81 if (m/^\Q($section) - \E/) { 82 print STDERR "Rejecting non-subject line from $filename:\n$_\n" 83 if $picky; 84 return; 85 } 86 push(@$subjects, $_); 87} 88 89# $lines = handle($file, $filename, $picky) 90# 91# handle an unformated manpage in $file 92# 93# may return several subjects, perl(3p) do ! 94# 95sub handle 96{ 97 my ($f, $filename, $picky) = @_; 98 my @lines = (); 99 my %toexpand = (); 100 my $so_found = 0; 101 my $found_th = 0; 102 my $found_old = 0; 103 my $found_dt = 0; 104 my $found_new = 0; 105 # subject/keep is the only way to deal with Nm/Nd pairs 106 my @subject = (); 107 my @keep = (); 108 my $nd_seen = 0; 109 local $_; 110 # retrieve basename of file 111 my ($name, $section) = $filename =~ m|(?:.*/)?(.*)\.([\w\d]+)|; 112 # scan until macro 113 while (<$f>) { 114 next unless m/^\./ || $found_old || $found_new; 115 next if m/^\.\\\"/; 116 next if m/^\.if\s+t\s+/; 117 s/^\.if\s+n\s+//; 118 if (m/^\.\s*de/) { 119 while (<$f>) { 120 last if m/^\.\s*\./; 121 } 122 next; 123 } 124 if (m/^\.\s*ds\s+(\S+)\s+/) { 125 chomp($toexpand{$1} = $'); 126 next; 127 } 128 # Some cross-refs just link to another manpage 129 $so_found = 1 if m/^\.\s*so/; 130 if (m/^\.\s*TH/ || m/^\.\s*th/) { 131 # in pricky mode, we should try to match these 132 # ($name2, $section2) = m/^\.(?:TH|th)\s+(\S+)\s+(\S+)/; 133 # scan until first section 134 $found_th = 1; 135 next; 136 } 137 if ($found_th && !$found_old && (m/^\.\s*SH/ || m/^\.\s*sh/)) { 138 $found_old = 1; 139 next; 140 } 141 if (m/^\.\s*Dt/) { 142 $section .= "/$1" if (m/^\.\s*Dt\s+\S+\s+\d\S*\s+(\S+)/); 143 $found_dt = 1; 144 next; 145 } 146 if ($found_dt && !$found_new && m/^\.\s*Sh/) { 147 $found_new = 1; 148 next; 149 } 150 if ($found_old) { 151 last if m/^\.\s*(?:SH|sh|SS|ss|nf|LI)/; 152 # several subjects in one manpage 153 if (m/^\.\s*(?:PP|Pp|br|PD|LP|sp)/) { 154 add_unformated_subject(\@lines, \@subject, 155 $section, $filename, \%toexpand, $picky) 156 if @subject != 0; 157 @subject = (); 158 next; 159 } 160 next if m/^\'/ || m/^\.\s*tr\s+/ || m/^\.\s*\\\"/ || 161 m/^\.\s*sv/ || m/^\.\s*Vb\s+/ || m/\.\s*HP\s+/; 162 # Motif index entries, don't do anything for now. 163 next if m/^\.\s*iX/; 164 # Some other index (cook) 165 next if m/^\.\s*XX/; 166 chomp; 167 s/\.\s*(?:B|I|IR|SM|BR)\s+//; 168 if (m/^\.\s*(\S\S)/) { 169 print STDERR "$filename: not grokking $_\n" 170 if $picky; 171 next; 172 } 173 push(@subject, $_) unless m/^\s*$/; 174 next; 175 } 176 if ($found_new) { 177 last if m/^\.\s*Sh/; 178 s/\s,/,/g; 179 if (s/^\.\s*(\S\S)\s+//) { 180 my $macro = $1; 181 next if $macro eq "\\\""; 182 s/\"(.*?)\"/$1/g; 183 s/\\-/-/g; 184 $macro eq 'Xr' and s/^(\S+)\s+(\d\S*)/$1 ($2)/; 185 $macro eq 'Ox' and s/^/OpenBSD /; 186 $macro eq 'Nx' and s/^/NetBSD /; 187 if ($macro eq 'Nd') { 188 if (@keep != 0) { 189 add_unformated_subject(\@lines, \@keep, 190 $section, $filename, \%toexpand, $picky); 191 @keep = (); 192 } 193 push(@subject, "\\-"); 194 $nd_seen = 1; 195 } 196 if ($nd_seen && $macro eq 'Nm') { 197 @keep = @subject; 198 @subject = (); 199 $nd_seen = 0; 200 } 201 } 202 push(@subject, $_) unless m/^\s*$/; 203 } 204 } 205 if ($found_th && !$found_old) { 206 print STDERR "Couldn't find subject in old manpage $filename\n"; 207 } 208 if ($found_dt && !$found_new) { 209 print STDERR "Couldn't find subject in new manpage $filename\n"; 210 } 211 unshift(@subject, @keep) if @keep != 0; 212 add_unformated_subject(\@lines, \@subject, $section, 213 $filename, \%toexpand, $picky) if @subject != 0; 214 if (!$so_found && !$found_old && !$found_new) { 215 print STDERR "Unknown manpage type $filename\n"; 216 } 217 return \@lines; 218} 219 2201; 221