1# ex:ts=8 sw=4: 2# $OpenBSD: Formated.pm,v 1.3 2005/03/05 11:02:35 espie Exp $ 3# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org> 4# 5# Permission to use, copy, modify, and distribute this software for any 6# purpose with or without fee is hereby granted, provided that the above 7# copyright notice and this permission notice appear in all copies. 8# 9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 17use strict; 18use warnings; 19package OpenBSD::Makewhatis::Formated; 20 21# add_formated_subject($subjects, $_, $section, $filename, $picky): 22# add subject $_ to the list of current $subjects, in section $section. 23# 24sub add_formated_subject 25{ 26 my ($subjects, $line, $section, $filename, $picky) = @_; 27 local $_ = $line; 28 29 if (m/-/) { 30 s/([-+.\w\d,])\s+/$1 /g; 31 s/([a-z][A-z])-\s+/$1/g; 32 # some twits use: func -- description 33 if (m/^[^-+.\w\d]*(.*?) -(?:-?)\s+(.*)/) { 34 my ($func, $descr) = ($1, $2); 35 $func =~ s/,\s*$//; 36 # nroff will tend to cut function names at the weirdest places 37 if (length($func) > 40 && $func =~ m/,/ && $section =~ /^3/) { 38 $func =~ s/\b \b//g; 39 } 40 $_ = "$func ($section) - $descr"; 41 push(@$subjects, $_); 42 return; 43 } 44 } 45 46 print STDERR "Weird subject line in $filename:\n$_\n" if $picky; 47 48 # try to find subject in line anyway 49 if (m/^\s*(.*\S)(?:\s{3,}|\(\)\s+)(.*?)\s*$/) { 50 my ($func, $descr) = ($1, $2); 51 $func =~ s/\s+/ /g; 52 $descr =~ s/\s+/ /g; 53 $_ = "$func ($section) - $descr"; 54 push(@$subjects, $_); 55 return; 56 } 57 58 print STDERR "Weird subject line in $filename:\n$_\n" unless $picky; 59} 60 61# $lines = handle($file, $filename, $picky) 62# 63# handle a formatted manpage in $file 64# 65# may return several subjects, perl(3p) do ! 66# 67sub handle 68{ 69 my ($file, $filename, $picky) = @_; 70 local $_; 71 my ($section, $subject); 72 my @lines=(); 73 my $foundname = 0; 74 while (<$file>) { 75 chomp; 76 if (m/^$/) { 77 # perl aggregates several subjects in one manpage 78 # so we don't stop after we've got one subject 79 add_formated_subject(\@lines, $subject, $section, $filename, $picky) 80 if defined $subject; 81 $subject = undef; 82 next; 83 } 84 # Remove boldface from wide characters 85 while (s/(..)\cH\cH\1/$1/g) 86 {} 87 # Remove boldface and underlining 88 while (s/_\cH//g || s/(.)\cH\1/$1/g) 89 {} 90 if (!$foundname && m/\w[-+.\w\d]*\(([-+.\w\d\/]+)\)/) { 91 $section = $1; 92 # Find architecture 93 if (m/Manual\s+\((.*?)\)/) { 94 $section = "$section/$1"; 95 } 96 } 97 # Not all man pages are in english 98 # weird hex is `Namae' in japanese 99 if (m/^(?:NAME|NAMES|NAMN|Name|\xbe|\xcc\xbe\xbe\xce|\xcc\xbe\xc1\xb0)\s*$/) { 100 unless (defined $section) { 101 # try to retrieve section from filename 102 if ($filename =~ m/(?:cat|man)([\dln])\//) { 103 $section = $1; 104 print STDERR "Can't find section in $filename, deducting $section from context\n" if $picky; 105 } else { 106 $section='??'; 107 print STDERR "Can't find section in $filename\n"; 108 } 109 } 110 $foundname = 1; 111 next; 112 } 113 if ($foundname) { 114 if (m/^\S/ || m/^\s+\*{3,}\s*$/) { 115 add_formated_subject(\@lines, $subject, $section, $filename, $picky) 116 if defined $subject; 117 last; 118 } else { 119 # deal with troff hyphenations 120 if (defined $subject and $subject =~ m/\xad\s*$/) { 121 $subject =~ s/(?:\xad\cH)*\xad\s*$//; 122 s/^\s*//; 123 } 124 # more troff hyphenation 125 if (defined $subject and $subject =~ m/\S(?:\-\cH)*\-$/) { 126 $subject =~ s/(?:\-\cH)*\-$//; 127 s/^\s*//; 128 } 129 s/^\s+/ /; 130 $subject.=$_; 131 } 132 } 133 } 134 135 print STDERR "Can't parse $filename (not a manpage ?)\n" if @lines == 0; 136 return \@lines; 137} 138 1391; 140