1# ex:ts=8 sw=4:
2# $OpenBSD: Formated.pm,v 1.3 2005/03/05 11:02:35 espie Exp $
3# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org>
4#
5# Permission to use, copy, modify, and distribute this software for any
6# purpose with or without fee is hereby granted, provided that the above
7# copyright notice and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17use strict;
18use warnings;
19package OpenBSD::Makewhatis::Formated;
20
21# add_formated_subject($subjects, $_, $section, $filename, $picky):
22#   add subject $_ to the list of current $subjects, in section $section.
23#
24sub add_formated_subject
25{
26    my ($subjects, $line, $section, $filename, $picky) = @_;
27    local $_ = $line;
28
29    if (m/-/) {
30	s/([-+.\w\d,])\s+/$1 /g;
31	s/([a-z][A-z])-\s+/$1/g;
32	# some twits use: func -- description
33	if (m/^[^-+.\w\d]*(.*?) -(?:-?)\s+(.*)/) {
34	    my ($func, $descr) = ($1, $2);
35	    $func =~ s/,\s*$//;
36	    # nroff will tend to cut function names at the weirdest places
37	    if (length($func) > 40 && $func =~ m/,/ && $section =~ /^3/) {
38	    	$func =~ s/\b \b//g;
39	    }
40	    $_ = "$func ($section) - $descr";
41	    push(@$subjects, $_);
42	    return;
43	}
44    }
45
46    print STDERR "Weird subject line in $filename:\n$_\n" if $picky;
47
48    # try to find subject in line anyway
49    if (m/^\s*(.*\S)(?:\s{3,}|\(\)\s+)(.*?)\s*$/) {
50    	my ($func, $descr) = ($1, $2);
51	$func =~ s/\s+/ /g;
52	$descr =~ s/\s+/ /g;
53	$_ = "$func ($section) - $descr";
54	push(@$subjects, $_);
55	return;
56    }
57
58    print STDERR "Weird subject line in $filename:\n$_\n" unless $picky;
59}
60
61# $lines = handle($file, $filename, $picky)
62#
63#   handle a formatted manpage in $file
64#
65#   may return several subjects, perl(3p) do !
66#
67sub handle
68{
69    my ($file, $filename, $picky) = @_;
70    local $_;
71    my ($section, $subject);
72    my @lines=();
73    my $foundname = 0;
74    while (<$file>) {
75	chomp;
76	if (m/^$/) {
77	    # perl aggregates several subjects in one manpage
78	    # so we don't stop after we've got one subject
79	    add_formated_subject(\@lines, $subject, $section, $filename, $picky)
80		if defined $subject;
81	    $subject = undef;
82	    next;
83	}
84	# Remove boldface from wide characters
85	while (s/(..)\cH\cH\1/$1/g)
86	    {}
87	# Remove boldface and underlining
88	while (s/_\cH//g || s/(.)\cH\1/$1/g)
89	    {}
90	if (!$foundname && m/\w[-+.\w\d]*\(([-+.\w\d\/]+)\)/) {
91	    $section = $1;
92	    # Find architecture
93	    if (m/Manual\s+\((.*?)\)/) {
94		$section = "$section/$1";
95	    }
96	}
97	# Not all man pages are in english
98	# weird hex is `Namae' in japanese
99	if (m/^(?:NAME|NAMES|NAMN|Name|\xbe|\xcc\xbe\xbe\xce|\xcc\xbe\xc1\xb0)\s*$/) {
100	    unless (defined $section) {
101		# try to retrieve section from filename
102		if ($filename =~ m/(?:cat|man)([\dln])\//) {
103		    $section = $1;
104		    print STDERR "Can't find section in $filename, deducting $section from context\n" if $picky;
105		} else {
106		    $section='??';
107		    print STDERR "Can't find section in $filename\n";
108		}
109	    }
110	    $foundname = 1;
111	    next;
112	}
113	if ($foundname) {
114	    if (m/^\S/ || m/^\s+\*{3,}\s*$/) {
115		add_formated_subject(\@lines, $subject, $section, $filename, $picky)
116		    if defined $subject;
117		last;
118	    } else {
119		# deal with troff hyphenations
120		if (defined $subject and $subject =~ m/\xad\s*$/) {
121		    $subject =~ s/(?:\xad\cH)*\xad\s*$//;
122		    s/^\s*//;
123		}
124		# more troff hyphenation
125		if (defined $subject and $subject =~ m/\S(?:\-\cH)*\-$/) {
126		    $subject =~ s/(?:\-\cH)*\-$//;
127		    s/^\s*//;
128		}
129		s/^\s+/ /;
130		$subject.=$_;
131	    }
132	}
133    }
134
135    print STDERR "Can't parse $filename (not a manpage ?)\n" if @lines == 0;
136    return \@lines;
137}
138
1391;
140