1# $MirOS: src/scripts/roff2htm,v 1.79 2014/02/10 00:36:11 tg Exp $
2# $ekkoBSD: catman2html.sh,v 1.2 2004/03/07 03:02:53 stephen Exp $
3#-
4# Copyright (c) 2004, 2005, 2006, 2007, 2011, 2012, 2014
5#	Thorsten “mirabilos” Glaser <tg@mirbsd.org>
6# Original version for ekkoBSD by:
7# Copyright (c) 2004
8#	Stephen Paskaluk <sap@mirbsd.org>
9# Parts of the regular expression set below are based upon work by:
10# Copyright (c) 1995
11#	Panagiotis J. Christias <christia@theseas.ntua.gr>
12#
13# Provided that these terms and disclaimer and all copyright notices
14# are retained or reproduced in an accompanying document, permission
15# is granted to deal in this work without restriction, including un-
16# limited rights to use, publicly perform, distribute, sell, modify,
17# merge, give away, or sublicence.
18#
19# Advertising materials mentioning features or use of this work must
20# display the following acknowledgement:
21#	This product includes material provided by Thorsten Glaser.
22#
23# This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
24# the utmost extent permitted by applicable law, neither express nor
25# implied; without malicious intent or gross negligence. In no event
26# may a licensor, author or contributor be held liable for indirect,
27# direct, other damage, loss, or other issues arising in any way out
28# of dealing in the work, even if advised of the possibility of such
29# damage or existence of a defect, except proven that it results out
30# of said person's immediate fault when using the work as intended.
31#-
32# Routines for converting catman pages and nrcon(1)d papers to HTML.
33# ATTENTION: this file contains embedded white-, backspace and high-
34#	     bit-on control characters! Use “jupp --asis $0” to edit
35# Note: this file contains magic and can’t be edited as UTF-8 either.
# Note: this script assumes MirBSD filesystem internals: ino_t is uint32_t
37
# Gate on the running shell: accept mksh R31 dated 2007/10/18 or any
# later release. The flag i ends up 0 for an acceptable shell and 1
# for everything else.
if [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R)@(3[2-9]|[4-9][0-9]|[1-9][0-9][0-9])\ +([0-9])/+([0-9])/+([0-9])?(\ *) ]]; then
	# R32 and later are always recent enough
	i=0
elif [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R31)* ]]; then
	# R31: extract year, month and day from the version string ...
	eval $(print "$KSH_VERSION" | sed 's#^.*R31 \([0-9]*\)/\([0-9]*\)/\([0-9]*\)\( .*\)*$#y=\1 m=\2 d=\3#')
	# ... and compare them, most significant field first, to 2007/10/18
	if (( y != 2007 )); then
		(( i = y < 2007 ))
	elif (( m != 10 )); then
		(( i = m < 10 ))
	else
		(( i = d < 18 ))
	fi
	unset y m d
else
	# KSH_VERSION does not look like mksh R31 or newer
	i=1
fi
# we need an mksh version with uint32_t array indices
if (( i )); then
	print -u2 'Error: your mksh is not recent enough.'
	print -u2 'Please upgrade to at least mksh R32.'
	exit 1
fi
unset i
59
# global state shared by the routines below
roff2htm_machine=$(uname -m)			# machine name, e.g. i386 or sparc
set -A roff2htm_inodecache			# inode cache, initially empty
roff2htm_gendate=$(date '+%F %T')		# time this script was loaded
64
# Select manual page mode: (re)defines do_convert as a thin wrapper
# which passes all of its arguments on to do_convert_man.
function set_conversion_man {
	function do_convert { do_convert_man "$@"; }
}
# Select paper mode: (re)defines do_convert as a thin wrapper which
# passes all of its arguments on to do_convert_paper.
function set_conversion_paper {
	function do_convert { do_convert_paper "$@"; }
}
75set_conversion_man
76
# Use the absolute MirOS web site URL as the prefix stored in
# roff2htm_rel (the link prefix for generated hrefs).
function set_target_absolute {
	roff2htm_rel='https://www.mirbsd.org/'
}
# Use the parent directory ("../") as the prefix stored in
# roff2htm_rel (the link prefix for generated hrefs).
function set_target_relative {
	roff2htm_rel='../'
}
83set_target_relative
84
# do_convert_man
#
# Filter: reads a preformatted manual page on standard input and writes
# an HTML fragment to standard output. Positional parameters are not
# used. Reads the global roff2htm_rel as prefix for generated links.
#
# Stage 1 is "col -x" piped into a single sed invocation which, in order,
#  - rejoins a "name(N)" cross reference that was hyphenated across a
#    line break, moving it wholly onto the first line,
#  - replaces the "<_", ">_", "oo++" and "+_" style sequences with the
#    characters ≤ ≥ • ± ,
#  - translates & < > to three placeholder bytes early on and turns
#    those into the numeric entities &#38; &#60; &#62; near the end, so
#    that markup inserted in between is not escaped,
#  - wraps lines that start in column one and consist of letters, digits
#    and spaces in <h2>, and matching lines indented by exactly two
#    spaces in <h3>, closing and reopening the surrounding <pre>,
#  - rewrites character sequences into <b>, <i> and the temporary <G>
#    (later <b><i>) markup, then merges and tidies adjacent tags;
#    NOTE(review): these expressions contain raw control and high-bit
#    bytes that do not survive a UTF-8 editor - presumably nroff
#    overstrike (backspace) handling; confirm against the original
#    bytes before changing anything here,
#  - strips <b>/<i>/<G> tags from heading lines,
#  - on lines beginning with a space, links "name(section)" references
#    to ${roff2htm_rel}man<section>/<name>.htm, and "name(GNU)" to
#    ${roff2htm_rel}manINFO/<name>.html,
#  - puts <pre> before the first and </pre> after the last line and
#    removes empty or back-to-back <pre> pairs.
#
# Stage 2 is the read loop below, which squeezes empty lines.
function do_convert_man {
	local -i _nl=0
	col -x | sed							\
	    -e '/-$/N
{
s/\([0-9A-z][-.,0-9A-z:]*\)-\n\(  *\)\([0-9A-z][-.,0-9A-z:]*([1-9][A-z]*)\)\([^ ]*\) /\1\3\4\
\2/
}'									\
	    -e 's#<_#≤#g' -e 's#>_#≥#g'				\
	    -e 'y#&<>#���#'						\
									\
	    -e 's#[Oo][Oo]++#•#g'					\
	    -e 's#_||#_|_|#g'					\
	    -e 's#+_#±#g'						\
									\
	    -e 's#^[A-z][ 0-9A-z]*$#</pre><h2>&</h2><pre>#'		\
	    -e 's#^  \([A-z][ -%'\''-;=?-~]*\)$#</pre><h3>\1</h3><pre>#'	\
									\
	    -e 's#\([^~<>�-�][�-�]*\)~#\1̅#g'				\
	    -e 's#\([^]\)~_#\1_̅ #g'					\
	    -e 's#\([^]\)\([^<>_�-�][�-�]*\)\([̅]*\)_#\1_\2\3#g'	\
	    -e 's#\([^<>�-�]\)[�-�]*\([̅]*\)\([^<>�-�][�-�]*\)#<\1<\3\2>#g'	\
	    -e 's#\(<_<\([^>_]*\)>\)\1#<G>\2</G>#g'			\
	    -e 's#<_<\([^>_]*\)>#<i>\1</i>#g'				\
	    -e 's#<.<\([^>]*\)>#<b>\1</b>#g'				\
	    -e 's###g'							\
									\
	    -e '/<h[23]/s#</*[biG]>##g'					\
	    -e 's#</\([biG]\)><\1>##g'					\
	    -e 's#</\([biG]\)>\([- -*./:;?@^_~]*\)<\1>#\2#g'		\
	    -e 's#\([- 0-9A-z]\)\([$/_-]*\)\(<[biG]>\)#\1\3\2#g'	\
	    -e 's#\(</[biG]>\)\([)$/_-]*\)\([- 0-9A-z]\)#\2\1\3#g'	\
									\
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(3p\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([1-9]\)\(/[/0-9A-Za-z]*\)*)#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([PSU][MS][DM]\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(PAPERS\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(GNU)#<a href="'$roff2htm_rel'manINFO/\1.html">&</a>#g' \
	    -e 's#)\(</[biG]>\)\([- 0-9A-z]\)#\1)\2#g'			\
									\
	    -e 's/�/\&#38;/g'						\
	    -e 's/�/\&#60;/g'						\
	    -e 's/�/\&#62;/g'						\
									\
	    -e 's#<G>#<b><i>#g' -e 's#</G>#</i></b>#g'			\
	    -e 's#</b><b>##g'						\
									\
	    -e '1s#^#<pre>#'						\
	    -e '$s#$#</pre>#'						\
	    -e 's#<pre></pre>##g'					\
	    -e 's#</pre><pre>##g'					\
	| while IFS= read -r line; do
		# _nl remembers that at least one empty line was skipped
		if [[ -n $line ]]; then
			# emit a single blank line for the whole skipped run,
			# except directly in front of a closing </pre>
			(( _nl )) && [[ $line != '</pre>'* ]] && print
			print -r -- "$line"
			_nl=0
		else
			# empty line: defer output until the next non-empty
			# line (trailing empty lines are thus dropped)
			_nl=1
		fi
	done
}
146
# do_convert_paper
#
# Filter: reads a preformatted paper on standard input and writes an
# HTML fragment to standard output. Positional parameters are not
# used. Reads the global roff2htm_rel as prefix for generated links.
#
# The sed script is the one used by do_convert_man minus the <h2>/<h3>
# heading detection, the tag stripping on heading lines and the
# separate "(3p)" link rule; the numbered-section link rule here takes
# any run of "/", digits and letters after the section digit. It thus
#  - rejoins hyphenated "name(N)" cross references,
#  - replaces the "<_", ">_", "oo++" and "+_" style sequences with the
#    characters ≤ ≥ • ± ,
#  - escapes & < > via placeholder bytes that become &#38; &#60; &#62;
#    near the end,
#  - rewrites character sequences into <b>, <i> and the temporary <G>
#    (later <b><i>) markup; NOTE(review): these expressions contain raw
#    control and high-bit bytes that do not survive a UTF-8 editor -
#    presumably nroff overstrike (backspace) handling; confirm against
#    the original bytes before changing anything here,
#  - on lines beginning with a space, links "name(section)" references
#    to ${roff2htm_rel}man<section>/<name>.htm, and "name(GNU)" to
#    ${roff2htm_rel}manINFO/<name>.html,
#  - wraps the whole text in <pre> ... </pre>.
#
# The read loop afterwards squeezes empty lines.
function do_convert_paper {
	local -i _nl=0
	col -x | sed							\
	    -e '/-$/N
{
s/\([0-9A-z][-.,0-9A-z:]*\)-\n\(  *\)\([0-9A-z][-.,0-9A-z:]*([1-9][A-z]*)\)\([^ ]*\) /\1\3\4\
\2/
}'									\
	    -e 's#<_#≤#g' -e 's#>_#≥#g'				\
	    -e 'y#&<>#���#'						\
									\
	    -e 's#[Oo][Oo]++#•#g'					\
	    -e 's#_||#_|_|#g'					\
	    -e 's#+_#±#g'						\
									\
	    -e 's#\([^~<>�-�][�-�]*\)~#\1̅#g'				\
	    -e 's#\([^]\)~_#\1_̅ #g'					\
	    -e 's#\([^]\)\([^<>_�-�][�-�]*\)\([̅]*\)_#\1_\2\3#g'	\
	    -e 's#\([^<>�-�]\)[�-�]*\([̅]*\)\([^<>�-�][�-�]*\)#<\1<\3\2>#g'	\
	    -e 's#\(<_<\([^>_]*\)>\)\1#<G>\2</G>#g'			\
	    -e 's#<_<\([^>_]*\)>#<i>\1</i>#g'				\
	    -e 's#<.<\([^>]*\)>#<b>\1</b>#g'				\
	    -e 's###g'							\
									\
	    -e 's#</\([biG]\)><\1>##g'					\
	    -e 's#</\([biG]\)>\([- -*./:;?@^_~]*\)<\1>#\2#g'		\
	    -e 's#\([- 0-9A-z]\)\([$/_-]*\)\(<[biG]>\)#\1\3\2#g'	\
	    -e 's#\(</[biG]>\)\([)$/_-]*\)\([- 0-9A-z]\)#\2\1\3#g'	\
									\
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([1-9]\)[/0-9A-Za-z]*)#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([PSU][MS][DM]\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(PAPERS\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(GNU)#<a href="'$roff2htm_rel'manINFO/\1.html">&</a>#g' \
	    -e 's#)\(</[biG]>\)\([- 0-9A-z]\)#\1)\2#g'			\
									\
	    -e 's/�/\&#38;/g'						\
	    -e 's/�/\&#60;/g'						\
	    -e 's/�/\&#62;/g'						\
									\
	    -e 's#<G>#<b><i>#g' -e 's#</G>#</i></b>#g'			\
	    -e 's#</b><b>##g'						\
									\
	    -e '1s#^#<pre>#'						\
	    -e '$s#$#</pre>#'						\
	    -e 's#<pre></pre>##g'					\
	    -e 's#</pre><pre>##g'					\
	| while IFS= read -r line; do
		# _nl remembers that at least one empty line was skipped
		if [[ -n $line ]]; then
			# emit a single blank line for the whole skipped run,
			# except directly in front of a closing </pre>
			(( _nl )) && [[ $line != '</pre>'* ]] && print
			print -r -- "$line"
			_nl=0
		else
			# empty line: defer output until the next non-empty
			# line (trailing empty lines are thus dropped)
			_nl=1
		fi
	done
}
203
# output_header pagename section
#
# Writes the XHTML 1.1 prologue for one page to standard output: the
# DOCTYPE, <head> with title, canonical link and embedded stylesheet,
# and the <h1> heading linking to the page itself.
#
# Arguments: $1 - page name, $2 - section
# Globals:   roff2htm_machine (read), roff2htm_rel (read)
#
# All expansions are quoted and print runs with -r so that page names
# containing whitespace, glob characters or backslashes are emitted
# verbatim instead of being field-split, globbed or interpreted.
function output_header {
	print -r -- '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
 "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head>
 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
 <title>RTFM '"$1($2)"'</title>
 <meta name="robots" content="index, follow" />
 <link rel="canonical" href="https://www.mirbsd.org/man'"$roff2htm_machine/$1.$2"'" />
 <style type="text/css">
	/* <![CDATA[ */
	body {
		background-color:#000000;
		color:#666666;
		font-family:serif;
	}
	a {
		color:inherit;
		text-decoration:none;
		border-bottom:1px dashed;
	}
	a:visited {
		text-decoration:none;
		border-bottom:1px dotted;
	}
	a:hover {
		text-decoration:none;
		border-bottom:1px double;
	}
	pre {
		line-height:112%;
		color:#FFBF00;
	}
	b {
		color:#FFEF00;
		font-weight:normal;
	}
	i {
		font-style:normal;
		border-bottom:1px solid #FFBF00;
	}
	b i,i b {
		color:#FFEF00;
		font-weight:normal;
		font-style:normal;
		border-bottom:1px solid #FFEF00;
	}
	h1 {
		color:#FFEF00;
		font-size:xx-large;
		font-family:serif;
	}
	h2 {
		color:#FFFFFF;
		font-size:x-large;
		font-family:sans-serif;
	}
	h3 {
		color:#CCCCCC;
		font-size:large;
		font-family:sans-serif;
	}
	/* ]]> */
 </style>
</head><body>
<h1>MirOS Manual: <a href="'"$roff2htm_rel"'man'"$2/$1"'.htm">'"$1($2)"'</a></h1>'
}
270
# output_footer
#
# Writes the page trailer to standard output: a rule, the generation
# timestamp with the version tag of this script, the copyright and
# validator notes, and the closing </body></html>.
#
# Globals: roff2htm_gendate (read), roff2htm_rel (read)
#
# The expansions are quoted and print runs with -r so that the values
# are emitted verbatim instead of being field-split or globbed.
function output_footer {
	print -r -- '
<hr /><p style="font-size:xx-small;">Generated on' "$roff2htm_gendate" 'by
 <tt>$MirOS: src/scripts/roff2htm,v 1.79 2014/02/10 00:36:11 tg Exp $</tt></p>
<p>These manual pages and other documentation are <a
 href="'"$roff2htm_rel"'man7/BSD-Licence.htm">copyrighted</a> by their respective writers;
 their source is available at our <a href="http://cvs.mirbsd.de/">CVSweb</a>,
 AnonCVS, and other mirrors. The rest is Copyright © 2002‒2014 <a
 href="https://www.mirbsd.org/">The MirOS Project</a>, Germany.<br /><span
 style="font-size:3pt; font-style:italic;">This product includes material
 provided by Thorsten Glaser.</span></p>
<p style="font-size:x-small;">This manual page’s HTML representation
 is supposed to be <a href="http://validator.w3.org/check/referer">valid
 XHTML/1.1</a>; if not, please send a bug report – diffs preferred.</p>
</body></html>'
}
287
# do_conversion [pagename [section]]
#
# Converts one document from standard input to a complete HTML page on
# standard output: header, body as produced by the currently selected
# do_convert, footer.
#
# Arguments: $1 - page name (default: missing-pagename)
#            $2 - section (default: 0)
#
# The arguments are passed on quoted so that a page name containing
# whitespace or glob characters reaches output_header as one word.
function do_conversion {
	output_header "${1:-missing-pagename}" "${2:-0}"
	do_convert "${2:-0}"
	output_footer
}
293
# do_conversion_verbose title section infile outfile
#
# Runs do_conversion for title/section with standard input taken from
# infile and standard output written to outfile, reporting progress
# ("infile → outfile", then a newline once done) on standard error.
#
# All parameters are quoted so that file names and titles containing
# whitespace or glob characters are handled as single words.
function do_conversion_verbose {
	print -nru2 -- "$3 → $4"
	do_conversion "$1" "$2" <"$3" >"$4"
	print -u2
}
300
# convert_page /path/to/man/catN[/arch]/page.0 /targetpath
#
# Converts one preformatted manual page into
# /targetpath/man<section>/<page>.htm. The page name is the file's
# basename without ".0"; the section is what follows "cat" in the name
# of the containing directory path, up to the next slash.
#
# Pages sharing an inode with one converted earlier (tracked in the
# global array roff2htm_inodecache, indexed by inode number) are not
# converted again: the existing HTML file is hard-linked under the new
# name and the additional name is patched into its <title> and <h1>.
#
# Arguments: $1 - source file, $2 - target base directory
# Globals:   roff2htm_inodecache (read/written), roff2htm_rel (read)
#
# All path expansions are quoted so that directories or page names with
# whitespace or glob characters are passed on as single words.
function convert_page {
	local fn=$1 page sect tn
	# inode number of the source; stays 0 if stat(1) fails
	local -Uui ino=$(stat -Lf %i "$fn" 2>/dev/null)
	page=${fn##*/}			# basename
	page=${page%.0}			# manual page name
	sect=${fn%/*}			# dirname
	sect=${sect##*/cat}		# archsection
	sect=${sect%%/*}		# section
	tn=man${sect}/${page}.htm	# target file

	if (( ino )) && [[ -n ${roff2htm_inodecache[ino]} ]]; then
		# source file is linked to a file we know
		print -ru2 -- "$tn ← ${roff2htm_inodecache[ino]}"
		ln -f "$2/${roff2htm_inodecache[ino]}" "$2/$tn"
		# patch in the additional name(s)
		ed -s "$2/$tn" <<-EOF
			/<title>/s#</title>#, $page($sect)&#
			/<h1>/s#</h1>#, <a href="$roff2htm_rel$tn">$page($sect)</a>&#
			wq
		EOF
	else
		# store target filename in the inode cache
		roff2htm_inodecache[ino]=$tn
		do_conversion_verbose "$page" "$sect" "$fn" "$2/$tn"
	fi
}
328
# output_htaccess >…/.htaccess
#
# Writes the two web server directives for a section directory to
# standard output: a DirectoryIndex of /dev/null and a text/html
# (UTF-8) content type for files with the "htm" extension.
function output_htaccess {
	print -r -- "DirectoryIndex /dev/null
AddType 'text/html; charset=utf-8' htm"
}
334
# convert_all [/path/to/share/man [/targetpath]]
#
# Converts every "*.0" file found below the cat1 … cat9 and cat3p
# directories of the manual tree into HTML below the target path.
#
# Arguments: $1 - manual tree (default: /usr/share/man)
#            $2 - target base directory (default: $(pwd)/mbsdman)
#
# The file list is produced by a co-process (find | sort -f) and read
# back with "read -p" after the per-section directories and their
# .htaccess files have been created. Paths are quoted throughout and
# read with IFS= and -r, so that file names containing whitespace or
# backslashes reach convert_page unmodified.
function convert_all {
	local tp=${2:-$(pwd)/mbsdman}		# target basepath
	local x f

	# start listing the source pages in the background
	(find "${1:-/usr/share/man}"/cat{[1-9],3p} -name \*.0 2>/dev/null | \
	    sort -f) |&
	for x in 1 2 3 3p 4 5 6 7 8 9; do
		mkdir -p "$tp/man$x"	# one per section
		output_htaccess >"$tp/man$x/.htaccess"
	done
	# one page per line from the co-process
	while IFS= read -r -p f; do
		convert_page "$f" "$tp"	# any subpages
	done
}
350