1# $MirOS: src/scripts/roff2htm,v 1.79 2014/02/10 00:36:11 tg Exp $ 2# $ekkoBSD: catman2html.sh,v 1.2 2004/03/07 03:02:53 stephen Exp $ 3#- 4# Copyright (c) 2004, 2005, 2006, 2007, 2011, 2012, 2014 5# Thorsten “mirabilos” Glaser <tg@mirbsd.org> 6# Original version for ekkoBSD by: 7# Copyright (c) 2004 8# Stephen Paskaluk <sap@mirbsd.org> 9# Parts of the regular expression set below are based upon work by: 10# Copyright (c) 1995 11# Panagiotis J. Christias <christia@theseas.ntua.gr> 12# 13# Provided that these terms and disclaimer and all copyright notices 14# are retained or reproduced in an accompanying document, permission 15# is granted to deal in this work without restriction, including un- 16# limited rights to use, publicly perform, distribute, sell, modify, 17# merge, give away, or sublicence. 18# 19# Advertising materials mentioning features or use of this work must 20# display the following acknowledgement: 21# This product includes material provided by Thorsten Glaser. 22# 23# This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to 24# the utmost extent permitted by applicable law, neither express nor 25# implied; without malicious intent or gross negligence. In no event 26# may a licensor, author or contributor be held liable for indirect, 27# direct, other damage, loss, or other issues arising in any way out 28# of dealing in the work, even if advised of the possibility of such 29# damage or existence of a defect, except proven that it results out 30# of said person's immediate fault when using the work as intended. 31#- 32# Routines for converting catman pages and nrcon(1)d papers to HTML. 33# ATTENTION: this file contains embedded white-, backspace and high- 34# bit-on control characters! Use “jupp --asis $0” to edit 35# Note: this file contains magic and can’t be edited as UTF-8 either. 
# Note: this script assumes MirBSD filesystem interna: ino_t=uint32_t

# check if mksh R31:2007/10/18 or up
# (i is a scratch flag: 0 = shell is recent enough, 1 = too old)
if [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R)@(3[2-9]|[4-9][0-9]|[1-9][0-9][0-9])\ +([0-9])/+([0-9])/+([0-9])?(\ *) ]]; then
	i=0
elif [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R31)* ]]; then
	# R31: pull year/month/day out of the version string and compare
	# them against 2007/10/18
	eval $(print "$KSH_VERSION" | sed 's#^.*R31 \([0-9]*\)/\([0-9]*\)/\([0-9]*\)\( .*\)*$#y=\1 m=\2 d=\3#')
	(( i = y < 2007 ? 1 :
	    y > 2007 ? 0 :
	    m < 10 ? 1 :
	    m > 10 ? 0 :
	    d < 18 ? 1 : 0 ))
	unset y m d
else
	i=1
fi
# we need an mksh version with uint32_t array indicēs
if (( i )); then
	print -u2 Error: your mksh is not recent enough.
	print -u2 Please upgrade to at least mksh R32.
	exit 1
fi
unset i

# initialise globals
roff2htm_gendate=$(date +"%F %T")		# current time
set -A roff2htm_inodecache			# inode cache (empty)
roff2htm_machine=$(uname -m)			# i386, sparc

# Select which converter do_convert dispatches to, by (re)defining
# do_convert as a thin wrapper; the manual page converter is the default.
function set_conversion_man {
	function do_convert {
		do_convert_man "$@"
	}
}
function set_conversion_paper {
	function do_convert {
		do_convert_paper "$@"
	}
}
set_conversion_man

# Select the prefix put in front of generated man*/ links:
# absolute (full site URL) or relative (../); relative is the default.
function set_target_absolute {
	roff2htm_rel=https://www.mirbsd.org/
}
function set_target_relative {
	roff2htm_rel=../
}
set_target_relative

# Helper shared by both converters (stdin → stdout): drops empty lines,
# re-inserting a single empty line before the next non-empty line unless
# that line begins with </pre>. Trailing empty lines are dropped.
function roff2htm_squeeze_blank_lines {
	local -i _nl=0
	local line

	while IFS= read -r line; do
		if [[ -n $line ]]; then
			(( _nl )) && [[ $line != '</pre>'* ]] && print
			print -r -- "$line"
			_nl=0
		else
			_nl=1
		fi
	done
}

# do_convert_man <catman-page >html-fragment
#
# Filters stdin through col -x and one sed run, then squeezes blank lines.
# Arguments are accepted but ignored. The sed steps, in order:
#  - rejoin a cross reference name(section) hyphenated across a line break
#  - map <_ >_ OO++ +_ overstrike sequences to ≤ ≥ • ±
#  - hide & < > behind placeholder bytes (y command) while tags are built
#  - turn unindented heading lines into <h2>, indented ones into <h3>
#  - turn overstrike sequences into <i>, <b> and <G> (bold+italic) runs
#  - tidy up adjacent/misplaced tags and strip them from headings
#  - link name(section) references on indented lines below $roff2htm_rel
#  - restore the placeholders as the entities &amp; &lt; &gt;
#  - wrap the whole text in <pre> … </pre>
#
# ATTENTION: the expressions depend on embedded backspace and high-bit
# bytes (see the note in the file header); apart from the entity
# replacement texts they are carried over unchanged and must only be
# edited with a byte-transparent editor.
function do_convert_man {
	col -x | sed \
	    -e '/-$/N
{
s/\([0-9A-z][-.,0-9A-z:]*\)-\n\( *\)\([0-9A-z][-.,0-9A-z:]*([1-9][A-z]*)\)\([^ ]*\) /\1\3\4\
\2/
}' \
	    -e 's#<_#≤#g' -e 's#>_#≥#g' \
	    -e 'y#&<>#���#' \
	    \
	    -e 's#[Oo][Oo]++#•#g' \
	    -e 's#_||#_|_|#g' \
	    -e 's#+_#±#g' \
	    \
	    -e 's#^[A-z][ 0-9A-z]*$#</pre><h2>&</h2><pre>#' \
	    -e 's#^ \([A-z][ -%'\''-;=?-~]*\)$#</pre><h3>\1</h3><pre>#' \
	    \
	    -e 's#\([^~<>�-�][�-�]*\)~#\1̅#g' \
	    -e 's#\([^]\)~_#\1_̅ #g' \
	    -e 's#\([^]\)\([^<>_�-�][�-�]*\)\([̅]*\)_#\1_\2\3#g' \
	    -e 's#\([^<>�-�]\)[�-�]*\([̅]*\)\([^<>�-�][�-�]*\)#<\1<\3\2>#g' \
	    -e 's#\(<_<\([^>_]*\)>\)\1#<G>\2</G>#g' \
	    -e 's#<_<\([^>_]*\)>#<i>\1</i>#g' \
	    -e 's#<.<\([^>]*\)>#<b>\1</b>#g' \
	    -e 's###g' \
	    \
	    -e '/<h[23]/s#</*[biG]>##g' \
	    -e 's#</\([biG]\)><\1>##g' \
	    -e 's#</\([biG]\)>\([- -*./:;?@^_~]*\)<\1>#\2#g' \
	    -e 's#\([- 0-9A-z]\)\([$/_-]*\)\(<[biG]>\)#\1\3\2#g' \
	    -e 's#\(</[biG]>\)\([)$/_-]*\)\([- 0-9A-z]\)#\2\1\3#g' \
	    \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(3p\))#<a href="'"$roff2htm_rel"'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([1-9]\)\(/[/0-9A-Za-z]*\)*)#<a href="'"$roff2htm_rel"'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([PSU][MS][DM]\))#<a href="'"$roff2htm_rel"'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(PAPERS\))#<a href="'"$roff2htm_rel"'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(GNU)#<a href="'"$roff2htm_rel"'manINFO/\1.html">&</a>#g' \
	    -e 's#)\(</[biG]>\)\([- 0-9A-z]\)#\1)\2#g' \
	    \
	    -e 's/�/\&amp;/g' \
	    -e 's/�/\&lt;/g' \
	    -e 's/�/\&gt;/g' \
	    \
	    -e 's#<G>#<b><i>#g' -e 's#</G>#</i></b>#g' \
	    -e 's#</b><b>##g' \
	    \
	    -e '1s#^#<pre>#' \
	    -e '$s#$#</pre>#' \
	    -e 's#<pre></pre>##g' \
	    -e 's#</pre><pre>##g' \
	    | roff2htm_squeeze_blank_lines
}

# do_convert_paper <nroff-formatted-paper >html-fragment
#
# Same pipeline as do_convert_man with these differences: no <h2>/<h3>
# heading detection (and hence no tag stripping inside headings), no
# special case for section 3p, and a simpler pattern for the text that
# may follow the section digit inside the parentheses.
#
# ATTENTION: the expressions depend on embedded backspace and high-bit
# bytes (see the note in the file header); apart from the entity
# replacement texts they are carried over unchanged and must only be
# edited with a byte-transparent editor.
function do_convert_paper {
	col -x | sed \
	    -e '/-$/N
{
s/\([0-9A-z][-.,0-9A-z:]*\)-\n\( *\)\([0-9A-z][-.,0-9A-z:]*([1-9][A-z]*)\)\([^ ]*\) /\1\3\4\
\2/
}' \
	    -e 's#<_#≤#g' -e 's#>_#≥#g' \
	    -e 'y#&<>#���#' \
	    \
	    -e 's#[Oo][Oo]++#•#g' \
	    -e 's#_||#_|_|#g' \
	    -e 's#+_#±#g' \
	    \
	    -e 's#\([^~<>�-�][�-�]*\)~#\1̅#g' \
	    -e 's#\([^]\)~_#\1_̅ #g' \
	    -e 's#\([^]\)\([^<>_�-�][�-�]*\)\([̅]*\)_#\1_\2\3#g' \
	    -e 's#\([^<>�-�]\)[�-�]*\([̅]*\)\([^<>�-�][�-�]*\)#<\1<\3\2>#g' \
	    -e 's#\(<_<\([^>_]*\)>\)\1#<G>\2</G>#g' \
	    -e 's#<_<\([^>_]*\)>#<i>\1</i>#g' \
	    -e 's#<.<\([^>]*\)>#<b>\1</b>#g' \
	    -e 's###g' \
	    \
	    -e 's#</\([biG]\)><\1>##g' \
	    -e 's#</\([biG]\)>\([- -*./:;?@^_~]*\)<\1>#\2#g' \
	    -e 's#\([- 0-9A-z]\)\([$/_-]*\)\(<[biG]>\)#\1\3\2#g' \
	    -e 's#\(</[biG]>\)\([)$/_-]*\)\([- 0-9A-z]\)#\2\1\3#g' \
	    \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([1-9]\)[/0-9A-Za-z]*)#<a href="'"$roff2htm_rel"'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([PSU][MS][DM]\))#<a href="'"$roff2htm_rel"'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(PAPERS\))#<a href="'"$roff2htm_rel"'man\2/\1.htm">&</a>#g' \
	    -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(GNU)#<a href="'"$roff2htm_rel"'manINFO/\1.html">&</a>#g' \
	    -e 's#)\(</[biG]>\)\([- 0-9A-z]\)#\1)\2#g' \
	    \
	    -e 's/�/\&amp;/g' \
	    -e 's/�/\&lt;/g' \
	    -e 's/�/\&gt;/g' \
	    \
	    -e 's#<G>#<b><i>#g' -e 's#</G>#</i></b>#g' \
	    -e 's#</b><b>##g' \
	    \
	    -e '1s#^#<pre>#' \
	    -e '$s#$#</pre>#' \
	    -e 's#<pre></pre>##g' \
	    -e 's#</pre><pre>##g' \
	    | roff2htm_squeeze_blank_lines
}

# output_header pagename section
#
# Prints the XHTML prologue, <head> (title, canonical link, stylesheet)
# and the <h1> line for the given page to stdout. The page name and
# section are inserted as given, without HTML escaping.
function output_header {
	# print -r and quoted expansions: names must pass through verbatim
	print -r -- '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
 "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head>
 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
 <title>RTFM '"$1($2)"'</title>
 <meta name="robots" content="index, follow" />
 <link rel="canonical" href="https://www.mirbsd.org/man'"$roff2htm_machine/$1.$2"'" />
 <style type="text/css">
 /* <![CDATA[ */
 body {
 background-color:#000000;
 color:#666666;
 font-family:serif;
 }
 a {
 color:inherit;
 text-decoration:none;
 border-bottom:1px dashed;
 }
 a:visited {
 text-decoration:none;
 border-bottom:1px dotted;
 }
 a:hover {
 text-decoration:none;
 border-bottom:1px double;
 }
 pre {
 line-height:112%;
 color:#FFBF00;
 }
 b {
 color:#FFEF00;
 font-weight:normal;
 }
 i {
 font-style:normal;
 border-bottom:1px solid #FFBF00;
 }
 b i,i b {
 color:#FFEF00;
 font-weight:normal;
 font-style:normal;
 border-bottom:1px solid #FFEF00;
 }
 h1 {
 color:#FFEF00;
 font-size:xx-large;
 font-family:serif;
 }
 h2 {
 color:#FFFFFF;
 font-size:x-large;
 font-family:sans-serif;
 }
 h3 {
 color:#CCCCCC;
 font-size:large;
 font-family:sans-serif;
 }
 /* ]]> */
 </style>
</head><body>
<h1>MirOS Manual: <a href="'"${roff2htm_rel}man$2/$1"'.htm">'"$1($2)"'</a></h1>'
}

# output_footer
#
# Prints the page footer (generation time stamp from $roff2htm_gendate,
# copyright and validator notes) and closes <body> and <html>.
function output_footer {
	# print joins its three arguments with single spaces
	print -r -- '
<hr /><p style="font-size:xx-small;">Generated on' "$roff2htm_gendate" 'by
 <tt>$MirOS: src/scripts/roff2htm,v 1.79 2014/02/10 00:36:11 tg Exp $</tt></p>
<p>These manual pages and other documentation are <a
 href="'"$roff2htm_rel"'man7/BSD-Licence.htm">copyrighted</a> by their respective writers;
 their source is available at our <a href="http://cvs.mirbsd.de/">CVSweb</a>,
 AnonCVS, and other mirrors. The rest is Copyright © 2002‒2014 <a
 href="https://www.mirbsd.org/">The MirOS Project</a>, Germany.<br /><span
 style="font-size:3pt; font-style:italic;">This product includes material
 provided by Thorsten Glaser.</span></p>
<p style="font-size:x-small;">This manual page’s HTML representation
 is supposed to be <a href="http://validator.w3.org/check/referer">valid
 XHTML/1.1</a>; if not, please send a bug report – diffs preferred.</p>
</body></html>'
}

# do_conversion title section <infile >outfile
#
# Emits header, converted body (via the currently selected do_convert)
# and footer; title defaults to "missing-pagename", section to 0.
function do_conversion {
	output_header "${1:-missing-pagename}" "${2:-0}"
	do_convert "${2:-0}"
	output_footer
}

# do_conversion_verbose title section infile outfile
#
# Like do_conversion, reading infile and writing outfile, with a
# progress line "infile → outfile" on stderr.
function do_conversion_verbose {
	print -nru2 -- "$3 → $4"
	do_conversion "$1" "$2" <"$3" >"$4"
	print -u2
}

# convert_page /path/to/man.cat1 /targetpath
#
# Converts one formatted page (…/cat<section>[/<arch>]/<name>.0) into
# /targetpath/man<section>/<name>.htm. When the source has the same inode
# number as a page converted earlier, the existing output file is
# hard-linked under the new name and the extra name is patched into its
# <title> and <h1> instead of converting again.
# Returns 1 if that hard link cannot be created.
function convert_page {
	local fn=$1 page sect tn
	# a failing stat leaves ino at 0, which disables the cache lookup
	local -Uui ino=$(stat -Lf %i "$fn" 2>/dev/null)
	page=${fn##*/}				# basename
	page=${page%.0}				# manual page name
	sect=${fn%/*}				# dirname
	sect=${sect##*/cat}			# archsection
	sect=${sect%%/*}			# section
	tn=man${sect}/${page}.htm		# target file

	if (( ino )) && [[ -n ${roff2htm_inodecache[ino]} ]]; then
		# source file is linked to a file we know
		print -ru2 -- "$tn ← ${roff2htm_inodecache[ino]}"
		# without the link there is nothing for ed to patch
		ln -f "$2/${roff2htm_inodecache[ino]}" "$2/$tn" || return 1
		# patch in the additional name(s); the script is kept at
		# column 0 so it does not depend on tab indentation
		ed -s "$2/$tn" <<EOF
/<title>/s#</title>#, $page($sect)&#
/<h1>/s#</h1>#, <a href="$roff2htm_rel$tn">$page($sect)</a>&#
wq
EOF
	else
		# store target filename in the inode cache
		roff2htm_inodecache[ino]=$tn
		do_conversion_verbose "$page" "$sect" "$fn" "$2/$tn"
	fi
}

# output_htaccess >…/.htaccess
#
# Prints the per-directory web server settings for a man* directory.
function output_htaccess {
	print DirectoryIndex /dev/null
	print "AddType 'text/html; charset=utf-8' htm"
}

# convert_all /path/to/share/man /targetpath
#
# Converts every *.0 file below the cat[1-9] and cat3p directories of the
# manual tree (default /usr/share/man) into the target tree (default
# $(pwd)/mbsdman), creating one man<section> directory with a .htaccess
# file per section first.
function convert_all {
	local tp=${2:-$(pwd)/mbsdman}		# target basepath
	local x f

	# the sorted file list is produced by a co-process …
	(find "${1:-/usr/share/man}"/cat{[1-9],3p} -name \*.0 2>/dev/null | \
	    sort -f) |&
	for x in 1 2 3 3p 4 5 6 7 8 9; do
		mkdir -p "$tp/man$x"		# one per section
		output_htaccess >"$tp/man$x/.htaccess"
	done
	# … and read back here, one path per line, taken literally
	while IFS= read -p -r f; do
		convert_page "$f" "$tp"		# any subpages
	done
}