#.H1 AWKWORDS
#.H2 Synopsis
#.P awkwords --title "Title" file > file.html
#.P awkwords file > file.html
#.H2 Download
#.P This code requires gawk and bash. To download:
#.PRE
#wget http://lawker.googlecode.com/svn/fridge/lib/bash/awkwords
#chmod +x awkwords
#./PRE
#.P To test the code, apply it to itself:
#.UL
#.LI ./awkwords --title "Does this work?" awkwords > awkwards.html
#./UL
#
#.H2 Description
#.P
# AwkWords is a simple-to-use markup language
# for writing documentation for programs whose comment lines
# start with "#" and whose comments contain HTML code.
#.P
# For example,
#.URL http://awk.info/?tools/awkwords awk.info?tools/awkwords
# shows the html generated from
#.URL http://lawker.googlecode.com/svn/fridge/lib/bash/awkwords this bash script.
#.P
# When used with the --title option, a stand alone web page is generated
# (to control the style of that page, see the CSS function, discussed below).
# When used without --title it generates some html suitable for inclusion
# into other pages.
#.P
# Also, AwkWords finds all the &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;,
# &lt;h6&gt;, &lt;h7&gt;, &lt;h8&gt;, &lt;h9&gt; headings and copies them to a table
# of contents at the front of the file.
# Note that AwkWords assumes that the file contains only one
# &lt;h1&gt; heading - this is printed before the table of contents.
#.P
# AwkWords adds some short cuts for HTML markup, as well as including
# nested contents (see below: "including nested content"). This is useful for including, say,
# program output along with the actual program.
#.H3 Extra Markup
#.H4 Short cuts for HTML
#.DL
#.DT #.XX
#.DD This is replaced by &lt;XX&gt;.
#.DT #.XX words
#.DD
# This is replaced by &lt;XX&gt;words&lt;/XX&gt;. Note that
# this tag won't work properly if the source text spills over more than
# one line.
#.DT #.TO url words
#.DD This is replaced by a link to mail to url.
#.DT #.URL url words
#.DD This is replaced by a link to url.
#./DL
#.H4 Including nested content:
#.DL
#.DT #.IN file
#.DD This line is replaced by the contents of file.
#.DT #.LISTING file #.DD This line is replaced by the name of the file, followed by a verbatbim displau of file (no formatting). #.DT #.CODE file #.DD This line is replaced by the name of the file, followed verbatbim by file (no formatting). #.DT #.BODY file #.DD This line is replaced by file, less the lines before the first blank line. #./DL #./UL #.H2 Programmer's Guide #.P # Awkwords is divided into three functions: # unhtml fixes the printing of pre-formatted blocks; # toc adds the table of contents while # includes handles the details of the extra mark-up. #.H3 Functions #.H4 unhtml #.PRE unhtml() { cat $1| gawk ' BEGIN {IGNORECASE=1} /^
/ {In=1; print; next} /^<\/PRE>/ {In=0; print; next} In {gsub("<","\\<",$0); print; next } {print $0 }' } #./PRE #.H4 toc #.PRE toc() { cat $1 | gawk ' BEGIN { IGNORECASE = 1 } /^<[h]1>/ { Header=$0; next} /^[<]h[23456789]>/ { T++ ; Toc[T] = gensub(/(.*)[ \t]*(.*)[ \t]*<\/h(.*)>(.*)/, "<""h\\2><""font color=black>\\• <""a href=#" T ">\\3 ", "g",$0) Pre="" } { Line[++N] = Pre $0; Pre="" } END { print Header; print "<" "h2>Contents" print "<" "div id=\"htmltoc\">" for(I=1;I<=T;I++) print Toc[I] print "<" "/div>" print "<" "div id=\"htmlbody\">" for(I=1;I<=N;I++) print Line[I] print "" "div>" }' } #./PRE #.H4 includes #.P The xpand function controls recursive inclusion of content. Note that #.UL #.LI The last act of this function must be to call xpand1. #.LI # When including verbatim text, the recursive call to xpands # must pass "1" to the second paramter. #./UL #.PRE includes() { cat $1 | gawk ' function xpand(pre, tmp) { if ($1 ~ "^#.IN") xpands($2,pre) else if ($1 ~ "^#.BODY" ) xpandsBody($2,pre) else if ($1 ~ "^#.LISTING") { print "<" "pre>" xpands($2,1) # <===== note the recursive call with "1" print "<" "/pre>" } else if ($1 ~ "^#.CODE") { print "<" "p>" $2 "\n<" "pre>" xpands($2,1) # <===== note the recursive call with "1" print "<" "/pre>" } else if ($1 ~ "^#.URL") { tmp = $2; $1=$2=""; print "<" "a href=\""tmp"\">" trim($0) "" } else if ($1 ~ "^#.TO") { tmp = $2; $1=$2=""; print "<" "a href=\"mailto:"tmp"\">" trim($0) "" } else xpand1(pre) } #./PRE #.P # The xpand1 function controls the printing of a single line. # If we are formatting verbatim text, we must remove the start-of-html character "<". # Otherwise, we expand any html shortcuts. 
#.PRE function xpand1(pre) { if (pre) gsub("<","\\<",$0) # <=== remove start-of-html-character else { $0= xpandHtml($0) # <=== expand html short cuts sub(/^#/,"",$0) } print $0 } #./PRE #.P The function xpandHtml controls the html short cuts #.PRE function xpandHtml( str,tag) { if ($0 ~ /^#\.H1/) { $1="" return "<" "h""1>" $0 " " "h1>" } if (sub(/^#\./,"",$1)) { tag=$1; $1="" return "<" tag ">" (($0 ~ /^[ \t]*$/) ? "" : $0""tag">") } return $0 } #./PRE #.P The rest of the code is just some book-keeping and managing the recursive addition of content. #.PRE function xpands(f,pre) { if (newFile(f)) { while((getline0) xpand(pre) close(f) } } function xpandsBody(f,pre, using) { if (newFile(f)) { while((getline 0) { if ( !using && ($0 ~ /^[\t ]*$/) ) using = 1 if ( using ) xpand(pre)} close(f) } } function newFile(f) { return ++Seen[f]==1 } function trim (s) { sub(/^[ \t]*/,"",s); sub(/[ \t]*$/,"",s); return s } BEGIN { IGNORECASE=1 } { xpand() }' } #./PRE #.H3 CSS styles #.P # If used to generate a full web page, then the following styles are added. # Note that the htmltoc class controls the appearance of the table of contents. 
#.PRE
# Print the style sheet for a stand alone page.
# The selectors use the id of the div that holds the table of contents.
css() {
    echo "<""STYLE type=\"text/css\">"
    cat <<'EOF'
div#htmltoc h2 { font-size: medium; font-weight: normal; margin: 0 0 0 0; margin-left: 30px;}
div#htmltoc h3 { font-size: medium; font-weight: normal; margin: 0 0 0 0; margin-left: 60px;}
div#htmltoc h4 { font-size: medium; font-weight: normal; margin: 0 0 0 0; margin-left: 90px;}
div#htmltoc h5 { font-size: medium; font-weight: normal; margin: 0 0 0 0; margin-left: 120px;}
div#htmltoc h6 { font-size: medium; font-weight: normal; margin: 0 0 0 0; margin-left: 150px;}
div#htmltoc h7 { font-size: medium; font-weight: normal; margin: 0 0 0 0; margin-left: 180px; }
EOF
    echo "<""/STYLE>"
}
#./PRE
#.H3 Main command line
#.PRE
# Convert the file named by $1 (or standard input when $1 is empty).
main() {
    if [ -n "$1" ]; then cat -- "$1"; else cat; fi | includes | unhtml | toc
}

# "$1" is quoted so that running with no arguments is not a syntax error.
if [ "$1" = "--title" ]
then
    echo "<""html><""head><""title>$2<""/title>$(css)<""/head><""body>"
    shift 2
    main "$1"
    echo "<""/body><""/html>"
else
    main "$1"
fi
#./PRE
#.H2 Bugs
#.P There's no checking for valid input (e.g. pre-formatting tags that never close).
#.P If the input file contains no html mark up, the results are pretty messy.
#.P Recursive includes fail silently if the referenced file does not exist.
#.P
# I don't like the way I need a separate pass to do "unhtml". I tried making it work
# within the code but it got messy.
#.H2 Author
#.URL http://awk.info/?who/timm Tim Menzies