#.H1 AWKWORDS
#.H2 Synopsis
#.P awkwords --title "Title" file > file.html
#.P awkwords file > file.html
#.H2 Download
#.P This code requires gawk and bash. To download:
#.PRE
#wget http://lawker.googlecode.com/svn/fridge/lib/bash/awkwords
#chmod +x awkwords
#./PRE
#.P To test the code, apply it to itself:
#.UL
#.LI ./awkwords --title "Does this work?" awkwords > awkwards.html
#./UL
#
#.H2 Description
#.P
# AwkWords is a simple-to-use markup language
# for writing documentation for programs whose comment lines
# start with "#" and whose comments contain HTML code.
#.P
# For example,
#.URL http://awk.info/?tools/awkwords awk.info?tools/awkwords
# shows the html generated from
#.URL http://lawker.googlecode.com/svn/fridge/lib/bash/awkwords this bash script.
#.P
# When used with the --title option, a stand alone web page is generated
# (to control the style of that page, see the CSS function, discussed below).
# When used without --title it generates some html suitable for inclusion
# into other pages.
#.P
# Also, AwkWords finds all the &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;,
# &lt;h6&gt;, &lt;h7&gt;, &lt;h8&gt;, &lt;h9&gt; headings and copies them to a table
# of contents at the front of the file.
# Note that AwkWords assumes that the file contains only one
# &lt;h1&gt; heading; this is printed before the table of contents.
#.P
# AwkWords adds some short cuts for HTML markup, as well as including
# nested contents (see below: "including nested content"). This is useful for including, say,
# program output along with the actual program.
#.H3 Extra Markup
#.H4 Short cuts for HTML
#.DL
#.DT #.XX
#.DD This is replaced by &lt;XX&gt;.
#.DT #.XX words
#.DD
# This is replaced by &lt;XX&gt;words&lt;/XX&gt;. Note that
# this tag won't work properly if the source text spills over more than
# one line.
#.DT #.TO url words
#.DD This is replaced by a link to mail to url.
#.DT #.URL url words
#.DD This is replaced by a link to url.
#./DL
#.H4 Including nested content:
#.DL
#.DT #.IN file
#.DD This line is replaced by the contents of file.
#.DT #.LISTING file
#.DD This line is replaced by a verbatim display of file (no formatting).
#.DT #.CODE file
#.DD This line is replaced by the name of the file, followed verbatim by file (no formatting).
#.DT #.BODY file
#.DD This line is replaced by file, less the lines before the first blank line.
#./DL
#./UL
#.H2 Programmer's Guide
#.P
# Awkwords is divided into three functions:
# unhtml fixes the printing of pre-formatted blocks;
# toc adds the table of contents while
# includes handles the details of the extra mark-up.
#.H3 Functions
#.H4 unhtml
#.P
# unhtml reads the file named by its first argument (or standard input when
# no argument is given). Between a line that starts with an opening PRE tag
# and a line that starts with a closing PRE tag, every less-than sign is
# replaced by its HTML entity so the text is shown rather than interpreted.
# The two tag lines themselves, and every line outside such a block, are
# copied through unchanged. Tag matching ignores case.
#.PRE
unhtml() { cat -- "${1:--}" | gawk '
  BEGIN      {IGNORECASE=1}
  /^[<]PRE>/ {In=1; print; next}
  /^<\/PRE>/ {In=0; print; next}
             # "\\&" yields a literal ampersand; a bare "&" would mean
             # "the matched text" in a gsub replacement.
  In         {gsub("<","\\&lt;",$0); print; next }
             {print $0 }'
}
#./PRE
#.H4 toc 
#.P
# toc reads the file named by its first argument (or standard input when
# no argument is given). A line starting with an h1 tag is remembered and
# printed first. Every line starting with an h2 to h9 tag gets a numbered
# anchor in front of it, and a linked copy of its title is added to the
# table of contents. The contents are printed inside a div with id and
# class "htmltoc", followed by all remaining lines inside a div with id
# "htmlbody". Tag matching ignores case.
#.PRE
toc() { cat -- "${1:--}" | gawk '
 BEGIN             { IGNORECASE = 1 }
 /^<[h]1>/         { Header=$0; next}
 /^[<]h[23456789]>/  { 
       T++
       # The level digit is the third character of the line; the title is
       # what follows the tag, less the closing tag and surrounding blanks.
       Level = substr($0,3,1)
       Title = substr($0,5)
       sub(/<\/h[0-9]>.*$/,"",Title)
       gsub(/^[ \t]+|[ \t]+$/,"",Title)
       Toc[T] = "<" "h" Level "><" "font color=black>&bull; <" "a href=#" T ">" \
                Title "<" "/a><" "/font><" "/h" Level ">"
       # Anchor that the contents entry links to; the next rule puts it
       # in front of the heading line itself.
       Pre = "<" "a name=" T "><" "/a>" }
     { Line[++N] = Pre $0; Pre="" }
 END { print Header
       print "<" "h2>Contents<" "/h2>"
       # The class is what the div.htmltoc rules of the css function select.
       print "<" "div id=\"htmltoc\" class=\"htmltoc\">"
       for(I=1;I<=T;I++) print Toc[I]
       print "<" "/div>"
       print "<" "div id=\"htmlbody\">"
       for(I=1;I<=N;I++) print Line[I]
       print "<" "/div>"
     }'
}
#./PRE
#.H4  includes
#.P       The  xpand function controls recursive inclusion of content. Note that 
#.UL       
#.LI        The last act of this function must be to call xpand1.
#.LI        
#           When including verbatim text, the recursive call to xpands 
#           must pass "1" as the second parameter.
#./UL
#.PRE
includes() { cat -- "${1:--}" | gawk '
function xpand(pre,  tmp) {
   # Directive names are matched exactly, so other tags that merely begin
   # with the same letters fall through to the generic short cuts.
   if      ($1 ~ /^#\.IN$/)      xpands($2,pre) 
   else if ($1 ~ /^#\.BODY$/)    xpandsBody($2,pre)
   else if ($1 ~ /^#\.LISTING$/) {
       print "<" "pre>"
       xpands($2,1)     # <===== note the recursive call with "1"
       print "<" "/pre>" } 
   else if ($1 ~ /^#\.CODE$/) {
       print "<" "p>" $2 "\n<" "pre>"
       xpands($2,1)     # <===== note the recursive call with "1"
       print "<" "/pre>" } 
   else if ($1 ~ /^#\.URL$/) {
       # Blank out the directive and the address; what is left is the link text.
       tmp = $2; $1=$2=""
       print "<" "a href=\"" tmp "\">" trim($0) "<" "/a>"
       }
   else if ($1 ~ /^#\.TO$/) {
       tmp = $2; $1=$2=""
       print "<" "a href=\"mailto:" tmp "\">" trim($0) "<" "/a>"
       }
   else 
       xpand1(pre)
}
#./PRE
#.P       
#       The xpand1 function controls the printing of a single line.
#       If we are formatting verbatim text, we must remove the start-of-html character.
#       Otherwise, we expand any html shortcuts and drop the leading comment character.
#.PRE
function xpand1(pre) {
   if (pre)
        gsub("<","\\&lt;",$0)  # <=== "\\&" is a literal ampersand in a replacement
   else {
        $0= xpandHtml($0)      # <=== expand html short cuts
        sub(/^#/,"",$0) }
   print $0 
}
#./PRE
#.P     The function xpandHtml controls the html short cuts. It works on the
#       current line: a tag followed by words is closed again after the words,
#       a tag on its own is left open.
#.PRE
function xpandHtml(    str,tag) {
   if ($0 ~ /^#\.H1/) {         
       $1=""
       return "<" "h" "1>" $0 "<" "/h" "1>" }
   if (sub(/^#\./,"",$1)) {
       tag=$1;  $1=""
       return "<" tag ">"  (($0 ~ /^[ \t]*$/) ? "" : $0 "<" "/" tag ">")
   }
   return $0
}
#./PRE
#.P The rest of the code is just some book-keeping and managing the recursive addition of content.
#   Each file is expanded at most once; a file that cannot be read adds nothing.
#.PRE
function xpands(f,pre) {
     if (newFile(f)) {
         while ((getline < f) > 0) xpand(pre)
         close(f) }
}
function xpandsBody(f,pre, using) {
     if (newFile(f)) { 
         while ((getline < f) > 0) {
             # Skip everything up to the first blank line; expand from there on.
             if ( !using && ($0 ~ /^[\t ]*$/) ) using = 1
             if ( using ) xpand(pre) }
         close(f) }
}
function newFile(f) { return ++Seen[f]==1 }
function trim (s)   { sub(/^[ \t]*/,"",s);  sub(/[ \t]*$/,"",s); return s } 

BEGIN { IGNORECASE=1 }
      { xpand()      }'
}
#./PRE
#.H3 CSS styles
#.P 
#    If used to generate a full web page, then the following styles are added.
#    Note that the htmltoc class controls the appearance of the table of contents.
#.PRE
css() {
  # Print a complete STYLE element for the stand-alone page. Each contents
  # level is indented 30px further than the one above it; h2 to h9 are the
  # heading levels that toc copies into the table of contents.
  echo "<""STYLE type=\"text/css\">"
  cat <<'EOF'
         div.htmltoc h2 { font-size: medium; font-weight: normal;
                          margin: 0 0 0 0; margin-left: 30px;}
         div.htmltoc h3 { font-size: medium; font-weight: normal;
                          margin: 0 0 0 0; margin-left: 60px;}
         div.htmltoc h4 { font-size: medium; font-weight: normal;
                          margin: 0 0 0 0; margin-left: 90px;}
         div.htmltoc h5 { font-size: medium; font-weight: normal;
                          margin: 0 0 0 0; margin-left: 120px;}
         div.htmltoc h6 { font-size: medium; font-weight: normal;
                          margin: 0 0 0 0; margin-left: 150px;}
         div.htmltoc h7 { font-size: medium; font-weight: normal;
                          margin: 0 0 0 0; margin-left: 180px; }
         div.htmltoc h8 { font-size: medium; font-weight: normal;
                          margin: 0 0 0 0; margin-left: 210px; }
         div.htmltoc h9 { font-size: medium; font-weight: normal;
                          margin: 0 0 0 0; margin-left: 240px; }
EOF
  # Close the element so the rest of the page is not read as style rules.
  echo "<""/STYLE>"
}
#./PRE
#.H3 Main command line
#.PRE       
main() {
  # Run the three passes in order: expand the mark-up, escape pre-formatted
  # blocks, then add the table of contents. The file name is quoted so names
  # with blanks work; with no (or an empty) argument standard input is read.
  cat -- "${1:--}" | includes | unhtml | toc
}

if [ "${1:-}" = "--title" ]; then
  # Stand-alone page: wrap the generated html in a full document.
  if [ "$#" -lt 2 ]; then
    echo "usage: awkwords [--title \"Title\"] [file]" >&2
    exit 2
  fi
  echo "<""html><""head><""title>$2<""/title>$(css)<""/head><""body>"
  shift 2
  main "${1:-}"
  echo "<""/body><""/html>"
else
  # Fragment only, suitable for inclusion into another page.
  main "${1:-}"
fi
#./PRE
#.H2 Bugs
#.P There's no checking for valid input (e.g. pre-formatting tags that never close).
#.P If the input file contains no html mark up, the results are pretty messy.
#.P Recursive includes fail silently if the referenced file does not exist.
#.P 
#  I don't like the way I need a separate pass to do "unhtml". I tried making it work
#  within the code but it got messy.

#.H2 Author
#.URL  http://awk.info/?who/timm   Tim Menzies