#.H1 AWKWORDS
#.H2 Synopsis
#.P awkwords --title "Title" file > file.html
#.P awkwords file > file.html
#.H2 Download
#.P This code requires gawk and bash. To download:
#.PRE
#wget http://lawker.googlecode.com/svn/fridge/lib/bash/awkwords
#chmod +x awkwords
#./PRE
#.P To test the code, apply it to itself:
#.UL
#.LI ./awkwords --title "Does this work?" awkwords > awkwards.html
#./UL
#
#.H2 Description
#.P
# AwkWords is a simple-to-use markup language
# for writing documentation for programs whose comment lines
# start with "#" and whose comments contain HTML code.
#.P
# For example,
#.URL http://awk.info/?tools/awkwords awk.info?tools/awkwords
# shows the html generated from
#.URL http://lawker.googlecode.com/svn/fridge/lib/bash/awkwords this bash script.
#.P
# When used with the --title option, a stand alone web page is generated
# (to control the style of that page, see the CSS function, discussed below).
# When used without --title it generates some html suitable for inclusion
# into other pages.
#.P
# Also, AwkWords finds all the &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;,
# &lt;h6&gt;, &lt;h7&gt;, &lt;h8&gt;, &lt;h9&gt; headings and copies them to a table
# of contents at the front of the file.
# Note that AwkWords assumes that the file contains only one
# &lt;h1&gt; heading - this is printed before the table of contents.
#.P
# AwkWords adds some short cuts for HTML markup, as well as including
# nested contents (see below: "including nested content"). This is useful for including, say,
# program output along with the actual program.
#.H3 Extra Markup
#.H4 Short cuts for HTML
#.DL
#.DT #.XX
#.DD This is replaced by &lt;XX&gt;.
#.DT #.XX words
#.DD
# This is replaced by &lt;XX&gt;words&lt;/XX&gt;. Note that
# this tag won't work properly if the source text spills over more than
# one line.
#.DT #.TO url words
#.DD This is replaced by a link to mail to url.
#.DT #.URL url words
#.DD This is replaced by a link to url.
#./DL
#.H4 Including nested content:
#.DL
#.DT #.IN file
#.DD This line is replaced by the contents of file.
#.DT #.LISTING file
#.DD This line is replaced by the name of the file, followed by a verbatim display of file (no formatting).
#.DT #.CODE file
#.DD This line is replaced by the name of the file, followed verbatim by file (no formatting).
#.DT #.BODY file
#.DD This line is replaced by file, less the lines before the first blank line.
#./DL
#./UL
#.H2 Programmer's Guide
#.P
# Awkwords is divided into three functions:
# unhtml fixes the printing of pre-formatted blocks;
# toc adds the table of contents while
# includes handles the details of the extra mark-up.
#.H3 Functions
#.H4 unhtml
#.PRE
# unhtml [file]
#   Copies file (or standard input when no file is given) to standard output.
#   On every line between a line starting with the opening PRE tag and a line
#   starting with the closing PRE tag (any letter case), each "<" is replaced
#   by the entity "&lt;" so the block is displayed literally by a browser.
#   The two delimiter lines themselves are printed unchanged.
unhtml() { cat -- ${1:+"$1"} | gawk '
  BEGIN      { IGNORECASE=1 }
  /^<PRE>/   { In=1; print; next }
  /^<\/PRE>/ { In=0; print; next }
  # "\\&" is a literal ampersand in a gsub replacement (a bare one means "the match")
  In         { gsub("<","\\&lt;",$0); print; next }
             { print $0 }'
}
#./PRE
#.H4 toc
#.PRE
# toc [file]
#   Reads html from file (or standard input when no file is given) and prints:
#   the h1 line (if any), a "Contents" heading, a div with id "htmltoc" holding
#   one linked entry per h2..h9 heading, then a div with id "htmlbody" holding
#   every other input line. Each heading in the body is prefixed with a named
#   anchor (the heading counter) that its contents entry links to.
#   Headings are only recognised when the tag starts the line.
toc() { cat -- ${1:+"$1"} | gawk '
  BEGIN              { IGNORECASE = 1 }
  /^<[h]1>/          { Header=$0; next }
  /^[<]h[23456789]>/ {
      T++
      # groups: 1=text before the tag, 2=heading level, 3=heading text,
      #         4=closing level, 5=text after the tag
      Toc[T] = gensub(/(.*)<h(.*)>[ \t]*(.*)[ \t]*<\/h(.*)>(.*)/,
                      "<""h\\2><""font color=black>\\&bull;<""/font> <""a href=#" T ">\\3<""/a><""/h\\2>",
                      "g",$0)
      Pre="<""a name=" T "><""/a>" }
  { Line[++N] = Pre $0; Pre="" }
  END { print Header
        print "<" "h2>Contents<" "/h2>"
        print "<" "div id=\"htmltoc\">"
        for(I=1;I<=T;I++) print Toc[I]
        print "<" "/div>"
        print "<" "div id=\"htmlbody\">"
        for(I=1;I<=N;I++) print Line[I]
        print "<" "/div>"
  }'
}
#./PRE
#.H4 includes
#.P The xpand function controls recursive inclusion of content. Note that
#.UL
#.LI The last act of this function must be to call xpand1.
#.LI
# When including verbatim text, the recursive call to xpands
# must pass "1" as the second parameter.
#./UL
#.PRE
# includes [file]
#   Reads file (or standard input when no file is given), expands the
#   AwkWords mark-up on each line and writes the resulting html to standard
#   output. Tag names are matched without regard to letter case.
includes() { cat -- ${1:+"$1"} | gawk '
# Dispatch on the first field of the current record.
# pre : true when the current text must be printed verbatim
# tmp : local scratch variable
function xpand(pre,  tmp) {
  if      ($1 ~ "^#.IN")      xpands($2,pre)
  else if ($1 ~ "^#.BODY" )   xpandsBody($2,pre)
  else if ($1 ~ "^#.LISTING") {
     print "<" "pre>"
     xpands($2,1)  # <===== note the recursive call with "1"
     print "<" "/pre>" }
  else if ($1 ~ "^#.CODE") {
     print "<" "p>" $2 "\n<" "pre>"
     xpands($2,1)  # <===== note the recursive call with "1"
     print "<" "/pre>" }
  else if ($1 ~ "^#.URL") {
     tmp = $2; $1=$2="";
     print "<" "a href=\""tmp"\">" trim($0) "<" "/a>"
  }
  else if ($1 ~ "^#.TO") {
     tmp = $2; $1=$2="";
     print "<" "a href=\"mailto:"tmp"\">" trim($0) "<" "/a>"
  }
  else
     xpand1(pre)
}
#./PRE
#.P
# The xpand1 function controls the printing of a single line.
# If we are formatting verbatim text, we must remove the start-of-html character "<".
# Otherwise, we expand any html shortcuts.
#.PRE
function xpand1(pre) {
  if (pre)
     gsub("<","\\&lt;",$0)   # <=== remove start-of-html-character
  else {
     $0= xpandHtml($0)       # <=== expand html short cuts
     sub(/^#/,"",$0) }
  print $0
}
#./PRE
#.P The function xpandHtml controls the html short cuts
#.PRE
# Works on the current record ($0); str is accepted but not used.
# "#.H1 words" becomes an h1 element; "#.XX" becomes an opening XX tag;
# "#.XX words" becomes an XX element wrapping words; anything else is
# returned unchanged.
function xpandHtml(  str,tag) {
  if ($0 ~ /^#\.H1/) {
     $1=""
     return "<" "h""1>" $0 "<" "/h1>" }
  if (sub(/^#\./,"",$1)) {
     tag=$1; $1=""
     return "<" tag ">" (($0 ~ /^[ \t]*$/) ? "" : $0 "<" "/" tag ">")
  }
  return $0
}
#./PRE
#.P The rest of the code is just some book-keeping and managing the recursive addition of content.
#.PRE
# Expand every line of file f; a file is only ever expanded once.
function xpands(f,pre) {
  if (newFile(f)) {
     while((getline < f) > 0) xpand(pre)
     close(f) }
}
# As xpands, but skip the lines that come before the first blank line of f.
function xpandsBody(f,pre,  using) {
  if (newFile(f)) {
     while((getline < f) > 0) {
        if ( !using && ($0 ~ /^[\t ]*$/) ) using = 1
        if ( using ) xpand(pre)}
     close(f) }
}
function newFile(f) { return ++Seen[f]==1 }
function trim(s)    { sub(/^[ \t]*/,"",s); sub(/[ \t]*$/,"",s); return s }
BEGIN { IGNORECASE=1 }
      { xpand() }'
}
#./PRE
#.H3 CSS styles
#.P
# If used to generate a full web page, then the following styles are added.
# Note that the htmltoc class controls the appearance of the table of contents.
#.PRE
# css
#   Prints a complete STYLE element (opening tag, rules, closing tag) for the
#   stand-alone page. Each rule is written for both "div#htmltoc" (the id used
#   on the table-of-contents div) and "div.htmltoc", so it applies whichever
#   way the div is marked.
css() {
  echo "<""STYLE type=\"text/css\">"
  cat <<'EOF'
div#htmltoc h2, div.htmltoc h2 { font-size: medium; font-weight: normal;
   margin: 0 0 0 0; margin-left: 30px;}
div#htmltoc h3, div.htmltoc h3 { font-size: medium; font-weight: normal;
   margin: 0 0 0 0; margin-left: 60px;}
div#htmltoc h4, div.htmltoc h4 { font-size: medium; font-weight: normal;
   margin: 0 0 0 0; margin-left: 90px;}
div#htmltoc h5, div.htmltoc h5 { font-size: medium; font-weight: normal;
   margin: 0 0 0 0; margin-left: 120px;}
div#htmltoc h6, div.htmltoc h6 { font-size: medium; font-weight: normal;
   margin: 0 0 0 0; margin-left: 150px;}
div#htmltoc h7, div.htmltoc h7 { font-size: medium; font-weight: normal;
   margin: 0 0 0 0; margin-left: 180px; }
EOF
  echo "<""/STYLE>"
}
#./PRE
#.H3 Main command line
#.PRE
# main [file]
#   Runs file (or standard input when file is missing or empty) through the
#   includes, unhtml and toc passes, in that order.
main() { cat -- ${1:+"$1"} | includes | unhtml | toc; }
# Command line: awkwords [--title "Title"] [file]
# With --title a complete page (html, head, title, styles, body) is written;
# without it only the html fragment produced by main is written.
if [ "${1:-}" = "--title" ]
then
  if [ "$#" -lt 2 ]
  then
    # without a title "shift 2" would fail and "--title" would be read as a file
    echo "usage: ${0##*/} [--title \"Title\"] [file]" >&2
    exit 2
  fi
  echo "<""html><""head><""title>$2<""/title>$(css)<""/head><""body>"
  shift 2
  main "${1:-}"
  echo "<""/body><""/html>"
else
  main "${1:-}"
fi
#./PRE
#.H2 Bugs
#.P There's no checking for valid input (e.g. pre-formatting tags that never close).
#.P If the input file contains no html mark up, the results are pretty messy.
#.P Recursive includes fail silently if the referenced file does not exist.
#.P
# I don't like the way I need a separate pass to do "unhtml". I tried making it work
# within the code but it got messy.
#.H2 Author
#.URL http://awk.info/?who/timm Tim Menzies