#!/bin/bash
# tohtml.sh
# Convert a text file to HTML format.
# Author: Mendel Cooper
# License: GPL3
# Usage: bash tohtml.sh < textfile > htmlfile
# (Run with bash, not plain sh: the script relies on [[ ... =~ ... ]].)
# Script can easily be modified to accept source and target filenames.
# Assumptions:
# 1) Paragraphs in (target) text file are separated by a blank line.
# 2) Jpeg images (*.jpg) are located in "images" subdirectory.
# 3) Emphasized (italic) phrases begin with a space+underscore,
#+ or with an underscore as the first character on the line,
#+ and end with an underscore+space or underscore+end-of-line.
# Settings
# ---------------------------------------------------------------
# User-adjustable settings.
# ---------------------------------------------------------------
IMGDIR='images'   # Directory prefix placed in front of image file names.
FNTSIZE='2'       # Value written into the <font size=...> tag (small-medium).

# ---------------------------------------------------------------
# Document prologue: doctype and provenance comments.
# ---------------------------------------------------------------
HDR01='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
HDR02='<!-- Converted to HTML by ***tohtml.sh*** script -->'
HDR03='<!-- script author: M. Leo Cooper <thegrendel@theriver.com> -->'

# ---------------------------------------------------------------
# Opening structural tags.
# ---------------------------------------------------------------
HDR10="<html>"
HDR11="<head>"
HDR121='<META NAME="GENERATOR" CONTENT="tohtml.sh script">'
HDR11a="</head>"
# Title tag pair. write_headers does not emit these.
HDR12a="<title>"
HDR12b="</title>"
HDR13='<body bgcolor="#dddddd">' # Change background color to suit.
# The font tag is assembled as HDR14a + FNTSIZE + HDR14b.
HDR14a="<font size="
HDR14b=">"

# ---------------------------------------------------------------
# Closing structural tags.
# ---------------------------------------------------------------
FTR10="</body>"
FTR11="</html>"

# ---------------------------------------------------------------
# Inline markup fragments.
# ---------------------------------------------------------------
LF='<br>'
BOLD='<b>'
CENTER='<center>'
END_CENTER='</center>'
# write_headers
# Print the opening part of the HTML document to stdout: doctype,
# provenance comments, <html>, the <head> section, <body>, the font tag
# and finally the bold tag.
# Globals read: HDR01 HDR02 HDR03 HDR10 HDR11 HDR121 HDR11a HDR13
#               HDR14a HDR14b FNTSIZE BOLD
# Arguments:    none
# Note: the title tags (HDR12a/HDR12b) are not written here.
write_headers() {
  # Doctype, followed by one blank line.
  printf '%s\n\n' "$HDR01"

  # The two provenance comments, followed by two blank lines.
  printf '%s\n' "$HDR02" "$HDR03" '' ''

  # <html>, the complete <head> section, <body>, then a blank line.
  printf '%s\n' "$HDR10" "$HDR11" "$HDR121" "$HDR11a" "$HDR13" ''

  # The font tag is glued together from three pieces on a single line,
  # then followed by a blank line.
  printf '%s%s%s\n\n' "$HDR14a" "$FNTSIZE" "$HDR14b"

  # Everything after this point is rendered bold (more easily readable).
  printf '%s\n' "$BOLD"
}
# process_text
# Read plain text from stdin, one line at a time, and write the
# corresponding HTML fragment to stdout.
#   * A blank (or whitespace-only) line becomes two <br> tags.
#   * A line consisting solely of "[name.jpg]" becomes a centered <img>
#     tag whose src is "$IMGDIR/name.jpg".
#   * In any other line, underscores that follow a space or start the
#     line open an <i> tag; underscores that precede a space or end the
#     line close it. Underscores embedded within a word are left alone.
# Every non-blank output line is surrounded by one empty line on each side.
# Globals read: LF CENTER END_CENTER IMGDIR
# Arguments:    none
# NOTE(review): the characters &, < and > in the input are passed through
# unescaped -- confirm whether the input is allowed to contain raw HTML.
process_text() {
  local line
  # The regex lives in a variable and is used UNQUOTED below: a quoted
  # right-hand side of =~ is matched as a literal string, not as a regex.
  local -r image_re='^\[(.+\.jpg)\]$'
  local -r ital_open='<i>'
  local -r ital_close='</i>'

  # -r keeps backslashes in the text intact.
  # IFS is deliberately left at its default so that read trims surrounding
  # whitespace; a whitespace-only line therefore counts as blank.
  # The "|| [[ -n ... ]]" clause handles a last line with no final newline.
  while read -r line || [[ -n "$line" ]]; do
    if [[ -z "$line" ]]; then
      # Paragraph break: empty line, two <br> tags, empty line.
      printf '\n%s\n%s\n\n' "$LF" "$LF"
      continue
    fi

    if [[ "$line" =~ $image_re ]]; then
      # Graphic: BASH_REMATCH[1] is the file name without the brackets.
      # Image lines skip the italics pass so the tag is never rewritten.
      line="$CENTER <img src=\"$IMGDIR/${BASH_REMATCH[1]}\"> $END_CENTER"
    elif [[ "$line" == *_* ]]; then
      # Convert underscored phrases to italics using parameter expansion
      # (no grep/sed processes per line). "//" replaces every occurrence,
      # so several italic phrases on one line are all converted.
      line="${line// _/ ${ital_open}}"    # space + underscore  -> open
      line="${line//_ /${ital_close} }"   # underscore + space  -> close
      line="${line/#_/${ital_open}}"      # underscore at start -> open
      line="${line/%_/${ital_close}}"     # underscore at end   -> close
    fi

    # printf rather than echo: echo would swallow a line such as "-n".
    printf '\n%s\n\n' "$line"
  done
}
# write_footers
# Print the closing part of the HTML document to stdout.
# write_headers leaves a <font ...> element and, inside it, a <b> element
# open around the whole text; both are closed here, innermost first,
# before the body and the document are closed.
# Globals read: FTR10 FTR11
# Arguments:    none
write_footers() {
  # Close the bold and font elements opened by write_headers.
  printf '%s\n' '</b>' '</font>'

  # Termination tags: </body>, then </html>.
  printf '%s\n' "$FTR10" "$FTR11"
}
# main
# Entry point: write the document header, convert the text arriving on
# stdin, then write the document footer. Its status is that of the last
# step (write_footers).
main() {
  write_headers
  process_text
  write_footers
}

main "$@"
exit $?
# Exercises:
# ---------
# 1) Fixup: Check for closing underscore before a comma or period.
# 2) Add a test for the presence of a closing underscore
#+ in phrases to be italicized.
# AskApache Web Development