#!/bin/bash
# tohtml.sh

# Convert a text file to HTML format.
# Author: Mendel Cooper
# License: GPL3
# Usage: sh tohtml.sh < textfile > htmlfile
# Script can easily be modified to accept source and target filenames.

#     Assumptions:
# 1) Paragraphs in (target) text file are separated by a blank line.
# 2) Jpeg images (*.jpg) are located in "images" subdirectory.
# 3) Emphasized (italic) phrases begin with a space+underscore
#+   or are the first character on the line,
#+   and end with an underscore+space or underscore+end-of-line.


# Settings
FNTSIZE=2        # Small-medium font size
IMGDIR="images"  # Image directory
# Headers
HDR01='&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"&gt;'
HDR02='&lt;!-- Converted to HTML by ***tohtml.sh*** script --&gt;'
HDR03='&lt;!-- script author: M. Leo Cooper &lt;thegrendel@theriver.com&gt; --&gt;'
HDR10='&lt;html&gt;'
HDR11='&lt;head&gt;'
HDR11a='&lt;/head&gt;'
HDR12a='&lt;title&gt;'
HDR12b='&lt;/title&gt;'
HDR121='&lt;META NAME="GENERATOR" CONTENT="tohtml.sh script"&gt;'
HDR13='&lt;body bgcolor="#dddddd"&gt;'   # Change background color to suit.
HDR14a='&lt;font size='
HDR14b='&gt;'
# Footers
FTR10='&lt;/body&gt;'
FTR11='&lt;/html&gt;'
# Tags
BOLD="&lt;b&gt;"
CENTER="&lt;center&gt;"
END_CENTER="&lt;/center&gt;"
LF="&lt;br&gt;"


write_headers ()
  {
  echo "$HDR01"
  echo
  echo "$HDR02"
  echo "$HDR03"
  echo
  echo
  echo "$HDR10"
  echo "$HDR11"
  echo "$HDR121"
  echo "$HDR11a"
  echo "$HDR13"
  echo
  echo -n "$HDR14a"
  echo -n "$FNTSIZE"
  echo "$HDR14b"
  echo
  echo "$BOLD"        # Everything in bold (more easily readable).
  }


process_text ()
  {
  while read line     # Read one line at a time.
  do
    {
    if [ ! "$line" ]  # Blank line?
    then              # Then new paragraph must follow.
      echo
      echo "$LF"      # Insert two &lt;br&gt; tags.
      echo "$LF"
      echo
      continue        # Skip the underscore test.
    else              # Otherwise . . .

      if [[ "$line" =~ "\[*jpg\]" ]]  # Is a graphic?
      then                            # Strip away brackets.
        temp=$( echo "$line" | sed -e 's/\[//' -e 's/\]//' )
        line=""$CENTER" &lt;img src="\"$IMGDIR"/$temp\"&gt; "$END_CENTER" "
                                      # Add image tag.
                                      # And, center it.
      fi

    fi


    echo "$line" | grep -q _
    if [ "$?" -eq 0 ]    # If line contains underscore ...
    then
      # ===================================================
      # Convert underscored phrase to italics.
      temp=$( echo "$line" |
              sed -e 's/ _/ &lt;i&gt;/' -e 's/_ /&lt;\/i&gt; /' |
              sed -e 's/^_/&lt;i&gt;/'  -e 's/_$/&lt;\/i&gt;/' )
      #  Process only underscores prefixed by space,
      #+ followed by space, or at beginning or end of line.
      #  Do not convert underscores embedded within a word!
      line="$temp"
      # Slows script execution. Can be optimized?
      # ===================================================
    fi



    echo
    echo "$line"
    echo
    } # End while
  done
  }   # End process_text ()


write_footers ()  # Termination tags.
  {
  echo "$FTR10"
  echo "$FTR11"
  }


# main () {
# =========
write_headers
process_text
write_footers
# =========
#         }

exit $?

#  Exercises:
#  ---------
#  1) Fixup: Check for closing underscore before a comma or period.
#  2) Add a test for the presence of a closing underscore
#+    in phrases to be italicized.
AskApache Web Development