#!/bin/bash
#
# Grab code from .html file and put into a new .c file
#
# Author: Joel Bretheim
# Modified by: Jason Graham <jgraham@compukix.net>
#

# Require exactly one argument: the base URL to mirror.
if (( $# != 1 )); then
    # ${0##*/} strips the directory part of the script path without
    # spawning basename, and stays correct when the path contains spaces.
    # Usage errors are diagnostics, so they go to stderr.
    echo "Usage: ${0##*/} base-url" >&2
    exit 1
fi

BASE_URL="$1"

# Directory name used for the mirrored files: the URL with a leading
# http:// or https:// removed. The rest of the script reads the downloaded
# files from ./$DIR_NAME.
DIR_NAME=$(printf '%s\n' "$BASE_URL" | sed -E 's#^https?://##')

# DIR_NAME is about to be handed to "rm -rf", so refuse values that would
# point outside the current directory (empty, absolute, or containing a
# ".." component).
case "$DIR_NAME" in
    ''|/*|..|../*|*/..|*/../*)
        echo "${0##*/}: refusing to use '$DIR_NAME' as the download directory" >&2
        exit 1
        ;;
esac

# Remove any previous download. "--" keeps a name starting with "-" from
# being parsed as an option.
rm -rf -- "$DIR_NAME"

# download the relevant html files (one level of links, never ascending to
# the parent directory). The URL is quoted so that characters such as
# ? * & or spaces reach wget verbatim instead of being globbed or split.
# A non-zero wget status is reported but not fatal, so whatever was
# downloaded is still processed.
wget --verbose --level=1 --recursive \
     --no-parent --continue \
     "$BASE_URL" \
    || echo "wget exited with status $?; continuing with downloaded files" >&2

# start from a fresh, empty directory that will receive the C code
oname="output/"
rm -rf -- "${oname}"
mkdir -- "${oname}"


# process the html files into C code
# For every downloaded file this writes three files under $oname:
#   <base>.temp.txt  plain-text rendering of the page (lynx)
#   <base>.txt       the extracted code section (awk)
#   <base>.c         the final C source (sed)
for file in "./$DIR_NAME"/*
do
    # Skip directories, and the literal pattern that is left behind when
    # the glob matches nothing.
    [[ -f "$file" ]] || continue

    # remove the directory part and the .html extension
    base=$(basename -- "$file" .html)
    bname="$oname$base"

    # render the html as plain text
    echo 'Processing ' "$bname" '...'
    lynx -dump -nolist "$file" > "$bname.temp.txt"

    # grab the code we want: the lines after a line containing
    # "The following" up to (not including) a line containing "DOWNLOAD"
    awk '/The following/{f=1;next} /DOWNLOAD/{f=0} f' "$bname.temp.txt" > "$bname.txt"

    # remove the leading spaces
    # (the previous pattern, [.]*, matched literal dots rather than spaces,
    # so the indentation was never actually stripped)
    sed 's/^ *//' "$bname.txt" > "$bname.c"
    #mpicc $bname.c -o $bname.x
done

echo "Cleaning up..."
# Remove the intermediate .txt files, leaving only the .c files.
# -f keeps this quiet when no file was processed (nothing to delete), and
# "--" protects against names that start with "-".
rm -f -- "${oname}"*.txt