#!/bin/bash
#
# Download the .html pages under a base URL, grab the code embedded in each
# page, and put it into a new .c file (one per page) under output/
#
# Author: Joel Bretheim
# Modified by: Jason Graham
#
# Usage: <script> base-url
#
# Downloads the pages one level below base-url with wget, renders each one to
# plain text with lynx, keeps the lines between a line containing
# "The following" and the next line containing "DOWNLOAD", and writes them to
# output/<page-name>.c.
#
# Side effects: deletes and recreates ./output/, and deletes the local
# directory wget uses for the download (the URL without its scheme).
# Exit status: 1 on bad usage, an unsafe directory name, a missing download
# directory, or failure to create the output directory.

# Treat unset variables as errors and let a pipeline fail if any stage fails.
set -u -o pipefail

if (( $# != 1 )); then
  echo "Usage: $(basename -- "$0") base-url" >&2
  exit 1
fi

BASE_URL="$1"

# wget --recursive stores files under a directory named after the URL without
# its scheme, so strip a leading http:// or https://.
DIR_NAME=$(printf '%s\n' "$BASE_URL" | sed -E 's#^https?://##')

# DIR_NAME is passed to "rm -rf" below; refuse values that could point outside
# the current directory (empty, absolute, dot-prefixed, option-like, or
# containing a ".." component).
case "$DIR_NAME" in
  '' | [./-]* | */.. | */../*)
    echo "Refusing to use unsafe download directory '$DIR_NAME'" >&2
    exit 1
    ;;
esac

# start from a clean download directory
rm -rf -- "./$DIR_NAME"

# download the relevant html files
# wget may report errors for individual pages; keep going with whatever
# was fetched rather than aborting.
if ! wget --verbose --level=1 --recursive \
    --no-parent --continue \
    "$BASE_URL"; then
  echo "Warning: wget reported errors; processing whatever was downloaded" >&2
fi

if [[ ! -d "./$DIR_NAME" ]]; then
  echo "Download directory './$DIR_NAME' does not exist; nothing to process" >&2
  exit 1
fi

# create an output directory for the C code
oname='output/'
rm -rf -- "$oname"
mkdir -- "$oname" || exit 1

# process the html files into C code
for file in "./$DIR_NAME"/*; do
  # skip sub-directories and the literal pattern left by an unmatched glob
  [[ -f "$file" ]] || continue

  # remove file extension
  base=$(basename -- "$file" .html)
  bname=$oname$base

  echo 'Processing ' "$bname" '...'

  # Render the page as text, keep only the region between the two marker
  # lines, and strip leading blanks from each kept line.
  # NOTE(review): the previous sed pattern '^[.]*' matched literal dots only,
  # so it never removed the leading spaces its comment described.
  if ! lynx -dump -nolist "$file" \
    | awk '/The following/ { f = 1; next }
           /DOWNLOAD/      { f = 0 }
           f               { sub(/^[ \t]+/, ""); print }' \
      > "$bname.c"; then
    echo "Warning: failed to process $file" >&2
  fi
  #mpicc $bname.c -o $bname.x
done