#!/bin/bash
#
# Grab code from .html file and put into a new .c file
#
# Downloads the pages one level below a base URL with wget, renders each
# downloaded file to plain text with lynx, keeps the lines that appear
# after a line containing "The following" and before a line containing
# "DOWNLOAD", and writes them to output/<name>.c.
#
# Usage:    <script> base-url
# Requires: wget, lynx, awk, sed
# Exit:     1 on bad usage, an unsafe download directory name, or if the
#           output directory cannot be created.
#
# Author: Joel Bretheim
# Modified by: Jason Graham
#

if (( $# != 1 )); then
  printf 'Usage: %s base-url\n' "$(basename -- "$0")" >&2
  exit 1
fi

BASE_URL="$1"

# wget --recursive is expected to mirror into a directory named after the
# URL without its scheme, so strip a leading http:// or https://.
DIR_NAME=$(printf '%s\n' "$BASE_URL" | sed -E 's#^https?://##')

# DIR_NAME is handed to "rm -rf", so refuse values that are empty or that
# could point outside the current directory.
case "$DIR_NAME" in
  '' | . | .. | /* | ../* | */.. | */../*)
    printf 'Error: refusing to use "%s" as download directory\n' \
      "$DIR_NAME" >&2
    exit 1
    ;;
esac

rm -rf -- "$DIR_NAME"

# download the relevant html files
# A non-zero wget status (e.g. a single broken link) is reported but not
# fatal: whatever was downloaded is still processed.
wget --verbose --level=1 --recursive \
  --no-parent --continue \
  "$BASE_URL" \
  || printf 'Warning: wget exited with status %d\n' "$?" >&2

# create an output directory for the C code
oname='output/'
rm -rf -- "$oname"
mkdir -- "$oname" || exit 1

# process the html files into C code
for file in "./$DIR_NAME"/*; do
  # Skip sub-directories, and the literal pattern left behind when the
  # glob matches nothing.
  [[ -f "$file" ]] || continue

  # remove file extension
  base=$(basename -- "$file" .html)
  bname=$oname$base

  echo 'Processing ' "$bname" '...'

  # 1. lynx: render the html as plain text
  # 2. awk:  grab the code we want (between the two marker lines)
  # 3. sed:  remove the leading whitespace; note this also flattens any
  #          indentation inside the extracted code
  # Piping the stages together means no intermediate .txt files are
  # created, so no clean-up pass is needed afterwards.
  lynx -dump -nolist "$file" \
    | awk '/The following/{f=1;next} /DOWNLOAD/{f=0} f' \
    | sed 's/^[[:space:]]*//' > "$bname.c"

  #mpicc "$bname.c" -o "$bname.x"
done