mirror of https://github.com/tLDP/LDP
486 lines
17 KiB
Bash
486 lines
17 KiB
Bash
#!/bin/bash
|
|
# wgetter2.bash
|
|
|
|
# Author: Little Monster [monster@monstruum.co.uk]
|
|
# ==> Used in ABS Guide with permission of script author.
|
|
# ==> This script still needs debugging and fixups (exercise for reader).
|
|
# ==> It could also use some additional editing in the comments.
|
|
|
|
|
|
# This is wgetter2 --
|
|
#+ a Bash script to make wget a bit more friendly, and save typing.
|
|
|
|
# Carefully crafted by Little Monster.
|
|
# More or less complete on 02/02/2005.
|
|
# If you think this script can be improved,
|
|
#+ email me at: monster@monstruum.co.uk
|
|
# ==> and cc: to the author of the ABS Guide, please.
|
|
# This script is licenced under the GPL.
|
|
# You are free to copy, alter and re-use it,
|
|
#+ but please don't try to claim you wrote it.
|
|
# Log your changes here instead.
|
|
|
|
# =======================================================================
|
|
# changelog:
|
|
|
|
# 07/02/2005. Fixups by Little Monster.
|
|
# 02/02/2005. Minor additions by Little Monster.
|
|
# (See after # +++++++++++ )
|
|
# 29/01/2005. Minor stylistic edits and cleanups by author of ABS Guide.
|
|
# Added exit error codes.
|
|
# 22/11/2004. Finished initial version of second version of wgetter:
|
|
# wgetter2 is born.
|
|
# 01/12/2004. Changed 'runn' function so it can be run 2 ways --
|
|
# either ask for a file name or have one input on the CL.
|
|
# 01/12/2004. Made sensible handling of no URL's given.
|
|
# 01/12/2004. Made loop of main options, so you don't
|
|
# have to keep calling wgetter 2 all the time.
|
|
# Runs as a session instead.
|
|
# 01/12/2004. Added looping to 'runn' function.
|
|
# Simplified and improved.
|
|
# 01/12/2004. Added state to recursion setting.
|
|
# Enables re-use of previous value.
|
|
# 05/12/2004. Modified the file detection routine in the 'runn' function
|
|
# so it's not fooled by empty values, and is cleaner.
|
|
# 01/02/2004. Added cookie finding routine from later version (which
|
|
# isn't ready yet), so as not to have hard-coded paths.
|
|
# =======================================================================
|
|
|
|
# Error codes for abnormal exit.
|
|
E_USAGE=67 # Usage message, then quit.
|
|
E_NO_OPTS=68 # No command-line args entered.
|
|
E_NO_URLS=69 # No URLs passed to script.
|
|
E_NO_SAVEFILE=70 # No save filename passed to script.
|
|
E_USER_EXIT=71 # User decides to quit.
|
|
|
|
|
|
# Basic default wget command we want to use.
|
|
# This is the place to change it, if required.
|
|
# NB: if using a proxy, set http_proxy = yourproxy in .wgetrc.
|
|
# Otherwise delete --proxy=on, below.
|
|
# ====================================================================
|
|
CommandA="wget -nc -c -t 5 --progress=bar --random-wait --proxy=on -r"
|
|
# ====================================================================
|
|
|
|
|
|
|
|
# --------------------------------------------------------------------
|
|
# Set some other variables and explain them.
|
|
|
|
pattern=" -A .jpg,.JPG,.jpeg,.JPEG,.gif,.GIF,.htm,.html,.shtml,.php"
|
|
# wget's option to only get certain types of file.
|
|
# comment out if not using
|
|
today=`date +%F` # Used for a filename.
|
|
home=$HOME # Set HOME to an internal variable.
|
|
# In case some other path is used, change it here.
|
|
depthDefault=3 # Set a sensible default recursion.
|
|
Depth=$depthDefault # Otherwise user feedback doesn't tie in properly.
|
|
RefA="" # Set blank referring page.
|
|
Flag="" # Default to not saving anything,
|
|
#+ or whatever else might be wanted in future.
|
|
lister="" # Used for passing a list of urls directly to wget.
|
|
Woptions="" # Used for passing wget some options for itself.
|
|
inFile="" # Used for the run function.
|
|
newFile="" # Used for the run function.
|
|
savePath="$home/w-save"
|
|
Config="$home/.wgetter2rc"
|
|
# This is where some variables can be stored,
|
|
#+ if permanently changed from within the script.
|
|
Cookie_List="$home/.cookielist"
|
|
# So we know where the cookies are kept . . .
|
|
cFlag="" # Part of the cookie file selection routine.
|
|
|
|
# Define the options available. Easy to change letters here if needed.
|
|
# These are the optional options; you don't just wait to be asked.
|
|
|
|
save=s # Save command instead of executing it.
|
|
cook=c # Change cookie file for this session.
|
|
help=h # Usage guide.
|
|
list=l # Pass wget the -i option and URL list.
|
|
runn=r # Run saved commands as an argument to the option.
|
|
inpu=i # Run saved commands interactively.
|
|
wopt=w # Allow to enter options to pass directly to wget.
|
|
# --------------------------------------------------------------------
|
|
|
|
|
|
if [ -z "$1" ]; then # Make sure we get something for wget to eat.
|
|
echo "You must at least enter a URL or option!"
|
|
echo "-$help for usage."
|
|
exit $E_NO_OPTS
|
|
fi
|
|
|
|
|
|
|
|
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
# added added added added added added added added added added added added
|
|
|
|
if [ ! -e "$Config" ]; then # See if configuration file exists.
|
|
echo "Creating configuration file, $Config"
|
|
echo "# This is the configuration file for wgetter2" > "$Config"
|
|
echo "# Your customised settings will be saved in this file" >> "$Config"
|
|
else
|
|
source $Config # Import variables we set outside the script.
|
|
fi
|
|
|
|
if [ ! -e "$Cookie_List" ]; then
|
|
# Set up a list of cookie files, if there isn't one.
|
|
echo "Hunting for cookies . . ."
|
|
find -name cookies.txt >> $Cookie_List # Create the list of cookie files.
|
|
fi # Isolate this in its own 'if' statement,
|
|
#+ in case we got interrupted while searching.
|
|
|
|
if [ -z "$cFlag" ]; then # If we haven't already done this . . .
|
|
echo # Make a nice space after the command prompt.
|
|
echo "Looks like you haven't set up your source of cookies yet."
|
|
n=0 # Make sure the counter
|
|
#+ doesn't contain random values.
|
|
while read; do
|
|
Cookies[$n]=$REPLY # Put the cookie files we found into an array.
|
|
echo "$n) ${Cookies[$n]}" # Create a menu.
|
|
n=$(( n + 1 )) # Increment the counter.
|
|
done < $Cookie_List # Feed the read statement.
|
|
echo "Enter the number of the cookie file you want to use."
|
|
echo "If you won't be using cookies, just press RETURN."
|
|
echo
|
|
echo "I won't be asking this again. Edit $Config"
|
|
echo "If you decide to change at a later date"
|
|
echo "or use the -${cook} option for per session changes."
|
|
read
|
|
if [ ! -z $REPLY ]; then # User didn't just press return.
|
|
Cookie=" --load-cookies ${Cookies[$REPLY]}"
|
|
# Set the variable here as well as in the config file.
|
|
|
|
echo "Cookie=\" --load-cookies ${Cookies[$REPLY]}\"" >> $Config
|
|
fi
|
|
echo "cFlag=1" >> $Config # So we know not to ask again.
|
|
fi
|
|
|
|
# end added section end added section end added section end added section
|
|
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
|
|
|
|
# Another variable.
|
|
# This one may or may not be subject to variation.
|
|
# A bit like the small print.
|
|
CookiesON=$Cookie
|
|
# echo "cookie file is $CookiesON" # For debugging.
|
|
# echo "home is ${home}" # For debugging.
|
|
# Got caught with this one!
|
|
|
|
|
|
wopts()
|
|
{
|
|
echo "Enter options to pass to wget."
|
|
echo "It is assumed you know what you're doing."
|
|
echo
|
|
echo "You can pass their arguments here too."
|
|
# That is to say, everything passed here is passed to wget.
|
|
|
|
read Wopts
|
|
# Read in the options to be passed to wget.
|
|
|
|
Woptions=" $Wopts"
|
|
# ^ Why the leading space?
|
|
# Assign to another variable.
|
|
# Just for fun, or something . . .
|
|
|
|
echo "passing options ${Wopts} to wget"
|
|
# Mainly for debugging.
|
|
# Is cute.
|
|
|
|
return
|
|
}
|
|
|
|
|
|
save_func()
|
|
{
|
|
echo "Settings will be saved."
|
|
if [ ! -d $savePath ]; then # See if directory exists.
|
|
mkdir $savePath # Create the directory to save things in
|
|
#+ if it isn't already there.
|
|
fi
|
|
|
|
Flag=S
|
|
# Tell the final bit of code what to do.
|
|
# Set a flag since stuff is done in main.
|
|
|
|
return
|
|
}
|
|
|
|
|
|
usage() # Tell them how it works.
|
|
{
|
|
echo "Welcome to wgetter. This is a front end to wget."
|
|
echo "It will always run wget with these options:"
|
|
echo "$CommandA"
|
|
echo "and the pattern to match: $pattern \
|
|
(which you can change at the top of this script)."
|
|
echo "It will also ask you for recursion depth, \
|
|
and if you want to use a referring page."
|
|
echo "Wgetter accepts the following options:"
|
|
echo ""
|
|
echo "-$help : Display this help."
|
|
echo "-$save : Save the command to a file $savePath/wget-($today) \
|
|
instead of running it."
|
|
echo "-$runn : Run saved wget commands instead of starting a new one -"
|
|
echo "Enter filename as argument to this option."
|
|
echo "-$inpu : Run saved wget commands interactively --"
|
|
echo "The script will ask you for the filename."
|
|
echo "-$cook : Change the cookies file for this session."
|
|
echo "-$list : Tell wget to use URL's from a list instead of \
|
|
from the command-line."
|
|
echo "-$wopt : Pass any other options direct to wget."
|
|
echo ""
|
|
echo "See the wget man page for additional options \
|
|
you can pass to wget."
|
|
echo ""
|
|
|
|
exit $E_USAGE # End here. Don't process anything else.
|
|
}
|
|
|
|
|
|
|
|
list_func() # Gives the user the option to use the -i option to wget,
|
|
#+ and a list of URLs.
|
|
{
|
|
while [ 1 ]; do
|
|
echo "Enter the name of the file containing URL's (press q to change
|
|
your mind)."
|
|
read urlfile
|
|
if [ ! -e "$urlfile" ] && [ "$urlfile" != q ]; then
|
|
# Look for a file, or the quit option.
|
|
echo "That file does not exist!"
|
|
elif [ "$urlfile" = q ]; then # Check quit option.
|
|
echo "Not using a url list."
|
|
return
|
|
else
|
|
echo "using $urlfile."
|
|
echo "If you gave url's on the command-line, I'll use those first."
|
|
# Report wget standard behaviour to the user.
|
|
lister=" -i $urlfile" # This is what we want to pass to wget.
|
|
return
|
|
fi
|
|
done
|
|
}
|
|
|
|
|
|
cookie_func() # Give the user the option to use a different cookie file.
|
|
{
|
|
while [ 1 ]; do
|
|
echo "Change the cookies file. Press return if you don't want to change
|
|
it."
|
|
read Cookies
|
|
# NB: this is not the same as Cookie, earlier.
|
|
# There is an 's' on the end.
|
|
# Bit like chocolate chips.
|
|
if [ -z "$Cookies" ]; then # Escape clause for wusses.
|
|
return
|
|
elif [ ! -e "$Cookies" ]; then
|
|
echo "File does not exist. Try again." # Keep em going . . .
|
|
else
|
|
CookiesON=" --load-cookies $Cookies" # File is good -- use it!
|
|
return
|
|
fi
|
|
done
|
|
}
|
|
|
|
|
|
|
|
run_func()
|
|
{
|
|
if [ -z "$OPTARG" ]; then
|
|
# Test to see if we used the in-line option or the query one.
|
|
if [ ! -d "$savePath" ]; then # If directory doesn't exist . . .
|
|
echo "$savePath does not appear to exist."
|
|
echo "Please supply path and filename of saved wget commands:"
|
|
read newFile
|
|
until [ -f "$newFile" ]; do # Keep going till we get something.
|
|
echo "Sorry, that file does not exist. Please try again."
|
|
# Try really hard to get something.
|
|
read newFile
|
|
done
|
|
|
|
|
|
# -----------------------------------------------------------------------
|
|
# if [ -z ( grep wget ${newfile} ) ]; then
|
|
# Assume they haven't got the right file and bail out.
|
|
# echo "Sorry, that file does not contain wget commands. Aborting."
|
|
# exit
|
|
# fi
|
|
#
|
|
# This is bogus code.
|
|
# It doesn't actually work.
|
|
# If anyone wants to fix it, feel free!
|
|
# -----------------------------------------------------------------------
|
|
|
|
|
|
filePath="${newFile}"
|
|
else
|
|
echo "Save path is $savePath"
|
|
echo "Please enter name of the file which you want to use."
|
|
echo "You have a choice of:"
|
|
ls $savePath # Give them a choice.
|
|
read inFile
|
|
until [ -f "$savePath/$inFile" ]; do # Keep going till
|
|
#+ we get something.
|
|
if [ ! -f "${savePath}/${inFile}" ]; then # If file doesn't exist.
|
|
echo "Sorry, that file does not exist. Please choose from:"
|
|
ls $savePath # If a mistake is made.
|
|
read inFile
|
|
fi
|
|
done
|
|
filePath="${savePath}/${inFile}" # Make one variable . . .
|
|
fi
|
|
else filePath="${savePath}/${OPTARG}" # Which can be many things . . .
|
|
fi
|
|
|
|
if [ ! -f "$filePath" ]; then # If a bogus file got through.
|
|
echo "You did not specify a suitable file."
|
|
echo "Run this script with the -${save} option first."
|
|
echo "Aborting."
|
|
exit $E_NO_SAVEFILE
|
|
fi
|
|
echo "Using: $filePath"
|
|
while read; do
|
|
eval $REPLY
|
|
echo "Completed: $REPLY"
|
|
done < $filePath # Feed the actual file we are using into a 'while' loop.
|
|
|
|
exit
|
|
}
|
|
|
|
|
|
|
|
# Fish out any options we are using for the script.
|
|
# This is based on the demo in "Learning The Bash Shell" (O'Reilly).
|
|
while getopts ":$save$cook$help$list$runn:$inpu$wopt" opt
|
|
do
|
|
case $opt in
|
|
$save) save_func;; # Save some wgetter sessions for later.
|
|
$cook) cookie_func;; # Change cookie file.
|
|
$help) usage;; # Get help.
|
|
$list) list_func;; # Allow wget to use a list of URLs.
|
|
$runn) run_func;; # Useful if you are calling wgetter from,
|
|
#+ for example, a cron script.
|
|
$inpu) run_func;; # When you don't know what your files are named.
|
|
$wopt) wopts;; # Pass options directly to wget.
|
|
\?) echo "Not a valid option."
|
|
echo "Use -${wopt} to pass options directly to wget,"
|
|
echo "or -${help} for help";; # Catch anything else.
|
|
esac
|
|
done
|
|
shift $((OPTIND - 1)) # Do funky magic stuff with $#.
|
|
|
|
|
|
if [ -z "$1" ] && [ -z "$lister" ]; then
|
|
# We should be left with at least one URL
|
|
#+ on the command-line, unless a list is
|
|
#+ being used -- catch empty CL's.
|
|
echo "No URL's given! You must enter them on the same line as wgetter2."
|
|
echo "E.g., wgetter2 http://somesite http://anothersite."
|
|
echo "Use $help option for more information."
|
|
exit $E_NO_URLS # Bail out, with appropriate error code.
|
|
fi
|
|
|
|
URLS=" $@"
|
|
# Use this so that URL list can be changed if we stay in the option loop.
|
|
|
|
while [ 1 ]; do
|
|
# This is where we ask for the most used options.
|
|
# (Mostly unchanged from version 1 of wgetter)
|
|
if [ -z $curDepth ]; then
|
|
Current=""
|
|
else Current=" Current value is $curDepth"
|
|
fi
|
|
echo "How deep should I go? \
|
|
(integer: Default is $depthDefault.$Current)"
|
|
read Depth # Recursion -- how far should we go?
|
|
inputB="" # Reset this to blank on each pass of the loop.
|
|
echo "Enter the name of the referring page (default is none)."
|
|
read inputB # Need this for some sites.
|
|
|
|
echo "Do you want to have the output logged to the terminal"
|
|
echo "(y/n, default is yes)?"
|
|
read noHide # Otherwise wget will just log it to a file.
|
|
|
|
case $noHide in # Now you see me, now you don't.
|
|
y|Y ) hide="";;
|
|
n|N ) hide=" -b";;
|
|
* ) hide="";;
|
|
esac
|
|
|
|
if [ -z ${Depth} ]; then
|
|
# User accepted either default or current depth,
|
|
#+ in which case Depth is now empty.
|
|
if [ -z ${curDepth} ]; then
|
|
# See if a depth was set on a previous iteration.
|
|
Depth="$depthDefault"
|
|
# Set the default recursion depth if nothing
|
|
#+ else to use.
|
|
else Depth="$curDepth" # Otherwise, set the one we used before.
|
|
fi
|
|
fi
|
|
Recurse=" -l $Depth" # Set how deep we want to go.
|
|
curDepth=$Depth # Remember setting for next time.
|
|
|
|
if [ ! -z $inputB ]; then
|
|
RefA=" --referer=$inputB" # Option to use referring page.
|
|
fi
|
|
|
|
WGETTER="${CommandA}${pattern}${hide}${RefA}${Recurse}\
|
|
${CookiesON}${lister}${Woptions}${URLS}"
|
|
# Just string the whole lot together . . .
|
|
# NB: no embedded spaces.
|
|
# They are in the individual elements so that if any are empty,
|
|
#+ we don't get an extra space.
|
|
|
|
if [ -z "${CookiesON}" ] && [ "$cFlag" = "1" ] ; then
|
|
echo "Warning -- can't find cookie file"
|
|
# This should be changed,
|
|
#+ in case the user has opted to not use cookies.
|
|
fi
|
|
|
|
if [ "$Flag" = "S" ]; then
|
|
echo "$WGETTER" >> $savePath/wget-${today}
|
|
# Create a unique filename for today, or append to it if it exists.
|
|
echo "$inputB" >> $savePath/site-list-${today}
|
|
# Make a list, so it's easy to refer back to,
|
|
#+ since the whole command is a bit confusing to look at.
|
|
echo "Command saved to the file $savePath/wget-${today}"
|
|
# Tell the user.
|
|
echo "Referring page URL saved to the file$ \
|
|
savePath/site-list-${today}"
|
|
# Tell the user.
|
|
Saver=" with save option"
|
|
# Stick this somewhere, so it appears in the loop if set.
|
|
else
|
|
echo "*****************"
|
|
echo "*****Getting*****"
|
|
echo "*****************"
|
|
echo ""
|
|
echo "$WGETTER"
|
|
echo ""
|
|
echo "*****************"
|
|
eval "$WGETTER"
|
|
fi
|
|
|
|
echo ""
|
|
echo "Starting over$Saver."
|
|
echo "If you want to stop, press q."
|
|
echo "Otherwise, enter some URL's:"
|
|
# Let them go again. Tell about save option being set.
|
|
|
|
read
|
|
case $REPLY in
|
|
# Need to change this to a 'trap' clause.
|
|
q|Q ) exit $E_USER_EXIT;; # Exercise for the reader?
|
|
* ) URLS=" $REPLY";;
|
|
esac
|
|
|
|
echo ""
|
|
done
|
|
|
|
|
|
exit 0
|