# Script to download hyperlinks from a URL. Python 3.
# Author: James Mayclin
import os
from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse

import requests

# Handles user input and returns it as a tuple.
def inputs():
    """Prompt for the page URL, the download directory and an optional filter.

    Returns:
        A tuple ``(url, path, query_word)``. ``path`` is ``'.'`` when the
        user just presses enter at the path prompt; ``query_word`` is ``''``
        when no filter word is entered.
    """
    # Surrounding whitespace is stripped because pasted URLs and paths often
    # carry a stray space that would otherwise end up in the request or
    # in the directory name.
    url = input(
        "Enter the url of the webpage that contains the links to download: "
    ).strip()

    print("Enter desired download path.")
    path = input("\"./subdirectory\" or press enter for current:").strip()
    if not path:
        path = '.'

    print(
        "Would you like to restrict the downloaded link to a certain word? "
        "e.g. \".pdf\""
    )
    query_word = input("Enter query word or press enter:")

    return url, path, query_word

# Creates the user-specified subdirectory to download files to.
def create_subdirectory(path):
    """Create the download directory ``path`` if it does not exist yet.

    Args:
        path: directory to create; ``'.'`` means the current directory and
            is left untouched.

    Returns:
        None.
    """
    if path == '.':
        return
    # exist_ok=True replaces the separate os.path.exists() check, so an
    # already existing directory is not an error, and missing parent
    # directories are created along the way.
    os.makedirs(path, exist_ok=True)

# Parses HTML text for hyperlinks and returns them in a list.
class _HrefCollector(HTMLParser):
    """HTML parser that records the href value of every <a> start tag."""

    def __init__(self):
        super().__init__()
        self.hrefs = []

    def handle_starttag(self, tag, attrs):
        # HTMLParser lower-cases tag and attribute names but leaves attribute
        # values untouched, so the URL keeps its original case.
        if tag != 'a':
            return
        for name, value in attrs:
            if name == 'href' and value:
                self.hrefs.append(value)
                break


def find_hrefs(request):
    """Return the href targets of all <a> tags in a response, in page order.

    Args:
        request: object with a ``text`` attribute holding the HTML source
            (this script passes the result of ``requests.get``).

    Returns:
        A list of href strings with surrounding quotes removed. Link case is
        preserved, because lower-casing a URL can make it point at a
        different (or missing) resource.
    """
    collector = _HrefCollector()
    collector.feed(request.text)
    collector.close()
    return collector.hrefs

# Removes hyperlinks that don't contain 'query_word'.
def constrain_hrefs(contained, query_word):
    """Keep only the links that contain ``query_word``, ignoring case.

    Args:
        contained: iterable of link strings.
        query_word: substring to look for; an empty string keeps every link.

    Returns:
        A new list with the matching links in their original order.
    """
    # Lower-case the query once instead of once per link.
    needle = query_word.lower()
    return [link for link in contained if needle in link.lower()]

# Downloads the hyperlinks in 'links', found on the page at 'url',
# and saves them under 'path'.
def download(links, path, url):
    """Ask the user what to fetch, then download the chosen links.

    Lists the links, offers download-all / confirm-each / quit, and saves
    every accepted link into ``path``.

    Args:
        links: href values to download; may be relative to ``url`` or
            absolute.
        path: directory the files are written into; it must already exist.
        url: address of the page the links were found on; relative links
            are resolved against it.

    Returns:
        None.
    """
    print("Found {} links matching criteria".format(len(links)))
    if not links:
        # Nothing to offer, so skip the menu entirely.
        return
    for link in links:
        print(link)

    option = input('\n(D)ownload all,(A)gree to each download, or (Q)uit? ')
    choice = option.lower()
    if choice == 'q':
        return
    # Anything other than 'd' falls back to confirming every download.
    check = choice != 'd'

    for link in links:
        if check:
            continueResponse = input("Download " + link + "? (Y) or (N):")
            if continueResponse.lower() == 'n':
                # Declining one file skips only that file.
                continue

        # Links come from an untrusted page: keep only the final path
        # component so a link such as "../../x" cannot write outside `path`.
        filename = os.path.basename(urlparse(link).path)
        if not filename:
            print('Skipping {}: no file name in link'.format(link))
            continue

        print('Requesting ', link)
        # urljoin resolves relative, root-relative and absolute links, and
        # copes with a page URL that has no path component.
        full_url = urljoin(url, link)
        try:
            r = requests.get(full_url)
            r.raise_for_status()
        except requests.RequestException as err:
            # Report and move on rather than saving an error page as the file.
            print('Failed to download {}: {}'.format(link, err))
            continue

        print('Downloading ', link)
        full_path = os.path.join(path, filename)
        with open(full_path, 'wb') as f:
            f.write(r.content)

# Script entry point - calls the functions required for the program to run.
# Gather the user's choices, fetch the page, and pick the links to download.
url, path, query_word = inputs()
r = requests.get(url)
all_links = find_hrefs(r)
to_download = constrain_hrefs(all_links, query_word)

# The target directory must exist before download() opens files inside it.
create_subdirectory(path)
download(to_download, path, url)

