Sei sulla pagina 1di 2

#!/usr/bin/ruby
# This was a quick hack to download Facebook URLs from
# http://www.facebook.com/directory
#
# @author Ron Bowes
# @date 2010-07-11
require 'net/http'
require 'uri'
# Browser-like User-Agent header sent with every request.
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"

# Captures the href of the first anchor following each UIDirectoryBox_Item
# marker in a directory page.
LINK_PATTERN = /UIDirectoryBox_Item.*?a href="(.*?)"/

# How many times a page is re-fetched after a failed attempt (so a page is
# requested at most MAX_RETRIES + 1 times before it is abandoned).
MAX_RETRIES = 3

# Seconds to wait before retrying after a page that yielded no links.
EMPTY_PAGE_DELAY = 10

# Seconds to wait before retrying after an exception.
ERROR_DELAY = 30

# Downloads a page over HTTP (or HTTPS, depending on the URL scheme).
#
# @param url [URI::HTTP] parsed absolute URL of the page
# @return [String] the response body, or "" when the response carries no body
def fetch_body(url)
  response = Net::HTTP.start(url.host, url.port, :use_ssl => url.scheme == "https") do |http|
    # request_uri keeps the query string and is "/" for an empty path.
    http.get(url.request_uri, { "User-agent" => USER_AGENT })
  end
  response.body.to_s
end

# Pulls every directory link out of a page.
#
# @param body [String] HTML of a directory page
# @return [Array<String>] the captured href values, in document order
def extract_links(body)
  # scan yields one single-element array per match; flatten unwraps them.
  body.scan(LINK_PATTERN).flatten
end

# Fetches one directory page and prints each link it contains to stdout.
# A page that yields no links, or a request that raises, is retried after a
# delay; after MAX_RETRIES retries the page is abandoned.
#
# @param path [String] absolute URL of the directory page
# @return [void]
def crawl_directory(path)
  failures = 0
  loop do
    $stderr.puts("PATH: #{path}")
    begin
      links = extract_links(fetch_body(URI.parse(path)))
      links.each { |link| puts link }
      $stderr.puts("Found %d links!" % links.size)
      return unless links.empty?

      notice = "Found no links"
      delay = EMPTY_PAGE_DELAY
    rescue StandardError => e
      # StandardError only: Interrupt and SystemExit must still be able to
      # stop the script.
      $stderr.puts("ERROR: #{e.message}")
      notice = "Request failed"
      delay = ERROR_DELAY
    end

    failures += 1
    if failures > MAX_RETRIES
      $stderr.puts("Giving up!")
      return
    end
    $stderr.puts("%s, trying again (%d retries left)!" % [notice, MAX_RETRIES - failures + 1])
    sleep(delay)
  end
end

# Entry point: lines of input.txt that mention "directory" are crawled for
# further links; every other line is echoed to stdout unchanged.
File.open("input.txt", "r") do |infile|
  infile.each_line do |line|
    # Drop the line terminator: URI.parse rejects a trailing newline.
    path = line.chomp
    if path.include?("directory")
      crawl_directory(path.strip)
    else
      $stderr.puts("Skipping: #{path}")
      puts(path)
    end
  end
end

Potrebbero piacerti anche