"""Print rank, film and gross for each row of a Wikipedia box-office table.

Downloads the "List of 2014 box office number-one films in the United
States" page, parses it with Beautiful Soup, and walks the rows of the first
``wikitable sortable`` table.  For every row one line is printed: either the
text of cells 0, 2 and 3 (rank, film, gross), or ``n/a`` when the row does
not carry enough ``<td>`` cells to read those columns.
"""
from __future__ import print_function

import csv  # currently unused by this script; kept from the original imports
from contextlib import closing

try:
    import urllib2
except ImportError:
    # Python 3 has no urllib2; urlopen lives in urllib.request.  Binding it
    # to the old name keeps the call site below identical on both versions.
    import urllib.request as urllib2

from bs4 import BeautifulSoup

URL = 'http://en.wikipedia.org/wiki/List_of_2014_box_office_number-one_films_in_the_United_States'

# The loop reads cell indexes 0, 2 and 3, so a row needs at least four <td>
# cells before it can be indexed safely.
MIN_CELLS = 4

# closing() guarantees the HTTP response is released even if read() raises;
# the Python 2 response object is not a context manager on its own.
with closing(urllib2.urlopen(URL)) as boxoffice:
    html = boxoffice.read()  # raw source of the page

# Name the parser explicitly so the parse result does not depend on which
# optional parser libraries happen to be installed.
soup = BeautifulSoup(html, 'html.parser')  # nested representation of the markup

# Only the first table with class "wikitable sortable" is inspected; this
# raises IndexError if the page contains no such table.
table = soup('table', {'class': 'wikitable sortable'})[0]

for row in table.find_all('tr'):
    tds = row('td')
    if len(tds) < MIN_CELLS:
        # Covers rows with no <td> at all (the original 'n/a' case) as well
        # as short rows that would otherwise fail with IndexError below.
        print('n/a')
        continue
    rank = tds[0].text
    film = tds[2].text
    gross = tds[3].text
    print(rank, film, gross)