# -*- coding: cp936 -*-
"""Scrape the BCS football rankings table and save it as a CSV file.

Downloads http://www.bcsfootball.org/, takes the first <table> whose class
is "mod-data", and writes the text of the first two cells of each row in its
<tbody> to bcsfootball.csv under the header row "Rank", "Name".

Written for Python 2 (urllib2) with BeautifulSoup 4 (bs4).
"""
import csv
import urllib2
from contextlib import closing

from bs4 import BeautifulSoup

# urllib2 responses are not context managers in Python 2, so closing() is
# used to release the connection even if read() raises.
with closing(urllib2.urlopen('http://www.bcsfootball.org/')) as bcsfootball:
    html = bcsfootball.read()  # source code of the website

# NOTE(review): no parser is named here, so bs4 chooses one based on what is
# installed; consider pinning one (e.g. "html.parser") once it is confirmed
# that the ranking table still parses with a <tbody> under that parser.
soup = BeautifulSoup(html)  # represents the code as a nested data structure

# "wb" is the mode the Python 2 csv module expects; the with-block guarantees
# the rows are flushed and the handle is closed when the script finishes.
with open("bcsfootball.csv", "wb") as csv_file:
    f = csv.writer(csv_file)
    f.writerow(["Rank", "Name"])
    for row in soup('table', {'class': 'mod-data'})[0].tbody('tr'):
        tds = row('td')
        if len(tds) < 2:
            # Rows without both a rank and a name cell (e.g. header or
            # spacer rows) carry no ranking data; skip instead of crashing.
            continue
        rank = tds[0].text
        name = tds[1].text
        # bs4 returns unicode, which the Python 2 csv writer cannot emit for
        # non-ASCII characters; encode explicitly to UTF-8 bytes.
        f.writerow([rank.encode("utf-8"), name.encode("utf-8")])

# extracting all the URLs found within a page's <a> tags
# for link in soup.find_all('a'):
#     print(link.get('href'))

# extracting all the text from a page
# print(soup.get_text())