1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
| from bs4 import BeautifulSoup import requests import pymysql
# Offset sent as the `start` query parameter; getMovieList() advances it by
# 25 after each page it processes.
count = 0
# Name of a text file; not referenced anywhere in the visible part of this
# script.
FILE_PATH = '豆瓣电影TOP250.txt'
# MySQL credentials and schema name handed to pymysql.connect() below.
USERNAME = 'root'
PASSWORD = '123456'
DATABASE = 'ForPy'
def getMovieList():
    """Scrape every page of the Douban Top 250 list and store each movie.

    Starting from the module-level ``count`` offset, fetches one page of 25
    movies at a time, extracts the title, rating, one-line quote and poster
    URL of every ``div.item`` element, prints them and passes them to
    ``insert()``. ``count`` is advanced by 25 after each page, so it equals
    250 once the whole list has been processed.

    Movies that have no quote (``span.inq``) are stored with an empty
    description.
    """
    global count
    page_size = 25
    total = 250

    # Iterate instead of recursing once per page; offsets 0, 25, ..., 225
    # cover all 250 entries, so stop as soon as the offset reaches 250
    # (the previous ``>= 255`` check requested one extra page past the end).
    while count < total:
        resp = requests.get(
            'https://movie.douban.com/top250?start=' + str(count),
            timeout=10,  # do not hang forever on a stalled connection
        )
        bs = BeautifulSoup(resp.text, 'html.parser')
        for item in bs.find_all('div', {'class': 'item'}):
            title = item.find('span', {'class': 'title'}).get_text()
            rating_num = item.find('span', {'class': 'rating_num'}).get_text()
            img = item.find('img')['src']
            tag = item.find('span', {'class': 'inq'})
            # Always bind desc for this movie: previously it was only set
            # when the quote existed, which raised NameError or silently
            # reused the previous movie's quote.
            desc = tag.get_text() if tag is not None else ''
            print('标题:', title, ' 评分:', rating_num, ' 描述:', desc,
                  ' image_url:', img)
            insert(title, rating_num, desc, img)
        count += page_size

    print('获取完毕')
def insert(title, rating_num, desc, image_url):
    """Insert one movie row into the ``doubanTop250`` table and commit.

    The statement has no column list, so the four values are written in the
    table's own column order: title, rating, description, image URL.

    Args:
        title: Movie title.
        rating_num: Rating text as scraped from the page.
        desc: One-line quote for the movie (may be empty).
        image_url: URL of the poster image.
    """
    # Let the driver escape the values: scraped text may contain quotes or
    # backslashes that would break (or inject into) a string-built statement.
    sql = "INSERT INTO `doubanTop250` VALUES (%s, %s, %s, %s)"
    args = (title, rating_num, desc, image_url)

    # The context manager closes the cursor even if execute() raises.
    with db.cursor() as cursor:
        # mogrify() renders the statement exactly as it will be sent.
        print(cursor.mogrify(sql, args))
        result = cursor.execute(sql, args)
    print("affect rows: " + str(result))
    db.commit()
# Module-level connection shared with insert(). Keyword arguments are used
# because recent PyMySQL releases no longer accept positional ones, and the
# charset is given to connect() so it is in effect from the handshake on.
db = pymysql.connect(
    host='localhost',
    user=USERNAME,
    password=PASSWORD,
    database=DATABASE,
    charset='utf8',
)
try:
    getMovieList()
finally:
    # Release the connection even when scraping or inserting fails.
    db.close()
|