인공지능 공부/남박사의 파이썬 실전
(인프런) 파이썬 실전 네이버영화평점 크롤링 하기
앨런튜링_
2021. 5. 30. 20:56
import requests
from bs4 import BeautifulSoup
def get_movie_point(start, end=1, *, timeout=10):
    """Scrape netizen movie ratings from Naver Movie rating-list pages.

    Pages ``start`` through ``end`` (inclusive) are requested one at a time
    and every rating row found in the ``table.list_netizen`` table is
    collected.

    Args:
        start: First page number to fetch.
        end: Last page number to fetch (inclusive). When ``end`` is smaller
            than ``start`` no request is made and an empty list is returned.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of dicts with the keys ``number``, ``movie``, ``point``,
        ``writer`` and ``contents``. Every value is text taken from the page.

    Raises:
        requests.RequestException: If a request times out, fails, or the
            server answers with an HTTP error status.
    """
    url_template = 'https://movie.naver.com/movie/point/af/list.nhn?&page={}'
    results = []
    for page in range(start, end + 1):
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(url_template.format(page), timeout=timeout)
        # Fail loudly instead of parsing an error page as if it were data.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")

        # Each <tr> of the table body holds one rating entry.
        for row in soup.select("table.list_netizen > tbody > tr"):
            number_tag = row.select_one("td.ac.num")
            writer_tag = row.select_one("td.num > a.author")
            title_cell = row.select_one("td.title")
            if number_tag is None or writer_tag is None or title_cell is None:
                # Not a complete rating row; skip it rather than crash.
                continue

            title_tag = title_cell.select_one("a")
            point_tag = title_cell.select_one("div.list_netizen_score > em")
            if title_tag is None or point_tag is None:
                continue

            # Read these before the tags are stripped from the cell below.
            title = title_tag.text
            point = point_tag.text

            # Remove the a, div and br tags from the td.title cell so that
            # only the review text remains. extract() removes the tag
            # together with everything inside it.
            for selector in ("a", "div", "br"):
                for tag in title_cell.select(selector):
                    tag.extract()
            content = title_cell.text.strip()

            results.append({
                "number": number_tag.text,
                "movie": title,
                "point": point,
                "writer": writer_tag.text,
                "contents": content,
            })
    return results
# Demo run: fetch only the first page of ratings and print the parsed rows.
print(get_movie_point(start=1, end=1))