# 인공지능 공부/남박사의 파이썬 실전
#
# (인프런) 파이썬 실전 네이버 영화 평점 크롤링하기
#
# 앨런튜링_ 2021. 5. 30. 20:56
import requests
from bs4 import BeautifulSoup

def _parse_rating_row(tr):
    """Return the rating record for one table row, or None if a cell is missing."""
    number_tag = tr.select_one("td.ac.num")
    writer_tag = tr.select_one("td.num > a.author")
    title_cell = tr.select_one("td.title")
    if number_tag is None or writer_tag is None or title_cell is None:
        return None

    title_tag = title_cell.select_one("a")
    point_tag = title_cell.select_one("div.list_netizen_score > em")
    if title_tag is None or point_tag is None:
        return None

    record = {
        "number": number_tag.text,
        "movie": title_tag.text,
        "point": point_tag.text,
        "writer": writer_tag.text,
    }

    # Strip the a, div and br tags (together with their contents) from the
    # title cell so that only the free-text review remains. This has to
    # happen after the title and score were read above, because both live
    # inside the tags being removed.
    for tag_name in ("a", "div", "br"):
        for tag in title_cell.select(tag_name):
            tag.extract()

    record["contents"] = title_cell.text.strip()
    return record


def get_movie_point(start, end=1):
    """Scrape netizen movie ratings from Naver Movie rating list pages.

    Fetches every list page numbered ``start`` through ``end`` (inclusive)
    and collects one record per row of ``table.list_netizen``.

    Args:
        start: Number of the first page to fetch.
        end: Number of the last page to fetch, inclusive. Defaults to 1, so
            with the default any ``start`` greater than 1 produces an empty
            page range and no request is made.

    Returns:
        A list of dicts, in page and row order, each with the string-valued
        keys ``"number"``, ``"movie"``, ``"point"``, ``"writer"`` and
        ``"contents"``. Rows that lack any of the expected cells are
        skipped. The list is empty when ``end < start``.

    Raises:
        requests.exceptions.RequestException: If a page cannot be fetched
            (connection failure, timeout, or an HTTP error status).
    """
    url_template = 'https://movie.naver.com/movie/point/af/list.nhn?&page={}'
    timeout_seconds = 10  # without a timeout a stalled server blocks forever

    results = []
    for page in range(start, end + 1):
        response = requests.get(url_template.format(page), timeout=timeout_seconds)
        # Fail loudly instead of parsing an error page as if it held ratings.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "lxml")
        for tr in soup.select("table.list_netizen > tbody > tr"):
            record = _parse_rating_row(tr)
            if record is not None:
                results.append(record)
    return results

# Demo run: scrape only the first rating list page and print the records.
print(get_movie_point(start=1, end=1))