from re import S
import requests
from bs4 import BeautifulSoup
def get_search_naver_blog(query, start_page=1, end_page=None):
    """Scrape Naver "view" search results for *query* over a range of pages.

    Each result page is requested with a ``start`` offset of
    ``(page - 1) * 10 + 1`` (page 2 -> 11, page 3 -> 21) and parsed for
    ``li.sh_blog_top`` entries. The page at ``start_page`` is always fetched,
    even when ``end_page`` is smaller than ``start_page``.

    Args:
        query: Search keyword; sent as the ``query`` URL parameter.
        start_page: 1-based number of the first page to fetch.
        end_page: Last page to fetch (inclusive). When ``None`` it is derived
            from the total hit count shown in ``span.title_num`` on the first
            fetched page and capped at 900 pages; if that counter cannot be
            read, only the first page is fetched. An explicit value is used
            as given.

    Returns:
        A list of ``(thumbnail_src, title_text, title_href, summary_text)``
        tuples in page order. Entries that lack any of those elements are
        skipped.

    Raises:
        ValueError: If ``start_page`` is less than 1.
    """
    if start_page < 1:
        raise ValueError("start_page must be >= 1, got {!r}".format(start_page))

    per_page = 10
    max_pages = 900
    timeout_seconds = 10
    search_url = "https://search.naver.com/search.naver"

    result = []
    page = start_page
    # Iterate instead of recursing once per page: a 900-page crawl would
    # otherwise sit right under Python's default recursion limit.
    while True:
        # Passing params lets requests URL-encode the (possibly non-ASCII) query.
        params = {
            "where": "view",
            "query": query,
            "start": (page - 1) * per_page + 1,
        }
        response = requests.get(search_url, params=params, timeout=timeout_seconds)
        bs = BeautifulSoup(response.text, "lxml")

        if end_page is None:
            # The counter text looks like "41-50 / 6,670건"; the total is the
            # number after the slash (6670 in that example).
            try:
                counter_text = bs.select("span.title_num")[0].text
                total = int(
                    counter_text.split("/")[-1]
                    .replace("건", "")
                    .replace(",", "")
                    .strip()
                )
            except (IndexError, ValueError):
                # Counter missing or unparsable: stop after the current page.
                end_page = page
            else:
                # Ceiling division so a partially filled last page is included.
                end_page = min((total + per_page - 1) // per_page, max_pages)

        for li in bs.select("li.sh_blog_top"):
            try:
                thumbnail = li.select("img")[0]["src"]
                title = li.select("dl > dt > a")[0]
                summary = li.select("dl > dd.sh_blog_passage")[0].text
                title_link = title["href"]
            except (IndexError, KeyError):
                # Best effort: skip entries missing an image, title link or summary.
                continue
            result.append((thumbnail, title.text, title_link, summary))

        if page >= end_page:
            break
        page += 1

    return result
# Demo run: collect pages 1 through 3 for the sample query and print
# each scraped entry on its own line.
results = get_search_naver_blog(query="파이썬강좌", start_page=1, end_page=3)
for entry in results:
    print(entry)