인공지능 공부/남박사의 파이썬 실전

(인프런) 파이썬 실전 마그넷 크롤링해서 자동 검색기 만들기

앨런튜링_ 2021. 6. 6. 18:36
import re
import requests
from bs4 import BeautifulSoup
import re

def _page_limit_from_stats(soup, max_pages=20):
    """Return (result count // 10) read from the result-stats line, capped at *max_pages*.

    Returns 0 when the stats element is missing or contains no number, so the
    caller fetches only the page it already has instead of crashing.
    """
    stats = soup.select("div#result-stats")
    if not stats:
        return 0
    # Take the first run of digits/commas, e.g. "1,230,000" in
    # "검색결과 약 1,230,000개 (0.45초)".  This assumes the total count is the
    # first number in the line -- TODO confirm for other locales/layouts.
    match = re.search(r"\d[\d,]*", stats[0].text)
    if match is None:
        return 0
    total = int(match.group().replace(",", ""))
    return min(total // 10, max_pages)


def search_google(keyword, start_page, end_page=None):
    """Search Google for pages about *keyword* and collect magnet links from them.

    Each Google result page is fetched, every result link on it that has an
    ``<h3>`` title is visited, and the first ``magnet:?xt=`` anchor found on
    the linked page is recorded together with the result title.

    Args:
        keyword: Search term. It is URL-encoded by ``requests``.
        start_page: Value sent as the ``start`` query parameter of the first
            request; it is advanced by 10 for every following request.
        end_page: Paging continues while the current ``start`` value is below
            this number. When ``None`` it is derived from the result count on
            the first fetched page (count // 10, capped at 20).

    Returns:
        A new list of ``(title, magnet_href)`` tuples in discovery order.

    Raises:
        requests.RequestException: If a Google result page cannot be fetched.
            Failures while fetching an individual linked page are skipped.
    """
    query = "{0} magnet:?xt=".format(keyword)
    header = {"user-agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"}
    # Compiled once; the original pattern ended in "=*", which made the "="
    # optional by accident.
    magnet_href = re.compile(r"magnet:\?xt=")

    # Local accumulator: a shared module-level list extended with the return
    # value of a recursive call would be extended with itself, duplicating
    # every hit.
    found = []
    offset = start_page

    while True:
        # `params` lets requests URL-encode the keyword and adds the paging
        # offset, so each iteration really requests a different result page.
        r = requests.get(
            "https://www.google.com/search",
            params={"q": query, "oq": query, "start": offset},
            headers=header,
            timeout=10,
        )
        bs = BeautifulSoup(r.text, "lxml")

        if end_page is None:
            end_page = _page_limit_from_stats(bs)

        for a in bs.select("#rso > div > div > div > div.yuRUbf > a"):
            href = a.get("href")
            headings = a.select("h3")
            if not href or not headings:
                continue
            try:
                page = requests.get(href, timeout=10)
            except requests.RequestException:
                # Best effort: an unreachable or malformed link is skipped.
                continue
            magnets = BeautifulSoup(page.text, "lxml").find_all("a", href=magnet_href)
            if magnets:
                found.append((headings[0].text, magnets[0]["href"]))

        # NOTE(review): `end_page` is a page count while `offset` advances by
        # 10 per page, so the units differ; the comparison is kept as in the
        # original so the number of requests does not change -- confirm intent.
        if offset >= end_page:
            break
        offset += 10

    return found
# Script driver: search with the keyword "리눅스" (Linux) starting at offset 0,
# then print every returned entry on its own line.
results = search_google("리눅스", start_page=0)

for hit in results:
    print(hit)