From a256bc277d0fbd0ea947ef280fe76adcc2216bf7 Mon Sep 17 00:00:00 2001 From: texhno Date: Tue, 6 Feb 2024 03:46:17 +0100 Subject: [PATCH] Better heading selector --- scraper_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scraper_functions.py b/scraper_functions.py index d1061b6..88d75cc 100644 --- a/scraper_functions.py +++ b/scraper_functions.py @@ -6,7 +6,7 @@ def query_external_website(base_url, query): try: page = requests.get(base_url + quote(query)) soup = BeautifulSoup(page.content, "html.parser") - title = soup.find("span", class_="mw-page-title-main").text + title = soup.find(id="firstHeading").text content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None) if content == None: raise Exception("Can't parse")