Better heading selector

This commit is contained in:
texhno 2024-02-06 03:46:17 +01:00
parent d4d14806db
commit a256bc277d

View File

@@ -6,7 +6,7 @@ def query_external_website(base_url, query):
try: try:
page = requests.get(base_url + quote(query)) page = requests.get(base_url + quote(query))
soup = BeautifulSoup(page.content, "html.parser") soup = BeautifulSoup(page.content, "html.parser")
title = soup.find("span", class_="mw-page-title-main").text title = soup.find(id="firstHeading").text
content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None) content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None)
if content == None: if content == None:
raise Exception("Can't parse") raise Exception("Can't parse")