From 6df3c82a7ee1fdf5c6f5d95512f04bee92221fbe Mon Sep 17 00:00:00 2001 From: texhno Date: Tue, 6 Feb 2024 03:34:43 +0100 Subject: [PATCH] Get first available paragraph from query --- scraper_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scraper_functions.py b/scraper_functions.py index 831366d..42178c7 100644 --- a/scraper_functions.py +++ b/scraper_functions.py @@ -7,7 +7,7 @@ def query_external_website(base_url, query): page = requests.get(base_url + quote(query)) soup = BeautifulSoup(page.content, "html.parser") title = soup.find("span", class_="mw-page-title-main").text - content = soup.find(id="mw-content-text").select("p")[2].text - return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content + "\n\nFULL LINK:\n" + base_url + quote(query) + content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None) + return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content.text + "\n\nFULL LINK:\n" + base_url + quote(query) except: return "Can't parse search result :("