From 161abdf32e22244ddcd314b1be3fb147e823d981 Mon Sep 17 00:00:00 2001 From: t3xhno Date: Tue, 6 Feb 2024 11:39:10 +0100 Subject: [PATCH] Better wiki redirect --- functions.py | 2 +- scraper_functions.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/functions.py b/functions.py index db89d08..b654da0 100644 --- a/functions.py +++ b/functions.py @@ -6,7 +6,7 @@ def processmsg(msg, rcpt): return msg.replace("youtube.com", "iv.datura.network") elif msg.startswith("!wiki"): cmd, query = msg.split(" ", 1) - return sf.query_external_website("https://en.wikipedia.org/wiki/", query) + return sf.query_external_website("https://en.wikipedia.org", "/wiki/" + query) elif "good bot" in msg: return "^_^" diff --git a/scraper_functions.py b/scraper_functions.py index 88d75cc..502f44f 100644 --- a/scraper_functions.py +++ b/scraper_functions.py @@ -7,9 +7,13 @@ def query_external_website(base_url, query): page = requests.get(base_url + quote(query)) soup = BeautifulSoup(page.content, "html.parser") title = soup.find(id="firstHeading").text - content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None) + mainContentElement = soup.find(id="mw-content-text") + if "This page is a redirect" in mainContentElement.text: + redirectLink = mainContentElement.find(class_="redirectMsg").find_all("a")[0]["href"] + return query_external_website(base_url, redirectLink) + content = next((paragraph for paragraph in mainContentElement.select("p") if not paragraph.has_attr("class")), None) if content == None: - raise Exception("Can't parse") + raise Exception("Can't parse search result :(") return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content.text + "\n\nFULL LINK:\n" + base_url + quote(query) - except: - return "Can't parse search result :(" + except Exception as e: + return e