From 4ca01a868d0d2c68f89e3ba7e5feb9805f10655b Mon Sep 17 00:00:00 2001
From: texhno
Date: Tue, 6 Feb 2024 02:49:40 +0100
Subject: [PATCH 1/6] Multiword wiki query added

---
 functions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/functions.py b/functions.py
index 571458e..a596c7f 100644
--- a/functions.py
+++ b/functions.py
@@ -5,7 +5,8 @@ def processmsg(msg, rcpt):
     if "youtube.com/watch" in msg:
         return msg.replace("youtube.com", "iv.datura.network")
     elif msg.startswith("!wiki"):
-        return sf.query_external_website("https://en.wikipedia.org/wiki/", msg.split(" ")[1])
+        cmd, query = msg.split(" ", 1)
+        return sf.query_external_website("https://en.wikipedia.org/wiki/", query)
     elif "good bot" in msg:
         return "^_^"
 

From 41e38ef80fabf6e3971e2cae3401ec2c3ea667a0 Mon Sep 17 00:00:00 2001
From: texhno
Date: Tue, 6 Feb 2024 02:58:45 +0100
Subject: [PATCH 2/6] Correct url link from multiword wiki query

---
 scraper_functions.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scraper_functions.py b/scraper_functions.py
index 5b9cd9f..831366d 100644
--- a/scraper_functions.py
+++ b/scraper_functions.py
@@ -1,12 +1,13 @@
 import requests
 from bs4 import BeautifulSoup
+from urllib.parse import quote
 
 def query_external_website(base_url, query):
     try:
-        page = requests.get(base_url + query)
+        page = requests.get(base_url + quote(query))
         soup = BeautifulSoup(page.content, "html.parser")
         title = soup.find("span", class_="mw-page-title-main").text
         content = soup.find(id="mw-content-text").select("p")[2].text
-        return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content + "\n\nFULL LINK:\n" + base_url + query
+        return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content + "\n\nFULL LINK:\n" + base_url + quote(query)
     except:
         return "Can't parse search result :("

From 6df3c82a7ee1fdf5c6f5d95512f04bee92221fbe Mon Sep 17 00:00:00 2001
From: texhno
Date: Tue, 6 Feb 2024 03:34:43 +0100
Subject: [PATCH 3/6] Get first available paragraph from query

---
 scraper_functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scraper_functions.py b/scraper_functions.py
index 831366d..42178c7 100644
--- a/scraper_functions.py
+++ b/scraper_functions.py
@@ -7,7 +7,7 @@ def query_external_website(base_url, query):
         page = requests.get(base_url + quote(query))
         soup = BeautifulSoup(page.content, "html.parser")
         title = soup.find("span", class_="mw-page-title-main").text
-        content = soup.find(id="mw-content-text").select("p")[2].text
-        return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content + "\n\nFULL LINK:\n" + base_url + quote(query)
+        content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None)
+        return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content.text + "\n\nFULL LINK:\n" + base_url + quote(query)
     except:
         return "Can't parse search result :("

From d4d14806db20f72abeb9ba5110e428d8440aee1a Mon Sep 17 00:00:00 2001
From: texhno
Date: Tue, 6 Feb 2024 03:37:26 +0100
Subject: [PATCH 4/6] Raise exception on None content

---
 scraper_functions.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scraper_functions.py b/scraper_functions.py
index 42178c7..d1061b6 100644
--- a/scraper_functions.py
+++ b/scraper_functions.py
@@ -8,6 +8,8 @@ def query_external_website(base_url, query):
         soup = BeautifulSoup(page.content, "html.parser")
         title = soup.find("span", class_="mw-page-title-main").text
         content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None)
+        if content == None:
+            raise Exception("Can't parse")
         return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content.text + "\n\nFULL LINK:\n" + base_url + quote(query)
     except:
         return "Can't parse search result :("

From a256bc277d0fbd0ea947ef280fe76adcc2216bf7 Mon Sep 17 00:00:00 2001
From: texhno
Date: Tue, 6 Feb 2024 03:46:17 +0100
Subject: [PATCH 5/6] Better heading selector

---
 scraper_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scraper_functions.py b/scraper_functions.py
index d1061b6..88d75cc 100644
--- a/scraper_functions.py
+++ b/scraper_functions.py
@@ -6,7 +6,7 @@ def query_external_website(base_url, query):
     try:
         page = requests.get(base_url + quote(query))
         soup = BeautifulSoup(page.content, "html.parser")
-        title = soup.find("span", class_="mw-page-title-main").text
+        title = soup.find(id="firstHeading").text
         content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None)
         if content == None:
             raise Exception("Can't parse")

From 161abdf32e22244ddcd314b1be3fb147e823d981 Mon Sep 17 00:00:00 2001
From: t3xhno
Date: Tue, 6 Feb 2024 11:39:10 +0100
Subject: [PATCH 6/6] Better wiki redirect

---
 functions.py         |  2 +-
 scraper_functions.py | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/functions.py b/functions.py
index db89d08..b654da0 100644
--- a/functions.py
+++ b/functions.py
@@ -6,7 +6,7 @@ def processmsg(msg, rcpt):
         return msg.replace("youtube.com", "iv.datura.network")
     elif msg.startswith("!wiki"):
         cmd, query = msg.split(" ", 1)
-        return sf.query_external_website("https://en.wikipedia.org/wiki/", query)
+        return sf.query_external_website("https://en.wikipedia.org", "/wiki/" + query)
     elif "good bot" in msg:
         return "^_^"
 

diff --git a/scraper_functions.py b/scraper_functions.py
index 88d75cc..502f44f 100644
--- a/scraper_functions.py
+++ b/scraper_functions.py
@@ -7,9 +7,13 @@ def query_external_website(base_url, query):
         page = requests.get(base_url + quote(query))
         soup = BeautifulSoup(page.content, "html.parser")
         title = soup.find(id="firstHeading").text
-        content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None)
+        mainContentElement = soup.find(id="mw-content-text")
+        if "This page is a redirect" in mainContentElement.text:
+            redirectLink = mainContentElement.find(class_="redirectMsg").find_all("a")[0]["href"]
+            return query_external_website(base_url, redirectLink)
+        content = next((paragraph for paragraph in mainContentElement.select("p") if not paragraph.has_attr("class")), None)
         if content == None:
-            raise Exception("Can't parse")
+            raise Exception("Can't parse search result :(")
         return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content.text + "\n\nFULL LINK:\n" + base_url + quote(query)
-    except:
-        return "Can't parse search result :("
+    except Exception as e:
+        return e
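
For reference, a minimal usage sketch of the scraper after the full series is applied. This is illustrative only, not part of the patches; it assumes the PATCH 6/6 version of scraper_functions.py is on the import path and that Wikipedia is reachable:

    # Sketch, not part of the series: exercises query_external_website as of PATCH 6/6.
    import scraper_functions as sf

    # Multiword queries survive intact: PATCH 1/6 splits the "!wiki" command off
    # the message only once (msg.split(" ", 1)), and PATCH 2/6 percent-encodes the
    # query with urllib.parse.quote, whose default safe="/" leaves the "/wiki/"
    # prefix added in PATCH 6/6 unescaped.
    reply = sf.query_external_website("https://en.wikipedia.org", "/wiki/Ada Lovelace")

    # Since PATCH 6/6 the except branch returns the Exception object itself
    # ("return e") rather than a fixed string, so coerce to str before
    # forwarding the result as a chat message.
    print(str(reply))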