fix wiki commands

2024-02-06 20:51:31 +01:00
parent 544f8052e9 161abdf32e
commit c011383f0e
2 changed files with 16 additions and 8 deletions
@@ -5,7 +5,7 @@ def processmsg(msg, rcpt):
    if "youtube.com/watch" in msg:
        return msg.replace("youtube.com", "iv.datura.network")
    elif msg.startswith("!"):
-         return command(msg)
+        return command(msg)
    elif "good bot" in msg:
        return "^_^"
@@ -21,5 +21,6 @@ def command(msg, rcpt):
        response = client.chat(model='llama2-uncensored:latest', messages=[{'role':'user','content':f'{msg[4:]}'}])
        return(response['message']['content'])
    elif msg.startswith("!wiki"):
-        return sf.query_external_website("https://en.wikipedia.org/wiki/", msg.split(" ")[1])
+        cmd, query = msg.split(" ", 1)
+        return sf.query_external_website("https://en.wikipedia.org", "/wiki/" + query)
@@ -1,12 +1,19 @@
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import quote
 def query_external_website(base_url, query):
    try:
-        page = requests.get(base_url + query)
+        page = requests.get(base_url + quote(query))
        soup = BeautifulSoup(page.content, "html.parser")
-        title = soup.find("span", class_="mw-page-title-main").text
+        title = soup.find(id="firstHeading").text
-        content = soup.find(id="mw-content-text").select("p")[2].text
+        mainContentElement = soup.find(id="mw-content-text")
-        return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content + "\n\nFULL LINK:\n" + base_url + query
+        if "This page is a redirect" in mainContentElement.text:
-    except:
+            redirectLink = mainContentElement.find(class_="redirectMsg").find_all("a")[0]["href"]
-        return "Can't parse search result :("
+            return query_external_website(base_url, redirectLink)
+        content = next((paragraph for paragraph in mainContentElement.select("p") if not paragraph.has_attr("class")), None)
+        if content == None:
+            raise Exception("Can't parse search result :(")
+        return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content.text + "\n\nFULL LINK:\n" + base_url + quote(query)
+    except Exception as e:
+        return e