From e89201cb4bbef69aa1927ef69327fe6fa3444cba Mon Sep 17 00:00:00 2001
From: texhno
Date: Tue, 6 Feb 2024 02:04:47 +0100
Subject: [PATCH 1/2] Added scraper functions (wikipedia,for now)

---
 functions.py         | 8 +++++---
 requirements.txt     | 2 ++
 scraper_functions.py | 9 +++++++++
 3 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 scraper_functions.py

diff --git a/functions.py b/functions.py
index 6a2fc5c..6778156 100644
--- a/functions.py
+++ b/functions.py
@@ -1,8 +1,10 @@
+import scraper_functions as sf
+
 def processmsg(msg, rcpt):
-    if msg.startswith("!"):
-        return command(msg, rcpt)
-    elif "youtube.com/watch" in msg:
+    if "youtube.com/watch" in msg:
         return msg.replace("youtube.com", "iv.datura.network")
+    elif msg.startswith("!wiki"):
+        return sf.query_external_website("https://en.wikipedia.org/wiki/", msg.split(" ")[1])
 
 def command(msg, rcpt):
     if msg.startswith("!help"):
diff --git a/requirements.txt b/requirements.txt
index 5593b66..f8acc82 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
 slixmpp
+requests
+beautifulsoup4
diff --git a/scraper_functions.py b/scraper_functions.py
new file mode 100644
index 0000000..a6f3891
--- /dev/null
+++ b/scraper_functions.py
@@ -0,0 +1,9 @@
+import requests
+from bs4 import BeautifulSoup
+
+def query_external_website(base_url, query):
+    page = requests.get(base_url + query)
+    soup = BeautifulSoup(page.content, "html.parser")
+    title = soup.select(".mw-page-title-main")[0]
+    content = soup.find(id="bodyContent").select("p")[2].text
+    return "\nTITLE: " + title.text + "\n\n" + "CONTENT:" + "\n" + content + "\n\n" + "FULL LINK:\n" + base_url + query
-- 
2.30.2


From 834934fccd09dc27d5dfbe5aaa8ed3b029755fb2 Mon Sep 17 00:00:00 2001
From: texhno
Date: Tue, 6 Feb 2024 02:21:53 +0100
Subject: [PATCH 2/2] Added scraper function

---
 functions.py         | 1 +
 requirements.txt     | 2 ++
 scraper_functions.py | 9 +++++++++
 3 files changed, 12 insertions(+)
 create mode 100644 scraper_functions.py

diff --git a/functions.py b/functions.py
index d722e49..4452e9b 100644
--- a/functions.py
+++ b/functions.py
@@ -1,4 +1,5 @@
 import ollama
+import scraper_functions as sf
 
 def processmsg(msg, rcpt):
     if "youtube.com/watch" in msg:
diff --git a/requirements.txt b/requirements.txt
index e1e7e30..b685df2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
 slixmpp
 ollama
+requests
+beautifulsoup4
diff --git a/scraper_functions.py b/scraper_functions.py
new file mode 100644
index 0000000..2f0445a
--- /dev/null
+++ b/scraper_functions.py
@@ -0,0 +1,9 @@
+import requests
+from bs4 import BeautifulSoup
+
+def query_external_website(base_url, query):
+    page = requests.get(base_url + query)
+    soup = BeautifulSoup(page.content, "html.parser")
+    title = soup.find("span", class_="mw-page-title-main").text
+    content = soup.find(id="mw-content-text").select("p")[2].text
+    return "\nTITLE:\n" + title + "\n\nCONTENT:\n" + content + "\n\nFULL LINK:\n" + base_url + query
-- 
2.30.2
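
For reference, the !wiki branch added to processmsg() reduces to a direct call to
query_external_website(). Below is a minimal usage sketch against the PATCH 2/2 version
of the function; the article name "Python_(programming_language)" is only an illustrative
assumption and does not appear in the patches.

import scraper_functions as sf

# Same call processmsg() makes for a message such as
# "!wiki Python_(programming_language)"; the article name here is an
# arbitrary example, not something the patches hard-code.
reply = sf.query_external_website("https://en.wikipedia.org/wiki/",
                                  "Python_(programming_language)")
print(reply)  # "\nTITLE:\n...", then the third <p> of the article body, then the full link

As written, query_external_website() assumes the HTTP request succeeds and the selectors
match; adding a timeout to requests.get() and checking that the select() results are
non-empty would be a natural hardening step, but neither patch does that yet.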