diff --git a/functions.py b/functions.py
index d722e49..4452e9b 100644
--- a/functions.py
+++ b/functions.py
@@ -1,4 +1,5 @@
 import ollama
+import scraper_functions as sf
 
 def processmsg(msg, rcpt):
     if "youtube.com/watch" in msg:
diff --git a/requirements.txt b/requirements.txt
index e1e7e30..b685df2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
 slixmpp
 ollama
+requests
+beautifulsoup4
diff --git a/scraper_functions.py b/scraper_functions.py
new file mode 100644
index 0000000..2f0445a
--- /dev/null
+++ b/scraper_functions.py
@@ -0,0 +1,49 @@
+import requests
+from bs4 import BeautifulSoup
+
+
+def query_external_website(base_url, query, timeout=10):
+    """Fetch a page and return its title, one paragraph and its link.
+
+    The page at ``base_url + query`` is downloaded and parsed. The title is
+    read from the ``span.mw-page-title-main`` element and the content from
+    the third ``p`` element inside ``#mw-content-text`` (presumably a
+    MediaWiki page layout, judging by the selectors -- confirm with callers).
+
+    Args:
+        base_url: URL prefix the query is appended to.
+        query: Suffix appended verbatim to ``base_url``.
+        timeout: Seconds to wait for the server before giving up.
+
+    Returns:
+        A string with ``TITLE:``, ``CONTENT:`` and ``FULL LINK:`` sections.
+
+    Raises:
+        requests.RequestException: If the request fails, times out or the
+            server answers with an HTTP error status.
+        ValueError: If the page lacks the expected title element, content
+            container or third paragraph.
+    """
+    url = base_url + query
+    # Without a timeout, requests waits indefinitely on an unresponsive host.
+    page = requests.get(url, timeout=timeout)
+    # Otherwise an HTTP error page would be parsed as if it were the article.
+    page.raise_for_status()
+    soup = BeautifulSoup(page.content, "html.parser")
+
+    title_tag = soup.find("span", class_="mw-page-title-main")
+    if title_tag is None:
+        raise ValueError(f"no page title found at {url}")
+
+    body = soup.find(id="mw-content-text")
+    if body is None:
+        raise ValueError(f"no page content found at {url}")
+
+    # Index 2 is the paragraph the output reports; fail clearly if it is absent.
+    paragraphs = body.select("p")
+    if len(paragraphs) < 3:
+        raise ValueError(f"fewer than 3 paragraphs found at {url}")
+
+    title = title_tag.text
+    content = paragraphs[2].text
+    return f"\nTITLE:\n{title}\n\nCONTENT:\n{content}\n\nFULL LINK:\n{url}"