Added scraper functions (Wikipedia, for now)

This commit is contained in:
texhno 2024-02-06 02:04:47 +01:00
parent a8b9850be5
commit e89201cb4b
3 changed files with 16 additions and 3 deletions

View File

@ -1,8 +1,10 @@
import scraper_functions as sf
def processmsg(msg, rcpt): def processmsg(msg, rcpt):
if msg.startswith("!"): if "youtube.com/watch" in msg:
return command(msg, rcpt)
elif "youtube.com/watch" in msg:
return msg.replace("youtube.com", "iv.datura.network") return msg.replace("youtube.com", "iv.datura.network")
elif msg.startswith("!wiki"):
return sf.query_external_website("https://en.wikipedia.org/wiki/", msg.split(" ")[1])
def command(msg, rcpt): def command(msg, rcpt):
if msg.startswith("!help"): if msg.startswith("!help"):

View File

@ -1 +1,3 @@
slixmpp slixmpp
requests
beautifulsoup4

9
scraper_functions.py Normal file
View File

@ -0,0 +1,9 @@
import requests
from bs4 import BeautifulSoup
def query_external_website(base_url, query):
    """Fetch ``base_url + query`` and return a short text summary of the page.

    The page is parsed as HTML; the title is read from the first element
    with class ``mw-page-title-main`` and the content from a ``<p>`` inside
    the element with id ``bodyContent``.

    Args:
        base_url: URL prefix the query is appended to.
        query: Page name to look up; it is percent-encoded before use.

    Returns:
        A multi-line string with the title, one paragraph of content and
        the full link. If the request fails or the expected elements are
        missing, a short explanatory message is returned instead.
    """
    # Imported locally so the block does not depend on extra file-level imports.
    from urllib.parse import quote

    # Percent-encode so characters such as "#", "?" or spaces stay part of
    # the page name instead of being interpreted as URL syntax.
    # NOTE(review): an already percent-encoded query would be encoded twice.
    url = base_url + quote(query)

    try:
        # A timeout keeps a slow or unreachable server from blocking forever.
        page = requests.get(url, timeout=10)
    except requests.RequestException as err:
        return f"\nCould not fetch {url}: {err}"

    soup = BeautifulSoup(page.content, "html.parser")
    titles = soup.select(".mw-page-title-main")
    body = soup.find(id="bodyContent")
    paragraphs = body.select("p") if body is not None else []

    if not titles or not paragraphs:
        # The page does not have the expected layout (e.g. missing article).
        return f"\nNo article found for: {query}\n\nFULL LINK:\n{url}"

    # The third <p> is used when present — presumably the earlier ones are
    # not the lead text on the target site (TODO confirm); shorter pages
    # fall back to their last paragraph instead of raising IndexError.
    paragraph = paragraphs[2] if len(paragraphs) > 2 else paragraphs[-1]

    return (
        f"\nTITLE: {titles[0].text}\n\n"
        f"CONTENT:\n{paragraph.text}\n\n"
        f"FULL LINK:\n{url}"
    )