Added scraper function

This commit is contained in:
texhno 2024-02-06 02:21:53 +01:00
parent 252a7fbdba
commit 834934fccd
3 changed files with 12 additions and 0 deletions

View File

@ -1,4 +1,5 @@
import ollama import ollama
import scraper_functions as sf
def processmsg(msg, rcpt): def processmsg(msg, rcpt):
if "youtube.com/watch" in msg: if "youtube.com/watch" in msg:

View File

@ -1,2 +1,4 @@
slixmpp slixmpp
ollama ollama
requests
beautifulsoup4

9
scraper_functions.py Normal file
View File

@ -0,0 +1,9 @@
import requests
from bs4 import BeautifulSoup
def query_external_website(base_url, query, *, paragraph_index=2, timeout=10):
    """Fetch ``base_url + query`` and return a text summary of the page.

    The page is parsed as HTML. The title is read from the
    ``span.mw-page-title-main`` element and the content from one ``<p>``
    inside the element with id ``mw-content-text`` (these selectors look
    like MediaWiki markup; other sites will likely hit the fallbacks).

    Args:
        base_url: URL prefix the query is appended to.
        query: Text appended verbatim to ``base_url`` to form the page URL.
        paragraph_index: Which ``<p>`` of the content area to return.
            Defaults to 2, the paragraph the function has always used.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A string with ``TITLE``, ``CONTENT`` and ``FULL LINK`` sections.
        If the title element is missing, ``query`` is used as the title;
        if the requested paragraph is missing, the content is empty.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    url = base_url + query

    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, "html.parser")

    # find() returns None when nothing matches; fall back to the query text
    # instead of failing with AttributeError on ``None.text``.
    title_tag = soup.find("span", class_="mw-page-title-main")
    title = title_tag.text if title_tag is not None else query

    content_root = soup.find(id="mw-content-text")
    paragraphs = content_root.select("p") if content_root is not None else []
    try:
        content = paragraphs[paragraph_index].text
    except IndexError:
        # Short or unexpected pages may not have that many paragraphs.
        content = ""

    return (
        "\nTITLE:\n" + title
        + "\n\nCONTENT:\n" + content
        + "\n\nFULL LINK:\n" + url
    )