forked from Decentrala/chatbot
Added scraper function
This commit is contained in:
parent
252a7fbdba
commit
834934fccd
@ -1,4 +1,5 @@
|
|||||||
import ollama
|
import ollama
|
||||||
|
import scraper_functions as sf
|
||||||
|
|
||||||
def processmsg(msg, rcpt):
|
def processmsg(msg, rcpt):
|
||||||
if "youtube.com/watch" in msg:
|
if "youtube.com/watch" in msg:
|
||||||
|
@ -1,2 +1,4 @@
|
|||||||
slixmpp
|
slixmpp
|
||||||
ollama
|
ollama
|
||||||
|
requests
|
||||||
|
beautifulsoup4
|
||||||
|
9
scraper_functions.py
Normal file
9
scraper_functions.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def query_external_website(base_url, query, timeout=10):
    """Fetch ``base_url + query`` and summarise the page as plain text.

    The page is parsed as HTML. The title is read from the
    ``span.mw-page-title-main`` element and the content from the third
    ``<p>`` inside the element with id ``mw-content-text``.
    NOTE(review): these selectors look like MediaWiki markup, and the
    third paragraph is presumably the first one carrying article text on
    the target site -- confirm against the site actually being queried.

    Args:
        base_url: URL prefix the query is appended to.
        query: Page name (or path fragment) appended verbatim to base_url.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A string with "TITLE", "CONTENT" and "FULL LINK" sections. If the
        title element is missing the query itself is used as the title;
        if the content paragraph is missing the content section is empty.

    Raises:
        requests.exceptions.RequestException: on connection errors or
            when the timeout expires.
    """
    link = base_url + query

    # Without an explicit timeout requests waits indefinitely on an
    # unresponsive server, which would stall the caller.
    page = requests.get(link, timeout=timeout)
    soup = BeautifulSoup(page.content, "html.parser")

    # find() returns None when nothing matches; fall back instead of
    # failing with AttributeError on ``.text``.
    title_tag = soup.find("span", class_="mw-page-title-main")
    title = title_tag.text if title_tag is not None else query

    body = soup.find(id="mw-content-text")
    paragraphs = body.select("p") if body is not None else []
    # Short pages may have fewer than three paragraphs; avoid IndexError.
    content = paragraphs[2].text if len(paragraphs) > 2 else ""

    return f"\nTITLE:\n{title}\n\nCONTENT:\n{content}\n\nFULL LINK:\n{link}"
|
Loading…
Reference in New Issue
Block a user