2024-02-06 01:21:53 +00:00
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
2024-02-06 01:58:45 +00:00
|
|
|
from urllib.parse import quote
|
2024-02-06 01:21:53 +00:00
|
|
|
|
|
|
|
def query_external_website(base_url, query, timeout=10):
    """Look up ``query`` on a wiki-style site and summarize the resulting page.

    The page at ``base_url + quote(query)`` is downloaded and parsed. The
    element ids used here (``firstHeading``, ``mw-content-text``,
    ``redirectMsg``) look like MediaWiki markup -- presumably the target is
    a MediaWiki instance; confirm against the caller.

    Args:
        base_url: URL prefix the percent-encoded query is appended to.
        query: Article name (or path) to look up; it is percent-encoded
            with ``urllib.parse.quote`` before being appended.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller indefinitely.

    Returns:
        On success, a string with the page title, the first paragraph that
        has no ``class`` attribute, and the full link that was requested.
        On any failure the caught exception *instance* is returned (not
        raised), so callers must check the type of the result.
    """
    try:
        # Build the URL once; it is needed for the request and for the output.
        url = base_url + quote(query)
        page = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(page.content, "html.parser")

        title = soup.find(id="firstHeading").text
        main_content = soup.find(id="mw-content-text")

        if "This page is a redirect" in main_content.text:
            # Follow the first link inside the redirect notice.
            # NOTE(review): the href is passed through quote() again on the
            # recursive call; an already percent-encoded href would be
            # double-encoded -- confirm against real redirect pages.
            redirect_link = main_content.find(class_="redirectMsg").find_all("a")[0]["href"]
            return query_external_website(base_url, redirect_link, timeout)

        # First paragraph without a class attribute is treated as the summary.
        content = next(
            (
                paragraph
                for paragraph in main_content.select("p")
                if not paragraph.has_attr("class")
            ),
            None,
        )
        if content is None:
            raise Exception("Can't parse search result :(")

        return (
            "\nTITLE:\n" + title
            + "\n\nCONTENT:\n" + content.text
            + "\n\nFULL LINK:\n" + url
        )
    except Exception as e:
        # Deliberate contract: errors are handed back to the caller as values.
        return e
|
2024-02-06 20:17:49 +00:00
|
|
|
|
2024-02-06 20:24:04 +00:00
|
|
|
def getDmzTasks(url, timeout=10):
    """Download ``url`` and list the tasks found on the page.

    Every element with class ``task`` is treated as one task. Its first two
    ``div`` descendants supply the index and the title, and its first ``a``
    element supplies the link.

    Args:
        url: Page to download; also used as the prefix for each task link.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller indefinitely.

    Returns:
        On success, a string starting with ``"\\nActive tasks:\\n"`` followed
        by one line per task. On any failure the caught exception *instance*
        is returned (not raised), so callers must check the type of the
        result.
    """
    try:
        page = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(page.content, "html.parser")

        lines = ["\nActive tasks:\n"]
        for task in soup.find_all(class_="task"):
            # Query the divs once instead of once per field.
            divs = task.select("div")
            task_index = divs[0].text
            task_title = divs[1].text
            # The first character of the href is dropped before appending it
            # to ``url`` -- presumably a leading "/" that ``url`` already
            # ends with; confirm against the page markup.
            href = task.find("a")["href"]
            lines.append(
                task_index + " " + task_title + " | " + "Link: " + url + href[1:] + "\n"
            )
        return "".join(lines)
    except Exception as e:
        # Deliberate contract: errors are handed back to the caller as values.
        return e
|