diff --git a/scraper_functions.py b/scraper_functions.py
index 7eb08a6..4236830 100644
--- a/scraper_functions.py
+++ b/scraper_functions.py
@@ -2,10 +2,14 @@ import requests
 from bs4 import BeautifulSoup
 from urllib.parse import quote
 
+def getSoup(base_url, query = ""):
+    page = requests.get(base_url + quote(query))
+    soup = BeautifulSoup(page.content, "html.parser")
+    return soup
+
 def query_external_website(base_url, query):
     try:
-        page = requests.get(base_url + quote(query))
-        soup = BeautifulSoup(page.content, "html.parser")
+        soup = getSoup(base_url, query)
         title = soup.find(id="firstHeading").text
         mainContentElement = soup.find(id="mw-content-text")
         if "This page is a redirect" in mainContentElement.text:
@@ -20,14 +24,26 @@ def query_external_website(base_url, query):
 
 def getDmzTasks(url):
     try:
-        page = requests.get(url)
-        soup = BeautifulSoup(page.content, "html.parser")
+        soup = getSoup(url)
         tasks = soup.find_all(class_="task")
         result = "\nActive tasks:\n"
         for task in tasks:
             taskIndex = task.select("div")[0].text
             taskTitle = task.select("div")[1].text
-            result += taskIndex + " " + taskTitle + " | " + "Link: " + url + task.find("a")["href"][1:] + "\n"
+            result += taskIndex + " " + taskTitle
+            taskSoup = getSoup(url + task.find("a")["href"][1:])
+            description = taskSoup.find("main").select("section")[0].find("p").text
+            result += "\n\tDescription:\n" + "\t\t" + description + "\n"
+            result += "\tAssigned users:\n" + "\t\t"
+            assignedUsers = taskSoup.find_all(class_="user-info-wrap")
+            if len(assignedUsers) == 0:
+                result += "None! Be the first :)\n"
+                result += "\tLink: " + url + task.find("a")["href"][1:] + "\n\n"
+                continue
+            usersList = ""
+            for user in assignedUsers:
+                usersList += user.find("div").text.split(": ")[1] + ", "
+            result += usersList[:-2] + "\n\n"
         return result
     except Exception as e:
         return e
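
A minimal smoke test for the extracted getSoup helper could look like the sketch below; the Wikipedia base URL, the query string, and the task-board URL are illustrative assumptions, not part of this patch.

    from scraper_functions import getSoup, getDmzTasks

    # getSoup URL-encodes the query with quote() before issuing the GET,
    # so spaces and parentheses in article titles are safe to pass through.
    soup = getSoup("https://en.wikipedia.org/wiki/", "Python (programming language)")
    print(soup.find(id="firstHeading").text)

    # Hypothetical task-board URL; a trailing slash matters here, since
    # getDmzTasks appends task.find("a")["href"][1:] to build each task link.
    print(getDmzTasks("https://dmz.example.org/"))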