import requests
from bs4 import BeautifulSoup
from urllib.parse import quote


def getSoup(base_url, query=""):
    """Fetch ``base_url + quote(query)`` and return a parsed BeautifulSoup tree.

    Network errors (timeout, DNS, connection reset) propagate as
    ``requests.RequestException``; both callers wrap this in try/except.
    """
    # Fix: requests has NO default timeout — without one a dead peer hangs
    # this call forever. 10s is generous for a page fetch.
    page = requests.get(base_url + quote(query), timeout=10)
    return BeautifulSoup(page.content, "html.parser")


def query_external_website(base_url, query):
    """Scrape a MediaWiki-style article page and format its lead paragraph.

    Looks up the page title (#firstHeading) and the first class-less <p>
    inside #mw-content-text. Follows one level of "This page is a redirect"
    indirection by recursing on the redirect target's href.

    Returns a formatted "TITLE / CONTENT / FULL LINK" string on success.
    On any failure returns the caught Exception object itself — NOTE(review):
    returning (not raising) the exception is the pre-existing contract;
    callers presumably render it, so it is preserved here.
    """
    try:
        soup = getSoup(base_url, query)
        title = soup.find(id="firstHeading").text
        main_content = soup.find(id="mw-content-text")

        # Redirect pages carry a .redirectMsg block whose first anchor is the
        # real target; re-query with that href as the new query string.
        if "This page is a redirect" in main_content.text:
            redirect_link = (
                main_content.find(class_="redirectMsg").find_all("a")[0]["href"]
            )
            return query_external_website(base_url, redirect_link)

        # The article lead is the first <p> without a class attribute
        # (classed <p> tags are infoboxes/notices, not body text).
        content = next(
            (p for p in main_content.select("p") if not p.has_attr("class")),
            None,
        )
        if content is None:  # fix: was `== None`; identity check is correct
            raise Exception("Can't parse search result :(")

        return (
            "\nTITLE:\n" + title
            + "\n\nCONTENT:\n" + content.text
            + "\n\nFULL LINK:\n" + base_url + quote(query)
        )
    except Exception as e:
        return e


def getDmzTasks(url):
    """Scrape the DMZ task board at ``url`` and format the active task list.

    For each element with class "task": reads its index and title from the
    first two <div>s, follows the task's own link to pull the description
    (first <p> of the first <section> in <main>) and the assigned users
    (elements with class "user-info-wrap").

    Returns the formatted multi-line report string; on any failure returns
    the caught Exception object (same legacy contract as
    query_external_website).
    """
    try:
        soup = getSoup(url)
        tasks = soup.find_all(class_="task")

        # Fix: build the report via a parts list + join instead of repeated
        # string `+=` (quadratic), and collapse the duplicated "Link:" line
        # / `continue` into a single if/else.
        parts = ["\nActive tasks:\n"]
        for task in tasks:
            # Hoisted: was recomputed three times per task. `[1:]` drops the
            # href's leading "." so it can be appended to `url`.
            task_href = task.find("a")["href"][1:]

            divs = task.select("div")
            parts.append(divs[0].text + " " + divs[1].text)

            task_soup = getSoup(url + task_href)
            description = (
                task_soup.find("main").select("section")[0].find("p").text
            )
            parts.append("\n\tDescription:\n" + "\t\t" + description + "\n")

            parts.append("\tAssigned users:\n" + "\t\t")
            assigned_users = task_soup.find_all(class_="user-info-wrap")
            if assigned_users:
                # Each user div reads "Label: Name"; keep the name part.
                names = [
                    user.find("div").text.split(": ")[1]
                    for user in assigned_users
                ]
                parts.append(", ".join(names) + "\n")
            else:
                parts.append("None! Be the first :)\n")

            parts.append("\tLink: " + url + task_href + "\n\n")

        return "".join(parts)
    except Exception as e:
        return e