Better heading selector
This commit is contained in:
parent
d4d14806db
commit
a256bc277d
@ -6,7 +6,7 @@ def query_external_website(base_url, query):
|
|||||||
try:
|
try:
|
||||||
page = requests.get(base_url + quote(query))
|
page = requests.get(base_url + quote(query))
|
||||||
soup = BeautifulSoup(page.content, "html.parser")
|
soup = BeautifulSoup(page.content, "html.parser")
|
||||||
title = soup.find("span", class_="mw-page-title-main").text
|
title = soup.find(id="firstHeading").text
|
||||||
content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None)
|
content = next((paragraph for paragraph in soup.find(id="mw-content-text").select("p") if not paragraph.has_attr("class")), None)
|
||||||
if content == None:
|
if content == None:
|
||||||
raise Exception("Can't parse")
|
raise Exception("Can't parse")
|
||||||
|
Loading…
Reference in New Issue
Block a user