aboutsummaryrefslogtreecommitdiff
path: root/sjdbmk/legacy_wikipedia.py
diff options
context:
space:
mode:
Diffstat (limited to 'sjdbmk/legacy_wikipedia.py')
-rw-r--r-- sjdbmk/legacy_wikipedia.py 63
1 files changed, 56 insertions, 7 deletions
diff --git a/sjdbmk/legacy_wikipedia.py b/sjdbmk/legacy_wikipedia.py
index 2d66b20..c2f60a1 100644
--- a/sjdbmk/legacy_wikipedia.py
+++ b/sjdbmk/legacy_wikipedia.py
@@ -62,7 +62,21 @@ def get_on_this_day_zh() -> None:
li_element.append(event)
ul_element.append(li_element)
- result = str(p_element).replace("/wiki", "https://zh.wikipedia.org/zh-cn").replace('<span class="otd-year">', "<b>").replace("</span>:", ":</b>") + str(ul_element).replace("/wiki", "https://zh.wikipedia.org/zh-cn").replace("</dt><dd>", " – ").replace('<div class="event">\n<dt>', "").replace("</dd>\n</div>", "")
+ result = str(p_element).replace(
+ "/wiki", "https://zh.wikipedia.org/zh-cn"
+ ).replace('<span class="otd-year">', "<b>").replace(
+ "</span>:", ":</b>"
+ ) + str(
+ ul_element
+ ).replace(
+ "/wiki", "https://zh.wikipedia.org/zh-cn"
+ ).replace(
+ "</dt><dd>", " – "
+ ).replace(
+ '<div class="event">\n<dt>', ""
+ ).replace(
+ "</dd>\n</div>", ""
+ )
result = re.sub(r"<small>.*?图.*?</small>", "", result)
with open("otd_zh-" + formatted_time_yearless + ".html", "w") as file:
@@ -91,7 +105,9 @@ def get_on_this_day_en() -> None:
month = months[index]
day = 1
- url = "https://en.m.wikipedia.org/wiki/Wikipedia:Selected_anniversaries/" + month
+ url = (
+ "https://en.m.wikipedia.org/wiki/Wikipedia:Selected_anniversaries/" + month
+ )
response = requests.get(url, timeout=15)
html = response.text
soup = bs4.BeautifulSoup(html, "html.parser")
@@ -119,7 +135,16 @@ def get_on_this_day_en() -> None:
for li in li_contents:
p_element_2.append(li)
- result = str(p_element).replace("/wiki", "https://en.wikipedia.org/wiki") + str(ul_element).replace("/wiki", "https://en.wikipedia.org/wiki") + "\n" + str(p_element_2).replace("</li><li>", "; ").replace("<li>", "<b>Births and Deaths: </b>").replace("</li>", "").replace("/wiki", "https://en.wikipedia.org/wiki")
+ result = (
+ str(p_element).replace("/wiki", "https://en.wikipedia.org/wiki")
+ + str(ul_element).replace("/wiki", "https://en.wikipedia.org/wiki")
+ + "\n"
+ + str(p_element_2)
+ .replace("</li><li>", "; ")
+ .replace("<li>", "<b>Births and Deaths: </b>")
+ .replace("</li>", "")
+ .replace("/wiki", "https://en.wikipedia.org/wiki")
+ )
result = re.sub(r" <i>.*?icture.*?</i>", "", result)
with open("otd_en-" + formatted_time_yearless + ".html", "w") as file:
@@ -175,7 +200,23 @@ def get_in_the_news_en() -> str:
else:
p_element_3.append(li)
- result = str(ul_element).replace("/wiki", "https://en.wikipedia.org/wiki") + str(p_element_2).replace("</li><li>", "; ").replace("<li>", "<b>Ongoing: </b>").replace("</li>", "").replace("\n;", ";").replace("/wiki", "https://en.wikipedia.org/wiki").replace("</p>", "<br>") + str(p_element_3).replace("</li><li>", "; ").replace("<li>", "<b>Recent deaths: </b>").replace("</li>", "").replace("\n;", ";").replace("/wiki", "https://en.wikipedia.org/wiki").replace("<p>", "")
+ result = (
+ str(ul_element).replace("/wiki", "https://en.wikipedia.org/wiki")
+ + str(p_element_2)
+ .replace("</li><li>", "; ")
+ .replace("<li>", "<b>Ongoing: </b>")
+ .replace("</li>", "")
+ .replace("\n;", ";")
+ .replace("/wiki", "https://en.wikipedia.org/wiki")
+ .replace("</p>", "<br>")
+ + str(p_element_3)
+ .replace("</li><li>", "; ")
+ .replace("<li>", "<b>Recent deaths: </b>")
+ .replace("</li>", "")
+ .replace("\n;", ";")
+ .replace("/wiki", "https://en.wikipedia.org/wiki")
+ .replace("<p>", "")
+ )
result = re.sub(r" <i>\(.*?\)</i>", "", result)
return result
@@ -214,7 +255,11 @@ def get_in_the_news_zh() -> str:
"",
)
.replace("/wiki", "https://zh.wikipedia.org/zh-cn")
- + str(p_element_3).replace('<span class="hlist inline">', "<b>最近逝世:</b>").replace("</span>", "").replace("-", ";").replace("/wiki", "https://zh.wikipedia.org/zh-cn")
+ + str(p_element_3)
+ .replace('<span class="hlist inline">', "<b>最近逝世:</b>")
+ .replace("</span>", "")
+ .replace("-", ";")
+ .replace("/wiki", "https://zh.wikipedia.org/zh-cn")
).replace("</p><p>", "<br>")
result = re.sub(r"<small.*?>.*?</small>", "", result)
@@ -222,8 +267,12 @@ def get_in_the_news_zh() -> str:
def main() -> None:
- parser = argparse.ArgumentParser(description="Legacy Wikipedia script for the Daily Bulletin")
- parser.add_argument("--config", default="config.ini", help="path to the configuration file")
+ parser = argparse.ArgumentParser(
+ description="Legacy Wikipedia script for the Daily Bulletin"
+ )
+ parser.add_argument(
+ "--config", default="config.ini", help="path to the configuration file"
+ )
args = parser.parse_args()
config = configparser.ConfigParser()