diff options
Diffstat (limited to 'sjdbmk')
-rw-r--r-- | sjdbmk/inspire_approve.py | 11 | ||||
-rw-r--r-- | sjdbmk/inspire_dl.py | 103 | ||||
-rw-r--r-- | sjdbmk/legacy_wikipedia.py | 248 | ||||
-rw-r--r-- | sjdbmk/sendmail.py | 240 | ||||
-rw-r--r-- | sjdbmk/serve.py | 91 |
5 files changed, 693 insertions, 0 deletions
diff --git a/sjdbmk/inspire_approve.py b/sjdbmk/inspire_approve.py new file mode 100644 index 0000000..98d202c --- /dev/null +++ b/sjdbmk/inspire_approve.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +from __future__ import annotations +import json +import sys + +for fn in sys.argv[1:]: + with open(fn, "r+") as fd: + jq = json.load(fd) + jq["approved"] = True + json.dump(jq, fd, indent="\t") diff --git a/sjdbmk/inspire_dl.py b/sjdbmk/inspire_dl.py new file mode 100644 index 0000000..94fd994 --- /dev/null +++ b/sjdbmk/inspire_dl.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# +# Daily script to prepare the YK Pao School Daily Bulletin's JSON data files +# Copyright (C) 2024 Runxi Yu <https://runxiyu.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# TODO: Check for potential filename injections +# + +from __future__ import annotations +from configparser import ConfigParser +import json +import argparse +import logging +import os +import shutil +import requests + +logger = logging.getLogger(__name__) + + +def main() -> None: + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description="Download Daily Inspirations") + # parser.add_argument("--changeme", default=None, help="changeme") + parser.add_argument("--config", default="config.ini", help="path to the configuration file") + args = parser.parse_args() + + config = ConfigParser() + config.read(args.config) + + build_path = config["general"]["build_path"] + os.chdir(build_path) + + api_base = config["web_service"]["api_base"].rstrip("/") + "/" + token = config["web_service"]["token"].strip() + + response_json = requests.get( + api_base + "rs", + headers={"Authorization": "Bearer %s" % token}, + timeout=20, + ).json() + assert isinstance(response_json, list) + remote_submission_list = set(response_json) + + local_submission_list = set([sn.lstrip("inspire-") for sn in os.listdir() if sn.startswith("inspire-")]) + to_fetch = remote_submission_list - local_submission_list + if to_fetch: + logger.info("Going to fetch: %s" % ", ".join(to_fetch)) + else: + logger.info("Nothing to fetch") + for sn in to_fetch: + logger.info("Fetching: %s" % sn) + with requests.get( + api_base + "rs/" + sn, + headers={ + "Authorization": "Bearer %s" % token, + "Accept-Encoding": "identity", + }, + stream=True, + timeout=20, + ) as r: + try: + sub = json.load(r.raw) + except json.decoder.JSONDecodeError: + logger.error("inspire-%s is broken, skipping" % sn) + sub["used"] = False + sub["approved"] = False + with open("inspire-%s" % os.path.basename(sn), "w", encoding="utf-8") as fd: + json.dump(sub, fd, indent="\t") + if not sub["file"]: + logger.info("No attachment") + else: + logger.info("Attachment noticed") + with requests.get( + api_base + "rf/" + 
os.path.basename(sub["file"]), + headers={ + "Authorization": "Bearer %s" % token, + "Accept-Encoding": "identity", + }, + stream=True, + timeout=20, + ) as r: + with open("inspattach-%s" % os.path.basename(sub["file"]), "wb") as fd: + logger.info("Saved to inspattach-%s" % os.path.basename(sub["file"])) + shutil.copyfileobj(r.raw, fd) + fd.flush() + + +if __name__ == "__main__": + main() diff --git a/sjdbmk/legacy_wikipedia.py b/sjdbmk/legacy_wikipedia.py new file mode 100644 index 0000000..2d66b20 --- /dev/null +++ b/sjdbmk/legacy_wikipedia.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +# +# Legacy Daily Bulletin components that need to be replaced +# Copyright (C) 2024 Runxi Yu <https://runxiyu.org> +# Copyright (C) 2023-2024 Albert Tan <albert-tan@qq.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
def get_on_this_day_zh() -> None:
    """Scrape zh.wikipedia's "On this day" month pages and write one
    HTML fragment per calendar day as ``otd_zh-MM-DD.html`` in the
    current directory.
    """
    months = [str(m) + "月" for m in range(1, 13)]

    for index in range(12):
        month = months[index]
        day = 1

        url = "https://zh.m.wikipedia.org/zh-cn/Wikipedia:历史上的今天/" + month
        response = requests.get(url, timeout=15)
        html = response.text
        soup = bs4.BeautifulSoup(html, "html.parser")
        div_elements = soup.find_all("div", class_="selected-anniversary")

        # Each "selected-anniversary" div corresponds to one day of the
        # month, in order — so "day" advances once per div.
        for div_element in div_elements:
            datetime_time = datetime.datetime(2000, index + 1, day)
            formatted_time_yearless = datetime_time.strftime("%m-%d")

            p_element = div_element.find("p")
            dl_element = div_element.find("dl")
            event_elements = dl_element.find_all("div", class_="event")
            ul_element = soup.new_tag("ul")

            for event in event_elements:
                li_element = soup.new_tag("li")
                li_element.append(event)
                ul_element.append(li_element)

            # Absolutize links and flatten the <dl> markup into a list.
            result = str(p_element).replace("/wiki", "https://zh.wikipedia.org/zh-cn").replace('<span class="otd-year">', "<b>").replace("</span>:", ":</b>") + str(ul_element).replace("/wiki", "https://zh.wikipedia.org/zh-cn").replace("</dt><dd>", " – ").replace('<div class="event">\n<dt>', "").replace("</dd>\n</div>", "")
            result = re.sub(r"<small>.*?图.*?</small>", "", result)

            # Explicit UTF-8: the content is Chinese and the platform
            # default encoding is not guaranteed to handle it.
            with open("otd_zh-" + formatted_time_yearless + ".html", "w", encoding="utf-8") as file:
                file.write(result)
            day += 1
def get_on_this_day_en() -> None:
    """Scrape en.wikipedia's "Selected anniversaries" month pages and
    write one HTML fragment per calendar day as ``otd_en-MM-DD.html`` in
    the current directory.
    """
    months = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ]

    for index in range(12):
        month = months[index]
        day = 1
        url = "https://en.m.wikipedia.org/wiki/Wikipedia:Selected_anniversaries/" + month
        response = requests.get(url, timeout=15)
        html = response.text
        soup = bs4.BeautifulSoup(html, "html.parser")
        p_elements = soup.find_all("p")

        for p_element in p_elements:
            try:
                datetime_time = datetime.datetime(2000, index + 1, day)
                formatted_time_yearless = datetime_time.strftime("%m-%d")
            except ValueError:
                # "day" ran past the end of the month — all days done.
                break

            # Only paragraphs that head a day's section are relevant.
            if not re.search(
                f'<p><b><a href="/wiki/{month}_\\d+" title="{month} \\d+">{month} \\d+</a></b',
                str(p_element),
            ):
                continue
            div_element = p_element.find_next("div")
            ul_element = div_element.find_next_sibling("ul")
            ul_element_2 = ul_element.find_next("ul")
            p_element_2 = soup.new_tag("p")
            li_contents = list(ul_element_2.find_all("li"))

            for li in li_contents:
                p_element_2.append(li)

            # Absolutize links and flatten the births/deaths list.
            result = str(p_element).replace("/wiki", "https://en.wikipedia.org/wiki") + str(ul_element).replace("/wiki", "https://en.wikipedia.org/wiki") + "\n" + str(p_element_2).replace("</li><li>", "; ").replace("<li>", "<b>Births and Deaths: </b>").replace("</li>", "").replace("/wiki", "https://en.wikipedia.org/wiki")
            result = re.sub(r" <i>.*?icture.*?</i>", "", result)

            # Explicit UTF-8 rather than the platform default encoding.
            with open("otd_en-" + formatted_time_yearless + ".html", "w", encoding="utf-8") as file:
                file.write(result)
            day += 1
def _copy_top_level_items(li_contents: list, target) -> None:
    """Append each top-level <li> to *target*, dropping nested <ul>s.

    When an <li> contains a nested <ul>, find_all lists the nested
    list's <li> immediately after it in document order, so the entry
    following a nested-list item is skipped.
    """
    skip = False
    for li in li_contents:
        if skip:
            skip = False
            continue
        if li.find("ul"):
            new_li = copy.deepcopy(li)
            new_li.find("ul").decompose()
            target.append(new_li)
            skip = True
        else:
            target.append(li)


def get_in_the_news_en() -> str:
    """Return an HTML fragment of en.wikipedia's "In the news" box,
    including the "Ongoing" and "Recent deaths" lines.
    """
    url = "https://en.m.wikipedia.org/wiki/Main_Page"
    response = requests.get(url, timeout=15)
    html = response.text
    soup = bs4.BeautifulSoup(html, "html.parser")

    h2_element = soup.find("div", id="mp-itn")
    assert h2_element
    ul_element = h2_element.find_next("ul")
    assert ul_element
    ul_element_2 = ul_element.find_next("ul")
    assert ul_element_2
    div_element = ul_element_2.find_next("div")
    assert div_element
    ul_element_3 = div_element.find_next("ul")
    assert ul_element_3

    p_element_2 = soup.new_tag("p")
    p_element_3 = soup.new_tag("p")
    assert isinstance(ul_element_2, bs4.Tag)
    assert isinstance(ul_element_3, bs4.Tag)
    li_contents_2 = list(ul_element_2.find_all("li"))
    li_contents_3 = list(ul_element_3.find_all("li"))
    # Fix: the skip flag is reset per list inside the helper.  The
    # original shared one flag across both loops, so a trailing nested
    # <ul> in the first list wrongly dropped the first item of the
    # second list.
    _copy_top_level_items(li_contents_2, p_element_2)
    _copy_top_level_items(li_contents_3, p_element_3)

    result = str(ul_element).replace("/wiki", "https://en.wikipedia.org/wiki") + str(p_element_2).replace("</li><li>", "; ").replace("<li>", "<b>Ongoing: </b>").replace("</li>", "").replace("\n;", ";").replace("/wiki", "https://en.wikipedia.org/wiki").replace("</p>", "<br>") + str(p_element_3).replace("</li><li>", "; ").replace("<li>", "<b>Recent deaths: </b>").replace("</li>", "").replace("\n;", ";").replace("/wiki", "https://en.wikipedia.org/wiki").replace("<p>", "")
    result = re.sub(r" <i>\(.*?\)</i>", "", result)

    return result
def get_in_the_news_zh() -> str:
    """Return an HTML fragment of zh.wikipedia's "In the news" box,
    including the "ongoing" and "recent deaths" lines.
    """
    url = "https://zh.m.wikipedia.org/zh-cn/Wikipedia:%E9%A6%96%E9%A1%B5"
    response = requests.get(url, timeout=15)
    html = response.text
    soup = bs4.BeautifulSoup(html, "html.parser")

    div_element = soup.find("div", id="column-itn")
    assert div_element
    ul_element = div_element.find("ul")
    assert isinstance(ul_element, bs4.Tag)
    ul_element_2 = ul_element.find_next("ul")
    assert isinstance(ul_element_2, bs4.Tag)
    ul_element_3 = ul_element_2.find_next("ul")
    assert isinstance(ul_element_3, bs4.Tag)
    span_element_2 = ul_element_2.find("span", class_="hlist inline")
    span_element_3 = ul_element_3.find("span", class_="hlist inline")
    assert span_element_2 and span_element_3
    p_element_2 = soup.new_tag("p")
    p_element_3 = soup.new_tag("p")
    p_element_2.append(span_element_2)
    p_element_3.append(span_element_3)

    result = (
        str(ul_element).replace("/wiki", "https://zh.wikipedia.org/zh-cn")
        + str(p_element_2)
        .replace('<span class="hlist inline">', "<b>正在发生:</b>")
        .replace("</span>", "")
        .replace("-", ";")
        .replace(
            '(<a href="/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD%E6%99%82%E9%96%93%E8%BB%B8" title="俄罗斯入侵乌克兰时间轴">时间轴</a>)',
            "",
        )
        .replace("/wiki", "https://zh.wikipedia.org/zh-cn")
        + str(p_element_3).replace('<span class="hlist inline">', "<b>最近逝世:</b>").replace("</span>", "").replace("-", ";").replace("/wiki", "https://zh.wikipedia.org/zh-cn")
    ).replace("</p><p>", "<br>")
    result = re.sub(r"<small.*?>.*?</small>", "", result)

    return result


def main() -> None:
    """CLI entry point: chdir into the build directory and regenerate
    the legacy "On this day" fragments for both languages.
    """
    parser = argparse.ArgumentParser(description="Legacy Wikipedia script for the Daily Bulletin")
    parser.add_argument("--config", default="config.ini", help="path to the configuration file")
    args = parser.parse_args()

    # Configure logging before any other work so that failures while
    # reading the configuration or changing directory are reported too.
    logging.basicConfig(level=logging.DEBUG)

    config = configparser.ConfigParser()
    config.read(args.config)

    os.chdir(config["general"]["build_path"])

    logger.warning("Running main() only grabs On This Day")
    logger.info("get_on_this_day_en()")
    get_on_this_day_en()
    logger.info("get_on_this_day_zh()")
    get_on_this_day_zh()
    # "In the news" is intentionally not fetched here:
    # get_in_the_news_en() / get_in_the_news_zh()


if __name__ == "__main__":
    main()
def acquire_token(app: msal.PublicClientApplication, config: ConfigParser) -> str:
    """Log in with the configured username/password and return an
    OAuth access token.

    Args:
        app: an MSAL public client application.
        config: parsed configuration; reads the [credentials] section
            (username, password, space-separated scope).

    Raises:
        ValueError: if the response carries no access token.  The
            server's error description, when present, is included so the
            failure can actually be diagnosed.
    """
    result = app.acquire_token_by_username_password(
        config["credentials"]["username"],
        config["credentials"]["password"],
        scopes=config["credentials"]["scope"].split(" "),
    )

    if "access_token" in result:
        assert isinstance(result["access_token"], str)
        return result["access_token"]
    # Surface the server-side reason instead of a bare, opaque error.
    raise ValueError(
        "Authentication error in password login: %s"
        % result.get("error_description", "(no error description)")
    )
def sendmail(
    token: str,
    subject: str,
    body: str,
    to: list[str],
    bcc: list[str],
    cc: list[str],
    when: Optional[datetime.datetime] = None,
    content_type: str = "HTML",
    importance: str = "Normal",
    reply_to: Optional[str] = None,
) -> str:
    """Create a draft via Microsoft Graph, then send it.

    Args:
        token: bearer token for the Graph API.
        subject, body, to, cc, bcc: the usual message fields.
        when: optional aware datetime for deferred delivery.
        content_type: Graph body contentType ("HTML" or "Text").
        importance: Graph importance value.
        reply_to: optional message id to reply to instead of composing
            a fresh message.

    Returns:
        The immutable id of the sent message.

    Raises:
        TypeError: if ``when`` is a naive datetime.
        ValueError: if the draft could not be created or sending was not
            accepted by the API.
    """
    data = {
        "subject": subject,
        "importance": importance,
        "body": {"contentType": content_type, "content": body},
        "toRecipients": [{"emailAddress": {"address": a}} for a in to],
        "ccRecipients": [{"emailAddress": {"address": a}} for a in cc],
        "bccRecipients": [{"emailAddress": {"address": a}} for a in bcc],
    }

    if when is not None:
        if when.tzinfo is None:
            raise TypeError("Naive datetimes are no longer supported")
        utcwhen = when.astimezone(datetime.timezone.utc)
        isoval = utcwhen.isoformat(timespec="seconds").replace("+00:00", "Z")
        # Deferred-send is expressed through this extended MAPI property.
        data["singleValueExtendedProperties"] = [{"id": "SystemTime 0x3FEF", "value": isoval}]

    if not reply_to:
        response = requests.post(
            "https://graph.microsoft.com/v1.0/me/messages",
            json=data,
            headers={
                "Authorization": "Bearer %s" % token,
                "Prefer": 'IdType="ImmutableId"',
            },
            timeout=20,
        ).json()
    else:
        response = requests.post(
            "https://graph.microsoft.com/v1.0/me/messages/%s/createReply" % reply_to,
            json=data,
            headers={
                "Authorization": "Bearer %s" % token,
                "Prefer": 'IdType="ImmutableId"',
            },
            timeout=20,
        ).json()

    try:
        msgid = response["id"]
    except KeyError as e:
        pprint(response)
        # Chain the KeyError so the original failure context survives.
        raise ValueError("Unable to add email to drafts") from e

    assert isinstance(msgid, str)

    response2 = requests.post(
        "https://graph.microsoft.com/v1.0/me/messages/%s/send" % msgid,
        headers={"Authorization": "Bearer " + token},
        timeout=20,
    )

    if response2.status_code != 202:
        pprint(response2.content.decode("utf-8", "replace"))
        raise ValueError(
            "Graph response to messages/%s/send returned something other than 202 Accepted" % msgid,
        )

    return msgid
def _read_bcc(path: str) -> list[str]:
    """Read one BCC address per non-blank line, closing the file.

    The original left the file handle dangling (open(...).readlines()).
    """
    with open(path, "r", encoding="utf-8") as fd:
        return [line.strip() for line in fd if line.strip()]


def _send_group(
    token: str,
    config: ConfigParser,
    date: datetime.datetime,
    html: str,
    group: str,
    reply_to: Optional[str] = None,
) -> str:
    """Send the bulletin to recipient group "1" or "2".

    Reads to_<group>/cc_<group>/bcc_<group>_file from [sendmail],
    schedules delivery at the configured hour/minute, and records the
    message id in last-a.txt (group 1) or last-b.txt (group 2) so a
    later run can reply to it.  Returns the message id.
    """
    msgid = sendmail(
        token,
        subject=config["sendmail"]["subject_format"] % date.strftime(config["sendmail"]["subject_date_format"]),
        body=html,
        to=config["sendmail"]["to_%s" % group].split(" "),
        cc=config["sendmail"]["cc_%s" % group].split(" "),
        bcc=_read_bcc(config["sendmail"]["bcc_%s_file" % group]),
        when=date.replace(
            hour=int(config["sendmail"]["hour"]),
            minute=int(config["sendmail"]["minute"]),
            second=0,
            microsecond=0,
        ),
        content_type="HTML",
        importance="Normal",
        reply_to=reply_to,
    )
    assert msgid
    letter = "a" if group == "1" else "b"
    with open("last-%s.txt" % letter, "w", encoding="utf-8") as fd:
        fd.write(msgid)
    return msgid


def main() -> None:
    """Parse arguments, load the rendered bulletin, authenticate, and
    send the bulletin to both recipient groups."""
    parser = argparse.ArgumentParser(description="Daily Bulletin Sender")
    parser.add_argument(
        "-d",
        "--date",
        default=None,
        help="the date of the bulletin to send, in local time, in YYYY-MM-DD; defaults to tomorrow",
    )
    parser.add_argument(
        "-r",
        "--reply",
        action="store_true",
        help="Reply to the previous bulletin when sending (BROKEN)",
    )
    parser.add_argument("--config", default="config.ini", help="path to the configuration file")
    args = parser.parse_args()
    config = ConfigParser()
    config.read(args.config)

    tz = zoneinfo.ZoneInfo(config["general"]["timezone"])
    if args.date:
        date = datetime.datetime.strptime(args.date, "%Y-%m-%d").replace(tzinfo=tz)
    else:
        # Default to tomorrow: the bulletin is prepared the night before.
        date = datetime.datetime.now(tz) + datetime.timedelta(days=1)

    os.chdir(config["general"]["build_path"])

    html_filename = "sjdb-%s.html" % date.strftime("%Y%m%d")
    with open(html_filename, "r", encoding="utf-8") as html_fd:
        html = html_fd.read()

    app = msal.PublicClientApplication(
        config["credentials"]["client_id"],
        authority=config["credentials"]["authority"],
    )
    token = acquire_token(app, config)

    if not args.reply:
        _send_group(token, config, date, html, "1")
        _send_group(token, config, date, html, "2")
    else:
        # BROKEN (see --help): reply to the message ids recorded by the
        # previous run.
        with open("last-a.txt", "r", encoding="utf-8") as fd:
            last_a = fd.read().strip()
        _send_group(token, config, date, html, "1", reply_to=last_a)
        with open("last-b.txt", "r", encoding="utf-8") as fd:
            last_b = fd.read().strip()
        _send_group(token, config, date, html, "2", reply_to=last_b)


if __name__ == "__main__":
    main()
+# + + +from __future__ import annotations +from typing import Union, TypeAlias +import json +import datetime +import zoneinfo +import os +import configparser +from jinja2 import StrictUndefined +from werkzeug.wrappers.response import Response as werkzeugResponse +from flask import ( + Flask, + Response, + render_template, +) + +ResponseType: TypeAlias = Union[Response, werkzeugResponse, str] + +app = Flask(__name__) +app.jinja_env.undefined = StrictUndefined + +config = configparser.ConfigParser() +config.read("config.ini") + + +# extra_data = { +# "aod": data["aods"][0], # FIXME +# "stddate": "2024-04-01", +# "weekday_english": "Monday", +# "weekday_abbrev": "Mon", +# "next_weekday_abbrev": "Tue", +# "weekday_chinese": "周一", +# "day_of_cycle": "SA", +# "today_breakfast": ("1", "2", "3", "4", "5", "6", "7", "8"), +# "today_lunch": ("1", "2", "3", "4", "5", "6", "7", "8"), +# "today_dinner": ("1", "2", "3", "4", "5", "6", "7", "8"), +# "next_breakfast": ("1", "2", "3", "4", "5", "6", "7", "8"), +# } +# +# data = data | extra_data + + +@app.route("/") +def index() -> ResponseType: + with open( + os.path.join( + config["general"]["build_path"], + "day-%s.json" % (datetime.datetime.now(tz=zoneinfo.ZoneInfo("Asia/Shanghai")) + datetime.timedelta(days=1)).strftime("%Y%m%d"), + ), + "r", + encoding="utf-8", + ) as fd: + data = json.load(fd) + return render_template("template.html", **data) + + +@app.route("/<date>") +def date(date: str) -> ResponseType: + with open( + os.path.join(config["general"]["build_path"], "day-%s.json" % date), + "r", + encoding="utf-8", + ) as fd: + data = json.load(fd) + return render_template("template.html", **data) + + +# The lack of the __name__ check is intentional. This script should not be used +# in a production server. + +app.run(port=8000, debug=True, use_reloader=True) |