diff options
Diffstat (limited to 'sjdbmk')
-rw-r--r-- | sjdbmk/daily.py | 271 | ||||
-rw-r--r-- | sjdbmk/grant.py | 82 | ||||
-rw-r--r-- | sjdbmk/inspire_approve.py | 11 | ||||
-rw-r--r-- | sjdbmk/inspire_dl.py | 109 | ||||
-rw-r--r-- | sjdbmk/legacy_wikipedia.py | 297 | ||||
-rw-r--r-- | sjdbmk/menuparser.py | 136 | ||||
-rw-r--r-- | sjdbmk/msal_skeleton.py | 62 | ||||
-rw-r--r-- | sjdbmk/pack.py | 92 | ||||
-rw-r--r-- | sjdbmk/sendmail.py | 274 | ||||
-rw-r--r-- | sjdbmk/sendmail2.py | 161 | ||||
-rw-r--r-- | sjdbmk/serve.py | 95 | ||||
-rw-r--r-- | sjdbmk/weekly.py | 516 |
12 files changed, 2106 insertions, 0 deletions
def main() -> None:
    """Entry point for the daily generator.

    Parses CLI arguments, loads the configuration and the cycle-day data,
    switches into the build directory, and generates the day JSON file for
    the target date (tomorrow in the configured timezone by default).
    """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description="Daily script for the Daily Bulletin")
    parser.add_argument(
        "--date",
        default=None,
        help="the day to generate for, in local time, in YYYY-MM-DD; defaults to tomorrow",
        # TODO: Verify validity of date
        # TODO: Verify consistency of date elsewhere
    )
    parser.add_argument(
        "--config", default="config.ini", help="path to the configuration file"
    )
    args = parser.parse_args()

    # Parse the date eagerly (naive, no timezone yet); None means "tomorrow".
    naive_target = (
        datetime.datetime.strptime(args.date, "%Y-%m-%d") if args.date else None
    )

    config = ConfigParser()
    config.read(args.config)

    tz = zoneinfo.ZoneInfo(config["general"]["timezone"])
    if naive_target is not None:
        target = naive_target.replace(tzinfo=tz)
    else:
        # Default: tomorrow in the configured local timezone.
        target = datetime.datetime.now(tz=tz) + datetime.timedelta(days=1)
    logger.info("Generating for %s", target.strftime("%Y-%m-%d %Z"))

    with open(config["general"]["cycle_data"], "r", encoding="utf-8") as cycle_data_file:
        cycle_data = json.load(cycle_data_file)

    # All subsequent relative file access happens inside the build directory.
    os.chdir(config["general"]["build_path"])

    generate(
        target,
        cycle_data=cycle_data,
        the_week_ahead_url=config["the_week_ahead"]["file_url"],
    )
def generate(
    datetime_target: datetime.datetime,
    the_week_ahead_url: str,
    cycle_data: dict[str, str],
) -> str:
    """Assemble and write the day-YYYYMMDD.json data file for *datetime_target*.

    Reads the matching week-YYYYMMDD.json (searching up to four days back for
    the week's start), consumes at most one unused approved inspiration, and
    pulls in pre-fetched On This Day HTML.  Must be run with the build
    directory as the current working directory.

    Returns the name of the JSON file written.
    Raises FileNotFoundError if no week file is found within five days.
    """
    weekday_enum = datetime_target.weekday()
    weekday_en = DAYNAMES[weekday_enum]
    weekday_zh = DAYNAMES_CHINESE[weekday_enum]
    # Abbreviated names for the REMAINDER of the week, starting today.
    weekdays_short = DAYNAMES_SHORT[weekday_enum:]
    # next_weekday_short = DAYNAMES_SHORT[weekday_enum + 1]
    try:
        day_of_cycle = cycle_data[datetime_target.strftime("%Y-%m-%d")]
    except KeyError:
        # NOTE(review): "SA" fallback — presumably "Saturday/no-cycle"; confirm.
        day_of_cycle = "SA"
        logger.warning('Cycle day not found, using "SA"')

    # Find the week file: try today, then one..four days back.  After the
    # loop, days_since_beginning is the offset of the target day within the
    # week whose file was found.
    for days_since_beginning in range(0, 5):
        week_start_date = datetime_target - datetime.timedelta(
            days=days_since_beginning
        )
        try:
            with open(
                "week-%s.json" % week_start_date.strftime("%Y%m%d"),
                "r",
                encoding="utf-8",
            ) as week_file:
                week_data = json.load(week_file)
        except FileNotFoundError:
            continue
        else:
            break
    else:
        # NOTE(review): "{date}" is a literal here — looks like a missing
        # f-string prefix; the message is also slightly off (we search today
        # plus four prior days).
        raise FileNotFoundError(
            "Cannot find a week-{date}.json file without five prior days"
        )

    try:
        aod = week_data["aods"][days_since_beginning]
    except IndexError:
        # NOTE(review): only IndexError is caught — a missing "aods" key
        # would raise KeyError and propagate.  Confirm intended.
        logger.warning("AOD not found")
        aod = "None"

    # Menus: today's three meals, plus tomorrow's breakfast if the week file
    # covers it.
    breakfast_today = week_data["menu"]["breakfast"][days_since_beginning]
    lunch_today = week_data["menu"]["lunch"][days_since_beginning]
    dinner_today = week_data["menu"]["dinner"][days_since_beginning]
    try:
        breakfast_tomorrow = week_data["menu"]["breakfast"][days_since_beginning + 1]
    except IndexError:
        breakfast_tomorrow = None
    # Snacks are optional; any missing slot becomes None.
    try:
        snack_morning = week_data["snacks"][0][days_since_beginning]
    except (KeyError, IndexError):
        snack_morning = None
    try:
        snack_afternoon = week_data["snacks"][1][days_since_beginning]
    except (KeyError, IndexError):
        snack_afternoon = None
    try:
        snack_evening = week_data["snacks"][2][days_since_beginning]
    except (KeyError, IndexError):
        snack_evening = None

    logger.info("Checking for inspirations")
    # TODO: Should probably allow inspirations to be reused on the same day
    # e.g. "used" should be set to the date it was used on
    # Pick the first approved, unused inspire-* submission; mark it used.
    for inspfn in os.listdir():
        if not inspfn.startswith("inspire-"):
            continue
        with open(inspfn, "r", encoding="utf-8") as inspfd:
            inspjq = json.load(inspfd)
        if (not inspjq["approved"]) or inspjq["used"]:
            continue
        # NOTE(review): the submission is marked used BEFORE the type check
        # below — an invalid-type submission is burned without being shown.
        inspjq["used"] = True
        with open(inspfn, "w", encoding="utf-8") as inspfd:
            json.dump(inspjq, inspfd, indent="\t")
        inspiration_type = inspjq["type"]
        if inspiration_type not in ["text", "media", "canteen"]:
            logger.warning("Inspiration type for %s invalid, skipping" % inspfn)
            continue
        inspiration_origin = inspjq["origin"]
        inspiration_shared_by = inspjq["uname"]
        inspiration_text = inspjq["text"]
        inspiration_image_fn = inspjq["file"]
        if inspiration_image_fn:
            logger.info("Inspiration has attachment %s" % inspiration_image_fn)
            inspiration_image_mime, inspiration_image_extra_encoding = (
                mimetypes.guess_type(inspiration_image_fn)
            )
            # guess_type's second value is a transfer encoding (gzip etc.);
            # attachments are expected to be plain files.
            assert not inspiration_image_extra_encoding
            # Attachment is embedded into the JSON as base64.
            with open(
                "inspattach-%s" % os.path.basename(inspiration_image_fn), "rb"
            ) as ifd:
                inspiration_image_data = base64.b64encode(ifd.read()).decode("ascii")
        else:
            inspiration_image_data = None
            inspiration_image_mime = None
        break
    else:
        # No usable inspiration found: null out every field.
        inspiration_image_data = None
        inspiration_image_mime = None
        inspiration_type = None
        inspiration_origin = None
        inspiration_shared_by = None
        inspiration_text = None
        inspiration_image_fn = None

    logger.info("Finished processing inspirations")
    logger.info("Starting On This Day")

    # On This Day HTML is pre-generated (see legacy_wikipedia) and keyed by
    # month-day only, so one file serves every year.
    on_this_day_html_en: typing.Optional[str]
    try:
        with open("otd_en-%s.html" % datetime_target.strftime("%m-%d"), "r") as fd:
            on_this_day_html_en = fd.read()
    except FileNotFoundError:
        on_this_day_html_en = None
        logger.warning("On This Day English not found")
    on_this_day_html_zh: typing.Optional[str]
    try:
        with open("otd_zh-%s.html" % datetime_target.strftime("%m-%d"), "r") as fd:
            on_this_day_html_zh = fd.read()
    except FileNotFoundError:
        on_this_day_html_zh = None
        logger.warning("On This Day Chinese not found")
    logger.info("Finished On This Day")

    # logger.info("Starting In The News")
    # in_the_news_html_en = legacy_wikipedia.get_in_the_news_en()
    # in_the_news_html_zh = legacy_wikipedia.get_in_the_news_zh()
    # logger.info("Finished In The News")
    in_the_news_html_en = ""
    in_the_news_html_zh = ""

    data = {
        "stddate": datetime_target.strftime("%Y-%m-%d"),
        "community_time": week_data["community_time"][days_since_beginning:],
        "days_after_this": len(week_data["community_time"][days_since_beginning:]) - 1,
        "aod": aod,
        "weekday_english": weekday_en,
        "weekdays_abbrev": weekdays_short,
        "weekday_chinese": weekday_zh,
        "day_of_cycle": day_of_cycle,
        "today_breakfast": breakfast_today,
        "today_lunch": lunch_today,
        "today_dinner": dinner_today,
        "next_breakfast": breakfast_tomorrow,
        "the_week_ahead_url": the_week_ahead_url,
        "snack_morning": snack_morning,
        "snack_afternoon": snack_afternoon,
        "snack_evening": snack_evening,
        "inspiration_type": inspiration_type,
        "inspiration_shared_by": inspiration_shared_by,
        "inspiration_origin": inspiration_origin,
        "inspiration_text": inspiration_text,
        "inspiration_image_data": inspiration_image_data,
        "inspiration_image_mime": inspiration_image_mime,
        "on_this_day_html_en": on_this_day_html_en,
        "on_this_day_html_zh": on_this_day_html_zh,
        "in_the_news_html_en": in_the_news_html_en,
        "in_the_news_html_zh": in_the_news_html_zh,
    }
    # ensure_ascii=False keeps the Chinese text human-readable in the output.
    with open(
        "day-%s.json" % datetime_target.strftime("%Y%m%d"), "w", encoding="utf-8"
    ) as fd:
        json.dump(data, fd, ensure_ascii=False, indent="\t")
    logger.info(
        "Data dumped to " + "day-%s.json" % datetime_target.strftime("%Y%m%d"),
    )
    return "day-%s.json" % datetime_target.strftime("%Y%m%d")
def acquire_token_interactive(
    app: msal.PublicClientApplication, config: ConfigParser
) -> str:
    """Interactively acquire a delegated access token.

    Opens the system browser for the account named in the config's
    ``[credentials]`` section, requesting the configured space-separated
    scopes.  Returns the access token string; raises ValueError if the
    flow does not yield one.
    """
    creds = config["credentials"]
    result = app.acquire_token_interactive(
        creds["scope"].split(" "),
        login_hint=creds["username"],
    )

    try:
        token = result["access_token"]
    except KeyError:
        raise ValueError(
            "Authentication error while trying to interactively acquire a token"
        ) from None
    assert isinstance(token, str)
    return token
= requests.get( + "https://graph.microsoft.com/v1.0/me", + headers={"Authorization": "Bearer " + token}, + timeout=20, + ).json() + assert isinstance(graph_response, dict) + return graph_response + + +def main() -> None: + config = ConfigParser() + config.read("config.ini") + app = msal.PublicClientApplication( + config["credentials"]["client_id"], + authority=config["credentials"]["authority"], + ) + acquire_token_interactive(app, config) + pprint(test_login(app, config)) + + +if __name__ == "__main__": + main() diff --git a/sjdbmk/inspire_approve.py b/sjdbmk/inspire_approve.py new file mode 100644 index 0000000..98d202c --- /dev/null +++ b/sjdbmk/inspire_approve.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +from __future__ import annotations +import json +import sys + +for fn in sys.argv[1:]: + with open(fn, "r+") as fd: + jq = json.load(fd) + jq["approved"] = True + json.dump(jq, fd, indent="\t") diff --git a/sjdbmk/inspire_dl.py b/sjdbmk/inspire_dl.py new file mode 100644 index 0000000..631ea44 --- /dev/null +++ b/sjdbmk/inspire_dl.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# +# Daily script to prepare the YK Pao School Daily Bulletin's JSON data files +# Copyright (C) 2024 Runxi Yu <https://runxiyu.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
def main() -> None:
    """Download new Daily Inspiration submissions and their attachments.

    Lists submission IDs from the web service, skips those already present
    locally as ``inspire-<id>`` files in the build directory, and stores each
    new one with its "used"/"approved" flags reset; attachments are saved as
    ``inspattach-<basename>``.
    """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description="Download Daily Inspirations")
    # parser.add_argument("--changeme", default=None, help="changeme")
    parser.add_argument(
        "--config", default="config.ini", help="path to the configuration file"
    )
    args = parser.parse_args()

    config = ConfigParser()
    config.read(args.config)

    build_path = config["general"]["build_path"]
    os.chdir(build_path)

    api_base = config["web_service"]["api_base"].rstrip("/") + "/"
    token = config["web_service"]["token"].strip()

    response_json = requests.get(
        api_base + "rs",
        headers={"Authorization": "Bearer %s" % token},
        timeout=20,
    ).json()
    assert isinstance(response_json, list)
    remote_submission_list = set(response_json)

    # BUGFIX: this used sn.lstrip("inspire-"), which strips any leading run
    # of the characters {i,n,s,p,r,e,-} rather than the literal prefix, so
    # IDs beginning with those letters were mangled and re-downloaded.
    local_submission_list = {
        sn.removeprefix("inspire-") for sn in os.listdir() if sn.startswith("inspire-")
    }
    to_fetch = remote_submission_list - local_submission_list
    if to_fetch:
        logger.info("Going to fetch: %s" % ", ".join(to_fetch))
    else:
        logger.info("Nothing to fetch")
    for sn in to_fetch:
        logger.info("Fetching: %s" % sn)
        with requests.get(
            api_base + "rs/" + sn,
            headers={
                "Authorization": "Bearer %s" % token,
                "Accept-Encoding": "identity",
            },
            stream=True,
            timeout=20,
        ) as r:
            try:
                sub = json.load(r.raw)
            except json.decoder.JSONDecodeError:
                # BUGFIX: previously this only logged and then fell through,
                # using an undefined (or stale) `sub`; skip the submission.
                logger.error("inspire-%s is broken, skipping" % sn)
                continue
        sub["used"] = False
        sub["approved"] = False
        # os.path.basename guards against path components in remote IDs.
        with open("inspire-%s" % os.path.basename(sn), "w", encoding="utf-8") as fd:
            json.dump(sub, fd, indent="\t")
        if not sub["file"]:
            logger.info("No attachment")
        else:
            logger.info("Attachment noticed")
            with requests.get(
                api_base + "rf/" + os.path.basename(sub["file"]),
                headers={
                    "Authorization": "Bearer %s" % token,
                    "Accept-Encoding": "identity",
                },
                stream=True,
                timeout=20,
            ) as r:
                with open("inspattach-%s" % os.path.basename(sub["file"]), "wb") as fd:
                    shutil.copyfileobj(r.raw, fd)
                    fd.flush()
                logger.info("Saved to inspattach-%s" % os.path.basename(sub["file"]))
def get_on_this_day_zh() -> None:
    """Scrape Chinese Wikipedia's "On This Day" pages for all twelve months
    and write one otd_zh-MM-DD.html fragment per day into the current
    directory (expected to be the build directory).
    """
    # "1月" .. "12月"
    months = list(map(lambda x: str(x) + "月", range(1, 13)))

    for index in range(12):

        month = months[index]
        day = 1

        url = "https://zh.m.wikipedia.org/zh-cn/Wikipedia:历史上的今天/" + month
        response = requests.get(url, timeout=15)
        html = response.text
        soup = bs4.BeautifulSoup(html, "html.parser")
        # One selected-anniversary div per day of the month, in order.
        div_elements = soup.find_all("div", class_="selected-anniversary")

        for div_element in div_elements:

            # Year 2000 is a leap year, so Feb 29 is representable.
            datetime_time = datetime.datetime(2000, index + 1, day)
            formatted_time_yearless = datetime_time.strftime("%m-%d")

            p_element = div_element.find("p")
            dl_element = div_element.find("dl")
            event_elements = dl_element.find_all("div", class_="event")
            # Re-wrap each event as a plain <li> in a fresh <ul>.
            ul_element = soup.new_tag("ul")

            for event in event_elements:
                li_element = soup.new_tag("li")
                li_element.append(event)
                ul_element.append(li_element)

            # Absolutize /wiki links and massage the markup into the shape
            # the bulletin template expects.
            result = str(p_element).replace(
                "/wiki", "https://zh.wikipedia.org/zh-cn"
            ).replace('<span class="otd-year">', "<b>").replace(
                "</span>:", ":</b>"
            ) + str(
                ul_element
            ).replace(
                "/wiki", "https://zh.wikipedia.org/zh-cn"
            ).replace(
                "</dt><dd>", " – "
            ).replace(
                '<div class="event">\n<dt>', ""
            ).replace(
                "</dd>\n</div>", ""
            )
            # Drop "(pictured)"-style image captions.
            result = re.sub(r"<small>.*?图.*?</small>", "", result)

            with open("otd_zh-" + formatted_time_yearless + ".html", "w") as file:
                file.write(result)
                # NOTE(review): redundant — the with-block already closes it.
                file.close()
            day += 1
def get_on_this_day_en() -> None:
    """Scrape English Wikipedia's "Selected anniversaries" pages for all
    twelve months and write one otd_en-MM-DD.html fragment per day into the
    current directory.
    """
    months = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ]

    for index in range(12):

        month = months[index]
        day = 1
        url = (
            "https://en.m.wikipedia.org/wiki/Wikipedia:Selected_anniversaries/" + month
        )
        response = requests.get(url, timeout=15)
        html = response.text
        soup = bs4.BeautifulSoup(html, "html.parser")
        p_elements = soup.find_all("p")

        for p_element in p_elements:

            try:
                # Year 2000 is a leap year; ValueError means we ran past the
                # end of the month, so stop scanning paragraphs.
                datetime_time = datetime.datetime(2000, index + 1, day)
                formatted_time_yearless = datetime_time.strftime("%m-%d")
            except ValueError:
                break

            # Only paragraphs whose heading links to "<Month> <day>" start a
            # day's section; skip everything else.
            if not re.search(
                f'<p><b><a href="/wiki/{month}_\\d+" title="{month} \\d+">{month} \\d+</a></b',
                str(p_element),
            ):
                continue
            div_element = p_element.find_next("div")
            ul_element = div_element.find_next_sibling("ul")
            ul_element_2 = ul_element.find_next("ul")
            p_element_2 = soup.new_tag("p")
            li_contents = list(ul_element_2.find_all("li"))

            # Flatten the births/deaths list items into a single paragraph.
            for li in li_contents:
                p_element_2.append(li)

            # Absolutize /wiki links and reshape the markup for the template.
            result = (
                str(p_element).replace("/wiki", "https://en.wikipedia.org/wiki")
                + str(ul_element).replace("/wiki", "https://en.wikipedia.org/wiki")
                + "\n"
                + str(p_element_2)
                .replace("</li><li>", "; ")
                .replace("<li>", "<b>Births and Deaths: </b>")
                .replace("</li>", "")
                .replace("/wiki", "https://en.wikipedia.org/wiki")
            )
            # Drop "(pictured)"-style captions.
            result = re.sub(r" <i>.*?icture.*?</i>", "", result)

            with open("otd_en-" + formatted_time_yearless + ".html", "w") as file:
                file.write(result)
                # NOTE(review): redundant — the with-block already closes it.
                file.close()
            day += 1
def get_in_the_news_en() -> str:
    """Scrape the "In the news" box from the English Wikipedia main page.

    Returns an HTML fragment: the headline list, then "Ongoing" and
    "Recent deaths" rendered as single paragraphs.
    """
    url = "https://en.m.wikipedia.org/wiki/Main_Page"
    response = requests.get(url, timeout=15)
    html = response.text
    soup = bs4.BeautifulSoup(html, "html.parser")

    # Walk from the ITN container: first <ul> = headlines, second = ongoing,
    # then past a divider <div> to the recent-deaths <ul>.
    h2_element = soup.find("div", id="mp-itn")
    assert h2_element
    ul_element = h2_element.find_next("ul")
    assert ul_element
    ul_element_2 = ul_element.find_next("ul")
    assert ul_element_2
    div_element = ul_element_2.find_next("div")
    assert div_element
    ul_element_3 = div_element.find_next("ul")
    assert ul_element_3

    p_element_2 = soup.new_tag("p")
    p_element_3 = soup.new_tag("p")
    assert isinstance(ul_element_2, bs4.Tag)
    assert isinstance(ul_element_3, bs4.Tag)
    li_contents_2 = list(ul_element_2.find_all("li"))
    li_contents_3 = list(ul_element_3.find_all("li"))
    # Nested <ul>s are stripped from their parent <li>; `skip` drops the
    # following item, which find_all already yielded from the nested list.
    # NOTE(review): `skip` deliberately(?) carries over from the first loop
    # into the second — confirm that is intended and not a latent bug.
    skip = False
    for li in li_contents_2:
        if skip:
            skip = False
            continue
        if li.find("ul"):
            new_li = copy.deepcopy(li)
            new_li.find("ul").decompose()
            p_element_2.append(new_li)
            skip = True
        else:
            p_element_2.append(li)
    for li in li_contents_3:
        if skip:
            skip = False
            continue
        if li.find("ul"):
            new_li = copy.deepcopy(li)
            new_li.find("ul").decompose()
            p_element_3.append(new_li)
            skip = True
        else:
            p_element_3.append(li)

    # Absolutize /wiki links and flatten the lists into labeled paragraphs.
    result = (
        str(ul_element).replace("/wiki", "https://en.wikipedia.org/wiki")
        + str(p_element_2)
        .replace("</li><li>", "; ")
        .replace("<li>", "<b>Ongoing: </b>")
        .replace("</li>", "")
        .replace("\n;", ";")
        .replace("/wiki", "https://en.wikipedia.org/wiki")
        .replace("</p>", "<br>")
        + str(p_element_3)
        .replace("</li><li>", "; ")
        .replace("<li>", "<b>Recent deaths: </b>")
        .replace("</li>", "")
        .replace("\n;", ";")
        .replace("/wiki", "https://en.wikipedia.org/wiki")
        .replace("<p>", "")
    )
    # Drop parenthesized italic asides, e.g. "(pictured)".
    result = re.sub(r" <i>\(.*?\)</i>", "", result)

    return result
def get_in_the_news_zh() -> str:
    """Scrape the "In the news" box from the Chinese Wikipedia main page.

    Returns an HTML fragment: the headline list, then the "ongoing" and
    "recent deaths" hlists flattened into labeled paragraphs.
    """
    url = "https://zh.m.wikipedia.org/zh-cn/Wikipedia:%E9%A6%96%E9%A1%B5"
    response = requests.get(url, timeout=15)
    html = response.text
    soup = bs4.BeautifulSoup(html, "html.parser")

    # column-itn holds: headlines <ul>, then an "ongoing" <ul>, then a
    # "recent deaths" <ul>, each with an inline hlist span.
    div_element = soup.find("div", id="column-itn")
    assert div_element
    ul_element = div_element.find("ul")
    assert isinstance(ul_element, bs4.Tag)
    ul_element_2 = ul_element.find_next("ul")
    assert isinstance(ul_element_2, bs4.Tag)
    ul_element_3 = ul_element_2.find_next("ul")
    assert isinstance(ul_element_3, bs4.Tag)
    span_element_2 = ul_element_2.find("span", class_="hlist inline")
    span_element_3 = ul_element_3.find("span", class_="hlist inline")
    assert span_element_2 and span_element_3
    p_element_2 = soup.new_tag("p")
    p_element_3 = soup.new_tag("p")
    p_element_2.append(span_element_2)
    p_element_3.append(span_element_3)

    # Absolutize /wiki links, relabel the hlists, and strip a hard-coded
    # timeline link that Wikipedia appends to the Russia/Ukraine entry.
    result = (
        str(ul_element).replace("/wiki", "https://zh.wikipedia.org/zh-cn")
        + str(p_element_2)
        .replace('<span class="hlist inline">', "<b>正在发生:</b>")
        .replace("</span>", "")
        .replace("-", ";")
        .replace(
            '(<a href="/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD%E6%99%82%E9%96%93%E8%BB%B8" title="俄罗斯入侵乌克兰时间轴">时间轴</a>)',
            "",
        )
        .replace("/wiki", "https://zh.wikipedia.org/zh-cn")
        + str(p_element_3)
        .replace('<span class="hlist inline">', "<b>最近逝世:</b>")
        .replace("</span>", "")
        .replace("-", ";")
        .replace("/wiki", "https://zh.wikipedia.org/zh-cn")
    ).replace("</p><p>", "<br>")
    # Drop <small> annotations (image captions etc.).
    result = re.sub(r"<small.*?>.*?</small>", "", result)

    return result
def menu_item_fix(s: str) -> Optional[str]:
    """Normalise one raw menu cell.

    Returns None for empty cells and for the boilerplate condiments row;
    otherwise strips surrounding whitespace and tidies a few recurring
    spacing/slash quirks from the spreadsheet.
    """
    if not s:
        return None
    if s == "Condiments Selection\n葱,香菜,榨菜丝,老干妈,生抽,醋":
        return None
    cleaned = s.strip()
    for old, new in (
        ("Biscuit /", "Biscuit/"),
        ("Juice /", "Juice/"),
        (" \n", "\n"),
        ("\n ", "\n"),
    ):
        cleaned = cleaned.replace(old, new)
    return cleaned
def parse_menus(filename: str) -> dict[str, dict[str, dict[str, list[str]]]]:
    """Parse the weekly XLSX menu into nested dicts.

    Result shape: meal name ("Breakfast"/"Lunch"/"Dinner") -> weekday
    abbreviation -> station name -> list of dishes.  Scans column B of the
    "菜单" sheet for the meal-header rows and hands each section to
    parse_meal_table with that meal's expected station labels.
    """
    wb = openpyxl.load_workbook(filename=filename)
    ws = wb["菜单"]
    rows = list(ws.iter_rows())

    final: dict[str, dict[str, dict[str, list[str]]]] = {}

    i = -1
    while i < len(rows) - 1:
        i += 1
        row = rows[i]
        # Header rows carry a string in column B; anything else is data or
        # merged-cell padding.
        if not isinstance(row[1].value, str):
            continue
        if "BREAKFAST" in row[1].value:
            # Breakfast has six stations (no separate vegetarian/soup rows).
            final["Breakfast"] = parse_meal_table(
                rows,
                i,
                [
                    "Taste of Asia",
                    "Eat Global",
                    "Revolution Noodle",
                    "Piccola Italia",
                    "Self Pick-up",  # instead of veg and soup
                    "Fruit/Drink",
                ],
            )
        elif "LUNCH" in row[1].value:
            final["Lunch"] = parse_meal_table(
                rows,
                i,
                [
                    "Taste of Asia",
                    "Eat Global",
                    "Revolution Noodle",
                    "Piccola Italia",
                    "Vegetarian",
                    "Daily Soup",
                    "Dessert/Fruit/Drink",
                ],
            )
        elif "DINNER" in row[1].value:
            final["Dinner"] = parse_meal_table(
                rows,
                i,
                [
                    "Taste of Asia",
                    "Eat Global",
                    "Revolution Noodle",
                    "Piccola Italia",
                    "Vegetarian",
                    "Daily Soup",
                    "Dessert/Fruit/Drink",
                ],
            )
        # elif "Students Snack" in row[1].value:
        #     parse_meal_table(rows, i)

    return final
def acquire_token(config: ConfigParser) -> str:
    """Acquire a Microsoft Graph access token.

    Builds a public client from the ``[credentials]`` config section and runs
    the resource-owner password flow with the configured username, password
    and space-separated scopes.  Raises ValueError on failure.
    """
    creds = config["credentials"]
    app = msal.PublicClientApplication(
        creds["client_id"],
        authority=creds["authority"],
    )
    result = app.acquire_token_by_username_password(
        creds["username"],
        creds["password"],
        scopes=creds["scope"].split(" "),
    )

    token = result.get("access_token") if "access_token" in result else None
    if token is None:
        raise ValueError("Authentication error in password login")
    assert isinstance(token, str)
    return token
def main(date: str, config: ConfigParser) -> None:
    """Render the bulletin HTML for *date* (YYYY-MM-DD).

    Loads the main Jinja template named in ``[templates]``, feeds it the
    day-YYYYMMDD.json data from the build directory, and streams the result
    to sjdb-YYYYMMDD.html alongside it.
    """
    with open(
        os.path.join(config["templates"]["directory"], config["templates"]["main"]),
        "r",
        encoding="utf-8",
    ) as template_file:
        # StrictUndefined makes missing data keys fail loudly at render time.
        template = Template(
            template_file.read(), undefined=StrictUndefined, autoescape=True
        )

    with open(
        os.path.join(
            config["general"]["build_path"], "day-" + date.replace("-", "") + ".json"
        ),
        "r",
        encoding="utf-8",
    ) as fd:
        data = json.load(fd)

    # extra_data = {
    # }
    #
    # data = data | extra_data

    # Template variables come straight from the day JSON keys.
    template.stream(**data).dump(
        os.path.join(
            config["general"]["build_path"], "sjdb-%s.html" % date.replace("-", "")
        )
    )

    # FIXME: Escape the dangerous HTML!
#!/usr/bin/env python3
#
# Send the Daily Bulletin the next morning
# Copyright (C) 2024 Runxi Yu <https://runxiyu.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

from __future__ import annotations
from configparser import ConfigParser
from typing import Optional
from pprint import pprint
import datetime
import zoneinfo
import argparse
import os

import requests
import msal  # type: ignore


def open_and_readlines(filename: str) -> list[str]:
    """Return all lines of *filename* (newlines preserved), decoded as UTF-8."""
    with open(filename, "r", encoding="utf-8") as fd:
        return fd.readlines()


def acquire_token(app: msal.PublicClientApplication, config: ConfigParser) -> str:
    """Password-grant login against Graph; return the access token.

    Raises ValueError when the password grant does not yield a token.
    """
    result = app.acquire_token_by_username_password(
        config["credentials"]["username"],
        config["credentials"]["password"],
        scopes=config["credentials"]["scope"].split(" "),
    )

    if "access_token" in result:
        assert isinstance(result["access_token"], str)
        return result["access_token"]
    raise ValueError("Authentication error in password login")


def sendmail(
    token: str,
    subject: str,
    body: str,
    to: list[str],
    bcc: list[str],
    cc: list[str],
    when: Optional[datetime.datetime] = None,
    content_type: str = "HTML",
    importance: str = "Normal",
    reply_to: Optional[str] = None,
) -> str:
    """Draft a message (or a reply) via Microsoft Graph and send it.

    when     -- aware datetime at which delivery is deferred; naive rejected
    reply_to -- immutable message id to reply to, or None for a new message
    Returns the immutable id of the sent draft.
    Raises ValueError when drafting or sending fails, TypeError on naive *when*.
    """
    data = {
        "subject": subject,
        "importance": importance,
        "body": {"contentType": content_type, "content": body},
        "toRecipients": [{"emailAddress": {"address": a}} for a in to],
        "ccRecipients": [{"emailAddress": {"address": a}} for a in cc],
        "bccRecipients": [{"emailAddress": {"address": a}} for a in bcc],
    }

    if when is not None:
        if when.tzinfo is None:
            raise TypeError("Naive datetimes are no longer supported")
        utcwhen = when.astimezone(datetime.timezone.utc)
        isoval = utcwhen.isoformat(timespec="seconds").replace("+00:00", "Z")
        # "SystemTime 0x3FEF" defers delivery until the given UTC instant.
        data["singleValueExtendedProperties"] = [
            {"id": "SystemTime 0x3FEF", "value": isoval}
        ]

    # The two branches differ only in the endpoint URL.
    if reply_to:
        create_url = (
            "https://graph.microsoft.com/v1.0/me/messages/%s/createReply" % reply_to
        )
    else:
        create_url = "https://graph.microsoft.com/v1.0/me/messages"
    response = requests.post(
        create_url,
        json=data,
        headers={
            "Authorization": "Bearer %s" % token,
            "Prefer": 'IdType="ImmutableId"',
        },
        timeout=20,
    ).json()

    try:
        msgid = response["id"]
    except KeyError as exc:
        pprint(response)
        raise ValueError("Unable to add email to drafts") from exc

    assert isinstance(msgid, str)

    response2 = requests.post(
        "https://graph.microsoft.com/v1.0/me/messages/%s/send" % msgid,
        headers={"Authorization": "Bearer " + token},
        timeout=20,
    )

    if response2.status_code != 202:
        pprint(response2.content.decode("utf-8", "replace"))
        raise ValueError(
            "Graph response to messages/%s/send returned something other than 202 Accepted"
            % response["id"],
        )

    return msgid


def _send_issue(
    token: str,
    config: ConfigParser,
    date: datetime.datetime,
    html: str,
    group: int,
    reply_to: Optional[str] = None,
) -> str:
    """Send one copy of the bulletin to recipient group 1 or 2.

    Reads to_N/cc_N/bcc_N_file and the scheduling options from [sendmail].
    Returns the sent message's immutable id.
    """
    msgid = sendmail(
        token,
        subject=config["sendmail"]["subject_format"]
        % date.strftime(config["sendmail"]["subject_date_format"]),
        body=html,
        to=config["sendmail"]["to_%d" % group].split(" "),
        cc=config["sendmail"]["cc_%d" % group].split(" "),
        bcc=[
            w.strip()
            for w in open_and_readlines(config["sendmail"]["bcc_%d_file" % group])
            if w.strip()
        ],
        when=date.replace(
            hour=int(config["sendmail"]["hour"]),
            minute=int(config["sendmail"]["minute"]),
            second=0,
            microsecond=0,
        ),
        content_type="HTML",
        importance="Normal",
        reply_to=reply_to,
    )
    assert msgid
    return msgid


def main() -> None:
    """CLI entry point: send the bulletin for a date to both recipient groups.

    The previous send's message ids are persisted in last-a.txt/last-b.txt so
    that --reply can thread the next issue onto them.
    """
    parser = argparse.ArgumentParser(description="Daily Bulletin Sender")
    parser.add_argument(
        "-d",
        "--date",
        default=None,
        help="the date of the bulletin to send, in local time, in YYYY-MM-DD; defaults to tomorrow",
    )
    parser.add_argument(
        "-r",
        "--reply",
        action="store_true",
        help="Reply to the previous bulletin when sending (BROKEN)",
    )
    parser.add_argument(
        "--config", default="config.ini", help="path to the configuration file"
    )
    args = parser.parse_args()
    config = ConfigParser()
    config.read(args.config)
    if args.date:
        date = datetime.datetime.strptime(args.date, "%Y-%m-%d").replace(
            tzinfo=zoneinfo.ZoneInfo(config["general"]["timezone"])
        )
    else:
        date = datetime.datetime.now(
            zoneinfo.ZoneInfo(config["general"]["timezone"])
        ) + datetime.timedelta(days=1)

    os.chdir(config["general"]["build_path"])

    html_filename = "sjdb-%s.html" % date.strftime("%Y%m%d")
    with open(html_filename, "r", encoding="utf-8") as html_fd:
        html = html_fd.read()

    app = msal.PublicClientApplication(
        config["credentials"]["client_id"],
        authority=config["credentials"]["authority"],
    )
    token = acquire_token(app, config)

    # Previously this was four near-identical inline blocks; the send/persist
    # sequence is identical for both groups and both modes.
    for group, last_file in ((1, "last-a.txt"), (2, "last-b.txt")):
        if args.reply:
            with open(last_file, "r", encoding="utf-8") as fd:
                reply_to: Optional[str] = fd.read().strip()
        else:
            reply_to = None
        msgid = _send_issue(token, config, date, html, group, reply_to)
        with open(last_file, "w", encoding="utf-8") as fd:
            fd.write(msgid)


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# sjdbmk/sendmail2.py
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
#
# Send the Daily Bulletin the next morning
# Copyright (C) 2024 Runxi Yu <https://runxiyu.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# **TODO:** Send MIME rather than JSON
#

from __future__ import annotations
import os
import datetime
import zoneinfo
import argparse
from configparser import ConfigParser
from typing import Optional
import msal  # type: ignore
import requests


def acquire_token(app: msal.PublicClientApplication, config: ConfigParser) -> str:
    """Password-grant login against Graph; return the access token.

    Raises ValueError when the password grant does not yield a token.
    """
    result = app.acquire_token_by_username_password(
        config["credentials"]["username"],
        config["credentials"]["password"],
        scopes=config["credentials"]["scope"].split(" "),
    )

    if "access_token" in result:
        assert isinstance(result["access_token"], str)
        return result["access_token"]
    raise ValueError("Authentication error in password login")


def sendmail(
    token: str,
    subject: str,
    body: str,
    to: list[str],
    bcc: list[str],
    cc: list[str],
    when: Optional[datetime.datetime] = None,
    content_type: str = "HTML",
    importance: str = "Normal",
) -> None:
    """Draft a message via Microsoft Graph and send it.

    when -- aware datetime at which delivery is deferred; naive rejected.
    Raises ValueError when drafting or sending fails, TypeError on naive *when*.
    """
    data = {
        "subject": subject,
        "importance": importance,
        "body": {"contentType": content_type, "content": body},
        "toRecipients": [{"emailAddress": {"address": a}} for a in to],
        "ccRecipients": [{"emailAddress": {"address": a}} for a in cc],
        "bccRecipients": [{"emailAddress": {"address": a}} for a in bcc],
    }

    if when is not None:
        if when.tzinfo is None:
            raise TypeError("Naive datetimes are no longer supported")
        utcwhen = when.astimezone(datetime.timezone.utc)
        isoval = utcwhen.isoformat(timespec="seconds").replace("+00:00", "Z")
        # "SystemTime 0x3FEF" defers delivery until the given UTC instant.
        data["singleValueExtendedProperties"] = [
            {"id": "SystemTime 0x3FEF", "value": isoval}
        ]

    response = requests.post(
        "https://graph.microsoft.com/v1.0/me/messages",
        json=data,
        headers={"Authorization": "Bearer " + token},
        timeout=20,
    ).json()
    try:
        msgid = response["id"]
    except KeyError as exc:
        # Consistent with sendmail.py: surface the Graph error payload
        # instead of crashing with a bare KeyError.
        print(response)
        raise ValueError("Unable to add email to drafts") from exc
    response2 = requests.post(
        "https://graph.microsoft.com/v1.0/me/messages/%s/send" % msgid,
        headers={"Authorization": "Bearer " + token},
        timeout=20,
    )
    if response2.status_code != 202:
        print(response2.content.decode("utf-8", "replace"))
        raise ValueError(
            "Graph response to messages/%s/send returned something other than 202 Accepted"
            % msgid,
            response2,
        )
    # TODO: Handle more errors


def main() -> None:
    """CLI entry point: send the bulletin for a date to both recipient groups."""
    parser = argparse.ArgumentParser(description="Daily Bulletin Sender")
    parser.add_argument(
        "--date",
        default=None,
        help="the date of the bulletin to send, in local time, in YYYY-MM-DD; defaults to tomorrow",
    )
    parser.add_argument(
        "--config", default="config.ini", help="path to the configuration file"
    )
    args = parser.parse_args()
    config = ConfigParser()
    config.read(args.config)
    if args.date:
        date = datetime.datetime.strptime(args.date, "%Y-%m-%d").replace(
            tzinfo=zoneinfo.ZoneInfo(config["general"]["timezone"])
        )
    else:
        date = datetime.datetime.now(
            zoneinfo.ZoneInfo(config["general"]["timezone"])
        ) + datetime.timedelta(days=1)

    os.chdir(config["general"]["build_path"])

    html_filename = "sjdb-%s.html" % date.strftime("%Y%m%d")
    with open(html_filename, "r", encoding="utf-8") as html_fd:
        html = html_fd.read()

    app = msal.PublicClientApplication(
        config["credentials"]["client_id"],
        authority=config["credentials"]["authority"],
    )
    token = acquire_token(app, config)

    # Options shared by both sends.
    common = {
        "when": date.replace(
            hour=int(config["sendmail"]["hour"]),
            minute=int(config["sendmail"]["minute"]),
            second=0,
            microsecond=0,
        ),
        "content_type": "HTML",
        "importance": "Normal",
        "subject": config["sendmail"]["subject_format"]
        % date.strftime(config["sendmail"]["subject_date_format"]),
        "body": html,
    }

    sendmail(
        token,
        to=config["sendmail"]["to_1"].split(" "),
        cc=config["sendmail"]["cc_1"].split(" "),
        bcc=config["sendmail"]["bcc_1"].split(" "),
        **common,  # type: ignore
    )
    sendmail(
        token,
        to=config["sendmail"]["to_2"].split(" "),
        cc=config["sendmail"]["cc_2"].split(" "),
        bcc=config["sendmail"]["bcc_2"].split(" "),
        **common,  # type: ignore
    )


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# sjdbmk/serve.py
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
#
# Help in Daily Bulletin template development by dynamically filling templates
# with flask as the templates are being worked on. DO NOT USE IN PRODUCTION.
# Copyright (C) 2024 Runxi Yu <https://runxiyu.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

from __future__ import annotations
from typing import Union, TypeAlias
import json
import datetime
import zoneinfo
import os
import configparser
from jinja2 import StrictUndefined
from werkzeug.wrappers.response import Response as werkzeugResponse
from flask import (
    Flask,
    Response,
    render_template,
)

ResponseType: TypeAlias = Union[Response, werkzeugResponse, str]

app = Flask(__name__)
# Fail loudly on missing template variables while developing templates.
app.jinja_env.undefined = StrictUndefined

config = configparser.ConfigParser()
config.read("config.ini")


def _load_day(datestr: str) -> dict:
    """Load day-<datestr>.json (datestr is YYYYMMDD) from the build directory."""
    path = os.path.join(config["general"]["build_path"], "day-%s.json" % datestr)
    with open(path, "r", encoding="utf-8") as fd:
        data = json.load(fd)
    assert isinstance(data, dict)
    return data


@app.route("/")
def index() -> ResponseType:
    """Serve tomorrow's bulletin, in Asia/Shanghai local time."""
    tomorrow = datetime.datetime.now(
        tz=zoneinfo.ZoneInfo("Asia/Shanghai")
    ) + datetime.timedelta(days=1)
    return render_template("template.html", **_load_day(tomorrow.strftime("%Y%m%d")))


@app.route("/<date>")
def date(date: str) -> ResponseType:
    """Serve the bulletin for an explicit YYYYMMDD date."""
    return render_template("template.html", **_load_day(date))


# The lack of the __name__ check is intentional. This script should not be used
# in a production server.

app.run(port=8000, debug=True, use_reloader=True)

# ---------------------------------------------------------------------------
# sjdbmk/weekly.py
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
#
# Weekly script to prepare the YK Pao School Daily Bulletin's week JSON data
# Copyright (C) 2024 Runxi Yu <https://runxiyu.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# Some rules:
# - Pass localized aware datetime objects around.
#   Minimize the use of date strings and numbers.
#   NEVER used naive datetime objects.
#   Frequently check if the tzinfo is correct or cast the zone.
# - Delete variables that aren't supposed to be used anymore.
# - Functions should be short.
# - Do not pass ConfigParser objects around.
# - Use meaningful variable names.
# - Always write type hints.
# - Use the logger! Try not to print.
#
# TODO: Check The Week Ahead's dates

from __future__ import annotations
from typing import Any, Iterable, Iterator
from configparser import ConfigParser
import argparse
import logging
import subprocess
import datetime
import zoneinfo
import os
import shutil
import json
import base64
import email
import re

import requests
import msal  # type: ignore
import pptx
import pptx.exc

import menuparser

logger = logging.getLogger(__name__)


def generate(
    datetime_target: datetime.datetime,  # expected to be local time
    the_week_ahead_url: str,
    the_week_ahead_community_time_page_number: int,
    the_week_ahead_aod_page_number: int,
    weekly_menu_breakfast_page_number: int,
    weekly_menu_lunch_page_number: int,
    weekly_menu_dinner_page_number: int,
    weekly_menu_query_string: str,
    weekly_menu_sender: str,
    weekly_menu_subject_regex: str,
    weekly_menu_subject_regex_four_groups: tuple[int, int, int, int],
    graph_client_id: str,
    graph_authority: str,
    graph_username: str,
    graph_password: str,
    graph_scopes: list[str],
    calendar_address: str,
    soffice: str,
) -> str:
    """Build week-YYYYMMDD.json for the week starting at *datetime_target*.

    Downloads The Week Ahead and the weekly menu email attachment when they
    are not already present locally, extracts community time, AODs and menus,
    and dumps the combined JSON. Returns the output filename.
    Raises TypeError on naive datetimes.

    NOTE(review): the weekly_menu_*_page_number parameters are currently
    unused (menuparser.parse_menus only takes the filename); kept for
    interface compatibility.
    """
    if not datetime_target.tzinfo:
        raise TypeError("Naive datetimes are unsupported")
    output_filename = "week-%s.json" % datetime_target.strftime("%Y%m%d")
    logger.info("Output filename: %s", output_filename)

    token: str = acquire_token(
        graph_client_id, graph_authority, graph_username, graph_password, graph_scopes
    )

    calendar_response = requests.get(
        "https://graph.microsoft.com/v1.0/users/%s/calendar/calendarView"
        % calendar_address,
        headers={"Authorization": "Bearer " + token},
        params={
            "startDateTime": datetime_target.replace(microsecond=0).isoformat(),
            "endDateTime": (datetime_target + datetime.timedelta(days=7))
            .replace(microsecond=0)
            .isoformat(),
        },
        timeout=15,
    )
    if calendar_response.status_code != 200:
        raise ValueError(
            "Calendar response status code is not 200", calendar_response.content
        )
    # Currently unused; kept for future use (see commented pprint below).
    calendar_object = calendar_response.json()
    # pprint(calendar_object)

    the_week_ahead_filename = "the_week_ahead-%s.pptx" % datetime_target.strftime(
        "%Y%m%d"
    )
    if not os.path.isfile(the_week_ahead_filename):
        logger.info(
            "The Week Ahead doesn't seem to exist at %s, downloading",
            the_week_ahead_filename,
        )
        download_share_url(token, the_week_ahead_url, the_week_ahead_filename)
        logger.info("Downloaded The Week Ahead to %s", the_week_ahead_filename)
        assert os.path.isfile(the_week_ahead_filename)
    else:
        logger.info("The Week Ahead already exists at %s", the_week_ahead_filename)

    menu_filename = "menu-%s.xlsx" % datetime_target.strftime("%Y%m%d")
    if not os.path.isfile(menu_filename):
        logger.info("Menu not found, downloading")
        download_menu(
            token,
            datetime_target,
            weekly_menu_query_string,
            weekly_menu_sender,
            weekly_menu_subject_regex,
            weekly_menu_subject_regex_four_groups,
            menu_filename,
        )
        assert os.path.isfile(menu_filename)
    else:
        logger.info("All menus already exist")

    logger.info("Beginning to parse The Week Ahead")
    the_week_ahead_presentation = pptx.Presentation(the_week_ahead_filename)
    try:
        community_time = extract_community_time(
            the_week_ahead_presentation,
            the_week_ahead_community_time_page_number,
        )
    except ValueError:
        # Let a human fix the deck in LibreOffice, then retry once.
        logger.error(
            "Invalid community time! Opening The Week Ahead for manual intervention."
        )
        del the_week_ahead_presentation
        subprocess.run([soffice, the_week_ahead_filename], check=True)
        the_week_ahead_presentation = pptx.Presentation(the_week_ahead_filename)
        community_time = extract_community_time(
            the_week_ahead_presentation,
            the_week_ahead_community_time_page_number,
        )
    del the_week_ahead_filename

    aods = extract_aods(the_week_ahead_presentation, the_week_ahead_aod_page_number)
    # We're assuming the the AODs don't need manual intervention. I think that's fair.
    del the_week_ahead_presentation
    logger.info("Finished parsing The Week Ahead")

    logger.info("Beginning to extract menus")
    menu = menuparser.parse_menus(
        menu_filename,
    )
    logger.info("Finished extracting menus")

    final_data = {
        "start_date": datetime_target.strftime("%Y-%m-%d"),
        "community_time": community_time,
        "aods": aods,
        "menu": menu,
    }

    with open(output_filename, "w", encoding="utf-8") as fd:
        json.dump(final_data, fd, ensure_ascii=False, indent="\t")
    logger.info("Dumped to: %s", output_filename)
    return output_filename


def main() -> None:
    """CLI entry point: parse config and arguments, then call generate()."""
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description="Weekly script for the Daily Bulletin")
    parser.add_argument(
        "--date",
        default=None,
        help="the start of the week to generate for, in local time, YYYY-MM-DD; defaults to next Monday",
    )
    parser.add_argument(
        "--config", default="config.ini", help="path to the configuration file"
    )
    args = parser.parse_args()

    if args.date:
        datetime_target_naive = datetime.datetime.strptime(args.date, "%Y-%m-%d")
    else:
        datetime_target_naive = None
    del args.date

    config = ConfigParser()
    config.read(args.config)

    tzinfo = zoneinfo.ZoneInfo(config["general"]["timezone"])
    if datetime_target_naive:
        datetime_target_aware = datetime_target_naive.replace(tzinfo=tzinfo)
    else:
        # Next Monday (or today if today is Monday).
        datetime_current_aware = datetime.datetime.now(tz=tzinfo)
        datetime_target_aware = datetime_current_aware + datetime.timedelta(
            days=((-datetime_current_aware.weekday()) % 7)
        )
        del datetime_current_aware
    del datetime_target_naive
    logger.info("Generating for %s", datetime_target_aware.strftime("%Y-%m-%d %Z"))

    build_path = config["general"]["build_path"]
    # TODO: check if the build path exists and create it if it doesn't
    os.chdir(build_path)

    the_week_ahead_url = config["the_week_ahead"]["file_url"]
    the_week_ahead_community_time_page_number = int(
        config["the_week_ahead"]["community_time_page_number"]
    )
    the_week_ahead_aod_page_number = int(config["the_week_ahead"]["aod_page_number"])

    weekly_menu_breakfast_page_number = int(
        config["weekly_menu"]["breakfast_page_number"]
    )
    weekly_menu_lunch_page_number = int(config["weekly_menu"]["lunch_page_number"])
    weekly_menu_dinner_page_number = int(config["weekly_menu"]["dinner_page_number"])
    weekly_menu_query_string = config["weekly_menu"]["query_string"]
    weekly_menu_sender = config["weekly_menu"]["sender"]
    weekly_menu_subject_regex = config["weekly_menu"]["subject_regex"]
    weekly_menu_subject_regex_four_groups_raw = config["weekly_menu"][
        "subject_regex_four_groups"
    ].split(" ")
    weekly_menu_subject_regex_four_groups = tuple(
        [int(z) for z in weekly_menu_subject_regex_four_groups_raw]
    )
    assert len(weekly_menu_subject_regex_four_groups) == 4
    del weekly_menu_subject_regex_four_groups_raw
    # weekly_menu_dessert_page_number = config["weekly_menu"]["dessert_page_number"]

    graph_client_id = config["credentials"]["client_id"]
    graph_authority = config["credentials"]["authority"]
    graph_username = config["credentials"]["username"]
    graph_password = config["credentials"]["password"]
    graph_scopes = config["credentials"]["scope"].split(" ")

    calendar_address = config["calendar"]["address"]

    soffice = config["general"]["soffice"]

    # TODO: make a function that checks the configuration

    generate(
        datetime_target=datetime_target_aware,
        the_week_ahead_url=the_week_ahead_url,
        the_week_ahead_community_time_page_number=the_week_ahead_community_time_page_number,
        the_week_ahead_aod_page_number=the_week_ahead_aod_page_number,
        weekly_menu_breakfast_page_number=weekly_menu_breakfast_page_number,
        weekly_menu_lunch_page_number=weekly_menu_lunch_page_number,
        weekly_menu_dinner_page_number=weekly_menu_dinner_page_number,
        weekly_menu_query_string=weekly_menu_query_string,
        weekly_menu_sender=weekly_menu_sender,
        weekly_menu_subject_regex=weekly_menu_subject_regex,
        weekly_menu_subject_regex_four_groups=weekly_menu_subject_regex_four_groups,
        graph_client_id=graph_client_id,
        graph_authority=graph_authority,
        graph_username=graph_username,
        graph_password=graph_password,
        graph_scopes=graph_scopes,
        calendar_address=calendar_address,
        soffice=soffice,
    )
    # NOTE: generate() can get the timezone from datetime_target_aware
    # It returns the generated filename.


def encode_sharing_url(url: str) -> str:
    """Encode a SharePoint sharing URL into a Graph shares-API id ("u!...")."""
    return "u!" + base64.urlsafe_b64encode(url.encode("utf-8")).decode("ascii").rstrip(
        "="
    )


def download_share_url(
    token: str, url: str, local_filename: str, chunk_size: int = 65536
) -> None:
    """Resolve a sharing URL via Graph and stream the file to local_filename."""

    download_direct_url = requests.get(
        "https://graph.microsoft.com/v1.0/shares/%s/driveItem"
        % encode_sharing_url(url),
        headers={"Authorization": "Bearer " + token},
        timeout=20,
    ).json()["@microsoft.graph.downloadUrl"]

    with requests.get(
        download_direct_url,
        headers={
            "Authorization": "Bearer %s" % token,
            # identity: r.raw must not be gzip-wrapped for copyfileobj.
            "Accept-Encoding": "identity",
        },
        stream=True,
        timeout=20,
    ) as r:
        with open(local_filename, "wb") as fd:
            shutil.copyfileobj(r.raw, fd)
            fd.flush()


def acquire_token(
    graph_client_id: str,
    graph_authority: str,
    graph_username: str,
    graph_password: str,
    graph_scopes: list[str],
) -> str:
    """Password-grant login against Graph; return the access token.

    Raises ValueError when the password grant does not yield a token.
    """
    app = msal.PublicClientApplication(
        graph_client_id,
        authority=graph_authority,
    )
    result = app.acquire_token_by_username_password(
        graph_username, graph_password, scopes=graph_scopes
    )

    if "access_token" in result:
        assert isinstance(result["access_token"], str)
        return result["access_token"]
    raise ValueError("Authentication error in password login")


def search_mail(token: str, query_string: str) -> list[dict[str, Any]]:
    """Search the signed-in mailbox via the Graph search API; return hit dicts."""
    hits = requests.post(
        "https://graph.microsoft.com/v1.0/search/query",
        headers={"Authorization": "Bearer " + token},
        json={
            "requests": [
                {
                    "entityTypes": ["message"],
                    "query": {"queryString": query_string},
                    "from": 0,
                    "size": 15,
                    "enableTopResults": True,
                }
            ]
        },
        timeout=20,
    ).json()["value"][0]["hitsContainers"][0]["hits"]
    assert isinstance(hits, list)
    # Check every hit (an empty result list used to crash with IndexError
    # on hits[0]; now it simply returns []).
    assert all(isinstance(hit, dict) for hit in hits)
    return hits


def extract_aods(
    prs: pptx.presentation.Presentation, aod_page_number: int
) -> list[str]:
    """Extract the Monday..Thursday AODs from The Week Ahead's AOD slide.

    Returns a four-element list [Mon, Tue, Wed, Thu].
    Raises ValueError when the slide is missing days or unparsable.
    """
    slide = prs.slides[aod_page_number]
    aods = ["", "", "", ""]
    for shape in slide.shapes:
        if hasattr(shape, "text") and "Monday: " in shape.text:
            slist = shape.text.split("\n")
            for s in slist:
                try:
                    day, aod = s.split(": ", 1)
                except ValueError:
                    # BUGFIX: was `pass`, which fell through and used a
                    # stale (or undefined) day/aod from a previous line.
                    continue
                day = day.lower()
                if day == "monday":
                    aods[0] = aod
                elif day == "tuesday":
                    aods[1] = aod
                elif day == "wednesday":
                    aods[2] = aod
                elif day == "thursday":
                    aods[3] = aod
            if not all(aods):
                raise ValueError(
                    "AOD parsing: The Week Ahead doesn't include all AOD days, or the formatting is borked"
                )
            return aods
    raise ValueError("AOD parsing: The Week Ahead's doesn't even include \"Monday\"")
    # TODO: this is one of those places where Monday is *expected* to be the first day.
    # TODO: revamp this. this is ugly!
def extract_community_time(
    prs: pptx.presentation.Presentation, community_time_page_number: int
) -> list[list[str]]:
    """Parse the community-time table slide into a grid of cell strings.

    Drops the header row and the first (day-label) column; merged cells are
    expanded so every covered position carries the origin cell's text.
    Raises ValueError when no table is found or the column count is wrong.
    """
    slide = prs.slides[community_time_page_number]
    table_shape = next((s for s in slide.shapes if s.has_table), None)
    if table_shape is None:
        raise ValueError("No shapes")
    tbl = table_shape.table

    row_count = len(tbl.rows)
    col_count = len(tbl.columns)
    if col_count not in [4, 5]:
        raise ValueError(
            "Community time parsing: The Week Ahead community time table does not have 4 or 5 columns"
        )
    if col_count == 4:
        logger.warning(
            "Community time warning: only four columns found, assuming that Y12 has graduated"
        )

    res = [["" for _ in range(col_count)] for _ in range(row_count)]

    for r in range(row_count):
        for c in range(col_count):
            cell = tbl.cell(r, c)
            if cell.is_spanned:
                continue
            # Concatenate all runs of all paragraphs in the cell.
            t = "".join(
                run.text
                for paragraph in cell.text_frame.paragraphs
                for run in paragraph.runs
            ).strip()
            # Normalise the two recurring event names.
            lowered = t.lower()
            if "whole school assembly" in lowered:
                t = "Whole School Assembly"
            elif (
                "tutor group check-in" in lowered
                or "follow up day" in lowered
                or "open session for tutor and tutee" in lowered
            ):
                t = "Tutor Time"
            res[r][c] = t
            if cell.is_merge_origin:
                # Copy the origin's text into every spanned position.
                for dr in range(cell.span_height):
                    for dc in range(cell.span_width):
                        res[r + dr][c + dc] = t

    return [row[1:] for row in res[1:]]


def filter_mail_results_by_sender(
    original: Iterable[dict[str, Any]], sender: str
) -> Iterator[dict[str, Any]]:
    """Yield only the search hits whose sender address matches (case-insensitive)."""
    wanted = sender.lower()
    for hit in original:
        address = hit["resource"]["sender"]["emailAddress"]["address"]
        if address.lower() == wanted:
            yield hit


# TODO: Potentially replace this with a pattern-match based on strptime().
def filter_mail_results_by_subject_regex_groups(
    original: Iterable[dict[str, Any]],
    subject_regex: str,
    subject_regex_groups: Iterable[int],
) -> Iterator[tuple[dict[str, Any], list[str]]]:
    """Yield (hit, captured-groups) for each hit whose subject matches the regex.

    subject_regex_groups selects which capture groups are returned, in order,
    for every matching hit.
    """
    # Hoisted out of the loop: previously re.compile() ran once per hit.
    pattern = re.compile(subject_regex)
    for hit in original:
        # Lazy %-args instead of eager string formatting.
        logging.debug("Trying %s", hit["resource"]["subject"])
        matched = pattern.match(hit["resource"]["subject"])
        if matched:
            yield (hit, [matched.group(group) for group in subject_regex_groups])


def download_menu(
    token: str,
    datetime_target: datetime.datetime,
    weekly_menu_query_string: str,
    weekly_menu_sender: str,
    weekly_menu_subject_regex: str,
    weekly_menu_subject_regex_four_groups: tuple[int, int, int, int],
    menu_filename: str,
) -> None:
    """Find the week's menu email and save its spreadsheet attachment.

    Searches the mailbox, filters by sender and subject, picks the email whose
    subject's first month/day matches datetime_target, downloads the raw MIME
    message, and writes the first xlsx attachment to menu_filename.
    Raises ValueError when no matching email or attachment is found.
    """
    search_results = search_mail(token, weekly_menu_query_string)

    for hit, matched_groups in filter_mail_results_by_subject_regex_groups(
        filter_mail_results_by_sender(search_results, weekly_menu_sender),
        weekly_menu_subject_regex,
        weekly_menu_subject_regex_four_groups,
    ):
        try:
            subject_1st_month = datetime.datetime.strptime(
                matched_groups[0], "%b"  # issues here are probably locales
            ).month
            subject_1st_day = int(matched_groups[1])
        except ValueError as exc:
            raise ValueError(hit["resource"]["subject"], matched_groups[0]) from exc
        if (
            subject_1st_month == datetime_target.month
            and subject_1st_day == datetime_target.day
        ):
            break
    else:
        raise ValueError("No SJ-menu email found")

    # Fetch the raw MIME message so we can walk its attachments.
    with requests.get(
        "https://graph.microsoft.com/v1.0/me/messages/%s/$value" % hit["hitId"],
        headers={
            "Authorization": "Bearer %s" % token,
            "Accept-Encoding": "identity",
        },
        stream=True,
        timeout=20,
    ) as r:
        msg = email.message_from_bytes(r.content)

    for part in msg.walk():
        if part.get_content_type() in [
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ]:
            payload = part.get_payload(decode=True)
            pb = bytes(payload)

            with open(menu_filename, "wb") as w:
                w.write(pb)
            break
    else:
        raise ValueError("No proper attachment found in email")


if __name__ == "__main__":
    main()