aboutsummaryrefslogtreecommitdiff
path: root/sjdbmk/inspire_dl.py
diff options
context:
space:
mode:
Diffstat (limited to 'sjdbmk/inspire_dl.py')
-rw-r--r--sjdbmk/inspire_dl.py103
1 files changed, 103 insertions, 0 deletions
diff --git a/sjdbmk/inspire_dl.py b/sjdbmk/inspire_dl.py
new file mode 100644
index 0000000..94fd994
--- /dev/null
+++ b/sjdbmk/inspire_dl.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+#
+# Daily script to prepare the YK Pao School Daily Bulletin's JSON data files
+# Copyright (C) 2024 Runxi Yu <https://runxiyu.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# TODO: Check for potential filename injections
+#
+
+from __future__ import annotations
+from configparser import ConfigParser
+import json
+import argparse
+import logging
+import os
+import shutil
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+def main() -> None:
+ logging.basicConfig(level=logging.INFO)
+ parser = argparse.ArgumentParser(description="Download Daily Inspirations")
+ # parser.add_argument("--changeme", default=None, help="changeme")
+ parser.add_argument("--config", default="config.ini", help="path to the configuration file")
+ args = parser.parse_args()
+
+ config = ConfigParser()
+ config.read(args.config)
+
+ build_path = config["general"]["build_path"]
+ os.chdir(build_path)
+
+ api_base = config["web_service"]["api_base"].rstrip("/") + "/"
+ token = config["web_service"]["token"].strip()
+
+ response_json = requests.get(
+ api_base + "rs",
+ headers={"Authorization": "Bearer %s" % token},
+ timeout=20,
+ ).json()
+ assert isinstance(response_json, list)
+ remote_submission_list = set(response_json)
+
+ local_submission_list = set([sn.lstrip("inspire-") for sn in os.listdir() if sn.startswith("inspire-")])
+ to_fetch = remote_submission_list - local_submission_list
+ if to_fetch:
+ logger.info("Going to fetch: %s" % ", ".join(to_fetch))
+ else:
+ logger.info("Nothing to fetch")
+ for sn in to_fetch:
+ logger.info("Fetching: %s" % sn)
+ with requests.get(
+ api_base + "rs/" + sn,
+ headers={
+ "Authorization": "Bearer %s" % token,
+ "Accept-Encoding": "identity",
+ },
+ stream=True,
+ timeout=20,
+ ) as r:
+ try:
+ sub = json.load(r.raw)
+ except json.decoder.JSONDecodeError:
+ logger.error("inspire-%s is broken, skipping" % sn)
+ sub["used"] = False
+ sub["approved"] = False
+ with open("inspire-%s" % os.path.basename(sn), "w", encoding="utf-8") as fd:
+ json.dump(sub, fd, indent="\t")
+ if not sub["file"]:
+ logger.info("No attachment")
+ else:
+ logger.info("Attachment noticed")
+ with requests.get(
+ api_base + "rf/" + os.path.basename(sub["file"]),
+ headers={
+ "Authorization": "Bearer %s" % token,
+ "Accept-Encoding": "identity",
+ },
+ stream=True,
+ timeout=20,
+ ) as r:
+ with open("inspattach-%s" % os.path.basename(sub["file"]), "wb") as fd:
+ logger.info("Saved to inspattach-%s" % os.path.basename(sub["file"]))
+ shutil.copyfileobj(r.raw, fd)
+ fd.flush()
+
+
+if __name__ == "__main__":
+ main()