sjdbmk/twa.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

#!/usr/bin/env python3
#
# The Week Ahead Interpretation in the Songjiang Daily Bulletin Build System
# Copyright (C) 2024 Runxi Yu <https://runxiyu.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#

import logging
import datetime
import os

import pptx

from . import common

logger = logging.getLogger(__name__)


def download_or_report_the_week_ahead(token: str, datetime_target: datetime.datetime, the_week_ahead_url: str) -> None:
    """Make sure the local copy of The Week Ahead for the target date exists.

    The presentation is kept under the relative path
    ``the_week_ahead-YYYYMMDD.pptx``, dated by *datetime_target*. When that
    file is already present nothing is fetched and the fact is only logged;
    otherwise it is fetched with ``common.download_share_url``.

    Args:
        token: Passed unchanged to ``common.download_share_url``.
        datetime_target: Date used to build the local file name.
        the_week_ahead_url: Share URL passed to ``common.download_share_url``.

    Raises:
        AssertionError: If the file still does not exist after the download
            call returned.
    """
    the_week_ahead_filename = "the_week_ahead-%s.pptx" % datetime_target.strftime("%Y%m%d")
    if os.path.isfile(the_week_ahead_filename):
        logger.info("The Week Ahead already exists at %s", the_week_ahead_filename)
        return

    logger.info("Downloading The Week Ahead to %s", the_week_ahead_filename)
    common.download_share_url(token, the_week_ahead_url, the_week_ahead_filename)
    # Raised explicitly rather than via an ``assert`` statement: asserts are
    # removed under ``python -O``, which would let a failed download slip
    # through unnoticed. The exception type is kept for existing callers.
    if not os.path.isfile(the_week_ahead_filename):
        raise AssertionError("The Week Ahead was not found at %s after downloading" % the_week_ahead_filename)


def parse_the_week_ahead(datetime_target: datetime.datetime, the_week_ahead_community_time_page_number: int, the_week_ahead_aod_page_number: int) -> tuple[list[list[str]], list[str]]:
    """Read the community time table and the AODs out of The Week Ahead.

    Opens ``the_week_ahead-YYYYMMDD.pptx`` (dated by *datetime_target*) and
    hands the presentation to ``extract_community_time`` and
    ``extract_aods`` together with their respective page numbers.

    Args:
        datetime_target: Date used to build the presentation's file name.
        the_week_ahead_community_time_page_number: Forwarded to
            ``extract_community_time``.
        the_week_ahead_aod_page_number: Forwarded to ``extract_aods``.

    Returns:
        A ``(community_time, aods)`` pair, in that order.
    """
    logger.info("Parsing The Week Ahead")
    date_stamp = datetime_target.strftime("%Y%m%d")
    presentation = pptx.Presentation("the_week_ahead-%s.pptx" % date_stamp)
    # Tuple elements are evaluated left to right, so the community time
    # table is still extracted before the AODs.
    return (
        extract_community_time(presentation, the_week_ahead_community_time_page_number),
        extract_aods(presentation, the_week_ahead_aod_page_number),
    )


def extract_community_time(prs: pptx.presentation.Presentation, community_time_page_number: int) -> list[list[str]]:
    """Return the community time table of The Week Ahead as a grid of strings.

    The first shape on the selected slide that has a table is used. Each
    cell's text is the concatenation of its runs, stripped, with two kinds of
    normalisation applied:

    * text mentioning a whole school assembly becomes
      ``"Whole School Assembly"``, and text mentioning a tutor group
      check-in, follow up day or open tutor/tutee session becomes
      ``"Tutor Time"``;
    * full-width parentheses are replaced by ASCII ones with surrounding
      spaces, and doubled spaces are collapsed once.

    Merged cells are expanded: every cell covered by a merge gets the
    (normalised) text of the merge origin.

    Args:
        prs: The opened presentation.
        community_time_page_number: Index into ``prs.slides``.

    Returns:
        The table without its first row and first column (presumably the
        header row and the label column -- confirm against the slide).

    Raises:
        ValueError: If no shape on the slide has a table, or the table does
            not have 4 or 5 columns.
    """
    slide = prs.slides[community_time_page_number]
    # for/else: the else branch only runs when no shape with a table was found.
    for shape in slide.shapes:
        if shape.has_table:
            break
    else:
        raise ValueError("No shapes")
    tbl = shape.table
    row_count = len(tbl.rows)
    col_count = len(tbl.columns)
    if col_count not in (4, 5):
        raise ValueError("Community time parsing: The Week Ahead community time table does not have 4 or 5 columns")
    if col_count == 4:
        logger.warning("Community time warning: only four columns found, assuming that Y12 has graduated")

    res = [[""] * col_count for _ in range(row_count)]

    for r in range(row_count):
        for c in range(col_count):
            cell = tbl.cell(r, c)
            if cell.is_spanned:
                # Covered by a merge; filled in from its merge origin below.
                continue
            t = "".join(run.text for p in cell.text_frame.paragraphs for run in p.runs).strip()
            lowered = t.lower()
            if "whole school assembly" in lowered:
                t = "Whole School Assembly"
            elif "tutor group check-in" in lowered or "follow up day" in lowered or "open session for tutor and tutee" in lowered:
                t = "Tutor Time"
            t = t.replace("（", " (").replace("）", ") ").replace("  ", " ")
            res[r][c] = t
            if cell.is_merge_origin:
                # Propagate the *normalised* text so that merged cells read
                # exactly like ordinary cells (previously the raw text was
                # written here, undoing the parenthesis clean-up).
                for sh in range(cell.span_height):
                    for sw in range(cell.span_width):
                        res[r + sh][c + sw] = t

    return [row[1:] for row in res[1:]]


def extract_aods(prs: pptx.presentation.Presentation, aod_page_number: int) -> list[str]:
    """Return the AOD entries for Monday to Thursday from The Week Ahead.

    The first shape on the selected slide whose text contains ``"monday: "``
    (case-insensitively) is parsed line by line. Each ``"<day>: <aod>"``
    line fills the slot for that day; lines without ``": "`` (for example a
    heading) and lines for other days are ignored.

    Args:
        prs: The opened presentation.
        aod_page_number: Index into ``prs.slides``.

    Returns:
        A four-element list ordered Monday, Tuesday, Wednesday, Thursday.

    Raises:
        common.DailyBulletinError: If no shape mentions Monday, or the shape
            that does leaves any of the four days empty.
    """
    slide = prs.slides[aod_page_number]
    day_slots = {"monday": 0, "tuesday": 1, "wednesday": 2, "thursday": 3}
    for shape in slide.shapes:
        if not hasattr(shape, "text") or "monday: " not in shape.text.lower():
            continue
        aods = ["", "", "", ""]
        for line in shape.text.split("\n"):
            day, separator, aod = line.partition(": ")
            if not separator:
                # Not a "<day>: <aod>" line. Skip it instead of falling
                # through with unset or stale values from an earlier line.
                continue
            slot = day_slots.get(day.strip().lower())
            if slot is not None:
                aods[slot] = aod
        if not all(aods):
            raise common.DailyBulletinError("The Week Ahead doesn't include all AOD days, or the formatting is borked")
        return aods
    # TODO: this is one of those places where Monday is *expected* to be the first day.
    # TODO: revamp this. this is ugly!
    raise common.DailyBulletinError("The Week Ahead's doesn't even include an AOD for Monday")