# redacted.life/nova.py
#!/usr/bin/env python3
# pylint: disable=logging-format-interpolation
"Creates a static site for redacted.life"
import argparse
from collections import UserList
from datetime import datetime
# (stray VCS timestamp from web viewer, commented out: 2020-02-01 10:59:36 +01:00)
import json
import os
import os.path as path
import logging
import re
import subprocess
import shutil
import sys
import jinja2
# (stray VCS timestamp from web viewer, commented out: 2020-01-25 20:10:29 +01:00)
import markdown
from mutagen.mp3 import MP3
def gen_name(date, slug):
    """Builds the canonical episode basename: 'YYYY-MM-DD-<slug>'."""
    return f"{date:%Y-%m-%d}-{slug}"
def seconds_to_str(seconds):
    """Formats a duration given in seconds as zero-padded 'hh:mm:ss'."""
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}"
class EpisodeList(UserList):
    """Ordered collection of Episode objects plus site/feed generation.

    Attributes:
        url: Base URL of the site (with trailing slash).
        output: Output directory path (with trailing slash).
        template: jinja2 template used to render individual episode pages.
        archives: jinja2 template used to render the archives index page.
    """
    def __init__(self, url, data, output, template, archives):
        super().__init__(data)
        self.url = url
        self.output = output
        self.template = template
        self.archives = archives
        logging.info(f"New EpisodeList: {url=} {output=} {template=} {archives=}")
    def sort(self, *_args, **_kwargs):
        "Sorts episodes in place by publication date, oldest first."
        super().sort(key=lambda x: x.date, reverse=False)
    def generate_thumbnails(self):
        "Generates a jpg thumbnail for every episode's video."
        logging.info("Creating missing directories")
        for directory in ("assets", "assets/thumbnails"):
            if not path.isdir(self.output + directory):
                os.mkdir(self.output + directory)
        for episode in self.data:
            location = (self.output + "assets/thumbnails/" +
                        gen_name(episode.date, episode.slug) + ".jpg")
            logging.info(f"Creating thumbnail for {episode=} at {location}")
            episode.store_thumbnail(location)
    def _write_rss_item(self, stream, ep, ext):
        "Writes a single RSS <item> element for episode `ep` to `stream`."
        def line(text):
            stream.write(text)
            stream.write("\n")
        line("<item>")
        # Title
        line(f"<title><![CDATA[{ep.title}]]></title>")
        # Description
        line("<description><![CDATA["
             f"{ep.config['description']}]]></description>")
        # Date — RFC 822 as required by the RSS spec.  The previous format
        # string ('%a, %d %b, %Y %H:%M:%Sz GMT') emitted a stray comma and
        # a literal 'z' that broke feed validators.
        datestring = ep.date.strftime('%a, %d %b %Y %H:%M:%S GMT')
        line(f"<pubDate>{datestring}</pubDate>")
        # iTunes: explicit, author, subtitle, keywords, duration
        line(f"<itunes:explicit>{ep.config['explicit']}</itunes:explicit>")
        line(f"<itunes:author><![CDATA[{ep.config['author']}]]>"
             "</itunes:author>")
        line("<itunes:subtitle><![CDATA["
             f"{ep.config['subtitle']}]]></itunes:subtitle>")
        line(f"<itunes:keywords>{','.join(ep.config['tags'])}"
             "</itunes:keywords>")
        line(f"<itunes:duration>{seconds_to_str(len(ep))}</itunes:duration>")
        # Content (show notes, pre-rendered HTML)
        line(f"<content:encoded><![CDATA[{ep.show_notes}]]></content:encoded>")
        # GUID
        # NOTE(review): generate_site writes pages as <date>-<slug>.html but
        # this permalink uses only the slug — confirm the canonical URL.
        line(f"<guid isPermaLink=\"true\">{self.url}{ep.slug}.html</guid>")
        # Enclosure
        # NOTE(review): generate_site copies audio into assets/audio/, yet
        # this URL points at assets/music/ — verify against server layout.
        audio = f'{self.url}assets/music/{ep.slug}.{ext}'
        size = path.getsize(f"{ep.audio}.{ext}")
        line(f'<enclosure url="{audio}" type="audio/{ext}" length="{size}" />')
        # Categories
        for tag in ep.config["tags"]:
            line(f"<category><![CDATA[{tag}]]></category>")
        line("</item>")
    def generate_rss(self, header):
        """Generates feed_mp3.rss and feed_ogg.rss in the output directory.

        `header` is the raw RSS/channel preamble read from the input tree;
        one <item> per episode is appended, then the closing tags.
        """
        with open(self.output + "feed_mp3.rss", "w") as mp3, \
             open(self.output + "feed_ogg.rss", "w") as ogg:
            for stream, ext in ((mp3, "mp3"), (ogg, "ogg")):
                logging.info(f"Writing header for {ext}")
                stream.write(header)
                for ep in self.data:
                    logging.info(f"Writing item for episode {ep} with {ext=}")
                    self._write_rss_item(stream, ep, ext)
                logging.info(f"Writing end for {ext}")
                stream.write("</channel>")
                stream.write("\n")
                stream.write("</rss>")
    def generate_archives(self):
        "Renders the archives index page, newest episode first."
        if not path.isdir(self.output + "archives"):
            logging.info("Creating directory archives")
            os.mkdir(self.output + "archives")
        with open(self.output + "archives/index.html", "w") as file:
            # self.data is sorted oldest-first; reverse for the listing.
            episodes = [{
                "slug": gen_name(i.date, i.slug) + ".html",
                "title": i.title
            } for i in self.data[::-1]]
            file.write(self.archives.render(episodes=episodes,
                                            title="Archives",
                                            relative=".."
                                            ))
    def generate_site(self, root):
        "Generates CSS, copies assets/media and renders every episode page."
        logging.info("Generating CSS from SCSS")
        subprocess.run(["sass", "--update", f"{root}scss:{root}assets/css"],
                       check=True)
        logging.info("Copy the existing assets")
        shutil.copytree(root + "assets", self.output + "assets",
                        dirs_exist_ok=True)
        logging.info("Creating missing directories")
        for directory in ("assets", "assets/audio", "assets/videos"):
            if not path.isdir(self.output + directory):
                logging.info(f"Creating directory {directory}")
                os.mkdir(self.output + directory)
        logging.info("Render episodes and copy data")
        for episode in self.data:
            logging.info(f"Rendering episode {episode}")
            name = gen_name(episode.date, episode.slug)
            html = f"{self.output}{name}.html"
            thumbnail = f"assets/thumbnails/{name}.jpg"
            video = f"{self.output}assets/videos/{name}.mp4"
            logging.info(f"Copying {episode.video} to {video}")
            shutil.copy2(episode.video, video)
            # BUG FIX: both audio formats used to be copied onto the same
            # '<name>.mp3' destination, so the ogg overwrote the mp3.
            for ext in ("mp3", "ogg"):
                audio = f"{self.output}assets/audio/{name}.{ext}"
                logging.info(f"Copying {episode.audio}.{ext} to {audio}")
                shutil.copy2(f"{episode.audio}.{ext}", audio)
            logging.info(f"Writing to {html}")
            with open(html, "w") as file:
                file.write(episode.render(self.template, thumbnail))
        # The newest episode doubles as the landing page.
        last = self.data[-1]
        last_name = f"{self.output}{gen_name(last.date, last.slug)}.html"
        logging.info(f"Copying last one ({last}) to index.html")
        shutil.copy2(last_name, self.output + "index.html")
class Episode:
    """One podcast episode backed by a markdown file plus media files.

    Attributes:
        date: datetime of publication (parsed from the filename).
        slug: URL-safe episode identifier.
        title: Human-readable title.
        show_notes: Show notes pre-rendered from markdown to HTML.
        video: Path to the source mp4 file.
        audio: Path to the audio files *without* extension (.mp3/.ogg exist).
        config: Front-matter dict parsed from the episode's markdown file.
        length: Duration in seconds (float), read from the mp3's metadata.
    """
    def __init__(self, date, slug, title, show_notes, video_src, audio_src, config):
        self.date = date
        self.slug = slug
        self.title = title
        # Notes arrive as markdown and are stored already rendered to HTML.
        self.show_notes = markdown.markdown(show_notes)
        self.video = video_src
        self.audio = audio_src
        self.config = config
        self.length = MP3(audio_src + ".mp3").info.length
        logging.info(f"New episode: {date=} {slug=} {title=} {self.video=} "
                     f"{self.audio=} {config=} {self.length=} {self.show_notes=}")
    def render(self, template, thumbnail_src, relative="."):
        "Renders the episode page with the given jinja2 template."
        return template.render(
            title=self.title,
            # Markup marks the pre-rendered HTML safe so autoescape skips it.
            show_notes=jinja2.Markup(self.show_notes),
            thumbnail_src=thumbnail_src,
            relative=relative,
            video_src=f"assets/videos/{path.basename(self.video)}"
        )
    def store_thumbnail(self, location):
        "Extracts the frame at t=1s of the video as a jpg at `location`."
        args = ["ffmpeg", "-i", self.video, "-ss", "00:00:01.000", "-vframes",
                "1", location]
        logging.info(f"Running {' '.join(args)}")
        # check=False: a failed thumbnail must not abort the whole build.
        subprocess.run(args, check=False)
    def __len__(self):
        "Duration in whole seconds."
        return int(self.length)
    def __str__(self):
        return f"{self.slug}: {self.title}"
    def __repr__(self):
        return str(self)
# (stray VCS timestamp from web viewer, commented out: 2020-01-27 12:44:38 +01:00)
def parse_args():
    """Parses the command line; returns (input_dir, output_dir, url).

    Directories are absolute and the URL is normalised so all three end
    with exactly one trailing slash.  Also configures root logging based
    on the --verbose flag.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_dir", help="Input directory")
    parser.add_argument("output_dir", help="Output directory")
    parser.add_argument("url", help="Base URL of website")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose Logging")
    args = parser.parse_args()
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig()
    normalised = tuple(path.abspath(p.rstrip("/")) + "/"
                       for p in (args.input_dir, args.output_dir))
    return normalised + (args.url.rstrip("/") + "/",)
# (stray VCS timestamp from web viewer, commented out: 2020-01-27 12:44:38 +01:00)
class ParseError(ValueError):
    """Raised when an episode file's front matter cannot be parsed."""
def parse_file(file, array_keys=("tags")):
"Parses a file"
config = {}
kv_re = re.compile(r"(?P<key>\w+):\s*(?P<value>.*)")
while line := file.readline():
if line.rstrip("\n") == "---":
break
if line.strip() == "":
continue
if match := kv_re.match(line):
if match.group("key").strip().lower() in array_keys:
config[match.group("key")] = [i.strip() for i in
match.group("value").split(",")]
else:
config[match.group("key")] = match.group("value").strip()
else:
raise ParseError(f"Invalid line {line}")
return (config, file.read())
def main(args):
    """Builds the whole site from (input_dir, output_dir, url).

    Returns a process exit code: 0 on success, 1 when the input tree is
    missing a required directory, 2 when an episode file fails to parse.
    """
    # Templates and static assets live next to the script itself.
    root = path.dirname(sys.argv[0]).rstrip("/") + "/"
    input_dir, output_dir, url = args
    logging.info(f"Input directory: {input_dir}")
    logging.info(f"Output directory: {output_dir}")
    logging.info(f"URL: {url}")
    # Input validation: the input tree must provide md/, videos/ and audio/.
    paths = [
        input_dir,
        input_dir + "md",
        input_dir + "videos",
        input_dir + "audio",
    ]
    logging.info("Checking if all paths exist.")
    for required in paths:
        if not path.isdir(required):
            logging.error(f"Invalid Input. {required} is not a directory.")
            return 1
    logging.info("Creating output directory if it doesn't exist")
    if not path.isdir(output_dir):
        os.mkdir(output_dir)
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(root),
        autoescape=jinja2.select_autoescape("html")
    )
    logging.info("Creating EpisodeList")
    podcast = EpisodeList(
        url,
        [],
        output_dir,
        env.get_template("index.html"),
        env.get_template("archives.html")
    )
    # Episode sources are named YYYY-MM-DD-<slug>.md.  The '.' before 'md'
    # is escaped now; the old pattern let it match any character.
    split = re.compile(r"((?P<date>\d{4}-[01]?\d-[0123]?\d)-(?P<slug>.*)\.md)")
    logging.info(f"Parsing all files in {input_dir}md")
    for file in os.listdir(input_dir + "md"):
        logging.info(f"File: {file}")
        match = split.match(file)
        logging.info(f"Match: {match}")
        if not match:
            logging.error(f"Invalid filename: {file}")
            continue
        date = datetime.strptime(match.group("date"), "%Y-%m-%d")
        logging.info(f"Date: {date}")
        slug = match.group("slug")
        logging.info(f"Slug: {slug}")
        with open(input_dir + "md/" + file) as episode:
            try:
                config, show_notes = parse_file(episode)
                logging.info(f"Config: {config}")
                logging.info(f"Show Notes: {show_notes}")
            except ParseError as err:
                logging.error(f"Error while parsing file: {file}")
                logging.error(err)
                return 2
        logging.info("Appending to EpisodeList")
        podcast.append(
            Episode(
                date,
                slug,
                config["title"],
                show_notes,
                input_dir + "videos/" + gen_name(date, slug) + ".mp4",
                # Audio path deliberately has no extension; Episode appends
                # .mp3/.ogg as needed.
                input_dir + "audio/" + gen_name(date, slug),
                config
            )
        )
    if not path.isdir(output_dir + "subscribe"):
        os.mkdir(output_dir + "subscribe")
    logging.info("Generating subscribe page")
    with open(input_dir + "subscribe.json") as subscribe, \
         open(output_dir + "subscribe/index.html", "w") as html:
        html.write(env.get_template("subscribe.html").render(
            relative="..",
            subscriptions=json.load(subscribe)
        ))
    if not path.isdir(output_dir + "donate"):
        os.mkdir(output_dir + "donate")
    logging.info("Generating donate page")
    with open(input_dir + "donate.json") as donate, \
         open(output_dir + "donate/index.html", "w") as html:
        html.write(env.get_template("donate.html").render(
            relative="..",
            donations=json.load(donate)
        ))
    logging.info("Sorting podcasts")
    podcast.sort()
    logging.info("Generating thumbnails")
    podcast.generate_thumbnails()
    logging.info("Generating archives pages")
    podcast.generate_archives()
    logging.info("Generating RSS feeds")
    with open(input_dir + "header.rss") as header:
        podcast.generate_rss(header.read())
    logging.info("Generating Site")
    podcast.generate_site(root)
    logging.info("Copying Overrides")
    # Overrides are copied last so hand-maintained files win over generated.
    shutil.copytree(input_dir + "overrides", output_dir, dirs_exist_ok=True)
    logging.info("Done")
    return 0
# Script entry point: build the site and exit with main()'s status code.
if __name__ == "__main__":
    sys.exit(main(parse_args()))