# accountant/accountant.py

import csv
import math
from dataclasses import dataclass, asdict

from urllib.parse import urlparse, urlunparse
from bs4 import BeautifulSoup

import requests


@dataclass
class TimeSpent:
    """One "added spent time" event scraped from an issue's timeline.

    The field order is significant: it becomes the column order of the CSV
    report, which is built from ``asdict()`` of these records.
    """

    hours: int  # whole hours parsed from the logged duration
    mins: int  # remaining minutes parsed from the logged duration
    nick: str  # text of the author link on the timeline event
    name: str  # ``title`` of the author's avatar image
    comment_id: str  # ``id`` attribute of the timeline event element
    issue: str  # HTML URL of the issue the time was logged on
    repo: str  # name of the repository that owns the issue
    date: str  # ``title`` of the event's "time-since" element
    url: str  # issue URL with ``#<comment_id>`` appended


class Gitea:
    """Collect the time logged on the issues of one Gitea organisation.

    Constructing an instance runs the whole pipeline: it checks that the host
    serves Gitea and that the organisation page exists, lists the
    organisation's repositories and their issues, scrapes the "added spent
    time" events from every issue page, writes them to ``times.csv`` and
    prints the total per user.

    Attributes:
        host: The parsed host URL (result of ``urlparse``).
        org: Name of the organisation being inspected.
        repos: Repository objects as returned by the API.
        issues: Mapping of repository name to its undisputed issues.
        times: Mapping of repository name to a list holding, per issue, the
            list of ``TimeSpent`` records found on that issue.
    """

    # Seconds to wait on the server before giving up on a request; without a
    # timeout ``requests`` can block indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 30
    # Page size requested from every paginated API listing.
    PAGE_LIMIT = 10
    # Issues carrying a label with this name are left out of the report.
    DISPUTE_LABEL = "Dispute"

    def __init__(self, host: str, org: str):
        """Run the full fetch-and-report pipeline for ``org`` on ``host``.

        Args:
            host: Base URL of the Gitea instance; only its scheme and network
                location are used when building request URLs.
            org: Name of the organisation to inspect.

        Raises:
            AssertionError: If the host does not look like a Gitea instance,
                the organisation page is not found, or a request made while
                collecting data does not return HTTP 200.
        """
        self.host = urlparse(host)
        self.org = org

        self.__require(
            self.__is_gitea(), f"{host} does not look like a Gitea instance"
        )
        print(f"Gitea instance online at {host}")
        self.__require(
            self.__org_exists(), f"Organisation {self.org} not found on {host}"
        )
        print(f"Organisation {self.org} exists on {host}")
        self.repos = self.get_repositories()
        print(f"Found {len(self.repos)} public repositories in {self.org}")
        self.issues = self.get_issues()
        self.times = self.get_time_spent()
        self.write_csv()
        self.total_time()

    @staticmethod
    def __require(condition: bool, message: str) -> None:
        """Raise ``AssertionError(message)`` unless ``condition`` is true.

        The exception is raised explicitly instead of through an ``assert``
        statement so the check (and the call computing ``condition``) is not
        stripped when Python runs with ``-O``. ``AssertionError`` is kept so
        existing callers see the same exception type as before.
        """
        if not condition:
            raise AssertionError(message)

    def __get(self, url: str):
        """GET ``url`` with the class-wide request timeout."""
        return requests.get(url, timeout=self.REQUEST_TIMEOUT)

    def __iter_times(self):
        """Yield every record in ``self.times``, flattened across repos and issues."""
        for repo_times in self.times.values():
            for issue_times in repo_times:
                # Issues without any time log are stored as empty lists.
                yield from issue_times or ()

    def total_time(self):
        """Print the summed time per nick as ``<nick>: <H>h <M>min``.

        Minutes are accumulated separately and whole hours are carried over
        only when printing, so the minutes shown are always below 60. Nicks
        are printed in the order they were first encountered.
        """
        totals = {}
        for entry in self.__iter_times():
            tally = totals.setdefault(entry.nick, {"hours": 0, "mins": 0})
            tally["hours"] += int(entry.hours)
            tally["mins"] += int(entry.mins)

        for nick, tally in totals.items():
            carried_hours, mins = divmod(tally["mins"], 60)
            hours = tally["hours"] + carried_hours
            print(f"{nick}: {hours}h {mins}min")

    def write_csv(self):
        """Write every record in ``self.times`` to ``times.csv``.

        The header row is taken from the keys of the first record, so the file
        stays empty when there are no records at all.
        """
        print('writing to times.csv')
        # newline="" is what the csv module requires of the file object;
        # without it, platforms that translate "\n" emit a blank line after
        # every row.
        with open("times.csv", "w", encoding="utf-8", newline="") as f:
            writer = None
            for entry in self.__iter_times():
                row = asdict(entry)
                if writer is None:
                    writer = csv.DictWriter(f, row.keys())
                    writer.writeheader()
                writer.writerow(row)

    def __page_mentions_gitea(self, path: str) -> bool:
        """Return True if ``path`` answers HTTP 200 and its body contains ``Gitea``."""
        resp = self.__get(self.__unparsed(path))
        return resp.status_code == 200 and b"Gitea" in resp.content

    def __is_gitea(self):
        """Return True if the host's root page looks like a Gitea page."""
        return self.__page_mentions_gitea("/")

    def __org_exists(self):
        """Return True if the organisation's page exists on the host."""
        return self.__page_mentions_gitea(self.org)

    def __fetch_all_pages(self, path: str, query: str = "") -> list:
        """Collect all items of the paginated API listing at ``path``.

        Pages are requested one after another, starting at 1, until the server
        returns an empty page.

        Args:
            path: API path below ``/api/v1/``.
            query: Extra query string placed before the paging parameters.

        Raises:
            AssertionError: If any page does not return HTTP 200.
        """
        prefix = f"{query}&" if query else ""
        items = []
        page = 1
        while True:
            uri = self.__unparsed_api(
                path, query=f"{prefix}page={page}&limit={self.PAGE_LIMIT}"
            )
            resp = self.__get(uri)
            self.__require(
                resp.status_code == 200,
                f"GET {uri} returned HTTP {resp.status_code}",
            )
            data = resp.json()
            if not data:
                return items
            items.extend(data)
            page += 1

    def __is_disputed(self, issue) -> bool:
        """Return True if ``issue`` carries the dispute label."""
        return any(
            label["name"] == self.DISPUTE_LABEL for label in issue["labels"]
        )

    def get_issues(self):
        """Fetch the issues (any state) of every repository in ``self.repos``.

        Returns:
            A dict mapping repository name to the list of its issues that do
            not carry the dispute label. The count that is printed covers all
            fetched issues, disputed ones included.

        Raises:
            AssertionError: If an API request does not return HTTP 200.
        """
        issues = {}
        num = 0
        for repo in self.repos:
            name = repo["name"]
            repo_issues = self.__fetch_all_pages(
                f"repos/{self.org}/{name}/issues", query="state=all"
            )
            issues[name] = [
                issue for issue in repo_issues if not self.__is_disputed(issue)
            ]
            num += len(repo_issues)

        print(f"Found {num} tickets in public repositories in {self.org}")
        return issues

    def get_repositories(self):
        """Return all repositories the API lists for the organisation.

        Raises:
            AssertionError: If an API request does not return HTTP 200.
        """
        return self.__fetch_all_pages(f"orgs/{self.org}/repos")

    def __unparsed(self, path: str, query: str = "") -> str:
        """Build a URL on the configured host from ``path`` and ``query``."""
        return urlunparse((self.host.scheme, self.host.netloc, path, "", query, ""))

    def __unparsed_api(self, path: str, query: str = "") -> str:
        """Build a URL for ``path`` below the host's ``/api/v1/`` prefix."""
        path = f"/api/v1/{path}"
        return self.__unparsed(path=path, query=query)

    @staticmethod
    def __parse_duration(text: str) -> tuple:
        """Split a duration such as ``"1h 30min"`` into ``(hours, mins)``.

        Either part may be absent (``"2h"``, ``"45min"``); a missing part
        counts as 0.

        Raises:
            ValueError: If the text before ``h`` or ``min`` is not an integer.
        """
        hours_part, has_hours, remainder = text.partition("h")
        if has_hours:
            hours = int(hours_part)
        else:
            # No hour marker: the whole text may still hold the minutes.
            hours, remainder = 0, text
        mins_part, has_mins, _ = remainder.partition("min")
        mins = int(mins_part) if has_mins else 0
        return hours, mins

    def __parse_time_logs(self, html: str, repo: str, url: str) -> list:
        """Extract the "added spent time" events from one issue page.

        Args:
            html: Markup of the issue page.
            repo: Name of the repository the issue belongs to.
            url: HTML URL of the issue.

        Returns:
            One ``TimeSpent`` per "time-since" element found next to an author
            link whose parent text contains "added spent time".
        """
        soup = BeautifulSoup(html, "html.parser")
        times = []
        for div in soup.find_all("div", attrs={"class": "timeline-item event"}):
            for author in div.find_all("a", attrs={"class": "author"}):
                if "added spent time" not in author.parent.text:
                    continue
                stamps = author.parent.find_all(
                    "span", attrs={"class": "time-since"}
                )
                if not stamps:
                    continue
                # Everything below is the same for each stamp of this event,
                # so it is looked up once instead of once per stamp.
                name = div.find_all(
                    "img", attrs={"class": "ui avatar image"}
                )[0]["title"]
                spent = (
                    div.find_all("div", attrs={"class": "detail"})[0]
                    .find_all("span")[0]
                    .text
                )
                hours, mins = self.__parse_duration(spent)
                comment_id = div["id"]
                for stamp in stamps:
                    times.append(
                        TimeSpent(
                            comment_id=comment_id,
                            date=stamp["title"],
                            nick=author.text,
                            name=name,
                            mins=mins,
                            hours=hours,
                            repo=repo,
                            issue=url,
                            url=f"{url}#{comment_id}",
                        )
                    )
        return times

    def get_time_spent(self):
        """Scrape the time logs from the HTML page of every issue in ``self.issues``.

        Returns:
            A dict mapping repository name to a list with one entry per issue,
            each entry being the (possibly empty) list of ``TimeSpent``
            records found on that issue's page.

        Raises:
            AssertionError: If an issue page does not return HTTP 200. Parsing
                an error page would silently report no time for that issue.
        """
        total_times = {}
        num = 0
        for repo, issues in self.issues.items():
            repo_times = []
            for issue in issues:
                url = issue["html_url"]
                resp = self.__get(url)
                self.__require(
                    resp.status_code == 200,
                    f"GET {url} returned HTTP {resp.status_code}",
                )
                times = self.__parse_time_logs(resp.text, repo, url)
                num += len(times)
                repo_times.append(times)
            total_times[repo] = repo_times

        print(f"Found {num} log events in {self.org}")
        return total_times

if __name__ == "__main__":
    # Script entry point: constructing Gitea performs the whole run
    # (connectivity checks, data collection, CSV export, printed totals).
    g = Gitea(
        host="https://gitea.hostea.org",
        org="Hostea",
    )