#!/usr/bin/python3

import argparse
import sqlite3
import typing

from dedup.utils import fetchiter


def add_values(
    cursor: sqlite3.Cursor,
    insert_key: typing.Tuple[int, int, int, int],
    files: int,
    size: int,
) -> None:
    cursor.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE pid1 = ? AND pid2 = ? AND fid1 = ? AND fid2 = ?;",
                   (files, size) + insert_key)
    if cursor.rowcount > 0:
        return
    cursor.execute("INSERT INTO sharing (pid1, pid2, fid1, fid2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
                   insert_key + (files, size))


def compute_pkgdict(
    rows: typing.Iterable[typing.Tuple[int, typing.Any, str, int, int]]
) -> typing.Dict[int, typing.Dict[int, typing.List[typing.Tuple[int, str]]]]:
    pkgdict: typing.Dict[
        int, typing.Dict[int, typing.List[typing.Tuple[int, str]]]
    ] = {}
    for pid, _, filename, size, fid in rows:
        funcdict = pkgdict.setdefault(pid, {})
        funcdict.setdefault(fid, []).append((size, filename))
    return pkgdict


def process_pkgdict(
    cursor: sqlite3.Cursor,
    pkgdict: typing.Dict[
        int, typing.Dict[int, typing.List[typing.Tuple[int, str]]]
    ],
) -> None:
    for pid1, funcdict1 in pkgdict.items():
        for fid1, files in funcdict1.items():
            numfiles = len(files)
            size = sum(entry[0] for entry in files)
            for pid2, funcdict2 in pkgdict.items():
                if pid1 == pid2:
                    pkgnumfiles = numfiles - 1
                    pkgsize = size - min(entry[0] for entry in files)
                    if pkgnumfiles == 0:
                        continue
                else:
                    pkgnumfiles = numfiles
                    pkgsize = size
                for fid2 in funcdict2.keys():
                    insert_key = (pid1, pid2, fid1, fid2)
                    add_values(cursor, insert_key, pkgnumfiles, pkgsize)


def main(db: sqlite3.Connection) -> None:
    cur = db.cursor()
    cur.execute("PRAGMA foreign_keys = ON;")
    cur.execute("DELETE FROM sharing;")
    cur.execute("DELETE FROM duplicate;")
    cur.execute("DELETE FROM issue;")
    readcur = db.cursor()
    readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
    for hashvalue, in fetchiter(readcur):
        cur.execute("SELECT function.eqclass, content.pid, content.id, content.filename, content.size, hash.fid FROM hash JOIN content ON hash.cid = content.id JOIN function ON hash.fid = function.id AND function.eqclass IS NOT NULL WHERE hash = ?;",
                    (hashvalue,))
        rowdict: typing.Dict[
            int, typing.List[typing.Tuple[int, int, str, int, int]]
        ] = {}
        for row in cur.fetchall():
            rowdict.setdefault(row[0], []).append(row[1:])
        for eqclass, rows in rowdict.items():
            if len(rows) < 2:
                print("skipping hash %s class %d with too few entries" % (hashvalue, eqclass))
                continue
            print("processing hash %s class %d with %d entries" % (hashvalue, eqclass, len(rows)))
            pkgdict = compute_pkgdict(rows)
            cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
                            [(row[1],) for row in rows])
            process_pkgdict(cur, pkgdict)
    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'file named something.gz is not a valid gzip file' FROM content WHERE content.filename LIKE '%.gz' AND NOT EXISTS (SELECT 1 FROM hash JOIN function ON hash.fid = function.id WHERE hash.cid = content.id AND function.name = 'gzip_sha512');")
    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'png image not named something.png' FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = 'png_sha512' AND lower(filename) NOT LIKE '%.png';")
    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'gif image not named something.gif' FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = 'gif_sha512' AND lower(filename) NOT LIKE '%.gif';")
    db.commit()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--database", action="store",
                        default="test.sqlite3",
                        help="path to the sqlite3 database file")
    args = parser.parse_args()
    main(sqlite3.connect(args.database))