#!/usr/bin/python """ CREATE TABLE package (package TEXT PRIMARY KEY, version TEXT, architecture TEXT); CREATE TABLE content (package TEXT, filename TEXT, size INTEGER, function TEXT, hash TEXT, FOREIGN KEY (package) REFERENCES package(package)); CREATE INDEX content_package_index ON content (package); CREATE INDEX content_hash_index ON content (hash); """ import hashlib import re import sqlite3 import struct import sys import tarfile import zlib import apt_pkg import lzma apt_pkg.init() class ArReader(object): global_magic = b"!\n" file_magic = b"`\n" def __init__(self, fileobj, membertest): self.fileobj = fileobj self.membertest = membertest self.remaining = None def skip(self, length): while length: data = self.fileobj.read(min(4096, length)) if not data: raise ValueError("archive truncated") length -= len(data) def skiptillmember(self): data = self.fileobj.read(len(self.global_magic)) if data != self.global_magic: raise ValueError("ar global header not found") while True: file_header = self.fileobj.read(60) if not file_header: raise ValueError("end of archive found") parts = struct.unpack("16s 12s 6s 6s 8s 10s 2s", file_header) parts = [p.rstrip(" ") for p in parts] if parts.pop() != self.file_magic: print(repr(file_header)) raise ValueError("ar file header not found") name = parts[0] length = int(parts[5]) if self.membertest(name): self.remaining = length return name self.skip(length + length % 2) def read(self, length=None): if length is None: length = self.remaining else: length = min(self.remaining, length) data = self.fileobj.read(length) self.remaining -= len(data) return data class XzStream(object): blocksize = 65536 def __init__(self, fileobj): self.fileobj = fileobj self.decomp = lzma.LZMADecompressor() self.buff = b"" def read(self, length): data = True while True: if len(self.buff) >= length: ret = self.buff[:length] self.buff = self.buff[length:] return ret elif not data: # read EOF in last iteration ret = self.buff self.buff = b"" return ret data = self.fileobj.read(self.blocksize) if data: self.buff += self.decomp.decompress(data) else: self.buff += self.decomp.flush() class MultiHash(object): def __init__(self, *hashes): self.hashes = hashes def update(self, data): for hasher in self.hashes: hasher.update(data) class HashBlacklist(object): def __init__(self, hasher, blacklist=set()): self.hasher = hasher self.blacklist = blacklist self.update = self.hasher.update self.name = hasher.name def hexdigest(self): digest = self.hasher.hexdigest() if digest in self.blacklist: return None return digest class GzipDecompressor(object): def __init__(self): self.inbuffer = b"" self.decompressor = None # zlib.decompressobj(-zlib.MAX_WBITS) def decompress(self, data): if self.decompressor: data = self.decompressor.decompress(data) if not self.decompressor.unused_data: return data unused_data = self.decompressor.unused_data self.decompressor = None return data + self.decompress(unused_data) self.inbuffer += data skip = 10 if len(self.inbuffer) < skip: return b"" if not self.inbuffer.startswith(b"\037\213\010"): raise ValueError("gzip magic not found") flag = ord(self.inbuffer[3]) if flag & 4: if len(self.inbuffer) < skip + 2: return b"" length, = struct.unpack("[^_]+)_(?P[^_]+)_(?P[^_.]+)\\.deb$", filename) package, version, architecture = match.groups() db = sqlite3.connect("test.sqlite3") cur = db.cursor() cur.execute("SELECT version FROM package WHERE package = ?;", (package,)) versions = [tpl[0] for tpl in cur.fetchall()] versions.append(version) versions.sort(cmp=apt_pkg.version_compare) if versions[-1] != version: return # not the newest version cur.execute("DELETE FROM package WHERE package = ?;", (package,)) cur.execute("DELETE FROM content WHERE package = ?;", (package,)) cur.execute("INSERT INTO package (package, version, architecture) VALUES (?, ?, ?);", (package, version, architecture)) with open(filename) as pkg: for name, size, function, hexhash in get_hashes(pkg): name = name.decode("utf8") cur.execute("INSERT INTO content (package, filename, size, function, hash) VALUES (?, ?, ?, ?, ?);", (package, name, size, function, hexhash)) db.commit() if __name__ == "__main__": main()