diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-07-26 21:53:11 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-07-26 21:53:11 +0200 |
commit | 03e7e27b440917081369e797e09de975912cb68c (patch) | |
tree | 4d55406f2eb704911c9d7da87dfe84b46d47c0c9 | |
parent | dc378a18d50142baceaef4c2a416cb5a40f84861 (diff) | |
download | debian-dedup-03e7e27b440917081369e797e09de975912cb68c.tar.gz |
verify package hashes when importing via http
-rwxr-xr-x | autoimport.py | 19 |
-rw-r--r-- | dedup/hashing.py | 19 |
-rwxr-xr-x | importpkg.py | 26 |
3 files changed, 55 insertions, 9 deletions
```diff
diff --git a/autoimport.py b/autoimport.py
index 694ffeb..481a3f8 100755
--- a/autoimport.py
+++ b/autoimport.py
@@ -29,7 +29,8 @@ def process_http(pkgs, url):
                 version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
             continue
         pkgs[name] = dict(version=pkg["Version"],
-                          filename="%s/%s" % (url, pkg["Filename"]))
+                          filename="%s/%s" % (url, pkg["Filename"]),
+                          sha256hash=pkg["SHA256"])
 
 def process_file(pkgs, filename):
     base = os.path.basename(filename)
@@ -51,14 +52,18 @@ def process_dir(pkgs, d):
         except ValueError:
             pass
 
-def process_pkg(name, filename):
+def process_pkg(name, pkgdict):
+    filename = pkgdict["filename"]
     print("importing %s" % filename)
+    importcmd = ["python", "importpkg.py"]
+    if "sha256hash" in pkgdict:
+        importcmd.extend(["-H", pkgdict["sha256hash"]])
     if filename.startswith("http://"):
         with open(os.path.join("tmp", name), "w") as outp:
             dl = subprocess.Popen(["curl", "-s", filename],
                                   stdout=subprocess.PIPE, close_fds=True)
-            imp = subprocess.Popen(["python", "importpkg.py"], stdin=dl.stdout,
-                                   stdout=outp, close_fds=True)
+            imp = subprocess.Popen(importcmd, stdin=dl.stdout, stdout=outp,
+                                   close_fds=True)
             if imp.wait():
                 raise ValueError("importpkg failed")
             if dl.wait():
@@ -66,8 +71,8 @@ def process_pkg(name, filename):
     else:
         with open(filename) as inp:
             with open(os.path.join("tmp", name), "w") as outp:
-                subprocess.check_call(["python", "importpkg.py"], stdin=inp,
-                                      stdout=outp, close_fds=True)
+                subprocess.check_call(importcmd, stdin=inp, stdout=outp,
+                                      close_fds=True)
     print("preprocessed %s" % name)
 
 def main():
@@ -106,7 +111,7 @@ def main():
     with e:
         fs = {}
         for name, pkg in pkgs.items():
-            fs[e.submit(process_pkg, name, pkg["filename"])] = name
+            fs[e.submit(process_pkg, name, pkg)] = name
 
         for f in concurrent.futures.as_completed(fs.keys()):
             name = fs[f]
diff --git a/dedup/hashing.py b/dedup/hashing.py
index 1283c7e..002eda8 100644
--- a/dedup/hashing.py
+++ b/dedup/hashing.py
@@ -106,3 +106,22 @@ def hash_file(hashobj, filelike, blocksize=65536):
         hashobj.update(data)
         data = filelike.read(blocksize)
     return hashobj
+
+class HashedStream(object):
+    """A file-like object, that supports sequential reading and hashes the
+    contents on the fly."""
+    def __init__(self, filelike, hashobj):
+        """
+        @param filelike: a file-like object, that must support the read method
+        @param hashobj: a hashlib-like object providing update and hexdigest
+        """
+        self.filelike = filelike
+        self.hashobj = hashobj
+
+    def read(self, length):
+        data = self.filelike.read(length)
+        self.hashobj.update(data)
+        return data
+
+    def hexdigest(self):
+        return self.hashobj.hexdigest()
diff --git a/importpkg.py b/importpkg.py
index 56e03ae..2f38f5c 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -6,6 +6,7 @@ document contains package metadata. Then a document is emitted for each file.
 And finally a document consisting of the string "commit" is emitted."""
 
 import hashlib
+import optparse
 import sys
 import tarfile
 import zlib
@@ -15,7 +16,8 @@ import lzma
 import yaml
 
 from dedup.arreader import ArReader
-from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
+from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \
+    HashedStream, hash_file
 from dedup.compression import GzipDecompressor, DecompressedStream
 from dedup.image import ImageHash
 
@@ -121,8 +123,28 @@ def process_package(filelike):
             yield "commit"
             break
 
+def process_package_with_hash(filelike, sha256hash):
+    hstream = HashedStream(filelike, hashlib.sha256())
+    for elem in process_package(hstream):
+        if elem == "commit":
+            while hstream.read(4096):
+                pass
+            if hstream.hexdigest() != sha256hash:
+                raise ValueError("hash sum mismatch")
+            yield elem
+            break
+        yield elem
+
 def main():
-    yaml.safe_dump_all(process_package(sys.stdin), sys.stdout)
+    parser = optparse.OptionParser()
+    parser.add_option("-H", "--hash", action="store",
+                      help="verify that stdin hash given sha256 hash")
+    options, args = parser.parse_args()
+    if options.hash:
+        gen = process_package_with_hash(sys.stdin, options.hash)
+    else:
+        gen = process_package(sys.stdin)
+    yaml.safe_dump_all(gen, sys.stdout)
 
 if __name__ == "__main__":
     main()
```