summary | refs | log | tree | commit | diff
path: root/importpkg.py
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2013-08-02 08:40:49 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2013-08-02 08:40:49 +0200
commit: cb3708825bf7ea32314040575cef35980dad0cd8 (patch)
tree: 31575a8525dc90ba6904268d94f47e1604bf0557 /importpkg.py
parent: a4bbbb6e664e605634cb3f9e0564c7e4a93697be (diff)
parent: 2712edb550968ce7ec8cd9800241d7944666631a (diff)
download: debian-dedup-cb3708825bf7ea32314040575cef35980dad0cd8.tar.gz
Merge branch master into sqlalchemy
This makes the sqlalchemy branch schema-compatible with master again. The
biggest change on master was the introduction of the function table. It
caused most of the conflicts. Note that webapp had one conflict not detected
by git: The selecting of issues in show_package needed sqlalchemy conversion.

Conflicts:
    README
    update_sharing.py
    webapp.py
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x importpkg.py 45
1 files changed, 38 insertions, 7 deletions
diff --git a/importpkg.py b/importpkg.py
index 56e03ae..182ca01 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -6,6 +6,7 @@ document contains package metadata. Then a document is emitted for each file.
And finally a document consisting of the string "commit" is emitted."""
import hashlib
+import optparse
import sys
import tarfile
import zlib
@@ -15,9 +16,10 @@ import lzma
import yaml
from dedup.arreader import ArReader
-from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
+from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \
+ HashedStream, hash_file
from dedup.compression import GzipDecompressor, DecompressedStream
-from dedup.image import ImageHash
+from dedup.image import GIFHash, PNGHash
class MultiHash(object):
def __init__(self, *hashes):
@@ -42,17 +44,24 @@ def gziphash():
hashobj.name = "gzip_sha512"
return HashBlacklist(hashobj, boring_sha512_hashes)
-def imagehash():
- hashobj = ImageHash(hashlib.sha512())
+def pnghash():
+ hashobj = PNGHash(hashlib.sha512())
hashobj = SuppressingHash(hashobj, (ValueError,))
- hashobj.name = "image_sha512"
+ hashobj.name = "png_sha512"
+ return hashobj
+
+def gifhash():
+ hashobj = GIFHash(hashlib.sha512())
+ hashobj = SuppressingHash(hashobj, (ValueError,))
+ hashobj.name = "gif_sha512"
return hashobj
def get_hashes(tar):
for elem in tar:
if not elem.isreg(): # excludes hard links as well
continue
- hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash())
+ hasher = MultiHash(sha512_nontrivial(), gziphash(), pnghash(),
+ gifhash())
hasher = hash_file(hasher, tar.extractfile(elem))
hashes = {}
for hashobj in hasher.hashes:
@@ -107,6 +116,8 @@ def process_package(filelike):
elif name == "data.tar.xz":
zf = DecompressedStream(af, lzma.LZMADecompressor())
tf = tarfile.open(fileobj=zf, mode="r|")
+ elif name == "data.tar":
+ tf = tarfile.open(fileobj=af, mode="r|")
else:
continue
if state != "control_file":
@@ -121,8 +132,28 @@ def process_package(filelike):
yield "commit"
break
+def process_package_with_hash(filelike, sha256hash):
+ hstream = HashedStream(filelike, hashlib.sha256())
+ for elem in process_package(hstream):
+ if elem == "commit":
+ while hstream.read(4096):
+ pass
+ if hstream.hexdigest() != sha256hash:
+ raise ValueError("hash sum mismatch")
+ yield elem
+ break
+ yield elem
+
def main():
- yaml.safe_dump_all(process_package(sys.stdin), sys.stdout)
+ parser = optparse.OptionParser()
+ parser.add_option("-H", "--hash", action="store",
+ help="verify that stdin hash given sha256 hash")
+ options, args = parser.parse_args()
+ if options.hash:
+ gen = process_package_with_hash(sys.stdin, options.hash)
+ else:
+ gen = process_package(sys.stdin)
+ yaml.safe_dump_all(gen, sys.stdout)
if __name__ == "__main__":
main()