copy-to-sha256.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. #!/usr/bin/env python3
  2. import os
  3. import logging
  4. import stat
  5. import argparse
  6. import hashlib
  7. import shutil
  8. import tarfile
  9. def hash_file(filename):
  10. with open(filename, "rb", buffering=0) as f:
  11. return hash_fileobj(f)
  12. def hash_fileobj(f):
  13. h = hashlib.sha256()
  14. for b in iter(lambda: f.read(128*1024), b""):
  15. h.update(b)
  16. return h.hexdigest()
  17. def main():
  18. logging.basicConfig(format="%(message)s")
  19. logger = logging.getLogger("copy")
  20. logger.setLevel(logging.DEBUG)
  21. args = argparse.ArgumentParser(description="...",
  22. formatter_class=argparse.RawTextHelpFormatter)
  23. args.add_argument("from_path", metavar="from", help="from")
  24. args.add_argument("to_path", metavar="to", help="to")
  25. args = args.parse_args()
  26. from_path = os.path.normpath(args.from_path)
  27. to_path = os.path.normpath(args.to_path)
  28. try:
  29. tar = tarfile.open(from_path, "r")
  30. except IsADirectoryError:
  31. tar = None
  32. if tar:
  33. handle_tar(logger, tar, to_path)
  34. else:
  35. handle_dir(logger, path, to_path)
  36. def handle_dir(logger, from_path, to_path):
  37. def onerror(oserror):
  38. logger.warning(oserror)
  39. files = os.walk(from_path, onerror=onerror)
  40. for f in files:
  41. dirpath, dirnames, filenames = f
  42. for filename in filenames:
  43. absname = os.path.join(dirpath, filename)
  44. st = os.lstat(absname)
  45. mode = st.st_mode
  46. assert not stat.S_ISDIR(mode)
  47. if stat.S_ISLNK(mode) or stat.S_ISCHR(mode) or stat.S_ISBLK(mode) or stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode):
  48. continue
  49. sha256 = hash_file(absname)
  50. to_abs = os.path.join(to_path, sha256)
  51. if os.path.exists(to_abs):
  52. logger.info("Exists, skipped {} ({})".format(to_abs, absname))
  53. else:
  54. logger.info("cp {} {}".format(absname, to_abs))
  55. shutil.copyfile(absname, to_abs)
  56. def handle_tar(logger, tar, to_path):
  57. for member in tar.getmembers():
  58. if member.isfile() or member.islnk():
  59. f = tar.extractfile(member)
  60. sha256 = hash_fileobj(f)
  61. to_abs = os.path.join(to_path, sha256)
  62. if os.path.exists(to_abs):
  63. logger.info("Exists, skipped {} ({})".format(to_abs, member.name))
  64. else:
  65. logger.info("Extracted {} ({})".format(to_abs, member.name))
  66. to_file = open(to_abs, "wb")
  67. f.seek(0)
  68. shutil.copyfileobj(f, to_file)
  69. if __name__ == "__main__":
  70. main()