copy-to-sha256.py 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. #!/usr/bin/env python3
  2. import os
  3. import logging
  4. import stat
  5. import argparse
  6. import hashlib
  7. import shutil
  8. import tarfile
# Number of leading hex characters of a file's SHA-256 digest that are used
# to build its destination file name ("<prefix>.bin").
HASH_LENGTH = 8
  10. def hash_file(filename) -> str:
  11. with open(filename, "rb", buffering=0) as f:
  12. return hash_fileobj(f)
  13. def hash_fileobj(f) -> str:
  14. h = hashlib.sha256()
  15. for b in iter(lambda: f.read(128*1024), b""):
  16. h.update(b)
  17. return h.hexdigest()
  18. def main():
  19. logging.basicConfig(format="%(message)s")
  20. logger = logging.getLogger("copy")
  21. logger.setLevel(logging.DEBUG)
  22. args = argparse.ArgumentParser(description="...",
  23. formatter_class=argparse.RawTextHelpFormatter)
  24. args.add_argument("from_path", metavar="from", help="from")
  25. args.add_argument("to_path", metavar="to", help="to")
  26. args = args.parse_args()
  27. from_path = os.path.normpath(args.from_path)
  28. to_path = os.path.normpath(args.to_path)
  29. try:
  30. tar = tarfile.open(from_path, "r")
  31. except IsADirectoryError:
  32. tar = None
  33. if tar:
  34. handle_tar(logger, tar, to_path)
  35. else:
  36. handle_dir(logger, from_path, to_path)
  37. def handle_dir(logger, from_path: str, to_path: str):
  38. def onerror(oserror):
  39. logger.warning(oserror)
  40. files = os.walk(from_path, onerror=onerror)
  41. for f in files:
  42. dirpath, dirnames, filenames = f
  43. for filename in filenames:
  44. absname = os.path.join(dirpath, filename)
  45. st = os.lstat(absname)
  46. mode = st.st_mode
  47. assert not stat.S_ISDIR(mode)
  48. if stat.S_ISLNK(mode) or stat.S_ISCHR(mode) or stat.S_ISBLK(mode) or stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode):
  49. continue
  50. file_hash = hash_file(absname)
  51. filename = file_hash[0:HASH_LENGTH] + ".bin"
  52. to_abs = os.path.join(to_path, filename)
  53. if os.path.exists(to_abs):
  54. logger.info("Exists, skipped {} ({})".format(to_abs, absname))
  55. else:
  56. logger.info("cp {} {}".format(absname, to_abs))
  57. shutil.copyfile(absname, to_abs)
  58. def handle_tar(logger, tar, to_path: str):
  59. for member in tar.getmembers():
  60. if member.isfile() or member.islnk():
  61. f = tar.extractfile(member)
  62. file_hash = hash_fileobj(f)
  63. filename = file_hash[0:HASH_LENGTH] + ".bin"
  64. to_abs = os.path.join(to_path, filename)
  65. if os.path.exists(to_abs):
  66. logger.info("Exists, skipped {} ({})".format(to_abs, member.name))
  67. else:
  68. logger.info("Extracted {} ({})".format(to_abs, member.name))
  69. to_file = open(to_abs, "wb")
  70. f.seek(0)
  71. shutil.copyfileobj(f, to_file)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()