gnunet-chk.py.in 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. #!@PYTHONEXE@
  2. # This file is part of GNUnet.
  3. # (C) 2013, 2018 Christian Grothoff (and other contributing authors)
  4. #
  5. # GNUnet is free software: you can redistribute it and/or modify it
  6. # under the terms of the GNU Affero General Public License as published
  7. # by the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # GNUnet is distributed in the hope that it will be useful, but
  11. # WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. # Affero General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU Affero General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. #
  18. # SPDX-License-Identifier: AGPL3.0-or-later
  19. #
  20. # File: gnunet-chk.py
  21. # Brief: Computes GNUNET style Content Hash Key for a given file
  22. # Author: Sree Harsha Totakura
  23. from hashlib import sha512
  24. import logging
  25. import os
  26. import getopt
  27. import sys
  28. from Crypto.Cipher import AES
  29. from functools import reduce
  30. # Defaults
  31. DBLOCK_SIZE = (32 * 1024) # Data block size
  32. # Pick a multiple of 2 here to achive 8-byte alignment! We also
  33. # probably want DBlocks to have (roughly) the same size as IBlocks.
  34. # With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
  35. # byte = 2 * 512 bits). DO NOT CHANGE!
  36. CHK_PER_INODE = 256
  37. CHK_HASH_SIZE = 64 # SHA-512 hash = 512 bits = 64 bytes
  38. CHK_QUERY_SIZE = CHK_HASH_SIZE # Again a SHA-512 hash
  39. GNUNET_FS_URI_PREFIX = "gnunet://fs/" # FS CHK URI prefix
  40. GNUNET_FS_URI_CHK_INFIX = "chk/" # FS CHK URI infix
  41. def encode_data_to_string(data):
  42. """Returns an ASCII encoding of the given data block like
  43. GNUNET_STRINGS_data_to_string() function.
  44. data: A bytearray representing the block of data which has to be encoded
  45. """
  46. echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
  47. assert (None != data)
  48. assert (bytearray == type(data))
  49. size = len(data)
  50. assert (0 != size)
  51. vbit = 0
  52. wpos = 0
  53. rpos = 0
  54. bits = 0
  55. out = ""
  56. while (rpos < size) or (vbit > 0):
  57. if (rpos < size) and (vbit < 5):
  58. bits = (bits << 8) | data[rpos] # eat 8 more bits
  59. rpos += 1
  60. vbit += 8
  61. if (vbit < 5):
  62. bits <<= (5 - vbit) # zero-padding
  63. assert (vbit == ((size * 8) % 5))
  64. vbit = 5
  65. out += echart[(bits >> (vbit - 5)) & 31]
  66. wpos += 1
  67. vbit -= 5
  68. assert (0 == vbit)
  69. return out
  70. def sha512_hash(data):
  71. """ Returns the sha512 hash of the given data.
  72. data: string to hash
  73. """
  74. hash_obj = sha512()
  75. hash_obj.update(data)
  76. return hash_obj.digest()
  77. class AESKey(object):
  78. """Class for AES Keys. Contains the main key and the initialization
  79. vector. """
  80. key = None # The actual AES key
  81. iv = None # The initialization vector
  82. cipher = None # The cipher object
  83. KEY_SIZE = 32 # AES 256-bit key = 32 bytes
  84. IV_SIZE = AES.block_size # Initialization vector size (= AES block size)
  85. def __init__(self, passphrase):
  86. """Creates a new AES key.
  87. passphrase: string containing the passphrase to get the AES key and
  88. initialization vector
  89. """
  90. passphrase = bytearray(passphrase)
  91. self.key = bytearray(self.KEY_SIZE)
  92. self.iv = bytearray(self.IV_SIZE)
  93. if (len(passphrase) > self.KEY_SIZE):
  94. self.key = passphrase[:self.KEY_SIZE]
  95. passphrase = passphrase[self.KEY_SIZE:]
  96. if (len(passphrase) > self.IV_SIZE):
  97. self.iv = passphrase[:self.IV_SIZE]
  98. else:
  99. self.iv[0:len(passphrase)] = passphrase
  100. else:
  101. self.key[0:len(passphrase)] = passphrase
  102. self.key = str(self.key)
  103. self.iv = str(self.iv)
  104. assert (len(self.key) == self.KEY_SIZE)
  105. assert (len(self.iv) == self.IV_SIZE)
  106. def setup_aes_cipher_(aes_key):
  107. """Initializes the AES object with settings similar to those in GNUnet.
  108. aes_key: the AESKey object
  109. Returns the newly initialized AES object
  110. """
  111. return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
  112. def aes_pad_(data):
  113. """Adds padding to the data such that the size of the data is a multiple of
  114. 16 bytes
  115. data: the data string
  116. Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
  117. as padding; data is the new data string with padded bytes at the end
  118. """
  119. pad_len = len(data) % 16
  120. if (0 != pad_len):
  121. pad_len = 16 - pad_len
  122. pad_bytes = bytearray(15)
  123. data += str(pad_bytes[:pad_len])
  124. return (pad_len, data)
  125. def aes_encrypt(aes_key, data):
  126. """Encrypts the given data using AES.
  127. aes_key: the AESKey object to use for AES encryption
  128. data: the data string to encrypt
  129. """
  130. (pad_len, data) = aes_pad_(data)
  131. cipher = setup_aes_cipher_(aes_key)
  132. enc_data = cipher.encrypt(data)
  133. if (0 != pad_len):
  134. enc_data = enc_data[:-pad_len]
  135. return enc_data
  136. def aes_decrypt(aes_key, data):
  137. """Decrypts the given data using AES
  138. aes_key: the AESKey object to use for AES decryption
  139. data: the data string to decrypt
  140. """
  141. (pad_len, data) = aes_pad_(data)
  142. cipher = setup_aes_cipher_(aes_key)
  143. ptext = cipher.decrypt(data)
  144. if (0 != pad_len):
  145. ptext = ptext[:-pad_len]
  146. return ptext
  147. class Chk(object):
  148. """Class for the content hash key."""
  149. key = None
  150. query = None
  151. fsize = None
  152. def __init__(self, key, query):
  153. assert (len(key) == CHK_HASH_SIZE)
  154. assert (len(query) == CHK_QUERY_SIZE)
  155. self.key = key
  156. self.query = query
  157. def setSize(self, size):
  158. self.fsize = size
  159. def uri(self):
  160. sizestr = repr(self.fsize)
  161. if isinstance(self.fsize, int):
  162. sizestr = sizestr[:-1]
  163. return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
  164. encode_data_to_string(bytearray(self.key)) + "." + \
  165. encode_data_to_string(bytearray(self.query)) + "." + \
  166. sizestr
  167. def compute_depth_(size):
  168. """Computes the depth of the hash tree.
  169. size: the size of the file whose tree's depth has to be computed
  170. Returns the depth of the tree. Always > 0.
  171. """
  172. depth = 1
  173. fl = DBLOCK_SIZE
  174. while (fl < size):
  175. depth += 1
  176. if ((fl * CHK_PER_INODE) < fl):
  177. return depth
  178. fl = fl * CHK_PER_INODE
  179. return depth
  180. def compute_tree_size_(depth):
  181. """Calculate how many bytes of payload a block tree of the given depth MAY
  182. correspond to at most (this function ignores the fact that some blocks will
  183. only be present partially due to the total file size cutting some blocks
  184. off at the end).
  185. depth: depth of the block. depth==0 is a DBLOCK.
  186. Returns the number of bytes of payload a subtree of this depth may
  187. correspond to.
  188. """
  189. rsize = DBLOCK_SIZE
  190. for cnt in range(0, depth):
  191. rsize *= CHK_PER_INODE
  192. return rsize
  193. def compute_chk_offset_(depth, end_offset):
  194. """Compute the offset of the CHK for the current block in the IBlock
  195. above
  196. depth: depth of the IBlock in the tree (aka overall number of tree levels
  197. minus depth); 0 == DBLOCK
  198. end_offset: current offset in the overall file, at the *beginning* of the
  199. block for DBLOCK (depth == 0), otherwise at the *end* of the
  200. block (exclusive)
  201. Returns the offset in the list of CHKs in the above IBlock
  202. """
  203. bds = compute_tree_size_(depth)
  204. if (depth > 0):
  205. end_offset -= 1
  206. ret = end_offset // bds
  207. return ret % CHK_PER_INODE
  208. def compute_iblock_size_(depth, offset):
  209. """Compute the size of the current IBLOCK. The encoder is triggering the
  210. calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
  211. construction. The IBLOCK maybe a full or a partial IBLOCK, and this
  212. function is to calculate how long it should be.
  213. depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
  214. (this function is for IBLOCKs only!)
  215. offset: current offset in the payload (!) of the overall file, must be > 0
  216. (since this function is called at the end of a block).
  217. Returns the number of elements to be in the corresponding IBlock
  218. """
  219. assert (depth > 0)
  220. assert (offset > 0)
  221. bds = compute_tree_size_(depth)
  222. mod = offset % bds
  223. if mod is 0:
  224. ret = CHK_PER_INODE
  225. else:
  226. bds /= CHK_PER_INODE
  227. ret = mod // bds
  228. if (mod % bds) is not 0:
  229. ret += 1
  230. return ret
  231. def compute_rootchk(readin, size):
  232. """Returns the content hash key after generating the hash tree for the given
  233. input stream.
  234. readin: the stream where to read data from
  235. size: the size of data to be read
  236. """
  237. depth = compute_depth_(size)
  238. current_depth = 0
  239. chks = [None] * (depth * CHK_PER_INODE) # list buffer
  240. read_offset = 0
  241. logging.debug("Begining to calculate tree hash with depth: " + repr(depth))
  242. while True:
  243. if (depth == current_depth):
  244. off = CHK_PER_INODE * (depth - 1)
  245. assert (chks[off] is not None)
  246. logging.debug("Encoding done, reading CHK `" + chks[off].query + \
  247. "' from " + repr(off) + "\n")
  248. uri_chk = chks[off]
  249. assert (size == read_offset)
  250. uri_chk.setSize(size)
  251. return uri_chk
  252. if (0 == current_depth):
  253. pt_size = min(DBLOCK_SIZE, size - read_offset)
  254. try:
  255. pt_block = readin.read(pt_size)
  256. except IOError:
  257. logging.warning("Error reading input file stream")
  258. return None
  259. else:
  260. pt_elements = compute_iblock_size_(current_depth, read_offset)
  261. pt_block = ""
  262. pt_block = \
  263. reduce((lambda ba, chk:
  264. ba + (chk.key + chk.query)),
  265. chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements],
  266. pt_block)
  267. pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
  268. assert (len(pt_block) == pt_size)
  269. assert (pt_size <= DBLOCK_SIZE)
  270. off = compute_chk_offset_(current_depth, read_offset)
  271. logging.debug("Encoding data at offset " + repr(read_offset) + \
  272. " and depth " + repr(current_depth) + " with block " \
  273. "size " + repr(pt_size) + " and target CHK offset " + \
  274. repr(current_depth * CHK_PER_INODE))
  275. pt_hash = sha512_hash(pt_block)
  276. pt_aes_key = AESKey(pt_hash)
  277. pt_enc = aes_encrypt(pt_aes_key, pt_block)
  278. pt_enc_hash = sha512_hash(pt_enc)
  279. chk = Chk(pt_hash, pt_enc_hash)
  280. chks[(current_depth * CHK_PER_INODE) + off] = chk
  281. if (0 == current_depth):
  282. read_offset += pt_size
  283. if (read_offset == size) or \
  284. (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
  285. current_depth += 1
  286. else:
  287. if (CHK_PER_INODE == off) or (read_offset == size):
  288. current_depth += 1
  289. else:
  290. current_depth = 0
  291. def chkuri_from_path(path):
  292. """Returns the CHK URI of the file at the given path.
  293. path: the path of the file whose CHK has to be calculated
  294. """
  295. size = os.path.getsize(path)
  296. readin = open(path, "rb")
  297. chk = compute_rootchk(readin, size)
  298. readin.close()
  299. return chk.uri()
  300. def usage():
  301. """Prints help about using this script."""
  302. print(
  303. """
  304. Usage: gnunet-chk.py [options] file
  305. Prints the Content Hash Key of given file in GNUNET-style URI.
  306. Options:
  307. -h, --help : prints this message
  308. """
  309. )
  310. if '__main__' == __name__:
  311. try:
  312. opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
  313. except getopt.GetoptError as err:
  314. print(err)
  315. print("Exception occured")
  316. usage()
  317. sys.exit(2)
  318. for option, value in opts:
  319. if option in ("-h", "--help"):
  320. usage()
  321. sys.exit(0)
  322. if len(args) != 1:
  323. print("Incorrect number of arguments passed")
  324. usage()
  325. sys.exit(1)
  326. print(chkuri_from_path(args[0]))