ContentManager.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005
  1. import json
  2. import time
  3. import re
  4. import os
  5. import copy
  6. import gevent
  7. from Debug import Debug
  8. from Crypt import CryptHash
  9. from Config import config
  10. from util import helper
  11. from util import Diff
  12. from util import SafeRe
  13. from Peer import PeerHashfield
  14. from ContentDbDict import ContentDbDict
  15. from Plugin import PluginManager
  16. class VerifyError(Exception):
  17. pass
  18. class SignError(Exception):
  19. pass
  20. @PluginManager.acceptPlugins
  21. class ContentManager(object):
    def __init__(self, site):
        # Manage one site's content.json files: loading, lookup, rules and hashing.
        self.site = site
        self.log = self.site.log  # Reuse the owning site's logger
        self.contents = ContentDbDict(site)  # inner_path -> parsed content.json dict (database backed)
        self.hashfield = PeerHashfield()  # Hash ids of optional files we have downloaded
        self.has_optional_files = False  # Becomes True once any loaded content.json has files_optional
    # Load all content.json files
    def loadContents(self):
        if len(self.contents) == 0:
            # Content db is empty: first run, scan the content.json files from disk
            self.log.debug("ContentDb not initialized, load files from filesystem")
            self.loadContent(add_bad_files=False, delete_removed_files=False)
        self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()

        # Load hashfield cache
        if "hashfield" in self.site.settings.get("cache", {}):
            # Restore downloaded-optional-file ids from the base64 cache
            # (Python 2 only: str.decode("base64"))
            self.hashfield.fromstring(self.site.settings["cache"]["hashfield"].decode("base64"))
            del self.site.settings["cache"]["hashfield"]
        elif self.contents.get("content.json") and self.site.settings["size_optional"] > 0:
            self.site.storage.updateBadFiles()  # No hashfield cache created yet

        self.has_optional_files = bool(self.hashfield)

        self.contents.db.initSite(self.site)
  42. # Load content.json to self.content
  43. # Return: Changed files ["index.html", "data/messages.json"], Deleted files ["old.jpg"]
  44. def loadContent(self, content_inner_path="content.json", add_bad_files=True, delete_removed_files=True, load_includes=True, force=False):
  45. content_inner_path = content_inner_path.strip("/") # Remove / from beginning
  46. old_content = self.contents.get(content_inner_path)
  47. content_path = self.site.storage.getPath(content_inner_path)
  48. content_dir = helper.getDirname(self.site.storage.getPath(content_inner_path))
  49. content_inner_dir = helper.getDirname(content_inner_path)
  50. if os.path.isfile(content_path):
  51. try:
  52. # Check if file is newer than what we have
  53. if not force and old_content and not self.site.settings.get("own"):
  54. for line in open(content_path):
  55. if '"modified"' not in line:
  56. continue
  57. match = re.search("([0-9\.]+),$", line.strip(" \r\n"))
  58. if match and float(match.group(1)) <= old_content.get("modified", 0):
  59. self.log.debug("%s loadContent same json file, skipping" % content_inner_path)
  60. return [], []
  61. new_content = json.load(open(content_path))
  62. except Exception, err:
  63. self.log.warning("%s load error: %s" % (content_path, Debug.formatException(err)))
  64. return [], []
  65. else:
  66. self.log.debug("Content.json not exist: %s" % content_path)
  67. return [], [] # Content.json not exist
  68. try:
  69. # Get the files where the sha512 changed
  70. changed = []
  71. deleted = []
  72. # Check changed
  73. for relative_path, info in new_content.get("files", {}).iteritems():
  74. if "sha512" in info:
  75. hash_type = "sha512"
  76. else: # Backward compatibility
  77. hash_type = "sha1"
  78. new_hash = info[hash_type]
  79. if old_content and old_content["files"].get(relative_path): # We have the file in the old content
  80. old_hash = old_content["files"][relative_path].get(hash_type)
  81. else: # The file is not in the old content
  82. old_hash = None
  83. if old_hash != new_hash:
  84. changed.append(content_inner_dir + relative_path)
  85. # Check changed optional files
  86. for relative_path, info in new_content.get("files_optional", {}).iteritems():
  87. file_inner_path = content_inner_dir + relative_path
  88. new_hash = info["sha512"]
  89. if old_content and old_content.get("files_optional", {}).get(relative_path):
  90. # We have the file in the old content
  91. old_hash = old_content["files_optional"][relative_path].get("sha512")
  92. if old_hash != new_hash and self.site.isDownloadable(file_inner_path):
  93. changed.append(file_inner_path) # Download new file
  94. elif old_hash != new_hash and self.hashfield.hasHash(old_hash) and not self.site.settings.get("own"):
  95. try:
  96. old_hash_id = self.hashfield.getHashId(old_hash)
  97. self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][relative_path]["size"])
  98. self.optionalDelete(file_inner_path)
  99. self.log.debug("Deleted changed optional file: %s" % file_inner_path)
  100. except Exception, err:
  101. self.log.debug("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err)))
  102. else: # The file is not in the old content
  103. if self.site.isDownloadable(file_inner_path):
  104. changed.append(file_inner_path) # Download new file
  105. # Check deleted
  106. if old_content:
  107. old_files = dict(
  108. old_content.get("files", {}),
  109. **old_content.get("files_optional", {})
  110. )
  111. new_files = dict(
  112. new_content.get("files", {}),
  113. **new_content.get("files_optional", {})
  114. )
  115. deleted = [key for key in old_files if key not in new_files]
  116. if deleted and not self.site.settings.get("own"):
  117. # Deleting files that no longer in content.json
  118. for file_relative_path in deleted:
  119. file_inner_path = content_inner_dir + file_relative_path
  120. try:
  121. # Check if the deleted file is optional
  122. if old_content.get("files_optional") and old_content["files_optional"].get(file_relative_path):
  123. self.optionalDelete(file_inner_path)
  124. old_hash = old_content["files_optional"][file_relative_path].get("sha512")
  125. if self.hashfield.hasHash(old_hash):
  126. old_hash_id = self.hashfield.getHashId(old_hash)
  127. self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][file_relative_path]["size"])
  128. else:
  129. self.site.storage.delete(file_inner_path)
  130. self.log.debug("Deleted file: %s" % file_inner_path)
  131. except Exception, err:
  132. self.log.debug("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err)))
  133. # Cleanup empty dirs
  134. tree = {root: [dirs, files] for root, dirs, files in os.walk(self.site.storage.getPath(content_inner_dir))}
  135. for root in sorted(tree, key=len, reverse=True):
  136. dirs, files = tree[root]
  137. if dirs == [] and files == []:
  138. root_inner_path = self.site.storage.getInnerPath(root.replace("\\", "/"))
  139. self.log.debug("Empty directory: %s, cleaning up." % root_inner_path)
  140. try:
  141. self.site.storage.deleteDir(root_inner_path)
  142. # Remove from tree dict to reflect changed state
  143. tree[os.path.dirname(root)][0].remove(os.path.basename(root))
  144. except Exception, err:
  145. self.log.debug("Error deleting empty directory %s: %s" % (root_inner_path, err))
  146. # Check archived
  147. if old_content and "user_contents" in new_content and "archived" in new_content["user_contents"]:
  148. old_archived = old_content.get("user_contents", {}).get("archived", {})
  149. new_archived = new_content.get("user_contents", {}).get("archived", {})
  150. self.log.debug("old archived: %s, new archived: %s" % (len(old_archived), len(new_archived)))
  151. archived_changed = {
  152. key: date_archived
  153. for key, date_archived in new_archived.iteritems()
  154. if old_archived.get(key) != new_archived[key]
  155. }
  156. if archived_changed:
  157. self.log.debug("Archived changed: %s" % archived_changed)
  158. for archived_dirname, date_archived in archived_changed.iteritems():
  159. archived_inner_path = content_inner_dir + archived_dirname + "/content.json"
  160. if self.contents.get(archived_inner_path, {}).get("modified", 0) < date_archived:
  161. self.removeContent(archived_inner_path)
  162. deleted += archived_inner_path
  163. self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()
  164. # Check archived before
  165. if old_content and "user_contents" in new_content and "archived_before" in new_content["user_contents"]:
  166. old_archived_before = old_content.get("user_contents", {}).get("archived_before", 0)
  167. new_archived_before = new_content.get("user_contents", {}).get("archived_before", 0)
  168. if old_archived_before != new_archived_before:
  169. self.log.debug("Archived before changed: %s -> %s" % (old_archived_before, new_archived_before))
  170. # Remove downloaded archived files
  171. num_removed_contents = 0
  172. for archived_inner_path in self.listModified(before=new_archived_before):
  173. if archived_inner_path.startswith(content_inner_dir) and archived_inner_path != content_inner_path:
  174. self.removeContent(archived_inner_path)
  175. num_removed_contents += 1
  176. self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()
  177. # Remove archived files from download queue
  178. num_removed_bad_files = 0
  179. for bad_file in self.site.bad_files.keys():
  180. if bad_file.endswith("content.json"):
  181. del self.site.bad_files[bad_file]
  182. num_removed_bad_files += 1
  183. if num_removed_bad_files > 0:
  184. self.site.worker_manager.removeSolvedFileTasks(mark_as_good=False)
  185. gevent.spawn(self.site.update, since=0)
  186. self.log.debug("Archived removed contents: %s, removed bad files: %s" % (num_removed_contents, num_removed_bad_files))
  187. # Load includes
  188. if load_includes and "includes" in new_content:
  189. for relative_path, info in new_content["includes"].items():
  190. include_inner_path = content_inner_dir + relative_path
  191. if self.site.storage.isFile(include_inner_path): # Content.json exists, load it
  192. include_changed, include_deleted = self.loadContent(
  193. include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files
  194. )
  195. if include_changed:
  196. changed += include_changed # Add changed files
  197. if include_deleted:
  198. deleted += include_deleted # Add changed files
  199. else: # Content.json not exist, add to changed files
  200. self.log.debug("Missing include: %s" % include_inner_path)
  201. changed += [include_inner_path]
  202. # Load blind user includes (all subdir)
  203. if load_includes and "user_contents" in new_content:
  204. for relative_dir in os.listdir(content_dir):
  205. include_inner_path = content_inner_dir + relative_dir + "/content.json"
  206. if not self.site.storage.isFile(include_inner_path):
  207. continue # Content.json not exist
  208. include_changed, include_deleted = self.loadContent(
  209. include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files,
  210. load_includes=False
  211. )
  212. if include_changed:
  213. changed += include_changed # Add changed files
  214. if include_deleted:
  215. deleted += include_deleted # Add changed files
  216. # Save some memory
  217. new_content["signs"] = None
  218. if "cert_sign" in new_content:
  219. new_content["cert_sign"] = None
  220. if new_content.get("files_optional"):
  221. self.has_optional_files = True
  222. # Update the content
  223. self.contents[content_inner_path] = new_content
  224. except Exception, err:
  225. self.log.warning("%s parse error: %s" % (content_inner_path, Debug.formatException(err)))
  226. return [], [] # Content.json parse error
  227. # Add changed files to bad files
  228. if add_bad_files:
  229. for inner_path in changed:
  230. self.site.bad_files[inner_path] = self.site.bad_files.get(inner_path, 0) + 1
  231. for inner_path in deleted:
  232. if inner_path in self.site.bad_files:
  233. del self.site.bad_files[inner_path]
  234. self.site.worker_manager.removeSolvedFileTasks()
  235. if new_content.get("modified", 0) > self.site.settings.get("modified", 0):
  236. # Dont store modifications in the far future (more than 10 minute)
  237. self.site.settings["modified"] = min(time.time() + 60 * 10, new_content["modified"])
  238. return changed, deleted
    def removeContent(self, inner_path):
        # Delete a content.json, every file it registers and its directory,
        # then drop it from self.contents. All deletions are best effort (logged, not raised).
        inner_dir = helper.getDirname(inner_path)
        try:
            content = self.contents[inner_path]
            # Merge normal and optional file listings into one dict of relative paths
            files = dict(
                content.get("files", {}),
                **content.get("files_optional", {})
            )
        except Exception, err:
            self.log.debug("Error loading %s for removeContent: %s" % (inner_path, Debug.formatException(err)))
            files = {}
        files["content.json"] = True  # Also delete the content.json itself
        # Deleting files that no longer in content.json
        for file_relative_path in files:
            file_inner_path = inner_dir + file_relative_path
            try:
                self.site.storage.delete(file_inner_path)
                self.log.debug("Deleted file: %s" % file_inner_path)
            except Exception, err:
                self.log.debug("Error deleting file %s: %s" % (file_inner_path, err))
        try:
            self.site.storage.deleteDir(inner_dir)
        except Exception, err:
            self.log.debug("Error deleting dir %s: %s" % (inner_dir, err))

        try:
            del self.contents[inner_path]
        except Exception, err:
            self.log.debug("Error key from contents: %s" % inner_path)
    # Get total size of site
    # Return: 32819 (size of files in kb)
    def getTotalSize(self, ignore=None):
        # Delegates to the content db; `ignore` is an inner_path excluded from the sum
        return self.contents.db.getTotalSize(self.site, ignore)
    def listModified(self, after=None, before=None):
        # List content.json inner paths modified after/before the given
        # timestamps (delegated to the content db query)
        return self.contents.db.listModified(self.site, after=after, before=before)
  273. def listContents(self, inner_path="content.json", user_files=False):
  274. if inner_path not in self.contents:
  275. return []
  276. back = [inner_path]
  277. content_inner_dir = helper.getDirname(inner_path)
  278. for relative_path in self.contents[inner_path].get("includes", {}).keys():
  279. include_inner_path = content_inner_dir + relative_path
  280. back += self.listContents(include_inner_path)
  281. return back
  282. # Returns if file with the given modification date is archived or not
  283. def isArchived(self, inner_path, modified):
  284. match = re.match("(.*)/(.*?)/", inner_path)
  285. if not match:
  286. return False
  287. user_contents_inner_path = match.group(1) + "/content.json"
  288. relative_directory = match.group(2)
  289. file_info = self.getFileInfo(user_contents_inner_path)
  290. if file_info:
  291. time_archived_before = file_info.get("archived_before", 0)
  292. time_directory_archived = file_info.get("archived", {}).get(relative_directory)
  293. if modified <= time_archived_before or modified <= time_directory_archived:
  294. return True
  295. else:
  296. return False
  297. else:
  298. return False
  299. def isDownloaded(self, inner_path, hash_id=None):
  300. if not hash_id:
  301. file_info = self.getFileInfo(inner_path)
  302. if not file_info or "sha512" not in file_info:
  303. return False
  304. hash_id = self.hashfield.getHashId(file_info["sha512"])
  305. return hash_id in self.hashfield
  306. # Is modified since signing
  307. def isModified(self, inner_path):
  308. s = time.time()
  309. if inner_path.endswith("content.json"):
  310. try:
  311. is_valid = self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False)
  312. if is_valid:
  313. is_modified = False
  314. else:
  315. is_modified = True
  316. except VerifyError:
  317. is_modified = True
  318. else:
  319. try:
  320. self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False)
  321. is_modified = False
  322. except VerifyError:
  323. is_modified = True
  324. return is_modified
    # Find the file info line from self.contents
    # Return: { "sha512": "c29d73d...21f518", "size": 41 , "content_inner_path": "content.json"}
    def getFileInfo(self, inner_path, new_file=False):
        # Walks upward from the file's own directory towards the site root,
        # checking each level's content.json for the file entry.
        # NOTE(review): the returned dict is the stored entry itself, mutated in
        # place with content_inner_path/optional/relative_path keys.
        dirs = inner_path.split("/")  # Parent dirs of content.json
        inner_path_parts = [dirs.pop()]  # Filename relative to content.json
        while True:
            content_inner_path = "%s/content.json" % "/".join(dirs)
            content_inner_path = content_inner_path.strip("/")
            content = self.contents.get(content_inner_path)

            # Check in files
            if content and "files" in content:
                back = content["files"].get("/".join(inner_path_parts))
                if back:
                    back["content_inner_path"] = content_inner_path
                    back["optional"] = False
                    back["relative_path"] = "/".join(inner_path_parts)
                    return back

            # Check in optional files
            if content and "files_optional" in content:  # Check if file in this content.json
                back = content["files_optional"].get("/".join(inner_path_parts))
                if back:
                    back["content_inner_path"] = content_inner_path
                    back["optional"] = True
                    back["relative_path"] = "/".join(inner_path_parts)
                    return back

            # Return the rules if user dir
            if content and "user_contents" in content:
                back = content["user_contents"]
                content_inner_path_dir = helper.getDirname(content_inner_path)
                relative_content_path = inner_path[len(content_inner_path_dir):]
                # First path segment under the user_contents dir is the user's auth address
                user_auth_address_match = re.match("([A-Za-z0-9]+)/.*", relative_content_path)
                if user_auth_address_match:
                    user_auth_address = user_auth_address_match.group(1)
                    back["content_inner_path"] = "%s%s/content.json" % (content_inner_path_dir, user_auth_address)
                else:
                    back["content_inner_path"] = content_inner_path_dir + "content.json"
                back["optional"] = None
                back["relative_path"] = "/".join(inner_path_parts)
                return back

            # New file in a known directory: synthesize a minimal info dict
            if new_file and content:
                back = {}
                back["content_inner_path"] = content_inner_path
                back["relative_path"] = "/".join(inner_path_parts)
                back["optional"] = None
                return back

            # No inner path in this dir, lets try the parent dir
            if dirs:
                inner_path_parts.insert(0, dirs.pop())
            else:  # No more parent dirs
                break

        # Not found
        return False
    # Get rules for the file
    # Return: The rules for the file or False if not allowed
    def getRules(self, inner_path, content=None):
        if not inner_path.endswith("content.json"):  # Find the files content.json first
            file_info = self.getFileInfo(inner_path)
            if not file_info:
                return False  # File not found
            inner_path = file_info["content_inner_path"]

        if inner_path == "content.json":  # Root content.json
            rules = {}
            rules["signers"] = self.getValidSigners(inner_path, content)
            return rules

        # Walk upward looking for a parent content.json that governs this one,
        # either via an explicit "includes" entry or via "user_contents"
        dirs = inner_path.split("/")  # Parent dirs of content.json
        inner_path_parts = [dirs.pop()]  # Filename relative to content.json
        inner_path_parts.insert(0, dirs.pop())  # Dont check in self dir
        while True:
            content_inner_path = "%s/content.json" % "/".join(dirs)
            parent_content = self.contents.get(content_inner_path.strip("/"))
            if parent_content and "includes" in parent_content:
                return parent_content["includes"].get("/".join(inner_path_parts))
            elif parent_content and "user_contents" in parent_content:
                return self.getUserContentRules(parent_content, inner_path, content)
            else:  # No inner path in this dir, lets try the parent dir
                if dirs:
                    inner_path_parts.insert(0, dirs.pop())
                else:  # No more parent dirs
                    break
        return False
    # Get rules for a user file
    # Return: The rules of the file or False if not allowed
    def getUserContentRules(self, parent_content, inner_path, content):
        user_contents = parent_content["user_contents"]

        # Delivered for directory: derive the user's address from the path
        if "inner_path" in parent_content:
            parent_content_dir = helper.getDirname(parent_content["inner_path"])
            user_address = re.match("([A-Za-z0-9]*?)/", inner_path[len(parent_content_dir):]).group(1)
        else:
            user_address = re.match(".*/([A-Za-z0-9]*?)/.*?$", inner_path).group(1)

        try:
            if not content:
                content = self.site.storage.loadJson(inner_path)  # Read the file if no content specified
            user_urn = "%s/%s" % (content["cert_auth_type"], content["cert_user_id"])  # web/nofish@zeroid.bit
            cert_user_id = content["cert_user_id"]
        except Exception:  # Content.json not exist
            user_urn = "n-a/n-a"
            cert_user_id = "n-a"

        # Per-address permission entry takes precedence over per-username entry
        if user_address in user_contents["permissions"]:
            rules = copy.copy(user_contents["permissions"].get(user_address, {}))  # Default rules based on address
        else:
            rules = copy.copy(user_contents["permissions"].get(cert_user_id, {}))  # Default rules based on username

        # A literal False permission entry means the user is banned
        if rules is False:
            banned = True
            rules = {}
        else:
            banned = False
        if "signers" in rules:
            rules["signers"] = rules["signers"][:]  # Make copy of the signers
        for permission_pattern, permission_rules in user_contents["permission_rules"].items():  # Regexp rules
            if not SafeRe.match(permission_pattern, user_urn):
                continue  # Rule is not valid for user
            # Update rules if its better than current recorded ones
            for key, val in permission_rules.iteritems():
                if key not in rules:
                    if type(val) is list:
                        rules[key] = val[:]  # Make copy
                    else:
                        rules[key] = val
                elif type(val) is int:  # Int, update if larger
                    if val > rules[key]:
                        rules[key] = val
                elif hasattr(val, "startswith"):  # String, update if longer
                    if len(val) > len(rules[key]):
                        rules[key] = val
                elif type(val) is list:  # List, append
                    rules[key] += val

        # Accepted cert signers
        rules["cert_signers"] = user_contents.get("cert_signers", {})
        rules["cert_signers_pattern"] = user_contents.get("cert_signers_pattern")

        if "signers" not in rules:
            rules["signers"] = []

        if not banned:
            rules["signers"].append(user_address)  # Add user as valid signer
        rules["user_address"] = user_address
        rules["includes_allowed"] = False  # User contents may not define further includes

        return rules
    # Get diffs for changed files
    def getDiffs(self, inner_path, limit=30 * 1024, update_files=True):
        # Build per-file diff actions between the on-disk file and its "-new"
        # (or "-old") sibling. When update_files is True the sibling replaces
        # (or is removed alongside) the current file as a side effect.
        if inner_path not in self.contents:
            return {}
        diffs = {}
        content_inner_path_dir = helper.getDirname(inner_path)
        for file_relative_path in self.contents[inner_path].get("files", {}):
            file_inner_path = content_inner_path_dir + file_relative_path
            if self.site.storage.isFile(file_inner_path + "-new"):  # New version present
                diffs[file_relative_path] = Diff.diff(
                    list(self.site.storage.open(file_inner_path)),
                    list(self.site.storage.open(file_inner_path + "-new")),
                    limit=limit
                )
                if update_files:
                    # Promote the "-new" file to be the current version
                    self.site.storage.delete(file_inner_path)
                    self.site.storage.rename(file_inner_path + "-new", file_inner_path)
            if self.site.storage.isFile(file_inner_path + "-old"):  # Old version present
                diffs[file_relative_path] = Diff.diff(
                    list(self.site.storage.open(file_inner_path + "-old")),
                    list(self.site.storage.open(file_inner_path)),
                    limit=limit
                )
                if update_files:
                    self.site.storage.delete(file_inner_path + "-old")
        return diffs
  488. def hashFile(self, dir_inner_path, file_relative_path, optional=False):
  489. back = {}
  490. file_inner_path = dir_inner_path + "/" + file_relative_path
  491. file_path = self.site.storage.getPath(file_inner_path)
  492. file_size = os.path.getsize(file_path)
  493. sha512sum = CryptHash.sha512sum(file_path) # Calculate sha512 sum of file
  494. if optional and not self.hashfield.hasHash(sha512sum):
  495. self.optionalDownloaded(file_inner_path, self.hashfield.getHashId(sha512sum), file_size, own=True)
  496. back[file_relative_path] = {"sha512": sha512sum, "size": os.path.getsize(file_path)}
  497. return back
  498. def isValidRelativePath(self, relative_path):
  499. if ".." in relative_path:
  500. return False
  501. elif len(relative_path) > 255:
  502. return False
  503. else:
  504. return re.match("^[a-z\[\]\(\) A-Z0-9~_@=\.\+-/]+$", relative_path)
  505. def sanitizePath(self, inner_path):
  506. return re.sub("[^a-z\[\]\(\) A-Z0-9_@=\.\+-/]", "", inner_path)
    # Hash files in directory
    def hashFiles(self, dir_inner_path, ignore_pattern=None, optional_pattern=None):
        # Walk dir_inner_path and build the "files" / "files_optional" nodes for
        # a content.json. Returns (files_node, files_optional_node).
        files_node = {}
        files_optional_node = {}
        if dir_inner_path and not self.isValidRelativePath(dir_inner_path):
            # NOTE(review): this `ignored = True` is dead — it is overwritten at
            # the top of every loop iteration, so an invalid directory is still
            # hashed after the error is logged. Intent unclear; confirm before fixing.
            ignored = True
            self.log.error("- [ERROR] Only ascii encoded directories allowed: %s" % dir_inner_path)

        for file_relative_path in self.site.storage.walk(dir_inner_path, ignore_pattern):
            file_name = helper.getFilename(file_relative_path)

            ignored = optional = False
            if file_name == "content.json":
                ignored = True
            elif file_name.startswith(".") or file_name.endswith("-old") or file_name.endswith("-new"):
                ignored = True  # Hidden files and diff temp files
            elif not self.isValidRelativePath(file_relative_path):
                ignored = True
                self.log.error("- [ERROR] Invalid filename: %s" % file_relative_path)
            elif dir_inner_path == "" and file_relative_path == self.site.storage.getDbFile():
                ignored = True  # Never hash the site's own database file
            elif optional_pattern and SafeRe.match(optional_pattern, file_relative_path):
                optional = True

            if ignored:  # Ignore content.json, defined regexp and files starting with .
                self.log.info("- [SKIPPED] %s" % file_relative_path)
            else:
                if optional:
                    self.log.info("- [OPTIONAL] %s" % file_relative_path)
                    files_optional_node.update(
                        self.hashFile(dir_inner_path, file_relative_path, optional=True)
                    )
                else:
                    self.log.info("- %s" % file_relative_path)
                    files_node.update(
                        self.hashFile(dir_inner_path, file_relative_path)
                    )
        return files_node, files_optional_node
  542. # Create and sign a content.json
  543. # Return: The new content if filewrite = False
  544. def sign(self, inner_path="content.json", privatekey=None, filewrite=True, update_changed_files=False, extend=None, remove_missing_optional=False):
  545. if not inner_path.endswith("content.json"):
  546. raise SignError("Invalid file name, you can only sign content.json files")
  547. if inner_path in self.contents:
  548. content = self.contents.get(inner_path)
  549. if content and content.get("cert_sign", False) is None and self.site.storage.isFile(inner_path):
  550. # Recover cert_sign from file
  551. content["cert_sign"] = self.site.storage.loadJson(inner_path).get("cert_sign")
  552. else:
  553. content = None
  554. if not content: # Content not exist yet, load default one
  555. self.log.info("File %s not exist yet, loading default values..." % inner_path)
  556. if self.site.storage.isFile(inner_path):
  557. content = self.site.storage.loadJson(inner_path)
  558. if "files" not in content:
  559. content["files"] = {}
  560. if "signs" not in content:
  561. content["signs"] = {}
  562. else:
  563. content = {"files": {}, "signs": {}} # Default content.json
  564. if inner_path == "content.json": # It's the root content.json, add some more fields
  565. content["title"] = "%s - ZeroNet_" % self.site.address
  566. content["description"] = ""
  567. content["signs_required"] = 1
  568. content["ignore"] = ""
  569. if extend:
  570. # Add extend keys if not exists
  571. for key, val in extend.items():
  572. if not content.get(key):
  573. content[key] = val
  574. self.log.info("Extending content.json with: %s" % key)
  575. directory = helper.getDirname(self.site.storage.getPath(inner_path))
  576. inner_directory = helper.getDirname(inner_path)
  577. self.log.info("Opening site data directory: %s..." % directory)
  578. changed_files = [inner_path]
  579. files_node, files_optional_node = self.hashFiles(
  580. helper.getDirname(inner_path), content.get("ignore"), content.get("optional")
  581. )
  582. if not remove_missing_optional:
  583. for file_inner_path, file_details in content.get("files_optional", {}).iteritems():
  584. if file_inner_path not in files_optional_node:
  585. files_optional_node[file_inner_path] = file_details
  586. # Find changed files
  587. files_merged = files_node.copy()
  588. files_merged.update(files_optional_node)
  589. for file_relative_path, file_details in files_merged.iteritems():
  590. old_hash = content.get("files", {}).get(file_relative_path, {}).get("sha512")
  591. new_hash = files_merged[file_relative_path]["sha512"]
  592. if old_hash != new_hash:
  593. changed_files.append(inner_directory + file_relative_path)
  594. self.log.debug("Changed files: %s" % changed_files)
  595. if update_changed_files:
  596. for file_path in changed_files:
  597. self.site.storage.onUpdated(file_path)
  598. # Generate new content.json
  599. self.log.info("Adding timestamp and sha512sums to new content.json...")
  600. new_content = content.copy() # Create a copy of current content.json
  601. new_content["files"] = files_node # Add files sha512 hash
  602. if files_optional_node:
  603. new_content["files_optional"] = files_optional_node
  604. elif "files_optional" in new_content:
  605. del new_content["files_optional"]
  606. new_content["modified"] = int(time.time()) # Add timestamp
  607. if inner_path == "content.json":
  608. new_content["zeronet_version"] = config.version
  609. new_content["signs_required"] = content.get("signs_required", 1)
  610. new_content["address"] = self.site.address
  611. new_content["inner_path"] = inner_path
  612. # Verify private key
  613. from Crypt import CryptBitcoin
  614. self.log.info("Verifying private key...")
  615. privatekey_address = CryptBitcoin.privatekeyToAddress(privatekey)
  616. valid_signers = self.getValidSigners(inner_path, new_content)
  617. if privatekey_address not in valid_signers:
  618. raise SignError(
  619. "Private key invalid! Valid signers: %s, Private key address: %s" %
  620. (valid_signers, privatekey_address)
  621. )
  622. self.log.info("Correct %s in valid signers: %s" % (privatekey_address, valid_signers))
  623. if inner_path == "content.json" and privatekey_address == self.site.address:
  624. # If signing using the root key, then sign the valid signers
  625. signers_data = "%s:%s" % (new_content["signs_required"], ",".join(valid_signers))
  626. new_content["signers_sign"] = CryptBitcoin.sign(str(signers_data), privatekey)
  627. if not new_content["signers_sign"]:
  628. self.log.info("Old style address, signers_sign is none")
  629. self.log.info("Signing %s..." % inner_path)
  630. if "signs" in new_content:
  631. del(new_content["signs"]) # Delete old signs
  632. if "sign" in new_content:
  633. del(new_content["sign"]) # Delete old sign (backward compatibility)
  634. sign_content = json.dumps(new_content, sort_keys=True)
  635. sign = CryptBitcoin.sign(sign_content, privatekey)
  636. # new_content["signs"] = content.get("signs", {}) # TODO: Multisig
  637. if sign: # If signing is successful (not an old address)
  638. new_content["signs"] = {}
  639. new_content["signs"][privatekey_address] = sign
  640. self.verifyContent(inner_path, new_content)
  641. if filewrite:
  642. self.log.info("Saving to %s..." % inner_path)
  643. self.site.storage.writeJson(inner_path, new_content)
  644. self.contents[inner_path] = new_content
  645. self.log.info("File %s signed!" % inner_path)
  646. if filewrite: # Written to file
  647. return True
  648. else: # Return the new content
  649. return new_content
  650. # The valid signers of content.json file
  651. # Return: ["1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6", "13ReyhCsjhpuCVahn1DHdf6eMqqEVev162"]
  652. def getValidSigners(self, inner_path, content=None):
  653. valid_signers = []
  654. if inner_path == "content.json": # Root content.json
  655. if "content.json" in self.contents and "signers" in self.contents["content.json"]:
  656. valid_signers += self.contents["content.json"]["signers"][:]
  657. else:
  658. rules = self.getRules(inner_path, content)
  659. if rules and "signers" in rules:
  660. valid_signers += rules["signers"]
  661. if self.site.address not in valid_signers:
  662. valid_signers.append(self.site.address) # Site address always valid
  663. return valid_signers
  664. # Return: The required number of valid signs for the content.json
  665. def getSignsRequired(self, inner_path, content=None):
  666. return 1 # Todo: Multisig
  667. def verifyCert(self, inner_path, content):
  668. from Crypt import CryptBitcoin
  669. rules = self.getRules(inner_path, content)
  670. if not rules:
  671. raise VerifyError("No rules for this file")
  672. if not rules.get("cert_signers") and not rules.get("cert_signers_pattern"):
  673. return True # Does not need cert
  674. if "cert_user_id" not in content:
  675. raise VerifyError("Missing cert_user_id")
  676. if content["cert_user_id"].count("@") != 1:
  677. raise VerifyError("Invalid domain in cert_user_id")
  678. name, domain = content["cert_user_id"].rsplit("@", 1)
  679. cert_address = rules["cert_signers"].get(domain)
  680. if not cert_address: # Unknown Cert signer
  681. if rules.get("cert_signers_pattern") and SafeRe.match(rules["cert_signers_pattern"], domain):
  682. cert_address = domain
  683. else:
  684. raise VerifyError("Invalid cert signer: %s" % domain)
  685. try:
  686. cert_subject = "%s#%s/%s" % (rules["user_address"], content["cert_auth_type"], name)
  687. result = CryptBitcoin.verify(cert_subject, cert_address, content["cert_sign"])
  688. except Exception, err:
  689. raise VerifyError("Certificate verify error: %s" % err)
  690. return result
  691. # Checks if the content.json content is valid
  692. # Return: True or False
  693. def verifyContent(self, inner_path, content):
  694. content_size = len(json.dumps(content, indent=1)) + sum([file["size"] for file in content["files"].values() if file["size"] >= 0]) # Size of new content
  695. # Calculate old content size
  696. old_content = self.contents.get(inner_path)
  697. if old_content:
  698. old_content_size = len(json.dumps(old_content, indent=1)) + sum([file["size"] for file in old_content.get("files", {}).values()])
  699. old_content_size_optional = sum([file["size"] for file in old_content.get("files_optional", {}).values()])
  700. else:
  701. old_content_size = 0
  702. old_content_size_optional = 0
  703. # Reset site site on first content.json
  704. if not old_content and inner_path == "content.json":
  705. self.site.settings["size"] = 0
  706. content_size_optional = sum([file["size"] for file in content.get("files_optional", {}).values() if file["size"] >= 0])
  707. site_size = self.site.settings["size"] - old_content_size + content_size # Site size without old content plus the new
  708. site_size_optional = self.site.settings["size_optional"] - old_content_size_optional + content_size_optional # Site size without old content plus the new
  709. site_size_limit = self.site.getSizeLimit() * 1024 * 1024
  710. # Check site address
  711. if content.get("address") and content["address"] != self.site.address:
  712. raise VerifyError("Wrong site address: %s != %s" % (content["address"], self.site.address))
  713. # Check file inner path
  714. if content.get("inner_path") and content["inner_path"] != inner_path:
  715. raise VerifyError("Wrong inner_path: %s" % content["inner_path"])
  716. # Check total site size limit
  717. if site_size > site_size_limit:
  718. if inner_path == "content.json" and self.site.settings["size"] == 0:
  719. # First content.json download, save site size to display warning
  720. self.site.settings["size"] = site_size
  721. task = self.site.worker_manager.findTask(inner_path)
  722. if task: # Dont try to download from other peers
  723. self.site.worker_manager.failTask(task)
  724. raise VerifyError("Content too large %sB > %sB, aborting task..." % (site_size, site_size_limit))
  725. # Verify valid filenames
  726. for file_relative_path in content.get("files", {}).keys() + content.get("files_optional", {}).keys():
  727. if not self.isValidRelativePath(file_relative_path):
  728. raise VerifyError("Invalid relative path: %s" % file_relative_path)
  729. if inner_path == "content.json":
  730. self.site.settings["size"] = site_size
  731. self.site.settings["size_optional"] = site_size_optional
  732. return True # Root content.json is passed
  733. else:
  734. if self.verifyContentInclude(inner_path, content, content_size, content_size_optional):
  735. self.site.settings["size"] = site_size
  736. self.site.settings["size_optional"] = site_size_optional
  737. return True
  738. else:
  739. return False
  740. def verifyContentInclude(self, inner_path, content, content_size, content_size_optional):
  741. # Load include details
  742. rules = self.getRules(inner_path, content)
  743. if not rules:
  744. raise VerifyError("No rules")
  745. # Check include size limit
  746. if rules.get("max_size") is not None: # Include size limit
  747. if content_size > rules["max_size"]:
  748. raise VerifyError("Include too large %sB > %sB" % (content_size, rules["max_size"]))
  749. if rules.get("max_size_optional") is not None: # Include optional files limit
  750. if content_size_optional > rules["max_size_optional"]:
  751. raise VerifyError("Include optional files too large %sB > %sB" % (
  752. content_size_optional, rules["max_size_optional"])
  753. )
  754. # Filename limit
  755. if rules.get("files_allowed"):
  756. for file_inner_path in content["files"].keys():
  757. if not SafeRe.match("^%s$" % rules["files_allowed"], file_inner_path):
  758. raise VerifyError("File not allowed: %s" % file_inner_path)
  759. if rules.get("files_allowed_optional"):
  760. for file_inner_path in content.get("files_optional", {}).keys():
  761. if not SafeRe.match("^%s$" % rules["files_allowed_optional"], file_inner_path):
  762. raise VerifyError("Optional file not allowed: %s" % file_inner_path)
  763. # Check if content includes allowed
  764. if rules.get("includes_allowed") is False and content.get("includes"):
  765. raise VerifyError("Includes not allowed")
  766. return True # All good
  767. # Verify file validity
  768. # Return: None = Same as before, False = Invalid, True = Valid
  769. def verifyFile(self, inner_path, file, ignore_same=True):
  770. if inner_path.endswith("content.json"): # content.json: Check using sign
  771. from Crypt import CryptBitcoin
  772. try:
  773. if type(file) is dict:
  774. new_content = file
  775. else:
  776. new_content = json.load(file)
  777. if inner_path in self.contents:
  778. old_content = self.contents.get(inner_path, {"modified": 0})
  779. # Checks if its newer the ours
  780. if old_content["modified"] == new_content["modified"] and ignore_same: # Ignore, have the same content.json
  781. return None
  782. elif old_content["modified"] > new_content["modified"]: # We have newer
  783. raise VerifyError(
  784. "We have newer (Our: %s, Sent: %s)" %
  785. (old_content["modified"], new_content["modified"])
  786. )
  787. if new_content["modified"] > time.time() + 60 * 60 * 24: # Content modified in the far future (allow 1 day+)
  788. raise VerifyError("Modify timestamp is in the far future!")
  789. if self.isArchived(inner_path, new_content["modified"]):
  790. if inner_path in self.site.bad_files:
  791. del self.site.bad_files[inner_path]
  792. raise VerifyError("This file is archived!")
  793. # Check sign
  794. sign = new_content.get("sign")
  795. signs = new_content.get("signs", {})
  796. if "sign" in new_content:
  797. del(new_content["sign"]) # The file signed without the sign
  798. if "signs" in new_content:
  799. del(new_content["signs"]) # The file signed without the signs
  800. sign_content = json.dumps(new_content, sort_keys=True) # Dump the json to string to remove whitepsace
  801. # Fix float representation error on Android
  802. modified = new_content["modified"]
  803. if config.fix_float_decimals and type(modified) is float and not str(modified).endswith(".0"):
  804. modified_fixed = "{:.6f}".format(modified).strip("0.")
  805. sign_content = sign_content.replace(
  806. '"modified": %s' % repr(modified),
  807. '"modified": %s' % modified_fixed
  808. )
  809. if signs: # New style signing
  810. valid_signers = self.getValidSigners(inner_path, new_content)
  811. signs_required = self.getSignsRequired(inner_path, new_content)
  812. if inner_path == "content.json" and len(valid_signers) > 1: # Check signers_sign on root content.json
  813. signers_data = "%s:%s" % (signs_required, ",".join(valid_signers))
  814. if not CryptBitcoin.verify(signers_data, self.site.address, new_content["signers_sign"]):
  815. raise VerifyError("Invalid signers_sign!")
  816. if inner_path != "content.json" and not self.verifyCert(inner_path, new_content): # Check if cert valid
  817. raise VerifyError("Invalid cert!")
  818. valid_signs = 0
  819. for address in valid_signers:
  820. if address in signs:
  821. valid_signs += CryptBitcoin.verify(sign_content, address, signs[address])
  822. if valid_signs >= signs_required:
  823. break # Break if we has enough signs
  824. if valid_signs < signs_required:
  825. raise VerifyError("Valid signs: %s/%s" % (valid_signs, signs_required))
  826. else:
  827. return self.verifyContent(inner_path, new_content)
  828. else: # Old style signing
  829. if CryptBitcoin.verify(sign_content, self.site.address, sign):
  830. return self.verifyContent(inner_path, new_content)
  831. else:
  832. raise VerifyError("Invalid old-style sign")
  833. except Exception, err:
  834. self.log.warning("%s: verify sign error: %s" % (inner_path, Debug.formatException(err)))
  835. raise err
  836. else: # Check using sha512 hash
  837. file_info = self.getFileInfo(inner_path)
  838. if file_info:
  839. if CryptHash.sha512sum(file) != file_info.get("sha512", ""):
  840. raise VerifyError("Invalid hash")
  841. if file_info.get("size", 0) != file.tell():
  842. raise VerifyError(
  843. "File size does not match %s <> %s" %
  844. (inner_path, file.tell(), file_info.get("size", 0))
  845. )
  846. return True
  847. else: # File not in content.json
  848. raise VerifyError("File not in content.json")
  849. def optionalDelete(self, inner_path):
  850. self.site.storage.delete(inner_path)
  851. def optionalDownloaded(self, inner_path, hash_id, size=None, own=False):
  852. if size is None:
  853. size = self.site.storage.getSize(inner_path)
  854. done = self.hashfield.appendHashId(hash_id)
  855. self.site.settings["optional_downloaded"] += size
  856. return done
  857. def optionalRemoved(self, inner_path, hash_id, size=None):
  858. if size is None:
  859. size = self.site.storage.getSize(inner_path)
  860. done = self.hashfield.removeHashId(hash_id)
  861. self.site.settings["optional_downloaded"] -= size
  862. return done