# ContentManager.py
import base64
import copy
import json
import os
import re
import time

import gevent

from Debug import Debug
from Crypt import CryptHash
from Config import config
from util import helper
from util import Diff
from util import SafeRe
from Peer import PeerHashfield
from ContentDbDict import ContentDbDict
from Plugin import PluginManager
class VerifyError(Exception):
    """Raised when a content.json or one of its files fails verification (see verifyCert)."""
    pass
class SignError(Exception):
    """Raised when signing a content.json is not possible (see sign)."""
    pass
  20. @PluginManager.acceptPlugins
  21. class ContentManager(object):
    def __init__(self, site):
        # Per-site manager of content.json data.
        self.site = site
        self.log = self.site.log  # Reuse the owning site's logger
        self.contents = ContentDbDict(site)  # inner_path -> parsed content.json, backed by the content db
        self.hashfield = PeerHashfield()  # Hash ids of optional files (queried via hasHash/getHashId below)
        self.has_optional_files = False  # Becomes True once any loaded content.json defines files_optional
  28. # Load all content.json files
  29. def loadContents(self):
  30. if len(self.contents) == 0:
  31. self.log.debug("ContentDb not initialized, load files from filesystem")
  32. self.loadContent(add_bad_files=False, delete_removed_files=False)
  33. self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()
  34. # Load hashfield cache
  35. if "hashfield" in self.site.settings.get("cache", {}):
  36. self.hashfield.fromstring(self.site.settings["cache"]["hashfield"].decode("base64"))
  37. del self.site.settings["cache"]["hashfield"]
  38. elif self.contents.get("content.json") and self.site.settings["size_optional"] > 0:
  39. self.site.storage.updateBadFiles() # No hashfield cache created yet
  40. self.has_optional_files = bool(self.hashfield)
  41. self.contents.db.initSite(self.site)
  42. # Load content.json to self.content
  43. # Return: Changed files ["index.html", "data/messages.json"], Deleted files ["old.jpg"]
  44. def loadContent(self, content_inner_path="content.json", add_bad_files=True, delete_removed_files=True, load_includes=True, force=False):
  45. content_inner_path = content_inner_path.strip("/") # Remove / from beginning
  46. old_content = self.contents.get(content_inner_path)
  47. content_path = self.site.storage.getPath(content_inner_path)
  48. content_dir = helper.getDirname(self.site.storage.getPath(content_inner_path))
  49. content_inner_dir = helper.getDirname(content_inner_path)
  50. if os.path.isfile(content_path):
  51. try:
  52. # Check if file is newer than what we have
  53. if not force and old_content and not self.site.settings.get("own"):
  54. for line in open(content_path):
  55. if '"modified"' not in line:
  56. continue
  57. match = re.search("([0-9\.]+),$", line.strip(" \r\n"))
  58. if match and float(match.group(1)) <= old_content.get("modified", 0):
  59. self.log.debug("%s loadContent same json file, skipping" % content_inner_path)
  60. return [], []
  61. new_content = json.load(open(content_path))
  62. except Exception, err:
  63. self.log.warning("%s load error: %s" % (content_path, Debug.formatException(err)))
  64. return [], []
  65. else:
  66. self.log.warning("Content.json not exist: %s" % content_path)
  67. return [], [] # Content.json not exist
  68. try:
  69. # Get the files where the sha512 changed
  70. changed = []
  71. deleted = []
  72. # Check changed
  73. for relative_path, info in new_content.get("files", {}).iteritems():
  74. if "sha512" in info:
  75. hash_type = "sha512"
  76. else: # Backward compatibility
  77. hash_type = "sha1"
  78. new_hash = info[hash_type]
  79. if old_content and old_content["files"].get(relative_path): # We have the file in the old content
  80. old_hash = old_content["files"][relative_path].get(hash_type)
  81. else: # The file is not in the old content
  82. old_hash = None
  83. if old_hash != new_hash:
  84. changed.append(content_inner_dir + relative_path)
  85. # Check changed optional files
  86. for relative_path, info in new_content.get("files_optional", {}).iteritems():
  87. file_inner_path = content_inner_dir + relative_path
  88. new_hash = info["sha512"]
  89. if old_content and old_content.get("files_optional", {}).get(relative_path):
  90. # We have the file in the old content
  91. old_hash = old_content["files_optional"][relative_path].get("sha512")
  92. if old_hash != new_hash and self.site.isDownloadable(file_inner_path):
  93. changed.append(file_inner_path) # Download new file
  94. elif old_hash != new_hash and self.hashfield.hasHash(old_hash) and not self.site.settings.get("own"):
  95. try:
  96. old_hash_id = self.hashfield.getHashId(old_hash)
  97. self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][relative_path]["size"])
  98. self.site.storage.delete(file_inner_path)
  99. self.log.debug("Deleted changed optional file: %s" % file_inner_path)
  100. except Exception, err:
  101. self.log.debug("Error deleting file %s: %s" % (file_inner_path, err))
  102. else: # The file is not in the old content
  103. if self.site.isDownloadable(file_inner_path):
  104. changed.append(file_inner_path) # Download new file
  105. # Check deleted
  106. if old_content:
  107. old_files = dict(
  108. old_content.get("files", {}),
  109. **old_content.get("files_optional", {})
  110. )
  111. new_files = dict(
  112. new_content.get("files", {}),
  113. **new_content.get("files_optional", {})
  114. )
  115. deleted = [key for key in old_files if key not in new_files]
  116. if deleted and not self.site.settings.get("own"):
  117. # Deleting files that no longer in content.json
  118. for file_relative_path in deleted:
  119. file_inner_path = content_inner_dir + file_relative_path
  120. try:
  121. self.site.storage.delete(file_inner_path)
  122. # Check if the deleted file is optional
  123. if old_content.get("files_optional") and old_content["files_optional"].get(file_relative_path):
  124. old_hash = old_content["files_optional"][file_relative_path].get("sha512")
  125. if self.hashfield.hasHash(old_hash):
  126. old_hash_id = self.hashField.getHashid(old_hash)
  127. self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][file_relative_path]["size"])
  128. self.log.debug("Deleted file: %s" % file_inner_path)
  129. except Exception, err:
  130. self.log.debug("Error deleting file %s: %s" % (file_inner_path, err))
  131. # Cleanup empty dirs
  132. tree = {root: [dirs, files] for root, dirs, files in os.walk(self.site.storage.getPath(content_inner_dir))}
  133. for root in sorted(tree, key=len, reverse=True):
  134. dirs, files = tree[root]
  135. if dirs == [] and files == []:
  136. root_inner_path = self.site.storage.getInnerPath(root.replace("\\", "/"))
  137. self.log.debug("Empty directory: %s, cleaning up." % root_inner_path)
  138. try:
  139. self.site.storage.deleteDir(root_inner_path)
  140. # Remove from tree dict to reflect changed state
  141. tree[os.path.dirname(root)][0].remove(os.path.basename(root))
  142. except Exception, err:
  143. self.log.debug("Error deleting empty directory %s: %s" % (root_inner_path, err))
  144. # Check archived
  145. if old_content and "user_contents" in new_content and "archived" in new_content["user_contents"]:
  146. old_archived = old_content.get("user_contents", {}).get("archived", {})
  147. new_archived = new_content.get("user_contents", {}).get("archived", {})
  148. self.log.debug("old archived: %s, new archived: %s" % (len(old_archived), len(new_archived)))
  149. archived_changed = {
  150. key: date_archived
  151. for key, date_archived in new_archived.iteritems()
  152. if old_archived.get(key) != new_archived[key]
  153. }
  154. if archived_changed:
  155. self.log.debug("Archived changed: %s" % archived_changed)
  156. for archived_dirname, date_archived in archived_changed.iteritems():
  157. archived_inner_path = content_inner_dir + archived_dirname + "/content.json"
  158. if self.contents.get(archived_inner_path, {}).get("modified", 0) < date_archived:
  159. self.removeContent(archived_inner_path)
  160. self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()
  161. # Load includes
  162. if load_includes and "includes" in new_content:
  163. for relative_path, info in new_content["includes"].items():
  164. include_inner_path = content_inner_dir + relative_path
  165. if self.site.storage.isFile(include_inner_path): # Content.json exists, load it
  166. include_changed, include_deleted = self.loadContent(
  167. include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files
  168. )
  169. if include_changed:
  170. changed += include_changed # Add changed files
  171. if include_deleted:
  172. deleted += include_deleted # Add changed files
  173. else: # Content.json not exist, add to changed files
  174. self.log.debug("Missing include: %s" % include_inner_path)
  175. changed += [include_inner_path]
  176. # Load blind user includes (all subdir)
  177. if load_includes and "user_contents" in new_content:
  178. for relative_dir in os.listdir(content_dir):
  179. include_inner_path = content_inner_dir + relative_dir + "/content.json"
  180. if not self.site.storage.isFile(include_inner_path):
  181. continue # Content.json not exist
  182. include_changed, include_deleted = self.loadContent(
  183. include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files,
  184. load_includes=False
  185. )
  186. if include_changed:
  187. changed += include_changed # Add changed files
  188. if include_deleted:
  189. deleted += include_deleted # Add changed files
  190. # Save some memory
  191. new_content["signs"] = None
  192. if "cert_sign" in new_content:
  193. new_content["cert_sign"] = None
  194. if new_content.get("files_optional"):
  195. self.has_optional_files = True
  196. # Update the content
  197. self.contents[content_inner_path] = new_content
  198. except Exception, err:
  199. self.log.warning("%s parse error: %s" % (content_inner_path, Debug.formatException(err)))
  200. return [], [] # Content.json parse error
  201. # Add changed files to bad files
  202. if add_bad_files:
  203. for inner_path in changed:
  204. self.site.bad_files[inner_path] = self.site.bad_files.get(inner_path, 0) + 1
  205. for inner_path in deleted:
  206. if inner_path in self.site.bad_files:
  207. del self.site.bad_files[inner_path]
  208. if new_content.get("modified", 0) > self.site.settings.get("modified", 0):
  209. # Dont store modifications in the far future (more than 10 minute)
  210. self.site.settings["modified"] = min(time.time() + 60 * 10, new_content["modified"])
  211. return changed, deleted
  212. def removeContent(self, inner_path):
  213. inner_dir = helper.getDirname(inner_path)
  214. try:
  215. content = self.contents[inner_path]
  216. files = dict(
  217. content.get("files", {}),
  218. **content.get("files_optional", {})
  219. )
  220. except Exception, err:
  221. self.log.debug("Error loading %s for removeContent: %s" % (inner_path, Debug.formatException(err)))
  222. files = {}
  223. files["content.json"] = True
  224. # Deleting files that no longer in content.json
  225. for file_relative_path in files:
  226. file_inner_path = inner_dir + file_relative_path
  227. try:
  228. self.site.storage.delete(file_inner_path)
  229. self.log.debug("Deleted file: %s" % file_inner_path)
  230. except Exception, err:
  231. self.log.debug("Error deleting file %s: %s" % (file_inner_path, err))
  232. try:
  233. self.site.storage.deleteDir(inner_dir)
  234. except Exception, err:
  235. self.log.debug("Error deleting dir %s: %s" % (inner_dir, err))
  236. try:
  237. del self.contents[inner_path]
  238. except Exception, err:
  239. self.log.debug("Error key from contents: %s" % inner_path)
    # Get total size of site
    # Return: 32819 (size of files in kb)
    def getTotalSize(self, ignore=None):
        # Thin delegate: the query lives in ContentDb; `ignore` is passed through
        # (presumably an inner_path to exclude from the sum — see ContentDb)
        return self.contents.db.getTotalSize(self.site, ignore)
    # List content.json entries modified after the `since` timestamp
    # Return: whatever ContentDb.listModified yields for this site
    def listModified(self, since):
        return self.contents.db.listModified(self.site, since)
  246. def listContents(self, inner_path="content.json", user_files=False):
  247. if inner_path not in self.contents:
  248. return []
  249. back = [inner_path]
  250. content_inner_dir = helper.getDirname(inner_path)
  251. for relative_path in self.contents[inner_path].get("includes", {}).keys():
  252. include_inner_path = content_inner_dir + relative_path
  253. back += self.listContents(include_inner_path)
  254. return back
  255. # Returns if file with the given modification date is archived or not
  256. def isArchived(self, inner_path, modified):
  257. match = re.match("(.*)/(.*?)/", inner_path)
  258. if not match:
  259. return False
  260. user_contents_inner_path = match.group(1) + "/content.json"
  261. relative_directory = match.group(2)
  262. file_info = self.getFileInfo(user_contents_inner_path)
  263. if file_info and file_info.get("archived", {}).get(relative_directory) >= modified:
  264. return True
  265. else:
  266. return False
    # Find the file info line from self.contents
    # Return: { "sha512": "c29d73d...21f518", "size": 41 , "content_inner_path": "content.json"}
    def getFileInfo(self, inner_path, new_file=False):
        # Walk up the directory tree: at each level look for a content.json that
        # lists the remaining relative path in "files", "files_optional" or
        # governs it via "user_contents". Returns False if no level matches.
        # NOTE(review): the matched dicts from self.contents are annotated
        # in place (content_inner_path/optional/relative_path keys are added
        # to the stored dict, not a copy).
        dirs = inner_path.split("/")  # Parent dirs of content.json
        inner_path_parts = [dirs.pop()]  # Filename relative to content.json
        while True:
            content_inner_path = "%s/content.json" % "/".join(dirs)
            content_inner_path = content_inner_path.strip("/")
            content = self.contents.get(content_inner_path)

            # Check in files
            if content and "files" in content:
                back = content["files"].get("/".join(inner_path_parts))
                if back:
                    back["content_inner_path"] = content_inner_path
                    back["optional"] = False
                    back["relative_path"] = "/".join(inner_path_parts)
                    return back

            # Check in optional files
            if content and "files_optional" in content:  # Check if file in this content.json
                back = content["files_optional"].get("/".join(inner_path_parts))
                if back:
                    back["content_inner_path"] = content_inner_path
                    back["optional"] = True
                    back["relative_path"] = "/".join(inner_path_parts)
                    return back

            # Return the rules if user dir
            if content and "user_contents" in content:
                back = content["user_contents"]
                content_inner_path_dir = helper.getDirname(content_inner_path)
                relative_content_path = inner_path[len(content_inner_path_dir):]
                if "/" in relative_content_path:
                    # First path segment is the user's auth address directory
                    user_auth_address = re.match("([A-Za-z0-9]+)/.*", relative_content_path).group(1)
                    back["content_inner_path"] = "%s%s/content.json" % (content_inner_path_dir, user_auth_address)
                else:
                    back["content_inner_path"] = content_inner_path_dir + "content.json"
                back["optional"] = None
                back["relative_path"] = "/".join(inner_path_parts)
                return back

            # File not listed anywhere, but a governing content.json exists:
            # report where the new file would belong
            if new_file and content:
                back = {}
                back["content_inner_path"] = content_inner_path
                back["relative_path"] = "/".join(inner_path_parts)
                back["optional"] = None
                return back

            # No inner path in this dir, lets try the parent dir
            if dirs:
                inner_path_parts.insert(0, dirs.pop())
            else:  # No more parent dirs
                break

        # Not found
        return False
  318. # Get rules for the file
  319. # Return: The rules for the file or False if not allowed
  320. def getRules(self, inner_path, content=None):
  321. if not inner_path.endswith("content.json"): # Find the files content.json first
  322. file_info = self.getFileInfo(inner_path)
  323. if not file_info:
  324. return False # File not found
  325. inner_path = file_info["content_inner_path"]
  326. if inner_path == "content.json": # Root content.json
  327. rules = {}
  328. rules["signers"] = self.getValidSigners(inner_path, content)
  329. return rules
  330. dirs = inner_path.split("/") # Parent dirs of content.json
  331. inner_path_parts = [dirs.pop()] # Filename relative to content.json
  332. inner_path_parts.insert(0, dirs.pop()) # Dont check in self dir
  333. while True:
  334. content_inner_path = "%s/content.json" % "/".join(dirs)
  335. parent_content = self.contents.get(content_inner_path.strip("/"))
  336. if parent_content and "includes" in parent_content:
  337. return parent_content["includes"].get("/".join(inner_path_parts))
  338. elif parent_content and "user_contents" in parent_content:
  339. return self.getUserContentRules(parent_content, inner_path, content)
  340. else: # No inner path in this dir, lets try the parent dir
  341. if dirs:
  342. inner_path_parts.insert(0, dirs.pop())
  343. else: # No more parent dirs
  344. break
  345. return False
  346. # Get rules for a user file
  347. # Return: The rules of the file or False if not allowed
  348. def getUserContentRules(self, parent_content, inner_path, content):
  349. user_contents = parent_content["user_contents"]
  350. # Delivered for directory
  351. if "inner_path" in parent_content:
  352. parent_content_dir = helper.getDirname(parent_content["inner_path"])
  353. user_address = re.match("([A-Za-z0-9]*?)/", inner_path[len(parent_content_dir):]).group(1)
  354. else:
  355. user_address = re.match(".*/([A-Za-z0-9]*?)/.*?$", inner_path).group(1)
  356. try:
  357. if not content:
  358. content = self.site.storage.loadJson(inner_path) # Read the file if no content specified
  359. user_urn = "%s/%s" % (content["cert_auth_type"], content["cert_user_id"]) # web/nofish@zeroid.bit
  360. cert_user_id = content["cert_user_id"]
  361. except Exception: # Content.json not exist
  362. user_urn = "n-a/n-a"
  363. cert_user_id = "n-a"
  364. if user_address in user_contents["permissions"]:
  365. rules = copy.copy(user_contents["permissions"].get(user_address, {})) # Default rules based on address
  366. else:
  367. rules = copy.copy(user_contents["permissions"].get(cert_user_id, {})) # Default rules based on username
  368. if rules is False:
  369. banned = True
  370. rules = {}
  371. else:
  372. banned = False
  373. if "signers" in rules:
  374. rules["signers"] = rules["signers"][:] # Make copy of the signers
  375. for permission_pattern, permission_rules in user_contents["permission_rules"].items(): # Regexp rules
  376. if not SafeRe.match(permission_pattern, user_urn):
  377. continue # Rule is not valid for user
  378. # Update rules if its better than current recorded ones
  379. for key, val in permission_rules.iteritems():
  380. if key not in rules:
  381. if type(val) is list:
  382. rules[key] = val[:] # Make copy
  383. else:
  384. rules[key] = val
  385. elif type(val) is int: # Int, update if larger
  386. if val > rules[key]:
  387. rules[key] = val
  388. elif hasattr(val, "startswith"): # String, update if longer
  389. if len(val) > len(rules[key]):
  390. rules[key] = val
  391. elif type(val) is list: # List, append
  392. rules[key] += val
  393. rules["cert_signers"] = user_contents["cert_signers"] # Add valid cert signers
  394. if "signers" not in rules:
  395. rules["signers"] = []
  396. if not banned:
  397. rules["signers"].append(user_address) # Add user as valid signer
  398. rules["user_address"] = user_address
  399. rules["includes_allowed"] = False
  400. return rules
  401. # Get diffs for changed files
  402. def getDiffs(self, inner_path, limit=30 * 1024, update_files=True):
  403. if inner_path not in self.contents:
  404. return {}
  405. diffs = {}
  406. content_inner_path_dir = helper.getDirname(inner_path)
  407. for file_relative_path in self.contents[inner_path].get("files", {}):
  408. file_inner_path = content_inner_path_dir + file_relative_path
  409. if self.site.storage.isFile(file_inner_path + "-new"): # New version present
  410. diffs[file_relative_path] = Diff.diff(
  411. list(self.site.storage.open(file_inner_path)),
  412. list(self.site.storage.open(file_inner_path + "-new")),
  413. limit=limit
  414. )
  415. if update_files:
  416. self.site.storage.delete(file_inner_path)
  417. self.site.storage.rename(file_inner_path + "-new", file_inner_path)
  418. if self.site.storage.isFile(file_inner_path + "-old"): # Old version present
  419. diffs[file_relative_path] = Diff.diff(
  420. list(self.site.storage.open(file_inner_path + "-old")),
  421. list(self.site.storage.open(file_inner_path)),
  422. limit=limit
  423. )
  424. if update_files:
  425. self.site.storage.delete(file_inner_path + "-old")
  426. return diffs
  427. def hashFile(self, dir_inner_path, file_relative_path, optional=False):
  428. back = {}
  429. file_inner_path = dir_inner_path + "/" + file_relative_path
  430. file_path = self.site.storage.getPath(file_inner_path)
  431. file_size = os.path.getsize(file_path)
  432. sha512sum = CryptHash.sha512sum(file_path) # Calculate sha512 sum of file
  433. if optional and not self.hashfield.hasHash(sha512sum):
  434. self.optionalDownloaded(file_inner_path, self.hashfield.getHashId(sha512sum), file_size, own=True)
  435. back[file_relative_path] = {"sha512": sha512sum, "size": os.path.getsize(file_path)}
  436. return back
  437. def isValidRelativePath(self, relative_path):
  438. if ".." in relative_path:
  439. return False
  440. elif len(relative_path) > 255:
  441. return False
  442. else:
  443. return re.match("^[a-z\[\]\(\) A-Z0-9_@=\.\+-/]+$", relative_path)
  444. def sanitizePath(self, inner_path):
  445. return re.sub("[^a-z\[\]\(\) A-Z0-9_@=\.\+-/]", "", inner_path)
  446. # Hash files in directory
  447. def hashFiles(self, dir_inner_path, ignore_pattern=None, optional_pattern=None):
  448. files_node = {}
  449. files_optional_node = {}
  450. if dir_inner_path and not self.isValidRelativePath(dir_inner_path):
  451. ignored = True
  452. self.log.error("- [ERROR] Only ascii encoded directories allowed: %s" % dir_inner_path)
  453. for file_relative_path in self.site.storage.walk(dir_inner_path, ignore_pattern):
  454. file_name = helper.getFilename(file_relative_path)
  455. ignored = optional = False
  456. if file_name == "content.json":
  457. ignored = True
  458. elif file_name.startswith(".") or file_name.endswith("-old") or file_name.endswith("-new"):
  459. ignored = True
  460. elif not self.isValidRelativePath(file_relative_path):
  461. ignored = True
  462. self.log.error("- [ERROR] Invalid filename: %s" % file_relative_path)
  463. elif dir_inner_path == "" and file_relative_path == self.site.storage.getDbFile():
  464. ignored = True
  465. elif optional_pattern and SafeRe.match(optional_pattern, file_relative_path):
  466. optional = True
  467. if ignored: # Ignore content.json, defined regexp and files starting with .
  468. self.log.info("- [SKIPPED] %s" % file_relative_path)
  469. else:
  470. if optional:
  471. self.log.info("- [OPTIONAL] %s" % file_relative_path)
  472. files_optional_node.update(
  473. self.hashFile(dir_inner_path, file_relative_path, optional=True)
  474. )
  475. else:
  476. self.log.info("- %s" % file_relative_path)
  477. files_node.update(
  478. self.hashFile(dir_inner_path, file_relative_path)
  479. )
  480. return files_node, files_optional_node
    # Create and sign a content.json
    # Return: The new content if filewrite = False
    def sign(self, inner_path="content.json", privatekey=None, filewrite=True, update_changed_files=False, extend=None, remove_missing_optional=False):
        if not inner_path.endswith("content.json"):
            raise SignError("Invalid file name, you can only sign content.json files")

        if inner_path in self.contents:
            content = self.contents.get(inner_path)
            # loadContent nulls cert_sign to save memory, so a present-but-None
            # value means it has to be re-read from disk before re-signing
            if content and content.get("cert_sign", False) is None and self.site.storage.isFile(inner_path):
                # Recover cert_sign from file
                content["cert_sign"] = self.site.storage.loadJson(inner_path).get("cert_sign")
        else:
            content = None

        if not content:  # Content not exist yet, load default one
            self.log.info("File %s not exist yet, loading default values..." % inner_path)

            if self.site.storage.isFile(inner_path):
                content = self.site.storage.loadJson(inner_path)
                if "files" not in content:
                    content["files"] = {}
                if "signs" not in content:
                    content["signs"] = {}
            else:
                content = {"files": {}, "signs": {}}  # Default content.json

            if inner_path == "content.json":  # It's the root content.json, add some more fields
                content["title"] = "%s - ZeroNet_" % self.site.address
                content["description"] = ""
                content["signs_required"] = 1
                content["ignore"] = ""

        if extend:
            # Add extend keys if not exists
            for key, val in extend.items():
                if not content.get(key):
                    content[key] = val
                    self.log.info("Extending content.json with: %s" % key)

        directory = helper.getDirname(self.site.storage.getPath(inner_path))
        inner_directory = helper.getDirname(inner_path)
        self.log.info("Opening site data directory: %s..." % directory)

        changed_files = [inner_path]
        files_node, files_optional_node = self.hashFiles(
            helper.getDirname(inner_path), content.get("ignore"), content.get("optional")
        )

        # Keep optional files that are no longer on disk, unless explicitly removing them
        if not remove_missing_optional:
            for file_inner_path, file_details in content.get("files_optional", {}).iteritems():
                if file_inner_path not in files_optional_node:
                    files_optional_node[file_inner_path] = file_details

        # Find changed files
        files_merged = files_node.copy()
        files_merged.update(files_optional_node)
        for file_relative_path, file_details in files_merged.iteritems():
            # NOTE(review): optional files are looked up in "files" only, so an
            # unchanged optional file still counts as changed — confirm intent
            old_hash = content.get("files", {}).get(file_relative_path, {}).get("sha512")
            new_hash = files_merged[file_relative_path]["sha512"]
            if old_hash != new_hash:
                changed_files.append(inner_directory + file_relative_path)

        self.log.debug("Changed files: %s" % changed_files)
        if update_changed_files:
            for file_path in changed_files:
                self.site.storage.onUpdated(file_path)

        # Generate new content.json
        self.log.info("Adding timestamp and sha512sums to new content.json...")

        new_content = content.copy()  # Create a copy of current content.json
        new_content["files"] = files_node  # Add files sha512 hash
        if files_optional_node:
            new_content["files_optional"] = files_optional_node
        elif "files_optional" in new_content:
            del new_content["files_optional"]

        new_content["modified"] = int(time.time())  # Add timestamp
        if inner_path == "content.json":
            new_content["zeronet_version"] = config.version
            new_content["signs_required"] = content.get("signs_required", 1)

        new_content["address"] = self.site.address
        new_content["inner_path"] = inner_path

        # Verify private key
        from Crypt import CryptBitcoin
        self.log.info("Verifying private key...")
        privatekey_address = CryptBitcoin.privatekeyToAddress(privatekey)
        valid_signers = self.getValidSigners(inner_path, new_content)
        if privatekey_address not in valid_signers:
            raise SignError(
                "Private key invalid! Valid signers: %s, Private key address: %s" %
                (valid_signers, privatekey_address)
            )
        self.log.info("Correct %s in valid signers: %s" % (privatekey_address, valid_signers))

        if inner_path == "content.json" and privatekey_address == self.site.address:
            # If signing using the root key, then sign the valid signers
            signers_data = "%s:%s" % (new_content["signs_required"], ",".join(valid_signers))
            new_content["signers_sign"] = CryptBitcoin.sign(str(signers_data), privatekey)
            if not new_content["signers_sign"]:
                self.log.info("Old style address, signers_sign is none")

        self.log.info("Signing %s..." % inner_path)

        if "signs" in new_content:
            del(new_content["signs"])  # Delete old signs
        if "sign" in new_content:
            del(new_content["sign"])  # Delete old sign (backward compatibility)

        # The signature covers the canonical (sorted-keys) json without any signs
        sign_content = json.dumps(new_content, sort_keys=True)
        sign = CryptBitcoin.sign(sign_content, privatekey)
        # new_content["signs"] = content.get("signs", {}) # TODO: Multisig
        if sign:  # If signing is successful (not an old address)
            new_content["signs"] = {}
            new_content["signs"][privatekey_address] = sign

        self.verifyContent(inner_path, new_content)

        if filewrite:
            self.log.info("Saving to %s..." % inner_path)
            self.site.storage.writeJson(inner_path, new_content)
            self.contents[inner_path] = new_content

        self.log.info("File %s signed!" % inner_path)

        if filewrite:  # Written to file
            return True
        else:  # Return the new content
            return new_content
  589. # The valid signers of content.json file
  590. # Return: ["1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6", "13ReyhCsjhpuCVahn1DHdf6eMqqEVev162"]
  591. def getValidSigners(self, inner_path, content=None):
  592. valid_signers = []
  593. if inner_path == "content.json": # Root content.json
  594. if "content.json" in self.contents and "signers" in self.contents["content.json"]:
  595. valid_signers += self.contents["content.json"]["signers"][:]
  596. else:
  597. rules = self.getRules(inner_path, content)
  598. if rules and "signers" in rules:
  599. valid_signers += rules["signers"]
  600. if self.site.address not in valid_signers:
  601. valid_signers.append(self.site.address) # Site address always valid
  602. return valid_signers
  603. # Return: The required number of valid signs for the content.json
  604. def getSignsRequired(self, inner_path, content=None):
  605. return 1 # Todo: Multisig
  606. def verifyCert(self, inner_path, content):
  607. from Crypt import CryptBitcoin
  608. rules = self.getRules(inner_path, content)
  609. if not rules.get("cert_signers"):
  610. return True # Does not need cert
  611. if "cert_user_id" not in content:
  612. raise VerifyError("Missing cert_user_id")
  613. name, domain = content["cert_user_id"].split("@")
  614. cert_address = rules["cert_signers"].get(domain)
  615. if not cert_address: # Cert signer not allowed
  616. raise VerifyError("Invalid cert signer: %s" % domain)
  617. try:
  618. cert_subject = "%s#%s/%s" % (rules["user_address"], content["cert_auth_type"], name)
  619. result = CryptBitcoin.verify(cert_subject, cert_address, content["cert_sign"])
  620. except Exception, err:
  621. raise VerifyError("Certificate verify error: %s" % err)
  622. return result
  623. # Checks if the content.json content is valid
  624. # Return: True or False
  625. def verifyContent(self, inner_path, content):
  626. content_size = len(json.dumps(content, indent=1)) + sum([file["size"] for file in content["files"].values() if file["size"] >= 0]) # Size of new content
  627. # Calculate old content size
  628. old_content = self.contents.get(inner_path)
  629. if old_content:
  630. old_content_size = len(json.dumps(old_content, indent=1)) + sum([file["size"] for file in old_content.get("files", {}).values()])
  631. old_content_size_optional = sum([file["size"] for file in old_content.get("files_optional", {}).values()])
  632. else:
  633. old_content_size = 0
  634. old_content_size_optional = 0
  635. # Reset site site on first content.json
  636. if not old_content and inner_path == "content.json":
  637. self.site.settings["size"] = 0
  638. content_size_optional = sum([file["size"] for file in content.get("files_optional", {}).values() if file["size"] >= 0])
  639. site_size = self.site.settings["size"] - old_content_size + content_size # Site size without old content plus the new
  640. site_size_optional = self.site.settings["size_optional"] - old_content_size_optional + content_size_optional # Site size without old content plus the new
  641. site_size_limit = self.site.getSizeLimit() * 1024 * 1024
  642. # Check site address
  643. if content.get("address") and content["address"] != self.site.address:
  644. raise VerifyError("Wrong site address: %s != %s" % (content["address"], self.site.address))
  645. # Check file inner path
  646. if content.get("inner_path") and content["inner_path"] != inner_path:
  647. raise VerifyError("Wrong inner_path: %s" % content["inner_path"])
  648. # Check total site size limit
  649. if site_size > site_size_limit:
  650. if inner_path == "content.json" and self.site.settings["size"] == 0:
  651. # First content.json download, save site size to display warning
  652. self.site.settings["size"] = site_size
  653. task = self.site.worker_manager.findTask(inner_path)
  654. if task: # Dont try to download from other peers
  655. self.site.worker_manager.failTask(task)
  656. raise VerifyError("Content too large %sB > %sB, aborting task..." % (site_size, site_size_limit))
  657. # Verify valid filenames
  658. for file_relative_path in content.get("files", {}).keys() + content.get("files_optional", {}).keys():
  659. if not self.isValidRelativePath(file_relative_path):
  660. raise VerifyError("Invalid relative path: %s" % file_relative_path)
  661. if inner_path == "content.json":
  662. self.site.settings["size"] = site_size
  663. self.site.settings["size_optional"] = site_size_optional
  664. return True # Root content.json is passed
  665. else:
  666. if self.verifyContentInclude(inner_path, content, content_size, content_size_optional):
  667. self.site.settings["size"] = site_size
  668. self.site.settings["size_optional"] = site_size_optional
  669. return True
  670. else:
  671. return False
  672. def verifyContentInclude(self, inner_path, content, content_size, content_size_optional):
  673. # Load include details
  674. rules = self.getRules(inner_path, content)
  675. if not rules:
  676. raise VerifyError("No rules")
  677. # Check include size limit
  678. if rules.get("max_size") is not None: # Include size limit
  679. if content_size > rules["max_size"]:
  680. raise VerifyError("Include too large %sB > %sB" % (content_size, rules["max_size"]))
  681. if rules.get("max_size_optional") is not None: # Include optional files limit
  682. if content_size_optional > rules["max_size_optional"]:
  683. raise VerifyError("Include optional files too large %sB > %sB" % (
  684. content_size_optional, rules["max_size_optional"])
  685. )
  686. # Filename limit
  687. if rules.get("files_allowed"):
  688. for file_inner_path in content["files"].keys():
  689. if not SafeRe.match("^%s$" % rules["files_allowed"], file_inner_path):
  690. raise VerifyError("File not allowed: %s" % file_inner_path)
  691. if rules.get("files_allowed_optional"):
  692. for file_inner_path in content.get("files_optional", {}).keys():
  693. if not SafeRe.match("^%s$" % rules["files_allowed_optional"], file_inner_path):
  694. raise VerifyError("Optional file not allowed: %s" % file_inner_path)
  695. # Check if content includes allowed
  696. if rules.get("includes_allowed") is False and content.get("includes"):
  697. raise VerifyError("Includes not allowed")
  698. return True # All good
  699. # Verify file validity
  700. # Return: None = Same as before, False = Invalid, True = Valid
  701. def verifyFile(self, inner_path, file, ignore_same=True):
  702. if inner_path.endswith("content.json"): # content.json: Check using sign
  703. from Crypt import CryptBitcoin
  704. try:
  705. if type(file) is dict:
  706. new_content = file
  707. else:
  708. new_content = json.load(file)
  709. if inner_path in self.contents:
  710. old_content = self.contents.get(inner_path, {"modified": 0})
  711. # Checks if its newer the ours
  712. if old_content["modified"] == new_content["modified"] and ignore_same: # Ignore, have the same content.json
  713. return None
  714. elif old_content["modified"] > new_content["modified"]: # We have newer
  715. raise VerifyError(
  716. "We have newer (Our: %s, Sent: %s)" %
  717. (old_content["modified"], new_content["modified"])
  718. )
  719. if new_content["modified"] > time.time() + 60 * 60 * 24: # Content modified in the far future (allow 1 day+)
  720. raise VerifyError("Modify timestamp is in the far future!")
  721. if self.isArchived(inner_path, new_content["modified"]):
  722. if inner_path in self.site.bad_files:
  723. del self.site.bad_files[inner_path]
  724. raise VerifyError("This file is archived!")
  725. # Check sign
  726. sign = new_content.get("sign")
  727. signs = new_content.get("signs", {})
  728. if "sign" in new_content:
  729. del(new_content["sign"]) # The file signed without the sign
  730. if "signs" in new_content:
  731. del(new_content["signs"]) # The file signed without the signs
  732. sign_content = json.dumps(new_content, sort_keys=True) # Dump the json to string to remove whitepsace
  733. # Fix float representation error on Android
  734. modified = new_content["modified"]
  735. if config.fix_float_decimals and type(modified) is float and not str(modified).endswith(".0"):
  736. modified_fixed = "{:.6f}".format(modified).strip("0.")
  737. sign_content = sign_content.replace(
  738. '"modified": %s' % repr(modified),
  739. '"modified": %s' % modified_fixed
  740. )
  741. self.verifyContent(inner_path, new_content)
  742. if signs: # New style signing
  743. valid_signers = self.getValidSigners(inner_path, new_content)
  744. signs_required = self.getSignsRequired(inner_path, new_content)
  745. if inner_path == "content.json" and len(valid_signers) > 1: # Check signers_sign on root content.json
  746. signers_data = "%s:%s" % (signs_required, ",".join(valid_signers))
  747. if not CryptBitcoin.verify(signers_data, self.site.address, new_content["signers_sign"]):
  748. raise VerifyError("Invalid signers_sign!")
  749. if inner_path != "content.json" and not self.verifyCert(inner_path, new_content): # Check if cert valid
  750. raise VerifyError("Invalid cert!")
  751. valid_signs = 0
  752. for address in valid_signers:
  753. if address in signs:
  754. valid_signs += CryptBitcoin.verify(sign_content, address, signs[address])
  755. if valid_signs >= signs_required:
  756. break # Break if we has enough signs
  757. if valid_signs < signs_required:
  758. raise VerifyError("Valid signs: %s/%s" % (valid_signs, signs_required))
  759. else:
  760. return True
  761. else: # Old style signing
  762. if CryptBitcoin.verify(sign_content, self.site.address, sign):
  763. return True
  764. else:
  765. raise VerifyError("Invalid old-style sign")
  766. except Exception, err:
  767. self.log.warning("%s: verify sign error: %s" % (inner_path, Debug.formatException(err)))
  768. raise err
  769. else: # Check using sha512 hash
  770. file_info = self.getFileInfo(inner_path)
  771. if file_info:
  772. if CryptHash.sha512sum(file) != file_info.get("sha512", ""):
  773. raise VerifyError("Invalid hash")
  774. if file_info.get("size", 0) != file.tell():
  775. raise VerifyError(
  776. "File size does not match %s <> %s" %
  777. (inner_path, file.tell(), file_info.get("size", 0))
  778. )
  779. return True
  780. else: # File not in content.json
  781. raise VerifyError("File not in content.json")
  782. def optionalDownloaded(self, inner_path, hash_id, size=None, own=False):
  783. if size is None:
  784. size = self.site.storage.getSize(inner_path)
  785. done = self.hashfield.appendHashId(hash_id)
  786. self.site.settings["optional_downloaded"] += size
  787. return done
  788. def optionalRemoved(self, inner_path, hash_id, size=None):
  789. if size is None:
  790. size = self.site.storage.getSize(inner_path)
  791. done = self.hashfield.removeHashId(hash_id)
  792. self.site.settings["optional_downloaded"] -= size
  793. return done