# SiteStorage.py
import os
import re
import shutil
import json
import time
import sqlite3

import gevent.event

from Db import Db
from Debug import Debug
from Config import config
from util import helper
  12. class SiteStorage:
  13. def __init__(self, site, allow_create=True):
  14. self.site = site
  15. self.directory = "%s/%s" % (config.data_dir, self.site.address) # Site data diretory
  16. self.allowed_dir = os.path.abspath(self.directory) # Only serve/modify file within this dir
  17. self.log = site.log
  18. self.db = None # Db class
  19. self.db_checked = False # Checked db tables since startup
  20. self.event_db_busy = None # Gevent AsyncResult if db is working on rebuild
  21. self.has_db = self.isFile("dbschema.json") # The site has schema
  22. if not os.path.isdir(self.directory):
  23. if allow_create:
  24. os.mkdir(self.directory) # Create directory if not found
  25. else:
  26. raise Exception("Directory not exists: %s" % self.directory)
  27. # Load db from dbschema.json
  28. def openDb(self, check=True):
  29. schema = self.loadJson("dbschema.json")
  30. db_path = self.getPath(schema["db_file"])
  31. if check:
  32. if not os.path.isfile(db_path) or os.path.getsize(db_path) == 0: # Not exist or null
  33. self.rebuildDb()
  34. if not self.db:
  35. self.db = Db(schema, db_path)
  36. if check and not self.db_checked:
  37. changed_tables = self.db.checkTables()
  38. if changed_tables:
  39. self.rebuildDb(delete_db=False) # Todo only update the changed table datas
  40. def closeDb(self):
  41. if self.db:
  42. self.db.close()
  43. self.event_db_busy = None
  44. self.db = None
  45. # Return db class
  46. def getDb(self):
  47. if not self.db:
  48. self.log.debug("No database, waiting for dbschema.json...")
  49. self.site.needFile("dbschema.json", priority=3)
  50. self.has_db = self.isFile("dbschema.json") # Recheck if dbschema exist
  51. if self.has_db:
  52. self.openDb()
  53. return self.db
  54. # Rebuild sql cache
  55. def rebuildDb(self, delete_db=True):
  56. self.has_db = self.isFile("dbschema.json")
  57. if not self.has_db:
  58. return False
  59. self.event_db_busy = gevent.event.AsyncResult()
  60. schema = self.loadJson("dbschema.json")
  61. db_path = self.getPath(schema["db_file"])
  62. if os.path.isfile(db_path) and delete_db:
  63. if self.db:
  64. self.db.close() # Close db if open
  65. self.log.info("Deleting %s" % db_path)
  66. try:
  67. os.unlink(db_path)
  68. except Exception, err:
  69. self.log.error("Delete error: %s" % err)
  70. self.openDb(check=False)
  71. self.log.info("Creating tables...")
  72. self.db.checkTables()
  73. self.log.info("Importing data...")
  74. cur = self.db.getCursor()
  75. cur.execute("BEGIN")
  76. cur.logging = False
  77. found = 0
  78. s = time.time()
  79. for content_inner_path, content in self.site.content_manager.contents.items():
  80. content_path = self.getPath(content_inner_path)
  81. if os.path.isfile(content_path): # Missing content.json file
  82. if self.db.loadJson(content_path, cur=cur):
  83. found += 1
  84. else:
  85. self.log.error("[MISSING] %s" % content_inner_path)
  86. for file_relative_path in content["files"].keys():
  87. if not file_relative_path.endswith(".json"):
  88. continue # We only interesed in json files
  89. content_inner_path_dir = helper.getDirname(content_inner_path) # Content.json dir relative to site
  90. file_inner_path = content_inner_path_dir + file_relative_path # File Relative to site dir
  91. file_inner_path = file_inner_path.strip("/") # Strip leading /
  92. file_path = self.getPath(file_inner_path)
  93. if os.path.isfile(file_path):
  94. if self.db.loadJson(file_path, cur=cur):
  95. found += 1
  96. else:
  97. self.log.error("[MISSING] %s" % file_inner_path)
  98. cur.execute("END")
  99. self.log.info("Imported %s data file in %ss" % (found, time.time() - s))
  100. self.event_db_busy.set(True) # Event done, notify waiters
  101. self.event_db_busy = None # Clear event
  102. # Execute sql query or rebuild on dberror
  103. def query(self, query, params=None):
  104. if self.event_db_busy: # Db not ready for queries
  105. self.log.debug("Wating for db...")
  106. self.event_db_busy.get() # Wait for event
  107. try:
  108. res = self.getDb().execute(query, params)
  109. except sqlite3.DatabaseError, err:
  110. if err.__class__.__name__ == "DatabaseError":
  111. self.log.error("Database error: %s, query: %s, try to rebuilding it..." % (err, query))
  112. self.rebuildDb()
  113. res = self.db.cur.execute(query, params)
  114. else:
  115. raise err
  116. return res
  117. # Open file object
  118. def open(self, inner_path, mode="rb"):
  119. return open(self.getPath(inner_path), mode)
  120. # Open file object
  121. def read(self, inner_path, mode="r"):
  122. return open(self.getPath(inner_path), mode).read()
  123. # Write content to file
  124. def write(self, inner_path, content):
  125. file_path = self.getPath(inner_path)
  126. # Create dir if not exist
  127. file_dir = os.path.dirname(file_path)
  128. if not os.path.isdir(file_dir):
  129. os.makedirs(file_dir)
  130. # Write file
  131. if hasattr(content, 'read'): # File-like object
  132. with open(file_path, "wb") as file:
  133. shutil.copyfileobj(content, file) # Write buff to disk
  134. else: # Simple string
  135. with open(file_path, "wb") as file:
  136. file.write(content)
  137. del content
  138. self.onUpdated(inner_path)
  139. # Remove file from filesystem
  140. def delete(self, inner_path):
  141. file_path = self.getPath(inner_path)
  142. os.unlink(file_path)
  143. # List files from a directory
  144. def list(self, dir_inner_path):
  145. directory = self.getPath(dir_inner_path)
  146. for root, dirs, files in os.walk(directory):
  147. root = root.replace("\\", "/")
  148. root_relative_path = re.sub("^%s" % re.escape(directory), "", root).lstrip("/")
  149. for file_name in files:
  150. if root_relative_path: # Not root dir
  151. yield root_relative_path + "/" + file_name
  152. else:
  153. yield file_name
  154. # Site content updated
  155. def onUpdated(self, inner_path):
  156. file_path = self.getPath(inner_path)
  157. # Update Sql cache
  158. if inner_path == "dbschema.json":
  159. self.has_db = self.isFile("dbschema.json")
  160. self.getDb().checkTables() # Check if any if table schema changed
  161. elif inner_path.endswith(".json") and self.has_db: # Load json file to db
  162. self.log.debug("Loading json file to db: %s" % inner_path)
  163. try:
  164. self.getDb().loadJson(file_path)
  165. except Exception, err:
  166. self.log.error("Json %s load error: %s" % (inner_path, Debug.formatException(err)))
  167. self.closeDb()
  168. # Load and parse json file
  169. def loadJson(self, inner_path):
  170. with self.open(inner_path) as file:
  171. return json.load(file)
  172. # Write formatted json file
  173. def writeJson(self, inner_path, data):
  174. content = json.dumps(data, indent=1, sort_keys=True)
  175. # Make it a little more compact by removing unnecessary white space
  176. def compact_list(match):
  177. return "[ " + match.group(1).strip() + " ]"
  178. def compact_dict(match):
  179. return "{ " + match.group(1).strip() + " }"
  180. content = re.sub("\[([^,\{\[]{10,100}?)\]", compact_list, content, flags=re.DOTALL)
  181. content = re.sub("\{([^,\[\{]{10,100}?)\}", compact_dict, content, flags=re.DOTALL)
  182. # Write to disk
  183. self.write(inner_path, content)
  184. # Get file size
  185. def getSize(self, inner_path):
  186. path = self.getPath(inner_path)
  187. if os.path.isfile(path):
  188. return os.path.getsize(path)
  189. else:
  190. return 0
  191. # File exist
  192. def isFile(self, inner_path):
  193. return os.path.isfile(self.getPath(inner_path))
  194. # File or directory exist
  195. def isExists(self, inner_path):
  196. return os.path.exists(self.getPath(inner_path))
  197. # Dir exist
  198. def isDir(self, inner_path):
  199. return os.path.isdir(self.getPath(inner_path))
  200. # Security check and return path of site's file
  201. def getPath(self, inner_path):
  202. inner_path = inner_path.replace("\\", "/") # Windows separator fix
  203. inner_path = re.sub("^%s/" % re.escape(self.directory), "", inner_path) # Remove site directory if begins with it
  204. file_path = u"%s/%s" % (self.directory, inner_path)
  205. if not inner_path:
  206. return self.directory
  207. file_abspath = os.path.dirname(os.path.abspath(file_path))
  208. if ".." in file_path or not file_abspath.startswith(self.allowed_dir):
  209. raise Exception(u"File not allowed: %s" % file_path)
  210. return file_path
  211. # Get site dir relative path
  212. def getInnerPath(self, path):
  213. if path == self.directory:
  214. inner_path = ""
  215. else:
  216. inner_path = re.sub("^%s/" % re.escape(self.directory), "", path)
  217. return inner_path
  218. # Verify all files sha512sum using content.json
  219. def verifyFiles(self, quick_check=False): # Fast = using file size
  220. bad_files = []
  221. if not self.site.content_manager.contents.get("content.json"): # No content.json, download it first
  222. self.site.needFile("content.json", update=True) # Force update to fix corrupt file
  223. self.site.content_manager.loadContent() # Reload content.json
  224. for content_inner_path, content in self.site.content_manager.contents.items():
  225. if not os.path.isfile(self.getPath(content_inner_path)): # Missing content.json file
  226. self.log.debug("[MISSING] %s" % content_inner_path)
  227. bad_files.append(content_inner_path)
  228. for file_relative_path in content.get("files", {}).keys():
  229. file_inner_path = helper.getDirname(content_inner_path) + file_relative_path # Relative to site dir
  230. file_inner_path = file_inner_path.strip("/") # Strip leading /
  231. file_path = self.getPath(file_inner_path)
  232. if not os.path.isfile(file_path):
  233. self.log.debug("[MISSING] %s" % file_inner_path)
  234. bad_files.append(file_inner_path)
  235. continue
  236. if quick_check:
  237. ok = os.path.getsize(file_path) == content["files"][file_relative_path]["size"]
  238. else:
  239. ok = self.site.content_manager.verifyFile(file_inner_path, open(file_path, "rb"))
  240. if not ok:
  241. self.log.debug("[CHANGED] %s" % file_inner_path)
  242. bad_files.append(file_inner_path)
  243. # Optional files
  244. optional_added = 0
  245. optional_removed = 0
  246. for file_relative_path in content.get("files_optional", {}).keys():
  247. file_inner_path = helper.getDirname(content_inner_path) + file_relative_path # Relative to site dir
  248. file_inner_path = file_inner_path.strip("/") # Strip leading /
  249. file_path = self.getPath(file_inner_path)
  250. if not os.path.isfile(file_path):
  251. self.site.content_manager.hashfield.removeHash(content["files_optional"][file_relative_path]["sha512"])
  252. continue
  253. if quick_check:
  254. ok = os.path.getsize(file_path) == content["files_optional"][file_relative_path]["size"]
  255. else:
  256. ok = self.site.content_manager.verifyFile(file_inner_path, open(file_path, "rb"))
  257. if ok:
  258. self.site.content_manager.hashfield.appendHash(content["files_optional"][file_relative_path]["sha512"])
  259. optional_added += 1
  260. else:
  261. self.site.content_manager.hashfield.removeHash(content["files_optional"][file_relative_path]["sha512"])
  262. optional_removed += 1
  263. self.log.debug("[OPTIONAL CHANGED] %s" % file_inner_path)
  264. self.log.debug(
  265. "%s verified: %s, quick: %s, bad: %s, optionals: +%s -%s" %
  266. (content_inner_path, len(content["files"]), quick_check, bad_files, optional_added, optional_removed)
  267. )
  268. return bad_files
  269. # Check and try to fix site files integrity
  270. def checkFiles(self, quick_check=True):
  271. s = time.time()
  272. bad_files = self.verifyFiles(quick_check)
  273. if bad_files:
  274. for bad_file in bad_files:
  275. self.site.bad_files[bad_file] = self.site.bad_files.get("bad_file", 0) + 1
  276. self.log.debug("Checked files in %.2fs... Quick:%s" % (time.time() - s, quick_check))
  277. # Delete site's all file
  278. def deleteFiles(self):
  279. if self.has_db:
  280. self.log.debug("Deleting db file...")
  281. self.closeDb()
  282. try:
  283. schema = self.loadJson("dbschema.json")
  284. db_path = self.getPath(schema["db_file"])
  285. if os.path.isfile(db_path):
  286. os.unlink(db_path)
  287. except Exception, err:
  288. self.log.error("Db file delete error: %s" % err)
  289. self.log.debug("Deleting files from content.json...")
  290. files = [] # Get filenames
  291. for content_inner_path, content in self.site.content_manager.contents.items():
  292. files.append(content_inner_path)
  293. # Add normal files
  294. for file_relative_path in content.get("files", {}).keys():
  295. file_inner_path = helper.getDirname(content_inner_path) + file_relative_path # Relative to site dir
  296. files.append(file_inner_path)
  297. # Add optional files
  298. for file_relative_path in content.get("files_optional", {}).keys():
  299. file_inner_path = helper.getDirname(content_inner_path) + file_relative_path # Relative to site dir
  300. files.append(file_inner_path)
  301. for inner_path in files:
  302. path = self.getPath(inner_path)
  303. if os.path.isfile(path):
  304. os.unlink(path)
  305. self.log.debug("Deleting empty dirs...")
  306. for root, dirs, files in os.walk(self.directory, topdown=False):
  307. for dir in dirs:
  308. path = os.path.join(root, dir)
  309. if os.path.isdir(path) and os.listdir(path) == []:
  310. os.removedirs(path)
  311. self.log.debug("Removing %s" % path)
  312. if os.path.isdir(self.directory) and os.listdir(self.directory) == []:
  313. os.removedirs(self.directory) # Remove sites directory if empty
  314. if os.path.isdir(self.directory):
  315. self.log.debug("Some unknown file remained in site data dir: %s..." % self.directory)
  316. return False # Some files not deleted
  317. else:
  318. self.log.debug("Site data directory deleted: %s..." % self.directory)
  319. return True # All clean