BigfilePiecefield.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. import array
  2. def packPiecefield(data):
  3. res = []
  4. if not data:
  5. return array.array("H", "")
  6. if data[0] == "0":
  7. res.append(0)
  8. find = "1"
  9. else:
  10. find = "0"
  11. last_pos = 0
  12. pos = 0
  13. while 1:
  14. pos = data.find(find, pos)
  15. if find == "0":
  16. find = "1"
  17. else:
  18. find = "0"
  19. if pos == -1:
  20. res.append(len(data) - last_pos)
  21. break
  22. res.append(pos - last_pos)
  23. last_pos = pos
  24. return array.array("H", res)
  25. def unpackPiecefield(data):
  26. if not data:
  27. return ""
  28. res = []
  29. char = "1"
  30. for times in data:
  31. if times > 10000:
  32. return ""
  33. res.append(char * times)
  34. if char == "1":
  35. char = "0"
  36. else:
  37. char = "1"
  38. return "".join(res)
  39. class BigfilePiecefield(object):
  40. __slots__ = ["data"]
  41. def __init__(self):
  42. self.data = ""
  43. def fromstring(self, s):
  44. self.data = s
  45. def tostring(self):
  46. return self.data
  47. def pack(self):
  48. return packPiecefield(self.data).tostring()
  49. def unpack(self, s):
  50. self.data = unpackPiecefield(array.array("H", s))
  51. def __getitem__(self, key):
  52. try:
  53. return int(self.data[key])
  54. except IndexError:
  55. return False
  56. def __setitem__(self, key, value):
  57. data = self.data
  58. if len(data) < key:
  59. data = data.ljust(key+1, "0")
  60. data = data[:key] + str(int(value)) + data[key + 1:]
  61. self.data = data
  62. class BigfilePiecefieldPacked(object):
  63. __slots__ = ["data"]
  64. def __init__(self):
  65. self.data = ""
  66. def fromstring(self, data):
  67. self.data = packPiecefield(data).tostring()
  68. def tostring(self):
  69. return unpackPiecefield(array.array("H", self.data))
  70. def pack(self):
  71. return array.array("H", self.data).tostring()
  72. def unpack(self, data):
  73. self.data = data
  74. def __getitem__(self, key):
  75. try:
  76. return int(self.tostring()[key])
  77. except IndexError:
  78. return False
  79. def __setitem__(self, key, value):
  80. data = self.tostring()
  81. if len(data) < key:
  82. data = data.ljust(key+1, "0")
  83. data = data[:key] + str(int(value)) + data[key + 1:]
  84. self.fromstring(data)
  85. if __name__ == "__main__":
  86. import os
  87. import psutil
  88. import time
  89. testdata = "1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1"
  90. meminfo = psutil.Process(os.getpid()).memory_info
  91. for storage in [BigfilePiecefieldPacked, BigfilePiecefield]:
  92. print "-- Testing storage: %s --" % storage
  93. m = meminfo()[0]
  94. s = time.time()
  95. piecefields = {}
  96. for i in range(10000):
  97. piecefield = storage()
  98. piecefield.fromstring(testdata[:i] + "0" + testdata[i + 1:])
  99. piecefields[i] = piecefield
  100. print "Create x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - m) / 1024, time.time() - s, len(piecefields[0].data))
  101. m = meminfo()[0]
  102. s = time.time()
  103. for piecefield in piecefields.values():
  104. val = piecefield[1000]
  105. print "Query one x10000: +%sKB in %.3fs" % ((meminfo()[0] - m) / 1024, time.time() - s)
  106. m = meminfo()[0]
  107. s = time.time()
  108. for piecefield in piecefields.values():
  109. piecefield[1000] = True
  110. print "Change one x10000: +%sKB in %.3fs" % ((meminfo()[0] - m) / 1024, time.time() - s)
  111. m = meminfo()[0]
  112. s = time.time()
  113. for piecefield in piecefields.values():
  114. packed = piecefield.pack()
  115. print "Pack x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - m) / 1024, time.time() - s, len(packed))
  116. m = meminfo()[0]
  117. s = time.time()
  118. for piecefield in piecefields.values():
  119. piecefield.unpack(packed)
  120. print "Unpack x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - m) / 1024, time.time() - s, len(piecefields[0].data))
  121. piecefields = {}