file.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. """For internal use only. It provides a slice-like file reader."""
  2. import os
  3. try:
  4. from multiprocessing import Lock
  5. except ImportError:
  6. from threading import Lock
  7. class FileBuffer(object):
  8. """A slice-able file reader"""
  9. def __init__(self, database):
  10. self._handle = open(database, 'rb')
  11. self._size = os.fstat(self._handle.fileno()).st_size
  12. if not hasattr(os, 'pread'):
  13. self._lock = Lock()
  14. def __getitem__(self, key):
  15. if isinstance(key, slice):
  16. return self._read(key.stop - key.start, key.start)
  17. elif isinstance(key, int):
  18. return self._read(1, key)
  19. else:
  20. raise TypeError("Invalid argument type.")
  21. def rfind(self, needle, start):
  22. """Reverse find needle from start"""
  23. pos = self._read(self._size - start - 1, start).rfind(needle)
  24. if pos == -1:
  25. return pos
  26. return start + pos
  27. def size(self):
  28. """Size of file"""
  29. return self._size
  30. def close(self):
  31. """Close file"""
  32. self._handle.close()
  33. if hasattr(os, 'pread'):
  34. def _read(self, buffersize, offset):
  35. """read that uses pread"""
  36. # pylint: disable=no-member
  37. return os.pread(self._handle.fileno(), buffersize, offset)
  38. else:
  39. def _read(self, buffersize, offset):
  40. """read with a lock
  41. This lock is necessary as after a fork, the different processes
  42. will share the same file table entry, even if we dup the fd, and
  43. as such the same offsets. There does not appear to be a way to
  44. duplicate the file table entry and we cannot re-open based on the
  45. original path as that file may have replaced with another or
  46. unlinked.
  47. """
  48. with self._lock:
  49. self._handle.seek(offset)
  50. return self._handle.read(buffersize)