reader.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. """
  2. maxminddb.reader
  3. ~~~~~~~~~~~~~~~~
  4. This module contains the pure Python database reader and related classes.
  5. """
  6. from __future__ import unicode_literals
  7. try:
  8. import mmap
  9. except ImportError:
  10. # pylint: disable=invalid-name
  11. mmap = None
  12. import struct
  13. from maxminddb.compat import byte_from_int, int_from_byte, ipaddress
  14. from maxminddb.const import MODE_AUTO, MODE_MMAP, MODE_FILE, MODE_MEMORY
  15. from maxminddb.decoder import Decoder
  16. from maxminddb.errors import InvalidDatabaseError
  17. from maxminddb.file import FileBuffer
  18. class Reader(object):
  19. """
  20. Instances of this class provide a reader for the MaxMind DB format. IP
  21. addresses can be looked up using the ``get`` method.
  22. """
  23. _DATA_SECTION_SEPARATOR_SIZE = 16
  24. _METADATA_START_MARKER = b"\xAB\xCD\xEFMaxMind.com"
  25. _ipv4_start = None
  26. def __init__(self, database, mode=MODE_AUTO):
  27. """Reader for the MaxMind DB file format
  28. Arguments:
  29. database -- A path to a valid MaxMind DB file such as a GeoIP2
  30. database file.
  31. mode -- mode to open the database with. Valid mode are:
  32. * MODE_MMAP - read from memory map.
  33. * MODE_FILE - read database as standard file.
  34. * MODE_MEMORY - load database into memory.
  35. * MODE_AUTO - tries MODE_MMAP and then MODE_FILE. Default.
  36. """
  37. if (mode == MODE_AUTO and mmap) or mode == MODE_MMAP:
  38. with open(database, 'rb') as db_file:
  39. self._buffer = mmap.mmap(
  40. db_file.fileno(), 0, access=mmap.ACCESS_READ)
  41. self._buffer_size = self._buffer.size()
  42. elif mode in (MODE_AUTO, MODE_FILE):
  43. self._buffer = FileBuffer(database)
  44. self._buffer_size = self._buffer.size()
  45. elif mode == MODE_MEMORY:
  46. with open(database, 'rb') as db_file:
  47. self._buffer = db_file.read()
  48. self._buffer_size = len(self._buffer)
  49. else:
  50. raise ValueError('Unsupported open mode ({0}). Only MODE_AUTO, '
  51. ' MODE_FILE, and MODE_MEMORY are support by the pure Python '
  52. 'Reader'.format(mode))
  53. metadata_start = self._buffer.rfind(self._METADATA_START_MARKER,
  54. max(0, self._buffer_size
  55. - 128 * 1024))
  56. if metadata_start == -1:
  57. self.close()
  58. raise InvalidDatabaseError('Error opening database file ({0}). '
  59. 'Is this a valid MaxMind DB file?'
  60. ''.format(database))
  61. metadata_start += len(self._METADATA_START_MARKER)
  62. metadata_decoder = Decoder(self._buffer, metadata_start)
  63. (metadata, _) = metadata_decoder.decode(metadata_start)
  64. self._metadata = Metadata(
  65. **metadata) # pylint: disable=bad-option-value
  66. self._decoder = Decoder(self._buffer, self._metadata.search_tree_size
  67. + self._DATA_SECTION_SEPARATOR_SIZE)
  68. def metadata(self):
  69. """Return the metadata associated with the MaxMind DB file"""
  70. return self._metadata
  71. def get(self, ip_address):
  72. """Return the record for the ip_address in the MaxMind DB
  73. Arguments:
  74. ip_address -- an IP address in the standard string notation
  75. """
  76. address = ipaddress.ip_address(ip_address)
  77. if address.version == 6 and self._metadata.ip_version == 4:
  78. raise ValueError('Error looking up {0}. You attempted to look up '
  79. 'an IPv6 address in an IPv4-only database.'.format(
  80. ip_address))
  81. pointer = self._find_address_in_tree(address)
  82. return self._resolve_data_pointer(pointer) if pointer else None
  83. def _find_address_in_tree(self, ip_address):
  84. packed = ip_address.packed
  85. bit_count = len(packed) * 8
  86. node = self._start_node(bit_count)
  87. for i in range(bit_count):
  88. if node >= self._metadata.node_count:
  89. break
  90. bit = 1 & (int_from_byte(packed[i >> 3]) >> 7 - (i % 8))
  91. node = self._read_node(node, bit)
  92. if node == self._metadata.node_count:
  93. # Record is empty
  94. return 0
  95. elif node > self._metadata.node_count:
  96. return node
  97. raise InvalidDatabaseError('Invalid node in search tree')
  98. def _start_node(self, length):
  99. if self._metadata.ip_version != 6 or length == 128:
  100. return 0
  101. # We are looking up an IPv4 address in an IPv6 tree. Skip over the
  102. # first 96 nodes.
  103. if self._ipv4_start:
  104. return self._ipv4_start
  105. node = 0
  106. for _ in range(96):
  107. if node >= self._metadata.node_count:
  108. break
  109. node = self._read_node(node, 0)
  110. self._ipv4_start = node
  111. return node
  112. def _read_node(self, node_number, index):
  113. base_offset = node_number * self._metadata.node_byte_size
  114. record_size = self._metadata.record_size
  115. if record_size == 24:
  116. offset = base_offset + index * 3
  117. node_bytes = b'\x00' + self._buffer[offset:offset + 3]
  118. elif record_size == 28:
  119. (middle,) = struct.unpack(
  120. b'!B', self._buffer[base_offset + 3:base_offset + 4])
  121. if index:
  122. middle &= 0x0F
  123. else:
  124. middle = (0xF0 & middle) >> 4
  125. offset = base_offset + index * 4
  126. node_bytes = byte_from_int(
  127. middle) + self._buffer[offset:offset + 3]
  128. elif record_size == 32:
  129. offset = base_offset + index * 4
  130. node_bytes = self._buffer[offset:offset + 4]
  131. else:
  132. raise InvalidDatabaseError(
  133. 'Unknown record size: {0}'.format(record_size))
  134. return struct.unpack(b'!I', node_bytes)[0]
  135. def _resolve_data_pointer(self, pointer):
  136. resolved = pointer - self._metadata.node_count + \
  137. self._metadata.search_tree_size
  138. if resolved > self._buffer_size:
  139. raise InvalidDatabaseError(
  140. "The MaxMind DB file's search tree is corrupt")
  141. (data, _) = self._decoder.decode(resolved)
  142. return data
  143. def close(self):
  144. """Closes the MaxMind DB file and returns the resources to the system"""
  145. # pylint: disable=unidiomatic-typecheck
  146. if type(self._buffer) not in (str, bytes):
  147. self._buffer.close()
  148. class Metadata(object):
  149. """Metadata for the MaxMind DB reader"""
  150. # pylint: disable=too-many-instance-attributes
  151. def __init__(self, **kwargs):
  152. """Creates new Metadata object. kwargs are key/value pairs from spec"""
  153. # Although I could just update __dict__, that is less obvious and it
  154. # doesn't work well with static analysis tools and some IDEs
  155. self.node_count = kwargs['node_count']
  156. self.record_size = kwargs['record_size']
  157. self.ip_version = kwargs['ip_version']
  158. self.database_type = kwargs['database_type']
  159. self.languages = kwargs['languages']
  160. self.binary_format_major_version = kwargs[
  161. 'binary_format_major_version']
  162. self.binary_format_minor_version = kwargs[
  163. 'binary_format_minor_version']
  164. self.build_epoch = kwargs['build_epoch']
  165. self.description = kwargs['description']
  166. @property
  167. def node_byte_size(self):
  168. """The size of a node in bytes"""
  169. return self.record_size // 4
  170. @property
  171. def search_tree_size(self):
  172. """The size of the search tree"""
  173. return self.node_count * self.node_byte_size
  174. def __repr__(self):
  175. args = ', '.join('%s=%r' % x for x in self.__dict__.items())
  176. return '{module}.{class_name}({data})'.format(
  177. module=self.__module__,
  178. class_name=self.__class__.__name__,
  179. data=args)