123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
- """
- maxminddb.reader
- ~~~~~~~~~~~~~~~~
- This module contains the pure Python database reader and related classes.
- """
- from __future__ import unicode_literals
- try:
- import mmap
- except ImportError:
- # pylint: disable=invalid-name
- mmap = None
- import struct
- from maxminddb.compat import byte_from_int, int_from_byte, ipaddress
- from maxminddb.const import MODE_AUTO, MODE_MMAP, MODE_FILE, MODE_MEMORY
- from maxminddb.decoder import Decoder
- from maxminddb.errors import InvalidDatabaseError
- from maxminddb.file import FileBuffer
class Reader(object):
    """
    Instances of this class provide a reader for the MaxMind DB format. IP
    addresses can be looked up using the ``get`` method.

    A Reader can also be used as a context manager; ``close`` is then called
    when the ``with`` block exits.
    """

    # Number of bytes added to the search tree size to obtain the base offset
    # handed to the data section decoder.
    _DATA_SECTION_SEPARATOR_SIZE = 16
    # Byte sequence that immediately precedes the metadata in the file.
    _METADATA_START_MARKER = b"\xAB\xCD\xEFMaxMind.com"

    # Cached node at which IPv4 lookups start inside an IPv6 tree. ``None``
    # until ``_start_node`` has computed it.
    _ipv4_start = None

    def __init__(self, database, mode=MODE_AUTO):
        """Reader for the MaxMind DB file format

        Arguments:
        database -- A path to a valid MaxMind DB file such as a GeoIP2
                    database file.
        mode -- mode to open the database with. Valid mode are:
            * MODE_MMAP - read from memory map.
            * MODE_FILE - read database as standard file.
            * MODE_MEMORY - load database into memory.
            * MODE_AUTO - tries MODE_MMAP and then MODE_FILE. Default.

        Raises ValueError for an unsupported mode and InvalidDatabaseError
        when the metadata marker cannot be found in the file.
        """
        if (mode == MODE_AUTO and mmap) or mode == MODE_MMAP:
            # The mapping stays valid after the file object is closed.
            with open(database, 'rb') as db_file:
                self._buffer = mmap.mmap(
                    db_file.fileno(), 0, access=mmap.ACCESS_READ)
            self._buffer_size = self._buffer.size()
        elif mode in (MODE_AUTO, MODE_FILE):
            self._buffer = FileBuffer(database)
            self._buffer_size = self._buffer.size()
        elif mode == MODE_MEMORY:
            with open(database, 'rb') as db_file:
                self._buffer = db_file.read()
            self._buffer_size = len(self._buffer)
        else:
            raise ValueError(
                'Unsupported open mode ({0}). Only MODE_AUTO, MODE_MMAP, '
                'MODE_FILE, and MODE_MEMORY are supported by the pure Python '
                'Reader'.format(mode))

        # Only the last 128 KiB of the file is searched for the marker.
        metadata_start = self._buffer.rfind(
            self._METADATA_START_MARKER,
            max(0, self._buffer_size - 128 * 1024))

        if metadata_start == -1:
            self.close()
            raise InvalidDatabaseError('Error opening database file ({0}). '
                                       'Is this a valid MaxMind DB file?'
                                       ''.format(database))

        metadata_start += len(self._METADATA_START_MARKER)
        try:
            metadata_decoder = Decoder(self._buffer, metadata_start)
            (metadata, _) = metadata_decoder.decode(metadata_start)
            self._metadata = Metadata(
                **metadata)  # pylint: disable=bad-option-value
        except Exception:
            # Do not leak the mmap/file handle when the metadata is unusable.
            self.close()
            raise

        self._decoder = Decoder(self._buffer, self._metadata.search_tree_size
                                + self._DATA_SECTION_SEPARATOR_SIZE)

    def __enter__(self):
        """Return the reader itself for use in a ``with`` statement"""
        return self

    def __exit__(self, *args):
        """Close the reader when leaving a ``with`` block"""
        self.close()

    def metadata(self):
        """Return the metadata associated with the MaxMind DB file"""
        return self._metadata

    def get(self, ip_address):
        """Return the record for the ip_address in the MaxMind DB

        Arguments:
        ip_address -- an IP address in the standard string notation

        Returns None when the search tree has no record for the address.
        Raises ValueError when an IPv6 address is looked up in an IPv4-only
        database.
        """
        address = ipaddress.ip_address(ip_address)

        if address.version == 6 and self._metadata.ip_version == 4:
            raise ValueError('Error looking up {0}. You attempted to look up '
                             'an IPv6 address in an IPv4-only database.'.format(
                                 ip_address))
        pointer = self._find_address_in_tree(address)

        return self._resolve_data_pointer(pointer) if pointer else None

    def _find_address_in_tree(self, ip_address):
        """Walk the search tree along the bits of ip_address

        Returns 0 when the lookup ends on an empty record, otherwise the
        record value (greater than node_count) that points at the data.
        Raises InvalidDatabaseError if all bits are consumed while still on
        a tree node.
        """
        packed = ip_address.packed

        bit_count = len(packed) * 8
        node = self._start_node(bit_count)

        for i in range(bit_count):
            if node >= self._metadata.node_count:
                break
            # Take bit i of the address, most significant bit first.
            bit = 1 & (int_from_byte(packed[i >> 3]) >> 7 - (i % 8))
            node = self._read_node(node, bit)
        if node == self._metadata.node_count:
            # Record is empty
            return 0
        elif node > self._metadata.node_count:
            return node

        raise InvalidDatabaseError('Invalid node in search tree')

    def _start_node(self, length):
        """Return the node at which a lookup of ``length`` bits starts"""
        if self._metadata.ip_version != 6 or length == 128:
            return 0

        # We are looking up an IPv4 address in an IPv6 tree. Skip over the
        # first 96 nodes.
        if self._ipv4_start is not None:
            return self._ipv4_start

        node = 0
        for _ in range(96):
            if node >= self._metadata.node_count:
                break
            node = self._read_node(node, 0)
        self._ipv4_start = node
        return node

    def _read_node(self, node_number, index):
        """Return record ``index`` (0 or 1) of node ``node_number``

        Raises InvalidDatabaseError when the record size from the metadata
        is not 24, 28 or 32 bits.
        """
        base_offset = node_number * self._metadata.node_byte_size

        record_size = self._metadata.record_size
        if record_size == 24:
            offset = base_offset + index * 3
            # Pad the three record bytes to four so they unpack as '!I'.
            node_bytes = b'\x00' + self._buffer[offset:offset + 3]
        elif record_size == 28:
            # The byte at base_offset + 3 is shared by both records: its high
            # nibble belongs to record 0 and its low nibble to record 1.
            (middle,) = struct.unpack(
                b'!B', self._buffer[base_offset + 3:base_offset + 4])
            if index:
                middle &= 0x0F
            else:
                middle = (0xF0 & middle) >> 4
            offset = base_offset + index * 4
            node_bytes = byte_from_int(
                middle) + self._buffer[offset:offset + 3]
        elif record_size == 32:
            offset = base_offset + index * 4
            node_bytes = self._buffer[offset:offset + 4]
        else:
            raise InvalidDatabaseError(
                'Unknown record size: {0}'.format(record_size))
        return struct.unpack(b'!I', node_bytes)[0]

    def _resolve_data_pointer(self, pointer):
        """Decode and return the data that the record value ``pointer`` refers to

        Raises InvalidDatabaseError when the resolved offset lies outside the
        buffer.
        """
        resolved = pointer - self._metadata.node_count + \
            self._metadata.search_tree_size

        # Valid offsets are 0 .. size - 1, so an offset equal to the size is
        # already past the end of the buffer.
        if resolved >= self._buffer_size:
            raise InvalidDatabaseError(
                "The MaxMind DB file's search tree is corrupt")

        (data, _) = self._decoder.decode(resolved)
        return data

    def close(self):
        """Closes the MaxMind DB file and returns the resources to the system"""
        # In MODE_MEMORY the buffer is the plain result of file.read(), which
        # has nothing to close.
        if not isinstance(self._buffer, (str, bytes)):
            self._buffer.close()
class Metadata(object):
    """Container for the metadata fields of a MaxMind DB file"""

    # pylint: disable=too-many-instance-attributes

    def __init__(self, **kwargs):
        """Build a Metadata object from the key/value pairs given in kwargs

        Every key read below is required; a missing one raises KeyError.
        Keys that are not read below are ignored.
        """
        # Each attribute is assigned explicitly rather than by updating
        # __dict__ wholesale: this is easier to follow and works better with
        # static analysis tools and IDEs.
        self.node_count = kwargs['node_count']
        self.record_size = kwargs['record_size']
        self.ip_version = kwargs['ip_version']
        self.database_type = kwargs['database_type']
        self.languages = kwargs['languages']
        major_version = kwargs['binary_format_major_version']
        self.binary_format_major_version = major_version
        minor_version = kwargs['binary_format_minor_version']
        self.binary_format_minor_version = minor_version
        self.build_epoch = kwargs['build_epoch']
        self.description = kwargs['description']

    @property
    def node_byte_size(self):
        """Number of bytes occupied by a single search tree node"""
        # A node holds two records of record_size bits each:
        # 2 * record_size / 8 == record_size // 4.
        bytes_per_node = self.record_size // 4
        return bytes_per_node

    @property
    def search_tree_size(self):
        """Number of bytes occupied by the whole search tree"""
        bytes_per_node = self.node_byte_size
        return bytes_per_node * self.node_count

    def __repr__(self):
        pairs = [
            '{0}={1!r}'.format(name, value)
            for name, value in self.__dict__.items()
        ]
        return '{0}.{1}({2})'.format(
            self.__module__,
            self.__class__.__name__,
            ', '.join(pairs))
|