added stem python library

This commit is contained in:
Micah Lee 2014-05-21 14:09:41 -04:00
parent 8ffa569094
commit 619ab6db0f
37 changed files with 19032 additions and 0 deletions

stem/descriptor/__init__.py
@@ -0,0 +1,552 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Package for parsing and processing descriptor data.
**Module Overview:**
::
parse_file - Parses the descriptors in a file.
Descriptor - Common parent for all descriptor file types.
|- get_path - location of the descriptor on disk if it came from a file
|- get_archive_path - location of the descriptor within the archive it came from
|- get_bytes - similar to str(), but provides our original bytes content
|- get_unrecognized_lines - unparsed descriptor content
+- __str__ - string that the descriptor was made from
.. data:: DocumentHandler (enum)
Ways in which we can parse a
:class:`~stem.descriptor.networkstatus.NetworkStatusDocument`.
Both **ENTRIES** and **BARE_DOCUMENT** have a 'thin' document, which doesn't
have a populated **routers** attribute. This allows for lower memory usage
and less upfront processing time. However, if read time and memory aren't a
concern then
**DOCUMENT** can provide you with a fully populated document.
=================== ===========
DocumentHandler Description
=================== ===========
**ENTRIES** Iterates over the contained :class:`~stem.descriptor.router_status_entry.RouterStatusEntry`. Each has a reference to the bare document it came from (through its **document** attribute).
**DOCUMENT** :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` with the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` it contains (through its **routers** attribute).
**BARE_DOCUMENT** :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` **without** a reference to its contents (the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` are unread).
=================== ===========
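For example, a minimal sketch of this trade-off, assuming a 'cached-consensus'
file copied from tor's data directory into the current directory...
::
  import stem.descriptor
  # ENTRIES (the default) lazily yields individual router status entries
  with open('cached-consensus', 'rb') as consensus_file:
    for router in stem.descriptor.parse_file(consensus_file):
      print router.nickname
  # DOCUMENT instead provides a single, fully populated document
  with open('cached-consensus', 'rb') as consensus_file:
    consensus = next(stem.descriptor.parse_file(
      consensus_file,
      document_handler = stem.descriptor.DocumentHandler.DOCUMENT,
    ))
    print len(consensus.routers)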
"""
__all__ = [
"export",
"reader",
"remote",
"extrainfo_descriptor",
"server_descriptor",
"microdescriptor",
"networkstatus",
"router_status_entry",
"tordnsel",
"parse_file",
"Descriptor",
]
import os
import re
import stem.prereq
import stem.util.enum
import stem.util.str_tools
try:
# added in python 2.7
from collections import OrderedDict
except ImportError:
from stem.util.ordereddict import OrderedDict
KEYWORD_CHAR = "a-zA-Z0-9-"
WHITESPACE = " \t"
KEYWORD_LINE = re.compile("^([%s]+)(?:[%s]+(.*))?$" % (KEYWORD_CHAR, WHITESPACE))
PGP_BLOCK_START = re.compile("^-----BEGIN ([%s%s]+)-----$" % (KEYWORD_CHAR, WHITESPACE))
PGP_BLOCK_END = "-----END %s-----"
DocumentHandler = stem.util.enum.UppercaseEnum(
"ENTRIES",
"DOCUMENT",
"BARE_DOCUMENT",
)
def parse_file(descriptor_file, descriptor_type = None, validate = True, document_handler = DocumentHandler.ENTRIES, **kwargs):
"""
Simple function to read the descriptor contents from a file, providing an
iterator for its :class:`~stem.descriptor.__init__.Descriptor` contents.
If you don't provide a **descriptor_type** argument then this automatically
tries to determine the descriptor type based on the following...
* The @type annotation on the first line. These are generally only found in
the `descriptor archives <https://metrics.torproject.org>`_.
* The filename if it matches something from tor's data directory. For
instance, tor's 'cached-descriptors' contains server descriptors.
This is a handy function for simple usage, but if you're reading multiple
descriptor files you might want to consider the
:class:`~stem.descriptor.reader.DescriptorReader`.
Descriptor types include the following, including further minor versions (i.e.
if we support 1.1 then we also support everything from 1.0 and most things
from 1.2, but not 2.0)...
========================================= =====
Descriptor Type Class
========================================= =====
server-descriptor 1.0 :class:`~stem.descriptor.server_descriptor.RelayDescriptor`
extra-info 1.0 :class:`~stem.descriptor.extrainfo_descriptor.RelayExtraInfoDescriptor`
microdescriptor 1.0 :class:`~stem.descriptor.microdescriptor.Microdescriptor`
directory 1.0 **unsupported**
network-status-2 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV2`)
dir-key-certificate-3 1.0 :class:`~stem.descriptor.networkstatus.KeyCertificate`
network-status-consensus-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
network-status-vote-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
network-status-microdesc-consensus-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
bridge-network-status 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.BridgeNetworkStatusDocument`)
bridge-server-descriptor 1.0 :class:`~stem.descriptor.server_descriptor.BridgeDescriptor`
bridge-extra-info 1.1 :class:`~stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`
torperf 1.0 **unsupported**
bridge-pool-assignment 1.0 **unsupported**
tordnsel 1.0 :class:`~stem.descriptor.tordnsel.TorDNSEL`
========================================= =====
If you're using **python 3** then beware that the open() function defaults to
using text mode. **Binary mode** is strongly suggested because it's both
faster (in my testing by about 33x) and doesn't do universal newline
translation, which can cause us to misparse the document.
::
my_descriptor_file = open(descriptor_path, 'rb')
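For instance, a short sketch along those lines (descriptor_path being a
hypothetical path to a server descriptor file)...
::
  with open(descriptor_path, 'rb') as descriptor_file:
    for desc in parse_file(descriptor_file, 'server-descriptor 1.0'):
      print desc.nickname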
:param str,file descriptor_file: path or opened file with the descriptor contents
:param str descriptor_type: `descriptor type <https://metrics.torproject.org/formats.html#descriptortypes>`_, this is guessed if not provided
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:param dict kwargs: additional arguments for the descriptor constructor
:returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file
:raises:
* **ValueError** if the contents are malformed and validate is **True**
* **TypeError** if we can't match the contents of the file to a descriptor type
* **IOError** if unable to read from the descriptor_file
"""
# if we got a path then open that file for parsing
if isinstance(descriptor_file, (bytes, unicode)):
with open(descriptor_file) as desc_file:
for desc in parse_file(desc_file, descriptor_type, validate, document_handler, **kwargs):
yield desc
return
# The tor descriptor specifications do not provide a reliable method for
# identifying a descriptor file's type and version so we need to guess
# based on its filename. Metrics descriptors, however, can be identified
# by an annotation on their first line...
# https://trac.torproject.org/5651
initial_position = descriptor_file.tell()
first_line = stem.util.str_tools._to_unicode(descriptor_file.readline().strip())
metrics_header_match = re.match("^@type (\S+) (\d+)\.(\d+)$", first_line)
if not metrics_header_match:
descriptor_file.seek(initial_position)
descriptor_path = getattr(descriptor_file, 'name', None)
filename = '<undefined>' if descriptor_path is None else os.path.basename(descriptor_file.name)
file_parser = None
if descriptor_type is not None:
descriptor_type_match = re.match("^(\S+) (\d+)\.(\d+)$", descriptor_type)
if descriptor_type_match:
desc_type, major_version, minor_version = descriptor_type_match.groups()
file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs)
else:
raise ValueError("The descriptor_type must be of the form '<type> <major_version>.<minor_version>'")
elif metrics_header_match:
# Metrics descriptor handling
desc_type, major_version, minor_version = metrics_header_match.groups()
file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs)
else:
# Cached descriptor handling. These contain multiple descriptors per file.
if filename == "cached-descriptors":
file_parser = lambda f: stem.descriptor.server_descriptor._parse_file(f, validate = validate, **kwargs)
elif filename == "cached-extrainfo":
file_parser = lambda f: stem.descriptor.extrainfo_descriptor._parse_file(f, validate = validate, **kwargs)
elif filename == "cached-microdescs":
file_parser = lambda f: stem.descriptor.microdescriptor._parse_file(f, validate = validate, **kwargs)
elif filename == "cached-consensus":
file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, validate = validate, document_handler = document_handler, **kwargs)
elif filename == "cached-microdesc-consensus":
file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs)
if file_parser:
for desc in file_parser(descriptor_file):
if descriptor_path is not None:
desc._set_path(os.path.abspath(descriptor_path))
yield desc
return
# Not recognized as a descriptor file.
raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line))
def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler, **kwargs):
# Parses descriptor files from metrics, yielding individual descriptors. This
# throws a TypeError if the descriptor_type or version isn't recognized.
if descriptor_type == "server-descriptor" and major_version == 1:
for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs):
yield desc
elif descriptor_type == "bridge-server-descriptor" and major_version == 1:
for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs):
yield desc
elif descriptor_type == "extra-info" and major_version == 1:
for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs):
yield desc
elif descriptor_type == "microdescriptor" and major_version == 1:
for desc in stem.descriptor.microdescriptor._parse_file(descriptor_file, validate = validate, **kwargs):
yield desc
elif descriptor_type == "bridge-extra-info" and major_version == 1:
# version 1.1 introduced a 'transport' field...
# https://trac.torproject.org/6257
for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs):
yield desc
elif descriptor_type == "network-status-2" and major_version == 1:
document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV2
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
yield desc
elif descriptor_type == "dir-key-certificate-3" and major_version == 1:
for desc in stem.descriptor.networkstatus._parse_file_key_certs(descriptor_file, validate = validate, **kwargs):
yield desc
elif descriptor_type in ("network-status-consensus-3", "network-status-vote-3") and major_version == 1:
document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
yield desc
elif descriptor_type == "network-status-microdesc-consensus-3" and major_version == 1:
document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs):
yield desc
elif descriptor_type == "bridge-network-status" and major_version == 1:
document_type = stem.descriptor.networkstatus.BridgeNetworkStatusDocument
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
yield desc
elif descriptor_type == "tordnsel" and major_version == 1:
document_type = stem.descriptor.tordnsel.TorDNSEL
for desc in stem.descriptor.tordnsel._parse_file(descriptor_file, validate = validate, **kwargs):
yield desc
else:
raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version))
class Descriptor(object):
"""
Common parent for all types of descriptors.
"""
def __init__(self, contents):
self._path = None
self._archive_path = None
self._raw_contents = contents
def get_path(self):
"""
Provides the absolute path that we loaded this descriptor from.
:returns: **str** with the absolute path of the descriptor source, **None** if this descriptor didn't come from a file
"""
return self._path
def get_archive_path(self):
"""
If this descriptor came from an archive then provides its path within the
archive. This is only set if the descriptor came from a
:class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this
descriptor didn't come from an archive.
:returns: **str** with the descriptor's path within the archive
"""
return self._archive_path
def get_bytes(self):
"""
Provides the ASCII **bytes** of the descriptor. This only differs from
**str()** if you're running python 3.x, in which case **str()** provides a
**unicode** string.
:returns: **bytes** for the descriptor's contents
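For instance, a quick sketch of the distinction, with made-up content...
::
  desc.get_bytes()  # b'router caerSidi 71.35.133.197 9001 0 0...'
  str(desc)         # 'router caerSidi 71.35.133.197 9001 0 0...' on python 3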
"""
return self._raw_contents
def get_unrecognized_lines(self):
"""
Provides a list of lines that were either ignored or had data that we did
not know how to process. This is most common due to new descriptor fields
that this library does not yet know how to process. Patches welcome!
:returns: **list** of lines of unrecognized content
"""
raise NotImplementedError
def _set_path(self, path):
self._path = path
def _set_archive_path(self, path):
self._archive_path = path
def __str__(self):
if stem.prereq.is_python_3():
return stem.util.str_tools._to_unicode(self._raw_contents)
else:
return self._raw_contents
def _get_bytes_field(keyword, content):
"""
Provides the value corresponding to the given keyword. This is handy to fetch
values specifically allowed to be arbitrary bytes prior to converting to
unicode.
:param str keyword: keyword of the line to look up
:param bytes content: content to look through
:returns: **bytes** value on the given line, **None** if the line doesn't
exist
:raises: **ValueError** if the content isn't bytes
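A small sketch with made-up content...
::
  content = b'nickname caerSidi\npublished 2012-05-03 12:07:50'
  _get_bytes_field('nickname', content)  # b'caerSidi'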
"""
if not isinstance(content, bytes):
raise ValueError("Content must be bytes, got a %s" % type(content))
line_match = re.search(stem.util.str_tools._to_bytes("^(opt )?%s(?:[%s]+(.*))?$" % (keyword, WHITESPACE)), content, re.MULTILINE)
if line_match:
value = line_match.groups()[1]
return b"" if value is None else value
else:
return None
def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False):
"""
Reads from the descriptor file until we get to one of the given keywords or reach the
end of the file.
:param str,list keywords: keyword(s) we want to read until
:param file descriptor_file: file with the descriptor content
:param bool inclusive: includes the line with the keyword if True
:param bool ignore_first: doesn't check if the first line read has one of the
given keywords
:param bool skip: skips buffering content, returning None
:param int end_position: end if we reach this point in the file
:param bool include_ending_keyword: provides the keyword we broke on if **True**
:returns: **list** with the lines until we find one of the keywords; if
include_ending_keyword is **True** this is instead a two-value tuple of
(content, ending keyword)
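A minimal sketch with stand-in content...
::
  import io
  demo_file = io.BytesIO(b'uptime 600\nbandwidth 1024\nrouter-signature\n')
  _read_until_keywords('router-signature', demo_file)
  # [b'uptime 600\n', b'bandwidth 1024\n']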
"""
content = None if skip else []
ending_keyword = None
if isinstance(keywords, (bytes, unicode)):
keywords = (keywords,)
if ignore_first:
first_line = descriptor_file.readline()
if content is not None and first_line is not None:
content.append(first_line)
while True:
last_position = descriptor_file.tell()
if end_position and last_position >= end_position:
break
line = descriptor_file.readline()
if not line:
break # EOF
line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(line))
if not line_match:
# no spaces or tabs in the line
line_keyword = stem.util.str_tools._to_unicode(line.strip())
else:
line_keyword = line_match.groups()[0]
if line_keyword in keywords:
ending_keyword = line_keyword
if not inclusive:
descriptor_file.seek(last_position)
elif content is not None:
content.append(line)
break
elif content is not None:
content.append(line)
if include_ending_keyword:
return (content, ending_keyword)
else:
return content
def _get_pseudo_pgp_block(remaining_contents):
"""
Checks if the given contents begin with a pseudo-Open-PGP-style block and, if
so, pops it off and provides it back to the caller.
:param list remaining_contents: lines to be checked for a public key block
:returns: **str** with the armor wrapped contents or None if it doesn't exist
:raises: **ValueError** if the contents starts with a key block but it's
malformed (for instance, if it lacks an ending line)
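For example, with a made-up signature block...
::
  lines = [
    '-----BEGIN SIGNATURE-----',
    'MIGJAoGBAJv5IIWQ+WDWYUdyA/0L8qbIkEVH',
    '-----END SIGNATURE-----',
    'published 2012-05-03 12:07:50',
  ]
  block = _get_pseudo_pgp_block(lines)
  # block is the three armor lines joined by newlines, and lines
  # now only contains ['published 2012-05-03 12:07:50']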
"""
if not remaining_contents:
return None # nothing left
block_match = PGP_BLOCK_START.match(remaining_contents[0])
if block_match:
block_type = block_match.groups()[0]
block_lines = []
end_line = PGP_BLOCK_END % block_type
while True:
if not remaining_contents:
raise ValueError("Unterminated pgp style block (looking for '%s'):\n%s" % (end_line, "\n".join(block_lines)))
line = remaining_contents.pop(0)
block_lines.append(line)
if line == end_line:
return "\n".join(block_lines)
else:
return None
def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
"""
Initial breakup of the server descriptor contents to make parsing easier.
A descriptor contains a series of 'keyword lines' which are simply a keyword
followed by an optional value. Lines can also be followed by a signature
block.
To get a sub-listing with just certain keywords use extra_keywords. This can
be useful if we care about their relative ordering with respect to each
other. For instance, we care about the ordering of 'accept' and 'reject'
entries because this influences the resulting exit policy, but for everything
else in server descriptors the order does not matter.
:param str raw_contents: descriptor content provided by the relay
:param bool validate: checks the validity of the descriptor's content if
True, skips these checks otherwise
:param list extra_keywords: entity keywords to put into a separate listing
with ordering intact
:returns:
**collections.OrderedDict** with the 'keyword => (value, pgp key)' mappings.
If extra_keywords was provided then this instead provides a two-value tuple,
the second being a list of those entries.
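A small sketch with stand-in descriptor content...
::
  entries, policy = _get_descriptor_components(
    'uptime 600\naccept *:80\nreject *:25\n',
    True,
    extra_keywords = ('accept', 'reject'),
  )
  # entries is OrderedDict([('uptime', [('600', None)])])
  # policy is ['accept *:80', 'reject *:25']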
"""
entries = OrderedDict()
extra_entries = [] # entries with a keyword in extra_keywords
remaining_lines = raw_contents.split("\n")
while remaining_lines:
line = remaining_lines.pop(0)
# V2 network status documents explicitly can contain blank lines...
#
# "Implementations MAY insert blank lines for clarity between sections;
# these blank lines are ignored."
#
# ... and server descriptors end with an extra newline. But other documents
# don't say how blank lines should be handled, so we ignore them globally.
if not line:
continue
# Some lines have an 'opt ' prefix for backward compatibility. It should be
# ignored. This prefix is being removed in...
# https://trac.torproject.org/projects/tor/ticket/5124
if line.startswith("opt "):
line = line[4:]
line_match = KEYWORD_LINE.match(line)
if not line_match:
if not validate:
continue
raise ValueError("Line contains invalid characters: %s" % line)
keyword, value = line_match.groups()
if value is None:
value = ''
try:
block_contents = _get_pseudo_pgp_block(remaining_lines)
except ValueError as exc:
if not validate:
continue
raise exc
if keyword in extra_keywords:
extra_entries.append("%s %s" % (keyword, value))
else:
entries.setdefault(keyword, []).append((value, block_contents))
if extra_keywords:
return entries, extra_entries
else:
return entries
# importing at the end to avoid circular dependencies on our Descriptor class
import stem.descriptor.server_descriptor
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.networkstatus
import stem.descriptor.microdescriptor
import stem.descriptor.tordnsel

stem/descriptor/export.py
@@ -0,0 +1,106 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Toolkit for exporting descriptors to other formats.
**Module Overview:**
::
export_csv - Exports descriptors to a CSV
export_csv_file - Writes exported CSV output to a file
"""
import cStringIO
import csv
import stem.descriptor
import stem.prereq
class _ExportDialect(csv.excel):
lineterminator = '\n'
def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
"""
Provides a newline separated CSV for one or more descriptors. If simply
provided with descriptors then the CSV contains all of their attributes,
labeled with a header row. Either 'included_fields' or 'excluded_fields' can
be used for more granular control over the attributes and their order.
:param Descriptor,list descriptors: either a
:class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
:param list included_fields: attributes to include in the csv
:param list excluded_fields: attributes to exclude from the csv
:param bool header: if **True** then the first line will be a comma separated
list of the attribute names (**only supported in python 2.7 and higher**)
:returns: **str** of the CSV for the descriptors, one per line
:raises: **ValueError** if descriptors contain more than one descriptor type
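For instance, a minimal sketch that borrows the cached descriptor path used in
examples elsewhere in this library...
::
  import stem.descriptor
  import stem.descriptor.export
  descriptors = list(stem.descriptor.parse_file('/home/atagar/.tor/cached-descriptors'))
  print stem.descriptor.export.export_csv(
    descriptors,
    included_fields = ('nickname', 'address', 'published'),
  )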
"""
output_buffer = cStringIO.StringIO()
export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
return output_buffer.getvalue()
def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
"""
Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
written directly to a file.
:param file output_file: file to be written to
:param Descriptor,list descriptors: either a
:class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
:param list included_fields: attributes to include in the csv
:param list excluded_fields: attributes to exclude from the csv
:param bool header: if **True** then the first line will be a comma separated
list of the attribute names (**only supported in python 2.7 and higher**)
:raises: **ValueError** if descriptors contain more than one descriptor type
"""
if isinstance(descriptors, stem.descriptor.Descriptor):
descriptors = (descriptors,)
if not descriptors:
return
descriptor_type = type(descriptors[0])
descriptor_type_label = descriptor_type.__name__
included_fields = list(included_fields)
# If the user didn't specify the fields to include then export everything,
# ordered alphabetically. If they did specify fields then make sure that
# they exist.
desc_attr = sorted(vars(descriptors[0]).keys())
if included_fields:
for field in included_fields:
if not field in desc_attr:
raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ", ".join(desc_attr)))
else:
included_fields = [attr for attr in desc_attr if not attr.startswith('_')]
for field in excluded_fields:
try:
included_fields.remove(field)
except ValueError:
pass
writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')
if header and stem.prereq.is_python_27():
writer.writeheader()
for desc in descriptors:
if not isinstance(desc, stem.descriptor.Descriptor):
raise ValueError("Unable to export a descriptor CSV since %s is not a descriptor." % type(desc).__name__)
elif descriptor_type != type(desc):
raise ValueError("To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s." % (descriptor_type_label, type(desc)))
writer.writerow(vars(desc))

stem/descriptor/extrainfo_descriptor.py
@@ -0,0 +1,940 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for Tor extra-info descriptors. These are published by relays whenever
their server descriptor is published and have a similar format. However, unlike
server descriptors these don't contain information that Tor clients require to
function and as such aren't fetched by default.
Defined in section 2.2 of the `dir-spec
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_,
extra-info descriptors contain interesting but non-vital information such as
usage statistics. Tor clients cannot request these documents for bridges.
Extra-info descriptors are available from a few sources...
* if you have 'DownloadExtraInfo 1' in your torrc...
  * control port via 'GETINFO extra-info/digest/\*' queries
  * the 'cached-extrainfo' file in tor's data directory
* tor metrics, at https://metrics.torproject.org/data.html
* directory authorities and mirrors via their DirPort
**Module Overview:**
::
ExtraInfoDescriptor - Tor extra-info descriptor.
| |- RelayExtraInfoDescriptor - Extra-info descriptor for a relay.
| +- BridgeExtraInfoDescriptor - Extra-info descriptor for a bridge.
|
|- digest - calculates the upper-case hex digest value for our content
+- get_unrecognized_lines - lines with unrecognized content
.. data:: DirResponse (enum)
Enumeration for known statuses for ExtraInfoDescriptor's dir_*_responses.
=================== ===========
DirResponse Description
=================== ===========
**OK** network status requests that were answered
**NOT_ENOUGH_SIGS** network status wasn't signed by enough authorities
**UNAVAILABLE** requested network status was unavailable
**NOT_FOUND** requested network status was not found
**NOT_MODIFIED** network status unmodified since If-Modified-Since time
**BUSY** directory was busy
=================== ===========
.. data:: DirStat (enum)
Enumeration for known stats for ExtraInfoDescriptor's dir_*_direct_dl and
dir_*_tunneled_dl.
===================== ===========
DirStat Description
===================== ===========
**COMPLETE** requests that completed successfully
**TIMEOUT** requests that didn't complete within a ten minute timeout
**RUNNING** requests still in progress when the measurement was taken
**MIN** smallest rate at which a descriptor was downloaded in B/s
**MAX** largest rate at which a descriptor was downloaded in B/s
**D1-4** and **D6-9** rate of the slowest x/10 download rates in B/s
**Q1** and **Q3** rate of the slowest and fastest quarter download rates in B/s
**MD** median download rate in B/s
===================== ===========
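For example, a minimal sketch that sums up relayed bytes from the
'cached-extrainfo' file (path assumed)...
::
  from stem.descriptor import parse_file
  for desc in parse_file('/home/atagar/.tor/cached-extrainfo'):
    if desc.read_history_values:
      print '%s read %i bytes' % (desc.nickname, sum(desc.read_history_values))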
"""
import datetime
import hashlib
import re
import stem.util.connection
import stem.util.enum
import stem.util.str_tools
from stem.descriptor import (
PGP_BLOCK_END,
Descriptor,
_read_until_keywords,
_get_descriptor_components,
)
try:
# added in python 3.2
from functools import lru_cache
except ImportError:
from stem.util.lru_cache import lru_cache
# known statuses for dirreq-v2-resp and dirreq-v3-resp...
DirResponse = stem.util.enum.Enum(
("OK", "ok"),
("NOT_ENOUGH_SIGS", "not-enough-sigs"),
("UNAVAILABLE", "unavailable"),
("NOT_FOUND", "not-found"),
("NOT_MODIFIED", "not-modified"),
("BUSY", "busy"),
)
# known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl...
dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md']
dir_stats += ['d%i' % i for i in range(1, 5)]
dir_stats += ['d%i' % i for i in range(6, 10)]
DirStat = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats])
# relay descriptors must have exactly one of the following
REQUIRED_FIELDS = (
"extra-info",
"published",
"router-signature",
)
# optional entries that can appear at most once
SINGLE_FIELDS = (
"read-history",
"write-history",
"geoip-db-digest",
"geoip6-db-digest",
"bridge-stats-end",
"bridge-ips",
"dirreq-stats-end",
"dirreq-v2-ips",
"dirreq-v3-ips",
"dirreq-v2-reqs",
"dirreq-v3-reqs",
"dirreq-v2-share",
"dirreq-v3-share",
"dirreq-v2-resp",
"dirreq-v3-resp",
"dirreq-v2-direct-dl",
"dirreq-v3-direct-dl",
"dirreq-v2-tunneled-dl",
"dirreq-v3-tunneled-dl",
"dirreq-read-history",
"dirreq-write-history",
"entry-stats-end",
"entry-ips",
"cell-stats-end",
"cell-processed-cells",
"cell-queued-cells",
"cell-time-in-queue",
"cell-circuits-per-decile",
"conn-bi-direct",
"exit-stats-end",
"exit-kibibytes-written",
"exit-kibibytes-read",
"exit-streams-opened",
)
def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
"""
Iterates over the extra-info descriptors in a file.
:param file descriptor_file: file with descriptor content
:param bool is_bridge: parses the file as being a bridge descriptor
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param dict kwargs: additional arguments for the descriptor constructor
:returns: iterator for :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
instances in the file
:raises:
* **ValueError** if the contents are malformed and validate is **True**
* **IOError** if the file can't be read
"""
while True:
extrainfo_content = _read_until_keywords("router-signature", descriptor_file)
# we've reached the 'router-signature', now include the pgp style block
block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if extrainfo_content:
if is_bridge:
yield BridgeExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate, **kwargs)
else:
yield RelayExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate, **kwargs)
else:
break # done parsing file
def _parse_timestamp_and_interval(keyword, content):
"""
Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry.
:param str keyword: line's keyword
:param str content: line content to be parsed
:returns: **tuple** of the form (timestamp (**datetime**), interval
(**int**), remaining content (**str**))
:raises: **ValueError** if the content is malformed
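For instance, with a made-up read-history entry...
::
  _parse_timestamp_and_interval('read-history', '2012-05-03 12:07:50 (900 s) 20,30,40')
  # (datetime.datetime(2012, 5, 3, 12, 7, 50), 900, '20,30,40')
"""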
"""
line = "%s %s" % (keyword, content)
content_match = re.match("^(.*) \(([0-9]+) s\)( .*)?$", content)
if not content_match:
raise ValueError("Malformed %s line: %s" % (keyword, line))
timestamp_str, interval, remainder = content_match.groups()
if remainder:
remainder = remainder[1:] # remove leading space
if not interval.isdigit():
raise ValueError("%s line's interval wasn't a number: %s" % (keyword, line))
try:
timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
return timestamp, int(interval), remainder
except ValueError:
raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
class ExtraInfoDescriptor(Descriptor):
"""
Extra-info descriptor document.
:var str nickname: **\*** relay's nickname
:var str fingerprint: **\*** identity key fingerprint
:var datetime published: **\*** time in UTC when this descriptor was made
:var str geoip_db_digest: sha1 of the geoIP database file for IPv4 addresses
:var str geoip6_db_digest: sha1 of the geoIP database file for IPv6 addresses
:var dict transport: **\*** mapping of transport methods to their (address,
port, args) tuple, these usually appear on bridges in which case all of
those are **None**
**Bi-directional connection usage:**
:var datetime conn_bi_direct_end: end of the sampling interval
:var int conn_bi_direct_interval: seconds per interval
:var int conn_bi_direct_below: connections that read/wrote less than 20 KiB
:var int conn_bi_direct_read: connections that read at least 10x more than wrote
:var int conn_bi_direct_write: connections that wrote at least 10x more than read
:var int conn_bi_direct_both: remaining connections
**Bytes read/written for relayed traffic:**
:var datetime read_history_end: end of the sampling interval
:var int read_history_interval: seconds per interval
:var list read_history_values: bytes read during each interval
:var datetime write_history_end: end of the sampling interval
:var int write_history_interval: seconds per interval
:var list write_history_values: bytes written during each interval
**Cell relaying statistics:**
:var datetime cell_stats_end: end of the period when stats were gathered
:var int cell_stats_interval: length in seconds of the interval
:var list cell_processed_cells: measurement of processed cells per circuit
:var list cell_queued_cells: measurement of queued cells per circuit
:var list cell_time_in_queue: mean enqueued time in milliseconds for cells
:var int cell_circuits_per_decile: mean number of circuits in a decile
**Directory Mirror Attributes:**
:var datetime dir_stats_end: end of the period when stats were gathered
:var int dir_stats_interval: length in seconds of the interval
:var dict dir_v2_ips: mapping of locales to rounded count of requester ips
:var dict dir_v3_ips: mapping of locales to rounded count of requester ips
:var float dir_v2_share: percent of total directory traffic it expects to serve
:var float dir_v3_share: percent of total directory traffic it expects to serve
:var dict dir_v2_requests: mapping of locales to rounded count of requests
:var dict dir_v3_requests: mapping of locales to rounded count of requests
:var dict dir_v2_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
:var dict dir_v3_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
:var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count
:var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count
:var dict dir_v2_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
:var dict dir_v3_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
:var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement
:var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement
:var dict dir_v2_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
:var dict dir_v3_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
:var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement
:var dict dir_v3_tunneled_dl_unknown: mapping of unrecognized stats to their measurement
**Bytes read/written for directory mirroring:**
:var datetime dir_read_history_end: end of the sampling interval
:var int dir_read_history_interval: seconds per interval
:var list dir_read_history_values: bytes read during each interval
:var datetime dir_write_history_end: end of the sampling interval
:var int dir_write_history_interval: seconds per interval
:var list dir_write_history_values: bytes written during each interval
**Guard Attributes:**
:var datetime entry_stats_end: end of the period when stats were gathered
:var int entry_stats_interval: length in seconds of the interval
:var dict entry_ips: mapping of locales to rounded count of unique user ips
**Exit Attributes:**
:var datetime exit_stats_end: end of the period when stats were gathered
:var int exit_stats_interval: length in seconds of the interval
:var dict exit_kibibytes_written: traffic per port (keys are ints or 'other')
:var dict exit_kibibytes_read: traffic per port (keys are ints or 'other')
:var dict exit_streams_opened: streams per port (keys are ints or 'other')
**Bridge Attributes:**
:var datetime bridge_stats_end: end of the period when stats were gathered
:var int bridge_stats_interval: length in seconds of the interval
:var dict bridge_ips: mapping of locales to rounded count of unique user ips
:var datetime geoip_start_time: replaced by bridge_stats_end (deprecated)
:var dict geoip_client_origins: replaced by bridge_ips (deprecated)
:var dict ip_versions: mapping of ip protocols to a rounded count for the number of users
:var dict ip_transports: mapping of ip transports to a count for the number of users
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
def __init__(self, raw_contents, validate = True):
"""
Extra-info descriptor constructor. By default this validates the
descriptor's content as it's parsed. This validation can be disabled to
either improve performance or be accepting of malformed data.
:param str raw_contents: extra-info content provided by the relay
:param bool validate: checks the validity of the extra-info descriptor if
**True**, skips these checks otherwise
:raises: **ValueError** if the contents are malformed and validate is **True**
"""
super(ExtraInfoDescriptor, self).__init__(raw_contents)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
self.nickname = None
self.fingerprint = None
self.published = None
self.geoip_db_digest = None
self.geoip6_db_digest = None
self.transport = {}
self.conn_bi_direct_end = None
self.conn_bi_direct_interval = None
self.conn_bi_direct_below = None
self.conn_bi_direct_read = None
self.conn_bi_direct_write = None
self.conn_bi_direct_both = None
self.read_history_end = None
self.read_history_interval = None
self.read_history_values = None
self.write_history_end = None
self.write_history_interval = None
self.write_history_values = None
self.cell_stats_end = None
self.cell_stats_interval = None
self.cell_processed_cells = None
self.cell_queued_cells = None
self.cell_time_in_queue = None
self.cell_circuits_per_decile = None
self.dir_stats_end = None
self.dir_stats_interval = None
self.dir_v2_ips = None
self.dir_v3_ips = None
self.dir_v2_share = None
self.dir_v3_share = None
self.dir_v2_requests = None
self.dir_v3_requests = None
self.dir_v2_responses = None
self.dir_v3_responses = None
self.dir_v2_responses_unknown = None
self.dir_v3_responses_unknown = None
self.dir_v2_direct_dl = None
self.dir_v3_direct_dl = None
self.dir_v2_direct_dl_unknown = None
self.dir_v3_direct_dl_unknown = None
self.dir_v2_tunneled_dl = None
self.dir_v3_tunneled_dl = None
self.dir_v2_tunneled_dl_unknown = None
self.dir_v3_tunneled_dl_unknown = None
self.dir_read_history_end = None
self.dir_read_history_interval = None
self.dir_read_history_values = None
self.dir_write_history_end = None
self.dir_write_history_interval = None
self.dir_write_history_values = None
self.entry_stats_end = None
self.entry_stats_interval = None
self.entry_ips = None
self.exit_stats_end = None
self.exit_stats_interval = None
self.exit_kibibytes_written = None
self.exit_kibibytes_read = None
self.exit_streams_opened = None
self.bridge_stats_end = None
self.bridge_stats_interval = None
self.bridge_ips = None
self.geoip_start_time = None
self.geoip_client_origins = None
self.ip_versions = None
self.ip_transports = None
self._unrecognized_lines = []
entries = _get_descriptor_components(raw_contents, validate)
if validate:
for keyword in self._required_fields():
if not keyword in entries:
raise ValueError("Extra-info descriptor must have a '%s' entry" % keyword)
for keyword in self._required_fields() + SINGLE_FIELDS:
if keyword in entries and len(entries[keyword]) > 1:
raise ValueError("The '%s' entry can only appear once in an extra-info descriptor" % keyword)
expected_first_keyword = self._first_keyword()
if expected_first_keyword and expected_first_keyword != entries.keys()[0]:
raise ValueError("Extra-info descriptor must start with a '%s' entry" % expected_first_keyword)
expected_last_keyword = self._last_keyword()
if expected_last_keyword and expected_last_keyword != entries.keys()[-1]:
raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)
self._parse(entries, validate)
def get_unrecognized_lines(self):
return list(self._unrecognized_lines)
def _parse(self, entries, validate):
"""
Parses a series of 'keyword => (value, pgp block)' mappings and applies
them as attributes.
:param dict entries: descriptor contents to be applied
:param bool validate: checks the validity of descriptor content if True
:raises: **ValueError** if an error occurs in validation
"""
for keyword, values in entries.items():
# most just work with the first (and only) value
value, _ = values[0]
line = "%s %s" % (keyword, value) # original line
if keyword == "extra-info":
# "extra-info" Nickname Fingerprint
extra_info_comp = value.split()
if len(extra_info_comp) < 2:
if not validate:
continue
raise ValueError("Extra-info line must have two values: %s" % line)
if validate:
if not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]):
raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0])
elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]):
raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % extra_info_comp[1])
self.nickname = extra_info_comp[0]
self.fingerprint = extra_info_comp[1]
elif keyword == "geoip-db-digest":
# "geoip-db-digest" Digest
if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line)
self.geoip_db_digest = value
elif keyword == "geoip6-db-digest":
# "geoip6-db-digest" Digest
if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
raise ValueError("Geoip v6 digest line had an invalid sha1 digest: %s" % line)
self.geoip6_db_digest = value
elif keyword == "transport":
# "transport" transportname address:port [arglist]
# Everything after the transportname is scrubbed in published bridge
# descriptors, so we'll never see it in practice.
#
# These entries really only make sense for bridges, but have been seen
# on non-bridges in the wild when the relay operator configured it this
# way.
for transport_value, _ in values:
name, address, port, args = None, None, None, None
if not ' ' in transport_value:
# scrubbed
name = transport_value
else:
# not scrubbed
value_comp = transport_value.split()
if len(value_comp) < 1:
raise ValueError("Transport line is missing its transport name: %s" % line)
else:
name = value_comp[0]
if len(value_comp) < 2:
raise ValueError("Transport line is missing its address:port value: %s" % line)
elif not ":" in value_comp[1]:
raise ValueError("Transport line's address:port entry is missing a colon: %s" % line)
else:
address, port_str = value_comp[1].split(':', 1)
if not stem.util.connection.is_valid_ipv4_address(address) or \
stem.util.connection.is_valid_ipv6_address(address):
raise ValueError("Transport line has a malformed address: %s" % line)
elif not stem.util.connection.is_valid_port(port_str):
raise ValueError("Transport line has a malformed port: %s" % line)
port = int(port_str)
if len(value_comp) >= 3:
args = value_comp[2:]
else:
args = []
self.transport[name] = (address, port, args)
elif keyword == "cell-circuits-per-decile":
# "cell-circuits-per-decile" num
if not value.isdigit():
if validate:
raise ValueError("Non-numeric cell-circuits-per-decile value: %s" % line)
else:
continue
stat = int(value)
if validate and stat < 0:
raise ValueError("Negative cell-circuits-per-decile value: %s" % line)
self.cell_circuits_per_decile = stat
elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"):
recognized_counts = {}
unrecognized_counts = {}
is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp")
key_set = DirResponse if is_response_stats else DirStat
key_type = "STATUS" if is_response_stats else "STAT"
error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line)
if value:
for entry in value.split(","):
if not "=" in entry:
if validate:
raise ValueError(error_msg)
else:
continue
status, count = entry.split("=", 1)
if count.isdigit():
if status in key_set:
recognized_counts[status] = int(count)
else:
unrecognized_counts[status] = int(count)
elif validate:
raise ValueError(error_msg)
if keyword == "dirreq-v2-resp":
self.dir_v2_responses = recognized_counts
self.dir_v2_responses_unknown = unrecognized_counts
elif keyword == "dirreq-v3-resp":
self.dir_v3_responses = recognized_counts
self.dir_v3_responses_unknown = unrecognized_counts
elif keyword == "dirreq-v2-direct-dl":
self.dir_v2_direct_dl = recognized_counts
self.dir_v2_direct_dl_unknown = unrecognized_counts
elif keyword == "dirreq-v3-direct-dl":
self.dir_v3_direct_dl = recognized_counts
self.dir_v3_direct_dl_unknown = unrecognized_counts
elif keyword == "dirreq-v2-tunneled-dl":
self.dir_v2_tunneled_dl = recognized_counts
self.dir_v2_tunneled_dl_unknown = unrecognized_counts
elif keyword == "dirreq-v3-tunneled-dl":
self.dir_v3_tunneled_dl = recognized_counts
self.dir_v3_tunneled_dl_unknown = unrecognized_counts
elif keyword in ("dirreq-v2-share", "dirreq-v3-share"):
# "<keyword>" num%
try:
if not value.endswith("%"):
raise ValueError()
percentage = float(value[:-1]) / 100
# Bug lets these be above 100%, however they're soon going away...
# https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html
if validate and percentage < 0:
raise ValueError("Negative percentage value: %s" % line)
if keyword == "dirreq-v2-share":
self.dir_v2_share = percentage
elif keyword == "dirreq-v3-share":
self.dir_v3_share = percentage
except ValueError as exc:
if validate:
raise ValueError("Value can't be parsed as a percentage: %s" % line)
elif keyword in ("cell-processed-cells", "cell-queued-cells", "cell-time-in-queue"):
# "<keyword>" num,...,num
entries = []
if value:
for entry in value.split(","):
try:
# Values should be positive but as discussed in ticket #5849
# there was a bug around this. It was fixed in tor 0.2.2.1.
entries.append(float(entry))
except ValueError:
if validate:
raise ValueError("Non-numeric entry in %s listing: %s" % (keyword, line))
if keyword == "cell-processed-cells":
self.cell_processed_cells = entries
elif keyword == "cell-queued-cells":
self.cell_queued_cells = entries
elif keyword == "cell-time-in-queue":
self.cell_time_in_queue = entries
elif keyword in ("published", "geoip-start-time"):
# "<keyword>" YYYY-MM-DD HH:MM:SS
try:
timestamp = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
if keyword == "published":
self.published = timestamp
elif keyword == "geoip-start-time":
self.geoip_start_time = timestamp
except ValueError:
if validate:
raise ValueError("Timestamp on %s line wasn't parsable: %s" % (keyword, line))
elif keyword in ("cell-stats-end", "entry-stats-end", "exit-stats-end", "bridge-stats-end", "dirreq-stats-end"):
# "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
try:
timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value)
if keyword == "cell-stats-end":
self.cell_stats_end = timestamp
self.cell_stats_interval = interval
elif keyword == "entry-stats-end":
self.entry_stats_end = timestamp
self.entry_stats_interval = interval
elif keyword == "exit-stats-end":
self.exit_stats_end = timestamp
self.exit_stats_interval = interval
elif keyword == "bridge-stats-end":
self.bridge_stats_end = timestamp
self.bridge_stats_interval = interval
elif keyword == "dirreq-stats-end":
self.dir_stats_end = timestamp
self.dir_stats_interval = interval
except ValueError as exc:
if validate:
raise exc
elif keyword == "conn-bi-direct":
# "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH
try:
timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
stats = remainder.split(",")
if len(stats) != 4 or not \
(stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()):
raise ValueError("conn-bi-direct line should end with four numeric values: %s" % line)
self.conn_bi_direct_end = timestamp
self.conn_bi_direct_interval = interval
self.conn_bi_direct_below = int(stats[0])
self.conn_bi_direct_read = int(stats[1])
self.conn_bi_direct_write = int(stats[2])
self.conn_bi_direct_both = int(stats[3])
except ValueError as exc:
if validate:
raise exc
elif keyword in ("read-history", "write-history", "dirreq-read-history", "dirreq-write-history"):
# "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
try:
timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
history_values = []
if remainder:
try:
history_values = [int(entry) for entry in remainder.split(",")]
except ValueError:
raise ValueError("%s line has non-numeric values: %s" % (keyword, line))
if keyword == "read-history":
self.read_history_end = timestamp
self.read_history_interval = interval
self.read_history_values = history_values
elif keyword == "write-history":
self.write_history_end = timestamp
self.write_history_interval = interval
self.write_history_values = history_values
elif keyword == "dirreq-read-history":
self.dir_read_history_end = timestamp
self.dir_read_history_interval = interval
self.dir_read_history_values = history_values
elif keyword == "dirreq-write-history":
self.dir_write_history_end = timestamp
self.dir_write_history_interval = interval
self.dir_write_history_values = history_values
except ValueError as exc:
if validate:
raise exc
elif keyword in ("exit-kibibytes-written", "exit-kibibytes-read", "exit-streams-opened"):
# "<keyword>" port=N,port=N,...
port_mappings = {}
error_msg = "Entries in %s line should only be PORT=N entries: %s" % (keyword, line)
if value:
for entry in value.split(","):
if not "=" in entry:
if validate:
raise ValueError(error_msg)
else:
continue
port, stat = entry.split("=", 1)
if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit():
if port != 'other':
port = int(port)
port_mappings[port] = int(stat)
elif validate:
raise ValueError(error_msg)
if keyword == "exit-kibibytes-written":
self.exit_kibibytes_written = port_mappings
elif keyword == "exit-kibibytes-read":
self.exit_kibibytes_read = port_mappings
elif keyword == "exit-streams-opened":
self.exit_streams_opened = port_mappings
elif keyword in ("dirreq-v2-ips", "dirreq-v3-ips", "dirreq-v2-reqs", "dirreq-v3-reqs", "geoip-client-origins", "entry-ips", "bridge-ips"):
# "<keyword>" CC=N,CC=N,...
#
# The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric
# locale codes for some special values, for instance...
# A1,"Anonymous Proxy"
# A2,"Satellite Provider"
# ??,"Unknown"
locale_usage = {}
error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line)
if value:
for entry in value.split(","):
if not "=" in entry:
if validate:
raise ValueError(error_msg)
else:
continue
locale, count = entry.split("=", 1)
if re.match("^[a-zA-Z0-9\?]{2}$", locale) and count.isdigit():
locale_usage[locale] = int(count)
elif validate:
raise ValueError(error_msg)
if keyword == "dirreq-v2-ips":
self.dir_v2_ips = locale_usage
elif keyword == "dirreq-v3-ips":
self.dir_v3_ips = locale_usage
elif keyword == "dirreq-v2-reqs":
self.dir_v2_requests = locale_usage
elif keyword == "dirreq-v3-reqs":
self.dir_v3_requests = locale_usage
elif keyword == "geoip-client-origins":
self.geoip_client_origins = locale_usage
elif keyword == "entry-ips":
self.entry_ips = locale_usage
elif keyword == "bridge-ips":
self.bridge_ips = locale_usage
elif keyword == "bridge-ip-versions":
self.ip_versions = {}
if value:
for entry in value.split(','):
if not '=' in entry:
raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
protocol, count = entry.split('=', 1)
if not count.isdigit():
raise stem.ProtocolError("IP protocol count was non-numeric (%s): %s" % (count, line))
self.ip_versions[protocol] = int(count)
elif keyword == "bridge-ip-transports":
self.ip_transports = {}
if value:
for entry in value.split(','):
if not '=' in entry:
raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
protocol, count = entry.split('=', 1)
if not count.isdigit():
raise stem.ProtocolError("Transport count was non-numeric (%s): %s" % (count, line))
self.ip_transports[protocol] = int(count)
else:
self._unrecognized_lines.append(line)
def digest(self):
"""
Provides the upper-case hex encoded sha1 of our content. This value is part
of the server descriptor entry for this relay.
:returns: **str** with the upper-case hex digest value for this server
descriptor
"""
raise NotImplementedError("Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass")
def _required_fields(self):
return REQUIRED_FIELDS
def _first_keyword(self):
return "extra-info"
def _last_keyword(self):
return "router-signature"
class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
"""
Relay extra-info descriptor, constructed from data such as that provided by
"GETINFO extra-info/digest/\*", cached descriptors, and metrics
(`specification <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
:var str signature: **\*** signature for this extrainfo descriptor
**\*** attribute is required when we're parsed with validation
"""
def __init__(self, raw_contents, validate = True):
self.signature = None
super(RelayExtraInfoDescriptor, self).__init__(raw_contents, validate)
@lru_cache()
def digest(self):
# our digest is calculated from everything except our signature
raw_content, ending = str(self), "\nrouter-signature\n"
raw_content = raw_content[:raw_content.find(ending) + len(ending)]
return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper()
def _parse(self, entries, validate):
entries = dict(entries) # shallow copy since we're destructive
# handles fields only in relay extra-info descriptors
for keyword, values in entries.items():
value, block_contents = values[0]
line = "%s %s" % (keyword, value) # original line
if block_contents:
line += "\n%s" % block_contents
if keyword == "router-signature":
if validate and not block_contents:
raise ValueError("Router signature line must be followed by a signature block: %s" % line)
self.signature = block_contents
del entries["router-signature"]
ExtraInfoDescriptor._parse(self, entries, validate)
class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
"""
Bridge extra-info descriptor (`bridge descriptor specification
<https://metrics.torproject.org/formats.html#bridgedesc>`_)
"""
def __init__(self, raw_contents, validate = True):
self._digest = None
super(BridgeExtraInfoDescriptor, self).__init__(raw_contents, validate)
def digest(self):
return self._digest
def _parse(self, entries, validate):
entries = dict(entries) # shallow copy since we're destructive
# handles fields only in bridge extra-info descriptors
for keyword, values in entries.items():
value, _ = values[0]
line = "%s %s" % (keyword, value) # original line
if keyword == "router-digest":
if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
raise ValueError("Router digest line had an invalid sha1 digest: %s" % line)
self._digest = value
del entries["router-digest"]
ExtraInfoDescriptor._parse(self, entries, validate)
def _required_fields(self):
excluded_fields = [
"router-signature",
]
included_fields = [
"router-digest",
]
return tuple(included_fields + [f for f in REQUIRED_FIELDS if not f in excluded_fields])
def _last_keyword(self):
return None

stem/descriptor/microdescriptor.py
@@ -0,0 +1,309 @@
# Copyright 2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for Tor microdescriptors, which contain a distilled version of a
relay's server descriptor. As of Tor version 0.2.3.3-alpha Tor no longer
downloads server descriptors by default, opting for microdescriptors instead.
Unlike most descriptor documents these aren't available on the metrics site
(since they don't contain any information that the server descriptors don't).
The limited information in microdescriptors makes them rather clunky to use
compared with server descriptors. For instance, microdescriptors lack the
relay's fingerprint, making it difficult to use them to look up the relay's
other descriptors.
To do so you need to match the microdescriptor's digest against its
corresponding router status entry. For added fun, as of this writing the
controller doesn't even surface those router status entries
(:trac:`7953`).
For instance, here's an example that prints the nicknames and fingerprints of
the exit relays.
::
import os
from stem.control import Controller
from stem.descriptor import parse_file
with Controller.from_port(port = 9051) as controller:
controller.authenticate()
exit_digests = set()
data_dir = controller.get_conf("DataDirectory")
for desc in controller.get_microdescriptors():
if desc.exit_policy.is_exiting_allowed():
exit_digests.add(desc.digest)
print "Exit Relays:"
for desc in parse_file(os.path.join(data_dir, 'cached-microdesc-consensus')):
if desc.digest in exit_digests:
print " %s (%s)" % (desc.nickname, desc.fingerprint)
Doing the same is trivial with server descriptors...
::
from stem.descriptor import parse_file
print "Exit Relays:"
for desc in parse_file("/home/atagar/.tor/cached-descriptors"):
if desc.exit_policy.is_exiting_allowed():
print " %s (%s)" % (desc.nickname, desc.fingerprint)
**Module Overview:**
::
Microdescriptor - Tor microdescriptor.
"""
import hashlib
import stem.descriptor.router_status_entry
import stem.exit_policy
from stem.descriptor import (
Descriptor,
_get_descriptor_components,
_read_until_keywords,
)
try:
# added in python 3.2
from functools import lru_cache
except ImportError:
from stem.util.lru_cache import lru_cache
REQUIRED_FIELDS = (
"onion-key",
)
SINGLE_FIELDS = (
"onion-key",
"ntor-onion-key",
"family",
"p",
"p6",
)
def _parse_file(descriptor_file, validate = True, **kwargs):
"""
Iterates over the microdescriptors in a file.
:param file descriptor_file: file with descriptor content
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param dict kwargs: additional arguments for the descriptor constructor
:returns: iterator for Microdescriptor instances in the file
:raises:
* **ValueError** if the contents is malformed and validate is True
* **IOError** if the file can't be read
"""
while True:
annotations = _read_until_keywords("onion-key", descriptor_file)
# read until we reach an annotation or onion-key line
descriptor_lines = []
# read the onion-key line, done if we're at the end of the document
onion_key_line = descriptor_file.readline()
if onion_key_line:
descriptor_lines.append(onion_key_line)
else:
break
while True:
last_position = descriptor_file.tell()
line = descriptor_file.readline()
if not line:
break # EOF
elif line.startswith(b"@") or line.startswith(b"onion-key"):
descriptor_file.seek(last_position)
break
else:
descriptor_lines.append(line)
if descriptor_lines:
# strip newlines from annotations
annotations = map(bytes.strip, annotations)
descriptor_text = bytes.join(b"", descriptor_lines)
yield Microdescriptor(descriptor_text, validate, annotations, **kwargs)
else:
break # done parsing descriptors
class Microdescriptor(Descriptor):
"""
Microdescriptor (`descriptor specification
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)
:var str digest: **\*** hex digest for this microdescriptor, this can be used
to match against the corresponding digest attribute of a
:class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3`
:var str onion_key: **\*** key used to encrypt EXTEND cells
:var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
:var list or_addresses: **\*** alternative for our address/or_port attributes, each
entry is a tuple of the form (address (**str**), port (**int**), is_ipv6
(**bool**))
:var list family: **\*** nicknames or fingerprints of declared family
:var stem.exit_policy.MicroExitPolicy exit_policy: **\*** relay's exit policy
:var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6
**\*** attribute is required when we're parsed with validation
"""
def __init__(self, raw_contents, validate = True, annotations = None):
super(Microdescriptor, self).__init__(raw_contents)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
self.onion_key = None
self.ntor_onion_key = None
self.or_addresses = []
self.family = []
self.exit_policy = stem.exit_policy.MicroExitPolicy("reject 1-65535")
self.exit_policy_v6 = None
self._unrecognized_lines = []
self._annotation_lines = annotations if annotations else []
entries = _get_descriptor_components(raw_contents, validate)
self._parse(entries, validate)
if validate:
self._check_constraints(entries)
def get_unrecognized_lines(self):
return list(self._unrecognized_lines)
@lru_cache()
def get_annotations(self):
"""
Provides content that appeared prior to the descriptor. If this comes from
the cached-microdescs then this commonly contains content like...
::
@last-listed 2013-02-24 00:18:30
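... which would be provided as the following mapping (note that both the
keys and values are **bytes**)...
::
{b'@last-listed': b'2013-02-24 00:18:30'}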
:returns: **dict** with the key/value pairs in our annotations
"""
annotation_dict = {}
for line in self._annotation_lines:
if b" " in line:
key, value = line.split(b" ", 1)
annotation_dict[key] = value
else:
annotation_dict[line] = None
return annotation_dict
def get_annotation_lines(self):
"""
Provides the lines of content that appeared prior to the descriptor. This
is the same as the
:func:`~stem.descriptor.microdescriptor.Microdescriptor.get_annotations`
results, but with the unparsed lines and ordering retained.
:returns: **list** with the lines of annotation that came before this descriptor
"""
return self._annotation_lines
def _parse(self, entries, validate):
"""
Parses a series of 'keyword => (value, pgp block)' mappings and applies
them as attributes.
:param dict entries: descriptor contents to be applied
:param bool validate: checks the validity of descriptor content if **True**
:raises: **ValueError** if an error occurs in validation
"""
for keyword, values in entries.items():
# most just work with the first (and only) value
value, block_contents = values[0]
line = "%s %s" % (keyword, value) # original line
if block_contents:
line += "\n%s" % block_contents
if keyword == "onion-key":
if validate and not block_contents:
raise ValueError("Onion key line must be followed by a public key: %s" % line)
self.onion_key = block_contents
elif keyword == "ntor-onion-key":
self.ntor_onion_key = value
elif keyword == "a":
for entry, _ in values:
stem.descriptor.router_status_entry._parse_a_line(self, entry, validate)
elif keyword == "family":
self.family = value.split(" ")
elif keyword == "p":
stem.descriptor.router_status_entry._parse_p_line(self, value, validate)
elif keyword == "p6":
self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
else:
self._unrecognized_lines.append(line)
def _check_constraints(self, entries):
"""
Does a basic check that the entries conform to this descriptor type's
constraints.
:param dict entries: keyword => (value, pgp key) entries
:raises: **ValueError** if an issue arises in validation
"""
for keyword in REQUIRED_FIELDS:
if not keyword in entries:
raise ValueError("Microdescriptor must have a '%s' entry" % keyword)
for keyword in SINGLE_FIELDS:
if keyword in entries and len(entries[keyword]) > 1:
raise ValueError("The '%s' entry can only appear once in a microdescriptor" % keyword)
if "onion-key" != entries.keys()[0]:
raise ValueError("Microdescriptor must start with a 'onion-key' entry")
def _compare(self, other, method):
if not isinstance(other, Microdescriptor):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)

File diff suppressed because it is too large

View file

@@ -0,0 +1,580 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Utilities for reading descriptors from local directories and archives. This is
mostly done through the :class:`~stem.descriptor.reader.DescriptorReader`
class, which is an iterator for the descriptor data in a series of
destinations. For example...
::
my_descriptors = [
"/tmp/server-descriptors-2012-03.tar.bz2",
"/tmp/archived_descriptors/",
]
# prints the contents of all the descriptor files
with DescriptorReader(my_descriptors) as reader:
for descriptor in reader:
print descriptor
This ignores files that cannot be processed due to read errors or unparsable
content. To be notified of skipped files you can register a listener with
:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`.
The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last
modified timestamps for descriptor files that it has read so it can skip
unchanged files if run again. This listing of processed files can also be
persisted and applied to other
:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the
following prints descriptors as they're changed over the course of a minute,
and picks up where it left off if run again...
::
reader = DescriptorReader(["/tmp/descriptor_data"])
try:
processed_files = load_processed_files("/tmp/used_descriptors")
reader.set_processed_files(processed_files)
except: pass # could not load, maybe this is the first run
start_time = time.time()
while (time.time() - start_time) < 60:
# prints any descriptors that have changed since last checked
with reader:
for descriptor in reader:
print descriptor
time.sleep(1)
save_processed_files("/tmp/used_descriptors", reader.get_processed_files())
**Module Overview:**
::
load_processed_files - Loads a listing of processed files
save_processed_files - Saves a listing of processed files
DescriptorReader - Iterator for descriptor data on the local file system
|- get_processed_files - provides the listing of files that we've processed
|- set_processed_files - sets our tracking of the files we have processed
|- register_read_listener - adds a listener for when files are read
|- register_skip_listener - adds a listener that's notified of skipped files
|- start - begins reading descriptor data
|- stop - stops reading descriptor data
|- __enter__ / __exit__ - manages the descriptor reader thread in the context
+- __iter__ - iterates over descriptor data in unread files
FileSkipped - Base exception for a file that was skipped
|- AlreadyRead - We've already read a file with this last modified timestamp
|- ParsingFailure - Contents can't be parsed as descriptor data
|- UnrecognizedType - File extension indicates non-descriptor data
+- ReadFailed - Wraps an error that was raised while reading the file
+- FileMissing - File does not exist
"""
import mimetypes
import os
import Queue
import tarfile
import threading
import stem.descriptor
import stem.prereq
# flag to indicate when the reader thread is out of descriptor files to read
FINISHED = "DONE"
class FileSkipped(Exception):
"Base error when we can't provide descriptor data from a file."
class AlreadyRead(FileSkipped):
"""
Already read a file with this 'last modified' timestamp or later.
:param int last_modified: unix timestamp for when the file was last modified
:param int last_modified_when_read: unix timestamp for the modification time
when we last read this file
"""
def __init__(self, last_modified, last_modified_when_read):
super(AlreadyRead, self).__init__("File has already been read since it was last modified. modification time: %s, last read: %s" % (last_modified, last_modified_when_read))
self.last_modified = last_modified
self.last_modified_when_read = last_modified_when_read
class ParsingFailure(FileSkipped):
"""
File contents could not be parsed as descriptor data.
:param ValueError exception: issue that arose when parsing
"""
def __init__(self, parsing_exception):
super(ParsingFailure, self).__init__(parsing_exception)
self.exception = parsing_exception
class UnrecognizedType(FileSkipped):
"""
File doesn't contain descriptor data. This could either be due to its file
type or because it doesn't conform to a recognizable descriptor type.
:param tuple mime_type: the (type, encoding) tuple provided by mimetypes.guess_type()
"""
def __init__(self, mime_type):
super(UnrecognizedType, self).__init__("Unrecognized mime type: %s (%s)" % mime_type)
self.mime_type = mime_type
class ReadFailed(FileSkipped):
"""
An IOError occurred while trying to read the file.
:param IOError exception: issue that arose when reading the file, **None** if
this arose due to the file not being present
"""
def __init__(self, read_exception):
super(ReadFailed, self).__init__(read_exception)
self.exception = read_exception
class FileMissing(ReadFailed):
"File does not exist."
def __init__(self):
super(FileMissing, self).__init__("File does not exist")
def load_processed_files(path):
"""
Loads a dictionary of 'path => last modified timestamp' mappings, as
persisted by :func:`~stem.descriptor.reader.save_processed_files`, from a
file.
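Each line of that file is an absolute path and its last modified unix
timestamp, separated by a single space. For instance, a hypothetical entry
would look like...
::
/tmp/archived_descriptors/server-descriptors-2012-03.tar.bz2 1380532070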
:param str path: location to load the processed files dictionary from
:returns: **dict** of 'path (**str**) => last modified unix timestamp
(**int**)' mappings
:raises:
* **IOError** if unable to read the file
* **TypeError** if unable to parse the file's contents
"""
processed_files = {}
with open(path) as input_file:
for line in input_file.readlines():
line = line.strip()
if not line:
continue # skip blank lines
if not " " in line:
raise TypeError("Malformed line: %s" % line)
path, timestamp = line.rsplit(" ", 1)
if not os.path.isabs(path):
raise TypeError("'%s' is not an absolute path" % path)
elif not timestamp.isdigit():
raise TypeError("'%s' is not an integer timestamp" % timestamp)
processed_files[path] = int(timestamp)
return processed_files
def save_processed_files(path, processed_files):
"""
Persists a dictionary of 'path => last modified timestamp' mappings (as
provided by the DescriptorReader's
:func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method)
so that they can be loaded later and applied to another
:class:`~stem.descriptor.reader.DescriptorReader`.
:param str path: location to save the processed files dictionary to
:param dict processed_files: 'path => last modified' mappings
:raises:
* **IOError** if unable to write to the file
* **TypeError** if processed_files is of the wrong type
"""
# makes the parent directory if it doesn't already exist
try:
path_dir = os.path.dirname(path)
if not os.path.exists(path_dir):
os.makedirs(path_dir)
except OSError as exc:
raise IOError(exc)
with open(path, "w") as output_file:
for path, timestamp in processed_files.items():
if not os.path.isabs(path):
raise TypeError("Only absolute paths are acceptable: %s" % path)
output_file.write("%s %i\n" % (path, timestamp))
class DescriptorReader(object):
"""
Iterator for the descriptor data on the local file system. This can process
text files, tarball archives (gzip or bzip2), or recurse directories.
By default this limits the number of descriptors that we'll read ahead before
waiting for our caller to fetch some of them. This is included to avoid
unbounded memory usage.
Our persistence_path argument is a convenient way to persist the listing of
files we have processed between runs. However, it doesn't allow for error
handling. If you want that then use the
:func:`~stem.descriptor.reader.load_processed_files` and
:func:`~stem.descriptor.reader.save_processed_files` functions instead.
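For example, here's a minimal sketch that only processes descriptor files
which have changed since the prior run (the paths are hypothetical)...
::
with DescriptorReader(['/tmp/descriptor_data'], persistence_path = '/tmp/used_descriptors') as reader:
  for descriptor in reader:
    print descriptor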
:param str,list target: path or list of paths for files or directories to be read from
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param bool follow_links: determines if we'll follow symlinks when traversing
directories (requires python 2.6)
:param int buffer_size: descriptors we'll buffer before waiting for some to
be read, this is unbounded if zero
:param str persistence_path: if set we will load and save processed file
listings from this path, errors are ignored
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:param dict kwargs: additional arguments for the descriptor constructor
"""
def __init__(self, target, validate = True, follow_links = False, buffer_size = 100, persistence_path = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
if isinstance(target, (bytes, unicode)):
self._targets = [target]
else:
self._targets = target
# expand any relative paths we got
self._targets = map(os.path.abspath, self._targets)
self._validate = validate
self._follow_links = follow_links
self._persistence_path = persistence_path
self._document_handler = document_handler
self._kwargs = kwargs
self._read_listeners = []
self._skip_listeners = []
self._processed_files = {}
self._reader_thread = None
self._reader_thread_lock = threading.RLock()
self._iter_lock = threading.RLock()
self._iter_notice = threading.Event()
self._is_stopped = threading.Event()
self._is_stopped.set()
# Descriptors that we have read but not yet provided to the caller. A
# FINISHED entry is used by the reading thread to indicate the end.
self._unreturned_descriptors = Queue.Queue(buffer_size)
if self._persistence_path:
try:
processed_files = load_processed_files(self._persistence_path)
self.set_processed_files(processed_files)
except:
pass
def get_processed_files(self):
"""
For each file that we have read descriptor data from this provides a
mapping of the form...
::
absolute path (str) => last modified unix timestamp (int)
This includes entries set through the
:func:`~stem.descriptor.reader.DescriptorReader.set_processed_files`
method. Each run resets this to only the files that were present during
that run.
:returns: **dict** with the absolute paths and unix timestamp for the last
modified times of the files we have processed
"""
# make sure that we only provide back absolute paths
return dict((os.path.abspath(k), v) for (k, v) in self._processed_files.items())
def set_processed_files(self, processed_files):
"""
Sets the listing of the files we have processed. Most often this is used
with a newly created :class:`~stem.descriptor.reader.DescriptorReader` to
pre-populate the listing of descriptor files that we have seen.
:param dict processed_files: mapping of absolute paths (**str**) to unix
timestamps for the last modified time (**int**)
"""
self._processed_files = dict(processed_files)
def register_read_listener(self, listener):
"""
Registers a listener for when files are read. This is executed prior to
processing files. Listeners are expected to be of the form...
::
my_listener(path)
:param functor listener: functor to be notified when files are read
"""
self._read_listeners.append(listener)
def register_skip_listener(self, listener):
"""
Registers a listener for files that are skipped. This listener is expected
to be a functor of the form...
::
my_listener(path, exception)
:param functor listener: functor to be notified of files that are skipped
due to read errors or because they couldn't be parsed as valid descriptor data
"""
self._skip_listeners.append(listener)
def get_buffered_descriptor_count(self):
"""
Provides the number of descriptors that are waiting to be iterated over.
This is limited to the buffer_size that we were constructed with.
:returns: **int** for the estimated number of currently enqueued
descriptors, this is not entirely reliable
"""
return self._unreturned_descriptors.qsize()
def start(self):
"""
Starts reading our descriptor files.
:raises: **ValueError** if we're already reading the descriptor files
"""
with self._reader_thread_lock:
if self._reader_thread:
raise ValueError("Already running, you need to call stop() first")
else:
self._is_stopped.clear()
self._reader_thread = threading.Thread(target = self._read_descriptor_files, name="Descriptor Reader")
self._reader_thread.setDaemon(True)
self._reader_thread.start()
def stop(self):
"""
Stops further reading of descriptor files.
"""
with self._reader_thread_lock:
self._is_stopped.set()
self._iter_notice.set()
# clears our queue to unblock enqueue calls
try:
while True:
self._unreturned_descriptors.get_nowait()
except Queue.Empty:
pass
self._reader_thread.join()
self._reader_thread = None
if self._persistence_path:
try:
processed_files = self.get_processed_files()
save_processed_files(self._persistence_path, processed_files)
except:
pass
def _read_descriptor_files(self):
new_processed_files = {}
remaining_files = list(self._targets)
while remaining_files and not self._is_stopped.is_set():
target = remaining_files.pop(0)
if not os.path.exists(target):
self._notify_skip_listeners(target, FileMissing())
continue
if os.path.isdir(target):
walker = os.walk(target, followlinks = self._follow_links)
self._handle_walker(walker, new_processed_files)
else:
self._handle_file(target, new_processed_files)
self._processed_files = new_processed_files
if not self._is_stopped.is_set():
self._unreturned_descriptors.put(FINISHED)
self._iter_notice.set()
def __iter__(self):
with self._iter_lock:
while not self._is_stopped.is_set():
try:
descriptor = self._unreturned_descriptors.get_nowait()
if descriptor == FINISHED:
break
else:
yield descriptor
except Queue.Empty:
self._iter_notice.wait()
self._iter_notice.clear()
def _handle_walker(self, walker, new_processed_files):
for root, _, files in walker:
for filename in files:
self._handle_file(os.path.join(root, filename), new_processed_files)
# this can take a while if, say, we're including the root directory
if self._is_stopped.is_set():
return
def _handle_file(self, target, new_processed_files):
# This is a file. Register its last modified timestamp and check if
# it's a file that we should skip.
try:
last_modified = int(os.stat(target).st_mtime)
last_used = self._processed_files.get(target)
new_processed_files[target] = last_modified
except OSError as exc:
self._notify_skip_listeners(target, ReadFailed(exc))
return
if last_used and last_used >= last_modified:
self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used))
return
# Block devices and such are never descriptors, and can cause us to block
# for quite a while, so we skip anything that isn't a regular file.
if not os.path.isfile(target):
return
# The mimetypes module only checks the file extension. To actually
# check the content (like the 'file' command) we'd need something like
# pymagic (https://github.com/cloudburst/pymagic).
target_type = mimetypes.guess_type(target)
# Checking if it's a tar file may fail due to permissions so falling back
# to the mime type...
#
# IOError: [Errno 13] Permission denied: '/vmlinuz.old'
#
# With python 3 insufficient permissions raises an AttributeError instead...
#
# http://bugs.python.org/issue17059
try:
is_tar = tarfile.is_tarfile(target)
except (IOError, AttributeError):
is_tar = target_type[0] == 'application/x-tar'
if target_type[0] in (None, 'text/plain'):
# either '.txt' or an unknown type
self._handle_descriptor_file(target, target_type)
elif is_tar:
# handles gzip, bz2, and decompressed tarballs among others
self._handle_archive(target)
else:
self._notify_skip_listeners(target, UnrecognizedType(target_type))
def _handle_descriptor_file(self, target, mime_type):
try:
self._notify_read_listeners(target)
with open(target, 'rb') as target_file:
for desc in stem.descriptor.parse_file(target_file, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
if self._is_stopped.is_set():
return
self._unreturned_descriptors.put(desc)
self._iter_notice.set()
except TypeError as exc:
self._notify_skip_listeners(target, UnrecognizedType(mime_type))
except ValueError as exc:
self._notify_skip_listeners(target, ParsingFailure(exc))
except IOError as exc:
self._notify_skip_listeners(target, ReadFailed(exc))
def _handle_archive(self, target):
# TODO: This would be nicer via the 'with' keyword, but tarfile's __exit__
# method was added sometime after python 2.5. We should change this when
# we drop python 2.5 support.
tar_file = None
try:
self._notify_read_listeners(target)
tar_file = tarfile.open(target)
for tar_entry in tar_file:
if tar_entry.isfile():
entry = tar_file.extractfile(tar_entry)
try:
for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
if self._is_stopped.is_set():
return
desc._set_path(os.path.abspath(target))
desc._set_archive_path(entry.name)
self._unreturned_descriptors.put(desc)
self._iter_notice.set()
except TypeError as exc:
self._notify_skip_listeners(target, ParsingFailure(exc))
except ValueError as exc:
self._notify_skip_listeners(target, ParsingFailure(exc))
finally:
entry.close()
except IOError as exc:
self._notify_skip_listeners(target, ReadFailed(exc))
finally:
if tar_file:
tar_file.close()
def _notify_read_listeners(self, path):
for listener in self._read_listeners:
listener(path)
def _notify_skip_listeners(self, path, exception):
for listener in self._skip_listeners:
listener(path, exception)
def __enter__(self):
self.start()
return self
def __exit__(self, exit_type, value, traceback):
self.stop()

View file

@@ -0,0 +1,758 @@
# Copyright 2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Module for remotely retrieving descriptors from directory authorities and
mirrors. This is most easily done through the
:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
:class:`~stem.descriptor.remote.Query` instances to get you the descriptor
content. For example...
::
from stem.descriptor.remote import DescriptorDownloader
downloader = DescriptorDownloader(
use_mirrors = True,
timeout = 10,
)
query = downloader.get_server_descriptors()
print "Exit Relays:"
try:
for desc in query.run():
if desc.exit_policy.is_exiting_allowed():
print " %s (%s)" % (desc.nickname, desc.fingerprint)
print
print "Query took %0.2f seconds" % query.runtime
except Exception as exc:
print "Unable to retrieve the server descriptors: %s" % exc
If you don't care about errors then you can also simply iterate over the query
itself...
::
for desc in downloader.get_server_descriptors():
if desc.exit_policy.is_exiting_allowed():
print " %s (%s)" % (desc.nickname, desc.fingerprint)
::
get_authorities - Provides tor directory information.
DirectoryAuthority - Information about a tor directory authority.
Query - Asynchronous request to download tor descriptors
|- start - issues the query if it isn't already running
+- run - blocks until the request is finished and provides the results
DescriptorDownloader - Configurable class for issuing queries
|- use_directory_mirrors - use directory mirrors to download future descriptors
|- get_server_descriptors - provides present server descriptors
|- get_extrainfo_descriptors - provides present extrainfo descriptors
|- get_microdescriptors - provides present microdescriptors
|- get_consensus - provides the present consensus or router status entries
|- get_key_certificates - provides present authority key certificates
+- query - request an arbitrary descriptor resource
.. data:: MAX_FINGERPRINTS
Maximum number of descriptors that can be requested at a time by their
fingerprints.
.. data:: MAX_MICRODESCRIPTOR_HASHES
Maximum number of microdescriptors that can be requested at a time by their
hashes.
"""
import io
import random
import sys
import threading
import time
import urllib2
import zlib
import stem.descriptor
from stem import Flag
from stem.util import log
# Tor has a limited number of descriptors we can fetch explicitly by their
# fingerprint or hashes due to a limit on the url length by squid proxies.
MAX_FINGERPRINTS = 96
MAX_MICRODESCRIPTOR_HASHES = 92
# We commonly only want authorities that vote in the consensus, and hence have
# a v3ident.
HAS_V3IDENT = lambda auth: auth.v3ident is not None
def _guess_descriptor_type(resource):
# Attempts to determine the descriptor type based on the resource url. This
# raises a ValueError if the resource isn't recognized.
if resource.startswith('/tor/server/'):
return 'server-descriptor 1.0'
elif resource.startswith('/tor/extra/'):
return 'extra-info 1.0'
elif resource.startswith('/tor/micro/'):
return 'microdescriptor 1.0'
elif resource.startswith('/tor/status-vote/'):
return 'network-status-consensus-3 1.0'
elif resource.startswith('/tor/keys/'):
return 'dir-key-certificate-3 1.0'
else:
raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
class Query(object):
"""
Asynchronous request for descriptor content from a directory authority or
mirror. These can either be made through the
:class:`~stem.descriptor.remote.DescriptorDownloader` or directly for more
advanced usage.
To block on the response and get results either call
:func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
:func:`~stem.descriptor.remote.Query.run` method passes along any errors that
arise...
::
from stem.descriptor.remote import Query
query = Query(
'/tor/server/all.z',
block = True,
timeout = 30,
)
print "Current relays:"
if not query.error:
for desc in query:
print desc.fingerprint
else:
print "Unable to retrieve the server descriptors: %s" % query.error
... while iterating fails silently...
::
print "Current relays:"
for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'):
print desc.fingerprint
In either case exceptions are available via our 'error' attribute.
Tor provides quite a few different descriptor resources via its directory
protocol (see section 4.2 and later of the `dir-spec
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
Commonly useful ones include...
===================================== ===========
Resource Description
===================================== ===========
/tor/server/all.z all present server descriptors
/tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints
/tor/extra/all.z all present extrainfo descriptors
/tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints
/tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes
/tor/status-vote/current/consensus.z present consensus
/tor/keys/all.z key certificates for the authorities
/tor/keys/fp/<v3ident1>+<v3ident2>.z key certificates for specific authorities
===================================== ===========
The '.z' suffix can be excluded to get a plaintext rather than compressed
response. Compression is handled transparently, so this shouldn't matter to
the caller.
:var str resource: resource being fetched, such as '/tor/server/all.z'
:var str descriptor_type: type of descriptors being fetched (for options see
:func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
resource if **None**
:var list endpoints: (address, dirport) tuples of the authority or mirror
we're querying, this uses authorities if undefined
:var int retries: number of times to attempt the request if downloading it
fails
:var bool fall_back_to_authority: if **True** then the final retry attempt is
made to a directory authority rather than a mirror
:var str content: downloaded descriptor content
:var Exception error: exception if a problem occurred
:var bool is_done: flag that indicates if our request has finished
:var str download_url: last url used to download the descriptor, this is
unset until we've actually made a download attempt
:var float start_time: unix timestamp when we first started running
:var float timeout: duration before we'll time out our request
:var float runtime: time our query took, this is **None** if it's not yet
finished
:var bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:var stem.descriptor.__init__.DocumentHandler document_handler: method in
which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:var dict kwargs: additional arguments for the descriptor constructor
:param bool start: start making the request when constructed (default is **True**)
:param bool block: only return after the request has been completed, this is
the same as running **query.run(True)** (default is **False**)
"""
def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
if not resource.startswith('/'):
raise ValueError("Resources should start with a '/': %s" % resource)
self.resource = resource
if descriptor_type:
self.descriptor_type = descriptor_type
else:
self.descriptor_type = _guess_descriptor_type(resource)
self.endpoints = endpoints if endpoints else []
self.retries = retries
self.fall_back_to_authority = fall_back_to_authority
self.content = None
self.error = None
self.is_done = False
self.download_url = None
self.start_time = None
self.timeout = timeout
self.runtime = None
self.validate = validate
self.document_handler = document_handler
self.kwargs = kwargs
self._downloader_thread = None
self._downloader_thread_lock = threading.RLock()
if start:
self.start()
if block:
self.run(True)
def start(self):
"""
Starts downloading the descriptors if we haven't started already.
"""
with self._downloader_thread_lock:
if self._downloader_thread is None:
self._downloader_thread = threading.Thread(
name = "Descriptor Query",
target = self._download_descriptors,
args = (self.retries,)
)
self._downloader_thread.setDaemon(True)
self._downloader_thread.start()
def run(self, suppress = False):
"""
Blocks until our request is complete then provides the descriptors. If we
haven't yet started our request then this does so.
:param bool suppress: avoids raising exceptions if **True**
:returns: list for the requested :class:`~stem.descriptor.__init__.Descriptor` instances
:raises:
Using the iterator can fail with the following if **suppress** is
**False**...
* **ValueError** if the descriptor contents is malformed
* **socket.timeout** if our request timed out
* **urllib2.URLError** for most request failures
Note that the urllib2 module may fail with other exception types, in
which case we'll pass it along.
"""
return list(self._run(suppress))
def _run(self, suppress):
with self._downloader_thread_lock:
self.start()
self._downloader_thread.join()
if self.error:
if suppress:
return
raise self.error
else:
if self.content is None:
if suppress:
return
raise ValueError('BUG: _download_descriptors() finished without either results or an error')
try:
results = stem.descriptor.parse_file(
io.BytesIO(self.content),
self.descriptor_type,
validate = self.validate,
document_handler = self.document_handler,
**self.kwargs
)
for desc in results:
yield desc
except ValueError as exc:
self.error = exc # encountered a parsing error
if suppress:
return
raise self.error
def __iter__(self):
for desc in self._run(True):
yield desc
def _pick_url(self, use_authority = False):
"""
Provides a url that can be queried. If we have multiple endpoints then one
will be picked randomly.
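For instance, if moria1 were picked for the '/tor/server/all.z' resource
then this would provide...
::
http://128.31.0.39:9131/tor/server/all.z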
:param bool use_authority: ignores our endpoints and uses a directory
authority instead
:returns: **str** for the url being queried by this request
"""
if use_authority or not self.endpoints:
authority = random.choice(filter(HAS_V3IDENT, get_authorities().values()))
address, dirport = authority.address, authority.dir_port
else:
address, dirport = random.choice(self.endpoints)
return "http://%s:%i/%s" % (address, dirport, self.resource.lstrip('/'))
def _download_descriptors(self, retries):
try:
use_authority = retries == 0 and self.fall_back_to_authority
self.download_url = self._pick_url(use_authority)
self.start_time = time.time()
response = urllib2.urlopen(self.download_url, timeout = self.timeout).read()
if self.download_url.endswith('.z'):
response = zlib.decompress(response)
self.content = response.strip()
self.runtime = time.time() - self.start_time
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
exc = sys.exc_info()[1]
if retries > 0:
log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc))
return self._download_descriptors(retries - 1)
else:
log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc))
self.error = exc
finally:
self.is_done = True
class DescriptorDownloader(object):
"""
Configurable class that issues :class:`~stem.descriptor.remote.Query`
instances on your behalf.
:param bool use_mirrors: downloads the present consensus and uses the directory
mirrors to fetch future requests, this fails silently if the consensus
cannot be downloaded
:param default_args: default arguments for the
:class:`~stem.descriptor.remote.Query` constructor
"""
def __init__(self, use_mirrors = False, **default_args):
self._default_args = default_args
authorities = filter(HAS_V3IDENT, get_authorities().values())
self._endpoints = [(auth.address, auth.dir_port) for auth in authorities]
if use_mirrors:
try:
start_time = time.time()
self.use_directory_mirrors()
log.debug("Retrieved directory mirrors (took %0.2fs)" % (time.time() - start_time))
except Exception as exc:
log.debug("Unable to retrieve directory mirrors: %s" % exc)
def use_directory_mirrors(self):
"""
Downloads the present consensus and configures ourselves to use directory
mirrors, in addition to authorities.
:returns: :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
from which we got the directory mirrors
:raises: **Exception** if unable to determine the directory mirrors
"""
authorities = filter(HAS_V3IDENT, get_authorities().values())
new_endpoints = set([(auth.address, auth.dir_port) for auth in authorities])
consensus = list(self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run())[0]
for desc in consensus.routers.values():
if Flag.V2DIR in desc.flags:
new_endpoints.add((desc.address, desc.dir_port))
# we need our endpoints to be a list rather than set for random.choice()
self._endpoints = list(new_endpoints)
return consensus
def get_server_descriptors(self, fingerprints = None, **query_args):
"""
Provides the server descriptors with the given fingerprints. If no
fingerprints are provided then this returns all descriptors in the present
consensus.
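For example, here's a sketch that fetches a single relay's server descriptor
by its fingerprint (moria1's in this case)...
::
downloader = DescriptorDownloader()
desc = downloader.get_server_descriptors('9695DFC35FFEB861329B9F1AB04C46397020CE31').run()[0]
print '%s (%s)' % (desc.nickname, desc.fingerprint)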
:param str,list fingerprints: fingerprint or list of fingerprints to be
retrieved, gets all descriptors if **None**
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the server descriptors
:raises: **ValueError** if we request more than 96 descriptors by their
fingerprints (this is due to a limit on the url length by squid proxies).
"""
resource = '/tor/server/all.z'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
if fingerprints:
if len(fingerprints) > MAX_FINGERPRINTS:
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
return self.query(resource, **query_args)
def get_extrainfo_descriptors(self, fingerprints = None, **query_args):
"""
Provides the extrainfo descriptors with the given fingerprints. If no
fingerprints are provided then this returns all descriptors in the present
consensus.
:param str,list fingerprints: fingerprint or list of fingerprints to be
retrieved, gets all descriptors if **None**
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the extrainfo descriptors
:raises: **ValueError** if we request more than 96 descriptors by their
fingerprints (this is due to a limit on the url length by squid proxies).
"""
resource = '/tor/extra/all.z'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
if fingerprints:
if len(fingerprints) > MAX_FINGERPRINTS:
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
return self.query(resource, **query_args)
def get_microdescriptors(self, hashes, **query_args):
"""
Provides the microdescriptors with the given hashes. To get these see the
'microdescriptor_hashes' attribute of
:class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`. Note
that these are only provided via a microdescriptor consensus (such as
'cached-microdesc-consensus' in your data directory).
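For example (the hash below is hypothetical)...
::
desc = downloader.get_microdescriptors(['aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70']).run()[0]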
:param str,list hashes: microdescriptor hash or list of hashes to be
retrieved
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the microdescriptors
:raises: **ValueError** if we request more than 92 microdescriptors by their
hashes (this is due to a limit on the url length by squid proxies).
"""
if isinstance(hashes, str):
hashes = [hashes]
if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_HASHES)
return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)
def get_consensus(self, authority_v3ident = None, **query_args):
"""
Provides the present router status entries.
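For example, to get the present consensus as a single document rather than
its individual router status entries...
::
consensus = downloader.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run()[0]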
:param str authority_v3ident: fingerprint of the authority key for which
to get the consensus, see `'v3ident' in tor's config.c
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
for the values.
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the router status
entries
"""
resource = '/tor/status-vote/current/consensus'
if authority_v3ident:
resource += '/%s' % authority_v3ident
return self.query(resource + '.z', **query_args)
def get_vote(self, authority, **query_args):
"""
Provides the present vote for a given directory authority.
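For example, a sketch that fetches moria1's current vote...
::
vote = downloader.get_vote(get_authorities()['moria1'])
for entry in vote.run():
  print entry.fingerprint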
:param stem.descriptor.remote.DirectoryAuthority authority: authority for which to retrieve a vote for
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the router status
entries
"""
resource = '/tor/status-vote/current/authority'
if not 'endpoints' in query_args:
query_args['endpoints'] = [(authority.address, authority.dir_port)]
return self.query(resource + '.z', **query_args)
def get_key_certificates(self, authority_v3idents = None, **query_args):
"""
Provides the key certificates for authorities with the given fingerprints.
If no fingerprints are provided then this returns all present key
certificates.
:param str,list authority_v3idents: fingerprint or list of fingerprints of the
authority keys, see `'v3ident' in tor's config.c
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
for the values.
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the key certificates
:raises: **ValueError** if we request more than 96 key certificates by
their identity fingerprints (this is due to a limit on the url length by
squid proxies).
"""
resource = '/tor/keys/all.z'
if isinstance(authority_v3idents, str):
authority_v3idents = [authority_v3idents]
if authority_v3idents:
if len(authority_v3idents) > MAX_FINGERPRINTS:
raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)
return self.query(resource, **query_args)
def query(self, resource, **query_args):
"""
Issues a request for the given resource.
:param str resource: resource being fetched, such as '/tor/server/all.z'
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the descriptors
:raises: **ValueError** if resource is clearly invalid or the descriptor
type can't be determined when 'descriptor_type' is **None**
"""
args = dict(self._default_args)
args.update(query_args)
if not 'endpoints' in args:
args['endpoints'] = self._endpoints
if not 'fall_back_to_authority' in args:
args['fall_back_to_authority'] = True
return Query(
resource,
**args
)
class DirectoryAuthority(object):
"""
Tor directory authority, a special type of relay `hardcoded into tor
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
that enumerates the other relays within the network.
At a very high level tor works as follows...
1. A volunteer starts up a new tor relay, during which it sends a `server
descriptor <server_descriptor.html>`_ to each of the directory
authorities.
2. Each hour the directory authorities make a `vote <networkstatus.html>`_
that says who they think the active relays are in the network and some
attributes about them.
3. The directory authorities send each other their votes, and compile that
into the `consensus <networkstatus.html>`_. This document is very similar
to the votes, the only difference being that the majority of the
authorities agree upon and sign this document. The individual relay entries
in the vote or consensus are called `router status entries
<router_status_entry.html>`_.
4. Tor clients (people using the service) download the consensus from one of
the authorities or a mirror to determine the active relays within the
network. They in turn use this to construct their circuits and use the
network.
:var str nickname: nickname of the authority
:var str address: IP address of the authority, presently they're all IPv4 but
this may not always be the case
:var int or_port: port on which the relay services relay traffic
:var int dir_port: port on which directory information is available
:var str fingerprint: relay fingerprint
:var str v3ident: identity key fingerprint used to sign votes and consensus
"""
def __init__(self, nickname = None, address = None, or_port = None, dir_port = None, fingerprint = None, v3ident = None):
self.nickname = nickname
self.address = address
self.or_port = or_port
self.dir_port = dir_port
self.fingerprint = fingerprint
self.v3ident = v3ident
DIRECTORY_AUTHORITIES = {
'moria1': DirectoryAuthority(
nickname = 'moria1',
address = '128.31.0.39',
or_port = 9101,
dir_port = 9131,
fingerprint = '9695DFC35FFEB861329B9F1AB04C46397020CE31',
v3ident = 'D586D18309DED4CD6D57C18FDB97EFA96D330566',
),
'tor26': DirectoryAuthority(
nickname = 'tor26',
address = '86.59.21.38',
or_port = 443,
dir_port = 80,
fingerprint = '847B1F850344D7876491A54892F904934E4EB85D',
v3ident = '14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4',
),
'dizum': DirectoryAuthority(
nickname = 'dizum',
address = '194.109.206.212',
or_port = 443,
dir_port = 80,
fingerprint = '7EA6EAD6FD83083C538F44038BBFA077587DD755',
v3ident = 'E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58',
),
'Tonga': DirectoryAuthority(
nickname = 'Tonga',
address = '82.94.251.203',
or_port = 443,
dir_port = 80,
fingerprint = '4A0CCD2DDC7995083D73F5D667100C8A5831F16D',
v3ident = None, # does not vote in the consensus
),
'turtles': DirectoryAuthority(
nickname = 'turtles',
address = '76.73.17.194',
or_port = 9090,
dir_port = 9030,
fingerprint = 'F397038ADC51336135E7B80BD99CA3844360292B',
v3ident = '27B6B5996C426270A5C95488AA5BCEB6BCC86956',
),
'gabelmoo': DirectoryAuthority(
nickname = 'gabelmoo',
address = '212.112.245.170',
or_port = 443,
dir_port = 80,
fingerprint = 'F2044413DAC2E02E3D6BCF4735A19BCA1DE97281',
v3ident = 'ED03BB616EB2F60BEC80151114BB25CEF515B226',
),
'dannenberg': DirectoryAuthority(
nickname = 'dannenberg',
address = '193.23.244.244',
or_port = 443,
dir_port = 80,
fingerprint = '7BE683E65D48141321C5ED92F075C55364AC7123',
v3ident = '585769C78764D58426B8B52B6651A5A71137189A',
),
'urras': DirectoryAuthority(
nickname = 'urras',
address = '208.83.223.34',
or_port = 80,
dir_port = 443,
fingerprint = '0AD3FA884D18F89EEA2D89C019379E0E7FD94417',
v3ident = '80550987E1D626E3EBA5E5E75A458DE0626D088C',
),
'maatuska': DirectoryAuthority(
nickname = 'maatuska',
address = '171.25.193.9',
or_port = 80,
dir_port = 443,
fingerprint = 'BD6A829255CB08E66FBE7D3748363586E46B3810',
v3ident = '49015F787433103580E3B66A1707A00E60F2D15B',
),
'Faravahar': DirectoryAuthority(
nickname = 'Faravahar',
address = '154.35.32.5',
or_port = 443,
dir_port = 80,
fingerprint = 'CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC',
v3ident = 'EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97',
),
}
def get_authorities():
"""
Provides the Tor directory authority information as of **Tor commit 00bcc25
(8/27/13)**. The directory information is hardcoded into Tor and occasionally
changes, so the information this provides might not necessarily match your
version of tor.
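For example...
::
moria1 = get_authorities()['moria1']
print '%s (%s:%i)' % (moria1.nickname, moria1.address, moria1.dir_port)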
:returns: dict of str nicknames to :class:`~stem.descriptor.remote.DirectoryAuthority` instances
"""
return dict(DIRECTORY_AUTHORITIES)

View file

@@ -0,0 +1,749 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for router status entries, the information for individual routers
within a network status document. This information is provided from a few
sources...
* control port via 'GETINFO ns/\*' and 'GETINFO md/\*' queries
* router entries in a network status document, like the cached-consensus
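For example, reading the router status entries from the cached-consensus is
as simple as the following sketch (assuming
:func:`~stem.descriptor.__init__.parse_file` recognizes the filename, as it
does for other files in tor's data directory)...
::
from stem.descriptor import parse_file
for entry in parse_file('/home/atagar/.tor/cached-consensus'):
  print '%s (%s)' % (entry.nickname, entry.fingerprint)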
**Module Overview:**
::
RouterStatusEntry - Common parent for router status entries
|- RouterStatusEntryV2 - Entry for a network status v2 document
|- RouterStatusEntryV3 - Entry for a network status v3 document
+- RouterStatusEntryMicroV3 - Entry for a microdescriptor flavored v3 document
"""
import base64
import binascii
import datetime
import stem.exit_policy
import stem.util.str_tools
from stem.descriptor import (
KEYWORD_LINE,
Descriptor,
_get_descriptor_components,
_read_until_keywords,
)
def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
"""
Reads a range of the document_file containing some number of entry_class
instances. We delimit the entry_class entries by the keyword on their
first line (entry_keyword). When finished, the document is left at the
end_position.
Either an end_position or section_end_keywords must be provided.
:param file document_file: file with network status document content
:param bool validate: checks the validity of the document's contents if
**True**, skips these checks otherwise
:param class entry_class: class to construct instance for
:param str entry_keyword: first keyword for the entry instances
:param int start_position: start of the section, default is the current position
:param int end_position: end of the section
:param tuple section_end_keywords: keyword(s) that delimit the end of the
section if no end_position was provided
:param tuple extra_args: extra arguments for the entry_class (after the
content and validate flag)
:returns: iterator over entry_class instances
:raises:
* **ValueError** if the contents is malformed and validate is **True**
* **IOError** if the file can't be read
"""
if start_position:
document_file.seek(start_position)
else:
start_position = document_file.tell()
# check if we're starting at the end of the section (i.e., there are no entries to read)
if section_end_keywords:
first_keyword = None
line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
if line_match:
first_keyword = line_match.groups()[0]
document_file.seek(start_position)
if first_keyword in section_end_keywords:
return
while end_position is None or document_file.tell() < end_position:
desc_lines, ending_keyword = _read_until_keywords(
(entry_keyword,) + section_end_keywords,
document_file,
ignore_first = True,
end_position = end_position,
include_ending_keyword = True
)
desc_content = bytes.join(b"", desc_lines)
if desc_content:
yield entry_class(desc_content, validate, *extra_args)
# check if we stopped at the end of the section
if ending_keyword in section_end_keywords:
break
else:
break
class RouterStatusEntry(Descriptor):
"""
Information about an individual router stored within a network status
document. This is the common parent for concrete status entry types.
:var stem.descriptor.networkstatus.NetworkStatusDocument document: **\*** document that this descriptor came from
:var str nickname: **\*** router's nickname
:var str fingerprint: **\*** router's fingerprint
:var datetime published: **\*** router's publication
:var str address: **\*** router's IP address
:var int or_port: **\*** router's ORPort
:var int dir_port: **\*** router's DirPort
:var list flags: **\*** list of :data:`~stem.Flag` associated with the relay
:var stem.version.Version version: parsed version of tor, this is **None** if
the relay's using a new versioning scheme
:var str version_line: versioning information reported by the relay
"""
def __init__(self, content, validate, document):
"""
Parse a router descriptor in a network status document.
:param str content: router descriptor content to be parsed
:param NetworkStatusDocument document: document this descriptor came from
:param bool validate: checks the validity of the content if **True**, skips
these checks otherwise
:raises: **ValueError** if the descriptor data is invalid
"""
super(RouterStatusEntry, self).__init__(content)
content = stem.util.str_tools._to_unicode(content)
self.document = document
self.nickname = None
self.fingerprint = None
self.published = None
self.address = None
self.or_port = None
self.dir_port = None
self.flags = None
self.version_line = None
self.version = None
self._unrecognized_lines = []
entries = _get_descriptor_components(content, validate)
if validate:
self._check_constraints(entries)
self._parse(entries, validate)
def _parse(self, entries, validate):
"""
Parses the given content and applies the attributes.
:param dict entries: keyword => (value, pgp key) entries
:param bool validate: checks validity if **True**
:raises: **ValueError** if a validity check fails
"""
for keyword, values in entries.items():
value, _ = values[0]
if keyword == 's':
_parse_s_line(self, value, validate)
elif keyword == 'v':
_parse_v_line(self, value, validate)
else:
self._unrecognized_lines.append("%s %s" % (keyword, value))
def _check_constraints(self, entries):
"""
Does a basic check that the entries conform to this descriptor type's
constraints.
:param dict entries: keyword => (value, pgp key) entries
:raises: **ValueError** if an issue arises in validation
"""
for keyword in self._required_fields():
if not keyword in entries:
raise ValueError("%s must have a '%s' line:\n%s" % (self._name(True), keyword, str(self)))
for keyword in self._single_fields():
if keyword in entries and len(entries[keyword]) > 1:
raise ValueError("%s can only have a single '%s' line, got %i:\n%s" % (self._name(True), keyword, len(entries[keyword]), str(self)))
if 'r' != entries.keys()[0]:
raise ValueError("%s are expected to start with a 'r' line:\n%s" % (self._name(True), str(self)))
def _name(self, is_plural = False):
"""
Name for this descriptor type.
"""
if is_plural:
return "Router status entries"
else:
return "Router status entry"
def _required_fields(self):
"""
Provides lines that must appear in the descriptor.
"""
return ()
def _single_fields(self):
"""
Provides lines that can only appear in the descriptor once.
"""
return ()
def get_unrecognized_lines(self):
"""
Provides any unrecognized lines.
:returns: list of unrecognized lines
"""
return list(self._unrecognized_lines)
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntry):
return False
return method(str(self).strip(), str(other).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class RouterStatusEntryV2(RouterStatusEntry):
"""
Information about an individual router stored within a version 2 network
status document.
:var str digest: **\*** router's upper-case hex digest
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
def __init__(self, content, validate = True, document = None):
self.digest = None
super(RouterStatusEntryV2, self).__init__(content, validate, document)
def _parse(self, entries, validate):
for keyword, values in entries.items():
value, _ = values[0]
if keyword == 'r':
_parse_r_line(self, value, validate, True)
del entries['r']
RouterStatusEntry._parse(self, entries, validate)
def _name(self, is_plural = False):
if is_plural:
return "Router status entries (v2)"
else:
return "Router status entry (v2)"
def _required_fields(self):
return ('r',)
def _single_fields(self):
return ('r', 's', 'v')
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntryV2):
return False
return method(str(self).strip(), str(other).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class RouterStatusEntryV3(RouterStatusEntry):
"""
Information about an individual router stored within a version 3 network
status document.
:var list or_addresses: **\*** relay's OR addresses, this is a tuple listing
of the form (address (**str**), port (**int**), is_ipv6 (**bool**))
:var str digest: **\*** router's upper-case hex digest
:var int bandwidth: bandwidth claimed by the relay (in kb/s)
:var int measured: bandwidth measured to be available by the relay
:var bool is_unmeasured: bandwidth measurement isn't based on three or more
measurements
:var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
information that isn't yet recognized
:var stem.exit_policy.MicroExitPolicy exit_policy: router's exit policy
:var list microdescriptor_hashes: **\*** tuples of two values, the list of
consensus methods for generating a set of digests and the 'algorithm =>
digest' mappings
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
def __init__(self, content, validate = True, document = None):
self.or_addresses = []
self.digest = None
self.bandwidth = None
self.measured = None
self.is_unmeasured = False
self.unrecognized_bandwidth_entries = []
self.exit_policy = None
self.microdescriptor_hashes = []
super(RouterStatusEntryV3, self).__init__(content, validate, document)
def _parse(self, entries, validate):
for keyword, values in entries.items():
value, _ = values[0]
if keyword == 'r':
_parse_r_line(self, value, validate, True)
del entries['r']
elif keyword == 'a':
for entry, _ in values:
_parse_a_line(self, entry, validate)
del entries['a']
elif keyword == 'w':
_parse_w_line(self, value, validate)
del entries['w']
elif keyword == 'p':
_parse_p_line(self, value, validate)
del entries['p']
elif keyword == 'm':
for entry, _ in values:
_parse_m_line(self, entry, validate)
del entries['m']
RouterStatusEntry._parse(self, entries, validate)
def _name(self, is_plural = False):
if is_plural:
return "Router status entries (v3)"
else:
return "Router status entry (v3)"
def _required_fields(self):
return ('r', 's')
def _single_fields(self):
return ('r', 's', 'v', 'w', 'p')
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntryV3):
return False
return method(str(self).strip(), str(other).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
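# A usage sketch (not part of the class): these entries are normally obtained
# through stem.descriptor.parse_file rather than constructed directly.
# Assuming a consensus saved at the hypothetical path 'cached-consensus',
# iterating over its v3 entries might look like...
#
# >>> import stem.descriptor
# >>> with open('cached-consensus', 'rb') as consensus_file:
# ...   for entry in stem.descriptor.parse_file(consensus_file, 'network-status-consensus-3 1.0'):
# ...     print('%s (%s)' % (entry.nickname, entry.fingerprint))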
class RouterStatusEntryMicroV3(RouterStatusEntry):
"""
Information about an individual router stored within a microdescriptor
flavored network status document.
:var int bandwidth: bandwidth claimed by the relay (in kb/s)
:var int measured: bandwidth measured to be available by the relay
:var bool is_unmeasured: bandwidth measurement isn't based on three or more
measurements
:var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
information that isn't yet recognized
:var str digest: **\*** hex encoded digest of the relay's corresponding microdescriptor
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
def __init__(self, content, validate = True, document = None):
self.bandwidth = None
self.measured = None
self.is_unmeasured = False
self.unrecognized_bandwidth_entries = []
self.digest = None
super(RouterStatusEntryMicroV3, self).__init__(content, validate, document)
def _parse(self, entries, validate):
for keyword, values in entries.items():
value, _ = values[0]
if keyword == 'r':
_parse_r_line(self, value, validate, False)
del entries['r']
elif keyword == 'w':
_parse_w_line(self, value, validate)
del entries['w']
elif keyword == 'm':
# "m" digest
# example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70
self.digest = _base64_to_hex(value, validate, False)
del entries['m']
RouterStatusEntry._parse(self, entries, validate)
def _name(self, is_plural = False):
if is_plural:
return "Router status entries (micro v3)"
else:
return "Router status entry (micro v3)"
def _required_fields(self):
return ('r', 's', 'm')
def _single_fields(self):
return ('r', 's', 'v', 'w', 'm')
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntryMicroV3):
return False
return method(str(self).strip(), str(other).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
def _parse_r_line(desc, value, validate, include_digest = True):
# Parses a RouterStatusEntry's 'r' line. They're very nearly identical for
# all current entry types (v2, v3, and microdescriptor v3) with one little
# wrinkle: only the microdescriptor flavor excludes a 'digest' field.
#
# For v2 and v3 router status entries:
# "r" nickname identity digest publication IP ORPort DirPort
# example: r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0
#
# For v3 microdescriptor router status entries:
# "r" nickname identity publication IP ORPort DirPort
# example: r Konata ARIJF2zbqirB9IwsW0mQznccWww 2012-09-24 13:40:40 69.64.48.168 9001 9030
r_comp = value.split(" ")
# inject a None for the digest to normalize the field positioning
if not include_digest:
r_comp.insert(2, None)
if len(r_comp) < 8:
if not validate:
return
expected_field_count = 'eight' if include_digest else 'seven'
raise ValueError("%s 'r' line must have %s values: r %s" % (desc._name(), expected_field_count, value))
if validate:
if not stem.util.tor_tools.is_valid_nickname(r_comp[0]):
raise ValueError("%s nickname isn't valid: %s" % (desc._name(), r_comp[0]))
elif not stem.util.connection.is_valid_ipv4_address(r_comp[5]):
raise ValueError("%s address isn't a valid IPv4 address: %s" % (desc._name(), r_comp[5]))
elif not stem.util.connection.is_valid_port(r_comp[6]):
raise ValueError("%s ORPort is invalid: %s" % (desc._name(), r_comp[6]))
elif not stem.util.connection.is_valid_port(r_comp[7], allow_zero = True):
raise ValueError("%s DirPort is invalid: %s" % (desc._name(), r_comp[7]))
elif not (r_comp[6].isdigit() and r_comp[7].isdigit()):
return
desc.nickname = r_comp[0]
desc.fingerprint = _base64_to_hex(r_comp[1], validate)
if include_digest:
desc.digest = _base64_to_hex(r_comp[2], validate)
desc.address = r_comp[5]
desc.or_port = int(r_comp[6])
desc.dir_port = None if r_comp[7] == '0' else int(r_comp[7])
try:
published = "%s %s" % (r_comp[3], r_comp[4])
desc.published = datetime.datetime.strptime(published, "%Y-%m-%d %H:%M:%S")
except ValueError:
if validate:
raise ValueError("Publication time time wasn't parsable: r %s" % value)
def _parse_a_line(desc, value, validate):
# "a" SP address ":" portlist
# example: a [2001:888:2133:0:82:94:251:204]:9001
if not ':' in value:
if not validate:
return
raise ValueError("%s 'a' line must be of the form '[address]:[ports]': a %s" % (desc._name(), value))
address, port = value.rsplit(':', 1)
is_ipv6 = address.startswith("[") and address.endswith("]")
if is_ipv6:
address = address[1:-1] # remove brackets
if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
(is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
if not validate:
return
else:
raise ValueError("%s 'a' line must start with an IPv6 address: a %s" % (desc._name(), value))
if stem.util.connection.is_valid_port(port):
desc.or_addresses.append((address, int(port), is_ipv6))
elif validate:
raise ValueError("%s 'a' line had an invalid port (%s): a %s" % (desc._name(), port, value))
def _parse_s_line(desc, value, validate):
# "s" Flags
# example: s Named Running Stable Valid
flags = [] if value == "" else value.split(" ")
desc.flags = flags
if validate:
for flag in flags:
if flags.count(flag) > 1:
raise ValueError("%s had duplicate flags: s %s" % (desc._name(), value))
elif flag == "":
raise ValueError("%s had extra whitespace on its 's' line: s %s" % (desc._name(), value))
def _parse_v_line(desc, value, validate):
# "v" version
# example: v Tor 0.2.2.35
#
# The spec says that if this starts with "Tor " then what follows is a
# tor version. If not then it has "upgraded to a more sophisticated
# protocol versioning system".
desc.version_line = value
if value.startswith("Tor "):
try:
desc.version = stem.version._get_version(value[4:])
except ValueError as exc:
if validate:
raise ValueError("%s has a malformed tor version (%s): v %s" % (desc._name(), exc, value))
def _parse_w_line(desc, value, validate):
# "w" "Bandwidth=" INT ["Measured=" INT] ["Unmeasured=1"]
# example: w Bandwidth=7980
w_comp = value.split(" ")
if len(w_comp) < 1:
if not validate:
return
raise ValueError("%s 'w' line is blank: w %s" % (desc._name(), value))
elif not w_comp[0].startswith("Bandwidth="):
if not validate:
return
raise ValueError("%s 'w' line needs to start with a 'Bandwidth=' entry: w %s" % (desc._name(), value))
for w_entry in w_comp:
if '=' in w_entry:
w_key, w_value = w_entry.split('=', 1)
else:
w_key, w_value = w_entry, None
if w_key == "Bandwidth":
if not (w_value and w_value.isdigit()):
if not validate:
return
raise ValueError("%s 'Bandwidth=' entry needs to have a numeric value: w %s" % (desc._name(), value))
desc.bandwidth = int(w_value)
elif w_key == "Measured":
if not (w_value and w_value.isdigit()):
if not validate:
return
raise ValueError("%s 'Measured=' entry needs to have a numeric value: w %s" % (desc._name(), value))
desc.measured = int(w_value)
elif w_key == "Unmeasured":
if validate and w_value != "1":
raise ValueError("%s 'Unmeasured=' should only have the value of '1': w %s" % (desc._name(), value))
desc.is_unmeasured = True
else:
desc.unrecognized_bandwidth_entries.append(w_entry)
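# For instance, a line such as 'w Bandwidth=7980 Measured=7540' results in...
#
# >>> desc.bandwidth, desc.measured, desc.is_unmeasured
# (7980, 7540, False)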
def _parse_p_line(desc, value, validate):
# "p" ("accept" / "reject") PortList
# p reject 1-65535
# example: p accept 80,110,143,443,993,995,6660-6669,6697,7000-7001
try:
desc.exit_policy = stem.exit_policy.MicroExitPolicy(value)
except ValueError as exc:
if not validate:
return
raise ValueError("%s exit policy is malformed (%s): p %s" % (desc._name(), exc, value))
def _parse_m_line(desc, value, validate):
# "m" methods 1*(algorithm "=" digest)
# example: m 8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs
m_comp = value.split(" ")
if not (desc.document and desc.document.is_vote):
if not validate:
return
vote_status = "vote" if desc.document else "<undefined document>"
raise ValueError("%s 'm' line should only appear in votes (appeared in a %s): m %s" % (desc._name(), vote_status, value))
elif len(m_comp) < 1:
if not validate:
return
raise ValueError("%s 'm' line needs to start with a series of methods: m %s" % (desc._name(), value))
try:
methods = [int(entry) for entry in m_comp[0].split(",")]
except ValueError:
if not validate:
return
raise ValueError("%s microdescriptor methods should be a series of comma separated integers: m %s" % (desc._name(), value))
hashes = {}
for entry in m_comp[1:]:
if not '=' in entry:
if not validate:
continue
raise ValueError("%s can only have a series of 'algorithm=digest' mappings after the methods: m %s" % (desc._name(), value))
hash_name, digest = entry.split('=', 1)
hashes[hash_name] = digest
desc.microdescriptor_hashes.append((methods, hashes))
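# For instance, the example line above appends the following tuple to
# desc.microdescriptor_hashes...
#
#   ([8, 9, 10, 11, 12], {'sha256': 'g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs'})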
def _base64_to_hex(identity, validate, check_if_fingerprint = True):
"""
Decodes a base64 value to hex. For example...
::
>>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s')
'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB'
:param str identity: encoded fingerprint from the consensus
:param bool validate: checks validity if **True**
:param bool check_if_fingerprint: asserts that the result is a fingerprint if **True**
:returns: **str** with the uppercase hex encoding of the relay's fingerprint
:raises: **ValueError** if the result isn't a valid fingerprint
"""
# trailing equal signs were stripped from the identity
missing_padding = -len(identity) % 4
identity += "=" * missing_padding
fingerprint = ""
try:
identity_decoded = base64.b64decode(stem.util.str_tools._to_bytes(identity))
except (TypeError, binascii.Error):
if not validate:
return None
raise ValueError("Unable to decode identity string '%s'" % identity)
for char in identity_decoded:
# Individual characters are either standard ASCII or hex encoded, and each
# represent two hex digits. For instance...
#
# >>> ord('\n')
# 10
# >>> hex(10)
# '0xa'
# >>> '0xa'[2:].zfill(2).upper()
# '0A'
char_int = char if isinstance(char, int) else ord(char)
fingerprint += hex(char_int)[2:].zfill(2).upper()
if check_if_fingerprint:
if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
if not validate:
return None
raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))
return fingerprint

View file

@@ -0,0 +1,968 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for Tor server descriptors, which contain the infrequently changing
information about a Tor relay (contact information, exit policy, public keys,
etc). This information is provided from a few sources...
* control port via 'GETINFO desc/\*' queries
* the 'cached-descriptors' file in tor's data directory
* tor metrics, at https://metrics.torproject.org/data.html
* directory authorities and mirrors via their DirPort
**Module Overview:**
::
ServerDescriptor - Tor server descriptor.
|- RelayDescriptor - Server descriptor for a relay.
|
|- BridgeDescriptor - Scrubbed server descriptor for a bridge.
| |- is_scrubbed - checks if our content has been properly scrubbed
| +- get_scrubbing_issues - description of issues with our scrubbing
|
|- digest - calculates the upper-case hex digest value for our content
|- get_unrecognized_lines - lines with unrecognized content
|- get_annotations - dictionary of content prior to the descriptor entry
+- get_annotation_lines - lines that provided the annotations
"""
import base64
import codecs
import datetime
import hashlib
import re
import stem.descriptor.extrainfo_descriptor
import stem.exit_policy
import stem.prereq
import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools
import stem.version
from stem.util import log
from stem.descriptor import (
PGP_BLOCK_END,
Descriptor,
_get_bytes_field,
_get_descriptor_components,
_read_until_keywords,
)
try:
# added in python 3.2
from functools import lru_cache
except ImportError:
from stem.util.lru_cache import lru_cache
# relay descriptors must have exactly one of each of the following
REQUIRED_FIELDS = (
"router",
"bandwidth",
"published",
"onion-key",
"signing-key",
"router-signature",
)
# optional entries that can appear at most once
SINGLE_FIELDS = (
"platform",
"fingerprint",
"hibernating",
"uptime",
"contact",
"read-history",
"write-history",
"eventdns",
"family",
"caches-extra-info",
"extra-info-digest",
"hidden-service-dir",
"protocols",
"allow-single-hop-exits",
"ntor-onion-key",
)
DEFAULT_IPV6_EXIT_POLICY = stem.exit_policy.MicroExitPolicy("reject 1-65535")
REJECT_ALL_POLICY = stem.exit_policy.ExitPolicy("reject *:*")
def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
"""
Iterates over the server descriptors in a file.
:param file descriptor_file: file with descriptor content
:param bool is_bridge: parses the file as being a bridge descriptor
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param dict kwargs: additional arguments for the descriptor constructor
:returns: iterator for ServerDescriptor instances in the file
:raises:
* **ValueError** if the contents is malformed and validate is True
* **IOError** if the file can't be read
"""
# Handler for relay descriptors
#
# Cached descriptors consist of annotations followed by the descriptor
# itself. For instance...
#
# @downloaded-at 2012-03-14 16:31:05
# @source "145.53.65.130"
# router caerSidi 71.35.143.157 9001 0 0
# platform Tor 0.2.1.30 on Linux x86_64
# <rest of the descriptor content>
# router-signature
# -----BEGIN SIGNATURE-----
# <signature for the above descriptor>
# -----END SIGNATURE-----
#
# Metrics descriptor files are the same, but lack any annotations. The
# following simply does the following...
#
# - parse as annotations until we get to "router"
# - parse as descriptor content until we get to "router-signature" followed
# by the end of the signature block
# - construct a descriptor and provide it back to the caller
#
# Any annotations after the last server descriptor is ignored (never provided
# to the caller).
while True:
annotations = _read_until_keywords("router", descriptor_file)
descriptor_content = _read_until_keywords("router-signature", descriptor_file)
# we've reached the 'router-signature', now include the pgp style block
block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if descriptor_content:
# strip newlines from annotations
annotations = [entry.strip() for entry in annotations]
descriptor_text = bytes.join(b"", descriptor_content)
if is_bridge:
yield BridgeDescriptor(descriptor_text, validate, annotations, **kwargs)
else:
yield RelayDescriptor(descriptor_text, validate, annotations, **kwargs)
else:
if validate and annotations:
orphaned_annotations = stem.util.str_tools._to_unicode(b'\n'.join(annotations))
raise ValueError('Content does not conform to being a server descriptor:\n%s' % orphaned_annotations)
break # done parsing descriptors
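# A usage sketch: assuming tor's data directory lives at the hypothetical path
# '/home/atagar/.tor', reading its cached relay descriptors might look like...
#
# >>> with open('/home/atagar/.tor/cached-descriptors', 'rb') as descriptor_file:
# ...   for desc in _parse_file(descriptor_file):
# ...     print('%s (%s)' % (desc.nickname, desc.address))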
class ServerDescriptor(Descriptor):
"""
Common parent for server descriptors.
:var str nickname: **\*** relay's nickname
:var str fingerprint: identity key fingerprint
:var datetime published: **\*** time in UTC when this descriptor was made
:var str address: **\*** IPv4 address of the relay
:var int or_port: **\*** port used for relaying
:var int socks_port: **\*** port used as client (deprecated, always **None**)
:var int dir_port: **\*** port used for descriptor mirroring
:var bytes platform: line with operating system and tor version
:var stem.version.Version tor_version: version of tor
:var str operating_system: operating system
:var int uptime: uptime when published in seconds
:var bytes contact: contact information
:var stem.exit_policy.ExitPolicy exit_policy: **\*** stated exit policy
:var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6
:var set family: **\*** nicknames or fingerprints of declared family
:var int average_bandwidth: **\*** average rate it's willing to relay in bytes/s
:var int burst_bandwidth: **\*** burst rate it's willing to relay in bytes/s
:var int observed_bandwidth: **\*** estimated capacity based on usage in bytes/s
:var list link_protocols: link protocols supported by the relay
:var list circuit_protocols: circuit protocols supported by the relay
:var bool hibernating: **\*** hibernating when published
:var bool allow_single_hop_exits: **\*** flag if single hop exiting is allowed
:var bool extra_info_cache: **\*** flag if a mirror for extra-info documents
:var str extra_info_digest: upper-case hex encoded digest of our extra-info document
:var bool eventdns: flag for evdns backend (deprecated, always unset)
:var list or_addresses: **\*** alternative for our address/or_port
attributes, each entry is a tuple of the form (address (**str**), port
(**int**), is_ipv6 (**bool**))
Deprecated, moved to extra-info descriptor...
:var datetime read_history_end: end of the sampling interval
:var int read_history_interval: seconds per interval
:var list read_history_values: bytes read during each interval
:var datetime write_history_end: end of the sampling interval
:var int write_history_interval: seconds per interval
:var list write_history_values: bytes written during each interval
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
def __init__(self, raw_contents, validate = True, annotations = None):
"""
Server descriptor constructor, created from an individual relay's
descriptor content (as provided by "GETINFO desc/*", cached descriptors,
and metrics).
By default this validates the descriptor's content as it's parsed. This
validation can be disabled to either improve performance or to accept
malformed data.
:param str raw_contents: descriptor content provided by the relay
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param list annotations: lines that appeared prior to the descriptor
:raises: **ValueError** if the contents is malformed and validate is True
"""
super(ServerDescriptor, self).__init__(raw_contents)
# Only a few things can be arbitrary bytes according to the dir-spec, so
# parsing them separately.
self.platform = _get_bytes_field("platform", raw_contents)
self.contact = _get_bytes_field("contact", raw_contents)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
self.nickname = None
self.fingerprint = None
self.published = None
self.address = None
self.or_port = None
self.socks_port = None
self.dir_port = None
self.tor_version = None
self.operating_system = None
self.uptime = None
self.exit_policy = None
self.exit_policy_v6 = DEFAULT_IPV6_EXIT_POLICY
self.family = set()
self.average_bandwidth = None
self.burst_bandwidth = None
self.observed_bandwidth = None
self.link_protocols = None
self.circuit_protocols = None
self.hibernating = False
self.allow_single_hop_exits = False
self.extra_info_cache = False
self.extra_info_digest = None
self.hidden_service_dir = None
self.eventdns = None
self.or_addresses = []
self.read_history_end = None
self.read_history_interval = None
self.read_history_values = None
self.write_history_end = None
self.write_history_interval = None
self.write_history_values = None
self._unrecognized_lines = []
self._annotation_lines = annotations if annotations else []
# A descriptor contains a series of 'keyword lines' which are simply a
# keyword followed by an optional value. Lines can also be followed by a
# signature block.
#
# We care about the ordering of 'accept' and 'reject' entries because this
# influences the resulting exit policy, but for everything else the order
# does not matter so breaking it into key / value pairs.
entries, policy = _get_descriptor_components(raw_contents, validate, ("accept", "reject"))
if policy == [u'reject *:*']:
self.exit_policy = REJECT_ALL_POLICY
else:
self.exit_policy = stem.exit_policy.ExitPolicy(*policy)
self._parse(entries, validate)
if validate:
self._check_constraints(entries)
def digest(self):
"""
Provides the hex encoded sha1 of our content. This value is part of the
network status entry for this relay.
:returns: **unicode** with the upper-case hex digest value for this server descriptor
"""
raise NotImplementedError("Unsupported Operation: this should be implemented by the ServerDescriptor subclass")
def get_unrecognized_lines(self):
return list(self._unrecognized_lines)
@lru_cache()
def get_annotations(self):
"""
Provides content that appeared prior to the descriptor. If this comes from
the cached-descriptors file then this commonly contains content like...
::
@downloaded-at 2012-03-18 21:18:29
@source "173.254.216.66"
:returns: **dict** with the key/value pairs in our annotations
"""
annotation_dict = {}
for line in self._annotation_lines:
if b" " in line:
key, value = line.split(b" ", 1)
annotation_dict[key] = value
else:
annotation_dict[line] = None
return annotation_dict
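# For the '@downloaded-at' and '@source' annotations shown above, the result
# would be roughly...
#
# >>> desc.get_annotations()
# {b'@downloaded-at': b'2012-03-18 21:18:29', b'@source': b'"173.254.216.66"'}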
def get_annotation_lines(self):
"""
Provides the lines of content that appeared prior to the descriptor. This
is the same as the
:func:`~stem.descriptor.server_descriptor.ServerDescriptor.get_annotations`
results, but with the unparsed lines and ordering retained.
:returns: **list** with the lines of annotation that came before this descriptor
"""
return self._annotation_lines
def _parse(self, entries, validate):
"""
Parses a series of 'keyword => (value, pgp block)' mappings and applies
them as attributes.
:param dict entries: descriptor contents to be applied
:param bool validate: checks the validity of descriptor content if **True**
:raises: **ValueError** if an error occurs in validation
"""
for keyword, values in entries.items():
# most just work with the first (and only) value
value, block_contents = values[0]
line = "%s %s" % (keyword, value) # original line
if block_contents:
line += "\n%s" % block_contents
if keyword == "router":
# "router" nickname address ORPort SocksPort DirPort
router_comp = value.split()
if len(router_comp) < 5:
if not validate:
continue
raise ValueError("Router line must have five values: %s" % line)
if validate:
if not stem.util.tor_tools.is_valid_nickname(router_comp[0]):
raise ValueError("Router line entry isn't a valid nickname: %s" % router_comp[0])
elif not stem.util.connection.is_valid_ipv4_address(router_comp[1]):
raise ValueError("Router line entry isn't a valid IPv4 address: %s" % router_comp[1])
elif not stem.util.connection.is_valid_port(router_comp[2], allow_zero = True):
raise ValueError("Router line's ORPort is invalid: %s" % router_comp[2])
elif not stem.util.connection.is_valid_port(router_comp[3], allow_zero = True):
raise ValueError("Router line's SocksPort is invalid: %s" % router_comp[3])
elif not stem.util.connection.is_valid_port(router_comp[4], allow_zero = True):
raise ValueError("Router line's DirPort is invalid: %s" % router_comp[4])
elif not (router_comp[2].isdigit() and router_comp[3].isdigit() and router_comp[4].isdigit()):
continue
self.nickname = router_comp[0]
self.address = router_comp[1]
self.or_port = int(router_comp[2])
self.socks_port = None if router_comp[3] == '0' else int(router_comp[3])
self.dir_port = None if router_comp[4] == '0' else int(router_comp[4])
elif keyword == "bandwidth":
# "bandwidth" bandwidth-avg bandwidth-burst bandwidth-observed
bandwidth_comp = value.split()
if len(bandwidth_comp) < 3:
if not validate:
continue
raise ValueError("Bandwidth line must have three values: %s" % line)
elif not bandwidth_comp[0].isdigit():
if not validate:
continue
raise ValueError("Bandwidth line's average rate isn't numeric: %s" % bandwidth_comp[0])
elif not bandwidth_comp[1].isdigit():
if not validate:
continue
raise ValueError("Bandwidth line's burst rate isn't numeric: %s" % bandwidth_comp[1])
elif not bandwidth_comp[2].isdigit():
if not validate:
continue
raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2])
self.average_bandwidth = int(bandwidth_comp[0])
self.burst_bandwidth = int(bandwidth_comp[1])
self.observed_bandwidth = int(bandwidth_comp[2])
elif keyword == "platform":
# "platform" string
# The platform attribute was set earlier. This line can contain any
# arbitrary data, but tor seems to report its version followed by the
# os like the following...
#
# platform Tor 0.2.2.35 (git-73ff13ab3cc9570d) on Linux x86_64
#
# There's no guarantee that we'll be able to pick out the version, but we
# might as well try to save our caller the effort.
platform_match = re.match("^Tor (\S*).* on (.*)$", value)
if platform_match:
version_str, self.operating_system = platform_match.groups()
try:
self.tor_version = stem.version._get_version(version_str)
except ValueError:
pass
elif keyword == "published":
# "published" YYYY-MM-DD HH:MM:SS
try:
self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
except ValueError:
if validate:
raise ValueError("Published line's time wasn't parsable: %s" % line)
elif keyword == "fingerprint":
# This is forty hex digits split into space separated groups of four.
# Checking that we match this pattern.
fingerprint = value.replace(" ", "")
if validate:
for grouping in value.split(" "):
if len(grouping) != 4:
raise ValueError("Fingerprint line should have groupings of four hex digits: %s" % value)
if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value)
self.fingerprint = fingerprint
elif keyword == "hibernating":
# "hibernating" 0|1 (in practice only set if one)
if validate and not value in ("0", "1"):
raise ValueError("Hibernating line had an invalid value, must be zero or one: %s" % value)
self.hibernating = value == "1"
elif keyword == "allow-single-hop-exits":
self.allow_single_hop_exits = True
elif keyword == "caches-extra-info":
self.extra_info_cache = True
elif keyword == "extra-info-digest":
# this is forty hex digits which just so happens to be the same as a
# fingerprint
if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
raise ValueError("Extra-info digests should consist of forty hex digits: %s" % value)
self.extra_info_digest = value
elif keyword == "hidden-service-dir":
if value:
self.hidden_service_dir = value.split(" ")
else:
self.hidden_service_dir = ["2"]
elif keyword == "uptime":
# We need to be tolerant of negative uptimes to accommodate a past tor
# bug...
#
# Changes in version 0.1.2.7-alpha - 2007-02-06
# - If our system clock jumps back in time, don't publish a negative
# uptime in the descriptor. Also, don't let the global rate limiting
# buckets go absurdly negative.
#
# After parsing all of the attributes we'll double check that negative
# uptimes only occurred prior to this fix.
try:
self.uptime = int(value)
except ValueError:
if not validate:
continue
raise ValueError("Uptime line must have an integer value: %s" % value)
elif keyword == "contact":
pass # parsed as a bytes field earlier
elif keyword == "protocols":
protocols_match = re.match("^Link (.*) Circuit (.*)$", value)
if protocols_match:
link_versions, circuit_versions = protocols_match.groups()
self.link_protocols = link_versions.split(" ")
self.circuit_protocols = circuit_versions.split(" ")
elif validate:
raise ValueError("Protocols line did not match the expected pattern: %s" % line)
elif keyword == "family":
self.family = set(value.split(" "))
elif keyword == "eventdns":
self.eventdns = value == "1"
elif keyword == "ipv6-policy":
self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
elif keyword == "or-address":
or_address_entries = [value for (value, _) in values]
for entry in or_address_entries:
line = "%s %s" % (keyword, entry)
if not ":" in entry:
if not validate:
continue
else:
raise ValueError("or-address line missing a colon: %s" % line)
address, port = entry.rsplit(':', 1)
is_ipv6 = address.startswith("[") and address.endswith("]")
if is_ipv6:
address = address[1:-1] # remove brackets
if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
(is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
if not validate:
continue
else:
raise ValueError("or-address line has a malformed address: %s" % line)
if stem.util.connection.is_valid_port(port):
self.or_addresses.append((address, int(port), is_ipv6))
elif validate:
raise ValueError("or-address line has a malformed port: %s" % line)
elif keyword in ("read-history", "write-history"):
try:
timestamp, interval, remainder = \
stem.descriptor.extrainfo_descriptor._parse_timestamp_and_interval(keyword, value)
try:
if remainder:
history_values = [int(entry) for entry in remainder.split(",")]
else:
history_values = []
except ValueError:
raise ValueError("%s line has non-numeric values: %s" % (keyword, line))
if keyword == "read-history":
self.read_history_end = timestamp
self.read_history_interval = interval
self.read_history_values = history_values
else:
self.write_history_end = timestamp
self.write_history_interval = interval
self.write_history_values = history_values
except ValueError as exc:
if validate:
raise exc
else:
self._unrecognized_lines.append(line)
# if we have a negative uptime and a tor version that shouldn't exhibit
# this bug then fail validation
if validate and self.uptime and self.tor_version:
if self.uptime < 0 and self.tor_version >= stem.version.Version("0.1.2.7"):
raise ValueError("Descriptor for version '%s' had a negative uptime value: %i" % (self.tor_version, self.uptime))
def _check_constraints(self, entries):
"""
Does a basic check that the entries conform to this descriptor type's
constraints.
:param dict entries: keyword => (value, pgp key) entries
:raises: **ValueError** if an issue arises in validation
"""
for keyword in self._required_fields():
if not keyword in entries:
raise ValueError("Descriptor must have a '%s' entry" % keyword)
for keyword in self._single_fields():
if keyword in entries and len(entries[keyword]) > 1:
raise ValueError("The '%s' entry can only appear once in a descriptor" % keyword)
expected_first_keyword = self._first_keyword()
if expected_first_keyword and expected_first_keyword != list(entries.keys())[0]:
raise ValueError("Descriptor must start with a '%s' entry" % expected_first_keyword)
expected_last_keyword = self._last_keyword()
if expected_last_keyword and expected_last_keyword != list(entries.keys())[-1]:
raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)
if not self.exit_policy:
raise ValueError("Descriptor must have at least one 'accept' or 'reject' entry")
# Constraints that the descriptor must meet to be valid. These can be None if
# not applicable.
def _required_fields(self):
return REQUIRED_FIELDS
def _single_fields(self):
return REQUIRED_FIELDS + SINGLE_FIELDS
def _first_keyword(self):
return "router"
def _last_keyword(self):
return "router-signature"
class RelayDescriptor(ServerDescriptor):
"""
Server descriptor (`descriptor specification
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)
:var str onion_key: **\*** key used to encrypt EXTEND cells
:var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
:var str signing_key: **\*** relay's long-term identity key
:var str signature: **\*** signature for this descriptor
**\*** attribute is required when we're parsed with validation
"""
def __init__(self, raw_contents, validate = True, annotations = None):
self.onion_key = None
self.ntor_onion_key = None
self.signing_key = None
self.signature = None
super(RelayDescriptor, self).__init__(raw_contents, validate, annotations)
# validate the descriptor if required
if validate:
self._validate_content()
@lru_cache()
def digest(self):
"""
Provides the digest of our descriptor's content.
:returns: the digest string encoded in uppercase hex
:raises: ValueError if the digest cannot be calculated
"""
# Digest is calculated from everything in the
# descriptor except the router-signature.
raw_descriptor = self.get_bytes()
start_token = b"router "
sig_token = b"\nrouter-signature\n"
start = raw_descriptor.find(start_token)
sig_start = raw_descriptor.find(sig_token)
end = sig_start + len(sig_token)
if start >= 0 and sig_start > 0 and end > start:
for_digest = raw_descriptor[start:end]
digest_hash = hashlib.sha1(stem.util.str_tools._to_bytes(for_digest))
return stem.util.str_tools._to_unicode(digest_hash.hexdigest().upper())
else:
raise ValueError("unable to calculate digest for descriptor")
def _validate_content(self):
"""
Validates that the descriptor content matches the signature.
:raises: ValueError if the signature does not match the content
"""
key_as_bytes = RelayDescriptor._get_key_bytes(self.signing_key)
# ensure the fingerprint is a hash of the signing key
if self.fingerprint:
# calculate the signing key hash
key_der_as_hash = hashlib.sha1(stem.util.str_tools._to_bytes(key_as_bytes)).hexdigest()
if key_der_as_hash != self.fingerprint.lower():
log.warn("Signing key hash: %s != fingerprint: %s" % (key_der_as_hash, self.fingerprint.lower()))
raise ValueError("Fingerprint does not match hash")
self._verify_digest(key_as_bytes)
def _verify_digest(self, key_as_der):
# check that our digest matches what was signed
if not stem.prereq.is_crypto_available():
return
from Crypto.Util import asn1
from Crypto.Util.number import bytes_to_long, long_to_bytes
# get the ASN.1 sequence
seq = asn1.DerSequence()
seq.decode(key_as_der)
modulus = seq[0]
public_exponent = seq[1] # should always be 65537
sig_as_bytes = RelayDescriptor._get_key_bytes(self.signature)
# convert the descriptor signature to an int
sig_as_long = bytes_to_long(sig_as_bytes)
# use the public exponent[e] & the modulus[n] to decrypt the int
decrypted_int = pow(sig_as_long, public_exponent, modulus)
# block size will always be 128 for a 1024 bit key
blocksize = 128
# convert the int to a byte array.
decrypted_bytes = long_to_bytes(decrypted_int, blocksize)
############################################################################
## The decrypted bytes should have a structure exactly along these lines.
## 1 byte - [null '\x00']
## 1 byte - [block type identifier '\x01'] - Should always be 1
## N bytes - [padding '\xFF' ]
## 1 byte - [separator '\x00' ]
## M bytes - [message]
## Total - 128 bytes
## More info here http://www.ietf.org/rfc/rfc2313.txt
## especially the notes in section 8.1
############################################################################
try:
if decrypted_bytes.index(b'\x00\x01') != 0:
raise ValueError("Verification failed, identifier missing")
except ValueError:
raise ValueError("Verification failed, malformed data")
try:
identifier_offset = 2
# find the separator
separator_index = decrypted_bytes.index(b'\x00', identifier_offset)
except ValueError:
raise ValueError("Verification failed, separator not found")
digest_hex = codecs.encode(decrypted_bytes[separator_index + 1:], 'hex_codec')
digest = stem.util.str_tools._to_unicode(digest_hex.upper())
local_digest = self.digest()
if digest != local_digest:
raise ValueError("Decrypted digest does not match local digest (calculated: %s, local: %s)" % (digest, local_digest))
def _parse(self, entries, validate):
entries = dict(entries) # shallow copy since we're destructive
# handles fields only in server descriptors
for keyword, values in entries.items():
value, block_contents = values[0]
line = "%s %s" % (keyword, value)
if keyword == "onion-key":
if validate and not block_contents:
raise ValueError("Onion key line must be followed by a public key: %s" % line)
self.onion_key = block_contents
del entries["onion-key"]
elif keyword == "ntor-onion-key":
self.ntor_onion_key = value
del entries["ntor-onion-key"]
elif keyword == "signing-key":
if validate and not block_contents:
raise ValueError("Signing key line must be followed by a public key: %s" % line)
self.signing_key = block_contents
del entries["signing-key"]
elif keyword == "router-signature":
if validate and not block_contents:
raise ValueError("Router signature line must be followed by a signature block: %s" % line)
self.signature = block_contents
del entries["router-signature"]
ServerDescriptor._parse(self, entries, validate)
def _compare(self, other, method):
if not isinstance(other, RelayDescriptor):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
@staticmethod
def _get_key_bytes(key_string):
# Remove the newlines from the key string & strip off the
# '-----BEGIN RSA PUBLIC KEY-----' header and
# '-----END RSA PUBLIC KEY-----' footer
key_as_string = ''.join(key_string.split('\n')[1:4])
# get the key representation in bytes
key_bytes = base64.b64decode(stem.util.str_tools._to_bytes(key_as_string))
return key_bytes
class BridgeDescriptor(ServerDescriptor):
"""
Bridge descriptor (`bridge descriptor specification
<https://metrics.torproject.org/formats.html#bridgedesc>`_)
"""
def __init__(self, raw_contents, validate = True, annotations = None):
self._digest = None
super(BridgeDescriptor, self).__init__(raw_contents, validate, annotations)
def digest(self):
return self._digest
def _parse(self, entries, validate):
entries = dict(entries)
# handles fields only in bridge descriptors
for keyword, values in entries.items():
value, block_contents = values[0]
line = "%s %s" % (keyword, value)
if keyword == "router-digest":
if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
raise ValueError("Router digest line had an invalid sha1 digest: %s" % line)
self._digest = stem.util.str_tools._to_unicode(value)
del entries["router-digest"]
ServerDescriptor._parse(self, entries, validate)
def is_scrubbed(self):
"""
Checks if we've been properly scrubbed in accordance with the `bridge
descriptor specification
<https://metrics.torproject.org/formats.html#bridgedesc>`_. Validation is a
moving target so this may not be fully up to date.
:returns: **True** if we're scrubbed, **False** otherwise
"""
return self.get_scrubbing_issues() == []
@lru_cache()
def get_scrubbing_issues(self):
"""
Provides issues with our scrubbing.
:returns: **list** of strings which describe issues we have with our
scrubbing, this list is empty if we're properly scrubbed
"""
issues = []
if not self.address.startswith("10."):
issues.append("Router line's address should be scrubbed to be '10.x.x.x': %s" % self.address)
if self.contact and self.contact != "somebody":
issues.append("Contact line should be scrubbed to be 'somebody', but instead had '%s'" % self.contact)
for address, _, is_ipv6 in self.or_addresses:
if not is_ipv6 and not address.startswith("10."):
issues.append("or-address line's address should be scrubbed to be '10.x.x.x': %s" % address)
elif is_ipv6 and not address.startswith("fd9f:2e19:3bcf::"):
# TODO: this check isn't quite right because we aren't checking that
# the next grouping of hex digits contains 1-2 digits
issues.append("or-address line's address should be scrubbed to be 'fd9f:2e19:3bcf::xx:xxxx': %s" % address)
for line in self.get_unrecognized_lines():
if line.startswith("onion-key "):
issues.append("Bridge descriptors should have their onion-key scrubbed: %s" % line)
elif line.startswith("signing-key "):
issues.append("Bridge descriptors should have their signing-key scrubbed: %s" % line)
elif line.startswith("router-signature "):
issues.append("Bridge descriptors should have their signature scrubbed: %s" % line)
return issues
def _required_fields(self):
# bridge required fields are the same as a relay descriptor, minus items
# excluded according to the format page
excluded_fields = [
"onion-key",
"signing-key",
"router-signature",
]
included_fields = [
"router-digest",
]
return tuple(included_fields + [f for f in REQUIRED_FIELDS if not f in excluded_fields])
def _single_fields(self):
return self._required_fields() + SINGLE_FIELDS
def _last_keyword(self):
return None
def _compare(self, other, method):
if not isinstance(other, BridgeDescriptor):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
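# A usage sketch: bridge descriptors come from sanitised metrics tarballs, so
# an audit of their scrubbing (assuming a descriptor file at the hypothetical
# path 'bridge-descriptors') might look like...
#
# >>> with open('bridge-descriptors', 'rb') as descriptor_file:
# ...   for desc in _parse_file(descriptor_file, is_bridge = True):
# ...     if not desc.is_scrubbed():
# ...       print(desc.get_scrubbing_issues())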

View file

@@ -0,0 +1,115 @@
# Copyright 2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for `TorDNSEL <https://www.torproject.org/projects/tordnsel.html.en>`_
exit list files.
"""
import datetime
import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools
from stem.descriptor import (
Descriptor,
_read_until_keywords,
_get_descriptor_components,
)
def _parse_file(tordnsel_file, validate = True, **kwargs):
"""
Iterates over a tordnsel file.
:returns: iterator for :class:`~stem.descriptor.tordnsel.TorDNSEL`
instances in the file
:raises:
* **ValueError** if the contents is malformed and validate is **True**
* **IOError** if the file can't be read
"""
# skip content prior to the first ExitNode
_read_until_keywords("ExitNode", tordnsel_file, skip = True)
while True:
contents = _read_until_keywords("ExitAddress", tordnsel_file)
contents += _read_until_keywords("ExitNode", tordnsel_file)
if contents:
yield TorDNSEL(bytes.join(b"", contents), validate, **kwargs)
else:
break # done parsing file
class TorDNSEL(Descriptor):
"""
TorDNSEL descriptor (`exitlist specification
<https://www.torproject.org/tordnsel/exitlist-spec.txt>`_)
:var str fingerprint: **\*** relay's fingerprint
:var datetime published: **\*** time in UTC when this descriptor was made
:var datetime last_status: **\*** time in UTC when the relay was seen in a v2 network status
:var list exit_addresses: **\*** list of (str address, datetime date) tuples consisting of the found IPv4 exit address and the time
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
def __init__(self, raw_contents, validate):
super(TorDNSEL, self).__init__(raw_contents)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
entries = _get_descriptor_components(raw_contents, validate)
self.fingerprint = None
self.published = None
self.last_status = None
self.exit_addresses = []
self._parse(entries, validate)
def _parse(self, entries, validate):
for keyword, values in entries.items():
value, block_content = values[0]
if validate and block_content:
raise ValueError("Unexpected block content: %s" % block_content)
if keyword == "ExitNode":
if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value)
self.fingerprint = value
elif keyword == "Published":
try:
self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
except ValueError:
if validate:
raise ValueError("Published time wasn't parsable: %s" % value)
elif keyword == "LastStatus":
try:
self.last_status = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
except ValueError:
if validate:
raise ValueError("LastStatus time wasn't parsable: %s" % value)
elif keyword == "ExitAddress":
for value, block_content in values:
address, date = value.split(" ", 1)
if validate:
if not stem.util.connection.is_valid_ipv4_address(address):
raise ValueError("ExitAddress isn't a valid IPv4 address: %s" % address)
elif block_content:
raise ValueError("Unexpected block content: %s" % block_content)
try:
date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
self.exit_addresses.append((address, date))
except ValueError:
if validate:
raise ValueError("ExitAddress found time wasn't parsable: %s" % value)
elif validate:
raise ValueError("Unrecognized keyword: %s" % keyword)