onionshare/lib/stem/descriptor/extrainfo_descriptor.py

# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for Tor extra-info descriptors. These are published by relays whenever
their server descriptor is published and have a similar format. However, unlike
server descriptors these don't contain information that Tor clients require to
function and as such aren't fetched by default.

Defined in section 2.2 of the `dir-spec
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_,
extra-info descriptors contain interesting but non-vital information such as
usage statistics. Tor clients cannot request these documents for bridges.

Extra-info descriptors are available from a few sources...

* if you have 'DownloadExtraInfo 1' in your torrc...

 * control port via 'GETINFO extra-info/digest/\*' queries
 * the 'cached-extrainfo' file in tor's data directory

* tor metrics, at https://metrics.torproject.org/data.html
* directory authorities and mirrors via their DirPort

**Module Overview:**

::

  ExtraInfoDescriptor - Tor extra-info descriptor.
    |  |- RelayExtraInfoDescriptor - Extra-info descriptor for a relay.
    |  +- BridgeExtraInfoDescriptor - Extra-info descriptor for a bridge.
    |
    |- digest - calculates the upper-case hex digest value for our content
    +- get_unrecognized_lines - lines with unrecognized content

.. data:: DirResponse (enum)

  Enumeration for known statuses for ExtraInfoDescriptor's dir_*_responses.

  =================== ===========
  DirResponse         Description
  =================== ===========
  **OK**              network status requests that were answered
  **NOT_ENOUGH_SIGS** network status wasn't signed by enough authorities
  **UNAVAILABLE**     requested network status was unavailable
  **NOT_FOUND**       requested network status was not found
  **NOT_MODIFIED**    network status unmodified since If-Modified-Since time
  **BUSY**            directory was busy
  =================== ===========

.. data:: DirStat (enum)

  Enumeration for known stats for ExtraInfoDescriptor's dir_*_direct_dl and
  dir_*_tunneled_dl.

  ===================== ===========
  DirStat               Description
  ===================== ===========
  **COMPLETE**          requests that completed successfully
  **TIMEOUT**           requests that didn't complete within a ten minute timeout
  **RUNNING**           requests still in process when measurement's taken
  **MIN**               smallest rate at which a descriptor was downloaded in B/s
  **MAX**               largest rate at which a descriptor was downloaded in B/s
  **D1-4** and **D6-9** rate of the slowest x/10 download rates in B/s
  **Q1** and **Q3**     rate of the slowest and fastest quarter download rates in B/s
  **MD**                median download rate in B/s
  ===================== ===========
"""

import datetime
import hashlib
import re

import stem
import stem.util.connection
import stem.util.enum
import stem.util.str_tools
import stem.util.tor_tools

from stem.descriptor import (
  PGP_BLOCK_END,
  Descriptor,
  _read_until_keywords,
  _get_descriptor_components,
)

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

# Statuses that can show up in 'dirreq-v2-resp' and 'dirreq-v3-resp' lines.
# Each enum key is the upper-cased status with dashes swapped for underscores.
DirResponse = stem.util.enum.Enum(*[
  (status.upper().replace("-", "_"), status)
  for status in (
    "ok",
    "not-enough-sigs",
    "unavailable",
    "not-found",
    "not-modified",
    "busy",
  )
])

# Stats that can show up in 'dirreq-v2/3-direct-dl' and
# 'dirreq-v2/3-tunneled-dl' lines. There is no 'd5' entry, the deciles run
# d1-d4 and d6-d9.
dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md']
dir_stats.extend(['d%i' % decile for decile in (1, 2, 3, 4, 6, 7, 8, 9)])
DirStat = stem.util.enum.Enum(*[(name.upper(), name) for name in dir_stats])

# relay descriptors must have exactly one of the following (checked by the
# ExtraInfoDescriptor constructor when validating, bridge descriptors swap
# 'router-signature' for 'router-digest')
REQUIRED_FIELDS = (
  "extra-info",
  "published",
  "router-signature",
)

# optional entries that can appear at most once (only enforced when the
# descriptor is parsed with validation)
SINGLE_FIELDS = (
  # bytes read/written for relayed traffic
  "read-history",
  "write-history",
  # sha1 digests of the geoip database files
  "geoip-db-digest",
  "geoip6-db-digest",
  # bridge statistics
  "bridge-stats-end",
  "bridge-ips",
  # directory mirror statistics
  "dirreq-stats-end",
  "dirreq-v2-ips",
  "dirreq-v3-ips",
  "dirreq-v2-reqs",
  "dirreq-v3-reqs",
  "dirreq-v2-share",
  "dirreq-v3-share",
  "dirreq-v2-resp",
  "dirreq-v3-resp",
  "dirreq-v2-direct-dl",
  "dirreq-v3-direct-dl",
  "dirreq-v2-tunneled-dl",
  "dirreq-v3-tunneled-dl",
  # bytes read/written for directory mirroring
  "dirreq-read-history",
  "dirreq-write-history",
  # guard statistics
  "entry-stats-end",
  "entry-ips",
  # cell relaying statistics
  "cell-stats-end",
  "cell-processed-cells",
  "cell-queued-cells",
  "cell-time-in-queue",
  "cell-circuits-per-decile",
  # bi-directional connection usage
  "conn-bi-direct",
  # exit statistics
  "exit-stats-end",
  "exit-kibibytes-written",
  "exit-kibibytes-read",
  "exit-streams-opened",
)


def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
  """
  Generator providing each extra-info descriptor found in a file, in the order
  that they appear.

  :param file descriptor_file: file with descriptor content
  :param bool is_bridge: provides
    :class:`~stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`
    instances if **True**,
    :class:`~stem.descriptor.extrainfo_descriptor.RelayExtraInfoDescriptor`
    instances otherwise
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator for :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
    instances in the file

  :raises:
    * **ValueError** if the content is malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  descriptor_class = BridgeExtraInfoDescriptor if is_bridge else RelayExtraInfoDescriptor

  # first word of the line that closes the pgp style signature block
  signature_end_keyword = PGP_BLOCK_END.split(' ', 1)[0]

  while True:
    # everything up to the 'router-signature' line...
    content_lines = _read_until_keywords("router-signature", descriptor_file)

    # ... followed by the pgp style block after it
    content_lines += _read_until_keywords(signature_end_keyword, descriptor_file, True)

    if not content_lines:
      return  # nothing left in the file

    yield descriptor_class(b"".join(content_lines), validate, **kwargs)


def _parse_timestamp_and_interval(keyword, content):
  """
  Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry.

  :param str keyword: line's keyword, only used for error messages
  :param str content: line content to be parsed

  :returns: **tuple** of the form (timestamp (**datetime**), interval
    (**int**), remaining content (**str**)), the remaining content is **None**
    if nothing follows the interval

  :raises: **ValueError** if the content is malformed
  """

  line = "%s %s" % (keyword, content)

  # Raw string so '\(' and '\)' reach the regex engine as written rather than
  # being treated as (invalid) string escape sequences.
  content_match = re.match(r"^(.*) \(([0-9]+) s\)( .*)?$", content)

  if not content_match:
    raise ValueError("Malformed %s line: %s" % (keyword, line))

  # the pattern only matches intervals made of the digits 0-9, so the int()
  # conversion below can't fail
  timestamp_str, interval, remainder = content_match.groups()

  if remainder:
    remainder = remainder[1:]  # remove leading space

  try:
    timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
  except ValueError:
    raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))

  return timestamp, int(interval), remainder


class ExtraInfoDescriptor(Descriptor):
  r"""
  Extra-info descriptor document.

  :var str nickname: **\*** relay's nickname
  :var str fingerprint: **\*** identity key fingerprint
  :var datetime published: **\*** time in UTC when this descriptor was made
  :var str geoip_db_digest: sha1 of the geoIP database file for IPv4 addresses
  :var str geoip6_db_digest: sha1 of the geoIP database file for IPv6 addresses
  :var dict transport: **\*** mapping of transport methods to their (address,
    port, args) tuple, these usually appear on bridges in which case all of
    those are **None**

  **Bi-directional connection usage:**

  :var datetime conn_bi_direct_end: end of the sampling interval
  :var int conn_bi_direct_interval: seconds per interval
  :var int conn_bi_direct_below: connections that read/wrote less than 20 KiB
  :var int conn_bi_direct_read: connections that read at least 10x more than wrote
  :var int conn_bi_direct_write: connections that wrote at least 10x more than read
  :var int conn_bi_direct_both: remaining connections

  **Bytes read/written for relayed traffic:**

  :var datetime read_history_end: end of the sampling interval
  :var int read_history_interval: seconds per interval
  :var list read_history_values: bytes read during each interval

  :var datetime write_history_end: end of the sampling interval
  :var int write_history_interval: seconds per interval
  :var list write_history_values: bytes written during each interval

  **Cell relaying statistics:**

  :var datetime cell_stats_end: end of the period when stats were gathered
  :var int cell_stats_interval: length in seconds of the interval
  :var list cell_processed_cells: measurement of processed cells per circuit
  :var list cell_queued_cells: measurement of queued cells per circuit
  :var list cell_time_in_queue: mean enqueued time in milliseconds for cells
  :var int cell_circuits_per_decile: mean number of circuits in a decile

  **Directory Mirror Attributes:**

  :var datetime dir_stats_end: end of the period when stats were gathered
  :var int dir_stats_interval: length in seconds of the interval
  :var dict dir_v2_ips: mapping of locales to rounded count of requester ips
  :var dict dir_v3_ips: mapping of locales to rounded count of requester ips
  :var float dir_v2_share: percent of total directory traffic it expects to serve
  :var float dir_v3_share: percent of total directory traffic it expects to serve
  :var dict dir_v2_requests: mapping of locales to rounded count of requests
  :var dict dir_v3_requests: mapping of locales to rounded count of requests

  :var dict dir_v2_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
  :var dict dir_v3_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
  :var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count
  :var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count

  :var dict dir_v2_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
  :var dict dir_v3_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
  :var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement
  :var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement

  :var dict dir_v2_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
  :var dict dir_v3_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
  :var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement
  :var dict dir_v3_tunneled_dl_unknown: mapping of unrecognized stats to their measurement

  **Bytes read/written for directory mirroring:**

  :var datetime dir_read_history_end: end of the sampling interval
  :var int dir_read_history_interval: seconds per interval
  :var list dir_read_history_values: bytes read during each interval

  :var datetime dir_write_history_end: end of the sampling interval
  :var int dir_write_history_interval: seconds per interval
  :var list dir_write_history_values: bytes written during each interval

  **Guard Attributes:**

  :var datetime entry_stats_end: end of the period when stats were gathered
  :var int entry_stats_interval: length in seconds of the interval
  :var dict entry_ips: mapping of locales to rounded count of unique user ips

  **Exit Attributes:**

  :var datetime exit_stats_end: end of the period when stats were gathered
  :var int exit_stats_interval: length in seconds of the interval
  :var dict exit_kibibytes_written: traffic per port (keys are ints or 'other')
  :var dict exit_kibibytes_read: traffic per port (keys are ints or 'other')
  :var dict exit_streams_opened: streams per port (keys are ints or 'other')

  **Bridge Attributes:**

  :var datetime bridge_stats_end: end of the period when stats were gathered
  :var int bridge_stats_interval: length in seconds of the interval
  :var dict bridge_ips: mapping of locales to rounded count of unique user ips
  :var datetime geoip_start_time: replaced by bridge_stats_end (deprecated)
  :var dict geoip_client_origins: replaced by bridge_ips (deprecated)
  :var dict ip_versions: mapping of ip protocols to a rounded count for the number of users
  :var dict ip_transports: mapping of ip transports to a count for the number of users

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, raw_contents, validate = True):
    """
    Extra-info descriptor constructor. By default this validates the
    descriptor's content as it's parsed. This validation can be disabled to
    either improve performance or be accepting of malformed data.

    :param str raw_contents: extra-info content provided by the relay
    :param bool validate: checks the validity of the extra-info descriptor if
      **True**, skips these checks otherwise

    :raises: **ValueError** if the contents are malformed and validate is True
    """

    super(ExtraInfoDescriptor, self).__init__(raw_contents)
    raw_contents = stem.util.str_tools._to_unicode(raw_contents)

    self.nickname = None
    self.fingerprint = None
    self.published = None
    self.geoip_db_digest = None
    self.geoip6_db_digest = None
    self.transport = {}

    self.conn_bi_direct_end = None
    self.conn_bi_direct_interval = None
    self.conn_bi_direct_below = None
    self.conn_bi_direct_read = None
    self.conn_bi_direct_write = None
    self.conn_bi_direct_both = None

    self.read_history_end = None
    self.read_history_interval = None
    self.read_history_values = None

    self.write_history_end = None
    self.write_history_interval = None
    self.write_history_values = None

    self.cell_stats_end = None
    self.cell_stats_interval = None
    self.cell_processed_cells = None
    self.cell_queued_cells = None
    self.cell_time_in_queue = None
    self.cell_circuits_per_decile = None

    self.dir_stats_end = None
    self.dir_stats_interval = None
    self.dir_v2_ips = None
    self.dir_v3_ips = None
    self.dir_v2_share = None
    self.dir_v3_share = None
    self.dir_v2_requests = None
    self.dir_v3_requests = None
    self.dir_v2_responses = None
    self.dir_v3_responses = None
    self.dir_v2_responses_unknown = None
    self.dir_v3_responses_unknown = None
    self.dir_v2_direct_dl = None
    self.dir_v3_direct_dl = None
    self.dir_v2_direct_dl_unknown = None
    self.dir_v3_direct_dl_unknown = None
    self.dir_v2_tunneled_dl = None
    self.dir_v3_tunneled_dl = None
    self.dir_v2_tunneled_dl_unknown = None
    self.dir_v3_tunneled_dl_unknown = None

    self.dir_read_history_end = None
    self.dir_read_history_interval = None
    self.dir_read_history_values = None

    self.dir_write_history_end = None
    self.dir_write_history_interval = None
    self.dir_write_history_values = None

    self.entry_stats_end = None
    self.entry_stats_interval = None
    self.entry_ips = None

    self.exit_stats_end = None
    self.exit_stats_interval = None
    self.exit_kibibytes_written = None
    self.exit_kibibytes_read = None
    self.exit_streams_opened = None

    self.bridge_stats_end = None
    self.bridge_stats_interval = None
    self.bridge_ips = None
    self.geoip_start_time = None
    self.geoip_client_origins = None

    self.ip_versions = None
    self.ip_transports = None

    self._unrecognized_lines = []

    entries = _get_descriptor_components(raw_contents, validate)

    if validate:
      for keyword in self._required_fields():
        if keyword not in entries:
          raise ValueError("Extra-info descriptor must have a '%s' entry" % keyword)

      for keyword in self._required_fields() + SINGLE_FIELDS:
        if keyword in entries and len(entries[keyword]) > 1:
          raise ValueError("The '%s' entry can only appear once in an extra-info descriptor" % keyword)

      # Key views can't be indexed under python 3, so copy them to a list to
      # look at the first and last keyword.
      keywords = list(entries.keys())

      expected_first_keyword = self._first_keyword()
      if expected_first_keyword and (not keywords or expected_first_keyword != keywords[0]):
        raise ValueError("Extra-info descriptor must start with a '%s' entry" % expected_first_keyword)

      expected_last_keyword = self._last_keyword()
      if expected_last_keyword and (not keywords or expected_last_keyword != keywords[-1]):
        raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)

    self._parse(entries, validate)

  def get_unrecognized_lines(self):
    """
    Provides the lines whose keyword wasn't recognized while parsing.

    :returns: **list** copy of the unrecognized lines, in the order that they
      were encountered
    """

    return list(self._unrecognized_lines)

  def _parse(self, entries, validate):
    """
    Parses a series of 'keyword => (value, pgp block)' mappings and applies
    them as attributes. Lines with a keyword that we don't recognize are
    recorded and available via get_unrecognized_lines().

    When not validating, malformed lines (or malformed entries within a line)
    are skipped rather than raising.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if True

    :raises:
      * **ValueError** if an error occurs in validation
      * **stem.ProtocolError** if validating and a 'bridge-ip-versions' or
        'bridge-ip-transports' line is malformed
    """

    for keyword, values in entries.items():
      # most just work with the first (and only) value
      value, _ = values[0]
      line = "%s %s" % (keyword, value)  # original line

      if keyword == "extra-info":
        # "extra-info" Nickname Fingerprint
        extra_info_comp = value.split()

        if len(extra_info_comp) < 2:
          if not validate:
            continue

          raise ValueError("Extra-info line must have two values: %s" % line)

        if validate:
          if not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]):
            raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0])
          elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]):
            raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % extra_info_comp[1])

        self.nickname = extra_info_comp[0]
        self.fingerprint = extra_info_comp[1]
      elif keyword == "geoip-db-digest":
        # "geoip-db-digest" Digest

        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
          raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line)

        self.geoip_db_digest = value
      elif keyword == "geoip6-db-digest":
        # "geoip6-db-digest" Digest

        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
          raise ValueError("Geoip v6 digest line had an invalid sha1 digest: %s" % line)

        self.geoip6_db_digest = value
      elif keyword == "transport":
        # "transport" transportname address:port [arglist]
        # Everything after the transportname is scrubbed in published bridge
        # descriptors, so we'll never see it in practice.
        #
        # These entries really only make sense for bridges, but have been seen
        # on non-bridges in the wild when the relay operator configured it this
        # way.

        for transport_value, _ in values:
          # this keyword can appear multiple times, so error messages need the
          # line for this particular value rather than the first one
          transport_line = "%s %s" % (keyword, transport_value)
          name, address, port, args = None, None, None, None

          try:
            if ' ' not in transport_value:
              # scrubbed
              name = transport_value
            else:
              # not scrubbed
              value_comp = transport_value.split()

              if len(value_comp) < 1:
                raise ValueError("Transport line is missing its transport name: %s" % transport_line)

              name = value_comp[0]

              if len(value_comp) < 2:
                raise ValueError("Transport line is missing its address:port value: %s" % transport_line)
              elif ":" not in value_comp[1]:
                raise ValueError("Transport line's address:port entry is missing a colon: %s" % transport_line)

              # split on the last colon since IPv6 addresses contain colons
              address, port_str = value_comp[1].rsplit(':', 1)

              # IPv6 addresses may be wrapped in brackets ('[address]:port'),
              # check and store them without the brackets
              bare_address = address

              if address.startswith('[') and address.endswith(']'):
                bare_address = address[1:-1]

              if not (stem.util.connection.is_valid_ipv4_address(address) or
                      stem.util.connection.is_valid_ipv6_address(bare_address)):
                raise ValueError("Transport line has a malformed address: %s" % transport_line)
              elif not stem.util.connection.is_valid_port(port_str):
                raise ValueError("Transport line has a malformed port: %s" % transport_line)

              address = bare_address
              port = int(port_str)
              args = value_comp[2:]  # empty list if there's no arglist
          except ValueError:
            if validate:
              raise

            continue  # skip the malformed transport entry

          self.transport[name] = (address, port, args)
      elif keyword == "cell-circuits-per-decile":
        # "cell-circuits-per-decile" num

        if not value.isdigit():
          # this also covers negative values since '-' isn't a digit
          if validate:
            raise ValueError("Non-numeric cell-circuits-per-decile value: %s" % line)
          else:
            continue

        self.cell_circuits_per_decile = int(value)
      elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"):
        # "<keyword>" key=count,key=count,...
        #
        # Keys that are part of the DirResponse or DirStat enum go into the
        # recognized mapping, everything else into the '*_unknown' one.

        recognized_counts = {}
        unrecognized_counts = {}

        is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp")
        key_set = DirResponse if is_response_stats else DirStat

        key_type = "STATUS" if is_response_stats else "STAT"
        error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line)

        if value:
          for entry in value.split(","):
            if "=" not in entry:
              if validate:
                raise ValueError(error_msg)
              else:
                continue

            status, count = entry.split("=", 1)

            if count.isdigit():
              if status in key_set:
                recognized_counts[status] = int(count)
              else:
                unrecognized_counts[status] = int(count)
            elif validate:
              raise ValueError(error_msg)

        if keyword == "dirreq-v2-resp":
          self.dir_v2_responses = recognized_counts
          self.dir_v2_responses_unknown = unrecognized_counts
        elif keyword == "dirreq-v3-resp":
          self.dir_v3_responses = recognized_counts
          self.dir_v3_responses_unknown = unrecognized_counts
        elif keyword == "dirreq-v2-direct-dl":
          self.dir_v2_direct_dl = recognized_counts
          self.dir_v2_direct_dl_unknown = unrecognized_counts
        elif keyword == "dirreq-v3-direct-dl":
          self.dir_v3_direct_dl = recognized_counts
          self.dir_v3_direct_dl_unknown = unrecognized_counts
        elif keyword == "dirreq-v2-tunneled-dl":
          self.dir_v2_tunneled_dl = recognized_counts
          self.dir_v2_tunneled_dl_unknown = unrecognized_counts
        elif keyword == "dirreq-v3-tunneled-dl":
          self.dir_v3_tunneled_dl = recognized_counts
          self.dir_v3_tunneled_dl_unknown = unrecognized_counts
      elif keyword in ("dirreq-v2-share", "dirreq-v3-share"):
        # "<keyword>" num%

        try:
          if not value.endswith("%"):
            raise ValueError()

          percentage = float(value[:-1]) / 100

          # Bug lets these be above 100%, however they're soon going away...
          # https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html

          if validate and percentage < 0:
            raise ValueError("Negative percentage value: %s" % line)

          if keyword == "dirreq-v2-share":
            self.dir_v2_share = percentage
          elif keyword == "dirreq-v3-share":
            self.dir_v3_share = percentage
        except ValueError:
          if validate:
            raise ValueError("Value can't be parsed as a percentage: %s" % line)
      elif keyword in ("cell-processed-cells", "cell-queued-cells", "cell-time-in-queue"):
        # "<keyword>" num,...,num

        cell_values = []

        if value:
          for entry in value.split(","):
            try:
              # Values should be positive but as discussed in ticket #5849
              # there was a bug around this. It was fixed in tor 0.2.2.1.

              cell_values.append(float(entry))
            except ValueError:
              if validate:
                raise ValueError("Non-numeric entry in %s listing: %s" % (keyword, line))

        if keyword == "cell-processed-cells":
          self.cell_processed_cells = cell_values
        elif keyword == "cell-queued-cells":
          self.cell_queued_cells = cell_values
        elif keyword == "cell-time-in-queue":
          self.cell_time_in_queue = cell_values
      elif keyword in ("published", "geoip-start-time"):
        # "<keyword>" YYYY-MM-DD HH:MM:SS

        try:
          timestamp = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")

          if keyword == "published":
            self.published = timestamp
          elif keyword == "geoip-start-time":
            self.geoip_start_time = timestamp
        except ValueError:
          if validate:
            raise ValueError("Timestamp on %s line wasn't parsable: %s" % (keyword, line))
      elif keyword in ("cell-stats-end", "entry-stats-end", "exit-stats-end", "bridge-stats-end", "dirreq-stats-end"):
        # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)

        try:
          timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value)

          if keyword == "cell-stats-end":
            self.cell_stats_end = timestamp
            self.cell_stats_interval = interval
          elif keyword == "entry-stats-end":
            self.entry_stats_end = timestamp
            self.entry_stats_interval = interval
          elif keyword == "exit-stats-end":
            self.exit_stats_end = timestamp
            self.exit_stats_interval = interval
          elif keyword == "bridge-stats-end":
            self.bridge_stats_end = timestamp
            self.bridge_stats_interval = interval
          elif keyword == "dirreq-stats-end":
            self.dir_stats_end = timestamp
            self.dir_stats_interval = interval
        except ValueError:
          if validate:
            raise
      elif keyword == "conn-bi-direct":
        # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH

        try:
          timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)

          # the remainder is None if nothing follows the interval, treat that
          # as a malformed line rather than failing on the split
          stats = remainder.split(",") if remainder else []

          if len(stats) != 4 or not \
            (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()):
            raise ValueError("conn-bi-direct line should end with four numeric values: %s" % line)

          self.conn_bi_direct_end = timestamp
          self.conn_bi_direct_interval = interval
          self.conn_bi_direct_below = int(stats[0])
          self.conn_bi_direct_read = int(stats[1])
          self.conn_bi_direct_write = int(stats[2])
          self.conn_bi_direct_both = int(stats[3])
        except ValueError:
          if validate:
            raise
      elif keyword in ("read-history", "write-history", "dirreq-read-history", "dirreq-write-history"):
        # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
        try:
          timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
          history_values = []

          if remainder:
            try:
              history_values = [int(entry) for entry in remainder.split(",")]
            except ValueError:
              raise ValueError("%s line has non-numeric values: %s" % (keyword, line))

          if keyword == "read-history":
            self.read_history_end = timestamp
            self.read_history_interval = interval
            self.read_history_values = history_values
          elif keyword == "write-history":
            self.write_history_end = timestamp
            self.write_history_interval = interval
            self.write_history_values = history_values
          elif keyword == "dirreq-read-history":
            self.dir_read_history_end = timestamp
            self.dir_read_history_interval = interval
            self.dir_read_history_values = history_values
          elif keyword == "dirreq-write-history":
            self.dir_write_history_end = timestamp
            self.dir_write_history_interval = interval
            self.dir_write_history_values = history_values
        except ValueError:
          if validate:
            raise
      elif keyword in ("exit-kibibytes-written", "exit-kibibytes-read", "exit-streams-opened"):
        # "<keyword>" port=N,port=N,...

        port_mappings = {}
        error_msg = "Entries in %s line should only be PORT=N entries: %s" % (keyword, line)

        if value:
          for entry in value.split(","):
            if "=" not in entry:
              if validate:
                raise ValueError(error_msg)
              else:
                continue

            port, stat = entry.split("=", 1)

            if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit():
              if port != 'other':
                port = int(port)
              port_mappings[port] = int(stat)
            elif validate:
              raise ValueError(error_msg)

        if keyword == "exit-kibibytes-written":
          self.exit_kibibytes_written = port_mappings
        elif keyword == "exit-kibibytes-read":
          self.exit_kibibytes_read = port_mappings
        elif keyword == "exit-streams-opened":
          self.exit_streams_opened = port_mappings
      elif keyword in ("dirreq-v2-ips", "dirreq-v3-ips", "dirreq-v2-reqs", "dirreq-v3-reqs", "geoip-client-origins", "entry-ips", "bridge-ips"):
        # "<keyword>" CC=N,CC=N,...
        #
        # The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric
        # locale codes for some special values, for instance...
        #   A1,"Anonymous Proxy"
        #   A2,"Satellite Provider"
        #   ??,"Unknown"

        locale_usage = {}
        error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line)

        if value:
          for entry in value.split(","):
            if "=" not in entry:
              if validate:
                raise ValueError(error_msg)
              else:
                continue

            locale, count = entry.split("=", 1)

            if re.match(r"^[a-zA-Z0-9\?]{2}$", locale) and count.isdigit():
              locale_usage[locale] = int(count)
            elif validate:
              raise ValueError(error_msg)

        if keyword == "dirreq-v2-ips":
          self.dir_v2_ips = locale_usage
        elif keyword == "dirreq-v3-ips":
          self.dir_v3_ips = locale_usage
        elif keyword == "dirreq-v2-reqs":
          self.dir_v2_requests = locale_usage
        elif keyword == "dirreq-v3-reqs":
          self.dir_v3_requests = locale_usage
        elif keyword == "geoip-client-origins":
          self.geoip_client_origins = locale_usage
        elif keyword == "entry-ips":
          self.entry_ips = locale_usage
        elif keyword == "bridge-ips":
          self.bridge_ips = locale_usage
      elif keyword == "bridge-ip-versions":
        # "bridge-ip-versions" protocol=count,protocol=count,...

        self.ip_versions = {}

        if value:
          for entry in value.split(','):
            if '=' not in entry:
              if validate:
                raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
              else:
                continue

            protocol, count = entry.split('=', 1)

            if not count.isdigit():
              if validate:
                raise stem.ProtocolError("IP protocol count was non-numeric (%s): %s" % (count, line))
              else:
                continue

            self.ip_versions[protocol] = int(count)
      elif keyword == "bridge-ip-transports":
        # "bridge-ip-transports" transport=count,transport=count,...

        self.ip_transports = {}

        if value:
          for entry in value.split(','):
            if '=' not in entry:
              if validate:
                raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
              else:
                continue

            protocol, count = entry.split('=', 1)

            if not count.isdigit():
              if validate:
                raise stem.ProtocolError("Transport count was non-numeric (%s): %s" % (count, line))
              else:
                continue

            self.ip_transports[protocol] = int(count)
      else:
        self._unrecognized_lines.append(line)

  def digest(self):
    """
    Provides the upper-case hex encoded sha1 of our content. This value is part
    of the server descriptor entry for this relay.

    :returns: **str** with the upper-case hex digest value for this server
      descriptor

    :raises: **NotImplementedError** if the subclass doesn't override this
    """

    raise NotImplementedError("Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass")

  def _required_fields(self):
    # keywords that must be present when we're parsed with validation
    return REQUIRED_FIELDS

  def _first_keyword(self):
    # keyword the descriptor must start with, None skips this check
    return "extra-info"

  def _last_keyword(self):
    # keyword the descriptor must end with, None skips this check
    return "router-signature"


class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
  r"""
  Relay extra-info descriptor, constructed from data such as that provided by
  "GETINFO extra-info/digest/\*", cached descriptors, and metrics
  (`specification <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).

  :var str signature: **\*** signature for this extrainfo descriptor

  **\*** attribute is required when we're parsed with validation
  """

  def __init__(self, raw_contents, validate = True):
    # Needs to be set before calling the parent constructor since that
    # invokes our _parse(), which assigns the signature.
    self.signature = None

    super(RelayExtraInfoDescriptor, self).__init__(raw_contents, validate)

  @lru_cache()
  def digest(self):
    """
    Provides the upper-case hex encoded sha1 of our content, up to and
    including the 'router-signature' line (the signature block itself isn't
    part of the digest).

    :returns: **str** with the upper-case hex digest value
    """

    # our digest is calculated from everything except our signature
    raw_content, ending = str(self), "\nrouter-signature\n"
    signature_index = raw_content.find(ending)

    # find() gives -1 when there isn't a signature line (possible when parsed
    # without validation), in which case all of our content is hashed rather
    # than an arbitrary prefix of it
    if signature_index != -1:
      raw_content = raw_content[:signature_index + len(ending)]

    return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper()

  def _parse(self, entries, validate):
    """
    Handles the 'router-signature' entry, then hands everything else to
    ExtraInfoDescriptor's parser.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if True

    :raises: **ValueError** if an error occurs in validation
    """

    entries = dict(entries)  # shallow copy since we're destructive

    # Pop the entry rather than deleting it while looping over the mapping,
    # which raises a RuntimeError under python 3.
    signature_values = entries.pop("router-signature", None)

    if signature_values:
      value, block_contents = signature_values[0]

      if validate and not block_contents:
        raise ValueError("Router signature line must be followed by a signature block: %s" % ("router-signature %s" % value))

      self.signature = block_contents

    ExtraInfoDescriptor._parse(self, entries, validate)


class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
  """
  Bridge extra-info descriptor (`bridge descriptor specification
  <https://metrics.torproject.org/formats.html#bridgedesc>`_)
  """

  def __init__(self, raw_contents, validate = True):
    # Needs to be set before calling the parent constructor since that
    # invokes our _parse(), which assigns the digest.
    self._digest = None

    super(BridgeExtraInfoDescriptor, self).__init__(raw_contents, validate)

  def digest(self):
    """
    Provides the value of our 'router-digest' line.

    :returns: **str** with the digest from our 'router-digest' line, **None**
      if we didn't have one
    """

    return self._digest

  def _parse(self, entries, validate):
    """
    Handles the 'router-digest' entry, then hands everything else to
    ExtraInfoDescriptor's parser.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if True

    :raises: **ValueError** if an error occurs in validation
    """

    entries = dict(entries)  # shallow copy since we're destructive

    # Pop the entry rather than deleting it while looping over the mapping,
    # which raises a RuntimeError under python 3.
    digest_values = entries.pop("router-digest", None)

    if digest_values:
      value, _ = digest_values[0]

      if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
        raise ValueError("Router digest line had an invalid sha1 digest: %s" % ("router-digest %s" % value))

      self._digest = value

    ExtraInfoDescriptor._parse(self, entries, validate)

  def _required_fields(self):
    # bridge descriptors have a 'router-digest' in place of the
    # 'router-signature' that relays are required to have
    excluded_fields = [
      "router-signature",
    ]

    included_fields = [
      "router-digest",
    ]

    return tuple(included_fields + [f for f in REQUIRED_FIELDS if f not in excluded_fields])

  def _last_keyword(self):
    # no particular keyword is required to come last
    return None