# Copyright 2012-2013, Damian Johnson and The Tor Project # See LICENSE for licensing information """ Parsing for Tor extra-info descriptors. These are published by relays whenever their server descriptor is published and have a similar format. However, unlike server descriptors these don't contain information that Tor clients require to function and as such aren't fetched by default. Defined in section 2.2 of the `dir-spec `_, extra-info descriptors contain interesting but non-vital information such as usage statistics. Tor clients cannot request these documents for bridges. Extra-info descriptors are available from a few sources... * if you have 'DownloadExtraInfo 1' in your torrc... * control port via 'GETINFO extra-info/digest/\*' queries * the 'cached-extrainfo' file in tor's data directory * tor metrics, at https://metrics.torproject.org/data.html * directory authorities and mirrors via their DirPort **Module Overview:** :: ExtraInfoDescriptor - Tor extra-info descriptor. | |- RelayExtraInfoDescriptor - Extra-info descriptor for a relay. | +- BridgeExtraInfoDescriptor - Extra-info descriptor for a bridge. | |- digest - calculates the upper-case hex digest value for our content +- get_unrecognized_lines - lines with unrecognized content .. data:: DirResponse (enum) Enumeration for known statuses for ExtraInfoDescriptor's dir_*_responses. =================== =========== DirResponse Description =================== =========== **OK** network status requests that were answered **NOT_ENOUGH_SIGS** network status wasn't signed by enough authorities **UNAVAILABLE** requested network status was unavailable **NOT_FOUND** requested network status was not found **NOT_MODIFIED** network status unmodified since If-Modified-Since time **BUSY** directory was busy =================== =========== .. data:: DirStat (enum) Enumeration for known stats for ExtraInfoDescriptor's dir_*_direct_dl and dir_*_tunneled_dl. ===================== =========== DirStat Description ===================== =========== **COMPLETE** requests that completed successfully **TIMEOUT** requests that didn't complete within a ten minute timeout **RUNNING** requests still in process when measurement's taken **MIN** smallest rate at which a descriptor was downloaded in B/s **MAX** largest rate at which a descriptor was downloaded in B/s **D1-4** and **D6-9** rate of the slowest x/10 download rates in B/s **Q1** and **Q3** rate of the slowest and fastest quarter download rates in B/s **MD** median download rate in B/s ===================== =========== """ import datetime import hashlib import re import stem.util.connection import stem.util.enum import stem.util.str_tools from stem.descriptor import ( PGP_BLOCK_END, Descriptor, _read_until_keywords, _get_descriptor_components, ) try: # added in python 3.2 from functools import lru_cache except ImportError: from stem.util.lru_cache import lru_cache # known statuses for dirreq-v2-resp and dirreq-v3-resp... DirResponse = stem.util.enum.Enum( ("OK", "ok"), ("NOT_ENOUGH_SIGS", "not-enough-sigs"), ("UNAVAILABLE", "unavailable"), ("NOT_FOUND", "not-found"), ("NOT_MODIFIED", "not-modified"), ("BUSY", "busy"), ) # known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl... dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md'] dir_stats += ['d%i' % i for i in range(1, 5)] dir_stats += ['d%i' % i for i in range(6, 10)] DirStat = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats]) # relay descriptors must have exactly one of the following REQUIRED_FIELDS = ( "extra-info", "published", "router-signature", ) # optional entries that can appear at most once SINGLE_FIELDS = ( "read-history", "write-history", "geoip-db-digest", "geoip6-db-digest", "bridge-stats-end", "bridge-ips", "dirreq-stats-end", "dirreq-v2-ips", "dirreq-v3-ips", "dirreq-v2-reqs", "dirreq-v3-reqs", "dirreq-v2-share", "dirreq-v3-share", "dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl", "dirreq-read-history", "dirreq-write-history", "entry-stats-end", "entry-ips", "cell-stats-end", "cell-processed-cells", "cell-queued-cells", "cell-time-in-queue", "cell-circuits-per-decile", "conn-bi-direct", "exit-stats-end", "exit-kibibytes-written", "exit-kibibytes-read", "exit-streams-opened", ) def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs): """ Iterates over the extra-info descriptors in a file. :param file descriptor_file: file with descriptor content :param bool is_bridge: parses the file as being a bridge descriptor :param bool validate: checks the validity of the descriptor's content if **True**, skips these checks otherwise :param dict kwargs: additional arguments for the descriptor constructor :returns: iterator for :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor` instances in the file :raises: * **ValueError** if the contents is malformed and validate is **True** * **IOError** if the file can't be read """ while True: extrainfo_content = _read_until_keywords("router-signature", descriptor_file) # we've reached the 'router-signature', now include the pgp style block block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0] extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True) if extrainfo_content: if is_bridge: yield BridgeExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate, **kwargs) else: yield RelayExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate, **kwargs) else: break # done parsing file def _parse_timestamp_and_interval(keyword, content): """ Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry. :param str keyword: line's keyword :param str content: line content to be parsed :returns: **tuple** of the form (timestamp (**datetime**), interval (**int**), remaining content (**str**)) :raises: **ValueError** if the content is malformed """ line = "%s %s" % (keyword, content) content_match = re.match("^(.*) \(([0-9]+) s\)( .*)?$", content) if not content_match: raise ValueError("Malformed %s line: %s" % (keyword, line)) timestamp_str, interval, remainder = content_match.groups() if remainder: remainder = remainder[1:] # remove leading space if not interval.isdigit(): raise ValueError("%s line's interval wasn't a number: %s" % (keyword, line)) try: timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S") return timestamp, int(interval), remainder except ValueError: raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line)) class ExtraInfoDescriptor(Descriptor): """ Extra-info descriptor document. :var str nickname: **\*** relay's nickname :var str fingerprint: **\*** identity key fingerprint :var datetime published: **\*** time in UTC when this descriptor was made :var str geoip_db_digest: sha1 of the geoIP database file for IPv4 addresses :var str geoip6_db_digest: sha1 of the geoIP database file for IPv6 addresses :var dict transport: **\*** mapping of transport methods to their (address, port, args) tuple, these usually appear on bridges in which case all of those are **None** **Bi-directional connection usage:** :var datetime conn_bi_direct_end: end of the sampling interval :var int conn_bi_direct_interval: seconds per interval :var int conn_bi_direct_below: connections that read/wrote less than 20 KiB :var int conn_bi_direct_read: connections that read at least 10x more than wrote :var int conn_bi_direct_write: connections that wrote at least 10x more than read :var int conn_bi_direct_both: remaining connections **Bytes read/written for relayed traffic:** :var datetime read_history_end: end of the sampling interval :var int read_history_interval: seconds per interval :var list read_history_values: bytes read during each interval :var datetime write_history_end: end of the sampling interval :var int write_history_interval: seconds per interval :var list write_history_values: bytes written during each interval **Cell relaying statistics:** :var datetime cell_stats_end: end of the period when stats were gathered :var int cell_stats_interval: length in seconds of the interval :var list cell_processed_cells: measurement of processed cells per circuit :var list cell_queued_cells: measurement of queued cells per circuit :var list cell_time_in_queue: mean enqueued time in milliseconds for cells :var int cell_circuits_per_decile: mean number of circuits in a decile **Directory Mirror Attributes:** :var datetime dir_stats_end: end of the period when stats were gathered :var int dir_stats_interval: length in seconds of the interval :var dict dir_v2_ips: mapping of locales to rounded count of requester ips :var dict dir_v3_ips: mapping of locales to rounded count of requester ips :var float dir_v2_share: percent of total directory traffic it expects to serve :var float dir_v3_share: percent of total directory traffic it expects to serve :var dict dir_v2_requests: mapping of locales to rounded count of requests :var dict dir_v3_requests: mapping of locales to rounded count of requests :var dict dir_v2_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count :var dict dir_v3_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count :var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count :var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count :var dict dir_v2_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort :var dict dir_v3_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort :var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement :var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement :var dict dir_v2_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort :var dict dir_v3_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort :var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement :var dict dir_v3_tunneled_dl_unknown: mapping of unrecognized stats to their measurement **Bytes read/written for directory mirroring:** :var datetime dir_read_history_end: end of the sampling interval :var int dir_read_history_interval: seconds per interval :var list dir_read_history_values: bytes read during each interval :var datetime dir_write_history_end: end of the sampling interval :var int dir_write_history_interval: seconds per interval :var list dir_write_history_values: bytes read during each interval **Guard Attributes:** :var datetime entry_stats_end: end of the period when stats were gathered :var int entry_stats_interval: length in seconds of the interval :var dict entry_ips: mapping of locales to rounded count of unique user ips **Exit Attributes:** :var datetime exit_stats_end: end of the period when stats were gathered :var int exit_stats_interval: length in seconds of the interval :var dict exit_kibibytes_written: traffic per port (keys are ints or 'other') :var dict exit_kibibytes_read: traffic per port (keys are ints or 'other') :var dict exit_streams_opened: streams per port (keys are ints or 'other') **Bridge Attributes:** :var datetime bridge_stats_end: end of the period when stats were gathered :var int bridge_stats_interval: length in seconds of the interval :var dict bridge_ips: mapping of locales to rounded count of unique user ips :var datetime geoip_start_time: replaced by bridge_stats_end (deprecated) :var dict geoip_client_origins: replaced by bridge_ips (deprecated) :var dict ip_versions: mapping of ip protocols to a rounded count for the number of users :var dict ip_versions: mapping of ip transports to a count for the number of users **\*** attribute is either required when we're parsed with validation or has a default value, others are left as **None** if undefined """ def __init__(self, raw_contents, validate = True): """ Extra-info descriptor constructor. By default this validates the descriptor's content as it's parsed. This validation can be disabled to either improve performance or be accepting of malformed data. :param str raw_contents: extra-info content provided by the relay :param bool validate: checks the validity of the extra-info descriptor if **True**, skips these checks otherwise :raises: **ValueError** if the contents is malformed and validate is True """ super(ExtraInfoDescriptor, self).__init__(raw_contents) raw_contents = stem.util.str_tools._to_unicode(raw_contents) self.nickname = None self.fingerprint = None self.published = None self.geoip_db_digest = None self.geoip6_db_digest = None self.transport = {} self.conn_bi_direct_end = None self.conn_bi_direct_interval = None self.conn_bi_direct_below = None self.conn_bi_direct_read = None self.conn_bi_direct_write = None self.conn_bi_direct_both = None self.read_history_end = None self.read_history_interval = None self.read_history_values = None self.write_history_end = None self.write_history_interval = None self.write_history_values = None self.cell_stats_end = None self.cell_stats_interval = None self.cell_processed_cells = None self.cell_queued_cells = None self.cell_time_in_queue = None self.cell_circuits_per_decile = None self.dir_stats_end = None self.dir_stats_interval = None self.dir_v2_ips = None self.dir_v3_ips = None self.dir_v2_share = None self.dir_v3_share = None self.dir_v2_requests = None self.dir_v3_requests = None self.dir_v2_responses = None self.dir_v3_responses = None self.dir_v2_responses_unknown = None self.dir_v3_responses_unknown = None self.dir_v2_direct_dl = None self.dir_v3_direct_dl = None self.dir_v2_direct_dl_unknown = None self.dir_v3_direct_dl_unknown = None self.dir_v2_tunneled_dl = None self.dir_v3_tunneled_dl = None self.dir_v2_tunneled_dl_unknown = None self.dir_v3_tunneled_dl_unknown = None self.dir_read_history_end = None self.dir_read_history_interval = None self.dir_read_history_values = None self.dir_write_history_end = None self.dir_write_history_interval = None self.dir_write_history_values = None self.entry_stats_end = None self.entry_stats_interval = None self.entry_ips = None self.exit_stats_end = None self.exit_stats_interval = None self.exit_kibibytes_written = None self.exit_kibibytes_read = None self.exit_streams_opened = None self.bridge_stats_end = None self.bridge_stats_interval = None self.bridge_ips = None self.geoip_start_time = None self.geoip_client_origins = None self.ip_versions = None self.ip_transports = None self._unrecognized_lines = [] entries = _get_descriptor_components(raw_contents, validate) if validate: for keyword in self._required_fields(): if not keyword in entries: raise ValueError("Extra-info descriptor must have a '%s' entry" % keyword) for keyword in self._required_fields() + SINGLE_FIELDS: if keyword in entries and len(entries[keyword]) > 1: raise ValueError("The '%s' entry can only appear once in an extra-info descriptor" % keyword) expected_first_keyword = self._first_keyword() if expected_first_keyword and expected_first_keyword != entries.keys()[0]: raise ValueError("Extra-info descriptor must start with a '%s' entry" % expected_first_keyword) expected_last_keyword = self._last_keyword() if expected_last_keyword and expected_last_keyword != entries.keys()[-1]: raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword) self._parse(entries, validate) def get_unrecognized_lines(self): return list(self._unrecognized_lines) def _parse(self, entries, validate): """ Parses a series of 'keyword => (value, pgp block)' mappings and applies them as attributes. :param dict entries: descriptor contents to be applied :param bool validate: checks the validity of descriptor content if True :raises: **ValueError** if an error occurs in validation """ for keyword, values in entries.items(): # most just work with the first (and only) value value, _ = values[0] line = "%s %s" % (keyword, value) # original line if keyword == "extra-info": # "extra-info" Nickname Fingerprint extra_info_comp = value.split() if len(extra_info_comp) < 2: if not validate: continue raise ValueError("Extra-info line must have two values: %s" % line) if validate: if not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]): raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0]) elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]): raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % extra_info_comp[1]) self.nickname = extra_info_comp[0] self.fingerprint = extra_info_comp[1] elif keyword == "geoip-db-digest": # "geoip-db-digest" Digest if validate and not stem.util.tor_tools.is_hex_digits(value, 40): raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line) self.geoip_db_digest = value elif keyword == "geoip6-db-digest": # "geoip6-db-digest" Digest if validate and not stem.util.tor_tools.is_hex_digits(value, 40): raise ValueError("Geoip v6 digest line had an invalid sha1 digest: %s" % line) self.geoip6_db_digest = value elif keyword == "transport": # "transport" transportname address:port [arglist] # Everything after the transportname is scrubbed in published bridge # descriptors, so we'll never see it in practice. # # These entries really only make sense for bridges, but have been seen # on non-bridges in the wild when the relay operator configured it this # way. for transport_value, _ in values: name, address, port, args = None, None, None, None if not ' ' in transport_value: # scrubbed name = transport_value else: # not scrubbed value_comp = transport_value.split() if len(value_comp) < 1: raise ValueError("Transport line is missing its transport name: %s" % line) else: name = value_comp[0] if len(value_comp) < 2: raise ValueError("Transport line is missing its address:port value: %s" % line) elif not ":" in value_comp[1]: raise ValueError("Transport line's address:port entry is missing a colon: %s" % line) else: address, port_str = value_comp[1].split(':', 1) if not stem.util.connection.is_valid_ipv4_address(address) or \ stem.util.connection.is_valid_ipv6_address(address): raise ValueError("Transport line has a malformed address: %s" % line) elif not stem.util.connection.is_valid_port(port_str): raise ValueError("Transport line has a malformed port: %s" % line) port = int(port_str) if len(value_comp) >= 3: args = value_comp[2:] else: args = [] self.transport[name] = (address, port, args) elif keyword == "cell-circuits-per-decile": # "cell-circuits-per-decile" num if not value.isdigit(): if validate: raise ValueError("Non-numeric cell-circuits-per-decile value: %s" % line) else: continue stat = int(value) if validate and stat < 0: raise ValueError("Negative cell-circuits-per-decile value: %s" % line) self.cell_circuits_per_decile = stat elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"): recognized_counts = {} unrecognized_counts = {} is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp") key_set = DirResponse if is_response_stats else DirStat key_type = "STATUS" if is_response_stats else "STAT" error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line) if value: for entry in value.split(","): if not "=" in entry: if validate: raise ValueError(error_msg) else: continue status, count = entry.split("=", 1) if count.isdigit(): if status in key_set: recognized_counts[status] = int(count) else: unrecognized_counts[status] = int(count) elif validate: raise ValueError(error_msg) if keyword == "dirreq-v2-resp": self.dir_v2_responses = recognized_counts self.dir_v2_responses_unknown = unrecognized_counts elif keyword == "dirreq-v3-resp": self.dir_v3_responses = recognized_counts self.dir_v3_responses_unknown = unrecognized_counts elif keyword == "dirreq-v2-direct-dl": self.dir_v2_direct_dl = recognized_counts self.dir_v2_direct_dl_unknown = unrecognized_counts elif keyword == "dirreq-v3-direct-dl": self.dir_v3_direct_dl = recognized_counts self.dir_v3_direct_dl_unknown = unrecognized_counts elif keyword == "dirreq-v2-tunneled-dl": self.dir_v2_tunneled_dl = recognized_counts self.dir_v2_tunneled_dl_unknown = unrecognized_counts elif keyword == "dirreq-v3-tunneled-dl": self.dir_v3_tunneled_dl = recognized_counts self.dir_v3_tunneled_dl_unknown = unrecognized_counts elif keyword in ("dirreq-v2-share", "dirreq-v3-share"): # "" num% try: if not value.endswith("%"): raise ValueError() percentage = float(value[:-1]) / 100 # Bug lets these be above 100%, however they're soon going away... # https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html if validate and percentage < 0: raise ValueError("Negative percentage value: %s" % line) if keyword == "dirreq-v2-share": self.dir_v2_share = percentage elif keyword == "dirreq-v3-share": self.dir_v3_share = percentage except ValueError as exc: if validate: raise ValueError("Value can't be parsed as a percentage: %s" % line) elif keyword in ("cell-processed-cells", "cell-queued-cells", "cell-time-in-queue"): # "" num,...,num entries = [] if value: for entry in value.split(","): try: # Values should be positive but as discussed in ticket #5849 # there was a bug around this. It was fixed in tor 0.2.2.1. entries.append(float(entry)) except ValueError: if validate: raise ValueError("Non-numeric entry in %s listing: %s" % (keyword, line)) if keyword == "cell-processed-cells": self.cell_processed_cells = entries elif keyword == "cell-queued-cells": self.cell_queued_cells = entries elif keyword == "cell-time-in-queue": self.cell_time_in_queue = entries elif keyword in ("published", "geoip-start-time"): # "" YYYY-MM-DD HH:MM:SS try: timestamp = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S") if keyword == "published": self.published = timestamp elif keyword == "geoip-start-time": self.geoip_start_time = timestamp except ValueError: if validate: raise ValueError("Timestamp on %s line wasn't parsable: %s" % (keyword, line)) elif keyword in ("cell-stats-end", "entry-stats-end", "exit-stats-end", "bridge-stats-end", "dirreq-stats-end"): # "" YYYY-MM-DD HH:MM:SS (NSEC s) try: timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value) if keyword == "cell-stats-end": self.cell_stats_end = timestamp self.cell_stats_interval = interval elif keyword == "entry-stats-end": self.entry_stats_end = timestamp self.entry_stats_interval = interval elif keyword == "exit-stats-end": self.exit_stats_end = timestamp self.exit_stats_interval = interval elif keyword == "bridge-stats-end": self.bridge_stats_end = timestamp self.bridge_stats_interval = interval elif keyword == "dirreq-stats-end": self.dir_stats_end = timestamp self.dir_stats_interval = interval except ValueError as exc: if validate: raise exc elif keyword == "conn-bi-direct": # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH try: timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value) stats = remainder.split(",") if len(stats) != 4 or not \ (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()): raise ValueError("conn-bi-direct line should end with four numeric values: %s" % line) self.conn_bi_direct_end = timestamp self.conn_bi_direct_interval = interval self.conn_bi_direct_below = int(stats[0]) self.conn_bi_direct_read = int(stats[1]) self.conn_bi_direct_write = int(stats[2]) self.conn_bi_direct_both = int(stats[3]) except ValueError as exc: if validate: raise exc elif keyword in ("read-history", "write-history", "dirreq-read-history", "dirreq-write-history"): # "" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM... try: timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value) history_values = [] if remainder: try: history_values = [int(entry) for entry in remainder.split(",")] except ValueError: raise ValueError("%s line has non-numeric values: %s" % (keyword, line)) if keyword == "read-history": self.read_history_end = timestamp self.read_history_interval = interval self.read_history_values = history_values elif keyword == "write-history": self.write_history_end = timestamp self.write_history_interval = interval self.write_history_values = history_values elif keyword == "dirreq-read-history": self.dir_read_history_end = timestamp self.dir_read_history_interval = interval self.dir_read_history_values = history_values elif keyword == "dirreq-write-history": self.dir_write_history_end = timestamp self.dir_write_history_interval = interval self.dir_write_history_values = history_values except ValueError as exc: if validate: raise exc elif keyword in ("exit-kibibytes-written", "exit-kibibytes-read", "exit-streams-opened"): # "" port=N,port=N,... port_mappings = {} error_msg = "Entries in %s line should only be PORT=N entries: %s" % (keyword, line) if value: for entry in value.split(","): if not "=" in entry: if validate: raise ValueError(error_msg) else: continue port, stat = entry.split("=", 1) if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit(): if port != 'other': port = int(port) port_mappings[port] = int(stat) elif validate: raise ValueError(error_msg) if keyword == "exit-kibibytes-written": self.exit_kibibytes_written = port_mappings elif keyword == "exit-kibibytes-read": self.exit_kibibytes_read = port_mappings elif keyword == "exit-streams-opened": self.exit_streams_opened = port_mappings elif keyword in ("dirreq-v2-ips", "dirreq-v3-ips", "dirreq-v2-reqs", "dirreq-v3-reqs", "geoip-client-origins", "entry-ips", "bridge-ips"): # "" CC=N,CC=N,... # # The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric # locale codes for some special values, for instance... # A1,"Anonymous Proxy" # A2,"Satellite Provider" # ??,"Unknown" locale_usage = {} error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line) if value: for entry in value.split(","): if not "=" in entry: if validate: raise ValueError(error_msg) else: continue locale, count = entry.split("=", 1) if re.match("^[a-zA-Z0-9\?]{2}$", locale) and count.isdigit(): locale_usage[locale] = int(count) elif validate: raise ValueError(error_msg) if keyword == "dirreq-v2-ips": self.dir_v2_ips = locale_usage elif keyword == "dirreq-v3-ips": self.dir_v3_ips = locale_usage elif keyword == "dirreq-v2-reqs": self.dir_v2_requests = locale_usage elif keyword == "dirreq-v3-reqs": self.dir_v3_requests = locale_usage elif keyword == "geoip-client-origins": self.geoip_client_origins = locale_usage elif keyword == "entry-ips": self.entry_ips = locale_usage elif keyword == "bridge-ips": self.bridge_ips = locale_usage elif keyword == "bridge-ip-versions": self.ip_versions = {} if value: for entry in value.split(','): if not '=' in entry: raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '=' mappings: %s" % line) protocol, count = entry.split('=', 1) if not count.isdigit(): raise stem.ProtocolError("IP protocol count was non-numeric (%s): %s" % (count, line)) self.ip_versions[protocol] = int(count) elif keyword == "bridge-ip-transports": self.ip_transports = {} if value: for entry in value.split(','): if not '=' in entry: raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '=' mappings: %s" % line) protocol, count = entry.split('=', 1) if not count.isdigit(): raise stem.ProtocolError("Transport count was non-numeric (%s): %s" % (count, line)) self.ip_transports[protocol] = int(count) else: self._unrecognized_lines.append(line) def digest(self): """ Provides the upper-case hex encoded sha1 of our content. This value is part of the server descriptor entry for this relay. :returns: **str** with the upper-case hex digest value for this server descriptor """ raise NotImplementedError("Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass") def _required_fields(self): return REQUIRED_FIELDS def _first_keyword(self): return "extra-info" def _last_keyword(self): return "router-signature" class RelayExtraInfoDescriptor(ExtraInfoDescriptor): """ Relay extra-info descriptor, constructed from data such as that provided by "GETINFO extra-info/digest/\*", cached descriptors, and metrics (`specification `_). :var str signature: **\*** signature for this extrainfo descriptor **\*** attribute is required when we're parsed with validation """ def __init__(self, raw_contents, validate = True): self.signature = None super(RelayExtraInfoDescriptor, self).__init__(raw_contents, validate) @lru_cache() def digest(self): # our digest is calculated from everything except our signature raw_content, ending = str(self), "\nrouter-signature\n" raw_content = raw_content[:raw_content.find(ending) + len(ending)] return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper() def _parse(self, entries, validate): entries = dict(entries) # shallow copy since we're destructive # handles fields only in server descriptors for keyword, values in entries.items(): value, block_contents = values[0] line = "%s %s" % (keyword, value) # original line if block_contents: line += "\n%s" % block_contents if keyword == "router-signature": if validate and not block_contents: raise ValueError("Router signature line must be followed by a signature block: %s" % line) self.signature = block_contents del entries["router-signature"] ExtraInfoDescriptor._parse(self, entries, validate) class BridgeExtraInfoDescriptor(ExtraInfoDescriptor): """ Bridge extra-info descriptor (`bridge descriptor specification `_) """ def __init__(self, raw_contents, validate = True): self._digest = None super(BridgeExtraInfoDescriptor, self).__init__(raw_contents, validate) def digest(self): return self._digest def _parse(self, entries, validate): entries = dict(entries) # shallow copy since we're destructive # handles fields only in server descriptors for keyword, values in entries.items(): value, _ = values[0] line = "%s %s" % (keyword, value) # original line if keyword == "router-digest": if validate and not stem.util.tor_tools.is_hex_digits(value, 40): raise ValueError("Router digest line had an invalid sha1 digest: %s" % line) self._digest = value del entries["router-digest"] ExtraInfoDescriptor._parse(self, entries, validate) def _required_fields(self): excluded_fields = [ "router-signature", ] included_fields = [ "router-digest", ] return tuple(included_fields + [f for f in REQUIRED_FIELDS if not f in excluded_fields]) def _last_keyword(self): return None