onionshare/lib/stem/descriptor/networkstatus.py
2014-05-21 14:09:41 -04:00

1476 lines
54 KiB
Python

# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for Tor network status documents. This supports both the v2 and v3
dir-spec. Documents can be obtained from a few sources...
* the 'cached-consensus' file in tor's data directory
* tor metrics, at https://metrics.torproject.org/data.html
* directory authorities and mirrors via their DirPort
... and contain the following sections...
* document header
* list of :class:`stem.descriptor.networkstatus.DirectoryAuthority`
* list of :class:`stem.descriptor.router_status_entry.RouterStatusEntry`
* document footer
Of these, the router status entry section can be quite large (on the order of
hundreds of kilobytes). As such we provide a couple of methods for reading
network status documents through :func:`~stem.descriptor.__init__.parse_file`.
For more information see :func:`~stem.descriptor.__init__.DocumentHandler`...
::
from stem.descriptor import parse_file, DocumentHandler
with open('.tor/cached-consensus', 'rb') as consensus_file:
# Processes the routers as we read them in. The routers refer to a document
# with an unset 'routers' attribute.
for router in parse_file(consensus_file, 'network-status-consensus-3 1.0', document_handler = DocumentHandler.ENTRIES):
print router.nickname
**Module Overview:**
::
NetworkStatusDocument - Network status document
|- NetworkStatusDocumentV2 - Version 2 network status document
|- NetworkStatusDocumentV3 - Version 3 network status document
+- BridgeNetworkStatusDocument - Version 3 network status document for bridges
KeyCertificate - Certificate used to authenticate an authority
DocumentSignature - Signature of a document by a directory authority
DirectoryAuthority - Directory authority as defined in a v3 network status document
"""
import datetime
import io
import stem.descriptor.router_status_entry
import stem.util.str_tools
import stem.util.tor_tools
import stem.version
from stem.descriptor import (
PGP_BLOCK_END,
Descriptor,
DocumentHandler,
_get_descriptor_components,
_read_until_keywords,
)
# Version 2 network status document fields, tuples of the form...
# (keyword, is_mandatory)
NETWORK_STATUS_V2_FIELDS = (
("network-status-version", True),
("dir-source", True),
("fingerprint", True),
("contact", True),
("dir-signing-key", True),
("client-versions", False),
("server-versions", False),
("published", True),
("dir-options", False),
("directory-signature", True),
)
# Network status document are either a 'vote' or 'consensus', with different
# mandatory fields for each. Both though require that their fields appear in a
# specific order. This is an ordered listing of the following...
#
# (field, in_votes, in_consensus, is_mandatory)
HEADER_STATUS_DOCUMENT_FIELDS = (
("network-status-version", True, True, True),
("vote-status", True, True, True),
("consensus-methods", True, False, False),
("consensus-method", False, True, False),
("published", True, False, True),
("valid-after", True, True, True),
("fresh-until", True, True, True),
("valid-until", True, True, True),
("voting-delay", True, True, True),
("client-versions", True, True, False),
("server-versions", True, True, False),
("known-flags", True, True, True),
("flag-thresholds", True, False, False),
("params", True, True, False),
)
FOOTER_STATUS_DOCUMENT_FIELDS = (
("directory-footer", True, True, False),
("bandwidth-weights", False, True, False),
("directory-signature", True, True, True),
)
HEADER_FIELDS = [attr[0] for attr in HEADER_STATUS_DOCUMENT_FIELDS]
FOOTER_FIELDS = [attr[0] for attr in FOOTER_STATUS_DOCUMENT_FIELDS]
AUTH_START = "dir-source"
ROUTERS_START = "r"
FOOTER_START = "directory-footer"
V2_FOOTER_START = "directory-signature"
DEFAULT_PARAMS = {
"bwweightscale": 10000,
"cbtdisabled": 0,
"cbtnummodes": 3,
"cbtrecentcount": 20,
"cbtmaxtimeouts": 18,
"cbtmincircs": 100,
"cbtquantile": 80,
"cbtclosequantile": 95,
"cbttestfreq": 60,
"cbtmintimeout": 2000,
"cbtinitialtimeout": 60000,
"Support022HiddenServices": 1,
}
# KeyCertificate fields, tuple is of the form...
# (keyword, is_mandatory)
KEY_CERTIFICATE_PARAMS = (
('dir-key-certificate-version', True),
('dir-address', False),
('fingerprint', True),
('dir-identity-key', True),
('dir-key-published', True),
('dir-key-expires', True),
('dir-signing-key', True),
('dir-key-crosscert', False),
('dir-key-certification', True),
)
def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = DocumentHandler.ENTRIES, **kwargs):
"""
Parses a network status and iterates over the RouterStatusEntry in it. The
document that these instances reference have an empty 'routers' attribute to
allow for limited memory usage.
:param file document_file: file with network status document content
:param class document_type: NetworkStatusDocument subclass
:param bool validate: checks the validity of the document's contents if
**True**, skips these checks otherwise
:param bool is_microdescriptor: **True** if this is for a microdescriptor
consensus, **False** otherwise
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:param dict kwargs: additional arguments for the descriptor constructor
:returns: :class:`stem.descriptor.networkstatus.NetworkStatusDocument` object
:raises:
* **ValueError** if the document_version is unrecognized or the contents is
malformed and validate is **True**
* **IOError** if the file can't be read
"""
# we can't properly default this since NetworkStatusDocumentV3 isn't defined yet
if document_type is None:
document_type = NetworkStatusDocumentV3
if document_type == NetworkStatusDocumentV2:
document_type = NetworkStatusDocumentV2
router_type = stem.descriptor.router_status_entry.RouterStatusEntryV2
elif document_type == NetworkStatusDocumentV3:
if not is_microdescriptor:
router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
else:
router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3
elif document_type == BridgeNetworkStatusDocument:
document_type = BridgeNetworkStatusDocument
router_type = stem.descriptor.router_status_entry.RouterStatusEntryV2
else:
raise ValueError("Document type %i isn't recognized (only able to parse v2, v3, and bridge)" % document_type)
if document_handler == DocumentHandler.DOCUMENT:
yield document_type(document_file.read(), validate, **kwargs)
return
# getting the document without the routers section
header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
routers_start = document_file.tell()
_read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
routers_end = document_file.tell()
footer = document_file.readlines()
document_content = bytes.join(b"", header + footer)
if document_handler == DocumentHandler.BARE_DOCUMENT:
yield document_type(document_content, validate, **kwargs)
elif document_handler == DocumentHandler.ENTRIES:
desc_iterator = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
entry_class = router_type,
entry_keyword = ROUTERS_START,
start_position = routers_start,
end_position = routers_end,
extra_args = (document_type(document_content, validate),),
**kwargs
)
for desc in desc_iterator:
yield desc
else:
raise ValueError("Unrecognized document_handler: %s" % document_handler)
def _parse_file_key_certs(certificate_file, validate = True):
"""
Parses a file containing one or more authority key certificates.
:param file certificate_file: file with key certificates
:param bool validate: checks the validity of the certificate's contents if
**True**, skips these checks otherwise
:returns: iterator for :class:`stem.descriptor.networkstatus.KeyCertificate`
instance in the file
:raises:
* **ValueError** if the key certificate content is invalid and validate is
**True**
* **IOError** if the file can't be read
"""
while True:
keycert_content = _read_until_keywords("dir-key-certification", certificate_file)
# we've reached the 'router-signature', now include the pgp style block
block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
keycert_content += _read_until_keywords(block_end_prefix, certificate_file, True)
if keycert_content:
yield stem.descriptor.networkstatus.KeyCertificate(bytes.join(b"", keycert_content), validate = validate)
else:
break # done parsing file
class NetworkStatusDocument(Descriptor):
"""
Common parent for network status documents.
"""
def __init__(self, raw_content):
super(NetworkStatusDocument, self).__init__(raw_content)
self._unrecognized_lines = []
def get_unrecognized_lines(self):
return list(self._unrecognized_lines)
class NetworkStatusDocumentV2(NetworkStatusDocument):
"""
Version 2 network status document. These have been deprecated and are no
longer generated by Tor.
:var dict routers: fingerprints to :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2`
contained in the document
:var int version: **\*** document version
:var str hostname: **\*** hostname of the authority
:var str address: **\*** authority's IP address
:var int dir_port: **\*** authority's DirPort
:var str fingerprint: **\*** authority's fingerprint
:var str contact: **\*** authority's contact information
:var str signing_key: **\*** authority's public signing key
:var list client_versions: list of recommended client tor version strings
:var list server_versions: list of recommended server tor version strings
:var datetime published: **\*** time when the document was published
:var list options: **\*** list of things that this authority decides
:var str signing_authority: **\*** name of the authority signing the document
:var str signature: **\*** authority's signature for the document
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
def __init__(self, raw_content, validate = True):
super(NetworkStatusDocumentV2, self).__init__(raw_content)
self.version = None
self.hostname = None
self.address = None
self.dir_port = None
self.fingerprint = None
self.contact = None
self.signing_key = None
self.client_versions = []
self.server_versions = []
self.published = None
self.options = []
self.signing_authority = None
self.signatures = None
# Splitting the document from the routers. Unlike v3 documents we're not
# bending over backwards on the validation by checking the field order or
# that header/footer attributes aren't in the wrong section. This is a
# deprecated descriptor type - patches welcome if you want those checks.
document_file = io.BytesIO(raw_content)
document_content = bytes.join(b"", _read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
router_iter = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2,
entry_keyword = ROUTERS_START,
section_end_keywords = (V2_FOOTER_START,),
extra_args = (self,),
)
self.routers = dict((desc.fingerprint, desc) for desc in router_iter)
document_content += b"\n" + document_file.read()
document_content = stem.util.str_tools._to_unicode(document_content)
entries = _get_descriptor_components(document_content, validate)
if validate:
self._check_constraints(entries)
self._parse(entries, validate)
def _parse(self, entries, validate):
for keyword, values in entries.items():
value, block_contents = values[0]
line = "%s %s" % (keyword, value) # original line
if block_contents:
line += "\n%s" % block_contents
if keyword == "network-status-version":
if not value.isdigit():
if not validate:
continue
raise ValueError("Network status document has a non-numeric version: %s" % line)
self.version = int(value)
if validate and self.version != 2:
raise ValueError("Expected a version 2 network status document, got version '%s' instead" % self.version)
elif keyword == "dir-source":
dir_source_comp = value.split()
if len(dir_source_comp) < 3:
if not validate:
continue
raise ValueError("The 'dir-source' line of a v2 network status document must have three values: %s" % line)
if validate:
if not dir_source_comp[0]:
# https://trac.torproject.org/7055
raise ValueError("Authority's hostname can't be blank: %s" % line)
elif not stem.util.connection.is_valid_ipv4_address(dir_source_comp[1]):
raise ValueError("Authority's address isn't a valid IPv4 address: %s" % dir_source_comp[1])
elif not stem.util.connection.is_valid_port(dir_source_comp[2], allow_zero = True):
raise ValueError("Authority's DirPort is invalid: %s" % dir_source_comp[2])
elif not dir_source_comp[2].isdigit():
continue
self.hostname = dir_source_comp[0]
self.address = dir_source_comp[1]
self.dir_port = None if dir_source_comp[2] == '0' else int(dir_source_comp[2])
elif keyword == "fingerprint":
if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
raise ValueError("Authority's fingerprint in a v2 network status document is malformed: %s" % line)
self.fingerprint = value
elif keyword == "contact":
self.contact = value
elif keyword == "dir-signing-key":
self.signing_key = block_contents
elif keyword in ("client-versions", "server-versions"):
# v2 documents existed while there were tor versions using the 'old'
# style, hence we aren't attempting to parse them
for version_str in value.split(","):
if keyword == 'client-versions':
self.client_versions.append(version_str)
elif keyword == 'server-versions':
self.server_versions.append(version_str)
elif keyword == "published":
try:
self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
except ValueError:
if validate:
raise ValueError("Version 2 network status document's 'published' time wasn't parsable: %s" % value)
elif keyword == "dir-options":
self.options = value.split()
elif keyword == "directory-signature":
self.signing_authority = value
self.signature = block_contents
else:
self._unrecognized_lines.append(line)
# 'client-versions' and 'server-versions' are only required if "Versions"
# is among the options
if validate and "Versions" in self.options:
if not ('client-versions' in entries and 'server-versions' in entries):
raise ValueError("Version 2 network status documents must have a 'client-versions' and 'server-versions' when 'Versions' is listed among its dir-options:\n%s" % str(self))
def _check_constraints(self, entries):
required_fields = [field for (field, is_mandatory) in NETWORK_STATUS_V2_FIELDS if is_mandatory]
for keyword in required_fields:
if not keyword in entries:
raise ValueError("Network status document (v2) must have a '%s' line:\n%s" % (keyword, str(self)))
# all recognized fields can only appear once
single_fields = [field for (field, _) in NETWORK_STATUS_V2_FIELDS]
for keyword in single_fields:
if keyword in entries and len(entries[keyword]) > 1:
raise ValueError("Network status document (v2) can only have a single '%s' line, got %i:\n%s" % (keyword, len(entries[keyword]), str(self)))
if 'network-status-version' != entries.keys()[0]:
raise ValueError("Network status document (v2) are expected to start with a 'network-status-version' line:\n%s" % str(self))
class NetworkStatusDocumentV3(NetworkStatusDocument):
"""
Version 3 network status document. This could be either a vote or consensus.
:var tuple routers: :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
contained in the document
:var int version: **\*** document version
:var str version_flavor: **\*** flavor associated with the document (such as 'microdesc')
:var bool is_consensus: **\*** **True** if the document is a consensus
:var bool is_vote: **\*** **True** if the document is a vote
:var bool is_microdescriptor: **\*** **True** if this is a microdescriptor
flavored document, **False** otherwise
:var datetime valid_after: **\*** time when the consensus became valid
:var datetime fresh_until: **\*** time when the next consensus should be produced
:var datetime valid_until: **\*** time when this consensus becomes obsolete
:var int vote_delay: **\*** number of seconds allowed for collecting votes
from all authorities
:var int dist_delay: **\*** number of seconds allowed for collecting
signatures from all authorities
:var list client_versions: list of recommended client tor versions
:var list server_versions: list of recommended server tor versions
:var list known_flags: **\*** list of :data:`~stem.Flag` for the router's flags
:var dict params: **\*** dict of parameter(**str**) => value(**int**) mappings
:var list directory_authorities: **\*** list of :class:`~stem.descriptor.networkstatus.DirectoryAuthority`
objects that have generated this document
:var list signatures: **\*** :class:`~stem.descriptor.networkstatus.DocumentSignature`
of the authorities that have signed the document
**Consensus Attributes:**
:var int consensus_method: method version used to generate this consensus
:var dict bandwidth_weights: dict of weight(str) => value(int) mappings
**Vote Attributes:**
:var list consensus_methods: list of ints for the supported method versions
:var datetime published: time when the document was published
:var dict flag_thresholds: **\*** mapping of internal performance thresholds used while making the vote, values are **ints** or **floats**
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as None if undefined
"""
def __init__(self, raw_content, validate = True, default_params = True):
"""
Parse a v3 network status document.
:param str raw_content: raw network status document data
:param bool validate: **True** if the document is to be validated, **False** otherwise
:param bool default_params: includes defaults in our params dict, otherwise
it just contains values from the document
:raises: **ValueError** if the document is invalid
"""
super(NetworkStatusDocumentV3, self).__init__(raw_content)
document_file = io.BytesIO(raw_content)
self._header = _DocumentHeader(document_file, validate, default_params)
# merge header attributes into us
for attr, value in vars(self._header).items():
if attr != "_unrecognized_lines":
setattr(self, attr, value)
else:
self._unrecognized_lines += value
self.directory_authorities = tuple(stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
entry_class = DirectoryAuthority,
entry_keyword = AUTH_START,
section_end_keywords = (ROUTERS_START, FOOTER_START, V2_FOOTER_START),
extra_args = (self._header.is_vote,),
))
if validate and self._header.is_vote and len(self.directory_authorities) != 1:
raise ValueError("Votes should only have an authority entry for the one that issued it, got %i: %s" % (len(self.directory_authorities), self.directory_authorities))
if not self._header.is_microdescriptor:
router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
else:
router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3
router_iter = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
entry_class = router_type,
entry_keyword = ROUTERS_START,
section_end_keywords = (FOOTER_START, V2_FOOTER_START),
extra_args = (self,),
)
self.routers = dict((desc.fingerprint, desc) for desc in router_iter)
self._footer = _DocumentFooter(document_file, validate, self._header)
# merge header attributes into us
for attr, value in vars(self._footer).items():
if attr != "_unrecognized_lines":
setattr(self, attr, value)
else:
self._unrecognized_lines += value
def meets_consensus_method(self, method):
"""
Checks if we meet the given consensus-method. This works for both votes and
consensuses, checking our 'consensus-method' and 'consensus-methods'
entries.
:param int method: consensus-method to check for
:returns: **True** if we meet the given consensus-method, and **False** otherwise
"""
return self._header.meets_consensus_method(method)
def _compare(self, other, method):
if not isinstance(other, NetworkStatusDocumentV3):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class _DocumentHeader(object):
def __init__(self, document_file, validate, default_params):
self.version = None
self.version_flavor = None
self.is_consensus = True
self.is_vote = False
self.is_microdescriptor = False
self.consensus_methods = []
self.published = None
self.consensus_method = None
self.valid_after = None
self.fresh_until = None
self.valid_until = None
self.vote_delay = None
self.dist_delay = None
self.client_versions = []
self.server_versions = []
self.known_flags = []
self.flag_thresholds = {}
self.params = dict(DEFAULT_PARAMS) if default_params else {}
self._unrecognized_lines = []
content = bytes.join(b"", _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
content = stem.util.str_tools._to_unicode(content)
entries = _get_descriptor_components(content, validate)
self._parse(entries, validate)
# doing this validation afterward so we know our 'is_consensus' and
# 'is_vote' attributes
if validate:
_check_for_missing_and_disallowed_fields(self, entries, HEADER_STATUS_DOCUMENT_FIELDS)
_check_for_misordered_fields(entries, HEADER_FIELDS)
def meets_consensus_method(self, method):
if self.consensus_method is not None:
return self.consensus_method >= method
elif self.consensus_methods is not None:
return bool(filter(lambda x: x >= method, self.consensus_methods))
else:
return False # malformed document
def _parse(self, entries, validate):
for keyword, values in entries.items():
value, _ = values[0]
line = "%s %s" % (keyword, value)
# all known header fields can only appear once except
if validate and len(values) > 1 and keyword in HEADER_FIELDS:
raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
if keyword == 'network-status-version':
# "network-status-version" version
if ' ' in value:
version, flavor = value.split(' ', 1)
else:
version, flavor = value, None
if not version.isdigit():
if not validate:
continue
raise ValueError("Network status document has a non-numeric version: %s" % line)
self.version = int(version)
self.version_flavor = flavor
self.is_microdescriptor = flavor == 'microdesc'
if validate and self.version != 3:
raise ValueError("Expected a version 3 network status document, got version '%s' instead" % self.version)
elif keyword == 'vote-status':
# "vote-status" type
#
# The consensus-method and consensus-methods fields are optional since
# they weren't included in version 1. Setting a default now that we
# know if we're a vote or not.
if value == 'consensus':
self.is_consensus, self.is_vote = True, False
self.consensus_method = 1
elif value == 'vote':
self.is_consensus, self.is_vote = False, True
self.consensus_methods = [1]
elif validate:
raise ValueError("A network status document's vote-status line can only be 'consensus' or 'vote', got '%s' instead" % value)
elif keyword == 'consensus-methods':
# "consensus-methods" IntegerList
consensus_methods = []
for entry in value.split(" "):
if entry.isdigit():
consensus_methods.append(int(entry))
elif validate:
raise ValueError("A network status document's consensus-methods must be a list of integer values, but was '%s'" % value)
self.consensus_methods = consensus_methods
if validate and not (1 in self.consensus_methods):
raise ValueError("Network status votes must include consensus-method version 1")
elif keyword == 'consensus-method':
# "consensus-method" Integer
if value.isdigit():
self.consensus_method = int(value)
elif validate:
raise ValueError("A network status document's consensus-method must be an integer, but was '%s'" % value)
elif keyword in ('published', 'valid-after', 'fresh-until', 'valid-until'):
try:
date_value = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
if keyword == 'published':
self.published = date_value
elif keyword == 'valid-after':
self.valid_after = date_value
elif keyword == 'fresh-until':
self.fresh_until = date_value
elif keyword == 'valid-until':
self.valid_until = date_value
except ValueError:
if validate:
raise ValueError("Network status document's '%s' time wasn't parsable: %s" % (keyword, value))
elif keyword == "voting-delay":
# "voting-delay" VoteSeconds DistSeconds
value_comp = value.split(' ')
if len(value_comp) == 2 and value_comp[0].isdigit() and value_comp[1].isdigit():
self.vote_delay = int(value_comp[0])
self.dist_delay = int(value_comp[1])
elif validate:
raise ValueError("A network status document's 'voting-delay' line must be a pair of integer values, but was '%s'" % value)
elif keyword in ("client-versions", "server-versions"):
for entry in value.split(","):
try:
version_value = stem.version._get_version(entry)
if keyword == 'client-versions':
self.client_versions.append(version_value)
elif keyword == 'server-versions':
self.server_versions.append(version_value)
except ValueError:
if validate:
raise ValueError("Network status document's '%s' line had '%s', which isn't a parsable tor version: %s" % (keyword, entry, line))
elif keyword == "known-flags":
# "known-flags" FlagList
# simply fetches the entries, excluding empty strings
self.known_flags = [entry for entry in value.split(" ") if entry]
elif keyword == "flag-thresholds":
# "flag-thresholds" SP THRESHOLDS
value = value.strip()
if value:
for entry in value.split(" "):
if not '=' in entry:
if not validate:
continue
raise ValueError("Network status document's '%s' line is expected to be space separated key=value mappings, got: %s" % (keyword, line))
entry_key, entry_value = entry.split("=", 1)
try:
if entry_value.endswith("%"):
# opting for string manipulation rather than just
# 'float(entry_value) / 100' because floating point arithmetic
# will lose precision
self.flag_thresholds[entry_key] = float("0." + entry_value[:-1].replace('.', '', 1))
elif '.' in entry_value:
self.flag_thresholds[entry_key] = float(entry_value)
else:
self.flag_thresholds[entry_key] = int(entry_value)
except ValueError:
if validate:
raise ValueError("Network status document's '%s' line is expected to have float values, got: %s" % (keyword, line))
elif keyword == "params":
# "params" [Parameters]
# Parameter ::= Keyword '=' Int32
# Int32 ::= A decimal integer between -2147483648 and 2147483647.
# Parameters ::= Parameter | Parameters SP Parameter
# should only appear in consensus-method 7 or later
if validate and not self.meets_consensus_method(7):
raise ValueError("A network status document's 'params' line should only appear in consensus-method 7 or later")
# skip if this is a blank line
if value == "":
continue
self.params.update(_parse_int_mappings(keyword, value, validate))
if validate:
self._check_params_constraints()
else:
self._unrecognized_lines.append(line)
def _check_params_constraints(self):
"""
Checks that the params we know about are within their documented ranges.
"""
for key, value in self.params.items():
# all parameters are constrained to int32 range
minimum, maximum = -2147483648, 2147483647
if key == "circwindow":
minimum, maximum = 100, 1000
elif key == "CircuitPriorityHalflifeMsec":
minimum = -1
elif key in ("perconnbwrate", "perconnbwburst"):
minimum = 1
elif key == "refuseunknownexits":
minimum, maximum = 0, 1
elif key == "bwweightscale":
minimum = 1
elif key == "cbtdisabled":
minimum, maximum = 0, 1
elif key == "cbtnummodes":
minimum, maximum = 1, 20
elif key == "cbtrecentcount":
minimum, maximum = 3, 1000
elif key == "cbtmaxtimeouts":
minimum, maximum = 3, 10000
elif key == "cbtmincircs":
minimum, maximum = 1, 10000
elif key == "cbtquantile":
minimum, maximum = 10, 99
elif key == "cbtclosequantile":
minimum, maximum = self.params.get("cbtquantile", minimum), 99
elif key == "cbttestfreq":
minimum = 1
elif key == "cbtmintimeout":
minimum = 500
elif key == "cbtinitialtimeout":
minimum = self.params.get("cbtmintimeout", minimum)
elif key == "UseOptimisticData":
minimum, maximum = 0, 1
elif key == "Support022HiddenServices":
minimum, maximum = 0, 1
if value < minimum or value > maximum:
raise ValueError("'%s' value on the params line must be in the range of %i - %i, was %i" % (key, minimum, maximum, value))
class _DocumentFooter(object):
def __init__(self, document_file, validate, header):
self.signatures = []
self.bandwidth_weights = {}
self._unrecognized_lines = []
content = stem.util.str_tools._to_unicode(document_file.read())
if not content:
return # footer is optional and there's nothing to parse
entries = _get_descriptor_components(content, validate)
self._parse(entries, validate, header)
if validate:
# Check that the footer has the right initial line. Prior to consensus
# method 9 it's a 'directory-signature' and after that footers start with
# 'directory-footer'.
if header.meets_consensus_method(9):
if entries.keys()[0] != 'directory-footer':
raise ValueError("Network status document's footer should start with a 'directory-footer' line in consensus-method 9 or later")
else:
if entries.keys()[0] != 'directory-signature':
raise ValueError("Network status document's footer should start with a 'directory-signature' line prior to consensus-method 9")
_check_for_missing_and_disallowed_fields(header, entries, FOOTER_STATUS_DOCUMENT_FIELDS)
_check_for_misordered_fields(entries, FOOTER_FIELDS)
def _parse(self, entries, validate, header):
for keyword, values in entries.items():
value, block_contents = values[0]
line = "%s %s" % (keyword, value)
# all known footer fields can only appear once except...
# * 'directory-signature' in a consensus
if validate and len(values) > 1 and keyword in FOOTER_FIELDS:
if not (keyword == 'directory-signature' and header.is_consensus):
raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
if keyword == "directory-footer":
# nothing to parse, simply checking that we don't have a value
if validate and value:
raise ValueError("A network status document's 'directory-footer' line shouldn't have any content, got '%s'" % line)
elif keyword == "bandwidth-weights":
self.bandwidth_weights = _parse_int_mappings(keyword, value, validate)
elif keyword == "directory-signature":
for sig_value, block_contents in values:
if not sig_value.count(" ") in (1, 2) or not block_contents:
if not validate:
continue
raise ValueError("Authority signatures in a network status document are expected to be of the form 'directory-signature [METHOD] FINGERPRINT KEY_DIGEST\\nSIGNATURE', got:\n%s\n%s" % (sig_value, block_contents))
if sig_value.count(" ") == 1:
method = 'sha1' # default if none was provided
fingerprint, key_digest = sig_value.split(" ", 1)
else:
method, fingerprint, key_digest = sig_value.split(" ", 2)
self.signatures.append(DocumentSignature(method, fingerprint, key_digest, block_contents, validate))
def _check_for_missing_and_disallowed_fields(header, entries, fields):
"""
Checks that we have mandatory fields for our type, and that we don't have
any fields exclusive to the other (ie, no vote-only fields appear in a
consensus or vice versa).
:param _DocumentHeader header: document header
:param dict entries: ordered keyword/value mappings of the header or footer
:param list fields: expected field attributes (either
**HEADER_STATUS_DOCUMENT_FIELDS** or **FOOTER_STATUS_DOCUMENT_FIELDS**)
:raises: **ValueError** if we're missing mandatory fields or have fields we shouldn't
"""
missing_fields, disallowed_fields = [], []
for field, in_votes, in_consensus, mandatory in fields:
if mandatory and ((header.is_consensus and in_consensus) or (header.is_vote and in_votes)):
# mandatory field, check that we have it
if not field in entries.keys():
missing_fields.append(field)
elif (header.is_consensus and not in_consensus) or (header.is_vote and not in_votes):
# field we shouldn't have, check that we don't
if field in entries.keys():
disallowed_fields.append(field)
if missing_fields:
raise ValueError("Network status document is missing mandatory field: %s" % ', '.join(missing_fields))
if disallowed_fields:
raise ValueError("Network status document has fields that shouldn't appear in this document type or version: %s" % ', '.join(disallowed_fields))
def _check_for_misordered_fields(entries, expected):
"""
To be valid a network status document's fiends need to appear in a specific
order. Checks that known fields appear in that order (unrecognized fields
are ignored).
:param dict entries: ordered keyword/value mappings of the header or footer
:param list expected: ordered list of expected fields (either
**HEADER_FIELDS** or **FOOTER_FIELDS**)
:raises: **ValueError** if entries aren't properly ordered
"""
# Earlier validation has ensured that our fields either belong to our
# document type or are unknown. Remove the unknown fields since they
# reflect a spec change and can appear anywhere in the document.
actual = filter(lambda field: field in expected, entries.keys())
# Narrow the expected to just what we have. If the lists then match then the
# order's valid.
expected = filter(lambda field: field in actual, expected)
if actual != expected:
actual_label = ', '.join(actual)
expected_label = ', '.join(expected)
raise ValueError("The fields in a section of the document are misordered. It should be '%s' but was '%s'" % (actual_label, expected_label))
def _parse_int_mappings(keyword, value, validate):
# Parse a series of 'key=value' entries, checking the following:
# - values are integers
# - keys are sorted in lexical order
results, seen_keys = {}, []
for entry in value.split(" "):
try:
if not '=' in entry:
raise ValueError("must only have 'key=value' entries")
entry_key, entry_value = entry.split("=", 1)
try:
# the int() function accepts things like '+123', but we don't want to
if entry_value.startswith('+'):
raise ValueError()
entry_value = int(entry_value)
except ValueError:
raise ValueError("'%s' is a non-numeric value" % entry_value)
if validate:
# parameters should be in ascending order by their key
for prior_key in seen_keys:
if prior_key > entry_key:
raise ValueError("parameters must be sorted by their key")
results[entry_key] = entry_value
seen_keys.append(entry_key)
except ValueError as exc:
if not validate:
continue
raise ValueError("Unable to parse network status document's '%s' line (%s): %s'" % (keyword, exc, value))
return results
class DirectoryAuthority(Descriptor):
"""
Directory authority information obtained from a v3 network status document.
Authorities can optionally use a legacy format. These are no longer found in
practice, but have the following differences...
* The authority's nickname ends with '-legacy'.
* There's no **contact** or **vote_digest** attribute.
:var str nickname: **\*** authority's nickname
:var str fingerprint: **\*** authority's fingerprint
:var str hostname: **\*** hostname of the authority
:var str address: **\*** authority's IP address
:var int dir_port: **\*** authority's DirPort
:var int or_port: **\*** authority's ORPort
:var bool is_legacy: **\*** if the authority's using the legacy format
:var str contact: contact information, this is included if is_legacy is **False**
**Consensus Attributes:**
:var str vote_digest: digest of the authority that contributed to the consensus, this is included if is_legacy is **False**
**Vote Attributes:**
:var str legacy_dir_key: fingerprint of and obsolete identity key
:var stem.descriptor.networkstatus.KeyCertificate key_certificate: **\***
authority's key certificate
**\*** mandatory attribute
"""
def __init__(self, raw_content, validate = True, is_vote = False):
"""
Parse a directory authority entry in a v3 network status document.
:param str raw_content: raw directory authority entry information
:param bool validate: checks the validity of the content if True, skips
these checks otherwise
:param bool is_vote: True if this is for a vote, False if it's for a consensus
:raises: ValueError if the descriptor data is invalid
"""
super(DirectoryAuthority, self).__init__(raw_content)
raw_content = stem.util.str_tools._to_unicode(raw_content)
self.nickname = None
self.fingerprint = None
self.hostname = None
self.address = None
self.dir_port = None
self.or_port = None
self.is_legacy = False
self.contact = None
self.vote_digest = None
self.legacy_dir_key = None
self.key_certificate = None
self._unrecognized_lines = []
self._parse(raw_content, validate, is_vote)
def _parse(self, content, validate, is_vote):
"""
Parses the given content and applies the attributes.
:param str content: descriptor content
:param bool validate: checks validity if True
:param bool is_vote: **True** if this is for a vote, **False** if it's for
a consensus
:raises: **ValueError** if a validity check fails
"""
# separate the directory authority entry from its key certificate
key_div = content.find('\ndir-key-certificate-version')
if key_div != -1:
key_cert_content = content[key_div + 1:]
content = content[:key_div + 1]
else:
key_cert_content = None
entries = _get_descriptor_components(content, validate)
if validate and 'dir-source' != entries.keys()[0]:
raise ValueError("Authority entries are expected to start with a 'dir-source' line:\n%s" % (content))
# check that we have mandatory fields
if validate:
is_legacy, dir_source_entry = False, entries.get("dir-source")
if dir_source_entry:
is_legacy = dir_source_entry[0][0].split()[0].endswith("-legacy")
required_fields, excluded_fields = ["dir-source"], []
if not is_legacy:
required_fields += ["contact"]
if is_vote:
if not key_cert_content:
raise ValueError("Authority votes must have a key certificate:\n%s" % content)
excluded_fields += ["vote-digest"]
elif not is_vote:
if key_cert_content:
raise ValueError("Authority consensus entries shouldn't have a key certificate:\n%s" % content)
if not is_legacy:
required_fields += ["vote-digest"]
excluded_fields += ["legacy-dir-key"]
for keyword in required_fields:
if not keyword in entries:
raise ValueError("Authority entries must have a '%s' line:\n%s" % (keyword, content))
for keyword in entries:
if keyword in excluded_fields:
type_label = "votes" if is_vote else "consensus entries"
raise ValueError("Authority %s shouldn't have a '%s' line:\n%s" % (type_label, keyword, content))
for keyword, values in entries.items():
value, _ = values[0]
line = "%s %s" % (keyword, value)
# all known attributes can only appear at most once
if validate and len(values) > 1 and keyword in ('dir-source', 'contact', 'legacy-dir-key', 'vote-digest'):
raise ValueError("Authority entries can only have a single '%s' line, got %i:\n%s" % (keyword, len(values), content))
if keyword == 'dir-source':
# "dir-source" nickname identity address IP dirport orport
dir_source_comp = value.split(" ")
if len(dir_source_comp) < 6:
if not validate:
continue
raise ValueError("Authority entry's 'dir-source' line must have six values: %s" % line)
if validate:
if not stem.util.tor_tools.is_valid_nickname(dir_source_comp[0].rstrip('-legacy')):
raise ValueError("Authority's nickname is invalid: %s" % dir_source_comp[0])
elif not stem.util.tor_tools.is_valid_fingerprint(dir_source_comp[1]):
raise ValueError("Authority's fingerprint is invalid: %s" % dir_source_comp[1])
elif not dir_source_comp[2]:
# https://trac.torproject.org/7055
raise ValueError("Authority's hostname can't be blank: %s" % line)
elif not stem.util.connection.is_valid_ipv4_address(dir_source_comp[3]):
raise ValueError("Authority's address isn't a valid IPv4 address: %s" % dir_source_comp[3])
elif not stem.util.connection.is_valid_port(dir_source_comp[4], allow_zero = True):
raise ValueError("Authority's DirPort is invalid: %s" % dir_source_comp[4])
elif not stem.util.connection.is_valid_port(dir_source_comp[5]):
raise ValueError("Authority's ORPort is invalid: %s" % dir_source_comp[5])
elif not (dir_source_comp[4].isdigit() and dir_source_comp[5].isdigit()):
continue
self.nickname = dir_source_comp[0]
self.fingerprint = dir_source_comp[1]
self.hostname = dir_source_comp[2]
self.address = dir_source_comp[3]
self.dir_port = None if dir_source_comp[4] == '0' else int(dir_source_comp[4])
self.or_port = int(dir_source_comp[5])
self.is_legacy = self.nickname.endswith("-legacy")
elif keyword == 'contact':
# "contact" string
self.contact = value
elif keyword == 'legacy-dir-key':
# "legacy-dir-key" FINGERPRINT
if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
raise ValueError("Authority has a malformed legacy directory key: %s" % line)
self.legacy_dir_key = value
elif keyword == 'vote-digest':
# "vote-digest" digest
# technically not a fingerprint, but has the same characteristics
if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
raise ValueError("Authority has a malformed vote digest: %s" % line)
self.vote_digest = value
else:
self._unrecognized_lines.append(line)
if key_cert_content:
self.key_certificate = KeyCertificate(key_cert_content, validate)
def get_unrecognized_lines(self):
"""
Returns any unrecognized lines.
:returns: a list of unrecognized lines
"""
return self._unrecognized_lines
def _compare(self, other, method):
if not isinstance(other, DirectoryAuthority):
return False
return method(str(self).strip(), str(other).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class KeyCertificate(Descriptor):
"""
Directory key certificate for a v3 network status document.
:var int version: **\*** version of the key certificate
:var str address: authority's IP address
:var int dir_port: authority's DirPort
:var str fingerprint: **\*** authority's fingerprint
:var str identity_key: **\*** long term authority identity key
:var datetime published: **\*** time when this key was generated
:var datetime expires: **\*** time after which this key becomes invalid
:var str signing_key: **\*** directory server's public signing key
:var str crosscert: signature made using certificate's signing key
:var str certification: **\*** signature of this key certificate signed with
the identity key
**\*** mandatory attribute
"""
def __init__(self, raw_content, validate = True):
super(KeyCertificate, self).__init__(raw_content)
raw_content = stem.util.str_tools._to_unicode(raw_content)
self.version = None
self.address = None
self.dir_port = None
self.fingerprint = None
self.identity_key = None
self.published = None
self.expires = None
self.signing_key = None
self.crosscert = None
self.certification = None
self._unrecognized_lines = []
self._parse(raw_content, validate)
def _parse(self, content, validate):
"""
Parses the given content and applies the attributes.
:param str content: descriptor content
:param bool validate: checks validity if **True**
:raises: **ValueError** if a validity check fails
"""
entries = _get_descriptor_components(content, validate)
if validate:
if 'dir-key-certificate-version' != entries.keys()[0]:
raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (content))
elif 'dir-key-certification' != entries.keys()[-1]:
raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (content))
# check that we have mandatory fields and that our known fields only
# appear once
for keyword, is_mandatory in KEY_CERTIFICATE_PARAMS:
if is_mandatory and not keyword in entries:
raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, content))
entry_count = len(entries.get(keyword, []))
if entry_count > 1:
raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, content))
for keyword, values in entries.items():
value, block_contents = values[0]
line = "%s %s" % (keyword, value)
if keyword == 'dir-key-certificate-version':
# "dir-key-certificate-version" version
if not value.isdigit():
if not validate:
continue
raise ValueError("Key certificate has a non-integer version: %s" % line)
self.version = int(value)
if validate and self.version != 3:
raise ValueError("Expected a version 3 key certificate, got version '%i' instead" % self.version)
elif keyword == 'dir-address':
# "dir-address" IPPort
if not ':' in value:
if not validate:
continue
raise ValueError("Key certificate's 'dir-address' is expected to be of the form ADDRESS:PORT: %s" % line)
address, dirport = value.split(':', 1)
if validate:
if not stem.util.connection.is_valid_ipv4_address(address):
raise ValueError("Key certificate's address isn't a valid IPv4 address: %s" % line)
elif not stem.util.connection.is_valid_port(dirport):
raise ValueError("Key certificate's dirport is invalid: %s" % line)
elif not dirport.isdigit():
continue
self.address = address
self.dir_port = int(dirport)
elif keyword == 'fingerprint':
# "fingerprint" fingerprint
if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
raise ValueError("Key certificate's fingerprint is malformed: %s" % line)
self.fingerprint = value
elif keyword in ('dir-key-published', 'dir-key-expires'):
# "dir-key-published" YYYY-MM-DD HH:MM:SS
# "dir-key-expires" YYYY-MM-DD HH:MM:SS
try:
date_value = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
if keyword == 'dir-key-published':
self.published = date_value
elif keyword == 'dir-key-expires':
self.expires = date_value
except ValueError:
if validate:
raise ValueError("Key certificate's '%s' time wasn't parsable: %s" % (keyword, value))
elif keyword in ('dir-identity-key', 'dir-signing-key', 'dir-key-crosscert', 'dir-key-certification'):
# "dir-identity-key" NL a public key in PEM format
# "dir-signing-key" NL a key in PEM format
# "dir-key-crosscert" NL CrossSignature
# "dir-key-certification" NL Signature
if validate and not block_contents:
raise ValueError("Key certificate's '%s' line must be followed by a key block: %s" % (keyword, line))
if keyword == 'dir-identity-key':
self.identity_key = block_contents
elif keyword == 'dir-signing-key':
self.signing_key = block_contents
elif keyword == 'dir-key-crosscert':
self.crosscert = block_contents
elif keyword == 'dir-key-certification':
self.certification = block_contents
else:
self._unrecognized_lines.append(line)
def get_unrecognized_lines(self):
"""
Returns any unrecognized lines.
:returns: **list** of unrecognized lines
"""
return self._unrecognized_lines
def _compare(self, other, method):
if not isinstance(other, KeyCertificate):
return False
return method(str(self).strip(), str(other).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class DocumentSignature(object):
"""
Directory signature of a v3 network status document.
:var str method: algorithm used to make the signature
:var str identity: fingerprint of the authority that made the signature
:var str key_digest: digest of the signing key
:var str signature: document signature
:param bool validate: checks validity if **True**
:raises: **ValueError** if a validity check fails
"""
def __init__(self, method, identity, key_digest, signature, validate = True):
# Checking that these attributes are valid. Technically the key
# digest isn't a fingerprint, but it has the same characteristics.
if validate:
if not stem.util.tor_tools.is_valid_fingerprint(identity):
raise ValueError("Malformed fingerprint (%s) in the document signature" % identity)
if not stem.util.tor_tools.is_valid_fingerprint(key_digest):
raise ValueError("Malformed key digest (%s) in the document signature" % key_digest)
self.method = method
self.identity = identity
self.key_digest = key_digest
self.signature = signature
def _compare(self, other, method):
if not isinstance(other, DocumentSignature):
return False
for attr in ("method", "identity", "key_digest", "signature"):
if getattr(self, attr) != getattr(other, attr):
return method(getattr(self, attr), getattr(other, attr))
return method(True, True) # we're equal
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class BridgeNetworkStatusDocument(NetworkStatusDocument):
"""
Network status document containing bridges. This is only available through
the metrics site.
:var tuple routers: :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2`
contained in the document
:var datetime published: time when the document was published
"""
def __init__(self, raw_content, validate = True):
super(BridgeNetworkStatusDocument, self).__init__(raw_content)
self.published = None
document_file = io.BytesIO(raw_content)
published_line = stem.util.str_tools._to_unicode(document_file.readline())
if published_line.startswith("published "):
published_line = published_line.split(" ", 1)[1].strip()
try:
self.published = datetime.datetime.strptime(published_line, "%Y-%m-%d %H:%M:%S")
except ValueError:
if validate:
raise ValueError("Bridge network status document's 'published' time wasn't parsable: %s" % published_line)
elif validate:
raise ValueError("Bridge network status documents must start with a 'published' line:\n%s" % stem.util.str_tools._to_unicode(raw_content))
router_iter = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2,
extra_args = (self,),
)
self.routers = dict((desc.fingerprint, desc) for desc in router_iter)