added stem python library

commit 619ab6db0f  (parent 8ffa569094)
37 changed files with 19032 additions and 0 deletions
lib/stem/descriptor/__init__.py  (new file, 552 lines)

@@ -0,0 +1,552 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Package for parsing and processing descriptor data.

**Module Overview:**

::

  parse_file - Parses the descriptors in a file.

  Descriptor - Common parent for all descriptor file types.
    |- get_path - location of the descriptor on disk if it came from a file
    |- get_archive_path - location of the descriptor within the archive it came from
    |- get_bytes - similar to str(), but provides our original bytes content
    |- get_unrecognized_lines - unparsed descriptor content
    +- __str__ - string that the descriptor was made from

.. data:: DocumentHandler (enum)

  Ways in which we can parse a
  :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`.

  Both **ENTRIES** and **BARE_DOCUMENT** have a 'thin' document, which doesn't
  have a populated **routers** attribute. This allows for lower memory usage
  and upfront runtime. However, if read time and memory aren't a concern then
  **DOCUMENT** can provide you with a fully populated document.

  =================== ===========
  DocumentHandler     Description
  =================== ===========
  **ENTRIES**         Iterates over the contained :class:`~stem.descriptor.router_status_entry.RouterStatusEntry`. Each has a reference to the bare document it came from (through its **document** attribute).
  **DOCUMENT**        :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` with the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` it contains (through its **routers** attribute).
  **BARE_DOCUMENT**   :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` **without** a reference to its contents (the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` are unread).
  =================== ===========
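
  For instance, counting the relays in a consensus might look like this (a
  minimal sketch; it assumes a copy of tor's 'cached-consensus' file in the
  current directory)...

  ::

    import stem.descriptor

    count = 0

    with open('cached-consensus', 'rb') as consensus_file:
      # DocumentHandler.ENTRIES (the default) iterates over the router entries
      for router in stem.descriptor.parse_file(consensus_file, 'network-status-consensus-3 1.0'):
        count += 1

    print(count)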
"""

__all__ = [
  "export",
  "reader",
  "remote",
  "extrainfo_descriptor",
  "server_descriptor",
  "microdescriptor",
  "networkstatus",
  "router_status_entry",
  "tordnsel",
  "parse_file",
  "Descriptor",
]

import os
import re

import stem.prereq
import stem.util.enum
import stem.util.str_tools

try:
  # added in python 2.7
  from collections import OrderedDict
except ImportError:
  from stem.util.ordereddict import OrderedDict

KEYWORD_CHAR = "a-zA-Z0-9-"
WHITESPACE = " \t"
KEYWORD_LINE = re.compile("^([%s]+)(?:[%s]+(.*))?$" % (KEYWORD_CHAR, WHITESPACE))
PGP_BLOCK_START = re.compile("^-----BEGIN ([%s%s]+)-----$" % (KEYWORD_CHAR, WHITESPACE))
PGP_BLOCK_END = "-----END %s-----"

DocumentHandler = stem.util.enum.UppercaseEnum(
  "ENTRIES",
  "DOCUMENT",
  "BARE_DOCUMENT",
)


def parse_file(descriptor_file, descriptor_type = None, validate = True, document_handler = DocumentHandler.ENTRIES, **kwargs):
  """
  Simple function to read the descriptor contents from a file, providing an
  iterator for its :class:`~stem.descriptor.__init__.Descriptor` contents.

  If you don't provide a **descriptor_type** argument then this automatically
  tries to determine the descriptor type based on the following...

  * The @type annotation on the first line. These are generally only found in
    the `descriptor archives <https://metrics.torproject.org>`_.

  * The filename if it matches something from tor's data directory. For
    instance, tor's 'cached-descriptors' contains server descriptors.

  This is a handy function for simple usage, but if you're reading multiple
  descriptor files you might want to consider the
  :class:`~stem.descriptor.reader.DescriptorReader`.

  Descriptor types include the following, including further minor versions (ie.
  if we support 1.1 then we also support everything from 1.0 and most things
  from 1.2, but not 2.0)...

  ========================================= =====
  Descriptor Type                           Class
  ========================================= =====
  server-descriptor 1.0                     :class:`~stem.descriptor.server_descriptor.RelayDescriptor`
  extra-info 1.0                            :class:`~stem.descriptor.extrainfo_descriptor.RelayExtraInfoDescriptor`
  microdescriptor 1.0                       :class:`~stem.descriptor.microdescriptor.Microdescriptor`
  directory 1.0                             **unsupported**
  network-status-2 1.0                      :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV2`)
  dir-key-certificate-3 1.0                 :class:`~stem.descriptor.networkstatus.KeyCertificate`
  network-status-consensus-3 1.0            :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
  network-status-vote-3 1.0                 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
  network-status-microdesc-consensus-3 1.0  :class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
  bridge-network-status 1.0                 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.BridgeNetworkStatusDocument`)
  bridge-server-descriptor 1.0              :class:`~stem.descriptor.server_descriptor.BridgeDescriptor`
  bridge-extra-info 1.1                     :class:`~stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`
  torperf 1.0                               **unsupported**
  bridge-pool-assignment 1.0                **unsupported**
  tordnsel 1.0                              :class:`~stem.descriptor.tordnsel.TorDNSEL`
  ========================================= =====

  If you're using **python 3** then beware that the open() function defaults to
  using text mode. **Binary mode** is strongly suggested because it's both
  faster (by my testing by about 33x) and doesn't do universal newline
  translation which can make us misparse the document.

  ::

    my_descriptor_file = open(descriptor_path, 'rb')

  :param str,file descriptor_file: path or opened file with the descriptor contents
  :param str descriptor_type: `descriptor type <https://metrics.torproject.org/formats.html#descriptortypes>`_, this is guessed if not provided
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param stem.descriptor.__init__.DocumentHandler document_handler: method in
    which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file

  :raises:
    * **ValueError** if the contents is malformed and validate is True
    * **TypeError** if we can't match the contents of the file to a descriptor type
    * **IOError** if unable to read from the descriptor_file
  """

  # if we got a path then open that file for parsing

  if isinstance(descriptor_file, (bytes, unicode)):
    with open(descriptor_file) as desc_file:
      for desc in parse_file(desc_file, descriptor_type, validate, document_handler, **kwargs):
        yield desc

      return

  # The tor descriptor specifications do not provide a reliable method for
  # identifying a descriptor file's type and version so we need to guess
  # based on its filename. Metrics descriptors, however, can be identified
  # by an annotation on their first line...
  # https://trac.torproject.org/5651

  initial_position = descriptor_file.tell()
  first_line = stem.util.str_tools._to_unicode(descriptor_file.readline().strip())
  metrics_header_match = re.match("^@type (\S+) (\d+).(\d+)$", first_line)

  if not metrics_header_match:
    descriptor_file.seek(initial_position)

  descriptor_path = getattr(descriptor_file, 'name', None)
  filename = '<undefined>' if descriptor_path is None else os.path.basename(descriptor_file.name)
  file_parser = None

  if descriptor_type is not None:
    descriptor_type_match = re.match("^(\S+) (\d+).(\d+)$", descriptor_type)

    if descriptor_type_match:
      desc_type, major_version, minor_version = descriptor_type_match.groups()
      file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs)
    else:
      raise ValueError("The descriptor_type must be of the form '<type> <major_version>.<minor_version>'")
  elif metrics_header_match:
    # Metrics descriptor handling

    desc_type, major_version, minor_version = metrics_header_match.groups()
    file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs)
  else:
    # Cached descriptor handling. These contain multiple descriptors per file.

    if filename == "cached-descriptors":
      file_parser = lambda f: stem.descriptor.server_descriptor._parse_file(f, validate = validate, **kwargs)
    elif filename == "cached-extrainfo":
      file_parser = lambda f: stem.descriptor.extrainfo_descriptor._parse_file(f, validate = validate, **kwargs)
    elif filename == "cached-microdescs":
      file_parser = lambda f: stem.descriptor.microdescriptor._parse_file(f, validate = validate, **kwargs)
    elif filename == "cached-consensus":
      file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, validate = validate, document_handler = document_handler, **kwargs)
    elif filename == "cached-microdesc-consensus":
      file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs)

  if file_parser:
    for desc in file_parser(descriptor_file):
      if descriptor_path is not None:
        desc._set_path(os.path.abspath(descriptor_path))

      yield desc

    return

  # Not recognized as a descriptor file.

  raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line))


def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler, **kwargs):
  # Parses descriptor files from metrics, yielding individual descriptors. This
  # throws a TypeError if the descriptor_type or version isn't recognized.

  if descriptor_type == "server-descriptor" and major_version == 1:
    for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "bridge-server-descriptor" and major_version == 1:
    for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "extra-info" and major_version == 1:
    for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "microdescriptor" and major_version == 1:
    for desc in stem.descriptor.microdescriptor._parse_file(descriptor_file, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "bridge-extra-info" and major_version == 1:
    # version 1.1 introduced a 'transport' field...
    # https://trac.torproject.org/6257

    for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "network-status-2" and major_version == 1:
    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV2

    for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
      yield desc
  elif descriptor_type == "dir-key-certificate-3" and major_version == 1:
    for desc in stem.descriptor.networkstatus._parse_file_key_certs(descriptor_file, validate = validate, **kwargs):
      yield desc
  elif descriptor_type in ("network-status-consensus-3", "network-status-vote-3") and major_version == 1:
    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3

    for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
      yield desc
  elif descriptor_type == "network-status-microdesc-consensus-3" and major_version == 1:
    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3

    for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs):
      yield desc
  elif descriptor_type == "bridge-network-status" and major_version == 1:
    document_type = stem.descriptor.networkstatus.BridgeNetworkStatusDocument

    for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
      yield desc
  elif descriptor_type == "tordnsel" and major_version == 1:
    document_type = stem.descriptor.tordnsel.TorDNSEL

    for desc in stem.descriptor.tordnsel._parse_file(descriptor_file, validate = validate, **kwargs):
      yield desc
  else:
    raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version))


class Descriptor(object):
  """
  Common parent for all types of descriptors.
  """

  def __init__(self, contents):
    self._path = None
    self._archive_path = None
    self._raw_contents = contents

  def get_path(self):
    """
    Provides the absolute path that we loaded this descriptor from.

    :returns: **str** with the absolute path of the descriptor source
    """

    return self._path

  def get_archive_path(self):
    """
    If this descriptor came from an archive then provides its path within the
    archive. This is only set if the descriptor came from a
    :class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this
    descriptor didn't come from an archive.

    :returns: **str** with the descriptor's path within the archive
    """

    return self._archive_path

  def get_bytes(self):
    """
    Provides the ASCII **bytes** of the descriptor. This only differs from
    **str()** if you're running python 3.x, in which case **str()** provides a
    **unicode** string.

    :returns: **bytes** for the descriptor's contents
    """

    return self._raw_contents

  def get_unrecognized_lines(self):
    """
    Provides a list of lines that were either ignored or had data that we did
    not know how to process. This is most common due to new descriptor fields
    that this library does not yet know how to process. Patches welcome!

    :returns: **list** of lines of unrecognized content
    """

    raise NotImplementedError

  def _set_path(self, path):
    self._path = path

  def _set_archive_path(self, path):
    self._archive_path = path

  def __str__(self):
    if stem.prereq.is_python_3():
      return stem.util.str_tools._to_unicode(self._raw_contents)
    else:
      return self._raw_contents


def _get_bytes_field(keyword, content):
  """
  Provides the value corresponding to the given keyword. This is handy to fetch
  values specifically allowed to be arbitrary bytes prior to converting to
  unicode.
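
  For example (a rough sketch; 'content' is assumed to be the raw bytes of a
  server descriptor that includes a 'contact' line)...

  ::

    contact = _get_bytes_field("contact", content)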

  :param str keyword: line to look up
  :param bytes content: content to look through

  :returns: **bytes** value on the given line, **None** if the line doesn't
    exist

  :raises: **ValueError** if the content isn't bytes
  """

  if not isinstance(content, bytes):
    raise ValueError("Content must be bytes, got a %s" % type(content))

  line_match = re.search(stem.util.str_tools._to_bytes("^(opt )?%s(?:[%s]+(.*))?$" % (keyword, WHITESPACE)), content, re.MULTILINE)

  if line_match:
    value = line_match.groups()[1]
    return b"" if value is None else value
  else:
    return None


def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False):
  """
  Reads from the descriptor file until we get to one of the given keywords or reach the
  end of the file.
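
  For instance, reading a descriptor's body up to its signature might look
  like this (a sketch; 'descriptor_file' is assumed to be an open binary file
  positioned at the start of a descriptor)...

  ::

    body_lines = _read_until_keywords("router-signature", descriptor_file)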

  :param str,list keywords: keyword(s) we want to read until
  :param file descriptor_file: file with the descriptor content
  :param bool inclusive: includes the line with the keyword if True
  :param bool ignore_first: doesn't check if the first line read has one of the
    given keywords
  :param bool skip: skips buffering content, returning None
  :param int end_position: end if we reach this point in the file
  :param bool include_ending_keyword: provides the keyword we broke on if **True**

  :returns: **list** with the lines until we find one of the keywords, this is
    a two value tuple with the ending keyword if include_ending_keyword is
    **True**
  """

  content = None if skip else []
  ending_keyword = None

  if isinstance(keywords, (bytes, unicode)):
    keywords = (keywords,)

  if ignore_first:
    first_line = descriptor_file.readline()

    if content is not None and first_line is not None:
      content.append(first_line)

  while True:
    last_position = descriptor_file.tell()

    if end_position and last_position >= end_position:
      break

    line = descriptor_file.readline()

    if not line:
      break  # EOF

    line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(line))

    if not line_match:
      # no spaces or tabs in the line
      line_keyword = stem.util.str_tools._to_unicode(line.strip())
    else:
      line_keyword = line_match.groups()[0]

    if line_keyword in keywords:
      ending_keyword = line_keyword

      if not inclusive:
        descriptor_file.seek(last_position)
      elif content is not None:
        content.append(line)

      break
    elif content is not None:
      content.append(line)

  if include_ending_keyword:
    return (content, ending_keyword)
  else:
    return content


def _get_pseudo_pgp_block(remaining_contents):
  """
  Checks if given contents begins with a pseudo-Open-PGP-style block and, if
  so, pops it off and provides it back to the caller.

  :param list remaining_contents: lines to be checked for a public key block

  :returns: **str** with the armor wrapped contents or None if it doesn't exist

  :raises: **ValueError** if the contents starts with a key block but it's
    malformed (for instance, if it lacks an ending line)
  """

  if not remaining_contents:
    return None  # nothing left

  block_match = PGP_BLOCK_START.match(remaining_contents[0])

  if block_match:
    block_type = block_match.groups()[0]
    block_lines = []
    end_line = PGP_BLOCK_END % block_type

    while True:
      if not remaining_contents:
        raise ValueError("Unterminated pgp style block (looking for '%s'):\n%s" % (end_line, "\n".join(block_lines)))

      line = remaining_contents.pop(0)
      block_lines.append(line)

      if line == end_line:
        return "\n".join(block_lines)
  else:
    return None


def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
  """
  Initial breakup of the server descriptor contents to make parsing easier.

  A descriptor contains a series of 'keyword lines' which are simply a keyword
  followed by an optional value. Lines can also be followed by a signature
  block.

  To get a sub-listing with just certain keywords use extra_keywords. This can
  be useful if we care about their relative ordering with respect to each
  other. For instance, we care about the ordering of 'accept' and 'reject'
  entries because this influences the resulting exit policy, but for everything
  else in server descriptors the order does not matter.

  :param str raw_contents: descriptor content provided by the relay
  :param bool validate: checks the validity of the descriptor's content if
    True, skips these checks otherwise
  :param list extra_keywords: entity keywords to put into a separate listing
    with ordering intact

  :returns:
    **collections.OrderedDict** with the 'keyword => (value, pgp key) entries'
    mappings. If a extra_keywords was provided then this instead provides a two
    value tuple, the second being a list of those entries.
  """

  entries = OrderedDict()
  extra_entries = []  # entries with a keyword in extra_keywords
  remaining_lines = raw_contents.split("\n")

  while remaining_lines:
    line = remaining_lines.pop(0)

    # V2 network status documents explicitly can contain blank lines...
    #
    #   "Implementations MAY insert blank lines for clarity between sections;
    #   these blank lines are ignored."
    #
    # ... and server descriptors end with an extra newline. But other documents
    # don't say how blank lines should be handled so globally ignoring them.

    if not line:
      continue

    # Some lines have an 'opt ' for backward compatibility. They should be
    # ignored. This prefix is being removed in...
    # https://trac.torproject.org/projects/tor/ticket/5124

    if line.startswith("opt "):
      line = line[4:]

    line_match = KEYWORD_LINE.match(line)

    if not line_match:
      if not validate:
        continue

      raise ValueError("Line contains invalid characters: %s" % line)

    keyword, value = line_match.groups()

    if value is None:
      value = ''

    try:
      block_contents = _get_pseudo_pgp_block(remaining_lines)
    except ValueError as exc:
      if not validate:
        continue

      raise exc

    if keyword in extra_keywords:
      extra_entries.append("%s %s" % (keyword, value))
    else:
      entries.setdefault(keyword, []).append((value, block_contents))

  if extra_keywords:
    return entries, extra_entries
  else:
    return entries

# importing at the end to avoid circular dependencies on our Descriptor class

import stem.descriptor.server_descriptor
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.networkstatus
import stem.descriptor.microdescriptor
import stem.descriptor.tordnsel
lib/stem/descriptor/export.py  (new file, 106 lines)

@@ -0,0 +1,106 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Toolkit for exporting descriptors to other formats.

**Module Overview:**

::

  export_csv - Exports descriptors to a CSV
  export_csv_file - Writes exported CSV output to a file
"""

import cStringIO
import csv

import stem.descriptor
import stem.prereq


class _ExportDialect(csv.excel):
  lineterminator = '\n'


def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
  """
  Provides a newline separated CSV for one or more descriptors. If simply
  provided with descriptors then the CSV contains all of its attributes,
  labeled with a header row. Either 'included_fields' or 'excluded_fields' can
  be used for more granular control over its attributes and the order.
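
  For example, exporting a couple of fields (a minimal sketch; 'descriptors'
  is assumed to be a list of already parsed
  :class:`~stem.descriptor.server_descriptor.RelayDescriptor` instances)...

  ::

    csv_output = export_csv(descriptors, included_fields = ('nickname', 'address'))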

  :param Descriptor,list descriptors: either a
    :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
  :param list included_fields: attributes to include in the csv
  :param list excluded_fields: attributes to exclude from the csv
  :param bool header: if **True** then the first line will be a comma separated
    list of the attribute names (**only supported in python 2.7 and higher**)

  :returns: **str** of the CSV for the descriptors, one per line
  :raises: **ValueError** if descriptors contain more than one descriptor type
  """

  output_buffer = cStringIO.StringIO()
  export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
  return output_buffer.getvalue()


def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
  """
  Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
  written directly to a file.
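
  For instance (a sketch with the same 'descriptors' assumption as above)...

  ::

    with open('descriptors.csv', 'w') as csv_file:
      export_csv_file(csv_file, descriptors)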

  :param file output_file: file to be written to
  :param Descriptor,list descriptors: either a
    :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
  :param list included_fields: attributes to include in the csv
  :param list excluded_fields: attributes to exclude from the csv
  :param bool header: if **True** then the first line will be a comma separated
    list of the attribute names (**only supported in python 2.7 and higher**)

  :returns: **str** of the CSV for the descriptors, one per line
  :raises: **ValueError** if descriptors contain more than one descriptor type
  """

  if isinstance(descriptors, stem.descriptor.Descriptor):
    descriptors = (descriptors,)

  if not descriptors:
    return

  descriptor_type = type(descriptors[0])
  descriptor_type_label = descriptor_type.__name__
  included_fields = list(included_fields)

  # If the user didn't specify the fields to include then export everything,
  # ordered alphabetically. If they did specify fields then make sure that
  # they exist.

  desc_attr = sorted(vars(descriptors[0]).keys())

  if included_fields:
    for field in included_fields:
      if not field in desc_attr:
        raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ", ".join(desc_attr)))
  else:
    included_fields = [attr for attr in desc_attr if not attr.startswith('_')]

  for field in excluded_fields:
    try:
      included_fields.remove(field)
    except ValueError:
      pass

  writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')

  if header and stem.prereq.is_python_27():
    writer.writeheader()

  for desc in descriptors:
    if not isinstance(desc, stem.descriptor.Descriptor):
      raise ValueError("Unable to export a descriptor CSV since %s is not a descriptor." % type(desc).__name__)
    elif descriptor_type != type(desc):
      raise ValueError("To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s." % (descriptor_type_label, type(desc)))

    writer.writerow(vars(desc))
lib/stem/descriptor/extrainfo_descriptor.py  (new file, 940 lines)

@@ -0,0 +1,940 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for Tor extra-info descriptors. These are published by relays whenever
their server descriptor is published and have a similar format. However, unlike
server descriptors these don't contain information that Tor clients require to
function and as such aren't fetched by default.

Defined in section 2.2 of the `dir-spec
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_,
extra-info descriptors contain interesting but non-vital information such as
usage statistics. Tor clients cannot request these documents for bridges.

Extra-info descriptors are available from a few sources...

* if you have 'DownloadExtraInfo 1' in your torrc...

  * control port via 'GETINFO extra-info/digest/\*' queries
  * the 'cached-extrainfo' file in tor's data directory

* tor metrics, at https://metrics.torproject.org/data.html
* directory authorities and mirrors via their DirPort
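
For instance, reading the cached file might look like this (a minimal sketch;
it assumes tor's 'cached-extrainfo' file has been copied into the current
directory)...

::

  from stem.descriptor import parse_file

  for desc in parse_file('cached-extrainfo'):
    print(desc.nickname)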

**Module Overview:**

::

  ExtraInfoDescriptor - Tor extra-info descriptor.
    | |- RelayExtraInfoDescriptor - Extra-info descriptor for a relay.
    | +- BridgeExtraInfoDescriptor - Extra-info descriptor for a bridge.
    |
    |- digest - calculates the upper-case hex digest value for our content
    +- get_unrecognized_lines - lines with unrecognized content

.. data:: DirResponse (enum)

  Enumeration for known statuses for ExtraInfoDescriptor's dir_*_responses.

  =================== ===========
  DirResponse         Description
  =================== ===========
  **OK**              network status requests that were answered
  **NOT_ENOUGH_SIGS** network status wasn't signed by enough authorities
  **UNAVAILABLE**     requested network status was unavailable
  **NOT_FOUND**       requested network status was not found
  **NOT_MODIFIED**    network status unmodified since If-Modified-Since time
  **BUSY**            directory was busy
  =================== ===========

.. data:: DirStat (enum)

  Enumeration for known stats for ExtraInfoDescriptor's dir_*_direct_dl and
  dir_*_tunneled_dl.

  ===================== ===========
  DirStat               Description
  ===================== ===========
  **COMPLETE**          requests that completed successfully
  **TIMEOUT**           requests that didn't complete within a ten minute timeout
  **RUNNING**           requests still in process when measurement's taken
  **MIN**               smallest rate at which a descriptor was downloaded in B/s
  **MAX**               largest rate at which a descriptor was downloaded in B/s
  **D1-4** and **D6-9** rate of the slowest x/10 download rates in B/s
  **Q1** and **Q3**     rate of the slowest and fastest quarter download rates in B/s
  **MD**                median download rate in B/s
  ===================== ===========
"""

import datetime
import hashlib
import re

import stem.util.connection
import stem.util.enum
import stem.util.str_tools

from stem.descriptor import (
  PGP_BLOCK_END,
  Descriptor,
  _read_until_keywords,
  _get_descriptor_components,
)

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

# known statuses for dirreq-v2-resp and dirreq-v3-resp...
DirResponse = stem.util.enum.Enum(
  ("OK", "ok"),
  ("NOT_ENOUGH_SIGS", "not-enough-sigs"),
  ("UNAVAILABLE", "unavailable"),
  ("NOT_FOUND", "not-found"),
  ("NOT_MODIFIED", "not-modified"),
  ("BUSY", "busy"),
)

# known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl...
dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md']
dir_stats += ['d%i' % i for i in range(1, 5)]
dir_stats += ['d%i' % i for i in range(6, 10)]
DirStat = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats])

# relay descriptors must have exactly one of the following
REQUIRED_FIELDS = (
  "extra-info",
  "published",
  "router-signature",
)

# optional entries that can appear at most once
SINGLE_FIELDS = (
  "read-history",
  "write-history",
  "geoip-db-digest",
  "geoip6-db-digest",
  "bridge-stats-end",
  "bridge-ips",
  "dirreq-stats-end",
  "dirreq-v2-ips",
  "dirreq-v3-ips",
  "dirreq-v2-reqs",
  "dirreq-v3-reqs",
  "dirreq-v2-share",
  "dirreq-v3-share",
  "dirreq-v2-resp",
  "dirreq-v3-resp",
  "dirreq-v2-direct-dl",
  "dirreq-v3-direct-dl",
  "dirreq-v2-tunneled-dl",
  "dirreq-v3-tunneled-dl",
  "dirreq-read-history",
  "dirreq-write-history",
  "entry-stats-end",
  "entry-ips",
  "cell-stats-end",
  "cell-processed-cells",
  "cell-queued-cells",
  "cell-time-in-queue",
  "cell-circuits-per-decile",
  "conn-bi-direct",
  "exit-stats-end",
  "exit-kibibytes-written",
  "exit-kibibytes-read",
  "exit-streams-opened",
)


def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
  """
  Iterates over the extra-info descriptors in a file.

  :param file descriptor_file: file with descriptor content
  :param bool is_bridge: parses the file as being a bridge descriptor
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator for :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
    instances in the file

  :raises:
    * **ValueError** if the contents is malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  while True:
    extrainfo_content = _read_until_keywords("router-signature", descriptor_file)

    # we've reached the 'router-signature', now include the pgp style block
    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
    extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)

    if extrainfo_content:
      if is_bridge:
        yield BridgeExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate, **kwargs)
      else:
        yield RelayExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate, **kwargs)
    else:
      break  # done parsing file


def _parse_timestamp_and_interval(keyword, content):
  """
  Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry.

  :param str keyword: line's keyword
  :param str content: line content to be parsed

  :returns: **tuple** of the form (timestamp (**datetime**), interval
    (**int**), remaining content (**str**))

  :raises: **ValueError** if the content is malformed
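
  For example (a doctest-style sketch)...

  ::

    >>> _parse_timestamp_and_interval('dirreq-stats-end', '2012-05-03 12:07:50 (86400 s)')
    (datetime.datetime(2012, 5, 3, 12, 7, 50), 86400, None)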
  """

  line = "%s %s" % (keyword, content)
  content_match = re.match("^(.*) \(([0-9]+) s\)( .*)?$", content)

  if not content_match:
    raise ValueError("Malformed %s line: %s" % (keyword, line))

  timestamp_str, interval, remainder = content_match.groups()

  if remainder:
    remainder = remainder[1:]  # remove leading space

  if not interval.isdigit():
    raise ValueError("%s line's interval wasn't a number: %s" % (keyword, line))

  try:
    timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
    return timestamp, int(interval), remainder
  except ValueError:
    raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))


class ExtraInfoDescriptor(Descriptor):
  """
  Extra-info descriptor document.

  :var str nickname: **\*** relay's nickname
  :var str fingerprint: **\*** identity key fingerprint
  :var datetime published: **\*** time in UTC when this descriptor was made
  :var str geoip_db_digest: sha1 of the geoIP database file for IPv4 addresses
  :var str geoip6_db_digest: sha1 of the geoIP database file for IPv6 addresses
  :var dict transport: **\*** mapping of transport methods to their (address,
    port, args) tuple, these usually appear on bridges in which case all of
    those are **None**

  **Bi-directional connection usage:**

  :var datetime conn_bi_direct_end: end of the sampling interval
  :var int conn_bi_direct_interval: seconds per interval
  :var int conn_bi_direct_below: connections that read/wrote less than 20 KiB
  :var int conn_bi_direct_read: connections that read at least 10x more than wrote
  :var int conn_bi_direct_write: connections that wrote at least 10x more than read
  :var int conn_bi_direct_both: remaining connections

  **Bytes read/written for relayed traffic:**

  :var datetime read_history_end: end of the sampling interval
  :var int read_history_interval: seconds per interval
  :var list read_history_values: bytes read during each interval

  :var datetime write_history_end: end of the sampling interval
  :var int write_history_interval: seconds per interval
  :var list write_history_values: bytes written during each interval

  **Cell relaying statistics:**

  :var datetime cell_stats_end: end of the period when stats were gathered
  :var int cell_stats_interval: length in seconds of the interval
  :var list cell_processed_cells: measurement of processed cells per circuit
  :var list cell_queued_cells: measurement of queued cells per circuit
  :var list cell_time_in_queue: mean enqueued time in milliseconds for cells
  :var int cell_circuits_per_decile: mean number of circuits in a decile

  **Directory Mirror Attributes:**

  :var datetime dir_stats_end: end of the period when stats were gathered
  :var int dir_stats_interval: length in seconds of the interval
  :var dict dir_v2_ips: mapping of locales to rounded count of requester ips
  :var dict dir_v3_ips: mapping of locales to rounded count of requester ips
  :var float dir_v2_share: percent of total directory traffic it expects to serve
  :var float dir_v3_share: percent of total directory traffic it expects to serve
  :var dict dir_v2_requests: mapping of locales to rounded count of requests
  :var dict dir_v3_requests: mapping of locales to rounded count of requests

  :var dict dir_v2_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
  :var dict dir_v3_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
  :var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count
  :var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count

  :var dict dir_v2_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
  :var dict dir_v3_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
  :var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement
  :var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement

  :var dict dir_v2_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
  :var dict dir_v3_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
  :var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement
  :var dict dir_v3_tunneled_dl_unknown: mapping of unrecognized stats to their measurement

  **Bytes read/written for directory mirroring:**

  :var datetime dir_read_history_end: end of the sampling interval
  :var int dir_read_history_interval: seconds per interval
  :var list dir_read_history_values: bytes read during each interval

  :var datetime dir_write_history_end: end of the sampling interval
  :var int dir_write_history_interval: seconds per interval
  :var list dir_write_history_values: bytes written during each interval

  **Guard Attributes:**

  :var datetime entry_stats_end: end of the period when stats were gathered
  :var int entry_stats_interval: length in seconds of the interval
  :var dict entry_ips: mapping of locales to rounded count of unique user ips

  **Exit Attributes:**

  :var datetime exit_stats_end: end of the period when stats were gathered
  :var int exit_stats_interval: length in seconds of the interval
  :var dict exit_kibibytes_written: traffic per port (keys are ints or 'other')
  :var dict exit_kibibytes_read: traffic per port (keys are ints or 'other')
  :var dict exit_streams_opened: streams per port (keys are ints or 'other')

  **Bridge Attributes:**

  :var datetime bridge_stats_end: end of the period when stats were gathered
  :var int bridge_stats_interval: length in seconds of the interval
  :var dict bridge_ips: mapping of locales to rounded count of unique user ips
  :var datetime geoip_start_time: replaced by bridge_stats_end (deprecated)
  :var dict geoip_client_origins: replaced by bridge_ips (deprecated)
  :var dict ip_versions: mapping of ip protocols to a rounded count for the number of users
  :var dict ip_transports: mapping of ip transports to a count for the number of users

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
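
  For example, checking the dirreq stats of a parsed descriptor might look
  like this (a sketch; 'desc' is assumed to come from parse_file(), as in the
  module example above)...

  ::

    if desc.dir_stats_end is not None:
      print('stats through %s (%s second interval)' % (desc.dir_stats_end, desc.dir_stats_interval))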
  """

  def __init__(self, raw_contents, validate = True):
    """
    Extra-info descriptor constructor. By default this validates the
    descriptor's content as it's parsed. This validation can be disabled to
    either improve performance or be accepting of malformed data.

    :param str raw_contents: extra-info content provided by the relay
    :param bool validate: checks the validity of the extra-info descriptor if
      **True**, skips these checks otherwise

    :raises: **ValueError** if the contents is malformed and validate is True
    """

    super(ExtraInfoDescriptor, self).__init__(raw_contents)
    raw_contents = stem.util.str_tools._to_unicode(raw_contents)

    self.nickname = None
    self.fingerprint = None
    self.published = None
    self.geoip_db_digest = None
    self.geoip6_db_digest = None
    self.transport = {}

    self.conn_bi_direct_end = None
    self.conn_bi_direct_interval = None
    self.conn_bi_direct_below = None
    self.conn_bi_direct_read = None
    self.conn_bi_direct_write = None
    self.conn_bi_direct_both = None

    self.read_history_end = None
    self.read_history_interval = None
    self.read_history_values = None

    self.write_history_end = None
    self.write_history_interval = None
    self.write_history_values = None

    self.cell_stats_end = None
    self.cell_stats_interval = None
    self.cell_processed_cells = None
    self.cell_queued_cells = None
    self.cell_time_in_queue = None
    self.cell_circuits_per_decile = None

    self.dir_stats_end = None
    self.dir_stats_interval = None
    self.dir_v2_ips = None
    self.dir_v3_ips = None
    self.dir_v2_share = None
    self.dir_v3_share = None
    self.dir_v2_requests = None
    self.dir_v3_requests = None
    self.dir_v2_responses = None
    self.dir_v3_responses = None
    self.dir_v2_responses_unknown = None
    self.dir_v3_responses_unknown = None
    self.dir_v2_direct_dl = None
    self.dir_v3_direct_dl = None
    self.dir_v2_direct_dl_unknown = None
    self.dir_v3_direct_dl_unknown = None
    self.dir_v2_tunneled_dl = None
    self.dir_v3_tunneled_dl = None
    self.dir_v2_tunneled_dl_unknown = None
    self.dir_v3_tunneled_dl_unknown = None

    self.dir_read_history_end = None
    self.dir_read_history_interval = None
    self.dir_read_history_values = None

    self.dir_write_history_end = None
    self.dir_write_history_interval = None
    self.dir_write_history_values = None

    self.entry_stats_end = None
    self.entry_stats_interval = None
    self.entry_ips = None

    self.exit_stats_end = None
    self.exit_stats_interval = None
    self.exit_kibibytes_written = None
    self.exit_kibibytes_read = None
    self.exit_streams_opened = None

    self.bridge_stats_end = None
    self.bridge_stats_interval = None
    self.bridge_ips = None
    self.geoip_start_time = None
    self.geoip_client_origins = None

    self.ip_versions = None
    self.ip_transports = None

    self._unrecognized_lines = []

    entries = _get_descriptor_components(raw_contents, validate)

    if validate:
      for keyword in self._required_fields():
        if not keyword in entries:
          raise ValueError("Extra-info descriptor must have a '%s' entry" % keyword)

      for keyword in self._required_fields() + SINGLE_FIELDS:
        if keyword in entries and len(entries[keyword]) > 1:
          raise ValueError("The '%s' entry can only appear once in an extra-info descriptor" % keyword)

      expected_first_keyword = self._first_keyword()
      if expected_first_keyword and expected_first_keyword != entries.keys()[0]:
        raise ValueError("Extra-info descriptor must start with a '%s' entry" % expected_first_keyword)

      expected_last_keyword = self._last_keyword()
      if expected_last_keyword and expected_last_keyword != entries.keys()[-1]:
        raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)

    self._parse(entries, validate)

  def get_unrecognized_lines(self):
    return list(self._unrecognized_lines)

  def _parse(self, entries, validate):
    """
    Parses a series of 'keyword => (value, pgp block)' mappings and applies
    them as attributes.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if True

    :raises: **ValueError** if an error occurs in validation
    """

    for keyword, values in entries.items():
      # most just work with the first (and only) value
      value, _ = values[0]
      line = "%s %s" % (keyword, value)  # original line

      if keyword == "extra-info":
        # "extra-info" Nickname Fingerprint
        extra_info_comp = value.split()

        if len(extra_info_comp) < 2:
          if not validate:
            continue

          raise ValueError("Extra-info line must have two values: %s" % line)

        if validate:
          if not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]):
            raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0])
          elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]):
            raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % extra_info_comp[1])

        self.nickname = extra_info_comp[0]
        self.fingerprint = extra_info_comp[1]
      elif keyword == "geoip-db-digest":
        # "geoip-db-digest" Digest

        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
          raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line)

        self.geoip_db_digest = value
      elif keyword == "geoip6-db-digest":
        # "geoip6-db-digest" Digest

        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
          raise ValueError("Geoip v6 digest line had an invalid sha1 digest: %s" % line)

        self.geoip6_db_digest = value
      elif keyword == "transport":
        # "transport" transportname address:port [arglist]
        # Everything after the transportname is scrubbed in published bridge
        # descriptors, so we'll never see it in practice.
        #
        # These entries really only make sense for bridges, but have been seen
        # on non-bridges in the wild when the relay operator configured it this
        # way.

        for transport_value, _ in values:
          name, address, port, args = None, None, None, None

          if not ' ' in transport_value:
            # scrubbed
            name = transport_value
          else:
            # not scrubbed
            value_comp = transport_value.split()

            if len(value_comp) < 1:
              raise ValueError("Transport line is missing its transport name: %s" % line)
            else:
              name = value_comp[0]

            if len(value_comp) < 2:
              raise ValueError("Transport line is missing its address:port value: %s" % line)
            elif not ":" in value_comp[1]:
              raise ValueError("Transport line's address:port entry is missing a colon: %s" % line)
            else:
              address, port_str = value_comp[1].split(':', 1)

              if not stem.util.connection.is_valid_ipv4_address(address) or \
                     stem.util.connection.is_valid_ipv6_address(address):
                raise ValueError("Transport line has a malformed address: %s" % line)
              elif not stem.util.connection.is_valid_port(port_str):
                raise ValueError("Transport line has a malformed port: %s" % line)

              port = int(port_str)

            if len(value_comp) >= 3:
              args = value_comp[2:]
            else:
              args = []

          self.transport[name] = (address, port, args)
      elif keyword == "cell-circuits-per-decile":
        # "cell-circuits-per-decile" num

        if not value.isdigit():
          if validate:
            raise ValueError("Non-numeric cell-circuits-per-decile value: %s" % line)
          else:
            continue

        stat = int(value)

        if validate and stat < 0:
          raise ValueError("Negative cell-circuits-per-decile value: %s" % line)

        self.cell_circuits_per_decile = stat
      elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"):
        recognized_counts = {}
        unrecognized_counts = {}

        is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp")
        key_set = DirResponse if is_response_stats else DirStat

        key_type = "STATUS" if is_response_stats else "STAT"
        error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line)

        if value:
          for entry in value.split(","):
            if not "=" in entry:
              if validate:
                raise ValueError(error_msg)
              else:
                continue

            status, count = entry.split("=", 1)

            if count.isdigit():
              if status in key_set:
                recognized_counts[status] = int(count)
              else:
                unrecognized_counts[status] = int(count)
            elif validate:
              raise ValueError(error_msg)

        if keyword == "dirreq-v2-resp":
          self.dir_v2_responses = recognized_counts
          self.dir_v2_responses_unknown = unrecognized_counts
        elif keyword == "dirreq-v3-resp":
          self.dir_v3_responses = recognized_counts
          self.dir_v3_responses_unknown = unrecognized_counts
        elif keyword == "dirreq-v2-direct-dl":
          self.dir_v2_direct_dl = recognized_counts
          self.dir_v2_direct_dl_unknown = unrecognized_counts
        elif keyword == "dirreq-v3-direct-dl":
          self.dir_v3_direct_dl = recognized_counts
          self.dir_v3_direct_dl_unknown = unrecognized_counts
        elif keyword == "dirreq-v2-tunneled-dl":
          self.dir_v2_tunneled_dl = recognized_counts
          self.dir_v2_tunneled_dl_unknown = unrecognized_counts
        elif keyword == "dirreq-v3-tunneled-dl":
          self.dir_v3_tunneled_dl = recognized_counts
          self.dir_v3_tunneled_dl_unknown = unrecognized_counts
      elif keyword in ("dirreq-v2-share", "dirreq-v3-share"):
        # "<keyword>" num%

        try:
          if not value.endswith("%"):
            raise ValueError()

          percentage = float(value[:-1]) / 100

          # Bug lets these be above 100%, however they're soon going away...
          # https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html

          if validate and percentage < 0:
            raise ValueError("Negative percentage value: %s" % line)

          if keyword == "dirreq-v2-share":
            self.dir_v2_share = percentage
          elif keyword == "dirreq-v3-share":
            self.dir_v3_share = percentage
        except ValueError as exc:
          if validate:
            raise ValueError("Value can't be parsed as a percentage: %s" % line)
      elif keyword in ("cell-processed-cells", "cell-queued-cells", "cell-time-in-queue"):
        # "<keyword>" num,...,num

        entries = []

        if value:
          for entry in value.split(","):
            try:
              # Values should be positive but as discussed in ticket #5849
              # there was a bug around this. It was fixed in tor 0.2.2.1.

              entries.append(float(entry))
            except ValueError:
              if validate:
                raise ValueError("Non-numeric entry in %s listing: %s" % (keyword, line))

        if keyword == "cell-processed-cells":
          self.cell_processed_cells = entries
        elif keyword == "cell-queued-cells":
          self.cell_queued_cells = entries
        elif keyword == "cell-time-in-queue":
          self.cell_time_in_queue = entries
      elif keyword in ("published", "geoip-start-time"):
        # "<keyword>" YYYY-MM-DD HH:MM:SS

        try:
          timestamp = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")

          if keyword == "published":
            self.published = timestamp
          elif keyword == "geoip-start-time":
            self.geoip_start_time = timestamp
        except ValueError:
          if validate:
            raise ValueError("Timestamp on %s line wasn't parsable: %s" % (keyword, line))
      elif keyword in ("cell-stats-end", "entry-stats-end", "exit-stats-end", "bridge-stats-end", "dirreq-stats-end"):
        # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)

        try:
          timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value)

          if keyword == "cell-stats-end":
            self.cell_stats_end = timestamp
            self.cell_stats_interval = interval
          elif keyword == "entry-stats-end":
            self.entry_stats_end = timestamp
            self.entry_stats_interval = interval
          elif keyword == "exit-stats-end":
            self.exit_stats_end = timestamp
            self.exit_stats_interval = interval
          elif keyword == "bridge-stats-end":
            self.bridge_stats_end = timestamp
            self.bridge_stats_interval = interval
          elif keyword == "dirreq-stats-end":
            self.dir_stats_end = timestamp
            self.dir_stats_interval = interval
        except ValueError as exc:
          if validate:
            raise exc
      elif keyword == "conn-bi-direct":
        # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH

        try:
          timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
          stats = remainder.split(",")

          if len(stats) != 4 or not \
            (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()):
            raise ValueError("conn-bi-direct line should end with four numeric values: %s" % line)

          self.conn_bi_direct_end = timestamp
          self.conn_bi_direct_interval = interval
          self.conn_bi_direct_below = int(stats[0])
          self.conn_bi_direct_read = int(stats[1])
          self.conn_bi_direct_write = int(stats[2])
          self.conn_bi_direct_both = int(stats[3])
        except ValueError as exc:
          if validate:
            raise exc
      elif keyword in ("read-history", "write-history", "dirreq-read-history", "dirreq-write-history"):
        # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
        try:
          timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
          history_values = []

          if remainder:
            try:
              history_values = [int(entry) for entry in remainder.split(",")]
            except ValueError:
              raise ValueError("%s line has non-numeric values: %s" % (keyword, line))

          if keyword == "read-history":
            self.read_history_end = timestamp
            self.read_history_interval = interval
            self.read_history_values = history_values
          elif keyword == "write-history":
            self.write_history_end = timestamp
            self.write_history_interval = interval
            self.write_history_values = history_values
          elif keyword == "dirreq-read-history":
            self.dir_read_history_end = timestamp
            self.dir_read_history_interval = interval
            self.dir_read_history_values = history_values
          elif keyword == "dirreq-write-history":
            self.dir_write_history_end = timestamp
            self.dir_write_history_interval = interval
            self.dir_write_history_values = history_values
        except ValueError as exc:
          if validate:
            raise exc
      elif keyword in ("exit-kibibytes-written", "exit-kibibytes-read", "exit-streams-opened"):
        # "<keyword>" port=N,port=N,...

        port_mappings = {}
        error_msg = "Entries in %s line should only be PORT=N entries: %s" % (keyword, line)

        if value:
          for entry in value.split(","):
            if not "=" in entry:
              if validate:
                raise ValueError(error_msg)
              else:
                continue

            port, stat = entry.split("=", 1)

            if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit():
              if port != 'other':
                port = int(port)
              port_mappings[port] = int(stat)
            elif validate:
              raise ValueError(error_msg)

        if keyword == "exit-kibibytes-written":
          self.exit_kibibytes_written = port_mappings
        elif keyword == "exit-kibibytes-read":
          self.exit_kibibytes_read = port_mappings
        elif keyword == "exit-streams-opened":
          self.exit_streams_opened = port_mappings
      elif keyword in ("dirreq-v2-ips", "dirreq-v3-ips", "dirreq-v2-reqs", "dirreq-v3-reqs", "geoip-client-origins", "entry-ips", "bridge-ips"):
|
||||
# "<keyword>" CC=N,CC=N,...
|
||||
#
|
||||
# The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric
|
||||
# locale codes for some special values, for instance...
|
||||
# A1,"Anonymous Proxy"
|
||||
# A2,"Satellite Provider"
|
||||
# ??,"Unknown"
|
||||
|
||||
locale_usage = {}
|
||||
error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line)
|
||||
|
||||
if value:
|
||||
for entry in value.split(","):
|
||||
if not "=" in entry:
|
||||
if validate:
|
||||
raise ValueError(error_msg)
|
||||
else:
|
||||
continue
|
||||
|
||||
locale, count = entry.split("=", 1)
|
||||
|
||||
if re.match("^[a-zA-Z0-9\?]{2}$", locale) and count.isdigit():
|
||||
locale_usage[locale] = int(count)
|
||||
elif validate:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
if keyword == "dirreq-v2-ips":
|
||||
self.dir_v2_ips = locale_usage
|
||||
elif keyword == "dirreq-v3-ips":
|
||||
self.dir_v3_ips = locale_usage
|
||||
elif keyword == "dirreq-v2-reqs":
|
||||
self.dir_v2_requests = locale_usage
|
||||
elif keyword == "dirreq-v3-reqs":
|
||||
self.dir_v3_requests = locale_usage
|
||||
elif keyword == "geoip-client-origins":
|
||||
self.geoip_client_origins = locale_usage
|
||||
elif keyword == "entry-ips":
|
||||
self.entry_ips = locale_usage
|
||||
elif keyword == "bridge-ips":
|
||||
self.bridge_ips = locale_usage
|
||||
elif keyword == "bridge-ip-versions":
|
||||
self.ip_versions = {}
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if not '=' in entry:
|
||||
raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
|
||||
|
||||
protocol, count = entry.split('=', 1)
|
||||
|
||||
if not count.isdigit():
|
||||
raise stem.ProtocolError("IP protocol count was non-numeric (%s): %s" % (count, line))
|
||||
|
||||
self.ip_versions[protocol] = int(count)
|
||||
elif keyword == "bridge-ip-transports":
|
||||
self.ip_transports = {}
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if not '=' in entry:
|
||||
raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
|
||||
|
||||
protocol, count = entry.split('=', 1)
|
||||
|
||||
if not count.isdigit():
|
||||
raise stem.ProtocolError("Transport count was non-numeric (%s): %s" % (count, line))
|
||||
|
||||
self.ip_transports[protocol] = int(count)
|
||||
else:
|
||||
self._unrecognized_lines.append(line)
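Once the dispatch above has run, these statistics surface as plain attributes on the descriptor. A minimal usage sketch, not part of stem itself (the cached-extrainfo path and the nickname attribute are assumptions about a typical tor data directory and fields parsed earlier in this class)...

::

  from stem.descriptor import parse_file

  for desc in parse_file('/home/atagar/.tor/cached-extrainfo'):
    if desc.dir_v3_responses:
      print '%s dirreq v3 responses: %s' % (desc.nickname, desc.dir_v3_responses)

    if desc.bridge_ips:
      print '%s bridge ips by locale: %s' % (desc.nickname, desc.bridge_ips)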
|
||||
|
||||
def digest(self):
|
||||
"""
|
||||
Provides the upper-case hex encoded sha1 of our content. This value is part
|
||||
of the server descriptor entry for this relay.
|
||||
|
||||
:returns: **str** with the upper-case hex digest value for this server
|
||||
descriptor
|
||||
"""
|
||||
|
||||
raise NotImplementedError("Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass")
|
||||
|
||||
def _required_fields(self):
|
||||
return REQUIRED_FIELDS
|
||||
|
||||
def _first_keyword(self):
|
||||
return "extra-info"
|
||||
|
||||
def _last_keyword(self):
|
||||
return "router-signature"
|
||||
|
||||
|
||||
class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
|
||||
"""
|
||||
Relay extra-info descriptor, constructed from data such as that provided by
|
||||
"GETINFO extra-info/digest/\*", cached descriptors, and metrics
|
||||
(`specification <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
|
||||
|
||||
:var str signature: **\*** signature for this extrainfo descriptor
|
||||
|
||||
**\*** attribute is required when we're parsed with validation
|
||||
"""
|
||||
|
||||
def __init__(self, raw_contents, validate = True):
|
||||
self.signature = None
|
||||
|
||||
super(RelayExtraInfoDescriptor, self).__init__(raw_contents, validate)
|
||||
|
||||
@lru_cache()
|
||||
def digest(self):
|
||||
# our digest is calculated from everything except our signature
|
||||
raw_content, ending = str(self), "\nrouter-signature\n"
|
||||
raw_content = raw_content[:raw_content.find(ending) + len(ending)]
|
||||
return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper()
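As the implementation above shows, the digest only covers the content up to and including the 'router-signature' line, so it can be recomputed from the raw descriptor text. A standalone sketch of the same calculation (not part of stem)...

::

  import hashlib

  def extrainfo_digest(raw_descriptor):
    # hash everything through the "router-signature" line, mirroring digest() above
    ending = '\nrouter-signature\n'
    content = raw_descriptor[:raw_descriptor.find(ending) + len(ending)]
    return hashlib.sha1(content).hexdigest().upper()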
|
||||
|
||||
def _parse(self, entries, validate):
|
||||
entries = dict(entries) # shallow copy since we're destructive
|
||||
|
||||
# handles fields only in server descriptors
|
||||
for keyword, values in entries.items():
|
||||
value, block_contents = values[0]
|
||||
|
||||
line = "%s %s" % (keyword, value) # original line
|
||||
|
||||
if block_contents:
|
||||
line += "\n%s" % block_contents
|
||||
|
||||
if keyword == "router-signature":
|
||||
if validate and not block_contents:
|
||||
raise ValueError("Router signature line must be followed by a signature block: %s" % line)
|
||||
|
||||
self.signature = block_contents
|
||||
del entries["router-signature"]
|
||||
|
||||
ExtraInfoDescriptor._parse(self, entries, validate)
|
||||
|
||||
|
||||
class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
|
||||
"""
|
||||
Bridge extra-info descriptor (`bridge descriptor specification
|
||||
<https://metrics.torproject.org/formats.html#bridgedesc>`_)
|
||||
"""
|
||||
|
||||
def __init__(self, raw_contents, validate = True):
|
||||
self._digest = None
|
||||
|
||||
super(BridgeExtraInfoDescriptor, self).__init__(raw_contents, validate)
|
||||
|
||||
def digest(self):
|
||||
return self._digest
|
||||
|
||||
def _parse(self, entries, validate):
|
||||
entries = dict(entries) # shallow copy since we're destructive
|
||||
|
||||
# handles fields only in server descriptors
|
||||
for keyword, values in entries.items():
|
||||
value, _ = values[0]
|
||||
line = "%s %s" % (keyword, value) # original line
|
||||
|
||||
if keyword == "router-digest":
|
||||
if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
|
||||
raise ValueError("Router digest line had an invalid sha1 digest: %s" % line)
|
||||
|
||||
self._digest = value
|
||||
del entries["router-digest"]
|
||||
|
||||
ExtraInfoDescriptor._parse(self, entries, validate)
|
||||
|
||||
def _required_fields(self):
|
||||
excluded_fields = [
|
||||
"router-signature",
|
||||
]
|
||||
|
||||
included_fields = [
|
||||
"router-digest",
|
||||
]
|
||||
|
||||
return tuple(included_fields + [f for f in REQUIRED_FIELDS if not f in excluded_fields])
|
||||
|
||||
def _last_keyword(self):
|
||||
return None
|
309
lib/stem/descriptor/microdescriptor.py
Normal file
|
@ -0,0 +1,309 @@
|
|||
# Copyright 2013, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for Tor microdescriptors, which contain a distilled version of a
|
||||
relay's server descriptor. As of Tor version 0.2.3.3-alpha Tor no longer
|
||||
downloads server descriptors by default, opting for microdescriptors instead.
|
||||
|
||||
Unlike most descriptor documents these aren't available on the metrics site
|
||||
(since they don't contain any information that the server descriptors don't).
|
||||
|
||||
The limited information in microdescriptors makes them rather clunky to use
|
||||
compared with server descriptors. For instance, microdescriptors lack the
|
||||
relay's fingerprint, making it difficult to use them to look up the relay's
|
||||
other descriptors.
|
||||
|
||||
To do so you need to match the microdescriptor's digest against its
|
||||
corresponding router status entry. For added fun as of this writing the
|
||||
controller doesn't even surface those router status entries
|
||||
(:trac:`7953`).
|
||||
|
||||
For instance, here's an example that prints the nickname and fingerprints of
|
||||
the exit relays.
|
||||
|
||||
::
|
||||
|
||||
import os
|
||||
|
||||
from stem.control import Controller
|
||||
from stem.descriptor import parse_file
|
||||
|
||||
with Controller.from_port(port = 9051) as controller:
|
||||
controller.authenticate()
|
||||
|
||||
exit_digests = set()
|
||||
data_dir = controller.get_conf("DataDirectory")
|
||||
|
||||
for desc in controller.get_microdescriptors():
|
||||
if desc.exit_policy.is_exiting_allowed():
|
||||
exit_digests.add(desc.digest)
|
||||
|
||||
print "Exit Relays:"
|
||||
|
||||
for desc in parse_file(os.path.join(data_dir, 'cached-microdesc-consensus')):
|
||||
if desc.digest in exit_digests:
|
||||
print " %s (%s)" % (desc.nickname, desc.fingerprint)
|
||||
|
||||
Doing the same is trivial with server descriptors...
|
||||
|
||||
::
|
||||
|
||||
from stem.descriptor import parse_file
|
||||
|
||||
print "Exit Relays:"
|
||||
|
||||
for desc in parse_file("/home/atagar/.tor/cached-descriptors"):
|
||||
if desc.exit_policy.is_exiting_allowed():
|
||||
print " %s (%s)" % (desc.nickname, desc.fingerprint)
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
Microdescriptor - Tor microdescriptor.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
|
||||
import stem.descriptor.router_status_entry
|
||||
import stem.exit_policy
|
||||
|
||||
from stem.descriptor import (
|
||||
Descriptor,
|
||||
_get_descriptor_components,
|
||||
_read_until_keywords,
|
||||
)
|
||||
|
||||
try:
|
||||
# added in python 3.2
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from stem.util.lru_cache import lru_cache
|
||||
|
||||
REQUIRED_FIELDS = (
|
||||
"onion-key",
|
||||
)
|
||||
|
||||
SINGLE_FIELDS = (
|
||||
"onion-key",
|
||||
"ntor-onion-key",
|
||||
"family",
|
||||
"p",
|
||||
"p6",
|
||||
)
|
||||
|
||||
|
||||
def _parse_file(descriptor_file, validate = True, **kwargs):
|
||||
"""
|
||||
Iterates over the microdescriptors in a file.
|
||||
|
||||
:param file descriptor_file: file with descriptor content
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:returns: iterator for Microdescriptor instances in the file
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is True
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
while True:
|
||||
annotations = _read_until_keywords("onion-key", descriptor_file)
|
||||
|
||||
# read until we reach an annotation or onion-key line
|
||||
descriptor_lines = []
|
||||
|
||||
# read the onion-key line, done if we're at the end of the document
|
||||
|
||||
onion_key_line = descriptor_file.readline()
|
||||
|
||||
if onion_key_line:
|
||||
descriptor_lines.append(onion_key_line)
|
||||
else:
|
||||
break
|
||||
|
||||
while True:
|
||||
last_position = descriptor_file.tell()
|
||||
line = descriptor_file.readline()
|
||||
|
||||
if not line:
|
||||
break # EOF
|
||||
elif line.startswith(b"@") or line.startswith(b"onion-key"):
|
||||
descriptor_file.seek(last_position)
|
||||
break
|
||||
else:
|
||||
descriptor_lines.append(line)
|
||||
|
||||
if descriptor_lines:
|
||||
# strip newlines from annotations
|
||||
annotations = map(bytes.strip, annotations)
|
||||
|
||||
descriptor_text = bytes.join(b"", descriptor_lines)
|
||||
|
||||
yield Microdescriptor(descriptor_text, validate, annotations, **kwargs)
|
||||
else:
|
||||
break # done parsing descriptors
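In practice this generator is reached through parse_file() with the 'microdescriptor 1.0' type annotation used elsewhere in this package. A brief sketch (the cached-microdescs path is an assumption about a typical tor data directory)...

::

  from stem.descriptor import parse_file

  with open('/home/atagar/.tor/cached-microdescs', 'rb') as cache_file:
    for desc in parse_file(cache_file, 'microdescriptor 1.0'):
      print desc.digest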
|
||||
|
||||
|
||||
class Microdescriptor(Descriptor):
|
||||
"""
|
||||
Microdescriptor (`descriptor specification
|
||||
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)
|
||||
|
||||
:var str digest: **\*** hex digest for this microdescriptor, this can be used
|
||||
to match against the corresponding digest attribute of a
|
||||
:class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3`
|
||||
:var str onion_key: **\*** key used to encrypt EXTEND cells
|
||||
:var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
|
||||
:var list or_addresses: **\*** alternative for our address/or_port attributes, each
|
||||
entry is a tuple of the form (address (**str**), port (**int**), is_ipv6
|
||||
(**bool**))
|
||||
:var list family: **\*** nicknames or fingerprints of declared family
|
||||
:var stem.exit_policy.MicroExitPolicy exit_policy: **\*** relay's exit policy
|
||||
:var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6
|
||||
|
||||
**\*** attribute is required when we're parsed with validation
|
||||
"""
|
||||
|
||||
def __init__(self, raw_contents, validate = True, annotations = None):
|
||||
super(Microdescriptor, self).__init__(raw_contents)
|
||||
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
|
||||
|
||||
self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
|
||||
|
||||
self.onion_key = None
|
||||
self.ntor_onion_key = None
|
||||
self.or_addresses = []
|
||||
self.family = []
|
||||
self.exit_policy = stem.exit_policy.MicroExitPolicy("reject 1-65535")
|
||||
self.exit_policy_v6 = None
|
||||
|
||||
self._unrecognized_lines = []
|
||||
|
||||
self._annotation_lines = annotations if annotations else []
|
||||
|
||||
entries = _get_descriptor_components(raw_contents, validate)
|
||||
self._parse(entries, validate)
|
||||
|
||||
if validate:
|
||||
self._check_constraints(entries)
|
||||
|
||||
def get_unrecognized_lines(self):
|
||||
return list(self._unrecognized_lines)
|
||||
|
||||
@lru_cache()
|
||||
def get_annotations(self):
|
||||
"""
|
||||
Provides content that appeared prior to the descriptor. If this comes from
|
||||
the cached-microdescs then this commonly contains content like...
|
||||
|
||||
::
|
||||
|
||||
@last-listed 2013-02-24 00:18:30
|
||||
|
||||
:returns: **dict** with the key/value pairs in our annotations
|
||||
"""
|
||||
|
||||
annotation_dict = {}
|
||||
|
||||
for line in self._annotation_lines:
|
||||
if b" " in line:
|
||||
key, value = line.split(b" ", 1)
|
||||
annotation_dict[key] = value
|
||||
else:
|
||||
annotation_dict[line] = None
|
||||
|
||||
return annotation_dict
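For instance, constructing a descriptor by hand with validation disabled (a contrived sketch purely to show the return value; the annotation is the one from the example above)...

::

  desc = Microdescriptor('onion-key\n', validate = False, annotations = ['@last-listed 2013-02-24 00:18:30'])

  print desc.get_annotations()   # {'@last-listed': '2013-02-24 00:18:30'}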
|
||||
|
||||
def get_annotation_lines(self):
|
||||
"""
|
||||
Provides the lines of content that appeared prior to the descriptor. This
|
||||
is the same as the
|
||||
:func:`~stem.descriptor.microdescriptor.Microdescriptor.get_annotations`
|
||||
results, but with the unparsed lines and ordering retained.
|
||||
|
||||
:returns: **list** with the lines of annotation that came before this descriptor
|
||||
"""
|
||||
|
||||
return self._annotation_lines
|
||||
|
||||
def _parse(self, entries, validate):
|
||||
"""
|
||||
Parses a series of 'keyword => (value, pgp block)' mappings and applies
|
||||
them as attributes.
|
||||
|
||||
:param dict entries: descriptor contents to be applied
|
||||
:param bool validate: checks the validity of descriptor content if **True**
|
||||
|
||||
:raises: **ValueError** if an error occurs in validation
|
||||
"""
|
||||
|
||||
for keyword, values in entries.items():
|
||||
# most just work with the first (and only) value
|
||||
value, block_contents = values[0]
|
||||
|
||||
line = "%s %s" % (keyword, value) # original line
|
||||
|
||||
if block_contents:
|
||||
line += "\n%s" % block_contents
|
||||
|
||||
if keyword == "onion-key":
|
||||
if validate and not block_contents:
|
||||
raise ValueError("Onion key line must be followed by a public key: %s" % line)
|
||||
|
||||
self.onion_key = block_contents
|
||||
elif keyword == "ntor-onion-key":
|
||||
self.ntor_onion_key = value
|
||||
elif keyword == "a":
|
||||
for entry, _ in values:
|
||||
stem.descriptor.router_status_entry._parse_a_line(self, entry, validate)
|
||||
elif keyword == "family":
|
||||
self.family = value.split(" ")
|
||||
elif keyword == "p":
|
||||
stem.descriptor.router_status_entry._parse_p_line(self, value, validate)
|
||||
elif keyword == "p6":
|
||||
self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
|
||||
else:
|
||||
self._unrecognized_lines.append(line)
|
||||
|
||||
def _check_constraints(self, entries):
|
||||
"""
|
||||
Does a basic check that the entries conform to this descriptor type's
|
||||
constraints.
|
||||
|
||||
:param dict entries: keyword => (value, pgp key) entries
|
||||
|
||||
:raises: **ValueError** if an issue arises in validation
|
||||
"""
|
||||
|
||||
for keyword in REQUIRED_FIELDS:
|
||||
if not keyword in entries:
|
||||
raise ValueError("Microdescriptor must have a '%s' entry" % keyword)
|
||||
|
||||
for keyword in SINGLE_FIELDS:
|
||||
if keyword in entries and len(entries[keyword]) > 1:
|
||||
raise ValueError("The '%s' entry can only appear once in a microdescriptor" % keyword)
|
||||
|
||||
if "onion-key" != entries.keys()[0]:
|
||||
raise ValueError("Microdescriptor must start with a 'onion-key' entry")
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, Microdescriptor):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __hash__(self):
|
||||
return hash(str(self).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
1475
lib/stem/descriptor/networkstatus.py
Normal file
File diff suppressed because it is too large
580
lib/stem/descriptor/reader.py
Normal file
|
@ -0,0 +1,580 @@
|
|||
# Copyright 2012-2013, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Utilities for reading descriptors from local directories and archives. This is
|
||||
mostly done through the :class:`~stem.descriptor.reader.DescriptorReader`
|
||||
class, which is an iterator for the descriptor data in a series of
|
||||
destinations. For example...
|
||||
|
||||
::
|
||||
|
||||
my_descriptors = [
|
||||
"/tmp/server-descriptors-2012-03.tar.bz2",
|
||||
"/tmp/archived_descriptors/",
|
||||
]
|
||||
|
||||
# prints the contents of all the descriptor files
|
||||
with DescriptorReader(my_descriptors) as reader:
|
||||
for descriptor in reader:
|
||||
print descriptor
|
||||
|
||||
This ignores files that cannot be processed due to read errors or unparsable
|
||||
content. To be notified of skipped files you can register a listener with
|
||||
:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`.
|
||||
|
||||
The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last
|
||||
modified timestamps for descriptor files that it has read so it can skip
|
||||
unchanged files if run again. This listing of processed files can also be
|
||||
persisted and applied to other
|
||||
:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the
|
||||
following prints descriptors as they're changed over the course of a minute,
|
||||
and picks up where it left off if run again...
|
||||
|
||||
::
|
||||
|
||||
reader = DescriptorReader(["/tmp/descriptor_data"])
|
||||
|
||||
try:
|
||||
processed_files = load_processed_files("/tmp/used_descriptors")
|
||||
reader.set_processed_files(processed_files)
|
||||
except: pass # could not load, maybe this is the first run
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
while (time.time() - start_time) < 60:
|
||||
# prints any descriptors that have changed since last checked
|
||||
with reader:
|
||||
for descriptor in reader:
|
||||
print descriptor
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
save_processed_files("/tmp/used_descriptors", reader.get_processed_files())
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
load_processed_files - Loads a listing of processed files
|
||||
save_processed_files - Saves a listing of processed files
|
||||
|
||||
DescriptorReader - Iterator for descriptor data on the local file system
|
||||
|- get_processed_files - provides the listing of files that we've processed
|
||||
|- set_processed_files - sets our tracking of the files we have processed
|
||||
|- register_read_listener - adds a listener for when files are read
|
||||
|- register_skip_listener - adds a listener that's notified of skipped files
|
||||
|- start - begins reading descriptor data
|
||||
|- stop - stops reading descriptor data
|
||||
|- __enter__ / __exit__ - manages the descriptor reader thread in the context
|
||||
+- __iter__ - iterates over descriptor data in unread files
|
||||
|
||||
FileSkipped - Base exception for a file that was skipped
|
||||
|- AlreadyRead - We've already read a file with this last modified timestamp
|
||||
|- ParsingFailure - Contents can't be parsed as descriptor data
|
||||
|- UnrecognizedType - File extension indicates non-descriptor data
|
||||
+- ReadFailed - Wraps an error that was raised while reading the file
|
||||
+- FileMissing - File does not exist
|
||||
"""
|
||||
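To make the listener interface concrete, here's a sketch (not part of this module) of a skip listener that distinguishes between the exception types listed above; the directory path is hypothetical...

::

  from stem.descriptor.reader import DescriptorReader, AlreadyRead, ParsingFailure

  def skipped(path, exception):
    if isinstance(exception, AlreadyRead):
      print 'already read %s' % path
    elif isinstance(exception, ParsingFailure):
      print 'unable to parse %s: %s' % (path, exception.exception)
    else:
      print 'skipped %s: %s' % (path, exception)

  reader = DescriptorReader(['/tmp/descriptor_data'])
  reader.register_skip_listener(skipped)

  with reader:
    for descriptor in reader:
      print descriptor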
|
||||
import mimetypes
|
||||
import os
|
||||
import Queue
|
||||
import tarfile
|
||||
import threading
|
||||
|
||||
import stem.descriptor
|
||||
import stem.prereq
|
||||
|
||||
# flag to indicate when the reader thread is out of descriptor files to read
|
||||
FINISHED = "DONE"
|
||||
|
||||
|
||||
class FileSkipped(Exception):
|
||||
"Base error when we can't provide descriptor data from a file."
|
||||
|
||||
|
||||
class AlreadyRead(FileSkipped):
|
||||
"""
|
||||
Already read a file with this 'last modified' timestamp or later.
|
||||
|
||||
:param int last_modified: unix timestamp for when the file was last modified
|
||||
:param int last_modified_when_read: unix timestamp for the modification time
|
||||
when we last read this file
|
||||
"""
|
||||
|
||||
def __init__(self, last_modified, last_modified_when_read):
|
||||
super(AlreadyRead, self).__init__("File has already been read since it was last modified. modification time: %s, last read: %s" % (last_modified, last_modified_when_read))
|
||||
self.last_modified = last_modified
|
||||
self.last_modified_when_read = last_modified_when_read
|
||||
|
||||
|
||||
class ParsingFailure(FileSkipped):
|
||||
"""
|
||||
File contents could not be parsed as descriptor data.
|
||||
|
||||
:param ValueError exception: issue that arose when parsing
|
||||
"""
|
||||
|
||||
def __init__(self, parsing_exception):
|
||||
super(ParsingFailure, self).__init__(parsing_exception)
|
||||
self.exception = parsing_exception
|
||||
|
||||
|
||||
class UnrecognizedType(FileSkipped):
|
||||
"""
|
||||
File doesn't contain descriptor data. This could either be due to its file
|
||||
type or because it doesn't conform to a recognizable descriptor type.
|
||||
|
||||
:param tuple mime_type: the (type, encoding) tuple provided by mimetypes.guess_type()
|
||||
"""
|
||||
|
||||
def __init__(self, mime_type):
|
||||
super(UnrecognizedType, self).__init__("Unrecognized mime type: %s (%s)" % mime_type)
|
||||
self.mime_type = mime_type
|
||||
|
||||
|
||||
class ReadFailed(FileSkipped):
|
||||
"""
|
||||
An IOError occurred while trying to read the file.
|
||||
|
||||
:param IOError exception: issue that arose when reading the file, **None** if
|
||||
this arose due to the file not being present
|
||||
"""
|
||||
|
||||
def __init__(self, read_exception):
|
||||
super(ReadFailed, self).__init__(read_exception)
|
||||
self.exception = read_exception
|
||||
|
||||
|
||||
class FileMissing(ReadFailed):
|
||||
"File does not exist."
|
||||
|
||||
def __init__(self):
|
||||
super(FileMissing, self).__init__("File does not exist")
|
||||
|
||||
|
||||
def load_processed_files(path):
|
||||
"""
|
||||
Loads a dictionary of 'path => last modified timestamp' mappings, as
|
||||
persisted by :func:`~stem.descriptor.reader.save_processed_files`, from a
|
||||
file.
|
||||
|
||||
:param str path: location to load the processed files dictionary from
|
||||
|
||||
:returns: **dict** of 'path (**str**) => last modified unix timestamp
|
||||
(**int**)' mappings
|
||||
|
||||
:raises:
|
||||
* **IOError** if unable to read the file
|
||||
* **TypeError** if unable to parse the file's contents
|
||||
"""
|
||||
|
||||
processed_files = {}
|
||||
|
||||
with open(path) as input_file:
|
||||
for line in input_file.readlines():
|
||||
line = line.strip()
|
||||
|
||||
if not line:
|
||||
continue # skip blank lines
|
||||
|
||||
if not " " in line:
|
||||
raise TypeError("Malformed line: %s" % line)
|
||||
|
||||
path, timestamp = line.rsplit(" ", 1)
|
||||
|
||||
if not os.path.isabs(path):
|
||||
raise TypeError("'%s' is not an absolute path" % path)
|
||||
elif not timestamp.isdigit():
|
||||
raise TypeError("'%s' is not an integer timestamp" % timestamp)
|
||||
|
||||
processed_files[path] = int(timestamp)
|
||||
|
||||
return processed_files
|
||||
|
||||
|
||||
def save_processed_files(path, processed_files):
|
||||
"""
|
||||
Persists a dictionary of 'path => last modified timestamp' mappings (as
|
||||
provided by the DescriptorReader's
|
||||
:func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method)
|
||||
so that they can be loaded later and applied to another
|
||||
:class:`~stem.descriptor.reader.DescriptorReader`.
|
||||
|
||||
:param str path: location to save the processed files dictionary to
|
||||
:param dict processed_files: 'path => last modified' mappings
|
||||
|
||||
:raises:
|
||||
* **IOError** if unable to write to the file
|
||||
* **TypeError** if processed_files is of the wrong type
|
||||
"""
|
||||
|
||||
# makes the parent directory if it doesn't already exist
|
||||
try:
|
||||
path_dir = os.path.dirname(path)
|
||||
|
||||
if not os.path.exists(path_dir):
|
||||
os.makedirs(path_dir)
|
||||
except OSError as exc:
|
||||
raise IOError(exc)
|
||||
|
||||
with open(path, "w") as output_file:
|
||||
for path, timestamp in processed_files.items():
|
||||
if not os.path.isabs(path):
|
||||
raise TypeError("Only absolute paths are acceptable: %s" % path)
|
||||
|
||||
output_file.write("%s %i\n" % (path, timestamp))
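The persisted format is simply one 'absolute path<space>unix timestamp' entry per line, so a round trip looks like the following sketch (the path and timestamp are made up)...

::

  from stem.descriptor.reader import load_processed_files, save_processed_files

  save_processed_files('/tmp/used_descriptors', {'/tmp/descriptor_data/foo': 1380000000})
  print load_processed_files('/tmp/used_descriptors')   # {'/tmp/descriptor_data/foo': 1380000000}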
|
||||
|
||||
|
||||
class DescriptorReader(object):
|
||||
"""
|
||||
Iterator for the descriptor data on the local file system. This can process
|
||||
text files and tarball archives (gzip or bzip2), or recurse into directories.
|
||||
|
||||
By default this limits the number of descriptors that we'll read ahead before
|
||||
waiting for our caller to fetch some of them. This is included to avoid
|
||||
unbounded memory usage.
|
||||
|
||||
Our persistence_path argument is a convenient way to persist the listing
|
||||
of files we have processed between runs. However, it doesn't allow for error
|
||||
handling. If you want that then use the
|
||||
:func:`~stem.descriptor.reader.load_processed_files` and
|
||||
:func:`~stem.descriptor.reader.save_processed_files` functions instead.
|
||||
|
||||
:param str,list target: path or list of paths for files or directories to be read from
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param bool follow_links: determines if we'll follow symlinks when traversing
|
||||
directories (requires python 2.6)
|
||||
:param int buffer_size: descriptors we'll buffer before waiting for some to
|
||||
be read, this is unbounded if zero
|
||||
:param str persistence_path: if set we will load and save processed file
|
||||
listings from this path, errors are ignored
|
||||
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
|
||||
which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
"""
|
||||
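For example, the persistence_path argument takes care of the load/save bookkeeping from the module overview automatically; a sketch with hypothetical paths...

::

  from stem.descriptor.reader import DescriptorReader

  with DescriptorReader(['/tmp/descriptor_data'], persistence_path = '/tmp/used_descriptors') as reader:
    for descriptor in reader:
      print descriptor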
|
||||
def __init__(self, target, validate = True, follow_links = False, buffer_size = 100, persistence_path = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
|
||||
if isinstance(target, (bytes, unicode)):
|
||||
self._targets = [target]
|
||||
else:
|
||||
self._targets = target
|
||||
|
||||
# expand any relative paths we got
|
||||
|
||||
target = map(os.path.abspath, target)
|
||||
|
||||
self._validate = validate
|
||||
self._follow_links = follow_links
|
||||
self._persistence_path = persistence_path
|
||||
self._document_handler = document_handler
|
||||
self._kwargs = kwargs
|
||||
self._read_listeners = []
|
||||
self._skip_listeners = []
|
||||
self._processed_files = {}
|
||||
|
||||
self._reader_thread = None
|
||||
self._reader_thread_lock = threading.RLock()
|
||||
|
||||
self._iter_lock = threading.RLock()
|
||||
self._iter_notice = threading.Event()
|
||||
|
||||
self._is_stopped = threading.Event()
|
||||
self._is_stopped.set()
|
||||
|
||||
# Descriptors that we have read but not yet provided to the caller. A
|
||||
# FINISHED entry is used by the reading thread to indicate the end.
|
||||
|
||||
self._unreturned_descriptors = Queue.Queue(buffer_size)
|
||||
|
||||
if self._persistence_path:
|
||||
try:
|
||||
processed_files = load_processed_files(self._persistence_path)
|
||||
self.set_processed_files(processed_files)
|
||||
except:
|
||||
pass
|
||||
|
||||
def get_processed_files(self):
|
||||
"""
|
||||
For each file that we have read descriptor data from this provides a
|
||||
mapping of the form...
|
||||
|
||||
::
|
||||
|
||||
absolute path (str) => last modified unix timestamp (int)
|
||||
|
||||
This includes entries set through the
|
||||
:func:`~stem.descriptor.reader.DescriptorReader.set_processed_files`
|
||||
method. Each run resets this to only the files that were present during
|
||||
that run.
|
||||
|
||||
:returns: **dict** with the absolute paths and unix timestamp for the last
|
||||
modified times of the files we have processed
|
||||
"""
|
||||
|
||||
# make sure that we only provide back absolute paths
|
||||
return dict((os.path.abspath(k), v) for (k, v) in self._processed_files.items())
|
||||
|
||||
def set_processed_files(self, processed_files):
|
||||
"""
|
||||
Sets the listing of the files we have processed. Most often this is used
|
||||
with a newly created :class:`~stem.descriptor.reader.DescriptorReader` to
|
||||
pre-populate the listing of descriptor files that we have seen.
|
||||
|
||||
:param dict processed_files: mapping of absolute paths (**str**) to unix
|
||||
timestamps for the last modified time (**int**)
|
||||
"""
|
||||
|
||||
self._processed_files = dict(processed_files)
|
||||
|
||||
def register_read_listener(self, listener):
|
||||
"""
|
||||
Registers a listener for when files are read. This is executed prior to
|
||||
processing files. Listeners are expected to be of the form...
|
||||
|
||||
::
|
||||
|
||||
my_listener(path)
|
||||
|
||||
:param functor listener: functor to be notified when files are read
|
||||
"""
|
||||
|
||||
self._read_listeners.append(listener)
|
||||
|
||||
def register_skip_listener(self, listener):
|
||||
"""
|
||||
Registers a listener for files that are skipped. This listener is expected
|
||||
to be a functor of the form...
|
||||
|
||||
::
|
||||
|
||||
my_listener(path, exception)
|
||||
|
||||
:param functor listener: functor to be notified of files that are skipped
|
||||
due to read errors or because they couldn't be parsed as valid descriptor data
|
||||
"""
|
||||
|
||||
self._skip_listeners.append(listener)
|
||||
|
||||
def get_buffered_descriptor_count(self):
|
||||
"""
|
||||
Provides the number of descriptors that are waiting to be iterated over.
|
||||
This is limited to the buffer_size that we were constructed with.
|
||||
|
||||
:returns: **int** for the estimated number of currently enqueued
|
||||
descriptors, this is not entirely reliable
|
||||
"""
|
||||
|
||||
return self._unreturned_descriptors.qsize()
|
||||
|
||||
def start(self):
|
||||
"""
|
||||
Starts reading our descriptor files.
|
||||
|
||||
:raises: **ValueError** if we're already reading the descriptor files
|
||||
"""
|
||||
|
||||
with self._reader_thread_lock:
|
||||
if self._reader_thread:
|
||||
raise ValueError("Already running, you need to call stop() first")
|
||||
else:
|
||||
self._is_stopped.clear()
|
||||
self._reader_thread = threading.Thread(target = self._read_descriptor_files, name="Descriptor Reader")
|
||||
self._reader_thread.setDaemon(True)
|
||||
self._reader_thread.start()
|
||||
|
||||
def stop(self):
|
||||
"""
|
||||
Stops further reading of descriptor files.
|
||||
"""
|
||||
|
||||
with self._reader_thread_lock:
|
||||
self._is_stopped.set()
|
||||
self._iter_notice.set()
|
||||
|
||||
# clears our queue to unblock enqueue calls
|
||||
|
||||
try:
|
||||
while True:
|
||||
self._unreturned_descriptors.get_nowait()
|
||||
except Queue.Empty:
|
||||
pass
|
||||
|
||||
self._reader_thread.join()
|
||||
self._reader_thread = None
|
||||
|
||||
if self._persistence_path:
|
||||
try:
|
||||
processed_files = self.get_processed_files()
|
||||
save_processed_files(self._persistence_path, processed_files)
|
||||
except:
|
||||
pass
|
||||
|
||||
def _read_descriptor_files(self):
|
||||
new_processed_files = {}
|
||||
remaining_files = list(self._targets)
|
||||
|
||||
while remaining_files and not self._is_stopped.is_set():
|
||||
target = remaining_files.pop(0)
|
||||
|
||||
if not os.path.exists(target):
|
||||
self._notify_skip_listeners(target, FileMissing())
|
||||
continue
|
||||
|
||||
if os.path.isdir(target):
|
||||
walker = os.walk(target, followlinks = self._follow_links)
|
||||
self._handle_walker(walker, new_processed_files)
|
||||
else:
|
||||
self._handle_file(target, new_processed_files)
|
||||
|
||||
self._processed_files = new_processed_files
|
||||
|
||||
if not self._is_stopped.is_set():
|
||||
self._unreturned_descriptors.put(FINISHED)
|
||||
|
||||
self._iter_notice.set()
|
||||
|
||||
def __iter__(self):
|
||||
with self._iter_lock:
|
||||
while not self._is_stopped.is_set():
|
||||
try:
|
||||
descriptor = self._unreturned_descriptors.get_nowait()
|
||||
|
||||
if descriptor == FINISHED:
|
||||
break
|
||||
else:
|
||||
yield descriptor
|
||||
except Queue.Empty:
|
||||
self._iter_notice.wait()
|
||||
self._iter_notice.clear()
|
||||
|
||||
def _handle_walker(self, walker, new_processed_files):
|
||||
for root, _, files in walker:
|
||||
for filename in files:
|
||||
self._handle_file(os.path.join(root, filename), new_processed_files)
|
||||
|
||||
# this can take a while if, say, we're including the root directory
|
||||
if self._is_stopped.is_set():
|
||||
return
|
||||
|
||||
def _handle_file(self, target, new_processed_files):
|
||||
# This is a file. Register its last modified timestamp and check if
|
||||
# it's a file that we should skip.
|
||||
|
||||
try:
|
||||
last_modified = int(os.stat(target).st_mtime)
|
||||
last_used = self._processed_files.get(target)
|
||||
new_processed_files[target] = last_modified
|
||||
except OSError as exc:
|
||||
self._notify_skip_listeners(target, ReadFailed(exc))
|
||||
return
|
||||
|
||||
if last_used and last_used >= last_modified:
|
||||
self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used))
|
||||
return
|
||||
|
||||
# Block devices and such are never descriptors, and can cause us to block
|
||||
# for quite a while, so we skip anything that isn't a regular file.
|
||||
|
||||
if not os.path.isfile(target):
|
||||
return
|
||||
|
||||
# The mimetypes module only checks the file extension. To actually
|
||||
# check the content (like the 'file' command) we'd need something like
|
||||
# pymagic (https://github.com/cloudburst/pymagic).
|
||||
|
||||
target_type = mimetypes.guess_type(target)
|
||||
|
||||
# Checking if it's a tar file may fail due to permissions so failing back
|
||||
# to the mime type...
|
||||
#
|
||||
# IOError: [Errno 13] Permission denied: '/vmlinuz.old'
|
||||
#
|
||||
# With python 3 insufficient permissions raise an AttributeError instead...
|
||||
#
|
||||
# http://bugs.python.org/issue17059
|
||||
|
||||
try:
|
||||
is_tar = tarfile.is_tarfile(target)
|
||||
except (IOError, AttributeError):
|
||||
is_tar = target_type[0] == 'application/x-tar'
|
||||
|
||||
if target_type[0] in (None, 'text/plain'):
|
||||
# either '.txt' or an unknown type
|
||||
self._handle_descriptor_file(target, target_type)
|
||||
elif is_tar:
|
||||
# handles gzip, bz2, and decompressed tarballs among others
|
||||
self._handle_archive(target)
|
||||
else:
|
||||
self._notify_skip_listeners(target, UnrecognizedType(target_type))
|
||||
|
||||
def _handle_descriptor_file(self, target, mime_type):
|
||||
try:
|
||||
self._notify_read_listeners(target)
|
||||
|
||||
with open(target, 'rb') as target_file:
|
||||
for desc in stem.descriptor.parse_file(target_file, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
|
||||
if self._is_stopped.is_set():
|
||||
return
|
||||
|
||||
self._unreturned_descriptors.put(desc)
|
||||
self._iter_notice.set()
|
||||
except TypeError as exc:
|
||||
self._notify_skip_listeners(target, UnrecognizedType(mime_type))
|
||||
except ValueError as exc:
|
||||
self._notify_skip_listeners(target, ParsingFailure(exc))
|
||||
except IOError as exc:
|
||||
self._notify_skip_listeners(target, ReadFailed(exc))
|
||||
|
||||
def _handle_archive(self, target):
|
||||
# TODO: This would be nicer via the 'with' keyword, but tarfile's __exit__
|
||||
# method was added sometime after python 2.5. We should change this when
|
||||
# we drop python 2.5 support.
|
||||
|
||||
tar_file = None
|
||||
|
||||
try:
|
||||
self._notify_read_listeners(target)
|
||||
tar_file = tarfile.open(target)
|
||||
|
||||
for tar_entry in tar_file:
|
||||
if tar_entry.isfile():
|
||||
entry = tar_file.extractfile(tar_entry)
|
||||
|
||||
try:
|
||||
for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
|
||||
if self._is_stopped.is_set():
|
||||
return
|
||||
|
||||
desc._set_path(os.path.abspath(target))
|
||||
desc._set_archive_path(entry.name)
|
||||
self._unreturned_descriptors.put(desc)
|
||||
self._iter_notice.set()
|
||||
except TypeError as exc:
|
||||
self._notify_skip_listeners(target, ParsingFailure(exc))
|
||||
except ValueError as exc:
|
||||
self._notify_skip_listeners(target, ParsingFailure(exc))
|
||||
finally:
|
||||
entry.close()
|
||||
except IOError as exc:
|
||||
self._notify_skip_listeners(target, ReadFailed(exc))
|
||||
finally:
|
||||
if tar_file:
|
||||
tar_file.close()
|
||||
|
||||
def _notify_read_listeners(self, path):
|
||||
for listener in self._read_listeners:
|
||||
listener(path)
|
||||
|
||||
def _notify_skip_listeners(self, path, exception):
|
||||
for listener in self._skip_listeners:
|
||||
listener(path, exception)
|
||||
|
||||
def __enter__(self):
|
||||
self.start()
|
||||
return self
|
||||
|
||||
def __exit__(self, exit_type, value, traceback):
|
||||
self.stop()
|
758
lib/stem/descriptor/remote.py
Normal file
|
@ -0,0 +1,758 @@
|
|||
# Copyright 2013, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Module for remotely retrieving descriptors from directory authorities and
|
||||
mirrors. This is most easily done through the
|
||||
:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
|
||||
:class:`~stem.descriptor.remote.Query` instances to get you the descriptor
|
||||
content. For example...
|
||||
|
||||
::
|
||||
|
||||
from stem.descriptor.remote import DescriptorDownloader
|
||||
|
||||
downloader = DescriptorDownloader(
|
||||
use_mirrors = True,
|
||||
timeout = 10,
|
||||
)
|
||||
|
||||
query = downloader.get_server_descriptors()
|
||||
|
||||
print "Exit Relays:"
|
||||
|
||||
try:
|
||||
for desc in query.run():
|
||||
if desc.exit_policy.is_exiting_allowed():
|
||||
print " %s (%s)" % (desc.nickname, desc.fingerprint)
|
||||
|
||||
print
|
||||
print "Query took %0.2f seconds" % query.runtime
|
||||
except Exception as exc:
|
||||
print "Unable to retrieve the server descriptors: %s" % exc
|
||||
|
||||
If you don't care about errors then you can also simply iterate over the query
|
||||
itself...
|
||||
|
||||
::
|
||||
|
||||
for desc in downloader.get_server_descriptors():
|
||||
if desc.exit_policy.is_exiting_allowed():
|
||||
print " %s (%s)" % (desc.nickname, desc.fingerprint)
|
||||
|
||||
::
|
||||
|
||||
get_authorities - Provides tor directory information.
|
||||
|
||||
DirectoryAuthority - Information about a tor directory authority.
|
||||
|
||||
Query - Asynchronous request to download tor descriptors
|
||||
|- start - issues the query if it isn't already running
|
||||
+- run - blocks until the request is finished and provides the results
|
||||
|
||||
DescriptorDownloader - Configurable class for issuing queries
|
||||
|- use_directory_mirrors - use directory mirrors to download future descriptors
|
||||
|- get_server_descriptors - provides present server descriptors
|
||||
|- get_extrainfo_descriptors - provides present extrainfo descriptors
|
||||
|- get_microdescriptors - provides present microdescriptors
|
||||
|- get_consensus - provides the present consensus or router status entries
|
||||
|- get_key_certificates - provides present authority key certificates
|
||||
+- query - request an arbitrary descriptor resource
|
||||
|
||||
.. data:: MAX_FINGERPRINTS
|
||||
|
||||
Maximum number of descriptors that can be requested at a time by their
|
||||
fingerprints.
|
||||
|
||||
.. data:: MAX_MICRODESCRIPTOR_HASHES
|
||||
|
||||
Maximum number of microdescriptors that can be requested at a time by their
|
||||
hashes.
|
||||
"""
|
||||
|
||||
import io
|
||||
import random
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import urllib2
|
||||
import zlib
|
||||
|
||||
import stem.descriptor
|
||||
|
||||
from stem import Flag
|
||||
from stem.util import log
|
||||
|
||||
# Tor has a limited number of descriptors we can fetch explicitly by their
|
||||
# fingerprints or hashes due to a limit on the url length by squid proxies.
|
||||
|
||||
MAX_FINGERPRINTS = 96
|
||||
MAX_MICRODESCRIPTOR_HASHES = 92
|
||||
|
||||
# We commonly only want authorities that vote in the consensus, and hence have
|
||||
# a v3ident.
|
||||
|
||||
HAS_V3IDENT = lambda auth: auth.v3ident is not None
|
||||
|
||||
|
||||
def _guess_descriptor_type(resource):
|
||||
# Attempts to determine the descriptor type based on the resource url. This
|
||||
# raises a ValueError if the resource isn't recognized.
|
||||
|
||||
if resource.startswith('/tor/server/'):
|
||||
return 'server-descriptor 1.0'
|
||||
elif resource.startswith('/tor/extra/'):
|
||||
return 'extra-info 1.0'
|
||||
elif resource.startswith('/tor/micro/'):
|
||||
return 'microdescriptor 1.0'
|
||||
elif resource.startswith('/tor/status-vote/'):
|
||||
return 'network-status-consensus-3 1.0'
|
||||
elif resource.startswith('/tor/keys/'):
|
||||
return 'dir-key-certificate-3 1.0'
|
||||
else:
|
||||
raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
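For instance, this module-private helper maps resources onto the type annotations that parse_file() expects (a quick sketch of its behavior)...

::

  from stem.descriptor.remote import _guess_descriptor_type

  print _guess_descriptor_type('/tor/server/all.z')                      # server-descriptor 1.0
  print _guess_descriptor_type('/tor/status-vote/current/consensus.z')   # network-status-consensus-3 1.0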
|
||||
|
||||
|
||||
class Query(object):
|
||||
"""
|
||||
Asynchronous request for descriptor content from a directory authority or
|
||||
mirror. These can either be made through the
|
||||
:class:`~stem.descriptor.remote.DescriptorDownloader` or directly for more
|
||||
advanced usage.
|
||||
|
||||
To block on the response and get results either call
|
||||
:func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
|
||||
:func:`~stem.descriptor.remote.Query.run` method pass along any errors that
|
||||
arise...
|
||||
|
||||
::
|
||||
|
||||
from stem.descriptor.remote import Query
|
||||
|
||||
query = Query(
|
||||
'/tor/server/all.z',
|
||||
block = True,
|
||||
timeout = 30,
|
||||
)
|
||||
|
||||
print "Current relays:"
|
||||
|
||||
if not query.error:
|
||||
for desc in query:
|
||||
print desc.fingerprint
|
||||
else:
|
||||
print "Unable to retrieve the server descriptors: %s" % query.error
|
||||
|
||||
... while iterating fails silently...
|
||||
|
||||
::
|
||||
|
||||
print "Current relays:"
|
||||
|
||||
for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'):
|
||||
print desc.fingerprint
|
||||
|
||||
In either case exceptions are available via our 'error' attribute.
|
||||
|
||||
Tor provides quite a few different descriptor resources via its directory
|
||||
protocol (see section 4.2 and later of the `dir-spec
|
||||
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
|
||||
Commonly useful ones include...
|
||||
|
||||
===================================== ===========
|
||||
Resource Description
|
||||
===================================== ===========
|
||||
/tor/server/all.z all present server descriptors
|
||||
/tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints
|
||||
/tor/extra/all.z all present extrainfo descriptors
|
||||
/tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints
|
||||
/tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes
|
||||
/tor/status-vote/current/consensus.z present consensus
|
||||
/tor/keys/all.z key certificates for the authorities
|
||||
/tor/keys/fp/<v3ident1>+<v3ident2>.z key certificates for specific authorities
|
||||
===================================== ===========
|
||||
|
||||
The '.z' suffix can be excluded to get a plaintext rather than compressed
|
||||
response. Compression is handled transparently, so this shouldn't matter to
|
||||
the caller.
|
||||
|
||||
:var str resource: resource being fetched, such as '/tor/server/all.z'
|
||||
:var str descriptor_type: type of descriptors being fetched (for options see
|
||||
:func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
|
||||
resource if **None**
|
||||
|
||||
:var list endpoints: (address, dirport) tuples of the authority or mirror
|
||||
we're querying, this uses authorities if undefined
|
||||
:var int retries: number of times to attempt the request if downloading it
|
||||
fails
|
||||
:var bool fall_back_to_authority: when retrying request issues the last
|
||||
request to a directory authority if **True**
|
||||
|
||||
:var str content: downloaded descriptor content
|
||||
:var Exception error: exception if a problem occurred
|
||||
:var bool is_done: flag that indicates if our request has finished
|
||||
:var str download_url: last url used to download the descriptor, this is
|
||||
unset until we've actually made a download attempt
|
||||
|
||||
:var float start_time: unix timestamp when we first started running
|
||||
:var float timeout: duration before we'll time out our request
|
||||
:var float runtime: time our query took, this is **None** if it's not yet
|
||||
finished
|
||||
|
||||
:var bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:var stem.descriptor.__init__.DocumentHandler document_handler: method in
|
||||
which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
|
||||
:var dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:param bool start: start making the request when constructed (default is **True**)
|
||||
:param bool block: only return after the request has been completed, this is
|
||||
the same as running **query.run(True)** (default is **False**)
|
||||
"""
|
||||
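As another example drawn from the resource table above (a sketch; errors from run() will propagate as described), fetching the authorities' key certificates is just a matter of picking the matching resource...

::

  from stem.descriptor.remote import Query

  query = Query('/tor/keys/all.z', 'dir-key-certificate-3 1.0')

  for cert in query.run():
    print cert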
|
||||
def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
|
||||
if not resource.startswith('/'):
|
||||
raise ValueError("Resources should start with a '/': %s" % resource)
|
||||
|
||||
self.resource = resource
|
||||
|
||||
if descriptor_type:
|
||||
self.descriptor_type = descriptor_type
|
||||
else:
|
||||
self.descriptor_type = _guess_descriptor_type(resource)
|
||||
|
||||
self.endpoints = endpoints if endpoints else []
|
||||
self.retries = retries
|
||||
self.fall_back_to_authority = fall_back_to_authority
|
||||
|
||||
self.content = None
|
||||
self.error = None
|
||||
self.is_done = False
|
||||
self.download_url = None
|
||||
|
||||
self.start_time = None
|
||||
self.timeout = timeout
|
||||
self.runtime = None
|
||||
|
||||
self.validate = validate
|
||||
self.document_handler = document_handler
|
||||
self.kwargs = kwargs
|
||||
|
||||
self._downloader_thread = None
|
||||
self._downloader_thread_lock = threading.RLock()
|
||||
|
||||
if start:
|
||||
self.start()
|
||||
|
||||
if block:
|
||||
self.run(True)
|
||||
|
||||
def start(self):
|
||||
"""
|
||||
Starts downloading the descriptors if we haven't started already.
|
||||
"""
|
||||
|
||||
with self._downloader_thread_lock:
|
||||
if self._downloader_thread is None:
|
||||
self._downloader_thread = threading.Thread(
|
||||
name = "Descriptor Query",
|
||||
target = self._download_descriptors,
|
||||
args = (self.retries,)
|
||||
)
|
||||
|
||||
self._downloader_thread.setDaemon(True)
|
||||
self._downloader_thread.start()
|
||||
|
||||
def run(self, suppress = False):
|
||||
"""
|
||||
Blocks until our request is complete then provides the descriptors. If we
|
||||
haven't yet started our request then this does so.
|
||||
|
||||
:param bool suppress: avoids raising exceptions if **True**
|
||||
|
||||
:returns: list for the requested :class:`~stem.descriptor.__init__.Descriptor` instances
|
||||
|
||||
:raises:
|
||||
Using the iterator can fail with the following if **suppress** is
|
||||
**False**...
|
||||
|
||||
* **ValueError** if the descriptor contents is malformed
|
||||
* **socket.timeout** if our request timed out
|
||||
* **urllib2.URLError** for most request failures
|
||||
|
||||
Note that the urllib2 module may fail with other exception types, in
|
||||
which case we'll pass it along.
|
||||
"""
|
||||
|
||||
return list(self._run(suppress))
|
||||
|
||||
def _run(self, suppress):
|
||||
with self._downloader_thread_lock:
|
||||
self.start()
|
||||
self._downloader_thread.join()
|
||||
|
||||
if self.error:
|
||||
if suppress:
|
||||
return
|
||||
|
||||
raise self.error
|
||||
else:
|
||||
if self.content is None:
|
||||
if suppress:
|
||||
return
|
||||
|
||||
raise ValueError('BUG: _download_descriptors() finished without either results or an error')
|
||||
|
||||
try:
|
||||
results = stem.descriptor.parse_file(
|
||||
io.BytesIO(self.content),
|
||||
self.descriptor_type,
|
||||
validate = self.validate,
|
||||
document_handler = self.document_handler,
|
||||
**self.kwargs
|
||||
)
|
||||
|
||||
for desc in results:
|
||||
yield desc
|
||||
except ValueError as exc:
|
||||
self.error = exc # encountered a parsing error
|
||||
|
||||
if suppress:
|
||||
return
|
||||
|
||||
raise self.error
|
||||
|
||||
def __iter__(self):
|
||||
for desc in self._run(True):
|
||||
yield desc
|
||||
|
||||
def _pick_url(self, use_authority = False):
|
||||
"""
|
||||
Provides a url that can be queried. If we have multiple endpoints then one
|
||||
will be picked randomly.
|
||||
|
||||
:param bool use_authority: ignores our endpoints and uses a directory
|
||||
authority instead
|
||||
|
||||
:returns: **str** for the url being queried by this request
|
||||
"""
|
||||
|
||||
if use_authority or not self.endpoints:
|
||||
authority = random.choice(filter(HAS_V3IDENT, get_authorities().values()))
|
||||
address, dirport = authority.address, authority.dir_port
|
||||
else:
|
||||
address, dirport = random.choice(self.endpoints)
|
||||
|
||||
return "http://%s:%i/%s" % (address, dirport, self.resource.lstrip('/'))
|
||||
|
||||
def _download_descriptors(self, retries):
|
||||
try:
|
||||
use_authority = retries == 0 and self.fall_back_to_authority
|
||||
self.download_url = self._pick_url(use_authority)
|
||||
|
||||
self.start_time = time.time()
|
||||
response = urllib2.urlopen(self.download_url, timeout = self.timeout).read()
|
||||
|
||||
if self.download_url.endswith('.z'):
|
||||
response = zlib.decompress(response)
|
||||
|
||||
self.content = response.strip()
|
||||
|
||||
self.runtime = time.time() - self.start_time
|
||||
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
|
||||
except:
|
||||
exc = sys.exc_info()[1]
|
||||
|
||||
if retries > 0:
|
||||
log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc))
|
||||
return self._download_descriptors(retries - 1)
|
||||
else:
|
||||
log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc))
|
||||
self.error = exc
|
||||
finally:
|
||||
self.is_done = True
|
||||
|
||||
|
||||
class DescriptorDownloader(object):
|
||||
"""
|
||||
Configurable class that issues :class:`~stem.descriptor.remote.Query`
|
||||
instances on your behalf.
|
||||
|
||||
:param bool use_mirrors: downloads the present consensus and uses the directory
|
||||
mirrors to fetch future requests; this fails silently if the consensus
|
||||
cannot be downloaded
|
||||
:param default_args: default arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
"""
|
||||
|
||||
def __init__(self, use_mirrors = False, **default_args):
|
||||
self._default_args = default_args
|
||||
|
||||
authorities = filter(HAS_V3IDENT, get_authorities().values())
|
||||
self._endpoints = [(auth.address, auth.dir_port) for auth in authorities]
|
||||
|
||||
if use_mirrors:
|
||||
try:
|
||||
start_time = time.time()
|
||||
self.use_directory_mirrors()
|
||||
log.debug("Retrieved directory mirrors (took %0.2fs)" % (time.time() - start_time))
|
||||
except Exception as exc:
|
||||
log.debug("Unable to retrieve directory mirrors: %s" % exc)
|
||||
|
||||
def use_directory_mirrors(self):
|
||||
"""
|
||||
Downloads the present consensus and configures ourselves to use directory
|
||||
mirrors, in addition to authorities.
|
||||
|
||||
:returns: :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
|
||||
from which we got the directory mirrors
|
||||
|
||||
:raises: **Exception** if unable to determine the directory mirrors
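
For instance (this downloads the current consensus, so it needs network
access)...

::

  downloader = stem.descriptor.remote.DescriptorDownloader()
  downloader.use_directory_mirrors()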
|
||||
"""
|
||||
|
||||
authorities = filter(HAS_V3IDENT, get_authorities().values())
|
||||
new_endpoints = set([(auth.address, auth.dir_port) for auth in authorities])
|
||||
|
||||
consensus = list(self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run())[0]
|
||||
|
||||
for desc in consensus.routers.values():
|
||||
if Flag.V2DIR in desc.flags:
|
||||
new_endpoints.add((desc.address, desc.dir_port))
|
||||
|
||||
# we need our endpoints to be a list rather than set for random.choice()
|
||||
|
||||
self._endpoints = list(new_endpoints)
|
||||
|
||||
return consensus
|
||||
|
||||
def get_server_descriptors(self, fingerprints = None, **query_args):
|
||||
"""
|
||||
Provides the server descriptors with the given fingerprints. If no
|
||||
fingerprints are provided then this returns all descriptors in the present
|
||||
consensus.
|
||||
|
||||
:param str,list fingerprints: fingerprint or list of fingerprints to be
|
||||
retrieved, gets all descriptors if **None**
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the server descriptors
|
||||
|
||||
:raises: **ValueError** if we request more than 96 descriptors by their
|
||||
fingerprints (this is due to a limit on the url length by squid proxies).
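
For example, requesting a single relay's descriptor by fingerprint might look
like the following (the fingerprint shown is moria1's, taken from the
hardcoded authority list below)...

::

  downloader = DescriptorDownloader()
  desc = downloader.get_server_descriptors('9695DFC35FFEB861329B9F1AB04C46397020CE31').run()[0]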
|
||||
"""
|
||||
|
||||
resource = '/tor/server/all.z'
|
||||
|
||||
if isinstance(fingerprints, str):
|
||||
fingerprints = [fingerprints]
|
||||
|
||||
if fingerprints:
|
||||
if len(fingerprints) > MAX_FINGERPRINTS:
|
||||
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)
|
||||
|
||||
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
|
||||
|
||||
return self.query(resource, **query_args)
|
||||
|
||||
def get_extrainfo_descriptors(self, fingerprints = None, **query_args):
|
||||
"""
|
||||
Provides the extrainfo descriptors with the given fingerprints. If no
|
||||
fingerprints are provided then this returns all descriptors in the present
|
||||
consensus.
|
||||
|
||||
:param str,list fingerprints: fingerprint or list of fingerprints to be
|
||||
retrieved, gets all descriptors if **None**
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the extrainfo descriptors
|
||||
|
||||
:raises: **ValueError** if we request more than 96 descriptors by their
|
||||
fingerprints (this is due to a limit on the url length by squid proxies).
|
||||
"""
|
||||
|
||||
resource = '/tor/extra/all.z'
|
||||
|
||||
if isinstance(fingerprints, str):
|
||||
fingerprints = [fingerprints]
|
||||
|
||||
if fingerprints:
|
||||
if len(fingerprints) > MAX_FINGERPRINTS:
|
||||
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)
|
||||
|
||||
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
|
||||
|
||||
return self.query(resource, **query_args)
|
||||
|
||||
def get_microdescriptors(self, hashes, **query_args):
|
||||
"""
|
||||
Provides the microdescriptors with the given hashes. To get these see the
|
||||
'microdescriptor_hashes' attribute of
|
||||
:class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`. Note
|
||||
that these are only provided via a microdescriptor consensus (such as
|
||||
'cached-microdesc-consensus' in your data directory).
|
||||
|
||||
:param str,list hashes: microdescriptor hash or list of hashes to be
|
||||
retrieved
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the microdescriptors
|
||||
|
||||
:raises: **ValueError** if we request more than 92 microdescriptors by their
|
||||
hashes (this is due to a limit on the url length by squid proxies).
|
||||
"""
|
||||
|
||||
if isinstance(hashes, str):
|
||||
hashes = [hashes]
|
||||
|
||||
if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
|
||||
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_HASHES)
|
||||
|
||||
return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)
|
||||
|
||||
def get_consensus(self, authority_v3ident = None, **query_args):
|
||||
"""
|
||||
Provides the present router status entries.
|
||||
|
||||
:param str authority_v3ident: fingerprint of the authority key for which
|
||||
to get the consensus, see `'v3ident' in tor's config.c
|
||||
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
|
||||
for the values.
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the router status
|
||||
entries
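
For example, to get a fully populated network status document rather than
individual entries (where 'downloader' is a DescriptorDownloader)...

::

  consensus = downloader.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run()[0]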
|
||||
"""
|
||||
|
||||
resource = '/tor/status-vote/current/consensus'
|
||||
|
||||
if authority_v3ident:
|
||||
resource += '/%s' % authority_v3ident
|
||||
|
||||
return self.query(resource + '.z', **query_args)
|
||||
|
||||
def get_vote(self, authority, **query_args):
|
||||
"""
|
||||
Provides the present vote for a given directory authority.
|
||||
|
||||
:param stem.descriptor.remote.DirectoryAuthority authority: authority for which to retrieve a vote for
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the router status
|
||||
entries
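
A rough example (assuming moria1 is reachable)...

::

  downloader = DescriptorDownloader()
  moria1 = get_authorities()['moria1']

  for entry in downloader.get_vote(moria1).run():
    print(entry.fingerprint)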
|
||||
"""
|
||||
|
||||
resource = '/tor/status-vote/current/authority'
|
||||
|
||||
if not 'endpoints' in query_args:
|
||||
query_args['endpoints'] = [(authority.address, authority.dir_port)]
|
||||
|
||||
return self.query(resource + '.z', **query_args)
|
||||
|
||||
def get_key_certificates(self, authority_v3idents = None, **query_args):
|
||||
"""
|
||||
Provides the key certificates for authorities with the given fingerprints.
|
||||
If no fingerprints are provided then this returns all present key
|
||||
certificates.
|
||||
|
||||
:param str,list authority_v3idents: fingerprint or list of fingerprints of the
|
||||
authority keys, see `'v3ident' in tor's config.c
|
||||
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
|
||||
for the values.
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the key certificates
|
||||
|
||||
:raises: **ValueError** if we request more than 96 key certificates by
|
||||
their identity fingerprints (this is due to a limit on the url length by
|
||||
squid proxies).
|
||||
"""
|
||||
|
||||
resource = '/tor/keys/all.z'
|
||||
|
||||
if isinstance(authority_v3idents, str):
|
||||
authority_v3idents = [authority_v3idents]
|
||||
|
||||
if authority_v3idents:
|
||||
if len(authority_v3idents) > MAX_FINGERPRINTS:
|
||||
raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_FINGERPRINTS)
|
||||
|
||||
resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)
|
||||
|
||||
return self.query(resource, **query_args)
|
||||
|
||||
def query(self, resource, **query_args):
|
||||
"""
|
||||
Issues a request for the given resource.
|
||||
|
||||
:param str resource: resource being fetched, such as '/tor/server/all.z'
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the descriptors
|
||||
|
||||
:raises: **ValueError** if resource is clearly invalid or the descriptor
|
||||
type can't be determined when 'descriptor_type' is **None**
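
For instance, the get_server_descriptors() call above is roughly equivalent
to...

::

  query = downloader.query('/tor/server/all.z')

  for desc in query.run():
    print(desc.nickname)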
|
||||
"""
|
||||
|
||||
args = dict(self._default_args)
|
||||
args.update(query_args)
|
||||
|
||||
if not 'endpoints' in args:
|
||||
args['endpoints'] = self._endpoints
|
||||
|
||||
if not 'fall_back_to_authority' in args:
|
||||
args['fall_back_to_authority'] = True
|
||||
|
||||
return Query(
|
||||
resource,
|
||||
**args
|
||||
)
|
||||
|
||||
|
||||
class DirectoryAuthority(object):
|
||||
"""
|
||||
Tor directory authority, a special type of relay `hardcoded into tor
|
||||
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
|
||||
that enumerates the other relays within the network.
|
||||
|
||||
At a very high level tor works as follows...
|
||||
|
||||
1. A volunteer starts up a new tor relay, during which it sends a `server
|
||||
descriptor <server_descriptor.html>`_ to each of the directory
|
||||
authorities.
|
||||
|
||||
2. Each hour the directory authorities make a `vote <networkstatus.html>`_
|
||||
that says who they think the active relays are in the network and some
|
||||
attributes about them.
|
||||
|
||||
3. The directory authorities send each other their votes, and compile that
|
||||
into the `consensus <networkstatus.html>`_. This document is very similar
|
||||
to the votes, the only difference being that the majority of the
|
||||
authorities agree upon and sign this document. The individual relay entries
|
||||
in the vote or consensus are called `router status entries
|
||||
<router_status_entry.html>`_.
|
||||
|
||||
4. Tor clients (people using the service) download the consensus from one of
|
||||
the authorities or a mirror to determine the active relays within the
|
||||
network. They in turn use this to construct their circuits and use the
|
||||
network.
|
||||
|
||||
:var str nickname: nickname of the authority
|
||||
:var str address: IP address of the authority, presently they're all IPv4 but
|
||||
this may not always be the case
|
||||
:var int or_port: port on which the relay services relay traffic
|
||||
:var int dir_port: port on which directory information is available
|
||||
:var str fingerprint: relay fingerprint
|
||||
:var str v3ident: identity key fingerprint used to sign votes and consensus
|
||||
"""
|
||||
|
||||
def __init__(self, nickname = None, address = None, or_port = None, dir_port = None, fingerprint = None, v3ident = None):
|
||||
self.nickname = nickname
|
||||
self.address = address
|
||||
self.or_port = or_port
|
||||
self.dir_port = dir_port
|
||||
self.fingerprint = fingerprint
|
||||
self.v3ident = v3ident
|
||||
|
||||
|
||||
DIRECTORY_AUTHORITIES = {
|
||||
'moria1': DirectoryAuthority(
|
||||
nickname = 'moria1',
|
||||
address = '128.31.0.39',
|
||||
or_port = 9101,
|
||||
dir_port = 9131,
|
||||
fingerprint = '9695DFC35FFEB861329B9F1AB04C46397020CE31',
|
||||
v3ident = 'D586D18309DED4CD6D57C18FDB97EFA96D330566',
|
||||
),
|
||||
'tor26': DirectoryAuthority(
|
||||
nickname = 'tor26',
|
||||
address = '86.59.21.38',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
fingerprint = '847B1F850344D7876491A54892F904934E4EB85D',
|
||||
v3ident = '14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4',
|
||||
),
|
||||
'dizum': DirectoryAuthority(
|
||||
nickname = 'dizum',
|
||||
address = '194.109.206.212',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
fingerprint = '7EA6EAD6FD83083C538F44038BBFA077587DD755',
|
||||
v3ident = 'E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58',
|
||||
),
|
||||
'Tonga': DirectoryAuthority(
|
||||
nickname = 'Tonga',
|
||||
address = '82.94.251.203',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
fingerprint = '4A0CCD2DDC7995083D73F5D667100C8A5831F16D',
|
||||
v3ident = None, # does not vote in the consensus
|
||||
),
|
||||
'turtles': DirectoryAuthority(
|
||||
nickname = 'turtles',
|
||||
address = '76.73.17.194',
|
||||
or_port = 9090,
|
||||
dir_port = 9030,
|
||||
fingerprint = 'F397038ADC51336135E7B80BD99CA3844360292B',
|
||||
v3ident = '27B6B5996C426270A5C95488AA5BCEB6BCC86956',
|
||||
),
|
||||
'gabelmoo': DirectoryAuthority(
|
||||
nickname = 'gabelmoo',
|
||||
address = '212.112.245.170',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
fingerprint = 'F2044413DAC2E02E3D6BCF4735A19BCA1DE97281',
|
||||
v3ident = 'ED03BB616EB2F60BEC80151114BB25CEF515B226',
|
||||
),
|
||||
'dannenberg': DirectoryAuthority(
|
||||
nickname = 'dannenberg',
|
||||
address = '193.23.244.244',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
fingerprint = '7BE683E65D48141321C5ED92F075C55364AC7123',
|
||||
v3ident = '585769C78764D58426B8B52B6651A5A71137189A',
|
||||
),
|
||||
'urras': DirectoryAuthority(
|
||||
nickname = 'urras',
|
||||
address = '208.83.223.34',
|
||||
or_port = 80,
|
||||
dir_port = 443,
|
||||
fingerprint = '0AD3FA884D18F89EEA2D89C019379E0E7FD94417',
|
||||
v3ident = '80550987E1D626E3EBA5E5E75A458DE0626D088C',
|
||||
),
|
||||
'maatuska': DirectoryAuthority(
|
||||
nickname = 'maatuska',
|
||||
address = '171.25.193.9',
|
||||
or_port = 80,
|
||||
dir_port = 443,
|
||||
fingerprint = 'BD6A829255CB08E66FBE7D3748363586E46B3810',
|
||||
v3ident = '49015F787433103580E3B66A1707A00E60F2D15B',
|
||||
),
|
||||
'Faravahar': DirectoryAuthority(
|
||||
nickname = 'Faravahar',
|
||||
address = '154.35.32.5',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
fingerprint = 'CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC',
|
||||
v3ident = 'EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97',
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def get_authorities():
|
||||
"""
|
||||
Provides the Tor directory authority information as of **Tor commit 00bcc25
|
||||
(8/27/13)**. The directory information is hardcoded into Tor and occasionally
|
||||
changes, so the information this provides might not necessarily match your
|
||||
version of tor.
|
||||
|
||||
:returns: dict of str nicknames to :class:`~stem.descriptor.remote.DirectoryAuthority` instances
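
For example...

::

  >>> get_authorities()['moria1'].address
  '128.31.0.39'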
|
||||
"""
|
||||
|
||||
return dict(DIRECTORY_AUTHORITIES)
|
749
lib/stem/descriptor/router_status_entry.py
Normal file
|
@@ -0,0 +1,749 @@
|
|||
# Copyright 2012-2013, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for router status entries, the information for individual routers
|
||||
within a network status document. This information is provided from a few
|
||||
sources...
|
||||
|
||||
* control port via 'GETINFO ns/\*' and 'GETINFO md/\*' queries
|
||||
* router entries in a network status document, like the cached-consensus
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
RouterStatusEntry - Common parent for router status entries
|
||||
|- RouterStatusEntryV2 - Entry for a network status v2 document
|
||||
|- RouterStatusEntryV3 - Entry for a network status v3 document
|
||||
+- RouterStatusEntryMicroV3 - Entry for a microdescriptor flavored v3 document
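
As a rough example, reading the entries from a cached consensus in tor's data
directory might look like the following (the path and the
'network-status-consensus-3 1.0' type annotation are just illustrative)...

::

  import stem.descriptor

  with open('/var/lib/tor/cached-consensus', 'rb') as consensus_file:
    for entry in stem.descriptor.parse_file(consensus_file, 'network-status-consensus-3 1.0'):
      print('%s (%s)' % (entry.nickname, entry.fingerprint))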
|
||||
"""
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import datetime
|
||||
|
||||
import stem.exit_policy
import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools
import stem.version
|
||||
|
||||
from stem.descriptor import (
|
||||
KEYWORD_LINE,
|
||||
Descriptor,
|
||||
_get_descriptor_components,
|
||||
_read_until_keywords,
|
||||
)
|
||||
|
||||
|
||||
def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
|
||||
"""
|
||||
Reads a range of the document_file containing some number of entry_class
|
||||
instances. We delimit the entry_class entries by the keyword on their
|
||||
first line (entry_keyword). When finished the document is left at the
|
||||
end_position.
|
||||
|
||||
Either an end_position or section_end_keywords must be provided.
|
||||
|
||||
:param file document_file: file with network status document content
|
||||
:param bool validate: checks the validity of the document's contents if
|
||||
**True**, skips these checks otherwise
|
||||
:param class entry_class: class to construct instance for
|
||||
:param str entry_keyword: first keyword for the entry instances
|
||||
:param int start_position: start of the section, default is the current position
|
||||
:param int end_position: end of the section
|
||||
:param tuple section_end_keywords: keyword(s) that delimit the end of the
|
||||
section if no end_position was provided
|
||||
:param tuple extra_args: extra arguments for the entry_class (after the
|
||||
content and validate flag)
|
||||
|
||||
:returns: iterator over entry_class instances
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is **True**
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
if start_position:
|
||||
document_file.seek(start_position)
|
||||
else:
|
||||
start_position = document_file.tell()
|
||||
|
||||
# check if we're starting at the end of the section (ie, there are no entries to read)
|
||||
if section_end_keywords:
|
||||
first_keyword = None
|
||||
line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
|
||||
|
||||
if line_match:
|
||||
first_keyword = line_match.groups()[0]
|
||||
|
||||
document_file.seek(start_position)
|
||||
|
||||
if first_keyword in section_end_keywords:
|
||||
return
|
||||
|
||||
while end_position is None or document_file.tell() < end_position:
|
||||
desc_lines, ending_keyword = _read_until_keywords(
|
||||
(entry_keyword,) + section_end_keywords,
|
||||
document_file,
|
||||
ignore_first = True,
|
||||
end_position = end_position,
|
||||
include_ending_keyword = True
|
||||
)
|
||||
|
||||
desc_content = bytes.join(b"", desc_lines)
|
||||
|
||||
if desc_content:
|
||||
yield entry_class(desc_content, validate, *extra_args)
|
||||
|
||||
# check if we stopped at the end of the section
|
||||
if ending_keyword in section_end_keywords:
|
||||
break
|
||||
else:
|
||||
break
|
||||
|
||||
|
||||
class RouterStatusEntry(Descriptor):
|
||||
"""
|
||||
Information about an individual router stored within a network status
|
||||
document. This is the common parent for concrete status entry types.
|
||||
|
||||
:var stem.descriptor.networkstatus.NetworkStatusDocument document: **\*** document that this descriptor came from
|
||||
|
||||
:var str nickname: **\*** router's nickname
|
||||
:var str fingerprint: **\*** router's fingerprint
|
||||
:var datetime published: **\*** router's publication
|
||||
:var str address: **\*** router's IP address
|
||||
:var int or_port: **\*** router's ORPort
|
||||
:var int dir_port: **\*** router's DirPort
|
||||
|
||||
:var list flags: **\*** list of :data:`~stem.Flag` associated with the relay
|
||||
|
||||
:var stem.version.Version version: parsed version of tor, this is **None** if
|
||||
the relay's using a new versioning scheme
|
||||
:var str version_line: versioning information reported by the relay
|
||||
"""
|
||||
|
||||
def __init__(self, content, validate, document):
|
||||
"""
|
||||
Parse a router descriptor in a network status document.
|
||||
|
||||
:param str content: router descriptor content to be parsed
|
||||
:param NetworkStatusDocument document: document this descriptor came from
|
||||
:param bool validate: checks the validity of the content if **True**, skips
|
||||
these checks otherwise
|
||||
|
||||
:raises: **ValueError** if the descriptor data is invalid
|
||||
"""
|
||||
|
||||
super(RouterStatusEntry, self).__init__(content)
|
||||
content = stem.util.str_tools._to_unicode(content)
|
||||
|
||||
self.document = document
|
||||
|
||||
self.nickname = None
|
||||
self.fingerprint = None
|
||||
self.published = None
|
||||
self.address = None
|
||||
self.or_port = None
|
||||
self.dir_port = None
|
||||
|
||||
self.flags = None
|
||||
|
||||
self.version_line = None
|
||||
self.version = None
|
||||
|
||||
self._unrecognized_lines = []
|
||||
|
||||
entries = _get_descriptor_components(content, validate)
|
||||
|
||||
if validate:
|
||||
self._check_constraints(entries)
|
||||
|
||||
self._parse(entries, validate)
|
||||
|
||||
def _parse(self, entries, validate):
|
||||
"""
|
||||
Parses the given content and applies the attributes.
|
||||
|
||||
:param dict entries: keyword => (value, pgp key) entries
|
||||
:param bool validate: checks validity if **True**
|
||||
|
||||
:raises: **ValueError** if a validity check fails
|
||||
"""
|
||||
|
||||
for keyword, values in entries.items():
|
||||
value, _ = values[0]
|
||||
|
||||
if keyword == 's':
|
||||
_parse_s_line(self, value, validate)
|
||||
elif keyword == 'v':
|
||||
_parse_v_line(self, value, validate)
|
||||
else:
|
||||
self._unrecognized_lines.append("%s %s" % (keyword, value))
|
||||
|
||||
def _check_constraints(self, entries):
|
||||
"""
|
||||
Does a basic check that the entries conform to this descriptor type's
|
||||
constraints.
|
||||
|
||||
:param dict entries: keyword => (value, pgp key) entries
|
||||
|
||||
:raises: **ValueError** if an issue arises in validation
|
||||
"""
|
||||
|
||||
for keyword in self._required_fields():
|
||||
if not keyword in entries:
|
||||
raise ValueError("%s must have a '%s' line:\n%s" % (self._name(True), keyword, str(self)))
|
||||
|
||||
for keyword in self._single_fields():
|
||||
if keyword in entries and len(entries[keyword]) > 1:
|
||||
raise ValueError("%s can only have a single '%s' line, got %i:\n%s" % (self._name(True), keyword, len(entries[keyword]), str(self)))
|
||||
|
||||
if 'r' != entries.keys()[0]:
|
||||
raise ValueError("%s are expected to start with a 'r' line:\n%s" % (self._name(True), str(self)))
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
"""
|
||||
Name for this descriptor type.
|
||||
"""
|
||||
|
||||
if is_plural:
|
||||
return "Router status entries"
|
||||
else:
|
||||
return "Router status entry"
|
||||
|
||||
def _required_fields(self):
|
||||
"""
|
||||
Provides lines that must appear in the descriptor.
|
||||
"""
|
||||
|
||||
return ()
|
||||
|
||||
def _single_fields(self):
|
||||
"""
|
||||
Provides lines that can only appear in the descriptor once.
|
||||
"""
|
||||
|
||||
return ()
|
||||
|
||||
def get_unrecognized_lines(self):
|
||||
"""
|
||||
Provides any unrecognized lines.
|
||||
|
||||
:returns: list of unrecognized lines
|
||||
"""
|
||||
|
||||
return list(self._unrecognized_lines)
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RouterStatusEntry):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
||||
|
||||
class RouterStatusEntryV2(RouterStatusEntry):
|
||||
"""
|
||||
Information about an individual router stored within a version 2 network
|
||||
status document.
|
||||
|
||||
:var str digest: **\*** router's upper-case hex digest
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
def __init__(self, content, validate = True, document = None):
|
||||
self.digest = None
|
||||
super(RouterStatusEntryV2, self).__init__(content, validate, document)
|
||||
|
||||
def _parse(self, entries, validate):
|
||||
for keyword, values in entries.items():
|
||||
value, _ = values[0]
|
||||
|
||||
if keyword == 'r':
|
||||
_parse_r_line(self, value, validate, True)
|
||||
del entries['r']
|
||||
|
||||
RouterStatusEntry._parse(self, entries, validate)
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
if is_plural:
|
||||
return "Router status entries (v2)"
|
||||
else:
|
||||
return "Router status entry (v2)"
|
||||
|
||||
def _required_fields(self):
|
||||
return ('r')
|
||||
|
||||
def _single_fields(self):
|
||||
return ('r', 's', 'v')
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RouterStatusEntryV2):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
||||
|
||||
class RouterStatusEntryV3(RouterStatusEntry):
|
||||
"""
|
||||
Information about an individual router stored within a version 3 network
|
||||
status document.
|
||||
|
||||
:var list or_addresses: **\*** relay's OR addresses, this is a tuple listing
|
||||
of the form (address (**str**), port (**int**), is_ipv6 (**bool**))
|
||||
:var str digest: **\*** router's upper-case hex digest
|
||||
|
||||
:var int bandwidth: bandwidth claimed by the relay (in kb/s)
|
||||
:var int measured: bandwidth measured to be available by the relay
|
||||
:var bool is_unmeasured: bandwidth measurement isn't based on three or more
|
||||
measurements
|
||||
:var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
|
||||
information that isn't yet recognized
|
||||
|
||||
:var stem.exit_policy.MicroExitPolicy exit_policy: router's exit policy
|
||||
|
||||
:var list microdescriptor_hashes: **\*** tuples of two values, the list of
|
||||
consensus methods for generating a set of digests and the 'algorithm =>
|
||||
digest' mappings
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
def __init__(self, content, validate = True, document = None):
|
||||
self.or_addresses = []
|
||||
self.digest = None
|
||||
|
||||
self.bandwidth = None
|
||||
self.measured = None
|
||||
self.is_unmeasured = False
|
||||
self.unrecognized_bandwidth_entries = []
|
||||
|
||||
self.exit_policy = None
|
||||
self.microdescriptor_hashes = []
|
||||
|
||||
super(RouterStatusEntryV3, self).__init__(content, validate, document)
|
||||
|
||||
def _parse(self, entries, validate):
|
||||
for keyword, values in entries.items():
|
||||
value, _ = values[0]
|
||||
|
||||
if keyword == 'r':
|
||||
_parse_r_line(self, value, validate, True)
|
||||
del entries['r']
|
||||
elif keyword == 'a':
|
||||
for entry, _ in values:
|
||||
_parse_a_line(self, entry, validate)
|
||||
|
||||
del entries['a']
|
||||
elif keyword == 'w':
|
||||
_parse_w_line(self, value, validate)
|
||||
del entries['w']
|
||||
elif keyword == 'p':
|
||||
_parse_p_line(self, value, validate)
|
||||
del entries['p']
|
||||
elif keyword == 'm':
|
||||
for entry, _ in values:
|
||||
_parse_m_line(self, entry, validate)
|
||||
|
||||
del entries['m']
|
||||
|
||||
RouterStatusEntry._parse(self, entries, validate)
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
if is_plural:
|
||||
return "Router status entries (v3)"
|
||||
else:
|
||||
return "Router status entry (v3)"
|
||||
|
||||
def _required_fields(self):
|
||||
return ('r', 's')
|
||||
|
||||
def _single_fields(self):
|
||||
return ('r', 's', 'v', 'w', 'p')
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RouterStatusEntryV3):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
||||
|
||||
class RouterStatusEntryMicroV3(RouterStatusEntry):
|
||||
"""
|
||||
Information about an individual router stored within a microdescriptor
|
||||
flavored network status document.
|
||||
|
||||
:var int bandwidth: bandwidth claimed by the relay (in kb/s)
|
||||
:var int measured: bandwidth measured to be available by the relay
|
||||
:var bool is_unmeasured: bandwidth measurement isn't based on three or more
|
||||
measurements
|
||||
:var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
|
||||
information that isn't yet recognized
|
||||
|
||||
:var str digest: **\*** router's hex encoded digest of our corresponding microdescriptor
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
def __init__(self, content, validate = True, document = None):
|
||||
self.bandwidth = None
|
||||
self.measured = None
|
||||
self.is_unmeasured = False
|
||||
self.unrecognized_bandwidth_entries = []
|
||||
|
||||
self.digest = None
|
||||
|
||||
super(RouterStatusEntryMicroV3, self).__init__(content, validate, document)
|
||||
|
||||
def _parse(self, entries, validate):
|
||||
for keyword, values in entries.items():
|
||||
value, _ = values[0]
|
||||
|
||||
if keyword == 'r':
|
||||
_parse_r_line(self, value, validate, False)
|
||||
del entries['r']
|
||||
elif keyword == 'w':
|
||||
_parse_w_line(self, value, validate)
|
||||
del entries['w']
|
||||
elif keyword == 'm':
|
||||
# "m" digest
|
||||
# example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70
|
||||
|
||||
self.digest = _base64_to_hex(value, validate, False)
|
||||
del entries['m']
|
||||
|
||||
RouterStatusEntry._parse(self, entries, validate)
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
if is_plural:
|
||||
return "Router status entries (micro v3)"
|
||||
else:
|
||||
return "Router status entry (micro v3)"
|
||||
|
||||
def _required_fields(self):
|
||||
return ('r', 's', 'm')
|
||||
|
||||
def _single_fields(self):
|
||||
return ('r', 's', 'v', 'w', 'm')
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RouterStatusEntryMicroV3):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
||||
|
||||
def _parse_r_line(desc, value, validate, include_digest = True):
|
||||
# Parses a RouterStatusEntry's 'r' line. They're very nearly identical for
|
||||
# all current entry types (v2, v3, and microdescriptor v3) with one little
|
||||
# wrinkle: only the microdescriptor flavor excludes a 'digest' field.
|
||||
#
|
||||
# For v2 and v3 router status entries:
|
||||
# "r" nickname identity digest publication IP ORPort DirPort
|
||||
# example: r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0
|
||||
#
|
||||
# For v3 microdescriptor router status entries:
|
||||
# "r" nickname identity publication IP ORPort DirPort
|
||||
# example: r Konata ARIJF2zbqirB9IwsW0mQznccWww 2012-09-24 13:40:40 69.64.48.168 9001 9030
|
||||
|
||||
r_comp = value.split(" ")
|
||||
|
||||
# inject a None for the digest to normalize the field positioning
|
||||
if not include_digest:
|
||||
r_comp.insert(2, None)
|
||||
|
||||
if len(r_comp) < 8:
|
||||
if not validate:
|
||||
return
|
||||
|
||||
expected_field_count = 'eight' if include_digest else 'seven'
|
||||
raise ValueError("%s 'r' line must have %s values: r %s" % (desc._name(), expected_field_count, value))
|
||||
|
||||
if validate:
|
||||
if not stem.util.tor_tools.is_valid_nickname(r_comp[0]):
|
||||
raise ValueError("%s nickname isn't valid: %s" % (desc._name(), r_comp[0]))
|
||||
elif not stem.util.connection.is_valid_ipv4_address(r_comp[5]):
|
||||
raise ValueError("%s address isn't a valid IPv4 address: %s" % (desc._name(), r_comp[5]))
|
||||
elif not stem.util.connection.is_valid_port(r_comp[6]):
|
||||
raise ValueError("%s ORPort is invalid: %s" % (desc._name(), r_comp[6]))
|
||||
elif not stem.util.connection.is_valid_port(r_comp[7], allow_zero = True):
|
||||
raise ValueError("%s DirPort is invalid: %s" % (desc._name(), r_comp[7]))
|
||||
elif not (r_comp[6].isdigit() and r_comp[7].isdigit()):
|
||||
return
|
||||
|
||||
desc.nickname = r_comp[0]
|
||||
desc.fingerprint = _base64_to_hex(r_comp[1], validate)
|
||||
|
||||
if include_digest:
|
||||
desc.digest = _base64_to_hex(r_comp[2], validate)
|
||||
|
||||
desc.address = r_comp[5]
|
||||
desc.or_port = int(r_comp[6])
|
||||
desc.dir_port = None if r_comp[7] == '0' else int(r_comp[7])
|
||||
|
||||
try:
|
||||
published = "%s %s" % (r_comp[3], r_comp[4])
|
||||
desc.published = datetime.datetime.strptime(published, "%Y-%m-%d %H:%M:%S")
|
||||
except ValueError:
|
||||
if validate:
|
||||
raise ValueError("Publication time time wasn't parsable: r %s" % value)
|
||||
|
||||
|
||||
def _parse_a_line(desc, value, validate):
|
||||
# "a" SP address ":" portlist
|
||||
# example: a [2001:888:2133:0:82:94:251:204]:9001
|
||||
|
||||
if not ':' in value:
|
||||
if not validate:
|
||||
return
|
||||
|
||||
raise ValueError("%s 'a' line must be of the form '[address]:[ports]': a %s" % (desc._name(), value))
|
||||
|
||||
address, port = value.rsplit(':', 1)
|
||||
is_ipv6 = address.startswith("[") and address.endswith("]")
|
||||
|
||||
if is_ipv6:
|
||||
address = address[1:-1] # remove brackets
|
||||
|
||||
if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
|
||||
(is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
|
||||
if not validate:
|
||||
return
|
||||
else:
|
||||
raise ValueError("%s 'a' line must start with an IPv6 address: a %s" % (desc._name(), value))
|
||||
|
||||
if stem.util.connection.is_valid_port(port):
|
||||
desc.or_addresses.append((address, int(port), is_ipv6))
|
||||
elif validate:
|
||||
raise ValueError("%s 'a' line had an invalid port (%s): a %s" % (desc._name(), port, value))
|
||||
|
||||
|
||||
def _parse_s_line(desc, value, validate):
|
||||
# "s" Flags
|
||||
# example: s Named Running Stable Valid
|
||||
|
||||
flags = [] if value == "" else value.split(" ")
|
||||
desc.flags = flags
|
||||
|
||||
if validate:
|
||||
for flag in flags:
|
||||
if flags.count(flag) > 1:
|
||||
raise ValueError("%s had duplicate flags: s %s" % (desc._name(), value))
|
||||
elif flag == "":
|
||||
raise ValueError("%s had extra whitespace on its 's' line: s %s" % (desc._name(), value))
|
||||
|
||||
|
||||
def _parse_v_line(desc, value, validate):
|
||||
# "v" version
|
||||
# example: v Tor 0.2.2.35
|
||||
#
|
||||
# The spec says that if this starts with "Tor " then what follows is a
|
||||
# tor version. If not then it has "upgraded to a more sophisticated
|
||||
# protocol versioning system".
|
||||
|
||||
desc.version_line = value
|
||||
|
||||
if value.startswith("Tor "):
|
||||
try:
|
||||
desc.version = stem.version._get_version(value[4:])
|
||||
except ValueError as exc:
|
||||
if validate:
|
||||
raise ValueError("%s has a malformed tor version (%s): v %s" % (desc._name(), exc, value))
|
||||
|
||||
|
||||
def _parse_w_line(desc, value, validate):
|
||||
# "w" "Bandwidth=" INT ["Measured=" INT] ["Unmeasured=1"]
|
||||
# example: w Bandwidth=7980
|
||||
|
||||
w_comp = value.split(" ")
|
||||
|
||||
if len(w_comp) < 1:
|
||||
if not validate:
|
||||
return
|
||||
|
||||
raise ValueError("%s 'w' line is blank: w %s" % (desc._name(), value))
|
||||
elif not w_comp[0].startswith("Bandwidth="):
|
||||
if not validate:
|
||||
return
|
||||
|
||||
raise ValueError("%s 'w' line needs to start with a 'Bandwidth=' entry: w %s" % (desc._name(), value))
|
||||
|
||||
for w_entry in w_comp:
|
||||
if '=' in w_entry:
|
||||
w_key, w_value = w_entry.split('=', 1)
|
||||
else:
|
||||
w_key, w_value = w_entry, None
|
||||
|
||||
if w_key == "Bandwidth":
|
||||
if not (w_value and w_value.isdigit()):
|
||||
if not validate:
|
||||
return
|
||||
|
||||
raise ValueError("%s 'Bandwidth=' entry needs to have a numeric value: w %s" % (desc._name(), value))
|
||||
|
||||
desc.bandwidth = int(w_value)
|
||||
elif w_key == "Measured":
|
||||
if not (w_value and w_value.isdigit()):
|
||||
if not validate:
|
||||
return
|
||||
|
||||
raise ValueError("%s 'Measured=' entry needs to have a numeric value: w %s" % (desc._name(), value))
|
||||
|
||||
desc.measured = int(w_value)
|
||||
elif w_key == "Unmeasured":
|
||||
if validate and w_value != "1":
|
||||
raise ValueError("%s 'Unmeasured=' should only have the value of '1': w %s" % (desc._name(), value))
|
||||
|
||||
desc.is_unmeasured = True
|
||||
else:
|
||||
desc.unrecognized_bandwidth_entries.append(w_entry)
|
||||
|
||||
|
||||
def _parse_p_line(desc, value, validate):
|
||||
# "p" ("accept" / "reject") PortList
|
||||
# p reject 1-65535
|
||||
# example: p accept 80,110,143,443,993,995,6660-6669,6697,7000-7001
|
||||
|
||||
try:
|
||||
desc.exit_policy = stem.exit_policy.MicroExitPolicy(value)
|
||||
except ValueError as exc:
|
||||
if not validate:
|
||||
return
|
||||
|
||||
raise ValueError("%s exit policy is malformed (%s): p %s" % (desc._name(), exc, value))
|
||||
|
||||
|
||||
def _parse_m_line(desc, value, validate):
|
||||
# "m" methods 1*(algorithm "=" digest)
|
||||
# example: m 8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs
|
||||
|
||||
m_comp = value.split(" ")
|
||||
|
||||
if not (desc.document and desc.document.is_vote):
|
||||
if not validate:
|
||||
return
|
||||
|
||||
vote_status = "vote" if desc.document else "<undefined document>"
|
||||
raise ValueError("%s 'm' line should only appear in votes (appeared in a %s): m %s" % (desc._name(), vote_status, value))
|
||||
elif len(m_comp) < 1:
|
||||
if not validate:
|
||||
return
|
||||
|
||||
raise ValueError("%s 'm' line needs to start with a series of methods: m %s" % (desc._name(), value))
|
||||
|
||||
try:
|
||||
methods = [int(entry) for entry in m_comp[0].split(",")]
|
||||
except ValueError:
|
||||
if not validate:
|
||||
return
|
||||
|
||||
raise ValueError("%s microdescriptor methods should be a series of comma separated integers: m %s" % (desc._name(), value))
|
||||
|
||||
hashes = {}
|
||||
|
||||
for entry in m_comp[1:]:
|
||||
if not '=' in entry:
|
||||
if not validate:
|
||||
continue
|
||||
|
||||
raise ValueError("%s can only have a series of 'algorithm=digest' mappings after the methods: m %s" % (desc._name(), value))
|
||||
|
||||
hash_name, digest = entry.split('=', 1)
|
||||
hashes[hash_name] = digest
|
||||
|
||||
desc.microdescriptor_hashes.append((methods, hashes))
|
||||
|
||||
|
||||
def _base64_to_hex(identity, validate, check_if_fingerprint = True):
|
||||
"""
|
||||
Decodes a base64 value to hex. For example...
|
||||
|
||||
::
|
||||
|
||||
>>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s')
|
||||
'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB'
|
||||
|
||||
:param str identity: encoded fingerprint from the consensus
|
||||
:param bool validate: checks validity if **True**
|
||||
:param bool check_if_fingerprint: asserts that the result is a fingerprint if **True**
|
||||
|
||||
:returns: **str** with the uppercase hex encoding of the relay's fingerprint
|
||||
|
||||
:raises: **ValueError** if the result isn't a valid fingerprint
|
||||
"""
|
||||
|
||||
# trailing equal signs were stripped from the identity
|
||||
missing_padding = len(identity) % 4
|
||||
identity += "=" * missing_padding
|
||||
|
||||
fingerprint = ""
|
||||
|
||||
try:
|
||||
identity_decoded = base64.b64decode(stem.util.str_tools._to_bytes(identity))
|
||||
except (TypeError, binascii.Error):
|
||||
if not validate:
|
||||
return None
|
||||
|
||||
raise ValueError("Unable to decode identity string '%s'" % identity)
|
||||
|
||||
for char in identity_decoded:
|
||||
# Individual characters are either standard ASCII or hex encoded, and each
|
||||
# represent two hex digits. For instance...
|
||||
#
|
||||
# >>> ord('\n')
|
||||
# 10
|
||||
# >>> hex(10)
|
||||
# '0xa'
|
||||
# >>> '0xa'[2:].zfill(2).upper()
|
||||
# '0A'
|
||||
|
||||
char_int = char if isinstance(char, int) else ord(char)
|
||||
fingerprint += hex(char_int)[2:].zfill(2).upper()
|
||||
|
||||
if check_if_fingerprint:
|
||||
if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
|
||||
if not validate:
|
||||
return None
|
||||
|
||||
raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))
|
||||
|
||||
return fingerprint
|
968
lib/stem/descriptor/server_descriptor.py
Normal file
|
@@ -0,0 +1,968 @@
|
|||
# Copyright 2012-2013, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for Tor server descriptors, which contains the infrequently changing
|
||||
information about a Tor relay (contact information, exit policy, public keys,
|
||||
etc). This information is provided from a few sources...
|
||||
|
||||
* control port via 'GETINFO desc/\*' queries
|
||||
* the 'cached-descriptors' file in tor's data directory
|
||||
* tor metrics, at https://metrics.torproject.org/data.html
|
||||
* directory authorities and mirrors via their DirPort
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
ServerDescriptor - Tor server descriptor.
|
||||
|- RelayDescriptor - Server descriptor for a relay.
|
||||
|
|
||||
|- BridgeDescriptor - Scrubbed server descriptor for a bridge.
|
||||
| |- is_scrubbed - checks if our content has been properly scrubbed
|
||||
| +- get_scrubbing_issues - description of issues with our scrubbing
|
||||
|
|
||||
|- digest - calculates the upper-case hex digest value for our content
|
||||
|- get_unrecognized_lines - lines with unrecognized content
|
||||
|- get_annotations - dictionary of content prior to the descriptor entry
|
||||
+- get_annotation_lines - lines that provided the annotations
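
For example, the relay descriptors in tor's data directory could be read with
something along these lines (the path and the 'server-descriptor 1.0' type
annotation are just illustrative)...

::

  import stem.descriptor

  with open('/var/lib/tor/cached-descriptors', 'rb') as descriptor_file:
    for desc in stem.descriptor.parse_file(descriptor_file, 'server-descriptor 1.0'):
      print('%s published %s' % (desc.nickname, desc.published))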
|
||||
"""
|
||||
|
||||
import base64
|
||||
import codecs
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
import stem.descriptor.extrainfo_descriptor
|
||||
import stem.exit_policy
|
||||
import stem.prereq
|
||||
import stem.util.connection
|
||||
import stem.util.str_tools
|
||||
import stem.util.tor_tools
|
||||
import stem.version
|
||||
|
||||
from stem.util import log
|
||||
|
||||
from stem.descriptor import (
|
||||
PGP_BLOCK_END,
|
||||
Descriptor,
|
||||
_get_bytes_field,
|
||||
_get_descriptor_components,
|
||||
_read_until_keywords,
|
||||
)
|
||||
|
||||
try:
|
||||
# added in python 3.2
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from stem.util.lru_cache import lru_cache
|
||||
|
||||
# relay descriptors must have exactly one of the following
|
||||
REQUIRED_FIELDS = (
|
||||
"router",
|
||||
"bandwidth",
|
||||
"published",
|
||||
"onion-key",
|
||||
"signing-key",
|
||||
"router-signature",
|
||||
)
|
||||
|
||||
# optional entries that can appear at most once
|
||||
SINGLE_FIELDS = (
|
||||
"platform",
|
||||
"fingerprint",
|
||||
"hibernating",
|
||||
"uptime",
|
||||
"contact",
|
||||
"read-history",
|
||||
"write-history",
|
||||
"eventdns",
|
||||
"family",
|
||||
"caches-extra-info",
|
||||
"extra-info-digest",
|
||||
"hidden-service-dir",
|
||||
"protocols",
|
||||
"allow-single-hop-exits",
|
||||
"ntor-onion-key",
|
||||
)
|
||||
|
||||
DEFAULT_IPV6_EXIT_POLICY = stem.exit_policy.MicroExitPolicy("reject 1-65535")
|
||||
REJECT_ALL_POLICY = stem.exit_policy.ExitPolicy("reject *:*")
|
||||
|
||||
|
||||
def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
|
||||
"""
|
||||
Iterates over the server descriptors in a file.
|
||||
|
||||
:param file descriptor_file: file with descriptor content
|
||||
:param bool is_bridge: parses the file as being a bridge descriptor
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:returns: iterator for ServerDescriptor instances in the file
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is True
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
# Handler for relay descriptors
|
||||
#
|
||||
# Cached descriptors consist of annotations followed by the descriptor
|
||||
# itself. For instance...
|
||||
#
|
||||
# @downloaded-at 2012-03-14 16:31:05
|
||||
# @source "145.53.65.130"
|
||||
# router caerSidi 71.35.143.157 9001 0 0
|
||||
# platform Tor 0.2.1.30 on Linux x86_64
|
||||
# <rest of the descriptor content>
|
||||
# router-signature
|
||||
# -----BEGIN SIGNATURE-----
|
||||
# <signature for the above descriptor>
|
||||
# -----END SIGNATURE-----
|
||||
#
|
||||
# Metrics descriptor files are the same, but lack any annotations. The
|
||||
# code below simply does the following...
|
||||
#
|
||||
# - parse as annotations until we get to "router"
|
||||
# - parse as descriptor content until we get to "router-signature" followed
|
||||
# by the end of the signature block
|
||||
# - construct a descriptor and provide it back to the caller
|
||||
#
|
||||
# Any annotations after the last server descriptor are ignored (never provided
|
||||
# to the caller).
|
||||
|
||||
while True:
|
||||
annotations = _read_until_keywords("router", descriptor_file)
|
||||
descriptor_content = _read_until_keywords("router-signature", descriptor_file)
|
||||
|
||||
# we've reached the 'router-signature', now include the pgp style block
|
||||
block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
|
||||
descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
|
||||
|
||||
if descriptor_content:
|
||||
# strip newlines from annotations
|
||||
annotations = map(bytes.strip, annotations)
|
||||
|
||||
descriptor_text = bytes.join(b"", descriptor_content)
|
||||
|
||||
if is_bridge:
|
||||
yield BridgeDescriptor(descriptor_text, validate, annotations, **kwargs)
|
||||
else:
|
||||
yield RelayDescriptor(descriptor_text, validate, annotations, **kwargs)
|
||||
else:
|
||||
if validate and annotations:
|
||||
orphaned_annotations = stem.util.str_tools._to_unicode(b'\n'.join(annotations))
|
||||
raise ValueError("Content doesn't conform to being a server descriptor:\n%s" % orphaned_annotations)
|
||||
|
||||
break # done parsing descriptors
|
||||
|
||||
|
||||
class ServerDescriptor(Descriptor):
|
||||
"""
|
||||
Common parent for server descriptors.
|
||||
|
||||
:var str nickname: **\*** relay's nickname
|
||||
:var str fingerprint: identity key fingerprint
|
||||
:var datetime published: **\*** time in UTC when this descriptor was made
|
||||
|
||||
:var str address: **\*** IPv4 address of the relay
|
||||
:var int or_port: **\*** port used for relaying
|
||||
:var int socks_port: **\*** port used as client (deprecated, always **None**)
|
||||
:var int dir_port: **\*** port used for descriptor mirroring
|
||||
|
||||
:var bytes platform: line with operating system and tor version
|
||||
:var stem.version.Version tor_version: version of tor
|
||||
:var str operating_system: operating system
|
||||
:var int uptime: uptime when published in seconds
|
||||
:var bytes contact: contact information
|
||||
:var stem.exit_policy.ExitPolicy exit_policy: **\*** stated exit policy
|
||||
:var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6
|
||||
:var set family: **\*** nicknames or fingerprints of declared family
|
||||
|
||||
:var int average_bandwidth: **\*** average rate it's willing to relay in bytes/s
|
||||
:var int burst_bandwidth: **\*** burst rate it's willing to relay in bytes/s
|
||||
:var int observed_bandwidth: **\*** estimated capacity based on usage in bytes/s
|
||||
|
||||
:var list link_protocols: link protocols supported by the relay
|
||||
:var list circuit_protocols: circuit protocols supported by the relay
|
||||
:var bool hibernating: **\*** hibernating when published
|
||||
:var bool allow_single_hop_exits: **\*** flag if single hop exiting is allowed
|
||||
:var bool extra_info_cache: **\*** flag if a mirror for extra-info documents
|
||||
:var str extra_info_digest: upper-case hex encoded digest of our extra-info document
|
||||
:var bool eventdns: flag for evdns backend (deprecated, always unset)
|
||||
:var list or_addresses: **\*** alternative for our address/or_port
|
||||
attributes, each entry is a tuple of the form (address (**str**), port
|
||||
(**int**), is_ipv6 (**bool**))
|
||||
|
||||
Deprecated, moved to extra-info descriptor...
|
||||
|
||||
:var datetime read_history_end: end of the sampling interval
|
||||
:var int read_history_interval: seconds per interval
|
||||
:var list read_history_values: bytes read during each interval
|
||||
|
||||
:var datetime write_history_end: end of the sampling interval
|
||||
:var int write_history_interval: seconds per interval
|
||||
:var list write_history_values: bytes written during each interval
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
def __init__(self, raw_contents, validate = True, annotations = None):
|
||||
"""
|
||||
Server descriptor constructor, created from an individual relay's
|
||||
descriptor content (as provided by "GETINFO desc/*", cached descriptors,
|
||||
and metrics).
|
||||
|
||||
By default this validates the descriptor's content as it's parsed. This
|
||||
validation can be disabled to either improve performance or be accepting of
|
||||
malformed data.
|
||||
|
||||
:param str raw_contents: descriptor content provided by the relay
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param list annotations: lines that appeared prior to the descriptor
|
||||
|
||||
:raises: **ValueError** if the contents is malformed and validate is True
|
||||
"""
|
||||
|
||||
super(ServerDescriptor, self).__init__(raw_contents)
|
||||
|
||||
# Only a few things can be arbitrary bytes according to the dir-spec, so
|
||||
# parsing them separately.
|
||||
|
||||
self.platform = _get_bytes_field("platform", raw_contents)
|
||||
self.contact = _get_bytes_field("contact", raw_contents)
|
||||
|
||||
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
|
||||
|
||||
self.nickname = None
|
||||
self.fingerprint = None
|
||||
self.published = None
|
||||
|
||||
self.address = None
|
||||
self.or_port = None
|
||||
self.socks_port = None
|
||||
self.dir_port = None
|
||||
|
||||
self.tor_version = None
|
||||
self.operating_system = None
|
||||
self.uptime = None
|
||||
self.exit_policy = None
|
||||
self.exit_policy_v6 = DEFAULT_IPV6_EXIT_POLICY
|
||||
self.family = set()
|
||||
|
||||
self.average_bandwidth = None
|
||||
self.burst_bandwidth = None
|
||||
self.observed_bandwidth = None
|
||||
|
||||
self.link_protocols = None
|
||||
self.circuit_protocols = None
|
||||
self.hibernating = False
|
||||
self.allow_single_hop_exits = False
|
||||
self.extra_info_cache = False
|
||||
self.extra_info_digest = None
|
||||
self.hidden_service_dir = None
|
||||
self.eventdns = None
|
||||
self.or_addresses = []
|
||||
|
||||
self.read_history_end = None
|
||||
self.read_history_interval = None
|
||||
self.read_history_values = None
|
||||
|
||||
self.write_history_end = None
|
||||
self.write_history_interval = None
|
||||
self.write_history_values = None
|
||||
|
||||
self._unrecognized_lines = []
|
||||
|
||||
self._annotation_lines = annotations if annotations else []
|
||||
|
||||
# A descriptor contains a series of 'keyword lines' which are simply a
|
||||
# keyword followed by an optional value. Lines can also be followed by a
|
||||
# signature block.
|
||||
#
|
||||
# We care about the ordering of 'accept' and 'reject' entries because this
|
||||
# influences the resulting exit policy, but for everything else the order
|
||||
# does not matter so breaking it into key / value pairs.
|
||||
|
||||
entries, policy = _get_descriptor_components(raw_contents, validate, ("accept", "reject"))
|
||||
|
||||
if policy == [u'reject *:*']:
|
||||
self.exit_policy = REJECT_ALL_POLICY
|
||||
else:
|
||||
self.exit_policy = stem.exit_policy.ExitPolicy(*policy)
|
||||
|
||||
self._parse(entries, validate)
|
||||
|
||||
if validate:
|
||||
self._check_constraints(entries)
|
||||
|
||||
def digest(self):
|
||||
"""
|
||||
Provides the hex encoded sha1 of our content. This value is part of the
|
||||
network status entry for this relay.
|
||||
|
||||
:returns: **unicode** with the upper-case hex digest value for this server descriptor
|
||||
"""
|
||||
|
||||
raise NotImplementedError("Unsupported Operation: this should be implemented by the ServerDescriptor subclass")
|
||||
|
||||
def get_unrecognized_lines(self):
|
||||
return list(self._unrecognized_lines)
|
||||
|
||||
@lru_cache()
|
||||
def get_annotations(self):
|
||||
"""
|
||||
Provides content that appeared prior to the descriptor. If this comes from
|
||||
the cached-descriptors file then this commonly contains content like...
|
||||
|
||||
::
|
||||
|
||||
@downloaded-at 2012-03-18 21:18:29
|
||||
@source "173.254.216.66"
|
||||
|
||||
:returns: **dict** with the key/value pairs in our annotations
|
||||
"""
|
||||
|
||||
annotation_dict = {}
|
||||
|
||||
for line in self._annotation_lines:
|
||||
if b" " in line:
|
||||
key, value = line.split(b" ", 1)
|
||||
annotation_dict[key] = value
|
||||
else:
|
||||
annotation_dict[line] = None
|
||||
|
||||
return annotation_dict
|
||||
|
||||
def get_annotation_lines(self):
|
||||
"""
|
||||
Provides the lines of content that appeared prior to the descriptor. This
|
||||
is the same as the
|
||||
:func:`~stem.descriptor.server_descriptor.ServerDescriptor.get_annotations`
|
||||
results, but with the unparsed lines and ordering retained.
|
||||
|
||||
:returns: **list** with the lines of annotation that came before this descriptor
|
||||
"""
|
||||
|
||||
return self._annotation_lines

  def _parse(self, entries, validate):
    """
    Parses a series of 'keyword => (value, pgp block)' mappings and applies
    them as attributes.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if **True**

    :raises: **ValueError** if an error occurs in validation
    """

    for keyword, values in entries.items():
      # most just work with the first (and only) value
      value, block_contents = values[0]

      line = "%s %s" % (keyword, value)  # original line

      if block_contents:
        line += "\n%s" % block_contents

      if keyword == "router":
        # "router" nickname address ORPort SocksPort DirPort
        router_comp = value.split()

        if len(router_comp) < 5:
          if not validate:
            continue

          raise ValueError("Router line must have five values: %s" % line)

        if validate:
          if not stem.util.tor_tools.is_valid_nickname(router_comp[0]):
            raise ValueError("Router line entry isn't a valid nickname: %s" % router_comp[0])
          elif not stem.util.connection.is_valid_ipv4_address(router_comp[1]):
            raise ValueError("Router line entry isn't a valid IPv4 address: %s" % router_comp[1])
          elif not stem.util.connection.is_valid_port(router_comp[2], allow_zero = True):
            raise ValueError("Router line's ORPort is invalid: %s" % router_comp[2])
          elif not stem.util.connection.is_valid_port(router_comp[3], allow_zero = True):
            raise ValueError("Router line's SocksPort is invalid: %s" % router_comp[3])
          elif not stem.util.connection.is_valid_port(router_comp[4], allow_zero = True):
            raise ValueError("Router line's DirPort is invalid: %s" % router_comp[4])
        elif not (router_comp[2].isdigit() and router_comp[3].isdigit() and router_comp[4].isdigit()):
          continue

        self.nickname = router_comp[0]
        self.address = router_comp[1]
        self.or_port = int(router_comp[2])
        self.socks_port = None if router_comp[3] == '0' else int(router_comp[3])
        self.dir_port = None if router_comp[4] == '0' else int(router_comp[4])
      elif keyword == "bandwidth":
        # "bandwidth" bandwidth-avg bandwidth-burst bandwidth-observed
        bandwidth_comp = value.split()

        if len(bandwidth_comp) < 3:
          if not validate:
            continue

          raise ValueError("Bandwidth line must have three values: %s" % line)
        elif not bandwidth_comp[0].isdigit():
          if not validate:
            continue

          raise ValueError("Bandwidth line's average rate isn't numeric: %s" % bandwidth_comp[0])
        elif not bandwidth_comp[1].isdigit():
          if not validate:
            continue

          raise ValueError("Bandwidth line's burst rate isn't numeric: %s" % bandwidth_comp[1])
        elif not bandwidth_comp[2].isdigit():
          if not validate:
            continue

          raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2])

        self.average_bandwidth = int(bandwidth_comp[0])
        self.burst_bandwidth = int(bandwidth_comp[1])
        self.observed_bandwidth = int(bandwidth_comp[2])
      elif keyword == "platform":
        # "platform" string

        # The platform attribute was set earlier. This line can contain any
        # arbitrary data, but tor seems to report its version followed by the
        # os like the following...
        #
        #   platform Tor 0.2.2.35 (git-73ff13ab3cc9570d) on Linux x86_64
        #
        # There's no guarantee that we'll be able to pick out the tor version,
        # but we might as well try to save our caller the effort.

        platform_match = re.match("^Tor (\S*).* on (.*)$", value)

        if platform_match:
          version_str, self.operating_system = platform_match.groups()

          try:
            self.tor_version = stem.version._get_version(version_str)
          except ValueError:
            pass
      elif keyword == "published":
        # "published" YYYY-MM-DD HH:MM:SS

        try:
          self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          if validate:
            raise ValueError("Published line's time wasn't parsable: %s" % line)
      elif keyword == "fingerprint":
        # This is forty hex digits split into space separated groups of four.
        # Checking that we match this pattern.

        fingerprint = value.replace(" ", "")

        if validate:
          for grouping in value.split(" "):
            if len(grouping) != 4:
              raise ValueError("Fingerprint line should have groupings of four hex digits: %s" % value)

          if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
            raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value)

        self.fingerprint = fingerprint
      elif keyword == "hibernating":
        # "hibernating" 0|1 (in practice only set if one)

        if validate and value not in ("0", "1"):
          raise ValueError("Hibernating line had an invalid value, must be zero or one: %s" % value)

        self.hibernating = value == "1"
      elif keyword == "allow-single-hop-exits":
        self.allow_single_hop_exits = True
      elif keyword == "caches-extra-info":
        self.extra_info_cache = True
      elif keyword == "extra-info-digest":
        # this is forty hex digits which just so happens to be the same as a
        # fingerprint

        if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
          raise ValueError("Extra-info digests should consist of forty hex digits: %s" % value)

        self.extra_info_digest = value
      elif keyword == "hidden-service-dir":
        if value:
          self.hidden_service_dir = value.split(" ")
        else:
          self.hidden_service_dir = ["2"]
      elif keyword == "uptime":
        # We need to be tolerant of negative uptimes to accommodate a past tor
        # bug...
        #
        # Changes in version 0.1.2.7-alpha - 2007-02-06
        #  - If our system clock jumps back in time, don't publish a negative
        #    uptime in the descriptor. Also, don't let the global rate limiting
        #    buckets go absurdly negative.
        #
        # After parsing all of the attributes we'll double check that negative
        # uptimes only occurred prior to this fix.

        try:
          self.uptime = int(value)
        except ValueError:
          if not validate:
            continue

          raise ValueError("Uptime line must have an integer value: %s" % value)
      elif keyword == "contact":
        pass  # parsed as a bytes field earlier
      elif keyword == "protocols":
        protocols_match = re.match("^Link (.*) Circuit (.*)$", value)

        if protocols_match:
          link_versions, circuit_versions = protocols_match.groups()
          self.link_protocols = link_versions.split(" ")
          self.circuit_protocols = circuit_versions.split(" ")
        elif validate:
          raise ValueError("Protocols line did not match the expected pattern: %s" % line)
      elif keyword == "family":
        self.family = set(value.split(" "))
      elif keyword == "eventdns":
        self.eventdns = value == "1"
      elif keyword == "ipv6-policy":
        self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
      elif keyword == "or-address":
        or_address_entries = [value for (value, _) in values]

        for entry in or_address_entries:
          line = "%s %s" % (keyword, entry)

          if ":" not in entry:
            if not validate:
              continue
            else:
              raise ValueError("or-address line missing a colon: %s" % line)

          address, port = entry.rsplit(':', 1)
          is_ipv6 = address.startswith("[") and address.endswith("]")

          if is_ipv6:
            address = address[1:-1]  # remove brackets

          if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
                  (is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
            if not validate:
              continue
            else:
              raise ValueError("or-address line has a malformed address: %s" % line)

          if stem.util.connection.is_valid_port(port):
            self.or_addresses.append((address, int(port), is_ipv6))
          elif validate:
            raise ValueError("or-address line has a malformed port: %s" % line)
      elif keyword in ("read-history", "write-history"):
        try:
          timestamp, interval, remainder = \
            stem.descriptor.extrainfo_descriptor._parse_timestamp_and_interval(keyword, value)

          try:
            if remainder:
              history_values = [int(entry) for entry in remainder.split(",")]
            else:
              history_values = []
          except ValueError:
            raise ValueError("%s line has non-numeric values: %s" % (keyword, line))

          if keyword == "read-history":
            self.read_history_end = timestamp
            self.read_history_interval = interval
            self.read_history_values = history_values
          else:
            self.write_history_end = timestamp
            self.write_history_interval = interval
            self.write_history_values = history_values
        except ValueError as exc:
          if validate:
            raise exc
      else:
        self._unrecognized_lines.append(line)

    # if we have a negative uptime and a tor version that shouldn't exhibit
    # this bug then fail validation

    if validate and self.uptime and self.tor_version:
      if self.uptime < 0 and self.tor_version >= stem.version.Version("0.1.2.7"):
        raise ValueError("Descriptor for version '%s' had a negative uptime value: %i" % (self.tor_version, self.uptime))

  def _check_constraints(self, entries):
    """
    Does a basic check that the entries conform to this descriptor type's
    constraints.

    :param dict entries: keyword => (value, pgp key) entries

    :raises: **ValueError** if an issue arises in validation
    """

    for keyword in self._required_fields():
      if keyword not in entries:
        raise ValueError("Descriptor must have a '%s' entry" % keyword)

    for keyword in self._single_fields():
      if keyword in entries and len(entries[keyword]) > 1:
        raise ValueError("The '%s' entry can only appear once in a descriptor" % keyword)

    expected_first_keyword = self._first_keyword()

    if expected_first_keyword and expected_first_keyword != entries.keys()[0]:
      raise ValueError("Descriptor must start with a '%s' entry" % expected_first_keyword)

    expected_last_keyword = self._last_keyword()

    if expected_last_keyword and expected_last_keyword != entries.keys()[-1]:
      raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)

    if not self.exit_policy:
      raise ValueError("Descriptor must have at least one 'accept' or 'reject' entry")

  # Constraints that the descriptor must meet to be valid. These can be None if
  # not applicable.

  def _required_fields(self):
    return REQUIRED_FIELDS

  def _single_fields(self):
    return REQUIRED_FIELDS + SINGLE_FIELDS

  def _first_keyword(self):
    return "router"

  def _last_keyword(self):
    return "router-signature"


class RelayDescriptor(ServerDescriptor):
  """
  Server descriptor (`descriptor specification
  <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)

  :var str onion_key: **\*** key used to encrypt EXTEND cells
  :var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
  :var str signing_key: **\*** relay's long-term identity key
  :var str signature: **\*** signature for this descriptor

  **\*** attribute is required when we're parsed with validation
  """

  def __init__(self, raw_contents, validate = True, annotations = None):
    self.onion_key = None
    self.ntor_onion_key = None
    self.signing_key = None
    self.signature = None

    super(RelayDescriptor, self).__init__(raw_contents, validate, annotations)

    # validate the descriptor if required
    if validate:
      self._validate_content()

  @lru_cache()
  def digest(self):
    """
    Provides the digest of our descriptor's content.

    :returns: the digest string encoded in uppercase hex

    :raises: ValueError if the digest cannot be calculated
    """

    # Digest is calculated from everything in the descriptor except the
    # router-signature.

    raw_descriptor = self.get_bytes()
    start_token = b"router "
    sig_token = b"\nrouter-signature\n"
    start = raw_descriptor.find(start_token)
    sig_start = raw_descriptor.find(sig_token)
    end = sig_start + len(sig_token)

    if start >= 0 and sig_start > 0 and end > start:
      for_digest = raw_descriptor[start:end]
      digest_hash = hashlib.sha1(stem.util.str_tools._to_bytes(for_digest))
      return stem.util.str_tools._to_unicode(digest_hash.hexdigest().upper())
    else:
      raise ValueError("unable to calculate digest for descriptor")

  def _validate_content(self):
    """
    Validates that the descriptor content matches the signature.

    :raises: ValueError if the signature does not match the content
    """

    key_as_bytes = RelayDescriptor._get_key_bytes(self.signing_key)

    # ensure the fingerprint is a hash of the signing key

    if self.fingerprint:
      # calculate the signing key hash
      key_der_as_hash = hashlib.sha1(stem.util.str_tools._to_bytes(key_as_bytes)).hexdigest()

      if key_der_as_hash != self.fingerprint.lower():
        log.warn("Signing key hash: %s != fingerprint: %s" % (key_der_as_hash, self.fingerprint.lower()))
        raise ValueError("Fingerprint does not match hash")

    self._verify_digest(key_as_bytes)

  def _verify_digest(self, key_as_der):
    # check that our digest matches what was signed

    if not stem.prereq.is_crypto_available():
      return

    from Crypto.Util import asn1
    from Crypto.Util.number import bytes_to_long, long_to_bytes

    # get the ASN.1 sequence

    seq = asn1.DerSequence()
    seq.decode(key_as_der)
    modulus = seq[0]
    public_exponent = seq[1]  # should always be 65537

    sig_as_bytes = RelayDescriptor._get_key_bytes(self.signature)

    # convert the descriptor signature to an int

    sig_as_long = bytes_to_long(sig_as_bytes)

    # use the public exponent[e] & the modulus[n] to decrypt the int

    decrypted_int = pow(sig_as_long, public_exponent, modulus)

    # block size will always be 128 for a 1024 bit key

    blocksize = 128

    # convert the int to a byte array

    decrypted_bytes = long_to_bytes(decrypted_int, blocksize)

    ############################################################################
    ## The decrypted bytes should have a structure exactly along these lines.
    ## 1 byte  - [null '\x00']
    ## 1 byte  - [block type identifier '\x01'] - Should always be 1
    ## N bytes - [padding '\xFF' ]
    ## 1 byte  - [separator '\x00' ]
    ## M bytes - [message]
    ## Total   - 128 bytes
    ## More info here http://www.ietf.org/rfc/rfc2313.txt
    ## esp the Notes in section 8.1
    ############################################################################

    try:
      if decrypted_bytes.index(b'\x00\x01') != 0:
        raise ValueError("Verification failed, identifier missing")
    except ValueError:
      raise ValueError("Verification failed, malformed data")

    try:
      identifier_offset = 2

      # find the separator
      separator_index = decrypted_bytes.index(b'\x00', identifier_offset)
    except ValueError:
      raise ValueError("Verification failed, separator not found")

    digest_hex = codecs.encode(decrypted_bytes[separator_index + 1:], 'hex_codec')
    digest = stem.util.str_tools._to_unicode(digest_hex.upper())

    local_digest = self.digest()

    if digest != local_digest:
      raise ValueError("Decrypted digest does not match local digest (calculated: %s, local: %s)" % (digest, local_digest))

  def _parse(self, entries, validate):
    entries = dict(entries)  # shallow copy since we're destructive

    # handles fields only in server descriptors

    for keyword, values in entries.items():
      value, block_contents = values[0]
      line = "%s %s" % (keyword, value)

      if keyword == "onion-key":
        if validate and not block_contents:
          raise ValueError("Onion key line must be followed by a public key: %s" % line)

        self.onion_key = block_contents
        del entries["onion-key"]
      elif keyword == "ntor-onion-key":
        self.ntor_onion_key = value
        del entries["ntor-onion-key"]
      elif keyword == "signing-key":
        if validate and not block_contents:
          raise ValueError("Signing key line must be followed by a public key: %s" % line)

        self.signing_key = block_contents
        del entries["signing-key"]
      elif keyword == "router-signature":
        if validate and not block_contents:
          raise ValueError("Router signature line must be followed by a signature block: %s" % line)

        self.signature = block_contents
        del entries["router-signature"]

    ServerDescriptor._parse(self, entries, validate)

  def _compare(self, other, method):
    if not isinstance(other, RelayDescriptor):
      return False

    return method(str(self).strip(), str(other).strip())

  def __hash__(self):
    return hash(str(self).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)

  @staticmethod
  def _get_key_bytes(key_string):
    # Remove the newlines from the key string & strip off the
    # '-----BEGIN RSA PUBLIC KEY-----' header and
    # '-----END RSA PUBLIC KEY-----' footer

    key_as_string = ''.join(key_string.split('\n')[1:4])

    # get the key representation in bytes

    key_bytes = base64.b64decode(stem.util.str_tools._to_bytes(key_as_string))

    return key_bytes


class BridgeDescriptor(ServerDescriptor):
  """
  Bridge descriptor (`bridge descriptor specification
  <https://metrics.torproject.org/formats.html#bridgedesc>`_)
  """

  def __init__(self, raw_contents, validate = True, annotations = None):
    self._digest = None

    super(BridgeDescriptor, self).__init__(raw_contents, validate, annotations)

  def digest(self):
    return self._digest

  def _parse(self, entries, validate):
    entries = dict(entries)

    # handles fields only in bridge descriptors

    for keyword, values in entries.items():
      value, block_contents = values[0]
      line = "%s %s" % (keyword, value)

      if keyword == "router-digest":
        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
          raise ValueError("Router digest line had an invalid sha1 digest: %s" % line)

        self._digest = stem.util.str_tools._to_unicode(value)
        del entries["router-digest"]

    ServerDescriptor._parse(self, entries, validate)

  def is_scrubbed(self):
    """
    Checks if we've been properly scrubbed in accordance with the `bridge
    descriptor specification
    <https://metrics.torproject.org/formats.html#bridgedesc>`_. Validation is
    a moving target so this may not be fully up to date.

    :returns: **True** if we're scrubbed, **False** otherwise
    """

    return self.get_scrubbing_issues() == []

  @lru_cache()
  def get_scrubbing_issues(self):
    """
    Provides issues with our scrubbing.

    :returns: **list** of strings which describe issues we have with our
      scrubbing, this list is empty if we're properly scrubbed
    """

    issues = []

    if not self.address.startswith("10."):
      issues.append("Router line's address should be scrubbed to be '10.x.x.x': %s" % self.address)

    if self.contact and self.contact != "somebody":
      issues.append("Contact line should be scrubbed to be 'somebody', but instead had '%s'" % self.contact)

    for address, _, is_ipv6 in self.or_addresses:
      if not is_ipv6 and not address.startswith("10."):
        issues.append("or-address line's address should be scrubbed to be '10.x.x.x': %s" % address)
      elif is_ipv6 and not address.startswith("fd9f:2e19:3bcf::"):
        # TODO: this check isn't quite right because we aren't checking that
        # the next grouping of hex digits contains 1-2 digits
        issues.append("or-address line's address should be scrubbed to be 'fd9f:2e19:3bcf::xx:xxxx': %s" % address)

    for line in self.get_unrecognized_lines():
      if line.startswith("onion-key "):
        issues.append("Bridge descriptors should have their onion-key scrubbed: %s" % line)
      elif line.startswith("signing-key "):
        issues.append("Bridge descriptors should have their signing-key scrubbed: %s" % line)
      elif line.startswith("router-signature "):
        issues.append("Bridge descriptors should have their signature scrubbed: %s" % line)

    return issues
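
  # By way of illustration (a hypothetical, sanitized example rather than real
  # data), a bridge descriptor that passes these checks starts along the lines
  # of...
  #
  #   router Unnamed 10.45.227.253 9001 0 0
  #   router-digest 0011223344556677889900112233445566778899
  #
  # ... with a contact of 'somebody' (if present at all) and no onion-key,
  # signing-key, or router-signature blocks.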

  def _required_fields(self):
    # bridge required fields are the same as a relay descriptor, minus items
    # excluded according to the format page

    excluded_fields = [
      "onion-key",
      "signing-key",
      "router-signature",
    ]

    included_fields = [
      "router-digest",
    ]

    return tuple(included_fields + [f for f in REQUIRED_FIELDS if f not in excluded_fields])

  def _single_fields(self):
    return self._required_fields() + SINGLE_FIELDS

  def _last_keyword(self):
    return None

  def _compare(self, other, method):
    if not isinstance(other, BridgeDescriptor):
      return False

    return method(str(self).strip(), str(other).strip())

  def __hash__(self):
    return hash(str(self).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
115
lib/stem/descriptor/tordnsel.py
Normal file

@@ -0,0 +1,115 @@
# Copyright 2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for `TorDNSEL <https://www.torproject.org/projects/tordnsel.html.en>`_
exit list files.
"""

import datetime

import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools

from stem.descriptor import (
  Descriptor,
  _read_until_keywords,
  _get_descriptor_components,
)


def _parse_file(tordnsel_file, validate = True, **kwargs):
  """
  Iterates over a tordnsel file.

  :returns: iterator for :class:`~stem.descriptor.tordnsel.TorDNSEL`
    instances in the file

  :raises:
    * **ValueError** if the contents are malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  # skip content prior to the first ExitNode
  _read_until_keywords("ExitNode", tordnsel_file, skip = True)

  while True:
    contents = _read_until_keywords("ExitAddress", tordnsel_file)
    contents += _read_until_keywords("ExitNode", tordnsel_file)

    if contents:
      yield TorDNSEL(bytes.join(b"", contents), validate, **kwargs)
    else:
      break  # done parsing file
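

# A usage sketch for the helper above (assuming 'exit-list' is a TorDNSEL
# exit list file on disk, which isn't part of this module)...
#
#   with open('exit-list', 'rb') as exit_file:
#     for entry in _parse_file(exit_file):
#       print("%s => %s" % (entry.fingerprint, entry.exit_addresses))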


class TorDNSEL(Descriptor):
  """
  TorDNSEL descriptor (`exitlist specification
  <https://www.torproject.org/tordnsel/exitlist-spec.txt>`_)

  :var str fingerprint: **\*** relay's fingerprint
  :var datetime published: **\*** time in UTC when this descriptor was made
  :var datetime last_status: **\*** time in UTC when the relay was seen in a v2 network status
  :var list exit_addresses: **\*** list of (str address, datetime date) tuples consisting of the found IPv4 exit address and the time

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, raw_contents, validate):
    super(TorDNSEL, self).__init__(raw_contents)
    raw_contents = stem.util.str_tools._to_unicode(raw_contents)
    entries = _get_descriptor_components(raw_contents, validate)

    self.fingerprint = None
    self.published = None
    self.last_status = None
    self.exit_addresses = []

    self._parse(entries, validate)

  def _parse(self, entries, validate):
    for keyword, values in entries.items():
      value, block_content = values[0]

      if validate and block_content:
        raise ValueError("Unexpected block content: %s" % block_content)

      if keyword == "ExitNode":
        if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
          raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value)

        self.fingerprint = value
      elif keyword == "Published":
        try:
          self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          if validate:
            raise ValueError("Published time wasn't parsable: %s" % value)
      elif keyword == "LastStatus":
        try:
          self.last_status = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          if validate:
            raise ValueError("LastStatus time wasn't parsable: %s" % value)
      elif keyword == "ExitAddress":
        for value, block_content in values:
          address, date = value.split(" ", 1)

          if validate:
            if not stem.util.connection.is_valid_ipv4_address(address):
              raise ValueError("ExitAddress isn't a valid IPv4 address: %s" % address)
            elif block_content:
              raise ValueError("Unexpected block content: %s" % block_content)

          try:
            date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
            self.exit_addresses.append((address, date))
          except ValueError:
            if validate:
              raise ValueError("ExitAddress found time wasn't parsable: %s" % value)
      elif validate:
        raise ValueError("Unrecognized keyword: %s" % keyword)