added stem python library
This commit is contained in:
parent 8ffa569094
commit 619ab6db0f
700 lib/stem/__init__.py Normal file
@@ -0,0 +1,700 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Library for working with the tor process.

**Module Overview:**

::

  ControllerError - Base exception raised when using the controller.
    |- ProtocolError - Malformed socket data.
    |- OperationFailed - Tor was unable to successfully complete the operation.
    |  |- UnsatisfiableRequest - Tor was unable to satisfy a valid request.
    |  |  +- CircuitExtensionFailed - Attempt to make or extend a circuit failed.
    |  +- InvalidRequest - Invalid request.
    |     +- InvalidArguments - Invalid request parameters.
    +- SocketError - Communication with the socket failed.
       +- SocketClosed - Socket has been shut down.

.. data:: Runlevel (enum)

  Rating of importance used for event logging.

  =========== ===========
  Runlevel    Description
  =========== ===========
  **ERR**     critical issues that impair tor's ability to function
  **WARN**    non-critical issues the user should be aware of
  **NOTICE**  information that may be helpful to the user
  **INFO**    high level runtime information
  **DEBUG**   low level runtime information
  =========== ===========

.. data:: Signal (enum)

  Signals that the tor process will accept.

  ========================= ===========
  Signal                    Description
  ========================= ===========
  **RELOAD** or **HUP**     reloads our torrc
  **SHUTDOWN** or **INT**   shut down, waiting ShutdownWaitLength first if we're a relay
  **DUMP** or **USR1**      dumps information about open connections and circuits to our log
  **DEBUG** or **USR2**     switch our logging to the DEBUG runlevel
  **HALT** or **TERM**      exit tor immediately
  **NEWNYM**                switch to new circuits, so new application requests don't share any circuits with old ones (this also clears our DNS cache)
  **CLEARDNSCACHE**         clears cached DNS results
  ========================= ===========

.. data:: Flag (enum)

  Flag assigned to tor relays by the authorities to indicate various
  characteristics.

  ================= ===========
  Flag              Description
  ================= ===========
  **AUTHORITY**     relay is a directory authority
  **BADEXIT**       relay shouldn't be used as an exit due to being either problematic or malicious (`wiki <https://trac.torproject.org/projects/tor/wiki/doc/badRelays>`_)
  **BADDIRECTORY**  relay shouldn't be used for directory information
  **EXIT**          relay's exit policy makes it more useful as an exit rather than middle hop
  **FAST**          relay's suitable for high-bandwidth circuits
  **GUARD**         relay's suitable for being an entry guard (first hop)
  **HSDIR**         relay is being used as a v2 hidden service directory
  **NAMED**         relay can be referred to by its nickname
  **RUNNING**       relay is currently usable
  **STABLE**        relay's suitable for long-lived circuits
  **UNNAMED**       relay isn't presently bound to a nickname
  **V2DIR**         relay supports the v2 directory protocol
  **VALID**         relay has been validated
  ================= ===========

.. data:: CircStatus (enum)

  Statuses that a circuit can be in. Tor may provide statuses not in this enum.

  ============ ===========
  CircStatus   Description
  ============ ===========
  **LAUNCHED** new circuit was created
  **BUILT**    circuit finished being created and can accept traffic
  **EXTENDED** circuit has been extended by a hop
  **FAILED**   circuit construction failed
  **CLOSED**   circuit has been closed
  ============ ===========

.. data:: CircBuildFlag (enum)

  Attributes about how a circuit is built. These were introduced in tor version
  0.2.3.11. Tor may provide flags not in this enum.

  ================= ===========
  CircBuildFlag     Description
  ================= ===========
  **ONEHOP_TUNNEL** single hop circuit to fetch directory information
  **IS_INTERNAL**   circuit that won't be used for client traffic
  **NEED_CAPACITY** circuit only includes high capacity relays
  **NEED_UPTIME**   circuit only includes relays with a high uptime
  ================= ===========

.. data:: CircPurpose (enum)

  Description of what a circuit is intended for. These were introduced in tor
  version 0.2.1.6. Tor may provide purposes not in this enum.

  ==================== ===========
  CircPurpose          Description
  ==================== ===========
  **GENERAL**          client traffic or fetching directory information
  **HS_CLIENT_INTRO**  client side introduction point for a hidden service circuit
  **HS_CLIENT_REND**   client side hidden service rendezvous circuit
  **HS_SERVICE_INTRO** server side introduction point for a hidden service circuit
  **HS_SERVICE_REND**  server side hidden service rendezvous circuit
  **TESTING**          testing to see if we're reachable, so we can be used as a relay
  **CONTROLLER**       circuit that was built by a controller
  **MEASURE_TIMEOUT**  circuit being kept around to see how long it takes
  ==================== ===========

.. data:: CircClosureReason (enum)

  Reason that a circuit is being closed or failed to be established. Tor may
  provide reasons not in this enum.

  ========================= ===========
  CircClosureReason         Description
  ========================= ===========
  **NONE**                  no reason given
  **TORPROTOCOL**           violation in the tor protocol
  **INTERNAL**              internal error
  **REQUESTED**             requested by the client via a TRUNCATE command
  **HIBERNATING**           relay is presently hibernating
  **RESOURCELIMIT**         relay is out of memory, sockets, or circuit IDs
  **CONNECTFAILED**         unable to contact the relay
  **OR_IDENTITY**           relay had the wrong OR identification
  **OR_CONN_CLOSED**        connection failed after being established
  **FINISHED**              circuit has expired (see tor's MaxCircuitDirtiness config option)
  **TIMEOUT**               circuit construction timed out
  **DESTROYED**             circuit unexpectedly closed
  **NOPATH**                not enough relays to make a circuit
  **NOSUCHSERVICE**         requested hidden service does not exist
  **MEASUREMENT_EXPIRED**   same as **TIMEOUT** except that it was left open for measurement purposes
  ========================= ===========

.. data:: CircEvent (enum)

  Type of change reflected in a circuit by a CIRC_MINOR event. Tor may provide
  event types not in this enum.

  ===================== ===========
  CircEvent             Description
  ===================== ===========
  **PURPOSE_CHANGED**   circuit purpose or hidden service state has changed
  **CANNIBALIZED**      circuit connections are being reused for a different circuit
  ===================== ===========

.. data:: HiddenServiceState (enum)

  State that a hidden service circuit can have. These were introduced in tor
  version 0.2.3.11. Tor may provide states not in this enum.

  Enumerations fall into four groups based on their prefix...

  ======= ===========
  Prefix  Description
  ======= ===========
  HSCI_*  client-side introduction-point
  HSCR_*  client-side rendezvous-point
  HSSI_*  service-side introduction-point
  HSSR_*  service-side rendezvous-point
  ======= ===========

  ============================= ===========
  HiddenServiceState            Description
  ============================= ===========
  **HSCI_CONNECTING**           connecting to the introductory point
  **HSCI_INTRO_SENT**           sent INTRODUCE1 and awaiting a reply
  **HSCI_DONE**                 received a reply, circuit is closing
  **HSCR_CONNECTING**           connecting to the introductory point
  **HSCR_ESTABLISHED_IDLE**     rendezvous-point established, awaiting an introduction
  **HSCR_ESTABLISHED_WAITING**  introduction received, awaiting a rend
  **HSCR_JOINED**               connected to the hidden service
  **HSSI_CONNECTING**           connecting to the introductory point
  **HSSI_ESTABLISHED**          established introductory point
  **HSSR_CONNECTING**           connecting to the introductory point
  **HSSR_JOINED**               connected to the rendezvous-point
  ============================= ===========

.. data:: RelayEndReason (enum)

  Reasons why the stream is to be closed.

  =================== ===========
  RelayEndReason      Description
  =================== ===========
  **MISC**            none of the following reasons
  **RESOLVEFAILED**   unable to resolve the hostname
  **CONNECTREFUSED**  remote host refused the connection
  **EXITPOLICY**      OR refuses to connect to the destination
  **DESTROY**         circuit is being shut down
  **DONE**            connection has been closed
  **TIMEOUT**         connection timed out
  **NOROUTE**         routing error while contacting the destination
  **HIBERNATING**     relay is temporarily hibernating
  **INTERNAL**        internal error at the relay
  **RESOURCELIMIT**   relay has insufficient resources to service the request
  **CONNRESET**       connection was unexpectedly reset
  **TORPROTOCOL**     violation in the tor protocol
  **NOTDIRECTORY**    directory information requested from a relay that isn't mirroring it
  =================== ===========

.. data:: StreamStatus (enum)

  State that a stream going through tor can have. Tor may provide states not in
  this enum.

  ================= ===========
  StreamStatus      Description
  ================= ===========
  **NEW**           request for a new connection
  **NEWRESOLVE**    request to resolve an address
  **REMAP**         address is being re-mapped to another
  **SENTCONNECT**   sent a connect cell along a circuit
  **SENTRESOLVE**   sent a resolve cell along a circuit
  **SUCCEEDED**     stream has been established
  **FAILED**        stream is detached, and won't be re-established
  **DETACHED**      stream is detached, but might be re-established
  **CLOSED**        stream has closed
  ================= ===========

.. data:: StreamClosureReason (enum)

  Reason that a stream is being closed or failed to be established. This
  includes all values in the :data:`~stem.RelayEndReason` enumeration as
  well as the following. Tor may provide reasons not in this enum.

  ===================== ===========
  StreamClosureReason   Description
  ===================== ===========
  **END**               endpoint has sent a RELAY_END cell
  **PRIVATE_ADDR**      endpoint was a private address (127.0.0.1, 10.0.0.1, etc)
  ===================== ===========

.. data:: StreamSource (enum)

  Cause of a stream being remapped to another address. Tor may provide sources
  not in this enum.

  ============= ===========
  StreamSource  Description
  ============= ===========
  **CACHE**     tor is remapping because of a cached answer
  **EXIT**      exit relay requested the remap
  ============= ===========
.. data:: StreamPurpose (enum)

  Purpose of the stream. This is only provided with new streams and tor may
  provide purposes not in this enum.

  ================= ===========
  StreamPurpose     Description
  ================= ===========
  **DIR_FETCH**     fetching directory information (descriptors, consensus, etc)
  **DIR_UPLOAD**    uploading our descriptor to an authority
  **DNS_REQUEST**   user initiated DNS request
  **DIRPORT_TEST**  checking that our directory port is reachable externally
  **USER**          either relaying user traffic or not one of the above categories
  ================= ===========
.. data:: ORStatus (enum)

  State that an OR connection can have. Tor may provide states not in this
  enum.

  =============== ===========
  ORStatus        Description
  =============== ===========
  **NEW**         received OR connection, starting server-side handshake
  **LAUNCHED**    launched outbound OR connection, starting client-side handshake
  **CONNECTED**   OR connection has been established
  **FAILED**      attempt to establish OR connection failed
  **CLOSED**      OR connection has been closed
  =============== ===========

.. data:: ORClosureReason (enum)

  Reason that an OR connection is being closed or failed to be established. Tor
  may provide reasons not in this enum.

  =================== ===========
  ORClosureReason     Description
  =================== ===========
  **DONE**            OR connection shut down cleanly
  **CONNECTREFUSED**  got an ECONNREFUSED when connecting to the relay
  **IDENTITY**        identity of the relay wasn't what we expected
  **CONNECTRESET**    got an ECONNRESET or similar error from the relay
  **TIMEOUT**         got an ETIMEOUT or similar error from the relay
  **NOROUTE**         got an ENOTCONN, ENETUNREACH, ENETDOWN, EHOSTUNREACH, or similar error from the relay
  **IOERROR**         got a different kind of error from the relay
  **RESOURCELIMIT**   relay has insufficient resources to service the request
  **MISC**            connection refused for another reason
  =================== ===========

.. data:: AuthDescriptorAction (enum)

  Actions that directory authorities might take with relay descriptors. Tor may
  provide actions not in this enum.

  ===================== ===========
  AuthDescriptorAction  Description
  ===================== ===========
  **ACCEPTED**          accepting the descriptor as the newest version
  **DROPPED**           descriptor rejected without notifying the relay
  **REJECTED**          relay notified that its descriptor has been rejected
  ===================== ===========

.. data:: StatusType (enum)

  Sources for tor status events. Tor may provide types not in this enum.

  ============= ===========
  StatusType    Description
  ============= ===========
  **GENERAL**   general tor activity, not specifically as a client or relay
  **CLIENT**    related to our activity as a tor client
  **SERVER**    related to our activity as a tor relay
  ============= ===========
.. data:: GuardType (enum)

  Use that a guard relay can be for. Tor may provide types not in this enum.

  =========== ===========
  GuardType   Description
  =========== ===========
  **ENTRY**   used to connect to the tor network
  =========== ===========
.. data:: GuardStatus (enum)

  Status a guard relay can have. Tor may provide types not in this enum.

  ============= ===========
  GuardStatus   Description
  ============= ===========
  **NEW**       new guard that we weren't previously using
  **DROPPED**   removed from use as one of our guards
  **UP**        guard is now reachable
  **DOWN**      guard is now unreachable
  **BAD**       consensus or relay considers this relay to be unusable as a guard
  **GOOD**      consensus or relay considers this relay to be usable as a guard
  ============= ===========

.. data:: TimeoutSetType (enum)

  Way in which the timeout value of a circuit is changing. Tor may provide
  types not in this enum.

  =============== ===========
  TimeoutSetType  Description
  =============== ===========
  **COMPUTED**    tor has computed a new timeout based on prior circuits
  **RESET**       timeout reverted to its default
  **SUSPENDED**   timeout reverted to its default until network connectivity has recovered
  **DISCARD**     throwing out timeout value from when the network was down
  **RESUME**      resumed calculations to determine the proper timeout
  =============== ===========
"""
__version__ = '1.1.1'
__author__ = 'Damian Johnson'
__contact__ = 'atagar@torproject.org'
__url__ = 'https://stem.torproject.org/'
__license__ = 'LGPLv3'

__all__ = [
  "descriptor",
  "response",
  "util",
  "connection",
  "control",
  "exit_policy",
  "prereq",
  "process",
  "socket",
  "version",
  "ControllerError",
  "ProtocolError",
  "OperationFailed",
  "UnsatisfiableRequest",
  "CircuitExtensionFailed",
  "InvalidRequest",
  "InvalidArguments",
  "SocketError",
  "SocketClosed",
  "Runlevel",
  "Signal",
  "Flag",
  "CircStatus",
  "CircBuildFlag",
  "CircPurpose",
  "CircClosureReason",
  "CircEvent",
  "HiddenServiceState",
  "RelayEndReason",
  "StreamStatus",
  "StreamClosureReason",
  "StreamSource",
  "StreamPurpose",
  "ORStatus",
  "ORClosureReason",
  "AuthDescriptorAction",
  "StatusType",
  "GuardType",
  "GuardStatus",
  "TimeoutSetType",
]

import stem.util.enum

# Constant to indicate an undefined argument default. Usually we'd use None
# for this, but users will commonly provide None as the argument so we need
# something else fairly unique...

UNDEFINED = "<Undefined_ >"


class ControllerError(Exception):
  "Base error for controller communication issues."


class ProtocolError(ControllerError):
  "Malformed content from the control socket."


class OperationFailed(ControllerError):
  """
  Base exception class for failed operations that return an error code.

  :var str code: error code returned by Tor
  :var str message: error message returned by Tor or a human readable error
    message
  """
  def __init__(self, code = None, message = None):
    super(OperationFailed, self).__init__(message)
    self.code = code
    self.message = message


class UnsatisfiableRequest(OperationFailed):
  """
  Exception raised if Tor was unable to process our request.
  """


class CircuitExtensionFailed(UnsatisfiableRequest):
  """
  An attempt to create or extend a circuit failed.

  :var stem.response.CircuitEvent circ: response notifying us of the failure
  """

  def __init__(self, message, circ = None):
    super(CircuitExtensionFailed, self).__init__(message = message)
    self.circ = circ


class InvalidRequest(OperationFailed):
  """
  Exception raised when the request was invalid or malformed.
  """


class InvalidArguments(InvalidRequest):
  """
  Exception class for requests which had invalid arguments.

  :var str code: error code returned by Tor
  :var str message: error message returned by Tor or a human readable error
    message
  :var list arguments: a list of arguments which were invalid
  """

  def __init__(self, code = None, message = None, arguments = None):
    super(InvalidArguments, self).__init__(code, message)
    self.arguments = arguments


class SocketError(ControllerError):
  "Error arose while communicating with the control socket."


class SocketClosed(SocketError):
  "Control socket was closed before completing the message."


Runlevel = stem.util.enum.UppercaseEnum(
  "DEBUG",
  "INFO",
  "NOTICE",
  "WARN",
  "ERR",
)

Flag = stem.util.enum.Enum(
  ("AUTHORITY", "Authority"),
  ("BADEXIT", "BadExit"),
  ("BADDIRECTORY", "BadDirectory"),
  ("EXIT", "Exit"),
  ("FAST", "Fast"),
  ("GUARD", "Guard"),
  ("HSDIR", "HSDir"),
  ("NAMED", "Named"),
  ("RUNNING", "Running"),
  ("STABLE", "Stable"),
  ("UNNAMED", "Unnamed"),
  ("V2DIR", "V2Dir"),
  ("V3DIR", "V3Dir"),
  ("VALID", "Valid"),
)

Signal = stem.util.enum.UppercaseEnum(
  "RELOAD",
  "HUP",
  "SHUTDOWN",
  "INT",
  "DUMP",
  "USR1",
  "DEBUG",
  "USR2",
  "HALT",
  "TERM",
  "NEWNYM",
  "CLEARDNSCACHE",
)

CircStatus = stem.util.enum.UppercaseEnum(
  "LAUNCHED",
  "BUILT",
  "EXTENDED",
  "FAILED",
  "CLOSED",
)

CircBuildFlag = stem.util.enum.UppercaseEnum(
  "ONEHOP_TUNNEL",
  "IS_INTERNAL",
  "NEED_CAPACITY",
  "NEED_UPTIME",
)

CircPurpose = stem.util.enum.UppercaseEnum(
  "GENERAL",
  "HS_CLIENT_INTRO",
  "HS_CLIENT_REND",
  "HS_SERVICE_INTRO",
  "HS_SERVICE_REND",
  "TESTING",
  "CONTROLLER",
  "MEASURE_TIMEOUT",
)

CircClosureReason = stem.util.enum.UppercaseEnum(
  "NONE",
  "TORPROTOCOL",
  "INTERNAL",
  "REQUESTED",
  "HIBERNATING",
  "RESOURCELIMIT",
  "CONNECTFAILED",
  "OR_IDENTITY",
  "OR_CONN_CLOSED",
  "FINISHED",
  "TIMEOUT",
  "DESTROYED",
  "NOPATH",
  "NOSUCHSERVICE",
  "MEASUREMENT_EXPIRED",
)

CircEvent = stem.util.enum.UppercaseEnum(
  "PURPOSE_CHANGED",
  "CANNIBALIZED",
)

HiddenServiceState = stem.util.enum.UppercaseEnum(
  "HSCI_CONNECTING",
  "HSCI_INTRO_SENT",
  "HSCI_DONE",
  "HSCR_CONNECTING",
  "HSCR_ESTABLISHED_IDLE",
  "HSCR_ESTABLISHED_WAITING",
  "HSCR_JOINED",
  "HSSI_CONNECTING",
  "HSSI_ESTABLISHED",
  "HSSR_CONNECTING",
  "HSSR_JOINED",
)

RelayEndReason = stem.util.enum.UppercaseEnum(
  "MISC",
  "RESOLVEFAILED",
  "CONNECTREFUSED",
  "EXITPOLICY",
  "DESTROY",
  "DONE",
  "TIMEOUT",
  "NOROUTE",
  "HIBERNATING",
  "INTERNAL",
  "RESOURCELIMIT",
  "CONNRESET",
  "TORPROTOCOL",
  "NOTDIRECTORY",
)

StreamStatus = stem.util.enum.UppercaseEnum(
  "NEW",
  "NEWRESOLVE",
  "REMAP",
  "SENTCONNECT",
  "SENTRESOLVE",
  "SUCCEEDED",
  "FAILED",
  "DETACHED",
  "CLOSED",
)

# StreamClosureReason is a superset of RelayEndReason

StreamClosureReason = stem.util.enum.UppercaseEnum(*(RelayEndReason.keys() + [
  "END",
  "PRIVATE_ADDR",
]))

StreamSource = stem.util.enum.UppercaseEnum(
  "CACHE",
  "EXIT",
)

StreamPurpose = stem.util.enum.UppercaseEnum(
  "DIR_FETCH",
  "DIR_UPLOAD",
  "DNS_REQUEST",
  "DIRPORT_TEST",
  "USER",
)

ORStatus = stem.util.enum.UppercaseEnum(
  "NEW",
  "LAUNCHED",
  "CONNECTED",
  "FAILED",
  "CLOSED",
)

ORClosureReason = stem.util.enum.UppercaseEnum(
  "DONE",
  "CONNECTREFUSED",
  "IDENTITY",
  "CONNECTRESET",
  "TIMEOUT",
  "NOROUTE",
  "IOERROR",
  "RESOURCELIMIT",
  "MISC",
)

AuthDescriptorAction = stem.util.enum.UppercaseEnum(
  "ACCEPTED",
  "DROPPED",
  "REJECTED",
)

StatusType = stem.util.enum.UppercaseEnum(
  "GENERAL",
  "CLIENT",
  "SERVER",
)

GuardType = stem.util.enum.UppercaseEnum(
  "ENTRY",
)

GuardStatus = stem.util.enum.UppercaseEnum(
  "NEW",
  "UP",
  "DOWN",
  "BAD",
  "GOOD",
  "DROPPED",
)

TimeoutSetType = stem.util.enum.UppercaseEnum(
  "COMPUTED",
  "RESET",
  "SUSPENDED",
  "DISCARD",
  "RESUME",
)
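
A quick sketch of how these enums are used in practice (assuming a local tor
instance with its ControlPort at the default 9051; Controller comes from the
bundled stem.control module added in this commit)::

  from stem import Signal
  from stem.control import Controller

  # connect to tor's control port and request fresh circuits
  with Controller.from_port(port = 9051) as controller:
    controller.authenticate()
    controller.signal(Signal.NEWNYM)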

1090 lib/stem/connection.py Normal file
File diff suppressed because it is too large
2498 lib/stem/control.py Normal file
File diff suppressed because it is too large
552 lib/stem/descriptor/__init__.py Normal file
@@ -0,0 +1,552 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Package for parsing and processing descriptor data.

**Module Overview:**

::

  parse_file - Parses the descriptors in a file.

  Descriptor - Common parent for all descriptor file types.
    |- get_path - location of the descriptor on disk if it came from a file
    |- get_archive_path - location of the descriptor within the archive it came from
    |- get_bytes - similar to str(), but provides our original bytes content
    |- get_unrecognized_lines - unparsed descriptor content
    +- __str__ - string that the descriptor was made from

.. data:: DocumentHandler (enum)

  Ways in which we can parse a
  :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`.

  Both **ENTRIES** and **BARE_DOCUMENT** have a 'thin' document, which doesn't
  have a populated **routers** attribute. This allows for lower memory usage
  and less upfront runtime. However, if read time and memory aren't a concern
  then **DOCUMENT** can provide you with a fully populated document.

  =================== ===========
  DocumentHandler     Description
  =================== ===========
  **ENTRIES**         Iterates over the contained :class:`~stem.descriptor.router_status_entry.RouterStatusEntry`. Each has a reference to the bare document it came from (through its **document** attribute).
  **DOCUMENT**        :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` with the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` it contains (through its **routers** attribute).
  **BARE_DOCUMENT**   :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` **without** a reference to its contents (the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` are unread).
  =================== ===========
"""

__all__ = [
  "export",
  "reader",
  "remote",
  "extrainfo_descriptor",
  "server_descriptor",
  "microdescriptor",
  "networkstatus",
  "router_status_entry",
  "tordnsel",
  "parse_file",
  "Descriptor",
]

import os
import re

import stem.prereq
import stem.util.enum
import stem.util.str_tools

try:
  # added in python 2.7
  from collections import OrderedDict
except ImportError:
  from stem.util.ordereddict import OrderedDict

KEYWORD_CHAR = "a-zA-Z0-9-"
WHITESPACE = " \t"
KEYWORD_LINE = re.compile("^([%s]+)(?:[%s]+(.*))?$" % (KEYWORD_CHAR, WHITESPACE))
PGP_BLOCK_START = re.compile("^-----BEGIN ([%s%s]+)-----$" % (KEYWORD_CHAR, WHITESPACE))
PGP_BLOCK_END = "-----END %s-----"

DocumentHandler = stem.util.enum.UppercaseEnum(
  "ENTRIES",
  "DOCUMENT",
  "BARE_DOCUMENT",
)


def parse_file(descriptor_file, descriptor_type = None, validate = True, document_handler = DocumentHandler.ENTRIES, **kwargs):
  """
  Simple function to read the descriptor contents from a file, providing an
  iterator for its :class:`~stem.descriptor.__init__.Descriptor` contents.

  If you don't provide a **descriptor_type** argument then this automatically
  tries to determine the descriptor type based on the following...

  * The @type annotation on the first line. These are generally only found in
    the `descriptor archives <https://metrics.torproject.org>`_.

  * The filename if it matches something from tor's data directory. For
    instance, tor's 'cached-descriptors' contains server descriptors.

  This is a handy function for simple usage, but if you're reading multiple
  descriptor files you might want to consider the
  :class:`~stem.descriptor.reader.DescriptorReader`.

  Descriptor types include the following, including further minor versions
  (i.e. if we support 1.1 then we also support everything from 1.0 and most
  things from 1.2, but not 2.0)...

  ========================================= =====
  Descriptor Type                           Class
  ========================================= =====
  server-descriptor 1.0                     :class:`~stem.descriptor.server_descriptor.RelayDescriptor`
  extra-info 1.0                            :class:`~stem.descriptor.extrainfo_descriptor.RelayExtraInfoDescriptor`
  microdescriptor 1.0                       :class:`~stem.descriptor.microdescriptor.Microdescriptor`
  directory 1.0                             **unsupported**
  network-status-2 1.0                      :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV2`)
  dir-key-certificate-3 1.0                 :class:`~stem.descriptor.networkstatus.KeyCertificate`
  network-status-consensus-3 1.0            :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
  network-status-vote-3 1.0                 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
  network-status-microdesc-consensus-3 1.0  :class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
  bridge-network-status 1.0                 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.BridgeNetworkStatusDocument`)
  bridge-server-descriptor 1.0              :class:`~stem.descriptor.server_descriptor.BridgeDescriptor`
  bridge-extra-info 1.1                     :class:`~stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`
  torperf 1.0                               **unsupported**
  bridge-pool-assignment 1.0                **unsupported**
  tordnsel 1.0                              :class:`~stem.descriptor.tordnsel.TorDNSEL`
  ========================================= =====

  If you're using **python 3** then beware that the open() function defaults
  to using text mode. **Binary mode** is strongly suggested because it's both
  faster (by my testing by about 33x) and doesn't do universal newline
  translation which can make us misparse the document.

  ::

    my_descriptor_file = open(descriptor_path, 'rb')

  :param str,file descriptor_file: path or opened file with the descriptor contents
  :param str descriptor_type: `descriptor type <https://metrics.torproject.org/formats.html#descriptortypes>`_, this is guessed if not provided
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param stem.descriptor.__init__.DocumentHandler document_handler: method in
    which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file

  :raises:
    * **ValueError** if the contents are malformed and validate is **True**
    * **TypeError** if we can't match the contents of the file to a descriptor type
    * **IOError** if unable to read from the descriptor_file
  """

  # if we got a path then open that file for parsing

  if isinstance(descriptor_file, (bytes, unicode)):
    with open(descriptor_file) as desc_file:
      for desc in parse_file(desc_file, descriptor_type, validate, document_handler, **kwargs):
        yield desc

      return

  # The tor descriptor specifications do not provide a reliable method for
  # identifying a descriptor file's type and version so we need to guess
  # based on its filename. Metrics descriptors, however, can be identified
  # by an annotation on their first line...
  # https://trac.torproject.org/5651

  initial_position = descriptor_file.tell()
  first_line = stem.util.str_tools._to_unicode(descriptor_file.readline().strip())
  metrics_header_match = re.match("^@type (\S+) (\d+)\.(\d+)$", first_line)

  if not metrics_header_match:
    descriptor_file.seek(initial_position)

  descriptor_path = getattr(descriptor_file, 'name', None)
  filename = '<undefined>' if descriptor_path is None else os.path.basename(descriptor_file.name)
  file_parser = None

  if descriptor_type is not None:
    descriptor_type_match = re.match("^(\S+) (\d+)\.(\d+)$", descriptor_type)

    if descriptor_type_match:
      desc_type, major_version, minor_version = descriptor_type_match.groups()
      file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs)
    else:
      raise ValueError("The descriptor_type must be of the form '<type> <major_version>.<minor_version>'")
  elif metrics_header_match:
    # Metrics descriptor handling

    desc_type, major_version, minor_version = metrics_header_match.groups()
    file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs)
  else:
    # Cached descriptor handling. These contain multiple descriptors per file.

    if filename == "cached-descriptors":
      file_parser = lambda f: stem.descriptor.server_descriptor._parse_file(f, validate = validate, **kwargs)
    elif filename == "cached-extrainfo":
      file_parser = lambda f: stem.descriptor.extrainfo_descriptor._parse_file(f, validate = validate, **kwargs)
    elif filename == "cached-microdescs":
      file_parser = lambda f: stem.descriptor.microdescriptor._parse_file(f, validate = validate, **kwargs)
    elif filename == "cached-consensus":
      file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, validate = validate, document_handler = document_handler, **kwargs)
    elif filename == "cached-microdesc-consensus":
      file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs)

  if file_parser:
    for desc in file_parser(descriptor_file):
      if descriptor_path is not None:
        desc._set_path(os.path.abspath(descriptor_path))

      yield desc

    return

  # Not recognized as a descriptor file.

  raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line))


def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler, **kwargs):
  # Parses descriptor files from metrics, yielding individual descriptors. This
  # throws a TypeError if the descriptor_type or version isn't recognized.

  if descriptor_type == "server-descriptor" and major_version == 1:
    for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "bridge-server-descriptor" and major_version == 1:
    for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "extra-info" and major_version == 1:
    for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "microdescriptor" and major_version == 1:
    for desc in stem.descriptor.microdescriptor._parse_file(descriptor_file, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "bridge-extra-info" and major_version == 1:
    # version 1.1 introduced a 'transport' field...
    # https://trac.torproject.org/6257

    for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs):
      yield desc
  elif descriptor_type == "network-status-2" and major_version == 1:
    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV2

    for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
      yield desc
  elif descriptor_type == "dir-key-certificate-3" and major_version == 1:
    for desc in stem.descriptor.networkstatus._parse_file_key_certs(descriptor_file, validate = validate, **kwargs):
      yield desc
  elif descriptor_type in ("network-status-consensus-3", "network-status-vote-3") and major_version == 1:
    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3

    for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
      yield desc
  elif descriptor_type == "network-status-microdesc-consensus-3" and major_version == 1:
    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3

    for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs):
      yield desc
  elif descriptor_type == "bridge-network-status" and major_version == 1:
    document_type = stem.descriptor.networkstatus.BridgeNetworkStatusDocument

    for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
      yield desc
  elif descriptor_type == "tordnsel" and major_version == 1:
    document_type = stem.descriptor.tordnsel.TorDNSEL

    for desc in stem.descriptor.tordnsel._parse_file(descriptor_file, validate = validate, **kwargs):
      yield desc
  else:
    raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version))


class Descriptor(object):
  """
  Common parent for all types of descriptors.
  """

  def __init__(self, contents):
    self._path = None
    self._archive_path = None
    self._raw_contents = contents

  def get_path(self):
    """
    Provides the absolute path that we loaded this descriptor from.

    :returns: **str** with the absolute path of the descriptor source
    """

    return self._path

  def get_archive_path(self):
    """
    If this descriptor came from an archive then provides its path within the
    archive. This is only set if the descriptor came from a
    :class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this
    descriptor didn't come from an archive.

    :returns: **str** with the descriptor's path within the archive
    """

    return self._archive_path

  def get_bytes(self):
    """
    Provides the ASCII **bytes** of the descriptor. This only differs from
    **str()** if you're running python 3.x, in which case **str()** provides a
    **unicode** string.

    :returns: **bytes** for the descriptor's contents
    """

    return self._raw_contents

  def get_unrecognized_lines(self):
    """
    Provides a list of lines that were either ignored or had data that we did
    not know how to process. This is most common due to new descriptor fields
    that this library does not yet know how to process. Patches welcome!

    :returns: **list** of lines of unrecognized content
    """

    raise NotImplementedError

  def _set_path(self, path):
    self._path = path

  def _set_archive_path(self, path):
    self._archive_path = path

  def __str__(self):
    if stem.prereq.is_python_3():
      return stem.util.str_tools._to_unicode(self._raw_contents)
    else:
      return self._raw_contents


def _get_bytes_field(keyword, content):
  """
  Provides the value corresponding to the given keyword. This is handy to fetch
  values specifically allowed to be arbitrary bytes prior to converting to
  unicode.

  :param str keyword: line to look up
  :param bytes content: content to look through

  :returns: **bytes** value on the given line, **None** if the line doesn't
    exist

  :raises: **ValueError** if the content isn't bytes
  """

  if not isinstance(content, bytes):
    raise ValueError("Content must be bytes, got a %s" % type(content))

  line_match = re.search(stem.util.str_tools._to_bytes("^(opt )?%s(?:[%s]+(.*))?$" % (keyword, WHITESPACE)), content, re.MULTILINE)

  if line_match:
    value = line_match.groups()[1]
    return b"" if value is None else value
  else:
    return None


def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False):
  """
  Reads from the descriptor file until we get to one of the given keywords or
  reach the end of the file.

  :param str,list keywords: keyword(s) we want to read until
  :param file descriptor_file: file with the descriptor content
  :param bool inclusive: includes the line with the keyword if True
  :param bool ignore_first: doesn't check if the first line read has one of the
    given keywords
  :param bool skip: skips buffering content, returning None
  :param int end_position: end if we reach this point in the file
  :param bool include_ending_keyword: provides the keyword we broke on if **True**

  :returns: **list** with the lines until we find one of the keywords, this is
    a two value tuple with the ending keyword if include_ending_keyword is
    **True**
  """

  content = None if skip else []
  ending_keyword = None

  if isinstance(keywords, (bytes, unicode)):
    keywords = (keywords,)

  if ignore_first:
    first_line = descriptor_file.readline()

    if content is not None and first_line is not None:
      content.append(first_line)

  while True:
    last_position = descriptor_file.tell()

    if end_position and last_position >= end_position:
      break

    line = descriptor_file.readline()

    if not line:
      break  # EOF

    line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(line))

    if not line_match:
      # no spaces or tabs in the line
      line_keyword = stem.util.str_tools._to_unicode(line.strip())
    else:
      line_keyword = line_match.groups()[0]

    if line_keyword in keywords:
      ending_keyword = line_keyword

      if not inclusive:
        descriptor_file.seek(last_position)
      elif content is not None:
        content.append(line)

      break
    elif content is not None:
      content.append(line)

  if include_ending_keyword:
    return (content, ending_keyword)
  else:
    return content


def _get_pseudo_pgp_block(remaining_contents):
  """
  Checks if given contents begins with a pseudo-Open-PGP-style block and, if
  so, pops it off and provides it back to the caller.

  :param list remaining_contents: lines to be checked for a public key block

  :returns: **str** with the armor wrapped contents or None if it doesn't exist

  :raises: **ValueError** if the contents starts with a key block but it's
    malformed (for instance, if it lacks an ending line)
  """

  if not remaining_contents:
    return None  # nothing left

  block_match = PGP_BLOCK_START.match(remaining_contents[0])

  if block_match:
    block_type = block_match.groups()[0]
    block_lines = []
    end_line = PGP_BLOCK_END % block_type

    while True:
      if not remaining_contents:
        raise ValueError("Unterminated pgp style block (looking for '%s'):\n%s" % (end_line, "\n".join(block_lines)))

      line = remaining_contents.pop(0)
      block_lines.append(line)

      if line == end_line:
        return "\n".join(block_lines)
  else:
    return None


def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
  """
  Initial breakup of the server descriptor contents to make parsing easier.

  A descriptor contains a series of 'keyword lines' which are simply a keyword
  followed by an optional value. Lines can also be followed by a signature
  block.

  To get a sub-listing with just certain keywords use extra_keywords. This can
  be useful if we care about their relative ordering with respect to each
  other. For instance, we care about the ordering of 'accept' and 'reject'
  entries because this influences the resulting exit policy, but for everything
  else in server descriptors the order does not matter.

  :param str raw_contents: descriptor content provided by the relay
  :param bool validate: checks the validity of the descriptor's content if
    True, skips these checks otherwise
  :param list extra_keywords: entity keywords to put into a separate listing
    with ordering intact

  :returns:
    **collections.OrderedDict** with the 'keyword => (value, pgp key) entries'
    mappings. If extra_keywords was provided then this instead provides a two
    value tuple, the second being a list of those entries.
  """

  entries = OrderedDict()
  extra_entries = []  # entries with a keyword in extra_keywords
  remaining_lines = raw_contents.split("\n")

  while remaining_lines:
    line = remaining_lines.pop(0)

    # V2 network status documents explicitly can contain blank lines...
    #
    # "Implementations MAY insert blank lines for clarity between sections;
    # these blank lines are ignored."
    #
    # ... and server descriptors end with an extra newline. But other documents
    # don't say how blank lines should be handled so globally ignoring them.

    if not line:
      continue

    # Some lines have an 'opt ' for backward compatibility. They should be
    # ignored. This prefix is being removed in...
    # https://trac.torproject.org/projects/tor/ticket/5124

    if line.startswith("opt "):
      line = line[4:]

    line_match = KEYWORD_LINE.match(line)

    if not line_match:
      if not validate:
        continue

      raise ValueError("Line contains invalid characters: %s" % line)

    keyword, value = line_match.groups()

    if value is None:
      value = ''

    try:
      block_contents = _get_pseudo_pgp_block(remaining_lines)
    except ValueError as exc:
      if not validate:
        continue

      raise exc

    if keyword in extra_keywords:
      extra_entries.append("%s %s" % (keyword, value))
    else:
      entries.setdefault(keyword, []).append((value, block_contents))

  if extra_keywords:
    return entries, extra_entries
  else:
    return entries

# importing at the end to avoid circular dependencies on our Descriptor class

import stem.descriptor.server_descriptor
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.networkstatus
import stem.descriptor.microdescriptor
import stem.descriptor.tordnsel
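
A usage sketch for parse_file() (the path below is an assumption and varies by
platform; with a filename like 'cached-consensus' the type is guessed, so each
yielded item is a router status entry)::

  import stem.descriptor

  # iterate over the network status entries in tor's cached consensus
  for desc in stem.descriptor.parse_file('/var/lib/tor/cached-consensus'):
    print('%s (%s)' % (desc.nickname, desc.fingerprint))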
106 lib/stem/descriptor/export.py Normal file
@@ -0,0 +1,106 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Toolkit for exporting descriptors to other formats.

**Module Overview:**

::

  export_csv - Exports descriptors to a CSV
  export_csv_file - Writes exported CSV output to a file
"""

import cStringIO
import csv

import stem.descriptor
import stem.prereq


class _ExportDialect(csv.excel):
  lineterminator = '\n'


def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
  """
  Provides a newline separated CSV for one or more descriptors. If simply
  provided with descriptors then the CSV contains all of their attributes,
  labeled with a header row. Either 'included_fields' or 'excluded_fields' can
  be used for more granular control over the attributes and their order.

  :param Descriptor,list descriptors: either a
    :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
  :param list included_fields: attributes to include in the csv
  :param list excluded_fields: attributes to exclude from the csv
  :param bool header: if **True** then the first line will be a comma separated
    list of the attribute names (**only supported in python 2.7 and higher**)

  :returns: **str** of the CSV for the descriptors, one per line

  :raises: **ValueError** if descriptors contain more than one descriptor type
  """

  output_buffer = cStringIO.StringIO()
  export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
  return output_buffer.getvalue()


def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
  """
  Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
  written directly to a file.

  :param file output_file: file to be written to
  :param Descriptor,list descriptors: either a
    :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
  :param list included_fields: attributes to include in the csv
  :param list excluded_fields: attributes to exclude from the csv
  :param bool header: if **True** then the first line will be a comma separated
    list of the attribute names (**only supported in python 2.7 and higher**)

  :raises: **ValueError** if descriptors contain more than one descriptor type
  """

  if isinstance(descriptors, stem.descriptor.Descriptor):
    descriptors = (descriptors,)

  if not descriptors:
    return

  descriptor_type = type(descriptors[0])
  descriptor_type_label = descriptor_type.__name__
  included_fields = list(included_fields)

  # If the user didn't specify the fields to include then export everything,
  # ordered alphabetically. If they did specify fields then make sure that
  # they exist.

  desc_attr = sorted(vars(descriptors[0]).keys())

  if included_fields:
    for field in included_fields:
      if field not in desc_attr:
        raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ", ".join(desc_attr)))
  else:
    included_fields = [attr for attr in desc_attr if not attr.startswith('_')]

  for field in excluded_fields:
    try:
      included_fields.remove(field)
    except ValueError:
      pass

  writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction = 'ignore')

  if header and stem.prereq.is_python_27():
    writer.writeheader()

  for desc in descriptors:
    if not isinstance(desc, stem.descriptor.Descriptor):
      raise ValueError("Unable to export a descriptor CSV since %s is not a descriptor." % type(desc).__name__)
    elif descriptor_type != type(desc):
      raise ValueError("To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s." % (descriptor_type_label, type(desc)))

    writer.writerow(vars(desc))
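
A usage sketch for export_csv() (the cached-descriptors path and the chosen
attribute names are assumptions; any attributes of a single descriptor type
work)::

  import stem.descriptor
  import stem.descriptor.export

  # export a few server descriptor attributes as CSV, header row included
  descs = list(stem.descriptor.parse_file('/var/lib/tor/cached-descriptors'))
  print(stem.descriptor.export.export_csv(descs, included_fields = ('nickname', 'address', 'published')))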
940 lib/stem/descriptor/extrainfo_descriptor.py Normal file
@@ -0,0 +1,940 @@
|
||||
# Copyright 2012-2013, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for Tor extra-info descriptors. These are published by relays whenever
|
||||
their server descriptor is published and have a similar format. However, unlike
|
||||
server descriptors these don't contain information that Tor clients require to
|
||||
function and as such aren't fetched by default.
|
||||
|
||||
Defined in section 2.2 of the `dir-spec
|
||||
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_,
|
||||
extra-info descriptors contain interesting but non-vital information such as
|
||||
usage statistics. Tor clients cannot request these documents for bridges.
|
||||
|
||||
Extra-info descriptors are available from a few sources...
|
||||
|
||||
* if you have 'DownloadExtraInfo 1' in your torrc...
|
||||
|
||||
* control port via 'GETINFO extra-info/digest/\*' queries
|
||||
* the 'cached-extrainfo' file in tor's data directory
|
||||
|
||||
* tor metrics, at https://metrics.torproject.org/data.html
|
||||
* directory authorities and mirrors via their DirPort
|
||||
|
||||

**Module Overview:**

::

  ExtraInfoDescriptor - Tor extra-info descriptor.
    | |- RelayExtraInfoDescriptor - Extra-info descriptor for a relay.
    | +- BridgeExtraInfoDescriptor - Extra-info descriptor for a bridge.
    |
    |- digest - calculates the upper-case hex digest value for our content
    +- get_unrecognized_lines - lines with unrecognized content

.. data:: DirResponse (enum)

  Enumeration for known statuses for ExtraInfoDescriptor's dir_*_responses.

  =================== ===========
  DirResponse         Description
  =================== ===========
  **OK**              network status requests that were answered
  **NOT_ENOUGH_SIGS** network status wasn't signed by enough authorities
  **UNAVAILABLE**     requested network status was unavailable
  **NOT_FOUND**       requested network status was not found
  **NOT_MODIFIED**    network status unmodified since If-Modified-Since time
  **BUSY**            directory was busy
  =================== ===========

.. data:: DirStat (enum)

  Enumeration for known stats for ExtraInfoDescriptor's dir_*_direct_dl and
  dir_*_tunneled_dl.

  ===================== ===========
  DirStat               Description
  ===================== ===========
  **COMPLETE**          requests that completed successfully
  **TIMEOUT**           requests that didn't complete within a ten minute timeout
  **RUNNING**           requests still in process when measurement's taken
  **MIN**               smallest rate at which a descriptor was downloaded in B/s
  **MAX**               largest rate at which a descriptor was downloaded in B/s
  **D1-4** and **D6-9** rate of the slowest x/10 download rates in B/s
  **Q1** and **Q3**     rate of the slowest and fastest quarter download rates in B/s
  **MD**                median download rate in B/s
  ===================== ===========
"""

import datetime
import hashlib
import re

import stem.util.connection
import stem.util.enum
import stem.util.str_tools
import stem.util.tor_tools  # used below for nickname/fingerprint/digest checks

from stem.descriptor import (
  PGP_BLOCK_END,
  Descriptor,
  _read_until_keywords,
  _get_descriptor_components,
)

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

# known statuses for dirreq-v2-resp and dirreq-v3-resp...
DirResponse = stem.util.enum.Enum(
  ("OK", "ok"),
  ("NOT_ENOUGH_SIGS", "not-enough-sigs"),
  ("UNAVAILABLE", "unavailable"),
  ("NOT_FOUND", "not-found"),
  ("NOT_MODIFIED", "not-modified"),
  ("BUSY", "busy"),
)

# known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl...
dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md']
dir_stats += ['d%i' % i for i in range(1, 5)]
dir_stats += ['d%i' % i for i in range(6, 10)]
DirStat = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats])
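
# As an illustration (not part of the module), the comprehension above should
# produce enum members mapping the upper-cased stat names onto themselves...
#
#   DirStat.COMPLETE => 'complete'
#   DirStat.Q1       => 'q1'
#   DirStat.D6       => 'd6'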

# relay descriptors must have exactly one of the following
REQUIRED_FIELDS = (
  "extra-info",
  "published",
  "router-signature",
)

# optional entries that can appear at most once
SINGLE_FIELDS = (
  "read-history",
  "write-history",
  "geoip-db-digest",
  "geoip6-db-digest",
  "bridge-stats-end",
  "bridge-ips",
  "dirreq-stats-end",
  "dirreq-v2-ips",
  "dirreq-v3-ips",
  "dirreq-v2-reqs",
  "dirreq-v3-reqs",
  "dirreq-v2-share",
  "dirreq-v3-share",
  "dirreq-v2-resp",
  "dirreq-v3-resp",
  "dirreq-v2-direct-dl",
  "dirreq-v3-direct-dl",
  "dirreq-v2-tunneled-dl",
  "dirreq-v3-tunneled-dl",
  "dirreq-read-history",
  "dirreq-write-history",
  "entry-stats-end",
  "entry-ips",
  "cell-stats-end",
  "cell-processed-cells",
  "cell-queued-cells",
  "cell-time-in-queue",
  "cell-circuits-per-decile",
  "conn-bi-direct",
  "exit-stats-end",
  "exit-kibibytes-written",
  "exit-kibibytes-read",
  "exit-streams-opened",
)


def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
  """
  Iterates over the extra-info descriptors in a file.

  :param file descriptor_file: file with descriptor content
  :param bool is_bridge: parses the file as being a bridge descriptor
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator for :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
    instances in the file

  :raises:
    * **ValueError** if the contents is malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  while True:
    extrainfo_content = _read_until_keywords("router-signature", descriptor_file)

    # we've reached the 'router-signature', now include the pgp style block
    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
    extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)

    if extrainfo_content:
      if is_bridge:
        yield BridgeExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate, **kwargs)
      else:
        yield RelayExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate, **kwargs)
    else:
      break  # done parsing file
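
# Usage sketch (assumptions: a 'cached-extrainfo' file copied from tor's data
# directory; callers would normally go through stem.descriptor.parse_file
# rather than this private helper)...
#
#   with open('/home/atagar/.tor/cached-extrainfo', 'rb') as descriptor_file:
#     for desc in _parse_file(descriptor_file):
#       print desc.nickname, desc.published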


def _parse_timestamp_and_interval(keyword, content):
  """
  Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry.

  :param str keyword: line's keyword
  :param str content: line content to be parsed

  :returns: **tuple** of the form (timestamp (**datetime**), interval
    (**int**), remaining content (**str**))

  :raises: **ValueError** if the content is malformed
  """

  line = "%s %s" % (keyword, content)
  content_match = re.match("^(.*) \(([0-9]+) s\)( .*)?$", content)

  if not content_match:
    raise ValueError("Malformed %s line: %s" % (keyword, line))

  timestamp_str, interval, remainder = content_match.groups()

  if remainder:
    remainder = remainder[1:]  # remove leading space

  if not interval.isdigit():
    raise ValueError("%s line's interval wasn't a number: %s" % (keyword, line))

  try:
    timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
    return timestamp, int(interval), remainder
  except ValueError:
    raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
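
# For instance, a 'dirreq-stats-end' value without trailing content should
# parse as follows (illustrative sketch)...
#
#   >>> _parse_timestamp_and_interval('dirreq-stats-end', '2012-05-03 12:07:50 (86400 s)')
#   (datetime.datetime(2012, 5, 3, 12, 7, 50), 86400, None)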


class ExtraInfoDescriptor(Descriptor):
  """
  Extra-info descriptor document.

  :var str nickname: **\*** relay's nickname
  :var str fingerprint: **\*** identity key fingerprint
  :var datetime published: **\*** time in UTC when this descriptor was made
  :var str geoip_db_digest: sha1 of the geoIP database file for IPv4 addresses
  :var str geoip6_db_digest: sha1 of the geoIP database file for IPv6 addresses
  :var dict transport: **\*** mapping of transport methods to their (address,
    port, args) tuple, these usually appear on bridges in which case all of
    those are **None**

  **Bi-directional connection usage:**

  :var datetime conn_bi_direct_end: end of the sampling interval
  :var int conn_bi_direct_interval: seconds per interval
  :var int conn_bi_direct_below: connections that read/wrote less than 20 KiB
  :var int conn_bi_direct_read: connections that read at least 10x more than wrote
  :var int conn_bi_direct_write: connections that wrote at least 10x more than read
  :var int conn_bi_direct_both: remaining connections

  **Bytes read/written for relayed traffic:**

  :var datetime read_history_end: end of the sampling interval
  :var int read_history_interval: seconds per interval
  :var list read_history_values: bytes read during each interval

  :var datetime write_history_end: end of the sampling interval
  :var int write_history_interval: seconds per interval
  :var list write_history_values: bytes written during each interval

  **Cell relaying statistics:**

  :var datetime cell_stats_end: end of the period when stats were gathered
  :var int cell_stats_interval: length in seconds of the interval
  :var list cell_processed_cells: measurement of processed cells per circuit
  :var list cell_queued_cells: measurement of queued cells per circuit
  :var list cell_time_in_queue: mean enqueued time in milliseconds for cells
  :var int cell_circuits_per_decile: mean number of circuits in a decile

  **Directory Mirror Attributes:**

  :var datetime dir_stats_end: end of the period when stats were gathered
  :var int dir_stats_interval: length in seconds of the interval
  :var dict dir_v2_ips: mapping of locales to rounded count of requester ips
  :var dict dir_v3_ips: mapping of locales to rounded count of requester ips
  :var float dir_v2_share: percent of total directory traffic it expects to serve
  :var float dir_v3_share: percent of total directory traffic it expects to serve
  :var dict dir_v2_requests: mapping of locales to rounded count of requests
  :var dict dir_v3_requests: mapping of locales to rounded count of requests

  :var dict dir_v2_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
  :var dict dir_v3_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
  :var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count
  :var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count

  :var dict dir_v2_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
  :var dict dir_v3_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
  :var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement
  :var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement

  :var dict dir_v2_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
  :var dict dir_v3_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
  :var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement
  :var dict dir_v3_tunneled_dl_unknown: mapping of unrecognized stats to their measurement

  **Bytes read/written for directory mirroring:**

  :var datetime dir_read_history_end: end of the sampling interval
  :var int dir_read_history_interval: seconds per interval
  :var list dir_read_history_values: bytes read during each interval

  :var datetime dir_write_history_end: end of the sampling interval
  :var int dir_write_history_interval: seconds per interval
  :var list dir_write_history_values: bytes written during each interval

  **Guard Attributes:**

  :var datetime entry_stats_end: end of the period when stats were gathered
  :var int entry_stats_interval: length in seconds of the interval
  :var dict entry_ips: mapping of locales to rounded count of unique user ips

  **Exit Attributes:**

  :var datetime exit_stats_end: end of the period when stats were gathered
  :var int exit_stats_interval: length in seconds of the interval
  :var dict exit_kibibytes_written: traffic per port (keys are ints or 'other')
  :var dict exit_kibibytes_read: traffic per port (keys are ints or 'other')
  :var dict exit_streams_opened: streams per port (keys are ints or 'other')

  **Bridge Attributes:**

  :var datetime bridge_stats_end: end of the period when stats were gathered
  :var int bridge_stats_interval: length in seconds of the interval
  :var dict bridge_ips: mapping of locales to rounded count of unique user ips
  :var datetime geoip_start_time: replaced by bridge_stats_end (deprecated)
  :var dict geoip_client_origins: replaced by bridge_ips (deprecated)
  :var dict ip_versions: mapping of ip protocols to a rounded count for the number of users
  :var dict ip_transports: mapping of ip transports to a count for the number of users

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, raw_contents, validate = True):
    """
    Extra-info descriptor constructor. By default this validates the
    descriptor's content as it's parsed. This validation can be disabled to
    either improve performance or be accepting of malformed data.

    :param str raw_contents: extra-info content provided by the relay
    :param bool validate: checks the validity of the extra-info descriptor if
      **True**, skips these checks otherwise

    :raises: **ValueError** if the contents is malformed and validate is True
    """

    super(ExtraInfoDescriptor, self).__init__(raw_contents)
    raw_contents = stem.util.str_tools._to_unicode(raw_contents)

    self.nickname = None
    self.fingerprint = None
    self.published = None
    self.geoip_db_digest = None
    self.geoip6_db_digest = None
    self.transport = {}

    self.conn_bi_direct_end = None
    self.conn_bi_direct_interval = None
    self.conn_bi_direct_below = None
    self.conn_bi_direct_read = None
    self.conn_bi_direct_write = None
    self.conn_bi_direct_both = None

    self.read_history_end = None
    self.read_history_interval = None
    self.read_history_values = None

    self.write_history_end = None
    self.write_history_interval = None
    self.write_history_values = None

    self.cell_stats_end = None
    self.cell_stats_interval = None
    self.cell_processed_cells = None
    self.cell_queued_cells = None
    self.cell_time_in_queue = None
    self.cell_circuits_per_decile = None

    self.dir_stats_end = None
    self.dir_stats_interval = None
    self.dir_v2_ips = None
    self.dir_v3_ips = None
    self.dir_v2_share = None
    self.dir_v3_share = None
    self.dir_v2_requests = None
    self.dir_v3_requests = None
    self.dir_v2_responses = None
    self.dir_v3_responses = None
    self.dir_v2_responses_unknown = None
    self.dir_v3_responses_unknown = None
    self.dir_v2_direct_dl = None
    self.dir_v3_direct_dl = None
    self.dir_v2_direct_dl_unknown = None
    self.dir_v3_direct_dl_unknown = None
    self.dir_v2_tunneled_dl = None
    self.dir_v3_tunneled_dl = None
    self.dir_v2_tunneled_dl_unknown = None
    self.dir_v3_tunneled_dl_unknown = None

    self.dir_read_history_end = None
    self.dir_read_history_interval = None
    self.dir_read_history_values = None

    self.dir_write_history_end = None
    self.dir_write_history_interval = None
    self.dir_write_history_values = None

    self.entry_stats_end = None
    self.entry_stats_interval = None
    self.entry_ips = None

    self.exit_stats_end = None
    self.exit_stats_interval = None
    self.exit_kibibytes_written = None
    self.exit_kibibytes_read = None
    self.exit_streams_opened = None

    self.bridge_stats_end = None
    self.bridge_stats_interval = None
    self.bridge_ips = None
    self.geoip_start_time = None
    self.geoip_client_origins = None

    self.ip_versions = None
    self.ip_transports = None

    self._unrecognized_lines = []

    entries = _get_descriptor_components(raw_contents, validate)

    if validate:
      for keyword in self._required_fields():
        if not keyword in entries:
          raise ValueError("Extra-info descriptor must have a '%s' entry" % keyword)

      for keyword in self._required_fields() + SINGLE_FIELDS:
        if keyword in entries and len(entries[keyword]) > 1:
          raise ValueError("The '%s' entry can only appear once in an extra-info descriptor" % keyword)

      expected_first_keyword = self._first_keyword()

      if expected_first_keyword and expected_first_keyword != entries.keys()[0]:
        raise ValueError("Extra-info descriptor must start with a '%s' entry" % expected_first_keyword)

      expected_last_keyword = self._last_keyword()

      if expected_last_keyword and expected_last_keyword != entries.keys()[-1]:
        raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)

    self._parse(entries, validate)

  def get_unrecognized_lines(self):
    return list(self._unrecognized_lines)

  def _parse(self, entries, validate):
    """
    Parses a series of 'keyword => (value, pgp block)' mappings and applies
    them as attributes.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if True

    :raises: **ValueError** if an error occurs in validation
    """

    for keyword, values in entries.items():
      # most just work with the first (and only) value
      value, _ = values[0]
      line = "%s %s" % (keyword, value)  # original line

      if keyword == "extra-info":
        # "extra-info" Nickname Fingerprint
        extra_info_comp = value.split()

        if len(extra_info_comp) < 2:
          if not validate:
            continue

          raise ValueError("Extra-info line must have two values: %s" % line)

        if validate:
          if not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]):
            raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0])
          elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]):
            raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % extra_info_comp[1])

        self.nickname = extra_info_comp[0]
        self.fingerprint = extra_info_comp[1]
elif keyword == "geoip-db-digest":
|
||||
# "geoip-db-digest" Digest
|
||||
|
||||
if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
|
||||
raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line)
|
||||
|
||||
self.geoip_db_digest = value
|
||||
elif keyword == "geoip6-db-digest":
|
||||
# "geoip6-db-digest" Digest
|
||||
|
||||
if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
|
||||
raise ValueError("Geoip v6 digest line had an invalid sha1 digest: %s" % line)
|
||||
|
||||
self.geoip6_db_digest = value
|
||||
elif keyword == "transport":
|
||||
# "transport" transportname address:port [arglist]
|
||||
# Everything after the transportname is scrubbed in published bridge
|
||||
# descriptors, so we'll never see it in practice.
|
||||
#
|
||||
# These entries really only make sense for bridges, but have been seen
|
||||
# on non-bridges in the wild when the relay operator configured it this
|
||||
# way.
|
||||
|
||||
for transport_value, _ in values:
|
||||
name, address, port, args = None, None, None, None
|
||||
|
||||
if not ' ' in transport_value:
|
||||
# scrubbed
|
||||
name = transport_value
|
||||
else:
|
||||
# not scrubbed
|
||||
value_comp = transport_value.split()
|
||||
|
||||
if len(value_comp) < 1:
|
||||
raise ValueError("Transport line is missing its transport name: %s" % line)
|
||||
else:
|
||||
name = value_comp[0]
|
||||
|
||||
if len(value_comp) < 2:
|
||||
raise ValueError("Transport line is missing its address:port value: %s" % line)
|
||||
elif not ":" in value_comp[1]:
|
||||
raise ValueError("Transport line's address:port entry is missing a colon: %s" % line)
|
||||
else:
|
||||
address, port_str = value_comp[1].split(':', 1)
|
||||
|
||||
if not stem.util.connection.is_valid_ipv4_address(address) or \
|
||||
stem.util.connection.is_valid_ipv6_address(address):
|
||||
raise ValueError("Transport line has a malformed address: %s" % line)
|
||||
elif not stem.util.connection.is_valid_port(port_str):
|
||||
raise ValueError("Transport line has a malformed port: %s" % line)
|
||||
|
||||
port = int(port_str)
|
||||
|
||||
if len(value_comp) >= 3:
|
||||
args = value_comp[2:]
|
||||
else:
|
||||
args = []
|
||||
|
||||
self.transport[name] = (address, port, args)
|
||||
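
      # As an illustration (hypothetical values): an unscrubbed line such as
      # 'transport obfs3 198.51.100.5:443' yields...
      #
      #   self.transport['obfs3'] = ('198.51.100.5', 443, [])
      #
      # ... while the scrubbed 'transport obfs3' form yields...
      #
      #   self.transport['obfs3'] = (None, None, None)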
elif keyword == "cell-circuits-per-decile":
|
||||
# "cell-circuits-per-decile" num
|
||||
|
||||
if not value.isdigit():
|
||||
if validate:
|
||||
raise ValueError("Non-numeric cell-circuits-per-decile value: %s" % line)
|
||||
else:
|
||||
continue
|
||||
|
||||
stat = int(value)
|
||||
|
||||
if validate and stat < 0:
|
||||
raise ValueError("Negative cell-circuits-per-decile value: %s" % line)
|
||||
|
||||
self.cell_circuits_per_decile = stat
|
||||
elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"):
|
||||
recognized_counts = {}
|
||||
unrecognized_counts = {}
|
||||
|
||||
is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp")
|
||||
key_set = DirResponse if is_response_stats else DirStat
|
||||
|
||||
key_type = "STATUS" if is_response_stats else "STAT"
|
||||
error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line)
|
||||
|
||||
if value:
|
||||
for entry in value.split(","):
|
||||
if not "=" in entry:
|
||||
if validate:
|
||||
raise ValueError(error_msg)
|
||||
else:
|
||||
continue
|
||||
|
||||
status, count = entry.split("=", 1)
|
||||
|
||||
if count.isdigit():
|
||||
if status in key_set:
|
||||
recognized_counts[status] = int(count)
|
||||
else:
|
||||
unrecognized_counts[status] = int(count)
|
||||
elif validate:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
if keyword == "dirreq-v2-resp":
|
||||
self.dir_v2_responses = recognized_counts
|
||||
self.dir_v2_responses_unknown = unrecognized_counts
|
||||
elif keyword == "dirreq-v3-resp":
|
||||
self.dir_v3_responses = recognized_counts
|
||||
self.dir_v3_responses_unknown = unrecognized_counts
|
||||
elif keyword == "dirreq-v2-direct-dl":
|
||||
self.dir_v2_direct_dl = recognized_counts
|
||||
self.dir_v2_direct_dl_unknown = unrecognized_counts
|
||||
elif keyword == "dirreq-v3-direct-dl":
|
||||
self.dir_v3_direct_dl = recognized_counts
|
||||
self.dir_v3_direct_dl_unknown = unrecognized_counts
|
||||
elif keyword == "dirreq-v2-tunneled-dl":
|
||||
self.dir_v2_tunneled_dl = recognized_counts
|
||||
self.dir_v2_tunneled_dl_unknown = unrecognized_counts
|
||||
elif keyword == "dirreq-v3-tunneled-dl":
|
||||
self.dir_v3_tunneled_dl = recognized_counts
|
||||
self.dir_v3_tunneled_dl_unknown = unrecognized_counts
|
||||
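
      # For instance (a sketch with made-up counts), a line such as...
      #
      #   dirreq-v3-resp ok=308,not-enough-sigs=0,unavailable=0,not-found=0,not-modified=0,busy=2
      #
      # ... should populate dir_v3_responses with {'ok': 308,
      # 'not-enough-sigs': 0, 'unavailable': 0, 'not-found': 0,
      # 'not-modified': 0, 'busy': 2}.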
elif keyword in ("dirreq-v2-share", "dirreq-v3-share"):
|
||||
# "<keyword>" num%
|
||||
|
||||
try:
|
||||
if not value.endswith("%"):
|
||||
raise ValueError()
|
||||
|
||||
percentage = float(value[:-1]) / 100
|
||||
|
||||
# Bug lets these be above 100%, however they're soon going away...
|
||||
# https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html
|
||||
|
||||
if validate and percentage < 0:
|
||||
raise ValueError("Negative percentage value: %s" % line)
|
||||
|
||||
if keyword == "dirreq-v2-share":
|
||||
self.dir_v2_share = percentage
|
||||
elif keyword == "dirreq-v3-share":
|
||||
self.dir_v3_share = percentage
|
||||
except ValueError as exc:
|
||||
if validate:
|
||||
raise ValueError("Value can't be parsed as a percentage: %s" % line)
|
||||
elif keyword in ("cell-processed-cells", "cell-queued-cells", "cell-time-in-queue"):
|
||||
# "<keyword>" num,...,num
|
||||
|
||||
entries = []
|
||||
|
||||
if value:
|
||||
for entry in value.split(","):
|
||||
try:
|
||||
# Values should be positive but as discussed in ticket #5849
|
||||
# there was a bug around this. It was fixed in tor 0.2.2.1.
|
||||
|
||||
entries.append(float(entry))
|
||||
except ValueError:
|
||||
if validate:
|
||||
raise ValueError("Non-numeric entry in %s listing: %s" % (keyword, line))
|
||||
|
||||
if keyword == "cell-processed-cells":
|
||||
self.cell_processed_cells = entries
|
||||
elif keyword == "cell-queued-cells":
|
||||
self.cell_queued_cells = entries
|
||||
elif keyword == "cell-time-in-queue":
|
||||
self.cell_time_in_queue = entries
|
||||
elif keyword in ("published", "geoip-start-time"):
|
||||
# "<keyword>" YYYY-MM-DD HH:MM:SS
|
||||
|
||||
try:
|
||||
timestamp = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
|
||||
|
||||
if keyword == "published":
|
||||
self.published = timestamp
|
||||
elif keyword == "geoip-start-time":
|
||||
self.geoip_start_time = timestamp
|
||||
except ValueError:
|
||||
if validate:
|
||||
raise ValueError("Timestamp on %s line wasn't parsable: %s" % (keyword, line))
|
||||
elif keyword in ("cell-stats-end", "entry-stats-end", "exit-stats-end", "bridge-stats-end", "dirreq-stats-end"):
|
||||
# "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
|
||||
|
||||
try:
|
||||
timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value)
|
||||
|
||||
if keyword == "cell-stats-end":
|
||||
self.cell_stats_end = timestamp
|
||||
self.cell_stats_interval = interval
|
||||
elif keyword == "entry-stats-end":
|
||||
self.entry_stats_end = timestamp
|
||||
self.entry_stats_interval = interval
|
||||
elif keyword == "exit-stats-end":
|
||||
self.exit_stats_end = timestamp
|
||||
self.exit_stats_interval = interval
|
||||
elif keyword == "bridge-stats-end":
|
||||
self.bridge_stats_end = timestamp
|
||||
self.bridge_stats_interval = interval
|
||||
elif keyword == "dirreq-stats-end":
|
||||
self.dir_stats_end = timestamp
|
||||
self.dir_stats_interval = interval
|
||||
except ValueError as exc:
|
||||
if validate:
|
||||
raise exc
|
||||
elif keyword == "conn-bi-direct":
|
||||
# "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH
|
||||
|
||||
try:
|
||||
timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
|
||||
stats = remainder.split(",")
|
||||
|
||||
if len(stats) != 4 or not \
|
||||
(stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()):
|
||||
raise ValueError("conn-bi-direct line should end with four numeric values: %s" % line)
|
||||
|
||||
self.conn_bi_direct_end = timestamp
|
||||
self.conn_bi_direct_interval = interval
|
||||
self.conn_bi_direct_below = int(stats[0])
|
||||
self.conn_bi_direct_read = int(stats[1])
|
||||
self.conn_bi_direct_write = int(stats[2])
|
||||
self.conn_bi_direct_both = int(stats[3])
|
||||
except ValueError as exc:
|
||||
if validate:
|
||||
raise exc
|
||||
elif keyword in ("read-history", "write-history", "dirreq-read-history", "dirreq-write-history"):
|
||||
# "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
|
||||
try:
|
||||
timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
|
||||
history_values = []
|
||||
|
||||
if remainder:
|
||||
try:
|
||||
history_values = [int(entry) for entry in remainder.split(",")]
|
||||
except ValueError:
|
||||
raise ValueError("%s line has non-numeric values: %s" % (keyword, line))
|
||||
|
||||
if keyword == "read-history":
|
||||
self.read_history_end = timestamp
|
||||
self.read_history_interval = interval
|
||||
self.read_history_values = history_values
|
||||
elif keyword == "write-history":
|
||||
self.write_history_end = timestamp
|
||||
self.write_history_interval = interval
|
||||
self.write_history_values = history_values
|
||||
elif keyword == "dirreq-read-history":
|
||||
self.dir_read_history_end = timestamp
|
||||
self.dir_read_history_interval = interval
|
||||
self.dir_read_history_values = history_values
|
||||
elif keyword == "dirreq-write-history":
|
||||
self.dir_write_history_end = timestamp
|
||||
self.dir_write_history_interval = interval
|
||||
self.dir_write_history_values = history_values
|
||||
except ValueError as exc:
|
||||
if validate:
|
||||
raise exc
|
||||
elif keyword in ("exit-kibibytes-written", "exit-kibibytes-read", "exit-streams-opened"):
|
||||
# "<keyword>" port=N,port=N,...
|
||||
|
||||
port_mappings = {}
|
||||
error_msg = "Entries in %s line should only be PORT=N entries: %s" % (keyword, line)
|
||||
|
||||
if value:
|
||||
for entry in value.split(","):
|
||||
if not "=" in entry:
|
||||
if validate:
|
||||
raise ValueError(error_msg)
|
||||
else:
|
||||
continue
|
||||
|
||||
port, stat = entry.split("=", 1)
|
||||
|
||||
if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit():
|
||||
if port != 'other':
|
||||
port = int(port)
|
||||
port_mappings[port] = int(stat)
|
||||
elif validate:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
if keyword == "exit-kibibytes-written":
|
||||
self.exit_kibibytes_written = port_mappings
|
||||
elif keyword == "exit-kibibytes-read":
|
||||
self.exit_kibibytes_read = port_mappings
|
||||
elif keyword == "exit-streams-opened":
|
||||
self.exit_streams_opened = port_mappings
|
||||
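
      # For example (made-up numbers), 'exit-kibibytes-written 80=5,443=12,other=2'
      # should yield exit_kibibytes_written of {80: 5, 443: 12, 'other': 2}.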
elif keyword in ("dirreq-v2-ips", "dirreq-v3-ips", "dirreq-v2-reqs", "dirreq-v3-reqs", "geoip-client-origins", "entry-ips", "bridge-ips"):
|
||||
# "<keyword>" CC=N,CC=N,...
|
||||
#
|
||||
# The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric
|
||||
# locale codes for some special values, for instance...
|
||||
# A1,"Anonymous Proxy"
|
||||
# A2,"Satellite Provider"
|
||||
# ??,"Unknown"
|
||||
|
||||
locale_usage = {}
|
||||
error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line)
|
||||
|
||||
if value:
|
||||
for entry in value.split(","):
|
||||
if not "=" in entry:
|
||||
if validate:
|
||||
raise ValueError(error_msg)
|
||||
else:
|
||||
continue
|
||||
|
||||
locale, count = entry.split("=", 1)
|
||||
|
||||
if re.match("^[a-zA-Z0-9\?]{2}$", locale) and count.isdigit():
|
||||
locale_usage[locale] = int(count)
|
||||
elif validate:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
if keyword == "dirreq-v2-ips":
|
||||
self.dir_v2_ips = locale_usage
|
||||
elif keyword == "dirreq-v3-ips":
|
||||
self.dir_v3_ips = locale_usage
|
||||
elif keyword == "dirreq-v2-reqs":
|
||||
self.dir_v2_requests = locale_usage
|
||||
elif keyword == "dirreq-v3-reqs":
|
||||
self.dir_v3_requests = locale_usage
|
||||
elif keyword == "geoip-client-origins":
|
||||
self.geoip_client_origins = locale_usage
|
||||
elif keyword == "entry-ips":
|
||||
self.entry_ips = locale_usage
|
||||
elif keyword == "bridge-ips":
|
||||
self.bridge_ips = locale_usage
|
||||
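
      # For instance (hypothetical counts), 'bridge-ips us=16,de=8,??=8'
      # should yield bridge_ips of {'us': 16, 'de': 8, '??': 8}.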
elif keyword == "bridge-ip-versions":
|
||||
self.ip_versions = {}
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if not '=' in entry:
|
||||
raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
|
||||
|
||||
protocol, count = entry.split('=', 1)
|
||||
|
||||
if not count.isdigit():
|
||||
raise stem.ProtocolError("IP protocol count was non-numeric (%s): %s" % (count, line))
|
||||
|
||||
self.ip_versions[protocol] = int(count)
|
||||
elif keyword == "bridge-ip-transports":
|
||||
self.ip_transports = {}
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if not '=' in entry:
|
||||
raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
|
||||
|
||||
protocol, count = entry.split('=', 1)
|
||||
|
||||
if not count.isdigit():
|
||||
raise stem.ProtocolError("Transport count was non-numeric (%s): %s" % (count, line))
|
||||
|
||||
self.ip_transports[protocol] = int(count)
|
||||
else:
|
||||
self._unrecognized_lines.append(line)
|
||||
|
||||

  def digest(self):
    """
    Provides the upper-case hex encoded sha1 of our content. This value is part
    of the server descriptor entry for this relay.

    :returns: **str** with the upper-case hex digest value for this server
      descriptor
    """

    raise NotImplementedError("Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass")

  def _required_fields(self):
    return REQUIRED_FIELDS

  def _first_keyword(self):
    return "extra-info"

  def _last_keyword(self):
    return "router-signature"


class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
  """
  Relay extra-info descriptor, constructed from data such as that provided by
  "GETINFO extra-info/digest/\*", cached descriptors, and metrics
  (`specification <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).

  :var str signature: **\*** signature for this extrainfo descriptor

  **\*** attribute is required when we're parsed with validation
  """

  def __init__(self, raw_contents, validate = True):
    self.signature = None

    super(RelayExtraInfoDescriptor, self).__init__(raw_contents, validate)

  @lru_cache()
  def digest(self):
    # our digest is calculated from everything except our signature
    raw_content, ending = str(self), "\nrouter-signature\n"
    raw_content = raw_content[:raw_content.find(ending) + len(ending)]
    return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper()

  def _parse(self, entries, validate):
    entries = dict(entries)  # shallow copy since we're destructive

    # handles fields only in relay extra-info descriptors
    for keyword, values in entries.items():
      value, block_contents = values[0]

      line = "%s %s" % (keyword, value)  # original line

      if block_contents:
        line += "\n%s" % block_contents

      if keyword == "router-signature":
        if validate and not block_contents:
          raise ValueError("Router signature line must be followed by a signature block: %s" % line)

        self.signature = block_contents
        del entries["router-signature"]

    ExtraInfoDescriptor._parse(self, entries, validate)


class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
  """
  Bridge extra-info descriptor (`bridge descriptor specification
  <https://metrics.torproject.org/formats.html#bridgedesc>`_)
  """

  def __init__(self, raw_contents, validate = True):
    self._digest = None

    super(BridgeExtraInfoDescriptor, self).__init__(raw_contents, validate)

  def digest(self):
    return self._digest

  def _parse(self, entries, validate):
    entries = dict(entries)  # shallow copy since we're destructive

    # handles fields only in bridge extra-info descriptors
    for keyword, values in entries.items():
      value, _ = values[0]
      line = "%s %s" % (keyword, value)  # original line

      if keyword == "router-digest":
        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
          raise ValueError("Router digest line had an invalid sha1 digest: %s" % line)

        self._digest = value
        del entries["router-digest"]

    ExtraInfoDescriptor._parse(self, entries, validate)

  def _required_fields(self):
    excluded_fields = [
      "router-signature",
    ]

    included_fields = [
      "router-digest",
    ]

    return tuple(included_fields + [f for f in REQUIRED_FIELDS if not f in excluded_fields])

  def _last_keyword(self):
    return None
309
lib/stem/descriptor/microdescriptor.py
Normal file
@ -0,0 +1,309 @@
# Copyright 2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for Tor microdescriptors, which contain a distilled version of a
relay's server descriptor. As of Tor version 0.2.3.3-alpha Tor no longer
downloads server descriptors by default, opting for microdescriptors instead.

Unlike most descriptor documents these aren't available on the metrics site
(since they don't contain any information that the server descriptors don't).

The limited information in microdescriptors make them rather clunky to use
compared with server descriptors. For instance microdescriptors lack the
relay's fingerprint, making it difficult to use them to look up the relay's
other descriptors.

To do so you need to match the microdescriptor's digest against its
corresponding router status entry. For added fun as of this writing the
controller doesn't even surface those router status entries
(:trac:`7953`).

For instance, here's an example that prints the nickname and fingerprints of
the exit relays.

::

  import os

  from stem.control import Controller
  from stem.descriptor import parse_file

  with Controller.from_port(port = 9051) as controller:
    controller.authenticate()

    exit_digests = set()
    data_dir = controller.get_conf("DataDirectory")

    for desc in controller.get_microdescriptors():
      if desc.exit_policy.is_exiting_allowed():
        exit_digests.add(desc.digest)

    print "Exit Relays:"

    for desc in parse_file(os.path.join(data_dir, 'cached-microdesc-consensus')):
      if desc.digest in exit_digests:
        print "  %s (%s)" % (desc.nickname, desc.fingerprint)

Doing the same is trivial with server descriptors...

::

  from stem.descriptor import parse_file

  print "Exit Relays:"

  for desc in parse_file("/home/atagar/.tor/cached-descriptors"):
    if desc.exit_policy.is_exiting_allowed():
      print "  %s (%s)" % (desc.nickname, desc.fingerprint)

**Module Overview:**

::

  Microdescriptor - Tor microdescriptor.
"""

import hashlib

import stem.descriptor.router_status_entry
import stem.exit_policy
import stem.util.str_tools  # used below to normalize raw content to unicode

from stem.descriptor import (
  Descriptor,
  _get_descriptor_components,
  _read_until_keywords,
)

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

REQUIRED_FIELDS = (
  "onion-key",
)

SINGLE_FIELDS = (
  "onion-key",
  "ntor-onion-key",
  "family",
  "p",
  "p6",
)


def _parse_file(descriptor_file, validate = True, **kwargs):
  """
  Iterates over the microdescriptors in a file.

  :param file descriptor_file: file with descriptor content
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator for Microdescriptor instances in the file

  :raises:
    * **ValueError** if the contents is malformed and validate is True
    * **IOError** if the file can't be read
  """

  while True:
    annotations = _read_until_keywords("onion-key", descriptor_file)

    # read until we reach an annotation or onion-key line
    descriptor_lines = []

    # read the onion-key line, done if we're at the end of the document

    onion_key_line = descriptor_file.readline()

    if onion_key_line:
      descriptor_lines.append(onion_key_line)
    else:
      break

    while True:
      last_position = descriptor_file.tell()
      line = descriptor_file.readline()

      if not line:
        break  # EOF
      elif line.startswith(b"@") or line.startswith(b"onion-key"):
        descriptor_file.seek(last_position)
        break
      else:
        descriptor_lines.append(line)

    if descriptor_lines:
      # strip newlines from annotations
      annotations = map(bytes.strip, annotations)

      descriptor_text = bytes.join(b"", descriptor_lines)

      yield Microdescriptor(descriptor_text, validate, annotations, **kwargs)
    else:
      break  # done parsing descriptors


class Microdescriptor(Descriptor):
  """
  Microdescriptor (`descriptor specification
  <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)

  :var str digest: **\*** hex digest for this microdescriptor, this can be used
    to match against the corresponding digest attribute of a
    :class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3`
  :var str onion_key: **\*** key used to encrypt EXTEND cells
  :var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
  :var list or_addresses: **\*** alternative for our address/or_port attributes, each
    entry is a tuple of the form (address (**str**), port (**int**), is_ipv6
    (**bool**))
  :var list family: **\*** nicknames or fingerprints of declared family
  :var stem.exit_policy.MicroExitPolicy exit_policy: **\*** relay's exit policy
  :var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6

  **\*** attribute is required when we're parsed with validation
  """

  def __init__(self, raw_contents, validate = True, annotations = None):
    super(Microdescriptor, self).__init__(raw_contents)
    raw_contents = stem.util.str_tools._to_unicode(raw_contents)

    self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()

    self.onion_key = None
    self.ntor_onion_key = None
    self.or_addresses = []
    self.family = []
    self.exit_policy = stem.exit_policy.MicroExitPolicy("reject 1-65535")
    self.exit_policy_v6 = None

    self._unrecognized_lines = []

    self._annotation_lines = annotations if annotations else []

    entries = _get_descriptor_components(raw_contents, validate)
    self._parse(entries, validate)

    if validate:
      self._check_constraints(entries)

  def get_unrecognized_lines(self):
    return list(self._unrecognized_lines)

  @lru_cache()
  def get_annotations(self):
    """
    Provides content that appeared prior to the descriptor. If this comes from
    the cached-microdescs then this commonly contains content like...

    ::

      @last-listed 2013-02-24 00:18:30

    :returns: **dict** with the key/value pairs in our annotations
    """

    annotation_dict = {}

    for line in self._annotation_lines:
      if b" " in line:
        key, value = line.split(b" ", 1)
        annotation_dict[key] = value
      else:
        annotation_dict[line] = None

    return annotation_dict

  def get_annotation_lines(self):
    """
    Provides the lines of content that appeared prior to the descriptor. This
    is the same as the
    :func:`~stem.descriptor.microdescriptor.Microdescriptor.get_annotations`
    results, but with the unparsed lines and ordering retained.

    :returns: **list** with the lines of annotation that came before this descriptor
    """

    return self._annotation_lines

  def _parse(self, entries, validate):
    """
    Parses a series of 'keyword => (value, pgp block)' mappings and applies
    them as attributes.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if **True**

    :raises: **ValueError** if an error occurs in validation
    """

    for keyword, values in entries.items():
      # most just work with the first (and only) value
      value, block_contents = values[0]

      line = "%s %s" % (keyword, value)  # original line

      if block_contents:
        line += "\n%s" % block_contents

      if keyword == "onion-key":
        if validate and not block_contents:
          raise ValueError("Onion key line must be followed by a public key: %s" % line)

        self.onion_key = block_contents
      elif keyword == "ntor-onion-key":
        self.ntor_onion_key = value
      elif keyword == "a":
        for entry, _ in values:
          stem.descriptor.router_status_entry._parse_a_line(self, entry, validate)
      elif keyword == "family":
        self.family = value.split(" ")
      elif keyword == "p":
        stem.descriptor.router_status_entry._parse_p_line(self, value, validate)
      elif keyword == "p6":
        self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
      else:
        self._unrecognized_lines.append(line)

  def _check_constraints(self, entries):
    """
    Does a basic check that the entries conform to this descriptor type's
    constraints.

    :param dict entries: keyword => (value, pgp key) entries

    :raises: **ValueError** if an issue arises in validation
    """

    for keyword in REQUIRED_FIELDS:
      if not keyword in entries:
        raise ValueError("Microdescriptor must have a '%s' entry" % keyword)

    for keyword in SINGLE_FIELDS:
      if keyword in entries and len(entries[keyword]) > 1:
        raise ValueError("The '%s' entry can only appear once in a microdescriptor" % keyword)

    if "onion-key" != entries.keys()[0]:
      raise ValueError("Microdescriptor must start with a 'onion-key' entry")

  def _compare(self, other, method):
    if not isinstance(other, Microdescriptor):
      return False

    return method(str(self).strip(), str(other).strip())

  def __hash__(self):
    return hash(str(self).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
1475
lib/stem/descriptor/networkstatus.py
Normal file
File diff suppressed because it is too large
580
lib/stem/descriptor/reader.py
Normal file
@ -0,0 +1,580 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Utilities for reading descriptors from local directories and archives. This is
mostly done through the :class:`~stem.descriptor.reader.DescriptorReader`
class, which is an iterator for the descriptor data in a series of
destinations. For example...

::

  my_descriptors = [
    "/tmp/server-descriptors-2012-03.tar.bz2",
    "/tmp/archived_descriptors/",
  ]

  # prints the contents of all the descriptor files
  with DescriptorReader(my_descriptors) as reader:
    for descriptor in reader:
      print descriptor

This ignores files that cannot be processed due to read errors or unparsable
content. To be notified of skipped files you can register a listener with
:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`.

The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last
modified timestamps for descriptor files that it has read so it can skip
unchanged files if run again. This listing of processed files can also be
persisted and applied to other
:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the
following prints descriptors as they're changed over the course of a minute,
and picks up where it left off if run again...

::

  reader = DescriptorReader(["/tmp/descriptor_data"])

  try:
    processed_files = load_processed_files("/tmp/used_descriptors")
    reader.set_processed_files(processed_files)
  except: pass  # could not load, maybe this is the first run

  start_time = time.time()

  while (time.time() - start_time) < 60:
    # prints any descriptors that have changed since last checked
    with reader:
      for descriptor in reader:
        print descriptor

    time.sleep(1)

  save_processed_files("/tmp/used_descriptors", reader.get_processed_files())

**Module Overview:**

::

  load_processed_files - Loads a listing of processed files
  save_processed_files - Saves a listing of processed files

  DescriptorReader - Iterator for descriptor data on the local file system
    |- get_processed_files - provides the listing of files that we've processed
    |- set_processed_files - sets our tracking of the files we have processed
    |- register_read_listener - adds a listener for when files are read
    |- register_skip_listener - adds a listener that's notified of skipped files
    |- start - begins reading descriptor data
    |- stop - stops reading descriptor data
    |- __enter__ / __exit__ - manages the descriptor reader thread in the context
    +- __iter__ - iterates over descriptor data in unread files

  FileSkipped - Base exception for a file that was skipped
    |- AlreadyRead - We've already read a file with this last modified timestamp
    |- ParsingFailure - Contents can't be parsed as descriptor data
    |- UnrecognizedType - File extension indicates non-descriptor data
    +- ReadFailed - Wraps an error that was raised while reading the file
       +- FileMissing - File does not exist
"""

import mimetypes
import os
import Queue
import tarfile
import threading

import stem.descriptor
import stem.prereq

# flag to indicate when the reader thread is out of descriptor files to read
FINISHED = "DONE"


class FileSkipped(Exception):
  "Base error when we can't provide descriptor data from a file."


class AlreadyRead(FileSkipped):
  """
  Already read a file with this 'last modified' timestamp or later.

  :param int last_modified: unix timestamp for when the file was last modified
  :param int last_modified_when_read: unix timestamp for the modification time
    when we last read this file
  """

  def __init__(self, last_modified, last_modified_when_read):
    super(AlreadyRead, self).__init__("File has already been read since it was last modified. modification time: %s, last read: %s" % (last_modified, last_modified_when_read))
    self.last_modified = last_modified
    self.last_modified_when_read = last_modified_when_read


class ParsingFailure(FileSkipped):
  """
  File contents could not be parsed as descriptor data.

  :param ValueError exception: issue that arose when parsing
  """

  def __init__(self, parsing_exception):
    super(ParsingFailure, self).__init__(parsing_exception)
    self.exception = parsing_exception


class UnrecognizedType(FileSkipped):
  """
  File doesn't contain descriptor data. This could either be due to its file
  type or because it doesn't conform to a recognizable descriptor type.

  :param tuple mime_type: the (type, encoding) tuple provided by mimetypes.guess_type()
  """

  def __init__(self, mime_type):
    super(UnrecognizedType, self).__init__("Unrecognized mime type: %s (%s)" % mime_type)
    self.mime_type = mime_type


class ReadFailed(FileSkipped):
  """
  An IOError occurred while trying to read the file.

  :param IOError exception: issue that arose when reading the file, **None** if
    this arose due to the file not being present
  """

  def __init__(self, read_exception):
    super(ReadFailed, self).__init__(read_exception)
    self.exception = read_exception


class FileMissing(ReadFailed):
  "File does not exist."

  def __init__(self):
    super(FileMissing, self).__init__("File does not exist")


def load_processed_files(path):
  """
  Loads a dictionary of 'path => last modified timestamp' mappings, as
  persisted by :func:`~stem.descriptor.reader.save_processed_files`, from a
  file.

  :param str path: location to load the processed files dictionary from

  :returns: **dict** of 'path (**str**) => last modified unix timestamp
    (**int**)' mappings

  :raises:
    * **IOError** if unable to read the file
    * **TypeError** if unable to parse the file's contents
  """

  processed_files = {}

  with open(path) as input_file:
    for line in input_file.readlines():
      line = line.strip()

      if not line:
        continue  # skip blank lines

      if not " " in line:
        raise TypeError("Malformed line: %s" % line)

      path, timestamp = line.rsplit(" ", 1)

      if not os.path.isabs(path):
        raise TypeError("'%s' is not an absolute path" % path)
      elif not timestamp.isdigit():
        raise TypeError("'%s' is not an integer timestamp" % timestamp)

      processed_files[path] = int(timestamp)

  return processed_files
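
# The persisted listing is just one 'absolute path <space> unix timestamp'
# entry per line, for example (hypothetical content)...
#
#   /tmp/archived_descriptors/server-descriptors-2012-03.tar.bz2 1333238472
#   /tmp/archived_descriptors/extra-infos-2012-03.tar.bz2 1333238560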


def save_processed_files(path, processed_files):
  """
  Persists a dictionary of 'path => last modified timestamp' mappings (as
  provided by the DescriptorReader's
  :func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method)
  so that they can be loaded later and applied to another
  :class:`~stem.descriptor.reader.DescriptorReader`.

  :param str path: location to save the processed files dictionary to
  :param dict processed_files: 'path => last modified' mappings

  :raises:
    * **IOError** if unable to write to the file
    * **TypeError** if processed_files is of the wrong type
  """

  # makes the parent directory if it doesn't already exist
  try:
    path_dir = os.path.dirname(path)

    if not os.path.exists(path_dir):
      os.makedirs(path_dir)
  except OSError as exc:
    raise IOError(exc)

  with open(path, "w") as output_file:
    for path, timestamp in processed_files.items():
      if not os.path.isabs(path):
        raise TypeError("Only absolute paths are acceptable: %s" % path)

      output_file.write("%s %i\n" % (path, timestamp))


class DescriptorReader(object):
  """
  Iterator for the descriptor data on the local file system. This can process
  text files, tarball archives (gzip or bzip2), or recurse directories.

  By default this limits the number of descriptors that we'll read ahead before
  waiting for our caller to fetch some of them. This is included to avoid
  unbounded memory usage.

  Our persistence_path argument is a convenient method to persist the listing
  of files we have processed between runs, however it doesn't allow for error
  handling. If you want that then use the
  :func:`~stem.descriptor.reader.load_processed_files` and
  :func:`~stem.descriptor.reader.save_processed_files` functions instead.

  :param str,list target: path or list of paths for files or directories to be read from
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param bool follow_links: determines if we'll follow symlinks when traversing
    directories (requires python 2.6)
  :param int buffer_size: descriptors we'll buffer before waiting for some to
    be read, this is unbounded if zero
  :param str persistence_path: if set we will load and save processed file
    listings from this path, errors are ignored
  :param stem.descriptor.__init__.DocumentHandler document_handler: method in
    which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
  :param dict kwargs: additional arguments for the descriptor constructor
  """
def __init__(self, target, validate = True, follow_links = False, buffer_size = 100, persistence_path = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
|
||||
if isinstance(target, (bytes, unicode)):
|
||||
self._targets = [target]
|
||||
else:
|
||||
self._targets = target
|
||||
|
||||
# expand any relative paths we got
|
||||
|
||||
target = map(os.path.abspath, target)
|
||||
|
||||
self._validate = validate
|
||||
self._follow_links = follow_links
|
||||
self._persistence_path = persistence_path
|
||||
self._document_handler = document_handler
|
||||
self._kwargs = kwargs
|
||||
self._read_listeners = []
|
||||
self._skip_listeners = []
|
||||
self._processed_files = {}
|
||||
|
||||
self._reader_thread = None
|
||||
self._reader_thread_lock = threading.RLock()
|
||||
|
||||
self._iter_lock = threading.RLock()
|
||||
self._iter_notice = threading.Event()
|
||||
|
||||
self._is_stopped = threading.Event()
|
||||
self._is_stopped.set()
|
||||
|
||||
# Descriptors that we have read but not yet provided to the caller. A
|
||||
# FINISHED entry is used by the reading thread to indicate the end.
|
||||
|
||||
self._unreturned_descriptors = Queue.Queue(buffer_size)
|
||||
|
||||
if self._persistence_path:
|
||||
try:
|
||||
processed_files = load_processed_files(self._persistence_path)
|
||||
self.set_processed_files(processed_files)
|
||||
except:
|
||||
pass

  def get_processed_files(self):
    """
    For each file that we have read descriptor data from this provides a
    mapping of the form...

    ::

      absolute path (str) => last modified unix timestamp (int)

    This includes entries set through the
    :func:`~stem.descriptor.reader.DescriptorReader.set_processed_files`
    method. Each run resets this to only the files that were present during
    that run.

    :returns: **dict** with the absolute paths and unix timestamp for the last
      modified times of the files we have processed
    """

    # make sure that we only provide back absolute paths
    return dict((os.path.abspath(k), v) for (k, v) in self._processed_files.items())

  def set_processed_files(self, processed_files):
    """
    Sets the listing of the files we have processed. Most often this is used
    with a newly created :class:`~stem.descriptor.reader.DescriptorReader` to
    pre-populate the listing of descriptor files that we have seen.
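
    For instance (a minimal sketch, both paths are purely illustrative)...

    ::

      reader = DescriptorReader('/tmp/descriptor_dump')
      reader.set_processed_files(load_processed_files('/tmp/descriptor_cache'))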

    :param dict processed_files: mapping of absolute paths (**str**) to unix
      timestamps for the last modified time (**int**)
    """

    self._processed_files = dict(processed_files)

  def register_read_listener(self, listener):
    """
    Registers a listener for when files are read. This is executed prior to
    processing files. Listeners are expected to be of the form...

    ::

      my_listener(path)
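
    For instance (a minimal sketch, 'reader' being a DescriptorReader
    instance)...

    ::

      def read_listener(path):
        print 'reading %s' % path

      reader.register_read_listener(read_listener)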

    :param functor listener: functor to be notified when files are read
    """

    self._read_listeners.append(listener)

  def register_skip_listener(self, listener):
    """
    Registers a listener for files that are skipped. This listener is expected
    to be a functor of the form...

    ::

      my_listener(path, exception)
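
    For instance (a minimal sketch, 'reader' being a DescriptorReader
    instance)...

    ::

      def skip_listener(path, exception):
        print 'skipped %s (%s)' % (path, exception)

      reader.register_skip_listener(skip_listener)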

    :param functor listener: functor to be notified of files that are skipped
      due to read errors or because they couldn't be parsed as valid descriptor data
    """

    self._skip_listeners.append(listener)

  def get_buffered_descriptor_count(self):
    """
    Provides the number of descriptors that are waiting to be iterated over.
    This is limited to the buffer_size that we were constructed with.

    :returns: **int** for the estimated number of currently enqueued
      descriptors, this is not entirely reliable
    """

    return self._unreturned_descriptors.qsize()

  def start(self):
    """
    Starts reading our descriptor files.

    :raises: **ValueError** if we're already reading the descriptor files
    """

    with self._reader_thread_lock:
      if self._reader_thread:
        raise ValueError("Already running, you need to call stop() first")
      else:
        self._is_stopped.clear()
        self._reader_thread = threading.Thread(target = self._read_descriptor_files, name="Descriptor Reader")
        self._reader_thread.setDaemon(True)
        self._reader_thread.start()

  def stop(self):
    """
    Stops further reading of descriptor files.
    """

    with self._reader_thread_lock:
      self._is_stopped.set()
      self._iter_notice.set()

      # clears our queue to unblock enqueue calls

      try:
        while True:
          self._unreturned_descriptors.get_nowait()
      except Queue.Empty:
        pass

      self._reader_thread.join()
      self._reader_thread = None

      if self._persistence_path:
        try:
          processed_files = self.get_processed_files()
          save_processed_files(self._persistence_path, processed_files)
        except:
          pass

  def _read_descriptor_files(self):
    new_processed_files = {}
    remaining_files = list(self._targets)

    while remaining_files and not self._is_stopped.is_set():
      target = remaining_files.pop(0)

      if not os.path.exists(target):
        self._notify_skip_listeners(target, FileMissing())
        continue

      if os.path.isdir(target):
        walker = os.walk(target, followlinks = self._follow_links)
        self._handle_walker(walker, new_processed_files)
      else:
        self._handle_file(target, new_processed_files)

    self._processed_files = new_processed_files

    if not self._is_stopped.is_set():
      self._unreturned_descriptors.put(FINISHED)

    self._iter_notice.set()

  def __iter__(self):
    with self._iter_lock:
      while not self._is_stopped.is_set():
        try:
          descriptor = self._unreturned_descriptors.get_nowait()

          if descriptor == FINISHED:
            break
          else:
            yield descriptor
        except Queue.Empty:
          self._iter_notice.wait()
          self._iter_notice.clear()

  def _handle_walker(self, walker, new_processed_files):
    for root, _, files in walker:
      for filename in files:
        self._handle_file(os.path.join(root, filename), new_processed_files)

        # this can take a while if, say, we're including the root directory
        if self._is_stopped.is_set():
          return

  def _handle_file(self, target, new_processed_files):
    # This is a file. Register its last modified timestamp and check if
    # it's a file that we should skip.

    try:
      last_modified = int(os.stat(target).st_mtime)
      last_used = self._processed_files.get(target)
      new_processed_files[target] = last_modified
    except OSError as exc:
      self._notify_skip_listeners(target, ReadFailed(exc))
      return

    if last_used and last_used >= last_modified:
      self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used))
      return

    # Block devices and such are never descriptors, and can cause us to block
    # for quite a while so skipping anything that isn't a regular file.

    if not os.path.isfile(target):
      return

    # The mimetypes module only checks the file extension. To actually
    # check the content (like the 'file' command) we'd need something like
    # pymagic (https://github.com/cloudburst/pymagic).

    target_type = mimetypes.guess_type(target)

    # Checking if it's a tar file may fail due to permissions so falling back
    # to the mime type...
    #
    #   IOError: [Errno 13] Permission denied: '/vmlinuz.old'
    #
    # With python 3 insufficient permissions raises an AttributeError
    # instead...
    #
    #   http://bugs.python.org/issue17059

    try:
      is_tar = tarfile.is_tarfile(target)
    except (IOError, AttributeError):
      is_tar = target_type[0] == 'application/x-tar'

    if target_type[0] in (None, 'text/plain'):
      # either '.txt' or an unknown type
      self._handle_descriptor_file(target, target_type)
    elif is_tar:
      # handles gzip, bz2, and decompressed tarballs among others
      self._handle_archive(target)
    else:
      self._notify_skip_listeners(target, UnrecognizedType(target_type))

  def _handle_descriptor_file(self, target, mime_type):
    try:
      self._notify_read_listeners(target)

      with open(target, 'rb') as target_file:
        for desc in stem.descriptor.parse_file(target_file, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
          if self._is_stopped.is_set():
            return

          self._unreturned_descriptors.put(desc)
          self._iter_notice.set()
    except TypeError as exc:
      self._notify_skip_listeners(target, UnrecognizedType(mime_type))
    except ValueError as exc:
      self._notify_skip_listeners(target, ParsingFailure(exc))
    except IOError as exc:
      self._notify_skip_listeners(target, ReadFailed(exc))

  def _handle_archive(self, target):
    # TODO: This would be nicer via the 'with' keyword, but tarfile's __exit__
    # method was added sometime after python 2.5. We should change this when
    # we drop python 2.5 support.

    tar_file = None

    try:
      self._notify_read_listeners(target)
      tar_file = tarfile.open(target)

      for tar_entry in tar_file:
        if tar_entry.isfile():
          entry = tar_file.extractfile(tar_entry)

          try:
            for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
              if self._is_stopped.is_set():
                return

              desc._set_path(os.path.abspath(target))
              desc._set_archive_path(entry.name)
              self._unreturned_descriptors.put(desc)
              self._iter_notice.set()
          except TypeError as exc:
            self._notify_skip_listeners(target, ParsingFailure(exc))
          except ValueError as exc:
            self._notify_skip_listeners(target, ParsingFailure(exc))
          finally:
            entry.close()
    except IOError as exc:
      self._notify_skip_listeners(target, ReadFailed(exc))
    finally:
      if tar_file:
        tar_file.close()

  def _notify_read_listeners(self, path):
    for listener in self._read_listeners:
      listener(path)

  def _notify_skip_listeners(self, path, exception):
    for listener in self._skip_listeners:
      listener(path, exception)

  def __enter__(self):
    self.start()
    return self

  def __exit__(self, exit_type, value, traceback):
    self.stop()

758
lib/stem/descriptor/remote.py
Normal file
@ -0,0 +1,758 @@
# Copyright 2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Module for remotely retrieving descriptors from directory authorities and
mirrors. This is most easily done through the
:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
:class:`~stem.descriptor.remote.Query` instances to get you the descriptor
content. For example...

::

  from stem.descriptor.remote import DescriptorDownloader

  downloader = DescriptorDownloader(
    use_mirrors = True,
    timeout = 10,
  )

  query = downloader.get_server_descriptors()

  print "Exit Relays:"

  try:
    for desc in query.run():
      if desc.exit_policy.is_exiting_allowed():
        print "  %s (%s)" % (desc.nickname, desc.fingerprint)

    print
    print "Query took %0.2f seconds" % query.runtime
  except Exception as exc:
    print "Unable to retrieve the server descriptors: %s" % exc

If you don't care about errors then you can also simply iterate over the query
itself...

::

  for desc in downloader.get_server_descriptors():
    if desc.exit_policy.is_exiting_allowed():
      print "  %s (%s)" % (desc.nickname, desc.fingerprint)

::

  get_authorities - Provides tor directory information.

  DirectoryAuthority - Information about a tor directory authority.

  Query - Asynchronous request to download tor descriptors
    |- start - issues the query if it isn't already running
    +- run - blocks until the request is finished and provides the results

  DescriptorDownloader - Configurable class for issuing queries
    |- use_directory_mirrors - use directory mirrors to download future descriptors
    |- get_server_descriptors - provides present server descriptors
    |- get_extrainfo_descriptors - provides present extrainfo descriptors
    |- get_microdescriptors - provides present microdescriptors
    |- get_consensus - provides the present consensus or router status entries
    |- get_key_certificates - provides present authority key certificates
    +- query - request an arbitrary descriptor resource

.. data:: MAX_FINGERPRINTS

  Maximum number of descriptors that can be requested at a time by their
  fingerprints.

.. data:: MAX_MICRODESCRIPTOR_HASHES

  Maximum number of microdescriptors that can be requested at a time by their
  hashes.
"""

import io
import random
import sys
import threading
import time
import urllib2
import zlib

import stem.descriptor

from stem import Flag
from stem.util import log

# Tor has a limited number of descriptors we can fetch explicitly by their
# fingerprint or hashes due to a limit on the url length by squid proxies.

MAX_FINGERPRINTS = 96
MAX_MICRODESCRIPTOR_HASHES = 92

# We commonly only want authorities that vote in the consensus, and hence have
# a v3ident.

HAS_V3IDENT = lambda auth: auth.v3ident is not None


def _guess_descriptor_type(resource):
  # Attempts to determine the descriptor type based on the resource url. This
  # raises a ValueError if the resource isn't recognized.
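  #
  # For instance, a resource of '/tor/server/all.z' is mapped to
  # 'server-descriptor 1.0' (see the mappings below).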

  if resource.startswith('/tor/server/'):
    return 'server-descriptor 1.0'
  elif resource.startswith('/tor/extra/'):
    return 'extra-info 1.0'
  elif resource.startswith('/tor/micro/'):
    return 'microdescriptor 1.0'
  elif resource.startswith('/tor/status-vote/'):
    return 'network-status-consensus-3 1.0'
  elif resource.startswith('/tor/keys/'):
    return 'dir-key-certificate-3 1.0'
  else:
    raise ValueError("Unable to determine the descriptor type for '%s'" % resource)


class Query(object):
  """
  Asynchronous request for descriptor content from a directory authority or
  mirror. These can either be made through the
  :class:`~stem.descriptor.remote.DescriptorDownloader` or directly for more
  advanced usage.

  To block on the response and get results either call
  :func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
  :func:`~stem.descriptor.remote.Query.run` method passes along any errors
  that arise...

  ::

    from stem.descriptor.remote import Query

    query = Query(
      '/tor/server/all.z',
      block = True,
      timeout = 30,
    )

    print "Current relays:"

    if not query.error:
      for desc in query:
        print desc.fingerprint
    else:
      print "Unable to retrieve the server descriptors: %s" % query.error

  ... while iterating fails silently...

  ::

    print "Current relays:"

    for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'):
      print desc.fingerprint

  In either case exceptions are available via our 'error' attribute.

  Tor provides quite a few different descriptor resources via its directory
  protocol (see section 4.2 and later of the `dir-spec
  <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
  Commonly useful ones include...

  ===================================== ===========
  Resource                              Description
  ===================================== ===========
  /tor/server/all.z                     all present server descriptors
  /tor/server/fp/<fp1>+<fp2>+<fp3>.z    server descriptors with the given fingerprints
  /tor/extra/all.z                      all present extrainfo descriptors
  /tor/extra/fp/<fp1>+<fp2>+<fp3>.z     extrainfo descriptors with the given fingerprints
  /tor/micro/d/<hash1>-<hash2>.z        microdescriptors with the given hashes
  /tor/status-vote/current/consensus.z  present consensus
  /tor/keys/all.z                       key certificates for the authorities
  /tor/keys/fp/<v3ident1>+<v3ident2>.z  key certificates for specific authorities
  ===================================== ===========

  The '.z' suffix can be excluded to get a plaintext rather than compressed
  response. Compression is handled transparently, so this shouldn't matter to
  the caller.

  :var str resource: resource being fetched, such as '/tor/server/all.z'
  :var str descriptor_type: type of descriptors being fetched (for options see
    :func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
    resource if **None**

  :var list endpoints: (address, dirport) tuples of the authority or mirror
    we're querying, this uses authorities if undefined
  :var int retries: number of times to attempt the request if downloading it
    fails
  :var bool fall_back_to_authority: when retrying request issues the last
    request to a directory authority if **True**

  :var str content: downloaded descriptor content
  :var Exception error: exception if a problem occurred
  :var bool is_done: flag that indicates if our request has finished
  :var str download_url: last url used to download the descriptor, this is
    unset until we've actually made a download attempt

  :var float start_time: unix timestamp when we first started running
  :var float timeout: duration before we'll time out our request
  :var float runtime: time our query took, this is **None** if it's not yet
    finished

  :var bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :var stem.descriptor.__init__.DocumentHandler document_handler: method in
    which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
  :var dict kwargs: additional arguments for the descriptor constructor

  :param bool start: start making the request when constructed (default is **True**)
  :param bool block: only return after the request has been completed, this is
    the same as running **query.run(True)** (default is **False**)
  """

  def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
    if not resource.startswith('/'):
      raise ValueError("Resources should start with a '/': %s" % resource)

    self.resource = resource

    if descriptor_type:
      self.descriptor_type = descriptor_type
    else:
      self.descriptor_type = _guess_descriptor_type(resource)

    self.endpoints = endpoints if endpoints else []
    self.retries = retries
    self.fall_back_to_authority = fall_back_to_authority

    self.content = None
    self.error = None
    self.is_done = False
    self.download_url = None

    self.start_time = None
    self.timeout = timeout
    self.runtime = None

    self.validate = validate
    self.document_handler = document_handler
    self.kwargs = kwargs

    self._downloader_thread = None
    self._downloader_thread_lock = threading.RLock()

    if start:
      self.start()

    if block:
      self.run(True)

  def start(self):
    """
    Starts downloading the descriptors if we haven't started already.
    """

    with self._downloader_thread_lock:
      if self._downloader_thread is None:
        self._downloader_thread = threading.Thread(
          name = "Descriptor Query",
          target = self._download_descriptors,
          args = (self.retries,)
        )

        self._downloader_thread.setDaemon(True)
        self._downloader_thread.start()

  def run(self, suppress = False):
    """
    Blocks until our request is complete then provides the descriptors. If we
    haven't yet started our request then this does so.

    :param bool suppress: avoids raising exceptions if **True**

    :returns: list of the requested :class:`~stem.descriptor.__init__.Descriptor` instances

    :raises:
      Using the iterator can fail with the following if **suppress** is
      **False**...

      * **ValueError** if the descriptor content is malformed
      * **socket.timeout** if our request timed out
      * **urllib2.URLError** for most request failures

      Note that the urllib2 module may fail with other exception types, in
      which case we'll pass it along.
    """

    return list(self._run(suppress))

  def _run(self, suppress):
    with self._downloader_thread_lock:
      self.start()
      self._downloader_thread.join()

      if self.error:
        if suppress:
          return

        raise self.error
      else:
        if self.content is None:
          if suppress:
            return

          raise ValueError('BUG: _download_descriptors() finished without either results or an error')

        try:
          results = stem.descriptor.parse_file(
            io.BytesIO(self.content),
            self.descriptor_type,
            validate = self.validate,
            document_handler = self.document_handler,
            **self.kwargs
          )

          for desc in results:
            yield desc
        except ValueError as exc:
          self.error = exc  # encountered a parsing error

          if suppress:
            return

          raise self.error

  def __iter__(self):
    for desc in self._run(True):
      yield desc

  def _pick_url(self, use_authority = False):
    """
    Provides a url that can be queried. If we have multiple endpoints then one
    will be picked randomly.
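
    For instance, when falling back to moria1 (whose address and DirPort
    appear below in DIRECTORY_AUTHORITIES) a '/tor/server/all.z' resource
    becomes...

    ::

      http://128.31.0.39:9131/tor/server/all.z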

    :param bool use_authority: ignores our endpoints and uses a directory
      authority instead

    :returns: **str** for the url being queried by this request
    """

    if use_authority or not self.endpoints:
      authority = random.choice(filter(HAS_V3IDENT, get_authorities().values()))
      address, dirport = authority.address, authority.dir_port
    else:
      address, dirport = random.choice(self.endpoints)

    return "http://%s:%i/%s" % (address, dirport, self.resource.lstrip('/'))

  def _download_descriptors(self, retries):
    try:
      use_authority = retries == 0 and self.fall_back_to_authority
      self.download_url = self._pick_url(use_authority)

      self.start_time = time.time()
      response = urllib2.urlopen(self.download_url, timeout = self.timeout).read()

      if self.download_url.endswith('.z'):
        response = zlib.decompress(response)

      self.content = response.strip()

      self.runtime = time.time() - self.start_time
      log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
    except:
      exc = sys.exc_info()[1]

      if retries > 0:
        log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc))
        return self._download_descriptors(retries - 1)
      else:
        log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc))
        self.error = exc
    finally:
      self.is_done = True


class DescriptorDownloader(object):
  """
  Configurable class that issues :class:`~stem.descriptor.remote.Query`
  instances on your behalf.
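
  For example (a minimal sketch)...

  ::

    downloader = DescriptorDownloader(use_mirrors = True)

    for desc in downloader.get_consensus().run():
      print desc.fingerprint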

  :param bool use_mirrors: downloads the present consensus and uses the
    directory mirrors to fetch future requests, this fails silently if the
    consensus cannot be downloaded
  :param default_args: default arguments for the
    :class:`~stem.descriptor.remote.Query` constructor
  """

  def __init__(self, use_mirrors = False, **default_args):
    self._default_args = default_args

    authorities = filter(HAS_V3IDENT, get_authorities().values())
    self._endpoints = [(auth.address, auth.dir_port) for auth in authorities]

    if use_mirrors:
      try:
        start_time = time.time()
        self.use_directory_mirrors()
        log.debug("Retrieved directory mirrors (took %0.2fs)" % (time.time() - start_time))
      except Exception as exc:
        log.debug("Unable to retrieve directory mirrors: %s" % exc)

  def use_directory_mirrors(self):
    """
    Downloads the present consensus and configures ourselves to use directory
    mirrors, in addition to authorities.

    :returns: :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
      from which we got the directory mirrors

    :raises: **Exception** if unable to determine the directory mirrors
    """

    authorities = filter(HAS_V3IDENT, get_authorities().values())
    new_endpoints = set([(auth.address, auth.dir_port) for auth in authorities])

    consensus = list(self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run())[0]

    for desc in consensus.routers.values():
      if Flag.V2DIR in desc.flags:
        new_endpoints.add((desc.address, desc.dir_port))

    # we need our endpoints to be a list rather than set for random.choice()

    self._endpoints = list(new_endpoints)

    return consensus

  def get_server_descriptors(self, fingerprints = None, **query_args):
    """
    Provides the server descriptors with the given fingerprints. If no
    fingerprints are provided then this returns all descriptors in the present
    consensus.
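
    For instance, to fetch a single relay's descriptor (moria1's fingerprint
    is used here purely as an illustration, 'downloader' being a
    DescriptorDownloader instance)...

    ::

      desc = downloader.get_server_descriptors('9695DFC35FFEB861329B9F1AB04C46397020CE31').run()[0]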

    :param str,list fingerprints: fingerprint or list of fingerprints to be
      retrieved, gets all descriptors if **None**
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the server descriptors

    :raises: **ValueError** if we request more than 96 descriptors by their
      fingerprints (this is due to a limit on the url length by squid proxies).
    """

    resource = '/tor/server/all.z'

    if isinstance(fingerprints, str):
      fingerprints = [fingerprints]

    if fingerprints:
      if len(fingerprints) > MAX_FINGERPRINTS:
        raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)

      resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)

    return self.query(resource, **query_args)

  def get_extrainfo_descriptors(self, fingerprints = None, **query_args):
    """
    Provides the extrainfo descriptors with the given fingerprints. If no
    fingerprints are provided then this returns all descriptors in the present
    consensus.

    :param str,list fingerprints: fingerprint or list of fingerprints to be
      retrieved, gets all descriptors if **None**
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the extrainfo descriptors

    :raises: **ValueError** if we request more than 96 descriptors by their
      fingerprints (this is due to a limit on the url length by squid proxies).
    """

    resource = '/tor/extra/all.z'

    if isinstance(fingerprints, str):
      fingerprints = [fingerprints]

    if fingerprints:
      if len(fingerprints) > MAX_FINGERPRINTS:
        raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)

      resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)

    return self.query(resource, **query_args)

  def get_microdescriptors(self, hashes, **query_args):
    """
    Provides the microdescriptors with the given hashes. To get these see the
    'microdescriptor_hashes' attribute of
    :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`. Note
    that these are only provided via a microdescriptor consensus (such as
    'cached-microdesc-consensus' in your data directory).

    :param str,list hashes: microdescriptor hash or list of hashes to be
      retrieved
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the microdescriptors

    :raises: **ValueError** if we request more than 92 microdescriptors by their
      hashes (this is due to a limit on the url length by squid proxies).
    """

    if isinstance(hashes, str):
      hashes = [hashes]

    if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
      raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_HASHES)

    return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)

  def get_consensus(self, authority_v3ident = None, **query_args):
    """
    Provides the present router status entries.

    :param str authority_v3ident: fingerprint of the authority key for which
      to get the consensus, see `'v3ident' in tor's config.c
      <https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
      for the values.
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the router status
      entries
    """

    resource = '/tor/status-vote/current/consensus'

    if authority_v3ident:
      resource += '/%s' % authority_v3ident

    return self.query(resource + '.z', **query_args)

  def get_vote(self, authority, **query_args):
    """
    Provides the present vote for a given directory authority.

    :param stem.descriptor.remote.DirectoryAuthority authority: authority whose vote should be retrieved
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the router status
      entries
    """

    resource = '/tor/status-vote/current/authority'

    if not 'endpoints' in query_args:
      query_args['endpoints'] = [(authority.address, authority.dir_port)]

    return self.query(resource + '.z', **query_args)

  def get_key_certificates(self, authority_v3idents = None, **query_args):
    """
    Provides the key certificates for authorities with the given fingerprints.
    If no fingerprints are provided then this returns all present key
    certificates.

    :param str authority_v3idents: fingerprint or list of fingerprints of the
      authority keys, see `'v3ident' in tor's config.c
      <https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
      for the values.
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the key certificates

    :raises: **ValueError** if we request more than 96 key certificates by
      their identity fingerprints (this is due to a limit on the url length by
      squid proxies).
    """

    resource = '/tor/keys/all.z'

    if isinstance(authority_v3idents, str):
      authority_v3idents = [authority_v3idents]

    if authority_v3idents:
      if len(authority_v3idents) > MAX_FINGERPRINTS:
        raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_FINGERPRINTS)

      resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)

    return self.query(resource, **query_args)

  def query(self, resource, **query_args):
    """
    Issues a request for the given resource.

    :param str resource: resource being fetched, such as '/tor/server/all.z'
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the descriptors

    :raises: **ValueError** if resource is clearly invalid or the descriptor
      type can't be determined when 'descriptor_type' is **None**
    """

    args = dict(self._default_args)
    args.update(query_args)

    if not 'endpoints' in args:
      args['endpoints'] = self._endpoints

    if not 'fall_back_to_authority' in args:
      args['fall_back_to_authority'] = True

    return Query(
      resource,
      **args
    )


class DirectoryAuthority(object):
  """
  Tor directory authority, a special type of relay `hardcoded into tor
  <https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
  that enumerates the other relays within the network.

  At a very high level tor works as follows...

  1. A volunteer starts up a new tor relay, during which it sends a `server
     descriptor <server_descriptor.html>`_ to each of the directory
     authorities.

  2. Each hour the directory authorities make a `vote <networkstatus.html>`_
     that says who they think the active relays are in the network and some
     attributes about them.

  3. The directory authorities send each other their votes, and compile that
     into the `consensus <networkstatus.html>`_. This document is very similar
     to the votes, the only difference being that the majority of the
     authorities agree upon and sign this document. The individual relay
     entries in the vote or consensus are called `router status entries
     <router_status_entry.html>`_.

  4. Tor clients (people using the service) download the consensus from one of
     the authorities or a mirror to determine the active relays within the
     network. They in turn use this to construct their circuits and use the
     network.

  :var str nickname: nickname of the authority
  :var str address: IP address of the authority, presently they're all IPv4 but
    this may not always be the case
  :var int or_port: port on which the relay services relay traffic
  :var int dir_port: port on which directory information is available
  :var str fingerprint: relay fingerprint
  :var str v3ident: identity key fingerprint used to sign votes and consensus
  """

  def __init__(self, nickname = None, address = None, or_port = None, dir_port = None, fingerprint = None, v3ident = None):
    self.nickname = nickname
    self.address = address
    self.or_port = or_port
    self.dir_port = dir_port
    self.fingerprint = fingerprint
    self.v3ident = v3ident


DIRECTORY_AUTHORITIES = {
  'moria1': DirectoryAuthority(
    nickname = 'moria1',
    address = '128.31.0.39',
    or_port = 9101,
    dir_port = 9131,
    fingerprint = '9695DFC35FFEB861329B9F1AB04C46397020CE31',
    v3ident = 'D586D18309DED4CD6D57C18FDB97EFA96D330566',
  ),
  'tor26': DirectoryAuthority(
    nickname = 'tor26',
    address = '86.59.21.38',
    or_port = 443,
    dir_port = 80,
    fingerprint = '847B1F850344D7876491A54892F904934E4EB85D',
    v3ident = '14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4',
  ),
  'dizum': DirectoryAuthority(
    nickname = 'dizum',
    address = '194.109.206.212',
    or_port = 443,
    dir_port = 80,
    fingerprint = '7EA6EAD6FD83083C538F44038BBFA077587DD755',
    v3ident = 'E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58',
  ),
  'Tonga': DirectoryAuthority(
    nickname = 'Tonga',
    address = '82.94.251.203',
    or_port = 443,
    dir_port = 80,
    fingerprint = '4A0CCD2DDC7995083D73F5D667100C8A5831F16D',
    v3ident = None,  # does not vote in the consensus
  ),
  'turtles': DirectoryAuthority(
    nickname = 'turtles',
    address = '76.73.17.194',
    or_port = 9090,
    dir_port = 9030,
    fingerprint = 'F397038ADC51336135E7B80BD99CA3844360292B',
    v3ident = '27B6B5996C426270A5C95488AA5BCEB6BCC86956',
  ),
  'gabelmoo': DirectoryAuthority(
    nickname = 'gabelmoo',
    address = '212.112.245.170',
    or_port = 443,
    dir_port = 80,
    fingerprint = 'F2044413DAC2E02E3D6BCF4735A19BCA1DE97281',
    v3ident = 'ED03BB616EB2F60BEC80151114BB25CEF515B226',
  ),
  'dannenberg': DirectoryAuthority(
    nickname = 'dannenberg',
    address = '193.23.244.244',
    or_port = 443,
    dir_port = 80,
    fingerprint = '7BE683E65D48141321C5ED92F075C55364AC7123',
    v3ident = '585769C78764D58426B8B52B6651A5A71137189A',
  ),
  'urras': DirectoryAuthority(
    nickname = 'urras',
    address = '208.83.223.34',
    or_port = 80,
    dir_port = 443,
    fingerprint = '0AD3FA884D18F89EEA2D89C019379E0E7FD94417',
    v3ident = '80550987E1D626E3EBA5E5E75A458DE0626D088C',
  ),
  'maatuska': DirectoryAuthority(
    nickname = 'maatuska',
    address = '171.25.193.9',
    or_port = 80,
    dir_port = 443,
    fingerprint = 'BD6A829255CB08E66FBE7D3748363586E46B3810',
    v3ident = '49015F787433103580E3B66A1707A00E60F2D15B',
  ),
  'Faravahar': DirectoryAuthority(
    nickname = 'Faravahar',
    address = '154.35.32.5',
    or_port = 443,
    dir_port = 80,
    fingerprint = 'CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC',
    v3ident = 'EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97',
  ),
}


def get_authorities():
  """
  Provides the Tor directory authority information as of **Tor commit 00bcc25
  (8/27/13)**. The directory information is hardcoded into Tor and occasionally
  changes, so the information this provides might not necessarily match your
  version of tor.
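
  For example...

  ::

    print "moria1's DirPort is %i" % get_authorities()['moria1'].dir_port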

  :returns: dict of str nicknames to :class:`~stem.descriptor.remote.DirectoryAuthority` instances
  """

  return dict(DIRECTORY_AUTHORITIES)

749
lib/stem/descriptor/router_status_entry.py
Normal file
@ -0,0 +1,749 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for router status entries, the information for individual routers
within a network status document. This information is provided from a few
sources...

* control port via 'GETINFO ns/\*' and 'GETINFO md/\*' queries
* router entries in a network status document, like the cached-consensus
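
For example, to parse a single entry (a minimal sketch where 'entry_content'
is assumed to hold the raw text of one consensus entry)...

::

  from stem.descriptor.router_status_entry import RouterStatusEntryV3

  entry = RouterStatusEntryV3(entry_content)
  print '%s (%s)' % (entry.nickname, entry.fingerprint)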

**Module Overview:**

::

  RouterStatusEntry - Common parent for router status entries
    |- RouterStatusEntryV2 - Entry for a network status v2 document
    |- RouterStatusEntryV3 - Entry for a network status v3 document
    +- RouterStatusEntryMicroV3 - Entry for a microdescriptor flavored v3 document
"""

import base64
import binascii
import datetime

import stem.exit_policy
import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools
import stem.version

from stem.descriptor import (
  KEYWORD_LINE,
  Descriptor,
  _get_descriptor_components,
  _read_until_keywords,
)


def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
  """
  Reads a range of the document_file containing some number of entry_class
  instances. We delimit the entry_class entries by the keyword on their
  first line (entry_keyword). When finished the document is left at the
  end_position.

  Either an end_position or section_end_keywords must be provided.

  :param file document_file: file with network status document content
  :param bool validate: checks the validity of the document's contents if
    **True**, skips these checks otherwise
  :param class entry_class: class to construct instances of
  :param str entry_keyword: first keyword for the entry instances
  :param int start_position: start of the section, default is the current position
  :param int end_position: end of the section
  :param tuple section_end_keywords: keyword(s) that delimit the end of the
    section if no end_position was provided
  :param tuple extra_args: extra arguments for the entry_class (after the
    content and validate flag)

  :returns: iterator over entry_class instances

  :raises:
    * **ValueError** if the contents are malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  if start_position:
    document_file.seek(start_position)
  else:
    start_position = document_file.tell()

  # check if we're starting at the end of the section (ie, there's no entries to read)
  if section_end_keywords:
    first_keyword = None
    line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))

    if line_match:
      first_keyword = line_match.groups()[0]

    document_file.seek(start_position)

    if first_keyword in section_end_keywords:
      return

  while end_position is None or document_file.tell() < end_position:
    desc_lines, ending_keyword = _read_until_keywords(
      (entry_keyword,) + section_end_keywords,
      document_file,
      ignore_first = True,
      end_position = end_position,
      include_ending_keyword = True
    )

    desc_content = bytes.join(b"", desc_lines)

    if desc_content:
      yield entry_class(desc_content, validate, *extra_args)

      # check if we stopped at the end of the section
      if ending_keyword in section_end_keywords:
        break
    else:
      break


class RouterStatusEntry(Descriptor):
  """
  Information about an individual router stored within a network status
  document. This is the common parent for concrete status entry types.

  :var stem.descriptor.networkstatus.NetworkStatusDocument document: **\*** document that this descriptor came from

  :var str nickname: **\*** router's nickname
  :var str fingerprint: **\*** router's fingerprint
  :var datetime published: **\*** router's publication
  :var str address: **\*** router's IP address
  :var int or_port: **\*** router's ORPort
  :var int dir_port: **\*** router's DirPort

  :var list flags: **\*** list of :data:`~stem.Flag` associated with the relay

  :var stem.version.Version version: parsed version of tor, this is **None** if
    the relay's using a new versioning scheme
  :var str version_line: versioning information reported by the relay
  """

  def __init__(self, content, validate, document):
    """
    Parse a router descriptor in a network status document.

    :param str content: router descriptor content to be parsed
    :param NetworkStatusDocument document: document this descriptor came from
    :param bool validate: checks the validity of the content if **True**, skips
      these checks otherwise

    :raises: **ValueError** if the descriptor data is invalid
    """

    super(RouterStatusEntry, self).__init__(content)
    content = stem.util.str_tools._to_unicode(content)

    self.document = document

    self.nickname = None
    self.fingerprint = None
    self.published = None
    self.address = None
    self.or_port = None
    self.dir_port = None

    self.flags = None

    self.version_line = None
    self.version = None

    self._unrecognized_lines = []

    entries = _get_descriptor_components(content, validate)

    if validate:
      self._check_constraints(entries)

    self._parse(entries, validate)

  def _parse(self, entries, validate):
    """
    Parses the given content and applies the attributes.

    :param dict entries: keyword => (value, pgp key) entries
    :param bool validate: checks validity if **True**

    :raises: **ValueError** if a validity check fails
    """

    for keyword, values in entries.items():
      value, _ = values[0]

      if keyword == 's':
        _parse_s_line(self, value, validate)
      elif keyword == 'v':
        _parse_v_line(self, value, validate)
      else:
        self._unrecognized_lines.append("%s %s" % (keyword, value))

  def _check_constraints(self, entries):
    """
    Does a basic check that the entries conform to this descriptor type's
    constraints.

    :param dict entries: keyword => (value, pgp key) entries

    :raises: **ValueError** if an issue arises in validation
    """

    for keyword in self._required_fields():
      if not keyword in entries:
        raise ValueError("%s must have a '%s' line:\n%s" % (self._name(True), keyword, str(self)))

    for keyword in self._single_fields():
      if keyword in entries and len(entries[keyword]) > 1:
        raise ValueError("%s can only have a single '%s' line, got %i:\n%s" % (self._name(True), keyword, len(entries[keyword]), str(self)))

    if 'r' != entries.keys()[0]:
      raise ValueError("%s are expected to start with a 'r' line:\n%s" % (self._name(True), str(self)))

  def _name(self, is_plural = False):
    """
    Name for this descriptor type.
    """

    if is_plural:
      return "Router status entries"
    else:
      return "Router status entry"

  def _required_fields(self):
    """
    Provides lines that must appear in the descriptor.
    """

    return ()

  def _single_fields(self):
    """
    Provides lines that can only appear in the descriptor once.
    """

    return ()

  def get_unrecognized_lines(self):
    """
    Provides any unrecognized lines.

    :returns: list of unrecognized lines
    """

    return list(self._unrecognized_lines)

  def _compare(self, other, method):
    if not isinstance(other, RouterStatusEntry):
      return False

    return method(str(self).strip(), str(other).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)


class RouterStatusEntryV2(RouterStatusEntry):
  """
  Information about an individual router stored within a version 2 network
  status document.

  :var str digest: **\*** router's upper-case hex digest

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, content, validate = True, document = None):
    self.digest = None
    super(RouterStatusEntryV2, self).__init__(content, validate, document)

  def _parse(self, entries, validate):
    for keyword, values in entries.items():
      value, _ = values[0]

      if keyword == 'r':
        _parse_r_line(self, value, validate, True)
        del entries['r']

    RouterStatusEntry._parse(self, entries, validate)

  def _name(self, is_plural = False):
    if is_plural:
      return "Router status entries (v2)"
    else:
      return "Router status entry (v2)"

  def _required_fields(self):
    return ('r',)

  def _single_fields(self):
    return ('r', 's', 'v')

  def _compare(self, other, method):
    if not isinstance(other, RouterStatusEntryV2):
      return False

    return method(str(self).strip(), str(other).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)


class RouterStatusEntryV3(RouterStatusEntry):
  """
  Information about an individual router stored within a version 3 network
  status document.

  :var list or_addresses: **\*** relay's OR addresses, this is a tuple listing
    of the form (address (**str**), port (**int**), is_ipv6 (**bool**))
  :var str digest: **\*** router's upper-case hex digest

  :var int bandwidth: bandwidth claimed by the relay (in kb/s)
  :var int measured: bandwidth measured to be available by the relay
  :var bool is_unmeasured: bandwidth measurement isn't based on three or more
    measurements
  :var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
    information that isn't yet recognized

  :var stem.exit_policy.MicroExitPolicy exit_policy: router's exit policy

  :var list microdescriptor_hashes: **\*** tuples of two values, the list of
    consensus methods for generating a set of digests and the 'algorithm =>
    digest' mappings

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, content, validate = True, document = None):
    self.or_addresses = []
    self.digest = None

    self.bandwidth = None
    self.measured = None
    self.is_unmeasured = False
    self.unrecognized_bandwidth_entries = []

    self.exit_policy = None
    self.microdescriptor_hashes = []

    super(RouterStatusEntryV3, self).__init__(content, validate, document)

  def _parse(self, entries, validate):
    for keyword, values in entries.items():
      value, _ = values[0]

      if keyword == 'r':
        _parse_r_line(self, value, validate, True)
        del entries['r']
      elif keyword == 'a':
        for entry, _ in values:
          _parse_a_line(self, entry, validate)

        del entries['a']
      elif keyword == 'w':
        _parse_w_line(self, value, validate)
        del entries['w']
      elif keyword == 'p':
        _parse_p_line(self, value, validate)
        del entries['p']
      elif keyword == 'm':
        for entry, _ in values:
          _parse_m_line(self, entry, validate)

        del entries['m']

    RouterStatusEntry._parse(self, entries, validate)

  def _name(self, is_plural = False):
    if is_plural:
      return "Router status entries (v3)"
    else:
      return "Router status entry (v3)"

  def _required_fields(self):
    return ('r', 's')

  def _single_fields(self):
    return ('r', 's', 'v', 'w', 'p')

  def _compare(self, other, method):
    if not isinstance(other, RouterStatusEntryV3):
      return False

    return method(str(self).strip(), str(other).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)


class RouterStatusEntryMicroV3(RouterStatusEntry):
  """
  Information about an individual router stored within a microdescriptor
  flavored network status document.

  :var int bandwidth: bandwidth claimed by the relay (in kb/s)
  :var int measured: bandwidth measured to be available by the relay
  :var bool is_unmeasured: bandwidth measurement isn't based on three or more
    measurements
  :var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
    information that isn't yet recognized

  :var str digest: **\*** router's hex encoded digest of our corresponding microdescriptor

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, content, validate = True, document = None):
    self.bandwidth = None
    self.measured = None
    self.is_unmeasured = False
    self.unrecognized_bandwidth_entries = []

    self.digest = None

    super(RouterStatusEntryMicroV3, self).__init__(content, validate, document)

  def _parse(self, entries, validate):
    for keyword, values in entries.items():
      value, _ = values[0]

      if keyword == 'r':
        _parse_r_line(self, value, validate, False)
        del entries['r']
      elif keyword == 'w':
        _parse_w_line(self, value, validate)
        del entries['w']
      elif keyword == 'm':
        # "m" digest
        # example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70

        self.digest = _base64_to_hex(value, validate, False)
        del entries['m']

    RouterStatusEntry._parse(self, entries, validate)

  def _name(self, is_plural = False):
    if is_plural:
      return "Router status entries (micro v3)"
    else:
      return "Router status entry (micro v3)"

  def _required_fields(self):
    return ('r', 's', 'm')

  def _single_fields(self):
    return ('r', 's', 'v', 'w', 'm')

  def _compare(self, other, method):
    if not isinstance(other, RouterStatusEntryMicroV3):
      return False

    return method(str(self).strip(), str(other).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
def _parse_r_line(desc, value, validate, include_digest = True):
  # Parses a RouterStatusEntry's 'r' line. They're very nearly identical for
  # all current entry types (v2, v3, and microdescriptor v3) with one little
  # wrinkle: only the microdescriptor flavor excludes a 'digest' field.
  #
  # For v2 and v3 router status entries:
  #   "r" nickname identity digest publication IP ORPort DirPort
  #   example: r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0
  #
  # For v3 microdescriptor router status entries:
  #   "r" nickname identity publication IP ORPort DirPort
  #   example: r Konata ARIJF2zbqirB9IwsW0mQznccWww 2012-09-24 13:40:40 69.64.48.168 9001 9030

  r_comp = value.split(" ")

  # inject a None for the digest to normalize the field positioning
  if not include_digest:
    r_comp.insert(2, None)

  if len(r_comp) < 8:
    if not validate:
      return

    expected_field_count = 'eight' if include_digest else 'seven'
    raise ValueError("%s 'r' line must have %s values: r %s" % (desc._name(), expected_field_count, value))

  if validate:
    if not stem.util.tor_tools.is_valid_nickname(r_comp[0]):
      raise ValueError("%s nickname isn't valid: %s" % (desc._name(), r_comp[0]))
    elif not stem.util.connection.is_valid_ipv4_address(r_comp[5]):
      raise ValueError("%s address isn't a valid IPv4 address: %s" % (desc._name(), r_comp[5]))
    elif not stem.util.connection.is_valid_port(r_comp[6]):
      raise ValueError("%s ORPort is invalid: %s" % (desc._name(), r_comp[6]))
    elif not stem.util.connection.is_valid_port(r_comp[7], allow_zero = True):
      raise ValueError("%s DirPort is invalid: %s" % (desc._name(), r_comp[7]))
  elif not (r_comp[6].isdigit() and r_comp[7].isdigit()):
    return

  desc.nickname = r_comp[0]
  desc.fingerprint = _base64_to_hex(r_comp[1], validate)

  if include_digest:
    desc.digest = _base64_to_hex(r_comp[2], validate)

  desc.address = r_comp[5]
  desc.or_port = int(r_comp[6])
  desc.dir_port = None if r_comp[7] == '0' else int(r_comp[7])

  try:
    published = "%s %s" % (r_comp[3], r_comp[4])
    desc.published = datetime.datetime.strptime(published, "%Y-%m-%d %H:%M:%S")
  except ValueError:
    if validate:
      raise ValueError("Publication time wasn't parsable: r %s" % value)


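# A brief, informal illustration of the field positions handled above,
# splitting the v3 example line from the comment (purely for reference)...
#
#   >>> "mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0".split(" ")
#   ['mauer', 'BD7xbfsCFku3+tgybEZsg8Yjhvw', 'itcuKQ6PuPLJ7m/Oi928WjO2j8g', '2012-06-22', '13:19:32', '80.101.105.103', '9001', '0']
#
# ... so r_comp[0] is the nickname, r_comp[1] the base64 identity, r_comp[2]
# the digest (or the injected None for microdescriptor entries), r_comp[3:5]
# the publication time, and r_comp[5:8] the address, ORPort, and DirPort.
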
def _parse_a_line(desc, value, validate):
  # "a" SP address ":" portlist
  # example: a [2001:888:2133:0:82:94:251:204]:9001

  if not ':' in value:
    if not validate:
      return

    raise ValueError("%s 'a' line must be of the form '[address]:[ports]': a %s" % (desc._name(), value))

  address, port = value.rsplit(':', 1)
  is_ipv6 = address.startswith("[") and address.endswith("]")

  if is_ipv6:
    address = address[1:-1]  # remove brackets

  if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
          (is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
    if not validate:
      return
    else:
      raise ValueError("%s 'a' line has a malformed address: a %s" % (desc._name(), value))

  if stem.util.connection.is_valid_port(port):
    desc.or_addresses.append((address, int(port), is_ipv6))
  elif validate:
    raise ValueError("%s 'a' line had an invalid port (%s): a %s" % (desc._name(), port, value))


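# For instance (an informal sketch), the example 'a' line above splits as...
#
#   >>> "[2001:888:2133:0:82:94:251:204]:9001".rsplit(':', 1)
#   ['[2001:888:2133:0:82:94:251:204]', '9001']
#
# ... and, once the brackets are stripped, yields the or_addresses entry
# ('2001:888:2133:0:82:94:251:204', 9001, True).
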
def _parse_s_line(desc, value, validate):
  # "s" Flags
  # example: s Named Running Stable Valid

  flags = [] if value == "" else value.split(" ")
  desc.flags = flags

  if validate:
    for flag in flags:
      if flags.count(flag) > 1:
        raise ValueError("%s had duplicate flags: s %s" % (desc._name(), value))
      elif flag == "":
        raise ValueError("%s had extra whitespace on its 's' line: s %s" % (desc._name(), value))


def _parse_v_line(desc, value, validate):
  # "v" version
  # example: v Tor 0.2.2.35
  #
  # The spec says that if this starts with "Tor " then what follows is a
  # tor version. If not then it has "upgraded to a more sophisticated
  # protocol versioning system".

  desc.version_line = value

  if value.startswith("Tor "):
    try:
      desc.version = stem.version._get_version(value[4:])
    except ValueError as exc:
      if validate:
        raise ValueError("%s has a malformed tor version (%s): v %s" % (desc._name(), exc, value))


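# A quick sketch of the happy path above (values are illustrative)...
#
#   >>> value = "Tor 0.2.2.35"
#   >>> value[4:]
#   '0.2.2.35'
#
# ... which stem.version._get_version() then turns into a Version instance
# assigned to desc.version.
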
def _parse_w_line(desc, value, validate):
  # "w" "Bandwidth=" INT ["Measured=" INT] ["Unmeasured=1"]
  # example: w Bandwidth=7980

  w_comp = value.split(" ")

  if len(w_comp) < 1:
    if not validate:
      return

    raise ValueError("%s 'w' line is blank: w %s" % (desc._name(), value))
  elif not w_comp[0].startswith("Bandwidth="):
    if not validate:
      return

    raise ValueError("%s 'w' line needs to start with a 'Bandwidth=' entry: w %s" % (desc._name(), value))

  for w_entry in w_comp:
    if '=' in w_entry:
      w_key, w_value = w_entry.split('=', 1)
    else:
      w_key, w_value = w_entry, None

    if w_key == "Bandwidth":
      if not (w_value and w_value.isdigit()):
        if not validate:
          return

        raise ValueError("%s 'Bandwidth=' entry needs to have a numeric value: w %s" % (desc._name(), value))

      desc.bandwidth = int(w_value)
    elif w_key == "Measured":
      if not (w_value and w_value.isdigit()):
        if not validate:
          return

        raise ValueError("%s 'Measured=' entry needs to have a numeric value: w %s" % (desc._name(), value))

      desc.measured = int(w_value)
    elif w_key == "Unmeasured":
      if validate and w_value != "1":
        raise ValueError("%s 'Unmeasured=' should only have the value of '1': w %s" % (desc._name(), value))

      desc.is_unmeasured = True
    else:
      desc.unrecognized_bandwidth_entries.append(w_entry)


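# An informal example of the loop above. Note that a real consensus wouldn't
# carry both Measured and Unmeasured; the combination here is illustrative...
#
#   >>> "Bandwidth=7980 Measured=7500 Unmeasured=1".split(" ")
#   ['Bandwidth=7980', 'Measured=7500', 'Unmeasured=1']
#
# ... setting bandwidth to 7980, measured to 7500, and is_unmeasured to True.
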
def _parse_p_line(desc, value, validate):
  # "p" ("accept" / "reject") PortList
  # p reject 1-65535
  # example: p accept 80,110,143,443,993,995,6660-6669,6697,7000-7001

  try:
    desc.exit_policy = stem.exit_policy.MicroExitPolicy(value)
  except ValueError as exc:
    if not validate:
      return

    raise ValueError("%s exit policy is malformed (%s): p %s" % (desc._name(), exc, value))


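# For example (illustrative only), the 'p' line from the comment above yields
# a policy that permits just the listed ports...
#
#   >>> policy = stem.exit_policy.MicroExitPolicy("accept 80,110,143,443,993,995,6660-6669,6697,7000-7001")
#   >>> policy.can_exit_to("75.119.206.243", 80)
#   True
#   >>> policy.can_exit_to("75.119.206.243", 22)
#   False
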
def _parse_m_line(desc, value, validate):
  # "m" methods 1*(algorithm "=" digest)
  # example: m 8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs

  m_comp = value.split(" ")

  if not (desc.document and desc.document.is_vote):
    if not validate:
      return

    vote_status = "consensus" if desc.document else "<undefined document>"
    raise ValueError("%s 'm' line should only appear in votes (appeared in a %s): m %s" % (desc._name(), vote_status, value))
  elif len(m_comp) < 1:
    if not validate:
      return

    raise ValueError("%s 'm' line needs to start with a series of methods: m %s" % (desc._name(), value))

  try:
    methods = [int(entry) for entry in m_comp[0].split(",")]
  except ValueError:
    if not validate:
      return

    raise ValueError("%s microdescriptor methods should be a series of comma separated integers: m %s" % (desc._name(), value))

  hashes = {}

  for entry in m_comp[1:]:
    if not '=' in entry:
      if not validate:
        continue

      raise ValueError("%s can only have a series of 'algorithm=digest' mappings after the methods: m %s" % (desc._name(), value))

    hash_name, digest = entry.split('=', 1)
    hashes[hash_name] = digest

  desc.microdescriptor_hashes.append((methods, hashes))


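# A short sketch of the mapping built above for the example 'm' line...
#
#   >>> "8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs".split(" ")
#   ['8,9,10,11,12', 'sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs']
#
# ... which appends ([8, 9, 10, 11, 12], {'sha256': 'g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs'})
# to desc.microdescriptor_hashes.
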
def _base64_to_hex(identity, validate, check_if_fingerprint = True):
  """
  Decodes a base64 value to hex. For example...

  ::

    >>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s')
    'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB'

  :param str identity: encoded fingerprint from the consensus
  :param bool validate: checks validity if **True**
  :param bool check_if_fingerprint: asserts that the result is a fingerprint if **True**

  :returns: **str** with the uppercase hex encoding of the relay's fingerprint

  :raises: **ValueError** if the result isn't a valid fingerprint
  """

  # trailing equal signs were stripped from the identity
  missing_padding = len(identity) % 4
  identity += "=" * missing_padding

  fingerprint = ""

  try:
    identity_decoded = base64.b64decode(stem.util.str_tools._to_bytes(identity))
  except (TypeError, binascii.Error):
    if not validate:
      return None

    raise ValueError("Unable to decode identity string '%s'" % identity)

  for char in identity_decoded:
    # Individual characters are either standard ASCII or hex encoded, and each
    # represents two hex digits. For instance...
    #
    # >>> ord('\n')
    # 10
    # >>> hex(10)
    # '0xa'
    # >>> '0xa'[2:].zfill(2).upper()
    # '0A'

    char_int = char if isinstance(char, int) else ord(char)
    fingerprint += hex(char_int)[2:].zfill(2).upper()

  if check_if_fingerprint:
    if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
      if not validate:
        return None

      raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))

  return fingerprint
968
lib/stem/descriptor/server_descriptor.py
Normal file
@ -0,0 +1,968 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for Tor server descriptors, which contain the infrequently changing
information about a Tor relay (contact information, exit policy, public keys,
etc). This information is provided from a few sources...

* control port via 'GETINFO desc/\*' queries
* the 'cached-descriptors' file in tor's data directory
* tor metrics, at https://metrics.torproject.org/data.html
* directory authorities and mirrors via their DirPort

**Module Overview:**

::

  ServerDescriptor - Tor server descriptor.
    |- RelayDescriptor - Server descriptor for a relay.
    |
    |- BridgeDescriptor - Scrubbed server descriptor for a bridge.
    |  |- is_scrubbed - checks if our content has been properly scrubbed
    |  +- get_scrubbing_issues - description of issues with our scrubbing
    |
    |- digest - calculates the upper-case hex digest value for our content
    |- get_unrecognized_lines - lines with unrecognized content
    |- get_annotations - dictionary of content prior to the descriptor entry
    +- get_annotation_lines - lines that provided the annotations
"""

import base64
import codecs
import datetime
import hashlib
import re

import stem.descriptor.extrainfo_descriptor
import stem.exit_policy
import stem.prereq
import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools
import stem.version

from stem.util import log

from stem.descriptor import (
  PGP_BLOCK_END,
  Descriptor,
  _get_bytes_field,
  _get_descriptor_components,
  _read_until_keywords,
)

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

# relay descriptors must have exactly one of the following
REQUIRED_FIELDS = (
  "router",
  "bandwidth",
  "published",
  "onion-key",
  "signing-key",
  "router-signature",
)

# optional entries that can appear at most once
SINGLE_FIELDS = (
  "platform",
  "fingerprint",
  "hibernating",
  "uptime",
  "contact",
  "read-history",
  "write-history",
  "eventdns",
  "family",
  "caches-extra-info",
  "extra-info-digest",
  "hidden-service-dir",
  "protocols",
  "allow-single-hop-exits",
  "ntor-onion-key",
)

DEFAULT_IPV6_EXIT_POLICY = stem.exit_policy.MicroExitPolicy("reject 1-65535")
REJECT_ALL_POLICY = stem.exit_policy.ExitPolicy("reject *:*")


def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
  """
  Iterates over the server descriptors in a file.

  :param file descriptor_file: file with descriptor content
  :param bool is_bridge: parses the file as being a bridge descriptor
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator for ServerDescriptor instances in the file

  :raises:
    * **ValueError** if the content is malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  # Handler for relay descriptors
  #
  # Cached descriptors consist of annotations followed by the descriptor
  # itself. For instance...
  #
  #   @downloaded-at 2012-03-14 16:31:05
  #   @source "145.53.65.130"
  #   router caerSidi 71.35.143.157 9001 0 0
  #   platform Tor 0.2.1.30 on Linux x86_64
  #   <rest of the descriptor content>
  #   router-signature
  #   -----BEGIN SIGNATURE-----
  #   <signature for the above descriptor>
  #   -----END SIGNATURE-----
  #
  # Metrics descriptor files are the same, but lack any annotations. The
  # parsing below simply does the following...
  #
  #   - parse as annotations until we get to "router"
  #   - parse as descriptor content until we get to "router-signature" followed
  #     by the end of the signature block
  #   - construct a descriptor and provide it back to the caller
  #
  # Any annotations after the last server descriptor are ignored (never
  # provided to the caller).

  while True:
    annotations = _read_until_keywords("router", descriptor_file)
    descriptor_content = _read_until_keywords("router-signature", descriptor_file)

    # we've reached the 'router-signature', now include the pgp style block
    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
    descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)

    if descriptor_content:
      # strip newlines from annotations
      annotations = map(bytes.strip, annotations)

      descriptor_text = bytes.join(b"", descriptor_content)

      if is_bridge:
        yield BridgeDescriptor(descriptor_text, validate, annotations, **kwargs)
      else:
        yield RelayDescriptor(descriptor_text, validate, annotations, **kwargs)
    else:
      if validate and annotations:
        orphaned_annotations = stem.util.str_tools._to_unicode(b'\n'.join(annotations))
        raise ValueError('Content doesn\'t conform to being a server descriptor:\n%s' % orphaned_annotations)

      break  # done parsing descriptors


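# A minimal usage sketch for the function above (the 'cached-descriptors'
# path is hypothetical)...
#
#   >>> with open('cached-descriptors', 'rb') as descriptor_file:
#   ...   for desc in _parse_file(descriptor_file):
#   ...     print desc.nickname
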
class ServerDescriptor(Descriptor):
  """
  Common parent for server descriptors.

  :var str nickname: **\*** relay's nickname
  :var str fingerprint: identity key fingerprint
  :var datetime published: **\*** time in UTC when this descriptor was made

  :var str address: **\*** IPv4 address of the relay
  :var int or_port: **\*** port used for relaying
  :var int socks_port: **\*** port used as client (deprecated, always **None**)
  :var int dir_port: **\*** port used for descriptor mirroring

  :var bytes platform: line with operating system and tor version
  :var stem.version.Version tor_version: version of tor
  :var str operating_system: operating system
  :var int uptime: uptime when published in seconds
  :var bytes contact: contact information
  :var stem.exit_policy.ExitPolicy exit_policy: **\*** stated exit policy
  :var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6
  :var set family: **\*** nicknames or fingerprints of declared family

  :var int average_bandwidth: **\*** average rate it's willing to relay in bytes/s
  :var int burst_bandwidth: **\*** burst rate it's willing to relay in bytes/s
  :var int observed_bandwidth: **\*** estimated capacity based on usage in bytes/s

  :var list link_protocols: link protocols supported by the relay
  :var list circuit_protocols: circuit protocols supported by the relay
  :var bool hibernating: **\*** hibernating when published
  :var bool allow_single_hop_exits: **\*** flag if single hop exiting is allowed
  :var bool extra_info_cache: **\*** flag if a mirror for extra-info documents
  :var str extra_info_digest: upper-case hex encoded digest of our extra-info document
  :var bool eventdns: flag for evdns backend (deprecated, always unset)
  :var list or_addresses: **\*** alternative for our address/or_port
    attributes, each entry is a tuple of the form (address (**str**), port
    (**int**), is_ipv6 (**bool**))

  Deprecated, moved to extra-info descriptor...

  :var datetime read_history_end: end of the sampling interval
  :var int read_history_interval: seconds per interval
  :var list read_history_values: bytes read during each interval

  :var datetime write_history_end: end of the sampling interval
  :var int write_history_interval: seconds per interval
  :var list write_history_values: bytes written during each interval

  **\*** attribute is either required when we're parsed with validation or has
    a default value, others are left as **None** if undefined
  """

  def __init__(self, raw_contents, validate = True, annotations = None):
    """
    Server descriptor constructor, created from an individual relay's
    descriptor content (as provided by "GETINFO desc/*", cached descriptors,
    and metrics).

    By default this validates the descriptor's content as it's parsed. This
    validation can be disabled to either improve performance or be accepting
    of malformed data.

    :param str raw_contents: descriptor content provided by the relay
    :param bool validate: checks the validity of the descriptor's content if
      **True**, skips these checks otherwise
    :param list annotations: lines that appeared prior to the descriptor

    :raises: **ValueError** if the content is malformed and validate is True
    """

    super(ServerDescriptor, self).__init__(raw_contents)

    # Only a few things can be arbitrary bytes according to the dir-spec, so
    # parsing them separately.

    self.platform = _get_bytes_field("platform", raw_contents)
    self.contact = _get_bytes_field("contact", raw_contents)

    raw_contents = stem.util.str_tools._to_unicode(raw_contents)

    self.nickname = None
    self.fingerprint = None
    self.published = None

    self.address = None
    self.or_port = None
    self.socks_port = None
    self.dir_port = None

    self.tor_version = None
    self.operating_system = None
    self.uptime = None
    self.exit_policy = None
    self.exit_policy_v6 = DEFAULT_IPV6_EXIT_POLICY
    self.family = set()

    self.average_bandwidth = None
    self.burst_bandwidth = None
    self.observed_bandwidth = None

    self.link_protocols = None
    self.circuit_protocols = None
    self.hibernating = False
    self.allow_single_hop_exits = False
    self.extra_info_cache = False
    self.extra_info_digest = None
    self.hidden_service_dir = None
    self.eventdns = None
    self.or_addresses = []

    self.read_history_end = None
    self.read_history_interval = None
    self.read_history_values = None

    self.write_history_end = None
    self.write_history_interval = None
    self.write_history_values = None

    self._unrecognized_lines = []

    self._annotation_lines = annotations if annotations else []

    # A descriptor contains a series of 'keyword lines' which are simply a
    # keyword followed by an optional value. Lines can also be followed by a
    # signature block.
    #
    # We care about the ordering of 'accept' and 'reject' entries because this
    # influences the resulting exit policy, but for everything else the order
    # does not matter, so we break them into key / value pairs.

    entries, policy = _get_descriptor_components(raw_contents, validate, ("accept", "reject"))

    if policy == [u'reject *:*']:
      self.exit_policy = REJECT_ALL_POLICY
    else:
      self.exit_policy = stem.exit_policy.ExitPolicy(*policy)

    self._parse(entries, validate)

    if validate:
      self._check_constraints(entries)

  def digest(self):
    """
    Provides the hex encoded sha1 of our content. This value is part of the
    network status entry for this relay.

    :returns: **unicode** with the upper-case hex digest value for this server descriptor
    """

    raise NotImplementedError("Unsupported Operation: this should be implemented by the ServerDescriptor subclass")

  def get_unrecognized_lines(self):
    return list(self._unrecognized_lines)

  @lru_cache()
  def get_annotations(self):
    """
    Provides content that appeared prior to the descriptor. If this comes from
    the cached-descriptors file then this commonly contains content like...

    ::

      @downloaded-at 2012-03-18 21:18:29
      @source "173.254.216.66"

    :returns: **dict** with the key/value pairs in our annotations
    """

    annotation_dict = {}

    for line in self._annotation_lines:
      if b" " in line:
        key, value = line.split(b" ", 1)
        annotation_dict[key] = value
      else:
        annotation_dict[line] = None

    return annotation_dict

  def get_annotation_lines(self):
    """
    Provides the lines of content that appeared prior to the descriptor. This
    is the same as the
    :func:`~stem.descriptor.server_descriptor.ServerDescriptor.get_annotations`
    results, but with the unparsed lines and ordering retained.

    :returns: **list** with the lines of annotation that came before this descriptor
    """

    return self._annotation_lines

  def _parse(self, entries, validate):
    """
    Parses a series of 'keyword => (value, pgp block)' mappings and applies
    them as attributes.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if **True**

    :raises: **ValueError** if an error occurs in validation
    """

    for keyword, values in entries.items():
      # most just work with the first (and only) value
      value, block_contents = values[0]

      line = "%s %s" % (keyword, value)  # original line

      if block_contents:
        line += "\n%s" % block_contents

      if keyword == "router":
        # "router" nickname address ORPort SocksPort DirPort
        router_comp = value.split()

        if len(router_comp) < 5:
          if not validate:
            continue

          raise ValueError("Router line must have five values: %s" % line)

        if validate:
          if not stem.util.tor_tools.is_valid_nickname(router_comp[0]):
            raise ValueError("Router line entry isn't a valid nickname: %s" % router_comp[0])
          elif not stem.util.connection.is_valid_ipv4_address(router_comp[1]):
            raise ValueError("Router line entry isn't a valid IPv4 address: %s" % router_comp[1])
          elif not stem.util.connection.is_valid_port(router_comp[2], allow_zero = True):
            raise ValueError("Router line's ORPort is invalid: %s" % router_comp[2])
          elif not stem.util.connection.is_valid_port(router_comp[3], allow_zero = True):
            raise ValueError("Router line's SocksPort is invalid: %s" % router_comp[3])
          elif not stem.util.connection.is_valid_port(router_comp[4], allow_zero = True):
            raise ValueError("Router line's DirPort is invalid: %s" % router_comp[4])
        elif not (router_comp[2].isdigit() and router_comp[3].isdigit() and router_comp[4].isdigit()):
          continue

        self.nickname = router_comp[0]
        self.address = router_comp[1]
        self.or_port = int(router_comp[2])
        self.socks_port = None if router_comp[3] == '0' else int(router_comp[3])
        self.dir_port = None if router_comp[4] == '0' else int(router_comp[4])
      elif keyword == "bandwidth":
        # "bandwidth" bandwidth-avg bandwidth-burst bandwidth-observed
        bandwidth_comp = value.split()

        if len(bandwidth_comp) < 3:
          if not validate:
            continue

          raise ValueError("Bandwidth line must have three values: %s" % line)
        elif not bandwidth_comp[0].isdigit():
          if not validate:
            continue

          raise ValueError("Bandwidth line's average rate isn't numeric: %s" % bandwidth_comp[0])
        elif not bandwidth_comp[1].isdigit():
          if not validate:
            continue

          raise ValueError("Bandwidth line's burst rate isn't numeric: %s" % bandwidth_comp[1])
        elif not bandwidth_comp[2].isdigit():
          if not validate:
            continue

          raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2])

        self.average_bandwidth = int(bandwidth_comp[0])
        self.burst_bandwidth = int(bandwidth_comp[1])
        self.observed_bandwidth = int(bandwidth_comp[2])
      elif keyword == "platform":
        # "platform" string

        # The platform attribute was set earlier. This line can contain any
        # arbitrary data, but tor seems to report its version followed by the
        # os like the following...
        #
        #   platform Tor 0.2.2.35 (git-73ff13ab3cc9570d) on Linux x86_64
        #
        # There's no guarantee that we'll be able to pick out the version, but
        # we might as well try to save our caller the effort.

        platform_match = re.match("^Tor (\S*).* on (.*)$", value)

        if platform_match:
          version_str, self.operating_system = platform_match.groups()

          try:
            self.tor_version = stem.version._get_version(version_str)
          except ValueError:
            pass
      elif keyword == "published":
        # "published" YYYY-MM-DD HH:MM:SS

        try:
          self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          if validate:
            raise ValueError("Published line's time wasn't parsable: %s" % line)
      elif keyword == "fingerprint":
        # This is forty hex digits split into space separated groups of four.
        # Checking that we match this pattern.

        fingerprint = value.replace(" ", "")

        if validate:
          for grouping in value.split(" "):
            if len(grouping) != 4:
              raise ValueError("Fingerprint line should have groupings of four hex digits: %s" % value)

          if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
            raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value)

        self.fingerprint = fingerprint
      elif keyword == "hibernating":
        # "hibernating" 0|1 (in practice only set if one)

        if validate and not value in ("0", "1"):
          raise ValueError("Hibernating line had an invalid value, must be zero or one: %s" % value)

        self.hibernating = value == "1"
      elif keyword == "allow-single-hop-exits":
        self.allow_single_hop_exits = True
      elif keyword == "caches-extra-info":
        self.extra_info_cache = True
      elif keyword == "extra-info-digest":
        # this is forty hex digits, which just so happens to be the same as a
        # fingerprint

        if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
          raise ValueError("Extra-info digests should consist of forty hex digits: %s" % value)

        self.extra_info_digest = value
      elif keyword == "hidden-service-dir":
        if value:
          self.hidden_service_dir = value.split(" ")
        else:
          self.hidden_service_dir = ["2"]
      elif keyword == "uptime":
        # We need to be tolerant of negative uptimes to accommodate a past tor
        # bug...
        #
        # Changes in version 0.1.2.7-alpha - 2007-02-06
        #  - If our system clock jumps back in time, don't publish a negative
        #    uptime in the descriptor. Also, don't let the global rate limiting
        #    buckets go absurdly negative.
        #
        # After parsing all of the attributes we'll double check that negative
        # uptimes only occurred prior to this fix.

        try:
          self.uptime = int(value)
        except ValueError:
          if not validate:
            continue

          raise ValueError("Uptime line must have an integer value: %s" % value)
      elif keyword == "contact":
        pass  # parsed as a bytes field earlier
      elif keyword == "protocols":
        protocols_match = re.match("^Link (.*) Circuit (.*)$", value)

        if protocols_match:
          link_versions, circuit_versions = protocols_match.groups()
          self.link_protocols = link_versions.split(" ")
          self.circuit_protocols = circuit_versions.split(" ")
        elif validate:
          raise ValueError("Protocols line did not match the expected pattern: %s" % line)
      elif keyword == "family":
        self.family = set(value.split(" "))
      elif keyword == "eventdns":
        self.eventdns = value == "1"
      elif keyword == "ipv6-policy":
        self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
      elif keyword == "or-address":
        or_address_entries = [value for (value, _) in values]

        for entry in or_address_entries:
          line = "%s %s" % (keyword, entry)

          if not ":" in entry:
            if not validate:
              continue
            else:
              raise ValueError("or-address line missing a colon: %s" % line)

          address, port = entry.rsplit(':', 1)
          is_ipv6 = address.startswith("[") and address.endswith("]")

          if is_ipv6:
            address = address[1:-1]  # remove brackets

          if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
                  (is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
            if not validate:
              continue
            else:
              raise ValueError("or-address line has a malformed address: %s" % line)

          if stem.util.connection.is_valid_port(port):
            self.or_addresses.append((address, int(port), is_ipv6))
          elif validate:
            raise ValueError("or-address line has a malformed port: %s" % line)
      elif keyword in ("read-history", "write-history"):
        try:
          timestamp, interval, remainder = \
            stem.descriptor.extrainfo_descriptor._parse_timestamp_and_interval(keyword, value)

          try:
            if remainder:
              history_values = [int(entry) for entry in remainder.split(",")]
            else:
              history_values = []
          except ValueError:
            raise ValueError("%s line has non-numeric values: %s" % (keyword, line))

          if keyword == "read-history":
            self.read_history_end = timestamp
            self.read_history_interval = interval
            self.read_history_values = history_values
          else:
            self.write_history_end = timestamp
            self.write_history_interval = interval
            self.write_history_values = history_values
        except ValueError as exc:
          if validate:
            raise exc
      else:
        self._unrecognized_lines.append(line)

    # if we have a negative uptime and a tor version that shouldn't exhibit
    # this bug then fail validation

    if validate and self.uptime and self.tor_version:
      if self.uptime < 0 and self.tor_version >= stem.version.Version("0.1.2.7"):
        raise ValueError("Descriptor for version '%s' had a negative uptime value: %i" % (self.tor_version, self.uptime))

  def _check_constraints(self, entries):
    """
    Does a basic check that the entries conform to this descriptor type's
    constraints.

    :param dict entries: keyword => (value, pgp key) entries

    :raises: **ValueError** if an issue arises in validation
    """

    for keyword in self._required_fields():
      if not keyword in entries:
        raise ValueError("Descriptor must have a '%s' entry" % keyword)

    for keyword in self._single_fields():
      if keyword in entries and len(entries[keyword]) > 1:
        raise ValueError("The '%s' entry can only appear once in a descriptor" % keyword)

    expected_first_keyword = self._first_keyword()
    if expected_first_keyword and expected_first_keyword != entries.keys()[0]:
      raise ValueError("Descriptor must start with a '%s' entry" % expected_first_keyword)

    expected_last_keyword = self._last_keyword()
    if expected_last_keyword and expected_last_keyword != entries.keys()[-1]:
      raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)

    if not self.exit_policy:
      raise ValueError("Descriptor must have at least one 'accept' or 'reject' entry")

  # Constraints that the descriptor must meet to be valid. These can be None if
  # not applicable.

  def _required_fields(self):
    return REQUIRED_FIELDS

  def _single_fields(self):
    return REQUIRED_FIELDS + SINGLE_FIELDS

  def _first_keyword(self):
    return "router"

  def _last_keyword(self):
    return "router-signature"


class RelayDescriptor(ServerDescriptor):
  """
  Server descriptor (`descriptor specification
  <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)

  :var str onion_key: **\*** key used to encrypt EXTEND cells
  :var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
  :var str signing_key: **\*** relay's long-term identity key
  :var str signature: **\*** signature for this descriptor

  **\*** attribute is required when we're parsed with validation
  """

  def __init__(self, raw_contents, validate = True, annotations = None):
    self.onion_key = None
    self.ntor_onion_key = None
    self.signing_key = None
    self.signature = None

    super(RelayDescriptor, self).__init__(raw_contents, validate, annotations)

    # validate the descriptor if required
    if validate:
      self._validate_content()

  @lru_cache()
  def digest(self):
    """
    Provides the digest of our descriptor's content.

    :returns: the digest string encoded in uppercase hex

    :raises: ValueError if the digest cannot be calculated
    """

    # Digest is calculated from everything in the
    # descriptor except the router-signature.

    raw_descriptor = self.get_bytes()
    start_token = b"router "
    sig_token = b"\nrouter-signature\n"
    start = raw_descriptor.find(start_token)
    sig_start = raw_descriptor.find(sig_token)
    end = sig_start + len(sig_token)

    if start >= 0 and sig_start > 0 and end > start:
      for_digest = raw_descriptor[start:end]
      digest_hash = hashlib.sha1(stem.util.str_tools._to_bytes(for_digest))
      return stem.util.str_tools._to_unicode(digest_hash.hexdigest().upper())
    else:
      raise ValueError("unable to calculate digest for descriptor")

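  # An informal illustration (hypothetical content) of the span that gets
  # hashed above...
  #
  #   >>> raw = b"router caerSidi 71.35.143.157 9001 0 0\nplatform Tor\nrouter-signature\n-----BEGIN SIGNATURE-----\n<sig>\n-----END SIGNATURE-----\n"
  #   >>> start = raw.find(b"router ")
  #   >>> end = raw.find(b"\nrouter-signature\n") + len(b"\nrouter-signature\n")
  #   >>> raw[start:end]
  #   'router caerSidi 71.35.143.157 9001 0 0\nplatform Tor\nrouter-signature\n'
  #
  # ... so the sha1 covers everything up to and including the
  # 'router-signature' line, but not the signature block itself.
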
  def _validate_content(self):
    """
    Validates that the descriptor content matches the signature.

    :raises: ValueError if the signature does not match the content
    """

    key_as_bytes = RelayDescriptor._get_key_bytes(self.signing_key)

    # ensure the fingerprint is a hash of the signing key

    if self.fingerprint:
      # calculate the signing key hash

      key_der_as_hash = hashlib.sha1(stem.util.str_tools._to_bytes(key_as_bytes)).hexdigest()

      if key_der_as_hash != self.fingerprint.lower():
        log.warn("Signing key hash: %s != fingerprint: %s" % (key_der_as_hash, self.fingerprint.lower()))
        raise ValueError("Fingerprint does not match hash")

    self._verify_digest(key_as_bytes)

  def _verify_digest(self, key_as_der):
    # check that our digest matches what was signed

    if not stem.prereq.is_crypto_available():
      return

    from Crypto.Util import asn1
    from Crypto.Util.number import bytes_to_long, long_to_bytes

    # get the ASN.1 sequence

    seq = asn1.DerSequence()
    seq.decode(key_as_der)
    modulus = seq[0]
    public_exponent = seq[1]  # should always be 65537

    sig_as_bytes = RelayDescriptor._get_key_bytes(self.signature)

    # convert the descriptor signature to an int

    sig_as_long = bytes_to_long(sig_as_bytes)

    # use the public exponent[e] & the modulus[n] to decrypt the int

    decrypted_int = pow(sig_as_long, public_exponent, modulus)

    # block size will always be 128 for a 1024 bit key

    blocksize = 128

    # convert the int to a byte array.

    decrypted_bytes = long_to_bytes(decrypted_int, blocksize)

    ############################################################################
    ## The decrypted bytes should have a structure exactly along these lines.
    ## 1 byte  - [null '\x00']
    ## 1 byte  - [block type identifier '\x01'] - Should always be 1
    ## N bytes - [padding '\xFF' ]
    ## 1 byte  - [separator '\x00' ]
    ## M bytes - [message]
    ## Total   - 128 bytes
    ## More info here http://www.ietf.org/rfc/rfc2313.txt
    ## esp the Notes in section 8.1
    ############################################################################

    try:
      if decrypted_bytes.index(b'\x00\x01') != 0:
        raise ValueError("Verification failed, identifier missing")
    except ValueError:
      raise ValueError("Verification failed, malformed data")

    try:
      identifier_offset = 2

      # find the separator
      separator_index = decrypted_bytes.index(b'\x00', identifier_offset)
    except ValueError:
      raise ValueError("Verification failed, separator not found")

    digest_hex = codecs.encode(decrypted_bytes[separator_index + 1:], 'hex_codec')
    digest = stem.util.str_tools._to_unicode(digest_hex.upper())

    local_digest = self.digest()

    if digest != local_digest:
      raise ValueError("Decrypted digest does not match local digest (calculated: %s, local: %s)" % (digest, local_digest))

  def _parse(self, entries, validate):
    entries = dict(entries)  # shallow copy since we're destructive

    # handles fields only in server descriptors

    for keyword, values in entries.items():
      value, block_contents = values[0]
      line = "%s %s" % (keyword, value)

      if keyword == "onion-key":
        if validate and not block_contents:
          raise ValueError("Onion key line must be followed by a public key: %s" % line)

        self.onion_key = block_contents
        del entries["onion-key"]
      elif keyword == "ntor-onion-key":
        self.ntor_onion_key = value
        del entries["ntor-onion-key"]
      elif keyword == "signing-key":
        if validate and not block_contents:
          raise ValueError("Signing key line must be followed by a public key: %s" % line)

        self.signing_key = block_contents
        del entries["signing-key"]
      elif keyword == "router-signature":
        if validate and not block_contents:
          raise ValueError("Router signature line must be followed by a signature block: %s" % line)

        self.signature = block_contents
        del entries["router-signature"]

    ServerDescriptor._parse(self, entries, validate)

  def _compare(self, other, method):
    if not isinstance(other, RelayDescriptor):
      return False

    return method(str(self).strip(), str(other).strip())

  def __hash__(self):
    return hash(str(self).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)

  @staticmethod
  def _get_key_bytes(key_string):
    # Remove the newlines from the key string & strip off the
    # '-----BEGIN RSA PUBLIC KEY-----' header and
    # '-----END RSA PUBLIC KEY-----' footer

    key_as_string = ''.join(key_string.split('\n')[1:4])

    # get the key representation in bytes

    key_bytes = base64.b64decode(stem.util.str_tools._to_bytes(key_as_string))

    return key_bytes


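# A schematic note on RelayDescriptor._get_key_bytes above (not a real key):
# for input like...
#
#   -----BEGIN RSA PUBLIC KEY-----
#   <base64 line 1>
#   <base64 line 2>
#   <base64 line 3>
#   -----END RSA PUBLIC KEY-----
#
# ... split('\n')[1:4] selects the three base64 lines, which are joined and
# base64 decoded into the key's DER bytes.
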
class BridgeDescriptor(ServerDescriptor):
  """
  Bridge descriptor (`bridge descriptor specification
  <https://metrics.torproject.org/formats.html#bridgedesc>`_)
  """

  def __init__(self, raw_contents, validate = True, annotations = None):
    self._digest = None

    super(BridgeDescriptor, self).__init__(raw_contents, validate, annotations)

  def digest(self):
    return self._digest

  def _parse(self, entries, validate):
    entries = dict(entries)

    # handles fields only in bridge descriptors
    for keyword, values in entries.items():
      value, block_contents = values[0]
      line = "%s %s" % (keyword, value)

      if keyword == "router-digest":
        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
          raise ValueError("Router digest line had an invalid sha1 digest: %s" % line)

        self._digest = stem.util.str_tools._to_unicode(value)
        del entries["router-digest"]

    ServerDescriptor._parse(self, entries, validate)

  def is_scrubbed(self):
    """
    Checks if we've been properly scrubbed in accordance with the `bridge
    descriptor specification
    <https://metrics.torproject.org/formats.html#bridgedesc>`_. Validation is
    a moving target so this may not be fully up to date.

    :returns: **True** if we're scrubbed, **False** otherwise
    """

    return self.get_scrubbing_issues() == []

  @lru_cache()
  def get_scrubbing_issues(self):
    """
    Provides issues with our scrubbing.

    :returns: **list** of strings which describe issues we have with our
      scrubbing; this list is empty if we're properly scrubbed
    """

    issues = []

    if not self.address.startswith("10."):
      issues.append("Router line's address should be scrubbed to be '10.x.x.x': %s" % self.address)

    if self.contact and self.contact != "somebody":
      issues.append("Contact line should be scrubbed to be 'somebody', but instead had '%s'" % self.contact)

    for address, _, is_ipv6 in self.or_addresses:
      if not is_ipv6 and not address.startswith("10."):
        issues.append("or-address line's address should be scrubbed to be '10.x.x.x': %s" % address)
      elif is_ipv6 and not address.startswith("fd9f:2e19:3bcf::"):
        # TODO: this check isn't quite right because we aren't checking that
        # the next grouping of hex digits contains 1-2 digits
        issues.append("or-address line's address should be scrubbed to be 'fd9f:2e19:3bcf::xx:xxxx': %s" % address)

    for line in self.get_unrecognized_lines():
      if line.startswith("onion-key "):
        issues.append("Bridge descriptors should have their onion-key scrubbed: %s" % line)
      elif line.startswith("signing-key "):
        issues.append("Bridge descriptors should have their signing-key scrubbed: %s" % line)
      elif line.startswith("router-signature "):
        issues.append("Bridge descriptors should have their signature scrubbed: %s" % line)

    return issues

  def _required_fields(self):
    # bridge required fields are the same as a relay descriptor, minus items
    # excluded according to the format page

    excluded_fields = [
      "onion-key",
      "signing-key",
      "router-signature",
    ]

    included_fields = [
      "router-digest",
    ]

    return tuple(included_fields + [f for f in REQUIRED_FIELDS if not f in excluded_fields])

  def _single_fields(self):
    return self._required_fields() + SINGLE_FIELDS

  def _last_keyword(self):
    return None

  def _compare(self, other, method):
    if not isinstance(other, BridgeDescriptor):
      return False

    return method(str(self).strip(), str(other).strip())

  def __hash__(self):
    return hash(str(self).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
115
lib/stem/descriptor/tordnsel.py
Normal file
@ -0,0 +1,115 @@
# Copyright 2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for `TorDNSEL <https://www.torproject.org/projects/tordnsel.html.en>`_
exit list files.
"""

import datetime

import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools

from stem.descriptor import (
  Descriptor,
  _read_until_keywords,
  _get_descriptor_components,
)


def _parse_file(tordnsel_file, validate = True, **kwargs):
  """
  Iterates over a tordnsel file.

  :returns: iterator for :class:`~stem.descriptor.tordnsel.TorDNSEL`
    instances in the file

  :raises:
    * **ValueError** if the content is malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  # skip content prior to the first ExitNode
  _read_until_keywords("ExitNode", tordnsel_file, skip = True)

  while True:
    contents = _read_until_keywords("ExitAddress", tordnsel_file)
    contents += _read_until_keywords("ExitNode", tordnsel_file)

    if contents:
      yield TorDNSEL(bytes.join(b"", contents), validate, **kwargs)
    else:
      break  # done parsing file


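# A minimal usage sketch for the function above (the 'exit-addresses' path is
# hypothetical)...
#
#   >>> with open('exit-addresses', 'rb') as exit_file:
#   ...   for entry in _parse_file(exit_file):
#   ...     print entry.fingerprint, entry.exit_addresses
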
class TorDNSEL(Descriptor):
  """
  TorDNSEL descriptor (`exitlist specification
  <https://www.torproject.org/tordnsel/exitlist-spec.txt>`_)

  :var str fingerprint: **\*** relay's fingerprint
  :var datetime published: **\*** time in UTC when this descriptor was made
  :var datetime last_status: **\*** time in UTC when the relay was seen in a v2 network status
  :var list exit_addresses: **\*** list of (str address, datetime date) tuples consisting of the found IPv4 exit address and the time

  **\*** attribute is either required when we're parsed with validation or has
    a default value, others are left as **None** if undefined
  """

  def __init__(self, raw_contents, validate):
    super(TorDNSEL, self).__init__(raw_contents)
    raw_contents = stem.util.str_tools._to_unicode(raw_contents)
    entries = _get_descriptor_components(raw_contents, validate)

    self.fingerprint = None
    self.published = None
    self.last_status = None
    self.exit_addresses = []

    self._parse(entries, validate)

  def _parse(self, entries, validate):

    for keyword, values in entries.items():
      value, block_content = values[0]

      if validate and block_content:
        raise ValueError("Unexpected block content: %s" % block_content)

      if keyword == "ExitNode":
        if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
          raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value)

        self.fingerprint = value
      elif keyword == "Published":
        try:
          self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          if validate:
            raise ValueError("Published time wasn't parsable: %s" % value)
      elif keyword == "LastStatus":
        try:
          self.last_status = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          if validate:
            raise ValueError("LastStatus time wasn't parsable: %s" % value)
      elif keyword == "ExitAddress":
        for value, block_content in values:
          address, date = value.split(" ", 1)

          if validate:
            if not stem.util.connection.is_valid_ipv4_address(address):
              raise ValueError("ExitAddress isn't a valid IPv4 address: %s" % address)
            elif block_content:
              raise ValueError("Unexpected block content: %s" % block_content)

          try:
            date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
            self.exit_addresses.append((address, date))
          except ValueError:
            if validate:
              raise ValueError("ExitAddress found time wasn't parsable: %s" % value)
      elif validate:
        raise ValueError("Unrecognized keyword: %s" % keyword)
880
lib/stem/exit_policy.py
Normal file
@ -0,0 +1,880 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Representation of tor exit policies. These can be easily used to check if
exiting to a destination is permissible or not. For instance...

::

  >>> from stem.exit_policy import ExitPolicy, MicroExitPolicy
  >>> policy = ExitPolicy("accept *:80", "accept *:443", "reject *:*")
  >>> print policy
  accept *:80, accept *:443, reject *:*
  >>> print policy.summary()
  accept 80, 443
  >>> policy.can_exit_to("75.119.206.243", 80)
  True

  >>> policy = MicroExitPolicy("accept 80,443")
  >>> print policy
  accept 80,443
  >>> policy.can_exit_to("75.119.206.243", 80)
  True

::

  ExitPolicy - Exit policy for a Tor relay
    | + MicroExitPolicy - Microdescriptor exit policy
    |- can_exit_to - check if exiting to this destination is allowed or not
    |- is_exiting_allowed - check if any exiting is allowed
    |- summary - provides a short label, similar to a microdescriptor
    |- __str__ - string representation
    +- __iter__ - ExitPolicyRule entries that this contains

  ExitPolicyRule - Single rule of an exit policy chain
    |- is_address_wildcard - checks if we'll accept any address
    |- is_port_wildcard - checks if we'll accept any port
    |- get_address_type - provides the protocol our ip address belongs to
    |- is_match - checks if we match a given destination
    |- get_mask - provides the address representation of our mask
    |- get_masked_bits - provides the bit representation of our mask
    +- __str__ - string representation for this rule

  get_config_policy - provides the ExitPolicy based on torrc rules

.. data:: AddressType (enum)

  Enumerations for IP address types that can be in an exit policy.

  ============ ===========
  AddressType  Description
  ============ ===========
  **WILDCARD** any address of either IPv4 or IPv6
  **IPv4**     IPv4 address
  **IPv6**     IPv6 address
  ============ ===========
"""

import zlib

import stem.prereq
import stem.util.connection
import stem.util.enum
import stem.util.str_tools

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

AddressType = stem.util.enum.Enum(("WILDCARD", "Wildcard"), ("IPv4", "IPv4"), ("IPv6", "IPv6"))

# Addresses aliased by the 'private' policy. From the tor man page...
#
# To specify all internal and link-local networks (including 0.0.0.0/8,
# 169.254.0.0/16, 127.0.0.0/8, 192.168.0.0/16, 10.0.0.0/8, and 172.16.0.0/12),
# you can use the "private" alias instead of an address.

PRIVATE_ADDRESSES = (
  "0.0.0.0/8",
  "169.254.0.0/16",
  "127.0.0.0/8",
  "192.168.0.0/16",
  "10.0.0.0/8",
  "172.16.0.0/12",
)


def get_config_policy(rules):
  """
  Converts an ExitPolicy found in a torrc to a proper exit pattern. This
  accounts for...

  * ports being optional
  * the 'private' keyword

  :param str,list rules: comma separated rules or list to be converted

  :returns: :class:`~stem.exit_policy.ExitPolicy` reflected by the rules

  :raises: **ValueError** if input isn't a valid tor exit policy
  """

  if isinstance(rules, (bytes, unicode)):
    rules = rules.split(',')

  result = []

  for rule in rules:
    rule = rule.strip()

    if not rule:
      continue

    if not ':' in rule:
      rule = "%s:*" % rule

    if 'private' in rule:
      acceptance = rule.split(' ', 1)[0]
      port = rule.split(':', 1)[1]

      for private_addr in PRIVATE_ADDRESSES:
        result.append(ExitPolicyRule("%s %s:%s" % (acceptance, private_addr, port)))
    else:
      result.append(ExitPolicyRule(rule))

  # torrc policies can apply to IPv4 or IPv6, so we need to make sure /0
  # addresses aren't treated as being a full wildcard

  for rule in result:
    rule._submask_wildcard = False

  return ExitPolicy(*result)


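# An illustrative example of the expansion above: a rule without a port gets
# ':*' appended, and 'private' expands to each PRIVATE_ADDRESSES entry...
#
#   >>> policy = get_config_policy("reject private:80, accept *")
#   >>> policy.can_exit_to("192.168.0.1", 80)
#   False
#   >>> policy.can_exit_to("75.119.206.243", 80)
#   True
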

class ExitPolicy(object):
  """
  Policy for the destinations that a relay allows or denies exiting to. This
  is, in effect, just a list of :class:`~stem.exit_policy.ExitPolicyRule`
  entries.

  :param list rules: **str** or :class:`~stem.exit_policy.ExitPolicyRule`
    entries that make up this policy
  """

  def __init__(self, *rules):
    # sanity check the types
    for rule in rules:
      if not isinstance(rule, (bytes, unicode, ExitPolicyRule)):
        raise TypeError("Exit policy rules can only contain strings or ExitPolicyRules, got a %s (%s)" % (type(rule), rules))

    # Unparsed representation of the rules we were constructed with. Our
    # _get_rules() method consumes this to provide ExitPolicyRule instances.
    # This is lazily evaluated so we don't need to actually parse the exit
    # policy if it's never used.

    is_all_str = True

    for rule in rules:
      if not isinstance(rule, (bytes, unicode)):
        is_all_str = False

    if rules and is_all_str:
      byte_rules = [stem.util.str_tools._to_bytes(r) for r in rules]
      self._input_rules = zlib.compress(b','.join(byte_rules))
    else:
      self._input_rules = rules

    # Result when no rules apply. According to the spec policies default to 'is
    # allowed', but our microdescriptor policy subclass might want to change
    # this.

    self._is_allowed_default = True

  @lru_cache()
  def can_exit_to(self, address = None, port = None, strict = False):
    """
    Checks if this policy allows exiting to a given destination or not. If the
    address or port is omitted then this checks if we're allowed to exit to
    any instance of the defined address or port.

    :param str address: IPv4 or IPv6 address (with or without brackets)
    :param int port: port number
    :param bool strict: if the address or port is omitted then check if we can
      exit to **all** instances of the defined address or port

    :returns: **True** if exiting to this destination is allowed, **False** otherwise
    """

    for rule in self._get_rules():
      if rule.is_match(address, port, strict):
        return rule.is_accept

    return self._is_allowed_default

  @lru_cache()
  def is_exiting_allowed(self):
    """
    Provides **True** if the policy allows exiting whatsoever, **False**
    otherwise.
    """

    rejected_ports = set()

    for rule in self._get_rules():
      if rule.is_accept:
        for port in xrange(rule.min_port, rule.max_port + 1):
          if port not in rejected_ports:
            return True
      elif rule.is_address_wildcard():
        if rule.is_port_wildcard():
          return False
        else:
          rejected_ports.update(range(rule.min_port, rule.max_port + 1))

    return self._is_allowed_default

  @lru_cache()
  def summary(self):
    """
    Provides a short description of our policy chain, similar to a
    microdescriptor. This excludes entries that don't cover all IP
    addresses, and is either a whitelist or blacklist policy based on
    the final entry. For instance...

    ::

      >>> policy = ExitPolicy('accept *:80', 'accept *:443', 'reject *:*')
      >>> policy.summary()
      "accept 80, 443"

      >>> policy = ExitPolicy('accept *:443', 'reject *:1-1024', 'accept *:*')
      >>> policy.summary()
      "reject 1-442, 444-1024"

    :returns: **str** with a concise summary for our policy
    """

    # determines if we're a whitelist or blacklist
    is_whitelist = not self._is_allowed_default

    for rule in self._get_rules():
      if rule.is_address_wildcard() and rule.is_port_wildcard():
        is_whitelist = not rule.is_accept
        break

    # Iterates over the policies and adds the ports we'll return (ie,
    # allows if a whitelist and rejects if a blacklist). Regardless of a
    # port's allow/reject policy, all further entries with that port are
    # ignored since policies respect the first matching rule.

    display_ports, skip_ports = [], set()

    for rule in self._get_rules():
      if not rule.is_address_wildcard():
        continue
      elif rule.is_port_wildcard():
        break

      for port in xrange(rule.min_port, rule.max_port + 1):
        if port in skip_ports:
          continue

        # if accept + whitelist or reject + blacklist then add
        if rule.is_accept == is_whitelist:
          display_ports.append(port)

        # all further entries with this port should be ignored
        skip_ports.add(port)

    # convert port list to a list of ranges (ie, ['1-3'] rather than [1, 2, 3])
    if display_ports:
      display_ranges, temp_range = [], []
      display_ports.sort()
      display_ports.append(None)  # ending item to include last range in loop

      for port in display_ports:
        if not temp_range or temp_range[-1] + 1 == port:
          temp_range.append(port)
        else:
          if len(temp_range) > 1:
            display_ranges.append("%i-%i" % (temp_range[0], temp_range[-1]))
          else:
            display_ranges.append(str(temp_range[0]))

          temp_range = [port]
    else:
      # everything for the inverse
      is_whitelist = not is_whitelist
      display_ranges = ["1-65535"]

    # constructs the summary string
    label_prefix = "accept " if is_whitelist else "reject "

    return (label_prefix + ", ".join(display_ranges)).strip()

  @lru_cache()
  def _get_rules(self):
    rules = []
    is_all_accept, is_all_reject = True, True

    if isinstance(self._input_rules, bytes):
      decompressed_rules = zlib.decompress(self._input_rules).split(b',')
    else:
      decompressed_rules = self._input_rules

    for rule in decompressed_rules:
      if isinstance(rule, bytes):
        rule = stem.util.str_tools._to_unicode(rule)

      if isinstance(rule, unicode):
        rule = ExitPolicyRule(rule.strip())

      if rule.is_accept:
        is_all_reject = False
      else:
        is_all_accept = False

      rules.append(rule)

      if rule.is_address_wildcard() and rule.is_port_wildcard():
        break  # this is a catch-all, no reason to include more

    # If we only have one kind of entry *and* end with a wildcard then
    # we might as well use the simpler version. For instance...
    #
    #   reject *:80, reject *:443, reject *:*
    #
    # ... could also be represented as simply...
    #
    #   reject *:*
    #
    # This mostly comes up with reject-all policies because the
    # 'reject private:*' appends an extra six rules that have no
    # effect.

    if rules and (rules[-1].is_address_wildcard() and rules[-1].is_port_wildcard()):
      if is_all_accept:
        rules = [ExitPolicyRule("accept *:*")]
      elif is_all_reject:
        rules = [ExitPolicyRule("reject *:*")]

    self._input_rules = None
    return rules

  def __iter__(self):
    for rule in self._get_rules():
      yield rule

  @lru_cache()
  def __str__(self):
    return ', '.join([str(rule) for rule in self._get_rules()])

  def __hash__(self):
    # TODO: It would be nice to provide a real hash function, but doing so is
    # tricky due to how we lazily load the rules. Like equality checks a proper
    # hash function would need to call _get_rules(), but that's behind
    # @lru_cache which calls hash() forming a circular dependency.

    return id(self)

  def __eq__(self, other):
    if isinstance(other, ExitPolicy):
      return self._get_rules() == list(other)
    else:
      return False
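
# A brief sketch of how the pieces above fit together (illustrative only)...
#
#   policy = ExitPolicy('accept *:80', 'accept *:443', 'reject *:*')
#
#   policy.can_exit_to(port = 443)   # True
#   policy.can_exit_to(port = 22)    # False
#   policy.is_exiting_allowed()      # True
#   policy.summary()                 # "accept 80, 443"
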

class MicroExitPolicy(ExitPolicy):
  """
  Exit policy provided by the microdescriptors. This is a distilled version of
  a normal :class:`~stem.exit_policy.ExitPolicy`, just consisting of a
  list of ports that are either accepted or rejected. For instance...

  ::

    accept 80,443       # only accepts common http ports
    reject 1-1024       # only accepts non-privileged ports

  Since these policies are a subset of the exit policy information (lacking IP
  ranges) clients can only use them to guess if a relay will accept traffic or
  not. To quote the `dir-spec <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_ (section 3.2.1)...

  ::

    With microdescriptors, clients don't learn exact exit policies:
    clients can only guess whether a relay accepts their request, try the
    BEGIN request, and might get end-reason-exit-policy if they guessed
    wrong, in which case they'll have to try elsewhere.

  :var bool is_accept: **True** if these are ports that we accept, **False** if
    they're ports that we reject

  :param str policy: policy string that describes this policy
  """

  def __init__(self, policy):
    # Microdescriptor policies are of the form...
    #
    #   MicrodescriptorPolicy ::= ("accept" / "reject") SP PortList NL
    #   PortList ::= PortOrRange
    #   PortList ::= PortList "," PortOrRange
    #   PortOrRange ::= INT "-" INT / INT

    self._policy = policy

    if policy.startswith("accept"):
      self.is_accept = True
    elif policy.startswith("reject"):
      self.is_accept = False
    else:
      raise ValueError("A microdescriptor exit policy must start with either 'accept' or 'reject': %s" % policy)

    policy = policy[6:]

    if not policy.startswith(" ") or (len(policy) - 1 != len(policy.lstrip())):
      raise ValueError("A microdescriptor exit policy should have a space separating accept/reject from its port list: %s" % self._policy)

    policy = policy[1:]

    # convert our port list into MicroExitPolicyRule entries
    rules = []

    for port_entry in policy.split(","):
      if '-' in port_entry:
        min_port, max_port = port_entry.split('-', 1)
      else:
        min_port = max_port = port_entry

      if not stem.util.connection.is_valid_port(min_port) or \
         not stem.util.connection.is_valid_port(max_port):
        raise ValueError("'%s' is an invalid port range" % port_entry)

      rules.append(MicroExitPolicyRule(self.is_accept, int(min_port), int(max_port)))

    super(MicroExitPolicy, self).__init__(*rules)
    self._is_allowed_default = not self.is_accept

  def __str__(self):
    return self._policy

  def __hash__(self):
    return hash(str(self))

  def __eq__(self, other):
    if isinstance(other, MicroExitPolicy):
      return str(self) == str(other)
    else:
      return False
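
# For instance (an illustrative sketch, not from the upstream module)...
#
#   policy = MicroExitPolicy('accept 80,443')
#
#   policy.is_accept                            # True
#   policy.can_exit_to('208.113.165.162', 80)   # True
#   policy.can_exit_to('208.113.165.162', 22)   # False, unlisted ports are rejected
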

class ExitPolicyRule(object):
  """
  Single rule from the user's exit policy. These rules are chained together to
  form complete policies that describe where a relay will and will not allow
  traffic to exit.

  The format of these rules is formally described in the `dir-spec
  <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_ as an
  "exitpattern". Note that while these are similar to tor's man page entry for
  ExitPolicies, they're not exactly the same. An exitpattern is better defined
  and stricter in what it'll accept. For instance, ports are not optional and
  it does not contain the 'private' alias.

  This should be treated as an immutable object.

  :var bool is_accept: indicates if exiting is allowed or disallowed

  :var str address: address that this rule is for

  :var int min_port: lower end of the port range that we include (inclusive)
  :var int max_port: upper end of the port range that we include (inclusive)

  :param str rule: exit policy rule to be parsed

  :raises: **ValueError** if input isn't a valid tor exit policy rule
  """

  def __init__(self, rule):
    # policy ::= "accept" exitpattern | "reject" exitpattern
    # exitpattern ::= addrspec ":" portspec

    if rule.startswith("accept"):
      self.is_accept = True
    elif rule.startswith("reject"):
      self.is_accept = False
    else:
      raise ValueError("An exit policy must start with either 'accept' or 'reject': %s" % rule)

    exitpattern = rule[6:]

    if not exitpattern.startswith(" ") or (len(exitpattern) - 1 != len(exitpattern.lstrip())):
      raise ValueError("An exit policy should have a space separating its accept/reject from the exit pattern: %s" % rule)

    exitpattern = exitpattern[1:]

    if ":" not in exitpattern:
      raise ValueError("An exitpattern must be of the form 'addrspec:portspec': %s" % rule)

    self.address = None
    self._address_type = None
    self._masked_bits = None
    self.min_port = self.max_port = None
    self._hash = None

    # Our mask in ip notation (ex. "255.255.255.0"). This is only set if we
    # either have a custom mask that can't be represented by a number of bits,
    # or the user has called get_mask(), lazily loading this.

    self._mask = None

    addrspec, portspec = exitpattern.rsplit(":", 1)
    self._apply_addrspec(rule, addrspec)
    self._apply_portspec(rule, portspec)

    # If true then a submask of /0 is treated by is_address_wildcard() as being
    # a wildcard.

    self._submask_wildcard = True

  def is_address_wildcard(self):
    """
    **True** if we'll match against any address, **False** otherwise.

    Note that if this policy can apply to both IPv4 and IPv6 then this is
    different from being for a /0 (since, for instance, 0.0.0.0/0 wouldn't
    match against an IPv6 address). That said, /0 addresses are highly unusual
    and most things citing exit policies are IPv4 specific anyway, making this
    moot.

    :returns: **bool** for if our address matching is a wildcard
    """

    if self._submask_wildcard and self.get_masked_bits() == 0:
      return True

    return self._address_type == _address_type_to_int(AddressType.WILDCARD)

  def is_port_wildcard(self):
    """
    **True** if we'll match against any port, **False** otherwise.

    :returns: **bool** for if our port matching is a wildcard
    """

    return self.min_port in (0, 1) and self.max_port == 65535

  def is_match(self, address = None, port = None, strict = False):
    """
    **True** if we match against the given destination, **False** otherwise. If
    the address or port is omitted then this checks if we're allowed to
    exit to any instance of the defined address or port.

    :param str address: IPv4 or IPv6 address (with or without brackets)
    :param int port: port number
    :param bool strict: if the address or port is omitted then check if we can
      exit to **all** instances of the defined address or port

    :returns: **bool** indicating if we match against this destination

    :raises: **ValueError** if provided with a malformed address or port
    """

    # validate our input and check if the argument doesn't match our address type
    if address is not None:
      address_type = self.get_address_type()

      if stem.util.connection.is_valid_ipv4_address(address):
        if address_type == AddressType.IPv6:
          return False
      elif stem.util.connection.is_valid_ipv6_address(address, allow_brackets = True):
        if address_type == AddressType.IPv4:
          return False

        address = address.lstrip("[").rstrip("]")
      else:
        raise ValueError("'%s' isn't a valid IPv4 or IPv6 address" % address)

    if port is not None and not stem.util.connection.is_valid_port(port):
      raise ValueError("'%s' isn't a valid port" % port)

    if not self.is_address_wildcard():
      # Already got the integer representation of our mask and our address
      # with the mask applied. Just need to check if this address with the
      # mask applied matches.

      if address is None:
        if strict:
          return False
      else:
        comparison_addr_bin = int(stem.util.connection._get_address_binary(address), 2)
        comparison_addr_bin &= self._get_mask_bin()

        if self._get_address_bin() != comparison_addr_bin:
          return False

    if not self.is_port_wildcard():
      if port is None:
        if strict:
          return False
      elif port < self.min_port or port > self.max_port:
        return False

    return True

  def get_address_type(self):
    """
    Provides the :data:`~stem.exit_policy.AddressType` for our policy.

    :returns: :data:`~stem.exit_policy.AddressType` for the type of address that we have
    """

    return _int_to_address_type(self._address_type)

  def get_mask(self, cache = True):
    """
    Provides the address represented by our mask. This is **None** if our
    address type is a wildcard.

    :param bool cache: caches the result if **True**

    :returns: str of our subnet mask for the address (ex. "255.255.255.0")
    """

    # Lazy loading our mask because it's very infrequently requested. There's
    # no reason to usually use memory for it.

    if not self._mask:
      address_type = self.get_address_type()

      if address_type == AddressType.WILDCARD:
        mask = None
      elif address_type == AddressType.IPv4:
        mask = stem.util.connection.get_mask_ipv4(self._masked_bits)
      elif address_type == AddressType.IPv6:
        mask = stem.util.connection.get_mask_ipv6(self._masked_bits)

      if not cache:
        return mask

      self._mask = mask

    return self._mask

  def get_masked_bits(self):
    """
    Provides the number of bits our subnet mask represents. This is **None** if
    our mask can't have a bit representation.

    :returns: int with the bit representation of our mask
    """

    return self._masked_bits

  @lru_cache()
  def __str__(self):
    """
    Provides the string representation of our policy. This does not
    necessarily match the rule that we were constructed from (due to things
    like IPv6 address collapsing or the multiple representations that our mask
    can have). However, it is a valid rule that would be accepted by our
    constructor to re-create this rule.
    """

    label = "accept " if self.is_accept else "reject "

    if self.is_address_wildcard():
      label += "*:"
    else:
      address_type = self.get_address_type()

      if address_type == AddressType.IPv4:
        label += self.address
      else:
        label += "[%s]" % self.address

      # Including our mask label as follows...
      # - exclude our mask if it doesn't do anything
      # - use our masked bit count if we can
      # - use the mask itself otherwise

      if (address_type == AddressType.IPv4 and self._masked_bits == 32) or \
         (address_type == AddressType.IPv6 and self._masked_bits == 128):
        label += ":"
      elif self._masked_bits is not None:
        label += "/%i:" % self._masked_bits
      else:
        label += "/%s:" % self.get_mask()

    if self.is_port_wildcard():
      label += "*"
    elif self.min_port == self.max_port:
      label += str(self.min_port)
    else:
      label += "%i-%i" % (self.min_port, self.max_port)

    return label

  def __hash__(self):
    if self._hash is None:
      my_hash = 0

      for attr in ("is_accept", "address", "min_port", "max_port"):
        my_hash *= 1024

        attr_value = getattr(self, attr)

        if attr_value is not None:
          my_hash += hash(attr_value)

      my_hash *= 1024
      my_hash += hash(self.get_mask(False))

      self._hash = my_hash

    return self._hash

  @lru_cache()
  def _get_mask_bin(self):
    # provides an integer representation of our mask

    return int(stem.util.connection._get_address_binary(self.get_mask(False)), 2)

  @lru_cache()
  def _get_address_bin(self):
    # provides an integer representation of our address

    return int(stem.util.connection._get_address_binary(self.address), 2) & self._get_mask_bin()

  def _apply_addrspec(self, rule, addrspec):
    # Parses the addrspec...
    #   addrspec ::= "*" | ip4spec | ip6spec

    if "/" in addrspec:
      self.address, addr_extra = addrspec.split("/", 1)
    else:
      self.address, addr_extra = addrspec, None

    if addrspec == "*":
      self._address_type = _address_type_to_int(AddressType.WILDCARD)
      self.address = self._masked_bits = None
    elif stem.util.connection.is_valid_ipv4_address(self.address):
      # ipv4spec ::= ip4 | ip4 "/" num_ip4_bits | ip4 "/" ip4mask
      # ip4 ::= an IPv4 address in dotted-quad format
      # ip4mask ::= an IPv4 mask in dotted-quad format
      # num_ip4_bits ::= an integer between 0 and 32

      self._address_type = _address_type_to_int(AddressType.IPv4)

      if addr_extra is None:
        self._masked_bits = 32
      elif stem.util.connection.is_valid_ipv4_address(addr_extra):
        # provided with an ip4mask
        try:
          self._masked_bits = stem.util.connection._get_masked_bits(addr_extra)
        except ValueError:
          # mask can't be represented as a number of bits (ex. "255.255.0.255")
          self._mask = addr_extra
          self._masked_bits = None
      elif addr_extra.isdigit():
        # provided with a num_ip4_bits
        self._masked_bits = int(addr_extra)

        if self._masked_bits < 0 or self._masked_bits > 32:
          raise ValueError("IPv4 masks must be in the range of 0-32 bits")
      else:
        raise ValueError("'%s' isn't a mask or a number of bits: %s" % (addr_extra, rule))
    elif self.address.startswith("[") and self.address.endswith("]") and \
         stem.util.connection.is_valid_ipv6_address(self.address[1:-1]):
      # ip6spec ::= ip6 | ip6 "/" num_ip6_bits
      # ip6 ::= an IPv6 address, surrounded by square brackets.
      # num_ip6_bits ::= an integer between 0 and 128

      self.address = stem.util.connection.expand_ipv6_address(self.address[1:-1].upper())
      self._address_type = _address_type_to_int(AddressType.IPv6)

      if addr_extra is None:
        self._masked_bits = 128
      elif addr_extra.isdigit():
        # provided with a num_ip6_bits
        self._masked_bits = int(addr_extra)

        if self._masked_bits < 0 or self._masked_bits > 128:
          raise ValueError("IPv6 masks must be in the range of 0-128 bits")
      else:
        raise ValueError("'%s' isn't a number of bits: %s" % (addr_extra, rule))
    else:
      raise ValueError("Address isn't a wildcard, IPv4, or IPv6 address: %s" % rule)

  def _apply_portspec(self, rule, portspec):
    # Parses the portspec...
    #   portspec ::= "*" | port | port "-" port
    #   port ::= an integer between 1 and 65535, inclusive.
    #
    # Due to a tor bug the spec says that we should accept a port of zero, but
    # connections to port zero are never permitted.

    if portspec == "*":
      self.min_port, self.max_port = 1, 65535
    elif portspec.isdigit():
      # provided with a single port
      if stem.util.connection.is_valid_port(portspec, allow_zero = True):
        self.min_port = self.max_port = int(portspec)
      else:
        raise ValueError("'%s' isn't within a valid port range: %s" % (portspec, rule))
    elif "-" in portspec:
      # provided with a port range
      port_comp = portspec.split("-", 1)

      if stem.util.connection.is_valid_port(port_comp, allow_zero = True):
        self.min_port = int(port_comp[0])
        self.max_port = int(port_comp[1])

        if self.min_port > self.max_port:
          raise ValueError("Port range has a lower bound that's greater than its upper bound: %s" % rule)
      else:
        raise ValueError("Malformed port range: %s" % rule)
    else:
      raise ValueError("Port value isn't a wildcard, integer, or range: %s" % rule)

  def __eq__(self, other):
    if isinstance(other, ExitPolicyRule):
      # Our string representation encompasses our effective policy. Technically
      # this isn't quite right since our rule attribute may differ (ie, "accept
      # 0.0.0.0/0" == "accept 0.0.0.0/0.0.0.0" will be True), but these
      # policies are effectively equivalent.

      return hash(self) == hash(other)
    else:
      return False
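
# For example (an illustrative sketch, not from the upstream module)...
#
#   rule = ExitPolicyRule('accept 66.85.227.79:80-443')
#
#   rule.is_match('66.85.227.79', 443)   # True
#   rule.is_match('66.85.227.80', 443)   # False, different address
#   rule.get_masked_bits()               # 32
#   str(rule)                            # 'accept 66.85.227.79:80-443'
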

def _address_type_to_int(address_type):
  return AddressType.index_of(address_type)


def _int_to_address_type(address_type_int):
  return AddressType[AddressType.keys()[address_type_int]]

class MicroExitPolicyRule(ExitPolicyRule):
  """
  Lighter weight ExitPolicyRule derivative for microdescriptors.
  """

  def __init__(self, is_accept, min_port, max_port):
    self.is_accept = is_accept
    self.address = None  # wildcard address
    self.min_port = min_port
    self.max_port = max_port
    self._hash = None

  def is_address_wildcard(self):
    return True

  def get_address_type(self):
    return AddressType.WILDCARD

  def get_mask(self, cache = True):
    return None

  def get_masked_bits(self):
    return None

  def __hash__(self):
    if self._hash is None:
      my_hash = 0

      for attr in ("is_accept", "min_port", "max_port"):
        my_hash *= 1024

        attr_value = getattr(self, attr)

        if attr_value is not None:
          my_hash += hash(attr_value)

      self._hash = my_hash

    return self._hash

131
lib/stem/prereq.py
Normal file
@@ -0,0 +1,131 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Checks for stem dependencies. We require python 2.6 or greater (including the
3.x series). Other requirements for complete functionality are...

* pycrypto module

  * validating descriptor signature integrity

::

  check_requirements - checks for minimum requirements for running stem

  is_python_27 - checks if python 2.7 or later is available
  is_python_3 - checks if python 3.0 or later is available

  is_crypto_available - checks if the pycrypto module is available
"""

import inspect
import sys

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

CRYPTO_UNAVAILABLE = "Unable to import the pycrypto module. Because of this we'll be unable to verify descriptor signature integrity. You can get pycrypto from: https://www.dlitz.net/software/pycrypto/"


def check_requirements():
  """
  Checks that we meet the minimum requirements to run stem. If we don't then
  this raises an ImportError with the issue.

  :raises: ImportError with the problem if we don't meet stem's requirements
  """

  major_version, minor_version = sys.version_info[0:2]

  if major_version < 2 or (major_version == 2 and minor_version < 6):
    raise ImportError("stem requires python version 2.6 or greater")
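
# Callers would typically fail fast at import time (an illustrative sketch,
# not from the upstream module)...
#
#   import stem.prereq
#
#   stem.prereq.check_requirements()   # raises ImportError on python < 2.6
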

def is_python_27():
  """
  Checks if we're running python 2.7 or above (including the 3.x series).

  :returns: **True** if we meet this requirement and **False** otherwise
  """

  major_version, minor_version = sys.version_info[0:2]

  return major_version > 2 or (major_version == 2 and minor_version >= 7)


def is_python_3():
  """
  Checks if we're in the 3.0 - 3.x range.

  :returns: **True** if we meet this requirement and **False** otherwise
  """

  return sys.version_info[0] == 3


@lru_cache()
def is_crypto_available():
  """
  Checks if the pycrypto functions we use are available. This is used for
  verifying relay descriptor signatures.

  :returns: **True** if we can use pycrypto and **False** otherwise
  """

  from stem.util import log

  try:
    from Crypto.PublicKey import RSA
    from Crypto.Util import asn1
    from Crypto.Util.number import long_to_bytes

    return True
  except ImportError:
    log.log_once("stem.prereq.is_crypto_available", log.INFO, CRYPTO_UNAVAILABLE)
    return False


@lru_cache()
def is_mock_available():
  """
  Checks if the mock module is available. In python 3.3 and up it is a builtin
  unittest module, but before this it needed to be `installed separately
  <https://pypi.python.org/pypi/mock/>`_. Imports should be as follows...

  ::

    try:
      # added in python 3.3
      from unittest.mock import Mock
    except ImportError:
      from mock import Mock

  :returns: **True** if the mock module is available and **False** otherwise
  """

  try:
    # checks for the python 3.3 version
    import unittest.mock
    return True
  except ImportError:
    pass

  try:
    import mock

    # check for mock's patch.dict() which was introduced in version 0.7.0

    if not hasattr(mock.patch, 'dict'):
      raise ImportError()

    # check for mock's new_callable argument for patch() which was introduced in version 0.8.0

    if 'new_callable' not in inspect.getargspec(mock.patch).args:
      raise ImportError()

    return True
  except ImportError:
    return False

255
lib/stem/process.py
Normal file
@@ -0,0 +1,255 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Helper functions for working with tor as a process.

:NO_TORRC:
  when provided as a torrc_path tor is run with a blank configuration

:DEFAULT_INIT_TIMEOUT:
  number of seconds before we time out our attempt to start a tor instance

**Module Overview:**

::

  launch_tor - starts up a tor process
  launch_tor_with_config - starts a tor process with a custom torrc
"""

import os
import re
import signal
import subprocess
import tempfile

import stem.prereq
import stem.util.system

NO_TORRC = "<no torrc>"
DEFAULT_INIT_TIMEOUT = 90


def launch_tor(tor_cmd = "tor", args = None, torrc_path = None, completion_percent = 100, init_msg_handler = None, timeout = DEFAULT_INIT_TIMEOUT, take_ownership = False):
  """
  Initializes a tor process. This blocks until initialization completes or we
  error out.

  If tor's data directory is missing or stale then bootstrapping will include
  making several requests to the directory authorities which can take a little
  while. Usually this is done in 50 seconds or so, but occasionally calls seem
  to get stuck, taking well over the default timeout.

  **To work tor must log at NOTICE runlevel to stdout.** It does this by
  default, but if you have a 'Log' entry in your torrc then you'll also need
  'Log NOTICE stdout'.

  Note: The timeout argument does not work on Windows, and relies on the global
  state of the signal module.

  :param str tor_cmd: command for starting tor
  :param list args: additional arguments for tor
  :param str torrc_path: location of the torrc for us to use
  :param int completion_percent: percent of bootstrap completion at which
    this'll return
  :param functor init_msg_handler: optional functor that will be provided with
    tor's initialization stdout as we get it
  :param int timeout: time after which the attempt to start tor is aborted, no
    timeouts are applied if **None**
  :param bool take_ownership: asserts ownership over the tor process so it
    aborts if this python process terminates or a :class:`~stem.control.Controller`
    we establish to it disconnects

  :returns: **subprocess.Popen** instance for the tor subprocess

  :raises: **OSError** if we either fail to create the tor process or reached a
    timeout without success
  """

  if stem.util.system.is_windows():
    timeout = None

  # sanity check that we got a tor binary

  if os.path.sep in tor_cmd:
    # got a path (either relative or absolute), check what it leads to

    if os.path.isdir(tor_cmd):
      raise OSError("'%s' is a directory, not the tor executable" % tor_cmd)
    elif not os.path.isfile(tor_cmd):
      raise OSError("'%s' doesn't exist" % tor_cmd)
  elif not stem.util.system.is_available(tor_cmd):
    raise OSError("'%s' isn't available on your system. Maybe it's not in your PATH?" % tor_cmd)

  # double check that we have a torrc to work with
  if torrc_path not in (None, NO_TORRC) and not os.path.exists(torrc_path):
    raise OSError("torrc doesn't exist (%s)" % torrc_path)

  # starts a tor subprocess, raising an OSError if it fails
  runtime_args, temp_file = [tor_cmd], None

  if args:
    runtime_args += args

  if torrc_path:
    if torrc_path == NO_TORRC:
      temp_file = tempfile.mkstemp(prefix = "empty-torrc-", text = True)[1]
      runtime_args += ["-f", temp_file]
    else:
      runtime_args += ["-f", torrc_path]

  if take_ownership:
    runtime_args += ["__OwningControllerProcess", str(os.getpid())]

  tor_process = subprocess.Popen(runtime_args, stdout = subprocess.PIPE, stderr = subprocess.PIPE)

  if timeout:
    def timeout_handler(signum, frame):
      # terminates the uninitialized tor process and raises on timeout
      if temp_file:
        try:
          os.remove(temp_file)
        except:
          pass

      tor_process.kill()

      raise OSError("reached a %i second timeout without success" % timeout)

    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(timeout)

  bootstrap_line = re.compile("Bootstrapped ([0-9]+)%: ")
  problem_line = re.compile(r"\[(warn|err)\] (.*)$")
  last_problem = "Timed out"

  while True:
    # Tor's stdout will be read as ASCII bytes. This is fine for python 2, but
    # in python 3 that means it'll mismatch with other operations (for instance
    # the bootstrap_line.search() call later will fail).
    #
    # It seems like python 2.x is perfectly happy for this to be unicode, so
    # normalizing to that.

    init_line = tor_process.stdout.readline().decode("utf-8", "replace").strip()

    # this will provide empty results if the process is terminated
    if not init_line:
      if timeout:
        signal.alarm(0)  # stop alarm

      # ... but best make sure

      tor_process.kill()

      raise OSError("Process terminated: %s" % last_problem)

    # provide the caller with the initialization message if they want it

    if init_msg_handler:
      init_msg_handler(init_line)

    # return the process if we're done with bootstrapping
    bootstrap_match = bootstrap_line.search(init_line)
    problem_match = problem_line.search(init_line)

    if bootstrap_match and int(bootstrap_match.groups()[0]) >= completion_percent:
      if timeout:
        signal.alarm(0)  # stop alarm

      if temp_file:
        try:
          os.remove(temp_file)
        except:
          pass

      return tor_process
    elif problem_match:
      runlevel, msg = problem_match.groups()

      if "see warnings above" not in msg:
        if ": " in msg:
          msg = msg.split(": ")[-1].strip()

        last_problem = msg
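
# A usage sketch (illustrative only; the callback below is an assumption, any
# callable accepting a line of tor's stdout works)...
#
#   def print_bootstrap_lines(line):
#     if 'Bootstrapped ' in line:
#       print(line)
#
#   tor_process = launch_tor(
#     tor_cmd = 'tor',
#     init_msg_handler = print_bootstrap_lines,
#   )
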

def launch_tor_with_config(config, tor_cmd = "tor", completion_percent = 100, init_msg_handler = None, timeout = DEFAULT_INIT_TIMEOUT, take_ownership = False):
  """
  Initializes a tor process, like :func:`~stem.process.launch_tor`, but with a
  customized configuration. This writes a temporary torrc to disk, launches
  tor, then deletes the torrc.

  For example...

  ::

    tor_process = stem.process.launch_tor_with_config(
      config = {
        'ControlPort': '2778',
        'Log': [
          'NOTICE stdout',
          'ERR file /tmp/tor_error_log',
        ],
      },
    )

  :param dict config: configuration options, such as '{"ControlPort": "9051"}',
    values can either be a **str** or a **list of str** for multiple values
  :param str tor_cmd: command for starting tor
  :param int completion_percent: percent of bootstrap completion at which
    this'll return
  :param functor init_msg_handler: optional functor that will be provided with
    tor's initialization stdout as we get it
  :param int timeout: time after which the attempt to start tor is aborted, no
    timeouts are applied if **None**
  :param bool take_ownership: asserts ownership over the tor process so it
    aborts if this python process terminates or a :class:`~stem.control.Controller`
    we establish to it disconnects

  :returns: **subprocess.Popen** instance for the tor subprocess

  :raises: **OSError** if we either fail to create the tor process or reached a
    timeout without success
  """

  # we need to be sure that we're logging to stdout to figure out when we're
  # done bootstrapping

  if 'Log' in config:
    stdout_options = ['DEBUG stdout', 'INFO stdout', 'NOTICE stdout']

    if isinstance(config['Log'], str):
      config['Log'] = [config['Log']]

    has_stdout = False

    for log_config in config['Log']:
      if log_config in stdout_options:
        has_stdout = True
        break

    if not has_stdout:
      config['Log'].append('NOTICE stdout')

  torrc_path = tempfile.mkstemp(prefix = "torrc-", text = True)[1]

  try:
    with open(torrc_path, "w") as torrc_file:
      for key, values in config.items():
        if isinstance(values, str):
          torrc_file.write("%s %s\n" % (key, values))
        else:
          for value in values:
            torrc_file.write("%s %s\n" % (key, value))

    # prevents tor from erroring out due to a missing torrc if it gets a sighup
    args = ['__ReloadTorrcOnSIGHUP', '0']

    return launch_tor(tor_cmd, args, torrc_path, completion_percent, init_msg_handler, timeout, take_ownership)
  finally:
    try:
      os.remove(torrc_path)
    except:
      pass

571
lib/stem/response/__init__.py
Normal file
@@ -0,0 +1,571 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parses replies from the control socket.

**Module Overview:**

::

  convert - translates a ControlMessage into a particular response subclass

  ControlMessage - Message that's read from the control socket.
    |- from_str - provides a ControlMessage for the given string
    |- content - provides the parsed message content
    |- raw_content - unparsed socket data
    |- __str__ - content stripped of protocol formatting
    +- __iter__ - ControlLine entries for the content of the message

  ControlLine - String subclass with methods for parsing controller responses.
    |- remainder - provides the unparsed content
    |- is_empty - checks if the remaining content is empty
    |- is_next_quoted - checks if the next entry is a quoted value
    |- is_next_mapping - checks if the next entry is a KEY=VALUE mapping
    |- peek_key - provides the key of the next entry
    |- pop - removes and returns the next entry
    +- pop_mapping - removes and returns the next entry as a KEY=VALUE mapping

  SingleLineResponse - Simple tor response only including a single line of information.
"""

__all__ = [
  "events",
  "getinfo",
  "getconf",
  "protocolinfo",
  "authchallenge",
  "convert",
  "ControlMessage",
  "ControlLine",
  "SingleLineResponse",
]

import re
import threading

try:
  # renamed to io.StringIO in python 3.x
  from StringIO import StringIO
except ImportError:
  from io import StringIO

import stem.prereq
import stem.socket
import stem.util.str_tools

KEY_ARG = re.compile(r"^(\S+)=")

# Escape sequences from the 'esc_for_log' function of tor's 'common/util.c'.
# It's hard to tell what controller functions use this in practice, but direct
# users are...
#   - 'COOKIEFILE' field of PROTOCOLINFO responses
#   - logged messages about bugs
#   - the 'getinfo_helper_listeners' function of control.c

CONTROL_ESCAPES = {r"\\": "\\", r"\"": "\"", r"\'": "'",
                   r"\r": "\r", r"\n": "\n", r"\t": "\t"}


def convert(response_type, message, **kwargs):
  """
  Converts a :class:`~stem.response.ControlMessage` into a particular kind of
  tor response. This does an in-place conversion of the message from being a
  :class:`~stem.response.ControlMessage` to a subclass for its response type.
  Recognized types include...

  =================== =====
  response_type       Class
  =================== =====
  **GETINFO**         :class:`stem.response.getinfo.GetInfoResponse`
  **GETCONF**         :class:`stem.response.getconf.GetConfResponse`
  **MAPADDRESS**      :class:`stem.response.mapaddress.MapAddressResponse`
  **EVENT**           :class:`stem.response.events.Event` subclass
  **PROTOCOLINFO**    :class:`stem.response.protocolinfo.ProtocolInfoResponse`
  **AUTHCHALLENGE**   :class:`stem.response.authchallenge.AuthChallengeResponse`
  **SINGLELINE**      :class:`stem.response.SingleLineResponse`
  =================== =====

  :param str response_type: type of tor response to convert to
  :param stem.response.ControlMessage message: message to be converted
  :param kwargs: optional keyword arguments to be passed to the parser method

  :raises:
    * :class:`stem.ProtocolError` if the message isn't a proper response of
      that type
    * :class:`stem.InvalidArguments` if the arguments given as input are
      invalid, this can only be raised if the response_type is: **GETINFO**,
      **GETCONF**
    * :class:`stem.InvalidRequest` if the arguments given as input are
      invalid, this can only be raised if the response_type is:
      **MAPADDRESS**
    * :class:`stem.OperationFailed` if the action the event represents failed,
      this can only be raised if the response_type is: **MAPADDRESS**
    * **TypeError** if argument isn't a :class:`~stem.response.ControlMessage`
      or response_type isn't supported
  """

  import stem.response.events
  import stem.response.getinfo
  import stem.response.getconf
  import stem.response.protocolinfo
  import stem.response.authchallenge
  import stem.response.mapaddress

  if not isinstance(message, ControlMessage):
    raise TypeError("Only able to convert stem.response.ControlMessage instances")

  response_types = {
    "EVENT": stem.response.events.Event,
    "GETINFO": stem.response.getinfo.GetInfoResponse,
    "GETCONF": stem.response.getconf.GetConfResponse,
    "MAPADDRESS": stem.response.mapaddress.MapAddressResponse,
    "SINGLELINE": SingleLineResponse,
    "PROTOCOLINFO": stem.response.protocolinfo.ProtocolInfoResponse,
    "AUTHCHALLENGE": stem.response.authchallenge.AuthChallengeResponse,
  }

  try:
    response_class = response_types[response_type]
  except KeyError:
    raise TypeError("Unsupported response type: %s" % response_type)

  message.__class__ = response_class
  message._parse_message(**kwargs)
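
# For example, parsing a GETINFO reply (an illustrative sketch; note the
# CRLF terminated, control-spec formatted input)...
#
#   message = ControlMessage.from_str('250-version=0.2.4.10\r\n250 OK\r\n')
#   convert('GETINFO', message)
#   isinstance(message, stem.response.getinfo.GetInfoResponse)   # True
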

class ControlMessage(object):
  """
  Message from the control socket. This is iterable and can be stringified for
  individual message components stripped of protocol formatting. Messages are
  never empty.
  """

  @staticmethod
  def from_str(content, msg_type = None, **kwargs):
    """
    Provides a ControlMessage for the given content.

    :param str content: message to construct the message from
    :param str msg_type: type of tor reply to parse the content as
    :param kwargs: optional keyword arguments to be passed to the parser method

    :returns: stem.response.ControlMessage instance
    """

    msg = stem.socket.recv_message(StringIO(content))

    if msg_type is not None:
      convert(msg_type, msg, **kwargs)

    return msg

  def __init__(self, parsed_content, raw_content):
    if not parsed_content:
      raise ValueError("ControlMessages can't be empty")

    self._parsed_content = parsed_content
    self._raw_content = raw_content

  def is_ok(self):
    """
    Checks if any of our lines have a 250 response.

    :returns: **True** if any lines have a 250 response code, **False** otherwise
    """

    for code, _, _ in self._parsed_content:
      if code == "250":
        return True

    return False

  def content(self, get_bytes = False):
    """
    Provides the parsed message content. These are entries of the form...

    ::

      (status_code, divider, content)

    **status_code**
      Three character code for the type of response (defined in section 4 of
      the control-spec).

    **divider**
      Single character to indicate if this is mid-reply, data, or an end to the
      message (defined in section 2.3 of the control-spec).

    **content**
      The following content is the actual payload of the line.

    For data entries the content is the full multi-line payload with newline
    linebreaks and leading periods unescaped.

    The **status_code** and **divider** are both strings (**bytes** in python
    2.x and **unicode** in python 3.x). The **content** however is **bytes** if
    **get_bytes** is **True**.

    :param bool get_bytes: provides **bytes** for the **content** rather than a **str**

    :returns: **list** of (str, str, str) tuples for the components of this message
    """

    if stem.prereq.is_python_3() and not get_bytes:
      return [(code, div, stem.util.str_tools._to_unicode(content)) for (code, div, content) in self._parsed_content]
    else:
      return list(self._parsed_content)

  def raw_content(self, get_bytes = False):
    """
    Provides the unparsed content read from the control socket.

    :param bool get_bytes: if **True** then this provides **bytes** rather than a **str**

    :returns: **str** of the socket data used to generate this message
    """

    if stem.prereq.is_python_3() and not get_bytes:
      return stem.util.str_tools._to_unicode(self._raw_content)
    else:
      return self._raw_content

  def __str__(self):
    """
    Content of the message, stripped of status code and divider protocol
    formatting.
    """

    return "\n".join(list(self))

  def __iter__(self):
    """
    Provides :class:`~stem.response.ControlLine` instances for the content of
    the message. This is stripped of status codes and dividers, for instance...

    ::

      250+info/names=
      desc/id/* -- Router descriptors by ID.
      desc/name/* -- Router descriptors by nickname.
      .
      250 OK

    Would provide two entries...

    ::

      1st - "info/names=
      desc/id/* -- Router descriptors by ID.
      desc/name/* -- Router descriptors by nickname."
      2nd - "OK"
    """

    for _, _, content in self._parsed_content:
      if stem.prereq.is_python_3():
        content = stem.util.str_tools._to_unicode(content)

      yield ControlLine(content)

  def __len__(self):
    """
    :returns: number of ControlLines
    """

    return len(self._parsed_content)

  def __getitem__(self, index):
    """
    :returns: :class:`~stem.response.ControlLine` at the index
    """

    content = self._parsed_content[index][2]

    if stem.prereq.is_python_3():
      content = stem.util.str_tools._to_unicode(content)

    return ControlLine(content)


class ControlLine(str):
  """
  String subclass that represents a line of controller output. This behaves as
  a normal string with additional methods for parsing and popping entries from
  a space delimited series of elements like a stack.

  None of these additional methods affect ourselves as a string (which is still
  immutable). All methods are thread safe.
  """

  def __new__(self, value):
    return str.__new__(self, value)

  def __init__(self, value):
    self._remainder = value
    self._remainder_lock = threading.RLock()

  def remainder(self):
    """
    Provides our unparsed content. This is an empty string after we've popped
    all entries.

    :returns: **str** of the unparsed content
    """

    return self._remainder

  def is_empty(self):
    """
    Checks if we have further content to pop or not.

    :returns: **True** if we have no remaining content, **False** otherwise
    """

    return self._remainder == ""

  def is_next_quoted(self, escaped = False):
    """
    Checks if our next entry is a quoted value or not.

    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

    :returns: **True** if the next entry can be parsed as a quoted value, **False** otherwise
    """

    start_quote, end_quote = _get_quote_indices(self._remainder, escaped)
    return start_quote == 0 and end_quote != -1

  def is_next_mapping(self, key = None, quoted = False, escaped = False):
    """
    Checks if our next entry is a KEY=VALUE mapping or not.

    :param str key: checks that the key matches this value, skipping the check if **None**
    :param bool quoted: checks that the mapping is to a quoted value
    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

    :returns: **True** if the next entry can be parsed as a key=value mapping,
      **False** otherwise
    """

    remainder = self._remainder  # temp copy to avoid locking
    key_match = KEY_ARG.match(remainder)

    if key_match:
      if key and key != key_match.groups()[0]:
        return False

      if quoted:
        # checks that we have a quoted value and that it comes after the 'key='
        start_quote, end_quote = _get_quote_indices(remainder, escaped)
        return start_quote == key_match.end() and end_quote != -1
      else:
        return True  # we just needed to check for the key
    else:
      return False  # doesn't start with a key

  def peek_key(self):
    """
    Provides the key of the next entry, providing **None** if it isn't a
    key/value mapping.

    :returns: **str** with the next entry's key
    """

    remainder = self._remainder
    key_match = KEY_ARG.match(remainder)

    if key_match:
      return key_match.groups()[0]
    else:
      return None

  def pop(self, quoted = False, escaped = False):
    """
    Parses the next space separated entry, removing it and the space from our
    remaining content. Examples...

    ::

      >>> line = ControlLine("\\"We're all mad here.\\" says the grinning cat.")
      >>> print line.pop(True)
      "We're all mad here."
      >>> print line.pop()
      "says"
      >>> print line.remainder()
      "the grinning cat."

      >>> line = ControlLine("\\"this has a \\\\\\" and \\\\\\\\ in it\\" foo=bar more_data")
      >>> print line.pop(True, True)
      "this has a \\" and \\\\ in it"

    :param bool quoted: parses the next entry as a quoted value, removing the quotes
    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

    :returns: **str** of the next space separated entry

    :raises:
      * **ValueError** if quoted is True without the value being quoted
      * **IndexError** if we don't have any remaining content left to parse
    """

    with self._remainder_lock:
      next_entry, remainder = _parse_entry(self._remainder, quoted, escaped)
      self._remainder = remainder
      return next_entry

  def pop_mapping(self, quoted = False, escaped = False):
    """
    Parses the next space separated entry as a KEY=VALUE mapping, removing it
    and the space from our remaining content.

    :param bool quoted: parses the value as being quoted, removing the quotes
    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

    :returns: **tuple** of the form (key, value)

    :raises: **ValueError** if this isn't a KEY=VALUE mapping or if quoted is
      **True** without the value being quoted
    :raises: **IndexError** if there's nothing to parse from the line
    """

    with self._remainder_lock:
      if self.is_empty():
        raise IndexError("no remaining content to parse")

      key_match = KEY_ARG.match(self._remainder)

      if not key_match:
        raise ValueError("the next entry isn't a KEY=VALUE mapping: " + self._remainder)

      # parse off the key
      key = key_match.groups()[0]
      remainder = self._remainder[key_match.end():]

      next_entry, remainder = _parse_entry(remainder, quoted, escaped)
      self._remainder = remainder
      return (key, next_entry)
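
# For instance, popping apart a PROTOCOLINFO style line (an illustrative
# sketch, not from the upstream module)...
#
#   line = ControlLine('AUTH METHODS=COOKIE COOKIEFILE="/tmp/cookie"')
#
#   line.pop()                        # 'AUTH'
#   line.pop_mapping()                # ('METHODS', 'COOKIE')
#   line.pop_mapping(quoted = True)   # ('COOKIEFILE', '/tmp/cookie')
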
def _parse_entry(line, quoted, escaped):
|
||||
"""
|
||||
Parses the next entry from the given space separated content.
|
||||
|
||||
:param str line: content to be parsed
|
||||
:param bool quoted: parses the next entry as a quoted value, removing the quotes
|
||||
:param bool escaped: unescapes the CONTROL_ESCAPES escape sequences
|
||||
|
||||
:returns: **tuple** of the form (entry, remainder)
|
||||
|
||||
:raises:
|
||||
* **ValueError** if quoted is True without the next value being quoted
|
||||
* **IndexError** if there's nothing to parse from the line
|
||||
"""
|
||||
|
||||
if line == "":
|
||||
raise IndexError("no remaining content to parse")
|
||||
|
||||
next_entry, remainder = "", line
|
||||
|
||||
if quoted:
|
||||
# validate and parse the quoted value
|
||||
start_quote, end_quote = _get_quote_indices(remainder, escaped)
|
||||
|
||||
if start_quote != 0 or end_quote == -1:
|
||||
raise ValueError("the next entry isn't a quoted value: " + line)
|
||||
|
||||
next_entry, remainder = remainder[1:end_quote], remainder[end_quote + 1:]
|
||||
else:
|
||||
# non-quoted value, just need to check if there's more data afterward
|
||||
if " " in remainder:
|
||||
next_entry, remainder = remainder.split(" ", 1)
|
||||
else:
|
||||
next_entry, remainder = remainder, ""
|
||||
|
||||
if escaped:
|
||||
next_entry = _unescape(next_entry)
|
||||
|
||||
return (next_entry, remainder.lstrip())
|
||||
|
||||
|
||||
def _get_quote_indices(line, escaped):
|
||||
"""
|
||||
Provides the indices of the next two quotes in the given content.
|
||||
|
||||
:param str line: content to be parsed
|
||||
:param bool escaped: unescapes the CONTROL_ESCAPES escape sequences
|
||||
|
||||
:returns: **tuple** of two ints, indices being -1 if a quote doesn't exist
|
||||
"""
|
||||
|
||||
indices, quote_index = [], -1
|
||||
|
||||
for _ in range(2):
|
||||
quote_index = line.find("\"", quote_index + 1)
|
||||
|
||||
# if we have escapes then we need to skip any r'\"' entries
|
||||
if escaped:
|
||||
# skip check if index is -1 (no match) or 0 (first character)
|
||||
while quote_index >= 1 and line[quote_index - 1] == "\\":
|
||||
quote_index = line.find("\"", quote_index + 1)
|
||||
|
||||
indices.append(quote_index)
|
||||
|
||||
return tuple(indices)
|
||||
|
||||
|
||||
def _unescape(entry):
  # Unescapes the given string with the mappings in CONTROL_ESCAPES.
  #
  # This can't be a simple series of str.replace() calls because replacements
  # need to be excluded from consideration for further unescaping. For
  # instance, '\\t' should be converted to '\t' rather than a tab.

  def _pop_with_unescape(entry):
    # Pop either the first character or the escape sequence conversion the
    # entry starts with. This provides a tuple of...
    #
    #   (unescaped prefix, remaining entry)

    for esc_sequence, replacement in CONTROL_ESCAPES.items():
      if entry.startswith(esc_sequence):
        return (replacement, entry[len(esc_sequence):])

    return (entry[0], entry[1:])

  result = []

  while entry:
    prefix, entry = _pop_with_unescape(entry)
    result.append(prefix)

  return "".join(result)


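# Example of the ordering concern described in _unescape (illustrative): the
# escaped input r'\\t' becomes a literal backslash followed by 't', rather
# than being double-processed into a tab...
#
#   >>> _unescape('\\\\t') == '\\t'
#   True

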
class SingleLineResponse(ControlMessage):
  """
  Reply to a request that performs an action rather than querying data. These
  requests only contain a single line, which is 'OK' if successful, and a
  description of the problem if not.

  :var str code: status code for our line
  :var str message: content of the line
  """

  def is_ok(self, strict = False):
    """
    Checks if the response code is "250". If strict is **True** then this
    checks if the response is "250 OK".

    :param bool strict: checks for a "250 OK" message if **True**

    :returns:
      * If strict is **False**: **True** if the response code is "250", **False** otherwise
      * If strict is **True**: **True** if the response is "250 OK", **False** otherwise
    """

    if strict:
      return self.content()[0] == ("250", " ", "OK")

    return self.content()[0][0] == "250"

  def _parse_message(self):
    content = self.content()

    if len(content) > 1:
      raise stem.ProtocolError("Received multi-line response")
    elif len(content) == 0:
      raise stem.ProtocolError("Received empty response")
    else:
      self.code, _, self.message = content[0]
56
lib/stem/response/authchallenge.py
Normal file
@ -0,0 +1,56 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

import binascii

import stem.response
import stem.socket
import stem.util.str_tools
import stem.util.tor_tools


class AuthChallengeResponse(stem.response.ControlMessage):
  """
  AUTHCHALLENGE query response.

  :var str server_hash: server hash provided by tor
  :var str server_nonce: server nonce provided by tor
  """

  def _parse_message(self):
    # Example:
    #   250 AUTHCHALLENGE SERVERHASH=680A73C9836C4F557314EA1C4EDE54C285DB9DC89C83627401AEF9D7D27A95D5 SERVERNONCE=F8EA4B1F2C8B40EF1AF68860171605B910E3BBCABADF6FC3DB1FA064F4690E85

    self.server_hash = None
    self.server_nonce = None

    if not self.is_ok():
      raise stem.ProtocolError("AUTHCHALLENGE response didn't have an OK status:\n%s" % self)
    elif len(self) > 1:
      raise stem.ProtocolError("Received multiline AUTHCHALLENGE response:\n%s" % self)

    line = self[0]

    # sanity check that we're an AUTHCHALLENGE response
    if not line.pop() == "AUTHCHALLENGE":
      raise stem.ProtocolError("Message is not an AUTHCHALLENGE response (%s)" % self)

    if line.is_next_mapping("SERVERHASH"):
      value = line.pop_mapping()[1]

      if not stem.util.tor_tools.is_hex_digits(value, 64):
        raise stem.ProtocolError("SERVERHASH has an invalid value: %s" % value)

      self.server_hash = binascii.a2b_hex(stem.util.str_tools._to_bytes(value))
    else:
      raise stem.ProtocolError("Missing SERVERHASH mapping: %s" % line)

    if line.is_next_mapping("SERVERNONCE"):
      value = line.pop_mapping()[1]

      if not stem.util.tor_tools.is_hex_digits(value, 64):
        raise stem.ProtocolError("SERVERNONCE has an invalid value: %s" % value)

      self.server_nonce = binascii.a2b_hex(stem.util.str_tools._to_bytes(value))
    else:
      raise stem.ProtocolError("Missing SERVERNONCE mapping: %s" % line)
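
# A usage sketch (the variable names are illustrative; stem.response.convert
# is how raw replies become typed responses):
#
#   response = stem.socket.recv_message(control_file)
#   stem.response.convert("AUTHCHALLENGE", response)
#   response.server_hash  # 32-byte digest computed by tor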
945
lib/stem/response/events.py
Normal file
@ -0,0 +1,945 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

import datetime
import io
import re
import time

import stem
import stem.control
import stem.descriptor.router_status_entry
import stem.response
import stem.version

from stem.util import connection, log, str_tools, tor_tools

# Matches keyword=value arguments. This can't be a simple "(.*)=(.*)" pattern
# because some positional arguments, like circuit paths, can have an equal
# sign.

KW_ARG = re.compile("^(.*) ([A-Za-z0-9_]+)=(\S*)$")
QUOTED_KW_ARG = re.compile("^(.*) ([A-Za-z0-9_]+)=\"(.*)\"$")


class Event(stem.response.ControlMessage):
  """
  Base for events we receive asynchronously, as described in section 4.1 of the
  `control-spec
  <https://gitweb.torproject.org/torspec.git/blob/HEAD:/control-spec.txt>`_.

  :var str type: event type
  :var int arrived_at: unix timestamp for when the message arrived
  :var list positional_args: positional arguments of the event
  :var dict keyword_args: key/value arguments of the event
  """

  _POSITIONAL_ARGS = ()    # attribute names for recognized positional arguments
  _KEYWORD_ARGS = {}       # map of 'keyword => attribute' for recognized attributes
  _QUOTED = ()             # positional arguments that are quoted
  _OPTIONALLY_QUOTED = ()  # positional arguments that may or may not be quoted
  _SKIP_PARSING = False    # skip parsing contents into our positional_args and keyword_args
  _VERSION_ADDED = stem.version.Version('0.1.1.1-alpha')  # minimum version with control-spec V1 event support

  def _parse_message(self, arrived_at = None):
    if arrived_at is None:
      arrived_at = int(time.time())

    if not str(self).strip():
      raise stem.ProtocolError("Received a blank tor event. Events must at the very least have a type.")

    self.type = str(self).split().pop(0)
    self.arrived_at = arrived_at

    # if we're a recognized event type then translate ourselves into that subclass

    if self.type in EVENT_TYPE_TO_CLASS:
      self.__class__ = EVENT_TYPE_TO_CLASS[self.type]

    self.positional_args = []
    self.keyword_args = {}

    if not self._SKIP_PARSING:
      self._parse_standard_attr()

    self._parse()

  def _parse_standard_attr(self):
    """
    Most events are of the form...

      650 *( positional_args ) *( key "=" value )

    This parses this standard format, populating our **positional_args** and
    **keyword_args** attributes and creating attributes if they're in our
    event's **_POSITIONAL_ARGS** and **_KEYWORD_ARGS**.
    """

    # Tor events contain some number of positional arguments followed by
    # key/value mappings. We parse keyword arguments from the end until we
    # hit something that isn't a key/value mapping. The rest are positional.

    content = str(self)

    while True:
      match = QUOTED_KW_ARG.match(content)

      if not match:
        match = KW_ARG.match(content)

      if match:
        content, keyword, value = match.groups()
        self.keyword_args[keyword] = value
      else:
        break

    # Setting attributes for the fields that we recognize.

    self.positional_args = content.split()[1:]
    positional = list(self.positional_args)

    for attr_name in self._POSITIONAL_ARGS:
      attr_value = None

      if positional:
        if attr_name in self._QUOTED or (attr_name in self._OPTIONALLY_QUOTED and positional[0].startswith('"')):
          attr_values = [positional.pop(0)]

          if not attr_values[0].startswith('"'):
            raise stem.ProtocolError("The %s value should be quoted, but didn't have a starting quote: %s" % (attr_name, self))

          while True:
            if not positional:
              raise stem.ProtocolError("The %s value should be quoted, but didn't have an ending quote: %s" % (attr_name, self))

            attr_values.append(positional.pop(0))

            if attr_values[-1].endswith('"'):
              break

          attr_value = " ".join(attr_values)[1:-1]
        else:
          attr_value = positional.pop(0)

      setattr(self, attr_name, attr_value)

    for controller_attr_name, attr_name in self._KEYWORD_ARGS.items():
      setattr(self, attr_name, self.keyword_args.get(controller_attr_name))
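
  # As an illustration (not from the spec verbatim), a raw event such as...
  #
  #   650 CIRC 4 LAUNCHED PURPOSE=GENERAL
  #
  # ...would leave positional_args as ['4', 'LAUNCHED'] and keyword_args as
  # {'PURPOSE': 'GENERAL'} after this method runs.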
  # method overwritten by our subclasses for special handling that they do
  def _parse(self):
    pass

  def _log_if_unrecognized(self, attr, attr_enum):
    """
    Checks if an attribute exists in a given enumeration, logging a message if
    it isn't. Attributes can be either a string or a collection of strings.

    :param str attr: name of the attribute to check
    :param stem.util.enum.Enum attr_enum: enumeration to check against
    """

    attr_values = getattr(self, attr)

    if attr_values:
      if isinstance(attr_values, (bytes, unicode)):
        attr_values = [attr_values]

      for value in attr_values:
        if value not in attr_enum:
          log_id = "event.%s.unknown_%s.%s" % (self.type.lower(), attr, value)
          unrecognized_msg = "%s event had an unrecognized %s (%s). Maybe a new addition to the control protocol? Full Event: '%s'" % (self.type, attr, value, self)
          log.log_once(log_id, log.INFO, unrecognized_msg)


class AddrMapEvent(Event):
  """
  Event that indicates a new address mapping.

  The ADDRMAP event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var str hostname: address being resolved
  :var str destination: destination of the resolution, this is usually an ip,
    but could be a hostname if TrackHostExits is enabled or **NONE** if the
    resolution failed
  :var datetime expiry: expiration time of the resolution in local time
  :var str error: error code if the resolution failed
  :var datetime utc_expiry: expiration time of the resolution in UTC
  :var bool cached: **True** if the resolution will be kept until it expires,
    **False** otherwise or **None** if undefined
  """

  _POSITIONAL_ARGS = ("hostname", "destination", "expiry")
  _KEYWORD_ARGS = {
    "error": "error",
    "EXPIRES": "utc_expiry",
    "CACHED": "cached",
  }
  _OPTIONALLY_QUOTED = ("expiry",)

  def _parse(self):
    if self.destination == "<error>":
      self.destination = None

    if self.expiry is not None:
      if self.expiry == "NEVER":
        self.expiry = None
      else:
        try:
          self.expiry = datetime.datetime.strptime(self.expiry, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          raise stem.ProtocolError("Unable to parse date in ADDRMAP event: %s" % self)

    if self.utc_expiry is not None:
      self.utc_expiry = datetime.datetime.strptime(self.utc_expiry, "%Y-%m-%d %H:%M:%S")

    if self.cached is not None:
      if self.cached == "YES":
        self.cached = True
      elif self.cached == "NO":
        self.cached = False
      else:
        raise stem.ProtocolError("An ADDRMAP event's CACHED mapping can only be 'YES' or 'NO': %s" % self)


class AuthDirNewDescEvent(Event):
  """
  Event specific to directory authorities, indicating that we just received new
  descriptors. The descriptor type contained within this event is unspecified
  so the descriptor contents are left unparsed.

  The AUTHDIR_NEWDESCS event was introduced in tor version 0.1.1.10-alpha.

  :var stem.AuthDescriptorAction action: what is being done with the descriptor
  :var str message: explanation of why we chose this action
  :var str descriptor: content of the descriptor
  """

  _SKIP_PARSING = True
  _VERSION_ADDED = stem.version.Requirement.EVENT_AUTHDIR_NEWDESCS

  def _parse(self):
    lines = str(self).split('\n')

    if len(lines) < 5:
      raise stem.ProtocolError("AUTHDIR_NEWDESCS events must contain lines for at least the type, action, message, descriptor, and terminating 'OK'")
    elif not lines[-1] == "OK":
      raise stem.ProtocolError("AUTHDIR_NEWDESCS doesn't end with an 'OK'")

    self.action = lines[1]
    self.message = lines[2]
    self.descriptor = '\n'.join(lines[3:-1])


class BandwidthEvent(Event):
  """
  Event emitted every second with the bytes sent and received by tor.

  The BW event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var long read: bytes received by tor that second
  :var long written: bytes sent by tor that second
  """

  _POSITIONAL_ARGS = ("read", "written")

  def _parse(self):
    if not self.read:
      raise stem.ProtocolError("BW event is missing its read value")
    elif not self.written:
      raise stem.ProtocolError("BW event is missing its written value")
    elif not self.read.isdigit() or not self.written.isdigit():
      raise stem.ProtocolError("A BW event's bytes sent and received should be a positive numeric value, received: %s" % self)

    self.read = long(self.read)
    self.written = long(self.written)


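# For instance (an illustrative event, not captured output), the wire line...
#
#   650 BW 1532 2656
#
# ...parses to a BandwidthEvent with read = 1532 and written = 2656.

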
class BuildTimeoutSetEvent(Event):
  """
  Event indicating that the timeout value for a circuit has changed.

  The BUILDTIMEOUT_SET event was introduced in tor version 0.2.2.7-alpha.

  :var stem.TimeoutSetType set_type: way in which the timeout is changing
  :var int total_times: circuit build times tor used to determine the timeout
  :var int timeout: circuit timeout value in milliseconds
  :var int xm: Pareto parameter Xm in milliseconds
  :var float alpha: Pareto parameter alpha
  :var float quantile: CDF quantile cutoff point
  :var float timeout_rate: ratio of circuits that have timed out
  :var int close_timeout: duration to keep measurement circuits in milliseconds
  :var float close_rate: ratio of measurement circuits that are closed
  """

  _POSITIONAL_ARGS = ("set_type",)
  _KEYWORD_ARGS = {
    "TOTAL_TIMES": "total_times",
    "TIMEOUT_MS": "timeout",
    "XM": "xm",
    "ALPHA": "alpha",
    "CUTOFF_QUANTILE": "quantile",
    "TIMEOUT_RATE": "timeout_rate",
    "CLOSE_MS": "close_timeout",
    "CLOSE_RATE": "close_rate",
  }
  _VERSION_ADDED = stem.version.Requirement.EVENT_BUILDTIMEOUT_SET

  def _parse(self):
    # convert our integer and float parameters

    for param in ('total_times', 'timeout', 'xm', 'close_timeout'):
      param_value = getattr(self, param)

      if param_value is not None:
        try:
          setattr(self, param, int(param_value))
        except ValueError:
          raise stem.ProtocolError("The %s of a BUILDTIMEOUT_SET should be an integer: %s" % (param, self))

    for param in ('alpha', 'quantile', 'timeout_rate', 'close_rate'):
      param_value = getattr(self, param)

      if param_value is not None:
        try:
          setattr(self, param, float(param_value))
        except ValueError:
          raise stem.ProtocolError("The %s of a BUILDTIMEOUT_SET should be a float: %s" % (param, self))

    self._log_if_unrecognized('set_type', stem.TimeoutSetType)


class CircuitEvent(Event):
  """
  Event that indicates that a circuit has changed.

  The fingerprint or nickname values in our 'path' may be **None** if the
  VERBOSE_NAMES feature isn't enabled. The option was first introduced in tor
  version 0.1.2.2, and on by default after 0.2.2.1.

  The CIRC event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var str id: circuit identifier
  :var stem.CircStatus status: reported status for the circuit
  :var tuple path: relays involved in the circuit, these are
    **(fingerprint, nickname)** tuples
  :var tuple build_flags: :data:`~stem.CircBuildFlag` attributes
    governing how the circuit is built
  :var stem.CircPurpose purpose: purpose that the circuit is intended for
  :var stem.HiddenServiceState hs_state: status if this is a hidden service circuit
  :var str rend_query: circuit's rendezvous-point if this is hidden service related
  :var datetime created: time when the circuit was created or cannibalized
  :var stem.CircClosureReason reason: reason for the circuit to be closed
  :var stem.CircClosureReason remote_reason: remote side's reason for the circuit to be closed
  """

  _POSITIONAL_ARGS = ("id", "status", "path")
  _KEYWORD_ARGS = {
    "BUILD_FLAGS": "build_flags",
    "PURPOSE": "purpose",
    "HS_STATE": "hs_state",
    "REND_QUERY": "rend_query",
    "TIME_CREATED": "created",
    "REASON": "reason",
    "REMOTE_REASON": "remote_reason",
  }

  def _parse(self):
    self.path = tuple(stem.control._parse_circ_path(self.path))

    if self.build_flags is not None:
      self.build_flags = tuple(self.build_flags.split(','))

    if self.created is not None:
      try:
        self.created = str_tools._parse_iso_timestamp(self.created)
      except ValueError as exc:
        raise stem.ProtocolError("Unable to parse create date (%s): %s" % (exc, self))

    if not tor_tools.is_valid_circuit_id(self.id):
      raise stem.ProtocolError("Circuit IDs must be one to sixteen alphanumeric characters, got '%s': %s" % (self.id, self))

    self._log_if_unrecognized('status', stem.CircStatus)
    self._log_if_unrecognized('build_flags', stem.CircBuildFlag)
    self._log_if_unrecognized('purpose', stem.CircPurpose)
    self._log_if_unrecognized('hs_state', stem.HiddenServiceState)
    self._log_if_unrecognized('reason', stem.CircClosureReason)
    self._log_if_unrecognized('remote_reason', stem.CircClosureReason)


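# An illustrative CIRC event as it arrives from the control port (the
# fingerprint and nickname are made up):
#
#   650 CIRC 7 BUILT $E5813A63E3A6F2F4A6E7A82A0CE47BD382C8F25B=ExampleRelay BUILD_FLAGS=NEED_CAPACITY PURPOSE=GENERAL

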
class CircMinorEvent(Event):
  """
  Event providing information about minor changes in our circuits.

  The CIRC_MINOR event was introduced in tor version 0.2.3.11-alpha.

  :var str id: circuit identifier
  :var stem.CircEvent event: type of change in the circuit
  :var tuple path: relays involved in the circuit, these are
    **(fingerprint, nickname)** tuples
  :var tuple build_flags: :data:`~stem.CircBuildFlag` attributes
    governing how the circuit is built
  :var stem.CircPurpose purpose: purpose that the circuit is intended for
  :var stem.HiddenServiceState hs_state: status if this is a hidden service circuit
  :var str rend_query: circuit's rendezvous-point if this is hidden service related
  :var datetime created: time when the circuit was created or cannibalized
  :var stem.CircPurpose old_purpose: prior purpose for the circuit
  :var stem.HiddenServiceState old_hs_state: prior status as a hidden service circuit
  """

  _POSITIONAL_ARGS = ("id", "event", "path")
  _KEYWORD_ARGS = {
    "BUILD_FLAGS": "build_flags",
    "PURPOSE": "purpose",
    "HS_STATE": "hs_state",
    "REND_QUERY": "rend_query",
    "TIME_CREATED": "created",
    "OLD_PURPOSE": "old_purpose",
    "OLD_HS_STATE": "old_hs_state",
  }
  _VERSION_ADDED = stem.version.Requirement.EVENT_CIRC_MINOR

  def _parse(self):
    self.path = tuple(stem.control._parse_circ_path(self.path))

    if self.build_flags is not None:
      self.build_flags = tuple(self.build_flags.split(','))

    if self.created is not None:
      try:
        self.created = str_tools._parse_iso_timestamp(self.created)
      except ValueError as exc:
        raise stem.ProtocolError("Unable to parse create date (%s): %s" % (exc, self))

    if not tor_tools.is_valid_circuit_id(self.id):
      raise stem.ProtocolError("Circuit IDs must be one to sixteen alphanumeric characters, got '%s': %s" % (self.id, self))

    self._log_if_unrecognized('event', stem.CircEvent)
    self._log_if_unrecognized('build_flags', stem.CircBuildFlag)
    self._log_if_unrecognized('purpose', stem.CircPurpose)
    self._log_if_unrecognized('hs_state', stem.HiddenServiceState)
    self._log_if_unrecognized('old_purpose', stem.CircPurpose)
    self._log_if_unrecognized('old_hs_state', stem.HiddenServiceState)


class ClientsSeenEvent(Event):
  """
  Periodic event on bridge relays that provides a summary of our users.

  The CLIENTS_SEEN event was introduced in tor version 0.2.1.10-alpha.

  :var datetime start_time: time in UTC that we started collecting these stats
  :var dict locales: mapping of country codes to a rounded count for the number of users
  :var dict ip_versions: mapping of ip protocols to a rounded count for the number of users
  """

  _KEYWORD_ARGS = {
    "TimeStarted": "start_time",
    "CountrySummary": "locales",
    "IPVersions": "ip_versions",
  }
  _VERSION_ADDED = stem.version.Requirement.EVENT_CLIENTS_SEEN

  def _parse(self):
    if self.start_time is not None:
      self.start_time = datetime.datetime.strptime(self.start_time, "%Y-%m-%d %H:%M:%S")

    if self.locales is not None:
      locale_to_count = {}

      for entry in self.locales.split(','):
        if '=' not in entry:
          raise stem.ProtocolError("The CLIENTS_SEEN's CountrySummary should be a comma separated listing of '<locale>=<count>' mappings: %s" % self)

        locale, count = entry.split('=', 1)

        if len(locale) != 2:
          raise stem.ProtocolError("Locales should be a two character code, got '%s': %s" % (locale, self))
        elif not count.isdigit():
          raise stem.ProtocolError("Locale count was non-numeric (%s): %s" % (count, self))
        elif locale in locale_to_count:
          raise stem.ProtocolError("CountrySummary had multiple mappings for '%s': %s" % (locale, self))

        locale_to_count[locale] = int(count)

      self.locales = locale_to_count

    if self.ip_versions is not None:
      protocol_to_count = {}

      for entry in self.ip_versions.split(','):
        if '=' not in entry:
          raise stem.ProtocolError("The CLIENTS_SEEN's IPVersions should be a comma separated listing of '<protocol>=<count>' mappings: %s" % self)

        protocol, count = entry.split('=', 1)

        if not count.isdigit():
          raise stem.ProtocolError("IP protocol count was non-numeric (%s): %s" % (count, self))

        protocol_to_count[protocol] = int(count)

      self.ip_versions = protocol_to_count


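# An illustrative CLIENTS_SEEN event (the counts are made up):
#
#   650 CLIENTS_SEEN TimeStarted="2012-12-05 01:02:03" CountrySummary=us=16,de=8 IPVersions=v4=16,v6=8

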
class ConfChangedEvent(Event):
  """
  Event that indicates that our configuration changed, either in response to a
  SETCONF or RELOAD signal.

  The CONF_CHANGED event was introduced in tor version 0.2.3.3-alpha.

  :var dict config: mapping of configuration options to their new values
    (**None** if the option is being unset)
  """

  _SKIP_PARSING = True
  _VERSION_ADDED = stem.version.Requirement.EVENT_CONF_CHANGED

  def _parse(self):
    self.config = {}

    # Skip first and last line since they're the header and footer. For
    # instance...
    #
    #   650-CONF_CHANGED
    #   650-ExitNodes=caerSidi
    #   650-ExitPolicy
    #   650-MaxCircuitDirtiness=20
    #   650 OK

    for line in str(self).splitlines()[1:-1]:
      if '=' in line:
        key, value = line.split('=', 1)
      else:
        key, value = line, None

      self.config[key] = value


class DescChangedEvent(Event):
  """
  Event that indicates that our descriptor has changed.

  The DESCCHANGED event was introduced in tor version 0.1.2.2-alpha.
  """

  _VERSION_ADDED = stem.version.Requirement.EVENT_DESCCHANGED


class GuardEvent(Event):
  """
  Event that indicates that our guard relays have changed. The 'endpoint' could
  be either a...

  * fingerprint
  * 'fingerprint=nickname' pair

  The derived 'endpoint_*' attributes are generally more useful.

  The GUARD event was introduced in tor version 0.1.2.5-alpha.

  :var stem.GuardType guard_type: purpose the guard relay is for
  :var str endpoint: relay that the event concerns
  :var str endpoint_fingerprint: endpoint's fingerprint
  :var str endpoint_nickname: endpoint's nickname if it was provided
  :var stem.GuardStatus status: status of the guard relay
  """

  _VERSION_ADDED = stem.version.Requirement.EVENT_GUARD
  _POSITIONAL_ARGS = ("guard_type", "endpoint", "status")

  def _parse(self):
    self.endpoint_fingerprint = None
    self.endpoint_nickname = None

    try:
      self.endpoint_fingerprint, self.endpoint_nickname = \
        stem.control._parse_circ_entry(self.endpoint)
    except stem.ProtocolError:
      raise stem.ProtocolError("GUARD's endpoint doesn't match a ServerSpec: %s" % self)

    self._log_if_unrecognized('guard_type', stem.GuardType)
    self._log_if_unrecognized('status', stem.GuardStatus)


class LogEvent(Event):
  """
  Tor logging event. These are the most visible kind of event since, by
  default, tor logs at the NOTICE :data:`~stem.Runlevel` to stdout.

  The logging events were some of the first Control Protocol V1 events
  and were introduced in tor version 0.1.1.1-alpha.

  :var stem.Runlevel runlevel: runlevel of the logged message
  :var str message: logged message
  """

  _SKIP_PARSING = True

  def _parse(self):
    self.runlevel = self.type
    self._log_if_unrecognized('runlevel', stem.Runlevel)

    # message is our content, minus the runlevel and ending "OK" if a
    # multi-line message

    self.message = str(self)[len(self.runlevel) + 1:].rstrip("\nOK")


class NetworkStatusEvent(Event):
  """
  Event for when our copy of the consensus has changed.

  The NS event was introduced in tor version 0.1.2.3-alpha.

  :var list desc: :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` for the changed descriptors
  """

  _SKIP_PARSING = True
  _VERSION_ADDED = stem.version.Requirement.EVENT_NS

  def _parse(self):
    content = str(self).lstrip("NS\n").rstrip("\nOK")

    self.desc = list(stem.descriptor.router_status_entry._parse_file(
      io.BytesIO(str_tools._to_bytes(content)),
      True,
      entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV3,
    ))


class NewConsensusEvent(Event):
  """
  Event for when we have a new consensus. This is similar to
  :class:`~stem.response.events.NetworkStatusEvent`, except that it contains
  the whole consensus so anything not listed is implicitly no longer
  recommended.

  The NEWCONSENSUS event was introduced in tor version 0.2.1.13-alpha.

  :var list desc: :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` for the changed descriptors
  """

  _SKIP_PARSING = True
  _VERSION_ADDED = stem.version.Requirement.EVENT_NEWCONSENSUS

  def _parse(self):
    content = str(self).lstrip("NEWCONSENSUS\n").rstrip("\nOK")

    self.desc = list(stem.descriptor.router_status_entry._parse_file(
      io.BytesIO(str_tools._to_bytes(content)),
      True,
      entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV3,
    ))


class NewDescEvent(Event):
  """
  Event that indicates that a new descriptor is available.

  The fingerprint or nickname values in our 'relays' may be **None** if the
  VERBOSE_NAMES feature isn't enabled. The option was first introduced in tor
  version 0.1.2.2, and on by default after 0.2.2.1.

  The NEWDESC event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var tuple relays: **(fingerprint, nickname)** tuples for the relays with
    new descriptors
  """

  def _parse(self):
    self.relays = tuple([stem.control._parse_circ_entry(entry) for entry in str(self).split()[1:]])


class ORConnEvent(Event):
  """
  Event that indicates a change in a relay connection. The 'endpoint' could be
  any of several things including a...

  * fingerprint
  * nickname
  * 'fingerprint=nickname' pair
  * address:port

  The derived 'endpoint_*' attributes are generally more useful.

  The ORCONN event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var str endpoint: relay that the event concerns
  :var str endpoint_fingerprint: endpoint's fingerprint if it was provided
  :var str endpoint_nickname: endpoint's nickname if it was provided
  :var str endpoint_address: endpoint's address if it was provided
  :var int endpoint_port: endpoint's port if it was provided
  :var stem.ORStatus status: state of the connection
  :var stem.ORClosureReason reason: reason for the connection to be closed
  :var int circ_count: number of established and pending circuits
  """

  _POSITIONAL_ARGS = ("endpoint", "status")
  _KEYWORD_ARGS = {
    "REASON": "reason",
    "NCIRCS": "circ_count",
  }

  def _parse(self):
    self.endpoint_fingerprint = None
    self.endpoint_nickname = None
    self.endpoint_address = None
    self.endpoint_port = None

    try:
      self.endpoint_fingerprint, self.endpoint_nickname = \
        stem.control._parse_circ_entry(self.endpoint)
    except stem.ProtocolError:
      if ':' not in self.endpoint:
        raise stem.ProtocolError("ORCONN endpoint is neither a relay nor 'address:port': %s" % self)

      address, port = self.endpoint.split(':', 1)

      if not connection.is_valid_port(port):
        raise stem.ProtocolError("ORCONN's endpoint location's port is invalid: %s" % self)

      self.endpoint_address = address
      self.endpoint_port = int(port)

    if self.circ_count is not None:
      if not self.circ_count.isdigit():
        raise stem.ProtocolError("ORCONN event got a non-numeric circuit count (%s): %s" % (self.circ_count, self))

      self.circ_count = int(self.circ_count)

    self._log_if_unrecognized('status', stem.ORStatus)
    self._log_if_unrecognized('reason', stem.ORClosureReason)


class SignalEvent(Event):
  """
  Event that indicates that tor has received and acted upon a signal being sent
  to the process. As of tor version 0.2.4.6 the only signals conveyed by this
  event are...

  * RELOAD
  * DUMP
  * DEBUG
  * NEWNYM
  * CLEARDNSCACHE

  The SIGNAL event was introduced in tor version 0.2.3.1-alpha.

  :var stem.Signal signal: signal that tor received
  """

  _POSITIONAL_ARGS = ("signal",)
  _VERSION_ADDED = stem.version.Requirement.EVENT_SIGNAL

  def _parse(self):
    # log if we received an unrecognized signal
    expected_signals = (
      stem.Signal.RELOAD,
      stem.Signal.DUMP,
      stem.Signal.DEBUG,
      stem.Signal.NEWNYM,
      stem.Signal.CLEARDNSCACHE,
    )

    self._log_if_unrecognized('signal', expected_signals)


class StatusEvent(Event):
  """
  Notification of a change in tor's state. These are generally triggered for
  the same sort of things as log messages of the NOTICE level or higher.
  However, unlike :class:`~stem.response.events.LogEvent` these contain well
  formed data.

  The STATUS_GENERAL, STATUS_CLIENT, and STATUS_SERVER events were introduced
  in tor version 0.1.2.3-alpha.

  :var stem.StatusType status_type: category of the status event
  :var stem.Runlevel runlevel: runlevel of the logged message
  :var str message: logged message
  """

  _POSITIONAL_ARGS = ("runlevel", "action")
  _VERSION_ADDED = stem.version.Requirement.EVENT_STATUS

  def _parse(self):
    if self.type == 'STATUS_GENERAL':
      self.status_type = stem.StatusType.GENERAL
    elif self.type == 'STATUS_CLIENT':
      self.status_type = stem.StatusType.CLIENT
    elif self.type == 'STATUS_SERVER':
      self.status_type = stem.StatusType.SERVER
    else:
      raise ValueError("BUG: Unrecognized status type (%s), likely an EVENT_TYPE_TO_CLASS addition without revising how 'status_type' is assigned." % self.type)

    self._log_if_unrecognized('runlevel', stem.Runlevel)


class StreamEvent(Event):
  """
  Event that indicates that a stream has changed.

  The STREAM event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var str id: stream identifier
  :var stem.StreamStatus status: reported status for the stream
  :var str circ_id: circuit that the stream is attached to
  :var str target: destination of the stream
  :var str target_address: destination address (ip, hostname, or '(Tor_internal)')
  :var int target_port: destination port
  :var stem.StreamClosureReason reason: reason for the stream to be closed
  :var stem.StreamClosureReason remote_reason: remote side's reason for the stream to be closed
  :var stem.StreamSource source: origin of the REMAP request
  :var str source_addr: requester of the connection
  :var str source_address: requester address (ip or hostname)
  :var int source_port: requester port
  :var stem.StreamPurpose purpose: purpose for the stream
  """

  _POSITIONAL_ARGS = ("id", "status", "circ_id", "target")
  _KEYWORD_ARGS = {
    "REASON": "reason",
    "REMOTE_REASON": "remote_reason",
    "SOURCE": "source",
    "SOURCE_ADDR": "source_addr",
    "PURPOSE": "purpose",
  }

  def _parse(self):
    if self.target is None:
      raise stem.ProtocolError("STREAM event didn't have a target: %s" % self)
    else:
      if ':' not in self.target:
        raise stem.ProtocolError("Target location must be of the form 'address:port': %s" % self)

      address, port = self.target.rsplit(':', 1)

      if not connection.is_valid_port(port, allow_zero = True):
        raise stem.ProtocolError("Target location's port is invalid: %s" % self)

      self.target_address = address
      self.target_port = int(port)

    if self.source_addr is None:
      self.source_address = None
      self.source_port = None
    else:
      if ':' not in self.source_addr:
        raise stem.ProtocolError("Source location must be of the form 'address:port': %s" % self)

      address, port = self.source_addr.split(':', 1)

      if not connection.is_valid_port(port, allow_zero = True):
        raise stem.ProtocolError("Source location's port is invalid: %s" % self)

      self.source_address = address
      self.source_port = int(port)

    # spec specifies a circ_id of zero if the stream is unattached

    if self.circ_id == "0":
      self.circ_id = None

    self._log_if_unrecognized('reason', stem.StreamClosureReason)
    self._log_if_unrecognized('remote_reason', stem.StreamClosureReason)
    self._log_if_unrecognized('purpose', stem.StreamPurpose)


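# An illustrative STREAM event (the identifiers are made up):
#
#   650 STREAM 18 NEW 0 www.example.com:80 SOURCE_ADDR=127.0.0.1:47849 PURPOSE=USER
#
# ...giving a target of 'www.example.com:80' and, since the circuit id is
# zero, a circ_id of None.

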
class StreamBwEvent(Event):
  """
  Event (emitted approximately every second) with the bytes sent and received
  by the application since the last such event on this stream.

  The STREAM_BW event was introduced in tor version 0.1.2.8-beta.

  :var str id: stream identifier
  :var long written: bytes sent by the application
  :var long read: bytes received by the application
  """

  _POSITIONAL_ARGS = ("id", "written", "read")
  _VERSION_ADDED = stem.version.Requirement.EVENT_STREAM_BW

  def _parse(self):
    if not tor_tools.is_valid_stream_id(self.id):
      raise stem.ProtocolError("Stream IDs must be one to sixteen alphanumeric characters, got '%s': %s" % (self.id, self))
    elif not self.written:
      raise stem.ProtocolError("STREAM_BW event is missing its written value")
    elif not self.read:
      raise stem.ProtocolError("STREAM_BW event is missing its read value")
    elif not self.read.isdigit() or not self.written.isdigit():
      raise stem.ProtocolError("A STREAM_BW event's bytes sent and received should be a positive numeric value, received: %s" % self)

    self.read = long(self.read)
    self.written = long(self.written)


class TransportLaunchedEvent(Event):
  """
  Event triggered when a pluggable transport is launched.

  The TRANSPORT_LAUNCHED event was introduced in tor version 0.2.5.0-alpha.

  :var str type: 'server' or 'client'
  :var str name: name of the pluggable transport
  :var str address: IPv4 or IPv6 address where the transport is listening for
    connections
  :var int port: port where the transport is listening for connections
  """

  _POSITIONAL_ARGS = ("type", "name", "address", "port")
  _VERSION_ADDED = stem.version.Requirement.EVENT_TRANSPORT_LAUNCHED

  def _parse(self):
    if self.type not in ('server', 'client'):
      raise stem.ProtocolError("Transport type should either be 'server' or 'client': %s" % self)

    if not connection.is_valid_ipv4_address(self.address) and \
       not connection.is_valid_ipv6_address(self.address):
      raise stem.ProtocolError("Transport address isn't a valid IPv4 or IPv6 address: %s" % self)

    if not connection.is_valid_port(self.port):
      raise stem.ProtocolError("Transport port is invalid: %s" % self)

    self.port = int(self.port)


EVENT_TYPE_TO_CLASS = {
  "ADDRMAP": AddrMapEvent,
  "AUTHDIR_NEWDESCS": AuthDirNewDescEvent,
  "BUILDTIMEOUT_SET": BuildTimeoutSetEvent,
  "BW": BandwidthEvent,
  "CIRC": CircuitEvent,
  "CIRC_MINOR": CircMinorEvent,
  "CLIENTS_SEEN": ClientsSeenEvent,
  "CONF_CHANGED": ConfChangedEvent,
  "DEBUG": LogEvent,
  "DESCCHANGED": DescChangedEvent,
  "ERR": LogEvent,
  "GUARD": GuardEvent,
  "INFO": LogEvent,
  "NEWCONSENSUS": NewConsensusEvent,
  "NEWDESC": NewDescEvent,
  "NOTICE": LogEvent,
  "NS": NetworkStatusEvent,
  "ORCONN": ORConnEvent,
  "SIGNAL": SignalEvent,
  "STATUS_CLIENT": StatusEvent,
  "STATUS_GENERAL": StatusEvent,
  "STATUS_SERVER": StatusEvent,
  "STREAM": StreamEvent,
  "STREAM_BW": StreamBwEvent,
  "TRANSPORT_LAUNCHED": TransportLaunchedEvent,
  "WARN": LogEvent,

  # accounting for a bug in tor 0.2.0.22
  "STATUS_SEVER": StatusEvent,
}
55
lib/stem/response/getconf.py
Normal file
@ -0,0 +1,55 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

import stem.response
import stem.socket


class GetConfResponse(stem.response.ControlMessage):
  """
  Reply for a GETCONF query.

  Note that configuration parameters won't match what we queried for if it's
  one of the special mapping options (ex. "HiddenServiceOptions").

  :var dict entries: mapping between the config parameters (**str**) and their
    values (**list** of **str**)
  """

  def _parse_message(self):
    # Example:
    #   250-CookieAuthentication=0
    #   250-ControlPort=9100
    #   250-DataDirectory=/home/neena/.tor
    #   250 DirPort

    self.entries = {}
    remaining_lines = list(self)

    if self.content() == [("250", " ", "OK")]:
      return

    if not self.is_ok():
      unrecognized_keywords = []

      for code, _, line in self.content():
        if code == "552" and line.startswith("Unrecognized configuration key \"") and line.endswith("\""):
          unrecognized_keywords.append(line[32:-1])

      if unrecognized_keywords:
        raise stem.InvalidArguments("552", "GETCONF request contained unrecognized keywords: %s" % ', '.join(unrecognized_keywords), unrecognized_keywords)
      else:
        raise stem.ProtocolError("GETCONF response contained a non-OK status code:\n%s" % self)

    while remaining_lines:
      line = remaining_lines.pop(0)

      if line.is_next_mapping():
        key, value = line.split("=", 1)
      else:
        key, value = (line.pop(), None)

      if key not in self.entries:
        self.entries[key] = []

      if value is not None:
        self.entries[key].append(value)
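
# A usage sketch (illustrative; 'response' is a reply read off the control
# socket):
#
#   stem.response.convert("GETCONF", response)
#   response.entries  # e.g. {'ControlPort': ['9100'], 'DirPort': []}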
78
lib/stem/response/getinfo.py
Normal file
@ -0,0 +1,78 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

import stem.prereq
import stem.response
import stem.socket
import stem.util.str_tools


class GetInfoResponse(stem.response.ControlMessage):
  """
  Reply for a GETINFO query.

  :var dict entries: mapping between the queried options and their bytes values
  """

  def _parse_message(self):
    # Example:
    #   250-version=0.2.3.11-alpha-dev (git-ef0bc7f8f26a917c)
    #   250+config-text=
    #   ControlPort 9051
    #   DataDirectory /home/atagar/.tor
    #   ExitPolicy reject *:*
    #   Log notice stdout
    #   Nickname Unnamed
    #   ORPort 9050
    #   .
    #   250 OK

    self.entries = {}
    remaining_lines = [content for (code, div, content) in self.content(get_bytes = True)]

    if not self.is_ok() or not remaining_lines.pop() == b"OK":
      unrecognized_keywords = []

      for code, _, line in self.content():
        if code == '552' and line.startswith("Unrecognized key \"") and line.endswith("\""):
          unrecognized_keywords.append(line[18:-1])

      if unrecognized_keywords:
        raise stem.InvalidArguments("552", "GETINFO request contained unrecognized keywords: %s\n" % ', '.join(unrecognized_keywords), unrecognized_keywords)
      else:
        raise stem.ProtocolError("GETINFO response didn't have an OK status:\n%s" % self)

    while remaining_lines:
      try:
        key, value = remaining_lines.pop(0).split(b"=", 1)
      except ValueError:
        raise stem.ProtocolError("GETINFO replies should only contain parameter=value mappings:\n%s" % self)

      if stem.prereq.is_python_3():
        key = stem.util.str_tools._to_unicode(key)

      # if the value is a multiline value then it *must* be of the form
      # '<key>=\n<value>'

      if b"\n" in value:
        if not value.startswith(b"\n"):
          raise stem.ProtocolError("GETINFO response contained a multi-line value that didn't start with a newline:\n%s" % self)

        value = value[1:]

      self.entries[key] = value

  def _assert_matches(self, params):
    """
    Checks if we match a given set of parameters, and raises a ProtocolError
    if not.

    :param set params: parameters to assert that we contain

    :raises:
      * :class:`stem.ProtocolError` if parameters don't match this response
    """

    reply_params = set(self.entries.keys())

    if params != reply_params:
      requested_label = ", ".join(params)
      reply_label = ", ".join(reply_params)

      raise stem.ProtocolError("GETINFO reply doesn't match the parameters that we requested. Queried '%s' but got '%s'." % (requested_label, reply_label))
42
lib/stem/response/mapaddress.py
Normal file
@ -0,0 +1,42 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

import stem.response
import stem.socket


class MapAddressResponse(stem.response.ControlMessage):
  """
  Reply for a MAPADDRESS query.
  Doesn't raise an exception unless no addresses were mapped successfully.

  :var dict entries: mapping between the original and replacement addresses

  :raises:
    * :class:`stem.OperationFailed` if Tor was unable to satisfy the request
    * :class:`stem.InvalidRequest` if the addresses provided were invalid
  """

  def _parse_message(self):
    # Example:
    #   250-127.192.10.10=torproject.org
    #   250 1.2.3.4=tor.freehaven.net

    if not self.is_ok():
      for code, _, message in self.content():
        if code == "512":
          raise stem.InvalidRequest(code, message)
        elif code == "451":
          raise stem.OperationFailed(code, message)
        else:
          raise stem.ProtocolError("MAPADDRESS returned unexpected response code: %s" % code)

    self.entries = {}

    for code, _, message in self.content():
      if code == "250":
        try:
          key, value = message.split("=", 1)
          self.entries[key] = value
        except ValueError:
          raise stem.ProtocolError("MAPADDRESS returned '%s', which isn't a mapping" % message)
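
# A usage sketch (the addresses are illustrative):
#
#   stem.response.convert("MAPADDRESS", response)
#   response.entries  # e.g. {'1.2.3.4': 'tor.freehaven.net'}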
122
lib/stem/response/protocolinfo.py
Normal file
@ -0,0 +1,122 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

import stem.response
import stem.socket
import stem.version

from stem.connection import AuthMethod
from stem.util import log


class ProtocolInfoResponse(stem.response.ControlMessage):
  """
  Version one PROTOCOLINFO query response.

  The protocol_version is the only mandatory data for a valid PROTOCOLINFO
  response, so all other values are None if undefined or empty if a collection.

  :var int protocol_version: protocol version of the response
  :var stem.version.Version tor_version: version of the tor process
  :var tuple auth_methods: :data:`stem.connection.AuthMethod` types that tor will accept
  :var tuple unknown_auth_methods: strings of unrecognized auth methods
  :var str cookie_path: path of tor's authentication cookie
  """

  def _parse_message(self):
    # Example:
    #   250-PROTOCOLINFO 1
    #   250-AUTH METHODS=COOKIE COOKIEFILE="/home/atagar/.tor/control_auth_cookie"
    #   250-VERSION Tor="0.2.1.30"
    #   250 OK

    self.protocol_version = None
    self.tor_version = None
    self.auth_methods = ()
    self.unknown_auth_methods = ()
    self.cookie_path = None

    auth_methods, unknown_auth_methods = [], []
    remaining_lines = list(self)

    if not self.is_ok() or not remaining_lines.pop() == "OK":
      raise stem.ProtocolError("PROTOCOLINFO response didn't have an OK status:\n%s" % self)

    # sanity check that we're a PROTOCOLINFO response
    if not remaining_lines[0].startswith("PROTOCOLINFO"):
      raise stem.ProtocolError("Message is not a PROTOCOLINFO response:\n%s" % self)

    while remaining_lines:
      line = remaining_lines.pop(0)
      line_type = line.pop()

      if line_type == "PROTOCOLINFO":
        # Line format:
        #   FirstLine = "PROTOCOLINFO" SP PIVERSION CRLF
        #   PIVERSION = 1*DIGIT

        if line.is_empty():
          raise stem.ProtocolError("PROTOCOLINFO response's initial line is missing the protocol version: %s" % line)

        try:
          self.protocol_version = int(line.pop())
        except ValueError:
          raise stem.ProtocolError("PROTOCOLINFO response version is non-numeric: %s" % line)

        # The piversion really should be "1" but, according to the spec, tor
        # does not necessarily need to provide the PROTOCOLINFO version that we
        # requested. Log if it's something we aren't expecting but still make
        # an effort to parse like a v1 response.

        if self.protocol_version != 1:
          log.info("We made a PROTOCOLINFO version 1 query but got a version %i response instead. We'll still try to use it, but this may cause problems." % self.protocol_version)
      elif line_type == "AUTH":
        # Line format:
        #   AuthLine = "250-AUTH" SP "METHODS=" AuthMethod *("," AuthMethod)
        #              *(SP "COOKIEFILE=" AuthCookieFile) CRLF
        #   AuthMethod = "NULL" / "HASHEDPASSWORD" / "COOKIE"
        #   AuthCookieFile = QuotedString

        # parse AuthMethod mapping
        if not line.is_next_mapping("METHODS"):
          raise stem.ProtocolError("PROTOCOLINFO response's AUTH line is missing its mandatory 'METHODS' mapping: %s" % line)

        for method in line.pop_mapping()[1].split(","):
          if method == "NULL":
            auth_methods.append(AuthMethod.NONE)
          elif method == "HASHEDPASSWORD":
            auth_methods.append(AuthMethod.PASSWORD)
          elif method == "COOKIE":
            auth_methods.append(AuthMethod.COOKIE)
          elif method == "SAFECOOKIE":
            auth_methods.append(AuthMethod.SAFECOOKIE)
          else:
            unknown_auth_methods.append(method)
            message_id = "stem.response.protocolinfo.unknown_auth_%s" % method
            log.log_once(message_id, log.INFO, "PROTOCOLINFO response included a type of authentication that we don't recognize: %s" % method)

            # our auth_methods should have a single AuthMethod.UNKNOWN entry if
            # any unknown authentication methods exist
            if AuthMethod.UNKNOWN not in auth_methods:
              auth_methods.append(AuthMethod.UNKNOWN)

        # parse optional COOKIEFILE mapping (quoted and can have escapes)
        if line.is_next_mapping("COOKIEFILE", True, True):
          self.cookie_path = line.pop_mapping(True, True)[1]
      elif line_type == "VERSION":
        # Line format:
        #   VersionLine = "250-VERSION" SP "Tor=" TorVersion OptArguments CRLF
        #   TorVersion = QuotedString

        if not line.is_next_mapping("Tor", True):
          raise stem.ProtocolError("PROTOCOLINFO response's VERSION line is missing its mandatory tor version mapping: %s" % line)

        try:
          self.tor_version = stem.version.Version(line.pop_mapping(True)[1])
        except ValueError as exc:
          raise stem.ProtocolError(exc)
      else:
        log.debug("Unrecognized PROTOCOLINFO line type '%s', ignoring it: %s" % (line_type, line))

    self.auth_methods = tuple(auth_methods)
    self.unknown_auth_methods = tuple(unknown_auth_methods)
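
# A usage sketch (names are illustrative; PROTOCOLINFO is typically the first
# query made on a fresh control connection):
#
#   control_socket.send("PROTOCOLINFO 1")
#   response = control_socket.recv()
#   stem.response.convert("PROTOCOLINFO", response)
#   response.auth_methods  # e.g. (AuthMethod.COOKIE,)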
609
lib/stem/socket.py
Normal file
@ -0,0 +1,609 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Supports message based communication with sockets speaking the tor control
protocol. This lets users send messages as basic strings and receive responses
as instances of the :class:`~stem.response.ControlMessage` class.

**Module Overview:**

::

  ControlSocket - Socket wrapper that speaks the tor control protocol.
    |- ControlPort - Control connection via a port.
    |  |- get_address - provides the ip address of our socket
    |  +- get_port - provides the port of our socket
    |
    |- ControlSocketFile - Control connection via a local file socket.
    |  +- get_socket_path - provides the path of the socket we connect to
    |
    |- send - sends a message to the socket
    |- recv - receives a ControlMessage from the socket
    |- is_alive - reports if the socket is known to be closed
    |- is_localhost - reports whether the socket is for the local system
    |- connect - connects a new socket
    |- close - shuts down the socket
    +- __enter__ / __exit__ - manages socket connection

  send_message - Writes a message to a control socket.
  recv_message - Reads a ControlMessage from a control socket.
  send_formatting - Performs the formatting expected from sent messages.
"""
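
# A minimal usage sketch (assumes a tor ControlPort listening on 9051 and that
# authentication either isn't needed or is handled elsewhere):
#
#   control_socket = ControlPort(port = 9051)
#   control_socket.send("GETINFO version")
#   print control_socket.recv()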
from __future__ import absolute_import
|
||||
|
||||
import re
|
||||
import socket
|
||||
import threading
|
||||
|
||||
import stem.prereq
|
||||
import stem.response
|
||||
import stem.util.str_tools
|
||||
|
||||
from stem.util import log
|
||||
|
||||
|
||||
class ControlSocket(object):
|
||||
"""
|
||||
Wrapper for a socket connection that speaks the Tor control protocol. To the
|
||||
better part this transparently handles the formatting for sending and
|
||||
receiving complete messages. All methods are thread safe.
|
||||
|
||||
Callers should not instantiate this class directly, but rather use subclasses
|
||||
which are expected to implement the **_make_socket()** method.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._socket, self._socket_file = None, None
|
||||
self._is_alive = False
|
||||
|
||||
# Tracks sending and receiving separately. This should be safe, and doing
|
||||
# so prevents deadlock where we block writes because we're waiting to read
|
||||
# a message that isn't coming.
|
||||
|
||||
self._send_lock = threading.RLock()
|
||||
self._recv_lock = threading.RLock()
|
||||
|
||||
  def send(self, message, raw = False):
    """
    Formats and sends a message to the control socket. For more information see
    the :func:`~stem.socket.send_message` function.

    :param str message: message to be formatted and sent to the socket
    :param bool raw: leaves the message formatting untouched, passing it to the socket as-is

    :raises:
      * :class:`stem.SocketError` if a problem arises in using the socket
      * :class:`stem.SocketClosed` if the socket is known to be shut down
    """

    with self._send_lock:
      try:
        if not self.is_alive():
          raise stem.SocketClosed()

        send_message(self._socket_file, message, raw)
      except stem.SocketClosed as exc:
        # if send_message raises a SocketClosed then we should properly shut
        # everything down

        if self.is_alive():
          self.close()

        raise exc

  def recv(self):
    """
    Receives a message from the control socket, blocking until we've received
    one. For more information see the :func:`~stem.socket.recv_message` function.

    :returns: :class:`~stem.response.ControlMessage` for the message received

    :raises:
      * :class:`stem.ProtocolError` if the content from the socket is malformed
      * :class:`stem.SocketClosed` if the socket closes before we receive a complete message
    """

    with self._recv_lock:
      try:
        # makes a temporary reference to the _socket_file because connect()
        # and close() may set or unset it

        socket_file = self._socket_file

        if not socket_file:
          raise stem.SocketClosed()

        return recv_message(socket_file)
      except stem.SocketClosed as exc:
        # If recv_message raises a SocketClosed then we should properly shut
        # everything down. However, there are a couple of cases where this
        # will cause deadlock...
        #
        # * this SocketClosed was *caused by* a close() call, which is joining
        #   on our thread
        #
        # * a send() call that's currently in flight is about to call close(),
        #   also attempting to join on us
        #
        # To resolve this we make a non-blocking call to acquire the send lock.
        # If we get it then great, we can close safely. If not then one of the
        # above is in progress and we leave the close to them.

        if self.is_alive():
          if self._send_lock.acquire(False):
            self.close()
            self._send_lock.release()

        raise exc

  def is_alive(self):
    """
    Checks if the socket is known to be closed. We won't be aware if it is
    until we either use it or have explicitly shut it down.

    In practice a socket derived from a port knows about its disconnection
    after a failed :func:`~stem.socket.ControlSocket.recv` call. Socket file
    derived connections know after either a
    :func:`~stem.socket.ControlSocket.send` or
    :func:`~stem.socket.ControlSocket.recv`.

    This means that to have reliable detection for when we're disconnected
    you need to continually pull from the socket (which is part of what the
    :class:`~stem.control.BaseController` does).

    :returns: **bool** that's **True** if our socket is connected and **False** otherwise
    """

    return self._is_alive

  def is_localhost(self):
    """
    Returns if the connection is for the local system or not.

    :returns: **bool** that's **True** if the connection is for the local host and **False** otherwise
    """

    return False

  def connect(self):
    """
    Connects to a new socket, closing our previous one if we're already
    attached.

    :raises: :class:`stem.SocketError` if unable to make a socket
    """

    with self._send_lock:
      # Closes the socket if we're currently attached to one. Once we're no
      # longer alive it'll be safe to acquire the recv lock because recv()
      # calls no longer block (raising SocketClosed instead).

      if self.is_alive():
        self.close()

      with self._recv_lock:
        self._socket = self._make_socket()
        self._socket_file = self._socket.makefile(mode = "rwb")
        self._is_alive = True

      # It's possible for this to have a transient failure...
      # SocketError: [Errno 4] Interrupted system call
      #
      # It's safe to retry, so give it another try if it fails.

      try:
        self._connect()
      except stem.SocketError:
        self._connect()  # single retry

  def close(self):
    """
    Shuts down the socket. If it's already closed then this is a no-op.
    """

    with self._send_lock:
      # Function is idempotent with one exception: we notify _close() if this
      # is causing our is_alive() state to change.

      is_change = self.is_alive()

      if self._socket:
        # if we haven't yet established a connection then this raises an error
        # socket.error: [Errno 107] Transport endpoint is not connected

        try:
          self._socket.shutdown(socket.SHUT_RDWR)
        except socket.error:
          pass

        # Suppressing unexpected exceptions from close. For instance, if the
        # socket's file has already been closed then with python 2.7 that raises
        # with...
        # error: [Errno 32] Broken pipe

        try:
          self._socket.close()
        except:
          pass

      if self._socket_file:
        try:
          self._socket_file.close()
        except:
          pass

      self._socket = None
      self._socket_file = None
      self._is_alive = False

      if is_change:
        self._close()

  def _get_send_lock(self):
    """
    The send lock is useful to classes that interact with us at a deep level
    because it's used to lock :func:`stem.socket.ControlSocket.connect` /
    :func:`stem.socket.ControlSocket.close`, and by extension our
    :func:`stem.socket.ControlSocket.is_alive` state changes.

    :returns: **threading.RLock** that governs sending messages to our socket
      and state changes
    """

    return self._send_lock

  def __enter__(self):
    return self

  def __exit__(self, exit_type, value, traceback):
    self.close()

  def _connect(self):
    """
    Connection callback that can be overwritten by subclasses and wrappers.
    """

    pass

  def _close(self):
    """
    Disconnection callback that can be overwritten by subclasses and wrappers.
    """

    pass

  def _make_socket(self):
    """
    Constructs and connects new socket. This is implemented by subclasses.

    :returns: **socket.socket** for our configuration

    :raises:
      * :class:`stem.SocketError` if unable to make a socket
      * **NotImplementedError** if not implemented by a subclass
    """

    raise NotImplementedError("Unsupported Operation: this should be implemented by the ControlSocket subclass")


class ControlPort(ControlSocket):
  """
  Control connection to tor. For more information see tor's ControlPort torrc
  option.
  """

  def __init__(self, address = "127.0.0.1", port = 9051, connect = True):
    """
    ControlPort constructor.

    :param str address: ip address of the controller
    :param int port: port number of the controller
    :param bool connect: connects to the socket if True, leaves it unconnected otherwise

    :raises: :class:`stem.SocketError` if connect is **True** and we're
      unable to establish a connection
    """

    super(ControlPort, self).__init__()
    self._control_addr = address
    self._control_port = port

    if connect:
      self.connect()

  def get_address(self):
    """
    Provides the ip address our socket connects to.

    :returns: str with the ip address of our socket
    """

    return self._control_addr

  def get_port(self):
    """
    Provides the port our socket connects to.

    :returns: int with the port of our socket
    """

    return self._control_port

  def is_localhost(self):
    return self._control_addr == "127.0.0.1"

  def _make_socket(self):
    try:
      control_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      control_socket.connect((self._control_addr, self._control_port))
      return control_socket
    except socket.error as exc:
      raise stem.SocketError(exc)


class ControlSocketFile(ControlSocket):
  """
  Control connection to tor. For more information see tor's ControlSocket torrc
  option.
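
  **Example usage (an illustrative sketch added to these vendored docs; it
  assumes tor was started with 'ControlSocket /var/run/tor/control' and that
  we can read and write that socket file)...**

  ::

    import stem.socket

    control_socket = stem.socket.ControlSocketFile("/var/run/tor/control")
    control_socket.send("PROTOCOLINFO 1")
    print control_socket.recv()
    control_socket.close()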
  """

  def __init__(self, path = "/var/run/tor/control", connect = True):
    """
    ControlSocketFile constructor.

    :param str path: path where the control socket is located
    :param bool connect: connects to the socket if True, leaves it unconnected otherwise

    :raises: :class:`stem.SocketError` if connect is **True** and we're
      unable to establish a connection
    """

    super(ControlSocketFile, self).__init__()
    self._socket_path = path

    if connect:
      self.connect()

  def get_socket_path(self):
    """
    Provides the path our socket connects to.

    :returns: str with the path for our control socket
    """

    return self._socket_path

  def is_localhost(self):
    return True

  def _make_socket(self):
    try:
      control_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      control_socket.connect(self._socket_path)
      return control_socket
    except socket.error as exc:
      raise stem.SocketError(exc)


def send_message(control_file, message, raw = False):
  """
  Sends a message to the control socket, adding the expected formatting for
  single versus multi-line messages. Neither message type should contain an
  ending newline (if so it'll be treated as a multi-line message with a blank
  line at the end). If the message doesn't contain a newline then it's sent
  as...

  ::

    <message>\\r\\n

  and if it does contain newlines then it's split on ``\\n`` and sent as...

  ::

    +<line 1>\\r\\n
    <line 2>\\r\\n
    <line 3>\\r\\n
    .\\r\\n

  :param file control_file: file derived from the control socket (see the
    socket's makefile() method for more information)
  :param str message: message to be sent on the control socket
  :param bool raw: leaves the message formatting untouched, passing it to the
    socket as-is

  :raises:
    * :class:`stem.SocketError` if a problem arises in using the socket
    * :class:`stem.SocketClosed` if the socket is known to be shut down
  """

  if not raw:
    message = send_formatting(message)

  try:
    control_file.write(stem.util.str_tools._to_bytes(message))
    control_file.flush()

    log_message = message.replace("\r\n", "\n").rstrip()
    log.trace("Sent to tor:\n" + log_message)
  except socket.error as exc:
    log.info("Failed to send message: %s" % exc)

    # When sending there doesn't seem to be a reliable method for
    # distinguishing between failures from a disconnect versus other things.
    # Just accounting for known disconnection responses.

    if str(exc) == "[Errno 32] Broken pipe":
      raise stem.SocketClosed(exc)
    else:
      raise stem.SocketError(exc)
  except AttributeError:
    # if the control_file has been closed then flush will receive:
    # AttributeError: 'NoneType' object has no attribute 'sendall'

    log.info("Failed to send message: file has been closed")
    raise stem.SocketClosed("file has been closed")


def recv_message(control_file):
  """
  Pulls from a control socket until we either have a complete message or
  encounter a problem.

  :param file control_file: file derived from the control socket (see the
    socket's makefile() method for more information)

  :returns: :class:`~stem.response.ControlMessage` read from the socket

  :raises:
    * :class:`stem.ProtocolError` if the content from the socket is malformed
    * :class:`stem.SocketClosed` if the socket closes before we receive
      a complete message
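
  As an illustration (an example added to these vendored docs, with a stand-in
  version number), a GETINFO version reply arrives over the wire as...

  ::

    250-version=0.2.4.10\\r\\n
    250 OK\\r\\n

  ... and is provided back as a single
  :class:`~stem.response.ControlMessage` containing two
  (status_code, divider, content) entries.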
  """

  parsed_content, raw_content = [], b""
  logging_prefix = "Error while receiving a control message (%s): "

  while True:
    try:
      # From a real socket readline() would always provide bytes, but during
      # tests we might be given a StringIO in which case it's unicode under
      # python 3.x.

      line = stem.util.str_tools._to_bytes(control_file.readline())
    except AttributeError:
      # if the control_file has been closed then we will receive:
      # AttributeError: 'NoneType' object has no attribute 'recv'

      prefix = logging_prefix % "SocketClosed"
      log.info(prefix + "socket file has been closed")
      raise stem.SocketClosed("socket file has been closed")
    except (socket.error, ValueError) as exc:
      # When disconnected we get...
      #
      # Python 2:
      #   socket.error: [Errno 107] Transport endpoint is not connected
      #
      # Python 3:
      #   ValueError: I/O operation on closed file.

      prefix = logging_prefix % "SocketClosed"
      log.info(prefix + "received exception \"%s\"" % exc)
      raise stem.SocketClosed(exc)

    raw_content += line

    # Parses the tor control lines. These are of the form...
    # <status code><divider><content>\r\n

    if len(line) == 0:
      # if the socket is disconnected then the readline() method will provide
      # empty content

      prefix = logging_prefix % "SocketClosed"
      log.info(prefix + "empty socket content")
      raise stem.SocketClosed("Received empty socket content.")
    elif len(line) < 4:
      prefix = logging_prefix % "ProtocolError"
      log.info(prefix + "line too short, \"%s\"" % log.escape(line))
      raise stem.ProtocolError("Badly formatted reply line: too short")
    elif not re.match(b'^[a-zA-Z0-9]{3}[-+ ]', line):
      prefix = logging_prefix % "ProtocolError"
      log.info(prefix + "malformed status code/divider, \"%s\"" % log.escape(line))
      raise stem.ProtocolError("Badly formatted reply line: beginning is malformed")
    elif not line.endswith(b"\r\n"):
      prefix = logging_prefix % "ProtocolError"
      log.info(prefix + "no CRLF linebreak, \"%s\"" % log.escape(line))
      raise stem.ProtocolError("All lines should end with CRLF")

    line = line[:-2]  # strips off the CRLF
    status_code, divider, content = line[:3], line[3:4], line[4:]

    if stem.prereq.is_python_3():
      status_code = stem.util.str_tools._to_unicode(status_code)
      divider = stem.util.str_tools._to_unicode(divider)

    if divider == "-":
      # mid-reply line, keep pulling for more content
      parsed_content.append((status_code, divider, content))
    elif divider == " ":
      # end of the message, return the message
      parsed_content.append((status_code, divider, content))

      log_message = raw_content.replace(b"\r\n", b"\n").rstrip()
      log.trace("Received from tor:\n" + stem.util.str_tools._to_unicode(log_message))

      return stem.response.ControlMessage(parsed_content, raw_content)
    elif divider == "+":
      # data entry, all of the following lines belong to the content until we
      # get a line with just a period

      while True:
        try:
          line = stem.util.str_tools._to_bytes(control_file.readline())
        except socket.error as exc:
          prefix = logging_prefix % "SocketClosed"
          log.info(prefix + "received an exception while mid-way through a data reply (exception: \"%s\", read content: \"%s\")" % (exc, log.escape(raw_content)))
          raise stem.SocketClosed(exc)

        raw_content += line

        if not line.endswith(b"\r\n"):
          prefix = logging_prefix % "ProtocolError"
          log.info(prefix + "CRLF linebreaks missing from a data reply, \"%s\"" % log.escape(raw_content))
          raise stem.ProtocolError("All lines should end with CRLF")
        elif line == b".\r\n":
          break  # data block termination

        line = line[:-2]  # strips off the CRLF

        # lines starting with a period are escaped by a second period (as per
        # section 2.4 of the control-spec)

        if line.startswith(b".."):
          line = line[1:]

        # appends to previous content, using a newline rather than CRLF
        # separator (more conventional for multi-line string content outside
        # the windows world)

        content += b"\n" + line

      parsed_content.append((status_code, divider, content))
    else:
      # this should never be reached due to the prefix regex, but might as well
      # be safe...
      prefix = logging_prefix % "ProtocolError"
      log.warn(prefix + "\"%s\" isn't a recognized divider type" % divider)
      raise stem.ProtocolError("Unrecognized divider type '%s': %s" % (divider, stem.util.str_tools._to_unicode(line)))


def send_formatting(message):
  """
  Performs the formatting expected from sent control messages. For more
  information see the :func:`~stem.socket.send_message` function.

  :param str message: message to be formatted

  :returns: **str** of the message wrapped by the formatting expected from
    controllers
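
  For example (an illustrative addition to these vendored docs)...

  ::

    >>> send_formatting("GETINFO version")
    'GETINFO version\\r\\n'

    >>> send_formatting("POSTDESCRIPTOR\\nline 1\\nline 2")
    '+POSTDESCRIPTOR\\r\\nline 1\\r\\nline 2\\r\\n.\\r\\n'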
  """

  # From control-spec section 2.2...
  #   Command = Keyword OptArguments CRLF / "+" Keyword OptArguments CRLF CmdData
  #   Keyword = 1*ALPHA
  #   OptArguments = [ SP *(SP / VCHAR) ]
  #
  # A command is either a single line containing a Keyword and arguments, or a
  # multiline command whose initial keyword begins with +, and whose data
  # section ends with a single "." on a line of its own.

  # if we already have \r\n entries then standardize on \n to start with
  message = message.replace("\r\n", "\n")

  if "\n" in message:
    return "+%s\r\n.\r\n" % message.replace("\n", "\r\n")
  else:
    return message + "\r\n"
19
lib/stem/util/__init__.py
Normal file
@ -0,0 +1,19 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Utility functions used by the stem library.
"""

__all__ = [
  "conf",
  "connection",
  "enum",
  "log",
  "lru_cache",
  "ordereddict",
  "proc",
  "system",
  "term",
  "tor_tools",
]
673
lib/stem/util/conf.py
Normal file
@ -0,0 +1,673 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Handlers for text configuration files. Configurations are simple string to
string mappings, with the configuration files using the following rules...

* the key/value is separated by a space
* anything after a "#" is ignored as a comment
* excess whitespace is trimmed
* empty lines are ignored
* multi-line values can be defined by following the key with lines starting
  with a '|'

For instance...

::

  # This is my sample config
  user.name Galen
  user.password yabba1234 # here's an inline comment
  user.notes takes a fancy to pepperjack cheese
  blankEntry.example

  msg.greeting
  |Multi-line message exclaiming of the
  |wonder and awe that is pepperjack!

... would be loaded as...

::

  config = {
    "user.name": "Galen",
    "user.password": "yabba1234",
    "user.notes": "takes a fancy to pepperjack cheese",
    "blankEntry.example": "",
    "msg.greeting": "Multi-line message exclaiming of the\\nwonder and awe that is pepperjack!",
  }

Configurations are managed via the :class:`~stem.util.conf.Config` class. The
:class:`~stem.util.conf.Config` can be used directly with its
:func:`~stem.util.conf.Config.get` and :func:`~stem.util.conf.Config.set`
methods, but usually modules will want a local dictionary with just the
configurations that it cares about.

To do this use the :func:`~stem.util.conf.config_dict` function. For example...

::

  import getpass
  from stem.util import conf, connection

  def config_validator(key, value):
    if key == "timeout":
      # require at least a one second timeout
      return max(1, value)
    elif key == "endpoint":
      if not connection.is_valid_ipv4_address(value):
        raise ValueError("'%s' isn't a valid IPv4 address" % value)
    elif key == "port":
      if not connection.is_valid_port(value):
        raise ValueError("'%s' isn't a valid port" % value)
    elif key == "retries":
      # negative retries really don't make sense
      return max(0, value)

  CONFIG = conf.config_dict("ssh_login", {
    "username": getpass.getuser(),
    "password": "",
    "timeout": 10,
    "endpoint": "263.12.8.0",
    "port": 22,
    "reconnect": False,
    "retries": 3,
  }, config_validator)

There are several things going on here so let's take it step by step...

* The :func:`~stem.util.conf.config_dict` provides a dictionary that's bound
  to a given configuration. If the "ssh_login" configuration changes
  then so will the contents of CONFIG.

* The dictionary we're passing to :func:`~stem.util.conf.config_dict` provides
  two important pieces of information: default values and their types. See the
  Config's :func:`~stem.util.conf.Config.get` method for how these type
  inferences work.

* The config_validator is a hook we're adding to make sure CONFIG only gets
  values we think are valid. In this case it ensures that our timeout value
  is at least one second, and rejects endpoints or ports that are invalid.

Now let's say our user has the following configuration file...

::

  username waddle_doo
  password jabberwocky
  timeout -15
  port 9000000
  retries lots
  reconnect true
  logging debug

... and we load it as follows...

::

  >>> from stem.util import conf
  >>> our_config = conf.get_config("ssh_login")
  >>> our_config.load("/home/atagar/user_config")
  >>> print CONFIG
  {
    "username": "waddle_doo",
    "password": "jabberwocky",
    "timeout": 1,
    "endpoint": "263.12.8.0",
    "port": 22,
    "reconnect": True,
    "retries": 3,
  }

Here's an explanation of what happened...

* the username, password, and reconnect attributes took the values in the
  configuration file

* the 'config_validator' we added earlier allows for a minimum timeout of one
  and rejected the invalid port (with a log message)

* we weren't able to convert the retries entry's "lots" value to an integer
  so it kept its default value and logged a warning

* the user didn't supply an endpoint so that remained unchanged

* our CONFIG didn't have a 'logging' attribute so it was ignored

**Module Overview:**

::

  config_dict - provides a dictionary that's kept in sync with our config
  get_config - singleton for getting configurations
  parse_enum_csv - helper function for parsing configuration entries for enums

  Config - Custom configuration
    |- load - reads a configuration file
    |- save - writes the current configuration to a file
    |- clear - empties our loaded configuration contents
    |- add_listener - notifies the given listener when an update occurs
    |- clear_listeners - removes any attached listeners
    |- keys - provides keys in the loaded configuration
    |- set - sets the given key/value pair
    |- unused_keys - provides keys that have never been requested
    |- get - provides the value for a given key, with type inference
    +- get_value - provides the value for a given key as a string
"""

import threading

from stem.util import log

CONFS = {}  # mapping of identifier to singleton instances of configs


class _SyncListener(object):
  def __init__(self, config_dict, interceptor):
    self.config_dict = config_dict
    self.interceptor = interceptor

  def update(self, config, key):
    if key in self.config_dict:
      new_value = config.get(key, self.config_dict[key])

      if new_value == self.config_dict[key]:
        return  # no change

      if self.interceptor:
        interceptor_value = self.interceptor(key, new_value)

        if interceptor_value:
          new_value = interceptor_value

      self.config_dict[key] = new_value


def config_dict(handle, conf_mappings, handler = None):
  """
  Makes a dictionary that stays synchronized with a configuration.

  This takes a dictionary of 'config_key => default_value' mappings and
  changes the values to reflect our current configuration. This will leave
  the previous values alone if...

  * we don't have a value for that config_key
  * we can't convert our value to be the same type as the default_value

  If a handler is provided then this is called just prior to assigning new
  values to the config_dict. The handler function is expected to accept the
  (key, value) for the new values and return what we should actually insert
  into the dictionary. If this returns None then the value is updated as
  normal.

  For more information about how we convert types see our
  :func:`~stem.util.conf.Config.get` method.

  **The dictionary you get from this is managed by the
  :class:`~stem.util.conf.Config` class and should be treated as being
  read-only.**

  :param str handle: unique identifier for a config instance
  :param dict conf_mappings: config key/value mappings used as our defaults
  :param functor handler: function called prior to assigning values
  """

  selected_config = get_config(handle)
  selected_config.add_listener(_SyncListener(conf_mappings, handler).update)
  return conf_mappings


def get_config(handle):
  """
  Singleton constructor for configuration file instances. If a configuration
  already exists for the handle then it's returned. Otherwise a fresh instance
  is constructed.

  :param str handle: unique identifier used to access this config instance
  """

  if handle not in CONFS:
    CONFS[handle] = Config()

  return CONFS[handle]


def parse_enum(key, value, enumeration):
  """
  Provides the enumeration value for a given key. This is a case insensitive
  lookup and raises an exception if the enum key doesn't exist.

  :param str key: configuration key being looked up
  :param str value: value to be parsed
  :param stem.util.enum.Enum enumeration: enumeration the values should be in

  :returns: enumeration value

  :raises: **ValueError** if the **value** isn't among the enumeration keys
  """

  return parse_enum_csv(key, value, enumeration, 1)[0]


def parse_enum_csv(key, value, enumeration, count = None):
  """
  Parses a given value as being a comma separated listing of enumeration keys,
  returning the corresponding enumeration values. This is intended to be a
  helper for config handlers. The checks this does are case insensitive.

  The **count** attribute can be used to make assertions based on the number of
  values. This can be...

  * None to indicate that there are no restrictions.
  * An int to indicate that we should have this many values.
  * An (int, int) tuple to indicate the range that values can be in. This range
    is inclusive and either can be None to indicate the lack of a lower or
    upper bound.

  :param str key: configuration key being looked up
  :param str value: value to be parsed
  :param stem.util.enum.Enum enumeration: enumeration the values should be in
  :param int,tuple count: validates that we have this many items

  :returns: list with the enumeration values

  :raises: **ValueError** if the count assertion fails or the **value** entries
    don't match the enumeration keys
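
  For example (an illustrative addition to these vendored docs, reusing the
  Insects enumeration from stem.util.enum's own documentation)...

  ::

    >>> from stem.util import enum
    >>> Insects = enum.Enum('ANT', 'WASP', 'LADYBUG', 'FIREFLY')
    >>> parse_enum_csv('bugs', 'ant, wasp', Insects)
    ['Ant', 'Wasp']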
  """

  values = [val.upper().strip() for val in value.split(',')]

  if values == ['']:
    return []

  if count is None:
    pass  # no count validation checks to do
  elif isinstance(count, int):
    if len(values) != count:
      raise ValueError("Config entry '%s' is expected to be %i comma separated values, got '%s'" % (key, count, value))
  elif isinstance(count, tuple) and len(count) == 2:
    minimum, maximum = count

    if minimum is not None and len(values) < minimum:
      raise ValueError("Config entry '%s' must have at least %i comma separated values, got '%s'" % (key, minimum, value))

    if maximum is not None and len(values) > maximum:
      raise ValueError("Config entry '%s' can have at most %i comma separated values, got '%s'" % (key, maximum, value))
  else:
    raise ValueError("The count must be None, an int, or two value tuple. Got '%s' (%s)" % (count, type(count)))

  result = []
  enum_keys = [k.upper() for k in enumeration.keys()]
  enum_values = list(enumeration)

  for val in values:
    if val in enum_keys:
      result.append(enum_values[enum_keys.index(val)])
    else:
      raise ValueError("The '%s' entry of config entry '%s' wasn't in the enumeration (expected %s)" % (val, key, ', '.join(enum_keys)))

  return result


class Config(object):
  """
  Handler for easily working with custom configurations, providing persistence
  to and from files. All operations are thread safe.

  **Example usage:**

  User has a file at '/home/atagar/myConfig' with...

  ::

    destination.ip 1.2.3.4
    destination.port blarg

    startup.run export PATH=$PATH:~/bin
    startup.run alias l=ls

  And they have a script with...

  ::

    from stem.util import conf

    # Configuration values we'll use in this file. These are mappings of
    # configuration keys to the default values we'll use if the user doesn't
    # have something different in their config file (or it doesn't match this
    # type).

    ssh_config = conf.config_dict("ssh_login", {
      "login.user": "atagar",
      "login.password": "pepperjack_is_awesome!",
      "destination.ip": "127.0.0.1",
      "destination.port": 22,
      "startup.run": [],
    })

    # Makes an empty config instance with the handle of 'ssh_login'. This is
    # a singleton so other classes can fetch this same configuration from
    # this handle.

    user_config = conf.get_config("ssh_login")

    # Loads the user's configuration file, warning if this fails.

    try:
      user_config.load("/home/atagar/myConfig")
    except IOError as exc:
      print "Unable to load the user's config: %s" % exc

    # This replaces the contents of ssh_config with the values from the user's
    # config file if...
    #
    # * the key is present in the config file
    # * we're able to convert the configuration file's value to the same type
    #   as what's in the mapping (see the Config.get() method for how these
    #   type inferences work)
    #
    # For instance in this case...
    #
    # * the login values are left alone because they aren't in the user's
    #   config file
    #
    # * the 'destination.port' is also left with the value of 22 because we
    #   can't turn "blarg" into an integer
    #
    # The other values are replaced, so ssh_config now becomes...
    #
    # {"login.user": "atagar",
    #  "login.password": "pepperjack_is_awesome!",
    #  "destination.ip": "1.2.3.4",
    #  "destination.port": 22,
    #  "startup.run": ["export PATH=$PATH:~/bin", "alias l=ls"]}
    #
    # Information for what values fail to load and why is reported to
    # 'stem.util.log'.
  """

  def __init__(self):
    self._path = None  # location we last loaded from or saved to
    self._contents = {}  # configuration key/value pairs
    self._listeners = []  # functors to be notified of config changes

    # used for accessing _contents
    self._contents_lock = threading.RLock()

    # keys that have been requested (used to provide unused config contents)
    self._requested_keys = set()

  def load(self, path = None):
    """
    Reads in the contents of the given path, adding its configuration values
    to our current contents.

    :param str path: file path to be loaded, this uses the last loaded path if
      not provided

    :raises:
      * **IOError** if we fail to read the file (it doesn't exist, insufficient
        permissions, etc)
      * **ValueError** if no path was provided and we've never been provided one
    """

    if path:
      self._path = path
    elif not self._path:
      raise ValueError("Unable to load configuration: no path provided")

    with open(self._path, "r") as config_file:
      read_contents = config_file.readlines()

    with self._contents_lock:
      while read_contents:
        line = read_contents.pop(0)

        # strips any commenting or excess whitespace
        comment_start = line.find("#")

        if comment_start != -1:
          line = line[:comment_start]

        line = line.strip()

        # parse the key/value pair
        if line:
          try:
            key, value = line.split(" ", 1)
            value = value.strip()
          except ValueError:
            log.debug("Config entry '%s' is expected to be of the format 'Key Value', defaulting to '%s' -> ''" % (line, line))
            key, value = line, ""

          if not value:
            # this might be a multi-line entry, try processing it as such
            multiline_buffer = []

            while read_contents and read_contents[0].lstrip().startswith("|"):
              content = read_contents.pop(0).lstrip()[1:]  # removes '\s+|' prefix
              content = content.rstrip("\n")  # trailing newline
              multiline_buffer.append(content)

            if multiline_buffer:
              self.set(key, "\n".join(multiline_buffer), False)
              continue

          self.set(key, value, False)

  def save(self, path = None):
    """
    Saves configuration contents to disk. If a path is provided then it
    replaces the configuration location that we track.

    :param str path: location to be saved to

    :raises: **ValueError** if no path was provided and we've never been provided one
    """

    if path:
      self._path = path
    elif not self._path:
      raise ValueError("Unable to save configuration: no path provided")

    with self._contents_lock:
      with open(self._path, 'w') as output_file:
        for entry_key in sorted(self.keys()):
          for entry_value in self.get_value(entry_key, multiple = True):
            # check for multi line entries
            if "\n" in entry_value:
              entry_value = "\n|" + entry_value.replace("\n", "\n|")

            output_file.write('%s %s\n' % (entry_key, entry_value))

  def clear(self):
    """
    Drops the configuration contents and reverts back to a blank, unloaded
    state.
    """

    with self._contents_lock:
      self._contents.clear()
      self._requested_keys = set()

  def add_listener(self, listener, backfill = True):
    """
    Registers the function to be notified of configuration updates. Listeners
    are expected to be functors which accept (config, key).

    :param functor listener: function to be notified when our configuration is changed
    :param bool backfill: calls the function with our current values if **True**
    """

    with self._contents_lock:
      self._listeners.append(listener)

      if backfill:
        for key in self.keys():
          listener(self, key)

  def clear_listeners(self):
    """
    Removes all attached listeners.
    """

    self._listeners = []

  def keys(self):
    """
    Provides all keys in the currently loaded configuration.

    :returns: **list** of strings for the configuration keys we've loaded
    """

    return self._contents.keys()

  def unused_keys(self):
    """
    Provides the configuration keys that have never been provided to a caller
    via :func:`~stem.util.conf.config_dict` or the
    :func:`~stem.util.conf.Config.get` and
    :func:`~stem.util.conf.Config.get_value` methods.

    :returns: **set** of configuration keys we've loaded but have never been requested
    """

    return set(self.keys()).difference(self._requested_keys)

  def set(self, key, value, overwrite = True):
    """
    Appends the given key/value configuration mapping, behaving the same as if
    we'd loaded this from a configuration file.

    :param str key: key for the configuration mapping
    :param str,list value: value we're setting the mapping to
    :param bool overwrite: replaces the previous value if **True**, otherwise
      the values are appended
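
    For example (an illustrative addition to these vendored docs)...

    ::

      >>> my_config = get_config("example")
      >>> my_config.set("startup.run", "export PATH=$PATH:~/bin", False)
      >>> my_config.set("startup.run", "alias l=ls", False)
      >>> my_config.get_value("startup.run", multiple = True)
      ['export PATH=$PATH:~/bin', 'alias l=ls']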
    """

    with self._contents_lock:
      if isinstance(value, str):
        if not overwrite and key in self._contents:
          self._contents[key].append(value)
        else:
          self._contents[key] = [value]

        for listener in self._listeners:
          listener(self, key)
      elif isinstance(value, (list, tuple)):
        if not overwrite and key in self._contents:
          self._contents[key] += value
        else:
          self._contents[key] = value

        for listener in self._listeners:
          listener(self, key)
      else:
        raise ValueError("Config.set() only accepts str, list, or tuple. Provided value was a '%s'" % type(value))

  def get(self, key, default = None):
    """
    Fetches the given configuration, using the key and default value to
    determine the type it should be. Recognized inferences are:

    * **default is a boolean => boolean**

      * values are case insensitive
      * provides the default if the value isn't "true" or "false"

    * **default is an integer => int**

      * provides the default if the value can't be converted to an int

    * **default is a float => float**

      * provides the default if the value can't be converted to a float

    * **default is a list => list**

      * string contents for all configuration values with this key

    * **default is a tuple => tuple**

      * string contents for all configuration values with this key

    * **default is a dictionary => dict**

      * values without "=>" in them are ignored
      * values are split into key/value pairs on "=>" with extra whitespace
        stripped
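
    For example (an illustrative addition to these vendored docs), a config
    with...

    ::

      user.details age => 25
      user.details eye_color => brown

    ... would be provided as follows when fetched with a dict default...

    ::

      >>> my_config.get("user.details", {})
      {'age': '25', 'eye_color': 'brown'}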

    :param str key: config setting to be fetched
    :param object default: value provided if no such key exists or fails to be converted

    :returns: given configuration value with its type inferred with the above rules
    """

    is_multivalue = isinstance(default, (list, tuple, dict))
    val = self.get_value(key, default, is_multivalue)

    if val == default:
      return val  # don't try to infer undefined values

    if isinstance(default, bool):
      if val.lower() == "true":
        val = True
      elif val.lower() == "false":
        val = False
      else:
        log.debug("Config entry '%s' is expected to be a boolean, defaulting to '%s'" % (key, str(default)))
        val = default
    elif isinstance(default, int):
      try:
        val = int(val)
      except ValueError:
        log.debug("Config entry '%s' is expected to be an integer, defaulting to '%i'" % (key, default))
        val = default
    elif isinstance(default, float):
      try:
        val = float(val)
      except ValueError:
        log.debug("Config entry '%s' is expected to be a float, defaulting to '%f'" % (key, default))
        val = default
    elif isinstance(default, list):
      pass  # nothing special to do (already a list)
    elif isinstance(default, tuple):
      val = tuple(val)
    elif isinstance(default, dict):
      valMap = {}
      for entry in val:
        if "=>" in entry:
          entryKey, entryVal = entry.split("=>", 1)
          valMap[entryKey.strip()] = entryVal.strip()
        else:
          log.debug("Ignoring invalid %s config entry (expected a mapping, but \"%s\" was missing \"=>\")" % (key, entry))
      val = valMap

    return val

  def get_value(self, key, default = None, multiple = False):
    """
    This provides the current value associated with a given key.

    :param str key: config setting to be fetched
    :param object default: value provided if no such key exists
    :param bool multiple: provides back a list of all values if **True**,
      otherwise this returns the last loaded configuration value

    :returns: **str** or **list** of string configuration values associated
      with the given key, providing the default if no such key exists
    """

    with self._contents_lock:
      if key in self._contents:
        self._requested_keys.add(key)

        if multiple:
          return self._contents[key]
        else:
          return self._contents[key][-1]
      else:
        message_id = "stem.util.conf.missing_config_key_%s" % key
        log.log_once(message_id, log.TRACE, "config entry '%s' not found, defaulting to '%s'" % (key, default))
        return default
562
lib/stem/util/connection.py
Normal file
@ -0,0 +1,562 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Connection and networking based utility functions.

::

  get_connections - queries the connections belonging to a given process
  get_system_resolvers - provides connection resolution methods that are likely to be available

  is_valid_ipv4_address - checks if a string is a valid IPv4 address
  is_valid_ipv6_address - checks if a string is a valid IPv6 address
  is_valid_port - checks if something is a valid representation for a port
  is_private_address - checks if an IPv4 address belongs to a private range or not

  expand_ipv6_address - provides an IPv6 address with its collapsed portions expanded
  get_mask_ipv4 - provides the mask representation for a given number of bits
  get_mask_ipv6 - provides the IPv6 mask representation for a given number of bits

.. data:: Resolver (enum)

  Method for resolving a process' connections.

  ================= ===========
  Resolver          Description
  ================= ===========
  **PROC**          /proc contents
  **NETSTAT**       netstat command
  **SS**            ss command
  **LSOF**          lsof command
  **SOCKSTAT**      sockstat command under *nix
  **BSD_SOCKSTAT**  sockstat command under FreeBSD
  **BSD_PROCSTAT**  procstat command under FreeBSD
  ================= ===========
"""

import collections
import hashlib
import hmac
import os
import platform
import re

import stem.util.proc
import stem.util.system

from stem.util import enum, log

# Connection resolution is risky to log about since it's highly likely to
# contain sensitive information. That said, it's also difficult to get right in
# a platform independent fashion. To opt into the logging required to
# troubleshoot connection resolution set the following...

LOG_CONNECTION_RESOLUTION = False

Resolver = enum.Enum(
  ('PROC', 'proc'),
  ('NETSTAT', 'netstat'),
  ('SS', 'ss'),
  ('LSOF', 'lsof'),
  ('SOCKSTAT', 'sockstat'),
  ('BSD_SOCKSTAT', 'sockstat (bsd)'),
  ('BSD_PROCSTAT', 'procstat (bsd)')
)

Connection = collections.namedtuple('Connection', [
  'local_address',
  'local_port',
  'remote_address',
  'remote_port',
  'protocol',
])

FULL_IPv4_MASK = "255.255.255.255"
FULL_IPv6_MASK = "FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF"

CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE = os.urandom(32)

RESOLVER_COMMAND = {
  Resolver.PROC: '',

  # -n = prevents dns lookups, -p = include process
  Resolver.NETSTAT: 'netstat -np',

  # -n = numeric ports, -p = include process, -t = tcp sockets, -u = udp sockets
  Resolver.SS: 'ss -nptu',

  # -n = prevent dns lookups, -P = show port numbers (not names), -i = ip only, -w = no warnings
  # (lsof provides a '-p <pid>' but oddly in practice it seems to be ~11-28% slower)
  Resolver.LSOF: 'lsof -wnPi',

  Resolver.SOCKSTAT: 'sockstat',

  # -4 = IPv4, -c = connected sockets
  Resolver.BSD_SOCKSTAT: 'sockstat -4c',

  # -f <pid> = process pid
  Resolver.BSD_PROCSTAT: 'procstat -f {pid}',
}

RESOLVER_FILTER = {
  Resolver.PROC: '',

  # tcp 0 586 192.168.0.1:44284 38.229.79.2:443 ESTABLISHED 15843/tor
  Resolver.NETSTAT: '^{protocol}\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED\s+{pid}/{name}\s*$',

  # tcp ESTAB 0 0 192.168.0.20:44415 38.229.79.2:443 users:(("tor",15843,9))
  Resolver.SS: '^{protocol}\s+ESTAB\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+users:\(\("{name}",{pid},[0-9]+\)\)$',

  # tor 3873 atagar 45u IPv4 40994 0t0 TCP 10.243.55.20:45724->194.154.227.109:9001 (ESTABLISHED)
  Resolver.LSOF: '^{name}\s+{pid}\s+.*\s+{protocol}\s+{local_address}:{local_port}->{remote_address}:{remote_port} \(ESTABLISHED\)$',

  # atagar tor 15843 tcp4 192.168.0.20:44092 68.169.35.102:443 ESTABLISHED
  Resolver.SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED$',

  # _tor tor 4397 12 tcp4 172.27.72.202:54011 127.0.0.1:9001
  Resolver.BSD_SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+\S+\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$',

  # 3561 tor 4 s - rw---n-- 2 0 TCP 10.0.0.2:9050 10.0.0.1:22370
  Resolver.BSD_PROCSTAT: '^\s*{pid}\s+{name}\s+.*\s+{protocol}\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$',
}


def get_connections(resolver, process_pid = None, process_name = None):
  """
  Retrieves a list of the current connections for a given process. This
  provides a list of Connection instances, which have five attributes...

  * local_address (str)
  * local_port (int)
  * remote_address (str)
  * remote_port (int)
  * protocol (str, generally either 'tcp' or 'udp')

  :param Resolver resolver: method of connection resolution to use
  :param int process_pid: pid of the process to retrieve
  :param str process_name: name of the process to retrieve

  :raises:
    * **ValueError** if using **Resolver.PROC** or **Resolver.BSD_PROCSTAT**
      and the process_pid wasn't provided

    * **IOError** if no connections are available or resolution fails
      (generally they're indistinguishable). The common causes are the
      command being unavailable or permissions.
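
  For example (an illustrative sketch added to these vendored docs; it assumes
  a tor process is running and that netstat is available)...

  ::

    import stem.util.connection

    connections = stem.util.connection.get_connections(
      stem.util.connection.Resolver.NETSTAT,
      process_name = 'tor',
    )

    for conn in connections:
      print '%s:%i -> %s:%i (%s)' % (conn.local_address, conn.local_port, conn.remote_address, conn.remote_port, conn.protocol)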
  """

  def _log(msg):
    if LOG_CONNECTION_RESOLUTION:
      log.debug(msg)

  _log("=" * 80)
  _log("Querying connections for resolver: %s, pid: %s, name: %s" % (resolver, process_pid, process_name))

  if isinstance(process_pid, str):
    try:
      process_pid = int(process_pid)
    except ValueError:
      raise ValueError("Process pid was non-numeric: %s" % process_pid)

  if process_pid is None and resolver in (Resolver.PROC, Resolver.BSD_PROCSTAT):
    raise ValueError("%s resolution requires a pid" % resolver)

  if resolver == Resolver.PROC:
    return [Connection(*conn) for conn in stem.util.proc.get_connections(process_pid)]

  resolver_command = RESOLVER_COMMAND[resolver].format(pid = process_pid)

  try:
    results = stem.util.system.call(resolver_command)
  except OSError as exc:
    raise IOError("Unable to query '%s': %s" % (resolver_command, exc))

  resolver_regex_str = RESOLVER_FILTER[resolver].format(
    protocol = '(?P<protocol>\S+)',
    local_address = '(?P<local_address>[0-9.]+)',
    local_port = '(?P<local_port>[0-9]+)',
    remote_address = '(?P<remote_address>[0-9.]+)',
    remote_port = '(?P<remote_port>[0-9]+)',
    pid = process_pid if process_pid else '[0-9]*',
    name = process_name if process_name else '\S*',
  )

  _log("Resolver regex: %s" % resolver_regex_str)
  _log("Resolver results:\n%s" % '\n'.join(results))

  connections = []
  resolver_regex = re.compile(resolver_regex_str)

  for line in results:
    match = resolver_regex.match(line)

    if match:
      attr = match.groupdict()
      local_addr = attr['local_address']
      local_port = int(attr['local_port'])
      remote_addr = attr['remote_address']
      remote_port = int(attr['remote_port'])
      protocol = attr['protocol'].lower()

      if remote_addr == '0.0.0.0':
        continue  # procstat response for unestablished connections

      if not (is_valid_ipv4_address(local_addr) and is_valid_ipv4_address(remote_addr)):
        _log("Invalid address (%s or %s): %s" % (local_addr, remote_addr, line))
      elif not (is_valid_port(local_port) and is_valid_port(remote_port)):
        _log("Invalid port (%s or %s): %s" % (local_port, remote_port, line))
      elif protocol not in ('tcp', 'udp'):
        _log("Unrecognized protocol (%s): %s" % (protocol, line))
      else:
        # only keep entries that passed the above sanity checks
        conn = Connection(local_addr, local_port, remote_addr, remote_port, protocol)
        connections.append(conn)
        _log(str(conn))

  _log("%i connections found" % len(connections))

  if not connections:
    raise IOError("No results found using: %s" % resolver_command)

  return connections


def get_system_resolvers(system = None):
  """
  Provides the types of connection resolvers likely to be available on this platform.

  :param str system: system to get resolvers for, this is determined by
    platform.system() if not provided

  :returns: **list** of Resolvers likely to be available on this platform
  """

  if system is None:
    system = platform.system()

  if system == 'Windows':
    resolvers = []
  elif system in ('Darwin', 'OpenBSD'):
    resolvers = [Resolver.LSOF]
  elif system == 'FreeBSD':
    # Netstat is available, but lacks a '-p' equivalent so we can't associate
    # the results to processes. The platform also has an ss command, but it
    # belongs to a spreadsheet application.

    resolvers = [Resolver.BSD_SOCKSTAT, Resolver.BSD_PROCSTAT, Resolver.LSOF]
  else:
    # Sockstat isn't available by default on ubuntu.

    resolvers = [Resolver.NETSTAT, Resolver.SOCKSTAT, Resolver.LSOF, Resolver.SS]

  # remove any that aren't in the user's PATH

  resolvers = list(filter(lambda r: stem.util.system.is_available(RESOLVER_COMMAND[r]), resolvers))

  # proc resolution, by far, outperforms the others so default to this if able

  if stem.util.proc.is_available():
    resolvers = [Resolver.PROC] + resolvers

  return resolvers


def is_valid_ipv4_address(address):
  """
  Checks if a string is a valid IPv4 address.

  :param str address: string to be checked

  :returns: **True** if input is a valid IPv4 address, **False** otherwise
  """

  if not isinstance(address, (bytes, unicode)):
    return False

  # checks if there are four period separated values

  if address.count(".") != 3:
    return False

  # checks that each octet is a decimal value between 0-255
  for entry in address.split("."):
    if not entry.isdigit() or int(entry) < 0 or int(entry) > 255:
      return False
    elif entry[0] == "0" and len(entry) > 1:
      return False  # leading zeros, for instance in "1.2.3.001"

  return True


def is_valid_ipv6_address(address, allow_brackets = False):
  """
  Checks if a string is a valid IPv6 address.

  :param str address: string to be checked
  :param bool allow_brackets: ignore brackets which form '[address]'

  :returns: **True** if input is a valid IPv6 address, **False** otherwise
  """

  if allow_brackets:
    if address.startswith("[") and address.endswith("]"):
      address = address[1:-1]

  # addresses are made up of eight colon separated groups of four hex digits
  # with leading zeros being optional
  # https://en.wikipedia.org/wiki/IPv6#Address_format

  colon_count = address.count(":")

  if colon_count > 7:
    return False  # too many groups
  elif colon_count != 7 and "::" not in address:
    return False  # not enough groups and none are collapsed
  elif address.count("::") > 1 or ":::" in address:
    return False  # multiple groupings of zeros can't be collapsed

  for entry in address.split(":"):
    if not re.match("^[0-9a-fA-F]{0,4}$", entry):
      return False

  return True


def is_valid_port(entry, allow_zero = False):
  """
  Checks if a string or int is a valid port number.

  :param list,str,int entry: string, integer or list to be checked
  :param bool allow_zero: accept port number of zero (reserved by definition)

  :returns: **True** if input is an integer and within the valid port range, **False** otherwise
  """

  if isinstance(entry, list):
    for port in entry:
      if not is_valid_port(port, allow_zero):
        return False

    return True
  elif isinstance(entry, (bytes, unicode)):
    if not entry.isdigit():
      return False
    elif entry[0] == "0" and len(entry) > 1:
      return False  # leading zeros, ex "001"

    entry = int(entry)

  if allow_zero and entry == 0:
    return True

  return entry > 0 and entry < 65536


def is_private_address(address):
  """
  Checks if the IPv4 address is in a range belonging to the local network or
  loopback. These include:

  * Private ranges: 10.*, 172.16.* - 172.31.*, 192.168.*
  * Loopback: 127.*

  :param str address: string to be checked

  :returns: **True** if input is in a private range, **False** otherwise

  :raises: **ValueError** if the address isn't a valid IPv4 address
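
  For example (an illustrative addition to these vendored docs)...

  ::

    >>> is_private_address('192.168.0.1')
    True
    >>> is_private_address('8.8.8.8')
    False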
|
||||
"""
|
||||
|
||||
if not is_valid_ipv4_address(address):
|
||||
raise ValueError("'%s' isn't a valid IPv4 address" % address)
|
||||
|
||||
# checks for any of the simple wildcard ranges
|
||||
|
||||
if address.startswith("10.") or address.startswith("192.168.") or address.startswith("127."):
|
||||
return True
|
||||
|
||||
# checks for the 172.16.* - 172.31.* range
|
||||
|
||||
if address.startswith("172."):
|
||||
second_octet = int(address.split('.')[1])
|
||||
|
||||
if second_octet >= 16 and second_octet <= 31:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def expand_ipv6_address(address):
  """
  Expands abbreviated IPv6 addresses to their full colon separated hex format.
  For instance...

  ::

    >>> expand_ipv6_address("2001:db8::ff00:42:8329")
    "2001:0db8:0000:0000:0000:ff00:0042:8329"

    >>> expand_ipv6_address("::")
    "0000:0000:0000:0000:0000:0000:0000:0000"

  :param str address: IPv6 address to be expanded

  :raises: **ValueError** if the address can't be expanded due to being malformed
  """

  if not is_valid_ipv6_address(address):
    raise ValueError("'%s' isn't a valid IPv6 address" % address)

  # expands collapsed groupings, there can only be a single '::' in a valid
  # address
  if "::" in address:
    missing_groups = 7 - address.count(":")
    address = address.replace("::", "::" + ":" * missing_groups)

  # inserts missing zeros
  for index in xrange(8):
    start = index * 5
    end = address.index(":", start) if index != 7 else len(address)
    missing_zeros = 4 - (end - start)

    if missing_zeros > 0:
      address = address[:start] + "0" * missing_zeros + address[start:]

  return address

def get_mask_ipv4(bits):
  """
  Provides the IPv4 mask for a given number of bits, in the dotted-quad format.

  :param int bits: number of bits to be converted

  :returns: **str** with the subnet mask representation for this many bits

  :raises: **ValueError** if given a number of bits outside the range of 0-32
  """

  if bits > 32 or bits < 0:
    raise ValueError("A mask can only be 0-32 bits, got %i" % bits)
  elif bits == 32:
    return FULL_IPv4_MASK

  # get the binary representation of the mask
  mask_bin = _get_binary(2 ** bits - 1, 32)[::-1]

  # breaks it into eight character groupings
  octets = [mask_bin[8 * i:8 * (i + 1)] for i in xrange(4)]

  # converts each octet into its integer value
  return ".".join([str(int(octet, 2)) for octet in octets])

def get_mask_ipv6(bits):
  """
  Provides the IPv6 mask for a given number of bits, in the hex colon-delimited
  format.

  :param int bits: number of bits to be converted

  :returns: **str** with the subnet mask representation for this many bits

  :raises: **ValueError** if given a number of bits outside the range of 0-128
  """

  if bits > 128 or bits < 0:
    raise ValueError("A mask can only be 0-128 bits, got %i" % bits)
  elif bits == 128:
    return FULL_IPv6_MASK

  # get the binary representation of the mask
  mask_bin = _get_binary(2 ** bits - 1, 128)[::-1]

  # breaks it into sixteen character groupings
  groupings = [mask_bin[16 * i:16 * (i + 1)] for i in xrange(8)]

  # converts each group into its hex value
  return ":".join(["%04x" % int(group, 2) for group in groupings]).upper()

def _get_masked_bits(mask):
  """
  Provides the number of bits that an IPv4 subnet mask represents. Note that
  not all masks can be represented by a bit count.

  :param str mask: mask to be converted

  :returns: **int** with the number of bits represented by the mask

  :raises: **ValueError** if the mask is invalid or can't be converted
  """

  if not is_valid_ipv4_address(mask):
    raise ValueError("'%s' is an invalid subnet mask" % mask)

  # converts octets to binary representation
  mask_bin = _get_address_binary(mask)
  mask_match = re.match("^(1*)(0*)$", mask_bin)

  if mask_match:
    return 32 - len(mask_match.groups()[1])
  else:
    raise ValueError("Unable to convert mask to a bit count: %s" % mask)

def _get_binary(value, bits):
  """
  Provides the given value as a binary string, padded with zeros to the given
  number of bits.

  :param int value: value to be converted
  :param int bits: number of bits to pad to
  """

  # http://www.daniweb.com/code/snippet216539.html
  return "".join([str((value >> y) & 1) for y in range(bits - 1, -1, -1)])

def _get_address_binary(address):
  """
  Provides the binary value for an IPv4 or IPv6 address.

  :returns: **str** with the binary representation of this address

  :raises: **ValueError** if address is neither an IPv4 nor IPv6 address
  """

  if is_valid_ipv4_address(address):
    return "".join([_get_binary(int(octet), 8) for octet in address.split(".")])
  elif is_valid_ipv6_address(address):
    address = expand_ipv6_address(address)
    return "".join([_get_binary(int(grouping, 16), 16) for grouping in address.split(":")])
  else:
    raise ValueError("'%s' is neither an IPv4 nor IPv6 address" % address)

def _hmac_sha256(key, msg):
  """
  Generates a sha256 digest using the given key and message.

  :param str key: starting key for the hash
  :param str msg: message to be hashed

  :returns: sha256 digest of msg as bytes, hashed using the given key
  """

  return hmac.new(key, msg, hashlib.sha256).digest()


def _cryptovariables_equal(x, y):
  """
  Compares two strings for equality securely.

  :param str x: string to be compared.
  :param str y: the other string to be compared.

  :returns: **True** if both strings are equal, **False** otherwise.
  """

  return (
    _hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, x) ==
    _hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, y))
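
A quick sketch of how the helpers above compose (illustrative only, assuming lib/ is on sys.path so the package imports as stem.util.connection)...

::

  from stem.util import connection

  print(connection.is_valid_ipv6_address("[2001:db8::1]", allow_brackets = True))  # True
  print(connection.expand_ipv6_address("2001:db8::ff00:42:8329"))  # '2001:0db8:0000:0000:0000:ff00:0042:8329'
  print(connection.is_valid_port("8080"))  # True
  print(connection.get_mask_ipv4(24))  # '255.255.255.0'
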
170
lib/stem/util/enum.py
Normal file
@ -0,0 +1,170 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Basic enumeration, providing ordered types for collections. These can be
constructed as simple type listings...

::

  >>> from stem.util import enum
  >>> insects = enum.Enum("ANT", "WASP", "LADYBUG", "FIREFLY")
  >>> insects.ANT
  'Ant'
  >>> tuple(insects)
  ('Ant', 'Wasp', 'Ladybug', 'Firefly')

... or with overwritten string counterparts...

::

  >>> from stem.util import enum
  >>> pets = enum.Enum(("DOG", "Skippy"), "CAT", ("FISH", "Nemo"))
  >>> pets.DOG
  'Skippy'
  >>> pets.CAT
  'Cat'

**Module Overview:**

::

  UppercaseEnum - Provides an enum instance with uppercase values

  Enum - Provides a basic, ordered enumeration
    |- keys - string representation of our enum keys
    |- index_of - index of an enum value
    |- next - provides the enum after a given enum value
    |- previous - provides the enum before a given value
    |- __getitem__ - provides the value for an enum key
    +- __iter__ - iterator over our enum keys
"""

import stem.util.str_tools


def UppercaseEnum(*args):
  """
  Provides an :class:`~stem.util.enum.Enum` instance where the values are
  identical to the keys. Since the keys are uppercase by convention this means
  the values are too. For instance...

  ::

    >>> from stem.util import enum
    >>> runlevels = enum.UppercaseEnum("DEBUG", "INFO", "NOTICE", "WARN", "ERROR")
    >>> runlevels.DEBUG
    'DEBUG'

  :param list args: enum keys to initialize with

  :returns: :class:`~stem.util.enum.Enum` instance with the given keys
  """

  return Enum(*[(v, v) for v in args])


class Enum(object):
  """
  Basic enumeration.
  """

  def __init__(self, *args):
    # ordered listings of our keys and values
    keys, values = [], []

    for entry in args:
      if isinstance(entry, (bytes, unicode)):
        key, val = entry, stem.util.str_tools._to_camel_case(entry)
      elif isinstance(entry, tuple) and len(entry) == 2:
        key, val = entry
      else:
        raise ValueError("Unrecognized input: %s" % args)

      keys.append(key)
      values.append(val)
      setattr(self, key, val)

    self._keys = tuple(keys)
    self._values = tuple(values)

  def keys(self):
    """
    Provides an ordered listing of the enumeration keys in this set.

    :returns: **list** with our enum keys
    """

    return list(self._keys)

  def index_of(self, value):
    """
    Provides the index of the given value in the collection.

    :param str value: entry to be looked up

    :returns: **int** index of the given entry

    :raises: **ValueError** if no such element exists
    """

    return self._values.index(value)

  def next(self, value):
    """
    Provides the next enumeration after the given value.

    :param str value: enumeration for which to get the next entry

    :returns: enum value following the given entry

    :raises: **ValueError** if no such element exists
    """

    if value not in self._values:
      raise ValueError("No such enumeration exists: %s (options: %s)" % (value, ", ".join(self._values)))

    next_index = (self._values.index(value) + 1) % len(self._values)
    return self._values[next_index]

  def previous(self, value):
    """
    Provides the previous enumeration before the given value.

    :param str value: enumeration for which to get the previous entry

    :returns: enum value preceding the given entry

    :raises: **ValueError** if no such element exists
    """

    if value not in self._values:
      raise ValueError("No such enumeration exists: %s (options: %s)" % (value, ", ".join(self._values)))

    prev_index = (self._values.index(value) - 1) % len(self._values)
    return self._values[prev_index]

  def __getitem__(self, item):
    """
    Provides the value for the given key.

    :param str item: key to be looked up

    :returns: **str** with the value for the given key

    :raises: **ValueError** if the key doesn't exist
    """

    if item in vars(self):
      return getattr(self, item)
    else:
      keys = ", ".join(self.keys())
      raise ValueError("'%s' isn't among our enumeration keys, which includes: %s" % (item, keys))

  def __iter__(self):
    """
    Provides an ordered listing of the enums in this set.
    """

    for entry in self._values:
      yield entry
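
A short sketch of the Enum class in use (illustrative only, assuming the module is importable as stem.util.enum)...

::

  from stem.util import enum

  Color = enum.Enum("RED", ("GREEN", "Verdant"), "BLUE")

  print(Color.RED)                  # 'Red'
  print(Color.index_of("Verdant"))  # 1
  print(Color.next("Blue"))         # wraps around to 'Red'
  print(list(Color))                # ['Red', 'Verdant', 'Blue']
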
247
lib/stem/util/log.py
Normal file
@ -0,0 +1,247 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Functions to aid library logging. The default logging
:data:`~stem.util.log.Runlevel` is usually NOTICE and above.

**Stem users are more than welcome to listen for stem events, but these
functions are not being vended to our users. They may change in the future;
use them at your own risk.**

**Module Overview:**

::

  get_logger - provides stem's Logger instance
  logging_level - converts a runlevel to its logging number
  escape - escapes special characters in a message in preparation for logging

  log - logs a message at the given runlevel
  log_once - logs a message, deduplicating if it has already been logged
  trace - logs a message at the TRACE runlevel
  debug - logs a message at the DEBUG runlevel
  info - logs a message at the INFO runlevel
  notice - logs a message at the NOTICE runlevel
  warn - logs a message at the WARN runlevel
  error - logs a message at the ERROR runlevel

  LogBuffer - Buffers logged events so they can be iterated over.
    |- is_empty - checks if there are events in our buffer
    +- __iter__ - iterates over and removes the buffered events

  log_to_stdout - reports further logged events to stdout

.. data:: Runlevel (enum)

  Enumeration for logging runlevels.

  ========== ===========
  Runlevel   Description
  ========== ===========
  **ERROR**  critical issue occurred, the user needs to be notified
  **WARN**   non-critical issue occurred that the user should be aware of
  **NOTICE** information that is helpful to the user
  **INFO**   high level library activity
  **DEBUG**  low level library activity
  **TRACE**  request/reply logging
  ========== ===========
"""

import logging

import stem.prereq
import stem.util.enum
import stem.util.str_tools

# Logging runlevels. These are *very* commonly used so including shorter
# aliases (so they can be referenced as log.DEBUG, log.WARN, etc).

Runlevel = stem.util.enum.UppercaseEnum("TRACE", "DEBUG", "INFO", "NOTICE", "WARN", "ERROR")
TRACE, DEBUG, INFO, NOTICE, WARN, ERR = list(Runlevel)

# mapping of runlevels to the logger module's values, TRACE and DEBUG aren't
# built into the module

LOG_VALUES = {
  Runlevel.TRACE: logging.DEBUG - 5,
  Runlevel.DEBUG: logging.DEBUG,
  Runlevel.INFO: logging.INFO,
  Runlevel.NOTICE: logging.INFO + 5,
  Runlevel.WARN: logging.WARN,
  Runlevel.ERROR: logging.ERROR,
}

logging.addLevelName(LOG_VALUES[TRACE], "TRACE")
logging.addLevelName(LOG_VALUES[NOTICE], "NOTICE")

LOGGER = logging.getLogger("stem")
LOGGER.setLevel(LOG_VALUES[TRACE])

# There are some messages that we don't want to log more than once. This set
# has the message IDs that we've logged which fall into this category.
DEDUPLICATION_MESSAGE_IDS = set()

# Adds a default nullhandler for the stem logger, suppressing the 'No handlers
# could be found for logger "stem"' warning as per...
# http://docs.python.org/release/3.1.3/library/logging.html#configuring-logging-for-a-library


class _NullHandler(logging.Handler):
  def emit(self, record):
    pass

if not LOGGER.handlers:
  LOGGER.addHandler(_NullHandler())


def get_logger():
  """
  Provides the stem logger.

  :return: **logging.Logger** for stem
  """

  return LOGGER


def logging_level(runlevel):
  """
  Translates a runlevel into the value expected by the logging module.

  :param stem.util.log.Runlevel runlevel: runlevel to be returned, no logging if **None**
  """

  if runlevel:
    return LOG_VALUES[runlevel]
  else:
    return logging.FATAL + 5


def escape(message):
  """
  Escapes specific sequences for logging (newlines, tabs, carriage returns). If
  the input is **bytes** then this converts it to **unicode** under python 3.x.

  :param str message: string to be escaped

  :returns: str that is escaped
  """

  if stem.prereq.is_python_3():
    message = stem.util.str_tools._to_unicode(message)

  for pattern, replacement in (("\n", "\\n"), ("\r", "\\r"), ("\t", "\\t")):
    message = message.replace(pattern, replacement)

  return message


def log(runlevel, message):
  """
  Logs a message at the given runlevel.

  :param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
  :param str message: message to be logged
  """

  if runlevel:
    LOGGER.log(LOG_VALUES[runlevel], message)


def log_once(message_id, runlevel, message):
  """
  Logs a message at the given runlevel. If a message with this ID has already
  been logged then this is a no-op.

  :param str message_id: unique message identifier to deduplicate on
  :param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
  :param str message: message to be logged

  :returns: **True** if we log the message, **False** otherwise
  """

  if not runlevel or message_id in DEDUPLICATION_MESSAGE_IDS:
    return False
  else:
    DEDUPLICATION_MESSAGE_IDS.add(message_id)
    log(runlevel, message)
    return True

# shorter aliases for logging at a runlevel


def trace(message):
  log(Runlevel.TRACE, message)


def debug(message):
  log(Runlevel.DEBUG, message)


def info(message):
  log(Runlevel.INFO, message)


def notice(message):
  log(Runlevel.NOTICE, message)


def warn(message):
  log(Runlevel.WARN, message)


def error(message):
  log(Runlevel.ERROR, message)


class LogBuffer(logging.Handler):
  """
  Basic log handler that listens for stem events and stores them so they can be
  read later. Log entries are cleared as they are read.
  """

  def __init__(self, runlevel):
    # TODO: At least in python 2.6 logging.Handler has a bug in that it doesn't
    # extend object, causing our super() call to fail. When we drop python 2.6
    # support we should switch back to using super() instead.
    #super(LogBuffer, self).__init__(level = logging_level(runlevel))

    logging.Handler.__init__(self, level = logging_level(runlevel))

    self.formatter = logging.Formatter(
      fmt = '%(asctime)s [%(levelname)s] %(message)s',
      datefmt = '%m/%d/%Y %H:%M:%S')

    self._buffer = []

  def is_empty(self):
    return not bool(self._buffer)

  def __iter__(self):
    while self._buffer:
      yield self.formatter.format(self._buffer.pop(0))

  def emit(self, record):
    self._buffer.append(record)


class _StdoutLogger(logging.Handler):
  def __init__(self, runlevel):
    logging.Handler.__init__(self, level = logging_level(runlevel))

    self.formatter = logging.Formatter(
      fmt = '%(asctime)s [%(levelname)s] %(message)s',
      datefmt = '%m/%d/%Y %H:%M:%S')

  def emit(self, record):
    print self.formatter.format(record)


def log_to_stdout(runlevel):
  """
  Logs further events to stdout.

  :param stem.util.log.Runlevel runlevel: minimum runlevel a message needs to be to be logged
  """

  get_logger().addHandler(_StdoutLogger(runlevel))
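
A sketch of capturing stem's events with LogBuffer (illustrative only; the handler filters by the runlevel it was constructed with)...

::

  from stem.util import log

  event_buffer = log.LogBuffer(log.Runlevel.NOTICE)
  log.get_logger().addHandler(event_buffer)

  log.notice("something the user should know")
  log.debug("filtered out, below our NOTICE threshold")

  for entry in event_buffer:
    print(entry)  # emits only the NOTICE message, formatted with its timestamp
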
182
lib/stem/util/lru_cache.py
Normal file
@ -0,0 +1,182 @@
# Drop-in replacement for python 3.2's functools.lru_cache, from...
# http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/
#
# ... which is under the MIT license. Stem users should *not* rely upon this
# module. It will be removed when we drop support for python 3.1 and below.

"""
Memoization decorator that caches a function's return value. If later called
with the same arguments then the cached value is returned rather than
reevaluated.

This is a python 2.x port of `functools.lru_cache
<http://docs.python.org/3/library/functools.html#functools.lru_cache>`_. If
using python 3.2 or later you should use that instead.
"""

from collections import namedtuple
from functools import update_wrapper
from threading import RLock

_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])


class _HashedSeq(list):
  __slots__ = 'hashvalue'

  def __init__(self, tup, hash=hash):
    self[:] = tup
    self.hashvalue = hash(tup)

  def __hash__(self):
    return self.hashvalue


def _make_key(args, kwds, typed,
       kwd_mark = (object(),),
       fasttypes = set([int, str, frozenset, type(None)]),
       sorted=sorted, tuple=tuple, type=type, len=len):
  'Make a cache key from optionally typed positional and keyword arguments'
  key = args
  if kwds:
    sorted_items = sorted(kwds.items())
    key += kwd_mark
    for item in sorted_items:
      key += item
  if typed:
    key += tuple(type(v) for v in args)
    if kwds:
      key += tuple(type(v) for k, v in sorted_items)
  elif len(key) == 1 and type(key[0]) in fasttypes:
    return key[0]
  return _HashedSeq(key)


def lru_cache(maxsize=100, typed=False):
  """Least-recently-used cache decorator.

  If *maxsize* is set to None, the LRU features are disabled and the cache
  can grow without bound.

  If *typed* is True, arguments of different types will be cached separately.
  For example, f(3.0) and f(3) will be treated as distinct calls with
  distinct results.

  Arguments to the cached function must be hashable.

  View the cache statistics named tuple (hits, misses, maxsize, currsize) with
  f.cache_info(). Clear the cache and statistics with f.cache_clear().
  Access the underlying function with f.__wrapped__.

  See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

  """

  # Users should only access the lru_cache through its public API:
  #   cache_info, cache_clear, and f.__wrapped__
  # The internals of the lru_cache are encapsulated for thread safety and
  # to allow the implementation to change (including a possible C version).

  def decorating_function(user_function):

    cache = dict()
    stats = [0, 0]          # make statistics updateable non-locally
    HITS, MISSES = 0, 1     # names for the stats fields
    make_key = _make_key
    cache_get = cache.get   # bound method to lookup key or return None
    _len = len              # localize the global len() function
    lock = RLock()          # because linkedlist updates aren't threadsafe
    root = []               # root of the circular doubly linked list
    root[:] = [root, root, None, None]  # initialize by pointing to self
    nonlocal_root = [root]  # make updateable non-locally
    PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields

    if maxsize == 0:

      def wrapper(*args, **kwds):
        # no caching, just do a statistics update after a successful call
        result = user_function(*args, **kwds)
        stats[MISSES] += 1
        return result

    elif maxsize is None:

      def wrapper(*args, **kwds):
        # simple caching without ordering or size limit
        key = make_key(args, kwds, typed)
        result = cache_get(key, root)  # root used here as a unique not-found sentinel
        if result is not root:
          stats[HITS] += 1
          return result
        result = user_function(*args, **kwds)
        cache[key] = result
        stats[MISSES] += 1
        return result

    else:

      def wrapper(*args, **kwds):
        # size limited caching that tracks accesses by recency
        key = make_key(args, kwds, typed) if kwds or typed else args
        with lock:
          link = cache_get(key)
          if link is not None:
            # record recent use of the key by moving it to the front of the list
            root, = nonlocal_root
            link_prev, link_next, key, result = link
            link_prev[NEXT] = link_next
            link_next[PREV] = link_prev
            last = root[PREV]
            last[NEXT] = root[PREV] = link
            link[PREV] = last
            link[NEXT] = root
            stats[HITS] += 1
            return result
        result = user_function(*args, **kwds)
        with lock:
          root, = nonlocal_root
          if key in cache:
            # getting here means that this same key was added to the
            # cache while the lock was released. since the link
            # update is already done, we need only return the
            # computed result and update the count of misses.
            pass
          elif _len(cache) >= maxsize:
            # use the old root to store the new key and result
            oldroot = root
            oldroot[KEY] = key
            oldroot[RESULT] = result
            # empty the oldest link and make it the new root
            root = nonlocal_root[0] = oldroot[NEXT]
            oldkey = root[KEY]
            root[KEY] = root[RESULT] = None
            # now update the cache dictionary for the new links
            del cache[oldkey]
            cache[key] = oldroot
          else:
            # put result in a new link at the front of the list
            last = root[PREV]
            link = [last, root, key, result]
            last[NEXT] = root[PREV] = cache[key] = link
          stats[MISSES] += 1
        return result

    def cache_info():
      """Report cache statistics"""
      with lock:
        return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))

    def cache_clear():
      """Clear the cache and cache statistics"""
      with lock:
        cache.clear()
        root = nonlocal_root[0]
        root[:] = [root, root, None, None]
        stats[:] = [0, 0]

    wrapper.__wrapped__ = user_function
    wrapper.cache_info = cache_info
    wrapper.cache_clear = cache_clear
    return update_wrapper(wrapper, user_function)

  return decorating_function
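
A minimal sketch of the decorator applied to a recursive function (illustrative only)...

::

  from stem.util.lru_cache import lru_cache

  @lru_cache()
  def fib(n):
    return n if n < 2 else fib(n - 1) + fib(n - 2)

  print(fib(10))           # 55
  print(fib.cache_info())  # CacheInfo(hits=8, misses=11, maxsize=100, currsize=11)
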
133
lib/stem/util/ordereddict.py
Normal file
@ -0,0 +1,133 @@
# Drop in replacement for python 2.7's OrderedDict, from...
# http://pypi.python.org/pypi/ordereddict
#
# Stem users should *not* rely upon this module. It will be removed when we
# drop support for python 2.6 and below.

# Copyright (c) 2009 Raymond Hettinger
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

from UserDict import DictMixin


class OrderedDict(dict, DictMixin):
  def __init__(self, *args, **kwds):
    if len(args) > 1:
      raise TypeError('expected at most 1 arguments, got %d' % len(args))
    try:
      self.__end
    except AttributeError:
      self.clear()
    self.update(*args, **kwds)

  def clear(self):
    self.__end = end = []
    end += [None, end, end]  # sentinel node for doubly linked list
    self.__map = {}          # key --> [key, prev, next]
    dict.clear(self)

  def __setitem__(self, key, value):
    if key not in self:
      end = self.__end
      curr = end[1]
      curr[2] = end[1] = self.__map[key] = [key, curr, end]
    dict.__setitem__(self, key, value)

  def __delitem__(self, key):
    dict.__delitem__(self, key)
    key, prev, next = self.__map.pop(key)
    prev[2] = next
    next[1] = prev

  def __iter__(self):
    end = self.__end
    curr = end[2]
    while curr is not end:
      yield curr[0]
      curr = curr[2]

  def __reversed__(self):
    end = self.__end
    curr = end[1]
    while curr is not end:
      yield curr[0]
      curr = curr[1]

  def popitem(self, last=True):
    if not self:
      raise KeyError('dictionary is empty')
    if last:
      key = reversed(self).next()
    else:
      key = iter(self).next()
    value = self.pop(key)
    return key, value

  def __reduce__(self):
    items = [[k, self[k]] for k in self]
    tmp = self.__map, self.__end
    del self.__map, self.__end
    inst_dict = vars(self).copy()
    self.__map, self.__end = tmp
    if inst_dict:
      return (self.__class__, (items,), inst_dict)
    return self.__class__, (items,)

  def keys(self):
    return list(self)

  setdefault = DictMixin.setdefault
  update = DictMixin.update
  pop = DictMixin.pop
  values = DictMixin.values
  items = DictMixin.items
  iterkeys = DictMixin.iterkeys
  itervalues = DictMixin.itervalues
  iteritems = DictMixin.iteritems

  def __repr__(self):
    if not self:
      return '%s()' % (self.__class__.__name__,)
    return '%s(%r)' % (self.__class__.__name__, self.items())

  def copy(self):
    return self.__class__(self)

  @classmethod
  def fromkeys(cls, iterable, value=None):
    d = cls()
    for key in iterable:
      d[key] = value
    return d

  def __eq__(self, other):
    if isinstance(other, OrderedDict):
      if len(self) != len(other):
        return False
      for p, q in zip(self.items(), other.items()):
        if p != q:
          return False
      return True
    return dict.__eq__(self, other)

  def __ne__(self, other):
    return not self == other
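
A quick sketch of the backport's behavior (illustrative only; this module imports UserDict, so it runs under python 2)...

::

  from stem.util.ordereddict import OrderedDict

  d = OrderedDict()
  d['first'] = 1
  d['second'] = 2
  d['third'] = 3

  print(d.keys())     # ['first', 'second', 'third'] - insertion order is kept
  print(d.popitem())  # ('third', 3)
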
498
lib/stem/util/proc.py
Normal file
@ -0,0 +1,498 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Helper functions for querying process and system information from the /proc
contents. Fetching information this way provides huge performance benefits
over lookups via system utilities (ps, netstat, etc). For instance, resolving
connections this way cuts the runtime by around 90% versus the alternatives.
These functions may not work on all platforms (only Linux?).

The method for reading these files (and a little code) are borrowed from
`psutil <https://code.google.com/p/psutil/>`_, which was written by Jay Loden,
Dave Daeschler, Giampaolo Rodola' and is under the BSD license.

**These functions are not being vended to stem users. They may change in the
future; use them at your own risk.**

**Module Overview:**

::

  is_available - checks if proc utilities can be used on this system
  get_system_start_time - unix timestamp for when the system started
  get_physical_memory - memory available on this system
  get_cwd - provides the current working directory for a process
  get_uid - provides the user id a process is running under
  get_memory_usage - provides the memory usage of a process
  get_stats - queries statistics about a process
  get_connections - provides the connections made by a process

.. data:: Stat (enum)

  Types of data available via the :func:`~stem.util.proc.get_stats` function.

  ============== ===========
  Stat           Description
  ============== ===========
  **COMMAND**    command name under which the process is running
  **CPU_UTIME**  total user time spent on the process
  **CPU_STIME**  total system time spent on the process
  **START_TIME** when this process began, in unix time
  ============== ===========
"""

import base64
import os
import platform
import socket
import sys
import time

import stem.util.enum

from stem.util import log

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

# os.sysconf is only defined on unix
try:
  CLOCK_TICKS = os.sysconf(os.sysconf_names["SC_CLK_TCK"])
except AttributeError:
  CLOCK_TICKS = None

Stat = stem.util.enum.Enum(
  ("COMMAND", "command"), ("CPU_UTIME", "utime"),
  ("CPU_STIME", "stime"), ("START_TIME", "start time")
)
@lru_cache()
def is_available():
  """
  Checks if proc information is available on this platform.

  :returns: **True** if proc contents exist on this platform, **False** otherwise
  """

  if platform.system() != "Linux":
    return False
  else:
    # list of process independent proc paths we use
    proc_paths = ("/proc/stat", "/proc/meminfo", "/proc/net/tcp", "/proc/net/udp")

    for path in proc_paths:
      if not os.path.exists(path):
        return False

    return True


@lru_cache()
def get_system_start_time():
  """
  Provides the unix time (seconds since epoch) when the system started.

  :returns: **float** for the unix time of when the system started

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "system start time"
  btime_line = _get_line("/proc/stat", "btime", parameter)

  try:
    result = float(btime_line.strip().split()[1])
    _log_runtime(parameter, "/proc/stat[btime]", start_time)
    return result
  except:
    exc = IOError("unable to parse the /proc/stat btime entry: %s" % btime_line)
    _log_failure(parameter, exc)
    raise exc


@lru_cache()
def get_physical_memory():
  """
  Provides the total physical memory on the system in bytes.

  :returns: **int** for the bytes of physical memory this system has

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "system physical memory"
  mem_total_line = _get_line("/proc/meminfo", "MemTotal:", parameter)

  try:
    result = int(mem_total_line.split()[1]) * 1024
    _log_runtime(parameter, "/proc/meminfo[MemTotal]", start_time)
    return result
  except:
    exc = IOError("unable to parse the /proc/meminfo MemTotal entry: %s" % mem_total_line)
    _log_failure(parameter, exc)
    raise exc


def get_cwd(pid):
  """
  Provides the current working directory for the given process.

  :param int pid: process id of the process to be queried

  :returns: **str** with the path of the working directory for the process

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "cwd"
  proc_cwd_link = "/proc/%s/cwd" % pid

  if pid == 0:
    cwd = ""
  else:
    try:
      cwd = os.readlink(proc_cwd_link)
    except OSError:
      exc = IOError("unable to read %s" % proc_cwd_link)
      _log_failure(parameter, exc)
      raise exc

  _log_runtime(parameter, proc_cwd_link, start_time)
  return cwd


def get_uid(pid):
  """
  Provides the user ID the given process is running under.

  :param int pid: process id of the process to be queried

  :returns: **int** with the user id for the owner of the process

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "uid"
  status_path = "/proc/%s/status" % pid
  uid_line = _get_line(status_path, "Uid:", parameter)

  try:
    result = int(uid_line.split()[1])
    _log_runtime(parameter, "%s[Uid]" % status_path, start_time)
    return result
  except:
    exc = IOError("unable to parse the %s Uid entry: %s" % (status_path, uid_line))
    _log_failure(parameter, exc)
    raise exc


def get_memory_usage(pid):
  """
  Provides the memory usage in bytes for the given process.

  :param int pid: process id of the process to be queried

  :returns: **tuple** of two ints with the memory usage of the process, of the
    form **(resident_size, virtual_size)**

  :raises: **IOError** if it can't be determined
  """

  # checks if this is the kernel process

  if pid == 0:
    return (0, 0)

  start_time, parameter = time.time(), "memory usage"
  status_path = "/proc/%s/status" % pid
  mem_lines = _get_lines(status_path, ("VmRSS:", "VmSize:"), parameter)

  try:
    resident_size = int(mem_lines["VmRSS:"].split()[1]) * 1024
    virtual_size = int(mem_lines["VmSize:"].split()[1]) * 1024

    _log_runtime(parameter, "%s[VmRSS|VmSize]" % status_path, start_time)
    return (resident_size, virtual_size)
  except:
    exc = IOError("unable to parse the %s VmRSS and VmSize entries: %s" % (status_path, ", ".join(mem_lines)))
    _log_failure(parameter, exc)
    raise exc
def get_stats(pid, *stat_types):
  """
  Provides process specific information. See the :data:`~stem.util.proc.Stat`
  enum for valid options.

  :param int pid: process id of the process to be queried
  :param Stat stat_types: information to be provided back

  :returns: **tuple** with all of the requested statistics as strings

  :raises: **IOError** if it can't be determined
  """

  if CLOCK_TICKS is None:
    raise IOError("Unable to look up SC_CLK_TCK")

  start_time, parameter = time.time(), "process %s" % ", ".join(stat_types)

  # the stat file contains a single line, of the form...
  # 8438 (tor) S 8407 8438 8407 34818 8438 4202496...
  stat_path = "/proc/%s/stat" % pid
  stat_line = _get_line(stat_path, str(pid), parameter)

  # breaks line into component values
  stat_comp = []
  cmd_start, cmd_end = stat_line.find("("), stat_line.find(")")

  if cmd_start != -1 and cmd_end != -1:
    stat_comp.append(stat_line[:cmd_start])
    stat_comp.append(stat_line[cmd_start + 1:cmd_end])
    stat_comp += stat_line[cmd_end + 1:].split()

  if len(stat_comp) < 44 or not _is_float(stat_comp[13], stat_comp[14], stat_comp[21]):
    exc = IOError("stat file had an unexpected format: %s" % stat_path)
    _log_failure(parameter, exc)
    raise exc

  results = []
  for stat_type in stat_types:
    if stat_type == Stat.COMMAND:
      if pid == 0:
        results.append("sched")
      else:
        results.append(stat_comp[1])
    elif stat_type == Stat.CPU_UTIME:
      if pid == 0:
        results.append("0")
      else:
        results.append(str(float(stat_comp[13]) / CLOCK_TICKS))
    elif stat_type == Stat.CPU_STIME:
      if pid == 0:
        results.append("0")
      else:
        results.append(str(float(stat_comp[14]) / CLOCK_TICKS))
    elif stat_type == Stat.START_TIME:
      if pid == 0:
        return get_system_start_time()
      else:
        # According to documentation, starttime is in field 21 and the unit is
        # jiffies (clock ticks). We divide it by the clock ticks, then add the
        # system start time to get the seconds since the epoch.
        p_start_time = float(stat_comp[21]) / CLOCK_TICKS
        results.append(str(p_start_time + get_system_start_time()))

  _log_runtime(parameter, stat_path, start_time)
  return tuple(results)
def get_connections(pid):
  """
  Queries connection related information from the proc contents. This provides
  similar results to netstat, lsof, sockstat, and other connection resolution
  utilities (though the lookup is far quicker).

  :param int pid: process id of the process to be queried

  :returns: A listing of connection tuples of the form **[(local_ipAddr1,
    local_port1, foreign_ipAddr1, foreign_port1, protocol), ...]** (addresses
    and protocols are strings and ports are ints)

  :raises: **IOError** if it can't be determined
  """

  if isinstance(pid, str):
    try:
      pid = int(pid)
    except ValueError:
      raise IOError("Process pid was non-numeric: %s" % pid)

  if pid == 0:
    return []

  # fetches the inode numbers for socket file descriptors

  start_time, parameter = time.time(), "process connections"
  inodes = []

  for fd in os.listdir("/proc/%s/fd" % pid):
    fd_path = "/proc/%s/fd/%s" % (pid, fd)

    try:
      # File descriptor link, such as 'socket:[30899]'

      fd_name = os.readlink(fd_path)

      if fd_name.startswith('socket:['):
        inodes.append(fd_name[8:-1])
    except OSError:
      # most likely couldn't be read due to permissions
      exc = IOError("unable to determine file descriptor destination: %s" % fd_path)
      _log_failure(parameter, exc)
      raise exc

  if not inodes:
    # unable to fetch any connections for this process
    return []

  # check for the connection information from the /proc/net contents

  conn = []

  for proc_file_path in ("/proc/net/tcp", "/proc/net/udp"):
    try:
      proc_file = open(proc_file_path)
      proc_file.readline()  # skip the first line

      for line in proc_file:
        _, l_addr, f_addr, status, _, _, _, _, _, inode = line.split()[:10]

        if inode in inodes:
          # if a tcp connection, skip if it isn't yet established
          if proc_file_path.endswith("/tcp") and status != "01":
            continue

          local_ip, local_port = _decode_proc_address_encoding(l_addr)
          foreign_ip, foreign_port = _decode_proc_address_encoding(f_addr)
          protocol = proc_file_path[10:]
          conn.append((local_ip, local_port, foreign_ip, foreign_port, protocol))

      proc_file.close()
    except IOError as exc:
      exc = IOError("unable to read '%s': %s" % (proc_file_path, exc))
      _log_failure(parameter, exc)
      raise exc
    except Exception as exc:
      exc = IOError("unable to parse '%s': %s" % (proc_file_path, exc))
      _log_failure(parameter, exc)
      raise exc

  _log_runtime(parameter, "/proc/net/[tcp|udp]", start_time)
  return conn
def _decode_proc_address_encoding(addr):
  """
  Translates an address entry in the /proc/net/* contents to a human readable
  form (`reference <http://linuxdevcenter.com/pub/a/linux/2000/11/16/LinuxAdmin.html>`_).
  For instance:

  ::

    "0500000A:0016" -> ("10.0.0.5", 22)

  :param str addr: proc address entry to be decoded

  :returns: **tuple** of the form **(addr, port)**, with addr as a string and port an int
  """

  ip, port = addr.split(':')

  # the port is represented as a two-byte hexadecimal number
  port = int(port, 16)

  if sys.version_info >= (3,):
    ip = ip.encode('ascii')

  # The IPv4 address portion is a little-endian four-byte hexadecimal number.
  # That is, the least significant byte is listed first, so we need to reverse
  # the order of the bytes to convert it to an IP address.
  #
  # This needs to account for the endian ordering as per...
  # http://code.google.com/p/psutil/issues/detail?id=201
  # https://trac.torproject.org/projects/tor/ticket/4777

  if sys.byteorder == 'little':
    ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip)[::-1])
  else:
    ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip))

  return (ip, port)


def _is_float(*value):
  try:
    for v in value:
      float(v)

    return True
  except ValueError:
    return False
def _get_line(file_path, line_prefix, parameter):
  return _get_lines(file_path, (line_prefix, ), parameter)[line_prefix]


def _get_lines(file_path, line_prefixes, parameter):
  """
  Fetches lines with the given prefixes from a file. This only provides back
  the first instance of each prefix.

  :param str file_path: path of the file to read
  :param tuple line_prefixes: string prefixes of the lines to return
  :param str parameter: description of the proc attribute being fetched

  :returns: mapping of prefixes to the matching line

  :raises: **IOError** if unable to read the file or can't find all of the prefixes
  """

  try:
    remaining_prefixes = list(line_prefixes)
    proc_file, results = open(file_path), {}

    for line in proc_file:
      if not remaining_prefixes:
        break  # found everything we're looking for

      for prefix in remaining_prefixes:
        if line.startswith(prefix):
          results[prefix] = line
          remaining_prefixes.remove(prefix)
          break

    proc_file.close()

    if remaining_prefixes:
      if len(remaining_prefixes) == 1:
        msg = "%s did not contain a %s entry" % (file_path, remaining_prefixes[0])
      else:
        msg = "%s did not contain %s entries" % (file_path, ", ".join(remaining_prefixes))

      raise IOError(msg)
    else:
      return results
  except IOError as exc:
    _log_failure(parameter, exc)
    raise exc


def _log_runtime(parameter, proc_location, start_time):
  """
  Logs a message indicating a successful proc query.

  :param str parameter: description of the proc attribute being fetched
  :param str proc_location: proc files we were querying
  :param float start_time: unix time for when this query was started
  """

  runtime = time.time() - start_time
  log.debug("proc call (%s): %s (runtime: %0.4f)" % (parameter, proc_location, runtime))


def _log_failure(parameter, exc):
  """
  Logs a message indicating that the proc query failed.

  :param str parameter: description of the proc attribute being fetched
  :param Exception exc: exception that we're raising
  """

  log.debug("proc call failed (%s): %s" % (parameter, exc))
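
On a Linux host these helpers can be exercised against the current process; a brief sketch (illustrative only, guarded by is_available() since /proc is platform specific, and the printed values are hypothetical)...

::

  import os

  from stem.util import proc

  if proc.is_available():
    pid = os.getpid()
    print(proc.get_stats(pid, proc.Stat.COMMAND, proc.Stat.START_TIME))  # ex. ('python', '1363112679.46')
    print(proc.get_memory_usage(pid))  # (resident_size, virtual_size) in bytes
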
387
lib/stem/util/str_tools.py
Normal file
@ -0,0 +1,387 @@
|
||||
# Copyright 2012-2013, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Toolkit for various string activity.
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
get_size_label - human readable label for a number of bytes
|
||||
get_time_label - human readable label for a number of seconds
|
||||
get_time_labels - human readable labels for each time unit
|
||||
get_short_time_label - condensed time label output
|
||||
parse_short_time_label - seconds represented by a short time label
|
||||
"""
|
||||
|
||||
import codecs
|
||||
import datetime
|
||||
|
||||
import stem.prereq
|
||||
|
||||
# label conversion tuples of the form...
|
||||
# (bits / bytes / seconds, short label, long label)
|
||||
SIZE_UNITS_BITS = (
|
||||
(140737488355328.0, " Pb", " Petabit"),
|
||||
(137438953472.0, " Tb", " Terabit"),
|
||||
(134217728.0, " Gb", " Gigabit"),
|
||||
(131072.0, " Mb", " Megabit"),
|
||||
(128.0, " Kb", " Kilobit"),
|
||||
(0.125, " b", " Bit"),
|
||||
)
|
||||
|
||||
SIZE_UNITS_BYTES = (
|
||||
(1125899906842624.0, " PB", " Petabyte"),
|
||||
(1099511627776.0, " TB", " Terabyte"),
|
||||
(1073741824.0, " GB", " Gigabyte"),
|
||||
(1048576.0, " MB", " Megabyte"),
|
||||
(1024.0, " KB", " Kilobyte"),
|
||||
(1.0, " B", " Byte"),
|
||||
)
|
||||
|
||||
TIME_UNITS = (
|
||||
(86400.0, "d", " day"),
|
||||
(3600.0, "h", " hour"),
|
||||
(60.0, "m", " minute"),
|
||||
(1.0, "s", " second"),
|
||||
)
|
||||
|
||||
if stem.prereq.is_python_3():
|
||||
def _to_bytes_impl(msg):
|
||||
if isinstance(msg, str):
|
||||
return codecs.latin_1_encode(msg, "replace")[0]
|
||||
else:
|
||||
return msg
|
||||
|
||||
def _to_unicode_impl(msg):
|
||||
if msg is not None and not isinstance(msg, str):
|
||||
return msg.decode("utf-8", "replace")
|
||||
else:
|
||||
return msg
|
||||
else:
|
||||
def _to_bytes_impl(msg):
|
||||
if msg is not None and isinstance(msg, unicode):
|
||||
return codecs.latin_1_encode(msg, "replace")[0]
|
||||
else:
|
||||
return msg
|
||||
|
||||
def _to_unicode_impl(msg):
|
||||
if msg is not None and not isinstance(msg, unicode):
|
||||
return msg.decode("utf-8", "replace")
|
||||
else:
|
||||
return msg
|
||||
|
||||
|
||||
def _to_bytes(msg):
|
||||
"""
|
||||
Provides the ASCII bytes for the given string. This is purely to provide
|
||||
python 3 compatability, normalizing the unicode/ASCII change in the version
|
||||
bump. For an explanation of this see...
|
||||
|
||||
http://python3porting.com/problems.html#nicer-solutions
|
||||
|
||||
:param str,unicode msg: string to be converted
|
||||
|
||||
:returns: ASCII bytes for string
|
||||
"""
|
||||
|
||||
return _to_bytes_impl(msg)
|
||||
|
||||
|
||||
def _to_unicode(msg):
|
||||
"""
|
||||
Provides the unicode string for the given ASCII bytes. This is purely to
|
||||
provide python 3 compatability, normalizing the unicode/ASCII change in the
|
||||
version bump.
|
||||
|
||||
:param str,unicode msg: string to be converted
|
||||
|
||||
:returns: unicode conversion
|
||||
"""
|
||||
|
||||
return _to_unicode_impl(msg)
|
||||
|
||||
|
||||
def _to_camel_case(label, divider = "_", joiner = " "):
|
||||
"""
|
||||
Converts the given string to camel case, ie:
|
||||
|
||||
::
|
||||
|
||||
>>> _to_camel_case("I_LIKE_PEPPERJACK!")
|
||||
'I Like Pepperjack!'
|
||||
|
||||
:param str label: input string to be converted
|
||||
:param str divider: word boundary
|
||||
:param str joiner: replacement for word boundaries
|
||||
|
||||
:returns: camel cased string
|
||||
"""
|
||||
|
||||
words = []
|
||||
for entry in label.split(divider):
|
||||
if len(entry) == 0:
|
||||
words.append("")
|
||||
elif len(entry) == 1:
|
||||
words.append(entry.upper())
|
||||
else:
|
||||
words.append(entry[0].upper() + entry[1:].lower())
|
||||
|
||||
return joiner.join(words)
|
||||
|
||||
|
||||
def get_size_label(byte_count, decimal = 0, is_long = False, is_bytes = True):
|
||||
"""
|
||||
Converts a number of bytes into a human readable label in its most
|
||||
significant units. For instance, 7500 bytes would return "7 KB". If the
|
||||
is_long option is used this expands unit labels to be the properly pluralized
|
||||
full word (for instance 'Kilobytes' rather than 'KB'). Units go up through
|
||||
petabytes.
|
||||
|
||||
::
|
||||
|
||||
>>> get_size_label(2000000)
|
||||
'1 MB'
|
||||
|
||||
>>> get_size_label(1050, 2)
|
||||
'1.02 KB'
|
||||
|
||||
>>> get_size_label(1050, 3, True)
|
||||
'1.025 Kilobytes'
|
||||
|
||||
:param int byte_count: number of bytes to be converted
|
||||
:param int decimal: number of decimal digits to be included
|
||||
:param bool is_long: expands units label
|
||||
:param bool is_bytes: provides units in bytes if **True**, bits otherwise
|
||||
|
||||
:returns: **str** with human readable representation of the size
|
||||
"""
|
||||
|
||||
if is_bytes:
|
||||
return _get_label(SIZE_UNITS_BYTES, byte_count, decimal, is_long)
|
||||
else:
|
||||
return _get_label(SIZE_UNITS_BITS, byte_count, decimal, is_long)
|
||||
|
||||
|
||||
def get_time_label(seconds, decimal = 0, is_long = False):
|
||||
"""
|
||||
Converts seconds into a time label truncated to its most significant units.
|
||||
For instance, 7500 seconds would return "2h". Units go up through days.
|
||||
|
||||
This defaults to presenting single character labels, but if the is_long
|
||||
option is used this expands labels to be the full word (space included and
|
||||
properly pluralized). For instance, "4h" would be "4 hours" and "1m" would
|
||||
become "1 minute".
|
||||
|
||||
::
|
||||
|
||||
>>> get_time_label(10000)
|
||||
'2h'
|
||||
|
||||
>>> get_time_label(61, 1, True)
|
||||
'1.0 minute'
|
||||
|
||||
>>> get_time_label(61, 2, True)
|
||||
'1.01 minutes'
|
||||
|
||||
:param int seconds: number of seconds to be converted
|
||||
:param int decimal: number of decimal digits to be included
|
||||
:param bool is_long: expands units label
|
||||
|
||||
:returns: **str** with human readable representation of the time
|
||||
"""
|
||||
|
||||
return _get_label(TIME_UNITS, seconds, decimal, is_long)
|
||||
|
||||
|
||||
def get_time_labels(seconds, is_long = False):
|
||||
"""
|
||||
Provides a list of label conversions for each time unit, starting with its
|
||||
most significant units on down. Any counts that evaluate to zero are omitted.
|
||||
For example...
|
||||
|
||||
::
|
||||
|
||||
>>> get_time_labels(400)
|
||||
['6m', '40s']
|
||||
|
||||
>>> get_time_labels(3640, True)
|
||||
['1 hour', '40 seconds']
|
||||
|
||||
:param int seconds: number of seconds to be converted
|
||||
:param bool is_long: expands units label
|
||||
|
||||
:returns: **list** of strings with human readable representations of the time
|
||||
"""
|
||||
|
||||
time_labels = []
|
||||
|
||||
for count_per_unit, _, _ in TIME_UNITS:
|
||||
if abs(seconds) >= count_per_unit:
|
||||
time_labels.append(_get_label(TIME_UNITS, seconds, 0, is_long))
|
||||
seconds %= count_per_unit
|
||||
|
||||
return time_labels
|
||||
|
||||
|
||||
def get_short_time_label(seconds):
|
||||
"""
|
||||
Provides a time in the following format:
|
||||
[[dd-]hh:]mm:ss
|
||||
|
||||
::
|
||||
|
||||
>>> get_short_time_label(111)
|
||||
'01:51'
|
||||
|
||||
>>> get_short_time_label(544100)
|
||||
'6-07:08:20'
|
||||
|
||||
:param int seconds: number of seconds to be converted
|
||||
|
||||
:returns: **str** with the short representation for the time
|
||||
|
||||
:raises: **ValueError** if the input is negative
|
||||
"""
|
||||
|
||||
if seconds < 0:
|
||||
raise ValueError("Input needs to be a non-negative integer, got '%i'" % seconds)
|
||||
|
||||
time_comp = {}
|
||||
|
||||
for amount, _, label in TIME_UNITS:
|
||||
count = int(seconds / amount)
|
||||
seconds %= amount
|
||||
time_comp[label.strip()] = count
|
||||
|
||||
label = "%02i:%02i" % (time_comp["minute"], time_comp["second"])
|
||||
|
||||
if time_comp["day"]:
|
||||
label = "%i-%02i:%s" % (time_comp["day"], time_comp["hour"], label)
|
||||
elif time_comp["hour"]:
|
||||
label = "%02i:%s" % (time_comp["hour"], label)
|
||||
|
||||
return label
def parse_short_time_label(label):
  """
  Provides the number of seconds corresponding to the formatting used for the
  cputime and etime fields of ps:
  [[dd-]hh:]mm:ss or mm:ss.ss

  ::

    >>> parse_short_time_label('01:51')
    111

    >>> parse_short_time_label('6-07:08:20')
    544100

  :param str label: time entry to be parsed

  :returns: **int** with the number of seconds represented by the label

  :raises: **ValueError** if input is malformed
  """

  days, hours, minutes, seconds = '0', '0', '0', '0'

  if '-' in label:
    days, label = label.split('-', 1)

  time_comp = label.split(":")

  if len(time_comp) == 3:
    hours, minutes, seconds = time_comp
  elif len(time_comp) == 2:
    minutes, seconds = time_comp
  else:
    raise ValueError("Invalid time format, we expected '[[dd-]hh:]mm:ss' or 'mm:ss.ss': %s" % label)

  try:
    time_sum = int(float(seconds))
    time_sum += int(minutes) * 60
    time_sum += int(hours) * 3600
    time_sum += int(days) * 86400
    return time_sum
  except ValueError:
    raise ValueError("Non-numeric value in time entry: %s" % label)
def _parse_iso_timestamp(entry):
  """
  Parses the ISO 8601 standard that provides for timestamps like...

  ::

    2012-11-08T16:48:41.420251

  :param str entry: timestamp to be parsed

  :returns: datetime for the time represented by the timestamp

  :raises: ValueError if the timestamp is malformed
  """

  if not isinstance(entry, str):
    raise ValueError("parse_iso_timestamp() input must be a str, got a %s" % type(entry))

  # based on suggestions from...
  # http://stackoverflow.com/questions/127803/how-to-parse-iso-formatted-date-in-python

  if '.' in entry:
    timestamp_str, microseconds = entry.split('.')
  else:
    timestamp_str, microseconds = entry, '000000'

  if len(microseconds) != 6 or not microseconds.isdigit():
    raise ValueError("timestamp's microseconds should be six digits")

  timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S")
  return timestamp + datetime.timedelta(microseconds = int(microseconds))
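A short sketch of the expected behavior; the parser is strict about the six digit microsecond component, and timestamps without one are treated as zero microseconds:

  >>> _parse_iso_timestamp('2012-11-08T16:48:41.420251')
  datetime.datetime(2012, 11, 8, 16, 48, 41, 420251)
  >>> _parse_iso_timestamp('2012-11-08T16:48:41')
  datetime.datetime(2012, 11, 8, 16, 48, 41)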
def _get_label(units, count, decimal, is_long):
  """
  Provides label corresponding to units of the highest significance in the
  provided set. This rounds down (ie, integer truncation after visible units).

  :param tuple units: type of units to be used for conversion, containing
    (count_per_unit, short_label, long_label)
  :param int count: number of base units being converted
  :param int decimal: decimal precision of label
  :param bool is_long: uses the long label if **True**, short label otherwise
  """

  # formatted string for the requested number of digits
  label_format = "%%.%if" % decimal

  if count < 0:
    label_format = "-" + label_format
    count = abs(count)
  elif count == 0:
    units_label = units[-1][2] + "s" if is_long else units[-1][1]
    return "%s%s" % (label_format % count, units_label)

  for count_per_unit, short_label, long_label in units:
    if count >= count_per_unit:
      # Rounding down with a '%f' is a little clunky. Reducing the count so
      # it'll divide evenly as the rounded down value.

      count -= count % (count_per_unit / (10 ** decimal))
      count_label = label_format % (count / count_per_unit)

      if is_long:
        # Pluralize if any of the visible units make it greater than one. For
        # instance 1.0003 is plural but 1.000 isn't.

        if decimal > 0:
          is_plural = count > count_per_unit
        else:
          is_plural = count >= count_per_unit * 2

        return count_label + long_label + ("s" if is_plural else "")
      else:
        return count_label + short_label
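The truncation is worth seeing concretely, since labels round down rather than up; again this assumes the float counts in the TIME_UNITS tuples defined earlier in this module:

  >>> _get_label(TIME_UNITS, 119, 1, True)
  '1.9 minutes'
  >>> _get_label(TIME_UNITS, 119, 0, True)
  '1 minute'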
1010
lib/stem/util/system.py
Normal file
File diff suppressed because it is too large
98
lib/stem/util/term.py
Normal file
@ -0,0 +1,98 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Utilities for working with the terminal.

**Module Overview:**

::

  format - wrap text with ANSI for the given colors or attributes

.. data:: Color (enum)
.. data:: BgColor (enum)

  Enumerations for foreground or background terminal color.

  =========== ===========
  Color       Description
  =========== ===========
  **BLACK**   black color
  **BLUE**    blue color
  **CYAN**    cyan color
  **GREEN**   green color
  **MAGENTA** magenta color
  **RED**     red color
  **WHITE**   white color
  **YELLOW**  yellow color
  =========== ===========

.. data:: Attr (enum)

  Enumerations of terminal text attributes.

  ============= ===========
  Attr          Description
  ============= ===========
  **BOLD**      heavy typeface
  **HILIGHT**   inverted foreground and background
  **UNDERLINE** underlined text
  ============= ===========
"""

import stem.util.enum
import stem.util.str_tools

TERM_COLORS = ("BLACK", "RED", "GREEN", "YELLOW", "BLUE", "MAGENTA", "CYAN", "WHITE")

Color = stem.util.enum.Enum(*TERM_COLORS)
BgColor = stem.util.enum.Enum(*["BG_" + color for color in TERM_COLORS])
Attr = stem.util.enum.Enum("BOLD", "UNDERLINE", "HILIGHT")

# mappings of terminal attribute enums to their ANSI escape encoding
FG_ENCODING = dict([(list(Color)[i], str(30 + i)) for i in range(8)])
BG_ENCODING = dict([(list(BgColor)[i], str(40 + i)) for i in range(8)])
ATTR_ENCODING = {Attr.BOLD: "1", Attr.UNDERLINE: "4", Attr.HILIGHT: "7"}

CSI = "\x1B[%sm"
RESET = CSI % "0"


def format(msg, *attr):
  """
  Simple terminal text formatting using `ANSI escape sequences
  <https://secure.wikimedia.org/wikipedia/en/wiki/ANSI_escape_code#CSI_codes>`_.
  The following are some toolkits providing similar capabilities:

  * `django.utils.termcolors <https://code.djangoproject.com/browser/django/trunk/django/utils/termcolors.py>`_
  * `termcolor <http://pypi.python.org/pypi/termcolor>`_
  * `colorama <http://pypi.python.org/pypi/colorama>`_

  :param str msg: string to be formatted
  :param str attr: text attributes, this can be :data:`~stem.util.term.Color`,
    :data:`~stem.util.term.BgColor`, or :data:`~stem.util.term.Attr` enums and
    are case insensitive (so strings like "red" are fine)

  :returns: **str** wrapped with ANSI escape encodings, starting with the given
    attributes and ending with a reset
  """

  # if we have reset sequences in the message then apply our attributes
  # after each of them
  if RESET in msg:
    return "".join([format(comp, *attr) for comp in msg.split(RESET)])

  encodings = []

  for text_attr in attr:
    text_attr, encoding = stem.util.str_tools._to_camel_case(text_attr), None
    encoding = FG_ENCODING.get(text_attr, encoding)
    encoding = BG_ENCODING.get(text_attr, encoding)
    encoding = ATTR_ENCODING.get(text_attr, encoding)

    if encoding:
      encodings.append(encoding)

  if encodings:
    return (CSI % ";".join(encodings)) + msg + RESET
  else:
    return msg
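A usage sketch; on a terminal that honors ANSI escapes the first call prints bold green text, while attributes that don't map to an encoding are silently skipped:

  >>> format("hello world", Color.GREEN, Attr.BOLD)
  '\x1b[32;1mhello world\x1b[0m'
  >>> format("hello world", "unrecognized")
  'hello world'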
115
lib/stem/util/tor_tools.py
Normal file
@ -0,0 +1,115 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Miscellaneous utility functions for working with tor.

**These functions are not being vended to stem users. They may change in the
future, use them at your own risk.**

**Module Overview:**

::

  is_valid_fingerprint - checks if a string is a valid tor relay fingerprint
  is_valid_nickname - checks if a string is a valid tor relay nickname
  is_valid_circuit_id - checks if a string is a valid tor circuit id
  is_valid_stream_id - checks if a string is a valid tor stream id
  is_hex_digits - checks if a string is only made up of hex digits
"""

import re

# The control-spec defines the following as...
#
#   Fingerprint = "$" 40*HEXDIG
#   NicknameChar = "a"-"z" / "A"-"Z" / "0" - "9"
#   Nickname = 1*19 NicknameChar
#
#   CircuitID = 1*16 IDChar
#   IDChar = ALPHA / DIGIT
#
# HEXDIG is defined in RFC 5234 as being uppercase and used in RFC 5987 as
# case insensitive. Tor doesn't define this in the spec so flipping a coin
# and going with case insensitive.

HEX_DIGIT = "[0-9a-fA-F]"
FINGERPRINT_PATTERN = re.compile("^%s{40}$" % HEX_DIGIT)
NICKNAME_PATTERN = re.compile("^[a-zA-Z0-9]{1,19}$")
CIRC_ID_PATTERN = re.compile("^[a-zA-Z0-9]{1,16}$")


def is_valid_fingerprint(entry, check_prefix = False):
  """
  Checks if a string is a properly formatted relay fingerprint. This checks for
  a '$' prefix if check_prefix is true, otherwise this only validates the hex
  digits.

  :param str entry: string to be checked
  :param bool check_prefix: checks for a '$' prefix

  :returns: **True** if the string could be a relay fingerprint, **False** otherwise
  """

  if not isinstance(entry, (str, unicode)):
    return False
  elif check_prefix:
    if not entry or entry[0] != "$":
      return False

    entry = entry[1:]

  return bool(FINGERPRINT_PATTERN.match(entry))
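A sketch of how the '$' prefix handling plays out, using forty placeholder hex digits rather than a real relay:

  >>> is_valid_fingerprint('A' * 40)
  True
  >>> is_valid_fingerprint('$' + 'A' * 40)
  False
  >>> is_valid_fingerprint('$' + 'A' * 40, check_prefix = True)
  True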
def is_valid_nickname(entry):
  """
  Checks if a string is a valid format for being a nickname.

  :param str entry: string to be checked

  :returns: **True** if the string could be a nickname, **False** otherwise
  """

  if not isinstance(entry, (str, unicode)):
    return False

  return bool(NICKNAME_PATTERN.match(entry))


def is_valid_circuit_id(entry):
  """
  Checks if a string is a valid format for being a circuit identifier.

  :param str entry: string to be checked

  :returns: **True** if the string could be a circuit id, **False** otherwise
  """

  if not isinstance(entry, (str, unicode)):
    return False

  return bool(CIRC_ID_PATTERN.match(entry))


def is_valid_stream_id(entry):
  """
  Checks if a string is a valid format for being a stream identifier.
  Currently, this is just an alias to :func:`~stem.util.tor_tools.is_valid_circuit_id`.

  :param str entry: string to be checked

  :returns: **True** if the string could be a stream id, **False** otherwise
  """

  return is_valid_circuit_id(entry)


def is_hex_digits(entry, count):
  """
  Checks if a string is the given number of hex digits. Digits represented by
  letters are case insensitive.

  :param str entry: string to be checked
  :param int count: number of hex digits to be checked for

  :returns: **True** if the string matches this number of hex digits, **False** otherwise
  """

  return bool(re.match("^%s{%i}$" % (HEX_DIGIT, count), entry))
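These validators check formatting only, not whether the relay, circuit, or stream actually exists. For instance:

  >>> is_valid_nickname('Unnamed')
  True
  >>> is_valid_nickname('too-long-and-punctuated')
  False
  >>> is_hex_digits('1c', 2)
  True
  >>> is_hex_digits('1c', 4)
  False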
354
lib/stem/version.py
Normal file
@ -0,0 +1,354 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Tor versioning information and requirements for its features. These can be
easily parsed and compared, for instance...

::

  >>> from stem.version import get_system_tor_version, Requirement
  >>> my_version = get_system_tor_version()
  >>> print my_version
  0.2.1.30
  >>> my_version >= Requirement.TORRC_CONTROL_SOCKET
  True

**Module Overview:**

::

  get_system_tor_version - gets the version of our system's tor installation

  Version - Tor versioning information

.. data:: Requirement (enum)

  Enumerations for the version requirements of features.

  ===================================== ===========
  Requirement                           Description
  ===================================== ===========
  **AUTH_SAFECOOKIE**                   SAFECOOKIE authentication method
  **EVENT_AUTHDIR_NEWDESCS**            AUTHDIR_NEWDESC events
  **EVENT_BUILDTIMEOUT_SET**            BUILDTIMEOUT_SET events
  **EVENT_CIRC_MINOR**                  CIRC_MINOR events
  **EVENT_CLIENTS_SEEN**                CLIENTS_SEEN events
  **EVENT_CONF_CHANGED**                CONF_CHANGED events
  **EVENT_DESCCHANGED**                 DESCCHANGED events
  **EVENT_GUARD**                       GUARD events
  **EVENT_NEWCONSENSUS**                NEWCONSENSUS events
  **EVENT_NS**                          NS events
  **EVENT_SIGNAL**                      SIGNAL events
  **EVENT_STATUS**                      STATUS_GENERAL, STATUS_CLIENT, and STATUS_SERVER events
  **EVENT_STREAM_BW**                   STREAM_BW events
  **EVENT_TRANSPORT_LAUNCHED**          TRANSPORT_LAUNCHED events
  **EXTENDCIRCUIT_PATH_OPTIONAL**       EXTENDCIRCUIT queries can omit the path if the circuit is zero
  **FEATURE_EXTENDED_EVENTS**           'EXTENDED_EVENTS' optional feature
  **FEATURE_VERBOSE_NAMES**             'VERBOSE_NAMES' optional feature
  **GETINFO_CONFIG_TEXT**               'GETINFO config-text' query
  **LOADCONF**                          LOADCONF requests
  **MICRODESCRIPTOR_IS_DEFAULT**        Tor gets microdescriptors by default rather than server descriptors
  **TAKEOWNERSHIP**                     TAKEOWNERSHIP requests
  **TORRC_CONTROL_SOCKET**              'ControlSocket <path>' config option
  **TORRC_PORT_FORWARDING**             'PortForwarding' config option
  **TORRC_DISABLE_DEBUGGER_ATTACHMENT** 'DisableDebuggerAttachment' config option
  ===================================== ===========
"""

import os
import re

import stem.util.enum
import stem.util.system

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

# cache for the get_system_tor_version function
VERSION_CACHE = {}


def get_system_tor_version(tor_cmd = "tor"):
  """
  Queries tor for its version. This is OS dependent, only working on Linux,
  OS X, and BSD.

  :param str tor_cmd: command used to run tor

  :returns: :class:`~stem.version.Version` provided by the tor command

  :raises: **IOError** if unable to query or parse the version
  """

  if tor_cmd not in VERSION_CACHE:
    version_cmd = "%s --version" % tor_cmd

    try:
      version_output = stem.util.system.call(version_cmd)
    except OSError as exc:
      # make the error message nicer if this is due to tor being unavailable

      if "No such file or directory" in str(exc):
        if os.path.isabs(tor_cmd):
          exc = "Unable to check tor's version. '%s' doesn't exist." % tor_cmd
        else:
          exc = "Unable to run '%s'. Maybe tor isn't in your PATH?" % version_cmd

      raise IOError(exc)

    if version_output:
      # output example:
      # Oct 21 07:19:27.438 [notice] Tor v0.2.1.30. This is experimental software. Do not rely on it for strong anonymity. (Running on Linux i686)
      # Tor version 0.2.1.30.

      last_line = version_output[-1]

      if last_line.startswith("Tor version ") and last_line.endswith("."):
        try:
          version_str = last_line[12:-1]
          VERSION_CACHE[tor_cmd] = Version(version_str)
        except ValueError as exc:
          raise IOError(exc)
      else:
        raise IOError("Unexpected response from '%s': %s" % (version_cmd, last_line))
    else:
      raise IOError("'%s' didn't have any output" % version_cmd)

  return VERSION_CACHE[tor_cmd]
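A usage sketch; the version reported depends on the local installation, and repeated calls for the same command are answered from VERSION_CACHE rather than spawning tor again:

  >>> print get_system_tor_version()
  0.2.1.30
  >>> print get_system_tor_version('/usr/sbin/tor')  # cached separately per command
  0.2.1.30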
@lru_cache()
def _get_version(version_str):
  return Version(version_str)


class Version(object):
  """
  Comparable tor version. These are constructed from strings that conform to
  the 'new' style in the `tor version-spec
  <https://gitweb.torproject.org/torspec.git/blob/HEAD:/version-spec.txt>`_,
  such as "0.1.4" or "0.2.2.23-alpha (git-7dcd105be34a4f44)".

  :var int major: major version
  :var int minor: minor version
  :var int micro: micro version
  :var int patch: patch level (**None** if undefined)
  :var str status: status tag such as 'alpha' or 'beta-dev' (**None** if undefined)
  :var str extra: extra information without its parentheses such as
    'git-8be6058d8f31e578' (**None** if undefined)
  :var str git_commit: git commit id (**None** if it wasn't provided)

  :param str version_str: version to be parsed

  :raises: **ValueError** if input isn't a valid tor version
  """

  def __init__(self, version_str):
    self.version_str = version_str
    version_parts = re.match(r'^([0-9]+)\.([0-9]+)\.([0-9]+)(\.[0-9]+)?(-\S*)?( \(\S*\))?$', version_str)
    self._hash = None

    if version_parts:
      major, minor, micro, patch, status, extra = version_parts.groups()

      # The patch and status matches are optional (may be None) and have an
      # extra preceding period or dash if they exist. Stripping those off.

      if patch:
        patch = int(patch[1:])

      if status:
        status = status[1:]

      if extra:
        extra = extra[2:-1]

      self.major = int(major)
      self.minor = int(minor)
      self.micro = int(micro)
      self.patch = patch
      self.status = status
      self.extra = extra

      if extra and re.match("^git-[0-9a-f]{16}$", extra):
        self.git_commit = extra[4:]
      else:
        self.git_commit = None
    else:
      raise ValueError("'%s' isn't a properly formatted tor version" % version_str)
  def __str__(self):
    """
    Provides the string used to construct the version.
    """

    return self.version_str

  def _compare(self, other, method):
    """
    Compares version ordering according to the spec.
    """

    if not isinstance(other, Version):
      return False

    for attr in ("major", "minor", "micro", "patch"):
      my_version = getattr(self, attr)
      other_version = getattr(other, attr)

      if my_version is None:
        my_version = 0

      if other_version is None:
        other_version = 0

      if my_version != other_version:
        return method(my_version, other_version)

    # According to the version spec...
    #
    #   If we *do* encounter two versions that differ only by status tag, we
    #   compare them lexically as ASCII byte strings.

    my_status = self.status if self.status else ""
    other_status = other.status if other.status else ""

    return method(my_status, other_status)

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __gt__(self, other):
    """
    Checks if this version meets the requirements for a given feature. We can
    be compared to either a :class:`~stem.version.Version` or
    :class:`~stem.version._VersionRequirements`.
    """

    if isinstance(other, _VersionRequirements):
      for rule in other.rules:
        if rule(self):
          return True

      return False

    return self._compare(other, lambda s, o: s > o)

  def __ge__(self, other):
    if isinstance(other, _VersionRequirements):
      for rule in other.rules:
        if rule(self):
          return True

      return False

    return self._compare(other, lambda s, o: s >= o)

  def __hash__(self):
    if self._hash is None:
      my_hash = 0

      for attr in ("major", "minor", "micro", "patch", "status"):
        my_hash *= 1024

        attr_value = getattr(self, attr)

        if attr_value is not None:
          my_hash += hash(attr_value)

      self._hash = my_hash

    return self._hash
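Since unset components are treated as zero and status tags compare lexically as ASCII, comparisons behave like this:

  >>> Version('0.2.2') == Version('0.2.2.0')
  True
  >>> Version('0.2.3') > Version('0.2.2.99')
  True
  >>> Version('0.2.2-beta') > Version('0.2.2-alpha')
  True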
class _VersionRequirements(object):
  """
  Series of version constraints that can be compared to. For instance, this
  allows for comparisons like 'if I'm greater than version X in the 0.2.2
  series, or greater than version Y in the 0.2.3 series'.

  This is a logical 'or' of the series of rules.
  """

  def __init__(self):
    self.rules = []

  def greater_than(self, version, inclusive = True):
    """
    Adds a constraint that we're greater than the given version.

    :param stem.version.Version version: version we're checking against
    :param bool inclusive: if comparison is inclusive or not
    """

    if inclusive:
      self.rules.append(lambda v: version <= v)
    else:
      self.rules.append(lambda v: version < v)

  def less_than(self, version, inclusive = True):
    """
    Adds a constraint that we're less than the given version.

    :param stem.version.Version version: version we're checking against
    :param bool inclusive: if comparison is inclusive or not
    """

    if inclusive:
      self.rules.append(lambda v: version >= v)
    else:
      self.rules.append(lambda v: version > v)

  def in_range(self, from_version, to_version, from_inclusive = True, to_inclusive = False):
    """
    Adds constraint that we're within the range from one version to another.

    :param stem.version.Version from_version: beginning of the comparison range
    :param stem.version.Version to_version: end of the comparison range
    :param bool from_inclusive: if comparison is inclusive with the starting version
    :param bool to_inclusive: if comparison is inclusive with the ending version
    """

    if from_inclusive and to_inclusive:
      new_rule = lambda v: from_version <= v <= to_version
    elif from_inclusive:
      new_rule = lambda v: from_version <= v < to_version
    else:
      new_rule = lambda v: from_version < v < to_version

    self.rules.append(new_rule)

safecookie_req = _VersionRequirements()
safecookie_req.in_range(Version("0.2.2.36"), Version("0.2.3.0"))
safecookie_req.greater_than(Version("0.2.3.13"))

Requirement = stem.util.enum.Enum(
  ("AUTH_SAFECOOKIE", safecookie_req),
  ("EVENT_AUTHDIR_NEWDESCS", Version('0.1.1.10-alpha')),
  ("EVENT_BUILDTIMEOUT_SET", Version('0.2.2.7-alpha')),
  ("EVENT_CIRC_MINOR", Version('0.2.3.11-alpha')),
  ("EVENT_CLIENTS_SEEN", Version('0.2.1.10-alpha')),
  ("EVENT_CONF_CHANGED", Version('0.2.3.3-alpha')),
  ("EVENT_DESCCHANGED", Version('0.1.2.2-alpha')),
  ("EVENT_GUARD", Version('0.1.2.5-alpha')),
  ("EVENT_NS", Version('0.1.2.3-alpha')),
  ("EVENT_NEWCONSENSUS", Version('0.2.1.13-alpha')),
  ("EVENT_SIGNAL", Version('0.2.3.1-alpha')),
  ("EVENT_STATUS", Version('0.1.2.3-alpha')),
  ("EVENT_STREAM_BW", Version('0.1.2.8-beta')),
  ("EVENT_TRANSPORT_LAUNCHED", Version('0.2.5.0-alpha')),
  ("EXTENDCIRCUIT_PATH_OPTIONAL", Version("0.2.2.9")),
  ("FEATURE_EXTENDED_EVENTS", Version("0.2.2.1-alpha")),
  ("FEATURE_VERBOSE_NAMES", Version("0.2.2.1-alpha")),
  ("GETINFO_CONFIG_TEXT", Version("0.2.2.7-alpha")),
  ("LOADCONF", Version("0.2.1.1")),
  ("MICRODESCRIPTOR_IS_DEFAULT", Version("0.2.3.3")),
  ("TAKEOWNERSHIP", Version("0.2.2.28-beta")),
  ("TORRC_CONTROL_SOCKET", Version("0.2.0.30")),
  ("TORRC_PORT_FORWARDING", Version("0.2.3.1-alpha")),
  ("TORRC_DISABLE_DEBUGGER_ATTACHMENT", Version("0.2.3.9")),
)
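Given the rules above, AUTH_SAFECOOKIE is satisfied either within the 0.2.2 backport range or from 0.2.3.13 onward, while plain Version requirements compare directly:

  >>> Version('0.2.2.36') >= Requirement.AUTH_SAFECOOKIE
  True
  >>> Version('0.2.3.5') >= Requirement.AUTH_SAFECOOKIE
  False
  >>> Version('0.2.3.13') >= Requirement.AUTH_SAFECOOKIE
  True
  >>> Version('0.2.3.9') >= Requirement.TORRC_DISABLE_DEBUGGER_ATTACHMENT
  True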