diff --git a/lib/stem/__init__.py b/lib/stem/__init__.py new file mode 100644 index 00000000..1d477830 --- /dev/null +++ b/lib/stem/__init__.py @@ -0,0 +1,700 @@ +# Copyright 2011-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Library for working with the tor process. + +**Module Overview:** + +:: + + ControllerError - Base exception raised when using the controller. + |- ProtocolError - Malformed socket data. + |- OperationFailed - Tor was unable to successfully complete the operation. + | |- UnsatisfiableRequest - Tor was unable to satisfy a valid request. + | | +- CircuitExtensionFailed - Attempt to make or extend a circuit failed. + | +- InvalidRequest - Invalid request. + | +- InvalidArguments - Invalid request parameters. + +- SocketError - Communication with the socket failed. + +- SocketClosed - Socket has been shut down. + +.. data:: Runlevel (enum) + + Rating of importance used for event logging. + + =========== =========== + Runlevel Description + =========== =========== + **ERR** critical issues that impair tor's ability to function + **WARN** non-critical issues the user should be aware of + **NOTICE** information that may be helpful to the user + **INFO** high level runtime information + **DEBUG** low level runtime information + =========== =========== + +.. data:: Signal (enum) + + Signals that the tor process will accept. 
+ + ========================= =========== + Signal Description + ========================= =========== + **RELOAD** or **HUP** reloads our torrc + **SHUTDOWN** or **INT** shut down, waiting ShutdownWaitLength first if we're a relay + **DUMP** or **USR1** dumps information about open connections and circuits to our log + **DEBUG** or **USR2** switch our logging to the DEBUG runlevel + **HALT** or **TERM** exit tor immediately + **NEWNYM** switch to new circuits, so new application requests don't share any circuits with old ones (this also clears our DNS cache) + **CLEARDNSCACHE** clears cached DNS results + ========================= =========== + +.. data:: Flag (enum) + + Flag assigned to tor relays by the authorities to indicate various + characteristics. + + ================= =========== + Flag Description + ================= =========== + **AUTHORITY** relay is a directory authority + **BADEXIT** relay shouldn't be used as an exit due to being either problematic or malicious (`wiki <https://trac.torproject.org/projects/tor/wiki/doc/badRelays>`_) + **BADDIRECTORY** relay shouldn't be used for directory information + **EXIT** relay's exit policy makes it more useful as an exit rather than middle hop + **FAST** relay's suitable for high-bandwidth circuits + **GUARD** relay's suitable for being an entry guard (first hop) + **HSDIR** relay is being used as a v2 hidden service directory + **NAMED** relay can be referred to by its nickname + **RUNNING** relay is currently usable + **STABLE** relay's suitable for long-lived circuits + **UNNAMED** relay isn't presently bound to a nickname + **V2DIR** relay supports the v2 directory protocol + **VALID** relay has been validated + ================= =========== + +.. data:: CircStatus (enum) + + Statuses that a circuit can be in. Tor may provide statuses not in this enum. 
+ + ============ =========== + CircStatus Description + ============ =========== + **LAUNCHED** new circuit was created + **BUILT** circuit finished being created and can accept traffic + **EXTENDED** circuit has been extended by a hop + **FAILED** circuit construction failed + **CLOSED** circuit has been closed + ============ =========== + +.. data:: CircBuildFlag (enum) + + Attributes about how a circuit is built. These were introduced in tor version + 0.2.3.11. Tor may provide flags not in this enum. + + ================= =========== + CircBuildFlag Description + ================= =========== + **ONEHOP_TUNNEL** single hop circuit to fetch directory information + **IS_INTERNAL** circuit that won't be used for client traffic + **NEED_CAPACITY** circuit only includes high capacity relays + **NEED_UPTIME** circuit only includes relays with a high uptime + ================= =========== + +.. data:: CircPurpose (enum) + + Description of what a circuit is intended for. These were introduced in tor + version 0.2.1.6. Tor may provide purposes not in this enum. + + ==================== =========== + CircPurpose Description + ==================== =========== + **GENERAL** client traffic or fetching directory information + **HS_CLIENT_INTRO** client side introduction point for a hidden service circuit + **HS_CLIENT_REND** client side hidden service rendezvous circuit + **HS_SERVICE_INTRO** server side introduction point for a hidden service circuit + **HS_SERVICE_REND** server side hidden service rendezvous circuit + **TESTING** testing to see if we're reachable, so we can be used as a relay + **CONTROLLER** circuit that was built by a controller + **MEASURE_TIMEOUT** circuit being kept around to see how long it takes + ==================== =========== + +.. data:: CircClosureReason (enum) + + Reason that a circuit is being closed or failed to be established. Tor may + provide reasons not in this enum. 
+ + ========================= =========== + CircClosureReason Description + ========================= =========== + **NONE** no reason given + **TORPROTOCOL** violation in the tor protocol + **INTERNAL** internal error + **REQUESTED** requested by the client via a TRUNCATE command + **HIBERNATING** relay is presently hibernating + **RESOURCELIMIT** relay is out of memory, sockets, or circuit IDs + **CONNECTFAILED** unable to contact the relay + **OR_IDENTITY** relay had the wrong OR identification + **OR_CONN_CLOSED** connection failed after being established + **FINISHED** circuit has expired (see tor's MaxCircuitDirtiness config option) + **TIMEOUT** circuit construction timed out + **DESTROYED** circuit unexpectedly closed + **NOPATH** not enough relays to make a circuit + **NOSUCHSERVICE** requested hidden service does not exist + **MEASUREMENT_EXPIRED** same as **TIMEOUT** except that it was left open for measurement purposes + ========================= =========== + +.. data:: CircEvent (enum) + + Type of change reflected in a circuit by a CIRC_MINOR event. Tor may provide + event types not in this enum. + + ===================== =========== + CircEvent Description + ===================== =========== + **PURPOSE_CHANGED** circuit purpose or hidden service state has changed + **CANNIBALIZED** circuit connections are being reused for a different circuit + ===================== =========== + +.. data:: HiddenServiceState (enum) + + State that a hidden service circuit can have. These were introduced in tor + version 0.2.3.11. Tor may provide states not in this enum. + + Enumerations fall into four groups based on their prefix... 
+ + ======= =========== + Prefix Description + ======= =========== + HSCI_* client-side introduction-point + HSCR_* client-side rendezvous-point + HSSI_* service-side introduction-point + HSSR_* service-side rendezvous-point + ======= =========== + + ============================= =========== + HiddenServiceState Description + ============================= =========== + **HSCI_CONNECTING** connecting to the introductory point + **HSCI_INTRO_SENT** sent INTRODUCE1 and awaiting a reply + **HSCI_DONE** received a reply, circuit is closing + **HSCR_CONNECTING** connecting to the introductory point + **HSCR_ESTABLISHED_IDLE** rendezvous-point established, awaiting an introduction + **HSCR_ESTABLISHED_WAITING** introduction received, awaiting a rend + **HSCR_JOINED** connected to the hidden service + **HSSI_CONNECTING** connecting to the introductory point + **HSSI_ESTABLISHED** established introductory point + **HSSR_CONNECTING** connecting to the introductory point + **HSSR_JOINED** connected to the rendezvous-point + ============================= =========== + +.. data:: RelayEndReason (enum) + + Reasons why the stream is to be closed. 
+ + =================== =========== + RelayEndReason Description + =================== =========== + **MISC** none of the following reasons + **RESOLVEFAILED** unable to resolve the hostname + **CONNECTREFUSED** remote host refused the connection + **EXITPOLICY** OR refuses to connect to the destination + **DESTROY** circuit is being shut down + **DONE** connection has been closed + **TIMEOUT** connection timed out + **NOROUTE** routing error while contacting the destination + **HIBERNATING** relay is temporarily hibernating + **INTERNAL** internal error at the relay + **RESOURCELIMIT** relay has insufficient resources to service the request + **CONNRESET** connection was unexpectedly reset + **TORPROTOCOL** violation in the tor protocol + **NOTDIRECTORY** directory information requested from a relay that isn't mirroring it + =================== =========== + +.. data:: StreamStatus (enum) + + State that a stream going through tor can have. Tor may provide states not in + this enum. + + ================= =========== + StreamStatus Description + ================= =========== + **NEW** request for a new connection + **NEWRESOLVE** request to resolve an address + **REMAP** address is being re-mapped to another + **SENTCONNECT** sent a connect cell along a circuit + **SENTRESOLVE** sent a resolve cell along a circuit + **SUCCEEDED** stream has been established + **FAILED** stream is detached, and won't be re-established + **DETACHED** stream is detached, but might be re-established + **CLOSED** stream has closed + ================= =========== + +.. data:: StreamClosureReason (enum) + + Reason that a stream is being closed or failed to be established. This + includes all values in the :data:`~stem.RelayEndReason` enumeration as + well as the following. Tor may provide reasons not in this enum. 
+ + ===================== =========== + StreamClosureReason Description + ===================== =========== + **END** endpoint has sent a RELAY_END cell + **PRIVATE_ADDR** endpoint was a private address (127.0.0.1, 10.0.0.1, etc) + ===================== =========== + +.. data:: StreamSource (enum) + + Cause of a stream being remapped to another address. Tor may provide sources + not in this enum. + + ============= =========== + StreamSource Description + ============= =========== + **CACHE** tor is remapping because of a cached answer + **EXIT** exit relay requested the remap + ============= =========== + +.. data:: StreamPurpose (enum) + + Purpose of the stream. This is only provided with new streams and tor may + provide purposes not in this enum. + + ================= =========== + StreamPurpose Description + ================= =========== + **DIR_FETCH** fetching directory information (descriptors, consensus, etc) + **DIR_UPLOAD** uploading our descriptor to an authority + **DNS_REQUEST** user initiated DNS request + **DIRPORT_TEST** checking that our directory port is reachable externally + **USER** either relaying user traffic or not one of the above categories + ================= =========== + +.. data:: ORStatus (enum) + + State that an OR connection can have. Tor may provide states not in this + enum. + + =============== =========== + ORStatus Description + =============== =========== + **NEW** received OR connection, starting server-side handshake + **LAUNCHED** launched outbound OR connection, starting client-side handshake + **CONNECTED** OR connection has been established + **FAILED** attempt to establish OR connection failed + **CLOSED** OR connection has been closed + =============== =========== + +.. data:: ORClosureReason (enum) + + Reason that an OR connection is being closed or failed to be established. Tor + may provide reasons not in this enum. 
+ + =================== =========== + ORClosureReason Description + =================== =========== + **DONE** OR connection shut down cleanly + **CONNECTREFUSED** got an ECONNREFUSED when connecting to the relay + **IDENTITY** identity of the relay wasn't what we expected + **CONNECTRESET** got an ECONNRESET or similar error from relay + **TIMEOUT** got an ETIMEOUT or similar error from relay + **NOROUTE** got an ENOTCONN, ENETUNREACH, ENETDOWN, EHOSTUNREACH, or similar error from relay + **IOERROR** got a different kind of error from relay + **RESOURCELIMIT** relay has insufficient resources to service the request + **MISC** connection refused for another reason + =================== =========== + +.. data:: AuthDescriptorAction (enum) + + Actions that directory authorities might take with relay descriptors. Tor may + provide actions not in this enum. + + ===================== =========== + AuthDescriptorAction Description + ===================== =========== + **ACCEPTED** accepting the descriptor as the newest version + **DROPPED** descriptor rejected without notifying the relay + **REJECTED** relay notified that its descriptor has been rejected + ===================== =========== + +.. data:: StatusType (enum) + + Sources for tor status events. Tor may provide types not in this enum. + + ============= =========== + StatusType Description + ============= =========== + **GENERAL** general tor activity, not specifically as a client or relay + **CLIENT** related to our activity as a tor client + **SERVER** related to our activity as a tor relay + ============= =========== + +.. data:: GuardType (enum) + + Purpose that a guard relay can be used for. Tor may provide types not in this enum. + + =========== =========== + GuardType Description + =========== =========== + **ENTRY** used to connect to the tor network + =========== =========== + +.. data:: GuardStatus (enum) + + Status a guard relay can have. Tor may provide statuses not in this enum. 
+ + ============= =========== + GuardStatus Description + ============= =========== + **NEW** new guard that we weren't previously using + **DROPPED** removed from use as one of our guards + **UP** guard is now reachable + **DOWN** guard is now unreachable + **BAD** consensus or relay considers this relay to be unusable as a guard + **GOOD** consensus or relay considers this relay to be usable as a guard + ============= =========== + +.. data:: TimeoutSetType (enum) + + Way in which the timeout value of a circuit is changing. Tor may provide + types not in this enum. + + =============== =========== + TimeoutSetType Description + =============== =========== + **COMPUTED** tor has computed a new timeout based on prior circuits + **RESET** timeout reverted to its default + **SUSPENDED** timeout reverted to its default until network connectivity has recovered + **DISCARD** throwing out timeout value from when the network was down + **RESUME** resumed calculations to determine the proper timeout + =============== =========== +""" + +__version__ = '1.1.1' +__author__ = 'Damian Johnson' +__contact__ = 'atagar@torproject.org' +__url__ = 'https://stem.torproject.org/' +__license__ = 'LGPLv3' + +__all__ = [ + "descriptor", + "response", + "util", + "connection", + "control", + "exit_policy", + "prereq", + "process", + "socket", + "version", + "ControllerError", + "ProtocolError", + "OperationFailed", + "UnsatisfiableRequest", + "CircuitExtensionFailed", + "InvalidRequest", + "InvalidArguments", + "SocketError", + "SocketClosed", + "Runlevel", + "Signal", + "Flag", + "CircStatus", + "CircBuildFlag", + "CircPurpose", + "CircClosureReason", + "CircEvent", + "HiddenServiceState", + "RelayEndReason", + "StreamStatus", + "StreamClosureReason", + "StreamSource", + "StreamPurpose", + "ORStatus", + "ORClosureReason", + "AuthDescriptorAction", + "StatusType", + "GuardType", + "GuardStatus", + "TimeoutSetType", +] + +import stem.util.enum + +# Constant to indicate an undefined argument 
# default. Usually we'd use None for this, but callers commonly pass None as a
# genuine argument, so a distinct sentinel is needed. An empty string is just
# as likely to be a legitimate caller-supplied value, so the marker below is
# deliberately unusual. Compare against it by name (``value == UNDEFINED``).

UNDEFINED = "<Undefined_ >"


class ControllerError(Exception):
    "Base error for controller communication issues."


class ProtocolError(ControllerError):
    "Malformed content from the control socket."


class OperationFailed(ControllerError):
    """
    Base exception class for failed operations that return an error code.

    :var str code: error code returned by Tor
    :var str message: error message returned by Tor or a human readable error
      message
    """

    def __init__(self, code = None, message = None):
        # super() must name *this* class: naming ControllerError would start
        # the MRO lookup after it and silently skip any initializer that
        # ControllerError defines.
        super(OperationFailed, self).__init__(message)
        self.code = code
        self.message = message


class UnsatisfiableRequest(OperationFailed):
    """
    Exception raised if Tor was unable to process our request.
    """


class CircuitExtensionFailed(UnsatisfiableRequest):
    """
    An attempt to create or extend a circuit failed.

    :var stem.response.CircuitEvent circ: response notifying us of the failure
    """

    def __init__(self, message, circ = None):
        # no error code is available here, so 'code' keeps its None default
        super(CircuitExtensionFailed, self).__init__(message = message)
        self.circ = circ


class InvalidRequest(OperationFailed):
    """
    Exception raised when the request was invalid or malformed.
    """


class InvalidArguments(InvalidRequest):
    """
    Exception class for requests which had invalid arguments.

    :var str code: error code returned by Tor
    :var str message: error message returned by Tor or a human readable error
      message
    :var list arguments: a list of arguments which were invalid
    """

    def __init__(self, code = None, message = None, arguments = None):
        super(InvalidArguments, self).__init__(code, message)
        self.arguments = arguments


class SocketError(ControllerError):
    "Error arose while communicating with the control socket."


class SocketClosed(SocketError):
    "Control socket was closed before completing the message."
# Enumerations exposed by this module. Their members are described in the
# tables of the module docstring; the order of the arguments below is the
# order in which the enum is constructed, so do not re-sort them.

# Rating of importance used for event logging.
Runlevel = stem.util.enum.UppercaseEnum(
    "DEBUG",
    "INFO",
    "NOTICE",
    "WARN",
    "ERR",
)

# Flags assigned to relays by the authorities. Each entry is a
# (key, value) pair rather than a bare name.
# NOTE(review): V3DIR is defined here but is not listed in the module
# docstring's Flag table - confirm whether the documentation should cover it.
Flag = stem.util.enum.Enum(
    ("AUTHORITY", "Authority"),
    ("BADEXIT", "BadExit"),
    ("BADDIRECTORY", "BadDirectory"),
    ("EXIT", "Exit"),
    ("FAST", "Fast"),
    ("GUARD", "Guard"),
    ("HSDIR", "HSDir"),
    ("NAMED", "Named"),
    ("RUNNING", "Running"),
    ("STABLE", "Stable"),
    ("UNNAMED", "Unnamed"),
    ("V2DIR", "V2Dir"),
    ("V3DIR", "V3Dir"),
    ("VALID", "Valid"),
)

# Signals that the tor process will accept. The module docstring documents
# these as alias pairs (RELOAD/HUP, SHUTDOWN/INT, DUMP/USR1, DEBUG/USR2,
# HALT/TERM).
Signal = stem.util.enum.UppercaseEnum(
    "RELOAD",
    "HUP",
    "SHUTDOWN",
    "INT",
    "DUMP",
    "USR1",
    "DEBUG",
    "USR2",
    "HALT",
    "TERM",
    "NEWNYM",
    "CLEARDNSCACHE",
)

# Statuses that a circuit can be in.
CircStatus = stem.util.enum.UppercaseEnum(
    "LAUNCHED",
    "BUILT",
    "EXTENDED",
    "FAILED",
    "CLOSED",
)

# Attributes about how a circuit is built.
CircBuildFlag = stem.util.enum.UppercaseEnum(
    "ONEHOP_TUNNEL",
    "IS_INTERNAL",
    "NEED_CAPACITY",
    "NEED_UPTIME",
)

# Description of what a circuit is intended for.
CircPurpose = stem.util.enum.UppercaseEnum(
    "GENERAL",
    "HS_CLIENT_INTRO",
    "HS_CLIENT_REND",
    "HS_SERVICE_INTRO",
    "HS_SERVICE_REND",
    "TESTING",
    "CONTROLLER",
    "MEASURE_TIMEOUT",
)

# Reason that a circuit is being closed or failed to be established.
CircClosureReason = stem.util.enum.UppercaseEnum(
    "NONE",
    "TORPROTOCOL",
    "INTERNAL",
    "REQUESTED",
    "HIBERNATING",
    "RESOURCELIMIT",
    "CONNECTFAILED",
    "OR_IDENTITY",
    "OR_CONN_CLOSED",
    "FINISHED",
    "TIMEOUT",
    "DESTROYED",
    "NOPATH",
    "NOSUCHSERVICE",
    "MEASUREMENT_EXPIRED",
)

# Type of change reflected in a circuit by a CIRC_MINOR event.
CircEvent = stem.util.enum.UppercaseEnum(
    "PURPOSE_CHANGED",
    "CANNIBALIZED",
)

# State that a hidden service circuit can have. Prefixes group the members:
# HSCI_ / HSCR_ are client-side introduction / rendezvous points, HSSI_ /
# HSSR_ are the service-side equivalents.
HiddenServiceState = stem.util.enum.UppercaseEnum(
    "HSCI_CONNECTING",
    "HSCI_INTRO_SENT",
    "HSCI_DONE",
    "HSCR_CONNECTING",
    "HSCR_ESTABLISHED_IDLE",
    "HSCR_ESTABLISHED_WAITING",
    "HSCR_JOINED",
    "HSSI_CONNECTING",
    "HSSI_ESTABLISHED",
    "HSSR_CONNECTING",
    "HSSR_JOINED",
)

# Reasons why a stream is to be closed.
RelayEndReason = stem.util.enum.UppercaseEnum(
    "MISC",
    "RESOLVEFAILED",
    "CONNECTREFUSED",
    "EXITPOLICY",
    "DESTROY",
    "DONE",
    "TIMEOUT",
    "NOROUTE",
    "HIBERNATING",
    "INTERNAL",
    "RESOURCELIMIT",
    "CONNRESET",
    "TORPROTOCOL",
    "NOTDIRECTORY",
)

# State that a stream going through tor can have.
StreamStatus = stem.util.enum.UppercaseEnum(
    "NEW",
    "NEWRESOLVE",
    "REMAP",
    "SENTCONNECT",
    "SENTRESOLVE",
    "SUCCEEDED",
    "FAILED",
    "DETACHED",
    "CLOSED",
)

# StreamClosureReason is a superset of RelayEndReason
# NOTE(review): the '+' below presumably relies on keys() returning a list
# (it is concatenated with a list literal) - confirm against stem.util.enum.
StreamClosureReason = stem.util.enum.UppercaseEnum(*(RelayEndReason.keys() + [
    "END",
    "PRIVATE_ADDR",
]))

# Cause of a stream being remapped to another address.
StreamSource = stem.util.enum.UppercaseEnum(
    "CACHE",
    "EXIT",
)

# Purpose of the stream.
StreamPurpose = stem.util.enum.UppercaseEnum(
    "DIR_FETCH",
    "DIR_UPLOAD",
    "DNS_REQUEST",
    "DIRPORT_TEST",
    "USER",
)

# State that an OR connection can have.
ORStatus = stem.util.enum.UppercaseEnum(
    "NEW",
    "LAUNCHED",
    "CONNECTED",
    "FAILED",
    "CLOSED",
)

# Reason that an OR connection is being closed or failed to be established.
ORClosureReason = stem.util.enum.UppercaseEnum(
    "DONE",
    "CONNECTREFUSED",
    "IDENTITY",
    "CONNECTRESET",
    "TIMEOUT",
    "NOROUTE",
    "IOERROR",
    "RESOURCELIMIT",
    "MISC",
)

# Actions that directory authorities might take with relay descriptors.
AuthDescriptorAction = stem.util.enum.UppercaseEnum(
    "ACCEPTED",
    "DROPPED",
    "REJECTED",
)

# Sources for tor status events.
StatusType = stem.util.enum.UppercaseEnum(
    "GENERAL",
    "CLIENT",
    "SERVER",
)

# What a guard relay is used for; only one type is defined.
GuardType = stem.util.enum.UppercaseEnum(
    "ENTRY",
)

# Status a guard relay can have.
GuardStatus = stem.util.enum.UppercaseEnum(
    "NEW",
    "UP",
    "DOWN",
    "BAD",
    "GOOD",
    "DROPPED",
)

# Way in which the timeout value of a circuit is changing.
TimeoutSetType = stem.util.enum.UppercaseEnum(
    "COMPUTED",
    "RESET",
    "SUSPENDED",
    "DISCARD",
    "RESUME",
)
diff --git a/lib/stem/connection.py b/lib/stem/connection.py new file mode 100644 index 00000000..dea9b009 --- /dev/null +++ b/lib/stem/connection.py @@ -0,0 +1,1090 @@ +# Copyright 2011-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Functions for connecting and authenticating to the tor process. + +The :func:`~stem.connection.connect_port` and +:func:`~stem.connection.connect_socket_file` functions give an easy, one line +method for getting an authenticated control connection. 
This is handy for CLI +applications and the python interactive interpreter, but does several things +that make it undesirable for applications (uses stdin/stdout, suppresses +exceptions, etc). + +The :func:`~stem.connection.authenticate` function, however, gives easy but +fine-grained control over the authentication process. For instance... + +:: + + import sys + import getpass + import stem.connection + import stem.socket + + try: + control_socket = stem.socket.ControlPort(port = 9051) + except stem.SocketError as exc: + print "Unable to connect to port 9051 (%s)" % exc + sys.exit(1) + + try: + stem.connection.authenticate(control_socket) + except stem.connection.IncorrectSocketType: + print "Please check in your torrc that 9051 is the ControlPort." + print "Maybe you configured it to be the ORPort or SocksPort instead?" + sys.exit(1) + except stem.connection.MissingPassword: + controller_password = getpass.getpass("Controller password: ") + + try: + stem.connection.authenticate_password(control_socket, controller_password) + except stem.connection.PasswordAuthFailed: + print "Unable to authenticate, password is incorrect" + sys.exit(1) + except stem.connection.AuthenticationFailure as exc: + print "Unable to authenticate: %s" % exc + sys.exit(1) + +**Module Overview:** + +:: + + connect_port - Convenience method to get an authenticated control connection + connect_socket_file - Similar to connect_port, but for control socket files + + authenticate - Main method for authenticating to a control socket + authenticate_none - Authenticates to an open control socket + authenticate_password - Authenticates to a socket supporting password auth + authenticate_cookie - Authenticates to a socket supporting cookie auth + authenticate_safecookie - Authenticates to a socket supporting safecookie auth + + get_protocolinfo - Issues a PROTOCOLINFO query + + AuthenticationFailure - Base exception raised for authentication failures + |- UnrecognizedAuthMethods - Authentication 
methods are unsupported + |- IncorrectSocketType - Socket does not speak the tor control protocol + | + |- OpenAuthFailed - Failure when authenticating by an open socket + | +- OpenAuthRejected - Tor rejected this method of authentication + | + |- PasswordAuthFailed - Failure when authenticating by a password + | |- PasswordAuthRejected - Tor rejected this method of authentication + | |- IncorrectPassword - Password was rejected + | +- MissingPassword - Socket supports password auth but wasn't attempted + | + |- CookieAuthFailed - Failure when authenticating by a cookie + | |- CookieAuthRejected - Tor rejected this method of authentication + | |- IncorrectCookieValue - Authentication cookie was rejected + | |- IncorrectCookieSize - Size of the cookie file is incorrect + | |- UnreadableCookieFile - Unable to read the contents of the auth cookie + | +- AuthChallengeFailed - Failure completing the authchallenge request + | |- AuthChallengeUnsupported - Tor doesn't recognize the AUTHCHALLENGE command + | |- AuthSecurityFailure - Server provided the wrong nonce credentials + | |- InvalidClientNonce - The client nonce is invalid + | +- UnrecognizedAuthChallengeMethod - AUTHCHALLENGE does not support the given methods. + | + +- MissingAuthInfo - Unexpected PROTOCOLINFO response, missing auth info + |- NoAuthMethods - Missing any methods for authenticating + +- NoAuthCookie - Supports cookie auth but doesn't have its path + +.. data:: AuthMethod (enum) + + Enumeration of PROTOCOLINFO responses for supported authentication methods. + + ============== =========== + AuthMethod Description + ============== =========== + **NONE** No authentication required. + **PASSWORD** Password required, see tor's HashedControlPassword option. + **COOKIE** Contents of the cookie file required, see tor's CookieAuthentication option. + **SAFECOOKIE** Need to reply to a hmac challenge using the contents of the cookie file. 
+ **UNKNOWN** Tor provided one or more authentication methods that we don't recognize, probably something new. + ============== =========== +""" + +import binascii +import getpass +import os + +import stem.control +import stem.response +import stem.socket +import stem.util.connection +import stem.util.enum +import stem.util.str_tools +import stem.util.system +import stem.version + +from stem.util import log + +AuthMethod = stem.util.enum.Enum("NONE", "PASSWORD", "COOKIE", "SAFECOOKIE", "UNKNOWN") + +CLIENT_HASH_CONSTANT = b"Tor safe cookie authentication controller-to-server hash" +SERVER_HASH_CONSTANT = b"Tor safe cookie authentication server-to-controller hash" + + +def connect_port(address = "127.0.0.1", port = 9051, password = None, chroot_path = None, controller = stem.control.Controller): + """ + Convenience function for quickly getting a control connection. This is very + handy for debugging or CLI setup, handling setup and prompting for a password + if necessary (and none is provided). If any issues arise this prints a + description of the problem and returns **None**. + + :param str address: ip address of the controller + :param int port: port number of the controller + :param str password: passphrase to authenticate to the socket + :param str chroot_path: path prefix if in a chroot environment + :param Class controller: :class:`~stem.control.BaseController` subclass to be + returned, this provides a :class:`~stem.socket.ControlSocket` if **None** + + :returns: authenticated control connection, the type based on the controller argument + """ + + try: + control_port = stem.socket.ControlPort(address, port) + except stem.SocketError as exc: + print exc + return None + + return _connect(control_port, password, chroot_path, controller) + + +def connect_socket_file(path = "/var/run/tor/control", password = None, chroot_path = None, controller = stem.control.Controller): + """ + Convenience function for quickly getting a control connection. 
For more + information see the :func:`~stem.connection.connect_port` function. + + :param str path: path where the control socket is located + :param str password: passphrase to authenticate to the socket + :param str chroot_path: path prefix if in a chroot environment + :param Class controller: :class:`~stem.control.BaseController` subclass to be + returned, this provides a :class:`~stem.socket.ControlSocket` if **None** + + :returns: authenticated control connection, the type based on the controller argument + """ + + try: + control_socket = stem.socket.ControlSocketFile(path) + except stem.SocketError as exc: + print exc + return None + + return _connect(control_socket, password, chroot_path, controller) + + +def _connect(control_socket, password, chroot_path, controller): + """ + Common implementation for the connect_* functions. + + :param stem.socket.ControlSocket control_socket: socket being authenticated to + :param str password: passphrase to authenticate to the socket + :param str chroot_path: path prefix if in a chroot environment + :param Class controller: :class:`~stem.control.BaseController` subclass to be + returned, this provides a :class:`~stem.socket.ControlSocket` if **None** + + :returns: authenticated control connection, the type based on the controller argument + """ + + try: + authenticate(control_socket, password, chroot_path) + + if controller is None: + return control_socket + else: + return controller(control_socket) + except MissingPassword: + if password is not None: + raise ValueError("BUG: authenticate raised MissingPassword despite getting one") + + try: + password = getpass.getpass("Controller password: ") + except KeyboardInterrupt: + return None + + return _connect(control_socket, password, chroot_path, controller) + except AuthenticationFailure as exc: + control_socket.close() + print "Unable to authenticate: %s" % exc + return None + + +def authenticate(controller, password = None, chroot_path = None, protocolinfo_response = 
None): + """ + Authenticates to a control socket using the information provided by a + PROTOCOLINFO response. In practice this will often be all we need to + authenticate, raising an exception if all attempts to authenticate fail. + + All exceptions are subclasses of AuthenticationFailure so, in practice, + callers should catch the types of authentication failure that they care + about, then have a :class:`~stem.connection.AuthenticationFailure` catch-all + at the end. + + This can authenticate to either a :class:`~stem.control.BaseController` or + :class:`~stem.socket.ControlSocket`. + + :param controller: tor controller or socket to be authenticated + :param str password: passphrase to present to the socket if it uses password + authentication (skips password auth if **None**) + :param str chroot_path: path prefix if in a chroot environment + :param stem.response.protocolinfo.ProtocolInfoResponse protocolinfo_response: + tor protocolinfo response, this is retrieved on our own if **None** + + :raises: If all attempts to authenticate fails then this will raise a + :class:`~stem.connection.AuthenticationFailure` subclass. Since this may + try multiple authentication methods it may encounter multiple exceptions. + If so then the exception this raises is prioritized as follows... + + * :class:`stem.connection.IncorrectSocketType` + + The controller does not speak the tor control protocol. Most often this + happened because the user confused the SocksPort or ORPort with the + ControlPort. + + * :class:`stem.connection.UnrecognizedAuthMethods` + + All of the authentication methods tor will accept are new and + unrecognized. Please upgrade stem and, if that doesn't work, file a + ticket on 'trac.torproject.org' and I'd be happy to add support. + + * :class:`stem.connection.MissingPassword` + + We were unable to authenticate but didn't attempt password authentication + because none was provided. 
You should prompt the user for a password and + try again via 'authenticate_password'. + + * :class:`stem.connection.IncorrectPassword` + + We were provided with a password but it was incorrect. + + * :class:`stem.connection.IncorrectCookieSize` + + Tor allows for authentication by reading it a cookie file, but that file + is the wrong size to be an authentication cookie. + + * :class:`stem.connection.UnreadableCookieFile` + + Tor allows for authentication by reading it a cookie file, but we can't + read that file (probably due to permissions). + + * **\***:class:`stem.connection.IncorrectCookieValue` + + Tor allows for authentication by reading it a cookie file, but rejected + the contents of that file. + + * **\***:class:`stem.connection.AuthChallengeUnsupported` + + Tor doesn't recognize the AUTHCHALLENGE command. This is probably a Tor + version prior to SAFECOOKIE being implement, but this exception shouldn't + arise because we won't attempt SAFECOOKIE auth unless Tor claims to + support it. + + * **\***:class:`stem.connection.UnrecognizedAuthChallengeMethod` + + Tor couldn't recognize the AUTHCHALLENGE method Stem sent to it. This + shouldn't happen at all. + + * **\***:class:`stem.connection.InvalidClientNonce` + + Tor says that the client nonce provided by Stem during the AUTHCHALLENGE + process is invalid. + + * **\***:class:`stem.connection.AuthSecurityFailure` + + Nonce value provided by the server was invalid. + + * **\***:class:`stem.connection.OpenAuthRejected` + + Tor says that it allows for authentication without any credentials, but + then rejected our authentication attempt. + + * **\***:class:`stem.connection.MissingAuthInfo` + + Tor provided us with a PROTOCOLINFO reply that is technically valid, but + missing the information we need to authenticate. + + * **\***:class:`stem.connection.AuthenticationFailure` + + There are numerous other ways that authentication could have failed + including socket failures, malformed controller responses, etc. 
These + mostly constitute transient failures or bugs. + + **\*** In practice it is highly unusual for this to occur, being more of a + theoretical possibility rather than something you should expect. It's fine + to treat these as errors. If you have a use case where this commonly + happens, please file a ticket on 'trac.torproject.org'. + + In the future new :class:`~stem.connection.AuthenticationFailure` + subclasses may be added to allow for better error handling. + """ + + if not protocolinfo_response: + try: + protocolinfo_response = get_protocolinfo(controller) + except stem.ProtocolError: + raise IncorrectSocketType("unable to use the control socket") + except stem.SocketError as exc: + raise AuthenticationFailure("socket connection failed (%s)" % exc) + + auth_methods = list(protocolinfo_response.auth_methods) + auth_exceptions = [] + + if len(auth_methods) == 0: + raise NoAuthMethods("our PROTOCOLINFO response did not have any methods for authenticating") + + # remove authentication methods that are either unknown or for which we don't + # have an input + if AuthMethod.UNKNOWN in auth_methods: + auth_methods.remove(AuthMethod.UNKNOWN) + + unknown_methods = protocolinfo_response.unknown_auth_methods + plural_label = "s" if len(unknown_methods) > 1 else "" + methods_label = ", ".join(unknown_methods) + + # we... 
er, can't do anything with only unrecognized auth types + if not auth_methods: + exc_msg = "unrecognized authentication method%s (%s)" % (plural_label, methods_label) + auth_exceptions.append(UnrecognizedAuthMethods(exc_msg, unknown_methods)) + else: + log.debug("Authenticating to a socket with unrecognized auth method%s, ignoring them: %s" % (plural_label, methods_label)) + + if protocolinfo_response.cookie_path is None: + for cookie_auth_method in (AuthMethod.COOKIE, AuthMethod.SAFECOOKIE): + if cookie_auth_method in auth_methods: + auth_methods.remove(cookie_auth_method) + + exc_msg = "our PROTOCOLINFO response did not have the location of our authentication cookie" + auth_exceptions.append(NoAuthCookie(exc_msg, cookie_auth_method == AuthMethod.SAFECOOKIE)) + + if AuthMethod.PASSWORD in auth_methods and password is None: + auth_methods.remove(AuthMethod.PASSWORD) + auth_exceptions.append(MissingPassword("no passphrase provided")) + + # iterating over AuthMethods so we can try them in this order + for auth_type in (AuthMethod.NONE, AuthMethod.PASSWORD, AuthMethod.SAFECOOKIE, AuthMethod.COOKIE): + if not auth_type in auth_methods: + continue + + try: + if auth_type == AuthMethod.NONE: + authenticate_none(controller, False) + elif auth_type == AuthMethod.PASSWORD: + authenticate_password(controller, password, False) + elif auth_type in (AuthMethod.COOKIE, AuthMethod.SAFECOOKIE): + cookie_path = protocolinfo_response.cookie_path + + if chroot_path: + cookie_path = os.path.join(chroot_path, cookie_path.lstrip(os.path.sep)) + + if auth_type == AuthMethod.SAFECOOKIE: + authenticate_safecookie(controller, cookie_path, False) + else: + authenticate_cookie(controller, cookie_path, False) + + return # success! 
+ except OpenAuthRejected as exc: + auth_exceptions.append(exc) + except IncorrectPassword as exc: + auth_exceptions.append(exc) + except PasswordAuthRejected as exc: + # Since the PROTOCOLINFO says password auth is available we can assume + # that if PasswordAuthRejected is raised it's being raised in error. + log.debug("The authenticate_password method raised a PasswordAuthRejected when password auth should be available. Stem may need to be corrected to recognize this response: %s" % exc) + auth_exceptions.append(IncorrectPassword(str(exc))) + except AuthSecurityFailure as exc: + log.info("Tor failed to provide the nonce expected for safecookie authentication. (%s)" % exc) + auth_exceptions.append(exc) + except (InvalidClientNonce, UnrecognizedAuthChallengeMethod, AuthChallengeFailed) as exc: + auth_exceptions.append(exc) + except (IncorrectCookieSize, UnreadableCookieFile, IncorrectCookieValue) as exc: + auth_exceptions.append(exc) + except CookieAuthRejected as exc: + auth_func = "authenticate_safecookie" if exc.is_safecookie else "authenticate_cookie" + + log.debug("The %s method raised a CookieAuthRejected when cookie auth should be available. Stem may need to be corrected to recognize this response: %s" % (auth_func, exc)) + auth_exceptions.append(IncorrectCookieValue(str(exc), exc.cookie_path, exc.is_safecookie)) + except stem.ControllerError as exc: + auth_exceptions.append(AuthenticationFailure(str(exc))) + + # All authentication attempts failed. Raise the exception that takes priority + # according to our pydocs. + + for exc_type in AUTHENTICATE_EXCEPTIONS: + for auth_exc in auth_exceptions: + if isinstance(auth_exc, exc_type): + raise auth_exc + + # We really, really shouldn't get here. It means that auth_exceptions is + # either empty or contains something that isn't an AuthenticationFailure. 
def authenticate_none(controller, suppress_ctl_errors=True):
    """
    Authenticates to an open control socket. All control connections need to
    authenticate before they can be used, even if tor hasn't been configured to
    use any authentication.

    If authentication fails tor will disconnect and we'll make a best effort
    attempt to re-establish the connection. This may not succeed, so check
    :func:`~stem.socket.ControlSocket.is_alive` before using the socket further.

    This can authenticate to either a :class:`~stem.control.BaseController` or
    :class:`~stem.socket.ControlSocket`.

    For general usage use the :func:`~stem.connection.authenticate` function
    instead.

    :param controller: tor controller or socket to be authenticated
    :param bool suppress_ctl_errors: reports raised
      :class:`~stem.ControllerError` as authentication rejection if
      **True**, otherwise they're re-raised

    :raises:
      * :class:`stem.connection.OpenAuthRejected` if the empty authentication
        credentials aren't accepted
      * :class:`stem.ControllerError` if communication fails and
        **suppress_ctl_errors** is **False**
    """

    try:
        auth_response = _msg(controller, "AUTHENTICATE")
    except stem.ControllerError as exc:
        # Reconnecting is only a best effort. KeyboardInterrupt and SystemExit
        # are deliberately not swallowed.
        try:
            controller.connect()
        except Exception:
            pass

        if not suppress_ctl_errors:
            raise exc
        else:
            raise OpenAuthRejected("Socket failed (%s)" % exc)

    # if we got anything but an OK response then error
    if str(auth_response) != "OK":
        try:
            controller.connect()
        except Exception:
            pass

        raise OpenAuthRejected(str(auth_response), auth_response)


def authenticate_password(controller, password, suppress_ctl_errors=True):
    """
    Authenticates to a control socket that uses a password (via the
    HashedControlPassword torrc option). Quotes and backslashes in the password
    are escaped.

    If authentication fails tor will disconnect and we'll make a best effort
    attempt to re-establish the connection. This may not succeed, so check
    :func:`~stem.socket.ControlSocket.is_alive` before using the socket further.

    If you use this function directly, rather than
    :func:`~stem.connection.authenticate`, we may mistakenly raise a
    PasswordAuthRejected rather than IncorrectPassword. This is because we rely
    on tor's error messaging which is liable to change in future versions
    (:trac:`4817`).

    This can authenticate to either a :class:`~stem.control.BaseController` or
    :class:`~stem.socket.ControlSocket`.

    For general usage use the :func:`~stem.connection.authenticate` function
    instead.

    :param controller: tor controller or socket to be authenticated
    :param str password: passphrase to present to the socket
    :param bool suppress_ctl_errors: reports raised
      :class:`~stem.ControllerError` as authentication rejection if
      **True**, otherwise they're re-raised

    :raises:
      * :class:`stem.connection.PasswordAuthRejected` if the socket doesn't
        accept password authentication
      * :class:`stem.connection.IncorrectPassword` if the authentication
        credentials aren't accepted
      * :class:`stem.ControllerError` if communication fails and
        **suppress_ctl_errors** is **False**
    """

    # The password is sent as a quoted string, in which a backslash escapes the
    # character that follows it. Backslashes must be escaped first (otherwise
    # they would be consumed, or would swallow our closing quote), then quotes.
    # For more information on the quote escaping see...
    # https://trac.torproject.org/projects/tor/ticket/4600

    password = password.replace('\\', '\\\\').replace('"', '\\"')

    try:
        auth_response = _msg(controller, "AUTHENTICATE \"%s\"" % password)
    except stem.ControllerError as exc:
        # Reconnecting is only a best effort. KeyboardInterrupt and SystemExit
        # are deliberately not swallowed.
        try:
            controller.connect()
        except Exception:
            pass

        if not suppress_ctl_errors:
            raise exc
        else:
            raise PasswordAuthRejected("Socket failed (%s)" % exc)

    # if we got anything but an OK response then error
    if str(auth_response) != "OK":
        try:
            controller.connect()
        except Exception:
            pass

        # all we have to go on is the error message from tor...
        # Password did not match HashedControlPassword value from configuration...
        # Password did not match HashedControlPassword *or*...

        if "Password did not match HashedControlPassword" in str(auth_response):
            raise IncorrectPassword(str(auth_response), auth_response)
        else:
            raise PasswordAuthRejected(str(auth_response), auth_response)
+ + :param controller: tor controller or socket to be authenticated + :param str cookie_path: path of the authentication cookie to send to tor + :param bool suppress_ctl_errors: reports raised + :class:`~stem.ControllerError` as authentication rejection if + **True**, otherwise they're re-raised + + :raises: + * :class:`stem.connection.IncorrectCookieSize` if the cookie file's size + is wrong + * :class:`stem.connection.UnreadableCookieFile` if the cookie file doesn't + exist or we're unable to read it + * :class:`stem.connection.CookieAuthRejected` if cookie authentication is + attempted but the socket doesn't accept it + * :class:`stem.connection.IncorrectCookieValue` if the cookie file's value + is rejected + """ + + cookie_data = _read_cookie(cookie_path, False) + + try: + # binascii.b2a_hex() takes a byte string and returns one too. With python 3 + # this is a problem because string formatting for byte strings includes the + # b'' wrapper... + # + # >>> "AUTHENTICATE %s" % b'content' + # "AUTHENTICATE b'content'" + # + # This seems dumb but oh well. Converting the result to unicode so it won't + # misbehave. + + auth_token_hex = binascii.b2a_hex(stem.util.str_tools._to_bytes(cookie_data)) + msg = "AUTHENTICATE %s" % stem.util.str_tools._to_unicode(auth_token_hex) + auth_response = _msg(controller, msg) + + # if we got anything but an OK response then error + if str(auth_response) != "OK": + try: + controller.connect() + except: + pass + + # all we have to go on is the error message from tor... + # ... Authentication cookie did not match expected value. + # ... *or* authentication cookie. + + if "*or* authentication cookie." in str(auth_response) or \ + "Authentication cookie did not match expected value." 
def authenticate_safecookie(controller, cookie_path, suppress_ctl_errors=True):
    """
    Authenticates to a control socket using the safe cookie method, which is
    enabled by setting the CookieAuthentication torrc option on Tor clients
    which support it.

    Authentication with this is a two-step process...

    1. send a nonce to the server and receive a challenge from the server for
       the cookie's contents
    2. generate a hash digest using the challenge received in the first step, and
       use it to authenticate the controller

    The :class:`~stem.connection.IncorrectCookieSize` and
    :class:`~stem.connection.UnreadableCookieFile` exceptions take precedence
    over the other exception types.

    The :class:`~stem.connection.AuthChallengeUnsupported`,
    :class:`~stem.connection.UnrecognizedAuthChallengeMethod`,
    :class:`~stem.connection.InvalidClientNonce` and
    :class:`~stem.connection.CookieAuthRejected` exceptions are next in the order
    of precedence. Depending on the reason, one of these is raised if the first
    (AUTHCHALLENGE) step fails.

    In the second (AUTHENTICATE) step,
    :class:`~stem.connection.IncorrectCookieValue` or
    :class:`~stem.connection.CookieAuthRejected` may be raised.

    If authentication fails tor will disconnect and we'll make a best effort
    attempt to re-establish the connection. This may not succeed, so check
    :func:`~stem.socket.ControlSocket.is_alive` before using the socket further.

    For general usage use the :func:`~stem.connection.authenticate` function
    instead.

    :param controller: tor controller or socket to be authenticated
    :param str cookie_path: path of the authentication cookie to send to tor
    :param bool suppress_ctl_errors: reports raised
      :class:`~stem.ControllerError` as authentication rejection if
      **True**, otherwise they're re-raised

    :raises:
      * :class:`stem.connection.IncorrectCookieSize` if the cookie file's size
        is wrong
      * :class:`stem.connection.UnreadableCookieFile` if the cookie file doesn't
        exist or we're unable to read it
      * :class:`stem.connection.CookieAuthRejected` if cookie authentication is
        attempted but the socket doesn't accept it
      * :class:`stem.connection.IncorrectCookieValue` if the cookie file's value
        is rejected
      * :class:`stem.connection.UnrecognizedAuthChallengeMethod` if the Tor
        client fails to recognize the AuthChallenge method
      * :class:`stem.connection.AuthChallengeUnsupported` if AUTHCHALLENGE is
        unimplemented
      * :class:`stem.connection.AuthChallengeFailed` if the AUTHCHALLENGE step
        fails for another reason, such as a socket failure or a response we're
        unable to parse
      * :class:`stem.connection.AuthSecurityFailure` if AUTHCHALLENGE's response
        looks like a security attack
      * :class:`stem.connection.InvalidClientNonce` if stem's AUTHCHALLENGE
        client nonce is rejected for being invalid
      * :class:`stem.ControllerError` if communication fails and
        **suppress_ctl_errors** is **False**
    """

    def reconnect():
        # Restoring the connection after a failure is only a best effort, so
        # problems in doing so are ignored (KeyboardInterrupt and SystemExit
        # still propagate).
        try:
            controller.connect()
        except Exception:
            pass

    cookie_data = _read_cookie(cookie_path, True)
    client_nonce = os.urandom(32)

    # first step: issue the AUTHCHALLENGE

    try:
        client_nonce_hex = stem.util.str_tools._to_unicode(binascii.b2a_hex(client_nonce))
        authchallenge_response = _msg(controller, "AUTHCHALLENGE SAFECOOKIE %s" % client_nonce_hex)
    except stem.ControllerError as exc:
        reconnect()

        if not suppress_ctl_errors:
            raise exc
        else:
            # AuthChallengeFailed only accepts a message and cookie path
            raise AuthChallengeFailed("Socket failed (%s)" % exc, cookie_path)

    if not authchallenge_response.is_ok():
        reconnect()

        # all we have to go on is the error message from tor
        authchallenge_response_str = str(authchallenge_response)

        if "Authentication required." in authchallenge_response_str:
            raise AuthChallengeUnsupported("SAFECOOKIE authentication isn't supported", cookie_path)
        elif "AUTHCHALLENGE only supports" in authchallenge_response_str:
            # SAFECOOKIE is the method we sent in the request above
            raise UnrecognizedAuthChallengeMethod(authchallenge_response_str, cookie_path, "SAFECOOKIE")
        elif "Invalid base16 client nonce" in authchallenge_response_str:
            raise InvalidClientNonce(authchallenge_response_str, cookie_path)
        elif "Cookie authentication is disabled" in authchallenge_response_str:
            raise CookieAuthRejected(authchallenge_response_str, cookie_path, True)
        else:
            raise AuthChallengeFailed(authchallenge_response_str, cookie_path)

    try:
        stem.response.convert("AUTHCHALLENGE", authchallenge_response)
    except stem.ProtocolError as exc:
        if not suppress_ctl_errors:
            raise exc
        else:
            raise AuthChallengeFailed("Unable to parse AUTHCHALLENGE response: %s" % exc, cookie_path)

    # Check the server's hash before presenting ours, so we only authenticate
    # if the hash tor sent matches the one we compute from the cookie.

    hash_input = cookie_data + client_nonce + authchallenge_response.server_nonce
    expected_server_hash = stem.util.connection._hmac_sha256(SERVER_HASH_CONSTANT, hash_input)

    if not stem.util.connection._cryptovariables_equal(authchallenge_response.server_hash, expected_server_hash):
        raise AuthSecurityFailure("Tor provided the wrong server nonce", cookie_path)

    # second step: AUTHENTICATE with the digest of the challenge

    client_hash = stem.util.connection._hmac_sha256(CLIENT_HASH_CONSTANT, hash_input)
    client_hash_hex = stem.util.str_tools._to_unicode(binascii.b2a_hex(client_hash))

    try:
        auth_response = _msg(controller, "AUTHENTICATE %s" % client_hash_hex)
    except stem.ControllerError as exc:
        reconnect()

        if not suppress_ctl_errors:
            raise exc
        else:
            # no response was received, so there's none to attach
            raise CookieAuthRejected("Socket failed (%s)" % exc, cookie_path, True)

    # if we got anything but an OK response then error
    if not auth_response.is_ok():
        reconnect()

        # all we have to go on is the error message from tor...
        # ... Safe cookie response did not match expected value
        # ... *or* authentication cookie.

        auth_response_str = str(auth_response)

        if "*or* authentication cookie." in auth_response_str or \
           "Safe cookie response did not match expected value" in auth_response_str:
            raise IncorrectCookieValue(auth_response_str, cookie_path, True, auth_response)
        else:
            raise CookieAuthRejected(auth_response_str, cookie_path, True, auth_response)
def _msg(controller, message):
    """
    Sends and receives a message with either a
    :class:`~stem.socket.ControlSocket` or :class:`~stem.control.BaseController`.

    :param controller: tor controller or socket to communicate with
    :param str message: message to be sent

    :returns: reply provided by the socket's **recv()** or the controller's
      **msg()** method
    """

    if isinstance(controller, stem.socket.ControlSocket):
        # sockets have separate calls for sending and receiving
        controller.send(message)
        return controller.recv()
    else:
        return controller.msg(message)


def _read_cookie(cookie_path, is_safecookie):
    """
    Provides the contents of a given cookie file.

    :param str cookie_path: absolute path of the cookie file
    :param bool is_safecookie: **True** if this was for SAFECOOKIE
      authentication, **False** if for COOKIE

    :returns: contents of the cookie file, as read in binary mode

    :raises:
      * :class:`stem.connection.UnreadableCookieFile` if the cookie file is
        unreadable
      * :class:`stem.connection.IncorrectCookieSize` if the cookie size is
        incorrect (not 32 bytes)
    """

    if not os.path.exists(cookie_path):
        exc_msg = "Authentication failed: '%s' doesn't exist" % cookie_path
        raise UnreadableCookieFile(exc_msg, cookie_path, is_safecookie)

    # Abort if the file isn't 32 bytes long. This is to avoid exposing arbitrary
    # file content to the port.
    #
    # Without this a malicious socket could, for instance, claim that
    # '~/.bash_history' or '~/.ssh/id_rsa' was its authentication cookie to trick
    # us into reading it for them with our current permissions.
    #
    # https://trac.torproject.org/projects/tor/ticket/4303

    try:
        auth_cookie_size = os.path.getsize(cookie_path)
    except OSError as exc:
        # The size lookup can fail even though the path existed a moment ago
        # (for instance if the file was removed in between). Report this like
        # any other unreadable cookie rather than leaking an OSError.
        exc_msg = "Authentication failed: unable to read '%s' (%s)" % (cookie_path, exc)
        raise UnreadableCookieFile(exc_msg, cookie_path, is_safecookie)

    if auth_cookie_size != 32:
        exc_msg = "Authentication failed: authentication cookie '%s' is the wrong size (%i bytes instead of 32)" % (cookie_path, auth_cookie_size)
        raise IncorrectCookieSize(exc_msg, cookie_path, is_safecookie)

    try:
        # binary mode, unbuffered
        with open(cookie_path, 'rb', 0) as f:
            return f.read()
    except (IOError, OSError) as exc:
        exc_msg = "Authentication failed: unable to read '%s' (%s)" % (cookie_path, exc)
        raise UnreadableCookieFile(exc_msg, cookie_path, is_safecookie)
class AuthenticationFailure(Exception):
    """
    Base error for authentication failures.

    :var stem.socket.ControlMessage auth_response: AUTHENTICATE response from the
      control socket, **None** if one wasn't received
    """

    def __init__(self, message, auth_response=None):
        super(AuthenticationFailure, self).__init__(message)
        self.auth_response = auth_response


class UnrecognizedAuthMethods(AuthenticationFailure):
    """
    All methods for authenticating aren't recognized.

    :var list unknown_auth_methods: authentication methods that weren't recognized
    """

    def __init__(self, message, unknown_auth_methods):
        super(UnrecognizedAuthMethods, self).__init__(message)
        self.unknown_auth_methods = unknown_auth_methods


class IncorrectSocketType(AuthenticationFailure):
    """Socket does not speak the control protocol."""


class OpenAuthFailed(AuthenticationFailure):
    """Failure to authenticate to an open socket."""


class OpenAuthRejected(OpenAuthFailed):
    """Attempt to connect to an open control socket was rejected."""


class PasswordAuthFailed(AuthenticationFailure):
    """Failure to authenticate with a password."""


class PasswordAuthRejected(PasswordAuthFailed):
    """Socket does not support password authentication."""


class IncorrectPassword(PasswordAuthFailed):
    """Authentication password incorrect."""


class MissingPassword(PasswordAuthFailed):
    """Password authentication is supported but we weren't provided with one."""


class CookieAuthFailed(AuthenticationFailure):
    """
    Failure to authenticate with an authentication cookie.

    :var str cookie_path: location of the authentication cookie we attempted
    :var bool is_safecookie: **True** if this was for SAFECOOKIE
      authentication, **False** if for COOKIE
    :var stem.response.ControlMessage auth_response: reply to our
      authentication attempt, **None** if one wasn't received
    """

    def __init__(self, message, cookie_path, is_safecookie, auth_response=None):
        super(CookieAuthFailed, self).__init__(message, auth_response)
        self.is_safecookie = is_safecookie
        self.cookie_path = cookie_path


class CookieAuthRejected(CookieAuthFailed):
    """Socket does not support cookie authentication."""


class IncorrectCookieValue(CookieAuthFailed):
    """Authentication cookie value was rejected."""


class IncorrectCookieSize(CookieAuthFailed):
    """Aborted because the cookie file is the wrong size."""


class UnreadableCookieFile(CookieAuthFailed):
    """Error arose in reading the authentication cookie."""


class AuthChallengeFailed(CookieAuthFailed):
    """
    AUTHCHALLENGE command has failed. These are always flagged as being for
    SAFECOOKIE authentication.
    """

    def __init__(self, message, cookie_path):
        super(AuthChallengeFailed, self).__init__(message, cookie_path, True)


class AuthChallengeUnsupported(AuthChallengeFailed):
    """
    AUTHCHALLENGE isn't implemented.
    """


class UnrecognizedAuthChallengeMethod(AuthChallengeFailed):
    """
    Tor couldn't recognize our AUTHCHALLENGE method.

    :var str authchallenge_method: AUTHCHALLENGE method that Tor couldn't
      recognize, **None** if the raiser didn't specify it
    """

    # The method is optional so raisers that only provide the message and
    # cookie path get this exception rather than a TypeError.

    def __init__(self, message, cookie_path, authchallenge_method=None):
        super(UnrecognizedAuthChallengeMethod, self).__init__(message, cookie_path)
        self.authchallenge_method = authchallenge_method


class AuthSecurityFailure(AuthChallengeFailed):
    """AUTHCHALLENGE response is invalid."""


class InvalidClientNonce(AuthChallengeFailed):
    """AUTHCHALLENGE request contains an invalid client nonce."""


class MissingAuthInfo(AuthenticationFailure):
    """
    The PROTOCOLINFO response didn't have enough information to authenticate.
    These are valid control responses but really shouldn't happen in practice.
    """


class NoAuthMethods(MissingAuthInfo):
    """PROTOCOLINFO response didn't have any methods for authenticating."""


class NoAuthCookie(MissingAuthInfo):
    """
    PROTOCOLINFO response supports cookie auth but doesn't have its path.

    :var bool is_safecookie: **True** if this was for SAFECOOKIE
      authentication, **False** if for COOKIE
    """

    def __init__(self, message, is_safecookie):
        super(NoAuthCookie, self).__init__(message)
        self.is_safecookie = is_safecookie


# Authentication exceptions ordered as per the authenticate function's pydocs.
# Earlier entries take priority, and the AuthenticationFailure base class comes
# last so it acts as the catch-all.

AUTHENTICATE_EXCEPTIONS = (
    IncorrectSocketType,
    UnrecognizedAuthMethods,
    MissingPassword,
    IncorrectPassword,
    IncorrectCookieSize,
    UnreadableCookieFile,
    IncorrectCookieValue,
    AuthChallengeUnsupported,
    UnrecognizedAuthChallengeMethod,
    InvalidClientNonce,
    AuthSecurityFailure,
    OpenAuthRejected,
    MissingAuthInfo,
    AuthenticationFailure,
)
+ +Controllers are a wrapper around a :class:`~stem.socket.ControlSocket`, +retaining many of its methods (connect, close, is_alive, etc) in addition to +providing its own for interacting at a higher level. + +**Module Overview:** + +:: + + Controller - General controller class intended for direct use + | |- from_port - Provides a Controller based on a port connection. + | +- from_socket_file - Provides a Controller based on a socket file connection. + | + |- authenticate - authenticates this controller with tor + | + |- get_info - issues a GETINFO query for a parameter + |- get_version - provides our tor version + |- get_exit_policy - provides our exit policy + |- get_socks_listeners - provides where tor is listening for SOCKS connections + |- get_protocolinfo - information about the controller interface + |- get_user - provides the user tor is running as + |- get_pid - provides the pid of our tor process + | + |- get_microdescriptor - querying the microdescriptor for a relay + |- get_microdescriptors - provides all presently available microdescriptors + |- get_server_descriptor - querying the server descriptor for a relay + |- get_server_descriptors - provides all presently available server descriptors + |- get_network_status - querying the router status entry for a relay + |- get_network_statuses - provides all preently available router status entries + | + |- get_conf - gets the value of a configuration option + |- get_conf_map - gets the values of multiple configuration options + |- set_conf - sets the value of a configuration option + |- reset_conf - reverts configuration options to their default values + |- set_options - sets or resets the values of multiple configuration options + | + |- add_event_listener - attaches an event listener to be notified of tor events + |- remove_event_listener - removes a listener so it isn't notified of further events + | + |- is_caching_enabled - true if the controller has enabled caching + |- set_caching - enables or 
disables caching + |- clear_cache - clears any cached results + | + |- load_conf - loads configuration information as if it was in the torrc + |- save_conf - saves configuration information to the torrc + | + |- is_feature_enabled - checks if a given controller feature is enabled + |- enable_feature - enables a controller feature that has been disabled by default + | + |- get_circuit - provides an active circuit + |- get_circuits - provides a list of active circuits + |- new_circuit - create new circuits + |- extend_circuit - create new circuits and extend existing ones + |- repurpose_circuit - change a circuit's purpose + |- close_circuit - close a circuit + | + |- get_streams - provides a list of active streams + |- attach_stream - attach a stream to a circuit + |- close_stream - close a stream + | + |- signal - sends a signal to the tor client + |- is_geoip_unavailable - true if we've discovered our geoip db to be unavailable + +- map_address - maps one address to another such that connections to the original are replaced with the other + + BaseController - Base controller class asynchronous message handling + |- msg - communicates with the tor process + |- is_alive - reports if our connection to tor is open or closed + |- is_authenticated - checks if we're authenticated to tor + |- connect - connects or reconnects to tor + |- close - shuts down our connection to the tor process + |- get_socket - provides the socket used for control communication + |- get_latest_heartbeat - timestamp for when we last heard from tor + |- add_status_listener - notifies a callback of changes in our status + |- remove_status_listener - prevents further notification of status changes + +- __enter__ / __exit__ - manages socket connection + +.. data:: State (enum) + + Enumeration for states that a controller can have. 
+ + ========== =========== + State Description + ========== =========== + **INIT** new control connection + **RESET** received a reset/sighup signal + **CLOSED** control connection closed + ========== =========== + +.. data:: EventType (enum) + + Known types of events that the + :func:`~stem.control.Controller.add_event_listener` method of the + :class:`~stem.control.Controller` can listen for. + + The most frequently listened for event types tend to be the logging events + (**DEBUG**, **INFO**, **NOTICE**, **WARN**, and **ERR**), bandwidth usage + (**BW**), and circuit or stream changes (**CIRC** and **STREAM**). + + Enums are mapped to :class:`~stem.response.events.Event` subclasses as + follows... + + ===================== =========== + EventType Event Class + ===================== =========== + **ADDRMAP** :class:`stem.response.events.AddrMapEvent` + **AUTHDIR_NEWDESCS** :class:`stem.response.events.AuthDirNewDescEvent` + **BUILDTIMEOUT_SET** :class:`stem.response.events.BuildTimeoutSetEvent` + **BW** :class:`stem.response.events.BandwidthEvent` + **CIRC** :class:`stem.response.events.CircuitEvent` + **CIRC_MINOR** :class:`stem.response.events.CircMinorEvent` + **CLIENTS_SEEN** :class:`stem.response.events.ClientsSeenEvent` + **CONF_CHANGED** :class:`stem.response.events.ConfChangedEvent` + **DEBUG** :class:`stem.response.events.LogEvent` + **DESCCHANGED** :class:`stem.response.events.DescChangedEvent` + **ERR** :class:`stem.response.events.LogEvent` + **GUARD** :class:`stem.response.events.GuardEvent` + **INFO** :class:`stem.response.events.LogEvent` + **NEWCONSENSUS** :class:`stem.response.events.NewConsensusEvent` + **NEWDESC** :class:`stem.response.events.NewDescEvent` + **NOTICE** :class:`stem.response.events.LogEvent` + **NS** :class:`stem.response.events.NetworkStatusEvent` + **ORCONN** :class:`stem.response.events.ORConnEvent` + **SIGNAL** :class:`stem.response.events.SignalEvent` + **STATUS_CLIENT** :class:`stem.response.events.StatusEvent` + 
**STATUS_GENERAL** :class:`stem.response.events.StatusEvent` + **STATUS_SERVER** :class:`stem.response.events.StatusEvent` + **STREAM** :class:`stem.response.events.StreamEvent` + **STREAM_BW** :class:`stem.response.events.StreamBwEvent` + **WARN** :class:`stem.response.events.LogEvent` + ===================== =========== +""" + +import io +import os +import Queue +import StringIO +import threading +import time + +import stem.descriptor.microdescriptor +import stem.descriptor.reader +import stem.descriptor.router_status_entry +import stem.descriptor.server_descriptor +import stem.exit_policy +import stem.response +import stem.response.events +import stem.socket +import stem.util.connection +import stem.util.enum +import stem.util.str_tools +import stem.util.system +import stem.util.tor_tools +import stem.version + +from stem import UNDEFINED, CircStatus, Signal +from stem.util import log + +# state changes a control socket can have + +State = stem.util.enum.Enum("INIT", "RESET", "CLOSED") + +EventType = stem.util.enum.UppercaseEnum( + "CIRC", + "STREAM", + "ORCONN", + "BW", + "DEBUG", + "INFO", + "NOTICE", + "WARN", + "ERR", + "NEWDESC", + "ADDRMAP", + "AUTHDIR_NEWDESCS", + "DESCCHANGED", + "STATUS_GENERAL", + "STATUS_CLIENT", + "STATUS_SERVER", + "GUARD", + "NS", + "STREAM_BW", + "CLIENTS_SEEN", + "NEWCONSENSUS", + "BUILDTIMEOUT_SET", + "SIGNAL", + "CONF_CHANGED", + "CIRC_MINOR", +) + +# Configuration options that are fetched by a special key. The keys are +# lowercase to make case insensitive lookups easier. 
+ +MAPPED_CONFIG_KEYS = { + "hiddenservicedir": "HiddenServiceOptions", + "hiddenserviceport": "HiddenServiceOptions", + "hiddenserviceversion": "HiddenServiceOptions", + "hiddenserviceauthorizeclient": "HiddenServiceOptions", + "hiddenserviceoptions": "HiddenServiceOptions" +} + +# unchangeable GETINFO parameters + +CACHEABLE_GETINFO_PARAMS = ( + 'version', + 'config-file', + 'exit-policy/default', + 'fingerprint', + 'config/names', + 'config/defaults', + 'info/names', + 'events/names', + 'features/names', + 'process/descriptor-limit', +) + +# GETCONF parameters we shouldn't cache. This includes hidden service +# parameters due to the funky way they're set and retrieved (for instance, +# 'SETCONF HiddenServiceDir' affects 'GETCONF HiddenServiceOptions'). + +UNCACHEABLE_GETCONF_PARAMS = ( + 'hiddenserviceoptions', + 'hiddenservicedir', + 'hiddenserviceport', + 'hiddenserviceversion', + 'hiddenserviceauthorizeclient', + 'hiddenserviceoptions', +) + +# number of sequential attempts before we decide that the Tor geoip database +# is unavailable +GEOIP_FAILURE_THRESHOLD = 5 + +SERVER_DESCRIPTORS_UNSUPPORTED = "Tor is presently not configured to retrieve \ +server descriptors. As of Tor version 0.2.3.25 it downloads microdescriptors \ +instead unless you set 'UseMicrodescriptors 0' in your torrc." + + +class BaseController(object): + """ + Controller for the tor process. This is a minimal base class for other + controllers, providing basic process communication and event listing. Don't + use this directly - subclasses like the :class:`~stem.control.Controller` + provide higher level functionality. + + It's highly suggested that you don't interact directly with the + :class:`~stem.socket.ControlSocket` that we're constructed from - use our + wrapper methods instead.
+ """ + + def __init__(self, control_socket): + self._socket = control_socket + self._msg_lock = threading.RLock() + + self._status_listeners = [] # tuples of the form (callback, spawn_thread) + self._status_listeners_lock = threading.RLock() + + # queues where incoming messages are directed + self._reply_queue = Queue.Queue() + self._event_queue = Queue.Queue() + + # thread to continually pull from the control socket + self._reader_thread = None + + # thread to pull from the _event_queue and call handle_event + self._event_notice = threading.Event() + self._event_thread = None + + # saves our socket's prior _connect() and _close() methods so they can be + # called along with ours + + self._socket_connect = self._socket._connect + self._socket_close = self._socket._close + + self._socket._connect = self._connect + self._socket._close = self._close + + self._last_heartbeat = 0.0 # timestamp for when we last heard from tor + self._is_authenticated = False + + if self._socket.is_alive(): + self._launch_threads() + + def msg(self, message): + """ + Sends a message to our control socket and provides back its reply. + + :param str message: message to be formatted and sent to tor + + :returns: :class:`~stem.response.ControlMessage` with the response + + :raises: + * :class:`stem.ProtocolError` the content from the socket is + malformed + * :class:`stem.SocketError` if a problem arises in using the + socket + * :class:`stem.SocketClosed` if the socket is shut down + """ + + with self._msg_lock: + # If our _reply_queue isn't empty then one of a few things happened... + # + # - Our connection was closed and probably re-restablished. This was + # in reply to pulling for an asynchronous event and getting this is + # expected - ignore it. + # + # - Pulling for asynchronous events produced an error. If this was a + # ProtocolError then it's a tor bug, and if a non-closure SocketError + # then it was probably a socket glitch. Deserves an INFO level log + # message. 
+ # + # - This is a leftover response for a msg() call. We can't tell who an + # exception was earmarked for, so we only know that this was the case + # if it's a ControlMessage. This should not be possible and indicates + # a stem bug. This deserves a NOTICE level log message since it + # indicates that one of our callers didn't get their reply. + + while not self._reply_queue.empty(): + try: + response = self._reply_queue.get_nowait() + + if isinstance(response, stem.SocketClosed): + pass # this is fine + elif isinstance(response, stem.ProtocolError): + log.info("Tor provided a malformed message (%s)" % response) + elif isinstance(response, stem.ControllerError): + log.info("Socket experienced a problem (%s)" % response) + elif isinstance(response, stem.response.ControlMessage): + log.notice("BUG: the msg() function failed to deliver a response: %s" % response) + except Queue.Empty: + # the empty() method is documented to not be fully reliable so this + # isn't entirely surprising + + break + + try: + self._socket.send(message) + response = self._reply_queue.get() + + # If the message we received back had an exception then re-raise it to the + # caller. Otherwise return the response. + + if isinstance(response, stem.ControllerError): + raise response + else: + # I really, really don't like putting hooks into this method, but + # this is the most reliable method I can think of for taking actions + # immediately after successfully authenticating to a connection. + + if message.upper().startswith("AUTHENTICATE"): + self._post_authentication() + + return response + except stem.SocketClosed as exc: + # If the recv() thread caused the SocketClosed then we could still be + # in the process of closing. Calling close() here so that we can + # provide an assurance to the caller that when we raise a SocketClosed + # exception we are shut down afterward for realz. + + self.close() + raise exc + + def is_alive(self): + """ + Checks if our socket is currently connected. 
This is a pass-through for our + socket's :func:`~stem.socket.ControlSocket.is_alive` method. + + :returns: **bool** that's **True** if our socket is connected and **False** otherwise + """ + + return self._socket.is_alive() + + def is_authenticated(self): + """ + Checks if our socket is both connected and authenticated. + + :returns: **bool** that's **True** if our socket is authenticated to tor + and **False** otherwise + """ + + if self.is_alive(): + return self._is_authenticated + + return False + + def connect(self): + """ + Reconnects our control socket. This is a pass-through for our socket's + :func:`~stem.socket.ControlSocket.connect` method. + + :raises: :class:`stem.SocketError` if unable to make a socket + """ + + self._socket.connect() + + def close(self): + """ + Closes our socket connection. This is a pass-through for our socket's + :func:`~stem.socket.ControlSocket.close` method. + """ + + self._socket.close() + + def get_socket(self): + """ + Provides the socket used to speak with the tor process. Communicating with + the socket directly isn't advised since it may confuse this controller. + + :returns: :class:`~stem.socket.ControlSocket` we're communicating with + """ + + return self._socket + + def get_latest_heartbeat(self): + """ + Provides the unix timestamp for when we last heard from tor. This is zero + if we've never received a message. + + :returns: float for the unix timestamp of when we last heard from tor + """ + + return self._last_heartbeat + + def add_status_listener(self, callback, spawn = True): + """ + Notifies a given function when the state of our socket changes. Functions + are expected to be of the form... + + :: + + my_function(controller, state, timestamp) + + The state is a value from the :data:`stem.control.State` enum. Functions + **must** allow for new values. The timestamp is a float for the unix time + when the change occurred. + + This class only provides **State.INIT** and **State.CLOSED** notifications. 
+ Subclasses may provide others. + + If spawn is **True** then the callback is notified via a new daemon thread. + If **False** then the notice is under our locks, within the thread where + the change occurred. In general this isn't advised, especially if your + callback could block for a while. + + :param function callback: function to be notified when our state changes + :param bool spawn: calls function via a new thread if **True**, otherwise + it's part of the connect/close method call + """ + + with self._status_listeners_lock: + self._status_listeners.append((callback, spawn)) + + def remove_status_listener(self, callback): + """ + Stops listener from being notified of further events. + + :param function callback: function to be removed from our listeners + + :returns: **bool** that's **True** if we removed one or more occurrences of + the callback, **False** otherwise + """ + + with self._status_listeners_lock: + new_listeners, is_changed = [], False + + for listener, spawn in self._status_listeners: + if listener != callback: + new_listeners.append((listener, spawn)) + else: + is_changed = True + + self._status_listeners = new_listeners + return is_changed + + def __enter__(self): + return self + + def __exit__(self, exit_type, value, traceback): + self.close() + + def _handle_event(self, event_message): + """ + Callback to be overwritten by subclasses for event listening. This is + notified whenever we receive an event from the control socket. + + :param stem.response.ControlMessage event_message: message received from + the control socket + """ + + pass + + def _connect(self): + self._launch_threads() + self._notify_status_listeners(State.INIT) + self._socket_connect() + self._is_authenticated = False + + def _close(self): + # Our is_alive() state is now false. Our reader thread should already be + # awake from recv() raising a closure exception. Wake up the event thread + # too so it can end. 
+ + self._event_notice.set() + self._is_authenticated = False + + # joins on our threads if it's safe to do so + + for t in (self._reader_thread, self._event_thread): + if t and t.is_alive() and threading.current_thread() != t: + t.join() + + self._notify_status_listeners(State.CLOSED) + self._socket_close() + + def _post_authentication(self): + # actions to be taken after we have a newly authenticated connection + + self._is_authenticated = True + + def _notify_status_listeners(self, state): + """ + Informs our status listeners that a state change occurred. + + :param stem.control.State state: state change that has occurred + """ + + # Any changes to our is_alive() state happen under the send lock, so we + # need to have it to ensure it doesn't change beneath us. + + with self._socket._get_send_lock(): + with self._status_listeners_lock: + # States imply that our socket is either alive or not, which may not + # hold true when multiple events occur in quick succession. For + # instance, a sighup could cause two events (State.RESET for the sighup + # and State.CLOSE if it causes tor to crash). However, there's no + # guarantee of the order in which they occur, and it would be bad if + # listeners got the State.RESET last, implying that we were alive. + + expect_alive = None + + if state in (State.INIT, State.RESET): + expect_alive = True + elif state == State.CLOSED: + expect_alive = False + + change_timestamp = time.time() + + if expect_alive is not None and expect_alive != self.is_alive(): + return + + for listener, spawn in self._status_listeners: + if spawn: + name = "%s notification" % state + args = (self, state, change_timestamp) + + notice_thread = threading.Thread(target = listener, args = args, name = name) + notice_thread.setDaemon(True) + notice_thread.start() + else: + listener(self, state, change_timestamp) + + def _launch_threads(self): + """ + Initializes daemon threads. Threads can't be reused so we need to recreate + them if we're restarted. 
+ """ + + # In theory concurrent calls could result in multiple start() calls on a + # single thread, which would cause an unexpected exception. Best be safe. + + with self._socket._get_send_lock(): + if not self._reader_thread or not self._reader_thread.is_alive(): + self._reader_thread = threading.Thread(target = self._reader_loop, name = "Tor Listener") + self._reader_thread.setDaemon(True) + self._reader_thread.start() + + if not self._event_thread or not self._event_thread.is_alive(): + self._event_thread = threading.Thread(target = self._event_loop, name = "Event Notifier") + self._event_thread.setDaemon(True) + self._event_thread.start() + + def _reader_loop(self): + """ + Continually pulls from the control socket, directing the messages into + queues based on their type. Controller messages come in two varieties... + + * Responses to messages we've sent (GETINFO, SETCONF, etc). + * Asynchronous events, identified by a status code of 650. + """ + + while self.is_alive(): + try: + control_message = self._socket.recv() + self._last_heartbeat = time.time() + + if control_message.content()[-1][0] == "650": + # asynchronous message, adds to the event queue and wakes up its handler + self._event_queue.put(control_message) + self._event_notice.set() + else: + # response to a msg() call + self._reply_queue.put(control_message) + except stem.ControllerError as exc: + # Assume that all exceptions belong to the reader. This isn't always + # true, but the msg() call can do a better job of sorting it out. + # + # Be aware that the msg() method relies on this to unblock callers. + + self._reply_queue.put(exc) + + def _event_loop(self): + """ + Continually pulls messages from the _event_queue and sends them to our + handle_event callback. This is done via its own thread so subclasses with a + lengthy handle_event implementation don't block further reading from the + socket. 
+ """ + + while True: + try: + event_message = self._event_queue.get_nowait() + self._handle_event(event_message) + except Queue.Empty: + if not self.is_alive(): + break + + self._event_notice.wait() + self._event_notice.clear() + + +class Controller(BaseController): + """ + Communicates with a control socket. This is built on top of the + BaseController and provides a more user friendly API for library users. + """ + + @staticmethod + def from_port(address = "127.0.0.1", port = 9051): + """ + Constructs a :class:`~stem.socket.ControlPort` based Controller. + + :param str address: ip address of the controller + :param int port: port number of the controller + + :returns: :class:`~stem.control.Controller` attached to the given port + + :raises: :class:`stem.SocketError` if we're unable to establish a connection + """ + + if not stem.util.connection.is_valid_ipv4_address(address): + raise ValueError("Invalid IP address: %s" % address) + elif not stem.util.connection.is_valid_port(port): + raise ValueError("Invalid port: %s" % port) + + control_port = stem.socket.ControlPort(address, port) + return Controller(control_port) + + @staticmethod + def from_socket_file(path = "/var/run/tor/control"): + """ + Constructs a :class:`~stem.socket.ControlSocketFile` based Controller. 
+ + :param str path: path where the control socket is located + + :returns: :class:`~stem.control.Controller` attached to the given socket file + + :raises: :class:`stem.SocketError` if we're unable to establish a connection + """ + + control_socket = stem.socket.ControlSocketFile(path) + return Controller(control_socket) + + def __init__(self, control_socket): + super(Controller, self).__init__(control_socket) + + self._is_caching_enabled = True + self._request_cache = {} + + self._cache_lock = threading.RLock() + + # mapping of event types to their listeners + + self._event_listeners = {} + self._event_listeners_lock = threading.RLock() + + # number of sequential 'GETINFO ip-to-country/*' lookups that have failed + self._geoip_failure_count = 0 + self._enabled_features = [] + + def _sighup_listener(event): + if event.signal == Signal.RELOAD: + self.clear_cache() + self._notify_status_listeners(State.RESET) + + self.add_event_listener(_sighup_listener, EventType.SIGNAL) + + def _confchanged_listener(event): + if self.is_caching_enabled(): + self._set_cache(dict((k, None) for k in event.config), "getconf") + + if "exitpolicy" in event.config.keys(): + self._set_cache({"exitpolicy": None}) + + self.add_event_listener(_confchanged_listener, EventType.CONF_CHANGED) + + def connect(self): + super(Controller, self).connect() + self.clear_cache() + + def close(self): + # making a best-effort attempt to quit before detaching the socket + if self.is_alive(): + try: + self.msg("QUIT") + except: + pass + + self.clear_cache() + + super(Controller, self).close() + + def authenticate(self, *args, **kwargs): + """ + A convenience method to authenticate the controller. This is just a + pass-through to :func:`stem.connection.authenticate`. + """ + + import stem.connection + stem.connection.authenticate(self, *args, **kwargs) + + def get_info(self, params, default = UNDEFINED, get_bytes = False): + """ + Queries the control socket for the given GETINFO option. 
If provided a + default then that's returned if the GETINFO option is undefined or the + call fails for any reason (error response, control port closed, initiated, + etc). + + :param str,list params: GETINFO option or options to be queried + :param object default: response if the query fails + :param bool get_bytes: provides **bytes** values rather than a **str** under python 3.x + + :returns: + Response depends upon how we were called as follows... + + * **str** with the response if our param was a **str** + * **dict** with the 'param => response' mapping if our param was a **list** + * default if one was provided and our call failed + + :raises: + * :class:`stem.ControllerError` if the call fails and we weren't + provided a default response + * :class:`stem.InvalidArguments` if the 'params' requested was + invalid + * :class:`stem.ProtocolError` if the geoip database is known to be + unavailable + """ + + start_time = time.time() + reply = {} + + if isinstance(params, (bytes, unicode)): + is_multiple = False + params = set([params]) + else: + if not params: + return {} + + is_multiple = True + params = set(params) + + # check for cached results + + from_cache = [param.lower() for param in params] + cached_results = self._get_cache_map(from_cache, "getinfo") + + for key in cached_results: + user_expected_key = _case_insensitive_lookup(params, key) + reply[user_expected_key] = cached_results[key] + params.remove(user_expected_key) + + for param in params: + if param.startswith('ip-to-country/') and self.is_geoip_unavailable(): + # the geoip database already looks to be unavailable - abort the request + if default == UNDEFINED: + raise stem.ProtocolError("Tor geoip database is unavailable") + else: + return default + + # if everything was cached then short circuit making the query + if not params: + log.trace("GETINFO %s (cache fetch)" % " ".join(reply.keys())) + + if is_multiple: + return reply + else: + return reply.values()[0] + + try: + response = 
self.msg("GETINFO %s" % " ".join(params)) + stem.response.convert("GETINFO", response) + response._assert_matches(params) + + # usually we want unicode values under python 3.x + + if stem.prereq.is_python_3() and not get_bytes: + response.entries = dict((k, stem.util.str_tools._to_unicode(v)) for (k, v) in response.entries.items()) + + reply.update(response.entries) + + if self.is_caching_enabled(): + to_cache = {} + + for key, value in response.entries.items(): + key = key.lower() # make case insensitive + + if key in CACHEABLE_GETINFO_PARAMS: + to_cache[key] = value + elif key.startswith('ip-to-country/'): + # both cache-able and means that we should reset the geoip failure count + to_cache[key] = value + self._geoip_failure_count = -1 + + self._set_cache(to_cache, "getinfo") + + log.debug("GETINFO %s (runtime: %0.4f)" % (" ".join(params), time.time() - start_time)) + + if is_multiple: + return reply + else: + return reply.values()[0] + except stem.ControllerError as exc: + # bump geoip failure count if... + # * we're caching results + # * this was soley a geoip lookup + # * we've never had a successful geoip lookup (failure count isn't -1) + + is_geoip_request = len(params) == 1 and list(params)[0].startswith('ip-to-country/') + + if is_geoip_request and self.is_caching_enabled() and self._geoip_failure_count != -1: + self._geoip_failure_count += 1 + + if self.is_geoip_unavailable(): + log.warn("Tor's geoip database is unavailable.") + + log.debug("GETINFO %s (failed: %s)" % (" ".join(params), exc)) + + if default == UNDEFINED: + raise exc + else: + return default + + def get_version(self, default = UNDEFINED): + """ + A convenience method to get tor version that current controller is + connected to. 
+ + :param object default: response if the query fails + + :returns: :class:`~stem.version.Version` of the tor instance that we're + connected to + + :raises: + * :class:`stem.ControllerError` if unable to query the version + * **ValueError** if unable to parse the version + + An exception is only raised if we weren't provided a default response. + """ + + try: + version = self._get_cache("version") + + if not version: + version = stem.version.Version(self.get_info("version")) + self._set_cache({"version": version}) + + return version + except Exception as exc: + if default == UNDEFINED: + raise exc + else: + return default + + def get_exit_policy(self, default = UNDEFINED): + """ + Effective ExitPolicy for our relay. This accounts for + ExitPolicyRejectPrivate and default policies. + + :param object default: response if the query fails + + :returns: :class:`~stem.exit_policy.ExitPolicy` of the tor instance that + we're connected to + + :raises: + * :class:`stem.ControllerError` if unable to query the policy + * **ValueError** if unable to parse the policy + + An exception is only raised if we weren't provided a default response. 
+ """ + with self._msg_lock: + try: + config_policy = self._get_cache("exit_policy") + + if not config_policy: + policy = [] + + if self.get_conf("ExitPolicyRejectPrivate") == "1": + policy.append("reject private:*") + + public_addr = self.get_info("address", None) + + if public_addr: + policy.append("reject %s:*" % public_addr) + + for policy_line in self.get_conf("ExitPolicy", multiple = True): + policy += policy_line.split(",") + + policy += self.get_info("exit-policy/default").split(",") + + config_policy = stem.exit_policy.get_config_policy(policy) + self._set_cache({"exit_policy": config_policy}) + + return config_policy + except Exception as exc: + if default == UNDEFINED: + raise exc + else: + return default + + def get_socks_listeners(self, default = UNDEFINED): + """ + Provides the SOCKS **(address, port)** tuples that tor has open. + + :param object default: response if the query fails + + :returns: list of **(address, port)** tuples for the available SOCKS + listeners + + :raises: :class:`stem.ControllerError` if unable to determine the listeners + and no default was provided + """ + + try: + proxy_addrs = [] + + try: + for listener in self.get_info("net/listeners/socks").split(): + if not (listener.startswith('"') and listener.endswith('"')): + raise stem.ProtocolError("'GETINFO net/listeners/socks' responses are expected to be quoted: %s" % listener) + elif not ':' in listener: + raise stem.ProtocolError("'GETINFO net/listeners/socks' had a listener without a colon: %s" % listener) + + listener = listener[1:-1] # strip quotes + addr, port = listener.split(':') + proxy_addrs.append((addr, port)) + except stem.InvalidArguments: + # tor version is old (pre-tor-0.2.2.26-beta), use get_conf() instead + socks_port = self.get_conf('SocksPort') + + for listener in self.get_conf('SocksListenAddress', multiple = True): + if ':' in listener: + addr, port = listener.split(':') + proxy_addrs.append((addr, port)) + else: + proxy_addrs.append((listener, socks_port)) 
+ + # validate that address/ports are valid, and convert ports to ints + + for addr, port in proxy_addrs: + if not stem.util.connection.is_valid_ipv4_address(addr): + raise stem.ProtocolError("Invalid address for a SOCKS listener: %s" % addr) + elif not stem.util.connection.is_valid_port(port): + raise stem.ProtocolError("Invalid port for a SOCKS listener: %s" % port) + + return [(addr, int(port)) for (addr, port) in proxy_addrs] + except Exception as exc: + if default == UNDEFINED: + raise exc + else: + return default + + def get_protocolinfo(self, default = UNDEFINED): + """ + A convenience method to get the protocol info of the controller. + + :param object default: response if the query fails + + :returns: :class:`~stem.response.protocolinfo.ProtocolInfoResponse` provided by tor + + :raises: + * :class:`stem.ProtocolError` if the PROTOCOLINFO response is + malformed + * :class:`stem.SocketError` if problems arise in establishing or + using the socket + + An exception is only raised if we weren't provided a default response. + """ + + import stem.connection + + try: + return stem.connection.get_protocolinfo(self) + except Exception as exc: + if default == UNDEFINED: + raise exc + else: + return default + + def get_user(self, default = UNDEFINED): + """ + Provides the user tor is running as. This often only works if tor is + running locally. Also, most of its checks are platform dependent, and hence + are not entirely reliable. 
+ + :param object default: response if the query fails + + :returns: str with the username tor is running as + """ + + user = self._get_cache("user") + + if not user: + user = self.get_info("process/user", None) + + if not user and self.get_socket().is_localhost(): + pid = self.get_pid(None) + + if pid: + user = stem.util.system.get_user(pid) + + if user: + self._set_cache({"user": user}) + return user + elif default == UNDEFINED: + if self.get_socket().is_localhost(): + raise ValueError("Unable to resolve tor's user") + else: + raise ValueError("Tor isn't running locally") + else: + return default + + def get_pid(self, default = UNDEFINED): + """ + Provides the process id of tor. This often only works if tor is running + locally. Also, most of its checks are platform dependent, and hence are not + entirely reliable. + + :param object default: response if the query fails + + :returns: int with our process' pid + + :raises: **ValueError** if unable to determine the pid and no default was + provided + """ + + pid = self._get_cache("pid") + + if not pid: + getinfo_pid = self.get_info("process/pid", None) + + if getinfo_pid and getinfo_pid.isdigit(): + pid = int(getinfo_pid) + + if not pid and self.get_socket().is_localhost(): + pid_file_path = self.get_conf("PidFile", None) + + if pid_file_path is not None: + with open(pid_file_path) as pid_file: + pid_file_contents = pid_file.read().strip() + + if pid_file_contents.isdigit(): + pid = int(pid_file_contents) + + if not pid: + pid = stem.util.system.get_pid_by_name('tor') + + if not pid: + control_socket = self.get_socket() + + if isinstance(control_socket, stem.socket.ControlPort): + pid = stem.util.system.get_pid_by_port(control_socket.get_port()) + elif isinstance(control_socket, stem.socket.ControlSocketFile): + pid = stem.util.system.get_pid_by_open_file(control_socket.get_socket_path()) + + if pid: + self._set_cache({"pid": pid}) + return pid + elif default == UNDEFINED: + if self.get_socket().is_localhost(): + 
raise ValueError("Unable to resolve tor's pid") + else: + raise ValueError("Tor isn't running locally") + else: + return default + + def get_microdescriptor(self, relay, default = UNDEFINED): + """ + Provides the microdescriptor for the relay with the given fingerprint or + nickname. If the relay identifier could be either a fingerprint *or* + nickname then it's queried as a fingerprint. + + :param str relay: fingerprint or nickname of the relay to be queried + :param object default: response if the query fails + + :returns: :class:`~stem.descriptor.microdescriptor.Microdescriptor` for the given relay + + :raises: + * :class:`stem.ControllerError` if unable to query the descriptor + * **ValueError** if **relay** doesn't conform with the pattern for being + a fingerprint or nickname + + An exception is only raised if we weren't provided a default response. + """ + + try: + if stem.util.tor_tools.is_valid_fingerprint(relay): + query = "md/id/%s" % relay + elif stem.util.tor_tools.is_valid_nickname(relay): + query = "md/name/%s" % relay + else: + raise ValueError("'%s' isn't a valid fingerprint or nickname" % relay) + + desc_content = self.get_info(query, get_bytes = True) + return stem.descriptor.microdescriptor.Microdescriptor(desc_content) + except Exception as exc: + if default == UNDEFINED: + raise exc + else: + return default + + def get_microdescriptors(self, default = UNDEFINED): + """ + Provides an iterator for all of the microdescriptors that tor presently + knows about. + + **Tor does not expose this information via the control protocol** + (:trac:`8323`). Until it does this reads the microdescriptors from disk, + and hence won't work remotely or if we lack read permissions. 
def get_server_descriptor(self, relay, default = UNDEFINED):
  """
  Fetches the server descriptor of a single relay, identified by either its
  fingerprint or nickname. Identifiers that are valid as both are queried as
  a fingerprint.

  **As of Tor version 0.2.3.25 relays no longer get server descriptors by
  default.** It's advised that you use microdescriptors instead, but if you
  really need server descriptors then you can get them by setting
  'UseMicrodescriptors 0'.

  :param str relay: fingerprint or nickname of the relay to be queried
  :param object default: response if the query fails

  :returns: :class:`~stem.descriptor.server_descriptor.RelayDescriptor` for the given relay

  :raises:
    * :class:`stem.ControllerError` if unable to query the descriptor
    * **ValueError** if **relay** doesn't conform with the pattern for being
      a fingerprint or nickname, or if server descriptors aren't available

    An exception is only raised if we weren't provided a default response.
  """

  try:
    if stem.util.tor_tools.is_valid_fingerprint(relay):
      getinfo_key = "desc/id/%s" % relay
    elif stem.util.tor_tools.is_valid_nickname(relay):
      getinfo_key = "desc/name/%s" % relay
    else:
      raise ValueError("'%s' isn't a valid fingerprint or nickname" % relay)

    raw_descriptor = self.get_info(getinfo_key, get_bytes = True)
    return stem.descriptor.server_descriptor.RelayDescriptor(raw_descriptor)
  except Exception as exc:
    caller_wants_error = default == UNDEFINED

    if not caller_wants_error:
      return default

    # give a more helpful error when the failure is likely due to tor not
    # fetching server descriptors at all

    if not self._is_server_descriptors_available():
      raise ValueError(SERVER_DESCRIPTORS_UNSUPPORTED)

    raise exc
def _is_server_descriptors_available(self):
  """
  Checks to see if tor server descriptors should be available or not. They
  are when tor predates microdescriptors being the default, or when
  'UseMicrodescriptors' is explicitly set to '0'.
  """

  predates_microdescriptors = self.get_version() < stem.version.Requirement.MICRODESCRIPTOR_IS_DEFAULT

  if predates_microdescriptors:
    return predates_microdescriptors

  return self.get_conf('UseMicrodescriptors', None) == '0'
def get_network_statuses(self, default = UNDEFINED):
  """
  Iterates over the router status entries for all of the relays that tor
  presently knows about.

  :param list default: items to provide if the query fails

  :returns: iterates over
    :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` for
    relays in the tor network

  :raises: :class:`stem.ControllerError` if unable to query tor and no
    default was provided
  """

  try:
    # TODO: We should iterate over the descriptors as they're read from the
    # socket rather than reading the whole thing into memory.
    #
    # https://trac.torproject.org/8248

    raw_statuses = self.get_info("ns/all", get_bytes = True)

    status_entries = stem.descriptor.router_status_entry._parse_file(
      io.BytesIO(raw_statuses),
      True,
      entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV3,
    )

    for status_entry in status_entries:
      yield status_entry
  except Exception as exc:
    if default == UNDEFINED:
      raise exc
    elif default is not None:
      # fall back to the caller supplied entries
      for fallback_entry in default:
        yield fallback_entry
def get_conf_map(self, params, default = UNDEFINED, multiple = True):
  """
  Similar to :func:`~stem.control.Controller.get_conf` but queries multiple
  configuration options, providing back a mapping of those options to their
  values.

  There are three use cases for GETCONF:

    1. a single value is provided (e.g. **ControlPort**)
    2. multiple values are provided for the option (e.g. **ExitPolicy**)
    3. a set of options that weren't necessarily requested are returned (for
       instance querying **HiddenServiceOptions** gives **HiddenServiceDir**,
       **HiddenServicePort**, etc)

  The vast majority of the options fall into the first two categories, in
  which case calling :func:`~stem.control.Controller.get_conf` is sufficient.
  However, for batch queries or the special options that give a set of values
  this provides back the full response. As of tor version 0.2.1.25
  **HiddenServiceOptions** was the only option that falls into the third
  category.

  :param str,list params: configuration option(s) to be queried
  :param object default: value for the mappings if the configuration option
    is either undefined or the query fails
  :param bool multiple: if **True** then the values provided are lists with
    all of the present values

  :returns:
    **dict** of the 'config key => value' mappings. The value is a...

    * **str** if **multiple** is **False**, **None** if the configuration
      option is unset
    * **list** if **multiple** is **True**
    * the **default** if it was set and the value was either undefined or our
      lookup failed

  :raises:
    * :class:`stem.ControllerError` if the call fails and we weren't provided
      a default response
    * :class:`stem.InvalidArguments` if the configuration option requested
      was invalid
  """

  start_time = time.time()
  reply = {}

  if isinstance(params, (bytes, unicode)):
    params = [params]

  # Remove strings which contain only whitespace. This is a list
  # comprehension rather than filter() so we always get a list regardless of
  # the sequence type we were given (filter() of a tuple gives a tuple, which
  # would never equal an empty list).

  params = [entry for entry in params if entry.strip()]

  if not params:
    return {}

  # translate context sensitive options
  lookup_params = set([MAPPED_CONFIG_KEYS.get(entry, entry) for entry in params])

  # check for cached results

  from_cache = [param.lower() for param in lookup_params]
  cached_results = self._get_cache_map(from_cache, "getconf")

  for key in cached_results:
    user_expected_key = _case_insensitive_lookup(lookup_params, key)
    reply[user_expected_key] = cached_results[key]
    lookup_params.remove(user_expected_key)

  # if everything was cached then short circuit making the query
  if not lookup_params:
    log.trace("GETCONF %s (cache fetch)" % " ".join(reply.keys()))
    return self._get_conf_dict_to_response(reply, default, multiple)

  try:
    response = self.msg("GETCONF %s" % ' '.join(lookup_params))
    stem.response.convert("GETCONF", response)
    reply.update(response.entries)

    if self.is_caching_enabled():
      to_cache = dict((k.lower(), v) for k, v in response.entries.items())

      for key in UNCACHEABLE_GETCONF_PARAMS:
        if key in to_cache:
          del to_cache[key]

      self._set_cache(to_cache, "getconf")

    # Maps the entries back to the parameters that the user requested so the
    # capitalization matches (ie, if they request "exitpolicy" then that
    # should be the key rather than "ExitPolicy"). When the same
    # configuration key is provided multiple times this determines the case
    # based on the first and ignores the rest.
    #
    # This retains the tor provided camel casing of MAPPED_CONFIG_KEYS
    # entries since the user didn't request those by their key, so we can't
    # be sure what they wanted.

    mapped_values = MAPPED_CONFIG_KEYS.values()

    # iterating over a snapshot of the keys since we add and remove entries
    # from the reply as we go

    for key in list(reply):
      if key.lower() not in mapped_values:
        user_expected_key = _case_insensitive_lookup(params, key, key)

        if key != user_expected_key:
          reply[user_expected_key] = reply[key]
          del reply[key]

    log.debug("GETCONF %s (runtime: %0.4f)" % (" ".join(lookup_params), time.time() - start_time))
    return self._get_conf_dict_to_response(reply, default, multiple)
  except stem.ControllerError as exc:
    log.debug("GETCONF %s (failed: %s)" % (" ".join(lookup_params), exc))

    if default != UNDEFINED:
      return dict((param, default) for param in params)
    else:
      raise
def set_options(self, params, reset = False):
  """
  Changes multiple tor configuration options via either a SETCONF or
  RESETCONF query. Both behave identically unless our value is None, in which
  case SETCONF sets the value to 0 or NULL, and RESETCONF returns it to its
  default value. This accepts str, list, or None values in a similar fashion
  to :func:`~stem.control.Controller.set_conf`. For example...

  ::

    my_controller.set_options({
      "Nickname": "caerSidi",
      "ExitPolicy": ["accept *:80", "accept *:443", "reject *:*"],
      "ContactInfo": "caerSidi-exit@someplace.com",
      "Log": None,
    })

  The params can optionally be a list of key/value tuples, though the only
  reason this type of argument would be useful is for hidden service
  configuration (those options are order dependent).

  :param dict,list params: mapping of configuration options to the values
    we're setting it to
  :param bool reset: issues a RESETCONF, returning **None** values to their
    defaults if **True**

  :raises:
    * :class:`stem.ControllerError` if the call fails
    * :class:`stem.InvalidArguments` if configuration options
      requested was invalid
    * :class:`stem.InvalidRequest` if the configuration setting is
      impossible or if there's a syntax error in the configuration values
  """

  start_time = time.time()

  # constructs the SETCONF or RESETCONF query
  query_comp = ["RESETCONF" if reset else "SETCONF"]

  if isinstance(params, dict):
    params = params.items()

  # We iterate over the params twice (building the query, then caching), so
  # making sure this is a concrete list rather than a one-shot iterator.

  params = list(params)

  for param, value in params:
    # Checking for both byte and unicode strings, matching the check used
    # for caching below. Otherwise unicode values would be treated as a
    # sequence and set one character at a time.

    if isinstance(value, (bytes, unicode)):
      query_comp.append("%s=\"%s\"" % (param, value.strip()))
    elif value:
      query_comp.extend(["%s=\"%s\"" % (param, val.strip()) for val in value])
    else:
      query_comp.append(param)

  query = " ".join(query_comp)
  response = self.msg(query)
  stem.response.convert("SINGLELINE", response)

  if response.is_ok():
    log.debug("%s (runtime: %0.4f)" % (query, time.time() - start_time))

    if self.is_caching_enabled():
      to_cache = {}

      for param, value in params:
        param = param.lower()

        if isinstance(value, (bytes, unicode)):
          value = [value]

        to_cache[param] = value

        # drops the 'exitpolicy' entry of our general (un-namespaced) cache
        # since it's now stale

        if param == "exitpolicy":
          self._set_cache({"exitpolicy": None})

      self._set_cache(to_cache, "getconf")
  else:
    log.debug("%s (failed, code: %s, message: %s)" % (query, response.code, response.message))

    if response.code == "552":
      if response.message.startswith("Unrecognized option: Unknown option '"):
        # the option name is the quoted text following the 37 character prefix
        key = response.message[37:response.message.find("\'", 37)]
        raise stem.InvalidArguments(response.code, response.message, [key])

      raise stem.InvalidRequest(response.code, response.message)
    elif response.code in ("513", "553"):
      raise stem.InvalidRequest(response.code, response.message)
    else:
      raise stem.ProtocolError("Returned unexpected status code: %s" % response.code)
def _get_cache_map(self, params, namespace = None):
  """
  Looks up several keys in our request cache at once. Nothing is provided if
  caching is disabled.

  :param list params: keys to be queried
  :param str namespace: namespace in which to check for the keys

  :returns: **dict** of 'param => cached value' pairs of keys present in cache
  """

  with self._cache_lock:
    found = {}

    if not self.is_caching_enabled():
      return found

    for requested in params:
      # namespaced entries are stored as '<namespace>.<key>'
      lookup_key = "%s.%s" % (namespace, requested) if namespace else requested

      if lookup_key in self._request_cache:
        found[requested] = self._request_cache[lookup_key]

    return found
def set_caching(self, enabled):
  """
  Turns caching of the information we retrieve from tor on or off. Disabling
  it also drops anything we've cached so far.

  :param bool enabled: **True** to enable caching, **False** to disable it
  """

  self._is_caching_enabled = enabled

  if self._is_caching_enabled:
    return

  self.clear_cache()
def save_conf(self):
  """
  Writes tor's current configuration options to the active torrc file by
  issuing a SAVECONF.

  :returns: **True** if tor accepted the request

  :raises:
    * :class:`stem.ControllerError` if the call fails
    * :class:`stem.OperationFailed` if the client is unable to save
      the configuration file
  """

  reply = self.msg("SAVECONF")
  stem.response.convert("SINGLELINE", reply)

  if reply.is_ok():
    return True

  # a 551 response is reported as an OperationFailed, anything else is
  # unexpected

  if reply.code == "551":
    raise stem.OperationFailed(reply.code, reply.message)

  raise stem.ProtocolError("SAVECONF returned unexpected response code")
def enable_feature(self, features):
  """
  Enables features that are disabled by default to maintain backward
  compatibility. Once enabled, a feature cannot be disabled and a new
  control connection must be opened to get a connection with the feature
  disabled. Feature names are case-insensitive.

  The following features are currently accepted:

    * EXTENDED_EVENTS - Requests the extended event syntax
    * VERBOSE_NAMES - Replaces ServerID with LongName in events and GETINFO results

  :param str,list features: a single feature or a list of features to be enabled

  :raises:
    * :class:`stem.ControllerError` if the call fails
    * :class:`stem.InvalidArguments` if features passed were invalid
  """

  if isinstance(features, (bytes, unicode)):
    features = [features]

  reply = self.msg("USEFEATURE %s" % " ".join(features))
  stem.response.convert("SINGLELINE", reply)

  if reply.is_ok():
    # features are tracked by their upper cased name
    self._enabled_features += [name.upper() for name in features]
    return

  if reply.code == "552":
    message = reply.message
    rejected = []

    # the feature name is the quoted text following the 22 character prefix
    if message.startswith("Unrecognized feature \""):
      rejected = [message[22:message.find("\"", 22)]]

    raise stem.InvalidArguments(reply.code, message, rejected)

  raise stem.ProtocolError("USEFEATURE provided an invalid response code: %s" % reply.code)
def extend_circuit(self, circuit_id = "0", path = None, purpose = "general", await_build = False):
  """
  Either requests the creation of a new circuit or extends an existing one.

  When called with a circuit value of zero (the default) a new circuit is
  created, and when non-zero the circuit with that id is extended. If the
  path isn't provided, one is automatically selected.

  A python interpreter session used to create circuits could look like this...

  ::

    >>> control.extend_circuit('0', ["718BCEA286B531757ACAFF93AE04910EA73DE617", "30BAB8EE7606CBD12F3CC269AE976E0153E7A58D", "2765D8A8C4BBA3F89585A9FFE0E8575615880BEB"])
    19
    >>> control.extend_circuit('0')
    20
    >>> print control.get_info('circuit-status')
    20 EXTENDED $718BCEA286B531757ACAFF93AE04910EA73DE617=KsmoinOK,$649F2D0ACF418F7CFC6539AB2257EB2D5297BAFA=Eskimo BUILD_FLAGS=NEED_CAPACITY PURPOSE=GENERAL TIME_CREATED=2012-12-06T13:51:11.433755
    19 BUILT $718BCEA286B531757ACAFF93AE04910EA73DE617=KsmoinOK,$30BAB8EE7606CBD12F3CC269AE976E0153E7A58D=Pascal1,$2765D8A8C4BBA3F89585A9FFE0E8575615880BEB=Anthracite PURPOSE=GENERAL TIME_CREATED=2012-12-06T13:50:56.969938

  :param str circuit_id: id of a circuit to be extended
  :param list,str path: one or more relays to make a circuit through, this is
    required if the circuit id is non-zero
  :param str purpose: "general" or "controller"
  :param bool await_build: blocks until the circuit is built if **True**

  :returns: str of the circuit id of the created or extended circuit

  :raises:
    * :class:`stem.InvalidRequest` if one of the parameters were invalid
    * :class:`stem.CircuitExtensionFailed` if we were waiting for the circuit
      to build but it failed
    * :class:`stem.ControllerError` if the call fails
  """

  # Attaches a temporary listener for CIRC events if we'll be waiting for it
  # to build. This is icky, but we can't reliably do this via polling since
  # we then can't get the failure if it can't be created.

  circ_queue, circ_listener = None, None

  if await_build:
    circ_queue = Queue.Queue()

    def circ_listener(event):
      circ_queue.put(event)

    self.add_event_listener(circ_listener, EventType.CIRC)

  try:
    # we might accidentally get integer circuit ids
    circuit_id = str(circuit_id)

    if path is None and circuit_id == '0':
      path_opt_version = stem.version.Requirement.EXTENDCIRCUIT_PATH_OPTIONAL

      if not self.get_version() >= path_opt_version:
        raise stem.InvalidRequest(512, "EXTENDCIRCUIT requires the path prior to version %s" % path_opt_version)

    args = [circuit_id]

    if isinstance(path, (bytes, unicode)):
      path = [path]

    if path:
      args.append(",".join(path))

    if purpose:
      args.append("purpose=%s" % purpose)

    response = self.msg("EXTENDCIRCUIT %s" % " ".join(args))
    stem.response.convert("SINGLELINE", response)

    if response.code in ('512', '552'):
      raise stem.InvalidRequest(response.code, response.message)
    elif not response.is_ok():
      raise stem.ProtocolError("EXTENDCIRCUIT returned unexpected response code: %s" % response.code)

    if not response.message.startswith("EXTENDED "):
      # formatting the response into the message, rather than passing it as
      # a second argument to the exception
      raise stem.ProtocolError("EXTENDCIRCUIT response invalid:\n%s" % response)

    # the circuit id is whatever follows the 'EXTENDED ' prefix
    new_circuit = response.message.split(" ", 1)[1]

    if await_build:
      # blocks on CIRC events until our circuit is built, fails, or is closed
      while True:
        circ = circ_queue.get()

        if circ.id == new_circuit:
          if circ.status == CircStatus.BUILT:
            break
          elif circ.status == CircStatus.FAILED:
            raise stem.CircuitExtensionFailed("Circuit failed to be created: %s" % circ.reason, circ)
          elif circ.status == CircStatus.CLOSED:
            raise stem.CircuitExtensionFailed("Circuit was closed prior to build", circ)

    return new_circuit
  finally:
    # always detach our temporary listener, even if we're raising
    if circ_listener:
      self.remove_event_listener(circ_listener)
+ * general + * controller + + :param str circuit_id: id of the circuit whose purpose is to be changed + :param str purpose: purpose (either "general" or "controller") + + :raises: :class:`stem.InvalidArguments` if the circuit doesn't exist or if the purpose was invalid + """ + + response = self.msg("SETCIRCUITPURPOSE %s purpose=%s" % (circuit_id, purpose)) + stem.response.convert("SINGLELINE", response) + + if not response.is_ok(): + if response.code == "552": + raise stem.InvalidRequest(response.code, response.message) + else: + raise stem.ProtocolError("SETCIRCUITPURPOSE returned unexpected response code: %s" % response.code) + + def close_circuit(self, circuit_id, flag = ''): + """ + Closes the specified circuit. + + :param str circuit_id: id of the circuit to be closed + :param str flag: optional value to modify closing, the only flag available + is "IfUnused" which will not close the circuit unless it is unused + + :raises: :class:`stem.InvalidArguments` if the circuit is unknown + :raises: :class:`stem.InvalidRequest` if not enough information is provided + """ + + response = self.msg("CLOSECIRCUIT %s %s" % (circuit_id, flag)) + stem.response.convert("SINGLELINE", response) + + if not response.is_ok(): + if response.code in ('512', '552'): + if response.message.startswith("Unknown circuit "): + raise stem.InvalidArguments(response.code, response.message, [circuit_id]) + raise stem.InvalidRequest(response.code, response.message) + else: + raise stem.ProtocolError("CLOSECIRCUIT returned unexpected response code: %s" % response.code) + + def get_streams(self, default = UNDEFINED): + """ + Provides the list of streams tor is currently handling. 
+ + :param object default: response if the query fails + + :returns: list of :class:`stem.response.events.StreamEvent` objects + + :raises: :class:`stem.ControllerError` if the call fails and no default was + provided + """ + + try: + streams = [] + response = self.get_info("stream-status") + + for stream in response.splitlines(): + message = stem.socket.recv_message(StringIO.StringIO("650 STREAM " + stream + "\r\n")) + stem.response.convert("EVENT", message, arrived_at = 0) + streams.append(message) + + return streams + except Exception as exc: + if default == UNDEFINED: + raise exc + else: + return default + + def attach_stream(self, stream_id, circuit_id, exiting_hop = None): + """ + Attaches a stream to a circuit. + + Note: Tor attaches streams to circuits automatically unless the + __LeaveStreamsUnattached configuration variable is set to "1" + + :param str stream_id: id of the stream that must be attached + :param str circuit_id: id of the circuit to which it must be attached + :param int exiting_hop: hop in the circuit where traffic should exit + + :raises: + * :class:`stem.InvalidRequest` if the stream or circuit id were unrecognized + * :class:`stem.UnsatisfiableRequest` if the stream isn't in a state where it can be attached + * :class:`stem.OperationFailed` if the stream couldn't be attached for any other reason + """ + + query = "ATTACHSTREAM %s %s" % (stream_id, circuit_id) + + if exiting_hop: + query += " HOP=%s" % exiting_hop + + response = self.msg(query) + stem.response.convert("SINGLELINE", response) + + if not response.is_ok(): + if response.code == '552': + raise stem.InvalidRequest(response.code, response.message) + elif response.code == '551': + raise stem.OperationFailed(response.code, response.message) + elif response.code == '555': + raise stem.UnsatisfiableRequest(response.code, response.message) + else: + raise stem.ProtocolError("ATTACHSTREAM returned unexpected response code: %s" % response.code) + + def close_stream(self, stream_id, 
reason = stem.RelayEndReason.MISC, flag = ''): + """ + Closes the specified stream. + + :param str stream_id: id of the stream to be closed + :param stem.RelayEndReason reason: reason the stream is closing + :param str flag: not currently used + + :raises: :class:`stem.InvalidArguments` if the stream or reason are not recognized + :raises: :class:`stem.InvalidRequest` if the stream and/or reason are missing + """ + + # there's a single value offset between RelayEndReason.index_of() and the + # value that tor expects since tor's value starts with the index of one + + response = self.msg("CLOSESTREAM %s %s %s" % (stream_id, stem.RelayEndReason.index_of(reason) + 1, flag)) + stem.response.convert("SINGLELINE", response) + + if not response.is_ok(): + if response.code in ('512', '552'): + if response.message.startswith("Unknown stream "): + raise stem.InvalidArguments(response.code, response.message, [stream_id]) + elif response.message.startswith("Unrecognized reason "): + raise stem.InvalidArguments(response.code, response.message, [reason]) + raise stem.InvalidRequest(response.code, response.message) + else: + raise stem.ProtocolError("CLOSESTREAM returned unexpected response code: %s" % response.code) + + def signal(self, signal): + """ + Sends a signal to the Tor client. + + :param stem.Signal signal: type of signal to be sent + + :raises: :class:`stem.InvalidArguments` if signal provided wasn't recognized + """ + + response = self.msg("SIGNAL %s" % signal) + stem.response.convert("SINGLELINE", response) + + if not response.is_ok(): + if response.code == "552": + raise stem.InvalidArguments(response.code, response.message, [signal]) + + raise stem.ProtocolError("SIGNAL response contained unrecognized status code: %s" % response.code) + + def is_geoip_unavailable(self): + """ + Provides **True** if we've concluded that our geoip database is unavailable, + **False** otherwise.
This is determined by having our 'GETINFO + ip-to-country/\*' lookups fail so this will default to **False** if we + aren't making those queries. + + Geoip failures will be untracked if caching is disabled. + + :returns: **bool** to indicate if we've concluded our geoip database to be + unavailable or not + """ + + return self._geoip_failure_count >= GEOIP_FAILURE_THRESHOLD + + def map_address(self, mapping): + """ + Map addresses to replacement addresses. Tor replaces subsequent connections + to the original addresses with the replacement addresses. + + If the original address is a null address, i.e., one of "0.0.0.0", "::0", or + "." Tor picks an original address itself and returns it in the reply. If the + original address is already mapped to a different address the mapping is + removed. + + :param dict mapping: mapping of original addresses to replacement addresses + + :raises: + * :class:`stem.InvalidRequest` if the addresses are malformed + * :class:`stem.OperationFailed` if Tor couldn't fulfill the request + + :returns: **dict** with 'original -> replacement' address mappings + """ + + mapaddress_arg = " ".join(["%s=%s" % (k, v) for (k, v) in mapping.items()]) + response = self.msg("MAPADDRESS %s" % mapaddress_arg) + stem.response.convert("MAPADDRESS", response) + + return response.entries + + def _post_authentication(self): + super(Controller, self)._post_authentication() + + # try to re-attach event listeners to the new instance + + with self._event_listeners_lock: + try: + failed_events = self._attach_listeners()[1] + + if failed_events: + # remove our listeners for these so we don't keep failing + for event_type in failed_events: + del self._event_listeners[event_type] + + logging_id = "stem.controller.event_reattach-%s" % "-".join(failed_events) + log.log_once(logging_id, log.WARN, "We were unable to re-attach our event listeners to the new tor instance for: %s" % ", ".join(failed_events)) + except stem.ProtocolError as exc: + log.warn("Unable to issue 
the SETEVENTS request to re-attach our listeners (%s)" % exc) + + # issue TAKEOWNERSHIP if we're the owning process for this tor instance + + owning_pid = self.get_conf("__OwningControllerProcess", None) + + if owning_pid == str(os.getpid()) and self.get_socket().is_localhost(): + response = self.msg("TAKEOWNERSHIP") + stem.response.convert("SINGLELINE", response) + + if response.is_ok(): + # Now that tor is tracking our ownership of the process via the control + # connection, we can stop having it check for us via our pid. + + try: + self.reset_conf("__OwningControllerProcess") + except stem.ControllerError as exc: + log.warn("We were unable to reset tor's __OwningControllerProcess configuration. It will continue to periodically check if our pid exists. (%s)" % exc) + else: + log.warn("We were unable assert ownership of tor through TAKEOWNERSHIP, despite being configured to be the owning process through __OwningControllerProcess. (%s)" % response) + + def _handle_event(self, event_message): + stem.response.convert("EVENT", event_message, arrived_at = time.time()) + + with self._event_listeners_lock: + for event_type, event_listeners in self._event_listeners.items(): + if event_type == event_message.type: + for listener in event_listeners: + listener(event_message) + + def _attach_listeners(self): + """ + Attempts to subscribe to the self._event_listeners events from tor. This is + a no-op if we're not presently authenticated. + + :returns: tuple of the form (set_events, failed_events) + + :raises: :class:`stem.ControllerError` if unable to make our request to tor + """ + + set_events, failed_events = [], [] + + with self._event_listeners_lock: + if self.is_authenticated(): + # try to set them all + response = self.msg("SETEVENTS %s" % " ".join(self._event_listeners.keys())) + + if response.is_ok(): + set_events = self._event_listeners.keys() + else: + # One of the following likely happened... 
+ # + # * Our user attached listeners before having an authenticated + # connection, so we couldn't check if we met the version + # requirement. + # + # * User attached listeners to one tor instance, then connected us to + # an older tor instance. + # + # * Some other controller hiccup (far less likely). + # + # See if we can set some subset of our events. + + for event in self._event_listeners.keys(): + response = self.msg("SETEVENTS %s" % " ".join(set_events + [event])) + + if response.is_ok(): + set_events.append(event) + else: + failed_events.append(event) + + return (set_events, failed_events) + + +def _parse_circ_path(path): + """ + Parses a circuit path as a list of **(fingerprint, nickname)** tuples. Tor + circuit paths are defined as being of the form... + + :: + + Path = LongName *("," LongName) + LongName = Fingerprint [ ( "=" / "~" ) Nickname ] + + example: + $999A226EBED397F331B612FE1E4CFAE5C1F201BA=piyaz + + ... *unless* this is prior to tor version 0.2.2.1 with the VERBOSE_NAMES + feature turned off (or before version 0.1.2.2 where the feature was + introduced). In that case either the fingerprint or nickname in the tuple + will be **None**, depending on which is missing. + + :: + + Path = ServerID *("," ServerID) + ServerID = Nickname / Fingerprint + + example: + $E57A476CD4DFBD99B4EE52A100A58610AD6E80B9,hamburgerphone,PrivacyRepublic14 + + :param str path: circuit path to be parsed + + :returns: list of **(fingerprint, nickname)** tuples, fingerprints do not have a preceding '$' + + :raises: :class:`stem.ProtocolError` if the path is malformed + """ + + if path: + try: + return [_parse_circ_entry(entry) for entry in path.split(',')] + except stem.ProtocolError as exc: + # include the path with the exception + raise stem.ProtocolError("%s: %s" % (exc, path)) + else: + return [] + + +def _parse_circ_entry(entry): + """ + Parses a single relay's 'LongName' or 'ServerID'.
See the + :func:`~_stem.control._parse_circ_path` function for more information. + + :param str entry: relay information to be parsed + + :returns: **(fingerprint, nickname)** tuple + + :raises: :class:`stem.ProtocolError` if the entry is malformed + """ + + if '=' in entry: + # common case + fingerprint, nickname = entry.split('=') + elif '~' in entry: + # this is allowed for by the spec, but I've never seen it used + fingerprint, nickname = entry.split('~') + elif entry[0] == '$': + # old style, fingerprint only + fingerprint, nickname = entry, None + else: + # old style, nickname only + fingerprint, nickname = None, entry + + if fingerprint is not None: + if not stem.util.tor_tools.is_valid_fingerprint(fingerprint, True): + raise stem.ProtocolError("Fingerprint in the circuit path is malformed (%s)" % fingerprint) + + fingerprint = fingerprint[1:] # strip off the leading '$' + + if nickname is not None and not stem.util.tor_tools.is_valid_nickname(nickname): + raise stem.ProtocolError("Nickname in the circuit path is malformed (%s)" % nickname) + + return (fingerprint, nickname) + + +def _case_insensitive_lookup(entries, key, default = UNDEFINED): + """ + Makes a case insensitive lookup within a list or dictionary, providing the + first matching entry that we come across. 
+ + :param list,dict entries: list or dictionary to be searched + :param str key: entry or key value to look up + :param object default: value to be returned if the key doesn't exist + + :returns: case insensitive match or default if one was provided and key wasn't found + + :raises: **ValueError** if no such value exists + """ + + if entries is not None: + if isinstance(entries, dict): + for k, v in entries.items(): + if k.lower() == key.lower(): + return v + else: + for entry in entries: + if entry.lower() == key.lower(): + return entry + + if default != UNDEFINED: + return default + else: + raise ValueError("key '%s' doesn't exist in dict: %s" % (key, entries)) diff --git a/lib/stem/descriptor/__init__.py b/lib/stem/descriptor/__init__.py new file mode 100644 index 00000000..d6b007e4 --- /dev/null +++ b/lib/stem/descriptor/__init__.py @@ -0,0 +1,552 @@ +# Copyright 2012-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Package for parsing and processing descriptor data. + +**Module Overview:** + +:: + + parse_file - Parses the descriptors in a file. + + Descriptor - Common parent for all descriptor file types. + |- get_path - location of the descriptor on disk if it came from a file + |- get_archive_path - location of the descriptor within the archive it came from + |- get_bytes - similar to str(), but provides our original bytes content + |- get_unrecognized_lines - unparsed descriptor content + +- __str__ - string that the descriptor was made from + +.. data:: DocumentHandler (enum) + + Ways in which we can parse a + :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`. + + Both **ENTRIES** and **BARE_DOCUMENT** have a 'thin' document, which doesn't + have a populated **routers** attribute. This allows for lower memory usage + and upfront runtime. However, if read time and memory aren't a concern then + **DOCUMENT** can provide you with a fully populated document. 
+ + =================== =========== + DocumentHandler Description + =================== =========== + **ENTRIES** Iterates over the contained :class:`~stem.descriptor.router_status_entry.RouterStatusEntry`. Each has a reference to the bare document it came from (through its **document** attribute). + **DOCUMENT** :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` with the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` it contains (through its **routers** attribute). + **BARE_DOCUMENT** :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` **without** a reference to its contents (the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` are unread). + =================== =========== +""" + +__all__ = [ + "export", + "reader", + "remote", + "extrainfo_descriptor", + "server_descriptor", + "microdescriptor", + "networkstatus", + "router_status_entry", + "tordnsel", + "parse_file", + "Descriptor", +] + +import os +import re + +import stem.prereq +import stem.util.enum +import stem.util.str_tools + +try: + # added in python 2.7 + from collections import OrderedDict +except ImportError: + from stem.util.ordereddict import OrderedDict + +KEYWORD_CHAR = "a-zA-Z0-9-" +WHITESPACE = " \t" +KEYWORD_LINE = re.compile("^([%s]+)(?:[%s]+(.*))?$" % (KEYWORD_CHAR, WHITESPACE)) +PGP_BLOCK_START = re.compile("^-----BEGIN ([%s%s]+)-----$" % (KEYWORD_CHAR, WHITESPACE)) +PGP_BLOCK_END = "-----END %s-----" + +DocumentHandler = stem.util.enum.UppercaseEnum( + "ENTRIES", + "DOCUMENT", + "BARE_DOCUMENT", +) + + +def parse_file(descriptor_file, descriptor_type = None, validate = True, document_handler = DocumentHandler.ENTRIES, **kwargs): + """ + Simple function to read the descriptor contents from a file, providing an + iterator for its :class:`~stem.descriptor.__init__.Descriptor` contents. + + If you don't provide a **descriptor_type** argument then this automatically + tries to determine the descriptor type based on the following... 
+ + * The @type annotation on the first line. These are generally only found in + the `descriptor archives `_. + + * The filename if it matches something from tor's data directory. For + instance, tor's 'cached-descriptors' contains server descriptors. + + This is a handy function for simple usage, but if you're reading multiple + descriptor files you might want to consider the + :class:`~stem.descriptor.reader.DescriptorReader`. + + Descriptor types include the following, including further minor versions (ie. + if we support 1.1 then we also support everything from 1.0 and most things + from 1.2, but not 2.0)... + + ========================================= ===== + Descriptor Type Class + ========================================= ===== + server-descriptor 1.0 :class:`~stem.descriptor.server_descriptor.RelayDescriptor` + extra-info 1.0 :class:`~stem.descriptor.extrainfo_descriptor.RelayExtraInfoDescriptor` + microdescriptor 1.0 :class:`~stem.descriptor.microdescriptor.Microdescriptor` + directory 1.0 **unsupported** + network-status-2 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV2`) + dir-key-certificate-3 1.0 :class:`~stem.descriptor.networkstatus.KeyCertificate` + network-status-consensus-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`) + network-status-vote-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`) + network-status-microdesc-consensus-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`) + bridge-network-status 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.BridgeNetworkStatusDocument`) + 
bridge-server-descriptor 1.0 :class:`~stem.descriptor.server_descriptor.BridgeDescriptor` + bridge-extra-info 1.1 :class:`~stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor` + torperf 1.0 **unsupported** + bridge-pool-assignment 1.0 **unsupported** + tordnsel 1.0 :class:`~stem.descriptor.tordnsel.TorDNSEL` + ========================================= ===== + + If you're using **python 3** then beware that the open() function defaults to + using text mode. **Binary mode** is strongly suggested because it's both + faster (by my testing by about 33x) and doesn't do universal newline + translation which can make us misparse the document. + + :: + + my_descriptor_file = open(descriptor_path, 'rb') + + :param str,file descriptor_file: path or opened file with the descriptor contents + :param str descriptor_type: `descriptor type `_, this is guessed if not provided + :param bool validate: checks the validity of the descriptor's content if + **True**, skips these checks otherwise + :param stem.descriptor.__init__.DocumentHandler document_handler: method in + which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` + :param dict kwargs: additional arguments for the descriptor constructor + + :returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file + + :raises: + * **ValueError** if the contents is malformed and validate is True + * **TypeError** if we can't match the contents of the file to a descriptor type + * **IOError** if unable to read from the descriptor_file + """ + + # if we got a path then open that file for parsing + + if isinstance(descriptor_file, (bytes, unicode)): + with open(descriptor_file) as desc_file: + for desc in parse_file(desc_file, descriptor_type, validate, document_handler, **kwargs): + yield desc + + return + + # The tor descriptor specifications do not provide a reliable method for + # identifying a descriptor file's type and version so we need to guess + # based on its 
filename. Metrics descriptors, however, can be identified + # by an annotation on their first line... + # https://trac.torproject.org/5651 + + initial_position = descriptor_file.tell() + first_line = stem.util.str_tools._to_unicode(descriptor_file.readline().strip()) + metrics_header_match = re.match("^@type (\S+) (\d+).(\d+)$", first_line) + + if not metrics_header_match: + descriptor_file.seek(initial_position) + + descriptor_path = getattr(descriptor_file, 'name', None) + filename = '' if descriptor_path is None else os.path.basename(descriptor_file.name) + file_parser = None + + if descriptor_type is not None: + descriptor_type_match = re.match("^(\S+) (\d+).(\d+)$", descriptor_type) + + if descriptor_type_match: + desc_type, major_version, minor_version = descriptor_type_match.groups() + file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs) + else: + raise ValueError("The descriptor_type must be of the form ' .'") + elif metrics_header_match: + # Metrics descriptor handling + + desc_type, major_version, minor_version = metrics_header_match.groups() + file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs) + else: + # Cached descriptor handling. These contain multiple descriptors per file. 
+ + if filename == "cached-descriptors": + file_parser = lambda f: stem.descriptor.server_descriptor._parse_file(f, validate = validate, **kwargs) + elif filename == "cached-extrainfo": + file_parser = lambda f: stem.descriptor.extrainfo_descriptor._parse_file(f, validate = validate, **kwargs) + elif filename == "cached-microdescs": + file_parser = lambda f: stem.descriptor.microdescriptor._parse_file(f, validate = validate, **kwargs) + elif filename == "cached-consensus": + file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, validate = validate, document_handler = document_handler, **kwargs) + elif filename == "cached-microdesc-consensus": + file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs) + + if file_parser: + for desc in file_parser(descriptor_file): + if descriptor_path is not None: + desc._set_path(os.path.abspath(descriptor_path)) + + yield desc + + return + + # Not recognized as a descriptor file. + + raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line)) + + +def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler, **kwargs): + # Parses descriptor files from metrics, yielding individual descriptors. This + # throws a TypeError if the descriptor_type or version isn't recognized. 
+ + if descriptor_type == "server-descriptor" and major_version == 1: + for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs): + yield desc + elif descriptor_type == "bridge-server-descriptor" and major_version == 1: + for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs): + yield desc + elif descriptor_type == "extra-info" and major_version == 1: + for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs): + yield desc + elif descriptor_type == "microdescriptor" and major_version == 1: + for desc in stem.descriptor.microdescriptor._parse_file(descriptor_file, validate = validate, **kwargs): + yield desc + elif descriptor_type == "bridge-extra-info" and major_version == 1: + # version 1.1 introduced a 'transport' field... + # https://trac.torproject.org/6257 + + for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs): + yield desc + elif descriptor_type == "network-status-2" and major_version == 1: + document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV2 + + for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs): + yield desc + elif descriptor_type == "dir-key-certificate-3" and major_version == 1: + for desc in stem.descriptor.networkstatus._parse_file_key_certs(descriptor_file, validate = validate, **kwargs): + yield desc + elif descriptor_type in ("network-status-consensus-3", "network-status-vote-3") and major_version == 1: + document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3 + + for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs): + yield desc + elif 
descriptor_type == "network-status-microdesc-consensus-3" and major_version == 1: + document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3 + + for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs): + yield desc + elif descriptor_type == "bridge-network-status" and major_version == 1: + document_type = stem.descriptor.networkstatus.BridgeNetworkStatusDocument + + for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs): + yield desc + elif descriptor_type == "tordnsel" and major_version == 1: + document_type = stem.descriptor.tordnsel.TorDNSEL + + for desc in stem.descriptor.tordnsel._parse_file(descriptor_file, validate = validate, **kwargs): + yield desc + else: + raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version)) + + +class Descriptor(object): + """ + Common parent for all types of descriptors. + """ + + def __init__(self, contents): + self._path = None + self._archive_path = None + self._raw_contents = contents + + def get_path(self): + """ + Provides the absolute path that we loaded this descriptor from. + + :returns: **str** with the absolute path of the descriptor source + """ + + return self._path + + def get_archive_path(self): + """ + If this descriptor came from an archive then provides its path within the + archive. This is only set if the descriptor came from a + :class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this + descriptor didn't come from an archive. + + :returns: **str** with the descriptor's path within the archive + """ + + return self._archive_path + + def get_bytes(self): + """ + Provides the ASCII **bytes** of the descriptor. 
This only differs from + **str()** if you're running python 3.x, in which case **str()** provides a + **unicode** string. + + :returns: **bytes** for the descriptor's contents + """ + + return self._raw_contents + + def get_unrecognized_lines(self): + """ + Provides a list of lines that were either ignored or had data that we did + not know how to process. This is most common due to new descriptor fields + that this library does not yet know how to process. Patches welcome! + + :returns: **list** of lines of unrecognized content + """ + + raise NotImplementedError + + def _set_path(self, path): + self._path = path + + def _set_archive_path(self, path): + self._archive_path = path + + def __str__(self): + if stem.prereq.is_python_3(): + return stem.util.str_tools._to_unicode(self._raw_contents) + else: + return self._raw_contents + + +def _get_bytes_field(keyword, content): + """ + Provides the value corresponding to the given keyword. This is handy to fetch + values specifically allowed to be arbitrary bytes prior to converting to + unicode. + + :param str keyword: line to look up + :param bytes content: content to look through + + :returns: **bytes** value on the given line, **None** if the line doesn't + exist + + :raises: **ValueError** if the content isn't bytes + """ + + if not isinstance(content, bytes): + raise ValueError("Content must be bytes, got a %s" % type(content)) + + line_match = re.search(stem.util.str_tools._to_bytes("^(opt )?%s(?:[%s]+(.*))?$" % (keyword, WHITESPACE)), content, re.MULTILINE) + + if line_match: + value = line_match.groups()[1] + return b"" if value is None else value + else: + return None + + +def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False): + """ + Reads from the descriptor file until we get to one of the given keywords or reach the + end of the file. 
def _get_pseudo_pgp_block(remaining_contents):
    """
    Pops a leading pseudo-Open-PGP-style block off the given lines, if there is
    one.

    The first line is matched against PGP_BLOCK_START. If it matches then every
    line up to and including the corresponding PGP_BLOCK_END line is removed
    from the front of **remaining_contents** and handed back as a single
    string. If it doesn't match then the list is left untouched.

    :param list remaining_contents: lines to be checked for a leading block,
      this is modified in place when a block is found

    :returns: **str** with the newline joined lines of the block (armor
      included), or **None** if there's nothing left or the contents don't
      start with a block

    :raises: **ValueError** if a block starts but its ending line is never
      found (by then all of the lines have been consumed)
    """

    if not remaining_contents:
        return None  # nothing left

    start_match = PGP_BLOCK_START.match(remaining_contents[0])

    if not start_match:
        return None  # the next line doesn't open a block

    # the ending line needs to name the same block type as the starting line
    end_line = PGP_BLOCK_END % start_match.groups()[0]
    consumed = []

    while remaining_contents:
        consumed.append(remaining_contents.pop(0))

        if consumed[-1] == end_line:
            return "\n".join(consumed)

    raise ValueError("Unterminated pgp style block (looking for '%s'):\n%s" % (end_line, "\n".join(consumed)))


def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
    """
    Breaks descriptor content up into its keyword lines to make later parsing
    easier.

    Descriptors are a series of 'keyword lines', each being a keyword followed
    by an optional value. A line may additionally be followed by a pgp style
    block, which gets attached to that line's entry.

    Keywords listed in extra_keywords are diverted into a separate list that
    keeps them in the order they appeared. This is for entries whose relative
    ordering matters, such as the 'accept' and 'reject' lines that make up an
    exit policy.

    :param str raw_contents: descriptor content provided by the relay
    :param bool validate: raises for malformed lines if **True**, silently
      skips them otherwise
    :param list extra_keywords: entity keywords to put into a separate listing
      with ordering intact

    :returns:
      **collections.OrderedDict** mapping each keyword to a list of its
      (value, pgp block) tuples. If extra_keywords was provided then this is
      instead a two value tuple, the second value being a list of the
      'keyword value' strings for those entries.

    :raises: **ValueError** if validate is **True** and a line either isn't a
      valid keyword line or starts a pgp style block that's never terminated
    """

    components = OrderedDict()
    ordered_extras = []  # 'keyword value' strings for the extra_keywords
    pending = raw_contents.split("\n")

    while pending:
        current = pending.pop(0)

        # V2 network status documents explicitly can contain blank lines...
        #
        #   "Implementations MAY insert blank lines for clarity between sections;
        #   these blank lines are ignored."
        #
        # ... and server descriptors end with an extra newline. Other documents
        # don't say how blank lines should be handled, so they're ignored
        # everywhere.

        if not current:
            continue

        # Some lines have an 'opt ' prefix for backward compatibility, which is
        # dropped. This prefix is being removed in...
        # https://trac.torproject.org/projects/tor/ticket/5124

        if current.startswith("opt "):
            current = current[4:]

        parsed = KEYWORD_LINE.match(current)

        if not parsed:
            if validate:
                raise ValueError("Line contains invalid characters: %s" % current)

            continue

        keyword, value = parsed.groups()
        value = '' if value is None else value

        # a pgp style block directly after this line belongs to its entry
        try:
            pgp_block = _get_pseudo_pgp_block(pending)
        except ValueError:
            if validate:
                raise

            continue

        if keyword in extra_keywords:
            ordered_extras.append("%s %s" % (keyword, value))
        else:
            components.setdefault(keyword, []).append((value, pgp_block))

    if extra_keywords:
        return components, ordered_extras

    return components
class _ExportDialect(csv.excel):
    # Same as the stock excel dialect, except that rows end with a bare newline
    # rather than a carriage return and newline.
    lineterminator = '\n'


def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
    """
    Provides a newline separated CSV for one or more descriptors. If simply
    provided with descriptors then the CSV contains all of its attributes,
    labeled with a header row. Either 'included_fields' or 'excluded_fields' can
    be used for more granular control over its attributes and the order.

    :param Descriptor,list descriptors: either a
      :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
    :param list included_fields: attributes to include in the csv
    :param list excluded_fields: attributes to exclude from the csv
    :param bool header: if **True** then the first line will be a comma separated
      list of the attribute names (**only supported in python 2.7 and higher**)

    :returns: **str** of the CSV for the descriptors, one per line
    :raises: **ValueError** if descriptors contain more than one descriptor type
    """

    # cStringIO only exists in python 2.x, io.StringIO is what python 3.x
    # provides in its place
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    output_buffer = StringIO()
    export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
    return output_buffer.getvalue()


def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
    """
    Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
    written directly to a file. Nothing is written if there are no descriptors
    or if the arguments are rejected.

    :param file output_file: file to be written to
    :param Descriptor,list descriptors: either a
      :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
    :param list included_fields: attributes to include in the csv
    :param list excluded_fields: attributes to exclude from the csv
    :param bool header: if **True** then the first line will be a comma separated
      list of the attribute names (**only supported in python 2.7 and higher**)

    :raises: **ValueError** if descriptors contain a non-descriptor or more than
      one descriptor type, or if included_fields has an attribute that the
      descriptors lack
    """

    if isinstance(descriptors, stem.descriptor.Descriptor):
        descriptors = (descriptors,)

    if not descriptors:
        return

    descriptor_type = type(descriptors[0])
    descriptor_type_label = descriptor_type.__name__

    # Checking everything we've been given before touching the output. This way
    # bad input gets the documented ValueError (rather than a TypeError from
    # vars() on the first entry) and we don't leave a partially written CSV
    # behind.

    for desc in descriptors:
        if not isinstance(desc, stem.descriptor.Descriptor):
            raise ValueError("Unable to export a descriptor CSV since %s is not a descriptor." % type(desc).__name__)
        elif descriptor_type != type(desc):
            raise ValueError("To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s." % (descriptor_type_label, type(desc).__name__))

    included_fields = list(included_fields)

    # If the user didn't specify the fields to include then export everything,
    # ordered alphabetically. If they did specify fields then make sure that
    # they exist.

    desc_attr = sorted(vars(descriptors[0]).keys())

    if included_fields:
        for field in included_fields:
            if field not in desc_attr:
                raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ", ".join(desc_attr)))
    else:
        # private attributes are only exported when explicitly asked for
        included_fields = [attr for attr in desc_attr if not attr.startswith('_')]

    for field in excluded_fields:
        try:
            included_fields.remove(field)
        except ValueError:
            pass  # excluding a field that we weren't going to include anyway

    # extrasaction is 'ignore' since vars() provides all of the descriptor's
    # attributes, not just the fields that we're exporting
    writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')

    if header and stem.prereq.is_python_27():
        writer.writeheader()

    for desc in descriptors:
        writer.writerow(vars(desc))
+ + =================== =========== + DirResponse Description + =================== =========== + **OK** network status requests that were answered + **NOT_ENOUGH_SIGS** network status wasn't signed by enough authorities + **UNAVAILABLE** requested network status was unavailable + **NOT_FOUND** requested network status was not found + **NOT_MODIFIED** network status unmodified since If-Modified-Since time + **BUSY** directory was busy + =================== =========== + +.. data:: DirStat (enum) + + Enumeration for known stats for ExtraInfoDescriptor's dir_*_direct_dl and + dir_*_tunneled_dl. + + ===================== =========== + DirStat Description + ===================== =========== + **COMPLETE** requests that completed successfully + **TIMEOUT** requests that didn't complete within a ten minute timeout + **RUNNING** requests still in process when measurement's taken + **MIN** smallest rate at which a descriptor was downloaded in B/s + **MAX** largest rate at which a descriptor was downloaded in B/s + **D1-4** and **D6-9** rate of the slowest x/10 download rates in B/s + **Q1** and **Q3** rate of the slowest and fastest quarter download rates in B/s + **MD** median download rate in B/s + ===================== =========== +""" + +import datetime +import hashlib +import re + +import stem.util.connection +import stem.util.enum +import stem.util.str_tools + +from stem.descriptor import ( + PGP_BLOCK_END, + Descriptor, + _read_until_keywords, + _get_descriptor_components, +) + +try: + # added in python 3.2 + from functools import lru_cache +except ImportError: + from stem.util.lru_cache import lru_cache + +# known statuses for dirreq-v2-resp and dirreq-v3-resp... +DirResponse = stem.util.enum.Enum( + ("OK", "ok"), + ("NOT_ENOUGH_SIGS", "not-enough-sigs"), + ("UNAVAILABLE", "unavailable"), + ("NOT_FOUND", "not-found"), + ("NOT_MODIFIED", "not-modified"), + ("BUSY", "busy"), +) + +# known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl... 
def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
    """
    Iterates over the extra-info descriptors in a file.

    :param file descriptor_file: file with descriptor content
    :param bool is_bridge: parses the file as being a bridge descriptor
    :param bool validate: checks the validity of the descriptor's content if
      **True**, skips these checks otherwise
    :param dict kwargs: additional arguments for the descriptor constructor

    :returns: iterator for :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
      instances in the file

    :raises:
      * **ValueError** if the contents is malformed and validate is **True**
      * **IOError** if the file can't be read
    """

    # first word of the pgp style block's ending line, used to find where the
    # signature block (and hence the descriptor) finishes
    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]

    while True:
        # everything up to the 'router-signature' line...
        extrainfo_content = _read_until_keywords("router-signature", descriptor_file)

        # ... followed by that line and its pgp style block
        extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)

        if not extrainfo_content:
            break  # done parsing file

        if is_bridge:
            yield BridgeExtraInfoDescriptor(b"".join(extrainfo_content), validate, **kwargs)
        else:
            yield RelayExtraInfoDescriptor(b"".join(extrainfo_content), validate, **kwargs)


def _parse_timestamp_and_interval(keyword, content):
    """
    Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry.

    :param str keyword: line's keyword, only used for error messages
    :param str content: line content to be parsed

    :returns: **tuple** of the form (timestamp (**datetime**), interval
      (**int**), remaining content (**str**)), the remaining content being
      **None** if nothing follows the interval

    :raises: **ValueError** if the content is malformed
    """

    line = "%s %s" % (keyword, content)

    # Raw string so the escaped parentheses reach the regex engine rather than
    # being treated as (invalid) string escapes. The interval group only
    # matches digits, so it needs no further checking before the int()
    # conversion.

    content_match = re.match(r"^(.*) \(([0-9]+) s\)( .*)?$", content)

    if not content_match:
        raise ValueError("Malformed %s line: %s" % (keyword, line))

    timestamp_str, interval, remainder = content_match.groups()

    if remainder:
        remainder = remainder[1:]  # remove leading space

    try:
        timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))

    return timestamp, int(interval), remainder
+ + :var str nickname: **\*** relay's nickname + :var str fingerprint: **\*** identity key fingerprint + :var datetime published: **\*** time in UTC when this descriptor was made + :var str geoip_db_digest: sha1 of the geoIP database file for IPv4 addresses + :var str geoip6_db_digest: sha1 of the geoIP database file for IPv6 addresses + :var dict transport: **\*** mapping of transport methods to their (address, + port, args) tuple, these usually appear on bridges in which case all of + those are **None** + + **Bi-directional connection usage:** + + :var datetime conn_bi_direct_end: end of the sampling interval + :var int conn_bi_direct_interval: seconds per interval + :var int conn_bi_direct_below: connections that read/wrote less than 20 KiB + :var int conn_bi_direct_read: connections that read at least 10x more than wrote + :var int conn_bi_direct_write: connections that wrote at least 10x more than read + :var int conn_bi_direct_both: remaining connections + + **Bytes read/written for relayed traffic:** + + :var datetime read_history_end: end of the sampling interval + :var int read_history_interval: seconds per interval + :var list read_history_values: bytes read during each interval + + :var datetime write_history_end: end of the sampling interval + :var int write_history_interval: seconds per interval + :var list write_history_values: bytes written during each interval + + **Cell relaying statistics:** + + :var datetime cell_stats_end: end of the period when stats were gathered + :var int cell_stats_interval: length in seconds of the interval + :var list cell_processed_cells: measurement of processed cells per circuit + :var list cell_queued_cells: measurement of queued cells per circuit + :var list cell_time_in_queue: mean enqueued time in milliseconds for cells + :var int cell_circuits_per_decile: mean number of circuits in a decile + + **Directory Mirror Attributes:** + + :var datetime dir_stats_end: end of the period when stats were gathered + :var int 
dir_stats_interval: length in seconds of the interval + :var dict dir_v2_ips: mapping of locales to rounded count of requester ips + :var dict dir_v3_ips: mapping of locales to rounded count of requester ips + :var float dir_v2_share: percent of total directory traffic it expects to serve + :var float dir_v3_share: percent of total directory traffic it expects to serve + :var dict dir_v2_requests: mapping of locales to rounded count of requests + :var dict dir_v3_requests: mapping of locales to rounded count of requests + + :var dict dir_v2_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count + :var dict dir_v3_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count + :var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count + :var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count + + :var dict dir_v2_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort + :var dict dir_v3_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort + :var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement + :var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement + + :var dict dir_v2_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort + :var dict dir_v3_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort + :var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement + :var dict dir_v3_tunneled_dl_unknown: mapping of unrecognized stats to their measurement + + **Bytes read/written for directory mirroring:** + + :var datetime dir_read_history_end: end of the sampling interval + :var int dir_read_history_interval: seconds per interval + :var list 
dir_read_history_values: bytes read during each interval + + :var datetime dir_write_history_end: end of the sampling interval + :var int dir_write_history_interval: seconds per interval + :var list dir_write_history_values: bytes read during each interval + + **Guard Attributes:** + + :var datetime entry_stats_end: end of the period when stats were gathered + :var int entry_stats_interval: length in seconds of the interval + :var dict entry_ips: mapping of locales to rounded count of unique user ips + + **Exit Attributes:** + + :var datetime exit_stats_end: end of the period when stats were gathered + :var int exit_stats_interval: length in seconds of the interval + :var dict exit_kibibytes_written: traffic per port (keys are ints or 'other') + :var dict exit_kibibytes_read: traffic per port (keys are ints or 'other') + :var dict exit_streams_opened: streams per port (keys are ints or 'other') + + **Bridge Attributes:** + + :var datetime bridge_stats_end: end of the period when stats were gathered + :var int bridge_stats_interval: length in seconds of the interval + :var dict bridge_ips: mapping of locales to rounded count of unique user ips + :var datetime geoip_start_time: replaced by bridge_stats_end (deprecated) + :var dict geoip_client_origins: replaced by bridge_ips (deprecated) + :var dict ip_versions: mapping of ip protocols to a rounded count for the number of users + :var dict ip_versions: mapping of ip transports to a count for the number of users + + **\*** attribute is either required when we're parsed with validation or has + a default value, others are left as **None** if undefined + """ + + def __init__(self, raw_contents, validate = True): + """ + Extra-info descriptor constructor. By default this validates the + descriptor's content as it's parsed. This validation can be disabled to + either improve performance or be accepting of malformed data. 
+ + :param str raw_contents: extra-info content provided by the relay + :param bool validate: checks the validity of the extra-info descriptor if + **True**, skips these checks otherwise + + :raises: **ValueError** if the contents is malformed and validate is True + """ + + super(ExtraInfoDescriptor, self).__init__(raw_contents) + raw_contents = stem.util.str_tools._to_unicode(raw_contents) + + self.nickname = None + self.fingerprint = None + self.published = None + self.geoip_db_digest = None + self.geoip6_db_digest = None + self.transport = {} + + self.conn_bi_direct_end = None + self.conn_bi_direct_interval = None + self.conn_bi_direct_below = None + self.conn_bi_direct_read = None + self.conn_bi_direct_write = None + self.conn_bi_direct_both = None + + self.read_history_end = None + self.read_history_interval = None + self.read_history_values = None + + self.write_history_end = None + self.write_history_interval = None + self.write_history_values = None + + self.cell_stats_end = None + self.cell_stats_interval = None + self.cell_processed_cells = None + self.cell_queued_cells = None + self.cell_time_in_queue = None + self.cell_circuits_per_decile = None + + self.dir_stats_end = None + self.dir_stats_interval = None + self.dir_v2_ips = None + self.dir_v3_ips = None + self.dir_v2_share = None + self.dir_v3_share = None + self.dir_v2_requests = None + self.dir_v3_requests = None + self.dir_v2_responses = None + self.dir_v3_responses = None + self.dir_v2_responses_unknown = None + self.dir_v3_responses_unknown = None + self.dir_v2_direct_dl = None + self.dir_v3_direct_dl = None + self.dir_v2_direct_dl_unknown = None + self.dir_v3_direct_dl_unknown = None + self.dir_v2_tunneled_dl = None + self.dir_v3_tunneled_dl = None + self.dir_v2_tunneled_dl_unknown = None + self.dir_v3_tunneled_dl_unknown = None + + self.dir_read_history_end = None + self.dir_read_history_interval = None + self.dir_read_history_values = None + + self.dir_write_history_end = None + 
self.dir_write_history_interval = None + self.dir_write_history_values = None + + self.entry_stats_end = None + self.entry_stats_interval = None + self.entry_ips = None + + self.exit_stats_end = None + self.exit_stats_interval = None + self.exit_kibibytes_written = None + self.exit_kibibytes_read = None + self.exit_streams_opened = None + + self.bridge_stats_end = None + self.bridge_stats_interval = None + self.bridge_ips = None + self.geoip_start_time = None + self.geoip_client_origins = None + + self.ip_versions = None + self.ip_transports = None + + self._unrecognized_lines = [] + + entries = _get_descriptor_components(raw_contents, validate) + + if validate: + for keyword in self._required_fields(): + if not keyword in entries: + raise ValueError("Extra-info descriptor must have a '%s' entry" % keyword) + + for keyword in self._required_fields() + SINGLE_FIELDS: + if keyword in entries and len(entries[keyword]) > 1: + raise ValueError("The '%s' entry can only appear once in an extra-info descriptor" % keyword) + + expected_first_keyword = self._first_keyword() + if expected_first_keyword and expected_first_keyword != entries.keys()[0]: + raise ValueError("Extra-info descriptor must start with a '%s' entry" % expected_first_keyword) + + expected_last_keyword = self._last_keyword() + if expected_last_keyword and expected_last_keyword != entries.keys()[-1]: + raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword) + + self._parse(entries, validate) + + def get_unrecognized_lines(self): + return list(self._unrecognized_lines) + + def _parse(self, entries, validate): + """ + Parses a series of 'keyword => (value, pgp block)' mappings and applies + them as attributes. 
+ + :param dict entries: descriptor contents to be applied + :param bool validate: checks the validity of descriptor content if True + + :raises: **ValueError** if an error occurs in validation + """ + + for keyword, values in entries.items(): + # most just work with the first (and only) value + value, _ = values[0] + line = "%s %s" % (keyword, value) # original line + + if keyword == "extra-info": + # "extra-info" Nickname Fingerprint + extra_info_comp = value.split() + + if len(extra_info_comp) < 2: + if not validate: + continue + + raise ValueError("Extra-info line must have two values: %s" % line) + + if validate: + if not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]): + raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0]) + elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]): + raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % extra_info_comp[1]) + + self.nickname = extra_info_comp[0] + self.fingerprint = extra_info_comp[1] + elif keyword == "geoip-db-digest": + # "geoip-db-digest" Digest + + if validate and not stem.util.tor_tools.is_hex_digits(value, 40): + raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line) + + self.geoip_db_digest = value + elif keyword == "geoip6-db-digest": + # "geoip6-db-digest" Digest + + if validate and not stem.util.tor_tools.is_hex_digits(value, 40): + raise ValueError("Geoip v6 digest line had an invalid sha1 digest: %s" % line) + + self.geoip6_db_digest = value + elif keyword == "transport": + # "transport" transportname address:port [arglist] + # Everything after the transportname is scrubbed in published bridge + # descriptors, so we'll never see it in practice. + # + # These entries really only make sense for bridges, but have been seen + # on non-bridges in the wild when the relay operator configured it this + # way. 
+ + for transport_value, _ in values: + name, address, port, args = None, None, None, None + + if not ' ' in transport_value: + # scrubbed + name = transport_value + else: + # not scrubbed + value_comp = transport_value.split() + + if len(value_comp) < 1: + raise ValueError("Transport line is missing its transport name: %s" % line) + else: + name = value_comp[0] + + if len(value_comp) < 2: + raise ValueError("Transport line is missing its address:port value: %s" % line) + elif not ":" in value_comp[1]: + raise ValueError("Transport line's address:port entry is missing a colon: %s" % line) + else: + address, port_str = value_comp[1].split(':', 1) + + if not stem.util.connection.is_valid_ipv4_address(address) or \ + stem.util.connection.is_valid_ipv6_address(address): + raise ValueError("Transport line has a malformed address: %s" % line) + elif not stem.util.connection.is_valid_port(port_str): + raise ValueError("Transport line has a malformed port: %s" % line) + + port = int(port_str) + + if len(value_comp) >= 3: + args = value_comp[2:] + else: + args = [] + + self.transport[name] = (address, port, args) + elif keyword == "cell-circuits-per-decile": + # "cell-circuits-per-decile" num + + if not value.isdigit(): + if validate: + raise ValueError("Non-numeric cell-circuits-per-decile value: %s" % line) + else: + continue + + stat = int(value) + + if validate and stat < 0: + raise ValueError("Negative cell-circuits-per-decile value: %s" % line) + + self.cell_circuits_per_decile = stat + elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"): + recognized_counts = {} + unrecognized_counts = {} + + is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp") + key_set = DirResponse if is_response_stats else DirStat + + key_type = "STATUS" if is_response_stats else "STAT" + error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line) + + if 
value: + for entry in value.split(","): + if not "=" in entry: + if validate: + raise ValueError(error_msg) + else: + continue + + status, count = entry.split("=", 1) + + if count.isdigit(): + if status in key_set: + recognized_counts[status] = int(count) + else: + unrecognized_counts[status] = int(count) + elif validate: + raise ValueError(error_msg) + + if keyword == "dirreq-v2-resp": + self.dir_v2_responses = recognized_counts + self.dir_v2_responses_unknown = unrecognized_counts + elif keyword == "dirreq-v3-resp": + self.dir_v3_responses = recognized_counts + self.dir_v3_responses_unknown = unrecognized_counts + elif keyword == "dirreq-v2-direct-dl": + self.dir_v2_direct_dl = recognized_counts + self.dir_v2_direct_dl_unknown = unrecognized_counts + elif keyword == "dirreq-v3-direct-dl": + self.dir_v3_direct_dl = recognized_counts + self.dir_v3_direct_dl_unknown = unrecognized_counts + elif keyword == "dirreq-v2-tunneled-dl": + self.dir_v2_tunneled_dl = recognized_counts + self.dir_v2_tunneled_dl_unknown = unrecognized_counts + elif keyword == "dirreq-v3-tunneled-dl": + self.dir_v3_tunneled_dl = recognized_counts + self.dir_v3_tunneled_dl_unknown = unrecognized_counts + elif keyword in ("dirreq-v2-share", "dirreq-v3-share"): + # "" num% + + try: + if not value.endswith("%"): + raise ValueError() + + percentage = float(value[:-1]) / 100 + + # Bug lets these be above 100%, however they're soon going away... 
+ # https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html + + if validate and percentage < 0: + raise ValueError("Negative percentage value: %s" % line) + + if keyword == "dirreq-v2-share": + self.dir_v2_share = percentage + elif keyword == "dirreq-v3-share": + self.dir_v3_share = percentage + except ValueError as exc: + if validate: + raise ValueError("Value can't be parsed as a percentage: %s" % line) + elif keyword in ("cell-processed-cells", "cell-queued-cells", "cell-time-in-queue"): + # "" num,...,num + + entries = [] + + if value: + for entry in value.split(","): + try: + # Values should be positive but as discussed in ticket #5849 + # there was a bug around this. It was fixed in tor 0.2.2.1. + + entries.append(float(entry)) + except ValueError: + if validate: + raise ValueError("Non-numeric entry in %s listing: %s" % (keyword, line)) + + if keyword == "cell-processed-cells": + self.cell_processed_cells = entries + elif keyword == "cell-queued-cells": + self.cell_queued_cells = entries + elif keyword == "cell-time-in-queue": + self.cell_time_in_queue = entries + elif keyword in ("published", "geoip-start-time"): + # "" YYYY-MM-DD HH:MM:SS + + try: + timestamp = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + + if keyword == "published": + self.published = timestamp + elif keyword == "geoip-start-time": + self.geoip_start_time = timestamp + except ValueError: + if validate: + raise ValueError("Timestamp on %s line wasn't parsable: %s" % (keyword, line)) + elif keyword in ("cell-stats-end", "entry-stats-end", "exit-stats-end", "bridge-stats-end", "dirreq-stats-end"): + # "" YYYY-MM-DD HH:MM:SS (NSEC s) + + try: + timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value) + + if keyword == "cell-stats-end": + self.cell_stats_end = timestamp + self.cell_stats_interval = interval + elif keyword == "entry-stats-end": + self.entry_stats_end = timestamp + self.entry_stats_interval = interval + elif keyword == "exit-stats-end": + 
self.exit_stats_end = timestamp + self.exit_stats_interval = interval + elif keyword == "bridge-stats-end": + self.bridge_stats_end = timestamp + self.bridge_stats_interval = interval + elif keyword == "dirreq-stats-end": + self.dir_stats_end = timestamp + self.dir_stats_interval = interval + except ValueError as exc: + if validate: + raise exc + elif keyword == "conn-bi-direct": + # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH + + try: + timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value) + stats = remainder.split(",") + + if len(stats) != 4 or not \ + (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()): + raise ValueError("conn-bi-direct line should end with four numeric values: %s" % line) + + self.conn_bi_direct_end = timestamp + self.conn_bi_direct_interval = interval + self.conn_bi_direct_below = int(stats[0]) + self.conn_bi_direct_read = int(stats[1]) + self.conn_bi_direct_write = int(stats[2]) + self.conn_bi_direct_both = int(stats[3]) + except ValueError as exc: + if validate: + raise exc + elif keyword in ("read-history", "write-history", "dirreq-read-history", "dirreq-write-history"): + # "" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM... 
+ try: + timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value) + history_values = [] + + if remainder: + try: + history_values = [int(entry) for entry in remainder.split(",")] + except ValueError: + raise ValueError("%s line has non-numeric values: %s" % (keyword, line)) + + if keyword == "read-history": + self.read_history_end = timestamp + self.read_history_interval = interval + self.read_history_values = history_values + elif keyword == "write-history": + self.write_history_end = timestamp + self.write_history_interval = interval + self.write_history_values = history_values + elif keyword == "dirreq-read-history": + self.dir_read_history_end = timestamp + self.dir_read_history_interval = interval + self.dir_read_history_values = history_values + elif keyword == "dirreq-write-history": + self.dir_write_history_end = timestamp + self.dir_write_history_interval = interval + self.dir_write_history_values = history_values + except ValueError as exc: + if validate: + raise exc + elif keyword in ("exit-kibibytes-written", "exit-kibibytes-read", "exit-streams-opened"): + # "" port=N,port=N,... 
+ + port_mappings = {} + error_msg = "Entries in %s line should only be PORT=N entries: %s" % (keyword, line) + + if value: + for entry in value.split(","): + if not "=" in entry: + if validate: + raise ValueError(error_msg) + else: + continue + + port, stat = entry.split("=", 1) + + if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit(): + if port != 'other': + port = int(port) + port_mappings[port] = int(stat) + elif validate: + raise ValueError(error_msg) + + if keyword == "exit-kibibytes-written": + self.exit_kibibytes_written = port_mappings + elif keyword == "exit-kibibytes-read": + self.exit_kibibytes_read = port_mappings + elif keyword == "exit-streams-opened": + self.exit_streams_opened = port_mappings + elif keyword in ("dirreq-v2-ips", "dirreq-v3-ips", "dirreq-v2-reqs", "dirreq-v3-reqs", "geoip-client-origins", "entry-ips", "bridge-ips"): + # "" CC=N,CC=N,... + # + # The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric + # locale codes for some special values, for instance... 
+ # A1,"Anonymous Proxy" + # A2,"Satellite Provider" + # ??,"Unknown" + + locale_usage = {} + error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line) + + if value: + for entry in value.split(","): + if not "=" in entry: + if validate: + raise ValueError(error_msg) + else: + continue + + locale, count = entry.split("=", 1) + + if re.match("^[a-zA-Z0-9\?]{2}$", locale) and count.isdigit(): + locale_usage[locale] = int(count) + elif validate: + raise ValueError(error_msg) + + if keyword == "dirreq-v2-ips": + self.dir_v2_ips = locale_usage + elif keyword == "dirreq-v3-ips": + self.dir_v3_ips = locale_usage + elif keyword == "dirreq-v2-reqs": + self.dir_v2_requests = locale_usage + elif keyword == "dirreq-v3-reqs": + self.dir_v3_requests = locale_usage + elif keyword == "geoip-client-origins": + self.geoip_client_origins = locale_usage + elif keyword == "entry-ips": + self.entry_ips = locale_usage + elif keyword == "bridge-ips": + self.bridge_ips = locale_usage + elif keyword == "bridge-ip-versions": + self.ip_versions = {} + + if value: + for entry in value.split(','): + if not '=' in entry: + raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '=' mappings: %s" % line) + + protocol, count = entry.split('=', 1) + + if not count.isdigit(): + raise stem.ProtocolError("IP protocol count was non-numeric (%s): %s" % (count, line)) + + self.ip_versions[protocol] = int(count) + elif keyword == "bridge-ip-transports": + self.ip_transports = {} + + if value: + for entry in value.split(','): + if not '=' in entry: + raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '=' mappings: %s" % line) + + protocol, count = entry.split('=', 1) + + if not count.isdigit(): + raise stem.ProtocolError("Transport count was non-numeric (%s): %s" % (count, line)) + + self.ip_transports[protocol] = int(count) + else: + self._unrecognized_lines.append(line) + + def digest(self): + """ + Provides 
class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
  r"""
  Relay extra-info descriptor, constructed from data such as that provided by
  "GETINFO extra-info/digest/\*", cached descriptors, and metrics (see the
  extra-info document section of tor's directory specification).

  :var str signature: **\*** signature for this extrainfo descriptor

  **\*** attribute is required when we're parsed with validation
  """

  def __init__(self, raw_contents, validate = True):
    """
    :param str raw_contents: extra-info content provided by the relay
    :param bool validate: checks the validity of the content if **True**,
      skips these checks otherwise

    :raises: **ValueError** if the contents is malformed and validate is True
    """

    # Default assigned before delegating to our parent's constructor.
    # Presumably that constructor is what invokes our _parse(), which then
    # fills in the real value - confirm against ExtraInfoDescriptor.__init__.

    self.signature = None

    super(RelayExtraInfoDescriptor, self).__init__(raw_contents, validate)

  @lru_cache()
  def digest(self):
    """
    Provides the upper-case hex encoded sha1 of our content, covering
    everything up to and including the 'router-signature' line.

    :returns: **str** with the upper-case hex digest value
    """

    # our digest is calculated from everything except our signature
    #
    # NOTE: if the 'router-signature' line is absent then find() returns -1
    # and only a short prefix of our content ends up being hashed.

    raw_content, ending = str(self), "\nrouter-signature\n"
    raw_content = raw_content[:raw_content.find(ending) + len(ending)]
    return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper()

  def _parse(self, entries, validate):
    """
    Handles the 'router-signature' field, which only relay extra-info
    descriptors have, then hands the remaining entries to our parent class.

    :param dict entries: keyword => [(value, pgp block), ...] mappings
    :param bool validate: checks the validity of the content if **True**

    :raises: **ValueError** if validating and the signature line lacks a
      signature block
    """

    entries = dict(entries)  # shallow copy since we're destructive

    # We pop the entry rather than deleting it inside a loop over
    # entries.items(). Changing a dictionary's size while iterating over it
    # raises a RuntimeError on python 3.

    signature_values = entries.pop("router-signature", None)

    if signature_values is not None:
      value, block_contents = signature_values[0]

      if validate and not block_contents:
        line = "%s %s" % ("router-signature", value)  # original line
        raise ValueError("Router signature line must be followed by a signature block: %s" % line)

      self.signature = block_contents

    ExtraInfoDescriptor._parse(self, entries, validate)
BridgeExtraInfoDescriptor(ExtraInfoDescriptor): + """ + Bridge extra-info descriptor (`bridge descriptor specification + `_) + """ + + def __init__(self, raw_contents, validate = True): + self._digest = None + + super(BridgeExtraInfoDescriptor, self).__init__(raw_contents, validate) + + def digest(self): + return self._digest + + def _parse(self, entries, validate): + entries = dict(entries) # shallow copy since we're destructive + + # handles fields only in server descriptors + for keyword, values in entries.items(): + value, _ = values[0] + line = "%s %s" % (keyword, value) # original line + + if keyword == "router-digest": + if validate and not stem.util.tor_tools.is_hex_digits(value, 40): + raise ValueError("Router digest line had an invalid sha1 digest: %s" % line) + + self._digest = value + del entries["router-digest"] + + ExtraInfoDescriptor._parse(self, entries, validate) + + def _required_fields(self): + excluded_fields = [ + "router-signature", + ] + + included_fields = [ + "router-digest", + ] + + return tuple(included_fields + [f for f in REQUIRED_FIELDS if not f in excluded_fields]) + + def _last_keyword(self): + return None diff --git a/lib/stem/descriptor/microdescriptor.py b/lib/stem/descriptor/microdescriptor.py new file mode 100644 index 00000000..61dce836 --- /dev/null +++ b/lib/stem/descriptor/microdescriptor.py @@ -0,0 +1,309 @@ +# Copyright 2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Parsing for Tor microdescriptors, which contain a distilled version of a +relay's server descriptor. As of Tor version 0.2.3.3-alpha Tor no longer +downloads server descriptors by default, opting for microdescriptors instead. + +Unlike most descriptor documents these aren't available on the metrics site +(since they don't contain any information that the server descriptors don't). + +The limited information in microdescriptors make them rather clunky to use +compared with server descriptors. 
For instance microdescriptors lack the +relay's fingerprint, making it difficult to use them to look up the relay's +other descriptors. + +To do so you need to match the microdescriptor's digest against its +corresponding router status entry. For added fun as of this writing the +controller doesn't even surface those router status entries +(:trac:`7953`). + +For instance, here's an example that prints the nickname and fingerprints of +the exit relays. + +:: + + import os + + from stem.control import Controller + from stem.descriptor import parse_file + + with Controller.from_port(port = 9051) as controller: + controller.authenticate() + + exit_digests = set() + data_dir = controller.get_conf("DataDirectory") + + for desc in controller.get_microdescriptors(): + if desc.exit_policy.is_exiting_allowed(): + exit_digests.add(desc.digest) + + print "Exit Relays:" + + for desc in parse_file(os.path.join(data_dir, 'cached-microdesc-consensus')): + if desc.digest in exit_digests: + print " %s (%s)" % (desc.nickname, desc.fingerprint) + +Doing the same is trivial with server descriptors... + +:: + + from stem.descriptor import parse_file + + print "Exit Relays:" + + for desc in parse_file("/home/atagar/.tor/cached-descriptors"): + if desc.exit_policy.is_exiting_allowed(): + print " %s (%s)" % (desc.nickname, desc.fingerprint) + +**Module Overview:** + +:: + + Microdescriptor - Tor microdescriptor. +""" + +import hashlib + +import stem.descriptor.router_status_entry +import stem.exit_policy + +from stem.descriptor import ( + Descriptor, + _get_descriptor_components, + _read_until_keywords, +) + +try: + # added in python 3.2 + from functools import lru_cache +except ImportError: + from stem.util.lru_cache import lru_cache + +REQUIRED_FIELDS = ( + "onion-key", +) + +SINGLE_FIELDS = ( + "onion-key", + "ntor-onion-key", + "family", + "p", + "p6", +) + + +def _parse_file(descriptor_file, validate = True, **kwargs): + """ + Iterates over the microdescriptors in a file.
class Microdescriptor(Descriptor):
  r"""
  Microdescriptor, as described by tor's directory specification.

  :var str digest: **\*** hex digest for this microdescriptor, this can be used
    to match against the corresponding digest attribute of a
    :class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3`
  :var str onion_key: **\*** key used to encrypt EXTEND cells
  :var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
  :var list or_addresses: **\*** alternative for our address/or_port attributes, each
    entry is a tuple of the form (address (**str**), port (**int**), is_ipv6
    (**bool**))
  :var list family: **\*** nicknames or fingerprints of declared family
  :var stem.exit_policy.MicroExitPolicy exit_policy: **\*** relay's exit policy
  :var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6

  **\*** attribute is required when we're parsed with validation
  """

  def __init__(self, raw_contents, validate = True, annotations = None):
    """
    :param raw_contents: content of the microdescriptor
    :param bool validate: checks the validity of the content if **True**,
      skips these checks otherwise
    :param annotations: lines that appeared prior to the descriptor, any
      iterable is accepted

    :raises: **ValueError** if the contents is malformed and validate is True
    """

    super(Microdescriptor, self).__init__(raw_contents)
    raw_contents = stem.util.str_tools._to_unicode(raw_contents)

    self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()

    self.onion_key = None
    self.ntor_onion_key = None
    self.or_addresses = []
    self.family = []
    self.exit_policy = stem.exit_policy.MicroExitPolicy("reject 1-65535")
    self.exit_policy_v6 = None

    self._unrecognized_lines = []

    # Copied into a list because callers may hand us a one-shot iterator (for
    # instance python 3's map()). Those are always truthy, and would be
    # exhausted by whichever of get_annotations() or get_annotation_lines()
    # happened to read them first.

    self._annotation_lines = list(annotations) if annotations else []

    entries = _get_descriptor_components(raw_contents, validate)
    self._parse(entries, validate)

    if validate:
      self._check_constraints(entries)

  def get_unrecognized_lines(self):
    """
    Provides a copy of the lines that _parse() didn't recognize.

    :returns: **list** of the unrecognized lines
    """

    return list(self._unrecognized_lines)

  @lru_cache()
  def get_annotations(self):
    """
    Provides content that appeared prior to the descriptor. If this comes from
    the cached-microdescs then this commonly contains content like...

    ::

      @last-listed 2013-02-24 00:18:30

    :returns: **dict** with the key/value pairs in our annotations, lines
      without a space are mapped to **None**
    """

    annotation_dict = {}

    for line in self._annotation_lines:
      if b" " in line:
        key, value = line.split(b" ", 1)
        annotation_dict[key] = value
      else:
        annotation_dict[line] = None

    return annotation_dict

  def get_annotation_lines(self):
    """
    Provides the lines of content that appeared prior to the descriptor. This
    is the same as the
    :func:`~stem.descriptor.microdescriptor.Microdescriptor.get_annotations`
    results, but with the unparsed lines and ordering retained.

    :returns: **list** with the lines of annotation that came before this descriptor
    """

    return self._annotation_lines

  def _parse(self, entries, validate):
    """
    Parses a series of 'keyword => (value, pgp block)' mappings and applies
    them as attributes.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if **True**

    :raises: **ValueError** if an error occurs in validation
    """

    for keyword, values in entries.items():
      # most just work with the first (and only) value
      value, block_contents = values[0]

      line = "%s %s" % (keyword, value)  # original line

      if block_contents:
        line += "\n%s" % block_contents

      if keyword == "onion-key":
        if validate and not block_contents:
          raise ValueError("Onion key line must be followed by a public key: %s" % line)

        self.onion_key = block_contents
      elif keyword == "ntor-onion-key":
        self.ntor_onion_key = value
      elif keyword == "a":
        # the one field that can appear multiple times, so apply every value
        for entry, _ in values:
          stem.descriptor.router_status_entry._parse_a_line(self, entry, validate)
      elif keyword == "family":
        self.family = value.split(" ")
      elif keyword == "p":
        stem.descriptor.router_status_entry._parse_p_line(self, value, validate)
      elif keyword == "p6":
        self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
      else:
        self._unrecognized_lines.append(line)

  def _check_constraints(self, entries):
    """
    Does a basic check that the entries conform to this descriptor type's
    constraints.

    :param dict entries: keyword => (value, pgp key) entries

    :raises: **ValueError** if an issue arises in validation
    """

    for keyword in REQUIRED_FIELDS:
      if keyword not in entries:
        raise ValueError("Microdescriptor must have a '%s' entry" % keyword)

    for keyword in SINGLE_FIELDS:
      if keyword in entries and len(entries[keyword]) > 1:
        raise ValueError("The '%s' entry can only appear once in a microdescriptor" % keyword)

    # next(iter(...)) rather than keys()[0] since python 3's keys() is a view
    # that can't be indexed. The required field check above is what guarantees
    # that entries is non-empty by the time we get here.

    if "onion-key" != next(iter(entries)):
      raise ValueError("Microdescriptor must start with a 'onion-key' entry")

  def _compare(self, other, method):
    # Comparisons are by our stripped string content. Anything that isn't a
    # Microdescriptor never matches.

    if not isinstance(other, Microdescriptor):
      return False

    return method(str(self).strip(), str(other).strip())

  def __hash__(self):
    return hash(str(self).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
As such we provide a couple of methods for reading +network status documents through :func:`~stem.descriptor.__init__.parse_file`. +For more information see :func:`~stem.descriptor.__init__.DocumentHandler`... + +:: + + from stem.descriptor import parse_file, DocumentHandler + + with open('.tor/cached-consensus', 'rb') as consensus_file: + # Processes the routers as we read them in. The routers refer to a document + # with an unset 'routers' attribute. + + for router in parse_file(consensus_file, 'network-status-consensus-3 1.0', document_handler = DocumentHandler.ENTRIES): + print router.nickname + +**Module Overview:** + +:: + + NetworkStatusDocument - Network status document + |- NetworkStatusDocumentV2 - Version 2 network status document + |- NetworkStatusDocumentV3 - Version 3 network status document + +- BridgeNetworkStatusDocument - Version 3 network status document for bridges + + KeyCertificate - Certificate used to authenticate an authority + DocumentSignature - Signature of a document by a directory authority + DirectoryAuthority - Directory authority as defined in a v3 network status document +""" + +import datetime +import io + +import stem.descriptor.router_status_entry +import stem.util.str_tools +import stem.util.tor_tools +import stem.version + +from stem.descriptor import ( + PGP_BLOCK_END, + Descriptor, + DocumentHandler, + _get_descriptor_components, + _read_until_keywords, +) + +# Version 2 network status document fields, tuples of the form... +# (keyword, is_mandatory) + +NETWORK_STATUS_V2_FIELDS = ( + ("network-status-version", True), + ("dir-source", True), + ("fingerprint", True), + ("contact", True), + ("dir-signing-key", True), + ("client-versions", False), + ("server-versions", False), + ("published", True), + ("dir-options", False), + ("directory-signature", True), +) + +# Network status document are either a 'vote' or 'consensus', with different +# mandatory fields for each. 
Both though require that their fields appear in a +# specific order. This is an ordered listing of the following... +# +# (field, in_votes, in_consensus, is_mandatory) + +HEADER_STATUS_DOCUMENT_FIELDS = ( + ("network-status-version", True, True, True), + ("vote-status", True, True, True), + ("consensus-methods", True, False, False), + ("consensus-method", False, True, False), + ("published", True, False, True), + ("valid-after", True, True, True), + ("fresh-until", True, True, True), + ("valid-until", True, True, True), + ("voting-delay", True, True, True), + ("client-versions", True, True, False), + ("server-versions", True, True, False), + ("known-flags", True, True, True), + ("flag-thresholds", True, False, False), + ("params", True, True, False), +) + +FOOTER_STATUS_DOCUMENT_FIELDS = ( + ("directory-footer", True, True, False), + ("bandwidth-weights", False, True, False), + ("directory-signature", True, True, True), +) + +HEADER_FIELDS = [attr[0] for attr in HEADER_STATUS_DOCUMENT_FIELDS] +FOOTER_FIELDS = [attr[0] for attr in FOOTER_STATUS_DOCUMENT_FIELDS] + +AUTH_START = "dir-source" +ROUTERS_START = "r" +FOOTER_START = "directory-footer" +V2_FOOTER_START = "directory-signature" + +DEFAULT_PARAMS = { + "bwweightscale": 10000, + "cbtdisabled": 0, + "cbtnummodes": 3, + "cbtrecentcount": 20, + "cbtmaxtimeouts": 18, + "cbtmincircs": 100, + "cbtquantile": 80, + "cbtclosequantile": 95, + "cbttestfreq": 60, + "cbtmintimeout": 2000, + "cbtinitialtimeout": 60000, + "Support022HiddenServices": 1, +} + +# KeyCertificate fields, tuple is of the form... 
def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = DocumentHandler.ENTRIES, **kwargs):
  """
  Parses a network status document, yielding content according to the
  document_handler...

  * **DOCUMENT** - single document built from the full file content
  * **BARE_DOCUMENT** - single document built from just the header and footer
  * **ENTRIES** - each router status entry in the file, constructed with a
    document built from just the header and footer (so the document they
    are given is made without the routers section, allowing for limited
    memory usage)

  :param file document_file: file with network status document content
  :param class document_type: NetworkStatusDocument subclass, defaulting to
    NetworkStatusDocumentV3 if **None**
  :param bool validate: checks the validity of the document's contents if
    **True**, skips these checks otherwise
  :param bool is_microdescriptor: **True** if this is for a microdescriptor
    consensus, **False** otherwise
  :param stem.descriptor.__init__.DocumentHandler document_handler: method in
    which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator over the documents or router status entries as described
    above

  :raises:
    * **ValueError** if the document_type or document_handler is unrecognized
      or the contents is malformed and validate is **True**
    * **IOError** if the file can't be read

    Since this is a generator these are raised when iteration begins rather
    than when this function is called.
  """

  # we can't properly default this since NetworkStatusDocumentV3 isn't defined yet

  if document_type is None:
    document_type = NetworkStatusDocumentV3

  if document_type == NetworkStatusDocumentV2:
    router_type = stem.descriptor.router_status_entry.RouterStatusEntryV2
  elif document_type == NetworkStatusDocumentV3:
    if not is_microdescriptor:
      router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
    else:
      router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3
  elif document_type == BridgeNetworkStatusDocument:
    router_type = stem.descriptor.router_status_entry.RouterStatusEntryV2
  else:
    # Formatted with %s since document_type is a class (or some arbitrary
    # object) at this point. Using %i would make the formatting itself raise a
    # TypeError, masking the ValueError we mean to raise.

    raise ValueError("Document type %s isn't recognized (only able to parse v2, v3, and bridge)" % (document_type,))

  if document_handler == DocumentHandler.DOCUMENT:
    yield document_type(document_file.read(), validate, **kwargs)
    return

  # getting the document without the routers section

  header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)

  # note the byte range of the routers section so entries can be read from it later

  routers_start = document_file.tell()
  _read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
  routers_end = document_file.tell()

  footer = document_file.readlines()
  document_content = bytes.join(b"", header + footer)

  if document_handler == DocumentHandler.BARE_DOCUMENT:
    yield document_type(document_content, validate, **kwargs)
  elif document_handler == DocumentHandler.ENTRIES:
    desc_iterator = stem.descriptor.router_status_entry._parse_file(
      document_file,
      validate,
      entry_class = router_type,
      entry_keyword = ROUTERS_START,
      start_position = routers_start,
      end_position = routers_end,
      extra_args = (document_type(document_content, validate),),
      **kwargs
    )

    for desc in desc_iterator:
      yield desc
  else:
    raise ValueError("Unrecognized document_handler: %s" % document_handler)
class NetworkStatusDocumentV2(NetworkStatusDocument):
  r"""
  Version 2 network status document. These have been deprecated and are no
  longer generated by Tor.

  :var dict routers: fingerprints to :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2`
    contained in the document

  :var int version: **\*** document version

  :var str hostname: **\*** hostname of the authority
  :var str address: **\*** authority's IP address
  :var int dir_port: **\*** authority's DirPort
  :var str fingerprint: **\*** authority's fingerprint
  :var str contact: **\*** authority's contact information
  :var str signing_key: **\*** authority's public signing key

  :var list client_versions: list of recommended client tor version strings
  :var list server_versions: list of recommended server tor version strings
  :var datetime published: **\*** time when the document was published
  :var list options: **\*** list of things that this authority decides

  :var str signing_authority: **\*** name of the authority signing the document
  :var str signature: **\*** authority's signature for the document

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, raw_content, validate = True):
    """
    :param bytes raw_content: raw network status document data
    :param bool validate: **True** if the document is to be validated,
      **False** otherwise

    :raises: **ValueError** if the document is invalid and validate is **True**
    """

    super(NetworkStatusDocumentV2, self).__init__(raw_content)

    self.version = None
    self.hostname = None
    self.address = None
    self.dir_port = None
    self.fingerprint = None
    self.contact = None
    self.signing_key = None

    self.client_versions = []
    self.server_versions = []
    self.published = None
    self.options = []

    self.signing_authority = None

    # This was previously initialized under the name 'signatures', leaving the
    # documented 'signature' attribute undefined whenever the document lacked
    # a directory-signature line.

    self.signature = None

    # Splitting the document from the routers. Unlike v3 documents we're not
    # bending over backwards on the validation by checking the field order or
    # that header/footer attributes aren't in the wrong section. This is a
    # deprecated descriptor type - patches welcome if you want those checks.

    document_file = io.BytesIO(raw_content)
    document_content = bytes.join(b"", _read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))

    router_iter = stem.descriptor.router_status_entry._parse_file(
      document_file,
      validate,
      entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2,
      entry_keyword = ROUTERS_START,
      section_end_keywords = (V2_FOOTER_START,),
      extra_args = (self,),
    )

    self.routers = dict((desc.fingerprint, desc) for desc in router_iter)

    # what remains in the file after the router entries is appended to the header

    document_content += b"\n" + document_file.read()
    document_content = stem.util.str_tools._to_unicode(document_content)

    entries = _get_descriptor_components(document_content, validate)

    if validate:
      self._check_constraints(entries)

    self._parse(entries, validate)

  def _parse(self, entries, validate):
    """
    Applies the header and footer entries as our attributes.

    :param dict entries: keyword => [(value, pgp block), ...] mappings
    :param bool validate: checks the validity of the content if **True**,
      malformed fields are skipped otherwise

    :raises: **ValueError** if validating and a field is malformed
    """

    for keyword, values in entries.items():
      value, block_contents = values[0]

      line = "%s %s" % (keyword, value)  # original line

      if block_contents:
        line += "\n%s" % block_contents

      if keyword == "network-status-version":
        if not value.isdigit():
          if not validate:
            continue

          raise ValueError("Network status document has a non-numeric version: %s" % line)

        self.version = int(value)

        if validate and self.version != 2:
          raise ValueError("Expected a version 2 network status document, got version '%s' instead" % self.version)
      elif keyword == "dir-source":
        dir_source_comp = value.split()

        if len(dir_source_comp) < 3:
          if not validate:
            continue

          raise ValueError("The 'dir-source' line of a v2 network status document must have three values: %s" % line)

        if validate:
          if not dir_source_comp[0]:
            # https://trac.torproject.org/7055
            raise ValueError("Authority's hostname can't be blank: %s" % line)
          elif not stem.util.connection.is_valid_ipv4_address(dir_source_comp[1]):
            raise ValueError("Authority's address isn't a valid IPv4 address: %s" % dir_source_comp[1])
          elif not stem.util.connection.is_valid_port(dir_source_comp[2], allow_zero = True):
            raise ValueError("Authority's DirPort is invalid: %s" % dir_source_comp[2])
        elif not dir_source_comp[2].isdigit():
          continue

        self.hostname = dir_source_comp[0]
        self.address = dir_source_comp[1]

        # a DirPort of zero is stored as None
        self.dir_port = None if dir_source_comp[2] == '0' else int(dir_source_comp[2])
      elif keyword == "fingerprint":
        if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
          raise ValueError("Authority's fingerprint in a v2 network status document is malformed: %s" % line)

        self.fingerprint = value
      elif keyword == "contact":
        self.contact = value
      elif keyword == "dir-signing-key":
        self.signing_key = block_contents
      elif keyword in ("client-versions", "server-versions"):
        # v2 documents existed while there were tor versions using the 'old'
        # style, hence we aren't attempting to parse them

        versions = self.client_versions if keyword == "client-versions" else self.server_versions
        versions.extend(value.split(","))
      elif keyword == "published":
        try:
          self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          if validate:
            raise ValueError("Version 2 network status document's 'published' time wasn't parsable: %s" % value)
      elif keyword == "dir-options":
        self.options = value.split()
      elif keyword == "directory-signature":
        self.signing_authority = value
        self.signature = block_contents
      else:
        self._unrecognized_lines.append(line)

    # 'client-versions' and 'server-versions' are only required if "Versions"
    # is among the options

    if validate and "Versions" in self.options:
      if not ('client-versions' in entries and 'server-versions' in entries):
        raise ValueError("Version 2 network status documents must have a 'client-versions' and 'server-versions' when 'Versions' is listed among its dir-options:\n%s" % str(self))

  def _check_constraints(self, entries):
    """
    Checks that mandatory fields are present, that recognized fields appear at
    most once, and that the document starts with its version line.

    :param dict entries: keyword => [(value, pgp block), ...] mappings

    :raises: **ValueError** if any of these constraints are violated
    """

    required_fields = [field for (field, is_mandatory) in NETWORK_STATUS_V2_FIELDS if is_mandatory]
    for keyword in required_fields:
      if keyword not in entries:
        raise ValueError("Network status document (v2) must have a '%s' line:\n%s" % (keyword, str(self)))

    # all recognized fields can only appear once
    single_fields = [field for (field, _) in NETWORK_STATUS_V2_FIELDS]
    for keyword in single_fields:
      if keyword in entries and len(entries[keyword]) > 1:
        raise ValueError("Network status document (v2) can only have a single '%s' line, got %i:\n%s" % (keyword, len(entries[keyword]), str(self)))

    # next(iter(...)) rather than keys()[0] since python 3's keys() is a view
    # that can't be indexed. The mandatory field check above guarantees that
    # entries is non-empty by this point.

    if 'network-status-version' != next(iter(entries)):
      raise ValueError("Network status document (v2) are expected to start with a 'network-status-version' line:\n%s" % str(self))
list server_versions: list of recommended server tor versions + :var list known_flags: **\*** list of :data:`~stem.Flag` for the router's flags + :var dict params: **\*** dict of parameter(**str**) => value(**int**) mappings + :var list directory_authorities: **\*** list of :class:`~stem.descriptor.networkstatus.DirectoryAuthority` + objects that have generated this document + :var list signatures: **\*** :class:`~stem.descriptor.networkstatus.DocumentSignature` + of the authorities that have signed the document + + **Consensus Attributes:** + + :var int consensus_method: method version used to generate this consensus + :var dict bandwidth_weights: dict of weight(str) => value(int) mappings + + **Vote Attributes:** + + :var list consensus_methods: list of ints for the supported method versions + :var datetime published: time when the document was published + :var dict flag_thresholds: **\*** mapping of internal performance thresholds used while making the vote, values are **ints** or **floats** + + **\*** attribute is either required when we're parsed with validation or has + a default value, others are left as None if undefined + """ + + def __init__(self, raw_content, validate = True, default_params = True): + """ + Parse a v3 network status document. 
+ + :param str raw_content: raw network status document data + :param bool validate: **True** if the document is to be validated, **False** otherwise + :param bool default_params: includes defaults in our params dict, otherwise + it just contains values from the document + + :raises: **ValueError** if the document is invalid + """ + + super(NetworkStatusDocumentV3, self).__init__(raw_content) + document_file = io.BytesIO(raw_content) + + self._header = _DocumentHeader(document_file, validate, default_params) + + # merge header attributes into us + for attr, value in vars(self._header).items(): + if attr != "_unrecognized_lines": + setattr(self, attr, value) + else: + self._unrecognized_lines += value + + self.directory_authorities = tuple(stem.descriptor.router_status_entry._parse_file( + document_file, + validate, + entry_class = DirectoryAuthority, + entry_keyword = AUTH_START, + section_end_keywords = (ROUTERS_START, FOOTER_START, V2_FOOTER_START), + extra_args = (self._header.is_vote,), + )) + + if validate and self._header.is_vote and len(self.directory_authorities) != 1: + raise ValueError("Votes should only have an authority entry for the one that issued it, got %i: %s" % (len(self.directory_authorities), self.directory_authorities)) + + if not self._header.is_microdescriptor: + router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3 + else: + router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3 + + router_iter = stem.descriptor.router_status_entry._parse_file( + document_file, + validate, + entry_class = router_type, + entry_keyword = ROUTERS_START, + section_end_keywords = (FOOTER_START, V2_FOOTER_START), + extra_args = (self,), + ) + + self.routers = dict((desc.fingerprint, desc) for desc in router_iter) + + self._footer = _DocumentFooter(document_file, validate, self._header) + + # merge header attributes into us + for attr, value in vars(self._footer).items(): + if attr != "_unrecognized_lines": + setattr(self, attr, 
value) + else: + self._unrecognized_lines += value + + def meets_consensus_method(self, method): + """ + Checks if we meet the given consensus-method. This works for both votes and + consensuses, checking our 'consensus-method' and 'consensus-methods' + entries. + + :param int method: consensus-method to check for + + :returns: **True** if we meet the given consensus-method, and **False** otherwise + """ + + return self._header.meets_consensus_method(method) + + def _compare(self, other, method): + if not isinstance(other, NetworkStatusDocumentV3): + return False + + return method(str(self).strip(), str(other).strip()) + + def __hash__(self): + return hash(str(self).strip()) + + def __eq__(self, other): + return self._compare(other, lambda s, o: s == o) + + def __lt__(self, other): + return self._compare(other, lambda s, o: s < o) + + def __le__(self, other): + return self._compare(other, lambda s, o: s <= o) + + +class _DocumentHeader(object): + def __init__(self, document_file, validate, default_params): + self.version = None + self.version_flavor = None + self.is_consensus = True + self.is_vote = False + self.is_microdescriptor = False + self.consensus_methods = [] + self.published = None + self.consensus_method = None + self.valid_after = None + self.fresh_until = None + self.valid_until = None + self.vote_delay = None + self.dist_delay = None + self.client_versions = [] + self.server_versions = [] + self.known_flags = [] + self.flag_thresholds = {} + self.params = dict(DEFAULT_PARAMS) if default_params else {} + + self._unrecognized_lines = [] + + content = bytes.join(b"", _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)) + content = stem.util.str_tools._to_unicode(content) + entries = _get_descriptor_components(content, validate) + self._parse(entries, validate) + + # doing this validation afterward so we know our 'is_consensus' and + # 'is_vote' attributes + + if validate: + _check_for_missing_and_disallowed_fields(self, 
class _DocumentHeader(object):
  """
  Header section of a v3 network status document. Its attributes are merged
  into the document that parses it.
  """

  def __init__(self, document_file, validate, default_params):
    """
    Reads and parses the header from the start of the document.

    :param file document_file: file object positioned at the document's start
    :param bool validate: checks the validity of the header if **True**
    :param bool default_params: starts our params off with **DEFAULT_PARAMS**
      if **True**, otherwise they're just what the document provides

    :raises: **ValueError** if validating and the header is malformed
    """

    self.version = None
    self.version_flavor = None
    self.is_consensus = True
    self.is_vote = False
    self.is_microdescriptor = False
    self.consensus_methods = []
    self.published = None
    self.consensus_method = None
    self.valid_after = None
    self.fresh_until = None
    self.valid_until = None
    self.vote_delay = None
    self.dist_delay = None
    self.client_versions = []
    self.server_versions = []
    self.known_flags = []
    self.flag_thresholds = {}
    self.params = dict(DEFAULT_PARAMS) if default_params else {}

    self._unrecognized_lines = []

    content = bytes.join(b"", _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
    content = stem.util.str_tools._to_unicode(content)
    entries = _get_descriptor_components(content, validate)
    self._parse(entries, validate)

    # doing this validation afterward so we know our 'is_consensus' and
    # 'is_vote' attributes

    if validate:
      _check_for_missing_and_disallowed_fields(self, entries, HEADER_STATUS_DOCUMENT_FIELDS)
      _check_for_misordered_fields(entries, HEADER_FIELDS)

  def meets_consensus_method(self, method):
    """
    Checks if our 'consensus-method' (or any of our 'consensus-methods') is at
    least the given version.

    :param int method: consensus-method to check for

    :returns: **True** if we meet the given consensus-method, **False** otherwise
    """

    if self.consensus_method is not None:
      return self.consensus_method >= method
    elif self.consensus_methods is not None:
      # any() rather than bool(filter(...)), a filter object is always truthy
      # under python 3

      return any(entry >= method for entry in self.consensus_methods)
    else:
      return False  # malformed document

  def _parse(self, entries, validate):
    """
    Applies the header's entries to our attributes.

    :param dict entries: keyword => [(value, block contents)...] mappings
    :param bool validate: raises for malformed content if **True**, otherwise
      malformed content is skipped

    :raises: **ValueError** if validating and an entry is malformed
    """

    for keyword, values in entries.items():
      value, _ = values[0]
      line = "%s %s" % (keyword, value)

      # all known header fields can only appear once

      if validate and len(values) > 1 and keyword in HEADER_FIELDS:
        raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))

      if keyword == 'network-status-version':
        # "network-status-version" version

        if ' ' in value:
          version, flavor = value.split(' ', 1)
        else:
          version, flavor = value, None

        if not version.isdigit():
          if not validate:
            continue

          raise ValueError("Network status document has a non-numeric version: %s" % line)

        self.version = int(version)
        self.version_flavor = flavor
        self.is_microdescriptor = flavor == 'microdesc'

        if validate and self.version != 3:
          raise ValueError("Expected a version 3 network status document, got version '%s' instead" % self.version)
      elif keyword == 'vote-status':
        # "vote-status" type
        #
        # The consensus-method and consensus-methods fields are optional since
        # they weren't included in version 1. Setting a default now that we
        # know if we're a vote or not.

        if value == 'consensus':
          self.is_consensus, self.is_vote = True, False
          self.consensus_method = 1
        elif value == 'vote':
          self.is_consensus, self.is_vote = False, True
          self.consensus_methods = [1]
        elif validate:
          raise ValueError("A network status document's vote-status line can only be 'consensus' or 'vote', got '%s' instead" % value)
      elif keyword == 'consensus-methods':
        # "consensus-methods" IntegerList

        consensus_methods = []

        for entry in value.split(" "):
          if entry.isdigit():
            consensus_methods.append(int(entry))
          elif validate:
            raise ValueError("A network status document's consensus-methods must be a list of integer values, but was '%s'" % value)

        self.consensus_methods = consensus_methods

        if validate and not (1 in self.consensus_methods):
          raise ValueError("Network status votes must include consensus-method version 1")
      elif keyword == 'consensus-method':
        # "consensus-method" Integer

        if value.isdigit():
          self.consensus_method = int(value)
        elif validate:
          raise ValueError("A network status document's consensus-method must be an integer, but was '%s'" % value)
      elif keyword in ('published', 'valid-after', 'fresh-until', 'valid-until'):
        try:
          date_value = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")

          if keyword == 'published':
            self.published = date_value
          elif keyword == 'valid-after':
            self.valid_after = date_value
          elif keyword == 'fresh-until':
            self.fresh_until = date_value
          elif keyword == 'valid-until':
            self.valid_until = date_value
        except ValueError:
          if validate:
            raise ValueError("Network status document's '%s' time wasn't parsable: %s" % (keyword, value))
      elif keyword == "voting-delay":
        # "voting-delay" VoteSeconds DistSeconds

        value_comp = value.split(' ')

        if len(value_comp) == 2 and value_comp[0].isdigit() and value_comp[1].isdigit():
          self.vote_delay = int(value_comp[0])
          self.dist_delay = int(value_comp[1])
        elif validate:
          raise ValueError("A network status document's 'voting-delay' line must be a pair of integer values, but was '%s'" % value)
      elif keyword in ("client-versions", "server-versions"):
        for entry in value.split(","):
          try:
            version_value = stem.version._get_version(entry)

            if keyword == 'client-versions':
              self.client_versions.append(version_value)
            elif keyword == 'server-versions':
              self.server_versions.append(version_value)
          except ValueError:
            if validate:
              raise ValueError("Network status document's '%s' line had '%s', which isn't a parsable tor version: %s" % (keyword, entry, line))
      elif keyword == "known-flags":
        # "known-flags" FlagList

        # simply fetches the entries, excluding empty strings
        self.known_flags = [entry for entry in value.split(" ") if entry]
      elif keyword == "flag-thresholds":
        # "flag-thresholds" SP THRESHOLDS

        value = value.strip()

        if value:
          for entry in value.split(" "):
            if not '=' in entry:
              if not validate:
                continue

              raise ValueError("Network status document's '%s' line is expected to be space separated key=value mappings, got: %s" % (keyword, line))

            entry_key, entry_value = entry.split("=", 1)

            try:
              if entry_value.endswith("%"):
                # Percentages become fractions (for instance '5%' => 0.05).
                # Shifting the decimal point with an exponent suffix lets
                # float() do a single, correctly rounded conversion rather
                # than us dividing by a hundred afterward.

                self.flag_thresholds[entry_key] = float(entry_value[:-1] + "e-2")
              elif '.' in entry_value:
                self.flag_thresholds[entry_key] = float(entry_value)
              else:
                self.flag_thresholds[entry_key] = int(entry_value)
            except ValueError:
              if validate:
                raise ValueError("Network status document's '%s' line is expected to have float values, got: %s" % (keyword, line))
      elif keyword == "params":
        # "params" [Parameters]
        # Parameter ::= Keyword '=' Int32
        # Int32 ::= A decimal integer between -2147483648 and 2147483647.
        # Parameters ::= Parameter | Parameters SP Parameter

        # should only appear in consensus-method 7 or later

        if validate and not self.meets_consensus_method(7):
          raise ValueError("A network status document's 'params' line should only appear in consensus-method 7 or later")

        # skip if this is a blank line

        if value == "":
          continue

        self.params.update(_parse_int_mappings(keyword, value, validate))

        if validate:
          self._check_params_constraints()
      else:
        self._unrecognized_lines.append(line)

  def _check_params_constraints(self):
    """
    Checks that the params we know about are within their documented ranges.

    :raises: **ValueError** if one of our params is outside its range
    """

    for key, value in self.params.items():
      # all parameters are constrained to int32 range
      minimum, maximum = -2147483648, 2147483647

      if key == "circwindow":
        minimum, maximum = 100, 1000
      elif key == "CircuitPriorityHalflifeMsec":
        minimum = -1
      elif key in ("perconnbwrate", "perconnbwburst"):
        minimum = 1
      elif key == "refuseunknownexits":
        minimum, maximum = 0, 1
      elif key == "bwweightscale":
        minimum = 1
      elif key == "cbtdisabled":
        minimum, maximum = 0, 1
      elif key == "cbtnummodes":
        minimum, maximum = 1, 20
      elif key == "cbtrecentcount":
        minimum, maximum = 3, 1000
      elif key == "cbtmaxtimeouts":
        minimum, maximum = 3, 10000
      elif key == "cbtmincircs":
        minimum, maximum = 1, 10000
      elif key == "cbtquantile":
        minimum, maximum = 10, 99
      elif key == "cbtclosequantile":
        # lower bound is whatever cbtquantile we were given
        minimum, maximum = self.params.get("cbtquantile", minimum), 99
      elif key == "cbttestfreq":
        minimum = 1
      elif key == "cbtmintimeout":
        minimum = 500
      elif key == "cbtinitialtimeout":
        # lower bound is whatever cbtmintimeout we were given
        minimum = self.params.get("cbtmintimeout", minimum)
      elif key == "UseOptimisticData":
        minimum, maximum = 0, 1
      elif key == "Support022HiddenServices":
        minimum, maximum = 0, 1

      if value < minimum or value > maximum:
        raise ValueError("'%s' value on the params line must be in the range of %i - %i, was %i" % (key, minimum, maximum, value))
+ + +class _DocumentFooter(object): + def __init__(self, document_file, validate, header): + self.signatures = [] + self.bandwidth_weights = {} + self._unrecognized_lines = [] + + content = stem.util.str_tools._to_unicode(document_file.read()) + + if not content: + return # footer is optional and there's nothing to parse + + entries = _get_descriptor_components(content, validate) + self._parse(entries, validate, header) + + if validate: + # Check that the footer has the right initial line. Prior to consensus + # method 9 it's a 'directory-signature' and after that footers start with + # 'directory-footer'. + + if header.meets_consensus_method(9): + if entries.keys()[0] != 'directory-footer': + raise ValueError("Network status document's footer should start with a 'directory-footer' line in consensus-method 9 or later") + else: + if entries.keys()[0] != 'directory-signature': + raise ValueError("Network status document's footer should start with a 'directory-signature' line prior to consensus-method 9") + + _check_for_missing_and_disallowed_fields(header, entries, FOOTER_STATUS_DOCUMENT_FIELDS) + _check_for_misordered_fields(entries, FOOTER_FIELDS) + + def _parse(self, entries, validate, header): + for keyword, values in entries.items(): + value, block_contents = values[0] + line = "%s %s" % (keyword, value) + + # all known footer fields can only appear once except... 
+ # * 'directory-signature' in a consensus + + if validate and len(values) > 1 and keyword in FOOTER_FIELDS: + if not (keyword == 'directory-signature' and header.is_consensus): + raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values))) + + if keyword == "directory-footer": + # nothing to parse, simply checking that we don't have a value + + if validate and value: + raise ValueError("A network status document's 'directory-footer' line shouldn't have any content, got '%s'" % line) + elif keyword == "bandwidth-weights": + self.bandwidth_weights = _parse_int_mappings(keyword, value, validate) + elif keyword == "directory-signature": + for sig_value, block_contents in values: + if not sig_value.count(" ") in (1, 2) or not block_contents: + if not validate: + continue + + raise ValueError("Authority signatures in a network status document are expected to be of the form 'directory-signature [METHOD] FINGERPRINT KEY_DIGEST\\nSIGNATURE', got:\n%s\n%s" % (sig_value, block_contents)) + + if sig_value.count(" ") == 1: + method = 'sha1' # default if none was provided + fingerprint, key_digest = sig_value.split(" ", 1) + else: + method, fingerprint, key_digest = sig_value.split(" ", 2) + + self.signatures.append(DocumentSignature(method, fingerprint, key_digest, block_contents, validate)) + + +def _check_for_missing_and_disallowed_fields(header, entries, fields): + """ + Checks that we have mandatory fields for our type, and that we don't have + any fields exclusive to the other (ie, no vote-only fields appear in a + consensus or vice versa). 
def _check_for_missing_and_disallowed_fields(header, entries, fields):
  """
  Checks that we have mandatory fields for our type, and that we don't have
  any fields exclusive to the other (ie, no vote-only fields appear in a
  consensus or vice versa).

  :param _DocumentHeader header: document header
  :param dict entries: ordered keyword/value mappings of the header or footer
  :param list fields: expected field attributes (either
    **HEADER_STATUS_DOCUMENT_FIELDS** or **FOOTER_STATUS_DOCUMENT_FIELDS**)

  :raises: **ValueError** if we're missing mandatory fields or have fields we shouldn't
  """

  missing_fields, disallowed_fields = [], []

  for field, in_votes, in_consensus, mandatory in fields:
    if mandatory and ((header.is_consensus and in_consensus) or (header.is_vote and in_votes)):
      # mandatory field, check that we have it
      if field not in entries:
        missing_fields.append(field)
    elif (header.is_consensus and not in_consensus) or (header.is_vote and not in_votes):
      # field we shouldn't have, check that we don't
      if field in entries:
        disallowed_fields.append(field)

  if missing_fields:
    raise ValueError("Network status document is missing mandatory field: %s" % ', '.join(missing_fields))

  if disallowed_fields:
    raise ValueError("Network status document has fields that shouldn't appear in this document type or version: %s" % ', '.join(disallowed_fields))


def _check_for_misordered_fields(entries, expected):
  """
  To be valid a network status document's fields need to appear in a specific
  order. Checks that known fields appear in that order (unrecognized fields
  are ignored).

  :param dict entries: ordered keyword/value mappings of the header or footer
  :param list expected: ordered list of expected fields (either
    **HEADER_FIELDS** or **FOOTER_FIELDS**)

  :raises: **ValueError** if entries aren't properly ordered
  """

  # Earlier validation has ensured that our fields either belong to our
  # document type or are unknown. Remove the unknown fields since they
  # reflect a spec change and can appear anywhere in the document.
  #
  # These are built as lists (rather than with filter()) so the comparison
  # below is by content under both python 2 and 3.

  actual = [field for field in entries if field in expected]

  # Narrow the expected to just what we have. If the lists then match then the
  # order's valid.

  expected = [field for field in expected if field in actual]

  if actual != expected:
    actual_label = ', '.join(actual)
    expected_label = ', '.join(expected)
    raise ValueError("The fields in a section of the document are misordered. It should be '%s' but was '%s'" % (expected_label, actual_label))
def _parse_int_mappings(keyword, value, validate):
  """
  Parses a space separated series of 'key=value' entries where the values are
  integers. When validating this also checks that the keys are in ascending
  lexical order.

  :param str keyword: keyword of the line being parsed, used in error messages
  :param str value: content of the line
  :param bool validate: raises for malformed entries if **True**, otherwise
    malformed entries are skipped

  :returns: **dict** of key(**str**) => value(**int**) mappings

  :raises: **ValueError** if validating and an entry is malformed or misordered
  """

  results, prior_key = {}, None

  for entry in value.split(" "):
    try:
      if '=' not in entry:
        raise ValueError("must only have 'key=value' entries")

      entry_key, entry_value = entry.split("=", 1)

      try:
        # the int() function accepts things like '+123', but we don't want to
        if entry_value.startswith('+'):
          raise ValueError()

        entry_value = int(entry_value)
      except ValueError:
        raise ValueError("'%s' is a non-numeric value" % entry_value)

      # Parameters should be in ascending order by their key. When validating
      # any misordered entry raises, so the keys accepted so far are sorted
      # and checking against the most recent one is sufficient.

      if validate and prior_key is not None and prior_key > entry_key:
        raise ValueError("parameters must be sorted by their key")

      results[entry_key] = entry_value
      prior_key = entry_key
    except ValueError as exc:
      if not validate:
        continue

      raise ValueError("Unable to parse network status document's '%s' line (%s): %s" % (keyword, exc, value))

  return results
class DirectoryAuthority(Descriptor):
  """
  Directory authority information obtained from a v3 network status document.

  Authorities can optionally use a legacy format. These are no longer found in
  practice, but have the following differences...

  * The authority's nickname ends with '-legacy'.
  * There's no **contact** or **vote_digest** attribute.

  :var str nickname: **\*** authority's nickname
  :var str fingerprint: **\*** authority's fingerprint
  :var str hostname: **\*** hostname of the authority
  :var str address: **\*** authority's IP address
  :var int dir_port: **\*** authority's DirPort
  :var int or_port: **\*** authority's ORPort
  :var bool is_legacy: **\*** if the authority's using the legacy format
  :var str contact: contact information, this is included if is_legacy is **False**

  **Consensus Attributes:**

  :var str vote_digest: digest of the authority that contributed to the consensus, this is included if is_legacy is **False**

  **Vote Attributes:**

  :var str legacy_dir_key: fingerprint of an obsolete identity key
  :var stem.descriptor.networkstatus.KeyCertificate key_certificate: **\***
    authority's key certificate

  **\*** mandatory attribute
  """

  def __init__(self, raw_content, validate = True, is_vote = False):
    """
    Parse a directory authority entry in a v3 network status document.

    :param str raw_content: raw directory authority entry information
    :param bool validate: checks the validity of the content if True, skips
      these checks otherwise
    :param bool is_vote: True if this is for a vote, False if it's for a consensus

    :raises: ValueError if the descriptor data is invalid
    """

    super(DirectoryAuthority, self).__init__(raw_content)
    raw_content = stem.util.str_tools._to_unicode(raw_content)

    self.nickname = None
    self.fingerprint = None
    self.hostname = None
    self.address = None
    self.dir_port = None
    self.or_port = None
    self.is_legacy = False
    self.contact = None

    self.vote_digest = None

    self.legacy_dir_key = None
    self.key_certificate = None

    self._unrecognized_lines = []

    self._parse(raw_content, validate, is_vote)

  def _parse(self, content, validate, is_vote):
    """
    Parses the given content and applies the attributes.

    :param str content: descriptor content
    :param bool validate: checks validity if True
    :param bool is_vote: **True** if this is for a vote, **False** if it's for
      a consensus

    :raises: **ValueError** if a validity check fails
    """

    # separate the directory authority entry from its key certificate
    key_div = content.find('\ndir-key-certificate-version')

    if key_div != -1:
      key_cert_content = content[key_div + 1:]
      content = content[:key_div + 1]
    else:
      key_cert_content = None

    entries = _get_descriptor_components(content, validate)

    # Fetching the first keyword through an iterator works regardless of
    # whether keys() provides a list or a view, and gives us None rather than
    # an IndexError if there aren't any entries.

    if validate and 'dir-source' != next(iter(entries), None):
      raise ValueError("Authority entries are expected to start with a 'dir-source' line:\n%s" % (content))

    # check that we have mandatory fields

    if validate:
      is_legacy, dir_source_entry = False, entries.get("dir-source")

      if dir_source_entry:
        is_legacy = dir_source_entry[0][0].split()[0].endswith("-legacy")

      required_fields, excluded_fields = ["dir-source"], []

      if not is_legacy:
        required_fields += ["contact"]

      if is_vote:
        if not key_cert_content:
          raise ValueError("Authority votes must have a key certificate:\n%s" % content)

        excluded_fields += ["vote-digest"]
      else:
        if key_cert_content:
          raise ValueError("Authority consensus entries shouldn't have a key certificate:\n%s" % content)

        if not is_legacy:
          required_fields += ["vote-digest"]

        excluded_fields += ["legacy-dir-key"]

      for keyword in required_fields:
        if not keyword in entries:
          raise ValueError("Authority entries must have a '%s' line:\n%s" % (keyword, content))

      for keyword in entries:
        if keyword in excluded_fields:
          type_label = "votes" if is_vote else "consensus entries"
          raise ValueError("Authority %s shouldn't have a '%s' line:\n%s" % (type_label, keyword, content))

    for keyword, values in entries.items():
      value, _ = values[0]
      line = "%s %s" % (keyword, value)

      # all known attributes can only appear at most once
      if validate and len(values) > 1 and keyword in ('dir-source', 'contact', 'legacy-dir-key', 'vote-digest'):
        raise ValueError("Authority entries can only have a single '%s' line, got %i:\n%s" % (keyword, len(values), content))

      if keyword == 'dir-source':
        # "dir-source" nickname identity address IP dirport orport

        dir_source_comp = value.split(" ")

        if len(dir_source_comp) < 6:
          if not validate:
            continue

          raise ValueError("Authority entry's 'dir-source' line must have six values: %s" % line)

        if validate:
          # Drop the '-legacy' suffix before checking the nickname. This is a
          # slice rather than rstrip('-legacy') because rstrip() removes any
          # trailing run of those *characters*, mangling nicknames that simply
          # end with letters like 'g' or 'y'.

          base_nickname = dir_source_comp[0]

          if base_nickname.endswith('-legacy'):
            base_nickname = base_nickname[:-len('-legacy')]

          if not stem.util.tor_tools.is_valid_nickname(base_nickname):
            raise ValueError("Authority's nickname is invalid: %s" % dir_source_comp[0])
          elif not stem.util.tor_tools.is_valid_fingerprint(dir_source_comp[1]):
            raise ValueError("Authority's fingerprint is invalid: %s" % dir_source_comp[1])
          elif not dir_source_comp[2]:
            # https://trac.torproject.org/7055
            raise ValueError("Authority's hostname can't be blank: %s" % line)
          elif not stem.util.connection.is_valid_ipv4_address(dir_source_comp[3]):
            raise ValueError("Authority's address isn't a valid IPv4 address: %s" % dir_source_comp[3])
          elif not stem.util.connection.is_valid_port(dir_source_comp[4], allow_zero = True):
            raise ValueError("Authority's DirPort is invalid: %s" % dir_source_comp[4])
          elif not stem.util.connection.is_valid_port(dir_source_comp[5]):
            raise ValueError("Authority's ORPort is invalid: %s" % dir_source_comp[5])
        elif not (dir_source_comp[4].isdigit() and dir_source_comp[5].isdigit()):
          continue

        self.nickname = dir_source_comp[0]
        self.fingerprint = dir_source_comp[1]
        self.hostname = dir_source_comp[2]
        self.address = dir_source_comp[3]
        self.dir_port = None if dir_source_comp[4] == '0' else int(dir_source_comp[4])
        self.or_port = int(dir_source_comp[5])
        self.is_legacy = self.nickname.endswith("-legacy")
      elif keyword == 'contact':
        # "contact" string

        self.contact = value
      elif keyword == 'legacy-dir-key':
        # "legacy-dir-key" FINGERPRINT

        if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
          raise ValueError("Authority has a malformed legacy directory key: %s" % line)

        self.legacy_dir_key = value
      elif keyword == 'vote-digest':
        # "vote-digest" digest

        # technically not a fingerprint, but has the same characteristics
        if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
          raise ValueError("Authority has a malformed vote digest: %s" % line)

        self.vote_digest = value
      else:
        self._unrecognized_lines.append(line)

    if key_cert_content:
      self.key_certificate = KeyCertificate(key_cert_content, validate)

  def get_unrecognized_lines(self):
    """
    Returns any unrecognized lines.

    :returns: a list of unrecognized lines
    """

    return self._unrecognized_lines

  def _compare(self, other, method):
    # authorities are compared by their stripped string form, anything that
    # isn't a DirectoryAuthority never matches

    if not isinstance(other, DirectoryAuthority):
      return False

    return method(str(self).strip(), str(other).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
class KeyCertificate(Descriptor):
  """
  Directory key certificate for a v3 network status document.

  :var int version: **\*** version of the key certificate
  :var str address: authority's IP address
  :var int dir_port: authority's DirPort
  :var str fingerprint: **\*** authority's fingerprint
  :var str identity_key: **\*** long term authority identity key
  :var datetime published: **\*** time when this key was generated
  :var datetime expires: **\*** time after which this key becomes invalid
  :var str signing_key: **\*** directory server's public signing key
  :var str crosscert: signature made using certificate's signing key
  :var str certification: **\*** signature of this key certificate signed with
    the identity key

  **\*** mandatory attribute
  """

  def __init__(self, raw_content, validate = True):
    """
    Parse a directory key certificate.

    :param str raw_content: raw key certificate content
    :param bool validate: checks the validity of the content if **True**

    :raises: **ValueError** if validating and the content is invalid
    """

    super(KeyCertificate, self).__init__(raw_content)
    raw_content = stem.util.str_tools._to_unicode(raw_content)

    self.version = None
    self.address = None
    self.dir_port = None
    self.fingerprint = None
    self.identity_key = None
    self.published = None
    self.expires = None
    self.signing_key = None
    self.crosscert = None
    self.certification = None

    self._unrecognized_lines = []

    self._parse(raw_content, validate)

  def _parse(self, content, validate):
    """
    Parses the given content and applies the attributes.

    :param str content: descriptor content
    :param bool validate: checks validity if **True**

    :raises: **ValueError** if a validity check fails
    """

    entries = _get_descriptor_components(content, validate)

    if validate:
      # Copying the keywords to a list so we can index them regardless of
      # whether keys() provides a list or a view. If there aren't any entries
      # then the bounding keywords are None, which fails the checks below
      # rather than raising an IndexError.

      keywords = list(entries.keys())
      first_keyword = keywords[0] if keywords else None
      last_keyword = keywords[-1] if keywords else None

      if 'dir-key-certificate-version' != first_keyword:
        raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (content))
      elif 'dir-key-certification' != last_keyword:
        raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (content))

      # check that we have mandatory fields and that our known fields only
      # appear once

      for keyword, is_mandatory in KEY_CERTIFICATE_PARAMS:
        if is_mandatory and not keyword in entries:
          raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, content))

        entry_count = len(entries.get(keyword, []))
        if entry_count > 1:
          raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, content))

    for keyword, values in entries.items():
      value, block_contents = values[0]
      line = "%s %s" % (keyword, value)

      if keyword == 'dir-key-certificate-version':
        # "dir-key-certificate-version" version

        if not value.isdigit():
          if not validate:
            continue

          raise ValueError("Key certificate has a non-integer version: %s" % line)

        self.version = int(value)

        if validate and self.version != 3:
          raise ValueError("Expected a version 3 key certificate, got version '%i' instead" % self.version)
      elif keyword == 'dir-address':
        # "dir-address" IPPort

        if not ':' in value:
          if not validate:
            continue

          raise ValueError("Key certificate's 'dir-address' is expected to be of the form ADDRESS:PORT: %s" % line)

        address, dirport = value.split(':', 1)

        if validate:
          if not stem.util.connection.is_valid_ipv4_address(address):
            raise ValueError("Key certificate's address isn't a valid IPv4 address: %s" % line)
          elif not stem.util.connection.is_valid_port(dirport):
            raise ValueError("Key certificate's dirport is invalid: %s" % line)
        elif not dirport.isdigit():
          continue

        self.address = address
        self.dir_port = int(dirport)
      elif keyword == 'fingerprint':
        # "fingerprint" fingerprint

        if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
          raise ValueError("Key certificate's fingerprint is malformed: %s" % line)

        self.fingerprint = value
      elif keyword in ('dir-key-published', 'dir-key-expires'):
        # "dir-key-published" YYYY-MM-DD HH:MM:SS
        # "dir-key-expires" YYYY-MM-DD HH:MM:SS

        try:
          date_value = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")

          if keyword == 'dir-key-published':
            self.published = date_value
          elif keyword == 'dir-key-expires':
            self.expires = date_value
        except ValueError:
          if validate:
            raise ValueError("Key certificate's '%s' time wasn't parsable: %s" % (keyword, value))
      elif keyword in ('dir-identity-key', 'dir-signing-key', 'dir-key-crosscert', 'dir-key-certification'):
        # "dir-identity-key" NL a public key in PEM format
        # "dir-signing-key" NL a key in PEM format
        # "dir-key-crosscert" NL CrossSignature
        # "dir-key-certification" NL Signature

        if validate and not block_contents:
          raise ValueError("Key certificate's '%s' line must be followed by a key block: %s" % (keyword, line))

        if keyword == 'dir-identity-key':
          self.identity_key = block_contents
        elif keyword == 'dir-signing-key':
          self.signing_key = block_contents
        elif keyword == 'dir-key-crosscert':
          self.crosscert = block_contents
        elif keyword == 'dir-key-certification':
          self.certification = block_contents
      else:
        self._unrecognized_lines.append(line)

  def get_unrecognized_lines(self):
    """
    Returns any unrecognized lines.

    :returns: **list** of unrecognized lines
    """

    return self._unrecognized_lines

  def _compare(self, other, method):
    # certificates are compared by their stripped string form, anything that
    # isn't a KeyCertificate never matches

    if not isinstance(other, KeyCertificate):
      return False

    return method(str(self).strip(), str(other).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
+ + if validate: + if not stem.util.tor_tools.is_valid_fingerprint(identity): + raise ValueError("Malformed fingerprint (%s) in the document signature" % identity) + + if not stem.util.tor_tools.is_valid_fingerprint(key_digest): + raise ValueError("Malformed key digest (%s) in the document signature" % key_digest) + + self.method = method + self.identity = identity + self.key_digest = key_digest + self.signature = signature + + def _compare(self, other, method): + if not isinstance(other, DocumentSignature): + return False + + for attr in ("method", "identity", "key_digest", "signature"): + if getattr(self, attr) != getattr(other, attr): + return method(getattr(self, attr), getattr(other, attr)) + + return method(True, True) # we're equal + + def __eq__(self, other): + return self._compare(other, lambda s, o: s == o) + + def __lt__(self, other): + return self._compare(other, lambda s, o: s < o) + + def __le__(self, other): + return self._compare(other, lambda s, o: s <= o) + + +class BridgeNetworkStatusDocument(NetworkStatusDocument): + """ + Network status document containing bridges. This is only available through + the metrics site. 
+ + :var tuple routers: :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2` + contained in the document + :var datetime published: time when the document was published + """ + + def __init__(self, raw_content, validate = True): + super(BridgeNetworkStatusDocument, self).__init__(raw_content) + + self.published = None + + document_file = io.BytesIO(raw_content) + published_line = stem.util.str_tools._to_unicode(document_file.readline()) + + if published_line.startswith("published "): + published_line = published_line.split(" ", 1)[1].strip() + + try: + self.published = datetime.datetime.strptime(published_line, "%Y-%m-%d %H:%M:%S") + except ValueError: + if validate: + raise ValueError("Bridge network status document's 'published' time wasn't parsable: %s" % published_line) + elif validate: + raise ValueError("Bridge network status documents must start with a 'published' line:\n%s" % stem.util.str_tools._to_unicode(raw_content)) + + router_iter = stem.descriptor.router_status_entry._parse_file( + document_file, + validate, + entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2, + extra_args = (self,), + ) + + self.routers = dict((desc.fingerprint, desc) for desc in router_iter) diff --git a/lib/stem/descriptor/reader.py b/lib/stem/descriptor/reader.py new file mode 100644 index 00000000..1612159d --- /dev/null +++ b/lib/stem/descriptor/reader.py @@ -0,0 +1,580 @@ +# Copyright 2012-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Utilities for reading descriptors from local directories and archives. This is +mostly done through the :class:`~stem.descriptor.reader.DescriptorReader` +class, which is an iterator for the descriptor data in a series of +destinations. For example... 
+ +:: + + my_descriptors = [ + "/tmp/server-descriptors-2012-03.tar.bz2", + "/tmp/archived_descriptors/", + ] + + # prints the contents of all the descriptor files + with DescriptorReader(my_descriptors) as reader: + for descriptor in reader: + print descriptor + +This ignores files that cannot be processed due to read errors or unparsable +content. To be notified of skipped files you can register a listener with +:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`. + +The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last +modified timestamps for descriptor files that it has read so it can skip +unchanged files if run again. This listing of processed files can also be +persisted and applied to other +:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the +following prints descriptors as they're changed over the course of a minute, +and picks up where it left off if run again... + +:: + + reader = DescriptorReader(["/tmp/descriptor_data"]) + + try: + processed_files = load_processed_files("/tmp/used_descriptors") + reader.set_processed_files(processed_files) + except: pass # could not load, maybe this is the first run + + start_time = time.time() + + while (time.time() - start_time) < 60: + # prints any descriptors that have changed since last checked + with reader: + for descriptor in reader: + print descriptor + + time.sleep(1) + + save_processed_files("/tmp/used_descriptors", reader.get_processed_files()) + +**Module Overview:** + +:: + + load_processed_files - Loads a listing of processed files + save_processed_files - Saves a listing of processed files + + DescriptorReader - Iterator for descriptor data on the local file system + |- get_processed_files - provides the listing of files that we've processed + |- set_processed_files - sets our tracking of the files we have processed + |- register_read_listener - adds a listener for when files are read + |- register_skip_listener - adds a listener 
that's notified of skipped files + |- start - begins reading descriptor data + |- stop - stops reading descriptor data + |- __enter__ / __exit__ - manages the descriptor reader thread in the context + +- __iter__ - iterates over descriptor data in unread files + + FileSkipped - Base exception for a file that was skipped + |- AlreadyRead - We've already read a file with this last modified timestamp + |- ParsingFailure - Contents can't be parsed as descriptor data + |- UnrecognizedType - File extension indicates non-descriptor data + +- ReadFailed - Wraps an error that was raised while reading the file + +- FileMissing - File does not exist +""" + +import mimetypes +import os +import Queue +import tarfile +import threading + +import stem.descriptor +import stem.prereq + +# flag to indicate when the reader thread is out of descriptor files to read +FINISHED = "DONE" + + +class FileSkipped(Exception): + "Base error when we can't provide descriptor data from a file." + + +class AlreadyRead(FileSkipped): + """ + Already read a file with this 'last modified' timestamp or later. + + :param int last_modified: unix timestamp for when the file was last modified + :param int last_modified_when_read: unix timestamp for the modification time + when we last read this file + """ + + def __init__(self, last_modified, last_modified_when_read): + super(AlreadyRead, self).__init__("File has already been read since it was last modified. modification time: %s, last read: %s" % (last_modified, last_modified_when_read)) + self.last_modified = last_modified + self.last_modified_when_read = last_modified_when_read + + +class ParsingFailure(FileSkipped): + """ + File contents could not be parsed as descriptor data. 
class FileMissing(ReadFailed):
  """
  File does not exist.

  No underlying error caused this, so our **exception** attribute is **None**
  (as documented by :class:`~stem.descriptor.reader.ReadFailed`) while our
  message remains 'File does not exist'.
  """

  def __init__(self):
    super(FileMissing, self).__init__("File does not exist")

    # ReadFailed's constructor records whatever it's given as the causing
    # exception, which for us would be the message string rather than an error

    self.exception = None
def save_processed_files(path, processed_files):
  """
  Persists a dictionary of 'path => last modified timestamp' mappings (as
  provided by the DescriptorReader's
  :func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method)
  so that they can be loaded later and applied to another
  :class:`~stem.descriptor.reader.DescriptorReader`.

  All entries are checked before the destination is touched, so an invalid
  listing doesn't clobber a previously saved file.

  :param str path: location to save the processed files dictionary to
  :param dict processed_files: 'path => last modified' mappings

  :raises:
    * **IOError** if unable to write to the file
    * **TypeError** if processed_files is of the wrong type
  """

  # Render the full output first. This way malformed entries are reported
  # before we create directories or truncate an existing file.

  output_lines = []

  for file_path, timestamp in processed_files.items():
    if not os.path.isabs(file_path):
      raise TypeError("Only absolute paths are acceptable: %s" % file_path)

    output_lines.append("%s %i\n" % (file_path, timestamp))

  # Makes the parent directory if it doesn't already exist. A bare filename has
  # an empty dirname (ie, the current directory) so there's nothing to make.

  path_dir = os.path.dirname(path)

  try:
    if path_dir and not os.path.exists(path_dir):
      os.makedirs(path_dir)
  except OSError as exc:
    raise IOError(exc)

  with open(path, "w") as output_file:
    output_file.writelines(output_lines)
def __init__(self, target, validate = True, follow_links = False, buffer_size = 100, persistence_path = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
  """
  Sets up a reader in the stopped state. Nothing is read until we're started.
  """

  # a lone path is shorthand for a single item listing

  if isinstance(target, (bytes, unicode)):
    targets = [target]
  else:
    targets = target

  # Expand any relative paths we got. This is applied to the targets we keep,
  # and only after wrapping a lone path so we don't iterate its characters.

  self._targets = [os.path.abspath(path) for path in targets]

  self._validate = validate
  self._follow_links = follow_links
  self._persistence_path = persistence_path
  self._document_handler = document_handler
  self._kwargs = kwargs
  self._read_listeners = []
  self._skip_listeners = []
  self._processed_files = {}

  self._reader_thread = None
  self._reader_thread_lock = threading.RLock()

  self._iter_lock = threading.RLock()
  self._iter_notice = threading.Event()

  # we start off stopped, this is cleared when reading begins

  self._is_stopped = threading.Event()
  self._is_stopped.set()

  # Descriptors that we have read but not yet provided to the caller. A
  # FINISHED entry is used by the reading thread to indicate the end.

  self._unreturned_descriptors = Queue.Queue(buffer_size)

  if self._persistence_path:
    # Loading is best-effort (a missing or malformed listing just means we
    # start fresh), but interrupts and exit requests aren't swallowed.

    try:
      processed_files = load_processed_files(self._persistence_path)
      self.set_processed_files(processed_files)
    except Exception:
      pass
def stop(self):
  """
  Stops further reading of descriptor files. This waits for the reader thread
  to finish, and is a no-op for the thread if we were never started.

  If we have a persistence path then the listing of processed files is saved
  to it, errors in doing so are ignored.
  """

  with self._reader_thread_lock:
    self._is_stopped.set()
    self._iter_notice.set()

    # clears our queue to unblock enqueue calls

    try:
      while True:
        self._unreturned_descriptors.get_nowait()
    except Queue.Empty:
      pass

    # we don't have a thread to wait on if stop() is called without start()

    if self._reader_thread is not None:
      self._reader_thread.join()
      self._reader_thread = None

    if self._persistence_path:
      # Saving is best-effort, but we shouldn't swallow interrupts or exit
      # requests so only catching Exception.

      try:
        processed_files = self.get_processed_files()
        save_processed_files(self._persistence_path, processed_files)
      except Exception:
        pass
+ + try: + last_modified = int(os.stat(target).st_mtime) + last_used = self._processed_files.get(target) + new_processed_files[target] = last_modified + except OSError as exc: + self._notify_skip_listeners(target, ReadFailed(exc)) + return + + if last_used and last_used >= last_modified: + self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used)) + return + + # Block devices and such are never descriptors, and can cause us to block + # for quite a while so skipping anything that isn't a regular file. + + if not os.path.isfile(target): + return + + # The mimetypes module only checks the file extension. To actually + # check the content (like the 'file' command) we'd need something like + # pymagic (https://github.com/cloudburst/pymagic). + + target_type = mimetypes.guess_type(target) + + # Checking if it's a tar file may fail due to permissions so failing back + # to the mime type... + # + # IOError: [Errno 13] Permission denied: '/vmlinuz.old' + # + # With python 3 insuffient permissions raises an AttributeError instead... 
def _handle_descriptor_file(self, target, mime_type):
  # Parses a plaintext descriptor file, enqueuing each descriptor for our
  # caller. Skip listeners are told why if the file can't be used.

  try:
    self._notify_read_listeners(target)

    with open(target, 'rb') as descriptor_file:
      parsed_descriptors = stem.descriptor.parse_file(
        descriptor_file,
        validate = self._validate,
        document_handler = self._document_handler,
        **self._kwargs
      )

      for descriptor in parsed_descriptors:
        # bail if we were asked to stop while working through the file

        if self._is_stopped.is_set():
          return

        self._unreturned_descriptors.put(descriptor)
        self._iter_notice.set()
  except TypeError:
    self._notify_skip_listeners(target, UnrecognizedType(mime_type))
  except ValueError as parse_error:
    self._notify_skip_listeners(target, ParsingFailure(parse_error))
  except IOError as read_error:
    self._notify_skip_listeners(target, ReadFailed(read_error))
+ + tar_file = None + + try: + self._notify_read_listeners(target) + tar_file = tarfile.open(target) + + for tar_entry in tar_file: + if tar_entry.isfile(): + entry = tar_file.extractfile(tar_entry) + + try: + for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler, **self._kwargs): + if self._is_stopped.is_set(): + return + + desc._set_path(os.path.abspath(target)) + desc._set_archive_path(entry.name) + self._unreturned_descriptors.put(desc) + self._iter_notice.set() + except TypeError as exc: + self._notify_skip_listeners(target, ParsingFailure(exc)) + except ValueError as exc: + self._notify_skip_listeners(target, ParsingFailure(exc)) + finally: + entry.close() + except IOError as exc: + self._notify_skip_listeners(target, ReadFailed(exc)) + finally: + if tar_file: + tar_file.close() + + def _notify_read_listeners(self, path): + for listener in self._read_listeners: + listener(path) + + def _notify_skip_listeners(self, path, exception): + for listener in self._skip_listeners: + listener(path, exception) + + def __enter__(self): + self.start() + return self + + def __exit__(self, exit_type, value, traceback): + self.stop() diff --git a/lib/stem/descriptor/remote.py b/lib/stem/descriptor/remote.py new file mode 100644 index 00000000..aa4d00ca --- /dev/null +++ b/lib/stem/descriptor/remote.py @@ -0,0 +1,758 @@ +# Copyright 2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Module for remotely retrieving descriptors from directory authorities and +mirrors. This is most easily done through the +:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues +:class:`~stem.descriptor.remote.Query` instances to get you the descriptor +content. For example... 
+ +:: + + from stem.descriptor.remote import DescriptorDownloader + + downloader = DescriptorDownloader( + use_mirrors = True, + timeout = 10, + ) + + query = downloader.get_server_descriptors() + + print "Exit Relays:" + + try: + for desc in query.run(): + if desc.exit_policy.is_exiting_allowed(): + print " %s (%s)" % (desc.nickname, desc.fingerprint) + + print + print "Query took %0.2f seconds" % query.runtime + except Exception as exc: + print "Unable to retrieve the server descriptors: %s" % exc + +If you don't care about errors then you can also simply iterate over the query +itself... + +:: + + for desc in downloader.get_server_descriptors(): + if desc.exit_policy.is_exiting_allowed(): + print " %s (%s)" % (desc.nickname, desc.fingerprint) + +:: + + get_authorities - Provides tor directory information. + + DirectoryAuthority - Information about a tor directory authority. + + Query - Asynchronous request to download tor descriptors + |- start - issues the query if it isn't already running + +- run - blocks until the request is finished and provides the results + + DescriptorDownloader - Configurable class for issuing queries + |- use_directory_mirrors - use directory mirrors to download future descriptors + |- get_server_descriptors - provides present server descriptors + |- get_extrainfo_descriptors - provides present extrainfo descriptors + |- get_microdescriptors - provides present microdescriptors + |- get_consensus - provides the present consensus or router status entries + |- get_key_certificates - provides present authority key certificates + +- query - request an arbitrary descriptor resource + +.. data:: MAX_FINGERPRINTS + + Maximum number of descriptors that can requested at a time by their + fingerprints. + +.. data:: MAX_MICRODESCRIPTOR_HASHES + + Maximum number of microdescriptors that can requested at a time by their + hashes. 
def _guess_descriptor_type(resource):
  # Provides the descriptor type annotation for a resource url based on the
  # directory path it starts with. Unrecognized resources result in a
  # ValueError.

  resource_prefixes = (
    ('/tor/server/', 'server-descriptor 1.0'),
    ('/tor/extra/', 'extra-info 1.0'),
    ('/tor/micro/', 'microdescriptor 1.0'),
    ('/tor/status-vote/', 'network-status-consensus-3 1.0'),
    ('/tor/keys/', 'dir-key-certificate-3 1.0'),
  )

  for prefix, descriptor_type in resource_prefixes:
    if resource.startswith(prefix):
      return descriptor_type

  raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
while iterating fails silently... + + :: + + print "Current relays:" + + for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'): + print desc.fingerprint + + In either case exceptions are available via our 'error' attribute. + + Tor provides quite a few different descriptor resources via its directory + protocol (see section 4.2 and later of the `dir-spec + `_). + Commonly useful ones include... + + ===================================== =========== + Resource Description + ===================================== =========== + /tor/server/all.z all present server descriptors + /tor/server/fp/++.z server descriptors with the given fingerprints + /tor/extra/all.z all present extrainfo descriptors + /tor/extra/fp/++.z extrainfo descriptors with the given fingerprints + /tor/micro/d/-.z microdescriptors with the given hashes + /tor/status-vote/current/consensus.z present consensus + /tor/keys/all.z key certificates for the authorities + /tor/keys/fp/+.z key certificates for specific authorities + ===================================== =========== + + The '.z' suffix can be excluded to get a plaintext rather than compressed + response. Compression is handled transparently, so this shouldn't matter to + the caller. 
+ + :var str resource: resource being fetched, such as '/tor/server/all.z' + :var str descriptor_type: type of descriptors being fetched (for options see + :func:`~stem.descriptor.__init__.parse_file`), this is guessed from the + resource if **None** + + :var list endpoints: (address, dirport) tuples of the authority or mirror + we're querying, this uses authorities if undefined + :var int retries: number of times to attempt the request if downloading it + fails + :var bool fall_back_to_authority: when retrying request issues the last + request to a directory authority if **True** + + :var str content: downloaded descriptor content + :var Exception error: exception if a problem occured + :var bool is_done: flag that indicates if our request has finished + :var str download_url: last url used to download the descriptor, this is + unset until we've actually made a download attempt + + :var float start_time: unix timestamp when we first started running + :var float timeout: duration before we'll time out our request + :var float runtime: time our query took, this is **None** if it's not yet + finished + + :var bool validate: checks the validity of the descriptor's content if + **True**, skips these checks otherwise + :var stem.descriptor.__init__.DocumentHandler document_handler: method in + which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` + :var dict kwargs: additional arguments for the descriptor constructor + + :param bool start: start making the request when constructed (default is **True**) + :param bool block: only return after the request has been completed, this is + the same as running **query.run(True)** (default is **False**) + """ + + def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs): + if not resource.startswith('/'): + raise 
def start(self):
  """
  Starts downloading the descriptors if we haven't started already. Calling
  this again after the download thread has been created does nothing.
  """

  with self._downloader_thread_lock:
    if self._downloader_thread is not None:
      return  # download was already kicked off

    downloader = threading.Thread(
      name = "Descriptor Query",
      target = self._download_descriptors,
      args = (self.retries,)
    )

    self._downloader_thread = downloader

    # daemon so that a pending download doesn't keep the interpreter alive
    downloader.setDaemon(True)
    downloader.start()
def _pick_url(self, use_authority = False):
  """
  Provides a url that can be queried. If we have multiple endpoints then one
  will be picked randomly.

  :param bool use_authority: ignores our endpoints and uses a directory
    authority instead

  :returns: **str** for the url being queried by this request
  """

  if use_authority or not self.endpoints:
    # random.choice() requires a sequence, which filter() only provides under
    # python 2, so we build the list of candidates ourselves

    authorities = [auth for auth in get_authorities().values() if HAS_V3IDENT(auth)]
    authority = random.choice(authorities)
    address, dirport = authority.address, authority.dir_port
  else:
    address, dirport = random.choice(self.endpoints)

  # resources start with a slash, which we already include in the url
  return "http://%s:%i/%s" % (address, dirport, self.resource.lstrip('/'))
class DescriptorDownloader(object):
  """
  Configurable class that issues :class:`~stem.descriptor.remote.Query`
  instances on your behalf.

  :param bool use_mirrors: downloads the present consensus and uses the directory
    mirrors to fetch future requests, this fails silently if the consensus
    cannot be downloaded
  :param default_args: default arguments for the
    :class:`~stem.descriptor.remote.Query` constructor
  """

  def __init__(self, use_mirrors = False, **default_args):
    self._default_args = default_args

    # start with the authorities accepted by HAS_V3IDENT as our endpoints
    authorities = filter(HAS_V3IDENT, get_authorities().values())
    self._endpoints = [(auth.address, auth.dir_port) for auth in authorities]

    if use_mirrors:
      try:
        start_time = time.time()
        self.use_directory_mirrors()
        log.debug("Retrieved directory mirrors (took %0.2fs)" % (time.time() - start_time))
      except Exception as exc:
        # Deliberately best-effort: if this fails we keep the authority
        # endpoints that were set above.

        log.debug("Unable to retrieve directory mirrors: %s" % exc)

  def use_directory_mirrors(self):
    """
    Downloads the present consensus and configures ourselves to use directory
    mirrors, in addition to authorities.

    :returns: :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
      from which we got the directory mirrors

    :raises: **Exception** if unable to determine the directory mirrors
    """

    authorities = filter(HAS_V3IDENT, get_authorities().values())
    new_endpoints = set((auth.address, auth.dir_port) for auth in authorities)

    # with the DOCUMENT handler the query provides the document itself as its
    # sole result
    query = self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT)
    consensus = list(query.run())[0]

    for desc in consensus.routers.values():
      if Flag.V2DIR in desc.flags:
        new_endpoints.add((desc.address, desc.dir_port))

    # we need our endpoints to be a list rather than set for random.choice()

    self._endpoints = list(new_endpoints)

    return consensus

  def get_server_descriptors(self, fingerprints = None, **query_args):
    """
    Provides the server descriptors with the given fingerprints. If no
    fingerprints are provided then this returns all descriptors in the present
    consensus.

    :param str,list fingerprints: fingerprint or list of fingerprints to be
      retrieved, gets all descriptors if **None**
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the server descriptors

    :raises: **ValueError** if we request more than MAX_FINGERPRINTS
      descriptors by their fingerprints
    """

    resource = '/tor/server/all.z'

    if isinstance(fingerprints, str):
      fingerprints = [fingerprints]

    if fingerprints:
      if len(fingerprints) > MAX_FINGERPRINTS:
        raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)

      resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)

    return self.query(resource, **query_args)

  def get_extrainfo_descriptors(self, fingerprints = None, **query_args):
    """
    Provides the extrainfo descriptors with the given fingerprints. If no
    fingerprints are provided then this returns all descriptors in the present
    consensus.

    :param str,list fingerprints: fingerprint or list of fingerprints to be
      retrieved, gets all descriptors if **None**
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the extrainfo descriptors

    :raises: **ValueError** if we request more than MAX_FINGERPRINTS
      descriptors by their fingerprints
    """

    resource = '/tor/extra/all.z'

    if isinstance(fingerprints, str):
      fingerprints = [fingerprints]

    if fingerprints:
      if len(fingerprints) > MAX_FINGERPRINTS:
        raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)

      resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)

    return self.query(resource, **query_args)

  def get_microdescriptors(self, hashes, **query_args):
    """
    Provides the microdescriptors with the given hashes. To get these see the
    'microdescriptor_hashes' attribute of
    :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`. Note
    that these are only provided via a microdescriptor consensus (such as
    'cached-microdesc-consensus' in your data directory).

    :param str,list hashes: microdescriptor hash or list of hashes to be
      retrieved
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the microdescriptors

    :raises: **ValueError** if we request more than
      MAX_MICRODESCRIPTOR_HASHES microdescriptors by their hashes
    """

    if isinstance(hashes, str):
      hashes = [hashes]

    if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
      raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_HASHES)

    # unlike fingerprints, microdescriptor hashes are dash separated
    return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)

  def get_consensus(self, authority_v3ident = None, **query_args):
    """
    Provides the present router status entries.

    :param str authority_v3ident: fingerprint of the authority key for which
      to get the consensus, see the 'v3ident' values in tor's config.c
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the router status
      entries
    """

    resource = '/tor/status-vote/current/consensus'

    if authority_v3ident:
      resource += '/%s' % authority_v3ident

    return self.query(resource + '.z', **query_args)

  def get_vote(self, authority, **query_args):
    """
    Provides the present vote for a given directory authority.

    :param stem.descriptor.remote.DirectoryAuthority authority: authority
      whose vote should be retrieved
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the router status
      entries
    """

    resource = '/tor/status-vote/current/authority'

    # Only default to the authority's own DirPort when our caller didn't pick
    # endpoints. This must check the same 'endpoints' key that we set,
    # otherwise the caller's choice is always clobbered.

    if 'endpoints' not in query_args:
      query_args['endpoints'] = [(authority.address, authority.dir_port)]

    return self.query(resource + '.z', **query_args)

  def get_key_certificates(self, authority_v3idents = None, **query_args):
    """
    Provides the key certificates for authorities with the given fingerprints.
    If no fingerprints are provided then this returns all present key
    certificates.

    :param str,list authority_v3idents: fingerprint or list of fingerprints of
      the authority keys, see the 'v3ident' values in tor's config.c
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the key certificates

    :raises: **ValueError** if we request more than MAX_FINGERPRINTS key
      certificates by their identity fingerprints
    """

    resource = '/tor/keys/all.z'

    if isinstance(authority_v3idents, str):
      authority_v3idents = [authority_v3idents]

    if authority_v3idents:
      if len(authority_v3idents) > MAX_FINGERPRINTS:
        raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_FINGERPRINTS)

      resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)

    return self.query(resource, **query_args)

  def query(self, resource, **query_args):
    """
    Issues a request for the given resource.

    :param str resource: resource being fetched, such as '/tor/server/all.z'
    :param query_args: additional arguments for the
      :class:`~stem.descriptor.remote.Query` constructor

    :returns: :class:`~stem.descriptor.remote.Query` for the descriptors

    :raises: **ValueError** if resource is clearly invalid or the descriptor
      type can't be determined when 'descriptor_type' is **None**
    """

    # arguments for this call take precedence over our constructor's defaults
    args = dict(self._default_args)
    args.update(query_args)

    if 'endpoints' not in args:
      args['endpoints'] = self._endpoints

    if 'fall_back_to_authority' not in args:
      args['fall_back_to_authority'] = True

    return Query(
      resource,
      **args
    )
Each hour the directory authorities make a `vote `_ + that says who they think the active relays are in the network and some + attributes about them. + + 3. The directory authorities send each other their votes, and compile that + into the `consensus `_. This document is very similar + to the votes, the only difference being that the majority of the + authorities agree upon and sign this document. The idividual relay entries + in the vote or consensus is called `router status entries + `_. + + 4. Tor clients (people using the service) download the consensus from one of + the authorities or a mirror to determine the active relays within the + network. They in turn use this to construct their circuits and use the + network. + + :var str nickname: nickname of the authority + :var str address: IP address of the authority, presently they're all IPv4 but + this may not always be the case + :var int or_port: port on which the relay services relay traffic + :var int dir_port: port on which directory information is available + :var str fingerprint: relay fingerprint + :var str v3ident: identity key fingerprint used to sign votes and consensus + """ + + def __init__(self, nickname = None, address = None, or_port = None, dir_port = None, fingerprint = None, v3ident = None): + self.nickname = nickname + self.address = address + self.or_port = or_port + self.dir_port = dir_port + self.fingerprint = fingerprint + self.v3ident = v3ident + + +DIRECTORY_AUTHORITIES = { + 'moria1': DirectoryAuthority( + nickname = 'moria1', + address = '128.31.0.39', + or_port = 9101, + dir_port = 9131, + fingerprint = '9695DFC35FFEB861329B9F1AB04C46397020CE31', + v3ident = 'D586D18309DED4CD6D57C18FDB97EFA96D330566', + ), + 'tor26': DirectoryAuthority( + nickname = 'tor26', + address = '86.59.21.38', + or_port = 443, + dir_port = 80, + fingerprint = '847B1F850344D7876491A54892F904934E4EB85D', + v3ident = '14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4', + ), + 'dizum': DirectoryAuthority( + nickname = 
'dizum', + address = '194.109.206.212', + or_port = 443, + dir_port = 80, + fingerprint = '7EA6EAD6FD83083C538F44038BBFA077587DD755', + v3ident = 'E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58', + ), + 'Tonga': DirectoryAuthority( + nickname = 'Tonga', + address = '82.94.251.203', + or_port = 443, + dir_port = 80, + fingerprint = '4A0CCD2DDC7995083D73F5D667100C8A5831F16D', + v3ident = None, # does not vote in the consensus + ), + 'turtles': DirectoryAuthority( + nickname = 'turtles', + address = '76.73.17.194', + or_port = 9090, + dir_port = 9030, + fingerprint = 'F397038ADC51336135E7B80BD99CA3844360292B', + v3ident = '27B6B5996C426270A5C95488AA5BCEB6BCC86956', + ), + 'gabelmoo': DirectoryAuthority( + nickname = 'gabelmoo', + address = '212.112.245.170', + or_port = 443, + dir_port = 80, + fingerprint = 'F2044413DAC2E02E3D6BCF4735A19BCA1DE97281', + v3ident = 'ED03BB616EB2F60BEC80151114BB25CEF515B226', + ), + 'dannenberg': DirectoryAuthority( + nickname = 'dannenberg', + address = '193.23.244.244', + or_port = 443, + dir_port = 80, + fingerprint = '7BE683E65D48141321C5ED92F075C55364AC7123', + v3ident = '585769C78764D58426B8B52B6651A5A71137189A', + ), + 'urras': DirectoryAuthority( + nickname = 'urras', + address = '208.83.223.34', + or_port = 80, + dir_port = 443, + fingerprint = '0AD3FA884D18F89EEA2D89C019379E0E7FD94417', + v3ident = '80550987E1D626E3EBA5E5E75A458DE0626D088C', + ), + 'maatuska': DirectoryAuthority( + nickname = 'maatuska', + address = '171.25.193.9', + or_port = 80, + dir_port = 443, + fingerprint = 'BD6A829255CB08E66FBE7D3748363586E46B3810', + v3ident = '49015F787433103580E3B66A1707A00E60F2D15B', + ), + 'Faravahar': DirectoryAuthority( + nickname = 'Faravahar', + address = '154.35.32.5', + or_port = 443, + dir_port = 80, + fingerprint = 'CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC', + v3ident = 'EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97', + ), +} + + +def get_authorities(): + """ + Provides the Tor directory authority information as of **Tor commit 00bcc25 
def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
  """
  Reads a range of the document_file containing some number of entry_class
  instances. We delimit the entry_class entries by the keyword on their
  first line (entry_keyword). When finished the document is left at the
  end_position.

  Either an end_position or section_end_keywords must be provided.

  :param file document_file: file with network status document content
  :param bool validate: checks the validity of the document's contents if
    **True**, skips these checks otherwise
  :param class entry_class: class to construct instance for
  :param str entry_keyword: first keyword for the entry instances
  :param int start_position: start of the section, default is the current position
  :param int end_position: end of the section
  :param tuple section_end_keywords: keyword(s) that delimit the end of the
    section if no end_position was provided
  :param tuple extra_args: extra arguments for the entry_class (after the
    content and validate flag)

  :returns: iterator over entry_class instances

  :raises:
    * **ValueError** if the contents is malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  # Compare against None rather than checking truthiness. A start_position of
  # zero is a valid offset (the beginning of the file) that we need to seek to.

  if start_position is None:
    start_position = document_file.tell()
  else:
    document_file.seek(start_position)

  # check if we're starting at the end of the section (ie, there's no entries to read)
  if section_end_keywords:
    first_keyword = None
    line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))

    if line_match:
      first_keyword = line_match.groups()[0]

    # undo the readline() above so parsing starts at the section's first line
    document_file.seek(start_position)

    if first_keyword in section_end_keywords:
      return

  while end_position is None or document_file.tell() < end_position:
    desc_lines, ending_keyword = _read_until_keywords(
      (entry_keyword,) + section_end_keywords,
      document_file,
      ignore_first = True,
      end_position = end_position,
      include_ending_keyword = True
    )

    desc_content = bytes.join(b"", desc_lines)

    if not desc_content:
      break  # nothing left to read

    yield entry_class(desc_content, validate, *extra_args)

    # check if we stopped at the end of the section
    if ending_keyword in section_end_keywords:
      break
This is the common parent for concrete status entry types. + + :var stem.descriptor.networkstatus.NetworkStatusDocument document: **\*** document that this descriptor came from + + :var str nickname: **\*** router's nickname + :var str fingerprint: **\*** router's fingerprint + :var datetime published: **\*** router's publication + :var str address: **\*** router's IP address + :var int or_port: **\*** router's ORPort + :var int dir_port: **\*** router's DirPort + + :var list flags: **\*** list of :data:`~stem.Flag` associated with the relay + + :var stem.version.Version version: parsed version of tor, this is **None** if + the relay's using a new versioning scheme + :var str version_line: versioning information reported by the relay + """ + + def __init__(self, content, validate, document): + """ + Parse a router descriptor in a network status document. + + :param str content: router descriptor content to be parsed + :param NetworkStatusDocument document: document this descriptor came from + :param bool validate: checks the validity of the content if **True**, skips + these checks otherwise + + :raises: **ValueError** if the descriptor data is invalid + """ + + super(RouterStatusEntry, self).__init__(content) + content = stem.util.str_tools._to_unicode(content) + + self.document = document + + self.nickname = None + self.fingerprint = None + self.published = None + self.address = None + self.or_port = None + self.dir_port = None + + self.flags = None + + self.version_line = None + self.version = None + + self._unrecognized_lines = [] + + entries = _get_descriptor_components(content, validate) + + if validate: + self._check_constraints(entries) + + self._parse(entries, validate) + + def _parse(self, entries, validate): + """ + Parses the given content and applies the attributes. 
def _check_constraints(self, entries):
  """
  Does a basic check that the entries conform to this descriptor type's
  constraints.

  :param dict entries: keyword => (value, pgp key) entries

  :raises: **ValueError** if an issue arises in validation
  """

  for keyword in self._required_fields():
    if keyword not in entries:
      raise ValueError("%s must have a '%s' line:\n%s" % (self._name(True), keyword, str(self)))

  for keyword in self._single_fields():
    if keyword in entries and len(entries[keyword]) > 1:
      raise ValueError("%s can only have a single '%s' line, got %i:\n%s" % (self._name(True), keyword, len(entries[keyword]), str(self)))

  # Fetch the first keyword by iterating rather than indexing keys(). Under
  # python 3 keys() isn't subscriptable, and this also makes empty entries
  # fail with the documented ValueError rather than an IndexError.

  first_keyword = next(iter(entries), None)

  if first_keyword != 'r':
    raise ValueError("%s are expected to start with a 'r' line:\n%s" % (self._name(True), str(self)))
class RouterStatusEntryV2(RouterStatusEntry):
  """
  Information about an individual router stored within a version 2 network
  status document.

  :var str digest: **\*** router's upper-case hex digest

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, content, validate = True, document = None):
    # our attributes need to be set before the parent's constructor since
    # that's what triggers parsing
    self.digest = None
    super(RouterStatusEntryV2, self).__init__(content, validate, document)

  def _parse(self, entries, validate):
    # Handles the 'r' line, which is the only one specific to us, then
    # leaves the rest to our parent. We look the keyword up directly since
    # removing entries while iterating over them fails under python 3.

    if 'r' in entries:
      value, _ = entries['r'][0]
      _parse_r_line(self, value, validate, True)
      del entries['r']

    RouterStatusEntry._parse(self, entries, validate)

  def _name(self, is_plural = False):
    if is_plural:
      return "Router status entries (v2)"
    else:
      return "Router status entry (v2)"

  def _required_fields(self):
    # trailing comma is needed for this to be a tuple rather than a str
    return ('r',)

  def _single_fields(self):
    return ('r', 's', 'v')

  def _compare(self, other, method):
    # entries of other types never match, otherwise we compare our content
    if not isinstance(other, RouterStatusEntryV2):
      return False

    return method(str(self).strip(), str(other).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
def _parse(self, entries, validate):
  """
  Parses the lines specific to v3 router status entries ('r', 'a', 'w', 'p',
  and 'm'), removing them from the entries, then hands whatever remains to
  RouterStatusEntry._parse().

  :param dict entries: keyword => (value, pgp key) entries
  :param bool validate: checks validity if **True**

  :raises: **ValueError** if a validity check fails
  """

  # Iterating over a snapshot since we remove entries as we handle them.
  # Doing that with a live items() view raises a RuntimeError under python 3.

  for keyword, values in list(entries.items()):
    value, _ = values[0]

    if keyword == 'r':
      _parse_r_line(self, value, validate, True)
      del entries['r']
    elif keyword == 'a':
      # can appear multiple times, one for each additional address
      for entry, _ in values:
        _parse_a_line(self, entry, validate)

      del entries['a']
    elif keyword == 'w':
      _parse_w_line(self, value, validate)
      del entries['w']
    elif keyword == 'p':
      _parse_p_line(self, value, validate)
      del entries['p']
    elif keyword == 'm':
      # can appear multiple times, one for each set of consensus methods
      for entry, _ in values:
        _parse_m_line(self, entry, validate)

      del entries['m']

  RouterStatusEntry._parse(self, entries, validate)
class RouterStatusEntryMicroV3(RouterStatusEntry):
  """
  Information about an individual router stored within a microdescriptor
  flavored network status document.

  :var int bandwidth: bandwidth claimed by the relay (in kb/s)
  :var int measured: bandwidth measured to be available by the relay
  :var bool is_unmeasured: bandwidth measurement isn't based on three or more
    measurements
  :var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
    information that isn't yet recognized

  :var str digest: **\*** router's hex encoded digest of our corresponding microdescriptor

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """

  def __init__(self, content, validate = True, document = None):
    # our attributes need to be set before the parent's constructor since
    # that's what triggers parsing
    self.bandwidth = None
    self.measured = None
    self.is_unmeasured = False
    self.unrecognized_bandwidth_entries = []

    self.digest = None

    super(RouterStatusEntryMicroV3, self).__init__(content, validate, document)

  def _parse(self, entries, validate):
    # Iterating over a snapshot since we remove entries as we handle them.
    # Doing that with a live items() view raises a RuntimeError under python 3.

    for keyword, values in list(entries.items()):
      value, _ = values[0]

      if keyword == 'r':
        # microdescriptor flavored 'r' lines lack a digest field
        _parse_r_line(self, value, validate, False)
        del entries['r']
      elif keyword == 'w':
        _parse_w_line(self, value, validate)
        del entries['w']
      elif keyword == 'm':
        # "m" digest
        # example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70

        self.digest = _base64_to_hex(value, validate, False)
        del entries['m']

    RouterStatusEntry._parse(self, entries, validate)

  def _name(self, is_plural = False):
    if is_plural:
      return "Router status entries (micro v3)"
    else:
      return "Router status entry (micro v3)"

  def _required_fields(self):
    return ('r', 's', 'm')

  def _single_fields(self):
    return ('r', 's', 'v', 'w', 'm')

  def _compare(self, other, method):
    # entries of other types never match, otherwise we compare our content
    if not isinstance(other, RouterStatusEntryMicroV3):
      return False

    return method(str(self).strip(), str(other).strip())

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __lt__(self, other):
    return self._compare(other, lambda s, o: s < o)

  def __le__(self, other):
    return self._compare(other, lambda s, o: s <= o)
+ # + # For v2 and v3 router status entries: + # "r" nickname identity digest publication IP ORPort DirPort + # example: r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0 + # + # For v3 microdescriptor router status entries: + # "r" nickname identity publication IP ORPort DirPort + # example: r Konata ARIJF2zbqirB9IwsW0mQznccWww 2012-09-24 13:40:40 69.64.48.168 9001 9030 + + r_comp = value.split(" ") + + # inject a None for the digest to normalize the field positioning + if not include_digest: + r_comp.insert(2, None) + + if len(r_comp) < 8: + if not validate: + return + + expected_field_count = 'eight' if include_digest else 'seven' + raise ValueError("%s 'r' line must have %s values: r %s" % (desc._name(), expected_field_count, value)) + + if validate: + if not stem.util.tor_tools.is_valid_nickname(r_comp[0]): + raise ValueError("%s nickname isn't valid: %s" % (desc._name(), r_comp[0])) + elif not stem.util.connection.is_valid_ipv4_address(r_comp[5]): + raise ValueError("%s address isn't a valid IPv4 address: %s" % (desc._name(), r_comp[5])) + elif not stem.util.connection.is_valid_port(r_comp[6]): + raise ValueError("%s ORPort is invalid: %s" % (desc._name(), r_comp[6])) + elif not stem.util.connection.is_valid_port(r_comp[7], allow_zero = True): + raise ValueError("%s DirPort is invalid: %s" % (desc._name(), r_comp[7])) + elif not (r_comp[6].isdigit() and r_comp[7].isdigit()): + return + + desc.nickname = r_comp[0] + desc.fingerprint = _base64_to_hex(r_comp[1], validate) + + if include_digest: + desc.digest = _base64_to_hex(r_comp[2], validate) + + desc.address = r_comp[5] + desc.or_port = int(r_comp[6]) + desc.dir_port = None if r_comp[7] == '0' else int(r_comp[7]) + + try: + published = "%s %s" % (r_comp[3], r_comp[4]) + desc.published = datetime.datetime.strptime(published, "%Y-%m-%d %H:%M:%S") + except ValueError: + if validate: + raise ValueError("Publication time time wasn't parsable: r %s" % value) 
def _parse_s_line(desc, value, validate):
  # Parses a RouterStatusEntry's 's' line, setting the descriptor's 'flags'
  # attribute to the list of flags it contains.
  #
  # "s" Flags
  # example: s Named Running Stable Valid
  #
  # When validating this raises a ValueError if the line has extra whitespace
  # or lists a flag more than once.

  flags = [] if value == "" else value.split(" ")
  desc.flags = flags

  if validate:
    # Blank entries are checked first. A run of several spaces produces
    # multiple empty strings, which would otherwise be misreported as being
    # duplicate flags.

    if "" in flags:
      raise ValueError("%s had extra whitespace on its 's' line: s %s" % (desc._name(), value))

    if len(set(flags)) != len(flags):
      raise ValueError("%s had duplicate flags: s %s" % (desc._name(), value))
def _parse_w_line(desc, value, validate):
  # Parses a RouterStatusEntry's 'w' line, setting the descriptor's
  # 'bandwidth', 'measured', and 'is_unmeasured' attributes. Entries we don't
  # recognize are appended to 'unrecognized_bandwidth_entries'.
  #
  # "w" "Bandwidth=" INT ["Measured=" INT] ["Unmeasured=1"]
  # example: w Bandwidth=7980
  #
  # Malformed content raises a ValueError when validating. Otherwise we stop
  # parsing the line at the first malformed entry.

  # str.split(" ") never provides an empty list, so checking the value itself
  # is how we detect a blank line
  if not value.strip():
    if not validate:
      return

    raise ValueError("%s 'w' line is blank: w %s" % (desc._name(), value))

  w_comp = value.split(" ")

  if not w_comp[0].startswith("Bandwidth="):
    if not validate:
      return

    raise ValueError("%s 'w' line needs to start with a 'Bandwidth=' entry: w %s" % (desc._name(), value))

  for w_entry in w_comp:
    if '=' in w_entry:
      w_key, w_value = w_entry.split('=', 1)
    else:
      w_key, w_value = w_entry, None

    if w_key == "Bandwidth":
      if not (w_value and w_value.isdigit()):
        if not validate:
          return

        raise ValueError("%s 'Bandwidth=' entry needs to have a numeric value: w %s" % (desc._name(), value))

      desc.bandwidth = int(w_value)
    elif w_key == "Measured":
      if not (w_value and w_value.isdigit()):
        if not validate:
          return

        raise ValueError("%s 'Measured=' entry needs to have a numeric value: w %s" % (desc._name(), value))

      desc.measured = int(w_value)
    elif w_key == "Unmeasured":
      if validate and w_value != "1":
        raise ValueError("%s 'Unmeasured=' should only have the value of '1': w %s" % (desc._name(), value))

      desc.is_unmeasured = True
    else:
      desc.unrecognized_bandwidth_entries.append(w_entry)
+def _parse_m_line(desc, value, validate): + # "m" methods 1*(algorithm "=" digest) + # example: m 8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs + + m_comp = value.split(" ") + + if not (desc.document and desc.document.is_vote): + if not validate: + return + + vote_status = "vote" if desc.document else "" + raise ValueError("%s 'm' line should only appear in votes (appeared in a %s): m %s" % (desc._name(), vote_status, value)) + elif len(m_comp) < 1: + if not validate: + return + + raise ValueError("%s 'm' line needs to start with a series of methods: m %s" % (desc._name(), value)) + + try: + methods = [int(entry) for entry in m_comp[0].split(",")] + except ValueError: + if not validate: + return + + raise ValueError("%s microdescriptor methods should be a series of comma separated integers: m %s" % (desc._name(), value)) + + hashes = {} + + for entry in m_comp[1:]: + if not '=' in entry: + if not validate: + continue + + raise ValueError("%s can only have a series of 'algorithm=digest' mappings after the methods: m %s" % (desc._name(), value)) + + hash_name, digest = entry.split('=', 1) + hashes[hash_name] = digest + + desc.microdescriptor_hashes.append((methods, hashes)) + + +def _base64_to_hex(identity, validate, check_if_fingerprint = True): + """ + Decodes a base64 value to hex. For example... 
+ + :: + + >>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s') + 'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB' + + :param str identity: encoded fingerprint from the consensus + :param bool validate: checks validity if **True** + :param bool check_if_fingerprint: asserts that the result is a fingerprint if **True** + + :returns: **str** with the uppercase hex encoding of the relay's fingerprint + + :raises: **ValueError** if the result isn't a valid fingerprint + """ + + # trailing equal signs were stripped from the identity + missing_padding = len(identity) % 4 + identity += "=" * missing_padding + + fingerprint = "" + + try: + identity_decoded = base64.b64decode(stem.util.str_tools._to_bytes(identity)) + except (TypeError, binascii.Error): + if not validate: + return None + + raise ValueError("Unable to decode identity string '%s'" % identity) + + for char in identity_decoded: + # Individual characters are either standard ASCII or hex encoded, and each + # represent two hex digits. For instance... + # + # >>> ord('\n') + # 10 + # >>> hex(10) + # '0xa' + # >>> '0xa'[2:].zfill(2).upper() + # '0A' + + char_int = char if isinstance(char, int) else ord(char) + fingerprint += hex(char_int)[2:].zfill(2).upper() + + if check_if_fingerprint: + if not stem.util.tor_tools.is_valid_fingerprint(fingerprint): + if not validate: + return None + + raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint)) + + return fingerprint diff --git a/lib/stem/descriptor/server_descriptor.py b/lib/stem/descriptor/server_descriptor.py new file mode 100644 index 00000000..d4dbaf3d --- /dev/null +++ b/lib/stem/descriptor/server_descriptor.py @@ -0,0 +1,968 @@ +# Copyright 2012-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Parsing for Tor server descriptors, which contains the infrequently changing +information about a Tor relay (contact information, exit policy, public keys, +etc). 
This information is provided from a few sources... + +* control port via 'GETINFO desc/\*' queries +* the 'cached-descriptors' file in tor's data directory +* tor metrics, at https://metrics.torproject.org/data.html +* directory authorities and mirrors via their DirPort + +**Module Overview:** + +:: + + ServerDescriptor - Tor server descriptor. + |- RelayDescriptor - Server descriptor for a relay. + | + |- BridgeDescriptor - Scrubbed server descriptor for a bridge. + | |- is_scrubbed - checks if our content has been properly scrubbed + | +- get_scrubbing_issues - description of issues with our scrubbing + | + |- digest - calculates the upper-case hex digest value for our content + |- get_unrecognized_lines - lines with unrecognized content + |- get_annotations - dictionary of content prior to the descriptor entry + +- get_annotation_lines - lines that provided the annotations +""" + +import base64 +import codecs +import datetime +import hashlib +import re + +import stem.descriptor.extrainfo_descriptor +import stem.exit_policy +import stem.prereq +import stem.util.connection +import stem.util.str_tools +import stem.util.tor_tools +import stem.version + +from stem.util import log + +from stem.descriptor import ( + PGP_BLOCK_END, + Descriptor, + _get_bytes_field, + _get_descriptor_components, + _read_until_keywords, +) + +try: + # added in python 3.2 + from functools import lru_cache +except ImportError: + from stem.util.lru_cache import lru_cache + +# relay descriptors must have exactly one of the following +REQUIRED_FIELDS = ( + "router", + "bandwidth", + "published", + "onion-key", + "signing-key", + "router-signature", +) + +# optional entries that can appear at most once +SINGLE_FIELDS = ( + "platform", + "fingerprint", + "hibernating", + "uptime", + "contact", + "read-history", + "write-history", + "eventdns", + "family", + "caches-extra-info", + "extra-info-digest", + "hidden-service-dir", + "protocols", + "allow-single-hop-exits", + "ntor-onion-key", +) + 
def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
    """
    Iterates over the server descriptors in a file.

    :param file descriptor_file: file with descriptor content
    :param bool is_bridge: parses the file as being a bridge descriptor
    :param bool validate: checks the validity of the descriptor's content if
      **True**, skips these checks otherwise
    :param dict kwargs: additional arguments for the descriptor constructor

    :returns: iterator for ServerDescriptor instances in the file

    :raises:
      * **ValueError** if the contents is malformed and validate is True
      * **IOError** if the file can't be read
    """

    # Handler for relay descriptors
    #
    # Cached descriptors consist of annotations followed by the descriptor
    # itself. For instance...
    #
    #   @downloaded-at 2012-03-14 16:31:05
    #   @source "145.53.65.130"
    #   router caerSidi 71.35.143.157 9001 0 0
    #   platform Tor 0.2.1.30 on Linux x86_64
    #   <rest of the descriptor content>
    #   router-signature
    #   -----BEGIN SIGNATURE-----
    #   <signature for the above descriptor>
    #   -----END SIGNATURE-----
    #
    # Metrics descriptor files are the same, but lack any annotations. The
    # following simply does the following...
    #
    #   - parse as annotations until we get to "router"
    #   - parse as descriptor content until we get to "router-signature" followed
    #     by the end of the signature block
    #   - construct a descriptor and provide it back to the caller
    #
    # Content after the last server descriptor is never provided to the caller.
    # It's an error when validating, and silently dropped otherwise.

    descriptor_class = BridgeDescriptor if is_bridge else RelayDescriptor
    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]

    while True:
        annotations = _read_until_keywords("router", descriptor_file)
        descriptor_content = _read_until_keywords("router-signature", descriptor_file)

        # we've reached the 'router-signature', now include the pgp style block
        descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)

        if not descriptor_content:
            if validate and annotations:
                orphaned_annotations = stem.util.str_tools._to_unicode(b'\n'.join(annotations))
                raise ValueError("Content doesn't conform to being a server descriptor:\n%s" % orphaned_annotations)

            break  # done parsing descriptors

        # Strip newlines from annotations. This needs to be a real list rather
        # than map(), which in python 3 is a lazy iterator that's always truthy
        # and can only be read once.

        annotations = [line.strip() for line in annotations]
        descriptor_text = b"".join(descriptor_content)

        yield descriptor_class(descriptor_text, validate, annotations, **kwargs)
+ + :var str nickname: **\*** relay's nickname + :var str fingerprint: identity key fingerprint + :var datetime published: **\*** time in UTC when this descriptor was made + + :var str address: **\*** IPv4 address of the relay + :var int or_port: **\*** port used for relaying + :var int socks_port: **\*** port used as client (deprecated, always **None**) + :var int dir_port: **\*** port used for descriptor mirroring + + :var bytes platform: line with operating system and tor version + :var stem.version.Version tor_version: version of tor + :var str operating_system: operating system + :var int uptime: uptime when published in seconds + :var bytes contact: contact information + :var stem.exit_policy.ExitPolicy exit_policy: **\*** stated exit policy + :var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6 + :var set family: **\*** nicknames or fingerprints of declared family + + :var int average_bandwidth: **\*** average rate it's willing to relay in bytes/s + :var int burst_bandwidth: **\*** burst rate it's willing to relay in bytes/s + :var int observed_bandwidth: **\*** estimated capacity based on usage in bytes/s + + :var list link_protocols: link protocols supported by the relay + :var list circuit_protocols: circuit protocols supported by the relay + :var bool hibernating: **\*** hibernating when published + :var bool allow_single_hop_exits: **\*** flag if single hop exiting is allowed + :var bool extra_info_cache: **\*** flag if a mirror for extra-info documents + :var str extra_info_digest: upper-case hex encoded digest of our extra-info document + :var bool eventdns: flag for evdns backend (deprecated, always unset) + :var list or_addresses: **\*** alternative for our address/or_port + attributes, each entry is a tuple of the form (address (**str**), port + (**int**), is_ipv6 (**bool**)) + + Deprecated, moved to extra-info descriptor... 
def __init__(self, raw_contents, validate = True, annotations = None):
    """
    Server descriptor constructor, created from an individual relay's
    descriptor content (as provided by "GETINFO desc/*", cached descriptors,
    and metrics).

    By default this validates the descriptor's content as it's parsed. This
    validation can be disabled to either improve performance or be accepting of
    malformed data.

    :param str raw_contents: descriptor content provided by the relay
    :param bool validate: checks the validity of the descriptor's content if
      **True**, skips these checks otherwise
    :param list annotations: lines that appeared prior to the descriptor

    :raises: **ValueError** if the contents is malformed and validate is True
    """

    super(ServerDescriptor, self).__init__(raw_contents)

    # Only a few things can be arbitrary bytes according to the dir-spec, so
    # parsing them separately.

    self.platform = _get_bytes_field("platform", raw_contents)
    self.contact = _get_bytes_field("contact", raw_contents)

    raw_contents = stem.util.str_tools._to_unicode(raw_contents)

    self.nickname = None
    self.fingerprint = None
    self.published = None

    self.address = None
    self.or_port = None
    self.socks_port = None
    self.dir_port = None

    self.tor_version = None
    self.operating_system = None
    self.uptime = None
    self.exit_policy = None
    self.exit_policy_v6 = DEFAULT_IPV6_EXIT_POLICY
    self.family = set()

    self.average_bandwidth = None
    self.burst_bandwidth = None
    self.observed_bandwidth = None

    self.link_protocols = None
    self.circuit_protocols = None
    self.hibernating = False
    self.allow_single_hop_exits = False
    self.extra_info_cache = False
    self.extra_info_digest = None
    self.hidden_service_dir = None
    self.eventdns = None
    self.or_addresses = []

    self.read_history_end = None
    self.read_history_interval = None
    self.read_history_values = None

    self.write_history_end = None
    self.write_history_interval = None
    self.write_history_values = None

    self._unrecognized_lines = []

    # Take a snapshot of the annotations. The caller may hand us a one-shot
    # iterable (such as a map() result in python 3) which would otherwise be
    # exhausted by its first reader, and copying also avoids aliasing the
    # caller's list.

    self._annotation_lines = list(annotations) if annotations else []

    # A descriptor contains a series of 'keyword lines' which are simply a
    # keyword followed by an optional value. Lines can also be followed by a
    # signature block.
    #
    # We care about the ordering of 'accept' and 'reject' entries because this
    # influences the resulting exit policy, but for everything else the order
    # does not matter so breaking it into key / value pairs.

    entries, policy = _get_descriptor_components(raw_contents, validate, ("accept", "reject"))

    # the common reject-all policy shares a single module level instance
    if policy == [u'reject *:*']:
        self.exit_policy = REJECT_ALL_POLICY
    else:
        self.exit_policy = stem.exit_policy.ExitPolicy(*policy)

    self._parse(entries, validate)

    if validate:
        self._check_constraints(entries)
def get_unrecognized_lines(self):
    """
    Provides the descriptor lines whose keyword wasn't recognized while
    parsing.

    :returns: **list** with a copy of our unrecognized lines
    """

    return list(self._unrecognized_lines)

@lru_cache()
def get_annotations(self):
    """
    Provides content that appeared prior to the descriptor. If this comes from
    the cached-descriptors file then this commonly contains content like...

    ::

      @downloaded-at 2012-03-18 21:18:29
      @source "173.254.216.66"

    The result is cached, so callers should treat it as read-only.

    :returns: **dict** with the key/value pairs in our annotations, lines
      without a value are mapped to **None**
    """

    annotation_dict = {}

    for line in self._annotation_lines:
        # annotation lines are bytes, everything after the first space is the value
        key, separator, value = line.partition(b" ")
        annotation_dict[key] = value if separator else None

    return annotation_dict

def get_annotation_lines(self):
    """
    Provides the lines of content that appeared prior to the descriptor. This
    is the same as the
    :func:`~stem.descriptor.server_descriptor.ServerDescriptor.get_annotations`
    results, but with the unparsed lines and ordering retained.

    :returns: **list** with the lines of annotation that came before this descriptor
    """

    # A copy, like get_unrecognized_lines(), so callers can't modify our
    # internal state out from under the cached get_annotations() results.

    return list(self._annotation_lines)
+ + :param dict entries: descriptor contents to be applied + :param bool validate: checks the validity of descriptor content if **True** + + :raises: **ValueError** if an error occurs in validation + """ + + for keyword, values in entries.items(): + # most just work with the first (and only) value + value, block_contents = values[0] + + line = "%s %s" % (keyword, value) # original line + + if block_contents: + line += "\n%s" % block_contents + + if keyword == "router": + # "router" nickname address ORPort SocksPort DirPort + router_comp = value.split() + + if len(router_comp) < 5: + if not validate: + continue + + raise ValueError("Router line must have five values: %s" % line) + + if validate: + if not stem.util.tor_tools.is_valid_nickname(router_comp[0]): + raise ValueError("Router line entry isn't a valid nickname: %s" % router_comp[0]) + elif not stem.util.connection.is_valid_ipv4_address(router_comp[1]): + raise ValueError("Router line entry isn't a valid IPv4 address: %s" % router_comp[1]) + elif not stem.util.connection.is_valid_port(router_comp[2], allow_zero = True): + raise ValueError("Router line's ORPort is invalid: %s" % router_comp[2]) + elif not stem.util.connection.is_valid_port(router_comp[3], allow_zero = True): + raise ValueError("Router line's SocksPort is invalid: %s" % router_comp[3]) + elif not stem.util.connection.is_valid_port(router_comp[4], allow_zero = True): + raise ValueError("Router line's DirPort is invalid: %s" % router_comp[4]) + elif not (router_comp[2].isdigit() and router_comp[3].isdigit() and router_comp[4].isdigit()): + continue + + self.nickname = router_comp[0] + self.address = router_comp[1] + self.or_port = int(router_comp[2]) + self.socks_port = None if router_comp[3] == '0' else int(router_comp[3]) + self.dir_port = None if router_comp[4] == '0' else int(router_comp[4]) + elif keyword == "bandwidth": + # "bandwidth" bandwidth-avg bandwidth-burst bandwidth-observed + bandwidth_comp = value.split() + + if 
len(bandwidth_comp) < 3: + if not validate: + continue + + raise ValueError("Bandwidth line must have three values: %s" % line) + elif not bandwidth_comp[0].isdigit(): + if not validate: + continue + + raise ValueError("Bandwidth line's average rate isn't numeric: %s" % bandwidth_comp[0]) + elif not bandwidth_comp[1].isdigit(): + if not validate: + continue + + raise ValueError("Bandwidth line's burst rate isn't numeric: %s" % bandwidth_comp[1]) + elif not bandwidth_comp[2].isdigit(): + if not validate: + continue + + raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2]) + + self.average_bandwidth = int(bandwidth_comp[0]) + self.burst_bandwidth = int(bandwidth_comp[1]) + self.observed_bandwidth = int(bandwidth_comp[2]) + elif keyword == "platform": + # "platform" string + + # The platform attribute was set earlier. This line can contain any + # arbitrary data, but tor seems to report its version followed by the + # os like the following... + # + # platform Tor 0.2.2.35 (git-73ff13ab3cc9570d) on Linux x86_64 + # + # There's no guarantee that we'll be able to pick these out the + # version, but might as well try to save our caller the effort. + + platform_match = re.match("^Tor (\S*).* on (.*)$", value) + + if platform_match: + version_str, self.operating_system = platform_match.groups() + + try: + self.tor_version = stem.version._get_version(version_str) + except ValueError: + pass + elif keyword == "published": + # "published" YYYY-MM-DD HH:MM:SS + + try: + self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + except ValueError: + if validate: + raise ValueError("Published line's time wasn't parsable: %s" % line) + elif keyword == "fingerprint": + # This is forty hex digits split into space separated groups of four. + # Checking that we match this pattern. 
+ + fingerprint = value.replace(" ", "") + + if validate: + for grouping in value.split(" "): + if len(grouping) != 4: + raise ValueError("Fingerprint line should have groupings of four hex digits: %s" % value) + + if not stem.util.tor_tools.is_valid_fingerprint(fingerprint): + raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value) + + self.fingerprint = fingerprint + elif keyword == "hibernating": + # "hibernating" 0|1 (in practice only set if one) + + if validate and not value in ("0", "1"): + raise ValueError("Hibernating line had an invalid value, must be zero or one: %s" % value) + + self.hibernating = value == "1" + elif keyword == "allow-single-hop-exits": + self.allow_single_hop_exits = True + elif keyword == "caches-extra-info": + self.extra_info_cache = True + elif keyword == "extra-info-digest": + # this is forty hex digits which just so happens to be the same a + # fingerprint + + if validate and not stem.util.tor_tools.is_valid_fingerprint(value): + raise ValueError("Extra-info digests should consist of forty hex digits: %s" % value) + + self.extra_info_digest = value + elif keyword == "hidden-service-dir": + if value: + self.hidden_service_dir = value.split(" ") + else: + self.hidden_service_dir = ["2"] + elif keyword == "uptime": + # We need to be tolerant of negative uptimes to accommodate a past tor + # bug... + # + # Changes in version 0.1.2.7-alpha - 2007-02-06 + # - If our system clock jumps back in time, don't publish a negative + # uptime in the descriptor. Also, don't let the global rate limiting + # buckets go absurdly negative. + # + # After parsing all of the attributes we'll double check that negative + # uptimes only occurred prior to this fix. 
+ + try: + self.uptime = int(value) + except ValueError: + if not validate: + continue + + raise ValueError("Uptime line must have an integer value: %s" % value) + elif keyword == "contact": + pass # parsed as a bytes field earlier + elif keyword == "protocols": + protocols_match = re.match("^Link (.*) Circuit (.*)$", value) + + if protocols_match: + link_versions, circuit_versions = protocols_match.groups() + self.link_protocols = link_versions.split(" ") + self.circuit_protocols = circuit_versions.split(" ") + elif validate: + raise ValueError("Protocols line did not match the expected pattern: %s" % line) + elif keyword == "family": + self.family = set(value.split(" ")) + elif keyword == "eventdns": + self.eventdns = value == "1" + elif keyword == "ipv6-policy": + self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value) + elif keyword == "or-address": + or_address_entries = [value for (value, _) in values] + + for entry in or_address_entries: + line = "%s %s" % (keyword, entry) + + if not ":" in entry: + if not validate: + continue + else: + raise ValueError("or-address line missing a colon: %s" % line) + + address, port = entry.rsplit(':', 1) + is_ipv6 = address.startswith("[") and address.endswith("]") + + if is_ipv6: + address = address[1:-1] # remove brackets + + if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or + (is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))): + if not validate: + continue + else: + raise ValueError("or-address line has a malformed address: %s" % line) + + if stem.util.connection.is_valid_port(port): + self.or_addresses.append((address, int(port), is_ipv6)) + elif validate: + raise ValueError("or-address line has a malformed port: %s" % line) + elif keyword in ("read-history", "write-history"): + try: + timestamp, interval, remainder = \ + stem.descriptor.extrainfo_descriptor._parse_timestamp_and_interval(keyword, value) + + try: + if remainder: + history_values = [int(entry) for entry 
in remainder.split(",")] + else: + history_values = [] + except ValueError: + raise ValueError("%s line has non-numeric values: %s" % (keyword, line)) + + if keyword == "read-history": + self.read_history_end = timestamp + self.read_history_interval = interval + self.read_history_values = history_values + else: + self.write_history_end = timestamp + self.write_history_interval = interval + self.write_history_values = history_values + except ValueError as exc: + if validate: + raise exc + else: + self._unrecognized_lines.append(line) + + # if we have a negative uptime and a tor version that shouldn't exhibit + # this bug then fail validation + + if validate and self.uptime and self.tor_version: + if self.uptime < 0 and self.tor_version >= stem.version.Version("0.1.2.7"): + raise ValueError("Descriptor for version '%s' had a negative uptime value: %i" % (self.tor_version, self.uptime)) + + def _check_constraints(self, entries): + """ + Does a basic check that the entries conform to this descriptor type's + constraints. 
+ + :param dict entries: keyword => (value, pgp key) entries + + :raises: **ValueError** if an issue arises in validation + """ + + for keyword in self._required_fields(): + if not keyword in entries: + raise ValueError("Descriptor must have a '%s' entry" % keyword) + + for keyword in self._single_fields(): + if keyword in entries and len(entries[keyword]) > 1: + raise ValueError("The '%s' entry can only appear once in a descriptor" % keyword) + + expected_first_keyword = self._first_keyword() + if expected_first_keyword and expected_first_keyword != entries.keys()[0]: + raise ValueError("Descriptor must start with a '%s' entry" % expected_first_keyword) + + expected_last_keyword = self._last_keyword() + if expected_last_keyword and expected_last_keyword != entries.keys()[-1]: + raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword) + + if not self.exit_policy: + raise ValueError("Descriptor must have at least one 'accept' or 'reject' entry") + + # Constraints that the descriptor must meet to be valid. These can be None if + # not applicable. 
@lru_cache()
def digest(self):
    """
    Provides the digest of our descriptor's content.

    The hash covers the content from the "router " keyword through the
    "router-signature" line, leaving out the signature block that follows it.

    :returns: the digest string encoded in uppercase hex

    :raises: ValueError if the digest cannot be calculated
    """

    content = self.get_bytes()

    opening_marker = b"router "
    signature_marker = b"\nrouter-signature\n"

    first = content.find(opening_marker)
    signature_at = content.find(signature_marker)
    last = signature_at + len(signature_marker)

    # bail if either marker is missing or they're out of order
    if first < 0 or signature_at <= 0 or last <= first:
        raise ValueError("unable to calculate digest for descriptor")

    signed_portion = content[first:last]
    sha1 = hashlib.sha1(stem.util.str_tools._to_bytes(signed_portion))

    return stem.util.str_tools._to_unicode(sha1.hexdigest().upper())
def _verify_digest(self, key_as_der):
    """
    Checks that our digest matches what was signed. This is skipped if
    stem.prereq reports that the crypto module is unavailable.

    :param bytes key_as_der: DER encoded signing key, a sequence of the
      modulus followed by the public exponent

    :raises: **ValueError** if the decrypted signature is malformed or
      doesn't match the digest of our content
    """

    if not stem.prereq.is_crypto_available():
        return

    from Crypto.Util import asn1
    from Crypto.Util.number import bytes_to_long, long_to_bytes

    # get the ASN.1 sequence

    seq = asn1.DerSequence()
    seq.decode(key_as_der)
    modulus = seq[0]
    public_exponent = seq[1]  # should always be 65537

    sig_as_bytes = RelayDescriptor._get_key_bytes(self.signature)

    # convert the descriptor signature to an int

    sig_as_long = bytes_to_long(sig_as_bytes)

    # use the public exponent[e] & the modulus[n] to decrypt the int

    decrypted_int = pow(sig_as_long, public_exponent, modulus)

    # block size will always be 128 for a 1024 bit key

    blocksize = 128

    # convert the int to a byte array.

    decrypted_bytes = long_to_bytes(decrypted_int, blocksize)

    ############################################################################
    ## The decrypted bytes should have a structure exactly along these lines.
    ## 1 byte  - [null '\x00']
    ## 1 byte  - [block type identifier '\x01'] - Should always be 1
    ## N bytes - [padding '\xFF' ]
    ## 1 byte  - [separator '\x00' ]
    ## M bytes - [message]
    ## Total   - 128 bytes
    ## More info here http://www.ietf.org/rfc/rfc2313.txt
    ##                esp the Notes in section 8.1
    ############################################################################

    # Checked without a surrounding try/except so this error isn't swallowed
    # and replaced by a handler for ValueError.

    if not decrypted_bytes.startswith(b'\x00\x01'):
        raise ValueError("Verification failed, identifier missing")

    identifier_offset = 2

    try:
        # find the separator
        separator_index = decrypted_bytes.index(b'\x00', identifier_offset)
    except ValueError:
        raise ValueError("Verification failed, seperator not found")

    digest_hex = codecs.encode(decrypted_bytes[separator_index + 1:], 'hex_codec')
    digest = stem.util.str_tools._to_unicode(digest_hex.upper())

    local_digest = self.digest()

    if digest != local_digest:
        raise ValueError("Decrypted digest does not match local digest (calculated: %s, local: %s)" % (digest, local_digest))
def _parse(self, entries, validate):
    """
    Handles the 'router-digest' field, which only appears in bridge
    descriptors, then hands the remaining entries to the ServerDescriptor
    parser.

    :param dict entries: descriptor contents to be applied
    :param bool validate: checks the validity of descriptor content if **True**

    :raises: **ValueError** if an error occurs in validation
    """

    entries = dict(entries)  # shallow copy since we're destructive

    # Removing the entry with pop() rather than deleting it while looping over
    # entries.items(). In python 3 items() is a live view, and changing the
    # dictionary's size while iterating over it raises a RuntimeError.

    digest_values = entries.pop("router-digest", None)

    if digest_values:
        # only the first (and only expected) value is used
        value, _ = digest_values[0]

        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
            raise ValueError("Router digest line had an invalid sha1 digest: %s" % ("router-digest %s" % value))

        self._digest = stem.util.str_tools._to_unicode(value)

    ServerDescriptor._parse(self, entries, validate)
Checks if we've been properly scrubbed in accordance with the `bridge + descriptor specification + `_. Validation is a + moving target so this may not + be fully up to date. + + :returns: **True** if we're scrubbed, **False** otherwise + """ + + return self.get_scrubbing_issues() == [] + + @lru_cache() + def get_scrubbing_issues(self): + """ + Provides issues with our scrubbing. + + :returns: **list** of strings which describe issues we have with our + scrubbing, this list is empty if we're properly scrubbed + """ + + issues = [] + + if not self.address.startswith("10."): + issues.append("Router line's address should be scrubbed to be '10.x.x.x': %s" % self.address) + + if self.contact and self.contact != "somebody": + issues.append("Contact line should be scrubbed to be 'somebody', but instead had '%s'" % self.contact) + + for address, _, is_ipv6 in self.or_addresses: + if not is_ipv6 and not address.startswith("10."): + issues.append("or-address line's address should be scrubbed to be '10.x.x.x': %s" % address) + elif is_ipv6 and not address.startswith("fd9f:2e19:3bcf::"): + # TODO: this check isn't quite right because we aren't checking that + # the next grouping of hex digits contains 1-2 digits + issues.append("or-address line's address should be scrubbed to be 'fd9f:2e19:3bcf::xx:xxxx': %s" % address) + + for line in self.get_unrecognized_lines(): + if line.startswith("onion-key "): + issues.append("Bridge descriptors should have their onion-key scrubbed: %s" % line) + elif line.startswith("signing-key "): + issues.append("Bridge descriptors should have their signing-key scrubbed: %s" % line) + elif line.startswith("router-signature "): + issues.append("Bridge descriptors should have their signature scrubbed: %s" % line) + + return issues + + def _required_fields(self): + # bridge required fields are the same as a relay descriptor, minus items + # excluded according to the format page + + excluded_fields = [ + "onion-key", + "signing-key", + 
"router-signature", + ] + + included_fields = [ + "router-digest", + ] + + return tuple(included_fields + [f for f in REQUIRED_FIELDS if not f in excluded_fields]) + + def _single_fields(self): + return self._required_fields() + SINGLE_FIELDS + + def _last_keyword(self): + return None + + def _compare(self, other, method): + if not isinstance(other, BridgeDescriptor): + return False + + return method(str(self).strip(), str(other).strip()) + + def __hash__(self): + return hash(str(self).strip()) + + def __eq__(self, other): + return self._compare(other, lambda s, o: s == o) + + def __lt__(self, other): + return self._compare(other, lambda s, o: s < o) + + def __le__(self, other): + return self._compare(other, lambda s, o: s <= o) diff --git a/lib/stem/descriptor/tordnsel.py b/lib/stem/descriptor/tordnsel.py new file mode 100644 index 00000000..3d7da862 --- /dev/null +++ b/lib/stem/descriptor/tordnsel.py @@ -0,0 +1,115 @@ +# Copyright 2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Parsing for `TorDNSEL `_ +exit list files. +""" + +import datetime + +import stem.util.connection +import stem.util.str_tools +import stem.util.tor_tools + +from stem.descriptor import ( + Descriptor, + _read_until_keywords, + _get_descriptor_components, +) + + +def _parse_file(tordnsel_file, validate = True, **kwargs): + """ + Iterates over a tordnsel file. 
+ + :returns: iterator for :class:`~stem.descriptor.tordnsel.TorDNSEL` + instances in the file + + :raises: + * **ValueError** if the contents is malformed and validate is **True** + * **IOError** if the file can't be read + """ + + # skip content prior to the first ExitNode + _read_until_keywords("ExitNode", tordnsel_file, skip = True) + + while True: + contents = _read_until_keywords("ExitAddress", tordnsel_file) + contents += _read_until_keywords("ExitNode", tordnsel_file) + + if contents: + yield TorDNSEL(bytes.join(b"", contents), validate, **kwargs) + else: + break # done parsing file + + +class TorDNSEL(Descriptor): + """ + TorDNSEL descriptor (`exitlist specification + `_) + + :var str fingerprint: **\*** authority's fingerprint + :var datetime published: **\*** time in UTC when this descriptor was made + :var datetime last_status: **\*** time in UTC when the relay was seen in a v2 network status + :var list exit_addresses: **\*** list of (str address, datetime date) tuples consisting of the found IPv4 exit address and the time + + **\*** attribute is either required when we're parsed with validation or has + a default value, others are left as **None** if undefined + """ + + def __init__(self, raw_contents, validate): + super(TorDNSEL, self).__init__(raw_contents) + raw_contents = stem.util.str_tools._to_unicode(raw_contents) + entries = _get_descriptor_components(raw_contents, validate) + + self.fingerprint = None + self.published = None + self.last_status = None + self.exit_addresses = [] + + self._parse(entries, validate) + + def _parse(self, entries, validate): + + for keyword, values in entries.items(): + value, block_content = values[0] + + if validate and block_content: + raise ValueError("Unexpected block content: %s" % block_content) + + if keyword == "ExitNode": + if validate and not stem.util.tor_tools.is_valid_fingerprint(value): + raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value) + + self.fingerprint = value + 
elif keyword == "Published": + try: + self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + except ValueError: + if validate: + raise ValueError("Published time wasn't parsable: %s" % value) + elif keyword == "LastStatus": + try: + self.last_status = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + except ValueError: + if validate: + raise ValueError("LastStatus time wasn't parsable: %s" % value) + elif keyword == "ExitAddress": + for value, block_content in values: + address, date = value.split(" ", 1) + + if validate: + if not stem.util.connection.is_valid_ipv4_address(address): + raise ValueError("ExitAddress isn't a valid IPv4 address: %s" % address) + elif block_content: + raise ValueError("Unexpected block content: %s" % block_content) + + try: + date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") + self.exit_addresses.append((address, date)) + except ValueError: + if validate: + raise ValueError("ExitAddress found time wasn't parsable: %s" % value) + elif validate: + raise ValueError("Unrecognized keyword: %s" % keyword) diff --git a/lib/stem/exit_policy.py b/lib/stem/exit_policy.py new file mode 100644 index 00000000..0f033ded --- /dev/null +++ b/lib/stem/exit_policy.py @@ -0,0 +1,880 @@ +# Copyright 2012-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Representation of tor exit policies. These can be easily used to check if +exiting to a destination is permissible or not. For instance... 
+ +:: + + >>> from stem.exit_policy import ExitPolicy, MicroExitPolicy + >>> policy = ExitPolicy("accept *:80", "accept *:443", "reject *:*") + >>> print policy + accept *:80, accept *:443, reject *:* + >>> print policy.summary() + accept 80, 443 + >>> policy.can_exit_to("75.119.206.243", 80) + True + + >>> policy = MicroExitPolicy("accept 80,443") + >>> print policy + accept 80,443 + >>> policy.can_exit_to("75.119.206.243", 80) + True + +:: + + ExitPolicy - Exit policy for a Tor relay + | + MicroExitPolicy - Microdescriptor exit policy + |- can_exit_to - check if exiting to this destination is allowed or not + |- is_exiting_allowed - check if any exiting is allowed + |- summary - provides a short label, similar to a microdescriptor + |- __str__ - string representation + +- __iter__ - ExitPolicyRule entries that this contains + + ExitPolicyRule - Single rule of an exit policy chain + |- is_address_wildcard - checks if we'll accept any address + |- is_port_wildcard - checks if we'll accept any port + |- get_address_type - provides the protocol our ip address belongs to + |- is_match - checks if we match a given destination + |- get_mask - provides the address representation of our mask + |- get_masked_bits - provides the bit representation of our mask + +- __str__ - string representation for this rule + + get_config_policy - provides the ExitPolicy based on torrc rules + +.. data:: AddressType (enum) + + Enumerations for IP address types that can be in an exit policy. 
+ + ============ =========== + AddressType Description + ============ =========== + **WILDCARD** any address of either IPv4 or IPv6 + **IPv4** IPv4 address + **IPv6** IPv6 address + ============ =========== +""" + +import zlib + +import stem.prereq +import stem.util.connection +import stem.util.enum +import stem.util.str_tools + +try: + # added in python 3.2 + from functools import lru_cache +except ImportError: + from stem.util.lru_cache import lru_cache + +AddressType = stem.util.enum.Enum(("WILDCARD", "Wildcard"), ("IPv4", "IPv4"), ("IPv6", "IPv6")) + +# Addresses aliased by the 'private' policy. From the tor man page... +# +# To specify all internal and link-local networks (including 0.0.0.0/8, +# 169.254.0.0/16, 127.0.0.0/8, 192.168.0.0/16, 10.0.0.0/8, and 172.16.0.0/12), +# you can use the "private" alias instead of an address. + +PRIVATE_ADDRESSES = ( + "0.0.0.0/8", + "169.254.0.0/16", + "127.0.0.0/8", + "192.168.0.0/16", + "10.0.0.0/8", + "172.16.0.0/12", +) + + +def get_config_policy(rules): + """ + Converts an ExitPolicy found in a torrc to a proper exit pattern. This + accounts for... 
+ + * ports being optional + * the 'private' keyword + + :param str,list rules: comma separated rules or list to be converted + + :returns: :class:`~stem.exit_policy.ExitPolicy` reflected by the rules + + :raises: **ValueError** if input isn't a valid tor exit policy + """ + + if isinstance(rules, (bytes, unicode)): + rules = rules.split(',') + + result = [] + + for rule in rules: + rule = rule.strip() + + if not rule: + continue + + if not ':' in rule: + rule = "%s:*" % rule + + if 'private' in rule: + acceptance = rule.split(' ', 1)[0] + port = rule.split(':', 1)[1] + + for private_addr in PRIVATE_ADDRESSES: + result.append(ExitPolicyRule("%s %s:%s" % (acceptance, private_addr, port))) + else: + result.append(ExitPolicyRule(rule)) + + # torrc policies can apply to IPv4 or IPv6, so we need to make sure /0 + # addresses aren't treated as being a full wildcard + + for rule in result: + rule._submask_wildcard = False + + return ExitPolicy(*result) + + +class ExitPolicy(object): + """ + Policy for the destinations that a relay allows or denies exiting to. This + is, in effect, just a list of :class:`~stem.exit_policy.ExitPolicyRule` + entries. + + :param list rules: **str** or :class:`~stem.exit_policy.ExitPolicyRule` + entries that make up this policy + """ + + def __init__(self, *rules): + # sanity check the types + for rule in rules: + if not isinstance(rule, (bytes, unicode, ExitPolicyRule)): + raise TypeError("Exit policy rules can only contain strings or ExitPolicyRules, got a %s (%s)" % (type(rule), rules)) + + # Unparsed representation of the rules we were constructed with. Our + # _get_rules() method consumes this to provide ExitPolicyRule instances. + # This is lazily evaluated so we don't need to actually parse the exit + # policy if it's never used. 
+ + is_all_str = True + + for rule in rules: + if not isinstance(rule, (bytes, unicode)): + is_all_str = False + + if rules and is_all_str: + byte_rules = [stem.util.str_tools._to_bytes(r) for r in rules] + self._input_rules = zlib.compress(b','.join(byte_rules)) + else: + self._input_rules = rules + + # Result when no rules apply. According to the spec policies default to 'is + # allowed', but our microdescriptor policy subclass might want to change + # this. + + self._is_allowed_default = True + + @lru_cache() + def can_exit_to(self, address = None, port = None, strict = False): + """ + Checks if this policy allows exiting to a given destination or not. If the + address or port is omitted then this will check if we're allowed to exit to + any instances of the defined address or port. + + :param str address: IPv4 or IPv6 address (with or without brackets) + :param int port: port number + :param bool strict: if the address or port is excluded then check if we can + exit to **all** instances of the defined address or port + + :returns: **True** if exiting to this destination is allowed, **False** otherwise + """ + + for rule in self._get_rules(): + if rule.is_match(address, port, strict): + return rule.is_accept + + return self._is_allowed_default + + @lru_cache() + def is_exiting_allowed(self): + """ + Provides **True** if the policy allows exiting whatsoever, **False** + otherwise. + """ + + rejected_ports = set() + + for rule in self._get_rules(): + if rule.is_accept: + for port in xrange(rule.min_port, rule.max_port + 1): + if not port in rejected_ports: + return True + elif rule.is_address_wildcard(): + if rule.is_port_wildcard(): + return False + else: + rejected_ports.update(range(rule.min_port, rule.max_port + 1)) + + return self._is_allowed_default + + @lru_cache() + def summary(self): + """ + Provides a short description of our policy chain, similar to a + microdescriptor. 
This excludes entries that don't cover all IP + addresses, and is either white-list or blacklist policy based on + the final entry. For instance... + + :: + + >>> policy = ExitPolicy('accept *:80', 'accept *:443', 'reject *:*') + >>> policy.summary() + "accept 80, 443" + + >>> policy = ExitPolicy('accept *:443', 'reject *:1-1024', 'accept *:*') + >>> policy.summary() + "reject 1-442, 444-1024" + + :returns: **str** with a concise summary for our policy + """ + + # determines if we're a white-list or blacklist + is_whitelist = not self._is_allowed_default + + for rule in self._get_rules(): + if rule.is_address_wildcard() and rule.is_port_wildcard(): + is_whitelist = not rule.is_accept + break + + # Iterates over the policies and adds the ports we'll return (ie, + # allows if a white-list and rejects if a blacklist). Regardless of a + # port's allow/reject policy, all further entries with that port are + # ignored since policies respect the first matching policy. + + display_ports, skip_ports = [], set() + + for rule in self._get_rules(): + if not rule.is_address_wildcard(): + continue + elif rule.is_port_wildcard(): + break + + for port in xrange(rule.min_port, rule.max_port + 1): + if port in skip_ports: + continue + + # if accept + white-list or reject + blacklist then add + if rule.is_accept == is_whitelist: + display_ports.append(port) + + # all further entries with this port should be ignored + skip_ports.add(port) + + # convert port list to a list of ranges (ie, ['1-3'] rather than [1, 2, 3]) + if display_ports: + display_ranges, temp_range = [], [] + display_ports.sort() + display_ports.append(None) # ending item to include last range in loop + + for port in display_ports: + if not temp_range or temp_range[-1] + 1 == port: + temp_range.append(port) + else: + if len(temp_range) > 1: + display_ranges.append("%i-%i" % (temp_range[0], temp_range[-1])) + else: + display_ranges.append(str(temp_range[0])) + + temp_range = [port] + else: + # everything for the
inverse + is_whitelist = not is_whitelist + display_ranges = ["1-65535"] + + # constructs the summary string + label_prefix = "accept " if is_whitelist else "reject " + + return (label_prefix + ", ".join(display_ranges)).strip() + + @lru_cache() + def _get_rules(self): + rules = [] + is_all_accept, is_all_reject = True, True + + if isinstance(self._input_rules, bytes): + decompressed_rules = zlib.decompress(self._input_rules).split(b',') + else: + decompressed_rules = self._input_rules + + for rule in decompressed_rules: + if isinstance(rule, bytes): + rule = stem.util.str_tools._to_unicode(rule) + + if isinstance(rule, unicode): + rule = ExitPolicyRule(rule.strip()) + + if rule.is_accept: + is_all_reject = False + else: + is_all_accept = False + + rules.append(rule) + + if rule.is_address_wildcard() and rule.is_port_wildcard(): + break # this is a catch-all, no reason to include more + + # If we only have one kind of entry *and* end with a wildcard then + # we might as well use the simpler version. For instance... + # + # reject *:80, reject *:443, reject *:* + # + # ... could also be represented as simply... + # + # reject *:* + # + # This mostly comes up with reject-all policies because the + # 'reject private:*' appends an extra seven rules that have no + # effect. + + if rules and (rules[-1].is_address_wildcard() and rules[-1].is_port_wildcard()): + if is_all_accept: + rules = [ExitPolicyRule("accept *:*")] + elif is_all_reject: + rules = [ExitPolicyRule("reject *:*")] + + self._input_rules = None + return rules + + def __iter__(self): + for rule in self._get_rules(): + yield rule + + @lru_cache() + def __str__(self): + return ', '.join([str(rule) for rule in self._get_rules()]) + + def __hash__(self): + # TODO: It would be nice to provide a real hash function, but doing so is + # tricky due to how we lazily load the rules. 
Like equality checks a proper + # hash function would need to call _get_rules(), but that's behind + # @lru_cache which calls hash() forming a circular dependency. + + return id(self) + + def __eq__(self, other): + if isinstance(other, ExitPolicy): + return self._get_rules() == list(other) + else: + return False + + +class MicroExitPolicy(ExitPolicy): + """ + Exit policy provided by the microdescriptors. This is a distilled version of + what a normal :class:`~stem.exit_policy.ExitPolicy` contains, just consisting of a + list of ports that are either accepted or rejected. For instance... + + :: + + accept 80,443 # only accepts common http ports + reject 1-1024 # only accepts non-privileged ports + + Since these policies are a subset of the exit policy information (lacking IP + ranges) clients can only use them to guess if a relay will accept traffic or + not. To quote the `dir-spec `_ (section 3.2.1)... + + :: + + With microdescriptors, clients don't learn exact exit policies: + clients can only guess whether a relay accepts their request, try the + BEGIN request, and might get end-reason-exit-policy if they guessed + wrong, in which case they'll have to try elsewhere. + + :var bool is_accept: **True** if these are ports that we accept, **False** if + they're ports that we reject + + :param str policy: policy string that describes this policy + """ + + def __init__(self, policy): + # Microdescriptor policies are of the form...
+ # + # MicrodescriptorPolicy ::= ("accept" / "reject") SP PortList NL + # PortList ::= PortOrRange + # PortList ::= PortList "," PortOrRange + # PortOrRange ::= INT "-" INT / INT + + self._policy = policy + + if policy.startswith("accept"): + self.is_accept = True + elif policy.startswith("reject"): + self.is_accept = False + else: + raise ValueError("A microdescriptor exit policy must start with either 'accept' or 'reject': %s" % policy) + + policy = policy[6:] + + if not policy.startswith(" ") or (len(policy) - 1 != len(policy.lstrip())): + raise ValueError("A microdescriptor exit policy should have a space separating accept/reject from its port list: %s" % self._policy) + + policy = policy[1:] + + # convert our port list into MicroExitPolicyRule + rules = [] + + for port_entry in policy.split(","): + if '-' in port_entry: + min_port, max_port = port_entry.split('-', 1) + else: + min_port = max_port = port_entry + + if not stem.util.connection.is_valid_port(min_port) or \ + not stem.util.connection.is_valid_port(max_port): + raise ValueError("'%s' is an invalid port range" % port_entry) + + rules.append(MicroExitPolicyRule(self.is_accept, int(min_port), int(max_port))) + + super(MicroExitPolicy, self).__init__(*rules) + self._is_allowed_default = not self.is_accept + + def __str__(self): + return self._policy + + def __hash__(self): + return hash(str(self)) + + def __eq__(self, other): + if isinstance(other, MicroExitPolicy): + return str(self) == str(other) + else: + return False + + +class ExitPolicyRule(object): + """ + Single rule from the user's exit policy. These rules are chained together to + form complete policies that describe where a relay will and will not allow + traffic to exit. + + The format of these rules is formally described in the `dir-spec + `_ as an + "exitpattern". Note that while these are similar to tor's man page entry for + ExitPolicies, it's not the exact same. An exitpattern is better defined and + stricter in what it'll accept.
For instance, ports are not optional and it + does not contain the 'private' alias. + + This should be treated as an immutable object. + + :var bool is_accept: indicates if exiting is allowed or disallowed + + :var str address: address that this rule is for + + :var int min_port: lower end of the port range that we include (inclusive) + :var int max_port: upper end of the port range that we include (inclusive) + + :param str rule: exit policy rule to be parsed + + :raises: **ValueError** if input isn't a valid tor exit policy rule + """ + + def __init__(self, rule): + # policy ::= "accept" exitpattern | "reject" exitpattern + # exitpattern ::= addrspec ":" portspec + + if rule.startswith("accept"): + self.is_accept = True + elif rule.startswith("reject"): + self.is_accept = False + else: + raise ValueError("An exit policy must start with either 'accept' or 'reject': %s" % rule) + + exitpattern = rule[6:] + + if not exitpattern.startswith(" ") or (len(exitpattern) - 1 != len(exitpattern.lstrip())): + raise ValueError("An exit policy should have a space separating its accept/reject from the exit pattern: %s" % rule) + + exitpattern = exitpattern[1:] + + if not ":" in exitpattern: + raise ValueError("An exitpattern must be of the form 'addrspec:portspec': %s" % rule) + + self.address = None + self._address_type = None + self._masked_bits = None + self.min_port = self.max_port = None + self._hash = None + + # Our mask in ip notation (ex. "255.255.255.0"). This is only set if we + # either have a custom mask that can't be represented by a number of bits, + # or the user has called mask(), lazily loading this. + + self._mask = None + + addrspec, portspec = exitpattern.rsplit(":", 1) + self._apply_addrspec(rule, addrspec) + self._apply_portspec(rule, portspec) + + # If true then a submask of /0 is treated by is_address_wildcard() as being + # a wildcard. 
+ + self._submask_wildcard = True + + def is_address_wildcard(self): + """ + **True** if we'll match against any address, **False** otherwise. + + Note that if this policy can apply to both IPv4 and IPv6 then this is + different from being for a /0 (since, for instance, 0.0.0.0/0 wouldn't + match against an IPv6 address). That said, /0 addresses are highly unusual + and most things citing exit policies are IPv4 specific anyway, making this + moot. + + :returns: **bool** for if our address matching is a wildcard + """ + + if self._submask_wildcard and self.get_masked_bits() == 0: + return True + + return self._address_type == _address_type_to_int(AddressType.WILDCARD) + + def is_port_wildcard(self): + """ + **True** if we'll match against any port, **False** otherwise. + + :returns: **bool** for if our port matching is a wildcard + """ + + return self.min_port in (0, 1) and self.max_port == 65535 + + def is_match(self, address = None, port = None, strict = False): + """ + **True** if we match against the given destination, **False** otherwise. If + the address or port is omitted then this will check if we're allowed to + exit to any instances of the defined address or port. 
+ + :param str address: IPv4 or IPv6 address (with or without brackets) + :param int port: port number + :param bool strict: if the address or port is excluded then check if we can + exit to **all** instances of the defined address or port + + :returns: **bool** indicating if we match against this destination + + :raises: **ValueError** if provided with a malformed address or port + """ + + # validate our input and check if the argument doesn't match our address type + if address is not None: + address_type = self.get_address_type() + + if stem.util.connection.is_valid_ipv4_address(address): + if address_type == AddressType.IPv6: + return False + elif stem.util.connection.is_valid_ipv6_address(address, allow_brackets = True): + if address_type == AddressType.IPv4: + return False + + address = address.lstrip("[").rstrip("]") + else: + raise ValueError("'%s' isn't a valid IPv4 or IPv6 address" % address) + + if port is not None and not stem.util.connection.is_valid_port(port): + raise ValueError("'%s' isn't a valid port" % port) + + if not self.is_address_wildcard(): + # Already got the integer representation of our mask and our address + # with the mask applied. Just need to check if this address with the + # mask applied matches. + + if address is None: + if strict: + return False + else: + comparison_addr_bin = int(stem.util.connection._get_address_binary(address), 2) + comparison_addr_bin &= self._get_mask_bin() + + if self._get_address_bin() != comparison_addr_bin: + return False + + if not self.is_port_wildcard(): + if port is None: + if strict: + return False + elif port < self.min_port or port > self.max_port: + return False + + return True + + def get_address_type(self): + """ + Provides the :data:`~stem.exit_policy.AddressType` for our policy. 
+ + :returns: :data:`~stem.exit_policy.AddressType` for the type of address that we have + """ + + return _int_to_address_type(self._address_type) + + def get_mask(self, cache = True): + """ + Provides the address represented by our mask. This is **None** if our + address type is a wildcard. + + :param bool cache: caches the result if **True** + + :returns: str of our subnet mask for the address (ex. "255.255.255.0") + """ + + # Lazy loading our mask because it's very infrequently requested. There's + # usually no reason to use memory for it. + + if not self._mask: + address_type = self.get_address_type() + + if address_type == AddressType.WILDCARD: + mask = None + elif address_type == AddressType.IPv4: + mask = stem.util.connection.get_mask_ipv4(self._masked_bits) + elif address_type == AddressType.IPv6: + mask = stem.util.connection.get_mask_ipv6(self._masked_bits) + + if not cache: + return mask + + self._mask = mask + + return self._mask + + def get_masked_bits(self): + """ + Provides the number of bits our subnet mask represents. This is **None** if + our mask can't have a bit representation. + + :returns: int with the bit representation of our mask + """ + + return self._masked_bits + + @lru_cache() + def __str__(self): + """ + Provides the string representation of our policy. This does not + necessarily match the rule that we were constructed from (due to things + like IPv6 address collapsing or the multiple representations that our mask + can have). However, it is a valid rule that would be accepted by our constructor + to re-create this rule. + """ + + label = "accept " if self.is_accept else "reject " + + if self.is_address_wildcard(): + label += "*:" + else: + address_type = self.get_address_type() + + if address_type == AddressType.IPv4: + label += self.address + else: + label += "[%s]" % self.address + + # Including our mask label as follows...
+ # - exclude our mask if it doesn't do anything + # - use our masked bit count if we can + # - use the mask itself otherwise + + if (address_type == AddressType.IPv4 and self._masked_bits == 32) or \ + (address_type == AddressType.IPv6 and self._masked_bits == 128): + label += ":" + elif self._masked_bits is not None: + label += "/%i:" % self._masked_bits + else: + label += "/%s:" % self.get_mask() + + if self.is_port_wildcard(): + label += "*" + elif self.min_port == self.max_port: + label += str(self.min_port) + else: + label += "%i-%i" % (self.min_port, self.max_port) + + return label + + def __hash__(self): + if self._hash is None: + my_hash = 0 + + for attr in ("is_accept", "address", "min_port", "max_port"): + my_hash *= 1024 + + attr_value = getattr(self, attr) + + if attr_value is not None: + my_hash += hash(attr_value) + + my_hash *= 1024 + my_hash += hash(self.get_mask(False)) + + self._hash = my_hash + + return self._hash + + @lru_cache() + def _get_mask_bin(self): + # provides an integer representation of our mask + + return int(stem.util.connection._get_address_binary(self.get_mask(False)), 2) + + @lru_cache() + def _get_address_bin(self): + # provides an integer representation of our address + + return int(stem.util.connection._get_address_binary(self.address), 2) & self._get_mask_bin() + + def _apply_addrspec(self, rule, addrspec): + # Parses the addrspec... 
+ # addrspec ::= "*" | ip4spec | ip6spec + + if "/" in addrspec: + self.address, addr_extra = addrspec.split("/", 1) + else: + self.address, addr_extra = addrspec, None + + if addrspec == "*": + self._address_type = _address_type_to_int(AddressType.WILDCARD) + self.address = self._masked_bits = None + elif stem.util.connection.is_valid_ipv4_address(self.address): + # ipv4spec ::= ip4 | ip4 "/" num_ip4_bits | ip4 "/" ip4mask + # ip4 ::= an IPv4 address in dotted-quad format + # ip4mask ::= an IPv4 mask in dotted-quad format + # num_ip4_bits ::= an integer between 0 and 32 + + self._address_type = _address_type_to_int(AddressType.IPv4) + + if addr_extra is None: + self._masked_bits = 32 + elif stem.util.connection.is_valid_ipv4_address(addr_extra): + # provided with an ip4mask + try: + self._masked_bits = stem.util.connection._get_masked_bits(addr_extra) + except ValueError: + # mask can't be represented as a number of bits (ex. "255.255.0.255") + self._mask = addr_extra + self._masked_bits = None + elif addr_extra.isdigit(): + # provided with a num_ip4_bits + self._masked_bits = int(addr_extra) + + if self._masked_bits < 0 or self._masked_bits > 32: + raise ValueError("IPv4 masks must be in the range of 0-32 bits") + else: + raise ValueError("The '%s' isn't a mask nor number of bits: %s" % (addr_extra, rule)) + elif self.address.startswith("[") and self.address.endswith("]") and \ + stem.util.connection.is_valid_ipv6_address(self.address[1:-1]): + # ip6spec ::= ip6 | ip6 "/" num_ip6_bits + # ip6 ::= an IPv6 address, surrounded by square brackets. 
+ # num_ip6_bits ::= an integer between 0 and 128 + + self.address = stem.util.connection.expand_ipv6_address(self.address[1:-1].upper()) + self._address_type = _address_type_to_int(AddressType.IPv6) + + if addr_extra is None: + self._masked_bits = 128 + elif addr_extra.isdigit(): + # provided with a num_ip6_bits + self._masked_bits = int(addr_extra) + + if self._masked_bits < 0 or self._masked_bits > 128: + raise ValueError("IPv6 masks must be in the range of 0-128 bits") + else: + raise ValueError("The '%s' isn't a number of bits: %s" % (addr_extra, rule)) + else: + raise ValueError("Address isn't a wildcard, IPv4, or IPv6 address: %s" % rule) + + def _apply_portspec(self, rule, portspec): + # Parses the portspec... + # portspec ::= "*" | port | port "-" port + # port ::= an integer between 1 and 65535, inclusive. + # + # Due to a tor bug the spec says that we should accept port of zero, but + # connections to port zero are never permitted. + + if portspec == "*": + self.min_port, self.max_port = 1, 65535 + elif portspec.isdigit(): + # provided with a single port + if stem.util.connection.is_valid_port(portspec, allow_zero = True): + self.min_port = self.max_port = int(portspec) + else: + raise ValueError("'%s' isn't within a valid port range: %s" % (portspec, rule)) + elif "-" in portspec: + # provided with a port range + port_comp = portspec.split("-", 1) + + if stem.util.connection.is_valid_port(port_comp, allow_zero = True): + self.min_port = int(port_comp[0]) + self.max_port = int(port_comp[1]) + + if self.min_port > self.max_port: + raise ValueError("Port range has a lower bound that's greater than its upper bound: %s" % rule) + else: + raise ValueError("Malformed port range: %s" % rule) + else: + raise ValueError("Port value isn't a wildcard, integer, or range: %s" % rule) + + def __eq__(self, other): + if isinstance(other, ExitPolicyRule): + # Our string representation encompasses our effective policy. 
class MicroExitPolicyRule(ExitPolicyRule):
  """
  Slimmed down ExitPolicyRule for microdescriptor policies. These only carry
  an accept/reject flag and a port range, so the address is always treated as
  a wildcard and has no mask.
  """

  def __init__(self, is_accept, min_port, max_port):
    self.is_accept = is_accept
    self.min_port = min_port
    self.max_port = max_port

    self.address = None  # wildcard address
    self._hash = None  # lazily computed by __hash__

  def is_address_wildcard(self):
    # microdescriptor rules never have an address
    return True

  def get_address_type(self):
    return AddressType.WILDCARD

  def get_mask(self, cache = True):
    # wildcards have no mask, the cache argument is accepted but unused
    return None

  def get_masked_bits(self):
    return None

  def __hash__(self):
    # Combines the hashes of the three attributes that define this rule,
    # caching the result since it's computed from them only once.

    if self._hash is not None:
      return self._hash

    digest = 0

    for field in (self.is_accept, self.min_port, self.max_port):
      digest *= 1024

      if field is not None:
        digest += hash(field)

    self._hash = digest
    return digest
def check_requirements():
  """
  Confirms that the running interpreter is recent enough for stem, which
  needs python 2.6 or later.

  :raises: **ImportError** describing the problem if the interpreter is older
    than python 2.6
  """

  # tuple comparison checks the major version first, then the minor
  if sys.version_info[0:2] < (2, 6):
    raise ImportError("stem requires python version 2.6 or greater")
@lru_cache()
def is_mock_available():
  """
  Checks if a usable mock module can be imported. As of python 3.3 mock is
  bundled as unittest.mock, earlier interpreters need the separately installed
  'mock' package. Callers are expected to import it as follows...

  ::

    try:
      # added in python 3.3
      from unittest.mock import Mock
    except ImportError:
      from mock import Mock

  The standalone package is only accepted if it provides patch.dict() (added
  in mock 0.7.0) and the new_callable argument of patch() (added in 0.8.0).

  :returns: **True** if the mock module is available and **False** otherwise
  """

  try:
    # bundled version, python 3.3 and later
    import unittest.mock
  except ImportError:
    pass
  else:
    return True

  try:
    import mock

    # the argspec is only inspected if patch.dict() exists
    return hasattr(mock.patch, 'dict') and \
      'new_callable' in inspect.getargspec(mock.patch).args
  except ImportError:
    return False
def launch_tor(tor_cmd = "tor", args = None, torrc_path = None, completion_percent = 100, init_msg_handler = None, timeout = DEFAULT_INIT_TIMEOUT, take_ownership = False):
  """
  Initializes a tor process. This blocks until initialization completes or we
  error out.

  If tor's data directory is missing or stale then bootstrapping will include
  making several requests to the directory authorities which can take a little
  while. Usually this is done in 50 seconds or so, but occasionally calls seem
  to get stuck, taking well over the default timeout.

  **To work tor must log at NOTICE runlevel to stdout.** It does this by
  default, but if you have a 'Log' entry in your torrc then you'll also need
  'Log NOTICE stdout'.

  Note: The timeout argument does not work on Windows, and relies on the global
  state of the signal module (SIGALRM). The prior SIGALRM handler is restored
  before this returns or raises.

  :param str tor_cmd: command for starting tor
  :param list args: additional arguments for tor
  :param str torrc_path: location of the torrc for us to use, **NO_TORRC** to
    run tor with a blank configuration
  :param int completion_percent: percent of bootstrap completion at which
    this'll return
  :param functor init_msg_handler: optional functor that will be provided with
    tor's initialization stdout as we get it
  :param int timeout: time after which the attempt to start tor is aborted, no
    timeouts are applied if **None**
  :param bool take_ownership: asserts ownership over the tor process so it
    aborts if this python process terminates or a :class:`~stem.control.Controller`
    we establish to it disconnects

  :returns: **subprocess.Popen** instance for the tor subprocess

  :raises: **OSError** if we either fail to create the tor process or reached a
    timeout without success
  """

  if stem.util.system.is_windows():
    timeout = None

  # sanity check that we got a tor binary

  if os.path.sep in tor_cmd:
    # got a path (either relative or absolute), check what it leads to

    if os.path.isdir(tor_cmd):
      raise OSError("'%s' is a directory, not the tor executable" % tor_cmd)
    elif not os.path.isfile(tor_cmd):
      raise OSError("'%s' doesn't exist" % tor_cmd)
  elif not stem.util.system.is_available(tor_cmd):
    raise OSError("'%s' isn't available on your system. Maybe it's not in your PATH?" % tor_cmd)

  # double check that we have a torrc to work with
  if torrc_path not in (None, NO_TORRC) and not os.path.exists(torrc_path):
    raise OSError("torrc doesn't exist (%s)" % torrc_path)

  runtime_args, temp_file = [tor_cmd], None

  if args:
    runtime_args += args

  alarm_installed, previous_handler = False, None

  try:
    # NO_TORRC is checked first so it's honored even when it's a falsy value
    if torrc_path == NO_TORRC:
      # mkstemp hands back an open descriptor, closing it so it isn't leaked
      temp_fd, temp_file = tempfile.mkstemp(prefix = "empty-torrc-", text = True)
      os.close(temp_fd)

      runtime_args += ["-f", temp_file]
    elif torrc_path:
      runtime_args += ["-f", torrc_path]

    if take_ownership:
      runtime_args += ["__OwningControllerProcess", str(os.getpid())]

    # starts a tor subprocess, raising an OSError if it fails
    tor_process = subprocess.Popen(runtime_args, stdout = subprocess.PIPE, stderr = subprocess.PIPE)

    if timeout:
      def timeout_handler(signum, frame):
        # terminates the uninitialized tor process and raise on timeout
        tor_process.kill()
        raise OSError("reached a %i second timeout without success" % timeout)

      previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
      alarm_installed = True
      signal.alarm(timeout)

    bootstrap_line = re.compile(r"Bootstrapped ([0-9]+)%: ")
    problem_line = re.compile(r"\[(warn|err)\] (.*)$")
    last_problem = "Timed out"

    while True:
      # Tor's stdout will be read as ASCII bytes. This is fine for python 2, but
      # in python 3 that means it'll mismatch with other operations (for
      # instance the bootstrap_line.search() call below would fail), so we
      # normalize to unicode.

      init_line = tor_process.stdout.readline().decode("utf-8", "replace").strip()

      # this will provide empty results if the process is terminated
      if not init_line:
        tor_process.kill()  # ... but best make sure
        raise OSError("Process terminated: %s" % last_problem)

      # provide the caller with the initialization message if they want it
      if init_msg_handler:
        init_msg_handler(init_line)

      # return the process if we're done with bootstrapping
      bootstrap_match = bootstrap_line.search(init_line)

      if bootstrap_match and int(bootstrap_match.group(1)) >= completion_percent:
        return tor_process

      # otherwise keep track of the latest warning or error for our exception
      problem_match = problem_line.search(init_line)

      if problem_match:
        msg = problem_match.group(2)

        if "see warnings above" not in msg:
          if ": " in msg:
            msg = msg.split(": ")[-1].strip()

          last_problem = msg
  finally:
    # Runs on success, timeout, and every failure so neither the alarm, our
    # signal handler, nor the blank torrc outlive this call.

    if alarm_installed:
      signal.alarm(0)  # stop alarm

      # signal.signal() provides None for handlers not installed from python,
      # which it won't accept back as an argument
      signal.signal(signal.SIGALRM, signal.SIG_DFL if previous_handler is None else previous_handler)

    if temp_file:
      try:
        os.remove(temp_file)
      except OSError:
        pass  # best effort cleanup, the file may already be gone
+ + :: + + tor_process = stem.process.launch_tor_with_config( + config = { + 'ControlPort': '2778', + 'Log': [ + 'NOTICE stdout', + 'ERR file /tmp/tor_error_log', + ], + }, + ) + + :param dict config: configuration options, such as '{"ControlPort": "9051"}', + values can either be a **str** or **list of str** if for multiple values + :param str tor_cmd: command for starting tor + :param int completion_percent: percent of bootstrap completion at which + this'll return + :param functor init_msg_handler: optional functor that will be provided with + tor's initialization stdout as we get it + :param int timeout: time after which the attempt to start tor is aborted, no + timeouts are applied if **None** + :param bool take_ownership: asserts ownership over the tor process so it + aborts if this python process terminates or a :class:`~stem.control.Controller` + we establish to it disconnects + + :returns: **subprocess.Popen** instance for the tor subprocess + + :raises: **OSError** if we either fail to create the tor process or reached a + timeout without success + """ + + # we need to be sure that we're logging to stdout to figure out when we're + # done bootstrapping + + if 'Log' in config: + stdout_options = ['DEBUG stdout', 'INFO stdout', 'NOTICE stdout'] + + if isinstance(config['Log'], str): + config['Log'] = [config['Log']] + + has_stdout = False + + for log_config in config['Log']: + if log_config in stdout_options: + has_stdout = True + break + + if not has_stdout: + config['Log'].append('NOTICE stdout') + + torrc_path = tempfile.mkstemp(prefix = "torrc-", text = True)[1] + + try: + with open(torrc_path, "w") as torrc_file: + for key, values in config.items(): + if isinstance(values, str): + torrc_file.write("%s %s\n" % (key, values)) + else: + for value in values: + torrc_file.write("%s %s\n" % (key, value)) + + # prevents tor from erroring out due to a missing torrc if it gets a sighup + args = ['__ReloadTorrcOnSIGHUP', '0'] + + return launch_tor(tor_cmd, args, 
torrc_path, completion_percent, init_msg_handler, timeout, take_ownership) + finally: + try: + os.remove(torrc_path) + except: + pass diff --git a/lib/stem/response/__init__.py b/lib/stem/response/__init__.py new file mode 100644 index 00000000..33815730 --- /dev/null +++ b/lib/stem/response/__init__.py @@ -0,0 +1,571 @@ +# Copyright 2012-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Parses replies from the control socket. + +**Module Overview:** + +:: + + convert - translates a ControlMessage into a particular response subclass + + ControlMessage - Message that's read from the control socket. + |- from_str - provides a ControlMessage for the given string + |- content - provides the parsed message content + |- raw_content - unparsed socket data + |- __str__ - content stripped of protocol formatting + +- __iter__ - ControlLine entries for the content of the message + + ControlLine - String subclass with methods for parsing controller responses. + |- remainder - provides the unparsed content + |- is_empty - checks if the remaining content is empty + |- is_next_quoted - checks if the next entry is a quoted value + |- is_next_mapping - checks if the next entry is a KEY=VALUE mapping + |- peek_key - provides the key of the next entry + |- pop - removes and returns the next entry + +- pop_mapping - removes and returns the next entry as a KEY=VALUE mapping + + SingleLineResponse - Simple tor response only including a single line of information. +""" + +__all__ = [ + "events", + "getinfo", + "getconf", + "protocolinfo", + "authchallenge", + "convert", + "ControlMessage", + "ControlLine", + "SingleLineResponse", +] + +import re +import StringIO +import threading + +import stem.socket + +KEY_ARG = re.compile("^(\S+)=") + +# Escape sequences from the 'esc_for_log' function of tor's 'common/util.c'. +# It's hard to tell what controller functions use this in practice, but direct +# users are... 
def convert(response_type, message, **kwargs):
  """
  Converts a :class:`~stem.response.ControlMessage` into a particular kind of
  tor response. This does an in-place conversion of the message from being a
  :class:`~stem.response.ControlMessage` to a subclass for its response type.
  Recognized types include...

  =================== =====
  response_type       Class
  =================== =====
  **GETINFO**         :class:`stem.response.getinfo.GetInfoResponse`
  **GETCONF**         :class:`stem.response.getconf.GetConfResponse`
  **MAPADDRESS**      :class:`stem.response.mapaddress.MapAddressResponse`
  **EVENT**           :class:`stem.response.events.Event` subclass
  **PROTOCOLINFO**    :class:`stem.response.protocolinfo.ProtocolInfoResponse`
  **AUTHCHALLENGE**   :class:`stem.response.authchallenge.AuthChallengeResponse`
  **SINGLELINE**      :class:`stem.response.SingleLineResponse`
  =================== =====

  :param str response_type: type of tor response to convert to
  :param stem.response.ControlMessage message: message to be converted
  :param kwargs: optional keyword arguments to be passed to the parser method

  :raises:
    * :class:`stem.ProtocolError` the message isn't a proper response of
      that type
    * :class:`stem.InvalidArguments` the arguments given as input are
      invalid, this can only be raised if the response_type is: **GETINFO**,
      **GETCONF**
    * :class:`stem.InvalidRequest` the arguments given as input are
      invalid, this can only be raised if the response_type is:
      **MAPADDRESS**
    * :class:`stem.OperationFailed` if the action the event represents failed,
      this can only be raised if the response_type is: **MAPADDRESS**
    * **TypeError** if argument isn't a :class:`~stem.response.ControlMessage`
      or response_type isn't supported
  """

  # imported here rather than at the module level since these modules import
  # stem.response themselves

  import stem.response.events
  import stem.response.getinfo
  import stem.response.getconf
  import stem.response.protocolinfo
  import stem.response.authchallenge
  import stem.response.mapaddress

  if not isinstance(message, ControlMessage):
    raise TypeError("Only able to convert stem.response.ControlMessage instances")

  response_types = {
    "EVENT": stem.response.events.Event,
    "GETINFO": stem.response.getinfo.GetInfoResponse,
    "GETCONF": stem.response.getconf.GetConfResponse,
    "MAPADDRESS": stem.response.mapaddress.MapAddressResponse,
    "SINGLELINE": SingleLineResponse,
    "PROTOCOLINFO": stem.response.protocolinfo.ProtocolInfoResponse,
    "AUTHCHALLENGE": stem.response.authchallenge.AuthChallengeResponse,
  }

  try:
    response_class = response_types[response_type]
  except (KeyError, TypeError):
    # KeyError is what an unrecognized type gives us, TypeError is only raised
    # for unhashable input. Both are reported as the documented TypeError.
    raise TypeError("Unsupported response type: %s" % response_type)

  message.__class__ = response_class
  message._parse_message(**kwargs)
+ + :param str content: message to construct the message from + :param str msg_type: type of tor reply to parse the content as + :param kwargs: optional keyword arguments to be passed to the parser method + + :returns: stem.response.ControlMessage instance + """ + + msg = stem.socket.recv_message(StringIO.StringIO(content)) + + if msg_type is not None: + convert(msg_type, msg, **kwargs) + + return msg + + def __init__(self, parsed_content, raw_content): + if not parsed_content: + raise ValueError("ControlMessages can't be empty") + + self._parsed_content = parsed_content + self._raw_content = raw_content + + def is_ok(self): + """ + Checks if any of our lines have a 250 response. + + :returns: **True** if any lines have a 250 response code, **False** otherwise + """ + + for code, _, _ in self._parsed_content: + if code == "250": + return True + + return False + + def content(self, get_bytes = False): + """ + Provides the parsed message content. These are entries of the form... + + :: + + (status_code, divider, content) + + **status_code** + Three character code for the type of response (defined in section 4 of + the control-spec). + + **divider** + Single character to indicate if this is mid-reply, data, or an end to the + message (defined in section 2.3 of the control-spec). + + **content** + The following content is the actual payload of the line. + + For data entries the content is the full multi-line payload with newline + linebreaks and leading periods unescaped. + + The **status_code** and **divider** are both strings (**bytes** in python + 2.x and **unicode** in python 3.x). The **content** however is **bytes** if + **get_bytes** is **True**. 
+ + :param bool get_bytes: provides **bytes** for the **content** rather than a **str** + + :returns: **list** of (str, str, str) tuples for the components of this message + """ + + if stem.prereq.is_python_3() and not get_bytes: + return [(code, div, stem.util.str_tools._to_unicode(content)) for (code, div, content) in self._parsed_content] + else: + return list(self._parsed_content) + + def raw_content(self, get_bytes = False): + """ + Provides the unparsed content read from the control socket. + + :param bool get_bytes: if **True** then this provides **bytes** rather than a **str** + + :returns: **str** of the socket data used to generate this message + """ + + if stem.prereq.is_python_3() and not get_bytes: + return stem.util.str_tools._to_unicode(self._raw_content) + else: + return self._raw_content + + def __str__(self): + """ + Content of the message, stripped of status code and divider protocol + formatting. + """ + + return "\n".join(list(self)) + + def __iter__(self): + """ + Provides :class:`~stem.response.ControlLine` instances for the content of + the message. This is stripped of status codes and dividers, for instance... + + :: + + 250+info/names= + desc/id/* -- Router descriptors by ID. + desc/name/* -- Router descriptors by nickname. + . + 250 OK + + Would provide two entries... + + :: + + 1st - "info/names= + desc/id/* -- Router descriptors by ID. + desc/name/* -- Router descriptors by nickname." 
class ControlLine(str):
  """
  String subclass for a single line of controller output. In addition to being
  a normal (immutable) string this tracks the portion of the line that hasn't
  been parsed yet, letting callers pop space delimited entries from the front
  like a stack.

  Popping entries never changes our value as a string, only what
  :func:`~stem.response.ControlLine.remainder` provides. Pops are guarded by a
  reentrant lock.
  """

  def __new__(cls, value):
    return str.__new__(cls, value)

  def __init__(self, value):
    self._remainder_lock = threading.RLock()
    self._remainder = value

  def remainder(self):
    """
    Provides the content that hasn't been popped yet. This is an empty string
    once every entry has been popped.

    :returns: **str** of the unparsed content
    """

    return self._remainder

  def is_empty(self):
    """
    Checks if all of our content has been popped.

    :returns: **True** if there's nothing left to pop, **False** if we still
      have unparsed content
    """

    return self._remainder == ""

  def is_next_quoted(self, escaped = False):
    """
    Checks if our unparsed content starts with a quoted value.

    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

    :returns: **True** if the next entry can be parsed as a quoted value, **False** otherwise
    """

    opening, closing = _get_quote_indices(self._remainder, escaped)
    return opening == 0 and closing != -1

  def is_next_mapping(self, key = None, quoted = False, escaped = False):
    """
    Checks if our unparsed content starts with a KEY=VALUE mapping.

    :param str key: checks that the key matches this value, skipping the check if **None**
    :param bool quoted: checks that the mapping is to a quoted value
    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

    :returns: **True** if the next entry can be parsed as a key=value mapping,
      **False** otherwise
    """

    unparsed = self._remainder  # working from a snapshot to avoid locking
    key_match = KEY_ARG.match(unparsed)

    if not key_match:
      return False  # doesn't start with a key

    if key and key != key_match.group(1):
      return False  # starts with a key, but not the one we want

    if not quoted:
      return True  # presence of the key was all we needed

    # the quoted value needs to directly follow the 'key='
    opening, closing = _get_quote_indices(unparsed, escaped)
    return opening == key_match.end() and closing != -1

  def peek_key(self):
    """
    Provides the key of the next entry without popping it.

    :returns: **str** with the next entry's key, **None** if the next entry
      isn't a key/value mapping
    """

    key_match = KEY_ARG.match(self._remainder)
    return key_match.group(1) if key_match else None

  def pop(self, quoted = False, escaped = False):
    """
    Removes and returns the next space separated entry, along with the
    whitespace that follows it. For example...

    ::

      >>> line = ControlLine("\\"We're all mad here.\\" says the grinning cat.")
      >>> print(line.pop(True))
      We're all mad here.
      >>> print(line.pop())
      says
      >>> print(line.remainder())
      the grinning cat.

    :param bool quoted: parses the next entry as a quoted value, removing the quotes
    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

    :returns: **str** of the next space separated entry

    :raises:
      * **ValueError** if quoted is True without the value being quoted
      * **IndexError** if we don't have any remaining content left to parse
    """

    with self._remainder_lock:
      # the remainder is only updated if parsing succeeds
      entry, self._remainder = _parse_entry(self._remainder, quoted, escaped)
      return entry

  def pop_mapping(self, quoted = False, escaped = False):
    """
    Removes and returns the next space separated entry as a KEY=VALUE mapping,
    along with the whitespace that follows it.

    :param bool quoted: parses the value as being quoted, removing the quotes
    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

    :returns: **tuple** of the form (key, value)

    :raises: **ValueError** if this isn't a KEY=VALUE mapping or if quoted is
      **True** without the value being quoted
    :raises: **IndexError** if there's nothing to parse from the line
    """

    with self._remainder_lock:
      if self.is_empty():
        raise IndexError("no remaining content to parse")

      key_match = KEY_ARG.match(self._remainder)

      if not key_match:
        raise ValueError("the next entry isn't a KEY=VALUE mapping: " + self._remainder)

      # the value is whatever entry follows the 'key='
      key = key_match.group(1)
      after_key = self._remainder[key_match.end():]

      value, self._remainder = _parse_entry(after_key, quoted, escaped)
      return (key, value)
def _get_quote_indices(line, escaped):
  """
  Provides the indices of the next two quotes in the given content.

  When **escaped** is set, quotes that are themselves escaped (preceded by an
  odd number of backslashes) are skipped. A quote that follows an *escaped
  backslash*, such as the closing quote of '"C:\\\\"', is a real quote since
  the backslashes before it pair up with each other.

  :param str line: content to be parsed
  :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences

  :returns: **tuple** of two ints, indices being -1 if a quote doesn't exist
  """

  indices, quote_index = [], -1

  for _ in range(2):
    quote_index = line.find("\"", quote_index + 1)

    # If we have escapes then we need to skip any r'\"' entries. There's
    # nothing to check if the index is -1 (no match) or 0 (first character).

    while escaped and quote_index >= 1:
      # count the run of backslashes directly before this quote
      backslashes = 0

      while backslashes < quote_index and line[quote_index - backslashes - 1] == "\\":
        backslashes += 1

      if backslashes % 2 == 0:
        break  # backslashes escape each other, so this quote is genuine

      quote_index = line.find("\"", quote_index + 1)

    indices.append(quote_index)

  return tuple(indices)
+ # + # This can't be a simple series of str.replace() calls because replacements + # need to be excluded from consideration for further unescaping. For + # instance, '\\t' should be converted to '\t' rather than a tab. + + def _pop_with_unescape(entry): + # Pop either the first character or the escape sequence conversion the + # entry starts with. This provides a tuple of... + # + # (unescaped prefix, remaining entry) + + for esc_sequence, replacement in CONTROL_ESCAPES.items(): + if entry.startswith(esc_sequence): + return (replacement, entry[len(esc_sequence):]) + + return (entry[0], entry[1:]) + + result = [] + + while entry: + prefix, entry = _pop_with_unescape(entry) + result.append(prefix) + + return "".join(result) + + +class SingleLineResponse(ControlMessage): + """ + Reply to a request that performs an action rather than querying data. These + requests only contain a single line, which is 'OK' if successful, and a + description of the problem if not. + + :var str code: status code for our line + :var str message: content of the line + """ + + def is_ok(self, strict = False): + """ + Checks if the response code is "250". 
class AuthChallengeResponse(stem.response.ControlMessage):
  """
  Reply to an AUTHCHALLENGE query.

  :var bytes server_hash: server hash provided by tor, decoded from hex
  :var bytes server_nonce: server nonce provided by tor, decoded from hex
  """

  def _parse_message(self):
    # Example:
    #   250 AUTHCHALLENGE SERVERHASH=680A73C9836C4F557314EA1C4EDE54C285DB9DC89C83627401AEF9D7D27A95D5 SERVERNONCE=F8EA4B1F2C8B40EF1AF68860171605B910E3BBCABADF6FC3DB1FA064F4690E85

    self.server_hash = None
    self.server_nonce = None

    if not self.is_ok():
      raise stem.ProtocolError("AUTHCHALLENGE response didn't have an OK status:\n%s" % self)

    if len(self) > 1:
      raise stem.ProtocolError("Received multiline AUTHCHALLENGE response:\n%s" % self)

    line = self[0]

    # sanity check that we're a AUTHCHALLENGE response
    if line.pop() != "AUTHCHALLENGE":
      raise stem.ProtocolError("Message is not an AUTHCHALLENGE response (%s)" % self)

    # the hash comes first, and must be 64 hex digits

    if not line.is_next_mapping("SERVERHASH"):
      raise stem.ProtocolError("Missing SERVERHASH mapping: %s" % line)

    _, hash_hex = line.pop_mapping()

    if not stem.util.tor_tools.is_hex_digits(hash_hex, 64):
      raise stem.ProtocolError("SERVERHASH has an invalid value: %s" % hash_hex)

    self.server_hash = binascii.a2b_hex(stem.util.str_tools._to_bytes(hash_hex))

    # ... followed by the nonce, which has the same format

    if not line.is_next_mapping("SERVERNONCE"):
      raise stem.ProtocolError("Missing SERVERNONCE mapping: %s" % line)

    _, nonce_hex = line.pop_mapping()

    if not stem.util.tor_tools.is_hex_digits(nonce_hex, 64):
      raise stem.ProtocolError("SERVERNONCE has an invalid value: %s" % nonce_hex)

    self.server_nonce = binascii.a2b_hex(stem.util.str_tools._to_bytes(nonce_hex))
  def _parse_standard_attr(self):
    """
    Parses the standard event layout, which is our content's first token
    followed by...

      *( positional_args ) *( key "=" value )

    This populates our **positional_args** and **keyword_args** attributes,
    then sets an attribute for every name listed in our class'
    **_POSITIONAL_ARGS** and **_KEYWORD_ARGS**. Recognized attributes that
    the event lacks are set to **None**.

    :raises: :class:`stem.ProtocolError` if a positional argument that needs
      to be quoted lacks its starting or ending quote
    """

    # Tor events contain some number of positional arguments followed by
    # key/value mappings. Parsing keyword arguments from the end until we hit
    # something that isn't a key/value mapping. The rest are positional.

    content = str(self)

    while True:
      # the quoted pattern is attempted first, the unquoted one is the fallback
      match = QUOTED_KW_ARG.match(content)

      if not match:
        match = KW_ARG.match(content)

      if match:
        # 'content' shrinks to everything before the mapping we just consumed
        content, keyword, value = match.groups()
        self.keyword_args[keyword] = value
      else:
        break

    # Setting attributes for the fields that we recognize.

    # the first whitespace separated token is dropped, the rest are positional
    self.positional_args = content.split()[1:]

    # work on a copy so popping entries leaves self.positional_args intact
    positional = list(self.positional_args)

    for attr_name in self._POSITIONAL_ARGS:
      attr_value = None

      if positional:
        if attr_name in self._QUOTED or (attr_name in self._OPTIONALLY_QUOTED and positional[0].startswith('"')):
          # quoted values may contain spaces, so they can span several tokens
          attr_values = [positional.pop(0)]

          if not attr_values[0].startswith('"'):
            raise stem.ProtocolError("The %s value should be quoted, but didn't have a starting quote: %s" % (attr_name, self))

          # NOTE(review): at least one more token is always consumed here, so
          # a quoted value consisting of a single token (ex. "foo") would take
          # the following token with it - confirm that tor never emits those

          while True:
            if not positional:
              raise stem.ProtocolError("The %s value should be quoted, but didn't have an ending quote: %s" % (attr_name, self))

            attr_values.append(positional.pop(0))

            if attr_values[-1].endswith('"'):
              break

          # rejoin the tokens, then drop the surrounding quotes
          attr_value = " ".join(attr_values)[1:-1]
        else:
          attr_value = positional.pop(0)

      setattr(self, attr_name, attr_value)

    for controller_attr_name, attr_name in self._KEYWORD_ARGS.items():
      setattr(self, attr_name, self.keyword_args.get(controller_attr_name))
class AddrMapEvent(Event):
  """
  Event that indicates a new address mapping.

  The ADDRMAP event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var str hostname: address being resolved
  :var str destination: destination of the resolution, this is usually an ip,
    but could be a hostname if TrackHostExits is enabled or **None** if tor
    provided an empty destination
  :var datetime expiry: expiration time of the resolution in local time,
    **None** if the mapping never expires
  :var str error: error code if the resolution failed
  :var datetime utc_expiry: expiration time of the resolution in UTC
  :var bool cached: **True** if the resolution will be kept until it expires,
    **False** otherwise or **None** if undefined
  """

  _POSITIONAL_ARGS = ("hostname", "destination", "expiry")
  _KEYWORD_ARGS = {
    "error": "error",
    "EXPIRES": "utc_expiry",
    "CACHED": "cached",
  }

  # The trailing comma is required. Without it this is the plain string
  # "expiry", and the 'attr_name in _OPTIONALLY_QUOTED' check done while
  # parsing positional arguments becomes a substring test.

  _OPTIONALLY_QUOTED = ("expiry",)

  def _parse(self):
    """
    Converts the raw string attributes of the event into their documented
    types.

    :raises: :class:`stem.ProtocolError` if either expiration date or the
      CACHED mapping is malformed
    """

    if self.destination == "":
      self.destination = None

    if self.expiry is not None:
      if self.expiry == "NEVER":
        self.expiry = None
      else:
        try:
          self.expiry = datetime.datetime.strptime(self.expiry, "%Y-%m-%d %H:%M:%S")
        except ValueError:
          raise stem.ProtocolError("Unable to parse date in ADDRMAP event: %s" % self)

    if self.utc_expiry is not None:
      # report malformed dates the same way as we do for 'expiry' rather than
      # letting strptime's ValueError escape to our caller

      try:
        self.utc_expiry = datetime.datetime.strptime(self.utc_expiry, "%Y-%m-%d %H:%M:%S")
      except ValueError:
        raise stem.ProtocolError("Unable to parse date in ADDRMAP event: %s" % self)

    if self.cached is not None:
      if self.cached == "YES":
        self.cached = True
      elif self.cached == "NO":
        self.cached = False
      else:
        raise stem.ProtocolError("An ADDRMAP event's CACHED mapping can only be 'YES' or 'NO': %s" % self)
class BuildTimeoutSetEvent(Event):
  """
  Event indicating that the timeout value for a circuit has changed.

  The BUILDTIMEOUT_SET event was introduced in tor version 0.2.2.7-alpha.

  :var stem.TimeoutSetType set_type: way in which the timeout is changing
  :var int total_times: circuit build times tor used to determine the timeout
  :var int timeout: circuit timeout value in milliseconds
  :var int xm: Pareto parameter Xm in milliseconds
  :var float alpha: Pareto parameter alpha
  :var float quantile: CDF quantile cutoff point
  :var float timeout_rate: ratio of circuits that have time out
  :var int close_timeout: duration to keep measurement circuits in milliseconds
  :var float close_rate: ratio of measurement circuits that are closed
  """

  _POSITIONAL_ARGS = ("set_type",)
  _KEYWORD_ARGS = {
    "TOTAL_TIMES": "total_times",
    "TIMEOUT_MS": "timeout",
    "XM": "xm",
    "ALPHA": "alpha",
    "CUTOFF_QUANTILE": "quantile",
    "TIMEOUT_RATE": "timeout_rate",
    "CLOSE_MS": "close_timeout",
    "CLOSE_RATE": "close_rate",
  }
  _VERSION_ADDED = stem.version.Requirement.EVENT_BUILDTIMEOUT_SET

  def _parse(self):
    """
    Casts our numeric attributes, which arrive as strings, to int or float.

    :raises: :class:`stem.ProtocolError` if a value can't be converted
    """

    # (converter, attributes it applies to, error for malformed values),
    # integers are handled before floats

    conversions = (
      (
        int,
        ('total_times', 'timeout', 'xm', 'close_timeout'),
        "The %s of a BUILDTIMEOUT_SET should be an integer: %s",
      ),
      (
        float,
        ('alpha', 'quantile', 'timeout_rate', 'close_rate'),
        "The %s of a BUILDTIMEOUT_SET should be a float: %s",
      ),
    )

    for convert, attr_names, error_msg in conversions:
      for attr_name in attr_names:
        raw_value = getattr(self, attr_name)

        if raw_value is None:
          continue  # event didn't include this mapping

        try:
          setattr(self, attr_name, convert(raw_value))
        except ValueError:
          raise stem.ProtocolError(error_msg % (attr_name, self))

    self._log_if_unrecognized('set_type', stem.TimeoutSetType)
class CircMinorEvent(Event):
  """
  Event providing information about minor changes in our circuits.

  The CIRC_MINOR event was introduced in tor version 0.2.3.11-alpha.

  :var str id: circuit identifier
  :var stem.CircEvent event: type of change in the circuit
  :var tuple path: relays involved in the circuit, these are
    **(fingerprint, nickname)** tuples
  :var tuple build_flags: :data:`~stem.CircBuildFlag` attributes
    governing how the circuit is built
  :var stem.CircPurpose purpose: purpose that the circuit is intended for
  :var stem.HiddenServiceState hs_state: status if this is a hidden service circuit
  :var str rend_query: circuit's rendezvous-point if this is hidden service related
  :var datetime created: time when the circuit was created or cannibalized
  :var stem.CircPurpose old_purpose: prior purpose for the circuit
  :var stem.HiddenServiceState old_hs_state: prior status as a hidden service circuit
  """

  _POSITIONAL_ARGS = ("id", "event", "path")
  _KEYWORD_ARGS = {
    "BUILD_FLAGS": "build_flags",
    "PURPOSE": "purpose",
    "HS_STATE": "hs_state",
    "REND_QUERY": "rend_query",
    "TIME_CREATED": "created",
    "OLD_PURPOSE": "old_purpose",
    "OLD_HS_STATE": "old_hs_state",
  }
  _VERSION_ADDED = stem.version.Requirement.EVENT_CIRC_MINOR

  def _parse(self):
    """
    Converts our path, build flags and creation time to their documented
    types, then validates the circuit id.

    :raises: :class:`stem.ProtocolError` if the creation time or circuit id
      is malformed
    """

    parsed_path = stem.control._parse_circ_path(self.path)
    self.path = tuple(parsed_path)

    raw_flags = self.build_flags

    if raw_flags is not None:
      self.build_flags = tuple(raw_flags.split(','))

    raw_created = self.created

    if raw_created is not None:
      try:
        self.created = str_tools._parse_iso_timestamp(raw_created)
      except ValueError as exc:
        raise stem.ProtocolError("Unable to parse create date (%s): %s" % (exc, self))

    id_is_valid = tor_tools.is_valid_circuit_id(self.id)

    if not id_is_valid:
      raise stem.ProtocolError("Circuit IDs must be one to sixteen alphanumeric characters, got '%s': %s" % (self.id, self))

    # attributes that should hold values from one of stem's enumerations
    enum_backed_attrs = (
      ('event', stem.CircEvent),
      ('build_flags', stem.CircBuildFlag),
      ('purpose', stem.CircPurpose),
      ('hs_state', stem.HiddenServiceState),
      ('old_purpose', stem.CircPurpose),
      ('old_hs_state', stem.HiddenServiceState),
    )

    for attr_name, attr_enum in enum_backed_attrs:
      self._log_if_unrecognized(attr_name, attr_enum)
class ConfChangedEvent(Event):
  """
  Event that indicates that our configuration changed, either in response to a
  SETCONF or RELOAD signal.

  The CONF_CHANGED event was introduced in tor version 0.2.3.3-alpha.

  :var dict config: mapping of configuration options to their new values
    (**None** if the option is being unset)
  """

  _SKIP_PARSING = True
  _VERSION_ADDED = stem.version.Requirement.EVENT_CONF_CHANGED

  def _parse(self):
    """
    Fills **config** from the lines between our header and footer. Lines
    without an '=' are recorded with a **None** value.
    """

    # Our first and last lines are the header and footer, so only what lies
    # between them is of interest. For instance...
    #
    # 650-CONF_CHANGED
    # 650-ExitNodes=caerSidi
    # 650-ExitPolicy
    # 650-MaxCircuitDirtiness=20
    # 650 OK

    self.config = {}
    option_lines = str(self).splitlines()[1:-1]

    for option_line in option_lines:
      # separator is an empty string when the line lacks an '='
      option, separator, new_value = option_line.partition('=')
      self.config[option] = new_value if separator else None
class LogEvent(Event):
  """
  Tor logging event. These are the most visible kind of event since, by
  default, tor logs at the NOTICE :data:`~stem.Runlevel` to stdout.

  The logging events were some of the first Control Protocol V1 events
  and were introduced in tor version 0.1.1.1-alpha.

  :var stem.Runlevel runlevel: runlevel of the logged message
  :var str message: logged message
  """

  _SKIP_PARSING = True

  def _parse(self):
    """
    Sets our **runlevel** from the event type and **message** from the rest
    of our content, minus the closing 'OK' line of multi-line messages.
    """

    self.runlevel = self.type
    self._log_if_unrecognized('runlevel', stem.Runlevel)

    content = str(self)

    # Drop the ending "OK" line if this is a multi-line message. This is done
    # with an exact suffix check since str.rstrip("\nOK") strips *any* run of
    # trailing '\n', 'O' and 'K' characters, which would truncate messages
    # like "Bootstrapped 100%: OK".

    footer = "\nOK"

    if content.endswith(footer):
      content = content[:-len(footer)]

    # message is our content, minus the runlevel and the character after it

    self.message = content[len(self.runlevel) + 1:]
class NewConsensusEvent(Event):
  """
  Event for when we have a new consensus. This is similar to
  :class:`~stem.response.events.NetworkStatusEvent`, except that it contains
  the whole consensus so anything not listed is implicitly no longer
  recommended.

  The NEWCONSENSUS event was introduced in tor version 0.2.1.13-alpha.

  :var list desc: :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` for the changed descriptors
  """

  _SKIP_PARSING = True
  _VERSION_ADDED = stem.version.Requirement.EVENT_NEWCONSENSUS

  def _parse(self):
    """
    Parses the router status entries between our 'NEWCONSENSUS' header line
    and 'OK' footer line into **desc**.
    """

    # Remove the header and footer as whole lines. str.lstrip() and
    # str.rstrip() take a set of characters rather than a prefix or suffix,
    # so using them here could also eat leading or trailing characters of the
    # entries themselves.

    lines = str(self).split('\n')

    if lines and lines[0] == "NEWCONSENSUS":
      lines = lines[1:]

    if lines and lines[-1] == "OK":
      lines = lines[:-1]

    content = '\n'.join(lines)

    self.desc = list(stem.descriptor.router_status_entry._parse_file(
      io.BytesIO(str_tools._to_bytes(content)),
      True,
      entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV3,
    ))
class ORConnEvent(Event):
  """
  Event that indicates a change in a relay connection. The 'endpoint' could be
  any of several things including a...

  * fingerprint
  * nickname
  * 'fingerprint=nickname' pair
  * address:port

  The derived 'endpoint_*' attributes are generally more useful.

  The ORCONN event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var str endpoint: relay that the event concerns
  :var str endpoint_fingerprint: endpoint's fingerprint if it was provided
  :var str endpoint_nickname: endpoint's nickname if it was provided
  :var str endpoint_address: endpoint's address if it was provided
  :var int endpoint_port: endpoint's port if it was provided
  :var stem.ORStatus status: state of the connection
  :var stem.ORClosureReason reason: reason for the connection to be closed
  :var int circ_count: number of established and pending circuits
  """

  _POSITIONAL_ARGS = ("endpoint", "status")
  _KEYWORD_ARGS = {
    "REASON": "reason",
    "NCIRCS": "circ_count",
  }

  def _parse(self):
    """
    Derives our 'endpoint_*' attributes from the endpoint and converts the
    circuit count to an int.

    :raises: :class:`stem.ProtocolError` if the endpoint is neither a relay
      nor an 'address:port', or if the circuit count isn't numeric
    """

    self.endpoint_fingerprint = None
    self.endpoint_nickname = None
    self.endpoint_address = None
    self.endpoint_port = None

    try:
      self.endpoint_fingerprint, self.endpoint_nickname = \
        stem.control._parse_circ_entry(self.endpoint)
    except stem.ProtocolError:
      # not a relay, so this should be an 'address:port' location

      if not ':' in self.endpoint:
        raise stem.ProtocolError("ORCONN endpoint is neither a relay nor 'address:port': %s" % self)

      # The port follows the *last* colon. Splitting on the first one, as was
      # done previously, breaks on addresses that contain colons themselves
      # (for instance IPv6) and differs from how StreamEvent parses targets.

      address, port = self.endpoint.rsplit(':', 1)

      if not connection.is_valid_port(port):
        raise stem.ProtocolError("ORCONN's endpoint location's port is invalid: %s" % self)

      self.endpoint_address = address
      self.endpoint_port = int(port)

    if self.circ_count is not None:
      if not self.circ_count.isdigit():
        raise stem.ProtocolError("ORCONN event got a non-numeric circuit count (%s): %s" % (self.circ_count, self))

      self.circ_count = int(self.circ_count)

    self._log_if_unrecognized('status', stem.ORStatus)
    self._log_if_unrecognized('reason', stem.ORClosureReason)
class StreamEvent(Event):
  """
  Event that indicates that a stream has changed.

  The STREAM event was one of the first Control Protocol V1 events and was
  introduced in tor version 0.1.1.1-alpha.

  :var str id: stream identifier
  :var stem.StreamStatus status: reported status for the stream
  :var str circ_id: circuit that the stream is attached to, **None** if
    the stream is unattached
  :var str target: destination of the stream
  :var str target_address: destination address (ip, hostname, or '(Tor_internal)')
  :var int target_port: destination port
  :var stem.StreamClosureReason reason: reason for the stream to be closed
  :var stem.StreamClosureReason remote_reason: remote side's reason for the stream to be closed
  :var stem.StreamSource source: origin of the REMAP request
  :var str source_addr: requester of the connection
  :var str source_address: requester address (ip or hostname)
  :var int source_port: requester port
  :var stem.StreamPurpose purpose: purpose for the stream
  """

  _POSITIONAL_ARGS = ("id", "status", "circ_id", "target")
  _KEYWORD_ARGS = {
    "REASON": "reason",
    "REMOTE_REASON": "remote_reason",
    "SOURCE": "source",
    "SOURCE_ADDR": "source_addr",
    "PURPOSE": "purpose",
  }

  def _parse(self):
    """
    Splits our target and source locations into address/port attributes and
    normalizes the circuit id of unattached streams to **None**.

    :raises: :class:`stem.ProtocolError` if the target is missing, or if
      either location isn't an 'address:port' with a valid port
    """

    if self.target is None:
      raise stem.ProtocolError("STREAM event didn't have a target: %s" % self)
    else:
      if not ':' in self.target:
        raise stem.ProtocolError("Target location must be of the form 'address:port': %s" % self)

      address, port = self.target.rsplit(':', 1)

      if not connection.is_valid_port(port, allow_zero = True):
        raise stem.ProtocolError("Target location's port is invalid: %s" % self)

      self.target_address = address
      self.target_port = int(port)

    if self.source_addr is None:
      self.source_address = None
      self.source_port = None
    else:
      if not ':' in self.source_addr:
        raise stem.ProtocolError("Source location must be of the form 'address:port': %s" % self)

      # split on the last colon, like we do for the target, so addresses
      # that themselves contain colons keep their port intact

      address, port = self.source_addr.rsplit(':', 1)

      if not connection.is_valid_port(port, allow_zero = True):
        raise stem.ProtocolError("Source location's port is invalid: %s" % self)

      self.source_address = address
      self.source_port = int(port)

    # spec specifies a circ_id of zero if the stream is unattached

    if self.circ_id == "0":
      self.circ_id = None

    self._log_if_unrecognized('reason', stem.StreamClosureReason)
    self._log_if_unrecognized('remote_reason', stem.StreamClosureReason)
    self._log_if_unrecognized('purpose', stem.StreamPurpose)
class TransportLaunchedEvent(Event):
  """
  Event triggered when a pluggable transport is launched.

  The TRANSPORT_LAUNCHED event was introduced in tor version 0.2.5.0-alpha.

  :var str type: 'server' or 'client'
  :var str name: name of the pluggable transport
  :var str address: IPv4 or IPv6 address where the transport is listening for
    connections
  :var int port: port where the transport is listening for connections
  """

  _POSITIONAL_ARGS = ("type", "name", "address", "port")
  _VERSION_ADDED = stem.version.Requirement.EVENT_TRANSPORT_LAUNCHED

  def _parse(self):
    """
    Validates the transport's type, address and port, then converts the port
    to an int.

    :raises: :class:`stem.ProtocolError` if any of those three are invalid
    """

    if self.type not in ('server', 'client'):
      raise stem.ProtocolError("Transport type should either be 'server' or 'client': %s" % self)

    # the IPv6 check only runs for addresses that aren't valid IPv4
    is_ip_address = connection.is_valid_ipv4_address(self.address) or \
      connection.is_valid_ipv6_address(self.address)

    if not is_ip_address:
      raise stem.ProtocolError("Transport address isn't a valid IPv4 or IPv6 address: %s" % self)

    port_is_valid = connection.is_valid_port(self.port)

    if not port_is_valid:
      raise stem.ProtocolError("Transport port is invalid: %s" % self)

    self.port = int(self.port)
class GetConfResponse(stem.response.ControlMessage):
  """
  Reply for a GETCONF query.

  Note that configuration parameters won't match what we queried for if it's one
  of the special mapping options (ex. "HiddenServiceOptions").

  :var dict entries: mapping between the config parameter (**str**) and their
    values (**list** of **str**)
  """

  def _parse_message(self):
    """
    Fills **entries** from our reply lines. Options that lack a value are
    included with an empty list.

    :raises:
      * :class:`stem.InvalidArguments` if tor reported unrecognized
        configuration keys
      * :class:`stem.ProtocolError` if the reply has any other non-OK status
    """

    # Example:
    # 250-CookieAuthentication=0
    # 250-ControlPort=9100
    # 250-DataDirectory=/home/neena/.tor
    # 250 DirPort

    self.entries = {}
    reply_lines = list(self)

    # a bare "250 OK" means there's nothing to report

    if self.content() == [("250", " ", "OK")]:
      return

    if not self.is_ok():
      # slicing off the 32 character prefix and closing quote leaves the key
      unrecognized_keywords = [
        text[32:-1]
        for status, _, text in self.content()
        if status == "552" and text.startswith("Unrecognized configuration key \"") and text.endswith("\"")
      ]

      if not unrecognized_keywords:
        raise stem.ProtocolError("GETCONF response contained a non-OK status code:\n%s" % self)

      raise stem.InvalidArguments("552", "GETCONF request contained unrecognized keywords: %s" % ', '.join(unrecognized_keywords), unrecognized_keywords)

    for reply_line in reply_lines:
      if reply_line.is_next_mapping():
        option, option_value = reply_line.split("=", 1)
      else:
        option, option_value = reply_line.pop(), None

      # the option gets an entry even when it arrived without a value
      option_values = self.entries.setdefault(option, [])

      if option_value is not None:
        option_values.append(option_value)
+ + :var dict entries: mapping between the queried options and their bytes values + """ + + def _parse_message(self): + # Example: + # 250-version=0.2.3.11-alpha-dev (git-ef0bc7f8f26a917c) + # 250+config-text= + # ControlPort 9051 + # DataDirectory /home/atagar/.tor + # ExitPolicy reject *:* + # Log notice stdout + # Nickname Unnamed + # ORPort 9050 + # . + # 250 OK + + self.entries = {} + remaining_lines = [content for (code, div, content) in self.content(get_bytes = True)] + + if not self.is_ok() or not remaining_lines.pop() == b"OK": + unrecognized_keywords = [] + for code, _, line in self.content(): + if code == '552' and line.startswith("Unrecognized key \"") and line.endswith("\""): + unrecognized_keywords.append(line[18:-1]) + + if unrecognized_keywords: + raise stem.InvalidArguments("552", "GETINFO request contained unrecognized keywords: %s\n" % ', '.join(unrecognized_keywords), unrecognized_keywords) + else: + raise stem.ProtocolError("GETINFO response didn't have an OK status:\n%s" % self) + + while remaining_lines: + try: + key, value = remaining_lines.pop(0).split(b"=", 1) + except ValueError: + raise stem.ProtocolError("GETINFO replies should only contain parameter=value mappings:\n%s" % self) + + if stem.prereq.is_python_3(): + key = stem.util.str_tools._to_unicode(key) + + # if the value is a multiline value then it *must* be of the form + # '=\n' + + if b"\n" in value: + if not value.startswith(b"\n"): + raise stem.ProtocolError("GETINFO response contained a multi-line value that didn't start with a newline:\n%s" % self) + + value = value[1:] + + self.entries[key] = value + + def _assert_matches(self, params): + """ + Checks if we match a given set of parameters, and raise a ProtocolError if not. 
+ + :param set params: parameters to assert that we contain + + :raises: + * :class:`stem.ProtocolError` if parameters don't match this response + """ + + reply_params = set(self.entries.keys()) + + if params != reply_params: + requested_label = ", ".join(params) + reply_label = ", ".join(reply_params) + + raise stem.ProtocolError("GETINFO reply doesn't match the parameters that we requested. Queried '%s' but got '%s'." % (requested_label, reply_label)) diff --git a/lib/stem/response/mapaddress.py b/lib/stem/response/mapaddress.py new file mode 100644 index 00000000..8799ea15 --- /dev/null +++ b/lib/stem/response/mapaddress.py @@ -0,0 +1,42 @@ +# Copyright 2012-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +import stem.response +import stem.socket + + +class MapAddressResponse(stem.response.ControlMessage): + """ + Reply for a MAPADDRESS query. + Doesn't raise an exception unless no addresses were mapped successfully. + + :var dict entries: mapping between the original and replacement addresses + + :raises: + * :class:`stem.OperationFailed` if Tor was unable to satisfy the request + * :class:`stem.InvalidRequest` if the addresses provided were invalid + """ + + def _parse_message(self): + # Example: + # 250-127.192.10.10=torproject.org + # 250 1.2.3.4=tor.freehaven.net + + if not self.is_ok(): + for code, _, message in self.content(): + if code == "512": + raise stem.InvalidRequest(code, message) + elif code == "451": + raise stem.OperationFailed(code, message) + else: + raise stem.ProtocolError("MAPADDRESS returned unexpected response code: %s", code) + + self.entries = {} + + for code, _, message in self.content(): + if code == "250": + try: + key, value = message.split("=", 1) + self.entries[key] = value + except ValueError: + raise stem.ProtocolError(None, "MAPADDRESS returned '%s', which isn't a mapping" % message) diff --git a/lib/stem/response/protocolinfo.py b/lib/stem/response/protocolinfo.py new file mode 100644 index 
class ProtocolInfoResponse(stem.response.ControlMessage):
    """
    Version one PROTOCOLINFO query response.

    The protocol_version is the only mandatory data for a valid PROTOCOLINFO
    response, so all other values are None if undefined or empty if a collection.

    :var int protocol_version: protocol version of the response
    :var stem.version.Version tor_version: version of the tor process
    :var tuple auth_methods: :data:`stem.connection.AuthMethod` types that tor will accept
    :var tuple unknown_auth_methods: strings of unrecognized auth methods
    :var str cookie_path: path of tor's authentication cookie
    """

    def _parse_message(self):
        """
        Populates our attributes from the lines of this reply.

        :raises: :class:`stem.ProtocolError` if this isn't a well formed, OK
          PROTOCOLINFO reply
        """

        # Example:
        # 250-PROTOCOLINFO 1
        # 250-AUTH METHODS=COOKIE COOKIEFILE="/home/atagar/.tor/control_auth_cookie"
        # 250-VERSION Tor="0.2.1.30"
        # 250 OK

        self.protocol_version = None
        self.tor_version = None
        self.auth_methods = ()
        self.unknown_auth_methods = ()
        self.cookie_path = None

        recognized_auth, unrecognized_auth = [], []
        pending = list(self)

        # the reply needs an OK status and to finish with an "OK" line
        if not self.is_ok() or not pending.pop() == "OK":
            raise stem.ProtocolError("PROTOCOLINFO response didn't have an OK status:\n%s" % self)

        # sanity check that we're a PROTOCOLINFO response
        if not pending[0].startswith("PROTOCOLINFO"):
            raise stem.ProtocolError("Message is not a PROTOCOLINFO response:\n%s" % self)

        for line in pending:
            line_type = line.pop()

            if line_type == "PROTOCOLINFO":
                # Line format:
                #   FirstLine = "PROTOCOLINFO" SP PIVERSION CRLF
                #   PIVERSION = 1*DIGIT

                if line.is_empty():
                    raise stem.ProtocolError("PROTOCOLINFO response's initial line is missing the protocol version: %s" % line)

                try:
                    self.protocol_version = int(line.pop())
                except ValueError:
                    raise stem.ProtocolError("PROTOCOLINFO response version is non-numeric: %s" % line)

                # The piversion really should be "1" but, according to the spec,
                # tor does not necessarily need to provide the PROTOCOLINFO
                # version that we requested. Log if it's something we aren't
                # expecting but still make an effort to parse like a v1 response.

                if self.protocol_version != 1:
                    log.info("We made a PROTOCOLINFO version 1 query but got a version %i response instead. We'll still try to use it, but this may cause problems." % self.protocol_version)
            elif line_type == "AUTH":
                # Line format:
                #   AuthLine = "250-AUTH" SP "METHODS=" AuthMethod *("," AuthMethod)
                #              *(SP "COOKIEFILE=" AuthCookieFile) CRLF
                #   AuthMethod = "NULL" / "HASHEDPASSWORD" / "COOKIE"
                #   AuthCookieFile = QuotedString

                if not line.is_next_mapping("METHODS"):
                    raise stem.ProtocolError("PROTOCOLINFO response's AUTH line is missing its mandatory 'METHODS' mapping: %s" % line)

                # tor's name for each method we recognize
                known_methods = {
                    "NULL": AuthMethod.NONE,
                    "HASHEDPASSWORD": AuthMethod.PASSWORD,
                    "COOKIE": AuthMethod.COOKIE,
                    "SAFECOOKIE": AuthMethod.SAFECOOKIE,
                }

                for method in line.pop_mapping()[1].split(","):
                    if method in known_methods:
                        recognized_auth.append(known_methods[method])
                        continue

                    unrecognized_auth.append(method)
                    message_id = "stem.response.protocolinfo.unknown_auth_%s" % method
                    log.log_once(message_id, log.INFO, "PROTOCOLINFO response included a type of authentication that we don't recognize: %s" % method)

                    # our auth methods should have a single AuthMethod.UNKNOWN
                    # entry if any unknown authentication methods exist
                    if AuthMethod.UNKNOWN not in recognized_auth:
                        recognized_auth.append(AuthMethod.UNKNOWN)

                # parse optional COOKIEFILE mapping (quoted and can have escapes)
                if line.is_next_mapping("COOKIEFILE", True, True):
                    self.cookie_path = line.pop_mapping(True, True)[1]
            elif line_type == "VERSION":
                # Line format:
                #   VersionLine = "250-VERSION" SP "Tor=" TorVersion OptArguments CRLF
                #   TorVersion = QuotedString

                if not line.is_next_mapping("Tor", True):
                    raise stem.ProtocolError("PROTOCOLINFO response's VERSION line is missing its mandatory tor version mapping: %s" % line)

                try:
                    self.tor_version = stem.version.Version(line.pop_mapping(True)[1])
                except ValueError as exc:
                    raise stem.ProtocolError(exc)
            else:
                log.debug("Unrecognized PROTOCOLINFO line type '%s', ignoring it: %s" % (line_type, line))

        self.auth_methods = tuple(recognized_auth)
        self.unknown_auth_methods = tuple(unrecognized_auth)
class ControlSocket(object):
    """
    Wrapper for a socket connection that speaks the Tor control protocol. To the
    better part this transparently handles the formatting for sending and
    receiving complete messages. All methods are thread safe.

    Callers should not instantiate this class directly, but rather use subclasses
    which are expected to implement the **_make_socket()** method.
    """

    def __init__(self):
        self._socket, self._socket_file = None, None
        self._is_alive = False

        # Tracks sending and receiving separately. This should be safe, and doing
        # so prevents deadlock where we block writes because we're waiting to read
        # a message that isn't coming.

        self._send_lock = threading.RLock()
        self._recv_lock = threading.RLock()

    def send(self, message, raw = False):
        """
        Formats and sends a message to the control socket. For more information
        see the :func:`~stem.socket.send_message` function.

        :param str message: message to be formatted and sent to the socket
        :param bool raw: leaves the message formatting untouched, passing it to
          the socket as-is

        :raises:
          * :class:`stem.SocketError` if a problem arises in using the socket
          * :class:`stem.SocketClosed` if the socket is known to be shut down
        """

        with self._send_lock:
            try:
                if not self.is_alive():
                    raise stem.SocketClosed()

                send_message(self._socket_file, message, raw)
            except stem.SocketClosed as exc:
                # if send_message raises a SocketClosed then we should properly
                # shut everything down

                if self.is_alive():
                    self.close()

                raise exc

    def recv(self):
        """
        Receives a message from the control socket, blocking until we've received
        one. For more information see the :func:`~stem.socket.recv_message`
        function.

        :returns: :class:`~stem.response.ControlMessage` for the message received

        :raises:
          * :class:`stem.ProtocolError` the content from the socket is malformed
          * :class:`stem.SocketClosed` if the socket closes before we receive a
            complete message
        """

        with self._recv_lock:
            try:
                # makes a temporary reference to the _socket_file because
                # connect() and close() may set or unset it

                socket_file = self._socket_file

                if not socket_file:
                    raise stem.SocketClosed()

                return recv_message(socket_file)
            except stem.SocketClosed as exc:
                # If recv_message raises a SocketClosed then we should properly
                # shut everything down. However, there's a couple cases where
                # this will cause deadlock...
                #
                # * this SocketClosed was *caused by* a close() call, which is
                #   joining on our thread
                #
                # * a send() call that's currently in flight is about to call
                #   close(), also attempting to join on us
                #
                # To resolve this we make a non-blocking call to acquire the send
                # lock. If we get it then great, we can close safely. If not then
                # one of the above are in progress and we leave the close to them.

                if self.is_alive() and self._send_lock.acquire(False):
                    # released in a finally so a failing _close() hook can't
                    # leave the send lock held

                    try:
                        self.close()
                    finally:
                        self._send_lock.release()

                raise exc

    def is_alive(self):
        """
        Checks if the socket is known to be closed. We won't be aware if it is
        until we either use it or have explicitly shut it down.

        In practice a socket derived from a port knows about its disconnection
        after a failed :func:`~stem.socket.ControlSocket.recv` call. Socket file
        derived connections know after either a
        :func:`~stem.socket.ControlSocket.send` or
        :func:`~stem.socket.ControlSocket.recv`.

        This means that to have reliable detection for when we're disconnected
        you need to continually pull from the socket (which is part of what the
        :class:`~stem.control.BaseController` does).

        :returns: **bool** that's **True** if our socket is connected and
          **False** otherwise
        """

        return self._is_alive

    def is_localhost(self):
        """
        Returns if the connection is for the local system or not.

        :returns: **bool** that's **True** if the connection is for the local
          host and **False** otherwise
        """

        return False

    def connect(self):
        """
        Connects to a new socket, closing our previous one if we're already
        attached.

        :raises: :class:`stem.SocketError` if unable to make a socket
        """

        with self._send_lock:
            # Closes the socket if we're currently attached to one. Once we're no
            # longer alive it'll be safe to acquire the recv lock because recv()
            # calls no longer block (raising SocketClosed instead).

            if self.is_alive():
                self.close()

            with self._recv_lock:
                self._socket = self._make_socket()
                self._socket_file = self._socket.makefile(mode = "rwb")
                self._is_alive = True

                # It's possible for this to have a transient failure...
                # SocketError: [Errno 4] Interrupted system call
                #
                # It's safe to retry, so give it another try if it fails.

                try:
                    self._connect()
                except stem.SocketError:
                    self._connect()  # single retry

    def close(self):
        """
        Shuts down the socket. If it's already closed then this is a no-op.
        """

        with self._send_lock:
            # Function is idempotent with one exception: we notify _close() if
            # this is causing our is_alive() state to change.

            is_change = self.is_alive()

            if self._socket:
                # if we haven't yet established a connection then this raises an
                # error...
                # socket.error: [Errno 107] Transport endpoint is not connected

                try:
                    self._socket.shutdown(socket.SHUT_RDWR)
                except socket.error:
                    pass

                # Suppressing unexpected exceptions from close. For instance, if
                # the socket's file has already been closed then with python 2.7
                # that raises with...
                # error: [Errno 32] Broken pipe

                try:
                    self._socket.close()
                except Exception:
                    pass

            if self._socket_file:
                try:
                    self._socket_file.close()
                except Exception:
                    pass

            self._socket = None
            self._socket_file = None
            self._is_alive = False

            if is_change:
                self._close()

    def _get_send_lock(self):
        """
        The send lock is useful to classes that interact with us at a deep level
        because it's used to lock :func:`stem.socket.ControlSocket.connect` /
        :func:`stem.socket.ControlSocket.close`, and by extension our
        :func:`stem.socket.ControlSocket.is_alive` state changes.

        :returns: **threading.RLock** that governs sending messages to our socket
          and state changes
        """

        return self._send_lock

    def __enter__(self):
        return self

    def __exit__(self, exit_type, value, traceback):
        self.close()

    def _connect(self):
        """
        Connection callback that can be overwritten by subclasses and wrappers.
        """

        pass

    def _close(self):
        """
        Disconnection callback that can be overwritten by subclasses and wrappers.
        """

        pass

    def _make_socket(self):
        """
        Constructs and connects new socket. This is implemented by subclasses.

        :returns: **socket.socket** for our configuration

        :raises:
          * :class:`stem.SocketError` if unable to make a socket
          * **NotImplementedError** if not implemented by a subclass
        """

        raise NotImplementedError("Unsupported Operation: this should be implemented by the ControlSocket subclass")


class ControlPort(ControlSocket):
    """
    Control connection to tor. For more information see tor's ControlPort torrc
    option.
    """

    def __init__(self, address = "127.0.0.1", port = 9051, connect = True):
        """
        ControlPort constructor.

        :param str address: ip address of the controller
        :param int port: port number of the controller
        :param bool connect: connects to the socket if True, leaves it
          unconnected otherwise

        :raises: :class:`stem.SocketError` if connect is **True** and we're
          unable to establish a connection
        """

        super(ControlPort, self).__init__()
        self._control_addr = address
        self._control_port = port

        if connect:
            self.connect()

    def get_address(self):
        """
        Provides the ip address our socket connects to.

        :returns: str with the ip address of our socket
        """

        return self._control_addr

    def get_port(self):
        """
        Provides the port our socket connects to.

        :returns: int with the port of our socket
        """

        return self._control_port

    def is_localhost(self):
        return self._control_addr == "127.0.0.1"

    def _make_socket(self):
        try:
            control_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            control_socket.connect((self._control_addr, self._control_port))
            return control_socket
        except socket.error as exc:
            raise stem.SocketError(exc)


class ControlSocketFile(ControlSocket):
    """
    Control connection to tor. For more information see tor's ControlSocket torrc
    option.
    """

    def __init__(self, path = "/var/run/tor/control", connect = True):
        """
        ControlSocketFile constructor.

        :param str path: path where the control socket is located
        :param bool connect: connects to the socket if True, leaves it
          unconnected otherwise

        :raises: :class:`stem.SocketError` if connect is **True** and we're
          unable to establish a connection
        """

        super(ControlSocketFile, self).__init__()
        self._socket_path = path

        if connect:
            self.connect()

    def get_socket_path(self):
        """
        Provides the path our socket connects to.

        :returns: str with the path for our control socket
        """

        return self._socket_path

    def is_localhost(self):
        return True

    def _make_socket(self):
        try:
            control_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            control_socket.connect(self._socket_path)
            return control_socket
        except socket.error as exc:
            raise stem.SocketError(exc)


def send_message(control_file, message, raw = False):
    """
    Sends a message to the control socket, adding the expected formatting for
    single versus multi-line messages. Neither message type should contain an
    ending newline (if so it'll be treated as a multi-line message with a blank
    line at the end). If the message doesn't contain a newline then it's sent
    as...

    ::

      <message>\\r\\n

    and if it does contain newlines then it's split on ``\\n`` and sent as...

    ::

      +<line 1>\\r\\n
      <line 2>\\r\\n
      <line 3>\\r\\n
      .\\r\\n

    :param file control_file: file derived from the control socket (see the
      socket's makefile() method for more information)
    :param str message: message to be sent on the control socket
    :param bool raw: leaves the message formatting untouched, passing it to the
      socket as-is

    :raises:
      * :class:`stem.SocketError` if a problem arises in using the socket
      * :class:`stem.SocketClosed` if the socket is known to be shut down
    """

    if not raw:
        message = send_formatting(message)

    try:
        control_file.write(stem.util.str_tools._to_bytes(message))
        control_file.flush()

        log_message = message.replace("\r\n", "\n").rstrip()
        log.trace("Sent to tor:\n" + log_message)
    except socket.error as exc:
        log.info("Failed to send message: %s" % exc)

        # When sending there doesn't seem to be a reliable method for
        # distinguishing between failures from a disconnect versus other things.
        # Just accounting for known disconnection responses.

        if str(exc) == "[Errno 32] Broken pipe":
            raise stem.SocketClosed(exc)
        else:
            raise stem.SocketError(exc)
    except AttributeError:
        # if the control_file has been closed then flush will receive:
        # AttributeError: 'NoneType' object has no attribute 'sendall'

        log.info("Failed to send message: file has been closed")
        raise stem.SocketClosed("file has been closed")


def recv_message(control_file):
    """
    Pulls from a control socket until we either have a complete message or
    encounter a problem.

    :param file control_file: file derived from the control socket (see the
      socket's makefile() method for more information)

    :returns: :class:`~stem.response.ControlMessage` read from the socket

    :raises:
      * :class:`stem.ProtocolError` the content from the socket is malformed
      * :class:`stem.SocketClosed` if the socket closes before we receive
        a complete message
    """

    parsed_content, raw_content = [], b""
    logging_prefix = "Error while receiving a control message (%s): "

    while True:
        try:
            # From a real socket readline() would always provide bytes, but
            # during tests we might be given a StringIO in which case it's
            # unicode under python 3.x.

            line = stem.util.str_tools._to_bytes(control_file.readline())
        except AttributeError:
            # if the control_file has been closed then we will receive:
            # AttributeError: 'NoneType' object has no attribute 'recv'

            prefix = logging_prefix % "SocketClosed"
            log.info(prefix + "socket file has been closed")
            raise stem.SocketClosed("socket file has been closed")
        except (socket.error, ValueError) as exc:
            # When disconnected we get...
            #
            # Python 2:
            #   socket.error: [Errno 107] Transport endpoint is not connected
            #
            # Python 3:
            #   ValueError: I/O operation on closed file.

            prefix = logging_prefix % "SocketClosed"
            log.info(prefix + "received exception \"%s\"" % exc)
            raise stem.SocketClosed(exc)

        raw_content += line

        # Parses the tor control lines. These consist of a three character
        # status code, a divider ('-', '+' or ' '), the content, then a CRLF.

        if len(line) == 0:
            # if the socket is disconnected then the readline() method will
            # provide empty content

            prefix = logging_prefix % "SocketClosed"
            log.info(prefix + "empty socket content")
            raise stem.SocketClosed("Received empty socket content.")
        elif len(line) < 4:
            prefix = logging_prefix % "ProtocolError"
            log.info(prefix + "line too short, \"%s\"" % log.escape(line))
            raise stem.ProtocolError("Badly formatted reply line: too short")
        elif not re.match(b'^[a-zA-Z0-9]{3}[-+ ]', line):
            prefix = logging_prefix % "ProtocolError"
            log.info(prefix + "malformed status code/divider, \"%s\"" % log.escape(line))
            raise stem.ProtocolError("Badly formatted reply line: beginning is malformed")
        elif not line.endswith(b"\r\n"):
            prefix = logging_prefix % "ProtocolError"
            log.info(prefix + "no CRLF linebreak, \"%s\"" % log.escape(line))
            raise stem.ProtocolError("All lines should end with CRLF")

        line = line[:-2]  # strips off the CRLF
        status_code, divider, content = line[:3], line[3:4], line[4:]

        if stem.prereq.is_python_3():
            status_code = stem.util.str_tools._to_unicode(status_code)
            divider = stem.util.str_tools._to_unicode(divider)

        if divider == "-":
            # mid-reply line, keep pulling for more content
            parsed_content.append((status_code, divider, content))
        elif divider == " ":
            # end of the message, return the message
            parsed_content.append((status_code, divider, content))

            log_message = raw_content.replace(b"\r\n", b"\n").rstrip()
            log.trace("Received from tor:\n" + stem.util.str_tools._to_unicode(log_message))

            return stem.response.ControlMessage(parsed_content, raw_content)
        elif divider == "+":
            # data entry, all of the following lines belong to the content until
            # we get a line with just a period

            while True:
                try:
                    line = stem.util.str_tools._to_bytes(control_file.readline())
                except (socket.error, ValueError, AttributeError) as exc:
                    # A closed file surfaces as a ValueError or AttributeError
                    # rather than a socket.error (see the handling of our first
                    # read above), so all three count as a disconnect.

                    prefix = logging_prefix % "SocketClosed"
                    log.info(prefix + "received an exception while mid-way through a data reply (exception: \"%s\", read content: \"%s\")" % (exc, log.escape(raw_content)))
                    raise stem.SocketClosed(exc)

                raw_content += line

                if not line.endswith(b"\r\n"):
                    prefix = logging_prefix % "ProtocolError"
                    log.info(prefix + "CRLF linebreaks missing from a data reply, \"%s\"" % log.escape(raw_content))
                    raise stem.ProtocolError("All lines should end with CRLF")
                elif line == b".\r\n":
                    break  # data block termination

                line = line[:-2]  # strips off the CRLF

                # lines starting with a period are escaped by a second period (as
                # per section 2.4 of the control-spec)

                if line.startswith(b".."):
                    line = line[1:]

                # appends to previous content, using a newline rather than CRLF
                # separator (more conventional for multi-line string content
                # outside the windows world)

                content += b"\n" + line

            parsed_content.append((status_code, divider, content))
        else:
            # this should never be reached due to the prefix regex, but might as
            # well be safe...

            prefix = logging_prefix % "ProtocolError"
            log.warn(prefix + "\"%s\" isn't a recognized divider type" % divider)
            raise stem.ProtocolError("Unrecognized divider type '%s': %s" % (divider, stem.util.str_tools._to_unicode(line)))


def send_formatting(message):
    """
    Performs the formatting expected from sent control messages. For more
    information see the :func:`~stem.socket.send_message` function.

    :param str message: message to be formatted

    :returns: **str** of the message wrapped by the formatting expected from
      controllers
    """

    # From control-spec section 2.2...
    #   Command = Keyword OptArguments CRLF / "+" Keyword OptArguments CRLF CmdData
    #   Keyword = 1*ALPHA
    #   OptArguments = [ SP *(SP / VCHAR) ]
    #
    # A command is either a single line containing a Keyword and arguments, or a
    # multiline command whose initial keyword begins with +, and whose data
    # section ends with a single "." on a line of its own.

    # if we already have \r\n entries then standardize on \n to start with
    message = message.replace("\r\n", "\n")

    if "\n" in message:
        return "+%s\r\n.\r\n" % message.replace("\n", "\r\n")

    return message + "\r\n"
+ +:: + + config = { + "user.name": "Galen", + "user.password": "yabba1234", + "user.notes": "takes a fancy to pepperjack cheese", + "blankEntry.example": "", + "msg.greeting": "Multi-line message exclaiming of the\\nwonder and awe that is pepperjack!", + } + +Configurations are managed via the :class:`~stem.util.conf.Config` class. The +:class:`~stem.util.conf.Config` can be used directly with its +:func:`~stem.util.conf.Config.get` and :func:`~stem.util.conf.Config.set` +methods, but usually modules will want a local dictionary with just the +configurations that it cares about. + +To do this use the :func:`~stem.util.conf.config_dict` function. For example... + +:: + + import getpass + from stem.util import conf, connection + + def config_validator(key, value): + if key == "timeout": + # require at least a one second timeout + return max(1, value) + elif key == "endpoint": + if not connection.is_valid_ipv4_address(value): + raise ValueError("'%s' isn't a valid IPv4 address" % value) + elif key == "port": + if not connection.is_valid_port(value): + raise ValueError("'%s' isn't a valid port" % value) + elif key == "retries": + # negative retries really don't make sense + return max(0, value) + + CONFIG = conf.config_dict("ssh_login", { + "username": getpass.getuser(), + "password": "", + "timeout": 10, + "endpoint": "263.12.8.0", + "port": 22, + "reconnect": False, + "retries": 3, + }, config_validator) + +There's several things going on here so let's take it step by step... + +* The :func:`~stem.util.conf.config_dict` provides a dictionary that's bound + to a given configuration. If the "ssh_login" configuration changes + then so will the contents of CONFIG. + +* The dictionary we're passing to :func:`~stem.util.conf.config_dict` provides + two important pieces of information: default values and their types. See the + Config's :func:`~stem.util.conf.Config.get` method for how these type + inferences work.
+ +* The config_validator is a hook we're adding to make sure CONFIG only gets + values we think are valid. In this case it ensures that our timeout value + is at least one second, and rejects endpoints or ports that are invalid. + +Now let's say our user has the following configuration file... + +:: + + username waddle_doo + password jabberwocky + timeout -15 + port 9000000 + retries lots + reconnect true + logging debug + +... and we load it as follows... + +:: + + >>> from stem.util import conf + >>> our_config = conf.get_config("ssh_login") + >>> our_config.load("/home/atagar/user_config") + >>> print CONFIG + { + "username": "waddle_doo", + "password": "jabberwocky", + "timeout": 1, + "endpoint": "263.12.8.0", + "port": 22, + "reconnect": True, + "retries": 3, + } + +Here's an explanation of what happened... + +* the username, password, and reconnect attributes took the values in the + configuration file + +* the 'config_validator' we added earlier allows for a minimum timeout of one + and rejected the invalid port (with a log message) + +* we weren't able to convert the retries' "lots" value to an integer so it kept + its default value and logged a warning + +* the user didn't supply an endpoint so that remained unchanged + +* our CONFIG didn't have a 'logging' attribute so it was ignored + +**Module Overview:** + +:: + + config_dict - provides a dictionary that's kept in sync with our config + get_config - singleton for getting configurations + parse_enum_csv - helper function for parsing configuration entries for enums + + Config - Custom configuration + |- load - reads a configuration file + |- save - writes the current configuration to a file + |- clear - empties our loaded configuration contents + |- add_listener - notifies the given listener when an update occurs + |- clear_listeners - removes any attached listeners + |- keys - provides keys in the loaded configuration + |- set - sets the given key/value pair + |- unused_keys - provides keys that have
never been requested + |- get - provides the value for a given key, with type inference + +- get_value - provides the value for a given key as a string +""" + +import threading + +from stem.util import log + +CONFS = {} # mapping of identifier to singleton instances of configs + + +class _SyncListener(object): + def __init__(self, config_dict, interceptor): + self.config_dict = config_dict + self.interceptor = interceptor + + def update(self, config, key): + if key in self.config_dict: + new_value = config.get(key, self.config_dict[key]) + + if new_value == self.config_dict[key]: + return # no change + + if self.interceptor: + interceptor_value = self.interceptor(key, new_value) + + if interceptor_value: + new_value = interceptor_value + + self.config_dict[key] = new_value + + +def config_dict(handle, conf_mappings, handler = None): + """ + Makes a dictionary that stays synchronized with a configuration. + + This takes a dictionary of 'config_key => default_value' mappings and + changes the values to reflect our current configuration. This will leave + the previous values alone if... + + * we don't have a value for that config_key + * we can't convert our value to be the same type as the default_value + + If a handler is provided then this is called just prior to assigning new + values to the config_dict. The handler function is expected to accept the + (key, value) for the new values and return what we should actually insert + into the dictionary. If this returns None then the value is updated as + normal. + + For more information about how we convert types see our + :func:`~stem.util.conf.Config.get` method. 
+ + **The dictionary you get from this is manged by the + :class:`~stem.util.conf.Config` class and should be treated as being + read-only.** + + :param str handle: unique identifier for a config instance + :param dict conf_mappings: config key/value mappings used as our defaults + :param functor handler: function referred to prior to assigning values + """ + + selected_config = get_config(handle) + selected_config.add_listener(_SyncListener(conf_mappings, handler).update) + return conf_mappings + + +def get_config(handle): + """ + Singleton constructor for configuration file instances. If a configuration + already exists for the handle then it's returned. Otherwise a fresh instance + is constructed. + + :param str handle: unique identifier used to access this config instance + """ + + if not handle in CONFS: + CONFS[handle] = Config() + + return CONFS[handle] + + +def parse_enum(key, value, enumeration): + """ + Provides the enumeration value for a given key. This is a case insensitive + lookup and raises an exception if the enum key doesn't exist. + + :param str key: configuration key being looked up + :param str value: value to be parsed + :param stem.util.enum.Enum enumeration: enumeration the values should be in + + :returns: enumeration value + + :raises: **ValueError** if the **value** isn't among the enumeration keys + """ + + return parse_enum_csv(key, value, enumeration, 1)[0] + + +def parse_enum_csv(key, value, enumeration, count = None): + """ + Parses a given value as being a comma separated listing of enumeration keys, + returning the corresponding enumeration values. This is intended to be a + helper for config handlers. The checks this does are case insensitive. + + The **count** attribute can be used to make assertions based on the number of + values. This can be... + + * None to indicate that there's no restrictions. + * An int to indicate that we should have this many values. + * An (int, int) tuple to indicate the range that values can be in. 
This range + is inclusive and either can be None to indicate the lack of a lower or + upper bound. + + :param str key: configuration key being looked up + :param str value: value to be parsed + :param stem.util.enum.Enum enumeration: enumeration the values should be in + :param int,tuple count: validates that we have this many items + + :returns: list with the enumeration values + + :raises: **ValueError** if the count assertion fails or the **value** entries + don't match the enumeration keys + """ + + values = [val.upper().strip() for val in value.split(',')] + + if values == ['']: + return [] + + if count is None: + pass # no count validateion checks to do + elif isinstance(count, int): + if len(values) != count: + raise ValueError("Config entry '%s' is expected to be %i comma separated values, got '%s'" % (key, count, value)) + elif isinstance(count, tuple) and len(count) == 2: + minimum, maximum = count + + if minimum is not None and len(values) < minimum: + raise ValueError("Config entry '%s' must have at least %i comma separated values, got '%s'" % (key, minimum, value)) + + if maximum is not None and len(values) > maximum: + raise ValueError("Config entry '%s' can have at most %i comma separated values, got '%s'" % (key, maximum, value)) + else: + raise ValueError("The count must be None, an int, or two value tuple. Got '%s' (%s)'" % (count, type(count))) + + result = [] + enum_keys = [k.upper() for k in enumeration.keys()] + enum_values = list(enumeration) + + for val in values: + if val in enum_keys: + result.append(enum_values[enum_keys.index(val)]) + else: + raise ValueError("The '%s' entry of config entry '%s' wasn't in the enumeration (expected %s)" % (val, key, ', '.join(enum_keys))) + + return result + + +class Config(object): + """ + Handler for easily working with custom configurations, providing persistence + to and from files. All operations are thread safe. + + **Example usage:** + + User has a file at '/home/atagar/myConfig' with... 
+ + :: + + destination.ip 1.2.3.4 + destination.port blarg + + startup.run export PATH=$PATH:~/bin + startup.run alias l=ls + + And they have a script with... + + :: + + from stem.util import conf + + # Configuration values we'll use in this file. These are mappings of + # configuration keys to the default values we'll use if the user doesn't + # have something different in their config file (or it doesn't match this + # type). + + ssh_config = conf.config_dict("ssh_login", { + "login.user": "atagar", + "login.password": "pepperjack_is_awesome!", + "destination.ip": "127.0.0.1", + "destination.port": 22, + "startup.run": [], + }) + + # Makes an empty config instance with the handle of 'ssh_login'. This is + # a singleton so other classes can fetch this same configuration from + # this handle. + + user_config = conf.get_config("ssh_login") + + # Loads the user's configuration file, warning if this fails. + + try: + user_config.load("/home/atagar/myConfig") + except IOError as exc: + print "Unable to load the user's config: %s" % exc + + # This replaces the contents of ssh_config with the values from the user's + # config file if... + # + # * the key is present in the config file + # * we're able to convert the configuration file's value to the same type + # as what's in the mapping (see the Config.get() method for how these + # type inferences work) + # + # For instance in this case... + # + # * the login values are left alone because they aren't in the user's + # config file + # + # * the 'destination.port' is also left with the value of 22 because we + # can't turn "blarg" into an integer + # + # The other values are replaced, so ssh_config now becomes... + # + # {"login.user": "atagar", + # "login.password": "pepperjack_is_awesome!", + # "destination.ip": "1.2.3.4", + # "destination.port": 22, + # "startup.run": ["export PATH=$PATH:~/bin", "alias l=ls"]} + # + # Information about which values fail to load and why is reported to + # 'stem.util.log'. 
+ """ + + def __init__(self): + self._path = None # location we last loaded from or saved to + self._contents = {} # configuration key/value pairs + self._listeners = [] # functors to be notified of config changes + + # used for accessing _contents + self._contents_lock = threading.RLock() + + # keys that have been requested (used to provide unused config contents) + self._requested_keys = set() + + def load(self, path = None): + """ + Reads in the contents of the given path, adding its configuration values + to our current contents. + + :param str path: file path to be loaded, this uses the last loaded path if + not provided + + :raises: + * **IOError** if we fail to read the file (it doesn't exist, insufficient + permissions, etc) + * **ValueError** if no path was provided and we've never been provided one + """ + + if path: + self._path = path + elif not self._path: + raise ValueError("Unable to load configuration: no path provided") + + with open(self._path, "r") as config_file: + read_contents = config_file.readlines() + + with self._contents_lock: + while read_contents: + line = read_contents.pop(0) + + # strips any commenting or excess whitespace + comment_start = line.find("#") + + if comment_start != -1: + line = line[:comment_start] + + line = line.strip() + + # parse the key/value pair + if line: + try: + key, value = line.split(" ", 1) + value = value.strip() + except ValueError: + log.debug("Config entry '%s' is expected to be of the format 'Key Value', defaulting to '%s' -> ''" % (line, line)) + key, value = line, "" + + if not value: + # this might be a multi-line entry, try processing it as such + multiline_buffer = [] + + while read_contents and read_contents[0].lstrip().startswith("|"): + content = read_contents.pop(0).lstrip()[1:] # removes '\s+|' prefix + content = content.rstrip("\n") # trailing newline + multiline_buffer.append(content) + + if multiline_buffer: + self.set(key, "\n".join(multiline_buffer), False) + continue + + self.set(key, 
value, False) + + def save(self, path = None): + """ + Saves configuration contents to disk. If a path is provided then it + replaces the configuration location that we track. + + :param str path: location to be saved to + + :raises: **ValueError** if no path was provided and we've never been provided one + """ + + if path: + self._path = path + elif not self._path: + raise ValueError("Unable to save configuration: no path provided") + + with self._contents_lock: + with open(self._path, 'w') as output_file: + for entry_key in sorted(self.keys()): + for entry_value in self.get_value(entry_key, multiple = True): + # check for multi line entries + if "\n" in entry_value: + entry_value = "\n|" + entry_value.replace("\n", "\n|") + + output_file.write('%s %s\n' % (entry_key, entry_value)) + + def clear(self): + """ + Drops the configuration contents and reverts back to a blank, unloaded + state. + """ + + with self._contents_lock: + self._contents.clear() + self._requested_keys = set() + + def add_listener(self, listener, backfill = True): + """ + Registers the function to be notified of configuration updates. Listeners + are expected to be functors which accept (config, key). + + :param functor listener: function to be notified when our configuration is changed + :param bool backfill: calls the function with our current values if **True** + """ + + with self._contents_lock: + self._listeners.append(listener) + + if backfill: + for key in self.keys(): + listener(self, key) + + def clear_listeners(self): + """ + Removes all attached listeners. + """ + + self._listeners = [] + + def keys(self): + """ + Provides all keys in the currently loaded configuration. 
+ + :returns: **list** if strings for the configuration keys we've loaded + """ + + return self._contents.keys() + + def unused_keys(self): + """ + Provides the configuration keys that have never been provided to a caller + via :func:`~stem.util.conf.config_dict` or the + :func:`~stem.util.conf.Config.get` and + :func:`~stem.util.conf.Config.get_value` methods. + + :returns: **set** of configuration keys we've loaded but have never been requested + """ + + return set(self.keys()).difference(self._requested_keys) + + def set(self, key, value, overwrite = True): + """ + Appends the given key/value configuration mapping, behaving the same as if + we'd loaded this from a configuration file. + + :param str key: key for the configuration mapping + :param str,list value: value we're setting the mapping to + :param bool overwrite: replaces the previous value if **True**, otherwise + the values are appended + """ + + with self._contents_lock: + if isinstance(value, str): + if not overwrite and key in self._contents: + self._contents[key].append(value) + else: + self._contents[key] = [value] + + for listener in self._listeners: + listener(self, key) + elif isinstance(value, (list, tuple)): + if not overwrite and key in self._contents: + self._contents[key] += value + else: + self._contents[key] = value + + for listener in self._listeners: + listener(self, key) + else: + raise ValueError("Config.set() only accepts str, list, or tuple. Provided value was a '%s'" % type(value)) + + def get(self, key, default = None): + """ + Fetches the given configuration, using the key and default value to + determine the type it should be. 
Recognized inferences are: + + * **default is a boolean => boolean** + + * values are case insensitive + * provides the default if the value isn't "true" or "false" + + * **default is an integer => int** + + * provides the default if the value can't be converted to an int + + * **default is a float => float** + + * provides the default if the value can't be converted to a float + + * **default is a list => list** + + * string contents for all configuration values with this key + + * **default is a tuple => tuple** + + * string contents for all configuration values with this key + + * **default is a dictionary => dict** + + * values without "=>" in them are ignored + * values are split into key/value pairs on "=>" with extra whitespace + stripped + + :param str key: config setting to be fetched + :param default object: value provided if no such key exists or fails to be converted + + :returns: given configuration value with its type inferred with the above rules + """ + + is_multivalue = isinstance(default, (list, tuple, dict)) + val = self.get_value(key, default, is_multivalue) + + if val == default: + return val # don't try to infer undefined values + + if isinstance(default, bool): + if val.lower() == "true": + val = True + elif val.lower() == "false": + val = False + else: + log.debug("Config entry '%s' is expected to be a boolean, defaulting to '%s'" % (key, str(default))) + val = default + elif isinstance(default, int): + try: + val = int(val) + except ValueError: + log.debug("Config entry '%s' is expected to be an integer, defaulting to '%i'" % (key, default)) + val = default + elif isinstance(default, float): + try: + val = float(val) + except ValueError: + log.debug("Config entry '%s' is expected to be a float, defaulting to '%f'" % (key, default)) + val = default + elif isinstance(default, list): + pass # nothing special to do (already a list) + elif isinstance(default, tuple): + val = tuple(val) + elif isinstance(default, dict): + valMap = {} + for entry 
in val: + if "=>" in entry: + entryKey, entryVal = entry.split("=>", 1) + valMap[entryKey.strip()] = entryVal.strip() + else: + log.debug("Ignoring invalid %s config entry (expected a mapping, but \"%s\" was missing \"=>\")" % (key, entry)) + val = valMap + + return val + + def get_value(self, key, default = None, multiple = False): + """ + This provides the current value associated with a given key. + + :param str key: config setting to be fetched + :param object default: value provided if no such key exists + :param bool multiple: provides back a list of all values if **True**, + otherwise this returns the last loaded configuration value + + :returns: **str** or **list** of string configuration values associated + with the given key, providing the default if no such key exists + """ + + with self._contents_lock: + if key in self._contents: + self._requested_keys.add(key) + + if multiple: + return self._contents[key] + else: + return self._contents[key][-1] + else: + message_id = "stem.util.conf.missing_config_key_%s" % key + log.log_once(message_id, log.TRACE, "config entry '%s' not found, defaulting to '%s'" % (key, default)) + return default diff --git a/lib/stem/util/connection.py b/lib/stem/util/connection.py new file mode 100644 index 00000000..0b93130b --- /dev/null +++ b/lib/stem/util/connection.py @@ -0,0 +1,562 @@ +# Copyright 2012-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Connection and networking based utility functions. 
+ +:: + + get_connections - queries the connections belonging to a given process + get_system_resolvers - provides connection resolution methods that are likely to be available + + is_valid_ipv4_address - checks if a string is a valid IPv4 address + is_valid_ipv6_address - checks if a string is a valid IPv6 address + is_valid_port - checks if something is a valid representation for a port + is_private_address - checks if an IPv4 address belongs to a private range or not + + expand_ipv6_address - provides an IPv6 address with its collapsed portions expanded + get_mask_ipv4 - provides the mask representation for a given number of bits + get_mask_ipv6 - provides the IPv6 mask representation for a given number of bits + +.. data:: Resolver (enum) + + Method for resolving a process' connections. + + ================= =========== + Resolver Description + ================= =========== + **PROC** /proc contents + **NETSTAT** netstat command + **SS** ss command + **LSOF** lsof command + **SOCKSTAT** sockstat command under *nix + **BSD_SOCKSTAT** sockstat command under FreeBSD + **BSD_PROCSTAT** procstat command under FreeBSD + ================= =========== +""" + +import collections +import hashlib +import hmac +import os +import platform +import re + +import stem.util.proc +import stem.util.system + +from stem.util import enum, log + +# Connection resolution is risky to log about since it's highly likely to +# contain sensitive information. That said, it's also difficult to get right in +# a platform independent fashion. To opt into the logging required to +# troubleshoot connection resolution set the following... 
+ +LOG_CONNECTION_RESOLUTION = False + +Resolver = enum.Enum( + ('PROC', 'proc'), + ('NETSTAT', 'netstat'), + ('SS', 'ss'), + ('LSOF', 'lsof'), + ('SOCKSTAT', 'sockstat'), + ('BSD_SOCKSTAT', 'sockstat (bsd)'), + ('BSD_PROCSTAT', 'procstat (bsd)') +) + +Connection = collections.namedtuple('Connection', [ + 'local_address', + 'local_port', + 'remote_address', + 'remote_port', + 'protocol', +]) + +FULL_IPv4_MASK = "255.255.255.255" +FULL_IPv6_MASK = "FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF" + +CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE = os.urandom(32) + +RESOLVER_COMMAND = { + Resolver.PROC: '', + + # -n = prevents dns lookups, -p = include process + Resolver.NETSTAT: 'netstat -np', + + # -n = numeric ports, -p = include process, -t = tcp sockets, -u = udp sockets + Resolver.SS: 'ss -nptu', + + # -n = prevent dns lookups, -P = show port numbers (not names), -i = ip only, -w = no warnings + # (lsof provides a '-p ' but oddly in practice it seems to be ~11-28% slower) + Resolver.LSOF: 'lsof -wnPi', + + Resolver.SOCKSTAT: 'sockstat', + + # -4 = IPv4, -c = connected sockets + Resolver.BSD_SOCKSTAT: 'sockstat -4c', + + # -f = process pid + Resolver.BSD_PROCSTAT: 'procstat -f {pid}', +} + +RESOLVER_FILTER = { + Resolver.PROC: '', + + # tcp 0 586 192.168.0.1:44284 38.229.79.2:443 ESTABLISHED 15843/tor + Resolver.NETSTAT: '^{protocol}\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED\s+{pid}/{name}\s*$', + + # tcp ESTAB 0 0 192.168.0.20:44415 38.229.79.2:443 users:(("tor",15843,9)) + Resolver.SS: '^{protocol}\s+ESTAB\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+users:\(\("{name}",{pid},[0-9]+\)\)$', + + # tor 3873 atagar 45u IPv4 40994 0t0 TCP 10.243.55.20:45724->194.154.227.109:9001 (ESTABLISHED) + Resolver.LSOF: '^{name}\s+{pid}\s+.*\s+{protocol}\s+{local_address}:{local_port}->{remote_address}:{remote_port} \(ESTABLISHED\)$', + + # atagar tor 15843 tcp4 192.168.0.20:44092 68.169.35.102:443 ESTABLISHED + 
Resolver.SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED$', + + # _tor tor 4397 12 tcp4 172.27.72.202:54011 127.0.0.1:9001 + Resolver.BSD_SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+\S+\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$', + + # 3561 tor 4 s - rw---n-- 2 0 TCP 10.0.0.2:9050 10.0.0.1:22370 + Resolver.BSD_PROCSTAT: '^\s*{pid}\s+{name}\s+.*\s+{protocol}\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$', +} + + +def get_connections(resolver, process_pid = None, process_name = None): + """ + Retrieves a list of the current connections for a given process. The provides + a list of Connection instances, which have four attributes... + + * local_address (str) + * local_port (int) + * remote_address (str) + * remote_port (int) + * protocol (str, generally either 'tcp' or 'udp') + + :param Resolver resolver: method of connection resolution to use + :param int process_pid: pid of the process to retrieve + :param str process_name: name of the process to retrieve + + :raises: + * **ValueError** if using **Resolver.PROC** or **Resolver.BSD_PROCSTAT** + and the process_pid wasn't provided + + * **IOError** if no connections are available or resolution fails + (generally they're indistinguishable). The common causes are the + command being unavailable or permissions. 
+ """ + + def _log(msg): + if LOG_CONNECTION_RESOLUTION: + log.debug(msg) + + _log("=" * 80) + _log("Querying connections for resolver: %s, pid: %s, name: %s" % (resolver, process_pid, process_name)) + + if isinstance(process_pid, str): + try: + process_pid = int(process_pid) + except ValueError: + raise ValueError("Process pid was non-numeric: %s" % process_pid) + + if process_pid is None and resolver in (Resolver.PROC, Resolver.BSD_PROCSTAT): + raise ValueError("%s resolution requires a pid" % resolver) + + if resolver == Resolver.PROC: + return [Connection(*conn) for conn in stem.util.proc.get_connections(process_pid)] + + resolver_command = RESOLVER_COMMAND[resolver].format(pid = process_pid) + + try: + results = stem.util.system.call(resolver_command) + except OSError as exc: + raise IOError("Unable to query '%s': %s" % (resolver_command, exc)) + + resolver_regex_str = RESOLVER_FILTER[resolver].format( + protocol = '(?P\S+)', + local_address = '(?P[0-9.]+)', + local_port = '(?P[0-9]+)', + remote_address = '(?P[0-9.]+)', + remote_port = '(?P[0-9]+)', + pid = process_pid if process_pid else '[0-9]*', + name = process_name if process_name else '\S*', + ) + + _log("Resolver regex: %s" % resolver_regex_str) + _log("Resolver results:\n%s" % '\n'.join(results)) + + connections = [] + resolver_regex = re.compile(resolver_regex_str) + + for line in results: + match = resolver_regex.match(line) + + if match: + attr = match.groupdict() + local_addr = attr['local_address'] + local_port = int(attr['local_port']) + remote_addr = attr['remote_address'] + remote_port = int(attr['remote_port']) + protocol = attr['protocol'].lower() + + if remote_addr == '0.0.0.0': + continue # procstat response for unestablished connections + + if not (is_valid_ipv4_address(local_addr) and is_valid_ipv4_address(remote_addr)): + _log("Invalid address (%s or %s): %s" % (local_addr, remote_addr, line)) + elif not (is_valid_port(local_port) and is_valid_port(remote_port)): + _log("Invalid port (%s 
or %s): %s" % (local_port, remote_port, line)) + elif protocol not in ('tcp', 'udp'): + _log("Unrecognized protocol (%s): %s" % (protocol, line)) + + conn = Connection(local_addr, local_port, remote_addr, remote_port, protocol) + connections.append(conn) + _log(str(conn)) + + _log("%i connections found" % len(connections)) + + if not connections: + raise IOError("No results found using: %s" % resolver_command) + + return connections + + +def get_system_resolvers(system = None): + """ + Provides the types of connection resolvers likely to be available on this platform. + + :param str system: system to get resolvers for, this is determined by + platform.system() if not provided + + :returns: **list** of Resolvers likely to be available on this platform + """ + + if system is None: + system = platform.system() + + if system == 'Windows': + resolvers = [] + elif system in ('Darwin', 'OpenBSD'): + resolvers = [Resolver.LSOF] + elif system == 'FreeBSD': + # Netstat is available, but lacks a '-p' equivilant so we can't associate + # the results to processes. The platform also has a ss command, but it + # belongs to a spreadsheet application. + + resolvers = [Resolver.BSD_SOCKSTAT, Resolver.BSD_PROCSTAT, Resolver.LSOF] + else: + # Sockstat isn't available by default on ubuntu. + + resolvers = [Resolver.NETSTAT, Resolver.SOCKSTAT, Resolver.LSOF, Resolver.SS] + + # remove any that aren't in the user's PATH + + resolvers = filter(lambda r: stem.util.system.is_available(RESOLVER_COMMAND[r]), resolvers) + + # proc resolution, by far, outperforms the others so defaults to this is able + + if stem.util.proc.is_available(): + resolvers = [Resolver.PROC] + resolvers + + return resolvers + + +def is_valid_ipv4_address(address): + """ + Checks if a string is a valid IPv4 address. 
+ + :param str address: string to be checked + + :returns: **True** if input is a valid IPv4 address, **False** otherwise + """ + + if not isinstance(address, (bytes, unicode)): + return False + + # checks if theres four period separated values + + if address.count(".") != 3: + return False + + # checks that each value in the octet are decimal values between 0-255 + for entry in address.split("."): + if not entry.isdigit() or int(entry) < 0 or int(entry) > 255: + return False + elif entry[0] == "0" and len(entry) > 1: + return False # leading zeros, for instance in "1.2.3.001" + + return True + + +def is_valid_ipv6_address(address, allow_brackets = False): + """ + Checks if a string is a valid IPv6 address. + + :param str address: string to be checked + :param bool allow_brackets: ignore brackets which form '[address]' + + :returns: **True** if input is a valid IPv6 address, **False** otherwise + """ + + if allow_brackets: + if address.startswith("[") and address.endswith("]"): + address = address[1:-1] + + # addresses are made up of eight colon separated groups of four hex digits + # with leading zeros being optional + # https://en.wikipedia.org/wiki/IPv6#Address_format + + colon_count = address.count(":") + + if colon_count > 7: + return False # too many groups + elif colon_count != 7 and not "::" in address: + return False # not enough groups and none are collapsed + elif address.count("::") > 1 or ":::" in address: + return False # multiple groupings of zeros can't be collapsed + + for entry in address.split(":"): + if not re.match("^[0-9a-fA-f]{0,4}$", entry): + return False + + return True + + +def is_valid_port(entry, allow_zero = False): + """ + Checks if a string or int is a valid port number. 
+ + :param list,str,int entry: string, integer or list to be checked + :param bool allow_zero: accept port number of zero (reserved by definition) + + :returns: **True** if input is an integer and within the valid port range, **False** otherwise + """ + + if isinstance(entry, list): + for port in entry: + if not is_valid_port(port, allow_zero): + return False + + return True + elif isinstance(entry, (bytes, unicode)): + if not entry.isdigit(): + return False + elif entry[0] == "0" and len(entry) > 1: + return False # leading zeros, ex "001" + + entry = int(entry) + + if allow_zero and entry == 0: + return True + + return entry > 0 and entry < 65536 + + +def is_private_address(address): + """ + Checks if the IPv4 address is in a range belonging to the local network or + loopback. These include: + + * Private ranges: 10.*, 172.16.* - 172.31.*, 192.168.* + * Loopback: 127.* + + :param str address: string to be checked + + :returns: **True** if input is in a private range, **False** otherwise + + :raises: **ValueError** if the address isn't a valid IPv4 address + """ + + if not is_valid_ipv4_address(address): + raise ValueError("'%s' isn't a valid IPv4 address" % address) + + # checks for any of the simple wildcard ranges + + if address.startswith("10.") or address.startswith("192.168.") or address.startswith("127."): + return True + + # checks for the 172.16.* - 172.31.* range + + if address.startswith("172."): + second_octet = int(address.split('.')[1]) + + if second_octet >= 16 and second_octet <= 31: + return True + + return False + + +def expand_ipv6_address(address): + """ + Expands abbreviated IPv6 addresses to their full colon separated hex format. + For instance... 
+ + :: + + >>> expand_ipv6_address("2001:db8::ff00:42:8329") + "2001:0db8:0000:0000:0000:ff00:0042:8329" + + >>> expand_ipv6_address("::") + "0000:0000:0000:0000:0000:0000:0000:0000" + + :param str address: IPv6 address to be expanded + + :raises: **ValueError** if the address can't be expanded due to being malformed + """ + + if not is_valid_ipv6_address(address): + raise ValueError("'%s' isn't a valid IPv6 address" % address) + + # expands collapsed groupings, there can only be a single '::' in a valid + # address + if "::" in address: + missing_groups = 7 - address.count(":") + address = address.replace("::", "::" + ":" * missing_groups) + + # inserts missing zeros + for index in xrange(8): + start = index * 5 + end = address.index(":", start) if index != 7 else len(address) + missing_zeros = 4 - (end - start) + + if missing_zeros > 0: + address = address[:start] + "0" * missing_zeros + address[start:] + + return address + + +def get_mask_ipv4(bits): + """ + Provides the IPv4 mask for a given number of bits, in the dotted-quad format. + + :param int bits: number of bits to be converted + + :returns: **str** with the subnet mask representation for this many bits + + :raises: **ValueError** if given a number of bits outside the range of 0-32 + """ + + if bits > 32 or bits < 0: + raise ValueError("A mask can only be 0-32 bits, got %i" % bits) + elif bits == 32: + return FULL_IPv4_MASK + + # get the binary representation of the mask + mask_bin = _get_binary(2 ** bits - 1, 32)[::-1] + + # breaks it into eight character groupings + octets = [mask_bin[8 * i:8 * (i + 1)] for i in xrange(4)] + + # converts each octet into its integer value + return ".".join([str(int(octet, 2)) for octet in octets]) + + +def get_mask_ipv6(bits): + """ + Provides the IPv6 mask for a given number of bits, in the hex colon-delimited + format. 
+ + :param int bits: number of bits to be converted + + :returns: **str** with the subnet mask representation for this many bits + + :raises: **ValueError** if given a number of bits outside the range of 0-128 + """ + + if bits > 128 or bits < 0: + raise ValueError("A mask can only be 0-128 bits, got %i" % bits) + elif bits == 128: + return FULL_IPv6_MASK + + # get the binary representation of the mask + mask_bin = _get_binary(2 ** bits - 1, 128)[::-1] + + # breaks it into sixteen character groupings + groupings = [mask_bin[16 * i:16 * (i + 1)] for i in xrange(8)] + + # converts each group into its hex value + return ":".join(["%04x" % int(group, 2) for group in groupings]).upper() + + +def _get_masked_bits(mask): + """ + Provides the number of bits that an IPv4 subnet mask represents. Note that + not all masks can be represented by a bit count. + + :param str mask: mask to be converted + + :returns: **int** with the number of bits represented by the mask + + :raises: **ValueError** if the mask is invalid or can't be converted + """ + + if not is_valid_ipv4_address(mask): + raise ValueError("'%s' is an invalid subnet mask" % mask) + + # converts octets to binary representation + mask_bin = _get_address_binary(mask) + mask_match = re.match("^(1*)(0*)$", mask_bin) + + if mask_match: + return 32 - len(mask_match.groups()[1]) + else: + raise ValueError("Unable to convert mask to a bit count: %s" % mask) + + +def _get_binary(value, bits): + """ + Provides the given value as a binary string, padded with zeros to the given + number of bits. + + :param int value: value to be converted + :param int bits: number of bits to pad to + """ + + # http://www.daniweb.com/code/snippet216539.html + return "".join([str((value >> y) & 1) for y in range(bits - 1, -1, -1)]) + + +def _get_address_binary(address): + """ + Provides the binary value for an IPv4 or IPv6 address. 
+ + :returns: **str** with the binary representation of this address + + :raises: **ValueError** if address is neither an IPv4 nor IPv6 address + """ + + if is_valid_ipv4_address(address): + return "".join([_get_binary(int(octet), 8) for octet in address.split(".")]) + elif is_valid_ipv6_address(address): + address = expand_ipv6_address(address) + return "".join([_get_binary(int(grouping, 16), 16) for grouping in address.split(":")]) + else: + raise ValueError("'%s' is neither an IPv4 or IPv6 address" % address) + + +def _hmac_sha256(key, msg): + """ + Generates a sha256 digest using the given key and message. + + :param str key: starting key for the hash + :param str msg: message to be hashed + + :returns: sha256 digest of msg as bytes, hashed using the given key + """ + + return hmac.new(key, msg, hashlib.sha256).digest() + + +def _cryptovariables_equal(x, y): + """ + Compares two strings for equality securely. + + :param str x: string to be compared. + :param str y: the other string to be compared. + + :returns: **True** if both strings are equal, **False** otherwise. + """ + + return ( + _hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, x) == + _hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, y)) diff --git a/lib/stem/util/enum.py b/lib/stem/util/enum.py new file mode 100644 index 00000000..ee5a9c11 --- /dev/null +++ b/lib/stem/util/enum.py @@ -0,0 +1,170 @@ +# Copyright 2011-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Basic enumeration, providing ordered types for collections. These can be +constructed as simple type listings... + +:: + + >>> from stem.util import enum + >>> insects = enum.Enum("ANT", "WASP", "LADYBUG", "FIREFLY") + >>> insects.ANT + 'Ant' + >>> tuple(insects) + ('Ant', 'Wasp', 'Ladybug', 'Firefly') + +... or with overwritten string counterparts... 
class Enum(object):
  """
  Basic, ordered enumeration. Each entry is either a key, whose value is the
  camel cased version of the key, or a **(key, value)** tuple. Values are
  exposed as attributes named after their key, and iterating over the enum
  provides the values in the order they were defined.
  """

  def __init__(self, *args):
    # ordered listings of our keys and values
    keys, values = [], []

    for entry in args:
      if isinstance(entry, tuple) and len(entry) == 2:
        key, val = entry
      elif isinstance(entry, (bytes, unicode)):
        key, val = entry, stem.util.str_tools._to_camel_case(entry)
      else:
        # Wrapped in a tuple so the formatting can't fail (or unpack the
        # entry) regardless of how many arguments we were constructed with.
        raise ValueError("Unrecognized input: %s" % (entry,))

      keys.append(key)
      values.append(val)
      setattr(self, key, val)

    self._keys = tuple(keys)
    self._values = tuple(values)

  def keys(self):
    """
    Provides an ordered listing of the enumeration keys in this set.

    :returns: **list** with our enum keys
    """

    return list(self._keys)

  def index_of(self, value):
    """
    Provides the index of the given value in the collection.

    :param str value: entry to be looked up

    :returns: **int** index of the given entry

    :raises: **ValueError** if no such element exists
    """

    return self._values.index(value)

  def next(self, value):
    """
    Provides the next enumeration after the given value, wrapping around to
    the first value when given the last.

    :param str value: enumeration for which to get the next entry

    :returns: enum value following the given entry

    :raises: **ValueError** if no such element exists
    """

    if value not in self._values:
      raise ValueError("No such enumeration exists: %s (options: %s)" % (value, ", ".join(self._values)))

    next_index = (self._values.index(value) + 1) % len(self._values)
    return self._values[next_index]

  def previous(self, value):
    """
    Provides the previous enumeration before the given value, wrapping around
    to the last value when given the first.

    :param str value: enumeration for which to get the previous entry

    :returns: enum value preceding the given entry

    :raises: **ValueError** if no such element exists
    """

    if value not in self._values:
      raise ValueError("No such enumeration exists: %s (options: %s)" % (value, ", ".join(self._values)))

    prev_index = (self._values.index(value) - 1) % len(self._values)
    return self._values[prev_index]

  def __getitem__(self, item):
    """
    Provides the values for the given key.

    :param str item: key to be looked up

    :returns: **str** with the value for the given key

    :raises: **ValueError** if the key doesn't exist
    """

    # Checked against our keys rather than vars(self) so our own bookkeeping
    # attributes (_keys and _values) aren't mistaken for enumeration keys.

    if item in self._keys:
      return getattr(self, item)
    else:
      keys = ", ".join(self.keys())
      raise ValueError("'%s' isn't among our enumeration keys, which includes: %s" % (item, keys))

  def __iter__(self):
    """
    Provides an ordered listing of the enums in this set.
    """

    for entry in self._values:
      yield entry
+ + ========== =========== + Runlevel Description + ========== =========== + **ERROR** critical issue occurred, the user needs to be notified + **WARN** non-critical issue occurred that the user should be aware of + **NOTICE** information that is helpful to the user + **INFO** high level library activity + **DEBUG** low level library activity + **TRACE** request/reply logging + ========== =========== +""" + +import logging + +import stem.prereq +import stem.util.enum +import stem.util.str_tools + +# Logging runlevels. These are *very* commonly used so including shorter +# aliases (so they can be referenced as log.DEBUG, log.WARN, etc). + +Runlevel = stem.util.enum.UppercaseEnum("TRACE", "DEBUG", "INFO", "NOTICE", "WARN", "ERROR") +TRACE, DEBUG, INFO, NOTICE, WARN, ERR = list(Runlevel) + +# mapping of runlevels to the logger module's values, TRACE and DEBUG aren't +# built into the module + +LOG_VALUES = { + Runlevel.TRACE: logging.DEBUG - 5, + Runlevel.DEBUG: logging.DEBUG, + Runlevel.INFO: logging.INFO, + Runlevel.NOTICE: logging.INFO + 5, + Runlevel.WARN: logging.WARN, + Runlevel.ERROR: logging.ERROR, +} + +logging.addLevelName(LOG_VALUES[TRACE], "TRACE") +logging.addLevelName(LOG_VALUES[NOTICE], "NOTICE") + +LOGGER = logging.getLogger("stem") +LOGGER.setLevel(LOG_VALUES[TRACE]) + +# There's some messages that we don't want to log more than once. This set has +# the messages IDs that we've logged which fall into this category. +DEDUPLICATION_MESSAGE_IDS = set() + +# Adds a default nullhandler for the stem logger, suppressing the 'No handlers +# could be found for logger "stem"' warning as per... +# http://docs.python.org/release/3.1.3/library/logging.html#configuring-logging-for-a-library + + +class _NullHandler(logging.Handler): + def emit(self, record): + pass + +if not LOGGER.handlers: + LOGGER.addHandler(_NullHandler()) + + +def get_logger(): + """ + Provides the stem logger. 
def log_once(message_id, runlevel, message):
  """
  Logs a message at the given runlevel. If a message with this ID has already
  been logged then this is a no-op.

  :param str message_id: unique message identifier to deduplicate on
  :param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
  :param str message: message to be logged

  :returns: **True** if we log the message, **False** otherwise
  """

  if not runlevel or message_id in DEDUPLICATION_MESSAGE_IDS:
    return False

  # recorded before logging so later calls with this id are skipped
  DEDUPLICATION_MESSAGE_IDS.add(message_id)
  log(runlevel, message)

  # explicitly report that we logged, rather than falling through with None
  return True
+ #super(LogBuffer, self).__init__(level = logging_level(runlevel)) + + logging.Handler.__init__(self, level = logging_level(runlevel)) + + self.formatter = logging.Formatter( + fmt = '%(asctime)s [%(levelname)s] %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S') + + self._buffer = [] + + def is_empty(self): + return not bool(self._buffer) + + def __iter__(self): + while self._buffer: + yield self.formatter.format(self._buffer.pop(0)) + + def emit(self, record): + self._buffer.append(record) + + +class _StdoutLogger(logging.Handler): + def __init__(self, runlevel): + logging.Handler.__init__(self, level = logging_level(runlevel)) + + self.formatter = logging.Formatter( + fmt = '%(asctime)s [%(levelname)s] %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S') + + def emit(self, record): + print self.formatter.format(record) + + +def log_to_stdout(runlevel): + """ + Logs further events to stdout. + + :param stem.util.log.Runlevel runlevel: minimum runlevel a message needs to be to be logged + """ + + get_logger().addHandler(_StdoutLogger(runlevel)) diff --git a/lib/stem/util/lru_cache.py b/lib/stem/util/lru_cache.py new file mode 100644 index 00000000..f1e427c6 --- /dev/null +++ b/lib/stem/util/lru_cache.py @@ -0,0 +1,182 @@ +# Drop in replace for python 3.2's collections.lru_cache, from... +# http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/ +# +# ... which is under the MIT license. Stem users should *not* rely upon this +# module. It will be removed when we drop support for python 3.2 and below. + +""" +Memoization decorator that caches a function's return value. If later called +with the same arguments then the cached value is returned rather than +reevaluated. + +This is a a python 2.x port of `functools.lru_cache +`_. If +using python 3.2 or later you should use that instead. 
+""" + +from collections import namedtuple +from functools import update_wrapper +from threading import RLock + +_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) + + +class _HashedSeq(list): + __slots__ = 'hashvalue' + + def __init__(self, tup, hash=hash): + self[:] = tup + self.hashvalue = hash(tup) + + def __hash__(self): + return self.hashvalue + + +def _make_key(args, kwds, typed, + kwd_mark = (object(),), + fasttypes = set([int, str, frozenset, type(None)]), + sorted=sorted, tuple=tuple, type=type, len=len): + 'Make a cache key from optionally typed positional and keyword arguments' + key = args + if kwds: + sorted_items = sorted(kwds.items()) + key += kwd_mark + for item in sorted_items: + key += item + if typed: + key += tuple(type(v) for v in args) + if kwds: + key += tuple(type(v) for k, v in sorted_items) + elif len(key) == 1 and type(key[0]) in fasttypes: + return key[0] + return _HashedSeq(key) + + +def lru_cache(maxsize=100, typed=False): + """Least-recently-used cache decorator. + + If *maxsize* is set to None, the LRU features are disabled and the cache + can grow without bound. + + If *typed* is True, arguments of different types will be cached separately. + For example, f(3.0) and f(3) will be treated as distinct calls with + distinct results. + + Arguments to the cached function must be hashable. + + View the cache statistics named tuple (hits, misses, maxsize, currsize) with + f.cache_info(). Clear the cache and statistics with f.cache_clear(). + Access the underlying function with f.__wrapped__. + + See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used + + """ + + # Users should only access the lru_cache through its public API: + # cache_info, cache_clear, and f.__wrapped__ + # The internals of the lru_cache are encapsulated for thread safety and + # to allow the implementation to change (including a possible C version). 
+ + def decorating_function(user_function): + + cache = dict() + stats = [0, 0] # make statistics updateable non-locally + HITS, MISSES = 0, 1 # names for the stats fields + make_key = _make_key + cache_get = cache.get # bound method to lookup key or return None + _len = len # localize the global len() function + lock = RLock() # because linkedlist updates aren't threadsafe + root = [] # root of the circular doubly linked list + root[:] = [root, root, None, None] # initialize by pointing to self + nonlocal_root = [root] # make updateable non-locally + PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields + + if maxsize == 0: + + def wrapper(*args, **kwds): + # no caching, just do a statistics update after a successful call + result = user_function(*args, **kwds) + stats[MISSES] += 1 + return result + + elif maxsize is None: + + def wrapper(*args, **kwds): + # simple caching without ordering or size limit + key = make_key(args, kwds, typed) + result = cache_get(key, root) # root used here as a unique not-found sentinel + if result is not root: + stats[HITS] += 1 + return result + result = user_function(*args, **kwds) + cache[key] = result + stats[MISSES] += 1 + return result + + else: + + def wrapper(*args, **kwds): + # size limited caching that tracks accesses by recency + key = make_key(args, kwds, typed) if kwds or typed else args + with lock: + link = cache_get(key) + if link is not None: + # record recent use of the key by moving it to the front of the list + root, = nonlocal_root + link_prev, link_next, key, result = link + link_prev[NEXT] = link_next + link_next[PREV] = link_prev + last = root[PREV] + last[NEXT] = root[PREV] = link + link[PREV] = last + link[NEXT] = root + stats[HITS] += 1 + return result + result = user_function(*args, **kwds) + with lock: + root, = nonlocal_root + if key in cache: + # getting here means that this same key was added to the + # cache while the lock was released. 
since the link + # update is already done, we need only return the + # computed result and update the count of misses. + pass + elif _len(cache) >= maxsize: + # use the old root to store the new key and result + oldroot = root + oldroot[KEY] = key + oldroot[RESULT] = result + # empty the oldest link and make it the new root + root = nonlocal_root[0] = oldroot[NEXT] + oldkey = root[KEY] + root[KEY] = root[RESULT] = None + # now update the cache dictionary for the new links + del cache[oldkey] + cache[key] = oldroot + else: + # put result in a new link at the front of the list + last = root[PREV] + link = [last, root, key, result] + last[NEXT] = root[PREV] = cache[key] = link + stats[MISSES] += 1 + return result + + def cache_info(): + """Report cache statistics""" + with lock: + return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) + + def cache_clear(): + """Clear the cache and cache statistics""" + with lock: + cache.clear() + root = nonlocal_root[0] + root[:] = [root, root, None, None] + stats[:] = [0, 0] + + wrapper.__wrapped__ = user_function + wrapper.cache_info = cache_info + wrapper.cache_clear = cache_clear + return update_wrapper(wrapper, user_function) + + return decorating_function diff --git a/lib/stem/util/ordereddict.py b/lib/stem/util/ordereddict.py new file mode 100644 index 00000000..07c7d4ea --- /dev/null +++ b/lib/stem/util/ordereddict.py @@ -0,0 +1,133 @@ +# Drop in replacement for python 2.7's OrderedDict, from... +# http://pypi.python.org/pypi/ordereddict +# +# Stem users should *not* rely upon this module. It will be removed when we +# drop support for python 2.6 and below. 
+ +# Copyright (c) 2009 Raymond Hettinger +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
class OrderedDict(dict, DictMixin):
  """
  Dictionary that iterates over its keys in the order they were first
  inserted. Ordering is tracked with a circular doubly linked list of
  [key, prev, next] nodes around a sentinel, plus a mapping of keys to their
  node.
  """

  def __init__(self, *args, **kwds):
    """
    Accepts the same arguments as update(): at most one positional argument
    plus keyword arguments.

    :raises: **TypeError** if given more than one positional argument
    """

    if len(args) > 1:
      raise TypeError('expected at most 1 arguments, got %d' % len(args))
    try:
      # only sets up the linked list if it doesn't already exist
      self.__end
    except AttributeError:
      self.clear()
    self.update(*args, **kwds)

  def clear(self):
    """
    Removes all entries and resets the ordering bookkeeping.
    """

    self.__end = end = []
    end += [None, end, end]         # sentinel node for doubly linked list
    self.__map = {}                 # key --> [key, prev, next]
    dict.clear(self)

  def __setitem__(self, key, value):
    """
    Sets the value for a key. New keys are appended to the end of the
    ordering, existing keys keep their position.
    """

    if key not in self:
      end = self.__end
      curr = end[1]  # node that is presently last
      curr[2] = end[1] = self.__map[key] = [key, curr, end]
    dict.__setitem__(self, key, value)

  def __delitem__(self, key):
    """
    Removes a key, unlinking its node from the ordering.
    """

    dict.__delitem__(self, key)
    key, prev, next = self.__map.pop(key)
    prev[2] = next
    next[1] = prev

  def __iter__(self):
    """
    Provides our keys from the first inserted to the last.
    """

    end = self.__end
    curr = end[2]
    while curr is not end:
      yield curr[0]
      curr = curr[2]

  def __reversed__(self):
    """
    Provides our keys from the last inserted to the first.
    """

    end = self.__end
    curr = end[1]
    while curr is not end:
      yield curr[0]
      curr = curr[1]

  def popitem(self, last=True):
    """
    Removes and provides a (key, value) pair.

    :param bool last: pops the last key in our ordering if **True**, the first otherwise

    :returns: **tuple** of the removed key and its value

    :raises: **KeyError** if we're empty
    """

    if not self:
      raise KeyError('dictionary is empty')
    if last:
      key = reversed(self).next()
    else:
      key = iter(self).next()
    value = self.pop(key)
    return key, value

  def __reduce__(self):
    """
    Provides the pickling state: our class, our items in order, and any
    instance attributes other than the linked list bookkeeping.
    """

    items = [[k, self[k]] for k in self]

    # temporarily drops the bookkeeping attributes so they're left out of the
    # copied instance dictionary
    tmp = self.__map, self.__end
    del self.__map, self.__end
    inst_dict = vars(self).copy()
    self.__map, self.__end = tmp
    if inst_dict:
      return (self.__class__, (items,), inst_dict)
    return self.__class__, (items,)

  def keys(self):
    """
    Provides our keys as a list, in order.
    """

    return list(self)

  # taken from DictMixin rather than dict
  setdefault = DictMixin.setdefault
  update = DictMixin.update
  pop = DictMixin.pop
  values = DictMixin.values
  items = DictMixin.items
  iterkeys = DictMixin.iterkeys
  itervalues = DictMixin.itervalues
  iteritems = DictMixin.iteritems

  def __repr__(self):
    if not self:
      return '%s()' % (self.__class__.__name__,)
    return '%s(%r)' % (self.__class__.__name__, self.items())

  def copy(self):
    """
    Provides a new instance of our class populated from ourselves.
    """

    return self.__class__(self)

  @classmethod
  def fromkeys(cls, iterable, value=None):
    """
    Provides a new instance with the given keys, all set to the same value.
    """

    d = cls()
    for key in iterable:
      d[key] = value
    return d

  def __eq__(self, other):
    """
    Comparison with another OrderedDict is order sensitive. Comparison with
    anything else is delegated to dict.
    """

    if isinstance(other, OrderedDict):
      if len(self) != len(other):
        return False
      for p, q in zip(self.items(), other.items()):
        if p != q:
          return False
      return True
    return dict.__eq__(self, other)

  def __ne__(self, other):
    return not self == other
@lru_cache()
def is_available():
  """
  Checks if this platform has the proc contents that we use.

  :returns: **True** if we're on Linux and all the process independent proc
    paths we use exist, **False** otherwise
  """

  if platform.system() != "Linux":
    return False

  # process independent proc paths that we use
  required_paths = (
    "/proc/stat",
    "/proc/meminfo",
    "/proc/net/tcp",
    "/proc/net/udp",
  )

  return all(os.path.exists(required_path) for required_path in required_paths)
@lru_cache()
def get_physical_memory():
  """
  Provides the total physical memory on the system in bytes. This is read
  from the MemTotal entry of /proc/meminfo, whose value is multiplied by 1024.

  :returns: **int** for the bytes of physical memory this system has

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "system physical memory"
  mem_total_line = _get_line("/proc/meminfo", "MemTotal:", parameter)

  try:
    result = int(mem_total_line.split()[1]) * 1024
  except (IndexError, ValueError):
    # The line either lacks a value or it isn't an integer. Only parsing
    # errors are caught so interrupts and unrelated failures aren't
    # misreported as a malformed entry.

    exc = IOError("unable to parse the /proc/meminfo MemTotal entry: %s" % mem_total_line)
    _log_failure(parameter, exc)
    raise exc

  _log_runtime(parameter, "/proc/meminfo[MemTotal]", start_time)
  return result
def get_stats(pid, *stat_types):
  """
  Provides process specific information. See the :data:`~stem.util.proc.Stat`
  enum for valid options.

  :param int pid: process id of the process to be queried
  :param Stat stat_types: information to be provided back

  :returns: **tuple** with all of the requested statistics as strings, in the
    order they were requested

  :raises: **IOError** if it can't be determined
  """

  if CLOCK_TICKS is None:
    raise IOError("Unable to look up SC_CLK_TCK")

  start_time, parameter = time.time(), "process %s" % ", ".join(stat_types)

  # the stat file contains a single line, of the form...
  # 8438 (tor) S 8407 8438 8407 34818 8438 4202496...
  stat_path = "/proc/%s/stat" % pid
  stat_line = _get_line(stat_path, str(pid), parameter)

  # Breaks the line into its component values. The command is delimited by
  # the first '(' and the *last* ')' since the command itself may contain
  # parentheses or whitespace.

  stat_comp = []
  cmd_start, cmd_end = stat_line.find("("), stat_line.rfind(")")

  if cmd_start != -1 and cmd_end != -1:
    stat_comp.append(stat_line[:cmd_start])
    stat_comp.append(stat_line[cmd_start + 1:cmd_end])
    stat_comp += stat_line[cmd_end + 1:].split()

  # The length is checked first so we never index past the end of a short or
  # unparsable line.

  if len(stat_comp) < 44 or not _is_float(stat_comp[13], stat_comp[14], stat_comp[21]):
    exc = IOError("stat file had an unexpected format: %s" % stat_path)
    _log_failure(parameter, exc)
    raise exc

  results = []

  for stat_type in stat_types:
    if stat_type == Stat.COMMAND:
      if pid == 0:
        results.append("sched")
      else:
        results.append(stat_comp[1])
    elif stat_type == Stat.CPU_UTIME:
      if pid == 0:
        results.append("0")
      else:
        results.append(str(float(stat_comp[13]) / CLOCK_TICKS))
    elif stat_type == Stat.CPU_STIME:
      if pid == 0:
        results.append("0")
      else:
        results.append(str(float(stat_comp[14]) / CLOCK_TICKS))
    elif stat_type == Stat.START_TIME:
      if pid == 0:
        # appended as a string like the other stats, rather than returned on
        # its own, so callers always get a tuple back
        results.append(str(get_system_start_time()))
      else:
        # According to documentation, starttime is in field 21 and the unit is
        # jiffies (clock ticks). We divide it for clock ticks, then add the
        # uptime to get the seconds since the epoch.
        p_start_time = float(stat_comp[21]) / CLOCK_TICKS
        results.append(str(p_start_time + get_system_start_time()))

  _log_runtime(parameter, stat_path, start_time)
  return tuple(results)
proc_file_path.endswith("/tcp") and status != "01": + continue + + local_ip, local_port = _decode_proc_address_encoding(l_addr) + foreign_ip, foreign_port = _decode_proc_address_encoding(f_addr) + protocol = proc_file_path[10:] + conn.append((local_ip, local_port, foreign_ip, foreign_port, protocol)) + + proc_file.close() + except IOError as exc: + exc = IOError("unable to read '%s': %s" % (proc_file_path, exc)) + _log_failure(parameter, exc) + raise exc + except Exception as exc: + exc = IOError("unable to parse '%s': %s" % (proc_file_path, exc)) + _log_failure(parameter, exc) + raise exc + + _log_runtime(parameter, "/proc/net/[tcp|udp]", start_time) + return conn + + +def _decode_proc_address_encoding(addr): + """ + Translates an address entry in the /proc/net/* contents to a human readable + form (`reference `_, + for instance: + + :: + + "0500000A:0016" -> ("10.0.0.5", 22) + + :param str addr: proc address entry to be decoded + + :returns: **tuple** of the form **(addr, port)**, with addr as a string and port an int + """ + + ip, port = addr.split(':') + + # the port is represented as a two-byte hexadecimal number + port = int(port, 16) + + if sys.version_info >= (3,): + ip = ip.encode('ascii') + + # The IPv4 address portion is a little-endian four-byte hexadecimal number. + # That is, the least significant byte is listed first, so we need to reverse + # the order of the bytes to convert it to an IP address. + # + # This needs to account for the endian ordering as per... 
+ # http://code.google.com/p/psutil/issues/detail?id=201 + # https://trac.torproject.org/projects/tor/ticket/4777 + + if sys.byteorder == 'little': + ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip)[::-1]) + else: + ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip)) + + return (ip, port) + + +def _is_float(*value): + try: + for v in value: + float(v) + + return True + except ValueError: + return False + + +def _get_line(file_path, line_prefix, parameter): + return _get_lines(file_path, (line_prefix, ), parameter)[line_prefix] + + +def _get_lines(file_path, line_prefixes, parameter): + """ + Fetches lines with the given prefixes from a file. This only provides back + the first instance of each prefix. + + :param str file_path: path of the file to read + :param tuple line_prefixes: string prefixes of the lines to return + :param str parameter: description of the proc attribute being fetch + + :returns: mapping of prefixes to the matching line + + :raises: **IOError** if unable to read the file or can't find all of the prefixes + """ + + try: + remaining_prefixes = list(line_prefixes) + proc_file, results = open(file_path), {} + + for line in proc_file: + if not remaining_prefixes: + break # found everything we're looking for + + for prefix in remaining_prefixes: + if line.startswith(prefix): + results[prefix] = line + remaining_prefixes.remove(prefix) + break + + proc_file.close() + + if remaining_prefixes: + if len(remaining_prefixes) == 1: + msg = "%s did not contain a %s entry" % (file_path, remaining_prefixes[0]) + else: + msg = "%s did not contain %s entries" % (file_path, ", ".join(remaining_prefixes)) + + raise IOError(msg) + else: + return results + except IOError as exc: + _log_failure(parameter, exc) + raise exc + + +def _log_runtime(parameter, proc_location, start_time): + """ + Logs a message indicating a successful proc query. 
+ + :param str parameter: description of the proc attribute being fetch + :param str proc_location: proc files we were querying + :param int start_time: unix time for when this query was started + """ + + runtime = time.time() - start_time + log.debug("proc call (%s): %s (runtime: %0.4f)" % (parameter, proc_location, runtime)) + + +def _log_failure(parameter, exc): + """ + Logs a message indicating that the proc query failed. + + :param str parameter: description of the proc attribute being fetch + :param Exception exc: exception that we're raising + """ + + log.debug("proc call failed (%s): %s" % (parameter, exc)) diff --git a/lib/stem/util/str_tools.py b/lib/stem/util/str_tools.py new file mode 100644 index 00000000..33d5e1be --- /dev/null +++ b/lib/stem/util/str_tools.py @@ -0,0 +1,387 @@ +# Copyright 2012-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Toolkit for various string activity. + +**Module Overview:** + +:: + + get_size_label - human readable label for a number of bytes + get_time_label - human readable label for a number of seconds + get_time_labels - human readable labels for each time unit + get_short_time_label - condensed time label output + parse_short_time_label - seconds represented by a short time label +""" + +import codecs +import datetime + +import stem.prereq + +# label conversion tuples of the form... 
+# (bits / bytes / seconds, short label, long label) +SIZE_UNITS_BITS = ( + (140737488355328.0, " Pb", " Petabit"), + (137438953472.0, " Tb", " Terabit"), + (134217728.0, " Gb", " Gigabit"), + (131072.0, " Mb", " Megabit"), + (128.0, " Kb", " Kilobit"), + (0.125, " b", " Bit"), +) + +SIZE_UNITS_BYTES = ( + (1125899906842624.0, " PB", " Petabyte"), + (1099511627776.0, " TB", " Terabyte"), + (1073741824.0, " GB", " Gigabyte"), + (1048576.0, " MB", " Megabyte"), + (1024.0, " KB", " Kilobyte"), + (1.0, " B", " Byte"), +) + +TIME_UNITS = ( + (86400.0, "d", " day"), + (3600.0, "h", " hour"), + (60.0, "m", " minute"), + (1.0, "s", " second"), +) + +if stem.prereq.is_python_3(): + def _to_bytes_impl(msg): + if isinstance(msg, str): + return codecs.latin_1_encode(msg, "replace")[0] + else: + return msg + + def _to_unicode_impl(msg): + if msg is not None and not isinstance(msg, str): + return msg.decode("utf-8", "replace") + else: + return msg +else: + def _to_bytes_impl(msg): + if msg is not None and isinstance(msg, unicode): + return codecs.latin_1_encode(msg, "replace")[0] + else: + return msg + + def _to_unicode_impl(msg): + if msg is not None and not isinstance(msg, unicode): + return msg.decode("utf-8", "replace") + else: + return msg + + +def _to_bytes(msg): + """ + Provides the ASCII bytes for the given string. This is purely to provide + python 3 compatability, normalizing the unicode/ASCII change in the version + bump. For an explanation of this see... + + http://python3porting.com/problems.html#nicer-solutions + + :param str,unicode msg: string to be converted + + :returns: ASCII bytes for string + """ + + return _to_bytes_impl(msg) + + +def _to_unicode(msg): + """ + Provides the unicode string for the given ASCII bytes. This is purely to + provide python 3 compatability, normalizing the unicode/ASCII change in the + version bump. 
+ + :param str,unicode msg: string to be converted + + :returns: unicode conversion + """ + + return _to_unicode_impl(msg) + + +def _to_camel_case(label, divider = "_", joiner = " "): + """ + Converts the given string to camel case, ie: + + :: + + >>> _to_camel_case("I_LIKE_PEPPERJACK!") + 'I Like Pepperjack!' + + :param str label: input string to be converted + :param str divider: word boundary + :param str joiner: replacement for word boundaries + + :returns: camel cased string + """ + + words = [] + for entry in label.split(divider): + if len(entry) == 0: + words.append("") + elif len(entry) == 1: + words.append(entry.upper()) + else: + words.append(entry[0].upper() + entry[1:].lower()) + + return joiner.join(words) + + +def get_size_label(byte_count, decimal = 0, is_long = False, is_bytes = True): + """ + Converts a number of bytes into a human readable label in its most + significant units. For instance, 7500 bytes would return "7 KB". If the + is_long option is used this expands unit labels to be the properly pluralized + full word (for instance 'Kilobytes' rather than 'KB'). Units go up through + petabytes. + + :: + + >>> get_size_label(2000000) + '1 MB' + + >>> get_size_label(1050, 2) + '1.02 KB' + + >>> get_size_label(1050, 3, True) + '1.025 Kilobytes' + + :param int byte_count: number of bytes to be converted + :param int decimal: number of decimal digits to be included + :param bool is_long: expands units label + :param bool is_bytes: provides units in bytes if **True**, bits otherwise + + :returns: **str** with human readable representation of the size + """ + + if is_bytes: + return _get_label(SIZE_UNITS_BYTES, byte_count, decimal, is_long) + else: + return _get_label(SIZE_UNITS_BITS, byte_count, decimal, is_long) + + +def get_time_label(seconds, decimal = 0, is_long = False): + """ + Converts seconds into a time label truncated to its most significant units. + For instance, 7500 seconds would return "2h". Units go up through days. 
def get_time_labels(seconds, is_long = False):
    """
    Provides a list of label conversions for each time unit, starting with its
    most significant units on down. Any counts that evaluate to zero are omitted.
    For example...

    ::

      >>> get_time_labels(400)
      ['6m', '40s']

      >>> get_time_labels(3640, True)
      ['1 hour', '40 seconds']

    Negative durations keep their sign on every label.

    :param int seconds: number of seconds to be converted
    :param bool is_long: expands units label

    :returns: **list** of strings with human readable representations of the time
    """

    time_labels = []

    for count_per_unit, _, _ in TIME_UNITS:
        if abs(seconds) >= count_per_unit:
            time_labels.append(_get_label(TIME_UNITS, seconds, 0, is_long))

            # Python's modulo takes the sign of the divisor, so '-400 % 60' is
            # 20 rather than -40. Take the remainder of the magnitude and then
            # restore the sign so negative input stays negative.

            remainder = abs(seconds) % count_per_unit
            seconds = -remainder if seconds < 0 else remainder

    return time_labels
def parse_short_time_label(label):
    """
    Provides the number of seconds corresponding to the formatting used for the
    cputime and etime fields of ps:
    [[dd-]hh:]mm:ss or mm:ss.ss

    ::

      >>> parse_short_time_label('01:51')
      111

      >>> parse_short_time_label('6-07:08:20')
      544100

    :param str label: time entry to be parsed

    :returns: **int** with the number of seconds represented by the label

    :raises: **ValueError** if input is malformed
    """

    days, hours, minutes, seconds = '0', '0', '0', '0'

    # Parse a copy so error messages can report the label as the caller
    # provided it (including any day component).

    time_entry = label

    if '-' in time_entry:
        days, time_entry = time_entry.split('-', 1)

    time_comp = time_entry.split(":")

    if len(time_comp) == 3:
        hours, minutes, seconds = time_comp
    elif len(time_comp) == 2:
        minutes, seconds = time_comp
    else:
        raise ValueError("Invalid time format, we expected '[[dd-]hh:]mm:ss' or 'mm:ss.ss': %s" % label)

    try:
        # seconds may be fractional (mm:ss.ss), which we truncate
        time_sum = int(float(seconds))
        time_sum += int(minutes) * 60
        time_sum += int(hours) * 3600
        time_sum += int(days) * 86400
        return time_sum
    except (ValueError, OverflowError):
        # OverflowError arises from values like 'inf', which float() accepts
        # but int() can't represent
        raise ValueError("Non-numeric value in time entry: %s" % label)
+ # http://stackoverflow.com/questions/127803/how-to-parse-iso-formatted-date-in-python + + if '.' in entry: + timestamp_str, microseconds = entry.split('.') + else: + timestamp_str, microseconds = entry, '000000' + + if len(microseconds) != 6 or not microseconds.isdigit(): + raise ValueError("timestamp's microseconds should be six digits") + + timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S") + return timestamp + datetime.timedelta(microseconds = int(microseconds)) + + +def _get_label(units, count, decimal, is_long): + """ + Provides label corresponding to units of the highest significance in the + provided set. This rounds down (ie, integer truncation after visible units). + + :param tuple units: type of units to be used for conversion, containing + (count_per_unit, short_label, long_label) + :param int count: number of base units being converted + :param int decimal: decimal precision of label + :param bool is_long: uses the long label if **True**, short label otherwise + """ + + # formatted string for the requested number of digits + label_format = "%%.%if" % decimal + + if count < 0: + label_format = "-" + label_format + count = abs(count) + elif count == 0: + units_label = units[-1][2] + "s" if is_long else units[-1][1] + return "%s%s" % (label_format % count, units_label) + + for count_per_unit, short_label, long_label in units: + if count >= count_per_unit: + # Rounding down with a '%f' is a little clunky. Reducing the count so + # it'll divide evenly as the rounded down value. + + count -= count % (count_per_unit / (10 ** decimal)) + count_label = label_format % (count / count_per_unit) + + if is_long: + # Pluralize if any of the visible units make it greater than one. For + # instance 1.0003 is plural but 1.000 isn't. 
+ + if decimal > 0: + is_plural = count > count_per_unit + else: + is_plural = count >= count_per_unit * 2 + + return count_label + long_label + ("s" if is_plural else "") + else: + return count_label + short_label diff --git a/lib/stem/util/system.py b/lib/stem/util/system.py new file mode 100644 index 00000000..db0943eb --- /dev/null +++ b/lib/stem/util/system.py @@ -0,0 +1,1010 @@ +# Copyright 2011-2013, Damian Johnson and The Tor Project +# See LICENSE for licensing information + +""" +Helper functions for working with the underlying system. These are mostly os +dependent, only working on linux, osx, and bsd. In almost all cases they're +best-effort, providing **None** if the lookup fails. + +**Module Overview:** + +:: + + is_windows - checks if we're running on windows + is_mac - checks if we're running on a mac + is_bsd - checks if we're running on the bsd family of operating systems + + is_available - determines if a command is available on this system + is_running - determines if a given process is running + get_name_by_pid - gets the name for a process by the given pid + get_pid_by_name - gets the pid for a process by the given name + get_pid_by_port - gets the pid for a process listening to a given port + get_pid_by_open_file - gets the pid for the process with an open file + get_cwd - provides the current working directory for a given process + get_user - provides the user a process is running under + get_start_time - provides the unix timestamp when the process started + get_bsd_jail_id - provides the BSD jail id a given process is running within + get_bsd_jail_path - provides the path of the given BSD jail + expand_path - expands relative paths and ~ entries + call - runs the given system command and provides back the results + + get_process_name - provides our process' name + set_process_name - changes our process' name +""" + +import ctypes +import ctypes.util +import os +import platform +import subprocess +import time + +import stem.util.proc 
+import stem.util.str_tools + +from stem import UNDEFINED +from stem.util import log + +# Mapping of commands to if they're available or not. + +CMD_AVAILABLE_CACHE = {} + +# An incomplete listing of commands provided by the shell. Expand this as +# needed. Some noteworthy things about shell commands... +# +# * They're not in the path so is_available() will fail. +# * subprocess.Popen() without the 'shell = True' argument will fail with... +# OSError: [Errno 2] No such file or directory + +SHELL_COMMANDS = ['ulimit'] + +IS_RUNNING_PS_LINUX = "ps -A co command" +IS_RUNNING_PS_BSD = "ps -ao ucomm=" +GET_NAME_BY_PID_PS = "ps -p %s -o comm" +GET_PID_BY_NAME_PGREP = "pgrep -x %s" +GET_PID_BY_NAME_PIDOF = "pidof %s" +GET_PID_BY_NAME_PS_LINUX = "ps -o pid -C %s" +GET_PID_BY_NAME_PS_BSD = "ps axc" +GET_PID_BY_NAME_LSOF = "lsof -tc %s" +GET_PID_BY_PORT_NETSTAT = "netstat -npltu" +GET_PID_BY_PORT_SOCKSTAT = "sockstat -4l -P tcp -p %s" +GET_PID_BY_PORT_LSOF = "lsof -wnP -iTCP -sTCP:LISTEN" +GET_PID_BY_FILE_LSOF = "lsof -tw %s" +GET_CWD_PWDX = "pwdx %s" +GET_CWD_LSOF = "lsof -a -p %s -d cwd -Fn" +GET_BSD_JAIL_ID_PS = "ps -p %s -o jid" +GET_BSD_JAIL_PATH = "jls -j %s" + +# flag for setting the process name, found in '/usr/include/linux/prctl.h' + +PR_SET_NAME = 15 + +argc_t = ctypes.POINTER(ctypes.c_char_p) + +# The following can fail with pypy... +# AttributeError: No symbol Py_GetArgcArgv found in library + +try: + Py_GetArgcArgv = ctypes.pythonapi.Py_GetArgcArgv + Py_GetArgcArgv.restype = None + Py_GetArgcArgv.argtypes = [ + ctypes.POINTER(ctypes.c_int), + ctypes.POINTER(argc_t), + ] +except: + Py_GetArgcArgv = None + +# This is both a cache for get_process_name() and tracks what we've changed our +# process name to. + +_PROCESS_NAME = None + +# Length of our original process name. +# +# The original author our process renaming is based on did a memset for 256, +# while Jake did it for the original process name length (capped at 1608). 
I'm +# not sure of the reasons for either of these limits, but setting it to +# anything higher than our original name length should be pointless, so opting +# for Jake's limit. + +_MAX_NAME_LENGTH = -1 + + +def is_windows(): + """ + Checks if we are running on Windows. + + :returns: **bool** to indicate if we're on Windows + """ + + return platform.system() == "Windows" + + +def is_mac(): + """ + Checks if we are running on Mac OSX. + + :returns: **bool** to indicate if we're on a Mac + """ + + return platform.system() == "Darwin" + + +def is_bsd(): + """ + Checks if we are within the BSD family of operating systems. This presently + recognizes Macs, FreeBSD, and OpenBSD but may be expanded later. + + :returns: **bool** to indicate if we're on a BSD OS + """ + + return platform.system() in ("Darwin", "FreeBSD", "OpenBSD") + + +def is_available(command, cached=True): + """ + Checks the current PATH to see if a command is available or not. If more + than one command is present (for instance "ls -a | grep foo") then this + just checks the first. + + Note that shell (like cd and ulimit) aren't in the PATH so this lookup will + try to assume that it's available. This only happends for recognized shell + commands (those in SHELL_COMMANDS). + + :param str command: command to search for + :param bool cached: makes use of available cached results if **True** + + :returns: **True** if an executable we can use by that name exists in the + PATH, **False** otherwise + """ + + if " " in command: + command = command.split(" ")[0] + + if command in SHELL_COMMANDS: + # we can't actually look it up, so hope the shell really provides it... 
def is_running(command):
    """
    Checks for if a process with a given name is running or not.

    :param str command: process name to be checked

    :returns: **True** if the process is running, **False** if it's not among ps
      results, and **None** if ps can't be queried
    """

    # Linux and the BSD families have different variants of ps. Guess based on
    # the is_bsd() check which to try first, then fall back to the other.
    #
    # Linux
    #   -A          - Select all processes.
    #   -co command - Shows just the base command.
    #
    # Mac / BSD
    #   -a        - Display information about other users' processes as well as
    #               our own.
    #   -o ucomm= - Shows just the ucomm attribute ("name to be used for
    #               accounting")

    if is_available("ps"):
        if is_bsd():
            primary_resolver = IS_RUNNING_PS_BSD
            secondary_resolver = IS_RUNNING_PS_LINUX
        else:
            primary_resolver = IS_RUNNING_PS_LINUX
            secondary_resolver = IS_RUNNING_PS_BSD

        command_listing = call(primary_resolver, None)

        if not command_listing:
            command_listing = call(secondary_resolver, None)

        if command_listing:
            # Calling strip() on each entry (rather than unicode.strip) works
            # for both byte and unicode strings, and under python 3 where the
            # 'unicode' builtin doesn't exist.

            return command in [entry.strip() for entry in command_listing]

    return None
def get_pid_by_name(process_name, multiple = False):
    """
    Attempts to determine the process id for a running process, using...

    ::

      1. pgrep -x <name>
      2. pidof <name>
      3. ps -o pid -C <name> (linux)
         ps axc | egrep " <name>$" (bsd)
      4. lsof -tc <name>

    :param str process_name: process name for which to fetch the pid
    :param bool multiple: provides a list of all pids if **True**, otherwise
      results with multiple processes are discarded

    :returns:
      Response depends upon the 'multiple' argument as follows...

      * if **False** then this provides an **int** with the process id or **None** if it can't be determined
      * if **True** then this provides a **list** of all **int** process ids, and an empty list if it can't be determined
    """

    # attempts to resolve using pgrep, failing if:
    # - we're running on bsd (command unavailable)
    #
    # example output:
    #   atagar@morrigan:~$ pgrep -x vim
    #   3283
    #   3392

    if is_available("pgrep"):
        results = call(GET_PID_BY_NAME_PGREP % process_name, None)

        if results:
            match = _pids_from_entries(results, multiple)

            if match is not None:
                return match

    # attempts to resolve using pidof, failing if:
    # - we're running on bsd (command unavailable)
    #
    # example output:
    #   atagar@morrigan:~$ pidof vim
    #   3392 3283

    if is_available("pidof"):
        results = call(GET_PID_BY_NAME_PIDOF % process_name, None)

        if results and len(results) == 1:
            match = _pids_from_entries(results[0].split(), multiple)

            if match is not None:
                return match

    # attempts to resolve using ps, failing if:
    # - system's ps variant doesn't handle these flags (none known at the moment)
    #
    # example output:
    #   atagar@morrigan:~/Desktop/stem$ ps -o pid -C vim
    #     PID
    #    3283
    #    3392
    #
    #   atagar$ ps axc
    #     PID   TT  STAT      TIME COMMAND
    #       1   ??  Ss     9:00.22 launchd
    #      10   ??  Ss     0:09.97 kextd
    #      11   ??  Ss     5:47.36 DirectoryService
    #      12   ??  Ss     3:01.44 notifyd

    if is_available("ps"):
        if not is_bsd():
            # linux variant of ps
            results = call(GET_PID_BY_NAME_PS_LINUX % process_name, None)

            if results:
                # the first line is the 'PID' header
                match = _pids_from_entries(results[1:], multiple)

                if match is not None:
                    return match
        else:
            # bsd variant of ps
            results = call(GET_PID_BY_NAME_PS_BSD, None)

            if results:
                # filters results to those with our process name
                results = [r.split()[0] for r in results if r.endswith(" %s" % process_name)]
                match = _pids_from_entries(results, multiple)

                if match is not None:
                    return match

    # resolves using lsof which works on both Linux and BSD, only failing if:
    # - lsof is unavailable (not included by default on OpenBSD)
    # - the process being run as a different user due to permissions
    # - the process doesn't have any open files to be reported by lsof?
    #
    # flags:
    #   t - only show pids
    #   c - restrict results to that command
    #
    # example output:
    #   atagar@morrigan:~$ lsof -t -c vim
    #   2470
    #   2561

    if is_available("lsof"):
        results = call(GET_PID_BY_NAME_LSOF % process_name, None)

        if results:
            match = _pids_from_entries(results, multiple)

            if match is not None:
                return match

    log.debug("failed to resolve a pid for '%s'" % process_name)
    return [] if multiple else None


def _pids_from_entries(entries, multiple):
    """
    Converts a resolver's pid entries into the form get_pid_by_name() returns.

    This builds a real list rather than using map(), whose python 3 result is an
    iterator that doesn't support len().

    :param list entries: strings that should each be a pid
    :param bool multiple: provides all the pids if **True**, otherwise only
      accepts a single unambiguous pid

    :returns: **list** of **int** pids if multiple is **True**, the lone **int**
      pid if it's **False**, and **None** if the entries are non-numeric or don't
      consist of a single pid when only one was requested
    """

    try:
        pids = [int(entry) for entry in entries]
    except ValueError:
        return None

    if multiple:
        return pids

    return pids[0] if len(pids) == 1 else None
+ + :param int port: port where the process we're looking for is listening + + :returns: **int** with the process id, **None** if it can't be determined + """ + + # attempts to resolve using netstat, failing if: + # - netstat doesn't accept these flags (Linux only) + # - the process being run as a different user due to permissions + # + # flags: + # n - numeric (disables hostname lookups) + # p - program (include pids) + # l - listening (include listening sockets) + # tu - show tcp and udp sockets, and nothing else + # + # example output: + # atagar@morrigan:~$ netstat -npltu + # Active Internet connections (only servers) + # Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name + # tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN - + # tcp 0 0 127.0.0.1:9051 0.0.0.0:* LISTEN 1641/tor + # tcp6 0 0 ::1:631 :::* LISTEN - + # udp 0 0 0.0.0.0:5353 0.0.0.0:* - + # udp6 0 0 fe80::7ae4:ff:fe2f::123 :::* - + + if is_available("netstat"): + results = call(GET_PID_BY_PORT_NETSTAT, None) + + if results: + # filters to results with our port + results = [r for r in results if "127.0.0.1:%s" % port in r] + + if len(results) == 1 and len(results[0].split()) == 7: + results = results[0].split()[6] # process field (ex. 
"7184/tor") + pid = results[:results.find("/")] + + if pid.isdigit(): + return int(pid) + + # attempts to resolve using sockstat, failing if: + # - sockstat doesn't accept the -4 flag (BSD only) + # - sockstat isn't available (encountered with OSX 10.5.8) + # - there are multiple instances using the same port on different addresses + # + # flags: + # 4 - only show IPv4 sockets + # l - listening sockets + # P tcp - only show tcp connections + # p - only includes results if the local or foreign port match this + # + # example output: + # # sockstat -4 | grep tor + # _tor tor 4397 7 tcp4 51.64.7.84:9050 *:* + # _tor tor 4397 8 udp4 51.64.7.84:53 *:* + # _tor tor 4397 12 tcp4 51.64.7.84:54011 80.3.121.7:9001 + # _tor tor 4397 15 tcp4 51.64.7.84:59374 7.42.1.102:9001 + # _tor tor 4397 20 tcp4 51.64.7.84:51946 32.83.7.104:443 + + if is_available("sockstat"): + results = call(GET_PID_BY_PORT_SOCKSTAT % port, None) + + if results: + # filters to results where this is the local port + results = [r for r in results if (len(r.split()) == 7 and (":%s" % port) in r.split()[5])] + + if len(results) == 1: + pid = results[0].split()[2] + + if pid.isdigit(): + return int(pid) + + # resolves using lsof which works on both Linux and BSD, only failing if: + # - lsof is unavailable (not included by default on OpenBSD) + # - lsof doesn't provide the port ip/port, nor accept the -i and -s args + # (encountered with OSX 10.5.8) + # - the process being run as a different user due to permissions + # - there are multiple instances using the same port on different addresses + # + # flags: + # w - disables warning messages + # n - numeric addresses (disables hostname lookups) + # P - numeric ports (disables replacement of ports with their protocol) + # iTCP - only show tcp connections + # sTCP:LISTEN - listening sockets + # + # example output: + # atagar@morrigan:~$ lsof -wnP -iTCP -sTCP:LISTEN + # COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME + # tor 1745 atagar 6u IPv4 14229 0t0 TCP 
def get_pid_by_open_file(path):
    """
    Attempts to determine the process id for a process with the given open file,
    using...

    ::

      lsof -tw <path>

    :param str path: location of the socket file to query against

    :returns: **int** with the process id, **None** if it can't be determined
    """

    # resolves using lsof which works on both Linux and BSD, only failing if:
    # - lsof is unavailable (not included by default on OpenBSD)
    # - the file can't be read due to permissions
    #
    # flags:
    #   t - only show pids
    #   w - disables warning messages
    #
    # example output:
    #   atagar@morrigan:~$ lsof -tw /tmp/foo
    #   4762

    if not is_available("lsof"):
        return None

    lsof_output = call(GET_PID_BY_FILE_LSOF % path, [])

    if len(lsof_output) != 1:
        return None  # no results, or the file is open by multiple processes

    pid_entry = lsof_output[0].strip()
    return int(pid_entry) if pid_entry.isdigit() else None
+ logging_prefix = "get_cwd(%s):" % pid + + if is_available("pwdx"): + # pwdx results are of the form: + # 3799: /home/atagar + # 5839: No such process + + results = call(GET_CWD_PWDX % pid, None) + + if not results: + log.debug("%s pwdx didn't return any results" % logging_prefix) + elif results[0].endswith("No such process"): + log.debug("%s pwdx processes reported for this pid" % logging_prefix) + elif len(results) != 1 or results[0].count(" ") != 1 or not results[0].startswith("%s: " % pid): + log.debug("%s we got unexpected output from pwdx: %s" % (logging_prefix, results)) + else: + return results[0].split(" ", 1)[1].strip() + + # Use lsof as the final fallback. This is available on both Linux and is the + # only lookup method here that works for BSD... + # https://trac.torproject.org/projects/tor/ticket/4236 + # + # flags: + # a - presents the intersection of the following arguments + # p - limits results to this pid + # d cwd - limits results to just the cwd rather than all open files + # Fn - short listing in a single column, with just the pid and cwd + # + # example output: + # ~$ lsof -a -p 75717 -d cwd -Fn + # p75717 + # n/Users/atagar/tor/src/or + + if is_available("lsof"): + results = call(GET_CWD_LSOF % pid, []) + + if len(results) == 2 and results[1].startswith("n/"): + lsof_result = results[1][1:].strip() + + # If we lack read permissions for the cwd then it returns... + # p2683 + # n/proc/2683/cwd (readlink: Permission denied) + + if not " " in lsof_result: + return lsof_result + else: + log.debug("%s we got unexpected output from lsof: %s" % (logging_prefix, results)) + + return None # all queries failed + + +def get_user(pid): + """ + Provides the user a process is running under. 
def get_start_time(pid):
  """
  Provides the unix timestamp when the given process started.

  The proc contents are consulted first when available, falling back to the
  elapsed time reported by ps.

  :param int pid: process id of the process to be queried

  :returns: **float** for the unix timestamp when the process began, **None**
    if it can't be determined
  """

  if not isinstance(pid, int) or pid < 0:
    return None

  if stem.util.proc.is_available():
    try:
      return float(stem.util.proc.get_stats(pid, stem.util.proc.Stat.START_TIME)[0])
    except IOError:
      pass

  try:
    ps_results = call("ps -p %s -o etime" % pid, [])

    if len(ps_results) >= 2:
      # first line is the column header, the second is the elapsed time
      etime = ps_results[1].strip()
      return time.time() - stem.util.str_tools.parse_short_time_label(etime)
  except Exception:
    # This lookup is best-effort so any failure to run or parse ps just means
    # that we don't have an answer. We catch Exception rather than using a bare
    # except so KeyboardInterrupt and SystemExit still propagate.

    pass

  return None
def get_bsd_jail_path(jid):
  """
  Provides the path of the given FreeBSD jail.

  :param int jid: jail id to be queried

  :returns: **str** of the path prefix, **None** if this can't be determined
  """

  # a jail id of zero is never looked up
  if jid == 0:
    return None

  # Output should be something like:
  #   JID  IP Address      Hostname      Path
  #     1  10.0.0.2        tor-jail      /usr/jails/tor-jail

  jls_lines = call(GET_BSD_JAIL_PATH % jid, [])

  if len(jls_lines) != 2:
    return None

  columns = jls_lines[1].split()

  if len(columns) == 4:
    return columns[3]

  return None
def call(command, default = UNDEFINED, ignore_exit_status = False):
  """
  Issues a command in a subprocess, blocking until completion and returning the
  results. This is not actually ran in a shell (except for the commands listed
  in SHELL_COMMANDS) so pipes and other shell syntax are not permitted.

  :param str command: command to be issued
  :param object default: response if the query fails
  :param bool ignore_exit_status: if **True** then a non-zero exit status is
    not treated as a failure, otherwise it is

  :returns: **list** with the lines of output from the command

  :raises: **OSError** if this fails and no default was provided
  """

  try:
    is_shell_command = command.split(" ")[0] in SHELL_COMMANDS

    # With 'shell = True' subprocess only runs the first item of a sequence as
    # the command, handing the remaining items to the shell itself as its own
    # arguments. Shell commands are therefore passed along as a single string
    # so their arguments aren't dropped.

    popen_args = command if is_shell_command else command.split()

    start_time = time.time()
    process = subprocess.Popen(popen_args, stdout = subprocess.PIPE, stderr = subprocess.PIPE, shell = is_shell_command)

    stdout, stderr = process.communicate()
    stdout, stderr = stdout.strip(), stderr.strip()
    runtime = time.time() - start_time

    log.debug("System call: %s (runtime: %0.2f)" % (command, runtime))
    trace_prefix = "Received from system (%s)" % command

    if stdout and stderr:
      log.trace(trace_prefix + ", stdout:\n%s\nstderr:\n%s" % (stdout, stderr))
    elif stdout:
      log.trace(trace_prefix + ", stdout:\n%s" % stdout)
    elif stderr:
      log.trace(trace_prefix + ", stderr:\n%s" % stderr)

    exit_code = process.poll()

    if not ignore_exit_status and exit_code != 0:
      raise OSError("%s returned exit status %i" % (command, exit_code))

    if stdout:
      return stdout.decode("utf-8", "replace").splitlines()
    else:
      return []
  except OSError as exc:
    log.debug("System call (failed): %s (error: %s)" % (command, exc))

    if default != UNDEFINED:
      return default
    else:
      raise  # bare raise keeps the original traceback
Unfortunately the simple + # method for getting this... + # + # " ".join(["python"] + sys.argv) + # + # ... doesn't do the trick since this will miss interpretor arguments. + # + # python -W ignore::DeprecationWarning my_script.py + + args, argc = [], argc_t() + + for i in xrange(100): + # The ending index can be either None or raise a ValueError when + # accessed... + # + # ValueError: NULL pointer access + + try: + if argc[i] is None: + break + except ValueError: + break + + args.append(str(argc[i])) + + _PROCESS_NAME = " ".join(args) + + _MAX_NAME_LENGTH = len(_PROCESS_NAME) + + return _PROCESS_NAME + + +def set_process_name(process_name): + """ + Renames our current process from "python " to a custom name. This is + best-effort, not necessarily working on all platforms. + + :param str process_name: new name for our process + """ + + # This is mostly based on... + # + # http://www.rhinocerus.net/forum/lang-python/569677-setting-program-name-like-0-perl.html#post2272369 + # + # ... and an adaptation by Jake... + # + # https://github.com/ioerror/chameleon + # + # A cleaner implementation is available at... + # + # https://github.com/cream/libs/blob/b38970e2a6f6d2620724c828808235be0445b799/cream/util/procname.py + # + # but I'm not quite clear on their implementation, and it only does targeted + # argument replacement (ie, replace argv[0], argv[1], etc but with a string + # the same size). 
def _set_argv(process_name):
  """
  Overwrites our argv in a similar fashion to how it's done in C with:
  strcpy(argv[0], "new_name");

  This is a no-op if Py_GetArgcArgv is unavailable.

  :param str process_name: new name for our process

  :raises: **IOError** if the new name is longer than our initial name
  """

  if Py_GetArgcArgv is None:
    return

  global _PROCESS_NAME

  # both gets the current process name and initializes _MAX_NAME_LENGTH

  current_name = get_process_name()

  argv, argc = ctypes.c_int(0), argc_t()
  Py_GetArgcArgv(argv, ctypes.pointer(argc))

  # All sizes below are measured on the encoded name since that's what gets
  # written to memory. Its byte count exceeds the character count whenever the
  # name has non-ascii characters.

  process_name_encoded = process_name.encode('utf8')

  if len(process_name_encoded) > _MAX_NAME_LENGTH:
    raise IOError("Can't rename process to something longer than our initial name (this would overwrite memory used for the env)")

  # space we need to clear
  zero_size = max(len(current_name), len(process_name_encoded))

  ctypes.memset(argc.contents, 0, zero_size + 1)  # null terminate the string's end
  ctypes.memmove(argc.contents, process_name_encoded, len(process_name_encoded))
  _PROCESS_NAME = process_name
def _set_proc_title(process_name):
  """
  BSD specific calls (should be compatible with both FreeBSD and OpenBSD):
  http://fxr.watson.org/fxr/source/gen/setproctitle.c?v=FREEBSD-LIBC
  http://www.rootr.net/man/man/setproctitle/3

  This is a no-op if our libc doesn't provide setproctitle.

  :param str process_name: new name for our process
  """

  # ctypes string buffers hold bytes, so unicode names need to be encoded
  if not isinstance(process_name, bytes):
    process_name = process_name.encode("utf-8")

  # setproctitle() takes a printf style format string, so percent signs in the
  # name are escaped to have them taken literally
  title = process_name.replace(b"%", b"%%")

  libc = ctypes.CDLL(ctypes.util.find_library("c"))
  name_buffer = ctypes.create_string_buffer(title)  # sized to include the null terminator

  try:
    libc.setproctitle(ctypes.byref(name_buffer))
  except AttributeError:
    # Possible issue (seen on OSX):
    # AttributeError: dlsym(0x7fff6a41d1e0, setproctitle): symbol not found

    pass
+ + ============= =========== + Attr Description + ============= =========== + **BOLD** heavy typeface + **HILIGHT** inverted foreground and background + **UNDERLINE** underlined text + ============= =========== +""" + +import stem.util.enum +import stem.util.str_tools + +TERM_COLORS = ("BLACK", "RED", "GREEN", "YELLOW", "BLUE", "MAGENTA", "CYAN", "WHITE") + +Color = stem.util.enum.Enum(*TERM_COLORS) +BgColor = stem.util.enum.Enum(*["BG_" + color for color in TERM_COLORS]) +Attr = stem.util.enum.Enum("BOLD", "UNDERLINE", "HILIGHT") + +# mappings of terminal attribute enums to their ANSI escape encoding +FG_ENCODING = dict([(list(Color)[i], str(30 + i)) for i in range(8)]) +BG_ENCODING = dict([(list(BgColor)[i], str(40 + i)) for i in range(8)]) +ATTR_ENCODING = {Attr.BOLD: "1", Attr.UNDERLINE: "4", Attr.HILIGHT: "7"} + +CSI = "\x1B[%sm" +RESET = CSI % "0" + + +def format(msg, *attr): + """ + Simple terminal text formatting using `ANSI escape sequences + `_. + The following are some toolkits providing similar capabilities: + + * `django.utils.termcolors `_ + * `termcolor `_ + * `colorama `_ + + :param str msg: string to be formatted + :param str attr: text attributes, this can be :data:`~stem.util.term.Color`, :data:`~stem.util.term.BgColor`, or :data:`~stem.util.term.Attr` enums + and are case insensitive (so strings like "red" are fine) + + :returns: **str** wrapped with ANSI escape encodings, starting with the given + attributes and ending with a reset + """ + + # if we have reset sequences in the message then apply our attributes + # after each of them + if RESET in msg: + return "".join([format(comp, *attr) for comp in msg.split(RESET)]) + + encodings = [] + for text_attr in attr: + text_attr, encoding = stem.util.str_tools._to_camel_case(text_attr), None + encoding = FG_ENCODING.get(text_attr, encoding) + encoding = BG_ENCODING.get(text_attr, encoding) + encoding = ATTR_ENCODING.get(text_attr, encoding) + + if encoding: + encodings.append(encoding) + + if 
# The control-spec defines the following as...
#
#   Fingerprint = "$" 40*HEXDIG
#   NicknameChar = "a"-"z" / "A"-"Z" / "0" - "9"
#   Nickname = 1*19 NicknameChar
#
#   CircuitID = 1*16 IDChar
#   IDChar = ALPHA / DIGIT
#
# HEXDIG is defined in RFC 5234 as being uppercase and used in RFC 5987 as
# case insensitive. Tor doesn't define this in the spec so flipping a coin
# and going with case insensitive.
#
# Patterns are anchored with \Z rather than $ because the latter also matches
# just before a trailing newline, which would let "nickname\n" validate.

HEX_DIGIT = "[0-9a-fA-F]"
FINGERPRINT_PATTERN = re.compile(r"^%s{40}\Z" % HEX_DIGIT)
NICKNAME_PATTERN = re.compile(r"^[a-zA-Z0-9]{1,19}\Z")
CIRC_ID_PATTERN = re.compile(r"^[a-zA-Z0-9]{1,16}\Z")

# types we accept as being a string, 'unicode' only exists under python 2.x
try:
  _STRING_TYPES = (str, unicode)
except NameError:
  _STRING_TYPES = (str,)


def is_valid_fingerprint(entry, check_prefix = False):
  """
  Checks if a string is a properly formatted relay fingerprint. This checks for
  a '$' prefix if check_prefix is true, otherwise this only validates the hex
  digits.

  :param str entry: string to be checked
  :param bool check_prefix: checks for a '$' prefix

  :returns: **True** if the string could be a relay fingerprint, **False** otherwise
  """

  if not isinstance(entry, _STRING_TYPES):
    return False
  elif check_prefix:
    if not entry or entry[0] != "$":
      return False

    entry = entry[1:]

  return bool(FINGERPRINT_PATTERN.match(entry))


def is_valid_nickname(entry):
  """
  Checks if a string is a valid format for being a nickname.

  :param str entry: string to be checked

  :returns: **True** if the string could be a nickname, **False** otherwise
  """

  if not isinstance(entry, _STRING_TYPES):
    return False

  return bool(NICKNAME_PATTERN.match(entry))


def is_valid_circuit_id(entry):
  """
  Checks if a string is a valid format for being a circuit identifier.

  :param str entry: string to be checked

  :returns: **True** if the string could be a circuit id, **False** otherwise
  """

  if not isinstance(entry, _STRING_TYPES):
    return False

  return bool(CIRC_ID_PATTERN.match(entry))


def is_valid_stream_id(entry):
  """
  Checks if a string is a valid format for being a stream identifier.
  Currently, this is just an alias to :func:`~stem.util.tor_tools.is_valid_circuit_id`.

  :param str entry: string to be checked

  :returns: **True** if the string could be a stream id, **False** otherwise
  """

  return is_valid_circuit_id(entry)


def is_hex_digits(entry, count):
  """
  Checks if a string is the given number of hex digits. Digits represented by
  letters are case insensitive.

  :param str entry: string to be checked
  :param int count: number of hex digits to be checked for

  :returns: **True** if the string matches this number, **False** otherwise
    (including when the entry isn't a string)
  """

  # like the other validators, non-string input is simply invalid
  if not isinstance(entry, _STRING_TYPES):
    return False

  return bool(re.match(r"^%s{%i}\Z" % (HEX_DIGIT, count), entry))
+ + ===================================== =========== + Requirement Description + ===================================== =========== + **AUTH_SAFECOOKIE** SAFECOOKIE authentication method + **EVENT_AUTHDIR_NEWDESCS** AUTHDIR_NEWDESC events + **EVENT_BUILDTIMEOUT_SET** BUILDTIMEOUT_SET events + **EVENT_CIRC_MINOR** CIRC_MINOR events + **EVENT_CLIENTS_SEEN** CLIENTS_SEEN events + **EVENT_CONF_CHANGED** CONF_CHANGED events + **EVENT_DESCCHANGED** DESCCHANGED events + **EVENT_GUARD** GUARD events + **EVENT_NEWCONSENSUS** NEWCONSENSUS events + **EVENT_NS** NS events + **EVENT_SIGNAL** SIGNAL events + **EVENT_STATUS** STATUS_GENERAL, STATUS_CLIENT, and STATUS_SERVER events + **EVENT_STREAM_BW** STREAM_BW events + **EVENT_TRANSPORT_LAUNCHED** TRANSPORT_LAUNCHED events + **EXTENDCIRCUIT_PATH_OPTIONAL** EXTENDCIRCUIT queries can omit the path if the circuit is zero + **FEATURE_EXTENDED_EVENTS** 'EXTENDED_EVENTS' optional feature + **FEATURE_VERBOSE_NAMES** 'VERBOSE_NAMES' optional feature + **GETINFO_CONFIG_TEXT** 'GETINFO config-text' query + **LOADCONF** LOADCONF requests + **MICRODESCRIPTOR_IS_DEFAULT** Tor gets microdescriptors by default rather than server descriptors + **TAKEOWNERSHIP** TAKEOWNERSHIP requests + **TORRC_CONTROL_SOCKET** 'ControlSocket ' config option + **TORRC_PORT_FORWARDING** 'PortForwarding' config option + **TORRC_DISABLE_DEBUGGER_ATTACHMENT** 'DisableDebuggerAttachment' config option + ===================================== =========== +""" + +import os +import re + +import stem.util.enum +import stem.util.system + +try: + # added in python 3.2 + from functools import lru_cache +except ImportError: + from stem.util.lru_cache import lru_cache + +# cache for the get_system_tor_version function +VERSION_CACHE = {} + + +def get_system_tor_version(tor_cmd = "tor"): + """ + Queries tor for its version. This is os dependent, only working on linux, + osx, and bsd. 
class Version(object):
  """
  Comparable tor version. These are constructed from strings that conform to
  the 'new' style in the `tor version-spec
  <https://gitweb.torproject.org/torspec.git/blob/HEAD:/version-spec.txt>`_,
  such as "0.1.4" or "0.2.2.23-alpha (git-7dcd105be34a4f44)".

  :var int major: major version
  :var int minor: minor version
  :var int micro: micro version
  :var int patch: patch level (**None** if undefined)
  :var str status: status tag such as 'alpha' or 'beta-dev' (**None** if undefined)
  :var str extra: extra information without its parentheses such as
    'git-8be6058d8f31e578' (**None** if undefined)
  :var str git_commit: git commit id (**None** if it wasn't provided)

  :param str version_str: version to be parsed

  :raises: **ValueError** if input isn't a valid tor version
  """

  def __init__(self, version_str):
    self.version_str = version_str

    # anchored with \Z rather than $ so a trailing newline isn't accepted
    version_parts = re.match(r'^([0-9]+)\.([0-9]+)\.([0-9]+)(\.[0-9]+)?(-\S*)?( \(\S*\))?\Z', version_str)
    self._hash = None  # lazily populated by __hash__

    if version_parts:
      major, minor, micro, patch, status, extra = version_parts.groups()

      # The patch and status matches are optional (may be None) and have an
      # extra preceding period or dash if they exist. Stripping those off.

      if patch:
        patch = int(patch[1:])

      if status:
        status = status[1:]

      # drops the leading ' (' and trailing ')'
      if extra:
        extra = extra[2:-1]

      self.major = int(major)
      self.minor = int(minor)
      self.micro = int(micro)
      self.patch = patch
      self.status = status
      self.extra = extra

      if extra and re.match(r"^git-[0-9a-f]{16}\Z", extra):
        self.git_commit = extra[4:]
      else:
        self.git_commit = None
    else:
      raise ValueError("'%s' isn't a properly formatted tor version" % version_str)

  def __str__(self):
    """
    Provides the string used to construct the version.
    """

    return self.version_str

  def _compare(self, other, method):
    """
    Compares version ordering according to the spec.

    :param Version other: version to compare ourselves with
    :param functor method: comparison to apply to the first differing
      component, called with our value then the other's

    :returns: result of the comparison, **False** if other isn't a Version
    """

    if not isinstance(other, Version):
      return False

    for attr in ("major", "minor", "micro", "patch"):
      my_version = getattr(self, attr)
      other_version = getattr(other, attr)

      # an undefined patch level is equivalent to zero

      if my_version is None:
        my_version = 0

      if other_version is None:
        other_version = 0

      if my_version != other_version:
        return method(my_version, other_version)

    # According to the version spec...
    #
    #   If we *do* encounter two versions that differ only by status tag, we
    #   compare them lexically as ASCII byte strings.

    my_status = self.status if self.status else ""
    other_status = other.status if other.status else ""

    return method(my_status, other_status)

  def _meets(self, requirements):
    """
    Checks if we satisfy any of the rules of a set of requirements.

    :param _VersionRequirements requirements: requirements to check against

    :returns: **True** if at least one rule accepts us, **False** otherwise
    """

    return any(rule(self) for rule in requirements.rules)

  def __eq__(self, other):
    return self._compare(other, lambda s, o: s == o)

  def __ne__(self, other):
    # python 2.x doesn't derive this from __eq__
    return not self == other

  def __gt__(self, other):
    """
    Checks if this version meets the requirements for a given feature. We can
    be compared to either a :class:`~stem.version.Version` or
    :class:`~stem.version._VersionRequirements`.
    """

    if isinstance(other, _VersionRequirements):
      return self._meets(other)

    return self._compare(other, lambda s, o: s > o)

  def __ge__(self, other):
    """
    Same as the greater-than check, but inclusive when compared with another
    :class:`~stem.version.Version`.
    """

    if isinstance(other, _VersionRequirements):
      return self._meets(other)

    return self._compare(other, lambda s, o: s >= o)

  def __hash__(self):
    # Only the attributes used for comparison contribute, so versions that are
    # equal hash alike (an undefined patch and a patch of zero both add zero).

    if self._hash is None:
      my_hash = 0

      for attr in ("major", "minor", "micro", "patch", "status"):
        my_hash *= 1024

        attr_value = getattr(self, attr)

        if attr_value is not None:
          my_hash += hash(attr_value)

      self._hash = my_hash

    return self._hash


class _VersionRequirements(object):
  """
  Series of version constraints that can be compared to. For instance, this
  allows for comparisons like 'if I'm greater than version X in the 0.2.2
  series, or greater than version Y in the 0.2.3 series'.

  This is a logical 'or' of the series of rules.

  :var list rules: functors that take a version and provide **True** if it
    satisfies the constraint
  """

  def __init__(self):
    self.rules = []

  def greater_than(self, version, inclusive = True):
    """
    Adds a constraint that we're greater than the given version.

    :param stem.version.Version version: version we're checking against
    :param bool inclusive: if comparison is inclusive or not
    """

    if inclusive:
      self.rules.append(lambda v: version <= v)
    else:
      self.rules.append(lambda v: version < v)

  def less_than(self, version, inclusive = True):
    """
    Adds a constraint that we're less than the given version.

    :param stem.version.Version version: version we're checking against
    :param bool inclusive: if comparison is inclusive or not
    """

    if inclusive:
      self.rules.append(lambda v: version >= v)
    else:
      self.rules.append(lambda v: version > v)

  def in_range(self, from_version, to_version, from_inclusive = True, to_inclusive = False):
    """
    Adds constraint that we're within the range from one version to another.

    :param stem.version.Version from_version: beginning of the comparison range
    :param stem.version.Version to_version: end of the comparison range
    :param bool from_inclusive: if comparison is inclusive with the starting version
    :param bool to_inclusive: if comparison is inclusive with the ending version
    """

    # Each bound is evaluated on its own so all four combinations of the
    # inclusive flags are honored.

    def new_rule(v):
      above_start = from_version <= v if from_inclusive else from_version < v
      below_end = v <= to_version if to_inclusive else v < to_version

      return above_start and below_end

    self.rules.append(new_rule)
Version('0.2.1.10-alpha')), + ("EVENT_CONF_CHANGED", Version('0.2.3.3-alpha')), + ("EVENT_DESCCHANGED", Version('0.1.2.2-alpha')), + ("EVENT_GUARD", Version('0.1.2.5-alpha')), + ("EVENT_NS", Version('0.1.2.3-alpha')), + ("EVENT_NEWCONSENSUS", Version('0.2.1.13-alpha')), + ("EVENT_SIGNAL", Version('0.2.3.1-alpha')), + ("EVENT_STATUS", Version('0.1.2.3-alpha')), + ("EVENT_STREAM_BW", Version('0.1.2.8-beta')), + ("EVENT_TRANSPORT_LAUNCHED", Version('0.2.5.0-alpha')), + ("EXTENDCIRCUIT_PATH_OPTIONAL", Version("0.2.2.9")), + ("FEATURE_EXTENDED_EVENTS", Version("0.2.2.1-alpha")), + ("FEATURE_VERBOSE_NAMES", Version("0.2.2.1-alpha")), + ("GETINFO_CONFIG_TEXT", Version("0.2.2.7-alpha")), + ("LOADCONF", Version("0.2.1.1")), + ("MICRODESCRIPTOR_IS_DEFAULT", Version("0.2.3.3")), + ("TAKEOWNERSHIP", Version("0.2.2.28-beta")), + ("TORRC_CONTROL_SOCKET", Version("0.2.0.30")), + ("TORRC_PORT_FORWARDING", Version("0.2.3.1-alpha")), + ("TORRC_DISABLE_DEBUGGER_ATTACHMENT", Version("0.2.3.9")), +)