added stem python library

Micah Lee 2014-05-21 14:09:41 -04:00
parent 8ffa569094
commit 619ab6db0f
37 changed files with 19032 additions and 0 deletions

19
lib/stem/util/__init__.py Normal file

@@ -0,0 +1,19 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Utility functions used by the stem library.
"""
__all__ = [
"conf",
"connection",
"enum",
"log",
"lru_cache",
"ordereddict",
"proc",
"system",
"term",
"tor_tools",
]

673
lib/stem/util/conf.py Normal file

@@ -0,0 +1,673 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Handlers for text configuration files. Configurations are simple string to
string mappings, with the configuration files using the following rules...
* the key and value are separated by a space
* anything after a "#" is ignored as a comment
* excess whitespace is trimmed
* empty lines are ignored
* multi-line values can be defined by following the key with lines starting
with a '|'
For instance...
::
# This is my sample config
user.name Galen
user.password yabba1234 # here's an inline comment
user.notes takes a fancy to pepperjack cheese
blankEntry.example
msg.greeting
|Multi-line message exclaiming of the
|wonder and awe that is pepperjack!
... would be loaded as...
::
config = {
"user.name": "Galen",
"user.password": "yabba1234",
"user.notes": "takes a fancy to pepperjack cheese",
"blankEntry.example": "",
"msg.greeting": "Multi-line message exclaiming of the\\nwonder and awe that is pepperjack!",
}
Configurations are managed via the :class:`~stem.util.conf.Config` class. The
:class:`~stem.util.conf.Config` can be used directly with its
:func:`~stem.util.conf.Config.get` and :func:`~stem.util.conf.Config.set`
methods, but usually modules will want a local dictionary with just the
configurations that they care about.
To do this use the :func:`~stem.util.conf.config_dict` function. For example...
::
import getpass
from stem.util import conf, connection
def config_validator(key, value):
if key == "timeout":
# require at least a one second timeout
return max(1, value)
elif key == "endpoint":
if not connection.is_valid_ipv4_address(value):
raise ValueError("'%s' isn't a valid IPv4 address" % value)
elif key == "port":
if not connection.is_valid_port(value):
raise ValueError("'%s' isn't a valid port" % value)
elif key == "retries":
# negative retries really don't make sense
return max(0, value)
CONFIG = conf.config_dict("ssh_login", {
"username": getpass.getuser(),
"password": "",
"timeout": 10,
"endpoint": "263.12.8.0",
"port": 22,
"reconnect": False,
"retries": 3,
}, config_validator)
There are several things going on here, so let's take it step by step...
* The :func:`~stem.util.conf.config_dict` provides a dictionary that's bound
to a given configuration. If the "ssh_login" configuration changes
then so will the contents of CONFIG.
* The dictionary we're passing to :func:`~stem.util.conf.config_dict` provides
two important pieces of information: default values and their types. See the
Config's :func:`~stem.util.conf.Config.get` method for how these type
inferences work.
* The config_validator is a hook we're adding to make sure CONFIG only gets
values we think are valid. In this case it ensures that our timeout value
is at least one second, and rejects endpoints or ports that are invalid.
Now let's say our user has the following configuration file...
::
username waddle_doo
password jabberwocky
timeout -15
port 9000000
retries lots
reconnect true
logging debug
... and we load it as follows...
::
>>> from stem.util import conf
>>> our_config = conf.get_config("ssh_login")
>>> our_config.load("/home/atagar/user_config")
>>> print CONFIG
{
"username": "waddle_doo",
"password": "jabberwocky",
"timeout": 1,
"endpoint": "263.12.8.0",
"port": 22,
"reconnect": True,
"retries": 3,
}
Here's an explanation of what happened...
* the username, password, and reconnect attributes took the values in the
configuration file
* the 'config_validator' we added earlier enforced a minimum timeout of one
second and rejected the invalid port (with a log message)
* we weren't able to convert the retries' "lots" value to an integer so it kept
its default value and logged a warning
* the user didn't supply an endpoint so that remained unchanged
* our CONFIG didn't have a 'logging' attribute so it was ignored
**Module Overview:**
::
config_dict - provides a dictionary that's kept in sync with our config
get_config - singleton for getting configurations
parse_enum_csv - helper function for parsing configuration entries for enums
Config - Custom configuration
|- load - reads a configuration file
|- save - writes the current configuration to a file
|- clear - empties our loaded configuration contents
|- add_listener - notifies the given listener when an update occurs
|- clear_listeners - removes any attached listeners
|- keys - provides keys in the loaded configuration
|- set - sets the given key/value pair
|- unused_keys - provides keys that have never been requested
|- get - provides the value for a given key, with type inference
+- get_value - provides the value for a given key as a string
"""
import threading
from stem.util import log
CONFS = {} # mapping of identifier to singleton instances of configs
class _SyncListener(object):
def __init__(self, config_dict, interceptor):
self.config_dict = config_dict
self.interceptor = interceptor
def update(self, config, key):
if key in self.config_dict:
new_value = config.get(key, self.config_dict[key])
if new_value == self.config_dict[key]:
return # no change
if self.interceptor:
interceptor_value = self.interceptor(key, new_value)
if interceptor_value:
new_value = interceptor_value
self.config_dict[key] = new_value
def config_dict(handle, conf_mappings, handler = None):
"""
Makes a dictionary that stays synchronized with a configuration.
This takes a dictionary of 'config_key => default_value' mappings and
changes the values to reflect our current configuration. This will leave
the previous values alone if...
* we don't have a value for that config_key
* we can't convert our value to be the same type as the default_value
If a handler is provided then this is called just prior to assigning new
values to the config_dict. The handler function is expected to accept the
(key, value) for the new values and return what we should actually insert
into the dictionary. If this returns None then the value is updated as
normal.
For more information about how we convert types see our
:func:`~stem.util.conf.Config.get` method.
**The dictionary you get from this is managed by the
:class:`~stem.util.conf.Config` class and should be treated as being
read-only.**
:param str handle: unique identifier for a config instance
:param dict conf_mappings: config key/value mappings used as our defaults
:param functor handler: function called just prior to assigning values
"""
selected_config = get_config(handle)
selected_config.add_listener(_SyncListener(conf_mappings, handler).update)
return conf_mappings
def get_config(handle):
"""
Singleton constructor for configuration file instances. If a configuration
already exists for the handle then it's returned. Otherwise a fresh instance
is constructed.
:param str handle: unique identifier used to access this config instance
"""
if not handle in CONFS:
CONFS[handle] = Config()
return CONFS[handle]
def parse_enum(key, value, enumeration):
"""
Provides the enumeration value for a given key. This is a case insensitive
lookup and raises an exception if the enum key doesn't exist.
:param str key: configuration key being looked up
:param str value: value to be parsed
:param stem.util.enum.Enum enumeration: enumeration the values should be in
:returns: enumeration value
:raises: **ValueError** if the **value** isn't among the enumeration keys
"""
return parse_enum_csv(key, value, enumeration, 1)[0]
def parse_enum_csv(key, value, enumeration, count = None):
"""
Parses a given value as being a comma separated listing of enumeration keys,
returning the corresponding enumeration values. This is intended to be a
helper for config handlers. The checks this does are case insensitive.
The **count** attribute can be used to make assertions based on the number of
values. This can be...
* None to indicate that there's no restrictions.
* An int to indicate that we should have this many values.
* An (int, int) tuple to indicate the range that values can be in. This range
is inclusive and either can be None to indicate the lack of a lower or
upper bound.
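For example, here's a quick sketch (the 'my_key' entry is just illustrative)...
::
>>> from stem.util import enum
>>> runlevels = enum.UppercaseEnum("DEBUG", "INFO", "WARN")
>>> parse_enum_csv("my_key", "debug, warn", runlevels)
['DEBUG', 'WARN']
>>> parse_enum_csv("my_key", "debug", runlevels, (2, 3))
Traceback (most recent call last):
...
ValueError: Config entry 'my_key' must have at least 2 comma separated values, got 'debug'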
:param str key: configuration key being looked up
:param str value: value to be parsed
:param stem.util.enum.Enum enumeration: enumeration the values should be in
:param int,tuple count: validates that we have this many items
:returns: list with the enumeration values
:raises: **ValueError** if the count assertion fails or the **value** entries
don't match the enumeration keys
"""
values = [val.upper().strip() for val in value.split(',')]
if values == ['']:
return []
if count is None:
pass # no count validation checks to do
elif isinstance(count, int):
if len(values) != count:
raise ValueError("Config entry '%s' is expected to be %i comma separated values, got '%s'" % (key, count, value))
elif isinstance(count, tuple) and len(count) == 2:
minimum, maximum = count
if minimum is not None and len(values) < minimum:
raise ValueError("Config entry '%s' must have at least %i comma separated values, got '%s'" % (key, minimum, value))
if maximum is not None and len(values) > maximum:
raise ValueError("Config entry '%s' can have at most %i comma separated values, got '%s'" % (key, maximum, value))
else:
raise ValueError("The count must be None, an int, or two value tuple. Got '%s' (%s)'" % (count, type(count)))
result = []
enum_keys = [k.upper() for k in enumeration.keys()]
enum_values = list(enumeration)
for val in values:
if val in enum_keys:
result.append(enum_values[enum_keys.index(val)])
else:
raise ValueError("The '%s' entry of config entry '%s' wasn't in the enumeration (expected %s)" % (val, key, ', '.join(enum_keys)))
return result
class Config(object):
"""
Handler for easily working with custom configurations, providing persistence
to and from files. All operations are thread safe.
**Example usage:**
User has a file at '/home/atagar/myConfig' with...
::
destination.ip 1.2.3.4
destination.port blarg
startup.run export PATH=$PATH:~/bin
startup.run alias l=ls
And they have a script with...
::
from stem.util import conf
# Configuration values we'll use in this file. These are mappings of
# configuration keys to the default values we'll use if the user doesn't
# have something different in their config file (or it doesn't match this
# type).
ssh_config = conf.config_dict("ssh_login", {
"login.user": "atagar",
"login.password": "pepperjack_is_awesome!",
"destination.ip": "127.0.0.1",
"destination.port": 22,
"startup.run": [],
})
# Makes an empty config instance with the handle of 'ssh_login'. This is
# a singleton so other classes can fetch this same configuration from
# this handle.
user_config = conf.get_config("ssh_login")
# Loads the user's configuration file, warning if this fails.
try:
user_config.load("/home/atagar/myConfig")
except IOError as exc:
print "Unable to load the user's config: %s" % exc
# This replaces the contents of ssh_config with the values from the user's
# config file if...
#
# * the key is present in the config file
# * we're able to convert the configuration file's value to the same type
# as what's in the mapping (see the Config.get() method for how these
# type inferences work)
#
# For instance in this case...
#
# * the login values are left alone because they aren't in the user's
# config file
#
# * the 'destination.port' is also left with the value of 22 because we
# can't turn "blarg" into an integer
#
# The other values are replaced, so ssh_config now becomes...
#
# {"login.user": "atagar",
# "login.password": "pepperjack_is_awesome!",
# "destination.ip": "1.2.3.4",
# "destination.port": 22,
# "startup.run": ["export PATH=$PATH:~/bin", "alias l=ls"]}
#
# Information for what values fail to load and why are reported to
# 'stem.util.log'.
"""
def __init__(self):
self._path = None # location we last loaded from or saved to
self._contents = {} # configuration key/value pairs
self._listeners = [] # functors to be notified of config changes
# used for accessing _contents
self._contents_lock = threading.RLock()
# keys that have been requested (used to provide unused config contents)
self._requested_keys = set()
def load(self, path = None):
"""
Reads in the contents of the given path, adding its configuration values
to our current contents.
:param str path: file path to be loaded, this uses the last loaded path if
not provided
:raises:
* **IOError** if we fail to read the file (it doesn't exist, insufficient
permissions, etc)
* **ValueError** if no path was provided and we've never been provided one
"""
if path:
self._path = path
elif not self._path:
raise ValueError("Unable to load configuration: no path provided")
with open(self._path, "r") as config_file:
read_contents = config_file.readlines()
with self._contents_lock:
while read_contents:
line = read_contents.pop(0)
# strips any commenting or excess whitespace
comment_start = line.find("#")
if comment_start != -1:
line = line[:comment_start]
line = line.strip()
# parse the key/value pair
if line:
try:
key, value = line.split(" ", 1)
value = value.strip()
except ValueError:
log.debug("Config entry '%s' is expected to be of the format 'Key Value', defaulting to '%s' -> ''" % (line, line))
key, value = line, ""
if not value:
# this might be a multi-line entry, try processing it as such
multiline_buffer = []
while read_contents and read_contents[0].lstrip().startswith("|"):
content = read_contents.pop(0).lstrip()[1:] # removes '\s+|' prefix
content = content.rstrip("\n") # trailing newline
multiline_buffer.append(content)
if multiline_buffer:
self.set(key, "\n".join(multiline_buffer), False)
continue
self.set(key, value, False)
def save(self, path = None):
"""
Saves configuration contents to disk. If a path is provided then it
replaces the configuration location that we track.
:param str path: location to be saved to
:raises: **ValueError** if no path was provided and we've never been provided one
"""
if path:
self._path = path
elif not self._path:
raise ValueError("Unable to save configuration: no path provided")
with self._contents_lock:
with open(self._path, 'w') as output_file:
for entry_key in sorted(self.keys()):
for entry_value in self.get_value(entry_key, multiple = True):
# check for multi line entries
if "\n" in entry_value:
entry_value = "\n|" + entry_value.replace("\n", "\n|")
output_file.write('%s %s\n' % (entry_key, entry_value))
def clear(self):
"""
Drops the configuration contents and reverts back to a blank, unloaded
state.
"""
with self._contents_lock:
self._contents.clear()
self._requested_keys = set()
def add_listener(self, listener, backfill = True):
"""
Registers the function to be notified of configuration updates. Listeners
are expected to be functors which accept (config, key).
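For example, a minimal sketch of a listener that just echoes changes (the
'listener_example' handle is illustrative)...
::
>>> from stem.util import conf
>>> def my_listener(config, key):
...   print "%s => %s" % (key, config.get_value(key))
>>> my_config = conf.get_config("listener_example")
>>> my_config.add_listener(my_listener)
>>> my_config.set("hello", "world")
hello => world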
:param functor listener: function to be notified when our configuration is changed
:param bool backfill: calls the function with our current values if **True**
"""
with self._contents_lock:
self._listeners.append(listener)
if backfill:
for key in self.keys():
listener(self, key)
def clear_listeners(self):
"""
Removes all attached listeners.
"""
self._listeners = []
def keys(self):
"""
Provides all keys in the currently loaded configuration.
:returns: **list** of strings for the configuration keys we've loaded
"""
return self._contents.keys()
def unused_keys(self):
"""
Provides the configuration keys that have never been provided to a caller
via :func:`~stem.util.conf.config_dict` or the
:func:`~stem.util.conf.Config.get` and
:func:`~stem.util.conf.Config.get_value` methods.
:returns: **set** of configuration keys we've loaded but have never been requested
"""
return set(self.keys()).difference(self._requested_keys)
def set(self, key, value, overwrite = True):
"""
Appends the given key/value configuration mapping, behaving the same as if
we'd loaded this from a configuration file.
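For instance, a short sketch of the overwrite behavior (the handle and keys
are illustrative)...
::
>>> my_config = get_config("set_example")
>>> my_config.set("startup.run", "export PATH=$PATH:~/bin")
>>> my_config.set("startup.run", "alias l=ls", False)
>>> my_config.get_value("startup.run", multiple = True)
['export PATH=$PATH:~/bin', 'alias l=ls']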
:param str key: key for the configuration mapping
:param str,list value: value we're setting the mapping to
:param bool overwrite: replaces the previous value if **True**, otherwise
the values are appended
"""
with self._contents_lock:
if isinstance(value, str):
if not overwrite and key in self._contents:
self._contents[key].append(value)
else:
self._contents[key] = [value]
for listener in self._listeners:
listener(self, key)
elif isinstance(value, (list, tuple)):
if not overwrite and key in self._contents:
self._contents[key] += value
else:
self._contents[key] = value
for listener in self._listeners:
listener(self, key)
else:
raise ValueError("Config.set() only accepts str, list, or tuple. Provided value was a '%s'" % type(value))
def get(self, key, default = None):
"""
Fetches the given configuration, using the key and default value to
determine the type it should be. Recognized inferences are:
* **default is a boolean => boolean**
* values are case insensitive
* provides the default if the value isn't "true" or "false"
* **default is an integer => int**
* provides the default if the value can't be converted to an int
* **default is a float => float**
* provides the default if the value can't be converted to a float
* **default is a list => list**
* string contents for all configuration values with this key
* **default is a tuple => tuple**
* string contents for all configuration values with this key
* **default is a dictionary => dict**
* values without "=>" in them are ignored
* values are split into key/value pairs on "=>" with extra whitespace
stripped
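For instance, a short sketch of these inferences (the handle and keys are
illustrative)...
::
>>> my_config = get_config("get_example")
>>> my_config.set("retries", "5")
>>> my_config.get("retries", 3)
5
>>> my_config.set("ports", ["80", "443"])
>>> my_config.get("ports", [])
['80', '443']
>>> my_config.get("missing_key", False)
False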
:param str key: config setting to be fetched
:param object default: value provided if no such key exists or fails to be converted
:returns: given configuration value with its type inferred with the above rules
"""
is_multivalue = isinstance(default, (list, tuple, dict))
val = self.get_value(key, default, is_multivalue)
if val == default:
return val # don't try to infer undefined values
if isinstance(default, bool):
if val.lower() == "true":
val = True
elif val.lower() == "false":
val = False
else:
log.debug("Config entry '%s' is expected to be a boolean, defaulting to '%s'" % (key, str(default)))
val = default
elif isinstance(default, int):
try:
val = int(val)
except ValueError:
log.debug("Config entry '%s' is expected to be an integer, defaulting to '%i'" % (key, default))
val = default
elif isinstance(default, float):
try:
val = float(val)
except ValueError:
log.debug("Config entry '%s' is expected to be a float, defaulting to '%f'" % (key, default))
val = default
elif isinstance(default, list):
pass # nothing special to do (already a list)
elif isinstance(default, tuple):
val = tuple(val)
elif isinstance(default, dict):
valMap = {}
for entry in val:
if "=>" in entry:
entryKey, entryVal = entry.split("=>", 1)
valMap[entryKey.strip()] = entryVal.strip()
else:
log.debug("Ignoring invalid %s config entry (expected a mapping, but \"%s\" was missing \"=>\")" % (key, entry))
val = valMap
return val
def get_value(self, key, default = None, multiple = False):
"""
This provides the current value associated with a given key.
:param str key: config setting to be fetched
:param object default: value provided if no such key exists
:param bool multiple: provides back a list of all values if **True**,
otherwise this returns the last loaded configuration value
:returns: **str** or **list** of string configuration values associated
with the given key, providing the default if no such key exists
"""
with self._contents_lock:
if key in self._contents:
self._requested_keys.add(key)
if multiple:
return self._contents[key]
else:
return self._contents[key][-1]
else:
message_id = "stem.util.conf.missing_config_key_%s" % key
log.log_once(message_id, log.TRACE, "config entry '%s' not found, defaulting to '%s'" % (key, default))
return default

562
lib/stem/util/connection.py Normal file

@@ -0,0 +1,562 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Connection and networking based utility functions.
::
get_connections - queries the connections belonging to a given process
get_system_resolvers - provides connection resolution methods that are likely to be available
is_valid_ipv4_address - checks if a string is a valid IPv4 address
is_valid_ipv6_address - checks if a string is a valid IPv6 address
is_valid_port - checks if something is a valid representation for a port
is_private_address - checks if an IPv4 address belongs to a private range or not
expand_ipv6_address - provides an IPv6 address with its collapsed portions expanded
get_mask_ipv4 - provides the mask representation for a given number of bits
get_mask_ipv6 - provides the IPv6 mask representation for a given number of bits
.. data:: Resolver (enum)
Method for resolving a process' connections.
================= ===========
Resolver Description
================= ===========
**PROC** /proc contents
**NETSTAT** netstat command
**SS** ss command
**LSOF** lsof command
**SOCKSTAT** sockstat command under *nix
**BSD_SOCKSTAT** sockstat command under FreeBSD
**BSD_PROCSTAT** procstat command under FreeBSD
================= ===========
"""
import collections
import hashlib
import hmac
import os
import platform
import re
import stem.util.proc
import stem.util.system
from stem.util import enum, log
# Connection resolution is risky to log about since it's highly likely to
# contain sensitive information. That said, it's also difficult to get right in
a platform independent fashion. To opt into the logging required to
# troubleshoot connection resolution set the following...
LOG_CONNECTION_RESOLUTION = False
Resolver = enum.Enum(
('PROC', 'proc'),
('NETSTAT', 'netstat'),
('SS', 'ss'),
('LSOF', 'lsof'),
('SOCKSTAT', 'sockstat'),
('BSD_SOCKSTAT', 'sockstat (bsd)'),
('BSD_PROCSTAT', 'procstat (bsd)')
)
Connection = collections.namedtuple('Connection', [
'local_address',
'local_port',
'remote_address',
'remote_port',
'protocol',
])
FULL_IPv4_MASK = "255.255.255.255"
FULL_IPv6_MASK = "FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF"
CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE = os.urandom(32)
RESOLVER_COMMAND = {
Resolver.PROC: '',
# -n = prevents dns lookups, -p = include process
Resolver.NETSTAT: 'netstat -np',
# -n = numeric ports, -p = include process, -t = tcp sockets, -u = udp sockets
Resolver.SS: 'ss -nptu',
# -n = prevent dns lookups, -P = show port numbers (not names), -i = ip only, -w = no warnings
# (lsof provides a '-p <pid>' but oddly in practice it seems to be ~11-28% slower)
Resolver.LSOF: 'lsof -wnPi',
Resolver.SOCKSTAT: 'sockstat',
# -4 = IPv4, -c = connected sockets
Resolver.BSD_SOCKSTAT: 'sockstat -4c',
# -f <pid> = process pid
Resolver.BSD_PROCSTAT: 'procstat -f {pid}',
}
RESOLVER_FILTER = {
Resolver.PROC: '',
# tcp 0 586 192.168.0.1:44284 38.229.79.2:443 ESTABLISHED 15843/tor
Resolver.NETSTAT: '^{protocol}\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED\s+{pid}/{name}\s*$',
# tcp ESTAB 0 0 192.168.0.20:44415 38.229.79.2:443 users:(("tor",15843,9))
Resolver.SS: '^{protocol}\s+ESTAB\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+users:\(\("{name}",{pid},[0-9]+\)\)$',
# tor 3873 atagar 45u IPv4 40994 0t0 TCP 10.243.55.20:45724->194.154.227.109:9001 (ESTABLISHED)
Resolver.LSOF: '^{name}\s+{pid}\s+.*\s+{protocol}\s+{local_address}:{local_port}->{remote_address}:{remote_port} \(ESTABLISHED\)$',
# atagar tor 15843 tcp4 192.168.0.20:44092 68.169.35.102:443 ESTABLISHED
Resolver.SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED$',
# _tor tor 4397 12 tcp4 172.27.72.202:54011 127.0.0.1:9001
Resolver.BSD_SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+\S+\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$',
# 3561 tor 4 s - rw---n-- 2 0 TCP 10.0.0.2:9050 10.0.0.1:22370
Resolver.BSD_PROCSTAT: '^\s*{pid}\s+{name}\s+.*\s+{protocol}\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$',
}
def get_connections(resolver, process_pid = None, process_name = None):
"""
Retrieves a list of the current connections for a given process. This provides
a list of Connection instances, which have five attributes...
* local_address (str)
* local_port (int)
* remote_address (str)
* remote_port (int)
* protocol (str, generally either 'tcp' or 'udp')
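For example, a hypothetical lookup (the pid of 1234 is made up)...
::
import stem.util.connection
resolver = stem.util.connection.Resolver.NETSTAT
for conn in stem.util.connection.get_connections(resolver, process_pid = 1234, process_name = 'tor'):
print "%s:%s => %s:%s (%s)" % (conn.local_address, conn.local_port, conn.remote_address, conn.remote_port, conn.protocol)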
:param Resolver resolver: method of connection resolution to use
:param int process_pid: pid of the process to retrieve
:param str process_name: name of the process to retrieve
:raises:
* **ValueError** if using **Resolver.PROC** or **Resolver.BSD_PROCSTAT**
and the process_pid wasn't provided
* **IOError** if no connections are available or resolution fails
(generally they're indistinguishable). The common causes are the
command being unavailable or permissions.
"""
def _log(msg):
if LOG_CONNECTION_RESOLUTION:
log.debug(msg)
_log("=" * 80)
_log("Querying connections for resolver: %s, pid: %s, name: %s" % (resolver, process_pid, process_name))
if isinstance(process_pid, str):
try:
process_pid = int(process_pid)
except ValueError:
raise ValueError("Process pid was non-numeric: %s" % process_pid)
if process_pid is None and resolver in (Resolver.PROC, Resolver.BSD_PROCSTAT):
raise ValueError("%s resolution requires a pid" % resolver)
if resolver == Resolver.PROC:
return [Connection(*conn) for conn in stem.util.proc.get_connections(process_pid)]
resolver_command = RESOLVER_COMMAND[resolver].format(pid = process_pid)
try:
results = stem.util.system.call(resolver_command)
except OSError as exc:
raise IOError("Unable to query '%s': %s" % (resolver_command, exc))
resolver_regex_str = RESOLVER_FILTER[resolver].format(
protocol = '(?P<protocol>\S+)',
local_address = '(?P<local_address>[0-9.]+)',
local_port = '(?P<local_port>[0-9]+)',
remote_address = '(?P<remote_address>[0-9.]+)',
remote_port = '(?P<remote_port>[0-9]+)',
pid = process_pid if process_pid else '[0-9]*',
name = process_name if process_name else '\S*',
)
_log("Resolver regex: %s" % resolver_regex_str)
_log("Resolver results:\n%s" % '\n'.join(results))
connections = []
resolver_regex = re.compile(resolver_regex_str)
for line in results:
match = resolver_regex.match(line)
if match:
attr = match.groupdict()
local_addr = attr['local_address']
local_port = int(attr['local_port'])
remote_addr = attr['remote_address']
remote_port = int(attr['remote_port'])
protocol = attr['protocol'].lower()
if remote_addr == '0.0.0.0':
continue # procstat response for unestablished connections
if not (is_valid_ipv4_address(local_addr) and is_valid_ipv4_address(remote_addr)):
_log("Invalid address (%s or %s): %s" % (local_addr, remote_addr, line))
continue # skip entries that fail validation
elif not (is_valid_port(local_port) and is_valid_port(remote_port)):
_log("Invalid port (%s or %s): %s" % (local_port, remote_port, line))
continue
elif protocol not in ('tcp', 'udp'):
_log("Unrecognized protocol (%s): %s" % (protocol, line))
continue
conn = Connection(local_addr, local_port, remote_addr, remote_port, protocol)
connections.append(conn)
_log(str(conn))
_log("%i connections found" % len(connections))
if not connections:
raise IOError("No results found using: %s" % resolver_command)
return connections
def get_system_resolvers(system = None):
"""
Provides the types of connection resolvers likely to be available on this platform.
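For instance, a sketch that picks the first resolver we can use (the pid and
process name are made up)...
::
from stem.util import connection
resolvers = connection.get_system_resolvers()
if resolvers:
conns = connection.get_connections(resolvers[0], process_pid = 1234, process_name = 'tor')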
:param str system: system to get resolvers for, this is determined by
platform.system() if not provided
:returns: **list** of Resolvers likely to be available on this platform
"""
if system is None:
system = platform.system()
if system == 'Windows':
resolvers = []
elif system in ('Darwin', 'OpenBSD'):
resolvers = [Resolver.LSOF]
elif system == 'FreeBSD':
# Netstat is available, but lacks a '-p' equivalent so we can't associate
# the results to processes. The platform also has an ss command, but it
# belongs to a spreadsheet application.
resolvers = [Resolver.BSD_SOCKSTAT, Resolver.BSD_PROCSTAT, Resolver.LSOF]
else:
# Sockstat isn't available by default on ubuntu.
resolvers = [Resolver.NETSTAT, Resolver.SOCKSTAT, Resolver.LSOF, Resolver.SS]
# remove any that aren't in the user's PATH
resolvers = filter(lambda r: stem.util.system.is_available(RESOLVER_COMMAND[r]), resolvers)
# proc resolution, by far, outperforms the others so default to this if able
if stem.util.proc.is_available():
resolvers = [Resolver.PROC] + resolvers
return resolvers
def is_valid_ipv4_address(address):
"""
Checks if a string is a valid IPv4 address.
:param str address: string to be checked
:returns: **True** if input is a valid IPv4 address, **False** otherwise
"""
if not isinstance(address, (bytes, unicode)):
return False
# checks that there are four period separated values
if address.count(".") != 3:
return False
# checks that each octet is a decimal value between 0-255
for entry in address.split("."):
if not entry.isdigit() or int(entry) < 0 or int(entry) > 255:
return False
elif entry[0] == "0" and len(entry) > 1:
return False # leading zeros, for instance in "1.2.3.001"
return True
def is_valid_ipv6_address(address, allow_brackets = False):
"""
Checks if a string is a valid IPv6 address.
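For example...
::
>>> is_valid_ipv6_address('2001:db8::ff00:42:8329')
True
>>> is_valid_ipv6_address('[2001:db8::]', allow_brackets = True)
True
>>> is_valid_ipv6_address('2001:db8::1::2')
False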
:param str address: string to be checked
:param bool allow_brackets: ignore brackets which form '[address]'
:returns: **True** if input is a valid IPv6 address, **False** otherwise
"""
if allow_brackets:
if address.startswith("[") and address.endswith("]"):
address = address[1:-1]
# addresses are made up of eight colon separated groups of four hex digits
# with leading zeros being optional
# https://en.wikipedia.org/wiki/IPv6#Address_format
colon_count = address.count(":")
if colon_count > 7:
return False # too many groups
elif colon_count != 7 and not "::" in address:
return False # not enough groups and none are collapsed
elif address.count("::") > 1 or ":::" in address:
return False # multiple groupings of zeros can't be collapsed
for entry in address.split(":"):
if not re.match("^[0-9a-fA-f]{0,4}$", entry):
return False
return True
def is_valid_port(entry, allow_zero = False):
"""
Checks if a string or int is a valid port number.
:param list,str,int entry: string, integer or list to be checked
:param bool allow_zero: accept port number of zero (reserved by definition)
:returns: **True** if input is an integer and within the valid port range, **False** otherwise
"""
if isinstance(entry, list):
for port in entry:
if not is_valid_port(port, allow_zero):
return False
return True
elif isinstance(entry, (bytes, unicode)):
if not entry.isdigit():
return False
elif entry[0] == "0" and len(entry) > 1:
return False # leading zeros, ex "001"
entry = int(entry)
if allow_zero and entry == 0:
return True
return entry > 0 and entry < 65536
def is_private_address(address):
"""
Checks if the IPv4 address is in a range belonging to the local network or
loopback. These include:
* Private ranges: 10.*, 172.16.* - 172.31.*, 192.168.*
* Loopback: 127.*
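For example...
::
>>> is_private_address('192.168.0.1')
True
>>> is_private_address('172.31.255.255')
True
>>> is_private_address('38.229.79.2')
False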
:param str address: string to be checked
:returns: **True** if input is in a private range, **False** otherwise
:raises: **ValueError** if the address isn't a valid IPv4 address
"""
if not is_valid_ipv4_address(address):
raise ValueError("'%s' isn't a valid IPv4 address" % address)
# checks for any of the simple wildcard ranges
if address.startswith("10.") or address.startswith("192.168.") or address.startswith("127."):
return True
# checks for the 172.16.* - 172.31.* range
if address.startswith("172."):
second_octet = int(address.split('.')[1])
if second_octet >= 16 and second_octet <= 31:
return True
return False
def expand_ipv6_address(address):
"""
Expands abbreviated IPv6 addresses to their full colon separated hex format.
For instance...
::
>>> expand_ipv6_address("2001:db8::ff00:42:8329")
"2001:0db8:0000:0000:0000:ff00:0042:8329"
>>> expand_ipv6_address("::")
"0000:0000:0000:0000:0000:0000:0000:0000"
:param str address: IPv6 address to be expanded
:raises: **ValueError** if the address can't be expanded due to being malformed
"""
if not is_valid_ipv6_address(address):
raise ValueError("'%s' isn't a valid IPv6 address" % address)
# expands collapsed groupings, there can only be a single '::' in a valid
# address
if "::" in address:
missing_groups = 7 - address.count(":")
address = address.replace("::", "::" + ":" * missing_groups)
# inserts missing zeros
for index in xrange(8):
start = index * 5
end = address.index(":", start) if index != 7 else len(address)
missing_zeros = 4 - (end - start)
if missing_zeros > 0:
address = address[:start] + "0" * missing_zeros + address[start:]
return address
def get_mask_ipv4(bits):
"""
Provides the IPv4 mask for a given number of bits, in the dotted-quad format.
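For example...
::
>>> get_mask_ipv4(24)
'255.255.255.0'
>>> get_mask_ipv4(0)
'0.0.0.0'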
:param int bits: number of bits to be converted
:returns: **str** with the subnet mask representation for this many bits
:raises: **ValueError** if given a number of bits outside the range of 0-32
"""
"""
if bits > 32 or bits < 0:
raise ValueError("A mask can only be 0-32 bits, got %i" % bits)
elif bits == 32:
return FULL_IPv4_MASK
# get the binary representation of the mask
mask_bin = _get_binary(2 ** bits - 1, 32)[::-1]
# breaks it into eight character groupings
octets = [mask_bin[8 * i:8 * (i + 1)] for i in xrange(4)]
# converts each octet into its integer value
return ".".join([str(int(octet, 2)) for octet in octets])
def get_mask_ipv6(bits):
"""
Provides the IPv6 mask for a given number of bits, in the hex colon-delimited
format.
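For example...
::
>>> get_mask_ipv6(64)
'FFFF:FFFF:FFFF:FFFF:0000:0000:0000:0000'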
:param int bits: number of bits to be converted
:returns: **str** with the subnet mask representation for this many bits
:raises: **ValueError** if given a number of bits outside the range of 0-128
"""
if bits > 128 or bits < 0:
raise ValueError("A mask can only be 0-128 bits, got %i" % bits)
elif bits == 128:
return FULL_IPv6_MASK
# get the binary representation of the mask
mask_bin = _get_binary(2 ** bits - 1, 128)[::-1]
# breaks it into sixteen character groupings
groupings = [mask_bin[16 * i:16 * (i + 1)] for i in xrange(8)]
# converts each group into its hex value
return ":".join(["%04x" % int(group, 2) for group in groupings]).upper()
def _get_masked_bits(mask):
"""
Provides the number of bits that an IPv4 subnet mask represents. Note that
not all masks can be represented by a bit count.
:param str mask: mask to be converted
:returns: **int** with the number of bits represented by the mask
:raises: **ValueError** if the mask is invalid or can't be converted
"""
if not is_valid_ipv4_address(mask):
raise ValueError("'%s' is an invalid subnet mask" % mask)
# converts octets to binary representation
mask_bin = _get_address_binary(mask)
mask_match = re.match("^(1*)(0*)$", mask_bin)
if mask_match:
return 32 - len(mask_match.groups()[1])
else:
raise ValueError("Unable to convert mask to a bit count: %s" % mask)
def _get_binary(value, bits):
"""
Provides the given value as a binary string, padded with zeros to the given
number of bits.
:param int value: value to be converted
:param int bits: number of bits to pad to
"""
# http://www.daniweb.com/code/snippet216539.html
return "".join([str((value >> y) & 1) for y in range(bits - 1, -1, -1)])
def _get_address_binary(address):
"""
Provides the binary value for an IPv4 or IPv6 address.
:returns: **str** with the binary representation of this address
:raises: **ValueError** if address is neither an IPv4 nor IPv6 address
"""
if is_valid_ipv4_address(address):
return "".join([_get_binary(int(octet), 8) for octet in address.split(".")])
elif is_valid_ipv6_address(address):
address = expand_ipv6_address(address)
return "".join([_get_binary(int(grouping, 16), 16) for grouping in address.split(":")])
else:
raise ValueError("'%s' is neither an IPv4 or IPv6 address" % address)
def _hmac_sha256(key, msg):
"""
Generates a sha256 digest using the given key and message.
:param str key: starting key for the hash
:param str msg: message to be hashed
:returns: sha256 digest of msg as bytes, hashed using the given key
"""
return hmac.new(key, msg, hashlib.sha256).digest()
def _cryptovariables_equal(x, y):
"""
Compares two strings for equality securely.
:param str x: string to be compared.
:param str y: the other string to be compared.
:returns: **True** if both strings are equal, **False** otherwise.
"""
return (
_hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, x) ==
_hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, y))

170
lib/stem/util/enum.py Normal file

@@ -0,0 +1,170 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Basic enumeration, providing ordered types for collections. These can be
constructed as simple type listings...
::
>>> from stem.util import enum
>>> insects = enum.Enum("ANT", "WASP", "LADYBUG", "FIREFLY")
>>> insects.ANT
'Ant'
>>> tuple(insects)
('Ant', 'Wasp', 'Ladybug', 'Firefly')
... or with overwritten string counterparts...
::
>>> from stem.util import enum
>>> pets = enum.Enum(("DOG", "Skippy"), "CAT", ("FISH", "Nemo"))
>>> pets.DOG
'Skippy'
>>> pets.CAT
'Cat'
**Module Overview:**
::
UppercaseEnum - Provides an enum instance with capitalized values
Enum - Provides a basic, ordered enumeration
|- keys - string representation of our enum keys
|- index_of - index of an enum value
|- next - provides the enum after a given enum value
|- previous - provides the enum before a given value
|- __getitem__ - provides the value for an enum key
+- __iter__ - iterator over our enum keys
"""
import stem.util.str_tools
def UppercaseEnum(*args):
"""
Provides an :class:`~stem.util.enum.Enum` instance where the values are
identical to the keys. Since the keys are uppercase by convention this means
the values are too. For instance...
::
>>> from stem.util import enum
>>> runlevels = enum.UppercaseEnum("DEBUG", "INFO", "NOTICE", "WARN", "ERROR")
>>> runlevels.DEBUG
'DEBUG'
:param list args: enum keys to initialize with
:returns: :class:`~stem.util.enum.Enum` instance with the given keys
"""
return Enum(*[(v, v) for v in args])
class Enum(object):
"""
Basic enumeration.
"""
def __init__(self, *args):
# ordered listings of our keys and values
keys, values = [], []
for entry in args:
if isinstance(entry, (bytes, unicode)):
key, val = entry, stem.util.str_tools._to_camel_case(entry)
elif isinstance(entry, tuple) and len(entry) == 2:
key, val = entry
else:
raise ValueError("Unrecognized input: %s" % args)
keys.append(key)
values.append(val)
setattr(self, key, val)
self._keys = tuple(keys)
self._values = tuple(values)
def keys(self):
"""
Provides an ordered listing of the enumeration keys in this set.
:returns: **list** with our enum keys
"""
return list(self._keys)
def index_of(self, value):
"""
Provides the index of the given value in the collection.
:param str value: entry to be looked up
:returns: **int** index of the given entry
:raises: **ValueError** if no such element exists
"""
return self._values.index(value)
def next(self, value):
"""
Provides the next enumeration after the given value.
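Note that this wraps around at the end of the collection. For example...
::
>>> from stem.util import enum
>>> seasons = enum.Enum("WINTER", "SPRING", "SUMMER", "FALL")
>>> seasons.next(seasons.SUMMER)
'Fall'
>>> seasons.next(seasons.FALL)
'Winter'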
:param str value: enumeration for which to get the next entry
:returns: enum value following the given entry
:raises: **ValueError** if no such element exists
"""
if not value in self._values:
raise ValueError("No such enumeration exists: %s (options: %s)" % (value, ", ".join(self._values)))
next_index = (self._values.index(value) + 1) % len(self._values)
return self._values[next_index]
def previous(self, value):
"""
Provides the previous enumeration before the given value.
:param str value: enumeration for which to get the previous entry
:returns: enum value preceding the given entry
:raises: **ValueError** if no such element exists
"""
if not value in self._values:
raise ValueError("No such enumeration exists: %s (options: %s)" % (value, ", ".join(self._values)))
prev_index = (self._values.index(value) - 1) % len(self._values)
return self._values[prev_index]
def __getitem__(self, item):
"""
Provides the values for the given key.
:param str item: key to be looked up
:returns: **str** with the value for the given key
:raises: **ValueError** if the key doesn't exist
"""
if item in vars(self):
return getattr(self, item)
else:
keys = ", ".join(self.keys())
raise ValueError("'%s' isn't among our enumeration keys, which includes: %s" % (item, keys))
def __iter__(self):
"""
Provides an ordered listing of the enums in this set.
"""
for entry in self._values:
yield entry

247
lib/stem/util/log.py Normal file

@@ -0,0 +1,247 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Functions to aid library logging. The default logging
:data:`~stem.util.log.Runlevel` is usually NOTICE and above.
**Stem users are more than welcome to listen for stem events, but these
functions are not being vended to our users. They may change in the future; use
them at your own risk.**
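For instance, a quick sketch that reports NOTICE and above runlevels to stdout...
::
from stem.util import log
log.log_to_stdout(log.Runlevel.NOTICE)
log.notice("hello world")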
**Module Overview:**
::
get_logger - provides stem's Logger instance
logging_level - converts a runlevel to its logging number
escape - escapes special characters in a message in preparation for logging
log - logs a message at the given runlevel
log_once - logs a message, deduplicating if it has already been logged
trace - logs a message at the TRACE runlevel
debug - logs a message at the DEBUG runlevel
info - logs a message at the INFO runlevel
notice - logs a message at the NOTICE runlevel
warn - logs a message at the WARN runlevel
error - logs a message at the ERROR runlevel
LogBuffer - Buffers logged events so they can be iterated over.
|- is_empty - checks if there are any events in our buffer
+- __iter__ - iterates over and removes the buffered events
log_to_stdout - reports further logged events to stdout
.. data:: Runlevel (enum)
Enumeration for logging runlevels.
========== ===========
Runlevel Description
========== ===========
**ERROR** critical issue occurred, the user needs to be notified
**WARN** non-critical issue occurred that the user should be aware of
**NOTICE** information that is helpful to the user
**INFO** high level library activity
**DEBUG** low level library activity
**TRACE** request/reply logging
========== ===========
"""
import logging
import stem.prereq
import stem.util.enum
import stem.util.str_tools
# Logging runlevels. These are *very* commonly used so we include shorter
# aliases (so they can be referenced as log.DEBUG, log.WARN, etc).
Runlevel = stem.util.enum.UppercaseEnum("TRACE", "DEBUG", "INFO", "NOTICE", "WARN", "ERROR")
TRACE, DEBUG, INFO, NOTICE, WARN, ERR = list(Runlevel)
# mapping of runlevels to the logger module's values, TRACE and DEBUG aren't
# built into the module
LOG_VALUES = {
Runlevel.TRACE: logging.DEBUG - 5,
Runlevel.DEBUG: logging.DEBUG,
Runlevel.INFO: logging.INFO,
Runlevel.NOTICE: logging.INFO + 5,
Runlevel.WARN: logging.WARN,
Runlevel.ERROR: logging.ERROR,
}
logging.addLevelName(LOG_VALUES[TRACE], "TRACE")
logging.addLevelName(LOG_VALUES[NOTICE], "NOTICE")
LOGGER = logging.getLogger("stem")
LOGGER.setLevel(LOG_VALUES[TRACE])
# There are some messages that we don't want to log more than once. This set has
# the message IDs that we've logged which fall into this category.
DEDUPLICATION_MESSAGE_IDS = set()
# Adds a default nullhandler for the stem logger, suppressing the 'No handlers
# could be found for logger "stem"' warning as per...
# http://docs.python.org/release/3.1.3/library/logging.html#configuring-logging-for-a-library
class _NullHandler(logging.Handler):
def emit(self, record):
pass
if not LOGGER.handlers:
LOGGER.addHandler(_NullHandler())
def get_logger():
"""
Provides the stem logger.
:return: **logging.Logger** for stem
"""
return LOGGER
def logging_level(runlevel):
"""
Translates a runlevel into the value expected by the logging module.
:param stem.util.log.Runlevel runlevel: runlevel to be returned, no logging if **None**
"""
if runlevel:
return LOG_VALUES[runlevel]
else:
return logging.FATAL + 5
def escape(message):
"""
Escapes specific sequences for logging (newlines, tabs, carriage returns). If
the input is **bytes** then this converts it to **unicode** under python 3.x.
:param str message: string to be escaped
:returns: str that is escaped
"""
if stem.prereq.is_python_3():
message = stem.util.str_tools._to_unicode(message)
for pattern, replacement in (("\n", "\\n"), ("\r", "\\r"), ("\t", "\\t")):
message = message.replace(pattern, replacement)
return message
def log(runlevel, message):
"""
Logs a message at the given runlevel.
:param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
:param str message: message to be logged
"""
if runlevel:
LOGGER.log(LOG_VALUES[runlevel], message)
def log_once(message_id, runlevel, message):
"""
Logs a message at the given runlevel. If a message with this ID has already
been logged then this is a no-op.
:param str message_id: unique message identifier to deduplicate on
:param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
:param str message: message to be logged
:returns: **True** if we log the message, **False** otherwise
"""
if not runlevel or message_id in DEDUPLICATION_MESSAGE_IDS:
return False
else:
DEDUPLICATION_MESSAGE_IDS.add(message_id)
log(runlevel, message)
return True
# shorter aliases for logging at a runlevel
def trace(message):
log(Runlevel.TRACE, message)
def debug(message):
log(Runlevel.DEBUG, message)
def info(message):
log(Runlevel.INFO, message)
def notice(message):
log(Runlevel.NOTICE, message)
def warn(message):
log(Runlevel.WARN, message)
def error(message):
log(Runlevel.ERROR, message)
class LogBuffer(logging.Handler):
"""
Basic log handler that listens for stem events and stores them so they can be
read later. Log entries are cleared as they are read.
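A short sketch of attaching a buffer and draining it later (the logged message
and timestamp are illustrative)...
::
from stem.util import log
event_buffer = log.LogBuffer(log.Runlevel.INFO)
log.get_logger().addHandler(event_buffer)
log.notice("tor is running")
for entry in event_buffer:
print entry # ex. "05/21/2014 14:09:41 [NOTICE] tor is running"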
"""
def __init__(self, runlevel):
# TODO: At least in python 2.6 logging.Handler has a bug in that it doesn't
# extend object, causing our super() call to fail. When we drop python 2.6
# support we should switch back to using super() instead.
#super(LogBuffer, self).__init__(level = logging_level(runlevel))
logging.Handler.__init__(self, level = logging_level(runlevel))
self.formatter = logging.Formatter(
fmt = '%(asctime)s [%(levelname)s] %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S')
self._buffer = []
def is_empty(self):
return not bool(self._buffer)
def __iter__(self):
while self._buffer:
yield self.formatter.format(self._buffer.pop(0))
def emit(self, record):
self._buffer.append(record)
class _StdoutLogger(logging.Handler):
def __init__(self, runlevel):
logging.Handler.__init__(self, level = logging_level(runlevel))
self.formatter = logging.Formatter(
fmt = '%(asctime)s [%(levelname)s] %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S')
def emit(self, record):
print self.formatter.format(record)
def log_to_stdout(runlevel):
"""
Logs further events to stdout.
:param stem.util.log.Runlevel runlevel: minimum runlevel a message needs to be to be logged
"""
get_logger().addHandler(_StdoutLogger(runlevel))

182
lib/stem/util/lru_cache.py Normal file

@@ -0,0 +1,182 @@
# Drop-in replacement for python 3.2's functools.lru_cache, from...
# http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/
#
# ... which is under the MIT license. Stem users should *not* rely upon this
# module. It will be removed when we drop support for python 3.2 and below.
"""
Memoization decorator that caches a function's return value. If later called
with the same arguments then the cached value is returned rather than
reevaluated.
This is a python 2.x port of `functools.lru_cache
<http://docs.python.org/3/library/functools.html#functools.lru_cache>`_. If
using python 3.2 or later you should use that instead.
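For instance...
::
>>> from stem.util.lru_cache import lru_cache
>>> @lru_cache()
... def double(x):
...   return x * 2
>>> double(21)
42
>>> double.cache_info()
CacheInfo(hits=0, misses=1, maxsize=100, currsize=1)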
"""
from collections import namedtuple
from functools import update_wrapper
from threading import RLock
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
class _HashedSeq(list):
__slots__ = 'hashvalue'
def __init__(self, tup, hash=hash):
self[:] = tup
self.hashvalue = hash(tup)
def __hash__(self):
return self.hashvalue
def _make_key(args, kwds, typed,
kwd_mark = (object(),),
fasttypes = set([int, str, frozenset, type(None)]),
sorted=sorted, tuple=tuple, type=type, len=len):
'Make a cache key from optionally typed positional and keyword arguments'
key = args
if kwds:
sorted_items = sorted(kwds.items())
key += kwd_mark
for item in sorted_items:
key += item
if typed:
key += tuple(type(v) for v in args)
if kwds:
key += tuple(type(v) for k, v in sorted_items)
elif len(key) == 1 and type(key[0]) in fasttypes:
return key[0]
return _HashedSeq(key)
def lru_cache(maxsize=100, typed=False):
"""Least-recently-used cache decorator.
If *maxsize* is set to None, the LRU features are disabled and the cache
can grow without bound.
If *typed* is True, arguments of different types will be cached separately.
For example, f(3.0) and f(3) will be treated as distinct calls with
distinct results.
Arguments to the cached function must be hashable.
View the cache statistics named tuple (hits, misses, maxsize, currsize) with
f.cache_info(). Clear the cache and statistics with f.cache_clear().
Access the underlying function with f.__wrapped__.
See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
"""
# Users should only access the lru_cache through its public API:
# cache_info, cache_clear, and f.__wrapped__
# The internals of the lru_cache are encapsulated for thread safety and
# to allow the implementation to change (including a possible C version).
def decorating_function(user_function):
cache = dict()
stats = [0, 0] # make statistics updateable non-locally
HITS, MISSES = 0, 1 # names for the stats fields
make_key = _make_key
cache_get = cache.get # bound method to lookup key or return None
_len = len # localize the global len() function
lock = RLock() # because linkedlist updates aren't threadsafe
root = [] # root of the circular doubly linked list
root[:] = [root, root, None, None] # initialize by pointing to self
nonlocal_root = [root] # make updateable non-locally
PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields
if maxsize == 0:
def wrapper(*args, **kwds):
# no caching, just do a statistics update after a successful call
result = user_function(*args, **kwds)
stats[MISSES] += 1
return result
elif maxsize is None:
def wrapper(*args, **kwds):
# simple caching without ordering or size limit
key = make_key(args, kwds, typed)
result = cache_get(key, root) # root used here as a unique not-found sentinel
if result is not root:
stats[HITS] += 1
return result
result = user_function(*args, **kwds)
cache[key] = result
stats[MISSES] += 1
return result
else:
def wrapper(*args, **kwds):
# size limited caching that tracks accesses by recency
key = make_key(args, kwds, typed) if kwds or typed else args
with lock:
link = cache_get(key)
if link is not None:
# record recent use of the key by moving it to the front of the list
root, = nonlocal_root
link_prev, link_next, key, result = link
link_prev[NEXT] = link_next
link_next[PREV] = link_prev
last = root[PREV]
last[NEXT] = root[PREV] = link
link[PREV] = last
link[NEXT] = root
stats[HITS] += 1
return result
result = user_function(*args, **kwds)
with lock:
root, = nonlocal_root
if key in cache:
# getting here means that this same key was added to the
# cache while the lock was released. since the link
# update is already done, we need only return the
# computed result and update the count of misses.
pass
elif _len(cache) >= maxsize:
# use the old root to store the new key and result
oldroot = root
oldroot[KEY] = key
oldroot[RESULT] = result
# empty the oldest link and make it the new root
root = nonlocal_root[0] = oldroot[NEXT]
oldkey = root[KEY]
root[KEY] = root[RESULT] = None
# now update the cache dictionary for the new links
del cache[oldkey]
cache[key] = oldroot
else:
# put result in a new link at the front of the list
last = root[PREV]
link = [last, root, key, result]
last[NEXT] = root[PREV] = cache[key] = link
stats[MISSES] += 1
return result
def cache_info():
"""Report cache statistics"""
with lock:
return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
def cache_clear():
"""Clear the cache and cache statistics"""
with lock:
cache.clear()
root = nonlocal_root[0]
root[:] = [root, root, None, None]
stats[:] = [0, 0]
wrapper.__wrapped__ = user_function
wrapper.cache_info = cache_info
wrapper.cache_clear = cache_clear
return update_wrapper(wrapper, user_function)
return decorating_function

133
lib/stem/util/ordereddict.py Normal file

@@ -0,0 +1,133 @@
# Drop in replacement for python 2.7's OrderedDict, from...
# http://pypi.python.org/pypi/ordereddict
#
# Stem users should *not* rely upon this module. It will be removed when we
# drop support for python 2.6 and below.
# Copyright (c) 2009 Raymond Hettinger
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
from UserDict import DictMixin
class OrderedDict(dict, DictMixin):
def __init__(self, *args, **kwds):
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
self.__end
except AttributeError:
self.clear()
self.update(*args, **kwds)
def clear(self):
self.__end = end = []
end += [None, end, end] # sentinel node for doubly linked list
self.__map = {} # key --> [key, prev, next]
dict.clear(self)
def __setitem__(self, key, value):
if key not in self:
end = self.__end
curr = end[1]
curr[2] = end[1] = self.__map[key] = [key, curr, end]
dict.__setitem__(self, key, value)
def __delitem__(self, key):
dict.__delitem__(self, key)
key, prev, next = self.__map.pop(key)
prev[2] = next
next[1] = prev
def __iter__(self):
end = self.__end
curr = end[2]
while curr is not end:
yield curr[0]
curr = curr[2]
def __reversed__(self):
end = self.__end
curr = end[1]
while curr is not end:
yield curr[0]
curr = curr[1]
def popitem(self, last=True):
if not self:
raise KeyError('dictionary is empty')
if last:
key = reversed(self).next()
else:
key = iter(self).next()
value = self.pop(key)
return key, value
def __reduce__(self):
items = [[k, self[k]] for k in self]
tmp = self.__map, self.__end
del self.__map, self.__end
inst_dict = vars(self).copy()
self.__map, self.__end = tmp
if inst_dict:
return (self.__class__, (items,), inst_dict)
return self.__class__, (items,)
def keys(self):
return list(self)
setdefault = DictMixin.setdefault
update = DictMixin.update
pop = DictMixin.pop
values = DictMixin.values
items = DictMixin.items
iterkeys = DictMixin.iterkeys
itervalues = DictMixin.itervalues
iteritems = DictMixin.iteritems
def __repr__(self):
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, self.items())
def copy(self):
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
def __eq__(self, other):
if isinstance(other, OrderedDict):
if len(self) != len(other):
return False
for p, q in zip(self.items(), other.items()):
if p != q:
return False
return True
return dict.__eq__(self, other)
def __ne__(self, other):
return not self == other

498
lib/stem/util/proc.py Normal file

@@ -0,0 +1,498 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Helper functions for querying process and system information from the /proc
contents. Fetching information this way provides huge performance benefits
over lookups via system utilities (ps, netstat, etc). For instance, resolving
connections this way cuts the runtime by around 90% versus the alternatives.
These functions may not work on all platforms (only Linux?).
The method for reading these files (and a little code) are borrowed from
`psutil <https://code.google.com/p/psutil/>`_, which was written by Jay Loden,
Dave Daeschler, Giampaolo Rodola' and is under the BSD license.
**These functions are not being vended to stem users. They may change in the
future; use them at your own risk.**
**Module Overview:**
::
is_available - checks if proc utilities can be used on this system
get_system_start_time - unix timestamp for when the system started
get_physical_memory - memory available on this system
get_cwd - provides the current working directory for a process
get_uid - provides the user id a process is running under
get_memory_usage - provides the memory usage of a process
get_stats - queries statistics about a process
get_connections - provides the connections made by a process
.. data:: Stat (enum)
Types of data available via the :func:`~stem.util.proc.get_stats` function.
============== ===========
Stat Description
============== ===========
**COMMAND** command name under which the process is running
**CPU_UTIME** total user time spent on the process
**CPU_STIME** total system time spent on the process
**START_TIME** when this process began, in unix time
============== ===========
"""
import base64
import os
import platform
import socket
import sys
import time
import stem.util.enum
from stem.util import log
try:
# added in python 3.2
from functools import lru_cache
except ImportError:
from stem.util.lru_cache import lru_cache
# os.sysconf is only defined on unix
try:
CLOCK_TICKS = os.sysconf(os.sysconf_names["SC_CLK_TCK"])
except AttributeError:
CLOCK_TICKS = None
Stat = stem.util.enum.Enum(
("COMMAND", "command"), ("CPU_UTIME", "utime"),
("CPU_STIME", "stime"), ("START_TIME", "start time")
)
@lru_cache()
def is_available():
"""
Checks if proc information is available on this platform.
:returns: **True** if proc contents exist on this platform, **False** otherwise
"""
if platform.system() != "Linux":
return False
else:
# list of process independent proc paths we use
proc_paths = ("/proc/stat", "/proc/meminfo", "/proc/net/tcp", "/proc/net/udp")
for path in proc_paths:
if not os.path.exists(path):
return False
return True
@lru_cache()
def get_system_start_time():
"""
Provides the unix time (seconds since epoch) when the system started.
:returns: **float** for the unix time of when the system started
:raises: **IOError** if it can't be determined
"""
start_time, parameter = time.time(), "system start time"
btime_line = _get_line("/proc/stat", "btime", parameter)
try:
result = float(btime_line.strip().split()[1])
_log_runtime(parameter, "/proc/stat[btime]", start_time)
return result
except:
exc = IOError("unable to parse the /proc/stat btime entry: %s" % btime_line)
_log_failure(parameter, exc)
raise exc
@lru_cache()
def get_physical_memory():
"""
Provides the total physical memory on the system in bytes.
:returns: **int** for the bytes of physical memory this system has
:raises: **IOError** if it can't be determined
"""
start_time, parameter = time.time(), "system physical memory"
mem_total_line = _get_line("/proc/meminfo", "MemTotal:", parameter)
try:
result = int(mem_total_line.split()[1]) * 1024
_log_runtime(parameter, "/proc/meminfo[MemTotal]", start_time)
return result
except:
exc = IOError("unable to parse the /proc/meminfo MemTotal entry: %s" % mem_total_line)
_log_failure(parameter, exc)
raise exc
def get_cwd(pid):
"""
Provides the current working directory for the given process.
:param int pid: process id of the process to be queried
:returns: **str** with the path of the working directory for the process
:raises: **IOError** if it can't be determined
"""
start_time, parameter = time.time(), "cwd"
proc_cwd_link = "/proc/%s/cwd" % pid
if pid == 0:
cwd = ""
else:
try:
cwd = os.readlink(proc_cwd_link)
except OSError:
exc = IOError("unable to read %s" % proc_cwd_link)
_log_failure(parameter, exc)
raise exc
_log_runtime(parameter, proc_cwd_link, start_time)
return cwd
def get_uid(pid):
"""
Provides the user ID the given process is running under.
:param int pid: process id of the process to be queried
:returns: **int** with the user id for the owner of the process
:raises: **IOError** if it can't be determined
"""
start_time, parameter = time.time(), "uid"
status_path = "/proc/%s/status" % pid
uid_line = _get_line(status_path, "Uid:", parameter)
try:
result = int(uid_line.split()[1])
_log_runtime(parameter, "%s[Uid]" % status_path, start_time)
return result
except:
exc = IOError("unable to parse the %s Uid entry: %s" % (status_path, uid_line))
_log_failure(parameter, exc)
raise exc
def get_memory_usage(pid):
"""
Provides the memory usage in bytes for the given process.
:param int pid: process id of the process to be queried
:returns: **tuple** of two ints with the memory usage of the process, of the
form **(resident_size, virtual_size)**
:raises: **IOError** if it can't be determined
"""
# checks if this is the kernel process
if pid == 0:
return (0, 0)
start_time, parameter = time.time(), "memory usage"
status_path = "/proc/%s/status" % pid
mem_lines = _get_lines(status_path, ("VmRSS:", "VmSize:"), parameter)
try:
residentSize = int(mem_lines["VmRSS:"].split()[1]) * 1024
virtualSize = int(mem_lines["VmSize:"].split()[1]) * 1024
_log_runtime(parameter, "%s[VmRSS|VmSize]" % status_path, start_time)
return (residentSize, virtualSize)
except:
exc = IOError("unable to parse the %s VmRSS and VmSize entries: %s" % (status_path, ", ".join(mem_lines)))
_log_failure(parameter, exc)
raise exc
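# For instance (editor's sketch; the pid and byte counts are illustrative)...
#
#   >>> get_memory_usage(8438)
#   (5316608, 23306240)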
def get_stats(pid, *stat_types):
"""
Provides process specific information. See the :data:`~stem.util.proc.Stat`
enum for valid options.
:param int pid: process id of the process to be queried
:param Stat stat_types: information to be provided back
:returns: **tuple** with all of the requested statistics as strings
:raises: **IOError** if it can't be determined
"""
if CLOCK_TICKS is None:
raise IOError("Unable to look up SC_CLK_TCK")
start_time, parameter = time.time(), "process %s" % ", ".join(stat_types)
# the stat file contains a single line, of the form...
# 8438 (tor) S 8407 8438 8407 34818 8438 4202496...
stat_path = "/proc/%s/stat" % pid
stat_line = _get_line(stat_path, str(pid), parameter)
# breaks line into component values
stat_comp = []
cmd_start, cmd_end = stat_line.find("("), stat_line.find(")")
if cmd_start != -1 and cmd_end != -1:
stat_comp.append(stat_line[:cmd_start])
stat_comp.append(stat_line[cmd_start + 1:cmd_end])
stat_comp += stat_line[cmd_end + 1:].split()
# sanity check the line's format ('or' short-circuits before we index the fields)
if len(stat_comp) < 44 or not _is_float(stat_comp[13], stat_comp[14], stat_comp[21]):
exc = IOError("stat file had an unexpected format: %s" % stat_path)
_log_failure(parameter, exc)
raise exc
results = []
for stat_type in stat_types:
if stat_type == Stat.COMMAND:
if pid == 0:
results.append("sched")
else:
results.append(stat_comp[1])
elif stat_type == Stat.CPU_UTIME:
if pid == 0:
results.append("0")
else:
results.append(str(float(stat_comp[13]) / CLOCK_TICKS))
elif stat_type == Stat.CPU_STIME:
if pid == 0:
results.append("0")
else:
results.append(str(float(stat_comp[14]) / CLOCK_TICKS))
elif stat_type == Stat.START_TIME:
if pid == 0:
# the kernel process's start time is the system start time
results.append(str(get_system_start_time()))
else:
# According to the documentation, starttime is in field 21 and the unit is
# jiffies (clock ticks). We divide by the clock ticks, then add the system
# start time to get the seconds since the epoch.
p_start_time = float(stat_comp[21]) / CLOCK_TICKS
results.append(str(p_start_time + get_system_start_time()))
_log_runtime(parameter, stat_path, start_time)
return tuple(results)
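# For instance (editor's sketch; the pid and values are illustrative)...
#
#   >>> get_stats(8438, Stat.COMMAND, Stat.CPU_UTIME, Stat.CPU_STIME)
#   ('tor', '0.13', '0.02')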
def get_connections(pid):
"""
Queries connection related information from the proc contents. This provides
similar results to netstat, lsof, sockstat, and other connection resolution
utilities (though the lookup is far quicker).
:param int pid: process id of the process to be queried
:returns: A listing of connection tuples of the form **[(local_ipAddr1,
local_port1, foreign_ipAddr1, foreign_port1, protocol), ...]** (addresses
and protocols are strings and ports are ints)
:raises: **IOError** if it can't be determined
"""
if isinstance(pid, str):
try:
pid = int(pid)
except ValueError:
raise IOError("Process pid was non-numeric: %s" % pid)
if pid == 0:
return []
# fetches the inode numbers for socket file descriptors
start_time, parameter = time.time(), "process connections"
inodes = []
for fd in os.listdir("/proc/%s/fd" % pid):
fd_path = "/proc/%s/fd/%s" % (pid, fd)
try:
# File descriptor link, such as 'socket:[30899]'
fd_name = os.readlink(fd_path)
if fd_name.startswith('socket:['):
inodes.append(fd_name[8:-1])
except OSError:
# most likely couldn't be read due to permissions
exc = IOError("unable to determine file descriptor destination: %s" % fd_path)
_log_failure(parameter, exc)
raise exc
if not inodes:
# unable to fetch any connections for this process
return []
# check for the connection information from the /proc/net contents
conn = []
for proc_file_path in ("/proc/net/tcp", "/proc/net/udp"):
try:
proc_file = open(proc_file_path)
proc_file.readline() # skip the first line
for line in proc_file:
_, l_addr, f_addr, status, _, _, _, _, _, inode = line.split()[:10]
if inode in inodes:
# if a tcp connection, skip if it isn't yet established
if proc_file_path.endswith("/tcp") and status != "01":
continue
local_ip, local_port = _decode_proc_address_encoding(l_addr)
foreign_ip, foreign_port = _decode_proc_address_encoding(f_addr)
protocol = proc_file_path[10:]
conn.append((local_ip, local_port, foreign_ip, foreign_port, protocol))
proc_file.close()
except IOError as exc:
exc = IOError("unable to read '%s': %s" % (proc_file_path, exc))
_log_failure(parameter, exc)
raise exc
except Exception as exc:
exc = IOError("unable to parse '%s': %s" % (proc_file_path, exc))
_log_failure(parameter, exc)
raise exc
_log_runtime(parameter, "/proc/net/[tcp|udp]", start_time)
return conn
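# For instance (editor's sketch; the pid, addresses, and ports are illustrative)...
#
#   >>> get_connections(8438)
#   [('17.2.11.3', 34128, '35.4.18.90', 443, 'tcp')]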
def _decode_proc_address_encoding(addr):
"""
Translates an address entry in the /proc/net/* contents to a human readable
form (`reference <http://linuxdevcenter.com/pub/a/linux/2000/11/16/LinuxAdmin.html>`_),
for instance:
::
"0500000A:0016" -> ("10.0.0.5", 22)
:param str addr: proc address entry to be decoded
:returns: **tuple** of the form **(addr, port)**, with addr as a string and port an int
"""
ip, port = addr.split(':')
# the port is represented as a two-byte hexadecimal number
port = int(port, 16)
if sys.version_info >= (3,):
ip = ip.encode('ascii')
# The IPv4 address portion is a little-endian four-byte hexadecimal number.
# That is, the least significant byte is listed first, so we need to reverse
# the order of the bytes to convert it to an IP address.
#
# This needs to account for the endian ordering as per...
# http://code.google.com/p/psutil/issues/detail?id=201
# https://trac.torproject.org/projects/tor/ticket/4777
if sys.byteorder == 'little':
ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip)[::-1])
else:
ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip))
return (ip, port)
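# Working through the docstring example by hand on a little-endian host:
#
#   "0500000A" -> bytes 05 00 00 0A -> reversed 0A 00 00 05 -> "10.0.0.5"
#   "0016"     -> 0x0016            -> port 22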
def _is_float(*value):
try:
for v in value:
float(v)
return True
except ValueError:
return False
def _get_line(file_path, line_prefix, parameter):
return _get_lines(file_path, (line_prefix, ), parameter)[line_prefix]
def _get_lines(file_path, line_prefixes, parameter):
"""
Fetches lines with the given prefixes from a file. This only provides back
the first instance of each prefix.
:param str file_path: path of the file to read
:param tuple line_prefixes: string prefixes of the lines to return
:param str parameter: description of the proc attribute being fetched
:returns: mapping of prefixes to the matching line
:raises: **IOError** if unable to read the file or can't find all of the prefixes
"""
try:
remaining_prefixes = list(line_prefixes)
proc_file, results = open(file_path), {}
for line in proc_file:
if not remaining_prefixes:
break # found everything we're looking for
for prefix in remaining_prefixes:
if line.startswith(prefix):
results[prefix] = line
remaining_prefixes.remove(prefix)
break
proc_file.close()
if remaining_prefixes:
if len(remaining_prefixes) == 1:
msg = "%s did not contain a %s entry" % (file_path, remaining_prefixes[0])
else:
msg = "%s did not contain %s entries" % (file_path, ", ".join(remaining_prefixes))
raise IOError(msg)
else:
return results
except IOError as exc:
_log_failure(parameter, exc)
raise exc
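# For instance (editor's sketch; the MemTotal value is illustrative)...
#
#   >>> _get_lines('/proc/meminfo', ('MemTotal:',), 'physical memory')
#   {'MemTotal:': 'MemTotal:       18001916 kB\n'}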
def _log_runtime(parameter, proc_location, start_time):
"""
Logs a message indicating a successful proc query.
:param str parameter: description of the proc attribute being fetched
:param str proc_location: proc files we were querying
:param float start_time: unix time for when this query was started
"""
runtime = time.time() - start_time
log.debug("proc call (%s): %s (runtime: %0.4f)" % (parameter, proc_location, runtime))
def _log_failure(parameter, exc):
"""
Logs a message indicating that the proc query failed.
:param str parameter: description of the proc attribute being fetched
:param Exception exc: exception that we're raising
"""
log.debug("proc call failed (%s): %s" % (parameter, exc))

387
lib/stem/util/str_tools.py Normal file
View file

@@ -0,0 +1,387 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Toolkit for various string operations.
**Module Overview:**
::
get_size_label - human readable label for a number of bytes
get_time_label - human readable label for a number of seconds
get_time_labels - human readable labels for each time unit
get_short_time_label - condensed time label output
parse_short_time_label - seconds represented by a short time label
"""
import codecs
import datetime
import stem.prereq
# label conversion tuples of the form...
# (bits / bytes / seconds, short label, long label)
SIZE_UNITS_BITS = (
(140737488355328.0, " Pb", " Petabit"),
(137438953472.0, " Tb", " Terabit"),
(134217728.0, " Gb", " Gigabit"),
(131072.0, " Mb", " Megabit"),
(128.0, " Kb", " Kilobit"),
(0.125, " b", " Bit"),
)
SIZE_UNITS_BYTES = (
(1125899906842624.0, " PB", " Petabyte"),
(1099511627776.0, " TB", " Terabyte"),
(1073741824.0, " GB", " Gigabyte"),
(1048576.0, " MB", " Megabyte"),
(1024.0, " KB", " Kilobyte"),
(1.0, " B", " Byte"),
)
TIME_UNITS = (
(86400.0, "d", " day"),
(3600.0, "h", " hour"),
(60.0, "m", " minute"),
(1.0, "s", " second"),
)
if stem.prereq.is_python_3():
def _to_bytes_impl(msg):
if isinstance(msg, str):
return codecs.latin_1_encode(msg, "replace")[0]
else:
return msg
def _to_unicode_impl(msg):
if msg is not None and not isinstance(msg, str):
return msg.decode("utf-8", "replace")
else:
return msg
else:
def _to_bytes_impl(msg):
if msg is not None and isinstance(msg, unicode):
return codecs.latin_1_encode(msg, "replace")[0]
else:
return msg
def _to_unicode_impl(msg):
if msg is not None and not isinstance(msg, unicode):
return msg.decode("utf-8", "replace")
else:
return msg
def _to_bytes(msg):
"""
Provides the ASCII bytes for the given string. This is purely to provide
python 3 compatibility, normalizing the unicode/ASCII change in the version
bump. For an explanation of this see...
http://python3porting.com/problems.html#nicer-solutions
:param str,unicode msg: string to be converted
:returns: ASCII bytes for string
"""
return _to_bytes_impl(msg)
def _to_unicode(msg):
"""
Provides the unicode string for the given ASCII bytes. This is purely to
provide python 3 compatibility, normalizing the unicode/ASCII change in the
version bump.
:param str,unicode msg: string to be converted
:returns: unicode conversion
"""
return _to_unicode_impl(msg)
def _to_camel_case(label, divider = "_", joiner = " "):
"""
Converts the given string to camel case, ie:
::
>>> _to_camel_case("I_LIKE_PEPPERJACK!")
'I Like Pepperjack!'
:param str label: input string to be converted
:param str divider: word boundary
:param str joiner: replacement for word boundaries
:returns: camel cased string
"""
words = []
for entry in label.split(divider):
if len(entry) == 0:
words.append("")
elif len(entry) == 1:
words.append(entry.upper())
else:
words.append(entry[0].upper() + entry[1:].lower())
return joiner.join(words)
def get_size_label(byte_count, decimal = 0, is_long = False, is_bytes = True):
"""
Converts a number of bytes into a human readable label in its most
significant units. For instance, 7500 bytes would return "7 KB". If the
is_long option is used this expands unit labels to be the properly pluralized
full word (for instance 'Kilobytes' rather than 'KB'). Units go up through
petabytes.
::
>>> get_size_label(2000000)
'1 MB'
>>> get_size_label(1050, 2)
'1.02 KB'
>>> get_size_label(1050, 3, True)
'1.025 Kilobytes'
:param int byte_count: number of bytes to be converted
:param int decimal: number of decimal digits to be included
:param bool is_long: expands units label
:param bool is_bytes: provides units in bytes if **True**, bits otherwise
:returns: **str** with human readable representation of the size
"""
if is_bytes:
return _get_label(SIZE_UNITS_BYTES, byte_count, decimal, is_long)
else:
return _get_label(SIZE_UNITS_BITS, byte_count, decimal, is_long)
def get_time_label(seconds, decimal = 0, is_long = False):
"""
Converts seconds into a time label truncated to its most significant units.
For instance, 7500 seconds would return "2h". Units go up through days.
This defaults to presenting single character labels, but if the is_long
option is used this expands labels to be the full word (space included and
properly pluralized). For instance, "4h" would be "4 hours" and "1m" would
become "1 minute".
::
>>> get_time_label(10000)
'2h'
>>> get_time_label(61, 1, True)
'1.0 minute'
>>> get_time_label(61, 2, True)
'1.01 minutes'
:param int seconds: number of seconds to be converted
:param int decimal: number of decimal digits to be included
:param bool is_long: expands units label
:returns: **str** with human readable representation of the time
"""
return _get_label(TIME_UNITS, seconds, decimal, is_long)
def get_time_labels(seconds, is_long = False):
"""
Provides a list of label conversions for each time unit, starting with its
most significant units on down. Any counts that evaluate to zero are omitted.
For example...
::
>>> get_time_labels(400)
['6m', '40s']
>>> get_time_labels(3640, True)
['1 hour', '40 seconds']
:param int seconds: number of seconds to be converted
:param bool is_long: expands units label
:returns: **list** of strings with human readable representations of the time
"""
time_labels = []
for count_per_unit, _, _ in TIME_UNITS:
if abs(seconds) >= count_per_unit:
time_labels.append(_get_label(TIME_UNITS, seconds, 0, is_long))
seconds %= count_per_unit
return time_labels
def get_short_time_label(seconds):
"""
Provides a time in the following format:
[[dd-]hh:]mm:ss
::
>>> get_short_time_label(111)
'01:51'
>>> get_short_time_label(544100)
'6-07:08:20'
:param int seconds: number of seconds to be converted
:returns: **str** with the short representation for the time
:raises: **ValueError** if the input is negative
"""
if seconds < 0:
raise ValueError("Input needs to be a non-negative integer, got '%i'" % seconds)
time_comp = {}
for amount, _, label in TIME_UNITS:
count = int(seconds / amount)
seconds %= amount
time_comp[label.strip()] = count
label = "%02i:%02i" % (time_comp["minute"], time_comp["second"])
if time_comp["day"]:
label = "%i-%02i:%s" % (time_comp["day"], time_comp["hour"], label)
elif time_comp["hour"]:
label = "%02i:%s" % (time_comp["hour"], label)
return label
def parse_short_time_label(label):
"""
Provides the number of seconds corresponding to the formatting used for the
cputime and etime fields of ps:
[[dd-]hh:]mm:ss or mm:ss.ss
::
>>> parse_short_time_label('01:51')
111
>>> parse_short_time_label('6-07:08:20')
544100
:param str label: time entry to be parsed
:returns: **int** with the number of seconds represented by the label
:raises: **ValueError** if input is malformed
"""
days, hours, minutes, seconds = '0', '0', '0', '0'
if '-' in label:
days, label = label.split('-', 1)
time_comp = label.split(":")
if len(time_comp) == 3:
hours, minutes, seconds = time_comp
elif len(time_comp) == 2:
minutes, seconds = time_comp
else:
raise ValueError("Invalid time format, we expected '[[dd-]hh:]mm:ss' or 'mm:ss.ss': %s" % label)
try:
time_sum = int(float(seconds))
time_sum += int(minutes) * 60
time_sum += int(hours) * 3600
time_sum += int(days) * 86400
return time_sum
except ValueError:
raise ValueError("Non-numeric value in time entry: %s" % label)
def _parse_iso_timestamp(entry):
"""
Parses the ISO 8601 standard that provides for timestamps like...
::
2012-11-08T16:48:41.420251
:param str entry: timestamp to be parsed
:returns: datetime for the time represented by the timestamp
:raises: ValueError if the timestamp is malformed
"""
if not isinstance(entry, str):
raise ValueError("parse_iso_timestamp() input must be a str, got a %s" % type(entry))
# based after suggestions from...
# http://stackoverflow.com/questions/127803/how-to-parse-iso-formatted-date-in-python
if '.' in entry:
timestamp_str, microseconds = entry.split('.')
else:
timestamp_str, microseconds = entry, '000000'
if len(microseconds) != 6 or not microseconds.isdigit():
raise ValueError("timestamp's microseconds should be six digits")
timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S")
return timestamp + datetime.timedelta(microseconds = int(microseconds))
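# For instance, using the docstring's own timestamp (editor's sketch)...
#
#   >>> _parse_iso_timestamp('2012-11-08T16:48:41.420251')
#   datetime.datetime(2012, 11, 8, 16, 48, 41, 420251)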
def _get_label(units, count, decimal, is_long):
"""
Provides label corresponding to units of the highest significance in the
provided set. This rounds down (ie, integer truncation after visible units).
:param tuple units: type of units to be used for conversion, containing
(count_per_unit, short_label, long_label)
:param int count: number of base units being converted
:param int decimal: decimal precision of label
:param bool is_long: uses the long label if **True**, short label otherwise
"""
# formatted string for the requested number of digits
label_format = "%%.%if" % decimal
if count < 0:
label_format = "-" + label_format
count = abs(count)
elif count == 0:
units_label = units[-1][2] + "s" if is_long else units[-1][1]
return "%s%s" % (label_format % count, units_label)
for count_per_unit, short_label, long_label in units:
if count >= count_per_unit:
# Rounding down with a '%f' is a little clunky. Reducing the count so
# it'll divide evenly as the rounded down value.
count -= count % (count_per_unit / (10 ** decimal))
count_label = label_format % (count / count_per_unit)
if is_long:
# Pluralize if any of the visible units make it greater than one. For
# instance 1.0003 is plural but 1.000 isn't.
if decimal > 0:
is_plural = count > count_per_unit
else:
is_plural = count >= count_per_unit * 2
return count_label + long_label + ("s" if is_plural else "")
else:
return count_label + short_label
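# Tracing the truncation for get_size_label(1050, 2) from the docstring
# (editor's sketch)...
#
#   count_per_unit = 1024.0, so the truncation step is 1024.0 / 10**2 = 10.24
#   1050 - (1050 % 10.24) = 1044.48
#   1044.48 / 1024.0 = 1.02, hence '1.02 KB' rather than the rounded '1.03 KB'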

1010
lib/stem/util/system.py Normal file

File diff suppressed because it is too large

98
lib/stem/util/term.py Normal file
View file

@@ -0,0 +1,98 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Utilities for working with the terminal.
**Module Overview:**
::
format - wrap text with ANSI for the given colors or attributes
.. data:: Color (enum)
.. data:: BgColor (enum)
Enumerations for foreground or background terminal color.
=========== ===========
Color Description
=========== ===========
**BLACK** black color
**BLUE** blue color
**CYAN** cyan color
**GREEN** green color
**MAGENTA** magenta color
**RED** red color
**WHITE** white color
**YELLOW** yellow color
=========== ===========
.. data:: Attr (enum)
Enumerations of terminal text attributes.
============= ===========
Attr Description
============= ===========
**BOLD** heavy typeface
**HILIGHT** inverted foreground and background
**UNDERLINE** underlined text
============= ===========
"""
import stem.util.enum
import stem.util.str_tools
TERM_COLORS = ("BLACK", "RED", "GREEN", "YELLOW", "BLUE", "MAGENTA", "CYAN", "WHITE")
Color = stem.util.enum.Enum(*TERM_COLORS)
BgColor = stem.util.enum.Enum(*["BG_" + color for color in TERM_COLORS])
Attr = stem.util.enum.Enum("BOLD", "UNDERLINE", "HILIGHT")
# mappings of terminal attribute enums to their ANSI escape encoding
FG_ENCODING = dict([(list(Color)[i], str(30 + i)) for i in range(8)])
BG_ENCODING = dict([(list(BgColor)[i], str(40 + i)) for i in range(8)])
ATTR_ENCODING = {Attr.BOLD: "1", Attr.UNDERLINE: "4", Attr.HILIGHT: "7"}
CSI = "\x1B[%sm"
RESET = CSI % "0"
def format(msg, *attr):
"""
Simple terminal text formatting using `ANSI escape sequences
<https://secure.wikimedia.org/wikipedia/en/wiki/ANSI_escape_code#CSI_codes>`_.
The following are some toolkits providing similar capabilities:
* `django.utils.termcolors <https://code.djangoproject.com/browser/django/trunk/django/utils/termcolors.py>`_
* `termcolor <http://pypi.python.org/pypi/termcolor>`_
* `colorama <http://pypi.python.org/pypi/colorama>`_
:param str msg: string to be formatted
:param str attr: text attributes; these can be :data:`~stem.util.term.Color`, :data:`~stem.util.term.BgColor`, or :data:`~stem.util.term.Attr` enums,
and are case insensitive (so strings like "red" are fine)
:returns: **str** wrapped with ANSI escape encodings, starting with the given
attributes and ending with a reset
"""
# if we have reset sequences in the message then apply our attributes
# after each of them
if RESET in msg:
return "".join([format(comp, *attr) for comp in msg.split(RESET)])
encodings = []
for text_attr in attr:
text_attr, encoding = stem.util.str_tools._to_camel_case(text_attr), None
encoding = FG_ENCODING.get(text_attr, encoding)
encoding = BG_ENCODING.get(text_attr, encoding)
encoding = ATTR_ENCODING.get(text_attr, encoding)
if encoding:
encodings.append(encoding)
if encodings:
return (CSI % ";".join(encodings)) + msg + RESET
else:
return msg
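# For instance (editor's sketch)...
#
#   >>> format("hello", Color.RED, Attr.BOLD)
#   '\x1b[31;1mhello\x1b[0m'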

115
lib/stem/util/tor_tools.py Normal file
View file

@@ -0,0 +1,115 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Miscellaneous utility functions for working with tor.
**These functions are not being vended to stem users. They may change in the
future; use them at your own risk.**
**Module Overview:**
::
is_valid_fingerprint - checks if a string is a valid tor relay fingerprint
is_valid_nickname - checks if a string is a valid tor relay nickname
is_valid_circuit_id - checks if a string is a valid tor circuit id
is_valid_stream_id - checks if a string is a valid tor stream id
is_hex_digits - checks if a string is only made up of hex digits
"""
import re
# The control-spec defines the following as...
#
# Fingerprint = "$" 40*HEXDIG
# NicknameChar = "a"-"z" / "A"-"Z" / "0" - "9"
# Nickname = 1*19 NicknameChar
#
# CircuitID = 1*16 IDChar
# IDChar = ALPHA / DIGIT
#
# HEXDIG is defined in RFC 5234 as being uppercase and used in RFC 5987 as
# case insensitive. Tor doesn't define this in the spec so flipping a coin
# and going with case insensitive.
HEX_DIGIT = "[0-9a-fA-F]"
FINGERPRINT_PATTERN = re.compile("^%s{40}$" % HEX_DIGIT)
NICKNAME_PATTERN = re.compile("^[a-zA-Z0-9]{1,19}$")
CIRC_ID_PATTERN = re.compile("^[a-zA-Z0-9]{1,16}$")
def is_valid_fingerprint(entry, check_prefix = False):
"""
Checks if a string is a properly formatted relay fingerprint. This checks for
a '$' prefix if check_prefix is true, otherwise this only validates the hex
digits.
:param str entry: string to be checked
:param bool check_prefix: checks for a '$' prefix
:returns: **True** if the string could be a relay fingerprint, **False** otherwise
"""
if not isinstance(entry, (str, unicode)):
return False
elif check_prefix:
if not entry or entry[0] != "$":
return False
entry = entry[1:]
return bool(FINGERPRINT_PATTERN.match(entry))
def is_valid_nickname(entry):
"""
Checks if a string is a valid format for being a nickname.
:param str entry: string to be checked
:returns: **True** if the string could be a nickname, **False** otherwise
"""
if not isinstance(entry, (str, unicode)):
return False
return bool(NICKNAME_PATTERN.match(entry))
def is_valid_circuit_id(entry):
"""
Checks if a string is a valid format for being a circuit identifier.
:param str entry: string to be checked
:returns: **True** if the string could be a circuit id, **False** otherwise
"""
if not isinstance(entry, (str, unicode)):
return False
return bool(CIRC_ID_PATTERN.match(entry))
def is_valid_stream_id(entry):
"""
Checks if a string is a valid format for being a stream identifier.
Currently, this is just an alias to :func:`~stem.util.tor_tools.is_valid_circuit_id`.
:param str entry: string to be checked
:returns: **True** if the string could be a stream id, **False** otherwise
"""
return is_valid_circuit_id(entry)
def is_hex_digits(entry, count):
"""
Checks if a string is the given number of hex digits. Digits represented by
letters are case insensitive.
:param str entry: string to be checked
:param int count: number of hex digits to be checked for
:returns: **True** if the string is made up of exactly this many hex digits, **False** otherwise
"""
return bool(re.match("^%s{%i}$" % (HEX_DIGIT, count), entry))
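# For instance (editor's sketch; the fingerprint is illustrative)...
#
#   >>> is_valid_fingerprint('$A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB', check_prefix = True)
#   True
#   >>> is_valid_nickname('caerSidi')
#   True
#   >>> is_hex_digits('5f3759df', 8)
#   True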