added stem python library

commit 619ab6db0f, parent 8ffa569094
37 changed files with 19032 additions and 0 deletions
lib/stem/util/__init__.py (new file, 19 lines)
@@ -0,0 +1,19 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Utility functions used by the stem library.
"""

__all__ = [
  "conf",
  "connection",
  "enum",
  "log",
  "lru_cache",
  "ordereddict",
  "proc",
  "system",
  "term",
  "tor_tools",
]
lib/stem/util/conf.py (new file, 673 lines)
@@ -0,0 +1,673 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Handlers for text configuration files. Configurations are simple string to
string mappings, with the configuration files using the following rules...

* the key/value is separated by a space
* anything after a "#" is ignored as a comment
* excess whitespace is trimmed
* empty lines are ignored
* multi-line values can be defined by following the key with lines starting
  with a '|'

For instance...

::

  # This is my sample config
  user.name Galen
  user.password yabba1234 # here's an inline comment
  user.notes takes a fancy to pepperjack cheese
  blankEntry.example

  msg.greeting
  |Multi-line message exclaiming of the
  |wonder and awe that is pepperjack!

... would be loaded as...

::

  config = {
    "user.name": "Galen",
    "user.password": "yabba1234",
    "user.notes": "takes a fancy to pepperjack cheese",
    "blankEntry.example": "",
    "msg.greeting": "Multi-line message exclaiming of the\\nwonder and awe that is pepperjack!",
  }

Configurations are managed via the :class:`~stem.util.conf.Config` class. The
:class:`~stem.util.conf.Config` can be used directly with its
:func:`~stem.util.conf.Config.get` and :func:`~stem.util.conf.Config.set`
methods, but usually modules will want a local dictionary with just the
configurations that they care about.

To do this use the :func:`~stem.util.conf.config_dict` function. For example...

::

  import getpass
  from stem.util import conf, connection

  def config_validator(key, value):
    if key == "timeout":
      # require at least a one second timeout
      return max(1, value)
    elif key == "endpoint":
      if not connection.is_valid_ipv4_address(value):
        raise ValueError("'%s' isn't a valid IPv4 address" % value)
    elif key == "port":
      if not connection.is_valid_port(value):
        raise ValueError("'%s' isn't a valid port" % value)
    elif key == "retries":
      # negative retries really don't make sense
      return max(0, value)

  CONFIG = conf.config_dict("ssh_login", {
    "username": getpass.getuser(),
    "password": "",
    "timeout": 10,
    "endpoint": "263.12.8.0",
    "port": 22,
    "reconnect": False,
    "retries": 3,
  }, config_validator)

There are several things going on here, so let's take it step by step...

* The :func:`~stem.util.conf.config_dict` provides a dictionary that's bound
  to a given configuration. If the "ssh_login" configuration changes then so
  will the contents of CONFIG.

* The dictionary we're passing to :func:`~stem.util.conf.config_dict` provides
  two important pieces of information: default values and their types. See the
  Config's :func:`~stem.util.conf.Config.get` method for how these type
  inferences work.

* The config_validator is a hook we're adding to make sure CONFIG only gets
  values we think are valid. In this case it ensures that our timeout value
  is at least one second, and rejects endpoints or ports that are invalid.

Now let's say our user has the following configuration file...

::

  username waddle_doo
  password jabberwocky
  timeout -15
  port 9000000
  retries lots
  reconnect true
  logging debug

... and we load it as follows...

::

  >>> from stem.util import conf
  >>> our_config = conf.get_config("ssh_login")
  >>> our_config.load("/home/atagar/user_config")
  >>> print CONFIG
  {
    "username": "waddle_doo",
    "password": "jabberwocky",
    "timeout": 1,
    "endpoint": "263.12.8.0",
    "port": 22,
    "reconnect": True,
    "retries": 3,
  }

Here's an explanation of what happened...

* the username, password, and reconnect attributes took the values in the
  configuration file

* the 'config_validator' we added earlier allows for a minimum timeout of one
  and rejected the invalid port (with a log message)

* we weren't able to convert the retries' "lots" value to an integer so it kept
  its default value and logged a warning

* the user didn't supply an endpoint so that remained unchanged

* our CONFIG didn't have a 'logging' attribute so it was ignored

**Module Overview:**

::

  config_dict - provides a dictionary that's kept in sync with our config
  get_config - singleton for getting configurations
  parse_enum_csv - helper function for parsing configuration entries for enums

  Config - Custom configuration
    |- load - reads a configuration file
    |- save - writes the current configuration to a file
    |- clear - empties our loaded configuration contents
    |- add_listener - notifies the given listener when an update occurs
    |- clear_listeners - removes any attached listeners
    |- keys - provides keys in the loaded configuration
    |- set - sets the given key/value pair
    |- unused_keys - provides keys that have never been requested
    |- get - provides the value for a given key, with type inference
    +- get_value - provides the value for a given key as a string
"""
import threading

from stem.util import log

CONFS = {}  # mapping of identifier to singleton instances of configs


class _SyncListener(object):
  def __init__(self, config_dict, interceptor):
    self.config_dict = config_dict
    self.interceptor = interceptor

  def update(self, config, key):
    if key in self.config_dict:
      new_value = config.get(key, self.config_dict[key])

      if new_value == self.config_dict[key]:
        return  # no change

      if self.interceptor:
        interceptor_value = self.interceptor(key, new_value)

        if interceptor_value:
          new_value = interceptor_value

      self.config_dict[key] = new_value


def config_dict(handle, conf_mappings, handler = None):
  """
  Makes a dictionary that stays synchronized with a configuration.

  This takes a dictionary of 'config_key => default_value' mappings and
  changes the values to reflect our current configuration. This will leave
  the previous values alone if...

  * we don't have a value for that config_key
  * we can't convert our value to be the same type as the default_value

  If a handler is provided then this is called just prior to assigning new
  values to the config_dict. The handler function is expected to accept the
  (key, value) for the new values and return what we should actually insert
  into the dictionary. If this returns None then the value is updated as
  normal.

  For more information about how we convert types see our
  :func:`~stem.util.conf.Config.get` method.

  **The dictionary you get from this is managed by the
  :class:`~stem.util.conf.Config` class and should be treated as being
  read-only.**

  :param str handle: unique identifier for a config instance
  :param dict conf_mappings: config key/value mappings used as our defaults
  :param functor handler: function called prior to assigning values
  """

  selected_config = get_config(handle)
  selected_config.add_listener(_SyncListener(conf_mappings, handler).update)
  return conf_mappings


def get_config(handle):
  """
  Singleton constructor for configuration file instances. If a configuration
  already exists for the handle then it's returned. Otherwise a fresh instance
  is constructed.

  :param str handle: unique identifier used to access this config instance
  """

  if handle not in CONFS:
    CONFS[handle] = Config()

  return CONFS[handle]


def parse_enum(key, value, enumeration):
  """
  Provides the enumeration value for a given key. This is a case insensitive
  lookup and raises an exception if the enum key doesn't exist.

  :param str key: configuration key being looked up
  :param str value: value to be parsed
  :param stem.util.enum.Enum enumeration: enumeration the values should be in

  :returns: enumeration value

  :raises: **ValueError** if the **value** isn't among the enumeration keys
  """

  return parse_enum_csv(key, value, enumeration, 1)[0]


def parse_enum_csv(key, value, enumeration, count = None):
  """
  Parses a given value as being a comma separated listing of enumeration keys,
  returning the corresponding enumeration values. This is intended to be a
  helper for config handlers. The checks this does are case insensitive.

  The **count** attribute can be used to make assertions based on the number of
  values. This can be...

  * None to indicate that there's no restrictions.
  * An int to indicate that we should have this many values.
  * An (int, int) tuple to indicate the range that values can be in. This range
    is inclusive and either can be None to indicate the lack of a lower or
    upper bound.

  :param str key: configuration key being looked up
  :param str value: value to be parsed
  :param stem.util.enum.Enum enumeration: enumeration the values should be in
  :param int,tuple count: validates that we have this many items

  :returns: list with the enumeration values

  :raises: **ValueError** if the count assertion fails or the **value** entries
    don't match the enumeration keys
  """

  values = [val.upper().strip() for val in value.split(',')]

  if values == ['']:
    return []

  if count is None:
    pass  # no count validation checks to do
  elif isinstance(count, int):
    if len(values) != count:
      raise ValueError("Config entry '%s' is expected to be %i comma separated values, got '%s'" % (key, count, value))
  elif isinstance(count, tuple) and len(count) == 2:
    minimum, maximum = count

    if minimum is not None and len(values) < minimum:
      raise ValueError("Config entry '%s' must have at least %i comma separated values, got '%s'" % (key, minimum, value))

    if maximum is not None and len(values) > maximum:
      raise ValueError("Config entry '%s' can have at most %i comma separated values, got '%s'" % (key, maximum, value))
  else:
    raise ValueError("The count must be None, an int, or two value tuple. Got '%s' (%s)'" % (count, type(count)))

  result = []
  enum_keys = [k.upper() for k in enumeration.keys()]
  enum_values = list(enumeration)

  for val in values:
    if val in enum_keys:
      result.append(enum_values[enum_keys.index(val)])
    else:
      raise ValueError("The '%s' entry of config entry '%s' wasn't in the enumeration (expected %s)" % (val, key, ', '.join(enum_keys)))

  return result


class Config(object):
  """
  Handler for easily working with custom configurations, providing persistence
  to and from files. All operations are thread safe.

  **Example usage:**

  User has a file at '/home/atagar/myConfig' with...

  ::

    destination.ip 1.2.3.4
    destination.port blarg

    startup.run export PATH=$PATH:~/bin
    startup.run alias l=ls

  And they have a script with...

  ::

    from stem.util import conf

    # Configuration values we'll use in this file. These are mappings of
    # configuration keys to the default values we'll use if the user doesn't
    # have something different in their config file (or it doesn't match this
    # type).

    ssh_config = conf.config_dict("ssh_login", {
      "login.user": "atagar",
      "login.password": "pepperjack_is_awesome!",
      "destination.ip": "127.0.0.1",
      "destination.port": 22,
      "startup.run": [],
    })

    # Makes an empty config instance with the handle of 'ssh_login'. This is
    # a singleton so other classes can fetch this same configuration from
    # this handle.

    user_config = conf.get_config("ssh_login")

    # Loads the user's configuration file, warning if this fails.

    try:
      user_config.load("/home/atagar/myConfig")
    except IOError as exc:
      print "Unable to load the user's config: %s" % exc

    # This replaces the contents of ssh_config with the values from the user's
    # config file if...
    #
    # * the key is present in the config file
    # * we're able to convert the configuration file's value to the same type
    #   as what's in the mapping (see the Config.get() method for how these
    #   type inferences work)
    #
    # For instance in this case...
    #
    # * the login values are left alone because they aren't in the user's
    #   config file
    #
    # * the 'destination.port' is also left with the value of 22 because we
    #   can't turn "blarg" into an integer
    #
    # The other values are replaced, so ssh_config now becomes...
    #
    # {"login.user": "atagar",
    #  "login.password": "pepperjack_is_awesome!",
    #  "destination.ip": "1.2.3.4",
    #  "destination.port": 22,
    #  "startup.run": ["export PATH=$PATH:~/bin", "alias l=ls"]}
    #
    # Information about which values fail to load, and why, is reported to
    # 'stem.util.log'.
  """

  def __init__(self):
    self._path = None     # location we last loaded from or saved to
    self._contents = {}   # configuration key/value pairs
    self._listeners = []  # functors to be notified of config changes

    # used for accessing _contents
    self._contents_lock = threading.RLock()

    # keys that have been requested (used to provide unused config contents)
    self._requested_keys = set()

  def load(self, path = None):
    """
    Reads in the contents of the given path, adding its configuration values
    to our current contents.

    :param str path: file path to be loaded, this uses the last loaded path if
      not provided

    :raises:
      * **IOError** if we fail to read the file (it doesn't exist, insufficient
        permissions, etc)
      * **ValueError** if no path was provided and we've never been provided one
    """

    if path:
      self._path = path
    elif not self._path:
      raise ValueError("Unable to load configuration: no path provided")

    with open(self._path, "r") as config_file:
      read_contents = config_file.readlines()

    with self._contents_lock:
      while read_contents:
        line = read_contents.pop(0)

        # strips any commenting or excess whitespace
        comment_start = line.find("#")

        if comment_start != -1:
          line = line[:comment_start]

        line = line.strip()

        # parse the key/value pair
        if line:
          try:
            key, value = line.split(" ", 1)
            value = value.strip()
          except ValueError:
            log.debug("Config entry '%s' is expected to be of the format 'Key Value', defaulting to '%s' -> ''" % (line, line))
            key, value = line, ""

          if not value:
            # this might be a multi-line entry, try processing it as such
            multiline_buffer = []

            while read_contents and read_contents[0].lstrip().startswith("|"):
              content = read_contents.pop(0).lstrip()[1:]  # removes '\s+|' prefix
              content = content.rstrip("\n")  # trailing newline
              multiline_buffer.append(content)

            if multiline_buffer:
              self.set(key, "\n".join(multiline_buffer), False)
              continue

          self.set(key, value, False)

  def save(self, path = None):
    """
    Saves configuration contents to disk. If a path is provided then it
    replaces the configuration location that we track.

    :param str path: location to be saved to

    :raises: **ValueError** if no path was provided and we've never been provided one
    """

    if path:
      self._path = path
    elif not self._path:
      raise ValueError("Unable to save configuration: no path provided")

    with self._contents_lock:
      with open(self._path, 'w') as output_file:
        for entry_key in sorted(self.keys()):
          for entry_value in self.get_value(entry_key, multiple = True):
            # check for multi line entries
            if "\n" in entry_value:
              entry_value = "\n|" + entry_value.replace("\n", "\n|")

            output_file.write('%s %s\n' % (entry_key, entry_value))

  def clear(self):
    """
    Drops the configuration contents and reverts back to a blank, unloaded
    state.
    """

    with self._contents_lock:
      self._contents.clear()
      self._requested_keys = set()

  def add_listener(self, listener, backfill = True):
    """
    Registers the function to be notified of configuration updates. Listeners
    are expected to be functors which accept (config, key).

    :param functor listener: function to be notified when our configuration is changed
    :param bool backfill: calls the function with our current values if **True**
    """

    with self._contents_lock:
      self._listeners.append(listener)

      if backfill:
        for key in self.keys():
          listener(self, key)

  def clear_listeners(self):
    """
    Removes all attached listeners.
    """

    self._listeners = []

  def keys(self):
    """
    Provides all keys in the currently loaded configuration.

    :returns: **list** of strings for the configuration keys we've loaded
    """

    return self._contents.keys()

  def unused_keys(self):
    """
    Provides the configuration keys that have never been provided to a caller
    via :func:`~stem.util.conf.config_dict` or the
    :func:`~stem.util.conf.Config.get` and
    :func:`~stem.util.conf.Config.get_value` methods.

    :returns: **set** of configuration keys we've loaded but have never been requested
    """

    return set(self.keys()).difference(self._requested_keys)

  def set(self, key, value, overwrite = True):
    """
    Appends the given key/value configuration mapping, behaving the same as if
    we'd loaded this from a configuration file.

    :param str key: key for the configuration mapping
    :param str,list value: value we're setting the mapping to
    :param bool overwrite: replaces the previous value if **True**, otherwise
      the values are appended
    """

    with self._contents_lock:
      if isinstance(value, str):
        if not overwrite and key in self._contents:
          self._contents[key].append(value)
        else:
          self._contents[key] = [value]

        for listener in self._listeners:
          listener(self, key)
      elif isinstance(value, (list, tuple)):
        if not overwrite and key in self._contents:
          self._contents[key] += value
        else:
          self._contents[key] = value

        for listener in self._listeners:
          listener(self, key)
      else:
        raise ValueError("Config.set() only accepts str, list, or tuple. Provided value was a '%s'" % type(value))

  def get(self, key, default = None):
    """
    Fetches the given configuration, using the key and default value to
    determine the type it should be. Recognized inferences are:

    * **default is a boolean => boolean**

      * values are case insensitive
      * provides the default if the value isn't "true" or "false"

    * **default is an integer => int**

      * provides the default if the value can't be converted to an int

    * **default is a float => float**

      * provides the default if the value can't be converted to a float

    * **default is a list => list**

      * string contents for all configuration values with this key

    * **default is a tuple => tuple**

      * string contents for all configuration values with this key

    * **default is a dictionary => dict**

      * values without "=>" in them are ignored
      * values are split into key/value pairs on "=>" with extra whitespace
        stripped

    :param str key: config setting to be fetched
    :param object default: value provided if no such key exists or fails to be converted

    :returns: given configuration value with its type inferred with the above rules
    """

    is_multivalue = isinstance(default, (list, tuple, dict))
    val = self.get_value(key, default, is_multivalue)

    if val == default:
      return val  # don't try to infer undefined values

    if isinstance(default, bool):
      if val.lower() == "true":
        val = True
      elif val.lower() == "false":
        val = False
      else:
        log.debug("Config entry '%s' is expected to be a boolean, defaulting to '%s'" % (key, str(default)))
        val = default
    elif isinstance(default, int):
      try:
        val = int(val)
      except ValueError:
        log.debug("Config entry '%s' is expected to be an integer, defaulting to '%i'" % (key, default))
        val = default
    elif isinstance(default, float):
      try:
        val = float(val)
      except ValueError:
        log.debug("Config entry '%s' is expected to be a float, defaulting to '%f'" % (key, default))
        val = default
    elif isinstance(default, list):
      pass  # nothing special to do (already a list)
    elif isinstance(default, tuple):
      val = tuple(val)
    elif isinstance(default, dict):
      valMap = {}
      for entry in val:
        if "=>" in entry:
          entryKey, entryVal = entry.split("=>", 1)
          valMap[entryKey.strip()] = entryVal.strip()
        else:
          log.debug("Ignoring invalid %s config entry (expected a mapping, but \"%s\" was missing \"=>\")" % (key, entry))
      val = valMap

    return val

  def get_value(self, key, default = None, multiple = False):
    """
    This provides the current value associated with a given key.

    :param str key: config setting to be fetched
    :param object default: value provided if no such key exists
    :param bool multiple: provides back a list of all values if **True**,
      otherwise this returns the last loaded configuration value

    :returns: **str** or **list** of string configuration values associated
      with the given key, providing the default if no such key exists
    """

    with self._contents_lock:
      if key in self._contents:
        self._requested_keys.add(key)

        if multiple:
          return self._contents[key]
        else:
          return self._contents[key][-1]
      else:
        message_id = "stem.util.conf.missing_config_key_%s" % key
        log.log_once(message_id, log.TRACE, "config entry '%s' not found, defaulting to '%s'" % (key, default))
        return default
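To make the type inference concrete, here's a minimal sketch using an invented
'demo' handle and 'retries' key: values are always stored internally as
strings, and the default's type drives the conversion when they're fetched
back...

::

  >>> from stem.util import conf
  >>> demo_config = conf.get_config("demo")
  >>> demo_config.set("retries", "5")   # stored internally as the list ['5']
  >>> demo_config.get("retries", 3)     # int default, so '5' is cast to an int
  5
  >>> demo_config.get("retries", "3")   # str default, so no conversion is done
  '5'

Similarly, parse_enum_csv's count checks can be sketched with the Runlevel
enum from stem.util.log (any enum would do, and 'my_key' is made up)...

::

  >>> from stem.util import conf, log
  >>> conf.parse_enum_csv("my_key", "warn, error", log.Runlevel, (1, 2))
  ['WARN', 'ERROR']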
lib/stem/util/connection.py (new file, 562 lines)
@@ -0,0 +1,562 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Connection and networking based utility functions.

::

  get_connections - queries the connections belonging to a given process
  get_system_resolvers - provides connection resolution methods that are likely to be available

  is_valid_ipv4_address - checks if a string is a valid IPv4 address
  is_valid_ipv6_address - checks if a string is a valid IPv6 address
  is_valid_port - checks if something is a valid representation for a port
  is_private_address - checks if an IPv4 address belongs to a private range or not

  expand_ipv6_address - provides an IPv6 address with its collapsed portions expanded
  get_mask_ipv4 - provides the mask representation for a given number of bits
  get_mask_ipv6 - provides the IPv6 mask representation for a given number of bits

.. data:: Resolver (enum)

  Method for resolving a process' connections.

  ================= ===========
  Resolver          Description
  ================= ===========
  **PROC**          /proc contents
  **NETSTAT**       netstat command
  **SS**            ss command
  **LSOF**          lsof command
  **SOCKSTAT**      sockstat command under *nix
  **BSD_SOCKSTAT**  sockstat command under FreeBSD
  **BSD_PROCSTAT**  procstat command under FreeBSD
  ================= ===========
"""
import collections
import hashlib
import hmac
import os
import platform
import re

import stem.util.proc
import stem.util.system

from stem.util import enum, log

# Connection resolution is risky to log about since it's highly likely to
# contain sensitive information. That said, it's also difficult to get right in
# a platform independent fashion. To opt into the logging required to
# troubleshoot connection resolution set the following...

LOG_CONNECTION_RESOLUTION = False

Resolver = enum.Enum(
  ('PROC', 'proc'),
  ('NETSTAT', 'netstat'),
  ('SS', 'ss'),
  ('LSOF', 'lsof'),
  ('SOCKSTAT', 'sockstat'),
  ('BSD_SOCKSTAT', 'sockstat (bsd)'),
  ('BSD_PROCSTAT', 'procstat (bsd)')
)

Connection = collections.namedtuple('Connection', [
  'local_address',
  'local_port',
  'remote_address',
  'remote_port',
  'protocol',
])

FULL_IPv4_MASK = "255.255.255.255"
FULL_IPv6_MASK = "FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF"

CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE = os.urandom(32)

RESOLVER_COMMAND = {
  Resolver.PROC: '',

  # -n = prevents dns lookups, -p = include process
  Resolver.NETSTAT: 'netstat -np',

  # -n = numeric ports, -p = include process, -t = tcp sockets, -u = udp sockets
  Resolver.SS: 'ss -nptu',

  # -n = prevent dns lookups, -P = show port numbers (not names), -i = ip only, -w = no warnings
  # (lsof provides a '-p <pid>' but oddly in practice it seems to be ~11-28% slower)
  Resolver.LSOF: 'lsof -wnPi',

  Resolver.SOCKSTAT: 'sockstat',

  # -4 = IPv4, -c = connected sockets
  Resolver.BSD_SOCKSTAT: 'sockstat -4c',

  # -f <pid> = process pid
  Resolver.BSD_PROCSTAT: 'procstat -f {pid}',
}

RESOLVER_FILTER = {
  Resolver.PROC: '',

  # tcp 0 586 192.168.0.1:44284 38.229.79.2:443 ESTABLISHED 15843/tor
  Resolver.NETSTAT: '^{protocol}\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED\s+{pid}/{name}\s*$',

  # tcp ESTAB 0 0 192.168.0.20:44415 38.229.79.2:443 users:(("tor",15843,9))
  Resolver.SS: '^{protocol}\s+ESTAB\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+users:\(\("{name}",{pid},[0-9]+\)\)$',

  # tor 3873 atagar 45u IPv4 40994 0t0 TCP 10.243.55.20:45724->194.154.227.109:9001 (ESTABLISHED)
  Resolver.LSOF: '^{name}\s+{pid}\s+.*\s+{protocol}\s+{local_address}:{local_port}->{remote_address}:{remote_port} \(ESTABLISHED\)$',

  # atagar tor 15843 tcp4 192.168.0.20:44092 68.169.35.102:443 ESTABLISHED
  Resolver.SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED$',

  # _tor tor 4397 12 tcp4 172.27.72.202:54011 127.0.0.1:9001
  Resolver.BSD_SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+\S+\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$',

  # 3561 tor 4 s - rw---n-- 2 0 TCP 10.0.0.2:9050 10.0.0.1:22370
  Resolver.BSD_PROCSTAT: '^\s*{pid}\s+{name}\s+.*\s+{protocol}\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$',
}

def get_connections(resolver, process_pid = None, process_name = None):
  """
  Retrieves a list of the current connections for a given process. This
  provides a list of Connection instances, which have five attributes...

  * local_address (str)
  * local_port (int)
  * remote_address (str)
  * remote_port (int)
  * protocol (str, generally either 'tcp' or 'udp')

  :param Resolver resolver: method of connection resolution to use
  :param int process_pid: pid of the process to retrieve
  :param str process_name: name of the process to retrieve

  :raises:
    * **ValueError** if using **Resolver.PROC** or **Resolver.BSD_PROCSTAT**
      and the process_pid wasn't provided

    * **IOError** if no connections are available or resolution fails
      (generally they're indistinguishable). The common causes are the
      command being unavailable or permissions.
  """

  def _log(msg):
    if LOG_CONNECTION_RESOLUTION:
      log.debug(msg)

  _log("=" * 80)
  _log("Querying connections for resolver: %s, pid: %s, name: %s" % (resolver, process_pid, process_name))

  if isinstance(process_pid, str):
    try:
      process_pid = int(process_pid)
    except ValueError:
      raise ValueError("Process pid was non-numeric: %s" % process_pid)

  if process_pid is None and resolver in (Resolver.PROC, Resolver.BSD_PROCSTAT):
    raise ValueError("%s resolution requires a pid" % resolver)

  if resolver == Resolver.PROC:
    return [Connection(*conn) for conn in stem.util.proc.get_connections(process_pid)]

  resolver_command = RESOLVER_COMMAND[resolver].format(pid = process_pid)

  try:
    results = stem.util.system.call(resolver_command)
  except OSError as exc:
    raise IOError("Unable to query '%s': %s" % (resolver_command, exc))

  resolver_regex_str = RESOLVER_FILTER[resolver].format(
    protocol = '(?P<protocol>\S+)',
    local_address = '(?P<local_address>[0-9.]+)',
    local_port = '(?P<local_port>[0-9]+)',
    remote_address = '(?P<remote_address>[0-9.]+)',
    remote_port = '(?P<remote_port>[0-9]+)',
    pid = process_pid if process_pid else '[0-9]*',
    name = process_name if process_name else '\S*',
  )

  _log("Resolver regex: %s" % resolver_regex_str)
  _log("Resolver results:\n%s" % '\n'.join(results))

  connections = []
  resolver_regex = re.compile(resolver_regex_str)

  for line in results:
    match = resolver_regex.match(line)

    if match:
      attr = match.groupdict()
      local_addr = attr['local_address']
      local_port = int(attr['local_port'])
      remote_addr = attr['remote_address']
      remote_port = int(attr['remote_port'])
      protocol = attr['protocol'].lower()

      if remote_addr == '0.0.0.0':
        continue  # procstat response for unestablished connections

      if not (is_valid_ipv4_address(local_addr) and is_valid_ipv4_address(remote_addr)):
        _log("Invalid address (%s or %s): %s" % (local_addr, remote_addr, line))
        continue  # skip entries that fail validation
      elif not (is_valid_port(local_port) and is_valid_port(remote_port)):
        _log("Invalid port (%s or %s): %s" % (local_port, remote_port, line))
        continue
      elif protocol not in ('tcp', 'udp'):
        _log("Unrecognized protocol (%s): %s" % (protocol, line))
        continue

      conn = Connection(local_addr, local_port, remote_addr, remote_port, protocol)
      connections.append(conn)
      _log(str(conn))

  _log("%i connections found" % len(connections))

  if not connections:
    raise IOError("No results found using: %s" % resolver_command)

  return connections

def get_system_resolvers(system = None):
  """
  Provides the types of connection resolvers likely to be available on this platform.

  :param str system: system to get resolvers for, this is determined by
    platform.system() if not provided

  :returns: **list** of Resolvers likely to be available on this platform
  """

  if system is None:
    system = platform.system()

  if system == 'Windows':
    resolvers = []
  elif system in ('Darwin', 'OpenBSD'):
    resolvers = [Resolver.LSOF]
  elif system == 'FreeBSD':
    # Netstat is available, but lacks a '-p' equivalent so we can't associate
    # the results to processes. The platform also has a ss command, but it
    # belongs to a spreadsheet application.

    resolvers = [Resolver.BSD_SOCKSTAT, Resolver.BSD_PROCSTAT, Resolver.LSOF]
  else:
    # Sockstat isn't available by default on ubuntu.

    resolvers = [Resolver.NETSTAT, Resolver.SOCKSTAT, Resolver.LSOF, Resolver.SS]

  # remove any that aren't in the user's PATH

  resolvers = filter(lambda r: stem.util.system.is_available(RESOLVER_COMMAND[r]), resolvers)

  # proc resolution, by far, outperforms the others so we default to it if able

  if stem.util.proc.is_available():
    resolvers = [Resolver.PROC] + resolvers

  return resolvers

def is_valid_ipv4_address(address):
  """
  Checks if a string is a valid IPv4 address.

  :param str address: string to be checked

  :returns: **True** if input is a valid IPv4 address, **False** otherwise
  """

  if not isinstance(address, (bytes, unicode)):
    return False

  # checks if there are four period separated values

  if address.count(".") != 3:
    return False

  # checks that each octet is a decimal value between 0-255
  for entry in address.split("."):
    if not entry.isdigit() or int(entry) < 0 or int(entry) > 255:
      return False
    elif entry[0] == "0" and len(entry) > 1:
      return False  # leading zeros, for instance in "1.2.3.001"

  return True

def is_valid_ipv6_address(address, allow_brackets = False):
  """
  Checks if a string is a valid IPv6 address.

  :param str address: string to be checked
  :param bool allow_brackets: ignore brackets which form '[address]'

  :returns: **True** if input is a valid IPv6 address, **False** otherwise
  """

  if allow_brackets:
    if address.startswith("[") and address.endswith("]"):
      address = address[1:-1]

  # addresses are made up of eight colon separated groups of four hex digits
  # with leading zeros being optional
  # https://en.wikipedia.org/wiki/IPv6#Address_format

  colon_count = address.count(":")

  if colon_count > 7:
    return False  # too many groups
  elif colon_count != 7 and "::" not in address:
    return False  # not enough groups and none are collapsed
  elif address.count("::") > 1 or ":::" in address:
    return False  # multiple groupings of zeros can't be collapsed

  for entry in address.split(":"):
    if not re.match("^[0-9a-fA-F]{0,4}$", entry):
      return False

  return True

def is_valid_port(entry, allow_zero = False):
  """
  Checks if a string or int is a valid port number.

  :param list,str,int entry: string, integer or list to be checked
  :param bool allow_zero: accept port number of zero (reserved by definition)

  :returns: **True** if input is an integer and within the valid port range, **False** otherwise
  """

  if isinstance(entry, list):
    for port in entry:
      if not is_valid_port(port, allow_zero):
        return False

    return True
  elif isinstance(entry, (bytes, unicode)):
    if not entry.isdigit():
      return False
    elif entry[0] == "0" and len(entry) > 1:
      return False  # leading zeros, ex "001"

    entry = int(entry)

  if allow_zero and entry == 0:
    return True

  return entry > 0 and entry < 65536


def is_private_address(address):
  """
  Checks if the IPv4 address is in a range belonging to the local network or
  loopback. These include:

  * Private ranges: 10.*, 172.16.* - 172.31.*, 192.168.*
  * Loopback: 127.*

  :param str address: string to be checked

  :returns: **True** if input is in a private range, **False** otherwise

  :raises: **ValueError** if the address isn't a valid IPv4 address
  """

  if not is_valid_ipv4_address(address):
    raise ValueError("'%s' isn't a valid IPv4 address" % address)

  # checks for any of the simple wildcard ranges

  if address.startswith("10.") or address.startswith("192.168.") or address.startswith("127."):
    return True

  # checks for the 172.16.* - 172.31.* range

  if address.startswith("172."):
    second_octet = int(address.split('.')[1])

    if second_octet >= 16 and second_octet <= 31:
      return True

  return False


def expand_ipv6_address(address):
  """
  Expands abbreviated IPv6 addresses to their full colon separated hex format.
  For instance...

  ::

    >>> expand_ipv6_address("2001:db8::ff00:42:8329")
    "2001:0db8:0000:0000:0000:ff00:0042:8329"

    >>> expand_ipv6_address("::")
    "0000:0000:0000:0000:0000:0000:0000:0000"

  :param str address: IPv6 address to be expanded

  :raises: **ValueError** if the address can't be expanded due to being malformed
  """

  if not is_valid_ipv6_address(address):
    raise ValueError("'%s' isn't a valid IPv6 address" % address)

  # expands collapsed groupings, there can only be a single '::' in a valid
  # address
  if "::" in address:
    missing_groups = 7 - address.count(":")
    address = address.replace("::", "::" + ":" * missing_groups)

  # inserts missing zeros
  for index in xrange(8):
    start = index * 5
    end = address.index(":", start) if index != 7 else len(address)
    missing_zeros = 4 - (end - start)

    if missing_zeros > 0:
      address = address[:start] + "0" * missing_zeros + address[start:]

  return address


def get_mask_ipv4(bits):
  """
  Provides the IPv4 mask for a given number of bits, in the dotted-quad format.

  :param int bits: number of bits to be converted

  :returns: **str** with the subnet mask representation for this many bits

  :raises: **ValueError** if given a number of bits outside the range of 0-32
  """

  if bits > 32 or bits < 0:
    raise ValueError("A mask can only be 0-32 bits, got %i" % bits)
  elif bits == 32:
    return FULL_IPv4_MASK

  # get the binary representation of the mask
  mask_bin = _get_binary(2 ** bits - 1, 32)[::-1]

  # breaks it into eight character groupings
  octets = [mask_bin[8 * i:8 * (i + 1)] for i in xrange(4)]

  # converts each octet into its integer value
  return ".".join([str(int(octet, 2)) for octet in octets])


def get_mask_ipv6(bits):
  """
  Provides the IPv6 mask for a given number of bits, in the hex colon-delimited
  format.

  :param int bits: number of bits to be converted

  :returns: **str** with the subnet mask representation for this many bits

  :raises: **ValueError** if given a number of bits outside the range of 0-128
  """

  if bits > 128 or bits < 0:
    raise ValueError("A mask can only be 0-128 bits, got %i" % bits)
  elif bits == 128:
    return FULL_IPv6_MASK

  # get the binary representation of the mask
  mask_bin = _get_binary(2 ** bits - 1, 128)[::-1]

  # breaks it into sixteen character groupings
  groupings = [mask_bin[16 * i:16 * (i + 1)] for i in xrange(8)]

  # converts each group into its hex value
  return ":".join(["%04x" % int(group, 2) for group in groupings]).upper()


def _get_masked_bits(mask):
  """
  Provides the number of bits that an IPv4 subnet mask represents. Note that
  not all masks can be represented by a bit count.

  :param str mask: mask to be converted

  :returns: **int** with the number of bits represented by the mask

  :raises: **ValueError** if the mask is invalid or can't be converted
  """

  if not is_valid_ipv4_address(mask):
    raise ValueError("'%s' is an invalid subnet mask" % mask)

  # converts octets to binary representation
  mask_bin = _get_address_binary(mask)
  mask_match = re.match("^(1*)(0*)$", mask_bin)

  if mask_match:
    return 32 - len(mask_match.groups()[1])
  else:
    raise ValueError("Unable to convert mask to a bit count: %s" % mask)


def _get_binary(value, bits):
  """
  Provides the given value as a binary string, padded with zeros to the given
  number of bits.

  :param int value: value to be converted
  :param int bits: number of bits to pad to
  """

  # http://www.daniweb.com/code/snippet216539.html
  return "".join([str((value >> y) & 1) for y in range(bits - 1, -1, -1)])


def _get_address_binary(address):
  """
  Provides the binary value for an IPv4 or IPv6 address.

  :returns: **str** with the binary representation of this address

  :raises: **ValueError** if address is neither an IPv4 nor an IPv6 address
  """

  if is_valid_ipv4_address(address):
    return "".join([_get_binary(int(octet), 8) for octet in address.split(".")])
  elif is_valid_ipv6_address(address):
    address = expand_ipv6_address(address)
    return "".join([_get_binary(int(grouping, 16), 16) for grouping in address.split(":")])
  else:
    raise ValueError("'%s' is neither an IPv4 nor an IPv6 address" % address)


def _hmac_sha256(key, msg):
  """
  Generates a sha256 digest using the given key and message.

  :param str key: starting key for the hash
  :param str msg: message to be hashed

  :returns: sha256 digest of msg as bytes, hashed using the given key
  """

  return hmac.new(key, msg, hashlib.sha256).digest()


def _cryptovariables_equal(x, y):
  """
  Compares two strings for equality securely.

  :param str x: string to be compared.
  :param str y: the other string to be compared.

  :returns: **True** if both strings are equal, **False** otherwise.
  """

  return (
    _hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, x) ==
    _hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, y))
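A rough sketch of how these helpers fit together (the pid 1234 and process
name 'tor' below are placeholders): pick the first resolver that
get_system_resolvers() reports, then hand it to get_connections(), which
raises an IOError if nothing can be resolved...

::

  >>> from stem.util import connection
  >>> connection.get_mask_ipv4(24)
  '255.255.255.0'
  >>> resolvers = connection.get_system_resolvers()
  >>> if resolvers:
  ...   conns = connection.get_connections(resolvers[0], process_pid = 1234, process_name = 'tor')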
lib/stem/util/enum.py (new file, 170 lines)
@@ -0,0 +1,170 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Basic enumeration, providing ordered types for collections. These can be
constructed as simple type listings...

::

  >>> from stem.util import enum
  >>> insects = enum.Enum("ANT", "WASP", "LADYBUG", "FIREFLY")
  >>> insects.ANT
  'Ant'
  >>> tuple(insects)
  ('Ant', 'Wasp', 'Ladybug', 'Firefly')

... or with overwritten string counterparts...

::

  >>> from stem.util import enum
  >>> pets = enum.Enum(("DOG", "Skippy"), "CAT", ("FISH", "Nemo"))
  >>> pets.DOG
  'Skippy'
  >>> pets.CAT
  'Cat'

**Module Overview:**

::

  UppercaseEnum - Provides an enum instance with capitalized values

  Enum - Provides a basic, ordered enumeration
    |- keys - string representation of our enum keys
    |- index_of - index of an enum value
    |- next - provides the enum after a given enum value
    |- previous - provides the enum before a given value
    |- __getitem__ - provides the value for an enum key
    +- __iter__ - iterator over our enum keys
"""

import stem.util.str_tools


def UppercaseEnum(*args):
  """
  Provides an :class:`~stem.util.enum.Enum` instance where the values are
  identical to the keys. Since the keys are uppercase by convention this means
  the values are too. For instance...

  ::

    >>> from stem.util import enum
    >>> runlevels = enum.UppercaseEnum("DEBUG", "INFO", "NOTICE", "WARN", "ERROR")
    >>> runlevels.DEBUG
    'DEBUG'

  :param list args: enum keys to initialize with

  :returns: :class:`~stem.util.enum.Enum` instance with the given keys
  """

  return Enum(*[(v, v) for v in args])


class Enum(object):
  """
  Basic enumeration.
  """

  def __init__(self, *args):
    # ordered listings of our keys and values
    keys, values = [], []

    for entry in args:
      if isinstance(entry, (bytes, unicode)):
        key, val = entry, stem.util.str_tools._to_camel_case(entry)
      elif isinstance(entry, tuple) and len(entry) == 2:
        key, val = entry
      else:
        raise ValueError("Unrecognized input: %s" % args)

      keys.append(key)
      values.append(val)
      setattr(self, key, val)

    self._keys = tuple(keys)
    self._values = tuple(values)

  def keys(self):
    """
    Provides an ordered listing of the enumeration keys in this set.

    :returns: **list** with our enum keys
    """

    return list(self._keys)

  def index_of(self, value):
    """
    Provides the index of the given value in the collection.

    :param str value: entry to be looked up

    :returns: **int** index of the given entry

    :raises: **ValueError** if no such element exists
    """

    return self._values.index(value)

  def next(self, value):
    """
    Provides the next enumeration after the given value.

    :param str value: enumeration for which to get the next entry

    :returns: enum value following the given entry

    :raises: **ValueError** if no such element exists
    """

    if value not in self._values:
      raise ValueError("No such enumeration exists: %s (options: %s)" % (value, ", ".join(self._values)))

    next_index = (self._values.index(value) + 1) % len(self._values)
    return self._values[next_index]

  def previous(self, value):
    """
    Provides the previous enumeration before the given value.

    :param str value: enumeration for which to get the previous entry

    :returns: enum value preceding the given entry

    :raises: **ValueError** if no such element exists
    """

    if value not in self._values:
      raise ValueError("No such enumeration exists: %s (options: %s)" % (value, ", ".join(self._values)))

    prev_index = (self._values.index(value) - 1) % len(self._values)
    return self._values[prev_index]

  def __getitem__(self, item):
    """
    Provides the value for the given key.

    :param str item: key to be looked up

    :returns: **str** with the value for the given key

    :raises: **ValueError** if the key doesn't exist
    """

    if item in vars(self):
      return getattr(self, item)
    else:
      keys = ", ".join(self.keys())
      raise ValueError("'%s' isn't among our enumeration keys, which includes: %s" % (item, keys))

  def __iter__(self):
    """
    Provides an ordered listing of the enums in this set.
    """

    for entry in self._values:
      yield entry
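Since next() and previous() wrap around the ends of the collection, these
enums double as simple circular orderings. A small sketch with an invented
'state' enum...

::

  >>> from stem.util import enum
  >>> state = enum.Enum("STOPPED", "RUNNING", "PAUSED")
  >>> state.next(state.STOPPED)
  'Running'
  >>> state.previous(state.STOPPED)  # wraps around to the last entry
  'Paused'
  >>> state.index_of("Running")
  1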
lib/stem/util/log.py (new file, 247 lines)
|
@ -0,0 +1,247 @@
|
|||
# Copyright 2011-2013, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Functions to aid library logging. The default logging
|
||||
:data:`~stem.util.log.Runlevel` is usually NOTICE and above.
|
||||
|
||||
**Stem users are more than welcome to listen for stem events, but these
|
||||
functions are not being vended to our users. They may change in the future, use
|
||||
them at your own risk.**
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
get_logger - provides the stem's Logger instance
|
||||
logging_level - converts a runlevel to its logging number
|
||||
escape - escapes special characters in a message in preparation for logging
|
||||
|
||||
log - logs a message at the given runlevel
|
||||
log_once - logs a message, deduplicating if it has already been logged
|
||||
trace - logs a message at the TRACE runlevel
|
||||
debug - logs a message at the DEBUG runlevel
|
||||
info - logs a message at the INFO runlevel
|
||||
notice - logs a message at the NOTICE runlevel
|
||||
warn - logs a message at the WARN runlevel
|
||||
error - logs a message at the ERROR runlevel
|
||||
|
||||
LogBuffer - Buffers logged events so they can be iterated over.
|
||||
|- is_empty - checks if there's events in our buffer
|
||||
+- __iter__ - iterates over and removes the buffered events
|
||||
|
||||
log_to_stdout - reports further logged events to stdout
|
||||
|
||||
.. data:: Runlevel (enum)
|
||||
|
||||
Enumeration for logging runlevels.
|
||||
|
||||
========== ===========
|
||||
Runlevel Description
|
||||
========== ===========
|
||||
**ERROR** critical issue occurred, the user needs to be notified
|
||||
**WARN** non-critical issue occurred that the user should be aware of
|
||||
**NOTICE** information that is helpful to the user
|
||||
**INFO** high level library activity
|
||||
**DEBUG** low level library activity
|
||||
**TRACE** request/reply logging
|
||||
========== ===========
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
import stem.prereq
|
||||
import stem.util.enum
|
||||
import stem.util.str_tools
|
||||
|
||||
# Logging runlevels. These are *very* commonly used so including shorter
|
||||
# aliases (so they can be referenced as log.DEBUG, log.WARN, etc).
|
||||
|
||||
Runlevel = stem.util.enum.UppercaseEnum("TRACE", "DEBUG", "INFO", "NOTICE", "WARN", "ERROR")
|
||||
TRACE, DEBUG, INFO, NOTICE, WARN, ERR = list(Runlevel)
|
||||
|
||||
# mapping of runlevels to the logger module's values, TRACE and DEBUG aren't
# built into the module

LOG_VALUES = {
  Runlevel.TRACE: logging.DEBUG - 5,
  Runlevel.DEBUG: logging.DEBUG,
  Runlevel.INFO: logging.INFO,
  Runlevel.NOTICE: logging.INFO + 5,
  Runlevel.WARN: logging.WARN,
  Runlevel.ERROR: logging.ERROR,
}

logging.addLevelName(LOG_VALUES[TRACE], "TRACE")
logging.addLevelName(LOG_VALUES[NOTICE], "NOTICE")

LOGGER = logging.getLogger("stem")
LOGGER.setLevel(LOG_VALUES[TRACE])

# There's some messages that we don't want to log more than once. This set has
# the message IDs that we've logged which fall into this category.
DEDUPLICATION_MESSAGE_IDS = set()

# Adds a default nullhandler for the stem logger, suppressing the 'No handlers
# could be found for logger "stem"' warning as per...
# http://docs.python.org/release/3.1.3/library/logging.html#configuring-logging-for-a-library


class _NullHandler(logging.Handler):
  def emit(self, record):
    pass

if not LOGGER.handlers:
  LOGGER.addHandler(_NullHandler())


def get_logger():
  """
  Provides the stem logger.

  :returns: **logging.Logger** for stem
  """

  return LOGGER


def logging_level(runlevel):
  """
  Translates a runlevel into the value expected by the logging module.

  :param stem.util.log.Runlevel runlevel: runlevel to be translated, no logging if **None**
  """

  if runlevel:
    return LOG_VALUES[runlevel]
  else:
    return logging.FATAL + 5


def escape(message):
  """
  Escapes specific sequences for logging (newlines, tabs, carriage returns). If
  the input is **bytes** then this converts it to **unicode** under python 3.x.

  :param str message: string to be escaped

  :returns: str that is escaped
  """

  if stem.prereq.is_python_3():
    message = stem.util.str_tools._to_unicode(message)

  for pattern, replacement in (("\n", "\\n"), ("\r", "\\r"), ("\t", "\\t")):
    message = message.replace(pattern, replacement)

  return message


def log(runlevel, message):
  """
  Logs a message at the given runlevel.

  :param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
  :param str message: message to be logged
  """

  if runlevel:
    LOGGER.log(LOG_VALUES[runlevel], message)


def log_once(message_id, runlevel, message):
  """
  Logs a message at the given runlevel. If a message with this ID has already
  been logged then this is a no-op.

  :param str message_id: unique message identifier to deduplicate on
  :param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
  :param str message: message to be logged

  :returns: **True** if we log the message, **False** otherwise
  """

  if not runlevel or message_id in DEDUPLICATION_MESSAGE_IDS:
    return False
  else:
    DEDUPLICATION_MESSAGE_IDS.add(message_id)
    log(runlevel, message)
    return True

# shorter aliases for logging at a runlevel


def trace(message):
  log(Runlevel.TRACE, message)


def debug(message):
  log(Runlevel.DEBUG, message)


def info(message):
  log(Runlevel.INFO, message)


def notice(message):
  log(Runlevel.NOTICE, message)


def warn(message):
  log(Runlevel.WARN, message)


def error(message):
  log(Runlevel.ERROR, message)


class LogBuffer(logging.Handler):
  """
  Basic log handler that listens for stem events and stores them so they can be
  read later. Log entries are cleared as they are read.
  """

  def __init__(self, runlevel):
    # TODO: At least in python 2.6 logging.Handler has a bug in that it doesn't
    # extend object, causing our super() call to fail. When we drop python 2.6
    # support we should switch back to using super() instead.
    #super(LogBuffer, self).__init__(level = logging_level(runlevel))

    logging.Handler.__init__(self, level = logging_level(runlevel))

    self.formatter = logging.Formatter(
      fmt = '%(asctime)s [%(levelname)s] %(message)s',
      datefmt = '%m/%d/%Y %H:%M:%S')

    self._buffer = []

  def is_empty(self):
    return not bool(self._buffer)

  def __iter__(self):
    while self._buffer:
      yield self.formatter.format(self._buffer.pop(0))

  def emit(self, record):
    self._buffer.append(record)


class _StdoutLogger(logging.Handler):
  def __init__(self, runlevel):
    logging.Handler.__init__(self, level = logging_level(runlevel))

    self.formatter = logging.Formatter(
      fmt = '%(asctime)s [%(levelname)s] %(message)s',
      datefmt = '%m/%d/%Y %H:%M:%S')

  def emit(self, record):
    print self.formatter.format(record)


def log_to_stdout(runlevel):
  """
  Logs further events to stdout.

  :param stem.util.log.Runlevel runlevel: minimum runlevel a message needs to be to be logged
  """

  get_logger().addHandler(_StdoutLogger(runlevel))
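For context, a minimal sketch of how these helpers fit together (the runlevel choice and message strings here are hypothetical):

::

  from stem.util import log

  log.log_to_stdout(log.Runlevel.NOTICE)  # emit NOTICE and above to stdout

  log.debug("below the NOTICE threshold, so not printed")
  log.notice("tor is using a deprecated option")

  # repeated calls with the same message id are no-ops
  log.log_once("example.deprecated_option", log.Runlevel.WARN, "only logged once")
  log.log_once("example.deprecated_option", log.Runlevel.WARN, "only logged once")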
182
lib/stem/util/lru_cache.py
Normal file

@@ -0,0 +1,182 @@
# Drop-in replacement for python 3.2's functools.lru_cache, from...
# http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/
#
# ... which is under the MIT license. Stem users should *not* rely upon this
# module. It will be removed when we drop support for python 3.2 and below.

"""
Memoization decorator that caches a function's return value. If later called
with the same arguments then the cached value is returned rather than
reevaluated.

This is a python 2.x port of `functools.lru_cache
<http://docs.python.org/3/library/functools.html#functools.lru_cache>`_. If
using python 3.2 or later you should use that instead.
"""

from collections import namedtuple
from functools import update_wrapper
from threading import RLock

_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])


class _HashedSeq(list):
  __slots__ = 'hashvalue'

  def __init__(self, tup, hash=hash):
    self[:] = tup
    self.hashvalue = hash(tup)

  def __hash__(self):
    return self.hashvalue


def _make_key(args, kwds, typed,
              kwd_mark = (object(),),
              fasttypes = set([int, str, frozenset, type(None)]),
              sorted=sorted, tuple=tuple, type=type, len=len):
  'Make a cache key from optionally typed positional and keyword arguments'

  key = args
  if kwds:
    sorted_items = sorted(kwds.items())
    key += kwd_mark
    for item in sorted_items:
      key += item
  if typed:
    key += tuple(type(v) for v in args)
    if kwds:
      key += tuple(type(v) for k, v in sorted_items)
  elif len(key) == 1 and type(key[0]) in fasttypes:
    return key[0]
  return _HashedSeq(key)


def lru_cache(maxsize=100, typed=False):
  """Least-recently-used cache decorator.

  If *maxsize* is set to None, the LRU features are disabled and the cache
  can grow without bound.

  If *typed* is True, arguments of different types will be cached separately.
  For example, f(3.0) and f(3) will be treated as distinct calls with
  distinct results.

  Arguments to the cached function must be hashable.

  View the cache statistics named tuple (hits, misses, maxsize, currsize) with
  f.cache_info(). Clear the cache and statistics with f.cache_clear().
  Access the underlying function with f.__wrapped__.

  See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
  """

  # Users should only access the lru_cache through its public API:
  # cache_info, cache_clear, and f.__wrapped__
  # The internals of the lru_cache are encapsulated for thread safety and
  # to allow the implementation to change (including a possible C version).

  def decorating_function(user_function):
    cache = dict()
    stats = [0, 0]                        # make statistics updateable non-locally
    HITS, MISSES = 0, 1                   # names for the stats fields
    make_key = _make_key
    cache_get = cache.get                 # bound method to lookup key or return None
    _len = len                            # localize the global len() function
    lock = RLock()                        # because linkedlist updates aren't threadsafe
    root = []                             # root of the circular doubly linked list
    root[:] = [root, root, None, None]    # initialize by pointing to self
    nonlocal_root = [root]                # make updateable non-locally
    PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields

    if maxsize == 0:

      def wrapper(*args, **kwds):
        # no caching, just do a statistics update after a successful call
        result = user_function(*args, **kwds)
        stats[MISSES] += 1
        return result

    elif maxsize is None:

      def wrapper(*args, **kwds):
        # simple caching without ordering or size limit
        key = make_key(args, kwds, typed)
        result = cache_get(key, root)  # root used here as a unique not-found sentinel
        if result is not root:
          stats[HITS] += 1
          return result
        result = user_function(*args, **kwds)
        cache[key] = result
        stats[MISSES] += 1
        return result

    else:

      def wrapper(*args, **kwds):
        # size limited caching that tracks accesses by recency
        key = make_key(args, kwds, typed) if kwds or typed else args
        with lock:
          link = cache_get(key)
          if link is not None:
            # record recent use of the key by moving it to the front of the list
            root, = nonlocal_root
            link_prev, link_next, key, result = link
            link_prev[NEXT] = link_next
            link_next[PREV] = link_prev
            last = root[PREV]
            last[NEXT] = root[PREV] = link
            link[PREV] = last
            link[NEXT] = root
            stats[HITS] += 1
            return result
        result = user_function(*args, **kwds)
        with lock:
          root, = nonlocal_root
          if key in cache:
            # getting here means that this same key was added to the
            # cache while the lock was released. since the link
            # update is already done, we need only return the
            # computed result and update the count of misses.
            pass
          elif _len(cache) >= maxsize:
            # use the old root to store the new key and result
            oldroot = root
            oldroot[KEY] = key
            oldroot[RESULT] = result
            # empty the oldest link and make it the new root
            root = nonlocal_root[0] = oldroot[NEXT]
            oldkey = root[KEY]
            root[KEY] = root[RESULT] = None
            # now update the cache dictionary for the new links
            del cache[oldkey]
            cache[key] = oldroot
          else:
            # put result in a new link at the front of the list
            last = root[PREV]
            link = [last, root, key, result]
            last[NEXT] = root[PREV] = cache[key] = link
          stats[MISSES] += 1
        return result

    def cache_info():
      """Report cache statistics"""
      with lock:
        return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))

    def cache_clear():
      """Clear the cache and cache statistics"""
      with lock:
        cache.clear()
        root = nonlocal_root[0]
        root[:] = [root, root, None, None]
        stats[:] = [0, 0]

    wrapper.__wrapped__ = user_function
    wrapper.cache_info = cache_info
    wrapper.cache_clear = cache_clear
    return update_wrapper(wrapper, user_function)

  return decorating_function
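A minimal sketch of the decorator in use (the double() function and call sequence are illustrative, not part of stem):

::

  from stem.util.lru_cache import lru_cache

  @lru_cache(maxsize = 2)
  def double(x):
    return 2 * x

  double(3)  # miss, computed and cached
  double(3)  # hit, served from the cache
  double(4)  # miss
  double(5)  # miss, evicts the least recently used entry (3)

  print double.cache_info()  # CacheInfo(hits=1, misses=3, maxsize=2, currsize=2)
  double.cache_clear()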
133
lib/stem/util/ordereddict.py
Normal file

@@ -0,0 +1,133 @@
# Drop-in replacement for python 2.7's OrderedDict, from...
# http://pypi.python.org/pypi/ordereddict
#
# Stem users should *not* rely upon this module. It will be removed when we
# drop support for python 2.6 and below.

# Copyright (c) 2009 Raymond Hettinger
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

from UserDict import DictMixin


class OrderedDict(dict, DictMixin):
  def __init__(self, *args, **kwds):
    if len(args) > 1:
      raise TypeError('expected at most 1 arguments, got %d' % len(args))
    try:
      self.__end
    except AttributeError:
      self.clear()
    self.update(*args, **kwds)

  def clear(self):
    self.__end = end = []
    end += [None, end, end]  # sentinel node for doubly linked list
    self.__map = {}          # key --> [key, prev, next]
    dict.clear(self)

  def __setitem__(self, key, value):
    if key not in self:
      end = self.__end
      curr = end[1]
      curr[2] = end[1] = self.__map[key] = [key, curr, end]
    dict.__setitem__(self, key, value)

  def __delitem__(self, key):
    dict.__delitem__(self, key)
    key, prev, next = self.__map.pop(key)
    prev[2] = next
    next[1] = prev

  def __iter__(self):
    end = self.__end
    curr = end[2]
    while curr is not end:
      yield curr[0]
      curr = curr[2]

  def __reversed__(self):
    end = self.__end
    curr = end[1]
    while curr is not end:
      yield curr[0]
      curr = curr[1]

  def popitem(self, last=True):
    if not self:
      raise KeyError('dictionary is empty')
    if last:
      key = reversed(self).next()
    else:
      key = iter(self).next()
    value = self.pop(key)
    return key, value

  def __reduce__(self):
    items = [[k, self[k]] for k in self]
    tmp = self.__map, self.__end
    del self.__map, self.__end
    inst_dict = vars(self).copy()
    self.__map, self.__end = tmp
    if inst_dict:
      return (self.__class__, (items,), inst_dict)
    return self.__class__, (items,)

  def keys(self):
    return list(self)

  setdefault = DictMixin.setdefault
  update = DictMixin.update
  pop = DictMixin.pop
  values = DictMixin.values
  items = DictMixin.items
  iterkeys = DictMixin.iterkeys
  itervalues = DictMixin.itervalues
  iteritems = DictMixin.iteritems

  def __repr__(self):
    if not self:
      return '%s()' % (self.__class__.__name__,)
    return '%s(%r)' % (self.__class__.__name__, self.items())

  def copy(self):
    return self.__class__(self)

  @classmethod
  def fromkeys(cls, iterable, value=None):
    d = cls()
    for key in iterable:
      d[key] = value
    return d

  def __eq__(self, other):
    if isinstance(other, OrderedDict):
      if len(self) != len(other):
        return False
      for p, q in zip(self.items(), other.items()):
        if p != q:
          return False
      return True
    return dict.__eq__(self, other)

  def __ne__(self, other):
    return not self == other
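A quick sketch of the behavior this backport provides (keys and values are illustrative):

::

  from stem.util.ordereddict import OrderedDict

  d = OrderedDict()
  d['first'] = 1
  d['second'] = 2
  d['third'] = 3

  print d.keys()     # ['first', 'second', 'third'] - insertion order is kept
  print d.popitem()  # ('third', 3) - pops from the most recently added end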
498
lib/stem/util/proc.py
Normal file

@@ -0,0 +1,498 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Helper functions for querying process and system information from the /proc
contents. Fetching information this way provides huge performance benefits
over lookups via system utilities (ps, netstat, etc). For instance, resolving
connections this way cuts the runtime by around 90% versus the alternatives.
These functions may not work on all platforms (only Linux?).

The method for reading these files (and a little code) is borrowed from
`psutil <https://code.google.com/p/psutil/>`_, which was written by Jay Loden,
Dave Daeschler, Giampaolo Rodola' and is under the BSD license.

**These functions are not being vended to stem users. They may change in the
future, use them at your own risk.**

**Module Overview:**

::

  is_available - checks if proc utilities can be used on this system
  get_system_start_time - unix timestamp for when the system started
  get_physical_memory - memory available on this system
  get_cwd - provides the current working directory for a process
  get_uid - provides the user id a process is running under
  get_memory_usage - provides the memory usage of a process
  get_stats - queries statistics about a process
  get_connections - provides the connections made by a process

.. data:: Stat (enum)

  Types of data available via the :func:`~stem.util.proc.get_stats` function.

  ============== ===========
  Stat           Description
  ============== ===========
  **COMMAND**    command name under which the process is running
  **CPU_UTIME**  total user time spent on the process
  **CPU_STIME**  total system time spent on the process
  **START_TIME** when this process began, in unix time
  ============== ===========
"""

import base64
import os
import platform
import socket
import sys
import time

import stem.util.enum

from stem.util import log

try:
  # added in python 3.2
  from functools import lru_cache
except ImportError:
  from stem.util.lru_cache import lru_cache

# os.sysconf is only defined on unix
try:
  CLOCK_TICKS = os.sysconf(os.sysconf_names["SC_CLK_TCK"])
except AttributeError:
  CLOCK_TICKS = None

Stat = stem.util.enum.Enum(
  ("COMMAND", "command"), ("CPU_UTIME", "utime"),
  ("CPU_STIME", "stime"), ("START_TIME", "start time")
)


@lru_cache()
def is_available():
  """
  Checks if proc information is available on this platform.

  :returns: **True** if proc contents exist on this platform, **False** otherwise
  """

  if platform.system() != "Linux":
    return False
  else:
    # list of process independent proc paths we use
    proc_paths = ("/proc/stat", "/proc/meminfo", "/proc/net/tcp", "/proc/net/udp")

    for path in proc_paths:
      if not os.path.exists(path):
        return False

    return True


@lru_cache()
def get_system_start_time():
  """
  Provides the unix time (seconds since epoch) when the system started.

  :returns: **float** for the unix time of when the system started

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "system start time"
  btime_line = _get_line("/proc/stat", "btime", parameter)

  try:
    result = float(btime_line.strip().split()[1])
    _log_runtime(parameter, "/proc/stat[btime]", start_time)
    return result
  except:
    exc = IOError("unable to parse the /proc/stat btime entry: %s" % btime_line)
    _log_failure(parameter, exc)
    raise exc


@lru_cache()
def get_physical_memory():
  """
  Provides the total physical memory on the system in bytes.

  :returns: **int** for the bytes of physical memory this system has

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "system physical memory"
  mem_total_line = _get_line("/proc/meminfo", "MemTotal:", parameter)

  try:
    result = int(mem_total_line.split()[1]) * 1024
    _log_runtime(parameter, "/proc/meminfo[MemTotal]", start_time)
    return result
  except:
    exc = IOError("unable to parse the /proc/meminfo MemTotal entry: %s" % mem_total_line)
    _log_failure(parameter, exc)
    raise exc


def get_cwd(pid):
  """
  Provides the current working directory for the given process.

  :param int pid: process id of the process to be queried

  :returns: **str** with the path of the working directory for the process

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "cwd"
  proc_cwd_link = "/proc/%s/cwd" % pid

  if pid == 0:
    cwd = ""
  else:
    try:
      cwd = os.readlink(proc_cwd_link)
    except OSError:
      exc = IOError("unable to read %s" % proc_cwd_link)
      _log_failure(parameter, exc)
      raise exc

  _log_runtime(parameter, proc_cwd_link, start_time)
  return cwd


def get_uid(pid):
  """
  Provides the user ID the given process is running under.

  :param int pid: process id of the process to be queried

  :returns: **int** with the user id for the owner of the process

  :raises: **IOError** if it can't be determined
  """

  start_time, parameter = time.time(), "uid"
  status_path = "/proc/%s/status" % pid
  uid_line = _get_line(status_path, "Uid:", parameter)

  try:
    result = int(uid_line.split()[1])
    _log_runtime(parameter, "%s[Uid]" % status_path, start_time)
    return result
  except:
    exc = IOError("unable to parse the %s Uid entry: %s" % (status_path, uid_line))
    _log_failure(parameter, exc)
    raise exc


def get_memory_usage(pid):
  """
  Provides the memory usage in bytes for the given process.

  :param int pid: process id of the process to be queried

  :returns: **tuple** of two ints with the memory usage of the process, of the
    form **(resident_size, virtual_size)**

  :raises: **IOError** if it can't be determined
  """

  # checks if this is the kernel process

  if pid == 0:
    return (0, 0)

  start_time, parameter = time.time(), "memory usage"
  status_path = "/proc/%s/status" % pid
  mem_lines = _get_lines(status_path, ("VmRSS:", "VmSize:"), parameter)

  try:
    resident_size = int(mem_lines["VmRSS:"].split()[1]) * 1024
    virtual_size = int(mem_lines["VmSize:"].split()[1]) * 1024

    _log_runtime(parameter, "%s[VmRSS|VmSize]" % status_path, start_time)
    return (resident_size, virtual_size)
  except:
    exc = IOError("unable to parse the %s VmRSS and VmSize entries: %s" % (status_path, ", ".join(mem_lines)))
    _log_failure(parameter, exc)
    raise exc


def get_stats(pid, *stat_types):
  """
  Provides process specific information. See the :data:`~stem.util.proc.Stat`
  enum for valid options.

  :param int pid: process id of the process to be queried
  :param Stat stat_types: information to be provided back

  :returns: **tuple** with all of the requested statistics as strings

  :raises: **IOError** if it can't be determined
  """

  if CLOCK_TICKS is None:
    raise IOError("Unable to look up SC_CLK_TCK")

  start_time, parameter = time.time(), "process %s" % ", ".join(stat_types)

  # the stat file contains a single line, of the form...
  # 8438 (tor) S 8407 8438 8407 34818 8438 4202496...
  stat_path = "/proc/%s/stat" % pid
  stat_line = _get_line(stat_path, str(pid), parameter)

  # breaks line into component values
  stat_comp = []
  cmd_start, cmd_end = stat_line.find("("), stat_line.find(")")

  if cmd_start != -1 and cmd_end != -1:
    stat_comp.append(stat_line[:cmd_start])
    stat_comp.append(stat_line[cmd_start + 1:cmd_end])
    stat_comp += stat_line[cmd_end + 1:].split()

  if len(stat_comp) < 44 or not _is_float(stat_comp[13], stat_comp[14], stat_comp[21]):
    exc = IOError("stat file had an unexpected format: %s" % stat_path)
    _log_failure(parameter, exc)
    raise exc

  results = []
  for stat_type in stat_types:
    if stat_type == Stat.COMMAND:
      if pid == 0:
        results.append("sched")
      else:
        results.append(stat_comp[1])
    elif stat_type == Stat.CPU_UTIME:
      if pid == 0:
        results.append("0")
      else:
        results.append(str(float(stat_comp[13]) / CLOCK_TICKS))
    elif stat_type == Stat.CPU_STIME:
      if pid == 0:
        results.append("0")
      else:
        results.append(str(float(stat_comp[14]) / CLOCK_TICKS))
    elif stat_type == Stat.START_TIME:
      if pid == 0:
        results.append(str(get_system_start_time()))
      else:
        # According to documentation, starttime is in field 21 and the unit is
        # jiffies (clock ticks). We divide it by clock ticks, then add the
        # system start time to get the seconds since the epoch.
        p_start_time = float(stat_comp[21]) / CLOCK_TICKS
        results.append(str(p_start_time + get_system_start_time()))

  _log_runtime(parameter, stat_path, start_time)
  return tuple(results)


def get_connections(pid):
  """
  Queries connection related information from the proc contents. This provides
  similar results to netstat, lsof, sockstat, and other connection resolution
  utilities (though the lookup is far quicker).

  :param int pid: process id of the process to be queried

  :returns: A listing of connection tuples of the form **[(local_ipAddr1,
    local_port1, foreign_ipAddr1, foreign_port1, protocol), ...]** (addresses
    and protocols are strings and ports are ints)

  :raises: **IOError** if it can't be determined
  """

  if isinstance(pid, str):
    try:
      pid = int(pid)
    except ValueError:
      raise IOError("Process pid was non-numeric: %s" % pid)

  if pid == 0:
    return []

  # fetches the inode numbers for socket file descriptors

  start_time, parameter = time.time(), "process connections"
  inodes = []

  for fd in os.listdir("/proc/%s/fd" % pid):
    fd_path = "/proc/%s/fd/%s" % (pid, fd)

    try:
      # File descriptor link, such as 'socket:[30899]'

      fd_name = os.readlink(fd_path)

      if fd_name.startswith('socket:['):
        inodes.append(fd_name[8:-1])
    except OSError:
      # most likely couldn't be read due to permissions
      exc = IOError("unable to determine file descriptor destination: %s" % fd_path)
      _log_failure(parameter, exc)
      raise exc

  if not inodes:
    # unable to fetch any connections for this process
    return []

  # check for the connection information from the /proc/net contents

  conn = []

  for proc_file_path in ("/proc/net/tcp", "/proc/net/udp"):
    try:
      proc_file = open(proc_file_path)
      proc_file.readline()  # skip the first line

      for line in proc_file:
        _, l_addr, f_addr, status, _, _, _, _, _, inode = line.split()[:10]

        if inode in inodes:
          # if a tcp connection, skip if it isn't yet established
          if proc_file_path.endswith("/tcp") and status != "01":
            continue

          local_ip, local_port = _decode_proc_address_encoding(l_addr)
          foreign_ip, foreign_port = _decode_proc_address_encoding(f_addr)
          protocol = proc_file_path[10:]
          conn.append((local_ip, local_port, foreign_ip, foreign_port, protocol))

      proc_file.close()
    except IOError as exc:
      exc = IOError("unable to read '%s': %s" % (proc_file_path, exc))
      _log_failure(parameter, exc)
      raise exc
    except Exception as exc:
      exc = IOError("unable to parse '%s': %s" % (proc_file_path, exc))
      _log_failure(parameter, exc)
      raise exc

  _log_runtime(parameter, "/proc/net/[tcp|udp]", start_time)
  return conn


def _decode_proc_address_encoding(addr):
  """
  Translates an address entry in the /proc/net/* contents to a human readable
  form (`reference <http://linuxdevcenter.com/pub/a/linux/2000/11/16/LinuxAdmin.html>`_),
  for instance:

  ::

    "0500000A:0016" -> ("10.0.0.5", 22)

  :param str addr: proc address entry to be decoded

  :returns: **tuple** of the form **(addr, port)**, with addr as a string and port an int
  """

  ip, port = addr.split(':')

  # the port is represented as a two-byte hexadecimal number
  port = int(port, 16)

  if sys.version_info >= (3,):
    ip = ip.encode('ascii')

  # The IPv4 address portion is a little-endian four-byte hexadecimal number.
  # That is, the least significant byte is listed first, so we need to reverse
  # the order of the bytes to convert it to an IP address.
  #
  # This needs to account for the endian ordering as per...
  # http://code.google.com/p/psutil/issues/detail?id=201
  # https://trac.torproject.org/projects/tor/ticket/4777

  if sys.byteorder == 'little':
    ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip)[::-1])
  else:
    ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip))

  return (ip, port)


def _is_float(*value):
  try:
    for v in value:
      float(v)

    return True
  except ValueError:
    return False


def _get_line(file_path, line_prefix, parameter):
  return _get_lines(file_path, (line_prefix, ), parameter)[line_prefix]


def _get_lines(file_path, line_prefixes, parameter):
  """
  Fetches lines with the given prefixes from a file. This only provides back
  the first instance of each prefix.

  :param str file_path: path of the file to read
  :param tuple line_prefixes: string prefixes of the lines to return
  :param str parameter: description of the proc attribute being fetched

  :returns: mapping of prefixes to the matching line

  :raises: **IOError** if unable to read the file or can't find all of the prefixes
  """

  try:
    remaining_prefixes = list(line_prefixes)
    proc_file, results = open(file_path), {}

    for line in proc_file:
      if not remaining_prefixes:
        break  # found everything we're looking for

      for prefix in remaining_prefixes:
        if line.startswith(prefix):
          results[prefix] = line
          remaining_prefixes.remove(prefix)
          break

    proc_file.close()

    if remaining_prefixes:
      if len(remaining_prefixes) == 1:
        msg = "%s did not contain a %s entry" % (file_path, remaining_prefixes[0])
      else:
        msg = "%s did not contain %s entries" % (file_path, ", ".join(remaining_prefixes))

      raise IOError(msg)
    else:
      return results
  except IOError as exc:
    _log_failure(parameter, exc)
    raise exc


def _log_runtime(parameter, proc_location, start_time):
  """
  Logs a message indicating a successful proc query.

  :param str parameter: description of the proc attribute being fetched
  :param str proc_location: proc files we were querying
  :param int start_time: unix time for when this query was started
  """

  runtime = time.time() - start_time
  log.debug("proc call (%s): %s (runtime: %0.4f)" % (parameter, proc_location, runtime))


def _log_failure(parameter, exc):
  """
  Logs a message indicating that the proc query failed.

  :param str parameter: description of the proc attribute being fetched
  :param Exception exc: exception that we're raising
  """

  log.debug("proc call failed (%s): %s" % (parameter, exc))
387
lib/stem/util/str_tools.py
Normal file

@@ -0,0 +1,387 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Toolkit for various string activity.

**Module Overview:**

::

  get_size_label - human readable label for a number of bytes
  get_time_label - human readable label for a number of seconds
  get_time_labels - human readable labels for each time unit
  get_short_time_label - condensed time label output
  parse_short_time_label - seconds represented by a short time label
"""

import codecs
import datetime

import stem.prereq

# label conversion tuples of the form...
# (bits / bytes / seconds, short label, long label)
SIZE_UNITS_BITS = (
  (140737488355328.0, " Pb", " Petabit"),
  (137438953472.0, " Tb", " Terabit"),
  (134217728.0, " Gb", " Gigabit"),
  (131072.0, " Mb", " Megabit"),
  (128.0, " Kb", " Kilobit"),
  (0.125, " b", " Bit"),
)

SIZE_UNITS_BYTES = (
  (1125899906842624.0, " PB", " Petabyte"),
  (1099511627776.0, " TB", " Terabyte"),
  (1073741824.0, " GB", " Gigabyte"),
  (1048576.0, " MB", " Megabyte"),
  (1024.0, " KB", " Kilobyte"),
  (1.0, " B", " Byte"),
)

TIME_UNITS = (
  (86400.0, "d", " day"),
  (3600.0, "h", " hour"),
  (60.0, "m", " minute"),
  (1.0, "s", " second"),
)

if stem.prereq.is_python_3():
  def _to_bytes_impl(msg):
    if isinstance(msg, str):
      return codecs.latin_1_encode(msg, "replace")[0]
    else:
      return msg

  def _to_unicode_impl(msg):
    if msg is not None and not isinstance(msg, str):
      return msg.decode("utf-8", "replace")
    else:
      return msg
else:
  def _to_bytes_impl(msg):
    if msg is not None and isinstance(msg, unicode):
      return codecs.latin_1_encode(msg, "replace")[0]
    else:
      return msg

  def _to_unicode_impl(msg):
    if msg is not None and not isinstance(msg, unicode):
      return msg.decode("utf-8", "replace")
    else:
      return msg


def _to_bytes(msg):
  """
  Provides the ASCII bytes for the given string. This is purely to provide
  python 3 compatibility, normalizing the unicode/ASCII change in the version
  bump. For an explanation of this see...

  http://python3porting.com/problems.html#nicer-solutions

  :param str,unicode msg: string to be converted

  :returns: ASCII bytes for string
  """

  return _to_bytes_impl(msg)


def _to_unicode(msg):
  """
  Provides the unicode string for the given ASCII bytes. This is purely to
  provide python 3 compatibility, normalizing the unicode/ASCII change in the
  version bump.

  :param str,unicode msg: string to be converted

  :returns: unicode conversion
  """

  return _to_unicode_impl(msg)


def _to_camel_case(label, divider = "_", joiner = " "):
  """
  Converts the given string to camel case, ie:

  ::

    >>> _to_camel_case("I_LIKE_PEPPERJACK!")
    'I Like Pepperjack!'

  :param str label: input string to be converted
  :param str divider: word boundary
  :param str joiner: replacement for word boundaries

  :returns: camel cased string
  """

  words = []
  for entry in label.split(divider):
    if len(entry) == 0:
      words.append("")
    elif len(entry) == 1:
      words.append(entry.upper())
    else:
      words.append(entry[0].upper() + entry[1:].lower())

  return joiner.join(words)


def get_size_label(byte_count, decimal = 0, is_long = False, is_bytes = True):
  """
  Converts a number of bytes into a human readable label in its most
  significant units. For instance, 7500 bytes would return "7 KB". If the
  is_long option is used this expands unit labels to be the properly pluralized
  full word (for instance 'Kilobytes' rather than 'KB'). Units go up through
  petabytes.

  ::

    >>> get_size_label(2000000)
    '1 MB'

    >>> get_size_label(1050, 2)
    '1.02 KB'

    >>> get_size_label(1050, 3, True)
    '1.025 Kilobytes'

  :param int byte_count: number of bytes to be converted
  :param int decimal: number of decimal digits to be included
  :param bool is_long: expands units label
  :param bool is_bytes: provides units in bytes if **True**, bits otherwise

  :returns: **str** with human readable representation of the size
  """

  if is_bytes:
    return _get_label(SIZE_UNITS_BYTES, byte_count, decimal, is_long)
  else:
    return _get_label(SIZE_UNITS_BITS, byte_count, decimal, is_long)


def get_time_label(seconds, decimal = 0, is_long = False):
  """
  Converts seconds into a time label truncated to its most significant units.
  For instance, 7500 seconds would return "2h". Units go up through days.

  This defaults to presenting single character labels, but if the is_long
  option is used this expands labels to be the full word (space included and
  properly pluralized). For instance, "4h" would be "4 hours" and "1m" would
  become "1 minute".

  ::

    >>> get_time_label(10000)
    '2h'

    >>> get_time_label(61, 1, True)
    '1.0 minute'

    >>> get_time_label(61, 2, True)
    '1.01 minutes'

  :param int seconds: number of seconds to be converted
  :param int decimal: number of decimal digits to be included
  :param bool is_long: expands units label

  :returns: **str** with human readable representation of the time
  """

  return _get_label(TIME_UNITS, seconds, decimal, is_long)


def get_time_labels(seconds, is_long = False):
  """
  Provides a list of label conversions for each time unit, starting with its
  most significant units on down. Any counts that evaluate to zero are omitted.
  For example...

  ::

    >>> get_time_labels(400)
    ['6m', '40s']

    >>> get_time_labels(3640, True)
    ['1 hour', '40 seconds']

  :param int seconds: number of seconds to be converted
  :param bool is_long: expands units label

  :returns: **list** of strings with human readable representations of the time
  """

  time_labels = []

  for count_per_unit, _, _ in TIME_UNITS:
    if abs(seconds) >= count_per_unit:
      time_labels.append(_get_label(TIME_UNITS, seconds, 0, is_long))
      seconds %= count_per_unit

  return time_labels


def get_short_time_label(seconds):
  """
  Provides a time in the following format:
  [[dd-]hh:]mm:ss

  ::

    >>> get_short_time_label(111)
    '01:51'

    >>> get_short_time_label(544100)
    '6-07:08:20'

  :param int seconds: number of seconds to be converted

  :returns: **str** with the short representation for the time

  :raises: **ValueError** if the input is negative
  """

  if seconds < 0:
    raise ValueError("Input needs to be a non-negative integer, got '%i'" % seconds)

  time_comp = {}

  for amount, _, label in TIME_UNITS:
    count = int(seconds / amount)
    seconds %= amount
    time_comp[label.strip()] = count

  label = "%02i:%02i" % (time_comp["minute"], time_comp["second"])

  if time_comp["day"]:
    label = "%i-%02i:%s" % (time_comp["day"], time_comp["hour"], label)
  elif time_comp["hour"]:
    label = "%02i:%s" % (time_comp["hour"], label)

  return label


def parse_short_time_label(label):
  """
  Provides the number of seconds corresponding to the formatting used for the
  cputime and etime fields of ps:
  [[dd-]hh:]mm:ss or mm:ss.ss

  ::

    >>> parse_short_time_label('01:51')
    111

    >>> parse_short_time_label('6-07:08:20')
    544100

  :param str label: time entry to be parsed

  :returns: **int** with the number of seconds represented by the label

  :raises: **ValueError** if input is malformed
  """

  days, hours, minutes, seconds = '0', '0', '0', '0'

  if '-' in label:
    days, label = label.split('-', 1)

  time_comp = label.split(":")

  if len(time_comp) == 3:
    hours, minutes, seconds = time_comp
  elif len(time_comp) == 2:
    minutes, seconds = time_comp
  else:
    raise ValueError("Invalid time format, we expected '[[dd-]hh:]mm:ss' or 'mm:ss.ss': %s" % label)

  try:
    time_sum = int(float(seconds))
    time_sum += int(minutes) * 60
    time_sum += int(hours) * 3600
    time_sum += int(days) * 86400
    return time_sum
  except ValueError:
    raise ValueError("Non-numeric value in time entry: %s" % label)


def _parse_iso_timestamp(entry):
  """
  Parses the ISO 8601 standard that provides for timestamps like...

  ::

    2012-11-08T16:48:41.420251

  :param str entry: timestamp to be parsed

  :returns: datetime for the time represented by the timestamp

  :raises: ValueError if the timestamp is malformed
  """

  if not isinstance(entry, str):
    raise ValueError("parse_iso_timestamp() input must be a str, got a %s" % type(entry))

  # based on suggestions from...
  # http://stackoverflow.com/questions/127803/how-to-parse-iso-formatted-date-in-python

  if '.' in entry:
    timestamp_str, microseconds = entry.split('.')
  else:
    timestamp_str, microseconds = entry, '000000'

  if len(microseconds) != 6 or not microseconds.isdigit():
    raise ValueError("timestamp's microseconds should be six digits")

  timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S")
  return timestamp + datetime.timedelta(microseconds = int(microseconds))


def _get_label(units, count, decimal, is_long):
  """
  Provides label corresponding to units of the highest significance in the
  provided set. This rounds down (ie, integer truncation after visible units).

  :param tuple units: type of units to be used for conversion, containing
    (count_per_unit, short_label, long_label)
  :param int count: number of base units being converted
  :param int decimal: decimal precision of label
  :param bool is_long: uses the long label if **True**, short label otherwise
  """

  # formatted string for the requested number of digits
  label_format = "%%.%if" % decimal

  if count < 0:
    label_format = "-" + label_format
    count = abs(count)
  elif count == 0:
    units_label = units[-1][2] + "s" if is_long else units[-1][1]
    return "%s%s" % (label_format % count, units_label)

  for count_per_unit, short_label, long_label in units:
    if count >= count_per_unit:
      # Rounding down with a '%f' is a little clunky. Reducing the count so
      # it'll divide evenly as the rounded down value.

      count -= count % (count_per_unit / (10 ** decimal))
      count_label = label_format % (count / count_per_unit)

      if is_long:
        # Pluralize if any of the visible units make it greater than one. For
        # instance 1.0003 is plural but 1.000 isn't.

        if decimal > 0:
          is_plural = count > count_per_unit
        else:
          is_plural = count >= count_per_unit * 2

        return count_label + long_label + ("s" if is_plural else "")
      else:
        return count_label + short_label
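Note that both unit tables are keyed by byte counts, so the bit thresholds are expressed in bytes (for instance, the 128.0 entry in SIZE_UNITS_BITS is one kilobit: 1024 bits / 8 bits per byte). A small sketch with illustrative values:

::

  from stem.util import str_tools

  print str_tools.get_size_label(256)                    # '256 B'
  print str_tools.get_size_label(256, is_bytes = False)  # '2 Kb' (256 bytes = 2048 bits)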
1010
lib/stem/util/system.py
Normal file

File diff suppressed because it is too large
98
lib/stem/util/term.py
Normal file

@@ -0,0 +1,98 @@
# Copyright 2011-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Utilities for working with the terminal.

**Module Overview:**

::

  format - wrap text with ANSI for the given colors or attributes

.. data:: Color (enum)
.. data:: BgColor (enum)

  Enumerations for foreground or background terminal color.

  =========== ===========
  Color       Description
  =========== ===========
  **BLACK**   black color
  **BLUE**    blue color
  **CYAN**    cyan color
  **GREEN**   green color
  **MAGENTA** magenta color
  **RED**     red color
  **WHITE**   white color
  **YELLOW**  yellow color
  =========== ===========

.. data:: Attr (enum)

  Enumerations of terminal text attributes.

  ============= ===========
  Attr          Description
  ============= ===========
  **BOLD**      heavy typeface
  **HILIGHT**   inverted foreground and background
  **UNDERLINE** underlined text
  ============= ===========
"""

import stem.util.enum
import stem.util.str_tools

TERM_COLORS = ("BLACK", "RED", "GREEN", "YELLOW", "BLUE", "MAGENTA", "CYAN", "WHITE")

Color = stem.util.enum.Enum(*TERM_COLORS)
BgColor = stem.util.enum.Enum(*["BG_" + color for color in TERM_COLORS])
Attr = stem.util.enum.Enum("BOLD", "UNDERLINE", "HILIGHT")

# mappings of terminal attribute enums to their ANSI escape encoding
FG_ENCODING = dict([(list(Color)[i], str(30 + i)) for i in range(8)])
BG_ENCODING = dict([(list(BgColor)[i], str(40 + i)) for i in range(8)])
ATTR_ENCODING = {Attr.BOLD: "1", Attr.UNDERLINE: "4", Attr.HILIGHT: "7"}

CSI = "\x1B[%sm"
RESET = CSI % "0"


def format(msg, *attr):
  """
  Simple terminal text formatting using `ANSI escape sequences
  <https://secure.wikimedia.org/wikipedia/en/wiki/ANSI_escape_code#CSI_codes>`_.
  The following are some toolkits providing similar capabilities:

  * `django.utils.termcolors <https://code.djangoproject.com/browser/django/trunk/django/utils/termcolors.py>`_
  * `termcolor <http://pypi.python.org/pypi/termcolor>`_
  * `colorama <http://pypi.python.org/pypi/colorama>`_

  :param str msg: string to be formatted
  :param str attr: text attributes, this can be :data:`~stem.util.term.Color`, :data:`~stem.util.term.BgColor`, or :data:`~stem.util.term.Attr` enums
    and are case insensitive (so strings like "red" are fine)

  :returns: **str** wrapped with ANSI escape encodings, starting with the given
    attributes and ending with a reset
  """

  # if we have reset sequences in the message then apply our attributes
  # after each of them
  if RESET in msg:
    return "".join([format(comp, *attr) for comp in msg.split(RESET)])

  encodings = []
  for text_attr in attr:
    text_attr, encoding = stem.util.str_tools._to_camel_case(text_attr), None
    encoding = FG_ENCODING.get(text_attr, encoding)
    encoding = BG_ENCODING.get(text_attr, encoding)
    encoding = ATTR_ENCODING.get(text_attr, encoding)

    if encoding:
      encodings.append(encoding)

  if encodings:
    return (CSI % ";".join(encodings)) + msg + RESET
  else:
    return msg
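A minimal sketch of the formatter (the message text is illustrative):

::

  from stem.util import term

  # "hello" in bold red text, followed by a reset
  print term.format("hello", term.Color.RED, term.Attr.BOLD)

  # attributes can be plain strings too, thanks to the _to_camel_case() call
  print term.format("hello", "red", "bold")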
115
lib/stem/util/tor_tools.py
Normal file

@@ -0,0 +1,115 @@
# Copyright 2012-2013, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Miscellaneous utility functions for working with tor.

**These functions are not being vended to stem users. They may change in the
future, use them at your own risk.**

**Module Overview:**

::

  is_valid_fingerprint - checks if a string is a valid tor relay fingerprint
  is_valid_nickname - checks if a string is a valid tor relay nickname
  is_valid_circuit_id - checks if a string is a valid tor circuit id
  is_valid_stream_id - checks if a string is a valid tor stream id
  is_hex_digits - checks if a string is only made up of hex digits
"""

import re

# The control-spec defines the following as...
#
#   Fingerprint = "$" 40*HEXDIG
#   NicknameChar = "a"-"z" / "A"-"Z" / "0" - "9"
#   Nickname = 1*19 NicknameChar
#
#   CircuitID = 1*16 IDChar
#   IDChar = ALPHA / DIGIT
#
# HEXDIG is defined in RFC 5234 as being uppercase and used in RFC 5987 as
# case insensitive. Tor doesn't define this in the spec so flipping a coin
# and going with case insensitive.

HEX_DIGIT = "[0-9a-fA-F]"
FINGERPRINT_PATTERN = re.compile("^%s{40}$" % HEX_DIGIT)
NICKNAME_PATTERN = re.compile("^[a-zA-Z0-9]{1,19}$")
CIRC_ID_PATTERN = re.compile("^[a-zA-Z0-9]{1,16}$")


def is_valid_fingerprint(entry, check_prefix = False):
  """
  Checks if a string is a properly formatted relay fingerprint. This checks for
  a '$' prefix if check_prefix is true, otherwise this only validates the hex
  digits.

  :param str entry: string to be checked
  :param bool check_prefix: checks for a '$' prefix

  :returns: **True** if the string could be a relay fingerprint, **False** otherwise
  """

  if not isinstance(entry, (str, unicode)):
    return False
  elif check_prefix:
    if not entry or entry[0] != "$":
      return False

    entry = entry[1:]

  return bool(FINGERPRINT_PATTERN.match(entry))


def is_valid_nickname(entry):
  """
  Checks if a string is a valid format for being a nickname.

  :param str entry: string to be checked

  :returns: **True** if the string could be a nickname, **False** otherwise
  """

  if not isinstance(entry, (str, unicode)):
    return False

  return bool(NICKNAME_PATTERN.match(entry))


def is_valid_circuit_id(entry):
  """
  Checks if a string is a valid format for being a circuit identifier.

  :returns: **True** if the string could be a circuit id, **False** otherwise
  """

  if not isinstance(entry, (str, unicode)):
    return False

  return bool(CIRC_ID_PATTERN.match(entry))


def is_valid_stream_id(entry):
  """
  Checks if a string is a valid format for being a stream identifier.
  Currently, this is just an alias to :func:`~stem.util.tor_tools.is_valid_circuit_id`.

  :returns: **True** if the string could be a stream id, **False** otherwise
  """

  return is_valid_circuit_id(entry)


def is_hex_digits(entry, count):
  """
  Checks if a string is the given number of hex digits. Digits represented by
  letters are case insensitive.

  :param str entry: string to be checked
  :param int count: number of hex digits to be checked for

  :returns: **True** if the string matches this number
  """

  return bool(re.match("^%s{%i}$" % (HEX_DIGIT, count), entry))
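A few illustrative checks against these patterns:

::

  from stem.util import tor_tools

  print tor_tools.is_valid_fingerprint("A" * 40)              # True
  print tor_tools.is_valid_fingerprint("$" + "A" * 40, True)  # True
  print tor_tools.is_valid_fingerprint("A" * 39)              # False, too short

  print tor_tools.is_valid_nickname("caerSidi")  # True
  print tor_tools.is_valid_nickname("")          # False, needs 1 - 19 characters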