replacing portions

This commit is contained in:
Amber Brown 2018-05-21 19:47:37 -05:00
parent c60e0d5e02
commit df9f72d9e5
23 changed files with 270 additions and 416 deletions

View file

@ -13,29 +13,52 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import synapse.metrics
from prometheus_client.core import GaugeMetricFamily, REGISTRY
import os
CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5))
metrics = synapse.metrics.get_metrics_for("synapse.util.caches")
caches_by_name = {}
# cache_counter = metrics.register_cache(
# "cache",
# lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()},
# labels=["name"],
# )
collectors_by_name = {}
def register_cache(name, cache_name, cache):
def register_cache(name, cache):
caches_by_name[name] = cache
return metrics.register_cache(
"cache",
lambda: len(cache),
name,
)
# Check if the metric is already registered. Unregister it, if so.
metric_name = "synapse_util_caches_%s:%s" % (name, cache_name,)
if metric_name in collectors_by_name.keys():
REGISTRY.unregister(collectors_by_name[metric_name])
class CacheMetric(object):
    """Collector holding hit/miss/eviction counts for a single cache.

    An instance is registered with the prometheus ``REGISTRY`` by the
    enclosing ``register_cache`` call and is also returned to the cache so
    that it can bump the counters. ``cache`` and ``metric_name`` are closed
    over from that enclosing call.
    """

    # Class-level defaults: the first ``+=`` on an instance creates a
    # per-instance attribute, so counts are never shared between caches.
    hits = 0
    misses = 0
    evicted_size = 0

    def inc_hits(self):
        """Record one cache hit."""
        self.hits += 1

    def inc_misses(self):
        """Record one cache miss."""
        self.misses += 1

    def inc_evictions(self, size=1):
        """Record that ``size`` entries were evicted (defaults to one)."""
        self.evicted_size += size

    def collect(self):
        """Yield one gauge family describing the cache's current state.

        Each sample carries a single ``type`` label naming the statistic
        (``size``, ``hits``, ``misses``, ``total``, ``evicted_size``).
        """
        # add_metric() pairs label names with label values positionally, so
        # the family must declare exactly one label name for the one value
        # passed per sample. Declaring four names made every sample come out
        # under the first name ("size") regardless of what it measured.
        gm = GaugeMetricFamily(metric_name, "", labels=["type"])
        gm.add_metric(["size"], len(cache))
        gm.add_metric(["hits"], self.hits)
        gm.add_metric(["misses"], self.misses)
        gm.add_metric(["total"], self.hits + self.misses)
        # Evictions were being counted but never exported.
        gm.add_metric(["evicted_size"], self.evicted_size)
        yield gm
metric = CacheMetric()
REGISTRY.register(metric)
caches_by_name[cache_name] = cache
collectors_by_name[metric_name] = metric
return metric
KNOWN_KEYS = {
key: key for key in

View file

@ -80,7 +80,7 @@ class Cache(object):
self.name = name
self.keylen = keylen
self.thread = None
self.metrics = register_cache(name, self.cache)
self.metrics = register_cache("descriptor", name, self.cache)
# Passes the number of evicted entries on to this cache's metrics object
# (the value returned by register_cache() above) via inc_evictions().
def _on_evicted(self, evicted_count):
self.metrics.inc_evictions(evicted_count)

View file

@ -55,7 +55,7 @@ class DictionaryCache(object):
__slots__ = []
self.sentinel = Sentinel()
self.metrics = register_cache(name, self.cache)
self.metrics = register_cache("dictionary", name, self.cache)
def check_thread(self):
expected_thread = self.thread

View file

@ -52,12 +52,12 @@ class ExpiringCache(object):
self._cache = OrderedDict()
self.metrics = register_cache(cache_name, self)
self.iterable = iterable
self._size_estimate = 0
self.metrics = register_cache("expiring", cache_name, self)
def start(self):
if not self._expiry_ms:
# Don't bother starting the loop if things never expire

View file

@ -17,7 +17,7 @@ import logging
from twisted.internet import defer
from synapse.util.async import ObservableDeferred
from synapse.util.caches import metrics as cache_metrics
from synapse.util.caches import register_cache
from synapse.util.logcontext import make_deferred_yieldable, run_in_background
logger = logging.getLogger(__name__)
@ -38,15 +38,16 @@ class ResponseCache(object):
self.timeout_sec = timeout_ms / 1000.
self._name = name
self._metrics = cache_metrics.register_cache(
"response_cache",
size_callback=lambda: self.size(),
cache_name=name,
self._metrics = register_cache(
"response_cache", name, self
)
# Returns the number of entries currently held in pending_result_cache.
def size(self):
return len(self.pending_result_cache)
# Makes len() work on this object by delegating to size(). The instance
# itself is handed to register_cache() as the cache, and the metrics
# collector created there reads the cache size with len(cache).
def __len__(self):
return self.size()
def get(self, key):
"""Look up the given key.

View file

@ -38,7 +38,7 @@ class StreamChangeCache(object):
self._cache = sorteddict()
self._earliest_known_stream_pos = current_stream_pos
self.name = name
self.metrics = register_cache(self.name, self._cache)
self.metrics = register_cache("cache", self.name, self._cache)
for entity, stream_pos in prefilled_cache.items():
self.entity_has_changed(entity, stream_pos)

View file

@ -15,8 +15,8 @@
from twisted.internet import defer
from prometheus_client import Counter
from synapse.util.logcontext import LoggingContext
import synapse.metrics
from functools import wraps
import logging
@ -24,66 +24,21 @@ import logging
logger = logging.getLogger(__name__)
block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"])
metrics = synapse.metrics.get_metrics_for(__name__)
block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"])
# total number of times we have hit this block
block_counter = metrics.register_counter(
"block_count",
labels=["block_name"],
alternative_names=(
# the following are all deprecated aliases for the same metric
metrics.name_prefix + x for x in (
"_block_timer:count",
"_block_ru_utime:count",
"_block_ru_stime:count",
"_block_db_txn_count:count",
"_block_db_txn_duration:count",
)
)
)
block_ru_utime = Counter("synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"])
block_timer = metrics.register_counter(
"block_time_seconds",
labels=["block_name"],
alternative_names=(
metrics.name_prefix + "_block_timer:total",
),
)
block_ru_stime = Counter("synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"])
block_ru_utime = metrics.register_counter(
"block_ru_utime_seconds", labels=["block_name"],
alternative_names=(
metrics.name_prefix + "_block_ru_utime:total",
),
)
block_ru_stime = metrics.register_counter(
"block_ru_stime_seconds", labels=["block_name"],
alternative_names=(
metrics.name_prefix + "_block_ru_stime:total",
),
)
block_db_txn_count = metrics.register_counter(
"block_db_txn_count", labels=["block_name"],
alternative_names=(
metrics.name_prefix + "_block_db_txn_count:total",
),
)
block_db_txn_count = Counter("synapse_util_metrics_block_db_txn_count", "", ["block_name"])
# seconds spent waiting for db txns, excluding scheduling time, in this block
block_db_txn_duration = metrics.register_counter(
"block_db_txn_duration_seconds", labels=["block_name"],
alternative_names=(
metrics.name_prefix + "_block_db_txn_duration:total",
),
)
block_db_txn_duration = Counter("synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"])
# seconds spent waiting for a db connection, in this block
block_db_sched_duration = metrics.register_counter(
"block_db_sched_duration_seconds", labels=["block_name"],
)
block_db_sched_duration = Counter("synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"])
def measure_func(name):
@ -132,8 +87,8 @@ class Measure(object):
duration = self.clock.time_msec() - self.start
block_counter.inc(self.name)
block_timer.inc_by(duration, self.name)
block_counter.labels(self.name).inc()
block_timer.labels(self.name).inc(duration)
context = LoggingContext.current_context()
@ -150,19 +105,13 @@ class Measure(object):
ru_utime, ru_stime = context.get_resource_usage()
block_ru_utime.inc_by(ru_utime - self.ru_utime, self.name)
block_ru_stime.inc_by(ru_stime - self.ru_stime, self.name)
block_db_txn_count.inc_by(
context.db_txn_count - self.db_txn_count, self.name
)
block_db_txn_duration.inc_by(
(context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.,
self.name
)
block_db_sched_duration.inc_by(
(context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.,
self.name
)
block_ru_utime.labels(self.name).inc(ru_utime - self.ru_utime)
block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime)
block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count)
block_db_txn_duration.labels(self.name).inc(
(context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.)
block_db_sched_duration.labels(self.name).inc(
(context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.)
if self.created_context:
self.start_context.__exit__(exc_type, exc_val, exc_tb)