From 813cd4db01f0380fd511d38f721787488bc8cb83 Mon Sep 17 00:00:00 2001 From: marina <138340846+bt3gl-cryptographer@users.noreply.github.com> Date: Mon, 7 Aug 2023 20:31:01 -0700 Subject: [PATCH] Update README.md --- sets/README.md | 50 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/sets/README.md b/sets/README.md index 12aa549..8c3967d 100644 --- a/sets/README.md +++ b/sets/README.md @@ -2,15 +2,23 @@
+--- + ### implementing an `O(1)` randomized set class
-* let's think about a set structure where we would implement insert, delete, and get_random at O(1) time. this type of structure widely used in statistical algorithms such as markov chain monte carlo and metropolis-hastings algorithms, which needs sampling from a probability distribution when it's difficult to compute the distribution itself. -* candidates for O(1) average insert time are: - * **hashmaps (or hashsets)**: we could have problems with get_random(), as its idea is to choose a random index and then to retrieve an element with that index. since there is no indexes in hashmaps, to get a true random value, one would have to convert hashmap keys in a list, which is linear time. the solution would build a list of keys aside and use this list to compute get_random in constant time. - * **array lists**: we could have time with delete, since to delete a value at arbitrary index takes linear time. the solution would be always delete the last value (first swap the element to delete with the last one, then pop the last element out). for that, we need to compute an index of each element in constant time, and we need a hashmap that stores `element -> index`. -* we see that both ways need the same combination of data structures: a hashmap and an array. +* a set structure where we would implement `insert`, `delete`, and `get_random` at `O(1)` time. + +* this type of structure is widely used in statistical algorithms such as markov chain monte carlo and metropolis-hastings algorithms, which need sampling from a probability distribution when it's difficult to compute the distribution itself. + +* candidates for `O(1)` average insert time are: + * **hashmaps (or hashsets)**: to be able to implement `get_random()` at `O(1)` (choose a random index and retrieve a random element), we would have to convert hashmap keys into a list, which is `O(N)`. a solution is to build a list of keys aside and use this list to compute `get_random` in `O(1)`. 
+ * **array lists**: we would have `O(N)` with `delete`. the solution is to always delete the last value (first swap the element to delete with the last one, then pop the last element out). for that, we need to compute the index of each element in `O(1)`, so we need a hashmap that stores `element -> index`. + +* either way, we need the same combination of data structures: a hashmap and an array. + * an array keeps the values appended in order. `delete` always swaps the element to remove with the last one and pops it from the end. + * a dictionary maps each value (key) to its position in the array (its index), which guarantees `O(1)` lookup, while the array provides the list for `random.choice()`.
import random


class RandomizedSet:
    """Set of integers with average O(1) insert, remove and get_random.

    Values live in a list so one can be picked uniformly at random, and a
    dictionary maps every value to its position in that list so it can be
    located without scanning.
    """

    def __init__(self) -> None:
        # value -> index of that value inside ``index_list``
        self.random_set = {}
        # the stored values, in arbitrary order
        self.index_list = []

    def insert(self, val: int) -> bool:
        """Add ``val`` to the set.

        Returns:
            True if ``val`` was added, False if it was already present.
        """
        if val in self.random_set:
            return False

        # Record the position *before* appending: the new element lands at
        # index len(list), not len(list) + 1.
        self.random_set[val] = len(self.index_list)
        self.index_list.append(val)

        return True

    def remove(self, val: int) -> bool:
        """Remove ``val`` from the set.

        Returns:
            True if ``val`` was present and removed, False otherwise.
        """
        if val not in self.random_set:
            return False

        # Move the last element into the slot being vacated so the list can
        # shrink with an O(1) pop instead of an O(N) mid-list deletion.
        last_element = self.index_list[-1]
        index_val = self.random_set[val]
        self.index_list[index_val] = last_element
        self.random_set[last_element] = index_val

        self.index_list.pop()
        # Delete after the re-mapping above so that removing the last
        # element itself (val == last_element) still drops its entry.
        del self.random_set[val]

        return True

    def get_random(self) -> int:
        """Return a uniformly random element of the set.

        Raises:
            IndexError: if the set is empty (raised by ``random.choice``).
        """
        return random.choice(self.index_list)