Mirror of https://github.com/autistic-symposium/master-algorithms-py.git, synced 2025-05-02 06:46:18 -04:00
reorganize dir
Signed-off-by: Mia Steinkirch <mia.steinkirch@gmail.com>
This commit is contained in:
parent 1b6f705e7c / commit a8e71c50db
276 changed files with 23954 additions and 0 deletions
37 other_resources/interview_cake/data_structures/angry_bird.py Normal file
@@ -0,0 +1,37 @@
#!/usr/bin/env python

"""
Each round, players receive a score between 0 and 100, which you use to
rank them from highest to lowest. So far you're using an algorithm that
sorts in O(n lg n) time, but players are complaining that their rankings
aren't updated fast enough. You need a faster sorting algorithm.

Write a function that takes:

a list of unsorted_scores
the highest_possible_score in the game

and returns a sorted list of scores in less than O(n lg n) time.
"""


def sort_scores(unsorted_scores, highest_score):

    # Counting sort: one bucket per possible score, so no comparisons
    # are needed and the sort runs in O(n + highest_score) time.
    score_counts = [0] * (highest_score + 1)

    for score in unsorted_scores:
        score_counts[score] += 1

    sorted_scores = []

    # Walk the buckets from highest score to lowest so the result is
    # ranked highest-first.
    for score in range(len(score_counts) - 1, -1, -1):
        count = score_counts[score]

        for i in range(count):
            sorted_scores.append(score)

    return sorted_scores


if __name__ == '__main__':

    unsorted_scores = [37, 89, 41, 65, 91, 53]
    HIGHEST_POSSIBLE_SCORE = 100

    print(sort_scores(unsorted_scores, HIGHEST_POSSIBLE_SCORE))
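The same counting idea can also be phrased with the standard library. A minimal sketch, not part of the original file (the name sort_scores_counter is assumed for illustration):

# Hypothetical alternative sketch, not from the repo: the same counting
# sort expressed with collections.Counter instead of a bucket list.
from collections import Counter

def sort_scores_counter(unsorted_scores, highest_score):
    counts = Counter(unsorted_scores)
    # One pass to count, one pass over the score range: O(n + highest_score).
    # Counter returns 0 for scores that never appeared.
    return [score
            for score in range(highest_score, -1, -1)
            for _ in range(counts[score])]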
@@ -0,0 +1,76 @@
#!/usr/bin/env python

"""
Write a function that returns a list of tuples for all the duplicate files, where:

the first item is the duplicate file
the second item is the original file

For example:

[('/tmp/parker_is_dumb.mpg', '/home/parker/secret_puppy_dance.mpg'),
 ('/home/trololol.mov', '/etc/apache2/httpd.conf')]

You can assume each file was only duplicated once.
"""

import os
import hashlib


def find_duplicate_files(starting_directory):
    files_seen_already = {}
    stack = [starting_directory]

    duplicates = []

    # Iterative depth-first walk of the directory tree.
    while stack:
        current_path = stack.pop()

        if os.path.isdir(current_path):
            for path in os.listdir(current_path):
                full_path = os.path.join(current_path, path)
                stack.append(full_path)

        else:
            file_hash = sample_hash_file(current_path)

            current_last_edited_time = os.path.getmtime(current_path)

            if file_hash in files_seen_already:
                existing_last_edited_time, existing_path = files_seen_already[file_hash]
                if current_last_edited_time > existing_last_edited_time:
                    # The current file was edited later, so it is the copy.
                    duplicates.append((current_path, existing_path))
                else:
                    # The file we saw earlier is the copy; remember the
                    # newer file's info going forward.
                    duplicates.append((existing_path, current_path))
                    files_seen_already[file_hash] = (current_last_edited_time, current_path)

            else:
                files_seen_already[file_hash] = (current_last_edited_time, current_path)

    return duplicates


def sample_hash_file(path):
    # Fingerprint the file by hashing three 4 KB samples (start, middle,
    # end) instead of the whole contents, so large files stay cheap.
    num_bytes_to_read_per_sample = 4000
    total_bytes = os.path.getsize(path)
    hasher = hashlib.sha512()

    with open(path, 'rb') as file:

        if total_bytes < num_bytes_to_read_per_sample * 3:
            # Small file: just hash all of it.
            hasher.update(file.read())
        else:
            # Integer division: file.seek() needs an int offset.
            num_bytes_between_samples = (
                (total_bytes - num_bytes_to_read_per_sample * 3) // 2
            )

            for offset_multiplier in range(3):
                start_of_sample = (
                    offset_multiplier
                    * (num_bytes_to_read_per_sample + num_bytes_between_samples)
                )
                file.seek(start_of_sample)
                sample = file.read(num_bytes_to_read_per_sample)
                hasher.update(sample)

    return hasher.hexdigest()
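A minimal usage sketch; the directory path below is a hypothetical example, not from the original file:

# Hypothetical usage sketch: '/tmp/example_dir' is an assumed path,
# for illustration only.
if __name__ == '__main__':
    for duplicate, original in find_duplicate_files('/tmp/example_dir'):
        print('%s is a duplicate of %s' % (duplicate, original))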
@@ -0,0 +1,42 @@
#!/usr/bin/env python

"""
Users on longer flights like to start a second movie right when their first one ends,
but they complain that the plane usually lands before they can see the ending.
So you're building a feature for choosing two movies whose total runtimes will
equal the exact flight length.

Write a function that takes an integer flight_length (in minutes) and a
list of integers movie_lengths (in minutes) and returns a boolean indicating
whether there are two numbers in movie_lengths whose sum equals flight_length.

When building your function:

Assume your users will watch exactly two movies
Don't make your users watch the same movie twice
Optimize for runtime over memory
"""


def is_there_two_movies(flight_length, movie_lengths):
    movie_lengths_seen = set()

    # For each movie, check whether the complementary length has already
    # been seen; adding to the set only afterwards guarantees the two
    # movies are distinct. One pass: O(n) time, O(n) space.
    for first_movie_length in movie_lengths:
        matching_second_movie_length = flight_length - first_movie_length
        if matching_second_movie_length in movie_lengths_seen:
            return True
        movie_lengths_seen.add(first_movie_length)

    return False


if __name__ == '__main__':

    flight_length = 10

    movie_lengths = [2, 4, 8]  # 2 + 8 == 10
    print(is_there_two_movies(flight_length, movie_lengths))
    print("Should be True")

    movie_lengths = [5, 6, 7, 8]  # no pair sums to 10
    print(is_there_two_movies(flight_length, movie_lengths))
    print("Should be False")
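For contrast, a minimal brute-force sketch, assumed for illustration and not in the original file, checks every pair in O(n^2) time; the set-based version above performs the same check in O(n):

# Hypothetical comparison sketch: O(n^2) pairwise check over distinct
# index pairs, so the same movie is never counted twice.
from itertools import combinations

def is_there_two_movies_brute(flight_length, movie_lengths):
    return any(a + b == flight_length
               for a, b in combinations(movie_lengths, 2))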