Mirror of https://github.com/autistic-symposium/master-algorithms-py.git, synced 2025-05-02 06:46:18 -04:00
reorganize dir
Signed-off-by: Mia Steinkirch <mia.steinkirch@gmail.com>
This commit is contained in:
parent 1b6f705e7c / commit a8e71c50db
276 changed files with 23954 additions and 0 deletions
37 other_resources/interview_cake/data_structures/angry_bird.py Normal file
@@ -0,0 +1,37 @@
#!/usr/bin/env python

"""
Each round, players receive a score between 0 and 100, which you use to
rank them from highest to lowest. So far you're using an algorithm that
sorts in O(n lg n) time, but players are complaining that their rankings
aren't updated fast enough. You need a faster sorting algorithm.

Write a function that takes:

a list of unsorted_scores
the highest_possible_score in the game

and returns a sorted list of scores in less than O(n lg n) time.
"""


def sort_scores(unsorted_scores, highest_score):

    # Counting sort: one bucket per possible score, so no comparisons
    # are needed and the sort runs in O(n + highest_score) time.
    score_counts = [0] * (highest_score + 1)

    for score in unsorted_scores:
        score_counts[score] += 1

    sorted_scores = []

    # Walk the buckets from highest score to lowest so the result is
    # ranked highest-first.
    for score in range(len(score_counts) - 1, -1, -1):
        count = score_counts[score]

        for i in range(count):
            sorted_scores.append(score)

    return sorted_scores


if __name__ == '__main__':

    unsorted_scores = [37, 89, 41, 65, 91, 53]
    HIGHEST_POSSIBLE_SCORE = 100

    print(sort_scores(unsorted_scores, HIGHEST_POSSIBLE_SCORE))
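The same counting idea can also be phrased with the standard library. A minimal sketch, not part of the original file (the name sort_scores_counter is assumed for illustration):

# Hypothetical alternative sketch, not from the repo: the same counting
# sort expressed with collections.Counter instead of a bucket list.
from collections import Counter

def sort_scores_counter(unsorted_scores, highest_score):
    counts = Counter(unsorted_scores)
    # One pass to count, one pass over the score range: O(n + highest_score).
    # Counter returns 0 for scores that never appeared.
    return [score
            for score in range(highest_score, -1, -1)
            for _ in range(counts[score])]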
@@ -0,0 +1,76 @@
#!/usr/bin/env python

"""
Write a function that returns a list of tuples for all the duplicate files, where:

the first item is the duplicate file
the second item is the original file

For example:

[('/tmp/parker_is_dumb.mpg', '/home/parker/secret_puppy_dance.mpg'),
 ('/home/trololol.mov', '/etc/apache2/httpd.conf')]

You can assume each file was only duplicated once.
"""

import os
import hashlib


def find_duplicate_files(starting_directory):
    files_seen_already = {}
    stack = [starting_directory]

    duplicates = []

    # Iterative depth-first walk of the directory tree.
    while stack:
        current_path = stack.pop()

        if os.path.isdir(current_path):
            for path in os.listdir(current_path):
                full_path = os.path.join(current_path, path)
                stack.append(full_path)

        else:
            file_hash = sample_hash_file(current_path)

            current_last_edited_time = os.path.getmtime(current_path)

            if file_hash in files_seen_already:
                existing_last_edited_time, existing_path = files_seen_already[file_hash]
                if current_last_edited_time > existing_last_edited_time:
                    # The current file was edited later, so it is the copy.
                    duplicates.append((current_path, existing_path))
                else:
                    # The file we saw earlier is the copy; remember the
                    # newer file's info going forward.
                    duplicates.append((existing_path, current_path))
                    files_seen_already[file_hash] = (current_last_edited_time, current_path)

            else:
                files_seen_already[file_hash] = (current_last_edited_time, current_path)

    return duplicates


def sample_hash_file(path):
    # Fingerprint the file by hashing three 4 KB samples (start, middle,
    # end) instead of the whole contents, so large files stay cheap.
    num_bytes_to_read_per_sample = 4000
    total_bytes = os.path.getsize(path)
    hasher = hashlib.sha512()

    with open(path, 'rb') as file:

        if total_bytes < num_bytes_to_read_per_sample * 3:
            # Small file: just hash all of it.
            hasher.update(file.read())
        else:
            # Integer division: file.seek() needs an int offset.
            num_bytes_between_samples = (
                (total_bytes - num_bytes_to_read_per_sample * 3) // 2
            )

            for offset_multiplier in range(3):
                start_of_sample = (
                    offset_multiplier
                    * (num_bytes_to_read_per_sample + num_bytes_between_samples)
                )
                file.seek(start_of_sample)
                sample = file.read(num_bytes_to_read_per_sample)
                hasher.update(sample)

    return hasher.hexdigest()
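A minimal usage sketch; the directory path below is a hypothetical example, not from the original file:

# Hypothetical usage sketch: '/tmp/example_dir' is an assumed path,
# for illustration only.
if __name__ == '__main__':
    for duplicate, original in find_duplicate_files('/tmp/example_dir'):
        print('%s is a duplicate of %s' % (duplicate, original))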
@@ -0,0 +1,42 @@
#!/usr/bin/env python

"""
Users on longer flights like to start a second movie right when their first one ends,
but they complain that the plane usually lands before they can see the ending.
So you're building a feature for choosing two movies whose total runtimes will
equal the exact flight length.

Write a function that takes an integer flight_length (in minutes) and a
list of integers movie_lengths (in minutes) and returns a boolean indicating
whether there are two numbers in movie_lengths whose sum equals flight_length.

When building your function:

Assume your users will watch exactly two movies
Don't make your users watch the same movie twice
Optimize for runtime over memory
"""


def is_there_two_movies(flight_length, movie_lengths):
    movie_lengths_seen = set()

    # For each movie, check whether the complementary length has already
    # been seen; adding to the set only afterwards guarantees the two
    # movies are distinct. One pass: O(n) time, O(n) space.
    for first_movie_length in movie_lengths:
        matching_second_movie_length = flight_length - first_movie_length
        if matching_second_movie_length in movie_lengths_seen:
            return True
        movie_lengths_seen.add(first_movie_length)

    return False


if __name__ == '__main__':

    flight_length = 10

    movie_lengths = [2, 4, 8]  # 2 + 8 == 10
    print(is_there_two_movies(flight_length, movie_lengths))
    print("Should be True")

    movie_lengths = [5, 6, 7, 8]  # no pair sums to 10
    print(is_there_two_movies(flight_length, movie_lengths))
    print("Should be False")
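For contrast, a minimal brute-force sketch, assumed for illustration and not in the original file, checks every pair in O(n^2) time; the set-based version above performs the same check in O(n):

# Hypothetical comparison sketch: O(n^2) pairwise check over distinct
# index pairs, so the same movie is never counted twice.
from itertools import combinations

def is_there_two_movies_brute(flight_length, movie_lengths):
    return any(a + b == flight_length
               for a, b in combinations(movie_lengths, 2))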