#!/bin/python

"""
Interview Cake practice solutions.

This listing combines two modules that were added in the same change:

* interview_cake/data_structures/file_system_hashing.py
* interview_cake/math/apple_stocks.py
"""

import hashlib
import os


# ---------------------------------------------------------------------------
# interview_cake/data_structures/file_system_hashing.py
# ---------------------------------------------------------------------------

def find_duplicate_files(starting_directory):
    """Return a list of all the duplicate files below ``starting_directory``.

    Each entry is a ``(duplicate_path, original_path)`` tuple: the first item
    is the duplicate file and the second item is the original file, where the
    file with the most recent modification time is taken to be the duplicate.
    For example::

        [('/tmp/parker_is_dumb.mpg', '/home/parker/secret_puppy_dance.mpg'),
         ('/home/trololol.mov', '/etc/apache2/httpd.conf')]

    You can assume each file was only duplicated once.

    Files are compared through ``sample_hash_file``, which only fingerprints
    a few samples of large files, so two large files that agree on every
    sampled region are reported as duplicates even if they differ elsewhere.

    Symbolic links found while walking the tree are skipped: following them
    can loop forever on a cyclic link, and a link to a file would otherwise
    be reported as a duplicate of its own target.

    Raises:
        OSError: if a path cannot be listed, stat'ed or read.
    """
    # Maps content fingerprint -> (mtime, path) of the oldest file seen so far.
    files_seen_already = {}
    duplicates = []

    # Iterative depth-first walk; an explicit stack avoids recursion limits.
    stack = [starting_directory]

    while stack:
        current_path = stack.pop()

        if os.path.isdir(current_path):
            for name in os.listdir(current_path):
                full_path = os.path.join(current_path, name)
                if not os.path.islink(full_path):
                    stack.append(full_path)
            continue

        file_hash = sample_hash_file(current_path)
        current_last_edited_time = os.path.getmtime(current_path)

        if file_hash not in files_seen_already:
            files_seen_already[file_hash] = (
                current_last_edited_time,
                current_path,
            )
            continue

        existing_last_edited_time, existing_path = files_seen_already[file_hash]

        if current_last_edited_time > existing_last_edited_time:
            # The file we just reached is newer, so it is the duplicate.
            duplicates.append((current_path, existing_path))
        else:
            # The file we just reached is older (or the same age), so the
            # previously recorded file is the duplicate; remember the older
            # file as the original from now on.
            duplicates.append((existing_path, current_path))
            files_seen_already[file_hash] = (
                current_last_edited_time,
                current_path,
            )

    return duplicates


def sample_hash_file(path, num_bytes_to_read_per_sample=4000):
    """Return a SHA-512 hex digest fingerprinting the file at ``path``.

    Files smaller than three samples are hashed in full. Larger files are
    fingerprinted from three samples of ``num_bytes_to_read_per_sample``
    bytes each, taken at the start, the middle and the end of the file, so
    the cost is constant regardless of file size.

    Args:
        path: Path of the file to fingerprint.
        num_bytes_to_read_per_sample: Size of each sample in bytes
            (default 4000). Must be at least 1.

    Returns:
        The hexadecimal SHA-512 digest of the bytes that were read.

    Raises:
        ValueError: if ``num_bytes_to_read_per_sample`` is less than 1.
        OSError: if the file cannot be stat'ed or read.
    """
    if num_bytes_to_read_per_sample < 1:
        raise ValueError("num_bytes_to_read_per_sample must be at least 1")

    total_bytes = os.path.getsize(path)
    hasher = hashlib.sha512()

    with open(path, 'rb') as handle:
        if total_bytes < num_bytes_to_read_per_sample * 3:
            hasher.update(handle.read())
        else:
            # Floor division keeps the offset an integer: true division
            # yields a float on Python 3, which seek() rejects.
            num_bytes_between_samples = (
                (total_bytes - num_bytes_to_read_per_sample * 3) // 2
            )

            for offset_multiplier in range(3):
                start_of_sample = (
                    offset_multiplier
                    * (num_bytes_to_read_per_sample + num_bytes_between_samples)
                )
                handle.seek(start_of_sample)
                hasher.update(handle.read(num_bytes_to_read_per_sample))

    return hasher.hexdigest()


# ---------------------------------------------------------------------------
# interview_cake/math/apple_stocks.py
# ---------------------------------------------------------------------------

def apple_stock_profit(stock_prices):
    """Return the best profit from one purchase followed by one sale.

    ``stock_prices`` holds Apple's stock prices, where:

    * the indices are the time (in minutes) past trade opening time, which
      was 9:30am local time;
    * the values are the price (in US dollars) of one share of Apple stock
      at that time.

    So if the stock cost $500 at 10:30am, ``stock_prices[60] == 500``.

    The share must be bought before it is sold, so the result is the largest
    ``stock_prices[j] - stock_prices[i]`` with ``i < j``. When prices only
    fall, the result is negative (the smallest possible loss).

    The input sequence is not modified.

    Raises:
        ValueError: if fewer than two prices are given, since a purchase and
            a later sale need two distinct points in time.
    """
    if len(stock_prices) < 2:
        raise ValueError("Getting a profit requires at least two prices")

    # Greedy single pass: track the cheapest price seen so far and the best
    # profit obtainable by selling at the current price.
    min_price = stock_prices[0]
    best_profit = stock_prices[1] - stock_prices[0]

    for price in stock_prices[1:]:
        # Evaluate the sale before updating the minimum so that we never
        # "buy" and "sell" at the same minute.
        best_profit = max(best_profit, price - min_price)
        min_price = min(min_price, price)

    return best_profit


stock_prices = [10, 7, 5, 8, 11, 9]
print(apple_stock_profit(stock_prices))
print("Should return 6 (buying for $5 and selling for $11)")