mirror of
https://github.com/autistic-symposium/master-algorithms-py.git
synced 2025-04-29 20:26:07 -04:00
searching and sorting organized
This commit is contained in:
parent
91825867f6
commit
28b84cef65
@ -1,56 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
|
||||
def binary_search_rec(array, item, lo=0, hi=None):
    '''
    Recursively binary-search the sorted sequence ``array`` for ``item``.

    The search window is the half-open index range ``[lo, hi)``; ``hi``
    defaults to ``len(array)``.  Returns the tuple ``(True, index)`` when
    the item is found and ``False`` otherwise.

    >>> binary_search_rec([2,3,5,6,8,10,15,23], 15)
    (True, 6)
    >>> binary_search_rec([2,3,5,6,8,10,15,23], 4)
    False
    >>> binary_search_rec([1,3,5,7], 2)
    False
    >>> binary_search_rec([2,3,5], 100)
    False
    >>> binary_search_rec([], 1)
    False
    '''
    # Compare against None explicitly: "hi or len(array)" would silently
    # replace a legitimate hi=0 with the full length.
    if hi is None:
        hi = len(array)

    # Empty window: nothing left to inspect.
    if lo >= hi:
        return False

    mid = (hi + lo) // 2

    if array[mid] == item:
        return True, mid
    if array[mid] < item:
        return binary_search_rec(array, item, mid + 1, hi)
    # hi is exclusive, so the left half is exactly [lo, mid); the indices
    # stay valid for the original sequence and no slice copy is needed.
    return binary_search_rec(array, item, lo, mid)
|
||||
|
||||
|
||||
|
||||
def binary_search_iter(array, item):
    '''
    Iterative binary search over a sorted sequence.

    Returns ``(True, index)`` when ``item`` is present and ``False``
    when it is not.

    >>> binary_search_iter([2,3,5,6,8,10,15,23], 15)
    (True, 6)
    >>> binary_search_iter([2,3,5,6,8,10,15,23], 4)
    False
    '''
    # The candidate window is the half-open range [left, right).
    left, right = 0, len(array)

    while left < right:
        middle = (right + left) // 2
        candidate = array[middle]
        if candidate == item:
            return True, middle
        if candidate > item:
            # item can only be to the left of middle
            right = middle
            continue
        left = middle + 1

    return False
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
def qs(array):
    '''
    Quick sort that returns a new list, pivoting on the middle element.

    Sequences with fewer than two elements are returned as they are.

    >>> qs([4,1,6,2,7,9,3])
    [1, 2, 3, 4, 6, 7, 9]
    '''
    if len(array) <= 1:
        return array

    middle = len(array) // 2
    pivot = array[middle]

    # Split every element except the pivot itself into the two sides.
    smaller, larger = [], []
    for value in array[:middle] + array[middle + 1:]:
        if value <= pivot:
            smaller.append(value)
        elif value > pivot:
            larger.append(value)

    return qs(smaller) + [pivot] + qs(larger)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import doctest
|
||||
doctest.testmod()
|
@ -1,49 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
|
||||
def binary_search(seq, key):
|
||||
''' binary search iterative algorithm '''
|
||||
''' observe that the index is returned '''
|
||||
hi = len(seq)
|
||||
def binary_search_rec(array, item, lo=0, hi=None):
    '''
    Recursively binary-search the sorted sequence ``array`` for ``item``.

    The search window is the half-open index range ``[lo, hi)``; ``hi``
    defaults to ``len(array)``.  Returns the tuple ``(True, index)`` when
    the item is found and ``False`` otherwise.

    >>> binary_search_rec([2,3,5,6,8,10,15,23], 15)
    (True, 6)
    >>> binary_search_rec([2,3,5,6,8,10,15,23], 4)
    False
    >>> binary_search_rec([1,3,5,7], 2)
    False
    >>> binary_search_rec([2,3,5], 100)
    False
    >>> binary_search_rec([], 1)
    False
    '''
    # Compare against None explicitly: "hi or len(array)" would silently
    # replace a legitimate hi=0 with the full length.
    if hi is None:
        hi = len(array)

    # Empty window: nothing left to inspect.
    if lo >= hi:
        return False

    mid = (hi + lo) // 2

    if array[mid] == item:
        return True, mid
    if array[mid] < item:
        return binary_search_rec(array, item, mid + 1, hi)
    # hi is exclusive, so the left half is exactly [lo, mid); the indices
    # stay valid for the original sequence and no slice copy is needed.
    return binary_search_rec(array, item, lo, mid)
|
||||
|
||||
|
||||
|
||||
def binary_search_iter(array, item):
|
||||
'''
|
||||
>>> binary_search_iter([2,3,5,6,8,10,15,23], 15)
|
||||
(True, 6)
|
||||
>>> binary_search_iter([2,3,5,6,8,10,15,23], 4)
|
||||
False
|
||||
'''
|
||||
hi = len(array)
|
||||
lo = 0
|
||||
|
||||
while lo < hi:
|
||||
mid = (hi+lo) // 2
|
||||
if seq[mid] == key:
|
||||
return mid
|
||||
elif key < seq[mid]:
|
||||
mid = (hi+lo)//2
|
||||
if array[mid] == item:
|
||||
return True, mid
|
||||
elif array[mid] > item:
|
||||
hi = mid
|
||||
else:
|
||||
lo = mid + 1
|
||||
lo = mid + 1
|
||||
return False
|
||||
|
||||
|
||||
def binary_search_rec(seq, key, lo=0, hi=None):
    ''' binary search recursive algorithm

    Search the sorted sequence ``seq`` for ``key`` inside the half-open
    index window ``[lo, hi)`` (``hi`` defaults to ``len(seq)``).
    Returns the index of a matching element, or None when the key is
    not present.
    '''
    # "hi or len(seq)" would reset an explicit hi=0 to the full length,
    # which restarts the search and can recurse forever.
    if hi is None:
        hi = len(seq)
    if lo >= hi:
        return None

    mid = (hi + lo) // 2
    if seq[mid] == key:
        return mid
    if seq[mid] < key:
        return binary_search_rec(seq, key, mid + 1, hi)
    # hi is exclusive, so the left half is [lo, mid)
    return binary_search_rec(seq, key, lo, mid)
|
||||
|
||||
|
||||
def test_binary_search():
    ''' check that both binary search variants locate 6 at index 3 '''
    values = [1,2,5,6,7,10,12,12,14,15]
    wanted = 6

    found_iter = binary_search(values, wanted)
    assert(found_iter == 3)

    found_rec = binary_search_rec(values, wanted)
    assert(found_rec == 3)

    print('Tests passed!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_binary_search()
|
||||
|
||||
|
||||
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
||||
|
@ -1,75 +1,89 @@
|
||||
#!/usr/bin/python
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
''' Searches an element in a matrix where in every row, the values are increasing from left to right, but the last number in a row is smaller than the first number in the next row.
|
||||
|
||||
(1) The naive brute force solution (sequential search) scan all numbers and cost O(nm). However, since the numbers are already sorted, the matrix can be viewed as a 1D sorted array. The binary search algorithm is suitable. The efficiency is O(logmn).
|
||||
|
||||
def binary_search_matrix_rec(m, key, lo=0, hi=None):
|
||||
'''
|
||||
>>> m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
|
||||
>>> binary_search_matrix_rec(m, 6)
|
||||
(1, 2)
|
||||
>>> binary_search_matrix_rec(m, 12)
|
||||
>>> binary_search_matrix_iter(m, 6)
|
||||
(1, 2)
|
||||
>>> binary_search_matrix_iter(m, 12)
|
||||
>>> binary_search_matrix_iter(m, 1)
|
||||
(0, 0)
|
||||
'''
|
||||
if not m:
|
||||
return None
|
||||
|
||||
(2) Another solution is to start at the top-right corner and "discard" one row or one column at every step. The efficiency is O(m + n).
|
||||
>>> m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
|
||||
>>> searching_matrix(m, 6)
|
||||
(1, 2)
|
||||
>>> searching_matrix(m, 12)
|
||||
|
||||
'''
|
||||
|
||||
|
||||
def binary_search_matrix_rec(m, key, lo=0, hi=None):
    '''
    Recursive binary search over a matrix whose rows, read one after
    another, form a single sorted sequence.

    The matrix is treated as a flat array of ``rows * cols`` items and
    searched inside the half-open window ``[lo, hi)``.  Returns the
    ``(row, col)`` position of ``key``, or None when it is absent or the
    matrix is empty.

    >>> m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    >>> binary_search_matrix_rec(m, 6)
    (1, 2)
    >>> binary_search_matrix_rec(m, 12)
    >>> binary_search_matrix_rec(m, 1)
    (0, 0)
    >>> binary_search_matrix_rec(m, 4)
    (1, 0)
    '''
    if not m:
        return None

    rows = len(m)
    cols = len(m[0])
    # "hi or rows*cols" would turn an explicit hi=0 back into the full
    # size and restart the search forever.
    if hi is None:
        hi = rows * cols

    # An empty window is the base case; without it the index overflows.
    if lo >= hi:
        return None

    mid = (hi + lo) // 2
    # Translate the flat index into matrix coordinates.
    row, col = divmod(mid, cols)
    item = m[row][col]

    if key == item:
        return row, col
    if key < item:
        # hi is exclusive: passing mid (not mid-1) keeps index mid-1 in play
        return binary_search_matrix_rec(m, key, lo, mid)
    return binary_search_matrix_rec(m, key, mid + 1, hi)
|
||||
|
||||
|
||||
|
||||
def binary_search_matrix_iter(m, key):
    '''
    Iterative binary search over a matrix whose rows, read one after
    another, form a single sorted sequence.

    Returns the ``(row, col)`` position of ``key``, or None when it is
    absent or the matrix is empty.

    >>> m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    >>> binary_search_matrix_iter(m, 6)
    (1, 2)
    >>> binary_search_matrix_iter(m, 12)
    >>> binary_search_matrix_iter(m, 1)
    (0, 0)
    >>> binary_search_matrix_iter([[1, 2, 3, 4], [5, 6, 7, 8]], 4)
    (0, 3)
    '''
    if not m:
        return None

    rows = len(m)
    cols = len(m[0])
    lo, hi = 0, rows * cols

    while lo < hi:
        mid = (hi + lo) // 2
        # A flat index maps to (mid // cols, mid % cols); dividing by the
        # number of rows is only right for square matrices.
        row, col = divmod(mid, cols)
        item = m[row][col]
        if key == item:
            return (row, col)
        elif key < item:
            hi = mid
        else:
            lo = mid + 1

    return None
|
||||
|
||||
|
||||
def searching_matrix(m, key):
    '''
    Staircase search: start at the top-right corner and discard one
    column (key is smaller) or one row (key is larger) per step.

    Returns the ``(row, col)`` position of ``key``, or None when it is
    absent or the matrix is empty.

    >>> m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    >>> searching_matrix(m, 6)
    (1, 2)
    >>> searching_matrix(m, 12)
    >>> searching_matrix(m, 1)
    (0, 0)
    >>> searching_matrix(m, 7)
    (2, 0)
    '''
    if not m:
        return None

    rows = len(m)
    cols = len(m[0])
    i, j = 0, cols - 1

    # j >= 0 (not j > 0): column 0 has to be inspected as well.
    while i < rows and j >= 0:
        item = m[i][j]
        if key == item:
            return (i, j)
        elif key < item:
            j -= 1
        else:
            i += 1

    return None
|
||||
|
||||
|
||||
|
@ -1,21 +1,27 @@
|
||||
#!/usr/bin/python
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
|
||||
''' Given a sorted array that was rotated, find an item with binary search:
|
||||
'''
|
||||
Given a sorted array that was rotated, find an item with binary search:
|
||||
'''
|
||||
|
||||
def find_element_rot_array(seq, key, lo=0, hi=None):
|
||||
|
||||
hi = hi or len(seq)
|
||||
if hi <= lo: return None # base case: <= for odd and even numbers!
|
||||
if hi <= lo:
|
||||
return None # base case: <= for odd and even numbers!
|
||||
|
||||
mid = (hi + lo) // 2
|
||||
if key == seq[mid]: return mid
|
||||
|
||||
if key == seq[mid]:
|
||||
return mid
|
||||
|
||||
# if left is ordered --> we work here
|
||||
if seq[lo] <= seq[mid]:
|
||||
|
||||
# now, is the key there?
|
||||
if key < seq[mid] and key >= seq[lo]:
|
||||
return find_element_rot_array(seq, key, lo, mid)
|
||||
@ -25,6 +31,7 @@ def find_element_rot_array(seq, key, lo=0, hi=None):
|
||||
|
||||
# right is ordered --> we work here
|
||||
else:
|
||||
|
||||
# now, is the key there?
|
||||
if key > seq[mid] and key <= seq[hi-1]: # stupid hi-1!!!
|
||||
return find_element_rot_array(seq, key, mid+1, hi)
|
||||
|
@ -1,15 +1,15 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
def find_max_unimodal_array(A):
|
||||
if len(A) <= 2 : return None
|
||||
if len(A) <= 2 :
|
||||
return None
|
||||
left = 0
|
||||
right = len(A)-1
|
||||
|
||||
while right > left +1:
|
||||
|
||||
mid = (left + right)//2
|
||||
if A[mid] > A[mid-1] and A[mid] > A[mid+1]:
|
||||
return A[mid]
|
||||
@ -17,6 +17,7 @@ def find_max_unimodal_array(A):
|
||||
left = mid
|
||||
else:
|
||||
right = mid
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
@ -1,14 +1,11 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
''' implement square root using binary search '''
|
||||
|
||||
|
||||
def find_sqrt_bin_search(n, error=0.001):
|
||||
''' implement square root using binary search '''
|
||||
lower = n < 1 and n or 1
|
||||
upper = n < 1 and 1 or n
|
||||
mid = lower + (upper - lower) / 2.0
|
||||
|
@ -1,20 +1,22 @@
|
||||
#!/usr/bin/python
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
''' Given a sorted an array with empty strings, we use binary search to find some string (since
|
||||
the list is sorted):
|
||||
--> we deal with the empty strings with strip and then run to left and right, or move
|
||||
mid to the closed non-empty str (remember that the index must be conserved):
|
||||
''' Given a sorted array with empty strings,
|
||||
we use binary search to find some string (since the list is sorted):
|
||||
--> we deal with the empty strings with strip and then run to left
|
||||
and right, or move mid to the closest non-empty str (remember that
|
||||
the index must be conserved):
|
||||
'''
|
||||
|
||||
|
||||
def find_str_array_with_empty_str(seq, s1):
|
||||
if not seq or not s1: return None
|
||||
if not seq or not s1:
|
||||
return None
|
||||
hi = len(seq)
|
||||
lo = 0
|
||||
|
||||
while hi > lo:
|
||||
mid = (hi+lo)//2
|
||||
|
||||
@ -32,9 +34,12 @@ def find_str_array_with_empty_str(seq, s1):
|
||||
right += 1
|
||||
left -= 1
|
||||
|
||||
if s1 == seq[mid] == s1: return mid
|
||||
elif s1 < seq[mid]: hi = mid
|
||||
else: lo = mid + 1
|
||||
if s1 == seq[mid] == s1:
|
||||
return mid
|
||||
elif s1 < seq[mid]:
|
||||
hi = mid
|
||||
else:
|
||||
lo = mid + 1
|
||||
|
||||
|
||||
|
||||
|
@ -1,8 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
def binary_serch_counting(lst1, k, lo=0, hi=None):
|
||||
if hi is None: hi = len(lst1)
|
||||
@ -19,10 +17,12 @@ def binary_serch_counting(lst1, k, lo=0, hi=None):
|
||||
|
||||
|
||||
def find_time_occurrence_list(seq, k):
|
||||
""" find how many times a k element appears in a sorted list. One way of doing this is using
|
||||
collections.OrderedDict to no mess with the sorting, and add entries for every count. This
|
||||
should be O(n). It has a O(1) space complexity since the size of the dict is fixed.
|
||||
Another way, since the array is sorted, it to use binary search, since this is only O(logn).
|
||||
""" find how many times a k element appears in a sorted list.
|
||||
One way of doing this is using collections.OrderedDict to no
|
||||
mess with the sorting, and add entries for every count. This
|
||||
should be O(n). It has a O(1) space complexity since the size of
|
||||
the dict is fixed. Another way, since the array is sorted, it to
|
||||
use binary search, since this is only O(logn).
|
||||
"""
|
||||
index_some_k = binary_serch_counting(seq, k)
|
||||
count = 1
|
||||
|
@ -1,80 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
|
||||
|
||||
|
||||
''' using sets '''
|
||||
|
||||
def intersection_two_arrays_sets(seq1, seq2):
    ''' return the set of items that appear in both sequences '''
    # the & operator on two sets is the same as set.intersection
    return set(seq1) & set(seq2)
|
||||
|
||||
|
||||
|
||||
''' using merge sort '''
|
||||
|
||||
def intersection_two_arrays_ms(seq1, seq2):
    ''' find the intersection of two sorted arrays with a merge-style scan

    Both inputs must be sorted in ascending order.  Two read-only
    cursors walk the sequences, so neither argument is modified.
    Returns a new ascending list of the common items; an item that is
    repeated in both inputs appears as many times as the smaller count.
    '''
    res = []
    i, j = 0, 0
    while i < len(seq1) and j < len(seq2):
        a, b = seq1[i], seq2[j]
        if a == b:
            res.append(a)
            i += 1
            j += 1
        elif a < b:
            # a is too small to match anything left in seq2
            i += 1
        else:
            j += 1
    return res
|
||||
|
||||
|
||||
|
||||
|
||||
''' using binary search '''
|
||||
|
||||
def binary_search(seq, key, lo=0, hi=None):
    ''' binary search iterative algorithm

    Look for ``key`` in the sorted sequence ``seq`` inside the half-open
    index window ``[lo, hi)`` (``hi`` defaults to ``len(seq)``).
    Returns True when the key is found and None otherwise.
    '''
    # "hi or len(seq)" would ignore an explicit hi=0 and scan everything.
    if hi is None:
        hi = len(seq)
    while lo < hi:
        mid = (hi + lo) // 2
        if seq[mid] == key:
            return True
        elif key > seq[mid]:
            lo = mid + 1
        else:
            hi = mid
    return None
|
||||
|
||||
def intersection_two_arrays_bs(seq1, seq2):
    ''' intersect two arrays by binary-searching the longer one

    Every item of the shorter sequence is looked up in the longer one
    with binary_search, so the longer sequence must be sorted.  The
    result keeps the order of the shorter sequence.
    '''
    if len(seq1) > len(seq2):
        haystack, needles = seq1, seq2
    else:
        haystack, needles = seq2, seq1

    return [needle for needle in needles if binary_search(haystack, needle)]
|
||||
|
||||
|
||||
|
||||
def test_intersection_two_arrays(module_name='this module'):
    ''' run the three intersection variants on the same pair of lists '''
    first = [1,2,3,5,7,8]
    second = [3,5,6]

    assert(set(intersection_two_arrays_sets(first, second)) == {3, 5})
    assert(intersection_two_arrays_bs(first, second) == [3,5])
    # the merge-sort variant is checked last, as in the original ordering
    assert(intersection_two_arrays_ms(first, second) == [3,5])

    print('Tests in {name} have {con}!'.format(name=module_name, con='passed'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_intersection_two_arrays()
|
||||
|
@ -1,8 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
|
||||
|
@ -1,12 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#!/usr/bin/python
|
||||
__author__ = "bt3"
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
|
||||
import random
|
||||
|
||||
|
||||
''' The simplest way...'''
|
||||
def quickSelect(seq, k):
|
||||
# this part is the same as quick sort
|
||||
|
@ -1,8 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
import numpy
|
||||
|
@ -1,8 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
__author__ = "Mari Wahl"
|
||||
__email__ = "marina.w4hl@gmail.com"
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
def sequential_search(seq, n):
|
||||
|
@ -7,8 +7,6 @@ def bubble_sort(seq):
|
||||
"""
|
||||
Implementation of bubble sort.
|
||||
O(n2) and thus highly ineffective.
|
||||
:param seq: the sequence to be sorted.
|
||||
:return: the sorted sequence.
|
||||
"""
|
||||
size = len(seq) -1
|
||||
for num in range(size, 0, -1):
|
||||
@ -24,9 +22,6 @@ def test_bubble_sort(module_name='this module'):
|
||||
seq = [4, 5, 2, 1, 6, 2, 7, 10, 13, 8]
|
||||
assert(bubble_sort(seq) == sorted(seq))
|
||||
|
||||
s = 'Tests in {name} have {con}!'
|
||||
print(s.format(name=module_name, con='passed'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_bubble_sort()
|
||||
|
@ -19,7 +19,6 @@ def count_sort_dict(a):
|
||||
def test_count_sort():
    ''' count_sort_dict must agree with the built-in sorted() '''
    numbers = [3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2, 5, 4, 1, 5, 3]
    outcome = count_sort_dict(numbers)
    assert(outcome == sorted(numbers))
    print('Tests passed!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -18,7 +18,6 @@ def gnome_sort(seq):
|
||||
def test_gnome_sort():
    ''' gnome_sort must agree with the built-in sorted() '''
    numbers = [3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2, 5, 4, 1, 5, 3]
    outcome = gnome_sort(numbers)
    assert(outcome == sorted(numbers))
    print('Tests passed!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -31,7 +31,6 @@ def test_insertion_sort():
|
||||
seq = [3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2, 5, 4, 1, 5, 3]
|
||||
assert(insertion_sort(seq) == sorted(seq))
|
||||
assert(insertion_sort_rec(seq) == sorted(seq))
|
||||
print('Tests passed!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
'''
|
||||
You have two arrays with N integers in them. Merge those arrays using a
|
||||
recursive algorithm so that the integers in the final array are sorted.
|
@ -3,56 +3,12 @@
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
|
||||
|
||||
''' Some examples of how to implement Merge Sort in Python.
|
||||
--> RUNTIME: WORST/BEST/AVERAGE Is O(nlogn)
|
||||
--> space complexity is O(n) for arrays
|
||||
--> in general not in place, good for large arrays
|
||||
--> In the case of two arrays: we can merge two arrays using the merge function from the merge sort
|
||||
--> we can do this for files too, merging each two
|
||||
|
||||
1) If we can modify the arrays (pop) we can use:
|
||||
def merge(left, right):
|
||||
if not left or not right: return left or right # nothing to be merged
|
||||
result = []
|
||||
while left and right:
|
||||
if left[-1] >= right[-1]:
|
||||
result.append(left.pop())
|
||||
else:
|
||||
result.append(right.pop())
|
||||
result.reverse()
|
||||
return (left or right) + result
|
||||
|
||||
|
||||
2) If we can't modify or we want to in place, we need two pointers:
|
||||
>>> l1 = [1, 2, 3, 4, 5, 6, 7]
|
||||
>>> l2 = [2, 4, 5, 8]
|
||||
>>> merge(l1, l2)
|
||||
[1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8]
|
||||
|
||||
|
||||
3) For example, in the case we have a big array filled 0s in the end, and another array with the size of the number of 0s:
|
||||
>>> l1 = [1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0]
|
||||
>>> l2 = [2, 4, 5, 8]
|
||||
>>> merge_two_arrays_inplace(l1, l2)
|
||||
[1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8]
|
||||
|
||||
|
||||
4) If we want to merge sorted files (and we have plenty of RAM to load all files):
|
||||
>>> list_files = ['1.dat', '2.dat', '3.dat']
|
||||
>>> merge_files(list_files)
|
||||
[1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 7, 8]
|
||||
'''
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
The typical example...
|
||||
"""
|
||||
|
||||
def merge_sort(seq):
|
||||
'''
|
||||
>>> seq = [3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2]
|
||||
>>> merge_sort(seq)
|
||||
[0, 1, 2, 2, 3, 3, 5, 5, 6, 6, 8]
|
||||
'''
|
||||
if len(seq) < 2:
|
||||
return seq
|
||||
mid = len(seq)//2
|
||||
@ -73,22 +29,21 @@ def merge_sort(seq):
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
We could also divide this sort into two parts, separating
|
||||
the merge part in another function
|
||||
'''
|
||||
|
||||
# separating the merge part in another function
|
||||
def merge_sort_sep(seq):
    '''
    Merge sort with the merging step delegated to the merge() helper.

    Sequences with fewer than two elements are returned unchanged;
    otherwise both halves are sorted recursively and merged.

    >>> seq = [3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2]
    >>> merge_sort_sep(seq)
    [0, 1, 2, 2, 3, 3, 5, 5, 6, 6, 8]
    '''
    if len(seq) < 2:
        return seq  # base case

    mid = len(seq) // 2
    # Recurse into this function itself so that this variant does not
    # depend on the separate merge_sort implementation.
    left = merge_sort_sep(seq[:mid])
    right = merge_sort_sep(seq[mid:])  # notice that mid is included!
    return merge(left, right)  # merge iteratively
|
||||
|
||||
|
||||
|
||||
def merge(left, right):
|
||||
if not left or not right:
|
||||
return left or right # nothing to be merged
|
||||
@ -106,43 +61,6 @@ def merge(left, right):
|
||||
return result
|
||||
|
||||
|
||||
|
||||
|
||||
''' The following merge function is O(2n) and
|
||||
illustrates several Python features '''
|
||||
def merge_2n(left, right):
    ''' merge two ascending lists by repeatedly popping the larger tail

    Both arguments are consumed (popped) while merging.  When either
    input is empty the other one is returned as is.
    '''
    if not (left and right):
        return left or right  # nothing to be merged

    tail = []
    while left and right:
        # take from whichever list currently ends with the larger value
        source = left if left[-1] >= right[-1] else right
        tail.append(source.pop())

    # values were collected largest-first
    tail.reverse()
    remaining = left or right
    return remaining + tail
|
||||
|
||||
|
||||
|
||||
''' Merge two arrays in place '''
|
||||
def merge_two_arrays_inplace(l1, l2):
    ''' Merge the sorted list l2 into the sorted list l1, in place.

    ``l1`` holds its real items first, followed by ``len(l2)``
    placeholder slots (e.g. zeros) that receive the merged result.
    The merge fills ``l1`` from the back so no real item is overwritten
    before it is read.

    Returns ``l1``; when either list is empty the other one is returned
    untouched.  Raises ValueError when ``l1`` is shorter than ``l2`` and
    therefore cannot hold the result.

    >>> merge_two_arrays_inplace([1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0], [2, 4, 5, 8])
    [1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8]
    >>> merge_two_arrays_inplace([4, 5, 6, 0, 0, 0], [1, 2, 3])
    [1, 2, 3, 4, 5, 6]
    '''
    if not l1 or not l2:
        return l1 or l2  # nothing to be merged
    if len(l1) < len(l2):
        raise ValueError('l1 needs at least len(l2) trailing free slots')

    p2 = len(l2) - 1             # last item of l2
    p1 = len(l1) - len(l2) - 1   # last real item of l1
    p12 = len(l1) - 1            # next slot to fill

    # Keep going until all of l2 is placed: once the real items of l1 are
    # used up (p1 < 0) the rest of l2 still has to be copied over.
    while p2 >= 0:
        if p1 >= 0 and l2[p2] < l1[p1]:
            l1[p12] = l1[p1]
            p1 -= 1
        else:
            l1[p12] = l2[p2]
            p2 -= 1
        p12 -= 1
    return l1
|
||||
|
||||
|
||||
|
||||
''' Merge sort for files '''
|
||||
def merge_files(list_files):
|
||||
result = []
|
||||
@ -159,16 +77,6 @@ def merge_files(list_files):
|
||||
return final
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def test_merge_sort():
    ''' both merge sort variants must agree with the built-in sorted() '''
    numbers = [3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2]
    expected = sorted(numbers)

    plain = merge_sort(numbers)
    assert(plain == expected)

    separated = merge_sort_sep(numbers)
    assert(separated == expected)

    print('Tests passed!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_merge_sort()
|
||||
import doctest
|
||||
doctest.testmod()
|
@ -3,38 +3,23 @@
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
''' Some examples of how to implement Quick Sort in Python
|
||||
--> RUNTIME: BEST/AVERAGE Is O(nlogn), WORST is O(n2)
|
||||
--> the first example is not in place, the second is in place
|
||||
--> test with two element arrays, identical values
|
||||
def qs(array):
|
||||
'''
|
||||
>>> qs([4,1,6,2,7,9,3])
|
||||
[1, 2, 3, 4, 6, 7, 9]
|
||||
'''
|
||||
if len(array) < 2:
|
||||
return array
|
||||
|
||||
Quick sort in place:
|
||||
1) select pivot as the index = 0
|
||||
2) start pointer1 at index = 1 and pointer2 in the last element
|
||||
3) while pointer1 < pointer2:
|
||||
if value in pointer1 <= pivot
|
||||
swap value in pointer1 with value in pointer2 and advanced pointer2
|
||||
else
|
||||
advance pointer1
|
||||
4) now the array is like this:
|
||||
[pivot, larger than pivot, smaller than pivot]
|
||||
5) swap the pivot where pointer 1 stop
|
||||
6) do recursively for [smaller] + [pivot] + [larger]
|
||||
'''
|
||||
|
||||
|
||||
|
||||
|
||||
def quick_sort(seq):
    ''' quick sort that returns a new list, pivoting on the middle item

    Sequences with fewer than two elements are returned as they are.
    '''
    if len(seq) < 2:
        return seq

    pivot_at = len(seq) // 2
    pivot = seq[pivot_at]
    others = seq[:pivot_at] + seq[pivot_at + 1:]

    # items equal to the pivot go to the left-hand side
    not_greater = list(filter(lambda x: x <= pivot, others))
    sorted_left = quick_sort(not_greater)

    greater = list(filter(lambda x: x > pivot, others))
    sorted_right = quick_sort(greater)

    return sorted_left + [pivot] + sorted_right
|
||||
piv = len(array)//2
|
||||
piv_element = array[piv]
|
||||
new_array = array[:piv] + array[piv+1:]
|
||||
|
||||
left = [a for a in new_array if a <= piv_element]
|
||||
right = [a for a in new_array if a > piv_element]
|
||||
|
||||
|
||||
return qs(left) + [array[piv]] + qs(right)
|
||||
|
||||
|
||||
""" we can also divide them into two functions """
|
||||
@ -45,39 +30,20 @@ def partition(seq):
|
||||
return lo, pi, hi
|
||||
|
||||
def quick_sort_divided(seq):
    '''
    Quick sort with the partitioning step delegated to partition().

    >>> quick_sort_divided([4,1,6,2,7,9,3])
    [1, 2, 3, 4, 6, 7, 9]
    '''
    if len(seq) >= 2:
        smaller, pivot, larger = partition(seq)
        return quick_sort_divided(smaller) + [pivot] + quick_sort_divided(larger)
    # zero or one element: already sorted
    return seq
|
||||
|
||||
|
||||
''' quick_sort in place '''
|
||||
def _partition_in_place(seq, lo, hi):
    ''' partition seq[lo..hi] (inclusive) around seq[lo]; return the pivot's final index '''
    pivot = seq[lo]
    boundary = lo  # last index known to hold a value <= pivot
    for i in range(lo + 1, hi + 1):
        if seq[i] <= pivot:
            boundary += 1
            seq[boundary], seq[i] = seq[i], seq[boundary]
    # drop the pivot between the "<= pivot" and "> pivot" regions
    seq[lo], seq[boundary] = seq[boundary], seq[lo]
    return boundary


def quick_sort_in(seq):
    ''' quick_sort in place

    Sorts the list ``seq`` itself in ascending order and returns it.
    Every element of a sub-range is compared against the pivot, so no
    item is left unclassified, and no sub-list copies are made.

    >>> quick_sort_in([3, 1, 2])
    [1, 2, 3]
    >>> quick_sort_in([3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2])
    [0, 1, 2, 2, 3, 3, 5, 5, 6, 6, 8]
    '''
    # An explicit stack of (lo, hi) ranges replaces recursion, so long
    # inputs cannot hit the interpreter's recursion limit.
    pending = [(0, len(seq) - 1)]
    while pending:
        lo, hi = pending.pop()
        if lo >= hi:
            continue  # zero or one element: already sorted
        pivot_at = _partition_in_place(seq, lo, hi)
        pending.append((lo, pivot_at - 1))
        pending.append((pivot_at + 1, hi))
    return seq
|
||||
|
||||
|
||||
|
||||
|
||||
def test_quick_sort():
    ''' both quick sort variants must agree with the built-in sorted() '''
    numbers = [3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2]

    plain = quick_sort(numbers)
    assert(plain == sorted(numbers))

    divided = quick_sort_divided(numbers)
    assert(divided == sorted(numbers))

    print('Tests passed!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_quick_sort()
|
||||
|
||||
|
||||
|
||||
import doctest
|
||||
doctest.testmod()
|
@ -17,7 +17,6 @@ def selection_sort(seq):
|
||||
def test_selection_sort():
    ''' selection_sort must agree with the built-in sorted() '''
    numbers = [3, 5, 2, 6, 8, 1, 0, 3, 5, 6, 2]
    outcome = selection_sort(numbers)
    assert(outcome == sorted(numbers))
    print('Tests passed!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -3,33 +3,31 @@
|
||||
__author__ = "bt3"
|
||||
|
||||
|
||||
''' A method to sort an array so that all the anagrams are together. Since we only
|
||||
want the anagrams to be grouped, we can use a dictionary for this task. This
|
||||
algorithm is O(n).
|
||||
>>> l1 = ['hat', 'ball', 'tha', 'cut', 'labl', 'hta', 'cool', 'cuy', 'uct']
|
||||
>>> sort_anagrams_together(l1)
|
||||
['cut', 'uct', 'cool', 'ball', 'labl', 'hat', 'tha', 'hta', 'cuy']
|
||||
''' A method to sort an array so that all the anagrams are together.
|
||||
Since we only want the anagrams to be grouped, we can use a
|
||||
dictionary for this task. This algorithm is O(n).
|
||||
'''
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
def sort_anagrams_together(l1):
    '''
    Reorder the words of ``l1`` so that anagrams sit next to each other.

    Words are bucketed by their sorted letters.  Buckets are emitted in
    the order their first word appears (dicts keep insertion order on
    Python 3.7+), and words keep their input order inside a bucket.
    Returns a new list; ``l1`` is not modified.

    >>> l1 = ['hat', 'ball', 'tha', 'cut', 'labl', 'hta', 'cool', 'cuy', 'uct']
    >>> sort_anagrams_together(l1)
    ['hat', 'tha', 'hta', 'ball', 'labl', 'cut', 'uct', 'cool', 'cuy']
    '''
    # step 1: save the anagrams together; sorted() yields a list of
    # characters, so it is joined back into a hashable string key
    dict_aux = defaultdict(list)
    for word in l1:
        dict_aux[''.join(sorted(word))].append(word)

    # step 2: flatten the groups; to have the groups themselves ordered
    # one would sort the keys before this step
    result = []
    for group in dict_aux.values():
        result.extend(group)

    return result
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
Loading…
x
Reference in New Issue
Block a user