diff --git a/book/book_second_edition.pdf b/book/book_second_edition.pdf index 245e104..861a6fb 100644 Binary files a/book/book_second_edition.pdf and b/book/book_second_edition.pdf differ diff --git a/src/searching_and_sorting/searching/quick_select.py b/src/searching_and_sorting/searching/quick_select.py index 1abcb92..383b64d 100644 --- a/src/searching_and_sorting/searching/quick_select.py +++ b/src/searching_and_sorting/searching/quick_select.py @@ -4,14 +4,21 @@ __author__ = "Mari Wahl" __email__ = "marina.w4hl@gmail.com" +import random + + +''' The simplest way...''' def quickSelect(seq, k): # this part is the same as quick sort len_seq = len(seq) if len_seq < 2: return seq + # we could use a random choice here doing + #pivot = random.choice(seq) ipivot = len_seq // 2 pivot = seq[ipivot] + # O(n) smallerList = [x for i,x in enumerate(seq) if x <= pivot and i != ipivot] largerList = [x for i,x in enumerate(seq) if x > pivot and i != ipivot] @@ -22,7 +29,48 @@ def quickSelect(seq, k): elif k < m: return quickSelect(smallerList, k) else: - return quickSelect(largerList, k-m) + return quickSelect(largerList, k-m-1) + + + +''' If you don't want to use pythons feature at all and + also select pivot randomly''' + +def swap(seq, x, y): + tmp = seq[x] + seq[x] = seq[y] + seq[y] = tmp + + +def quickSelectHard(seq, k, left=None, right=None): + left = left or 0 + right = right or len(seq) - 1 + #ipivot = random.randint(left, right) + ipivot = len(seq)//2 + pivot = seq[ipivot] + + # Move pivot out of the sorting range + swap(seq, ipivot, right) + swapIndex, i = left, left + while i < right: + if seq[i] < pivot: + swap(seq, i, swapIndex) + swapIndex += 1 + i += 1 + + # Move pivot to final position + swap(seq, right, swapIndex) + + # Check if pivot matches, else recurse on the correct half + rank = len(seq) - swapIndex + + + if k == rank: + return seq[swapIndex] + elif k < rank: + return quickSelectHard(seq, k, swapIndex+1, right) + else: + return quickSelectHard(seq, k, left, swapIndex-1) @@ -30,6 +78,7 @@ def quickSelect(seq, k): if __name__ == '__main__': # Checking the Answer seq = [10, 60, 100, 50, 60, 75, 31, 50, 30, 20, 120, 170, 200] + #seq = [3, 7, 2, 1, 4, 6, 5, 10, 9, 11] # we want the middle element k = len(seq) // 2 @@ -37,5 +86,6 @@ if __name__ == '__main__': # Note that this only work for odd arrays, since median in # even arrays is the mean of the two middle elements print(quickSelect(seq, k)) + print(quickSelectHard(seq, k)) import numpy print numpy.median(seq) \ No newline at end of file diff --git a/src/searching_and_sorting/sorting/find_k_largest_seq_quickselect.py b/src/searching_and_sorting/sorting/find_k_largest_seq_quickselect.py deleted file mode 100644 index 94a7708..0000000 --- a/src/searching_and_sorting/sorting/find_k_largest_seq_quickselect.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/python - -__author__ = "Mari Wahl" -__email__ = "marina.w4hl@gmail.com" - -import random - -def swap(A, x, y): - tmp = A[x] - A[x] = A[y] - A[y] = tmp - - -def qselect(A, k, left=None, right=None): - left = left or 0 - right = right or len(A) - 1 - pivot = random.randint(left, right) - pivotVal = A[pivot] - - # Move pivot out of the sorting range - swap(A, pivot, right) - swapIndex, i = left, left - while i <= right - 1: - if A[i] < pivotVal: - swap(A, i, swapIndex) - swapIndex += 1 - i += 1 - - # Move pivot to final position - swap(A, right, swapIndex) - - # Check if pivot matches, else recurse on the correct half - rank = len(A) - swapIndex - if k == rank: - return A[swapIndex] - elif k < rank: - return qselect(A, k, left=swapIndex+1, right=right) - else: - return qselect(A, k, left=left, right=swapIndex-1) - - - -def find_k_largest_seq_quickselect(seq, k): - ''' perform quick select to get kth element, and find all elements larger ''' - kth_largest = qselect(seq, k) - result = [] - for item in seq: - if item >= kth_largest: - result.append(item) - return result - - - -def test_find_k_largest_seq_quickselect(): - seq = [3, 10, 4, 5, 1, 8, 9, 11, 5] - k = 2 - assert(find_k_largest_seq_quickselect(seq,k) == [10, 11]) - print("Tests passed!") - - -if __name__ == '__main__': - test_find_k_largest_seq_quickselect() -