Through this post, I’m sharing Python code implementing the **median of medians algorithm**, an algorithm that resembles quickselect, differing only in the way in which the pivot is chosen, i.e, *deterministically*, instead of at *random*.

Its best case complexity is O(n) and worst case complexity O(nlog_{2}n)

I don’t have a formal education in CS, and came across this algorithm while going through Tim Roughgarden’s Coursera MOOC on the design and analysis of algorithms. Check out my implementation in Python.

def merge_tuple(a,b): | |

""" Function to merge two arrays of tuples """ | |

c = [] | |

while len(a) != 0 and len(b) != 0: | |

if a[0][0] < b[0][0]: | |

c.append(a[0]) | |

a.remove(a[0]) | |

else: | |

c.append(b[0]) | |

b.remove(b[0]) | |

if len(a) == 0: | |

c += b | |

else: | |

c += a | |

return c | |

def mergesort_tuple(x): | |

""" Function to sort an array using merge sort algorithm """ | |

if len(x) == 0 or len(x) == 1: | |

return x | |

else: | |

middle = len(x)/2 | |

a = mergesort_tuple(x[:middle]) | |

b = mergesort_tuple(x[middle:]) | |

return merge_tuple(a,b) | |

def lol(x,k): | |

""" Function to divide a list into a list of lists of size k each. """ | |

return [x[i:i+k] for i in range(0,len(x),k)] | |

def preprocess(x): | |

""" Function to assign an index to each element of a list of integers, outputting a list of tuples""" | |

return zip(x,range(len(x))) | |

def partition(x, pivot_index = 0): | |

""" Function to partition an unsorted array around a pivot""" | |

i = 0 | |

if pivot_index !=0: x[0],x[pivot_index] = x[pivot_index],x[0] | |

for j in range(len(x)-1): | |

if x[j+1] < x[0]: | |

x[j+1],x[i+1] = x[i+1],x[j+1] | |

i += 1 | |

x[0],x[i] = x[i],x[0] | |

return x,i | |

def ChoosePivot(x): | |

""" Function to choose pivot element of an unsorted array using 'Median of Medians' method. """ | |

if len(x) <= 5: | |

return mergesort_tuple(x)[middle_index(x)] | |

else: | |

lst = lol(x,5) | |

lst = [mergesort_tuple(el) for el in lst] | |

C = [el[middle_index(el)] for el in lst] | |

return ChoosePivot(C) | |

def DSelect(x,k): | |

""" Function to """ | |

if len(x) == 1: | |

return x[0] | |

else: | |

xpart = partition(x,ChoosePivot(preprocess(x))[1]) | |

x = xpart[0] # partitioned array | |

j = xpart[1] # pivot index | |

if j == k: | |

return x[j] | |

elif j > k: | |

return DSelect(x[:j],k) | |

else: | |

k = k - j - 1 | |

return DSelect(x[(j+1):], k) | |

arr = range(100,0,-1) | |

print DSelect(arr,50) | |

%timeit DSelect(arr,50) |

I get the following output:

Note that on the same input, quickselect is faster, giving us:

`1000 loops, best of 3: 254 µs per loop`