我相信有一种方法可以找到长度为n的O(n)无序数组中第k大的元素。也可能是期望O(n)之类的。我们该怎么做呢?


当前回答

虽然不是很确定O(n)复杂度,但肯定在O(n)和nLog(n)之间。也肯定更接近于O(n)而不是nLog(n)函数是用Java编写的

public int quickSelect(ArrayList<Integer>list, int nthSmallest){
    //Choose random number in range of 0 to array length
    Random random =  new Random();
    //This will give random number which is not greater than length - 1
    int pivotIndex = random.nextInt(list.size() - 1); 

    int pivot = list.get(pivotIndex);

    ArrayList<Integer> smallerNumberList = new ArrayList<Integer>();
    ArrayList<Integer> greaterNumberList = new ArrayList<Integer>();

    //Split list into two. 
    //Value smaller than pivot should go to smallerNumberList
    //Value greater than pivot should go to greaterNumberList
    //Do nothing for value which is equal to pivot
    for(int i=0; i<list.size(); i++){
        if(list.get(i)<pivot){
            smallerNumberList.add(list.get(i));
        }
        else if(list.get(i)>pivot){
            greaterNumberList.add(list.get(i));
        }
        else{
            //Do nothing
        }
    }

    //If smallerNumberList size is greater than nthSmallest value, nthSmallest number must be in this list 
    if(nthSmallest < smallerNumberList.size()){
        return quickSelect(smallerNumberList, nthSmallest);
    }
    //If nthSmallest is greater than [ list.size() - greaterNumberList.size() ], nthSmallest number must be in this list
    //The step is bit tricky. If confusing, please see the above loop once again for clarification.
    else if(nthSmallest > (list.size() - greaterNumberList.size())){
        //nthSmallest will have to be changed here. [ list.size() - greaterNumberList.size() ] elements are already in 
        //smallerNumberList
        nthSmallest = nthSmallest - (list.size() - greaterNumberList.size());
        return quickSelect(greaterNumberList,nthSmallest);
    }
    else{
        return pivot;
    }
}

其他回答

中位数中位数算法的解释可以在这里找到n中第k大的整数: http://cs.indstate.edu/~spitla/presentation.pdf

c++中的实现如下:

#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;

int findMedian(vector<int> vec){
//    Find median of a vector
    int median;
    size_t size = vec.size();
    median = vec[(size/2)];
    return median;
}

int findMedianOfMedians(vector<vector<int> > values){
    vector<int> medians;

    for (int i = 0; i < values.size(); i++) {
        int m = findMedian(values[i]);
        medians.push_back(m);
    }

    return findMedian(medians);
}

void selectionByMedianOfMedians(const vector<int> values, int k){
//    Divide the list into n/5 lists of 5 elements each
    vector<vector<int> > vec2D;

    int count = 0;
    while (count != values.size()) {
        int countRow = 0;
        vector<int> row;

        while ((countRow < 5) && (count < values.size())) {
            row.push_back(values[count]);
            count++;
            countRow++;
        }
        vec2D.push_back(row);
    }

    cout<<endl<<endl<<"Printing 2D vector : "<<endl;
    for (int i = 0; i < vec2D.size(); i++) {
        for (int j = 0; j < vec2D[i].size(); j++) {
            cout<<vec2D[i][j]<<" ";
        }
        cout<<endl;
    }
    cout<<endl;

//    Calculating a new pivot for making splits
    int m = findMedianOfMedians(vec2D);
    cout<<"Median of medians is : "<<m<<endl;

//    Partition the list into unique elements larger than 'm' (call this sublist L1) and
//    those smaller them 'm' (call this sublist L2)
    vector<int> L1, L2;

    for (int i = 0; i < vec2D.size(); i++) {
        for (int j = 0; j < vec2D[i].size(); j++) {
            if (vec2D[i][j] > m) {
                L1.push_back(vec2D[i][j]);
            }else if (vec2D[i][j] < m){
                L2.push_back(vec2D[i][j]);
            }
        }
    }

//    Checking the splits as per the new pivot 'm'
    cout<<endl<<"Printing L1 : "<<endl;
    for (int i = 0; i < L1.size(); i++) {
        cout<<L1[i]<<" ";
    }

    cout<<endl<<endl<<"Printing L2 : "<<endl;
    for (int i = 0; i < L2.size(); i++) {
        cout<<L2[i]<<" ";
    }

//    Recursive calls
    if ((k - 1) == L1.size()) {
        cout<<endl<<endl<<"Answer :"<<m;
    }else if (k <= L1.size()) {
        return selectionByMedianOfMedians(L1, k);
    }else if (k > (L1.size() + 1)){
        return selectionByMedianOfMedians(L2, k-((int)L1.size())-1);
    }

}

int main()
{
    int values[] = {2, 3, 5, 4, 1, 12, 11, 13, 16, 7, 8, 6, 10, 9, 17, 15, 19, 20, 18, 23, 21, 22, 25, 24, 14};

    vector<int> vec(values, values + 25);

    cout<<"The given array is : "<<endl;
    for (int i = 0; i < vec.size(); i++) {
        cout<<vec[i]<<" ";
    }

    selectionByMedianOfMedians(vec, 8);

    return 0;
}

A Programmer's Companion to Algorithm Analysis给出了一个O(n)的版本,尽管作者指出常数因子如此之高,您可能更喜欢简单的排序-列表-然后选择方法。

我已经回答了你的问题:)

Python中性感的快速选择

def quickselect(arr, k):
    '''
     k = 1 returns first element in ascending order.
     can be easily modified to return first element in descending order
    '''

    r = random.randrange(0, len(arr))

    a1 = [i for i in arr if i < arr[r]] '''partition'''
    a2 = [i for i in arr if i > arr[r]]

    if k <= len(a1):
        return quickselect(a1, k)
    elif k > len(arr)-len(a2):
        return quickselect(a2, k - (len(arr) - len(a2)))
    else:
        return arr[r]

这是一个Javascript实现。

如果您释放了不能修改数组的约束,则可以使用两个索引来标识“当前分区”(经典快速排序样式- http://www.nczonline.net/blog/2012/11/27/computer-science-in-javascript-quicksort/)来防止使用额外的内存。

function kthMax(a, k){
    var size = a.length;

    var pivot = a[ parseInt(Math.random()*size) ]; //Another choice could have been (size / 2) 

    //Create an array with all element lower than the pivot and an array with all element higher than the pivot
    var i, lowerArray = [], upperArray = [];
    for (i = 0; i  < size; i++){
        var current = a[i];

        if (current < pivot) {
            lowerArray.push(current);
        } else if (current > pivot) {
            upperArray.push(current);
        }
    }

    //Which one should I continue with?
    if(k <= upperArray.length) {
        //Upper
        return kthMax(upperArray, k);
    } else {
        var newK = k - (size - lowerArray.length);

        if (newK > 0) {
            ///Lower
            return kthMax(lowerArray, newK);
        } else {
            //None ... it's the current pivot!
            return pivot;
        }   
    }
}  

如果你想测试它的表现,你可以使用这个变量:

    function kthMax (a, k, logging) {
         var comparisonCount = 0; //Number of comparison that the algorithm uses
         var memoryCount = 0;     //Number of integers in memory that the algorithm uses
         var _log = logging;

         if(k < 0 || k >= a.length) {
            if (_log) console.log ("k is out of range"); 
            return false;
         }      

         function _kthmax(a, k){
             var size = a.length;
             var pivot = a[parseInt(Math.random()*size)];
             if(_log) console.log("Inputs:", a,  "size="+size, "k="+k, "pivot="+pivot);

             // This should never happen. Just a nice check in this exercise
             // if you are playing with the code to avoid never ending recursion            
             if(typeof pivot === "undefined") {
                 if (_log) console.log ("Ops..."); 
                 return false;
             }

             var i, lowerArray = [], upperArray = [];
             for (i = 0; i  < size; i++){
                 var current = a[i];
                 if (current < pivot) {
                     comparisonCount += 1;
                     memoryCount++;
                     lowerArray.push(current);
                 } else if (current > pivot) {
                     comparisonCount += 2;
                     memoryCount++;
                     upperArray.push(current);
                 }
             }
             if(_log) console.log("Pivoting:",lowerArray, "*"+pivot+"*", upperArray);

             if(k <= upperArray.length) {
                 comparisonCount += 1;
                 return _kthmax(upperArray, k);
             } else if (k > size - lowerArray.length) {
                 comparisonCount += 2;
                 return _kthmax(lowerArray, k - (size - lowerArray.length));
             } else {
                 comparisonCount += 2;
                 return pivot;
             }
     /* 
      * BTW, this is the logic for kthMin if we want to implement that... ;-)
      * 

             if(k <= lowerArray.length) {
                 return kthMin(lowerArray, k);
             } else if (k > size - upperArray.length) {
                 return kthMin(upperArray, k - (size - upperArray.length));
             } else 
                 return pivot;
     */            
         }

         var result = _kthmax(a, k);
         return {result: result, iterations: comparisonCount, memory: memoryCount};
     }

剩下的代码只是创建一些游乐场:

    function getRandomArray (n){
        var ar = [];
        for (var i = 0, l = n; i < l; i++) {
            ar.push(Math.round(Math.random() * l))
        }

        return ar;
    }

    //Create a random array of 50 numbers
    var ar = getRandomArray (50);   

现在给你做几次测试。 因为Math.random()每次都会产生不同的结果:

    kthMax(ar, 2, true);
    kthMax(ar, 2);
    kthMax(ar, 2);
    kthMax(ar, 2);
    kthMax(ar, 2);
    kthMax(ar, 2);
    kthMax(ar, 34, true);
    kthMax(ar, 34);
    kthMax(ar, 34);
    kthMax(ar, 34);
    kthMax(ar, 34);
    kthMax(ar, 34);

如果你测试它几次,你甚至可以看到经验的迭代次数,平均来说,O(n) ~=常数* n, k的值不会影响算法。

虽然不是很确定O(n)复杂度,但肯定在O(n)和nLog(n)之间。也肯定更接近于O(n)而不是nLog(n)函数是用Java编写的

public int quickSelect(ArrayList<Integer>list, int nthSmallest){
    //Choose random number in range of 0 to array length
    Random random =  new Random();
    //This will give random number which is not greater than length - 1
    int pivotIndex = random.nextInt(list.size() - 1); 

    int pivot = list.get(pivotIndex);

    ArrayList<Integer> smallerNumberList = new ArrayList<Integer>();
    ArrayList<Integer> greaterNumberList = new ArrayList<Integer>();

    //Split list into two. 
    //Value smaller than pivot should go to smallerNumberList
    //Value greater than pivot should go to greaterNumberList
    //Do nothing for value which is equal to pivot
    for(int i=0; i<list.size(); i++){
        if(list.get(i)<pivot){
            smallerNumberList.add(list.get(i));
        }
        else if(list.get(i)>pivot){
            greaterNumberList.add(list.get(i));
        }
        else{
            //Do nothing
        }
    }

    //If smallerNumberList size is greater than nthSmallest value, nthSmallest number must be in this list 
    if(nthSmallest < smallerNumberList.size()){
        return quickSelect(smallerNumberList, nthSmallest);
    }
    //If nthSmallest is greater than [ list.size() - greaterNumberList.size() ], nthSmallest number must be in this list
    //The step is bit tricky. If confusing, please see the above loop once again for clarification.
    else if(nthSmallest > (list.size() - greaterNumberList.size())){
        //nthSmallest will have to be changed here. [ list.size() - greaterNumberList.size() ] elements are already in 
        //smallerNumberList
        nthSmallest = nthSmallest - (list.size() - greaterNumberList.size());
        return quickSelect(greaterNumberList,nthSmallest);
    }
    else{
        return pivot;
    }
}