[Data Structure] Heap sorting and top-K problem

Heap implementation source code

#define _CRT_SECURE_NO_WARNINGS

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <stdbool.h>
#include <assert.h>
typedef struct Heap
{<!-- -->
int* a;
int size;
int capacity;
}Heap;
void HeapInit(Heap* st)
{<!-- -->
st->a = NULL;
st->capacity = 0;
st->size = 0;
}
void swap(int* str1, int* str2)
{<!-- -->
int tmp = *str1;
*str1 = *str2;
*str2 = tmp;

}
void Adjustup(int* a, int child)
{<!-- -->
int parent = (child - 1) / 2;
while (child > 0)
{<!-- -->
if (a[child] < a[parent])
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
child = parent;
parent = (child - 1) / 2;



}
else
{<!-- -->
break;
}










}








}

void HeapPush(Heap* st, int x)
{<!-- -->
if (st->capacity == st->size)
{<!-- -->
int newcapcity = st->capacity == 0 ? 4 : st->capacity * 2;
int* tmp = (int*)realloc(st->a, newcapcity * sizeof(int));
if (tmp == NULL)
{<!-- -->
perror("realloc fail");




}
st->a = tmp;
st->capacity = newcapcity;
}
st->a[st->size] = x;
st->size + + ;
Adjustup(st->a, st->size - 1);
}
void Heapprint(Heap* st)
{<!-- -->

for (int i = 0; i < st->size; i + + )
{<!-- -->
printf("%d ", st->a[i]);
}







}
void AdjustDown(int* a, int n, int parent)
{<!-- -->
int child = parent * 2 + 1;
while (child < n)
{<!-- -->
if (child + 1 < n & amp; & amp; a[child + 1] < a[child])
{<!-- -->
child + + ;
}
if (a[child] < a[parent])
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
parent = child;
child = parent * 2 + 1;



}
else
{<!-- -->
break;
}













}
}
bool HeapEmpty(Heap* st)
{<!-- -->
assert(st);
if (st->size == 0)
{<!-- -->
return true;



}
else
{<!-- -->
return false;

}

}
int HeapSize(Heap* st)
{<!-- -->
assert(st);
return st->size;
}
void HeapPop(Heap* hp)
{<!-- -->
assert(hp);
assert(!HeapEmpty(hp));
swap( & amp;hp->a[0], & amp;hp->a[hp->size - 1]);
hp->size--;
AdjustDown(hp->a, hp->size, 0);
}
void HeapDestroy(Heap* st)
{<!-- -->
assert(st);
free(st->a);
st->a = NULL;
st->size = 0;
st->capacity = 0;
}
int HeapTop(Heap* st)
{<!-- -->
assert(st);
assert(!HeapEmpty(st));
return st->a[0];

}

Use heap structure to implement heap sort

void HeapSort(int* a, int n)
{<!-- -->
Heap HP;

HeapInit( & amp;hp);//Initialize the heap
for (int i = 0; i < 10; i + + )
{<!-- -->
HeapPush( & amp;hp, a[i]);//Insert elements into the heap, and adjust upwards each time it is inserted. Ensure that the heap is a small heap
}

while (!HeapEmpty( & amp;hp))//If the heap is not empty
{<!-- -->
int top=HeapTop( & amp;hp); // Take the top data of the heap as the minimum each time
printf("%d ", top);
HeapPop( & amp;hp);//When deleting the top element of the heap, readjust the heap so that it is still a small heap




}





}

Main function

int main()
{<!-- -->

\t
int arr[10] = {<!-- --> 52,85,74,46,23,14,65,25,32,78};
HeapSort(arr, 10);


}

Compile and run

If you use the above method to write a heap, you must also write a heap structure, which is particularly troublesome.

Sort method 2

1. Arrange in ascending order

We can directly use upward adjustment and downward adjustment to build a heap. In fact, the heap is just an array, and upward adjustment and downward adjustment are just subscript operations.
Should we create a large heap or a small heap in ascending order? If we don’t think about it carefully, we may create a small heap and then take the top elements in sequence. However, if we take the top elements of the heap in turn, the remaining elements cannot form a The relationship between the heap and the heap is completely messed up. As shown in the figure below, take out the small elements at the top of the heap and rearrange them.

Therefore, if we want to sort in ascending order, we will create a large pile. We can either use upward adjustment to create a large pile or downward adjustment to create a large pile.

Adjust downward to create a large pile

void AdjustDown(int* a, int n, int parent)
{<!-- -->
int child = parent * 2 + 1;
while (child < n)
{<!-- -->
if (child + 1 < n & amp; & amp; a[child + 1] >a[child])//Here you need to select the older child to exchange with the father
{<!-- -->
child + + ;
}
if (a[child] >a[parent])//The child who is older than his father walks up.
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
parent = child;
child = parent * 2 + 1;



}
else
{<!-- -->
break;
}













}
}
void HeapSort(int* a, int n)
{<!-- -->
Heap HP;


for (int i = (n - 1 - 1) / 2; i >=0; --i)//Start from the last non-cotyledon node until the element at the top of the heap
{<!-- -->
AdjustDown(a, n, i);


}
\t

\t
\t
\t
\t
\t
\t






}
















int main()
{<!-- -->
Heap HP;
//createdata();
//printtopK(10);
int arr[10] = {<!-- --> 52,85,74,46,23,14,65,25,32,78};
HeapSort(arr, 10);
for (int i = 0; i < 10; i + + )
{<!-- -->
printf("%d ", arr[i]);
}















\t
}

Adjust upward to create a large pile

void Adjustup(int* a, int child)
{<!-- -->
int parent = (child - 1) / 2;
while (child > 0)
{<!-- -->
if (a[child] > a[parent])
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
child = parent;
parent = (child - 1) / 2;



}
else
{<!-- -->
break;
}










}








}
void HeapSort(int* a, int n)
{<!-- -->
Heap HP;

for (int i = 1; i < n; i + + )
{<!-- -->
Adjustup(a, i);
   }
//for (int i = (n - 1 - 1) / 2; i >=0; --i)
//{<!-- -->
// AdjustDown(a, n, i);


//}
\t


\t
\t
\t
\t
\t






}


int main()
{<!-- -->
Heap HP;
//createdata();
//printtopK(10);
int arr[10] = {<!-- --> 52,85,74,46,23,14,65,25,32,78};
HeapSort(arr, 10);
for (int i = 0; i < 10; i + + )
{<!-- -->
printf("%d ", arr[i]);
}















\t
}

Create a huge pile

Then sort the created large pile
Modified heap sort

void HeapSort(int* a, int n)
{<!-- -->
Heap HP;


for (int i = (n - 1 - 1) / 2; i >=0; --i)
{<!-- -->
AdjustDown(a, n, i);


}
\t

int end = n - 1;
while (end > 0)
{<!-- -->
swap( & amp;a[0], & amp;a[end]);
AdjustDown(a, end, 0);
--end;
}
}

Sort descending

A large heap is created by sorting in ascending order, and a small heap is created by sorting in descending order. To create a small heap, you can use either the upward adjustment method or the downward adjustment method.

Adjust downward to create a small heap

void AdjustDown(int* a, int n, int parent)
{<!-- -->
int child = parent * 2 + 1;
while (child < n)
{<!-- -->
if (child + 1 < n & amp; & amp; a[child + 1] <a[child])
{<!-- -->
child + + ;
}
if (a[child] <a[parent])
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
parent = child;
child = parent * 2 + 1;



}
else
{<!-- -->
break;
}













}
}
void HeapSort(int* a, int n)
{<!-- -->
Heap HP;


for (int i = (n - 1 - 1) / 2; i >=0; --i)
{<!-- -->
AdjustDown(a, n, i);


}
\t


\t
\t
\t
\t
\t






}


int main()
{<!-- -->
Heap HP;
//createdata();
//printtopK(10);
int arr[10] = {<!-- --> 52,85,74,46,23,14,65,25,32,78};
HeapSort(arr, 10);
for (int i = 0; i < 10; i + + )
{<!-- -->
printf("%d ", arr[i]);
}















\t
}

Adjust upward to create a small heap

void Adjustup(int* a, int child)
{<!-- -->
int parent = (child - 1) / 2;
while (child > 0)
{<!-- -->
if (a[child] < a[parent])
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
child = parent;
parent = (child - 1) / 2;



}
else
{<!-- -->
break;
}










}








}
void HeapSort(int* a, int n)
{<!-- -->
Heap HP;

for (int i = 1; i < n; i + + )
{<!-- -->
Adjustup(a, i);
   }
//for (int i = (n - 1 - 1) / 2; i >=0; --i)
//{<!-- -->
// AdjustDown(a, n, i);


//}
\t


\t
\t
\t
\t
\t






}


int main()
{<!-- -->
Heap HP;
//createdata();
//printtopK(10);
int arr[10] = {<!-- --> 52,85,74,46,23,14,65,25,32,78};
HeapSort(arr, 10);
for (int i = 0; i < 10; i + + )
{<!-- -->
printf("%d ", arr[i]);
}















\t
}

Sort the created small heaps

void Adjustup(int* a, int child)
{<!-- -->
int parent = (child - 1) / 2;
while (child > 0)
{<!-- -->
if (a[child] < a[parent])
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
child = parent;
parent = (child - 1) / 2;



}
else
{<!-- -->
break;
}










}








}
void HeapSort(int* a, int n)
{<!-- -->
Heap HP;

for (int i = 1; i < n; i + + )
{<!-- -->
Adjustup(a, i);
   }
//for (int i = (n - 1 - 1) / 2; i >=0; --i)
//{<!-- -->
// AdjustDown(a, n, i);


//}
int end = n - 1;
while (end > 0)
{<!-- -->
swap( & amp;a[0], & amp;a[end]);
AdjustDown(a, end, 0);
--end;
}


\t
\t
\t
\t
\t






}


int main()
{<!-- -->
Heap HP;
//createdata();
//printtopK(10);
int arr[10] = {<!-- --> 52,85,74,46,23,14,65,25,32,78};
HeapSort(arr, 10);
for (int i = 0; i < 10; i + + )
{<!-- -->
printf("%d ", arr[i]);
}















\t
}

Summary: You can use upward adjustment and downward adjustment to create a large or small pile. If you create a large pile, the corresponding adjustment must be modified to make the child larger than the father for exchange. In this case, you can put the large number in On the top of the heap. If you create a small heap and the child is smaller than the father, then exchange. It is worth noting that when you create a large heap and use downward adjustment, you need to place the subscript of the child in the larger left and right child. On the child. When creating a small pile, you need to place the subscript of the child on the smaller left and right child.

top-K problem

TOP-K problem: Find the top K largest elements or smallest elements in data combination. Generally, the amount of data is relatively large.
For example: top 10 professionals, Fortune 500, rich list, top 100 active players in the game, etc.
For the Top-K problem, the simplest and most direct way that can be thought of is sorting. However, if the amount of data is very large, sorting is not advisable (the data may not all be loaded into the memory at once). The best way is to use a heap to solve the problem. The basic idea is as follows:

  1. Build a heap using the first K elements in the data set
    For the first k largest elements, build a small heap
    For the first k smallest elements, build a big pile
  2. Use the remaining N-K elements to compare with the top element of the heap in turn. If not satisfied, replace the top element of the heap. After comparing the remaining N-K elements with the top element of the heap in turn, the remaining K elements in the heap are the top K smallest ones required. Or the largest element.

Find the 10 largest ones

#define _CRT_SECURE_NO_WARNINGS

#include<stdio.h>
#include <stdlib.h>//srand header file
#include <time.h>//time header file
void swap(int* str1, int* str2)//Exchange the values of two data
{<!-- -->
int tmp = *str1;
*str1 = *str2;
*str2 = tmp;

}

void AdjustDown(int* a, int n, int parent)
{<!-- -->
int child = parent * 2 + 1;
while (child < n)
{<!-- -->
if (child + 1 < n & amp; & amp; a[child + 1] < a[child])
{<!-- -->
child + + ;
}
if (a[child] < a[parent])
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
parent = child;
child = parent * 2 + 1;



}
else
{<!-- -->
break;
}
}

}
void createddata()
{<!-- -->
int n = 10000;
srand((unsigned int)time(NULL));//Generate random number seed
FILE* fp = fopen("xa.txt", "w");//Open the file for writing
if (fp == NULL)
{<!-- -->
perror("fopen fail");
return;


}
for (int i = 0; i < 10000; i + + )//Generate 10000 random numbers,
{<!-- -->
int x = rand() 000;//And each random number is less than 10000.
fprintf(fp,"%d\\
",x);//Write it to the file and wrap it in a new line
    }
fclose(fp);
}
void printtopk(int k)
{<!-- -->
FILE* fp = fopen("xa.txt", "r");
if (fp == NULL)
{<!-- -->
perror("fopen fail");
return;


}
int* kminheap = (int*)malloc(sizeof(int) * k);//Dynamicly apply for 10 space sizes
if (kminheap == NULL)
{<!-- -->
perror("malloc fail");



}
for (int i = 0; i < k; i + + )
{<!-- -->
fscanf(fp,"%d", & amp;kminheap[i]);//Read the first 10 data of the file first
\t\t
}
for (int i = (k - 1 - 1) / 2; i >= 0; i--)//Adjust downward to a small heap, because selecting the largest 10 requires arranging a small heap
{<!-- -->
AdjustDown(kminheap, k, i);
\t\t
    }
int val = 0;
while (!feof(fp))//The remaining data in the file needs to be compared with the top element of the heap in the small heap. If it is greater than the top element of the heap, update the top element of the heap.
{<!-- -->
fscanf(fp,"%d", & amp;val);//Read the data in the file into val
if (val > kminheap[0])
{<!-- -->
kminheap[0] = val;
AdjustDown(kminheap, k,0);//If it is larger than the smallest one on the top of the heap, put it into the heap, and then adjust the top element of the heap downward. After adjustment, you need to ensure that it is a small heap.



}






}
for (int i = 0; i < k; i + + )
{<!-- -->
printf("%d ", kminheap[i]);




}

}

int main()
{<!-- -->
createddata();
printtopk(10);



}

Find the 10 smallest ones

Need to create a lot of

#define _CRT_SECURE_NO_WARNINGS

#include<stdio.h>
#include <stdlib.h>
#include <time.h>
void swap(int* str1, int* str2)
{<!-- -->
int tmp = *str1;
*str1 = *str2;
*str2 = tmp;

}

void AdjustDown(int* a, int n, int parent)
{<!-- -->
int child = parent * 2 + 1;
while (child < n)
{<!-- -->
if (child + 1 < n & amp; & amp; a[child + 1] > a[child])
{<!-- -->
child + + ;
}
if (a[child] > a[parent])
{<!-- -->
swap( & amp;a[child], & amp;a[parent]);
parent = child;
child = parent * 2 + 1;



}
else
{<!-- -->
break;
}
}

}
void createddata()
{<!-- -->
int n = 10000;
srand((unsigned int)time(NULL));
FILE* fp = fopen("xa.txt", "w");
if (fp == NULL)
{<!-- -->
perror("fopen fail");
return;


}
for (int i = 0; i < 10000; i + + )
{<!-- -->
int x = rand() 000;
fprintf(fp,"%d\\
",x);
    }
fclose(fp);
}
void printtopk(int k)
{<!-- -->
FILE* fp = fopen("xa.txt", "r");
if (fp == NULL)
{<!-- -->
perror("fopen fail");
return;


}
int* kminheap = (int*)malloc(sizeof(int) * k);
if (kminheap == NULL)
{<!-- -->
perror("malloc fail");



}
for (int i = 0; i < k; i + + )
{<!-- -->
fscanf(fp,"%d", & amp;kminheap[i]);
\t\t
}
for (int i = (k - 1 - 1) / 2; i >= 0; i--)
{<!-- -->
AdjustDown(kminheap, k, i);
\t\t
    }
int val = 0;
while (!feof(fp))
{<!-- -->
fscanf(fp,"%d", & amp;val);
if (val <kminheap[0])
{<!-- -->
kminheap[0] = val;
AdjustDown(kminheap, k,0);



}






}
for (int i = 0; i < k; i + + )
{<!-- -->
printf("%d ", kminheap[i]);




}

}

int main()
{<!-- -->
createddata();
printtopk(10);



}