/*
Copyright (C) 2014-2020 Igor van den Hoven
[email protected]
*/
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
quadsort 1.1.1.1
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <assert.h>
typedef int CMPFUNC (const void *a, const void *b);
void quad_swap32(void *array, void *swap, size_t nmemb, CMPFUNC *cmp)
{
size_t offset;
register int *pts, *pta;
pta = array;
pts = swap;
for (offset = 0 ; offset + 4 <= nmemb ; offset += 4)
{
if (cmp(&pta[0], &pta[1]) > 0)
{
pts[0] = pta[1];
pts[1] = pta[0];
}
else
{
pts[0] = pta[0];
pts[1] = pta[1];
}
if (cmp(&pta[2], &pta[3]) > 0)
{
pts[2] = pta[3];
pts[3] = pta[2];
}
else
{
pts[2] = pta[2];
pts[3] = pta[3];
}
if (cmp(&pts[1], &pts[2]) <= 0)
{
*pta++ = pts[0];
*pta++ = pts[1];
*pta++ = pts[2];
*pta++ = pts[3];
}
else if (cmp(&pts[0], &pts[3]) > 0)
{
*pta++ = pts[2];
*pta++ = pts[3];
*pta++ = pts[0];
*pta++ = pts[1];
}
else if (cmp(&pts[0], &pts[2]) > 0)
{
*pta++ = pts[2];
*pta++ = pts[0];
if (cmp(&pts[1], &pts[3]) > 0)
{
*pta++ = pts[3];
*pta++ = pts[1];
}
else
{
*pta++ = pts[1];
*pta++ = pts[3];
}
}
else
{
*pta++ = pts[0];
*pta++ = pts[2];
if (cmp(&pts[1], &pts[3]) > 0)
{
*pta++ = pts[3];
*pta++ = pts[1];
}
else
{
*pta++ = pts[1];
*pta++ = pts[3];
}
}
}
switch (nmemb - offset)
{
case 0:
case 1:
return;
case 2:
if (cmp(&pta[0], &pta[1]) > 0)
{
pts[0] = pta[0];
pta[0] = pta[1];
pta[1] = pts[0];
}
return;
case 3:
if (cmp(&pta[0], &pta[1]) > 0)
{
pts[0] = pta[0];
pta[0] = pta[1];
pta[1] = pts[0];
}
if (cmp(&pta[1], &pta[2]) > 0)
{
pts[0] = pta[1];
pta[1] = pta[2];
pta[2] = pts[0];
}
if (cmp(&pta[0], &pta[1]) > 0)
{
pts[0] = pta[0];
pta[0] = pta[1];
pta[1] = pts[0];
}
return;
default:
assert(nmemb < 1 && nmemb > 3);
}
}
void quad_swap64(void *array, void *swap, size_t nmemb, CMPFUNC *cmp)
{
size_t offset;
register long long *pts, *pta;
pta = array;
pts = swap;
for (offset = 0 ; offset + 4 <= nmemb ; offset += 4)
{
if (cmp(&pta[0], &pta[1]) > 0)
{
pts[0] = pta[1];
pts[1] = pta[0];
}
else
{
pts[0] = pta[0];
pts[1] = pta[1];
}
if (cmp(&pta[2], &pta[3]) > 0)
{
pts[2] = pta[3];
pts[3] = pta[2];
}
else
{
pts[2] = pta[2];
pts[3] = pta[3];
}
if (cmp(&pts[1], &pts[2]) <= 0)
{
*pta++ = pts[0];
*pta++ = pts[1];
*pta++ = pts[2];
*pta++ = pts[3];
}
else if (cmp(&pts[0], &pts[3]) > 0)
{
*pta++ = pts[2];
*pta++ = pts[3];
*pta++ = pts[0];
*pta++ = pts[1];
}
else if (cmp(&pts[0], &pts[2]) > 0)
{
*pta++ = pts[2];
*pta++ = pts[0];
if (cmp(&pts[1], &pts[3]) > 0)
{
*pta++ = pts[3];
*pta++ = pts[1];
}
else
{
*pta++ = pts[1];
*pta++ = pts[3];
}
}
else
{
*pta++ = pts[0];
*pta++ = pts[2];
if (cmp(&pts[1], &pts[3]) > 0)
{
*pta++ = pts[3];
*pta++ = pts[1];
}
else
{
*pta++ = pts[1];
*pta++ = pts[3];
}
}
}
switch (nmemb - offset)
{
case 0:
case 1:
return;
case 2:
if (cmp(&pta[0], &pta[1]) > 0)
{
pts[0] = pta[0];
pta[0] = pta[1];
pta[1] = pts[0];
}
return;
case 3:
if (cmp(&pta[0], &pta[1]) > 0)
{
pts[0] = pta[0];
pta[0] = pta[1];
pta[1] = pts[0];
}
if (cmp(&pta[1], &pta[2]) > 0)
{
pts[0] = pta[1];
pta[1] = pta[2];
pta[2] = pts[0];
}
if (cmp(&pta[0], &pta[1]) > 0)
{
pts[0] = pta[0];
pta[0] = pta[1];
pta[1] = pts[0];
}
return;
default:
assert(nmemb < 1 && nmemb > 3);
}
}
void quad_sort32(void *array, void *swap, size_t nmemb, CMPFUNC *cmp)
{
size_t offset, block = 4;
register int *pta, *pts, *c, *c_max, *d, *d_max, *end;
end = array;
end += nmemb;
while (block < nmemb)
{
offset = 0;
while (offset + block < nmemb)
{
pta = array;
pta += offset;
d_max = pta + block;
if (cmp(d_max - 1, d_max) <= 0)
{
if (offset + block * 3 < nmemb)
{
d_max = pta + block * 3;
if (cmp(d_max - 1, d_max) <= 0)
{
d_max = pta + block * 2;
if (cmp(d_max - 1, d_max) <= 0)
{
offset += block * 4;
continue;
}
pts = swap;
c = pta;
c_max = pta + block * 2;
d = c_max;
d_max = offset + block * 4 <= nmemb ? d + block * 2 : end;
while (c < c_max)
*pts++ = *c++;
while (d < d_max)
*pts++ = *d++;
goto step3;
}
pts = swap;
c = pta;
c_max = pta + block * 2;
while (c < c_max)
*pts++ = *c++;
goto step2;
}
else if (offset + block * 2 < nmemb)
{
d_max = pta + block * 2;
if (cmp(d_max - 1, d_max) <= 0)
{
offset += block * 4;
continue;
}
pts = swap;
c = pta;
c_max = pta + block * 2;
while (c < c_max)
*pts++ = *c++;
goto step2;
}
else
{
offset += block * 4;
continue;
}
}
// step1:
pts = swap;
c = pta;
c_max = pta + block;
d = c_max;
d_max = offset + block * 2 <= nmemb ? d + block : end;
if (cmp(c_max - 1, d_max - 1) <= 0)
{
while (c < c_max)
{
while (cmp(c, d) > 0)
{
*pts++ = *d++;
}
*pts++ = *c++;
}
while (d < d_max)
*pts++ = *d++;
}
else if (cmp(c, d_max - 1) > 0)
{
while (d < d_max)
*pts++ = *d++;
while (c < c_max)
*pts++ = *c++;
}
else
{
while (d < d_max)
{
while (cmp(c, d) <= 0)
{
*pts++ = *c++;
}
*pts++ = *d++;
}
while (c < c_max)
{
*pts++ = *c++;
}
}
step2:
if (offset + block * 2 < nmemb)
{
c = pta + block * 2;
if (offset + block * 3 < nmemb)
{
c_max = c + block;
d = c_max;
d_max = offset + block * 4 <= nmemb ? d + block : end;
if (cmp(c_max - 1, d_max - 1) <= 0)
{
while (c < c_max)
{
while (cmp(c, d) > 0)
{
*pts++ = *d++;
}
*pts++ = *c++;
}
while (d < d_max)
*pts++ = *d++;
}
else if (cmp(c, d_max - 1) > 0)
{
while (d < d_max)
*pts++ = *d++;
while (c < c_max)
*pts++ = *c++;
}
else
{
while (d < d_max)
{
while (cmp(c, d) <= 0)
{
*pts++ = *c++;
}
*pts++ = *d++;
}
while (c < c_max)
*pts++ = *c++;
}
}
else
{
while (c < end)
*pts++ = *c++;
}
}
step3:
pts = swap;
c = pts;
if (offset + block * 2 < nmemb)
{
c_max = c + block * 2;
d = c_max;
d_max = offset + block * 4 <= nmemb ? d + block * 2 : pts + nmemb - offset;
if (cmp(c_max - 1, d_max - 1) <= 0)
{
while (c < c_max)
{
while (cmp(c, d) > 0)
{
*pta++ = *d++;
}
*pta++ = *c++;
}
while (d < d_max)
*pta++ = *d++;
}
else if (cmp(c, d_max - 1) > 0)
{
while (d < d_max)
*pta++ = *d++;
while (c < c_max)
*pta++ = *c++;
}
else
{
while (d < d_max)
{
while (cmp(d, c) > 0)
{
*pta++ = *c++;
}
*pta++ = *d++;
}
while (c < c_max)
*pta++ = *c++;
}
}
else
{
d_max = pts + nmemb - offset;
while (c < d_max)
*pta++ = *c++;
}
offset += block * 4;
}
block *= 4;
}
}
void quad_sort64(void *array, void *swap, size_t nmemb, CMPFUNC *cmp)
{
size_t offset, block = 4;
register long long *pta, *pts, *c, *c_max, *d, *d_max, *end;
end = array;
end += nmemb;
while (block < nmemb)
{
offset = 0;
while (offset + block < nmemb)
{
pta = array;
pta += offset;
d_max = pta + block;
if (cmp(d_max - 1, d_max) <= 0)
{
if (offset + block * 3 < nmemb)
{
d_max = pta + block * 3;
if (cmp(d_max - 1, d_max) <= 0)
{
d_max = pta + block * 2;
if (cmp(d_max - 1, d_max) <= 0)
{
offset += block * 4;
continue;
}
pts = swap;
c = pta;
c_max = pta + block * 2;
d = c_max;
d_max = offset + block * 4 <= nmemb ? d + block * 2 : end;
while (c < c_max)
*pts++ = *c++;
while (d < d_max)
*pts++ = *d++;
goto step3;
}
pts = swap;
c = pta;
c_max = pta + block * 2;
while (c < c_max)
*pts++ = *c++;
goto step2;
}
else if (offset + block * 2 < nmemb)
{
d_max = pta + block * 2;
if (cmp(d_max - 1, d_max) <= 0)
{
offset += block * 4;
continue;
}
pts = swap;
c = pta;
c_max = pta + block * 2;
while (c < c_max)
*pts++ = *c++;
goto step2;
}
else
{
offset += block * 4;
continue;
}
}
// step1:
pts = swap;
c = pta;
c_max = pta + block;
d = c_max;
d_max = offset + block * 2 <= nmemb ? d + block : end;
if (cmp(c_max - 1, d_max - 1) <= 0)
{
while (c < c_max)
{
while (cmp(c, d) > 0)
{
*pts++ = *d++;
}
*pts++ = *c++;
}
while (d < d_max)
*pts++ = *d++;
}
else if (cmp(c, d_max - 1) > 0)
{
while (d < d_max)
*pts++ = *d++;
while (c < c_max)
*pts++ = *c++;
}
else
{
while (d < d_max)
{
while (cmp(c, d) <= 0)
{
*pts++ = *c++;
}
*pts++ = *d++;
}
while (c < c_max)
{
*pts++ = *c++;
}
}
step2:
if (offset + block * 2 < nmemb)
{
c = pta + block * 2;
if (offset + block * 3 < nmemb)
{
c_max = c + block;
d = c_max;
d_max = offset + block * 4 <= nmemb ? d + block : end;
if (cmp(c_max - 1, d_max - 1) <= 0)
{
while (c < c_max)
{
while (cmp(c, d) > 0)
{
*pts++ = *d++;
}
*pts++ = *c++;
}
while (d < d_max)
*pts++ = *d++;
}
else if (cmp(c, d_max - 1) > 0)
{
while (d < d_max)
*pts++ = *d++;
while (c < c_max)
*pts++ = *c++;
}
else
{
while (d < d_max)
{
while (cmp(c, d) <= 0)
{
*pts++ = *c++;
}
*pts++ = *d++;
}
while (c < c_max)
*pts++ = *c++;
}
}
else
{
while (c < end)
*pts++ = *c++;
}
}
step3:
pts = swap;
c = pts;
if (offset + block * 2 < nmemb)
{
c_max = c + block * 2;
d = c_max;
d_max = offset + block * 4 <= nmemb ? d + block * 2 : pts + nmemb - offset;
if (cmp(c_max - 1, d_max - 1) <= 0)
{
while (c < c_max)
{
while (cmp(c, d) > 0)
{
*pta++ = *d++;
}
*pta++ = *c++;
}
while (d < d_max)
*pta++ = *d++;
}
else if (cmp(c, d_max - 1) > 0)
{
while (d < d_max)
*pta++ = *d++;
while (c < c_max)
*pta++ = *c++;
}
else
{
while (d < d_max)
{
while (cmp(d, c) > 0)
{
*pta++ = *c++;
}
*pta++ = *d++;
}
while (c < c_max)
*pta++ = *c++;
}
}
else
{
d_max = pts + nmemb - offset;
while (c < d_max)
*pta++ = *c++;
}
offset += block * 4;
}
block *= 4;
}
}
void quadsort(void *array, size_t nmemb, size_t size, CMPFUNC *cmp)
{
void *swap;
swap = malloc(nmemb * size);
if (size == sizeof(int))
{
quad_swap32(array, swap, nmemb, cmp);
quad_sort32(array, swap, nmemb, cmp);
}
else if (size == sizeof(long long))
{
quad_swap64(array, swap, nmemb, cmp);
quad_sort64(array, swap, nmemb, cmp);
}
else
{
assert(size == 4 || size == 8);
}
free(swap);
}
int cmp_int(const void * a, const void * b)
{
return *(int *) a - *(int *) b;
}
int cmp_str(const void * a, const void * b)
{
return strcmp(*(const char **) a, *(const char **) b);
}
int cmp_float(const void * a, const void * b)
{
return *(float *) a - *(float *) b;
}
// benchmarking utilities
long long utime()
{
struct timeval now_time;
gettimeofday(&now_time, NULL);
return now_time.tv_sec * 1000000LL + now_time.tv_usec;
}
void test_quad(int *z_array, int *r_array, int max, char *desc)
{
long long start, end, cnt;
memcpy(z_array, r_array, max * sizeof(int));
if (max <= 10) printf("\e[1;31m%10d %10d %10d %10d %10d %10d %10d %10d %10d %10d\n", z_array[0], z_array[1], z_array[2], z_array[3], z_array[4], z_array[5], z_array[6], z_array[7], z_array[8], z_array[9]);
start = utime();
quadsort(z_array, max, sizeof(int), cmp_int);
end = utime();
if (max <= 10) printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d %10d %10d %10d\n", z_array[0], z_array[1], z_array[2], z_array[3], z_array[4], z_array[5], z_array[6], z_array[7], z_array[8], z_array[9]);
printf("\e[0m Quad Sort: sorted %d elements in %f seconds. (%s)\n", max, (end - start) / 1000000.0, desc);
for (cnt = 1 ; cnt < max ; cnt++) if (z_array[cnt - 1] > z_array[cnt]) {printf(" Quad Sort: not properly sorted at index %lld. (%d vs %d\n", cnt, z_array[cnt - 1], z_array[cnt]);break;}
}
#define swap(a, b) { int _h = a; a = b; b = _h; }
#define min(a, b) ((a) < (b) ? (a) : (b))
void insert_sort(int* left, int* right, CMPFUNC cmp) {
// put minimum to left position, so we can save
// one inner loop comparsion for insert sort
for (int* pi = left + 1; pi <= right; pi++) {
if (cmp(pi, left) < 0) {
// if (*pi < *left) {
swap(*pi, *left);
}
}
for (int* pi = left + 2; pi <= right; pi++) {
int h = *pi;
int* pj = pi - 1;
while (cmp(&h, pj) < 0) {
// while (h < *pj) {
*(pj + 1) = *pj;
pj -= 1;
}
*(pj + 1) = h;
}
}
#define sort3fast(a, b, c, cmp) \
if (cmp(&b, &a) < 0) { \
if (cmp(&c, &a) < 0) { \
if (cmp(&c, &b) < 0) { swap(a, c); } \
else { int h = a; a = b; b = c; c = h;} \
} \
else { swap(a, b); } \
} \
else { \
if (cmp(&c, &b) < 0) { \
if (cmp(&c, &a) < 0) { int h = c; c = b; b = a; a = h;} \
else { swap(b, c); } \
} \
} \
#define BSZ 256
int* partition(int* left, int* right, CMPFUNC cmp) {
int* left_piv = left + 1;
int* p = left + (right - left) / 2;
int pivl = *left;
int piv = *p;
int pivr = *right;
*p = *left_piv;
sort3fast(pivl, piv, pivr, cmp);
*right = pivr;
*left_piv = piv;
*left = pivl;
left += 1;
if (right - left > 2 * BSZ + 2) {
left += 1;
right -= 1;
int* offl[BSZ];
int* offr[BSZ];
int** ol = offl;
int** or = offr;
do {
if (ol == offl) {
int* pd = left;
do {
*ol = pd;
ol += cmp(&piv, pd) < 0;
// ol += (piv < *pd);
pd += 1;
}
while (pd < left + BSZ);
}
if (or == offr) {
int* pd = right;
do {
*or = pd;
or += cmp(&piv, pd) > 0;
// or += (piv > *pd);
pd -= 1;
}
while (pd > right - BSZ);
}
int min = min(ol - offl, or - offr);
ol -= min;
or -= min;
for (int i = 0; i < min; i++) {
swap(**(ol + i), **(or + i));
}
if (ol == offl) left += BSZ;
if (or == offr) right -= BSZ;
}
while (right - left > 2 * BSZ);
left -= 1;
right += 1;
}
while (1) {
// do left += 1; while(*left < piv);
// do right -= 1; while (*right > piv);
do left += 1; while (cmp(left, &piv) < 0);
do right -= 1; while (cmp(right, &piv) > 0);
if (left >= right) break;
swap(*left, *right);
}
*left_piv = *right;
*right = piv;
return right;
}
void sort(int* data, int len, CMPFUNC *cmp) {
int* stack[64];
int sp = 0;
int* left = data;
int* right = data + len - 1;
while (1) {
// for arrays smaller than 40 use insert sort
if (right - left < 40) {
insert_sort(left, right, cmp);
if (sp == 0) break;
sp -= 2;
left = stack[sp];
right = stack[sp + 1];
}
else {
int* mid = partition(left,right, cmp);
if (mid < left + (right - left) / 2) {
stack[sp] = mid + 1;
stack[sp + 1] = right;
right = mid - 1;
}
else {
stack[sp] = left;
stack[sp + 1] = mid - 1;
left = mid + 1;
}
sp += 2;
}
}
}
void test_myquick(int *z_array, int *r_array, int max, char *desc)
{
long long start, end, cnt;
memcpy(z_array, r_array, max * sizeof(int));
if (max <= 10) printf("\e[1;31m%10d %10d %10d %10d %10d %10d %10d %10d %10d %10d\n", z_array[0], z_array[1], z_array[2], z_array[3], z_array[4], z_array[5], z_array[6], z_array[7], z_array[8], z_array[9]);
start = utime();
sort(z_array, max, cmp_int);
end = utime();
if (max <= 10) printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d %10d %10d %10d\n", z_array[0], z_array[1], z_array[2], z_array[3], z_array[4], z_array[5], z_array[6], z_array[7], z_array[8], z_array[9]);
printf("\e[0m MyQuickSort: sorted %d elements in %f seconds. (%s)\n", max, (end - start) / 1000000.0, desc);
for (cnt = 1 ; cnt < max ; cnt++) if (z_array[cnt - 1] > z_array[cnt]) {printf(" Quick Sort: not properly sorted at index %lld. (%d vs %d\n", cnt, z_array[cnt - 1], z_array[cnt]);break;}
}
int main(int argc, char **argv)
{
static int max = 1000000;
int cnt, rnd;
int *z_array, *r_array;
rnd = 1;
srand(rnd);
z_array = malloc(max * sizeof(int));
r_array = malloc(max * sizeof(int));
srand(rnd);
for (cnt = 0 ; cnt < max ; cnt++)
{
r_array[cnt] = rand();
}
test_quad(z_array, r_array, max, "random order");
test_myquick(z_array, r_array, max, "random order");
printf("\n");
free(z_array);
free(r_array);
return 0;
}