SDL_qsort.c (16060B)
1/* qsort.c 2 * (c) 1998 Gareth McCaughan 3 * 4 * This is a drop-in replacement for the C library's |qsort()| routine. 5 * 6 * Features: 7 * - Median-of-three pivoting (and more) 8 * - Truncation and final polishing by a single insertion sort 9 * - Early truncation when no swaps needed in pivoting step 10 * - Explicit recursion, guaranteed not to overflow 11 * - A few little wrinkles stolen from the GNU |qsort()|. 12 * - separate code for non-aligned / aligned / word-size objects 13 * 14 * This code may be reproduced freely provided 15 * - this file is retained unaltered apart from minor 16 * changes for portability and efficiency 17 * - no changes are made to this comment 18 * - any changes that *are* made are clearly flagged 19 * - the _ID string below is altered by inserting, after 20 * the date, the string " altered" followed at your option 21 * by other material. (Exceptions: you may change the name 22 * of the exported routine without changing the ID string. 23 * You may change the values of the macros TRUNC_* and 24 * PIVOT_THRESHOLD without changing the ID string, provided 25 * they remain constants with TRUNC_nonaligned, TRUNC_aligned 26 * and TRUNC_words/WORD_BYTES between 8 and 24, and 27 * PIVOT_THRESHOLD between 32 and 200.) 28 * 29 * You may use it in anything you like; you may make money 30 * out of it; you may distribute it in object form or as 31 * part of an executable without including source code; 32 * you don't have to credit me. (But it would be nice if 33 * you did.) 34 * 35 * If you find problems with this code, or find ways of 36 * making it significantly faster, please let me know! 37 * My e-mail address, valid as of early 1998 and certainly 38 * OK for at least the next 18 months, is 39 * gjm11@dpmms.cam.ac.uk 40 * Thanks! 41 * 42 * Gareth McCaughan Peterhouse Cambridge 1998 43 */ 44#include "../SDL_internal.h" 45 46/* 47#include <assert.h> 48#include <stdlib.h> 49#include <string.h> 50*/ 51#include "SDL_stdinc.h" 52#include "SDL_assert.h" 53 54#if defined(HAVE_QSORT) 55void 56SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *)) 57{ 58 qsort(base, nmemb, size, compare); 59} 60#else 61 62#ifdef assert 63#undef assert 64#endif 65#define assert(X) SDL_assert(X) 66#ifdef malloc 67#undef malloc 68#endif 69#define malloc SDL_malloc 70#ifdef free 71#undef free 72#endif 73#define free SDL_free 74#ifdef memcpy 75#undef memcpy 76#endif 77#define memcpy SDL_memcpy 78#ifdef memmove 79#undef memmove 80#endif 81#define memmove SDL_memmove 82#ifdef qsort 83#undef qsort 84#endif 85#define qsort SDL_qsort 86 87static const char _ID[] = "<qsort.c gjm 1.12 1998-03-19>"; 88 89/* How many bytes are there per word? (Must be a power of 2, 90 * and must in fact equal sizeof(int).) 91 */ 92#define WORD_BYTES sizeof(int) 93 94/* How big does our stack need to be? Answer: one entry per 95 * bit in a |size_t|. 96 */ 97#define STACK_SIZE (8*sizeof(size_t)) 98 99/* Different situations have slightly different requirements, 100 * and we make life epsilon easier by using different truncation 101 * points for the three different cases. 102 * So far, I have tuned TRUNC_words and guessed that the same 103 * value might work well for the other two cases. Of course 104 * what works well on my machine might work badly on yours. 105 */ 106#define TRUNC_nonaligned 12 107#define TRUNC_aligned 12 108#define TRUNC_words 12*WORD_BYTES /* nb different meaning */ 109 110/* We use a simple pivoting algorithm for shortish sub-arrays 111 * and a more complicated one for larger ones. The threshold 112 * is PIVOT_THRESHOLD. 113 */ 114#define PIVOT_THRESHOLD 40 115 116typedef struct 117{ 118 char *first; 119 char *last; 120} stack_entry; 121#define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;} 122#define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;} 123#define doLeft {first=ffirst;llast=last;continue;} 124#define doRight {ffirst=first;last=llast;continue;} 125#define pop {if (--stacktop<0) break;\ 126 first=ffirst=stack[stacktop].first;\ 127 last=llast=stack[stacktop].last;\ 128 continue;} 129 130/* Some comments on the implementation. 131 * 1. When we finish partitioning the array into "low" 132 * and "high", we forget entirely about short subarrays, 133 * because they'll be done later by insertion sort. 134 * Doing lots of little insertion sorts might be a win 135 * on large datasets for locality-of-reference reasons, 136 * but it makes the code much nastier and increases 137 * bookkeeping overhead. 138 * 2. We always save the shorter and get to work on the 139 * longer. This guarantees that every time we push 140 * an item onto the stack its size is <= 1/2 of that 141 * of its parent; so the stack can't need more than 142 * log_2(max-array-size) entries. 143 * 3. We choose a pivot by looking at the first, last 144 * and middle elements. We arrange them into order 145 * because it's easy to do that in conjunction with 146 * choosing the pivot, and it makes things a little 147 * easier in the partitioning step. Anyway, the pivot 148 * is the middle of these three. It's still possible 149 * to construct datasets where the algorithm takes 150 * time of order n^2, but it simply never happens in 151 * practice. 152 * 3' Newsflash: On further investigation I find that 153 * it's easy to construct datasets where median-of-3 154 * simply isn't good enough. So on large-ish subarrays 155 * we do a more sophisticated pivoting: we take three 156 * sets of 3 elements, find their medians, and then 157 * take the median of those. 158 * 4. We copy the pivot element to a separate place 159 * because that way we can always do our comparisons 160 * directly against a pointer to that separate place, 161 * and don't have to wonder "did we move the pivot 162 * element?". This makes the inner loop better. 163 * 5. It's possible to make the pivoting even more 164 * reliable by looking at more candidates when n 165 * is larger. (Taking this to its logical conclusion 166 * results in a variant of quicksort that doesn't 167 * have that n^2 worst case.) However, the overhead 168 * from the extra bookkeeping means that it's just 169 * not worth while. 170 * 6. This is pretty clean and portable code. Here are 171 * all the potential portability pitfalls and problems 172 * I know of: 173 * - In one place (the insertion sort) I construct 174 * a pointer that points just past the end of the 175 * supplied array, and assume that (a) it won't 176 * compare equal to any pointer within the array, 177 * and (b) it will compare equal to a pointer 178 * obtained by stepping off the end of the array. 179 * These might fail on some segmented architectures. 180 * - I assume that there are 8 bits in a |char| when 181 * computing the size of stack needed. This would 182 * fail on machines with 9-bit or 16-bit bytes. 183 * - I assume that if |((int)base&(sizeof(int)-1))==0| 184 * and |(size&(sizeof(int)-1))==0| then it's safe to 185 * get at array elements via |int*|s, and that if 186 * actually |size==sizeof(int)| as well then it's 187 * safe to treat the elements as |int|s. This might 188 * fail on systems that convert pointers to integers 189 * in non-standard ways. 190 * - I assume that |8*sizeof(size_t)<=INT_MAX|. This 191 * would be false on a machine with 8-bit |char|s, 192 * 16-bit |int|s and 4096-bit |size_t|s. :-) 193 */ 194 195/* The recursion logic is the same in each case: */ 196#define Recurse(Trunc) \ 197 { size_t l=last-ffirst,r=llast-first; \ 198 if (l<Trunc) { \ 199 if (r>=Trunc) doRight \ 200 else pop \ 201 } \ 202 else if (l<=r) { pushLeft; doRight } \ 203 else if (r>=Trunc) { pushRight; doLeft }\ 204 else doLeft \ 205 } 206 207/* and so is the pivoting logic: */ 208#define Pivot(swapper,sz) \ 209 if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\ 210 else { \ 211 if (compare(first,mid)<0) { \ 212 if (compare(mid,last)>0) { \ 213 swapper(mid,last); \ 214 if (compare(first,mid)>0) swapper(first,mid);\ 215 } \ 216 } \ 217 else { \ 218 if (compare(mid,last)>0) swapper(first,last)\ 219 else { \ 220 swapper(first,mid); \ 221 if (compare(mid,last)>0) swapper(mid,last);\ 222 } \ 223 } \ 224 first+=sz; last-=sz; \ 225 } 226 227#ifdef DEBUG_QSORT 228#include <stdio.h> 229#endif 230 231/* and so is the partitioning logic: */ 232#define Partition(swapper,sz) { \ 233 int swapped=0; \ 234 do { \ 235 while (compare(first,pivot)<0) first+=sz; \ 236 while (compare(pivot,last)<0) last-=sz; \ 237 if (first<last) { \ 238 swapper(first,last); swapped=1; \ 239 first+=sz; last-=sz; } \ 240 else if (first==last) { first+=sz; last-=sz; break; }\ 241 } while (first<=last); \ 242 if (!swapped) pop \ 243} 244 245/* and so is the pre-insertion-sort operation of putting 246 * the smallest element into place as a sentinel. 247 * Doing this makes the inner loop nicer. I got this 248 * idea from the GNU implementation of qsort(). 249 */ 250#define PreInsertion(swapper,limit,sz) \ 251 first=base; \ 252 last=first + (nmemb>limit ? limit : nmemb-1)*sz;\ 253 while (last!=base) { \ 254 if (compare(first,last)>0) first=last; \ 255 last-=sz; } \ 256 if (first!=base) swapper(first,(char*)base); 257 258/* and so is the insertion sort, in the first two cases: */ 259#define Insertion(swapper) \ 260 last=((char*)base)+nmemb*size; \ 261 for (first=((char*)base)+size;first!=last;first+=size) { \ 262 char *test; \ 263 /* Find the right place for |first|. \ 264 * My apologies for var reuse. */ \ 265 for (test=first-size;compare(test,first)>0;test-=size) ; \ 266 test+=size; \ 267 if (test!=first) { \ 268 /* Shift everything in [test,first) \ 269 * up by one, and place |first| \ 270 * where |test| is. */ \ 271 memcpy(pivot,first,size); \ 272 memmove(test+size,test,first-test); \ 273 memcpy(test,pivot,size); \ 274 } \ 275 } 276 277#define SWAP_nonaligned(a,b) { \ 278 register char *aa=(a),*bb=(b); \ 279 register size_t sz=size; \ 280 do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); } 281 282#define SWAP_aligned(a,b) { \ 283 register int *aa=(int*)(a),*bb=(int*)(b); \ 284 register size_t sz=size; \ 285 do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); } 286 287#define SWAP_words(a,b) { \ 288 register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; } 289 290/* ---------------------------------------------------------------------- */ 291 292static char * 293pivot_big(char *first, char *mid, char *last, size_t size, 294 int compare(const void *, const void *)) 295{ 296 size_t d = (((last - first) / size) >> 3) * size; 297 char *m1, *m2, *m3; 298 { 299 char *a = first, *b = first + d, *c = first + 2 * d; 300#ifdef DEBUG_QSORT 301 fprintf(stderr, "< %d %d %d\n", *(int *) a, *(int *) b, *(int *) c); 302#endif 303 m1 = compare(a, b) < 0 ? 304 (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a)) 305 : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b)); 306 } 307 { 308 char *a = mid - d, *b = mid, *c = mid + d; 309#ifdef DEBUG_QSORT 310 fprintf(stderr, ". %d %d %d\n", *(int *) a, *(int *) b, *(int *) c); 311#endif 312 m2 = compare(a, b) < 0 ? 313 (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a)) 314 : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b)); 315 } 316 { 317 char *a = last - 2 * d, *b = last - d, *c = last; 318#ifdef DEBUG_QSORT 319 fprintf(stderr, "> %d %d %d\n", *(int *) a, *(int *) b, *(int *) c); 320#endif 321 m3 = compare(a, b) < 0 ? 322 (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a)) 323 : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b)); 324 } 325#ifdef DEBUG_QSORT 326 fprintf(stderr, "-> %d %d %d\n", *(int *) m1, *(int *) m2, *(int *) m3); 327#endif 328 return compare(m1, m2) < 0 ? 329 (compare(m2, m3) < 0 ? m2 : (compare(m1, m3) < 0 ? m3 : m1)) 330 : (compare(m1, m3) < 0 ? m1 : (compare(m2, m3) < 0 ? m3 : m2)); 331} 332 333/* ---------------------------------------------------------------------- */ 334 335static void 336qsort_nonaligned(void *base, size_t nmemb, size_t size, 337 int (*compare) (const void *, const void *)) 338{ 339 340 stack_entry stack[STACK_SIZE]; 341 int stacktop = 0; 342 char *first, *last; 343 char *pivot = malloc(size); 344 size_t trunc = TRUNC_nonaligned * size; 345 assert(pivot != 0); 346 347 first = (char *) base; 348 last = first + (nmemb - 1) * size; 349 350 if ((size_t) (last - first) > trunc) { 351 char *ffirst = first, *llast = last; 352 while (1) { 353 /* Select pivot */ 354 { 355 char *mid = first + size * ((last - first) / size >> 1); 356 Pivot(SWAP_nonaligned, size); 357 memcpy(pivot, mid, size); 358 } 359 /* Partition. */ 360 Partition(SWAP_nonaligned, size); 361 /* Prepare to recurse/iterate. */ 362 Recurse(trunc)} 363 } 364 PreInsertion(SWAP_nonaligned, TRUNC_nonaligned, size); 365 Insertion(SWAP_nonaligned); 366 free(pivot); 367} 368 369static void 370qsort_aligned(void *base, size_t nmemb, size_t size, 371 int (*compare) (const void *, const void *)) 372{ 373 374 stack_entry stack[STACK_SIZE]; 375 int stacktop = 0; 376 char *first, *last; 377 char *pivot = malloc(size); 378 size_t trunc = TRUNC_aligned * size; 379 assert(pivot != 0); 380 381 first = (char *) base; 382 last = first + (nmemb - 1) * size; 383 384 if ((size_t) (last - first) > trunc) { 385 char *ffirst = first, *llast = last; 386 while (1) { 387 /* Select pivot */ 388 { 389 char *mid = first + size * ((last - first) / size >> 1); 390 Pivot(SWAP_aligned, size); 391 memcpy(pivot, mid, size); 392 } 393 /* Partition. */ 394 Partition(SWAP_aligned, size); 395 /* Prepare to recurse/iterate. */ 396 Recurse(trunc)} 397 } 398 PreInsertion(SWAP_aligned, TRUNC_aligned, size); 399 Insertion(SWAP_aligned); 400 free(pivot); 401} 402 403static void 404qsort_words(void *base, size_t nmemb, 405 int (*compare) (const void *, const void *)) 406{ 407 408 stack_entry stack[STACK_SIZE]; 409 int stacktop = 0; 410 char *first, *last; 411 char *pivot = malloc(WORD_BYTES); 412 assert(pivot != 0); 413 414 first = (char *) base; 415 last = first + (nmemb - 1) * WORD_BYTES; 416 417 if (last - first > TRUNC_words) { 418 char *ffirst = first, *llast = last; 419 while (1) { 420#ifdef DEBUG_QSORT 421 fprintf(stderr, "Doing %d:%d: ", 422 (first - (char *) base) / WORD_BYTES, 423 (last - (char *) base) / WORD_BYTES); 424#endif 425 /* Select pivot */ 426 { 427 char *mid = 428 first + WORD_BYTES * ((last - first) / (2 * WORD_BYTES)); 429 Pivot(SWAP_words, WORD_BYTES); 430 *(int *) pivot = *(int *) mid; 431 } 432#ifdef DEBUG_QSORT 433 fprintf(stderr, "pivot=%d\n", *(int *) pivot); 434#endif 435 /* Partition. */ 436 Partition(SWAP_words, WORD_BYTES); 437 /* Prepare to recurse/iterate. */ 438 Recurse(TRUNC_words)} 439 } 440 PreInsertion(SWAP_words, (TRUNC_words / WORD_BYTES), WORD_BYTES); 441 /* Now do insertion sort. */ 442 last = ((char *) base) + nmemb * WORD_BYTES; 443 for (first = ((char *) base) + WORD_BYTES; first != last; 444 first += WORD_BYTES) { 445 /* Find the right place for |first|. My apologies for var reuse */ 446 int *pl = (int *) (first - WORD_BYTES), *pr = (int *) first; 447 *(int *) pivot = *(int *) first; 448 for (; compare(pl, pivot) > 0; pr = pl, --pl) { 449 *pr = *pl; 450 } 451 if (pr != (int *) first) 452 *pr = *(int *) pivot; 453 } 454 free(pivot); 455} 456 457/* ---------------------------------------------------------------------- */ 458 459void 460qsort(void *base, size_t nmemb, size_t size, 461 int (*compare) (const void *, const void *)) 462{ 463 464 if (nmemb <= 1) 465 return; 466 if (((uintptr_t) base | size) & (WORD_BYTES - 1)) 467 qsort_nonaligned(base, nmemb, size, compare); 468 else if (size != WORD_BYTES) 469 qsort_aligned(base, nmemb, size, compare); 470 else 471 qsort_words(base, nmemb, compare); 472} 473 474#endif /* !SDL_qsort */ 475 476/* vi: set ts=4 sw=4 expandtab: */