/* $Id: viewport_sprite_sorter_sse4.cpp 26207 2014-01-02 18:52:54Z rubidium $ */

/*
 * This file is part of OpenTTD.
 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
 */

/** @file viewport_sprite_sorter_sse4.cpp Parent sprite sorter built on SSE4.1 intrinsics. */

#ifdef WITH_SSE

#include "stdafx.h"
#include "cpu.h"
#include "smmintrin.h"
#include "viewport_sprite_sorter.h"

/*
 * Select the 128-bit load used on the bounding-box fields: the aligned form
 * when _SQ64 is defined (guarded by a compile-time check that the sprite
 * struct size is a multiple of 16), the unaligned form otherwise.
 */
#ifdef _SQ64
	assert_compile((sizeof(ParentSpriteToDraw) % 16) == 0);
	#define LOAD_128 _mm_load_si128
#else
	#define LOAD_128 _mm_loadu_si128
#endif

/**
 * Reorder the parent sprite pointers in place, using packed SSE4.1 compares
 * for the bounding-box tests.
 * @param psdv Vector of sprite pointers to reorder; the \c comparison_done
 *             flag of every sprite that is taken as reference gets set.
 */
void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
{
	/* PTEST mask: the first twelve bytes (three 32-bit lanes) are significant,
	 * the fourth lane of every compare result is ignored. */
	const __m128i lanes_xyz = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
	ParentSpriteToDraw ** const last = psdv->End();

	for (ParentSpriteToDraw **cur = psdv->Begin(); cur != last; /* advanced in the body */) {
		ParentSpriteToDraw * const ref = *cur;

		/* Only step forward past sprites that were already used as reference.
		 * Otherwise the slot is visited again, because a sprite may have been
		 * rotated into it below. */
		if (ref->comparison_done) {
			++cur;
			continue;
		}

		ref->comparison_done = true;

		for (ParentSpriteToDraw **scan = cur + 1; scan != last; ++scan) {
			ParentSpriteToDraw * const cand = *scan;

			if (cand->comparison_done) {
				continue;
			}

			/*
			 * Scalar form of the bounding-box overlap test:
			 *   ref->xmax >= cand->xmin && ref->xmin <= cand->xmax && // overlap in X?
			 *   ref->ymax >= cand->ymin && ref->ymin <= cand->ymax && // overlap in Y?
			 *   ref->zmax >= cand->zmin && ref->zmin <= cand->zmax    // overlap in Z?
			 *
			 * Grouping the "max >= min" terms per sprite and negating each term:
			 *   overlap == !(ref.max < cand.min on any axis) && !(cand.max < ref.min on any axis)
			 *                <------- ref_below ------->          <------- cand_below ------->
			 * Each "on any axis" is one packed signed compare followed by a PTEST.
			 *
			 * NOTE(review): every load reads 16 bytes starting at the x field, so it
			 * relies on the x/y/z members being consecutive 32-bit values -- confirm
			 * against the ParentSpriteToDraw declaration.
			 */
			const __m128i ref_max   = LOAD_128((const __m128i *)&ref->xmax);
			const __m128i cand_min  = LOAD_128((const __m128i *)&cand->xmin);
			const __m128i ref_below = _mm_cmplt_epi32(ref_max, cand_min);

			/* ref ends before cand starts on some axis: keep the current order. */
			if (_mm_testz_si128(lanes_xyz, ref_below) == 0) {
				continue;
			}

			const __m128i ref_min    = LOAD_128((const __m128i *)&ref->xmin);
			const __m128i cand_max   = LOAD_128((const __m128i *)&cand->xmax);
			const __m128i cand_below = _mm_cmplt_epi32(cand_max, ref_min);
			const bool boxes_overlap = _mm_testz_si128(lanes_xyz, cand_below) != 0;

			/* For overlapping boxes use X+Y+Z as the sorting order, so sprites
			 * closer to the bottom of the screen and with higher Z elevation
			 * are drawn in front. X, Y and Z are the coordinates of the centre
			 * of the box, i.e. X = (xmin + xmax) / 2 and so on; since only the
			 * order matters, the division by two is left out. */
			if (boxes_overlap &&
					ref->xmin + ref->xmax + ref->ymin + ref->ymax + ref->zmin + ref->zmax <=
					cand->xmin + cand->xmax + cand->ymin + cand->ymax + cand->zmin + cand->zmax) {
				continue;
			}

			/* Move cand in front of ref: shift [cur, scan) one slot to the
			 * right and drop cand into the slot that was freed at cur. */
			ParentSpriteToDraw **hole = scan;
			while (hole > cur) {
				*hole = *(hole - 1);
				--hole;
			}
			*cur = cand;
		}
	}
}

/**
 * Tell whether the SSE4.1 sprite sorter may be used on the running CPU.
 * NOTE(review): the arguments presumably select CPUID leaf 1, register index 2
 * (ECX) and bit 19, which Intel documents as the SSE4.1 feature flag -- confirm
 * against the HasCPUIDFlag declaration.
 * @return The result of HasCPUIDFlag(1, 2, 19).
 */
bool ViewportSortParentSpritesSSE41Checker()
{
	return HasCPUIDFlag(1, 2, 19);
}

#endif /* WITH_SSE */