![]() |
Shadowrun: Awakened 29 September 2011 - Build 871
|
00001 /* 00002 Copyright 2005-2010 Intel Corporation. All Rights Reserved. 00003 00004 This file is part of Threading Building Blocks. 00005 00006 Threading Building Blocks is free software; you can redistribute it 00007 and/or modify it under the terms of the GNU General Public License 00008 version 2 as published by the Free Software Foundation. 00009 00010 Threading Building Blocks is distributed in the hope that it will be 00011 useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00012 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 GNU General Public License for more details. 00014 00015 You should have received a copy of the GNU General Public License 00016 along with Threading Building Blocks; if not, write to the Free Software 00017 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00018 00019 As a special exception, you may use this file as part of a free software 00020 library without restriction. Specifically, if other files instantiate 00021 templates or use macros or inline functions from this file, or you compile 00022 this file and link it with other files to produce an executable, this 00023 file does not by itself cause the resulting executable to be covered by 00024 the GNU General Public License. This exception does not however 00025 invalidate any other reasons why the executable file might be covered by 00026 the GNU General Public License. 00027 */ 00028 00029 #include "Evolution.h" 00030 00031 #ifdef USE_SSE 00032 /* Update states with SSE */ 00033 00034 #include <xmmintrin.h> 00035 #include <emmintrin.h> 00036 00037 inline void create_record( char * src, unsigned * dst, unsigned width) 00038 { 00039 dst[0] |= src[width - 1]; 00040 for( unsigned a=0; a<31u; ++a ) 00041 dst[0] |= src[a]<<(a+1); 00042 unsigned a; 00043 for( a=31u; a<width; ++a ) 00044 dst[(a+1)/32u] |= src[a]<<((a+1)%32u); 00045 dst[(a+1)/32u] |= src[0]<<((a+1)%32u); 00046 } 00047 00048 inline void sum_offset( __m128i * X, __m128i * A, __m128i * B, __m128i * C, 00049 unsigned size_sse_ar, unsigned shift ) 00050 { 00051 for(unsigned i=0; i<size_sse_ar; ++i) 00052 { 00053 __m128i tmp = _mm_and_si128(A[i],X[shift + i]); 00054 A[i]=_mm_xor_si128(A[i],X[shift + i]); 00055 C[i]=_mm_or_si128(C[i],_mm_and_si128(B[i],tmp)); 00056 B[i]=_mm_xor_si128(B[i],tmp); 00057 } 00058 } 00059 00060 inline void shift_left2D( __m128i * X, unsigned height, unsigned size_sse_row ) 00061 { 00062 for( unsigned b=0; b<height; ++b ) 00063 { 00064 unsigned ind = b*size_sse_row; 00065 unsigned x0 = X[ind].m128i_u32[0] & 1; 00066 00067 X[ind] =_mm_or_si128( _mm_srli_epi16(X[ind],1), 00068 _mm_slli_epi16( _mm_srli_si128( X[ind], 2), 15) ); 00069 00070 unsigned x1 = X[ind + 1].m128i_u32[0] & 1; 00071 X[ind+1] =_mm_or_si128( _mm_srli_epi16( X[ind+1],1), 00072 _mm_slli_epi16( _mm_srli_si128( X[ind+1], 2), 15) ); 00073 X[ind].m128i_u32[3] |= x1<<31; 00074 00075 unsigned x2 = X[ind + 2].m128i_u32[0] & 1; 00076 X[ind+2] =_mm_or_si128( _mm_srli_epi16( X[ind+2],1), 00077 _mm_slli_epi16( _mm_srli_si128( X[ind+2], 2), 15) ); 00078 X[ind+1].m128i_u32[3] |= x2<<31; 00079 00080 unsigned* dst = (unsigned*)&X[ind]; 00081 dst[301/32u] |= x0<<(301%32u); 00082 } 00083 } 00084 00085 inline void shift_right2D( __m128i * X, unsigned height, unsigned size_sse_row ) 00086 { 00087 for( unsigned b=0; b<height; ++b ) 00088 { 00089 unsigned ind = b*size_sse_row; 00090 00091 unsigned x0 = X[ind].m128i_u32[3]; x0>>=31; 00092 X[ind] =_mm_or_si128( _mm_slli_epi16(X[ind],1), 00093 _mm_srli_epi16( _mm_slli_si128( X[ind], 2), 15) ); 00094 00095 unsigned x1 = X[ind + 1].m128i_u32[3]; x1>>=31; 00096 X[ind + 1] =_mm_or_si128( _mm_slli_epi16(X[ind + 1],1), 00097 _mm_srli_epi16( _mm_slli_si128( X[ind + 1], 2), 15) ); 00098 X[ind + 1].m128i_u32[0] |= x0; 00099 00100 unsigned* dst = (unsigned*)&X[ind]; 00101 unsigned x2 = dst[301/32u] & (1<<(301%32u)); x2>>=(301%32u); 00102 X[ind + 2] =_mm_or_si128( _mm_slli_epi16(X[ind + 2],1), 00103 _mm_srli_epi16( _mm_slli_si128( X[ind + 2], 2), 15) ); 00104 X[ind + 2].m128i_u32[0] |= x1; 00105 X[ind].m128i_u32[0] |= x2; 00106 } 00107 } 00108 00109 void UpdateState(Matrix * m_matrix, char * dest ,int begin, int end) 00110 { 00111 //300/128 + 1 =3, 3*300=900 00112 unsigned size_sse_row = m_matrix->width/128 + 1; //3 00113 unsigned size_sse_ar=size_sse_row * (end - begin); 00114 __m128i X[906], A[900], B[900], C[900]; 00115 char * mas = m_matrix->data; 00116 00117 for( unsigned i=0; i<size_sse_ar; ++i) 00118 { 00119 A[i].m128i_u32[0]=0;A[i].m128i_u32[1]=0;A[i].m128i_u32[2]=0;A[i].m128i_u32[3]=0; 00120 B[i].m128i_u32[0]=0;B[i].m128i_u32[1]=0;B[i].m128i_u32[2]=0;B[i].m128i_u32[3]=0; 00121 C[i].m128i_u32[0]=0;C[i].m128i_u32[1]=0;C[i].m128i_u32[2]=0;C[i].m128i_u32[3]=0; 00122 } 00123 00124 for( unsigned i=0; i<size_sse_ar+6; ++i) 00125 { 00126 X[i].m128i_u32[0]=0;X[i].m128i_u32[1]=0;X[i].m128i_u32[2]=0;X[i].m128i_u32[3]=0; 00127 } 00128 00129 // create X[] with bounds 00130 unsigned height = end - begin; 00131 unsigned width = m_matrix->width; 00132 for( unsigned b = 0 ; b < height; ++b ) 00133 { 00134 char* src = &mas[(b + begin)*width]; 00135 unsigned* dst = (unsigned*)&X[(b+1)*size_sse_row]; 00136 create_record(src, dst, width); 00137 } 00138 // create high row in X[] 00139 char * src; 00140 if(begin == 0) 00141 { 00142 src = &mas[(m_matrix->height-1)*width]; 00143 } 00144 else 00145 { 00146 src = &mas[(begin-1)*width]; 00147 } 00148 unsigned* dst = (unsigned*)X; 00149 create_record(src, dst, width); 00150 00151 //create lower row in X[] 00152 if(end == m_matrix->height ) 00153 { 00154 src = mas; 00155 } 00156 else 00157 { 00158 src = &mas[end*width]; 00159 } 00160 dst = (unsigned*)&X[(height+1)*size_sse_row]; 00161 create_record(src, dst, width); 00162 00163 //sum( C, B, A, X+offset_for_upwards ); high-left friend 00164 sum_offset(X,A,B,C,size_sse_ar, 0); 00165 00166 //sum( C, B, A, X+offset_for_no_vertical_shift ); 00167 sum_offset(X,A,B,C,size_sse_ar, size_sse_row); 00168 00169 //sum( C, B, A, X+offset_for_downwards ); 00170 sum_offset(X,A,B,C,size_sse_ar, 2*size_sse_row); 00171 00172 //shift_left( X ); (when view 2D) in our logic it is in right 00173 height = end - begin + 2; 00174 shift_left2D( X, height, size_sse_row); 00175 00176 //sum( C, B, A, X+offset_for_upwards ); high-left friend 00177 sum_offset(X,A,B,C,size_sse_ar, 0); 00178 00179 //sum( C, B, A, X+offset_for_downwards ); 00180 sum_offset(X,A,B,C,size_sse_ar, 2*size_sse_row); 00181 00182 //shift_left( X ); (view in 2D) in our logic it is right shift 00183 height = end - begin + 2; 00184 shift_left2D( X, height, size_sse_row); 00185 00186 //sum( C, B, A, X+offset_for_upwards ); high-right friend 00187 sum_offset(X,A,B,C,size_sse_ar, 0); 00188 00189 //sum( C, B, A, X+offset_for_no_vertical_shift ); right friend 00190 sum_offset(X,A,B,C,size_sse_ar, size_sse_row); 00191 00192 //sum( C, B, A, X+offset_for_downwards ); right down friend 00193 sum_offset(X,A,B,C,size_sse_ar, 2*size_sse_row); 00194 00195 //shift_right( X ); (when view in 2D) in our case it left shift. 00196 height = end - begin + 2; 00197 shift_right2D( X, height, size_sse_row); 00198 00199 //X = (X|A)&B&~C (done bitwise over the arrays) 00200 unsigned shift = size_sse_row; 00201 for(unsigned i=0; i<size_sse_ar; ++i) 00202 { 00203 C[i].m128i_u32[0] = ~C[i].m128i_u32[0]; 00204 C[i].m128i_u32[1] = ~C[i].m128i_u32[1]; 00205 C[i].m128i_u32[2] = ~C[i].m128i_u32[2]; 00206 C[i].m128i_u32[3] = ~C[i].m128i_u32[3]; 00207 X[shift + i] = _mm_and_si128(_mm_and_si128(_mm_or_si128(X[shift + i], 00208 A[i]),B[i]),C[i]); 00209 } 00210 00211 height = end - begin; 00212 width=m_matrix->width; 00213 for( unsigned b=0; b<height; ++b ) 00214 { 00215 char* dst = &dest[(b+begin)*width]; 00216 unsigned* src = (unsigned*)&X[(b+1)*size_sse_row]; 00217 for( unsigned a=0; a<width; ++a ) 00218 { 00219 unsigned c = src[a/32u] & 1<<(a%32u); 00220 dst[a] = c>>(a%32u); 00221 } 00222 } 00223 } 00224 #else 00225 /* end SSE block */ 00226 00227 // ---------------------------------------------------------------------- 00228 // GetAdjacentCellState() - returns the state (value) of the specified 00229 // adjacent cell of the current cell "cellNumber" 00230 char GetAdjacentCellState( 00231 char* source, // pointer to source data block 00232 int x, // logical width of field 00233 int y, // logical height of field 00234 int cellNumber, // number of cell position to examine 00235 int cp // which adjacent position 00236 ) 00237 { 00238 /* 00239 cp 00240 *-- cp=1 ... --- cp=8 (summary: -1-2-3- 00241 -x- -x- -4-x-5- 00242 --- --* -6-7-8- ) 00243 */ 00244 char cellState = 0; // return value 00245 00246 // set up boundary flags to trigger field-wrap logic 00247 bool onTopRow = false; 00248 bool onBottomRow = false; 00249 bool onLeftColumn = false; 00250 bool onRightColumn = false; 00251 00252 // check to see if cell is on top row 00253 if (cellNumber < x) 00254 { 00255 onTopRow = true; 00256 } 00257 // check to see if cell is on bottom row 00258 if ((x*y)-cellNumber <= x) 00259 { 00260 onBottomRow = true; 00261 } 00262 // check to see if cell is on left column 00263 if (cellNumber%x == 0) 00264 { 00265 onLeftColumn = true; 00266 } 00267 // check to see if cell is on right column 00268 if ((cellNumber+1)%x == 0) 00269 { 00270 onRightColumn = true; 00271 } 00272 00273 switch (cp) 00274 { 00275 case 1: 00276 if (onTopRow && onLeftColumn) 00277 { 00278 return *(source+((x*y)-1)); 00279 } 00280 if (onTopRow && !onLeftColumn) 00281 { 00282 return *(source+(((x*y)-x)+(cellNumber-1))); 00283 } 00284 if (onLeftColumn && !onTopRow) 00285 { 00286 return *(source+(cellNumber-1)); 00287 } 00288 return *((source+cellNumber)-(x+1)); 00289 00290 case 2: 00291 if (onTopRow) 00292 { 00293 return *(source+(((x*y)-x)+cellNumber)); 00294 } 00295 return *((source+cellNumber)-x); 00296 00297 case 3: 00298 if (onTopRow && onRightColumn) 00299 { 00300 return *(source+((x*y)-x)); 00301 } 00302 if (onTopRow && !onRightColumn) 00303 { 00304 return *(source+(((x*y)-x)+(cellNumber+1))); 00305 } 00306 if (onRightColumn && !onTopRow) 00307 { 00308 return *(source+((cellNumber-(x*2))+1)); 00309 } 00310 return *(source+(cellNumber-(x-1))); 00311 00312 case 4: 00313 if (onRightColumn) 00314 { 00315 return *(source+(cellNumber-(x-1))); 00316 } 00317 return *(source+(cellNumber+1)); 00318 00319 case 5: 00320 if (onBottomRow && onRightColumn) 00321 { 00322 return *source; 00323 } 00324 if (onBottomRow && !onRightColumn) 00325 { 00326 return *(source+((cellNumber-((x*y)-x))+1)); 00327 } 00328 if (onRightColumn && !onBottomRow) 00329 { 00330 return *(source+(cellNumber+1)); 00331 } 00332 return *(source+(((cellNumber+x))+1)); 00333 00334 case 6: 00335 if (onBottomRow) 00336 { 00337 return *(source+(cellNumber-((x*y)-x))); 00338 } 00339 return *(source+(cellNumber+x)); 00340 00341 case 7: 00342 if (onBottomRow && onLeftColumn) 00343 { 00344 return *(source+(x-1)); 00345 } 00346 if (onBottomRow && !onLeftColumn) 00347 { 00348 return *(source+(cellNumber-((x*y)-x)-1)); 00349 } 00350 if (onLeftColumn && !onBottomRow) 00351 { 00352 return *(source+(cellNumber+((x*2)-1))); 00353 } 00354 return *(source+(cellNumber+(x-1))); 00355 00356 case 8: 00357 if (onLeftColumn) 00358 { 00359 return *(source+(cellNumber+(x-1))); 00360 } 00361 return *(source+(cellNumber-1)); 00362 } 00363 return cellState; 00364 } 00365 00366 char CheckCell(Matrix * m_matrix, int cellNumber) 00367 { 00368 char total = 0; 00369 char* source = m_matrix->data; 00370 //look around to find cell's with status "alive" 00371 for(int i=1; i<9; i++) 00372 { 00373 total += GetAdjacentCellState(source, m_matrix->width, m_matrix->height, cellNumber, i); 00374 } 00375 // if the number of adjacent live cells is < 2 or > 3, the result is a dead 00376 // cell regardless of its current state. (A live cell dies of loneliness if it 00377 // has less than 2 neighbors, and of overcrowding if it has more than 3; a new 00378 // cell is born in an empty spot only if it has exactly 3 neighbors. 00379 if (total < 2 || total > 3) 00380 { 00381 return 0; 00382 } 00383 00384 // if we get here and the cell position holds a living cell, it stays alive 00385 if (*(source+cellNumber)) 00386 { 00387 return 1; 00388 } 00389 00390 // we have an empty position. If there are only 2 neighbors, the position stays 00391 // empty. 00392 if (total == 2) 00393 { 00394 return 0; 00395 } 00396 00397 // we have an empty position and exactly 3 neighbors. A cell is born. 00398 return 1; 00399 } 00400 00401 void UpdateState(Matrix * m_matrix, char * dest ,int begin, int end) 00402 { 00403 for (int i=begin; i<=end; i++) 00404 { 00405 *(dest+i) = CheckCell(m_matrix, i); 00406 } 00407 } 00408 00409 #endif 00410 /* end non-SSE block */
Copyright © 2007-2010 by The Shadowrun: Awakened Team. This work is licensed under the GNU Lesser General Public License 3.