Shadowrun: Awakened 29 September 2011 - Build 871
Update_state.cpp
Go to the documentation of this file.
00001 /*
00002     Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
00003 
00004     This file is part of Threading Building Blocks.
00005 
00006     Threading Building Blocks is free software; you can redistribute it
00007     and/or modify it under the terms of the GNU General Public License
00008     version 2 as published by the Free Software Foundation.
00009 
00010     Threading Building Blocks is distributed in the hope that it will be
00011     useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00012     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU General Public License for more details.
00014 
00015     You should have received a copy of the GNU General Public License
00016     along with Threading Building Blocks; if not, write to the Free Software
00017     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018 
00019     As a special exception, you may use this file as part of a free software
00020     library without restriction.  Specifically, if other files instantiate
00021     templates or use macros or inline functions from this file, or you compile
00022     this file and link it with other files to produce an executable, this
00023     file does not by itself cause the resulting executable to be covered by
00024     the GNU General Public License.  This exception does not however
00025     invalidate any other reasons why the executable file might be covered by
00026     the GNU General Public License.
00027 */
00028 
00029 #include "Evolution.h"
00030 
00031 #ifdef USE_SSE 
00032 /* Update states with SSE */
00033 
00034 #include <xmmintrin.h>
00035 #include <emmintrin.h>
00036 
00037 inline void create_record( char * src, unsigned * dst, unsigned width)
00038 {
00039     dst[0] |= src[width - 1];
00040     for( unsigned a=0; a<31u; ++a )
00041         dst[0] |= src[a]<<(a+1);
00042     unsigned a;
00043     for( a=31u; a<width; ++a )
00044         dst[(a+1)/32u] |= src[a]<<((a+1)%32u);
00045     dst[(a+1)/32u] |= src[0]<<((a+1)%32u);
00046 }
00047 
00048 inline void sum_offset( __m128i * X, __m128i * A, __m128i * B, __m128i * C, 
00049                         unsigned size_sse_ar, unsigned shift )
00050 {
00051     for(unsigned i=0; i<size_sse_ar; ++i) 
00052     {
00053         __m128i tmp = _mm_and_si128(A[i],X[shift + i]);    
00054         A[i]=_mm_xor_si128(A[i],X[shift + i]);    
00055         C[i]=_mm_or_si128(C[i],_mm_and_si128(B[i],tmp));
00056         B[i]=_mm_xor_si128(B[i],tmp);
00057     }
00058 }
00059 
00060 inline void shift_left2D( __m128i * X, unsigned height, unsigned size_sse_row )
00061 {
00062     for( unsigned b=0; b<height; ++b ) 
00063     {
00064         unsigned ind = b*size_sse_row;
00065         unsigned x0 = X[ind].m128i_u32[0] & 1;
00066 
00067         X[ind] =_mm_or_si128( _mm_srli_epi16(X[ind],1), 
00068             _mm_slli_epi16( _mm_srli_si128( X[ind], 2), 15) );
00069     
00070         unsigned x1 = X[ind + 1].m128i_u32[0] & 1;
00071         X[ind+1] =_mm_or_si128( _mm_srli_epi16( X[ind+1],1), 
00072             _mm_slli_epi16( _mm_srli_si128( X[ind+1], 2), 15) );
00073         X[ind].m128i_u32[3] |= x1<<31;
00074         
00075         unsigned x2 = X[ind + 2].m128i_u32[0] & 1;
00076         X[ind+2] =_mm_or_si128( _mm_srli_epi16( X[ind+2],1), 
00077             _mm_slli_epi16( _mm_srli_si128( X[ind+2], 2), 15) );
00078         X[ind+1].m128i_u32[3] |= x2<<31;
00079         
00080         unsigned* dst = (unsigned*)&X[ind];
00081         dst[301/32u] |= x0<<(301%32u);
00082    }
00083 }
00084 
00085 inline void shift_right2D( __m128i * X, unsigned height, unsigned size_sse_row )
00086 {
00087     for( unsigned b=0; b<height; ++b ) 
00088     {
00089         unsigned ind = b*size_sse_row;
00090 
00091         unsigned x0 = X[ind].m128i_u32[3]; x0>>=31;
00092         X[ind] =_mm_or_si128( _mm_slli_epi16(X[ind],1), 
00093             _mm_srli_epi16( _mm_slli_si128( X[ind], 2), 15) );
00094             
00095         unsigned x1 = X[ind + 1].m128i_u32[3]; x1>>=31;
00096         X[ind + 1] =_mm_or_si128( _mm_slli_epi16(X[ind + 1],1),
00097                 _mm_srli_epi16( _mm_slli_si128( X[ind + 1], 2), 15) );
00098         X[ind + 1].m128i_u32[0] |= x0;
00099                 
00100         unsigned* dst = (unsigned*)&X[ind];
00101         unsigned x2 = dst[301/32u] & (1<<(301%32u)); x2>>=(301%32u);
00102         X[ind + 2] =_mm_or_si128( _mm_slli_epi16(X[ind + 2],1),
00103             _mm_srli_epi16( _mm_slli_si128( X[ind + 2], 2), 15) );        
00104         X[ind + 2].m128i_u32[0] |= x1;    
00105         X[ind].m128i_u32[0] |= x2;
00106    }
00107 }
00108 
00109 void UpdateState(Matrix * m_matrix, char * dest ,int begin, int end)
00110 {
00111     //300/128 + 1 =3, 3*300=900
00112     unsigned size_sse_row = m_matrix->width/128 + 1; //3
00113     unsigned size_sse_ar=size_sse_row * (end - begin); 
00114     __m128i X[906], A[900], B[900], C[900];
00115     char * mas  = m_matrix->data;
00116     
00117     for( unsigned i=0; i<size_sse_ar; ++i)
00118     {
00119         A[i].m128i_u32[0]=0;A[i].m128i_u32[1]=0;A[i].m128i_u32[2]=0;A[i].m128i_u32[3]=0;
00120         B[i].m128i_u32[0]=0;B[i].m128i_u32[1]=0;B[i].m128i_u32[2]=0;B[i].m128i_u32[3]=0;
00121         C[i].m128i_u32[0]=0;C[i].m128i_u32[1]=0;C[i].m128i_u32[2]=0;C[i].m128i_u32[3]=0;    
00122     }
00123 
00124     for( unsigned i=0; i<size_sse_ar+6; ++i)
00125     {
00126         X[i].m128i_u32[0]=0;X[i].m128i_u32[1]=0;X[i].m128i_u32[2]=0;X[i].m128i_u32[3]=0;
00127     }
00128 
00129     // create X[] with bounds
00130     unsigned height = end - begin;
00131     unsigned width = m_matrix->width;
00132     for( unsigned b = 0 ; b < height; ++b ) 
00133     {
00134         char* src = &mas[(b + begin)*width];
00135         unsigned* dst = (unsigned*)&X[(b+1)*size_sse_row];
00136         create_record(src, dst, width);
00137     }
00138     // create high row in X[]
00139     char * src;
00140     if(begin == 0) 
00141     {
00142         src = &mas[(m_matrix->height-1)*width];
00143     }
00144     else 
00145     {
00146         src = &mas[(begin-1)*width];
00147     }
00148     unsigned* dst = (unsigned*)X;
00149     create_record(src, dst, width);
00150     
00151     //create lower row in X[]
00152     if(end == m_matrix->height ) 
00153     {
00154         src = mas;
00155     }        
00156     else 
00157     {
00158         src = &mas[end*width];
00159     }
00160     dst = (unsigned*)&X[(height+1)*size_sse_row];
00161     create_record(src, dst, width);
00162     
00163     //sum( C, B, A, X+offset_for_upwards ); high-left friend
00164     sum_offset(X,A,B,C,size_sse_ar, 0);
00165     
00166     //sum( C, B, A, X+offset_for_no_vertical_shift );
00167     sum_offset(X,A,B,C,size_sse_ar, size_sse_row);
00168     
00169     //sum( C, B, A, X+offset_for_downwards );
00170     sum_offset(X,A,B,C,size_sse_ar, 2*size_sse_row);
00171 
00172     //shift_left( X ); (when view 2D) in our logic it is in right
00173     height = end - begin + 2;
00174     shift_left2D( X, height, size_sse_row);
00175 
00176     //sum( C, B, A, X+offset_for_upwards ); high-left friend
00177     sum_offset(X,A,B,C,size_sse_ar, 0);
00178 
00179     //sum( C, B, A, X+offset_for_downwards );
00180     sum_offset(X,A,B,C,size_sse_ar, 2*size_sse_row);
00181 
00182     //shift_left( X ); (view in 2D) in our logic it is right shift
00183     height = end - begin + 2;
00184     shift_left2D( X, height, size_sse_row);
00185     
00186     //sum( C, B, A, X+offset_for_upwards ); high-right friend
00187     sum_offset(X,A,B,C,size_sse_ar, 0);
00188     
00189     //sum( C, B, A, X+offset_for_no_vertical_shift ); right friend
00190     sum_offset(X,A,B,C,size_sse_ar, size_sse_row);    
00191     
00192     //sum( C, B, A, X+offset_for_downwards ); right down friend
00193     sum_offset(X,A,B,C,size_sse_ar, 2*size_sse_row);
00194 
00195     //shift_right( X ); (when view in 2D) in our case it left shift.
00196     height = end - begin + 2;
00197     shift_right2D( X, height, size_sse_row);
00198     
00199     //X = (X|A)&B&~C (done bitwise over the arrays) 
00200     unsigned shift = size_sse_row;
00201     for(unsigned i=0; i<size_sse_ar; ++i) 
00202     {
00203         C[i].m128i_u32[0] = ~C[i].m128i_u32[0];
00204         C[i].m128i_u32[1] = ~C[i].m128i_u32[1];
00205         C[i].m128i_u32[2] = ~C[i].m128i_u32[2];
00206         C[i].m128i_u32[3] = ~C[i].m128i_u32[3];
00207         X[shift + i] = _mm_and_si128(_mm_and_si128(_mm_or_si128(X[shift + i],
00208             A[i]),B[i]),C[i]);    
00209     }
00210 
00211     height = end - begin;
00212     width=m_matrix->width;
00213     for( unsigned b=0; b<height; ++b ) 
00214     {
00215         char* dst = &dest[(b+begin)*width];
00216         unsigned* src = (unsigned*)&X[(b+1)*size_sse_row];
00217         for( unsigned a=0; a<width; ++a )
00218         {
00219             unsigned c = src[a/32u] & 1<<(a%32u);
00220             dst[a] = c>>(a%32u);
00221         }
00222     }
00223 }
00224 #else 
00225 /* end SSE block */
00226 
00227 // ----------------------------------------------------------------------
00228 // GetAdjacentCellState() - returns the state (value) of the specified 
00229 // adjacent cell of the current cell "cellNumber"
00230 char GetAdjacentCellState(
00231                                 char* source,      // pointer to source data block
00232                                 int x,             // logical width of field
00233                                 int y,             // logical height of field
00234                                 int cellNumber,    // number of cell position to examine
00235                                 int cp             // which adjacent position
00236                                )
00237 {
00238 /* 
00239 cp 
00240 *-- cp=1 ... --- cp=8 (summary: -1-2-3-
00241 -x-          -x-                -4-x-5-
00242 ---          --*                -6-7-8- )
00243 */
00244     char cellState = 0;        // return value
00245 
00246     // set up boundary flags to trigger field-wrap logic
00247     bool onTopRow = false;
00248     bool onBottomRow = false;
00249     bool onLeftColumn = false;
00250     bool onRightColumn = false;
00251 
00252     // check to see if cell is on top row
00253     if (cellNumber < x)
00254     {
00255         onTopRow = true;
00256     }
00257     // check to see if cell is on bottom row
00258     if ((x*y)-cellNumber <= x)
00259     {
00260         onBottomRow = true;
00261     }
00262     // check to see if cell is on left column
00263     if (cellNumber%x == 0)
00264     {
00265         onLeftColumn = true;
00266     }
00267     // check to see if cell is on right column
00268     if ((cellNumber+1)%x == 0)
00269     {
00270         onRightColumn = true;
00271     }
00272 
00273     switch (cp)
00274     {
00275         case 1:
00276             if (onTopRow && onLeftColumn)
00277             {
00278                 return *(source+((x*y)-1));
00279             }
00280             if (onTopRow && !onLeftColumn)
00281             {
00282                 return *(source+(((x*y)-x)+(cellNumber-1)));
00283             }
00284             if (onLeftColumn && !onTopRow)
00285             {
00286                 return *(source+(cellNumber-1));
00287             }
00288             return *((source+cellNumber)-(x+1));
00289 
00290         case 2:
00291             if (onTopRow)
00292             {
00293                 return *(source+(((x*y)-x)+cellNumber));
00294             }
00295             return *((source+cellNumber)-x);
00296 
00297         case 3:
00298             if (onTopRow && onRightColumn)
00299             {
00300                 return *(source+((x*y)-x));
00301             }
00302             if (onTopRow && !onRightColumn)
00303             {
00304                 return *(source+(((x*y)-x)+(cellNumber+1)));
00305             }
00306             if (onRightColumn && !onTopRow)
00307             {
00308                 return *(source+((cellNumber-(x*2))+1));
00309             }
00310             return *(source+(cellNumber-(x-1)));
00311 
00312         case 4:
00313             if (onRightColumn)
00314             {
00315                 return *(source+(cellNumber-(x-1)));
00316             }
00317             return *(source+(cellNumber+1));
00318 
00319         case 5:
00320             if (onBottomRow && onRightColumn)
00321             {
00322                 return *source;
00323             }
00324             if (onBottomRow && !onRightColumn)
00325             {
00326                 return *(source+((cellNumber-((x*y)-x))+1));
00327             }
00328             if (onRightColumn && !onBottomRow)
00329             {
00330                 return *(source+(cellNumber+1));
00331             }
00332             return *(source+(((cellNumber+x))+1));
00333 
00334         case 6:
00335             if (onBottomRow)
00336             {
00337                 return *(source+(cellNumber-((x*y)-x)));
00338             }
00339             return *(source+(cellNumber+x));
00340 
00341         case 7:
00342             if (onBottomRow && onLeftColumn)
00343             {
00344                 return *(source+(x-1));
00345             }
00346             if (onBottomRow && !onLeftColumn)
00347             {
00348                 return *(source+(cellNumber-((x*y)-x)-1));
00349             }
00350             if (onLeftColumn && !onBottomRow)
00351             {
00352                 return *(source+(cellNumber+((x*2)-1)));
00353             }
00354             return *(source+(cellNumber+(x-1)));
00355 
00356         case 8:
00357             if (onLeftColumn)
00358             {
00359                 return *(source+(cellNumber+(x-1)));
00360             }
00361             return *(source+(cellNumber-1));
00362     }
00363     return cellState;
00364 }
00365 
00366 char CheckCell(Matrix * m_matrix, int cellNumber)
00367 {
00368     char total = 0;
00369     char* source = m_matrix->data;
00370     //look around to find cell's with status "alive"
00371     for(int i=1; i<9; i++)
00372     {
00373         total += GetAdjacentCellState(source, m_matrix->width, m_matrix->height, cellNumber, i);
00374     }
00375     // if the number of adjacent live cells is < 2 or > 3, the result is a dead 
00376     // cell regardless of its current state. (A live cell dies of loneliness if it
00377     // has less than 2 neighbors, and of overcrowding if it has more than 3; a new
00378     // cell is born in an empty spot only if it has exactly 3 neighbors.
00379     if (total < 2 || total > 3)
00380     {
00381         return 0;
00382     }
00383 
00384     // if we get here and the cell position holds a living cell, it stays alive
00385     if (*(source+cellNumber))
00386     {
00387         return 1;
00388     }
00389 
00390     // we have an empty position. If there are only 2 neighbors, the position stays
00391     // empty.
00392     if (total == 2)
00393     {
00394         return 0;
00395     }
00396 
00397     // we have an empty position and exactly 3 neighbors. A cell is born.
00398     return 1;
00399 }
00400 
00401 void UpdateState(Matrix * m_matrix, char * dest ,int begin, int end)
00402 {
00403         for (int i=begin; i<=end; i++)
00404         {
00405             *(dest+i) = CheckCell(m_matrix, i);
00406         }
00407 }
00408 
00409 #endif 
00410 /* end non-SSE block */

Copyright © 2007-2010 by The Shadowrun: Awakened Team. This work is licensed under the GNU Lesser General Public License 3.

GNU Lesser General Public License 3 Sourceforge.net