![]() |
Shadowrun: Awakened 29 September 2011 - Build 871
|
00001 /* 00002 Copyright 2005-2010 Intel Corporation. All Rights Reserved. 00003 00004 This file is part of Threading Building Blocks. 00005 00006 Threading Building Blocks is free software; you can redistribute it 00007 and/or modify it under the terms of the GNU General Public License 00008 version 2 as published by the Free Software Foundation. 00009 00010 Threading Building Blocks is distributed in the hope that it will be 00011 useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00012 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 GNU General Public License for more details. 00014 00015 You should have received a copy of the GNU General Public License 00016 along with Threading Building Blocks; if not, write to the Free Software 00017 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00018 00019 As a special exception, you may use this file as part of a free software 00020 library without restriction. Specifically, if other files instantiate 00021 templates or use macros or inline functions from this file, or you compile 00022 this file and link it with other files to produce an executable, this 00023 file does not by itself cause the resulting executable to be covered by 00024 the GNU General Public License. This exception does not however 00025 invalidate any other reasons why the executable file might be covered by 00026 the GNU General Public License. 00027 */ 00028 00029 // Example program that computes number of prime numbers up to n, 00030 // where n is a command line argument. The algorithm here is a 00031 // fairly efficient version of the sieve of Eratosthenes. 00032 // The parallel version demonstrates how to use parallel_reduce, 00033 // and in particular how to exploit lazy splitting. 00034 00035 #include <cassert> 00036 #include <cstdio> 00037 #include <cstring> 00038 #include <math.h> 00039 #include <cstdlib> 00040 #include <cctype> 00041 #include "tbb/parallel_reduce.h" 00042 #include "tbb/task_scheduler_init.h" 00043 #include "tbb/tick_count.h" 00044 00045 using namespace std; 00046 using namespace tbb; 00047 00048 typedef unsigned long Number; 00049 00051 static bool PrintPrimes = false; 00052 00054 static Number GrainSize = 1000; 00055 00056 class Multiples { 00057 inline Number strike( Number start, Number limit, Number stride ) { 00058 // Hoist "my_is_composite" into register for sake of speed. 00059 bool* is_composite = my_is_composite; 00060 assert( stride>=2 ); 00061 for( ;start<limit; start+=stride ) 00062 is_composite[start] = true; 00063 return start; 00064 } 00066 bool* my_is_composite; 00067 00069 00071 Number* my_striker; 00072 00074 Number* my_factor; 00075 public: 00077 Number n_factor; 00078 Number m; 00079 Multiples( Number n ) : 00080 is_forked_copy(false) 00081 { 00082 m = Number(sqrt(double(n))); 00083 // Round up to even 00084 m += m&1; 00085 my_is_composite = new bool[m/2]; 00086 my_striker = new Number[m/2]; 00087 my_factor = new Number[m/2]; 00088 n_factor = 0; 00089 memset( my_is_composite, 0, m/2 ); 00090 for( Number i=3; i<m; i+=2 ) { 00091 if( !my_is_composite[i/2] ) { 00092 if( PrintPrimes ) 00093 printf("%d\n",(int)i); 00094 my_striker[n_factor] = strike( i/2, m/2, i ); 00095 my_factor[n_factor++] = i; 00096 } 00097 } 00098 } 00099 00101 00102 Number find_primes_in_window( Number start, Number window_size ) { 00103 bool* is_composite = my_is_composite; 00104 memset( is_composite, 0, window_size/2 ); 00105 for( size_t k=0; k<n_factor; ++k ) 00106 my_striker[k] = strike( my_striker[k]-m/2, window_size/2, my_factor[k] ); 00107 Number count = 0; 00108 for( Number k=0; k<window_size/2; ++k ) { 00109 if( !is_composite[k] ) { 00110 if( PrintPrimes ) 00111 printf("%ld\n",long(start+2*k+1)); 00112 ++count; 00113 } 00114 } 00115 return count; 00116 } 00117 00118 ~Multiples() { 00119 if( !is_forked_copy ) 00120 delete[] my_factor; 00121 delete[] my_striker; 00122 delete[] my_is_composite; 00123 } 00124 00125 //------------------------------------------------------------------------ 00126 // Begin extra members required by parallel version 00127 //------------------------------------------------------------------------ 00128 00130 const bool is_forked_copy; 00131 00132 Multiples( const Multiples& f, split ) : 00133 n_factor(f.n_factor), 00134 m(f.m), 00135 my_is_composite(NULL), 00136 my_striker(NULL), 00137 my_factor(f.my_factor), 00138 is_forked_copy(true) 00139 {} 00140 00141 bool is_initialized() const { 00142 return my_is_composite!=NULL; 00143 } 00144 00145 void initialize( Number start ) { 00146 assert( start>=1 ); 00147 my_is_composite = new bool[m/2]; 00148 my_striker = new Number[m/2]; 00149 for( size_t k=0; k<n_factor; ++k ) { 00150 Number f = my_factor[k]; 00151 Number p = (start-1)/f*f % m; 00152 my_striker[k] = (p&1 ? p+2*f : p+f)/2; 00153 assert( m/2<=my_striker[k] ); 00154 } 00155 } 00156 //------------------------------------------------------------------------ 00157 // End extra methods required by parallel version 00158 //------------------------------------------------------------------------ 00159 }; 00160 00162 00163 Number SerialCountPrimes( Number n ) { 00164 // Two is special case 00165 Number count = n>=2; 00166 if( n>=3 ) { 00167 Multiples multiples(n); 00168 count += multiples.n_factor; 00169 if( PrintPrimes ) 00170 printf("---\n"); 00171 Number window_size = multiples.m; 00172 for( Number j=multiples.m; j<=n; j+=window_size ) { 00173 if( j+window_size>n+1 ) 00174 window_size = n+1-j; 00175 count += multiples.find_primes_in_window( j, window_size ); 00176 } 00177 } 00178 return count; 00179 } 00180 00182 class SieveRange { 00184 const Number my_stride; 00185 00187 Number my_begin; 00188 00190 Number my_end; 00191 00193 const Number my_grainsize; 00194 00195 bool assert_okay() const { 00196 assert( my_begin%my_stride==0 ); 00197 assert( my_begin<=my_end ); 00198 assert( my_stride<=my_grainsize ); 00199 return true; 00200 } 00201 public: 00202 //------------------------------------------------------------------------ 00203 // Begin signatures required by parallel_reduce 00204 //------------------------------------------------------------------------ 00205 bool is_divisible() const {return my_end-my_begin>my_grainsize;} 00206 bool empty() const {return my_end<=my_begin;} 00207 SieveRange( SieveRange& r, split ) : 00208 my_stride(r.my_stride), 00209 my_grainsize(r.my_grainsize), 00210 my_end(r.my_end) 00211 { 00212 assert( r.is_divisible() ); 00213 assert( r.assert_okay() ); 00214 Number middle = r.my_begin + (r.my_end-r.my_begin+r.my_stride-1)/2; 00215 middle = middle/my_stride*my_stride; 00216 my_begin = middle; 00217 r.my_end = middle; 00218 assert( assert_okay() ); 00219 assert( r.assert_okay() ); 00220 } 00221 //------------------------------------------------------------------------ 00222 // End of signatures required by parallel_reduce 00223 //------------------------------------------------------------------------ 00224 Number begin() const {return my_begin;} 00225 Number end() const {return my_end;} 00226 SieveRange( Number begin, Number end, Number stride, Number grainsize ) : 00227 my_begin(begin), 00228 my_end(end), 00229 my_stride(stride), 00230 my_grainsize(grainsize<stride?stride:grainsize) 00231 { 00232 assert( assert_okay() ); 00233 } 00234 }; 00235 00237 00239 class Sieve { 00240 public: 00242 Multiples multiples; 00243 00245 Number count; 00246 00248 Sieve( Number n ) : 00249 multiples(n), 00250 count(0) 00251 {} 00252 00253 //------------------------------------------------------------------------ 00254 // Begin signatures required by parallel_reduce 00255 //------------------------------------------------------------------------ 00256 void operator()( const SieveRange& r ) { 00257 Number m = multiples.m; 00258 if( multiples.is_initialized() ) { 00259 // Simply reuse "multiples" structure from previous window 00260 // This works because parallel_reduce always applies 00261 // *this from left to right. 00262 } else { 00263 // Need to initialize "multiples" because *this is a forked copy 00264 // that needs to be set up to start at r.begin(). 00265 multiples.initialize( r.begin() ); 00266 } 00267 Number window_size = m; 00268 for( Number j=r.begin(); j<r.end(); j+=window_size ) { 00269 assert( j%multiples.m==0 ); 00270 if( j+window_size>r.end() ) 00271 window_size = r.end()-j; 00272 count += multiples.find_primes_in_window( j, window_size ); 00273 } 00274 } 00275 void join( Sieve& other ) { 00276 count += other.count; 00277 } 00278 Sieve( Sieve& other, split ) : 00279 multiples(other.multiples,split()), 00280 count(0) 00281 {} 00282 //------------------------------------------------------------------------ 00283 // End of signatures required by parallel_reduce 00284 //------------------------------------------------------------------------ 00285 }; 00286 00288 00289 Number ParallelCountPrimes( Number n ) { 00290 // Two is special case 00291 Number count = n>=2; 00292 if( n>=3 ) { 00293 Sieve s(n); 00294 count += s.multiples.n_factor; 00295 if( PrintPrimes ) 00296 printf("---\n"); 00297 // Explicit grain size and simple_partitioner() used here instead of automatic grainsize 00298 // determination becase we want SieveRange to be decomposed down to GrainSize or smaller. 00299 // Doing so improves odds that the working set fits in cache when evaluating Sieve::operator(). 00300 parallel_reduce( SieveRange( s.multiples.m, n, s.multiples.m, GrainSize ), s, simple_partitioner() ); 00301 count += s.count; 00302 } 00303 return count; 00304 } 00305 00306 //------------------------------------------------------------------------ 00307 // Code below this line constitutes the driver that calls SerialCountPrimes 00308 // and ParallelCountPrimes. 00309 //------------------------------------------------------------------------ 00310 00312 struct NumberRange { 00313 Number low; 00314 Number high; 00315 void set_from_string( const char* s ); 00316 NumberRange( Number low_, Number high_ ) : low(low_), high(high_) {} 00317 }; 00318 00319 void NumberRange::set_from_string( const char* s ) { 00320 char* end; 00321 high = low = strtol(s,&end,0); 00322 switch( *end ) { 00323 case ':': 00324 high = strtol(end+1,0,0); 00325 break; 00326 case '\0': 00327 break; 00328 default: 00329 printf("unexpected character = %c\n",*end); 00330 } 00331 00332 } 00333 00335 static NumberRange NThread(0,4); 00336 00338 static bool PauseFlag = false; 00339 00341 static Number ParseCommandLine( int argc, char* argv[] ) { 00342 Number n = 100000000; 00343 int i = 1; 00344 if( i<argc && strcmp( argv[i], "pause" )==0 ) { 00345 PauseFlag = true; 00346 ++i; 00347 } 00348 if( i<argc && !isdigit(argv[i][0]) ) { 00349 // Command line is garbled. 00350 fprintf(stderr,"Usage: %s [['pause'] n [nthread [grainsize]]]\n", argv[0]); 00351 fprintf(stderr,"where n is a positive integer [%lu]\n",n); 00352 fprintf(stderr," nthread is a non-negative integer, or range of the form low:high [%ld:%lu]\n",NThread.low,NThread.high); 00353 fprintf(stderr," grainsize is an optional postive integer [%lu]\n",GrainSize); 00354 exit(1); 00355 } 00356 if( i<argc ) 00357 n = strtol(argv[i++],0,0); 00358 if( i<argc ) 00359 NThread.set_from_string(argv[i++]); 00360 if( i<argc ) 00361 GrainSize = strtol(argv[i++],0,0); 00362 return n; 00363 } 00364 00365 static void WaitForUser() { 00366 char c; 00367 printf("Press return to continue\n"); 00368 do { 00369 c = getchar(); 00370 } while( c!='\n' ); 00371 } 00372 00373 int main( int argc, char* argv[] ) { 00374 Number n = ParseCommandLine(argc,argv); 00375 00376 // Try different numbers of threads 00377 for( Number p=NThread.low; p<=NThread.high; ++p ) { 00378 task_scheduler_init init(task_scheduler_init::deferred); 00379 // If p!=0, we are doing a parallel run 00380 if( p ) 00381 init.initialize(p); 00382 00383 Number count; 00384 tick_count t0 = tick_count::now(); 00385 if( p==0 ) { 00386 count = SerialCountPrimes(n); 00387 } else { 00388 count = ParallelCountPrimes(n); 00389 } 00390 tick_count t1 = tick_count::now(); 00391 00392 printf("#primes from [2..%lu] = %lu (%.2f sec with ", 00393 (unsigned long)n, (unsigned long)count, (t1-t0).seconds()); 00394 if( p ) 00395 printf("%lu-way parallelism)\n", p ); 00396 else 00397 printf("serial code)\n"); 00398 } 00399 if( PauseFlag ) { 00400 WaitForUser(); 00401 } 00402 return 0; 00403 }
Copyright © 2007-2010 by The Shadowrun: Awakened Team. This work is licensed under the GNU Lesser General Public License 3.