Shadowrun: Awakened 29 September 2011 - Build 871
primes.cpp
Go to the documentation of this file.
00001 /*
00002     Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
00003 
00004     This file is part of Threading Building Blocks.
00005 
00006     Threading Building Blocks is free software; you can redistribute it
00007     and/or modify it under the terms of the GNU General Public License
00008     version 2 as published by the Free Software Foundation.
00009 
00010     Threading Building Blocks is distributed in the hope that it will be
00011     useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00012     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU General Public License for more details.
00014 
00015     You should have received a copy of the GNU General Public License
00016     along with Threading Building Blocks; if not, write to the Free Software
00017     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018 
00019     As a special exception, you may use this file as part of a free software
00020     library without restriction.  Specifically, if other files instantiate
00021     templates or use macros or inline functions from this file, or you compile
00022     this file and link it with other files to produce an executable, this
00023     file does not by itself cause the resulting executable to be covered by
00024     the GNU General Public License.  This exception does not however
00025     invalidate any other reasons why the executable file might be covered by
00026     the GNU General Public License.
00027 */
00028 
00029 // Example program that computes number of prime numbers up to n, 
00030 // where n is a command line argument.  The algorithm here is a 
00031 // fairly efficient version of the sieve of Eratosthenes. 
00032 // The parallel version demonstrates how to use parallel_reduce,
00033 // and in particular how to exploit lazy splitting.
00034 
00035 #include <cassert>
00036 #include <cstdio>
00037 #include <cstring>
00038 #include <math.h>
00039 #include <cstdlib>
00040 #include <cctype>
00041 #include "tbb/parallel_reduce.h"
00042 #include "tbb/task_scheduler_init.h"
00043 #include "tbb/tick_count.h"
00044 
00045 using namespace std;
00046 using namespace tbb;
00047 
00048 typedef unsigned long Number;
00049 
00051 static bool PrintPrimes = false;
00052 
00054 static Number GrainSize = 1000;
00055 
00056 class Multiples {
00057     inline Number strike( Number start, Number limit, Number stride ) {
00058         // Hoist "my_is_composite" into register for sake of speed.
00059         bool* is_composite = my_is_composite;
00060         assert( stride>=2 );
00061         for( ;start<limit; start+=stride ) 
00062             is_composite[start] = true;
00063         return start;
00064     }
00066     bool* my_is_composite;
00067 
00069 
00071     Number* my_striker;
00072 
00074     Number* my_factor;
00075 public:
00077     Number n_factor;
00078     Number m;
00079     Multiples( Number n ) : 
00080         is_forked_copy(false) 
00081     {
00082         m = Number(sqrt(double(n)));
00083         // Round up to even
00084         m += m&1;
00085         my_is_composite = new bool[m/2];
00086         my_striker = new Number[m/2];
00087         my_factor = new Number[m/2];
00088         n_factor = 0;
00089         memset( my_is_composite, 0, m/2 );
00090         for( Number i=3; i<m; i+=2 ) {
00091             if( !my_is_composite[i/2] ) {
00092                 if( PrintPrimes )
00093                     printf("%d\n",(int)i);
00094                 my_striker[n_factor] = strike( i/2, m/2, i );
00095                 my_factor[n_factor++] = i;
00096             }
00097         }
00098     }
00099 
00101 
00102     Number find_primes_in_window( Number start, Number window_size ) {
00103         bool* is_composite = my_is_composite;
00104         memset( is_composite, 0, window_size/2 );
00105         for( size_t k=0; k<n_factor; ++k )
00106             my_striker[k] = strike( my_striker[k]-m/2, window_size/2, my_factor[k] );
00107         Number count = 0;
00108         for( Number k=0; k<window_size/2; ++k ) {
00109             if( !is_composite[k] ) {
00110                 if( PrintPrimes )
00111                     printf("%ld\n",long(start+2*k+1));
00112                 ++count;
00113             }
00114         }
00115         return count;
00116     }
00117 
00118     ~Multiples() {
00119         if( !is_forked_copy )
00120             delete[] my_factor;
00121         delete[] my_striker;
00122         delete[] my_is_composite;
00123     }
00124 
00125     //------------------------------------------------------------------------
00126     // Begin extra members required by parallel version
00127     //------------------------------------------------------------------------
00128 
00130     const bool is_forked_copy;
00131 
00132     Multiples( const Multiples& f, split ) :
00133         n_factor(f.n_factor),
00134         m(f.m),
00135         my_is_composite(NULL),
00136         my_striker(NULL),
00137         my_factor(f.my_factor),
00138         is_forked_copy(true)
00139     {}
00140 
00141     bool is_initialized() const {
00142         return my_is_composite!=NULL;
00143     }
00144 
00145     void initialize( Number start ) { 
00146         assert( start>=1 );
00147         my_is_composite = new bool[m/2];
00148         my_striker = new Number[m/2];
00149         for( size_t k=0; k<n_factor; ++k ) {
00150             Number f = my_factor[k];
00151             Number p = (start-1)/f*f % m;
00152             my_striker[k] = (p&1 ? p+2*f : p+f)/2;
00153             assert( m/2<=my_striker[k] );
00154         }
00155     }
00156     //------------------------------------------------------------------------
00157     // End extra methods required by parallel version
00158     //------------------------------------------------------------------------
00159 };
00160 
00162 
00163 Number SerialCountPrimes( Number n ) {
00164     // Two is special case
00165     Number count = n>=2;
00166     if( n>=3 ) {
00167         Multiples multiples(n);
00168         count += multiples.n_factor;
00169         if( PrintPrimes ) 
00170             printf("---\n");
00171         Number window_size = multiples.m;
00172         for( Number j=multiples.m; j<=n; j+=window_size ) { 
00173             if( j+window_size>n+1 ) 
00174                 window_size = n+1-j;
00175             count += multiples.find_primes_in_window( j, window_size );
00176         }
00177     }
00178     return count;
00179 }
00180 
00182 class SieveRange {
00184     const Number my_stride;
00185 
00187     Number my_begin;
00188 
00190     Number my_end;
00191 
00193     const Number my_grainsize;
00194 
00195     bool assert_okay() const {
00196         assert( my_begin%my_stride==0 );
00197         assert( my_begin<=my_end );
00198         assert( my_stride<=my_grainsize );
00199         return true;
00200     } 
00201 public:
00202     //------------------------------------------------------------------------
00203     // Begin signatures required by parallel_reduce
00204     //------------------------------------------------------------------------
00205     bool is_divisible() const {return my_end-my_begin>my_grainsize;}
00206     bool empty() const {return my_end<=my_begin;}
00207     SieveRange( SieveRange& r, split ) : 
00208         my_stride(r.my_stride), 
00209         my_grainsize(r.my_grainsize),
00210         my_end(r.my_end)
00211     {
00212         assert( r.is_divisible() );
00213         assert( r.assert_okay() );
00214         Number middle = r.my_begin + (r.my_end-r.my_begin+r.my_stride-1)/2;
00215         middle = middle/my_stride*my_stride;
00216         my_begin = middle;
00217         r.my_end = middle;
00218         assert( assert_okay() );
00219         assert( r.assert_okay() );
00220     }
00221     //------------------------------------------------------------------------
00222     // End of signatures required by parallel_reduce
00223     //------------------------------------------------------------------------
00224     Number begin() const {return my_begin;}
00225     Number end() const {return my_end;}
00226     SieveRange( Number begin, Number end, Number stride, Number grainsize ) :
00227         my_begin(begin),
00228         my_end(end),
00229         my_stride(stride),      
00230         my_grainsize(grainsize<stride?stride:grainsize)
00231     {
00232         assert( assert_okay() );
00233     }
00234 };
00235 
00237 
00239 class Sieve {
00240 public:
00242     Multiples multiples;
00243 
00245     Number count;
00246 
00248     Sieve( Number n ) : 
00249         multiples(n),
00250         count(0)
00251     {}
00252 
00253     //------------------------------------------------------------------------
00254     // Begin signatures required by parallel_reduce
00255     //------------------------------------------------------------------------
00256     void operator()( const SieveRange& r ) {
00257         Number m = multiples.m;
00258         if( multiples.is_initialized() ) { 
00259             // Simply reuse "multiples" structure from previous window
00260             // This works because parallel_reduce always applies
00261             // *this from left to right.
00262         } else {
00263             // Need to initialize "multiples" because *this is a forked copy
00264             // that needs to be set up to start at r.begin().
00265             multiples.initialize( r.begin() );
00266         }
00267         Number window_size = m;
00268         for( Number j=r.begin(); j<r.end(); j+=window_size ) { 
00269             assert( j%multiples.m==0 );
00270             if( j+window_size>r.end() ) 
00271                 window_size = r.end()-j;
00272             count += multiples.find_primes_in_window( j, window_size );
00273         }
00274     }
00275     void join( Sieve& other ) {
00276         count += other.count;
00277     }
00278     Sieve( Sieve& other, split ) : 
00279         multiples(other.multiples,split()),
00280         count(0)
00281     {}
00282     //------------------------------------------------------------------------
00283     // End of signatures required by parallel_reduce
00284     //------------------------------------------------------------------------
00285 };
00286 
00288 
00289 Number ParallelCountPrimes( Number n ) {
00290     // Two is special case
00291     Number count = n>=2;
00292     if( n>=3 ) {
00293         Sieve s(n);
00294         count += s.multiples.n_factor;
00295         if( PrintPrimes ) 
00296             printf("---\n");
00297         // Explicit grain size and simple_partitioner() used here instead of automatic grainsize 
00298         // determination becase we want SieveRange to be decomposed down to GrainSize or smaller.  
00299         // Doing so improves odds that the working set fits in cache when evaluating Sieve::operator().
00300         parallel_reduce( SieveRange( s.multiples.m, n, s.multiples.m, GrainSize ), s, simple_partitioner() );
00301         count += s.count;
00302     }
00303     return count;
00304 }
00305 
00306 //------------------------------------------------------------------------
00307 // Code below this line constitutes the driver that calls SerialCountPrimes
00308 // and ParallelCountPrimes.
00309 //------------------------------------------------------------------------
00310 
00312 struct NumberRange {
00313     Number low;
00314     Number high;
00315     void set_from_string( const char* s );
00316     NumberRange( Number low_, Number high_ ) : low(low_), high(high_) {}
00317 };
00318 
00319 void NumberRange::set_from_string( const char* s ) {
00320     char* end;
00321     high = low = strtol(s,&end,0);
00322     switch( *end ) {
00323     case ':': 
00324         high = strtol(end+1,0,0); 
00325         break;
00326     case '\0':
00327         break;
00328     default:
00329         printf("unexpected character = %c\n",*end);
00330     }
00331     
00332 }
00333 
00335 static NumberRange NThread(0,4);
00336 
00338 static bool PauseFlag = false;
00339 
00341 static Number ParseCommandLine( int argc, char* argv[] ) {
00342     Number n = 100000000;
00343     int i = 1;
00344     if( i<argc && strcmp( argv[i], "pause" )==0 ) {
00345         PauseFlag = true;
00346         ++i;
00347     }
00348     if( i<argc && !isdigit(argv[i][0]) ) { 
00349         // Command line is garbled.
00350         fprintf(stderr,"Usage: %s [['pause'] n [nthread [grainsize]]]\n", argv[0]);
00351         fprintf(stderr,"where n is a positive integer [%lu]\n",n);
00352         fprintf(stderr,"      nthread is a non-negative integer, or range of the form low:high [%ld:%lu]\n",NThread.low,NThread.high);
00353         fprintf(stderr,"      grainsize is an optional postive integer [%lu]\n",GrainSize);
00354         exit(1);
00355     }
00356     if( i<argc )
00357         n = strtol(argv[i++],0,0);
00358     if( i<argc )
00359         NThread.set_from_string(argv[i++]);
00360     if( i<argc )
00361         GrainSize = strtol(argv[i++],0,0);
00362     return n;
00363 }
00364 
00365 static void WaitForUser() {
00366     char c;
00367     printf("Press return to continue\n");
00368     do {
00369         c = getchar();
00370     } while( c!='\n' );
00371 }
00372 
00373 int main( int argc, char* argv[] ) {
00374     Number n = ParseCommandLine(argc,argv);
00375 
00376     // Try different numbers of threads
00377     for( Number p=NThread.low; p<=NThread.high; ++p ) {
00378         task_scheduler_init init(task_scheduler_init::deferred);
00379         // If p!=0, we are doing a parallel run
00380         if( p ) 
00381             init.initialize(p);
00382 
00383         Number count;
00384         tick_count t0 = tick_count::now();
00385         if( p==0 ) {
00386             count = SerialCountPrimes(n);
00387         } else {
00388             count = ParallelCountPrimes(n);
00389         }
00390         tick_count t1 = tick_count::now();
00391 
00392         printf("#primes from [2..%lu] = %lu (%.2f sec with ",
00393             (unsigned long)n, (unsigned long)count, (t1-t0).seconds());
00394         if( p ) 
00395             printf("%lu-way parallelism)\n", p );
00396         else 
00397             printf("serial code)\n");
00398     }
00399     if( PauseFlag ) {
00400         WaitForUser();
00401     }
00402     return 0;
00403 }

Copyright © 2007-2010 by The Shadowrun: Awakened Team. This work is licensed under the GNU Lesser General Public License 3.

GNU Lesser General Public License 3 Sourceforge.net