Shadowrun: Awakened 29 September 2011 - Build 871
main.cpp
Go to the documentation of this file.
00001 /*
00002     Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
00003 
00004     This file is part of Threading Building Blocks.
00005 
00006     Threading Building Blocks is free software; you can redistribute it
00007     and/or modify it under the terms of the GNU General Public License
00008     version 2 as published by the Free Software Foundation.
00009 
00010     Threading Building Blocks is distributed in the hope that it will be
00011     useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00012     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU General Public License for more details.
00014 
00015     You should have received a copy of the GNU General Public License
00016     along with Threading Building Blocks; if not, write to the Free Software
00017     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018 
00019     As a special exception, you may use this file as part of a free software
00020     library without restriction.  Specifically, if other files instantiate
00021     templates or use macros or inline functions from this file, or you compile
00022     this file and link it with other files to produce an executable, this
00023     file does not by itself cause the resulting executable to be covered by
00024     the GNU General Public License.  This exception does not however
00025     invalidate any other reasons why the executable file might be covered by
00026     the GNU General Public License.
00027 */
00028 
00029 #include "common.h"
00030 #include "tbb/tick_count.h"
00031 #include "tbb/task.h"
00032 #include "tbb/task_scheduler_init.h"
00033 #include <cstdlib>
00034 #include <cstdio>
00035 #include <cstring>
00036 
00037 // The performance of this example can be significantly better when
00038 // the objects are allocated by the scalable_allocator instead of the
00039 // default "operator new".  The reason is that the scalable_allocator
00040 // typically packs small objects more tightly than the default "operator new",
00041 // resulting in a smaller memory footprint, and thus more efficient use of
00042 // cache and virtual memory.  Also the scalable_allocator works faster for
00043 // multi-threaded allocations.
00044 //
00045 // Pass -stdmalloc as the 1st command line parameter to use the default "operator new"
00046 // and see the performance difference.
00047 
00048 #include "tbb/scalable_allocator.h"
00049 
00050 using namespace std;
00051 
00052 static double Pi = 3.14159265358979;
00053 
00054 const bool tbbmalloc = true;
00055 const bool stdmalloc = false;
00056 
00057 template<bool use_tbbmalloc>
00058 class TreeMaker {
00059 
00060     class SubTreeCreationTask: public tbb::task {
00061         TreeNode*& my_root;
00062         bool is_continuation;
00063         typedef TreeMaker<use_tbbmalloc> MyTreeMaker;
00064 
00065     public:
00066         SubTreeCreationTask( TreeNode*& root, long number_of_nodes ) : my_root(root), is_continuation(false) {
00067             my_root = MyTreeMaker::allocate_node();
00068             my_root->node_count = number_of_nodes;
00069             my_root->value = Value(Pi*number_of_nodes);
00070         }
00071 
00072         tbb::task* execute() {
00073             tbb::task* next = NULL;
00074             if( !is_continuation ) {
00075                 long subtree_size = my_root->node_count - 1;
00076                 if( subtree_size<1000 ) { /* grainsize */
00077                     my_root->left  = MyTreeMaker::do_in_one_thread(subtree_size/2);
00078                     my_root->right = MyTreeMaker::do_in_one_thread(subtree_size - subtree_size/2);
00079                 } else {
00080                     // Create tasks before spawning any of them.
00081                     tbb::task* a = new( allocate_child() ) SubTreeCreationTask(my_root->left,subtree_size/2);
00082                     tbb::task* b = new( allocate_child() ) SubTreeCreationTask(my_root->right,subtree_size - subtree_size/2);
00083                     recycle_as_continuation();
00084                     is_continuation = true;
00085                     set_ref_count(2);
00086                     spawn(*b);
00087                     next = a;
00088                 }
00089             } 
00090             return next;
00091         }
00092     };
00093 
00094 public:
00095     static TreeNode* allocate_node() {
00096         return use_tbbmalloc? tbb::scalable_allocator<TreeNode>().allocate(1) : new TreeNode;
00097     }
00098 
00099     static TreeNode* do_in_one_thread( long number_of_nodes ) {
00100         if( number_of_nodes==0 ) {
00101             return NULL;
00102         } else {
00103             TreeNode* n = allocate_node();
00104             n->node_count = number_of_nodes;
00105             n->value = Value(Pi*number_of_nodes);
00106             --number_of_nodes;
00107             n->left  = do_in_one_thread( number_of_nodes/2 ); 
00108             n->right = do_in_one_thread( number_of_nodes - number_of_nodes/2 );
00109             return n;
00110         }
00111     }
00112 
00113     static TreeNode* do_in_parallel( long number_of_nodes ) {
00114         TreeNode* root_node;
00115         SubTreeCreationTask& a = *new(tbb::task::allocate_root()) SubTreeCreationTask(root_node, number_of_nodes);
00116         tbb::task::spawn_root_and_wait(a);
00117         return root_node;
00118     }
00119 
00120     static TreeNode* create_and_time( long number_of_nodes ) {
00121         tbb::tick_count t0, t1;
00122         TreeNode* root = allocate_node();
00123         root->node_count = number_of_nodes;
00124         root->value = Value(Pi*number_of_nodes);
00125         --number_of_nodes;
00126 
00127         t0 = tbb::tick_count::now();
00128         root->left  = do_in_one_thread( number_of_nodes/2 );
00129         t1 = tbb::tick_count::now();
00130         printf ("%24s: time = %.1f msec\n", "half created serially", (t1-t0).seconds()*1000);
00131 
00132         t0 = tbb::tick_count::now();
00133         root->right = do_in_parallel( number_of_nodes - number_of_nodes/2 );
00134         t1 = tbb::tick_count::now();
00135         printf ("%24s: time = %.1f msec\n", "half done in parallel", (t1-t0).seconds()*1000);
00136 
00137         return root;
00138     }
00139 };
00140 
00141 int main( int argc, char *argv[] ) {
00142     // Parse command line parameters
00143     // The format is: <exe_name> [-stdmalloc] [num_of_nodes [num_of_threads]]
00144     bool use_tbbmalloc = true;
00145     int arg_idx = 1;
00146     if( argc>1 && strcmp(argv[1], "-stdmalloc")==0 ) {
00147         use_tbbmalloc = false;
00148         arg_idx = 2;
00149     }
00150     long number_of_nodes = argc>arg_idx ? strtol(argv[arg_idx],0,0) : 10000000;
00151     ++arg_idx;
00152     int nthread = argc>arg_idx ? strtol(argv[arg_idx],0,0) : tbb::task_scheduler_init::automatic;
00153 
00154     // Start up scheduler
00155     // For production, no argument should be provided to the constructor, so that
00156     // the application gets the number of threads that are physically available.
00157     tbb::task_scheduler_init init(nthread);
00158 
00159     TreeNode* root;
00160     if( use_tbbmalloc ) {
00161         printf("Tree creation using TBB scalable allocator\n");
00162         root = TreeMaker<tbbmalloc>::create_and_time( number_of_nodes );
00163     } else {
00164         printf("Tree creation using standard operator new\n");
00165         root = TreeMaker<stdmalloc>::create_and_time( number_of_nodes );
00166     }
00167 
00168     // Warm up caches
00169     SerialSumTree(root);
00170     printf("Calculations:\n");
00171     const char* which;
00172     for( int i=0; i<3; ++i ) {
00173         tbb::tick_count t0 = tbb::tick_count::now();
00174         Value result;
00175         switch( i ) {
00176             case 0: 
00177                 which = "SerialSumTree";
00178                 result = SerialSumTree(root); 
00179                 break;
00180             case 1: 
00181                 which = "SimpleParallelSumTree";
00182                 result = SimpleParallelSumTree(root); 
00183                 break;
00184             case 2: 
00185                 which = "OptimizedParallelSumTree";
00186                 result = OptimizedParallelSumTree(root); 
00187                 break;
00188         }
00189         tbb::tick_count t1 = tbb::tick_count::now();
00190         printf ("%24s: time = %.1f msec, sum=%g\n", which, (t1-t0).seconds()*1000, result);
00191     }
00192     return 0;
00193 }

Copyright © 2007-2010 by The Shadowrun: Awakened Team. This work is licensed under the GNU Lesser General Public License 3.

GNU Lesser General Public License 3 Sourceforge.net