![]() |
Shadowrun: Awakened 29 September 2011 - Build 871
|
00001 /* 00002 Copyright 2005-2010 Intel Corporation. All Rights Reserved. 00003 00004 This file is part of Threading Building Blocks. 00005 00006 Threading Building Blocks is free software; you can redistribute it 00007 and/or modify it under the terms of the GNU General Public License 00008 version 2 as published by the Free Software Foundation. 00009 00010 Threading Building Blocks is distributed in the hope that it will be 00011 useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00012 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 GNU General Public License for more details. 00014 00015 You should have received a copy of the GNU General Public License 00016 along with Threading Building Blocks; if not, write to the Free Software 00017 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00018 00019 As a special exception, you may use this file as part of a free software 00020 library without restriction. Specifically, if other files instantiate 00021 templates or use macros or inline functions from this file, or you compile 00022 this file and link it with other files to produce an executable, this 00023 file does not by itself cause the resulting executable to be covered by 00024 the GNU General Public License. This exception does not however 00025 invalidate any other reasons why the executable file might be covered by 00026 the GNU General Public License. 00027 */ 00028 00029 #include "common.h" 00030 #include "tbb/tick_count.h" 00031 #include "tbb/task.h" 00032 #include "tbb/task_scheduler_init.h" 00033 #include <cstdlib> 00034 #include <cstdio> 00035 #include <cstring> 00036 00037 // The performance of this example can be significantly better when 00038 // the objects are allocated by the scalable_allocator instead of the 00039 // default "operator new". The reason is that the scalable_allocator 00040 // typically packs small objects more tightly than the default "operator new", 00041 // resulting in a smaller memory footprint, and thus more efficient use of 00042 // cache and virtual memory. Also the scalable_allocator works faster for 00043 // multi-threaded allocations. 00044 // 00045 // Pass -stdmalloc as the 1st command line parameter to use the default "operator new" 00046 // and see the performance difference. 00047 00048 #include "tbb/scalable_allocator.h" 00049 00050 using namespace std; 00051 00052 static double Pi = 3.14159265358979; 00053 00054 const bool tbbmalloc = true; 00055 const bool stdmalloc = false; 00056 00057 template<bool use_tbbmalloc> 00058 class TreeMaker { 00059 00060 class SubTreeCreationTask: public tbb::task { 00061 TreeNode*& my_root; 00062 bool is_continuation; 00063 typedef TreeMaker<use_tbbmalloc> MyTreeMaker; 00064 00065 public: 00066 SubTreeCreationTask( TreeNode*& root, long number_of_nodes ) : my_root(root), is_continuation(false) { 00067 my_root = MyTreeMaker::allocate_node(); 00068 my_root->node_count = number_of_nodes; 00069 my_root->value = Value(Pi*number_of_nodes); 00070 } 00071 00072 tbb::task* execute() { 00073 tbb::task* next = NULL; 00074 if( !is_continuation ) { 00075 long subtree_size = my_root->node_count - 1; 00076 if( subtree_size<1000 ) { /* grainsize */ 00077 my_root->left = MyTreeMaker::do_in_one_thread(subtree_size/2); 00078 my_root->right = MyTreeMaker::do_in_one_thread(subtree_size - subtree_size/2); 00079 } else { 00080 // Create tasks before spawning any of them. 00081 tbb::task* a = new( allocate_child() ) SubTreeCreationTask(my_root->left,subtree_size/2); 00082 tbb::task* b = new( allocate_child() ) SubTreeCreationTask(my_root->right,subtree_size - subtree_size/2); 00083 recycle_as_continuation(); 00084 is_continuation = true; 00085 set_ref_count(2); 00086 spawn(*b); 00087 next = a; 00088 } 00089 } 00090 return next; 00091 } 00092 }; 00093 00094 public: 00095 static TreeNode* allocate_node() { 00096 return use_tbbmalloc? tbb::scalable_allocator<TreeNode>().allocate(1) : new TreeNode; 00097 } 00098 00099 static TreeNode* do_in_one_thread( long number_of_nodes ) { 00100 if( number_of_nodes==0 ) { 00101 return NULL; 00102 } else { 00103 TreeNode* n = allocate_node(); 00104 n->node_count = number_of_nodes; 00105 n->value = Value(Pi*number_of_nodes); 00106 --number_of_nodes; 00107 n->left = do_in_one_thread( number_of_nodes/2 ); 00108 n->right = do_in_one_thread( number_of_nodes - number_of_nodes/2 ); 00109 return n; 00110 } 00111 } 00112 00113 static TreeNode* do_in_parallel( long number_of_nodes ) { 00114 TreeNode* root_node; 00115 SubTreeCreationTask& a = *new(tbb::task::allocate_root()) SubTreeCreationTask(root_node, number_of_nodes); 00116 tbb::task::spawn_root_and_wait(a); 00117 return root_node; 00118 } 00119 00120 static TreeNode* create_and_time( long number_of_nodes ) { 00121 tbb::tick_count t0, t1; 00122 TreeNode* root = allocate_node(); 00123 root->node_count = number_of_nodes; 00124 root->value = Value(Pi*number_of_nodes); 00125 --number_of_nodes; 00126 00127 t0 = tbb::tick_count::now(); 00128 root->left = do_in_one_thread( number_of_nodes/2 ); 00129 t1 = tbb::tick_count::now(); 00130 printf ("%24s: time = %.1f msec\n", "half created serially", (t1-t0).seconds()*1000); 00131 00132 t0 = tbb::tick_count::now(); 00133 root->right = do_in_parallel( number_of_nodes - number_of_nodes/2 ); 00134 t1 = tbb::tick_count::now(); 00135 printf ("%24s: time = %.1f msec\n", "half done in parallel", (t1-t0).seconds()*1000); 00136 00137 return root; 00138 } 00139 }; 00140 00141 int main( int argc, char *argv[] ) { 00142 // Parse command line parameters 00143 // The format is: <exe_name> [-stdmalloc] [num_of_nodes [num_of_threads]] 00144 bool use_tbbmalloc = true; 00145 int arg_idx = 1; 00146 if( argc>1 && strcmp(argv[1], "-stdmalloc")==0 ) { 00147 use_tbbmalloc = false; 00148 arg_idx = 2; 00149 } 00150 long number_of_nodes = argc>arg_idx ? strtol(argv[arg_idx],0,0) : 10000000; 00151 ++arg_idx; 00152 int nthread = argc>arg_idx ? strtol(argv[arg_idx],0,0) : tbb::task_scheduler_init::automatic; 00153 00154 // Start up scheduler 00155 // For production, no argument should be provided to the constructor, so that 00156 // the application gets the number of threads that are physically available. 00157 tbb::task_scheduler_init init(nthread); 00158 00159 TreeNode* root; 00160 if( use_tbbmalloc ) { 00161 printf("Tree creation using TBB scalable allocator\n"); 00162 root = TreeMaker<tbbmalloc>::create_and_time( number_of_nodes ); 00163 } else { 00164 printf("Tree creation using standard operator new\n"); 00165 root = TreeMaker<stdmalloc>::create_and_time( number_of_nodes ); 00166 } 00167 00168 // Warm up caches 00169 SerialSumTree(root); 00170 printf("Calculations:\n"); 00171 const char* which; 00172 for( int i=0; i<3; ++i ) { 00173 tbb::tick_count t0 = tbb::tick_count::now(); 00174 Value result; 00175 switch( i ) { 00176 case 0: 00177 which = "SerialSumTree"; 00178 result = SerialSumTree(root); 00179 break; 00180 case 1: 00181 which = "SimpleParallelSumTree"; 00182 result = SimpleParallelSumTree(root); 00183 break; 00184 case 2: 00185 which = "OptimizedParallelSumTree"; 00186 result = OptimizedParallelSumTree(root); 00187 break; 00188 } 00189 tbb::tick_count t1 = tbb::tick_count::now(); 00190 printf ("%24s: time = %.1f msec, sum=%g\n", which, (t1-t0).seconds()*1000, result); 00191 } 00192 return 0; 00193 }
Copyright © 2007-2010 by The Shadowrun: Awakened Team. This work is licensed under the GNU Lesser General Public License 3.