Shadowrun: Awakened 29 September 2011 - Build 871
parallel_scan.h
Go to the documentation of this file.
00001 /*
00002     Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
00003 
00004     This file is part of Threading Building Blocks.
00005 
00006     Threading Building Blocks is free software; you can redistribute it
00007     and/or modify it under the terms of the GNU General Public License
00008     version 2 as published by the Free Software Foundation.
00009 
00010     Threading Building Blocks is distributed in the hope that it will be
00011     useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00012     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU General Public License for more details.
00014 
00015     You should have received a copy of the GNU General Public License
00016     along with Threading Building Blocks; if not, write to the Free Software
00017     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018 
00019     As a special exception, you may use this file as part of a free software
00020     library without restriction.  Specifically, if other files instantiate
00021     templates or use macros or inline functions from this file, or you compile
00022     this file and link it with other files to produce an executable, this
00023     file does not by itself cause the resulting executable to be covered by
00024     the GNU General Public License.  This exception does not however
00025     invalidate any other reasons why the executable file might be covered by
00026     the GNU General Public License.
00027 */
00028 
00029 #ifndef __TBB_parallel_scan_H
00030 #define __TBB_parallel_scan_H
00031 
00032 #include "task.h"
00033 #include "aligned_space.h"
00034 #include <new>
00035 #include "partitioner.h"
00036 
00037 namespace tbb {
00038 
00040 
00041 struct pre_scan_tag {
00042     static bool is_final_scan() {return false;}
00043 };
00044 
00046 
00047 struct final_scan_tag {
00048     static bool is_final_scan() {return true;}
00049 };
00050 
00052 namespace internal {
00053 
00055 
00056     template<typename Range, typename Body>
00057     class final_sum: public task {
00058     public:
00059         Body body;
00060     private:
00061         aligned_space<Range,1> range;
00063         Body* stuff_last;
00064     public:
00065         final_sum( Body& body_ ) :
00066             body(body_,split())
00067         {
00068             poison_pointer(stuff_last);
00069         }
00070         ~final_sum() {
00071             range.begin()->~Range();
00072         }     
00073         void finish_construction( const Range& range_, Body* stuff_last_ ) {
00074             new( range.begin() ) Range(range_);
00075             stuff_last = stuff_last_;
00076         }
00077     private:
00078         /*override*/ task* execute() {
00079             body( *range.begin(), final_scan_tag() );
00080             if( stuff_last )
00081                 stuff_last->assign(body);
00082             return NULL;
00083         }
00084     };       
00085 
00087 
00088     template<typename Range, typename Body>
00089     class sum_node: public task {
00090         typedef final_sum<Range,Body> final_sum_type;
00091     public:
00092         final_sum_type *incoming; 
00093         final_sum_type *body;
00094         Body *stuff_last;
00095     private:
00096         final_sum_type *left_sum;
00097         sum_node *left;
00098         sum_node *right;     
00099         bool left_is_final;
00100         Range range;
00101         sum_node( const Range range_, bool left_is_final_ ) : 
00102             left_sum(NULL), 
00103             left(NULL), 
00104             right(NULL), 
00105             left_is_final(left_is_final_), 
00106             range(range_)
00107         {
00108             // Poison fields that will be set by second pass.
00109             poison_pointer(body);
00110             poison_pointer(incoming);
00111         }
00112         task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
00113             if( !n ) {
00114                 f.recycle_as_child_of( *this );
00115                 f.finish_construction( range_, stuff_last_ );
00116                 return &f;
00117             } else {
00118                 n->body = &f;
00119                 n->incoming = incoming_;
00120                 n->stuff_last = stuff_last_;
00121                 return n;
00122             }
00123         }
00124         /*override*/ task* execute() {
00125             if( body ) {
00126                 if( incoming )
00127                     left_sum->body.reverse_join( incoming->body );
00128                 recycle_as_continuation();
00129                 sum_node& c = *this;
00130                 task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
00131                 task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
00132                 set_ref_count( (a!=NULL)+(b!=NULL) );
00133                 body = NULL; 
00134                 if( a ) spawn(*b);
00135                 else a = b;
00136                 return a;
00137             } else {
00138                 return NULL;
00139             }
00140         }
00141         template<typename Range_,typename Body_,typename Partitioner_>
00142         friend class start_scan;
00143 
00144         template<typename Range_,typename Body_>
00145         friend class finish_scan;
00146     };
00147 
00149 
00150     template<typename Range, typename Body>
00151     class finish_scan: public task {
00152         typedef sum_node<Range,Body> sum_node_type;
00153         typedef final_sum<Range,Body> final_sum_type;
00154         final_sum_type** const sum;
00155         sum_node_type*& return_slot;
00156     public:
00157         final_sum_type* right_zombie;
00158         sum_node_type& result;
00159 
00160         /*override*/ task* execute() {
00161             __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
00162             if( result.left )
00163                 result.left_is_final = false;
00164             if( right_zombie && sum ) 
00165                 ((*sum)->body).reverse_join(result.left_sum->body);
00166             __TBB_ASSERT( !return_slot, NULL );
00167             if( right_zombie || result.right ) {
00168                 return_slot = &result;
00169             } else {
00170                 destroy( result );
00171             }
00172             if( right_zombie && !sum && !result.right ) destroy(*right_zombie);
00173             return NULL;
00174         }
00175 
00176         finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : 
00177             sum(sum_),
00178             return_slot(return_slot_), 
00179             right_zombie(NULL),
00180             result(result_)
00181         {
00182             __TBB_ASSERT( !return_slot, NULL );
00183         }
00184     };
00185 
00187 
00188     template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00189     class start_scan: public task {
00190         typedef sum_node<Range,Body> sum_node_type;
00191         typedef final_sum<Range,Body> final_sum_type;
00192         final_sum_type* body;
00194         final_sum_type** sum; 
00195         sum_node_type** return_slot;
00197         sum_node_type* parent_sum;
00198         bool is_final;
00199         bool is_right_child;
00200         Range range;
00201         typename Partitioner::partition_type partition;
00202         /*override*/ task* execute();
00203     public:
00204         start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
00205             body(parent_.body),
00206             sum(parent_.sum),
00207             return_slot(&return_slot_),
00208             parent_sum(parent_sum_),
00209             is_final(parent_.is_final),
00210             is_right_child(false),
00211             range(parent_.range,split()),
00212             partition(parent_.partition,split())
00213         {
00214             __TBB_ASSERT( !*return_slot, NULL );
00215         }
00216 
00217         start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
00218             body(&body_),
00219             sum(NULL),
00220             return_slot(&return_slot_),
00221             parent_sum(NULL),
00222             is_final(true),
00223             is_right_child(false),
00224             range(range_),
00225             partition(partitioner_)
00226         {
00227             __TBB_ASSERT( !*return_slot, NULL );
00228         }
00229 
00230         static void run(  const Range& range, Body& body, const Partitioner& partitioner ) {
00231             if( !range.empty() ) {
00232                 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00233                 internal::sum_node<Range,Body>* root = NULL;
00234                 typedef internal::final_sum<Range,Body> final_sum_type;
00235                 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
00236                 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00237                     /*return_slot=*/root,
00238                     range,
00239                     *temp_body,
00240                     partitioner );
00241                 task::spawn_root_and_wait( pass1 );
00242                 if( root ) {
00243                     root->body = temp_body;
00244                     root->incoming = NULL;
00245                     root->stuff_last = &body;
00246                     task::spawn_root_and_wait( *root );
00247                 } else {
00248                     body.assign(temp_body->body);
00249                     temp_body->finish_construction( range, NULL );
00250                     temp_body->destroy(*temp_body);
00251                 }
00252             }
00253         }
00254     };
00255 
00256     template<typename Range, typename Body, typename Partitioner>
00257     task* start_scan<Range,Body,Partitioner>::execute() {
00258         typedef internal::finish_scan<Range,Body> finish_pass1_type;
00259         finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00260         // Inspecting p->result.left_sum would ordinarily be a race condition.
00261         // But we inspect it only if we are not a stolen task, in which case we
00262         // know that task assigning to p->result.left_sum has completed.
00263         bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
00264         if( treat_as_stolen ) {
00265             // Invocation is for right child that has been really stolen or needs to be virtually stolen
00266             p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
00267             is_final = false;
00268         }
00269         task* next_task = NULL;
00270         if( (is_right_child && !treat_as_stolen) || !range.is_divisible() || partition.should_execute_range(*this) ) {
00271             if( is_final )
00272                 (body->body)( range, final_scan_tag() );
00273             else if( sum )
00274                 (body->body)( range, pre_scan_tag() );
00275             if( sum ) 
00276                 *sum = body;
00277             __TBB_ASSERT( !*return_slot, NULL );
00278         } else {
00279             sum_node_type* result;
00280             if( parent_sum ) 
00281                 result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,/*left_is_final=*/is_final);
00282             else
00283                 result = new(task::allocate_root()) sum_node_type(range,/*left_is_final=*/is_final);
00284             finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
00285             // Split off right child
00286             start_scan& b = *new( c.allocate_child() ) start_scan( /*return_slot=*/result->right, *this, result );
00287             b.is_right_child = true;    
00288             // Left child is recycling of *this.  Must recycle this before spawning b, 
00289             // otherwise b might complete and decrement c.ref_count() to zero, which
00290             // would cause c.execute() to run prematurely.
00291             recycle_as_child_of(c);
00292             c.set_ref_count(2);
00293             c.spawn(b);
00294             sum = &result->left_sum;
00295             return_slot = &result->left;
00296             is_right_child = false;
00297             next_task = this;
00298             parent_sum = result; 
00299             __TBB_ASSERT( !*return_slot, NULL );
00300         }
00301         return next_task;
00302     } 
00303 } // namespace internal
00305 
00306 // Requirements on Range concept are documented in blocked_range.h
00307 
00325 
00327 
00328 template<typename Range, typename Body>
00329 void parallel_scan( const Range& range, Body& body ) {
00330     internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
00331 }
00332 
00334 
00335 template<typename Range, typename Body>
00336 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00337     internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
00338 }
00339 
00341 
00342 template<typename Range, typename Body>
00343 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00344     internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
00345 }
00347 
00348 } // namespace tbb
00349 
00350 #endif /* __TBB_parallel_scan_H */
00351 

Copyright © 2007-2010 by The Shadowrun: Awakened Team. This work is licensed under the GNU Lesser General Public License 3.

GNU Lesser General Public License 3 Sourceforge.net