MeshLib C++ Docs
Loading...
Searching...
No Matches
MRBitSetParallelFor.h
Go to the documentation of this file.
1#pragma once
2
3#include "MRBitSet.h"
4#include "MRParallel.h"
6#include "MRTbbThreadMutex.h"
7
8#include <atomic>
9#include <cassert>
10
11namespace MR
12{
13
16
18template <typename Id>
19struct IdRange
20{
22 auto size() const { return end - beg; }
23};
24
25namespace BitSetParallel
26{
27
28template <typename IndexType>
30{
31 const size_t beginBlock = bitRange.beg / BitSet::bits_per_block;
32 const size_t endBlock = ( size_t( bitRange.end ) + BitSet::bits_per_block - 1 ) / BitSet::bits_per_block;
33 return tbb::blocked_range<size_t>( beginBlock, endBlock );
34}
35
36template <typename BS>
37inline auto blockRange( const BS & bs )
38{
39 const size_t endBlock = ( bs.size() + BS::bits_per_block - 1 ) / BS::bits_per_block;
40 return tbb::blocked_range<size_t>( 0, endBlock );
41}
42
43template <typename BS>
44inline auto bitRange( const BS & bs )
45{
46 return IdRange<typename BS::IndexType>{ bs.beginId(), bs.endId() };
47}
48
49template <typename IndexType>
50auto bitSubRange( const IdRange<IndexType> & bitRange, const tbb::blocked_range<size_t> & range, const tbb::blocked_range<size_t> & subRange )
51{
53 {
54 .beg = subRange.begin() > range.begin() ? IndexType( subRange.begin() * BitSet::bits_per_block ) : bitRange.beg,
55 .end = subRange.end() < range.end() ? IndexType( subRange.end() * BitSet::bits_per_block ) : bitRange.end
56 };
57}
58
59template <typename IndexType, typename CM, typename F>
60void ForAllRanged( const IdRange<IndexType> & bitRange, const CM & callMaker, F && f )
61{
62 const auto range = BitSetParallel::blockRange( bitRange );
63 tbb::parallel_for( range, [&]( const tbb::blocked_range<size_t> & subRange )
64 {
65 auto c = callMaker();
66 const auto bitSubRange = BitSetParallel::bitSubRange( bitRange, range, subRange );
67 for ( auto id = bitSubRange.beg; id < bitSubRange.end; ++id )
68 c( f, id, bitSubRange );
69 } );
70}
71
72template <typename BS, typename CM, typename F>
73inline void ForAllRanged( const BS & bs, const CM & callMaker, F && f )
74{
75 ForAllRanged( bitRange( bs ), callMaker, std::forward<F>( f ) );
76}
77
78template <typename IndexType, typename CM, typename F>
79bool ForAllRanged( const IdRange<IndexType> & bitRange, const CM & callMaker, F && f, ProgressCallback progressCb, size_t reportProgressEveryBit = 1024 )
80{
81 if ( !progressCb )
82 {
83 ForAllRanged( bitRange, callMaker, std::forward<F>( f ) );
84 return true;
85 }
86
87 TbbThreadMutex callingThreadMutex;
88 std::atomic<bool> keepGoing{ true };
89
90 // avoid false sharing with other local variables
91 // by putting processedBits in its own cache line
92 constexpr int hardware_destructive_interference_size = 64;
93 struct alignas( hardware_destructive_interference_size ) S
94 {
95 std::atomic<size_t> processedBits{ 0 };
96 } s;
97 static_assert( alignof( S ) == hardware_destructive_interference_size );
98 static_assert( sizeof( S ) == hardware_destructive_interference_size );
99
100 const auto range = BitSetParallel::blockRange( bitRange );
101 tbb::parallel_for( range, [&] ( const tbb::blocked_range<size_t>& subRange )
102 {
103 const auto bitSubRange = BitSetParallel::bitSubRange( bitRange, range, subRange );
104 size_t myProcessedBits = 0;
105 const auto callingThreadLock = callingThreadMutex.tryLock();
106 const bool report = progressCb && callingThreadLock;
107 auto c = callMaker();
108 for ( auto id = bitSubRange.beg; id < bitSubRange.end; ++id )
109 {
110 if ( !keepGoing.load( std::memory_order_relaxed ) )
111 break;
112 c( f, id, bitSubRange );
113 if ( ( ++myProcessedBits % reportProgressEveryBit ) == 0 )
114 {
115 if ( report )
116 {
117 if ( !progressCb( float( myProcessedBits + s.processedBits.load( std::memory_order_relaxed ) ) / bitRange.size() ) )
118 keepGoing.store( false, std::memory_order_relaxed );
119 }
120 else
121 {
122 s.processedBits.fetch_add( myProcessedBits, std::memory_order_relaxed );
123 myProcessedBits = 0;
124 }
125 }
126 }
127 const auto total = myProcessedBits + s.processedBits.fetch_add( myProcessedBits, std::memory_order_relaxed );
128 if ( report && !progressCb( float( total ) / bitRange.size() ) )
129 keepGoing.store( false, std::memory_order_relaxed );
130 } );
131 return keepGoing.load( std::memory_order_relaxed );
132}
133
134template <typename BS, typename CM, typename F>
135inline bool ForAllRanged( const BS & bs, const CM & callMaker, F && f, ProgressCallback progressCb, size_t reportProgressEveryBit = 1024 )
136{
137 return ForAllRanged( bitRange( bs ), callMaker, std::forward<F>( f ), progressCb, reportProgressEveryBit );
138}
139
140} // namespace BitSetParallel
141
147template <typename BS, typename ...F>
148inline auto BitSetParallelForAllRanged( const BS & bs, F &&... f )
149{
150 return BitSetParallel::ForAllRanged( bs, Parallel::CallSimplyMaker{}, std::forward<F>( f )... );
151}
152
159template <typename BS, typename L, typename ...F>
160inline auto BitSetParallelForAllRanged( const BS & bs, tbb::enumerable_thread_specific<L> & e, F &&... f )
161{
162 return BitSetParallel::ForAllRanged( bs, Parallel::CallWithTLSMaker<L>{ e }, std::forward<F>( f )... );
163}
164
169template <typename BS, typename F, typename ...Cb>
170inline auto BitSetParallelForAll( const BS & bs, F && f, Cb&&... cb )
171{
172 return BitSetParallel::ForAllRanged( bs, Parallel::CallSimplyMaker{}, [&f]( auto bit, auto && ) { f( bit ); }, std::forward<Cb>( cb )... );
173}
174
180template <typename BS, typename L, typename F, typename ...Cb>
181inline auto BitSetParallelForAll( const BS & bs, tbb::enumerable_thread_specific<L> & e, F && f, Cb&&... cb )
182{
183 return BitSetParallel::ForAllRanged( bs, Parallel::CallWithTLSMaker<L>{ e }, [&f]( auto bit, auto &&, auto & tls ) { f( bit, tls ); }, std::forward<Cb>( cb )... );
184}
185
190template <typename BS, typename F, typename ...Cb>
191inline auto BitSetParallelFor( const BS& bs, F && f, Cb&&... cb )
192{
193 return BitSetParallelForAll( bs, [&]( auto bit ) { if ( bs.test( bit ) ) f( bit ); }, std::forward<Cb>( cb )... );
194}
195
201template <typename BS, typename L, typename F, typename ...Cb>
202inline auto BitSetParallelFor( const BS& bs, tbb::enumerable_thread_specific<L> & e, F && f, Cb&&... cb )
203{
204 return BitSetParallelForAll( bs, e, [&]( auto bit, auto & tls ) { if ( bs.test( bit ) ) f( bit, tls ); }, std::forward<Cb>( cb )... );
205}
206
208
209} // namespace MR
Definition MRMesh/MRId.h:13
Definition MRTbbThreadMutex.h:32
MRMESH_API std::optional< LockGuard > tryLock()
auto BitSetParallelForAll(const BS &bs, F &&f, Cb &&... cb)
Definition MRBitSetParallelFor.h:170
auto BitSetParallelForAllRanged(const BS &bs, F &&... f)
Definition MRBitSetParallelFor.h:148
auto BitSetParallelFor(const BS &bs, F &&f, Cb &&... cb)
Definition MRBitSetParallelFor.h:191
std::function< bool(float)> ProgressCallback
Definition MRMesh/MRMeshFwd.h:641
void ForAllRanged(const IdRange< IndexType > &bitRange, const CM &callMaker, F &&f)
Definition MRBitSetParallelFor.h:60
auto bitRange(const BS &bs)
Definition MRBitSetParallelFor.h:44
auto blockRange(const IdRange< IndexType > &bitRange)
Definition MRBitSetParallelFor.h:29
auto bitSubRange(const IdRange< IndexType > &bitRange, const tbb::blocked_range< size_t > &range, const tbb::blocked_range< size_t > &subRange)
Definition MRBitSetParallelFor.h:50
range of indices [beg, end)
Definition MRBitSetParallelFor.h:20
auto size() const
Definition MRBitSetParallelFor.h:22
Id beg
Definition MRBitSetParallelFor.h:21
Id end
Definition MRBitSetParallelFor.h:21
Definition MRParallel.h:18
Definition MRParallel.h:32