8 #ifndef THREADING_STD_LAUNCH
9 #define THREADING_STD_LAUNCH async // async or deferred
12 namespace threading_common {
20 template <
typename Value>
48 assert(!(
end() <
begin()) &&
"size() unspecified if end()<begin()");
76 "blocked_range has been split incorrectly");
88 assert(r.
is_divisible() &&
"cannot split blocked_range that is not divisible");
96 namespace threading_std {
99 using namespace threading_common;
102 template <
typename Fn,
104 typename Result = std::result_of_t<Fn && (Args && ...)>>
113 template <
typename F>
115 threads_.emplace_back(
async(std::forward<F>(
f)));
120 for (
auto& child : this->threads_) {
132 template <
typename Int,
typename Body,
typename Partitioner = auto_partitioner>
135 const Partitioner& p = Partitioner()) {
137 std::vector<std::future<void>> worker_threads;
138 worker_threads.reserve(worker_count);
141 start_entry = range.
begin(),
142 stop_entry = range.
end(),
143 stride = (range.
size() + worker_count - 1) / worker_count;
144 i < worker_count && start_entry < stop_entry;
145 ++i, start_entry += stride) {
146 const auto end_entry = std::min(start_entry + stride, stop_entry);
148 worker_threads.emplace_back(
151 for (
auto& child : worker_threads) {
158 template <
typename Index,
typename Function,
typename Partitioner = auto_partitioner>
162 const Partitioner& p = Partitioner()) {
168 for (
auto i = r.
begin(), e = r.
end(); i < e; i++) {
177 template <
typename Int,
183 const Value& identity,
184 const RealBody& real_body,
185 const Reduction& reduction,
186 const Partitioner& p = Partitioner()) {
188 std::vector<std::future<Value>> worker_threads;
189 worker_threads.reserve(worker_count);
192 start_entry = range.
begin(),
193 stop_entry = range.
end(),
194 stride = (range.
size() + worker_count - 1) / worker_count;
195 i < worker_count && start_entry < stop_entry;
196 ++i, start_entry += stride) {
197 const auto end_entry = std::min(start_entry + stride, stop_entry);
203 for (
auto& child : worker_threads) {
204 v = reduction(v, child.get());
size_type size() const
Size of the range.
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
Parallel iteration over range with default partitioner.
static Value do_split(blocked_range &r, split)
Auxiliary function used by the splitting constructor.
std::vector< future< void > > threads_
bool empty() const
True if range is empty.
future< Result > async(Fn &&fn, Args &&...args)
A range over which to iterate.
future< Result > async(Fn &&fn, Args &&...args)
Value parallel_reduce(const blocked_range< Int > &range, const Value &identity, const RealBody &real_body, const Reduction &reduction, const Partitioner &p=Partitioner())
Parallel iteration with reduction.
size_type grainsize() const
The grain size for this range.
blocked_range(Value begin_, Value end_)
Construct range over half-open interval [begin,end).
blocked_range(blocked_range &r, split)
Split range.
#define THREADING_STD_LAUNCH
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
const_iterator end() const
One past last value in range.
const_iterator begin() const
Beginning of range.
bool is_divisible() const
True if range is divisible.
std::size_t size_type
Type for size of a range.