threading__serial_8h_source.html

 #include <functional>
 #include <type_traits>

 #include "threading_std.h"


 namespace threading_serial {


 using namespace threading_common;

 using std::future;


 // Serial stand-in for an asynchronous launch: invokes `fn` with `args` on the
 // calling thread, before this function returns, and hands back a future that
 // already holds the result.
 //
 // The callable is invoked through std::invoke with its value category
 // preserved, so anything accepted by the deduced `Result` type (plain
 // callables, member pointers, rvalue-qualified call operators) can actually
 // be called.
 //
 // Note: there is no try/catch here, so an exception thrown by `fn` propagates
 // out of async() itself instead of being stored in the returned future.
 //
 // Returns: a ready future; for a void-returning callable it carries no value.
 template <typename Fn,
           typename... Args,
           typename Result = std::invoke_result_t<Fn, Args...>>
 std::future<Result> async(Fn&& fn, Args&&... args) {
   std::promise<Result> pr;
   if constexpr (std::is_void_v<Result>) {
     // Nothing to store: run the callable, then just mark the promise ready.
     std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...);
     pr.set_value();
   } else {
     pr.set_value(std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...));
   }
   return pr.get_future();
 }


 // Serial stand-in for a task group: every submitted task is executed
 // immediately on the calling thread, so there is never any pending work.
 class task_group {
  public:
   // Runs `task` right away; it has finished by the time run() returns.
   template <typename Task>
   void run(Task&& task) {
     task();
   }

   // Cancellation is not implemented; this call does nothing.
   void cancel() {}

   // Nothing to wait for, because run() executes its task before returning.
   void wait() {}
 };  // class task_group


 // Serial version of parallel_for over a blocked_range.
 //
 // The range is cut into at most cpu_threads() consecutive chunks of equal
 // length (the last one may be shorter), and `body` is called once per chunk,
 // in order, on the calling thread. An empty range results in no calls.
 //
 // `p` is accepted for interface compatibility and is not used here.
 template <typename Int, typename Body, typename Partitioner = auto_partitioner>
 void parallel_for(const blocked_range<Int>& range,
                   const Body& body,
                   const Partitioner& p = Partitioner()) {
   const Int worker_count = cpu_threads();

   Int chunk_begin = range.begin();
   const Int range_end = range.end();
   // Ceiling division so that worker_count chunks always cover the range.
   const Int chunk_len = (range.size() + worker_count - 1) / worker_count;

   Int chunks_done = 0;
   while (chunks_done < worker_count && chunk_begin < range_end) {
     // Clamp the final chunk to the end of the range.
     const auto chunk_end = std::min(chunk_begin + chunk_len, range_end);
     body(blocked_range<Int>(chunk_begin, chunk_end));
     ++chunks_done;
     chunk_begin += chunk_len;
   }
 }


 // Index-based convenience overload: calls `f(i)` for every i in
 // [first, last), in increasing order, by delegating to the blocked_range
 // overload of parallel_for with a per-chunk loop.
 //
 // `p` is simply forwarded to the blocked_range overload.
 template <typename Index, typename Function, typename Partitioner = auto_partitioner>
 void parallel_for(Index first,
                   Index last,
                   const Function& f,
                   const Partitioner& p = Partitioner()) {
   // Applies `f` to each index of one chunk handed out by the range overload.
   const auto apply_to_chunk = [&f](const blocked_range<Index>& chunk) {
     auto idx = chunk.begin();
     const auto chunk_end = chunk.end();
     for (; idx < chunk_end; idx++) {
       f(idx);
     }
   };

   parallel_for(blocked_range<Index>(first, last), apply_to_chunk, p);
 }


 // Serial version of parallel_reduce over a blocked_range.
 //
 // The range is cut into at most cpu_threads() consecutive chunks of equal
 // length (the last one may be shorter). For each chunk, in order,
 // `real_body(chunk, identity)` produces a partial result; the partial results
 // are then folded left-to-right with `reduction`, starting from `identity`.
 //
 // Every chunk is seeded with the caller-supplied `identity` rather than a
 // value-initialized `Value`, so reductions whose neutral element is not
 // `Value{}` (e.g. min, max, product) are computed correctly.
 //
 // `p` is accepted for interface compatibility and is not used here.
 //
 // Returns: `identity` for an empty range, otherwise the folded result.
 template <typename Int,
           typename Value,
           typename RealBody,
           typename Reduction,
           typename Partitioner = auto_partitioner>
 Value parallel_reduce(const blocked_range<Int>& range,
                       const Value& identity,
                       const RealBody& real_body,
                       const Reduction& reduction,
                       const Partitioner& p = Partitioner()) {
   const size_t worker_count = cpu_threads();
   std::vector<Value> partial_results;
   partial_results.reserve(worker_count);

   Int start_entry = range.begin();
   const Int stop_entry = range.end();
   // Ceiling division so that worker_count chunks always cover the range.
   const Int stride = static_cast<Int>(
       (static_cast<size_t>(range.size()) + worker_count - 1) / worker_count);

   // The chunk counter is unsigned like worker_count, which avoids comparing a
   // (possibly signed) Int against size_t.
   for (size_t chunk = 0; chunk < worker_count && start_entry < stop_entry;
        ++chunk, start_entry += stride) {
     // Clamp the final chunk to the end of the range.
     const Int end_entry = std::min<Int>(start_entry + stride, stop_entry);
     // TODO grainsize?
     partial_results.emplace_back(
         real_body(blocked_range<Int>(start_entry, end_entry), identity));
   }

   Value v = identity;
   for (auto& partial : partial_results) {
     v = reduction(v, partial);
   }

   return v;
 }


 }  // namespace threading_serial

threading_serial::task_group
Definition: threading_serial.h:22

threading_common::blocked_range::size
size_type size() const
Size of the range.
Definition: threading_std.h:47

run_benchmark_import.args
tuple args
Definition: run_benchmark_import.py:247

threading_common::auto_partitioner
Definition: threading_std.h:15

threading_serial::task_group::wait
void wait()
Definition: threading_serial.h:30

threading_serial::task_group::cancel
void cancel()
Definition: threading_serial.h:28

threading_serial::async
future< Result > async(Fn &&fn, Args &&...args)
Definition: threading_serial.h:11

Value
Definition: ResultSetReductionOps.h:168

threading_std.h

threading_common::blocked_range
A range over which to iterate.
Definition: threading_std.h:21

threading_serial::parallel_reduce
Value parallel_reduce(const blocked_range< Int > &range, const Value &identity, const RealBody &real_body, const Reduction &reduction, const Partitioner &p=Partitioner())
Parallel iteration with reduction.
Definition: threading_serial.h:74

f
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
Definition: TestTorchTableFunctions.cpp:103

threading_serial::parallel_for
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
Definition: threading_serial.h:34

threading_common::blocked_range::end
const_iterator end() const
One past last value in range.
Definition: threading_std.h:43

threading_common::blocked_range::begin
const_iterator begin() const
Beginning of range.
Definition: threading_std.h:40

cpu_threads
int cpu_threads()
Definition: thread_count.h:25

threading_serial::task_group::run
void run(F &&f)
Definition: threading_serial.h:25