Intel Threading Building Blocks
UFABC - MCZA020-13 - Programação Paralela - 2018.Q3
Índice
Os exemplos de código abaixo foram criados originalmente para o curso MCS 572: Introduction to Supercomputing da University of Illinois at Chicago pelo professor Jan Verschelde. Todos os créditos são de seus respectivos criadores.
- Baixe aqui todos os exemplos em um só arquivo.
1 TBB - Hello World
/* L-11 MCS 572 Fri 16 Sep 2016 : hello_task_group.cpp * The code below is a hello world with threading building blocks. * The makefile contains the following information: * * TBB_ROOT = < location where TBB is installed > * * hello_task_group: * g++ -I$(TBB_ROOT)/include -L$(TBB_ROOT)/lib \ * hello_task_group.cpp -o /tmp/hello_task_group -ltbb * * The code is adjusted from an online article of Arpan Sen * "Learning the Intel Threading Building Blocks Open Source 2.1 Library" * available via http://www.ibm.com/developerworks/aix/library/" */ #include "tbb/tbb.h" #include <cstdio> using namespace tbb; class say_hello { const char* id; public: say_hello(const char* s) : id(s) { } void operator( ) ( ) const { printf("hello from task %s\n",id); } }; int main( ) { task_group tg; tg.run(say_hello("1")); // spawn 1st task and return tg.run(say_hello("2")); // spawn 2nd task and return tg.wait( ); // wait for tasks to complete }
2 Potenciação - Versão sequencial
// L-11 MCS 572 Fri 16 Sep 2016 : powers_serial.cpp
// Simple C++ program to raise every element of an array
// of n complex doubles to the power d.
// For timing purposes n, d, and the verbose level
// can be provided at the command line, in that order:
//    powers_serial [n [d [verbose]]]
// Values that are not given at the command line are prompted for
// (n and d) or take their default (verbose = 1).

#include <cstdlib>
#include <cmath>
#include <complex>
#include <ctime>
#include <iostream>
#include <iomanip>
#include <vector>

using namespace std;

typedef complex<double> dcmplx;

dcmplx random_dcmplx ( void );
// generates a random complex number
// on the complex unit circle

void write_numbers ( int n, dcmplx *x );
// writes the array of n complex doubles in x

void compute_powers ( int n, dcmplx *x, dcmplx *y, int d );
// for arrays x and y of length n,
// on return y[i] equals x[i]**d

int main ( int argc, char *argv[] )
{
   int v = 1;      // verbose if > 0
   if(argc > 3) v = atoi(argv[3]);

   int dim;        // get the dimension
   if(argc > 1)
      dim = atoi(argv[1]);
   else
   {
      cout << "how many numbers ? ";
      cin >> dim;
   }
   // a negative or unreadable dimension cannot size the arrays
   if(!cin || dim < 0)
   {
      cerr << "the number of elements must be a nonnegative integer\n";
      return 1;
   }
   // fix the seed for comparisons
   srand(20120203); //srand(time(0));

   // heap storage: a variable length array is not standard C++
   // and overflows the stack for large dimensions
   vector<dcmplx> r(dim);
   for(int i=0; i<dim; i++)
      r[i] = random_dcmplx();

   if(v > 0) write_numbers(dim,r.data());

   int deg;        // get the degree
   // argv[2] exists only if argc > 2, with one argument argv[2] is null
   if(argc > 2)
      deg = atoi(argv[2]);
   else
   {
      cout << "give the power : ";
      cin >> deg;
   }
   vector<dcmplx> s(dim);
   compute_powers(dim,r.data(),s.data(),deg);

   if(v > 0) write_numbers(dim,s.data());

   return 0;
}

// Returns cos(e) + i*sin(e) for an angle e in [0, 2*pi],
// where e is obtained by scaling the next value of rand().
dcmplx random_dcmplx ( void )
{
   int r = rand();
   double d = ((double) r)/RAND_MAX;   // d is in [0,1]
   double e = 2*M_PI*d;
   dcmplx c(cos(e),sin(e));
   return c;
}

// Writes x[0], x[1], .., x[n-1] to standard output, one number per line,
// in scientific format with 4 decimal places.
void write_numbers ( int n, dcmplx *x )
{
   for(int i=0; i<n; i++)
      cout << scientific << setprecision(4)
           << "x[" << i << "] = ( " << x[i].real()
           << " , " << x[i].imag() << ")\n";
}

// Stores in y[i] the product of d factors x[i], for i = 0, 1, .., n-1.
// For d <= 0 the inner loop does not run and y[i] equals 1.
void compute_powers ( int n, dcmplx *x, dcmplx *y, int d )
{
   for(int i=0; i < n; i++) // y[i] = pow(x[i],d);
   {                        // pow is too efficient
      dcmplx r(1.0,0.0);
      for(int j=0; j < d; j++) r = r*x[i];
      y[i] = r;
   }
}
3 Potenciação Paralela com TBB
// L-11 MCS 572 Fri 16 Sep 2016 : powers_tbb.cpp // Simple C++ program to raise every element of an array // of n complex doubles to the power d, // using the parallel_for of the Intel TBB library. // For timing purposes n, d, and the verbose level // can be provided at the command line. #include <cstdlib> #include <cmath> #include <complex> #include <ctime> #include <iostream> #include <iomanip> #include "tbb/tbb.h" #include "tbb/blocked_range.h" #include "tbb/parallel_for.h" #include "tbb/task_scheduler_init.h" using namespace std; using namespace tbb; typedef complex<double> dcmplx; dcmplx random_dcmplx ( void ); // generates a random complex number // on the complex unit circle void write_numbers ( int n, dcmplx *x ); // writes the array of n doubles in x void compute_powers ( int n, dcmplx *x, dcmplx *y, int d ); // for arrays x and y of length n, // on return y[i] equals x[i]**d class ComputePowers { dcmplx *const c; // numbers on input int d; // degree dcmplx *result; // output public: ComputePowers(dcmplx x[], int deg, dcmplx y[]) : c(x), d(deg), result(y) { } void operator() ( const blocked_range<size_t>& r ) const { for(size_t i=r.begin(); i!=r.end(); ++i) { dcmplx z(1.0,0.0); for(int j=0; j < d; j++) z = z*c[i]; result[i] = z; } } }; int main ( int argc, char *argv[] ) { int v = 1; // verbose if > 0 if(argc > 3) v = atoi(argv[3]); int dim; // get the dimension if(argc > 1) dim = atoi(argv[1]); else { cout << "how many numbers ? 
"; cin >> dim; } // fix seed to compare with serial srand(20120203); // srand(time(0)); dcmplx r[dim]; for(int i=0; i<dim; i++) r[i] = random_dcmplx(); if(v > 0) write_numbers(dim,r); int deg; // get the degree if(argc > 1) deg = atoi(argv[2]); else { cout << "give the power : "; cin >> deg; } dcmplx s[dim]; task_scheduler_init init(task_scheduler_init::automatic); parallel_for(blocked_range<size_t>(0,dim), ComputePowers(r,deg,s)); if(v > 0) write_numbers(dim,s); return 0; } dcmplx random_dcmplx ( void ) { int r = rand(); double d = ((double) r)/RAND_MAX; double e = 2*M_PI*d; dcmplx c(cos(e),sin(e)); return c; } void write_numbers ( int n, dcmplx *x ) { for(int i=0; i<n; i++) cout << scientific << setprecision(4) << "x[" << i << "] = ( " << x[i].real() << " , " << x[i].imag() << ")\n"; } void compute_powers ( int n, dcmplx *x, dcmplx *y, int d ) { for(int i=0; i < n; i++) // y[i] = pow(x[i],d); { // pow is too efficient dcmplx r(1.0,0.0); for(int j=0; j < d; j++) r = r*x[i]; y[i] = r; } }
4 Soma Paralela de Elementos de um Vetor - Redução com TBB
// L-11 MCS 572 Fri 16 Sep 2016 : parsum_tbb.cpp // illustration of parallel_reduce #include <cstdlib> #include <iostream> #include "tbb/tbb.h" #include "tbb/blocked_range.h" #include "tbb/parallel_reduce.h" #include "tbb/task_scheduler_init.h" using namespace std; using namespace tbb; class SumIntegers { int *data; public: int sum; SumIntegers ( int *d ) : data(d), sum(0) {} void operator() ( const blocked_range<size_t>& r ) { int s = sum; // must accumulate ! int *d = data; size_t end = r.end(); for(size_t i=r.begin(); i != end; ++i) s += d[i]; sum = s; } // the splitting constructor SumIntegers ( SumIntegers& x, split ) : data(x.data), sum(0) {} // the join method does the merge void join ( const SumIntegers& x ) { sum += x.sum; } }; int ParallelSum ( int *x, size_t n ) { SumIntegers S(x); parallel_reduce(blocked_range<size_t>(0,n), S); return S.sum; } int main ( int argc, char *argv[] ) { int n; if(argc > 1) n = atoi(argv[1]); else { cout << "give n : "; cin >> n; } int *d; d = (int*)calloc(n,sizeof(int)); for(int i=0; i<n; i++) d[i] = i+1; task_scheduler_init init (task_scheduler_init::automatic); int s = ParallelSum(d,n); cout << "the sum is " << s << " and it should be " << n*(n+1)/2 << endl; return 0; }