44 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			44 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| 
 | |
| #include <vector>
 | |
| 
 | |
/**
 * Element-wise single-precision vector addition.
 *
 * Stores a[k] + b[k] into r[k] for every k in [0, n).
 *
 * The loop is annotated with an OpenACC `kernels loop` directive whose
 * `present` clause names r, a and b; the caller in this file invokes the
 * function from inside an enclosing `acc data` region that maps all three
 * arrays.
 *
 * @param r  destination array, at least n elements
 * @param a  first addend array, at least n elements
 * @param b  second addend array, at least n elements
 * @param n  number of elements to process
 */
void vecaddgpu(float* r, float* a, float* b, std::size_t n)
{
#pragma acc kernels loop present(r, a, b)
  for (std::size_t k = 0; k < n; ++k) {
    r[k] = a[k] + b[k];
  }
}
 | |
| 
 | |
| int main(int, char* [])
 | |
| {
 | |
|   const std::size_t n = 100000; /* vector length */
 | |
|   std::vector<float> a(n);      /* input vector 1 */
 | |
|   std::vector<float> b(n);      /* input vector 2 */
 | |
|   std::vector<float> r(n);      /* output vector */
 | |
|   std::vector<float> e(n);      /* expected output values */
 | |
| 
 | |
|   for (std::size_t i = 0; i < n; ++i) {
 | |
|     a[i] = static_cast<float>(i + 1);
 | |
|     b[i] = static_cast<float>(1000 * i);
 | |
|   }
 | |
| 
 | |
|   /* compute on the GPU */
 | |
|   auto a_ptr = a.data();
 | |
|   auto b_ptr = b.data();
 | |
|   auto r_ptr = r.data();
 | |
| #pragma acc data copyin(a_ptr [0:n], b_ptr [0:n]) copyout(r_ptr [0:n])
 | |
|   {
 | |
|     vecaddgpu(r_ptr, a_ptr, b_ptr, n);
 | |
|   }
 | |
|   /* compute on the host to compare */
 | |
|   for (std::size_t i = 0; i < n; ++i)
 | |
|     e[i] = a[i] + b[i];
 | |
|   /* compare results */
 | |
|   int errs = 0;
 | |
|   for (std::size_t i = 0; i < n; ++i) {
 | |
|     if (r[i] != e[i]) {
 | |
|       ++errs;
 | |
|     }
 | |
|   }
 | |
|   return errs;
 | |
| }
 |