Narzędzia użytkownika

Narzędzia witryny


studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone

Różnice

Różnice między wybraną wersją a wersją aktualną.

Odnośnik do tego porównania

Poprzednia rewizja po obu stronach Poprzednia wersja
Nowa wersja
Poprzednia wersja
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/03/30 23:10]
46.22.174.138
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/05/19 11:13] (aktualna)
149.156.112.6 [tmp/opencl_mat_transp/mat_transp/mat_transp.c]
Linia 781: Linia 781:
  
  
- 
- 
- 
- 
- //​TWORZENIE STRUKTURY 
     struct rekord wys;     struct rekord wys;
-    wys.x_min = x_min; 
-    wys.x_max = x_max; 
-    wys.n = n;    ​ 
- 
     MPI_Datatype rekord_typ;     MPI_Datatype rekord_typ;
     int tab_dlug_blokow[3] = {1, 1, 1};     int tab_dlug_blokow[3] = {1, 1, 1};
Linia 802: Linia 793:
     MPI_Type_struct(3,​ tab_dlug_blokow,​ tab_odstepow,​ tab_typow, &​rekord_typ);​     MPI_Type_struct(3,​ tab_dlug_blokow,​ tab_odstepow,​ tab_typow, &​rekord_typ);​
     MPI_Type_commit(&​rekord_typ);​     MPI_Type_commit(&​rekord_typ);​
-    ​MPI_Send ​( &wys, 1, rekord_typ, j, 1, komunikator ​);+    ​MPI_Recv ​( &wys, 1, rekord_typ, 0, 1, parentcomm, &status ); 
 +</​code>​ 
 + 
 +====== Zajęcia 4 ====== 
 +wkrótce 
 +====== Zajęcia 5 ====== 
 +==== Makefile ==== 
 +<code bash># optimization and other system dependent options 
 +#​include ​ make.$(SRR_ARCH) 
 +# or directly 
 +include ​ make.lab_404_NVIDIA 
 + 
 +NAME = Hello_GPU 
 + 
 +program: main.o  
 + $(CC) $(LDFL) main.o ​ $(LIB) -o $(NAME) 
 + 
 +main.o: main.c  
 + $(CC) $(CFL) -c main.c ​ $(INC) -o main.o 
 + 
 +clean: 
 + rm -f obj/* 
 + rm -f $(NAME) 
 +</​code>​ 
 +==== make.lab_404_NVIDIA ==== 
 +<code bash># C compiler 
 +#CC = icc 
 +CC = gcc 
 + 
 +# C++ compiler 
 +CPPC = icpc 
 +#CPPC = g++ 
 + 
 +# Loader (to link C/C++ and Fortran libraries) 
 +LD = icpc 
 +#LD = g++ 
 + 
 +# Archiver 
 +AR = ar r 
 + 
 +# For removing files 
 +RM = rm -f 
 + 
 +# Include directories 
 +INC = -I/​opt/​cuda7/​include 
 + 
 +# Standard and/or local libraries 
 +LIB = -L/​opt/​cuda7/​lib64 -lOpenCL  
 + 
 +# C optimization and other flags 
 +#CFL(icc) = -O3 -openmp 
 +#CFL(gcc) = -O3 -fopenmp 
 +#​CFL(icc,​debug) = -g 
 +#CFL(gcc) = -g 
 +CFL = 
 + 
 +# Loader optimization and other flags 
 +#​LDFL(debug) = -g  
 +#​LDFL(profile) = -p                                 
 +LDFL =  
 + 
 +</​code>​ 
 +==== main.c ==== 
 +<code c>#​include<​stdlib.h>​ 
 +#​include<​stdio.h>​ 
 + 
 +#include <​CL/​cl.h>​ 
 + 
 +// functions to display platform and device properties 
 +void DisplayPlatformInfo( 
 + cl_platform_id id,  
 + cl_platform_info name, 
 + char* str) 
 +
 +  cl_int retval; 
 +  size_t paramValueSize;​ 
 +   
 +  retval = clGetPlatformInfo( 
 +      id, 
 +      ​name,​ 
 +      0, 
 +      ​NULL,​ 
 +      &​paramValueSize);​ 
 +  if (retval != CL_SUCCESS){ 
 +    printf("​Failed to find OpenCL platform %s.\n",​ str); 
 +    return; 
 +  } 
 +   
 +  char * info = (char *)malloc(sizeof(char) * paramValueSize);​ 
 +  retval = clGetPlatformInfo( 
 +      id, 
 +      ​name,​ 
 +      ​paramValueSize,​ 
 +      ​info,​ 
 +      ​NULL);​ 
 +  if (retval != CL_SUCCESS) ​ { 
 +    printf("​Failed to find OpenCL platform %s.\n",​ str); 
 +    return; 
 +  } 
 +   
 +  printf("​\t%s:​\t%s\n",​ str, info ); 
 +  free(info);  
 +
 + 
 +void DisplayDeviceInfo_char( 
 +        ​cl_device_id id,  
 +        ​cl_device_info name, 
 +        char* str) 
 +
 +  cl_int retval; 
 +  size_t paramValueSize;​ 
 +   
 +  retval = clGetDeviceInfo( 
 +    id, 
 +    ​name,​ 
 +    0, 
 +    ​NULL,​ 
 +    &​paramValueSize);​ 
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to find OpenCL device info %s.\n",​ str); 
 +    return; 
 +  } 
 +   
 +  char * info = (char *)malloc(sizeof(char) * paramValueSize);​ 
 +  retval = clGetDeviceInfo( 
 +    id, 
 +    ​name,​ 
 +    ​paramValueSize,​ 
 +    ​info,​ 
 +    ​NULL);​ 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to find OpenCL device info %s.\n",​ str); 
 +    return; 
 +  } 
 + 
 +  printf("​\t\t%s:​\t%s\n",​ str, info ); 
 +  free(info);​ 
 +}; 
 + 
 +void DisplayDeviceInfo_ulong( 
 +        ​cl_device_id id,  
 +        ​cl_device_info name, 
 +        char* str) 
 +
 +  cl_int retval; 
 +  size_t paramValueSize;//​ = sizeof(cl_ulong);​ 
 +   
 +  retval = clGetDeviceInfo( 
 +    id, 
 +    ​name,​ 
 +    0, 
 +    ​NULL,​ 
 +    &​paramValueSize);​ 
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to find OpenCL device info %s.\n",​ str); 
 +    return; 
 +  } 
 +   
 +  cl_ulong * info = (cl_ulong *)malloc(sizeof(cl_ulong) * paramValueSize);​ 
 +  retval = clGetDeviceInfo( 
 +    id, 
 +    ​name,​ 
 +    ​paramValueSize,​ 
 +    ​info,​ 
 +    ​NULL);​ 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to find OpenCL device info %s.\n",​ str); 
 +    return; 
 +  } 
 + 
 +  printf("​\t\t%s:​\t%lu MB\n", str, info[0] / 1024 / 1024 ); 
 +  free(info);​ 
 +}; 
 + 
 +void DisplayDeviceInfo_uint( 
 +        ​cl_device_id id,  
 +        ​cl_device_info name, 
 +        char* str) 
 +
 +  cl_int retval; 
 +  size_t paramValueSize;//​ = sizeof(cl_ulong);​ 
 +   
 +  retval = clGetDeviceInfo( 
 +    id, 
 +    ​name,​ 
 +    0, 
 +    ​NULL,​ 
 +    &​paramValueSize);​ 
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to find OpenCL device info %s.\n",​ str); 
 +    return; 
 +  } 
 +   
 +  cl_uint * info = (cl_uint *)malloc(sizeof(cl_uint) * paramValueSize);​ 
 +  retval = clGetDeviceInfo( 
 +    id, 
 +    ​name,​ 
 +    ​paramValueSize,​ 
 +    ​info,​ 
 +    ​NULL);​ 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to find OpenCL device info %s.\n",​ str); 
 +    return; 
 +  } 
 + 
 +  printf("​\t\t%s:​\t%d MHz\n",​ str, info[0] ); 
 +  free(info);​ 
 +}; 
 + 
 + 
 +// main program controlling execution of CPU code and OpenCL kernels 
 +int main(int argc, char** argv) 
 +
 +  cl_uint number_of_contexts = 2; 
 +  cl_context context = NULL; 
 +  cl_context list_of_contexts[2] = {0,0}; 
 +  cl_command_queue commandQueue = 0; 
 +  cl_program program = 0; 
 +  cl_uint number_of_devices;​ 
 +  cl_device_id device = 0; 
 +  cl_device_id *list_of_devices;​ 
 +  cl_device_type type; 
 +  cl_kernel kernel = 0; 
 +  cl_mem memObjects[3] = { 0, 0, 0 }; 
 +  cl_int retval; 
 +  int icon, idev; 
 +  cl_uint numPlatforms;​ 
 +  cl_platform_id * platformIds;​ 
 +  cl_uint i,j
 + 
 +  // flag to control displaying 
 +  int Monitor = 1; 
 + 
 + 
 +  // Create OpenCL contexts  
 + 
 +  // First, query the total number of platforms 
 +  retval = clGetPlatformIDs(0,​ (cl_platform_id *) NULL, &​numPlatforms);​ 
 + 
 +  // Next, allocate memory for the installed plaforms, and qeury  
 +  // to get the list. 
 +  platformIds = (cl_platform_id *)malloc(sizeof(cl_platform_id) * numPlatforms);​ 
 + 
 +  // Then, query the platform IDs 
 +  retval = clGetPlatformIDs(numPlatforms,​ platformIds,​ NULL); 
 + 
 +  if(Monitor>​=0){ 
 +    printf("​Number of platforms: \t%d\n",​ numPlatforms);​  
 +  } 
 + 
 +  // Iterate through the list of platforms displaying associated information 
 +  for (i = 0; i < numPlatforms;​ i++) { 
 + 
 +    if(Monitor>​0){ 
 + 
 +      printf("​Platform ID - %d\n",​i);​ 
 +      // First we display information associated with the platform 
 +      DisplayPlatformInfo( 
 + platformIds[i],​  
 + CL_PLATFORM_NAME,​  
 + "​CL_PLATFORM_NAME"​);​ 
 +      DisplayPlatformInfo( 
 + platformIds[i],​  
 + CL_PLATFORM_PROFILE,​  
 + "​CL_PLATFORM_PROFILE"​);​ 
 +      DisplayPlatformInfo( 
 + platformIds[i],​  
 + CL_PLATFORM_VERSION,​  
 + "​CL_PLATFORM_VERSION"​);​ 
 +      DisplayPlatformInfo( 
 + platformIds[i],​  
 + CL_PLATFORM_VENDOR,​  
 + "​CL_PLATFORM_VENDOR"​);​ 
 +    } 
 + 
 +  } 
 + 
 +  // For the first platform 
 +  int iplat; 
 +  printf("​Select Platform ID: "); scanf("​%d",​ &​iplat);​ 
 + 
 +  // Query the set of devices associated with the platform 
 +  retval = clGetDeviceIDs( 
 +   platformIds[iplat],​ 
 +   CL_DEVICE_TYPE_ALL,​ 
 +   0, 
 +   NULL, 
 +   &​number_of_devices);​ 
 +   
 +   
 +  list_of_devices =  
 +    (cl_device_id *) malloc (sizeof(cl_device_id) * number_of_devices);​ 
 +   
 +  retval = clGetDeviceIDs( 
 +   platformIds[iplat],​ 
 +   CL_DEVICE_TYPE_ALL,​ 
 +   number_of_devices,​ 
 +   list_of_devices,​ 
 +   NULL); 
 +   
 +  if(Monitor>​=0){ 
 +    printf("​Number of devices: \t%d\n",​ number_of_devices);​  
 +  } 
 + 
 +  // Iterate through each device, displaying associated information 
 +  for (j = 0; j < number_of_devices;​ j++) { 
 +     
 +    clGetDeviceInfo(list_of_devices[j],​ CL_DEVICE_TYPE,​  
 +     sizeof(cl_device_type),​ &type, NULL); 
 +     
 +    if(Monitor>​0){ 
 +       
 +      DisplayDeviceInfo_char( 
 + list_of_devices[j],​  
 + CL_DEVICE_NAME,​  
 + "​CL_DEVICE_NAME"​);​ 
 +       
 +      DisplayDeviceInfo_char( 
 + list_of_devices[j],​  
 + CL_DEVICE_VENDOR,​  
 + "​CL_DEVICE_VENDOR"​);​ 
 +       
 +      DisplayDeviceInfo_char( 
 + list_of_devices[j],​  
 + CL_DEVICE_VERSION,​  
 + "​CL_DEVICE_VERSION"​);​ 
 + 
 +      DisplayDeviceInfo_ulong( 
 + list_of_devices[j],​  
 + CL_DEVICE_GLOBAL_MEM_SIZE,​  
 + "​CL_DEVICE_GLOBAL_MEM_SIZE"​);​ 
 + 
 +      DisplayDeviceInfo_uint( 
 + list_of_devices[j],​  
 + CL_DEVICE_MAX_CLOCK_FREQUENCY,​  
 + "​CL_DEVICE_MAX_CLOCK_FREQUENCY"​);​ 
 +      printf("​\n"​);​ 
 +    } 
 +  } 
 +   
 +  // Next, create OpenCL contexts on platforms 
 +  cl_context_properties contextProperties[] = { 
 +    CL_CONTEXT_PLATFORM,​ 
 +    (cl_context_properties)platformIds[iplat],​ 
 +    0 
 +  }; 
 +   
 +  if(Monitor>​0){ 
 +    printf("​Creating CPU context %d on platform %d\n", 1, iplat); 
 +  } 
 +   
 +  list_of_contexts[1] =  
 +    clCreateContextFromType(contextProperties,​  
 +     CL_DEVICE_TYPE_CPU,​ NULL, NULL, &​retval);​ 
 +   
 +  if(Monitor>​=0 && retval != CL_SUCCESS){ 
 +    printf("​Could not create CPU context on platform %d\n", i); 
 +  } 
 +   
 +  if(Monitor>​0){ 
 +    printf("​Creating GPU context 0 on platform %d\n", iplat); 
 +  } 
 +   
 +  list_of_contexts[0] =  
 +    clCreateContextFromType(contextProperties,​  
 +     CL_DEVICE_TYPE_GPU,​ NULL, NULL, &​retval);​ 
 +   
 +  if(Monitor>​=0 && retval != CL_SUCCESS){ 
 +    printf("​Could not create GPU context on platform %d\n", i); 
 +  } 
 +   
 +  // in a loop over devices of the seleceted platform ​   
 +  for(idev=0; idev<​number_of_devices;​idev++){ 
 +     
 +    if(Monitor>​0){ 
 +      printf("​\nFor context %d and device %d:​\n",​ 
 +      idev, idev); 
 +    } 
 +    device = list_of_devices[idev];​ 
 +    icon = idev; 
 +     
 +    // choose OpenCL context on first available platform 
 +    context = list_of_contexts[icon];​ 
 +     
 +    if(context !=0){ 
 +             
 +      commandQueue = clCreateCommandQueue(context,​ device, 0, NULL); 
 +      if (commandQueue == NULL) { 
 + printf("​Failed to create commandQueue for device %d\n", idev); 
 + exit(0); 
 +      } 
 +       
 +      if(Monitor>​0){ 
 + printf("​Reading program from source\n"​);​ 
 +      } 
 + 
 +      // read source code from file 
 +      FILE *fp; 
 +      char* source; 
 +      long int size; 
 +       
 +      fp = fopen("​HelloWorld.cl",​ "​rb"​);​ 
 +      if(!fp) { 
 + printf("​Could not open kernel file\n"​);​ 
 + exit(-1);​ 
 +      } 
 +      int status = fseek(fp, 0, SEEK_END);​ 
 +      if(status != 0) { 
 + printf("​Error seeking to end of file\n"​);​ 
 + exit(-1);​ 
 +      } 
 +      size = ftell(fp);​ 
 +      if(size < 0) { 
 + printf("​Error getting file position\n"​);​ 
 + exit(-1);​ 
 +      } 
 +       
 +      rewind(fp);​ 
 +       
 +      source = (char *)malloc(size + 1); 
 +       
 +      int i; 
 +      for (i = 0; i < size+1; i++) { 
 + source[i]='​\0';​ 
 +      } 
 +       
 +      if(source == NULL) { 
 + printf("​Error allocating space for the kernel source\n"​);​ 
 + exit(-1);​ 
 +      } 
 +       
 +      fread(source,​ 1, size, fp); 
 +      source[size] = '​\0';​ 
 + 
 +      const char* source_const = source; 
 +      cl_program program = clCreateProgramWithSource(context,​ 1, 
 +  ​    &​source_const,​ 
 +  ​    NULL, NULL); 
 +      if (program == NULL) 
 +
 +   printf("​Failed to create CL program from source.\n"​);​ 
 +   exit(-1); 
 +
 +       
 +      if(Monitor>​0){ 
 + printf("​Creating program and kernel\n"​);​ 
 +      } 
 +      // build program (passing options to compiler if necessary 
 +      retval = clBuildProgram(program,​ 0, NULL, NULL, NULL, NULL); 
 +      char* buildLog; size_t size_of_buildLog;​  
 +      clGetProgramBuildInfo(program,​ device, CL_PROGRAM_BUILD_LOG,​  
 +     0, NULL, &​size_of_buildLog);​  
 +      buildLog = malloc(size_of_buildLog+1);​  
 +      clGetProgramBuildInfo(program,​ device, CL_PROGRAM_BUILD_LOG,​  
 +     size_of_buildLog,​ buildLog, NULL);  
 +      buildLog[size_of_buildLog]= '​\0';​  
 +      printf("​Kernel buildLog: %s\n", buildLog);  
 +      if (retval != CL_SUCCESS) 
 +
 +   printf("​Error in kernel\n"​);​ 
 +   clReleaseProgram(program);​ 
 +   exit(-1); 
 +
 +       
 +       
 +      // Create OpenCL kernel 
 +      kernel = clCreateKernel(program,​ "​hello_kernel",​ NULL); 
 +      if (kernel == NULL) 
 +
 +   printf("​Failed to create kernel.\n"​);​ 
 +   exit(0); 
 +
 +       
 +      if(Monitor>​0){ 
 + printf("​Creating memory objects\n"​);​ 
 +      } 
 +      // Create memory objects that will be used as arguments to 
 +      // kernel. ​ First create host memory arrays that will be 
 +      // used to store the arguments to the kernel 
 +      float result[1];​ 
 +      float a[1]; 
 +      float b[1]; 
 +      a[0] = 2; 
 +      b[0] = 2; 
 +       
 +      memObjects[0] = clCreateBuffer(context,​ CL_MEM_READ_ONLY , 
 +      ​sizeof(float),​ NULL, NULL); 
 +      memObjects[1] = clCreateBuffer(context,​ CL_MEM_READ_ONLY , 
 +      ​sizeof(float),​ NULL, NULL); 
 +      memObjects[2] = clCreateBuffer(context,​ CL_MEM_READ_WRITE,​ 
 +      ​sizeof(float),​ NULL, NULL); 
 +       
 +      if (memObjects[0]==NULL || memObjects[1]==NULL || memObjects[2]==NULL){ 
 + printf("​Error creating memory objects.\n"​);​ 
 + return 0; 
 +      } 
 +       
 +      if(Monitor>​0){ 
 + printf("​Sending kernel arguments\n"​);​ 
 +      } 
 +      retval = clEnqueueWriteBuffer( 
 +     commandQueue,​  
 +     memObjects[0],​  
 +     CL_FALSE,  
 +     0,  
 +     sizeof(float), ​                         
 +     a,  
 +     0,  
 +     NULL,  
 +     NULL); 
 +       
 +      // Use clEnqueueWriteBuffer() to write input array B to  
 +      // the device buffer bufferB 
 +      retval = clEnqueueWriteBuffer( 
 +     commandQueue,​  
 +     memObjects[1],​  
 +     CL_FALSE,  
 +     0,  
 +     sizeof(float), ​                         
 +     b, 
 +     0,  
 +     NULL,  
 +     NULL); 
 +       
 +      // Set the kernel arguments (result, a, b) 
 +      retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​ 
 +      retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​ 
 +      retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​ 
 +      if (retval != CL_SUCCESS) 
 +
 +   printf("​Failed to Set the kernel arguments.\n"​);​ 
 +   //​Cleanup(context,​ commandQueue,​ program, kernel, memObjects);​ 
 +   return 1; 
 +
 +       
 +      if(Monitor>​0){ 
 + printf("​Running the kernel!\n"​);​ 
 +      } 
 +      size_t globalWorkSize[1] = { 1 }; 
 +      size_t localWorkSize[1] = { 1 }; 
 +       
 +      // Queue the kernel up for execution across the array 
 +      retval = clEnqueueNDRangeKernel(commandQueue,​ kernel, 1, NULL, 
 +       globalWorkSize,​ localWorkSize,​ 
 +       0, NULL, NULL); 
 +      if (retval != CL_SUCCESS) 
 +
 +   printf("​Failed to queue kernel for execution.\n"​);​ 
 +   //​Cleanup(context,​ commandQueue,​ program, kernel, memObjects);​ 
 +   return 1; 
 +
 +       
 +      if(Monitor>​0){ 
 + printf("​Transfering back results\n"​);​ 
 +      } 
 +      // Read the output buffer back to the Host 
 +      retval = clEnqueueReadBuffer(commandQueue,​ memObjects[2],​ CL_TRUE, 
 +    0, sizeof(float),​ result, 
 +    0, NULL, NULL); 
 +      if (retval != CL_SUCCESS) 
 +
 +   printf("​Failed to read result buffer.\n"​);​ 
 +   //​Cleanup(context,​ commandQueue,​ program, kernel, memObjects);​ 
 +   return 1; 
 +
 +       
 +      // Verify the output 
 +      if(result[0]==4) ​ { 
 +        printf("​Output is correct: %lf + %lf = %lf\n",​ 
 +        a[0], b[0], result[0]);​ 
 +      } else { 
 +        printf("​Output is incorrect: %lf + %lf != %lf\n",​ 
 +        a[0], b[0], result[0]);​ 
 +      } 
 +       
 +      for (i = 0; i < 3; i++) 
 +      { 
 +        if (memObjects[i] != 0) 
 +            clReleaseMemObject(memObjects[i]);​ 
 +      } 
 +      if (commandQueue != 0) 
 +        clReleaseCommandQueue(commandQueue);​ 
 +       
 +      if (kernel != 0) 
 +        clReleaseKernel(kernel);​ 
 +       
 +      if (program != 0) 
 +        clReleaseProgram(program);​ 
 +       
 +    } 
 +  } 
 +   
 +  free(list_of_devices);​ 
 +  free(platformIds);​ 
 +   
 +  return 0; 
 +
 +</​code>​ 
 +==== HelloWorld.cl ==== 
 +<code c> 
 +__kernel void hello_kernel(__global const float *a, 
 +            ​__global const float *b, 
 +    ​__global float *result) 
 +
 +    int gid = get_global_id(0);​ 
 + 
 +    if(gid<​1) result[gid] = a[gid] + b[gid]; 
 +}</​code>​ 
 +===== Zajęcia 6 ===== 
 +Pliki zadania: {{:​studia:​magisterskie:​1sem:​opencl_vecadd.tgz|}} 
 +Działające 3 kernele: {{:​studia:​magisterskie:​1sem:​sala404.zip|}} 
 + 
 +==== opencl_vecadd/​vecadd_host/​execute_kernels.c ==== 
 +<code c>#​include<​stdlib.h>​ 
 +#​include<​stdio.h>​ 
 +#include <​math.h>​ 
 + 
 +#​include<​omp.h>​ 
 +#include <​CL/​cl.h>​ 
 + 
 +#​include"​uth_ocl_intf.h"​ 
 + 
 +#​include"​./​vecadd_host.h"​ 
 +#​include"​./​hardware_params.h"​ 
 + 
 +#define time_measurments 
 + 
 +#ifdef time_measurments 
 +#​include"​uth_time_intf.h"​ 
 +  static double t_begin, t_end, t_total; 
 +#endif 
 + 
 +int execute_kernel_2_CPU( 
 + int platform_index,​ 
 + int device_index,​ 
 + int kernel_index,​ 
 + int array_size,​ 
 + cl_mem* memObjects,​ 
 + int monitor 
 +
 +
 +   
 +  cl_kernel kernel; 
 +  cl_event kernel_execution;​ 
 +  cl_ulong startTime;​ 
 +  cl_ulong endTime; 
 +  double exec_time;​ 
 +  cl_int retval; 
 +   
 +  size_t globalWorkSize[1] = { 0 }; 
 +  size_t localWorkSize[1] = { 0 }; 
 +   
 +  int i,j,k; 
 +   
 +   
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​ 
 +#ifdef time_measurments 
 +  t_begin = time_clock();​ 
 +#endif 
 +   
 +   
 +  // create the kernel for CPU 
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​ 
 +   // kernel name:         , file: 
 +   "​vecadd_2_blocks_kernel",​ "​vecadd_2_blocks.cl",​ monitor); 
 +   
 +#ifdef time_measurments 
 +  t_end = time_clock();​ 
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​ 
 +#endif 
 +   
 +  // choose the context 
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index); ​  
 +   
 +  // choose the command queue 
 +  cl_command_queue command_queue =  
 +    utr_ocl_select_command_queue(platform_index,​ device_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \t3. Restoring context and command queue for platform %d and device %d\n",​ 
 +    ​platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL){  
 +     
 +    printf("​failed to restore context and command queue for platform %d, device %d\n",  
 +    ​platform_index,​ device_index);​ 
 +    printf("​%lu %lu\n",​ context, command_queue);​ 
 +  } 
 +   
 +   
 +  int NDRange_dim = 1; 
 +  int nr_cores_CPU = omp_get_num_procs();​ 
 +  int nr_threads = nr_cores_CPU;​ // total number of threads 
 +  int work_group_size = 1; // number of threads in work-group 
 +  int nr_work_groups = nr_threads / work_group_size;​ 
 +  globalWorkSize[0] = nr_threads ; 
 +  localWorkSize[0] = work_group_size ; 
 +   
 +  printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",​ 
 + nr_cores_CPU,​ nr_cores_CPU,​ 1); 
 +  printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",​ 
 + work_group_size,​ nr_threads / work_group_size,​  
 + nr_threads / work_group_size / nr_cores_CPU);​ 
 +  printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",​ 
 + globalWorkSize[0],​ globalWorkSize[0]/​nr_cores_CPU,​  
 + globalWorkSize[0]/​nr_cores_CPU);​ 
 +  printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",​ 
 + array_size, array_size/​globalWorkSize[0],​ array_size/​nr_cores_CPU);​ 
 +   
 +   
 +  // choose the kernel 
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \tRestoring kernel %d for platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL || kernel == NULL){  
 +     
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n",  
 +    ​platform_index,​ device_index,​ kernel_index);​ 
 +    printf("​context %lu, command queue %lu, kernel %lu\n",​  
 +    ​context,​ command_queue,​ kernel); 
 +  } 
 +   
 +  int size = array_size;​ 
 +  int size_per_thread = ceilf((float)size/​nr_threads);​  
 +   
 +  // Set the kernel arguments (result, a, b) 
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​ 
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​ 
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​ 
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &​size);​ 
 +  retval |= clSetKernelArg(kernel,​ 4, sizeof(int),​ &​size_per_thread);​ 
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to Set the kernel arguments.\n"​);​ 
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +    exit(-1); 
 +  } 
 +   
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +  } 
 +   
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_begin = time_clock();​ 
 +#endif 
 + 
 +  // Queue the kernel up for execution across the array 
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL, 
 +   globalWorkSize,​ localWorkSize,​ 
 +   0, NULL, &​kernel_execution);​ 
 +  clWaitForEvents(1,​ &​kernel_execution);​ 
 +   
 +   
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_end = time_clock();​ 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_START,​ 
 +   sizeof(cl_ulong),​ 
 +   &​startTime,​ 
 +   0); 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_END,​ 
 +   sizeof(cl_ulong),​ 
 +   &​endTime,​ 
 +   0); 
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​ 
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​ 
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​ 
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​ 
 + array_size, array_size / exec_time * 1e-9); 
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​ 
 + 3*array_size*sizeof(float)*1e-9,​ 
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​ 
 +#endif 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to queue kernel for execution.\n"​);​ 
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +    exit(-1); 
 +  } 
 +   
 +  return(0);​ 
 +
 + 
 + 
 +int execute_kernel_3_CPU( 
 + int platform_index,​ 
 + int device_index,​ 
 + int kernel_index,​ 
 + int array_size,​ 
 + cl_mem* memObjects,​ 
 + int monitor 
 +
 +
 +   
 +  cl_kernel kernel; 
 +  cl_event kernel_execution;​ 
 +  cl_ulong startTime;​ 
 +  cl_ulong endTime; 
 +  double exec_time;​ 
 +  cl_int retval; 
 +   
 +  size_t globalWorkSize[1] = { 0 }; 
 +  size_t localWorkSize[1] = { 0 }; 
 +   
 +  int i,j,k; 
 +   
 +   
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​ 
 +#ifdef time_measurments 
 +  t_begin = time_clock();​ 
 +#endif 
 +   
 +  // create the kernel for CPU 
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​ 
 +   // kernel name:         , file: 
 +   "​vecadd_3_opt_blocks_kernel","​vecadd_3_opt_blocks.cl",​monitor);​ 
 +   
 +#ifdef time_measurments 
 +  t_end = time_clock();​ 
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​ 
 +#endif 
 +   
 +   
 +  // choose the context 
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index); ​  
 +   
 +  // choose the command queue 
 +  cl_command_queue command_queue =  
 +    utr_ocl_select_command_queue(platform_index,​ device_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",​ 
 +    ​platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL){  
 +    printf("​failed to restore context and command queue for platform %d, device %d\n",  
 +    ​platform_index,​ device_index);​ 
 +    printf("​%lu %lu\n",​ context, command_queue);​ 
 +  } 
 +   
 +   
 +  int NDRange_dim = 1; 
 +  int nr_cores_CPU = omp_get_num_procs();​ 
 +  int nr_threads = nr_cores_CPU;​ // total number of threads 
 +  int work_group_size = 1; // number of threads in work-group 
 +  int nr_work_groups = nr_threads / work_group_size;​ 
 +  globalWorkSize[0] = nr_threads ; 
 +  localWorkSize[0] = work_group_size ; 
 +   
 +  printf("​\t\tExecuting kernel %d, on platform %d and device %d\n",​ 
 + kernel_index,​ platform_index,​ device_index);​ 
 +  printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",​ 
 + nr_cores_CPU,​ nr_cores_CPU,​ 1); 
 +  printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",​ 
 + work_group_size,​ nr_threads / work_group_size,​  
 + nr_threads / work_group_size / nr_cores_CPU);​ 
 +  printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",​ 
 + globalWorkSize[0],​ globalWorkSize[0]/​nr_cores_CPU,​  
 + globalWorkSize[0]/​nr_cores_CPU);​ 
 +  printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",​ 
 + array_size, array_size/​globalWorkSize[0],​ array_size/​nr_cores_CPU);​ 
 +   
 +  // choose the kernel 
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL || kernel == NULL){  
 +     
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n",  
 +    ​platform_index,​ device_index,​ kernel_index);​ 
 +    printf("​context %lu, command queue %lu, kernel %lu\n",​  
 +    ​context,​ command_queue,​ kernel); 
 +  } 
 +   
 +  int size = array_size;​ 
 +   
 +  // Set the kernel arguments (result, a, b) 
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​ 
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​ 
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​ 
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &​size);​ 
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to Set the kernel arguments.\n"​);​ 
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +    return 1; 
 +  } 
 +   
 +   
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_begin = time_clock();​ 
 +#endif 
 +  // Queue the kernel up for execution across the array 
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL, 
 +   globalWorkSize,​ localWorkSize,​ 
 +   0, NULL, &​kernel_execution);​ 
 +  clWaitForEvents(1,​ &​kernel_execution);​ 
 +   
 +   
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_end = time_clock();​ 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_START,​ 
 +   sizeof(cl_ulong),​ 
 +   &​startTime,​ 
 +   0); 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_END,​ 
 +   sizeof(cl_ulong),​ 
 +   &​endTime,​ 
 +   0); 
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​ 
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​ 
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​ 
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​ 
 + array_size, array_size / exec_time * 1e-9); 
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​ 
 + 3*array_size*sizeof(float)*1e-9,​ 
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​ 
 +#endif 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to queue kernel for execution.\n"​);​ 
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +    exit(-1); 
 +  } 
 +   
 +  return(0);​ 
 +
 + 
 +// tutaj modyfikujemy 
 +int execute_kernel_0_GPU( 
 + int platform_index,​ 
 + int device_index,​ 
 + int kernel_index,​ 
 + int array_size,​ 
 + cl_mem* memObjects,​ 
 + int monitor 
 +
 +
 +   
 +  cl_kernel kernel; 
 +  cl_event kernel_execution;​ 
 +  cl_ulong startTime;​ 
 +  cl_ulong endTime; 
 +  double exec_time;​ 
 +  cl_int retval; 
 +   
 +  size_t globalWorkSize[1] = { 0 }; 
 +  size_t localWorkSize[1] = { 0 }; 
 +   
 +  int i,j,k; 
 +   
 +   
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​ 
 +#ifdef time_measurments 
 +  t_begin = time_clock();​ 
 +#endif 
 +   
 +  // create the kernel for GPU 
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 +     // kernel name:         , file: 
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor); 
 +   
 +#ifdef time_measurments 
 +  t_end = time_clock();​ 
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​ 
 +#endif 
 +   
 +   
 +  // choose the context 
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index); ​  
 +   
 +  // choose the command queue 
 +  cl_command_queue command_queue =  
 +    utr_ocl_select_command_queue(platform_index,​ device_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",​ 
 +    ​platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL){  
 +    printf("​failed to restore context and command queue for platform %d, device %d\n",  
 +    ​platform_index,​ device_index);​ 
 +    printf("​%lu %lu\n",​ context, command_queue);​ 
 +  } 
 +   
 +   
 +  int NDRange_dim = 1; 
 +   
 +  // globalna liczba wątków i lokalna liczba wątków 
 +  int work_group_size = WORK_GROUP_SIZE;​ 
 +  globalWorkSize[0] = array_size;​ 
 +  localWorkSize[0] = work_group_size;​ 
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",​ 
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​ 
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",​ 
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​ 
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​ 
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",​ 
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​ 
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​ 
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",​ 
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0],​  
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​ 
 +  } 
 +    
 +   
 +  // choose the kernel 
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL || kernel == NULL){  
 +     
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n",  
 +    ​platform_index,​ device_index,​ kernel_index);​ 
 +    printf("​context %lu, command queue %lu, kernel %lu\n",​  
 +    ​context,​ command_queue,​ kernel); 
 +  } 
 +   
 +  // Set the kernel arguments (result, a, b) 
 +  // ustawiamy liczbę argumentów 
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​ 
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​ 
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​ 
 +  if (retval != CL_SUCCESS) { 
 +      printf("​Failed to Set the kernel arguments.\n"​);​ 
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +      return 1; 
 +    } 
 +   
 +  
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_begin = time_clock();​ 
 +#endif 
 +  // Queue the kernel up for execution across the array 
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL, 
 +   globalWorkSize,​ localWorkSize,​ 
 +   0, NULL, &​kernel_execution);​ 
 +  clWaitForEvents(1,​ &​kernel_execution);​ 
 +   
 +   
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_end = time_clock();​ 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_START,​ 
 +   sizeof(cl_ulong),​ 
 +   &​startTime,​ 
 +   0); 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_END,​ 
 +   sizeof(cl_ulong),​ 
 +   &​endTime,​ 
 +   0); 
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​ 
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​ 
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​ 
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​ 
 + array_size, array_size / exec_time * 1e-9); 
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​ 
 + 3*array_size*sizeof(float)*1e-9,​ 
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​ 
 +#endif 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to queue kernel for execution.\n"​);​ 
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +    exit(-1); 
 +  } 
 +   
 +  return(0);​ 
 +
 + 
 + 
 +int execute_kernel_1_GPU( 
 + int platform_index,​ 
 + int device_index,​ 
 + int kernel_index,​ 
 + int array_size,​ 
 + cl_mem* memObjects,​ 
 + int monitor 
 +
 +
 +   
 +  cl_kernel kernel; 
 +  cl_event kernel_execution;​ 
 +  cl_ulong startTime;​ 
 +  cl_ulong endTime; 
 +  double exec_time;​ 
 +  cl_int retval; 
 +   
 +  size_t globalWorkSize[1] = { 0 }; 
 +  size_t localWorkSize[1] = { 0 }; 
 +   
 +  int i,j,k; 
 +   
 +   
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​ 
 +#ifdef time_measurments 
 +  t_begin = time_clock();​ 
 +#endif 
 +   
 +  // create the kernel for GPU 
 +/* 
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 +     // kernel name:         , file: 
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);​*/​ 
 + 
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 + ​  ​   // kernel name:         , file: */ 
 + ​  ​   "​vecadd_1_kernel",​ "​vecadd_1.cl",​ monitor); 
 +   
 +#ifdef time_measurments 
 +  t_end = time_clock();​ 
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​ 
 +#endif 
 +   
 +   
 +  // choose the context 
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index); ​  
 +   
 +  // choose the command queue 
 +  cl_command_queue command_queue =  
 +    utr_ocl_select_command_queue(platform_index,​ device_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",​ 
 +    ​platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL){  
 +    printf("​failed to restore context and command queue for platform %d, device %d\n",  
 +    ​platform_index,​ device_index);​ 
 +    printf("​%lu %lu\n",​ context, command_queue);​ 
 +  } 
 +   
 +   
 +  int NDRange_dim = 1; 
 +   
 +  int work_group_size = WORK_GROUP_SIZE;​ 
 +  globalWorkSize[0] = array_size / 4; 
 +  localWorkSize[0] = work_group_size;​ 
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",​ 
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​ 
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",​ 
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​ 
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​ 
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",​ 
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​ 
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​ 
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",​ 
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0],​  
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​ 
 +  } 
 +    
 +   
 +  // choose the kernel 
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL || kernel == NULL){  
 +     
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n",  
 +    ​platform_index,​ device_index,​ kernel_index);​ 
 +    printf("​context %lu, command queue %lu, kernel %lu\n",​  
 +    ​context,​ command_queue,​ kernel); 
 +  } 
 +   
 +  // Set the kernel arguments (result, a, b) 
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​ 
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​ 
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​ 
 +  if (retval != CL_SUCCESS) { 
 +      printf("​Failed to Set the kernel arguments.\n"​);​ 
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +      return 1; 
 +    } 
 +   
 +  
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_begin = time_clock();​ 
 +#endif 
 +  // Queue the kernel up for execution across the array 
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL, 
 +   globalWorkSize,​ localWorkSize,​ 
 +   0, NULL, &​kernel_execution);​ 
 +  clWaitForEvents(1,​ &​kernel_execution);​ 
 +   
 +   
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_end = time_clock();​ 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_START,​ 
 +   sizeof(cl_ulong),​ 
 +   &​startTime,​ 
 +   0); 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_END,​ 
 +   sizeof(cl_ulong),​ 
 +   &​endTime,​ 
 +   0); 
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​ 
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​ 
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​ 
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​ 
 + array_size, array_size / exec_time * 1e-9); 
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​ 
 + 3*array_size*sizeof(float)*1e-9,​ 
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​ 
 +#endif 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to queue kernel for execution.\n"​);​ 
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +    exit(-1); 
 +  } 
 +   
 +  return(0);​ 
 +
 + 
 + 
 +int execute_kernel_2_GPU( 
 + int platform_index,​ 
 + int device_index,​ 
 + int kernel_index,​ 
 + int array_size,​ 
 + cl_mem* memObjects,​ 
 + int monitor 
 +
 +
 +   
 +  cl_kernel kernel; 
 +  cl_event kernel_execution;​ 
 +  cl_ulong startTime;​ 
 +  cl_ulong endTime; 
 +  double exec_time;​ 
 +  cl_int retval; 
 +   
 +  size_t globalWorkSize[1] = { 0 }; 
 +  size_t localWorkSize[1] = { 0 }; 
 +   
 +  int i,j,k; 
 +   
 +   
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​ 
 +#ifdef time_measurments 
 +  t_begin = time_clock();​ 
 +#endif 
 +   
 +  // create the kernel for GPU 
 +/*    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 +     // kernel name:         , file: 
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);​*/​ 
 +     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 + ​  ​   // kernel name:         , file: */ 
 + ​  ​   "​vecadd_2_blocks_kernel",​ "​vecadd_2_blocks.cl",​ monitor); 
 +   
 +#ifdef time_measurments 
 +  t_end = time_clock();​ 
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​ 
 +#endif 
 +   
 +   
 +  // choose the context 
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index); ​  
 +   
 +  // choose the command queue 
 +  cl_command_queue command_queue =  
 +    utr_ocl_select_command_queue(platform_index,​ device_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",​ 
 +    ​platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL){  
 +    printf("​failed to restore context and command queue for platform %d, device %d\n",  
 +    ​platform_index,​ device_index);​ 
 +    printf("​%lu %lu\n",​ context, command_queue);​ 
 +  } 
 +   
 +   
 +  int NDRange_dim = 1; 
 +   
 +  int work_group_size = WORK_GROUP_SIZE;​ 
 +  globalWorkSize[0] = array_size / work_group_size;​ 
 +  localWorkSize[0] = work_group_size;​ 
 + 
 +  int size_per_thread = work_group_size;​ 
 +  int size = array_size;​ 
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",​ 
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​ 
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",​ 
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​ 
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​ 
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",​ 
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​ 
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​ 
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",​ 
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0],​  
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​ 
 +  } 
 +    
 +   
 +  // choose the kernel 
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL || kernel == NULL){  
 +     
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n",  
 +    ​platform_index,​ device_index,​ kernel_index);​ 
 +    printf("​context %lu, command queue %lu, kernel %lu\n",​  
 +    ​context,​ command_queue,​ kernel); 
 +  } 
 +   
 +  // Set the kernel arguments (result, a, b) 
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​ 
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​ 
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​ 
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &​size);​ 
 +  retval |= clSetKernelArg(kernel,​ 4, sizeof(int),​ &​size_per_thread);​ 
 +  if (retval != CL_SUCCESS) { 
 +      printf("​Failed to Set the kernel arguments.\n"​);​ 
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +      return 1; 
 +    } 
 +   
 +  
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_begin = time_clock();​ 
 +#endif 
 +  // Queue the kernel up for execution across the array 
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL, 
 +   globalWorkSize,​ localWorkSize,​ 
 +   0, NULL, &​kernel_execution);​ 
 +  clWaitForEvents(1,​ &​kernel_execution);​ 
 +   
 +   
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_end = time_clock();​ 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_START,​ 
 +   sizeof(cl_ulong),​ 
 +   &​startTime,​ 
 +   0); 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_END,​ 
 +   sizeof(cl_ulong),​ 
 +   &​endTime,​ 
 +   0); 
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​ 
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​ 
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​ 
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​ 
 + array_size, array_size / exec_time * 1e-9); 
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​ 
 + 3*array_size*sizeof(float)*1e-9,​ 
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​ 
 +#endif 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to queue kernel for execution.\n"​);​ 
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +    exit(-1); 
 +  } 
 +   
 +  return(0);​ 
 +
 + 
 + 
 +int execute_kernel_3_GPU( 
 + int platform_index,​ 
 + int device_index,​ 
 + int kernel_index,​ 
 + int array_size,​ 
 + cl_mem* memObjects,​ 
 + int monitor 
 +
 +
 +   
 +  cl_kernel kernel; 
 +  cl_event kernel_execution;​ 
 +  cl_ulong startTime;​ 
 +  cl_ulong endTime; 
 +  double exec_time;​ 
 +  cl_int retval; 
 +   
 +  size_t globalWorkSize[1] = { 0 }; 
 +  size_t localWorkSize[1] = { 0 }; 
 +   
 +  int i,j,k; 
 +   
 +  int size = array_size;​ 
 +   
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​ 
 +#ifdef time_measurments 
 +  t_begin = time_clock();​ 
 +#endif 
 +   
 +  // create the kernel for GPU 
 +/*    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 +     // kernel name:         , file: 
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);​*/​ 
 +     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 + ​  ​   // kernel name:         , file: 
 +     "​vecadd_3_opt_blocks_kernel",​ "​vecadd_3_opt_blocks.cl",​ monitor); 
 +   
 +#ifdef time_measurments 
 +  t_end = time_clock();​ 
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​ 
 +#endif 
 +   
 +   
 +  // choose the context 
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index); ​  
 +   
 +  // choose the command queue 
 +  cl_command_queue command_queue =  
 +    utr_ocl_select_command_queue(platform_index,​ device_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",​ 
 +    ​platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL){  
 +    printf("​failed to restore context and command queue for platform %d, device %d\n",  
 +    ​platform_index,​ device_index);​ 
 +    printf("​%lu %lu\n",​ context, command_queue);​ 
 +  } 
 +   
 +   
 +  int NDRange_dim = 1; 
 +   
 +  int work_group_size = WORK_GROUP_SIZE;​ 
 +  globalWorkSize[0] = array_size;​ 
 +  localWorkSize[0] = work_group_size;​ 
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",​ 
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​ 
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",​ 
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​ 
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​ 
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",​ 
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​ 
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​ 
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",​ 
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0],​  
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​ 
 +  } 
 +    
 +   
 +  // choose the kernel 
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index); ​  
 +   
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",​ 
 +    ​kernel_index,​ platform_index,​ device_index);​ 
 +  } 
 +   
 +  if(context == NULL || command_queue == NULL || kernel == NULL){  
 +     
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n",  
 +    ​platform_index,​ device_index,​ kernel_index);​ 
 +    printf("​context %lu, command queue %lu, kernel %lu\n",​  
 +    ​context,​ command_queue,​ kernel); 
 +  } 
 +   
 +  // Set the kernel arguments (result, a, b) 
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​ 
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​ 
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​ 
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &​size);​ 
 + 
 +  if (retval != CL_SUCCESS) { 
 +      printf("​Failed to Set the kernel arguments.\n"​);​ 
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +      return 1; 
 +    } 
 +   
 +  
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_begin = time_clock();​ 
 +#endif 
 +  // Queue the kernel up for execution across the array 
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL, 
 +   globalWorkSize,​ localWorkSize,​ 
 +   0, NULL, &​kernel_execution);​ 
 +  clWaitForEvents(1,​ &​kernel_execution);​ 
 +   
 +   
 +#ifdef time_measurments 
 +  clFinish(command_queue);​ 
 +  t_end = time_clock();​ 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_START,​ 
 +   sizeof(cl_ulong),​ 
 +   &​startTime,​ 
 +   0); 
 +  clGetEventProfilingInfo(kernel_execution,​ 
 +   CL_PROFILING_COMMAND_END,​ 
 +   sizeof(cl_ulong),​ 
 +   &​endTime,​ 
 +   0); 
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​ 
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​ 
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​ 
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​ 
 + array_size, array_size / exec_time * 1e-9); 
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​ 
 + 3*array_size*sizeof(float)*1e-9,​ 
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​ 
 +#endif 
 +   
 +  if (retval != CL_SUCCESS) { 
 +    printf("​Failed to queue kernel for execution.\n"​);​ 
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​ 
 +    exit(-1); 
 +  } 
 +   
 +  return(0);​ 
 +
 + 
 + 
 + 
 + 
 +     
 +/*     // create the fourth kernel for GPU */ 
 +/*     ​kernel_index = 4;  */ 
 +/*     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ */ 
 +/*     // kernel name:         , file: */ 
 +/*     "​vecadd_4_opt_blocks_vect_kernel",​ "​vecadd_4_opt_blocks_vect.cl",​ monitor); */</​code>​ 
 + 
 +==== opencl_vecadd/​vecadd_2_blocks.cl ==== 
 +<code c> 
 +__kernel void vecadd_2_blocks_kernel(__global const float *a, 
 +      ​__global const float *b, 
 +      ​__global float *result, 
 +                      const int size, 
 +      const int size_per_thread) 
 +
 +  int gid = get_global_id(0);​ 
 +  int index_start = gid * size_per_thread;​ 
 +  int index_end = (gid+1) * size_per_thread;​ 
 + 
 +  if (gid == 0) { 
 + //​printf("​size:​ %d, %d", size, size_per_thread);​ 
 +  } 
 + 
 +  for (int i=index_start;​ i < index_end && i < size; i++) {  
 +  //for (int i=index_start;​ i < index_end ; i++) {  
 +    result[i] = a[i]+b[i];​ 
 +  } 
 +}</​code>​ 
 +===== Zajęcia 7 ===== 
 +{{:​studia:​magisterskie:​1sem:​zajecia_7.zip|}} 
 + 
 +==== tmp/​opencl_mat_transp/​mat_transp_1.cl ==== 
 +<code c> #pragma OPENCL EXTENSION cl_khr_fp64 : enable 
 + 
 +__kernel void mat_transp_1_kernel( 
 +  __global float *A ,  
 +  __global float *B , 
 +  __global float *C , 
 +  int N) 
 +
 + 
 +int id = get_global_id(0) * get_global_size(0) + get_global_id(1);​ 
 +float temp = 0.0; 
 + 
 +for(int i = 0; i<​N;​i++){ 
 + temp += A[id*N + i] *B[i]; 
 +
 + 
 +C[id] = temp; 
 +}</​code>​ 
 + 
 +==== tmp/​opencl_mat_transp/​mat_transp/​mat_transp.c ==== 
 +<code c>#​include<​stdlib.h>​ 
 +#​include<​stdio.h>​ 
 +#include <​math.h>​ 
 + 
 +#include <​CL/​cl.h>​ 
 + 
 +#​include"​uth_ocl_intf.h"​ 
 + 
 +#define time_measurments 
 + 
 +#ifdef time_measurments 
 +#​include"​uth_time_intf.h"​ 
 +  static double t_begin, t_end, t_total; 
 +#endif 
 + 
 + 
 + 
 +#define BLOCK_SIZE 16 
 +#define NR_GROUPS 16 
 +#define MULT 16 
 +#define WYMIAR (BLOCK_SIZE*NR_GROUPS*MULT) 
 +#define ROZMIAR (WYMIAR*WYMIAR) 
 +// Matrices are stored in row-major order:  
 +// M(row, col) = M( row * WYMIAR + col )  
 + 
 +int verify_result( 
 +  float* result, 
 +  float* result_compare 
 +   ) 
 +
 +  // Verify the result 
 +  int result_OK = 1; 
 +  int i,j; 
 +  for(i = 0; i < WYMIAR; i++) { 
 +      if(fabs(result[i] - result_compare[i])>​1.e-6) { 
 + result_OK = 0; 
 + break; 
 +    } 
 +  } 
 +  printf("​\t\t6. verifying results: "); 
 +  if(result_OK) { 
 +    printf("​Output is correct\n"​);​ 
 +  } else { 
 +    printf("​Output is incorrect\n"​);​ 
 +    j = 0; 
 +    for(i = 0; i < WYMIAR, j<100; i++) { 
 + if(fabs(result[i] - result_compare[i])>​1.e-9) { 
 +   j++; 
 +   printf("​%d %16.8f %16.8f\n",​  
 +   i, result[i], result_compare[i]);​ 
 +
 +    } 
 +    exit(0); 
 +  } 
 +    /* for(i = 0; i < length; i++) { */ 
 +    /*   ​printf("​%16.8f %16.8f\n",​ result[i], result_compare[i]);​ */ 
 +    /* } */ 
 + 
 +  return(result_OK);​ 
 +
 + 
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​ 
 +void create_kernels() 
 +
 + 
 +  // for all operations indicate explicit info messages 
 +  int monitor = UTC_BASIC_INFO + 1; 
 + 
 +  int kernel_index;​ 
 +   
 +  int platform_index = utv_ocl_struct.current_platform_index;​ 
 + 
 +  if(utr_ocl_CPU_context_exists(platform_index)){ 
 + 
 +#ifdef time_measurments 
 +  t_begin = time_clock();​ 
 +#endif 
 +   
 +  // create the first kernel for CPU 
 +  /* kernel_index = 0;  */ 
 +  /* utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​ */ 
 +  /*   // kernel name:         , file: */ 
 +  /*   "​mat_transp_1_kernel",​ "​mat_transp_1.cl",​ monitor); */ 
 +   
 +#ifdef time_measurments 
 +  t_end = time_clock();​ 
 +  printf("​EXECUTION TIME: creating CPU kernel %d: %lf\n",​ kernel_index,​ t_end-t_begin);​ 
 +#endif 
 +   
 +  } 
 + 
 +  if(utr_ocl_GPU_context_exists(platform_index)){ 
 + 
 + 
 +#ifdef time_measurments 
 +  t_begin = time_clock();​ 
 +#endif 
 +  // create the first kernel for GPU 
 +  kernel_index = 0;  
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 +   // kernel name:         , file: 
 +   "​mat_transp_1_kernel",​ "​mat_transp_1.cl",​ monitor); 
 +   
 +#ifdef time_measurments 
 +  t_end = time_clock();​ 
 +  printf("​EXECUTION TIME: creating CPU kernel %d: %lf\n",​ kernel_index,​ t_end-t_begin);​ 
 +#endif 
 +   
 +  // create the second kernel for GPU 
 +  kernel_index = 1;  
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ 
 +   // kernel name:         , file: 
 +   "​mat_transp_2_kernel",​ "​mat_transp_2.cl",​ monitor); 
 + 
 +   
 + 
 +  } 
 + 
 +
 + 
 +  /​*----------------EXECUTION PHASE----------------------*/​ 
 +int execute_kernels() 
 +
 + 
 +  // for all operations indicate explicit info messages 
 +  int monitor = UTC_BASIC_INFO + 1; 
 + 
 +  int platform_index = utv_ocl_struct.current_platform_index;​ 
 +  utt_ocl_platform_struct platform_struct = utv_ocl_struct.list_of_platforms[platform_index];​ 
 + 
 +  int kernel_index;​ 
 +  int i,j,n; 
 + 
 +  double nr_access;​ 
 +  double t1,t2; 
 + 
 +  if(monitor>​UTC_BASIC_INFO){ 
 +    printf("​\n------------Starting execution phase----------------\n"​);​ 
 +  } 
 +   
 +  // create matrices 
 +  n=WYMIAR; 
 +  float* A = (float *) malloc(ROZMIAR*sizeof(float));​ 
 +  float* B = (float *) malloc(WYMIAR*sizeof(float));​ 
 +  float* C = (float *) malloc(WYMIAR*sizeof(float));​ 
 +  float* D = (float *) malloc(WYMIAR*sizeof(float));​ 
 +   
 +  for(i=0;​i<​ROZMIAR;​i++) A[i]=1.0*i/​10000000.0;​ 
 + 
 +  for(i=0;​i<​WYMIAR;​i++) B[i]=1.0*i/​10000000.0;​ 
 + 
 +  // mnozenie macierz wektor 
 +  for(i=0; i<​WYMIAR;​ i++){ 
 + for(j=0; j<​WYMIAR;​ j++){ 
 + D[i] += A[i*WYMIAR + j] * B[j];  
 +
 +  } 
 +   
 +  nr_access= ROZMIAR + 2.0*WYMIAR; // read + write 
 +   
 +  printf("​mat_transp:​ nr_access %lf\n",​ nr_access);​ 
 +   
 +  // get hardware characteristics to select good matrix shape 
 +  // the set of device characteristics stored in data structure 
 +  int device_index = 0;  
 +  utt_ocl_device_struct device_struct =  
 +    utv_ocl_struct.list_of_platforms[platform_index].list_of_devices[device_index];​ 
 +  double global_mem_bytes = device_struct.global_mem_bytes;​ 
 +  double global_max_alloc = device_struct.global_max_alloc;​ 
 +  double local_mem_bytes = device_struct.local_mem_bytes;​ 
 +  double constant_mem_bytes = device_struct.constant_mem_bytes;​ 
 +  int max_num_comp_units = device_struct.max_num_comp_units;​ 
 +  int max_work_group_size = device_struct.max_work_group_size;​ 
 +   
 +  // in a loop over devices (or for a selected device) 
 +  int idev=0;  
 +  for(idev=0; idev<​platform_struct.number_of_devices;​ idev++){ 
 +     
 +    // int device_type = ..... 
 +    // choose device_index 
 +    // int device_index = utr_ocl_select_device(platform_index,​ device_type);​ 
 +    int device_index = idev; 
 +    int device_type = utr_ocl_device_type(platform_index,​ device_index);​  
 +     
 +    if(device_index>​0 && device_type==utr_ocl_device_type(platform_index,​ device_index-1)) break;  
 +    if(device_type == UTC_OCL_DEVICE_CPU) break; 
 + 
 +    // choose the context 
 +    cl_context context = utr_ocl_select_context(platform_index,​ device_index); ​  
 +     
 +    // choose the command queue 
 +    cl_command_queue command_queue =  
 +      utr_ocl_select_command_queue(platform_index,​ device_index); ​  
 +     
 +    if(monitor>​UTC_BASIC_INFO){ 
 +      printf("​\nExecution:​ \t0. restoring context and command queue for platform %d and device %d\n",​ 
 +      ​platform_index,​ device_index);​ 
 +    } 
 +     
 +    if(context == NULL || command_queue == NULL){  
 +       
 +      printf("​failed to restore context and command queue for platform %d, device %d\n",  
 +      ​platform_index,​ device_index);​ 
 +      printf("​%lu %lu\n",​ context, command_queue);​ 
 +    } 
 +     
 +    // choose the kernel 
 +    kernel_index = 0; 
 +    cl_kernel kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index); ​  
 +     
 +    if(monitor>​UTC_BASIC_INFO){ 
 +      printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",​ 
 +      ​kernel_index,​ platform_index,​ device_index);​ 
 +    } 
 +     
 +    if(context == NULL || command_queue == NULL || kernel == NULL){  
 +       
 +      printf("​failed to restore kernel for platform %d, device %d, kernel %d\n",  
 +      ​platform_index,​ device_index,​ kernel_index);​ 
 +      printf("​context %lu, command queue %lu, kernel %lu\n",​  
 +      ​context,​ command_queue,​ kernel); 
 +    } 
 +     
 +    for(i=0;​i<​WYMIAR;​i++) C[i]=0.0; 
 +    time_init();​ t1 = time_clock();​ 
 +     
 +    // call routine to perform matrix transposition 
 +    mat_transp_host(kernel_index,​ A, B, C, n, context, kernel, command_queue);​ 
 +     
 +    t2 = time_clock();​ time_print();​ 
 +    printf("​GB/​s = %lf\n\n",​ nr_access*sizeof(float)/​(t2-t1)/​1024/​1024/​1024);​ 
 +     
 +     
 +    // verify result  
 +    verify_result(C,​ D); 
 +     
 +     
 +  } // end loop over devices 
 +   
 +  return(0);​ 
 +
 + 
 + 
 + 
 +// Matrix transposition - Host code  
 +// Matrix dimensions are assumed to be multiples of BLOCK_SIZE  
 +int mat_transp_host( 
 +  int kernel_index,​ 
 +  float* A,  
 +  float* B, 
 +  float* C,  
 +  int N,  
 +  const cl_context context,  
 +  const cl_kernel mat_transp_kernel,​  
 +  const cl_command_queue queue 
 + )  
 +{  
 + 
 +// Load A to device memory  
 +  size_t size_bytesm = N*N*sizeof(float);​  
 +  size_t size_bytesw = N*sizeof(float);​  
 +   
 + 
 +  cl_mem d_A = clCreateBuffer(context,​ CL_MEM_READ_ONLY,​ 
 + size_bytesm,​ NULL, NULL);  
 + 
 +  // Write A to device memory  
 +  clEnqueueWriteBuffer(queue,​ d_A, CL_TRUE, 0, size_bytesm,​ A, 0, 0, 0);  
 + 
 +  cl_mem d_B = clCreateBuffer(context,​ CL_MEM_READ_ONLY,​ 
 + size_bytesw,​ NULL, NULL);  
 + 
 +  // Write A to device memory  
 +  clEnqueueWriteBuffer(queue,​ d_B, CL_TRUE, 0, size_bytesw,​ B, 0, 0, 0);  
 + 
 +  // Allocate B in device memory  
 +  cl_mem d_C = clCreateBuffer(context,​ CL_MEM_WRITE_ONLY,​ size_bytesw,​ NULL, NULL);  
 + 
 + 
 +  // Invoke kernel  
 +  clSetKernelArg(mat_transp_kernel,​ 0, sizeof(cl_mem),​ (void*)&​d_A);​ 
 +  clSetKernelArg(mat_transp_kernel,​ 1, sizeof(cl_mem),​ (void*)&​d_B);​ 
 +  clSetKernelArg(mat_transp_kernel,​ 2, sizeof(cl_mem),​ (void*)&​d_C);​ 
 +  clSetKernelArg(mat_transp_kernel,​ 3, sizeof(int),​ (void*)&​N);​ 
 + 
 +  size_t localWorkSize[3];​ 
 +  size_t globalWorkSize[3];​ 
 +  cl_uint work_dim; 
 + 
 +  if(kernel_index==3){ 
 + 
 +    work_dim = 2; 
 +    localWorkSize[0] = BLOCK_SIZE;  
 +    globalWorkSize[0] = N/MULT;  
 +    localWorkSize[1] = BLOCK_SIZE;  
 +    globalWorkSize[1] = N/MULT;  
 +    localWorkSize[2] = 0;  
 +    globalWorkSize[2] = 0;  
 + 
 + 
 +  } 
 +  else{ 
 + 
 +    work_dim = 2; 
 +    localWorkSize[0] = BLOCK_SIZE;  
 +    globalWorkSize[0] = sqrt(N);  
 +    localWorkSize[1] = BLOCK_SIZE;  
 +    globalWorkSize[1] = sqrt(N);  
 +    localWorkSize[2] = 0;  
 +    globalWorkSize[2] = 0;  
 +   
 +  } 
 + 
 +  clFinish(queue);​ 
 +  double t1 = time_clock();​ 
 +  // Enqueue a kernel run call 
 +  cl_event ndrEvt; 
 +  clEnqueueNDRangeKernel(queue,​ mat_transp_kernel,​ work_dim, 0,  
 + globalWorkSize,​ localWorkSize,​ 0, 0, &​ndrEvt);​  
 +  clWaitForEvents(1,​ &​ndrEvt);​ 
 +  clFinish(queue);​ 
 +  double t2 = time_clock();​ 
 +  // Calculate performance  
 +  cl_ulong startTime;​ 
 +  cl_ulong endTime; 
 +   
 +  // Get kernel profiling info  
 +  clGetEventProfilingInfo(ndrEvt,​ 
 +   CL_PROFILING_COMMAND_START,​ 
 +   sizeof(cl_ulong),​ 
 +   &​startTime,​ 
 +   0); 
 +  clGetEventProfilingInfo(ndrEvt,​ 
 +   CL_PROFILING_COMMAND_END,​ 
 +   sizeof(cl_ulong),​ 
 +   &​endTime,​ 
 +   0); 
 +  double time = (double)endTime - (double)startTime;​ 
 +  printf("​\nKernel execution internal: time %lf, GB/s = %lf\n",​  
 + time*1.0e-9,​ 2.0*N*N*sizeof(float)/​(time*1.0e-9)/​1024/​1024/​1024);​ 
 + 
 +  printf("​Kernel execution external: time %lf,  GB/s = %lf\n\n",​  
 + t2-t1, 2.0*N*N*sizeof(float)/​(t2-t1)/​1024/​1024/​1024);​ 
 + 
 +  // Read B from device memory  
 +  clEnqueueReadBuffer(queue,​ d_C, CL_TRUE, 0, size_bytesw,​ C, 0, 0, 0);  
 + 
 +  // Free device memory  
 +  clReleaseMemObject(d_A);​  
 +  clReleaseMemObject(d_B);​  
 +  clReleaseMemObject(d_C);​ 
 + 
 +  return(0);​ 
 + 
 +}</​code>​ 
 +==== Kod wogu ==== 
 +=== server.c ==== 
 +<code c> 
 +/* 
 +    C socket server example, handles multiple clients using threads 
 +    Compile 
 +    gcc server.c -lpthread -o server 
 +*/ 
 +  
 +#​include<​stdio.h>​ 
 +#​include<​string.h> ​   //strlen 
 +#​include<​stdlib.h> ​   //strlen 
 +#​include<​sys/​socket.h>​ 
 +#​include<​arpa/​inet.h>​ //​inet_addr 
 +#​include<​unistd.h> ​   //write 
 +#​include<​pthread.h>​ //for threading , link with lpthread 
 +#include <​unistd.h>​ 
 +  
 +//the thread function 
 +void *connection_handler(void *); 
 +  
 +int main(int argc , char *argv[]) 
 +
 +    int counter = 0; 
 +    pid_t pid = fork(); 
 + 
 +    if (pid == 0) 
 +    { 
 +       ​  ​   int sock; 
 +     struct sockaddr_in server; 
 +     char message[1000] , server_reply[2000];​ 
 +            int counter; 
 +     unsigned int ip[] = { 192, 168, 102, 1 }; 
 +            char* ip_final; 
 + char *buf; 
 + size_t sz; 
 +  
 +     //Create socket 
 + 
 +            for ( counter = 0; counter < 255; ++counter ) { 
 + 
 +     sock = socket(AF_INET , SOCK_STREAM , 0); 
 +     if (sock == -1) 
 +     { 
 + printf("​Could not create socket"​);​ 
 +     } 
 +     puts("​Socket created"​);​ 
 + 
 +     //​printf("​192.168.102.%d",​ counter); 
 + 
 + sz = snprintf(NULL,​ 0, "​192.168.102.%d",​ counter); 
 + buf = (char *)malloc(sz + 1); /* make sure you check for != NULL in real code */ 
 +      
 +     server.sin_addr.s_addr = inet_addr(buf);​ 
 +     server.sin_family = AF_INET; 
 +     server.sin_port = htons( 65001 ); 
 +  
 +     //Connect to remote server 
 +     if (connect(sock , (struct sockaddr *)&​server , sizeof(server)) < 0) 
 +     { 
 + perror("​connect failed. Error"​);​ 
 + continue;​ 
 +     } 
 +  
 +     puts("​Connected\n"​);​ 
 +  
 +     //keep communicating with server 
 +     while(1) 
 +     { 
 + //Send some data 
 + if( send(sock , "​Werset 6-client"​ , 15 , 0) < 0) 
 +
 +     puts("​Send failed"​);​ 
 +     return 1; 
 +
 +  
 + //​Receive a reply from the server 
 + if( recv(sock , server_reply , 2000 , 0) < 0) 
 +
 +     puts("​recv failed"​);​ 
 +     break; 
 +
 + 
 + puts(server_reply);​ 
 + 
 + break; 
 +     } 
 +  
 +     close(sock);​ 
 +           } 
 +    } 
 +    else if (pid > 0) 
 +    { 
 +        int socket_desc , client_sock , c; 
 +     struct sockaddr_in server , client; 
 +       
 +     //Create socket 
 +     socket_desc = socket(AF_INET , SOCK_STREAM , 0); 
 +     if (socket_desc == -1) 
 +     { 
 + printf("​Could not create socket"​);​ 
 +     } 
 +     puts("​Socket created"​);​ 
 +       
 +     //Prepare the sockaddr_in structure 
 +     server.sin_family = AF_INET; 
 +     server.sin_addr.s_addr = INADDR_ANY;​ 
 +     server.sin_port = htons( 65001 ); 
 +       
 +     //Bind 
 +     if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0) 
 +     { 
 + //print the error message 
 + perror("​bind failed. Error"​);​ 
 + return 1; 
 +     } 
 +     puts("​bind done"​);​ 
 +       
 +     //Listen 
 +     listen(socket_desc , 3); 
 +       
 +     //Accept and incoming connection 
 +     puts("​Waiting for incoming connections..."​);​ 
 +     c = sizeof(struct sockaddr_in);​ 
 +       
 +       
 +     //Accept and incoming connection 
 +     puts("​Waiting for incoming connections..."​);​ 
 +     c = sizeof(struct sockaddr_in);​ 
 + pthread_t thread_id;​ 
 +  
 +     while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) ) 
 +     { 
 + puts("​Connection accepted"​);​ 
 +  
 + if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0) 
 +
 +     perror("​could not create thread"​);​ 
 +     return 1; 
 +
 +  
 + //Now join the thread , so that we dont terminate before the thread 
 + //​pthread_join( thread_id , NULL); 
 + puts("​Handler assigned"​);​ 
 +     } 
 +       
 +     if (client_sock < 0) 
 +     { 
 + perror("​accept failed"​);​ 
 + return 1; 
 +     } 
 +    } 
 +    else 
 +    { 
 +        // fork failed 
 +        printf("​fork() failed!\n"​);​ 
 +        return 1; 
 +    } 
 +      
 +    return 0; 
 +
 +  
 +/* 
 + * This will handle connection for each client 
 + * */ 
 +void *connection_handler(void *socket_desc) 
 +
 +    //Get the socket descriptor 
 +    int sock = *(int*)socket_desc;​ 
 +    int read_size;​ 
 +    char *message , client_message[2000];​ 
 + 
 +    //Receive a message from client 
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) 
 +    { 
 + printf("​%s\n",​ client_message);​ 
 + 
 + //end of string marker 
 + client_message[read_size] = '​\0';​ 
 +  
 + //Send the message back to client 
 + write(sock , "​Werset 6" , 8); 
 +  
 + //clear the message buffer 
 + memset(client_message,​ 0, 2000); 
 +    } 
 +      
 +    if(read_size == 0) 
 +    { 
 +        puts("​Client disconnected"​);​ 
 +        fflush(stdout);​ 
 +    } 
 +    else if(read_size == -1) 
 +    { 
 +        perror("​recv failed"​);​ 
 +    } 
 +          
 +    return 0; 
 +}</​code>​ 
 +=== Makefile === 
 +<code bash># kompilator c 
 +CC = gcc 
 +  
 +# konsolidator 
 +CFLAGS=-g 
 + 
 +all: server client 
 +  
 +server: server.o 
 + $(CC) $(LFLAGS) server.o -lpthread -o server 
 +  
 +server.o: server.c 
 + $(CC) -c server.c -o server.o 
 + 
 +client: client.o 
 + $(CC) client.o -o client 
 +  
 +client.o: client.c 
 + $(CC) -c client.c -o client.o 
 +  
 +clean: 
 + rm -f *.o</​code>​ 
 +===== Zajęcia 9 ===== 
 +<code c>/* 
 +    C socket server example, handles multiple clients using threads 
 +    Compile 
 +    gcc server.c -lpthread -o server 
 +*/ 
 +  
 +#​include<​stdio.h>​ 
 +#​include<​string.h> ​   //strlen 
 +#​include<​stdlib.h> ​   //strlen 
 +#​include<​sys/​socket.h>​ 
 +#​include<​arpa/​inet.h>​ //​inet_addr 
 +#​include<​unistd.h> ​   //write 
 +#​include<​pthread.h>​ //for threading , link with lpthread 
 +  
 +//the thread function 
 +void *connection_handler(void *); 
 +  
 +int main(int argc , char *argv[]) 
 +
 +    int socket_desc , client_sock , c; 
 +    struct sockaddr_in server , client; 
 +      
 +    //Create socket 
 +    socket_desc = socket(AF_INET , SOCK_STREAM , 0); 
 +    if (socket_desc == -1) 
 +    { 
 +        printf("​Could not create socket"​);​ 
 +    } 
 +    puts("​Socket created"​);​ 
 +      
 +    //Prepare the sockaddr_in structure 
 +    server.sin_family = AF_INET; 
 +    server.sin_addr.s_addr = INADDR_ANY;​ 
 +    server.sin_port = htons( 8888 ); 
 +      
 +    //Bind 
 +    if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0) 
 +    { 
 +        //print the error message 
 +        perror("​bind failed. Error"​);​ 
 +        return 1; 
 +    } 
 +    puts("​bind done"​);​ 
 +      
 +    //Listen 
 +    listen(socket_desc , 3); 
 +      
 +    //Accept and incoming connection 
 +    puts("​Waiting for incoming connections..."​);​ 
 +    c = sizeof(struct sockaddr_in);​ 
 +      
 +      
 +    //Accept and incoming connection 
 +    puts("​Waiting for incoming connections..."​);​ 
 +    c = sizeof(struct sockaddr_in);​ 
 + pthread_t thread_id;​ 
 +  
 +    while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) ) 
 +    { 
 +        puts("​Connection accepted"​);​ 
 +          
 +        if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0) 
 +        { 
 +            perror("​could not create thread"​);​ 
 +            return 1; 
 +        } 
 +          
 +        //Now join the thread , so that we dont terminate before the thread 
 +        //​pthread_join( thread_id , NULL); 
 +        puts("​Handler assigned"​);​ 
 +    } 
 +      
 +    if (client_sock < 0) 
 +    { 
 +        perror("​accept failed"​);​ 
 +        return 1; 
 +    } 
 +      
 +    return 0; 
 +
 +  
 +/* 
 + * This will handle connection for each client 
 + * */ 
 +void *connection_handler(void *socket_desc) 
 +
 +    //Get the socket descriptor 
 +    int sock = *(int*)socket_desc;​ 
 +    int read_size;​ 
 +    char *message , client_message[2000];​ 
 + 
 +    //Receive a message from client 
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) 
 +    { 
 + //end of string marker 
 + client_message[read_size] = '​\0';​ 
 +  
 + //Send the message back to client 
 + write(sock , "​Werset 6" , strlen(client_message));​ 
 +  
 + //clear the message buffer 
 + memset(client_message,​ 0, 2000); 
 +    } 
 +      
 +    if(read_size == 0) 
 +    { 
 +        puts("​Client disconnected"​);​ 
 +        fflush(stdout);​ 
 +    } 
 +    else if(read_size == -1) 
 +    { 
 +        perror("​recv failed"​);​ 
 +    } 
 +          
 +    return 0; 
 +} </​code>​ 
 + 
 +==== Client C ==== 
 + 
 +<code c>#​include<​stdio.h>​ //printf 
 +#​include<​string.h> ​   //strlen 
 +#​include<​sys/​socket.h> ​   //socket 
 +#​include<​arpa/​inet.h>​ //​inet_addr 
 +  
 +int main(int argc , char *argv[]) 
 +
 +    int sock; 
 +    struct sockaddr_in server; 
 +    char message[1000] , server_reply[2000];​ 
 +      
 +    //Create socket 
 +    sock = socket(AF_INET , SOCK_STREAM , 0); 
 +    if (sock == -1) 
 +    { 
 +        printf("​Could not create socket"​);​ 
 +    } 
 +    puts("​Socket created"​);​ 
 +      
 +    server.sin_addr.s_addr = inet_addr("​127.0.0.1"​);​ 
 +    server.sin_family = AF_INET; 
 +    server.sin_port = htons( 8888 ); 
 +  
 +    //Connect to remote server 
 +    if (connect(sock , (struct sockaddr *)&​server , sizeof(server)) < 0) 
 +    { 
 +        perror("​connect failed. Error"​);​ 
 +        return 1; 
 +    } 
 +      
 +    puts("​Connected\n"​);​ 
 +      
 +    //keep communicating with server 
 +    while(1) 
 +    { 
 +        printf("​Enter message : "); 
 +        scanf("​%s"​ , message); 
 +          
 +        //Send some data 
 +        if( send(sock , message , strlen(message) , 0) < 0) 
 +        { 
 +            puts("​Send failed"​);​ 
 +            return 1; 
 +        } 
 +          
 +        //Receive a reply from the server 
 +        if( recv(sock , server_reply , 2000 , 0) < 0) 
 +        { 
 +            puts("​recv failed"​);​ 
 +            break; 
 +        } 
 +          
 +        puts("​Server reply :"); 
 +        puts(server_reply);​ 
 +    } 
 +      
 +    close(sock);​ 
 +    return 0; 
 +}</​code>​ 
 + 
 + 
 +==== Kod Maciek Gonzo itp ==== 
 +<code c> 
 +#​include<​stdio.h>​ 
 +#​include<​string.h> ​   //strlen 
 +#​include<​stdlib.h> ​   //strlen 
 +#​include<​sys/​socket.h>​ 
 +#​include<​arpa/​inet.h>​ //​inet_addr 
 +#​include<​unistd.h> ​   //write 
 +#​include<​pthread.h>​ //for threading , link with lpthread 
 +  
 +//the thread function 
 +void *connection_handler(void *); 
 + 
 +void *client_function(void *socket_desc){ 
 +  int sock; 
 +    struct sockaddr_in server_id;​ 
 +    char message[1000] , server_reply[2000];​ 
 +      
 +    //Create socket 
 +    sock = socket(AF_INET , SOCK_STREAM , 0); 
 +    if (sock == -1) 
 +    { 
 +        printf("​Could not create socket"​);​ 
 +    } 
 +    puts("​Socket created"​);​ 
 +     // 192.168.102.63 wogu 
 +    server_id.sin_addr.s_addr = inet_addr("​localhost"​);​ 
 +    server_id.sin_family = AF_INET; 
 +    server_id.sin_port = htons( 65000 ); 
 +  
 +    //Connect to remote server 
 + 
 +    if (connect(sock , (struct sockaddr *)&​server_id , sizeof(server_id)) < 0) 
 +    { 
 +        perror("​connect failed. Error"​);​ 
 + 
 +    }else{ 
 + 
 +    puts("​Connected\n"​);​ 
 +      
 +    //keep communicating with server 
 +    while(1) 
 +    { 
 +        printf("​Enter message : "); 
 +        scanf("​%s"​ , message); 
 +          
 +        //Send some data 
 +        if( send(sock , message , strlen(message) , 0) < 0) 
 +        { 
 +            puts("​Send failed"​);​ 
 + 
 +        } 
 +          
 +        //Receive a reply from the server 
 +        if( recv(sock , server_reply , 2000 , 0) < 0) 
 +        { 
 +            puts("​recv failed"​);​ 
 +            break; 
 +        } 
 +          
 +        puts("​Server reply :"); 
 +        puts(server_reply);​ 
 +    } 
 +      
 +    close(sock);​  
 +
 +    
 +}; 
 +  
 + 
 +void *server_function(void * sock){ 
 +    printf("​SERVER Start"​);​ 
 +fflush(stdout);​ 
 +    int socket_desc , client_sock , c; 
 +    pthread_t thread_id;​ 
 +    struct sockaddr_in server , client; 
 +  
 +    //Create socket 
 +    socket_desc = socket(AF_INET , SOCK_STREAM , 0); 
 +    if (socket_desc == -1) 
 +    { 
 +        printf("​Could not create socket"​);​ 
 +    } 
 +    puts("​Socket created"​);​ 
 +  
 +    //Prepare the sockaddr_in structure 
 +    server.sin_family = AF_INET; 
 +    server.sin_addr.s_addr = INADDR_ANY;​ 
 +    server.sin_port = htons( 65000); 
 +  
 +    //Bind 
 +    if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0) 
 +    { 
 +        //print the error message 
 +        perror("​bind failed. Error"​);​ 
 +        return; 
 +    } 
 +    puts("​bind done"​);​ 
 + 
 + 
 + 
 +    //Listen 
 +    listen(socket_desc , 3); 
 +  
 +    //Accept and incoming connection 
 +    puts("​Waiting for incoming connections..."​);​ 
 +    c = sizeof(struct sockaddr_in);​ 
 +fflush(stdout);​ 
 +  
 +    while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) ) 
 +    { 
 +        puts("​Connection accepted"​);​ 
 +  
 +        if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0) 
 +        { 
 +            perror("​could not create thread"​);​ 
 +            return; 
 +        } 
 +  
 +        //Now join the thread , so that we dont terminate before the thread 
 +        //​pthread_join( thread_id , NULL); 
 +        puts("​Handler assigned"​);​ 
 +    } 
 +  
 +    if (client_sock < 0) 
 +    { 
 +        perror("​accept failed"​);​ 
 +        return; 
 +    } 
 +
 + 
 + 
 +int main(int argc , char *argv[]) 
 +
 +    pthread_t thread_id;​ 
 +//Create server 
 +        if( pthread_create( &​thread_id , NULL ,  server_function , NULL) != 0) 
 +        { 
 +            perror("​could not create server thread"​);​ 
 +     fflush(stdout);​ 
 +            return 1; 
 +        } 
 +    pthread_t thread_id2;​ 
 + 
 + // Create client 
 +        if( pthread_create( &​thread_id2 , NULL ,  client_function , NULL) != 0) 
 +        { 
 +            perror("​could not create client thread"​);​ 
 +     fflush(stdout);​ 
 +            return 1; 
 +        } 
 + 
 +    return 0; 
 +
 +  
 +/* 
 + * This will handle connection for each client 
 + * */ 
 +void *connection_handler(void *socket_desc) 
 +
 +    //Get the socket descriptor 
 +    int sock = *(int*)socket_desc;​ 
 +    int read_size;​ 
 +    char *message , client_message[2000];​ 
 +  
 +    //Receive a message from client 
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) 
 +    { 
 +        //end of string marker 
 + client_message[read_size] = '​\0';​ 
 +  
 + //Send the message back to client 
 +        write(sock , "​Werset 5" , strlen(client_message));​ 
 +  
 + //clear the message buffer 
 + memset(client_message,​ 0, 2000); 
 +    } 
 +  
 +    if(read_size == 0) 
 +    { 
 +        puts("​Client disconnected"​);​ 
 +        fflush(stdout);​ 
 +    } 
 +    else if(read_size == -1) 
 +    { 
 +        perror("​recv failed"​);​ 
 +    } 
 +  
 +    return 0; 
 +
  
 </​code>​ </​code>​
studia/magisterskie/1sem/systemy_rownolegle_i_rozproszone.1459372245.txt.gz · ostatnio zmienione: 2016/03/30 23:10 przez 46.22.174.138