Narzędzia użytkownika

Narzędzia witryny


studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone

Różnice

Różnice między wybraną wersją a wersją aktualną.

Odnośnik do tego porównania

Poprzednia rewizja po obu stronach Poprzednia wersja
Nowa wersja
Poprzednia wersja
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/03/13 21:31]
wogu
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/05/19 11:13] (aktualna)
149.156.112.6 [tmp/opencl_mat_transp/mat_transp/mat_transp.c]
Linia 774: Linia 774:
  
   return 0;   return 0;
 +}
 +
 +
 +
 +
 +
 +
 +    struct rekord wys;
 +    MPI_Datatype rekord_typ;
 +    int tab_dlug_blokow[3] = {1, 1, 1};
 +    MPI_Datatype tab_typow[3] = {MPI_DOUBLE,​ MPI_DOUBLE, MPI_INT};
 +    MPI_Aint podstawa, tab_odstepow[3];​
 +
 +    MPI_Get_address(&​wys.x_min,​ &​tab_odstepow[0]);​
 +    MPI_Get_address(&​wys.x_max,​ &​tab_odstepow[1]);​
 +    MPI_Get_address(&​wys.n,​ &​tab_odstepow[2]);​
 +
 +    MPI_Type_struct(3,​ tab_dlug_blokow,​ tab_odstepow,​ tab_typow, &​rekord_typ);​
 +    MPI_Type_commit(&​rekord_typ);​
 +    MPI_Recv ( &wys, 1, rekord_typ, 0, 1, parentcomm, &status );
 +</​code>​
 +
 +====== Zajęcia 4 ======
 +wkrótce
 +====== Zajęcia 5 ======
 +==== Makefile ====
 +<code bash># optimization and other system dependent options
 +#​include ​ make.$(SRR_ARCH)
 +# or directly
 +include ​ make.lab_404_NVIDIA
 +
 +NAME = Hello_GPU
 +
 +program: main.o ​
 + $(CC) $(LDFL) main.o ​ $(LIB) -o $(NAME)
 +
 +main.o: main.c ​
 + $(CC) $(CFL) -c main.c ​ $(INC) -o main.o
 +
 +clean:
 + rm -f obj/*
 + rm -f $(NAME)
 +</​code>​
 +==== make.lab_404_NVIDIA ====
 +<code bash># C compiler
 +#CC = icc
 +CC = gcc
 +
 +# C++ compiler
 +CPPC = icpc
 +#CPPC = g++
 +
 +# Loader (to link C/C++ and Fortran libraries)
 +LD = icpc
 +#LD = g++
 +
 +# Archiver
 +AR = ar r
 +
 +# For removing files
 +RM = rm -f
 +
 +# Include directories
 +INC = -I/​opt/​cuda7/​include
 +
 +# Standard and/or local libraries
 +LIB = -L/​opt/​cuda7/​lib64 -lOpenCL ​
 +
 +# C optimization and other flags
 +#CFL(icc) = -O3 -openmp
 +#CFL(gcc) = -O3 -fopenmp
 +#​CFL(icc,​debug) = -g
 +#CFL(gcc) = -g
 +CFL =
 +
 +# Loader optimization and other flags
 +#​LDFL(debug) = -g 
 +#​LDFL(profile) = -p                                ​
 +LDFL = 
 +
 +</​code>​
 +==== main.c ====
 +<code c>#​include<​stdlib.h>​
 +#​include<​stdio.h>​
 +
 +#include <​CL/​cl.h>​
 +
 +// functions to display platform and device properties
 +void DisplayPlatformInfo(
 + cl_platform_id id, 
 + cl_platform_info name,
 + char* str)
 +{
 +  cl_int retval;
 +  size_t paramValueSize;​
 +  ​
 +  retval = clGetPlatformInfo(
 +      id,
 +      name,
 +      0,
 +      NULL,
 +      &​paramValueSize);​
 +  if (retval != CL_SUCCESS){
 +    printf("​Failed to find OpenCL platform %s.\n",​ str);
 +    return;
 +  }
 +  ​
 +  char * info = (char *)malloc(sizeof(char) * paramValueSize);​
 +  retval = clGetPlatformInfo(
 +      id,
 +      name,
 +      ​paramValueSize,​
 +      info,
 +      ​NULL);​
 +  if (retval != CL_SUCCESS) ​ {
 +    printf("​Failed to find OpenCL platform %s.\n",​ str);
 +    return;
 +  }
 +  ​
 +  printf("​\t%s:​\t%s\n",​ str, info );
 +  free(info); ​
 +}
 +
 +void DisplayDeviceInfo_char(
 +        ​cl_device_id id, 
 +        ​cl_device_info name,
 +        char* str)
 +{
 +  cl_int retval;
 +  size_t paramValueSize;​
 +  ​
 +  retval = clGetDeviceInfo(
 +    id,
 +    name,
 +    0,
 +    NULL,
 +    &​paramValueSize);​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to find OpenCL device info %s.\n",​ str);
 +    return;
 +  }
 +  ​
 +  char * info = (char *)malloc(sizeof(char) * paramValueSize);​
 +  retval = clGetDeviceInfo(
 +    id,
 +    name,
 +    ​paramValueSize,​
 +    info,
 +    ​NULL);​
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to find OpenCL device info %s.\n",​ str);
 +    return;
 +  }
 +
 +  printf("​\t\t%s:​\t%s\n",​ str, info );
 +  free(info);
 +};
 +
 +void DisplayDeviceInfo_ulong(
 +        ​cl_device_id id, 
 +        ​cl_device_info name,
 +        char* str)
 +{
 +  cl_int retval;
 +  size_t paramValueSize;//​ = sizeof(cl_ulong);​
 +  ​
 +  retval = clGetDeviceInfo(
 +    id,
 +    name,
 +    0,
 +    NULL,
 +    &​paramValueSize);​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to find OpenCL device info %s.\n",​ str);
 +    return;
 +  }
 +  ​
 +  cl_ulong * info = (cl_ulong *)malloc(sizeof(cl_ulong) * paramValueSize);​
 +  retval = clGetDeviceInfo(
 +    id,
 +    name,
 +    ​paramValueSize,​
 +    info,
 +    ​NULL);​
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to find OpenCL device info %s.\n",​ str);
 +    return;
 +  }
 +
 +  printf("​\t\t%s:​\t%lu MB\n", str, info[0] / 1024 / 1024 );
 +  free(info);
 +};
 +
 +void DisplayDeviceInfo_uint(
 +        ​cl_device_id id, 
 +        ​cl_device_info name,
 +        char* str)
 +{
 +  cl_int retval;
 +  size_t paramValueSize;//​ = sizeof(cl_ulong);​
 +  ​
 +  retval = clGetDeviceInfo(
 +    id,
 +    name,
 +    0,
 +    NULL,
 +    &​paramValueSize);​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to find OpenCL device info %s.\n",​ str);
 +    return;
 +  }
 +  ​
 +  cl_uint * info = (cl_uint *)malloc(sizeof(cl_uint) * paramValueSize);​
 +  retval = clGetDeviceInfo(
 +    id,
 +    name,
 +    ​paramValueSize,​
 +    info,
 +    ​NULL);​
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to find OpenCL device info %s.\n",​ str);
 +    return;
 +  }
 +
 +  printf("​\t\t%s:​\t%d MHz\n",​ str, info[0] );
 +  free(info);
 +};
 +
 +
 +// main program controlling execution of CPU code and OpenCL kernels
 +int main(int argc, char** argv)
 +{
 +  cl_uint number_of_contexts = 2;
 +  cl_context context = NULL;
 +  cl_context list_of_contexts[2] = {0,0};
 +  cl_command_queue commandQueue = 0;
 +  cl_program program = 0;
 +  cl_uint number_of_devices;​
 +  cl_device_id device = 0;
 +  cl_device_id *list_of_devices;​
 +  cl_device_type type;
 +  cl_kernel kernel = 0;
 +  cl_mem memObjects[3] = { 0, 0, 0 };
 +  cl_int retval;
 +  int icon, idev;
 +  cl_uint numPlatforms;​
 +  cl_platform_id * platformIds;​
 +  cl_uint i,j;
 +
 +  // flag to control displaying
 +  int Monitor = 1;
 +
 +
 +  // Create OpenCL contexts ​
 +
 +  // First, query the total number of platforms
 +  retval = clGetPlatformIDs(0,​ (cl_platform_id *) NULL, &​numPlatforms);​
 +
 +  // Next, allocate memory for the installed plaforms, and qeury 
 +  // to get the list.
 +  platformIds = (cl_platform_id *)malloc(sizeof(cl_platform_id) * numPlatforms);​
 +
 +  // Then, query the platform IDs
 +  retval = clGetPlatformIDs(numPlatforms,​ platformIds,​ NULL);
 +
 +  if(Monitor>​=0){
 +    printf("​Number of platforms: \t%d\n",​ numPlatforms); ​
 +  }
 +
 +  // Iterate through the list of platforms displaying associated information
 +  for (i = 0; i < numPlatforms;​ i++) {
 +
 +    if(Monitor>​0){
 +
 +      printf("​Platform ID - %d\n",​i);​
 +      // First we display information associated with the platform
 +      DisplayPlatformInfo(
 + platformIds[i], ​
 + CL_PLATFORM_NAME, ​
 + "​CL_PLATFORM_NAME"​);​
 +      DisplayPlatformInfo(
 + platformIds[i], ​
 + CL_PLATFORM_PROFILE, ​
 + "​CL_PLATFORM_PROFILE"​);​
 +      DisplayPlatformInfo(
 + platformIds[i], ​
 + CL_PLATFORM_VERSION, ​
 + "​CL_PLATFORM_VERSION"​);​
 +      DisplayPlatformInfo(
 + platformIds[i], ​
 + CL_PLATFORM_VENDOR, ​
 + "​CL_PLATFORM_VENDOR"​);​
 +    }
 +
 +  }
 +
 +  // For the first platform
 +  int iplat;
 +  printf("​Select Platform ID: "); scanf("​%d",​ &​iplat);​
 +
 +  // Query the set of devices associated with the platform
 +  retval = clGetDeviceIDs(
 +   platformIds[iplat],​
 +   CL_DEVICE_TYPE_ALL,​
 +   0,
 +   NULL,
 +   &​number_of_devices);​
 +  ​
 +  ​
 +  list_of_devices = 
 +    (cl_device_id *) malloc (sizeof(cl_device_id) * number_of_devices);​
 +  ​
 +  retval = clGetDeviceIDs(
 +   platformIds[iplat],​
 +   CL_DEVICE_TYPE_ALL,​
 +   number_of_devices,​
 +   list_of_devices,​
 +   NULL);
 +  ​
 +  if(Monitor>​=0){
 +    printf("​Number of devices: \t%d\n",​ number_of_devices); ​
 +  }
 +
 +  // Iterate through each device, displaying associated information
 +  for (j = 0; j < number_of_devices;​ j++) {
 +    ​
 +    clGetDeviceInfo(list_of_devices[j],​ CL_DEVICE_TYPE, ​
 +     sizeof(cl_device_type),​ &type, NULL);
 +    ​
 +    if(Monitor>​0){
 +      ​
 +      DisplayDeviceInfo_char(
 + list_of_devices[j], ​
 + CL_DEVICE_NAME, ​
 + "​CL_DEVICE_NAME"​);​
 +      ​
 +      DisplayDeviceInfo_char(
 + list_of_devices[j], ​
 + CL_DEVICE_VENDOR, ​
 + "​CL_DEVICE_VENDOR"​);​
 +      ​
 +      DisplayDeviceInfo_char(
 + list_of_devices[j], ​
 + CL_DEVICE_VERSION, ​
 + "​CL_DEVICE_VERSION"​);​
 +
 +      DisplayDeviceInfo_ulong(
 + list_of_devices[j], ​
 + CL_DEVICE_GLOBAL_MEM_SIZE, ​
 + "​CL_DEVICE_GLOBAL_MEM_SIZE"​);​
 +
 +      DisplayDeviceInfo_uint(
 + list_of_devices[j], ​
 + CL_DEVICE_MAX_CLOCK_FREQUENCY, ​
 + "​CL_DEVICE_MAX_CLOCK_FREQUENCY"​);​
 +      printf("​\n"​);​
 +    }
 +  }
 +  ​
 +  // Next, create OpenCL contexts on platforms
 +  cl_context_properties contextProperties[] = {
 +    CL_CONTEXT_PLATFORM,​
 +    (cl_context_properties)platformIds[iplat],​
 +    0
 +  };
 +  ​
 +  if(Monitor>​0){
 +    printf("​Creating CPU context %d on platform %d\n", 1, iplat);
 +  }
 +  ​
 +  list_of_contexts[1] = 
 +    clCreateContextFromType(contextProperties, ​
 +     CL_DEVICE_TYPE_CPU,​ NULL, NULL, &​retval);​
 +  ​
 +  if(Monitor>​=0 && retval != CL_SUCCESS){
 +    printf("​Could not create CPU context on platform %d\n", i);
 +  }
 +  ​
 +  if(Monitor>​0){
 +    printf("​Creating GPU context 0 on platform %d\n", iplat);
 +  }
 +  ​
 +  list_of_contexts[0] = 
 +    clCreateContextFromType(contextProperties, ​
 +     CL_DEVICE_TYPE_GPU,​ NULL, NULL, &​retval);​
 +  ​
 +  if(Monitor>​=0 && retval != CL_SUCCESS){
 +    printf("​Could not create GPU context on platform %d\n", i);
 +  }
 +  ​
 +  // in a loop over devices of the seleceted platform ​  
 +  for(idev=0; idev<​number_of_devices;​idev++){
 +    ​
 +    if(Monitor>​0){
 +      printf("​\nFor context %d and device %d:​\n",​
 +      idev, idev);
 +    }
 +    device = list_of_devices[idev];​
 +    icon = idev;
 +    ​
 +    // choose OpenCL context on first available platform
 +    context = list_of_contexts[icon];​
 +    ​
 +    if(context !=0){
 +            ​
 +      commandQueue = clCreateCommandQueue(context,​ device, 0, NULL);
 +      if (commandQueue == NULL) {
 + printf("​Failed to create commandQueue for device %d\n", idev);
 + exit(0);
 +      }
 +      ​
 +      if(Monitor>​0){
 + printf("​Reading program from source\n"​);​
 +      }
 +
 +      // read source code from file
 +      FILE *fp;
 +      char* source;
 +      long int size;
 +      ​
 +      fp = fopen("​HelloWorld.cl",​ "​rb"​);​
 +      if(!fp) {
 + printf("​Could not open kernel file\n"​);​
 + exit(-1);
 +      }
 +      int status = fseek(fp, 0, SEEK_END);
 +      if(status != 0) {
 + printf("​Error seeking to end of file\n"​);​
 + exit(-1);
 +      }
 +      size = ftell(fp);
 +      if(size < 0) {
 + printf("​Error getting file position\n"​);​
 + exit(-1);
 +      }
 +      ​
 +      rewind(fp);
 +      ​
 +      source = (char *)malloc(size + 1);
 +      ​
 +      int i;
 +      for (i = 0; i < size+1; i++) {
 + source[i]='​\0';​
 +      }
 +      ​
 +      if(source == NULL) {
 + printf("​Error allocating space for the kernel source\n"​);​
 + exit(-1);
 +      }
 +      ​
 +      fread(source,​ 1, size, fp);
 +      source[size] = '​\0';​
 +
 +      const char* source_const = source;
 +      cl_program program = clCreateProgramWithSource(context,​ 1,
 +  ​    &​source_const,​
 +  ​    NULL, NULL);
 +      if (program == NULL)
 + {
 +   printf("​Failed to create CL program from source.\n"​);​
 +   exit(-1);
 + }
 +      ​
 +      if(Monitor>​0){
 + printf("​Creating program and kernel\n"​);​
 +      }
 +      // build program (passing options to compiler if necessary
 +      retval = clBuildProgram(program,​ 0, NULL, NULL, NULL, NULL);
 +      char* buildLog; size_t size_of_buildLog; ​
 +      clGetProgramBuildInfo(program,​ device, CL_PROGRAM_BUILD_LOG, ​
 +     0, NULL, &​size_of_buildLog); ​
 +      buildLog = malloc(size_of_buildLog+1); ​
 +      clGetProgramBuildInfo(program,​ device, CL_PROGRAM_BUILD_LOG, ​
 +     size_of_buildLog,​ buildLog, NULL); ​
 +      buildLog[size_of_buildLog]= '​\0'; ​
 +      printf("​Kernel buildLog: %s\n", buildLog); ​
 +      if (retval != CL_SUCCESS)
 + {
 +   printf("​Error in kernel\n"​);​
 +   clReleaseProgram(program);​
 +   exit(-1);
 + }
 +      ​
 +      ​
 +      // Create OpenCL kernel
 +      kernel = clCreateKernel(program,​ "​hello_kernel",​ NULL);
 +      if (kernel == NULL)
 + {
 +   printf("​Failed to create kernel.\n"​);​
 +   exit(0);
 + }
 +      ​
 +      if(Monitor>​0){
 + printf("​Creating memory objects\n"​);​
 +      }
 +      // Create memory objects that will be used as arguments to
 +      // kernel. ​ First create host memory arrays that will be
 +      // used to store the arguments to the kernel
 +      float result[1];
 +      float a[1];
 +      float b[1];
 +      a[0] = 2;
 +      b[0] = 2;
 +      ​
 +      memObjects[0] = clCreateBuffer(context,​ CL_MEM_READ_ONLY ,
 +      ​sizeof(float),​ NULL, NULL);
 +      memObjects[1] = clCreateBuffer(context,​ CL_MEM_READ_ONLY ,
 +      ​sizeof(float),​ NULL, NULL);
 +      memObjects[2] = clCreateBuffer(context,​ CL_MEM_READ_WRITE,​
 +      ​sizeof(float),​ NULL, NULL);
 +      ​
 +      if (memObjects[0]==NULL || memObjects[1]==NULL || memObjects[2]==NULL){
 + printf("​Error creating memory objects.\n"​);​
 + return 0;
 +      }
 +      ​
 +      if(Monitor>​0){
 + printf("​Sending kernel arguments\n"​);​
 +      }
 +      retval = clEnqueueWriteBuffer(
 +     commandQueue, ​
 +     memObjects[0], ​
 +     CL_FALSE, ​
 +     0, 
 +     sizeof(float), ​                        
 +     a, 
 +     0, 
 +     NULL, 
 +     NULL);
 +      ​
 +      // Use clEnqueueWriteBuffer() to write input array B to 
 +      // the device buffer bufferB
 +      retval = clEnqueueWriteBuffer(
 +     commandQueue, ​
 +     memObjects[1], ​
 +     CL_FALSE, ​
 +     0, 
 +     sizeof(float), ​                        
 +     b,
 +     0, 
 +     NULL, 
 +     NULL);
 +      ​
 +      // Set the kernel arguments (result, a, b)
 +      retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +      retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +      retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +      if (retval != CL_SUCCESS)
 + {
 +   printf("​Failed to Set the kernel arguments.\n"​);​
 +   //​Cleanup(context,​ commandQueue,​ program, kernel, memObjects);​
 +   return 1;
 + }
 +      ​
 +      if(Monitor>​0){
 + printf("​Running the kernel!\n"​);​
 +      }
 +      size_t globalWorkSize[1] = { 1 };
 +      size_t localWorkSize[1] = { 1 };
 +      ​
 +      // Queue the kernel up for execution across the array
 +      retval = clEnqueueNDRangeKernel(commandQueue,​ kernel, 1, NULL,
 +       globalWorkSize,​ localWorkSize,​
 +       0, NULL, NULL);
 +      if (retval != CL_SUCCESS)
 + {
 +   printf("​Failed to queue kernel for execution.\n"​);​
 +   //​Cleanup(context,​ commandQueue,​ program, kernel, memObjects);​
 +   return 1;
 + }
 +      ​
 +      if(Monitor>​0){
 + printf("​Transfering back results\n"​);​
 +      }
 +      // Read the output buffer back to the Host
 +      retval = clEnqueueReadBuffer(commandQueue,​ memObjects[2],​ CL_TRUE,
 +    0, sizeof(float),​ result,
 +    0, NULL, NULL);
 +      if (retval != CL_SUCCESS)
 + {
 +   printf("​Failed to read result buffer.\n"​);​
 +   //​Cleanup(context,​ commandQueue,​ program, kernel, memObjects);​
 +   return 1;
 + }
 +      ​
 +      // Verify the output
 +      if(result[0]==4) ​ {
 +        printf("​Output is correct: %lf + %lf = %lf\n",​
 +        a[0], b[0], result[0]);
 +      } else {
 +        printf("​Output is incorrect: %lf + %lf != %lf\n",​
 +        a[0], b[0], result[0]);
 +      }
 +      ​
 +      for (i = 0; i < 3; i++)
 +      {
 +        if (memObjects[i] != 0)
 +            clReleaseMemObject(memObjects[i]);​
 +      }
 +      if (commandQueue != 0)
 +        clReleaseCommandQueue(commandQueue);​
 +      ​
 +      if (kernel != 0)
 +        clReleaseKernel(kernel);​
 +      ​
 +      if (program != 0)
 +        clReleaseProgram(program);​
 +      ​
 +    }
 +  }
 +  ​
 +  free(list_of_devices);​
 +  free(platformIds);​
 +  ​
 +  return 0;
 +}
 +</​code>​
 +==== HelloWorld.cl ====
 +<code c>
 +__kernel void hello_kernel(__global const float *a,
 +            ​__global const float *b,
 +    ​__global float *result)
 +{
 +    int gid = get_global_id(0);​
 +
 +    if(gid<​1) result[gid] = a[gid] + b[gid];
 }</​code>​ }</​code>​
 +===== Zajęcia 6 =====
 +Pliki zadania: {{:​studia:​magisterskie:​1sem:​opencl_vecadd.tgz|}}
 +Działające 3 kernele: {{:​studia:​magisterskie:​1sem:​sala404.zip|}}
 +
 +==== opencl_vecadd/​vecadd_host/​execute_kernels.c ====
 +<code c>#​include<​stdlib.h>​
 +#​include<​stdio.h>​
 +#include <​math.h>​
 +
 +#​include<​omp.h>​
 +#include <​CL/​cl.h>​
 +
 +#​include"​uth_ocl_intf.h"​
 +
 +#​include"​./​vecadd_host.h"​
 +#​include"​./​hardware_params.h"​
 +
 +#define time_measurments
 +
 +#ifdef time_measurments
 +#​include"​uth_time_intf.h"​
 +  static double t_begin, t_end, t_total;
 +#endif
 +
 +int execute_kernel_2_CPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  ​
 +  // create the kernel for CPU
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​
 +   // kernel name:         , file:
 +   "​vecadd_2_blocks_kernel",​ "​vecadd_2_blocks.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. Restoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  int nr_cores_CPU = omp_get_num_procs();​
 +  int nr_threads = nr_cores_CPU;​ // total number of threads
 +  int work_group_size = 1; // number of threads in work-group
 +  int nr_work_groups = nr_threads / work_group_size;​
 +  globalWorkSize[0] = nr_threads ;
 +  localWorkSize[0] = work_group_size ;
 +  ​
 +  printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 + nr_cores_CPU,​ nr_cores_CPU,​ 1);
 +  printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 + work_group_size,​ nr_threads / work_group_size, ​
 + nr_threads / work_group_size / nr_cores_CPU);​
 +  printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 + globalWorkSize[0],​ globalWorkSize[0]/​nr_cores_CPU, ​
 + globalWorkSize[0]/​nr_cores_CPU);​
 +  printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 + array_size, array_size/​globalWorkSize[0],​ array_size/​nr_cores_CPU);​
 +  ​
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  int size = array_size;
 +  int size_per_thread = ceilf((float)size/​nr_threads); ​
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &size);
 +  retval |= clSetKernelArg(kernel,​ 4, sizeof(int),​ &​size_per_thread);​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to Set the kernel arguments.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +int execute_kernel_3_CPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for CPU
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​
 +   // kernel name:         , file:
 +   "​vecadd_3_opt_blocks_kernel","​vecadd_3_opt_blocks.cl",​monitor);​
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  int nr_cores_CPU = omp_get_num_procs();​
 +  int nr_threads = nr_cores_CPU;​ // total number of threads
 +  int work_group_size = 1; // number of threads in work-group
 +  int nr_work_groups = nr_threads / work_group_size;​
 +  globalWorkSize[0] = nr_threads ;
 +  localWorkSize[0] = work_group_size ;
 +  ​
 +  printf("​\t\tExecuting kernel %d, on platform %d and device %d\n",
 + kernel_index,​ platform_index,​ device_index);​
 +  printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 + nr_cores_CPU,​ nr_cores_CPU,​ 1);
 +  printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 + work_group_size,​ nr_threads / work_group_size, ​
 + nr_threads / work_group_size / nr_cores_CPU);​
 +  printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 + globalWorkSize[0],​ globalWorkSize[0]/​nr_cores_CPU, ​
 + globalWorkSize[0]/​nr_cores_CPU);​
 +  printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 + array_size, array_size/​globalWorkSize[0],​ array_size/​nr_cores_CPU);​
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  int size = array_size;
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &size);
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to Set the kernel arguments.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    return 1;
 +  }
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +// tutaj modyfikujemy
 +int execute_kernel_0_GPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for GPU
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +     // kernel name:         , file:
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  ​
 +  // globalna liczba wątków i lokalna liczba wątków
 +  int work_group_size = WORK_GROUP_SIZE;​
 +  globalWorkSize[0] = array_size;
 +  localWorkSize[0] = work_group_size;​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0], ​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +  }
 +   
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  // ustawiamy liczbę argumentów
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  if (retval != CL_SUCCESS) {
 +      printf("​Failed to Set the kernel arguments.\n"​);​
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +      return 1;
 +    }
 +  ​
 + 
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +int execute_kernel_1_GPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for GPU
 +/*
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +     // kernel name:         , file:
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);*/
 +
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 + ​  ​   // kernel name:         , file: */
 + ​  ​   "​vecadd_1_kernel",​ "​vecadd_1.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  ​
 +  int work_group_size = WORK_GROUP_SIZE;​
 +  globalWorkSize[0] = array_size / 4;
 +  localWorkSize[0] = work_group_size;​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0], ​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +  }
 +   
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  if (retval != CL_SUCCESS) {
 +      printf("​Failed to Set the kernel arguments.\n"​);​
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +      return 1;
 +    }
 +  ​
 + 
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +int execute_kernel_2_GPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for GPU
 +/*    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +     // kernel name:         , file:
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);*/
 +     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 + ​  ​   // kernel name:         , file: */
 + ​  ​   "​vecadd_2_blocks_kernel",​ "​vecadd_2_blocks.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  ​
 +  int work_group_size = WORK_GROUP_SIZE;​
 +  globalWorkSize[0] = array_size / work_group_size;​
 +  localWorkSize[0] = work_group_size;​
 +
 +  int size_per_thread = work_group_size;​
 +  int size = array_size;
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0], ​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +  }
 +   
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &size);
 +  retval |= clSetKernelArg(kernel,​ 4, sizeof(int),​ &​size_per_thread);​
 +  if (retval != CL_SUCCESS) {
 +      printf("​Failed to Set the kernel arguments.\n"​);​
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +      return 1;
 +    }
 +  ​
 + 
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +int execute_kernel_3_GPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  int size = array_size;
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for GPU
 +/*    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +     // kernel name:         , file:
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);*/
 +     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 + ​  ​   // kernel name:         , file:
 +     "​vecadd_3_opt_blocks_kernel",​ "​vecadd_3_opt_blocks.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  ​
 +  int work_group_size = WORK_GROUP_SIZE;​
 +  globalWorkSize[0] = array_size;
 +  localWorkSize[0] = work_group_size;​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0], ​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +  }
 +   
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &size);
 +
 +  if (retval != CL_SUCCESS) {
 +      printf("​Failed to Set the kernel arguments.\n"​);​
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +      return 1;
 +    }
 +  ​
 + 
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +
 +
 +    ​
 +/*     // create the fourth kernel for GPU */
 +/*     ​kernel_index = 4;  */
 +/*     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ */
 +/*     // kernel name:         , file: */
 +/*     "​vecadd_4_opt_blocks_vect_kernel",​ "​vecadd_4_opt_blocks_vect.cl",​ monitor); */</​code>​
 +
 +==== opencl_vecadd/​vecadd_2_blocks.cl ====
 +<code c>
 +__kernel void vecadd_2_blocks_kernel(__global const float *a,
 +      ​__global const float *b,
 +      ​__global float *result,
 +                      const int size,
 +      const int size_per_thread)
 +{
 +  int gid = get_global_id(0);​
 +  int index_start = gid * size_per_thread;​
 +  int index_end = (gid+1) * size_per_thread;​
 +
 +  if (gid == 0) {
 + //​printf("​size:​ %d, %d", size, size_per_thread);​
 +  }
 +
 +  for (int i=index_start;​ i < index_end && i < size; i++) {
 +  //for (int i=index_start;​ i < index_end ; i++) {
 +    result[i] = a[i]+b[i];
 +  }
 +}</​code>​
 +===== Zajęcia 7 =====
 +{{:​studia:​magisterskie:​1sem:​zajecia_7.zip|}}
 +
 +==== tmp/​opencl_mat_transp/​mat_transp_1.cl ====
 +<code c> #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 +
 +__kernel void mat_transp_1_kernel(
 +  __global float *A , 
 +  __global float *B ,
 +  __global float *C ,
 +  int N)
 +{
 +
 +int id = get_global_id(0) * get_global_size(0) + get_global_id(1);​
 +float temp = 0.0;
 +
 +for(int i = 0; i<​N;​i++){
 + temp += A[id*N + i] *B[i];
 +}
 +
 +C[id] = temp;
 +}</​code>​
 +
 +==== tmp/​opencl_mat_transp/​mat_transp/​mat_transp.c ====
 +<code c>#​include<​stdlib.h>​
 +#​include<​stdio.h>​
 +#include <​math.h>​
 +
 +#include <​CL/​cl.h>​
 +
 +#​include"​uth_ocl_intf.h"​
 +
 +#define time_measurments
 +
 +#ifdef time_measurments
 +#​include"​uth_time_intf.h"​
 +  static double t_begin, t_end, t_total;
 +#endif
 +
 +
 +
 +#define BLOCK_SIZE 16
 +#define NR_GROUPS 16
 +#define MULT 16
 +#define WYMIAR (BLOCK_SIZE*NR_GROUPS*MULT)
 +#define ROZMIAR (WYMIAR*WYMIAR)
 +// Matrices are stored in row-major order: ​
 +// M(row, col) = M( row * WYMIAR + col ) 
 +
 +int verify_result(
 +  float* result,
 +  float* result_compare
 +   )
 +{
 +  // Verify the result
 +  int result_OK = 1;
 +  int i,j;
 +  for(i = 0; i < WYMIAR; i++) {
 +      if(fabs(result[i] - result_compare[i])>​1.e-6) {
 + result_OK = 0;
 + break;
 +    }
 +  }
 +  printf("​\t\t6. verifying results: ");
 +  if(result_OK) {
 +    printf("​Output is correct\n"​);​
 +  } else {
 +    printf("​Output is incorrect\n"​);​
 +    j = 0;
 +    for(i = 0; i < WYMIAR, j<100; i++) {
 + if(fabs(result[i] - result_compare[i])>​1.e-9) {
 +   j++;
 +   printf("​%d %16.8f %16.8f\n", ​
 +   i, result[i], result_compare[i]);​
 + }
 +    }
 +    exit(0);
 +  }
 +    /* for(i = 0; i < length; i++) { */
 +    /*   ​printf("​%16.8f %16.8f\n",​ result[i], result_compare[i]);​ */
 +    /* } */
 +
 +  return(result_OK);​
 +}
 +
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +void create_kernels()
 +{
 +
 +  // for all operations indicate explicit info messages
 +  int monitor = UTC_BASIC_INFO + 1;
 +
 +  int kernel_index;​
 +  ​
 +  int platform_index = utv_ocl_struct.current_platform_index;​
 +
 +  if(utr_ocl_CPU_context_exists(platform_index)){
 +
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the first kernel for CPU
 +  /* kernel_index = 0;  */
 +  /* utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​ */
 +  /*   // kernel name:         , file: */
 +  /*   "​mat_transp_1_kernel",​ "​mat_transp_1.cl",​ monitor); */
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel %d: %lf\n",​ kernel_index,​ t_end-t_begin);​
 +#endif
 +  ​
 +  }
 +
 +  if(utr_ocl_GPU_context_exists(platform_index)){
 +
 +
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  // create the first kernel for GPU
 +  kernel_index = 0; 
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +   // kernel name:         , file:
 +   "​mat_transp_1_kernel",​ "​mat_transp_1.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel %d: %lf\n",​ kernel_index,​ t_end-t_begin);​
 +#endif
 +  ​
 +  // create the second kernel for GPU
 +  kernel_index = 1; 
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +   // kernel name:         , file:
 +   "​mat_transp_2_kernel",​ "​mat_transp_2.cl",​ monitor);
 +
 +  ​
 +
 +  }
 +
 +}
 +
 +  /​*----------------EXECUTION PHASE----------------------*/​
 +int execute_kernels()
 +{
 +
 +  // for all operations indicate explicit info messages
 +  int monitor = UTC_BASIC_INFO + 1;
 +
 +  int platform_index = utv_ocl_struct.current_platform_index;​
 +  utt_ocl_platform_struct platform_struct = utv_ocl_struct.list_of_platforms[platform_index];​
 +
 +  int kernel_index;​
 +  int i,j,n;
 +
 +  double nr_access;
 +  double t1,t2;
 +
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\n------------Starting execution phase----------------\n"​);​
 +  }
 +  ​
 +  // create matrices
 +  n=WYMIAR;
 +  float* A = (float *) malloc(ROZMIAR*sizeof(float));​
 +  float* B = (float *) malloc(WYMIAR*sizeof(float));​
 +  float* C = (float *) malloc(WYMIAR*sizeof(float));​
 +  float* D = (float *) malloc(WYMIAR*sizeof(float));​
 +  ​
 +  for(i=0;​i<​ROZMIAR;​i++) A[i]=1.0*i/​10000000.0;​
 +
 +  for(i=0;​i<​WYMIAR;​i++) B[i]=1.0*i/​10000000.0;​
 +
 +  // mnozenie macierz wektor
 +  for(i=0; i<​WYMIAR;​ i++){
 + for(j=0; j<​WYMIAR;​ j++){
 + D[i] += A[i*WYMIAR + j] * B[j];
 + }
 +  }
 +  ​
 +  nr_access= ROZMIAR + 2.0*WYMIAR; // read + write
 +  ​
 +  printf("​mat_transp:​ nr_access %lf\n",​ nr_access);
 +  ​
 +  // get hardware characteristics to select good matrix shape
 +  // the set of device characteristics stored in data structure
 +  int device_index = 0; 
 +  utt_ocl_device_struct device_struct = 
 +    utv_ocl_struct.list_of_platforms[platform_index].list_of_devices[device_index];​
 +  double global_mem_bytes = device_struct.global_mem_bytes;​
 +  double global_max_alloc = device_struct.global_max_alloc;​
 +  double local_mem_bytes = device_struct.local_mem_bytes;​
 +  double constant_mem_bytes = device_struct.constant_mem_bytes;​
 +  int max_num_comp_units = device_struct.max_num_comp_units;​
 +  int max_work_group_size = device_struct.max_work_group_size;​
 +  ​
 +  // in a loop over devices (or for a selected device)
 +  int idev=0; ​
 +  for(idev=0; idev<​platform_struct.number_of_devices;​ idev++){
 +    ​
 +    // int device_type = .....
 +    // choose device_index
 +    // int device_index = utr_ocl_select_device(platform_index,​ device_type);​
 +    int device_index = idev;
 +    int device_type = utr_ocl_device_type(platform_index,​ device_index); ​
 +    ​
 +    if(device_index>​0 && device_type==utr_ocl_device_type(platform_index,​ device_index-1)) break; ​
 +    if(device_type == UTC_OCL_DEVICE_CPU) break;
 +
 +    // choose the context
 +    cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +    ​
 +    // choose the command queue
 +    cl_command_queue command_queue = 
 +      utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +    ​
 +    if(monitor>​UTC_BASIC_INFO){
 +      printf("​\nExecution:​ \t0. restoring context and command queue for platform %d and device %d\n",
 +      ​platform_index,​ device_index);​
 +    }
 +    ​
 +    if(context == NULL || command_queue == NULL){ ​
 +      ​
 +      printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +      ​platform_index,​ device_index);​
 +      printf("​%lu %lu\n",​ context, command_queue);​
 +    }
 +    ​
 +    // choose the kernel
 +    kernel_index = 0;
 +    cl_kernel kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +    ​
 +    if(monitor>​UTC_BASIC_INFO){
 +      printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +      ​kernel_index,​ platform_index,​ device_index);​
 +    }
 +    ​
 +    if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +      ​
 +      printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +      ​platform_index,​ device_index,​ kernel_index);​
 +      printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +      ​context,​ command_queue,​ kernel);
 +    }
 +    ​
 +    for(i=0;​i<​WYMIAR;​i++) C[i]=0.0;
 +    time_init();​ t1 = time_clock();​
 +    ​
 +    // call routine to perform matrix transposition
 +    mat_transp_host(kernel_index,​ A, B, C, n, context, kernel, command_queue);​
 +    ​
 +    t2 = time_clock();​ time_print();​
 +    printf("​GB/​s = %lf\n\n",​ nr_access*sizeof(float)/​(t2-t1)/​1024/​1024/​1024);​
 +    ​
 +    ​
 +    // verify result ​
 +    verify_result(C,​ D);
 +    ​
 +    ​
 +  } // end loop over devices
 +  ​
 +  return(0);
 +}
 +
 +
 +
 +// Matrix transposition - Host code 
 +// Matrix dimensions are assumed to be multiples of BLOCK_SIZE ​
 +int mat_transp_host(
 +  int kernel_index,​
 +  float* A, 
 +  float* B,
 +  float* C, 
 +  int N, 
 +  const cl_context context, ​
 +  const cl_kernel mat_transp_kernel, ​
 +  const cl_command_queue queue
 +
 +
 +
 +// Load A to device memory ​
 +  size_t size_bytesm = N*N*sizeof(float); ​
 +  size_t size_bytesw = N*sizeof(float); ​
 +  ​
 +
 +  cl_mem d_A = clCreateBuffer(context,​ CL_MEM_READ_ONLY,​
 + size_bytesm,​ NULL, NULL); ​
 +
 +  // Write A to device memory ​
 +  clEnqueueWriteBuffer(queue,​ d_A, CL_TRUE, 0, size_bytesm,​ A, 0, 0, 0); 
 +
 +  cl_mem d_B = clCreateBuffer(context,​ CL_MEM_READ_ONLY,​
 + size_bytesw,​ NULL, NULL); ​
 +
 +  // Write A to device memory ​
 +  clEnqueueWriteBuffer(queue,​ d_B, CL_TRUE, 0, size_bytesw,​ B, 0, 0, 0); 
 +
 +  // Allocate B in device memory ​
 +  cl_mem d_C = clCreateBuffer(context,​ CL_MEM_WRITE_ONLY,​ size_bytesw,​ NULL, NULL); ​
 +
 +
 +  // Invoke kernel ​
 +  clSetKernelArg(mat_transp_kernel,​ 0, sizeof(cl_mem),​ (void*)&​d_A);​
 +  clSetKernelArg(mat_transp_kernel,​ 1, sizeof(cl_mem),​ (void*)&​d_B);​
 +  clSetKernelArg(mat_transp_kernel,​ 2, sizeof(cl_mem),​ (void*)&​d_C);​
 +  clSetKernelArg(mat_transp_kernel,​ 3, sizeof(int),​ (void*)&​N);​
 +
 +  size_t localWorkSize[3];​
 +  size_t globalWorkSize[3];​
 +  cl_uint work_dim;
 +
 +  if(kernel_index==3){
 +
 +    work_dim = 2;
 +    localWorkSize[0] = BLOCK_SIZE; ​
 +    globalWorkSize[0] = N/​MULT; ​
 +    localWorkSize[1] = BLOCK_SIZE; ​
 +    globalWorkSize[1] = N/​MULT; ​
 +    localWorkSize[2] = 0; 
 +    globalWorkSize[2] = 0; 
 +
 +
 +  }
 +  else{
 +
 +    work_dim = 2;
 +    localWorkSize[0] = BLOCK_SIZE; ​
 +    globalWorkSize[0] = sqrt(N); ​
 +    localWorkSize[1] = BLOCK_SIZE; ​
 +    globalWorkSize[1] = sqrt(N); ​
 +    localWorkSize[2] = 0; 
 +    globalWorkSize[2] = 0; 
 +  ​
 +  }
 +
 +  clFinish(queue);​
 +  double t1 = time_clock();​
 +  // Enqueue a kernel run call
 +  cl_event ndrEvt;
 +  clEnqueueNDRangeKernel(queue,​ mat_transp_kernel,​ work_dim, 0, 
 + globalWorkSize,​ localWorkSize,​ 0, 0, &​ndrEvt); ​
 +  clWaitForEvents(1,​ &​ndrEvt);​
 +  clFinish(queue);​
 +  double t2 = time_clock();​
 +  // Calculate performance ​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  ​
 +  // Get kernel profiling info 
 +  clGetEventProfilingInfo(ndrEvt,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(ndrEvt,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  double time = (double)endTime - (double)startTime;​
 +  printf("​\nKernel execution internal: time %lf, GB/s = %lf\n", ​
 + time*1.0e-9,​ 2.0*N*N*sizeof(float)/​(time*1.0e-9)/​1024/​1024/​1024);​
 +
 +  printf("​Kernel execution external: time %lf,  GB/s = %lf\n\n", ​
 + t2-t1, 2.0*N*N*sizeof(float)/​(t2-t1)/​1024/​1024/​1024);​
 +
 +  // Read B from device memory ​
 +  clEnqueueReadBuffer(queue,​ d_C, CL_TRUE, 0, size_bytesw,​ C, 0, 0, 0); 
 +
 +  // Free device memory ​
 +  clReleaseMemObject(d_A); ​
 +  clReleaseMemObject(d_B); ​
 +  clReleaseMemObject(d_C);​
 +
 +  return(0);
 +
 +}</​code>​
 +==== Kod wogu ====
 +=== server.c ====
 +<code c>
 +/*
 +    C socket server example, handles multiple clients using threads
 +    Compile
 +    gcc server.c -lpthread -o server
 +*/
 + 
 +#​include<​stdio.h>​
 +#​include<​string.h> ​   //strlen
 +#​include<​stdlib.h> ​   //strlen
 +#​include<​sys/​socket.h>​
 +#​include<​arpa/​inet.h>​ //inet_addr
 +#​include<​unistd.h> ​   //write
 +#​include<​pthread.h>​ //for threading , link with lpthread
 +#include <​unistd.h>​
 + 
 +//the thread function
 +void *connection_handler(void *);
 + 
 +int main(int argc , char *argv[])
 +{
 +    int counter = 0;
 +    pid_t pid = fork();
 +
 +    if (pid == 0)
 +    {
 +       ​  ​   int sock;
 +     struct sockaddr_in server;
 +     char message[1000] , server_reply[2000];​
 +            int counter;
 +     unsigned int ip[] = { 192, 168, 102, 1 };
 +            char* ip_final;
 + char *buf;
 + size_t sz;
 +  
 +     //Create socket
 +
 +            for ( counter = 0; counter < 255; ++counter ) {
 +
 +     sock = socket(AF_INET , SOCK_STREAM , 0);
 +     if (sock == -1)
 +     {
 + printf("​Could not create socket"​);​
 +     }
 +     puts("​Socket created"​);​
 +
 +     //​printf("​192.168.102.%d",​ counter);
 +
 + sz = snprintf(NULL,​ 0, "​192.168.102.%d",​ counter);
 + buf = (char *)malloc(sz + 1); /* make sure you check for != NULL in real code */
 +     ​
 +     server.sin_addr.s_addr = inet_addr(buf);​
 +     server.sin_family = AF_INET;
 +     server.sin_port = htons( 65001 );
 +  
 +     //Connect to remote server
 +     if (connect(sock , (struct sockaddr *)&​server , sizeof(server)) < 0)
 +     {
 + perror("​connect failed. Error"​);​
 + continue;​
 +     }
 +  
 +     puts("​Connected\n"​);​
 +  
 +     //keep communicating with server
 +     while(1)
 +     {
 + //Send some data
 + if( send(sock , "​Werset 6-client"​ , 15 , 0) < 0)
 + {
 +     puts("​Send failed"​);​
 +     return 1;
 + }
 +  
 + //​Receive a reply from the server
 + if( recv(sock , server_reply , 2000 , 0) < 0)
 + {
 +     puts("​recv failed"​);​
 +     break;
 + }
 +
 + puts(server_reply);​
 +
 + break;
 +     }
 +  
 +     close(sock);​
 +           }
 +    }
 +    else if (pid > 0)
 +    {
 +        int socket_desc , client_sock , c;
 +     struct sockaddr_in server , client;
 +      
 +     //Create socket
 +     socket_desc = socket(AF_INET , SOCK_STREAM , 0);
 +     if (socket_desc == -1)
 +     {
 + printf("​Could not create socket"​);​
 +     }
 +     puts("​Socket created"​);​
 +      
 +     //Prepare the sockaddr_in structure
 +     server.sin_family = AF_INET;
 +     server.sin_addr.s_addr = INADDR_ANY;
 +     server.sin_port = htons( 65001 );
 +      
 +     //Bind
 +     if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0)
 +     {
 + //print the error message
 + perror("​bind failed. Error"​);​
 + return 1;
 +     }
 +     puts("​bind done"​);​
 +      
 +     //Listen
 +     listen(socket_desc , 3);
 +      
 +     //Accept and incoming connection
 +     puts("​Waiting for incoming connections..."​);​
 +     c = sizeof(struct sockaddr_in);​
 +      
 +      
 +     //Accept and incoming connection
 +     puts("​Waiting for incoming connections..."​);​
 +     c = sizeof(struct sockaddr_in);​
 + pthread_t thread_id;
 +
 +     while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) )
 +     {
 + puts("​Connection accepted"​);​
 +  
 + if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0)
 + {
 +     perror("​could not create thread"​);​
 +     return 1;
 + }
 +  
 + //Now join the thread , so that we dont terminate before the thread
 + //​pthread_join( thread_id , NULL);
 + puts("​Handler assigned"​);​
 +     }
 +      
 +     if (client_sock < 0)
 +     {
 + perror("​accept failed"​);​
 + return 1;
 +     }
 +    }
 +    else
 +    {
 +        // fork failed
 +        printf("​fork() failed!\n"​);​
 +        return 1;
 +    }
 +     
 +    return 0;
 +}
 + 
 +/*
 + * This will handle connection for each client
 + * */
 +void *connection_handler(void *socket_desc)
 +{
 +    //Get the socket descriptor
 +    int sock = *(int*)socket_desc;​
 +    int read_size;
 +    char *message , client_message[2000];​
 +
 +    //Receive a message from client
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 )
 +    {
 + printf("​%s\n",​ client_message);​
 +
 + //end of string marker
 + client_message[read_size] = '​\0';​
 +
 + //Send the message back to client
 + write(sock , "​Werset 6" , 8);
 +
 + //clear the message buffer
 + memset(client_message,​ 0, 2000);
 +    }
 +     
 +    if(read_size == 0)
 +    {
 +        puts("​Client disconnected"​);​
 +        fflush(stdout);​
 +    }
 +    else if(read_size == -1)
 +    {
 +        perror("​recv failed"​);​
 +    }
 +         
 +    return 0;
 +}</​code>​
 +=== Makefile ===
 +<code bash># kompilator c
 +CC = gcc
 + 
 +# konsolidator
 +CFLAGS=-g
 +
 +all: server client
 + 
 +server: server.o
 + $(CC) $(LFLAGS) server.o -lpthread -o server
 + 
 +server.o: server.c
 + $(CC) -c server.c -o server.o
 +
 +client: client.o
 + $(CC) client.o -o client
 + 
 +client.o: client.c
 + $(CC) -c client.c -o client.o
 + 
 +clean:
 + rm -f *.o</​code>​
 +===== Zajęcia 9 =====
 +<code c>/*
 +    C socket server example, handles multiple clients using threads
 +    Compile
 +    gcc server.c -lpthread -o server
 +*/
 + 
 +#​include<​stdio.h>​
 +#​include<​string.h> ​   //strlen
 +#​include<​stdlib.h> ​   //strlen
 +#​include<​sys/​socket.h>​
 +#​include<​arpa/​inet.h>​ //inet_addr
 +#​include<​unistd.h> ​   //write
 +#​include<​pthread.h>​ //for threading , link with lpthread
 + 
 +//the thread function
 +void *connection_handler(void *);
 + 
 +int main(int argc , char *argv[])
 +{
 +    int socket_desc , client_sock , c;
 +    struct sockaddr_in server , client;
 +     
 +    //Create socket
 +    socket_desc = socket(AF_INET , SOCK_STREAM , 0);
 +    if (socket_desc == -1)
 +    {
 +        printf("​Could not create socket"​);​
 +    }
 +    puts("​Socket created"​);​
 +     
 +    //Prepare the sockaddr_in structure
 +    server.sin_family = AF_INET;
 +    server.sin_addr.s_addr = INADDR_ANY;
 +    server.sin_port = htons( 8888 );
 +     
 +    //Bind
 +    if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0)
 +    {
 +        //print the error message
 +        perror("​bind failed. Error"​);​
 +        return 1;
 +    }
 +    puts("​bind done"​);​
 +     
 +    //Listen
 +    listen(socket_desc , 3);
 +     
 +    //Accept and incoming connection
 +    puts("​Waiting for incoming connections..."​);​
 +    c = sizeof(struct sockaddr_in);​
 +     
 +     
 +    //Accept and incoming connection
 +    puts("​Waiting for incoming connections..."​);​
 +    c = sizeof(struct sockaddr_in);​
 + pthread_t thread_id;
 +
 +    while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) )
 +    {
 +        puts("​Connection accepted"​);​
 +         
 +        if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0)
 +        {
 +            perror("​could not create thread"​);​
 +            return 1;
 +        }
 +         
 +        //Now join the thread , so that we dont terminate before the thread
 +        //​pthread_join( thread_id , NULL);
 +        puts("​Handler assigned"​);​
 +    }
 +     
 +    if (client_sock < 0)
 +    {
 +        perror("​accept failed"​);​
 +        return 1;
 +    }
 +     
 +    return 0;
 +}
 + 
 +/*
 + * This will handle connection for each client
 + * */
 +void *connection_handler(void *socket_desc)
 +{
 +    //Get the socket descriptor
 +    int sock = *(int*)socket_desc;​
 +    int read_size;
 +    char *message , client_message[2000];​
 +
 +    //Receive a message from client
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 )
 +    {
 + //end of string marker
 + client_message[read_size] = '​\0';​
 +
 + //Send the message back to client
 + write(sock , "​Werset 6" , strlen(client_message));​
 +
 + //clear the message buffer
 + memset(client_message,​ 0, 2000);
 +    }
 +     
 +    if(read_size == 0)
 +    {
 +        puts("​Client disconnected"​);​
 +        fflush(stdout);​
 +    }
 +    else if(read_size == -1)
 +    {
 +        perror("​recv failed"​);​
 +    }
 +         
 +    return 0;
 +} </​code>​
 +
 +==== Client C ====
 +
 +<code c>#​include<​stdio.h>​ //printf
 +#​include<​string.h> ​   //strlen
 +#​include<​sys/​socket.h> ​   //socket
 +#​include<​arpa/​inet.h>​ //inet_addr
 + 
 +int main(int argc , char *argv[])
 +{
 +    int sock;
 +    struct sockaddr_in server;
 +    char message[1000] , server_reply[2000];​
 +     
 +    //Create socket
 +    sock = socket(AF_INET , SOCK_STREAM , 0);
 +    if (sock == -1)
 +    {
 +        printf("​Could not create socket"​);​
 +    }
 +    puts("​Socket created"​);​
 +     
 +    server.sin_addr.s_addr = inet_addr("​127.0.0.1"​);​
 +    server.sin_family = AF_INET;
 +    server.sin_port = htons( 8888 );
 + 
 +    //Connect to remote server
 +    if (connect(sock , (struct sockaddr *)&​server , sizeof(server)) < 0)
 +    {
 +        perror("​connect failed. Error"​);​
 +        return 1;
 +    }
 +     
 +    puts("​Connected\n"​);​
 +     
 +    //keep communicating with server
 +    while(1)
 +    {
 +        printf("​Enter message : ");
 +        scanf("​%s"​ , message);
 +         
 +        //Send some data
 +        if( send(sock , message , strlen(message) , 0) < 0)
 +        {
 +            puts("​Send failed"​);​
 +            return 1;
 +        }
 +         
 +        //Receive a reply from the server
 +        if( recv(sock , server_reply , 2000 , 0) < 0)
 +        {
 +            puts("​recv failed"​);​
 +            break;
 +        }
 +         
 +        puts("​Server reply :");
 +        puts(server_reply);​
 +    }
 +     
 +    close(sock);​
 +    return 0;
 +}</​code>​
 +
 +
 +==== Kod Maciek Gonzo itp ====
 +<code c>
 +#​include<​stdio.h>​
 +#​include<​string.h> ​   //strlen
 +#​include<​stdlib.h> ​   //strlen
 +#​include<​sys/​socket.h>​
 +#​include<​arpa/​inet.h>​ //inet_addr
 +#​include<​unistd.h> ​   //write
 +#​include<​pthread.h>​ //for threading , link with lpthread
 + 
 +//the thread function
 +void *connection_handler(void *);
 +
 +void *client_function(void *socket_desc){
 +  int sock;
 +    struct sockaddr_in server_id;
 +    char message[1000] , server_reply[2000];​
 +     
 +    //Create socket
 +    sock = socket(AF_INET , SOCK_STREAM , 0);
 +    if (sock == -1)
 +    {
 +        printf("​Could not create socket"​);​
 +    }
 +    puts("​Socket created"​);​
 +     // 192.168.102.63 wogu
 +    server_id.sin_addr.s_addr = inet_addr("​localhost"​);​
 +    server_id.sin_family = AF_INET;
 +    server_id.sin_port = htons( 65000 );
 + 
 +    //Connect to remote server
 +
 +    if (connect(sock , (struct sockaddr *)&​server_id , sizeof(server_id)) < 0)
 +    {
 +        perror("​connect failed. Error"​);​
 +
 +    }else{
 +
 +    puts("​Connected\n"​);​
 +     
 +    //keep communicating with server
 +    while(1)
 +    {
 +        printf("​Enter message : ");
 +        scanf("​%s"​ , message);
 +         
 +        //Send some data
 +        if( send(sock , message , strlen(message) , 0) < 0)
 +        {
 +            puts("​Send failed"​);​
 +
 +        }
 +         
 +        //Receive a reply from the server
 +        if( recv(sock , server_reply , 2000 , 0) < 0)
 +        {
 +            puts("​recv failed"​);​
 +            break;
 +        }
 +         
 +        puts("​Server reply :");
 +        puts(server_reply);​
 +    }
 +     
 +    close(sock); ​
 +}
 +   
 +};
 + 
 +
 +void *server_function(void * sock){
 +    printf("​SERVER Start"​);​
 +fflush(stdout);​
 +    int socket_desc , client_sock , c;
 +    pthread_t thread_id;
 +    struct sockaddr_in server , client;
 + 
 +    //Create socket
 +    socket_desc = socket(AF_INET , SOCK_STREAM , 0);
 +    if (socket_desc == -1)
 +    {
 +        printf("​Could not create socket"​);​
 +    }
 +    puts("​Socket created"​);​
 + 
 +    //Prepare the sockaddr_in structure
 +    server.sin_family = AF_INET;
 +    server.sin_addr.s_addr = INADDR_ANY;
 +    server.sin_port = htons( 65000);
 + 
 +    //Bind
 +    if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0)
 +    {
 +        //print the error message
 +        perror("​bind failed. Error"​);​
 +        return;
 +    }
 +    puts("​bind done"​);​
 +
 +
 +
 +    //Listen
 +    listen(socket_desc , 3);
 + 
 +    //Accept and incoming connection
 +    puts("​Waiting for incoming connections..."​);​
 +    c = sizeof(struct sockaddr_in);​
 +fflush(stdout);​
 + 
 +    while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) )
 +    {
 +        puts("​Connection accepted"​);​
 + 
 +        if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0)
 +        {
 +            perror("​could not create thread"​);​
 +            return;
 +        }
 + 
 +        //Now join the thread , so that we dont terminate before the thread
 +        //​pthread_join( thread_id , NULL);
 +        puts("​Handler assigned"​);​
 +    }
 + 
 +    if (client_sock < 0)
 +    {
 +        perror("​accept failed"​);​
 +        return;
 +    }
 +}
 +
 +
 +int main(int argc , char *argv[])
 +{
 +    pthread_t thread_id;
 +//Create server
 +        if( pthread_create( &​thread_id , NULL ,  server_function , NULL) != 0)
 +        {
 +            perror("​could not create server thread"​);​
 +     fflush(stdout);​
 +            return 1;
 +        }
 +    pthread_t thread_id2;
 +
 + // Create client
 +        if( pthread_create( &​thread_id2 , NULL ,  client_function , NULL) != 0)
 +        {
 +            perror("​could not create client thread"​);​
 +     fflush(stdout);​
 +            return 1;
 +        }
 +
 +    return 0;
 +}
 + 
 +/*
 + * This will handle connection for each client
 + * */
 +void *connection_handler(void *socket_desc)
 +{
 +    //Get the socket descriptor
 +    int sock = *(int*)socket_desc;​
 +    int read_size;
 +    char *message , client_message[2000];​
 + 
 +    //Receive a message from client
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 )
 +    {
 +        //end of string marker
 + client_message[read_size] = '​\0';​
 + 
 + //Send the message back to client
 +        write(sock , "​Werset 5" , strlen(client_message));​
 + 
 + //clear the message buffer
 + memset(client_message,​ 0, 2000);
 +    }
 + 
 +    if(read_size == 0)
 +    {
 +        puts("​Client disconnected"​);​
 +        fflush(stdout);​
 +    }
 +    else if(read_size == -1)
 +    {
 +        perror("​recv failed"​);​
 +    }
 + 
 +    return 0;
 +
 +
 +</​code>​
studia/magisterskie/1sem/systemy_rownolegle_i_rozproszone.1457901069.txt.gz · ostatnio zmienione: 2016/03/13 21:31 przez wogu