Narzędzia użytkownika

Narzędzia witryny


studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone

Różnice

Różnice między wybraną wersją a wersją aktualną.

Odnośnik do tego porównania

Poprzednia rewizja po obu stronach Poprzednia wersja
Nowa wersja
Poprzednia wersja
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/04/14 10:58]
149.156.112.6
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/05/19 11:13] (aktualna)
149.156.112.6 [tmp/opencl_mat_transp/mat_transp/mat_transp.c]
Linia 1405: Linia 1405:
 }</​code>​ }</​code>​
 ===== Zajęcia 6 ===== ===== Zajęcia 6 =====
 +Pliki zadania: {{:​studia:​magisterskie:​1sem:​opencl_vecadd.tgz|}}
 +Działające 3 kernele: {{:​studia:​magisterskie:​1sem:​sala404.zip|}}
  
 +==== opencl_vecadd/​vecadd_host/​execute_kernels.c ====
 +<code c>#​include<​stdlib.h>​
 +#​include<​stdio.h>​
 +#include <​math.h>​
 +
 +#​include<​omp.h>​
 +#include <​CL/​cl.h>​
 +
 +#​include"​uth_ocl_intf.h"​
 +
 +#​include"​./​vecadd_host.h"​
 +#​include"​./​hardware_params.h"​
 +
 +#define time_measurments
 +
 +#ifdef time_measurments
 +#​include"​uth_time_intf.h"​
 +  static double t_begin, t_end, t_total;
 +#endif
 +
 +int execute_kernel_2_CPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  ​
 +  // create the kernel for CPU
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​
 +   // kernel name:         , file:
 +   "​vecadd_2_blocks_kernel",​ "​vecadd_2_blocks.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. Restoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  int nr_cores_CPU = omp_get_num_procs();​
 +  int nr_threads = nr_cores_CPU;​ // total number of threads
 +  int work_group_size = 1; // number of threads in work-group
 +  int nr_work_groups = nr_threads / work_group_size;​
 +  globalWorkSize[0] = nr_threads ;
 +  localWorkSize[0] = work_group_size ;
 +  ​
 +  printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 + nr_cores_CPU,​ nr_cores_CPU,​ 1);
 +  printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 + work_group_size,​ nr_threads / work_group_size, ​
 + nr_threads / work_group_size / nr_cores_CPU);​
 +  printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 + globalWorkSize[0],​ globalWorkSize[0]/​nr_cores_CPU, ​
 + globalWorkSize[0]/​nr_cores_CPU);​
 +  printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 + array_size, array_size/​globalWorkSize[0],​ array_size/​nr_cores_CPU);​
 +  ​
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  int size = array_size;
 +  int size_per_thread = ceilf((float)size/​nr_threads); ​
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &size);
 +  retval |= clSetKernelArg(kernel,​ 4, sizeof(int),​ &​size_per_thread);​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to Set the kernel arguments.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +int execute_kernel_3_CPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for CPU
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​
 +   // kernel name:         , file:
 +   "​vecadd_3_opt_blocks_kernel","​vecadd_3_opt_blocks.cl",​monitor);​
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  int nr_cores_CPU = omp_get_num_procs();​
 +  int nr_threads = nr_cores_CPU;​ // total number of threads
 +  int work_group_size = 1; // number of threads in work-group
 +  int nr_work_groups = nr_threads / work_group_size;​
 +  globalWorkSize[0] = nr_threads ;
 +  localWorkSize[0] = work_group_size ;
 +  ​
 +  printf("​\t\tExecuting kernel %d, on platform %d and device %d\n",
 + kernel_index,​ platform_index,​ device_index);​
 +  printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 + nr_cores_CPU,​ nr_cores_CPU,​ 1);
 +  printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 + work_group_size,​ nr_threads / work_group_size, ​
 + nr_threads / work_group_size / nr_cores_CPU);​
 +  printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 + globalWorkSize[0],​ globalWorkSize[0]/​nr_cores_CPU, ​
 + globalWorkSize[0]/​nr_cores_CPU);​
 +  printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 + array_size, array_size/​globalWorkSize[0],​ array_size/​nr_cores_CPU);​
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  int size = array_size;
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &size);
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to Set the kernel arguments.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    return 1;
 +  }
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +// tutaj modyfikujemy
 +int execute_kernel_0_GPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for GPU
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +     // kernel name:         , file:
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  ​
 +  // globalna liczba wątków i lokalna liczba wątków
 +  int work_group_size = WORK_GROUP_SIZE;​
 +  globalWorkSize[0] = array_size;
 +  localWorkSize[0] = work_group_size;​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0], ​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +  }
 +   
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  // ustawiamy liczbę argumentów
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  if (retval != CL_SUCCESS) {
 +      printf("​Failed to Set the kernel arguments.\n"​);​
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +      return 1;
 +    }
 +  ​
 + 
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +int execute_kernel_1_GPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for GPU
 +/*
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +     // kernel name:         , file:
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);*/
 +
 +    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 + ​  ​   // kernel name:         , file: */
 + ​  ​   "​vecadd_1_kernel",​ "​vecadd_1.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  ​
 +  int work_group_size = WORK_GROUP_SIZE;​
 +  globalWorkSize[0] = array_size / 4;
 +  localWorkSize[0] = work_group_size;​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0], ​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +  }
 +   
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  if (retval != CL_SUCCESS) {
 +      printf("​Failed to Set the kernel arguments.\n"​);​
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +      return 1;
 +    }
 +  ​
 + 
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +int execute_kernel_2_GPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for GPU
 +/*    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +     // kernel name:         , file:
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);*/
 +     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 + ​  ​   // kernel name:         , file: */
 + ​  ​   "​vecadd_2_blocks_kernel",​ "​vecadd_2_blocks.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  ​
 +  int work_group_size = WORK_GROUP_SIZE;​
 +  globalWorkSize[0] = array_size / work_group_size;​
 +  localWorkSize[0] = work_group_size;​
 +
 +  int size_per_thread = work_group_size;​
 +  int size = array_size;
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0], ​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +  }
 +   
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &size);
 +  retval |= clSetKernelArg(kernel,​ 4, sizeof(int),​ &​size_per_thread);​
 +  if (retval != CL_SUCCESS) {
 +      printf("​Failed to Set the kernel arguments.\n"​);​
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +      return 1;
 +    }
 +  ​
 + 
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +int execute_kernel_3_GPU(
 + int platform_index,​
 + int device_index,​
 + int kernel_index,​
 + int array_size,
 + cl_mem* memObjects,
 + int monitor
 + )
 +{
 +  ​
 +  cl_kernel kernel;
 +  cl_event kernel_execution;​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  double exec_time;
 +  cl_int retval;
 +  ​
 +  size_t globalWorkSize[1] = { 0 };
 +  size_t localWorkSize[1] = { 0 };
 +  ​
 +  int i,j,k;
 +  ​
 +  int size = array_size;
 +  ​
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the kernel for GPU
 +/*    utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +     // kernel name:         , file:
 +     "​vecadd_0_kernel",​ "​vecadd_0.cl",​ monitor);*/
 +     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 + ​  ​   // kernel name:         , file:
 +     "​vecadd_3_opt_blocks_kernel",​ "​vecadd_3_opt_blocks.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel: %lf\n",​ t_end-t_begin);​
 +#endif
 +  ​
 +  ​
 +  // choose the context
 +  cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +  ​
 +  // choose the command queue
 +  cl_command_queue command_queue = 
 +    utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \tRestoring context and command queue for platform %d and device %d\n",
 +    ​platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL){ ​
 +    printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +    ​platform_index,​ device_index);​
 +    printf("​%lu %lu\n",​ context, command_queue);​
 +  }
 +  ​
 +  ​
 +  int NDRange_dim = 1;
 +  ​
 +  int work_group_size = WORK_GROUP_SIZE;​
 +  globalWorkSize[0] = array_size;
 +  localWorkSize[0] = work_group_size;​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\t\t4. executing kernel %d, on platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +    printf("​PARAMETERS:​ \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n",
 +    ​NR_COMP_UNITS,​ NR_CORES_PER_CU*NR_COMP_UNITS,​ NR_CORES_PER_CU);​
 +    printf("​\t\tworkgroup size %d, nr_workgroups %d ,​nr_workgroups_per_CU %d\n",
 +    ​localWorkSize[0],​ globalWorkSize[0]/​localWorkSize[0],​
 +    ​globalWorkSize[0]/​localWorkSize[0]/​NR_COMP_UNITS);​
 +    printf("​\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​NR_COMP_UNITS,​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +    printf("​\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n",
 +    ​globalWorkSize[0],​ globalWorkSize[0]/​globalWorkSize[0], ​
 +    ​globalWorkSize[0]/​(NR_CORES_PER_CU*NR_COMP_UNITS));​
 +  }
 +   
 +  ​
 +  // choose the kernel
 +  kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +  ​
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +    ​kernel_index,​ platform_index,​ device_index);​
 +  }
 +  ​
 +  if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +    ​
 +    printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +    ​platform_index,​ device_index,​ kernel_index);​
 +    printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +    ​context,​ command_queue,​ kernel);
 +  }
 +  ​
 +  // Set the kernel arguments (result, a, b)
 +  retval = clSetKernelArg(kernel,​ 0, sizeof(cl_mem),​ &​memObjects[0]);​
 +  retval |= clSetKernelArg(kernel,​ 1, sizeof(cl_mem),​ &​memObjects[1]);​
 +  retval |= clSetKernelArg(kernel,​ 2, sizeof(cl_mem),​ &​memObjects[2]);​
 +  retval |= clSetKernelArg(kernel,​ 3, sizeof(int),​ &size);
 +
 +  if (retval != CL_SUCCESS) {
 +      printf("​Failed to Set the kernel arguments.\n"​);​
 +      //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +      return 1;
 +    }
 +  ​
 + 
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_begin = time_clock();​
 +#endif
 +  // Queue the kernel up for execution across the array
 +  retval = clEnqueueNDRangeKernel(command_queue,​ kernel, 1, NULL,
 +   globalWorkSize,​ localWorkSize,​
 +   0, NULL, &​kernel_execution);​
 +  clWaitForEvents(1,​ &​kernel_execution);​
 +  ​
 +  ​
 +#ifdef time_measurments
 +  clFinish(command_queue);​
 +  t_end = time_clock();​
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(kernel_execution,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  exec_time = ((double)endTime - (double)startTime)*1.0e-9;​
 +  printf("​EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n",​
 + t_end-t_begin,​ ((double)endTime - (double)startTime)*1.0e-9);​
 +  printf("​\tNumber of operations %d, performance %lf GFlops\n",​
 + array_size, array_size / exec_time * 1e-9);
 +  printf("​\tGBytes transferred to processor %lf, speed %lf GB/​s\n",​
 + 3*array_size*sizeof(float)*1e-9,​
 + 3*array_size*sizeof(float)/​exec_time*1e-9);​
 +#endif
 +  ​
 +  if (retval != CL_SUCCESS) {
 +    printf("​Failed to queue kernel for execution.\n"​);​
 +    //​Cleanup(context,​ command_queue,​ program, kernel, memObjects);​
 +    exit(-1);
 +  }
 +  ​
 +  return(0);
 +}
 +
 +
 +
 +
 +    ​
 +/*     // create the fourth kernel for GPU */
 +/*     ​kernel_index = 4;  */
 +/*     ​utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​ */
 +/*     // kernel name:         , file: */
 +/*     "​vecadd_4_opt_blocks_vect_kernel",​ "​vecadd_4_opt_blocks_vect.cl",​ monitor); */</​code>​
 +
 +==== opencl_vecadd/​vecadd_2_blocks.cl ====
 +<code c>
 +__kernel void vecadd_2_blocks_kernel(__global const float *a,
 +      ​__global const float *b,
 +      ​__global float *result,
 +                      const int size,
 +      const int size_per_thread)
 +{
 +  int gid = get_global_id(0);​
 +  int index_start = gid * size_per_thread;​
 +  int index_end = (gid+1) * size_per_thread;​
 +
 +  if (gid == 0) {
 + //​printf("​size:​ %d, %d", size, size_per_thread);​
 +  }
 +
 +  for (int i=index_start;​ i < index_end && i < size; i++) {
 +  //for (int i=index_start;​ i < index_end ; i++) {
 +    result[i] = a[i]+b[i];
 +  }
 +}</​code>​
 +===== Zajęcia 7 =====
 +{{:​studia:​magisterskie:​1sem:​zajecia_7.zip|}}
 +
 +==== tmp/​opencl_mat_transp/​mat_transp_1.cl ====
 +<code c> #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 +
 +__kernel void mat_transp_1_kernel(
 +  __global float *A , 
 +  __global float *B ,
 +  __global float *C ,
 +  int N)
 +{
 +
 +int id = get_global_id(0) * get_global_size(0) + get_global_id(1);​
 +float temp = 0.0;
 +
 +for(int i = 0; i<​N;​i++){
 + temp += A[id*N + i] *B[i];
 +}
 +
 +C[id] = temp;
 +}</​code>​
 +
 +==== tmp/​opencl_mat_transp/​mat_transp/​mat_transp.c ====
 +<code c>#​include<​stdlib.h>​
 +#​include<​stdio.h>​
 +#include <​math.h>​
 +
 +#include <​CL/​cl.h>​
 +
 +#​include"​uth_ocl_intf.h"​
 +
 +#define time_measurments
 +
 +#ifdef time_measurments
 +#​include"​uth_time_intf.h"​
 +  static double t_begin, t_end, t_total;
 +#endif
 +
 +
 +
 +#define BLOCK_SIZE 16
 +#define NR_GROUPS 16
 +#define MULT 16
 +#define WYMIAR (BLOCK_SIZE*NR_GROUPS*MULT)
 +#define ROZMIAR (WYMIAR*WYMIAR)
 +// Matrices are stored in row-major order: ​
 +// M(row, col) = M( row * WYMIAR + col ) 
 +
 +int verify_result(
 +  float* result,
 +  float* result_compare
 +   )
 +{
 +  // Verify the result
 +  int result_OK = 1;
 +  int i,j;
 +  for(i = 0; i < WYMIAR; i++) {
 +      if(fabs(result[i] - result_compare[i])>​1.e-6) {
 + result_OK = 0;
 + break;
 +    }
 +  }
 +  printf("​\t\t6. verifying results: ");
 +  if(result_OK) {
 +    printf("​Output is correct\n"​);​
 +  } else {
 +    printf("​Output is incorrect\n"​);​
 +    j = 0;
 +    for(i = 0; i < WYMIAR, j<100; i++) {
 + if(fabs(result[i] - result_compare[i])>​1.e-9) {
 +   j++;
 +   printf("​%d %16.8f %16.8f\n", ​
 +   i, result[i], result_compare[i]);​
 + }
 +    }
 +    exit(0);
 +  }
 +    /* for(i = 0; i < length; i++) { */
 +    /*   ​printf("​%16.8f %16.8f\n",​ result[i], result_compare[i]);​ */
 +    /* } */
 +
 +  return(result_OK);​
 +}
 +
 +  /​*----------------KERNEL CREATION PHASE----------------------*/​
 +void create_kernels()
 +{
 +
 +  // for all operations indicate explicit info messages
 +  int monitor = UTC_BASIC_INFO + 1;
 +
 +  int kernel_index;​
 +  ​
 +  int platform_index = utv_ocl_struct.current_platform_index;​
 +
 +  if(utr_ocl_CPU_context_exists(platform_index)){
 +
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  ​
 +  // create the first kernel for CPU
 +  /* kernel_index = 0;  */
 +  /* utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_CPU,​ kernel_index,​ */
 +  /*   // kernel name:         , file: */
 +  /*   "​mat_transp_1_kernel",​ "​mat_transp_1.cl",​ monitor); */
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel %d: %lf\n",​ kernel_index,​ t_end-t_begin);​
 +#endif
 +  ​
 +  }
 +
 +  if(utr_ocl_GPU_context_exists(platform_index)){
 +
 +
 +#ifdef time_measurments
 +  t_begin = time_clock();​
 +#endif
 +  // create the first kernel for GPU
 +  kernel_index = 0; 
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +   // kernel name:         , file:
 +   "​mat_transp_1_kernel",​ "​mat_transp_1.cl",​ monitor);
 +  ​
 +#ifdef time_measurments
 +  t_end = time_clock();​
 +  printf("​EXECUTION TIME: creating CPU kernel %d: %lf\n",​ kernel_index,​ t_end-t_begin);​
 +#endif
 +  ​
 +  // create the second kernel for GPU
 +  kernel_index = 1; 
 +  utr_ocl_create_kernel_dev_type( platform_index,​ UTC_OCL_DEVICE_GPU,​ kernel_index,​
 +   // kernel name:         , file:
 +   "​mat_transp_2_kernel",​ "​mat_transp_2.cl",​ monitor);
 +
 +  ​
 +
 +  }
 +
 +}
 +
 +  /​*----------------EXECUTION PHASE----------------------*/​
 +int execute_kernels()
 +{
 +
 +  // for all operations indicate explicit info messages
 +  int monitor = UTC_BASIC_INFO + 1;
 +
 +  int platform_index = utv_ocl_struct.current_platform_index;​
 +  utt_ocl_platform_struct platform_struct = utv_ocl_struct.list_of_platforms[platform_index];​
 +
 +  int kernel_index;​
 +  int i,j,n;
 +
 +  double nr_access;
 +  double t1,t2;
 +
 +  if(monitor>​UTC_BASIC_INFO){
 +    printf("​\n------------Starting execution phase----------------\n"​);​
 +  }
 +  ​
 +  // create matrices
 +  n=WYMIAR;
 +  float* A = (float *) malloc(ROZMIAR*sizeof(float));​
 +  float* B = (float *) malloc(WYMIAR*sizeof(float));​
 +  float* C = (float *) malloc(WYMIAR*sizeof(float));​
 +  float* D = (float *) malloc(WYMIAR*sizeof(float));​
 +  ​
 +  for(i=0;​i<​ROZMIAR;​i++) A[i]=1.0*i/​10000000.0;​
 +
 +  for(i=0;​i<​WYMIAR;​i++) B[i]=1.0*i/​10000000.0;​
 +
 +  // mnozenie macierz wektor
 +  for(i=0; i<​WYMIAR;​ i++){
 + for(j=0; j<​WYMIAR;​ j++){
 + D[i] += A[i*WYMIAR + j] * B[j];
 + }
 +  }
 +  ​
 +  nr_access= ROZMIAR + 2.0*WYMIAR; // read + write
 +  ​
 +  printf("​mat_transp:​ nr_access %lf\n",​ nr_access);
 +  ​
 +  // get hardware characteristics to select good matrix shape
 +  // the set of device characteristics stored in data structure
 +  int device_index = 0; 
 +  utt_ocl_device_struct device_struct = 
 +    utv_ocl_struct.list_of_platforms[platform_index].list_of_devices[device_index];​
 +  double global_mem_bytes = device_struct.global_mem_bytes;​
 +  double global_max_alloc = device_struct.global_max_alloc;​
 +  double local_mem_bytes = device_struct.local_mem_bytes;​
 +  double constant_mem_bytes = device_struct.constant_mem_bytes;​
 +  int max_num_comp_units = device_struct.max_num_comp_units;​
 +  int max_work_group_size = device_struct.max_work_group_size;​
 +  ​
 +  // in a loop over devices (or for a selected device)
 +  int idev=0; ​
 +  for(idev=0; idev<​platform_struct.number_of_devices;​ idev++){
 +    ​
 +    // int device_type = .....
 +    // choose device_index
 +    // int device_index = utr_ocl_select_device(platform_index,​ device_type);​
 +    int device_index = idev;
 +    int device_type = utr_ocl_device_type(platform_index,​ device_index); ​
 +    ​
 +    if(device_index>​0 && device_type==utr_ocl_device_type(platform_index,​ device_index-1)) break; ​
 +    if(device_type == UTC_OCL_DEVICE_CPU) break;
 +
 +    // choose the context
 +    cl_context context = utr_ocl_select_context(platform_index,​ device_index);  ​
 +    ​
 +    // choose the command queue
 +    cl_command_queue command_queue = 
 +      utr_ocl_select_command_queue(platform_index,​ device_index);  ​
 +    ​
 +    if(monitor>​UTC_BASIC_INFO){
 +      printf("​\nExecution:​ \t0. restoring context and command queue for platform %d and device %d\n",
 +      ​platform_index,​ device_index);​
 +    }
 +    ​
 +    if(context == NULL || command_queue == NULL){ ​
 +      ​
 +      printf("​failed to restore context and command queue for platform %d, device %d\n", ​
 +      ​platform_index,​ device_index);​
 +      printf("​%lu %lu\n",​ context, command_queue);​
 +    }
 +    ​
 +    // choose the kernel
 +    kernel_index = 0;
 +    cl_kernel kernel = utr_ocl_select_kernel(platform_index,​ device_index,​ kernel_index);  ​
 +    ​
 +    if(monitor>​UTC_BASIC_INFO){
 +      printf("​\nExecution:​ \t3. restoring kernel %d for platform %d and device %d\n",
 +      ​kernel_index,​ platform_index,​ device_index);​
 +    }
 +    ​
 +    if(context == NULL || command_queue == NULL || kernel == NULL){ ​
 +      ​
 +      printf("​failed to restore kernel for platform %d, device %d, kernel %d\n", ​
 +      ​platform_index,​ device_index,​ kernel_index);​
 +      printf("​context %lu, command queue %lu, kernel %lu\n", ​
 +      ​context,​ command_queue,​ kernel);
 +    }
 +    ​
 +    for(i=0;​i<​WYMIAR;​i++) C[i]=0.0;
 +    time_init();​ t1 = time_clock();​
 +    ​
 +    // call routine to perform matrix transposition
 +    mat_transp_host(kernel_index,​ A, B, C, n, context, kernel, command_queue);​
 +    ​
 +    t2 = time_clock();​ time_print();​
 +    printf("​GB/​s = %lf\n\n",​ nr_access*sizeof(float)/​(t2-t1)/​1024/​1024/​1024);​
 +    ​
 +    ​
 +    // verify result ​
 +    verify_result(C,​ D);
 +    ​
 +    ​
 +  } // end loop over devices
 +  ​
 +  return(0);
 +}
 +
 +
 +
 +// Matrix transposition - Host code 
 +// Matrix dimensions are assumed to be multiples of BLOCK_SIZE ​
 +int mat_transp_host(
 +  int kernel_index,​
 +  float* A, 
 +  float* B,
 +  float* C, 
 +  int N, 
 +  const cl_context context, ​
 +  const cl_kernel mat_transp_kernel, ​
 +  const cl_command_queue queue
 +
 +
 +
 +// Load A to device memory ​
 +  size_t size_bytesm = N*N*sizeof(float); ​
 +  size_t size_bytesw = N*sizeof(float); ​
 +  ​
 +
 +  cl_mem d_A = clCreateBuffer(context,​ CL_MEM_READ_ONLY,​
 + size_bytesm,​ NULL, NULL); ​
 +
 +  // Write A to device memory ​
 +  clEnqueueWriteBuffer(queue,​ d_A, CL_TRUE, 0, size_bytesm,​ A, 0, 0, 0); 
 +
 +  cl_mem d_B = clCreateBuffer(context,​ CL_MEM_READ_ONLY,​
 + size_bytesw,​ NULL, NULL); ​
 +
 +  // Write A to device memory ​
 +  clEnqueueWriteBuffer(queue,​ d_B, CL_TRUE, 0, size_bytesw,​ B, 0, 0, 0); 
 +
 +  // Allocate B in device memory ​
 +  cl_mem d_C = clCreateBuffer(context,​ CL_MEM_WRITE_ONLY,​ size_bytesw,​ NULL, NULL); ​
 +
 +
 +  // Invoke kernel ​
 +  clSetKernelArg(mat_transp_kernel,​ 0, sizeof(cl_mem),​ (void*)&​d_A);​
 +  clSetKernelArg(mat_transp_kernel,​ 1, sizeof(cl_mem),​ (void*)&​d_B);​
 +  clSetKernelArg(mat_transp_kernel,​ 2, sizeof(cl_mem),​ (void*)&​d_C);​
 +  clSetKernelArg(mat_transp_kernel,​ 3, sizeof(int),​ (void*)&​N);​
 +
 +  size_t localWorkSize[3];​
 +  size_t globalWorkSize[3];​
 +  cl_uint work_dim;
 +
 +  if(kernel_index==3){
 +
 +    work_dim = 2;
 +    localWorkSize[0] = BLOCK_SIZE; ​
 +    globalWorkSize[0] = N/​MULT; ​
 +    localWorkSize[1] = BLOCK_SIZE; ​
 +    globalWorkSize[1] = N/​MULT; ​
 +    localWorkSize[2] = 0; 
 +    globalWorkSize[2] = 0; 
 +
 +
 +  }
 +  else{
 +
 +    work_dim = 2;
 +    localWorkSize[0] = BLOCK_SIZE; ​
 +    globalWorkSize[0] = sqrt(N); ​
 +    localWorkSize[1] = BLOCK_SIZE; ​
 +    globalWorkSize[1] = sqrt(N); ​
 +    localWorkSize[2] = 0; 
 +    globalWorkSize[2] = 0; 
 +  ​
 +  }
 +
 +  clFinish(queue);​
 +  double t1 = time_clock();​
 +  // Enqueue a kernel run call
 +  cl_event ndrEvt;
 +  clEnqueueNDRangeKernel(queue,​ mat_transp_kernel,​ work_dim, 0, 
 + globalWorkSize,​ localWorkSize,​ 0, 0, &​ndrEvt); ​
 +  clWaitForEvents(1,​ &​ndrEvt);​
 +  clFinish(queue);​
 +  double t2 = time_clock();​
 +  // Calculate performance ​
 +  cl_ulong startTime;
 +  cl_ulong endTime;
 +  ​
 +  // Get kernel profiling info 
 +  clGetEventProfilingInfo(ndrEvt,​
 +   CL_PROFILING_COMMAND_START,​
 +   sizeof(cl_ulong),​
 +   &​startTime,​
 +   0);
 +  clGetEventProfilingInfo(ndrEvt,​
 +   CL_PROFILING_COMMAND_END,​
 +   sizeof(cl_ulong),​
 +   &​endTime,​
 +   0);
 +  double time = (double)endTime - (double)startTime;​
 +  printf("​\nKernel execution internal: time %lf, GB/s = %lf\n", ​
 + time*1.0e-9,​ 2.0*N*N*sizeof(float)/​(time*1.0e-9)/​1024/​1024/​1024);​
 +
 +  printf("​Kernel execution external: time %lf,  GB/s = %lf\n\n", ​
 + t2-t1, 2.0*N*N*sizeof(float)/​(t2-t1)/​1024/​1024/​1024);​
 +
 +  // Read B from device memory ​
 +  clEnqueueReadBuffer(queue,​ d_C, CL_TRUE, 0, size_bytesw,​ C, 0, 0, 0); 
 +
 +  // Free device memory ​
 +  clReleaseMemObject(d_A); ​
 +  clReleaseMemObject(d_B); ​
 +  clReleaseMemObject(d_C);​
 +
 +  return(0);
 +
 +}</​code>​
 +==== Kod wogu ====
 +=== server.c ====
 +<code c>
 +/*
 +    C socket server example, handles multiple clients using threads
 +    Compile
 +    gcc server.c -lpthread -o server
 +*/
 + 
 +#​include<​stdio.h>​
 +#​include<​string.h> ​   //strlen
 +#​include<​stdlib.h> ​   //strlen
 +#​include<​sys/​socket.h>​
 +#​include<​arpa/​inet.h>​ //inet_addr
 +#​include<​unistd.h> ​   //write
 +#​include<​pthread.h>​ //for threading , link with lpthread
 +#include <​unistd.h>​
 + 
 +//the thread function
 +void *connection_handler(void *);
 + 
 +int main(int argc , char *argv[])
 +{
 +    int counter = 0;
 +    pid_t pid = fork();
 +
 +    if (pid == 0)
 +    {
 +       ​  ​   int sock;
 +     struct sockaddr_in server;
 +     char message[1000] , server_reply[2000];​
 +            int counter;
 +     unsigned int ip[] = { 192, 168, 102, 1 };
 +            char* ip_final;
 + char *buf;
 + size_t sz;
 +  
 +     //Create socket
 +
 +            for ( counter = 0; counter < 255; ++counter ) {
 +
 +     sock = socket(AF_INET , SOCK_STREAM , 0);
 +     if (sock == -1)
 +     {
 + printf("​Could not create socket"​);​
 +     }
 +     puts("​Socket created"​);​
 +
 +     //​printf("​192.168.102.%d",​ counter);
 +
 + sz = snprintf(NULL,​ 0, "​192.168.102.%d",​ counter);
 + buf = (char *)malloc(sz + 1); /* make sure you check for != NULL in real code */
 +     ​
 +     server.sin_addr.s_addr = inet_addr(buf);​
 +     server.sin_family = AF_INET;
 +     server.sin_port = htons( 65001 );
 +  
 +     //Connect to remote server
 +     if (connect(sock , (struct sockaddr *)&​server , sizeof(server)) < 0)
 +     {
 + perror("​connect failed. Error"​);​
 + continue;​
 +     }
 +  
 +     puts("​Connected\n"​);​
 +  
 +     //keep communicating with server
 +     while(1)
 +     {
 + //Send some data
 + if( send(sock , "​Werset 6-client"​ , 15 , 0) < 0)
 + {
 +     puts("​Send failed"​);​
 +     return 1;
 + }
 +  
 + //​Receive a reply from the server
 + if( recv(sock , server_reply , 2000 , 0) < 0)
 + {
 +     puts("​recv failed"​);​
 +     break;
 + }
 +
 + puts(server_reply);​
 +
 + break;
 +     }
 +  
 +     close(sock);​
 +           }
 +    }
 +    else if (pid > 0)
 +    {
 +        int socket_desc , client_sock , c;
 +     struct sockaddr_in server , client;
 +      
 +     //Create socket
 +     socket_desc = socket(AF_INET , SOCK_STREAM , 0);
 +     if (socket_desc == -1)
 +     {
 + printf("​Could not create socket"​);​
 +     }
 +     puts("​Socket created"​);​
 +      
 +     //Prepare the sockaddr_in structure
 +     server.sin_family = AF_INET;
 +     server.sin_addr.s_addr = INADDR_ANY;
 +     server.sin_port = htons( 65001 );
 +      
 +     //Bind
 +     if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0)
 +     {
 + //print the error message
 + perror("​bind failed. Error"​);​
 + return 1;
 +     }
 +     puts("​bind done"​);​
 +      
 +     //Listen
 +     listen(socket_desc , 3);
 +      
 +     //Accept and incoming connection
 +     puts("​Waiting for incoming connections..."​);​
 +     c = sizeof(struct sockaddr_in);​
 +      
 +      
 +     //Accept and incoming connection
 +     puts("​Waiting for incoming connections..."​);​
 +     c = sizeof(struct sockaddr_in);​
 + pthread_t thread_id;
 +
 +     while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) )
 +     {
 + puts("​Connection accepted"​);​
 +  
 + if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0)
 + {
 +     perror("​could not create thread"​);​
 +     return 1;
 + }
 +  
 + //Now join the thread , so that we dont terminate before the thread
 + //​pthread_join( thread_id , NULL);
 + puts("​Handler assigned"​);​
 +     }
 +      
 +     if (client_sock < 0)
 +     {
 + perror("​accept failed"​);​
 + return 1;
 +     }
 +    }
 +    else
 +    {
 +        // fork failed
 +        printf("​fork() failed!\n"​);​
 +        return 1;
 +    }
 +     
 +    return 0;
 +}
 + 
 +/*
 + * This will handle connection for each client
 + * */
 +void *connection_handler(void *socket_desc)
 +{
 +    //Get the socket descriptor
 +    int sock = *(int*)socket_desc;​
 +    int read_size;
 +    char *message , client_message[2000];​
 +
 +    //Receive a message from client
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 )
 +    {
 + printf("​%s\n",​ client_message);​
 +
 + //end of string marker
 + client_message[read_size] = '​\0';​
 +
 + //Send the message back to client
 + write(sock , "​Werset 6" , 8);
 +
 + //clear the message buffer
 + memset(client_message,​ 0, 2000);
 +    }
 +     
 +    if(read_size == 0)
 +    {
 +        puts("​Client disconnected"​);​
 +        fflush(stdout);​
 +    }
 +    else if(read_size == -1)
 +    {
 +        perror("​recv failed"​);​
 +    }
 +         
 +    return 0;
 +}</​code>​
 +=== Makefile ===
 +<code bash># kompilator c
 +CC = gcc
 + 
 +# konsolidator
 +CFLAGS=-g
 +
 +all: server client
 + 
 +server: server.o
 + $(CC) $(LFLAGS) server.o -lpthread -o server
 + 
 +server.o: server.c
 + $(CC) -c server.c -o server.o
 +
 +client: client.o
 + $(CC) client.o -o client
 + 
 +client.o: client.c
 + $(CC) -c client.c -o client.o
 + 
 +clean:
 + rm -f *.o</​code>​
 +===== Zajęcia 9 =====
 +<code c>/*
 +    C socket server example, handles multiple clients using threads
 +    Compile
 +    gcc server.c -lpthread -o server
 +*/
 + 
 +#​include<​stdio.h>​
 +#​include<​string.h> ​   //strlen
 +#​include<​stdlib.h> ​   //strlen
 +#​include<​sys/​socket.h>​
 +#​include<​arpa/​inet.h>​ //inet_addr
 +#​include<​unistd.h> ​   //write
 +#​include<​pthread.h>​ //for threading , link with lpthread
 + 
 +//the thread function
 +void *connection_handler(void *);
 + 
 +int main(int argc , char *argv[])
 +{
 +    int socket_desc , client_sock , c;
 +    struct sockaddr_in server , client;
 +     
 +    //Create socket
 +    socket_desc = socket(AF_INET , SOCK_STREAM , 0);
 +    if (socket_desc == -1)
 +    {
 +        printf("​Could not create socket"​);​
 +    }
 +    puts("​Socket created"​);​
 +     
 +    //Prepare the sockaddr_in structure
 +    server.sin_family = AF_INET;
 +    server.sin_addr.s_addr = INADDR_ANY;
 +    server.sin_port = htons( 8888 );
 +     
 +    //Bind
 +    if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0)
 +    {
 +        //print the error message
 +        perror("​bind failed. Error"​);​
 +        return 1;
 +    }
 +    puts("​bind done"​);​
 +     
 +    //Listen
 +    listen(socket_desc , 3);
 +     
 +    //Accept and incoming connection
 +    puts("​Waiting for incoming connections..."​);​
 +    c = sizeof(struct sockaddr_in);​
 +     
 +     
 +    //Accept and incoming connection
 +    puts("​Waiting for incoming connections..."​);​
 +    c = sizeof(struct sockaddr_in);​
 + pthread_t thread_id;
 +
 +    while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) )
 +    {
 +        puts("​Connection accepted"​);​
 +         
 +        if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0)
 +        {
 +            perror("​could not create thread"​);​
 +            return 1;
 +        }
 +         
 +        //Now join the thread , so that we dont terminate before the thread
 +        //​pthread_join( thread_id , NULL);
 +        puts("​Handler assigned"​);​
 +    }
 +     
 +    if (client_sock < 0)
 +    {
 +        perror("​accept failed"​);​
 +        return 1;
 +    }
 +     
 +    return 0;
 +}
 + 
 +/*
 + * This will handle connection for each client
 + * */
 +void *connection_handler(void *socket_desc)
 +{
 +    //Get the socket descriptor
 +    int sock = *(int*)socket_desc;​
 +    int read_size;
 +    char *message , client_message[2000];​
 +
 +    //Receive a message from client
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 )
 +    {
 + //end of string marker
 + client_message[read_size] = '​\0';​
 +
 + //Send the message back to client
 + write(sock , "​Werset 6" , strlen(client_message));​
 +
 + //clear the message buffer
 + memset(client_message,​ 0, 2000);
 +    }
 +     
 +    if(read_size == 0)
 +    {
 +        puts("​Client disconnected"​);​
 +        fflush(stdout);​
 +    }
 +    else if(read_size == -1)
 +    {
 +        perror("​recv failed"​);​
 +    }
 +         
 +    return 0;
 +} </​code>​
 +
 +==== Client C ====
 +
 +<code c>#​include<​stdio.h>​ //printf
 +#​include<​string.h> ​   //strlen
 +#​include<​sys/​socket.h> ​   //socket
 +#​include<​arpa/​inet.h>​ //inet_addr
 + 
 +int main(int argc , char *argv[])
 +{
 +    int sock;
 +    struct sockaddr_in server;
 +    char message[1000] , server_reply[2000];​
 +     
 +    //Create socket
 +    sock = socket(AF_INET , SOCK_STREAM , 0);
 +    if (sock == -1)
 +    {
 +        printf("​Could not create socket"​);​
 +    }
 +    puts("​Socket created"​);​
 +     
 +    server.sin_addr.s_addr = inet_addr("​127.0.0.1"​);​
 +    server.sin_family = AF_INET;
 +    server.sin_port = htons( 8888 );
 + 
 +    //Connect to remote server
 +    if (connect(sock , (struct sockaddr *)&​server , sizeof(server)) < 0)
 +    {
 +        perror("​connect failed. Error"​);​
 +        return 1;
 +    }
 +     
 +    puts("​Connected\n"​);​
 +     
 +    //keep communicating with server
 +    while(1)
 +    {
 +        printf("​Enter message : ");
 +        scanf("​%s"​ , message);
 +         
 +        //Send some data
 +        if( send(sock , message , strlen(message) , 0) < 0)
 +        {
 +            puts("​Send failed"​);​
 +            return 1;
 +        }
 +         
 +        //Receive a reply from the server
 +        if( recv(sock , server_reply , 2000 , 0) < 0)
 +        {
 +            puts("​recv failed"​);​
 +            break;
 +        }
 +         
 +        puts("​Server reply :");
 +        puts(server_reply);​
 +    }
 +     
 +    close(sock);​
 +    return 0;
 +}</​code>​
 +
 +
 +==== Kod Maciek Gonzo itp ====
 +<code c>
 +#​include<​stdio.h>​
 +#​include<​string.h> ​   //strlen
 +#​include<​stdlib.h> ​   //strlen
 +#​include<​sys/​socket.h>​
 +#​include<​arpa/​inet.h>​ //inet_addr
 +#​include<​unistd.h> ​   //write
 +#​include<​pthread.h>​ //for threading , link with lpthread
 + 
 +//the thread function
 +void *connection_handler(void *);
 +
 +void *client_function(void *socket_desc){
 +  int sock;
 +    struct sockaddr_in server_id;
 +    char message[1000] , server_reply[2000];​
 +     
 +    //Create socket
 +    sock = socket(AF_INET , SOCK_STREAM , 0);
 +    if (sock == -1)
 +    {
 +        printf("​Could not create socket"​);​
 +    }
 +    puts("​Socket created"​);​
 +     // 192.168.102.63 wogu
 +    server_id.sin_addr.s_addr = inet_addr("​localhost"​);​
 +    server_id.sin_family = AF_INET;
 +    server_id.sin_port = htons( 65000 );
 + 
 +    //Connect to remote server
 +
 +    if (connect(sock , (struct sockaddr *)&​server_id , sizeof(server_id)) < 0)
 +    {
 +        perror("​connect failed. Error"​);​
 +
 +    }else{
 +
 +    puts("​Connected\n"​);​
 +     
 +    //keep communicating with server
 +    while(1)
 +    {
 +        printf("​Enter message : ");
 +        scanf("​%s"​ , message);
 +         
 +        //Send some data
 +        if( send(sock , message , strlen(message) , 0) < 0)
 +        {
 +            puts("​Send failed"​);​
 +
 +        }
 +         
 +        //Receive a reply from the server
 +        if( recv(sock , server_reply , 2000 , 0) < 0)
 +        {
 +            puts("​recv failed"​);​
 +            break;
 +        }
 +         
 +        puts("​Server reply :");
 +        puts(server_reply);​
 +    }
 +     
 +    close(sock); ​
 +}
 +   
 +};
 + 
 +
 +void *server_function(void * sock){
 +    printf("​SERVER Start"​);​
 +fflush(stdout);​
 +    int socket_desc , client_sock , c;
 +    pthread_t thread_id;
 +    struct sockaddr_in server , client;
 + 
 +    //Create socket
 +    socket_desc = socket(AF_INET , SOCK_STREAM , 0);
 +    if (socket_desc == -1)
 +    {
 +        printf("​Could not create socket"​);​
 +    }
 +    puts("​Socket created"​);​
 + 
 +    //Prepare the sockaddr_in structure
 +    server.sin_family = AF_INET;
 +    server.sin_addr.s_addr = INADDR_ANY;
 +    server.sin_port = htons( 65000);
 + 
 +    //Bind
 +    if( bind(socket_desc,​(struct sockaddr *)&​server , sizeof(server)) < 0)
 +    {
 +        //print the error message
 +        perror("​bind failed. Error"​);​
 +        return;
 +    }
 +    puts("​bind done"​);​
 +
 +
 +
 +    //Listen
 +    listen(socket_desc , 3);
 + 
 +    //Accept and incoming connection
 +    puts("​Waiting for incoming connections..."​);​
 +    c = sizeof(struct sockaddr_in);​
 +fflush(stdout);​
 + 
 +    while( (client_sock = accept(socket_desc,​ (struct sockaddr *)&​client,​ (socklen_t*)&​c)) )
 +    {
 +        puts("​Connection accepted"​);​
 + 
 +        if( pthread_create( &​thread_id , NULL ,  connection_handler , (void*) &​client_sock) < 0)
 +        {
 +            perror("​could not create thread"​);​
 +            return;
 +        }
 + 
 +        //Now join the thread , so that we dont terminate before the thread
 +        //​pthread_join( thread_id , NULL);
 +        puts("​Handler assigned"​);​
 +    }
 + 
 +    if (client_sock < 0)
 +    {
 +        perror("​accept failed"​);​
 +        return;
 +    }
 +}
 +
 +
 +int main(int argc , char *argv[])
 +{
 +    pthread_t thread_id;
 +//Create server
 +        if( pthread_create( &​thread_id , NULL ,  server_function , NULL) != 0)
 +        {
 +            perror("​could not create server thread"​);​
 +     fflush(stdout);​
 +            return 1;
 +        }
 +    pthread_t thread_id2;
 +
 + // Create client
 +        if( pthread_create( &​thread_id2 , NULL ,  client_function , NULL) != 0)
 +        {
 +            perror("​could not create client thread"​);​
 +     fflush(stdout);​
 +            return 1;
 +        }
 +
 +    return 0;
 +}
 + 
 +/*
 + * This will handle connection for each client
 + * */
 +void *connection_handler(void *socket_desc)
 +{
 +    //Get the socket descriptor
 +    int sock = *(int*)socket_desc;​
 +    int read_size;
 +    char *message , client_message[2000];​
 + 
 +    //Receive a message from client
 +    while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 )
 +    {
 +        //end of string marker
 + client_message[read_size] = '​\0';​
 + 
 + //Send the message back to client
 +        write(sock , "​Werset 5" , strlen(client_message));​
 + 
 + //clear the message buffer
 + memset(client_message,​ 0, 2000);
 +    }
 + 
 +    if(read_size == 0)
 +    {
 +        puts("​Client disconnected"​);​
 +        fflush(stdout);​
 +    }
 +    else if(read_size == -1)
 +    {
 +        perror("​recv failed"​);​
 +    }
 + 
 +    return 0;
 +
 +
 +</​code>​
studia/magisterskie/1sem/systemy_rownolegle_i_rozproszone.1460624294.txt.gz · ostatnio zmienione: 2016/04/14 10:58 przez 149.156.112.6