Różnice między wybraną wersją a wersją aktualną.
| Poprzednia rewizja po obu stronach Poprzednia wersja Nowa wersja | Poprzednia wersja | ||
|
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/03/31 10:42] 149.156.112.6 [Zajęcia 5] |
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/05/19 11:13] (aktualna) 149.156.112.6 [tmp/opencl_mat_transp/mat_transp/mat_transp.c] |
||
|---|---|---|---|
| Linia 1393: | Linia 1393: | ||
| return 0; | return 0; | ||
| } | } | ||
| + | </code> | ||
| + | ==== HelloWorld.cl ==== | ||
| + | <code c> | ||
| + | __kernel void hello_kernel(__global const float *a, | ||
| + | __global const float *b, | ||
| + | __global float *result) | ||
| + | { | ||
| + | int gid = get_global_id(0); | ||
| + | |||
| + | if(gid<1) result[gid] = a[gid] + b[gid]; | ||
| + | }</code> | ||
| + | ===== Zajęcia 6 ===== | ||
| + | Pliki zadania: {{:studia:magisterskie:1sem:opencl_vecadd.tgz|}} | ||
| + | Działające 3 kernele: {{:studia:magisterskie:1sem:sala404.zip|}} | ||
| + | |||
| + | ==== opencl_vecadd/vecadd_host/execute_kernels.c ==== | ||
| + | <code c>#include<stdlib.h> | ||
| + | #include<stdio.h> | ||
| + | #include <math.h> | ||
| + | |||
| + | #include<omp.h> | ||
| + | #include <CL/cl.h> | ||
| + | |||
| + | #include"uth_ocl_intf.h" | ||
| + | |||
| + | #include"./vecadd_host.h" | ||
| + | #include"./hardware_params.h" | ||
| + | |||
| + | #define time_measurments | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | #include"uth_time_intf.h" | ||
| + | static double t_begin, t_end, t_total; | ||
| + | #endif | ||
| + | |||
| + | int execute_kernel_2_CPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | | ||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | | ||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | | ||
| + | int i,j,k; | ||
| + | | ||
| + | | ||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | | ||
| + | | ||
| + | // create the kernel for CPU | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_2_blocks_kernel", "vecadd_2_blocks.cl", monitor); | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | | ||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | | ||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. Restoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | | ||
| + | | ||
| + | int NDRange_dim = 1; | ||
| + | int nr_cores_CPU = omp_get_num_procs(); | ||
| + | int nr_threads = nr_cores_CPU; // total number of threads | ||
| + | int work_group_size = 1; // number of threads in work-group | ||
| + | int nr_work_groups = nr_threads / work_group_size; | ||
| + | globalWorkSize[0] = nr_threads ; | ||
| + | localWorkSize[0] = work_group_size ; | ||
| + | | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | nr_cores_CPU, nr_cores_CPU, 1); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | work_group_size, nr_threads / work_group_size, | ||
| + | nr_threads / work_group_size / nr_cores_CPU); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/nr_cores_CPU, | ||
| + | globalWorkSize[0]/nr_cores_CPU); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | array_size, array_size/globalWorkSize[0], array_size/nr_cores_CPU); | ||
| + | | ||
| + | | ||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | | ||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | | ||
| + | int size = array_size; | ||
| + | int size_per_thread = ceilf((float)size/nr_threads); | ||
| + | | ||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
| + | retval |= clSetKernelArg(kernel, 4, sizeof(int), &size_per_thread); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | | ||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | int execute_kernel_3_CPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | | ||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | | ||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | | ||
| + | int i,j,k; | ||
| + | | ||
| + | | ||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | | ||
| + | // create the kernel for CPU | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_3_opt_blocks_kernel","vecadd_3_opt_blocks.cl",monitor); | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | | ||
| + | | ||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | | ||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | | ||
| + | | ||
| + | int NDRange_dim = 1; | ||
| + | int nr_cores_CPU = omp_get_num_procs(); | ||
| + | int nr_threads = nr_cores_CPU; // total number of threads | ||
| + | int work_group_size = 1; // number of threads in work-group | ||
| + | int nr_work_groups = nr_threads / work_group_size; | ||
| + | globalWorkSize[0] = nr_threads ; | ||
| + | localWorkSize[0] = work_group_size ; | ||
| + | | ||
| + | printf("\t\tExecuting kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | nr_cores_CPU, nr_cores_CPU, 1); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | work_group_size, nr_threads / work_group_size, | ||
| + | nr_threads / work_group_size / nr_cores_CPU); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/nr_cores_CPU, | ||
| + | globalWorkSize[0]/nr_cores_CPU); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | array_size, array_size/globalWorkSize[0], array_size/nr_cores_CPU); | ||
| + | | ||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | | ||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | | ||
| + | int size = array_size; | ||
| + | | ||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | | ||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | // tutaj modyfikujemy | ||
| + | int execute_kernel_0_GPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | | ||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | | ||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | | ||
| + | int i,j,k; | ||
| + | | ||
| + | | ||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | | ||
| + | // create the kernel for GPU | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_0_kernel", "vecadd_0.cl", monitor); | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | | ||
| + | | ||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | | ||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | | ||
| + | | ||
| + | int NDRange_dim = 1; | ||
| + | | ||
| + | // globalna liczba wątków i lokalna liczba wątków | ||
| + | int work_group_size = WORK_GROUP_SIZE; | ||
| + | globalWorkSize[0] = array_size; | ||
| + | localWorkSize[0] = work_group_size; | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
| + | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | } | ||
| + | |||
| + | | ||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | | ||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | | ||
| + | // Set the kernel arguments (result, a, b) | ||
| + | // ustawiamy liczbę argumentów | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | | ||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | int execute_kernel_1_GPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | | ||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | | ||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | | ||
| + | int i,j,k; | ||
| + | | ||
| + | | ||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | | ||
| + | // create the kernel for GPU | ||
| + | /* | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
| + | |||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: */ | ||
| + | "vecadd_1_kernel", "vecadd_1.cl", monitor); | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | | ||
| + | | ||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | | ||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | | ||
| + | | ||
| + | int NDRange_dim = 1; | ||
| + | | ||
| + | int work_group_size = WORK_GROUP_SIZE; | ||
| + | globalWorkSize[0] = array_size / 4; | ||
| + | localWorkSize[0] = work_group_size; | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
| + | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | } | ||
| + | |||
| + | | ||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | | ||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | | ||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | | ||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | int execute_kernel_2_GPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | | ||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | | ||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | | ||
| + | int i,j,k; | ||
| + | | ||
| + | | ||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | | ||
| + | // create the kernel for GPU | ||
| + | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: */ | ||
| + | "vecadd_2_blocks_kernel", "vecadd_2_blocks.cl", monitor); | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | | ||
| + | | ||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | | ||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | | ||
| + | | ||
| + | int NDRange_dim = 1; | ||
| + | | ||
| + | int work_group_size = WORK_GROUP_SIZE; | ||
| + | globalWorkSize[0] = array_size / work_group_size; | ||
| + | localWorkSize[0] = work_group_size; | ||
| + | |||
| + | int size_per_thread = work_group_size; | ||
| + | int size = array_size; | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
| + | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | } | ||
| + | |||
| + | | ||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | | ||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | | ||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
| + | retval |= clSetKernelArg(kernel, 4, sizeof(int), &size_per_thread); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | | ||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | int execute_kernel_3_GPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | | ||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | | ||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | | ||
| + | int i,j,k; | ||
| + | | ||
| + | int size = array_size; | ||
| + | | ||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | | ||
| + | // create the kernel for GPU | ||
| + | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_3_opt_blocks_kernel", "vecadd_3_opt_blocks.cl", monitor); | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | | ||
| + | | ||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | | ||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | | ||
| + | | ||
| + | int NDRange_dim = 1; | ||
| + | | ||
| + | int work_group_size = WORK_GROUP_SIZE; | ||
| + | globalWorkSize[0] = array_size; | ||
| + | localWorkSize[0] = work_group_size; | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
| + | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | } | ||
| + | |||
| + | | ||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | | ||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | | ||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | | ||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | |||
| + | |||
| + | | ||
| + | /* // create the fourth kernel for GPU */ | ||
| + | /* kernel_index = 4; */ | ||
| + | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, */ | ||
| + | /* // kernel name: , file: */ | ||
| + | /* "vecadd_4_opt_blocks_vect_kernel", "vecadd_4_opt_blocks_vect.cl", monitor); */</code> | ||
| + | |||
| + | ==== opencl_vecadd/vecadd_2_blocks.cl ==== | ||
| + | <code c> | ||
| + | __kernel void vecadd_2_blocks_kernel(__global const float *a, | ||
| + | __global const float *b, | ||
| + | __global float *result, | ||
| + | const int size, | ||
| + | const int size_per_thread) | ||
| + | { | ||
| + | int gid = get_global_id(0); | ||
| + | int index_start = gid * size_per_thread; | ||
| + | int index_end = (gid+1) * size_per_thread; | ||
| + | |||
| + | if (gid == 0) { | ||
| + | //printf("size: %d, %d", size, size_per_thread); | ||
| + | } | ||
| + | |||
| + | for (int i=index_start; i < index_end && i < size; i++) { | ||
| + | //for (int i=index_start; i < index_end ; i++) { | ||
| + | result[i] = a[i]+b[i]; | ||
| + | } | ||
| + | }</code> | ||
| + | ===== Zajęcia 7 ===== | ||
| + | {{:studia:magisterskie:1sem:zajecia_7.zip|}} | ||
| + | |||
| + | ==== tmp/opencl_mat_transp/mat_transp_1.cl ==== | ||
| + | <code c> #pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
| + | |||
| + | __kernel void mat_transp_1_kernel( | ||
| + | __global float *A , | ||
| + | __global float *B , | ||
| + | __global float *C , | ||
| + | int N) | ||
| + | { | ||
| + | |||
| + | int id = get_global_id(0) * get_global_size(0) + get_global_id(1); | ||
| + | float temp = 0.0; | ||
| + | |||
| + | for(int i = 0; i<N;i++){ | ||
| + | temp += A[id*N + i] *B[i]; | ||
| + | } | ||
| + | |||
| + | C[id] = temp; | ||
| + | }</code> | ||
| + | |||
| + | ==== tmp/opencl_mat_transp/mat_transp/mat_transp.c ==== | ||
| + | <code c>#include<stdlib.h> | ||
| + | #include<stdio.h> | ||
| + | #include <math.h> | ||
| + | |||
| + | #include <CL/cl.h> | ||
| + | |||
| + | #include"uth_ocl_intf.h" | ||
| + | |||
| + | #define time_measurments | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | #include"uth_time_intf.h" | ||
| + | static double t_begin, t_end, t_total; | ||
| + | #endif | ||
| + | |||
| + | |||
| + | |||
| + | #define BLOCK_SIZE 16 | ||
| + | #define NR_GROUPS 16 | ||
| + | #define MULT 16 | ||
| + | #define WYMIAR (BLOCK_SIZE*NR_GROUPS*MULT) | ||
| + | #define ROZMIAR (WYMIAR*WYMIAR) | ||
| + | // Matrices are stored in row-major order: | ||
| + | // M(row, col) = M( row * WYMIAR + col ) | ||
| + | |||
| + | int verify_result( | ||
| + | float* result, | ||
| + | float* result_compare | ||
| + | ) | ||
| + | { | ||
| + | // Verify the result | ||
| + | int result_OK = 1; | ||
| + | int i,j; | ||
| + | for(i = 0; i < WYMIAR; i++) { | ||
| + | if(fabs(result[i] - result_compare[i])>1.e-6) { | ||
| + | result_OK = 0; | ||
| + | break; | ||
| + | } | ||
| + | } | ||
| + | printf("\t\t6. verifying results: "); | ||
| + | if(result_OK) { | ||
| + | printf("Output is correct\n"); | ||
| + | } else { | ||
| + | printf("Output is incorrect\n"); | ||
| + | j = 0; | ||
| + | for(i = 0; i < WYMIAR, j<100; i++) { | ||
| + | if(fabs(result[i] - result_compare[i])>1.e-9) { | ||
| + | j++; | ||
| + | printf("%d %16.8f %16.8f\n", | ||
| + | i, result[i], result_compare[i]); | ||
| + | } | ||
| + | } | ||
| + | exit(0); | ||
| + | } | ||
| + | /* for(i = 0; i < length; i++) { */ | ||
| + | /* printf("%16.8f %16.8f\n", result[i], result_compare[i]); */ | ||
| + | /* } */ | ||
| + | |||
| + | return(result_OK); | ||
| + | } | ||
| + | |||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | void create_kernels() | ||
| + | { | ||
| + | |||
| + | // for all operations indicate explicit info messages | ||
| + | int monitor = UTC_BASIC_INFO + 1; | ||
| + | |||
| + | int kernel_index; | ||
| + | | ||
| + | int platform_index = utv_ocl_struct.current_platform_index; | ||
| + | |||
| + | if(utr_ocl_CPU_context_exists(platform_index)){ | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | | ||
| + | // create the first kernel for CPU | ||
| + | /* kernel_index = 0; */ | ||
| + | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, */ | ||
| + | /* // kernel name: , file: */ | ||
| + | /* "mat_transp_1_kernel", "mat_transp_1.cl", monitor); */ | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel %d: %lf\n", kernel_index, t_end-t_begin); | ||
| + | #endif | ||
| + | | ||
| + | } | ||
| + | |||
| + | if(utr_ocl_GPU_context_exists(platform_index)){ | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // create the first kernel for GPU | ||
| + | kernel_index = 0; | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "mat_transp_1_kernel", "mat_transp_1.cl", monitor); | ||
| + | | ||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel %d: %lf\n", kernel_index, t_end-t_begin); | ||
| + | #endif | ||
| + | | ||
| + | // create the second kernel for GPU | ||
| + | kernel_index = 1; | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "mat_transp_2_kernel", "mat_transp_2.cl", monitor); | ||
| + | |||
| + | | ||
| + | |||
| + | } | ||
| + | |||
| + | } | ||
| + | |||
| + | /*----------------EXECUTION PHASE----------------------*/ | ||
| + | int execute_kernels() | ||
| + | { | ||
| + | |||
| + | // for all operations indicate explicit info messages | ||
| + | int monitor = UTC_BASIC_INFO + 1; | ||
| + | |||
| + | int platform_index = utv_ocl_struct.current_platform_index; | ||
| + | utt_ocl_platform_struct platform_struct = utv_ocl_struct.list_of_platforms[platform_index]; | ||
| + | |||
| + | int kernel_index; | ||
| + | int i,j,n; | ||
| + | |||
| + | double nr_access; | ||
| + | double t1,t2; | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\n------------Starting execution phase----------------\n"); | ||
| + | } | ||
| + | | ||
| + | // create matrices | ||
| + | n=WYMIAR; | ||
| + | float* A = (float *) malloc(ROZMIAR*sizeof(float)); | ||
| + | float* B = (float *) malloc(WYMIAR*sizeof(float)); | ||
| + | float* C = (float *) malloc(WYMIAR*sizeof(float)); | ||
| + | float* D = (float *) malloc(WYMIAR*sizeof(float)); | ||
| + | | ||
| + | for(i=0;i<ROZMIAR;i++) A[i]=1.0*i/10000000.0; | ||
| + | |||
| + | for(i=0;i<WYMIAR;i++) B[i]=1.0*i/10000000.0; | ||
| + | |||
| + | // mnozenie macierz wektor | ||
| + | for(i=0; i<WYMIAR; i++){ | ||
| + | for(j=0; j<WYMIAR; j++){ | ||
| + | D[i] += A[i*WYMIAR + j] * B[j]; | ||
| + | } | ||
| + | } | ||
| + | | ||
| + | nr_access= ROZMIAR + 2.0*WYMIAR; // read + write | ||
| + | | ||
| + | printf("mat_transp: nr_access %lf\n", nr_access); | ||
| + | | ||
| + | // get hardware characteristics to select good matrix shape | ||
| + | // the set of device characteristics stored in data structure | ||
| + | int device_index = 0; | ||
| + | utt_ocl_device_struct device_struct = | ||
| + | utv_ocl_struct.list_of_platforms[platform_index].list_of_devices[device_index]; | ||
| + | double global_mem_bytes = device_struct.global_mem_bytes; | ||
| + | double global_max_alloc = device_struct.global_max_alloc; | ||
| + | double local_mem_bytes = device_struct.local_mem_bytes; | ||
| + | double constant_mem_bytes = device_struct.constant_mem_bytes; | ||
| + | int max_num_comp_units = device_struct.max_num_comp_units; | ||
| + | int max_work_group_size = device_struct.max_work_group_size; | ||
| + | | ||
| + | // in a loop over devices (or for a selected device) | ||
| + | int idev=0; | ||
| + | for(idev=0; idev<platform_struct.number_of_devices; idev++){ | ||
| + | | ||
| + | // int device_type = ..... | ||
| + | // choose device_index | ||
| + | // int device_index = utr_ocl_select_device(platform_index, device_type); | ||
| + | int device_index = idev; | ||
| + | int device_type = utr_ocl_device_type(platform_index, device_index); | ||
| + | | ||
| + | if(device_index>0 && device_type==utr_ocl_device_type(platform_index, device_index-1)) break; | ||
| + | if(device_type == UTC_OCL_DEVICE_CPU) break; | ||
| + | |||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | | ||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t0. restoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | | ||
| + | // choose the kernel | ||
| + | kernel_index = 0; | ||
| + | cl_kernel kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | | ||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | | ||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | | ||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | | ||
| + | for(i=0;i<WYMIAR;i++) C[i]=0.0; | ||
| + | time_init(); t1 = time_clock(); | ||
| + | | ||
| + | // call routine to perform matrix transposition | ||
| + | mat_transp_host(kernel_index, A, B, C, n, context, kernel, command_queue); | ||
| + | | ||
| + | t2 = time_clock(); time_print(); | ||
| + | printf("GB/s = %lf\n\n", nr_access*sizeof(float)/(t2-t1)/1024/1024/1024); | ||
| + | | ||
| + | | ||
| + | // verify result | ||
| + | verify_result(C, D); | ||
| + | | ||
| + | | ||
| + | } // end loop over devices | ||
| + | | ||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | |||
| + | // Matrix transposition - Host code | ||
| + | // Matrix dimensions are assumed to be multiples of BLOCK_SIZE | ||
| + | int mat_transp_host( | ||
| + | int kernel_index, | ||
| + | float* A, | ||
| + | float* B, | ||
| + | float* C, | ||
| + | int N, | ||
| + | const cl_context context, | ||
| + | const cl_kernel mat_transp_kernel, | ||
| + | const cl_command_queue queue | ||
| + | ) | ||
| + | { | ||
| + | |||
| + | // Load A to device memory | ||
| + | size_t size_bytesm = N*N*sizeof(float); | ||
| + | size_t size_bytesw = N*sizeof(float); | ||
| + | | ||
| + | |||
| + | cl_mem d_A = clCreateBuffer(context, CL_MEM_READ_ONLY, | ||
| + | size_bytesm, NULL, NULL); | ||
| + | |||
| + | // Write A to device memory | ||
| + | clEnqueueWriteBuffer(queue, d_A, CL_TRUE, 0, size_bytesm, A, 0, 0, 0); | ||
| + | |||
| + | cl_mem d_B = clCreateBuffer(context, CL_MEM_READ_ONLY, | ||
| + | size_bytesw, NULL, NULL); | ||
| + | |||
| + | // Write A to device memory | ||
| + | clEnqueueWriteBuffer(queue, d_B, CL_TRUE, 0, size_bytesw, B, 0, 0, 0); | ||
| + | |||
| + | // Allocate B in device memory | ||
| + | cl_mem d_C = clCreateBuffer(context, CL_MEM_WRITE_ONLY, size_bytesw, NULL, NULL); | ||
| + | |||
| + | |||
| + | // Invoke kernel | ||
| + | clSetKernelArg(mat_transp_kernel, 0, sizeof(cl_mem), (void*)&d_A); | ||
| + | clSetKernelArg(mat_transp_kernel, 1, sizeof(cl_mem), (void*)&d_B); | ||
| + | clSetKernelArg(mat_transp_kernel, 2, sizeof(cl_mem), (void*)&d_C); | ||
| + | clSetKernelArg(mat_transp_kernel, 3, sizeof(int), (void*)&N); | ||
| + | |||
| + | size_t localWorkSize[3]; | ||
| + | size_t globalWorkSize[3]; | ||
| + | cl_uint work_dim; | ||
| + | |||
| + | if(kernel_index==3){ | ||
| + | |||
| + | work_dim = 2; | ||
| + | localWorkSize[0] = BLOCK_SIZE; | ||
| + | globalWorkSize[0] = N/MULT; | ||
| + | localWorkSize[1] = BLOCK_SIZE; | ||
| + | globalWorkSize[1] = N/MULT; | ||
| + | localWorkSize[2] = 0; | ||
| + | globalWorkSize[2] = 0; | ||
| + | |||
| + | |||
| + | } | ||
| + | else{ | ||
| + | |||
| + | work_dim = 2; | ||
| + | localWorkSize[0] = BLOCK_SIZE; | ||
| + | globalWorkSize[0] = sqrt(N); | ||
| + | localWorkSize[1] = BLOCK_SIZE; | ||
| + | globalWorkSize[1] = sqrt(N); | ||
| + | localWorkSize[2] = 0; | ||
| + | globalWorkSize[2] = 0; | ||
| + | | ||
| + | } | ||
| + | |||
| + | clFinish(queue); | ||
| + | double t1 = time_clock(); | ||
| + | // Enqueue a kernel run call | ||
| + | cl_event ndrEvt; | ||
| + | clEnqueueNDRangeKernel(queue, mat_transp_kernel, work_dim, 0, | ||
| + | globalWorkSize, localWorkSize, 0, 0, &ndrEvt); | ||
| + | clWaitForEvents(1, &ndrEvt); | ||
| + | clFinish(queue); | ||
| + | double t2 = time_clock(); | ||
| + | // Calculate performance | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | | ||
| + | // Get kernel profiling info | ||
| + | clGetEventProfilingInfo(ndrEvt, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(ndrEvt, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | double time = (double)endTime - (double)startTime; | ||
| + | printf("\nKernel execution internal: time %lf, GB/s = %lf\n", | ||
| + | time*1.0e-9, 2.0*N*N*sizeof(float)/(time*1.0e-9)/1024/1024/1024); | ||
| + | |||
| + | printf("Kernel execution external: time %lf, GB/s = %lf\n\n", | ||
| + | t2-t1, 2.0*N*N*sizeof(float)/(t2-t1)/1024/1024/1024); | ||
| + | |||
| + | // Read B from device memory | ||
| + | clEnqueueReadBuffer(queue, d_C, CL_TRUE, 0, size_bytesw, C, 0, 0, 0); | ||
| + | |||
| + | // Free device memory | ||
| + | clReleaseMemObject(d_A); | ||
| + | clReleaseMemObject(d_B); | ||
| + | clReleaseMemObject(d_C); | ||
| + | |||
| + | return(0); | ||
| + | |||
| + | }</code> | ||
| + | ==== Kod wogu ==== | ||
| + | === server.c ==== | ||
| + | <code c> | ||
| + | /* | ||
| + | C socket server example, handles multiple clients using threads | ||
| + | Compile | ||
| + | gcc server.c -lpthread -o server | ||
| + | */ | ||
| + | |||
| + | #include<stdio.h> | ||
| + | #include<string.h> //strlen | ||
| + | #include<stdlib.h> //strlen | ||
| + | #include<sys/socket.h> | ||
| + | #include<arpa/inet.h> //inet_addr | ||
| + | #include<unistd.h> //write | ||
| + | #include<pthread.h> //for threading , link with lpthread | ||
| + | #include <unistd.h> | ||
| + | |||
| + | //the thread function | ||
| + | void *connection_handler(void *); | ||
| + | |||
| + | int main(int argc , char *argv[]) | ||
| + | { | ||
| + | int counter = 0; | ||
| + | pid_t pid = fork(); | ||
| + | |||
| + | if (pid == 0) | ||
| + | { | ||
| + | int sock; | ||
| + | struct sockaddr_in server; | ||
| + | char message[1000] , server_reply[2000]; | ||
| + | int counter; | ||
| + | unsigned int ip[] = { 192, 168, 102, 1 }; | ||
| + | char* ip_final; | ||
| + | char *buf; | ||
| + | size_t sz; | ||
| + | |||
| + | //Create socket | ||
| + | |||
| + | for ( counter = 0; counter < 255; ++counter ) { | ||
| + | |||
| + | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (sock == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | //printf("192.168.102.%d", counter); | ||
| + | |||
| + | sz = snprintf(NULL, 0, "192.168.102.%d", counter); | ||
| + | buf = (char *)malloc(sz + 1); /* make sure you check for != NULL in real code */ | ||
| + | | ||
| + | server.sin_addr.s_addr = inet_addr(buf); | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_port = htons( 65001 ); | ||
| + | |||
| + | //Connect to remote server | ||
| + | if (connect(sock , (struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | perror("connect failed. Error"); | ||
| + | continue; | ||
| + | } | ||
| + | |||
| + | puts("Connected\n"); | ||
| + | |||
| + | //keep communicating with server | ||
| + | while(1) | ||
| + | { | ||
| + | //Send some data | ||
| + | if( send(sock , "Werset 6-client" , 15 , 0) < 0) | ||
| + | { | ||
| + | puts("Send failed"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | //Receive a reply from the server | ||
| + | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
| + | { | ||
| + | puts("recv failed"); | ||
| + | break; | ||
| + | } | ||
| + | |||
| + | puts(server_reply); | ||
| + | |||
| + | break; | ||
| + | } | ||
| + | |||
| + | close(sock); | ||
| + | } | ||
| + | } | ||
| + | else if (pid > 0) | ||
| + | { | ||
| + | int socket_desc , client_sock , c; | ||
| + | struct sockaddr_in server , client; | ||
| + | |||
| + | //Create socket | ||
| + | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (socket_desc == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | //Prepare the sockaddr_in structure | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_addr.s_addr = INADDR_ANY; | ||
| + | server.sin_port = htons( 65001 ); | ||
| + | |||
| + | //Bind | ||
| + | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | //print the error message | ||
| + | perror("bind failed. Error"); | ||
| + | return 1; | ||
| + | } | ||
| + | puts("bind done"); | ||
| + | |||
| + | //Listen | ||
| + | listen(socket_desc , 3); | ||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | |||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | pthread_t thread_id; | ||
| + | |||
| + | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
| + | { | ||
| + | puts("Connection accepted"); | ||
| + | |||
| + | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
| + | { | ||
| + | perror("could not create thread"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | //Now join the thread , so that we dont terminate before the thread | ||
| + | //pthread_join( thread_id , NULL); | ||
| + | puts("Handler assigned"); | ||
| + | } | ||
| + | |||
| + | if (client_sock < 0) | ||
| + | { | ||
| + | perror("accept failed"); | ||
| + | return 1; | ||
| + | } | ||
| + | } | ||
| + | else | ||
| + | { | ||
| + | // fork failed | ||
| + | printf("fork() failed!\n"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | |||
| + | /* | ||
| + | * This will handle connection for each client | ||
| + | * */ | ||
| + | void *connection_handler(void *socket_desc) | ||
| + | { | ||
| + | //Get the socket descriptor | ||
| + | int sock = *(int*)socket_desc; | ||
| + | int read_size; | ||
| + | char *message , client_message[2000]; | ||
| + | |||
| + | //Receive a message from client | ||
| + | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
| + | { | ||
| + | printf("%s\n", client_message); | ||
| + | |||
| + | //end of string marker | ||
| + | client_message[read_size] = '\0'; | ||
| + | |||
| + | //Send the message back to client | ||
| + | write(sock , "Werset 6" , 8); | ||
| + | |||
| + | //clear the message buffer | ||
| + | memset(client_message, 0, 2000); | ||
| + | } | ||
| + | |||
| + | if(read_size == 0) | ||
| + | { | ||
| + | puts("Client disconnected"); | ||
| + | fflush(stdout); | ||
| + | } | ||
| + | else if(read_size == -1) | ||
| + | { | ||
| + | perror("recv failed"); | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | }</code> | ||
| + | === Makefile === | ||
| + | <code bash># kompilator c | ||
| + | CC = gcc | ||
| + | |||
| + | # konsolidator | ||
| + | CFLAGS=-g | ||
| + | |||
| + | all: server client | ||
| + | |||
| + | server: server.o | ||
| + | $(CC) $(LFLAGS) server.o -lpthread -o server | ||
| + | |||
| + | server.o: server.c | ||
| + | $(CC) -c server.c -o server.o | ||
| + | |||
| + | client: client.o | ||
| + | $(CC) client.o -o client | ||
| + | |||
| + | client.o: client.c | ||
| + | $(CC) -c client.c -o client.o | ||
| + | |||
| + | clean: | ||
| + | rm -f *.o</code> | ||
| + | ===== Zajęcia 9 ===== | ||
| + | <code c>/* | ||
| + | C socket server example, handles multiple clients using threads | ||
| + | Compile | ||
| + | gcc server.c -lpthread -o server | ||
| + | */ | ||
| + | |||
| + | #include<stdio.h> | ||
| + | #include<string.h> //strlen | ||
| + | #include<stdlib.h> //strlen | ||
| + | #include<sys/socket.h> | ||
| + | #include<arpa/inet.h> //inet_addr | ||
| + | #include<unistd.h> //write | ||
| + | #include<pthread.h> //for threading , link with lpthread | ||
| + | |||
| + | //the thread function | ||
| + | void *connection_handler(void *); | ||
| + | |||
| + | int main(int argc , char *argv[]) | ||
| + | { | ||
| + | int socket_desc , client_sock , c; | ||
| + | struct sockaddr_in server , client; | ||
| + | |||
| + | //Create socket | ||
| + | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (socket_desc == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | //Prepare the sockaddr_in structure | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_addr.s_addr = INADDR_ANY; | ||
| + | server.sin_port = htons( 8888 ); | ||
| + | |||
| + | //Bind | ||
| + | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | //print the error message | ||
| + | perror("bind failed. Error"); | ||
| + | return 1; | ||
| + | } | ||
| + | puts("bind done"); | ||
| + | |||
| + | //Listen | ||
| + | listen(socket_desc , 3); | ||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | |||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | pthread_t thread_id; | ||
| + | |||
| + | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
| + | { | ||
| + | puts("Connection accepted"); | ||
| + | |||
| + | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
| + | { | ||
| + | perror("could not create thread"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | //Now join the thread , so that we dont terminate before the thread | ||
| + | //pthread_join( thread_id , NULL); | ||
| + | puts("Handler assigned"); | ||
| + | } | ||
| + | |||
| + | if (client_sock < 0) | ||
| + | { | ||
| + | perror("accept failed"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | |||
| + | /* | ||
| + | * This will handle connection for each client | ||
| + | * */ | ||
| + | void *connection_handler(void *socket_desc) | ||
| + | { | ||
| + | //Get the socket descriptor | ||
| + | int sock = *(int*)socket_desc; | ||
| + | int read_size; | ||
| + | char *message , client_message[2000]; | ||
| + | |||
| + | //Receive a message from client | ||
| + | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
| + | { | ||
| + | //end of string marker | ||
| + | client_message[read_size] = '\0'; | ||
| + | |||
| + | //Send the message back to client | ||
| + | write(sock , "Werset 6" , strlen(client_message)); | ||
| + | |||
| + | //clear the message buffer | ||
| + | memset(client_message, 0, 2000); | ||
| + | } | ||
| + | |||
| + | if(read_size == 0) | ||
| + | { | ||
| + | puts("Client disconnected"); | ||
| + | fflush(stdout); | ||
| + | } | ||
| + | else if(read_size == -1) | ||
| + | { | ||
| + | perror("recv failed"); | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } </code> | ||
| + | |||
| + | ==== Client C ==== | ||
| + | |||
| + | <code c>#include<stdio.h> //printf | ||
| + | #include<string.h> //strlen | ||
| + | #include<sys/socket.h> //socket | ||
| + | #include<arpa/inet.h> //inet_addr | ||
| + | |||
| + | int main(int argc , char *argv[]) | ||
| + | { | ||
| + | int sock; | ||
| + | struct sockaddr_in server; | ||
| + | char message[1000] , server_reply[2000]; | ||
| + | |||
| + | //Create socket | ||
| + | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (sock == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | server.sin_addr.s_addr = inet_addr("127.0.0.1"); | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_port = htons( 8888 ); | ||
| + | |||
| + | //Connect to remote server | ||
| + | if (connect(sock , (struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | perror("connect failed. Error"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | puts("Connected\n"); | ||
| + | |||
| + | //keep communicating with server | ||
| + | while(1) | ||
| + | { | ||
| + | printf("Enter message : "); | ||
| + | scanf("%s" , message); | ||
| + | |||
| + | //Send some data | ||
| + | if( send(sock , message , strlen(message) , 0) < 0) | ||
| + | { | ||
| + | puts("Send failed"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | //Receive a reply from the server | ||
| + | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
| + | { | ||
| + | puts("recv failed"); | ||
| + | break; | ||
| + | } | ||
| + | |||
| + | puts("Server reply :"); | ||
| + | puts(server_reply); | ||
| + | } | ||
| + | |||
| + | close(sock); | ||
| + | return 0; | ||
| + | }</code> | ||
| + | |||
| + | |||
| + | ==== Kod Maciek Gonzo itp ==== | ||
| + | <code c> | ||
| + | #include<stdio.h> | ||
| + | #include<string.h> //strlen | ||
| + | #include<stdlib.h> //strlen | ||
| + | #include<sys/socket.h> | ||
| + | #include<arpa/inet.h> //inet_addr | ||
| + | #include<unistd.h> //write | ||
| + | #include<pthread.h> //for threading , link with lpthread | ||
| + | |||
| + | //the thread function | ||
| + | void *connection_handler(void *); | ||
| + | |||
| + | void *client_function(void *socket_desc){ | ||
| + | int sock; | ||
| + | struct sockaddr_in server_id; | ||
| + | char message[1000] , server_reply[2000]; | ||
| + | |||
| + | //Create socket | ||
| + | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (sock == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | // 192.168.102.63 wogu | ||
| + | server_id.sin_addr.s_addr = inet_addr("localhost"); | ||
| + | server_id.sin_family = AF_INET; | ||
| + | server_id.sin_port = htons( 65000 ); | ||
| + | |||
| + | //Connect to remote server | ||
| + | |||
| + | if (connect(sock , (struct sockaddr *)&server_id , sizeof(server_id)) < 0) | ||
| + | { | ||
| + | perror("connect failed. Error"); | ||
| + | |||
| + | }else{ | ||
| + | |||
| + | puts("Connected\n"); | ||
| + | |||
| + | //keep communicating with server | ||
| + | while(1) | ||
| + | { | ||
| + | printf("Enter message : "); | ||
| + | scanf("%s" , message); | ||
| + | |||
| + | //Send some data | ||
| + | if( send(sock , message , strlen(message) , 0) < 0) | ||
| + | { | ||
| + | puts("Send failed"); | ||
| + | |||
| + | } | ||
| + | |||
| + | //Receive a reply from the server | ||
| + | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
| + | { | ||
| + | puts("recv failed"); | ||
| + | break; | ||
| + | } | ||
| + | |||
| + | puts("Server reply :"); | ||
| + | puts(server_reply); | ||
| + | } | ||
| + | |||
| + | close(sock); | ||
| + | } | ||
| + | |||
| + | }; | ||
| + | |||
| + | |||
| + | void *server_function(void * sock){ | ||
| + | printf("SERVER Start"); | ||
| + | fflush(stdout); | ||
| + | int socket_desc , client_sock , c; | ||
| + | pthread_t thread_id; | ||
| + | struct sockaddr_in server , client; | ||
| + | |||
| + | //Create socket | ||
| + | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (socket_desc == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | //Prepare the sockaddr_in structure | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_addr.s_addr = INADDR_ANY; | ||
| + | server.sin_port = htons( 65000); | ||
| + | |||
| + | //Bind | ||
| + | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | //print the error message | ||
| + | perror("bind failed. Error"); | ||
| + | return; | ||
| + | } | ||
| + | puts("bind done"); | ||
| + | |||
| + | |||
| + | |||
| + | //Listen | ||
| + | listen(socket_desc , 3); | ||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | fflush(stdout); | ||
| + | |||
| + | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
| + | { | ||
| + | puts("Connection accepted"); | ||
| + | |||
| + | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
| + | { | ||
| + | perror("could not create thread"); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | //Now join the thread , so that we dont terminate before the thread | ||
| + | //pthread_join( thread_id , NULL); | ||
| + | puts("Handler assigned"); | ||
| + | } | ||
| + | |||
| + | if (client_sock < 0) | ||
| + | { | ||
| + | perror("accept failed"); | ||
| + | return; | ||
| + | } | ||
| + | } | ||
| + | |||
| + | |||
| + | int main(int argc , char *argv[]) | ||
| + | { | ||
| + | pthread_t thread_id; | ||
| + | //Create server | ||
| + | if( pthread_create( &thread_id , NULL , server_function , NULL) != 0) | ||
| + | { | ||
| + | perror("could not create server thread"); | ||
| + | fflush(stdout); | ||
| + | return 1; | ||
| + | } | ||
| + | pthread_t thread_id2; | ||
| + | |||
| + | // Create client | ||
| + | if( pthread_create( &thread_id2 , NULL , client_function , NULL) != 0) | ||
| + | { | ||
| + | perror("could not create client thread"); | ||
| + | fflush(stdout); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | |||
| + | /* | ||
| + | * This will handle connection for each client | ||
| + | * */ | ||
| + | void *connection_handler(void *socket_desc) | ||
| + | { | ||
| + | //Get the socket descriptor | ||
| + | int sock = *(int*)socket_desc; | ||
| + | int read_size; | ||
| + | char *message , client_message[2000]; | ||
| + | |||
| + | //Receive a message from client | ||
| + | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
| + | { | ||
| + | //end of string marker | ||
| + | client_message[read_size] = '\0'; | ||
| + | |||
| + | //Send the message back to client | ||
| + | write(sock , "Werset 5" , strlen(client_message)); | ||
| + | |||
| + | //clear the message buffer | ||
| + | memset(client_message, 0, 2000); | ||
| + | } | ||
| + | |||
| + | if(read_size == 0) | ||
| + | { | ||
| + | puts("Client disconnected"); | ||
| + | fflush(stdout); | ||
| + | } | ||
| + | else if(read_size == -1) | ||
| + | { | ||
| + | perror("recv failed"); | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | |||
| </code> | </code> | ||