Różnice między wybraną wersją a wersją aktualną.
Poprzednia rewizja po obu stronach Poprzednia wersja Nowa wersja | Poprzednia wersja | ||
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/04/14 10:58] 149.156.112.6 |
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/05/19 11:13] (aktualna) 149.156.112.6 [tmp/opencl_mat_transp/mat_transp/mat_transp.c] |
||
---|---|---|---|
Linia 1405: | Linia 1405: | ||
}</code> | }</code> | ||
===== Zajęcia 6 ===== | ===== Zajęcia 6 ===== | ||
+ | Pliki zadania: {{:studia:magisterskie:1sem:opencl_vecadd.tgz|}} | ||
+ | Działające 3 kernele: {{:studia:magisterskie:1sem:sala404.zip|}} | ||
+ | ==== opencl_vecadd/vecadd_host/execute_kernels.c ==== | ||
+ | <code c>#include<stdlib.h> | ||
+ | #include<stdio.h> | ||
+ | #include <math.h> | ||
+ | |||
+ | #include<omp.h> | ||
+ | #include <CL/cl.h> | ||
+ | |||
+ | #include"uth_ocl_intf.h" | ||
+ | |||
+ | #include"./vecadd_host.h" | ||
+ | #include"./hardware_params.h" | ||
+ | |||
+ | #define time_measurments | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | #include"uth_time_intf.h" | ||
+ | static double t_begin, t_end, t_total; | ||
+ | #endif | ||
+ | |||
+ | int execute_kernel_2_CPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // create the kernel for CPU | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_2_blocks_kernel", "vecadd_2_blocks.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. Restoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | int nr_cores_CPU = omp_get_num_procs(); | ||
+ | int nr_threads = nr_cores_CPU; // total number of threads | ||
+ | int work_group_size = 1; // number of threads in work-group | ||
+ | int nr_work_groups = nr_threads / work_group_size; | ||
+ | globalWorkSize[0] = nr_threads ; | ||
+ | localWorkSize[0] = work_group_size ; | ||
+ | | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | nr_cores_CPU, nr_cores_CPU, 1); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | work_group_size, nr_threads / work_group_size, | ||
+ | nr_threads / work_group_size / nr_cores_CPU); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/nr_cores_CPU, | ||
+ | globalWorkSize[0]/nr_cores_CPU); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | array_size, array_size/globalWorkSize[0], array_size/nr_cores_CPU); | ||
+ | | ||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | int size = array_size; | ||
+ | int size_per_thread = ceilf((float)size/nr_threads); | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
+ | retval |= clSetKernelArg(kernel, 4, sizeof(int), &size_per_thread); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | |||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | int execute_kernel_3_CPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for CPU | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_3_opt_blocks_kernel","vecadd_3_opt_blocks.cl",monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | int nr_cores_CPU = omp_get_num_procs(); | ||
+ | int nr_threads = nr_cores_CPU; // total number of threads | ||
+ | int work_group_size = 1; // number of threads in work-group | ||
+ | int nr_work_groups = nr_threads / work_group_size; | ||
+ | globalWorkSize[0] = nr_threads ; | ||
+ | localWorkSize[0] = work_group_size ; | ||
+ | | ||
+ | printf("\t\tExecuting kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | nr_cores_CPU, nr_cores_CPU, 1); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | work_group_size, nr_threads / work_group_size, | ||
+ | nr_threads / work_group_size / nr_cores_CPU); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/nr_cores_CPU, | ||
+ | globalWorkSize[0]/nr_cores_CPU); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | array_size, array_size/globalWorkSize[0], array_size/nr_cores_CPU); | ||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | int size = array_size; | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | // tutaj modyfikujemy | ||
+ | int execute_kernel_0_GPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for GPU | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_0_kernel", "vecadd_0.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | | ||
+ | // globalna liczba wątków i lokalna liczba wątków | ||
+ | int work_group_size = WORK_GROUP_SIZE; | ||
+ | globalWorkSize[0] = array_size; | ||
+ | localWorkSize[0] = work_group_size; | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
+ | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | } | ||
+ | |||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | // ustawiamy liczbę argumentów | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | int execute_kernel_1_GPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for GPU | ||
+ | /* | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
+ | |||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: */ | ||
+ | "vecadd_1_kernel", "vecadd_1.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | | ||
+ | int work_group_size = WORK_GROUP_SIZE; | ||
+ | globalWorkSize[0] = array_size / 4; | ||
+ | localWorkSize[0] = work_group_size; | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
+ | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | } | ||
+ | |||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | int execute_kernel_2_GPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for GPU | ||
+ | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: */ | ||
+ | "vecadd_2_blocks_kernel", "vecadd_2_blocks.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | | ||
+ | int work_group_size = WORK_GROUP_SIZE; | ||
+ | globalWorkSize[0] = array_size / work_group_size; | ||
+ | localWorkSize[0] = work_group_size; | ||
+ | |||
+ | int size_per_thread = work_group_size; | ||
+ | int size = array_size; | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
+ | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | } | ||
+ | |||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
+ | retval |= clSetKernelArg(kernel, 4, sizeof(int), &size_per_thread); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | int execute_kernel_3_GPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | int size = array_size; | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for GPU | ||
+ | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_3_opt_blocks_kernel", "vecadd_3_opt_blocks.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | | ||
+ | int work_group_size = WORK_GROUP_SIZE; | ||
+ | globalWorkSize[0] = array_size; | ||
+ | localWorkSize[0] = work_group_size; | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
+ | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | } | ||
+ | |||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
+ | |||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | | ||
+ | /* // create the fourth kernel for GPU */ | ||
+ | /* kernel_index = 4; */ | ||
+ | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, */ | ||
+ | /* // kernel name: , file: */ | ||
+ | /* "vecadd_4_opt_blocks_vect_kernel", "vecadd_4_opt_blocks_vect.cl", monitor); */</code> | ||
+ | |||
+ | ==== opencl_vecadd/vecadd_2_blocks.cl ==== | ||
+ | <code c> | ||
+ | __kernel void vecadd_2_blocks_kernel(__global const float *a, | ||
+ | __global const float *b, | ||
+ | __global float *result, | ||
+ | const int size, | ||
+ | const int size_per_thread) | ||
+ | { | ||
+ | int gid = get_global_id(0); | ||
+ | int index_start = gid * size_per_thread; | ||
+ | int index_end = (gid+1) * size_per_thread; | ||
+ | |||
+ | if (gid == 0) { | ||
+ | //printf("size: %d, %d", size, size_per_thread); | ||
+ | } | ||
+ | |||
+ | for (int i=index_start; i < index_end && i < size; i++) { | ||
+ | //for (int i=index_start; i < index_end ; i++) { | ||
+ | result[i] = a[i]+b[i]; | ||
+ | } | ||
+ | }</code> | ||
+ | ===== Zajęcia 7 ===== | ||
+ | {{:studia:magisterskie:1sem:zajecia_7.zip|}} | ||
+ | |||
+ | ==== tmp/opencl_mat_transp/mat_transp_1.cl ==== | ||
+ | <code c> #pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
+ | |||
+ | __kernel void mat_transp_1_kernel( | ||
+ | __global float *A , | ||
+ | __global float *B , | ||
+ | __global float *C , | ||
+ | int N) | ||
+ | { | ||
+ | |||
+ | int id = get_global_id(0) * get_global_size(0) + get_global_id(1); | ||
+ | float temp = 0.0; | ||
+ | |||
+ | for(int i = 0; i<N;i++){ | ||
+ | temp += A[id*N + i] *B[i]; | ||
+ | } | ||
+ | |||
+ | C[id] = temp; | ||
+ | }</code> | ||
+ | |||
+ | ==== tmp/opencl_mat_transp/mat_transp/mat_transp.c ==== | ||
+ | <code c>#include<stdlib.h> | ||
+ | #include<stdio.h> | ||
+ | #include <math.h> | ||
+ | |||
+ | #include <CL/cl.h> | ||
+ | |||
+ | #include"uth_ocl_intf.h" | ||
+ | |||
+ | #define time_measurments | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | #include"uth_time_intf.h" | ||
+ | static double t_begin, t_end, t_total; | ||
+ | #endif | ||
+ | |||
+ | |||
+ | |||
+ | #define BLOCK_SIZE 16 | ||
+ | #define NR_GROUPS 16 | ||
+ | #define MULT 16 | ||
+ | #define WYMIAR (BLOCK_SIZE*NR_GROUPS*MULT) | ||
+ | #define ROZMIAR (WYMIAR*WYMIAR) | ||
+ | // Matrices are stored in row-major order: | ||
+ | // M(row, col) = M( row * WYMIAR + col ) | ||
+ | |||
+ | int verify_result( | ||
+ | float* result, | ||
+ | float* result_compare | ||
+ | ) | ||
+ | { | ||
+ | // Verify the result | ||
+ | int result_OK = 1; | ||
+ | int i,j; | ||
+ | for(i = 0; i < WYMIAR; i++) { | ||
+ | if(fabs(result[i] - result_compare[i])>1.e-6) { | ||
+ | result_OK = 0; | ||
+ | break; | ||
+ | } | ||
+ | } | ||
+ | printf("\t\t6. verifying results: "); | ||
+ | if(result_OK) { | ||
+ | printf("Output is correct\n"); | ||
+ | } else { | ||
+ | printf("Output is incorrect\n"); | ||
+ | j = 0; | ||
+ | for(i = 0; i < WYMIAR, j<100; i++) { | ||
+ | if(fabs(result[i] - result_compare[i])>1.e-9) { | ||
+ | j++; | ||
+ | printf("%d %16.8f %16.8f\n", | ||
+ | i, result[i], result_compare[i]); | ||
+ | } | ||
+ | } | ||
+ | exit(0); | ||
+ | } | ||
+ | /* for(i = 0; i < length; i++) { */ | ||
+ | /* printf("%16.8f %16.8f\n", result[i], result_compare[i]); */ | ||
+ | /* } */ | ||
+ | |||
+ | return(result_OK); | ||
+ | } | ||
+ | |||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | void create_kernels() | ||
+ | { | ||
+ | |||
+ | // for all operations indicate explicit info messages | ||
+ | int monitor = UTC_BASIC_INFO + 1; | ||
+ | |||
+ | int kernel_index; | ||
+ | | ||
+ | int platform_index = utv_ocl_struct.current_platform_index; | ||
+ | |||
+ | if(utr_ocl_CPU_context_exists(platform_index)){ | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the first kernel for CPU | ||
+ | /* kernel_index = 0; */ | ||
+ | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, */ | ||
+ | /* // kernel name: , file: */ | ||
+ | /* "mat_transp_1_kernel", "mat_transp_1.cl", monitor); */ | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel %d: %lf\n", kernel_index, t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | } | ||
+ | |||
+ | if(utr_ocl_GPU_context_exists(platform_index)){ | ||
+ | |||
+ | |||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // create the first kernel for GPU | ||
+ | kernel_index = 0; | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "mat_transp_1_kernel", "mat_transp_1.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel %d: %lf\n", kernel_index, t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | // create the second kernel for GPU | ||
+ | kernel_index = 1; | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "mat_transp_2_kernel", "mat_transp_2.cl", monitor); | ||
+ | |||
+ | | ||
+ | |||
+ | } | ||
+ | |||
+ | } | ||
+ | |||
+ | /*----------------EXECUTION PHASE----------------------*/ | ||
+ | int execute_kernels() | ||
+ | { | ||
+ | |||
+ | // for all operations indicate explicit info messages | ||
+ | int monitor = UTC_BASIC_INFO + 1; | ||
+ | |||
+ | int platform_index = utv_ocl_struct.current_platform_index; | ||
+ | utt_ocl_platform_struct platform_struct = utv_ocl_struct.list_of_platforms[platform_index]; | ||
+ | |||
+ | int kernel_index; | ||
+ | int i,j,n; | ||
+ | |||
+ | double nr_access; | ||
+ | double t1,t2; | ||
+ | |||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\n------------Starting execution phase----------------\n"); | ||
+ | } | ||
+ | | ||
+ | // create matrices | ||
+ | n=WYMIAR; | ||
+ | float* A = (float *) malloc(ROZMIAR*sizeof(float)); | ||
+ | float* B = (float *) malloc(WYMIAR*sizeof(float)); | ||
+ | float* C = (float *) malloc(WYMIAR*sizeof(float)); | ||
+ | float* D = (float *) malloc(WYMIAR*sizeof(float)); | ||
+ | | ||
+ | for(i=0;i<ROZMIAR;i++) A[i]=1.0*i/10000000.0; | ||
+ | |||
+ | for(i=0;i<WYMIAR;i++) B[i]=1.0*i/10000000.0; | ||
+ | |||
+ | // mnozenie macierz wektor | ||
+ | for(i=0; i<WYMIAR; i++){ | ||
+ | for(j=0; j<WYMIAR; j++){ | ||
+ | D[i] += A[i*WYMIAR + j] * B[j]; | ||
+ | } | ||
+ | } | ||
+ | | ||
+ | nr_access= ROZMIAR + 2.0*WYMIAR; // read + write | ||
+ | | ||
+ | printf("mat_transp: nr_access %lf\n", nr_access); | ||
+ | | ||
+ | // get hardware characteristics to select good matrix shape | ||
+ | // the set of device characteristics stored in data structure | ||
+ | int device_index = 0; | ||
+ | utt_ocl_device_struct device_struct = | ||
+ | utv_ocl_struct.list_of_platforms[platform_index].list_of_devices[device_index]; | ||
+ | double global_mem_bytes = device_struct.global_mem_bytes; | ||
+ | double global_max_alloc = device_struct.global_max_alloc; | ||
+ | double local_mem_bytes = device_struct.local_mem_bytes; | ||
+ | double constant_mem_bytes = device_struct.constant_mem_bytes; | ||
+ | int max_num_comp_units = device_struct.max_num_comp_units; | ||
+ | int max_work_group_size = device_struct.max_work_group_size; | ||
+ | | ||
+ | // in a loop over devices (or for a selected device) | ||
+ | int idev=0; | ||
+ | for(idev=0; idev<platform_struct.number_of_devices; idev++){ | ||
+ | | ||
+ | // int device_type = ..... | ||
+ | // choose device_index | ||
+ | // int device_index = utr_ocl_select_device(platform_index, device_type); | ||
+ | int device_index = idev; | ||
+ | int device_type = utr_ocl_device_type(platform_index, device_index); | ||
+ | | ||
+ | if(device_index>0 && device_type==utr_ocl_device_type(platform_index, device_index-1)) break; | ||
+ | if(device_type == UTC_OCL_DEVICE_CPU) break; | ||
+ | |||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t0. restoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | // choose the kernel | ||
+ | kernel_index = 0; | ||
+ | cl_kernel kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | for(i=0;i<WYMIAR;i++) C[i]=0.0; | ||
+ | time_init(); t1 = time_clock(); | ||
+ | | ||
+ | // call routine to perform matrix transposition | ||
+ | mat_transp_host(kernel_index, A, B, C, n, context, kernel, command_queue); | ||
+ | | ||
+ | t2 = time_clock(); time_print(); | ||
+ | printf("GB/s = %lf\n\n", nr_access*sizeof(float)/(t2-t1)/1024/1024/1024); | ||
+ | | ||
+ | | ||
+ | // verify result | ||
+ | verify_result(C, D); | ||
+ | | ||
+ | | ||
+ | } // end loop over devices | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | // Matrix transposition - Host code | ||
+ | // Matrix dimensions are assumed to be multiples of BLOCK_SIZE | ||
+ | int mat_transp_host( | ||
+ | int kernel_index, | ||
+ | float* A, | ||
+ | float* B, | ||
+ | float* C, | ||
+ | int N, | ||
+ | const cl_context context, | ||
+ | const cl_kernel mat_transp_kernel, | ||
+ | const cl_command_queue queue | ||
+ | ) | ||
+ | { | ||
+ | |||
+ | // Load A to device memory | ||
+ | size_t size_bytesm = N*N*sizeof(float); | ||
+ | size_t size_bytesw = N*sizeof(float); | ||
+ | | ||
+ | |||
+ | cl_mem d_A = clCreateBuffer(context, CL_MEM_READ_ONLY, | ||
+ | size_bytesm, NULL, NULL); | ||
+ | |||
+ | // Write A to device memory | ||
+ | clEnqueueWriteBuffer(queue, d_A, CL_TRUE, 0, size_bytesm, A, 0, 0, 0); | ||
+ | |||
+ | cl_mem d_B = clCreateBuffer(context, CL_MEM_READ_ONLY, | ||
+ | size_bytesw, NULL, NULL); | ||
+ | |||
+ | // Write A to device memory | ||
+ | clEnqueueWriteBuffer(queue, d_B, CL_TRUE, 0, size_bytesw, B, 0, 0, 0); | ||
+ | |||
+ | // Allocate B in device memory | ||
+ | cl_mem d_C = clCreateBuffer(context, CL_MEM_WRITE_ONLY, size_bytesw, NULL, NULL); | ||
+ | |||
+ | |||
+ | // Invoke kernel | ||
+ | clSetKernelArg(mat_transp_kernel, 0, sizeof(cl_mem), (void*)&d_A); | ||
+ | clSetKernelArg(mat_transp_kernel, 1, sizeof(cl_mem), (void*)&d_B); | ||
+ | clSetKernelArg(mat_transp_kernel, 2, sizeof(cl_mem), (void*)&d_C); | ||
+ | clSetKernelArg(mat_transp_kernel, 3, sizeof(int), (void*)&N); | ||
+ | |||
+ | size_t localWorkSize[3]; | ||
+ | size_t globalWorkSize[3]; | ||
+ | cl_uint work_dim; | ||
+ | |||
+ | if(kernel_index==3){ | ||
+ | |||
+ | work_dim = 2; | ||
+ | localWorkSize[0] = BLOCK_SIZE; | ||
+ | globalWorkSize[0] = N/MULT; | ||
+ | localWorkSize[1] = BLOCK_SIZE; | ||
+ | globalWorkSize[1] = N/MULT; | ||
+ | localWorkSize[2] = 0; | ||
+ | globalWorkSize[2] = 0; | ||
+ | |||
+ | |||
+ | } | ||
+ | else{ | ||
+ | |||
+ | work_dim = 2; | ||
+ | localWorkSize[0] = BLOCK_SIZE; | ||
+ | globalWorkSize[0] = sqrt(N); | ||
+ | localWorkSize[1] = BLOCK_SIZE; | ||
+ | globalWorkSize[1] = sqrt(N); | ||
+ | localWorkSize[2] = 0; | ||
+ | globalWorkSize[2] = 0; | ||
+ | | ||
+ | } | ||
+ | |||
+ | clFinish(queue); | ||
+ | double t1 = time_clock(); | ||
+ | // Enqueue a kernel run call | ||
+ | cl_event ndrEvt; | ||
+ | clEnqueueNDRangeKernel(queue, mat_transp_kernel, work_dim, 0, | ||
+ | globalWorkSize, localWorkSize, 0, 0, &ndrEvt); | ||
+ | clWaitForEvents(1, &ndrEvt); | ||
+ | clFinish(queue); | ||
+ | double t2 = time_clock(); | ||
+ | // Calculate performance | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | | ||
+ | // Get kernel profiling info | ||
+ | clGetEventProfilingInfo(ndrEvt, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(ndrEvt, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | double time = (double)endTime - (double)startTime; | ||
+ | printf("\nKernel execution internal: time %lf, GB/s = %lf\n", | ||
+ | time*1.0e-9, 2.0*N*N*sizeof(float)/(time*1.0e-9)/1024/1024/1024); | ||
+ | |||
+ | printf("Kernel execution external: time %lf, GB/s = %lf\n\n", | ||
+ | t2-t1, 2.0*N*N*sizeof(float)/(t2-t1)/1024/1024/1024); | ||
+ | |||
+ | // Read B from device memory | ||
+ | clEnqueueReadBuffer(queue, d_C, CL_TRUE, 0, size_bytesw, C, 0, 0, 0); | ||
+ | |||
+ | // Free device memory | ||
+ | clReleaseMemObject(d_A); | ||
+ | clReleaseMemObject(d_B); | ||
+ | clReleaseMemObject(d_C); | ||
+ | |||
+ | return(0); | ||
+ | |||
+ | }</code> | ||
+ | ==== Kod wogu ==== | ||
+ | === server.c ==== | ||
+ | <code c> | ||
+ | /* | ||
+ | C socket server example, handles multiple clients using threads | ||
+ | Compile | ||
+ | gcc server.c -lpthread -o server | ||
+ | */ | ||
+ | |||
+ | #include<stdio.h> | ||
+ | #include<string.h> //strlen | ||
+ | #include<stdlib.h> //strlen | ||
+ | #include<sys/socket.h> | ||
+ | #include<arpa/inet.h> //inet_addr | ||
+ | #include<unistd.h> //write | ||
+ | #include<pthread.h> //for threading , link with lpthread | ||
+ | #include <unistd.h> | ||
+ | |||
+ | //the thread function | ||
+ | void *connection_handler(void *); | ||
+ | |||
+ | int main(int argc , char *argv[]) | ||
+ | { | ||
+ | int counter = 0; | ||
+ | pid_t pid = fork(); | ||
+ | |||
+ | if (pid == 0) | ||
+ | { | ||
+ | int sock; | ||
+ | struct sockaddr_in server; | ||
+ | char message[1000] , server_reply[2000]; | ||
+ | int counter; | ||
+ | unsigned int ip[] = { 192, 168, 102, 1 }; | ||
+ | char* ip_final; | ||
+ | char *buf; | ||
+ | size_t sz; | ||
+ | |||
+ | //Create socket | ||
+ | |||
+ | for ( counter = 0; counter < 255; ++counter ) { | ||
+ | |||
+ | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (sock == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | //printf("192.168.102.%d", counter); | ||
+ | |||
+ | sz = snprintf(NULL, 0, "192.168.102.%d", counter); | ||
+ | buf = (char *)malloc(sz + 1); /* make sure you check for != NULL in real code */ | ||
+ | | ||
+ | server.sin_addr.s_addr = inet_addr(buf); | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_port = htons( 65001 ); | ||
+ | |||
+ | //Connect to remote server | ||
+ | if (connect(sock , (struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | perror("connect failed. Error"); | ||
+ | continue; | ||
+ | } | ||
+ | |||
+ | puts("Connected\n"); | ||
+ | |||
+ | //keep communicating with server | ||
+ | while(1) | ||
+ | { | ||
+ | //Send some data | ||
+ | if( send(sock , "Werset 6-client" , 15 , 0) < 0) | ||
+ | { | ||
+ | puts("Send failed"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //Receive a reply from the server | ||
+ | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
+ | { | ||
+ | puts("recv failed"); | ||
+ | break; | ||
+ | } | ||
+ | |||
+ | puts(server_reply); | ||
+ | |||
+ | break; | ||
+ | } | ||
+ | |||
+ | close(sock); | ||
+ | } | ||
+ | } | ||
+ | else if (pid > 0) | ||
+ | { | ||
+ | int socket_desc , client_sock , c; | ||
+ | struct sockaddr_in server , client; | ||
+ | |||
+ | //Create socket | ||
+ | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (socket_desc == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | //Prepare the sockaddr_in structure | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_addr.s_addr = INADDR_ANY; | ||
+ | server.sin_port = htons( 65001 ); | ||
+ | |||
+ | //Bind | ||
+ | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | //print the error message | ||
+ | perror("bind failed. Error"); | ||
+ | return 1; | ||
+ | } | ||
+ | puts("bind done"); | ||
+ | |||
+ | //Listen | ||
+ | listen(socket_desc , 3); | ||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | |||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | pthread_t thread_id; | ||
+ | |||
+ | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
+ | { | ||
+ | puts("Connection accepted"); | ||
+ | |||
+ | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
+ | { | ||
+ | perror("could not create thread"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //Now join the thread , so that we dont terminate before the thread | ||
+ | //pthread_join( thread_id , NULL); | ||
+ | puts("Handler assigned"); | ||
+ | } | ||
+ | |||
+ | if (client_sock < 0) | ||
+ | { | ||
+ | perror("accept failed"); | ||
+ | return 1; | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | // fork failed | ||
+ | printf("fork() failed!\n"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | /* | ||
+ | * This will handle connection for each client | ||
+ | * */ | ||
+ | void *connection_handler(void *socket_desc) | ||
+ | { | ||
+ | //Get the socket descriptor | ||
+ | int sock = *(int*)socket_desc; | ||
+ | int read_size; | ||
+ | char *message , client_message[2000]; | ||
+ | |||
+ | //Receive a message from client | ||
+ | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
+ | { | ||
+ | printf("%s\n", client_message); | ||
+ | |||
+ | //end of string marker | ||
+ | client_message[read_size] = '\0'; | ||
+ | |||
+ | //Send the message back to client | ||
+ | write(sock , "Werset 6" , 8); | ||
+ | |||
+ | //clear the message buffer | ||
+ | memset(client_message, 0, 2000); | ||
+ | } | ||
+ | |||
+ | if(read_size == 0) | ||
+ | { | ||
+ | puts("Client disconnected"); | ||
+ | fflush(stdout); | ||
+ | } | ||
+ | else if(read_size == -1) | ||
+ | { | ||
+ | perror("recv failed"); | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | }</code> | ||
+ | === Makefile === | ||
+ | <code bash># kompilator c | ||
+ | CC = gcc | ||
+ | |||
+ | # konsolidator | ||
+ | CFLAGS=-g | ||
+ | |||
+ | all: server client | ||
+ | |||
+ | server: server.o | ||
+ | $(CC) $(LFLAGS) server.o -lpthread -o server | ||
+ | |||
+ | server.o: server.c | ||
+ | $(CC) -c server.c -o server.o | ||
+ | |||
+ | client: client.o | ||
+ | $(CC) client.o -o client | ||
+ | |||
+ | client.o: client.c | ||
+ | $(CC) -c client.c -o client.o | ||
+ | |||
+ | clean: | ||
+ | rm -f *.o</code> | ||
+ | ===== Zajęcia 9 ===== | ||
+ | <code c>/* | ||
+ | C socket server example, handles multiple clients using threads | ||
+ | Compile | ||
+ | gcc server.c -lpthread -o server | ||
+ | */ | ||
+ | |||
+ | #include<stdio.h> | ||
+ | #include<string.h> //strlen | ||
+ | #include<stdlib.h> //strlen | ||
+ | #include<sys/socket.h> | ||
+ | #include<arpa/inet.h> //inet_addr | ||
+ | #include<unistd.h> //write | ||
+ | #include<pthread.h> //for threading , link with lpthread | ||
+ | |||
+ | //the thread function | ||
+ | void *connection_handler(void *); | ||
+ | |||
+ | int main(int argc , char *argv[]) | ||
+ | { | ||
+ | int socket_desc , client_sock , c; | ||
+ | struct sockaddr_in server , client; | ||
+ | |||
+ | //Create socket | ||
+ | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (socket_desc == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | //Prepare the sockaddr_in structure | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_addr.s_addr = INADDR_ANY; | ||
+ | server.sin_port = htons( 8888 ); | ||
+ | |||
+ | //Bind | ||
+ | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | //print the error message | ||
+ | perror("bind failed. Error"); | ||
+ | return 1; | ||
+ | } | ||
+ | puts("bind done"); | ||
+ | |||
+ | //Listen | ||
+ | listen(socket_desc , 3); | ||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | |||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | pthread_t thread_id; | ||
+ | |||
+ | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
+ | { | ||
+ | puts("Connection accepted"); | ||
+ | |||
+ | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
+ | { | ||
+ | perror("could not create thread"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //Now join the thread , so that we dont terminate before the thread | ||
+ | //pthread_join( thread_id , NULL); | ||
+ | puts("Handler assigned"); | ||
+ | } | ||
+ | |||
+ | if (client_sock < 0) | ||
+ | { | ||
+ | perror("accept failed"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | /* | ||
+ | * This will handle connection for each client | ||
+ | * */ | ||
+ | void *connection_handler(void *socket_desc) | ||
+ | { | ||
+ | //Get the socket descriptor | ||
+ | int sock = *(int*)socket_desc; | ||
+ | int read_size; | ||
+ | char *message , client_message[2000]; | ||
+ | |||
+ | //Receive a message from client | ||
+ | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
+ | { | ||
+ | //end of string marker | ||
+ | client_message[read_size] = '\0'; | ||
+ | |||
+ | //Send the message back to client | ||
+ | write(sock , "Werset 6" , strlen(client_message)); | ||
+ | |||
+ | //clear the message buffer | ||
+ | memset(client_message, 0, 2000); | ||
+ | } | ||
+ | |||
+ | if(read_size == 0) | ||
+ | { | ||
+ | puts("Client disconnected"); | ||
+ | fflush(stdout); | ||
+ | } | ||
+ | else if(read_size == -1) | ||
+ | { | ||
+ | perror("recv failed"); | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } </code> | ||
+ | |||
+ | ==== Client C ==== | ||
+ | |||
+ | <code c>#include<stdio.h> //printf | ||
+ | #include<string.h> //strlen | ||
+ | #include<sys/socket.h> //socket | ||
+ | #include<arpa/inet.h> //inet_addr | ||
+ | |||
+ | int main(int argc , char *argv[]) | ||
+ | { | ||
+ | int sock; | ||
+ | struct sockaddr_in server; | ||
+ | char message[1000] , server_reply[2000]; | ||
+ | |||
+ | //Create socket | ||
+ | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (sock == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | server.sin_addr.s_addr = inet_addr("127.0.0.1"); | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_port = htons( 8888 ); | ||
+ | |||
+ | //Connect to remote server | ||
+ | if (connect(sock , (struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | perror("connect failed. Error"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | puts("Connected\n"); | ||
+ | |||
+ | //keep communicating with server | ||
+ | while(1) | ||
+ | { | ||
+ | printf("Enter message : "); | ||
+ | scanf("%s" , message); | ||
+ | |||
+ | //Send some data | ||
+ | if( send(sock , message , strlen(message) , 0) < 0) | ||
+ | { | ||
+ | puts("Send failed"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //Receive a reply from the server | ||
+ | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
+ | { | ||
+ | puts("recv failed"); | ||
+ | break; | ||
+ | } | ||
+ | |||
+ | puts("Server reply :"); | ||
+ | puts(server_reply); | ||
+ | } | ||
+ | |||
+ | close(sock); | ||
+ | return 0; | ||
+ | }</code> | ||
+ | |||
+ | |||
+ | ==== Kod Maciek Gonzo itp ==== | ||
+ | <code c> | ||
+ | #include<stdio.h> | ||
+ | #include<string.h> //strlen | ||
+ | #include<stdlib.h> //strlen | ||
+ | #include<sys/socket.h> | ||
+ | #include<arpa/inet.h> //inet_addr | ||
+ | #include<unistd.h> //write | ||
+ | #include<pthread.h> //for threading , link with lpthread | ||
+ | |||
+ | //the thread function | ||
+ | void *connection_handler(void *); | ||
+ | |||
+ | void *client_function(void *socket_desc){ | ||
+ | int sock; | ||
+ | struct sockaddr_in server_id; | ||
+ | char message[1000] , server_reply[2000]; | ||
+ | |||
+ | //Create socket | ||
+ | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (sock == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | // 192.168.102.63 wogu | ||
+ | server_id.sin_addr.s_addr = inet_addr("localhost"); | ||
+ | server_id.sin_family = AF_INET; | ||
+ | server_id.sin_port = htons( 65000 ); | ||
+ | |||
+ | //Connect to remote server | ||
+ | |||
+ | if (connect(sock , (struct sockaddr *)&server_id , sizeof(server_id)) < 0) | ||
+ | { | ||
+ | perror("connect failed. Error"); | ||
+ | |||
+ | }else{ | ||
+ | |||
+ | puts("Connected\n"); | ||
+ | |||
+ | //keep communicating with server | ||
+ | while(1) | ||
+ | { | ||
+ | printf("Enter message : "); | ||
+ | scanf("%s" , message); | ||
+ | |||
+ | //Send some data | ||
+ | if( send(sock , message , strlen(message) , 0) < 0) | ||
+ | { | ||
+ | puts("Send failed"); | ||
+ | |||
+ | } | ||
+ | |||
+ | //Receive a reply from the server | ||
+ | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
+ | { | ||
+ | puts("recv failed"); | ||
+ | break; | ||
+ | } | ||
+ | |||
+ | puts("Server reply :"); | ||
+ | puts(server_reply); | ||
+ | } | ||
+ | |||
+ | close(sock); | ||
+ | } | ||
+ | |||
+ | }; | ||
+ | |||
+ | |||
+ | void *server_function(void * sock){ | ||
+ | printf("SERVER Start"); | ||
+ | fflush(stdout); | ||
+ | int socket_desc , client_sock , c; | ||
+ | pthread_t thread_id; | ||
+ | struct sockaddr_in server , client; | ||
+ | |||
+ | //Create socket | ||
+ | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (socket_desc == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | //Prepare the sockaddr_in structure | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_addr.s_addr = INADDR_ANY; | ||
+ | server.sin_port = htons( 65000); | ||
+ | |||
+ | //Bind | ||
+ | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | //print the error message | ||
+ | perror("bind failed. Error"); | ||
+ | return; | ||
+ | } | ||
+ | puts("bind done"); | ||
+ | |||
+ | |||
+ | |||
+ | //Listen | ||
+ | listen(socket_desc , 3); | ||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | fflush(stdout); | ||
+ | |||
+ | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
+ | { | ||
+ | puts("Connection accepted"); | ||
+ | |||
+ | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
+ | { | ||
+ | perror("could not create thread"); | ||
+ | return; | ||
+ | } | ||
+ | |||
+ | //Now join the thread , so that we dont terminate before the thread | ||
+ | //pthread_join( thread_id , NULL); | ||
+ | puts("Handler assigned"); | ||
+ | } | ||
+ | |||
+ | if (client_sock < 0) | ||
+ | { | ||
+ | perror("accept failed"); | ||
+ | return; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | |||
+ | int main(int argc , char *argv[]) | ||
+ | { | ||
+ | pthread_t thread_id; | ||
+ | //Create server | ||
+ | if( pthread_create( &thread_id , NULL , server_function , NULL) != 0) | ||
+ | { | ||
+ | perror("could not create server thread"); | ||
+ | fflush(stdout); | ||
+ | return 1; | ||
+ | } | ||
+ | pthread_t thread_id2; | ||
+ | |||
+ | // Create client | ||
+ | if( pthread_create( &thread_id2 , NULL , client_function , NULL) != 0) | ||
+ | { | ||
+ | perror("could not create client thread"); | ||
+ | fflush(stdout); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | /* | ||
+ | * This will handle connection for each client | ||
+ | * */ | ||
+ | void *connection_handler(void *socket_desc) | ||
+ | { | ||
+ | //Get the socket descriptor | ||
+ | int sock = *(int*)socket_desc; | ||
+ | int read_size; | ||
+ | char *message , client_message[2000]; | ||
+ | |||
+ | //Receive a message from client | ||
+ | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
+ | { | ||
+ | //end of string marker | ||
+ | client_message[read_size] = '\0'; | ||
+ | |||
+ | //Send the message back to client | ||
+ | write(sock , "Werset 5" , strlen(client_message)); | ||
+ | |||
+ | //clear the message buffer | ||
+ | memset(client_message, 0, 2000); | ||
+ | } | ||
+ | |||
+ | if(read_size == 0) | ||
+ | { | ||
+ | puts("Client disconnected"); | ||
+ | fflush(stdout); | ||
+ | } | ||
+ | else if(read_size == -1) | ||
+ | { | ||
+ | perror("recv failed"); | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | </code> |