Różnice między wybraną wersją a wersją aktualną.
Poprzednia rewizja po obu stronach Poprzednia wersja Nowa wersja | Poprzednia wersja | ||
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/03/13 21:31] wogu |
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/05/19 11:13] (aktualna) 149.156.112.6 [tmp/opencl_mat_transp/mat_transp/mat_transp.c] |
||
---|---|---|---|
Linia 774: | Linia 774: | ||
return 0; | return 0; | ||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
+ | struct rekord wys; | ||
+ | MPI_Datatype rekord_typ; | ||
+ | int tab_dlug_blokow[3] = {1, 1, 1}; | ||
+ | MPI_Datatype tab_typow[3] = {MPI_DOUBLE, MPI_DOUBLE, MPI_INT}; | ||
+ | MPI_Aint podstawa, tab_odstepow[3]; | ||
+ | |||
+ | MPI_Get_address(&wys.x_min, &tab_odstepow[0]); | ||
+ | MPI_Get_address(&wys.x_max, &tab_odstepow[1]); | ||
+ | MPI_Get_address(&wys.n, &tab_odstepow[2]); | ||
+ | |||
+ | MPI_Type_struct(3, tab_dlug_blokow, tab_odstepow, tab_typow, &rekord_typ); | ||
+ | MPI_Type_commit(&rekord_typ); | ||
+ | MPI_Recv ( &wys, 1, rekord_typ, 0, 1, parentcomm, &status ); | ||
+ | </code> | ||
+ | |||
+ | ====== Zajęcia 4 ====== | ||
+ | wkrótce | ||
+ | ====== Zajęcia 5 ====== | ||
+ | ==== Makefile ==== | ||
+ | <code bash># optimization and other system dependent options | ||
+ | #include make.$(SRR_ARCH) | ||
+ | # or directly | ||
+ | include make.lab_404_NVIDIA | ||
+ | |||
+ | NAME = Hello_GPU | ||
+ | |||
+ | program: main.o | ||
+ | $(CC) $(LDFL) main.o $(LIB) -o $(NAME) | ||
+ | |||
+ | main.o: main.c | ||
+ | $(CC) $(CFL) -c main.c $(INC) -o main.o | ||
+ | |||
+ | clean: | ||
+ | rm -f obj/* | ||
+ | rm -f $(NAME) | ||
+ | </code> | ||
+ | ==== make.lab_404_NVIDIA ==== | ||
+ | <code bash># C compiler | ||
+ | #CC = icc | ||
+ | CC = gcc | ||
+ | |||
+ | # C++ compiler | ||
+ | CPPC = icpc | ||
+ | #CPPC = g++ | ||
+ | |||
+ | # Loader (to link C/C++ and Fortran libraries) | ||
+ | LD = icpc | ||
+ | #LD = g++ | ||
+ | |||
+ | # Archiver | ||
+ | AR = ar r | ||
+ | |||
+ | # For removing files | ||
+ | RM = rm -f | ||
+ | |||
+ | # Include directories | ||
+ | INC = -I/opt/cuda7/include | ||
+ | |||
+ | # Standard and/or local libraries | ||
+ | LIB = -L/opt/cuda7/lib64 -lOpenCL | ||
+ | |||
+ | # C optimization and other flags | ||
+ | #CFL(icc) = -O3 -openmp | ||
+ | #CFL(gcc) = -O3 -fopenmp | ||
+ | #CFL(icc,debug) = -g | ||
+ | #CFL(gcc) = -g | ||
+ | CFL = | ||
+ | |||
+ | # Loader optimization and other flags | ||
+ | #LDFL(debug) = -g | ||
+ | #LDFL(profile) = -p | ||
+ | LDFL = | ||
+ | |||
+ | </code> | ||
+ | ==== main.c ==== | ||
+ | <code c>#include<stdlib.h> | ||
+ | #include<stdio.h> | ||
+ | |||
+ | #include <CL/cl.h> | ||
+ | |||
+ | // functions to display platform and device properties | ||
+ | void DisplayPlatformInfo( | ||
+ | cl_platform_id id, | ||
+ | cl_platform_info name, | ||
+ | char* str) | ||
+ | { | ||
+ | cl_int retval; | ||
+ | size_t paramValueSize; | ||
+ | | ||
+ | retval = clGetPlatformInfo( | ||
+ | id, | ||
+ | name, | ||
+ | 0, | ||
+ | NULL, | ||
+ | ¶mValueSize); | ||
+ | if (retval != CL_SUCCESS){ | ||
+ | printf("Failed to find OpenCL platform %s.\n", str); | ||
+ | return; | ||
+ | } | ||
+ | | ||
+ | char * info = (char *)malloc(sizeof(char) * paramValueSize); | ||
+ | retval = clGetPlatformInfo( | ||
+ | id, | ||
+ | name, | ||
+ | paramValueSize, | ||
+ | info, | ||
+ | NULL); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to find OpenCL platform %s.\n", str); | ||
+ | return; | ||
+ | } | ||
+ | | ||
+ | printf("\t%s:\t%s\n", str, info ); | ||
+ | free(info); | ||
+ | } | ||
+ | |||
+ | void DisplayDeviceInfo_char( | ||
+ | cl_device_id id, | ||
+ | cl_device_info name, | ||
+ | char* str) | ||
+ | { | ||
+ | cl_int retval; | ||
+ | size_t paramValueSize; | ||
+ | | ||
+ | retval = clGetDeviceInfo( | ||
+ | id, | ||
+ | name, | ||
+ | 0, | ||
+ | NULL, | ||
+ | ¶mValueSize); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to find OpenCL device info %s.\n", str); | ||
+ | return; | ||
+ | } | ||
+ | | ||
+ | char * info = (char *)malloc(sizeof(char) * paramValueSize); | ||
+ | retval = clGetDeviceInfo( | ||
+ | id, | ||
+ | name, | ||
+ | paramValueSize, | ||
+ | info, | ||
+ | NULL); | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to find OpenCL device info %s.\n", str); | ||
+ | return; | ||
+ | } | ||
+ | |||
+ | printf("\t\t%s:\t%s\n", str, info ); | ||
+ | free(info); | ||
+ | }; | ||
+ | |||
+ | void DisplayDeviceInfo_ulong( | ||
+ | cl_device_id id, | ||
+ | cl_device_info name, | ||
+ | char* str) | ||
+ | { | ||
+ | cl_int retval; | ||
+ | size_t paramValueSize;// = sizeof(cl_ulong); | ||
+ | | ||
+ | retval = clGetDeviceInfo( | ||
+ | id, | ||
+ | name, | ||
+ | 0, | ||
+ | NULL, | ||
+ | ¶mValueSize); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to find OpenCL device info %s.\n", str); | ||
+ | return; | ||
+ | } | ||
+ | | ||
+ | cl_ulong * info = (cl_ulong *)malloc(sizeof(cl_ulong) * paramValueSize); | ||
+ | retval = clGetDeviceInfo( | ||
+ | id, | ||
+ | name, | ||
+ | paramValueSize, | ||
+ | info, | ||
+ | NULL); | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to find OpenCL device info %s.\n", str); | ||
+ | return; | ||
+ | } | ||
+ | |||
+ | printf("\t\t%s:\t%lu MB\n", str, info[0] / 1024 / 1024 ); | ||
+ | free(info); | ||
+ | }; | ||
+ | |||
+ | void DisplayDeviceInfo_uint( | ||
+ | cl_device_id id, | ||
+ | cl_device_info name, | ||
+ | char* str) | ||
+ | { | ||
+ | cl_int retval; | ||
+ | size_t paramValueSize;// = sizeof(cl_ulong); | ||
+ | | ||
+ | retval = clGetDeviceInfo( | ||
+ | id, | ||
+ | name, | ||
+ | 0, | ||
+ | NULL, | ||
+ | ¶mValueSize); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to find OpenCL device info %s.\n", str); | ||
+ | return; | ||
+ | } | ||
+ | | ||
+ | cl_uint * info = (cl_uint *)malloc(sizeof(cl_uint) * paramValueSize); | ||
+ | retval = clGetDeviceInfo( | ||
+ | id, | ||
+ | name, | ||
+ | paramValueSize, | ||
+ | info, | ||
+ | NULL); | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to find OpenCL device info %s.\n", str); | ||
+ | return; | ||
+ | } | ||
+ | |||
+ | printf("\t\t%s:\t%d MHz\n", str, info[0] ); | ||
+ | free(info); | ||
+ | }; | ||
+ | |||
+ | |||
+ | // main program controlling execution of CPU code and OpenCL kernels | ||
+ | int main(int argc, char** argv) | ||
+ | { | ||
+ | cl_uint number_of_contexts = 2; | ||
+ | cl_context context = NULL; | ||
+ | cl_context list_of_contexts[2] = {0,0}; | ||
+ | cl_command_queue commandQueue = 0; | ||
+ | cl_program program = 0; | ||
+ | cl_uint number_of_devices; | ||
+ | cl_device_id device = 0; | ||
+ | cl_device_id *list_of_devices; | ||
+ | cl_device_type type; | ||
+ | cl_kernel kernel = 0; | ||
+ | cl_mem memObjects[3] = { 0, 0, 0 }; | ||
+ | cl_int retval; | ||
+ | int icon, idev; | ||
+ | cl_uint numPlatforms; | ||
+ | cl_platform_id * platformIds; | ||
+ | cl_uint i,j; | ||
+ | |||
+ | // flag to control displaying | ||
+ | int Monitor = 1; | ||
+ | |||
+ | |||
+ | // Create OpenCL contexts | ||
+ | |||
+ | // First, query the total number of platforms | ||
+ | retval = clGetPlatformIDs(0, (cl_platform_id *) NULL, &numPlatforms); | ||
+ | |||
+ | // Next, allocate memory for the installed plaforms, and qeury | ||
+ | // to get the list. | ||
+ | platformIds = (cl_platform_id *)malloc(sizeof(cl_platform_id) * numPlatforms); | ||
+ | |||
+ | // Then, query the platform IDs | ||
+ | retval = clGetPlatformIDs(numPlatforms, platformIds, NULL); | ||
+ | |||
+ | if(Monitor>=0){ | ||
+ | printf("Number of platforms: \t%d\n", numPlatforms); | ||
+ | } | ||
+ | |||
+ | // Iterate through the list of platforms displaying associated information | ||
+ | for (i = 0; i < numPlatforms; i++) { | ||
+ | |||
+ | if(Monitor>0){ | ||
+ | |||
+ | printf("Platform ID - %d\n",i); | ||
+ | // First we display information associated with the platform | ||
+ | DisplayPlatformInfo( | ||
+ | platformIds[i], | ||
+ | CL_PLATFORM_NAME, | ||
+ | "CL_PLATFORM_NAME"); | ||
+ | DisplayPlatformInfo( | ||
+ | platformIds[i], | ||
+ | CL_PLATFORM_PROFILE, | ||
+ | "CL_PLATFORM_PROFILE"); | ||
+ | DisplayPlatformInfo( | ||
+ | platformIds[i], | ||
+ | CL_PLATFORM_VERSION, | ||
+ | "CL_PLATFORM_VERSION"); | ||
+ | DisplayPlatformInfo( | ||
+ | platformIds[i], | ||
+ | CL_PLATFORM_VENDOR, | ||
+ | "CL_PLATFORM_VENDOR"); | ||
+ | } | ||
+ | |||
+ | } | ||
+ | |||
+ | // For the first platform | ||
+ | int iplat; | ||
+ | printf("Select Platform ID: "); scanf("%d", &iplat); | ||
+ | |||
+ | // Query the set of devices associated with the platform | ||
+ | retval = clGetDeviceIDs( | ||
+ | platformIds[iplat], | ||
+ | CL_DEVICE_TYPE_ALL, | ||
+ | 0, | ||
+ | NULL, | ||
+ | &number_of_devices); | ||
+ | | ||
+ | | ||
+ | list_of_devices = | ||
+ | (cl_device_id *) malloc (sizeof(cl_device_id) * number_of_devices); | ||
+ | | ||
+ | retval = clGetDeviceIDs( | ||
+ | platformIds[iplat], | ||
+ | CL_DEVICE_TYPE_ALL, | ||
+ | number_of_devices, | ||
+ | list_of_devices, | ||
+ | NULL); | ||
+ | | ||
+ | if(Monitor>=0){ | ||
+ | printf("Number of devices: \t%d\n", number_of_devices); | ||
+ | } | ||
+ | |||
+ | // Iterate through each device, displaying associated information | ||
+ | for (j = 0; j < number_of_devices; j++) { | ||
+ | | ||
+ | clGetDeviceInfo(list_of_devices[j], CL_DEVICE_TYPE, | ||
+ | sizeof(cl_device_type), &type, NULL); | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | | ||
+ | DisplayDeviceInfo_char( | ||
+ | list_of_devices[j], | ||
+ | CL_DEVICE_NAME, | ||
+ | "CL_DEVICE_NAME"); | ||
+ | | ||
+ | DisplayDeviceInfo_char( | ||
+ | list_of_devices[j], | ||
+ | CL_DEVICE_VENDOR, | ||
+ | "CL_DEVICE_VENDOR"); | ||
+ | | ||
+ | DisplayDeviceInfo_char( | ||
+ | list_of_devices[j], | ||
+ | CL_DEVICE_VERSION, | ||
+ | "CL_DEVICE_VERSION"); | ||
+ | |||
+ | DisplayDeviceInfo_ulong( | ||
+ | list_of_devices[j], | ||
+ | CL_DEVICE_GLOBAL_MEM_SIZE, | ||
+ | "CL_DEVICE_GLOBAL_MEM_SIZE"); | ||
+ | |||
+ | DisplayDeviceInfo_uint( | ||
+ | list_of_devices[j], | ||
+ | CL_DEVICE_MAX_CLOCK_FREQUENCY, | ||
+ | "CL_DEVICE_MAX_CLOCK_FREQUENCY"); | ||
+ | printf("\n"); | ||
+ | } | ||
+ | } | ||
+ | | ||
+ | // Next, create OpenCL contexts on platforms | ||
+ | cl_context_properties contextProperties[] = { | ||
+ | CL_CONTEXT_PLATFORM, | ||
+ | (cl_context_properties)platformIds[iplat], | ||
+ | 0 | ||
+ | }; | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("Creating CPU context %d on platform %d\n", 1, iplat); | ||
+ | } | ||
+ | | ||
+ | list_of_contexts[1] = | ||
+ | clCreateContextFromType(contextProperties, | ||
+ | CL_DEVICE_TYPE_CPU, NULL, NULL, &retval); | ||
+ | | ||
+ | if(Monitor>=0 && retval != CL_SUCCESS){ | ||
+ | printf("Could not create CPU context on platform %d\n", i); | ||
+ | } | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("Creating GPU context 0 on platform %d\n", iplat); | ||
+ | } | ||
+ | | ||
+ | list_of_contexts[0] = | ||
+ | clCreateContextFromType(contextProperties, | ||
+ | CL_DEVICE_TYPE_GPU, NULL, NULL, &retval); | ||
+ | | ||
+ | if(Monitor>=0 && retval != CL_SUCCESS){ | ||
+ | printf("Could not create GPU context on platform %d\n", i); | ||
+ | } | ||
+ | | ||
+ | // in a loop over devices of the seleceted platform | ||
+ | for(idev=0; idev<number_of_devices;idev++){ | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("\nFor context %d and device %d:\n", | ||
+ | idev, idev); | ||
+ | } | ||
+ | device = list_of_devices[idev]; | ||
+ | icon = idev; | ||
+ | | ||
+ | // choose OpenCL context on first available platform | ||
+ | context = list_of_contexts[icon]; | ||
+ | | ||
+ | if(context !=0){ | ||
+ | | ||
+ | commandQueue = clCreateCommandQueue(context, device, 0, NULL); | ||
+ | if (commandQueue == NULL) { | ||
+ | printf("Failed to create commandQueue for device %d\n", idev); | ||
+ | exit(0); | ||
+ | } | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("Reading program from source\n"); | ||
+ | } | ||
+ | |||
+ | // read source code from file | ||
+ | FILE *fp; | ||
+ | char* source; | ||
+ | long int size; | ||
+ | | ||
+ | fp = fopen("HelloWorld.cl", "rb"); | ||
+ | if(!fp) { | ||
+ | printf("Could not open kernel file\n"); | ||
+ | exit(-1); | ||
+ | } | ||
+ | int status = fseek(fp, 0, SEEK_END); | ||
+ | if(status != 0) { | ||
+ | printf("Error seeking to end of file\n"); | ||
+ | exit(-1); | ||
+ | } | ||
+ | size = ftell(fp); | ||
+ | if(size < 0) { | ||
+ | printf("Error getting file position\n"); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | rewind(fp); | ||
+ | | ||
+ | source = (char *)malloc(size + 1); | ||
+ | | ||
+ | int i; | ||
+ | for (i = 0; i < size+1; i++) { | ||
+ | source[i]='\0'; | ||
+ | } | ||
+ | | ||
+ | if(source == NULL) { | ||
+ | printf("Error allocating space for the kernel source\n"); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | fread(source, 1, size, fp); | ||
+ | source[size] = '\0'; | ||
+ | |||
+ | const char* source_const = source; | ||
+ | cl_program program = clCreateProgramWithSource(context, 1, | ||
+ | &source_const, | ||
+ | NULL, NULL); | ||
+ | if (program == NULL) | ||
+ | { | ||
+ | printf("Failed to create CL program from source.\n"); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("Creating program and kernel\n"); | ||
+ | } | ||
+ | // build program (passing options to compiler if necessary | ||
+ | retval = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); | ||
+ | char* buildLog; size_t size_of_buildLog; | ||
+ | clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, | ||
+ | 0, NULL, &size_of_buildLog); | ||
+ | buildLog = malloc(size_of_buildLog+1); | ||
+ | clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, | ||
+ | size_of_buildLog, buildLog, NULL); | ||
+ | buildLog[size_of_buildLog]= '\0'; | ||
+ | printf("Kernel buildLog: %s\n", buildLog); | ||
+ | if (retval != CL_SUCCESS) | ||
+ | { | ||
+ | printf("Error in kernel\n"); | ||
+ | clReleaseProgram(program); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | // Create OpenCL kernel | ||
+ | kernel = clCreateKernel(program, "hello_kernel", NULL); | ||
+ | if (kernel == NULL) | ||
+ | { | ||
+ | printf("Failed to create kernel.\n"); | ||
+ | exit(0); | ||
+ | } | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("Creating memory objects\n"); | ||
+ | } | ||
+ | // Create memory objects that will be used as arguments to | ||
+ | // kernel. First create host memory arrays that will be | ||
+ | // used to store the arguments to the kernel | ||
+ | float result[1]; | ||
+ | float a[1]; | ||
+ | float b[1]; | ||
+ | a[0] = 2; | ||
+ | b[0] = 2; | ||
+ | | ||
+ | memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY , | ||
+ | sizeof(float), NULL, NULL); | ||
+ | memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY , | ||
+ | sizeof(float), NULL, NULL); | ||
+ | memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, | ||
+ | sizeof(float), NULL, NULL); | ||
+ | | ||
+ | if (memObjects[0]==NULL || memObjects[1]==NULL || memObjects[2]==NULL){ | ||
+ | printf("Error creating memory objects.\n"); | ||
+ | return 0; | ||
+ | } | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("Sending kernel arguments\n"); | ||
+ | } | ||
+ | retval = clEnqueueWriteBuffer( | ||
+ | commandQueue, | ||
+ | memObjects[0], | ||
+ | CL_FALSE, | ||
+ | 0, | ||
+ | sizeof(float), | ||
+ | a, | ||
+ | 0, | ||
+ | NULL, | ||
+ | NULL); | ||
+ | | ||
+ | // Use clEnqueueWriteBuffer() to write input array B to | ||
+ | // the device buffer bufferB | ||
+ | retval = clEnqueueWriteBuffer( | ||
+ | commandQueue, | ||
+ | memObjects[1], | ||
+ | CL_FALSE, | ||
+ | 0, | ||
+ | sizeof(float), | ||
+ | b, | ||
+ | 0, | ||
+ | NULL, | ||
+ | NULL); | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | if (retval != CL_SUCCESS) | ||
+ | { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, commandQueue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("Running the kernel!\n"); | ||
+ | } | ||
+ | size_t globalWorkSize[1] = { 1 }; | ||
+ | size_t localWorkSize[1] = { 1 }; | ||
+ | | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, NULL); | ||
+ | if (retval != CL_SUCCESS) | ||
+ | { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, commandQueue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | if(Monitor>0){ | ||
+ | printf("Transfering back results\n"); | ||
+ | } | ||
+ | // Read the output buffer back to the Host | ||
+ | retval = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, | ||
+ | 0, sizeof(float), result, | ||
+ | 0, NULL, NULL); | ||
+ | if (retval != CL_SUCCESS) | ||
+ | { | ||
+ | printf("Failed to read result buffer.\n"); | ||
+ | //Cleanup(context, commandQueue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | // Verify the output | ||
+ | if(result[0]==4) { | ||
+ | printf("Output is correct: %lf + %lf = %lf\n", | ||
+ | a[0], b[0], result[0]); | ||
+ | } else { | ||
+ | printf("Output is incorrect: %lf + %lf != %lf\n", | ||
+ | a[0], b[0], result[0]); | ||
+ | } | ||
+ | | ||
+ | for (i = 0; i < 3; i++) | ||
+ | { | ||
+ | if (memObjects[i] != 0) | ||
+ | clReleaseMemObject(memObjects[i]); | ||
+ | } | ||
+ | if (commandQueue != 0) | ||
+ | clReleaseCommandQueue(commandQueue); | ||
+ | | ||
+ | if (kernel != 0) | ||
+ | clReleaseKernel(kernel); | ||
+ | | ||
+ | if (program != 0) | ||
+ | clReleaseProgram(program); | ||
+ | | ||
+ | } | ||
+ | } | ||
+ | | ||
+ | free(list_of_devices); | ||
+ | free(platformIds); | ||
+ | | ||
+ | return 0; | ||
+ | } | ||
+ | </code> | ||
+ | ==== HelloWorld.cl ==== | ||
+ | <code c> | ||
+ | __kernel void hello_kernel(__global const float *a, | ||
+ | __global const float *b, | ||
+ | __global float *result) | ||
+ | { | ||
+ | int gid = get_global_id(0); | ||
+ | |||
+ | if(gid<1) result[gid] = a[gid] + b[gid]; | ||
}</code> | }</code> | ||
+ | ===== Zajęcia 6 ===== | ||
+ | Pliki zadania: {{:studia:magisterskie:1sem:opencl_vecadd.tgz|}} | ||
+ | Działające 3 kernele: {{:studia:magisterskie:1sem:sala404.zip|}} | ||
+ | |||
+ | ==== opencl_vecadd/vecadd_host/execute_kernels.c ==== | ||
+ | <code c>#include<stdlib.h> | ||
+ | #include<stdio.h> | ||
+ | #include <math.h> | ||
+ | |||
+ | #include<omp.h> | ||
+ | #include <CL/cl.h> | ||
+ | |||
+ | #include"uth_ocl_intf.h" | ||
+ | |||
+ | #include"./vecadd_host.h" | ||
+ | #include"./hardware_params.h" | ||
+ | |||
+ | #define time_measurments | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | #include"uth_time_intf.h" | ||
+ | static double t_begin, t_end, t_total; | ||
+ | #endif | ||
+ | |||
+ | int execute_kernel_2_CPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // create the kernel for CPU | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_2_blocks_kernel", "vecadd_2_blocks.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. Restoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | int nr_cores_CPU = omp_get_num_procs(); | ||
+ | int nr_threads = nr_cores_CPU; // total number of threads | ||
+ | int work_group_size = 1; // number of threads in work-group | ||
+ | int nr_work_groups = nr_threads / work_group_size; | ||
+ | globalWorkSize[0] = nr_threads ; | ||
+ | localWorkSize[0] = work_group_size ; | ||
+ | | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | nr_cores_CPU, nr_cores_CPU, 1); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | work_group_size, nr_threads / work_group_size, | ||
+ | nr_threads / work_group_size / nr_cores_CPU); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/nr_cores_CPU, | ||
+ | globalWorkSize[0]/nr_cores_CPU); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | array_size, array_size/globalWorkSize[0], array_size/nr_cores_CPU); | ||
+ | | ||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | int size = array_size; | ||
+ | int size_per_thread = ceilf((float)size/nr_threads); | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
+ | retval |= clSetKernelArg(kernel, 4, sizeof(int), &size_per_thread); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | |||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | int execute_kernel_3_CPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for CPU | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_3_opt_blocks_kernel","vecadd_3_opt_blocks.cl",monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | int nr_cores_CPU = omp_get_num_procs(); | ||
+ | int nr_threads = nr_cores_CPU; // total number of threads | ||
+ | int work_group_size = 1; // number of threads in work-group | ||
+ | int nr_work_groups = nr_threads / work_group_size; | ||
+ | globalWorkSize[0] = nr_threads ; | ||
+ | localWorkSize[0] = work_group_size ; | ||
+ | | ||
+ | printf("\t\tExecuting kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | nr_cores_CPU, nr_cores_CPU, 1); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | work_group_size, nr_threads / work_group_size, | ||
+ | nr_threads / work_group_size / nr_cores_CPU); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/nr_cores_CPU, | ||
+ | globalWorkSize[0]/nr_cores_CPU); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | array_size, array_size/globalWorkSize[0], array_size/nr_cores_CPU); | ||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | int size = array_size; | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | // tutaj modyfikujemy | ||
+ | int execute_kernel_0_GPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for GPU | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_0_kernel", "vecadd_0.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | | ||
+ | // globalna liczba wątków i lokalna liczba wątków | ||
+ | int work_group_size = WORK_GROUP_SIZE; | ||
+ | globalWorkSize[0] = array_size; | ||
+ | localWorkSize[0] = work_group_size; | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
+ | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | } | ||
+ | |||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | // ustawiamy liczbę argumentów | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | int execute_kernel_1_GPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for GPU | ||
+ | /* | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
+ | |||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: */ | ||
+ | "vecadd_1_kernel", "vecadd_1.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | | ||
+ | int work_group_size = WORK_GROUP_SIZE; | ||
+ | globalWorkSize[0] = array_size / 4; | ||
+ | localWorkSize[0] = work_group_size; | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
+ | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | } | ||
+ | |||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | int execute_kernel_2_GPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for GPU | ||
+ | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: */ | ||
+ | "vecadd_2_blocks_kernel", "vecadd_2_blocks.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | | ||
+ | int work_group_size = WORK_GROUP_SIZE; | ||
+ | globalWorkSize[0] = array_size / work_group_size; | ||
+ | localWorkSize[0] = work_group_size; | ||
+ | |||
+ | int size_per_thread = work_group_size; | ||
+ | int size = array_size; | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
+ | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | } | ||
+ | |||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
+ | retval |= clSetKernelArg(kernel, 4, sizeof(int), &size_per_thread); | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | int execute_kernel_3_GPU( | ||
+ | int platform_index, | ||
+ | int device_index, | ||
+ | int kernel_index, | ||
+ | int array_size, | ||
+ | cl_mem* memObjects, | ||
+ | int monitor | ||
+ | ) | ||
+ | { | ||
+ | | ||
+ | cl_kernel kernel; | ||
+ | cl_event kernel_execution; | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | double exec_time; | ||
+ | cl_int retval; | ||
+ | | ||
+ | size_t globalWorkSize[1] = { 0 }; | ||
+ | size_t localWorkSize[1] = { 0 }; | ||
+ | | ||
+ | int i,j,k; | ||
+ | | ||
+ | int size = array_size; | ||
+ | | ||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the kernel for GPU | ||
+ | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "vecadd_3_opt_blocks_kernel", "vecadd_3_opt_blocks.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | | ||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | | ||
+ | int NDRange_dim = 1; | ||
+ | | ||
+ | int work_group_size = WORK_GROUP_SIZE; | ||
+ | globalWorkSize[0] = array_size; | ||
+ | localWorkSize[0] = work_group_size; | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
+ | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
+ | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
+ | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
+ | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
+ | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
+ | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
+ | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
+ | } | ||
+ | |||
+ | | ||
+ | // choose the kernel | ||
+ | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | // Set the kernel arguments (result, a, b) | ||
+ | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
+ | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
+ | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
+ | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
+ | |||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to Set the kernel arguments.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | return 1; | ||
+ | } | ||
+ | | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // Queue the kernel up for execution across the array | ||
+ | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
+ | globalWorkSize, localWorkSize, | ||
+ | 0, NULL, &kernel_execution); | ||
+ | clWaitForEvents(1, &kernel_execution); | ||
+ | | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | clFinish(command_queue); | ||
+ | t_end = time_clock(); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(kernel_execution, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
+ | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
+ | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
+ | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
+ | array_size, array_size / exec_time * 1e-9); | ||
+ | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
+ | 3*array_size*sizeof(float)*1e-9, | ||
+ | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
+ | #endif | ||
+ | | ||
+ | if (retval != CL_SUCCESS) { | ||
+ | printf("Failed to queue kernel for execution.\n"); | ||
+ | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
+ | exit(-1); | ||
+ | } | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | | ||
+ | /* // create the fourth kernel for GPU */ | ||
+ | /* kernel_index = 4; */ | ||
+ | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, */ | ||
+ | /* // kernel name: , file: */ | ||
+ | /* "vecadd_4_opt_blocks_vect_kernel", "vecadd_4_opt_blocks_vect.cl", monitor); */</code> | ||
+ | |||
+ | ==== opencl_vecadd/vecadd_2_blocks.cl ==== | ||
+ | <code c> | ||
+ | __kernel void vecadd_2_blocks_kernel(__global const float *a, | ||
+ | __global const float *b, | ||
+ | __global float *result, | ||
+ | const int size, | ||
+ | const int size_per_thread) | ||
+ | { | ||
+ | int gid = get_global_id(0); | ||
+ | int index_start = gid * size_per_thread; | ||
+ | int index_end = (gid+1) * size_per_thread; | ||
+ | |||
+ | if (gid == 0) { | ||
+ | //printf("size: %d, %d", size, size_per_thread); | ||
+ | } | ||
+ | |||
+ | for (int i=index_start; i < index_end && i < size; i++) { | ||
+ | //for (int i=index_start; i < index_end ; i++) { | ||
+ | result[i] = a[i]+b[i]; | ||
+ | } | ||
+ | }</code> | ||
+ | ===== Zajęcia 7 ===== | ||
+ | {{:studia:magisterskie:1sem:zajecia_7.zip|}} | ||
+ | |||
+ | ==== tmp/opencl_mat_transp/mat_transp_1.cl ==== | ||
+ | <code c> #pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
+ | |||
+ | __kernel void mat_transp_1_kernel( | ||
+ | __global float *A , | ||
+ | __global float *B , | ||
+ | __global float *C , | ||
+ | int N) | ||
+ | { | ||
+ | |||
+ | int id = get_global_id(0) * get_global_size(0) + get_global_id(1); | ||
+ | float temp = 0.0; | ||
+ | |||
+ | for(int i = 0; i<N;i++){ | ||
+ | temp += A[id*N + i] *B[i]; | ||
+ | } | ||
+ | |||
+ | C[id] = temp; | ||
+ | }</code> | ||
+ | |||
+ | ==== tmp/opencl_mat_transp/mat_transp/mat_transp.c ==== | ||
+ | <code c>#include<stdlib.h> | ||
+ | #include<stdio.h> | ||
+ | #include <math.h> | ||
+ | |||
+ | #include <CL/cl.h> | ||
+ | |||
+ | #include"uth_ocl_intf.h" | ||
+ | |||
+ | #define time_measurments | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | #include"uth_time_intf.h" | ||
+ | static double t_begin, t_end, t_total; | ||
+ | #endif | ||
+ | |||
+ | |||
+ | |||
+ | #define BLOCK_SIZE 16 | ||
+ | #define NR_GROUPS 16 | ||
+ | #define MULT 16 | ||
+ | #define WYMIAR (BLOCK_SIZE*NR_GROUPS*MULT) | ||
+ | #define ROZMIAR (WYMIAR*WYMIAR) | ||
+ | // Matrices are stored in row-major order: | ||
+ | // M(row, col) = M( row * WYMIAR + col ) | ||
+ | |||
+ | int verify_result( | ||
+ | float* result, | ||
+ | float* result_compare | ||
+ | ) | ||
+ | { | ||
+ | // Verify the result | ||
+ | int result_OK = 1; | ||
+ | int i,j; | ||
+ | for(i = 0; i < WYMIAR; i++) { | ||
+ | if(fabs(result[i] - result_compare[i])>1.e-6) { | ||
+ | result_OK = 0; | ||
+ | break; | ||
+ | } | ||
+ | } | ||
+ | printf("\t\t6. verifying results: "); | ||
+ | if(result_OK) { | ||
+ | printf("Output is correct\n"); | ||
+ | } else { | ||
+ | printf("Output is incorrect\n"); | ||
+ | j = 0; | ||
+ | for(i = 0; i < WYMIAR, j<100; i++) { | ||
+ | if(fabs(result[i] - result_compare[i])>1.e-9) { | ||
+ | j++; | ||
+ | printf("%d %16.8f %16.8f\n", | ||
+ | i, result[i], result_compare[i]); | ||
+ | } | ||
+ | } | ||
+ | exit(0); | ||
+ | } | ||
+ | /* for(i = 0; i < length; i++) { */ | ||
+ | /* printf("%16.8f %16.8f\n", result[i], result_compare[i]); */ | ||
+ | /* } */ | ||
+ | |||
+ | return(result_OK); | ||
+ | } | ||
+ | |||
+ | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
+ | void create_kernels() | ||
+ | { | ||
+ | |||
+ | // for all operations indicate explicit info messages | ||
+ | int monitor = UTC_BASIC_INFO + 1; | ||
+ | |||
+ | int kernel_index; | ||
+ | | ||
+ | int platform_index = utv_ocl_struct.current_platform_index; | ||
+ | |||
+ | if(utr_ocl_CPU_context_exists(platform_index)){ | ||
+ | |||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | | ||
+ | // create the first kernel for CPU | ||
+ | /* kernel_index = 0; */ | ||
+ | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, */ | ||
+ | /* // kernel name: , file: */ | ||
+ | /* "mat_transp_1_kernel", "mat_transp_1.cl", monitor); */ | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel %d: %lf\n", kernel_index, t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | } | ||
+ | |||
+ | if(utr_ocl_GPU_context_exists(platform_index)){ | ||
+ | |||
+ | |||
+ | #ifdef time_measurments | ||
+ | t_begin = time_clock(); | ||
+ | #endif | ||
+ | // create the first kernel for GPU | ||
+ | kernel_index = 0; | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "mat_transp_1_kernel", "mat_transp_1.cl", monitor); | ||
+ | | ||
+ | #ifdef time_measurments | ||
+ | t_end = time_clock(); | ||
+ | printf("EXECUTION TIME: creating CPU kernel %d: %lf\n", kernel_index, t_end-t_begin); | ||
+ | #endif | ||
+ | | ||
+ | // create the second kernel for GPU | ||
+ | kernel_index = 1; | ||
+ | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
+ | // kernel name: , file: | ||
+ | "mat_transp_2_kernel", "mat_transp_2.cl", monitor); | ||
+ | |||
+ | | ||
+ | |||
+ | } | ||
+ | |||
+ | } | ||
+ | |||
+ | /*----------------EXECUTION PHASE----------------------*/ | ||
+ | int execute_kernels() | ||
+ | { | ||
+ | |||
+ | // for all operations indicate explicit info messages | ||
+ | int monitor = UTC_BASIC_INFO + 1; | ||
+ | |||
+ | int platform_index = utv_ocl_struct.current_platform_index; | ||
+ | utt_ocl_platform_struct platform_struct = utv_ocl_struct.list_of_platforms[platform_index]; | ||
+ | |||
+ | int kernel_index; | ||
+ | int i,j,n; | ||
+ | |||
+ | double nr_access; | ||
+ | double t1,t2; | ||
+ | |||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\n------------Starting execution phase----------------\n"); | ||
+ | } | ||
+ | | ||
+ | // create matrices | ||
+ | n=WYMIAR; | ||
+ | float* A = (float *) malloc(ROZMIAR*sizeof(float)); | ||
+ | float* B = (float *) malloc(WYMIAR*sizeof(float)); | ||
+ | float* C = (float *) malloc(WYMIAR*sizeof(float)); | ||
+ | float* D = (float *) malloc(WYMIAR*sizeof(float)); | ||
+ | | ||
+ | for(i=0;i<ROZMIAR;i++) A[i]=1.0*i/10000000.0; | ||
+ | |||
+ | for(i=0;i<WYMIAR;i++) B[i]=1.0*i/10000000.0; | ||
+ | |||
+ | // mnozenie macierz wektor | ||
+ | for(i=0; i<WYMIAR; i++){ | ||
+ | for(j=0; j<WYMIAR; j++){ | ||
+ | D[i] += A[i*WYMIAR + j] * B[j]; | ||
+ | } | ||
+ | } | ||
+ | | ||
+ | nr_access= ROZMIAR + 2.0*WYMIAR; // read + write | ||
+ | | ||
+ | printf("mat_transp: nr_access %lf\n", nr_access); | ||
+ | | ||
+ | // get hardware characteristics to select good matrix shape | ||
+ | // the set of device characteristics stored in data structure | ||
+ | int device_index = 0; | ||
+ | utt_ocl_device_struct device_struct = | ||
+ | utv_ocl_struct.list_of_platforms[platform_index].list_of_devices[device_index]; | ||
+ | double global_mem_bytes = device_struct.global_mem_bytes; | ||
+ | double global_max_alloc = device_struct.global_max_alloc; | ||
+ | double local_mem_bytes = device_struct.local_mem_bytes; | ||
+ | double constant_mem_bytes = device_struct.constant_mem_bytes; | ||
+ | int max_num_comp_units = device_struct.max_num_comp_units; | ||
+ | int max_work_group_size = device_struct.max_work_group_size; | ||
+ | | ||
+ | // in a loop over devices (or for a selected device) | ||
+ | int idev=0; | ||
+ | for(idev=0; idev<platform_struct.number_of_devices; idev++){ | ||
+ | | ||
+ | // int device_type = ..... | ||
+ | // choose device_index | ||
+ | // int device_index = utr_ocl_select_device(platform_index, device_type); | ||
+ | int device_index = idev; | ||
+ | int device_type = utr_ocl_device_type(platform_index, device_index); | ||
+ | | ||
+ | if(device_index>0 && device_type==utr_ocl_device_type(platform_index, device_index-1)) break; | ||
+ | if(device_type == UTC_OCL_DEVICE_CPU) break; | ||
+ | |||
+ | // choose the context | ||
+ | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
+ | | ||
+ | // choose the command queue | ||
+ | cl_command_queue command_queue = | ||
+ | utr_ocl_select_command_queue(platform_index, device_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t0. restoring context and command queue for platform %d and device %d\n", | ||
+ | platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL){ | ||
+ | | ||
+ | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
+ | platform_index, device_index); | ||
+ | printf("%lu %lu\n", context, command_queue); | ||
+ | } | ||
+ | | ||
+ | // choose the kernel | ||
+ | kernel_index = 0; | ||
+ | cl_kernel kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
+ | | ||
+ | if(monitor>UTC_BASIC_INFO){ | ||
+ | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
+ | kernel_index, platform_index, device_index); | ||
+ | } | ||
+ | | ||
+ | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
+ | | ||
+ | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
+ | platform_index, device_index, kernel_index); | ||
+ | printf("context %lu, command queue %lu, kernel %lu\n", | ||
+ | context, command_queue, kernel); | ||
+ | } | ||
+ | | ||
+ | for(i=0;i<WYMIAR;i++) C[i]=0.0; | ||
+ | time_init(); t1 = time_clock(); | ||
+ | | ||
+ | // call routine to perform matrix transposition | ||
+ | mat_transp_host(kernel_index, A, B, C, n, context, kernel, command_queue); | ||
+ | | ||
+ | t2 = time_clock(); time_print(); | ||
+ | printf("GB/s = %lf\n\n", nr_access*sizeof(float)/(t2-t1)/1024/1024/1024); | ||
+ | | ||
+ | | ||
+ | // verify result | ||
+ | verify_result(C, D); | ||
+ | | ||
+ | | ||
+ | } // end loop over devices | ||
+ | | ||
+ | return(0); | ||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | // Matrix transposition - Host code | ||
+ | // Matrix dimensions are assumed to be multiples of BLOCK_SIZE | ||
+ | int mat_transp_host( | ||
+ | int kernel_index, | ||
+ | float* A, | ||
+ | float* B, | ||
+ | float* C, | ||
+ | int N, | ||
+ | const cl_context context, | ||
+ | const cl_kernel mat_transp_kernel, | ||
+ | const cl_command_queue queue | ||
+ | ) | ||
+ | { | ||
+ | |||
+ | // Load A to device memory | ||
+ | size_t size_bytesm = N*N*sizeof(float); | ||
+ | size_t size_bytesw = N*sizeof(float); | ||
+ | | ||
+ | |||
+ | cl_mem d_A = clCreateBuffer(context, CL_MEM_READ_ONLY, | ||
+ | size_bytesm, NULL, NULL); | ||
+ | |||
+ | // Write A to device memory | ||
+ | clEnqueueWriteBuffer(queue, d_A, CL_TRUE, 0, size_bytesm, A, 0, 0, 0); | ||
+ | |||
+ | cl_mem d_B = clCreateBuffer(context, CL_MEM_READ_ONLY, | ||
+ | size_bytesw, NULL, NULL); | ||
+ | |||
+ | // Write A to device memory | ||
+ | clEnqueueWriteBuffer(queue, d_B, CL_TRUE, 0, size_bytesw, B, 0, 0, 0); | ||
+ | |||
+ | // Allocate B in device memory | ||
+ | cl_mem d_C = clCreateBuffer(context, CL_MEM_WRITE_ONLY, size_bytesw, NULL, NULL); | ||
+ | |||
+ | |||
+ | // Invoke kernel | ||
+ | clSetKernelArg(mat_transp_kernel, 0, sizeof(cl_mem), (void*)&d_A); | ||
+ | clSetKernelArg(mat_transp_kernel, 1, sizeof(cl_mem), (void*)&d_B); | ||
+ | clSetKernelArg(mat_transp_kernel, 2, sizeof(cl_mem), (void*)&d_C); | ||
+ | clSetKernelArg(mat_transp_kernel, 3, sizeof(int), (void*)&N); | ||
+ | |||
+ | size_t localWorkSize[3]; | ||
+ | size_t globalWorkSize[3]; | ||
+ | cl_uint work_dim; | ||
+ | |||
+ | if(kernel_index==3){ | ||
+ | |||
+ | work_dim = 2; | ||
+ | localWorkSize[0] = BLOCK_SIZE; | ||
+ | globalWorkSize[0] = N/MULT; | ||
+ | localWorkSize[1] = BLOCK_SIZE; | ||
+ | globalWorkSize[1] = N/MULT; | ||
+ | localWorkSize[2] = 0; | ||
+ | globalWorkSize[2] = 0; | ||
+ | |||
+ | |||
+ | } | ||
+ | else{ | ||
+ | |||
+ | work_dim = 2; | ||
+ | localWorkSize[0] = BLOCK_SIZE; | ||
+ | globalWorkSize[0] = sqrt(N); | ||
+ | localWorkSize[1] = BLOCK_SIZE; | ||
+ | globalWorkSize[1] = sqrt(N); | ||
+ | localWorkSize[2] = 0; | ||
+ | globalWorkSize[2] = 0; | ||
+ | | ||
+ | } | ||
+ | |||
+ | clFinish(queue); | ||
+ | double t1 = time_clock(); | ||
+ | // Enqueue a kernel run call | ||
+ | cl_event ndrEvt; | ||
+ | clEnqueueNDRangeKernel(queue, mat_transp_kernel, work_dim, 0, | ||
+ | globalWorkSize, localWorkSize, 0, 0, &ndrEvt); | ||
+ | clWaitForEvents(1, &ndrEvt); | ||
+ | clFinish(queue); | ||
+ | double t2 = time_clock(); | ||
+ | // Calculate performance | ||
+ | cl_ulong startTime; | ||
+ | cl_ulong endTime; | ||
+ | | ||
+ | // Get kernel profiling info | ||
+ | clGetEventProfilingInfo(ndrEvt, | ||
+ | CL_PROFILING_COMMAND_START, | ||
+ | sizeof(cl_ulong), | ||
+ | &startTime, | ||
+ | 0); | ||
+ | clGetEventProfilingInfo(ndrEvt, | ||
+ | CL_PROFILING_COMMAND_END, | ||
+ | sizeof(cl_ulong), | ||
+ | &endTime, | ||
+ | 0); | ||
+ | double time = (double)endTime - (double)startTime; | ||
+ | printf("\nKernel execution internal: time %lf, GB/s = %lf\n", | ||
+ | time*1.0e-9, 2.0*N*N*sizeof(float)/(time*1.0e-9)/1024/1024/1024); | ||
+ | |||
+ | printf("Kernel execution external: time %lf, GB/s = %lf\n\n", | ||
+ | t2-t1, 2.0*N*N*sizeof(float)/(t2-t1)/1024/1024/1024); | ||
+ | |||
+ | // Read B from device memory | ||
+ | clEnqueueReadBuffer(queue, d_C, CL_TRUE, 0, size_bytesw, C, 0, 0, 0); | ||
+ | |||
+ | // Free device memory | ||
+ | clReleaseMemObject(d_A); | ||
+ | clReleaseMemObject(d_B); | ||
+ | clReleaseMemObject(d_C); | ||
+ | |||
+ | return(0); | ||
+ | |||
+ | }</code> | ||
+ | ==== Kod wogu ==== | ||
+ | === server.c ==== | ||
+ | <code c> | ||
+ | /* | ||
+ | C socket server example, handles multiple clients using threads | ||
+ | Compile | ||
+ | gcc server.c -lpthread -o server | ||
+ | */ | ||
+ | |||
+ | #include<stdio.h> | ||
+ | #include<string.h> //strlen | ||
+ | #include<stdlib.h> //strlen | ||
+ | #include<sys/socket.h> | ||
+ | #include<arpa/inet.h> //inet_addr | ||
+ | #include<unistd.h> //write | ||
+ | #include<pthread.h> //for threading , link with lpthread | ||
+ | #include <unistd.h> | ||
+ | |||
+ | //the thread function | ||
+ | void *connection_handler(void *); | ||
+ | |||
+ | int main(int argc , char *argv[]) | ||
+ | { | ||
+ | int counter = 0; | ||
+ | pid_t pid = fork(); | ||
+ | |||
+ | if (pid == 0) | ||
+ | { | ||
+ | int sock; | ||
+ | struct sockaddr_in server; | ||
+ | char message[1000] , server_reply[2000]; | ||
+ | int counter; | ||
+ | unsigned int ip[] = { 192, 168, 102, 1 }; | ||
+ | char* ip_final; | ||
+ | char *buf; | ||
+ | size_t sz; | ||
+ | |||
+ | //Create socket | ||
+ | |||
+ | for ( counter = 0; counter < 255; ++counter ) { | ||
+ | |||
+ | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (sock == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | //printf("192.168.102.%d", counter); | ||
+ | |||
+ | sz = snprintf(NULL, 0, "192.168.102.%d", counter); | ||
+ | buf = (char *)malloc(sz + 1); /* make sure you check for != NULL in real code */ | ||
+ | | ||
+ | server.sin_addr.s_addr = inet_addr(buf); | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_port = htons( 65001 ); | ||
+ | |||
+ | //Connect to remote server | ||
+ | if (connect(sock , (struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | perror("connect failed. Error"); | ||
+ | continue; | ||
+ | } | ||
+ | |||
+ | puts("Connected\n"); | ||
+ | |||
+ | //keep communicating with server | ||
+ | while(1) | ||
+ | { | ||
+ | //Send some data | ||
+ | if( send(sock , "Werset 6-client" , 15 , 0) < 0) | ||
+ | { | ||
+ | puts("Send failed"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //Receive a reply from the server | ||
+ | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
+ | { | ||
+ | puts("recv failed"); | ||
+ | break; | ||
+ | } | ||
+ | |||
+ | puts(server_reply); | ||
+ | |||
+ | break; | ||
+ | } | ||
+ | |||
+ | close(sock); | ||
+ | } | ||
+ | } | ||
+ | else if (pid > 0) | ||
+ | { | ||
+ | int socket_desc , client_sock , c; | ||
+ | struct sockaddr_in server , client; | ||
+ | |||
+ | //Create socket | ||
+ | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (socket_desc == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | //Prepare the sockaddr_in structure | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_addr.s_addr = INADDR_ANY; | ||
+ | server.sin_port = htons( 65001 ); | ||
+ | |||
+ | //Bind | ||
+ | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | //print the error message | ||
+ | perror("bind failed. Error"); | ||
+ | return 1; | ||
+ | } | ||
+ | puts("bind done"); | ||
+ | |||
+ | //Listen | ||
+ | listen(socket_desc , 3); | ||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | |||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | pthread_t thread_id; | ||
+ | |||
+ | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
+ | { | ||
+ | puts("Connection accepted"); | ||
+ | |||
+ | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
+ | { | ||
+ | perror("could not create thread"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //Now join the thread , so that we dont terminate before the thread | ||
+ | //pthread_join( thread_id , NULL); | ||
+ | puts("Handler assigned"); | ||
+ | } | ||
+ | |||
+ | if (client_sock < 0) | ||
+ | { | ||
+ | perror("accept failed"); | ||
+ | return 1; | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | // fork failed | ||
+ | printf("fork() failed!\n"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | /* | ||
+ | * This will handle connection for each client | ||
+ | * */ | ||
+ | void *connection_handler(void *socket_desc) | ||
+ | { | ||
+ | //Get the socket descriptor | ||
+ | int sock = *(int*)socket_desc; | ||
+ | int read_size; | ||
+ | char *message , client_message[2000]; | ||
+ | |||
+ | //Receive a message from client | ||
+ | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
+ | { | ||
+ | printf("%s\n", client_message); | ||
+ | |||
+ | //end of string marker | ||
+ | client_message[read_size] = '\0'; | ||
+ | |||
+ | //Send the message back to client | ||
+ | write(sock , "Werset 6" , 8); | ||
+ | |||
+ | //clear the message buffer | ||
+ | memset(client_message, 0, 2000); | ||
+ | } | ||
+ | |||
+ | if(read_size == 0) | ||
+ | { | ||
+ | puts("Client disconnected"); | ||
+ | fflush(stdout); | ||
+ | } | ||
+ | else if(read_size == -1) | ||
+ | { | ||
+ | perror("recv failed"); | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | }</code> | ||
+ | === Makefile === | ||
+ | <code bash># kompilator c | ||
+ | CC = gcc | ||
+ | |||
+ | # konsolidator | ||
+ | CFLAGS=-g | ||
+ | |||
+ | all: server client | ||
+ | |||
+ | server: server.o | ||
+ | $(CC) $(LFLAGS) server.o -lpthread -o server | ||
+ | |||
+ | server.o: server.c | ||
+ | $(CC) -c server.c -o server.o | ||
+ | |||
+ | client: client.o | ||
+ | $(CC) client.o -o client | ||
+ | |||
+ | client.o: client.c | ||
+ | $(CC) -c client.c -o client.o | ||
+ | |||
+ | clean: | ||
+ | rm -f *.o</code> | ||
+ | ===== Zajęcia 9 ===== | ||
+ | <code c>/* | ||
+ | C socket server example, handles multiple clients using threads | ||
+ | Compile | ||
+ | gcc server.c -lpthread -o server | ||
+ | */ | ||
+ | |||
+ | #include<stdio.h> | ||
+ | #include<string.h> //strlen | ||
+ | #include<stdlib.h> //strlen | ||
+ | #include<sys/socket.h> | ||
+ | #include<arpa/inet.h> //inet_addr | ||
+ | #include<unistd.h> //write | ||
+ | #include<pthread.h> //for threading , link with lpthread | ||
+ | |||
+ | //the thread function | ||
+ | void *connection_handler(void *); | ||
+ | |||
+ | int main(int argc , char *argv[]) | ||
+ | { | ||
+ | int socket_desc , client_sock , c; | ||
+ | struct sockaddr_in server , client; | ||
+ | |||
+ | //Create socket | ||
+ | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (socket_desc == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | //Prepare the sockaddr_in structure | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_addr.s_addr = INADDR_ANY; | ||
+ | server.sin_port = htons( 8888 ); | ||
+ | |||
+ | //Bind | ||
+ | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | //print the error message | ||
+ | perror("bind failed. Error"); | ||
+ | return 1; | ||
+ | } | ||
+ | puts("bind done"); | ||
+ | |||
+ | //Listen | ||
+ | listen(socket_desc , 3); | ||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | |||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | pthread_t thread_id; | ||
+ | |||
+ | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
+ | { | ||
+ | puts("Connection accepted"); | ||
+ | |||
+ | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
+ | { | ||
+ | perror("could not create thread"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //Now join the thread , so that we dont terminate before the thread | ||
+ | //pthread_join( thread_id , NULL); | ||
+ | puts("Handler assigned"); | ||
+ | } | ||
+ | |||
+ | if (client_sock < 0) | ||
+ | { | ||
+ | perror("accept failed"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | /* | ||
+ | * This will handle connection for each client | ||
+ | * */ | ||
+ | void *connection_handler(void *socket_desc) | ||
+ | { | ||
+ | //Get the socket descriptor | ||
+ | int sock = *(int*)socket_desc; | ||
+ | int read_size; | ||
+ | char *message , client_message[2000]; | ||
+ | |||
+ | //Receive a message from client | ||
+ | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
+ | { | ||
+ | //end of string marker | ||
+ | client_message[read_size] = '\0'; | ||
+ | |||
+ | //Send the message back to client | ||
+ | write(sock , "Werset 6" , strlen(client_message)); | ||
+ | |||
+ | //clear the message buffer | ||
+ | memset(client_message, 0, 2000); | ||
+ | } | ||
+ | |||
+ | if(read_size == 0) | ||
+ | { | ||
+ | puts("Client disconnected"); | ||
+ | fflush(stdout); | ||
+ | } | ||
+ | else if(read_size == -1) | ||
+ | { | ||
+ | perror("recv failed"); | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } </code> | ||
+ | |||
+ | ==== Client C ==== | ||
+ | |||
+ | <code c>#include<stdio.h> //printf | ||
+ | #include<string.h> //strlen | ||
+ | #include<sys/socket.h> //socket | ||
+ | #include<arpa/inet.h> //inet_addr | ||
+ | |||
+ | int main(int argc , char *argv[]) | ||
+ | { | ||
+ | int sock; | ||
+ | struct sockaddr_in server; | ||
+ | char message[1000] , server_reply[2000]; | ||
+ | |||
+ | //Create socket | ||
+ | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (sock == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | server.sin_addr.s_addr = inet_addr("127.0.0.1"); | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_port = htons( 8888 ); | ||
+ | |||
+ | //Connect to remote server | ||
+ | if (connect(sock , (struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | perror("connect failed. Error"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | puts("Connected\n"); | ||
+ | |||
+ | //keep communicating with server | ||
+ | while(1) | ||
+ | { | ||
+ | printf("Enter message : "); | ||
+ | scanf("%s" , message); | ||
+ | |||
+ | //Send some data | ||
+ | if( send(sock , message , strlen(message) , 0) < 0) | ||
+ | { | ||
+ | puts("Send failed"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //Receive a reply from the server | ||
+ | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
+ | { | ||
+ | puts("recv failed"); | ||
+ | break; | ||
+ | } | ||
+ | |||
+ | puts("Server reply :"); | ||
+ | puts(server_reply); | ||
+ | } | ||
+ | |||
+ | close(sock); | ||
+ | return 0; | ||
+ | }</code> | ||
+ | |||
+ | |||
+ | ==== Kod Maciek Gonzo itp ==== | ||
+ | <code c> | ||
+ | #include<stdio.h> | ||
+ | #include<string.h> //strlen | ||
+ | #include<stdlib.h> //strlen | ||
+ | #include<sys/socket.h> | ||
+ | #include<arpa/inet.h> //inet_addr | ||
+ | #include<unistd.h> //write | ||
+ | #include<pthread.h> //for threading , link with lpthread | ||
+ | |||
+ | //the thread function | ||
+ | void *connection_handler(void *); | ||
+ | |||
+ | void *client_function(void *socket_desc){ | ||
+ | int sock; | ||
+ | struct sockaddr_in server_id; | ||
+ | char message[1000] , server_reply[2000]; | ||
+ | |||
+ | //Create socket | ||
+ | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (sock == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | // 192.168.102.63 wogu | ||
+ | server_id.sin_addr.s_addr = inet_addr("localhost"); | ||
+ | server_id.sin_family = AF_INET; | ||
+ | server_id.sin_port = htons( 65000 ); | ||
+ | |||
+ | //Connect to remote server | ||
+ | |||
+ | if (connect(sock , (struct sockaddr *)&server_id , sizeof(server_id)) < 0) | ||
+ | { | ||
+ | perror("connect failed. Error"); | ||
+ | |||
+ | }else{ | ||
+ | |||
+ | puts("Connected\n"); | ||
+ | |||
+ | //keep communicating with server | ||
+ | while(1) | ||
+ | { | ||
+ | printf("Enter message : "); | ||
+ | scanf("%s" , message); | ||
+ | |||
+ | //Send some data | ||
+ | if( send(sock , message , strlen(message) , 0) < 0) | ||
+ | { | ||
+ | puts("Send failed"); | ||
+ | |||
+ | } | ||
+ | |||
+ | //Receive a reply from the server | ||
+ | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
+ | { | ||
+ | puts("recv failed"); | ||
+ | break; | ||
+ | } | ||
+ | |||
+ | puts("Server reply :"); | ||
+ | puts(server_reply); | ||
+ | } | ||
+ | |||
+ | close(sock); | ||
+ | } | ||
+ | |||
+ | }; | ||
+ | |||
+ | |||
+ | void *server_function(void * sock){ | ||
+ | printf("SERVER Start"); | ||
+ | fflush(stdout); | ||
+ | int socket_desc , client_sock , c; | ||
+ | pthread_t thread_id; | ||
+ | struct sockaddr_in server , client; | ||
+ | |||
+ | //Create socket | ||
+ | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
+ | if (socket_desc == -1) | ||
+ | { | ||
+ | printf("Could not create socket"); | ||
+ | } | ||
+ | puts("Socket created"); | ||
+ | |||
+ | //Prepare the sockaddr_in structure | ||
+ | server.sin_family = AF_INET; | ||
+ | server.sin_addr.s_addr = INADDR_ANY; | ||
+ | server.sin_port = htons( 65000); | ||
+ | |||
+ | //Bind | ||
+ | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
+ | { | ||
+ | //print the error message | ||
+ | perror("bind failed. Error"); | ||
+ | return; | ||
+ | } | ||
+ | puts("bind done"); | ||
+ | |||
+ | |||
+ | |||
+ | //Listen | ||
+ | listen(socket_desc , 3); | ||
+ | |||
+ | //Accept and incoming connection | ||
+ | puts("Waiting for incoming connections..."); | ||
+ | c = sizeof(struct sockaddr_in); | ||
+ | fflush(stdout); | ||
+ | |||
+ | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
+ | { | ||
+ | puts("Connection accepted"); | ||
+ | |||
+ | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
+ | { | ||
+ | perror("could not create thread"); | ||
+ | return; | ||
+ | } | ||
+ | |||
+ | //Now join the thread , so that we dont terminate before the thread | ||
+ | //pthread_join( thread_id , NULL); | ||
+ | puts("Handler assigned"); | ||
+ | } | ||
+ | |||
+ | if (client_sock < 0) | ||
+ | { | ||
+ | perror("accept failed"); | ||
+ | return; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | |||
+ | int main(int argc , char *argv[]) | ||
+ | { | ||
+ | pthread_t thread_id; | ||
+ | //Create server | ||
+ | if( pthread_create( &thread_id , NULL , server_function , NULL) != 0) | ||
+ | { | ||
+ | perror("could not create server thread"); | ||
+ | fflush(stdout); | ||
+ | return 1; | ||
+ | } | ||
+ | pthread_t thread_id2; | ||
+ | |||
+ | // Create client | ||
+ | if( pthread_create( &thread_id2 , NULL , client_function , NULL) != 0) | ||
+ | { | ||
+ | perror("could not create client thread"); | ||
+ | fflush(stdout); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | /* | ||
+ | * This will handle connection for each client | ||
+ | * */ | ||
+ | void *connection_handler(void *socket_desc) | ||
+ | { | ||
+ | //Get the socket descriptor | ||
+ | int sock = *(int*)socket_desc; | ||
+ | int read_size; | ||
+ | char *message , client_message[2000]; | ||
+ | |||
+ | //Receive a message from client | ||
+ | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
+ | { | ||
+ | //end of string marker | ||
+ | client_message[read_size] = '\0'; | ||
+ | |||
+ | //Send the message back to client | ||
+ | write(sock , "Werset 5" , strlen(client_message)); | ||
+ | |||
+ | //clear the message buffer | ||
+ | memset(client_message, 0, 2000); | ||
+ | } | ||
+ | |||
+ | if(read_size == 0) | ||
+ | { | ||
+ | puts("Client disconnected"); | ||
+ | fflush(stdout); | ||
+ | } | ||
+ | else if(read_size == -1) | ||
+ | { | ||
+ | perror("recv failed"); | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | </code> |