Różnice między wybraną wersją a wersją aktualną.
| Poprzednia rewizja po obu stronach Poprzednia wersja Nowa wersja | Poprzednia wersja | ||
|
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/03/30 23:10] 46.22.174.138 |
studia:magisterskie:1sem:systemy_rownolegle_i_rozproszone [2016/05/19 11:13] (aktualna) 149.156.112.6 [tmp/opencl_mat_transp/mat_transp/mat_transp.c] |
||
|---|---|---|---|
| Linia 781: | Linia 781: | ||
| - | |||
| - | |||
| - | |||
| - | |||
| - | //TWORZENIE STRUKTURY | ||
| struct rekord wys; | struct rekord wys; | ||
| - | wys.x_min = x_min; | ||
| - | wys.x_max = x_max; | ||
| - | wys.n = n; | ||
| - | |||
| MPI_Datatype rekord_typ; | MPI_Datatype rekord_typ; | ||
| int tab_dlug_blokow[3] = {1, 1, 1}; | int tab_dlug_blokow[3] = {1, 1, 1}; | ||
| Linia 802: | Linia 793: | ||
| MPI_Type_struct(3, tab_dlug_blokow, tab_odstepow, tab_typow, &rekord_typ); | MPI_Type_struct(3, tab_dlug_blokow, tab_odstepow, tab_typow, &rekord_typ); | ||
| MPI_Type_commit(&rekord_typ); | MPI_Type_commit(&rekord_typ); | ||
| - | MPI_Send ( &wys, 1, rekord_typ, j, 1, komunikator ); | + | MPI_Recv ( &wys, 1, rekord_typ, 0, 1, parentcomm, &status ); |
| + | </code> | ||
| + | |||
| + | ====== Zajęcia 4 ====== | ||
| + | wkrótce | ||
| + | ====== Zajęcia 5 ====== | ||
| + | ==== Makefile ==== | ||
| + | <code bash># optimization and other system dependent options | ||
| + | #include make.$(SRR_ARCH) | ||
| + | # or directly | ||
| + | include make.lab_404_NVIDIA | ||
| + | |||
| + | NAME = Hello_GPU | ||
| + | |||
| + | program: main.o | ||
| + | $(CC) $(LDFL) main.o $(LIB) -o $(NAME) | ||
| + | |||
| + | main.o: main.c | ||
| + | $(CC) $(CFL) -c main.c $(INC) -o main.o | ||
| + | |||
| + | clean: | ||
| + | rm -f obj/* | ||
| + | rm -f $(NAME) | ||
| + | </code> | ||
| + | ==== make.lab_404_NVIDIA ==== | ||
| + | <code bash># C compiler | ||
| + | #CC = icc | ||
| + | CC = gcc | ||
| + | |||
| + | # C++ compiler | ||
| + | CPPC = icpc | ||
| + | #CPPC = g++ | ||
| + | |||
| + | # Loader (to link C/C++ and Fortran libraries) | ||
| + | LD = icpc | ||
| + | #LD = g++ | ||
| + | |||
| + | # Archiver | ||
| + | AR = ar r | ||
| + | |||
| + | # For removing files | ||
| + | RM = rm -f | ||
| + | |||
| + | # Include directories | ||
| + | INC = -I/opt/cuda7/include | ||
| + | |||
| + | # Standard and/or local libraries | ||
| + | LIB = -L/opt/cuda7/lib64 -lOpenCL | ||
| + | |||
| + | # C optimization and other flags | ||
| + | #CFL(icc) = -O3 -openmp | ||
| + | #CFL(gcc) = -O3 -fopenmp | ||
| + | #CFL(icc,debug) = -g | ||
| + | #CFL(gcc) = -g | ||
| + | CFL = | ||
| + | |||
| + | # Loader optimization and other flags | ||
| + | #LDFL(debug) = -g | ||
| + | #LDFL(profile) = -p | ||
| + | LDFL = | ||
| + | |||
| + | </code> | ||
| + | ==== main.c ==== | ||
| + | <code c>#include<stdlib.h> | ||
| + | #include<stdio.h> | ||
| + | |||
| + | #include <CL/cl.h> | ||
| + | |||
| + | // functions to display platform and device properties | ||
| + | void DisplayPlatformInfo( | ||
| + | cl_platform_id id, | ||
| + | cl_platform_info name, | ||
| + | char* str) | ||
| + | { | ||
| + | cl_int retval; | ||
| + | size_t paramValueSize; | ||
| + | |||
| + | retval = clGetPlatformInfo( | ||
| + | id, | ||
| + | name, | ||
| + | 0, | ||
| + | NULL, | ||
| + | ¶mValueSize); | ||
| + | if (retval != CL_SUCCESS){ | ||
| + | printf("Failed to find OpenCL platform %s.\n", str); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | char * info = (char *)malloc(sizeof(char) * paramValueSize); | ||
| + | retval = clGetPlatformInfo( | ||
| + | id, | ||
| + | name, | ||
| + | paramValueSize, | ||
| + | info, | ||
| + | NULL); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to find OpenCL platform %s.\n", str); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | printf("\t%s:\t%s\n", str, info ); | ||
| + | free(info); | ||
| + | } | ||
| + | |||
| + | void DisplayDeviceInfo_char( | ||
| + | cl_device_id id, | ||
| + | cl_device_info name, | ||
| + | char* str) | ||
| + | { | ||
| + | cl_int retval; | ||
| + | size_t paramValueSize; | ||
| + | |||
| + | retval = clGetDeviceInfo( | ||
| + | id, | ||
| + | name, | ||
| + | 0, | ||
| + | NULL, | ||
| + | ¶mValueSize); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to find OpenCL device info %s.\n", str); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | char * info = (char *)malloc(sizeof(char) * paramValueSize); | ||
| + | retval = clGetDeviceInfo( | ||
| + | id, | ||
| + | name, | ||
| + | paramValueSize, | ||
| + | info, | ||
| + | NULL); | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to find OpenCL device info %s.\n", str); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | printf("\t\t%s:\t%s\n", str, info ); | ||
| + | free(info); | ||
| + | }; | ||
| + | |||
| + | void DisplayDeviceInfo_ulong( | ||
| + | cl_device_id id, | ||
| + | cl_device_info name, | ||
| + | char* str) | ||
| + | { | ||
| + | cl_int retval; | ||
| + | size_t paramValueSize;// = sizeof(cl_ulong); | ||
| + | |||
| + | retval = clGetDeviceInfo( | ||
| + | id, | ||
| + | name, | ||
| + | 0, | ||
| + | NULL, | ||
| + | ¶mValueSize); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to find OpenCL device info %s.\n", str); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | cl_ulong * info = (cl_ulong *)malloc(sizeof(cl_ulong) * paramValueSize); | ||
| + | retval = clGetDeviceInfo( | ||
| + | id, | ||
| + | name, | ||
| + | paramValueSize, | ||
| + | info, | ||
| + | NULL); | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to find OpenCL device info %s.\n", str); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | printf("\t\t%s:\t%lu MB\n", str, info[0] / 1024 / 1024 ); | ||
| + | free(info); | ||
| + | }; | ||
| + | |||
| + | void DisplayDeviceInfo_uint( | ||
| + | cl_device_id id, | ||
| + | cl_device_info name, | ||
| + | char* str) | ||
| + | { | ||
| + | cl_int retval; | ||
| + | size_t paramValueSize;// = sizeof(cl_ulong); | ||
| + | |||
| + | retval = clGetDeviceInfo( | ||
| + | id, | ||
| + | name, | ||
| + | 0, | ||
| + | NULL, | ||
| + | ¶mValueSize); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to find OpenCL device info %s.\n", str); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | cl_uint * info = (cl_uint *)malloc(sizeof(cl_uint) * paramValueSize); | ||
| + | retval = clGetDeviceInfo( | ||
| + | id, | ||
| + | name, | ||
| + | paramValueSize, | ||
| + | info, | ||
| + | NULL); | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to find OpenCL device info %s.\n", str); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | printf("\t\t%s:\t%d MHz\n", str, info[0] ); | ||
| + | free(info); | ||
| + | }; | ||
| + | |||
| + | |||
| + | // main program controlling execution of CPU code and OpenCL kernels | ||
| + | int main(int argc, char** argv) | ||
| + | { | ||
| + | cl_uint number_of_contexts = 2; | ||
| + | cl_context context = NULL; | ||
| + | cl_context list_of_contexts[2] = {0,0}; | ||
| + | cl_command_queue commandQueue = 0; | ||
| + | cl_program program = 0; | ||
| + | cl_uint number_of_devices; | ||
| + | cl_device_id device = 0; | ||
| + | cl_device_id *list_of_devices; | ||
| + | cl_device_type type; | ||
| + | cl_kernel kernel = 0; | ||
| + | cl_mem memObjects[3] = { 0, 0, 0 }; | ||
| + | cl_int retval; | ||
| + | int icon, idev; | ||
| + | cl_uint numPlatforms; | ||
| + | cl_platform_id * platformIds; | ||
| + | cl_uint i,j; | ||
| + | |||
| + | // flag to control displaying | ||
| + | int Monitor = 1; | ||
| + | |||
| + | |||
| + | // Create OpenCL contexts | ||
| + | |||
| + | // First, query the total number of platforms | ||
| + | retval = clGetPlatformIDs(0, (cl_platform_id *) NULL, &numPlatforms); | ||
| + | |||
| + | // Next, allocate memory for the installed plaforms, and qeury | ||
| + | // to get the list. | ||
| + | platformIds = (cl_platform_id *)malloc(sizeof(cl_platform_id) * numPlatforms); | ||
| + | |||
| + | // Then, query the platform IDs | ||
| + | retval = clGetPlatformIDs(numPlatforms, platformIds, NULL); | ||
| + | |||
| + | if(Monitor>=0){ | ||
| + | printf("Number of platforms: \t%d\n", numPlatforms); | ||
| + | } | ||
| + | |||
| + | // Iterate through the list of platforms displaying associated information | ||
| + | for (i = 0; i < numPlatforms; i++) { | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | |||
| + | printf("Platform ID - %d\n",i); | ||
| + | // First we display information associated with the platform | ||
| + | DisplayPlatformInfo( | ||
| + | platformIds[i], | ||
| + | CL_PLATFORM_NAME, | ||
| + | "CL_PLATFORM_NAME"); | ||
| + | DisplayPlatformInfo( | ||
| + | platformIds[i], | ||
| + | CL_PLATFORM_PROFILE, | ||
| + | "CL_PLATFORM_PROFILE"); | ||
| + | DisplayPlatformInfo( | ||
| + | platformIds[i], | ||
| + | CL_PLATFORM_VERSION, | ||
| + | "CL_PLATFORM_VERSION"); | ||
| + | DisplayPlatformInfo( | ||
| + | platformIds[i], | ||
| + | CL_PLATFORM_VENDOR, | ||
| + | "CL_PLATFORM_VENDOR"); | ||
| + | } | ||
| + | |||
| + | } | ||
| + | |||
| + | // For the first platform | ||
| + | int iplat; | ||
| + | printf("Select Platform ID: "); scanf("%d", &iplat); | ||
| + | |||
| + | // Query the set of devices associated with the platform | ||
| + | retval = clGetDeviceIDs( | ||
| + | platformIds[iplat], | ||
| + | CL_DEVICE_TYPE_ALL, | ||
| + | 0, | ||
| + | NULL, | ||
| + | &number_of_devices); | ||
| + | |||
| + | |||
| + | list_of_devices = | ||
| + | (cl_device_id *) malloc (sizeof(cl_device_id) * number_of_devices); | ||
| + | |||
| + | retval = clGetDeviceIDs( | ||
| + | platformIds[iplat], | ||
| + | CL_DEVICE_TYPE_ALL, | ||
| + | number_of_devices, | ||
| + | list_of_devices, | ||
| + | NULL); | ||
| + | |||
| + | if(Monitor>=0){ | ||
| + | printf("Number of devices: \t%d\n", number_of_devices); | ||
| + | } | ||
| + | |||
| + | // Iterate through each device, displaying associated information | ||
| + | for (j = 0; j < number_of_devices; j++) { | ||
| + | |||
| + | clGetDeviceInfo(list_of_devices[j], CL_DEVICE_TYPE, | ||
| + | sizeof(cl_device_type), &type, NULL); | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | |||
| + | DisplayDeviceInfo_char( | ||
| + | list_of_devices[j], | ||
| + | CL_DEVICE_NAME, | ||
| + | "CL_DEVICE_NAME"); | ||
| + | |||
| + | DisplayDeviceInfo_char( | ||
| + | list_of_devices[j], | ||
| + | CL_DEVICE_VENDOR, | ||
| + | "CL_DEVICE_VENDOR"); | ||
| + | |||
| + | DisplayDeviceInfo_char( | ||
| + | list_of_devices[j], | ||
| + | CL_DEVICE_VERSION, | ||
| + | "CL_DEVICE_VERSION"); | ||
| + | |||
| + | DisplayDeviceInfo_ulong( | ||
| + | list_of_devices[j], | ||
| + | CL_DEVICE_GLOBAL_MEM_SIZE, | ||
| + | "CL_DEVICE_GLOBAL_MEM_SIZE"); | ||
| + | |||
| + | DisplayDeviceInfo_uint( | ||
| + | list_of_devices[j], | ||
| + | CL_DEVICE_MAX_CLOCK_FREQUENCY, | ||
| + | "CL_DEVICE_MAX_CLOCK_FREQUENCY"); | ||
| + | printf("\n"); | ||
| + | } | ||
| + | } | ||
| + | |||
| + | // Next, create OpenCL contexts on platforms | ||
| + | cl_context_properties contextProperties[] = { | ||
| + | CL_CONTEXT_PLATFORM, | ||
| + | (cl_context_properties)platformIds[iplat], | ||
| + | 0 | ||
| + | }; | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("Creating CPU context %d on platform %d\n", 1, iplat); | ||
| + | } | ||
| + | |||
| + | list_of_contexts[1] = | ||
| + | clCreateContextFromType(contextProperties, | ||
| + | CL_DEVICE_TYPE_CPU, NULL, NULL, &retval); | ||
| + | |||
| + | if(Monitor>=0 && retval != CL_SUCCESS){ | ||
| + | printf("Could not create CPU context on platform %d\n", i); | ||
| + | } | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("Creating GPU context 0 on platform %d\n", iplat); | ||
| + | } | ||
| + | |||
| + | list_of_contexts[0] = | ||
| + | clCreateContextFromType(contextProperties, | ||
| + | CL_DEVICE_TYPE_GPU, NULL, NULL, &retval); | ||
| + | |||
| + | if(Monitor>=0 && retval != CL_SUCCESS){ | ||
| + | printf("Could not create GPU context on platform %d\n", i); | ||
| + | } | ||
| + | |||
| + | // in a loop over devices of the seleceted platform | ||
| + | for(idev=0; idev<number_of_devices;idev++){ | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("\nFor context %d and device %d:\n", | ||
| + | idev, idev); | ||
| + | } | ||
| + | device = list_of_devices[idev]; | ||
| + | icon = idev; | ||
| + | |||
| + | // choose OpenCL context on first available platform | ||
| + | context = list_of_contexts[icon]; | ||
| + | |||
| + | if(context !=0){ | ||
| + | |||
| + | commandQueue = clCreateCommandQueue(context, device, 0, NULL); | ||
| + | if (commandQueue == NULL) { | ||
| + | printf("Failed to create commandQueue for device %d\n", idev); | ||
| + | exit(0); | ||
| + | } | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("Reading program from source\n"); | ||
| + | } | ||
| + | |||
| + | // read source code from file | ||
| + | FILE *fp; | ||
| + | char* source; | ||
| + | long int size; | ||
| + | |||
| + | fp = fopen("HelloWorld.cl", "rb"); | ||
| + | if(!fp) { | ||
| + | printf("Could not open kernel file\n"); | ||
| + | exit(-1); | ||
| + | } | ||
| + | int status = fseek(fp, 0, SEEK_END); | ||
| + | if(status != 0) { | ||
| + | printf("Error seeking to end of file\n"); | ||
| + | exit(-1); | ||
| + | } | ||
| + | size = ftell(fp); | ||
| + | if(size < 0) { | ||
| + | printf("Error getting file position\n"); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | rewind(fp); | ||
| + | |||
| + | source = (char *)malloc(size + 1); | ||
| + | |||
| + | int i; | ||
| + | for (i = 0; i < size+1; i++) { | ||
| + | source[i]='\0'; | ||
| + | } | ||
| + | |||
| + | if(source == NULL) { | ||
| + | printf("Error allocating space for the kernel source\n"); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | fread(source, 1, size, fp); | ||
| + | source[size] = '\0'; | ||
| + | |||
| + | const char* source_const = source; | ||
| + | cl_program program = clCreateProgramWithSource(context, 1, | ||
| + | &source_const, | ||
| + | NULL, NULL); | ||
| + | if (program == NULL) | ||
| + | { | ||
| + | printf("Failed to create CL program from source.\n"); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("Creating program and kernel\n"); | ||
| + | } | ||
| + | // build program (passing options to compiler if necessary | ||
| + | retval = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); | ||
| + | char* buildLog; size_t size_of_buildLog; | ||
| + | clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, | ||
| + | 0, NULL, &size_of_buildLog); | ||
| + | buildLog = malloc(size_of_buildLog+1); | ||
| + | clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, | ||
| + | size_of_buildLog, buildLog, NULL); | ||
| + | buildLog[size_of_buildLog]= '\0'; | ||
| + | printf("Kernel buildLog: %s\n", buildLog); | ||
| + | if (retval != CL_SUCCESS) | ||
| + | { | ||
| + | printf("Error in kernel\n"); | ||
| + | clReleaseProgram(program); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | |||
| + | // Create OpenCL kernel | ||
| + | kernel = clCreateKernel(program, "hello_kernel", NULL); | ||
| + | if (kernel == NULL) | ||
| + | { | ||
| + | printf("Failed to create kernel.\n"); | ||
| + | exit(0); | ||
| + | } | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("Creating memory objects\n"); | ||
| + | } | ||
| + | // Create memory objects that will be used as arguments to | ||
| + | // kernel. First create host memory arrays that will be | ||
| + | // used to store the arguments to the kernel | ||
| + | float result[1]; | ||
| + | float a[1]; | ||
| + | float b[1]; | ||
| + | a[0] = 2; | ||
| + | b[0] = 2; | ||
| + | |||
| + | memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY , | ||
| + | sizeof(float), NULL, NULL); | ||
| + | memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY , | ||
| + | sizeof(float), NULL, NULL); | ||
| + | memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, | ||
| + | sizeof(float), NULL, NULL); | ||
| + | |||
| + | if (memObjects[0]==NULL || memObjects[1]==NULL || memObjects[2]==NULL){ | ||
| + | printf("Error creating memory objects.\n"); | ||
| + | return 0; | ||
| + | } | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("Sending kernel arguments\n"); | ||
| + | } | ||
| + | retval = clEnqueueWriteBuffer( | ||
| + | commandQueue, | ||
| + | memObjects[0], | ||
| + | CL_FALSE, | ||
| + | 0, | ||
| + | sizeof(float), | ||
| + | a, | ||
| + | 0, | ||
| + | NULL, | ||
| + | NULL); | ||
| + | |||
| + | // Use clEnqueueWriteBuffer() to write input array B to | ||
| + | // the device buffer bufferB | ||
| + | retval = clEnqueueWriteBuffer( | ||
| + | commandQueue, | ||
| + | memObjects[1], | ||
| + | CL_FALSE, | ||
| + | 0, | ||
| + | sizeof(float), | ||
| + | b, | ||
| + | 0, | ||
| + | NULL, | ||
| + | NULL); | ||
| + | |||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | if (retval != CL_SUCCESS) | ||
| + | { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, commandQueue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("Running the kernel!\n"); | ||
| + | } | ||
| + | size_t globalWorkSize[1] = { 1 }; | ||
| + | size_t localWorkSize[1] = { 1 }; | ||
| + | |||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, NULL); | ||
| + | if (retval != CL_SUCCESS) | ||
| + | { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, commandQueue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | if(Monitor>0){ | ||
| + | printf("Transfering back results\n"); | ||
| + | } | ||
| + | // Read the output buffer back to the Host | ||
| + | retval = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, | ||
| + | 0, sizeof(float), result, | ||
| + | 0, NULL, NULL); | ||
| + | if (retval != CL_SUCCESS) | ||
| + | { | ||
| + | printf("Failed to read result buffer.\n"); | ||
| + | //Cleanup(context, commandQueue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | // Verify the output | ||
| + | if(result[0]==4) { | ||
| + | printf("Output is correct: %lf + %lf = %lf\n", | ||
| + | a[0], b[0], result[0]); | ||
| + | } else { | ||
| + | printf("Output is incorrect: %lf + %lf != %lf\n", | ||
| + | a[0], b[0], result[0]); | ||
| + | } | ||
| + | |||
| + | for (i = 0; i < 3; i++) | ||
| + | { | ||
| + | if (memObjects[i] != 0) | ||
| + | clReleaseMemObject(memObjects[i]); | ||
| + | } | ||
| + | if (commandQueue != 0) | ||
| + | clReleaseCommandQueue(commandQueue); | ||
| + | |||
| + | if (kernel != 0) | ||
| + | clReleaseKernel(kernel); | ||
| + | |||
| + | if (program != 0) | ||
| + | clReleaseProgram(program); | ||
| + | |||
| + | } | ||
| + | } | ||
| + | |||
| + | free(list_of_devices); | ||
| + | free(platformIds); | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | </code> | ||
| + | ==== HelloWorld.cl ==== | ||
| + | <code c> | ||
| + | __kernel void hello_kernel(__global const float *a, | ||
| + | __global const float *b, | ||
| + | __global float *result) | ||
| + | { | ||
| + | int gid = get_global_id(0); | ||
| + | |||
| + | if(gid<1) result[gid] = a[gid] + b[gid]; | ||
| + | }</code> | ||
| + | ===== Zajęcia 6 ===== | ||
| + | Pliki zadania: {{:studia:magisterskie:1sem:opencl_vecadd.tgz|}} | ||
| + | Działające 3 kernele: {{:studia:magisterskie:1sem:sala404.zip|}} | ||
| + | |||
| + | ==== opencl_vecadd/vecadd_host/execute_kernels.c ==== | ||
| + | <code c>#include<stdlib.h> | ||
| + | #include<stdio.h> | ||
| + | #include <math.h> | ||
| + | |||
| + | #include<omp.h> | ||
| + | #include <CL/cl.h> | ||
| + | |||
| + | #include"uth_ocl_intf.h" | ||
| + | |||
| + | #include"./vecadd_host.h" | ||
| + | #include"./hardware_params.h" | ||
| + | |||
| + | #define time_measurments | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | #include"uth_time_intf.h" | ||
| + | static double t_begin, t_end, t_total; | ||
| + | #endif | ||
| + | |||
| + | int execute_kernel_2_CPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | |||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | |||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | |||
| + | int i,j,k; | ||
| + | |||
| + | |||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | |||
| + | // create the kernel for CPU | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_2_blocks_kernel", "vecadd_2_blocks.cl", monitor); | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | |||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | |||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. Restoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | |||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | |||
| + | |||
| + | int NDRange_dim = 1; | ||
| + | int nr_cores_CPU = omp_get_num_procs(); | ||
| + | int nr_threads = nr_cores_CPU; // total number of threads | ||
| + | int work_group_size = 1; // number of threads in work-group | ||
| + | int nr_work_groups = nr_threads / work_group_size; | ||
| + | globalWorkSize[0] = nr_threads ; | ||
| + | localWorkSize[0] = work_group_size ; | ||
| + | |||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | nr_cores_CPU, nr_cores_CPU, 1); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | work_group_size, nr_threads / work_group_size, | ||
| + | nr_threads / work_group_size / nr_cores_CPU); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/nr_cores_CPU, | ||
| + | globalWorkSize[0]/nr_cores_CPU); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | array_size, array_size/globalWorkSize[0], array_size/nr_cores_CPU); | ||
| + | |||
| + | |||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | |||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | |||
| + | int size = array_size; | ||
| + | int size_per_thread = ceilf((float)size/nr_threads); | ||
| + | |||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
| + | retval |= clSetKernelArg(kernel, 4, sizeof(int), &size_per_thread); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | int execute_kernel_3_CPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | |||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | |||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | |||
| + | int i,j,k; | ||
| + | |||
| + | |||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | // create the kernel for CPU | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_3_opt_blocks_kernel","vecadd_3_opt_blocks.cl",monitor); | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | |||
| + | |||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | |||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | |||
| + | |||
| + | int NDRange_dim = 1; | ||
| + | int nr_cores_CPU = omp_get_num_procs(); | ||
| + | int nr_threads = nr_cores_CPU; // total number of threads | ||
| + | int work_group_size = 1; // number of threads in work-group | ||
| + | int nr_work_groups = nr_threads / work_group_size; | ||
| + | globalWorkSize[0] = nr_threads ; | ||
| + | localWorkSize[0] = work_group_size ; | ||
| + | |||
| + | printf("\t\tExecuting kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | nr_cores_CPU, nr_cores_CPU, 1); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | work_group_size, nr_threads / work_group_size, | ||
| + | nr_threads / work_group_size / nr_cores_CPU); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/nr_cores_CPU, | ||
| + | globalWorkSize[0]/nr_cores_CPU); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | array_size, array_size/globalWorkSize[0], array_size/nr_cores_CPU); | ||
| + | |||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | |||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | |||
| + | int size = array_size; | ||
| + | |||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | // tutaj modyfikujemy | ||
| + | int execute_kernel_0_GPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | |||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | |||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | |||
| + | int i,j,k; | ||
| + | |||
| + | |||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | // create the kernel for GPU | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_0_kernel", "vecadd_0.cl", monitor); | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | |||
| + | |||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | |||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | |||
| + | |||
| + | int NDRange_dim = 1; | ||
| + | |||
| + | // globalna liczba wątków i lokalna liczba wątków | ||
| + | int work_group_size = WORK_GROUP_SIZE; | ||
| + | globalWorkSize[0] = array_size; | ||
| + | localWorkSize[0] = work_group_size; | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
| + | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | } | ||
| + | |||
| + | |||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | |||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | |||
| + | // Set the kernel arguments (result, a, b) | ||
| + | // ustawiamy liczbę argumentów | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | int execute_kernel_1_GPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | |||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | |||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | |||
| + | int i,j,k; | ||
| + | |||
| + | |||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | // create the kernel for GPU | ||
| + | /* | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
| + | |||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: */ | ||
| + | "vecadd_1_kernel", "vecadd_1.cl", monitor); | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | |||
| + | |||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | |||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | |||
| + | |||
| + | int NDRange_dim = 1; | ||
| + | |||
| + | int work_group_size = WORK_GROUP_SIZE; | ||
| + | globalWorkSize[0] = array_size / 4; | ||
| + | localWorkSize[0] = work_group_size; | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
| + | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | } | ||
| + | |||
| + | |||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | |||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | |||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | int execute_kernel_2_GPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | |||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | |||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | |||
| + | int i,j,k; | ||
| + | |||
| + | |||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | // create the kernel for GPU | ||
| + | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: */ | ||
| + | "vecadd_2_blocks_kernel", "vecadd_2_blocks.cl", monitor); | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | |||
| + | |||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | |||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | |||
| + | |||
| + | int NDRange_dim = 1; | ||
| + | |||
| + | int work_group_size = WORK_GROUP_SIZE; | ||
| + | globalWorkSize[0] = array_size / work_group_size; | ||
| + | localWorkSize[0] = work_group_size; | ||
| + | |||
| + | int size_per_thread = work_group_size; | ||
| + | int size = array_size; | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
| + | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | } | ||
| + | |||
| + | |||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | |||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | |||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
| + | retval |= clSetKernelArg(kernel, 4, sizeof(int), &size_per_thread); | ||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | int execute_kernel_3_GPU( | ||
| + | int platform_index, | ||
| + | int device_index, | ||
| + | int kernel_index, | ||
| + | int array_size, | ||
| + | cl_mem* memObjects, | ||
| + | int monitor | ||
| + | ) | ||
| + | { | ||
| + | |||
| + | cl_kernel kernel; | ||
| + | cl_event kernel_execution; | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | double exec_time; | ||
| + | cl_int retval; | ||
| + | |||
| + | size_t globalWorkSize[1] = { 0 }; | ||
| + | size_t localWorkSize[1] = { 0 }; | ||
| + | |||
| + | int i,j,k; | ||
| + | |||
| + | int size = array_size; | ||
| + | |||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | // create the kernel for GPU | ||
| + | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_0_kernel", "vecadd_0.cl", monitor);*/ | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "vecadd_3_opt_blocks_kernel", "vecadd_3_opt_blocks.cl", monitor); | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel: %lf\n", t_end-t_begin); | ||
| + | #endif | ||
| + | |||
| + | |||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | |||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \tRestoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | |||
| + | |||
| + | int NDRange_dim = 1; | ||
| + | |||
| + | int work_group_size = WORK_GROUP_SIZE; | ||
| + | globalWorkSize[0] = array_size; | ||
| + | localWorkSize[0] = work_group_size; | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\t\t4. executing kernel %d, on platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | printf("PARAMETERS: \tnr_CU %d, nr_cores %d, nr_cores_per_CU %d\n", | ||
| + | NR_COMP_UNITS, NR_CORES_PER_CU*NR_COMP_UNITS, NR_CORES_PER_CU); | ||
| + | printf("\t\tworkgroup size %d, nr_workgroups %d ,nr_workgroups_per_CU %d\n", | ||
| + | localWorkSize[0], globalWorkSize[0]/localWorkSize[0], | ||
| + | globalWorkSize[0]/localWorkSize[0]/NR_COMP_UNITS); | ||
| + | printf("\t\tnr_threads %d, nr_threads_per_CU %d, nr_threads_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/NR_COMP_UNITS, | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | printf("\t\tarray size %d, nr_entries_per_thread %d, nr_entries_per_core %d\n", | ||
| + | globalWorkSize[0], globalWorkSize[0]/globalWorkSize[0], | ||
| + | globalWorkSize[0]/(NR_CORES_PER_CU*NR_COMP_UNITS)); | ||
| + | } | ||
| + | |||
| + | |||
| + | // choose the kernel | ||
| + | kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | |||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | |||
| + | // Set the kernel arguments (result, a, b) | ||
| + | retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); | ||
| + | retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); | ||
| + | retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); | ||
| + | retval |= clSetKernelArg(kernel, 3, sizeof(int), &size); | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to Set the kernel arguments.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // Queue the kernel up for execution across the array | ||
| + | retval = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | ||
| + | globalWorkSize, localWorkSize, | ||
| + | 0, NULL, &kernel_execution); | ||
| + | clWaitForEvents(1, &kernel_execution); | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | clFinish(command_queue); | ||
| + | t_end = time_clock(); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(kernel_execution, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | exec_time = ((double)endTime - (double)startTime)*1.0e-9; | ||
| + | printf("EXECUTION TIME: executing kernel: %lf (profiler: %lf)\n", | ||
| + | t_end-t_begin, ((double)endTime - (double)startTime)*1.0e-9); | ||
| + | printf("\tNumber of operations %d, performance %lf GFlops\n", | ||
| + | array_size, array_size / exec_time * 1e-9); | ||
| + | printf("\tGBytes transferred to processor %lf, speed %lf GB/s\n", | ||
| + | 3*array_size*sizeof(float)*1e-9, | ||
| + | 3*array_size*sizeof(float)/exec_time*1e-9); | ||
| + | #endif | ||
| + | |||
| + | if (retval != CL_SUCCESS) { | ||
| + | printf("Failed to queue kernel for execution.\n"); | ||
| + | //Cleanup(context, command_queue, program, kernel, memObjects); | ||
| + | exit(-1); | ||
| + | } | ||
| + | |||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | /* // create the fourth kernel for GPU */ | ||
| + | /* kernel_index = 4; */ | ||
| + | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, */ | ||
| + | /* // kernel name: , file: */ | ||
| + | /* "vecadd_4_opt_blocks_vect_kernel", "vecadd_4_opt_blocks_vect.cl", monitor); */</code> | ||
| + | |||
| + | ==== opencl_vecadd/vecadd_2_blocks.cl ==== | ||
| + | <code c> | ||
| + | __kernel void vecadd_2_blocks_kernel(__global const float *a, | ||
| + | __global const float *b, | ||
| + | __global float *result, | ||
| + | const int size, | ||
| + | const int size_per_thread) | ||
| + | { | ||
| + | int gid = get_global_id(0); | ||
| + | int index_start = gid * size_per_thread; | ||
| + | int index_end = (gid+1) * size_per_thread; | ||
| + | |||
| + | if (gid == 0) { | ||
| + | //printf("size: %d, %d", size, size_per_thread); | ||
| + | } | ||
| + | |||
| + | for (int i=index_start; i < index_end && i < size; i++) { | ||
| + | //for (int i=index_start; i < index_end ; i++) { | ||
| + | result[i] = a[i]+b[i]; | ||
| + | } | ||
| + | }</code> | ||
| + | ===== Zajęcia 7 ===== | ||
| + | {{:studia:magisterskie:1sem:zajecia_7.zip|}} | ||
| + | |||
| + | ==== tmp/opencl_mat_transp/mat_transp_1.cl ==== | ||
| + | <code c> #pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
| + | |||
| + | __kernel void mat_transp_1_kernel( | ||
| + | __global float *A , | ||
| + | __global float *B , | ||
| + | __global float *C , | ||
| + | int N) | ||
| + | { | ||
| + | |||
| + | int id = get_global_id(0) * get_global_size(0) + get_global_id(1); | ||
| + | float temp = 0.0; | ||
| + | |||
| + | for(int i = 0; i<N;i++){ | ||
| + | temp += A[id*N + i] *B[i]; | ||
| + | } | ||
| + | |||
| + | C[id] = temp; | ||
| + | }</code> | ||
| + | |||
| + | ==== tmp/opencl_mat_transp/mat_transp/mat_transp.c ==== | ||
| + | <code c>#include<stdlib.h> | ||
| + | #include<stdio.h> | ||
| + | #include <math.h> | ||
| + | |||
| + | #include <CL/cl.h> | ||
| + | |||
| + | #include"uth_ocl_intf.h" | ||
| + | |||
| + | #define time_measurments | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | #include"uth_time_intf.h" | ||
| + | static double t_begin, t_end, t_total; | ||
| + | #endif | ||
| + | |||
| + | |||
| + | |||
| + | #define BLOCK_SIZE 16 | ||
| + | #define NR_GROUPS 16 | ||
| + | #define MULT 16 | ||
| + | #define WYMIAR (BLOCK_SIZE*NR_GROUPS*MULT) | ||
| + | #define ROZMIAR (WYMIAR*WYMIAR) | ||
| + | // Matrices are stored in row-major order: | ||
| + | // M(row, col) = M( row * WYMIAR + col ) | ||
| + | |||
| + | int verify_result( | ||
| + | float* result, | ||
| + | float* result_compare | ||
| + | ) | ||
| + | { | ||
| + | // Verify the result | ||
| + | int result_OK = 1; | ||
| + | int i,j; | ||
| + | for(i = 0; i < WYMIAR; i++) { | ||
| + | if(fabs(result[i] - result_compare[i])>1.e-6) { | ||
| + | result_OK = 0; | ||
| + | break; | ||
| + | } | ||
| + | } | ||
| + | printf("\t\t6. verifying results: "); | ||
| + | if(result_OK) { | ||
| + | printf("Output is correct\n"); | ||
| + | } else { | ||
| + | printf("Output is incorrect\n"); | ||
| + | j = 0; | ||
| + | for(i = 0; i < WYMIAR, j<100; i++) { | ||
| + | if(fabs(result[i] - result_compare[i])>1.e-9) { | ||
| + | j++; | ||
| + | printf("%d %16.8f %16.8f\n", | ||
| + | i, result[i], result_compare[i]); | ||
| + | } | ||
| + | } | ||
| + | exit(0); | ||
| + | } | ||
| + | /* for(i = 0; i < length; i++) { */ | ||
| + | /* printf("%16.8f %16.8f\n", result[i], result_compare[i]); */ | ||
| + | /* } */ | ||
| + | |||
| + | return(result_OK); | ||
| + | } | ||
| + | |||
| + | /*----------------KERNEL CREATION PHASE----------------------*/ | ||
| + | void create_kernels() | ||
| + | { | ||
| + | |||
| + | // for all operations indicate explicit info messages | ||
| + | int monitor = UTC_BASIC_INFO + 1; | ||
| + | |||
| + | int kernel_index; | ||
| + | |||
| + | int platform_index = utv_ocl_struct.current_platform_index; | ||
| + | |||
| + | if(utr_ocl_CPU_context_exists(platform_index)){ | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | |||
| + | // create the first kernel for CPU | ||
| + | /* kernel_index = 0; */ | ||
| + | /* utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_CPU, kernel_index, */ | ||
| + | /* // kernel name: , file: */ | ||
| + | /* "mat_transp_1_kernel", "mat_transp_1.cl", monitor); */ | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel %d: %lf\n", kernel_index, t_end-t_begin); | ||
| + | #endif | ||
| + | |||
| + | } | ||
| + | |||
| + | if(utr_ocl_GPU_context_exists(platform_index)){ | ||
| + | |||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_begin = time_clock(); | ||
| + | #endif | ||
| + | // create the first kernel for GPU | ||
| + | kernel_index = 0; | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "mat_transp_1_kernel", "mat_transp_1.cl", monitor); | ||
| + | |||
| + | #ifdef time_measurments | ||
| + | t_end = time_clock(); | ||
| + | printf("EXECUTION TIME: creating CPU kernel %d: %lf\n", kernel_index, t_end-t_begin); | ||
| + | #endif | ||
| + | |||
| + | // create the second kernel for GPU | ||
| + | kernel_index = 1; | ||
| + | utr_ocl_create_kernel_dev_type( platform_index, UTC_OCL_DEVICE_GPU, kernel_index, | ||
| + | // kernel name: , file: | ||
| + | "mat_transp_2_kernel", "mat_transp_2.cl", monitor); | ||
| + | |||
| + | |||
| + | |||
| + | } | ||
| + | |||
| + | } | ||
| + | |||
| + | /*----------------EXECUTION PHASE----------------------*/ | ||
| + | int execute_kernels() | ||
| + | { | ||
| + | |||
| + | // for all operations indicate explicit info messages | ||
| + | int monitor = UTC_BASIC_INFO + 1; | ||
| + | |||
| + | int platform_index = utv_ocl_struct.current_platform_index; | ||
| + | utt_ocl_platform_struct platform_struct = utv_ocl_struct.list_of_platforms[platform_index]; | ||
| + | |||
| + | int kernel_index; | ||
| + | int i,j,n; | ||
| + | |||
| + | double nr_access; | ||
| + | double t1,t2; | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\n------------Starting execution phase----------------\n"); | ||
| + | } | ||
| + | |||
| + | // create matrices | ||
| + | n=WYMIAR; | ||
| + | float* A = (float *) malloc(ROZMIAR*sizeof(float)); | ||
| + | float* B = (float *) malloc(WYMIAR*sizeof(float)); | ||
| + | float* C = (float *) malloc(WYMIAR*sizeof(float)); | ||
| + | float* D = (float *) malloc(WYMIAR*sizeof(float)); | ||
| + | |||
| + | for(i=0;i<ROZMIAR;i++) A[i]=1.0*i/10000000.0; | ||
| + | |||
| + | for(i=0;i<WYMIAR;i++) B[i]=1.0*i/10000000.0; | ||
| + | |||
| + | // mnozenie macierz wektor | ||
| + | for(i=0; i<WYMIAR; i++){ | ||
| + | for(j=0; j<WYMIAR; j++){ | ||
| + | D[i] += A[i*WYMIAR + j] * B[j]; | ||
| + | } | ||
| + | } | ||
| + | |||
| + | nr_access= ROZMIAR + 2.0*WYMIAR; // read + write | ||
| + | |||
| + | printf("mat_transp: nr_access %lf\n", nr_access); | ||
| + | |||
| + | // get hardware characteristics to select good matrix shape | ||
| + | // the set of device characteristics stored in data structure | ||
| + | int device_index = 0; | ||
| + | utt_ocl_device_struct device_struct = | ||
| + | utv_ocl_struct.list_of_platforms[platform_index].list_of_devices[device_index]; | ||
| + | double global_mem_bytes = device_struct.global_mem_bytes; | ||
| + | double global_max_alloc = device_struct.global_max_alloc; | ||
| + | double local_mem_bytes = device_struct.local_mem_bytes; | ||
| + | double constant_mem_bytes = device_struct.constant_mem_bytes; | ||
| + | int max_num_comp_units = device_struct.max_num_comp_units; | ||
| + | int max_work_group_size = device_struct.max_work_group_size; | ||
| + | |||
| + | // in a loop over devices (or for a selected device) | ||
| + | int idev=0; | ||
| + | for(idev=0; idev<platform_struct.number_of_devices; idev++){ | ||
| + | |||
| + | // int device_type = ..... | ||
| + | // choose device_index | ||
| + | // int device_index = utr_ocl_select_device(platform_index, device_type); | ||
| + | int device_index = idev; | ||
| + | int device_type = utr_ocl_device_type(platform_index, device_index); | ||
| + | |||
| + | if(device_index>0 && device_type==utr_ocl_device_type(platform_index, device_index-1)) break; | ||
| + | if(device_type == UTC_OCL_DEVICE_CPU) break; | ||
| + | |||
| + | // choose the context | ||
| + | cl_context context = utr_ocl_select_context(platform_index, device_index); | ||
| + | |||
| + | // choose the command queue | ||
| + | cl_command_queue command_queue = | ||
| + | utr_ocl_select_command_queue(platform_index, device_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t0. restoring context and command queue for platform %d and device %d\n", | ||
| + | platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL){ | ||
| + | |||
| + | printf("failed to restore context and command queue for platform %d, device %d\n", | ||
| + | platform_index, device_index); | ||
| + | printf("%lu %lu\n", context, command_queue); | ||
| + | } | ||
| + | |||
| + | // choose the kernel | ||
| + | kernel_index = 0; | ||
| + | cl_kernel kernel = utr_ocl_select_kernel(platform_index, device_index, kernel_index); | ||
| + | |||
| + | if(monitor>UTC_BASIC_INFO){ | ||
| + | printf("\nExecution: \t3. restoring kernel %d for platform %d and device %d\n", | ||
| + | kernel_index, platform_index, device_index); | ||
| + | } | ||
| + | |||
| + | if(context == NULL || command_queue == NULL || kernel == NULL){ | ||
| + | |||
| + | printf("failed to restore kernel for platform %d, device %d, kernel %d\n", | ||
| + | platform_index, device_index, kernel_index); | ||
| + | printf("context %lu, command queue %lu, kernel %lu\n", | ||
| + | context, command_queue, kernel); | ||
| + | } | ||
| + | |||
| + | for(i=0;i<WYMIAR;i++) C[i]=0.0; | ||
| + | time_init(); t1 = time_clock(); | ||
| + | |||
| + | // call routine to perform matrix transposition | ||
| + | mat_transp_host(kernel_index, A, B, C, n, context, kernel, command_queue); | ||
| + | |||
| + | t2 = time_clock(); time_print(); | ||
| + | printf("GB/s = %lf\n\n", nr_access*sizeof(float)/(t2-t1)/1024/1024/1024); | ||
| + | |||
| + | |||
| + | // verify result | ||
| + | verify_result(C, D); | ||
| + | |||
| + | |||
| + | } // end loop over devices | ||
| + | |||
| + | return(0); | ||
| + | } | ||
| + | |||
| + | |||
| + | |||
| + | // Matrix transposition - Host code | ||
| + | // Matrix dimensions are assumed to be multiples of BLOCK_SIZE | ||
| + | int mat_transp_host( | ||
| + | int kernel_index, | ||
| + | float* A, | ||
| + | float* B, | ||
| + | float* C, | ||
| + | int N, | ||
| + | const cl_context context, | ||
| + | const cl_kernel mat_transp_kernel, | ||
| + | const cl_command_queue queue | ||
| + | ) | ||
| + | { | ||
| + | |||
| + | // Load A to device memory | ||
| + | size_t size_bytesm = N*N*sizeof(float); | ||
| + | size_t size_bytesw = N*sizeof(float); | ||
| + | |||
| + | |||
| + | cl_mem d_A = clCreateBuffer(context, CL_MEM_READ_ONLY, | ||
| + | size_bytesm, NULL, NULL); | ||
| + | |||
| + | // Write A to device memory | ||
| + | clEnqueueWriteBuffer(queue, d_A, CL_TRUE, 0, size_bytesm, A, 0, 0, 0); | ||
| + | |||
| + | cl_mem d_B = clCreateBuffer(context, CL_MEM_READ_ONLY, | ||
| + | size_bytesw, NULL, NULL); | ||
| + | |||
| + | // Write A to device memory | ||
| + | clEnqueueWriteBuffer(queue, d_B, CL_TRUE, 0, size_bytesw, B, 0, 0, 0); | ||
| + | |||
| + | // Allocate B in device memory | ||
| + | cl_mem d_C = clCreateBuffer(context, CL_MEM_WRITE_ONLY, size_bytesw, NULL, NULL); | ||
| + | |||
| + | |||
| + | // Invoke kernel | ||
| + | clSetKernelArg(mat_transp_kernel, 0, sizeof(cl_mem), (void*)&d_A); | ||
| + | clSetKernelArg(mat_transp_kernel, 1, sizeof(cl_mem), (void*)&d_B); | ||
| + | clSetKernelArg(mat_transp_kernel, 2, sizeof(cl_mem), (void*)&d_C); | ||
| + | clSetKernelArg(mat_transp_kernel, 3, sizeof(int), (void*)&N); | ||
| + | |||
| + | size_t localWorkSize[3]; | ||
| + | size_t globalWorkSize[3]; | ||
| + | cl_uint work_dim; | ||
| + | |||
| + | if(kernel_index==3){ | ||
| + | |||
| + | work_dim = 2; | ||
| + | localWorkSize[0] = BLOCK_SIZE; | ||
| + | globalWorkSize[0] = N/MULT; | ||
| + | localWorkSize[1] = BLOCK_SIZE; | ||
| + | globalWorkSize[1] = N/MULT; | ||
| + | localWorkSize[2] = 0; | ||
| + | globalWorkSize[2] = 0; | ||
| + | |||
| + | |||
| + | } | ||
| + | else{ | ||
| + | |||
| + | work_dim = 2; | ||
| + | localWorkSize[0] = BLOCK_SIZE; | ||
| + | globalWorkSize[0] = sqrt(N); | ||
| + | localWorkSize[1] = BLOCK_SIZE; | ||
| + | globalWorkSize[1] = sqrt(N); | ||
| + | localWorkSize[2] = 0; | ||
| + | globalWorkSize[2] = 0; | ||
| + | |||
| + | } | ||
| + | |||
| + | clFinish(queue); | ||
| + | double t1 = time_clock(); | ||
| + | // Enqueue a kernel run call | ||
| + | cl_event ndrEvt; | ||
| + | clEnqueueNDRangeKernel(queue, mat_transp_kernel, work_dim, 0, | ||
| + | globalWorkSize, localWorkSize, 0, 0, &ndrEvt); | ||
| + | clWaitForEvents(1, &ndrEvt); | ||
| + | clFinish(queue); | ||
| + | double t2 = time_clock(); | ||
| + | // Calculate performance | ||
| + | cl_ulong startTime; | ||
| + | cl_ulong endTime; | ||
| + | |||
| + | // Get kernel profiling info | ||
| + | clGetEventProfilingInfo(ndrEvt, | ||
| + | CL_PROFILING_COMMAND_START, | ||
| + | sizeof(cl_ulong), | ||
| + | &startTime, | ||
| + | 0); | ||
| + | clGetEventProfilingInfo(ndrEvt, | ||
| + | CL_PROFILING_COMMAND_END, | ||
| + | sizeof(cl_ulong), | ||
| + | &endTime, | ||
| + | 0); | ||
| + | double time = (double)endTime - (double)startTime; | ||
| + | printf("\nKernel execution internal: time %lf, GB/s = %lf\n", | ||
| + | time*1.0e-9, 2.0*N*N*sizeof(float)/(time*1.0e-9)/1024/1024/1024); | ||
| + | |||
| + | printf("Kernel execution external: time %lf, GB/s = %lf\n\n", | ||
| + | t2-t1, 2.0*N*N*sizeof(float)/(t2-t1)/1024/1024/1024); | ||
| + | |||
| + | // Read B from device memory | ||
| + | clEnqueueReadBuffer(queue, d_C, CL_TRUE, 0, size_bytesw, C, 0, 0, 0); | ||
| + | |||
| + | // Free device memory | ||
| + | clReleaseMemObject(d_A); | ||
| + | clReleaseMemObject(d_B); | ||
| + | clReleaseMemObject(d_C); | ||
| + | |||
| + | return(0); | ||
| + | |||
| + | }</code> | ||
| + | ==== Kod wogu ==== | ||
| + | === server.c ==== | ||
| + | <code c> | ||
| + | /* | ||
| + | C socket server example, handles multiple clients using threads | ||
| + | Compile | ||
| + | gcc server.c -lpthread -o server | ||
| + | */ | ||
| + | |||
| + | #include<stdio.h> | ||
| + | #include<string.h> //strlen | ||
| + | #include<stdlib.h> //strlen | ||
| + | #include<sys/socket.h> | ||
| + | #include<arpa/inet.h> //inet_addr | ||
| + | #include<unistd.h> //write | ||
| + | #include<pthread.h> //for threading , link with lpthread | ||
| + | #include <unistd.h> | ||
| + | |||
| + | //the thread function | ||
| + | void *connection_handler(void *); | ||
| + | |||
| + | int main(int argc , char *argv[]) | ||
| + | { | ||
| + | int counter = 0; | ||
| + | pid_t pid = fork(); | ||
| + | |||
| + | if (pid == 0) | ||
| + | { | ||
| + | int sock; | ||
| + | struct sockaddr_in server; | ||
| + | char message[1000] , server_reply[2000]; | ||
| + | int counter; | ||
| + | unsigned int ip[] = { 192, 168, 102, 1 }; | ||
| + | char* ip_final; | ||
| + | char *buf; | ||
| + | size_t sz; | ||
| + | |||
| + | //Create socket | ||
| + | |||
| + | for ( counter = 0; counter < 255; ++counter ) { | ||
| + | |||
| + | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (sock == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | //printf("192.168.102.%d", counter); | ||
| + | |||
| + | sz = snprintf(NULL, 0, "192.168.102.%d", counter); | ||
| + | buf = (char *)malloc(sz + 1); /* make sure you check for != NULL in real code */ | ||
| + | |||
| + | server.sin_addr.s_addr = inet_addr(buf); | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_port = htons( 65001 ); | ||
| + | |||
| + | //Connect to remote server | ||
| + | if (connect(sock , (struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | perror("connect failed. Error"); | ||
| + | continue; | ||
| + | } | ||
| + | |||
| + | puts("Connected\n"); | ||
| + | |||
| + | //keep communicating with server | ||
| + | while(1) | ||
| + | { | ||
| + | //Send some data | ||
| + | if( send(sock , "Werset 6-client" , 15 , 0) < 0) | ||
| + | { | ||
| + | puts("Send failed"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | //Receive a reply from the server | ||
| + | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
| + | { | ||
| + | puts("recv failed"); | ||
| + | break; | ||
| + | } | ||
| + | |||
| + | puts(server_reply); | ||
| + | |||
| + | break; | ||
| + | } | ||
| + | |||
| + | close(sock); | ||
| + | } | ||
| + | } | ||
| + | else if (pid > 0) | ||
| + | { | ||
| + | int socket_desc , client_sock , c; | ||
| + | struct sockaddr_in server , client; | ||
| + | |||
| + | //Create socket | ||
| + | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (socket_desc == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | //Prepare the sockaddr_in structure | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_addr.s_addr = INADDR_ANY; | ||
| + | server.sin_port = htons( 65001 ); | ||
| + | |||
| + | //Bind | ||
| + | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | //print the error message | ||
| + | perror("bind failed. Error"); | ||
| + | return 1; | ||
| + | } | ||
| + | puts("bind done"); | ||
| + | |||
| + | //Listen | ||
| + | listen(socket_desc , 3); | ||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | |||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | pthread_t thread_id; | ||
| + | |||
| + | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
| + | { | ||
| + | puts("Connection accepted"); | ||
| + | |||
| + | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
| + | { | ||
| + | perror("could not create thread"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | //Now join the thread , so that we dont terminate before the thread | ||
| + | //pthread_join( thread_id , NULL); | ||
| + | puts("Handler assigned"); | ||
| + | } | ||
| + | |||
| + | if (client_sock < 0) | ||
| + | { | ||
| + | perror("accept failed"); | ||
| + | return 1; | ||
| + | } | ||
| + | } | ||
| + | else | ||
| + | { | ||
| + | // fork failed | ||
| + | printf("fork() failed!\n"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | |||
| + | /* | ||
| + | * This will handle connection for each client | ||
| + | * */ | ||
| + | void *connection_handler(void *socket_desc) | ||
| + | { | ||
| + | //Get the socket descriptor | ||
| + | int sock = *(int*)socket_desc; | ||
| + | int read_size; | ||
| + | char *message , client_message[2000]; | ||
| + | |||
| + | //Receive a message from client | ||
| + | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
| + | { | ||
| + | printf("%s\n", client_message); | ||
| + | |||
| + | //end of string marker | ||
| + | client_message[read_size] = '\0'; | ||
| + | |||
| + | //Send the message back to client | ||
| + | write(sock , "Werset 6" , 8); | ||
| + | |||
| + | //clear the message buffer | ||
| + | memset(client_message, 0, 2000); | ||
| + | } | ||
| + | |||
| + | if(read_size == 0) | ||
| + | { | ||
| + | puts("Client disconnected"); | ||
| + | fflush(stdout); | ||
| + | } | ||
| + | else if(read_size == -1) | ||
| + | { | ||
| + | perror("recv failed"); | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | }</code> | ||
| + | === Makefile === | ||
| + | <code bash># kompilator c | ||
| + | CC = gcc | ||
| + | |||
| + | # konsolidator | ||
| + | CFLAGS=-g | ||
| + | |||
| + | all: server client | ||
| + | |||
| + | server: server.o | ||
| + | $(CC) $(LFLAGS) server.o -lpthread -o server | ||
| + | |||
| + | server.o: server.c | ||
| + | $(CC) -c server.c -o server.o | ||
| + | |||
| + | client: client.o | ||
| + | $(CC) client.o -o client | ||
| + | |||
| + | client.o: client.c | ||
| + | $(CC) -c client.c -o client.o | ||
| + | |||
| + | clean: | ||
| + | rm -f *.o</code> | ||
| + | ===== Zajęcia 9 ===== | ||
| + | <code c>/* | ||
| + | C socket server example, handles multiple clients using threads | ||
| + | Compile | ||
| + | gcc server.c -lpthread -o server | ||
| + | */ | ||
| + | |||
| + | #include<stdio.h> | ||
| + | #include<string.h> //strlen | ||
| + | #include<stdlib.h> //strlen | ||
| + | #include<sys/socket.h> | ||
| + | #include<arpa/inet.h> //inet_addr | ||
| + | #include<unistd.h> //write | ||
| + | #include<pthread.h> //for threading , link with lpthread | ||
| + | |||
| + | //the thread function | ||
| + | void *connection_handler(void *); | ||
| + | |||
| + | int main(int argc , char *argv[]) | ||
| + | { | ||
| + | int socket_desc , client_sock , c; | ||
| + | struct sockaddr_in server , client; | ||
| + | |||
| + | //Create socket | ||
| + | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (socket_desc == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | //Prepare the sockaddr_in structure | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_addr.s_addr = INADDR_ANY; | ||
| + | server.sin_port = htons( 8888 ); | ||
| + | |||
| + | //Bind | ||
| + | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | //print the error message | ||
| + | perror("bind failed. Error"); | ||
| + | return 1; | ||
| + | } | ||
| + | puts("bind done"); | ||
| + | |||
| + | //Listen | ||
| + | listen(socket_desc , 3); | ||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | |||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | pthread_t thread_id; | ||
| + | |||
| + | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
| + | { | ||
| + | puts("Connection accepted"); | ||
| + | |||
| + | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
| + | { | ||
| + | perror("could not create thread"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | //Now join the thread , so that we dont terminate before the thread | ||
| + | //pthread_join( thread_id , NULL); | ||
| + | puts("Handler assigned"); | ||
| + | } | ||
| + | |||
| + | if (client_sock < 0) | ||
| + | { | ||
| + | perror("accept failed"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | |||
| + | /* | ||
| + | * This will handle connection for each client | ||
| + | * */ | ||
| + | void *connection_handler(void *socket_desc) | ||
| + | { | ||
| + | //Get the socket descriptor | ||
| + | int sock = *(int*)socket_desc; | ||
| + | int read_size; | ||
| + | char *message , client_message[2000]; | ||
| + | |||
| + | //Receive a message from client | ||
| + | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
| + | { | ||
| + | //end of string marker | ||
| + | client_message[read_size] = '\0'; | ||
| + | |||
| + | //Send the message back to client | ||
| + | write(sock , "Werset 6" , strlen(client_message)); | ||
| + | |||
| + | //clear the message buffer | ||
| + | memset(client_message, 0, 2000); | ||
| + | } | ||
| + | |||
| + | if(read_size == 0) | ||
| + | { | ||
| + | puts("Client disconnected"); | ||
| + | fflush(stdout); | ||
| + | } | ||
| + | else if(read_size == -1) | ||
| + | { | ||
| + | perror("recv failed"); | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } </code> | ||
| + | |||
| + | ==== Client C ==== | ||
| + | |||
| + | <code c>#include<stdio.h> //printf | ||
| + | #include<string.h> //strlen | ||
| + | #include<sys/socket.h> //socket | ||
| + | #include<arpa/inet.h> //inet_addr | ||
| + | |||
| + | int main(int argc , char *argv[]) | ||
| + | { | ||
| + | int sock; | ||
| + | struct sockaddr_in server; | ||
| + | char message[1000] , server_reply[2000]; | ||
| + | |||
| + | //Create socket | ||
| + | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (sock == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | server.sin_addr.s_addr = inet_addr("127.0.0.1"); | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_port = htons( 8888 ); | ||
| + | |||
| + | //Connect to remote server | ||
| + | if (connect(sock , (struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | perror("connect failed. Error"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | puts("Connected\n"); | ||
| + | |||
| + | //keep communicating with server | ||
| + | while(1) | ||
| + | { | ||
| + | printf("Enter message : "); | ||
| + | scanf("%s" , message); | ||
| + | |||
| + | //Send some data | ||
| + | if( send(sock , message , strlen(message) , 0) < 0) | ||
| + | { | ||
| + | puts("Send failed"); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | //Receive a reply from the server | ||
| + | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
| + | { | ||
| + | puts("recv failed"); | ||
| + | break; | ||
| + | } | ||
| + | |||
| + | puts("Server reply :"); | ||
| + | puts(server_reply); | ||
| + | } | ||
| + | |||
| + | close(sock); | ||
| + | return 0; | ||
| + | }</code> | ||
| + | |||
| + | |||
| + | ==== Kod Maciek Gonzo itp ==== | ||
| + | <code c> | ||
| + | #include<stdio.h> | ||
| + | #include<string.h> //strlen | ||
| + | #include<stdlib.h> //strlen | ||
| + | #include<sys/socket.h> | ||
| + | #include<arpa/inet.h> //inet_addr | ||
| + | #include<unistd.h> //write | ||
| + | #include<pthread.h> //for threading , link with lpthread | ||
| + | |||
| + | //the thread function | ||
| + | void *connection_handler(void *); | ||
| + | |||
| + | void *client_function(void *socket_desc){ | ||
| + | int sock; | ||
| + | struct sockaddr_in server_id; | ||
| + | char message[1000] , server_reply[2000]; | ||
| + | |||
| + | //Create socket | ||
| + | sock = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (sock == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | // 192.168.102.63 wogu | ||
| + | server_id.sin_addr.s_addr = inet_addr("localhost"); | ||
| + | server_id.sin_family = AF_INET; | ||
| + | server_id.sin_port = htons( 65000 ); | ||
| + | |||
| + | //Connect to remote server | ||
| + | |||
| + | if (connect(sock , (struct sockaddr *)&server_id , sizeof(server_id)) < 0) | ||
| + | { | ||
| + | perror("connect failed. Error"); | ||
| + | |||
| + | }else{ | ||
| + | |||
| + | puts("Connected\n"); | ||
| + | |||
| + | //keep communicating with server | ||
| + | while(1) | ||
| + | { | ||
| + | printf("Enter message : "); | ||
| + | scanf("%s" , message); | ||
| + | |||
| + | //Send some data | ||
| + | if( send(sock , message , strlen(message) , 0) < 0) | ||
| + | { | ||
| + | puts("Send failed"); | ||
| + | |||
| + | } | ||
| + | |||
| + | //Receive a reply from the server | ||
| + | if( recv(sock , server_reply , 2000 , 0) < 0) | ||
| + | { | ||
| + | puts("recv failed"); | ||
| + | break; | ||
| + | } | ||
| + | |||
| + | puts("Server reply :"); | ||
| + | puts(server_reply); | ||
| + | } | ||
| + | |||
| + | close(sock); | ||
| + | } | ||
| + | |||
| + | }; | ||
| + | |||
| + | |||
| + | void *server_function(void * sock){ | ||
| + | printf("SERVER Start"); | ||
| + | fflush(stdout); | ||
| + | int socket_desc , client_sock , c; | ||
| + | pthread_t thread_id; | ||
| + | struct sockaddr_in server , client; | ||
| + | |||
| + | //Create socket | ||
| + | socket_desc = socket(AF_INET , SOCK_STREAM , 0); | ||
| + | if (socket_desc == -1) | ||
| + | { | ||
| + | printf("Could not create socket"); | ||
| + | } | ||
| + | puts("Socket created"); | ||
| + | |||
| + | //Prepare the sockaddr_in structure | ||
| + | server.sin_family = AF_INET; | ||
| + | server.sin_addr.s_addr = INADDR_ANY; | ||
| + | server.sin_port = htons( 65000); | ||
| + | |||
| + | //Bind | ||
| + | if( bind(socket_desc,(struct sockaddr *)&server , sizeof(server)) < 0) | ||
| + | { | ||
| + | //print the error message | ||
| + | perror("bind failed. Error"); | ||
| + | return; | ||
| + | } | ||
| + | puts("bind done"); | ||
| + | |||
| + | |||
| + | |||
| + | //Listen | ||
| + | listen(socket_desc , 3); | ||
| + | |||
| + | //Accept and incoming connection | ||
| + | puts("Waiting for incoming connections..."); | ||
| + | c = sizeof(struct sockaddr_in); | ||
| + | fflush(stdout); | ||
| + | |||
| + | while( (client_sock = accept(socket_desc, (struct sockaddr *)&client, (socklen_t*)&c)) ) | ||
| + | { | ||
| + | puts("Connection accepted"); | ||
| + | |||
| + | if( pthread_create( &thread_id , NULL , connection_handler , (void*) &client_sock) < 0) | ||
| + | { | ||
| + | perror("could not create thread"); | ||
| + | return; | ||
| + | } | ||
| + | |||
| + | //Now join the thread , so that we dont terminate before the thread | ||
| + | //pthread_join( thread_id , NULL); | ||
| + | puts("Handler assigned"); | ||
| + | } | ||
| + | |||
| + | if (client_sock < 0) | ||
| + | { | ||
| + | perror("accept failed"); | ||
| + | return; | ||
| + | } | ||
| + | } | ||
| + | |||
| + | |||
| + | int main(int argc , char *argv[]) | ||
| + | { | ||
| + | pthread_t thread_id; | ||
| + | //Create server | ||
| + | if( pthread_create( &thread_id , NULL , server_function , NULL) != 0) | ||
| + | { | ||
| + | perror("could not create server thread"); | ||
| + | fflush(stdout); | ||
| + | return 1; | ||
| + | } | ||
| + | pthread_t thread_id2; | ||
| + | |||
| + | // Create client | ||
| + | if( pthread_create( &thread_id2 , NULL , client_function , NULL) != 0) | ||
| + | { | ||
| + | perror("could not create client thread"); | ||
| + | fflush(stdout); | ||
| + | return 1; | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | |||
| + | /* | ||
| + | * This will handle connection for each client | ||
| + | * */ | ||
| + | void *connection_handler(void *socket_desc) | ||
| + | { | ||
| + | //Get the socket descriptor | ||
| + | int sock = *(int*)socket_desc; | ||
| + | int read_size; | ||
| + | char *message , client_message[2000]; | ||
| + | |||
| + | //Receive a message from client | ||
| + | while( (read_size = recv(sock , client_message , 2000 , 0)) > 0 ) | ||
| + | { | ||
| + | //end of string marker | ||
| + | client_message[read_size] = '\0'; | ||
| + | |||
| + | //Send the message back to client | ||
| + | write(sock , "Werset 5" , strlen(client_message)); | ||
| + | |||
| + | //clear the message buffer | ||
| + | memset(client_message, 0, 2000); | ||
| + | } | ||
| + | |||
| + | if(read_size == 0) | ||
| + | { | ||
| + | puts("Client disconnected"); | ||
| + | fflush(stdout); | ||
| + | } | ||
| + | else if(read_size == -1) | ||
| + | { | ||
| + | perror("recv failed"); | ||
| + | } | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| </code> | </code> | ||