To jest stara wersja strony!
# Makefile for the MPI heat-equation program (heat.c).

# C compiler
CCOMP = mpicc
# linker
LINK = mpicc
# MPI launcher
MPIRUN = /usr/lib64/openmpi/bin/mpiexec

# optimization options:
# debugging version
# OPT = -g -DDEBUG -p
# optimized version for timing measurements
# OPT = -O3 -fopenmp -p

# header files
#INC = -I../pomiar_czasu

# libraries
#LIB = -L../pomiar_czasu -lm
LIB = -lm

# run and clean do not produce files of those names
.PHONY: run clean

# dependencies and commands
heat: heat.o
	$(LINK) $(OPT) heat.o -o heat $(LIB)

heat.o: heat.c
	$(CCOMP) -c $(OPT) heat.c $(INC)

# make sure the binary is up to date before launching it
run: heat
	$(MPIRUN) -np 8 ./heat

# remove object files and the linked binary
clean:
	rm -f *.o heat
# include <stdlib.h> # include <stdio.h> # include <math.h> # include "mpi.h" int main ( int argc, char *argv[] ); void heat_part ( int n, int p, int id, double x_min, double x_max ); /******************************************************************************/ int main ( int argc, char *argv[] ) { double a = 0.0; // lewy brzeg przedzialu double b = 1.0; // prawy brzeg przedzialu int i; int id; // rank int n; // liczba punktow dla kazdego wezla int p; // size double x_max; double x_min; MPI_Init ( &argc, &argv ); MPI_Comm_rank ( MPI_COMM_WORLD, &id ); MPI_Comm_size ( MPI_COMM_WORLD, &p ); n = 12; // liczba punktow dla kazdego wezla i = 0; // poczatkowa chwila czasu // wspolrzedna lewego punktu dla wezla id x_min = ( ( double )( p * n + 1 - id * n - i ) * a + ( double )( id * n + i ) * b ) / ( double ) ( p * n + 1 ); i = n + 1; // wspolrzedna prawego punktu dla wezla id x_max = ( ( double )( p * n + 1 - id * n - i ) * a + ( double )( id * n + i ) * b ) / ( double )( p * n + 1 ); heat_part ( n, p, id, x_min, x_max ); // obliczenia dla pojedynczego wezla MPI_Finalize ( ); } /******************************************************************************/ // obliczenia dla pojedynczego wezla - pojedynczego podobszaru /******************************************************************************/ void heat_part ( int n, int p, int id, double x_min, double x_max ) { double cfl; double *h; double *h_new; int i; int ierr; int j; int j_max; int j_min; double k; MPI_Status status; double t; double t_del; double t_max; double t_min; int tag; double wtime; double *x; double x_del; h = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // rozwiazanie dla t_i h_new = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // rozwiazanie dla t_i+1 x = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // wspolrzedne punktow k = 0.002 / ( double ) p; // przewodniosc cieplna j_min = 0; // indeksy krokow czasowych - min i max j_max = 100; t_min = 0.0; // chwile czasu - min 
i max t_max = 10.0; t_del = ( t_max - t_min ) / ( double ) ( j_max - j_min ); // krok czasowy Delta t x_del = ( x_max - x_min ) / ( double ) ( n + 1 ); // odstep miedzy punktami for ( i = 0; i <= n + 1; i++ ) { x[i] = ( ( double ) ( i ) * x_max + ( double ) ( n + 1 - i ) * x_min ) / ( double ) ( n + 1 ); } // ustawienie warunku poczatkowego for ( i = 0; i <= n + 1; i++ ) { h[i] = 95.0; } // sprawdzenie stabilnosci schematu cfl = k * t_del / x_del / x_del; if ( 0.5 <= cfl ) { printf ( " CFL condition failed.\n" ); exit ( 1 ); } wtime = MPI_Wtime ( ); //poczatek pomiaru czasu for ( j = 1; j <= j_max; j++ ) { // wymiana informacji z wezlami sasiednimi tag = 1; if ( id < p - 1 ) { MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD ); } if ( 0 < id ) { MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status ); } tag = 2; if ( 0 < id ) { // DO UZUPELNIENIA MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD ); } if ( id < p - 1 ) { // DO UZUPELNIENIA MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status ); } // implementacja wzoru roznicowego for ( i = 1; i <= n; i++ ) { h_new[i] = h[i] + t_del * ( k * ( h[i-1] - 2.0 * h[i] + h[i+1] ) / x_del / x_del + 2.0 * sin ( x[i] * t ) ); } // nowa chwila czasu t = ( ( double ) ( j - j_min ) * t_max + ( double ) ( j_max - j ) * t_min ) / ( double ) ( j_max - j_min ); // przygotowanie do nastepnego kroku czasowego for ( i = 1; i < n + 1; i++ ) { h[i] = h_new[i]; } if ( 0 == id ) h[0] = 100.0 + 10.0 * sin ( t ); if ( id == p - 1 ) h[n+1] = 75; } // koncowa wymiana informacji z wezlami sasiednimi tag = 11; if ( id < p - 1 ) { MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD ); } if ( 0 < id ) { MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status ); } tag = 12; if ( 0 < id ) { // DO UZUPELNIENIA MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD ); } if ( id < p - 1 ) { // DO UZUPELNIENIA MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status ); 
} wtime = MPI_Wtime ( ) - wtime; if ( id == 0 ) { printf ( "\n" ); printf ( " Wall clock elapsed seconds = %f\n", wtime ); } // wydruk wyniku printf ( "%2d T= %f\n", id, t ); printf ( "%2d X= ", id ); for ( i = 0; i <= n + 1; i++ ) { printf ( "%7.2f", x[i] ); } printf ( "\n" ); printf ( "%2d H= ", id ); for ( i = 0; i <= n + 1; i++ ) { printf ( "%7.2f", h[i] ); } printf ( "\n" ); free ( h ); free ( h_new ); free ( x ); return; }
# include <stdlib.h> # include <stdio.h> # include <math.h> # include "mpi.h" int main ( int argc, char *argv[] ); void heat_part ( int n, int p, int id, double x_min, double x_max ); /******************************************************************************/ int main ( int argc, char *argv[] ) { double a = 0.0; // lewy brzeg przedzialu double b = 1000000.0; // prawy brzeg przedzialu int i; int id; // rank int n; // liczba punktow dla kazdego wezla int p; // size double x_max; double x_min; MPI_Init ( &argc, &argv ); MPI_Comm_rank ( MPI_COMM_WORLD, &id ); MPI_Comm_size ( MPI_COMM_WORLD, &p ); n = 1000000; // liczba punktow dla kazdego wezla i = 0; // poczatkowa chwila czasu // wspolrzedna lewego punktu dla wezla id x_min = ( ( double )( p * n + 1 - id * n - i ) * a + ( double )( id * n + i ) * b ) / ( double ) ( p * n + 1 ); i = n + 1; // wspolrzedna prawego punktu dla wezla id x_max = ( ( double )( p * n + 1 - id * n - i ) * a + ( double )( id * n + i ) * b ) / ( double )( p * n + 1 ); heat_part ( n, p, id, x_min, x_max ); // obliczenia dla pojedynczego wezla MPI_Finalize ( ); } /******************************************************************************/ // obliczenia dla pojedynczego wezla - pojedynczego podobszaru /******************************************************************************/ void heat_part ( int n, int p, int id, double x_min, double x_max ) { double cfl; double *h; double *h_new; int i; int ierr; int j; int j_max; int j_min; double k; MPI_Status status; double t; double t_del; double t_max; double t_min; int tag; double wtime; double *x; double x_del; MPI_Request req1, req2, req3, req4; MPI_Status stat1, stat2, stat3, stat4; h = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // rozwiazanie dla t_i h_new = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // rozwiazanie dla t_i+1 x = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // wspolrzedne punktow k = 0.002 / ( double ) p; // przewodniosc cieplna j_min = 0; 
// indeksy krokow czasowych - min i max j_max = 100; t_min = 0.0; // chwile czasu - min i max t_max = 10.0; t_del = ( t_max - t_min ) / ( double ) ( j_max - j_min ); // krok czasowy Delta t x_del = ( x_max - x_min ) / ( double ) ( n + 1 ); // odstep miedzy punktami for ( i = 0; i <= n + 1; i++ ) { x[i] = ( ( double ) ( i ) * x_max + ( double ) ( n + 1 - i ) * x_min ) / ( double ) ( n + 1 ); } // ustawienie warunku poczatkowego for ( i = 0; i <= n + 1; i++ ) { h[i] = 95.0; } // sprawdzenie stabilnosci schematu cfl = k * t_del / x_del / x_del; if ( 0.5 <= cfl ) { printf ( " CFL condition failed.\n" ); exit ( 1 ); } wtime = MPI_Wtime ( ); //poczatek pomiaru czasu for ( j = 1; j <= j_max; j++ ) { // wymiana informacji z wezlami sasiednimi ///NON BLOCK if ( id > 0 ) { MPI_Irecv ( &h[0], 1, MPI_DOUBLE, id-1, 1, MPI_COMM_WORLD, &req1 ); } if ( id < p - 1 ) { MPI_Irecv ( &h[n+1], 1, MPI_DOUBLE, id+1, 2, MPI_COMM_WORLD, &req2 ); } ///END NON BLOCK if ( id > 0 ) { MPI_Isend ( &h[1], 1, MPI_DOUBLE, id-1, 2, MPI_COMM_WORLD, &req3 ); } if ( id < p - 1 ) { MPI_Isend ( &h[n], 1, MPI_DOUBLE, id+1, 1, MPI_COMM_WORLD, &req4 ); } // implementacja wzoru roznicowego for ( i = 2; i <= n-1; i++ ) { h_new[i] = h[i] + t_del * ( k * ( h[i-1] - 2.0 * h[i] + h[i+1] ) / x_del / x_del + 2.0 * sin ( x[i] * t ) ); } ///NON BLOCK ///WAIT FOR RECEIVE if ( id > 0 ) { MPI_Wait(&req1, &stat1); } if ( id < p - 1 ) { MPI_Wait(&req2, &stat2); } int tmp = 1; h_new[tmp] = h[tmp] + t_del * ( k * ( h[tmp-1] - 2.0 * h[tmp] + h[tmp+1] ) / x_del / x_del + 2.0 * sin ( x[tmp] * t ) ); tmp = n; h_new[tmp] = h[tmp] + t_del * ( k * ( h[tmp-1] - 2.0 * h[tmp] + h[tmp+1] ) / x_del / x_del + 2.0 * sin ( x[tmp] * t ) ); ///WAIT FOR SEND if ( id > 0 ) { MPI_Wait(&req3, &stat3); } if ( id < p - 1 ) { MPI_Wait(&req4, &stat4); } ///END NON BLOCK // nowa chwila czasu t = ( ( double ) ( j - j_min ) * t_max + ( double ) ( j_max - j ) * t_min ) / ( double ) ( j_max - j_min ); // przygotowanie do nastepnego kroku czasowego for ( 
i = 1; i < n + 1; i++ ) { h[i] = h_new[i]; } if ( 0 == id ) h[0] = 100.0 + 10.0 * sin ( t ); if ( id == p - 1 ) h[n+1] = 75; } // koncowa wymiana informacji z wezlami sasiednimi tag = 11; if ( id < p - 1 ) { MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD ); } if ( 0 < id ) { MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status ); } tag = 12; if ( 0 < id ) { // DO UZUPELNIENIA MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD ); } if ( id < p - 1 ) { // DO UZUPELNIENIA MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status ); } wtime = MPI_Wtime ( ) - wtime; if ( id == 0 ) { printf ( "\n" ); printf ( " Wall clock elapsed seconds = %f\n", wtime ); } // wydruk wyniku /*printf ( "%2d T= %f\n", id, t ); printf ( "%2d X= ", id ); for ( i = 0; i <= n + 1; i++ ) { printf ( "%7.2f", x[i] ); } printf ( "\n" ); printf ( "%2d H= ", id ); for ( i = 0; i <= n + 1; i++ ) { printf ( "%7.2f", h[i] ); } printf ( "\n" ); */ free ( h ); free ( h_new ); free ( x ); return; }
# include <stdlib.h> # include <stdio.h> # include <math.h> # include "mpi.h" int main ( int argc, char *argv[] ); void heat_part ( int n, int p, int id, double x_min, double x_max ); /******************************************************************************/ int main ( int argc, char *argv[] ) { int id; // rank int n; // liczba punktow dla kazdego wezla int p; // size double x_max; double x_min; MPI_Comm parentcomm; MPI_Status status; MPI_Init ( &argc, &argv ); MPI_Comm_rank ( MPI_COMM_WORLD, &id ); MPI_Comm_size ( MPI_COMM_WORLD, &p ); MPI_Comm_get_parent(&parentcomm); MPI_Recv ( &x_min, 1, MPI_DOUBLE, 0, 1, parentcomm, &status ); MPI_Recv ( &x_max, 1, MPI_DOUBLE, 0, 2, parentcomm, &status ); MPI_Recv ( &n, 1, MPI_INT, 0, 3, parentcomm, &status ); //MPI_Barrier(MPI_COMM_WORLD); heat_part ( n, p, id, x_min, x_max ); // obliczenia dla pojedynczego wezla MPI_Finalize ( ); return 0; } /******************************************************************************/ // obliczenia dla pojedynczego wezla - pojedynczego podobszaru /******************************************************************************/ void heat_part ( int n, int p, int id, double x_min, double x_max ) { double cfl; double *h; double *h_new; int i; int ierr; int j; int j_max; int j_min; double k; MPI_Status status; double t; double t_del; double t_max; double t_min; int tag; double wtime; double *x; double x_del; h = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // rozwiazanie dla t_i h_new = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // rozwiazanie dla t_i+1 x = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // wspolrzedne punktow k = 0.002 / ( double ) p; // przewodniosc cieplna j_min = 0; // indeksy krokow czasowych - min i max j_max = 100; t_min = 0.0; // chwile czasu - min i max t_max = 10.0; t_del = ( t_max - t_min ) / ( double ) ( j_max - j_min ); // krok czasowy Delta t x_del = ( x_max - x_min ) / ( double ) ( n + 1 ); // odstep miedzy punktami for ( i = 
0; i <= n + 1; i++ ) { x[i] = ( ( double ) ( i ) * x_max + ( double ) ( n + 1 - i ) * x_min ) / ( double ) ( n + 1 ); } // ustawienie warunku poczatkowego for ( i = 0; i <= n + 1; i++ ) { h[i] = 95.0; } // sprawdzenie stabilnosci schematu cfl = k * t_del / x_del / x_del; if ( 0.5 <= cfl ) { printf ( " CFL condition failed.\n" ); exit ( 1 ); } wtime = MPI_Wtime ( ); //poczatek pomiaru czasu for ( j = 1; j <= j_max; j++ ) { // wymiana informacji z wezlami sasiednimi tag = 1; if ( id < p - 1 ) { MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD ); } if ( 0 < id ) { MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status ); } tag = 2; if ( 0 < id ) { MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD ); } if ( id < p - 1 ) { MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status ); } // implementacja wzoru roznicowego for ( i = 1; i <= n; i++ ) { h_new[i] = h[i] + t_del * ( k * ( h[i-1] - 2.0 * h[i] + h[i+1] ) / x_del / x_del + 2.0 * sin ( x[i] * t ) ); } // nowa chwila czasu t = ( ( double ) ( j - j_min ) * t_max + ( double ) ( j_max - j ) * t_min ) / ( double ) ( j_max - j_min ); // przygotowanie do nastepnego kroku czasowego for ( i = 1; i < n + 1; i++ ) { h[i] = h_new[i]; } if ( 0 == id ) h[0] = 100.0 + 10.0 * sin ( t ); if ( id == p - 1 ) h[n+1] = 75; } // koncowa wymiana informacji z wezlami sasiednimi tag = 11; if ( id < p - 1 ) { MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD ); } if ( 0 < id ) { MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status ); } tag = 12; if ( 0 < id ) { // DO UZUPELNIENIA MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD ); } if ( id < p - 1 ) { // DO UZUPELNIENIA MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status ); } wtime = MPI_Wtime ( ) - wtime; if ( id == 0 ) { printf ( "\n" ); printf ( " Wall clock elapsed seconds = %f\n", wtime ); } // wydruk wyniku printf ( "%2d T= %f\n", id, t ); printf ( "%2d X= ", id ); for ( i = 0; i <= n + 1; 
i++ ) { printf ( "%7.2f", x[i] ); } printf ( "\n" ); printf ( "%2d H= ", id ); for ( i = 0; i <= n + 1; i++ ) { printf ( "%7.2f", h[i] ); } printf ( "\n" ); free ( h ); free ( h_new ); free ( x ); return; }
# include <stdlib.h> # include <stdio.h> # include <math.h> # include "mpi.h" int main ( int argc, char *argv[] ); /******************************************************************************/ int main ( int argc, char *argv[] ) { double a = 0.0; // lewy brzeg przedzialu double b = 1.0; // prawy brzeg przedzialu int i; int id; // rank int n; // liczba punktow dla kazdego wezla int p; // size int np = 8; // liczba procesow double x_max; double x_min; MPI_Comm komunikator; int *errcodes; MPI_Init ( &argc, &argv ); //MPI_Comm_rank ( MPI_COMM_WORLD, &id ); //MPI_Comm_size ( MPI_COMM_WORLD, &p ); MPI_Comm_spawn("dziecko", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &komunikator, errcodes); n = 12; // liczba punktow dla kazdego wezla int j; for(j=0;j<np;j++){ i = 0; // poczatkowa chwila czasu // wspolrzedna lewego punktu dla wezla id x_min = ( ( double )( np * n + 1 - j * n - i ) * a + ( double )( j * n + i ) * b ) / ( double ) ( np * n + 1 ); i = n + 1; // wspolrzedna prawego punktu dla wezla id x_max = ( ( double )( np * n + 1 - j * n - i ) * a + ( double )( j * n + i ) * b ) / ( double )( np * n + 1 ); MPI_Send ( &x_min, 1, MPI_DOUBLE, j, 1, komunikator ); MPI_Send ( &x_max, 1, MPI_DOUBLE, j, 2, komunikator ); MPI_Send ( &n, 1, MPI_INT, j, 3, komunikator ); } MPI_Finalize ( ); return 0; } struct rekord wys; MPI_Datatype rekord_typ; int tab_dlug_blokow[3] = {1, 1, 1}; MPI_Datatype tab_typow[3] = {MPI_DOUBLE, MPI_DOUBLE, MPI_INT}; MPI_Aint podstawa, tab_odstepow[3]; MPI_Get_address(&wys.x_min, &tab_odstepow[0]); MPI_Get_address(&wys.x_max, &tab_odstepow[1]); MPI_Get_address(&wys.n, &tab_odstepow[2]); MPI_Type_struct(3, tab_dlug_blokow, tab_odstepow, tab_typow, &rekord_typ); MPI_Type_commit(&rekord_typ); MPI_Recv ( &wys, 1, rekord_typ, 0, 1, parentcomm, &status );
wkrótce
# Makefile for the Hello_GPU OpenCL example (main.c).

# optimization and other system dependent options
#include make.$(SRR_ARCH)
# or directly
include make.lab_404_NVIDIA

NAME = Hello_GPU

# neither target creates a file of its own name
.PHONY: program clean

# link the executable
program: main.o
	$(CC) $(LDFL) main.o $(LIB) -o $(NAME)

# compile the host code
main.o: main.c
	$(CC) $(CFL) -c main.c $(INC) -o main.o

# main.o is built in the current directory, so it has to be removed here too
clean:
	rm -f obj/*
	rm -f main.o
	rm -f $(NAME)
# Toolchain and path settings pulled in by the Makefile via `include`.

# --- compilers and tools ----------------------------------------------------

# C compiler
#CC = icc
CC   = gcc

# C++ compiler
CPPC = icpc
#CPPC = g++

# Loader (to link C/C++ and Fortran libraries)
LD   = icpc
#LD = g++

# Archiver
AR   = ar r

# For removing files
RM   = rm -f

# --- search paths -----------------------------------------------------------

# Include directories
INC  = -I/opt/cuda7/include

# Standard and/or local libraries
LIB  = -L/opt/cuda7/lib64 -lOpenCL

# --- flags ------------------------------------------------------------------

# C optimization and other flags
#CFL(icc) = -O3 -openmp
#CFL(gcc) = -O3 -fopenmp
#CFL(icc,debug) = -g
#CFL(gcc) = -g
CFL  =

# Loader optimization and other flags
#LDFL(debug) = -g
#LDFL(profile) = -p
LDFL =
#include<stdlib.h> #include<stdio.h> #include <CL/cl.h> // functions to display platform and device properties void DisplayPlatformInfo( cl_platform_id id, cl_platform_info name, char* str) { cl_int retval; size_t paramValueSize; retval = clGetPlatformInfo( id, name, 0, NULL, ¶mValueSize); if (retval != CL_SUCCESS){ printf("Failed to find OpenCL platform %s.\n", str); return; } char * info = (char *)malloc(sizeof(char) * paramValueSize); retval = clGetPlatformInfo( id, name, paramValueSize, info, NULL); if (retval != CL_SUCCESS) { printf("Failed to find OpenCL platform %s.\n", str); return; } printf("\t%s:\t%s\n", str, info ); free(info); } void DisplayDeviceInfo_char( cl_device_id id, cl_device_info name, char* str) { cl_int retval; size_t paramValueSize; retval = clGetDeviceInfo( id, name, 0, NULL, ¶mValueSize); if (retval != CL_SUCCESS) { printf("Failed to find OpenCL device info %s.\n", str); return; } char * info = (char *)malloc(sizeof(char) * paramValueSize); retval = clGetDeviceInfo( id, name, paramValueSize, info, NULL); if (retval != CL_SUCCESS) { printf("Failed to find OpenCL device info %s.\n", str); return; } printf("\t\t%s:\t%s\n", str, info ); free(info); }; void DisplayDeviceInfo_ulong( cl_device_id id, cl_device_info name, char* str) { cl_int retval; size_t paramValueSize;// = sizeof(cl_ulong); retval = clGetDeviceInfo( id, name, 0, NULL, ¶mValueSize); if (retval != CL_SUCCESS) { printf("Failed to find OpenCL device info %s.\n", str); return; } cl_ulong * info = (cl_ulong *)malloc(sizeof(cl_ulong) * paramValueSize); retval = clGetDeviceInfo( id, name, paramValueSize, info, NULL); if (retval != CL_SUCCESS) { printf("Failed to find OpenCL device info %s.\n", str); return; } printf("\t\t%s:\t%lu MB\n", str, info[0] / 1024 / 1024 ); free(info); }; void DisplayDeviceInfo_uint( cl_device_id id, cl_device_info name, char* str) { cl_int retval; size_t paramValueSize;// = sizeof(cl_ulong); retval = clGetDeviceInfo( id, name, 0, NULL, ¶mValueSize); if 
(retval != CL_SUCCESS) { printf("Failed to find OpenCL device info %s.\n", str); return; } cl_uint * info = (cl_uint *)malloc(sizeof(cl_uint) * paramValueSize); retval = clGetDeviceInfo( id, name, paramValueSize, info, NULL); if (retval != CL_SUCCESS) { printf("Failed to find OpenCL device info %s.\n", str); return; } printf("\t\t%s:\t%d MHz\n", str, info[0] ); free(info); }; // main program controlling execution of CPU code and OpenCL kernels int main(int argc, char** argv) { cl_uint number_of_contexts = 2; cl_context context = NULL; cl_context list_of_contexts[2] = {0,0}; cl_command_queue commandQueue = 0; cl_program program = 0; cl_uint number_of_devices; cl_device_id device = 0; cl_device_id *list_of_devices; cl_device_type type; cl_kernel kernel = 0; cl_mem memObjects[3] = { 0, 0, 0 }; cl_int retval; int icon, idev; cl_uint numPlatforms; cl_platform_id * platformIds; cl_uint i,j; // flag to control displaying int Monitor = 1; // Create OpenCL contexts // First, query the total number of platforms retval = clGetPlatformIDs(0, (cl_platform_id *) NULL, &numPlatforms); // Next, allocate memory for the installed plaforms, and qeury // to get the list. 
platformIds = (cl_platform_id *)malloc(sizeof(cl_platform_id) * numPlatforms); // Then, query the platform IDs retval = clGetPlatformIDs(numPlatforms, platformIds, NULL); if(Monitor>=0){ printf("Number of platforms: \t%d\n", numPlatforms); } // Iterate through the list of platforms displaying associated information for (i = 0; i < numPlatforms; i++) { if(Monitor>0){ printf("Platform ID - %d\n",i); // First we display information associated with the platform DisplayPlatformInfo( platformIds[i], CL_PLATFORM_NAME, "CL_PLATFORM_NAME"); DisplayPlatformInfo( platformIds[i], CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE"); DisplayPlatformInfo( platformIds[i], CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION"); DisplayPlatformInfo( platformIds[i], CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR"); } } // For the first platform int iplat; printf("Select Platform ID: "); scanf("%d", &iplat); // Query the set of devices associated with the platform retval = clGetDeviceIDs( platformIds[iplat], CL_DEVICE_TYPE_ALL, 0, NULL, &number_of_devices); list_of_devices = (cl_device_id *) malloc (sizeof(cl_device_id) * number_of_devices); retval = clGetDeviceIDs( platformIds[iplat], CL_DEVICE_TYPE_ALL, number_of_devices, list_of_devices, NULL); if(Monitor>=0){ printf("Number of devices: \t%d\n", number_of_devices); } // Iterate through each device, displaying associated information for (j = 0; j < number_of_devices; j++) { clGetDeviceInfo(list_of_devices[j], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); if(Monitor>0){ DisplayDeviceInfo_char( list_of_devices[j], CL_DEVICE_NAME, "CL_DEVICE_NAME"); DisplayDeviceInfo_char( list_of_devices[j], CL_DEVICE_VENDOR, "CL_DEVICE_VENDOR"); DisplayDeviceInfo_char( list_of_devices[j], CL_DEVICE_VERSION, "CL_DEVICE_VERSION"); DisplayDeviceInfo_ulong( list_of_devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, "CL_DEVICE_GLOBAL_MEM_SIZE"); DisplayDeviceInfo_uint( list_of_devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, "CL_DEVICE_MAX_CLOCK_FREQUENCY"); printf("\n"); } } // Next, 
create OpenCL contexts on platforms cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformIds[iplat], 0 }; if(Monitor>0){ printf("Creating CPU context %d on platform %d\n", 1, iplat); } list_of_contexts[1] = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, NULL, NULL, &retval); if(Monitor>=0 && retval != CL_SUCCESS){ printf("Could not create CPU context on platform %d\n", i); } if(Monitor>0){ printf("Creating GPU context 0 on platform %d\n", iplat); } list_of_contexts[0] = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &retval); if(Monitor>=0 && retval != CL_SUCCESS){ printf("Could not create GPU context on platform %d\n", i); } // in a loop over devices of the seleceted platform for(idev=0; idev<number_of_devices;idev++){ if(Monitor>0){ printf("\nFor context %d and device %d:\n", idev, idev); } device = list_of_devices[idev]; icon = idev; // choose OpenCL context on first available platform context = list_of_contexts[icon]; if(context !=0){ commandQueue = clCreateCommandQueue(context, device, 0, NULL); if (commandQueue == NULL) { printf("Failed to create commandQueue for device %d\n", idev); exit(0); } if(Monitor>0){ printf("Reading program from source\n"); } // read source code from file FILE *fp; char* source; long int size; fp = fopen("HelloWorld.cl", "rb"); if(!fp) { printf("Could not open kernel file\n"); exit(-1); } int status = fseek(fp, 0, SEEK_END); if(status != 0) { printf("Error seeking to end of file\n"); exit(-1); } size = ftell(fp); if(size < 0) { printf("Error getting file position\n"); exit(-1); } rewind(fp); source = (char *)malloc(size + 1); int i; for (i = 0; i < size+1; i++) { source[i]='\0'; } if(source == NULL) { printf("Error allocating space for the kernel source\n"); exit(-1); } fread(source, 1, size, fp); source[size] = '\0'; const char* source_const = source; cl_program program = clCreateProgramWithSource(context, 1, &source_const, NULL, NULL); if 
(program == NULL) { printf("Failed to create CL program from source.\n"); exit(-1); } if(Monitor>0){ printf("Creating program and kernel\n"); } // build program (passing options to compiler if necessary retval = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); char* buildLog; size_t size_of_buildLog; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_of_buildLog); buildLog = malloc(size_of_buildLog+1); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, size_of_buildLog, buildLog, NULL); buildLog[size_of_buildLog]= '\0'; printf("Kernel buildLog: %s\n", buildLog); if (retval != CL_SUCCESS) { printf("Error in kernel\n"); clReleaseProgram(program); exit(-1); } // Create OpenCL kernel kernel = clCreateKernel(program, "hello_kernel", NULL); if (kernel == NULL) { printf("Failed to create kernel.\n"); exit(0); } if(Monitor>0){ printf("Creating memory objects\n"); } // Create memory objects that will be used as arguments to // kernel. First create host memory arrays that will be // used to store the arguments to the kernel float result[1]; float a[1]; float b[1]; a[0] = 2; b[0] = 2; memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float), NULL, NULL); memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float), NULL, NULL); memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float), NULL, NULL); if (memObjects[0]==NULL || memObjects[1]==NULL || memObjects[2]==NULL){ printf("Error creating memory objects.\n"); return 0; } if(Monitor>0){ printf("Sending kernel arguments\n"); } retval = clEnqueueWriteBuffer( commandQueue, memObjects[0], CL_FALSE, 0, sizeof(float), a, 0, NULL, NULL); // Use clEnqueueWriteBuffer() to write input array B to // the device buffer bufferB retval = clEnqueueWriteBuffer( commandQueue, memObjects[1], CL_FALSE, 0, sizeof(float), b, 0, NULL, NULL); // Set the kernel arguments (result, a, b) retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); retval |= 
clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); if (retval != CL_SUCCESS) { printf("Failed to Set the kernel arguments.\n"); //Cleanup(context, commandQueue, program, kernel, memObjects); return 1; } if(Monitor>0){ printf("Running the kernel!\n"); } size_t globalWorkSize[1] = { 1 }; size_t localWorkSize[1] = { 1 }; // Queue the kernel up for execution across the array retval = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); if (retval != CL_SUCCESS) { printf("Failed to queue kernel for execution.\n"); //Cleanup(context, commandQueue, program, kernel, memObjects); return 1; } if(Monitor>0){ printf("Transfering back results\n"); } // Read the output buffer back to the Host retval = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, sizeof(float), result, 0, NULL, NULL); if (retval != CL_SUCCESS) { printf("Failed to read result buffer.\n"); //Cleanup(context, commandQueue, program, kernel, memObjects); return 1; } // Verify the output if(result[0]==4) { printf("Output is correct: %lf + %lf = %lf\n", a[0], b[0], result[0]); } else { printf("Output is incorrect: %lf + %lf != %lf\n", a[0], b[0], result[0]); } for (i = 0; i < 3; i++) { if (memObjects[i] != 0) clReleaseMemObject(memObjects[i]); } if (commandQueue != 0) clReleaseCommandQueue(commandQueue); if (kernel != 0) clReleaseKernel(kernel); if (program != 0) clReleaseProgram(program); } } free(list_of_devices); free(platformIds); return 0; }