Zajęcia 1

Makefile

# C compiler (MPI compiler wrapper)
CCOMP = mpicc

# linker
LINK = mpicc

# MPI launcher used by the `run` target
MPIRUN = /usr/lib64/openmpi/bin/mpiexec

# optimisation options:
# debug build
# OPT = -g -DDEBUG -p
# optimised build for timing measurements
# OPT = -O3 -fopenmp -p
# Empty by default; override on the command line, e.g. `make OPT=-O3`.
OPT ?=

# header search paths
#INC = -I../pomiar_czasu
# Empty by default so the compile rule never expands an undefined variable.
INC ?=

# libraries
#LIB = -L../pomiar_czasu -lm
LIB = -lm

# dependencies and commands

# `run` and `clean` are commands, not files; declaring them phony keeps them
# working even if a file with the same name appears in the directory.
.PHONY: run clean

heat: heat.o
	$(LINK) $(OPT) $^ -o $@ $(LIB)

heat.o: heat.c
	$(CCOMP) -c $(OPT) $< $(INC)

# Depend on the executable so `make run` (re)builds it before launching.
run: heat
	$(MPIRUN) -np 8 ./heat

# Remove the object files and the linked executable.
clean:
	rm -f *.o heat

heat.c

# include <stdlib.h>
# include <stdio.h>
# include <math.h>
 
# include "mpi.h"
 
int main ( int argc, char *argv[] );
void heat_part ( int n, int p, int id, double x_min, double x_max );
 
/******************************************************************************/
 
int main ( int argc, char *argv[] )
{
  /* End points of the whole interval that is split between the ranks. */
  const double left_end = 0.0;
  const double right_end = 1.0;
  const int pts_per_rank = 12; /* number of points handled by each rank */
  int rank;                    /* this process's rank in MPI_COMM_WORLD */
  int nprocs;                  /* number of processes in MPI_COMM_WORLD */
  int denom;                   /* p * n + 1, common denominator below */
  int first_idx;               /* global index of this rank's left point */
  int last_idx;                /* global index of this rank's right point */
  double x_min;
  double x_max;

  MPI_Init ( &argc, &argv );
  MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
  MPI_Comm_size ( MPI_COMM_WORLD, &nprocs );

  denom = nprocs * pts_per_rank + 1;
  first_idx = rank * pts_per_rank;
  last_idx = first_idx + pts_per_rank + 1;

  /* Coordinate of the left-most point of this rank: a weighted mean of the
     two interval ends. */
  x_min = ( ( double ) ( denom - first_idx ) * left_end
          + ( double ) ( first_idx ) * right_end )
          / ( double ) ( denom );

  /* Coordinate of the right-most point of this rank. */
  x_max = ( ( double ) ( denom - last_idx ) * left_end
          + ( double ) ( last_idx ) * right_end )
          / ( double ) ( denom );

  /* Run the computation for this rank's sub-interval. */
  heat_part ( pts_per_rank, nprocs, rank, x_min, x_max );

  MPI_Finalize ( );
}
 
/******************************************************************************/
// computations for a single node - a single subdomain
/******************************************************************************/
/*
 * Advances the temperature on this rank's sub-interval [x_min, x_max] with an
 * explicit finite-difference scheme and prints the final state.
 *
 *   n      number of interior points owned by this rank (arrays hold n + 2
 *          values: index 0 and n + 1 are the neighbour/boundary values)
 *   p      number of ranks in MPI_COMM_WORLD
 *   id     rank of the calling process
 *   x_min  coordinate of point 0
 *   x_max  coordinate of point n + 1
 *
 * The process is aborted with MPI_Abort if memory cannot be allocated or the
 * stability (CFL) check fails.
 */
void heat_part ( int n, int p, int id, double x_min, double x_max )
{
  double cfl;
  double *h;
  double *h_new;
  int i;
  int j;
  int j_max;
  int j_min;
  double k;
  MPI_Status status;
  double t;
  double t_del;
  double t_max;
  double t_min;
  int tag;
  double wtime;
  double *x;
  double x_del;

  h = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // solution at t_i
  h_new = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // solution at t_i+1
  x = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // point coordinates

  if ( h == NULL || h_new == NULL || x == NULL )
  {
    fprintf ( stderr, "  heat_part: out of memory on rank %d.\n", id );
    MPI_Abort ( MPI_COMM_WORLD, 1 );
    exit ( 1 );
  }

  k = 0.002 / ( double ) p; // thermal conductivity

  j_min = 0; // time step indices - min and max
  j_max = 100;
  t_min = 0.0; // time instants - min and max
  t_max = 10.0;
  t_del = ( t_max - t_min ) / ( double ) ( j_max - j_min ); // time step Delta t

  // The source term of the first step is evaluated at the initial time;
  // without this assignment t would be read uninitialised.
  t = t_min;

  x_del = ( x_max - x_min ) / ( double ) ( n + 1 ); // spacing between points
  for ( i = 0; i <= n + 1; i++ )
  {
    x[i] = ( ( double ) (         i ) * x_max
           + ( double ) ( n + 1 - i ) * x_min )
           / ( double ) ( n + 1     );
  }

  // initial condition
  for ( i = 0; i <= n + 1; i++ )
  {
    h[i] = 95.0;
  }

  // stability check of the scheme
  cfl = k * t_del / x_del / x_del;

  if ( 0.5 <= cfl )
  {
    printf ( "  CFL condition failed.\n" );
    // Abort the whole job: a plain exit() on one rank could leave the
    // neighbours blocked in MPI_Recv.
    MPI_Abort ( MPI_COMM_WORLD, 1 );
    exit ( 1 );
  }

  wtime = MPI_Wtime ( ); // start of the timed section

  for ( j = 1; j <= j_max; j++ )
  {

    // exchange with the neighbouring ranks
    // tag 1: last owned value goes to the right neighbour
    tag = 1;

    if ( id < p - 1 )
    {
      MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD );
    }

    if ( 0 < id )
    {
      MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status );
    }

    // tag 2: first owned value goes to the left neighbour
    tag = 2;

    if ( 0 < id )
    {
      MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD );
    }

    if ( id < p - 1 )
    {
      MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status );
    }

    // finite-difference update of the owned points
    for ( i = 1; i <= n; i++ )
    {
      h_new[i] = h[i] + t_del * (
        k * ( h[i-1] - 2.0 * h[i] + h[i+1] ) / x_del / x_del
        + 2.0 * sin ( x[i] * t ) );
    }

    // new time instant
    t = ( ( double ) (         j - j_min ) * t_max
            + ( double ) ( j_max - j         ) * t_min )
            / ( double ) ( j_max     - j_min );

    // prepare the next time step
    for ( i = 1; i < n + 1; i++ )
    {
      h[i] = h_new[i];
    }
    // physical boundary values on the first and the last rank
    if ( 0 == id ) h[0] = 100.0 + 10.0 * sin ( t );
    if ( id == p - 1 ) h[n+1] = 75;

  }

  // final exchange with the neighbouring ranks
  tag = 11;

  if ( id < p - 1 ) {
    MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD );
  }

  if ( 0 < id ) {
    MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status );
  }

  tag = 12;

  if ( 0 < id ) {
    MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD );
  }

  if ( id < p - 1 ) {
    MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status );
  }

  wtime = MPI_Wtime ( ) - wtime;

  if ( id == 0 )
  {
    printf ( "\n" );
    printf ( "  Wall clock elapsed seconds = %f\n", wtime );
  }

  // print the result
  printf ( "%2d  T= %f\n", id, t );
  printf ( "%2d  X= ", id );
  for ( i = 0; i <= n + 1; i++ )
  {
    printf ( "%7.2f", x[i] );
  }
  printf ( "\n" );
  printf ( "%2d  H= ", id );
  for ( i = 0; i <= n + 1; i++ )
  {
    printf ( "%7.2f", h[i] );
  }
  printf ( "\n" );

  free ( h );
  free ( h_new );
  free ( x );

  return;
}

Zajęcia 2

heat.c

# include <stdlib.h>
# include <stdio.h>
# include <math.h>
 
# include "mpi.h"
 
int main ( int argc, char *argv[] );
void heat_part ( int n, int p, int id, double x_min, double x_max );
 
/******************************************************************************/
 
int main ( int argc, char *argv[] )
{
  /* End points of the whole interval that is split between the ranks. */
  const double left_end = 0.0;
  const double right_end = 1000000.0;
  const int pts_per_rank = 1000000; /* number of points handled by each rank */
  int rank;                         /* this process's rank in MPI_COMM_WORLD */
  int nprocs;                       /* number of processes in MPI_COMM_WORLD */
  int denom;                        /* p * n + 1, common denominator below */
  int first_idx;                    /* global index of this rank's left point */
  int last_idx;                     /* global index of this rank's right point */
  double x_min;
  double x_max;

  MPI_Init ( &argc, &argv );
  MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
  MPI_Comm_size ( MPI_COMM_WORLD, &nprocs );

  denom = nprocs * pts_per_rank + 1;
  first_idx = rank * pts_per_rank;
  last_idx = first_idx + pts_per_rank + 1;

  /* Coordinate of the left-most point of this rank: a weighted mean of the
     two interval ends. */
  x_min = ( ( double ) ( denom - first_idx ) * left_end
          + ( double ) ( first_idx ) * right_end )
          / ( double ) ( denom );

  /* Coordinate of the right-most point of this rank. */
  x_max = ( ( double ) ( denom - last_idx ) * left_end
          + ( double ) ( last_idx ) * right_end )
          / ( double ) ( denom );

  /* Run the computation for this rank's sub-interval. */
  heat_part ( pts_per_rank, nprocs, rank, x_min, x_max );

  MPI_Finalize ( );
}
 
/******************************************************************************/
// computations for a single node - a single subdomain
/******************************************************************************/
/*
 * Advances the temperature on this rank's sub-interval [x_min, x_max] with an
 * explicit finite-difference scheme. Neighbour values are exchanged with
 * MPI_Irecv/MPI_Isend; points 2..n-1 are updated while the messages are in
 * flight and points 1 and n after the receives have completed.
 *
 *   n      number of interior points owned by this rank (arrays hold n + 2
 *          values: index 0 and n + 1 are the neighbour/boundary values)
 *   p      number of ranks in MPI_COMM_WORLD
 *   id     rank of the calling process
 *   x_min  coordinate of point 0
 *   x_max  coordinate of point n + 1
 *
 * The process is aborted with MPI_Abort if memory cannot be allocated or the
 * stability (CFL) check fails. Rank 0 prints the elapsed wall-clock time.
 */
void heat_part ( int n, int p, int id, double x_min, double x_max )
{
  double cfl;
  double *h;
  double *h_new;
  int i;
  int j;
  int j_max;
  int j_min;
  double k;
  MPI_Status status;
  double t;
  double t_del;
  double t_max;
  double t_min;
  int tag;
  double wtime;
  double *x;
  double x_del;

  // req1/req2: receives from the left/right neighbour,
  // req3/req4: sends to the left/right neighbour
  MPI_Request req1, req2, req3, req4;
  MPI_Status stat1, stat2, stat3, stat4;

  h = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // solution at t_i
  h_new = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // solution at t_i+1
  x = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // point coordinates

  if ( h == NULL || h_new == NULL || x == NULL )
  {
    fprintf ( stderr, "  heat_part: out of memory on rank %d.\n", id );
    MPI_Abort ( MPI_COMM_WORLD, 1 );
    exit ( 1 );
  }

  k = 0.002 / ( double ) p; // thermal conductivity

  j_min = 0; // time step indices - min and max
  j_max = 100;
  t_min = 0.0; // time instants - min and max
  t_max = 10.0;
  t_del = ( t_max - t_min ) / ( double ) ( j_max - j_min ); // time step Delta t

  // The source term of the first step is evaluated at the initial time;
  // without this assignment t would be read uninitialised.
  t = t_min;

  x_del = ( x_max - x_min ) / ( double ) ( n + 1 ); // spacing between points
  for ( i = 0; i <= n + 1; i++ )
  {
    x[i] = ( ( double ) (         i ) * x_max
           + ( double ) ( n + 1 - i ) * x_min )
           / ( double ) ( n + 1     );
  }

  // initial condition
  for ( i = 0; i <= n + 1; i++ )
  {
    h[i] = 95.0;
  }

  // stability check of the scheme
  cfl = k * t_del / x_del / x_del;

  if ( 0.5 <= cfl )
  {
    printf ( "  CFL condition failed.\n" );
    // Abort the whole job: a plain exit() on one rank could leave the
    // neighbours blocked waiting for its messages.
    MPI_Abort ( MPI_COMM_WORLD, 1 );
    exit ( 1 );
  }

  wtime = MPI_Wtime ( ); // start of the timed section

  for ( j = 1; j <= j_max; j++ )
  {

    // exchange with the neighbouring ranks: post the receives first ...
    if ( id > 0 )
    {
      MPI_Irecv ( &h[0], 1, MPI_DOUBLE, id-1, 1, MPI_COMM_WORLD, &req1 );
    }
    if ( id < p - 1 )
    {
      MPI_Irecv ( &h[n+1], 1, MPI_DOUBLE, id+1, 2, MPI_COMM_WORLD, &req2 );
    }

    // ... then the matching sends (tag 2 travels left, tag 1 travels right)
    if ( id > 0 )
    {
      MPI_Isend ( &h[1], 1, MPI_DOUBLE, id-1, 2, MPI_COMM_WORLD, &req3 );
    }
    if ( id < p - 1 )
    {
      MPI_Isend ( &h[n], 1, MPI_DOUBLE, id+1, 1, MPI_COMM_WORLD, &req4 );
    }

    // finite-difference update of the points that do not need h[0] or h[n+1]
    for ( i = 2; i <= n-1; i++ )
    {
      h_new[i] = h[i] + t_del * (
        k * ( h[i-1] - 2.0 * h[i] + h[i+1] ) / x_del / x_del
        + 2.0 * sin ( x[i] * t ) );
    }

    // wait for the received neighbour values
    if ( id > 0 )
    {
      MPI_Wait(&req1, &stat1);
    }
    if ( id < p - 1 )
    {
      MPI_Wait(&req2, &stat2);
    }

    // update the two edge points, which read h[0] and h[n+1]
    int tmp = 1;
    h_new[tmp] = h[tmp] + t_del * (
        k * ( h[tmp-1] - 2.0 * h[tmp] + h[tmp+1] ) / x_del / x_del
        + 2.0 * sin ( x[tmp] * t ) );
    tmp = n;
    h_new[tmp] = h[tmp] + t_del * (
        k * ( h[tmp-1] - 2.0 * h[tmp] + h[tmp+1] ) / x_del / x_del
        + 2.0 * sin ( x[tmp] * t ) );

    // wait for the sends before h[1] and h[n] are overwritten below
    if ( id > 0 )
    {
      MPI_Wait(&req3, &stat3);
    }
    if ( id < p - 1 )
    {
      MPI_Wait(&req4, &stat4);
    }

    // new time instant
    t = ( ( double ) (         j - j_min ) * t_max
            + ( double ) ( j_max - j         ) * t_min )
            / ( double ) ( j_max     - j_min );

    // prepare the next time step
    for ( i = 1; i < n + 1; i++ )
    {
      h[i] = h_new[i];
    }
    // physical boundary values on the first and the last rank
    if ( 0 == id ) h[0] = 100.0 + 10.0 * sin ( t );
    if ( id == p - 1 ) h[n+1] = 75;

  }

  // final exchange with the neighbouring ranks
  tag = 11;

  if ( id < p - 1 ) {
    MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD );
  }

  if ( 0 < id ) {
    MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status );
  }

  tag = 12;

  if ( 0 < id ) {
    MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD );
  }

  if ( id < p - 1 ) {
    MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status );
  }

  wtime = MPI_Wtime ( ) - wtime;

  if ( id == 0 )
  {
    printf ( "\n" );
    printf ( "  Wall clock elapsed seconds = %f\n", wtime );
  }

  // result printing is disabled in this version
  /*printf ( "%2d  T= %f\n", id, t );
  printf ( "%2d  X= ", id );
  for ( i = 0; i <= n + 1; i++ )
  {
    printf ( "%7.2f", x[i] );
  }
  printf ( "\n" );
  printf ( "%2d  H= ", id );
  for ( i = 0; i <= n + 1; i++ )
  {
    printf ( "%7.2f", h[i] );
  }
  printf ( "\n" ); */

  free ( h );
  free ( h_new );
  free ( x );

  return;
}

Zajęcia 3

child.c

# include <stdlib.h>
# include <stdio.h>
# include <math.h>
 
# include "mpi.h"
 
int main ( int argc, char *argv[] );
void heat_part ( int n, int p, int id, double x_min, double x_max );
 
/******************************************************************************/
 
int main ( int argc, char *argv[] )
{
  int id; // rank
  int n;  // liczba punktow dla kazdego wezla
  int p;  // size
  double x_max;
  double x_min;
  MPI_Comm parentcomm; 
  MPI_Status status;
 
  MPI_Init ( &argc, &argv );
 
  MPI_Comm_rank ( MPI_COMM_WORLD, &id );
 
  MPI_Comm_size ( MPI_COMM_WORLD, &p );
 
  MPI_Comm_get_parent(&parentcomm);
 
  MPI_Recv ( &x_min, 1, MPI_DOUBLE, 0, 1, parentcomm, &status );
 
  MPI_Recv ( &x_max, 1, MPI_DOUBLE, 0, 2, parentcomm, &status );
  MPI_Recv ( &n, 1, MPI_INT, 0, 3, parentcomm, &status );
 
  //MPI_Barrier(MPI_COMM_WORLD);
 
  heat_part ( n, p, id, x_min, x_max ); // obliczenia dla pojedynczego wezla
 
  MPI_Finalize ( );
 
  return 0;
}
 
/******************************************************************************/
// computations for a single node - a single subdomain
/******************************************************************************/
/*
 * Advances the temperature on this rank's sub-interval [x_min, x_max] with an
 * explicit finite-difference scheme and prints the final state.
 *
 *   n      number of interior points owned by this rank (arrays hold n + 2
 *          values: index 0 and n + 1 are the neighbour/boundary values)
 *   p      number of ranks in MPI_COMM_WORLD
 *   id     rank of the calling process
 *   x_min  coordinate of point 0
 *   x_max  coordinate of point n + 1
 *
 * The process is aborted with MPI_Abort if memory cannot be allocated or the
 * stability (CFL) check fails.
 */
void heat_part ( int n, int p, int id, double x_min, double x_max )
{
  double cfl;
  double *h;
  double *h_new;
  int i;
  int j;
  int j_max;
  int j_min;
  double k;
  MPI_Status status;
  double t;
  double t_del;
  double t_max;
  double t_min;
  int tag;
  double wtime;
  double *x;
  double x_del;

  h = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // solution at t_i
  h_new = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // solution at t_i+1
  x = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) ); // point coordinates

  if ( h == NULL || h_new == NULL || x == NULL )
  {
    fprintf ( stderr, "  heat_part: out of memory on rank %d.\n", id );
    MPI_Abort ( MPI_COMM_WORLD, 1 );
    exit ( 1 );
  }

  k = 0.002 / ( double ) p; // thermal conductivity

  j_min = 0; // time step indices - min and max
  j_max = 100;
  t_min = 0.0; // time instants - min and max
  t_max = 10.0;
  t_del = ( t_max - t_min ) / ( double ) ( j_max - j_min ); // time step Delta t

  // The source term of the first step is evaluated at the initial time;
  // without this assignment t would be read uninitialised.
  t = t_min;

  x_del = ( x_max - x_min ) / ( double ) ( n + 1 ); // spacing between points
  for ( i = 0; i <= n + 1; i++ )
  {
    x[i] = ( ( double ) (         i ) * x_max
           + ( double ) ( n + 1 - i ) * x_min )
           / ( double ) ( n + 1     );
  }

  // initial condition
  for ( i = 0; i <= n + 1; i++ )
  {
    h[i] = 95.0;
  }

  // stability check of the scheme
  cfl = k * t_del / x_del / x_del;

  if ( 0.5 <= cfl )
  {
    printf ( "  CFL condition failed.\n" );
    // Abort the whole job: a plain exit() on one rank could leave the
    // neighbours blocked in MPI_Recv.
    MPI_Abort ( MPI_COMM_WORLD, 1 );
    exit ( 1 );
  }

  wtime = MPI_Wtime ( ); // start of the timed section

  for ( j = 1; j <= j_max; j++ )
  {

    // exchange with the neighbouring ranks
    // tag 1: last owned value goes to the right neighbour
    tag = 1;

    if ( id < p - 1 )
    {
      MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD );
    }

    if ( 0 < id )
    {
      MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status );
    }

    // tag 2: first owned value goes to the left neighbour
    tag = 2;

    if ( 0 < id )
    {
      MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD );
    }

    if ( id < p - 1 )
    {
      MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status );
    }

    // finite-difference update of the owned points
    for ( i = 1; i <= n; i++ )
    {
      h_new[i] = h[i] + t_del * (
        k * ( h[i-1] - 2.0 * h[i] + h[i+1] ) / x_del / x_del
        + 2.0 * sin ( x[i] * t ) );
    }

    // new time instant
    t = ( ( double ) (         j - j_min ) * t_max
            + ( double ) ( j_max - j         ) * t_min )
            / ( double ) ( j_max     - j_min );

    // prepare the next time step
    for ( i = 1; i < n + 1; i++ )
    {
      h[i] = h_new[i];
    }
    // physical boundary values on the first and the last rank
    if ( 0 == id ) h[0] = 100.0 + 10.0 * sin ( t );
    if ( id == p - 1 ) h[n+1] = 75;

  }

  // final exchange with the neighbouring ranks
  tag = 11;

  if ( id < p - 1 ) {
    MPI_Send ( &h[n], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD );
  }

  if ( 0 < id ) {
    MPI_Recv ( &h[0], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD, &status );
  }

  tag = 12;

  if ( 0 < id ) {
    MPI_Send ( &h[1], 1, MPI_DOUBLE, id-1, tag, MPI_COMM_WORLD );
  }

  if ( id < p - 1 ) {
    MPI_Recv ( &h[n+1], 1, MPI_DOUBLE, id+1, tag, MPI_COMM_WORLD, &status );
  }

  wtime = MPI_Wtime ( ) - wtime;

  if ( id == 0 )
  {
    printf ( "\n" );
    printf ( "  Wall clock elapsed seconds = %f\n", wtime );
  }

  // print the result
  printf ( "%2d  T= %f\n", id, t );
  printf ( "%2d  X= ", id );
  for ( i = 0; i <= n + 1; i++ )
  {
    printf ( "%7.2f", x[i] );
  }
  printf ( "\n" );
  printf ( "%2d  H= ", id );
  for ( i = 0; i <= n + 1; i++ )
  {
    printf ( "%7.2f", h[i] );
  }
  printf ( "\n" );

  free ( h );
  free ( h_new );
  free ( x );

  return;
}

parent.c

# include <stdlib.h>
# include <stdio.h>
# include <math.h>
 
# include "mpi.h"
 
int main ( int argc, char *argv[] );
 
/******************************************************************************/
 
int main ( int argc, char *argv[] )
{
  double a = 0.0; // lewy brzeg przedzialu
  double b = 1.0; // prawy brzeg przedzialu
  int i; 
  int id; // rank
  int n;  // liczba punktow dla kazdego wezla
  int p;  // size
  int np = 8; // liczba procesow
  double x_max;
  double x_min;
  MPI_Comm komunikator;
  int *errcodes;
 
  MPI_Init ( &argc, &argv );
 
  //MPI_Comm_rank ( MPI_COMM_WORLD, &id );
 
  //MPI_Comm_size ( MPI_COMM_WORLD, &p );
 
  MPI_Comm_spawn("dziecko", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &komunikator, errcodes);
 
  n = 12; // liczba punktow dla kazdego wezla
  int j;
  for(j=0;j<np;j++){
   i = 0;  // poczatkowa chwila czasu
    // wspolrzedna lewego punktu dla wezla id
    x_min = ( ( double )( np * n + 1 - j * n - i ) * a  
          + ( double )(             j * n + i ) * b )
          / ( double ) ( np * n + 1              );
 
    i = n + 1;
 
    // wspolrzedna prawego punktu dla wezla id
    x_max = ( ( double )( np * n + 1 - j * n - i ) * a  
          + ( double )(             j * n + i ) * b )
          / ( double )( np * n + 1              );
 
    MPI_Send ( &x_min, 1, MPI_DOUBLE, j, 1, komunikator );
    MPI_Send ( &x_max, 1, MPI_DOUBLE, j, 2, komunikator );
    MPI_Send ( &n, 1, MPI_INT, j, 3, komunikator );
 
  }
 
  MPI_Finalize ( );
 
  return 0;
}
 
 
 
 
 
 
    struct rekord wys;
    MPI_Datatype rekord_typ;
    int tab_dlug_blokow[3] = {1, 1, 1};
    MPI_Datatype tab_typow[3] = {MPI_DOUBLE, MPI_DOUBLE, MPI_INT};
    MPI_Aint podstawa, tab_odstepow[3];
 
    MPI_Get_address(&wys.x_min, &tab_odstepow[0]);
    MPI_Get_address(&wys.x_max, &tab_odstepow[1]);
    MPI_Get_address(&wys.n, &tab_odstepow[2]);
 
    MPI_Type_struct(3, tab_dlug_blokow, tab_odstepow, tab_typow, &rekord_typ);
    MPI_Type_commit(&rekord_typ);
    MPI_Recv ( &wys, 1, rekord_typ, 0, 1, parentcomm, &status );

Zajęcia 4

wkrótce

Zajęcia 5

Makefile

# optimization and other system dependent options
#include  make.$(SRR_ARCH)
# or directly
include  make.lab_404_NVIDIA

# Name of the executable that is produced.
NAME = Hello_GPU

# `program` and `clean` are commands, not files.
.PHONY: program clean

# Default goal. The file actually built is $(NAME); making `program` an alias
# for it lets make skip the link step when nothing has changed.
program: $(NAME)

$(NAME): main.o
	$(CC) $(LDFL) main.o  $(LIB) -o $(NAME)

main.o: main.c
	$(CC) $(CFL) -c main.c  $(INC) -o main.o

# main.o is created in the current directory, so it is removed explicitly;
# the obj/ cleanup is kept from the original rule.
clean:
	rm -f obj/*
	rm -f main.o
	rm -f $(NAME)

make.lab_404_NVIDIA

# Build settings meant to be pulled in by a Makefile through `include`.
# Only variables are defined here; there are no rules in this file.

# C compiler
#CC = icc
CC = gcc
 
# C++ compiler
# NOTE(review): CPPC and LD select the Intel compiler while CC selects gcc;
# confirm that this mix is intended.
CPPC = icpc
#CPPC = g++
 
# Loader (to link C/C++ and Fortran libraries)
LD = icpc
#LD = g++
 
# Archiver
AR = ar r
 
# For removing files
RM = rm -f
 
# Include directories (the CUDA 7 headers, which provide CL/cl.h here)
INC = -I/opt/cuda7/include
 
# Standard and/or local libraries (OpenCL library from the CUDA 7 install)
LIB = -L/opt/cuda7/lib64 -lOpenCL 
 
# C optimization and other flags
# Alternatives for different compilers, kept for reference:
#CFL(icc) = -O3 -openmp
#CFL(gcc) = -O3 -fopenmp
#CFL(icc,debug) = -g
#CFL(gcc) = -g
# Currently empty: no extra compile flags.
CFL =
 
# Loader optimization and other flags
#LDFL(debug) = -g 
#LDFL(profile) = -p                                
# Currently empty: no extra link flags.
LDFL =

main.c

#include<stdlib.h>
#include<stdio.h>
 
#include <CL/cl.h>
 
// functions to display platform and device properties
/*
 * Prints one string-valued property of an OpenCL platform.
 *
 *   id    platform to query
 *   name  property selector passed to clGetPlatformInfo
 *   str   label printed in front of the value and in error messages
 *
 * On any failure a message is printed and the function returns without
 * printing a value.
 */
void DisplayPlatformInfo(
			 cl_platform_id id, 
			 cl_platform_info name,
			 char* str)
{
  cl_int retval;
  size_t paramValueSize;

  // First call: ask only for the size of the value.
  retval = clGetPlatformInfo(id, name, 0, NULL, &paramValueSize);
  if (retval != CL_SUCCESS){
    printf("Failed to find OpenCL platform %s.\n", str);
    return;
  }

  // One extra byte so the buffer is always non-empty and NUL-terminated.
  char * info = (char *)malloc(paramValueSize + 1);
  if (info == NULL) {
    printf("Failed to allocate memory for OpenCL platform %s.\n", str);
    return;
  }

  // Second call: fetch the value itself.
  retval = clGetPlatformInfo(id, name, paramValueSize, info, NULL);
  if (retval != CL_SUCCESS)  {
    printf("Failed to find OpenCL platform %s.\n", str);
    free(info); // was leaked on this path
    return;
  }
  info[paramValueSize] = '\0';

  printf("\t%s:\t%s\n", str, info );
  free(info); 
}
 
/*
 * Prints one string-valued property of an OpenCL device.
 *
 *   id    device to query
 *   name  property selector passed to clGetDeviceInfo
 *   str   label printed in front of the value and in error messages
 *
 * On any failure a message is printed and the function returns without
 * printing a value.
 */
void DisplayDeviceInfo_char(
		       cl_device_id id, 
		       cl_device_info name,
		       char* str)
{
  cl_int retval;
  size_t paramValueSize;

  // First call: ask only for the size of the value.
  retval = clGetDeviceInfo(id, name, 0, NULL, &paramValueSize);
  if (retval != CL_SUCCESS) {
    printf("Failed to find OpenCL device info %s.\n", str);
    return;
  }

  // One extra byte so the buffer is always non-empty and NUL-terminated.
  char * info = (char *)malloc(paramValueSize + 1);
  if (info == NULL) {
    printf("Failed to allocate memory for OpenCL device info %s.\n", str);
    return;
  }

  // Second call: fetch the value itself.
  retval = clGetDeviceInfo(id, name, paramValueSize, info, NULL);
  if (retval != CL_SUCCESS) {
    printf("Failed to find OpenCL device info %s.\n", str);
    free(info); // was leaked on this path
    return;
  }
  info[paramValueSize] = '\0';

  printf("\t\t%s:\t%s\n", str, info );
  free(info);
}
 
/*
 * Prints a cl_ulong-valued device property as a number of megabytes
 * (the value divided by 1024 twice).
 *
 *   id    device to query
 *   name  property selector passed to clGetDeviceInfo
 *   str   label printed in front of the value and in error messages
 *
 * On any failure a message is printed and the function returns without
 * printing a value.
 */
void DisplayDeviceInfo_ulong(
		       cl_device_id id, 
		       cl_device_info name,
		       char* str)
{
  cl_int retval;
  size_t paramValueSize;
  size_t bufferSize;

  // First call: ask only for the size of the value (in bytes).
  retval = clGetDeviceInfo(id, name, 0, NULL, &paramValueSize);
  if (retval != CL_SUCCESS) {
    printf("Failed to find OpenCL device info %s.\n", str);
    return;
  }

  // paramValueSize is already a byte count. Allocate at least one cl_ulong,
  // zero-filled, so reading info[0] below is always well defined.
  bufferSize = paramValueSize;
  if (bufferSize < sizeof(cl_ulong)) {
    bufferSize = sizeof(cl_ulong);
  }
  cl_ulong * info = (cl_ulong *)calloc(1, bufferSize);
  if (info == NULL) {
    printf("Failed to allocate memory for OpenCL device info %s.\n", str);
    return;
  }

  // Second call: fetch the value itself.
  retval = clGetDeviceInfo(id, name, paramValueSize, info, NULL);
  if (retval != CL_SUCCESS) {
    printf("Failed to find OpenCL device info %s.\n", str);
    free(info); // was leaked on this path
    return;
  }

  // cl_ulong is 64 bits wide; print it through unsigned long long so the
  // format matches on every platform.
  printf("\t\t%s:\t%llu MB\n", str,
	 (unsigned long long)(info[0] / 1024 / 1024) );
  free(info);
}
 
/*
 * Prints a cl_uint-valued device property followed by the unit "MHz".
 *
 *   id    device to query
 *   name  property selector passed to clGetDeviceInfo
 *   str   label printed in front of the value and in error messages
 *
 * On any failure a message is printed and the function returns without
 * printing a value.
 */
void DisplayDeviceInfo_uint(
		       cl_device_id id, 
		       cl_device_info name,
		       char* str)
{
  cl_int retval;
  size_t paramValueSize;
  size_t bufferSize;

  // First call: ask only for the size of the value (in bytes).
  retval = clGetDeviceInfo(id, name, 0, NULL, &paramValueSize);
  if (retval != CL_SUCCESS) {
    printf("Failed to find OpenCL device info %s.\n", str);
    return;
  }

  // paramValueSize is already a byte count. Allocate at least one cl_uint,
  // zero-filled, so reading info[0] below is always well defined.
  bufferSize = paramValueSize;
  if (bufferSize < sizeof(cl_uint)) {
    bufferSize = sizeof(cl_uint);
  }
  cl_uint * info = (cl_uint *)calloc(1, bufferSize);
  if (info == NULL) {
    printf("Failed to allocate memory for OpenCL device info %s.\n", str);
    return;
  }

  // Second call: fetch the value itself.
  retval = clGetDeviceInfo(id, name, paramValueSize, info, NULL);
  if (retval != CL_SUCCESS) {
    printf("Failed to find OpenCL device info %s.\n", str);
    free(info); // was leaked on this path
    return;
  }

  // cl_uint is unsigned, so it is printed with %u.
  printf("\t\t%s:\t%u MHz\n", str, (unsigned int)info[0] );
  free(info);
}
 
 
// main program controlling execution of CPU code and OpenCL kernels
int main(int argc, char** argv)
{
  cl_uint number_of_contexts = 2;
  cl_context context = NULL;
  cl_context list_of_contexts[2] = {0,0};
  cl_command_queue commandQueue = 0;
  cl_program program = 0;
  cl_uint number_of_devices;
  cl_device_id device = 0;
  cl_device_id *list_of_devices;
  cl_device_type type;
  cl_kernel kernel = 0;
  cl_mem memObjects[3] = { 0, 0, 0 };
  cl_int retval;
  int icon, idev;
  cl_uint numPlatforms;
  cl_platform_id * platformIds;
  cl_uint i,j;
 
  // flag to control displaying
  int Monitor = 1;
 
 
  // Create OpenCL contexts 
 
  // First, query the total number of platforms
  retval = clGetPlatformIDs(0, (cl_platform_id *) NULL, &numPlatforms);
 
  // Next, allocate memory for the installed plaforms, and qeury 
  // to get the list.
  platformIds = (cl_platform_id *)malloc(sizeof(cl_platform_id) * numPlatforms);
 
  // Then, query the platform IDs
  retval = clGetPlatformIDs(numPlatforms, platformIds, NULL);
 
  if(Monitor>=0){
    printf("Number of platforms: \t%d\n", numPlatforms); 
  }
 
  // Iterate through the list of platforms displaying associated information
  for (i = 0; i < numPlatforms; i++) {
 
    if(Monitor>0){
 
      printf("Platform ID - %d\n",i);
      // First we display information associated with the platform
      DisplayPlatformInfo(
			platformIds[i], 
			CL_PLATFORM_NAME, 
			"CL_PLATFORM_NAME");
      DisplayPlatformInfo(
			platformIds[i], 
			CL_PLATFORM_PROFILE, 
			"CL_PLATFORM_PROFILE");
      DisplayPlatformInfo(
			platformIds[i], 
			CL_PLATFORM_VERSION, 
			"CL_PLATFORM_VERSION");
      DisplayPlatformInfo(
			platformIds[i], 
			CL_PLATFORM_VENDOR, 
			"CL_PLATFORM_VENDOR");
    }
 
  }
 
  // For the first platform
  int iplat;
  printf("Select Platform ID: "); scanf("%d", &iplat);
 
  // Query the set of devices associated with the platform
  retval = clGetDeviceIDs(
			  platformIds[iplat],
			  CL_DEVICE_TYPE_ALL,
			  0,
			  NULL,
			  &number_of_devices);
 
 
  list_of_devices = 
    (cl_device_id *) malloc (sizeof(cl_device_id) * number_of_devices);
 
  retval = clGetDeviceIDs(
			  platformIds[iplat],
			  CL_DEVICE_TYPE_ALL,
			  number_of_devices,
			  list_of_devices,
			  NULL);
 
  if(Monitor>=0){
    printf("Number of devices: \t%d\n", number_of_devices); 
  }
 
  // Iterate through each device, displaying associated information
  for (j = 0; j < number_of_devices; j++) {
 
    clGetDeviceInfo(list_of_devices[j], CL_DEVICE_TYPE, 
		    sizeof(cl_device_type), &type, NULL);
 
    if(Monitor>0){
 
      DisplayDeviceInfo_char(
			list_of_devices[j], 
			CL_DEVICE_NAME, 
			"CL_DEVICE_NAME");
 
      DisplayDeviceInfo_char(
			list_of_devices[j], 
			CL_DEVICE_VENDOR, 
			"CL_DEVICE_VENDOR");
 
      DisplayDeviceInfo_char(
			list_of_devices[j], 
			CL_DEVICE_VERSION, 
			"CL_DEVICE_VERSION");
 
      DisplayDeviceInfo_ulong(
			list_of_devices[j], 
			CL_DEVICE_GLOBAL_MEM_SIZE, 
			"CL_DEVICE_GLOBAL_MEM_SIZE");
 
      DisplayDeviceInfo_uint(
			list_of_devices[j], 
			CL_DEVICE_MAX_CLOCK_FREQUENCY, 
			"CL_DEVICE_MAX_CLOCK_FREQUENCY");
      printf("\n");
    }
  }
 
  // Next, create OpenCL contexts on platforms
  cl_context_properties contextProperties[] = {
    CL_CONTEXT_PLATFORM,
    (cl_context_properties)platformIds[iplat],
    0
  };
 
  if(Monitor>0){
    printf("Creating CPU context %d on platform %d\n", 1, iplat);
  }
 
  list_of_contexts[1] = 
    clCreateContextFromType(contextProperties, 
			    CL_DEVICE_TYPE_CPU, NULL, NULL, &retval);
 
  if(Monitor>=0 && retval != CL_SUCCESS){
    printf("Could not create CPU context on platform %d\n", i);
  }
 
  if(Monitor>0){
    printf("Creating GPU context 0 on platform %d\n", iplat);
  }
 
  list_of_contexts[0] = 
    clCreateContextFromType(contextProperties, 
			    CL_DEVICE_TYPE_GPU, NULL, NULL, &retval);
 
  if(Monitor>=0 && retval != CL_SUCCESS){
    printf("Could not create GPU context on platform %d\n", i);
  }
 
  // in a loop over devices of the seleceted platform   
  for(idev=0; idev<number_of_devices;idev++){
 
    if(Monitor>0){
      printf("\nFor context %d and device %d:\n",
	     idev, idev);
    }
    device = list_of_devices[idev];
    icon = idev;
 
    // choose OpenCL context on first available platform
    context = list_of_contexts[icon];
 
    if(context !=0){
 
      commandQueue = clCreateCommandQueue(context, device, 0, NULL);
      if (commandQueue == NULL) {
	printf("Failed to create commandQueue for device %d\n", idev);
	exit(0);
      }
 
      if(Monitor>0){
	printf("Reading program from source\n");
      }
 
      // read source code from file
      FILE *fp;
      char* source;
      long int size;
 
      fp = fopen("HelloWorld.cl", "rb");
      if(!fp) {
	printf("Could not open kernel file\n");
	exit(-1);
      }
      int status = fseek(fp, 0, SEEK_END);
      if(status != 0) {
	printf("Error seeking to end of file\n");
	exit(-1);
      }
      size = ftell(fp);
      if(size < 0) {
	printf("Error getting file position\n");
	exit(-1);
      }
 
      rewind(fp);
 
      source = (char *)malloc(size + 1);
 
      int i;
      for (i = 0; i < size+1; i++) {
	source[i]='\0';
      }
 
      if(source == NULL) {
	printf("Error allocating space for the kernel source\n");
	exit(-1);
      }
 
      fread(source, 1, size, fp);
      source[size] = '\0';
 
      const char* source_const = source;
      cl_program program = clCreateProgramWithSource(context, 1,
						     &source_const,
						     NULL, NULL);
      if (program == NULL)
	{
	  printf("Failed to create CL program from source.\n");
	  exit(-1);
	}
 
      if(Monitor>0){
	printf("Creating program and kernel\n");
      }
      // build program (passing options to compiler if necessary
      retval = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
      char* buildLog; size_t size_of_buildLog; 
      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
			    0, NULL, &size_of_buildLog); 
      buildLog = malloc(size_of_buildLog+1); 
      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
			    size_of_buildLog, buildLog, NULL); 
      buildLog[size_of_buildLog]= '\0'; 
      printf("Kernel buildLog: %s\n", buildLog); 
      if (retval != CL_SUCCESS)
	{
	  printf("Error in kernel\n");
	  clReleaseProgram(program);
	  exit(-1);
	}
 
 
      // Create OpenCL kernel
      kernel = clCreateKernel(program, "hello_kernel", NULL);
      if (kernel == NULL)
	{
	  printf("Failed to create kernel.\n");
	  exit(0);
	}
 
      if(Monitor>0){
	printf("Creating memory objects\n");
      }
      // Create memory objects that will be used as arguments to
      // kernel.  First create host memory arrays that will be
      // used to store the arguments to the kernel
      float result[1];
      float a[1];
      float b[1];
      a[0] = 2;
      b[0] = 2;
 
      memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY ,
				     sizeof(float), NULL, NULL);
      memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY ,
				     sizeof(float), NULL, NULL);
      memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
				     sizeof(float), NULL, NULL);
 
      if (memObjects[0]==NULL || memObjects[1]==NULL || memObjects[2]==NULL){
	printf("Error creating memory objects.\n");
	return 0;
      }
 
      if(Monitor>0){
	printf("Sending kernel arguments\n");
      }
      retval = clEnqueueWriteBuffer(
				    commandQueue, 
				    memObjects[0], 
				    CL_FALSE, 
				    0, 
				    sizeof(float),                         
				    a, 
				    0, 
				    NULL, 
				    NULL);
 
      // Use clEnqueueWriteBuffer() to write input array B to 
      // the device buffer bufferB
      retval = clEnqueueWriteBuffer(
				    commandQueue, 
				    memObjects[1], 
				    CL_FALSE, 
				    0, 
				    sizeof(float),                         
				    b,
				    0, 
				    NULL, 
				    NULL);
 
      // Set the kernel arguments (result, a, b)
      retval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]);
      retval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]);
      retval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]);
      if (retval != CL_SUCCESS)
	{
	  printf("Failed to Set the kernel arguments.\n");
	  //Cleanup(context, commandQueue, program, kernel, memObjects);
	  return 1;
	}
 
      if(Monitor>0){
	printf("Running the kernel!\n");
      }
      size_t globalWorkSize[1] = { 1 };
      size_t localWorkSize[1] = { 1 };
 
      // Queue the kernel up for execution across the array
      retval = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
				      globalWorkSize, localWorkSize,
				      0, NULL, NULL);
      if (retval != CL_SUCCESS)
	{
	  printf("Failed to queue kernel for execution.\n");
	  //Cleanup(context, commandQueue, program, kernel, memObjects);
	  return 1;
	}
 
      if(Monitor>0){
	printf("Transfering back results\n");
      }
      // Read the output buffer back to the Host
      retval = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE,
				   0, sizeof(float), result,
				   0, NULL, NULL);
      if (retval != CL_SUCCESS)
	{
	  printf("Failed to read result buffer.\n");
	  //Cleanup(context, commandQueue, program, kernel, memObjects);
	  return 1;
	}
 
      // Verify the output
      if(result[0]==4)  {
        printf("Output is correct: %lf + %lf = %lf\n",
	       a[0], b[0], result[0]);
      } else {
        printf("Output is incorrect: %lf + %lf != %lf\n",
	       a[0], b[0], result[0]);
      }
 
      for (i = 0; i < 3; i++)
      	{
      	  if (memObjects[i] != 0)
            clReleaseMemObject(memObjects[i]);
      	}
      if (commandQueue != 0)
        clReleaseCommandQueue(commandQueue);
 
      if (kernel != 0)
        clReleaseKernel(kernel);
 
      if (program != 0)
        clReleaseProgram(program);
 
    }
  }
 
  free(list_of_devices);
  free(platformIds);
 
  return 0;
}

WoGu Wiki

Narzędzia użytkownika

Narzędzia witryny

Spis treści

Zajęcia 1

Makefile

heat.c

Zajęcia 2

heat.c

Zajęcia 3

child.c

parent.c

Zajęcia 4

Zajęcia 5

Makefile

make.lab_404_NVIDIA

main.c

Narzędzia strony