= MPI-Start as EMI-ES ParallelEnvironment backend =


== CREAM ==

CREAM supports the !ParallelEnvironment out of the box, using mpi-start for its implementation; no extra configuration is needed.
The default job generated by CREAM looks like this:
{{{
/usr/bin/mpi-start -t <ParallelEnvironmentType> -npnode <ProcessesPerHost> -d THREADS_PER_PROCESS=<ThreadsPerProcess> <UserExecutable> <UserExecutableArgs>
}}}
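
For instance, a job requesting a !ParallelEnvironment of type `OpenMPI` with 4 processes per host and 2 threads per process would be launched roughly as follows (illustrative values; `cpi` is a hypothetical user executable, and mpi-start is assumed to take the lower-cased type name):
{{{
/usr/bin/mpi-start -t openmpi -npnode 4 -d THREADS_PER_PROCESS=2 cpi
}}}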

== UNICORE ==

The UNICORE EMI-ES !ParallelEnvironment support is a complete implementation of the specification. Configuration is done via XML files that define the mapping from the job specification to the final executable.

For each MPI flavour that should be supported through mpi-start, an entry must be included in the IDB. For example, for OpenMPI:
{{{#!highlight xml
<ee:ExecutionEnvironment xmlns:ee="http://www.unicore.eu/unicore/jsdl-extensions">
    <ee:Name>OpenMPI</ee:Name>
    <ee:Version>1.5.4</ee:Version>
    <ee:Description>Runs OpenMPI job using mpi-start to start the job</ee:Description>
    <ee:ExecutableName>/usr/bin/mpi-start</ee:ExecutableName>
    <ee:CommandlineTemplate>#EXECUTABLE -t openmpi #ARGS #USERCOMMAND #USERARGS</ee:CommandlineTemplate>
    <ee:Argument>
      <ee:Name>Output</ee:Name>
      <ee:IncarnatedValue>-o </ee:IncarnatedValue>
      <ee:ArgumentMetadata>
        <ee:Type>string</ee:Type>
        <ee:Description>Write the job output to a FILE instead of to the standard output stream</ee:Description>
      </ee:ArgumentMetadata>
    </ee:Argument>
    <ee:Argument>
      <ee:Name>PostHook</ee:Name>
      <ee:IncarnatedValue>-post </ee:IncarnatedValue>
      <ee:ArgumentMetadata>
        <ee:Type>string</ee:Type>
        <ee:Description>Use the file as post hook</ee:Description>
      </ee:ArgumentMetadata>
    </ee:Argument>
    <ee:Argument>
      <ee:Name>PreHook</ee:Name>
      <ee:IncarnatedValue>-pre </ee:IncarnatedValue>
      <ee:ArgumentMetadata>
        <ee:Type>string</ee:Type>
        <ee:Description>Use the file as pre hook</ee:Description>
      </ee:ArgumentMetadata>
    </ee:Argument>
    <ee:Argument>
      <ee:Name>Error</ee:Name>
      <ee:IncarnatedValue>-e </ee:IncarnatedValue>
      <ee:ArgumentMetadata>
        <ee:Type>string</ee:Type>
        <ee:Description>Write the job error to a FILE instead of to the standard error stream</ee:Description>
      </ee:ArgumentMetadata>
    </ee:Argument>
    <ee:Option>
      <ee:Name>Verbose</ee:Name>
      <ee:IncarnatedValue>-v</ee:IncarnatedValue>
      <ee:OptionMetadata>
        <ee:Description>Enable verbose mode</ee:Description>
      </ee:OptionMetadata>
    </ee:Option>
    <ee:Option>
      <ee:Name>PerNode</ee:Name>
      <ee:IncarnatedValue>-pnode</ee:IncarnatedValue>
      <ee:OptionMetadata>
        <ee:Description>Start one process per available node</ee:Description>
      </ee:OptionMetadata>
    </ee:Option>
    <ee:Option>
      <ee:Name>PerSocket</ee:Name>
      <ee:IncarnatedValue>-psocket</ee:IncarnatedValue>
      <ee:OptionMetadata>
        <ee:Description>Start one process per available socket</ee:Description>
      </ee:OptionMetadata>
    </ee:Option>
    <ee:Option>
      <ee:Name>PerCore</ee:Name>
      <ee:IncarnatedValue>-pcore</ee:IncarnatedValue>
      <ee:OptionMetadata>
        <ee:Description>Start one process per available core</ee:Description>
      </ee:OptionMetadata>
    </ee:Option>
</ee:ExecutionEnvironment>
}}}
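
As a sketch of how the `CommandlineTemplate` above is incarnated: a job that selects the `OpenMPI` execution environment, enables the `Verbose` option and runs a user command `cpi` (a hypothetical executable name) would expand to roughly:
{{{
/usr/bin/mpi-start -t openmpi -v cpi
}}}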
== ARC ==

ARC supports the !ParallelEnvironment through !RunTimeEnvironments: you need to create an RTE for the !ParallelEnvironment that invokes mpi-start:
{{{#!highlight sh
#!/bin/sh

case "$1" in
0)
        TYPE=`echo "$joboption_penv_type" | tr [:upper:] [:lower:]`
        OPTS="-t $TYPE"
        if [ "x$joboption_penv_procperslot" != "x" ] ; then
                OPTS="$OPTS -npnode $joboption_penv_procperslot"
        fi
        if [ "x$joboption_penv_threadsperslot" != "x" ] ; then
                OPTS="$OPTS -d THREADS_PER_CORE=$joboption_penv_threadsperslot"
        fi
        joboption_args="mpi-start $OPTS -- $joboption_args"
;;
1)
;;
2)
;;
*)
        return 1
;;
esac
}}}
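
To make it available, the RTE script has to be installed in the runtime directory of the CE (the directory configured with `runtimedir` in `arc.conf`); the path below is only an illustrative assumption:
{{{
cp mpistart-pe.sh /etc/arc/runtime/MPISTART
}}}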

In the user job, you should include both the RTE (e.g. MPISTART) and the PE in the job description (only the relevant elements are shown):
{{{#!highlight xml
<RuntimeEnvironment><Name>MPISTART</Name></RuntimeEnvironment>
<ParallelEnvironment>
  <Type>OpenMPI</Type>
  <ProcessesPerSlot>4</ProcessesPerSlot>
  <ThreadsPerSlot>1</ThreadsPerSlot>
</ParallelEnvironment>
}}}
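
With the elements above, the RTE would rewrite the job so that it is executed as something like the following (assuming the user executable is `cpi`):
{{{
mpi-start -t openmpi -npnode 4 -d THREADS_PER_CORE=1 -- cpi
}}}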


== Sample Job ==

This is a sample job that can be submitted to an EMI-ES endpoint:
{{{#!highlight xml
<ActivityDescription xmlns="http://www.eu-emi.eu/es/2010/12/adl">
    <ActivityIdentification>
        <Name>test job</Name>
        <Description>A test job showing the features of EMI-ES</Description>
        <Type>single</Type>
        <Annotation>test</Annotation>
    </ActivityIdentification>
    <Application>
        <Executable><Path>cpi.c</Path></Executable>
        <Error>std.err</Error>
        <Output>std.out</Output>
        <Environment><Name>I2G_MPI_PRE_RUN_HOOK</Name><Value>pre.sh</Value></Environment>
        <Environment><Name>I2G_MPI_START_VERBOSE</Name><Value>1</Value></Environment>
    </Application>
    <Resources>
        <SlotRequirement>
            <NumberOfSlots>2</NumberOfSlots>
        </SlotRequirement>
        <ParallelEnvironment>
            <Type>OpenMPI</Type>
        </ParallelEnvironment>
    </Resources>
    <DataStaging>
        <InputFile> <Name>pre.sh</Name> <Source><URI>pre.sh</URI></Source> </InputFile>
        <InputFile> <Name>cpi.c</Name> <Source><URI>cpi.c</URI></Source> </InputFile>
    </DataStaging>
</ActivityDescription>
}}}

The pre-run hook `pre.sh` compiles the application before mpi-start launches it:
{{{#!highlight sh
#!/bin/sh

pre_run_hook () {
  # Compile the program.
  info_msg "Compiling ${I2G_MPI_APPLICATION}"

  export I2G_MPI_APPLICATION=`echo $I2G_MPI_APPLICATION | sed -e "s/\.c$//"`
  # Actually compile the program.
  cmd="${MPI_MPICC} ${MPI_MPICC_OPTS} -o ${I2G_MPI_APPLICATION} ${I2G_MPI_APPLICATION}.c"
  info_msg "$cmd"
  $cmd
  st=$?
  if [ $st -ne 0 ]; then
    error_msg "Error compiling program. Exiting..."
    return $st
  fi

  # Everything's OK.
  info_msg "Successfully compiled ${I2G_MPI_APPLICATION}"
  return 0
}
}}}
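
For illustration, if the worker node defines `MPI_MPICC=mpicc` and the job stages in `cpi.c` (so `I2G_MPI_APPLICATION=cpi.c`), the hook strips the `.c` suffix and effectively runs (hypothetical values):
{{{
mpicc -o cpi cpi.c
}}}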

And the C code:
{{{#!highlight c
#include "mpi.h"
#include <stdio.h>
#include <math.h>

/* Function to integrate: the integral of 4/(1+x^2) over [0,1] is pi. */
double f( double a )
{
    return (4.0 / (1.0 + a*a));
}

int main( int argc, char *argv[] )
{
    int n_intervals = 16384;

    int done = 0, n = 0, myid, numprocs, i;
    double PI25DT = 3.141592653589793238462643;
    double mypi, pi, h, sum, x;
    double startwtime = 0.0, endwtime;
    int namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Get_processor_name(processor_name, &namelen);

    fprintf(stderr, "Process %d on %s: n=%d\n", myid, processor_name, n);
    if (numprocs > 1) {
        if (myid == 0) fprintf(stderr, "Using %d intervals\n", n_intervals);

        while (!done)
        {
            if (myid == 0) {
                startwtime = MPI_Wtime();
            }
            /* First pass broadcasts the interval count; second broadcasts 0 to stop. */
            if (n == 0) n = n_intervals; else n = 0;
            MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
            if (n == 0)
                done = 1;
            else
            {
                h = 1.0 / (double) n;
                sum = 0.0;
                /* Each rank sums its share of the midpoint-rule slices. */
                for (i = myid + 1; i <= n; i += numprocs)
                {
                    x = h * ((double)i - 0.5);
                    sum += f(x);
                }
                mypi = h * sum;

                MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
                if (myid == 0)
                {
                    printf("pi is approximately %.16f, Error is %.16f\n",
                           pi, fabs(pi - PI25DT));
                    endwtime = MPI_Wtime();
                    printf("wall clock time = %f\n", endwtime - startwtime);
                }
            }
        }
    } else {
        fprintf(stderr, "Only 1 process, not doing anything\n");
    }
    MPI_Finalize();

    return 0;
}
}}}
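
The job description can then be submitted to an EMI-ES endpoint, e.g. with the ARC client tools; the endpoint and file names below are illustrative assumptions:
{{{
arcsub -c ce.example.org -S org.ogf.glue.emies.activitycreation job.xml
}}}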

