[GE users] SUN-MPI and SGE6.0u3

Andrea Lorenz lorenz at rz.rwth-aachen.de
Thu Jan 27 12:47:38 GMT 2005


> could you please send us the *.e* & *.o* files and qacct output for the 
> job?
I have attached the following files:
batch.sunmpi - job script
Sun.o3351 - the output, all other output and error output files are empty
batch.queue - the queue configuration
batch.pe - the configuration of the parallel environment
batch.qacct - the qacct output for the job

> There is also another potential problem which might be related to your
> problem:
> 
> The tight integration module in CT has a hard-wired "ARCH" name, which was
> changed from 5.3 to 6. A workaround is to create a symbolic link 
> "solaris64"
> to "sol-sparc64" under "bin", "lib", and "utilbin" directories.
I set the links, but this does not solve the problem.

Andrea


    [ Part 2: "Attached Text" ]

pe_name           andrea
slots             10000
user_lists        zzz_andrea+gabi
xuser_lists       NONE
start_proc_args   /bin/true
stop_proc_args    /bin/true
allocation_rule   $fill_up
control_slaves    TRUE
job_is_first_task FALSE
urgency_slots     min


    [ Part 3: "Attached Text" ]

==============================================================
qname        sunfire_e6900_sfl1.p
hostname     sunc00.rz.RWTH-Aachen.DE
group        ae10                
owner        ae106lo             
project      NONE                
department   defaultdepartment   
jobname      Sun                 
jobnumber    3351                
taskid       undefined
account      sge                 
priority     0                   
qsub_time    Thu Jan  1 01:00:00 1970
start_time   Thu Jan 27 13:29:50 2005
end_time     Thu Jan 27 13:29:57 2005
granted_pe   andrea              
slots        4                   
failed       100 : assumedly after job
exit_status  129                 
ru_wallclock 7            
ru_utime     2            
ru_stime     1            
ru_maxrss    0                   
ru_ixrss     0                   
ru_ismrss    0                   
ru_idrss     0                   
ru_isrss     0                   
ru_minflt    0                   
ru_majflt    0                   
ru_nswap     0                   
ru_inblock   0                   
ru_oublock   0                   
ru_msgsnd    0                   
ru_msgrcv    0                   
ru_nsignals  0                   
ru_nvcsw     0                   
ru_nivcsw    0                   
cpu          3            
mem          0.032             
io           0.000             
iow          0.000             
maxvmem      113.289M
==============================================================
qname        sunfire_e6900_sfl1.p
hostname     sunc00.rz.RWTH-Aachen.DE
group        ae10                
owner        ae106lo             
project      NONE                
department   defaultdepartment   
jobname      Sun                 
jobnumber    3351                
taskid       undefined
account      sge                 
priority     0                   
qsub_time    Thu Jan 27 13:28:20 2005
start_time   Thu Jan 27 13:29:48 2005
end_time     Thu Jan 27 13:31:31 2005
granted_pe   andrea              
slots        4                   
failed       0    
exit_status  0                   
ru_wallclock 103          
ru_utime     0            
ru_stime     0            
ru_maxrss    0                   
ru_ixrss     0                   
ru_ismrss    0                   
ru_idrss     0                   
ru_isrss     0                   
ru_minflt    0                   
ru_majflt    0                   
ru_nswap     0                   
ru_inblock   0                   
ru_oublock   0                   
ru_msgsnd    0                   
ru_msgrcv    0                   
ru_nsignals  0                   
ru_nvcsw     0                   
ru_nivcsw    0                   
cpu          1            
mem          0.001             
io           0.000             
iow          0.000             
maxvmem      13.039M


    [ Part 4: "Attached Text" ]

qname                 sunfire_e6900_sfl1.p
hostname              sunc00.rz.RWTH-Aachen.DE
seq_no                0
load_thresholds       np_load_short=1.1 zzz_home_full=true zzz_work_full=true \
                      zzz_system_time=true zzz_mpi_down=true
suspend_thresholds    NONE
nsuspend              1
suspend_interval      00:05:00
priority              0
min_cpu_interval      00:05:00
processors            UNDEFINED
qtype                 NONE
ckpt_list             NONE
pe_list               andrea mpi_sunos_us4_1host mpi_sunos_us4_e6900_sfl1 \
                      mpi_sunos_us4_e6900_sfl1_je24tasks \
                      mpi_sunos_us4_e6900_sfl1_je48tasks \
                      mpi_sunos_us4_e6900_sfl1_je8tasks
rerun                 TRUE
slots                 48
tmpdir                /w0/tmp
shell                 /bin/yaksh
prolog                /bin/true
epilog                /bin/true
shell_start_mode      posix_compliant
starter_method        NONE
suspend_method        NONE
resume_method         NONE
terminate_method      NONE
notify                00:05:00
owner_list            NONE
user_lists            zzz_andrea+gabi
xuser_lists           NONE
subordinate_list      NONE
complex_values        
projects              NONE
xprojects             NONE
calendar              sunc00p
initial_state         default
s_rt                  INFINITY
h_rt                  04:00:00
s_cpu                 INFINITY
h_cpu                 INFINITY
s_fsize               INFINITY
h_fsize               INFINITY
s_data                INFINITY
h_data                INFINITY
s_stack               INFINITY
h_stack               INFINITY
s_core                INFINITY
h_core                INFINITY
s_rss                 INFINITY
h_rss                 INFINITY
s_vmem                INFINITY
h_vmem                INFINITY


    [ Part 5: "Attached Text" ]

-np 4 -x sge
Monte-Carlo estimate of pi by    4 processes is 3.141460.
1


    [ Part 6: "Attached Text" ]

# Grid Engine batch job script (batch.sunmpi): runs two Sun MPI programs via
# mprun with idle pauses in between, echoing a counter so the captured output
# shows how far the job got.
#
# The "#$" lines below are scheduler directives embedded in the script:
#   -N Sun            job name
#   -cwd              run the job from the submission directory
#   -pe andrea 4      request 4 slots in the parallel environment "andrea"
#   -l h_vmem=300M    resource request for h_vmem
#   -l h_rt=00:30:00  resource request for h_rt
# NOTE(review): NSLOTS and MPRUN_FLAGS are not set in this script; presumably
# they are provided by the scheduler / MPI integration -- confirm.
#$ -N Sun
#$ -cwd
#$ -pe andrea 4
#$ -l h_vmem=300M
#$ -l h_rt=00:30:00
# ---------------------------


# Print the mprun flags found in the environment, for diagnosis.
echo $MPRUN_FLAGS
# First MPI run: start "monte" with one process per granted slot.
mprun -np $NSLOTS monte
# Idle in 60-second steps; each echo marks a completed step in the job output.
sleep 60
echo 1
sleep 60
echo 2
sleep 60
echo 3
sleep 60
# Second MPI run: start "prime" with the same process count.
mprun -np $NSLOTS prime



    [ Part 7: "Attached Text" ]

---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe at gridengine.sunsource.net
For additional commands, e-mail: users-help at gridengine.sunsource.net



More information about the gridengine-users mailing list