No subject


Wed Jan 12 20:38:46 GMT 2011


07/02/2009 14:12:35|19|.service.impl.ge.InstallationSequence.execute|W|Install execd on host compute-0-0: step 'Install execd' failed: Executor on host compute-0-0 not active

However, cs_vm is running on that node (the simple_installation option was used), as shown below:

# sdmadm sj
name  host        state      used_mem  max_mem   message
-----------------------------------------------------------------------------------------
cs_vm compute-0-0 STARTED           7M       28M
      compute-0-1 STARTED           7M       28M
      compute-0-2 STARTED           7M       28M
      compute-0-3 STARTED           7M       28M
      compute-0-4 STARTED           6M       28M
      compute-0-5 STARTED           6M       28M
      compute-0-6 STARTED           8M       28M
      llgriddev   STARTED          26M      455M

What would be my next step to resolve this issue?

Here is my gesvc configuration:

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<common:componentConfig xsi:type="ge_adapter:GEServiceConfig"
                        mapping="default"
                        xmlns:reporter="http://hedeby.sunsource.net/hedeby-reporter"
                        xmlns:cloud_adapter="http://hedeby.sunsource.net/hedeby-cloud-adapter"
                        xmlns:common="http://hedeby.sunsource.net/hedeby-common"
                        xmlns:resource_provider="http://hedeby.sunsource.net/hedeby-resource-provider"
                        xmlns:executor="http://hedeby.sunsource.net/hedeby-executor"
                        xmlns:ge_adapter="http://hedeby.sunsource.net/hedeby-gridengine-adapter"
                        xmlns:security="http://hedeby.sunsource.net/hedeby-security"
                        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <common:slos>
        <common:slo xsi:type="common:FixedUsageSLOConfig"
                    urgency="50"
                    name="fixed_usage"/>
        <common:slo xsi:type="ge_adapter:MaxPendingJobsSLOConfig"
                    averageSlotsPerHost="2"
                    max="5"
                    urgency="99"
                    name="maxPendingJobs"/>
    </common:slos>
    <ge_adapter:connection keystore="/var/sgeCA/port6444/default/userkeys/sge/keystore"
                           password=""
                           username="sge"
                           jmxPort="6446"
                           execdPort="6445"
                           masterPort="6444"
                           cell="default"
                           root="/usr/local/sge"
                           clusterName="dev6444"/>
    <ge_adapter:sloUpdateInterval unit="minutes"
                                  value="5"/>
    <ge_adapter:jobSuspendPolicy suspendMethods="reschedule_jobs_in_rerun_queue reschedule_restartable_jobs suspend_jobs_with_checkpoint">
        <ge_adapter:timeout unit="minutes"
                            value="2"/>
    </ge_adapter:jobSuspendPolicy>
    <ge_adapter:execd adminUsername="root"
                      defaultDomain=""
                      ignoreFQDN="true"
                      rcScript="false"
                      adminHost="true"
                      submitHost="false"
                      cleanupDefault="true">
        <ge_adapter:filter>cloudResource="true"</ge_adapter:filter>
        <ge_adapter:localSpoolDir>/var/spool/sge</ge_adapter:localSpoolDir>
        <ge_adapter:installTemplate needsConfigFile="false"
                                    executeOn="qmaster_host">
            <ge_adapter:script>util/templates/ge-adapter/copy_sge_root_to_cloud.sh</ge_adapter:script>
        </ge_adapter:installTemplate>
        <ge_adapter:installTemplate needsConfigFile="false"
                                    executeOn="exec_host">
            <ge_adapter:script>util/templates/ge-adapter/patch_bootstrap_files_on_cloud.sh</ge_adapter:script>
        </ge_adapter:installTemplate>
        <ge_adapter:installTemplate needsConfigFile="true"
                                    executeOn="exec_host">
            <ge_adapter:script>util/templates/ge-adapter/install_execd_cloud.sh</ge_adapter:script>
            <ge_adapter:conf>util/templates/ge-adapter/install_execd_cloud.conf</ge_adapter:conf>
        </ge_adapter:installTemplate>
        <ge_adapter:uninstallTemplate needsConfigFile="true"
                                      executeOn="exec_host">
            <ge_adapter:script>util/templates/ge-adapter/uninstall_execd_cloud.sh</ge_adapter:script>
            <ge_adapter:conf>util/templates/ge-adapter/uninstall_execd_cloud.conf</ge_adapter:conf>
        </ge_adapter:uninstallTemplate>
    </ge_adapter:execd>
    <ge_adapter:execd adminUsername="root"
                      defaultDomain=""
                      ignoreFQDN="true"
                      rcScript="false"
                      adminHost="true"
                      submitHost="false"
                      cleanupDefault="true">
        <ge_adapter:localSpoolDir>/var/spool/sge</ge_adapter:localSpoolDir>
    </ge_adapter:execd>
</common:componentConfig>



[root@llgriddev sge]# sdmadm sr
service    id          state    type flags usage annotation
-----------------------------------------------------------
spare_pool compute-0-0 ASSIGNED host       1
           compute-0-1 ASSIGNED host       1
           compute-0-2 ASSIGNED host       1
           compute-0-3 ASSIGNED host       1
           compute-0-4 ASSIGNED host       1
           compute-0-5 ASSIGNED host       1
           compute-0-6 ASSIGNED host       1
[root@llgriddev sge]# sdmadm ss
host      service    cstate  sstate
------------------------------------
llgriddev gesvc      STARTED RUNNING
          spare_pool STARTED RUNNING

[root@llgriddev sge]# qstat -f

############################################################################
 - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS
############################################################################
      4 0.55500 sleep      root         qw    07/02/2009 14:18:34     1 1-10:1
      5 0.55500 sleep      root         qw    07/02/2009 14:29:24     1 1-10:1
      6 0.55500 sleep      root         qw    07/02/2009 14:29:28     1 1-10:1
      7 0.55500 sleep      root         qw    07/02/2009 14:29:29     1 1-10:1
      8 0.55500 sleep      root         qw    07/02/2009 14:29:29     1 1-10:1
      9 0.55500 sleep      root         qw    07/02/2009 14:29:30     1 1-10:1
     10 0.55500 sleep      root         qw    07/02/2009 14:29:30     1 1-10:1
     11 0.55500 sleep      root         qw    07/02/2009 14:29:30     1 1-10:1
     12 0.55500 sleep      root         qw    07/02/2009 14:29:31     1 1-10:1
     13 0.55500 sleep      root         qw    07/02/2009 14:29:31     1 1-10:1
     14 0.55500 sleep      root         qw    07/02/2009 14:29:31     1 1-10:1
     15 0.55500 sleep      root         qw    07/02/2009 14:29:31     1 1-10:1
     16 0.55500 sleep      root         qw    07/02/2009 14:29:31     1 1-10:1
     17 0.55500 sleep      root         qw    07/02/2009 14:29:32     1 1-10:1

Thanks,
- Chansup

------------------------------------------------------
http://gridengine.sunsource.net/ds/viewMessage.do?dsForumId=38&dsMessageId=204981

To unsubscribe from this discussion, e-mail: [users-unsubscribe at gridengine.sunsource.net].



More information about the gridengine-users mailing list