From aa7d07f4f406e8ee1df505fba2da04c6cc87626c Mon Sep 17 00:00:00 2001
From: Mark Dixon
Date: Thu, 13 Aug 2015 10:57:38 +0100
Subject: [PATCH 6/6] Enhmt #1550 sharetree node priority scaled by slot (not job) count
Previously, when a sharetree node had pending jobs, each job was assigned
the number of sharetree tickets (stcks) due to the node, scaled down
according to how many running and pending jobs the node had ahead of
that job - sum(job_ahead).
This changes it to be related to the number of assigned slots that the
node has ahead of the job - sum(job_ahead*slots).
For example, if there are no jobs running and a single job pending, the
pending job will still receive the full number of stcks due to the node.
If there is one 8-slot job running and one job pending, the pending job
will receive 1/9 of the stcks due to the node, instead of 1/2.
There is no doubt more accurate maths that it could be based on, such as
something derived from the usage_weight_list config option, and more
accurate measures of slots (here we simply take the minimum of the first
PE range in the job request, with a floor of 1). This is an attempt to make
a first-order correction, allowing more complicated calculations later if
necessary.
It is hoped that this change will make the sharetree policy fairer for
nodes with a job mix containing jobs with a variety of slot counts.
---
source/libs/sched/sgeee.c | 27 +++++++++++++++++++++------
1 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/source/libs/sched/sgeee.c b/source/libs/sched/sgeee.c
index 9f15288..6b8628f 100644
--- a/source/libs/sched/sgeee.c
+++ b/source/libs/sched/sgeee.c
@@ -3472,8 +3472,8 @@ sge_sort_job_nodes(lListElem *root,
lPSortList(job_node_list, "%I+ %I- %I+ %I+", STN_queued, STN_sort, STN_jobid, STN_taskid);
/* calculate a new priority -
- The priority of each job associated with this node is the
- node's short term entitlement (STN_stt) divided by the number of jobs
+ The priority of each job associated with this node is the node's
+ short term entitlement (STN_stt) divided by the number of job slots
which are scheduled ahead of this node times the number of share tree
tickets. If we are dependent on another higher-level policy, we also add
the tickets from those policies. */
@@ -3482,14 +3482,29 @@ sge_sort_job_nodes(lListElem *root,
double node_stt = lGetDouble(node, STN_stt);
/* - increment job count and dilute entitlement */
- double job_count = 0;
+ double dilute_factor = 1;
lListElem *job_node;
for_each(job_node, job_node_list) {
- job_count++;
- lSetDouble(job_node, STN_shr, node_stt / job_count);
+
+ lSetDouble(job_node, STN_shr, node_stt / dilute_factor);
lSetDouble(job_node, STN_sort, lGetDouble(job_node, STN_tickets) +
- ((node_stt / job_count) * total_share_tree_tickets));
+ ((node_stt / dilute_factor) * total_share_tree_tickets));
+
+ /* Determine job's contribution to dilution */
+ int dilution = 1;
+ int job_ndx = lGetUlong(job_node, STN_ref) -1;
+ if (job_ndx >= 0) {
+ /* Use minimum value of first pe slot range */
+ /* (simplistic, but could be worse) */
+ lListElem *pe_range = lFirst(lGetList(job_ref[job_ndx].job, JB_pe_range));
+ if (pe_range) {
+ dilution = MAX(1, lGetUlong(pe_range, RN_min));
+ }
+ }
+
+ /* Dilute priority for next job */
+ dilute_factor += dilution;
}
}
--
1.7.1