Commit f7f2f85b authored by David Anderson

- client: if a project is at max backoff for a resource,

        stop accumulating debt if it's at or around zero.
        This prevents other projects from being driven unboundedly negative.
    - client: if the number of overworked projects exceeds the number
        of device instances, clear debts; this indicates that an earlier
        client was buggy and produced bad debt values.

svn path=/trunk/boinc/; revision=17325
parent f0d4cff7
......@@ -1773,3 +1773,17 @@ Charlie 20 Feb 2009
win_build/
libboinc.vcproj
libboinc_staticcrt.vcproj
David 20 Feb 2009
- client: if a project is at max backoff for a resource,
stop accumulating debt if it's at or around zero.
This prevents other projects from being driven unboundedly negative.
- client: if the number of overworked projects exceeds the number
of device instances, clear debts; this indicates that an earlier
client was buggy and produced bad debt values.
client/
client_state.h
cpu_sched.cpp
sim.h
work_fetch.cpp,h
......@@ -503,4 +503,11 @@ extern void print_suspend_tasks_message(int);
#define WORK_FETCH_PERIOD 60
// see if we need to fetch work at least this often
#define CPU_SCHED_ENFORCE_PERIOD 60
// enforce CPU schedule at least this often
#define DEBT_ADJUST_PERIOD CPU_SCHED_ENFORCE_PERIOD
// debt is adjusted at least this often,
// since adjust_debts() is called from enforce_schedule()
#endif
......@@ -64,9 +64,6 @@ using std::vector;
#define DEADLINE_CUSHION 0
// try to finish jobs this much in advance of their deadline
#define CPU_SCHED_ENFORCE_PERIOD 60
// enforce CPU schedule at least this often
bool COPROCS::sufficient_coprocs(COPROCS& needed, bool log_flag, const char* prefix) {
for (unsigned int i=0; i<needed.coprocs.size(); i++) {
COPROC* cp = needed.coprocs[i];
......@@ -336,17 +333,15 @@ void CLIENT_STATE::adjust_debts() {
double share_frac;
double elapsed_time = now - debt_interval_start;
// This is called from enforce_schedule(),
// which runs about once every CPU_SCHED_ENFORCE_PERIOD seconds.
// If the elapsed time is more than 2*CPU_SCHED_ENFORCE_PERIOD
// If the elapsed time is more than 2*DEBT_ADJUST_PERIOD
// it must be because the host was suspended for a long time.
// In this case, ignore the last period
//
if (elapsed_time > 2*CPU_SCHED_ENFORCE_PERIOD || elapsed_time < 0) {
if (elapsed_time > 2*DEBT_ADJUST_PERIOD || elapsed_time < 0) {
if (log_flags.debt_debug) {
msg_printf(NULL, MSG_INFO,
"[debt_debug] adjust_debt: elapsed time (%d) longer than sched enforce period(%d). Ignoring this period.",
(int)elapsed_time, (int)CPU_SCHED_ENFORCE_PERIOD
(int)elapsed_time, (int)DEBT_ADJUST_PERIOD
);
}
reset_debt_accounting();
......@@ -743,6 +738,11 @@ bool CLIENT_STATE::enforce_schedule() {
}
if (!must_enforce_cpu_schedule) return false;
must_enforce_cpu_schedule = false;
// NOTE: there's an assumption that debt is adjusted at
// least as often as the CPU sched is enforced.
// If you remove the following, make changes accordingly
//
adjust_debts();
last_time = now;
bool action = false;
......
......@@ -310,4 +310,11 @@ extern bool work_fetch_old;
// assume actual CPU utilization will be this multiple
// of what we've actually measured recently
#define WORK_FETCH_PERIOD 60
\ No newline at end of file
#define WORK_FETCH_PERIOD 60
#define CPU_SCHED_ENFORCE_PERIOD 60
// enforce CPU schedule at least this often
#define DEBT_ADJUST_PERIOD CPU_SCHED_ENFORCE_PERIOD
// debt is adjusted at least this often,
// since adjust_debts() is called from enforce_schedule()
......@@ -385,6 +385,44 @@ void WORK_FETCH::accumulate_inst_sec(ACTIVE_TASK* atp, double dt) {
}
}
// Running buggy versions may lead to a situation where
// most projects are overworked.
// If there are more overworked projects than device instances,
// this must have happened.
// Set all debts back to zero.
//
void RSC_WORK_FETCH::repair_debts() {
unsigned int i;
int noverworked = 0;
PROJECT* p;
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (w.overworked()) {
noverworked++;
}
}
if (noverworked <= ninstances) {
return;
}
if (log_flags.debt_debug) {
msg_printf(0, MSG_INFO,
"[debt] %s: %d projects overworked; setting debts to zero",
rsc_name(rsc_type), noverworked
);
}
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
RSC_PROJECT_WORK_FETCH& w = project_state(p);
w.debt = 0;
}
}
// update long-term debts for a resource.
//
void RSC_WORK_FETCH::update_debts() {
......@@ -393,12 +431,17 @@ void RSC_WORK_FETCH::update_debts() {
double ders = 0;
PROJECT* p;
if (!repair_done) {
repair_debts();
repair_done = true;
}
// find the total resource share of eligible projects
//
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (w.debt_eligible(p)) {
if (w.debt_eligible(p, *this)) {
ders += p->resource_share;
neligible++;
}
......@@ -417,7 +460,7 @@ void RSC_WORK_FETCH::update_debts() {
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (w.debt_eligible(p)) {
if (w.debt_eligible(p, *this)) {
double share_frac = p->resource_share/ders;
// the change to a project's debt is:
......@@ -467,7 +510,7 @@ void RSC_WORK_FETCH::update_debts() {
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (w.debt_eligible(p)) {
if (w.debt_eligible(p, *this)) {
w.debt += offset;
}
}
......@@ -516,12 +559,22 @@ void WORK_FETCH::compute_shares() {
// should this project be accumulating debt for this resource?
//
bool RSC_PROJECT_WORK_FETCH::debt_eligible(PROJECT* p) {
bool RSC_PROJECT_WORK_FETCH::debt_eligible(PROJECT* p, RSC_WORK_FETCH& rwf) {
    // Returns true if project p should accumulate debt for the
    // resource described by rwf.
    // The checks below are ordered; the first one that applies decides.

    // projects that never accumulate debt
    //
    if (p->non_cpu_intensive || p->suspended_via_gui || p->dont_request_more_work) {
        return false;
    }

    // a project with runnable jobs is eligible,
    // whatever its backoff state
    //
    if (has_runnable_jobs) return true;

    // backoff_time is still in the future
    //
    if (backoff_time > gstate.now) return false;

    // Backoff has expired, but the backoff interval is at its maximum.
    // Let the project accumulate debt only while its debt is at or below
    // -ninstances*DEBT_ADJUST_PERIOD, i.e. until it gets to around zero;
    // above that threshold it stops accumulating.
    //
    const bool at_max_backoff = (backoff_interval == MAX_BACKOFF_INTERVAL);
    if (at_max_backoff && debt > -rwf.ninstances*DEBT_ADJUST_PERIOD) {
        return false;
    }

    // eligible unless the project's min_rpc_time is still in the future
    //
    return !(p->min_rpc_time > gstate.now);
}
......
......@@ -63,7 +63,7 @@ struct RSC_PROJECT_WORK_FETCH {
}
// whether this project is accumulating debt for this resource
bool debt_eligible(PROJECT*);
bool debt_eligible(PROJECT*, RSC_WORK_FETCH&);
inline void reset() {
backoff_time = 0;
backoff_interval = 0;
......@@ -112,6 +112,8 @@ struct RSC_WORK_FETCH {
inline void reset_debt_accounting() {
secs_this_debt_interval = 0;
}
void repair_debts();
bool repair_done;
void rr_init();
void accumulate_shortfall(double d_time);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment