From f5a6f862bf0a46ad743d39d10aa699ff065f7fc6 Mon Sep 17 00:00:00 2001 From: David Anderson <davea@ssl.berkeley.edu> Date: Wed, 9 Sep 2009 22:18:02 +0000 Subject: [PATCH] - client: fix bug in RR simulation: start only enough jobs to fill CPUs per project, not all the CPU jobs at once. I'm not sure how much difference this makes, but this is how it's supposed to work. - client: if app_info.xml doesn't specify flops, use an estimate that takes GPUs into account. - client: if it's been more than 2 weeks since time stats update, don't decay on_frac at all. svn path=/trunk/boinc/; revision=19035 --- checkin_notes | 19 +++++++++++++++++++ client/client_state.cpp | 13 ++++++++++++- client/rr_sim.cpp | 24 ++++++++++++++++++++++-- client/time_stats.cpp | 4 ++-- client/work_fetch.cpp | 2 +- lib/coproc.cpp | 4 ++-- lib/coproc.h | 5 +++-- 7 files changed, 61 insertions(+), 10 deletions(-) diff --git a/checkin_notes b/checkin_notes index e3e7a02792..a846d10dc6 100644 --- a/checkin_notes +++ b/checkin_notes @@ -7576,3 +7576,22 @@ David 8 Sept 2009 client/ time_stats.cpp + +David 8 Sept 2009 + - client: fix bug in RR simulation: + start only enough jobs to fill CPUs per project, + not all the CPU jobs at once. + I'm not sure how much difference this makes, + but this is how it's supposed to work. + - client: if app_info.xml doesn't specify flops, + use an estimate that takes GPUs into account. + - client: if it's been more than 2 weeks since time stats update, + don't decay on_frac at all. + + client/ + client_state.cpp + rr_sim.cpp + time_stats.cpp + work_fetch.cpp + lib/ + coproc.cpp,h diff --git a/client/client_state.cpp b/client/client_state.cpp index 6f6314581f..a786a40315 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -298,7 +298,18 @@ int CLIENT_STATE::init() { // for (i=0; i<app_versions.size(); i++) { APP_VERSION* avp = app_versions[i]; - if (!avp->flops) avp->flops = host_info.p_fpops; + if (!avp->flops) { + if (!avp->avg_ncpus) { + avp->avg_ncpus = 1; + } + avp->flops = avp->avg_ncpus * host_info.p_fpops; + if (avp->ncudas) { + avp->flops += avp->ncudas * coproc_cuda->flops_estimate(); + } + if (avp->natis) { + avp->flops += avp->natis * coproc_ati->flops_estimate(); + } + } } check_clock_reset(); diff --git a/client/rr_sim.cpp b/client/rr_sim.cpp index 6c4745e6f4..4e3c961218 100644 --- a/client/rr_sim.cpp +++ b/client/rr_sim.cpp @@ -51,6 +51,17 @@ #include "coproc.h" #include "client_msgs.h" +inline void rsc_string(RESULT* rp, char* buf) { + APP_VERSION* avp = rp->avp; + if (avp->ncudas) { + sprintf(buf, "%.2f CPU + %.2f NV", avp->avg_ncpus, avp->ncudas); + } else if (avp->natis) { + sprintf(buf, "%.2f CPU + %.2f ATI", avp->avg_ncpus, avp->natis); + } else { + sprintf(buf, "%.2f CPU", avp->avg_ncpus); + } +} + // this is here (rather than rr_sim.h) because its inline functions // refer to RESULT // @@ -61,15 +72,24 @@ struct RR_SIM_STATUS { double active_atis; inline void activate(RESULT* rp, double when) { + PROJECT* p = rp->project; if (log_flags.rr_simulation) { - msg_printf(rp->project, MSG_INFO, - "[rr_sim] %.2f: starting %s", when, rp->name + char buf[256]; + rsc_string(rp, buf); + msg_printf(p, MSG_INFO, + "[rr_sim] %.2f: starting %s (%s)", + when, rp->name, buf ); } active.push_back(rp); cpu_work_fetch.sim_nused += rp->avp->avg_ncpus; + p->cpu_pwf.sim_nused += rp->avp->avg_ncpus; + cuda_work_fetch.sim_nused += rp->avp->ncudas; + p->cuda_pwf.sim_nused += rp->avp->ncudas; + ati_work_fetch.sim_nused += rp->avp->natis; + p->ati_pwf.sim_nused += rp->avp->natis; } // remove *rpbest from active set, // and adjust FLOPS left for other results diff --git a/client/time_stats.cpp b/client/time_stats.cpp index 1728a3dffe..5552fe0324 100644 --- a/client/time_stats.cpp +++ b/client/time_stats.cpp @@ -172,9 +172,9 @@ void TIME_STATS::update(int suspend_reason) { // from a client version that wasn't updating due to bug. // Or it could be because user wasn't running for a while // and is starting up again. - // In either case, limit the amount that we decay on_frac. + // In either case, don't decay on_frac. // - dt = 14*86400; + dt = 0; } w1 = 1 - exp(-dt/ALPHA); // weight for recent period diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index d80f5fa9d3..516803097c 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -861,7 +861,7 @@ void WORK_FETCH::init() { ati_work_fetch.init( RSC_TYPE_ATI, coproc_ati->count, - coproc_ati->flops()/gstate.host_info.p_fpops + coproc_ati->flops_estimate()/gstate.host_info.p_fpops ); } diff --git a/lib/coproc.cpp b/lib/coproc.cpp index ad6b1500e9..b087d78151 100644 --- a/lib/coproc.cpp +++ b/lib/coproc.cpp @@ -802,7 +802,7 @@ void COPROC_ATI::get(COPROCS& coprocs, vector<string>& strings) { char buf[256], buf2[256]; if (i == 0) { best = gpus[i]; - } else if (gpus[i].flops() > best.flops()) { + } else if (gpus[i].flops_estimate() > best.flops_estimate()) { best = gpus[i]; } gpus[i].description(buf); @@ -943,7 +943,7 @@ int COPROC_ATI::parse(FILE* fin) { void COPROC_ATI::description(char* buf) { sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fGFLOPS)", - name, version, attribs.localRAM/1024.*1024., flops()/1.e9 + name, version, attribs.localRAM/1024.*1024., flops_estimate()/1.e9 ); } diff --git a/lib/coproc.h b/lib/coproc.h index 75dfaab554..266cd00f74 100644 --- a/lib/coproc.h +++ b/lib/coproc.h @@ -291,9 +291,10 @@ struct COPROC_ATI : public COPROC { void description(char*); void clear(); int parse(FILE*); - inline double flops() { - return attribs.numberOfSIMD * attribs.wavefrontSize * 2.5 * attribs.engineClock * 1.e6; + inline double flops_estimate() { + double x = attribs.numberOfSIMD * attribs.wavefrontSize * 2.5 * attribs.engineClock * 1.e6; // clock is in MHz + return x?x:5e10; } }; -- GitLab