Commit fd5fc4a2 authored by David Anderson

- client: fix bug that could cause scheduler RPC

    to ask for work inappropriately,
    and tell user that it wasn't asking for work.
    Here's what was going on:
    There are two different structures with work request fields
    (req_secs, req_instances, estimated_delay):
    COPROC_CUDA *coproc_cuda
    and
    RSC_WORK_FETCH cuda_work_fetch.
    WORK_FETCH::choose_project() copied from cuda_work_fetch to coproc_cuda,
    but only if a project was selected.
    WORK_FETCH::clear_request() clears cuda_work_fetch but not coproc_cuda.

    Scenario:
    - a scheduler op is made to project A requesting X>0 secs of CUDA
    - later, a scheduler op is made to project B for reason
        other than work fetch (e.g., user request)
    - choose_project() doesn't choose anything,
        so the value of coproc_cuda->req_secs remains X
    - clear_request() is called but that doesn't change *coproc_cuda

    Solution: work-fetch code no longer knows about internals of
        COPROC_CUDA and is not responsible for setting its request fields.
        The copying of request fields from RSC_WORK_FETCH to COPROC
        is done at a higher level,
        in CLIENT_STATE::make_scheduler_request()

    Additional bug fix: estimated_delay wasn't being cleared in some cases.


svn path=/trunk/boinc/; revision=17411
parent 7f0efb54
......@@ -2362,3 +2362,38 @@ Rom 27 Feb 2009
clientgui/
sg_BoincSimpleGUI.cpp, .h
David 27 Feb 2009
- client: fix bug that could cause scheduler RPC
to ask for work inappropriately,
and tell user that it wasn't asking for work.
Here's what was going on:
There are two different structures with work request fields
(req_secs, req_instances, estimated_delay):
COPROC_CUDA *coproc_cuda
and
RSC_WORK_FETCH cuda_work_fetch.
WORK_FETCH::choose_project() copied from cuda_work_fetch to coproc_cuda,
but only if a project was selected.
WORK_FETCH::clear_request() clears cuda_work_fetch but not coproc_cuda.
Scenario:
- a scheduler op is made to project A requesting X>0 secs of CUDA
- later, a scheduler op is made to project B for reason
other than work fetch (e.g., user request)
- choose_project() doesn't choose anything,
so the value of coproc_cuda->req_secs remains X
- clear_request() is called but that doesn't change *coproc_cuda
Solution: work-fetch code no longer knows about internals of
COPROC_CUDA and is not responsible for setting its request fields.
The copying of request fields from RSC_WORK_FETCH to COPROC
is done at a higher level,
in CLIENT_STATE::make_scheduler_request()
Additional bug fix: estimated_delay wasn't being cleared in some cases.
client/
cs_scheduler.cpp
scheduler_op.cpp
work_fetch.cpp
......@@ -117,7 +117,6 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
" <resource_share_fraction>%f</resource_share_fraction>\n"
" <rrs_fraction>%f</rrs_fraction>\n"
" <prrs_fraction>%f</prrs_fraction>\n"
" <estimated_delay>%f</estimated_delay>\n"
" <duration_correction_factor>%f</duration_correction_factor>\n"
" <sandbox>%d</sandbox>\n",
p->authenticator,
......@@ -129,7 +128,6 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
resource_share_fraction,
rrs_fraction,
prrs_fraction,
cpu_work_fetch.estimated_delay,
p->duration_correction_factor,
g_use_sandbox?1:0
);
......@@ -227,6 +225,14 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
disk_total, disk_project
);
// copy request values from RSC_WORK_FETCH to COPROC
//
if (coproc_cuda) {
coproc_cuda->req_secs = cuda_work_fetch.req_secs;
coproc_cuda->req_instances = cuda_work_fetch.req_instances;
coproc_cuda->estimated_delay = cuda_work_fetch.estimated_delay;
}
if (coprocs.coprocs.size()) {
fprintf(f, " <coprocs>\n");
for (i=0; i<coprocs.coprocs.size(); i++) {
......
......@@ -238,13 +238,13 @@ int SCHEDULER_OP::start_rpc(PROJECT* p) {
}
if (log_flags.sched_op_debug) {
msg_printf(p, MSG_INFO,
"CPU work request: %.2f seconds; %d idle CPUs",
"[sched_op_debug] CPU work request: %.2f seconds; %d idle CPUs",
cpu_work_fetch.req_secs, cpu_work_fetch.req_instances
);
if (coproc_cuda) {
msg_printf(p, MSG_INFO,
"CUDA work request: %.2f seconds; %d idle GPUs",
coproc_cuda->req_secs, coproc_cuda->req_instances
"[sched_op_debug] CUDA work request: %.2f seconds; %d idle GPUs",
cuda_work_fetch.req_secs, cuda_work_fetch.req_instances
);
}
}
......
......@@ -315,6 +315,7 @@ static void print_req(PROJECT* p) {
// Zero out this resource's pending scheduler work request.
// Clears all three request fields, including estimated_delay —
// the commit notes that estimated_delay wasn't being cleared
// in some cases, leading to stale values in scheduler RPCs.
//
void RSC_WORK_FETCH::clear_request() {
    estimated_delay = 0;
    req_instances = 0;
    req_secs = 0;
}
void WORK_FETCH::clear_request() {
......@@ -409,17 +410,12 @@ PROJECT* WORK_FETCH::choose_project() {
p = cpu_work_fetch.choose_project(FETCH_IF_PROJECT_STARVED);
}
if (p && coproc_cuda) {
coproc_cuda->req_secs = cuda_work_fetch.req_secs;
coproc_cuda->req_instances = cuda_work_fetch.req_instances;
coproc_cuda->estimated_delay = cuda_work_fetch.estimated_delay;
}
if (log_flags.work_fetch_debug) {
print_state();
if (p) {
print_req(p);
} else {
msg_printf(0, MSG_INFO, "No project chosen for work fetch");
msg_printf(0, MSG_INFO, "[wfd] No project chosen for work fetch");
}
}
......@@ -630,10 +626,12 @@ void WORK_FETCH::write_request(FILE* f) {
fprintf(f,
" <work_req_seconds>%f</work_req_seconds>\n"
" <cpu_req_secs>%f</cpu_req_secs>\n"
" <cpu_req_instances>%d</cpu_req_instances>\n",
" <cpu_req_instances>%d</cpu_req_instances>\n"
" <estimated_delay>%f</estimated_delay>\n",
cpu_work_fetch.req_secs,
cpu_work_fetch.req_secs,
cpu_work_fetch.req_instances
cpu_work_fetch.req_instances,
cpu_work_fetch.estimated_delay
);
}
......@@ -670,10 +668,18 @@ void WORK_FETCH::handle_reply(PROJECT* p, vector<RESULT*> new_results) {
if (got_cuda) p->cuda_pwf.clear_backoff();
}
// Set up the work request for the client's initial scheduler RPC.
// Arrange to always ask for one job (1 second of work), even if we
// don't need it or can't handle it; this is probably what the user wants.
//
// Per this commit's design change, work-fetch code writes only to the
// RSC_WORK_FETCH structures (cpu_work_fetch / cuda_work_fetch); the
// request fields are copied to COPROC_CUDA at a higher level, in
// CLIENT_STATE::make_scheduler_request().  The direct write to
// coproc_cuda->req_secs was removed accordingly.
//
void WORK_FETCH::set_initial_work_request() {
    cpu_work_fetch.req_secs = 1;
    cpu_work_fetch.req_instances = 0;
    cpu_work_fetch.estimated_delay = 0;
    if (coproc_cuda) {
        cuda_work_fetch.req_secs = 1;
        cuda_work_fetch.req_instances = 0;
        cuda_work_fetch.estimated_delay = 0;
    }
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment