diff --git a/checkin_notes b/checkin_notes index cfde7318d9d0f718a6407ba00f9be1b5b6ba6ecb..0f190df803565c372c7e11725225d16be616928d 100644 --- a/checkin_notes +++ b/checkin_notes @@ -3645,3 +3645,11 @@ David 30 Apr 2012 show_user.php client/ acct_mgr.cpp + +David 30 Apr 2012 + - client: code cleanup. Move RESULT and PROJECT to separate files + + client/ + project.cpp,h (new) + result.cpp,h (new) + *.cpp diff --git a/client/Makefile.am b/client/Makefile.am index 8dbeaceaa1b69d12da0bad3dc5735e24a223aadd..b7c8efea547b4ce1a12662467eaf585d17f0af00 100644 --- a/client/Makefile.am +++ b/client/Makefile.am @@ -70,6 +70,8 @@ boinc_client_SOURCES = \ main.cpp \ net_stats.cpp \ pers_file_xfer.cpp \ + project.cpp \ + result.cpp \ rr_sim.cpp \ sandbox.cpp \ scheduler_op.cpp \ diff --git a/client/acct_mgr.cpp b/client/acct_mgr.cpp index 48c6490aab1715025fdf5537ed4c9ea0a2474143..d55730fefb665c699f8a5e1bcbbcd3c8e14223de 100644 --- a/client/acct_mgr.cpp +++ b/client/acct_mgr.cpp @@ -24,17 +24,19 @@ #include <cstring> #endif -#include "parse.h" +#include "crypt.h" #include "error_numbers.h" -#include "client_msgs.h" +#include "parse.h" #include "str_util.h" #include "str_replace.h" #include "url.h" + +#include "client_msgs.h" +#include "client_state.h" #include "file_names.h" #include "filesys.h" -#include "client_state.h" #include "gui_http.h" -#include "crypt.h" +#include "project.h" #include "acct_mgr.h" diff --git a/client/app.cpp b/client/app.cpp index df8622a2b7c22d0bf640ed36a33bad83f366df2f..de2a20f0a5b253f33baa56d8a3fe3ecc35ae7137 100644 --- a/client/app.cpp +++ b/client/app.cpp @@ -66,6 +66,7 @@ #include "client_msgs.h" #include "client_state.h" #include "procinfo.h" +#include "result.h" #include "sandbox.h" #include "app.h" diff --git a/client/app_control.cpp b/client/app_control.cpp index 250c9bc7779186ce73837bcbaa007bce1f7ab64f..f67a31f93958a06ba9ee5de5577b4788f3e84895 100644 --- a/client/app_control.cpp +++ b/client/app_control.cpp @@ -60,16 +60,18 @@ using std::vector; 
-#include "filesys.h" #include "error_numbers.h" -#include "util.h" -#include "str_util.h" +#include "filesys.h" #include "parse.h" #include "shmem.h" +#include "str_util.h" +#include "util.h" + #include "client_msgs.h" #include "client_state.h" #include "file_names.h" #include "proc_control.h" +#include "result.h" #include "sandbox.h" #include "app.h" diff --git a/client/app_start.cpp b/client/app_start.cpp index 754e4c254d4ba1b1745e7db62e9fa2a18d32601b..c8b69e99794e4005e2415b29636e2e35ff01768c 100644 --- a/client/app_start.cpp +++ b/client/app_start.cpp @@ -81,6 +81,7 @@ using std::string; #include "client_msgs.h" #include "client_state.h" #include "file_names.h" +#include "result.h" #include "sandbox.h" #include "unix_util.h" diff --git a/client/async_file.cpp b/client/async_file.cpp index df0e3103b0b191b0891197debda8506ca9098eb8..7067ef9f8085d0158aa4bf1c47b01937e447e2a3 100644 --- a/client/async_file.cpp +++ b/client/async_file.cpp @@ -29,6 +29,7 @@ #include "app.h" #include "client_msgs.h" #include "client_state.h" +#include "project.h" #include "sandbox.h" #include "async_file.h" diff --git a/client/check_state.cpp b/client/check_state.cpp index 25785a645263b47db8d8764ea33a205d115865b8..84d66d28649d4ffd9f32f500cdfed59ed8533094 100644 --- a/client/check_state.cpp +++ b/client/check_state.cpp @@ -29,6 +29,7 @@ #include "client_msgs.h" #include "client_state.h" +#include "result.h" void CLIENT_STATE::check_project_pointer(PROJECT* p) { unsigned int i; diff --git a/client/client_state.cpp b/client/client_state.cpp index c2826595a3a4c9e39b61b327f79aeca01aea4e90..74d4ff944082afce1610fabc38e744f3809c18c1 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -56,6 +56,8 @@ #include "hostinfo_network.h" #include "http_curl.h" #include "network.h" +#include "project.h" +#include "result.h" #include "sandbox.h" #include "shmem.h" diff --git a/client/client_types.cpp b/client/client_types.cpp index 
ce21d95509f100ac1fd73f79bc42b11eafc51d45..42deb37ff887cce3f856cecd4880cdd4ddc01eef 100644 --- a/client/client_types.cpp +++ b/client/client_types.cpp @@ -50,7 +50,9 @@ #include "client_msgs.h" #include "client_state.h" #include "file_names.h" +#include "project.h" #include "pers_file_xfer.h" +#include "project.h" #include "sandbox.h" #include "client_types.h" @@ -58,588 +60,6 @@ using std::string; using std::vector; -PROJECT::PROJECT() { - init(); -} - -void PROJECT::init() { - strcpy(master_url, ""); - strcpy(authenticator, ""); - project_specific_prefs = ""; - gui_urls = ""; - resource_share = 100; - for (int i=0; i<MAX_RSC; i++) { - no_rsc_pref[i] = false; - no_rsc_config[i] = false; - no_rsc_apps[i] = false; - no_rsc_ams[i] = false; - rsc_defer_sched[i] = false; - } - strcpy(host_venue, ""); - using_venue_specific_prefs = false; - scheduler_urls.clear(); - strcpy(project_name, ""); - strcpy(symstore, ""); - strcpy(user_name, ""); - strcpy(team_name, ""); - strcpy(email_hash, ""); - strcpy(cross_project_id, ""); - cpid_time = 0; - user_total_credit = 0; - user_expavg_credit = 0; - user_create_time = 0; - ams_resource_share = -1; - rpc_seqno = 0; - userid = 0; - teamid = 0; - hostid = 0; - host_total_credit = 0; - host_expavg_credit = 0; - host_create_time = 0; - nrpc_failures = 0; - master_fetch_failures = 0; - min_rpc_time = 0; - possibly_backed_off = true; - master_url_fetch_pending = false; - sched_rpc_pending = 0; - next_rpc_time = 0; - last_rpc_time = 0; - trickle_up_pending = false; - anonymous_platform = false; - non_cpu_intensive = false; - verify_files_on_app_start = false; - pwf.reset(this); - send_time_stats_log = 0; - send_job_log = 0; - send_full_workload = false; - dont_use_dcf = false; - suspended_via_gui = false; - dont_request_more_work = false; - detach_when_done = false; - attached_via_acct_mgr = false; - ended = false; - strcpy(code_sign_key, ""); - user_files.clear(); - project_files.clear(); - next_runnable_result = NULL; - 
duration_correction_factor = 1; - project_files_downloaded_time = 0; - use_symlinks = false; - possibly_backed_off = false; - last_upload_start = 0; - nuploading_results = 0; - too_many_uploading_results = false; - -#ifdef SIM - idle_time = 0; - idle_time_sumsq = 0; - completed_task_count = 0; - completions_ratio_mean = 0.0; - completions_ratio_s = 0.0; - completions_ratio_stdev = 0.1; // for the first couple of completions - guess. - completions_required_stdevs = 3.0; - result_index = 0; -#endif -} - -static void handle_no_rsc_ams(PROJECT* p, const char* name) { - int i = rsc_index(name); - if (i < 0) return; - p->no_rsc_ams[i] = true; -} - -static void handle_no_rsc_pref(PROJECT* p, const char* name) { - int i = rsc_index(name); - if (i<0) return; - p->no_rsc_pref[i] = true; -} - -static void handle_no_rsc_apps(PROJECT* p, const char* name) { - int i = rsc_index(name); - if (i < 0) return; - p->no_rsc_apps[i] = true; -} - -static bool parse_rsc_param(XML_PARSER& xp, const char* end_tag, int& rsc_type, double& value) { - char name[256]; - bool val_found = false; - - rsc_type = -1; - while (!xp.get_tag()) { - if (xp.match_tag(end_tag)) { - return (rsc_type > 0 && val_found); - } - if (xp.parse_str("name", name, sizeof(name))) { - rsc_type = rsc_index(name); - continue; - } - if (xp.parse_double("rsc_type", value)) { - val_found = true; - } - } - return false; -} -// parse project fields from client_state.xml -// -int PROJECT::parse_state(XML_PARSER& xp) { - char buf[256]; - std::string sched_url, stemp; - string str1, str2; - int retval, rt; - double x; - bool btemp; - - init(); - while (!xp.get_tag()) { - if (xp.match_tag("/project")) { - if (cpid_time == 0) { - cpid_time = user_create_time; - } - return 0; - } - if (xp.parse_string("scheduler_url", sched_url)) { - scheduler_urls.push_back(sched_url); - continue; - } - if (xp.parse_str("master_url", master_url, sizeof(master_url))) continue; - if (xp.parse_str("project_name", project_name, sizeof(project_name))) 
continue; - if (xp.parse_str("symstore", symstore, sizeof(symstore))) continue; - if (xp.parse_str("user_name", user_name, sizeof(user_name))) continue; - if (xp.parse_str("team_name", team_name, sizeof(team_name))) continue; - if (xp.parse_str("host_venue", host_venue, sizeof(host_venue))) continue; - if (xp.parse_str("email_hash", email_hash, sizeof(email_hash))) continue; - if (xp.parse_str("cross_project_id", cross_project_id, sizeof(cross_project_id))) continue; - if (xp.parse_double("cpid_time", cpid_time)) continue; - if (xp.parse_double("user_total_credit", user_total_credit)) continue; - if (xp.parse_double("user_expavg_credit", user_expavg_credit)) continue; - if (xp.parse_double("user_create_time", user_create_time)) continue; - if (xp.parse_int("rpc_seqno", rpc_seqno)) continue; - if (xp.parse_int("userid", userid)) continue; - if (xp.parse_int("teamid", teamid)) continue; - if (xp.parse_int("hostid", hostid)) continue; - if (xp.parse_double("host_total_credit", host_total_credit)) continue; - if (xp.parse_double("host_expavg_credit", host_expavg_credit)) continue; - if (xp.parse_double("host_create_time", host_create_time)) continue; - if (xp.match_tag("code_sign_key")) { - retval = copy_element_contents( - xp.f->f, - "</code_sign_key>", - code_sign_key, - sizeof(code_sign_key) - ); - if (retval) return retval; - strip_whitespace(code_sign_key); - continue; - } - if (xp.parse_int("nrpc_failures", nrpc_failures)) continue; - if (xp.parse_int("master_fetch_failures", master_fetch_failures)) continue; - if (xp.parse_double("min_rpc_time", min_rpc_time)) continue; - if (xp.parse_bool("master_url_fetch_pending", master_url_fetch_pending)) continue; - if (xp.parse_int("sched_rpc_pending", sched_rpc_pending)) continue; - if (xp.parse_double("next_rpc_time", next_rpc_time)) continue; - if (xp.parse_bool("trickle_up_pending", trickle_up_pending)) continue; - if (xp.parse_int("send_time_stats_log", send_time_stats_log)) continue; - if 
(xp.parse_int("send_job_log", send_job_log)) continue; - if (xp.parse_bool("send_full_workload", send_full_workload)) continue; - if (xp.parse_bool("dont_use_dcf", dont_use_dcf)) continue; - if (xp.parse_bool("non_cpu_intensive", non_cpu_intensive)) continue; - if (xp.parse_bool("verify_files_on_app_start", verify_files_on_app_start)) continue; - if (xp.parse_bool("suspended_via_gui", suspended_via_gui)) continue; - if (xp.parse_bool("dont_request_more_work", dont_request_more_work)) continue; - if (xp.parse_bool("detach_when_done", detach_when_done)) continue; - if (xp.parse_bool("ended", ended)) continue; - if (xp.parse_double("rec", pwf.rec)) continue; - if (xp.parse_double("rec_time", pwf.rec_time)) continue; - if (xp.parse_double("cpu_backoff_interval", rsc_pwf[0].backoff_interval)) continue; - if (xp.parse_double("cpu_backoff_time", rsc_pwf[0].backoff_time)) { - if (rsc_pwf[0].backoff_time > gstate.now + 28*SECONDS_PER_DAY) { - rsc_pwf[0].backoff_time = gstate.now + 28*SECONDS_PER_DAY; - } - continue; - } - if (xp.match_tag("rsc_backoff_interval")) { - if (parse_rsc_param(xp, "/rsc_backoff_interval", rt, x)) { - rsc_pwf[rt].backoff_interval = x; - } - continue; - } - if (xp.match_tag("rsc_backoff_time")) { - if (parse_rsc_param(xp, "/rsc_backoff_time", rt, x)) { - rsc_pwf[rt].backoff_time = x; - } - continue; - } - if (xp.parse_double("resource_share", resource_share)) continue; - // not authoritative - if (xp.parse_double("duration_correction_factor", duration_correction_factor)) continue; - if (xp.parse_bool("attached_via_acct_mgr", attached_via_acct_mgr)) continue; - if (xp.parse_bool("no_cpu_apps", btemp)) { - if (btemp) handle_no_rsc_apps(this, "CPU"); - continue; - } - if (xp.parse_bool("no_cuda_apps", btemp)) { - if (btemp) handle_no_rsc_apps(this, GPU_TYPE_NVIDIA); - continue; - } - if (xp.parse_bool("no_ati_apps", btemp)) { - if (btemp) handle_no_rsc_apps(this, GPU_TYPE_ATI); - continue; - } - if (xp.parse_str("no_rsc_apps", buf, sizeof(buf))) { - 
handle_no_rsc_apps(this, buf); - continue; - } - if (xp.parse_bool("no_cpu_ams", btemp)) { - if (btemp) handle_no_rsc_ams(this, "CPU"); - continue; - } - if (xp.parse_bool("no_cuda_ams", btemp)) { - if (btemp) handle_no_rsc_ams(this, GPU_TYPE_NVIDIA); - continue; - } - if (xp.parse_bool("no_ati_ams", btemp)) { - if (btemp) handle_no_rsc_ams(this, GPU_TYPE_ATI); - continue; - } - if (xp.parse_str("no_rsc_ams", buf, sizeof(buf))) { - handle_no_rsc_ams(this, buf); - continue; - } - if (xp.parse_str("no_rsc_pref", buf, sizeof(buf))) { - handle_no_rsc_pref(this, buf); - continue; - } - - // backwards compat - old state files had ams_resource_share = 0 - if (xp.parse_double("ams_resource_share_new", ams_resource_share)) continue; - if (xp.parse_double("ams_resource_share", x)) { - if (x > 0) ams_resource_share = x; - continue; - } - if (xp.parse_bool("scheduler_rpc_in_progress", btemp)) continue; - if (xp.parse_bool("use_symlinks", use_symlinks)) continue; - if (xp.parse_bool("anonymous_platform", btemp)) continue; - if (xp.parse_string("trickle_up_url", stemp)) { - trickle_up_ops.push_back(new TRICKLE_UP_OP(stemp)); - continue; - } - if (log_flags.unparsed_xml) { - msg_printf(0, MSG_INFO, - "[unparsed_xml] PROJECT::parse_state(): unrecognized: %s", - xp.parsed_tag - ); - } - xp.skip_unexpected(); - } - return ERR_XML_PARSE; -} - -// Write project information to client state file or GUI RPC reply -// -int PROJECT::write_state(MIOFILE& out, bool gui_rpc) { - unsigned int i; - char un[2048], tn[2048]; - - out.printf( - "<project>\n" - ); - - xml_escape(user_name, un, sizeof(un)); - xml_escape(team_name, tn, sizeof(tn)); - out.printf( - " <master_url>%s</master_url>\n" - " <project_name>%s</project_name>\n" - " <symstore>%s</symstore>\n" - " <user_name>%s</user_name>\n" - " <team_name>%s</team_name>\n" - " <host_venue>%s</host_venue>\n" - " <email_hash>%s</email_hash>\n" - " <cross_project_id>%s</cross_project_id>\n" - " <cpid_time>%f</cpid_time>\n" - " 
<user_total_credit>%f</user_total_credit>\n" - " <user_expavg_credit>%f</user_expavg_credit>\n" - " <user_create_time>%f</user_create_time>\n" - " <rpc_seqno>%d</rpc_seqno>\n" - " <userid>%d</userid>\n" - " <teamid>%d</teamid>\n" - " <hostid>%d</hostid>\n" - " <host_total_credit>%f</host_total_credit>\n" - " <host_expavg_credit>%f</host_expavg_credit>\n" - " <host_create_time>%f</host_create_time>\n" - " <nrpc_failures>%d</nrpc_failures>\n" - " <master_fetch_failures>%d</master_fetch_failures>\n" - " <min_rpc_time>%f</min_rpc_time>\n" - " <next_rpc_time>%f</next_rpc_time>\n" - " <rec>%f</rec>\n" - " <rec_time>%f</rec_time>\n" - - " <resource_share>%f</resource_share>\n" - " <duration_correction_factor>%f</duration_correction_factor>\n" - " <sched_rpc_pending>%d</sched_rpc_pending>\n" - " <send_time_stats_log>%d</send_time_stats_log>\n" - " <send_job_log>%d</send_job_log>\n" - "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - master_url, - project_name, - symstore, - un, - tn, - host_venue, - email_hash, - cross_project_id, - cpid_time, - user_total_credit, - user_expavg_credit, - user_create_time, - rpc_seqno, - userid, - teamid, - hostid, - host_total_credit, - host_expavg_credit, - host_create_time, - nrpc_failures, - master_fetch_failures, - min_rpc_time, - next_rpc_time, - pwf.rec, - pwf.rec_time, - resource_share, - duration_correction_factor, - sched_rpc_pending, - send_time_stats_log, - send_job_log, - anonymous_platform?" <anonymous_platform/>\n":"", - master_url_fetch_pending?" <master_url_fetch_pending/>\n":"", - trickle_up_pending?" <trickle_up_pending/>\n":"", - send_full_workload?" <send_full_workload/>\n":"", - dont_use_dcf?" <dont_use_dcf/>\n":"", - non_cpu_intensive?" <non_cpu_intensive/>\n":"", - verify_files_on_app_start?" <verify_files_on_app_start/>\n":"", - suspended_via_gui?" <suspended_via_gui/>\n":"", - dont_request_more_work?" <dont_request_more_work/>\n":"", - detach_when_done?" <detach_when_done/>\n":"", - ended?" 
<ended/>\n":"", - attached_via_acct_mgr?" <attached_via_acct_mgr/>\n":"", - (this == gstate.scheduler_op->cur_proj)?" <scheduler_rpc_in_progress/>\n":"", - use_symlinks?" <use_symlinks/>\n":"" - ); - for (int j=0; j<coprocs.n_rsc; j++) { - out.printf( - " <rsc_backoff_time>\n" - " <name>%s</name>\n" - " <value>%f</value>\n" - " </rsc_backoff_time>\n" - " <rsc_backoff_interval>\n" - " <name>%s</name>\n" - " <value>%f</value>\n" - " </rsc_backoff_interval>\n", - rsc_name(j), rsc_pwf[j].backoff_time, - rsc_name(j), rsc_pwf[j].backoff_interval - ); - if (no_rsc_ams[j]) { - out.printf(" <no_rsc_ams>%s</no_rsc_ams>\n", rsc_name(j)); - } - if (no_rsc_apps[j]) { - out.printf(" <no_rsc_apps>%s</no_rsc_apps>\n", rsc_name(j)); - } - if (no_rsc_pref[j]) { - out.printf(" <no_rsc_pref>%s</no_rsc_pref>\n", rsc_name(j)); - } - if (j>0 && gui_rpc && (ncoprocs_excluded[j] == rsc_work_fetch[j].ninstances)) { - out.printf(" <no_rsc_config>%s</no_rsc_config>\n", rsc_name(j)); - } - } - if (ams_resource_share >= 0) { - out.printf(" <ams_resource_share_new>%f</ams_resource_share_new>\n", - ams_resource_share - ); - } - if (gui_rpc) { - out.printf( - "%s" - " <sched_priority>%f</sched_priority>\n" - " <last_rpc_time>%f</last_rpc_time>\n" - " <project_files_downloaded_time>%f</project_files_downloaded_time>\n", - gui_urls.c_str(), - sched_priority, - last_rpc_time, - project_files_downloaded_time - ); - if (download_backoff.next_xfer_time > gstate.now) { - out.printf( - " <download_backoff>%f</download_backoff>\n", - download_backoff.next_xfer_time - gstate.now - ); - } - if (upload_backoff.next_xfer_time > gstate.now) { - out.printf( - " <upload_backoff>%f</upload_backoff>\n", - upload_backoff.next_xfer_time - gstate.now - ); - } - if (strlen(host_venue)) { - out.printf(" <venue>%s</venue>\n", host_venue); - } - } else { - for (i=0; i<scheduler_urls.size(); i++) { - out.printf( - " <scheduler_url>%s</scheduler_url>\n", - scheduler_urls[i].c_str() - ); - } - if (strlen(code_sign_key)) { - 
out.printf( - " <code_sign_key>\n%s\n</code_sign_key>\n", code_sign_key - ); - } - for (i=0; i<trickle_up_ops.size(); i++) { - TRICKLE_UP_OP* t = trickle_up_ops[i]; - out.printf( - " <trickle_up_url>%s</trickle_up_url>\n", - t->url.c_str() - ); - } - } - out.printf( - "</project>\n" - ); - return 0; -} - -// Some project data is stored in account file, other in client_state.xml -// Copy fields that are stored in client_state.xml from "p" into "this" -// -void PROJECT::copy_state_fields(PROJECT& p) { - scheduler_urls = p.scheduler_urls; - safe_strcpy(project_name, p.project_name); - safe_strcpy(user_name, p.user_name); - safe_strcpy(team_name, p.team_name); - safe_strcpy(host_venue, p.host_venue); - safe_strcpy(email_hash, p.email_hash); - safe_strcpy(cross_project_id, p.cross_project_id); - user_total_credit = p.user_total_credit; - user_expavg_credit = p.user_expavg_credit; - user_create_time = p.user_create_time; - cpid_time = p.cpid_time; - rpc_seqno = p.rpc_seqno; - userid = p.userid; - teamid = p.teamid; - hostid = p.hostid; - host_total_credit = p.host_total_credit; - host_expavg_credit = p.host_expavg_credit; - host_create_time = p.host_create_time; - nrpc_failures = p.nrpc_failures; - master_fetch_failures = p.master_fetch_failures; - min_rpc_time = p.min_rpc_time; - next_rpc_time = p.next_rpc_time; - master_url_fetch_pending = p.master_url_fetch_pending; - sched_rpc_pending = p.sched_rpc_pending; - trickle_up_pending = p.trickle_up_pending; - safe_strcpy(code_sign_key, p.code_sign_key); - for (int i=0; i<MAX_RSC; i++) { - rsc_pwf[i] = p.rsc_pwf[i]; - no_rsc_pref[i] = p.no_rsc_pref[i]; - no_rsc_apps[i] = p.no_rsc_apps[i]; - no_rsc_ams[i] = p.no_rsc_ams[i]; - } - pwf = p.pwf; - send_full_workload = p.send_full_workload; - dont_use_dcf = p.dont_use_dcf; - send_time_stats_log = p.send_time_stats_log; - send_job_log = p.send_job_log; - non_cpu_intensive = p.non_cpu_intensive; - verify_files_on_app_start = p.verify_files_on_app_start; - suspended_via_gui = 
p.suspended_via_gui; - dont_request_more_work = p.dont_request_more_work; - detach_when_done = p.detach_when_done; - attached_via_acct_mgr = p.attached_via_acct_mgr; - ended = p.ended; - duration_correction_factor = p.duration_correction_factor; - ams_resource_share = p.ams_resource_share; - if (ams_resource_share >= 0) { - resource_share = ams_resource_share; - } - use_symlinks = p.use_symlinks; -} - -// Write project statistic to project statistics file -// -int PROJECT::write_statistics(MIOFILE& out, bool /*gui_rpc*/) { - out.printf( - "<project_statistics>\n" - " <master_url>%s</master_url>\n", - master_url - ); - - for (std::vector<DAILY_STATS>::iterator i=statistics.begin(); - i!=statistics.end(); ++i - ) { - out.printf( - " <daily_statistics>\n" - " <day>%f</day>\n" - " <user_total_credit>%f</user_total_credit>\n" - " <user_expavg_credit>%f</user_expavg_credit>\n" - " <host_total_credit>%f</host_total_credit>\n" - " <host_expavg_credit>%f</host_expavg_credit>\n" - " </daily_statistics>\n", - i->day, - i->user_total_credit, - i->user_expavg_credit, - i->host_total_credit, - i->host_expavg_credit - ); - } - out.printf( - "</project_statistics>\n" - ); - return 0; -} - -void PROJECT::suspend() { - suspended_via_gui = true; - gstate.request_schedule_cpus("project suspended"); - gstate.request_work_fetch("project suspended"); -} -void PROJECT::resume() { - suspended_via_gui = false; - gstate.request_schedule_cpus("project resumed"); - gstate.request_work_fetch("project resumed"); -} - -void PROJECT::abort_not_started() { - for (unsigned int i=0; i<gstate.results.size(); i++) { - RESULT* rp = gstate.results[i]; - if (rp->project != this) continue; - if (rp->is_not_started()) { - rp->abort_inactive(EXIT_ABORTED_VIA_GUI); - } - } -} - -void PROJECT::get_task_durs(double& not_started_dur, double& in_progress_dur) { - not_started_dur = 0; - in_progress_dur = 0; - for (unsigned int i=0; i<gstate.results.size(); i++) { - RESULT* rp = gstate.results[i]; - if (rp->project 
!= this) continue; - double d = rp->estimated_runtime_remaining(); - d /= gstate.time_stats.availability_frac(rp->avp->gpu_usage.rsc_type); - if (rp->is_not_started()) { - not_started_dur += d; - } else { - in_progress_dur += d; - } - } -} - -const char* PROJECT::get_scheduler_url(int index, double r) { - int n = (int) scheduler_urls.size(); - int ir = (int)(r*n); - int i = (index + ir)%n; - return scheduler_urls[i].c_str(); -} - bool FILE_XFER_BACKOFF::ok_to_transfer() { double dt = next_xfer_time - gstate.now; if (dt > gstate.pers_retry_delay_max) { @@ -675,23 +95,6 @@ void FILE_XFER_BACKOFF::file_xfer_succeeded() { next_xfer_time = 0; } -// delete current sym links. -// This is done when parsing scheduler reply, -// to ensure that we get rid of sym links for -// project files no longer in use -// -void PROJECT::delete_project_file_symlinks() { - unsigned int i; - char project_dir[256], path[256]; - - get_project_dir(this, project_dir, sizeof(project_dir)); - for (i=0; i<project_files.size(); i++) { - FILE_REF& fref = project_files[i]; - sprintf(path, "%s/%s", project_dir, fref.open_name); - delete_project_owned_file(path, false); - } -} - int parse_project_files(XML_PARSER& xp, vector<FILE_REF>& project_files) { int retval; project_files.clear(); @@ -716,82 +119,6 @@ int parse_project_files(XML_PARSER& xp, vector<FILE_REF>& project_files) { return ERR_XML_PARSE; } -// install pointers from FILE_REFs to FILE_INFOs for project files, -// and flag FILE_INFOs as being project files. 
-// -void PROJECT::link_project_files() { - FILE_INFO* fip; - vector<FILE_REF>::iterator fref_iter; - fref_iter = project_files.begin(); - while (fref_iter != project_files.end()) { - FILE_REF& fref = *fref_iter; - fip = gstate.lookup_file_info(this, fref.file_name); - if (!fip) { - msg_printf(this, MSG_INTERNAL_ERROR, - "project file refers to non-existent %s", fref.file_name - ); - fref_iter = project_files.erase(fref_iter); - continue; - } - fref.file_info = fip; - fip->is_project_file = true; - fref_iter++; - } -} - -void PROJECT::create_project_file_symlinks() { - for (unsigned i=0; i<gstate.file_infos.size(); i++) { - FILE_INFO* fip = gstate.file_infos[i]; - if (fip->project == this && fip->is_project_file && fip->status == FILE_PRESENT) { - write_symlink_for_project_file(fip); - } - } -} - -void PROJECT::write_project_files(MIOFILE& f) { - unsigned int i; - - if (!project_files.size()) return; - f.printf("<project_files>\n"); - for (i=0; i<project_files.size(); i++) { - FILE_REF& fref = project_files[i]; - fref.write(f); - } - f.printf("</project_files>\n"); -} - -// write symlinks for project files. -// Note: it's conceivable that one physical file -// has several logical names, so try them all -// -int PROJECT::write_symlink_for_project_file(FILE_INFO* fip) { - char project_dir[256], link_path[256], file_path[256]; - unsigned int i; - - get_project_dir(this, project_dir, sizeof(project_dir)); - for (i=0; i<project_files.size(); i++) { - FILE_REF& fref = project_files[i]; - if (fref.file_info != fip) continue; - sprintf(link_path, "%s/%s", project_dir, fref.open_name); - sprintf(file_path, "%s/%s", project_dir, fip->name); - make_soft_link(this, link_path, file_path); - } - return 0; -} - -// a project file download just finished. 
-// If it's the last one, update project_files_downloaded_time -// -void PROJECT::update_project_files_downloaded_time() { - unsigned int i; - for (i=0; i<project_files.size(); i++) { - FILE_REF& fref = project_files[i]; - FILE_INFO* fip = fref.file_info; - if (fip->status != FILE_PRESENT) continue; - } - project_files_downloaded_time = gstate.now; -} - int APP::parse(XML_PARSER& xp) { strcpy(name, ""); strcpy(user_friendly_name, ""); @@ -1781,482 +1108,6 @@ void WORKUNIT::clear_errors() { } } -int RESULT::parse_name(XML_PARSER& xp, const char* end_tag) { - strcpy(name, ""); - while (!xp.get_tag()) { - if (xp.match_tag(end_tag)) return 0; - if (xp.parse_str("name", name, sizeof(name))) continue; - if (log_flags.unparsed_xml) { - msg_printf(0, MSG_INFO, - "[unparsed_xml] RESULT::parse_name(): unrecognized: %s\n", - xp.parsed_tag - ); - } - xp.skip_unexpected(); - } - return ERR_XML_PARSE; -} - -void RESULT::clear() { - strcpy(name, ""); - strcpy(wu_name, ""); - received_time = 0; - report_deadline = 0; - version_num = 0; - strcpy(plan_class, ""); - strcpy(platform, ""); - avp = NULL; - output_files.clear(); - ready_to_report = false; - completed_time = 0; - got_server_ack = false; - final_cpu_time = 0; - final_elapsed_time = 0; -#ifdef SIM - peak_flop_count = 0; -#endif - fpops_per_cpu_sec = 0; - fpops_cumulative = 0; - intops_per_cpu_sec = 0; - intops_cumulative = 0; - _state = RESULT_NEW; - exit_status = 0; - stderr_out = ""; - suspended_via_gui = false; - coproc_missing = false; - report_immediately = false; - rr_sim_misses_deadline = false; - app = NULL; - wup = NULL; - project = NULL; - strcpy(resources, ""); - report_immediately = false; - schedule_backoff = 0; - strcpy(schedule_backoff_reason, ""); -} - -// parse a <result> element from scheduling server. 
-// -int RESULT::parse_server(XML_PARSER& xp) { - FILE_REF file_ref; - - clear(); - while (!xp.get_tag()) { - if (xp.match_tag("/result")) return 0; - if (xp.parse_str("name", name, sizeof(name))) continue; - if (xp.parse_str("wu_name", wu_name, sizeof(wu_name))) continue; - if (xp.parse_double("report_deadline", report_deadline)) continue; - if (xp.parse_str("platform", platform, sizeof(platform))) continue; - if (xp.parse_str("plan_class", plan_class, sizeof(plan_class))) continue; - if (xp.parse_int("version_num", version_num)) continue; - if (xp.match_tag("file_ref")) { - file_ref.parse(xp); - output_files.push_back(file_ref); - continue; - } - if (xp.parse_bool("report_immediately", report_immediately)) continue; - if (log_flags.unparsed_xml) { - msg_printf(0, MSG_INFO, - "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", - xp.parsed_tag - ); - } - xp.skip_unexpected(); - } - return ERR_XML_PARSE; -} - -// parse a <result> element from state file -// -int RESULT::parse_state(XML_PARSER& xp) { - FILE_REF file_ref; - - clear(); - while (!xp.get_tag()) { - if (xp.match_tag("/result")) { - // set state to something reasonable in case of bad state file - // - if (got_server_ack || ready_to_report) { - switch (state()) { - case RESULT_NEW: - case RESULT_FILES_DOWNLOADING: - case RESULT_FILES_DOWNLOADED: - case RESULT_FILES_UPLOADING: - set_state(RESULT_FILES_UPLOADED, "RESULT::parse_state"); - break; - } - } - return 0; - } - if (xp.parse_str("name", name, sizeof(name))) continue; - if (xp.parse_str("wu_name", wu_name, sizeof(wu_name))) continue; - if (xp.parse_double("received_time", received_time)) continue; - if (xp.parse_double("report_deadline", report_deadline)) { - continue; - } - if (xp.match_tag("file_ref")) { - file_ref.parse(xp); -#ifndef SIM - output_files.push_back(file_ref); -#endif - continue; - } - if (xp.parse_double("final_cpu_time", final_cpu_time)) continue; - if (xp.parse_double("final_elapsed_time", final_elapsed_time)) continue; - if 
(xp.parse_int("exit_status", exit_status)) continue; - if (xp.parse_bool("got_server_ack", got_server_ack)) continue; - if (xp.parse_bool("ready_to_report", ready_to_report)) continue; - if (xp.parse_double("completed_time", completed_time)) continue; - if (xp.parse_bool("suspended_via_gui", suspended_via_gui)) continue; - if (xp.parse_bool("report_immediately", report_immediately)) continue; - if (xp.parse_int("state", _state)) continue; - if (xp.parse_string("stderr_out", stderr_out)) continue; - if (xp.parse_double("fpops_per_cpu_sec", fpops_per_cpu_sec)) continue; - if (xp.parse_double("fpops_cumulative", fpops_cumulative)) continue; - if (xp.parse_double("intops_per_cpu_sec", intops_per_cpu_sec)) continue; - if (xp.parse_double("intops_cumulative", intops_cumulative)) continue; - if (xp.parse_str("platform", platform, sizeof(platform))) continue; - if (xp.parse_str("plan_class", plan_class, sizeof(plan_class))) continue; - if (xp.parse_int("version_num", version_num)) continue; - if (log_flags.unparsed_xml) { - msg_printf(0, MSG_INFO, - "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", - xp.parsed_tag - ); - } - xp.skip_unexpected(); - } - return ERR_XML_PARSE; -} - -int RESULT::write(MIOFILE& out, bool to_server) { - unsigned int i; - FILE_INFO* fip; - int n, retval; - - out.printf( - "<result>\n" - " <name>%s</name>\n" - " <final_cpu_time>%f</final_cpu_time>\n" - " <final_elapsed_time>%f</final_elapsed_time>\n" - " <exit_status>%d</exit_status>\n" - " <state>%d</state>\n" - " <platform>%s</platform>\n" - " <version_num>%d</version_num>\n", - name, - final_cpu_time, - final_elapsed_time, - exit_status, - state(), - platform, - version_num - ); - if (strlen(plan_class)) { - out.printf(" <plan_class>%s</plan_class>\n", plan_class); - } - if (fpops_per_cpu_sec) { - out.printf(" <fpops_per_cpu_sec>%f</fpops_per_cpu_sec>\n", fpops_per_cpu_sec); - } - if (fpops_cumulative) { - out.printf(" <fpops_cumulative>%f</fpops_cumulative>\n", fpops_cumulative); - } - if 
(intops_per_cpu_sec) { - out.printf(" <intops_per_cpu_sec>%f</intops_per_cpu_sec>\n", intops_per_cpu_sec); - } - if (intops_cumulative) { - out.printf(" <intops_cumulative>%f</intops_cumulative>\n", intops_cumulative); - } - if (to_server) { - out.printf( - " <app_version_num>%d</app_version_num>\n", - wup->version_num - ); - } - n = (int)stderr_out.length(); - if (n || to_server) { - out.printf("<stderr_out>\n"); - - // the following is here so that it gets recorded on server - // (there's no core_client_version field of result table) - // - if (to_server) { - out.printf( - "<core_client_version>%d.%d.%d</core_client_version>\n", - gstate.core_client_version.major, - gstate.core_client_version.minor, - gstate.core_client_version.release - ); - } - if (n) { - out.printf("<![CDATA[\n"); - out.printf("%s",stderr_out.c_str()); - if (stderr_out[n-1] != '\n') { - out.printf("\n"); - } - out.printf("]]>\n"); - } - out.printf("</stderr_out>\n"); - } - if (to_server) { - for (i=0; i<output_files.size(); i++) { - fip = output_files[i].file_info; - if (fip->uploaded) { - retval = fip->write(out, true); - if (retval) return retval; - } - } - } else { - if (got_server_ack) out.printf(" <got_server_ack/>\n"); - if (ready_to_report) out.printf(" <ready_to_report/>\n"); - if (completed_time) out.printf(" <completed_time>%f</completed_time>\n", completed_time); - if (suspended_via_gui) out.printf(" <suspended_via_gui/>\n"); - if (report_immediately) out.printf(" <report_immediately/>\n"); - out.printf( - " <wu_name>%s</wu_name>\n" - " <report_deadline>%f</report_deadline>\n" - " <received_time>%f</received_time>\n", - wu_name, - report_deadline, - received_time - ); - for (i=0; i<output_files.size(); i++) { - retval = output_files[i].write(out); - if (retval) return retval; - } - } - out.printf("</result>\n"); - return 0; -} - -#ifndef SIM - -int RESULT::write_gui(MIOFILE& out) { - out.printf( - "<result>\n" - " <name>%s</name>\n" - " <wu_name>%s</wu_name>\n" - " 
<version_num>%d</version_num>\n" - " <plan_class>%s</plan_class>\n" - " <project_url>%s</project_url>\n" - " <final_cpu_time>%f</final_cpu_time>\n" - " <final_elapsed_time>%f</final_elapsed_time>\n" - " <exit_status>%d</exit_status>\n" - " <state>%d</state>\n" - " <report_deadline>%f</report_deadline>\n" - " <received_time>%f</received_time>\n" - " <estimated_cpu_time_remaining>%f</estimated_cpu_time_remaining>\n", - name, - wu_name, - version_num, - plan_class, - project->master_url, - final_cpu_time, - final_elapsed_time, - exit_status, - state(), - report_deadline, - received_time, - estimated_runtime_remaining() - ); - if (got_server_ack) out.printf(" <got_server_ack/>\n"); - if (ready_to_report) out.printf(" <ready_to_report/>\n"); - if (completed_time) out.printf(" <completed_time>%f</completed_time>\n", completed_time); - if (suspended_via_gui) out.printf(" <suspended_via_gui/>\n"); - if (project->suspended_via_gui) out.printf(" <project_suspended_via_gui/>\n"); - if (report_immediately) out.printf(" <report_immediately/>\n"); - if (edf_scheduled) out.printf(" <edf_scheduled/>\n"); - if (coproc_missing) out.printf(" <coproc_missing/>\n"); - if (schedule_backoff > gstate.now) { - out.printf(" <scheduler_wait/>\n"); - if (strlen(schedule_backoff_reason)) { - out.printf( - " <scheduler_wait_reason>%s</scheduler_wait_reason>\n", - schedule_backoff_reason - ); - } - } - if (avp->needs_network && gstate.network_suspended) out.printf(" <network_wait/>\n"); - ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this); - if (atp) { - atp->write_gui(out); - } - if (!strlen(resources)) { - // only need to compute this string once - // - if (avp->gpu_usage.rsc_type) { - if (avp->gpu_usage.usage == 1) { - sprintf(resources, - "%.3g CPUs + 1 %s GPU", - avp->avg_ncpus, - rsc_name(avp->gpu_usage.rsc_type) - ); - } else { - sprintf(resources, - "%.3g CPUs + %.3g %s GPUs", - avp->avg_ncpus, - avp->gpu_usage.usage, - rsc_name(avp->gpu_usage.rsc_type) - ); - } - } else if 
(avp->missing_coproc) { - sprintf(resources, "%.3g CPUs + %s GPU (missing)", - avp->avg_ncpus, avp->missing_coproc_name - ); - } else if (!project->non_cpu_intensive && (avp->avg_ncpus != 1)) { - sprintf(resources, "%.3g CPUs", avp->avg_ncpus); - } else { - strcpy(resources, " "); - } - } - if (strlen(resources)>1) { - char buf[256]; - strcpy(buf, ""); - if (atp && atp->task_state() == PROCESS_EXECUTING) { - if (avp->gpu_usage.rsc_type) { - COPROC& cp = coprocs.coprocs[avp->gpu_usage.rsc_type]; - if (cp.count > 1) { - sprintf(buf, " (device %d)", - cp.device_nums[coproc_indices[0]] - ); - } - } - } - out.printf( - " <resources>%s%s</resources>\n", resources, buf - ); - } - out.printf("</result>\n"); - return 0; -} - -#endif - -// Returns true if the result's output files are all either -// successfully uploaded or have unrecoverable errors -// -bool RESULT::is_upload_done() { - unsigned int i; - FILE_INFO* fip; - int retval; - - for (i=0; i<output_files.size(); i++) { - fip = output_files[i].file_info; - if (fip->uploadable()) { - if (fip->had_failure(retval)) continue; - if (!fip->uploaded) { - return false; - } - } - } - return true; -} - -// resets all FILE_INFO's in result to uploaded = false -// -void RESULT::clear_uploaded_flags() { - unsigned int i; - FILE_INFO* fip; - - for (i=0; i<output_files.size(); i++) { - fip = output_files[i].file_info; - fip->uploaded = false; - } -} - -bool RESULT::is_not_started() { - if (computing_done()) return false; - if (gstate.active_tasks.lookup_result(this)) return false; - return true; -} - -bool PROJECT::some_download_stalled() { -#ifndef SIM - unsigned int i; - - if (!download_backoff.ok_to_transfer()) return true; - - for (i=0; i<gstate.pers_file_xfers->pers_file_xfers.size(); i++) { - PERS_FILE_XFER* pfx = gstate.pers_file_xfers->pers_file_xfers[i]; - if (pfx->fip->project != this) continue; - if (pfx->is_upload) continue; - if (pfx->next_request_time > gstate.now) return true; - } -#endif - return false; -} - -// 
return true if some file needed by this result (input or application) -// is downloading and backed off -// -bool RESULT::some_download_stalled() { -#ifndef SIM - unsigned int i; - FILE_INFO* fip; - PERS_FILE_XFER* pfx; - bool some_file_missing = false; - - for (i=0; i<wup->input_files.size(); i++) { - fip = wup->input_files[i].file_info; - if (fip->status != FILE_PRESENT) some_file_missing = true; - pfx = fip->pers_file_xfer; - if (pfx && pfx->next_request_time > gstate.now) { - return true; - } - } - for (i=0; i<avp->app_files.size(); i++) { - fip = avp->app_files[i].file_info; - if (fip->status != FILE_PRESENT) some_file_missing = true; - pfx = fip->pers_file_xfer; - if (pfx && pfx->next_request_time > gstate.now) { - return true; - } - } - - if (some_file_missing && !project->download_backoff.ok_to_transfer()) { - return true; - } -#endif - return false; -} - -FILE_REF* RESULT::lookup_file(FILE_INFO* fip) { - for (unsigned int i=0; i<output_files.size(); i++) { - FILE_REF& fr = output_files[i]; - if (fr.file_info == fip) return &fr; - } - return 0; -} - -FILE_INFO* RESULT::lookup_file_logical(const char* lname) { - for (unsigned int i=0; i<output_files.size(); i++) { - FILE_REF& fr = output_files[i]; - if (!strcmp(lname, fr.open_name)) { - return fr.file_info; - } - } - return 0; -} - -void RESULT::append_log_record() { - char filename[256]; - job_log_filename(*project, filename, sizeof(filename)); - FILE* f = fopen(filename, "ab"); - if (!f) return; - fprintf(f, "%.0f ue %f ct %f fe %.0f nm %s et %f\n", - gstate.now, estimated_runtime_uncorrected(), final_cpu_time, - wup->rsc_fpops_est, name, final_elapsed_time - ); - fclose(f); -} - -// abort a result that's not currently running -// -void RESULT::abort_inactive(int status) { - if (state() >= RESULT_COMPUTE_ERROR) return; - set_state(RESULT_ABORTED, "RESULT::abort_inactive"); - exit_status = status; -} - RUN_MODE::RUN_MODE() { perm_mode = 0; temp_mode = 0; diff --git a/client/client_types.h 
b/client/client_types.h index e28cf8be348ce7982b9c5f3073e7049f6d380bcf..262b8534591470de5ea3dab4354c72573f69d59e 100644 --- a/client/client_types.h +++ b/client/client_types.h @@ -38,10 +38,10 @@ #include "common_defs.h" #include "cc_config.h" -#include "rr_sim.h" -#include "work_fetch.h" #include "cs_notice.h" #include "cs_trickle.h" +#include "rr_sim.h" +#include "work_fetch.h" #ifdef SIM #include "sim.h" @@ -247,315 +247,6 @@ struct PROJ_AM { } }; -struct PROJECT : PROJ_AM { - // the following items come from the account file - // They are a function only of the user and the project - // - char authenticator[256]; - // user's authenticator on this project - std::string project_prefs; - // without the enclosing <project_preferences> tags. - // May include <venue> elements - // This field is used only briefly: between handling a - // scheduler RPC reply and writing the account file - std::string project_specific_prefs; - // without enclosing <project_specific> tags - // Does not include <venue> elements - std::string gui_urls; - // GUI URLs, with enclosing <gui_urls> tags - double resource_share; - // project's resource share relative to other projects. - double resource_share_frac; - // fraction of RS of non-suspended, compute-intensive projects - - // the following are from the user's project prefs - // - bool no_rsc_pref[MAX_RSC]; - - // derived from GPU exclusions in cc_config.xml; - // disable work fetch if all instances excluded - // - bool no_rsc_config[MAX_RSC]; - - // the following are from the project itself - // (or derived from app version list if anonymous platform) - // - bool no_rsc_apps[MAX_RSC]; - - // the following are from the account manager, if any - // - bool no_rsc_ams[MAX_RSC]; - - // the following set dynamically - // - bool rsc_defer_sched[MAX_RSC]; - // This project has a GPU job for which there's insuff. video RAM. 
- // Don't fetch more jobs of this type; they might have same problem - - char host_venue[256]; - // logically, this belongs in the client state file - // rather than the account file. - // But we need it in the latter in order to parse prefs. - bool using_venue_specific_prefs; - - // the following items come from client_state.xml - // They may depend on the host as well as user and project - // NOTE: if you add anything, add it to copy_state_fields() also!!! - // - std::vector<std::string> scheduler_urls; - // where to find scheduling servers - char symstore[256]; - // URL of symbol server (Windows) - char user_name[256]; - char team_name[256]; - char email_hash[MD5_LEN]; - char cross_project_id[MD5_LEN]; - double cpid_time; - double user_total_credit; - double user_expavg_credit; - double user_create_time; - int userid; - int teamid; - int hostid; - double host_total_credit; - double host_expavg_credit; - double host_create_time; - double ams_resource_share; - // resource share according to AMS; overrides project - // -1 means not specified by AMS - - // stuff related to scheduler RPCs and master fetch - // - int rpc_seqno; - int nrpc_failures; - // # of consecutive times we've failed to contact all scheduling servers - int master_fetch_failures; - double min_rpc_time; - // earliest time to contact any server of this project (or zero) - void set_min_rpc_time(double future_time, const char* reason); - double next_rpc_time; - // if nonzero, specifies a time when another scheduler RPC - // should be done (as requested by server). - // An RPC could be done sooner than this. - bool waiting_until_min_rpc_time(); - // returns true if min_rpc_time > now - bool master_url_fetch_pending; - // need to fetch and parse the master URL - int sched_rpc_pending; - // we need to do a scheduler RPC, for various possible reasons: - // user request, propagate host CPID, time-based, etc. 
- // Reasons are enumerated in lib/common_defs.h - bool possibly_backed_off; - // we need to call request_work_fetch() when a project - // transitions from being backed off to not. - // This (slightly misnamed) keeps track of whether this - // may still need to be done for given project - bool trickle_up_pending; - // have trickle up to send - double last_rpc_time; - // when last RPC finished - // not maintained across client sessions - // used by Manager (simple view) - - // Other stuff - - bool anonymous_platform; - // app_versions.xml file found in project dir; - // use those apps rather then getting from server - bool non_cpu_intensive; - // All this project's apps are non-CPU-intensive. - // Apps can also be individually marked as NCI - bool verify_files_on_app_start; - // Check app version and input files on app startup, - // to make sure they haven't been tampered with. - // This provides only the illusion of security. - bool use_symlinks; - double disk_usage; - // computed by get_disk_usages() - double disk_share; - // computed by get_disk_shares(); - - // items send in scheduler replies, requesting that - // various things be sent in the next request - // - int send_time_stats_log; - // if nonzero, send time stats log from that point on - int send_job_log; - // if nonzero, send this project's job log from that point on - bool send_full_workload; - bool dont_use_dcf; - - bool suspended_via_gui; - bool dont_request_more_work; - // Return work, but don't request more - // Used for a clean exit to a project, - // or if a user wants to pause doing work for the project - bool attached_via_acct_mgr; - bool detach_when_done; - // when no results for this project, detach it. - bool ended; - // project has ended; advise user to detach - char code_sign_key[MAX_KEY_LEN]; - std::vector<FILE_REF> user_files; - std::vector<FILE_REF> project_files; - // files not specific to apps or work - e.g. 
icons - int parse_preferences_for_user_files(); - void write_project_files(MIOFILE&); - void link_project_files(); - void create_project_file_symlinks(); - void delete_project_file_symlinks(); - int write_symlink_for_project_file(FILE_INFO*); - double project_files_downloaded_time; - // when last project file download finished - void update_project_files_downloaded_time(); - // called when a project file download finishes. - // If it's the last one, set project_files_downloaded_time to now - - double duration_correction_factor; - // Multiply by this when estimating the CPU time of a result - // (based on FLOPs estimated and benchmarks). - // This is dynamically updated in a way that maintains an upper bound. - // it goes down slowly but if a new estimate X is larger, - // the factor is set to X. - // - // Deprecated - current server logic handles this, - // and this should go to 1. - // But we need to keep it around for older projects - void update_duration_correction_factor(ACTIVE_TASK*); - - // fields used by CPU scheduler and work fetch - // everything from here on applies only to CPU intensive projects - - bool can_request_work(); - // not suspended and not deferred and not no more work - bool runnable(int rsc_type); - // has a runnable result using the given resource type - bool downloading(); - // has a result in downloading state - bool potentially_runnable(); - // runnable or contactable or downloading - bool nearly_runnable(); - // runnable or downloading - bool overworked(); - // the project has used too much CPU time recently - bool some_download_stalled(); - // a download is backed off - bool some_result_suspended(); - double last_upload_start; - // the last time an upload was started. 
- // Used for "work fetch deferral" mechanism: - // don't request work from a project if an upload started - // in last X minutes and is still active - bool uploading(); - bool has_results(); - - struct RESULT *next_runnable_result; - // the next result to run for this project - int nuploading_results; - // number of results in UPLOADING state - // Don't start new results if these exceeds 2*ncpus. - bool too_many_uploading_results; - - // scheduling (work fetch and job scheduling) - // - double sched_priority; - void compute_sched_priority(); - - // stuff for RR sim - // - double rr_sim_cpu_share; - bool rr_sim_active; - int ncoprocs_excluded[MAX_RSC]; - // number of excluded instances per processor type - bool operator<(const PROJECT& p) { - return sched_priority > p.sched_priority; - } - - // stuff related to work fetch - // - RSC_PROJECT_WORK_FETCH rsc_pwf[MAX_RSC]; - PROJECT_WORK_FETCH pwf; - inline void reset() { - for (int i=0; i<coprocs.n_rsc; i++) { - rsc_pwf[i].reset(); - } - } - inline int deadlines_missed(int rsc_type) { - return rsc_pwf[rsc_type].deadlines_missed; - } - void get_task_durs(double& not_started_dur, double& in_progress_dur); - - int nresults_returned; - // # of results being returned in current scheduler op - const char* get_scheduler_url(int index, double r); - // get scheduler URL with random offset r - bool checked; - // temporary used when scanning projects - - FILE_XFER_BACKOFF download_backoff; - FILE_XFER_BACKOFF upload_backoff; - inline FILE_XFER_BACKOFF& file_xfer_backoff(bool is_upload) { - return is_upload?upload_backoff:download_backoff; - } - - // support for replicated trickle-ups - // - std::vector<TRICKLE_UP_OP*> trickle_up_ops; - - PROJECT(); - ~PROJECT(){} - void init(); - void copy_state_fields(PROJECT&); - int write_account_file(); - int parse_account(FILE*); - int parse_account_file_venue(); - int parse_account_file(); - int parse_state(XML_PARSER&); - int write_state(MIOFILE&, bool gui_rpc=false); - - // statistic of 
the last x days - std::vector<DAILY_STATS> statistics; - int parse_statistics(MIOFILE&); - int parse_statistics(FILE*); - int write_statistics(MIOFILE&, bool gui_rpc=false); - int write_statistics_file(); - - void suspend(); - void resume(); - void abort_not_started(); - // abort unstarted jobs - - // clear AMS-related fields - inline void detach_ams() { - attached_via_acct_mgr = false; - ams_resource_share = -1; - for (int i=0; i<MAX_RSC; i++) { - no_rsc_ams[i] = false; - } - } - -#ifdef SIM - RANDOM_PROCESS available; - int index; - int result_index; - double idle_time; - double idle_time_sumsq; - bool idle; - int max_infeasible_count; - bool no_apps; - // for DCF variants: - int completed_task_count; - double completions_ratio_mean; - double completions_ratio_s; - double completions_ratio_stdev; - double completions_required_stdevs; - PROJECT_RESULTS project_results; - void print_results(FILE*, SIM_RESULTS&); - void backoff(); - void update_dcf_stats(RESULT*); -#endif -}; - struct APP { char name[256]; char user_friendly_name[256]; @@ -662,169 +353,6 @@ struct WORKUNIT { void clear_errors(); }; -struct RESULT { - char name[256]; - char wu_name[256]; - double received_time; // when we got this from server - double report_deadline; - int version_num; // identifies the app used - char plan_class[64]; - char platform[256]; - APP_VERSION* avp; - std::vector<FILE_REF> output_files; - bool ready_to_report; - // we're ready to report this result to the server; - // either computation is done and all the files have been uploaded - // or there was an error - double completed_time; - // time when ready_to_report was set - bool got_server_ack; - // we've received the ack for this result from the server - double final_cpu_time; - double final_elapsed_time; -#ifdef SIM - double peak_flop_count; - double sim_flops_left; -#endif - - // the following are nonzero if reported by app - double fpops_per_cpu_sec; - double fpops_cumulative; - double intops_per_cpu_sec; - double 
intops_cumulative; - - int _state; - // state of this result: see lib/result_state.h - inline int state() { return _state; } - inline void set_ready_to_report() { - ready_to_report = true; - } - void set_state(int, const char*); - int exit_status; - // return value from the application - std::string stderr_out; - // the concatenation of: - // - // - if report_result_error() is called for this result: - // <message>x</message> - // <exit_status>x</exit_status> - // <signal>x</signal> - // - if called in FILES_DOWNLOADED state: - // <couldnt_start>x</couldnt_start> - // - if called in NEW state: - // <download_error>x</download_error> for each failed download - // - if called in COMPUTE_DONE state: - // <upload_error>x</upload_error> for each failed upload - // - // - <stderr_txt>X</stderr_txt>, where X is the app's stderr output - bool suspended_via_gui; - bool coproc_missing; - // a coproc needed by this job is missing - // (e.g. because user removed their GPU board). - bool report_immediately; - bool not_started; // temp for CPU sched - - std::string name_md5; // see sort_results(); - int index; // index in results vector - - APP* app; - WORKUNIT* wup; - PROJECT* project; - - RESULT(){} - ~RESULT(){} - void clear(); - int parse_server(XML_PARSER&); - int parse_state(XML_PARSER&); - int parse_name(XML_PARSER&, const char* end_tag); - int write(MIOFILE&, bool to_server); - int write_gui(MIOFILE&); - bool is_upload_done(); // files uploaded? 
- void clear_uploaded_flags(); - FILE_REF* lookup_file(FILE_INFO*); - FILE_INFO* lookup_file_logical(const char*); - void abort_inactive(int); - // abort the result if it hasn't started computing yet - // Called only for results with no active task - // (otherwise you need to abort the active task) - void append_log_record(); - - // stuff related to CPU scheduling - - bool is_not_started(); - double estimated_runtime(); - double estimated_runtime_uncorrected(); - double estimated_runtime_remaining(); - inline double estimated_flops_remaining() { -#ifdef SIM - return sim_flops_left; -#else - return estimated_runtime_remaining()*avp->flops; -#endif - } - - inline bool computing_done() { - if (state() >= RESULT_COMPUTE_ERROR) return true; - if (ready_to_report) return true; - return false; - } - bool runnable(); - // downloaded, not finished, not suspended, project not suspended - bool nearly_runnable(); - // downloading or downloaded, - // not finished, suspended, project not suspended - bool downloading(); - // downloading, not downloaded, not suspended, project not suspended - bool some_download_stalled(); - // some input or app file is downloading, and backed off - // i.e. 
it may be a long time before we can run this result - inline bool uses_coprocs() { - return (avp->gpu_usage.rsc_type != 0); - } - inline int resource_type() { - return avp->gpu_usage.rsc_type; - } - inline bool non_cpu_intensive() { - if (project->non_cpu_intensive) return true; - if (app->non_cpu_intensive) return true; - return false; - } - inline bool dont_throttle() { - if (non_cpu_intensive()) return true; - if (avp->dont_throttle) return true; - return false; - } - - // temporaries used in CLIENT_STATE::rr_simulation(): - double rrsim_flops_left; - double rrsim_finish_delay; - double rrsim_flops; - bool rrsim_done; - - bool already_selected; - // used to keep cpu scheduler from scheduling a result twice - // transient; used only within schedule_cpus() - double computation_deadline(); - // report deadline - prefs.work_buf_min - time slice - bool rr_sim_misses_deadline; - - // temporaries used in enforce_schedule(): - bool unfinished_time_slice; - int seqno; - - bool edf_scheduled; - // temporary used to tell GUI that this result is deadline-scheduled - - int coproc_indices[MAX_COPROCS_PER_JOB]; - // keep track of coprocessor reservations - char resources[256]; - // textual description of resources used - double schedule_backoff; - // don't try to schedule until this time - // (wait for free GPU RAM) - char schedule_backoff_reason[256]; -}; - // represents an always/auto/never value, possibly temporarily overridden struct RUN_MODE { diff --git a/client/cpu_sched.cpp b/client/cpu_sched.cpp index 801c2c8b6317727aaa7975b5ebc3aa72bbf1f477..8ce98560e9a832607bdb3250d04dd3730ccfdb8b 100644 --- a/client/cpu_sched.cpp +++ b/client/cpu_sched.cpp @@ -70,11 +70,13 @@ #include "str_util.h" #include "util.h" +#include "app.h" #include "client_msgs.h" +#include "client_state.h" #include "log_flags.h" -#include "app.h" +#include "project.h" +#include "result.h" -#include "client_state.h" using std::vector; using std::list; diff --git a/client/cs_account.cpp 
b/client/cs_account.cpp index 9d12a5afcfd2d597f1d2fe42179ca70bfe3c8424..1d1a16083aad77338993887d1642d9106c70488a 100644 --- a/client/cs_account.cpp +++ b/client/cs_account.cpp @@ -31,16 +31,18 @@ #endif #endif +#include "error_numbers.h" #include "filesys.h" #include "parse.h" +#include "str_replace.h" #include "str_util.h" #include "url.h" -#include "str_replace.h" -#include "client_state.h" + #include "client_msgs.h" -#include "log_flags.h" -#include "error_numbers.h" +#include "client_state.h" #include "file_names.h" +#include "log_flags.h" +#include "project.h" using std::string; using std::sort; diff --git a/client/cs_apps.cpp b/client/cs_apps.cpp index d37ca41d619f0a52309ba373f448a03f9f7c69cc..254772118e8ccc8ea32c56491d98dc98b0d20f3b 100644 --- a/client/cs_apps.cpp +++ b/client/cs_apps.cpp @@ -29,15 +29,18 @@ #include <csignal> #endif -#include "md5_file.h" -#include "util.h" #include "error_numbers.h" -#include "file_names.h" #include "filesys.h" +#include "md5_file.h" #include "shmem.h" -#include "log_flags.h" +#include "util.h" + #include "client_msgs.h" #include "client_state.h" +#include "file_names.h" +#include "log_flags.h" +#include "project.h" +#include "result.h" using std::vector; diff --git a/client/cs_cmdline.cpp b/client/cs_cmdline.cpp index bbfa798da61ba3218d2114310182f600c324d455..139e63ef9736075890d96640dc80d6490e5ba98c 100644 --- a/client/cs_cmdline.cpp +++ b/client/cs_cmdline.cpp @@ -35,11 +35,12 @@ #include "str_replace.h" #include "util.h" -#include "main.h" #include "client_msgs.h" #include "client_state.h" -#include "sandbox.h" #include "cs_proxy.h" +#include "main.h" +#include "project.h" +#include "sandbox.h" static void print_options(char* prog) { printf( diff --git a/client/cs_files.cpp b/client/cs_files.cpp index f15c3a35c81e73934f21efddbe3b13c2b16005bb..e6ed1eb0e8a240354efad2dda773ac90023d5bb5 100644 --- a/client/cs_files.cpp +++ b/client/cs_files.cpp @@ -43,6 +43,7 @@ #include "client_state.h" #include "client_msgs.h" #include 
"file_xfer.h" +#include "project.h" #include "sandbox.h" using std::vector; diff --git a/client/cs_notice.cpp b/client/cs_notice.cpp index b1fa920f2c338fc3f5c71157220e0873ba4fd6a7..f185c6965ffef12d7741a90e8c4fe9e367e9661f 100644 --- a/client/cs_notice.cpp +++ b/client/cs_notice.cpp @@ -32,6 +32,7 @@ #include "client_state.h" #include "client_msgs.h" #include "file_names.h" +#include "project.h" #include "cs_notice.h" diff --git a/client/cs_platforms.cpp b/client/cs_platforms.cpp index c4878a9e73d13c94c0f635104267dc7a340cc52e..33b3ca39105f609db4c870663607a5ae12d948ad 100644 --- a/client/cs_platforms.cpp +++ b/client/cs_platforms.cpp @@ -45,17 +45,17 @@ LPFN_ISWOW64PROCESS fnIsWow64Process; #include <sys/sysctl.h> #endif - - -#include "client_types.h" -#include "client_state.h" #include "error_numbers.h" -#include "log_flags.h" #include "filesys.h" #include "str_util.h" #include "str_replace.h" #include "util.h" +#include "client_types.h" +#include "client_state.h" +#include "log_flags.h" +#include "project.h" + // return the primary platform id. 
// const char* CLIENT_STATE::get_primary_platform() { diff --git a/client/cs_prefs.cpp b/client/cs_prefs.cpp index f2ec78192cab4909aa230ab42399f6ebc4aad465..8fb3aa77030d09ad7156b356b429da243547e32e 100644 --- a/client/cs_prefs.cpp +++ b/client/cs_prefs.cpp @@ -33,15 +33,17 @@ #endif #endif +#include "filesys.h" +#include "parse.h" #include "str_util.h" #include "str_replace.h" #include "util.h" -#include "filesys.h" -#include "parse.h" -#include "file_names.h" -#include "cpu_benchmark.h" + #include "client_msgs.h" #include "client_state.h" +#include "cpu_benchmark.h" +#include "file_names.h" +#include "project.h" using std::min; using std::string; diff --git a/client/cs_scheduler.cpp b/client/cs_scheduler.cpp index 5c1fda6c23bed25718d81b7c1dfd79c9c7fc535f..756efe0c77294eb816f0174b75f49caff459e83b 100644 --- a/client/cs_scheduler.cpp +++ b/client/cs_scheduler.cpp @@ -47,6 +47,8 @@ #include "client_msgs.h" #include "cs_notice.h" #include "cs_trickle.h" +#include "project.h" +#include "result.h" #include "scheduler_op.h" #include "sandbox.h" diff --git a/client/cs_statefile.cpp b/client/cs_statefile.cpp index 4c63ade79ded6c2e4bdc55578d474688a9311993..27acfdfd8b6cacc52f0735a1c770cb697252a651 100644 --- a/client/cs_statefile.cpp +++ b/client/cs_statefile.cpp @@ -30,10 +30,12 @@ #include "str_util.h" #include "util.h" -#include "cs_proxy.h" -#include "file_names.h" #include "client_msgs.h" #include "client_state.h" +#include "cs_proxy.h" +#include "file_names.h" +#include "project.h" +#include "result.h" #define MAX_STATE_FILE_WRITE_ATTEMPTS 2 diff --git a/client/cs_trickle.cpp b/client/cs_trickle.cpp index bdd14930b432e55f77a385d82f6942134dfe91aa..006fb5fb1ff99c583a651087017e3b38630e96e1 100644 --- a/client/cs_trickle.cpp +++ b/client/cs_trickle.cpp @@ -33,9 +33,11 @@ #include "parse.h" #include "util.h" #include "str_util.h" -#include "sandbox.h" -#include "client_state.h" + #include "client_msgs.h" +#include "client_state.h" +#include "project.h" +#include "sandbox.h" 
using std::string; diff --git a/client/file_names.cpp b/client/file_names.cpp index 7e5ff887b4c4b42b3cc407f519780c86f8524fea..7138166d07bc39b8d0de7f1271574b819f9bd3b4 100644 --- a/client/file_names.cpp +++ b/client/file_names.cpp @@ -33,15 +33,17 @@ #include "shmem.h" #endif -#include "filesys.h" #include "error_numbers.h" +#include "filesys.h" #include "str_util.h" #include "str_replace.h" #include "url.h" #include "util.h" + #include "client_msgs.h" -#include "sandbox.h" #include "client_state.h" +#include "project.h" +#include "sandbox.h" #include "file_names.h" diff --git a/client/file_xfer.cpp b/client/file_xfer.cpp index 39946340c3edeb0c33f7e0824ec91ac9b39e1387..3fbac1e08a43b09fe594d0db53032bc4cdcf7079 100644 --- a/client/file_xfer.cpp +++ b/client/file_xfer.cpp @@ -23,14 +23,16 @@ #include "config.h" #endif -#include "util.h" +#include "error_numbers.h" #include "file_names.h" -#include "client_state.h" #include "filesys.h" +#include "parse.h" +#include "util.h" + +#include "client_state.h" #include "client_msgs.h" #include "file_xfer.h" -#include "parse.h" -#include "error_numbers.h" +#include "project.h" using std::vector; diff --git a/client/gui_rpc_server_ops.cpp b/client/gui_rpc_server_ops.cpp index 353f7dd64bbf83907dc0f400888314b4223555b0..383f8f9ff51e09177a4083c9c916787c1fdc48c6 100644 --- a/client/gui_rpc_server_ops.cpp +++ b/client/gui_rpc_server_ops.cpp @@ -50,20 +50,22 @@ #endif #endif +#include "error_numbers.h" +#include "filesys.h" +#include "network.h" +#include "parse.h" #include "str_util.h" #include "url.h" -#include "client_state.h" #include "util.h" -#include "error_numbers.h" -#include "parse.h" -#include "network.h" -#include "filesys.h" -#include "file_names.h" +#include "client_state.h" #include "client_msgs.h" #include "client_state.h" #include "cs_proxy.h" #include "cs_notice.h" +#include "file_names.h" +#include "project.h" +#include "result.h" using std::string; using std::vector; diff --git a/client/http_curl.cpp 
b/client/http_curl.cpp index d7e0cac7b2f176342a7b7b68b72720671aa63e7d..554ac9a667e3e46978b3f7d9492721623f74b7dc 100644 --- a/client/http_curl.cpp +++ b/client/http_curl.cpp @@ -36,22 +36,22 @@ #endif #endif +#include "base64.h" #include "error_numbers.h" #include "filesys.h" -#include "client_msgs.h" -#include "log_flags.h" #include "str_util.h" #include "str_replace.h" #include "url.h" #include "util.h" -#include "network.h" -#include "file_names.h" #include "client_msgs.h" -#include "base64.h" #include "client_state.h" #include "cs_proxy.h" +#include "file_names.h" +#include "log_flags.h" +#include "network.h" #include "net_stats.h" +#include "project.h" #include "http_curl.h" diff --git a/client/log_flags.cpp b/client/log_flags.cpp index 23388b066ce175ad537d8fbe8e6d0a3714eebf1f..f2a1a259571ea57e79dfbcb0ece84b2af939313b 100644 --- a/client/log_flags.cpp +++ b/client/log_flags.cpp @@ -36,10 +36,12 @@ #include "parse.h" #include "str_util.h" -#include "file_names.h" #include "client_state.h" #include "client_msgs.h" #include "cs_proxy.h" +#include "file_names.h" +#include "project.h" +#include "result.h" using std::string; diff --git a/client/net_stats.cpp b/client/net_stats.cpp index e42e4f28015d5da11475545d92092fb8d44745e8..5ec4034fbc8b5f10d1743b861faf404c0aa3a1d4 100644 --- a/client/net_stats.cpp +++ b/client/net_stats.cpp @@ -31,17 +31,18 @@ #include <cmath> #endif +#include "error_numbers.h" +#include "filesys.h" #include "parse.h" -#include "time.h" #include "str_util.h" -#include "error_numbers.h" +#include "time.h" #include "util.h" -#include "filesys.h" #include "client_msgs.h" #include "client_state.h" -#include "file_names.h" #include "cs_proxy.h" +#include "file_names.h" +#include "project.h" #include "net_stats.h" diff --git a/client/pers_file_xfer.cpp b/client/pers_file_xfer.cpp index 1b70d26ca4f28ce619df978c6733d32a0099d47b..b5d8c919ce26be69315e77d0233f0ada67eacde2 100644 --- a/client/pers_file_xfer.cpp +++ b/client/pers_file_xfer.cpp @@ -32,11 
+32,12 @@ #include "str_util.h" #include "filesys.h" -#include "log_flags.h" -#include "file_names.h" #include "client_state.h" #include "client_types.h" #include "client_msgs.h" +#include "file_names.h" +#include "log_flags.h" +#include "project.h" using std::vector; diff --git a/client/project.cpp b/client/project.cpp new file mode 100644 index 0000000000000000000000000000000000000000..995156b763bfb1ffc419463eba19fd4e31547f9d --- /dev/null +++ b/client/project.cpp @@ -0,0 +1,795 @@ +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2012 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see <http://www.gnu.org/licenses/>. 
+ +#include <string.h> + +#include "str_replace.h" + +#include "client_msgs.h" +#include "client_state.h" +#include "log_flags.h" +#include "result.h" +#include "sandbox.h" + +#include "project.h" + +PROJECT::PROJECT() { + init(); +} + +void PROJECT::init() { + strcpy(master_url, ""); + strcpy(authenticator, ""); + project_specific_prefs = ""; + gui_urls = ""; + resource_share = 100; + for (int i=0; i<MAX_RSC; i++) { + no_rsc_pref[i] = false; + no_rsc_config[i] = false; + no_rsc_apps[i] = false; + no_rsc_ams[i] = false; + rsc_defer_sched[i] = false; + } + strcpy(host_venue, ""); + using_venue_specific_prefs = false; + scheduler_urls.clear(); + strcpy(project_name, ""); + strcpy(symstore, ""); + strcpy(user_name, ""); + strcpy(team_name, ""); + strcpy(email_hash, ""); + strcpy(cross_project_id, ""); + cpid_time = 0; + user_total_credit = 0; + user_expavg_credit = 0; + user_create_time = 0; + ams_resource_share = -1; + rpc_seqno = 0; + userid = 0; + teamid = 0; + hostid = 0; + host_total_credit = 0; + host_expavg_credit = 0; + host_create_time = 0; + nrpc_failures = 0; + master_fetch_failures = 0; + min_rpc_time = 0; + possibly_backed_off = true; + master_url_fetch_pending = false; + sched_rpc_pending = 0; + next_rpc_time = 0; + last_rpc_time = 0; + trickle_up_pending = false; + anonymous_platform = false; + non_cpu_intensive = false; + verify_files_on_app_start = false; + pwf.reset(this); + send_time_stats_log = 0; + send_job_log = 0; + send_full_workload = false; + dont_use_dcf = false; + suspended_via_gui = false; + dont_request_more_work = false; + detach_when_done = false; + attached_via_acct_mgr = false; + ended = false; + strcpy(code_sign_key, ""); + user_files.clear(); + project_files.clear(); + next_runnable_result = NULL; + duration_correction_factor = 1; + project_files_downloaded_time = 0; + use_symlinks = false; + possibly_backed_off = false; + last_upload_start = 0; + nuploading_results = 0; + too_many_uploading_results = false; + +#ifdef SIM + 
idle_time = 0; + idle_time_sumsq = 0; + completed_task_count = 0; + completions_ratio_mean = 0.0; + completions_ratio_s = 0.0; + completions_ratio_stdev = 0.1; // for the first couple of completions - guess. + completions_required_stdevs = 3.0; + result_index = 0; +#endif +} + +static void handle_no_rsc_ams(PROJECT* p, const char* name) { + int i = rsc_index(name); + if (i < 0) return; + p->no_rsc_ams[i] = true; +} + +static void handle_no_rsc_pref(PROJECT* p, const char* name) { + int i = rsc_index(name); + if (i<0) return; + p->no_rsc_pref[i] = true; +} + +static void handle_no_rsc_apps(PROJECT* p, const char* name) { + int i = rsc_index(name); + if (i < 0) return; + p->no_rsc_apps[i] = true; +} + +static bool parse_rsc_param(XML_PARSER& xp, const char* end_tag, int& rsc_type, double& value) { + char name[256]; + bool val_found = false; + + rsc_type = -1; + while (!xp.get_tag()) { + if (xp.match_tag(end_tag)) { + return (rsc_type > 0 && val_found); + } + if (xp.parse_str("name", name, sizeof(name))) { + rsc_type = rsc_index(name); + continue; + } + if (xp.parse_double("rsc_type", value)) { + val_found = true; + } + } + return false; +} +// parse project fields from client_state.xml +// +int PROJECT::parse_state(XML_PARSER& xp) { + char buf[256]; + std::string sched_url, stemp; + string str1, str2; + int retval, rt; + double x; + bool btemp; + + init(); + while (!xp.get_tag()) { + if (xp.match_tag("/project")) { + if (cpid_time == 0) { + cpid_time = user_create_time; + } + return 0; + } + if (xp.parse_string("scheduler_url", sched_url)) { + scheduler_urls.push_back(sched_url); + continue; + } + if (xp.parse_str("master_url", master_url, sizeof(master_url))) continue; + if (xp.parse_str("project_name", project_name, sizeof(project_name))) continue; + if (xp.parse_str("symstore", symstore, sizeof(symstore))) continue; + if (xp.parse_str("user_name", user_name, sizeof(user_name))) continue; + if (xp.parse_str("team_name", team_name, sizeof(team_name))) continue; + if 
(xp.parse_str("host_venue", host_venue, sizeof(host_venue))) continue; + if (xp.parse_str("email_hash", email_hash, sizeof(email_hash))) continue; + if (xp.parse_str("cross_project_id", cross_project_id, sizeof(cross_project_id))) continue; + if (xp.parse_double("cpid_time", cpid_time)) continue; + if (xp.parse_double("user_total_credit", user_total_credit)) continue; + if (xp.parse_double("user_expavg_credit", user_expavg_credit)) continue; + if (xp.parse_double("user_create_time", user_create_time)) continue; + if (xp.parse_int("rpc_seqno", rpc_seqno)) continue; + if (xp.parse_int("userid", userid)) continue; + if (xp.parse_int("teamid", teamid)) continue; + if (xp.parse_int("hostid", hostid)) continue; + if (xp.parse_double("host_total_credit", host_total_credit)) continue; + if (xp.parse_double("host_expavg_credit", host_expavg_credit)) continue; + if (xp.parse_double("host_create_time", host_create_time)) continue; + if (xp.match_tag("code_sign_key")) { + retval = copy_element_contents( + xp.f->f, + "</code_sign_key>", + code_sign_key, + sizeof(code_sign_key) + ); + if (retval) return retval; + strip_whitespace(code_sign_key); + continue; + } + if (xp.parse_int("nrpc_failures", nrpc_failures)) continue; + if (xp.parse_int("master_fetch_failures", master_fetch_failures)) continue; + if (xp.parse_double("min_rpc_time", min_rpc_time)) continue; + if (xp.parse_bool("master_url_fetch_pending", master_url_fetch_pending)) continue; + if (xp.parse_int("sched_rpc_pending", sched_rpc_pending)) continue; + if (xp.parse_double("next_rpc_time", next_rpc_time)) continue; + if (xp.parse_bool("trickle_up_pending", trickle_up_pending)) continue; + if (xp.parse_int("send_time_stats_log", send_time_stats_log)) continue; + if (xp.parse_int("send_job_log", send_job_log)) continue; + if (xp.parse_bool("send_full_workload", send_full_workload)) continue; + if (xp.parse_bool("dont_use_dcf", dont_use_dcf)) continue; + if (xp.parse_bool("non_cpu_intensive", non_cpu_intensive)) 
continue; + if (xp.parse_bool("verify_files_on_app_start", verify_files_on_app_start)) continue; + if (xp.parse_bool("suspended_via_gui", suspended_via_gui)) continue; + if (xp.parse_bool("dont_request_more_work", dont_request_more_work)) continue; + if (xp.parse_bool("detach_when_done", detach_when_done)) continue; + if (xp.parse_bool("ended", ended)) continue; + if (xp.parse_double("rec", pwf.rec)) continue; + if (xp.parse_double("rec_time", pwf.rec_time)) continue; + if (xp.parse_double("cpu_backoff_interval", rsc_pwf[0].backoff_interval)) continue; + if (xp.parse_double("cpu_backoff_time", rsc_pwf[0].backoff_time)) { + if (rsc_pwf[0].backoff_time > gstate.now + 28*SECONDS_PER_DAY) { + rsc_pwf[0].backoff_time = gstate.now + 28*SECONDS_PER_DAY; + } + continue; + } + if (xp.match_tag("rsc_backoff_interval")) { + if (parse_rsc_param(xp, "/rsc_backoff_interval", rt, x)) { + rsc_pwf[rt].backoff_interval = x; + } + continue; + } + if (xp.match_tag("rsc_backoff_time")) { + if (parse_rsc_param(xp, "/rsc_backoff_time", rt, x)) { + rsc_pwf[rt].backoff_time = x; + } + continue; + } + if (xp.parse_double("resource_share", resource_share)) continue; + // not authoritative + if (xp.parse_double("duration_correction_factor", duration_correction_factor)) continue; + if (xp.parse_bool("attached_via_acct_mgr", attached_via_acct_mgr)) continue; + if (xp.parse_bool("no_cpu_apps", btemp)) { + if (btemp) handle_no_rsc_apps(this, "CPU"); + continue; + } + if (xp.parse_bool("no_cuda_apps", btemp)) { + if (btemp) handle_no_rsc_apps(this, GPU_TYPE_NVIDIA); + continue; + } + if (xp.parse_bool("no_ati_apps", btemp)) { + if (btemp) handle_no_rsc_apps(this, GPU_TYPE_ATI); + continue; + } + if (xp.parse_str("no_rsc_apps", buf, sizeof(buf))) { + handle_no_rsc_apps(this, buf); + continue; + } + if (xp.parse_bool("no_cpu_ams", btemp)) { + if (btemp) handle_no_rsc_ams(this, "CPU"); + continue; + } + if (xp.parse_bool("no_cuda_ams", btemp)) { + if (btemp) handle_no_rsc_ams(this, GPU_TYPE_NVIDIA); 
+ continue; + } + if (xp.parse_bool("no_ati_ams", btemp)) { + if (btemp) handle_no_rsc_ams(this, GPU_TYPE_ATI); + continue; + } + if (xp.parse_str("no_rsc_ams", buf, sizeof(buf))) { + handle_no_rsc_ams(this, buf); + continue; + } + if (xp.parse_str("no_rsc_pref", buf, sizeof(buf))) { + handle_no_rsc_pref(this, buf); + continue; + } + + // backwards compat - old state files had ams_resource_share = 0 + if (xp.parse_double("ams_resource_share_new", ams_resource_share)) continue; + if (xp.parse_double("ams_resource_share", x)) { + if (x > 0) ams_resource_share = x; + continue; + } + if (xp.parse_bool("scheduler_rpc_in_progress", btemp)) continue; + if (xp.parse_bool("use_symlinks", use_symlinks)) continue; + if (xp.parse_bool("anonymous_platform", btemp)) continue; + if (xp.parse_string("trickle_up_url", stemp)) { + trickle_up_ops.push_back(new TRICKLE_UP_OP(stemp)); + continue; + } + if (log_flags.unparsed_xml) { + msg_printf(0, MSG_INFO, + "[unparsed_xml] PROJECT::parse_state(): unrecognized: %s", + xp.parsed_tag + ); + } + xp.skip_unexpected(); + } + return ERR_XML_PARSE; +} + +// Write project information to client state file or GUI RPC reply +// +int PROJECT::write_state(MIOFILE& out, bool gui_rpc) { + unsigned int i; + char un[2048], tn[2048]; + + out.printf( + "<project>\n" + ); + + xml_escape(user_name, un, sizeof(un)); + xml_escape(team_name, tn, sizeof(tn)); + out.printf( + " <master_url>%s</master_url>\n" + " <project_name>%s</project_name>\n" + " <symstore>%s</symstore>\n" + " <user_name>%s</user_name>\n" + " <team_name>%s</team_name>\n" + " <host_venue>%s</host_venue>\n" + " <email_hash>%s</email_hash>\n" + " <cross_project_id>%s</cross_project_id>\n" + " <cpid_time>%f</cpid_time>\n" + " <user_total_credit>%f</user_total_credit>\n" + " <user_expavg_credit>%f</user_expavg_credit>\n" + " <user_create_time>%f</user_create_time>\n" + " <rpc_seqno>%d</rpc_seqno>\n" + " <userid>%d</userid>\n" + " <teamid>%d</teamid>\n" + " <hostid>%d</hostid>\n" + " 
<host_total_credit>%f</host_total_credit>\n" + " <host_expavg_credit>%f</host_expavg_credit>\n" + " <host_create_time>%f</host_create_time>\n" + " <nrpc_failures>%d</nrpc_failures>\n" + " <master_fetch_failures>%d</master_fetch_failures>\n" + " <min_rpc_time>%f</min_rpc_time>\n" + " <next_rpc_time>%f</next_rpc_time>\n" + " <rec>%f</rec>\n" + " <rec_time>%f</rec_time>\n" + + " <resource_share>%f</resource_share>\n" + " <duration_correction_factor>%f</duration_correction_factor>\n" + " <sched_rpc_pending>%d</sched_rpc_pending>\n" + " <send_time_stats_log>%d</send_time_stats_log>\n" + " <send_job_log>%d</send_job_log>\n" + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + master_url, + project_name, + symstore, + un, + tn, + host_venue, + email_hash, + cross_project_id, + cpid_time, + user_total_credit, + user_expavg_credit, + user_create_time, + rpc_seqno, + userid, + teamid, + hostid, + host_total_credit, + host_expavg_credit, + host_create_time, + nrpc_failures, + master_fetch_failures, + min_rpc_time, + next_rpc_time, + pwf.rec, + pwf.rec_time, + resource_share, + duration_correction_factor, + sched_rpc_pending, + send_time_stats_log, + send_job_log, + anonymous_platform?" <anonymous_platform/>\n":"", + master_url_fetch_pending?" <master_url_fetch_pending/>\n":"", + trickle_up_pending?" <trickle_up_pending/>\n":"", + send_full_workload?" <send_full_workload/>\n":"", + dont_use_dcf?" <dont_use_dcf/>\n":"", + non_cpu_intensive?" <non_cpu_intensive/>\n":"", + verify_files_on_app_start?" <verify_files_on_app_start/>\n":"", + suspended_via_gui?" <suspended_via_gui/>\n":"", + dont_request_more_work?" <dont_request_more_work/>\n":"", + detach_when_done?" <detach_when_done/>\n":"", + ended?" <ended/>\n":"", + attached_via_acct_mgr?" <attached_via_acct_mgr/>\n":"", + (this == gstate.scheduler_op->cur_proj)?" <scheduler_rpc_in_progress/>\n":"", + use_symlinks?" 
<use_symlinks/>\n":"" + ); + for (int j=0; j<coprocs.n_rsc; j++) { + out.printf( + " <rsc_backoff_time>\n" + " <name>%s</name>\n" + " <value>%f</value>\n" + " </rsc_backoff_time>\n" + " <rsc_backoff_interval>\n" + " <name>%s</name>\n" + " <value>%f</value>\n" + " </rsc_backoff_interval>\n", + rsc_name(j), rsc_pwf[j].backoff_time, + rsc_name(j), rsc_pwf[j].backoff_interval + ); + if (no_rsc_ams[j]) { + out.printf(" <no_rsc_ams>%s</no_rsc_ams>\n", rsc_name(j)); + } + if (no_rsc_apps[j]) { + out.printf(" <no_rsc_apps>%s</no_rsc_apps>\n", rsc_name(j)); + } + if (no_rsc_pref[j]) { + out.printf(" <no_rsc_pref>%s</no_rsc_pref>\n", rsc_name(j)); + } + if (j>0 && gui_rpc && (ncoprocs_excluded[j] == rsc_work_fetch[j].ninstances)) { + out.printf(" <no_rsc_config>%s</no_rsc_config>\n", rsc_name(j)); + } + } + if (ams_resource_share >= 0) { + out.printf(" <ams_resource_share_new>%f</ams_resource_share_new>\n", + ams_resource_share + ); + } + if (gui_rpc) { + out.printf( + "%s" + " <sched_priority>%f</sched_priority>\n" + " <last_rpc_time>%f</last_rpc_time>\n" + " <project_files_downloaded_time>%f</project_files_downloaded_time>\n", + gui_urls.c_str(), + sched_priority, + last_rpc_time, + project_files_downloaded_time + ); + if (download_backoff.next_xfer_time > gstate.now) { + out.printf( + " <download_backoff>%f</download_backoff>\n", + download_backoff.next_xfer_time - gstate.now + ); + } + if (upload_backoff.next_xfer_time > gstate.now) { + out.printf( + " <upload_backoff>%f</upload_backoff>\n", + upload_backoff.next_xfer_time - gstate.now + ); + } + if (strlen(host_venue)) { + out.printf(" <venue>%s</venue>\n", host_venue); + } + } else { + for (i=0; i<scheduler_urls.size(); i++) { + out.printf( + " <scheduler_url>%s</scheduler_url>\n", + scheduler_urls[i].c_str() + ); + } + if (strlen(code_sign_key)) { + out.printf( + " <code_sign_key>\n%s\n</code_sign_key>\n", code_sign_key + ); + } + for (i=0; i<trickle_up_ops.size(); i++) { + TRICKLE_UP_OP* t = trickle_up_ops[i]; + 
out.printf( + " <trickle_up_url>%s</trickle_up_url>\n", + t->url.c_str() + ); + } + } + out.printf( + "</project>\n" + ); + return 0; +} + +// Some project data is stored in account file, other in client_state.xml +// Copy fields that are stored in client_state.xml from "p" into "this" +// +void PROJECT::copy_state_fields(PROJECT& p) { + scheduler_urls = p.scheduler_urls; + safe_strcpy(project_name, p.project_name); + safe_strcpy(user_name, p.user_name); + safe_strcpy(team_name, p.team_name); + safe_strcpy(host_venue, p.host_venue); + safe_strcpy(email_hash, p.email_hash); + safe_strcpy(cross_project_id, p.cross_project_id); + user_total_credit = p.user_total_credit; + user_expavg_credit = p.user_expavg_credit; + user_create_time = p.user_create_time; + cpid_time = p.cpid_time; + rpc_seqno = p.rpc_seqno; + userid = p.userid; + teamid = p.teamid; + hostid = p.hostid; + host_total_credit = p.host_total_credit; + host_expavg_credit = p.host_expavg_credit; + host_create_time = p.host_create_time; + nrpc_failures = p.nrpc_failures; + master_fetch_failures = p.master_fetch_failures; + min_rpc_time = p.min_rpc_time; + next_rpc_time = p.next_rpc_time; + master_url_fetch_pending = p.master_url_fetch_pending; + sched_rpc_pending = p.sched_rpc_pending; + trickle_up_pending = p.trickle_up_pending; + safe_strcpy(code_sign_key, p.code_sign_key); + for (int i=0; i<MAX_RSC; i++) { + rsc_pwf[i] = p.rsc_pwf[i]; + no_rsc_pref[i] = p.no_rsc_pref[i]; + no_rsc_apps[i] = p.no_rsc_apps[i]; + no_rsc_ams[i] = p.no_rsc_ams[i]; + } + pwf = p.pwf; + send_full_workload = p.send_full_workload; + dont_use_dcf = p.dont_use_dcf; + send_time_stats_log = p.send_time_stats_log; + send_job_log = p.send_job_log; + non_cpu_intensive = p.non_cpu_intensive; + verify_files_on_app_start = p.verify_files_on_app_start; + suspended_via_gui = p.suspended_via_gui; + dont_request_more_work = p.dont_request_more_work; + detach_when_done = p.detach_when_done; + attached_via_acct_mgr = p.attached_via_acct_mgr; + 
ended = p.ended; + duration_correction_factor = p.duration_correction_factor; + ams_resource_share = p.ams_resource_share; + if (ams_resource_share >= 0) { + resource_share = ams_resource_share; + } + use_symlinks = p.use_symlinks; +} + +// Write project statistic to project statistics file +// +int PROJECT::write_statistics(MIOFILE& out, bool /*gui_rpc*/) { + out.printf( + "<project_statistics>\n" + " <master_url>%s</master_url>\n", + master_url + ); + + for (std::vector<DAILY_STATS>::iterator i=statistics.begin(); + i!=statistics.end(); ++i + ) { + out.printf( + " <daily_statistics>\n" + " <day>%f</day>\n" + " <user_total_credit>%f</user_total_credit>\n" + " <user_expavg_credit>%f</user_expavg_credit>\n" + " <host_total_credit>%f</host_total_credit>\n" + " <host_expavg_credit>%f</host_expavg_credit>\n" + " </daily_statistics>\n", + i->day, + i->user_total_credit, + i->user_expavg_credit, + i->host_total_credit, + i->host_expavg_credit + ); + } + out.printf( + "</project_statistics>\n" + ); + return 0; +} + +void PROJECT::suspend() { + suspended_via_gui = true; + gstate.request_schedule_cpus("project suspended"); + gstate.request_work_fetch("project suspended"); +} +void PROJECT::resume() { + suspended_via_gui = false; + gstate.request_schedule_cpus("project resumed"); + gstate.request_work_fetch("project resumed"); +} + +void PROJECT::abort_not_started() { + for (unsigned int i=0; i<gstate.results.size(); i++) { + RESULT* rp = gstate.results[i]; + if (rp->project != this) continue; + if (rp->is_not_started()) { + rp->abort_inactive(EXIT_ABORTED_VIA_GUI); + } + } +} + +void PROJECT::get_task_durs(double& not_started_dur, double& in_progress_dur) { + not_started_dur = 0; + in_progress_dur = 0; + for (unsigned int i=0; i<gstate.results.size(); i++) { + RESULT* rp = gstate.results[i]; + if (rp->project != this) continue; + double d = rp->estimated_runtime_remaining(); + d /= gstate.time_stats.availability_frac(rp->avp->gpu_usage.rsc_type); + if (rp->is_not_started()) 
{
+            not_started_dur += d;
+        } else {
+            in_progress_dur += d;
+        }
+    }
+}
+
+// Return the URL of one of this project's scheduling servers.
+// The entry chosen is (index + (int)(r*n)) wrapped into [0, n),
+// where n is the number of scheduler URLs.
+// Returns "" if the project has no scheduler URLs.
+//
+const char* PROJECT::get_scheduler_url(int index, double r) {
+    int n = (int) scheduler_urls.size();
+    if (n == 0) return "";
+        // "% 0" below would be undefined behavior
+    int ir = (int)(r*n);
+    int i = (index + ir)%n;
+    if (i < 0) i += n;
+        // % yields a negative result if index + ir is negative
+    return scheduler_urls[i].c_str();
+}
+
+// delete current sym links.
+// This is done when parsing scheduler reply,
+// to ensure that we get rid of sym links for
+// project files no longer in use
+//
+void PROJECT::delete_project_file_symlinks() {
+    unsigned int i;
+    char project_dir[256], path[256];
+
+    get_project_dir(this, project_dir, sizeof(project_dir));
+    for (i=0; i<project_files.size(); i++) {
+        FILE_REF& fref = project_files[i];
+        // NOTE(review): unbounded sprintf into a 256-byte buffer;
+        // presumably dir + open_name always fits - confirm
+        sprintf(path, "%s/%s", project_dir, fref.open_name);
+        delete_project_owned_file(path, false);
+    }
+}
+
+// install pointers from FILE_REFs to FILE_INFOs for project files,
+// and flag FILE_INFOs as being project files.
+// FILE_REFs whose file can't be found are reported and removed.
+//
+void PROJECT::link_project_files() {
+    FILE_INFO* fip;
+    vector<FILE_REF>::iterator fref_iter;
+    fref_iter = project_files.begin();
+    while (fref_iter != project_files.end()) {
+        FILE_REF& fref = *fref_iter;
+        fip = gstate.lookup_file_info(this, fref.file_name);
+        if (!fip) {
+            msg_printf(this, MSG_INTERNAL_ERROR,
+                "project file refers to non-existent %s", fref.file_name
+            );
+            fref_iter = project_files.erase(fref_iter);
+            continue;
+        }
+        fref.file_info = fip;
+        fip->is_project_file = true;
+        fref_iter++;
+    }
+}
+
+// write symlinks for all of this project's project files
+// that are present
+//
+void PROJECT::create_project_file_symlinks() {
+    for (unsigned i=0; i<gstate.file_infos.size(); i++) {
+        FILE_INFO* fip = gstate.file_infos[i];
+        if (fip->project == this && fip->is_project_file && fip->status == FILE_PRESENT) {
+            write_symlink_for_project_file(fip);
+        }
+    }
+}
+
+// write the list of project files, enclosed in <project_files> tags;
+// write nothing if there are none
+//
+void PROJECT::write_project_files(MIOFILE& f) {
+    unsigned int i;
+
+    if (!project_files.size()) return;
+    f.printf("<project_files>\n");
+    for (i=0; i<project_files.size(); i++) {
+        FILE_REF& fref = project_files[i];
+        fref.write(f);
+    }
+    f.printf("</project_files>\n");
+}
+
+// write
symlinks for project files. +// Note: it's conceivable that one physical file +// has several logical names, so try them all +// +int PROJECT::write_symlink_for_project_file(FILE_INFO* fip) { + char project_dir[256], link_path[256], file_path[256]; + unsigned int i; + + get_project_dir(this, project_dir, sizeof(project_dir)); + for (i=0; i<project_files.size(); i++) { + FILE_REF& fref = project_files[i]; + if (fref.file_info != fip) continue; + sprintf(link_path, "%s/%s", project_dir, fref.open_name); + sprintf(file_path, "%s/%s", project_dir, fip->name); + make_soft_link(this, link_path, file_path); + } + return 0; +} + +// a project file download just finished. +// If it's the last one, update project_files_downloaded_time +// +void PROJECT::update_project_files_downloaded_time() { + unsigned int i; + for (i=0; i<project_files.size(); i++) { + FILE_REF& fref = project_files[i]; + FILE_INFO* fip = fref.file_info; + if (fip->status != FILE_PRESENT) continue; + } + project_files_downloaded_time = gstate.now; +} + +bool PROJECT::some_download_stalled() { +#ifndef SIM + unsigned int i; + + if (!download_backoff.ok_to_transfer()) return true; + + for (i=0; i<gstate.pers_file_xfers->pers_file_xfers.size(); i++) { + PERS_FILE_XFER* pfx = gstate.pers_file_xfers->pers_file_xfers[i]; + if (pfx->fip->project != this) continue; + if (pfx->is_upload) continue; + if (pfx->next_request_time > gstate.now) return true; + } +#endif + return false; +} + +bool PROJECT::runnable(int rsc_type) { + if (suspended_via_gui) return false; + for (unsigned int i=0; i<gstate.results.size(); i++) { + RESULT* rp = gstate.results[i]; + if (rp->project != this) continue; + if (rsc_type != RSC_TYPE_ANY) { + if (rp->avp->gpu_usage.rsc_type != rsc_type) { + continue; + } + } + if (rp->runnable()) return true; + } + return false; +} + +bool PROJECT::uploading() { + for (unsigned int i=0; i<gstate.file_xfers->file_xfers.size(); i++) { + FILE_XFER& fx = *gstate.file_xfers->file_xfers[i]; + if 
(fx.fip->project == this && fx.is_upload) { + return true; + } + } + return false; +} + +bool PROJECT::downloading() { + if (suspended_via_gui) return false; + for (unsigned int i=0; i<gstate.results.size(); i++) { + RESULT* rp = gstate.results[i]; + if (rp->project != this) continue; + if (rp->downloading()) return true; + } + return false; +} + +bool PROJECT::has_results() { + for (unsigned i=0; i<gstate.results.size(); i++) { + RESULT *rp = gstate.results[i]; + if (rp->project == this) return true; + } + return false; +} + +bool PROJECT::some_result_suspended() { + unsigned int i; + for (i=0; i<gstate.results.size(); i++) { + RESULT *rp = gstate.results[i]; + if (rp->project != this) continue; + if (rp->suspended_via_gui) return true; + } + return false; +} + +bool PROJECT::can_request_work() { + if (suspended_via_gui) return false; + if (master_url_fetch_pending) return false; + if (min_rpc_time > gstate.now) return false; + if (dont_request_more_work) return false; + if (gstate.in_abort_sequence) return false; + return true; +} + +bool PROJECT::potentially_runnable() { + if (runnable(RSC_TYPE_ANY)) return true; + if (can_request_work()) return true; + if (downloading()) return true; + return false; +} + +bool PROJECT::nearly_runnable() { + if (runnable(RSC_TYPE_ANY)) return true; + if (downloading()) return true; + return false; +} + diff --git a/client/project.h b/client/project.h new file mode 100644 index 0000000000000000000000000000000000000000..a3256489d1b8a12af21440ef8df1390fcf5b0f0c --- /dev/null +++ b/client/project.h @@ -0,0 +1,332 @@ +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2012 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. 
+// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see <http://www.gnu.org/licenses/>. + +#ifndef _PROJECT_ +#define _PROJECT_ + +#include "client_types.h" + +struct PROJECT : PROJ_AM { + // the following items come from the account file + // They are a function only of the user and the project + // + char authenticator[256]; + // user's authenticator on this project + std::string project_prefs; + // without the enclosing <project_preferences> tags. + // May include <venue> elements + // This field is used only briefly: between handling a + // scheduler RPC reply and writing the account file + std::string project_specific_prefs; + // without enclosing <project_specific> tags + // Does not include <venue> elements + std::string gui_urls; + // GUI URLs, with enclosing <gui_urls> tags + double resource_share; + // project's resource share relative to other projects. + double resource_share_frac; + // fraction of RS of non-suspended, compute-intensive projects + + // the following are from the user's project prefs + // + bool no_rsc_pref[MAX_RSC]; + + // derived from GPU exclusions in cc_config.xml; + // disable work fetch if all instances excluded + // + bool no_rsc_config[MAX_RSC]; + + // the following are from the project itself + // (or derived from app version list if anonymous platform) + // + bool no_rsc_apps[MAX_RSC]; + + // the following are from the account manager, if any + // + bool no_rsc_ams[MAX_RSC]; + + // the following set dynamically + // + bool rsc_defer_sched[MAX_RSC]; + // This project has a GPU job for which there's insuff. video RAM. 
+ // Don't fetch more jobs of this type; they might have same problem + + char host_venue[256]; + // logically, this belongs in the client state file + // rather than the account file. + // But we need it in the latter in order to parse prefs. + bool using_venue_specific_prefs; + + // the following items come from client_state.xml + // They may depend on the host as well as user and project + // NOTE: if you add anything, add it to copy_state_fields() also!!! + // + std::vector<std::string> scheduler_urls; + // where to find scheduling servers + char symstore[256]; + // URL of symbol server (Windows) + char user_name[256]; + char team_name[256]; + char email_hash[MD5_LEN]; + char cross_project_id[MD5_LEN]; + double cpid_time; + double user_total_credit; + double user_expavg_credit; + double user_create_time; + int userid; + int teamid; + int hostid; + double host_total_credit; + double host_expavg_credit; + double host_create_time; + double ams_resource_share; + // resource share according to AMS; overrides project + // -1 means not specified by AMS + + // stuff related to scheduler RPCs and master fetch + // + int rpc_seqno; + int nrpc_failures; + // # of consecutive times we've failed to contact all scheduling servers + int master_fetch_failures; + double min_rpc_time; + // earliest time to contact any server of this project (or zero) + void set_min_rpc_time(double future_time, const char* reason); + double next_rpc_time; + // if nonzero, specifies a time when another scheduler RPC + // should be done (as requested by server). + // An RPC could be done sooner than this. + bool waiting_until_min_rpc_time(); + // returns true if min_rpc_time > now + bool master_url_fetch_pending; + // need to fetch and parse the master URL + int sched_rpc_pending; + // we need to do a scheduler RPC, for various possible reasons: + // user request, propagate host CPID, time-based, etc. 
+ // Reasons are enumerated in lib/common_defs.h + bool possibly_backed_off; + // we need to call request_work_fetch() when a project + // transitions from being backed off to not. + // This (slightly misnamed) keeps track of whether this + // may still need to be done for given project + bool trickle_up_pending; + // have trickle up to send + double last_rpc_time; + // when last RPC finished + // not maintained across client sessions + // used by Manager (simple view) + + // Other stuff + + bool anonymous_platform; + // app_versions.xml file found in project dir; + // use those apps rather then getting from server + bool non_cpu_intensive; + // All this project's apps are non-CPU-intensive. + // Apps can also be individually marked as NCI + bool verify_files_on_app_start; + // Check app version and input files on app startup, + // to make sure they haven't been tampered with. + // This provides only the illusion of security. + bool use_symlinks; + double disk_usage; + // computed by get_disk_usages() + double disk_share; + // computed by get_disk_shares(); + + // items send in scheduler replies, requesting that + // various things be sent in the next request + // + int send_time_stats_log; + // if nonzero, send time stats log from that point on + int send_job_log; + // if nonzero, send this project's job log from that point on + bool send_full_workload; + bool dont_use_dcf; + + bool suspended_via_gui; + bool dont_request_more_work; + // Return work, but don't request more + // Used for a clean exit to a project, + // or if a user wants to pause doing work for the project + bool attached_via_acct_mgr; + bool detach_when_done; + // when no results for this project, detach it. + bool ended; + // project has ended; advise user to detach + char code_sign_key[MAX_KEY_LEN]; + std::vector<FILE_REF> user_files; + std::vector<FILE_REF> project_files; + // files not specific to apps or work - e.g. 
icons + int parse_preferences_for_user_files(); + void write_project_files(MIOFILE&); + void link_project_files(); + void create_project_file_symlinks(); + void delete_project_file_symlinks(); + int write_symlink_for_project_file(FILE_INFO*); + double project_files_downloaded_time; + // when last project file download finished + void update_project_files_downloaded_time(); + // called when a project file download finishes. + // If it's the last one, set project_files_downloaded_time to now + + double duration_correction_factor; + // Multiply by this when estimating the CPU time of a result + // (based on FLOPs estimated and benchmarks). + // This is dynamically updated in a way that maintains an upper bound. + // it goes down slowly but if a new estimate X is larger, + // the factor is set to X. + // + // Deprecated - current server logic handles this, + // and this should go to 1. + // But we need to keep it around for older projects + void update_duration_correction_factor(ACTIVE_TASK*); + + // fields used by CPU scheduler and work fetch + // everything from here on applies only to CPU intensive projects + + bool can_request_work(); + // not suspended and not deferred and not no more work + bool runnable(int rsc_type); + // has a runnable result using the given resource type + bool downloading(); + // has a result in downloading state + bool potentially_runnable(); + // runnable or contactable or downloading + bool nearly_runnable(); + // runnable or downloading + bool overworked(); + // the project has used too much CPU time recently + bool some_download_stalled(); + // a download is backed off + bool some_result_suspended(); + double last_upload_start; + // the last time an upload was started. 
+ // Used for "work fetch deferral" mechanism: + // don't request work from a project if an upload started + // in last X minutes and is still active + bool uploading(); + bool has_results(); + + struct RESULT *next_runnable_result; + // the next result to run for this project + int nuploading_results; + // number of results in UPLOADING state + // Don't start new results if these exceeds 2*ncpus. + bool too_many_uploading_results; + + // scheduling (work fetch and job scheduling) + // + double sched_priority; + void compute_sched_priority(); + + // stuff for RR sim + // + double rr_sim_cpu_share; + bool rr_sim_active; + int ncoprocs_excluded[MAX_RSC]; + // number of excluded instances per processor type + bool operator<(const PROJECT& p) { + return sched_priority > p.sched_priority; + } + + // stuff related to work fetch + // + RSC_PROJECT_WORK_FETCH rsc_pwf[MAX_RSC]; + PROJECT_WORK_FETCH pwf; + inline void reset() { + for (int i=0; i<coprocs.n_rsc; i++) { + rsc_pwf[i].reset(); + } + } + inline int deadlines_missed(int rsc_type) { + return rsc_pwf[rsc_type].deadlines_missed; + } + void get_task_durs(double& not_started_dur, double& in_progress_dur); + + int nresults_returned; + // # of results being returned in current scheduler op + const char* get_scheduler_url(int index, double r); + // get scheduler URL with random offset r + bool checked; + // temporary used when scanning projects + + FILE_XFER_BACKOFF download_backoff; + FILE_XFER_BACKOFF upload_backoff; + inline FILE_XFER_BACKOFF& file_xfer_backoff(bool is_upload) { + return is_upload?upload_backoff:download_backoff; + } + + // support for replicated trickle-ups + // + std::vector<TRICKLE_UP_OP*> trickle_up_ops; + + PROJECT(); + ~PROJECT(){} + void init(); + void copy_state_fields(PROJECT&); + int write_account_file(); + int parse_account(FILE*); + int parse_account_file_venue(); + int parse_account_file(); + int parse_state(XML_PARSER&); + int write_state(MIOFILE&, bool gui_rpc=false); + + // statistic of 
the last x days + std::vector<DAILY_STATS> statistics; + int parse_statistics(MIOFILE&); + int parse_statistics(FILE*); + int write_statistics(MIOFILE&, bool gui_rpc=false); + int write_statistics_file(); + + void suspend(); + void resume(); + void abort_not_started(); + // abort unstarted jobs + + // clear AMS-related fields + inline void detach_ams() { + attached_via_acct_mgr = false; + ams_resource_share = -1; + for (int i=0; i<MAX_RSC; i++) { + no_rsc_ams[i] = false; + } + } + +#ifdef SIM + RANDOM_PROCESS available; + int index; + int result_index; + double idle_time; + double idle_time_sumsq; + bool idle; + int max_infeasible_count; + bool no_apps; + // for DCF variants: + int completed_task_count; + double completions_ratio_mean; + double completions_ratio_s; + double completions_ratio_stdev; + double completions_required_stdevs; + PROJECT_RESULTS project_results; + void print_results(FILE*, SIM_RESULTS&); + void backoff(); + void update_dcf_stats(RESULT*); +#endif +}; + +#endif diff --git a/client/result.cpp b/client/result.cpp new file mode 100644 index 0000000000000000000000000000000000000000..18d3344d4f497134e354253be3040738fb5f9773 --- /dev/null +++ b/client/result.cpp @@ -0,0 +1,556 @@ +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2012 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see <http://www.gnu.org/licenses/>. 
+ +#include "parse.h" + +#include "client_msgs.h" +#include "client_state.h" +#include "log_flags.h" + +#include "result.h" + +int RESULT::parse_name(XML_PARSER& xp, const char* end_tag) { + strcpy(name, ""); + while (!xp.get_tag()) { + if (xp.match_tag(end_tag)) return 0; + if (xp.parse_str("name", name, sizeof(name))) continue; + if (log_flags.unparsed_xml) { + msg_printf(0, MSG_INFO, + "[unparsed_xml] RESULT::parse_name(): unrecognized: %s\n", + xp.parsed_tag + ); + } + xp.skip_unexpected(); + } + return ERR_XML_PARSE; +} + +void RESULT::clear() { + strcpy(name, ""); + strcpy(wu_name, ""); + received_time = 0; + report_deadline = 0; + version_num = 0; + strcpy(plan_class, ""); + strcpy(platform, ""); + avp = NULL; + output_files.clear(); + ready_to_report = false; + completed_time = 0; + got_server_ack = false; + final_cpu_time = 0; + final_elapsed_time = 0; +#ifdef SIM + peak_flop_count = 0; +#endif + fpops_per_cpu_sec = 0; + fpops_cumulative = 0; + intops_per_cpu_sec = 0; + intops_cumulative = 0; + _state = RESULT_NEW; + exit_status = 0; + stderr_out = ""; + suspended_via_gui = false; + coproc_missing = false; + report_immediately = false; + rr_sim_misses_deadline = false; + app = NULL; + wup = NULL; + project = NULL; + strcpy(resources, ""); + report_immediately = false; + schedule_backoff = 0; + strcpy(schedule_backoff_reason, ""); +} + +// parse a <result> element from scheduling server. 
+// +int RESULT::parse_server(XML_PARSER& xp) { + FILE_REF file_ref; + + clear(); + while (!xp.get_tag()) { + if (xp.match_tag("/result")) return 0; + if (xp.parse_str("name", name, sizeof(name))) continue; + if (xp.parse_str("wu_name", wu_name, sizeof(wu_name))) continue; + if (xp.parse_double("report_deadline", report_deadline)) continue; + if (xp.parse_str("platform", platform, sizeof(platform))) continue; + if (xp.parse_str("plan_class", plan_class, sizeof(plan_class))) continue; + if (xp.parse_int("version_num", version_num)) continue; + if (xp.match_tag("file_ref")) { + file_ref.parse(xp); + output_files.push_back(file_ref); + continue; + } + if (xp.parse_bool("report_immediately", report_immediately)) continue; + if (log_flags.unparsed_xml) { + msg_printf(0, MSG_INFO, + "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", + xp.parsed_tag + ); + } + xp.skip_unexpected(); + } + return ERR_XML_PARSE; +} + +// parse a <result> element from state file +// +int RESULT::parse_state(XML_PARSER& xp) { + FILE_REF file_ref; + + clear(); + while (!xp.get_tag()) { + if (xp.match_tag("/result")) { + // set state to something reasonable in case of bad state file + // + if (got_server_ack || ready_to_report) { + switch (state()) { + case RESULT_NEW: + case RESULT_FILES_DOWNLOADING: + case RESULT_FILES_DOWNLOADED: + case RESULT_FILES_UPLOADING: + set_state(RESULT_FILES_UPLOADED, "RESULT::parse_state"); + break; + } + } + return 0; + } + if (xp.parse_str("name", name, sizeof(name))) continue; + if (xp.parse_str("wu_name", wu_name, sizeof(wu_name))) continue; + if (xp.parse_double("received_time", received_time)) continue; + if (xp.parse_double("report_deadline", report_deadline)) { + continue; + } + if (xp.match_tag("file_ref")) { + file_ref.parse(xp); +#ifndef SIM + output_files.push_back(file_ref); +#endif + continue; + } + if (xp.parse_double("final_cpu_time", final_cpu_time)) continue; + if (xp.parse_double("final_elapsed_time", final_elapsed_time)) continue; + if 
(xp.parse_int("exit_status", exit_status)) continue; + if (xp.parse_bool("got_server_ack", got_server_ack)) continue; + if (xp.parse_bool("ready_to_report", ready_to_report)) continue; + if (xp.parse_double("completed_time", completed_time)) continue; + if (xp.parse_bool("suspended_via_gui", suspended_via_gui)) continue; + if (xp.parse_bool("report_immediately", report_immediately)) continue; + if (xp.parse_int("state", _state)) continue; + if (xp.parse_string("stderr_out", stderr_out)) continue; + if (xp.parse_double("fpops_per_cpu_sec", fpops_per_cpu_sec)) continue; + if (xp.parse_double("fpops_cumulative", fpops_cumulative)) continue; + if (xp.parse_double("intops_per_cpu_sec", intops_per_cpu_sec)) continue; + if (xp.parse_double("intops_cumulative", intops_cumulative)) continue; + if (xp.parse_str("platform", platform, sizeof(platform))) continue; + if (xp.parse_str("plan_class", plan_class, sizeof(plan_class))) continue; + if (xp.parse_int("version_num", version_num)) continue; + if (log_flags.unparsed_xml) { + msg_printf(0, MSG_INFO, + "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", + xp.parsed_tag + ); + } + xp.skip_unexpected(); + } + return ERR_XML_PARSE; +} + +int RESULT::write(MIOFILE& out, bool to_server) { + unsigned int i; + FILE_INFO* fip; + int n, retval; + + out.printf( + "<result>\n" + " <name>%s</name>\n" + " <final_cpu_time>%f</final_cpu_time>\n" + " <final_elapsed_time>%f</final_elapsed_time>\n" + " <exit_status>%d</exit_status>\n" + " <state>%d</state>\n" + " <platform>%s</platform>\n" + " <version_num>%d</version_num>\n", + name, + final_cpu_time, + final_elapsed_time, + exit_status, + state(), + platform, + version_num + ); + if (strlen(plan_class)) { + out.printf(" <plan_class>%s</plan_class>\n", plan_class); + } + if (fpops_per_cpu_sec) { + out.printf(" <fpops_per_cpu_sec>%f</fpops_per_cpu_sec>\n", fpops_per_cpu_sec); + } + if (fpops_cumulative) { + out.printf(" <fpops_cumulative>%f</fpops_cumulative>\n", fpops_cumulative); + } + if 
(intops_per_cpu_sec) { + out.printf(" <intops_per_cpu_sec>%f</intops_per_cpu_sec>\n", intops_per_cpu_sec); + } + if (intops_cumulative) { + out.printf(" <intops_cumulative>%f</intops_cumulative>\n", intops_cumulative); + } + if (to_server) { + out.printf( + " <app_version_num>%d</app_version_num>\n", + wup->version_num + ); + } + n = (int)stderr_out.length(); + if (n || to_server) { + out.printf("<stderr_out>\n"); + + // the following is here so that it gets recorded on server + // (there's no core_client_version field of result table) + // + if (to_server) { + out.printf( + "<core_client_version>%d.%d.%d</core_client_version>\n", + gstate.core_client_version.major, + gstate.core_client_version.minor, + gstate.core_client_version.release + ); + } + if (n) { + out.printf("<![CDATA[\n"); + out.printf("%s",stderr_out.c_str()); + if (stderr_out[n-1] != '\n') { + out.printf("\n"); + } + out.printf("]]>\n"); + } + out.printf("</stderr_out>\n"); + } + if (to_server) { + for (i=0; i<output_files.size(); i++) { + fip = output_files[i].file_info; + if (fip->uploaded) { + retval = fip->write(out, true); + if (retval) return retval; + } + } + } else { + if (got_server_ack) out.printf(" <got_server_ack/>\n"); + if (ready_to_report) out.printf(" <ready_to_report/>\n"); + if (completed_time) out.printf(" <completed_time>%f</completed_time>\n", completed_time); + if (suspended_via_gui) out.printf(" <suspended_via_gui/>\n"); + if (report_immediately) out.printf(" <report_immediately/>\n"); + out.printf( + " <wu_name>%s</wu_name>\n" + " <report_deadline>%f</report_deadline>\n" + " <received_time>%f</received_time>\n", + wu_name, + report_deadline, + received_time + ); + for (i=0; i<output_files.size(); i++) { + retval = output_files[i].write(out); + if (retval) return retval; + } + } + out.printf("</result>\n"); + return 0; +} + +#ifndef SIM + +int RESULT::write_gui(MIOFILE& out) { + out.printf( + "<result>\n" + " <name>%s</name>\n" + " <wu_name>%s</wu_name>\n" + " 
<version_num>%d</version_num>\n" + " <plan_class>%s</plan_class>\n" + " <project_url>%s</project_url>\n" + " <final_cpu_time>%f</final_cpu_time>\n" + " <final_elapsed_time>%f</final_elapsed_time>\n" + " <exit_status>%d</exit_status>\n" + " <state>%d</state>\n" + " <report_deadline>%f</report_deadline>\n" + " <received_time>%f</received_time>\n" + " <estimated_cpu_time_remaining>%f</estimated_cpu_time_remaining>\n", + name, + wu_name, + version_num, + plan_class, + project->master_url, + final_cpu_time, + final_elapsed_time, + exit_status, + state(), + report_deadline, + received_time, + estimated_runtime_remaining() + ); + if (got_server_ack) out.printf(" <got_server_ack/>\n"); + if (ready_to_report) out.printf(" <ready_to_report/>\n"); + if (completed_time) out.printf(" <completed_time>%f</completed_time>\n", completed_time); + if (suspended_via_gui) out.printf(" <suspended_via_gui/>\n"); + if (project->suspended_via_gui) out.printf(" <project_suspended_via_gui/>\n"); + if (report_immediately) out.printf(" <report_immediately/>\n"); + if (edf_scheduled) out.printf(" <edf_scheduled/>\n"); + if (coproc_missing) out.printf(" <coproc_missing/>\n"); + if (schedule_backoff > gstate.now) { + out.printf(" <scheduler_wait/>\n"); + if (strlen(schedule_backoff_reason)) { + out.printf( + " <scheduler_wait_reason>%s</scheduler_wait_reason>\n", + schedule_backoff_reason + ); + } + } + if (avp->needs_network && gstate.network_suspended) out.printf(" <network_wait/>\n"); + ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this); + if (atp) { + atp->write_gui(out); + } + if (!strlen(resources)) { + // only need to compute this string once + // + if (avp->gpu_usage.rsc_type) { + if (avp->gpu_usage.usage == 1) { + sprintf(resources, + "%.3g CPUs + 1 %s GPU", + avp->avg_ncpus, + rsc_name(avp->gpu_usage.rsc_type) + ); + } else { + sprintf(resources, + "%.3g CPUs + %.3g %s GPUs", + avp->avg_ncpus, + avp->gpu_usage.usage, + rsc_name(avp->gpu_usage.rsc_type) + ); + } + } else if 
(avp->missing_coproc) { + sprintf(resources, "%.3g CPUs + %s GPU (missing)", + avp->avg_ncpus, avp->missing_coproc_name + ); + } else if (!project->non_cpu_intensive && (avp->avg_ncpus != 1)) { + sprintf(resources, "%.3g CPUs", avp->avg_ncpus); + } else { + strcpy(resources, " "); + } + } + if (strlen(resources)>1) { + char buf[256]; + strcpy(buf, ""); + if (atp && atp->task_state() == PROCESS_EXECUTING) { + if (avp->gpu_usage.rsc_type) { + COPROC& cp = coprocs.coprocs[avp->gpu_usage.rsc_type]; + if (cp.count > 1) { + sprintf(buf, " (device %d)", + cp.device_nums[coproc_indices[0]] + ); + } + } + } + out.printf( + " <resources>%s%s</resources>\n", resources, buf + ); + } + out.printf("</result>\n"); + return 0; +} + +#endif + +// Returns true if the result's output files are all either +// successfully uploaded or have unrecoverable errors +// +bool RESULT::is_upload_done() { + unsigned int i; + FILE_INFO* fip; + int retval; + + for (i=0; i<output_files.size(); i++) { + fip = output_files[i].file_info; + if (fip->uploadable()) { + if (fip->had_failure(retval)) continue; + if (!fip->uploaded) { + return false; + } + } + } + return true; +} + +// resets all FILE_INFO's in result to uploaded = false +// +void RESULT::clear_uploaded_flags() { + unsigned int i; + FILE_INFO* fip; + + for (i=0; i<output_files.size(); i++) { + fip = output_files[i].file_info; + fip->uploaded = false; + } +} + +bool RESULT::is_not_started() { + if (computing_done()) return false; + if (gstate.active_tasks.lookup_result(this)) return false; + return true; +} + +// return true if some file needed by this result (input or application) +// is downloading and backed off +// +bool RESULT::some_download_stalled() { +#ifndef SIM + unsigned int i; + FILE_INFO* fip; + PERS_FILE_XFER* pfx; + bool some_file_missing = false; + + for (i=0; i<wup->input_files.size(); i++) { + fip = wup->input_files[i].file_info; + if (fip->status != FILE_PRESENT) some_file_missing = true; + pfx = fip->pers_file_xfer; + if 
(pfx && pfx->next_request_time > gstate.now) { + return true; + } + } + for (i=0; i<avp->app_files.size(); i++) { + fip = avp->app_files[i].file_info; + if (fip->status != FILE_PRESENT) some_file_missing = true; + pfx = fip->pers_file_xfer; + if (pfx && pfx->next_request_time > gstate.now) { + return true; + } + } + + if (some_file_missing && !project->download_backoff.ok_to_transfer()) { + return true; + } +#endif + return false; +} + +FILE_REF* RESULT::lookup_file(FILE_INFO* fip) { + for (unsigned int i=0; i<output_files.size(); i++) { + FILE_REF& fr = output_files[i]; + if (fr.file_info == fip) return &fr; + } + return 0; +} + +FILE_INFO* RESULT::lookup_file_logical(const char* lname) { + for (unsigned int i=0; i<output_files.size(); i++) { + FILE_REF& fr = output_files[i]; + if (!strcmp(lname, fr.open_name)) { + return fr.file_info; + } + } + return 0; +} + +void RESULT::append_log_record() { + char filename[256]; + job_log_filename(*project, filename, sizeof(filename)); + FILE* f = fopen(filename, "ab"); + if (!f) return; + fprintf(f, "%.0f ue %f ct %f fe %.0f nm %s et %f\n", + gstate.now, estimated_runtime_uncorrected(), final_cpu_time, + wup->rsc_fpops_est, name, final_elapsed_time + ); + fclose(f); +} + +// abort a result that's not currently running +// +void RESULT::abort_inactive(int status) { + if (state() >= RESULT_COMPUTE_ERROR) return; + set_state(RESULT_ABORTED, "RESULT::abort_inactive"); + exit_status = status; +} + +// whether this task can be run right now +// +bool RESULT::runnable() { + if (suspended_via_gui) return false; + if (project->suspended_via_gui) return false; + if (state() != RESULT_FILES_DOWNLOADED) return false; + if (coproc_missing) return false; + if (schedule_backoff > gstate.now) return false; + if (avp->needs_network && gstate.network_suspended) return false; + return true; +} + +// whether this task should be included in RR simulation +// Like runnable, except downloading backoff is OK +// Schedule-backoff is not OK; +// we 
should be able to get GPU jobs from project A +// even if project B has backed-off jobs. +// +bool RESULT::nearly_runnable() { + if (suspended_via_gui) return false; + if (project->suspended_via_gui) return false; + switch (state()) { + case RESULT_FILES_DOWNLOADED: + case RESULT_FILES_DOWNLOADING: + break; + default: + return false; + } + if (coproc_missing) return false; + if (schedule_backoff > gstate.now) return false; + return true; +} + +// Return true if the result is waiting for its files to download, +// and nothing prevents this from happening soon +// +bool RESULT::downloading() { + if (suspended_via_gui) return false; + if (project->suspended_via_gui) return false; + if (state() > RESULT_FILES_DOWNLOADING) return false; + if (some_download_stalled()) return false; + return true; +} + +double RESULT::estimated_runtime_uncorrected() { + return wup->rsc_fpops_est/avp->flops; +} + +// estimate how long a result will take on this host +// +double RESULT::estimated_runtime() { + double x = estimated_runtime_uncorrected(); + if (!project->dont_use_dcf) { + x *= project->duration_correction_factor; + } + return x; +} + +double RESULT::estimated_runtime_remaining() { + if (computing_done()) return 0; + ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(this); + if (atp) { +#ifdef SIM + return sim_flops_left/avp->flops; +#else + return atp->est_dur() - atp->elapsed_time; +#endif + } + return estimated_runtime(); +} + diff --git a/client/result.h b/client/result.h new file mode 100644 index 0000000000000000000000000000000000000000..b76518baede260a5b0009252b1f4f3fa14b40b13 --- /dev/null +++ b/client/result.h @@ -0,0 +1,186 @@ +// This file is part of BOINC. 
+// http://boinc.berkeley.edu +// Copyright (C) 2012 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see <http://www.gnu.org/licenses/>. + +#ifndef _RESULT_ +#define _RESULT_ + +#include "project.h" + +struct RESULT { + char name[256]; + char wu_name[256]; + double received_time; // when we got this from server + double report_deadline; + int version_num; // identifies the app used + char plan_class[64]; + char platform[256]; + APP_VERSION* avp; + std::vector<FILE_REF> output_files; + bool ready_to_report; + // we're ready to report this result to the server; + // either computation is done and all the files have been uploaded + // or there was an error + double completed_time; + // time when ready_to_report was set + bool got_server_ack; + // we've received the ack for this result from the server + double final_cpu_time; + double final_elapsed_time; +#ifdef SIM + double peak_flop_count; + double sim_flops_left; +#endif + + // the following are nonzero if reported by app + double fpops_per_cpu_sec; + double fpops_cumulative; + double intops_per_cpu_sec; + double intops_cumulative; + + int _state; + // state of this result: see lib/result_state.h + inline int state() { return _state; } + inline void set_ready_to_report() { + ready_to_report = true; + } + void set_state(int, const char*); + int exit_status; + // return value from the application + std::string stderr_out; + // 
the concatenation of: + // + // - if report_result_error() is called for this result: + // <message>x</message> + // <exit_status>x</exit_status> + // <signal>x</signal> + // - if called in FILES_DOWNLOADED state: + // <couldnt_start>x</couldnt_start> + // - if called in NEW state: + // <download_error>x</download_error> for each failed download + // - if called in COMPUTE_DONE state: + // <upload_error>x</upload_error> for each failed upload + // + // - <stderr_txt>X</stderr_txt>, where X is the app's stderr output + bool suspended_via_gui; + bool coproc_missing; + // a coproc needed by this job is missing + // (e.g. because user removed their GPU board). + bool report_immediately; + bool not_started; // temp for CPU sched + + std::string name_md5; // see sort_results(); + int index; // index in results vector + + APP* app; + WORKUNIT* wup; + PROJECT* project; + + RESULT(){} + ~RESULT(){} + void clear(); + int parse_server(XML_PARSER&); + int parse_state(XML_PARSER&); + int parse_name(XML_PARSER&, const char* end_tag); + int write(MIOFILE&, bool to_server); + int write_gui(MIOFILE&); + bool is_upload_done(); // files uploaded? 
+ void clear_uploaded_flags(); + FILE_REF* lookup_file(FILE_INFO*); + FILE_INFO* lookup_file_logical(const char*); + void abort_inactive(int); + // abort the result if it hasn't started computing yet + // Called only for results with no active task + // (otherwise you need to abort the active task) + void append_log_record(); + + // stuff related to CPU scheduling + + bool is_not_started(); + double estimated_runtime(); + double estimated_runtime_uncorrected(); + double estimated_runtime_remaining(); + inline double estimated_flops_remaining() { +#ifdef SIM + return sim_flops_left; +#else + return estimated_runtime_remaining()*avp->flops; +#endif + } + + inline bool computing_done() { + if (state() >= RESULT_COMPUTE_ERROR) return true; + if (ready_to_report) return true; + return false; + } + bool runnable(); + // downloaded, not finished, not suspended, project not suspended + bool nearly_runnable(); + // downloading or downloaded, + // not finished, suspended, project not suspended + bool downloading(); + // downloading, not downloaded, not suspended, project not suspended + bool some_download_stalled(); + // some input or app file is downloading, and backed off + // i.e. 
it may be a long time before we can run this result + inline bool uses_coprocs() { + return (avp->gpu_usage.rsc_type != 0); + } + inline int resource_type() { + return avp->gpu_usage.rsc_type; + } + inline bool non_cpu_intensive() { + if (project->non_cpu_intensive) return true; + if (app->non_cpu_intensive) return true; + return false; + } + inline bool dont_throttle() { + if (non_cpu_intensive()) return true; + if (avp->dont_throttle) return true; + return false; + } + + // temporaries used in CLIENT_STATE::rr_simulation(): + double rrsim_flops_left; + double rrsim_finish_delay; + double rrsim_flops; + bool rrsim_done; + + bool already_selected; + // used to keep cpu scheduler from scheduling a result twice + // transient; used only within schedule_cpus() + double computation_deadline(); + // report deadline - prefs.work_buf_min - time slice + bool rr_sim_misses_deadline; + + // temporaries used in enforce_schedule(): + bool unfinished_time_slice; + int seqno; + + bool edf_scheduled; + // temporary used to tell GUI that this result is deadline-scheduled + + int coproc_indices[MAX_COPROCS_PER_JOB]; + // keep track of coprocessor reservations + char resources[256]; + // textual description of resources used + double schedule_backoff; + // don't try to schedule until this time + // (wait for free GPU RAM) + char schedule_backoff_reason[256]; +}; + +#endif diff --git a/client/rr_sim.cpp b/client/rr_sim.cpp index 0a5a49355accea1a66a8a45125746698af1f85e9..f74d02db4e5d5c91ad10125b36ac73b06773a2ae 100644 --- a/client/rr_sim.cpp +++ b/client/rr_sim.cpp @@ -40,9 +40,11 @@ #include "config.h" #endif +#include "client_msgs.h" #include "client_state.h" #include "coproc.h" -#include "client_msgs.h" +#include "project.h" +#include "result.h" using std::vector; diff --git a/client/scheduler_op.cpp b/client/scheduler_op.cpp index 93f3b18c6abcf4f8ed494dda6558230de6ab2d20..71e7fc5ccbf3d9cb20d8974cedd2f0765d6948bc 100644 --- a/client/scheduler_op.cpp +++ b/client/scheduler_op.cpp @@ 
-27,12 +27,12 @@ #include <ctime> #endif +#include "error_numbers.h" +#include "filesys.h" +#include "parse.h" #include "str_util.h" #include "str_replace.h" #include "util.h" -#include "parse.h" -#include "error_numbers.h" -#include "filesys.h" #include "client_state.h" #include "client_types.h" @@ -40,6 +40,8 @@ #include "file_names.h" #include "log_flags.h" #include "main.h" +#include "project.h" +#include "result.h" #include "scheduler_op.h" using std::vector; diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index 5c1d42cda71924686d1282ce920ab04d01cde92f..58e85780108e539dff74df7600938b77317fc0bb 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -25,10 +25,12 @@ #include "util.h" -#include "client_state.h" #include "client_msgs.h" - +#include "client_state.h" +#include "project.h" +#include "result.h" #include "scheduler_op.h" + #include "work_fetch.h" using std::vector; @@ -907,152 +909,6 @@ void CLIENT_STATE::compute_nuploading_results() { } } -bool PROJECT::runnable(int rsc_type) { - if (suspended_via_gui) return false; - for (unsigned int i=0; i<gstate.results.size(); i++) { - RESULT* rp = gstate.results[i]; - if (rp->project != this) continue; - if (rsc_type != RSC_TYPE_ANY) { - if (rp->avp->gpu_usage.rsc_type != rsc_type) { - continue; - } - } - if (rp->runnable()) return true; - } - return false; -} - -bool PROJECT::uploading() { - for (unsigned int i=0; i<gstate.file_xfers->file_xfers.size(); i++) { - FILE_XFER& fx = *gstate.file_xfers->file_xfers[i]; - if (fx.fip->project == this && fx.is_upload) { - return true; - } - } - return false; -} - -bool PROJECT::downloading() { - if (suspended_via_gui) return false; - for (unsigned int i=0; i<gstate.results.size(); i++) { - RESULT* rp = gstate.results[i]; - if (rp->project != this) continue; - if (rp->downloading()) return true; - } - return false; -} - -bool PROJECT::has_results() { - for (unsigned i=0; i<gstate.results.size(); i++) { - RESULT *rp = gstate.results[i]; - if 
(rp->project == this) return true; - } - return false; -} - -bool PROJECT::some_result_suspended() { - unsigned int i; - for (i=0; i<gstate.results.size(); i++) { - RESULT *rp = gstate.results[i]; - if (rp->project != this) continue; - if (rp->suspended_via_gui) return true; - } - return false; -} - -bool PROJECT::can_request_work() { - if (suspended_via_gui) return false; - if (master_url_fetch_pending) return false; - if (min_rpc_time > gstate.now) return false; - if (dont_request_more_work) return false; - if (gstate.in_abort_sequence) return false; - return true; -} - -bool PROJECT::potentially_runnable() { - if (runnable(RSC_TYPE_ANY)) return true; - if (can_request_work()) return true; - if (downloading()) return true; - return false; -} - -bool PROJECT::nearly_runnable() { - if (runnable(RSC_TYPE_ANY)) return true; - if (downloading()) return true; - return false; -} - -// whether this task can be run right now -// -bool RESULT::runnable() { - if (suspended_via_gui) return false; - if (project->suspended_via_gui) return false; - if (state() != RESULT_FILES_DOWNLOADED) return false; - if (coproc_missing) return false; - if (schedule_backoff > gstate.now) return false; - if (avp->needs_network && gstate.network_suspended) return false; - return true; -} - -// whether this task should be included in RR simulation -// Like runnable, except downloading backoff is OK -// Schedule-backoff is not OK; -// we should be able to get GPU jobs from project A -// even if project B has backed-off jobs. 
-// -bool RESULT::nearly_runnable() { - if (suspended_via_gui) return false; - if (project->suspended_via_gui) return false; - switch (state()) { - case RESULT_FILES_DOWNLOADED: - case RESULT_FILES_DOWNLOADING: - break; - default: - return false; - } - if (coproc_missing) return false; - if (schedule_backoff > gstate.now) return false; - return true; -} - -// Return true if the result is waiting for its files to download, -// and nothing prevents this from happening soon -// -bool RESULT::downloading() { - if (suspended_via_gui) return false; - if (project->suspended_via_gui) return false; - if (state() > RESULT_FILES_DOWNLOADING) return false; - if (some_download_stalled()) return false; - return true; -} - -double RESULT::estimated_runtime_uncorrected() { - return wup->rsc_fpops_est/avp->flops; -} - -// estimate how long a result will take on this host -// -double RESULT::estimated_runtime() { - double x = estimated_runtime_uncorrected(); - if (!project->dont_use_dcf) { - x *= project->duration_correction_factor; - } - return x; -} - -double RESULT::estimated_runtime_remaining() { - if (computing_done()) return 0; - ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(this); - if (atp) { -#ifdef SIM - return sim_flops_left/avp->flops; -#else - return atp->est_dur() - atp->elapsed_time; -#endif - } - return estimated_runtime(); -} - // Returns the estimated total elapsed time of this task. // Compute this as a weighted average of estimates based on // 1) the workunit's flops count (static estimate)