Commit f325f0b7 authored by David Anderson

resource description

svn path=/trunk/boinc/; revision=2243
parent 8f7b567a
......@@ -6045,3 +6045,31 @@ Korpela 2003/09/03
api/gutil.C
configure.ac
David Sept 3 2003
- Changed the way a WU's resource usage is described.
There are now separate estimates and bounds for FP ops.
Integer ops are no longer described.
Disk and memory numbers are bounds.
This changes the client/server RPC format and the DB format,
so it will require a major version number increment.
client/
app.C
client_state.C
client_types.C,h
http.C
net_xfer.C
db/
boinc_db.C,h
schema.sql
lib/
crypt.C,h
sched/
handle_request.C
test/
test_rsc.py
testbase.py
tools/
create_work.C
......@@ -124,8 +124,8 @@ int ACTIVE_TASK::init(RESULT* rp) {
wup = rp->wup;
app_version = wup->avp;
max_cpu_time = gstate.estimate_cpu_time(*rp->wup)*2;
max_disk_usage = rp->wup->rsc_disk;
max_mem_usage = rp->wup->rsc_memory;
max_disk_usage = rp->wup->rsc_disk_bound;
max_mem_usage = rp->wup->rsc_memory_bound;
return 0;
}
......
......@@ -225,7 +225,7 @@ int CLIENT_STATE::init() {
if (show_projects) {
printf("projects:\n");
for (i=0; i<projects.size(); i++) {
printf("URL: %s name: %s\n",
msg_printf(NULL, MSG_INFO, "URL: %s name: %s\n",
projects[i]->master_url, projects[i]->project_name
);
}
......@@ -236,8 +236,9 @@ int CLIENT_STATE::init() {
PROJECT* project = lookup_project(detach_project_url);
if (project) {
detach_project(project);
msg_printf(project, MSG_INFO, "detached from %s\n", detach_project_url);
} else {
printf("project %s not found\n", detach_project_url);
msg_printf(NULL, MSG_ERROR, "project %s not found\n", detach_project_url);
}
exit(0);
}
......@@ -246,9 +247,9 @@ int CLIENT_STATE::init() {
PROJECT* project = lookup_project(reset_project_url);
if (project) {
reset_project(project);
msg_printf(project, MSG_INFO, "Project has been reset");
msg_printf(project, MSG_INFO, "Project %s has been reset", reset_project_url);
} else {
printf("project %s not found\n", reset_project_url);
msg_printf(NULL, MSG_ERROR, "project %s not found\n", reset_project_url);
}
exit(0);
}
......@@ -258,7 +259,7 @@ int CLIENT_STATE::init() {
if (project) {
project->sched_rpc_pending = true;
} else {
printf("project %s not found\n", update_prefs_url);
msg_printf(NULL, MSG_ERROR, "project %s not found\n", update_prefs_url);
}
}
......@@ -520,27 +521,24 @@ int CLIENT_STATE::current_disk_usage(double& size) {
double CLIENT_STATE::estimate_cpu_time(WORKUNIT& wu) {
double x;
x = wu.rsc_fpops/host_info.p_fpops;
x += wu.rsc_iops/host_info.p_iops;
x = wu.rsc_fpops_est/host_info.p_fpops;
return x;
}
inline double force_fraction(double f)
{
inline double force_fraction(double f) {
if (f < 0) return 0;
if (f > 1) return 1;
return f;
}
double CLIENT_STATE::get_percent_done(RESULT* result)
{
double CLIENT_STATE::get_percent_done(RESULT* result) {
ACTIVE_TASK* atp = active_tasks.lookup_result(result);
return atp ? force_fraction(atp->fraction_done) : 0.0;
}
// returns true if start_hour == end_hour or start_hour <= now < end_hour
inline bool now_between_two_hours(int start_hour, int end_hour)
{
//
inline bool now_between_two_hours(int start_hour, int end_hour) {
if (start_hour == end_hour) {
// always work
return true;
......
......@@ -672,10 +672,10 @@ int WORKUNIT::parse(FILE* in) {
project = NULL;
// Default these to very large values (1 week on a 1 cobblestone machine)
// so we don't keep asking the server for more work
rsc_fpops = 1e9*SECONDS_PER_DAY*7;
rsc_iops = 1e9*SECONDS_PER_DAY*7;
rsc_memory = 4e9*SECONDS_PER_DAY*7;
rsc_disk = 1024*1024*1024; // 1 GB
rsc_fpops_est = 1e9*SECONDS_PER_DAY*7;
rsc_fpops_bound = 4e9*SECONDS_PER_DAY*7;
rsc_memory_bound = 1e8;
rsc_disk_bound = 1e9;
while (fgets(buf, 256, in)) {
if (match_tag(buf, "</workunit>")) return 0;
else if (parse_str(buf, "<name>", name, sizeof(name))) continue;
......@@ -683,10 +683,10 @@ int WORKUNIT::parse(FILE* in) {
else if (parse_int(buf, "<version_num>", version_num)) continue;
else if (parse_str(buf, "<command_line>", command_line, sizeof(command_line))) continue;
else if (parse_str(buf, "<env_vars>", env_vars, sizeof(env_vars))) continue;
else if (parse_double(buf, "<rsc_fpops>", rsc_fpops)) continue;
else if (parse_double(buf, "<rsc_iops>", rsc_iops)) continue;
else if (parse_double(buf, "<rsc_memory>", rsc_memory)) continue;
else if (parse_double(buf, "<rsc_disk>", rsc_disk)) continue;
else if (parse_double(buf, "<rsc_fpops_est>", rsc_fpops_est)) continue;
else if (parse_double(buf, "<rsc_fpops_bound>", rsc_fpops_bound)) continue;
else if (parse_double(buf, "<rsc_memory_bound>", rsc_memory_bound)) continue;
else if (parse_double(buf, "<rsc_disk_bound>", rsc_disk_bound)) continue;
else if (match_tag(buf, "<file_ref>")) {
file_ref.parse(in);
input_files.push_back(file_ref);
......@@ -707,19 +707,19 @@ int WORKUNIT::write(FILE* out) {
" <version_num>%d</version_num>\n"
" <command_line>%s</command_line>\n"
" <env_vars>%s</env_vars>\n"
" <rsc_fpops>%f</rsc_fpops>\n"
" <rsc_iops>%f</rsc_iops>\n"
" <rsc_memory>%f</rsc_memory>\n"
" <rsc_disk>%f</rsc_disk>\n",
" <rsc_fpops_est>%f</rsc_fpops_est>\n"
" <rsc_fpops_bound>%f</rsc_fpops_bound>\n"
" <rsc_memory_bound>%f</rsc_memory_bound>\n"
" <rsc_disk_bound>%f</rsc_disk_bound>\n",
name,
app_name,
version_num,
command_line,
env_vars,
rsc_fpops,
rsc_iops,
rsc_memory,
rsc_disk
rsc_fpops_est,
rsc_fpops_bound,
rsc_memory_bound,
rsc_disk_bound
);
for (i=0; i<input_files.size(); i++) {
input_files[i].write(out);
......
......@@ -212,10 +212,10 @@ struct WORKUNIT {
APP* app;
APP_VERSION* avp;
int ref_cnt;
double rsc_fpops;
double rsc_iops;
double rsc_memory;
double rsc_disk;
double rsc_fpops_est;
double rsc_fpops_bound;
double rsc_memory_bound;
double rsc_disk_bound;
int parse(FILE*);
int write(FILE*);
......
......@@ -501,9 +501,12 @@ bool HTTP_OP_SET::poll() {
}
// Open connection to the redirected server
// TODO: handle return value here
//
htp->open_server();
retval = htp->open_server();
if (retval) {
htp->http_op_state = HTTP_STATE_DONE;
htp->http_op_retval = retval;
}
break;
}
if ((htp->hrh.status/100)*100 != HTTP_STATUS_OK) {
......
......@@ -73,7 +73,7 @@ typedef int socklen_t;
typedef size_t socklen_t;
#endif
int NET_XFER::get_ip_addr( char *hostname, int &ip_addr ) {
int NET_XFER::get_ip_addr(char *hostname, int &ip_addr) {
hostent* hep;
#ifdef _WIN32
......@@ -121,7 +121,7 @@ int NET_XFER::get_ip_addr( char *hostname, int &ip_addr ) {
switch (h_errno) {
case HOST_NOT_FOUND:
sprintf(msg+n, "(authoritative answer not found)");
sprintf(msg+n, "(host not found)");
break;
case NO_DATA:
sprintf(msg+n, "(valid name, no data record of requested type)");
......@@ -130,7 +130,7 @@ int NET_XFER::get_ip_addr( char *hostname, int &ip_addr ) {
sprintf(msg+n, "(a nonrecoverable error occurred)");
break;
case TRY_AGAIN:
sprintf(msg+n, "(nonauthoritative host not found, or server failure)");
sprintf(msg+n, "(host not found or server failure)");
break;
}
......
......@@ -414,7 +414,8 @@ void DB_WORKUNIT::db_print(char* buf){
sprintf(buf,
"id=%d, create_time=%d, appid=%d, "
"name='%s', xml_doc='%s', batch=%d, "
"rsc_fpops=%.15e, rsc_iops=%.15e, rsc_memory=%.15e, rsc_disk=%.15e, "
"rsc_fpops_est=%.15e, rsc_fpops_bound=%.15e, "
"rsc_memory_bound=%.15e, rsc_disk_bound=%.15e, "
"need_validate=%d, "
"canonical_resultid=%d, canonical_credit=%.15e, "
"transition_time=%d, delay_bound=%d, "
......@@ -425,7 +426,7 @@ void DB_WORKUNIT::db_print(char* buf){
"result_template='%s'",
id, create_time, appid,
name, xml_doc, batch,
rsc_fpops, rsc_iops, rsc_memory, rsc_disk,
rsc_fpops_est, rsc_fpops_bound, rsc_memory_bound, rsc_disk_bound,
need_validate,
canonical_resultid, canonical_credit,
transition_time, delay_bound,
......@@ -449,10 +450,10 @@ void DB_WORKUNIT::db_parse(MYSQL_ROW &r) {
strcpy2(name, r[i++]);
strcpy2(xml_doc, r[i++]);
batch = atoi(r[i++]);
rsc_fpops = atof(r[i++]);
rsc_iops = atof(r[i++]);
rsc_memory = atof(r[i++]);
rsc_disk = atof(r[i++]);
rsc_fpops_est = atof(r[i++]);
rsc_fpops_bound = atof(r[i++]);
rsc_memory_bound = atof(r[i++]);
rsc_disk_bound = atof(r[i++]);
need_validate = atoi(r[i++]);
canonical_resultid = atoi(r[i++]);
canonical_credit = atof(r[i++]);
......
......@@ -265,24 +265,19 @@ struct WORKUNIT {
char name[256];
char xml_doc[MAX_BLOB_SIZE];
int batch;
double rsc_fpops; // estimated # of FP operations
double rsc_iops; // estimated # of integer operations
// The above two items are used for 2 purposes:
// 1) to estimate how long a result will take on a host
// for scheduling purposes;
// 2) to calculate an upper bound on the CPU time for a result
// before it is aborted.
// Currently this is twice the estimated CPU time.
// At some point we might want to have separate "max rsc" fields
double rsc_memory; // estimated size of RAM working set (bytes)
double rsc_fpops_est; // estimated # of FP operations
// used to estimate how long a result will take on a host
double rsc_fpops_bound; // upper bound on # of FP ops
// used to calculate an upper bound on the CPU time for a result
// before it is aborted.
double rsc_memory_bound; // upper bound on RAM working set (bytes)
// currently used only by scheduler to screen hosts
// At some point, could use as runtime limit
double rsc_disk; // estimated amount of disk needed (bytes)
double rsc_disk_bound; // upper bound on amount of disk needed (bytes)
// (including input, output and temp files, but NOT the app)
// This is used for 2 purposes:
// used for 2 purposes:
// 1) for scheduling (don't send this WU to a host w/ insuff. disk)
// 2) upper bound (abort task if it uses more than this disk)
// At some point we might want to have separate "max" fields
// 2) abort task if it uses more than this disk
bool need_validate; // this WU has at least 1 result in
// validate state = NEED_CHECK
int canonical_resultid; // ID of canonical result, or zero
......
......@@ -150,10 +150,10 @@ create table workunit (
name varchar(254) not null,
xml_doc blob,
batch integer not null,
rsc_fpops double not null,
rsc_iops double not null,
rsc_memory double not null,
rsc_disk double not null,
rsc_fpops_est double not null,
rsc_fpops_bound double not null,
rsc_memory_bound double not null,
rsc_disk_bound double not null,
need_validate smallint not null,
canonical_resultid integer not null,
canonical_credit double not null,
......
......@@ -50,7 +50,6 @@ struct DATA_BLOCK {
int print_hex_data(FILE* f, DATA_BLOCK&);
int sprint_hex_data(char* p, DATA_BLOCK&);
int scan_hex_data(FILE* f, DATA_BLOCK&);
int sscan_hex_data(char* p, DATA_BLOCK&);
int print_key_hex(FILE*, KEY* key, int len);
int scan_key_hex(FILE*, KEY* key, int len);
int sscan_key_hex(char*, KEY* key, int len);
......
......@@ -54,10 +54,8 @@ const double HOST_ACTIVE_FRAC_MIN = 0.5;
//
inline double estimate_duration(WORKUNIT& wu, HOST& host) {
if (host.p_fpops <= 0) host.p_fpops = 1e9;
if (host.p_iops <= 0) host.p_iops = 1e9;
if (wu.rsc_fpops <= 0) wu.rsc_fpops = 1e12;
if (wu.rsc_iops <= 0) wu.rsc_iops = 1e12;
return wu.rsc_fpops/host.p_fpops + wu.rsc_iops/host.p_iops;
if (wu.rsc_fpops_est <= 0) wu.rsc_fpops_est = 1e12;
return wu.rsc_fpops_est/host.p_fpops;
}
// estimate the amount of real time for this WU based on active_frac and
......@@ -74,17 +72,17 @@ inline double estimate_wallclock_duration(WORKUNIT& wu, HOST& host) {
// return true if the WU can be executed on the host
//
bool wu_is_feasible(WORKUNIT& wu, HOST& host) {
if(host.d_free && wu.rsc_disk > host.d_free) {
if(host.d_free && wu.rsc_disk_bound > host.d_free) {
log_messages.printf(
SchedMessages::DEBUG, "[WU#%d %s] needs %f disk; [HOST#%d] has %f\n",
wu.id, wu.name, wu.rsc_disk, host.id, host.d_free
wu.id, wu.name, wu.rsc_disk_bound, host.id, host.d_free
);
return false;
}
if (host.m_nbytes && wu.rsc_memory > host.m_nbytes) {
if (host.m_nbytes && wu.rsc_memory_bound > host.m_nbytes) {
log_messages.printf(
SchedMessages::DEBUG, "[WU#%d %s] needs %f mem; [HOST#%d] has %f\n",
wu.id, wu.name, wu.rsc_memory, host.id, host.m_nbytes
wu.id, wu.name, wu.rsc_memory_bound, host.id, host.m_nbytes
);
return false;
}
......@@ -132,16 +130,16 @@ int insert_wu_tags(WORKUNIT& wu, APP& app) {
char buf[256];
sprintf(buf,
" <rsc_fpops>%f</rsc_fpops>\n"
" <rsc_iops>%f</rsc_iops>\n"
" <rsc_memory>%f</rsc_memory>\n"
" <rsc_disk>%f</rsc_disk>\n"
" <rsc_fpops_est>%f</rsc_fpops_est>\n"
" <rsc_fpops_bound>%f</rsc_fpops_bound>\n"
" <rsc_memory_bound>%f</rsc_memory_bound>\n"
" <rsc_disk_bound>%f</rsc_disk_bound>\n"
" <name>%s</name>\n"
" <app_name>%s</app_name>\n",
wu.rsc_fpops,
wu.rsc_iops,
wu.rsc_memory,
wu.rsc_disk,
wu.rsc_fpops_est,
wu.rsc_fpops_bound,
wu.rsc_memory_bound,
wu.rsc_disk_bound,
wu.name,
app.name
);
......
......@@ -9,7 +9,7 @@ from test_uc import *
class WorkTooBig(WorkUC):
def __init__(self):
WorkUC.__init__(self)
self.rsc_disk = 1000000000000 # 1 TB
self.rsc_disk_bound = 1000000000000 # 1 TB
class ResultUnsent:
def __init__(self):
......
......@@ -524,10 +524,10 @@ class Host:
class Work:
def __init__(self, redundancy, **kwargs):
self.input_files = []
self.rsc_iops = 1.8e12
self.rsc_fpops = 1e13
self.rsc_memory = 1e7
self.rsc_disk = 1e7
self.rsc_fpops_est = 1e13
self.rsc_fpops_bound = 4e13
self.rsc_memory_bound = 1e7
self.rsc_disk_bound = 1e7
self.delay_bound = 86400
if not isinstance(redundancy, int):
raise TypeError
......@@ -576,9 +576,10 @@ class Work:
download_url = project.download_url,
keyfile = os.path.join(project.key_dir,'upload_private'),
appname = self.app.name,
rsc_iops = self.rsc_iops,
rsc_fpops = self.rsc_fpops,
rsc_disk = self.rsc_disk,
rsc_fpops_est = self.rsc_fpops_est,
rsc_fpops_bound = self.rsc_fpops_bound,
rsc_disk_bound = self.rsc_disk_bound,
rsc_memory_bound = self.rsc_memory_bound,
wu_template = self.wu_template,
result_template = self.result_template,
min_quorum = self.min_quorum,
......
......@@ -28,10 +28,10 @@
// [ -download_url x ]
// [ -download_dir x ]
// [ -keyfile path ]
// -rsc_fpops n
// -rsc_iops n
// -rsc_memory n
// -rsc_disk n
// -rsc_fpops_est n
// -rsc_fpops_bound n
// -rsc_memory_bound n
// -rsc_disk_bound n
// -delay_bound x
// [ -min_quorum x ]
// [ -target_nresults x ]
......@@ -119,14 +119,14 @@ int main(int argc, char** argv) {
strcpy(wu_template_file, argv[++i]);
} else if (!strcmp(argv[i], "-result_template")) {
strcpy(result_template_file, argv[++i]);
} else if (!strcmp(argv[i], "-rsc_fpops")) {
wu.rsc_fpops = atof(argv[++i]);
} else if (!strcmp(argv[i], "-rsc_iops")) {
wu.rsc_iops = atof(argv[++i]);
} else if (!strcmp(argv[i], "-rsc_memory")) {
wu.rsc_memory = atof(argv[++i]);
} else if (!strcmp(argv[i], "-rsc_disk")) {
wu.rsc_disk = atof(argv[++i]);
} else if (!strcmp(argv[i], "-rsc_fpops_est")) {
wu.rsc_fpops_est = atof(argv[++i]);
} else if (!strcmp(argv[i], "-rsc_fpops_bound")) {
wu.rsc_fpops_bound = atof(argv[++i]);
} else if (!strcmp(argv[i], "-rsc_memory_bound")) {
wu.rsc_memory_bound = atof(argv[++i]);
} else if (!strcmp(argv[i], "-rsc_disk_bound")) {
wu.rsc_disk_bound = atof(argv[++i]);
} else if (!strcmp(argv[i], "-keyfile")) {
strcpy(keyfile, argv[++i]);
} else if (!strcmp(argv[i], "-delay_bound")) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment