Commit 6fa5d3f7 authored by David Anderson

replace wait3(), wait4() with waitpid()

svn path=/trunk/boinc/; revision=7170
parent 42bd780f
......@@ -9893,3 +9893,19 @@ Charlie 3 Aug 2005
mac_build/
boinc.pbproj/
project.pbxproj
David 3 Aug 2005
- replace wait3() and wait4() with waitpid().
wait3 and wait4 are obsolete and might not be
available on future systems.
- changed these obsoleted macros in configure.ac:
- AC_CANONICAL_SYSTEM -> AC_CANONICAL_TARGET
- AC_HELP_STRING -> AS_HELP_STRING
- AM_CONFIG_HEADER -> AC_CONFIG_HEADERS
(from Egon Larsson)
configure.ac
client/
app_control.C
sched/
sched_send.C
......@@ -89,46 +89,6 @@ int ACTIVE_TASK::kill_task() {
cleanup_task();
}
#if !defined(HAVE_WAIT4) && defined(HAVE_WAIT3)
#include <map>

// Exit status and resource usage of a child that wait3() reaped
// while wait4() was looking for a different process.
//
struct proc_info_t {
    int status;
    rusage r;
    proc_info_t() : status(0), r() {}
    proc_info_t(int s, const rusage &ru);
};

proc_info_t::proc_info_t(int s, const rusage &ru) : status(s), r(ru) {}

// Emulation of wait4() for systems that provide only wait3().
//
// wait3() cannot wait for a specific child, so any other child it reaps
// while we look for "pid" is remembered in a static table and handed out
// by a later call.
//
// pid:      process to wait for; 0 means "any child" (as with wait3()).
// statusp:  if non-NULL, receives the child's wait status.
// options:  passed through to wait3() (e.g. WNOHANG).
// rusagep:  if non-NULL, receives the child's resource usage.
//
// Returns the pid of the reaped child, 0 if WNOHANG was given and the
// requested child has not changed state, or -1 on error
// (errno is whatever wait3() set).
//
pid_t wait4(pid_t pid, int *statusp, int options, struct rusage *rusagep) {
    static std::map<pid_t,proc_info_t> proc_info;
    std::map<pid_t,proc_info_t>::iterator cached;

    if (!pid) {
        // "Any child": first hand back a child that an earlier
        // pid-specific call reaped on the side; otherwise its
        // exit status would be lost forever.
        //
        cached = proc_info.begin();
        if (cached != proc_info.end()) {
            pid_t cached_pid = cached->first;
            if (statusp) *statusp = cached->second.status;
            if (rusagep) *rusagep = cached->second.r;
            proc_info.erase(cached);
            return cached_pid;
        }
        return wait3(statusp, options, rusagep);
    }

    // The requested child may already have been reaped by an earlier call.
    //
    cached = proc_info.find(pid);
    if (cached != proc_info.end()) {
        if (statusp) *statusp = cached->second.status;
        if (rusagep) *rusagep = cached->second.r;
        proc_info.erase(cached);
        return pid;
    }

    // Use local buffers so that NULL statusp/rusagep are safe
    // and so that other children can always be recorded.
    //
    int status = 0;
    struct rusage ru;
    for (;;) {
        pid_t tmp_pid = wait3(&status, options, &ru);
        if (tmp_pid <= 0) {
            // 0: WNOHANG and nothing (more) to reap; -1: error.
            // Report that to the caller rather than claiming "pid" exited.
            //
            return tmp_pid;
        }
        if (tmp_pid == pid) {
            if (statusp) *statusp = status;
            if (rusagep) *rusagep = ru;
            return pid;
        }
        // Some other child: remember it and keep looking.
        // With WNOHANG this cannot block, because wait3() returns 0
        // as soon as no further child has changed state.
        //
        proc_info[tmp_pid] = proc_info_t(status, ru);
    }
}
#endif
// We have sent a quit request to the process; see if it's exited.
// This is called when the core client exits,
// or when a project is detached or reset
......@@ -146,11 +106,8 @@ bool ACTIVE_TASK::has_task_exited() {
}
}
#else
int my_pid, stat;
struct rusage rs;
my_pid = wait4(pid, &stat, WNOHANG, &rs);
if (my_pid == pid) {
// We don't use status
if (waitpid(pid, 0, WNOHANG) == pid) {
exited = true;
}
#endif
......@@ -411,11 +368,9 @@ bool ACTIVE_TASK_SET::check_app_exited() {
}
}
#else
int pid;
int stat;
struct rusage rs;
int pid, stat;
if ((pid = wait4(0, &stat, WNOHANG, &rs)) > 0) {
if ((pid = waitpid(0, &stat, WNOHANG)) > 0) {
scope_messages.printf("ACTIVE_TASK_SET::check_app_exited(): process %d is done\n", pid);
atp = lookup_pid(pid);
if (!atp) {
......
......@@ -12,19 +12,19 @@ dnl Set the BOINC version here. You can also use the set-version script.
AC_INIT(BOINC, 4.72)
AC_ARG_ENABLE(debug,
AC_HELP_STRING([--enable-debug],
AS_HELP_STRING([--enable-debug],
[enable tracing and debugging flags for all components]),
[enable_debug=yes],
[])
AC_ARG_ENABLE(server,
AC_HELP_STRING([--disable-server],
AS_HELP_STRING([--disable-server],
[disable building the scheduling server]),
[],
[enable_server=yes])
AC_ARG_ENABLE(client,
AC_HELP_STRING([--disable-client],
AS_HELP_STRING([--disable-client],
[disable building the client]),
[],
[enable_client=yes])
......@@ -32,7 +32,7 @@ AC_ARG_ENABLE(client,
dnl ======================================================================
dnl some voodoo required for building portable client-binary (client, clientgui)
dnl ======================================================================
AC_ARG_ENABLE([client-release], AC_HELP_STRING([--enable-client-release],
AC_ARG_ENABLE([client-release], AS_HELP_STRING([--enable-client-release],
[Try building a portable "release-candidate" (currently implemented for Linux and Solaris only): \
this links libstd++ statically. You will probably need gcc-3.0 for
this to produce a portable client-binary.
......@@ -44,7 +44,7 @@ AC_ARG_ENABLE([client-release], AC_HELP_STRING([--enable-client-release],
disable_static_client=yes])
m4_divert_once([HELP_ENABLE],
AC_HELP_STRING([], [Default: --enable-server --enable-client:
AS_HELP_STRING([], [Default: --enable-server --enable-client:
builds both server and client]))
if test "${enable_server}" = yes ; then
......@@ -70,7 +70,7 @@ fi
echo "--- Configuring BOINC AC_PACKAGE_VERSION (${build_state}) ---"
echo "--- Build Components: (${configured_to_build}) ---"
AC_CANONICAL_SYSTEM
AC_CANONICAL_TARGET
dnl generate .tar.gz, .tar.bz2, .zip
dnl AM_INIT_AUTOMAKE(dist-bzip2 dist-zip)
......@@ -351,8 +351,7 @@ BOINC_GETSOCKOPT_TYPE
dnl Checks for library functions.
AC_PROG_GCC_TRADITIONAL
AC_FUNC_VPRINTF
AC_FUNC_WAIT3
AC_CHECK_FUNCS(lockf flock setpriority wait4 strlcpy strlcat sigaction getutent setutent)
AC_CHECK_FUNCS(lockf flock setpriority strlcpy strlcat sigaction getutent setutent)
dnl Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
......@@ -553,7 +552,7 @@ AC_CONFIG_FILES([
])
dnl AC_CONFIG_HEADER([config.h])
AM_CONFIG_HEADER([config.h])
AC_CONFIG_HEADERS([config.h])
AC_OUTPUT
......
......@@ -9,7 +9,7 @@ array("July 25, 2005",
and some of the projects using it."
),
array("July 24, 2005",
"<a href=http://www.fatbat.dk/thesis/>A paper by Jakob Pedersen & Christian S&oslash;ttrup</a>
"<a href=http://www.fatbat.dk/thesis/>A paper by Jakob Pedersen and Christian S&oslash;ttrup</a>
discusses the interoperation of the Grid and BOINC."
),
......
......@@ -58,6 +58,7 @@ show_name("James Drews");
show_name("Charlie Fenton");
show_name("John Flynn III");
show_name("Michael Gary");
show_name("Marco Gazzoni");
show_name("Gary Gibson");
show_name("Walt Gribben");
show_name("Jim Harris");
......@@ -72,6 +73,7 @@ show_name("John Kirby");
show_name("Eric Korpela");
show_name("Janus Kristensen");
show_name("Tim Lan");
show_name("Egon Larsson");
show_name("Gilson Laurent");
show_name("Bernd Machenschalk");
show_name("Sebastian Masch");
......
......@@ -3,14 +3,14 @@ require_once("docutil.php");
page_head("Database purging utility");
echo "
As a BOINC project operates, the size of its
workunit result tables increases,
and eventually they become inconveniently large
(for example, adding a field or building an index may take hours or days).
workunit and result tables increases.
Eventually they become so large
that adding a field or building an index may take hours or days.
<p>
To address this problem, BOINC provides a utility <b>db_purge</b>
that 'purges' result and WU records by writing them
to XML-format archive files, then deleting them from the database.
that writes result and WU records to XML-format archive files,
then deletes them from the database.
<p>
Workunits are purged only when their input files have been deleted.
......@@ -18,14 +18,6 @@ Because of BOINC's file-deletion policy,
this implies that all results are completed.
So when a workunit is purged, all its results are purged too.
<p>
The archive files have names of the form
wu_archive_TIME and result_archive_TIME
where TIME is the Unix time the file was created.
In addition, db_purge generates index files
'wu_index' and 'result_index'
associating each WU and result ID with the timestamp of its archive file.
<p>
Run db_purge from the project's bin/ directory.
It will create an archive/ directory and store archive files there.
......@@ -52,6 +44,90 @@ list_item("-d N", "Set logging verbosity to N (1,2,3)");
list_end();
echo "
<h3>Archive file format</h3>
<p>
The archive files have names of the form
wu_archive_TIME and result_archive_TIME
where TIME is the Unix time the file was created.
In addition, db_purge generates index files
'wu_index' and 'result_index'
associating each WU and result ID with the timestamp of its archive file.
<p>
Both types of index file consist of a number of rows, each containing:
<pre>
ID TIME
</pre>
Each row consists of the ID field of the WU or result, 5 spaces,
and the timestamp part of the archive filename where the record with that
ID can be found.
<p>
The format of a record in the result archive file is:
".html_text("
<result_archive>
<id>%d</id>
<create_time>%d</create_time>
<workunitid>%d</workunitid>
<server_state>%d</server_state>
<outcome>%d</outcome>
<client_state>%d</client_state>
<hostid>%d</hostid>
<userid>%d</userid>
<report_deadline>%d</report_deadline>
<sent_time>%d</sent_time>
<received_time>%d</received_time>
<name>%s</name>
<cpu_time>%.15e</cpu_time>
<xml_doc_in>%s</xml_doc_in>
<xml_doc_out>%s</xml_doc_out>
<stderr_out>%s</stderr_out>
<batch>%d</batch>
<file_delete_state>%d</file_delete_state>
<validate_state>%d</validate_state>
<claimed_credit>%.15e</claimed_credit>
<granted_credit>%.15e</granted_credit>
<opaque>%f</opaque>
<random>%d</random>
<app_version_num>%d</app_version_num>
<appid>%d</appid>
<exit_status>%d</exit_status>
<teamid>%d</teamid>
<priority>%d</priority>
<mod_time>%s</mod_time>
</result_archive>
")."
The format of a record in the WU archive file is:
".html_text("
<workunit_archive>
<id>%d</id>
<create_time>%d</create_time>
<appid>%d</appid>
<name>%s</name>
<xml_doc>%s</xml_doc>
<batch>%d</batch>
<rsc_fpops_est>%.15e</rsc_fpops_est>
<rsc_fpops_bound>%.15e</rsc_fpops_bound>
<rsc_memory_bound>%.15e</rsc_memory_bound>
<rsc_disk_bound>%.15e</rsc_disk_bound>
<need_validate>%d</need_validate>
<canonical_resultid>%d</canonical_resultid>
<canonical_credit>%.15e</canonical_credit>
<transition_time>%d</transition_time>
<delay_bound>%d</delay_bound>
<error_mask>%d</error_mask>
<file_delete_state>%d</file_delete_state>
<assimilate_state>%d</assimilate_state>
<hr_class>%d</hr_class>
<opaque>%f</opaque>
<min_quorum>%d</min_quorum>
<target_nresults>%d</target_nresults>
<max_error_results>%d</max_error_results>
<max_total_results>%d</max_total_results>
<max_success_results>%d</max_success_results>
<result_template_file>%s</result_template_file>
<priority>%d</priority>
<mod_time>%s</mod_time>
</workunit_archive>
")."
";
page_tail();
?>
......@@ -624,17 +624,25 @@ void SCHEDULER_REPLY::got_bad_result() {
}
}
// returns zero if result still feasible. result may have been
// given a new report time. Returns nonzero if result is no
// longer feasible (not enough time to compute it on host). In
// this case result is unchanged.
int possibly_give_result_new_deadline(DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REPLY& reply) {
// returns zero if result still feasible.
// result may have been given a new report time.
// Returns nonzero if result is no longer feasible
// (not enough time to compute it on host).
// In this case result is unchanged.
//
int possibly_give_result_new_deadline(
DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REPLY& reply
) {
const double resend_frac = 0.5; // range [0, 1)
int result_sent_time = time(0);
int result_report_deadline = result_sent_time + (int)(resend_frac*(result.report_deadline - result.sent_time));
if (result_report_deadline < result.report_deadline) result_report_deadline = result.report_deadline;
if (result_report_deadline > result_sent_time + wu.delay_bound) result_report_deadline = result_sent_time + wu.delay_bound;
if (result_report_deadline < result.report_deadline) {
result_report_deadline = result.report_deadline;
}
if (result_report_deadline > result_sent_time + wu.delay_bound) {
result_report_deadline = result_sent_time + wu.delay_bound;
}
// If infeasible, return without modifying result
//
......@@ -651,9 +659,9 @@ int possibly_give_result_new_deadline(DB_RESULT& result, WORKUNIT& wu, SCHEDULER
//
log_messages.printf(
SCHED_MSG_LOG::DEBUG,
"[RESULT#%d] [HOST#%d] %s report_deadline (resend lost work)\n",
"[RESULT#%d] [HOST#%d] %s report_deadline (resend lost work)\n",
result.id, reply.host.id,
result_report_deadline==result.report_deadline?"NO update to":"Updated"
result_report_deadline==result.report_deadline?"NO update to":"Updated"
);
result.sent_time = result_sent_time;
result.report_deadline = result_report_deadline;
......@@ -671,9 +679,9 @@ int add_result_to_reply(
retval = add_wu_to_reply(wu, reply, platform, app, avp);
if (retval) return retval;
// in the scheduling locality case, reduce the available space by
// LESS than the workunit rsc_disk_bound, IF the host already has
// the file OR the file was not already sent.
// in the scheduling locality case,
// reduce the available space by LESS than the workunit rsc_disk_bound,
// IF the host already has the file OR the file was not already sent.
//
if (!config.locality_scheduling ||
decrement_disk_space_locality(wu, request, reply)
......@@ -692,20 +700,20 @@ int add_result_to_reply(
//
result.report_deadline = result.sent_time + wu.delay_bound;
result.server_state = RESULT_SERVER_STATE_IN_PROGRESS;
}
else {
// Result was ALREADY sent to this host but never arrived. So
// we are resending it. result.report_deadline and time_sent
} else {
// Result was ALREADY sent to this host but never arrived.
// So we are resending it.
// result.report_deadline and time_sent
// have already been updated before this function was called.
//
if (result.report_deadline < result.sent_time) {
result.report_deadline = result.sent_time + 10;
}
result.report_deadline = result.sent_time + 10;
}
if (result.report_deadline > result.sent_time + wu.delay_bound) {
result.report_deadline = result.sent_time + wu.delay_bound;
}
result.report_deadline = result.sent_time + wu.delay_bound;
}
log_messages.printf(
log_messages.printf(
SCHED_MSG_LOG::DEBUG,
"[RESULT#%d] [HOST#%d] (resend lost work)\n",
result.id, reply.host.id
......@@ -1172,18 +1180,19 @@ bool resend_lost_work(
continue;
}
// If time is too close to the deadline, or we already
// have a canonical result, or WU error flag is set, then
// don't bother to resend this result. Instead make it
// time out right away so that the transitioner does
// 'the right thing'.
// If time is too close to the deadline,
// or we already have a canonical result,
// or WU error flag is set,
// then don't bother to resend this result.
// Instead make it time out right away
// so that the transitioner does 'the right thing'.
//
char warning_msg[256];
if (
wu.error_mask ||
wu.canonical_resultid ||
possibly_give_result_new_deadline(result, wu, reply)
) {
) {
result.report_deadline = time(0);
retval = result.update_subset();
if (retval) {
......@@ -1221,7 +1230,6 @@ bool resend_lost_work(
reply.host.id, result.id
);
continue;
}
sprintf(warning_msg, "Resent lost result %s", result.name);
USER_MESSAGE um(warning_msg, "high");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment