diff --git a/sm5/CHANGELOG b/sm5/CHANGELOG index 4269a8bb28eeecbf7bf6a46731031e063f567dcb..edc7ec46b9c2ffac3c2219abf31c96fe5685270d 100644 --- a/sm5/CHANGELOG +++ b/sm5/CHANGELOG @@ -1,6 +1,6 @@ CHANGELOG for smartmontools -$Id: CHANGELOG,v 1.50 2002/11/17 05:57:32 ballen4705 Exp $ +$Id: CHANGELOG,v 1.51 2002/11/21 14:11:18 ballen4705 Exp $ Copyright (C) 2002 Bruce Allen <smartmontools-support@lists.sourceforge.net> @@ -24,10 +24,30 @@ California, Santa Cruz. http://ssrc.soe.ucsc.edu/ NOTES FOR FUTURE RELEASES: see TODO file. CURRENT RELEASE (see VERSION file in this directory): + + Allen: smartd: added Directive -m for sending test email and + for modifying email reminder behavior. Updated manual, and sample + configuration file to illustrate & explain this. + + Allen: smartd: increased size of a continued smartd.conf line to + 1023 characters. + + Allen: Simplified Directive parsers and improved warning/error + messages. + smartmontools-5.0.45 Fixed bug in smartd where testunitready logic inverted prevented functioning on scsi devices. + The bug in question only affects smartd users with scsi devices. + To see if your version of smartd has the testunitready() bug, do + smartd -V + If the version of the module smartd.c in a line like: + Module: smartd.c revision: 1.66 date: 2002/11/17 + has a revision greater than or equal to 1.30, and less than or equal to + 1.64, then your version of the code has this problem. + This problem affected releases starting with RELEASE_5_0_16 up to and + including RELEASE_5_0_43. Added testunitnotready to smartctl for symmetry with smartd. 
diff --git a/sm5/VERSION b/sm5/VERSION index ea90ee31980757b2e469741512bcb39e73494e78..9e5feb5256930f3cae636754eef8a244ede164eb 100644 --- a/sm5/VERSION +++ b/sm5/VERSION @@ -1 +1 @@ -45 +46 diff --git a/sm5/smartd.8 b/sm5/smartd.8 index 7ba71286f342ad715cc8dd2440509abfa7bfbe63..c11fb8e6bfe7625129014e96251c3cb8715a465f 100644 --- a/sm5/smartd.8 +++ b/sm5/smartd.8 @@ -1,6 +1,6 @@ \# Copyright (C) 2002 Bruce Allen <smartmontools-support@lists.sourceforge.net> \# -\# $Id: smartd.8,v 1.30 2002/11/14 06:06:29 ballen4705 Exp $ +\# $Id: smartd.8,v 1.31 2002/11/21 14:11:19 ballen4705 Exp $ \# \# This program is free software; you can redistribute it and/or modify it \# under the terms of the GNU General Public License as published by the Free @@ -16,7 +16,7 @@ \# Research Center), Jack Baskin School of Engineering, University of \# California, Santa Cruz. http://ssrc.soe.ucsc.edu/ \# -.TH SMARTD 8 "$Date: 2002/11/14 06:06:29 $" "smartmontools-5.0" +.TH SMARTD 8 "$Date: 2002/11/21 14:11:19 $" "smartmontools-5.0" .SH NAME smartd \- S.M.A.R.T. Daemon .SH SYNOPSIS @@ -208,10 +208,11 @@ Section below! .B \ \ /dev/hdc -a -I 194 -I 5 -i 12 .B # .nf -.B # SCSI disks: +.B # SCSI disks. Send a TEST warning email to admin on +.B # startup. (Note: the -1 is the argument of -m.) .B # .B \ \ /dev/sda -.B \ \ /dev/sdc -M admin@yoyodyne.com +.B \ \ /dev/sdc -M admin@yoyodyne.com -m -1 .B # .nf .B # Strange device. It's SCSI: @@ -255,7 +256,8 @@ name. .B For ATA disks, if .B no Directives appear, the disk will not be monitored. -The '\-a' Directive will try to monitor everything possible. +Conversely, the '\-a' Directive will try to monitor everything +possible. .B If a SCSI disk is listed, it will be monitored at the only implemented level: roughly equivalent @@ -339,13 +341,17 @@ life period." command-line option.] .TP .B \-M <ADD> -Mail: Send a warning email to the email address <ADD> if the '\-c', '\-l', '\-L', or '\-f' -Directives detect a failure or a new error. 
This Directive only -works in conjunction with these other Directives (or with the equivalent '\-a' -Directive). To prevent your email in-box from getting filled up -with warning messages, only a single warning will be sent for each of -the enabled test types, '\-c', '\-l', '\-L', or '\-f', even if more -than one failure or error is detected. +Mail: Send a warning email to the email address <ADD> if +the '\-c', '\-l', '\-L', or '\-f' +Directives detect a failure or a new error. +This Directive only works in conjunction with these other Directives +(or with the equivalent '\-a' Directive). To prevent your email +in-box from getting filled up with warning messages, by default only a +single warning will be sent for each of the enabled test +types, '\-c', '\-l', '\-L', or '\-f', +even if more than one failure or error is +detected or if the failure or error persists. [This behavior can be +modified; see the '\-m' Directive below.] The email is sent using the system .B mail @@ -356,11 +362,50 @@ find the mail command (normally /bin/mail) the command must be in the path of the shell or environment from which .B smartd -was started. +was started. To test that the mail is being sent correctly, or to +alter the pattern of when mail is sent, see the `\-m' Directive below. -If you want to send email to more than one user, you can use the form +To send email to more than one user, please use the following form for the address +<ADD>: .B user1@add1,user2@add2,...,userN@addN -with no spaces for <ADD>. +(with no spaces). +.TP +.B \-m <N> +Modify Mail: Modifies the behavior of the '\-M' email Directive above, +and has no effect without this other Directive. This option controls +when, and how often, the '\-M' Directive sends email warning messages. + +This Directive takes a decimal integer argument <N> in the range from +-3 to 3 inclusive. +.nf +.B If <N>=0 or 1: +.fi +equivalent to not having this '\-m' Directive present at all. 
Only +one warning email is sent for each type of disk problem detected. +.nf +.B If <N>=2: +.fi +send additional warning reminder emails, once per day, for each type +of disk problem detected. +.nf +.B If <N>=3: +.fi +send additional warning reminder emails, after a one-day interval, +then a two-day interval, then a four-day interval, and so on for each +type of disk problem detected. Each interval is twice as long as the +previous interval. +.nf +.B If <N>=-1, -2, or -3: +.fi +Negative values of <N> have the same meaning as the corresponding +positive value, but also send a single additional "test email" +immediately upon +.B smartd +startup. This allows one to verify that any email is correctly +delivered. For example, setting <N>=-2 will send a test email on +.B smartd +startup, and additional warning reminder emails at one-day intervals +after any disk problems are detected. .TP .B \-p Prefail: Report anytime that a Prefail Attribute has changed @@ -548,4 +593,4 @@ Please let us know if there is an on\-line source for this document. .SH CVS ID OF THIS PAGE: -$Id: smartd.8,v 1.30 2002/11/14 06:06:29 ballen4705 Exp $ +$Id: smartd.8,v 1.31 2002/11/21 14:11:19 ballen4705 Exp $ diff --git a/sm5/smartd.c b/sm5/smartd.c index c709e357913ad521055b259e24ca6faa68209d7a..14b3e885c7bd5f468c2ec37050418fd4a7f12b51 100644 --- a/sm5/smartd.c +++ b/sm5/smartd.c @@ -36,6 +36,7 @@ #include <errno.h> #include <string.h> #include <time.h> +#include <limits.h> #include "atacmds.h" #include "scsicmds.h" #include "smartd.h" @@ -45,7 +46,7 @@ // CVS ID strings extern const char *CVSid1, *CVSid2; -const char *CVSid6="$Id: smartd.c,v 1.66 2002/11/17 05:30:11 ballen4705 Exp $" +const char *CVSid6="$Id: smartd.c,v 1.67 2002/11/21 14:11:19 ballen4705 Exp $" CVSID1 CVSID2 CVSID3 CVSID4 CVSID7; // global variable used for control of printing, passing arguments, etc. @@ -89,20 +90,49 @@ void printout(int priority,char *fmt, ...){ // If address is null, this just prints a warning message. 
But if // address is non-null then send and log a warning email. -void printandmail(char *address, mailinfo *mail, int priority, char *fmt, ...){ - char command[2048], message[256], hostname[256]; +void printandmail(cfgfile *cfg, int which, int priority, char *fmt, ...){ + char command[2048], message[256], hostname[256], additional[256], original[256], further[256]; int status; + time_t epoch; va_list ap; + const int day=24*3600; + int days=0; + char *address=cfg->address; + mailinfo *mail=cfg->maildata+which; - // See if user wants us to send mail, or if we already have - if (!address || mail->logged) + // See if user wants us to send mail + if (!address) return; - // record the time of the mail message, and increment counter. This - // is for later use if we decide to implement multiple email warning - // messages after some delay time. - mail->logged++; - mail->lastsent=time(NULL); + // check for sanity + if (cfg->emailopt<0 || cfg->emailopt>3){ + printout(LOG_INFO,"internal error in printandmail(): cfg->emailopts=%d\n",cfg->emailopt); + return; + } + + // Return if a single warning mail has been sent. + if ((cfg->emailopt==0 || cfg->emailopt==1) && mail->logged) + return; + + // To decide if to send mail, we need to know what time it is. 
+ epoch=time(NULL); + + // Return if less than one day has gone by + if (cfg->emailopt==2 && mail->logged && epoch<(mail->lastsent+day)) + return; + + // Return if less than 2^(logged-1) days have gone by + if (cfg->emailopt==3 && mail->logged){ + days=0x01<<(mail->logged-1); + days*=day; + if (epoch<(mail->lastsent+days)) + return; + } + + // record the time of this mail message, and the first mail message + if (!mail->logged) + mail->firstsent=epoch; + mail->lastsent=epoch; // get system host name (not null terminated if length=MAX) if (gethostname(hostname, 256)) @@ -114,29 +144,55 @@ void printandmail(char *address, mailinfo *mail, int priority, char *fmt, ...){ va_start(ap, fmt); vsnprintf(message, 256, fmt, ap); va_end(ap); + + // appropriate message about further information + additional[0]=original[0]=further[0]='\0'; + if (which) { + sprintf(further,"You can also use the smartctl utility for further investigation.\n"); + + switch (cfg->emailopt){ + case 0: + case 1: + sprintf(additional,"No additional email messages about this problem will be sent.\n"); + break; + case 2: + sprintf(additional,"Another email message will be sent in 24 hours if the problem persists\n"); + break; + case 3: + sprintf(additional,"Another email message will be sent in %d days if the problem persists\n", + (0x01)<<mail->logged); + break; + } + if (cfg->emailopt>1 && mail->logged) + sprintf(original,"The original email about this issue was sent at %s\n",ctime(&(mail->firstsent))); + } - // now construct a command to send this as EMAIL, and issue it. 
- snprintf(command, 2048, "mail -s '%s: SMART errors detected' %s > /dev/null 2> /dev/null << \"ENDMAIL\"\n" - "This email was generated by the smartd daemon running on machine:\n" - "%s\n" + // now construct a command to send this as EMAIL + snprintf(command, 2048, "mail -s 'SMART errors detected on host: %s' %s > /dev/null 2> /dev/null << \"ENDMAIL\"\n" + "This email was generated by the smartd daemon running on host:\n" + "%s\n\n" "The following warning/error was logged by the smartd daemon:\n" - "%s" - "Further details can be found in the machine's syslog (/var/log/messages).\n" - "You can also use the smartctl utility for further investigation.\n" - "No additional email messages about this problem will be sent.\n" + "%s\n" + "For further details see the syslog (/var/log/messages) on host:\n" + "%s\n\n" + "%s%s%s" "ENDMAIL\n", - hostname, address, hostname, message); -#if (0) - snprintf(command,1024, "echo '%s' | mail -s '%s: smartd detected SMART errors' %s > /dev/null 2> /dev/null", - message, hostname, address); -#endif + hostname, address, hostname, message, hostname, further, original, additional); + // issue the command to send email status=system(command); if (WEXITSTATUS(status)) printout(LOG_CRIT,"Email warning message to %s failed (32-bit exit status: %d)\n",address,status); - else - printout(LOG_INFO,"Email warning message sent to %s\n",address); - + else { + if (which) + printout(LOG_INFO,"Email warning message sent to %s\n",address); + else + printout(LOG_INFO,"Email test message sent to %s\n",address); + } + + // increment mail sent counter + mail->logged++; + return; } @@ -236,6 +292,7 @@ void Directives() { printout(LOG_INFO," -L Monitor SMART Self-Test Log, report new errors\n"); printout(LOG_INFO," -f Monitor 'Usage' Attributes, report failures\n"); printout(LOG_INFO," -M ADD Send email warning to address ADD\n"); + printout(LOG_INFO," -m N Modify email warning behavior. 
-3 <= N <= 3\n"); printout(LOG_INFO," -p Report changes in 'Prefailure' Attributes\n"); printout(LOG_INFO," -u Report changes in 'Usage' Attributes\n"); printout(LOG_INFO," -t Equivalent to -p and -u Directives\n"); @@ -593,6 +650,12 @@ int ataCheckDevice(atadevices_t *drive){ char *name=drive->devicename; cfgfile *cfg=drive->cfg; + // If user has asked, test the email warning system + if (cfg->emailopt<0){ + cfg->emailopt*=-1; + printandmail(cfg, 0, LOG_CRIT, "TEST EMAIL from smartd for device: %s\n", drive->devicename); + } + // if we can't open device, fail gracefully rather than hard -- // perhaps the next time around we'll be able to open it if ((fd=opendevice(name))<0) @@ -605,7 +668,7 @@ int ataCheckDevice(atadevices_t *drive){ printout(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name); else if (status==1){ printout(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name); - printandmail(cfg->address, cfg->maildata , LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name); + printandmail(cfg, 1, LOG_CRIT, "Device: %s, FAILED SMART self-check. 
BACK UP DATA NOW!\n", name); } } @@ -638,7 +701,7 @@ int ataCheckDevice(atadevices_t *drive){ // warning message printout(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %s.\n", name, loc); - printandmail(cfg->address, cfg->maildata+1, LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %s.\n", name, loc); + printandmail(cfg, 2, LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %s.\n", name, loc); } } @@ -688,7 +751,7 @@ int ataCheckDevice(atadevices_t *drive){ if (new>old){ printout(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n", name, (int)old, new); - printandmail(cfg->address, cfg->maildata+2, LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n", + printandmail(cfg, 3, LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n", name, (int)old, new); } if (new>=0) @@ -704,7 +767,7 @@ int ataCheckDevice(atadevices_t *drive){ if (new>old){ printout(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n", name, old, new); - printandmail(cfg->address, cfg->maildata+3, LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n", + printandmail(cfg, 4, LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n", name, old, new); } // this last line is probably not needed, count always increases @@ -724,6 +787,12 @@ int scsiCheckDevice(scsidevices_t *drive){ int fd; cfgfile *cfg=drive->cfg; + // If the user has asked for it, test the email warning system + if (cfg->emailopt<0){ + cfg->emailopt*=-1; + printandmail(cfg, 0, LOG_CRIT, "TEST EMAIL from smartd for device: %s\n", drive->devicename); + } + // if we can't open device, fail gracefully rather than hard -- // perhaps the next time around we'll be able to open it if ((fd=opendevice(drive->devicename))<0) @@ -737,7 +806,7 @@ int scsiCheckDevice(scsidevices_t *drive){ if (returnvalue) { printout(LOG_CRIT, "Device: %s, SMART Failure: (%d) %s\n", drive->devicename, (int)returnvalue, scsiSmartGetSenseCode(returnvalue)); - 
printandmail(cfg->address, cfg->maildata, LOG_CRIT, "Device: %s, SMART Failure: (%d) %s\n", drive->devicename, + printandmail(cfg, 1, LOG_CRIT, "Device: %s, SMART Failure: (%d) %s\n", drive->devicename, (int)returnvalue, scsiSmartGetSenseCode(returnvalue)); } else if (debugmode) @@ -786,6 +855,35 @@ char copyleftstring[]= cfgfile config[MAXENTRIES]; +// exits with an error message, or returns integer value of token +int inttoken(char *arg, char *name, char *token, int lineno, char *configfile, int min, int max){ + char *endptr; + int val; + + // make sure argument is there + if (!arg) { + printout(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n", + configfile, lineno, name, token, min, max); + Directives(); + exit(1); + } + + // get argument value (base 10), check that it's integer, and in-range + val=strtol(arg,&endptr,10); + if (*endptr!='\0' || val<min || val>max ) { + printout(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n", + configfile, lineno, name, token, arg, min, max); + Directives(); + exit(1); + } + + // all is well; return value + return val; +} + +// This function returns non-zero if it has correctly parsed a token, +// else zero if it has failed to parse a token. Or it exits with a +// directive message if there is a token-parsing problem. int parsetoken(char *token,cfgfile *cfg){ char sym=token[1]; char *name=cfg->name; @@ -798,8 +896,8 @@ int parsetoken(char *token,cfgfile *cfg){ // is the token not recognized? 
if (*token!='-' || strlen(token)!=2) { - printout(LOG_CRIT,"Drive: %s, unknown Directive: %s at line %d of file %s\n", - name,token,lineno,CONFIGFILE); + printout(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n", + CONFIGFILE, lineno, name, token); Directives(); exit(1); } @@ -807,7 +905,6 @@ int parsetoken(char *token,cfgfile *cfg){ // let's parse the token and swallow its argument switch (sym) { char *arg; - char *endptr; int val; case 'P': @@ -862,67 +959,43 @@ int parsetoken(char *token,cfgfile *cfg){ cfg->selftest=1; cfg->errorlog=1; break; + case 'm': + // email warning option + cfg->emailopt=inttoken(arg=strtok(NULL,delim), name, token, lineno, CONFIGFILE, -3, 3); + break; + case 'i': + // ignore failure of usage attribute + val=inttoken(arg=strtok(NULL,delim), name, token, lineno, CONFIGFILE, 1, 255); + isattoff(val,cfg->failatt,1); + break; + case 'I': + // ignore attribute for tracking purposes + val=inttoken(arg=strtok(NULL,delim), name, token, lineno, CONFIGFILE, 1, 255); + isattoff(val,cfg->trackatt,1); + break; + case 'C': + // period (time interval) for checking + checktime=inttoken(arg=strtok(NULL,delim), name, token, lineno, CONFIGFILE, 10, INT_MAX); + break; case 'M': // send email to address that follows arg=strtok(NULL,delim); if (!arg) { - printout(LOG_CRIT,"Drive %s Directive: %s at line %d of file %s needs email address.\n", - name,token,lineno,CONFIGFILE); + printout(LOG_CRIT,"File %s line %d (drive %s): Directive: %s needs email address(es)\n", + CONFIGFILE, lineno, name, token); Directives(); exit(1); } if (!(cfg->address=strdup(arg))){ - printout(LOG_CRIT,"Drive %s Directive: %s at line %d of file %s: no free memory for address %s.\n", - name,token,lineno,CONFIGFILE,arg); + printout(LOG_CRIT,"File %s line %d (drive %s): Directive: %s: no free memory for email address(es) %s\n", + CONFIGFILE, lineno, name, token, arg); Directives(); exit(1); } break; - case 'i': // ignore - case 'I': // ignore - case 'C': // period (time 
interval) for checking - // ignore a particular vendor attribute for tracking (i) or - // failure (I). Or give a check interval for sleeping. - arg=strtok(NULL,delim); - // make sure argument is there - if (!arg) { - printout(LOG_CRIT,"Drive %s Directive: %s at line %d of file %s needs integer argument.\n", - name,token,lineno,CONFIGFILE); - Directives(); - exit(1); - } - // get argument value, check that it's properly-formed, an - // integer, and in-range - val=strtol(arg,&endptr,10); - switch (sym) { - case 'C': - if (*endptr!='\0' || val<10) { - printout(LOG_CRIT,"Drive %s Directive: %s, line %d, file %s, has argument: %s, mimimum is ten secoonds\n", - name,token,lineno,CONFIGFILE,arg); - Directives(); - exit(1); - } - checktime=val; - return 1; - case 'i': - case 'I': - if (*endptr!='\0' || val<=0 || val>255 ) { - printout(LOG_CRIT,"Drive %s Directive: %s, line %d, file %s, has argument: %s, needs 0 < n < 256\n", - name,token,lineno,CONFIGFILE,arg); - Directives(); - exit(1); - } - // put into correct list (bitmaps, access only with isattoff() - // function. Turns OFF corresponding attribute. - if (sym=='I') - isattoff(val,cfg->trackatt,1); - else - isattoff(val,cfg->failatt,1); - return 1; - } default: - printout(LOG_CRIT,"Drive: %s, unknown Directive: %s at line %d of file %s\n", - name,token,lineno,CONFIGFILE); + printout(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n", + CONFIGFILE, lineno, name, token); Directives(); exit(1); } @@ -1010,7 +1083,15 @@ int parseconfigline(int entry, int lineno,char *line){ Directives(); exit(1); } - + + // additional sanity check. Has user set -m without -M? 
+ if (cfg->emailopt && !cfg->address){ + printout(LOG_CRIT,"Drive: %s, Directive -m useless without address Directive -M on line %d of file %s\n", + cfg->name, cfg->lineno, CONFIGFILE); + Directives(); + exit(1); + } + entry++; free(copy); return 1; diff --git a/sm5/smartd.conf b/sm5/smartd.conf index c0f64ee921891e5bbc772b27176899004832a303..42ccb52703b15334d6509bce5f0e9981d140339b 100644 --- a/sm5/smartd.conf +++ b/sm5/smartd.conf @@ -23,13 +23,14 @@ DEVICESCAN /dev/hdb -c l L -t -I 194 # A very silent check. Only report SMART health status if it fails -/dev/hdc -c +# But send an email in this case +/dev/hdc -c -M admin@yoyodyne.com -# First two SCSI disks. Note that only the S directive applies +# First two SCSI disks. This will monitor everything that smartd can +# monitor. /dev/sda -S /dev/sdb -S - # HERE IS A LIST OF DIRECTIVES FOR THIS CONFIGURATION FILE # -A Device is an ATA device # -S Device is a SCSI device @@ -40,6 +41,7 @@ DEVICESCAN # -L Monitor SMART Self-Test Log for new errors # -f Monitor for failure of any 'Usage' Attributes # -M ADD Send warning email to ADD for -c, -l, -L, and -f +# -m N Modify email warning behavior. -3 <= N <= 3 # -p Report changes in 'Prefailure' Attributes # -u Report changes in 'Usage' Attributes # -t Equivalent to -p and -u Directives diff --git a/sm5/smartd.cpp b/sm5/smartd.cpp index 7912baa7824b517c61fa5958062151205843d7c7..1c56f7586b04a14c616dcfc3df964697a06fe6a4 100644 --- a/sm5/smartd.cpp +++ b/sm5/smartd.cpp @@ -36,6 +36,7 @@ #include <errno.h> #include <string.h> #include <time.h> +#include <limits.h> #include "atacmds.h" #include "scsicmds.h" #include "smartd.h" @@ -45,7 +46,7 @@ // CVS ID strings extern const char *CVSid1, *CVSid2; -const char *CVSid6="$Id: smartd.cpp,v 1.66 2002/11/17 05:30:11 ballen4705 Exp $" +const char *CVSid6="$Id: smartd.cpp,v 1.67 2002/11/21 14:11:19 ballen4705 Exp $" CVSID1 CVSID2 CVSID3 CVSID4 CVSID7; // global variable used for control of printing, passing arguments, etc. 
@@ -89,20 +90,49 @@ void printout(int priority,char *fmt, ...){ // If address is null, this just prints a warning message. But if // address is non-null then send and log a warning email. -void printandmail(char *address, mailinfo *mail, int priority, char *fmt, ...){ - char command[2048], message[256], hostname[256]; +void printandmail(cfgfile *cfg, int which, int priority, char *fmt, ...){ + char command[2048], message[256], hostname[256], additional[256], original[256], further[256]; int status; + time_t epoch; va_list ap; + const int day=24*3600; + int days=0; + char *address=cfg->address; + mailinfo *mail=cfg->maildata+which; - // See if user wants us to send mail, or if we already have - if (!address || mail->logged) + // See if user wants us to send mail + if (!address) return; - // record the time of the mail message, and increment counter. This - // is for later use if we decide to implement multiple email warning - // messages after some delay time. - mail->logged++; - mail->lastsent=time(NULL); + // check for sanity + if (cfg->emailopt<0 || cfg->emailopt>3){ + printout(LOG_INFO,"internal error in printandmail(): cfg->emailopts=%d\n",cfg->emailopt); + return; + } + + // Return if a single warning mail has been sent. + if ((cfg->emailopt==0 || cfg->emailopt==1) && mail->logged) + return; + + // To decide if to send mail, we need to know what time it is. 
+ epoch=time(NULL); + + // Return if less than one day has gone by + if (cfg->emailopt==2 && mail->logged && epoch<(mail->lastsent+day)) + return; + + // Return if less than 2^(logged-1) days have gone by + if (cfg->emailopt==3 && mail->logged){ + days=0x01<<(mail->logged-1); + days*=day; + if (epoch<(mail->lastsent+days)) + return; + } + + // record the time of this mail message, and the first mail message + if (!mail->logged) + mail->firstsent=epoch; + mail->lastsent=epoch; // get system host name (not null terminated if length=MAX) if (gethostname(hostname, 256)) @@ -114,29 +144,55 @@ void printandmail(char *address, mailinfo *mail, int priority, char *fmt, ...){ va_start(ap, fmt); vsnprintf(message, 256, fmt, ap); va_end(ap); + + // appropriate message about further information + additional[0]=original[0]=further[0]='\0'; + if (which) { + sprintf(further,"You can also use the smartctl utility for further investigation.\n"); + + switch (cfg->emailopt){ + case 0: + case 1: + sprintf(additional,"No additional email messages about this problem will be sent.\n"); + break; + case 2: + sprintf(additional,"Another email message will be sent in 24 hours if the problem persists\n"); + break; + case 3: + sprintf(additional,"Another email message will be sent in %d days if the problem persists\n", + (0x01)<<mail->logged); + break; + } + if (cfg->emailopt>1 && mail->logged) + sprintf(original,"The original email about this issue was sent at %s\n",ctime(&(mail->firstsent))); + } - // now construct a command to send this as EMAIL, and issue it. 
- snprintf(command, 2048, "mail -s '%s: SMART errors detected' %s > /dev/null 2> /dev/null << \"ENDMAIL\"\n" - "This email was generated by the smartd daemon running on machine:\n" - "%s\n" + // now construct a command to send this as EMAIL + snprintf(command, 2048, "mail -s 'SMART errors detected on host: %s' %s > /dev/null 2> /dev/null << \"ENDMAIL\"\n" + "This email was generated by the smartd daemon running on host:\n" + "%s\n\n" "The following warning/error was logged by the smartd daemon:\n" - "%s" - "Further details can be found in the machine's syslog (/var/log/messages).\n" - "You can also use the smartctl utility for further investigation.\n" - "No additional email messages about this problem will be sent.\n" + "%s\n" + "For further details see the syslog (/var/log/messages) on host:\n" + "%s\n\n" + "%s%s%s" "ENDMAIL\n", - hostname, address, hostname, message); -#if (0) - snprintf(command,1024, "echo '%s' | mail -s '%s: smartd detected SMART errors' %s > /dev/null 2> /dev/null", - message, hostname, address); -#endif + hostname, address, hostname, message, hostname, further, original, additional); + // issue the command to send email status=system(command); if (WEXITSTATUS(status)) printout(LOG_CRIT,"Email warning message to %s failed (32-bit exit status: %d)\n",address,status); - else - printout(LOG_INFO,"Email warning message sent to %s\n",address); - + else { + if (which) + printout(LOG_INFO,"Email warning message sent to %s\n",address); + else + printout(LOG_INFO,"Email test message sent to %s\n",address); + } + + // increment mail sent counter + mail->logged++; + return; } @@ -236,6 +292,7 @@ void Directives() { printout(LOG_INFO," -L Monitor SMART Self-Test Log, report new errors\n"); printout(LOG_INFO," -f Monitor 'Usage' Attributes, report failures\n"); printout(LOG_INFO," -M ADD Send email warning to address ADD\n"); + printout(LOG_INFO," -m N Modify email warning behavior. 
-3 <= N <= 3\n"); printout(LOG_INFO," -p Report changes in 'Prefailure' Attributes\n"); printout(LOG_INFO," -u Report changes in 'Usage' Attributes\n"); printout(LOG_INFO," -t Equivalent to -p and -u Directives\n"); @@ -593,6 +650,12 @@ int ataCheckDevice(atadevices_t *drive){ char *name=drive->devicename; cfgfile *cfg=drive->cfg; + // If user has asked, test the email warning system + if (cfg->emailopt<0){ + cfg->emailopt*=-1; + printandmail(cfg, 0, LOG_CRIT, "TEST EMAIL from smartd for device: %s\n", drive->devicename); + } + // if we can't open device, fail gracefully rather than hard -- // perhaps the next time around we'll be able to open it if ((fd=opendevice(name))<0) @@ -605,7 +668,7 @@ int ataCheckDevice(atadevices_t *drive){ printout(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name); else if (status==1){ printout(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name); - printandmail(cfg->address, cfg->maildata , LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name); + printandmail(cfg, 1, LOG_CRIT, "Device: %s, FAILED SMART self-check. 
BACK UP DATA NOW!\n", name); } } @@ -638,7 +701,7 @@ int ataCheckDevice(atadevices_t *drive){ // warning message printout(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %s.\n", name, loc); - printandmail(cfg->address, cfg->maildata+1, LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %s.\n", name, loc); + printandmail(cfg, 2, LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %s.\n", name, loc); } } @@ -688,7 +751,7 @@ int ataCheckDevice(atadevices_t *drive){ if (new>old){ printout(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n", name, (int)old, new); - printandmail(cfg->address, cfg->maildata+2, LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n", + printandmail(cfg, 3, LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n", name, (int)old, new); } if (new>=0) @@ -704,7 +767,7 @@ int ataCheckDevice(atadevices_t *drive){ if (new>old){ printout(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n", name, old, new); - printandmail(cfg->address, cfg->maildata+3, LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n", + printandmail(cfg, 4, LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n", name, old, new); } // this last line is probably not needed, count always increases @@ -724,6 +787,12 @@ int scsiCheckDevice(scsidevices_t *drive){ int fd; cfgfile *cfg=drive->cfg; + // If the user has asked for it, test the email warning system + if (cfg->emailopt<0){ + cfg->emailopt*=-1; + printandmail(cfg, 0, LOG_CRIT, "TEST EMAIL from smartd for device: %s\n", drive->devicename); + } + // if we can't open device, fail gracefully rather than hard -- // perhaps the next time around we'll be able to open it if ((fd=opendevice(drive->devicename))<0) @@ -737,7 +806,7 @@ int scsiCheckDevice(scsidevices_t *drive){ if (returnvalue) { printout(LOG_CRIT, "Device: %s, SMART Failure: (%d) %s\n", drive->devicename, (int)returnvalue, scsiSmartGetSenseCode(returnvalue)); - 
printandmail(cfg->address, cfg->maildata, LOG_CRIT, "Device: %s, SMART Failure: (%d) %s\n", drive->devicename, + printandmail(cfg, 1, LOG_CRIT, "Device: %s, SMART Failure: (%d) %s\n", drive->devicename, (int)returnvalue, scsiSmartGetSenseCode(returnvalue)); } else if (debugmode) @@ -786,6 +855,35 @@ char copyleftstring[]= cfgfile config[MAXENTRIES]; +// exits with an error message, or returns integer value of token +int inttoken(char *arg, char *name, char *token, int lineno, char *configfile, int min, int max){ + char *endptr; + int val; + + // make sure argument is there + if (!arg) { + printout(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n", + configfile, lineno, name, token, min, max); + Directives(); + exit(1); + } + + // get argument value (base 10), check that it's integer, and in-range + val=strtol(arg,&endptr,10); + if (*endptr!='\0' || val<min || val>max ) { + printout(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n", + configfile, lineno, name, token, arg, min, max); + Directives(); + exit(1); + } + + // all is well; return value + return val; +} + +// This function returns non-zero if it has correctly parsed a token, +// else zero if it has failed to parse a token. Or it exits with a +// directive message if there is a token-parsing problem. int parsetoken(char *token,cfgfile *cfg){ char sym=token[1]; char *name=cfg->name; @@ -798,8 +896,8 @@ int parsetoken(char *token,cfgfile *cfg){ // is the token not recognized? 
if (*token!='-' || strlen(token)!=2) { - printout(LOG_CRIT,"Drive: %s, unknown Directive: %s at line %d of file %s\n", - name,token,lineno,CONFIGFILE); + printout(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n", + CONFIGFILE, lineno, name, token); Directives(); exit(1); } @@ -807,7 +905,6 @@ int parsetoken(char *token,cfgfile *cfg){ // let's parse the token and swallow its argument switch (sym) { char *arg; - char *endptr; int val; case 'P': @@ -862,67 +959,43 @@ int parsetoken(char *token,cfgfile *cfg){ cfg->selftest=1; cfg->errorlog=1; break; + case 'm': + // email warning option + cfg->emailopt=inttoken(arg=strtok(NULL,delim), name, token, lineno, CONFIGFILE, -3, 3); + break; + case 'i': + // ignore failure of usage attribute + val=inttoken(arg=strtok(NULL,delim), name, token, lineno, CONFIGFILE, 1, 255); + isattoff(val,cfg->failatt,1); + break; + case 'I': + // ignore attribute for tracking purposes + val=inttoken(arg=strtok(NULL,delim), name, token, lineno, CONFIGFILE, 1, 255); + isattoff(val,cfg->trackatt,1); + break; + case 'C': + // period (time interval) for checking + checktime=inttoken(arg=strtok(NULL,delim), name, token, lineno, CONFIGFILE, 10, INT_MAX); + break; case 'M': // send email to address that follows arg=strtok(NULL,delim); if (!arg) { - printout(LOG_CRIT,"Drive %s Directive: %s at line %d of file %s needs email address.\n", - name,token,lineno,CONFIGFILE); + printout(LOG_CRIT,"File %s line %d (drive %s): Directive: %s needs email address(es)\n", + CONFIGFILE, lineno, name, token); Directives(); exit(1); } if (!(cfg->address=strdup(arg))){ - printout(LOG_CRIT,"Drive %s Directive: %s at line %d of file %s: no free memory for address %s.\n", - name,token,lineno,CONFIGFILE,arg); + printout(LOG_CRIT,"File %s line %d (drive %s): Directive: %s: no free memory for email address(es) %s\n", + CONFIGFILE, lineno, name, token, arg); Directives(); exit(1); } break; - case 'i': // ignore - case 'I': // ignore - case 'C': // period (time 
interval) for checking - // ignore a particular vendor attribute for tracking (i) or - // failure (I). Or give a check interval for sleeping. - arg=strtok(NULL,delim); - // make sure argument is there - if (!arg) { - printout(LOG_CRIT,"Drive %s Directive: %s at line %d of file %s needs integer argument.\n", - name,token,lineno,CONFIGFILE); - Directives(); - exit(1); - } - // get argument value, check that it's properly-formed, an - // integer, and in-range - val=strtol(arg,&endptr,10); - switch (sym) { - case 'C': - if (*endptr!='\0' || val<10) { - printout(LOG_CRIT,"Drive %s Directive: %s, line %d, file %s, has argument: %s, mimimum is ten secoonds\n", - name,token,lineno,CONFIGFILE,arg); - Directives(); - exit(1); - } - checktime=val; - return 1; - case 'i': - case 'I': - if (*endptr!='\0' || val<=0 || val>255 ) { - printout(LOG_CRIT,"Drive %s Directive: %s, line %d, file %s, has argument: %s, needs 0 < n < 256\n", - name,token,lineno,CONFIGFILE,arg); - Directives(); - exit(1); - } - // put into correct list (bitmaps, access only with isattoff() - // function. Turns OFF corresponding attribute. - if (sym=='I') - isattoff(val,cfg->trackatt,1); - else - isattoff(val,cfg->failatt,1); - return 1; - } default: - printout(LOG_CRIT,"Drive: %s, unknown Directive: %s at line %d of file %s\n", - name,token,lineno,CONFIGFILE); + printout(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n", + CONFIGFILE, lineno, name, token); Directives(); exit(1); } @@ -1010,7 +1083,15 @@ int parseconfigline(int entry, int lineno,char *line){ Directives(); exit(1); } - + + // additional sanity check. Has user set -m without -M? 
+ if (cfg->emailopt && !cfg->address){ + printout(LOG_CRIT,"Drive: %s, Directive -m useless without address Directive -M on line %d of file %s\n", + cfg->name, cfg->lineno, CONFIGFILE); + Directives(); + exit(1); + } + entry++; free(copy); return 1; diff --git a/sm5/smartd.h b/sm5/smartd.h index fb64bc6b5f56be677eb215c01cbefb838c8ff9cd..3e70ab1418685f5c020185b035e69d3d9161a82f 100644 --- a/sm5/smartd.h +++ b/sm5/smartd.h @@ -23,7 +23,7 @@ */ #ifndef CVSID7 -#define CVSID7 "$Id: smartd.h,v 1.21 2002/11/13 10:04:13 ballen4705 Exp $\n" +#define CVSID7 "$Id: smartd.h,v 1.22 2002/11/21 14:11:20 ballen4705 Exp $\n" #endif // Configuration file @@ -39,9 +39,9 @@ #define MAXENTRIES 64 // maximum length of a continued line in configuration file -#define MAXCONTLINE 511 +#define MAXCONTLINE 1023 -// how often SMART status is checked, in seconds +// default for how often SMART status is checked, in seconds #define CHECKTIME 1800 // maximum number of ATA devices to monitor @@ -66,6 +66,8 @@ typedef struct mailinfo { int logged; // time last email was sent, as defined by man 2 time time_t lastsent; + // time problem initially logged + time_t firstsent; } mailinfo; // Used to store a list of devices and options that were in the @@ -85,8 +87,9 @@ typedef struct configfile_s { char errorlog; // Should we ignore missing capabilities/SMART errors char permissive; - // mailing information for each of the previous error types - mailinfo maildata[4]; + // mailing information for four of the previous error types plus mailtest + mailinfo maildata[5]; + signed char emailopt; /* -m Directive value, -3..3; explicitly signed because a negative value requests a test email and plain char may be unsigned */ // address to send email to char *address; // counts of ata and self-test errors. Perhaps ought to be in the @@ -123,6 +126,6 @@ typedef struct scsidevices_s { // Declare our own printing functions... void printout(int priority,char *fmt, ...) __attribute__ ((format(printf, 2, 3))); -void printandmail(char *address, mailinfo *mail, int priority, char *fmt, ...) 
__attribute__ ((format(printf, 4, 5))); +void printandmail(cfgfile *cfg, int which, int priority, char *fmt, ...) __attribute__ ((format(printf, 4, 5))); int ataCheckDevice(atadevices_t *drive); diff --git a/sm5/smartmontools.spec b/sm5/smartmontools.spec index 338b5133d7585896726b07e49d8f86a0f6dd39c7..83f50262e9a462bccce29e7d68515377f2b76b81 100644 --- a/sm5/smartmontools.spec +++ b/sm5/smartmontools.spec @@ -1,4 +1,4 @@ -Release: 45 +Release: 46 Summary: SMARTmontools - for monitoring S.M.A.R.T. disks and devices Summary(cs): SMARTmontools - pro monitorov�n� S.M.A.R.T. disk� a za��zen� Summary(de): SMARTmontools - zur �berwachung von S.M.A.R.T.-Platten und-Ger�ten @@ -30,7 +30,7 @@ Packager: Bruce Allen <smartmontools-support@lists.sourceforge.net> # http://ftp1.sourceforge.net/smartmontools/smartmontools-%{version}-%{release}.tar.gz # CVS ID of this file is: -# $Id: smartmontools.spec,v 1.69 2002/11/17 05:57:32 ballen4705 Exp $ +# $Id: smartmontools.spec,v 1.70 2002/11/21 14:11:20 ballen4705 Exp $ # Copyright (C) 2002 Bruce Allen <smartmontools-support@lists.sourceforge.net> # Home page: http://smartmontools.sourceforge.net/