diff --git a/sm5/CHANGELOG b/sm5/CHANGELOG index 372442873fd14360b8c826b9f1a28ab34775067f..d3ddd3bd0fc52f9a52969e14b0bbb9f5569a4ea6 100644 --- a/sm5/CHANGELOG +++ b/sm5/CHANGELOG @@ -1,6 +1,6 @@ CHANGELOG for smartmontools -$Id: CHANGELOG,v 1.22 2002/10/25 17:06:17 ballen4705 Exp $ +$Id: CHANGELOG,v 1.23 2002/10/26 09:24:26 ballen4705 Exp $ Copyright (C) 2002 Bruce Allen <smartmontools-support@lists.sourceforge.net> @@ -21,13 +21,38 @@ Research Center), Jack Baskin School of Engineering, University of California, Santa Cruz. http://ssrc.soe.ucsc.edu/ -NOTES FOR NEXT RELEASE: +NOTES FOR FUTURE RELEASES: see TODO file. - Next release: handle extended error and self-test logs gracefully. - Parse and print attribute flag meanings CURRENT RELEASE (see VERSION file in this directory): smartmontools-5.0-VERSION + + modified length of device name string in smartd internal structure + to accommodate max length device name strings + + removed un-implemented (-e = Email notification) option from + command line arg list. We'll put it back on when implemented. + + smartd now logs serious (fatal) conditions in its operation at + loglevel LOG_CRIT rather than LOG_INFO before exiting with error. + + smartd used to open a file descriptor for each SMART enabled + device, and then keep it open the entire time smartd was running. + This meant that some commands, like IOREADBLKPART did not work, + since the fd to the device was open. smartd now opens the device + when it needs to read values, then closes it. Also, if one time + around it can't open the device, it simply prints a warning + message but does not give up. + + smartd now opens SCSI devices as well using O_RDONLY rather than + O_RDWR. If someone can no longer monitor a SCSI device that used + to be readable, this may well be the reason why. + + smartd never checked if the number of ata or scsi devices detected + was greater than the max number it could monitor. Now it does. 
+ +smartmontools-5.0-16 + smartd on startup now looks in the configuration file /etc/smartd.conf for a list of devices which to include in its monitoring list. See man page (man smartd) for syntax. diff --git a/sm5/TODO b/sm5/TODO index 3a35dba3a0574f4ddddb18a74672c92b3e1c566f..4ad6eeae9b7bd5e1580d18fe5c2fe8c822ab7bbb 100644 --- a/sm5/TODO +++ b/sm5/TODO @@ -4,7 +4,7 @@ Home page of code is: http://smartmontools.sourceforge.net Copyright (C) 2002 Bruce Allen <smartmontools-support@lists.sourceforge.net> -$Id: TODO,v 1.15 2002/10/25 14:23:40 ballen4705 Exp $ +$Id: TODO,v 1.16 2002/10/26 09:24:26 ballen4705 Exp $ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -34,29 +34,36 @@ Produce version for ATA/ATAPI-7 Modifications ------------- -Change smartd so that it also monitors usage as well as prefail attributes for failure -or changes. Make this a command line option +Handle extended error and self-test logs gracefully. Can someone tell +me a disk that supports more than log pages 1 and 6? I need to get a +disk so I can test this functionality, when I add it. -Perhaps change smartd to look in /proc/ide and /proc/scsi to see what exists? If something -doesn't exit then don't try to open it? +Change smartd so that it also monitors usage as well as prefail +attributes for failure or changes. Make this a command line option -Currently smartd looks at attribute values and thresholds, then prints out if there is an -error condition. Make it also look at the smart status, if that is enabled and working and -do that test as well. Perhaps also see if the number of device errors has changed. +Perhaps change smartd to look in /proc/ide and /proc/scsi to see what +exists? If something doesn't exit then don't try to open it? -Change smartd so that it monitors the ATA disk error log, and if the number of errors changes, -log an entry.. 
+Currently smartd looks at attribute values and thresholds, then prints +out if there is an error condition. Make it also look at the smart +status, if that is enabled and working and do that test as well. +Perhaps also see if the number of device errors has changed. -Perhaps modify the -q option (quiet mode) so that it only warns of ATA errors if they have (say) -taken place in the last 168 hours (week). +Add optional flags to /etc/smartd.conf, so that certain attributes are +ignored, or so that all usage attributes are ignored. + +Change smartd so that it monitors the ATA disk error log, and if the +number of errors changes, log an entry.. + +Perhaps modify the -q option (quiet mode) so that it only warns of ATA +errors if they have (say) taken place in the last 168 hours (week). Change input command line from using current command line format (getopt) to getopt_long() for long input options. This will be helpful in adding device specific options. -Command line option to specify devices to look for in smartd startup - -Print flags meanings in Vendor Attribute list -- not hex value -- +Parse and print more attribute flag meanings (IBM ones, eg performance +etc). Fixes ----- diff --git a/sm5/VERSION b/sm5/VERSION index b6a7d89c68e0ca66e96a9a51892cc33db66fb8a3..98d9bcb75a685dfbfd60f611c309410152935b3d 100644 --- a/sm5/VERSION +++ b/sm5/VERSION @@ -1 +1 @@ -16 +17 diff --git a/sm5/smartd.c b/sm5/smartd.c index e504e80e595d1d3487ee7847eef0da063355186b..7944644f21e99882ea425876990d6ea803f3028a 100644 --- a/sm5/smartd.c +++ b/sm5/smartd.c @@ -37,41 +37,11 @@ #include "scsicmds.h" #include "smartd.h" +// CVS ID strings extern const char *CVSid1, *CVSid2; -const char *CVSid3="$Id: smartd.c,v 1.31 2002/10/25 14:54:14 ballen4705 Exp $" +const char *CVSid3="$Id: smartd.c,v 1.32 2002/10/26 09:24:26 ballen4705 Exp $" CVSID1 CVSID4 CVSID7; -int daemon_init(void){ - pid_t pid; - int i; - - if ( (pid = fork()) < 0) - // unable to fork! 
- exit(1); - else if (pid != 0) - // we are the parent process -- exit cleanly - exit (0); - - // from here on, we are the child process - setsid(); - - // close any open file descriptors - for (i=getdtablesize();i>=0;--i) - close(i); - - // redirect any IO attempts to /dev/null - // open stdin - i=open("/dev/null",O_RDWR); - // stdout - dup(i); - // stderr - dup(i); - umask(0); - chdir("/"); - return(0); -} - - // This function prints either to stdout or to the syslog as needed void printout(int priority,char *fmt, ...){ va_list ap; @@ -85,22 +55,54 @@ void printout(int priority,char *fmt, ...){ return; } -// Printing function for atacmds +// Printing function for debugging atacmds. For debugging set 0 to 1 +// in #if statement void pout(char *fmt, ...){ va_list ap; + // initialize variable argument list va_start(ap,fmt); - va_end(ap); - return; #if (0) - // print out vprintf(fmt,ap); +#endif va_end(ap); return; -#endif } +// Forks new process, closes all file descriptors, redirects stdin, +// stdout, stderr +int daemon_init(void){ + pid_t pid; + int i; + if ((pid=fork()) < 0) { + // unable to fork! + printout(LOG_CRIT,"Unable to fork daemon process!\n"); + exit(1); + } + else if (pid) + // we are the parent process -- exit cleanly + exit(0); + + // from here on, we are the child process + setsid(); + + // close any open file descriptors + for (i=getdtablesize();i>=0;--i) + close(i); + + // redirect any IO attempts to /dev/null for stdin + i=open("/dev/null",O_RDWR); + // stdout + dup(i); + // stderr + dup(i); + umask(0); + chdir("/"); + return(0); +} + +// Prints header identifying version of code and home void printhead(){ printout(LOG_INFO,"smartd version %d.%d-%d - S.M.A.R.T. 
Daemon.\n", RELEASE_MAJOR, RELEASE_MINOR, SMARTMONTOOLS_VERSION); @@ -114,23 +116,45 @@ void Usage (void){ printout(LOG_INFO,"Read Only Options:\n"); printout(LOG_INFO," %c Start smartd in debug Mode\n",DEBUGMODE); printout(LOG_INFO," %c Print License, Copyright, and version information\n\n",PRINTCOPYLEFT); - printout(LOG_INFO,"Configuration file: /etc/smartd.conf\n"); + printout(LOG_INFO,"Optional configuration file: %s\n",CONFIGFILE); } - + +// returns negative if problem, else fd>=0 +int opendevice(char *device){ + int fd = open(device, O_RDONLY); + if (fd<0) { + if (errno<sys_nerr) + printout(LOG_INFO,"%s: Device: %s, Opening device failed\n",sys_errlist[errno],device); + else + printout(LOG_INFO,"Device: %s, Opening device failed\n",device); + return -1; + } + // device opened sucessfully + return fd; +} + +// returns 1 if problem, else zero +int closedevice(int fd){ + if (close(fd)){ + if (errno<sys_nerr) + printout(LOG_INFO,"%s: Closing file descriptor %d failed\n",sys_errlist[errno],fd); + else + printout(LOG_INFO,"Closing file descriptor %d failed\n",fd); + return 1; + } + // device opened sucessfully + return 0; +} + // scan to see what ata devices there are, and if they support SMART int atadevicescan (atadevices_t *devices, char *device){ int fd; struct hd_driveid drive; printout(LOG_INFO,"Opening device %s\n", device); - fd = open(device, O_RDONLY); - if (fd < 0) { - if (errno<sys_nerr) - printout(LOG_INFO,"%s: Device: %s, Opening device failed\n",sys_errlist[errno],device); - else - printout(LOG_INFO,"Device: %s, Opening device failed\n",device); + if ((fd=opendevice(device))<0) + // device open failed return 1; - } if (ataReadHDIdentity (fd,&drive) || !ataSmartSupport(drive) || ataEnableSmart(fd)){ // device exists, but not able to do SMART @@ -153,7 +177,13 @@ int atadevicescan (atadevices_t *devices, char *device){ return 4; } - // device exists, and does SMART. Add to list + // Device exists, and does SMART. 
Add to list + if (numatadevices>=MAXATADEVICES){ + printout(LOG_CRIT,"smartd has found more than MAXATADEVICES=%d ATA devices.\n" + "Recompile code from " PROJECTHOME " with larger MAXATADEVICES\n",numatadevices); + exit(1); + } + printout(LOG_INFO,"%s Found and is SMART capable. Adding to \"monitor\" list.\n",device); devices[numatadevices].fd = fd; strcpy(devices[numatadevices].devicename, device); @@ -167,6 +197,7 @@ int atadevicescan (atadevices_t *devices, char *device){ isSupportSelfTest(devices[numatadevices].smartval); numatadevices++; + closedevice(fd); return 0; } @@ -177,18 +208,11 @@ int scsidevicescan (scsidevices_t *devices, char *device){ int i, fd, smartsupport; unsigned char tBuf[4096]; - - // open device printout(LOG_INFO,"Opening device %s\n", device); - fd=open(device, O_RDWR); - if (fd<0) { - if (errno<sys_nerr) - printout(LOG_INFO,"%s: Device: %s, Opening device failed\n",sys_errlist[errno],device); - else - printout(LOG_INFO,"Device: %s, Opening device failed\n", device); + if ((fd=opendevice(device))<0) + // device open failed return 1; - } - + // check that it's ready for commands if (!testunitready(fd)){ printout(LOG_INFO,"Device: %s, Failed Test Unit Ready\n", device); @@ -211,6 +235,13 @@ int scsidevicescan (scsidevices_t *devices, char *device){ return 4; } + // Device exists, and does SMART. Add to list + if (numscsidevices>=MAXSCSIDEVICES){ + printout(LOG_CRIT,"smartd has found more than MAXSCSIDEVICES=%d SCSI devices.\n" + "Recompile code from " PROJECTHOME " with larger MAXSCSIDEVICES\n",numscsidevices); + exit(1); + } + // now we can proceed to register the device printout(LOG_INFO, "Device: %s, Found and is SMART capable. 
Adding to \"monitor\" list.\n",device); devices[numscsidevices].fd = fd; @@ -233,6 +264,7 @@ int scsidevicescan (scsidevices_t *devices, char *device){ } } numscsidevices++; + closedevice(fd); return 0; } @@ -269,19 +301,28 @@ void ataCompareSmartValues (atadevices_t *device, struct ata_smart_values new ){ int ataCheckDevice( atadevices_t *drive){ struct ata_smart_values tempsmartval; struct ata_smart_thresholds tempsmartthres; - int failed; + int failed,fd; char *loc,attributename[64]; + // if we can't open device, fail gracefully rather than hard -- + // perhaps the next time around we'll be able to open it + if ((fd=opendevice(drive->devicename))<0) + return 1; + // Coming into this function, *drive contains the last values measured, // and we read the NEW values into tempsmartval - if (ataReadSmartValues(drive->fd,&tempsmartval)) + if (ataReadSmartValues(fd,&tempsmartval)) printout(LOG_INFO, "%s:Failed to read SMART values\n", drive->devicename); // and we read the new thresholds into tempsmartthres - if (ataReadSmartThresholds (drive->fd, &tempsmartthres)) + if (ataReadSmartThresholds(fd, &tempsmartthres)) printout(LOG_INFO, "%s:Failed to read SMART thresholds\n",drive->devicename); - // See if any vendor attributes are below minimum, and print them out + // See if any vendor attributes are below minimum, and print them + // out. WHEN IT WORKS, we should here add a call to + // ataSmartStatus2() either in addition to or instead of the + // ataCheckSmart command below. This is the "right" long-term + // solution. if ((failed=ataCheckSmart(tempsmartval,tempsmartthres,1))){ ataPrintSmartAttribName(attributename,failed); // skip blank space in name @@ -291,18 +332,15 @@ int ataCheckDevice( atadevices_t *drive){ printout(LOG_CRIT,"Device: %s, Failed SMART attribute: %s. 
Use smartctl -a %s.\n", drive->devicename,loc,drive->devicename); } - - // WHEN IT WORKS, we should here add a call to ataSmartStatus2() - // either in addition to or instead of the ataCheckSmart command - // above. This is the "right" long-term solution. // see if any values have changed. Second argument is new values - ataCompareSmartValues (drive , tempsmartval); + ataCompareSmartValues(drive, tempsmartval); // Save the new values into *drive for the next time around drive->smartval = tempsmartval; drive->smartthres = tempsmartthres; + closedevice(fd); return 0; } @@ -312,10 +350,16 @@ int scsiCheckDevice( scsidevices_t *drive){ UINT8 returnvalue; UINT8 currenttemp; UINT8 triptemp; - + int fd; + + // if we can't open device, fail gracefully rather than hard -- + // perhaps the next time around we'll be able to open it + if ((fd=opendevice(drive->devicename))<0) + return 1; + currenttemp = triptemp = 0; - if (scsiCheckSmart( drive->fd, drive->SmartPageSupported, &returnvalue, &currenttemp, &triptemp ) != 0) + if (scsiCheckSmart(fd, drive->SmartPageSupported, &returnvalue, &currenttemp, &triptemp)) printout(LOG_INFO, "%s:Failed to read SMART values\n", drive->devicename); if (returnvalue) @@ -327,32 +371,34 @@ int scsiCheckDevice( scsidevices_t *drive){ // Seems to completely ignore what capabilities were found on the // device when scanned if (currenttemp){ - if ( (currenttemp != drive->Temperature) && ( drive->Temperature) ) + if ((currenttemp != drive->Temperature) && (drive->Temperature)) printout(LOG_INFO, "Device: %s, Temperature changed %d degrees to %d degrees since last reading\n", drive->devicename, (int) (currenttemp - drive->Temperature), (unsigned int) currenttemp ); - drive->Temperature = currenttemp; - } + } + closedevice(fd); return 0; } void CheckDevices ( atadevices_t *atadevices, scsidevices_t *scsidevices){ int i; + // If there are no devices to monitor, then exit if (!numatadevices && !numscsidevices){ printout(LOG_INFO,"Unable to monitor any SMART 
enabled ATA or SCSI devices.\n"); return; } + // Infinite loop, which checkes devices printout(LOG_INFO,"Started monitoring %d ATA and %d SCSI devices\n",numatadevices,numscsidevices); while (1){ - for (i = 0; i < numatadevices;i++) - ataCheckDevice ( &atadevices[i]); + for (i=0; i<numatadevices; i++) + ataCheckDevice(atadevices+i); - for (i = 0; i < numscsidevices;i++) - scsiCheckDevice ( &scsidevices[i]); + for (i=0; i<numscsidevices; i++) + scsiCheckDevice(scsidevices+i); - sleep ( checktime ); + sleep(checktime); } } @@ -363,7 +409,7 @@ int massagecvs(char *out,const char *in){ const char *savein=in; // skip to I of $Id: - while (*in !='\0' && *in!='I') + while (*in && *in!='I') in++; // skip to start of filename @@ -442,10 +488,10 @@ int parseconfigfile(){ if (fp==NULL && errno!=ENOENT){ // file exists but we can't read it if (errno<sys_nerr) - printout(LOG_INFO,"%s: Unable to open configuration file %s\n", + printout(LOG_CRIT,"%s: Unable to open configuration file %s\n", sys_errlist[errno],CONFIGFILE); else - printout(LOG_INFO,"Unable to open configuration file %s\n",CONFIGFILE); + printout(LOG_CRIT,"Unable to open configuration file %s\n",CONFIGFILE); exit(1); } @@ -465,7 +511,7 @@ int parseconfigfile(){ // See if line is too long len=strlen(line); if (len>MAXLINELEN){ - printout(LOG_INFO,"Error: line %d of file %s is more than than %d characters long.\n", + printout(LOG_CRIT,"Error: line %d of file %s is more than than %d characters long.\n", lineno,CONFIGFILE,MAXLINELEN); exit(1); } @@ -488,7 +534,7 @@ int parseconfigfile(){ // We've got a legit entry if (entry>=MAXENTRIES){ - printout(LOG_INFO,"Error: configuration file %s can have no more than %d entries\n", + printout(LOG_CRIT,"Error: configuration file %s can have no more than %d entries\n", CONFIGFILE,MAXENTRIES); exit(1); } @@ -511,25 +557,21 @@ int parseconfigfile(){ if (entry) return entry; - printout(LOG_INFO,"Configuration file %s contained no devices (like /dev/hda)\n",CONFIGFILE); + 
printout(LOG_CRIT,"Configuration file %s contained no devices (like /dev/hda)\n",CONFIGFILE); exit(1); } -const char opts[] = { DEBUGMODE, EMAILNOTIFICATION, PRINTCOPYLEFT,'h','?','\0' }; +// const char opts[] = {DEBUGMODE, EMAILNOTIFICATION, PRINTCOPYLEFT,'h','?','\0' }; +const char opts[] = {DEBUGMODE, PRINTCOPYLEFT,'h','?','\0' }; -/* Main Program */ -int main (int argc, char **argv){ - atadevices_t atadevices[MAXATADEVICES], *atadevicesptr; - scsidevices_t scsidevices[MAXSCSIDEVICES], *scsidevicesptr; - int optchar,i; + +// Parses input line, prints usage message and +// version/license/copyright messages +void ParseOpts(int argc, char **argv){ extern char *optarg; extern int optopt, optind, opterr; - int entries; - - numatadevices=0; - numscsidevices=0; - scsidevicesptr = scsidevices; - atadevicesptr = atadevices; + int optchar; + opterr=optopt=0; // Parse input options: @@ -550,7 +592,7 @@ int main (int argc, char **argv){ debugmode=1; if (optopt) { printhead(); - printout(LOG_INFO,"=======> UNRECOGNIZED OPTION: %c <======= \n\n",optopt); + printout(LOG_CRIT,"=======> UNRECOGNIZED OPTION: %c <======= \n\n",optopt); Usage(); exit(-1); } @@ -574,6 +616,19 @@ int main (int argc, char **argv){ // print header printhead(); + return; +} + +/* Main Program */ +int main (int argc, char **argv){ + atadevices_t atadevices[MAXATADEVICES], *atadevicesptr=atadevices; + scsidevices_t scsidevices[MAXSCSIDEVICES], *scsidevicesptr=scsidevices; + int i,entries; + + numatadevices=numscsidevices=0; + + // Parse input and print header and usage info if needed + ParseOpts(argc,argv); // look in configuration file CONFIGFILE (normally /etc/smartd.conf) entries=parseconfigfile(); @@ -583,18 +638,20 @@ int main (int argc, char **argv){ daemon_init(); } - // If we found a config file, look at its entries + // If we found a config file, register its entries if (entries) for (i=0;i<entries;i++){ + // register ATA devices if (config[i].tryata && atadevicescan(atadevicesptr, 
config[i].name)) printout(LOG_INFO,"Unable to register ATA device %s at line %d of file %s\n", config[i].name, config[i].lineno, CONFIGFILE); - + // then register SCSI devices if (config[i].tryscsi && scsidevicescan(scsidevicesptr, config[i].name)) printout(LOG_INFO,"Unable to register SCSI device %s at line %d of file %s\n", config[i].name, config[i].lineno, CONFIGFILE); } else { + // since there was no config file found, search all ATA and SCSI disks char deviceata[] = "/dev/hda"; char devicescsi[]= "/dev/sda"; printout(LOG_INFO,"No configuration file %s found. Searching for devices.\n",CONFIGFILE); @@ -604,6 +661,7 @@ int main (int argc, char **argv){ scsidevicescan(scsidevicesptr, devicescsi); } + // Now start an infinite loop that checks all devices CheckDevices(atadevicesptr, scsidevicesptr); return 0; } diff --git a/sm5/smartd.cpp b/sm5/smartd.cpp index 63546130de6b87e60de8afc483081d18b4feef6f..f376bbfdff51ff9c89137bc45b0c0acf3199173d 100644 --- a/sm5/smartd.cpp +++ b/sm5/smartd.cpp @@ -37,41 +37,11 @@ #include "scsicmds.h" #include "smartd.h" +// CVS ID strings extern const char *CVSid1, *CVSid2; -const char *CVSid3="$Id: smartd.cpp,v 1.31 2002/10/25 14:54:14 ballen4705 Exp $" +const char *CVSid3="$Id: smartd.cpp,v 1.32 2002/10/26 09:24:26 ballen4705 Exp $" CVSID1 CVSID4 CVSID7; -int daemon_init(void){ - pid_t pid; - int i; - - if ( (pid = fork()) < 0) - // unable to fork! 
- exit(1); - else if (pid != 0) - // we are the parent process -- exit cleanly - exit (0); - - // from here on, we are the child process - setsid(); - - // close any open file descriptors - for (i=getdtablesize();i>=0;--i) - close(i); - - // redirect any IO attempts to /dev/null - // open stdin - i=open("/dev/null",O_RDWR); - // stdout - dup(i); - // stderr - dup(i); - umask(0); - chdir("/"); - return(0); -} - - // This function prints either to stdout or to the syslog as needed void printout(int priority,char *fmt, ...){ va_list ap; @@ -85,22 +55,54 @@ void printout(int priority,char *fmt, ...){ return; } -// Printing function for atacmds +// Printing function for debugging atacmds. For debugging set 0 to 1 +// in #if statement void pout(char *fmt, ...){ va_list ap; + // initialize variable argument list va_start(ap,fmt); - va_end(ap); - return; #if (0) - // print out vprintf(fmt,ap); +#endif va_end(ap); return; -#endif } +// Forks new process, closes all file descriptors, redirects stdin, +// stdout, stderr +int daemon_init(void){ + pid_t pid; + int i; + if ((pid=fork()) < 0) { + // unable to fork! + printout(LOG_CRIT,"Unable to fork daemon process!\n"); + exit(1); + } + else if (pid) + // we are the parent process -- exit cleanly + exit(0); + + // from here on, we are the child process + setsid(); + + // close any open file descriptors + for (i=getdtablesize();i>=0;--i) + close(i); + + // redirect any IO attempts to /dev/null for stdin + i=open("/dev/null",O_RDWR); + // stdout + dup(i); + // stderr + dup(i); + umask(0); + chdir("/"); + return(0); +} + +// Prints header identifying version of code and home void printhead(){ printout(LOG_INFO,"smartd version %d.%d-%d - S.M.A.R.T. 
Daemon.\n", RELEASE_MAJOR, RELEASE_MINOR, SMARTMONTOOLS_VERSION); @@ -114,23 +116,45 @@ void Usage (void){ printout(LOG_INFO,"Read Only Options:\n"); printout(LOG_INFO," %c Start smartd in debug Mode\n",DEBUGMODE); printout(LOG_INFO," %c Print License, Copyright, and version information\n\n",PRINTCOPYLEFT); - printout(LOG_INFO,"Configuration file: /etc/smartd.conf\n"); + printout(LOG_INFO,"Optional configuration file: %s\n",CONFIGFILE); } - + +// returns negative if problem, else fd>=0 +int opendevice(char *device){ + int fd = open(device, O_RDONLY); + if (fd<0) { + if (errno<sys_nerr) + printout(LOG_INFO,"%s: Device: %s, Opening device failed\n",sys_errlist[errno],device); + else + printout(LOG_INFO,"Device: %s, Opening device failed\n",device); + return -1; + } + // device opened sucessfully + return fd; +} + +// returns 1 if problem, else zero +int closedevice(int fd){ + if (close(fd)){ + if (errno<sys_nerr) + printout(LOG_INFO,"%s: Closing file descriptor %d failed\n",sys_errlist[errno],fd); + else + printout(LOG_INFO,"Closing file descriptor %d failed\n",fd); + return 1; + } + // device opened sucessfully + return 0; +} + // scan to see what ata devices there are, and if they support SMART int atadevicescan (atadevices_t *devices, char *device){ int fd; struct hd_driveid drive; printout(LOG_INFO,"Opening device %s\n", device); - fd = open(device, O_RDONLY); - if (fd < 0) { - if (errno<sys_nerr) - printout(LOG_INFO,"%s: Device: %s, Opening device failed\n",sys_errlist[errno],device); - else - printout(LOG_INFO,"Device: %s, Opening device failed\n",device); + if ((fd=opendevice(device))<0) + // device open failed return 1; - } if (ataReadHDIdentity (fd,&drive) || !ataSmartSupport(drive) || ataEnableSmart(fd)){ // device exists, but not able to do SMART @@ -153,7 +177,13 @@ int atadevicescan (atadevices_t *devices, char *device){ return 4; } - // device exists, and does SMART. Add to list + // Device exists, and does SMART. 
Add to list + if (numatadevices>=MAXATADEVICES){ + printout(LOG_CRIT,"smartd has found more than MAXATADEVICES=%d ATA devices.\n" + "Recompile code from " PROJECTHOME " with larger MAXATADEVICES\n",numatadevices); + exit(1); + } + printout(LOG_INFO,"%s Found and is SMART capable. Adding to \"monitor\" list.\n",device); devices[numatadevices].fd = fd; strcpy(devices[numatadevices].devicename, device); @@ -167,6 +197,7 @@ int atadevicescan (atadevices_t *devices, char *device){ isSupportSelfTest(devices[numatadevices].smartval); numatadevices++; + closedevice(fd); return 0; } @@ -177,18 +208,11 @@ int scsidevicescan (scsidevices_t *devices, char *device){ int i, fd, smartsupport; unsigned char tBuf[4096]; - - // open device printout(LOG_INFO,"Opening device %s\n", device); - fd=open(device, O_RDWR); - if (fd<0) { - if (errno<sys_nerr) - printout(LOG_INFO,"%s: Device: %s, Opening device failed\n",sys_errlist[errno],device); - else - printout(LOG_INFO,"Device: %s, Opening device failed\n", device); + if ((fd=opendevice(device))<0) + // device open failed return 1; - } - + // check that it's ready for commands if (!testunitready(fd)){ printout(LOG_INFO,"Device: %s, Failed Test Unit Ready\n", device); @@ -211,6 +235,13 @@ int scsidevicescan (scsidevices_t *devices, char *device){ return 4; } + // Device exists, and does SMART. Add to list + if (numscsidevices>=MAXSCSIDEVICES){ + printout(LOG_CRIT,"smartd has found more than MAXSCSIDEVICES=%d SCSI devices.\n" + "Recompile code from " PROJECTHOME " with larger MAXSCSIDEVICES\n",numscsidevices); + exit(1); + } + // now we can proceed to register the device printout(LOG_INFO, "Device: %s, Found and is SMART capable. 
Adding to \"monitor\" list.\n",device); devices[numscsidevices].fd = fd; @@ -233,6 +264,7 @@ int scsidevicescan (scsidevices_t *devices, char *device){ } } numscsidevices++; + closedevice(fd); return 0; } @@ -269,19 +301,28 @@ void ataCompareSmartValues (atadevices_t *device, struct ata_smart_values new ){ int ataCheckDevice( atadevices_t *drive){ struct ata_smart_values tempsmartval; struct ata_smart_thresholds tempsmartthres; - int failed; + int failed,fd; char *loc,attributename[64]; + // if we can't open device, fail gracefully rather than hard -- + // perhaps the next time around we'll be able to open it + if ((fd=opendevice(drive->devicename))<0) + return 1; + // Coming into this function, *drive contains the last values measured, // and we read the NEW values into tempsmartval - if (ataReadSmartValues(drive->fd,&tempsmartval)) + if (ataReadSmartValues(fd,&tempsmartval)) printout(LOG_INFO, "%s:Failed to read SMART values\n", drive->devicename); // and we read the new thresholds into tempsmartthres - if (ataReadSmartThresholds (drive->fd, &tempsmartthres)) + if (ataReadSmartThresholds(fd, &tempsmartthres)) printout(LOG_INFO, "%s:Failed to read SMART thresholds\n",drive->devicename); - // See if any vendor attributes are below minimum, and print them out + // See if any vendor attributes are below minimum, and print them + // out. WHEN IT WORKS, we should here add a call to + // ataSmartStatus2() either in addition to or instead of the + // ataCheckSmart command below. This is the "right" long-term + // solution. if ((failed=ataCheckSmart(tempsmartval,tempsmartthres,1))){ ataPrintSmartAttribName(attributename,failed); // skip blank space in name @@ -291,18 +332,15 @@ int ataCheckDevice( atadevices_t *drive){ printout(LOG_CRIT,"Device: %s, Failed SMART attribute: %s. 
Use smartctl -a %s.\n", drive->devicename,loc,drive->devicename); } - - // WHEN IT WORKS, we should here add a call to ataSmartStatus2() - // either in addition to or instead of the ataCheckSmart command - // above. This is the "right" long-term solution. // see if any values have changed. Second argument is new values - ataCompareSmartValues (drive , tempsmartval); + ataCompareSmartValues(drive, tempsmartval); // Save the new values into *drive for the next time around drive->smartval = tempsmartval; drive->smartthres = tempsmartthres; + closedevice(fd); return 0; } @@ -312,10 +350,16 @@ int scsiCheckDevice( scsidevices_t *drive){ UINT8 returnvalue; UINT8 currenttemp; UINT8 triptemp; - + int fd; + + // if we can't open device, fail gracefully rather than hard -- + // perhaps the next time around we'll be able to open it + if ((fd=opendevice(drive->devicename))<0) + return 1; + currenttemp = triptemp = 0; - if (scsiCheckSmart( drive->fd, drive->SmartPageSupported, &returnvalue, &currenttemp, &triptemp ) != 0) + if (scsiCheckSmart(fd, drive->SmartPageSupported, &returnvalue, &currenttemp, &triptemp)) printout(LOG_INFO, "%s:Failed to read SMART values\n", drive->devicename); if (returnvalue) @@ -327,32 +371,34 @@ int scsiCheckDevice( scsidevices_t *drive){ // Seems to completely ignore what capabilities were found on the // device when scanned if (currenttemp){ - if ( (currenttemp != drive->Temperature) && ( drive->Temperature) ) + if ((currenttemp != drive->Temperature) && (drive->Temperature)) printout(LOG_INFO, "Device: %s, Temperature changed %d degrees to %d degrees since last reading\n", drive->devicename, (int) (currenttemp - drive->Temperature), (unsigned int) currenttemp ); - drive->Temperature = currenttemp; - } + } + closedevice(fd); return 0; } void CheckDevices ( atadevices_t *atadevices, scsidevices_t *scsidevices){ int i; + // If there are no devices to monitor, then exit if (!numatadevices && !numscsidevices){ printout(LOG_INFO,"Unable to monitor any SMART 
enabled ATA or SCSI devices.\n"); return; } + // Infinite loop, which checkes devices printout(LOG_INFO,"Started monitoring %d ATA and %d SCSI devices\n",numatadevices,numscsidevices); while (1){ - for (i = 0; i < numatadevices;i++) - ataCheckDevice ( &atadevices[i]); + for (i=0; i<numatadevices; i++) + ataCheckDevice(atadevices+i); - for (i = 0; i < numscsidevices;i++) - scsiCheckDevice ( &scsidevices[i]); + for (i=0; i<numscsidevices; i++) + scsiCheckDevice(scsidevices+i); - sleep ( checktime ); + sleep(checktime); } } @@ -363,7 +409,7 @@ int massagecvs(char *out,const char *in){ const char *savein=in; // skip to I of $Id: - while (*in !='\0' && *in!='I') + while (*in && *in!='I') in++; // skip to start of filename @@ -442,10 +488,10 @@ int parseconfigfile(){ if (fp==NULL && errno!=ENOENT){ // file exists but we can't read it if (errno<sys_nerr) - printout(LOG_INFO,"%s: Unable to open configuration file %s\n", + printout(LOG_CRIT,"%s: Unable to open configuration file %s\n", sys_errlist[errno],CONFIGFILE); else - printout(LOG_INFO,"Unable to open configuration file %s\n",CONFIGFILE); + printout(LOG_CRIT,"Unable to open configuration file %s\n",CONFIGFILE); exit(1); } @@ -465,7 +511,7 @@ int parseconfigfile(){ // See if line is too long len=strlen(line); if (len>MAXLINELEN){ - printout(LOG_INFO,"Error: line %d of file %s is more than than %d characters long.\n", + printout(LOG_CRIT,"Error: line %d of file %s is more than than %d characters long.\n", lineno,CONFIGFILE,MAXLINELEN); exit(1); } @@ -488,7 +534,7 @@ int parseconfigfile(){ // We've got a legit entry if (entry>=MAXENTRIES){ - printout(LOG_INFO,"Error: configuration file %s can have no more than %d entries\n", + printout(LOG_CRIT,"Error: configuration file %s can have no more than %d entries\n", CONFIGFILE,MAXENTRIES); exit(1); } @@ -511,25 +557,21 @@ int parseconfigfile(){ if (entry) return entry; - printout(LOG_INFO,"Configuration file %s contained no devices (like /dev/hda)\n",CONFIGFILE); + 
printout(LOG_CRIT,"Configuration file %s contained no devices (like /dev/hda)\n",CONFIGFILE); exit(1); } -const char opts[] = { DEBUGMODE, EMAILNOTIFICATION, PRINTCOPYLEFT,'h','?','\0' }; +// const char opts[] = {DEBUGMODE, EMAILNOTIFICATION, PRINTCOPYLEFT,'h','?','\0' }; +const char opts[] = {DEBUGMODE, PRINTCOPYLEFT,'h','?','\0' }; -/* Main Program */ -int main (int argc, char **argv){ - atadevices_t atadevices[MAXATADEVICES], *atadevicesptr; - scsidevices_t scsidevices[MAXSCSIDEVICES], *scsidevicesptr; - int optchar,i; + +// Parses input line, prints usage message and +// version/license/copyright messages +void ParseOpts(int argc, char **argv){ extern char *optarg; extern int optopt, optind, opterr; - int entries; - - numatadevices=0; - numscsidevices=0; - scsidevicesptr = scsidevices; - atadevicesptr = atadevices; + int optchar; + opterr=optopt=0; // Parse input options: @@ -550,7 +592,7 @@ int main (int argc, char **argv){ debugmode=1; if (optopt) { printhead(); - printout(LOG_INFO,"=======> UNRECOGNIZED OPTION: %c <======= \n\n",optopt); + printout(LOG_CRIT,"=======> UNRECOGNIZED OPTION: %c <======= \n\n",optopt); Usage(); exit(-1); } @@ -574,6 +616,19 @@ int main (int argc, char **argv){ // print header printhead(); + return; +} + +/* Main Program */ +int main (int argc, char **argv){ + atadevices_t atadevices[MAXATADEVICES], *atadevicesptr=atadevices; + scsidevices_t scsidevices[MAXSCSIDEVICES], *scsidevicesptr=scsidevices; + int i,entries; + + numatadevices=numscsidevices=0; + + // Parse input and print header and usage info if needed + ParseOpts(argc,argv); // look in configuration file CONFIGFILE (normally /etc/smartd.conf) entries=parseconfigfile(); @@ -583,18 +638,20 @@ int main (int argc, char **argv){ daemon_init(); } - // If we found a config file, look at its entries + // If we found a config file, register its entries if (entries) for (i=0;i<entries;i++){ + // register ATA devices if (config[i].tryata && atadevicescan(atadevicesptr, 
config[i].name)) printout(LOG_INFO,"Unable to register ATA device %s at line %d of file %s\n", config[i].name, config[i].lineno, CONFIGFILE); - + // then register SCSI devices if (config[i].tryscsi && scsidevicescan(scsidevicesptr, config[i].name)) printout(LOG_INFO,"Unable to register SCSI device %s at line %d of file %s\n", config[i].name, config[i].lineno, CONFIGFILE); } else { + // since there was no config file found, search all ATA and SCSI disks char deviceata[] = "/dev/hda"; char devicescsi[]= "/dev/sda"; printout(LOG_INFO,"No configuration file %s found. Searching for devices.\n",CONFIGFILE); @@ -604,6 +661,7 @@ int main (int argc, char **argv){ scsidevicescan(scsidevicesptr, devicescsi); } + // Now start an infinite loop that checks all devices CheckDevices(atadevicesptr, scsidevicesptr); return 0; } diff --git a/sm5/smartd.h b/sm5/smartd.h index 43d9a1b0e95b758720b9ee042f8c1fda2a8d6793..74fdb97653d7fa92da3ae70e7a4e5d6c2c422807 100644 --- a/sm5/smartd.h +++ b/sm5/smartd.h @@ -23,14 +23,24 @@ */ #ifndef CVSID7 -#define CVSID7 "$Id: smartd.h,v 1.8 2002/10/25 14:15:05 ballen4705 Exp $\n" +#define CVSID7 "$Id: smartd.h,v 1.9 2002/10/26 09:24:26 ballen4705 Exp $\n" #endif // Configuration file #define CONFIGFILE "/etc/smartd.conf" -#define MAXLINELEN 126 +#define MAXLINELEN 114 #define MAXENTRIES 64 +// BAD PROGRAMMING PRACTICE - GLOBAL VARIABLES SHOULD BE IN .c NOT .h +// FILE +/* how often SMART status is checked, in seconds */ +int checktime = 1800; +// number of ATA and SCSI devices being watched +int numatadevices; +int numscsidevices; +#define MAXATADEVICES 12 +#define MAXSCSIDEVICES 26 + /* Defines for command line options */ #define DEBUGMODE 'X' #define EMAILNOTIFICATION 'e' @@ -40,42 +50,32 @@ #define TRUE 0x01 #define FALSE 0x00 -#define MAXATADEVICES 12 -#define MAXSCSIDEVICES 26 - /* Global Variables for command line options */ +// These should go into a structure at some point unsigned char debugmode = FALSE; unsigned char emailnotification = 
FALSE; unsigned char printcopyleft = FALSE; -/* Number of ata device to scan */ -int numatadevices; -int numscsidevices; - - -/* how often SMART is checks in seconds */ -int checktime = 1800; - typedef struct atadevices_s { int fd; - char devicename[14]; int selftest; struct hd_driveid drive; struct ata_smart_values smartval; struct ata_smart_thresholds smartthres; + char devicename[MAXLINELEN+2]; } atadevices_t; typedef struct scsidevices_s { int fd; - char devicename[14]; unsigned char SmartPageSupported; unsigned char TempPageSupported; unsigned char Temperature; + char devicename[MAXLINELEN+2]; } scsidevices_t; typedef struct configfile_s { - char name[MAXLINELEN+2]; + int lineno; int tryata; int tryscsi; - int lineno; + char name[MAXLINELEN+2]; // really only needs to be +1 } cfgfile;