神刀安全网

Zfsd(8), the ZFS fault management daemon lands in FreeBSD

svn commit: r300906 – in head: cddl/usr.sbin cddl/usr.sbin/zfsd cddl/usr.sbin/zfsd/tests etc/defaults etc/mtree etc/rc.d lib lib/libdevdctl lib/libdevdctl/tests share/mk sys/cddl/contrib/opensolaris/uts/common/fs/zfs

Alan Somers asomers at FreeBSD.org

Sat May 28 17:43:42 UTC 2016

Author: asomers Date: Sat May 28 17:43:40 2016 New Revision: 300906 URL: https://svnweb.freebsd.org/changeset/base/300906  Log:   zfsd(8), the ZFS fault management daemon      Add zfsd, which deals with hard drive faults in ZFS pools. It manages   hotspares and replacements in drive slots that publish physical paths.      cddl/usr.sbin/zfsd    Add zfsd(8) and its unit tests      cddl/usr.sbin/Makefile    Add zfsd to the build      lib/libdevdctl    A C++ library that helps devd clients process events      lib/Makefile   share/mk/bsd.libnames.mk   share/mk/src.libnames.mk    Add libdevdctl to the build. It's a private library, unusable by    out-of-tree software.      etc/defaults/rc.conf    By default, set zfsd_enable to NO      etc/mtree/BSD.include.dist    Add a directory for libdevdctl's include files      etc/mtree/BSD.tests.dist    Add a directory for zfsd's unit tests      etc/mtree/BSD.var.dist    Add /var/db/zfsd/cases, where zfsd stores case files while it's shut    down.      etc/rc.d/Makefile   etc/rc.d/zfsd    Add zfsd's rc script      sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c    Fix the resource.fs.zfs.statechange message. It had a number of    problems:       It was only being emitted on a transition to the HEALTHY state.    That made it impossible for zfsd to take actions based on drives    getting sicker.       It compared the new state to vdev_prevstate, which is the state that    the vdev had the last time it was opened.  That doesn't make sense,    because a vdev can change state multiple times without being    reopened.       vdev_set_state contains logic that will change the device's new    state based on various conditions.  However, the statechange event    was being posted _before_ that logic took effect.  Now it's being    posted after.      
Submitted by: gibbs, asomers, mav, allanjude   Reviewed by: mav, delphij   Relnotes: yes   Sponsored by: Spectra Logic Corp, iX Systems   Differential Revision: https://reviews.freebsd.org/D6564  Added:   head/cddl/usr.sbin/zfsd/   head/cddl/usr.sbin/zfsd/Makefile   (contents, props changed)   head/cddl/usr.sbin/zfsd/Makefile.common   (contents, props changed)   head/cddl/usr.sbin/zfsd/callout.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/callout.h   (contents, props changed)   head/cddl/usr.sbin/zfsd/case_file.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/case_file.h   (contents, props changed)   head/cddl/usr.sbin/zfsd/tests/   head/cddl/usr.sbin/zfsd/tests/Makefile   (contents, props changed)   head/cddl/usr.sbin/zfsd/tests/libmocks.c   (contents, props changed)   head/cddl/usr.sbin/zfsd/tests/libmocks.h   (contents, props changed)   head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.supp   (contents, props changed)   head/cddl/usr.sbin/zfsd/vdev.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/vdev.h   (contents, props changed)   head/cddl/usr.sbin/zfsd/vdev_iterator.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/vdev_iterator.h   (contents, props changed)   head/cddl/usr.sbin/zfsd/zfsd.8   (contents, props changed)   head/cddl/usr.sbin/zfsd/zfsd.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/zfsd.h   (contents, props changed)   head/cddl/usr.sbin/zfsd/zfsd_event.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/zfsd_event.h   (contents, props changed)   head/cddl/usr.sbin/zfsd/zfsd_exception.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/zfsd_exception.h   (contents, props changed)   head/cddl/usr.sbin/zfsd/zfsd_main.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/zpool_list.cc   (contents, props changed)   head/cddl/usr.sbin/zfsd/zpool_list.h   (contents, props changed)   head/etc/rc.d/zfsd   (contents, props 
changed)   head/lib/libdevdctl/   head/lib/libdevdctl/Makefile   (contents, props changed)   head/lib/libdevdctl/consumer.cc   (contents, props changed)   head/lib/libdevdctl/consumer.h   (contents, props changed)   head/lib/libdevdctl/event.cc   (contents, props changed)   head/lib/libdevdctl/event.h   (contents, props changed)   head/lib/libdevdctl/event_factory.cc   (contents, props changed)   head/lib/libdevdctl/event_factory.h   (contents, props changed)   head/lib/libdevdctl/exception.cc   (contents, props changed)   head/lib/libdevdctl/exception.h   (contents, props changed)   head/lib/libdevdctl/guid.cc   (contents, props changed)   head/lib/libdevdctl/guid.h   (contents, props changed)   head/lib/libdevdctl/tests/   head/lib/libdevdctl/tests/Makefile   (contents, props changed)   head/lib/libdevdctl/tests/libdevdctl_unittest.cc   (contents, props changed) Modified:   head/cddl/usr.sbin/Makefile   head/etc/defaults/rc.conf   head/etc/mtree/BSD.include.dist   head/etc/mtree/BSD.tests.dist   head/etc/mtree/BSD.var.dist   head/etc/rc.d/Makefile   head/lib/Makefile   head/share/mk/bsd.libnames.mk   head/share/mk/src.libnames.mk   head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c  Modified: head/cddl/usr.sbin/Makefile ============================================================================== --- head/cddl/usr.sbin/Makefile Sat May 28 16:38:09 2016 (r300905) +++ head/cddl/usr.sbin/Makefile Sat May 28 17:43:40 2016 (r300906) @@ -7,6 +7,7 @@ SUBDIR= ${_dtrace} /   ${_plockstat} /   ${_tests} /   ${_zdb} / + ${_zfsd} /   ${_zhack}    .if ${MK_TESTS} != "no" @@ -18,6 +19,9 @@ _tests= tests  _zdb= zdb  _zhack= zhack  .endif +. if ${MK_CXX} != "no" +_zfsd= zfsd +. 
endif  .endif    .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386"  Added: head/cddl/usr.sbin/zfsd/Makefile ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/Makefile Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,13 @@ +# $FreeBSD$ + +SRCDIR=${.CURDIR}/../../.. +.include "Makefile.common" + +PROG_CXX= zfsd +MAN=  zfsd.8 + +.include <bsd.prog.mk> + +# The unittests require devel/googletest and devel/googlemock from ports. +# Don't automatically build them. +SUBDIR=  Added: head/cddl/usr.sbin/zfsd/Makefile.common ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/Makefile.common Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,42 @@ +# $FreeBSD$ + +SRCS=  callout.cc  / +  case_file.cc  / +  zfsd_event.cc  / +  vdev.cc   / +  vdev_iterator.cc / +  zfsd.cc   / +  zfsd_exception.cc / +  zpool_list.cc  / +  zfsd_main.cc + +WARNS?=  3 + +# Ignore warnings about Solaris specific pragmas. 
+IGNORE_PRAGMA=  YES + +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzpool/common +INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/include +INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/lib/libumem +INCFLAGS+= -I${SRCDIR}/sys/cddl/compat/opensolaris +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/head +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libuutil/common +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libumem/common +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs_core/common +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs/common +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libnvpair +INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/common/zfs +INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common +INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs +INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/sys + +CFLAGS= -g -DNEED_SOLARIS_BOOLEAN ${INCFLAGS} + +DPADD=  ${LIBDEVDCTL} ${LIBZFS} ${LIBZFS_CORE} ${LIBUTIL} ${LIBGEOM} / + ${LIBBSDXML} ${LIBSBUF} ${LIBNVPAIR} ${LIBUUTIL} +LIBADD=  devdctl zfs zfs_core util geom bsdxml sbuf nvpair uutil + +cscope: + find ${.CURDIR} -type f -a /( -name "*.[ch]" -o -name "*.cc" /) / +      > ${.CURDIR}/cscope.files + cd ${.CURDIR} && cscope -buq ${INCFLAGS}  Added: head/cddl/usr.sbin/zfsd/callout.cc ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/callout.cc Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,219 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + *    notice, this list of conditions, and the following disclaimer, + *    without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + *    substantially similar to the "NO WARRANTY" disclaimer below + *    ("Disclaimer") and any redistribution must be conditioned upon + *    including a substantially similar Disclaimer requirement for further + *    binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs     (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * /file callout.cc + * + * /brief Implementation of the Callout class - multi-client + *        timer services built on top of the POSIX interval timer. 
+ */ + +#include <sys/time.h> + +#include <signal.h> +#include <syslog.h> + +#include <climits> +#include <list> +#include <map> +#include <string> + +#include <devdctl/guid.h> +#include <devdctl/event.h> +#include <devdctl/event_factory.h> +#include <devdctl/consumer.h> +#include <devdctl/exception.h> + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" + +std::list<Callout *> Callout::s_activeCallouts; +bool       Callout::s_alarmFired(false); + +void +Callout::Init() +{ + signal(SIGALRM,  Callout::AlarmSignalHandler); +} + +bool +Callout::Stop() +{ + if (!IsPending()) +  return (false); + + for (std::list<Callout *>::iterator it(s_activeCallouts.begin()); +      it != s_activeCallouts.end(); it++) { +  if (*it != this) +   continue; + +  it = s_activeCallouts.erase(it); +  if (it != s_activeCallouts.end()) { + +   /* +    * Maintain correct interval for the +    * callouts that follow the just removed +    * entry. +    */ +   timeradd(&(*it)->m_interval, &m_interval, +     &(*it)->m_interval); +  } +  break; + } + m_pending = false; + return (true); +} + +bool +Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg) +{ + bool cancelled(false); + + if (!timerisset(&interval)) +  throw ZfsdException("Callout::Reset: interval of 0"); + + cancelled = Stop(); + + m_interval = interval; + m_func     = func; + m_arg      = arg; + m_pending  = true; + + std::list<Callout *>::iterator it(s_activeCallouts.begin()); + for (; it != s_activeCallouts.end(); it++) { + +  if (timercmp(&(*it)->m_interval, &m_interval, <=)) { +   /* +    * Decrease our interval by those that come +    * before us. +    */ +   timersub(&m_interval, &(*it)->m_interval, &m_interval); +  } else { +   /* +    * Account for the time between the newly +    * inserted event and those that follow. 
+    */ +   timersub(&(*it)->m_interval, &m_interval, +     &(*it)->m_interval); +   break; +  } + } + s_activeCallouts.insert(it, this); + + + if (s_activeCallouts.front() == this) { +  itimerval timerval = { {0, 0}, m_interval }; + +  setitimer(ITIMER_REAL, &timerval, NULL); + } + + return (cancelled); +} + +void +Callout::AlarmSignalHandler(int) +{ + s_alarmFired = true; + ZfsDaemon::WakeEventLoop(); +} + +void +Callout::ExpireCallouts() +{ + if (!s_alarmFired) +  return; + + s_alarmFired = false; + if (s_activeCallouts.empty()) { +  /* Callout removal/SIGALRM race was lost. */ +  return; + } + + /* +  * Expire the first callout (the one we used to set the +  * interval timer) as well as any callouts following that +  * expire at the same time (have a zero interval from +  * the callout before it). +  */ + do { +  Callout *cur(s_activeCallouts.front()); +  s_activeCallouts.pop_front(); +  cur->m_pending = false; +  cur->m_func(cur->m_arg); + } while (!s_activeCallouts.empty() +       && timerisset(&s_activeCallouts.front()->m_interval) == 0); + + if (!s_activeCallouts.empty()) { +  Callout *next(s_activeCallouts.front()); +  itimerval timerval = { { 0, 0 }, next->m_interval }; + +  setitimer(ITIMER_REAL, &timerval, NULL); + } +} + +timeval +Callout::TimeRemaining() const +{ + /* +  * Outline: Add the m_interval for each callout in s_activeCallouts +  * ahead of this, except for the first callout.  Add to that the result +  * of getitimer (That's because the first callout stores its original +  * interval setting while the timer is ticking). 
+  */ + itimerval timervalToAlarm; + timeval timeToExpiry; + std::list<Callout *>::iterator it; + + if (!IsPending()) { +  timeToExpiry.tv_sec = INT_MAX; +  timeToExpiry.tv_usec = 999999; /*maximum normalized value*/ +  return (timeToExpiry); + } + + timerclear(&timeToExpiry); + getitimer(ITIMER_REAL, &timervalToAlarm); + timeval& timeToAlarm = timervalToAlarm.it_value; + timeradd(&timeToExpiry, &timeToAlarm, &timeToExpiry); + + it =s_activeCallouts.begin(); + it++; /*skip the first callout in the list*/ + for (; it != s_activeCallouts.end(); it++) { +  timeradd(&timeToExpiry, &(*it)->m_interval, &timeToExpiry); +  if ((*it) == this) +   break; + } + return (timeToExpiry); +}  Added: head/cddl/usr.sbin/zfsd/callout.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/callout.h Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,185 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions, and the following disclaimer, + *    without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + *    substantially similar to the "NO WARRANTY" disclaimer below + *    ("Disclaimer") and any redistribution must be conditioned upon + *    including a substantially similar Disclaimer requirement for further + *    binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs     (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * /file callout.h + * + * /brief Interface for timer based callback services. + * + * Header requirements: + * + *     #include <sys/time.h> + * + *     #include <list> + */ + +#ifndef _CALLOUT_H_ +#define _CALLOUT_H_ + +/** + * /brief Type of the function callback from a Callout. + */ +typedef void CalloutFunc_t(void *); + +/** + * /brief Interface to a schedulable one-shot timer with the granularity + *        of the system clock (see setitimer(2)). + * + * Determination of callback expiration is triggered by the SIGALRM + * signal.  Callout callbacks are always delivered from Zfsd's event + * processing loop. + * + * Periodic actions can be triggered via the Callout mechanisms by + * resetting the Callout from within its callback. + */ +class Callout +{ +public: + + /** +  * Initialize the Callout subsystem. +  */ + static void Init(); + + /** +  * Function called (via SIGALRM) when our interval +  * timer expires. +  */ + static void AlarmSignalHandler(int); + + /** +  * Execute callbacks for all callouts that have the same +  * expiration time as the first callout in the list. +  */ + static void ExpireCallouts(); + + /** Constructor. */ + Callout(); + + /** +  * Returns true if callout has not been stopped, +  * or deactivated since the last time the callout was +  * reset. 
+  */ + bool IsActive() const; + + /** +  * Returns true if callout is still waiting to expire. +  */ + bool IsPending() const; + + /** +  * Disestablish a callout. +  */ + bool Stop(); + + /** +  * /brief Establish or change a timeout. +  * +  * /param interval  Timeval indicating the time which must elapse +  *                  before this callout fires. +  * /param func      Pointer to the callback funtion +  * /param arg       Argument pointer to pass to callback function +  * +  * /return  Cancellation status. +  *             true:  The previous callback was pending and therefore +  *                    was cancelled. +  *             false: The callout was not pending at the time of this +  *                    reset request. +  *          In all cases, a new callout is established. +  */ + bool  Reset(const timeval &interval, CalloutFunc_t *func, void *arg); + + /** +  * /brief Calculate the remaining time until this Callout's timer +  *        expires. +  * +  * The return value will be slightly greater than the actual time to +  * expiry. +  * +  * If the callout is not pending, returns INT_MAX. +  */ + timeval TimeRemaining() const; + +private: + /** +  * All active callouts sorted by expiration time.  The callout +  * with the nearest expiration time is at the head of the list. +  */ + static std::list<Callout *> s_activeCallouts; + + /** +  * The interval timer has expired.  This variable is set from +  * signal handler context and tested from Zfsd::EventLoop() +  * context via ExpireCallouts(). +  */ + static bool                 s_alarmFired; + + /** +  * Time, relative to others in the active list, until +  * this callout is fired. +  */ + timeval                     m_interval; + + /** Callback function argument. */ + void                       *m_arg; + + /** +  * The callback function associated with this timer +  * entry. +  */ + CalloutFunc_t              *m_func; + + /** State of this callout. 
*/ + bool                        m_pending; +}; + +//- Callout public const methods ---------------------------------------------- +inline bool +Callout::IsPending() const +{ + return (m_pending); +} + +//- Callout public methods ---------------------------------------------------- +inline +Callout::Callout() + : m_arg(0), +   m_func(NULL), +   m_pending(false) +{ + timerclear(&m_interval); +} + +#endif /* CALLOUT_H_ */  Added: head/cddl/usr.sbin/zfsd/case_file.cc ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/case_file.cc Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,1104 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions, and the following disclaimer, + *    without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + *    substantially similar to the "NO WARRANTY" disclaimer below + *    ("Disclaimer") and any redistribution must be conditioned upon + *    including a substantially similar Disclaimer requirement for further + *    binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs     (Spectra Logic Corporation) + */ + +/** + * /file case_file.cc + * + * We keep case files for any leaf vdev that is not in the optimal state. + * However, we only serialize to disk those events that need to be preserved + * across reboots.  For now, this is just a log of soft errors which we + * accumulate in order to mark a device as degraded. + */ +#include <sys/cdefs.h> +#include <sys/time.h> + +#include <sys/fs/zfs.h> + +#include <dirent.h> +#include <iomanip> +#include <fstream> +#include <functional> +#include <sstream> +#include <syslog.h> +#include <unistd.h> + +#include <libzfs.h> + +#include <list> +#include <map> +#include <string> + +#include <devdctl/guid.h> +#include <devdctl/event.h> +#include <devdctl/event_factory.h> +#include <devdctl/exception.h> +#include <devdctl/consumer.h> + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd_event.h" +#include "case_file.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); + +/*============================ Namespace Control =============================*/ +using std::auto_ptr; +using std::hex; +using std::ifstream; +using std::stringstream; +using std::setfill; +using std::setw; + +using DevdCtl::Event; +using DevdCtl::EventBuffer; +using DevdCtl::EventFactory; +using DevdCtl::EventList; +using DevdCtl::Guid; +using DevdCtl::ParseException; + +/*--------------------------------- CaseFile 
---------------------------------*/ +//- CaseFile Static Data ------------------------------------------------------- + +CaseFileList  CaseFile::s_activeCases; +const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; +const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; + +//- CaseFile Static Public Methods --------------------------------------------- +CaseFile * +CaseFile::Find(Guid poolGUID, Guid vdevGUID) +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); +      curCase != s_activeCases.end(); curCase++) { + +  if ((*curCase)->PoolGUID() != poolGUID +   || (*curCase)->VdevGUID() != vdevGUID) +   continue; + +  /* +   * We only carry one active case per-vdev. +   */ +  return (*curCase); + } + return (NULL); +} + +CaseFile * +CaseFile::Find(const string &physPath) +{ + CaseFile *result = NULL; + + for (CaseFileList::iterator curCase = s_activeCases.begin(); +      curCase != s_activeCases.end(); curCase++) { + +  if ((*curCase)->PhysicalPath() != physPath) +   continue; + +  if (result != NULL) { +   syslog(LOG_WARNING, "Multiple casefiles found for " +       "physical path %s.  
" +       "This is most likely a bug in zfsd", +       physPath.c_str()); +  } +  result = *curCase; + } + return (result); +} + + +void +CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) +{ + CaseFileList::iterator casefile; + for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ +  CaseFileList::iterator next = casefile; +  next++; +  if (poolGUID == (*casefile)->PoolGUID()) +   (*casefile)->ReEvaluate(event); +  casefile = next; + } +} + +CaseFile & +CaseFile::Create(Vdev &vdev) +{ + CaseFile *activeCase; + + activeCase = Find(vdev.PoolGUID(), vdev.GUID()); + if (activeCase == NULL) +  activeCase = new CaseFile(vdev); + + return (*activeCase); +} + +void +CaseFile::DeSerialize() +{ + struct dirent **caseFiles; + + int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, +    DeSerializeSelector, /*compar*/NULL)); + + if (numCaseFiles == -1) +  return; + if (numCaseFiles == 0) { +  free(caseFiles); +  return; + } + + for (int i = 0; i < numCaseFiles; i++) { + +  DeSerializeFile(caseFiles[i]->d_name); +  free(caseFiles[i]); + } + free(caseFiles); +} + +void +CaseFile::LogAll() +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); +      curCase != s_activeCases.end(); curCase++) +  (*curCase)->Log(); +} + +void +CaseFile::PurgeAll() +{ + /* +  * Serialize casefiles before deleting them so that they can be reread +  * and revalidated during BuildCaseFiles. +  * CaseFiles remove themselves from this list on destruction. +  */ + while (s_activeCases.size() != 0) { +  CaseFile *casefile = s_activeCases.front(); +  casefile->Serialize(); +  delete casefile; + } + +} + +//- CaseFile Public Methods ---------------------------------------------------- +bool +CaseFile::RefreshVdevState() +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *casePool(zpl.empty() ? 
NULL : zpl.front()); + if (casePool == NULL) +  return (false); + + Vdev vd(casePool, CaseVdev(casePool)); + if (vd.DoesNotExist()) +  return (false); + + m_vdevState    = vd.State(); + m_vdevPhysPath = vd.PhysicalPath(); + return (true); +} + +bool +CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); + + if (pool == NULL || !RefreshVdevState()) { +  /* +   * The pool or vdev for this case file is no longer +   * part of the configuration.  This can happen +   * if we process a device arrival notification +   * before seeing the ZFS configuration change +   * event. +   */ +  syslog(LOG_INFO, +         "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  " +         "Closing/n", +         PoolGUIDString().c_str(), +         VdevGUIDString().c_str()); +  Close(); + +  /* +   * Since this event was not used to close this +   * case, do not report it as consumed. +   */ +  return (/*consumed*/false); + } + + if (VdevState() > VDEV_STATE_CANT_OPEN) { +  /* +   * For now, newly discovered devices only help for +   * devices that are missing.  In the future, we might +   * use a newly inserted spare to replace a degraded +   * or faulted device. +   */ +  syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", +      PoolGUIDString().c_str(), VdevGUIDString().c_str()); +  return (/*consumed*/false); + } + + if (vdev != NULL +  && vdev->PoolGUID() == m_poolGUID +  && vdev->GUID() == m_vdevGUID) { + +  zpool_vdev_online(pool, vdev->GUIDString().c_str(), +      ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, +      &m_vdevState); +  syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  
State now %s./n", +         zpool_get_name(pool), vdev->GUIDString().c_str(), +         devPath.c_str(), +         zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + +  /* +   * Check the vdev state post the online action to see +   * if we can retire this case. +   */ +  CloseIfSolved(); + +  return (/*consumed*/true); + } + + /* +  * If the auto-replace policy is enabled, and we have physical +  * path information, try a physical path replacement. +  */ + if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { +  syslog(LOG_INFO, +         "CaseFile(%s:%s:%s): AutoReplace not set.  " +         "Ignoring device insertion./n", +         PoolGUIDString().c_str(), +         VdevGUIDString().c_str(), +         zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); +  return (/*consumed*/false); + } + + if (PhysicalPath().empty()) { +  syslog(LOG_INFO, +         "CaseFile(%s:%s:%s): No physical path information.  " +         "Ignoring device insertion./n", +         PoolGUIDString().c_str(), +         VdevGUIDString().c_str(), +         zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); +  return (/*consumed*/false); + } + + if (physPath != PhysicalPath()) { +  syslog(LOG_INFO, +         "CaseFile(%s:%s:%s): Physical path mismatch.  " +         "Ignoring device insertion./n", +         PoolGUIDString().c_str(), +         VdevGUIDString().c_str(), +         zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); +  return (/*consumed*/false); + } + + /* Write a label on the newly inserted disk. 
*/ + if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { +  syslog(LOG_ERR, +         "Replace vdev(%s/%s) by physical path (label): %s: %s/n", +         zpool_get_name(pool), VdevGUIDString().c_str(), +         libzfs_error_action(g_zfsHandle), +         libzfs_error_description(g_zfsHandle)); +  return (/*consumed*/false); + } + + syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", +     PoolGUIDString().c_str(), VdevGUIDString().c_str(), +     devPath.c_str()); + return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); +} + +bool +CaseFile::ReEvaluate(const ZfsEvent &event) +{ + bool consumed(false); + + if (event.Value("type") == "misc.fs.zfs.vdev_remove") { +  /* +   * The Vdev we represent has been removed from the +   * configuration.  This case is no longer of value. +   */ +  Close(); + +  return (/*consumed*/true); + } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { +  /* This Pool has been destroyed.  Discard the case */ +  Close(); + +  return (/*consumed*/true); + } else if (event.Value("type") == "misc.fs.zfs.config_sync") { +  RefreshVdevState(); +  if (VdevState() < VDEV_STATE_HEALTHY) +   consumed = ActivateSpare(); + } + + + if (event.Value("class") == "resource.fs.zfs.removed") { +  bool spare_activated; + +  if (!RefreshVdevState()) { +   /* +    * The pool or vdev for this case file is no longer +    * part of the configuration.  This can happen +    * if we process a device arrival notification +    * before seeing the ZFS configuration change +    * event. +    */ +   syslog(LOG_INFO, +          "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " +          "unconfigured.  Closing/n", +          PoolGUIDString().c_str(), +          VdevGUIDString().c_str()); +   /* +    * Close the case now so we won't waste cycles in the +    * system rescan +    */ +   Close(); + +   /* +    * Since this event was not used to close this +    * case, do not report it as consumed. 
+    */ +   return (/*consumed*/false); +  } + +  /* +   * Discard any tentative I/O error events for +   * this case.  They were most likely caused by the +   * hot-unplug of this device. +   */ +  PurgeTentativeEvents(); + +  /* Try to activate spares if they are available */ +  spare_activated = ActivateSpare(); + +  /* +   * Rescan the drives in the system to see if a recent +   * drive arrival can be used to solve this case. +   */ +  ZfsDaemon::RequestSystemRescan(); + +  /* +   * Consume the event if we successfully activated a spare. +   * Otherwise, leave it in the unconsumed events list so that the +   * future addition of a spare to this pool might be able to +   * close the case +   */ +  consumed = spare_activated; + } else if (event.Value("class") == "resource.fs.zfs.statechange") { +  RefreshVdevState(); +  /* +   * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to +   * activate a hotspare.  Otherwise, ignore the event +   */ +  if (VdevState() == VDEV_STATE_FAULTED || +      VdevState() == VDEV_STATE_DEGRADED || +      VdevState() == VDEV_STATE_CANT_OPEN) +   (void) ActivateSpare(); +  consumed = true; + } + else if (event.Value("class") == "ereport.fs.zfs.io" || +          event.Value("class") == "ereport.fs.zfs.checksum") { + +  m_tentativeEvents.push_front(event.DeepCopy()); +  RegisterCallout(event); +  consumed = true; + } + + bool closed(CloseIfSolved()); + + return (consumed || closed); +} + + +bool +CaseFile::ActivateSpare() { + nvlist_t *config, *nvroot; + nvlist_t       **spares; + char  *devPath, *vdev_type; + const char *poolname; + u_int   nspares, i; + int   error; + + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *zhp(zpl.empty() ? 
NULL : zpl.front()); + if (zhp == NULL) { +  syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " +         "for pool_guid %"PRIu64".", (uint64_t)m_poolGUID); +  return (false); + } + poolname = zpool_get_name(zhp); + config = zpool_get_config(zhp, NULL); + if (config == NULL) { +  syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " +         "config for pool %s", poolname); +  return (false); + } + error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); + if (error != 0){ +  syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " +         "tree for pool %s", poolname); +  return (false); + } + nspares = 0; + nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, +       &nspares); + if (nspares == 0) { +  /* The pool has no spares configured */ +  syslog(LOG_INFO, "CaseFile::ActivateSpare: " +         "No spares available for pool %s", poolname); +  return (false); + } + for (i = 0; i < nspares; i++) { +  uint64_t    *nvlist_array; +  vdev_stat_t *vs; +  uint_t      nstats; + +  if (nvlist_lookup_uint64_array(spares[i], +      ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { +   syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " +          "find vdev stats for pool %s, spare %d", +          poolname, i); +   return (false); +  } +  vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); + +  if ((vs->vs_aux != VDEV_AUX_SPARED) +   && (vs->vs_state == VDEV_STATE_HEALTHY)) { +   /* We found a usable spare */ +   break; +  } + } + + if (i == nspares) { +  /* No available spares were found */ +  return (false);  *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

More information about the svn-src-head mailing list

转载本站任何文章请注明:转载至神刀安全网,谢谢神刀安全网 » Zfsd(8), the ZFS fault management daemon lands in FreeBSD

分享到:更多 ()

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址