Skip to content

Add zpool status --lockless|--trylock #17193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -7723,7 +7723,8 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)

if (cfg == NULL) {
zdb_set_skip_mmp(poolname);
error = spa_get_stats(poolname, &cfg, NULL, 0);
error = spa_get_stats(poolname, &cfg, NULL, 0,
ZPOOL_LOCK_BEHAVIOR_DEFAULT);
if (error != 0) {
fatal("Tried to read config of pool \"%s\" but "
"spa_get_stats() failed with error %d\n",
Expand Down
7 changes: 6 additions & 1 deletion cmd/zpool/zpool_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type,
boolean_t literal, int *err)
{
zpool_list_t *zlp;
int rc;

zlp = safe_malloc(sizeof (zpool_list_t));

Expand All @@ -137,7 +138,11 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type,
zlp->zl_literal = literal;

if (argc == 0) {
(void) zpool_iter(g_zfs, add_pool, zlp);
rc = zpool_iter(g_zfs, add_pool, zlp);
if (rc != 0) {
free(zlp);
return (NULL);
}
zlp->zl_findall = B_TRUE;
} else {
int i;
Expand Down
37 changes: 32 additions & 5 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ enum zpool_options {
ZPOOL_OPTION_ALLOW_ASHIFT_MISMATCH,
ZPOOL_OPTION_POOL_KEY_GUID,
ZPOOL_OPTION_JSON_NUMS_AS_INT,
ZPOOL_OPTION_JSON_FLAT_VDEVS
ZPOOL_OPTION_JSON_FLAT_VDEVS,
ZPOOL_OPTION_LOCKLESS,
ZPOOL_OPTION_TRYLOCK,
};

/*
Expand Down Expand Up @@ -490,8 +492,8 @@ get_usage(zpool_help_t idx)
case HELP_LABELCLEAR:
return (gettext("\tlabelclear [-f] <vdev>\n"));
case HELP_LIST:
return (gettext("\tlist [-gHLpPv] [-o property[,...]] [-j "
"[--json-int, --json-pool-key-guid]] ...\n"
return (gettext("\tlist [-gHLpPv] [-o property[,...]] "
"[-j [--json-int, --json-pool-key-guid]] ...\n"
"\t [-T d|u] [pool] [interval [count]]\n"));
case HELP_PREFETCH:
return (gettext("\tprefetch -t <type> [<type opts>] <pool>\n"
Expand Down Expand Up @@ -521,8 +523,8 @@ get_usage(zpool_help_t idx)
return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] <pool> "
"[<device> ...]\n"));
case HELP_STATUS:
return (gettext("\tstatus [--power] [-j [--json-int, "
"--json-flat-vdevs, ...\n"
return (gettext("\tstatus [--power] [--lockless|--trylock] "
"[-j [--json-int, --json-flat-vdevs, ...\n"
"\t --json-pool-key-guid]] [-c [script1,script2,...]] "
"[-dDegiLpPstvx] ...\n"
"\t [-T d|u] [pool] [interval [count]]\n"));
Expand Down Expand Up @@ -2614,6 +2616,9 @@ typedef struct status_cbdata {
nvlist_t *cb_jsobj;
boolean_t cb_json_as_int;
boolean_t cb_json_pool_key_guid;
boolean_t cb_lockless;
boolean_t cb_trylock;

} status_cbdata_t;

/* Return 1 if string is NULL, empty, or whitespace; return 0 otherwise. */
Expand Down Expand Up @@ -11002,6 +11007,8 @@ status_callback(zpool_handle_t *zhp, void *data)
* --json-int Display numbers in integer format instead of string
* --json-flat-vdevs Display vdevs in flat hierarchy
* --json-pool-key-guid Use pool GUID as key for pool objects
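* --lockless Do a lockless lookup if the lock cannot be acquired
* (DANGEROUS: can crash the kernel module)
* --trylock Try to get the namespace lock, but abort if it cannot
* be acquired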
*
* Describes the health status of all pools or some subset.
*/
Expand All @@ -11024,6 +11031,10 @@ zpool_do_status(int argc, char **argv)
ZPOOL_OPTION_JSON_FLAT_VDEVS},
{"json-pool-key-guid", no_argument, NULL,
ZPOOL_OPTION_POOL_KEY_GUID},
{"lockless", no_argument, NULL,
ZPOOL_OPTION_LOCKLESS},
{"trylock", no_argument, NULL,
ZPOOL_OPTION_TRYLOCK},
{0, 0, 0, 0}
};

Expand Down Expand Up @@ -11111,6 +11122,12 @@ zpool_do_status(int argc, char **argv)
case ZPOOL_OPTION_POOL_KEY_GUID:
cb.cb_json_pool_key_guid = B_TRUE;
break;
case ZPOOL_OPTION_LOCKLESS:
cb.cb_lockless = B_TRUE;
break;
case ZPOOL_OPTION_TRYLOCK:
cb.cb_trylock = B_TRUE;
break;
case '?':
if (optopt == 'c') {
print_zpool_script_list("status");
Expand Down Expand Up @@ -11152,6 +11169,16 @@ zpool_do_status(int argc, char **argv)
usage(B_FALSE);
}

if (cb.cb_lockless && cb.cb_trylock) {
(void) fprintf(stderr, gettext("cannot pass both --lockless and"
" --trylock\n"));
usage(B_FALSE);
} else if (cb.cb_lockless) {
libzfs_set_lock_behavior(g_zfs, ZPOOL_LOCK_BEHAVIOR_LOCKLESS);
} else if (cb.cb_trylock) {
libzfs_set_lock_behavior(g_zfs, ZPOOL_LOCK_BEHAVIOR_TRYLOCK);
}

for (;;) {
if (cb.cb_json) {
cb.cb_jsobj = zpool_json_schema(0, 1);
Expand Down
2 changes: 2 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ _LIBZFS_H int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
nvlist_t *, nvlist_t *);
_LIBZFS_H int zpool_destroy(zpool_handle_t *, const char *);
_LIBZFS_H int zpool_add(zpool_handle_t *, nvlist_t *, boolean_t check_ashift);
_LIBZFS_H void libzfs_set_lock_behavior(libzfs_handle_t *,
zpool_lock_behavior_t);

typedef struct splitflags {
/* do not split, but return the config that would be split off */
Expand Down
15 changes: 15 additions & 0 deletions include/os/freebsd/spl/sys/mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,19 @@ typedef enum {
#define mutex_owned(lock) sx_xlocked(lock)
#define mutex_owner(lock) sx_xholder(lock)

/*
 * Poor-man's version of Linux kernel's down_timeout().  Try to acquire a
 * mutex for up to 'ns' nanoseconds.
 *
 * The mutex is polled with mutex_tryenter() in a tight loop; nothing in this
 * function sleeps or yields, so the caller spins until the lock is taken or
 * the deadline passes.  At least one acquisition attempt is always made, so
 * a timeout of 0 behaves like a single mutex_tryenter().
 *
 * Returns 0 if the mutex was acquired, or ETIME if the timeout occurred.
 */
static inline int
mutex_enter_timeout(kmutex_t *mutex, uint64_t ns)
{
	hrtime_t end = gethrtime() + ns;

	/*
	 * Test the deadline after the attempt, not before it: otherwise a
	 * zero (or already elapsed) timeout would report ETIME without ever
	 * having tried to take the lock.
	 */
	do {
		if (mutex_tryenter(mutex))
			return (0);	/* success */
	} while (gethrtime() < end);

	return (ETIME);
}

#endif /* _OPENSOLARIS_SYS_MUTEX_H_ */
16 changes: 16 additions & 0 deletions include/os/linux/spl/sys/mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#define _SPL_MUTEX_H

#include <sys/types.h>
#include <sys/time.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/lockdep.h>
Expand Down Expand Up @@ -187,4 +188,19 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
/* NOTE: do not dereference mp after this point */ \
}

/*
 * Poor-man's version of Linux kernel's down_timeout().  Try to acquire a
 * mutex for up to 'ns' nanoseconds.
 *
 * The mutex is polled with mutex_tryenter() in a tight loop; nothing in this
 * function sleeps or yields, so the caller spins until the lock is taken or
 * the deadline passes.  At least one acquisition attempt is always made, so
 * a timeout of 0 behaves like a single mutex_tryenter().
 *
 * Returns 0 if the mutex was acquired, or ETIME if the timeout occurred.
 */
static inline int
mutex_enter_timeout(kmutex_t *mutex, uint64_t ns)
{
	hrtime_t end = gethrtime() + ns;

	/*
	 * Test the deadline after the attempt, not before it: otherwise a
	 * zero (or already elapsed) timeout would report ETIME without ever
	 * having tried to take the lock.
	 */
	do {
		if (mutex_tryenter(mutex))
			return (0);	/* success */
	} while (gethrtime() < end);

	return (ETIME);
}

#endif /* _SPL_MUTEX_H */
27 changes: 27 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,9 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_REBUILD_STATS "org.openzfs:rebuild_stats"
#define ZPOOL_CONFIG_COMPATIBILITY "compatibility"

/* ZFS_IOC_POOL_STATS argument for spa_namespace locking behavior */
#define ZPOOL_CONFIG_LOCK_BEHAVIOR "lock_behavior" /* not stored on disk */

/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
Expand Down Expand Up @@ -1978,6 +1981,30 @@ enum zio_encrypt {
ZFS_XA_NS_PREFIX_MATCH(LINUX_TRUSTED, name) || \
ZFS_XA_NS_PREFIX_MATCH(LINUX_USER, name))

/*
* Set locking behavior for zpool commands.
*
* The numeric values are spelled out explicitly; keep them stable when
* adding entries, and add new entries before ZPOOL_LOCK_BEHAVIOR_END.
*/
typedef enum {
/* Wait to acquire the lock on the zpool config */
ZPOOL_LOCK_BEHAVIOR_WAIT = 0,
/* Alias: the default behavior is to wait for the lock */
ZPOOL_LOCK_BEHAVIOR_DEFAULT = ZPOOL_LOCK_BEHAVIOR_WAIT,
/*
* Return an error if it's taking an unnecessarily long time to
* acquire the lock on the pool config (default 100ms).
* NOTE(review): the timeout is presumably the spa_namespace_trylock_ms
* tunable - confirm.
*/
ZPOOL_LOCK_BEHAVIOR_TRYLOCK = 1,

/*
* DANGER: THIS CAN CRASH YOUR SYSTEM
*
* If you can't acquire the pool config lock after 100ms then do a
* lockless lookup. This should only be done in emergencies, as it
* can crash the kernel module!
*/
ZPOOL_LOCK_BEHAVIOR_LOCKLESS = 2,
ZPOOL_LOCK_BEHAVIOR_END = 3 /* last entry marker */
} zpool_lock_behavior_t;

#ifdef __cplusplus
}
#endif
Expand Down
12 changes: 10 additions & 2 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -742,10 +742,13 @@ typedef enum trim_type {

/* state manipulation functions */
extern int spa_open(const char *pool, spa_t **, const void *tag);
extern int spa_open_common_lock_behavior(const char *pool, spa_t **spapp,
const void *tag, nvlist_t *nvpolicy, nvlist_t **config,
zpool_lock_behavior_t zpool_lock_behavior);
extern int spa_open_rewind(const char *pool, spa_t **, const void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
size_t buflen, zpool_lock_behavior_t zpool_lock_behavior);
extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
nvlist_t *zplprops, struct dsl_crypto_params *dcp);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
Expand Down Expand Up @@ -850,10 +853,13 @@ extern kcondvar_t spa_namespace_cv;

extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
extern void spa_config_load(void);
extern int spa_all_configs(uint64_t *generation, nvlist_t **pools);
extern int spa_all_configs(uint64_t *generation, nvlist_t **pools,
zpool_lock_behavior_t zpool_lock_behavior);
extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
extern nvlist_t *spa_config_generate_lock_behavior(spa_t *spa, vdev_t *vd,
uint64_t txg, int getstats, zpool_lock_behavior_t zpool_lock_behavior);
extern void spa_config_update(spa_t *spa, int what);
extern int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv,
vdev_t *parent, uint_t id, int atype);
Expand All @@ -865,9 +871,11 @@ extern int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv,

/* Namespace manipulation */
extern spa_t *spa_lookup(const char *name);
extern spa_t *spa_lookup_lockless(const char *name);
extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot);
extern void spa_remove(spa_t *spa);
extern spa_t *spa_next(spa_t *prev);
extern spa_t *spa_next_lockless(spa_t *prev);

/* Refcount functions */
extern void spa_open_ref(spa_t *spa, const void *tag);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,8 @@ extern void spa_set_deadman_synctime(hrtime_t ns);
extern void spa_set_deadman_ziotime(hrtime_t ns);
extern const char *spa_history_zone(void);
extern const char *zfs_active_allocator;
extern int zfs_allow_lockless_zpool_status;
extern unsigned int spa_namespace_trylock_ms;
extern int param_set_active_allocator_common(const char *val);

#ifdef __cplusplus
Expand Down
1 change: 1 addition & 0 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ extern void mutex_enter(kmutex_t *mp);
extern int mutex_enter_check_return(kmutex_t *mp);
extern void mutex_exit(kmutex_t *mp);
extern int mutex_tryenter(kmutex_t *mp);
extern int mutex_enter_timeout(kmutex_t *mp, uint64_t ns);

#define NESTED_SINGLE 1
#define mutex_enter_nested(mp, class) mutex_enter(mp)
Expand Down
5 changes: 5 additions & 0 deletions lib/libnvpair/libnvpair.abi
Original file line number Diff line number Diff line change
Expand Up @@ -2194,6 +2194,7 @@
</data-member>
</class-decl>
<typedef-decl name='stack_t' type-id='380f9954' id='ac5e685f'/>
<typedef-decl name='unw_regnum_t' type-id='95e97e5e' id='c53620f0'/>
<class-decl name='unw_cursor' size-in-bits='8128' is-struct='yes' visibility='default' id='384a1f22'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='opaque' type-id='dc70ec0b' visibility='default'/>
Expand Down Expand Up @@ -2306,6 +2307,10 @@
<parameter type-id='b59d7dce'/>
<return type-id='79a0948f'/>
</function-decl>
<function-decl name='_Ux86_64_regname' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='c53620f0'/>
<return type-id='80f4b756'/>
</function-decl>
<function-decl name='_ULx86_64_init_local' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='3946e4d1'/>
<parameter type-id='2e408b96'/>
Expand Down
45 changes: 6 additions & 39 deletions lib/libuutil/libuutil.abi
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,7 @@
</data-member>
</class-decl>
<typedef-decl name='stack_t' type-id='380f9954' id='ac5e685f'/>
<typedef-decl name='unw_regnum_t' type-id='95e97e5e' id='c53620f0'/>
<class-decl name='unw_cursor' size-in-bits='8128' is-struct='yes' visibility='default' id='384a1f22'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='opaque' type-id='dc70ec0b' visibility='default'/>
Expand Down Expand Up @@ -763,6 +764,10 @@
<parameter type-id='b59d7dce'/>
<return type-id='79a0948f'/>
</function-decl>
<function-decl name='_Ux86_64_regname' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='c53620f0'/>
<return type-id='80f4b756'/>
</function-decl>
<function-decl name='_ULx86_64_init_local' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='3946e4d1'/>
<parameter type-id='2e408b96'/>
Expand Down Expand Up @@ -1011,16 +1016,9 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libspl/os/linux/getmntany.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='38b51b3c' size-in-bits='832' id='02b72c00'>
<subrange length='13' type-id='7359adad' id='487fded1'/>
</array-type-def>
<array-type-def dimensions='1' type-id='03085adc' size-in-bits='192' id='083f8d58'>
<subrange length='3' type-id='7359adad' id='56f209d2'/>
</array-type-def>
<class-decl name='__locale_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='23de8b96'/>
<array-type-def dimensions='1' type-id='80f4b756' size-in-bits='832' id='39e6f84a'>
<subrange length='13' type-id='7359adad' id='487fded1'/>
</array-type-def>
<class-decl name='mnttab' size-in-bits='256' is-struct='yes' visibility='default' id='1b055409'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='mnt_special' type-id='26a90f95' visibility='default'/>
Expand Down Expand Up @@ -1130,25 +1128,6 @@
<typedef-decl name='__blksize_t' type-id='bd54fe1a' id='d3f10a7f'/>
<typedef-decl name='__blkcnt64_t' type-id='bd54fe1a' id='4e711bf1'/>
<typedef-decl name='__syscall_slong_t' type-id='bd54fe1a' id='03085adc'/>
<class-decl name='__locale_struct' size-in-bits='1856' is-struct='yes' visibility='default' id='90cc1ce3'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='__locales' type-id='02b72c00' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='832'>
<var-decl name='__ctype_b' type-id='31347b7a' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='896'>
<var-decl name='__ctype_tolower' type-id='6d60f45d' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='960'>
<var-decl name='__ctype_toupper' type-id='6d60f45d' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='1024'>
<var-decl name='__names' type-id='39e6f84a' visibility='default'/>
</data-member>
</class-decl>
<typedef-decl name='__locale_t' type-id='f01e1813' id='b7ac9b5f'/>
<typedef-decl name='locale_t' type-id='b7ac9b5f' id='973a4f8d'/>
<class-decl name='timespec' size-in-bits='128' is-struct='yes' visibility='default' id='a9c79a1f'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='tv_sec' type-id='65eda9c0' visibility='default'/>
Expand All @@ -1157,23 +1136,12 @@
<var-decl name='tv_nsec' type-id='03085adc' visibility='default'/>
</data-member>
</class-decl>
<pointer-type-def type-id='23de8b96' size-in-bits='64' id='38b51b3c'/>
<pointer-type-def type-id='90cc1ce3' size-in-bits='64' id='f01e1813'/>
<qualified-type-def type-id='95e97e5e' const='yes' id='2448a865'/>
<pointer-type-def type-id='2448a865' size-in-bits='64' id='6d60f45d'/>
<qualified-type-def type-id='8efea9e5' const='yes' id='3beb2af4'/>
<pointer-type-def type-id='3beb2af4' size-in-bits='64' id='31347b7a'/>
<pointer-type-def type-id='0c544dc0' size-in-bits='64' id='394fc496'/>
<pointer-type-def type-id='56fe4a37' size-in-bits='64' id='b6b61d2f'/>
<qualified-type-def type-id='b6b61d2f' restrict='yes' id='3cad23cd'/>
<pointer-type-def type-id='1b055409' size-in-bits='64' id='9d424d31'/>
<pointer-type-def type-id='0bbec9cd' size-in-bits='64' id='62f7a03d'/>
<qualified-type-def type-id='62f7a03d' restrict='yes' id='f1cadedf'/>
<class-decl name='__locale_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='23de8b96'/>
<function-decl name='uselocale' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='973a4f8d'/>
<return type-id='973a4f8d'/>
</function-decl>
<function-decl name='getmntent_r' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='e75a27e9'/>
<parameter type-id='3cad23cd'/>
Expand All @@ -1185,9 +1153,8 @@
<parameter type-id='822cd80b'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='strerror_l' visibility='default' binding='global' size-in-bits='64'>
<function-decl name='strerror' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='95e97e5e'/>
<parameter type-id='973a4f8d'/>
<return type-id='26a90f95'/>
</function-decl>
<function-decl name='__fprintf_chk' visibility='default' binding='global' size-in-bits='64'>
Expand Down
Loading
Loading