From: Viacheslav Dubeyko <Slava.Dubeyko@xxxxxxx> The Coverity Scan service has detected potential race condition in ceph_check_delayed_caps() [1]. The CID 1590633 contains explanation: "Accessing ci->i_ceph_flags without holding lock ceph_inode_info.i_ceph_lock. The value of the shared data will be determined by the interleaving of thread execution. Thread shared data is accessed without holding an appropriate lock, possibly causing a race condition (CWE-366)". The patch reworks the logic of accessing ci->i_ceph_flags. At first, it removes ci item from a mdsc->cap_delay_list. Then it unlocks mdsc->cap_delay_lock and it locks ci->i_ceph_lock. Then, it calls smp_mb__before_atomic() to be sure that ci->i_ceph_flags has consistent state of the bits. The is_metadata_under_flush variable stores the state of CEPH_I_FLUSH_BIT. Finally, it unlocks the ci->i_ceph_lock and it locks the mdsc->cap_delay_lock. The is_metadata_under_flush is used to check the condition that ci needs to be removed from mdsc->cap_delay_list. If it is not the case, then ci will be added into the head of mdsc->cap_delay_list. This patch reworks the logic of checking the CEPH_I_FLUSH_BIT, CEPH_I_FLUSH_SNAPS_BIT, CEPH_I_KICK_FLUSH_BIT, CEPH_ASYNC_CREATE_BIT, CEPH_I_ERROR_FILELOCK_BIT by test_bit() method and calling smp_mb__before_atomic() to ensure that bit state is consistent. It switches on calling the set_bit(), clear_bit() for these bits, and calling smp_mb__after_atomic() after these methods to ensure that modified bit is visible. Additionally, __must_hold() has been added for __cap_delay_requeue(), __cap_delay_requeue_front(), and __prep_cap() to help the sparse with lock checking and it was commented that caller of __cap_delay_requeue_front() and __prep_cap() must lock the ci->i_ceph_lock. v.2 Alex Markuze suggested to rework all Ceph inode's flags. Now, every declaration has CEPH_I_<*> and CEPH_I_<*>_BIT pair. v.3 The logic of operating by ci->i_ceph_flags bits on using test_bit(), clear_bit(), set_bit() and smp_mb__before_atomic(), smp_mb__after_atomic() has been reworked in addr.c, inode.c, locks.c, mds_client.c, snap.c, super.h, xattr.c additionally to caps.c. [1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590633 Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@xxxxxxx> cc: Alex Markuze <amarkuze@xxxxxxxxxx> cc: Ilya Dryomov <idryomov@xxxxxxxxx> cc: Ceph Development <ceph-devel@xxxxxxxxxxxxxxx> --- fs/ceph/addr.c | 8 ++- fs/ceph/caps.c | 132 +++++++++++++++++++++++++++++++++---------- fs/ceph/inode.c | 11 +++- fs/ceph/locks.c | 14 +++-- fs/ceph/mds_client.c | 10 +++- fs/ceph/snap.c | 6 +- fs/ceph/super.h | 75 ++++++++++++++++-------- fs/ceph/xattr.c | 11 +++- 8 files changed, 198 insertions(+), 69 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 60a621b00c65..c1caac06b059 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -2546,6 +2546,8 @@ int ceph_pool_perm_check(struct inode *inode, int need) return 0; spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); flags = ci->i_ceph_flags; pool = ci->i_layout.pool_id; spin_unlock(&ci->i_ceph_lock); @@ -2578,8 +2580,12 @@ int ceph_pool_perm_check(struct inode *inode, int need) if (pool == ci->i_layout.pool_id && pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) { ci->i_ceph_flags |= flags; - } else { + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } else { pool = ci->i_layout.pool_id; + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); flags = ci->i_ceph_flags; } spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a8d8b56cf9d2..9c82cda33ee5 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -516,6 +516,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, */ static void __cap_delay_requeue(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) + __must_hold(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; @@ -525,7 +526,9 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc, if (!mdsc->stopping) { spin_lock(&mdsc->cap_delay_lock); if (!list_empty(&ci->i_cap_delay_list)) { - if (ci->i_ceph_flags & CEPH_I_FLUSH) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags)) goto no_change; list_del_init(&ci->i_cap_delay_list); } @@ -540,15 +543,20 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc, * Queue an inode for immediate writeback. Mark inode with I_FLUSH, * indicating we should send a cap message to flush dirty metadata * asap, and move to the front of the delayed cap list. + * + * Caller must hold i_ceph_lock. */ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) + __must_hold(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); - ci->i_ceph_flags |= CEPH_I_FLUSH; + set_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list); @@ -1386,10 +1394,13 @@ void __ceph_remove_caps(struct ceph_inode_info *ci) * * Make note of max_size reported/requested from mds, revoked caps * that have now been implemented. + * + * Caller must hold i_ceph_lock. */ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, int op, int flags, int used, int want, int retain, int flushing, u64 flush_tid, u64 oldest_flush_tid) + __must_hold(ci->i_ceph_lock) { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->netfs.inode; @@ -1408,7 +1419,9 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, ceph_cap_string(revoking)); BUG_ON((retain & CEPH_CAP_PIN) == 0); - ci->i_ceph_flags &= ~CEPH_I_FLUSH; + clear_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); cap->issued &= retain; /* drop bits we don't want */ /* @@ -1665,7 +1678,9 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, last_tid = capsnap->cap_flush.tid; } - ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS; + clear_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); while (first_tid <= last_tid) { struct ceph_cap *cap = ci->i_auth_cap; @@ -1728,7 +1743,9 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, session = *psession; retry: spin_lock(&ci->i_ceph_lock); - if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (!test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) { doutc(cl, " no capsnap needs flush, doing nothing\n"); goto out; } @@ -1752,7 +1769,9 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, } // make sure flushsnap messages are sent in proper order. - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) __kick_flushing_caps(mdsc, session, ci, 0); __ceph_flush_snaps(ci, session); @@ -2024,15 +2043,21 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) struct ceph_mds_session *session = NULL; spin_lock(&ci->i_ceph_lock); - if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { - ci->i_ceph_flags |= CEPH_I_ASYNC_CHECK_CAPS; + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags)) { + set_bit(CEPH_I_ASYNC_CHECK_CAPS_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); /* Don't send messages until we get async create reply */ spin_unlock(&ci->i_ceph_lock); return; } - if (ci->i_ceph_flags & CEPH_I_FLUSH) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags)) flags |= CHECK_CAPS_FLUSH; retry: /* Caps wanted by virtue of active open files. */ @@ -2196,7 +2221,10 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) doutc(cl, "flushing dirty caps\n"); goto ack; } - if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) { + + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) { doutc(cl, "flushing snap caps\n"); goto ack; } @@ -2220,12 +2248,14 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) /* kick flushing and flush snaps before sending normal * cap message */ + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); if (cap == ci->i_auth_cap && (ci->i_ceph_flags & (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) { - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) + if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) __kick_flushing_caps(mdsc, session, ci, 0); - if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) + if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) __ceph_flush_snaps(ci, session); goto retry; @@ -2297,11 +2327,17 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) goto out; } + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); if (ci->i_ceph_flags & (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS)) { - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) __kick_flushing_caps(mdsc, session, ci, 0); - if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags)) __ceph_flush_snaps(ci, session); goto retry_locked; } @@ -2573,10 +2609,14 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, u64 last_snap_flush = 0; /* Don't do anything until create reply comes in */ - if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags)) return; - ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; + clear_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) { if (cf->is_capsnap) { @@ -2685,7 +2725,9 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); } else { - ci->i_ceph_flags |= CEPH_I_KICK_FLUSH; + set_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } spin_unlock(&ci->i_ceph_lock); @@ -2720,7 +2762,10 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, spin_unlock(&ci->i_ceph_lock); continue; } - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) { + + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_KICK_FLUSH_BIT, &ci->i_ceph_flags)) { __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); } @@ -2827,8 +2872,10 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, again: spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); if ((flags & CHECK_FILELOCK) && - (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) { + test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags)) { doutc(cl, "%p %llx.%llx error filelock\n", inode, ceph_vinop(inode)); ret = -EIO; @@ -3205,8 +3252,11 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci, doutc(cl, "%p follows %llu\n", capsnap, capsnap->follows); BUG_ON(capsnap->cap_flush.tid > 0); ceph_put_snap_context(capsnap->context); - if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) - ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; + if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) { + set_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } list_del(&capsnap->ci_item); ceph_put_cap_snap(capsnap); @@ -3395,7 +3445,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, if (ceph_try_drop_cap_snap(ci, capsnap)) { put++; } else { - ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; + set_bit(CEPH_I_FLUSH_SNAPS_BIT, + &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); flush_snaps = true; } } @@ -3646,8 +3699,11 @@ static void handle_cap_grant(struct inode *inode, rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns); if (ci->i_layout.pool_id != old_pool || - extra_info->pool_ns != old_ns) - ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; + extra_info->pool_ns != old_ns) { + clear_bit(CEPH_I_POOL_PERM_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } extra_info->pool_ns = old_ns; @@ -4613,6 +4669,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) unsigned long delay_max = opt->caps_wanted_delay_max * HZ; unsigned long loop_start = jiffies; unsigned long delay = 0; + bool is_metadata_under_flush; doutc(cl, "begin\n"); spin_lock(&mdsc->cap_delay_lock); @@ -4625,11 +4682,24 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) delay = ci->i_hold_caps_max; break; } - if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 && - time_before(jiffies, ci->i_hold_caps_max)) - break; + list_del_init(&ci->i_cap_delay_list); + spin_unlock(&mdsc->cap_delay_lock); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_metadata_under_flush = + test_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags); + spin_unlock(&ci->i_ceph_lock); + spin_lock(&mdsc->cap_delay_lock); + + if (!is_metadata_under_flush && + time_before(jiffies, ci->i_hold_caps_max)) { + list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list); + break; + } + inode = igrab(&ci->netfs.inode); if (inode) { spin_unlock(&mdsc->cap_delay_lock); @@ -4811,7 +4881,9 @@ int ceph_drop_caps_for_unlink(struct inode *inode) doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); - ci->i_ceph_flags |= CEPH_I_FLUSH; + set_bit(CEPH_I_FLUSH_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add_tail(&ci->i_cap_delay_list, @@ -5080,7 +5152,9 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali if (atomic_read(&ci->i_filelock_ref) > 0) { /* make further file lock syscall return -EIO */ - ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; + set_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); pr_warn_ratelimited_client(cl, " dropping file locks for %p %llx.%llx\n", inode, ceph_vinop(inode)); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 06cd2963e41e..109d5746a6ac 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1105,8 +1105,11 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, lockdep_is_held(&ci->i_ceph_lock)); rcu_assign_pointer(ci->i_layout.pool_ns, pool_ns); - if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns) - ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; + if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns) { + clear_bit(CEPH_I_POOL_PERM_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } pool_ns = old_ns; @@ -3149,7 +3152,9 @@ void ceph_inode_shutdown(struct inode *inode) bool invalidate = false; spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags |= CEPH_I_SHUTDOWN; + set_bit(CEPH_I_SHUTDOWN_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); p = rb_first(&ci->i_caps); while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ebf4ac0055dd..2475f2dbec1b 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -58,7 +58,9 @@ static void ceph_fl_release_lock(struct file_lock *fl) if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; + clear_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); spin_unlock(&ci->i_ceph_lock); } fl->fl_u.ceph.inode = NULL; @@ -269,9 +271,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) wait = 1; spin_lock(&ci->i_ceph_lock); - if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags)) err = -EIO; - } spin_unlock(&ci->i_ceph_lock); if (err < 0) { if (op == CEPH_MDS_OP_SETFILELOCK && lock_is_unlock(fl)) @@ -329,9 +332,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) doutc(cl, "fl_file: %p\n", fl->c.flc_file); spin_lock(&ci->i_ceph_lock); - if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_I_ERROR_FILELOCK_BIT, &ci->i_ceph_flags)) err = -EIO; - } spin_unlock(&ci->i_ceph_lock); if (err < 0) { if (lock_is_unlock(fl)) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 230e0c3f341f..19368c4eaf20 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3549,7 +3549,10 @@ static void __do_request(struct ceph_mds_client *mdsc, spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; - if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) { + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (test_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags) && + mds != cap->mds) { doutc(cl, "session changed for auth cap %d -> %d\n", cap->session->s_mds, session->s_mds); @@ -4630,8 +4633,11 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v2.pathbase = cpu_to_le64(pathbase); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); rec.v2.flock_len = (__force __le32) - ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); + (test_bit(CEPH_I_ERROR_FILELOCK_BIT, + &ci->i_ceph_flags) ? 0 : 1); } else { struct timespec64 ts; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c65f2b202b2b..6b9da14a942e 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -661,6 +661,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, */ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) + __must_hold(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); @@ -700,7 +701,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, return 0; } - ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; + set_bit(CEPH_I_FLUSH_SNAPS_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", inode, ceph_vinop(inode), capsnap, capsnap->context, capsnap->context->seq, ceph_cap_string(capsnap->dirty), diff --git a/fs/ceph/super.h b/fs/ceph/super.h index bb0db0cc8003..d55f91ce1a97 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -628,22 +628,35 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, /* * Ceph inode. */ -#define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */ -#define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */ -#define CEPH_I_POOL_PERM (1 << 3) /* pool rd/wr bits are valid */ -#define CEPH_I_POOL_RD (1 << 4) /* can read from pool */ -#define CEPH_I_POOL_WR (1 << 5) /* can write to pool */ -#define CEPH_I_SEC_INITED (1 << 6) /* security initialized */ -#define CEPH_I_KICK_FLUSH (1 << 7) /* kick flushing caps */ -#define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */ -#define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */ -#define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */ -#define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */ -#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ -#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) -#define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */ -#define CEPH_I_ASYNC_CHECK_CAPS (1 << 14) /* check caps immediately after async - creating finishes */ +#define CEPH_I_DIR_ORDERED_BIT (0) /* dentries in dir are ordered */ +#define CEPH_I_FLUSH_BIT (2) /* do not delay flush of dirty metadata */ +#define CEPH_I_POOL_PERM_BIT (3) /* pool rd/wr bits are valid */ +#define CEPH_I_POOL_RD_BIT (4) /* can read from pool */ +#define CEPH_I_POOL_WR_BIT (5) /* can write to pool */ +#define CEPH_I_SEC_INITED_BIT (6) /* security initialized */ +#define CEPH_I_KICK_FLUSH_BIT (7) /* kick flushing caps */ +#define CEPH_I_FLUSH_SNAPS_BIT (8) /* need flush snapss */ +#define CEPH_I_ERROR_WRITE_BIT (9) /* have seen write errors */ +#define CEPH_I_ERROR_FILELOCK_BIT (10) /* have seen file lock errors */ +#define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */ +#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ +#define CEPH_I_SHUTDOWN_BIT (13) /* inode is no longer usable */ +#define CEPH_I_ASYNC_CHECK_CAPS_BIT (14) /* check caps after async creating finishes */ + +#define CEPH_I_DIR_ORDERED (1 << CEPH_I_DIR_ORDERED_BIT) +#define CEPH_I_FLUSH (1 << CEPH_I_FLUSH_BIT) +#define CEPH_I_POOL_PERM (1 << CEPH_I_POOL_PERM_BIT) +#define CEPH_I_POOL_RD (1 << CEPH_I_POOL_RD_BIT) +#define CEPH_I_POOL_WR (1 << CEPH_I_POOL_WR_BIT) +#define CEPH_I_SEC_INITED (1 << CEPH_I_SEC_INITED_BIT) +#define CEPH_I_KICK_FLUSH (1 << CEPH_I_KICK_FLUSH_BIT) +#define CEPH_I_FLUSH_SNAPS (1 << CEPH_I_FLUSH_SNAPS_BIT) +#define CEPH_I_ERROR_WRITE (1 << CEPH_I_ERROR_WRITE_BIT) +#define CEPH_I_ERROR_FILELOCK (1 << CEPH_I_ERROR_FILELOCK_BIT) +#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT) +#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) +#define CEPH_I_SHUTDOWN (1 << CEPH_I_SHUTDOWN_BIT) +#define CEPH_I_ASYNC_CHECK_CAPS (1 << CEPH_I_ASYNC_CHECK_CAPS_BIT) /* * Masks of ceph inode work. @@ -663,20 +676,28 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, */ static inline void ceph_set_error_write(struct ceph_inode_info *ci) { - if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) { - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags |= CEPH_I_ERROR_WRITE; - spin_unlock(&ci->i_ceph_lock); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (!test_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags)) { + set_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } + spin_unlock(&ci->i_ceph_lock); } static inline void ceph_clear_error_write(struct ceph_inode_info *ci) { - if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) { - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE; - spin_unlock(&ci->i_ceph_lock); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + if (!test_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags)) { + clear_bit(CEPH_I_ERROR_WRITE_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } + spin_unlock(&ci->i_ceph_lock); } static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, @@ -1110,10 +1131,14 @@ void ceph_inode_shutdown(struct inode *inode); static inline bool ceph_inode_is_shutdown(struct inode *inode) { - unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags); + unsigned long flags; struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int state = READ_ONCE(fsc->mount_state); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags); + return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN; } diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 537165db4519..487bd18513b1 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1055,8 +1055,11 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, if (current->journal_info && !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && - security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN)) - ci->i_ceph_flags |= CEPH_I_SEC_INITED; + security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN)) { + set_bit(CEPH_I_SEC_INITED_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } out: spin_unlock(&ci->i_ceph_lock); return err; @@ -1366,7 +1369,9 @@ bool ceph_security_xattr_deadlock(struct inode *in) return false; ci = ceph_inode(in); spin_lock(&ci->i_ceph_lock); - ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) && + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + ret = !test_bit(CEPH_I_SEC_INITED_BIT, &ci->i_ceph_flags) && !(ci->i_xattrs.version > 0 && __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)); spin_unlock(&ci->i_ceph_lock); -- 2.50.1