On Mon, Jul 15, 2024 at 08:48:52AM -0400, Jeff Layton wrote: > /** > * inode_set_ctime_current - set the ctime to current_time > * @inode: inode > * > - * Set the inode->i_ctime to the current value for the inode. Returns > - * the current value that was assigned to i_ctime. > + * Set the inode's ctime to the current value for the inode. Returns the > + * current value that was assigned. If this is not a multigrain inode, then we > + * just set it to whatever the coarse_ctime is. > + * > + * If it is multigrain, then we first see if the coarse-grained timestamp is > + * distinct from what we have. If so, then we'll just use that. If we have to > + * get a fine-grained timestamp, then do so, and try to swap it into the floor. > + * We accept the new floor value regardless of the outcome of the cmpxchg. > + * After that, we try to swap the new value into i_ctime_nsec. Again, we take > + * the resulting ctime, regardless of the outcome of the swap. > */ > struct timespec64 inode_set_ctime_current(struct inode *inode) > { > - struct timespec64 now = current_time(inode); > + ktime_t now, floor = atomic64_read(&ctime_floor); > + struct timespec64 now_ts; > + u32 cns, cur; > + > + now = coarse_ctime(floor); > + > + /* Just return that if this is not a multigrain fs */ > + if (!is_mgtime(inode)) { > + now_ts = timestamp_truncate(ktime_to_timespec64(now), inode); > + inode_set_ctime_to_ts(inode, now_ts); > + goto out; > + } > + > + /* > + * We only need a fine-grained time if someone has queried it, > + * and the current coarse grained time isn't later than what's > + * already there. > + */ > + cns = smp_load_acquire(&inode->i_ctime_nsec); > + if (cns & I_CTIME_QUERIED) { > + ktime_t ctime = ktime_set(inode->i_ctime_sec, cns & ~I_CTIME_QUERIED); > + > + if (!ktime_after(now, ctime)) { > + ktime_t old, fine; > + > + /* Get a fine-grained time */ > + fine = ktime_get(); > > - inode_set_ctime_to_ts(inode, now); > - return now; > + /* > + * If the cmpxchg works, we take the new floor value. If > + * not, then that means that someone else changed it after we > + * fetched it but before we got here. That value is just > + * as good, so keep it. > + */ > + old = floor; > + if (!atomic64_try_cmpxchg(&ctime_floor, &old, fine)) > + fine = old; > + now = ktime_mono_to_real(fine); > + } > + } > + now_ts = timestamp_truncate(ktime_to_timespec64(now), inode); > + cur = cns; > + > + /* No need to cmpxchg if it's exactly the same */ > + if (cns == now_ts.tv_nsec && inode->i_ctime_sec == now_ts.tv_sec) > + goto out; > +retry: > + /* Try to swap the nsec value into place. */ > + if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now_ts.tv_nsec)) { > + /* If swap occurred, then we're (mostly) done */ > + inode->i_ctime_sec = now_ts.tv_sec; Linux always had rather lax approach to consistency of getattr results and I wonder if with this patchset it is no longer viable. Ignoring the flag, suppose ctime on the inode is { nsec = 12, sec = 1 }, while the new timestamp is { nsec = 1, sec = 2 } The current update method results in a transient state where { nsec = 1, sec = 1 }. But this represents an earlier point in time. Thus a thread which observed the first state and spotted the transient value in the second one is going to conclude time went backwards. Is this considered fine given what the multigrain stuff is trying to accomplish? As for fixing this, off hand I note there is a 4-byte hole in struct inode, just enough to store a sequence counter which fill_mg_cmtime could use to safely read the sec/nsec pair. The write side would take the inode spinlock. > + } else { > + /* > + * Was the change due to someone marking the old ctime QUERIED? > + * If so then retry the swap. This can only happen once since > + * the only way to clear I_CTIME_QUERIED is to stamp the inode > + * with a new ctime. > + */ > + if (!(cns & I_CTIME_QUERIED) && (cns | I_CTIME_QUERIED) == cur) { > + cns = cur; > + goto retry; > + } > + /* Otherwise, keep the existing ctime */ > + now_ts.tv_sec = inode->i_ctime_sec; > + now_ts.tv_nsec = cur & ~I_CTIME_QUERIED; > + } > +out: > + return now_ts; > } > EXPORT_SYMBOL(inode_set_ctime_current);