On Tue, 2019-02-19 at 13:51 +0200, Boaz Harrosh wrote:
> From: Boaz Harrosh <boazh@xxxxxxxxxx>
>
> zuf-core establishes the communication channels with the ZUS
> User-Mode Server.
>
> This patch adds the core communication mechanics, which are the
> novelty of this project. (See the previously submitted documentation
> for more information.)
>
> Users of these channels come later in the patchset.
>
> Signed-off-by: Boaz Harrosh <boazh@xxxxxxxxxx>
> ---
>  fs/zuf/_extern.h  |   22 +
>  fs/zuf/_pr.h      |    4 +
>  fs/zuf/relay.h    |   88 ++++
>  fs/zuf/zuf-core.c | 1016 ++++++++++++++++++++++++++++++++++++++++++++-
>  fs/zuf/zuf-root.c |    7 +
>  fs/zuf/zuf.h      |   46 ++
>  fs/zuf/zus_api.h  |  185 +++++++++
>  7 files changed, 1367 insertions(+), 1 deletion(-)
>  create mode 100644 fs/zuf/relay.h
>
> diff --git a/fs/zuf/_extern.h b/fs/zuf/_extern.h
> index 3bb9f1d9acf6..52bb6b9deafe 100644
> --- a/fs/zuf/_extern.h
> +++ b/fs/zuf/_extern.h
> @@ -28,10 +28,32 @@ struct dentry *zuf_mount(struct file_system_type *fs_type,
> 			 int flags,
> 			 const char *dev_name, void *data);
>
>  /* zuf-core.c */
> +int zufc_zts_init(struct zuf_root_info *zri); /* Some private types in core */
> +void zufc_zts_fini(struct zuf_root_info *zri);
> +
>  long zufc_ioctl(struct file *filp, unsigned int cmd, ulong arg);
>  int zufc_release(struct inode *inode, struct file *file);
>  int zufc_mmap(struct file *file, struct vm_area_struct *vma);
>
> +int __zufc_dispatch_mount(struct zuf_root_info *zri,
> +			  enum e_mount_operation op,
> +			  struct zufs_ioc_mount *zim);
> +int zufc_dispatch_mount(struct zuf_root_info *zri, struct zus_fs_info *zus_zfi,
> +			enum e_mount_operation operation,
> +			struct zufs_ioc_mount *zim);
> +
> +const char *zuf_op_name(enum e_zufs_operation op);
> +int __zufc_dispatch(struct zuf_root_info *zri, struct zuf_dispatch_op *zdo);
> +static inline
> +int zufc_dispatch(struct zuf_root_info *zri, struct zufs_ioc_hdr *hdr,
> +		  struct page **pages, uint nump)
> +{
> +	struct zuf_dispatch_op zdo;
> +
> +	zuf_dispatch_init(&zdo, hdr, pages, nump);
> +	return __zufc_dispatch(zri, &zdo);
> +}
> +
>  /* zuf-root.c */
>  int zufr_register_fs(struct super_block *sb, struct zufs_ioc_register_fs *rfs);
>
> diff --git a/fs/zuf/_pr.h b/fs/zuf/_pr.h
> index 30b8cf912c1f..dc9f85453890 100644
> --- a/fs/zuf/_pr.h
> +++ b/fs/zuf/_pr.h
> @@ -39,5 +39,9 @@
>
>  /* ~~~ channel prints ~~~ */
>  #define zuf_dbg_err(s, args ...)	zuf_chan_debug("error", s, ##args)
> +#define zuf_dbg_vfs(s, args ...)	zuf_chan_debug("vfs ", s, ##args)
> +#define zuf_dbg_core(s, args ...)	zuf_chan_debug("core ", s, ##args)
> +#define zuf_dbg_zus(s, args ...)	zuf_chan_debug("zusdg", s, ##args)
> +#define zuf_dbg_verbose(s, args ...)	zuf_chan_debug("d-oto", s, ##args)
>
>  #endif /* define __ZUF_PR_H__ */
> diff --git a/fs/zuf/relay.h b/fs/zuf/relay.h
> new file mode 100644
> index 000000000000..a17d242b313a
> --- /dev/null
> +++ b/fs/zuf/relay.h
> @@ -0,0 +1,88 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Relay scheduler-object Header file.
> + *
> + * Copyright (c) 2018 NetApp Inc. All rights reserved.
> + *
> + * ZUFS-License: GPL-2.0. See module.c for LICENSE details.
> + * > + * Authors: > + * Boaz Harrosh <boazh@xxxxxxxxxx> > + */ > + > +#ifndef __RELAY_H__ > +#define __RELAY_H__ > + > +/* ~~~~ Relay ~~~~ */ > +struct relay { > + wait_queue_head_t fss_wq; > + bool fss_wakeup; > + bool fss_waiting; > + > + wait_queue_head_t app_wq; > + bool app_wakeup; > + bool app_waiting; > + > + cpumask_t cpus_allowed; > +}; > + > +static inline void relay_init(struct relay *relay) > +{ > + init_waitqueue_head(&relay->fss_wq); > + init_waitqueue_head(&relay->app_wq); > +} > + > +static inline bool relay_is_app_waiting(struct relay *relay) > +{ > + return relay->app_waiting; > +} > + > +static inline void relay_app_wakeup(struct relay *relay) > +{ > + relay->app_waiting = false; > + > + relay->app_wakeup = true; > + wake_up(&relay->app_wq); > +} > + > +static inline int relay_fss_wait(struct relay *relay) > +{ > + int err; > + > + relay->fss_waiting = true; > + relay->fss_wakeup = false; > + err = wait_event_interruptible(relay->fss_wq, relay->fss_wakeup); > + > + return err; Could you just do: "return wait_event_interruptible()" directly, instead of using the err variable? > +} > + > +static inline bool relay_is_fss_waiting_grab(struct relay *relay) > +{ > + if (relay->fss_waiting) { > + relay->fss_waiting = false; > + return true; > + } > + return false; > +} > + > +static inline void relay_fss_wakeup(struct relay *relay) > +{ > + relay->fss_wakeup = true; > + wake_up(&relay->fss_wq); > +} > + > +static inline void relay_fss_wakeup_app_wait(struct relay *relay, > + spinlock_t *spinlock) > +{ > + relay->app_waiting = true; > + > + relay_fss_wakeup(relay); > + > + relay->app_wakeup = false; > + if (spinlock) > + spin_unlock(spinlock); > + > + wait_event(relay->app_wq, relay->app_wakeup); > +} > + > +#endif /* ifndef __RELAY_H__ */ > diff --git a/fs/zuf/zuf-core.c b/fs/zuf/zuf-core.c > index e12cae584f8a..95582c0a4ba5 100644 > --- a/fs/zuf/zuf-core.c > +++ b/fs/zuf/zuf-core.c > @@ -18,14 +18,820 @@ > #include <linux/delay.h> > #include <linux/pfn_t.h> > #include <linux/sched/signal.h> > +#include <linux/uaccess.h> > > #include "zuf.h" > > +struct zufc_thread { > + struct zuf_special_file hdr; > + struct relay relay; > + struct vm_area_struct *vma; > + int no; > + int chan; > + > + /* Kernel side allocated IOCTL buffer */ > + struct vm_area_struct *opt_buff_vma; > + void *opt_buff; > + ulong max_zt_command; > + > + /* Next operation*/ > + struct zuf_dispatch_op *zdo; > +}; > + > +enum { INITIAL_ZT_CHANNELS = 3 }; > + > +struct zuf_threads_pool { > + uint _max_zts; > + uint _max_channels; > + /* array of pcp_arrays */ > + struct zufc_thread *_all_zt[ZUFS_MAX_ZT_CHANNELS]; > +}; > + > +static int _alloc_zts_channel(struct zuf_root_info *zri, int channel) > +{ > + zri->_ztp->_all_zt[channel] = alloc_percpu(struct zufc_thread); > + if (unlikely(!zri->_ztp->_all_zt[channel])) { > + zuf_err("!!! 
alloc_percpu channel=%d failed\n", channel); > + return -ENOMEM; > + } > + return 0; > +} > + > +static inline ulong _zt_pr_no(struct zufc_thread *zt) > +{ > + /* So in hex it will be channel as first nibble and cpu as 3rd and on > */ > + return ((ulong)zt->no << 8) | zt->chan; > +} > + > +int zufc_zts_init(struct zuf_root_info *zri) > +{ > + int c; > + > + zri->_ztp = kcalloc(1, sizeof(struct zuf_threads_pool), GFP_KERNEL); > + if (unlikely(!zri->_ztp)) > + return -ENOMEM; > + > + zri->_ztp->_max_zts = num_online_cpus(); > + zri->_ztp->_max_channels = INITIAL_ZT_CHANNELS; > + > + for (c = 0; c < INITIAL_ZT_CHANNELS; ++c) { > + int err = _alloc_zts_channel(zri, c); > + > + if (unlikely(err)) > + return err; > + } > + > + return 0; > +} > + > +void zufc_zts_fini(struct zuf_root_info *zri) > +{ > + int c; > + > + /* Always safe/must call zufc_zts_fini */ > + if (!zri->_ztp) > + return; > + > + for (c = 0; c < zri->_ztp->_max_channels; ++c) { > + if (zri->_ztp->_all_zt[c]) > + free_percpu(zri->_ztp->_all_zt[c]); > + } > + kfree(zri->_ztp); > + zri->_ztp = NULL; > +} > + > +static struct zufc_thread *_zt_from_cpu(struct zuf_root_info *zri, > + int cpu, uint chan) > +{ > + return per_cpu_ptr(zri->_ztp->_all_zt[chan], cpu); > +} > + > +static int _zt_from_f(struct file *filp, int cpu, uint chan, > + struct zufc_thread **ztp) > +{ > + *ztp = _zt_from_cpu(ZRI(filp->f_inode->i_sb), cpu, chan); > + if (unlikely(!*ztp)) > + return -ERANGE; > + return 0; I'm curious if there is a reason you did it this way instead of making use of the ERR_PTR() macro to return ztp directly? > +} > + > +static int _zu_register_fs(struct file *file, void *parg) > +{ > + struct zufs_ioc_register_fs rfs; > + int err; > + > + err = copy_from_user(&rfs, parg, sizeof(rfs)); > + if (unlikely(err)) { > + zuf_err("=>%d\n", err); > + return err; > + } > + > + err = zufr_register_fs(file->f_inode->i_sb, &rfs); > + if (err) > + zuf_err("=>%d\n", err); > + err = put_user(err, (int *)parg); > + return err; > +} > + > +/* ~~~~ mounting ~~~~*/ > +int __zufc_dispatch_mount(struct zuf_root_info *zri, > + enum e_mount_operation operation, > + struct zufs_ioc_mount *zim) > +{ > + zim->hdr.operation = operation; > + > + for (;;) { > + bool fss_waiting; > + > + spin_lock(&zri->mount.lock); > + > + if (unlikely(!zri->mount.zsf.file)) { > + spin_unlock(&zri->mount.lock); > + zuf_err("Server not up\n"); > + zim->hdr.err = -EIO; > + return zim->hdr.err; > + } > + > + fss_waiting = relay_is_fss_waiting_grab(&zri->mount.relay); > + if (fss_waiting) > + break; > + /* in case of break above spin_unlock is done inside > + * relay_fss_wakeup_app_wait > + */ > + > + spin_unlock(&zri->mount.lock); > + > + /* It is OK to wait if user storms mounts */ > + zuf_dbg_verbose("waiting\n"); > + msleep(100); > + } > + > + zri->mount.zim = zim; > + relay_fss_wakeup_app_wait(&zri->mount.relay, &zri->mount.lock); > + > + return zim->hdr.err; > +} > + > +int zufc_dispatch_mount(struct zuf_root_info *zri, struct zus_fs_info > *zus_zfi, > + enum e_mount_operation operation, > + struct zufs_ioc_mount *zim) > +{ > + zim->hdr.out_len = sizeof(*zim); > + zim->hdr.in_len = sizeof(*zim); > + if (operation == ZUFS_M_MOUNT || operation == ZUFS_M_REMOUNT) > + zim->hdr.in_len += zim->zmi.po.mount_options_len; > + zim->zmi.zus_zfi = zus_zfi; > + zim->zmi.num_cpu = zri->_ztp->_max_zts; > + zim->zmi.num_channels = zri->_ztp->_max_channels; > + > + return __zufc_dispatch_mount(zri, operation, zim); > +} > + > +static int _zu_mount(struct file *file, void *parg) > +{ > + struct 
super_block *sb = file->f_inode->i_sb; > + struct zuf_root_info *zri = ZRI(sb); > + bool waiting_for_reply; > + struct zufs_ioc_mount *zim; > + ulong cp_ret; > + int err; > + > + spin_lock(&zri->mount.lock); > + > + if (unlikely(!file->private_data)) { > + /* First time register this file as the mount-thread owner */ > + zri->mount.zsf.type = zlfs_e_mout_thread; > + zri->mount.zsf.file = file; > + file->private_data = &zri->mount.zsf; > + } else if (unlikely(file->private_data != &zri->mount)) { > + spin_unlock(&zri->mount.lock); > + zuf_err("Say what?? %p != %p\n", > + file->private_data, &zri->mount); > + return -EIO; > + } > + > + zim = zri->mount.zim; > + zri->mount.zim = NULL; > + waiting_for_reply = zim && relay_is_app_waiting(&zri->mount.relay); > + > + spin_unlock(&zri->mount.lock); > + > + if (waiting_for_reply) { > + cp_ret = copy_from_user(zim, parg, zim->hdr.out_len); > + if (unlikely(cp_ret)) { > + zuf_err("copy_from_user => %ld\n", cp_ret); > + zim->hdr.err = -EFAULT; > + } > + > + relay_app_wakeup(&zri->mount.relay); > + } > + > + /* This gets to sleep until a mount comes */ > + err = relay_fss_wait(&zri->mount.relay); > + if (unlikely(err || !zri->mount.zim)) { > + struct zufs_ioc_hdr *hdr = parg; > + > + /* Released by _zu_break INTER or crash */ > + zuf_dbg_zus("_zu_break? %p => %d\n", zri->mount.zim, err); > + put_user(ZUFS_OP_BREAK, &hdr->operation); > + put_user(EIO, &hdr->err); > + return err; > + } > + > + zim = zri->mount.zim; > + cp_ret = copy_to_user(parg, zim, zim->hdr.in_len); > + if (unlikely(cp_ret)) { > + err = -EFAULT; > + zuf_err("copy_to_user =>%ld\n", cp_ret); > + } > + return err; > +} > + > +static void zufc_mounter_release(struct file *file) > +{ > + struct zuf_root_info *zri = ZRI(file->f_inode->i_sb); > + > + zuf_dbg_zus("closed fu=%d au=%d fw=%d aw=%d\n", > + zri->mount.relay.fss_wakeup, zri->mount.relay.app_wakeup, > + zri->mount.relay.fss_waiting, zri->mount.relay.app_waiting); > + > + spin_lock(&zri->mount.lock); > + zri->mount.zsf.file = NULL; > + if (relay_is_app_waiting(&zri->mount.relay)) { > + zuf_err("server emergency exit while IO\n"); > + > + if (zri->mount.zim) > + zri->mount.zim->hdr.err = -EIO; > + spin_unlock(&zri->mount.lock); > + > + relay_app_wakeup(&zri->mount.relay); > + msleep(1000); /* crap */ > + } else { > + if (zri->mount.zim) > + zri->mount.zim->hdr.err = 0; > + spin_unlock(&zri->mount.lock); > + } > +} > + > +/* ~~~~ ZU_IOC_NUMA_MAP ~~~~ */ > +static int _zu_numa_map(struct file *file, void *parg) > +{ > + struct zufs_ioc_numa_map *numa_map; > + int n_nodes = num_online_nodes(); > + int n_cpus = num_online_cpus(); > + uint *nodes_cpu_count; > + uint max_cpu_per_node = 0; > + uint alloc_size; > + int cpu, i, err; > + > + alloc_size = sizeof(*numa_map) + n_cpus; /* char per cpu */ > + > + if ((n_nodes > 255) || (alloc_size > PAGE_SIZE)) { > + zuf_warn("!!!unexpected big machine with %d nodes > alloc_size=0x%x\n", > + n_nodes, alloc_size); > + return -ENOTSUPP; > + } > + > + nodes_cpu_count = kcalloc(n_nodes, sizeof(uint), GFP_KERNEL); > + if (unlikely(!nodes_cpu_count)) > + return -ENOMEM; > + > + numa_map = kzalloc(alloc_size, GFP_KERNEL); > + if (unlikely(!numa_map)) { > + err = -ENOMEM; > + goto out; > + } > + > + numa_map->possible_nodes = num_possible_nodes(); > + numa_map->possible_cpus = num_possible_cpus(); > + > + numa_map->online_nodes = n_nodes; > + numa_map->online_cpus = n_cpus; > + > + for_each_cpu(cpu, cpu_online_mask) { > + uint ctn = cpu_to_node(cpu); > + uint ncc = ++nodes_cpu_count[ctn]; > + > + 
numa_map->cpu_to_node[cpu] = ctn; > + max_cpu_per_node = max(max_cpu_per_node, ncc); > + } > + > + for (i = 1; i < n_nodes; ++i) { > + if (nodes_cpu_count[i] != nodes_cpu_count[0]) { > + zuf_info("@[%d]=%d Unbalanced CPU sockets @[0]=%d\n", > + i, nodes_cpu_count[i], nodes_cpu_count[0]); > + numa_map->nodes_not_symmetrical = true; > + break; > + } > + } > + > + numa_map->max_cpu_per_node = max_cpu_per_node; > + > + zuf_dbg_verbose( > + "possible_nodes=%d possible_cpus=%d online_nodes=%d > online_cpus=%d\n", > + numa_map->possible_nodes, numa_map->possible_cpus, > + n_nodes, n_cpus); > + > + err = copy_to_user(parg, numa_map, alloc_size); > + kfree(numa_map); > +out: > + kfree(nodes_cpu_count); > + return err; > +} > + > +static int _map_pages(struct zufc_thread *zt, struct page **pages, uint nump, > + bool map_readonly) > +{ > + int p, err; > + > + if (!(zt->vma && pages && nump)) > + return 0; > + > + for (p = 0; p < nump; ++p) { > + ulong zt_addr = zt->vma->vm_start + p * PAGE_SIZE; > + ulong pfn = page_to_pfn(pages[p]); > + pfn_t pfnt = phys_to_pfn_t(PFN_PHYS(pfn), PFN_MAP | PFN_DEV); > + vm_fault_t flt; > + > + if (map_readonly) > + flt = vmf_insert_mixed(zt->vma, zt_addr, pfnt); > + else > + flt = vmf_insert_mixed_mkwrite(zt->vma, zt_addr, > pfnt); > + err = zuf_flt_to_err(flt); > + if (unlikely(err)) { > + zuf_err("zuf: remap_pfn_range => %d p=0x%x > start=0x%lx\n", > + err, p, zt->vma->vm_start); > + return err; > + } > + } > + return 0; > +} > + > +static void _unmap_pages(struct zufc_thread *zt, struct page **pages, uint > nump) > +{ > + if (!(zt->vma && zt->zdo && pages && nump)) > + return; > + > + zt->zdo->pages = NULL; > + zt->zdo->nump = 0; > + > + zap_vma_ptes(zt->vma, zt->vma->vm_start, nump * PAGE_SIZE); > +} > + > +static void _fill_buff(ulong *buff, uint size) > +{ > + ulong *buff_end = buff + size; > + ulong val = 0; > + > + for (; buff < buff_end; ++buff, ++val) > + *buff = val; > +} > + > +static int _zu_init(struct file *file, void *parg) > +{ > + struct zufc_thread *zt; > + int cpu = smp_processor_id(); > + struct zufs_ioc_init zi_init; > + int err; > + > + err = copy_from_user(&zi_init, parg, sizeof(zi_init)); > + if (unlikely(err)) { > + zuf_err("=>%d\n", err); > + return err; > + } > + if (unlikely(zi_init.channel_no >= ZUFS_MAX_ZT_CHANNELS)) { > + zuf_err("[%d] channel_no=%d\n", cpu, zi_init.channel_no); > + return -EINVAL; > + } > + > + zuf_dbg_zus("[%d] aff=0x%lx channel=%d\n", > + cpu, zi_init.affinity, zi_init.channel_no); > + > + zi_init.hdr.err = _zt_from_f(file, cpu, zi_init.channel_no, &zt); > + if (unlikely(zi_init.hdr.err)) { > + zuf_err("=>%d\n", err); > + goto out; > + } > + > + if (unlikely(zt->hdr.file)) { > + zi_init.hdr.err = -EINVAL; > + zuf_err("[%d] !!! 
thread already set\n", cpu); > + goto out; > + } > + > + relay_init(&zt->relay); > + zt->hdr.type = zlfs_e_zt; > + zt->hdr.file = file; > + zt->no = cpu; > + zt->chan = zi_init.channel_no; > + > + zt->max_zt_command = zi_init.max_command; > + zt->opt_buff = vmalloc(zi_init.max_command); > + if (unlikely(!zt->opt_buff)) { > + zi_init.hdr.err = -ENOMEM; > + goto out; > + } > + _fill_buff(zt->opt_buff, zi_init.max_command / sizeof(ulong)); > + > + file->private_data = &zt->hdr; > +out: > + err = copy_to_user(parg, &zi_init, sizeof(zi_init)); > + if (err) > + zuf_err("=>%d\n", err); > + return err; > +} > + > +struct zufc_thread *_zt_from_f_private(struct file *file) > +{ > + struct zuf_special_file *zsf = file->private_data; > + > + WARN_ON(zsf->type != zlfs_e_zt); > + return container_of(zsf, struct zufc_thread, hdr); > +} > + > +/* Caller checks that file->private_data != NULL */ > +static void zufc_zt_release(struct file *file) > +{ > + struct zufc_thread *zt = _zt_from_f_private(file); > + > + if (unlikely(zt->hdr.file != file)) > + zuf_err("What happened zt->file(%p) != file(%p)\n", > + zt->hdr.file, file); > + > + zuf_dbg_zus("[%d] closed fu=%d au=%d fw=%d aw=%d\n", > + zt->no, zt->relay.fss_wakeup, zt->relay.app_wakeup, > + zt->relay.fss_waiting, zt->relay.app_waiting); > + > + if (relay_is_app_waiting(&zt->relay)) { > + zuf_err("server emergency exit while IO\n"); > + > + /* NOTE: Do not call _unmap_pages the vma is gone */ > + zt->hdr.file = NULL; > + > + relay_app_wakeup(&zt->relay); > + msleep(1000); /* crap */ > + } > + > + vfree(zt->opt_buff); > + memset(zt, 0, sizeof(*zt)); > +} > + > +static int _copy_outputs(struct zufc_thread *zt, void *arg) > +{ > + struct zufs_ioc_hdr *hdr = zt->zdo->hdr; > + struct zufs_ioc_hdr *user_hdr = zt->opt_buff; > + > + if (zt->opt_buff_vma->vm_start != (ulong)arg) { > + zuf_err("malicious Server\n"); > + return -EINVAL; > + } > + > + /* Update on the user out_len and return-code */ > + hdr->err = user_hdr->err; > + hdr->out_len = user_hdr->out_len; > + > + if (!hdr->out_len) > + return 0; > + > + if ((hdr->err == -EZUFS_RETRY) || (hdr->out_max < hdr->out_len)) { > + if (WARN_ON(!zt->zdo->oh)) { > + zuf_err("Trouble op(%s) out_max=%d out_len=%d\n", > + zuf_op_name(hdr->operation), > + hdr->out_max, hdr->out_len); > + return -EFAULT; > + } > + zuf_dbg_zus("[%s] %d %d => %d\n", > + zuf_op_name(hdr->operation), > + hdr->out_max, hdr->out_len, hdr->err); > + return zt->zdo->oh(zt->zdo, zt->opt_buff, zt->max_zt_command); > + } else { > + void *rply = (void *)hdr + hdr->out_start; > + void *from = zt->opt_buff + hdr->out_start; > + > + memcpy(rply, from, hdr->out_len); > + return 0; > + } > +} > + > +static int _zu_wait(struct file *file, void *parg) > +{ > + struct zufc_thread *zt; > + int err; > + > + zt = _zt_from_f_private(file); > + if (unlikely(!zt)) { > + zuf_err("Unexpected ZT state\n"); > + err = -ERANGE; > + goto err; > + } > + > + if (!zt->hdr.file || file != zt->hdr.file) { > + zuf_err("fatal\n"); > + err = -E2BIG; > + goto err; > + } > + if (unlikely((ulong)parg != zt->opt_buff_vma->vm_start)) { > + zuf_err("fatal 2\n"); > + err = -EINVAL; > + goto err; > + } > + > + if (relay_is_app_waiting(&zt->relay)) { > + if (unlikely(!zt->zdo)) { > + zuf_err("User has gone...\n"); > + err = -E2BIG; > + goto err; > + } else { > + /* overflow_handler might decide to execute the > + *parg here at zus context and return to server > + * If it also has an error to report to zus it > + * will set zdo->hdr->err. > + * EZUS_RETRY_DONE is when that happens. 
> + * In this case pages stay mapped in zt->vma > + */ > + err = _copy_outputs(zt, parg); > + if (err == EZUF_RETRY_DONE) { > + put_user(zt->zdo->hdr->err, (int *)parg); > + return 0; > + } > + > + _unmap_pages(zt, zt->zdo->pages, zt->zdo->nump); > + zt->zdo = NULL; > + if (unlikely(err)) /* _copy_outputs returned an err */ > + goto err; > + } > + relay_app_wakeup(&zt->relay); > + } > + > + err = relay_fss_wait(&zt->relay); > + if (err) > + zuf_dbg_err("[%d] relay error: %d\n", zt->no, err); > + > + if (zt->zdo && zt->zdo->hdr && > + zt->zdo->hdr->operation < ZUFS_OP_BREAK) { > + /* call map here at the zuf thread so we need no locks > + * TODO: Currently only ZUFS_OP_WRITE protects user-buffers > + * we should have a bit set in zt->zdo->hdr set per operation. > + * TODO: Why this does not work? > + */ > + _map_pages(zt, zt->zdo->pages, zt->zdo->nump, 0); > + memcpy(zt->opt_buff, zt->zdo->hdr, zt->zdo->hdr->in_len); > + } else { > + struct zufs_ioc_hdr *hdr = zt->opt_buff; > + > + /* This Means we were released by _zu_break */ > + zuf_dbg_zus("_zu_break? => %d\n", err); > + hdr->operation = ZUFS_OP_BREAK; > + hdr->err = err; > + } > + > + return err; > + > +err: > + put_user(err, (int *)parg); > + return err; > +} > + > +static int _try_grab_zt_channel(struct zuf_root_info *zri, int cpu, > + struct zufc_thread **ztp) > +{ > + struct zufc_thread *zt; > + int c; > + > + for (c = 0; ; ++c) { > + zt = _zt_from_cpu(zri, cpu, c); > + if (unlikely(!zt || !zt->hdr.file)) > + break; > + > + if (relay_is_fss_waiting_grab(&zt->relay)) { > + *ztp = zt; > + return true; > + } > + } > + > + *ztp = _zt_from_cpu(zri, cpu, 0); > + return false; > +} > + > +#define _zuf_get_cpu() get_cpu() > +#define _zuf_put_cpu() put_cpu() > + > +#ifdef CONFIG_ZUF_DEBUG > +static > +int _r_zufs_dispatch(struct zuf_root_info *zri, struct zuf_dispatch_op *zdo) > +#else > +int __zufc_dispatch(struct zuf_root_info *zri, struct zuf_dispatch_op *zdo) > +#endif > +{ > + struct task_struct *app = get_current(); > + struct zufs_ioc_hdr *hdr = zdo->hdr; > + int cpu, cpu2; > + struct zufc_thread *zt; > + > + if (unlikely(hdr->out_len && !hdr->out_max)) { > + /* TODO: Complain here and let caller code do this proper */ > + hdr->out_max = hdr->out_len; > + } > + > +channel_busy: > + cpu = _zuf_get_cpu(); > + > + if (!_try_grab_zt_channel(zri, cpu, &zt)) { > + _zuf_put_cpu(); > + > + /* If channel was grabbed then maybe a break_all is in > progress > + * on a different CPU make sure zt->file on this core is > + * updated > + */ > + mb(); > + if (unlikely(!zt->hdr.file)) { > + zuf_err("[%d] !zt->file\n", cpu); > + return -EIO; > + } > + zuf_dbg_err("[%d] can this be\n", cpu); > + /* FIXME: Do something much smarter */ > + msleep(10); > + if (signal_pending(get_current())) { > + zuf_dbg_err("[%d] => EINTR\n", cpu); > + return -EINTR; > + } > + goto channel_busy; > + } > + > + /* lock app to this cpu while waiting */ > + cpumask_copy(&zt->relay.cpus_allowed, &app->cpus_allowed); > + cpumask_copy(&app->cpus_allowed, cpumask_of(smp_processor_id())); > + > + zt->zdo = zdo; > + > + _zuf_put_cpu(); > + > + relay_fss_wakeup_app_wait(&zt->relay, NULL); > + > + /* restore cpu affinity after wakeup */ > + cpumask_copy(&app->cpus_allowed, &zt->relay.cpus_allowed); > + > +cpu2 = smp_processor_id(); > +if (cpu2 != cpu) > + zuf_warn("App switched cpu1=%u cpu2=%u\n", cpu, cpu2); > + > + return zt->hdr.file ? 
hdr->err : -EIO; > +} > + > +const char *zuf_op_name(enum e_zufs_operation op) > +{ > +#define CASE_ENUM_NAME(e) case e: return #e > + switch (op) { > + CASE_ENUM_NAME(ZUFS_OP_BREAK ); > + default: > + return "UNKNOWN"; > + } > +} > + > +#ifdef CONFIG_ZUF_DEBUG > + > +#define MAX_ZT_SEC 5 > +int __zufc_dispatch(struct zuf_root_info *zri, struct zuf_dispatch_op *zdo) > +{ > + u64 t1, t2; > + int err; > + > + t1 = ktime_get_ns(); > + err = _r_zufs_dispatch(zri, zdo); > + t2 = ktime_get_ns(); > + > + if ((t2 - t1) > MAX_ZT_SEC * NSEC_PER_SEC) > + zuf_err("zufc_dispatch(%s, [0x%x-0x%x]) took %lld sec\n", > + zuf_op_name(zdo->hdr->operation), zdo->hdr->offset, > + zdo->hdr->len, > + (t2 - t1) / NSEC_PER_SEC); > + > + return err; > +} > +#endif /* def CONFIG_ZUF_DEBUG */ > + > +/* ~~~ iomap_exec && exec_buffer allocation ~~~ */ > +struct zu_exec_buff { > + struct zuf_special_file hdr; > + struct vm_area_struct *vma; > + void *opt_buff; > + ulong alloc_size; > +}; > + > +/* Do some common checks and conversions */ > +static inline struct zu_exec_buff *_ebuff_from_file(struct file *file) > +{ > + struct zu_exec_buff *ebuff = file->private_data; > + > + if (WARN_ON_ONCE(ebuff->hdr.type != zlfs_e_dpp_buff)) { > + zuf_err("Must call ZU_IOC_ALLOC_BUFFER first\n"); > + return NULL; > + } > + > + if (WARN_ON_ONCE(ebuff->hdr.file != file)) > + return NULL; > + > + return ebuff; > +} > + > +static int _zu_ebuff_alloc(struct file *file, void *arg) > +{ > + struct zufs_ioc_alloc_buffer ioc_alloc; > + struct zu_exec_buff *ebuff; > + int err; > + > + err = copy_from_user(&ioc_alloc, arg, sizeof(ioc_alloc)); > + if (unlikely(err)) { > + zuf_err("=>%d\n", err); > + return err; > + } > + > + if (ioc_alloc.init_size > ioc_alloc.max_size) > + return -EINVAL; > + > + /* TODO: Easily Support growing */ > + /* TODO: Support global pools, also easy */ > + if (ioc_alloc.pool_no || ioc_alloc.init_size != ioc_alloc.max_size) > + return -ENOTSUPP; > + > + ebuff = kzalloc(sizeof(*ebuff), GFP_KERNEL); > + if (unlikely(!ebuff)) > + return -ENOMEM; > + > + ebuff->hdr.type = zlfs_e_dpp_buff; > + ebuff->hdr.file = file; > + i_size_write(file->f_inode, ioc_alloc.max_size); > + ebuff->alloc_size = ioc_alloc.init_size; > + ebuff->opt_buff = vmalloc(ioc_alloc.init_size); > + if (unlikely(!ebuff->opt_buff)) { > + kfree(ebuff); > + return -ENOMEM; > + } > + _fill_buff(ebuff->opt_buff, ioc_alloc.init_size / sizeof(ulong)); > + > + file->private_data = &ebuff->hdr; > + return 0; > +} > + > +static void zufc_ebuff_release(struct file *file) > +{ > + struct zu_exec_buff *ebuff = _ebuff_from_file(file); > + > + if (unlikely(!ebuff)) > + return; > + > + vfree(ebuff->opt_buff); > + ebuff->hdr.type = 0; > + ebuff->hdr.file = NULL; /* for none-dbg Kernels && use-after-free */ > + kfree(ebuff); > +} > + > +static int _zu_break(struct file *filp, void *parg) > +{ > + struct zuf_root_info *zri = ZRI(filp->f_inode->i_sb); > + int i, c; > + > + zuf_dbg_core("enter\n"); > + mb(); /* TODO how to schedule on all CPU's */ > + > + for (i = 0; i < zri->_ztp->_max_zts; ++i) { > + for (c = 0; c < zri->_ztp->_max_channels; ++c) { > + struct zufc_thread *zt = _zt_from_cpu(zri, i, c); > + > + if (unlikely(!(zt && zt->hdr.file))) > + continue; > + relay_fss_wakeup(&zt->relay); > + } > + } > + > + if (zri->mount.zsf.file) > + relay_fss_wakeup(&zri->mount.relay); > + > + zuf_dbg_core("exit\n"); > + return 0; > +} > + > long zufc_ioctl(struct file *file, unsigned int cmd, ulong arg) > { > + void __user *parg = (void __user *)arg; > + > switch (cmd) { > + case 
ZU_IOC_REGISTER_FS: > + return _zu_register_fs(file, parg); > + case ZU_IOC_MOUNT: > + return _zu_mount(file, parg); > + case ZU_IOC_NUMA_MAP: > + return _zu_numa_map(file, parg); > + case ZU_IOC_INIT_THREAD: > + return _zu_init(file, parg); > + case ZU_IOC_WAIT_OPT: > + return _zu_wait(file, parg); > + case ZU_IOC_ALLOC_BUFFER: > + return _zu_ebuff_alloc(file, parg); > + case ZU_IOC_BREAK_ALL: > + return _zu_break(file, parg); > default: > - zuf_err("%d\n", cmd); > + zuf_err("%d %ld\n", cmd, ZU_IOC_WAIT_OPT); > return -ENOTTY; > } > } > @@ -38,11 +844,215 @@ int zufc_release(struct inode *inode, struct file *file) > return 0; > > switch (zsf->type) { > + case zlfs_e_zt: > + zufc_zt_release(file); > + return 0; > + case zlfs_e_mout_thread: > + zufc_mounter_release(file); > + return 0; > + case zlfs_e_pmem: > + /* NOTHING to clean for pmem file yet */ > + /* zuf_pmem_release(file);*/ > + return 0; > + case zlfs_e_dpp_buff: > + zufc_ebuff_release(file); > + return 0; > default: > return 0; > } > } > > +/* ~~~~ mmap area of app buffers into server ~~~~ */ > + > +static int zuf_zt_fault(struct vm_fault *vmf) > +{ > + zuf_err("should not fault\n"); > + return VM_FAULT_SIGBUS; > +} > + > +static const struct vm_operations_struct zuf_vm_ops = { > + .fault = zuf_zt_fault, > +}; > + > +static int _zufc_zt_mmap(struct file *file, struct vm_area_struct *vma, > + struct zufc_thread *zt) > +{ > + /* Tell Kernel We will only access on a single core */ > + vma->vm_flags |= VM_MIXEDMAP; > + vma->vm_ops = &zuf_vm_ops; > + > + zt->vma = vma; > + > + zuf_dbg_core( > + "[0x%lx] start=0x%lx end=0x%lx flags=0x%lx file- > start=0x%lx\n", > + _zt_pr_no(zt), vma->vm_start, vma->vm_end, vma->vm_flags, > + vma->vm_pgoff); > + > + return 0; > +} > + > +/* ~~~~ mmap the Kernel allocated IOCTL buffer per ZT ~~~~ */ > +static int _opt_buff_mmap(struct vm_area_struct *vma, void *opt_buff, > + ulong opt_size) > +{ > + ulong offset; > + > + if (!opt_buff) > + return -ENOMEM; > + > + for (offset = 0; offset < opt_size; offset += PAGE_SIZE) { > + ulong addr = vma->vm_start + offset; > + ulong pfn = vmalloc_to_pfn(opt_buff + offset); > + pfn_t pfnt = phys_to_pfn_t(PFN_PHYS(pfn), PFN_MAP | PFN_DEV); > + int err; > + > + zuf_dbg_verbose("[0x%lx] pfn-0x%lx addr=0x%lx buff=0x%lx\n", > + offset, pfn, addr, (ulong)opt_buff + offset); > + > + err = zuf_flt_to_err(vmf_insert_mixed_mkwrite(vma, addr, > pfnt)); > + if (unlikely(err)) { > + zuf_err("zuf: zuf_insert_mixed_mkwrite => %d > offset=0x%lx addr=0x%lx\n", > + err, offset, addr); > + return err; > + } > + } > + > + return 0; > +} > + > +static int zuf_obuff_fault(struct vm_fault *vmf) > +{ > + struct vm_area_struct *vma = vmf->vma; > + struct zufc_thread *zt = _zt_from_f_private(vma->vm_file); > + long offset = (vmf->pgoff << PAGE_SHIFT) - ZUS_API_MAP_MAX_SIZE; > + int err; > + > + zuf_dbg_core( > + "[0x%lx] start=0x%lx end=0x%lx file-start=0x%lx > offset=0x%lx\n", > + _zt_pr_no(zt), vma->vm_start, vma->vm_end, vma->vm_pgoff, > + offset); > + > + /* if Server overruns its buffer crash it dead */ > + if (unlikely((offset < 0) || (zt->max_zt_command < offset))) { > + zuf_err("[0x%lx] start=0x%lx end=0x%lx file-start=0x%lx > offset=0x%lx\n", > + _zt_pr_no(zt), vma->vm_start, > + vma->vm_end, vma->vm_pgoff, offset); > + return VM_FAULT_SIGBUS; > + } > + > + /* We never released a zus-core.c that does not fault the > + * first page first. 
I want to see if this happens > + */ > + if (unlikely(offset)) > + zuf_warn("Suspicious server activity\n"); > + > + /* This faults only once at very first access */ > + err = _opt_buff_mmap(vma, zt->opt_buff, zt->max_zt_command); > + if (unlikely(err)) > + return VM_FAULT_SIGBUS; > + > + return VM_FAULT_NOPAGE; > +} > + > +static const struct vm_operations_struct zuf_obuff_ops = { > + .fault = zuf_obuff_fault, > +}; > + > +static int _zufc_obuff_mmap(struct file *file, struct vm_area_struct *vma, > + struct zufc_thread *zt) > +{ > + vma->vm_flags |= VM_MIXEDMAP; > + vma->vm_ops = &zuf_obuff_ops; > + > + zt->opt_buff_vma = vma; > + > + zuf_dbg_core( > + "[0x%lx] start=0x%lx end=0x%lx flags=0x%lx file- > start=0x%lx\n", > + _zt_pr_no(zt), vma->vm_start, vma->vm_end, vma->vm_flags, > + vma->vm_pgoff); > + > + return 0; > +} > + > +/* ~~~ */ > + > +static int zufc_zt_mmap(struct file *file, struct vm_area_struct *vma) > +{ > + struct zufc_thread *zt = _zt_from_f_private(file); > + > + /* We have two areas of mmap in this special file. > + * 0 to ZUS_API_MAP_MAX_SIZE: > + * The first part where app pages are mapped > + * into server per operation. > + * ZUS_API_MAP_MAX_SIZE of size zuf_root_info->max_zt_command > + * Is where we map the per ZT ioctl-buffer, later passed > + * to the zus_ioc_wait IOCTL call > + */ > + if (vma->vm_pgoff == ZUS_API_MAP_MAX_SIZE / PAGE_SIZE) > + return _zufc_obuff_mmap(file, vma, zt); > + > + /* zuf ZT API is very particular about where in its > + * special file we communicate > + */ > + if (unlikely(vma->vm_pgoff)) > + return -EINVAL; > + > + return _zufc_zt_mmap(file, vma, zt); > +} > + > +/* ~~~~ Implementation of the ZU_IOC_ALLOC_BUFFER mmap facility ~~~~ */ > + > +static int zuf_ebuff_fault(struct vm_fault *vmf) > +{ > + struct vm_area_struct *vma = vmf->vma; > + struct zu_exec_buff *ebuff = _ebuff_from_file(vma->vm_file); > + long offset = (vmf->pgoff << PAGE_SHIFT); > + int err; > + > + zuf_dbg_core("start=0x%lx end=0x%lx file-start=0x%lx file- > off=0x%lx\n", > + vma->vm_start, vma->vm_end, vma->vm_pgoff, offset); > + > + /* if Server overruns its buffer crash it dead */ > + if (unlikely((offset < 0) || (ebuff->alloc_size < offset))) { > + zuf_err("start=0x%lx end=0x%lx file-start=0x%lx file- > off=0x%lx\n", > + vma->vm_start, vma->vm_end, vma->vm_pgoff, > + offset); > + return VM_FAULT_SIGBUS; > + } > + > + /* We never released a zus-core.c that does not fault the > + * first page first. 
I want to see if this happens > + */ > + if (unlikely(offset)) > + zuf_warn("Suspicious server activity\n"); > + > + /* This faults only once at very first access */ > + err = _opt_buff_mmap(vma, ebuff->opt_buff, ebuff->alloc_size); > + if (unlikely(err)) > + return VM_FAULT_SIGBUS; > + > + return VM_FAULT_NOPAGE; > +} > + > +static const struct vm_operations_struct zuf_ebuff_ops = { > + .fault = zuf_ebuff_fault, > +}; > + > +static int zufc_ebuff_mmap(struct file *file, struct vm_area_struct *vma) > +{ > + struct zu_exec_buff *ebuff = _ebuff_from_file(vma->vm_file); > + > + vma->vm_flags |= VM_MIXEDMAP; > + vma->vm_ops = &zuf_ebuff_ops; > + > + ebuff->vma = vma; > + > + zuf_dbg_core("start=0x%lx end=0x%lx flags=0x%lx file-start=0x%lx\n", > + vma->vm_start, vma->vm_end, vma->vm_flags, vma- > >vm_pgoff); > + > + return 0; > +} > + > int zufc_mmap(struct file *file, struct vm_area_struct *vma) > { > struct zuf_special_file *zsf = file->private_data; > @@ -53,6 +1063,10 @@ int zufc_mmap(struct file *file, struct vm_area_struct > *vma) > } > > switch (zsf->type) { > + case zlfs_e_zt: > + return zufc_zt_mmap(file, vma); > + case zlfs_e_dpp_buff: > + return zufc_ebuff_mmap(file, vma); > default: > zuf_err("type=%d\n", zsf->type); > return -ENOTTY; > diff --git a/fs/zuf/zuf-root.c b/fs/zuf/zuf-root.c > index 55a839dbc854..37b70ca33d3c 100644 > --- a/fs/zuf/zuf-root.c > +++ b/fs/zuf/zuf-root.c > @@ -227,6 +227,7 @@ static void zufr_put_super(struct super_block *sb) > { > struct zuf_root_info *zri = ZRI(sb); > > + zufc_zts_fini(zri); > _unregister_all_fses(zri); > > zuf_info("zuf_root umount\n"); > @@ -282,10 +283,16 @@ static int zufr_fill_super(struct super_block *sb, void > *data, int silent) > root_i->i_fop = &zufr_file_dir_operations; > root_i->i_op = &zufr_inode_operations; > > + spin_lock_init(&zri->mount.lock); > mutex_init(&zri->sbl_lock); > + relay_init(&zri->mount.relay); > INIT_LIST_HEAD(&zri->fst_list); > INIT_LIST_HEAD(&zri->pmem_list); > > + err = zufc_zts_init(zri); > + if (unlikely(err)) > + return err; /* put will be called we have a root */ > + > return 0; > } > > diff --git a/fs/zuf/zuf.h b/fs/zuf/zuf.h > index f979d8cbe60c..a33f5908155d 100644 > --- a/fs/zuf/zuf.h > +++ b/fs/zuf/zuf.h > @@ -23,9 +23,11 @@ > #include <linux/xattr.h> > #include <linux/exportfs.h> > #include <linux/page_ref.h> > +#include <linux/mm.h> > > #include "zus_api.h" > > +#include "relay.h" > #include "_pr.h" > > enum zlfs_e_special_file { > @@ -44,6 +46,8 @@ struct zuf_special_file { > struct zuf_root_info { > struct __mount_thread_info { > struct zuf_special_file zsf; > + spinlock_t lock; > + struct relay relay; > struct zufs_ioc_mount *zim; > } mount; > > @@ -102,6 +106,48 @@ static inline struct zuf_inode_info *ZUII(struct inode > *inode) > return container_of(inode, struct zuf_inode_info, vfs_inode); > } > > +static inline struct zuf_fs_type *ZUF_FST(struct file_system_type *fs_type) > +{ > + return container_of(fs_type, struct zuf_fs_type, vfs_fst); > +} > + > +static inline struct zuf_fs_type *zuf_fst(struct super_block *sb) > +{ > + return ZUF_FST(sb->s_type); > +} > + > +struct zuf_dispatch_op; > +typedef int (*overflow_handler)(struct zuf_dispatch_op *zdo, void *parg, > + ulong zt_max_bytes); > +struct zuf_dispatch_op { > + struct zufs_ioc_hdr *hdr; > + struct page **pages; > + uint nump; > + overflow_handler oh; > + struct super_block *sb; > + struct inode *inode; > +}; > + > +static inline void > +zuf_dispatch_init(struct zuf_dispatch_op *zdo, struct zufs_ioc_hdr *hdr, > + struct page **pages, 
uint nump) > +{ > + memset(zdo, 0, sizeof(*zdo)); > + zdo->hdr = hdr; > + zdo->pages = pages; zdo->nump = nump; > +} > + > +static inline int zuf_flt_to_err(vm_fault_t flt) > +{ > + if (likely(flt == VM_FAULT_NOPAGE)) > + return 0; > + > + if (flt == VM_FAULT_OOM) > + return -ENOMEM; > + > + return -EACCES; > +} > + > /* Keep this include last thing in file */ > #include "_extern.h" > > diff --git a/fs/zuf/zus_api.h b/fs/zuf/zus_api.h > index 34e3e1a9a107..3319a70b5ccc 100644 > --- a/fs/zuf/zus_api.h > +++ b/fs/zuf/zus_api.h > @@ -66,6 +66,47 @@ > > #endif /* ndef __KERNEL__ */ > > +/* first available error code after include/linux/errno.h */ > +#define EZUFS_RETRY 531 > + > +/* The below is private to zuf Kernel only. Is not exposed to VFS nor zus > + * (defined here to allocate the constant) > + */ > +#define EZUF_RETRY_DONE 540 > + > +/** > + * zufs dual port memory > + * This is a special type of offset to either memory or persistent-memory, > + * that is designed to be used in the interface mechanism between userspace > + * and kernel, and can be accessed by both. > + * 3 first bits denote a mem-pool: > + * 0 - pmem pool > + * 1-6 - established shared pool by a call to zufs_ioc_create_mempool (below) > + * 7 - offset into app memory > + */ > +typedef __u64 __bitwise zu_dpp_t; > + > +static inline uint zu_dpp_t_pool(zu_dpp_t t) > +{ > + return t & 0x7; > +} > + > +static inline ulong zu_dpp_t_val(zu_dpp_t t) > +{ > + return t & ~0x7; > +} > + > +static inline zu_dpp_t enc_zu_dpp_t(ulong v, uint pool) > +{ > + return v | pool; > +} > + > +/* ~~~~~ ZUFS API ioctl commands ~~~~~ */ > +enum { > + ZUS_API_MAP_MAX_PAGES = 1024, > + ZUS_API_MAP_MAX_SIZE = ZUS_API_MAP_MAX_PAGES * PAGE_SIZE, > +}; > + > struct zufs_ioc_hdr { > __u32 err; /* IN/OUT must be first */ > __u16 in_len; /* How much to be copied *to* zus */ > @@ -102,4 +143,148 @@ struct zufs_ioc_register_fs { > }; > #define ZU_IOC_REGISTER_FS _IOWR('Z', 10, struct zufs_ioc_register_fs) > > +/* A cookie from user-mode returned by mount */ > +struct zus_sb_info; > + > +/* zus cookie per inode */ > +struct zus_inode_info; > + > +enum ZUFS_M_FLAGS { > + ZUFS_M_PEDANTIC = 0x00000001, > + ZUFS_M_EPHEMERAL = 0x00000002, > + ZUFS_M_SILENT = 0x00000004, > +}; > + > +struct zufs_parse_options { > + __u32 mount_options_len; > + __u32 pedantic; > + __u64 mount_flags; > + char mount_options[0]; > +}; > + > +enum e_mount_operation { > + ZUFS_M_MOUNT = 1, > + ZUFS_M_UMOUNT, > + ZUFS_M_REMOUNT, > + ZUFS_M_DDBG_RD, > + ZUFS_M_DDBG_WR, > +}; > + > +struct zufs_mount_info { > + /* IN */ > + struct zus_fs_info *zus_zfi; > + __u16 num_cpu; > + __u16 num_channels; > + __u32 pmem_kern_id; > + __u64 sb_id; > + > + /* OUT */ > + struct zus_sb_info *zus_sbi; > + /* mount is also iget of root */ > + struct zus_inode_info *zus_ii; > + zu_dpp_t _zi; > + __u64 old_mount_opt; > + __u64 remount_flags; > + > + /* More FS specific info */ > + __u32 s_blocksize_bits; > + __u8 acl_on; > + struct zufs_parse_options po; > +}; > + > +/* mount / umount */ > +struct zufs_ioc_mount { > + struct zufs_ioc_hdr hdr; > + struct zufs_mount_info zmi; > +}; > +#define ZU_IOC_MOUNT _IOWR('Z', 11, struct zufs_ioc_mount) > + > +/* pmem */ > +struct zufs_ioc_numa_map { > + /* Set by zus */ > + struct zufs_ioc_hdr hdr; > + > + __u32 possible_nodes; > + __u32 possible_cpus; > + __u32 online_nodes; > + __u32 online_cpus; > + > + __u32 max_cpu_per_node; > + > + /* This indicates that NOT all nodes have @max_cpu_per_node cpus */ > + bool nodes_not_symmetrical; > + > + /* Variable size must 
keep last > + * size @online_cpus > + */ > + __u8 cpu_to_node[]; > +}; > +#define ZU_IOC_NUMA_MAP _IOWR('Z', 12, struct zufs_ioc_numa_map) > + > +/* ZT init */ > +enum { ZUFS_MAX_ZT_CHANNELS = 64 }; > + > +struct zufs_ioc_init { > + struct zufs_ioc_hdr hdr; > + ulong affinity; /* IN */ > + uint channel_no; > + uint max_command; > +}; > +#define ZU_IOC_INIT_THREAD _IOWR('Z', 14, struct zufs_ioc_init) > + > +/* break_all (Server telling kernel to clean) */ > +struct zufs_ioc_break_all { > + struct zufs_ioc_hdr hdr; > +}; > +#define ZU_IOC_BREAK_ALL _IOWR('Z', 15, struct zufs_ioc_break_all) > + > +/* ~~~ zufs_ioc_wait_operation ~~~ */ > +struct zufs_ioc_wait_operation { > + struct zufs_ioc_hdr hdr; > + /* maximum size is governed by zufs_ioc_init->max_command */ > + char opt_buff[]; > +}; > +#define ZU_IOC_WAIT_OPT _IOWR('Z', 16, struct > zufs_ioc_wait_operation) > + > +/* These are the possible operations sent from Kernel to the Server in the > + * return of the ZU_IOC_WAIT_OPT. > + */ > +enum e_zufs_operation { > + ZUFS_OP_NULL = 0, > + > + ZUFS_OP_BREAK, /* Kernel telling Server to exit */ > + ZUFS_OP_MAX_OPT, > +}; > + > +/* Allocate a special_file that will be a dual-port communication buffer with > + * user mode. > + * Server will access the buffer via the mmap of this file. > + * Kernel will access the file via the valloc() pointer > + * > + * Some IOCTLs below demand use of this kind of buffer for communication > + * TODO: > + * pool_no is if we want to associate this buffer onto the 6 possible > + * mem-pools per zuf_sbi. So anywhere we have a zu_dpp_t it will mean > + * access from this pool. > + * If pool_no is zero then it is private to only this file. In this case > + * sb_id && zus_sbi are ignored / not needed. > + */ > +struct zufs_ioc_alloc_buffer { > + struct zufs_ioc_hdr hdr; > + /* The ID of the super block received in mount */ > + __u64 sb_id; > + /* We verify the sb_id validity against zus_sbi */ > + struct zus_sb_info *zus_sbi; > + /* max size of buffer allowed (size of mmap) */ > + __u32 max_size; > + /* allocate this much on initial call and set into vma */ > + __u32 init_size; > + > + /* TODO: These below are now set to ZERO. Need implementation */ > + __u16 pool_no; > + __u16 flags; > + __u32 reserved; > +}; > +#define ZU_IOC_ALLOC_BUFFER _IOWR('Z', 17, struct zufs_ioc_init) > + > #endif /* _LINUX_ZUFS_API_H */ > -- > 2.20.1 >
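
Something like this is what I had in mind for relay_fss_wait(), an untested
sketch, just to show the err variable going away:

static inline int relay_fss_wait(struct relay *relay)
{
	relay->fss_waiting = true;
	relay->fss_wakeup = false;
	return wait_event_interruptible(relay->fss_wq, relay->fss_wakeup);
}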
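
Same for _zt_from_f(): the ERR_PTR() variant I was thinking of is roughly
this (again untested):

static struct zufc_thread *_zt_from_f(struct file *filp, int cpu, uint chan)
{
	struct zufc_thread *zt;

	zt = _zt_from_cpu(ZRI(filp->f_inode->i_sb), cpu, chan);
	if (unlikely(!zt))
		return ERR_PTR(-ERANGE);
	return zt;
}

Callers would then test with IS_ERR() and feed PTR_ERR() into hdr.err
instead of going through the extra output parameter.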
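
Also, just to check my reading of the ZT protocol defined in zus_api.h, here
is a rough user-space sketch of how I picture one server thread driving a
channel. This is not from the patch; zuf_fd (a per-thread fd on the zuf-root
special file), the 64K max_command and handle_operation() are all made up
for illustration:

#include <sys/ioctl.h>
#include <sys/mman.h>
#include "zus_api.h"	/* the header added by this patch */

/* Hypothetical FS-specific dispatcher, not part of this patch */
extern int handle_operation(struct zufs_ioc_wait_operation *opt);

static int zt_thread(int zuf_fd, unsigned int channel)
{
	/* The thread is assumed to already be pinned to its CPU */
	struct zufs_ioc_init init = {
		.channel_no = channel,
		.max_command = 1 << 16,		/* assumed buffer size */
	};
	struct zufs_ioc_wait_operation *opt;

	if (ioctl(zuf_fd, ZU_IOC_INIT_THREAD, &init) || init.hdr.err)
		return -1;

	/* The per-ZT ioctl buffer is mapped above the app-pages window */
	opt = mmap(NULL, init.max_command, PROT_READ | PROT_WRITE,
		   MAP_SHARED, zuf_fd, ZUS_API_MAP_MAX_SIZE);
	if (opt == MAP_FAILED)
		return -1;

	for (;;) {
		/* Reply to the previous op (if any) and wait for the next */
		if (ioctl(zuf_fd, ZU_IOC_WAIT_OPT, opt))
			break;
		if (opt->hdr.operation == ZUFS_OP_BREAK)
			break;			/* Kernel tells us to exit */
		opt->hdr.err = handle_operation(opt);
	}
	return 0;
}

Is that the intended flow?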