Differentiate between VL servers rather than lumping all their addresses together into one set by: (1) Add the concept of a VL server list, where each server has its own separate address list. This code is similar to the FS server list. (2) Use the DNS resolver to retrieve a set of servers and their associated addresses, ports, preference and weight ratings. (3) In the case of a legacy DNS resolver or an address list given directly through /proc/net/afs/cells, create a list containing just a dummy server record and attach all the addresses to that. (4) Implement a simple rotation policy, for the moment ignoring the priorities and weights assigned to the servers. (5) Show the address list through /proc/net/afs/<cell>/vlservers. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> --- fs/afs/Makefile | 2 fs/afs/addr_list.c | 109 ++++++++------------ fs/afs/cell.c | 39 ++++--- fs/afs/dynroot.c | 2 fs/afs/internal.h | 106 +++++++++++++++++-- fs/afs/proc.c | 44 ++++---- fs/afs/server.c | 42 ++------ fs/afs/vl_list.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/afs/vl_rotate.c | 239 +++++++++++++++++++++++++++++++++++++++++++ fs/afs/vlclient.c | 32 +++--- fs/afs/volume.c | 52 +++------ 11 files changed, 751 insertions(+), 205 deletions(-) create mode 100644 fs/afs/vl_list.c create mode 100644 fs/afs/vl_rotate.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 546874057bd3..03e9f7afea1b 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -29,6 +29,8 @@ kafs-y := \ super.o \ netdevices.o \ vlclient.o \ + vl_rotate.o \ + vl_list.o \ volume.o \ write.o \ xattr.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 4dbb8af54668..630f91633b40 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -64,14 +64,17 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, /* * Parse a text string consisting of delimited addresses. */ -struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, - char delim, - unsigned short service, - unsigned short port) +struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net, + const char *text, size_t len, + char delim, + unsigned short service, + unsigned short port) { + struct afs_vlserver_list *vllist; struct afs_addr_list *alist; const char *p, *end = text + len; unsigned int nr = 0; + int ret = -ENOMEM; _enter("%*.*s,%c", (int)len, (int)len, text, delim); @@ -109,10 +112,20 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); - alist = afs_alloc_addrlist(nr, service, port); - if (!alist) + vllist = afs_alloc_vlserver_list(1); + if (!vllist) return ERR_PTR(-ENOMEM); + vllist->nr_servers = 1; + vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT); + if (!vllist->servers[0].server) + goto error; + + alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT); + if (!alist) + goto error; + vllist->servers[0].server->addresses = alist; + /* Extract the addresses */ p = text; do { @@ -180,11 +193,13 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, } while (p < end && alist->nr_addrs < alist->max_addrs); _leave(" = [nr %u]", alist->nr_addrs); - return alist; + return vllist; bad_address: - kfree(alist); - return ERR_PTR(-EINVAL); + ret = -EINVAL; +error: + afs_put_vlserverlist(net, vllist); + return ERR_PTR(ret); } /* @@ -203,30 +218,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1, /* * Perform a DNS query for VL servers and build a up an address list. */ -struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) +struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) { - struct afs_addr_list *alist; - char *vllist = NULL; + struct afs_vlserver_list *vllist; + char *result = NULL; int ret; _enter("%s", cell->name); - ret = dns_query("afsdb", cell->name, cell->name_len, - "", &vllist, _expiry); - if (ret < 0) + ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", + &result, _expiry); + if (ret < 0) { + kleave(" = %d [dns]", ret); return ERR_PTR(ret); - - alist = afs_parse_text_addrs(vllist, strlen(vllist), ',', - VL_SERVICE, AFS_VL_PORT); - if (IS_ERR(alist)) { - kfree(vllist); - if (alist != ERR_PTR(-ENOMEM)) - pr_err("Failed to parse DNS data\n"); - return alist; } - kfree(vllist); - return alist; + if (*_expiry == 0) + *_expiry = ktime_get_real_seconds() + 60; + + if (ret > 1 && result[0] == 0) + vllist = afs_extract_vlserver_list(cell, result, ret); + else + vllist = afs_parse_text_addrs(cell->net, result, ret, ',', + VL_SERVICE, AFS_VL_PORT); + kfree(result); + if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM)) + pr_err("Failed to parse DNS data\n"); + + return vllist; } /* @@ -353,43 +372,3 @@ int afs_end_cursor(struct afs_addr_cursor *ac) ac->begun = false; return ac->error; } - -/* - * Set the address cursor for iterating over VL servers. - */ -int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell) -{ - struct afs_addr_list *alist; - int ret; - - if (!rcu_access_pointer(cell->vl_addrs)) { - ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, - TASK_INTERRUPTIBLE); - if (ret < 0) - return ret; - - if (!rcu_access_pointer(cell->vl_addrs) && - ktime_get_real_seconds() < cell->dns_expiry) - return cell->error; - } - - read_lock(&cell->vl_addrs_lock); - alist = rcu_dereference_protected(cell->vl_addrs, - lockdep_is_held(&cell->vl_addrs_lock)); - if (alist->nr_addrs > 0) - afs_get_addrlist(alist); - else - alist = NULL; - read_unlock(&cell->vl_addrs_lock); - - if (!alist) - return -EDESTADDRREQ; - - ac->alist = alist; - ac->addr = NULL; - ac->start = READ_ONCE(alist->index); - ac->index = ac->start; - ac->error = 0; - ac->begun = false; - return 0; -} diff --git a/fs/afs/cell.c b/fs/afs/cell.c index f3d0bef16d78..0bb54de08f39 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -119,7 +119,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, */ static struct afs_cell *afs_alloc_cell(struct afs_net *net, const char *name, unsigned int namelen, - const char *vllist) + const char *addresses) { struct afs_cell *cell; int i, ret; @@ -134,7 +134,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, if (namelen == 5 && memcmp(name, "@cell", 5) == 0) return ERR_PTR(-EINVAL); - _enter("%*.*s,%s", namelen, namelen, name, vllist); + _enter("%*.*s,%s", namelen, namelen, name, addresses); cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); if (!cell) { @@ -153,22 +153,23 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, (1 << AFS_CELL_FL_NO_LOOKUP_YET)); INIT_LIST_HEAD(&cell->proc_volumes); rwlock_init(&cell->proc_lock); - rwlock_init(&cell->vl_addrs_lock); + rwlock_init(&cell->vl_servers_lock); /* Fill in the VL server list if we were given a list of addresses to * use. */ - if (vllist) { - struct afs_addr_list *alist; - - alist = afs_parse_text_addrs(vllist, strlen(vllist), ':', - VL_SERVICE, AFS_VL_PORT); - if (IS_ERR(alist)) { - ret = PTR_ERR(alist); + if (addresses) { + struct afs_vlserver_list *vllist; + + vllist = afs_parse_text_addrs(net, + addresses, strlen(addresses), ':', + VL_SERVICE, AFS_VL_PORT); + if (IS_ERR(vllist)) { + ret = PTR_ERR(vllist); goto parse_failed; } - rcu_assign_pointer(cell->vl_addrs, alist); + rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = TIME64_MAX; } @@ -356,14 +357,14 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) */ static void afs_update_cell(struct afs_cell *cell) { - struct afs_addr_list *alist, *old; + struct afs_vlserver_list *vllist, *old; time64_t now, expiry; _enter("%s", cell->name); - alist = afs_dns_query(cell, &expiry); - if (IS_ERR(alist)) { - switch (PTR_ERR(alist)) { + vllist = afs_dns_query(cell, &expiry); + if (IS_ERR(vllist)) { + switch (PTR_ERR(vllist)) { case -ENODATA: /* The DNS said that the cell does not exist */ set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); @@ -387,12 +388,12 @@ static void afs_update_cell(struct afs_cell *cell) /* Exclusion on changing vl_addrs is achieved by a * non-reentrant work item. */ - old = rcu_dereference_protected(cell->vl_addrs, true); - rcu_assign_pointer(cell->vl_addrs, alist); + old = rcu_dereference_protected(cell->vl_servers, true); + rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = expiry; if (old) - afs_put_addrlist(old); + afs_put_vlserverlist(cell->net, old); } if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) @@ -414,7 +415,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) ASSERTCMP(atomic_read(&cell->usage), ==, 0); - afs_put_addrlist(rcu_access_pointer(cell->vl_addrs)); + afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); key_put(cell->anonymous_key); kfree(cell); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 1cde710a8013..42e404ac3ff6 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry) return 0; } - ret = dns_query("afsdb", name, len, "", NULL, NULL); + ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8ae4e2ebb99a..638c157fc387 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -343,12 +343,47 @@ struct afs_cell { rwlock_t proc_lock; /* VL server list. */ - rwlock_t vl_addrs_lock; /* Lock on vl_addrs */ - struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */ + rwlock_t vl_servers_lock; /* Lock on vl_servers */ + struct afs_vlserver_list *__rcu vl_servers; + u8 name_len; /* Length of name */ char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ }; +/* + * Volume Location server record. + */ +struct afs_vlserver { + struct rcu_head rcu; + struct afs_addr_list __rcu *addresses; /* List of addresses for this VL server */ + unsigned long flags; +#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */ +#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */ + rwlock_t lock; /* Lock on addresses */ + atomic_t usage; + u16 name_len; /* Length of name */ + u16 port; + char name[]; /* Server name, case-flattened */ +}; + +/* + * Weighted list of Volume Location servers. + */ +struct afs_vlserver_entry { + u16 priority; /* Preference (as SRV) */ + u16 weight; /* Weight (as SRV) */ + struct afs_vlserver *server; +}; + +struct afs_vlserver_list { + struct rcu_head rcu; + atomic_t usage; + unsigned short nr_servers; + unsigned short index; /* Server currently in use */ + rwlock_t lock; + struct afs_vlserver_entry servers[]; +}; + /* * Cached VLDB entry. * @@ -595,6 +630,22 @@ struct afs_addr_cursor { bool responded; /* T if the current address responded */ }; +/* + * Cursor for iterating over a set of volume location servers. + */ +struct afs_vl_cursor { + struct afs_addr_cursor ac; + struct afs_cell *cell; /* The cell we're querying */ + struct afs_vlserver_list *server_list; /* Current server list (pins ref) */ + struct key *key; /* Key for the server */ + unsigned char start; /* Initial index in server list */ + unsigned char index; /* Number of servers tried beyond start */ + unsigned short flags; +#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */ +#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */ +#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */ +}; + /* * Cursor for iterating over a set of fileservers. */ @@ -640,12 +691,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int, unsigned short, unsigned short); extern void afs_put_addrlist(struct afs_addr_list *); -extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char, - unsigned short, unsigned short); -extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *); +extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *, + const char *, size_t, char, + unsigned short, unsigned short); +extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *); extern bool afs_iterate_addresses(struct afs_addr_cursor *); extern int afs_end_cursor(struct afs_addr_cursor *); -extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *); extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); @@ -1039,14 +1090,43 @@ extern void afs_fs_exit(void); /* * vlclient.c */ -extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *, - struct afs_addr_cursor *, - struct key *, const char *, int); -extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *, - struct key *, const uuid_t *); +extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *, + const char *, int); +extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *); extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *); -extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *, - struct key *, const uuid_t *); +extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); + +/* + * vl_rotate.c + */ +extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *, + struct afs_cell *, struct key *); +extern bool afs_select_vlserver(struct afs_vl_cursor *); +extern bool afs_select_current_vlserver(struct afs_vl_cursor *); +extern int afs_end_vlserver_operation(struct afs_vl_cursor *); + +/* + * vlserver_list.c + */ +static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver) +{ + atomic_inc(&vlserver->usage); + return vlserver; +} + +static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist) +{ + if (vllist) + atomic_inc(&vllist->usage); + return vllist; +} + +extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short); +extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *); +extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int); +extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *); +extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *, + const u8 *, size_t); /* * volume.c diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 476dcbb79713..bcb6ef7af27d 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -253,56 +253,54 @@ static const struct seq_operations afs_proc_cell_volumes_ops = { */ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) { - struct sockaddr_rxrpc *addr = v; + struct afs_vlserver_entry *entry = v; + struct afs_vlserver *vlserver = entry->server; + struct afs_addr_list *alist = rcu_dereference(vlserver->addresses); + int i; - /* display header on line 1 */ - if (v == (void *)1) { - seq_puts(m, "ADDRESS\n"); - return 0; + seq_printf(m, "%s [p=%hu w=%hu]:\n", + vlserver->name, entry->priority, entry->weight); + if (alist) { + for (i = 0; i < alist->nr_addrs; i++) + seq_printf(m, " %c %pISpc\n", + alist->index == i ? '>' : '-', + &alist->addrs[i].transport); } - - /* display one cell per line on subsequent lines */ - seq_printf(m, "%pISp\n", &addr->transport); return 0; } static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { - struct afs_addr_list *alist; + struct afs_vlserver_list *vllist; struct afs_cell *cell = PDE_DATA(file_inode(m->file)); loff_t pos = *_pos; rcu_read_lock(); - alist = rcu_dereference(cell->vl_addrs); - - /* allow for the header line */ - if (!pos) - return (void *) 1; - pos--; - - if (!alist || pos >= alist->nr_addrs) + vllist = rcu_dereference(cell->vl_servers); + if (!vllist || pos >= vllist->nr_servers) return NULL; - return alist->addrs + pos; + return &vllist->servers[pos]; } static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_addr_list *alist; + struct afs_vlserver_list *vllist; struct afs_cell *cell = PDE_DATA(file_inode(m->file)); loff_t pos; - alist = rcu_dereference(cell->vl_addrs); + vllist = rcu_dereference(cell->vl_servers); pos = *_pos; - (*_pos)++; - if (!alist || pos >= alist->nr_addrs) + pos++; + *_pos = pos; + if (!vllist || pos >= vllist->nr_servers) return NULL; - return alist->addrs + pos; + return &vllist->servers[pos]; } static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v) diff --git a/fs/afs/server.c b/fs/afs/server.c index 1d329e6981d5..6102ea9ee3fb 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -246,41 +246,23 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, struct key *key, const uuid_t *uuid) { - struct afs_addr_cursor ac; - struct afs_addr_list *alist; + struct afs_vl_cursor vc; + struct afs_addr_list *alist = NULL; int ret; - ret = afs_set_vl_cursor(&ac, cell); - if (ret < 0) - return ERR_PTR(ret); - - while (afs_iterate_addresses(&ac)) { - if (test_bit(ac.index, &ac.alist->yfs)) - alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid); - else - alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); - switch (ac.error) { - case 0: - afs_end_cursor(&ac); - return alist; - case -ECONNABORTED: - ac.error = afs_abort_to_error(ac.abort_code); - goto error; - case -ENOMEM: - case -ENONET: - goto error; - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - break; - default: - ac.error = -EIO; - goto error; + ret = -ERESTARTSYS; + if (afs_begin_vlserver_operation(&vc, cell, key)) { + while (afs_select_vlserver(&vc)) { + if (test_bit(vc.ac.index, &vc.ac.alist->yfs)) + alist = afs_yfsvl_get_endpoints(&vc, uuid); + else + alist = afs_vl_get_addrs_u(&vc, uuid); } + + ret = afs_end_vlserver_operation(&vc); } -error: - return ERR_PTR(afs_end_cursor(&ac)); + return ret < 0 ? ERR_PTR(ret) : alist; } /* diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c new file mode 100644 index 000000000000..da421c24eba9 --- /dev/null +++ b/fs/afs/vl_list.c @@ -0,0 +1,289 @@ +/* AFS vlserver list management. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@xxxxxxxxxx) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include "internal.h" + +struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, + unsigned short port) +{ + struct afs_vlserver *vlserver; + + vlserver = kzalloc(struct_size(vlserver, name, name_len + 1), + GFP_KERNEL); + if (vlserver) { + atomic_set(&vlserver->usage, 1); + rwlock_init(&vlserver->lock); + vlserver->name_len = name_len; + vlserver->port = port; + memcpy(vlserver->name, name, name_len); + } + return vlserver; +} + +void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver) +{ + if (vlserver) { + unsigned int u = atomic_dec_return(&vlserver->usage); + //_debug("VL PUT %p{%u}", vlserver, u); + + if (u == 0) { + afs_put_addrlist(vlserver->addresses); + kfree_rcu(vlserver, rcu); + } + } +} + +struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers) +{ + struct afs_vlserver_list *vllist; + + vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL); + if (vllist) { + atomic_set(&vllist->usage, 1); + rwlock_init(&vllist->lock); + } + + return vllist; +} + +void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist) +{ + if (vllist) { + unsigned int u = atomic_dec_return(&vllist->usage); + + //_debug("VLLS PUT %p{%u}", vllist, u); + if (u == 0) { + int i; + + for (i = 0; i < vllist->nr_servers; i++) { + afs_put_vlserver(net, vllist->servers[i].server); + } + kfree_rcu(vllist, rcu); + } + } +} + +u16 afs_extract_u16(const u8 **_b) +{ + u16 val; + + val = (u16)*(*_b)++ << 0; + val |= (u16)*(*_b)++ << 8; + return val; +} + +/* + * Build a VL server address list from a DNS queried server list. + */ +struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end, + u8 nr_addrs, u16 port) +{ + struct afs_addr_list *alist; + const u8 *b = *_b; + int ret = -EINVAL; + + alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port); + if (!alist) + return ERR_PTR(-ENOMEM); + if (nr_addrs == 0) + return alist; + + for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) { + __be32 x[4]; + u8 family = *b++; + + switch (family) { + case AF_INET: + if (end - b < 4) + goto error; + memcpy(x, b, 4); + afs_merge_fs_addr4(alist, x[0], port); + b += 4; + break; + + case AF_INET6: + if (end - b < 16) + goto error; + memcpy(x, b, 16); + afs_merge_fs_addr6(alist, x, port); + b += 16; + break; + + default: + ret = -EADDRNOTAVAIL; + goto error; + } + } + + /* Start with IPv6 if available. */ + if (alist->nr_ipv4 < alist->nr_addrs) + alist->index = alist->nr_ipv4; + + *_b = b; + return alist; + +error: + afs_put_addrlist(alist); + return ERR_PTR(ret); +} + +/* + * Build a VL server list from a DNS queried server list. + */ +struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, + const u8 *b, size_t b_size) +{ + struct afs_vlserver_list *vllist, *previous; + struct afs_addr_list *addrs; + struct afs_vlserver *server; + const u8 *end = b + b_size; + int ret = -ENOMEM, nr_servers, i, j; + + _enter(""); + + /* Check that it's a server list, v1 */ + if (b[0] != 0 || b[1] != 1 || end - b < 3) { + pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n", + b[0], b[1], end - b); + return ERR_PTR(-EDESTADDRREQ); + } + nr_servers = b[2]; + b += 3; + + vllist = afs_alloc_vlserver_list(nr_servers); + if (!vllist) + return ERR_PTR(-ENOMEM); + + read_lock(&cell->vl_servers_lock); + previous = afs_get_vlserverlist(cell->vl_servers); + read_unlock(&cell->vl_servers_lock); + + while (end - b >= 4 * 2 + 2 * 1) { + u16 name_len = afs_extract_u16(&b); + u16 priority = afs_extract_u16(&b); + u16 weight = afs_extract_u16(&b); + u16 port = afs_extract_u16(&b); + u8 protocol = *b++; + u8 nr_addrs = *b++; + + if (end - b < name_len) + break; + + _debug("extract %u %u %u %u %u %u %*.*s", + name_len, priority, weight, port, protocol, nr_addrs, + name_len, name_len, b); + + ret = -EPROTONOSUPPORT; + if (protocol != IPPROTO_UDP) + goto error; + if (port == 0) + port = AFS_VL_PORT; + + server = NULL; + if (previous) { + /* See if we can update an old server record */ + for (i = 0; i < previous->nr_servers; i++) { + struct afs_vlserver *p = previous->servers[i].server; + + if (p->name_len == name_len && + p->port == port && + strncasecmp(b, p->name, name_len) == 0) { + server = afs_get_vlserver(p); + break; + } + } + } + + if (!server) { + ret = -ENOMEM; + server = afs_alloc_vlserver(b, name_len, port); + if (!server) + goto error; + } + + b += name_len; + + /* Extract the addresses - note that we can't skip this as we + * have to advance the payload pointer. + */ + addrs = afs_extract_vl_addrs(&b, end, nr_addrs, port); + if (!addrs) + goto error_2; + + if (vllist->nr_servers >= nr_servers) { + _debug("skip %u >= %u", vllist->nr_servers, nr_servers); + afs_put_addrlist(addrs); + afs_put_vlserver(cell->net, server); + continue; + } + + if (addrs->nr_addrs == 0) { + afs_put_addrlist(addrs); + if (!rcu_access_pointer(server->addresses)) { + afs_put_vlserver(cell->net, server); + continue; + } + } else { + struct afs_addr_list *old = addrs; + + write_lock(&server->lock); + rcu_swap_protected(server->addresses, old, + lockdep_is_held(&server->lock)); + write_unlock(&server->lock); + afs_put_addrlist(old); + } + + + /* TODO: Might want to check for duplicates */ + + /* Insertion-sort by priority and weight */ + for (j = 0; j < vllist->nr_servers; j++) { + if (priority < vllist->servers[j].priority) + break; /* Lower preferable */ + if (priority == vllist->servers[j].priority && + weight > vllist->servers[j].weight) + break; /* Higher preferable */ + } + + if (j < vllist->nr_servers) { + memmove(vllist->servers + j + 1, + vllist->servers + j, + (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry)); + } + + vllist->servers[j].priority = priority; + vllist->servers[j].weight = weight; + vllist->servers[j].server = server; + vllist->nr_servers++; + } + + if (b != end) { + _debug("parse error %zd", b - end); + goto error; + } + + ret = -EDESTADDRREQ; + if (vllist->nr_servers == 0) + goto error; + + afs_put_vlserverlist(cell->net, previous); + _leave(" = ok [%u]", vllist->nr_servers); + return vllist; + +error_2: + afs_put_vlserver(cell->net, server); +error: + afs_put_vlserverlist(cell->net, vllist); + afs_put_vlserverlist(cell->net, previous); + return ERR_PTR(ret); +} diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c new file mode 100644 index 000000000000..344118a85ec9 --- /dev/null +++ b/fs/afs/vl_rotate.c @@ -0,0 +1,239 @@ +/* Handle vlserver selection and rotation. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@xxxxxxxxxx) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/sched/signal.h> +#include "internal.h" +#include "afs_vl.h" + +/* + * Begin an operation on a volume location server. + */ +bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, + struct key *key) +{ + memset(vc, 0, sizeof(*vc)); + vc->cell = cell; + vc->key = key; + vc->ac.error = SHRT_MAX; + + if (signal_pending(current)) { + vc->ac.error = -EINTR; + vc->flags |= AFS_VL_CURSOR_STOP; + return false; + } + + return true; +} + +/* + * Begin iteration through a server list, starting with the last used server if + * possible, or the last recorded good server if not. + */ +static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) +{ + struct afs_cell *cell = vc->cell; + + read_lock(&cell->vl_servers_lock); + vc->server_list = afs_get_vlserverlist(cell->vl_servers); + read_unlock(&cell->vl_servers_lock); + if (!vc->server_list) + return false; + + vc->start = READ_ONCE(vc->server_list->index); + vc->index = vc->start; + return true; +} + +/* + * Select the vlserver to use. May be called multiple times to rotate + * through the vlservers. + */ +bool afs_select_vlserver(struct afs_vl_cursor *vc) +{ + struct afs_addr_list *alist; + struct afs_vlserver *vlserver; + + _enter("%u/%u,%u/%u,%d,%d", + vc->index, vc->start, + vc->ac.index, vc->ac.start, + vc->ac.error, vc->ac.abort_code); + + if (vc->flags & AFS_VL_CURSOR_STOP) { + _leave(" = f [stopped]"); + return false; + } + + /* Evaluate the result of the previous operation, if there was one. */ + switch (vc->ac.error) { + case SHRT_MAX: + goto start; + + default: + case 0: + /* Success or local failure. Stop. */ + vc->flags |= AFS_VL_CURSOR_STOP; + _leave(" = f [okay/local %d]", vc->ac.error); + return false; + + case -ECONNABORTED: + /* The far side rejected the operation on some grounds. This + * might involve the server being busy or the volume having been moved. + */ + switch (vc->ac.abort_code) { + case AFSVL_IO: + case AFSVL_BADVOLOPER: + case AFSVL_NOMEM: + /* The server went weird. */ + vc->ac.error = -EREMOTEIO; + //write_lock(&vc->cell->vl_servers_lock); + //vc->server_list->weird_mask |= 1 << vc->index; + //write_unlock(&vc->cell->vl_servers_lock); + goto next_server; + + default: + vc->ac.error = afs_abort_to_error(vc->ac.abort_code); + goto failed; + } + + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + _debug("no conn"); + goto iterate_address; + + case -ECONNRESET: + _debug("call reset"); + vc->flags |= AFS_VL_CURSOR_RETRY; + goto next_server; + } + +restart_from_beginning: + _debug("restart"); + afs_end_cursor(&vc->ac); + afs_put_vlserverlist(vc->cell->net, vc->server_list); + vc->server_list = NULL; + if (vc->flags & AFS_VL_CURSOR_RETRIED) + goto failed; + vc->flags |= AFS_VL_CURSOR_RETRIED; +start: + _debug("start"); + /* TODO: Consider checking the VL server list */ + + if (!afs_start_vl_iteration(vc)) + goto failed; + +use_server: + _debug("use"); + /* We're starting on a different vlserver from the list. We need to + * check it, find its address list and probe its capabilities before we + * use it. + */ + ASSERTCMP(vc->ac.alist, ==, NULL); + vlserver = vc->server_list->servers[vc->index].server; + + // TODO: Check the vlserver occasionally + //if (!afs_check_vlserver_record(vc, vlserver)) + // goto failed; + + _debug("USING VLSERVER: %s", vlserver->name); + + read_lock(&vlserver->lock); + alist = rcu_dereference_protected(vlserver->addresses, + lockdep_is_held(&vlserver->lock)); + afs_get_addrlist(alist); + read_unlock(&vlserver->lock); + + memset(&vc->ac, 0, sizeof(vc->ac)); + + /* Probe the current vlserver if we haven't done so yet. */ +#if 0 // TODO + if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) { + vc->ac.alist = afs_get_addrlist(alist); + + if (!afs_probe_vlserver(vc)) { + switch (vc->ac.error) { + case -ENOMEM: + case -ERESTARTSYS: + case -EINTR: + goto failed; + default: + goto next_server; + } + } + } +#endif + + if (!vc->ac.alist) + vc->ac.alist = alist; + else + afs_put_addrlist(alist); + + vc->ac.start = READ_ONCE(alist->index); + vc->ac.index = vc->ac.start; + +iterate_address: + ASSERT(vc->ac.alist); + _debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); + /* Iterate over the current server's address list to try and find an + * address on which it will respond to us. + */ + if (!afs_iterate_addresses(&vc->ac)) + goto next_server; + + _leave(" = t"); + return true; + +next_server: + _debug("next"); + afs_end_cursor(&vc->ac); + vc->index++; + if (vc->index >= vc->server_list->nr_servers) + vc->index = 0; + if (vc->index != vc->start) + goto use_server; + + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (vc->flags & AFS_VL_CURSOR_RETRY) + goto restart_from_beginning; + + vc->ac.error = -EDESTADDRREQ; + goto failed; + +failed: + vc->flags |= AFS_VL_CURSOR_STOP; + afs_end_cursor(&vc->ac); + _leave(" = f [failed %d]", vc->ac.error); + return false; +} + +/* + * Tidy up a volume location server cursor and unlock the vnode. + */ +int afs_end_vlserver_operation(struct afs_vl_cursor *vc) +{ + struct afs_net *net = vc->cell->net; + int ret; + + afs_end_cursor(&vc->ac); + afs_put_vlserverlist(net, vc->server_list); + + ret = vc->ac.error; + if (ret == -ECONNABORTED) + ret = afs_abort_to_error(vc->ac.abort_code); + + return vc->ac.error; +} diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index c3b740813fc7..75808b55af21 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -128,14 +128,13 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = { * Dispatch a get volume entry by name or ID operation (uuid variant). If the * volname is a decimal number then it's a volume ID not a volume name. */ -struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc, const char *volname, int volnamesz) { struct afs_vldb_entry *entry; struct afs_call *call; + struct afs_net *net = vc->cell->net; size_t reqsz, padsz; __be32 *bp; @@ -155,7 +154,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, return ERR_PTR(-ENOMEM); } - call->key = key; + call->key = vc->key; call->reply[0] = entry; call->ret_reply0 = true; @@ -168,7 +167,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, memset((void *)bp + volnamesz, 0, padsz); trace_afs_make_vl_call(call); - return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_vldb_entry *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } /* @@ -267,14 +266,13 @@ static const struct afs_call_type afs_RXVLGetAddrsU = { * Dispatch an operation to get the addresses for a server, where the server is * nominated by UUID. */ -struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, const uuid_t *uuid) { struct afs_ListAddrByAttributes__xdr *r; const struct afs_uuid *u = (const struct afs_uuid *)uuid; struct afs_call *call; + struct afs_net *net = vc->cell->net; __be32 *bp; int i; @@ -286,7 +284,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, if (!call) return ERR_PTR(-ENOMEM); - call->key = key; + call->key = vc->key; call->reply[0] = NULL; call->ret_reply0 = true; @@ -307,7 +305,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, r->uuid.node[i] = htonl(u->node[i]); trace_afs_make_vl_call(call); - return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } /* @@ -377,14 +375,13 @@ static const struct afs_call_type afs_RXVLGetCapabilities = { }; /* - * Probe a fileserver for the capabilities that it supports. This can + * Probe a volume server for the capabilities that it supports. This can * return up to 196 words. * * We use this to probe for service upgrade to determine what the server at the * other end supports. */ -int afs_vl_get_capabilities(struct afs_net *net, - struct afs_addr_cursor *ac, +int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac, struct key *key) { struct afs_call *call; @@ -619,12 +616,11 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = { * Dispatch an operation to get the addresses for a server, where the server is * nominated by UUID. */ -struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, const uuid_t *uuid) { struct afs_call *call; + struct afs_net *net = vc->cell->net; __be32 *bp; _enter(""); @@ -635,7 +631,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, if (!call) return ERR_PTR(-ENOMEM); - call->key = key; + call->key = vc->key; call->reply[0] = NULL; call->ret_reply0 = true; @@ -646,5 +642,5 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ trace_afs_make_vl_call(call); - return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 3037bd01f617..404b9de48dc4 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -74,55 +74,35 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, const char *volname, size_t volnamesz) { - struct afs_addr_cursor ac; - struct afs_vldb_entry *vldb; + struct afs_vldb_entry *vldb = NULL; + struct afs_vl_cursor vc; int ret; - ret = afs_set_vl_cursor(&ac, cell); - if (ret < 0) - return ERR_PTR(ret); + if (!afs_begin_vlserver_operation(&vc, cell, key)) + return ERR_PTR(-ERESTARTSYS); - while (afs_iterate_addresses(&ac)) { - if (!test_bit(ac.index, &ac.alist->probed)) { - ret = afs_vl_get_capabilities(cell->net, &ac, key); + while (afs_select_vlserver(&vc)) { + if (!test_bit(vc.ac.index, &vc.ac.alist->probed)) { + ret = afs_vl_get_capabilities(cell->net, &vc.ac, key); switch (ret) { case VL_SERVICE: - clear_bit(ac.index, &ac.alist->yfs); - set_bit(ac.index, &ac.alist->probed); - ac.addr->srx_service = ret; + clear_bit(vc.ac.index, &vc.ac.alist->yfs); + set_bit(vc.ac.index, &vc.ac.alist->probed); + vc.ac.addr->srx_service = ret; break; case YFS_VL_SERVICE: - set_bit(ac.index, &ac.alist->yfs); - set_bit(ac.index, &ac.alist->probed); - ac.addr->srx_service = ret; + set_bit(vc.ac.index, &vc.ac.alist->yfs); + set_bit(vc.ac.index, &vc.ac.alist->probed); + vc.ac.addr->srx_service = ret; break; } } - vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key, - volname, volnamesz); - switch (ac.error) { - case 0: - afs_end_cursor(&ac); - return vldb; - case -ECONNABORTED: - ac.error = afs_abort_to_error(ac.abort_code); - goto error; - case -ENOMEM: - case -ENONET: - goto error; - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - break; - default: - ac.error = -EIO; - goto error; - } + vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); } -error: - return ERR_PTR(afs_end_cursor(&ac)); + ret = afs_end_vlserver_operation(&vc); + return ret < 0 ? ERR_PTR(ret) : vldb; } /*