On Fri, Jun 27, 2008 at 01:28:56PM -0400, david m. richter wrote: > i also have another setup in vmware; while i doubt it's > substantively different than bruce's, i'm a ready and willing tester. is > there a different branch (or repo, or just a stack of patches somewhere) > that i should/could be using? If on 2.6.25, then use ftp://ftp%40openais%2Eorg:downloads@xxxxxxxxxxx/downloads/openais-0.80.3/openais-0.80.3.tar.gz ftp://sources.redhat.com/pub/cluster/releases/cluster-2.03.04.tar.gz If on 2.6.26-rc, then you'll need to add the attached patch to cluster. Dave
commit 0e25f89dc09cab05c8ce519f4a84bdbf0bff25aa Author: David Teigland <teigland@xxxxxxxxxx> Date: Mon Apr 7 16:15:01 2008 -0500 gfs_controld: read plocks from dlm or lock_dlm In kernels before 2.6.26, cluster posix lock ops are passed to user space through the gfs-specific lock_dlm module. In 2.6.26, the same ops are passed to user space through the dlm module. Update gfs_controld to read the plock ops from either module, depending on the kernel. Signed-off-by: David Teigland <teigland@xxxxxxxxxx> diff --git a/group/gfs_controld/lock_dlm.h b/group/gfs_controld/lock_dlm.h index b57c1f3..4b132b0 100644 --- a/group/gfs_controld/lock_dlm.h +++ b/group/gfs_controld/lock_dlm.h @@ -110,6 +110,7 @@ struct mountpoint { struct mountgroup { struct list_head list; uint32_t id; + uint32_t associated_ls_id; struct list_head members; struct list_head members_gone; int memb_count; diff --git a/group/gfs_controld/plock.c b/group/gfs_controld/plock.c index c564300..5e4f56b 100644 --- a/group/gfs_controld/plock.c +++ b/group/gfs_controld/plock.c @@ -20,9 +20,10 @@ #include <netdb.h> #include <limits.h> #include <unistd.h> +#include <dirent.h> #include <openais/saAis.h> #include <openais/saCkpt.h> -#include <linux/lock_dlm_plock.h> +#include <linux/dlm_plock.h> #include "lock_dlm.h" @@ -30,8 +31,9 @@ #define PROC_DEVICES "/proc/devices" #define MISC_NAME "misc" #define CONTROL_DIR "/dev/misc" -#define CONTROL_NAME "lock_dlm_plock" +#define CONTROL_NAME "dlm_plock" +extern struct list_head mounts; extern int our_nodeid; extern int message_flow_control_on; @@ -57,6 +59,7 @@ static SaCkptCallbacksT callbacks = { 0, 0 }; static SaVersionT version = { 'B', 1, 1 }; static char section_buf[1024 * 1024]; static uint32_t section_len; +static int need_fsid_translation = 0; struct pack_plock { uint64_t start; @@ -100,13 +103,13 @@ struct posix_lock { struct lock_waiter { struct list_head list; uint32_t flags; - struct gdlm_plock_info info; + struct dlm_plock_info info; }; static void 
send_own(struct mountgroup *mg, struct resource *r, int owner); static void save_pending_plock(struct mountgroup *mg, struct resource *r, - struct gdlm_plock_info *in); + struct dlm_plock_info *in); static int got_unown(struct resource *r) @@ -114,7 +117,7 @@ static int got_unown(struct resource *r) return !!(r->flags & R_GOT_UNOWN); } -static void info_bswap_out(struct gdlm_plock_info *i) +static void info_bswap_out(struct dlm_plock_info *i) { i->version[0] = cpu_to_le32(i->version[0]); i->version[1] = cpu_to_le32(i->version[1]); @@ -129,7 +132,7 @@ static void info_bswap_out(struct gdlm_plock_info *i) i->owner = cpu_to_le64(i->owner); } -static void info_bswap_in(struct gdlm_plock_info *i) +static void info_bswap_in(struct dlm_plock_info *i) { i->version[0] = le32_to_cpu(i->version[0]); i->version[1] = le32_to_cpu(i->version[1]); @@ -147,11 +150,11 @@ static void info_bswap_in(struct gdlm_plock_info *i) static char *op_str(int optype) { switch (optype) { - case GDLM_PLOCK_OP_LOCK: + case DLM_PLOCK_OP_LOCK: return "LK"; - case GDLM_PLOCK_OP_UNLOCK: + case DLM_PLOCK_OP_UNLOCK: return "UN"; - case GDLM_PLOCK_OP_GET: + case DLM_PLOCK_OP_GET: return "GET"; default: return "??"; @@ -160,7 +163,7 @@ static char *op_str(int optype) static char *ex_str(int optype, int ex) { - if (optype == GDLM_PLOCK_OP_UNLOCK || optype == GDLM_PLOCK_OP_GET) + if (optype == DLM_PLOCK_OP_UNLOCK || optype == DLM_PLOCK_OP_GET) return "-"; if (ex) return "WR"; @@ -195,10 +198,11 @@ static int get_proc_number(const char *file, const char *name, uint32_t *number) return 0; } -static int control_device_number(uint32_t *major, uint32_t *minor) +static int control_device_number(const char *plock_misc_name, + uint32_t *major, uint32_t *minor) { if (!get_proc_number(PROC_DEVICES, MISC_NAME, major) || - !get_proc_number(PROC_MISC, GDLM_PLOCK_MISC_NAME, minor)) { + !get_proc_number(PROC_MISC, plock_misc_name, minor)) { *major = 0; return 0; } @@ -265,7 +269,7 @@ static int create_control(const char 
*control, uint32_t major, uint32_t minor) return 1; } -static int open_control(void) +static int open_control(const char *control_name, const char *plock_misc_name) { char control[PATH_MAX]; uint32_t major = 0, minor = 0; @@ -273,22 +277,20 @@ static int open_control(void) if (control_fd != -1) return 0; - snprintf(control, sizeof(control), "%s/%s", CONTROL_DIR, CONTROL_NAME); + snprintf(control, sizeof(control), "%s/%s", CONTROL_DIR, control_name); - if (!control_device_number(&major, &minor)) { - log_error("Is dlm missing from kernel?"); + if (!control_device_number(plock_misc_name, &major, &minor)) return -1; - } if (!control_exists(control, major, minor) && !create_control(control, major, minor)) { - log_error("Failure to communicate with kernel lock_dlm"); + log_error("Failure to create device file %s", control); return -1; } control_fd = open(control, O_RDWR); if (control_fd < 0) { - log_error("Failure to communicate with kernel lock_dlm: %s", + log_error("Failure to open device %s: %s", control, strerror(errno)); return -1; } @@ -296,6 +298,16 @@ static int open_control(void) return 0; } +/* + * In kernels before 2.6.26, plocks came from gfs2's lock_dlm module. + * Reading plocks from there as well should allow us to use cluster3 + * on old (RHEL5) kernels. In this case, the fsid we read in plock_info + * structs is the mountgroup id, which we need to translate to the ls id. + */ + +#define OLD_CONTROL_NAME "lock_dlm_plock" +#define OLD_PLOCK_MISC_NAME "lock_dlm_plock" + int setup_plocks(void) { SaAisErrorT err; @@ -318,14 +330,29 @@ int setup_plocks(void) log_error("ckpt init error %d - plocks unavailable", err); control: - rv = open_control(); - if (rv) - return rv; + need_fsid_translation = 1; + + rv = open_control(CONTROL_NAME, DLM_PLOCK_MISC_NAME); + if (rv) { + log_debug("setup_plocks trying old lock_dlm interface"); + rv = open_control(OLD_CONTROL_NAME, OLD_PLOCK_MISC_NAME); + if (rv) { + log_error("Is dlm missing from kernel? 
No control device."); + return rv; + } + + /* the fsid from the kernel is the mountgroup id in old + kernels, which we can use to look up the mg directly + without translation */ + + need_fsid_translation = 0; + } log_debug("plocks %d", control_fd); + log_debug("plock need_fsid_translation %d", need_fsid_translation); log_debug("plock cpg message size: %u bytes", (unsigned int) (sizeof(struct gdlm_header) + - sizeof(struct gdlm_plock_info))); + sizeof(struct dlm_plock_info))); return control_fd; } @@ -517,7 +544,7 @@ static int shrink_range(struct posix_lock *po, uint64_t start, uint64_t end) return shrink_range2(&po->start, &po->end, start, end); } -static int is_conflict(struct resource *r, struct gdlm_plock_info *in, int get) +static int is_conflict(struct resource *r, struct dlm_plock_info *in, int get) { struct posix_lock *po; @@ -566,7 +593,7 @@ static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner, 2. convert RE to RN range and mode */ static int lock_case1(struct posix_lock *po, struct resource *r, - struct gdlm_plock_info *in) + struct dlm_plock_info *in) { uint64_t start2, end2; int rv; @@ -593,7 +620,7 @@ static int lock_case1(struct posix_lock *po, struct resource *r, 3. 
convert RE to RN range and mode */ static int lock_case2(struct posix_lock *po, struct resource *r, - struct gdlm_plock_info *in) + struct dlm_plock_info *in) { int rv; @@ -616,7 +643,7 @@ static int lock_case2(struct posix_lock *po, struct resource *r, } static int lock_internal(struct mountgroup *mg, struct resource *r, - struct gdlm_plock_info *in) + struct dlm_plock_info *in) { struct posix_lock *po, *safe; int rv = 0; @@ -679,7 +706,7 @@ static int lock_internal(struct mountgroup *mg, struct resource *r, } static int unlock_internal(struct mountgroup *mg, struct resource *r, - struct gdlm_plock_info *in) + struct dlm_plock_info *in) { struct posix_lock *po, *safe; int rv = 0; @@ -743,7 +770,7 @@ static int unlock_internal(struct mountgroup *mg, struct resource *r, } static int add_waiter(struct mountgroup *mg, struct resource *r, - struct gdlm_plock_info *in) + struct dlm_plock_info *in) { struct lock_waiter *w; @@ -751,14 +778,17 @@ static int add_waiter(struct mountgroup *mg, struct resource *r, w = malloc(sizeof(struct lock_waiter)); if (!w) return -ENOMEM; - memcpy(&w->info, in, sizeof(struct gdlm_plock_info)); + memcpy(&w->info, in, sizeof(struct dlm_plock_info)); list_add_tail(&w->list, &r->waiters); return 0; } -static void write_result(struct mountgroup *mg, struct gdlm_plock_info *in, +static void write_result(struct mountgroup *mg, struct dlm_plock_info *in, int rv) { + if (need_fsid_translation) + in->fsid = mg->associated_ls_id; + in->rv = rv; write(control_fd, in, sizeof(struct gdlm_plock_info)); } @@ -766,7 +796,7 @@ static void write_result(struct mountgroup *mg, struct gdlm_plock_info *in, static void do_waiters(struct mountgroup *mg, struct resource *r) { struct lock_waiter *w, *safe; - struct gdlm_plock_info *in; + struct dlm_plock_info *in; int rv; list_for_each_entry_safe(w, safe, &r->waiters, list) { @@ -792,7 +822,7 @@ static void do_waiters(struct mountgroup *mg, struct resource *r) } } -static void do_lock(struct mountgroup *mg, struct 
gdlm_plock_info *in, +static void do_lock(struct mountgroup *mg, struct dlm_plock_info *in, struct resource *r) { int rv; @@ -817,7 +847,7 @@ static void do_lock(struct mountgroup *mg, struct gdlm_plock_info *in, put_resource(r); } -static void do_unlock(struct mountgroup *mg, struct gdlm_plock_info *in, +static void do_unlock(struct mountgroup *mg, struct dlm_plock_info *in, struct resource *r) { int rv; @@ -833,7 +863,7 @@ static void do_unlock(struct mountgroup *mg, struct gdlm_plock_info *in, /* we don't even get to this function if the getlk isn't from us */ -static void do_get(struct mountgroup *mg, struct gdlm_plock_info *in, +static void do_get(struct mountgroup *mg, struct dlm_plock_info *in, struct resource *r) { int rv; @@ -846,19 +876,19 @@ static void do_get(struct mountgroup *mg, struct gdlm_plock_info *in, write_result(mg, in, rv); } -static void __receive_plock(struct mountgroup *mg, struct gdlm_plock_info *in, +static void __receive_plock(struct mountgroup *mg, struct dlm_plock_info *in, int from, struct resource *r) { switch (in->optype) { - case GDLM_PLOCK_OP_LOCK: + case DLM_PLOCK_OP_LOCK: mg->last_plock_time = time(NULL); do_lock(mg, in, r); break; - case GDLM_PLOCK_OP_UNLOCK: + case DLM_PLOCK_OP_UNLOCK: mg->last_plock_time = time(NULL); do_unlock(mg, in, r); break; - case GDLM_PLOCK_OP_GET: + case DLM_PLOCK_OP_GET: do_get(mg, in, r); break; default: @@ -880,7 +910,7 @@ static void __receive_plock(struct mountgroup *mg, struct gdlm_plock_info *in, static void _receive_plock(struct mountgroup *mg, char *buf, int len, int from) { - struct gdlm_plock_info info; + struct dlm_plock_info info; struct gdlm_header *hd = (struct gdlm_header *) buf; struct resource *r = NULL; struct timeval now; @@ -907,7 +937,7 @@ static void _receive_plock(struct mountgroup *mg, char *buf, int len, int from) plock_recv_time = now; } - if (info.optype == GDLM_PLOCK_OP_GET && from != our_nodeid) + if (info.optype == DLM_PLOCK_OP_GET && from != our_nodeid) return; if 
(from != hd->nodeid || from != info.nodeid) { @@ -1013,14 +1043,14 @@ void receive_plock(struct mountgroup *mg, char *buf, int len, int from) _receive_plock(mg, buf, len, from); } -static int send_struct_info(struct mountgroup *mg, struct gdlm_plock_info *in, +static int send_struct_info(struct mountgroup *mg, struct dlm_plock_info *in, int msg_type) { char *buf; int rv, len; struct gdlm_header *hd; - len = sizeof(struct gdlm_header) + sizeof(struct gdlm_plock_info); + len = sizeof(struct gdlm_header) + sizeof(struct dlm_plock_info); buf = malloc(len); if (!buf) { rv = -ENOMEM; @@ -1047,14 +1077,14 @@ static int send_struct_info(struct mountgroup *mg, struct gdlm_plock_info *in, } static void send_plock(struct mountgroup *mg, struct resource *r, - struct gdlm_plock_info *in) + struct dlm_plock_info *in) { send_struct_info(mg, in, MSG_PLOCK); } static void send_own(struct mountgroup *mg, struct resource *r, int owner) { - struct gdlm_plock_info info; + struct dlm_plock_info info; /* if we've already sent an own message for this resource, (pending list is not empty), then we shouldn't send another */ @@ -1074,7 +1104,7 @@ static void send_own(struct mountgroup *mg, struct resource *r, int owner) static void send_syncs(struct mountgroup *mg, struct resource *r) { - struct gdlm_plock_info info; + struct dlm_plock_info info; struct posix_lock *po; struct lock_waiter *w; int rv; @@ -1111,7 +1141,7 @@ static void send_syncs(struct mountgroup *mg, struct resource *r) static void send_drop(struct mountgroup *mg, struct resource *r) { - struct gdlm_plock_info info; + struct dlm_plock_info info; memset(&info, 0, sizeof(info)); info.number = r->number; @@ -1123,7 +1153,7 @@ static void send_drop(struct mountgroup *mg, struct resource *r) so the op is saved on the pending list until the r owner is established */ static void save_pending_plock(struct mountgroup *mg, struct resource *r, - struct gdlm_plock_info *in) + struct dlm_plock_info *in) { struct lock_waiter *w; @@ -1132,7 
+1162,7 @@ static void save_pending_plock(struct mountgroup *mg, struct resource *r, log_error("save_pending_plock no mem"); return; } - memcpy(&w->info, in, sizeof(struct gdlm_plock_info)); + memcpy(&w->info, in, sizeof(struct dlm_plock_info)); list_add_tail(&w->list, &r->pending); } @@ -1167,7 +1197,7 @@ static void send_pending_plocks(struct mountgroup *mg, struct resource *r) static void _receive_own(struct mountgroup *mg, char *buf, int len, int from) { struct gdlm_header *hd = (struct gdlm_header *) buf; - struct gdlm_plock_info info; + struct dlm_plock_info info; struct resource *r; int should_not_happen = 0; int rv; @@ -1294,7 +1324,7 @@ void receive_own(struct mountgroup *mg, char *buf, int len, int from) _receive_own(mg, buf, len, from); } -static void clear_syncing_flag(struct resource *r, struct gdlm_plock_info *in) +static void clear_syncing_flag(struct resource *r, struct dlm_plock_info *in) { struct posix_lock *po; struct lock_waiter *w; @@ -1333,7 +1363,7 @@ static void clear_syncing_flag(struct resource *r, struct gdlm_plock_info *in) static void _receive_sync(struct mountgroup *mg, char *buf, int len, int from) { - struct gdlm_plock_info info; + struct dlm_plock_info info; struct gdlm_header *hd = (struct gdlm_header *) buf; struct resource *r; int rv; @@ -1379,7 +1409,7 @@ void receive_sync(struct mountgroup *mg, char *buf, int len, int from) static void _receive_drop(struct mountgroup *mg, char *buf, int len, int from) { - struct gdlm_plock_info info; + struct dlm_plock_info info; struct resource *r; int rv; @@ -1478,11 +1508,122 @@ static int drop_resources(struct mountgroup *mg) return 0; } +/* iterate through directory names looking for matching id: + /sys/kernel/dlm/<name>/id */ + +#define DLM_SYSFS_DIR "/sys/kernel/dlm" + +static char ls_name[256]; + +static int get_lockspace_name(uint32_t ls_id) +{ + char path[PATH_MAX]; + DIR *d; + FILE *file; + struct dirent *de; + uint32_t id; + int rv, error; + + d = opendir(DLM_SYSFS_DIR); + if (!d) { 
+ log_debug("%s: opendir failed: %d", path, errno); + return -1; + } + + rv = -1; + + while ((de = readdir(d))) { + if (de->d_name[0] == '.') + continue; + + id = 0; + memset(path, 0, PATH_MAX); + snprintf(path, PATH_MAX, "%s/%s/id", DLM_SYSFS_DIR, de->d_name); + + file = fopen(path, "r"); + if (!file) { + log_error("can't open %s %d", path, errno); + continue; + } + + error = fscanf(file, "%u", &id); + fclose(file); + + if (error != 1) { + log_error("bad read %s %d", path, errno); + continue; + } + if (id != ls_id) { + log_debug("get_lockspace_name skip %x %s", + id, de->d_name); + continue; + } + + log_debug("get_lockspace_name found %x %s", id, de->d_name); + strncpy(ls_name, de->d_name, 256); + rv = 0; + break; + } + + closedir(d); + return rv; +} + +/* find the lockspace with "ls_id" in sysfs, get its name, then look for + the mg with the same name in mounts list, return its id */ + +static void set_associated_id(uint32_t ls_id) +{ + struct mountgroup *mg; + int rv; + + log_debug("set_associated_id ls_id %x %d", ls_id, ls_id); + + memset(&ls_name, 0, sizeof(ls_name)); + + rv = get_lockspace_name(ls_id); + if (rv) { + log_error("no lockspace found with id %x", ls_id); + return; + } + + mg = find_mg(ls_name); + if (!mg) { + log_error("no mountgroup found with name %s for ls_id %x", + ls_name, ls_id); + return; + } + + log_debug("set_associated_id ls %x is mg %x", ls_id, mg->id); + + mg->associated_ls_id = ls_id; +} + +static uint32_t ls_to_mg_id(uint32_t fsid) +{ + struct mountgroup *mg; + int do_set = 1; + + retry: + list_for_each_entry(mg, &mounts, list) { + if (mg->associated_ls_id == fsid) + return mg->id; + } + + if (do_set) { + do_set = 0; + set_associated_id(fsid); + goto retry; + } + + return fsid; +} + int process_plocks(void) { struct mountgroup *mg; struct resource *r; - struct gdlm_plock_info info; + struct dlm_plock_info info; struct timeval now; uint64_t usec; int rv; @@ -1526,6 +1667,9 @@ int process_plocks(void) goto fail; } + if 
(need_fsid_translation) + info.fsid = ls_to_mg_id(info.fsid); + mg = find_mg_id(info.fsid); if (!mg) { log_debug("process_plocks: no mg id %x", info.fsid);
-- Linux-cluster mailing list Linux-cluster@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/linux-cluster