Hi, as discussed with Lon on IRC, I'm trying to add to rgmanager the ability to freeze a service. I have worked on it over the last few days and wrote an example patch. Here is what I think a "freeze" could be; of course, it can be implemented in many other ways, so this is only an example. == What does freeze mean? == All actions on the service are blocked (start, stop, status) so you can work by hand on the various resources. When you unfreeze the service everything goes back to how it was before (so if you manually stopped a resource, the status check will fail and the rg recovery is performed). == When can a service be frozen? == You can freeze a service only if its status is DISABLED, STOPPED, or STARTED. It doesn't make sense to freeze a service that is in a transitional state. == How is it implemented? == *) As I don't want to lose the previous state, and I don't think frozen is a service state, "freezed" is implemented as a service flag. As a "service flag" didn't exist before, this patch adds it to rg_state_t, so it will be transmitted around the cluster. *) Two options are added to clusvcadm (-F to freeze, -U to unfreeze); obviously these option names can be changed (perhaps they could be long options only, like --freeze and --unfreeze?). So you can freeze with: #clusvcadm -F $SERVICE and unfreeze with: #clusvcadm -U $SERVICE *) clustat reports these new flags in 2 ways: in normal mode the flags are shown between () and in long mode a new line "Flags:" is added. The functions added in rg_strings.c aren't well tested but should work with multiple flags. *) !!!! In the patch I haven't changed the function handle_start_remote_req because, looking at the code, I cannot find where it can be called from. Maybe I'm missing something... :D Thanks! Bye! -- Simone Gotti -- Email.it, the professional e-mail, gratis per te: http://www.email.it/f Sponsor: Lo sai che hai un tesoro in soffitta? Quello che non serve più a te, può servire agli altri. 
* Vendi GRATIS ciò che vuoi con AdBoom.it Clicca qui: http://adv.email.it/cgi-bin/foclick.cgi?mid=6418&d=23-4
Index: include/resgroup.h =================================================================== RCS file: /cvs/cluster/cluster/rgmanager/include/resgroup.h,v retrieving revision 1.19 diff -u -b -B -p -r1.19 resgroup.h --- include/resgroup.h 20 Mar 2007 17:09:56 -0000 1.19 +++ include/resgroup.h 22 Apr 2007 18:09:40 -0000 @@ -34,6 +34,7 @@ typedef struct { uint32_t rs_state; /**< State of service. */ uint32_t rs_restarts; /**< Number of cluster-induced restarts */ + uint32_t rs_flags; /**< User setted flags */ uint64_t rs_transition; /**< Last service transition time */ } rg_state_t; @@ -45,6 +46,7 @@ typedef struct { swab32((ptr)->rs_last_owner);\ swab32((ptr)->rs_state);\ swab32((ptr)->rs_restarts);\ + swab32((ptr)->rs_flags);\ swab64((ptr)->rs_transition);\ } @@ -79,6 +81,8 @@ typedef struct { #define RG_UNLOCK 20 #define RG_QUERY_LOCK 21 #define RG_MIGRATE 22 +#define RG_FREEZE 23 +#define RG_UNFREEZE 24 #define RG_NONE 999 const char *rg_req_str(int req); @@ -105,7 +109,11 @@ int handle_start_remote_req(char *svcNam #define DEFAULT_CHECK_INTERVAL 10 +/* Resource group flags (for now) */ +#define RG_FLAG_FREEZED (1<<0) /** Resource freezed */ + const char *rg_state_str(int val); +void rg_flags_str(char *flags_string, size_t size, int val); const char *agent_op_str(int val); int eval_groups(int local, uint32_t nodeid, int nodeStatus); @@ -121,6 +129,8 @@ int svc_stop(char *svcName, int error); int svc_status(char *svcName); int svc_disable(char *svcName); int svc_fail(char *svcName); +int svc_freeze(char *svcName); +int svc_unfreeze(char *svcName); int svc_migrate(char *svcName, int target); int rt_enqueue_request(const char *resgroupname, int request, msgctx_t *resp_ctx, @@ -162,6 +172,7 @@ cluster_member_list_t *member_list(void) int my_id(void); /* Return codes */ +#define RG_EFREEZED -11 /* Service is freezed */ #define RG_ERUN -10 /* Service is already running */ #define RG_EQUORUM -9 /* Operation requires quorum */ #define RG_EINVAL -8 /* Invalid operation 
for resource */ Index: src/clulib/rg_strings.c =================================================================== RCS file: /cvs/cluster/cluster/rgmanager/src/clulib/rg_strings.c,v retrieving revision 1.7 diff -u -b -B -p -r1.7 rg_strings.c --- src/clulib/rg_strings.c 10 Mar 2007 00:20:54 -0000 1.7 +++ src/clulib/rg_strings.c 22 Apr 2007 18:09:40 -0000 @@ -35,6 +35,7 @@ const struct string_val rg_error_strings { RG_ENOSERVICE,"Service does not exist" }, { RG_EFORWARD, "Service not mastered locally" }, { RG_EABORT, "Aborted; service failed" }, + { RG_EFREEZED, "Failure: Service is freezed"}, { RG_EFAIL, "Failure" }, { RG_ESUCCESS, "Success" }, { RG_YES, "Yes" }, @@ -88,6 +89,12 @@ const struct string_val rg_state_strings }; +const struct string_val rg_flags_strings[] = { + {RG_FLAG_FREEZED, "freezed"}, + {0, NULL} +}; + + const struct string_val agent_ops[] = { {RS_START, "start"}, {RS_STOP, "stop"}, @@ -122,6 +129,20 @@ rg_search_table(const struct string_val } +static inline const char * +rg_flag_search_table(const struct string_val *table, int val) +{ + int x; + + for (x = 0; table[x].str != NULL; x++) { + if (table[x].val == val) { + return table[x].str; + } + } + + return "Unknown"; +} + const char * rg_strerror(int val) { @@ -134,6 +155,22 @@ rg_state_str(int val) return rg_search_table(rg_state_strings, val); } +void +rg_flags_str(char *flags_string, size_t size, int val) +{ + int i; + const char *string; + char *separator = ", "; + + for (i = 0; i < sizeof(uint32_t); i++) { + if ( val & (1 << i)) { + if (strlen(flags_string)) + strncat(flags_string, separator, size - (strlen(flags_string) + strlen(separator) + 1)); + string = rg_search_table(rg_flags_strings, (1 << i)); + strncat(flags_string, string, size - (strlen(flags_string) + strlen(string) + 1)); + } + } +} const char * rg_req_str(int val) Index: src/daemons/groups.c =================================================================== RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/groups.c,v 
retrieving revision 1.31 diff -u -b -B -p -r1.31 groups.c --- src/daemons/groups.c 19 Apr 2007 17:59:36 -0000 1.31 +++ src/daemons/groups.c 22 Apr 2007 18:09:41 -0000 @@ -376,6 +376,9 @@ consider_start(resource_node_t *node, ch mp = memb_id_to_p(membership, my_id()); assert(mp); + /* Service cannot be started if Freezed */ + if (svcStatus->rs_flags & RG_FLAG_FREEZED) + return; /* * Service must be not be running elsewhere to consider for a * local start. Index: src/daemons/rg_state.c =================================================================== RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_state.c,v retrieving revision 1.31 diff -u -b -B -p -r1.31 rg_state.c --- src/daemons/rg_state.c 19 Apr 2007 17:59:36 -0000 1.31 +++ src/daemons/rg_state.c 22 Apr 2007 18:09:42 -0000 @@ -282,6 +282,7 @@ init_rg(char *name, rg_state_t *svcblk) svcblk->rs_owner = 0; svcblk->rs_last_owner = 0; svcblk->rs_state = RG_STATE_STOPPED; + svcblk->rs_flags = 0; svcblk->rs_restarts = 0; svcblk->rs_transition = 0; strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name)); @@ -418,6 +419,7 @@ get_rg_state_local(char *name, rg_state_ svcblk->rs_owner = 0; svcblk->rs_last_owner = 0; svcblk->rs_state = RG_STATE_UNINITIALIZED; + svcblk->rs_flags = 0; svcblk->rs_restarts = 0; svcblk->rs_transition = 0; strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name)); @@ -446,6 +448,7 @@ get_rg_state_local(char *name, rg_state_ * 2 = DO NOT stop service, return 0 (success) * 3 = DO NOT stop service, return RG_EFORWARD * 4 = DO NOT stop service, return RG_EAGAIN + * 5 = DO NOT stop service, return RG_EFREEZED */ int svc_advise_stop(rg_state_t *svcStatus, char *svcName, int req) @@ -453,6 +456,11 @@ svc_advise_stop(rg_state_t *svcStatus, c cluster_member_list_t *membership = member_list(); int ret = 0; + if (svcStatus->rs_flags & RG_FLAG_FREEZED) { + clulog(LOG_DEBUG, "RG %s FREEZED!!!\n", svcName); + return 5; + } + switch(svcStatus->rs_state) { case RG_STATE_FAILED: if (req == RG_DISABLE) @@ 
-568,6 +576,7 @@ svc_advise_stop(rg_state_t *svcStatus, c * 2 = DO NOT start service, return 0 * 3 = DO NOT start service, return RG_EAGAIN * 4 = DO NOT start service, return RG_ERUN + * 5 = DO NOT start service, return RG_EFREEZED */ int svc_advise_start(rg_state_t *svcStatus, char *svcName, int req) @@ -575,6 +584,11 @@ svc_advise_start(rg_state_t *svcStatus, cluster_member_list_t *membership = member_list(); int ret = 0; + if (svcStatus->rs_flags & RG_FLAG_FREEZED) { + clulog(LOG_DEBUG, "RG %s FREEZED!!!\n", svcName); + return 5; + } + switch(svcStatus->rs_state) { case RG_STATE_FAILED: clulog(LOG_ERR, @@ -752,6 +766,9 @@ svc_start(char *svcName, int req) case 4: rg_unlock(&lockp); return RG_ERUN; + case 5: + rg_unlock(&lockp); + return RG_EFREEZED; default: break; } @@ -914,6 +931,8 @@ svc_status(char *svcName) } rg_unlock(&lockp); + if (svcStatus.rs_flags & RG_FLAG_FREEZED) + return 0; if (svcStatus.rs_owner != my_id()) /* Don't check status for anything not owned */ return 0; @@ -961,6 +980,26 @@ svc_status(char *svcName) int svc_status_inquiry(char *svcName) { + struct dlm_lksb lockp; + rg_state_t svcStatus; + + if (rg_lock(svcName, &lockp) < 0) { + clulog(LOG_ERR, "#48: Unable to obtain cluster lock: %s\n", + strerror(errno)); + return RG_EFAIL; + } + + if (get_rg_state(svcName, &svcStatus) != 0) { + rg_unlock(&lockp); + clulog(LOG_ERR, "#49: Failed getting status for RG %s\n", + svcName); + return RG_EFAIL; + } + rg_unlock(&lockp); + + if (svcStatus.rs_flags & RG_FLAG_FREEZED) + return 0; + return group_op(svcName, RG_STATUS); } @@ -1015,6 +1054,9 @@ _svc_stop(char *svcName, int req, int re case 4: rg_unlock(&lockp); return RG_EAGAIN; + case 5: + rg_unlock(&lockp); + return RG_EFREEZED; default: break; } @@ -1191,6 +1233,76 @@ svc_fail(char *svcName) return 0; } +/** + * Flag a cluster service as freezed/unfreezed. + * + * @param svcName Service ID to flag as freezed. 
+ * @return FAIL, 0 + */ +int +_svc_freeze(char *svcName, int enabled) +{ + struct dlm_lksb lockp; + rg_state_t svcStatus; + + if (rg_lock(svcName, &lockp) == RG_EFAIL) { + clulog(LOG_ERR, "#55: Unable to obtain cluster lock: %s\n", + strerror(errno)); + return RG_EFAIL; + } + + clulog(LOG_DEBUG, "Handling %s request for RG %s\n", svcName, enabled?"freeze":"unfreeze"); + + if (get_rg_state(svcName, &svcStatus) != 0) { + rg_unlock(&lockp); + clulog(LOG_ERR, "#56: Failed getting status for RG %s\n", + svcName); + return RG_EFAIL; + } + + switch(svcStatus.rs_state) { + case RG_STATE_STOPPED: + case RG_STATE_STARTED: + case RG_STATE_DISABLED: + + if (enabled == 1) { + clulog(LOG_DEBUG, "Freezing RG %s\n", svcName); + svcStatus.rs_flags |= RG_FLAG_FREEZED; + } else { + clulog(LOG_DEBUG, "Unfreezing RG %s\n", svcName); + svcStatus.rs_flags &= ~RG_FLAG_FREEZED; + } + + if (set_rg_state(svcName, &svcStatus) != 0) { + rg_unlock(&lockp); + clulog(LOG_ERR, "#57: Failed changing RG status\n"); + return RG_EFAIL; + } + break; + + default: + rg_unlock(&lockp); + return RG_EFAIL; + break; + } + + rg_unlock(&lockp); + + return 0; +} + +int +svc_freeze(char *svcName) +{ + return _svc_freeze(svcName, 1); +} + +int +svc_unfreeze(char *svcName) +{ + return _svc_freeze(svcName, 0); +} + /* * Send a message to the target node to start the service. 
@@ -1324,6 +1436,9 @@ handle_relocate_req(char *svcName, int r svc_fail(svcName); return RG_EFAIL; } + if (ret == RG_EFREEZED) { + return RG_EFREEZED; + } if (ret == RG_EFORWARD) return RG_EFORWARD; } @@ -1531,7 +1646,7 @@ handle_start_req(char *svcName, int req, /* If services are locked, return the error */ - if (ret == RG_EAGAIN || ret == RG_ERUN) + if (ret == RG_EAGAIN || ret == RG_ERUN || ret == RG_EFREEZED) return ret; /* Index: src/daemons/rg_thread.c =================================================================== RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_thread.c,v retrieving revision 1.19 diff -u -b -B -p -r1.19 rg_thread.c --- src/daemons/rg_thread.c 27 Mar 2007 19:33:20 -0000 1.19 +++ src/daemons/rg_thread.c 22 Apr 2007 18:09:43 -0000 @@ -422,6 +422,18 @@ resgroup_thread_main(void *arg) break; + case RG_FREEZE: + error = svc_freeze(myname); + if (error != 0) + ret = RG_EFAIL; + break; + + case RG_UNFREEZE: + error = svc_unfreeze(myname); + if (error != 0) + ret = RG_EFAIL; + break; + default: printf("Unhandled request %d\n", req->rr_request); ret = RG_NONE; Index: src/utils/clustat.c =================================================================== RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clustat.c,v retrieving revision 1.31 diff -u -b -B -p -r1.31 clustat.c --- src/utils/clustat.c 6 Feb 2007 20:21:17 -0000 1.31 +++ src/utils/clustat.c 22 Apr 2007 18:09:43 -0000 @@ -416,7 +416,7 @@ void _txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags) { char owner[31]; - + char flags_string[255] = ""; if (rs->rs_state == RG_STATE_STOPPED || rs->rs_state == RG_STATE_DISABLED || @@ -430,19 +430,34 @@ _txt_rg_state(rg_state_t *rs, cluster_me snprintf(owner, sizeof(owner), "%-.30s", my_memb_id_to_name(members, rs->rs_owner)); } - printf(" %-20.20s %-30.30s %-16.16s\n", + rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags); + printf(" %-20.20s %-30.30s %-16.16s", rs->rs_name, owner, rg_state_str(rs->rs_state)); 
+ if(strlen(flags_string)) + printf ("(%s)\n", flags_string); + else + printf("\n"); } void _txt_rg_state_v(rg_state_t *rs, cluster_member_list_t *members, int flags) { + char flags_string[255] = ""; + + rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags); + printf("Service Name : %s\n", rs->rs_name); printf(" Current State : %s (%d)\n", rg_state_str(rs->rs_state), rs->rs_state); + if (rs->rs_flags) + printf(" Flags : %s (%d)\n", + flags_string, rs->rs_flags); + else + printf(" Flags : none (%d)\n", + rs->rs_flags); printf(" Owner : %s\n", my_memb_id_to_name(members, rs->rs_owner)); printf(" Last Owner : %s\n", Index: src/utils/clusvcadm.c =================================================================== RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clusvcadm.c,v retrieving revision 1.18 diff -u -b -B -p -r1.18 clusvcadm.c --- src/utils/clusvcadm.c 20 Mar 2007 17:09:57 -0000 1.18 +++ src/utils/clusvcadm.c 22 Apr 2007 18:09:44 -0000 @@ -240,7 +240,7 @@ main(int argc, char **argv) return 1; } - while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:qh?")) != EOF) { + while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:F:U:qh?")) != EOF) { switch (opt) { case 'l': return do_lock(); @@ -294,6 +294,16 @@ main(int argc, char **argv) case 'v': printf("%s\n",PACKAGE_VERSION); return 0; + case 'F': + actionstr = "freezing"; + action = RG_FREEZE; + svcname = optarg; + break; + case 'U': + actionstr = "unfreezing"; + action = RG_UNFREEZE; + svcname = optarg; + break; case 'q': close(STDOUT_FILENO); break;
-- Linux-cluster mailing list Linux-cluster@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/linux-cluster