corosync  2.4.6
exec/votequorum.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2020 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Authors: Christine Caulfield (ccaulfie@redhat.com)
7  * Fabio M. Di Nitto (fdinitto@redhat.com)
8  *
9  * This software licensed under BSD license, the text of which follows:
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  * - Redistributions of source code must retain the above copyright notice,
15  * this list of conditions and the following disclaimer.
16  * - Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  * - Neither the name of the MontaVista Software, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived from this
21  * software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33  * THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include <config.h>
37 
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <fcntl.h>
41 #include <stdint.h>
42 #include <unistd.h>
43 
44 #include <qb/qbipc_common.h>
45 
46 #include "quorum.h"
47 #include <corosync/corodefs.h>
48 #include <corosync/list.h>
49 #include <corosync/logsys.h>
50 #include <corosync/coroapi.h>
51 #include <corosync/icmap.h>
52 #include <corosync/votequorum.h>
54 
55 #include "service.h"
56 #include "util.h"
57 
58 LOGSYS_DECLARE_SUBSYS ("VOTEQ");
59 
60 /*
61  * interface with corosync
62  */
63 
64 static struct corosync_api_v1 *corosync_api;
65 
66 /*
67  * votequorum global config vars
68  */
69 
70 
71 static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
72 static struct cluster_node *qdevice = NULL;
73 static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
74 static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
75 static uint8_t qdevice_can_operate = 1;
76 static void *qdevice_reg_conn = NULL;
77 static uint8_t qdevice_master_wins = 0;
78 
79 static uint8_t two_node = 0;
80 
81 static uint8_t wait_for_all = 0;
82 static uint8_t wait_for_all_status = 0;
83 static uint8_t wait_for_all_autoset = 0; /* Wait for all is not set explicitly and follows two_node */
84 
85 static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE, initial_auto_tie_breaker = ATB_NONE;
86 static int lowest_node_id = -1;
87 static int highest_node_id = -1;
88 
89 #define DEFAULT_LMS_WIN 10000
90 static uint8_t last_man_standing = 0;
91 static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
92 
93 static uint8_t allow_downscale = 0;
94 static uint32_t ev_barrier = 0;
95 
96 static uint8_t ev_tracking = 0;
97 static uint32_t ev_tracking_barrier = 0;
98 static int ev_tracking_fd = -1;
99 
100 /*
101  * votequorum_exec defines/structs/forward definitions
102  */
105  struct qb_ipc_request_header header __attribute__((aligned(8)));
106  uint32_t nodeid;
107  uint32_t votes;
108  uint32_t expected_votes;
109  uint32_t flags;
110 } __attribute__((packed));
111 
113  struct qb_ipc_request_header header __attribute__((aligned(8)));
114  uint32_t nodeid;
115  uint32_t value;
116  uint8_t param;
117  uint8_t _pad0;
118  uint8_t _pad1;
119  uint8_t _pad2;
120 } __attribute__((packed));
121 
123  struct qb_ipc_request_header header __attribute__((aligned(8)));
124  uint32_t operation;
126 } __attribute__((packed));
127 
129  struct qb_ipc_request_header header __attribute__((aligned(8)));
132 } __attribute__((packed));
133 
134 /*
135  * votequorum_exec onwire version (via totem)
136  */
137 
138 #include "votequorum.h"
139 
140 /*
141  * votequorum_exec onwire messages (via totem)
142  */
143 
144 #define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0
145 #define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1
146 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2
147 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3
148 
149 static void votequorum_exec_send_expectedvotes_notification(void);
150 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context);
151 static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context);
152 
153 #define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1
154 #define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2
155 #define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3
156 
157 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value);
158 
159 /*
160  * used by req_exec_quorum_qdevice_reg
161  */
162 #define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0
163 #define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1
164 
165 /*
166  * votequorum internal node status/view
167  */
168 
169 #define NODE_FLAGS_QUORATE 1
170 #define NODE_FLAGS_LEAVING 2
171 #define NODE_FLAGS_WFASTATUS 4
172 #define NODE_FLAGS_FIRST 8
173 #define NODE_FLAGS_QDEVICE_REGISTERED 16
174 #define NODE_FLAGS_QDEVICE_ALIVE 32
175 #define NODE_FLAGS_QDEVICE_CAST_VOTE 64
176 #define NODE_FLAGS_QDEVICE_MASTER_WINS 128
177 
178 typedef enum {
182 } nodestate_t;
183 
184 struct cluster_node {
185  int node_id;
187  uint32_t votes;
188  uint32_t expected_votes;
189  uint32_t flags;
190  struct list_head list;
191 };
192 
193 /*
194  * votequorum internal quorum status
195  */
196 
197 static uint8_t quorum;
198 static uint8_t cluster_is_quorate;
199 
200 /*
201  * votequorum membership data
202  */
203 
204 static struct cluster_node *us;
205 static struct list_head cluster_members_list;
206 static unsigned int quorum_members[PROCESSOR_COUNT_MAX];
207 static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX];
208 static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX];
209 static int quorum_members_entries = 0;
210 static int previous_quorum_members_entries = 0;
211 static int atb_nodelist_entries = 0;
212 static struct memb_ring_id quorum_ringid;
213 
214 /*
215  * pre allocate all cluster_nodes + one for qdevice
216  */
217 static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2];
218 static int cluster_nodes_entries = 0;
219 
220 /*
221  * votequorum tracking
222  */
223 struct quorum_pd {
224  unsigned char track_flags;
227  struct list_head list;
228  void *conn;
229 };
230 
231 static struct list_head trackers_list;
232 
233 /*
234  * votequorum timers
235  */
236 
237 static corosync_timer_handle_t qdevice_timer;
238 static int qdevice_timer_set = 0;
239 static corosync_timer_handle_t last_man_standing_timer;
240 static int last_man_standing_timer_set = 0;
241 static int sync_nodeinfo_sent = 0;
242 static int sync_wait_for_poll_or_timeout = 0;
243 
244 /*
245  * Service Interfaces required by service_message_handler struct
246  */
247 
248 static int sync_in_progress = 0;
249 
250 static void votequorum_sync_init (
251  const unsigned int *trans_list,
252  size_t trans_list_entries,
253  const unsigned int *member_list,
254  size_t member_list_entries,
255  const struct memb_ring_id *ring_id);
256 
257 static int votequorum_sync_process (void);
258 static void votequorum_sync_activate (void);
259 static void votequorum_sync_abort (void);
260 
261 static quorum_set_quorate_fn_t quorum_callback;
262 
263 /*
264  * votequorum_exec handler and definitions
265  */
266 
267 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
268 static int votequorum_exec_exit_fn (void);
269 static int votequorum_exec_send_nodeinfo(uint32_t nodeid);
270 
271 static void message_handler_req_exec_votequorum_nodeinfo (
272  const void *message,
273  unsigned int nodeid);
274 static void exec_votequorum_nodeinfo_endian_convert (void *message);
275 
276 static void message_handler_req_exec_votequorum_reconfigure (
277  const void *message,
278  unsigned int nodeid);
279 static void exec_votequorum_reconfigure_endian_convert (void *message);
280 
281 static void message_handler_req_exec_votequorum_qdevice_reg (
282  const void *message,
283  unsigned int nodeid);
284 static void exec_votequorum_qdevice_reg_endian_convert (void *message);
285 
286 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
287  const void *message,
288  unsigned int nodeid);
289 static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message);
290 
291 static struct corosync_exec_handler votequorum_exec_engine[] =
292 {
293  { /* 0 */
294  .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo,
295  .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert
296  },
297  { /* 1 */
298  .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure,
299  .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert
300  },
301  { /* 2 */
302  .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg,
303  .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert
304  },
305  { /* 3 */
306  .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure,
307  .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert
308  },
309 };
310 
311 /*
312  * Library Handler and Functions Definitions
313  */
314 
315 static int quorum_lib_init_fn (void *conn);
316 
317 static int quorum_lib_exit_fn (void *conn);
318 
319 static void qdevice_timer_fn(void *arg);
320 
321 static void message_handler_req_lib_votequorum_getinfo (void *conn,
322  const void *message);
323 
324 static void message_handler_req_lib_votequorum_setexpected (void *conn,
325  const void *message);
326 
327 static void message_handler_req_lib_votequorum_setvotes (void *conn,
328  const void *message);
329 
330 static void message_handler_req_lib_votequorum_trackstart (void *conn,
331  const void *message);
332 
333 static void message_handler_req_lib_votequorum_trackstop (void *conn,
334  const void *message);
335 
336 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
337  const void *message);
338 
339 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
340  const void *message);
341 
342 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
343  const void *message);
344 
345 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
346  const void *message);
347 
348 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
349  const void *message);
350 
351 static struct corosync_lib_handler quorum_lib_service[] =
352 {
353  { /* 0 */
354  .lib_handler_fn = message_handler_req_lib_votequorum_getinfo,
356  },
357  { /* 1 */
358  .lib_handler_fn = message_handler_req_lib_votequorum_setexpected,
360  },
361  { /* 2 */
362  .lib_handler_fn = message_handler_req_lib_votequorum_setvotes,
364  },
365  { /* 3 */
366  .lib_handler_fn = message_handler_req_lib_votequorum_trackstart,
368  },
369  { /* 4 */
370  .lib_handler_fn = message_handler_req_lib_votequorum_trackstop,
372  },
373  { /* 5 */
374  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register,
376  },
377  { /* 6 */
378  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister,
380  },
381  { /* 7 */
382  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update,
384  },
385  { /* 8 */
386  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll,
388  },
389  { /* 9 */
390  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins,
392  }
393 };
394 
395 static struct corosync_service_engine votequorum_service_engine = {
396  .name = "corosync vote quorum service v1.0",
397  .id = VOTEQUORUM_SERVICE,
398  .priority = 2,
399  .private_data_size = sizeof (struct quorum_pd),
400  .allow_inquorate = CS_LIB_ALLOW_INQUORATE,
401  .flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED,
402  .lib_init_fn = quorum_lib_init_fn,
403  .lib_exit_fn = quorum_lib_exit_fn,
404  .lib_engine = quorum_lib_service,
405  .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
406  .exec_init_fn = votequorum_exec_init_fn,
407  .exec_exit_fn = votequorum_exec_exit_fn,
408  .exec_engine = votequorum_exec_engine,
409  .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
410  .sync_init = votequorum_sync_init,
411  .sync_process = votequorum_sync_process,
412  .sync_activate = votequorum_sync_activate,
413  .sync_abort = votequorum_sync_abort
414 };
415 
417 {
418  return (&votequorum_service_engine);
419 }
420 
421 static struct default_service votequorum_service[] = {
422  {
423  .name = "corosync_votequorum",
424  .ver = 0,
426  },
427 };
428 
429 /*
430  * common/utility macros/functions
431  */
432 
/*
 * Larger of two values. Both arguments are evaluated twice, so do not
 * pass expressions with side effects.
 */
#define max(a,b) (((a) > (b)) ? (a) : (b))

/*
 * Walk a circular list: v starts at the link after head and advances
 * through ->next until it comes back round to head. head is parenthesised
 * at every use so that any pointer expression (for example a conditional)
 * can be passed without changing how the loop condition parses.
 */
#define list_iterate(v, head) \
	for (v = (head)->next; v != (head); v = v->next)
437 
438 static void node_add_ordered(struct cluster_node *newnode)
439 {
440  struct cluster_node *node = NULL;
441  struct list_head *tmp;
442  struct list_head *newlist = &newnode->list;
443 
444  ENTER();
445 
446  list_iterate(tmp, &cluster_members_list) {
447  node = list_entry(tmp, struct cluster_node, list);
448  if (newnode->node_id < node->node_id) {
449  break;
450  }
451  }
452 
453  if (!node) {
454  list_add(&newnode->list, &cluster_members_list);
455  } else {
456  newlist->prev = tmp->prev;
457  newlist->next = tmp;
458  tmp->prev->next = newlist;
459  tmp->prev = newlist;
460  }
461 
462  LEAVE();
463 }
464 
465 static struct cluster_node *allocate_node(unsigned int nodeid)
466 {
467  struct cluster_node *cl = NULL;
468  struct list_head *tmp;
469 
470  ENTER();
471 
472  if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) {
473  cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries];
474  cluster_nodes_entries++;
475  } else {
476  list_iterate(tmp, &cluster_members_list) {
477  cl = list_entry(tmp, struct cluster_node, list);
478  if (cl->state == NODESTATE_DEAD) {
479  break;
480  }
481  }
482  /*
483  * this should never happen
484  */
485  if (!cl) {
486  log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node %u data!!", nodeid);
487  goto out;
488  }
489  list_del(tmp);
490  }
491 
492  memset(cl, 0, sizeof(struct cluster_node));
493  cl->node_id = nodeid;
494  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
495  node_add_ordered(cl);
496  }
497 
498 out:
499  LEAVE();
500 
501  return cl;
502 }
503 
504 static struct cluster_node *find_node_by_nodeid(unsigned int nodeid)
505 {
506  struct cluster_node *node;
507  struct list_head *tmp;
508 
509  ENTER();
510 
511  if (nodeid == us->node_id) {
512  LEAVE();
513  return us;
514  }
515 
516  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
517  LEAVE();
518  return qdevice;
519  }
520 
521  list_iterate(tmp, &cluster_members_list) {
522  node = list_entry(tmp, struct cluster_node, list);
523  if (node->node_id == nodeid) {
524  LEAVE();
525  return node;
526  }
527  }
528 
529  LEAVE();
530  return NULL;
531 }
532 
533 static void get_lowest_node_id(void)
534 {
535  struct cluster_node *node = NULL;
536  struct list_head *tmp;
537 
538  ENTER();
539 
540  lowest_node_id = us->node_id;
541 
542  list_iterate(tmp, &cluster_members_list) {
543  node = list_entry(tmp, struct cluster_node, list);
544  if ((node->state == NODESTATE_MEMBER) &&
545  (node->node_id < lowest_node_id)) {
546  lowest_node_id = node->node_id;
547  }
548  }
549  log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: %d us: %d", lowest_node_id, us->node_id);
550  icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id);
551 
552  LEAVE();
553 }
554 
555 static void get_highest_node_id(void)
556 {
557  struct cluster_node *node = NULL;
558  struct list_head *tmp;
559 
560  ENTER();
561 
562  highest_node_id = us->node_id;
563 
564  list_iterate(tmp, &cluster_members_list) {
565  node = list_entry(tmp, struct cluster_node, list);
566  if ((node->state == NODESTATE_MEMBER) &&
567  (node->node_id > highest_node_id)) {
568  highest_node_id = node->node_id;
569  }
570  }
571  log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: %d us: %d", highest_node_id, us->node_id);
572  icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id);
573 
574  LEAVE();
575 }
576 
577 static int check_low_node_id_partition(void)
578 {
579  struct cluster_node *node = NULL;
580  struct list_head *tmp;
581  int found = 0;
582 
583  ENTER();
584 
585  list_iterate(tmp, &cluster_members_list) {
586  node = list_entry(tmp, struct cluster_node, list);
587  if ((node->state == NODESTATE_MEMBER) &&
588  (node->node_id == lowest_node_id)) {
589  found = 1;
590  }
591  }
592 
593  LEAVE();
594  return found;
595 }
596 
597 static int check_high_node_id_partition(void)
598 {
599  struct cluster_node *node = NULL;
600  struct list_head *tmp;
601  int found = 0;
602 
603  ENTER();
604 
605  list_iterate(tmp, &cluster_members_list) {
606  node = list_entry(tmp, struct cluster_node, list);
607  if ((node->state == NODESTATE_MEMBER) &&
608  (node->node_id == highest_node_id)) {
609  found = 1;
610  }
611  }
612 
613  LEAVE();
614  return found;
615 }
616 
/*
 * Linear search for nodeid in the first `entries` slots of members.
 *
 * Returns 1 if the id is present, 0 if it is not (including when entries
 * is zero or negative).
 */
static int is_in_nodelist(int nodeid, unsigned int *members, int entries)
{
	int idx = 0;
	int present = 0;

	ENTER();

	while (idx < entries && !present) {
		if (nodeid == members[idx]) {
			present = 1;
		}
		idx++;
	}

	LEAVE();
	return present;
}
631 
632 /*
633  * The algorithm for a list of tie-breaker nodes is:
634  * travel the list of nodes in the auto_tie_breaker list,
635  * if the node IS in our current partition, check if the
636  * nodes earlier in the atb list are in the 'previous' partition;
637  * If none are found then we are safe to be quorate, if any are
638  * then we cannot be as we don't know if that node is up or down.
639  * If we don't have a node in the current list we are NOT quorate.
640  * Obviously if we find the first node in the atb list in our
641  * partition then we are quorate.
642  *
643  * Special cases lowest nodeid, and highest nodeid are handled separately.
644  */
645 static int check_auto_tie_breaker(void)
646 {
647  int i, j;
648  int res;
649  ENTER();
650 
651  if (auto_tie_breaker == ATB_LOWEST) {
652  res = check_low_node_id_partition();
653  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LOWEST decision: %d", res);
654  LEAVE();
655  return res;
656  }
657  if (auto_tie_breaker == ATB_HIGHEST) {
658  res = check_high_node_id_partition();
659  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_HIGHEST decision: %d", res);
660  LEAVE();
661  return res;
662  }
663 
664  /* Assume ATB_LIST, we should never be called for ATB_NONE */
665  for (i=0; i < atb_nodelist_entries; i++) {
666  if (is_in_nodelist(atb_nodelist[i], quorum_members, quorum_members_entries)) {
667  /*
668  * Node is in our partition, if any of its predecessors are
669  * in the previous quorum partition then it might be in the
670  * 'other half' (as we've got this far without seeing it here)
671  * and so we can't be quorate.
672  */
673  for (j=0; j<i; j++) {
674  if (is_in_nodelist(atb_nodelist[j], previous_quorum_members, previous_quorum_members_entries)) {
675  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node %d in previous partition but not here, quorum denied", atb_nodelist[j]);
676  LEAVE();
677  return 0;
678  }
679  }
680 
681  /*
682  * None of the other list nodes were in the previous partition, if there
683  * are enough votes, we can be quorate
684  */
685  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node %d in current partition, we can be quorate", atb_nodelist[i]);
686  LEAVE();
687  return 1;
688  }
689  }
690  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found no list nodes in current partition, we cannot be quorate");
691  LEAVE();
692  return 0;
693 }
694 
695 /*
696  * atb_string can be either:
697  * 'lowest'
698  * 'highest'
699  * a list of nodeids
700  */
701 static void parse_atb_string(char *atb_string)
702 {
703  char *ptr;
704  long num;
705 
706  ENTER();
707  auto_tie_breaker = ATB_NONE;
708 
709  if (!strcmp(atb_string, "lowest"))
710  auto_tie_breaker = ATB_LOWEST;
711 
712  if (!strcmp(atb_string, "highest"))
713  auto_tie_breaker = ATB_HIGHEST;
714 
715  if (atoi(atb_string)) {
716 
717  atb_nodelist_entries = 0;
718  ptr = atb_string;
719  do {
720  num = strtol(ptr, &ptr, 10);
721  if (num) {
722  log_printf(LOGSYS_LEVEL_DEBUG, "ATB nodelist[%d] = %d", atb_nodelist_entries, num);
723  atb_nodelist[atb_nodelist_entries++] = num;
724  }
725  } while (num);
726 
727  if (atb_nodelist_entries) {
728  auto_tie_breaker = ATB_LIST;
729  }
730  }
731  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
732  log_printf(LOGSYS_LEVEL_DEBUG, "ATB type = %d", auto_tie_breaker);
733 
734  /* Make sure we got something */
735  if (auto_tie_breaker == ATB_NONE) {
736  log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_nodes is not valid. It must be 'lowest', 'highest' or a space-separated list of node IDs. auto_tie_breaker is disabled");
737  auto_tie_breaker = ATB_NONE;
738  }
739  LEAVE();
740 }
741 
742 static int check_qdevice_master(void)
743 {
744  struct cluster_node *node = NULL;
745  struct list_head *tmp;
746  int found = 0;
747 
748  ENTER();
749 
750  list_iterate(tmp, &cluster_members_list) {
751  node = list_entry(tmp, struct cluster_node, list);
752  if ((node->state == NODESTATE_MEMBER) &&
755  found = 1;
756  }
757  }
758 
759  LEAVE();
760  return found;
761 }
762 
763 static void decode_flags(uint32_t flags)
764 {
765  ENTER();
766 
768  "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s",
769  (flags & NODE_FLAGS_QUORATE)?"Yes":"No",
770  (flags & NODE_FLAGS_LEAVING)?"Yes":"No",
771  (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No",
772  (flags & NODE_FLAGS_FIRST)?"Yes":"No",
773  (flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No",
774  (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No",
775  (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No",
776  (flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No");
777 
778  LEAVE();
779 }
780 
781 /*
782  * load/save are copied almost pristine from totemsrp,c
783  */
784 static int load_ev_tracking_barrier(void)
785 {
786  int res = 0;
787  char filename[PATH_MAX];
788 
789  ENTER();
790 
791  snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_run_dir());
792 
793  ev_tracking_fd = open(filename, O_RDWR, 0700);
794  if (ev_tracking_fd != -1) {
795  res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t));
796  close(ev_tracking_fd);
797  if (res == sizeof (uint32_t)) {
798  LEAVE();
799  return 0;
800  }
801  }
802 
803  ev_tracking_barrier = 0;
804  umask(0);
805  ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
806  if (ev_tracking_fd != -1) {
807  res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
808  if ((res == -1) || (res != sizeof (uint32_t))) {
810  "Unable to write to %s", filename);
811  }
812  close(ev_tracking_fd);
813  LEAVE();
814  return 0;
815  }
817  "Unable to create %s file", filename);
818 
819  LEAVE();
820 
821  return -1;
822 }
823 
824 static void update_wait_for_all_status(uint8_t wfa_status)
825 {
826  ENTER();
827 
828  wait_for_all_status = wfa_status;
829  if (wait_for_all_status) {
831  } else {
832  us->flags &= ~NODE_FLAGS_WFASTATUS;
833  }
834  icmap_set_uint8("runtime.votequorum.wait_for_all_status",
835  wait_for_all_status);
836 
837  LEAVE();
838 }
839 
840 static void update_two_node(void)
841 {
842  ENTER();
843 
844  icmap_set_uint8("runtime.votequorum.two_node", two_node);
845 
846  LEAVE();
847 }
848 
849 static void update_ev_barrier(uint32_t expected_votes)
850 {
851  ENTER();
852 
853  ev_barrier = expected_votes;
854  icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier);
855 
856  LEAVE();
857 }
858 
859 static void update_qdevice_can_operate(uint8_t status)
860 {
861  ENTER();
862 
863  qdevice_can_operate = status;
864  icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate);
865 
866  LEAVE();
867 }
868 
869 static void update_qdevice_master_wins(uint8_t allow)
870 {
871  ENTER();
872 
873  qdevice_master_wins = allow;
874  icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins);
875 
876  LEAVE();
877 }
878 
879 static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
880 {
881  int res;
882 
883  ENTER();
884 
885  ev_tracking_barrier = ev_t_barrier;
886  icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
887 
888  if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
890  "Unable to update ev_tracking_barrier on disk data!!!");
891  LEAVE();
892  return;
893  }
894 
895  res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
896  if (res != sizeof (uint32_t)) {
898  "Unable to update ev_tracking_barrier on disk data!!!");
899  }
900 #ifdef HAVE_FDATASYNC
901  fdatasync(ev_tracking_fd);
902 #else
903  fsync(ev_tracking_fd);
904 #endif
905 
906  LEAVE();
907 }
908 
909 /*
910  * quorum calculation core bits
911  */
912 
913 static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes)
914 {
915  struct list_head *nodelist;
916  struct cluster_node *node;
917  unsigned int total_votes = 0;
918  unsigned int highest_expected = 0;
919  unsigned int newquorum, q1, q2;
920  unsigned int total_nodes = 0;
921 
922  ENTER();
923 
924  if ((allow_downscale) && (allow_decrease) && (max_expected)) {
925  max_expected = max(ev_barrier, max_expected);
926  }
927 
928  list_iterate(nodelist, &cluster_members_list) {
929  node = list_entry(nodelist, struct cluster_node, list);
930 
931  log_printf(LOGSYS_LEVEL_DEBUG, "node %u state=%d, votes=%u, expected=%u",
932  node->node_id, node->state, node->votes, node->expected_votes);
933 
934  if (node->state == NODESTATE_MEMBER) {
935  highest_expected = max(highest_expected, node->expected_votes);
936  total_votes += node->votes;
937  total_nodes++;
938  }
939  }
940 
942  log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes);
943  total_votes += qdevice->votes;
944  total_nodes++;
945  }
946 
947  if (max_expected > 0) {
948  highest_expected = max_expected;
949  }
950 
951  /*
952  * This quorum calculation is taken from the OpenVMS Cluster Systems
953  * manual, but, then, you guessed that didn't you
954  */
955  q1 = (highest_expected + 2) / 2;
956  q2 = (total_votes + 2) / 2;
957  newquorum = max(q1, q2);
958 
959  /*
960  * Normally quorum never decreases but the system administrator can
961  * force it down by setting expected votes to a maximum value
962  */
963  if (!allow_decrease) {
964  newquorum = max(quorum, newquorum);
965  }
966 
967  /*
968  * The special two_node mode allows each of the two nodes to retain
969  * quorum if the other fails. Only one of the two should live past
970  * fencing (as both nodes try to fence each other in split-brain.)
971  * Also: if there are more than two nodes, force us inquorate to avoid
972  * any damage or confusion.
973  */
974  if (two_node && total_nodes <= 2) {
975  newquorum = 1;
976  }
977 
978  if (ret_total_votes) {
979  *ret_total_votes = total_votes;
980  }
981 
982  LEAVE();
983  return newquorum;
984 }
985 
986 static void update_node_expected_votes(int new_expected_votes)
987 {
988  struct list_head *nodelist;
989  struct cluster_node *node;
990 
991  if (new_expected_votes) {
992  list_iterate(nodelist, &cluster_members_list) {
993  node = list_entry(nodelist, struct cluster_node, list);
994 
995  if (node->state == NODESTATE_MEMBER) {
996  node->expected_votes = new_expected_votes;
997  }
998  }
999  }
1000 }
1001 
1002 static void are_we_quorate(unsigned int total_votes)
1003 {
1004  int quorate;
1005  int quorum_change = 0;
1006 
1007  ENTER();
1008 
1009  /*
1010  * wait for all nodes to show up before granting quorum
1011  */
1012 
1013  if ((wait_for_all) && (wait_for_all_status)) {
1014  if (total_votes != us->expected_votes) {
1016  "Waiting for all cluster members. "
1017  "Current votes: %d expected_votes: %d",
1018  total_votes, us->expected_votes);
1019  assert(!cluster_is_quorate);
1020  return;
1021  }
1022  update_wait_for_all_status(0);
1023  }
1024 
1025  if (quorum > total_votes) {
1026  quorate = 0;
1027  } else {
1028  quorate = 1;
1029  get_lowest_node_id();
1030  get_highest_node_id();
1031  }
1032 
1033  if ((auto_tie_breaker != ATB_NONE) &&
1034  /* Must be a half (or half-1) split */
1035  (total_votes == (us->expected_votes / 2)) &&
1036  /* If the 'other' partition in a split might have quorum then we can't run ATB */
1037  (previous_quorum_members_entries - quorum_members_entries < quorum) &&
1038  (check_auto_tie_breaker() == 1)) {
1039  quorate = 1;
1040  }
1041 
1042  if ((qdevice_master_wins) &&
1043  (!quorate) &&
1044  (check_qdevice_master() == 1)) {
1045  log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition");
1046  quorate = 1;
1047  }
1048 
1049  if (cluster_is_quorate && !quorate) {
1050  quorum_change = 1;
1051  log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity");
1052  }
1053  if (!cluster_is_quorate && quorate) {
1054  quorum_change = 1;
1055  log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity");
1056  }
1057 
1058  cluster_is_quorate = quorate;
1059  if (cluster_is_quorate) {
1060  us->flags |= NODE_FLAGS_QUORATE;
1061  } else {
1062  us->flags &= ~NODE_FLAGS_QUORATE;
1063  }
1064 
1065  if (wait_for_all) {
1066  if (quorate) {
1067  update_wait_for_all_status(0);
1068  } else {
1069  update_wait_for_all_status(1);
1070  }
1071  }
1072 
1073  if ((quorum_change) &&
1074  (sync_in_progress == 0)) {
1075  quorum_callback(quorum_members, quorum_members_entries,
1076  cluster_is_quorate, &quorum_ringid);
1077  votequorum_exec_send_quorum_notification(NULL, 0L);
1078  }
1079 
1080  LEAVE();
1081 }
1082 
1083 static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members)
1084 {
1085  unsigned int total_votes = 0;
1086  unsigned int cluster_members = 0;
1087  struct list_head *nodelist;
1088  struct cluster_node *node;
1089 
1090  ENTER();
1091 
1092  list_iterate(nodelist, &cluster_members_list) {
1093  node = list_entry(nodelist, struct cluster_node, list);
1094  if (node->state == NODESTATE_MEMBER) {
1095  cluster_members++;
1096  total_votes += node->votes;
1097  }
1098  }
1099 
1100  if (qdevice->votes) {
1101  total_votes += qdevice->votes;
1102  cluster_members++;
1103  }
1104 
1105  *totalvotes = total_votes;
1106  *current_members = cluster_members;
1107 
1108  LEAVE();
1109 }
1110 
1111 /*
1112  * Recalculate cluster quorum, set quorate and notify changes
1113  */
1114 static void recalculate_quorum(int allow_decrease, int by_current_nodes)
1115 {
1116  unsigned int total_votes = 0;
1117  unsigned int cluster_members = 0;
1118 
1119  ENTER();
1120 
1121  get_total_votes(&total_votes, &cluster_members);
1122 
1123  if (!by_current_nodes) {
1124  cluster_members = 0;
1125  }
1126 
1127  /*
1128  * Keep expected_votes at the highest number of votes in the cluster
1129  */
1130  log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes);
1131  if (total_votes > us->expected_votes) {
1132  us->expected_votes = total_votes;
1133  votequorum_exec_send_expectedvotes_notification();
1134  }
1135 
1136  if ((ev_tracking) &&
1137  (us->expected_votes > ev_tracking_barrier)) {
1138  update_ev_tracking_barrier(us->expected_votes);
1139  }
1140 
1141  quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
1142  update_node_expected_votes(cluster_members);
1143 
1144  are_we_quorate(total_votes);
1145 
1146  LEAVE();
1147 }
1148 
1149 /*
1150  * configuration bits and pieces
1151  */
1152 
/*
 * Walk the "nodelist.node." keys in icmap and derive this node's votes,
 * the number of nodes and the sum of all node votes.
 *
 * Only keys whose last component is "ring0_addr" are counted, so every
 * node position is counted once. A node that has no
 * "nodelist.node.<pos>.quorum_votes" key is given 1 vote.
 *
 * votes          - out: votes of the node whose position equals
 *                  "nodelist.local_node_pos"; not written when no counted
 *                  entry has that position
 * nodes          - out: number of nodes counted
 * expected_votes - out: sum of the votes of all counted nodes
 *
 * Returns 1 when the nodelist was walked, 0 when
 * "nodelist.local_node_pos" cannot be read (no output is written then).
 */
1153 static int votequorum_read_nodelist_configuration(uint32_t *votes,
1154  uint32_t *nodes,
1155  uint32_t *expected_votes)
1156 {
1157  icmap_iter_t iter;
1158  const char *iter_key;
1159  char tmp_key[ICMAP_KEYNAME_MAXLEN];
1160  uint32_t our_pos, node_pos;
1161  uint32_t nodecount = 0;
1162  uint32_t nodelist_expected_votes = 0;
1163  uint32_t node_votes = 0;
1164  int res = 0;
1165 
1166  ENTER();
1167 
1168  if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) {
/*
 * NOTE(review): listing line 1169 is absent, so the call this string
 * argument belongs to is not visible here - presumably a log_printf();
 * confirm against the original file.
 */
1170  "No nodelist defined or our node is not in the nodelist");
/* this early exit does not pass through LEAVE() */
1171  return 0;
1172  }
1173 
1174  iter = icmap_iter_init("nodelist.node.");
1175 
1176  while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
1177 
/*
 * Split the key into the node position and the remaining key component.
 * NOTE(review): "%s" carries no field width; this assumes the component
 * always fits into tmp_key - confirm.
 */
1178  res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
1179  if (res != 2) {
1180  continue;
1181  }
1182 
/* count each node once, via its ring0_addr key */
1183  if (strcmp(tmp_key, "ring0_addr") != 0) {
1184  continue;
1185  }
1186 
1187  nodecount++;
1188 
/* tmp_key is reused to build the per-node quorum_votes key */
1189  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos);
1190  if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) {
1191  node_votes = 1;
1192  }
1193 
1194  nodelist_expected_votes = nodelist_expected_votes + node_votes;
1195 
1196  if (node_pos == our_pos) {
1197  *votes = node_votes;
1198  }
1199  }
1200 
1201  *expected_votes = nodelist_expected_votes;
1202  *nodes = nodecount;
1203 
1204  icmap_iter_finalize(iter);
1205 
1206  LEAVE();
1207 
1208  return 1;
1209 }
1210 
1211 static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes)
1212 {
1213  char *qdevice_model = NULL;
1214  int ret = 0;
1215 
1216  ENTER();
1217 
1218  if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) {
1219  if (strlen(qdevice_model)) {
1220  if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) {
1221  *qdevice_votes = -1;
1222  }
1223  if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) {
1224  qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
1225  }
1226  if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) {
1227  qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
1228  }
1229  update_qdevice_can_operate(1);
1230  ret = 1;
1231  }
1232 
1233  free(qdevice_model);
1234  }
1235 
1236  LEAVE();
1237 
1238  return ret;
1239 }
1240 
/* Values for the "runtime" argument of votequorum_readconfig() */
1241 #define VOTEQUORUM_READCONFIG_STARTUP 0
1242 #define VOTEQUORUM_READCONFIG_RUNTIME 1
1243 
/*
 * Read the votequorum configuration from icmap, check it for conflicting
 * options and apply it to the module state (us, qdevice, two_node,
 * wait_for_all, auto_tie_breaker, ...).
 *
 * runtime - VOTEQUORUM_READCONFIG_STARTUP for the initial read,
 *           VOTEQUORUM_READCONFIG_RUNTIME for a re-read. On a re-read
 *           two_node, expected_votes and auto_tie_breaker are first reset,
 *           and the "special features" block is skipped.
 *
 * Returns NULL on success, otherwise a pointer to a constant error string
 * (the caller must not free it). At startup every detected conflict is
 * returned as an error. At runtime most conflicts are only logged and
 * quorum device operations are disabled; the auto_tie_breaker +
 * last_man_standing conflict still returns an error.
 */
1244 static char *votequorum_readconfig(int runtime)
1245 {
1246  uint32_t node_votes = 0, qdevice_votes = 0;
1247  uint32_t node_expected_votes = 0, expected_votes = 0;
1248  uint32_t node_count = 0;
1249  uint8_t atb = 0;
1250  int have_nodelist, have_qdevice;
1251  char *atb_string = NULL;
1252  char *error = NULL;
1253 
1254  ENTER();
1255 
1256  log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime);
1257 
1258  /*
1259  * Set the few things we re-read at runtime back to their defaults
1260  */
1261  if (runtime) {
1262  two_node = 0;
1263  expected_votes = 0;
1264  /* auto_tie_breaker cannot be changed by config reload, but
1265  * we automatically disable it on odd-sized clusters without
1266  * wait_for_all.
1267  * We may need to re-enable it when membership changes to ensure
1268  * that auto_tie_breaker is consistent across all nodes */
1269  auto_tie_breaker = initial_auto_tie_breaker;
1270  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1271  }
1272 
1273  /*
1274  * gather basic data here
1275  */
1276  (void)icmap_get_uint32("quorum.expected_votes", &expected_votes);
1277  have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
1278  have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
1279  (void)icmap_get_uint8("quorum.two_node", &two_node);
1280 
1281  /*
1282  * do config verification and enablement
1283  */
1284 
1285  if ((!have_nodelist) && (!expected_votes)) {
1286  if (!runtime) {
1287  error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!";
1288  } else {
1289  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!");
1290  log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data");
1291  }
1292  goto out;
1293  }
1294 
1295  /*
1296  * two_node and qdevice are not compatible in the same config.
1297  * try to make an educated guess of what to do
1298  */
1299 
1300  if ((two_node) && (have_qdevice)) {
1301  if (!runtime) {
1302  error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!";
1303  goto out;
1304  } else {
1305  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!");
1306  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1307  log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node");
1308  two_node = 0;
1309  } else {
1310  log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node");
1311  update_qdevice_can_operate(0);
1312  }
1313  }
1314  }
1315 
1316  /*
1317  * Enable special features
1318  */
1319  if (!runtime) {
1320  (void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
/* remember that wait_for_all was not set explicitly */
1321  if (icmap_get_uint8("quorum.wait_for_all", &wait_for_all) != CS_OK) {
1322  wait_for_all_autoset = 1;
1323  }
1324  (void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
1325  (void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
1326  (void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
1327  (void)icmap_get_uint8("quorum.auto_tie_breaker", &atb);
1328  (void)icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
1329 
1330  /* auto_tie_breaker defaults to LOWEST */
1331  if (atb) {
1332  auto_tie_breaker = ATB_LOWEST;
1333  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1334  }
1335  else {
1336  auto_tie_breaker = ATB_NONE;
1337  if (atb_string) {
/*
 * NOTE(review): listing line 1338 is absent, so the call this string
 * argument belongs to is not visible here - presumably a log_printf();
 * confirm against the original file.
 */
1339  "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0");
1340  }
1341  }
1342 
1343  if (atb && atb_string) {
1344  parse_atb_string(atb_string);
1345  }
1346  free(atb_string);
/* kept so that a runtime re-read can restore the startup value */
1347  initial_auto_tie_breaker = auto_tie_breaker;
1348 
1349  /* allow_downscale requires ev_tracking */
1350  if (allow_downscale) {
1351  ev_tracking = 1;
1352  }
1353 
1354  if (ev_tracking) {
1355  if (load_ev_tracking_barrier() < 0) {
/* direct return: the "out" label is bypassed, LEAVE() is called here */
1356  LEAVE();
1357  return ((char *)"Unable to load ev_tracking file!");
1358  }
1359  update_ev_tracking_barrier(ev_tracking_barrier);
1360  }
1361 
1362  }
1363 
1364  /*
1365  * Changing of wait_for_all during runtime is not supported, but changing of two_node is
1366  * and two_node may set wfa if not configured explicitly. It is safe to unset it
1367  * (or set it back) when two_node changes.
1368  */
1369  if (wait_for_all_autoset) {
1370  wait_for_all = two_node;
1371  }
1372 
1373  /* two_node and auto_tie_breaker are not compatible as two_node uses
1374  * a fence race to decide quorum whereas ATB decides based on node id
1375  */
1376  if (two_node && auto_tie_breaker != ATB_NONE) {
1377  log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible.");
1378  log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf");
1379  two_node = 0;
1380  }
1381 
1382  /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs
1383  * to be set so that an isolated half+1 without the tie breaker node
1384  * does not have quorum on reboot.
1385  */
/* the parity test below is made on node_expected_votes (sum of nodelist votes) */
1386  if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) &&
1387  (!wait_for_all)) {
1388  if (last_man_standing) {
1389  /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what
1390  * they might want so we'll just quit.
1391  */
1392  log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n");
1393  log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n");
1394  log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n");
1395  log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n");
1396  log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. Please fix your corosync.conf\n");
/* this error is set regardless of the runtime argument */
1397  error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster";
1398  goto out;
1399  }
1400  else {
1401  log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n");
1402  log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n");
1403  log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n");
1404  auto_tie_breaker = ATB_NONE;
1405  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1406  }
1407  }
1408 
1409  /*
1410  * quorum device is not compatible with last_man_standing and auto_tie_breaker
1411  * neither lms or atb can be set at runtime, so there is no need to check for
1412  * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have
1413  * been enabled at startup.
1414  */
1415 
1416  if ((have_qdevice) && (last_man_standing)) {
1417  if (!runtime) {
1418  error = (char *)"configuration error: quorum.device is not compatible with last_man_standing";
1419  goto out;
1420  } else {
1421  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing");
1422  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1423  update_qdevice_can_operate(0);
1424  }
1425  }
1426 
1427  if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) {
1428  if (!runtime) {
1429  error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker";
1430  goto out;
1431  } else {
1432  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker");
1433  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1434  update_qdevice_can_operate(0);
1435  }
1436  }
1437 
1438  if ((have_qdevice) && (allow_downscale)) {
1439  if (!runtime) {
1440  error = (char *)"configuration error: quorum.device is not compatible with allow_downscale";
1441  goto out;
1442  } else {
1443  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale");
1444  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1445  update_qdevice_can_operate(0);
1446  }
1447  }
1448 
1449  /*
1450  * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes
1451  * we don't know what the quorum device should vote.
1452  */
1453 
/*
 * qdevice_votes is a uint32_t; -1 is the "votes not configured" marker
 * stored by votequorum_qdevice_is_configured().
 */
1454  if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) {
1455  if (!runtime) {
1456  error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set";
1457  goto out;
1458  } else {
1459  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set");
1460  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1461  update_qdevice_can_operate(0);
1462  }
1463  }
1464 
1465  /*
1466  * if user specifies a node list with uneven votes and no device.votes
1467  * we cannot autocalculate the votes
1468  */
1469 
/* node_count != node_expected_votes means some node does not vote exactly 1 */
1470  if ((have_qdevice) &&
1471  (qdevice_votes == -1) &&
1472  (have_nodelist) &&
1473  (node_count != node_expected_votes)) {
1474  if (!runtime) {
1475  error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1";
1476  goto out;
1477  } else {
1478  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1");
1479  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1480  update_qdevice_can_operate(0);
1481  }
1482  }
1483 
1484  /*
1485  * validate quorum device votes vs expected_votes
1486  */
1487 
/*
 * NOTE(review): qdevice_votes is unsigned, so the -1 marker also satisfies
 * "> 0" here; at runtime this check can therefore be reached with the
 * marker still in place - confirm this is intended.
 */
1488  if ((qdevice_votes > 0) && (expected_votes)) {
1489  int delta = expected_votes - qdevice_votes;
1490  if (delta < 2) {
1491  if (!runtime) {
1492  error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low";
1493  goto out;
1494  } else {
1495  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low");
1496  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1497  update_qdevice_can_operate(0);
1498  }
1499  }
1500  }
1501 
1502  /*
1503  * automatically calculate device votes and adjust expected_votes from nodelist
1504  */
1505 
/* every node votes 1: the device gets (nodes - 1) votes, added to the expected total */
1506  if ((have_qdevice) &&
1507  (qdevice_votes == -1) &&
1508  (!expected_votes) &&
1509  (have_nodelist) &&
1510  (node_count == node_expected_votes)) {
1511  qdevice_votes = node_expected_votes - 1;
1512  node_expected_votes = node_expected_votes + qdevice_votes;
1513  }
1514 
1515  /*
1516  * set this node votes and expected_votes
1517  */
1518  log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);
1519 
/* with ev_tracking the configured expected_votes is replaced by the barrier */
1520  if (ev_tracking) {
1521  expected_votes = ev_tracking_barrier;
1522  }
1523 
1524  if (have_nodelist) {
1525  us->votes = node_votes;
1526  us->expected_votes = node_expected_votes;
1527  } else {
/* no nodelist: default to 1 vote unless "quorum.votes" is set */
1528  us->votes = 1;
1529  (void)icmap_get_uint32("quorum.votes", &us->votes);
1530  }
1531 
1532  if (expected_votes) {
/*
 * NOTE(review): listing line 1533 is absent, so the body of this branch is
 * not visible here - presumably it applies expected_votes to
 * us->expected_votes; confirm against the original file.
 */
1534  }
1535 
1536  /*
1537  * set qdevice votes
1538  */
1539 
1540  if (!have_qdevice) {
1541  qdevice->votes = 0;
1542  }
1543 
1544  if (qdevice_votes != -1) {
1545  qdevice->votes = qdevice_votes;
1546  }
1547 
1548  update_ev_barrier(us->expected_votes);
1549  update_two_node();
1550  if (wait_for_all) {
/* the wait_for_all status is only armed at startup, never on a re-read */
1551  if (!runtime) {
1552  update_wait_for_all_status(1);
1553  }
1554  } else if (wait_for_all_autoset && wait_for_all_status) {
1555  /*
1556  * Reset wait for all status for consistency when wfa is auto-unset by 2node.
1557  * wait_for_all_status would be ignored by are_we_quorate anyway.
1558  */
1559  update_wait_for_all_status(0);
1560  }
1561 
1562 out:
1563  LEAVE();
1564  return error;
1565 }
1566 
1567 static void votequorum_refresh_config(
1568  int32_t event,
1569  const char *key_name,
1570  struct icmap_notify_value new_val,
1571  struct icmap_notify_value old_val,
1572  void *user_data)
1573 {
1574  int old_votes, old_expected_votes;
1575  uint8_t reloading;
1576  uint8_t cancel_wfa;
1577 
1578  ENTER();
1579 
1580  /*
1581  * If a full reload is in progress then don't do anything until it's done and
1582  * can reconfigure it all atomically
1583  */
1584  if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
1585  return ;
1586  }
1587 
1588  (void)icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
1589  if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 &&
1590  cancel_wfa >= 1) {
1591  icmap_set_uint8("quorum.cancel_wait_for_all", 0);
1592  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA,
1593  us->node_id, 0);
1594  return;
1595  }
1596 
1597  old_votes = us->votes;
1598  old_expected_votes = us->expected_votes;
1599 
1600  /*
1601  * Reload the configuration
1602  */
1603  votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME);
1604 
1605  /*
1606  * activate new config
1607  */
1608  votequorum_exec_send_nodeinfo(us->node_id);
1609  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
1610  if (us->votes != old_votes) {
1611  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES,
1612  us->node_id, us->votes);
1613  }
1614  if (us->expected_votes != old_expected_votes) {
1615  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES,
1616  us->node_id, us->expected_votes);
1617  }
1618 
1619  LEAVE();
1620 }
1621 
/*
 * Register votequorum_refresh_config() with icmap for three keys:
 * "nodelist.", "quorum." and "config.totemconfig_reload_in_progress".
 *
 * The return values of icmap_track_add() are not checked, and the track
 * handles are only stored in local variables.
 *
 * NOTE(review): listing lines 1631, 1637 and 1643 (the second argument of
 * each icmap_track_add() call) are absent here - presumably the set of
 * tracked event types; confirm against the original file.
 */
1622 static void votequorum_exec_add_config_notification(void)
1623 {
1624  icmap_track_t icmap_track_nodelist = NULL;
1625  icmap_track_t icmap_track_quorum = NULL;
1626  icmap_track_t icmap_track_reload = NULL;
1627 
1628  ENTER();
1629 
1630  icmap_track_add("nodelist.",
1632  votequorum_refresh_config,
1633  NULL,
1634  &icmap_track_nodelist);
1635 
1636  icmap_track_add("quorum.",
1638  votequorum_refresh_config,
1639  NULL,
1640  &icmap_track_quorum);
1641 
1642  icmap_track_add("config.totemconfig_reload_in_progress",
1644  votequorum_refresh_config,
1645  NULL,
1646  &icmap_track_reload);
1647 
1648  LEAVE();
1649 }
1650 
1651 /*
1652  * votequorum_exec core
1653  */
1654 
/*
 * Multicast a reconfigure request to the cluster with TOTEM_AGREED.
 *
 * Returns the result of corosync_api->totem_mcast().
 *
 * NOTE(review): listing lines 1657 and 1663-1671 are absent here, so the
 * declaration and initialisation of req_exec_quorum_reconfigure are not
 * visible - presumably param, nodeid and value are copied into it; confirm
 * against the original file.
 */
1655 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value)
1656 {
1658  struct iovec iov[1];
1659  int ret;
1660 
1661  ENTER();
1662 
1669 
1672 
/* the whole request structure is sent as a single iovec */
1673  iov[0].iov_base = (void *)&req_exec_quorum_reconfigure;
1674  iov[0].iov_len = sizeof(req_exec_quorum_reconfigure);
1675 
1676  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1677 
1678  LEAVE();
1679  return ret;
1680 }
1681 
/*
 * Multicast a nodeinfo message for the node identified by nodeid with
 * TOTEM_AGREED.
 *
 * Returns -1 when the node is unknown to find_node_by_nodeid(), otherwise
 * the result of corosync_api->totem_mcast().
 *
 * NOTE(review): listing lines 1684, 1695-1699 and 1704 are absent here, so
 * the declaration of req_exec_quorum_nodeinfo and the statements filling
 * it are not visible - presumably they copy the node's data into the
 * message; confirm against the original file.
 */
1682 static int votequorum_exec_send_nodeinfo(uint32_t nodeid)
1683 {
1685  struct iovec iov[1];
1686  struct cluster_node *node;
1687  int ret;
1688 
1689  ENTER();
1690 
1691  node = find_node_by_nodeid(nodeid);
1692  if (!node) {
/* this early exit does not pass through LEAVE() */
1693  return -1;
1694  }
1695 
/* flags are decoded for real nodes only, not for the quorum device id */
1700  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
1701  decode_flags(node->flags);
1702  }
1703 
1705  req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo);
1706 
1707  iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo;
1708  iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo);
1709 
1710  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1711 
1712  LEAVE();
1713  return ret;
1714 }
1715 
/*
 * Multicast a quorum device rename request (oldname -> newname) with
 * TOTEM_AGREED.
 *
 * Both names are asserted to be shorter than the corresponding field of
 * the request structure.
 *
 * Returns the result of corosync_api->totem_mcast().
 *
 * NOTE(review): listing lines 1718, 1724-1725, 1728 and 1731 are absent
 * here, so the declaration of req_exec_quorum_qdevice_reconfigure and the
 * statements filling it are not visible - presumably the names are copied
 * right after each assert; confirm against the original file.
 */
1716 static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname)
1717 {
1719  struct iovec iov[1];
1720  int ret;
1721 
1722  ENTER();
1723 
1726 
1727  assert(strlen(oldname) < sizeof(req_exec_quorum_qdevice_reconfigure.oldname));
1729 
1730  assert(strlen(newname) < sizeof(req_exec_quorum_qdevice_reconfigure.newname));
1732 
1733  iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure;
1734  iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure);
1735 
1736  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1737 
1738  LEAVE();
1739  return ret;
1740 }
1741 
/*
 * Multicast a quorum device registration message carrying
 * qdevice_name_req with TOTEM_AGREED.
 *
 * The name is asserted to be shorter than the qdevice_name field before it
 * is copied with strcpy().
 *
 * Returns the result of corosync_api->totem_mcast().
 *
 * NOTE(review): listing lines 1744 and 1750-1752 are absent here, so the
 * declaration of req_exec_quorum_qdevice_reg and the remaining field
 * assignments (presumably including "operation") are not visible; confirm
 * against the original file.
 */
1742 static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req)
1743 {
1745  struct iovec iov[1];
1746  int ret;
1747 
1748  ENTER();
1749 
1753 
1754  assert(strlen(qdevice_name_req) < sizeof(req_exec_quorum_qdevice_reg.qdevice_name));
1755  strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req);
1756 
1757  iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg;
1758  iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg);
1759 
1760  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1761 
1762  LEAVE();
1763  return ret;
1764 }
1765 
1766 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context)
1767 {
1768  struct res_lib_votequorum_quorum_notification *res_lib_votequorum_notification;
1769  struct list_head *tmp;
1770  struct cluster_node *node;
1771  int i = 0;
1772  int cluster_members = 0;
1773  int size;
1774  char buf[sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)];
1775 
1776  ENTER();
1777 
1778  log_printf(LOGSYS_LEVEL_DEBUG, "Sending quorum callback, quorate = %d", cluster_is_quorate);
1779 
1780  list_iterate(tmp, &cluster_members_list) {
1781  node = list_entry(tmp, struct cluster_node, list);
1782  cluster_members++;
1783  }
1784  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1785  cluster_members++;
1786  }
1787 
1788  size = sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * cluster_members;
1789 
1790  res_lib_votequorum_notification = (struct res_lib_votequorum_quorum_notification *)&buf;
1791  res_lib_votequorum_notification->quorate = cluster_is_quorate;
1792  res_lib_votequorum_notification->context = context;
1793  res_lib_votequorum_notification->node_list_entries = cluster_members;
1794  res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION;
1795  res_lib_votequorum_notification->header.size = size;
1796  res_lib_votequorum_notification->header.error = CS_OK;
1797 
1798  /* Send all known nodes and their states */
1799  list_iterate(tmp, &cluster_members_list) {
1800  node = list_entry(tmp, struct cluster_node, list);
1801  res_lib_votequorum_notification->node_list[i].nodeid = node->node_id;
1802  res_lib_votequorum_notification->node_list[i++].state = node->state;
1803  }
1804  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1805  res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID;
1806  res_lib_votequorum_notification->node_list[i++].state = qdevice->state;
1807  }
1808 
1809  /* Send it to all interested parties */
1810  if (conn) {
1811  int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1812  LEAVE();
1813  return ret;
1814  } else {
1815  struct quorum_pd *qpd;
1816 
1817  list_iterate(tmp, &trackers_list) {
1818  qpd = list_entry(tmp, struct quorum_pd, list);
1819  res_lib_votequorum_notification->context = qpd->tracking_context;
1820  corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1821  }
1822  }
1823 
1824  LEAVE();
1825 
1826  return 0;
1827 }
1828 
1829 static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context)
1830 {
1831  struct res_lib_votequorum_nodelist_notification *res_lib_votequorum_notification;
1832  int i = 0;
1833  int size;
1834  struct list_head *tmp;
1835  char buf[sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries];
1836 
1837  ENTER();
1838 
1839  log_printf(LOGSYS_LEVEL_DEBUG, "Sending nodelist callback. ring_id = %d/%lld", quorum_ringid.rep.nodeid, quorum_ringid.seq);
1840 
1841  size = sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries;
1842 
1843  res_lib_votequorum_notification = (struct res_lib_votequorum_nodelist_notification *)&buf;
1844  res_lib_votequorum_notification->node_list_entries = quorum_members_entries;
1845  res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.rep.nodeid;
1846  res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq;
1847  res_lib_votequorum_notification->context = context;
1848 
1849  for (i=0; i<quorum_members_entries; i++) {
1850  res_lib_votequorum_notification->node_list[i] = quorum_members[i];
1851  }
1852 
1853  res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION;
1854  res_lib_votequorum_notification->header.size = size;
1855  res_lib_votequorum_notification->header.error = CS_OK;
1856 
1857  /* Send it to all interested parties */
1858  if (conn) {
1859  int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1860  LEAVE();
1861  return ret;
1862  } else {
1863  struct quorum_pd *qpd;
1864 
1865  list_iterate(tmp, &trackers_list) {
1866  qpd = list_entry(tmp, struct quorum_pd, list);
1867  res_lib_votequorum_notification->context = qpd->tracking_context;
1868  corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1869  }
1870  }
1871 
1872  LEAVE();
1873 
1874  return 0;
1875 }
1876 
/*
 * Walk trackers_list to deliver an "expected votes" notification to each
 * tracker.
 *
 * NOTE(review): listing lines 1879, 1887-1890 and 1894-1896 are absent
 * here, so the notification structure, its initialisation and the per
 * tracker send are not visible - presumably a
 * res_lib_votequorum_expectedvotes_notification is filled and sent with
 * ipc_dispatch_send(); confirm against the original file.
 */
1877 static void votequorum_exec_send_expectedvotes_notification(void)
1878 {
1880  struct quorum_pd *qpd;
1881  struct list_head *tmp;
1882 
1883  ENTER();
1884 
1885  log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback");
1886 
1891 
1892  list_iterate(tmp, &trackers_list) {
1893  qpd = list_entry(tmp, struct quorum_pd, list);
1897  }
1898 
1899  LEAVE();
1900 }
1901 
/*
 * Endian conversion hook for the qdevice reconfigure message.
 *
 * No field of the message is converted: the body only emits the
 * ENTER()/LEAVE() trace calls and message is not touched.
 */
1902 static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message)
1903 {
1904  ENTER();
1905 
1906  LEAVE();
1907 }
1908 
/*
 * Handle a quorum device rename request received from the cluster.
 *
 * The rename is applied only when the request's oldname equals the
 * currently recorded qdevice_name; qdevice_name is then zeroed and
 * overwritten with newname. Requests naming a different device are
 * ignored.
 *
 * message - the received request
 * nodeid  - id of the sending node (used for logging only)
 *
 * NOTE(review): listing line 1913 is absent here, so the declaration of
 * req_exec_quorum_qdevice_reconfigure is not visible - presumably a typed
 * pointer initialised from message; confirm against the original file.
 */
1909 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
1910  const void *message,
1911  unsigned int nodeid)
1912 {
1914 
1915  ENTER();
1916 
1917  log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node %u [from: %s to: %s]",
1918  nodeid,
1919  req_exec_quorum_qdevice_reconfigure->oldname,
1920  req_exec_quorum_qdevice_reconfigure->newname);
1921 
1922  if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) {
1923  log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename");
/* clear the whole buffer first so no bytes of the old name remain */
1924  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
1925  strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname);
1926  /*
1927  * TODO: notify qdevices about name change?
1928  * this is not relevant for now and can wait later on since
1929  * qdevices are local only and libvotequorum is not final
1930  */
1931  }
1932 
1933  LEAVE();
1934 }
1935 
/*
 * Endian conversion hook for the qdevice registration message: the
 * 32-bit "operation" field is byte-swapped in place with swab32().
 *
 * NOTE(review): listing line 1938 is absent here, so the declaration of
 * req_exec_quorum_qdevice_reg is not visible - presumably a typed pointer
 * initialised from message; confirm against the original file.
 */
1936 static void exec_votequorum_qdevice_reg_endian_convert (void *message)
1937 {
1939 
1940  ENTER();
1941 
1942  req_exec_quorum_qdevice_reg->operation = swab32(req_exec_quorum_qdevice_reg->operation);
1943 
1944  LEAVE();
1945 }
1946 
/*
 * Handle a quorum device registration message received from the cluster.
 * The action depends on the message's "operation" field.
 *
 * First visible branch (the log text refers to a device "trying to
 * register"):
 *  - a message from another node only records the device name if none is
 *    recorded yet;
 *  - a message from this node is ignored when our device is already
 *    registered or when no registration connection is pending;
 *  - otherwise the name is recorded if none is set, compared with the
 *    recorded name, node info is multicast on a match or CS_ERR_EXIST is
 *    reported on a mismatch, and the reply is sent on qdevice_reg_conn,
 *    which is then cleared.
 *
 * Second visible branch: qdevice_name is wiped unless some node in
 * NODESTATE_MEMBER state still has NODE_FLAGS_QDEVICE_REGISTERED set.
 *
 * NOTE(review): listing lines 1951-1952, 1966, 2007, 2011, 2017-2018 and
 * 2023 are absent here. That hides the declarations of
 * req_exec_quorum_qdevice_reg and res_lib_votequorum_status, both case
 * labels (presumably register / unregister), the statement executed on a
 * name match, the start of the warning log call and the reply header
 * setup; confirm against the original file.
 */
1947 static void message_handler_req_exec_votequorum_qdevice_reg (
1948  const void *message,
1949  unsigned int nodeid)
1950 {
1953  int wipe_qdevice_name = 1;
1954  struct cluster_node *node = NULL;
1955  struct list_head *tmp;
1956  cs_error_t error = CS_OK;
1957 
1958  ENTER();
1959 
1960  log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node %u [%s]",
1961  req_exec_quorum_qdevice_reg->operation,
1962  nodeid, req_exec_quorum_qdevice_reg->qdevice_name);
1963 
1964  switch(req_exec_quorum_qdevice_reg->operation)
1965  {
/* message originated on another node: only remember the name */
1967  if (nodeid != us->node_id) {
1968  if (!strlen(qdevice_name)) {
1969  log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded");
1970  strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
1971  }
1972  LEAVE();
1973  return;
1974  }
1975 
1976  /*
1977  * protect against the case where we broadcast qdevice registration
1978  * to new members, we receive the message back, but there is no registration
1979  * connection in progress
1980  */
1981  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1982  LEAVE();
1983  return;
1984  }
1985 
1986  /*
1987  * this should NEVER happen
1988  */
1989  if (!qdevice_reg_conn) {
1990  log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!");
1991  LEAVE();
1992  return;
1993  }
1994 
1995  /*
1996  * registering our own device in this case
1997  */
1998  if (!strlen(qdevice_name)) {
1999  strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
2000  }
2001 
2002  /*
2003  * check if it is our device or something else
2004  */
2005  if ((!strncmp(req_exec_quorum_qdevice_reg->qdevice_name,
2006  qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2008  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2009  votequorum_exec_send_nodeinfo(us->node_id);
2010  } else {
2012  "A new qdevice with different name (new: %s old: %s) is trying to register!",
2013  req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name);
2014  error = CS_ERR_EXIST;
2015  }
2016 
/* answer the pending registration request, then forget the connection */
2019  res_lib_votequorum_status.header.error = error;
2020  corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2021  qdevice_reg_conn = NULL;
2022  break;
/* keep the name while any current member still has a registered device */
2024  list_iterate(tmp, &cluster_members_list) {
2025  node = list_entry(tmp, struct cluster_node, list);
2026  if ((node->state == NODESTATE_MEMBER) &&
2027  (node->flags & NODE_FLAGS_QDEVICE_REGISTERED)) {
2028  wipe_qdevice_name = 0;
2029  }
2030  }
2031 
2032  if (wipe_qdevice_name) {
2033  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2034  }
2035 
2036  break;
2037  }
2038  LEAVE();
2039 }
2040 
2041 static void exec_votequorum_nodeinfo_endian_convert (void *message)
2042 {
2043  struct req_exec_quorum_nodeinfo *nodeinfo = message;
2044 
2045  ENTER();
2046 
2047  nodeinfo->nodeid = swab32(nodeinfo->nodeid);
2048  nodeinfo->votes = swab32(nodeinfo->votes);
2049  nodeinfo->expected_votes = swab32(nodeinfo->expected_votes);
2050  nodeinfo->flags = swab32(nodeinfo->flags);
2051 
2052  LEAVE();
2053 }
2054 
2055 static void message_handler_req_exec_votequorum_nodeinfo (
2056  const void *message,
2057  unsigned int sender_nodeid)
2058 {
2059  const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message;
2060  struct cluster_node *node = NULL;
2061  int old_votes;
2062  int old_expected;
2063  uint32_t old_flags;
2064  nodestate_t old_state;
2065  int new_node = 0;
2066  int allow_downgrade = 0;
2067  int by_node = 0;
2068  unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid;
2069 
2070  ENTER();
2071 
2072  log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", sender_nodeid);
2073  log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d flags: %d",
2074  nodeid,
2075  req_exec_quorum_nodeinfo->votes,
2076  req_exec_quorum_nodeinfo->expected_votes,
2077  req_exec_quorum_nodeinfo->flags);
2078 
2079  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
2080  decode_flags(req_exec_quorum_nodeinfo->flags);
2081  }
2082 
2083  node = find_node_by_nodeid(nodeid);
2084  if (!node) {
2085  node = allocate_node(nodeid);
2086  new_node = 1;
2087  }
2088  if (!node) {
2089  corosync_api->error_memory_failure();
2090  LEAVE();
2091  return;
2092  }
2093 
2094  if (new_node) {
2095  old_votes = 0;
2096  old_expected = 0;
2097  old_state = NODESTATE_DEAD;
2098  old_flags = 0;
2099  } else {
2100  old_votes = node->votes;
2101  old_expected = node->expected_votes;
2102  old_state = node->state;
2103  old_flags = node->flags;
2104  }
2105 
2106  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
2107  struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid);
2108 
2109  assert(sender_node != NULL);
2110 
2111  if ((!cluster_is_quorate) &&
2112  (sender_node->flags & NODE_FLAGS_QUORATE)) {
2113  node->votes = req_exec_quorum_nodeinfo->votes;
2114  } else {
2115  node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes);
2116  }
2117  goto recalculate;
2118  }
2119 
2120  /* Update node state */
2121  node->flags = req_exec_quorum_nodeinfo->flags;
2122  node->votes = req_exec_quorum_nodeinfo->votes;
2123  node->state = NODESTATE_MEMBER;
2124 
2125  if (node->flags & NODE_FLAGS_LEAVING) {
2126  node->state = NODESTATE_LEAVING;
2127  allow_downgrade = 1;
2128  by_node = 1;
2129  }
2130 
2131  if ((!cluster_is_quorate) &&
2132  (node->flags & NODE_FLAGS_QUORATE)) {
2133  allow_downgrade = 1;
2134  us->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
2135  }
2136 
2137  if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
2138  node->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
2139  } else {
2140  node->expected_votes = us->expected_votes;
2141  }
2142 
2143  if ((last_man_standing) && (node->votes > 1)) {
2144  log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
2145  "cluster nodes votes are set to 1. Disabling LMS.");
2146  last_man_standing = 0;
2147  if (last_man_standing_timer_set) {
2148  corosync_api->timer_delete(last_man_standing_timer);
2149  last_man_standing_timer_set = 0;
2150  }
2151  }
2152 
2153 recalculate:
2154  if ((new_node) ||
2155  (nodeid == us->node_id) ||
2156  (node->flags & NODE_FLAGS_FIRST) ||
2157  (old_votes != node->votes) ||
2158  (old_expected != node->expected_votes) ||
2159  (old_flags != node->flags) ||
2160  (old_state != node->state)) {
2161  recalculate_quorum(allow_downgrade, by_node);
2162  }
2163 
2164  if ((wait_for_all) &&
2165  (!(node->flags & NODE_FLAGS_WFASTATUS)) &&
2166  (node->flags & NODE_FLAGS_QUORATE)) {
2167  update_wait_for_all_status(0);
2168  }
2169 
2170  LEAVE();
2171 }
2172 
2173 static void exec_votequorum_reconfigure_endian_convert (void *message)
2174 {
2175  struct req_exec_quorum_reconfigure *reconfigure = message;
2176 
2177  ENTER();
2178 
2179  reconfigure->nodeid = swab32(reconfigure->nodeid);
2180  reconfigure->value = swab32(reconfigure->value);
2181 
2182  LEAVE();
2183 }
2184 
/*
 * Exec handler for a reconfigure message received from cluster node
 * `nodeid`. It dispatches on the `param` field of the message; every
 * branch visible here ends by calling recalculate_quorum().
 *
 * message: the received reconfigure message.
 * nodeid:  the node the message came from (only used for logging here).
 *
 * NOTE(review): this listing does not show the declaration of
 * req_exec_quorum_reconfigure nor the `case` labels of the switch (the
 * embedded line numbers skip 2189, 2199, 2210 and 2220). Confirm the
 * missing lines against the upstream file.
 */
2185 static void message_handler_req_exec_votequorum_reconfigure (
2186  const void *message,
2187  unsigned int nodeid)
2188 {
2190  struct cluster_node *node;
2191 
2192  ENTER();
2193 
2194  log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node %u for %u",
2195  nodeid, req_exec_quorum_reconfigure->nodeid);
2196 
2197  switch(req_exec_quorum_reconfigure->param)
2198  {
 /*
  * First branch: apply the new expected votes, notify listeners, update
  * the ev barrier and, when ev_tracking is on, never let our own
  * expected_votes drop below ev_tracking_barrier.
  */
2200  update_node_expected_votes(req_exec_quorum_reconfigure->value);
2201 
2202  votequorum_exec_send_expectedvotes_notification();
2203  update_ev_barrier(req_exec_quorum_reconfigure->value);
2204  if (ev_tracking) {
2205  us->expected_votes = max(us->expected_votes, ev_tracking_barrier);
2206  }
2207  recalculate_quorum(1, 0); /* Allow decrease */
2208  break;
2209 
 /*
  * Second branch: set the votes of the node named in the message. A
  * message naming a node we do not know is silently ignored.
  */
2211  node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid);
2212  if (!node) {
2213  LEAVE();
2214  return;
2215  }
2216  node->votes = req_exec_quorum_reconfigure->value;
2217  recalculate_quorum(1, 0); /* Allow decrease */
2218  break;
2219 
 /* Third branch: reset the wait_for_all status and log who asked. */
2221  update_wait_for_all_status(0);
2222  log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node %d.",
2223  req_exec_quorum_reconfigure->nodeid);
2224  recalculate_quorum(0, 0);
2225 
2226  break;
2227 
2228  }
2229 
2230  LEAVE();
2231 }
2232 
2233 static int votequorum_exec_exit_fn (void)
2234 {
2235  int ret = 0;
2236 
2237  ENTER();
2238 
2239  /*
2240  * tell the other nodes we are leaving
2241  */
2242 
2243  if (allow_downscale) {
2244  us->flags |= NODE_FLAGS_LEAVING;
2245  ret = votequorum_exec_send_nodeinfo(us->node_id);
2246  }
2247 
2248  if ((ev_tracking) && (ev_tracking_fd != -1)) {
2249  close(ev_tracking_fd);
2250  }
2251 
2252 
2253  LEAVE();
2254  return ret;
2255 }
2256 
2257 static void votequorum_set_icmap_ro_keys(void)
2258 {
2259  icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE);
2260  icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE);
2261  icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE);
2262  icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE);
2263  icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE);
2264  icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE);
2265  icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE);
2266 }
2267 
2268 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
2269 {
2270  char *error = NULL;
2271 
2272  ENTER();
2273 
2274  /*
2275  * make sure we start clean
2276  */
2277  list_init(&cluster_members_list);
2278  list_init(&trackers_list);
2279  qdevice = NULL;
2280  us = NULL;
2281  memset(cluster_nodes, 0, sizeof(cluster_nodes));
2282 
2283  /*
2284  * Allocate a cluster_node for qdevice
2285  */
2286  qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID);
2287  if (!qdevice) {
2288  LEAVE();
2289  return ((char *)"Could not allocate node.");
2290  }
2291  qdevice->votes = 0;
2292  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2293 
2294  /*
2295  * Allocate a cluster_node for us
2296  */
2297  us = allocate_node(corosync_api->totem_nodeid_get());
2298  if (!us) {
2299  LEAVE();
2300  return ((char *)"Could not allocate node.");
2301  }
2302 
2303  icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id);
2304 
2305  us->state = NODESTATE_MEMBER;
2306  us->votes = 1;
2307  us->flags |= NODE_FLAGS_FIRST;
2308 
2309  error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP);
2310  if (error) {
2311  return error;
2312  }
2313  recalculate_quorum(0, 0);
2314 
2315  /*
2316  * Set RO keys in icmap
2317  */
2318  votequorum_set_icmap_ro_keys();
2319 
2320  /*
2321  * Listen for changes
2322  */
2323  votequorum_exec_add_config_notification();
2324 
2325  /*
2326  * Start us off with one node
2327  */
2328  votequorum_exec_send_nodeinfo(us->node_id);
2329 
2330  LEAVE();
2331 
2332  return (NULL);
2333 }
2334 
2335 /*
2336  * votequorum service core
2337  */
2338 
2339 static void votequorum_last_man_standing_timer_fn(void *arg)
2340 {
2341  ENTER();
2342 
2343  last_man_standing_timer_set = 0;
2344  if (cluster_is_quorate) {
2345  recalculate_quorum(1,1);
2346  }
2347 
2348  LEAVE();
2349 }
2350 
2351 static void votequorum_sync_init (
2352  const unsigned int *trans_list, size_t trans_list_entries,
2353  const unsigned int *member_list, size_t member_list_entries,
2354  const struct memb_ring_id *ring_id)
2355 {
2356  int i, j;
2357  int found;
2358  int left_nodes;
2359  struct cluster_node *node;
2360 
2361  ENTER();
2362 
2363  sync_in_progress = 1;
2364  sync_nodeinfo_sent = 0;
2365  sync_wait_for_poll_or_timeout = 0;
2366 
2367  if (member_list_entries > 1) {
2368  us->flags &= ~NODE_FLAGS_FIRST;
2369  }
2370 
2371  /*
2372  * we don't need to track which nodes have left directly,
2373  * since that info is in the node db, but we need to know
2374  * if somebody has left for last_man_standing
2375  */
2376  left_nodes = 0;
2377  for (i = 0; i < quorum_members_entries; i++) {
2378  found = 0;
2379  for (j = 0; j < member_list_entries; j++) {
2380  if (quorum_members[i] == member_list[j]) {
2381  found = 1;
2382  break;
2383  }
2384  }
2385  if (found == 0) {
2386  left_nodes = 1;
2387  node = find_node_by_nodeid(quorum_members[i]);
2388  if (node) {
2389  node->state = NODESTATE_DEAD;
2390  }
2391  }
2392  }
2393 
2394  if (last_man_standing) {
2395  if (((member_list_entries >= quorum) && (left_nodes)) ||
2396  ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && (check_low_node_id_partition() == 1))) {
2397  if (last_man_standing_timer_set) {
2398  corosync_api->timer_delete(last_man_standing_timer);
2399  last_man_standing_timer_set = 0;
2400  }
2401  corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000,
2402  NULL, votequorum_last_man_standing_timer_fn,
2403  &last_man_standing_timer);
2404  last_man_standing_timer_set = 1;
2405  }
2406  }
2407 
2408  memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries);
2409  previous_quorum_members_entries = quorum_members_entries;
2410 
2411  memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
2412  quorum_members_entries = member_list_entries;
2413  memcpy(&quorum_ringid, ring_id, sizeof(*ring_id));
2414 
2415  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && us->flags & NODE_FLAGS_QDEVICE_ALIVE) {
2416  /*
2417  * Reset poll timer. Sync waiting is interrupted on valid qdevice poll or after timeout
2418  */
2419  if (qdevice_timer_set) {
2420  corosync_api->timer_delete(qdevice_timer);
2421  }
2422  corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice,
2423  qdevice_timer_fn, &qdevice_timer);
2424  qdevice_timer_set = 1;
2425  sync_wait_for_poll_or_timeout = 1;
2426 
2427  log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)",
2428  qdevice_name, qdevice_sync_timeout);
2429  }
2430 
2431  LEAVE();
2432 }
2433 
2434 static int votequorum_sync_process (void)
2435 {
2436  if (!sync_nodeinfo_sent) {
2437  votequorum_exec_send_nodeinfo(us->node_id);
2438  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2439  if (strlen(qdevice_name)) {
2440  votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2441  qdevice_name);
2442  }
2443  votequorum_exec_send_nodelist_notification(NULL, 0LL);
2444  sync_nodeinfo_sent = 1;
2445  }
2446 
2447  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) {
2448  /*
2449  * Waiting for qdevice to poll with new ringid or timeout
2450  */
2451 
2452  return (-1);
2453  }
2454 
2455  return 0;
2456 }
2457 
2458 static void votequorum_sync_activate (void)
2459 {
2460  recalculate_quorum(0, 0);
2461  quorum_callback(quorum_members, quorum_members_entries,
2462  cluster_is_quorate, &quorum_ringid);
2463  votequorum_exec_send_quorum_notification(NULL, 0L);
2464 
2465  sync_in_progress = 0;
2466 }
2467 
/*
 * Sync "abort" hook: intentionally empty.
 */
static void votequorum_sync_abort (void)
{
	/* nothing to do */
}
2472 
2474  quorum_set_quorate_fn_t q_set_quorate_fn)
2475 {
2476  char *error;
2477 
2478  ENTER();
2479 
2480  if (q_set_quorate_fn == NULL) {
2481  return ((char *)"Quorate function not set");
2482  }
2483 
2484  corosync_api = api;
2485  quorum_callback = q_set_quorate_fn;
2486 
2487  error = corosync_service_link_and_init(corosync_api,
2488  &votequorum_service[0]);
2489  if (error) {
2490  return (error);
2491  }
2492 
2493  LEAVE();
2494 
2495  return (NULL);
2496 }
2497 
2498 /*
2499  * Library Handler init/fini
2500  */
2501 
2502 static int quorum_lib_init_fn (void *conn)
2503 {
2504  struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2505 
2506  ENTER();
2507 
2508  list_init (&pd->list);
2509  pd->conn = conn;
2510 
2511  LEAVE();
2512  return (0);
2513 }
2514 
2515 static int quorum_lib_exit_fn (void *conn)
2516 {
2517  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2518 
2519  ENTER();
2520 
2521  if (quorum_pd->tracking_enabled) {
2522  list_del (&quorum_pd->list);
2523  list_init (&quorum_pd->list);
2524  }
2525 
2526  LEAVE();
2527 
2528  return (0);
2529 }
2530 
2531 /*
2532  * library internal functions
2533  */
2534 
/*
 * Timer callback for the quorum device timer.
 *
 * Does nothing when the local node is not flagged NODE_FLAGS_QDEVICE_ALIVE
 * or when qdevice_timer_set is clear. Otherwise it clears the ALIVE flag,
 * logs that contact with the quorum device was lost, sends our nodeinfo,
 * and clears qdevice_timer_set and sync_wait_for_poll_or_timeout.
 *
 * arg: not used in the lines shown.
 *
 * NOTE(review): the embedded line numbers skip 2546, so one statement
 * between the flag update and the log call is missing from this listing.
 * Confirm against the upstream file.
 */
2535 static void qdevice_timer_fn(void *arg)
2536 {
2537  ENTER();
2538 
 /* Nothing to expire if the device is already dead or the timer is unset. */
2539  if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) ||
2540  (!qdevice_timer_set)) {
2541  LEAVE();
2542  return;
2543  }
2544 
2545  us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
2547  log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name);
2548  votequorum_exec_send_nodeinfo(us->node_id);
2549 
2550  qdevice_timer_set = 0;
2551  sync_wait_for_poll_or_timeout = 0;
2552 
2553  LEAVE();
2554 }
2555 
2556 /*
2557  * Library Handler Functions
2558  */
2559 
/*
 * Library handler for a "getinfo" request.
 *
 * Looks up the requested node (a request for VOTEQUORUM_QDEVICE_NODEID is
 * answered with the local node), sums the votes and the highest
 * expected_votes over all nodes in state NODESTATE_MEMBER, adds the
 * qdevice votes when the node has NODE_FLAGS_QDEVICE_CAST_VOTE set, and
 * fills in the response. An unknown node yields CS_ERR_NOT_EXIST.
 *
 * NOTE(review): this listing is missing many lines of the function (the
 * embedded line numbers have gaps): the declarations of
 * req_lib_votequorum_getinfo and res_lib_votequorum_getinfo, the body of
 * every switch case and of every flag `if`, several response-field
 * assignments, and the code that sends the response. Confirm against the
 * upstream file before relying on this text.
 */
2560 static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message)
2561 {
2564  struct cluster_node *node;
2565  unsigned int highest_expected = 0;
2566  unsigned int total_votes = 0;
2567  cs_error_t error = CS_OK;
2568  uint32_t nodeid = req_lib_votequorum_getinfo->nodeid;
2569 
2570  ENTER();
2571 
2572  log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node %u", conn, req_lib_votequorum_getinfo->nodeid);
2573 
 /* Asking about the qdevice pseudo-node reports the local node instead. */
2574  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
2575  nodeid = us->node_id;
2576  }
2577 
2578  node = find_node_by_nodeid(nodeid);
2579  if (node) {
2580  struct cluster_node *iternode;
2581  struct list_head *nodelist;
2582 
 /* Only nodes currently in state MEMBER contribute to the totals. */
2583  list_iterate(nodelist, &cluster_members_list) {
2584  iternode = list_entry(nodelist, struct cluster_node, list);
2585 
2586  if (iternode->state == NODESTATE_MEMBER) {
2587  highest_expected =
2588  max(highest_expected, iternode->expected_votes);
2589  total_votes += iternode->votes;
2590  }
2591  }
2592 
2593  if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2594  total_votes += qdevice->votes;
2595  }
2596 
2597  switch(node->state) {
2598  case NODESTATE_MEMBER:
2600  break;
2601  case NODESTATE_DEAD:
2603  break;
2604  case NODESTATE_LEAVING:
2606  break;
2607  default:
2609  break;
2610  }
2614  res_lib_votequorum_getinfo.highest_expected = highest_expected;
2615 
2620 
2621  if (two_node) {
2623  }
2624  if (cluster_is_quorate) {
2626  }
2627  if (wait_for_all) {
2629  }
2630  if (last_man_standing) {
2632  }
2633  if (auto_tie_breaker != ATB_NONE) {
2635  }
2636  if (allow_downscale) {
2638  }
2639 
 /*
  * NOTE(review): unbounded strcpy; this is only safe if qdevice_name is
  * always NUL-terminated within the destination's size -- confirm.
  */
2641  strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name);
2643 
2644  if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2646  }
2647  if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) {
2649  }
2650  if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2652  }
2653  if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) {
2655  }
2656  } else {
2657  error = CS_ERR_NOT_EXIST;
2658  }
2659 
2662  res_lib_votequorum_getinfo.header.error = error;
2664  log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error);
2665 
2666  LEAVE();
2667 }
2668 
/*
 * Library handler for a "set expected votes" request.
 *
 * Validates the requested value with calculate_quorum() and rejects it
 * with CS_ERR_INVALID_PARAM when it is lower than the current total votes,
 * or when the cluster is quorate and the resulting quorum would exceed the
 * total votes. An accepted value is applied with
 * update_node_expected_votes() and sent to the cluster as a
 * VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES reconfigure message.
 *
 * NOTE(review): the declarations of req_lib_votequorum_setexpected and
 * res_lib_votequorum_status, and the code that sends the response, are
 * missing from this listing (gaps in the embedded line numbers) -- confirm
 * against the upstream file.
 */
2669 static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message)
2670 {
2673  cs_error_t error = CS_OK;
2674  unsigned int newquorum;
2675  unsigned int total_votes;
2676  uint8_t allow_downscale_status = 0;
2677 
2678  ENTER();
2679 
 /* allow_downscale is forced off only for the validation call below. */
2680  allow_downscale_status = allow_downscale;
2681  allow_downscale = 0;
2682 
2683  /*
2684  * Validate new expected votes
2685  */
2686  newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
2687  allow_downscale = allow_downscale_status;
2688  /*
2689  * Setting expected_votes < total_votes doesn't make sense.
2690  * For quorate cluster prevent cluster to become unquorate.
2691  */
2692  if (req_lib_votequorum_setexpected->expected_votes < total_votes ||
2693  (cluster_is_quorate && (newquorum > total_votes))) {
2694  error = CS_ERR_INVALID_PARAM;
2695  goto error_exit;
2696  }
2697  update_node_expected_votes(req_lib_votequorum_setexpected->expected_votes);
2698 
2699  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id,
2700  req_lib_votequorum_setexpected->expected_votes);
2701 
2702 error_exit:
2705  res_lib_votequorum_status.header.error = error;
2707 
2708  LEAVE();
2709 }
2710 
/*
 * Library handler for a "set node votes" request.
 *
 * Looks up the node named in the request (CS_ERR_NAME_NOT_FOUND when it
 * is unknown), tentatively stores the requested votes on it and
 * recomputes the quorum. When the new quorum is below half of the total
 * votes or above the total votes, the old value is restored and
 * CS_ERR_INVALID_PARAM is returned. Otherwise the new value stays in
 * place and a VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES reconfigure message is
 * sent to the cluster.
 *
 * NOTE(review): the declarations of req_lib_votequorum_setvotes and
 * res_lib_votequorum_status, and the code that sends the response, are
 * missing from this listing (gaps in the embedded line numbers) -- confirm
 * against the upstream file.
 */
2711 static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message)
2712 {
2715  struct cluster_node *node;
2716  unsigned int newquorum;
2717  unsigned int total_votes;
2718  unsigned int saved_votes;
2719  cs_error_t error = CS_OK;
2720  unsigned int nodeid;
2721 
2722  ENTER();
2723 
2724  nodeid = req_lib_votequorum_setvotes->nodeid;
2725  node = find_node_by_nodeid(nodeid);
2726  if (!node) {
2727  error = CS_ERR_NAME_NOT_FOUND;
2728  goto error_exit;
2729  }
2730 
2731  /*
2732  * Check votes is valid
2733  */
 /* Remember the old value so a rejected request can be rolled back. */
2734  saved_votes = node->votes;
2735  node->votes = req_lib_votequorum_setvotes->votes;
2736 
2737  newquorum = calculate_quorum(1, 0, &total_votes);
2738 
2739  if (newquorum < total_votes / 2 ||
2740  newquorum > total_votes) {
2741  node->votes = saved_votes;
2742  error = CS_ERR_INVALID_PARAM;
2743  goto error_exit;
2744  }
2745 
2746  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid,
2747  req_lib_votequorum_setvotes->votes);
2748 
2749 error_exit:
2752  res_lib_votequorum_status.header.error = error;
2754 
2755  LEAVE();
2756 }
2757 
/*
 * Library handler for a "track start" request.
 *
 * When the request carries CS_TRACK_CURRENT or CS_TRACK_CHANGES, the
 * current nodelist and quorum state are sent to the caller right away.
 * A connection that is already tracking gets CS_ERR_EXIST. Otherwise,
 * when CS_TRACK_CHANGES or CS_TRACK_CHANGES_ONLY is set, the flags and
 * context are recorded in the connection's private data and the
 * connection is added to trackers_list.
 *
 * NOTE(review): the declarations of req_lib_votequorum_trackstart and
 * res_lib_votequorum_status, and the code that sends the response, are
 * missing from this listing (gaps in the embedded line numbers) -- confirm
 * against the upstream file.
 */
2758 static void message_handler_req_lib_votequorum_trackstart (void *conn,
2759  const void *message)
2760 {
2763  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2764  cs_error_t error = CS_OK;
2765 
2766  ENTER();
2767 
2768  /*
2769  * If an immediate listing of the current cluster membership
2770  * is requested, generate membership list
2771  */
2772  if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CURRENT ||
2773  req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES) {
2774  log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn);
2775  votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->context);
2776  votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context);
2777  }
2778 
 /* The initial status above is sent even when this check then fails. */
2779  if (quorum_pd->tracking_enabled) {
2780  error = CS_ERR_EXIST;
2781  goto response_send;
2782  }
2783 
2784  /*
2785  * Record requests for tracking
2786  */
2787  if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES ||
2788  req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) {
2789 
2790  quorum_pd->track_flags = req_lib_votequorum_trackstart->track_flags;
2791  quorum_pd->tracking_enabled = 1;
2792  quorum_pd->tracking_context = req_lib_votequorum_trackstart->context;
2793 
2794  list_add (&quorum_pd->list, &trackers_list);
2795  }
2796 
2797 response_send:
2800  res_lib_votequorum_status.header.error = error;
2802 
2803  LEAVE();
2804 }
2805 
/*
 * Library handler for a "track stop" request.
 *
 * When the connection is tracking, tracking is switched off and the
 * connection's private data is unlinked from its list (the list head is
 * re-initialised). A connection that is not tracking gets
 * CS_ERR_NOT_EXIST.
 *
 * NOTE(review): the declaration of res_lib_votequorum_status and the code
 * that sends the response are missing from this listing (gaps in the
 * embedded line numbers) -- confirm against the upstream file.
 */
2806 static void message_handler_req_lib_votequorum_trackstop (void *conn,
2807  const void *message)
2808 {
2810  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2811  int error = CS_OK;
2812 
2813  ENTER();
2814 
2815  if (quorum_pd->tracking_enabled) {
2816  error = CS_OK;
2817  quorum_pd->tracking_enabled = 0;
2818  list_del (&quorum_pd->list);
2819  list_init (&quorum_pd->list);
2820  } else {
2821  error = CS_ERR_NOT_EXIST;
2822  }
2823 
2826  res_lib_votequorum_status.header.error = error;
2828 
2829  LEAVE();
2830 }
2831 
/*
 * Library handler for a quorum device "register" request.
 *
 * Outcomes visible in this listing:
 * - qdevice_can_operate is false: CS_ERR_ACCESS;
 * - a device is already registered under the same name: CS_OK;
 * - a device is already registered under a different name: CS_ERR_EXIST;
 * - another registration is still pending (qdevice_reg_conn is set):
 *   CS_ERR_TRY_AGAIN;
 * - sending the registration to the cluster fails: CS_ERR_TRY_AGAIN and
 *   qdevice_reg_conn is cleared again;
 * - the registration was sent: the function returns without touching the
 *   response here, leaving `conn` stored in qdevice_reg_conn.
 *
 * NOTE(review): the declarations of req_lib_votequorum_qdevice_register
 * and res_lib_votequorum_status, the opening lines of three log calls and
 * the code that sends the response are missing from this listing (gaps in
 * the embedded line numbers) -- confirm against the upstream file.
 */
2832 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
2833  const void *message)
2834 {
2837  cs_error_t error = CS_OK;
2838 
2839  ENTER();
2840 
2841  if (!qdevice_can_operate) {
2842  log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information");
2843  error = CS_ERR_ACCESS;
2844  goto out;
2845  }
2846 
2847  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
 /* Re-registering under the current name is accepted as a no-op. */
2848  if ((!strncmp(req_lib_votequorum_qdevice_register->name,
2849  qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2850  goto out;
2851  } else {
2853  "A new qdevice with different name (new: %s old: %s) is trying to re-register!",
2854  req_lib_votequorum_qdevice_register->name, qdevice_name);
2855  error = CS_ERR_EXIST;
2856  goto out;
2857  }
2858  } else {
2859  if (qdevice_reg_conn != NULL) {
2861  "Registration request already in progress");
2862  error = CS_ERR_TRY_AGAIN;
2863  goto out;
2864  }
2865  qdevice_reg_conn = conn;
2866  if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2867  req_lib_votequorum_qdevice_register->name) != 0) {
2869  "Unable to send qdevice registration request to cluster");
2870  error = CS_ERR_TRY_AGAIN;
2871  qdevice_reg_conn = NULL;
2872  } else {
 /* Sent: return without going through the `out` label. */
2873  LEAVE();
2874  return;
2875  }
2876  }
2877 
2878 out:
2879 
2882  res_lib_votequorum_status.header.error = error;
2884 
2885  LEAVE();
2886 }
2887 
/*
 * Library handler for a quorum device "unregister" request.
 *
 * With no device registered the result is CS_ERR_NOT_EXIST; a name that
 * differs from the registered one gives CS_ERR_INVALID_PARAM. Otherwise
 * the qdevice timer is cancelled if set, the REGISTERED and ALIVE flags
 * are cleared on the local node, our nodeinfo is sent and an UNREGISTER
 * operation is sent to the cluster.
 *
 * NOTE(review): the declarations of
 * req_lib_votequorum_qdevice_unregister and res_lib_votequorum_status,
 * two statements after the flag updates (embedded lines 2909-2910) and
 * the code that sends the response are missing from this listing --
 * confirm against the upstream file.
 */
2888 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
2889  const void *message)
2890 {
2893  cs_error_t error = CS_OK;
2894 
2895  ENTER();
2896 
2897  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2898  if (strncmp(req_lib_votequorum_qdevice_unregister->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2899  error = CS_ERR_INVALID_PARAM;
2900  goto out;
2901  }
2902  if (qdevice_timer_set) {
2903  corosync_api->timer_delete(qdevice_timer);
2904  qdevice_timer_set = 0;
2905  sync_wait_for_poll_or_timeout = 0;
2906  }
2907  us->flags &= ~NODE_FLAGS_QDEVICE_REGISTERED;
2908  us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
2911  votequorum_exec_send_nodeinfo(us->node_id);
2912  votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER,
2913  req_lib_votequorum_qdevice_unregister->name);
2914  } else {
2915  error = CS_ERR_NOT_EXIST;
2916  }
2917 
2918 out:
2921  res_lib_votequorum_status.header.error = error;
2923 
2924  LEAVE();
2925 }
2926 
/*
 * Library handler for a quorum device "update" (rename) request.
 *
 * With no device registered the result is CS_ERR_NOT_EXIST; an `oldname`
 * that differs from the registered name gives CS_ERR_INVALID_PARAM.
 * Otherwise a qdevice reconfigure message carrying the old and new names
 * is sent to the cluster.
 *
 * NOTE(review): the declarations of req_lib_votequorum_qdevice_update
 * and res_lib_votequorum_status, and the code that sends the response,
 * are missing from this listing (gaps in the embedded line numbers) --
 * confirm against the upstream file.
 */
2927 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
2928  const void *message)
2929 {
2932  cs_error_t error = CS_OK;
2933 
2934  ENTER();
2935 
2936  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2937  if (strncmp(req_lib_votequorum_qdevice_update->oldname, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2938  error = CS_ERR_INVALID_PARAM;
2939  goto out;
2940  }
2941  votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname,
2942  req_lib_votequorum_qdevice_update->newname);
2943  } else {
2944  error = CS_ERR_NOT_EXIST;
2945  }
2946 
2947 out:
2950  res_lib_votequorum_status.header.error = error;
2952 
2953  LEAVE();
2954 }
2955 
/*
 * Library handler for a quorum device "poll" request.
 *
 * Outcomes visible in this listing:
 * - qdevice_can_operate is false: CS_ERR_ACCESS;
 * - no device registered: CS_ERR_NOT_EXIST;
 * - the ring id in the request differs from quorum_ringid (the ring id
 *   stored by the last sync): CS_ERR_MESSAGE_ERROR, poll ignored;
 * - the name differs from the registered name: CS_ERR_INVALID_PARAM;
 * - otherwise the qdevice timer is cancelled if set, the local flags are
 *   updated, our nodeinfo is sent if the flags changed, the timer is
 *   re-armed with qdevice_timeout and sync_wait_for_poll_or_timeout is
 *   cleared.
 *
 * NOTE(review): the declarations of req_lib_votequorum_qdevice_poll and
 * res_lib_votequorum_status, the statements that modify us->flags
 * (embedded lines 2993, 2996 and 2998) and the code that sends the
 * response are missing from this listing -- confirm against the upstream
 * file.
 */
2956 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
2957  const void *message)
2958 {
2961  cs_error_t error = CS_OK;
2962  uint32_t oldflags;
2963 
2964  ENTER();
2965 
2966  if (!qdevice_can_operate) {
2967  error = CS_ERR_ACCESS;
2968  goto out;
2969  }
2970 
2971  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2972  if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.rep.nodeid &&
2973  req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) {
2974  log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (%u.%"PRIu64") != last sync "
2975  "ring id (%u.%"PRIu64"). Ignoring poll call.",
2976  req_lib_votequorum_qdevice_poll->ring_id.nodeid, req_lib_votequorum_qdevice_poll->ring_id.seq,
2977  quorum_ringid.rep.nodeid, quorum_ringid.seq);
2978  error = CS_ERR_MESSAGE_ERROR;
2979  goto out;
2980  }
2981  if (strncmp(req_lib_votequorum_qdevice_poll->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2982  error = CS_ERR_INVALID_PARAM;
2983  goto out;
2984  }
2985 
2986  if (qdevice_timer_set) {
2987  corosync_api->timer_delete(qdevice_timer);
2988  qdevice_timer_set = 0;
2989  }
2990 
 /* Snapshot the flags so a change can be detected and announced below. */
2991  oldflags = us->flags;
2992 
2994 
2995  if (req_lib_votequorum_qdevice_poll->cast_vote) {
2997  } else {
2999  }
3000 
3001  if (us->flags != oldflags) {
3002  votequorum_exec_send_nodeinfo(us->node_id);
3003  }
3004 
3005  corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice,
3006  qdevice_timer_fn, &qdevice_timer);
3007  qdevice_timer_set = 1;
3008  sync_wait_for_poll_or_timeout = 0;
3009  } else {
3010  error = CS_ERR_NOT_EXIST;
3011  }
3012 
3013 out:
3016  res_lib_votequorum_status.header.error = error;
3018 
3019  LEAVE();
3020 }
3021 
/*
 * Library handler for a quorum device "master wins" request.
 *
 * qdevice_can_operate false gives CS_ERR_ACCESS; no registered device
 * gives CS_ERR_NOT_EXIST; a name that differs from the registered one
 * gives CS_ERR_INVALID_PARAM. Otherwise the local flags are updated
 * according to the request's `allow` field, our nodeinfo is sent if the
 * flags changed, and update_qdevice_master_wins() is called with `allow`.
 *
 * NOTE(review): the declarations of
 * req_lib_votequorum_qdevice_master_wins and res_lib_votequorum_status,
 * the statements that modify us->flags (embedded lines 3044 and 3046) and
 * the code that sends the response are missing from this listing --
 * confirm against the upstream file.
 */
3022 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
3023  const void *message)
3024 {
3027  cs_error_t error = CS_OK;
 /* Flags as they were on entry, used to detect a change below. */
3028  uint32_t oldflags = us->flags;
3029 
3030  ENTER();
3031 
3032  if (!qdevice_can_operate) {
3033  error = CS_ERR_ACCESS;
3034  goto out;
3035  }
3036 
3037  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
3038  if (strncmp(req_lib_votequorum_qdevice_master_wins->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
3039  error = CS_ERR_INVALID_PARAM;
3040  goto out;
3041  }
3042 
3043  if (req_lib_votequorum_qdevice_master_wins->allow) {
3045  } else {
3047  }
3048 
3049  if (us->flags != oldflags) {
3050  votequorum_exec_send_nodeinfo(us->node_id);
3051  }
3052 
3053  update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow);
3054  } else {
3055  error = CS_ERR_NOT_EXIST;
3056  }
3057 
3058 out:
3061  res_lib_votequorum_status.header.error = error;
3063 
3064  LEAVE();
3065 }
uint32_t expected_votes
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
void *(* ipc_private_data_get)(void *conn)
Definition: coroapi.h:256
#define VOTEQUORUM_INFO_QUORATE
#define TOTEM_AGREED
Definition: coroapi.h:102
#define CS_TRUE
Definition: corotypes.h:54
const char * name
Definition: coroapi.h:492
uint32_t votes
uint32_t nodeid
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_READCONFIG_STARTUP
const char * get_run_dir(void)
Definition: util.c:174
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
Definition: icmap.c:1111
#define NODE_FLAGS_WFASTATUS
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:73
uint32_t value
#define CS_FALSE
Definition: corotypes.h:53
struct list_head * next
Definition: list.h:47
#define NODE_FLAGS_QUORATE
#define VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT
The corosync_service_engine struct.
Definition: coroapi.h:491
struct list_head list
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
Definition: icmap.c:1132
The req_lib_votequorum_qdevice_master_wins struct.
#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER
#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE
#define max(a, b)
int(* ipc_response_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:258
#define list_iterate(v, head)
char * votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn)
nodestate_t
#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA
int tracking_enabled
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define CS_TRACK_CURRENT
Definition: corotypes.h:87
The req_lib_votequorum_qdevice_unregister struct.
#define NODE_FLAGS_QDEVICE_MASTER_WINS
nodestate_t state
The res_lib_votequorum_quorum_notification struct.
The corosync_lib_handler struct.
Definition: coroapi.h:468
#define VOTEQUORUM_INFO_LAST_MAN_STANDING
struct message_header header
Definition: totemsrp.c:60
#define VOTEQUORUM_INFO_WAIT_FOR_ALL
#define NODE_FLAGS_QDEVICE_CAST_VOTE
uint32_t operation
The res_lib_votequorum_status struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE
The corosync_exec_handler struct.
Definition: coroapi.h:476
#define VOTEQUORUM_INFO_TWONODE
int(* totem_mcast)(const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee)
Definition: coroapi.h:281
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
Definition: list.h:46
#define VOTEQUORUM_INFO_QDEVICE_REGISTERED
#define log_printf(level, format, args...)
Definition: logsys.h:320
void(* exec_handler_fn)(const void *msg, unsigned int nodeid)
Definition: coroapi.h:477
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_QDEVICE_NODEID
#define VOTEQUORUM_INFO_QDEVICE_MASTER_WINS
#define VOTEQUORUM_NODESTATE_MEMBER
#define CS_TRACK_CHANGES
Definition: corotypes.h:88
#define SERVICE_ID_MAKE(a, b)
Definition: coroapi.h:459
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
uint32_t expected_votes
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
void(* quorum_set_quorate_fn_t)(const unsigned int *view_list, size_t view_list_entries, int quorate, struct memb_ring_id *)
Definition: exec/quorum.h:42
#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER
cs_error_t icmap_get_uint8(const char *key_name, uint8_t *u8)
Definition: icmap.c:884
void(* error_memory_failure)(void) __attribute__((noreturn))
Definition: coroapi.h:423
#define VOTEQUORUM_INFO_ALLOW_DOWNSCALE
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:71
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
#define VOTEQUORUM_INFO_QDEVICE_ALIVE
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:613
void * user_data
Definition: sam.c:127
struct list_head list
unsigned int(* totem_nodeid_get)(void)
Definition: coroapi.h:275
unsigned int nodeid
Definition: coroapi.h:112
#define CS_TRACK_CHANGES_ONLY
Definition: corotypes.h:89
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
uint32_t flags
The req_lib_votequorum_getinfo struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
Linked list API.
struct totem_ip_address rep
Definition: coroapi.h:123
#define COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
Definition: coroapi.h:157
The req_lib_votequorum_qdevice_update struct.
cs_error_t
The cs_error_t enum.
Definition: corotypes.h:94
unsigned char track_flags
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:74
LOGSYS_DECLARE_SUBSYS("VOTEQ")
The req_lib_votequorum_setvotes struct.
The corosync_api_v1 struct.
Definition: coroapi.h:225
typedef __attribute__
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
Definition: icmap.c:908
uint8_t param
The req_lib_votequorum_setexpected struct.
uint32_t quorate
Definition: sam.c:134
#define swab32(x)
The swab32 macro.
Definition: swab.h:51
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER
struct corosync_service_engine * votequorum_get_service_engine_ver0(void)
The res_lib_votequorum_expectedvotes_notification struct.
#define ENTER
Definition: logsys.h:321
The req_lib_votequorum_qdevice_register struct.
char * corosync_service_link_and_init(struct corosync_api_v1 *corosync_api, struct default_service *service)
Link and initialize a service.
Definition: service.c:117
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_NODESTATE_LEAVING
The votequorum_node struct.
#define PROCESSOR_COUNT_MAX
Definition: coroapi.h:96
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG
The memb_ring_id struct.
Definition: coroapi.h:122
#define VOTEQUORUM_READCONFIG_RUNTIME
struct list_head * prev
Definition: list.h:48
#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO
The req_lib_votequorum_trackstart struct.
#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES
#define VOTEQUORUM_QDEVICE_MAX_NAME_LEN
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
The req_lib_votequorum_qdevice_poll struct.
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
Definition: icmap.c:872
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:69
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define NODE_FLAGS_LEAVING
#define list_entry(ptr, type, member)
Definition: list.h:84
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define COROSYNC_LIB_FLOW_CONTROL_REQUIRED
Definition: coroapi.h:156
#define LOGSYS_LEVEL_NOTICE
Definition: logsys.h:72
unsigned long long seq
Definition: coroapi.h:124
cs_error_t icmap_set_uint8(const char *key_name, uint8_t value)
Definition: icmap.c:589
void(* lib_handler_fn)(void *conn, const void *msg)
Definition: coroapi.h:469
The res_lib_votequorum_getinfo struct.
#define VOTEQUORUM_NODESTATE_DEAD
cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access)
Set read-only access for given key (key_name) or prefix, If prefix is set.
Definition: icmap.c:1241
#define VOTEQUORUM_INFO_QDEVICE_CAST_VOTE
int(* ipc_dispatch_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:263
#define VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT
const char * name
Definition: service.h:43
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
Definition: icmap.c:1105
struct memb_ring_id ring_id
Definition: totemsrp.c:64
uint64_t tracking_context
#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES
#define DEFAULT_LMS_WIN
#define LEAVE
Definition: logsys.h:322
#define NODE_FLAGS_QDEVICE_ALIVE
qb_map_iter_t * icmap_iter_t
Iterator type.
Definition: icmap.h:123
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
#define NODE_FLAGS_QDEVICE_REGISTERED
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1175
#define NODE_FLAGS_FIRST
struct qb_ipc_request_header header __attribute__((aligned(8)))
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem.nodeid", "totem.version", ...
Definition: icmap.h:85