corosync  2.4.6
main.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002-2006 MontaVista Software, Inc.
3  * Copyright (c) 2006-2012 Red Hat, Inc.
4  *
5  * All rights reserved.
6  *
7  * Author: Steven Dake (sdake@redhat.com)
8  *
9  * This software licensed under BSD license, the text of which follows:
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  * - Redistributions of source code must retain the above copyright notice,
15  * this list of conditions and the following disclaimer.
16  * - Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  * - Neither the name of the MontaVista Software, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived from this
21  * software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33  * THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
72 #include <config.h>
73 
74 #include <pthread.h>
75 #include <assert.h>
76 #include <sys/types.h>
77 #include <sys/file.h>
78 #include <sys/poll.h>
79 #include <sys/uio.h>
80 #include <sys/mman.h>
81 #include <sys/socket.h>
82 #include <sys/un.h>
83 #include <sys/time.h>
84 #include <sys/resource.h>
85 #include <sys/stat.h>
86 #include <netinet/in.h>
87 #include <arpa/inet.h>
88 #include <unistd.h>
89 #include <fcntl.h>
90 #include <stdlib.h>
91 #include <stdio.h>
92 #include <errno.h>
93 #include <signal.h>
94 #include <sched.h>
95 #include <time.h>
96 #include <semaphore.h>
97 #include <string.h>
98 
99 #include <qb/qbdefs.h>
100 #include <qb/qblog.h>
101 #include <qb/qbloop.h>
102 #include <qb/qbutil.h>
103 #include <qb/qbipcs.h>
104 
105 #include <corosync/swab.h>
106 #include <corosync/corotypes.h>
107 #include <corosync/corodefs.h>
108 #include <corosync/list.h>
109 #include <corosync/totem/totempg.h>
110 #include <corosync/logsys.h>
111 #include <corosync/icmap.h>
112 
113 #ifdef HAVE_LIBCGROUP
114 #include <libcgroup.h>
115 #endif
116 
117 #include "quorum.h"
118 #include "totemsrp.h"
119 #include "logconfig.h"
120 #include "totemconfig.h"
121 #include "main.h"
122 #include "sync.h"
123 #include "timer.h"
124 #include "util.h"
125 #include "apidef.h"
126 #include "service.h"
127 #include "schedwrk.h"
128 
129 #ifdef HAVE_SMALL_MEMORY_FOOTPRINT
130 #define IPC_LOGSYS_SIZE 1024*64
131 #else
132 #define IPC_LOGSYS_SIZE 8192*128
133 #endif
134 
135 /*
136  * LibQB adds default "*" syslog filter so we have to set syslog_priority as low
137  * as possible so filters applied later in _logsys_config_apply_per_file takes
138  * effect.
139  */
140 LOGSYS_DECLARE_SYSTEM ("corosync",
142  LOG_DAEMON,
143  LOG_EMERG);
144 
145 LOGSYS_DECLARE_SUBSYS ("MAIN");
146 
147 #define SERVER_BACKLOG 5
148 
149 static int sched_priority = 0;
150 
151 static unsigned int service_count = 32;
152 
154 
155 static struct corosync_api_v1 *api = NULL;
156 
157 static int sync_in_process = 1;
158 
159 static qb_loop_t *corosync_poll_handle;
160 
161 struct sched_param global_sched_param;
162 
163 static corosync_timer_handle_t corosync_stats_timer_handle;
164 
165 static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
166 
167 static int ip_version = AF_INET;
168 
169 qb_loop_t *cs_poll_handle_get (void)
170 {
171  return (corosync_poll_handle);
172 }
173 
174 int cs_poll_dispatch_add (qb_loop_t * handle,
175  int fd,
176  int events,
177  void *data,
178 
179  int (*dispatch_fn) (int fd,
180  int revents,
181  void *data))
182 {
183  return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data,
184  dispatch_fn);
185 }
186 
187 int cs_poll_dispatch_delete(qb_loop_t * handle, int fd)
188 {
189  return qb_loop_poll_del(handle, fd);
190 }
191 
193 {
194  int i;
195 
196  for (i = 0; i < SERVICES_COUNT_MAX; i++) {
197  if (corosync_service[i] && corosync_service[i]->exec_dump_fn) {
199  }
200  }
201 }
202 
203 static void corosync_blackbox_write_to_file (void)
204 {
205  char fname[PATH_MAX];
206  char fdata_fname[PATH_MAX];
207  char time_str[PATH_MAX];
208  struct tm cur_time_tm;
209  time_t cur_time_t;
210  ssize_t res;
211 
212  cur_time_t = time(NULL);
213  localtime_r(&cur_time_t, &cur_time_tm);
214 
215  strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm);
216  snprintf(fname, PATH_MAX, "%s/fdata-%s-%lld",
217  get_run_dir(),
218  time_str,
219  (long long int)getpid());
220 
221  if ((res = qb_log_blackbox_write_to_file(fname)) < 0) {
222  LOGSYS_PERROR(-res, LOGSYS_LEVEL_ERROR, "Can't store blackbox file");
223  return ;
224  }
225  snprintf(fdata_fname, sizeof(fdata_fname), "%s/fdata", get_run_dir());
226  unlink(fdata_fname);
227  if (symlink(fname, fdata_fname) == -1) {
228  log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'",
229  fname, fdata_fname);
230  }
231 }
232 
233 static void unlink_all_completed (void)
234 {
235  api->timer_delete (corosync_stats_timer_handle);
236  qb_loop_stop (corosync_poll_handle);
237  icmap_fini();
238 }
239 
241 {
242  corosync_service_unlink_all (api, unlink_all_completed);
243 }
244 
/*
 * Signal callback; neither num nor data is used by the visible code, which
 * always returns 0.
 *
 * NOTE(review): listing line 247 is missing from this copy of the file (the
 * embedded numbering jumps from 246 to 248), so the handler's actual work is
 * not visible here - restore it from the upstream source.
 */
245 static int32_t sig_diag_handler (int num, void *data)
246 {
248  return 0;
249 }
250 
251 static int32_t sig_exit_handler (int num, void *data)
252 {
253  log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal");
254  corosync_service_unlink_all (api, unlink_all_completed);
255  return 0;
256 }
257 
/*
 * Handler for fatal signals: dumps the blackbox, shuts logging down and
 * re-raises the signal.
 */
static void sigsegv_handler (int num)
{
	/*
	 * Restore the default disposition first so that the raise() at the end
	 * is handled by the default action rather than by this handler again.
	 */
	(void)signal (num, SIG_DFL);

	corosync_blackbox_write_to_file ();
	qb_log_fini();

	raise (num);
}
265 
266 #define LOCALHOST_IP inet_addr("127.0.0.1")
267 
268 static void *corosync_group_handle;
269 
270 static struct totempg_group corosync_group = {
271  .group = "a",
272  .group_len = 1
273 };
274 
/*
 * Intentionally empty: no locking is performed.
 */
static void serialize_lock (void)
{
	return;
}
278 
/*
 * Intentionally empty: counterpart of serialize_lock(), nothing to release.
 */
static void serialize_unlock (void)
{
	return;
}
282 
/*
 * Callback run when service synchronization has finished: clears
 * sync_in_process and passes the new state to cs_ipcs_sync_state_changed().
 *
 * NOTE(review): listing lines 285, 290 and 294 are missing from this copy
 * (the embedded numbering skips them). Line 285 is presumably the start of
 * the log call whose message string survives on line 286, and line 294 is
 * presumably the call that the "Inform totem" comment describes - restore
 * them from the upstream source.
 */
283 static void corosync_sync_completed (void)
284 {
286  "Completed service synchronization, ready to provide service.");
287  sync_in_process = 0;
288 
289  cs_ipcs_sync_state_changed(sync_in_process);
291  /*
292  * Inform totem to start using new message queue again
293  */
295 }
296 
297 static int corosync_sync_callbacks_retrieve (
298  int service_id,
299  struct sync_callbacks *callbacks)
300 {
301  if (corosync_service[service_id] == NULL) {
302  return (-1);
303  }
304 
305  if (callbacks == NULL) {
306  return (0);
307  }
308 
309  callbacks->name = corosync_service[service_id]->name;
310 
311  callbacks->sync_init = corosync_service[service_id]->sync_init;
312  callbacks->sync_process = corosync_service[service_id]->sync_process;
313  callbacks->sync_activate = corosync_service[service_id]->sync_activate;
314  callbacks->sync_abort = corosync_service[service_id]->sync_abort;
315  return (0);
316 }
317 
318 static struct memb_ring_id corosync_ring_id;
319 
/*
 * Record in icmap that node nodeid has joined.
 *
 * Three keys under "runtime.totem.pg.mrp.srp.members.<nodeid>." are used:
 * "ip", "join_count" and "status". If the "ip" key already exists, the join
 * count is incremented; otherwise the ip string and a join count of 1 are
 * created. In both cases the status is set to "joined".
 *
 * NOTE(review): listing line 342 is missing from this copy; it is presumably
 * the start of the log call whose format string survives on line 343.
 */
320 static void member_object_joined (unsigned int nodeid)
321 {
322  char member_ip[ICMAP_KEYNAME_MAXLEN];
323  char member_join_count[ICMAP_KEYNAME_MAXLEN];
324  char member_status[ICMAP_KEYNAME_MAXLEN];
325 
326  snprintf(member_ip, ICMAP_KEYNAME_MAXLEN,
327  "runtime.totem.pg.mrp.srp.members.%u.ip", nodeid);
328  snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN,
329  "runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid);
330  snprintf(member_status, ICMAP_KEYNAME_MAXLEN,
331  "runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
332 
 /* An existing "ip" key means this node has been recorded before. */
333  if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) {
334  icmap_inc(member_join_count);
335  icmap_set_string(member_status, "joined");
336  } else {
337  icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid));
338  icmap_set_uint32(member_join_count, 1);
339  icmap_set_string(member_status, "joined");
340  }
341 
343  "Member joined: %s", api->totem_ifaces_print (nodeid));
344 }
345 
/*
 * Record in icmap that node nodeid has left by setting
 * "runtime.totem.pg.mrp.srp.members.<nodeid>.status" to "left".
 *
 * NOTE(review): listing line 354 is missing from this copy; it is presumably
 * the start of the log call whose format string survives on line 355.
 */
346 static void member_object_left (unsigned int nodeid)
347 {
348  char member_status[ICMAP_KEYNAME_MAXLEN];
349 
350  snprintf(member_status, ICMAP_KEYNAME_MAXLEN,
351  "runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
352  icmap_set_string(member_status, "left");
353 
355  "Member left: %s", api->totem_ifaces_print (nodeid));
356 }
357 
358 static void confchg_fn (
359  enum totem_configuration_type configuration_type,
360  const unsigned int *member_list, size_t member_list_entries,
361  const unsigned int *left_list, size_t left_list_entries,
362  const unsigned int *joined_list, size_t joined_list_entries,
363  const struct memb_ring_id *ring_id)
364 {
365  int i;
366  int abort_activate = 0;
367 
368  if (sync_in_process == 1) {
369  abort_activate = 1;
370  }
371  sync_in_process = 1;
372  cs_ipcs_sync_state_changed(sync_in_process);
373  memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id));
374 
375  for (i = 0; i < left_list_entries; i++) {
376  member_object_left (left_list[i]);
377  }
378  for (i = 0; i < joined_list_entries; i++) {
379  member_object_joined (joined_list[i]);
380  }
381  /*
382  * Call configuration change for all services
383  */
384  for (i = 0; i < service_count; i++) {
385  if (corosync_service[i] && corosync_service[i]->confchg_fn) {
386  corosync_service[i]->confchg_fn (configuration_type,
387  member_list, member_list_entries,
388  left_list, left_list_entries,
389  joined_list, joined_list_entries, ring_id);
390  }
391  }
392 
393  if (abort_activate) {
394  sync_abort ();
395  }
396  if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
397  sync_save_transitional (member_list, member_list_entries, ring_id);
398  }
399  if (configuration_type == TOTEM_CONFIGURATION_REGULAR) {
400  sync_start (member_list, member_list_entries, ring_id);
401  }
402 }
403 
/*
 * Placeholder for privilege dropping; currently does nothing.
 */
static void priv_drop (void)
{
	/* TODO: we are still not dropping privs */
	return;
}
408 
/*
 * Detach from the terminal: fork (the parent exits with status 0, the child
 * carries on), start a new session and point stdin, stdout and stderr at
 * /dev/null.
 *
 * NOTE(review): listing lines 419, 439 and 445 are missing from this copy
 * (the embedded numbering skips them). They sit on the three error paths -
 * fork() failure, open() failure and dup2() failure - so what the function
 * does on error is not visible here; restore them from the upstream source.
 */
409 static void corosync_tty_detach (void)
410 {
411  int devnull;
412 
413  /*
414  * Disconnect from TTY if this is not a debug run
415  */
416 
417  switch (fork ()) {
418  case -1:
420  break;
421  case 0:
422  /*
423  * child which is disconnected, run this process
424  */
425  break;
426  default:
 /* Parent: its work is done once the child exists. */
427  exit (0);
428  break;
429  }
430 
431  /* Create new session */
432  (void)setsid();
433 
434  /*
435  * Map stdin/out/err to /dev/null.
436  */
437  devnull = open("/dev/null", O_RDWR);
438  if (devnull == -1) {
440  }
441 
442  if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0
443  || dup2(devnull, 2) < 0) {
444  close(devnull);
446  }
 /* Descriptors 0-2 now refer to /dev/null; the extra descriptor is not needed. */
447  close(devnull);
448 }
449 
/*
 * Raise the locked-memory resource limit to RLIM_INFINITY and lock all
 * current and future pages with mlockall().
 *
 * The setrlimit() result is not checked. RLIMIT_VMEM is used as a fallback
 * name when RLIMIT_MEMLOCK is not defined.
 *
 * NOTE(review): listing line 466 is missing from this copy; it is presumably
 * the start of the log call whose message string survives on line 467.
 */
450 static void corosync_mlockall (void)
451 {
452  int res;
453  struct rlimit rlimit;
454 
455  rlimit.rlim_cur = RLIM_INFINITY;
456  rlimit.rlim_max = RLIM_INFINITY;
457 
458 #ifndef RLIMIT_MEMLOCK
459 #define RLIMIT_MEMLOCK RLIMIT_VMEM
460 #endif
461 
462  setrlimit (RLIMIT_MEMLOCK, &rlimit);
463 
464  res = mlockall (MCL_CURRENT | MCL_FUTURE);
465  if (res == -1) {
467  "Could not lock memory of service to avoid page faults");
468  };
469 }
470 
471 
/*
 * Timer callback that copies the current totem statistics into icmap keys
 * under "runtime.totem.pg." and then re-arms itself to run again after
 * 1500 * MILLI_2_NANO_SECONDS.
 *
 * Besides the plain counters it publishes:
 *  - "firewall_enabled_or_nic_failure", derived from continuous_gather
 *    exceeding MAX_NO_CONT_GATHER (and, judging by the surviving message
 *    text, from a sendmsg-failure threshold whose condition is on a missing
 *    line);
 *  - a per-interface "faulty" flag;
 *  - averages (mtt_rx_token, avg_token_workload, avg_backlog_calc) computed
 *    by walking the token history backwards from latest_token until the
 *    entry before the current one is earliest_token.
 *
 * The data argument is not used by the visible code.
 *
 * NOTE(review): listing lines 512, 518, 521, 529 and 571 are missing from
 * this copy (the embedded numbering skips them), which leaves several
 * statements and conditions below incomplete - restore them from the
 * upstream source before relying on this text.
 */
472 static void corosync_totem_stats_updater (void *data)
473 {
474  totempg_stats_t * stats;
475  uint32_t total_mtt_rx_token;
476  uint32_t total_backlog_calc;
477  uint32_t total_token_holdtime;
478  int t, prev, i;
479  int32_t token_count;
480  char key_name[ICMAP_KEYNAME_MAXLEN];
481  const char *cstr;
482 
483  stats = api->totem_get_stats();
484 
485  icmap_set_uint32("runtime.totem.pg.msg_reserved", stats->msg_reserved);
486  icmap_set_uint32("runtime.totem.pg.msg_queue_avail", stats->msg_queue_avail);
487  icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_tx", stats->mrp->srp->orf_token_tx);
488  icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_rx", stats->mrp->srp->orf_token_rx);
489  icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_tx", stats->mrp->srp->memb_merge_detect_tx);
490  icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_rx", stats->mrp->srp->memb_merge_detect_rx);
491  icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_tx", stats->mrp->srp->memb_join_tx);
492  icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_rx", stats->mrp->srp->memb_join_rx);
493  icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_tx", stats->mrp->srp->mcast_tx);
494  icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_retx", stats->mrp->srp->mcast_retx);
495  icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_rx", stats->mrp->srp->mcast_rx);
496  icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_tx", stats->mrp->srp->memb_commit_token_tx);
497  icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_rx", stats->mrp->srp->memb_commit_token_rx);
498  icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_tx", stats->mrp->srp->token_hold_cancel_tx);
499  icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_rx", stats->mrp->srp->token_hold_cancel_rx);
500  icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_entered", stats->mrp->srp->operational_entered);
501  icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_token_lost", stats->mrp->srp->operational_token_lost);
502  icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_entered", stats->mrp->srp->gather_entered);
503  icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_token_lost", stats->mrp->srp->gather_token_lost);
504  icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_entered", stats->mrp->srp->commit_entered);
505  icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_token_lost", stats->mrp->srp->commit_token_lost);
506  icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_entered", stats->mrp->srp->recovery_entered);
507  icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_token_lost", stats->mrp->srp->recovery_token_lost);
508  icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
509  icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
510  icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
 /* NOTE(review): the value argument of the next call (listing line 512) is missing. */
511  icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures",
513 
514  icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
515  stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
516 
 /* NOTE(review): the second half of this condition (listing line 518) is missing. */
517  if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
519  cstr = "";
520 
 /* NOTE(review): the condition guarding the next assignment (listing line 521) is missing. */
522  cstr = "number of multicast sendmsg failures is above threshold";
523  }
524 
 /* Checked last, so the gather-state reason wins when it applies. */
525  if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER) {
526  cstr = "totem is continuously in gather state";
527  }
528 
 /* NOTE(review): the start of the log call (listing line 529) is missing. */
530  "Totem is unable to form a cluster because of an "
531  "operating system or network fault (reason: %s). The most common "
532  "cause of this message is that the local firewall is "
533  "configured improperly.", cstr);
534  icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1);
535  } else {
536  icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0);
537  }
538 
539  for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {
540  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.rrp.%u.faulty", i);
541  icmap_set_uint8(key_name, stats->mrp->srp->rrp->faulty[i]);
542  }
543  total_mtt_rx_token = 0;
544  total_token_holdtime = 0;
545  total_backlog_calc = 0;
546  token_count = 0;
547  t = stats->mrp->srp->latest_token;
 /* Walk the token history backwards; the index wraps from 0 to TOTEM_TOKEN_STATS_MAX - 1. */
548  while (1) {
549  if (t == 0)
550  prev = TOTEM_TOKEN_STATS_MAX - 1;
551  else
552  prev = t - 1;
553  if (prev == stats->mrp->srp->earliest_token)
554  break;
555  /* if tx == 0, then dropped token (not ours) */
556  if (stats->mrp->srp->token[t].tx != 0 ||
557  (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) {
558  total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx);
559  total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx);
560  total_backlog_calc += stats->mrp->srp->token[t].backlog_calc;
561  token_count++;
562  }
563  t = prev;
564  }
 /* Averages are only published when at least one sample was counted. */
565  if (token_count) {
566  icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", (total_mtt_rx_token / token_count));
567  icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", (total_token_holdtime / token_count));
568  icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", (total_backlog_calc / token_count));
569  }
570 
 /* NOTE(review): listing line 571 is missing here. */
572 
 /* Re-arm: the timer is re-added on every run. */
573  api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
574  corosync_totem_stats_updater,
575  &corosync_stats_timer_handle);
576 }
577 
578 static void corosync_totem_stats_init (void)
579 {
580  icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", 0);
581  icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", 0);
582  icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", 0);
583 
584  /* start stats timer */
585  api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
586  corosync_totem_stats_updater,
587  &corosync_stats_timer_handle);
588 }
589 
/*
 * Delivery callback for a message received from node nodeid.
 *
 * The message starts with a struct qb_ipc_request_header whose id encodes
 * the target: the upper 16 bits select the service, the lower 16 bits the
 * function within that service. The id is byte-swapped first when
 * endian_conversion_required is set.
 *
 * Messages for a service slot that is NULL are dropped silently; messages
 * whose function id is not below the service's exec_engine_count are dropped
 * with a warning. Otherwise the matching service_stats_rx counter is bumped.
 *
 * msg_len is not used by the visible code.
 *
 * NOTE(review): `service` is used to index corosync_service without a
 * visible range check - confirm against the array size.
 * NOTE(review): listing lines 627 and 631 are missing from this copy; they
 * are presumably the starts of the two calls whose argument lists survive on
 * lines 628 and 632 (endian conversion and the handler invocation).
 */
590 static void deliver_fn (
591  unsigned int nodeid,
592  const void *msg,
593  unsigned int msg_len,
594  int endian_conversion_required)
595 {
596  const struct qb_ipc_request_header *header;
597  int32_t service;
598  int32_t fn_id;
599  uint32_t id;
600 
601  header = msg;
602  if (endian_conversion_required) {
603  id = swab32 (header->id);
604  } else {
605  id = header->id;
606  }
607 
608  /*
609  * Call the proper executive handler
610  */
611  service = id >> 16;
612  fn_id = id & 0xffff;
613 
614  if (!corosync_service[service]) {
615  return;
616  }
617  if (fn_id >= corosync_service[service]->exec_engine_count) {
618  log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)",
619  fn_id, service, corosync_service[service]->exec_engine_count);
620  return;
621  }
622 
623  icmap_fast_inc(service_stats_rx[service][fn_id]);
624 
625  if (endian_conversion_required) {
626  assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
628  ((void *)msg);
629  }
630 
632  (msg, nodeid);
633 }
634 
636  const struct iovec *iovec,
637  unsigned int iov_len,
638  unsigned int guarantee)
639 {
640  const struct qb_ipc_request_header *req = iovec->iov_base;
641  int32_t service;
642  int32_t fn_id;
643 
644  service = req->id >> 16;
645  fn_id = req->id & 0xffff;
646 
647  if (corosync_service[service]) {
648  icmap_fast_inc(service_stats_tx[service][fn_id]);
649  }
650 
651  return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee));
652 }
653 
/*
 * Load the ring sequence number for addr from "<run dir>/ringid_<addr>", or
 * create that file with sequence 0 when it cannot be opened or does not
 * yield sizeof(uint64_t) bytes. In both cases memb_ring_id->rep is set to a
 * copy of addr, which is asserted to be non-zero.
 *
 * NOTE(review): umask(0) is called on the create path and the previous mask
 * is not restored in the visible code.
 * NOTE(review): listing lines 683, 686, 689 and 692 are missing from this
 * copy; they sit on the two failure paths (write failed, create failed), so
 * only the message strings of the log calls are visible and the action taken
 * afterwards is not - restore them from the upstream source.
 */
654 static void corosync_ring_id_create_or_load (
655  struct memb_ring_id *memb_ring_id,
656  const struct totem_ip_address *addr)
657 {
658  int fd;
659  int res = 0;
660  char filename[PATH_MAX];
661 
662  snprintf (filename, sizeof(filename), "%s/ringid_%s",
663  get_run_dir(), totemip_print (addr));
664  fd = open (filename, O_RDONLY, 0700);
665  /*
666  * If file can be opened and read, read the ring id
667  */
668  if (fd != -1) {
669  res = read (fd, &memb_ring_id->seq, sizeof (uint64_t));
670  close (fd);
671  }
672  /*
673  * If file could not be opened or read, create a new ring id
674  */
675  if ((fd == -1) || (res != sizeof (uint64_t))) {
676  memb_ring_id->seq = 0;
677  umask(0);
678  fd = open (filename, O_CREAT|O_RDWR, 0700);
679  if (fd != -1) {
680  res = write (fd, &memb_ring_id->seq, sizeof (uint64_t));
681  close (fd);
 /* Only a -1 result is treated as failure here; a short write is not detected. */
682  if (res == -1) {
684  "Couldn't write ringid file '%s'", filename);
685 
687  }
688  } else {
690  "Couldn't create ringid file '%s'", filename);
691 
693  }
694  }
695 
696  totemip_copy(&memb_ring_id->rep, addr);
697  assert (!totemip_zero_check(&memb_ring_id->rep));
698 }
699 
/*
 * Write memb_ring_id->seq to "<run dir>/ringid_<addr>".
 *
 * The file is first opened write-only; if that fails, it is opened again
 * with O_CREAT. A write that does not transfer exactly
 * sizeof(memb_ring_id->seq) bytes is treated as a failure.
 *
 * NOTE(review): listing lines 716, 720, 722, 727 and 731 are missing from
 * this copy. Only the message strings of the log calls survive, and whatever
 * follows the error messages (lines 720 and 731) is not visible. As shown,
 * nothing between the fd == -1 check and the write() stops execution, so the
 * missing line 720 presumably does not return - restore from upstream.
 */
700 static void corosync_ring_id_store (
701  const struct memb_ring_id *memb_ring_id,
702  const struct totem_ip_address *addr)
703 {
704  char filename[PATH_MAX];
705  int fd;
706  int res;
707 
708  snprintf (filename, sizeof(filename), "%s/ringid_%s",
709  get_run_dir(), totemip_print (addr));
710 
711  fd = open (filename, O_WRONLY, 0700);
712  if (fd == -1) {
713  fd = open (filename, O_CREAT|O_RDWR, 0700);
714  }
715  if (fd == -1) {
717  "Couldn't store new ring id %llx to stable storage",
718  memb_ring_id->seq);
719 
721  }
723  "Storing new sequence id for ring %llx", memb_ring_id->seq);
724  res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq));
725  close (fd);
726  if (res != sizeof(memb_ring_id->seq)) {
728  "Couldn't store new ring id %llx to stable storage",
729  memb_ring_id->seq);
730 
732  }
733 }
734 
735 static qb_loop_timer_handle recheck_the_q_level_timer;
737 {
738  totempg_check_q_level(corosync_group_handle);
740  qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC,
741  NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer);
742  }
743 }
744 
747 };
748 
749 
751  unsigned int service,
752  unsigned int id,
753  const void *msg,
754  void *sending_allowed_private_data)
755 {
757  (struct sending_allowed_private_data_struct *)sending_allowed_private_data;
758  struct iovec reserve_iovec;
759  struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg;
760  int sending_allowed;
761 
762  reserve_iovec.iov_base = (char *)header;
763  reserve_iovec.iov_len = header->size;
764 
766  corosync_group_handle,
767  &reserve_iovec, 1);
768  if (pd->reserved_msgs == -1) {
769  return -EINVAL;
770  }
771 
772  sending_allowed = QB_FALSE;
773  if (corosync_quorum_is_quorate() == 1 ||
774  corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) {
775  // we are quorate
776  // now check flow control
777  if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) {
778  sending_allowed = QB_TRUE;
779  } else if (pd->reserved_msgs && sync_in_process == 0) {
780  sending_allowed = QB_TRUE;
781  } else if (pd->reserved_msgs == 0) {
782  return -ENOBUFS;
783  } else /* (sync_in_process) */ {
784  return -EINPROGRESS;
785  }
786  } else {
787  return -EHOSTUNREACH;
788  }
789 
790  return (sending_allowed);
791 }
792 
/*
 * Release path for the private data handed to corosync_sending_allowed().
 * When reserved_msgs is -1 the function returns without doing anything.
 *
 * NOTE(review): listing lines 795 and 801 are missing from this copy. Line
 * 795 is presumably the declaration of `pd` whose initializer survives on
 * line 796; line 801 is presumably the actual release call - restore them
 * from the upstream source.
 */
793 void corosync_sending_allowed_release (void *sending_allowed_private_data)
794 {
796  (struct sending_allowed_private_data_struct *)sending_allowed_private_data;
797 
798  if (pd->reserved_msgs == -1) {
799  return;
800  }
802 }
803 
805 {
806  int ret = 0;
807 
808  assert (source != NULL);
809  if (source->nodeid == totempg_my_nodeid_get ()) {
810  ret = 1;
811  }
812  return ret;
813 }
814 
816  mar_message_source_t *source,
817  void *conn)
818 {
819  assert ((source != NULL) && (conn != NULL));
820  memset (source, 0, sizeof (mar_message_source_t));
821  source->nodeid = totempg_my_nodeid_get ();
822  source->conn = conn;
823 }
824 
827  qb_loop_timer_handle handle;
828  unsigned long long tv_prev;
829  unsigned long long max_tv_diff;
830 };
831 
832 static void timer_function_scheduler_timeout (void *data)
833 {
834  struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
835  unsigned long long tv_current;
836  unsigned long long tv_diff;
837  uint64_t schedmiss_event_tstamp;
838 
839  tv_current = qb_util_nano_current_get ();
840 
841  if (timeout_data->tv_prev == 0) {
842  /*
843  * Initial call -> just pretent everything is ok
844  */
845  timeout_data->tv_prev = tv_current;
846  timeout_data->max_tv_diff = 0;
847  }
848 
849  tv_diff = tv_current - timeout_data->tv_prev;
850  timeout_data->tv_prev = tv_current;
851 
852  if (tv_diff > timeout_data->max_tv_diff) {
853  schedmiss_event_tstamp = qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC;
854 
855  log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled (@%" PRIu64 ") for %0.4f ms "
856  "(threshold is %0.4f ms). Consider token timeout increase.",
857  schedmiss_event_tstamp,
858  (float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
859 
860  icmap_set_float("runtime.schedmiss.delay", (float)tv_diff / QB_TIME_NS_IN_MSEC);
861  icmap_set_uint64("runtime.schedmiss.timestamp", schedmiss_event_tstamp);
862  }
863 
864  /*
865  * Set next threshold, because token_timeout can change
866  */
867  timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8;
868  qb_loop_timer_add (corosync_poll_handle,
869  QB_LOOP_MED,
870  timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3,
871  timeout_data,
872  timer_function_scheduler_timeout,
873  &timeout_data->handle);
874 }
875 
876 
/*
 * Try to switch the process to SCHED_RR at the maximum priority reported by
 * sched_get_priority_max(), and give the libqb logging thread the same
 * priority when qb_log_thread_priority_set() is available.
 *
 * Returns 0 by default and -1 when the maximum priority cannot be obtained,
 * when sched_setscheduler() fails, or when the platform lacks the required
 * scheduling functions (the #else branch).
 *
 * On sched_setscheduler() failure, global_sched_param.sched_priority is
 * reset to 0 and the logging thread is set back to SCHED_OTHER.
 *
 * NOTE(review): listing lines 889, 909, 912, 916 and 922 are missing from
 * this copy. Most are presumably the starts of the log calls whose message
 * strings survive; line 912 follows the "Can't continue" message, so what
 * happens after a logging-thread priority failure is not visible here -
 * restore from the upstream source.
 */
877 static int corosync_set_rr_scheduler (void)
878 {
879  int ret_val = 0;
880 
881 #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
882  int res;
883 
884  sched_priority = sched_get_priority_max (SCHED_RR);
885  if (sched_priority != -1) {
886  global_sched_param.sched_priority = sched_priority;
887  res = sched_setscheduler (0, SCHED_RR, &global_sched_param);
888  if (res == -1) {
890  "Could not set SCHED_RR at priority %d",
891  global_sched_param.sched_priority);
892 
893  global_sched_param.sched_priority = 0;
894 #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
895  qb_log_thread_priority_set (SCHED_OTHER, 0);
896 #endif
897  ret_val = -1;
898  } else {
899 
900  /*
901  * Turn on SCHED_RR in logsys system
902  */
903 #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
904  res = qb_log_thread_priority_set (SCHED_RR, sched_priority);
905 #else
 /* Without the libqb call the logging thread priority cannot be raised. */
906  res = -1;
907 #endif
908  if (res == -1) {
910  "Could not set logsys thread priority."
911  " Can't continue because of priority inversions.");
913  }
914  }
915  } else {
917  "Could not get maximum scheduler priority");
918  sched_priority = 0;
919  ret_val = -1;
920  }
921 #else
923  "The Platform is missing process priority setting features. Leaving at default.");
924  ret_val = -1;
925 #endif
926 
927  return (ret_val);
928 }
929 
930 
/*
 * Local replacement for basename(), whose man page warns about thread-safety
 * and portability problems.
 *
 * Returns a pointer into file_name: the part after the last '/', or
 * file_name itself when it contains no '/'. Nothing is copied or modified.
 */
static const char *corosync_basename(const char *file_name)
{
	const char *last_slash = strrchr (file_name, '/');

	return (last_slash != NULL) ? last_slash + 1 : file_name;
}
943 
/*
 * printf-style logging entry point that forwards to libqb.
 *
 * level, subsys          passed through to libqb unchanged
 * function_name          name of the calling function
 * file_name              source path; only its basename is passed on
 * file_line              source line
 * format, ...            printf-style message (checked by the compiler via
 *                        the format attribute below)
 */
static void
_logsys_log_printf(int level, int subsys,
		const char *function_name,
		const char *file_name,
		int file_line,
		const char *format,
		...) __attribute__((format(printf, 6, 7)));

static void
_logsys_log_printf(int level, int subsys,
		const char *function_name,
		const char *file_name,
		int file_line,
		const char *format, ...)
{
	va_list args;

	va_start(args, format);
	qb_log_from_external_source_va(function_name,
		corosync_basename(file_name),
		format,
		level,
		file_line,
		subsys,
		args);
	va_end(args);
}
967 
/*
 * icmap change-notification callback for the "runtime.blackbox." control
 * keys. Only key_name is examined; event, new_val, old_val and user_data are
 * not used by the visible code.
 *
 *  - "runtime.blackbox.dump_flight_data": prints a marker to stderr and
 *    writes the blackbox to a file.
 *  - "runtime.blackbox.dump_state": prints a marker to stderr.
 *
 * NOTE(review): listing line 981 is missing from this copy; it is presumably
 * the call that performs the state dump - restore it from the upstream
 * source.
 */
968 static void fplay_key_change_notify_fn (
969  int32_t event,
970  const char *key_name,
971  struct icmap_notify_value new_val,
972  struct icmap_notify_value old_val,
973  void *user_data)
974 {
975  if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) {
976  fprintf(stderr,"Writetofile\n");
977  corosync_blackbox_write_to_file ();
978  }
979  if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) {
980  fprintf(stderr,"statefump\n");
982  }
983 }
984 
/*
 * Create the two blackbox control keys with the value "no" and register
 * fplay_key_change_notify_fn() as the tracking callback for each of them.
 *
 * The same `track` variable receives the handle from both icmap_track_add()
 * calls, so the first handle is overwritten; the return values are not
 * checked.
 *
 * NOTE(review): listing lines 993 and 997 are missing from this copy; they
 * presumably carry the second argument of each icmap_track_add() call -
 * restore them from the upstream source.
 */
985 static void corosync_fplay_control_init (void)
986 {
987  icmap_track_t track = NULL;
988 
989  icmap_set_string("runtime.blackbox.dump_flight_data", "no");
990  icmap_set_string("runtime.blackbox.dump_state", "no");
991 
992  icmap_track_add("runtime.blackbox.dump_flight_data",
994  fplay_key_change_notify_fn,
995  NULL, &track);
996  icmap_track_add("runtime.blackbox.dump_state",
998  fplay_key_change_notify_fn,
999  NULL, &track);
1000 }
1001 
1002 /*
1003  * Set RO flag for keys which either don't make sense for the user to change (statistics)
1004  * or which, when changed, are not reflected at runtime (totem.crypto_cipher, ...).
1005  *
1006  * Also some RO keys cannot be determined in this stage, so they are set later in
1007  * other functions (like nodelist.local_node_pos, ...)
1008  */
1009 static void set_icmap_ro_keys_flag (void)
1010 {
1011  /*
1012  * Set RO flag for all keys of internal configuration and runtime statistics
1013  */
1014  icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE);
1015  icmap_set_ro_access("runtime.connections.", CS_TRUE, CS_TRUE);
1016  icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE);
1017  icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE);
1018  icmap_set_ro_access("runtime.config.", CS_TRUE, CS_TRUE);
1019  icmap_set_ro_access("uidgid.config.", CS_TRUE, CS_TRUE);
1020 
1021  /*
1022  * Set RO flag for constrete keys of configuration which can't be changed
1023  * during runtime
1024  */
1025  icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE);
1026  icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE);
1027  icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE);
1028  icmap_set_ro_access("totem.ip_version", CS_FALSE, CS_TRUE);
1029  icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE);
1030  icmap_set_ro_access("totem.transport", CS_FALSE, CS_TRUE);
1031  icmap_set_ro_access("totem.cluster_name", CS_FALSE, CS_TRUE);
1032  icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE);
1033  icmap_set_ro_access("totem.threads", CS_FALSE, CS_TRUE);
1034  icmap_set_ro_access("totem.version", CS_FALSE, CS_TRUE);
1035  icmap_set_ro_access("totem.nodeid", CS_FALSE, CS_TRUE);
1036  icmap_set_ro_access("totem.clear_node_high_bit", CS_FALSE, CS_TRUE);
1037  icmap_set_ro_access("qb.ipc_type", CS_FALSE, CS_TRUE);
1038  icmap_set_ro_access("config.reload_in_progress", CS_FALSE, CS_TRUE);
1039  icmap_set_ro_access("config.totemconfig_reload_in_progress", CS_FALSE, CS_TRUE);
1040 }
1041 
/*
 * Bring up everything that depends on totem being ready: IPC, the totem
 * statistics timer, the blackbox control keys and the sync subsystem
 * (with corosync_sync_callbacks_retrieve and corosync_sync_completed as its
 * callbacks).
 *
 * NOTE(review): listing lines 1049 and 1052 are missing from this copy.
 * Line 1049 is the statement that assigns `res` (judging by the error
 * message, the default-services initialization), and line 1052 is whatever
 * follows the error log - restore them from the upstream source.
 */
1042 static void main_service_ready (void)
1043 {
1044  int res;
1045 
1046  /*
1047  * This must occur after totempg is initialized because "this_ip" must be set
1048  */
1050  if (res == -1) {
1051  log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services");
1053  }
1054  cs_ipcs_init();
1055  corosync_totem_stats_init ();
1056  corosync_fplay_control_init ();
1057  sync_init (
1058  corosync_sync_callbacks_retrieve,
1059  corosync_sync_completed);
1060 }
1061 
/*
 * Take an exclusive lock on lockfile and write pid into it.
 *
 * Steps: open (creating if needed, mode 0640), take a whole-file write lock
 * with fcntl(F_SETLK), truncate, write the pid followed by a newline, and
 * set FD_CLOEXEC on the descriptor. On success the descriptor is
 * deliberately left open and COROSYNC_DONE_EXIT is returned.
 *
 * Failure to open returns COROSYNC_DONE_ACQUIRE_LOCK. Failure to lock closes
 * the descriptor; any later failure also unlinks the lock file first.
 *
 * NOTE(review): listing lines 1091, 1097, 1106, 1120, 1128 and 1135 are
 * missing from this copy. Each sits right before a goto on an error path and
 * presumably assigns `err`; as shown here, `err` would still hold
 * COROSYNC_DONE_EXIT on those paths - restore from the upstream source.
 * NOTE(review): pid is formatted with "%u"; confirm that matches pid_t on
 * the target platforms.
 */
1062 static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid)
1063 {
1064  struct flock lock;
1065  enum e_corosync_done err;
1066  char pid_s[17];
1067  int fd_flag;
1068  int lf;
1069 
1070  err = COROSYNC_DONE_EXIT;
1071 
1072  lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
1073  if (lf == -1) {
1074  log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.");
1075  return (COROSYNC_DONE_ACQUIRE_LOCK);
1076  }
1077 
1078 retry_fcntl:
 /* l_start 0 with l_len 0 requests a lock over the whole file. */
1079  lock.l_type = F_WRLCK;
1080  lock.l_start = 0;
1081  lock.l_whence = SEEK_SET;
1082  lock.l_len = 0;
1083  if (fcntl (lf, F_SETLK, &lock) == -1) {
1084  switch (errno) {
1085  case EINTR:
1086  goto retry_fcntl;
1087  break;
1088  case EAGAIN:
1089  case EACCES:
1090  log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.");
1092  goto error_close;
1093  break;
1094  default:
1095  log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s",
1096  strerror(errno));
1098  goto error_close;
1099  break;
1100  }
1101  }
1102 
1103  if (ftruncate (lf, 0) == -1) {
1104  log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s",
1105  strerror (errno));
1107  goto error_close_unlink;
1108  }
1109 
1110  memset (pid_s, 0, sizeof (pid_s));
1111  snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid);
1112 
1113 retry_write:
1114  if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) {
1115  if (errno == EINTR) {
1116  goto retry_write;
1117  } else {
1118  log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. "
1119  "Error was %s", strerror (errno));
1121  goto error_close_unlink;
1122  }
1123  }
1124 
1125  if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
1126  log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. "
1127  "Error was %s", strerror (errno));
1129  goto error_close_unlink;
1130  }
1131  fd_flag |= FD_CLOEXEC;
1132  if (fcntl (lf, F_SETFD, fd_flag) == -1) {
1133  log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. "
1134  "Error was %s", strerror (errno));
1136  goto error_close_unlink;
1137  }
1138 
 /* Success: lf stays open on purpose, closing it would drop the lock. */
1139  return (err);
1140 
1141 error_close_unlink:
1142  unlink (lockfile);
1143 error_close:
1144  close (lf);
1145 
1146  return (err);
1147 }
1148 
/*
 * Try to move the current process into the root ("/") cpu cgroup.
 *
 * Only does real work when built with HAVE_LIBCGROUP; otherwise it simply
 * returns -1. With libcgroup it looks up the current cpu cgroup path,
 * returns early if that is already "/", otherwise attaches the task to a
 * root cgroup with a "cpu" controller and re-reads the path to verify the
 * move.
 *
 * Returns 0 when the process is (or already was) in the root cpu cgroup,
 * -1 on any failure. Failures are logged at warning level.
 */
static int corosync_move_to_root_cgroup(void) {
	int res = -1;
#ifdef HAVE_LIBCGROUP
	int cg_ret;
	struct cgroup *root_cgroup = NULL;
	struct cgroup_controller *root_cpu_cgroup_controller = NULL;
	char *current_cgroup_path = NULL;

	cg_ret = cgroup_init();
	if (cg_ret) {
		log_printf(LOGSYS_LEVEL_WARNING, "Unable to initialize libcgroup: %s ",
		    cgroup_strerror(cg_ret));

		goto exit_res;
	}

	cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
	if (cg_ret) {
		log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
		    cgroup_strerror(cg_ret));

		goto exit_res;
	}

	if (strcmp(current_cgroup_path, "/") == 0) {
		log_printf(LOGSYS_LEVEL_DEBUG, "Corosync is already in root cgroup path");

		res = 0;
		goto exit_res;
	}

	root_cgroup = cgroup_new_cgroup("/");
	if (root_cgroup == NULL) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup");

		goto exit_res;
	}

	root_cpu_cgroup_controller = cgroup_add_controller(root_cgroup, "cpu");
	if (root_cpu_cgroup_controller == NULL) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup cpu controller");

		goto exit_res;
	}

	cg_ret = cgroup_attach_task(root_cgroup);
	if (cg_ret) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't attach task to root cgroup: %s ",
		    cgroup_strerror(cg_ret));

		goto exit_res;
	}

	/*
	 * The path string from the first lookup is owned by us; release it
	 * before the pointer is overwritten by the verification lookup.
	 */
	free(current_cgroup_path);
	current_cgroup_path = NULL;

	cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
	if (cg_ret) {
		log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
		    cgroup_strerror(cg_ret));

		goto exit_res;
	}

	if (strcmp(current_cgroup_path, "/") == 0) {
		log_printf(LOGSYS_LEVEL_NOTICE, "Corosync successfully moved to root cgroup");
		res = 0;
	} else {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't move Corosync to root cgroup");
	}

exit_res:
	/* free(NULL) is a no-op, so this is safe on every path */
	free(current_cgroup_path);

	if (root_cgroup != NULL) {
		cgroup_free(&root_cgroup);
	}

	/*
	 * libcgroup doesn't define something like cgroup_fini so there is no way to clean
	 * its cache. It has to be called when libcgroup authors decide to implement it.
	 */

#endif
	return (res);
}
1230 
1231 
/*
 * Daemon entry point.
 *
 * Visible flow: parse the command-line flags (-f, -p, -P, -R, -r, -t, -v),
 * install the SIGSEGV/SIGABRT handlers, initialize icmap and parse the
 * configuration, check and chdir into the run directory, read and validate
 * the totem configuration, optionally move to the root cpu cgroup, set up
 * scheduling/priority, optionally detach from the TTY, lock memory, create
 * the qb loop and register its signal handlers, start the log thread, take
 * the pid lock file, initialize totempg, drop privileges, run the loop and
 * finally tear down totempg, the loop and the lock file.
 *
 * Returns EXIT_SUCCESS after the main loop finishes or after -v, and
 * EXIT_FAILURE for an invalid -P value or an unknown option.
 *
 * NOTE(review): the line numbers embedded in this listing skip values in
 * many places (e.g. 1273, 1292, 1306, 1323, 1340, 1355, 1369, 1419, 1444,
 * 1529), so some statements are not visible in this view -- presumably
 * exit calls after fatal errors and the first lines of a few calls.
 * Confirm against the original file before relying on this listing.
 */
1232 int main (int argc, char **argv, char **envp)
1233 {
1234  const char *error_string;
1235  struct totem_config totem_config;
1236  int res, ch;
1237  int background, sched_rr, prio, testonly, move_to_root_cgroup;
1238  struct stat stat_out;
1239  enum e_corosync_done flock_err;
1240  uint64_t totem_config_warnings;
1241  struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
1242  long int tmpli;
1243  char *ep;
1244 
1245  /* default configuration
1246  */
1247  background = 1;
1248  sched_rr = 1;
1249  prio = 0;
1250  testonly = 0;
1251  move_to_root_cgroup = 1;
1252 
 /* In the option string only -P takes an argument. */
1253  while ((ch = getopt (argc, argv, "fP:pRrtv")) != EOF) {
1254 
1255  switch (ch) {
1256  case 'f':
1257  background = 0;
1258  break;
1259  case 'p':
1260  sched_rr = 0;
1261  break;
1262  case 'P':
 /*
  * "max" selects INT_MIN and "min" selects INT_MAX; anything else must
  * be a complete base-10 integer that fits in an int. The value is
  * later handed to setpriority().
  */
1263  if (strcmp(optarg, "max") == 0) {
1264  prio = INT_MIN;
1265  } else if (strcmp(optarg, "min") == 0) {
1266  prio = INT_MAX;
1267  } else {
1268  errno = 0;
1269 
1270  tmpli = strtol(optarg, &ep, 10);
1271  if (errno != 0 || *ep != '\0' || tmpli > INT_MAX || tmpli < INT_MIN) {
1272  fprintf(stderr, "Priority value %s is invalid", optarg);
1274  return EXIT_FAILURE;
1275  }
1276 
1277  prio = tmpli;
1278  }
1279  break;
1280  case 'R':
1281  move_to_root_cgroup = 0;
1282  break;
1283  case 'r':
1284  sched_rr = 1;
1285  break;
1286  case 't':
1287  testonly = 1;
1288  break;
1289  case 'v':
1290  printf ("Corosync Cluster Engine, version '%s'\n", VERSION);
1291  printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n");
1293  return EXIT_SUCCESS;
1294 
1295  break;
1296  default:
1297  fprintf(stderr, \
1298  "usage:\n"\
1299  " -f : Start application in foreground.\n"\
1300  " -p : Do not set realtime scheduling.\n"\
1301  " -r : Set round robin realtime scheduling (default).\n"\
1302  " -R : Do not try move corosync to root cpu cgroup (valid when built with libcgroup)\n" \
1303  " -P num : Set priority of process (no effect when -r is used)\n"\
1304  " -t : Test configuration and exit.\n"\
1305  " -v : Display version and SVN revision of Corosync and exit.\n");
1307  return EXIT_FAILURE;
1308  }
1309  }
1310 
1311 
1312  /*
1313  * Other signals are registered later via qb_loop_signal_add
1314  */
1315  (void)signal (SIGSEGV, sigsegv_handler);
1316  (void)signal (SIGABRT, sigsegv_handler);
1317 #if MSG_NOSIGNAL != 0
1318  (void)signal (SIGPIPE, SIG_IGN);
1319 #endif
1320 
 /*
  * NOTE(review): in this view the branch below only logs; listing line
  * 1323 is missing, presumably an exit call -- confirm.
  */
1321  if (icmap_init() != CS_OK) {
1322  log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component.");
1324  }
1325  set_icmap_ro_keys_flag();
1326 
1327  /*
1328  * Initialize the corosync_api_v1 definition
1329  */
1330  api = apidef_get ();
1331 
1332  res = coroparse_configparse(icmap_get_global_map(), &error_string);
1333  if (res == -1) {
1334  /*
1335  * Logsys can't log properly at this early stage, and we need to get this message out
1336  *
1337  */
1338  fprintf (stderr, "%s\n", error_string);
1339  syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
1341  }
1342 
1343  res = corosync_log_config_read (&error_string);
1344  if (res == -1) {
1345  /*
1346  * if we are here, we _must_ flush the logsys queue
1347  * and try to inform that we couldn't read the config.
1348  * this is a desperate attempt before certain death
1349  * and there is no guarantee that we can print to stderr
1350  * nor that logsys is sending the messages where we expect.
1351  */
1352  log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
1353  fprintf(stderr, "%s", error_string);
1354  syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
1356  }
1357 
 /* The startup banner is skipped in test-only (-t) mode. */
1358  if (!testonly) {
1359  log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.", VERSION);
1360  log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "");
1361  }
1362 
1363  /*
1364  * Make sure required directory is present
1365  */
1366  res = stat (get_run_dir(), &stat_out);
1367  if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
1368  log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.", get_run_dir());
1370  }
1371 
1372  res = chdir(get_run_dir());
1373  if (res == -1) {
1374  log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to run directory %s. "
1375  "Please make sure it has correct context and rights.", get_run_dir());
1377  }
1378 
1379  res = totem_config_read (&totem_config, &error_string, &totem_config_warnings);
1380  if (res == -1) {
1381  log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
1383  }
1384 
 /*
  * totem_config_warnings is a bit mask filled in by totem_config_read();
  * each set bit gets its own warning, and any non-zero mask also gets
  * the generic migration hint below.
  */
1385  if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) {
1386  log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored.");
1387  }
1388 
1389  if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) {
1390  log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated.");
1391  }
1392 
1393  if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) {
1394  log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used.");
1395  }
1396 
1397  if (totem_config_warnings & TOTEM_CONFIG_BINDNETADDR_NODELIST_SET) {
1398  log_printf (LOGSYS_LEVEL_WARNING, "interface section bindnetaddr is used together with nodelist. "
1399  "Nodelist one is going to be used.");
1400  }
1401 
1402  if (totem_config_warnings != 0) {
1403  log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist.");
1404  }
1405 
1406  res = totem_config_keyread (&totem_config, &error_string);
1407  if (res == -1) {
1408  log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
1410  }
1411 
1412  res = totem_config_validate (&totem_config, &error_string);
1413  if (res == -1) {
1414  log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
1416  }
1417 
 /*
  * NOTE(review): the body of this branch is not visible (listing line
  * 1419 is missing); given the -t usage text it presumably exits here --
  * confirm.
  */
1418  if (testonly) {
1420  }
1421 
1422 
1423  /*
1424  * Try to move corosync into root cpu cgroup. Failure is not fatal and
1425  * error is deliberately ignored.
1426  */
1427  if (move_to_root_cgroup) {
1428  (void)corosync_move_to_root_cgroup();
1429  }
1430 
1431  /*
1432  * Set round robin realtime scheduling with priority 99
1433  */
 /*
  * When sched_rr is set, any -P value is overwritten: prio becomes
  * INT_MIN if corosync_set_rr_scheduler() returns non-zero, and 0
  * otherwise (which skips the setpriority() call below).
  */
1434  if (sched_rr) {
1435  if (corosync_set_rr_scheduler () != 0) {
1436  prio = INT_MIN;
1437  } else {
1438  prio = 0;
1439  }
1440  }
1441 
 /*
  * NOTE(review): the start of the statement inside the inner branch is
  * not visible (listing line 1444 is missing); only its trailing
  * arguments are shown.
  */
1442  if (prio != 0) {
1443  if (setpriority(PRIO_PGRP, 0, prio) != 0) {
1445  "Could not set priority %d", prio);
1446  }
1447  }
1448 
1449  ip_version = totem_config.ip_version;
1450 
 /* Hand totem the callbacks used to create/load and store the ring id. */
1451  totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load;
1452  totem_config.totem_memb_ring_id_store = corosync_ring_id_store;
1453 
 /*
  * Create the "TOTEM" logging subsystem for the listed totem source
  * files and point totem's log_printf hook at _logsys_log_printf.
  */
1455  totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem,"
1456  "totemmrp.c,totemrrp.c,totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c,"
1457  "totempg.c,totemiba.c,totemudp.c,totemudpu.c,totemnet.c");
1458 
1465  totem_config.totem_logging_configuration.log_printf = _logsys_log_printf;
1467 
1468  /*
1469  * Now we are fully initialized.
1470  */
1471  if (background) {
1473 
1474  corosync_tty_detach ();
1475 
1477 
1478  log_printf (LOGSYS_LEVEL_DEBUG, "Corosync TTY detached");
1479  }
1480 
1481  /*
1482  * Lock all memory to avoid page faults which may interrupt
1483  * application healthchecking
1484  */
1485  corosync_mlockall ();
1486 
1487  corosync_poll_handle = qb_loop_create ();
1488 
 /*
  * Zero the scheduler-pause bookkeeping, attach the totem configuration
  * and invoke the timeout function once directly.
  */
1489  memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data));
1490  scheduler_pause_timeout_data.totem_config = &totem_config;
1491  timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
1492 
 /*
  * SIGUSR2 goes to sig_diag_handler at low priority; SIGINT, SIGQUIT and
  * SIGTERM all go to sig_exit_handler at high priority.
  */
1493  qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
1494  SIGUSR2, NULL, sig_diag_handler, NULL);
1495  qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1496  SIGINT, NULL, sig_exit_handler, NULL);
1497  qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1498  SIGQUIT, NULL, sig_exit_handler, NULL);
1499  qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1500  SIGTERM, NULL, sig_exit_handler, NULL);
1501 
1502  if (logsys_thread_start() != 0) {
1503  log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread");
1505  }
1506 
 /* Any result other than COROSYNC_DONE_EXIT from corosync_flock() is treated as an error. */
1507  if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) {
1508  corosync_exit_error (flock_err);
1509  }
1510 
1511  /*
1512  * if totempg_initialize doesn't have root privileges, it cannot
1513  * bind to a specific interface. This only matters if
1514  * there is more than one interface in a system, so
1515  * in this case, only a warning is printed
1516  */
1517  /*
1518  * Join multicast group and setup delivery
1519  * and configuration change functions
1520  */
1521  if (totempg_initialize (
1522  corosync_poll_handle,
1523  &totem_config) != 0) {
1524 
1525  log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize TOTEM layer");
1527  }
1528 
 /*
  * NOTE(review): the next three fragments are the argument tails of calls
  * whose first lines (listing lines 1529, 1532 and 1537) are missing.
  * Judging by the arguments they presumably register main_service_ready
  * and set up / join the corosync totempg group -- confirm against the
  * original file.
  */
1530  main_service_ready);
1531 
1533  &corosync_group_handle,
1534  deliver_fn,
1535  confchg_fn);
1536 
1538  corosync_group_handle,
1539  &corosync_group,
1540  1);
1541 
1542  /*
1543  * Drop root privileges to user 'corosync'
1544  * TODO: Don't really need full root capabilities;
1545  * needed capabilities are:
1546  * CAP_NET_RAW (bindtodevice)
1547  * CAP_SYS_NICE (setscheduler)
1548  * CAP_IPC_LOCK (mlockall)
1549  */
1550  priv_drop ();
1551 
1552  schedwrk_init (
1553  serialize_lock,
1554  serialize_unlock);
1555 
1556  /*
1557  * Start main processing loop
1558  */
1559  qb_loop_run (corosync_poll_handle);
1560 
1561  /*
1562  * Exit was requested
1563  */
1564  totempg_finalize ();
1565 
1566  /*
1567  * free the loop resources
1568  */
1569  qb_loop_destroy (corosync_poll_handle);
1570 
1571  /*
1572  * free up the icmap
1573  */
1574 
1575  /*
1576  * Remove pid lock file
1577  */
1578  unlink (corosync_lock_file);
1579 
1581 
1582  return EXIT_SUCCESS;
1583 }
#define CS_TRUE
Definition: corotypes.h:54
const char * name
Definition: coroapi.h:492
int32_t cs_ipcs_q_level_get(void)
Definition: ipc_glue.c:690
unsigned int corosync_service_defaults_link_and_init(struct corosync_api_v1 *corosync_api)
Load all of the default services.
Definition: service.c:339
uint64_t gather_entered
Definition: totem.h:267
const char * get_run_dir(void)
Definition: util.c:174
int corosync_quorum_is_quorate(void)
Definition: exec/quorum.c:65
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
void totempg_finalize(void)
Definition: totempg.c:866
Totem Single Ring Protocol.
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
uint64_t memb_commit_token_rx
Definition: totem.h:262
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:73
#define LOGSYS_LEVEL_TRACE
Definition: logsys.h:75
void totempg_trans_ack(void)
Definition: totempg.c:1561
struct totem_config * totem_config
Definition: main.c:826
void *(* totem_get_stats)(void)
Definition: coroapi.h:436
#define PACKAGE_FEATURES
Definition: config.h:367
void logsys_blackbox_postfork(void)
Definition: logsys.c:878
#define CS_FALSE
Definition: corotypes.h:53
void sync_start(const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id)
Definition: sync.c:508
uint64_t memb_join_tx
Definition: totem.h:256
qb_loop_timer_handle handle
Definition: main.c:827
void sync_abort(void)
Definition: sync.c:531
void(* exec_dump_fn)(void)
Definition: coroapi.h:502
The totem_ip_address struct.
Definition: coroapi.h:111
void(* sync_abort)(void)
Definition: sync.h:47
totemsrp_token_stats_t token[TOTEM_TOKEN_STATS_MAX]
Definition: totem.h:281
const char * totemip_print(const struct totem_ip_address *addr)
Definition: totemip.c:264
Totem Single Ring Protocol.
void(* sync_init)(const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id)
Definition: sync.h:39
void(* sync_init)(const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id)
Definition: coroapi.h:516
uint64_t mcast_rx
Definition: totem.h:260
int coroparse_configparse(icmap_map_t config_map, const char **error_string)
Definition: coroparse.c:251
qb_loop_t * cs_poll_handle_get(void)
Definition: main.c:169
void corosync_recheck_the_q_level(void *data)
Definition: main.c:736
#define corosync_exit_error(err)
Definition: exec/util.h:70
int totempg_groups_joined_release(int msg_count)
Definition: totempg.c:1355
#define VERSION
Definition: config.h:495
totem_configuration_type
The totem_configuration_type enum.
Definition: coroapi.h:132
#define LOCALSTATEDIR
Definition: config.h:352
struct message_header header
Definition: totemsrp.c:60
uint64_t memb_merge_detect_rx
Definition: totem.h:255
LOGSYS_DECLARE_SUBSYS("MAIN")
int guarantee
Definition: totemsrp.c:66
unsigned char addr[TOTEMIP_ADDRLEN]
Definition: coroapi.h:77
void(* sync_activate)(void)
Definition: coroapi.h:523
void schedwrk_init(void(*serialize_lock_fn)(void), void(*serialize_unlock_fn)(void))
Definition: schedwrk.c:83
cs_error_t icmap_set_string(const char *key_name, const char *value)
Definition: icmap.c:643
int earliest_token
Definition: totem.h:278
void(* confchg_fn)(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id)
Definition: coroapi.h:510
uint64_t orf_token_tx
Definition: totem.h:252
int cs_poll_dispatch_add(qb_loop_t *handle, int fd, int events, void *data, int(*dispatch_fn)(int fd, int revents, void *data))
Definition: main.c:174
int totempg_groups_initialize(void **instance, void(*deliver_fn)(unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required), void(*confchg_fn)(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id))
Initialize a groups instance.
Definition: totempg.c:1143
uint64_t gather_token_lost
Definition: totem.h:268
#define RLIMIT_MEMLOCK
void totemip_copy(struct totem_ip_address *addr1, const struct totem_ip_address *addr2)
Definition: totemip.c:123
icmap_map_t icmap_get_global_map(void)
Return global icmap.
Definition: icmap.c:280
unsigned long long max_tv_diff
Definition: main.c:829
struct corosync_service_engine * corosync_service[SERVICES_COUNT_MAX]
Definition: service.c:110
int _logsys_subsys_create(const char *subsys, const char *filename)
_logsys_subsys_create
Definition: logsys.c:436
uint64_t memb_commit_token_tx
Definition: totem.h:261
#define LOGSYS_PERROR(err_num, level, fmt, args...)
The LOGSYS_PERROR macro.
Definition: logsys.h:314
cs_error_t icmap_inc(const char *key_name)
Increase stored value by one.
Definition: icmap.c:1065
#define log_printf(level, format, args...)
Definition: logsys.h:320
#define MAX_NO_CONT_SENDMSG_FAILURES
Definition: totem.h:65
#define TOTEM_TOKEN_STATS_MAX
Definition: totem.h:280
void(* sync_abort)(void)
Definition: coroapi.h:524
void(* exec_handler_fn)(const void *msg, unsigned int nodeid)
Definition: coroapi.h:477
void cs_ipcs_sync_state_changed(int32_t sync_in_process)
Definition: ipc_glue.c:753
int corosync_sending_allowed(unsigned int service, unsigned int id, const void *msg, void *sending_allowed_private_data)
Definition: main.c:750
void logsys_blackbox_prefork(void)
Definition: logsys.c:872
void icmap_fini(void)
Finalize global icmap.
Definition: icmap.c:263
int main_mcast(const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee)
Definition: main.c:635
void cs_ipc_allow_connections(int32_t allow)
Definition: ipc_glue.c:155
void corosync_service_unlink_all(struct corosync_api_v1 *api, void(*unlink_all_complete)(void))
Unlink and exit all corosync services.
Definition: service.c:394
uint64_t operational_entered
Definition: totem.h:265
void(* sync_activate)(void)
Definition: sync.h:46
LOGSYS_DECLARE_SYSTEM("corosync", LOGSYS_MODE_OUTPUT_STDERR|LOGSYS_MODE_OUTPUT_SYSLOG, LOG_DAEMON, LOG_EMERG)
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
void(*) in log_level_security)
Definition: totem.h:85
void totempg_service_ready_register(void(*totem_service_ready)(void))
Definition: totempg.c:1530
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
int latest_token
Definition: totem.h:279
uint64_t operational_token_lost
Definition: totem.h:266
#define MAX_NO_CONT_GATHER
Maximum number of continuous gather states.
Definition: totem.h:61
uint64_t consensus_timeouts
Definition: totem.h:273
int totem_config_keyread(struct totem_config *totem_config, const char **error_string)
Definition: totemconfig.c:1523
const char * name
Definition: sync.h:48
uint64_t recovery_token_lost
Definition: totem.h:272
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:71
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
struct corosync_exec_handler * exec_engine
Definition: coroapi.h:507
unsigned int totempg_my_nodeid_get(void)
Definition: totempg.c:1521
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:613
pthread_mutex_t lock
Definition: sam.c:131
uint64_t commit_token_lost
Definition: totem.h:270
void * user_data
Definition: sam.c:127
uint64_t token_hold_cancel_rx
Definition: totem.h:264
int logsys_thread_start(void)
logsys_thread_start
Definition: logsys.c:836
int sync_init(int(*sync_callbacks_retrieve)( int service_id, struct sync_callbacks *callbacks), void(*synchronization_completed)(void))
Definition: sync.c:158
int(* sync_process)(void)
Definition: coroapi.h:522
int totem_config_validate(struct totem_config *totem_config, const char **error_string)
Definition: totemconfig.c:1337
#define TOTEM_CONFIG_BINDNETADDR_NODELIST_SET
Definition: totemconfig.h:49
#define LOGSYS_MODE_OUTPUT_SYSLOG
Definition: logsys.h:60
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
int totempg_groups_joined_reserve(void *instance, const struct iovec *iovec, unsigned int iov_len)
Definition: totempg.c:1311
const void * group
Definition: totempg.h:56
cs_error_t icmap_set_float(const char *key_name, float value)
Definition: icmap.c:631
#define LOGSYS_LEVEL_ERROR
Definition: logsys.h:70
Linked list API.
struct totem_ip_address rep
Definition: coroapi.h:123
void sync_save_transitional(const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id)
Definition: sync.c:520
int totempg_groups_mcast_joined(void *instance, const struct iovec *iovec, unsigned int iov_len, int guarantee)
Definition: totempg.c:1241
const char * service_stats_tx[SERVICES_COUNT_MAX][SERVICE_HANDLER_MAXIMUM_COUNT]
Definition: service.c:113
uint64_t mcast_retx
Definition: totem.h:259
cs_error_t icmap_get(const char *key_name, void *value, size_t *value_len, icmap_value_types_t *type)
Retrieve value of key key_name and store it in user preallocated value pointer.
Definition: icmap.c:741
int corosync_log_config_read(cmap_handle_t cmap_h, const char *default_logfile, const char **error_string)
Definition: logconfig.c:689
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:74
void cs_ipcs_init(void)
Definition: ipc_glue.c:896
totemmrp_stats_t * mrp
Definition: totem.h:295
The corosync_api_v1 struct.
Definition: coroapi.h:225
typedef __attribute__
#define TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED
Definition: totemconfig.h:47
uint32_t msg_queue_avail
Definition: totem.h:297
uint64_t rx_msg_dropped
Definition: totem.h:274
void(* log_printf)(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format,...) __attribute__((format(printf
Definition: totem.h:78
#define TOTEM_CONFIG_WARNING_MEMBERS_IGNORED
Definition: totemconfig.h:46
#define swab32(x)
The swab32 macro.
Definition: swab.h:51
uint32_t continuous_gather
Definition: totem.h:275
cs_error_t icmap_set_uint64(const char *key_name, uint64_t value)
Definition: icmap.c:625
totemrrp_stats_t * rrp
Definition: totem.h:251
cs_error_t icmap_fast_inc(const char *key_name)
Increase stored value by one.
Definition: icmap.c:1085
#define MILLI_2_NANO_SECONDS
Definition: coroapi.h:105
void message_source_set(mar_message_source_t *source, void *conn)
Definition: main.c:815
void logsys_config_apply(void)
logsys_config_apply
Definition: logsys.c:784
uint64_t token_hold_cancel_tx
Definition: totem.h:263
void(* totem_memb_ring_id_create_or_load)(struct memb_ring_id *memb_ring_id, const struct totem_ip_address *addr)
Definition: totem.h:198
int ip_version
Definition: totem.h:192
unsigned int token_timeout
Definition: totem.h:132
void totempg_check_q_level(void *instance)
Definition: totempg.c:1303
uint64_t mcast_tx
Definition: totem.h:258
const char * service_stats_rx[SERVICES_COUNT_MAX][SERVICE_HANDLER_MAXIMUM_COUNT]
Definition: service.c:112
void(* exec_endian_convert_fn)(void *msg)
Definition: coroapi.h:478
The memb_ring_id struct.
Definition: coroapi.h:122
struct corosync_api_v1 * apidef_get(void)
Definition: apidef.c:147
void corosync_sending_allowed_release(void *sending_allowed_private_data)
Definition: main.c:793
void cs_ipcs_stats_update(void)
Definition: ipc_glue.c:759
uint64_t memb_merge_detect_tx
Definition: totem.h:254
uint32_t msg_reserved
Definition: totem.h:296
totemsrp_stats_t * srp
Definition: totem.h:290
#define SERVICES_COUNT_MAX
Definition: coroapi.h:463
unsigned long long tv_prev
Definition: main.c:828
const char *(* totem_ifaces_print)(unsigned int nodeid)
Definition: coroapi.h:291
uint64_t memb_join_rx
Definition: totem.h:257
void corosync_state_dump(void)
Definition: main.c:192
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
int cs_poll_dispatch_delete(qb_loop_t *handle, int fd)
Definition: main.c:187
e_corosync_done
Definition: exec/util.h:47
int message_source_is_local(const mar_message_source_t *source)
Definition: main.c:804
struct totem_logging_configuration totem_logging_configuration
Definition: totem.h:166
int totempg_groups_join(void *instance, const struct totempg_group *groups, size_t group_cnt)
Definition: totempg.c:1191
#define LOGSYS_LEVEL_NOTICE
Definition: logsys.h:72
unsigned long long seq
Definition: coroapi.h:124
cs_error_t icmap_set_uint8(const char *key_name, uint8_t value)
Definition: icmap.c:589
uint64_t recovery_entered
Definition: totem.h:271
cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access)
Set read-only access for given key (key_name) or prefix, If prefix is set.
Definition: icmap.c:1241
int totempg_initialize(qb_loop_t *poll_handle, struct totem_config *totem_config)
Initialize the totem process groups abstraction.
Definition: totempg.c:815
void corosync_shutdown_request(void)
Definition: main.c:240
uint32_t interface_count
Definition: totem.h:239
struct sched_param global_sched_param
Definition: main.c:161
unsigned int nodeid
Definition: coroapi.h:75
uint32_t continuous_sendmsg_failures
Definition: totem.h:276
uint8_t * faulty
Definition: totem.h:238
struct memb_ring_id ring_id
Definition: totemsrp.c:64
int main(int argc, char **argv, char **envp)
Definition: main.c:1232
cs_error_t icmap_init(void)
Initialize global icmap.
Definition: icmap.c:223
uint64_t commit_entered
Definition: totem.h:269
#define TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED
Definition: totemconfig.h:48
#define LOGSYS_MODE_OUTPUT_STDERR
Definition: logsys.h:59
uint64_t orf_token_rx
Definition: totem.h:253
void(* totem_memb_ring_id_store)(const struct memb_ring_id *memb_ring_id, const struct totem_ip_address *addr)
Definition: totem.h:202
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1175
int totem_config_read(struct totem_config *totem_config, const char **error_string, uint64_t *warnings)
Definition: totemconfig.c:1039
int(* sync_process)(void)
Definition: sync.h:45
The mar_message_source_t struct.
Definition: coroapi.h:50
void logsys_system_fini(void)
logsys_system_fini
Definition: logsys.c:287