Hi everyone,
i am currently testing a 2node+arbitrator setup and am running into some problems when restarting a single node.
After all nodes have joined the cluster, if i restart a single one, it fails to start again and only shows
" WSREP: Failed to recover position:"
After cleaning out the whole datadir it will rejoin the cluster just fine.
wsrep.conf
[mysqld]
# if cluster is shutdown, and restarted in reverse, try to use IST instead of full SST
# https://www.percona.com/blog/2016/11/30/galera-cache-gcache-finally-recoverable-restart/
#wsrep_provider_options="gcache.size=3G"
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links = 0
# Path to Galera library
wsrep_provider =/usr/lib/galera3/libgalera_smm.so
# In order for Galera to work correctly binlog format should be ROW
binlog_format = ROW
# MyISAM storage engine has only experimental support
default_storage_engine = InnoDB
# Slave thread to use
#wsrep_slave_threads = 16
wsrep_log_conflicts
# This changes how InnoDB autoincrement locks are managed and is a requirement for Galera
innodb_autoinc_lock_mode =2
# Node IP address
wsrep_node_address = 10.10.0.131
# Cluster name
wsrep_cluster_name = percona_cluster_fra
#If wsrep_node_name is not specified, then system hostname will be used
wsrep_node_name = db03
#pxc_strict_mode allowed values: DISABLED,PERMISSIVE,ENFORCING,MASTER
# TODO get devs to fix stuff and switch to ENFORCING
pxc_strict_mode = ENFORCING
# SST method
wsrep_sst_method = xtrabackup-v2
#Authentication for SST method
wsrep_sst_auth = "xtrabackup:password"
# Cluster connection URL contains IPs of nodes
#If no IP is found, this implies that a new cluster needs to be created,
#in order to do that you need to bootstrap this node
wsrep_cluster_address = gcomm://10.10.0.131,10.10.0.132,10.10.0.101
#wsrep_notify_cmd = /usr/local/bin/galeranotify.py
my.cnf
# Ansible managed
#
# change mysql-prompt
[mysql]
prompt =\u@db03:[\d]>\_
# Template my.cnf for PXC
# Edit to your requirements.
[mysqld]
user = mysql
server-id = 3
datadir = /data/mysql
tmpdir = /tmp
socket = /var/run/mysqld/mysqld.sock
log-error = /var/log/mysqld.log
pid-file = /var/run/mysqld/mysqld.pid
skip-name-resolve
# deactivated bc/keepalived
# bind_address = 10.10.0.131
enforce_gtid_consistency = 1
gtid_mode = on
# set buffer to 70%
innodb_buffer_pool_size = 90069M
innodb_file_per_table = ON
innodb_flush_log_at_trx_commit = 2
# Logging
log-bin = mysql-bin
max_binlog_size = 300000000
log_slave_updates
slow-query-log = true
slow_query_log_file = /var/log/mysql/mysql-slow.log
long_query_time = 1
log_error_verbosity = 2
expire_logs_days = 4
log_output = file
slow_query_log = ON
long_query_time = 1
log_slow_rate_limit = 100
#log_slow_rate_type = query
log_slow_verbosity = full
log_slow_admin_statements = ON
log_slow_slave_statements = ON
slow_query_log_always_write_time = 1
slow_query_log_use_global_control = all
innodb_monitor_enable = all
userstat = 1
explicit_defaults_for_timestamp = 1
# # --------------------------------------------------------------------------------
event_scheduler = 1
max_connect_errors = 16385 # block server after this many unsuccessful connections
# # slave-replication
# #slave_net_timeout = 60
# #binlog_cache_size = 2M
# #binlog_stmt_cache_size = 2M
# # Threading / Processes
# #
thread_cache_size = 1024
max_connections = 8192
back_log = 512 # default 50 (max. = net.ipv4.tcp_max_syn_backlog = 2048)
# #
# # ThreadPool
# #
thread_handling = pool-of-threads
thread_pool_size = 26 # default # of CPUs
thread_pool_stall_limit = 500 # default 500 (ms)
# thread_pool_max_threads = 500 # default 500
# thread_pool_idle_timeout = 60 # default 60 (s)
# #
# # Query cache
# #
query_cache_limit = 16M
query_cache_size = 1M
# #
# # All storage engines
# #
tmp_table_size = 8192M
max_heap_table_size = 8192M
table_open_cache = 4000
Anyone know what the error might be?
regards,
Roman