Not the answer you need?
Register and ask your own question!

Restart Node fails (ubuntu-16.04.3, percona-xtradb-cluster 5.7.19-29.22-3.xenial)

romanvgromanvg EntrantInactive User Role Beginner
Hi everyone,

I am currently testing a 2-node + arbitrator setup and am running into problems when restarting a single node.
After all nodes have joined the cluster, if I restart a single one, it fails to start again and only shows

" WSREP: Failed to recover position:"

After cleaning out the whole datadir it will rejoin the cluster just fine.


wsrep.conf
[mysqld]
# If the cluster is shut down and restarted in reverse order, try IST instead of a full SST
# https://www.percona.com/blog/2016/11/30/galera-cache-gcache-finally-recoverable-restart/
#wsrep_provider_options="gcache.size=3G"

# Symbolic links are disabled, as recommended, to prevent assorted security risks
symbolic-links           = 0

# Path to the Galera library
wsrep_provider           = /usr/lib/galera3/libgalera_smm.so

# Galera needs the ROW binlog format to work correctly
binlog_format            = ROW

# MyISAM has only experimental support, so InnoDB is the default engine
default_storage_engine   = InnoDB

# Number of slave threads to use
#wsrep_slave_threads      = 16

# Bare option (no value given)
wsrep_log_conflicts

# Changes how InnoDB auto-increment locks are managed; required by Galera
innodb_autoinc_lock_mode = 2

# IP address of this node
wsrep_node_address       = 10.10.0.131

# Name of the cluster
wsrep_cluster_name       = percona_cluster_fra

# If wsrep_node_name is not specified, the system hostname is used
wsrep_node_name          = db03

# pxc_strict_mode allowed values: DISABLED, PERMISSIVE, ENFORCING, MASTER
# NOTE(review): an earlier TODO here said to switch to ENFORCING once the devs
# have fixed their code; the value is already ENFORCING - confirm this is intended.
pxc_strict_mode          = ENFORCING

# SST method
wsrep_sst_method         = xtrabackup-v2

# Credentials (user:password) used by the SST method
wsrep_sst_auth           = "xtrabackup:password"

# Cluster connection URL listing the IPs of the nodes.
# If no IP is found, a new cluster has to be created,
# which requires bootstrapping this node.
wsrep_cluster_address    = gcomm://10.10.0.131,10.10.0.132,10.10.0.101


#wsrep_notify_cmd        = /usr/local/bin/galeranotify.py

my.cnf
# Ansible managed
#
# change mysql-prompt
[mysql]
# Prompt for the mysql command-line client.
# Prompt escapes per the mysql client documentation: \u = user name,
# \d = default database, \_ = a space. Rendered roughly as "user@db03:[database]> ".
prompt                            =\u@db03:[\d]>\_

# Template my.cnf for PXC
# Edit to your requirements.
[mysqld]
user                              = mysql
server-id                         = 3
datadir                           = /data/mysql
tmpdir                            = /tmp
socket                            = /var/run/mysqld/mysqld.sock
log-error                         = /var/log/mysqld.log
pid-file                          = /var/run/mysqld/mysqld.pid

skip-name-resolve

# deactivated bc/keepalived
# bind_address                    = 10.10.0.131

enforce_gtid_consistency          = 1
gtid_mode                         = on

# set buffer to 70%
innodb_buffer_pool_size           = 90069M
innodb_file_per_table             = ON
innodb_flush_log_at_trx_commit    = 2

# Logging
log-bin                           = mysql-bin
max_binlog_size                   = 300000000
log_slave_updates

# Keep this at 3. It was 2 in the failing setup: as reported in this thread,
# with a value <= 2 a node that has already joined the cluster cannot be
# restarted ("WSREP: Failed to recover position").
# NOTE(review): presumably the recovered position is written to the error log
# at Note level, which verbosity < 3 suppresses - confirm against the PXC docs.
log_error_verbosity               = 3
expire_logs_days                  = 4

# Slow query log (slow_query_log and long_query_time were previously set twice
# with the same values; each is now set once)
log_output                        = file
slow_query_log                    = ON
slow_query_log_file               = /var/log/mysql/mysql-slow.log
long_query_time                   = 1
log_slow_rate_limit               = 100
#log_slow_rate_type               = query
log_slow_verbosity                = full
log_slow_admin_statements         = ON
log_slow_slave_statements         = ON
slow_query_log_always_write_time  = 1
slow_query_log_use_global_control = all
innodb_monitor_enable             = all
userstat                          = 1

explicit_defaults_for_timestamp   = 1

# # --------------------------------------------------------------------------------
event_scheduler                   = 1
# block a host after this many unsuccessful connections
max_connect_errors                = 16385

# # slave-replication
# #slave_net_timeout              = 60
# #binlog_cache_size              = 2M
# #binlog_stmt_cache_size         = 2M

# # Threading / Processes
# #
thread_cache_size                 = 1024
max_connections                   = 8192
# default 50 (max. = net.ipv4.tcp_max_syn_backlog = 2048)
back_log                          = 512

# #
# # ThreadPool
# #
thread_handling                   = pool-of-threads
# default: number of CPUs
thread_pool_size                  = 26
# default 500 (ms)
thread_pool_stall_limit           = 500
# thread_pool_max_threads         = 500                   # default 500
# thread_pool_idle_timeout        = 60                    # default 60 (s)

# #
# # Query cache
# #
query_cache_limit                 = 16M
query_cache_size                  = 1M

# #
# # All storage engines
# #
tmp_table_size                    = 8192M
max_heap_table_size               = 8192M
table_open_cache                  = 4000

Anyone know what the error might be?

regards,
Roman

Comments

  • romanvgromanvg Entrant Inactive User Role Beginner
    OK, I found the error myself. It seems to be a bug related to log_error_verbosity: after the initial join, a restart fails if it is set to 2 or lower. Setting it to 3 makes it work.
Sign In or Register to comment.

MySQL, InnoDB, MariaDB and MongoDB are trademarks of their respective owners.
Copyright ©2005 - 2020 Percona LLC. All rights reserved.