Hello everyone,
I’m using official percona xtradb cluster docker image tag 5.7
I’ve tried to migrate from 5.7 to 8.0 to no success.
I’ve stopped all nodes, bumped tag of first node to 8.0 started it but it was complaining about previous unclean shutdown.
I figured i needed to add
stop_grace_period: 1m30s
to my docker-compose.yaml and that did the trick, it successfully upgraded first node to 8.0
However when i tried to join second node, that’s where things fail.
This is log file from second node when it tries to join:
2023-10-22T04:32:37.637728Z 0 [Warning] [MY-011068] [Server] The syntax '--skip-host-cache' is deprecated and will be removed in a future release. Please use SET GLOBAL host_cache_size=0 instead.
2023-10-22T04:32:37.637767Z 0 [Warning] [MY-011068] [Server] The syntax 'wsrep_slave_threads' is deprecated and will be removed in a future release. Please use wsrep_applier_threads instead.
2023-10-22T04:32:37.637823Z 0 [Warning] [MY-011068] [Server] The syntax 'expire-logs-days' is deprecated and will be removed in a future release. Please use binlog_expire_logs_seconds instead.
2023-10-22T04:32:37.642708Z 0 [Warning] [MY-010097] [Server] Insecure configuration for --secure-log-path: Current value does not restrict location of generated files. Consider setting it to a valid, non-empty path.
2023-10-22T04:32:37.643686Z 0 [Warning] [MY-010918] [Server] 'default_authentication_plugin' is deprecated and will be removed in a future release. Please use authentication_policy instead.
2023-10-22T04:32:37.643725Z 0 [System] [MY-010116] [Server] /usr/sbin/mysqld (mysqld 8.0.33-25.1) starting as process 1
2023-10-22T04:32:37.652606Z 0 [Note] [MY-000000] [Galera] Loading provider /usr/lib64/galera4/libgalera_smm.so initial position: 00000000-0000-0000-0000-000000000000:-1
2023-10-22T04:32:37.652658Z 0 [Note] [MY-000000] [Galera] wsrep_load(): loading provider library '/usr/lib64/galera4/libgalera_smm.so'
2023-10-22T04:32:37.653566Z 0 [Note] [MY-000000] [Galera] wsrep_load(): Galera 4.15(6983fb2) by Codership Oy <info@codership.com> (modified by Percona <https://percona.com/>) loaded successfully.
2023-10-22T04:32:37.653644Z 0 [Note] [MY-000000] [Galera] CRC-32C: using 64-bit x86 acceleration.
2023-10-22T04:32:37.653932Z 0 [Warning] [MY-000000] [Galera] Could not open state file for reading: '/var/lib/mysql//grastate.dat'
2023-10-22T04:32:37.653967Z 0 [Warning] [MY-000000] [Galera] No persistent state found. Bootstraping with default state
2023-10-22T04:32:37.654030Z 0 [Note] [MY-000000] [Galera] Found saved state: 00000000-0000-0000-0000-000000000000:-1, safe_to_bootstrap: 1
2023-10-22T04:32:37.654530Z 0 [Note] [MY-000000] [Galera] Generated new GCache ID: 07643e0b-7094-11ee-ac3c-e7787fbc2276
2023-10-22T04:32:37.654574Z 0 [Note] [MY-000000] [Galera] GCache DEBUG: opened preamble:
Version: 0
UUID: 00000000-0000-0000-0000-000000000000
Seqno: -1 - -1
Offset: -1
Synced: 0
EncVersion: 0
Encrypted: 0
MasterKeyConst UUID: 07643e0b-7094-11ee-ac3c-e7787fbc2276
MasterKey UUID: 00000000-0000-0000-0000-000000000000
MasterKey ID: 0
2023-10-22T04:32:37.654593Z 0 [Note] [MY-000000] [Galera] Skipped GCache ring buffer recovery: could not determine history UUID.
2023-10-22T04:32:37.659556Z 0 [Note] [MY-000000] [Galera] Passing config to GCS: allocator.disk_pages_encryption = no; allocator.encryption_cache_page_size = 32K; allocator.encryption_cache_size = 16777216; base_dir = /var/lib/mysql/; base_host = elk2; base_port = 4567; cert.log_conflicts = no; cert.optimistic_pa = no; debug = no; evs.auto_evict = 0; evs.delay_margin = PT1S; evs.delayed_keep_period = PT30S; evs.inactive_check_period = PT0.5S; evs.inactive_timeout = PT15S; evs.join_retrans_period = PT1S; evs.max_install_timeouts = 3; evs.send_window = 10; evs.stats_report_period = PT1M; evs.suspect_timeout = PT5S; evs.user_send_window = 4; evs.view_forget_timeout = PT24H; gcache.dir = /var/lib/mysql/; gcache.encryption = no; gcache.encryption_cache_page_size = 32K; gcache.encryption_cache_size = 16777216; gcache.freeze_purge_at_seqno = -1; gcache.keep_pages_count = 0; gcache.keep_pages_size = 0; gcache.mem_size = 0; gcache.name = galera.cache; gcache.page_size = 128M; gcache.recover = yes; gcache.size = 128M; gcomm.thread_prio = ; gcs.fc_auto_evict_threshold = 0.75; gcs.fc_auto_evict_window = 0; gcs.fc_debug = 0; gcs.fc_factor = 1.0; gcs.fc_limit = 100; gcs.fc_master_slave = no; gcs.fc_single_primary = no; gcs.max_packet_size = 64500; gcs.max_throttle = 0.25; gcs.recv_q_hard_limit = 9223372036854775807; gcs.recv_q_soft_limit = 0.25; gcs.sync_donor = no; gmcast.segment = 0; gmcast.version = 0; pc.announce_timeout = PT3S; pc.checksum = false; pc.ignore_quorum = false; pc.ignore_sb = false; pc.npvo = false; pc.recovery = true; pc.version = 0; pc.wait_prim = true; pc.wait_prim_timeout = PT30S; pc.wait_restored_prim_timeout = PT0S; pc.weight = 1; protonet.backend = asio; protonet.version = 0; repl.causal_read_timeout = PT30S; repl.commit_order = 3; repl.key_format = FLAT8; repl.max_ws_size = 2147483647; repl.proto_max = 10; socket.checksum = 2; socket.recv_buf_size = auto; socket.send_buf_size = auto;
2023-10-22T04:32:37.691674Z 0 [Note] [MY-000000] [WSREP] Starting replication
2023-10-22T04:32:37.691741Z 0 [Note] [MY-000000] [Galera] Connecting with bootstrap option: 0
2023-10-22T04:32:37.691762Z 0 [Note] [MY-000000] [Galera] Setting GCS initial position to 00000000-0000-0000-0000-000000000000:-1
2023-10-22T04:32:37.691829Z 0 [Note] [MY-000000] [Galera] protonet asio version 0
2023-10-22T04:32:37.691897Z 0 [Note] [MY-000000] [Galera] Using CRC-32C for message checksums.
2023-10-22T04:32:37.691928Z 0 [Note] [MY-000000] [Galera] backend: asio
2023-10-22T04:32:37.692033Z 0 [Note] [MY-000000] [Galera] gcomm thread scheduling priority set to other:0
2023-10-22T04:32:37.692207Z 0 [Note] [MY-000000] [Galera] Fail to access the file (/var/lib/mysql//gvwstate.dat) error (No such file or directory). It is possible if node is booting for first time or re-booting after a graceful shutdown
2023-10-22T04:32:37.692241Z 0 [Note] [MY-000000] [Galera] Restoring primary-component from disk failed. Either node is booting for first time or re-booting after a graceful shutdown
2023-10-22T04:32:37.692475Z 0 [Note] [MY-000000] [Galera] GMCast version 0
2023-10-22T04:32:37.696821Z 0 [Note] [MY-000000] [Galera] (076a0235-b121, 'tcp://0.0.0.0:4567') listening at tcp://0.0.0.0:4567
2023-10-22T04:32:37.696882Z 0 [Note] [MY-000000] [Galera] (076a0235-b121, 'tcp://0.0.0.0:4567') multicast: , ttl: 1
2023-10-22T04:32:37.697402Z 0 [Note] [MY-000000] [Galera] EVS version 1
2023-10-22T04:32:37.697514Z 0 [Note] [MY-000000] [Galera] gcomm: connecting to group 'company-pxe-cluster1', peer 'elk1:'
2023-10-22T04:32:37.699224Z 0 [Note] [MY-000000] [Galera] (076a0235-b121, 'tcp://0.0.0.0:4567') connection established to cf763568-83b6 tcp://172.31.28.146:4567
2023-10-22T04:32:37.699353Z 0 [Note] [MY-000000] [Galera] (076a0235-b121, 'tcp://0.0.0.0:4567') turning message relay requesting on, nonlive peers:
2023-10-22T04:32:37.839364Z 0 [Note] [MY-000000] [Galera] EVS version upgrade 0 -> 1
2023-10-22T04:32:37.839445Z 0 [Note] [MY-000000] [Galera] declaring cf763568-83b6 at tcp://172.31.28.146:4567 stable
2023-10-22T04:32:37.839470Z 0 [Note] [MY-000000] [Galera] PC protocol upgrade 0 -> 1
2023-10-22T04:32:37.839858Z 0 [Note] [MY-000000] [Galera] Node cf763568-83b6 state primary
2023-10-22T04:32:37.840068Z 0 [Note] [MY-000000] [Galera] Current view of cluster as seen by this node
view (view_id(PRIM,076a0235-b121,2)
memb {
076a0235-b121,0
cf763568-83b6,0
}
joined {
}
left {
}
partitioned {
}
)
2023-10-22T04:32:37.840096Z 0 [Note] [MY-000000] [Galera] Save the discovered primary-component to disk
2023-10-22T04:32:38.198083Z 0 [Note] [MY-000000] [Galera] gcomm: connected
2023-10-22T04:32:38.198214Z 0 [Note] [MY-000000] [Galera] Changing maximum packet size to 64500, resulting msg size: 32636
2023-10-22T04:32:38.198319Z 0 [Note] [MY-000000] [Galera] Shifting CLOSED -> OPEN (TO: 0)
2023-10-22T04:32:38.198334Z 0 [Note] [MY-000000] [Galera] Opened channel 'company-pxe-cluster1'
2023-10-22T04:32:38.198486Z 0 [Note] [MY-000000] [Galera] New COMPONENT: primary = yes, bootstrap = no, my_idx = 0, memb_num = 2
2023-10-22T04:32:38.204579Z 0 [Note] [MY-000000] [Galera] STATE_EXCHANGE: sent state UUID: 07b742ae-7094-11ee-abdd-b2e1030783c9
2023-10-22T04:32:38.208728Z 0 [Note] [MY-000000] [Galera] STATE EXCHANGE: sent state msg: 07b742ae-7094-11ee-abdd-b2e1030783c9
2023-10-22T04:32:38.208809Z 1 [Note] [MY-000000] [WSREP] Starting rollbacker thread 1
2023-10-22T04:32:38.208893Z 2 [Note] [MY-000000] [WSREP] Starting applier thread 2
2023-10-22T04:32:38.209145Z 0 [Note] [MY-000000] [Galera] STATE EXCHANGE: got state msg: 07b742ae-7094-11ee-abdd-b2e1030783c9 from 0 (114764290eca)
2023-10-22T04:32:38.209199Z 0 [Note] [MY-000000] [Galera] STATE EXCHANGE: got state msg: 07b742ae-7094-11ee-abdd-b2e1030783c9 from 1 (bdf3641d2950)
2023-10-22T04:32:38.209213Z 0 [Note] [MY-000000] [Galera] Quorum results:
version = 6,
component = PRIMARY,
conf_id = 1,
members = 1/2 (primary/total),
act_id = 1,
last_appl. = 0,
protocols = 2/10/4 (gcs/repl/appl),
vote policy= 0,
group UUID = 031dc267-7092-11ee-bf55-f7e51dd58c7c
2023-10-22T04:32:38.209271Z 0 [Note] [MY-000000] [Galera] Flow-control interval: [141, 141]
2023-10-22T04:32:38.209281Z 0 [Note] [MY-000000] [Galera] Shifting OPEN -> PRIMARY (TO: 2)
2023-10-22T04:32:38.209594Z 2 [Note] [MY-000000] [Galera] ####### processing CC 2, local, ordered
2023-10-22T04:32:38.209625Z 2 [Note] [MY-000000] [Galera] Maybe drain monitors from -1 upto current CC event 2 upto:-1
2023-10-22T04:32:38.209634Z 2 [Note] [MY-000000] [Galera] Drain monitors from -1 up to -1
2023-10-22T04:32:38.209648Z 2 [Note] [MY-000000] [Galera] Process first view: 031dc267-7092-11ee-bf55-f7e51dd58c7c my uuid: 076a0235-7094-11ee-b121-76308c277c2e
2023-10-22T04:32:38.209668Z 2 [Note] [MY-000000] [Galera] Server 114764290eca connected to cluster at position 031dc267-7092-11ee-bf55-f7e51dd58c7c:2 with ID 076a0235-7094-11ee-b121-76308c277c2e
2023-10-22T04:32:38.209678Z 2 [Note] [MY-000000] [WSREP] Server status change disconnected -> connected
2023-10-22T04:32:38.209714Z 2 [Note] [MY-000000] [WSREP] wsrep_notify_cmd is not defined, skipping notification.
2023-10-22T04:32:38.209747Z 2 [Note] [MY-000000] [Galera] ####### My UUID: 076a0235-7094-11ee-b121-76308c277c2e
2023-10-22T04:32:38.209760Z 2 [Note] [MY-000000] [Galera] Cert index reset to 00000000-0000-0000-0000-000000000000:-1 (proto: 10), state transfer needed: yes
2023-10-22T04:32:38.211856Z 0 [Note] [MY-000000] [Galera] Service thread queue flushed.
2023-10-22T04:32:38.211972Z 2 [Note] [MY-000000] [Galera] ####### Assign initial position for certification: 00000000-0000-0000-0000-000000000000:-1, protocol version: -1
2023-10-22T04:32:38.212002Z 2 [Note] [MY-000000] [Galera] State transfer required:
Group state: 031dc267-7092-11ee-bf55-f7e51dd58c7c:2
Local state: 00000000-0000-0000-0000-000000000000:-1
2023-10-22T04:32:38.212018Z 2 [Note] [MY-000000] [WSREP] Server status change connected -> joiner
2023-10-22T04:32:38.212030Z 2 [Note] [MY-000000] [WSREP] wsrep_notify_cmd is not defined, skipping notification.
2023-10-22T04:32:38.212509Z 0 [Note] [MY-000000] [WSREP] Initiating SST/IST transfer on JOINER side (wsrep_sst_xtrabackup-v2 --role 'joiner' --address 'elk2' --datadir '/var/lib/mysql/' --basedir '/usr/' --plugindir '/usr/lib64/mysql/plugin/' --defaults-file '/etc/my.cnf' --defaults-group-suffix '' --parent '1' --mysqld-version '8.0.33-25.1' '' )
2023-10-22T04:32:39.111163Z 2 [Note] [MY-000000] [WSREP] Prepared SST request: xtrabackup-v2|elk2:4444/xtrabackup_sst//1
2023-10-22T04:32:39.111234Z 2 [Note] [MY-000000] [Galera] Check if state gap can be serviced using IST
2023-10-22T04:32:39.111251Z 2 [Note] [MY-000000] [Galera] Local UUID: 00000000-0000-0000-0000-000000000000 != Group UUID: 031dc267-7092-11ee-bf55-f7e51dd58c7c
2023-10-22T04:32:39.111263Z 2 [Note] [MY-000000] [Galera] ####### IST uuid:00000000-0000-0000-0000-000000000000 f: 0, l: 2, STRv: 3
2023-10-22T04:32:39.111331Z 2 [Note] [MY-000000] [Galera] IST receiver addr using tcp://elk2:4568
2023-10-22T04:32:39.113517Z 2 [Note] [MY-000000] [Galera] State gap can't be serviced using IST. Switching to SST
2023-10-22T04:32:39.113569Z 2 [Warning] [MY-000000] [Galera] Failed to prepare for incremental state transfer: Failed to open IST listener at tcp://elk2:4568', asio error 'Failed to listen: bind: Cannot assign requested address: 99 (Cannot assign requested address)
at /mnt/jenkins/workspace/pxc80-autobuild-RELEASE/test/rpmbuild/BUILD/Percona-XtraDB-Cluster-8.0.33/percona-xtradb-cluster-galera/galerautils/src/gu_asio_stream_react.cpp:listen():788': 99 (Cannot assign requested address)
at /mnt/jenkins/workspace/pxc80-autobuild-RELEASE/test/rpmbuild/BUILD/Percona-XtraDB-Cluster-8.0.33/percona-xtradb-cluster-galera/galera/src/ist.cpp:prepare():349. IST will be unavailable.
2023-10-22T04:32:39.114573Z 0 [Note] [MY-000000] [Galera] Member 0.0 (114764290eca) requested state transfer from '*any*'. Selected 1.0 (bdf3641d2950)(SYNCED) as donor.
2023-10-22T04:32:39.114631Z 0 [Note] [MY-000000] [Galera] Shifting PRIMARY -> JOINER (TO: 2)
2023-10-22T04:32:39.114670Z 2 [Note] [MY-000000] [Galera] Requesting state transfer: success, donor: 1
2023-10-22T04:32:39.114756Z 2 [Note] [MY-000000] [Galera] Resetting GCache seqno map due to different histories.
2023-10-22T04:32:39.117347Z 2 [Note] [MY-000000] [Galera] GCache history reset: 00000000-0000-0000-0000-000000000000:0 -> 031dc267-7092-11ee-bf55-f7e51dd58c7c:2
2023-10-22T04:32:39.115392Z 0 [Warning] [MY-000000] [Galera] 1.0 (bdf3641d2950): State transfer to 0.0 (114764290eca) failed: -42 (No message of desired type)
2023-10-22T04:32:39.119291Z 0 [ERROR] [MY-000000] [Galera] /mnt/jenkins/workspace/pxc80-autobuild-RELEASE/test/rpmbuild/BUILD/Percona-XtraDB-Cluster-8.0.33/percona-xtradb-cluster-galera/gcs/src/gcs_group.cpp:gcs_group_handle_join_msg():1216: Will never receive state. Need to abort.
2023-10-22T04:32:39.119316Z 0 [Note] [MY-000000] [Galera] gcomm: terminating thread
2023-10-22T04:32:39.119342Z 0 [Note] [MY-000000] [Galera] gcomm: joining thread
2023-10-22T04:32:39.119613Z 0 [Note] [MY-000000] [Galera] gcomm: closing backend
2023-10-22T04:32:40.124588Z 0 [Note] [MY-000000] [Galera] Current view of cluster as seen by this node
view (view_id(NON_PRIM,076a0235-b121,2)
memb {
076a0235-b121,0
}
joined {
}
left {
}
partitioned {
cf763568-83b6,0
}
)
2023-10-22T04:32:40.124670Z 0 [Note] [MY-000000] [Galera] PC protocol downgrade 1 -> 0
2023-10-22T04:32:40.124683Z 0 [Note] [MY-000000] [Galera] Current view of cluster as seen by this node
view ((empty))
2023-10-22T04:32:40.124856Z 0 [Note] [MY-000000] [Galera] gcomm: closed
2023-10-22T04:32:40.124878Z 0 [Note] [MY-000000] [Galera] mysqld: Terminated.
2023-10-22T04:32:40.124887Z 0 [Note] [MY-000000] [WSREP] Initiating SST cancellation
2023-10-22T04:32:40.124894Z 0 [Note] [MY-000000] [WSREP] Terminating SST process
2023-10-22T04:32:40Z UTC - mysqld got signal 11 ;
Most likely, you have hit a bug, but this error can also be caused by malfunctioning hardware.
BuildID[sha1]=1794b64918b6420e8430816186ecb12644628136
Server Version: 8.0.33-25.1 Percona XtraDB Cluster (GPL), Release rel25, Revision 0c56202, WSREP version 26.1.4.3, wsrep_26.1.4.3
Thread pointer: 0x0
Attempting backtrace. You can use the following information to find out
where mysqld died. If you see no messages after this, something went
terribly wrong...
stack_bottom = 0 thread_stack 0x100000
/usr/sbin/mysqld(my_print_stacktrace(unsigned char const*, unsigned long)+0x41) [0x22683b1]
/usr/sbin/mysqld(print_fatal_signal(int)+0x39f) [0x127293f]
/usr/sbin/mysqld(handle_fatal_signal+0xd8) [0x1272a28]
/lib64/libpthread.so.0(+0x12cf0) [0x7fdfec856cf0]
/lib64/libc.so.6(abort+0x203) [0x7fdfeabd8f81]
/usr/lib64/galera4/libgalera_smm.so(+0x41bd1) [0x7fdfddf9cbd1]
/usr/lib64/galera4/libgalera_smm.so(+0xc0af1) [0x7fdfde01baf1]
/usr/lib64/galera4/libgalera_smm.so(+0xb7402) [0x7fdfde012402]
/lib64/libpthread.so.0(+0x81ca) [0x7fdfec84c1ca]
/lib64/libc.so.6(clone+0x43) [0x7fdfeabf0e73]
You may download the Percona XtraDB Cluster operations manual by visiting
http://www.percona.com/software/percona-xtradb-cluster/. You may find information
in the manual which will help you identify the cause of the crash.
I see this note:
2023-10-22T04:32:39.113569Z 2 [Warning] [MY-000000] [Galera] Failed to prepare for incremental state transfer: Failed to open IST listener at tcp://elk2:4568', asio error 'Failed to listen: bind: Cannot assign requested address: 99 (Cannot assign requested address)
at /mnt/jenkins/workspace/pxc80-autobuild-RELEASE/test/rpmbuild/BUILD/Percona-XtraDB-Cluster-8.0.33/percona-xtradb-cluster-galera/galerautils/src/gu_asio_stream_react.cpp:listen():788': 99 (Cannot assign requested address)
at /mnt/jenkins/workspace/pxc80-autobuild-RELEASE/test/rpmbuild/BUILD/Percona-XtraDB-Cluster-8.0.33/percona-xtradb-cluster-galera/galera/src/ist.cpp:prepare():349. IST will be unavailable.
...
2023-10-22T04:32:39.115392Z 0 [Warning] [MY-000000] [Galera] 1.0 (bdf3641d2950): State transfer to 0.0 (114764290eca) failed: -42 (No message of desired type)
2023-10-22T04:32:39.119291Z 0 [ERROR] [MY-000000] [Galera] /mnt/jenkins/workspace/pxc80-autobuild-RELEASE/test/rpmbuild/BUILD/Percona-XtraDB-Cluster-8.0.33/percona-xtradb-cluster-galera/gcs/src/gcs_group.cpp:gcs_group_handle_join_msg():1216: Will never receive state. Need to abort.
But it makes no sence since nothing should be listening to that port since container should be clean, right? And I’ve not started it with privileged mode so it’s not binding to host directly.
I’ve tried pxc-encrypt-cluster-traffic to OFF and ON both, I’ve tried with both ssl on and off, nothing seems to work. Can’t figure out where the issue is.
I have backup so I’ve succeessfully restored back to 5.7 but i really should upgrade to 8.0.
Can anyone help, please?
docker-compose.yaml for refference:
version: '3.3'
services:
pxc-node:
image: percona/percona-xtradb-cluster:8.0
stop_grace_period: 1m30s
cap_add:
- SYS_NICE
volumes:
- /opt/percona/data/cert:/cert
- /opt/percona/data/config:/etc/percona-xtradb-cluster.conf.d
- /opt/percona/data/data:/var/lib/mysql
- /opt/percona/data/log:/var/log/mysql
ports:
- '3306:3306'
- '4444:4444'
- '4567:4567'
- '4568:4568'
environment:
MYSQL_ROOT_PASSWORD: Addiko2017
XTRABACKUP_PASSWORD: Addiko2017
CLUSTER_NAME: addiko-pxe-cluster1
#CLUSTER_JOIN: elk2
command:
--wsrep_node_address=elk1 --wsrep_node_incoming_address=elk1:3306 --pxc-strict-mode=PERMISSIVE --server-id=1689939751 --slow-query-log=1 --slow-query-log-file=/var/log/mysql/pxc-slow.log --long-query-time=2 --log-error=/var/log/mysql/error.log --expire-logs-days=1 --skip-name-resolve --character-set-server=utf8mb4 --collation-server=utf8mb4_unicode_ci --tls-version=""