Replication stuck: Relay_Log_Pos not increasing

about 2 days ago relay_log_pos stopped increasing. i have 2 read replicas and both stopped at the same place…

Relay_Log_Pos: 826986907

looking at a show replica status i am seeing this

      Source_Log_File: binlog.009267
      Read_Source_Log_Pos: 842701137
           Relay_Log_File: mysql-prod-replica-02-relay-bin.007426
            Relay_Log_Pos: 826986907
    Relay_Source_Log_File: binlog.009135
       Replica_IO_Running: Yes
      Replica_SQL_Running: Yes

so i dumped mysql-prod-replica-02-relay-bin.007426 — and pasted an excerpt around “826986907”

i am not sure where to go from here to fix this…

thanks for any ideas people can help with

# at 826986732
#221124 18:06:25 server id 1  end_log_pos 826986667 CRC32 0x4c28e931    Update_rows: table id 86 flags: STMT_END_F

BINLOG '
obJ/YxMBAAAASgAAABvQSjEAAFYAAAAAAAEABXNsaWNlAA1lbWFpbF9kb21haW5zAAUIDw8PEgf6
APoA+gAAHgEBgAIBCB0CNsc=
obJ/Yx8BAAAAkAAAAKvQSjEAAFYAAAAAAAEAAgAF//8AnGgXUgAAAAAIem9oby5jb20Pc210cGlu
LnpvaG8uY29tDjEzNi4xNDMuMTkxLjIzma5xISMAnGgXUgAAAAAIem9oby5jb20Pc210cGluLnpv
aG8uY29tDjEzNi4xNDMuMTkxLjIzma5xIZkx6ShM
'/*!*/;
# at 826986876
#221124 18:06:25 server id 1  end_log_pos 826986698 CRC32 0x66eb27ad    Xid = 31457734691
COMMIT/*!*/;
# at 826986907
#221124 18:05:51 server id 1  end_log_pos 826986783 CRC32 0x16ea18b7    GTID    last_committed=136821   sequence_number=136822  rbr_only=yes    original_committed_timestamp=16693131852318
13   immediate_commit_timestamp=1669313185231813     transaction_length=1518829485
/*!50718 SET TRANSACTION ISOLATION LEVEL READ COMMITTED*//*!*/;
# original_commit_timestamp=1669313185231813 (2022-11-24 18:06:25.231813 GMT)
# immediate_commit_timestamp=1669313185231813 (2022-11-24 18:06:25.231813 GMT)
/*!80001 SET @@session.original_commit_timestamp=1669313185231813*//*!*/;
/*!80014 SET @@session.original_server_version=80026*//*!*/;
/*!80014 SET @@session.immediate_server_version=80026*//*!*/;
SET @@SESSION.GTID_NEXT= '42053cc7-d6d9-11ec-a377-0200170e2487:5152282533'/*!*/;
# at 826986992
#221124 18:05:51 server id 1  end_log_pos 826986863 CRC32 0x0567319e    Query   thread_id=1713651       exec_time=0     error_code=0
SET TIMESTAMP=1669313151/*!*/;
BEGIN
/*!*/;
# at 826987072
#221124 18:05:51 server id 1  end_log_pos 826987124 CRC32 0x716e1e1d    Table_map: `scrles`.`trade_algo_ta_append_finalcsv_ec0ya` mapped to number 44173693
# at 826987333
#221124 18:05:51 server id 1  end_log_pos 826994442 CRC32 0x504a2c01    Update_rows: table id 44173693
# at 826994651
#221124 18:05:51 server id 1  end_log_pos 827002162 CRC32 0x39ca7d06    Update_rows: table id 44173693
# at 827002371
#221124 18:05:51 server id 1  end_log_pos 827009634 CRC32 0xf413dc95    Update_rows: table id 44173693
1 Like

Can you add -v -v --base64-output=decode-rows to your mysqlbinlog output? Also, include the event before and after.

1 Like

thanks for the reply… here is the binlog with the 20 lines before and after…

> # at 826986658
> #221124 18:06:25 server id 1  end_log_pos 826986523 CRC32 0xc736021d    Table_map: `xxxxx`.`exxxx_dxxxxxs` mapped to number 86
> # at 826986732
> #221124 18:06:25 server id 1  end_log_pos 826986667 CRC32 0x4c28e931    Update_rows: table id 86 flags: STMT_END_F
> ### UPDATE `xxxxx`.`exxxx_dxxxxxs`
> ### WHERE
> ###   @1=1377265820 /* LONGINT meta=0 nullable=0 is_null=0 */
> ###   @2='zxxxo.com' /* VARSTRING(250) meta=250 nullable=1 is_null=0 */
> ###   @3='smtpin.zxxxo.com' /* VARSTRING(250) meta=250 nullable=1 is_null=0 */
> ###   @4='136.xxx.xxx.23' /* VARSTRING(250) meta=250 nullable=1 is_null=0 */
> ###   @5='2022-11-24 18:04:35' /* DATETIME(0) meta=0 nullable=1 is_null=0 */
> ### SET
> ###   @1=1377265820 /* LONGINT meta=0 nullable=0 is_null=0 */
> ###   @2='zxxxo.com' /* VARSTRING(250) meta=250 nullable=1 is_null=0 */
> ###   @3='smtpin.zxxxo.com' /* VARSTRING(250) meta=250 nullable=1 is_null=0 */
> ###   @4='136.xxx.xxx.23' /* VARSTRING(250) meta=250 nullable=1 is_null=0 */
> ###   @5='2022-11-24 18:06:25' /* DATETIME(0) meta=0 nullable=1 is_null=0 */
> # at 826986876
> #221124 18:06:25 server id 1  end_log_pos 826986698 CRC32 0x66eb27ad    Xid = 31457734691
> COMMIT/*!*/;
> # at 826986907
> #221124 18:05:51 server id 1  end_log_pos 826986783 CRC32 0x16ea18b7    GTID    last_committed=136821   sequence_number=136822  rbr_only=yes    original_committed_timestamp=1669313185231813      immediate_commit_timestamp=1669313185231813     transaction_length=1518829485
> /*!50718 SET TRANSACTION ISOLATION LEVEL READ COMMITTED*//*!*/;
> # original_commit_timestamp=1669313185231813 (2022-11-24 18:06:25.231813 GMT)
> # immediate_commit_timestamp=1669313185231813 (2022-11-24 18:06:25.231813 GMT)
> /*!80001 SET @@session.original_commit_timestamp=1669313185231813*//*!*/;
> /*!80014 SET @@session.original_server_version=80026*//*!*/;
> /*!80014 SET @@session.immediate_server_version=80026*//*!*/;
> SET @@SESSION.GTID_NEXT= '42053cc7-d6d9-11ec-a377-0200170e2487:5152282533'/*!*/;
> # at 826986992
> #221124 18:05:51 server id 1  end_log_pos 826986863 CRC32 0x0567319e    Query   thread_id=1713651       exec_time=0     error_code=0
> SET TIMESTAMP=1669313151/*!*/;
> BEGIN
> /*!*/;
> # at 826987072
> #221124 18:05:51 server id 1  end_log_pos 826987124 CRC32 0x716e1e1d    Table_map: `scrub_tables`.`trade_algo_ta_append_finalcsv_ec0ya` mapped to number 44173693
> # at 826987333
> #221124 18:05:51 server id 1  end_log_pos 826994442 CRC32 0x504a2c01    Update_rows: table id 44173693
> # at 826994651
> #221124 18:05:51 server id 1  end_log_pos 827002162 CRC32 0x39ca7d06    Update_rows: table id 44173693
> # at 827002371
1 Like

Are your relay logs still growing, or has growth also halted? If growth of relay logs has also halted, then there’s something on the source side that isn’t sending to the replica. This seems quite likely since both of your replicas have halted.

/!50718 SET TRANSACTION ISOLATION LEVEL READ COMMITTED//!/;

This line concerns me. I’m not sure why this would be present in the replication stream. Are you using GTID AUTO_POSITION=1? If so, I would skip to the next position and see if replication resumes.

1 Like

this is UTC

it appears that is working fine…

these numbers are incrementing

Source_Log_File: binlog.009408
Read_Source_Log_Pos: 576323813

these are staying the same

Relay_Log_File: mysql-rithcrm-prod-replica-02-relay-bin.007426
Relay_Log_Pos: 826986907
Relay_Source_Log_File: binlog.009135

when i set up the slave i did execute
MASTER_AUTO_POSITION = 1;

how would i skip to the next position – never did that before

thanks
larry

1 Like

i was trying to execute the above – but the ititial command seemed to hang… so i ctrl-c’d out of it – figuring maybe i should stop the replica first… so i tried a stop replica…

now i am seeing this…

1 Like

Woah woah, hold on. What is that query that’s been running for 353,026 seconds?!!! That’s 4 days! You have some MASSIVE update query which is running and that is the reason replication is not progressing. That’s also the reason why your STOP SLAVE appeared hung. Use the master’s binlog and exec_master_pos to find that query in the master and see what that is. Whatever that query is that is updating so many rows is locking up your replicas.

1 Like

am i correct that “Exec_Source_Log_Pos” is the mysql8 version of exec_master_pos?

and i will use mysqlbinlog – to dump
Relay_Source_Log_File: binlog.009135 (from show replica status)

i am doing this from the master

1 Like

Yes, you can look at the masters binlog

1 Like

i seem to see the same thing as a saw in the relay log when i looked at the replica…

grep 826986698 /mnt/mysqldata/bin009135.log -B 20 -A 20

BEGIN
/!/;

at 826986449

#221124 18:06:25 server id 1 end_log_pos 826986523 CRC32 0xc736021d Table_map: slice.email_domains mapped to number 86

at 826986523

#221124 18:06:25 server id 1 end_log_pos 826986667 CRC32 0x4c28e931 Update_rows: table id 86 flags: STMT_END_F

UPDATE xxx.xxxx

WHERE

@1=1377265820 /* LONGINT meta=0 nullable=0 is_null=0 */

@2=‘zoho.com’ /* VARSTRING(250) meta=250 nullable=1 is_null=0 */

@3=‘smtpin.zoho.com’ /* VARSTRING(250) meta=250 nullable=1 is_null=0 */

@4=‘136.143.191.23’ /* VARSTRING(250) meta=250 nullable=1 is_null=0 */

@5=‘2022-11-24 18:04:35’ /* DATETIME(0) meta=0 nullable=1 is_null=0 */

SET

@1=1377265820 /* LONGINT meta=0 nullable=0 is_null=0 */

@2=‘zoho.com’ /* VARSTRING(250) meta=250 nullable=1 is_null=0 */

@3=‘smtpin.zoho.com’ /* VARSTRING(250) meta=250 nullable=1 is_null=0 */

@4=‘136.143.191.23’ /* VARSTRING(250) meta=250 nullable=1 is_null=0 */

@5=‘2022-11-24 18:06:25’ /* DATETIME(0) meta=0 nullable=1 is_null=0 */

at 826986667

#221124 18:06:25 server id 1 end_log_pos 826986698 CRC32 0x66eb27ad Xid = 31457734691
COMMIT/!/;

at 826986698

#221124 18:05:51 server id 1 end_log_pos 826986783 CRC32 0x16ea18b7 GTID last_committed=136821 sequence_number=136822 rbr_only=yes original_committed_timestamp=1669313185231813 immediate_commit_timestamp=1669313185231813 transaction_length=1518829485
/!50718 SET TRANSACTION ISOLATION LEVEL READ COMMITTED//!/;

original_commit_timestamp=1669313185231813 (2022-11-24 18:06:25.231813 GMT)

immediate_commit_timestamp=1669313185231813 (2022-11-24 18:06:25.231813 GMT)

/!80001 SET @@session.original_commit_timestamp=1669313185231813//!/;
/!80014 SET @@session.original_server_version=80026//!/;
/!80014 SET @@session.immediate_server_version=80026//!/;
SET @@SESSION.GTID_NEXT= ‘42053cc7-d6d9-11ec-a377-0200170e2487:5152282533’/!/;

at 826986783

#221124 18:05:51 server id 1 end_log_pos 826986863 CRC32 0x0567319e Query thread_id=1713651 exec_time=0 error_code=0
SET TIMESTAMP=1669313151/!/;
BEGIN
/!/;

at 826986863

#221124 18:05:51 server id 1 end_log_pos 826987124 CRC32 0x716e1e1d Table_map: scrub_tables.trade_algo_ta_append_finalcsv_ec0ya mapped to number 44173693

at 826987124

#221124 18:05:51 server id 1 end_log_pos 826994442 CRC32 0x504a2c01 Update_rows: table id 44173693

at 826994442

#221124 18:05:51 server id 1

1 Like