In a few scenarios, either during network isolation or when its worker node was unavailable, the former primary pod did not start as a replica.
Below are the steps we performed to try to start the former primary pod as a replica, while the pod remained stuck at readiness status 4/5:
- Noticed in the replica pod logs that pg_rewind had started.
- Replica recovery is blocked by a stale postmaster.pid file left over from the sudden node crash. Logged in to the replica pod and killed the postmaster.pid process, but there was no progress.
- Tried deleting the replica pod, but it got stuck again at the same point.
- Deleted the pod and then immediately deleted the PVC; the replica pod then bootstrapped from the primary and its readiness status became 5/5.
This behavior occurred in a couple of scenarios. Is it expected, or is something in our configuration incorrect? Below is the error from the replica pod logs; please advise.
/tmp/postgres:5432 - rejecting connections
2026-05-18 13:58:14,219 INFO: Lock owner: pg-cluster-ha-pg-db-pgsa-hql8-0; I am pg-cluster-ha-pg-db-pgsa-jmhg-0
2026-05-18 13:58:14,219 INFO: Still starting up as a standby.
2026-05-18 13:58:14,220 INFO: Lock owner: pg-cluster-ha-pg-db-pgsa-hql8-0; I am pg-cluster-ha-pg-db-pgsa-jmhg-0
2026-05-18 13:58:14,220 INFO: establishing a new patroni heartbeat connection to postgres
2026-05-18 13:58:15,043 INFO: establishing a new patroni heartbeat connection to postgres
2026-05-18 13:58:15,044 WARNING: Retry got exception: connection problems
2026-05-18 13:58:15,045 WARNING: Failed to determine PostgreSQL state from the connection, falling back to cached role
2026-05-18 13:58:15,045 INFO: no action. I am (pg-cluster-ha-pg-db-pgsa-jmhg-0), a secondary, and following a leader (pg-cluster-ha-pg-db-pgsa-hql8-0)
2026-05-18 13:58:17,027 INFO: establishing a new patroni heartbeat connection to postgres
2026-05-18 13:58:18,500 INFO: establishing a new patroni heartbeat connection to postgres
2026-05-18 13:58:18,502 WARNING: Retry got exception: connection problems
Below is the values.yaml used for setting up the primary and replica pods:
[root@bastion-node pg-db]# cat values.yaml
# Default values for pg-cluster.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
annotations:
test-annotation: value
finalizers:
Set this if you want that operator deletes the PVCs on cluster deletion
- percona.com/delete-pvc
Set this if you want that operator deletes the ssl objects on cluster deletion
- percona.com/delete-ssl
Set this if you want that operator deletes the backups on cluster deletion
- percona.com/delete-backups
crVersion: 2.8.2
example-annotation: value
labels:
example-label: value
service:
Valid type are NodePort or LoadBalancer. Defaults to NodePort
type: LoadBalancer
If Postgrescluster has to be deployed on the openshift env, set openshift: true.
openshift: true
repository: registry.connect.redhat.com/percona/percona-postgresql-operator-containers
image: registry.connect.redhat.com/percona/percona-postgresql-operator-containers:2.8.2-ppg-16-postgres
imagePullPolicy: Always
imagePullSecrets:
- name: rh-catalog
postgresVersion: 16
port: 5432
pause: false
unmanaged: false
standby:
enabled: false
host: ""
port: ""
repoName: repo1
customRootCATLSSecret:
name: cluster1-ca-cert
items:
- key: "tls.crt"
path: "root.crt"
- key: "tls.key"
path: "root.key"
customTLSSecret:
name: ""
customReplicationTLSSecret:
name: ""
volumes:
instances:
expose:
type: ClusterIP
dataVolumeClaimSpec:
storageClassName: ocs-storagecluster-ceph-rbd
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: database
operator: In # The "equals" logic
values:
- postgres # The label value on your node
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchLabels:
postgres-operator.crunchydata.com/data: postgres
topologyKey: kubernetes.io/hostname
patroni:
dynamicConfiguration:
dcs:
maximum_lag_on_failover: 104857600
loop_wait: 10
retry_timeout: 10
proxy:
pgBouncer:
replicas: 0
image: docker.io/percona/percona-pgbouncer:1.25.0-1
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
podAffinityTerm:
labelSelector:
matchLabels:
postgres-operator.crunchydata.com/role: pgbouncer
topologyKey: kubernetes.io/hostname
backups:
enabled: true
trackLatestRestorableTime: true
pgbackrest:
image: registry.connect.redhat.com/percona/percona-postgresql-operator-containers:2.8.2-ppg-16-pgbackrest
env:
envFrom:
repos:
- name: repo1
schedules:
full: "12 1 * * *"
differential: "01 11 * * 1-6"
volume:
volumeClaimSpec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
global:
repo1-retention-full: "2"
repo1-retention-diff: "7"
repo1-retention-full-type: time
repo1-cipher-type: none
repoHost:
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
podAffinityTerm:
labelSelector:
matchLabels:
postgres-operator.crunchydata.com/data: pgbackrest
topologyKey: kubernetes.io/hostname
backupaffinity:
enabled: true
nodeSelectorTerms:
key: database
value: postgres
pmm:
enabled: true
image:
repository: docker.io/percona/pmm-client
tag: 3.4.1
imagePullPolicy: IfNotPresent
secret: cluster1-pmm-secret
serverHost: "pmm-server.pmm.svc.cluster.local"
secret: pmm-secret
resources:
requests:
memory: 256Mi
cpu: 500m
secrets:
name:
replication user password
primaryuser:
superuser password
postgres: postgres
pgbouncer user password
pgbouncer:
pguser user password
pguser:
[root@bastion-node pg-db]#