We recently updated the worker nodes in our Rancher cluster by draining the pods running on them and replacing each node with a new worker node. We use psmdb-db:1.12.3
with psmdb-operator:1.12.1,
installed using the Helm chart.
Currently we see that some of the pods fail to come up
percona-mongo-operator-psmdb-operator-5bb956cb88-cbvpd 1/1 Running 0 6d1h
percona-mongo-psmdb-d-cfg-0 2/2 Running 0 6d1h
percona-mongo-psmdb-d-cfg-1 2/2 Running 0 6d1h
percona-mongo-psmdb-d-cfg-2 2/2 Running 0 6d1h
percona-mongo-psmdb-d-mongos-0 0/1 Running 0 6d1h
percona-mongo-psmdb-d-mongos-1 0/1 Running 0 6d1h
percona-mongo-psmdb-d-rs0-0 1/2 Running 2315 (106s ago) 6d1h
percona-mongo-psmdb-d-rs0-1 1/2 CrashLoopBackOff 1592 (6s ago) 6d1h
percona-mongo-psmdb-d-rs1-0 2/2 Running 5 (6d1h ago) 6d1h
percona-mongo-psmdb-d-rs1-1 2/2 Running 1 (38h ago) 6d1h
percona-mongo-psmdb-d-rs1-2 2/2 Running 2 (5h55m ago) 6d1h
kg PerconaServerMongoDB
NAME ENDPOINT STATUS AGE
percona-mongo-psmdb-d percona-mongo-psmdb-d-mongos.data.svc.cluster.local initializing 80d
I tried deleting and recreating the pods, but it doesn’t help.
Logs from percona-mongo-psmdb-d-rs0-1 show the following:
{"t":{"$date":"2022-09-12T14:50:37.159+00:00"},"s":"I", "c":"NETWORK", "id":4712102, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Host failed in replica set","attr":{"replicaSet":"rs0","host":"percona-mongo-psmdb-d-rs0-1.percona-mongo-psmdb-d-rs0.data.svc.cluster.local:27017","error":{"code":6,"codeName":"HostUnreachable","errmsg":"Error connecting to percona-mongo-psmdb-d-rs0-1.percona-mongo-psmdb-d-rs0.data.svc.cluster.local:27017 :: caused by :: Could not find address for percona-mongo-psmdb-d-rs0-1.percona-mongo-psmdb-d-rs0.data.svc.cluster.local:27017: SocketException: Host not found (authoritative)"},"action":{"dropConnections":true,"requestImmediateCheck":false,"outcome":{"host":"percona-mongo-psmdb-d-rs0-1.percona-mongo-psmdb-d-rs0.data.svc.cluster.local:27017","success":false,"errorMessage":"HostUnreachable: Error connecting to percona-mongo-psmdb-d-rs0-1.percona-mongo-psmdb-d-rs0.data.svc.cluster.local:27017 :: caused by :: Could not find address for percona-mongo-psmdb-d-rs0-1.percona-mongo-psmdb-d-rs0.data.svc.cluster.local:27017: SocketException: Host not found (authoritative)"}}}}
apiVersion: psmdb.percona.com/v1
kind: PerconaServerMongoDB
metadata:
annotations:
kubectl.kubernetes.io/last-applied-configuration: |
{"apiVersion":"psmdb.percona.com/v1-12-0","kind":"PerconaServerMongoDB"}
meta.helm.sh/release-name: percona-mongo
meta.helm.sh/release-namespace: data
creationTimestamp: "2022-06-24T12:53:53Z"
finalizers:
- delete-psmdb-pods-in-order
- delete-psmdb-pvc
generation: 3
labels:
app.kubernetes.io/instance: percona-mongo
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: psmdb-db
app.kubernetes.io/version: 1.12.0
helm.sh/chart: psmdb-db-1.12.3
managedFields:
- apiVersion: psmdb.percona.com/v1-12-0
fieldsType: FieldsV1
fieldsV1:
f:metadata:
f:annotations:
.: {}
f:kubectl.kubernetes.io/last-applied-configuration: {}
f:meta.helm.sh/release-name: {}
f:meta.helm.sh/release-namespace: {}
f:finalizers:
.: {}
v:"delete-psmdb-pods-in-order": {}
v:"delete-psmdb-pvc": {}
f:labels:
.: {}
f:app.kubernetes.io/instance: {}
f:app.kubernetes.io/managed-by: {}
f:app.kubernetes.io/name: {}
f:app.kubernetes.io/version: {}
f:helm.sh/chart: {}
f:spec:
.: {}
f:backup:
.: {}
f:enabled: {}
f:image: {}
f:pitr:
.: {}
f:enabled: {}
f:serviceAccountName: {}
f:image: {}
f:imagePullPolicy: {}
f:multiCluster:
.: {}
f:enabled: {}
f:pause: {}
f:pmm:
.: {}
f:enabled: {}
f:image: {}
f:serverHost: {}
f:replsets: {}
f:secrets:
.: {}
f:encryptionKey: {}
f:users: {}
f:sharding:
.: {}
f:configsvrReplSet:
.: {}
f:affinity:
.: {}
f:antiAffinityTopologyKey: {}
f:configuration: {}
f:expose:
.: {}
f:enabled: {}
f:exposeType: {}
f:podDisruptionBudget:
.: {}
f:maxUnavailable: {}
f:resources:
.: {}
f:limits:
.: {}
f:cpu: {}
f:memory: {}
f:requests:
.: {}
f:cpu: {}
f:memory: {}
f:size: {}
f:volumeSpec:
.: {}
f:persistentVolumeClaim:
.: {}
f:resources:
.: {}
f:requests:
.: {}
f:storage: {}
f:enabled: {}
f:mongos:
.: {}
f:affinity:
.: {}
f:antiAffinityTopologyKey: {}
f:configuration: {}
f:expose:
.: {}
f:exposeType: {}
f:podDisruptionBudget:
.: {}
f:maxUnavailable: {}
f:resources:
.: {}
f:limits:
.: {}
f:cpu: {}
f:memory: {}
f:requests:
.: {}
f:cpu: {}
f:memory: {}
f:size: {}
f:unmanaged: {}
f:updateStrategy: {}
f:upgradeOptions:
.: {}
f:apply: {}
f:schedule: {}
f:setFCV: {}
f:versionServiceEndpoint: {}
manager: helm
operation: Update
time: "2022-07-04T18:31:53Z"
- apiVersion: psmdb.percona.com/v1-12-0
fieldsType: FieldsV1
fieldsV1:
f:status:
.: {}
f:backupVersion: {}
f:host: {}
f:mongoImage: {}
f:mongoVersion: {}
f:mongos: {}
f:observedGeneration: {}
f:pmmVersion: {}
f:replsets:
.: {}
f:cfg:
.: {}
f:initialized: {}
f:size: {}
f:rs0:
.: {}
f:added_as_shard: {}
f:initialized: {}
f:size: {}
f:rs1:
.: {}
f:added_as_shard: {}
f:initialized: {}
f:size: {}
manager: percona-server-mongodb-operator
operation: Update
time: "2022-08-04T17:37:41Z"
- apiVersion: psmdb.percona.com/v1-12-0
fieldsType: FieldsV1
fieldsV1:
f:status:
f:conditions: {}
f:mongos:
f:ready: {}
f:size: {}
f:status: {}
f:ready: {}
f:replsets:
f:cfg:
f:ready: {}
f:status: {}
f:rs0:
f:message: {}
f:ready: {}
f:status: {}
f:rs1:
f:ready: {}
f:status: {}
f:size: {}
f:state: {}
manager: percona-server-mongodb-operator
operation: Update
subresource: status
time: "2022-09-12T15:21:49Z"
name: percona-mongo-psmdb-d
namespace: data
resourceVersion: "116210509"
uid: 498a08e5-4f41-4cb8-a7ae-bd3d9877c5f7
spec:
backup:
enabled: true
image: percona/percona-backup-mongodb:1.7.0
pitr:
enabled: false
serviceAccountName: percona-server-mongodb-operator
image: percona/percona-server-mongodb:5.0.7-6
imagePullPolicy: Always
multiCluster:
enabled: false
pause: false
pmm:
enabled: false
image: percona/pmm-client:2.27.0
serverHost: monitoring-service
replsets:
- affinity:
antiAffinityTopologyKey: kubernetes.io/hostname
arbiter:
affinity:
antiAffinityTopologyKey: kubernetes.io/hostname
enabled: false
size: 1
configuration: |
systemLog:
verbosity: 0
expose:
enabled: false
exposeType: ClusterIP
name: rs0
nonvoting:
affinity:
antiAffinityTopologyKey: kubernetes.io/hostname
enabled: false
podDisruptionBudget:
maxUnavailable: 1
resources:
limits:
cpu: 300m
memory: 0.5G
requests:
cpu: 300m
memory: 0.5G
size: 1
volumeSpec:
persistentVolumeClaim:
resources:
requests:
storage: 3Gi
podDisruptionBudget:
maxUnavailable: 1
resources:
limits:
cpu: 300m
memory: 0.5G
requests:
cpu: 300m
memory: 0.5G
size: 2
volumeSpec:
persistentVolumeClaim:
resources:
requests:
storage: 3Gi
- affinity:
antiAffinityTopologyKey: kubernetes.io/hostname
arbiter:
affinity:
antiAffinityTopologyKey: kubernetes.io/hostname
enabled: false
size: 1
expose:
enabled: false
exposeType: ClusterIP
name: rs1
nonvoting:
affinity:
antiAffinityTopologyKey: kubernetes.io/hostname
enabled: false
podDisruptionBudget:
maxUnavailable: 1
resources:
limits:
cpu: 300m
memory: 0.5G
requests:
cpu: 300m
memory: 0.5G
size: 2
volumeSpec:
persistentVolumeClaim:
resources:
requests:
storage: 3Gi
podDisruptionBudget:
maxUnavailable: 1
resources:
limits:
cpu: 300m
memory: 0.5G
requests:
cpu: 300m
memory: 0.5G
size: 2
volumeSpec:
persistentVolumeClaim:
resources:
requests:
storage: 3Gi
secrets:
encryptionKey: percona-mongo-psmdb-d-mongodb-encryption-key
users: percona-mongo-psmdb-d-secrets
sharding:
configsvrReplSet:
affinity:
antiAffinityTopologyKey: kubernetes.io/hostname
configuration: |
systemLog:
verbosity: 0
expose:
enabled: false
exposeType: ClusterIP
podDisruptionBudget:
maxUnavailable: 1
resources:
limits:
cpu: 300m
memory: 0.5G
requests:
cpu: 300m
memory: 0.5G
size: 2
volumeSpec:
persistentVolumeClaim:
resources:
requests:
storage: 3Gi
enabled: true
mongos:
affinity:
antiAffinityTopologyKey: kubernetes.io/hostname
configuration: |
systemLog:
verbosity: 0
expose:
exposeType: ClusterIP
podDisruptionBudget:
maxUnavailable: 1
resources:
limits:
cpu: 300m
memory: 0.5G
requests:
cpu: 300m
memory: 0.5G
size: 2
unmanaged: false
updateStrategy: SmartUpdate
upgradeOptions:
apply: 5.0-recommended
schedule: 0 2 * * *
setFCV: false
versionServiceEndpoint: https://check.percona.com
status:
backupVersion: 1.7.0
conditions:
- lastTransitionTime: "2022-09-06T13:54:49Z"
message: 'rs1: ready'
reason: RSReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-06T13:54:49Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-06T13:55:40Z"
message: 'rs1: ready'
reason: RSReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-06T13:55:40Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-06T14:05:55Z"
reason: MongosReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-06T14:05:55Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-06T14:08:46Z"
reason: MongosReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-06T14:08:46Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-06T14:32:08Z"
reason: MongosReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-06T14:32:08Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-06T16:11:02Z"
reason: MongosReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-06T16:11:02Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-06T20:22:42Z"
reason: MongosReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-06T20:22:42Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-08T20:13:47Z"
message: 'rs1: ready'
reason: RSReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-08T20:13:47Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-11T00:52:57Z"
message: 'rs1: ready'
reason: RSReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-11T00:52:57Z"
status: "True"
type: initializing
- lastTransitionTime: "2022-09-12T09:15:06Z"
message: 'rs1: ready'
reason: RSReady
status: "True"
type: ready
- lastTransitionTime: "2022-09-12T09:15:06Z"
status: "True"
type: initializing
host: percona-mongo-psmdb-d-mongos.data.svc.cluster.local
mongoImage: percona/percona-server-mongodb:5.0.7-6
mongoVersion: 5.0.7-6
mongos:
ready: 0
size: 2
status: initializing
observedGeneration: 3
pmmVersion: 2.27.0
ready: 6
replsets:
cfg:
initialized: true
ready: 3
size: 3
status: ready
rs0:
added_as_shard: true
initialized: true
message: 'mongod: back-off 5m0s restarting failed container=mongod pod=percona-mongo-psmdb-d-rs0-1_data(6a87e809-bedd-41c0-8242-8f9727c3feaa); '
ready: 0
size: 3
status: initializing
rs1:
added_as_shard: true
initialized: true
ready: 3
size: 3
status: ready
size: 11
state: initializing
How do we resolve this?