Hi,
requireTLS appears to be broken in v1.14. If I downgrade to 1.13, it works just fine. I used the config below, with everything else left at the defaults from the 1.14 Helm values, including allowUnsafeConfigurations: false. Any help is appreciated.
net:
  tls:
    mode: requireTLS
Hi @tra_for, We need to know more about your deployment. Please attach your CR. Thanks.
Here are the Helm values that I’m using. I don’t see this issue with v1.13 of the operator using the same settings (apart from the operator and MongoDB versions).
# Default values for psmdb-cluster.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Platform type: kubernetes, openshift
# platform: kubernetes
# Cluster DNS Suffix
# clusterServiceDNSSuffix: svc.cluster.local
# clusterServiceDNSMode: "Internal"
finalizers:
## Set this if you want that operator deletes the primary pod last
- delete-psmdb-pods-in-order
## Set this if you want to delete database persistent volumes on cluster deletion
# - delete-psmdb-pvc
nameOverride: ""
fullnameOverride: ""
crVersion: 1.14.0
pause: false
unmanaged: false
allowUnsafeConfigurations: false
# ignoreAnnotations:
# - service.beta.kubernetes.io/aws-load-balancer-backend-protocol
# ignoreLabels:
# - rack
multiCluster:
enabled: false
# DNSSuffix: svc.clusterset.local
updateStrategy: SmartUpdate
upgradeOptions:
versionServiceEndpoint: https://check.percona.com
apply: disabled
schedule: "0 2 * * *"
setFCV: false
image:
repository: percona/percona-server-mongodb
tag: 6.0.4-3
imagePullPolicy: Always
# imagePullSecrets: []
# initImage:
# repository: percona/percona-server-mongodb-operator
# tag: 1.14.0
# initContainerSecurityContext: {}
# tls:
# # 90 days in hours
# certValidityDuration: 2160h
secrets: {}
# If you set users secret here the operator will use existing one or generate random values
# If not set the operator generates the default secret with name <cluster_name>-secrets
# users: my-cluster-name-secrets
# encryptionKey: my-cluster-name-mongodb-encryption-key
pmm:
enabled: false
image:
repository: percona/pmm-client
tag: 2.35.0
serverHost: monitoring-service
replsets:
- name: rs0
size: 3
configuration: |
net:
tls:
mode: requireTLS
systemLog:
verbosity: 2
# externalNodes:
# - host: 34.124.76.90
# - host: 34.124.76.91
# port: 27017
# votes: 0
# priority: 0
# - host: 34.124.76.92
# configuration: |
# operationProfiling:
# mode: slowOp
# systemLog:
# verbosity: 1
affinity:
antiAffinityTopologyKey: "kubernetes.io/hostname"
# advanced:
# podAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# - labelSelector:
# matchExpressions:
# - key: security
# operator: In
# values:
# - S1
# topologyKey: failure-domain.beta.kubernetes.io/zone
# tolerations: []
# priorityClass: ""
# annotations: {}
# labels: {}
# nodeSelector: {}
# livenessProbe:
# failureThreshold: 4
# initialDelaySeconds: 60
# periodSeconds: 30
# timeoutSeconds: 10
# startupDelaySeconds: 7200
# readinessProbe:
# failureThreshold: 8
# initialDelaySeconds: 10
# periodSeconds: 3
# successThreshold: 1
# timeoutSeconds: 2
# runtimeClassName: image-rc
# storage:
# engine: wiredTiger
# wiredTiger:
# engineConfig:
# cacheSizeRatio: 0.5
# directoryForIndexes: false
# journalCompressor: snappy
# collectionConfig:
# blockCompressor: snappy
# indexConfig:
# prefixCompression: true
# inMemory:
# engineConfig:
# inMemorySizeRatio: 0.5
# sidecars:
# - image: busybox
# command: ["/bin/sh"]
# args: ["-c", "while true; do echo echo $(date -u) 'test' >> /dev/null; sleep 5;done"]
# name: rs-sidecar-1
# volumeMounts:
# - mountPath: /volume1
# name: sidecar-volume-claim
# - mountPath: /secret
# name: sidecar-secret
# - mountPath: /configmap
# name: sidecar-config
# sidecarVolumes:
# - name: sidecar-secret
# secret:
# secretName: mysecret
# - name: sidecar-config
# configMap:
# name: myconfigmap
# sidecarPVCs:
# - apiVersion: v1
# kind: PersistentVolumeClaim
# metadata:
# name: sidecar-volume-claim
# spec:
# resources:
# requests:
# storage: 1Gi
# volumeMode: Filesystem
# accessModes:
# - ReadWriteOnce
podDisruptionBudget:
maxUnavailable: 1
expose:
enabled: false
exposeType: ClusterIP
# loadBalancerSourceRanges:
# - 10.0.0.0/8
# serviceAnnotations:
# service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
# serviceLabels:
# some-label: some-key
nonvoting:
enabled: false
# podSecurityContext: {}
# containerSecurityContext: {}
size: 3
# configuration: |
# operationProfiling:
# mode: slowOp
# systemLog:
# verbosity: 1
affinity:
antiAffinityTopologyKey: "kubernetes.io/hostname"
# advanced:
# podAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# - labelSelector:
# matchExpressions:
# - key: security
# operator: In
# values:
# - S1
# topologyKey: failure-domain.beta.kubernetes.io/zone
# tolerations: []
# priorityClass: ""
# annotations: {}
# labels: {}
# nodeSelector: {}
podDisruptionBudget:
maxUnavailable: 1
resources:
limits:
cpu: "300m"
memory: "0.5G"
requests:
cpu: "300m"
memory: "0.5G"
volumeSpec:
# emptyDir: {}
# hostPath:
# path: /data
pvc:
# annotations:
# volume.beta.kubernetes.io/storage-class: example-hostpath
# labels:
# rack: rack-22
# storageClassName: standard
# accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 3Gi
arbiter:
enabled: false
size: 1
affinity:
antiAffinityTopologyKey: "kubernetes.io/hostname"
# advanced:
# podAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# - labelSelector:
# matchExpressions:
# - key: security
# operator: In
# values:
# - S1
# topologyKey: failure-domain.beta.kubernetes.io/zone
# tolerations: []
# priorityClass: ""
# annotations: {}
# labels: {}
# nodeSelector: {}
# schedulerName: ""
resources:
limits:
cpu: "300m"
memory: "0.5G"
requests:
cpu: "300m"
memory: "0.5G"
volumeSpec:
# emptyDir: {}
# hostPath:
# path: /data
pvc:
# annotations:
# volume.beta.kubernetes.io/storage-class: example-hostpath
# labels:
# rack: rack-22
# storageClassName: standard
# accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 3Gi
sharding:
enabled: true
configrs:
size: 3
configuration: |
net:
tls:
mode: requireTLS
systemLog:
verbosity: 2
# externalNodes:
# - host: 34.124.76.90
# - host: 34.124.76.91
# port: 27017
# votes: 0
# priority: 0
# - host: 34.124.76.92
# configuration: |
# operationProfiling:
# mode: slowOp
# systemLog:
# verbosity: 1
affinity:
antiAffinityTopologyKey: "kubernetes.io/hostname"
# advanced:
# podAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# - labelSelector:
# matchExpressions:
# - key: security
# operator: In
# values:
# - S1
# topologyKey: failure-domain.beta.kubernetes.io/zone
# tolerations: []
# priorityClass: ""
# annotations: {}
# labels: {}
# nodeSelector: {}
# livenessProbe: {}
# readinessProbe: {}
# runtimeClassName: image-rc
# sidecars:
# - image: busybox
# command: ["/bin/sh"]
# args: ["-c", "while true; do echo echo $(date -u) 'test' >> /dev/null; sleep 5;done"]
# name: rs-sidecar-1
# volumeMounts:
# - mountPath: /volume1
# name: sidecar-volume-claim
# sidecarPVCs: []
# sidecarVolumes: []
podDisruptionBudget:
maxUnavailable: 1
expose:
enabled: false
exposeType: ClusterIP
# loadBalancerSourceRanges:
# - 10.0.0.0/8
# serviceAnnotations:
# service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
# serviceLabels:
# some-label: some-key
resources:
limits:
cpu: "300m"
memory: "0.5G"
requests:
cpu: "300m"
memory: "0.5G"
volumeSpec:
# emptyDir: {}
# hostPath:
# path: /data
# type: Directory
pvc:
# annotations:
# volume.beta.kubernetes.io/storage-class: example-hostpath
# labels:
# rack: rack-22
# storageClassName: standard
# accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 3Gi
mongos:
size: 2
configuration: |
net:
tls:
mode: requireTLS
systemLog:
verbosity: 2
# configuration: |
# systemLog:
# verbosity: 1
affinity:
antiAffinityTopologyKey: "kubernetes.io/hostname"
# advanced:
# podAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# - labelSelector:
# matchExpressions:
# - key: security
# operator: In
# values:
# - S1
# topologyKey: failure-domain.beta.kubernetes.io/zone
# tolerations: []
# priorityClass: ""
# annotations: {}
# labels: {}
# nodeSelector: {}
# livenessProbe: {}
# readinessProbe: {}
# runtimeClassName: image-rc
# sidecars:
# - image: busybox
# command: ["/bin/sh"]
# args: ["-c", "while true; do echo echo $(date -u) 'test' >> /dev/null; sleep 5;done"]
# name: rs-sidecar-1
# volumeMounts:
# - mountPath: /volume1
# name: sidecar-volume-claim
# sidecarPVCs: []
# sidecarVolumes: []
podDisruptionBudget:
maxUnavailable: 1
resources:
limits:
cpu: "300m"
memory: "0.5G"
requests:
cpu: "300m"
memory: "0.5G"
expose:
exposeType: ClusterIP
# servicePerPod: true
# loadBalancerSourceRanges:
# - 10.0.0.0/8
# serviceAnnotations:
# service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
# serviceLabels:
# some-label: some-key
# auditLog:
# destination: file
# format: BSON
# filter: '{}'
backup:
enabled: true
image:
repository: percona/percona-backup-mongodb
tag: 2.0.4
serviceAccountName: percona-server-mongodb-operator
# annotations:
# iam.amazonaws.com/role: role-arn
# resources:
# limits:
# cpu: "300m"
# memory: "0.5G"
# requests:
# cpu: "300m"
# memory: "0.5G"
storages:
# s3-us-west:
# type: s3
# s3:
# bucket: S3-BACKUP-BUCKET-NAME-HERE
# credentialsSecret: my-cluster-name-backup-s3
# region: us-west-2
# prefix: ""
# uploadPartSize: 10485760
# maxUploadParts: 10000
# storageClass: STANDARD
# insecureSkipTLSVerify: false
# minio:
# type: s3
# s3:
# bucket: MINIO-BACKUP-BUCKET-NAME-HERE
# region: us-east-1
# credentialsSecret: my-cluster-name-backup-minio
# endpointUrl: http://minio.psmdb.svc.cluster.local:9000/minio/
# prefix: ""
# azure-blob:
# type: azure
# azure:
# container: CONTAINER-NAME
# prefix: PREFIX-NAME
# credentialsSecret: SECRET-NAME
pitr:
enabled: false
# oplogSpanMin: 10
# compressionType: gzip
# compressionLevel: 6
tasks:
# - name: daily-s3-us-west
# enabled: true
# schedule: "0 0 * * *"
# keep: 3
# storageName: s3-us-west
# compressionType: gzip
# - name: weekly-s3-us-west
# enabled: false
# schedule: "0 0 * * 0"
# keep: 5
# storageName: s3-us-west
# compressionType: gzip
# - name: weekly-s3-us-west-physical
# enabled: false
# schedule: "0 5 * * 0"
# keep: 5
# type: physical
# storageName: s3-us-west
# compressionType: gzip
# compressionLevel: 6
# If you set users here the secret will be constructed by helm with these values
# users:
# MONGODB_BACKUP_USER: backup
# MONGODB_BACKUP_PASSWORD: backup123456
# MONGODB_DATABASE_ADMIN_USER: databaseAdmin
# MONGODB_DATABASE_ADMIN_PASSWORD: databaseAdmin123456
# MONGODB_CLUSTER_ADMIN_USER: clusterAdmin
# MONGODB_CLUSTER_ADMIN_PASSWORD: clusterAdmin123456
# MONGODB_CLUSTER_MONITOR_USER: clusterMonitor
# MONGODB_CLUSTER_MONITOR_PASSWORD: clusterMonitor123456
# MONGODB_USER_ADMIN_USER: userAdmin
# MONGODB_USER_ADMIN_PASSWORD: userAdmin123456
# PMM_SERVER_API_KEY: apikey
# # PMM_SERVER_USER: admin
# # PMM_SERVER_PASSWORD: admin
Got this error first.
2023-05-08T22:48:14.100Z ERROR failed to reconcile cluster {"controller": "psmdb-controller", "object": {"name":"mongodb-psmdb-db","namespace":"mongodb"}, "namespace": "mongodb", "name": "mongodb-psmdb-db", "reconcileID": "35449119-e330-41b8-acfa-31c24bf701a9", "replset": "rs0", "error": "handleReplsetInit: exec rs.initiate: command terminated with exit code 1 / Current Mongosh Log ID:\t64597c2aa979e7cc2e5651a9\nConnecting to:\t\tmongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000&tls=true&tlsCertificateKeyFile=%2Ftmp%2Ftls.pem&tlsAllowInvalidCertificates=true&tlsCAFile=%2Fetc%2Fmongodb-ssl%2Fca.crt&appName=mongosh+1.6.2\n / MongoNetworkError: connect ECONNREFUSED 127.0.0.1:27017\n", "errorVerbose": "exec rs.initiate: command terminated with exit code 1 / Current Mongosh Log ID:\t64597c2aa979e7cc2e5651a9\nConnecting to:\t\tmongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000&tls=true&tlsCertificateKeyFile=%2Ftmp%2Ftls.pem&tlsAllowInvalidCertificates=true&tlsCAFile=%2Fetc%2Fmongodb-ssl%2Fca.crt&appName=mongosh+1.6.2\n / MongoNetworkError: connect ECONNREFUSED 
127.0.0.1:27017\n\nhandleReplsetInit\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:99\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:487\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:122\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:323\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:274\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:235\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1594"}
Then a bunch of those
2023-05-08T22:52:19.203Z ERROR failed to reconcile cluster {"controller": "psmdb-controller", "object": {"name":"mongodb-psmdb-db","namespace":"mongodb"}, "namespace": "mongodb", "name": "mongodb-psmdb-db", "reconcileID": "e3319980-c4f7-4a7e-a681-3d06d2d10176", "replset": "rs0", "error": "handleReplsetInit: exec add admin user: command terminated with exit code 137 / / ", "errorVerbose": "exec add admin user: command terminated with exit code 137 / / \nhandleReplsetInit\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:99\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:487\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:122\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:323\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:274\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:235\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1594"}
github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile
the pods are also failing
NAME READY STATUS RESTARTS AGE
mongodb-psmdb-db-cfg-0 2/2 Running 1 (12m ago) 16m
mongodb-psmdb-db-cfg-1 2/2 Running 1 (9m25s ago) 13m
mongodb-psmdb-db-cfg-2 2/2 Running 1 (7m37s ago) 10m
mongodb-psmdb-db-mongos-0 1/1 Running 0 8m32s
mongodb-psmdb-db-mongos-1 1/1 Running 0 7m4s
mongodb-psmdb-db-rs0-0 1/2 Running 5 (16s ago) 16m
mongodb-psmdb-db-rs0-1 1/2 Running 4 (21s ago) 13m
mongodb-psmdb-db-rs0-2 1/2 Running 3 (86s ago) 10m
percona-mdb-operator-psmdb-operator-54b7f4f77b-gt9jv 1/1 Running 0 16m
Hi @tra_for, I have tried reproducing the issue using your values without any results. Which k8s version do you use? Did you update to v1.14 or is it just a fresh deployment?
k8s 1.26, and I’m using cert-manager as well. I’m running a fresh deployment of v1.14. It happens whether I use cert-manager or not. Were there any pod restarts? It seems to be something with authentication. I didn’t touch anything else in the config.
2023-05-09T22:00:55.610Z ERROR failed to reconcile cluster {"controller": "psmdb-controller", "object": {"name":"mongodb","namespace":"mongodb"}, "namespace": "mongodb", "name": "mongodb", "reconcileID": "1f4804dd-5747-45de-a413-dde51c1001a8", "replset": "rs0", "error": "dial: ping mongo: connection() error occurred during connection handshake: auth error: sasl conversation error: unable to authenticate using mechanism \"SCRAM-SHA-1\": (AuthenticationFailed) Authentication failed.", "errorVerbose": "connection() error occurred during connection handshake: auth error: sasl conversation error: unable to authenticate using mechanism \"SCRAM-SHA-1\": (AuthenticationFailed) Authentication failed.\nping mongo\ngithub.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo.Dial\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo/mongo.go:64\ngithub.com/percona/percona-server-mongodb-operator/pkg/psmdb.MongoClient\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/psmdb/client.go:47\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).mongoClientWithRole\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/connections.go:21\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:88\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:487\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:122\nsigs.k8s.io/controller-runtime/p
kg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:323\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:274\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:235\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1594\ndial\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:94\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:487\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:122\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:323\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:274\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:235\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1594"}
github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile
/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:489
Hi @tra_for, do you have any data in the cluster? Could you try deleting your PVCs and recreate the cluster?
The error seems like operator failed to initialize replset. If error happens after deleting PVCs please attach full operator logs.
@Slava_Sarzhan @Ege_Gunes This is a test environment, and I’ve deleted and retried this deployment many times with the same result. I’ve also tested upgrading from “crVersion: v1.13.0”, mongodb “image: 5.0.15-13” to “crVersion: v1.14.0”, mongodb “image: 6.0.4-3”. I’m using SmartUpdate. I got the following error from the cfg-1 pod when the operator tried to upgrade one config pod. I didn’t change the TLS settings. Notice that tlsMode is set to an empty string. I don’t know if that issue is what causes the new deployment to fail, but it’s certainly causing the upgrade to fail. Something is up with the tlsMode. On a new deployment, if I use “preferTLS”, which is the default, it works fine. The moment I set it to “requireTLS” in a new deployment, I run into the issues mentioned in my earlier comments. I tested this on bare-metal and public managed k8s clusters. The full operator log is attached.
perconaoperator.log (43.0 KB)
/data/db/ps-entry.sh: line 208: mongo: command not found
3
+ exec mongod --bind_ip_all --auth --dbpath=/data/db --port=27017 --replSet=cfg --storageEngine=wiredTiger --relaxPermChecks --clusterAuthMode=x509 --configsvr --enableEncryption --encryptionKeyFile=/etc/mongodb-encryption/encryption-key --wiredTigerCacheSizeGB=0.25 --wiredTigerIndexPrefixCompression=true --config=/etc/mongodb-config/mongod.conf --tlsMode '' --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidCertificates --tlsClusterFile /tmp/tls-internal.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsClusterCAFile /etc/mongodb-ssl-internal/ca.crt
2
{"t":{"$date":"2023-05-26T13:34:23.075Z"},"s":"D1", "c":"ASSERT", "id":23074, "ctx":"-","msg":"User assertion","attr":{"error":"BadValue: unsupported value for tlsMode ","file":"src/mongo/util/net/ssl_options_server.cpp","line":99}}
1
{"t":{"$date":"2023-05-26T13:34:23.075Z"},"s":"F", "c":"CONTROL", "id":20574, "ctx":"-","msg":"Error during global initialization","attr":{"error":{"code":2,"codeName":"BadValue","errmsg":"unsupported value for tlsMode "}}}
The issue seems to occur when manually setting the TLS mode in the template or CRD file. The same issue happened whether it’s “requireTLS” or “preferTLS”; once the setting is removed, it all looks good. But under the hood it’s using “preferTLS” by default.
net:
  tls:
    mode: requireTLS
Hi @tra_for, this looks strange. It seems that somehow the template or CR file can’t be parsed correctly. I have tested v1.14.0 with k8s 1.26.2 and it works. Please try testing this branch: GitHub - percona/percona-server-mongodb-operator at 1.14.1-test-tls
kubectl apply --server-side -f ./deploy/bundle.yaml
kubectl apply -f ./deploy/cr.yaml
data/db/ps-entry.sh: line 208: mongo: command not found
3
+ exec mongod --bind_ip_all --auth --dbpath=/data/db --port=27017 --replSet=cfg --storageEngine=wiredTiger --relaxPermChecks --clusterAuthMode=x509 --configsvr --enableEncryption --encryptionKeyFile=/etc/mongodb-encryption/encryption-key --wiredTigerCacheSizeGB=0.25 --wiredTigerIndexPrefixCompression=true --config=/etc/mongodb-config/mongod.conf --tlsMode '' --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidCertificates --tlsClusterFile /tmp/tls-internal.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsClusterCAFile /etc/mongodb-ssl-internal/ca.crt
2
{"t":{"$date":"2023-05-26T13:34:23.075Z"},"s":"D1", "c":"ASSERT", "id":23074, "ctx":"-","msg":"User assertion","attr":{"error":"BadValue: unsupported value for tlsMode ","file":"src/mongo/util/net/ssl_options_server.cpp","line":99}}
1
{"t":{"$date":"2023-05-26T13:34:23.075Z"},"s":"F", "c":"CONTROL", "id":20574, "ctx":"-","msg":"Error during global initialization","attr":{"error":{"code":2,"codeName":"BadValue","errmsg":"unsupported value for tlsMode "}}}
If we are talking about this issue, it looks like you started CR 1.13.0 with PSMDB version 6. You need to have 1.14.0 to use PSMDB 6.
I tested the branch. The operator is reporting the same issues, which result in the mongos pods restarting since they are failing the health check. I applied the manifests exactly as you described above; nothing was modified.
2023-05-30T15:59:27.859Z ERROR failed to reconcile cluster {"controller": "psmdb-controller", "object": {"name":"my-cluster-name","namespace":"default"}, "namespace": "default", "name": "my-cluster-name", "reconcileID": "9e5ba1ae-1e0d-460c-947a-cdcf4cb86953", "replset": "rs0", "error": "failed to get mongos connection: ping mongo: server selection error: context deadline exceeded, current topology: { Type: Unknown, Servers: [{ Addr: my-cluster-name-mongos.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp 10.110.34.65:27017: connect: connection refused }, ] }", "errorVerbose": "server selection error: context deadline exceeded, current topology: { Type: Unknown, Servers: [{ Addr: my-cluster-name-mongos.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp 10.110.34.65:27017: connect: connection refused }, ] }\nping mongo\ngithub.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo.Dial\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo/mongo.go:64\ngithub.com/percona/percona-server-mongodb-operator/pkg/psmdb.MongosClient\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/psmdb/client.go:70\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).mongosClientWithRole\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/connections.go:30\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:172\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:487\nsigs.k8s.io/controller-runtime/pkg/internal/controll
er.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:122\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:323\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:274\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:235\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1594\nfailed to get mongos connection\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:174\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:487\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:122\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:323\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:274\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.4/pkg/internal/controller/controller.go:235\nruntime
.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1594"}
Also, I’ve made sure that I’m getting the proper settings through Helm. It doesn’t matter whether I use the Helm setup or the simple CR file setup; the results are the same.
I’ve updated the Helm chart version, which seems to fix the issue in EKS.