Skip to content

DB server memory usage gradually increases after starting Replication Manager (DB is remote) #1155

@abi780

Description

@abi780

Environment

  • Replication Manager Version: v2.3.53 (Build: 2024-11-04)
  • Replication Manager Host: 192.168.8.215
  • Database Version: MySQL 5.6.49-enterprise-commercial-advanced-log
  • Primary DB: 192.168.8.171
  • Monitoring: Disabled (monitoring-scheduler = false)
  • Failover Mode: Manual
  • Memory Config: prov-db-memory = "256"

Problem

After starting Replication Manager (running on a separate host), memory usage on the DB server (192.168.8.171) gradually increases over time, even when:

  • Monitoring is turned off
  • No backups or scheduled jobs are enabled
  • Queries per second (QPS) is already high (~34,000 QPS)
  • Uptime of DB is stable (>200 days)

The only change is starting Replication Manager — stopping it halts the memory growth.

Notes

  • Config file disables most features: no failover polling, no scheduled backups.
  • The DB servers are defined in cluster1.toml.
  • log-sql-in-monitoring = false is set.

## Config file

[cygnet@mipl-p-ha-iams-vims replication-manager]$ hostname -i
192.168.8.215
[cygnet@mipl-p-ha-iams-vims replication-manager]$ ls cluster.d/
bkp-cluster1-13-06-2025.toml  cluster1.toml                 cluster1.toml.sample          
[cygnet@mipl-p-ha-iams-vims replication-manager]$ cat cluster.d/cluster1.toml
[cluster1]
title = "cluster1"
prov-orchestrator = "onpremise"
#prov-db-tags = "innodb,noquerycache,slow,pfs,pkg,linux,smallredolog,logtotable"
#prov-db-tags = "innodb,noquerycache,pkg,linux"
#prov-db-databases = "mysql,performance_schema"
#prov-db-ignore-schemas = "CygNetMicroScanPRODTXDB15Jun2022"
#prov-db-databases = "mysql,performance_schema,information_schema"
prov-db-ignore-schemas = "CygNetMicroScanPRODTXDB15Jun2022,MIPLPMSDB25FEB25"
prov-db-memory = "256"
prov-db-memory-shared-pct = "threads:16,innodb:60,myisam:10,aria:10,rocksdb:1,tokudb:1,s3:1,archive:1,querycache:0"
prov-db-disk-size = "1"
prov-db-cpu-cores = "1"
prov-db-disk-iops = "300"

db-servers-hosts = "192.168.8.171:3306,192.168.8.212:3306"
db-servers-prefered-master = "192.168.8.171:3306"
db-servers-credential = "repluser:xxxx"
db-servers-connect-timeout = 1
replication-credential = "repluser:xxxx"

verbose = false
log-failed-election  = true
log-level = 1
log-rotate-max-age = 7
log-rotate-max-backup = 7
log-rotate-max-size = 5
#log-sql-in-monitoring   = true
log-sql-in-monitoring   = false
log-sst = true

##############
## TOPOLOGY ##
##############

replication-multi-master = false
replication-multi-tier-slave = false

#############
## BACKUPS ##
#############


backup-streaming = false
backup-streaming-aws-access-key-id = "admin"
backup-streaming-aws-access-secret = "xxxx"
backup-streaming-endpoint= "https://s3.signal18.io/"
backup-streaming-region= "fr-1"
backup-streaming-bucket= "repman"

#backup-restic = true
backup-restic = false
backup-restic-aws =  false
backup-physical-type = "mariabackup"
backup-logical-type = "mysqldump"
backup-restic-aws-access-secret = "xxxx"
backup-restic-password = "xxxx"
backup-restic-binary-path = "/usr/local/bin/restic"

#monitoring-scheduler = true
monitoring-scheduler = false
scheduler-db-servers-logical-backup  = false
scheduler-db-servers-logical-backup-cron= "0 0 1 * * 6"
scheduler-db-servers-logs   =  false
scheduler-db-servers-logs-cron = "0 0 * * * *"
scheduler-db-servers-logs-table-keep = 4
scheduler-db-servers-logs-table-rotate  = false
scheduler-db-servers-logs-table-rotate-cron = "0 0 0/6 * * *"
scheduler-db-servers-optimize  = false
scheduler-db-servers-optimize-cron = "0 0 3 1 * 5"
scheduler-db-servers-physical-backup = false
scheduler-db-servers-physical-backup-cron = "0 0 0 * * *"

##############
## FAILOVER ##
##############

#failover-mode = "automatic"
failover-mode = "manual"
failover-pre-script = "/root/pre_failover.expect"
failover-post-script = "/root/post_failover.expect"

## Slaves will re enter with read-only

failover-readonly-state = true
failover-event-scheduler = false
failover-event-status = false

## Failover after N failures detection

failover-falsepositive-ping-counter = 5

## Cancel failover if already N failover
## Cancel failover if last failover was N seconds before
## Cancel failover in semi-sync when one slave is not in sync
## Cancel failover if one slave receive master heartbeat
## Cancel failover when replication delay is more than N seconds

failover-limit = 0
failover-time-limit = 0
failover-at-sync = false
failover-max-slave-delay = 30
failover-restart-unsafe = false

# failover-falsepositive-heartbeat = true
# failover-falsepositive-heartbeat-timeout = 3
# failover-falsepositive-maxscale = false
# failover-falsepositive-maxscale-timeout = 14
# failover-falsepositive-external = false
# failover-falsepositive-external-port = 80

################
## SWITCHOVER ##
################

## In switchover Wait N milliseconds before killing long running transactions
## Cancel switchover if transaction running more than N seconds
## Cancel switchover if write query running more than N seconds
## Cancel switchover if one of the slaves is not synced based on GTID equality

switchover-wait-kill = 5000
switchover-wait-trx = 10
switchover-wait-write-query = 10
switchover-at-equal-gtid = false
switchover-at-sync = false
switchover-max-slave-delay = 30

############
## REJOIN ##
############

autorejoin = true
autorejoin-script = ""
autorejoin-semisync = true
autorejoin-backup-binlog = true
autorejoin-flashback = false
autorejoin-mysqldump = false

####################
## CHECKS & FORCE ##
####################

#check-replication-filters = true
#check-binlog-filters = true
#check-replication-state = true

check-replication-filters = false
check-binlog-filters = false
check-replication-state = false


force-slave-heartbeat= false
force-slave-heartbeat-retry = 5
force-slave-heartbeat-time = 3
force-slave-gtid-mode = false
force-slave-semisync = false
force-slave-failover-readonly-state = false
force-binlog-row = false
force-binlog-annotate = false
force-binlog-slowqueries = false
force-binlog-compress = false
force-binlog-checksum = false
force-inmemory-binlog-cache-size = false
force-disk-relaylog-size-limit = false
force-disk-relaylog-size-limit-size = 1000000000
force-sync-binlog = false
force-sync-innodb = false

##############
## MAXSCALE ##
##############

## for 2 nodes cluster maxscale can be driven by replication manager

maxscale = false
maxscale-binlog = false
maxscale-servers = "192.168.0.201"
maxscale-port = 4003
maxscale-user = "admin"
maxscale-pass = "mariadb"

## When true replication manager drive maxscale server state
## Not required unless multiple maxscale or release does not support detect_stale_slave

maxscale-disable-monitor = false

## maxinfo|maxadmin

maxscale-get-info-method = "maxadmin"
maxscale-maxinfo-port = 4002

maxscale-write-port = 4007
maxscale-read-port = 4008
maxscale-read-write-port = 4006
maxscale-binlog-port = 4000

#############
## HAPROXY ##
#############

## Wrapper mode unless maxscale or proxysql required to be located with replication-manager

haproxy = false
haproxy-binary-path = "/usr/sbin/haproxy"

## Read write traffic
## Read only load balance least connection traffic
haproxy-write-port = 3306
haproxy-read-port = 3307

####################
## SHARDING PROXY ##
####################

mdbshardproxy = false
mdbshardproxy-hosts = "127.0.0.1:3306"
mdbshardproxy-user = "root:mariadb"

Request

  1. Can you confirm if Replication Manager performs any background replication checks even with monitoring and backups disabled?
  2. Is this memory behavior expected in v2.3.53?
  3. Any flags or config options to reduce background DB activity further?

Thank you.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions