1.この記事で達成したいこと

2.前提

  • Maria DBコンテナ4台構成とする
$ make ps
docker-compose ps
Name              Command               State                                                 Ports                                               
--------------------------------------------------------------------------------------------------------------------------------------------------
db00   docker-entrypoint.sh --wsr ...   Up      0.0.0.0:3306->3306/tcp, 0.0.0.0:4444->4444/tcp, 0.0.0.0:4567->4567/tcp, 0.0.0.0:4568->4568/tcp    
db01   docker-entrypoint.sh mysqld      Up      0.0.0.0:13306->3306/tcp, 0.0.0.0:14444->4444/tcp, 0.0.0.0:14567->4567/tcp, 0.0.0.0:14568->4568/tcp
db02   docker-entrypoint.sh mysqld      Up      0.0.0.0:23306->3306/tcp, 0.0.0.0:24444->4444/tcp, 0.0.0.0:24567->4567/tcp, 0.0.0.0:24568->4568/tcp
db03   docker-entrypoint.sh mysqld      Up      0.0.0.0:33306->3306/tcp, 0.0.0.0:34444->4444/tcp, 0.0.0.0:34567->4567/tcp, 0.0.0.0:34568->4568/tcp
$ 

3.環境情報

$ grep VERSION /etc/os-release 
VERSION="20.04.3 LTS (Focal Fossa)"
VERSION_ID="20.04"
VERSION_CODENAME=focal 
$ docker --version
Docker version 19.03.3, build a872fc2f86
$ docker-compose --version
docker-compose version 1.24.1, build 4667896b
$ grep image docker-compose.yml 
    image: mariadb:10.6.4
    image: mariadb:10.6.4
    image: mariadb:10.6.4
    image: mariadb:10.6.4
$

Docker Composeで扱うファイル、ディレクトリ構成

$ tree -L 3
.
├── db00
│   ├── conf.d
│   │   └── galera.cnf
│   ├── docker-entrypoint-initdb.d
│   │   └── seed.sql
│   └── scripts
│       └── my-wsrep-notify.sh
├── db01
│   ├── conf.d
│   │   └── galera.cnf
│   └── scripts
│       └── my-wsrep-notify.sh
├── db02
│   ├── conf.d
│   │   └── galera.cnf
│   ├── docker-entrypoint-initdb.d
│   └── scripts
│       └── my-wsrep-notify.sh
├── db03
│   ├── conf.d
│   │   └── galera.cnf
│   ├── docker-entrypoint-initdb.d
│   └── scripts
│       └── my-wsrep-notify.sh
├── docker-compose.yml
├── LICENCE
├── Makefile

4.設定

4-1.docker-compose.ymlを作成

$ vi docker-compose.yml
version: "3.7"

services:
  db00:
    image: mariadb:10.6.4
    container_name: db00
    hostname: db00
    ports:
      # https://mariadb.com/kb/en/configuring-mariadb-galera-cluster/#network-ports
      # Standard MariaDB Port (default: 3306) 
      - 3306:3306
      # Galera Replication Port (default: 4567) 
      - 4567:4567
      # IST Port (default: 4568)
      - 4568:4568
      # SST Port (default: 4444)
      - 4444:4444
    volumes:
      - type: volume
        source: ./db00_data
        target: /var/lib/mysql
      - type: bind
        source: ./db00/conf.d/galera.cnf
        target: /etc/mysql/conf.d/galera.cnf
      - type: bind
        source: ./db00/docker-entrypoint-initdb.d/seed.sql
        target: /docker-entrypoint-initdb.d/seed.sql
    environment:
      # ALLOW_EMPTY_PASSWORD is recommended only for development.
      - MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=yes
      #- MARIADB_ROOT_HOST=%
      #- MARIADB_ROOT_PASSWORD=yourpassword
      - MARIADB_DATABASE=demo
      - MARIADB_INITDB_SKIP_TZINFO=true
      - TZ=Asia/Tokyo
    restart: on-failure
    networks:
      node_network:
    command: ["--wsrep-new-cluster"]

  db01:
    image: mariadb:10.6.4
    container_name: db01
    hostname: db01
    ports:
      - 13306:3306
      - 14567:4567
      - 14568:4568
      - 14444:4444
    volumes:
      - type: volume
        source: ./db01_data
        target: /var/lib/mysql
      - type: bind
        source: ./db01/conf.d/galera.cnf
        target: /etc/mysql/conf.d/galera.cnf
    environment:
      # ALLOW_EMPTY_PASSWORD is recommended only for development.
      - MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=yes
      #- MARIADB_ROOT_HOST=%
      #- MARIADB_ROOT_PASSWORD=yourpassword
      - MARIADB_DATABASE=demo
      - MARIADB_INITDB_SKIP_TZINFO=true
      - TZ=Asia/Tokyo
    restart: on-failure
    networks:
      node_network:

  db02:
    image: mariadb:10.6.4
    container_name: db02
    hostname: db02
    ports:
      - 23306:3306
      - 24567:4567
      - 24568:4568
      - 24444:4444
    volumes:
      - type: volume
        source: ./db02_data
        target: /var/lib/mysql
      - type: bind
        source: ./db02/conf.d/galera.cnf
        target: /etc/mysql/conf.d/galera.cnf
    environment:
      # ALLOW_EMPTY_PASSWORD is recommended only for development.
      - MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=yes
      #- MARIADB_ROOT_HOST=%
      #- MARIADB_ROOT_PASSWORD=yourpassword
      - MARIADB_DATABASE=demo
      - MARIADB_INITDB_SKIP_TZINFO=true
      - TZ=Asia/Tokyo
    restart: on-failure
    networks:
      node_network:

  db03:
    image: mariadb:10.6.4
    container_name: db03
    hostname: db03
    ports:
      - 33306:3306
      - 34567:4567
      - 34568:4568
      - 34444:4444
    volumes:
      - type: volume
        source: ./db03_data
        target: /var/lib/mysql
      - type: bind
        source: ./db03/conf.d/galera.cnf
        target: /etc/mysql/conf.d/galera.cnf
    environment:
      # ALLOW_EMPTY_PASSWORD is recommended only for development.
      - MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=yes
      #- MARIADB_ROOT_HOST=%
      #- MARIADB_ROOT_PASSWORD=yourpassword
      - MARIADB_DATABASE=demo
      - MARIADB_INITDB_SKIP_TZINFO=true
      - TZ=Asia/Tokyo
    restart: on-failure
    networks:
      db_network:


volumes:
  db00_data:
  db01_data:
  db02_data:
  db03_data:

networks:
  db_network:
    driver: bridge

4-2.cnfファイルを作成

  • コンテナひとつにつきひとつ用意するので4つ作成
    • 名前は任意だが、コンテナ側のパスは/etc/mysql/conf.d/配下に置くこと
    • ※ここでは/etc/mysql/conf.d/galera.cnfとして配置
    • ※なおホスト側では、DBコンテナ4台分以下のパスにcnfファイルを置いている
$ find ./ -type f  -regex ".*galera.cnf"
./db01/conf.d/galera.cnf
./db02/conf.d/galera.cnf
./db03/conf.d/galera.cnf
./db00/conf.d/galera.cnf
  • galera.cnfを作成
    • 例としてdb00に配置するgalera.cnf
    • ホスト側のパスはdb00/conf.d/galera.cnf
    • 残り3台、db01, db02, db03のgalera.cnfについては後述
$ vi db00/conf.d/galera.cnf

[mysqld]
# バイナリログの出力場所を指定
# /var/log/mysql/mariadb-bin.[0-9]+という形で、数字のついたファイルが作成
# サンプルは少しスクロールしたところに記載
# https://mariadb.com/kb/en/replication-and-binary-log-system-variables/#log_bin
log_bin	='/var/log/mysql/mariadb-bin'

# Galera Clusterを使う場合、'row'とすること
# https://mariadb.com/kb/en/replication-and-binary-log-system-variables/#binlog_format
# c.f. https://mariadb.com/kb/en/mariadb-galera-cluster-known-limitations/
binlog_format='row'

# バイナリログを自動的に削除するまでの日数
# https://mariadb.com/kb/en/replication-and-binary-log-system-variables/#expire_logs_days
expire_logs_days=1

# Galera Clusterを使う場合、'InnoDB'とすること
# https://galeracluster.com/library/training/tutorials/configuration.html
# InnoDB is enabled by default with a 10MB datafile in /var/lib/mysql/.
# Read the manual for more InnoDB related options. There are many!
default_storage_engine='InnoDB'

# Galera Clusterを使う場合、'READ-COMMITTED'とすること
# https://galeracluster.com/library/documentation/isolation-levels.html
# c.f. https://mariadb.com/kb/en/set-transaction/
# c.f. https://gihyo.jp/dev/serial/01/mysql-road-construction-news/0050?page=2
transaction-isolation='READ-COMMITTED'

# 「5-1.innodb-flush-log-at-trx-commitは0〜3のうちどれを指定すればよいか?」
# https://mariadb.com/docs/reference/mdb/cli/mariadbd/innodb-flush-log-at-trx-commit/#mdb-cli-mysqld-innodb-flush-log-at-trx-commit
innodb-flush-log-at-trx-commit=1

# https://galeracluster.com/library/training/tutorials/configuration.html#system-configuration
# https://qiita.com/chaspy/items/baad6947ae0f8b169868
innodb_autoinc_lock_mode=2

# データとソケットのパスを指定
# https://www.digitalocean.com/community/tutorials/how-to-change-a-mariadb-data-directory-to-a-new-location-on-centos-7
datadir='/var/lib/mysql'
socket='/run/mysqld/mysqld.sock'

# このあたりの書き方の参考資料
# 特にwsrep_cluster_addressで全台ホストやIPを指定するということは重要そう
# https://dba.stackexchange.com/questions/130922/error-wsrep-gcs-src-gcs-cppgcs-open1379-failed-to-open-channel-test-clu/131487
[galera]
wsrep_on='ON'
wsrep_provider='/usr/lib/galera/libgalera_smm.so'
wsrep_cluster_address='gcomm://db00,db01,db02,db03'
wsrep_cluster_name='my_galera'
wsrep_sst_method='rsync'
wsrep_slave_threads=2
wsrep_node_address='db00'

# このあたりの書き方の参考資料
# wsrep_drupal_282555_workaroundは今ではいらないかも
# https://qiita.com/chaspy/items/baad6947ae0f8b169868
wsrep_auto_increment_control='ON'
wsrep_drupal_282555_workaround='ON'
wsrep_retry_autocommit=10

# Can't connect to MySQL server on '127.0.0.1'
# https://takapi86.hatenablog.com/entry/2018/12/16/140314
# https://kamatimaru.hatenablog.com/entry/2020/05/30/023739
[client]
protocol='TCP'
  • log_bin =’/var/log/mysql/mariadb-bin’のサンプル
$ docker-compose exec db00 ls -la /var/log/mysql/
total 60
drwxr-s--- 1 mysql adm   4096 Sep 15 00:35 .
drwxr-xr-x 1 root  root  4096 Aug 31 12:44 ..
-rw-rw---- 1 mysql adm  27616 Sep 15 00:35 mariadb-bin.000001
-rw-rw---- 1 mysql adm   1453 Sep 15 00:35 mariadb-bin.000002
-rw-rw---- 1 mysql adm    393 Sep 15 00:35 mariadb-bin.000003
-rw-rw---- 1 mysql adm    438 Sep 15 00:35 mariadb-bin.000004
-rw-rw---- 1 mysql adm    389 Sep 15 00:35 mariadb-bin.000005
-rw-rw---- 1 mysql adm    170 Sep 15 00:35 mariadb-bin.index
$
  • db01, db02, db03はdb00/conf.d/galera.cnfをコピーし、以下2点だけ書き換えればok
    • wsrep_node_address=‘db00’
  • ※wsrep_node_addressとはコンテナのサービス名のこと
$ find ./ -type f  -regex ".*galera.cnf" | xargs grep "wsrep_node_address="
./db01/conf.d/galera.cnf:wsrep_node_address='db01'
./db02/conf.d/galera.cnf:wsrep_node_address='db02'
./db03/conf.d/galera.cnf:wsrep_node_address='db03'
./db00/conf.d/galera.cnf:wsrep_node_address='db00'
$
  • ※似たようなものとして、wsrep_cluster_addressがあるが、これはクラスターを構成するDBコンテナ全台を指す
    • ここではdb00, db01, db02, db03の4台
$ find ./ -type f  -regex ".*galera.cnf" | xargs grep "wsrep_cluster_address"
./db01/conf.d/galera.cnf:wsrep_cluster_address='gcomm://db00,db01,db02,db03'
./db02/conf.d/galera.cnf:wsrep_cluster_address='gcomm://db00,db01,db02,db03'
./db03/conf.d/galera.cnf:wsrep_cluster_address='gcomm://db00,db01,db02,db03'
./db00/conf.d/galera.cnf:wsrep_cluster_address='gcomm://db00,db01,db02,db03'
$

5.調べていること

5-1.innodb-flush-log-at-trx-commitは0〜3のうちどれを指定すればよいか?

Controls the durability/speed trade-off for commits. Set to 0 (write and flush redo log to disk only once per second), 1 (flush to disk at each commit), 2 (write to log at commit but flush to disk only once per second) or 3 (flush to disk at prepare and at commit, slower and usually redundant). 1 and 3 guarantees that after a crash, committed transactions will not be lost and will be consistent with the binlog and other transactional engines. 2 can get inconsistent and lose transactions if there is a power failure or kernel crash but not if mysqld crashes. 0 has no guarantees in case of crash. 0 and 2 can be faster than 1 or 3.

参考:–innodb-flush-log-at-trx-commit — MariaDB Enterprise Documentation

6.Mariadb Galera ClusterをDocker Composeで構築したときのログ

  • Makefileで構築するので使い方確認
$ make help
init                 はじめて立ち上げるときに使う(1台立ち上げて、--wsrep-new-clusterコマンドを実行してから残りのDBを立ち上げる)
up                   docker-compose up -d(立ち上げている場合のみ使う)
ps                   docker-compose ps
logs                 docker-compose logs
down                 docker-compose down -v
rmall                docker rm -f ${Galera_Cluster_CONTAINERS}
show                 mysql -u root -e "show status like 'wsrep_cluster_%'"
error                docker-compose logs | grep [ERROR|WARN|fail] | tail
bench                docker-compose exec db00 bench
$
  • 立ち上げ
    • ポイントはdocker-compose up –no-start
    • コンテナを構築するだけ。立ち上げはしない(Exit 0)
    • コマンドを実行するコンテナから起動(ここではdb00)するべく、docker-compose start db00
    • 続いて残りのコンテナをdocker-compose start db01 db02 db03で起動
$ make init
docker-compose up --no-start
Creating network "mariadb-galera-cluster_db_network" with driver "bridge"
Creating volume "mariadb-galera-cluster_db00_data" with default driver
Creating volume "mariadb-galera-cluster_db01_data" with default driver
Creating volume "mariadb-galera-cluster_db02_data" with default driver
Creating volume "mariadb-galera-cluster_db03_data" with default driver
Creating db01 ... done
Creating db00 ... done
Creating db03 ... done
Creating db02 ... done
sleep 2
docker-compose start db00
Starting db00 ... done
docker-compose start db01 db02 db03
Starting db01 ... done
Starting db02 ... done
Starting db03 ... done
sleep 3
$
  • 適当なSQLコマンドでマルチマスターレプリケーションがとれているか確認してみる
    • 確認箇所は以下のとおり
    • wsrep_cluster_size は立ち上げたDBコンテナの台数であるか
    • wsrep_cluster_state_uuidwsrep_local_state_uuid は同じであるか
    • wsrep_cluster_status はPrimary
    • wsrep_local_state_comment はSyncedであるか
$ make show
docker-compose exec db00 mysql -u root -e "show status like 'wsrep_cluster_%'"
+----------------------------+--------------------------------------+
| Variable_name              | Value                                |
+----------------------------+--------------------------------------+
| wsrep_cluster_weight       | 4                                    |
| wsrep_cluster_capabilities |                                      |
| wsrep_cluster_conf_id      | 2                                    |
| wsrep_cluster_size         | 4                                    |
| wsrep_cluster_state_uuid   | bc072429-149a-11ec-8718-9b1fede91861 |
| wsrep_cluster_status       | Primary                              |
+----------------------------+--------------------------------------+
docker-compose exec db00 mysql -u root -e "show status like 'wsrep_local_state_%'"
+---------------------------+--------------------------------------+
| Variable_name             | Value                                |
+---------------------------+--------------------------------------+
| wsrep_local_state_uuid    | bc072429-149a-11ec-8718-9b1fede91861 |
| wsrep_local_state_comment | Synced                               |
+---------------------------+--------------------------------------+
docker-compose exec db01 mysql -u root -e "show status like 'wsrep_local_state_%'"
+---------------------------+--------------------------------------+
| Variable_name             | Value                                |
+---------------------------+--------------------------------------+
| wsrep_local_state_uuid    | bc072429-149a-11ec-8718-9b1fede91861 |
| wsrep_local_state_comment | Synced                               |
+---------------------------+--------------------------------------+
docker-compose exec db02 mysql -u root -e "show status like 'wsrep_local_state_%'"
+---------------------------+--------------------------------------+
| Variable_name             | Value                                |
+---------------------------+--------------------------------------+
| wsrep_local_state_uuid    | bc072429-149a-11ec-8718-9b1fede91861 |
| wsrep_local_state_comment | Synced                               |
+---------------------------+--------------------------------------+
docker-compose exec db03 mysql -u root -e "show status like 'wsrep_local_state_%'"
+---------------------------+--------------------------------------+
| Variable_name             | Value                                |
+---------------------------+--------------------------------------+
| wsrep_local_state_uuid    | bc072429-149a-11ec-8718-9b1fede91861 |
| wsrep_local_state_comment | Synced                               |
+---------------------------+--------------------------------------+
$
  • db00のseed.sqlで投入したデータがdb03でみれるか
$ cat db00/docker-entrypoint-initdb.d/seed.sql 
DROP SCHEMA IF EXISTS demo;
CREATE SCHEMA demo;
USE demo;

DROP TABLE IF EXISTS users;

CREATE TABLE users
(
  id           INT(10) NOT NULL AUTO_INCREMENT,
  name     VARCHAR(40) NOT NULL,
  PRIMARY KEY (id)
);

INSERT INTO users (name) VALUES
    ("alice"),
    ("bob");
$
$ docker-compose exec db03 bash
root@db03:/# mysql
Welcome to the MariaDB monitor.  Commands end with ; or \g.
Your MariaDB connection id is 12
Server version: 10.6.4-MariaDB-1:10.6.4+maria~focal-log mariadb.org binary distribution

Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

MariaDB [(none)]> prompt db03 > ;
PROMPT set to 'db03 > '
db03 > use demo;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
db03 > select * from users;
+----+-------+
| id | name  |
+----+-------+
|  1 | alice |
|  2 | bob   |
+----+-------+
2 rows in set (0.002 sec)

db03 > 
  • db03で投入したデータがdb00でみれるか
db03 > INSERT INTO users (name) VALUES ("charlie");
Query OK, 1 row affected (0.012 sec)

db03 > select * from users;
+----+---------+
| id | name    |
+----+---------+
|  1 | alice   |
|  2 | bob     |
|  3 | charlie |
+----+---------+
3 rows in set (0.001 sec)

db03 > exit
Bye
root@db03:/# exit
exit
$ docker-compose exec db00 bash
root@db00:/# mysql          
Welcome to the MariaDB monitor.  Commands end with ; or \g.
Your MariaDB connection id is 24
Server version: 10.6.4-MariaDB-1:10.6.4+maria~focal-log mariadb.org binary distribution

Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

MariaDB [(none)]> prompt db00 > ;
PROMPT set to 'db00 > '
db00 > use demo;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
db00 > select * from users;
+----+---------+
| id | name    |
+----+---------+
|  1 | alice   |
|  2 | bob     |
|  3 | charlie |
+----+---------+
3 rows in set (0.001 sec)

db00 > 

7.既に立ち上がっているコンテナを改めて立ち上げたい

  • docker-compose.ymlの command: ["--wsrep-new-cluster"] をコメントアウト(–wsrep-new-clusterコマンドははじめて立ち上げるときのみ使う)
  • docker-compose up -dで立ち上げる
$ grep command docker-compose.yml 
    #command: ["--wsrep-new-cluster"]
$ make up
docker-compose up -d
db01 is up-to-date
db02 is up-to-date
db00 is up-to-date
db03 is up-to-date
$

8.課題

Mariadb Galera ClusterをDocker Composeで構築することはできたが、残課題が。。

$ vi ./path/to/my-wsrep-notify.sh
# https://galeracluster.com/library/documentation/notification-cmd-example.html
USER=root
PSWD=****
HOST=127.0.0.1
PORT=3306

SCHEMA="wsrep"
MEMB_TABLE="$SCHEMA.membership"
STATUS_TABLE="$SCHEMA.status"

BEGIN="
   SET wsrep_on=0;
   DROP SCHEMA IF EXISTS $SCHEMA; CREATE SCHEMA $SCHEMA;
   CREATE TABLE $MEMB_TABLE (
      idx  INT UNIQUE PRIMARY KEY,
      uuid CHAR(40) UNIQUE, /* node UUID */
      name VARCHAR(32),     /* node name */
      addr VARCHAR(256)     /* node address */
   ) ENGINE=MEMORY;
   CREATE TABLE $STATUS_TABLE (
      size   INT,      /* component size   */
      idx    INT,      /* this node index  */
      status CHAR(16), /* this node status */
      uuid   CHAR(40), /* cluster UUID */
      prim   BOOLEAN   /* if component is primary */
   ) ENGINE=MEMORY;
   BEGIN;
   DELETE FROM $MEMB_TABLE;
   DELETE FROM $STATUS_TABLE;
"
END="COMMIT;"

configuration_change()
{
   echo "$BEGIN;"

   local idx=0

   for NODE in $(echo $MEMBERS | sed s/,/\ /g)
   do
      echo "INSERT INTO $MEMB_TABLE VALUES ( $idx, "
      # Don't forget to properly quote string values
      echo "'$NODE'" | sed  s/\\//\',\'/g
      echo ");"
      idx=$(( $idx + 1 ))
   done

   echo "
      INSERT INTO $STATUS_TABLE
      VALUES($idx, $INDEX,'$STATUS', '$CLUSTER_UUID', $PRIMARY);
   "

   echo "$END"
}

status_update()
{
   echo "
      SET wsrep_on=0;
      BEGIN;
      UPDATE $STATUS_TABLE SET status='$STATUS';
      COMMIT;
   "
}

COM=status_update # not a configuration change by default

while [ $# -gt 0 ]
do
   case $1 in
      --status)
         STATUS=$2
         shift
         ;;
      --uuid)
         CLUSTER_UUID=$2
         shift
         ;;
      --primary)
         [ "$2" = "yes" ] && PRIMARY="1" || PRIMARY="0"
         COM=configuration_change
         shift
         ;;
      --index)
         INDEX=$2
         shift
         ;;
      --members)
         MEMBERS=$2
         shift
         ;;
         esac
         shift
   done

# Undefined means node is shutting down
if [ "$STATUS" != "Undefined" ]
then
   $COM | mysql -B -u$USER -p$PSWD -h$HOST -P$PORT
fi

exit 0

9.参考