应用: clickhouse -> 添加服务端服务: server1
镜像: yandex/clickhouse-server:latest
卷:
/etc/localtime:/etc/localtime:ro
/data/file:/data/file
/data/docker/monitor/clickhouse/config.xml:/etc/clickhouse-server/config.xml:ro
/data/docker/monitor/clickhouse/metrika.xml:/etc/clickhouse-server/metrika.xml:ro
/data/docker/monitor/clickhouse/config.d/macros1.xml:/etc/clickhouse-server/config.d/macros.xml:ro
端口:(http)8123-8123
应用: clickhouse -> 添加服务端服务: server2
镜像: yandex/clickhouse-server:latest
卷:
/etc/localtime:/etc/localtime:ro
/data/file:/data/file
/data/docker/monitor/clickhouse/config.xml:/etc/clickhouse-server/config.xml:ro
/data/docker/monitor/clickhouse/metrika.xml:/etc/clickhouse-server/metrika.xml:ro
/data/docker/monitor/clickhouse/config.d/macros2.xml:/etc/clickhouse-server/config.d/macros.xml:ro
端口:(http)8123-8123
应用: clickhouse -> 添加服务端服务: server3
镜像: yandex/clickhouse-server:latest
卷:
/etc/localtime:/etc/localtime:ro
/data/file:/data/file
/data/docker/monitor/clickhouse/config.xml:/etc/clickhouse-server/config.xml:ro
/data/docker/monitor/clickhouse/metrika.xml:/etc/clickhouse-server/metrika.xml:ro
/data/docker/monitor/clickhouse/config.d/macros3.xml:/etc/clickhouse-server/config.d/macros.xml:ro
端口:(http)8123-8123
应用: monitor -> 添加交互式客户端服务: clickhouse-client
镜像: yandex/clickhouse-client:latest
命令:
--host server1 # --host server1 -u user1 --password 123456
卷:
/etc/localtime:/etc/localtime:ro
/data/file:/data/file
# 验证
clickhouse-client --host=server1 --port=9000
select * from system.clusters;
┌─cluster─────shard_num─┬─shard_weight─┬─replica_num─┬─host_name─────
│ cluster_3s_1r │ 1
# 复制表
现在我们有了一个集群和副本设置。对于clickhouse,我们需要在每个服务器中创建ReplicatedMergeTree表作为本地表
CREATE TABLE ttt (id Int32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/ttt', '{replica}') PARTITION BY id ORDER BY id;
CREATE TABLE ttt_all as ttt ENGINE = Distributed(cluster_3s_1r, default, ttt, rand());
# 产生一些数据和测试
# client
for ((idx=1;idx<=100;++idx));
do clickhouse-client --host server1 --query "Insert into default.ttt_all values ($idx)";
done;
# use default
select count(*) from ttt_all; # For Distributed table.
select count(*) from ttt; # For loacl table.
# mysql 同步到 clickhouse # 镜像: mariadb:latest
apt-get update -y
apt-get install -y wget vim net-tools curl git cron axel zip unzip
apt-get install -y cmake golang python3
apt-get install -y build-essential autoconf automake libtool
apt-get install -y libmysqlclient-dev python3-dev
apt-get dist-upgrade -y
apt-get upgrade -y
apt autoremove
apt clean
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
python3 -V
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python3 get-pip.py
pip3 install mysqlclient
pip3 install mysql-replication
pip3 install clickhouse-driver
cd /opt
git clone https://github.com/Altinity/clickhouse-mysql-data-reader.git
cd clickhouse-mysql-data-reader/
pip3 install -e /opt/clickhouse-mysql-data-reader/
mysql -A -uroot -pwdqdmm@r; # 登陆
CREATE USER 'clickhouse'@'%' IDENTIFIED BY 'wdqdmm@m';
GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE, SUPER ON *.* TO 'clickhouse'@'%';
# 创建数据与表,测试读写分离情况
create database test_db;
use test_db;
create table test_tables(name varchar(20),age int(4));
insert into test_tables values('test','1');
select * from test_tables;
# 开启bin-log
[mysqld]
server-id=1
bind-address = 0.0.0.0
#启用二进制日志, row复制模式,具有最高性能.
binlog_format=row
log_bin = /var/log/mysql/mysql-bin
log_bin_index = /var/log/mysql/mysql-bin.index
binlog_expire_logs_seconds = 7
max_binlog_size = 200M
clickhouse-mysql --src-server-id=1 --src-resume --src-wait --nice-pause=1 --src-host=mysql --src-user=clickhouse --src-password=wdqdmm@m --src-tables=test_db.test_tables --log-level=info --migrate-table --dst-host=server1 --dst-port=8123 --dst-schema=test_db --dst-table=test_tables --dst-create-table
# client 验证
clickhouse-client --host=server1 --port=9000
==========================
# vi /data/docker/monitor/clickhouse/config.xml
a. 去掉<listen_host>::</listen_host>注释
b. 在</remote_servers>这个xml标签后添加如下配置:
</remote_servers>
<!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file.
By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
Values for substitutions are specified in /yandex/name_of_substitution elements in that file.
-->
<include_from>/etc/clickhouse-server/metrika.xml</include_from>
<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
# vi /data/docker/monitor/clickhouse/users.xml
<max_memory_usage>500000000000</max_memory_usage>
# vi /data/docker/monitor/clickhouse/metrika.xml
<yandex>
<!-- 集群配置 -->
<clickhouse_remote_servers>
<cluster_3s_1r>
<!-- 数据分片1 -->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>server1</host>
<port>9000</port>
<user>default</user>
<password></password>
</replica>
</shard>
<!-- 数据分片2 -->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>server2</host>
<port>9000</port>
<user>default</user>
<password></password>
</replica>
</shard>
<!-- 数据分片3 -->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>server3</host>
<port>9000</port>
<user>default</user>
<password></password>
</replica>
</shard>
</cluster_3s_1r>
</clickhouse_remote_servers>
<!-- ZK -->
<zookeeper-servers>
<node index="1">
<host>zoo1</host>
<port>2181</port>
</node>
<node index="2">
<host>zoo2</host>
<port>2181</port>
</node>
<node index="3">
<host>zoo3</host>
<port>2181</port>
</node>
</zookeeper-servers>
<networks>
<ip>::/0</ip>
</networks>
<!-- 数据压缩算法 -->
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
vi /data/docker/monitor/clickhouse/config.d/macros1.xml
<yandex>
<macros>
<replica>server1</replica>
<shard>01</shard>
<layer>01</layer>
</macros>
</yandex>
....
# 启动脚本
docker run -d \
--name cs \
--ulimit nofile=262144:262144 \
--volume=/opt/clickhouse/:/var/lib/clickhouse \
--volume=/etc/clickhouse-server/:/etc/clickhouse-server/ \
--add-host server01:$ip1 \
--add-host server02:$ip2 \
--add-host server03:$ip3 \
--hostname $current_hostname \
-p 9000:9000 \
-p 8123:8123 \
-p 9009:9009 \
yandex/clickhouse-server
注意1: 这里的 $ip1、$ip2、$ip3 记得替换为实际值
注意2: $current_hostname 为当前服务器的hostname
--add-host参数:因为我们在配置文件中使用了hostname来指代我们的服务器,为了让容器能够识别,所以需要加此参数
--hostname参数:clickhouse中的system.clusters表会显示集群信息,其中is_local的属性如果不配置hostname的话clickhouse无法识别是否是当前本机。is_local都为0的话会影响集群操作,比如create table on cluster cluster_2s_1r .....
--p参数:暴露容器中的端口到本机端口中。
# 验证集群搭建
分别查看三台服务器的system.clusters,应该显示集群中三台服务器的信息,且is_local为正确值
启动 clickhouse-client
# 在任意服务器
docker run -it \
--rm \
--add-host server01:$ip1 \
--add-host server02:$ip2 \
--add-host server03:$ip3 \
yandex/clickhouse-client \
--host server01 \
--port 9000
--host参数:相当于我们通过yum安装clickhouse时,执行命令clickhouse-client --host server01后面接的参数host,指定用于连接的clickhouse-server的host
--port参数:相当于我们通过yum安装clickhouse时,执行命令clickhouse-client --port 9000后面接的参数port,指定用于连接的clickhouse-server的port