https://hub.docker.com/r/bitnami/spark
https://github.com/bitnami/bitnami-docker-spark/
https://hub.docker.com/r/andreper/jupyterlab
https://hub.docker.com/r/andreper/spark-master
https://hub.docker.com/r/andreper/spark-worker
https://github.com/cluster-apps-on-docker/spark-standalone-cluster-on-docker
https://hub.docker.com/r/bde2020/spark-master
https://hub.docker.com/r/bde2020/spark-worker
https://hub.docker.com/r/bde2020/spark-submit
https://github.com/big-data-europe/docker-spark
https://github.com/apache/spark
Spark负责数据的计算
一.docker-compose.yml
# Compose file for a Spark cluster built from the bitnami/spark image:
# one master service (`spark`) and two worker services.
version: '2'

services:
  # Master node (SPARK_MODE=master). Host port 8080 maps to container
  # port 8080.
  spark:
    image: docker.io/bitnami/spark:3
    environment:
      - SPARK_MODE=master
      # RPC authentication, RPC encryption, local storage encryption and
      # SSL are all set to "no" for this service.
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    ports:
      - '8080:8080'

  # Worker 1: pointed at the `spark` service on port 7077, configured
  # with 1G of memory and 1 core.
  spark-worker-1:
    image: docker.io/bitnami/spark:3
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark:7077
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_CORES=1
      # Security settings mirror the master's (all "no").
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no

  # Worker 2: same configuration as worker 1.
  spark-worker-2:
    image: docker.io/bitnami/spark:3
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark:7077
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_CORES=1
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
- SPARK_RPC_AUTHENTICATION_ENABLED: 启用RPC身份验证
- SPARK_RPC_ENCRYPTION_ENABLED: 启用RPC加密
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED: 启用本地存储加密
二.docker-compose.yml
# Compose file for a Spark standalone cluster built from the andreper
# images: JupyterLab, one Spark master and two Spark workers. Every
# service mounts the same named volume at /opt/workspace.
version: "3.6"

volumes:
  # Named volume shared by all services below.
  shared-workspace:
    name: "hadoop-distributed-file-system"
    driver: local

services:
  # JupyterLab: publishes host ports 8888 and 4040.
  jupyterlab:
    image: andreper/jupyterlab:3.0.0-spark-3.0.0
    container_name: jupyterlab
    ports:
      # Port mappings are quoted so they are always read as strings.
      - "8888:8888"
      - "4040:4040"
    volumes:
      - shared-workspace:/opt/workspace

  # Spark master: publishes host ports 8080 and 7077.
  spark-master:
    image: andreper/spark-master:3.0.0
    container_name: spark-master
    ports:
      - "8080:8080"
      - "7077:7077"
    volumes:
      - shared-workspace:/opt/workspace

  # Worker 1: 1 core, 512m of memory; started after spark-master.
  spark-worker-1:
    image: andreper/spark-worker:3.0.0
    container_name: spark-worker-1
    environment:
      - SPARK_WORKER_CORES=1
      - SPARK_WORKER_MEMORY=512m
    ports:
      - "8081:8081"
    volumes:
      - shared-workspace:/opt/workspace
    depends_on:
      - spark-master

  # Worker 2: same settings as worker 1, but container port 8081 is
  # published on host port 8082 so the two workers do not collide.
  spark-worker-2:
    image: andreper/spark-worker:3.0.0
    container_name: spark-worker-2
    environment:
      - SPARK_WORKER_CORES=1
      - SPARK_WORKER_MEMORY=512m
    ports:
      - "8082:8081"
    volumes:
      - shared-workspace:/opt/workspace
    depends_on:
      - spark-master
Cluster overview
| Application | URL | Description |
|---|---|---|
| JupyterLab | localhost:8888 | Cluster interface with built-in Jupyter notebooks |
| Spark Driver | localhost:4040 | Spark Driver web ui |
| Spark Master | localhost:8080 | Spark Master node |
| Spark Worker I | localhost:8081 | Spark Worker node with 1 core and 512m of memory (default) |
| Spark Worker II | localhost:8082 | Spark Worker node with 1 core and 512m of memory (default) |
三.docker-compose.yml
# Compose file for a Spark cluster built from the bde2020 images:
# one master and one worker.
version: '3'

services:
  # Spark master: publishes host ports 8080 and 7077.
  spark-master:
    image: bde2020/spark-master:3.1.1-hadoop3.2
    container_name: spark-master
    ports:
      - "8080:8080"
      - "7077:7077"
    environment:
      - INIT_DAEMON_STEP=setup_spark

  # Worker 1: started after spark-master and pointed at it via
  # SPARK_MASTER (spark-master, port 7077).
  spark-worker-1:
    image: bde2020/spark-worker:3.1.1-hadoop3.2
    container_name: spark-worker-1
    depends_on:
      - spark-master
    ports:
      - "8081:8081"
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"