https://hub.docker.com/r/psroyano/uhadoop
https://github.com/psroyano/hadoop
Ubuntu 20.04, OpenJDK 8, Python 3, Hadoop 3.3.1
Dockerfile:
FROM ubuntu:20.04
LABEL maintainer="Pedro Santos" \
version="2.0"
# Set the container timezone up front; tzdata would otherwise prompt
# interactively during the apt install below.
ENV TZ=Europe/Madrid
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# Install Python 3 and OpenJDK 8 (Hadoop 3.x runs on Java 8).
# update+install share one layer (avoids stale apt cache), recommends are
# skipped and the list cache is removed in the same layer to keep the image small.
RUN apt-get -q update && \
DEBIAN_FRONTEND=noninteractive apt-get -q install -y --no-install-recommends \
libbcprov-java \
openjdk-8-jdk \
python3 \
python3-pip \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
ENV PATH=$PATH:$JAVA_HOME/bin
# Install Hadoop 3.3.1. Use archive.apache.org rather than a regional
# mirror: mirrors drop superseded releases, which silently breaks the build.
# The tarball goes to /tmp and is removed in the same layer.
RUN mkdir /app && \
wget -q https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz -O /tmp/hadoop-3.3.1.tar.gz && \
tar -xzf /tmp/hadoop-3.3.1.tar.gz -C /app && \
rm /tmp/hadoop-3.3.1.tar.gz && \
mkdir /app/hadoop-3.3.1/logs
ENV HADOOP_HOME=/app/hadoop-3.3.1
ENV HADOOP_MAPRED_HOME=$HADOOP_HOME
ENV PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# Copy the cluster configuration files (core/hdfs/mapred/yarn-site.xml).
COPY ./hadoopconf/* /app/hadoop-3.3.1/etc/hadoop/
# Copy the role-dispatch start-up script.
COPY ./start.sh /app/start.sh
# Local HDFS storage: hdfs-site.xml points dfs.datanode.data.dir at
# /hdfs/datanode AND dfs.namenode.name.dir at /hdfs/namenode, so create both.
RUN mkdir -p /hdfs/datanode /hdfs/namenode
# bash is the entrypoint; the default command runs the start script
# (so `docker run <img> <other-script>` can override it).
ENTRYPOINT ["/bin/bash"]
CMD ["/app/start.sh"]
start.sh:
#!/usr/bin/env bash
# Role dispatcher: the container's hostname decides which daemons start.
#   namenode   -> HDFS NameNode (+ MySQL, HiveServer2, Jupyter)
#   yarnmaster -> YARN ResourceManager, proxy server, MR history server
#   (any other) -> HDFS DataNode + YARN NodeManager
if [ "$HOSTNAME" = "namenode" ]; then
echo "INICIANDO NAMENODE"
hdfs --daemon start namenode
# NOTE(review): mysql, hive and jupyter are not installed by this
# Dockerfile; presumably a derived image adds them — verify.
/etc/init.d/mysql start
echo "Creando carpeta de trabajo hdfs"
# Block until the NameNode is up and out of safe mode; without this the
# mkdir/chmod calls below race against daemon start-up and can fail.
hdfs dfsadmin -safemode wait
hdfs dfs -mkdir -p /user/root
hdfs dfs -chmod 777 /user/root
echo "Creando carpetas para hive"
# -p makes the script idempotent: a plain mkdir fails on restart when
# the directory already exists.
hdfs dfs -mkdir -p /tmp
hdfs dfs -chmod -R 777 /tmp
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -chmod -R 777 /user/hive/warehouse
echo "Iniciando hiveserver2"
hiveserver2 &
echo "###################### Jupyter Notebook ######################"
# Jupyter runs in the foreground and keeps the container alive.
jupyter notebook --port 8889 --notebook-dir='/media/notebooks' --no-browser --ip='*' --allow-root
elif [ "$HOSTNAME" = "yarnmaster" ]; then
echo "INICIANDO RESOURCE MANAGER"
yarn --daemon start resourcemanager
yarn --daemon start proxyserver
mapred --daemon start historyserver
else
echo "INICIANDO $HOSTNAME"
hdfs --daemon start datanode
yarn --daemon start nodemanager
fi
echo "OK"
# Replace the script with an interactive bash so the container stays up
# (the daemons above are all backgrounded).
exec bash
hadoopconf/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Default filesystem: every HDFS client resolves the container named
     "namenode" and talks to the NameNode RPC port 8020. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://namenode:8020</value>
</property>
<!-- Let the "root" user impersonate any user from any host; presumably
     required because start.sh launches HiveServer2 as root — verify. -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
hadoopconf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Skip reverse-DNS validation when DataNodes register; presumably
     needed because containers register with Docker-internal IPs that have
     no matching hostname entries — verify against the network setup. -->
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<!-- NameNode metadata directory.
     NOTE(review): the Dockerfile only creates /hdfs/datanode, not
     /hdfs/namenode — confirm the NameNode format step creates it. -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/hdfs/namenode</value>
</property>
<!-- DataNode block storage (created by the Dockerfile). -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/hdfs/datanode</value>
</property>
</configuration>
hadoopconf/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Run MapReduce jobs on YARN (not the local runner). -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Classpath for MR tasks; $HADOOP_MAPRED_HOME is expanded from the
     env settings declared further below in this file. -->
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
<!-- JobHistory server runs on the "yarnmaster" container
     (started there by start.sh via `mapred --daemon start historyserver`). -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>yarnmaster:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>yarnmaster:19888</value>
</property>
<!-- Point the AM, map and reduce task environments at the Hadoop install
     path baked into the image (/app/hadoop-3.3.1 in the Dockerfile). -->
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/app/hadoop-3.3.1</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/app/hadoop-3.3.1</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/app/hadoop-3.3.1</value>
</property>
</configuration>
hadoopconf/yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Site specific YARN configuration properties -->
<configuration>
<!-- The ResourceManager runs on the container named "yarnmaster"
     (started there by start.sh). -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>yarnmaster</value>
</property>
<!-- Local (per-NodeManager) container log directory. -->
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/var/log/hadoop-yarn</value>
</property>
<!-- NOTE(review): mapreduce.framework.name belongs in mapred-site.xml,
     where it is already set to the same value — this copy is redundant. -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Aggregate finished-container logs into HDFS under /yarn/<user>/logs
     so they survive NodeManager restarts and are served centrally. -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/yarn</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
<value>logs</value>
</property>
<!-- Web UIs redirect log links to the JobHistory server on yarnmaster
     (same host/port as mapreduce.jobhistory.webapp.address). -->
<property>
<name>yarn.log.server.url</name>
<value>http://yarnmaster:19888/jobhistory/logs</value>
</property>
<!-- Shuffle service required for MapReduce jobs on YARN. -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>