#!/bin/bash
software_dir=/root/hadoop/
install_dir=/opt/
hostname=(master node1 node2)
jdk_name=$(tar -tf "$software_dir"jdk*|head -n 1|sed 's|\/||')
hadoop_name=$(tar -tf "$software_dir"hadoop*|head -n 1|sed 's|\/||')
zookeeper_name=$(tar -tf "$software_dir"zookeeper*|head -n 1|sed 's|\/||')

### Set up the chrony time-synchronization cluster
echo =================== Start Chrony Install =====================
for host in ${hostname[@]}
do
ssh -T $host <<-EOF
[ ! \$(rpm -qa chrony) ] && yum install -y chrony &>/dev/null
[ ! \$(rpm -qa chrony) ] && echo "============ install chrony fail ==============" && exit
echo -e >> /etc/chrony.conf
sed -i 's/^server/#server/' /etc/chrony.conf
echo -e "server master iburst\nbindaddress ::\nallow" >> /etc/chrony.conf
[ ${hostname[0]} = \$(hostname) ] && echo "local stratum 10" >> /etc/chrony.conf && sed -i 's/master/127\.0\.0\.1/' /etc/chrony.conf
systemctl enable --now chronyd
timedatectl set-timezone Asia/Shanghai
chronyc -a makestep
exit
EOF
done
[ ! $(rpm -qa chrony) ] && exit

### xsync script
for host in ${hostname[@]}
do
ssh -T $host <<-EOF
[ ! \$(rpm -qa rsync) ] && yum install -y rsync &>/dev/null
[ ! \$(rpm -qa rsync) ] && echo "============ install rsync fail ==============" && exit
exit
EOF
done
[ ! $(rpm -qa rsync) ] && exit
cat > /usr/local/bin/xsync << EOF
#!/bin/bash
pcount=\$#
if [ \$pcount -lt 1 ]
then
echo "Not Enough Arguments!"
exit
fi
for host in ${hostname[@]}
do
if [ "\$host" = \$(hostname) ]
then
continue
fi
echo ======================== Start \$host File Sync =======================
for file in \$@
do
if [ -e \$file ]
then
pdir=\$(cd -P \$(dirname \$file); pwd)
echo fileDir=\$pdir
fname=\$(basename \$file)
echo fileName=\$fname
ssh \$host "mkdir -p \$pdir"
rsync -av \$pdir/\$fname \$host:\$pdir &>/dev/null
else
echo "\$file does not exist"
fi
done
done
EOF
chmod +x /usr/local/bin/xsync

### Install components (JDK, Hadoop, Zookeeper)
echo =================== Start Install Component =====================
tar -xf "$software_dir"jdk* -C $install_dir
tar -xf "$software_dir"hadoop* -C $install_dir
tar -xf "$software_dir"zookeeper* -C $install_dir
rm -rf $install_dir$hadoop_name/share/doc
xsync ${install_dir}{$jdk_name,$hadoop_name,$zookeeper_name}
for host in ${hostname[@]}
do
ssh -T $host <<EOF
echo -e "\
#java\n\
export JAVA_HOME=$install_dir${jdk_name}\n\
export PATH=\\\$PATH:\\\$JAVA_HOME/bin\n\
#hadoop\n\
export HADOOP_HOME=$install_dir${hadoop_name}\n\
export PATH=\\\$PATH:\\\$HADOOP_HOME/bin:\\\$HADOOP_HOME/sbin\n\
#zookeeper\n\
export ZOOKEEPER_HOME=$install_dir${zookeeper_name}\n\
export PATH=\\\$PATH:\\\$ZOOKEEPER_HOME/bin\n\
#hive\n\
export HIVE_HOME=${install_dir}hive\n\
export PATH=\\\$PATH:\\\$HIVE_HOME/bin\
" >> /etc/profile
source /etc/profile
exit
EOF
done
java -version &>/dev/null
[ $? -eq 0 ] && echo "========= java/hadoop/zookeeper/hive installation complete ========"
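
# For reference (not executed): with the defaults above, the loop appends blocks of this
# shape to /etc/profile on every node; the exact directory names depend on the tarballs
# found in $software_dir.
#   #java
#   export JAVA_HOME=/opt/jdk1.8.0_xxx
#   export PATH=$PATH:$JAVA_HOME/bin
#   #hadoop
#   export HADOOP_HOME=/opt/hadoop-2.x.y
#   export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin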

### jpsall script
cat > /usr/local/bin/jpsall << EOF
#!/bin/bash
for host in ${hostname[@]}
do
echo -e "\033[32m======================== \$host =======================\033[0m"
if [ \$# -gt 0 ]
then
ssh \$host "source /etc/profile;\$*"
else
ssh \$host "source /etc/profile;jps|grep -v Jps"
fi
done
EOF
chmod +x /usr/local/bin/jpsall

### Zookeeper configuration
echo =================== Start Zookeeper Configuration =====================
zookeeper_path=$install_dir$zookeeper_name
rm -rf $zookeeper_path
tar -xf "$software_dir"zookeeper* -C $install_dir
mkdir -p $zookeeper_path/{data,logs}
mv $install_dir$zookeeper_name/conf/{zoo_sample.cfg,zoo.cfg}
sed -i "/^dataDir=/c\dataDir=$zookeeper_path/data" $zookeeper_path/conf/zoo.cfg
count=1
for host in ${hostname[@]}
do
zookeeper_host+="server.$count=$host:2888:3888"
if [ $count -lt $(( ${#hostname[@]} )) ]
then
zookeeper_host+="\n"
fi
((count++))
done
echo -e "$zookeeper_host" >> $zookeeper_path/conf/zoo.cfg
sed -i "s|ZOO_LOG_DIR=\".\"|ZOO_LOG_DIR=\"$zookeeper_path/logs\"|" $zookeeper_path/bin/zkEnv.sh
cat > /usr/lib/systemd/system/zookeeper.service <<EOF
[Unit]
Description=Zookeeper Service
After=network.target syslog.target

[Service]
Type=forking
User=root
Group=root
Environment=JAVA_HOME=$install_dir$jdk_name
PIDFile=$zookeeper_path/data/zookeeper_server.pid
ExecStart=$zookeeper_path/bin/zkServer.sh start
ExecStop=$zookeeper_path/bin/zkServer.sh stop
ExecReload=$zookeeper_path/bin/zkServer.sh restart
Restart=always
TimeoutSec=30
SuccessExitStatus=130 143

[Install]
WantedBy=multi-user.target
EOF
chown -R root:root $zookeeper_path
xsync $zookeeper_path
xsync /usr/lib/systemd/system/zookeeper.service
count=1
for host in ${hostname[@]}
do
ssh -T $host <<EOF
echo $count > $zookeeper_path/data/myid
systemctl daemon-reload
systemctl enable --now zookeeper
exit
EOF
((count++))
done
jpsall "zkServer.sh status"

### Hadoop configuration
echo =================== Start Hadoop Configuration =====================
hadoop_path=$install_dir$hadoop_name
hadoop_conf_path=$hadoop_path/etc/hadoop
stop-all.sh
jpsall "rm -rf $hadoop_path" &>/dev/null
tar -xf "$software_dir"hadoop* -C $install_dir
rm -rf $hadoop_path/share/doc/
cp $hadoop_conf_path/{mapred-site.xml.template,mapred-site.xml}
sed -i '/configuration>$/d' $hadoop_conf_path/{core-site,hdfs-site,mapred-site,yarn-site}.xml
for ((i=0; i<${#hostname[@]}; i++))
do
zookeeper_address+=${hostname[$i]}:2181
qjournal_address+=${hostname[$i]}:8485
cluster_node+=${hostname[$i]}
[ $i -lt $(( ${#hostname[@]} -1 )) ] && zookeeper_address+="," && qjournal_address+=";" && cluster_node+="\n"
done
cat >> $hadoop_conf_path/core-site.xml <<EOF
<configuration>
<!-- Logical name (nameservice) of the HA HDFS cluster -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hacluster</value>
</property>
<!-- Directory where Hadoop stores its metadata / temporary data -->
<property>
<name>hadoop.tmp.dir</name>
<value>$hadoop_path/data</value>
</property>
<!-- Zookeeper quorum addresses -->
<property>
<name>ha.zookeeper.quorum</name>
<value>$zookeeper_address</value>
</property>
<!-- Give the root proxy user full access for HiveServer2 -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
EOF
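
# For reference (not executed): with hostname=(master node1 node2), the loop above produces
#   zookeeper_address = master:2181,node1:2181,node2:2181
#   qjournal_address  = master:8485;node1:8485;node2:8485
#   cluster_node      = master\nnode1\nnode2   (written to the slaves file further down)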
cat >> $hadoop_conf_path/hdfs-site.xml <<EOF
<configuration>
<!-- Name of the HA nameservice -->
<property>
<name>dfs.nameservices</name>
<value>hacluster</value>
</property>
<!-- Number of block replicas -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- NameNode ids within the HA cluster -->
<property>
<name>dfs.ha.namenodes.hacluster</name>
<value>nn1,nn2</value>
</property>
<!-- nn1 RPC address -->
<property>
<name>dfs.namenode.rpc-address.hacluster.nn1</name>
<value>${hostname[0]}:9000</value>
</property>
<!-- nn2 RPC address -->
<property>
<name>dfs.namenode.rpc-address.hacluster.nn2</name>
<value>${hostname[1]}:9000</value>
</property>
<!-- nn1 HTTP address -->
<property>
<name>dfs.namenode.http-address.hacluster.nn1</name>
<value>${hostname[0]}:50070</value>
</property>
<!-- nn2 HTTP address -->
<property>
<name>dfs.namenode.http-address.hacluster.nn2</name>
<value>${hostname[1]}:50070</value>
</property>
<!-- JournalNode quorum for shared edits -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://$qjournal_address/hacluster</value>
</property>
<!-- Local storage directory for JournalNode edits -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>$hadoop_path/data/dfs/journal</value>
</property>
<!-- Fence the previous active NameNode over ssh during failover -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- Private key used by sshfence -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Disable HDFS permission checks -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- Client failover proxy provider -->
<property>
<name>dfs.client.failover.proxy.provider.hacluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Enable automatic failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
</configuration>
EOF
cat >> $hadoop_conf_path/mapred-site.xml <<EOF
<configuration>
<!-- Run MapReduce on YARN (the default is local execution) -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- JobHistoryServer IPC address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>${hostname[2]}:10020</value>
</property>
<!-- JobHistoryServer web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>${hostname[2]}:19888</value>
</property>
</configuration>
EOF
cat >> $hadoop_conf_path/yarn-site.xml <<EOF
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- YARN cluster id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster-yarn</value>
</property>
<!-- Logical ids of the ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Hosts that run the ResourceManagers -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>${hostname[1]}</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>${hostname[2]}</value>
</property>
<!-- Zookeeper quorum used by the ResourceManagers -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>$zookeeper_address</value>
</property>
<!-- Enable ResourceManager state recovery -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Store ResourceManager state in the Zookeeper cluster -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- Disable the NodeManager virtual-memory check -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- Memory (MB) available to each NodeManager -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>3072</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Keep aggregated logs for 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- Aggregated log directory on HDFS (default is /tmp/logs) -->
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/tmp/jobhistory/log</value>
</property>
</configuration>
EOF
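
# Role layout implied by the configuration above, with hostname=(master node1 node2):
#   NameNodes nn1/nn2            -> master, node1
#   ResourceManagers rm1/rm2     -> node1, node2
#   JobHistoryServer             -> node2
#   JournalNodes and Zookeeper   -> all three nodes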
echo -e $cluster_node > $hadoop_conf_path/slaves
chown -R root:root $hadoop_path
sed -i "/^export JAVA_HOME=/c\export JAVA_HOME=$install_dir$jdk_name" $hadoop_conf_path/hadoop-env.sh
xsync $hadoop_path
for host in ${hostname[@]}
do
ssh -T $host <<EOF
[ ! \$(rpm -qa psmisc) ] && yum install -y psmisc &>/dev/null
EOF
done
expect <<-EOF
spawn hdfs zkfc -formatZK
expect {
"(Y or N)" {send "Y\r"; exp_continue}
eof
}
EOF
echo ============ ZookeeperCluster Formatting Complete =============
for host in ${hostname[@]}
do
ssh -T $host <<EOF
echo ============ Start $host Journalnode =============
hadoop-daemon.sh start journalnode
while true
do
curl $host:8485 &>/dev/null
if [ \$? -eq 0 ]
then
exit
fi
sleep 4
done
EOF
done
hdfs namenode -format
echo ============ HadoopCluster Formatting Complete =============
hadoop-daemon.sh start namenode
echo ============ Start SecondaryNamenode Data Sync =============
ssh -T ${hostname[1]} <<EOF
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
exit
EOF

### custom_script
yarn_custom_script(){
sed -i -E '/resourcemanager|nodemanager/s/^/#/' $hadoop_path/sbin/$1
cat >> $hadoop_path/sbin/$1 <<EOF
# $2 resourceManager
AUTOHA_ENABLED=\$(\$HADOOP_PREFIX/bin/hdfs getconf -confKey yarn.resourcemanager.ha.enabled)
if [ "\$(echo "\$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
IDS=\$(\$HADOOP_PREFIX/bin/hdfs getconf -confKey yarn.resourcemanager.ha.rm-ids|tr "," " ")
IDS=(\$IDS)
for ((i=0; i<\${#IDS[@]}; i++))
do
NODES+=\$(\$HADOOP_PREFIX/bin/hdfs getconf -confKey yarn.resourcemanager.hostname.\${IDS[\$i]})
if [ \$i -lt \$(( \${#IDS[@]} -1 )) ]
then
NODES+=" "
fi
done
echo "$3 Resourcemanager HA on [\$NODES]"
"\$bin"/yarn-daemons.sh --config \$YARN_CONF_DIR --hostnames "\$NODES" $2 resourcemanager
else
"\$bin"/yarn-daemon.sh --config \$YARN_CONF_DIR $2 resourcemanager
fi
# $2 nodeManager
"\$bin"/yarn-daemons.sh --config \$YARN_CONF_DIR $2 nodemanager
# $2 historyserver
HISTORYSERVER_ENABLE=\$(\$HADOOP_PREFIX/bin/hdfs getconf -confKey yarn.log-aggregation-enable)
REMOTE=\$(\$HADOOP_PREFIX/bin/hdfs getconf -confKey mapreduce.jobhistory.webapp.address|cut -d ":" -f1)
if [ "\$(echo "\$HISTORYSERVER_ENABLE" | tr A-Z a-z)" = "true" ]; then
echo "$3 Historyserver on [\$REMOTE]"
ssh -T \$REMOTE "\$bin"/mr-jobhistory-daemon.sh $2 historyserver 2>&1 | sed "s/^/\$REMOTE: /"
fi
EOF
}
yarn_custom_script start-yarn.sh start Starting
yarn_custom_script stop-yarn.sh stop Stopping

#### start-all.sh has a startup-order bug on an HA cluster: it starts the daemons in the
#### order namenode -> datanode -> journal nodes -> ...
#### As a result, start-all.sh has to be run twice before the cluster comes up successfully.
#### The correct startup order for an HA cluster is journal nodes -> namenode -> datanode -> ...
#### Fix: move the code that starts the journal nodes ahead of the namenode code (in
#### start-dfs.sh, which start-all.sh calls), so the HA cluster starts successfully every time.
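# The sed below is one way to do that reordering (a sketch tied to the stock Hadoop 2.x
# start-dfs.sh layout): the process substitution on fd 3 holds a copy of the
# "# quorumjournal nodes (if any)" block, the d command deletes it from its original
# position, and "49 r /dev/fd/3" re-inserts it after line 49, i.e. just before the
# namenode startup section here. Verify that line number against your own start-dfs.sh
# before reusing this on a different Hadoop release.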
sed -i '/^# quor/,/^#------/d;49 r /dev/fd/3' $hadoop_path/sbin/start-dfs.sh \
3< <(sed -n '/^# quor/,/^#-----/p' $hadoop_path/sbin/start-dfs.sh)
sed -i '/Deprecated/s/^/#/' $hadoop_path/sbin/start-all.sh
sed -i '/Deprecated/s/^/#/' $hadoop_path/sbin/stop-all.sh
start-all.sh
jpsall
hdfs dfs -chmod -R 777 /tmp
echo ============ HadoopCluster Startup Complete =============

#### hive configuration
echo ============ Start Hive Configuration =============
hive_path=${install_dir}hive
hive_conf_path=$hive_path/conf
tez_path=${install_dir}tez
rm -rf $tez_path
tar xf ${software_dir}*tez* -C $install_dir
mv ${install_dir}*tez* $tez_path
chown -R root:root $tez_path
rm -rf $hive_path
tar xf ${software_dir}*hive* -C $install_dir
mv ${install_dir}*hive* ${install_dir}hive
mysql_user=root
mysql_password=1234
if [ -f "$hive_conf_path/hive-log4j2.properties.template" ]
then
mv $hive_conf_path/hive-log4j2.properties.template $hive_conf_path/hive-log4j2.properties
fi
if [ -f "$hive_conf_path/hive-exec-log4j2.properties.template" ]
then
mv $hive_conf_path/hive-exec-log4j2.properties.template $hive_conf_path/hive-exec-log4j2.properties
fi
cp ${software_dir}mysql-connector-java-5.1.44-bin.jar $hive_path/lib
sed -i "/property.hive.log.dir/c\property.hive.log.dir=$hive_path/logs" $hive_conf_path/hive-log4j2.properties
sed -i "/property.hive.log.dir/c\property.hive.log.dir=$hive_path/logs" $hive_conf_path/hive-exec-log4j2.properties
cat > $hive_conf_path/hive-site.xml <<EOF
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- HDFS path where Hive stores table data -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/hive/database</value>
</property>
<!-- Run small jobs in local mode to speed them up (default is false) -->
<property>
<name>hive.exec.mode.local.auto</name>
<value>true</value>
</property>
<!-- Drop the table-name prefix from result column names -->
<property>
<name>hive.resultset.use.unique.column.names</name>
<value>false</value>
</property>
<!-- Execution engine (default is MR) -->
<property>
<name>hive.execution.engine</name>
<value>tez</value>
</property>
<!-- Disable metastore schema verification -->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://${HOSTNAME}:3306/hive?createDatabaseIfNotExist=true&amp;useUnicode=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>$mysql_user</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>$mysql_password</value>
</property>
</configuration>
EOF
rm -rf ${hive_path}/lib/log4j-slf4j-impl-*.jar
docker exec -it mysql mysql -u $mysql_user -p$mysql_password -e "drop database if exists hive;" &>/dev/null
schematool -dbType mysql -initSchema
hdfs dfs -rm -r /tez
hdfs dfs -mkdir /tez
tez_name=$(ls $tez_path/share)
hdfs dfs -put $tez_path/share/$tez_name /tez
rm -rf ${tez_path}/lib/slf4j-log4j12-*.jar
cat > $hive_conf_path/tez-site.xml <<EOF
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>tez.lib.uris</name>
<value>\${fs.defaultFS}/tez/$tez_name</value>
</property>
<property>
<name>tez.use.cluster.hadoop-libs</name>
<value>true</value>
</property>
<property>
<name>tez.history.logging.service.class</name>
<value>org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService</value>
</property>
</configuration>
EOF
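
# Optional sanity checks worth running at this point (not executed by the script):
#   hdfs dfs -ls /tez                 # the tez tarball should now be on HDFS
#   schematool -dbType mysql -info    # the metastore schema should be initialized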
mv $hive_conf_path/hive-env.sh.template $hive_conf_path/hive-env.sh
cat >> $hive_conf_path/hive-env.sh <<EOF
export TEZ_HOME=$tez_path
export HADOOP_CLASSPATH=\$HADOOP_CLASSPATH:\$TEZ_HOME/*.jar:\$TEZ_HOME/lib/*
for jar in \$(ls \$TEZ_HOME|grep jar)
do
export TEZ_JARS=\$TEZ_JARS:\$TEZ_HOME/\$jar
done
for jar in \$(ls \$TEZ_HOME/lib)
do
export TEZ_JARS=\$TEZ_JARS:\$TEZ_HOME/lib/\$jar
done
EOF
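
#### Optional: quick end-to-end checks once the script has finished
# jpsall                                # every node should list its expected daemons
# jpsall "zkServer.sh status"           # one Zookeeper leader, the rest followers
# hdfs haadmin -getServiceState nn1     # reports active or standby
# yarn rmadmin -getServiceState rm1     # reports active or standby
# hive -e "show databases;"             # metastore reachable and the Tez engine usable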