yarn的启动脚本
startSSPStreaming.sh
# Launch the SSP nginx-log Spark Streaming job on YARN in cluster mode.
# `--master yarn-cluster` was removed in Spark 2.x; the supported form is
# `--master yarn --deploy-mode cluster`. nohup keeps the submit client
# alive if this login session ends before the job does.
nohup /opt/spark/bin/spark-submit \
  --name SSPNginxStreaming \
  --class com.donews.ssp.streaming.NginxStreaming \
  --master yarn \
  --deploy-mode cluster \
  --driver-memory 2G \
  --driver-cores 2 \
  --num-executors 4 \
  --executor-memory 1G \
  --queue root.streaming \
  --conf spark.ui.port=6660 \
  nginxStreaming.jar &> sspnginx.log &
yarn的关闭脚本
stopSSPStreaming.sh
# Kill the locally-running spark-submit client process(es) by name.
# pgrep -f matches against the full command line and replaces the
# fragile `ps -ef | grep ... | grep -v grep | awk` pipeline.
for pid in $(pgrep -f SSPNginxStreaming); do
  echo "$pid"
  # NOTE(review): SIGKILL gives the JVM no chance to clean up; a plain
  # `kill` (SIGTERM) first, escalating only if needed, would be gentler.
  kill -9 "$pid"
done
# Kill every YARN application whose listing line mentions SSPNginxStreaming.
# Column 1 of `yarn application -list` is the application id; a single awk
# does both the filtering and the column extraction.
for app_id in $(yarn application -list | awk '/SSPNginxStreaming/ {print $1}'); do
  echo "$app_id"
  yarn application -kill "$app_id"
done
读取HDFS文件大小
#!/bin/bash
# Print the size of every entry directly under an HDFS path.
# Usage: ./hdfs_dir_sizes.sh <hdfs-path>
path=${1:-}

# Column 8 of `hadoop fs -ls` is the entry path; the NF guard skips the
# "Found N items" header line, which would otherwise yield empty entries.
# NOTE(review): column parsing breaks on paths containing spaces — the
# HDFS paths used here presumably do not contain any.
hadoop fs -ls "$path" | awk 'NF >= 8 {print $8}' |
  while IFS= read -r entry; do
    hdfs dfs -du -s -h "$entry"
  done
读取Linux文件大小
#!/bin/bash
# Print the disk usage of every subdirectory of a directory.
# Usage: ./local_dir_sizes.sh [directory]   (defaults to the current dir)
#
# The original parsed `ls -l` with awk '{print $NF}', which breaks on
# directory names containing spaces, and with no argument it ran
# `du -sh /$i` against the filesystem root. A quoted glob fixes both.
path=${1:-.}

for dir in "$path"/*/; do
  # An unmatched glob stays literal — skip it. NOTE(review): unlike the
  # original /^d/ filter, the glob also follows symlinks to directories.
  [[ -d "$dir" ]] || continue
  du -sh "$dir"
done
Spark-shell
[root@tagtic-master yuhui]# cat spark-shell.sh
#!/bin/bash
# Open an interactive Spark shell on YARN in client mode.
# `--master yarn-client` was removed in Spark 2.x; the supported form is
# `--master yarn --deploy-mode client`.
/opt/spark/bin/spark-shell \
  --master yarn \
  --deploy-mode client \
  --executor-memory 4g \
  --executor-cores 8 \
  --name yuhuishell
北京小辉微信公众号
大数据资料分享请关注