一、制作Livy的parcel包和csd文件
1、配置java环境
需要java1.7以上,建议java1.8
2、配置maven环境
下载maven安装包,解压
wget https://dlcdn.apache.org/maven/maven-3/3.8.4/binaries/apache-maven-3.8.4-bin.tar.gz
tar zxf apache-maven-3.8.4-bin.tar.gz -C /data/build_livy_for_cdh6
配置环境变量
vim /etc/profile
export MAVEN_HOME=/data/build_livy_for_cdh6/apache-maven-3.8.4
export PATH=$PATH:$MAVEN_HOME/bin
source /etc/profile
3、下载 livy_zeppelin_cdh_csd_parcels
cd /data/build_livy_for_cdh6
yum install git -y
git clone https://github.com/alexjbush/livy_zeppelin_cdh_csd_parcels.git
4、修改build脚本
主要是删除ZEPPELIN相关配置
cd livy_zeppelin_cdh_csd_parcels/
cat build.sh
#!/bin/bash
# Settings shared by the parcel and CSD build steps of this script.
# Trace every command as it runs.
set -x
# Abort on the first command that fails.
set -e

# Absolute locations; no other line visible in this script reads these two.
JAR_PATH=/data/build_livy_for_cdh6/livy_zeppelin_cdh_csd_parcels/apache-livy-0.7.0-incubating-bin.zip
DEST_PATH=/data/build_livy_for_cdh6/cloudera/livy_cdh_csd_parcels

# Livy release to package: download URL, expected MD5 of the zip, version label.
LIVY_URL=http://apache.mirror.anlx.net/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-bin.zip
LIVY_MD5="2c5b63757414af16da308e63749e1ae6"
LIVY_VERSION=0.7.0

# Service name as substituted into the templates, plus its lower-case spelling.
livy_service_name="LIVY"
livy_service_name_lower="$(echo "${livy_service_name}" | tr '[:upper:]' '[:lower:]')"

# Archive file name = last path component of the URL; folder = archive minus ".zip".
livy_archive="${LIVY_URL##*/}"
livy_folder="${livy_archive%.zip}"

# Names of the parcel directory, the parcel file and the output directories.
livy_parcel_folder="${livy_service_name}-${LIVY_VERSION}"
livy_parcel_name="${livy_parcel_folder}-el7.parcel"
livy_built_folder="${livy_parcel_folder}_build"
livy_csd_build_folder="livy_csd_build"
function build_cm_ext {
  # Make sure the Cloudera cm_ext validator jar exists, cloning and building
  # cm_ext in the current directory when needed.
  #
  # Globals:  CM_EXT_BRANCH (read, optional) - branch/tag of cm_ext to build.
  # Returns:  non-zero if cloning, checkout or the Maven build fails.

  # Clone only when there is no checkout yet.
  if [ ! -d cm_ext ]; then
    git clone https://github.com/cloudera/cm_ext.git
  fi

  # Build only when the validator jar is missing.
  if [ ! -f cm_ext/validator/target/validator.jar ]; then
    # Run in a subshell so the caller's working directory is restored even
    # when one of the steps fails.
    (
      cd cm_ext || exit 1
      # This script never assigns CM_EXT_BRANCH; with it unset the original
      # ran `git checkout ""`. Stay on the cloned default branch in that
      # case and switch only when a branch was actually requested.
      if [ -n "${CM_EXT_BRANCH:-}" ]; then
        git checkout "$CM_EXT_BRANCH" || exit 1
      fi
      mvn package || exit 1
    )
  fi
}
function get_livy {
  # Obtain the Livy release archive, verify its MD5 and unpack it.
  #
  # Globals:  LIVY_URL, LIVY_MD5, livy_archive, livy_folder (all read).
  # Exits:    status 1 when the download fails or the checksum does not match.
  local partial="${livy_archive}.part"
  local livy_md5

  if [ ! -f "$livy_archive" ]; then
    # Download under a temporary name and rename only on success. Writing
    # straight to $livy_archive would leave a truncated file behind after an
    # interrupted download, and every later run would treat it as cached and
    # fail the MD5 check.
    if ! wget -O "$partial" "$LIVY_URL"; then
      rm -f "$partial"
      echo "ERROR: download of $LIVY_URL failed" >&2
      exit 1
    fi
    mv "$partial" "$livy_archive"
  fi

  # The first field of md5sum's output is the hex digest.
  livy_md5="$(md5sum "$livy_archive" | cut -d' ' -f1)"
  if [ "$livy_md5" != "$LIVY_MD5" ]; then
    echo "ERROR: md5 of $livy_archive is not correct" >&2
    exit 1
  fi

  # Unpack only when the extracted folder is not there yet.
  if [ ! -d "$livy_folder" ]; then
    unzip "$livy_archive"
  fi
}
function build_livy_parcel {
  # Assemble, validate and pack the Livy parcel, then write its manifest.
  # Reads the livy_* / LIVY_VERSION settings and calls get_livy when the
  # parcel directory has to be created from the release archive.

  # Both outputs already present: nothing to rebuild.
  if [ -f "$livy_built_folder/$livy_parcel_name" ] && [ -f "$livy_built_folder/manifest.json" ]; then
    return
  fi

  local validator_jar=cm_ext/validator/target/validator.jar
  local parcel_json="./$livy_parcel_folder/meta/parcel.json"
  local parcel_file="./$livy_built_folder/$livy_parcel_name"

  # The parcel directory is the unpacked release, renamed.
  if [ ! -d "$livy_parcel_folder" ]; then
    get_livy
    mv "$livy_folder" "$livy_parcel_folder"
  fi

  # Copy the metadata templates and fill in their placeholders.
  cp -r livy-parcel-src/meta "$livy_parcel_folder"
  sed -i \
    -e "s/%VERSION%/$LIVY_VERSION/" \
    -e "s/%SERVICENAME%/$livy_service_name/" \
    -e "s/%SERVICENAMELOWER%/$livy_service_name_lower/" \
    "$parcel_json"

  # Validate the directory layout before packing.
  java -jar "$validator_jar" -d "./$livy_parcel_folder"

  # Pack (following symlinks, files owned by root), then validate the parcel file.
  mkdir -p "$livy_built_folder"
  tar -z -c -v -h --owner=root --group=root -f "$parcel_file" "$livy_parcel_folder"
  java -jar "$validator_jar" -f "$parcel_file"

  # Write manifest.json next to the parcel.
  python cm_ext/make_manifest/make_manifest.py "./$livy_built_folder"
}
function build_livy_csd {
  # Build the CSD jar <service>-<version>.jar from ./livy-csd-src.
  # Sets the global JARNAME; skips all work when that jar already exists.
  JARNAME="${livy_service_name}-${LIVY_VERSION}.jar"
  if [ -f "$JARNAME" ]; then
    return
  fi

  local sdl_file="${livy_csd_build_folder}/descriptor/service.sdl"
  local control_script="${livy_csd_build_folder}/scripts/control.sh"

  # Start from a fresh copy of the CSD sources.
  rm -rf "${livy_csd_build_folder}"
  cp -rf ./livy-csd-src "${livy_csd_build_folder}"

  # Fill in the service-name placeholders.
  sed -i \
    -e "s/%SERVICENAME%/$livy_service_name/" \
    -e "s/%SERVICENAMELOWER%/$livy_service_name_lower/" \
    "$sdl_file"
  sed -i -e "s/%SERVICENAMELOWER%/$livy_service_name_lower/" "$control_script"

  # Validate the descriptor, passing the two Spark service types via -l.
  java -jar cm_ext/validator/target/validator.jar -s "$sdl_file" -l "SPARK_ON_YARN SPARK2_ON_YARN"

  # Pack the build folder's contents into the jar.
  jar -cvf "./$JARNAME" -C "${livy_csd_build_folder}" .
}
# Entry point: the first command-line argument selects the build target.
if [ "$1" = "parcel" ]; then
  # The parcel build needs the cm_ext validator, so build that first.
  build_cm_ext
  build_livy_parcel
elif [ "$1" = "csd" ]; then
  build_livy_csd
else
  # Missing or unrecognised argument: print the usage line.
  echo "Usage: $0 [parcel|csd]"
fi
5、编译脚本
sh build.sh parcel
sh build.sh csd
编译完成后,生成livy的parcel、manifest、csd文件
[root@node01 livy_zeppelin_cdh_csd_parcels]# ll LIVY-0.7.0_build/
total 90500
-rw-r--r-- 1 root root 92661923 Feb 15 16:09 LIVY-0.7.0-el7.parcel
-rw-r--r-- 1 root root 311 Feb 15 16:10 manifest.json
[root@node01 livy_zeppelin_cdh_csd_parcels]# ll LIVY-0.7.0.jar
-rw-r--r-- 1 root root 5608 Feb 15 16:10 LIVY-0.7.0.jar
6、修改parcel,添加livy用户和组
cd LIVY-0.7.0_build/
tar zxf LIVY-0.7.0-el7.parcel
cat LIVY-0.7.0/meta/parcel.json
# 修改groups和users,添加livy
{
"schema_version": 1,
"name": "LIVY",
"version": "0.7.0",
"setActiveSymlink": true,
"provides": [
"livy"
],
"depends": "CDH",
"replaces": "",
"conflicts": "",
"scripts": {
"defines": "livy_env.sh"
},
"groups": ["livy"],
"users": {
"livy": {
"extra_groups": [],
"home": "/var/lib/livy",
"longname": "LIVY",
"shell": "/sbin/nologin"
}
},
"packages": [],
"components": []
}
rm -f LIVY-0.7.0-el7.parcel
tar zcf LIVY-0.7.0-el7.parcel LIVY-0.7.0
7、生成sha文件
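echo -n $(sha1sum LIVY-0.7.0-el7.parcel | awk '{print $1}') > LIVY-0.7.0-el7.sha
注意:第6步重新打包后,parcel文件的哈希值已经变化,而manifest.json里记录的仍是旧哈希,需要重新生成manifest.json,否则CM下载parcel时哈希校验会失败:
python ../cm_ext/make_manifest/make_manifest.py .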
二、CM添加Livy服务
1、拷贝csd文件
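将制作出来的csd文件(LIVY-0.7.0.jar)拷贝到Cloudera Manager的csd目录/opt/cloudera/csd/,并重启cloudera-scm-server(下面的命令假设已事先将该jar整理到livy_0.7.0_cdh_parcel/csd/目录,将parcel、manifest.json、sha文件整理到livy_0.7.0_cdh_parcel/parcels/目录)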
cp livy_0.7.0_cdh_parcel/csd/LIVY-0.7.0.jar /opt/cloudera/csd/
systemctl restart cloudera-scm-server
2、分配和激活parcel包
启动一个http服务提供parcel下载
cd livy_0.7.0_cdh_parcel/parcels
/opt/python3/bin/python3 -m http.server
访问cm界面,添加parcel配置
下载
分配
激活
3、配置hdfs core-site.xml开启用户代理
在hdfs服务配置添加hadoop.proxyuser.livy.groups和hadoop.proxyuser.livy.hosts,并重启hdfs服务
4、添加Livy服务
在cm集群上,添加livy服务
5、修改Livy配置
根据情况修改livy的配置
三、验证Livy服务
1、创建交互式会话
curl -X POST -d '{"kind": "pyspark"}' -H "Content-Type: application/json" http://node04:8998/sessions
2、提交sql代码片段
curl http://node04:8998/sessions/0/statements -X POST -H 'Content-Type: application/json' -d '{"code":"show databases","kind":"sql"}'
3、查询结果
curl http://node04:8998/sessions/0/statements/0
{"id":0,"code":"show databases","state":"available","output":{"status":"ok","execution_count":0,"data":{"application/json":{"schema":{"type":"struct","fields":[{"name":"databaseName","type":"string","nullable":false,"metadata":{}}]},"data":[["default"],["dm_adm_db_dev"],["dm_ads_db_dev"],["dm_mdm_db_dev"],["dm_ods_db_dev"],["dm_pdm_db_dev"]]}}},"progress":1.0,"started":1644988250221,"completed":1644988253285}
查询结果正常,即Livy服务正常
4、删除会话
curl -X DELETE http://node04:8998/sessions/0
四、hue添加Livy+Spark
在cm上进入Hue服务界面,点击“配置”,在搜索栏输入“hue_safety_valve.ini”
添加配置如下:
[desktop]
app_blacklist=zookeeper,hbase,search,oozie,jobsub,pig,sqoop,security
[spark]
livy_server_host=node04
livy_server_port=8998
livy_server_session_kind=yarn
[notebook]
show_notebooks=true
enable_external_statements=true
enable_batch_execute=true
[[interpreters]]
# Define the name and how to connect and execute the language.
[[[hive]]]
# The name of the snippet.
name=Hive
# The backend connection to use to communicate with the server.
interface=hiveserver2
[[[impala]]]
name=Impala
interface=hiveserver2
[[[sql]]]
name=SparkSql
interface=livy
[[[spark]]]
name=Spark
interface=livy
[[[pyspark]]]
name=PySpark
interface=livy
[[[r]]]
name=R
interface=livy
[[[jar]]]
name=Spark Submit Jar
interface=livy-batch
登录hue,点击“查询”->“编辑器”-> “Notebook”
注意:不要在这里直接选择SparkSql,经测试无法查询,Spark或者PySpark可直接在这里选择
在Notebook里选择SparkSql
执行一条Spark SQL语句