Please try the following steps to test HWC read and write from Oozie.
Step 1: In Hive (beeline), log in as the hive user and create the test database and tables
-----------------
create database db_hwc_test;
use db_hwc_test;
CREATE TABLE demo_input_table (
  id int,
  name varchar(10))
PARTITIONED BY (
  dept int)
STORED AS ORC TBLPROPERTIES ('orc.compress'='ZLIB','transactional'='true');

INSERT INTO demo_input_table PARTITION(dept) SELECT * FROM (SELECT 1 as id, 'bcd' as name, 1 as dept) dual;
INSERT INTO demo_input_table PARTITION(dept) SELECT * FROM (SELECT 2 as id, 'qwe' as name, 2 as dept) dual;
INSERT INTO demo_input_table PARTITION(dept) SELECT * FROM (SELECT 3 as id, 'asd' as name, 1 as dept) dual;
INSERT INTO demo_input_table PARTITION(dept) SELECT * FROM (SELECT 4 as id, 'zxc' as name, 2 as dept) dual;

select * from demo_input_table;
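-- expected output: the four rows inserted above
--   1, bcd, 1
--   2, qwe, 2
--   3, asd, 1
--   4, zxc, 2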
CREATE TABLE demo_output_table (
  id int,
  name varchar(10))
PARTITIONED BY (
  dept int)
STORED AS ORC TBLPROPERTIES ('orc.compress'='ZLIB','transactional'='true');
From Ranger: give select rights to hr-user1 on db_hwc_test.demo_input_table.
From Ranger: give write rights to hr-user1 on db_hwc_test.demo_output_table.
Step 2: Test table read and write using spark-submit in cluster mode, to confirm that HWC read and write work outside Oozie

cat pyspark_HWCDemo.py
-----------------------
import sys
from pyspark.sql import SparkSession
from pyspark_llap.sql.session import HiveWarehouseSession

def main(argv):
    # initiate the spark session builder
    spark = SparkSession.builder.appName("pyspark_HWCDemo.py").enableHiveSupport().getOrCreate()
    # build a Hive Warehouse Connector session on top of the Spark session
    hive = HiveWarehouseSession.session(spark).build()
    inputTable = argv[0]
    outputTable = argv[1]
    selectQuery = "select * from {} limit 10".format(inputTable)
    # read from the managed (ACID) table through HWC
    df = hive.executeQuery(selectQuery)
    df.show()
    print("Writing data from table {} to {}".format(inputTable, outputTable))
    # write back to a managed (ACID) table through the HWC data source
    df.write.format(HiveWarehouseSession.HIVE_WAREHOUSE_CONNECTOR).mode("append").option("table", outputTable).save()

if __name__ == '__main__':
    main(sys.argv[1:])
----------------------------------
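Note on the two HWC paths used above: hive.executeQuery() reads the ACID table through HiveServer2 Interactive (LLAP), while the write goes through the HWC data source, which stages files under spark.datasource.hive.warehouse.load.staging.dir before loading them into the managed table. Both settings appear in the spark-submit command below.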
spark-submit --master yarn --deploy-mode cluster \
  --conf spark.sql.hive.hiveserver2.jdbc.url.principal=hive/[email protected] \
  --conf spark.datasource.hive.warehouse.load.staging.dir=/tmp \
  --conf spark.datasource.hive.warehouse.metastoreUri=thrift://c220-node3.example.com:9083 \
  --conf spark.hadoop.hive.llap.daemon.service.hosts=@llap0 \
  --conf spark.jars=/usr/hdp/current/hive_warehouse_connector/hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152_dev.jar \
  --conf spark.submit.pyFiles=/usr/hdp/current/hive_warehouse_connector/pyspark_hwc-1.0.0.3.1.5.0-152.zip \
  --conf spark.security.credentials.hiveserver2.enabled=true \
  --conf spark.sql.hive.hiveserver2.jdbc.url="jdbc:hive2://c220-node2.example.com:2181,c220-node3.example.com:2181,c220-node4.example.com:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2-interactive" \
  --conf spark.sql.hive.zookeeper.quorum="c220-node2.example.com:2181,c220-node3.example.com:2181,c220-node4.example.com:2181" \
  pyspark_HWCDemo.py db_hwc_test.demo_input_table db_hwc_test.demo_output_table
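In cluster mode the driver log lands on YARN rather than the local console. A minimal sketch for pulling it, assuming log aggregation is enabled (the application id below is a placeholder for the one printed by spark-submit):
-----------------------
# fetch the aggregated container logs for the finished application
yarn logs -applicationId application_1622180000000_0001
-----------------------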
Step 3: Check the table content from hive beeline

From hive beeline, run:
select * from db_hwc_test.demo_output_table;
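If the spark-submit run succeeded, the output table should contain the same four rows (ids 1-4) inserted into demo_input_table in step 1, appended once per run.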
Step 4.1: Test table read and write using Oozie

[hr-user1@c220-node3 ooziehwc_write_demo]$ ll -R
.:
total 8
-rw-r--r-- 1 hr-user1 hadoop      409 May 28 06:02 job.properties
drwxr-xr-x 2 hr-user1 hadoop      183 May 28 06:27 lib
-rw-r--r-- 1 hr-user1 hadoop     2317 May 28 07:20 workflow.xml

./lib:
total 55040
-rw-r--r-- 1 hr-user1 hadoop 56340621 May 28 06:17 hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152_dev.jar
-rw------- 1 hr-user1 hadoop      158 May 28 06:27 hr-user1.keytab
-rw-r--r-- 1 hr-user1 hadoop      743 May 28 05:47 pyspark_HWCDemo.py
-rw-r--r-- 1 hr-user1 hadoop     6044 May 28 06:18 pyspark_hwc-1.0.0.3.1.5.0-152.zip
cat workflow.xml
-----------------------
<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.4" name="HWC-Spark-Demo-Workflow">
    <credentials>
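        <!-- "hive2" credentials let Oozie obtain a HiveServer2 delegation token for the workflow, using the principal and JDBC URL below -->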
        <credential name="hs2-creds" type="hive2">
            <property>
                <name>hive2.server.principal</name>
                <value>hive/[email protected]</value>
            </property>
            <property>
                <name>hive2.jdbc.url</name>
                <value>jdbc:hive2://c220-node2.example.com:2181,c220-node3.example.com:2181,c220-node4.example.com:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2</value>
            </property>
        </credential>
    </credentials>
    <start to="spark-hwc" />
    <action name="spark-hwc" cred="hs2-creds">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>${master}</master>
            <name>${appname}</name>
            <jar>pyspark_HWCDemo.py</jar>
            <spark-opts>--conf spark.sql.hive.hiveserver2.jdbc.url.principal=hive/[email protected] --conf spark.submit.pyFiles=/usr/hdp/current/hive_warehouse_connector/pyspark_hwc-1.0.0.3.1.5.0-152.zip --keytab hr-user101.keytab --principal [email protected] --conf spark.datasource.hive.warehouse.load.staging.dir=/tmp --conf spark.datasource.hive.warehouse.metastoreUri=thrift://c220-node3.example.com:9083 --conf spark.hadoop.hive.llap.daemon.service.hosts=@llap0 --conf spark.security.credentials.hiveserver2.enabled=true --conf spark.sql.hive.hiveserver2.jdbc.url="jdbc:hive2://c220-node2.example.com:2181,c220-node3.example.com:2181,c220-node4.example.com:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2-interactive" --conf spark.sql.hive.zookeeper.quorum="c220-node2.example.com:2181,c220-node3.example.com:2181,c220-node4.example.com:2181"</spark-opts>
            <arg>${arg1}</arg>
            <arg>${arg2}</arg>
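            <!-- ship the keytab from HDFS into the container; the #hr-user101.keytab fragment aliases it to the name referenced by the keytab option in spark-opts above -->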
            <file>${nameNode}${jarPath}/hr-user1.keytab#hr-user101.keytab</file>
        </spark>
        <ok to="end" />
        <error to="fail" />
    </action>
    <kill name="fail">
        <message>Workflow failed! message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end" />
</workflow-app>
----------------------
cat job.properties
-------------------------
nameNode=hdfs://c220-node2.example.com:8020
jobTracker=c220-node2.example.com:8050
queueName=default
appname=HWC-Spark-Write_Demo
oozie.use.system.libpath=true
oozie.wf.application.path=/tmp/sparkhwc_Write_demoOozieAction
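# the application path and jarPath below must match the HDFS layout created in step 4.2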
jarPath=/tmp/sparkhwc_Write_demoOozieAction/lib
master=yarn-cluster
oozie.action.sharelib.for.spark=spark
arg1=db_hwc_test.demo_input_table
arg2=db_hwc_test.demo_output_table
Step 4.2: Upload files to HDFS
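The original listing does not include the upload commands themselves; a minimal sketch, assuming the local directory layout from step 4.1:
-----------------------
# create the application directory in HDFS and upload the workflow and its lib/
hdfs dfs -mkdir -p /tmp/sparkhwc_Write_demoOozieAction/lib
hdfs dfs -put workflow.xml /tmp/sparkhwc_Write_demoOozieAction/
hdfs dfs -put lib/* /tmp/sparkhwc_Write_demoOozieAction/lib/
-----------------------
Verify the result: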
hdfs dfs -ls -R /tmp/sparkhwc_Write_demoOozieAction
drwxr-xr-x - hr-user1 hdfs        0 2021-05-28 06:28 /tmp/sparkhwc_Write_demoOozieAction/lib
-rw-r--r-- 3 hr-user1 hdfs 56340621 2021-05-28 06:19 /tmp/sparkhwc_Write_demoOozieAction/lib/hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152_dev.jar
-rw-r--r-- 3 hr-user1 hdfs      158 2021-05-28 06:28 /tmp/sparkhwc_Write_demoOozieAction/lib/hr-user1.keytab
-rw-r--r-- 3 hr-user1 hdfs      743 2021-05-28 06:19 /tmp/sparkhwc_Write_demoOozieAction/lib/pyspark_HWCDemo.py
-rw-r--r-- 3 hr-user1 hdfs     6044 2021-05-28 06:19 /tmp/sparkhwc_Write_demoOozieAction/lib/pyspark_hwc-1.0.0.3.1.5.0-152.zip
-rw-r--r-- 3 hr-user1 hdfs     2317 2021-05-28 07:20 /tmp/sparkhwc_Write_demoOozieAction/workflow.xml
Step 4.3: Run the Oozie job

oozie job -oozie http://c220-node4.example.com:11000/oozie/ -config job.properties -run
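The run command prints a workflow job id; its progress can be followed with the standard Oozie CLI (the job id below is a placeholder):
-----------------------
# check workflow status and per-action details
oozie job -oozie http://c220-node4.example.com:11000/oozie/ -info 0000001-210528000000000-oozie-oozi-W
-----------------------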
Step 4.4: After the job completes, check the table content from hive beeline
select * from db_hwc_test.demo_output_table;
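Note: the demo writes in append mode, so if both the spark-submit test (step 2) and the Oozie run succeeded, each of the four input rows will appear twice in demo_output_table.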