dreyco676 · October 9, 2018 04:22
diff --git a/pyspark_list_to_column b/pyspark_list_to_column
 from pyspark.sql.functions import split, explode, lit, coalesce, first

 # Turn the delimited 'ROOF' string into an array column.
 # The delimiter is ', ' (comma followed by one space); Spark treats it as a regex.
 df = df.withColumn('roof_list', split(df['ROOF'], ', '))

 # Emit one row per array element so each roof value stands on its own record.
 # NOTE: explode() produces no row for a null or empty array, so records
 # without any roof value will not appear in the pivoted result.
 ex_df = df.withColumn('ex_roof_list', explode(df['roof_list']))

 # Add a constant marker column; it becomes the cell value after the pivot.
 ex_df = ex_df.withColumn('constant_val', lit(1))

 # Pivot on the exploded values: one output column per distinct roof value,
 # holding 1 when the 'NO' group contains that value and null otherwise.
 # first() is used directly because a single-argument coalesce() is a no-op;
 # the nulls are replaced by the fillna() call below.
 piv_df = ex_df.groupBy('NO').pivot('ex_roof_list').agg(first('constant_val'))

 # Replace the nulls left by the pivot (value absent for that group) with 0.
 piv_df = piv_df.fillna(0)
	from pyspark.sql.functions import split, explode, lit, coalesce, first

	# Turn the delimited 'ROOF' string into an array column.
	# The delimiter is ', ' (comma followed by one space); Spark treats it as a regex.
	df = df.withColumn('roof_list', split(df['ROOF'], ', '))

	# Emit one row per array element so each roof value stands on its own record.
	# NOTE: explode() produces no row for a null or empty array, so records
	# without any roof value will not appear in the pivoted result.
	ex_df = df.withColumn('ex_roof_list', explode(df['roof_list']))

	# Add a constant marker column; it becomes the cell value after the pivot.
	ex_df = ex_df.withColumn('constant_val', lit(1))

	# Pivot on the exploded values: one output column per distinct roof value,
	# holding 1 when the 'NO' group contains that value and null otherwise.
	# first() is used directly because a single-argument coalesce() is a no-op;
	# the nulls are replaced by the fillna() call below.
	piv_df = ex_df.groupBy('NO').pivot('ex_roof_list').agg(first('constant_val'))

	# Replace the nulls left by the pivot (value absent for that group) with 0.
	piv_df = piv_df.fillna(0)