Skip to content

Instantly share code, notes, and snippets.

@kolommik
Created August 3, 2020 06:50
Show Gist options
  • Save kolommik/9df39b63cfa3d42b8dc00704a4883f55 to your computer and use it in GitHub Desktop.
Save kolommik/9df39b63cfa3d42b8dc00704a4883f55 to your computer and use it in GitHub Desktop.
# Location of the calendar CSV (MARS_UNIVERSAL_CALENDAR.csv).
CALENDAR_PATH = "mnt/PRODGEN2/OUTPUT/RUSSIA_DATA_FOUNDATION/UNIVERSALCATALOG/MASTERDATA/MARS_UNIVERSAL_CALENDAR.csv"

# Explicit schema for the calendar file. Every column is declared
# non-nullable (third StructField argument is False); the (name, type)
# pairs below are kept in their original declaration order.
calendarSchema = StructType([
    StructField(column_name, column_type, False)
    for column_name, column_type in (
        ("OriginalDate", DateType()),
        ("MarsYear", IntegerType()),
        ("MarsPeriod", IntegerType()),
        ("MarsWeek", IntegerType()),
        ("MarsDay", IntegerType()),
        ("MarsPeriodName", StringType()),
        ("MarsPeriodFullName", StringType()),
        ("MarsWeekName", StringType()),
        ("MarsWeekFullName", StringType()),
        ("MarsDayName", StringType()),
        ("MarsDayFullName", StringType()),
        ("CalendarYear", IntegerType()),
        ("CalendarMonth", IntegerType()),
        ("CalendarDay", IntegerType()),
        ("CalendarDayOfYear", IntegerType()),
        ("CalendarMonthName", StringType()),
        ("CalendarMonthFullName", StringType()),
        ("CalendarYearWeek", IntegerType()),
        ("CalendarWeek", IntegerType()),
        ("CalendarMonthName_RU", StringType()),
        ("CalendarMonthFullName_RU", StringType()),
        ("MarsPeriodDay", IntegerType()),
    )
])

# Load the calendar with the explicit schema: the file is read with the
# header option enabled and "|" as the field separator.
calendar_df = spark.read.csv(
    CALENDAR_PATH,
    schema=calendarSchema,
    sep="|",
    header="True",
)
# Window covering every row that shares the same (CalendarYearWeek,
# CalendarWeek) pair, ordered by date. The frame is explicitly unbounded in
# both directions so that first()/last() see the whole partition rather than
# only the rows up to the current one. The named Window boundaries are used
# instead of +/- sys.maxsize, as recommended by the PySpark documentation.
_calendar_week_window = (
    Window.partitionBy("CalendarYearWeek", "CalendarWeek")
    .orderBy("OriginalDate")
    .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)
)

# Add three derived columns to the calendar:
#   WeekStartDate          - first OriginalDate (in date order) of the row's
#                            (CalendarYearWeek, CalendarWeek) partition.
#   WeekEndDate            - last OriginalDate (in date order) of that same
#                            partition.
#   MarsCorrectDayFullName - "<MarsYear>P<MarsPeriod, zero-padded to 2
#                            digits>W<MarsWeek>D<MarsDay>", e.g. year 2020,
#                            period 8, week 2, day 3 gives "2020P08W2D3".
calendar_df = (
    calendar_df
    .withColumn(
        "WeekStartDate",
        F.first("OriginalDate").over(_calendar_week_window),
    )
    .withColumn(
        "WeekEndDate",
        F.last("OriginalDate").over(_calendar_week_window),
    )
    .withColumn(
        "MarsCorrectDayFullName",
        F.concat(
            F.col("MarsYear"),
            F.lit("P"),
            # Only the period is zero-padded; week and day are emitted as-is.
            F.format_string("%02d", F.col("MarsPeriod")),
            F.lit("W"),
            F.col("MarsWeek"),
            F.lit("D"),
            F.col("MarsDay"),
        ),
    )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment