@@ -24888,42 +24888,105 @@ def make_timestamp(
2488824888 )
2488924889
2489024890
# Typing-only overloads for ``try_make_timestamp``: the function accepts either
# the six individual datetime components (plus an optional timezone), or a
# keyword-only (date, time) pair (plus an optional timezone). The runtime
# implementation below accepts all parameters as Optional and validates the
# combination itself.
@overload
def try_make_timestamp(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
) -> Column:
    ...


@overload
def try_make_timestamp(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
    timezone: "ColumnOrName",
) -> Column:
    ...


@overload
def try_make_timestamp(*, date: "ColumnOrName", time: "ColumnOrName") -> Column:
    ...


@overload
def try_make_timestamp(
    *, date: "ColumnOrName", time: "ColumnOrName", timezone: "ColumnOrName"
) -> Column:
    ...
@_try_remote_functions
def try_make_timestamp(
    years: Optional["ColumnOrName"] = None,
    months: Optional["ColumnOrName"] = None,
    days: Optional["ColumnOrName"] = None,
    hours: Optional["ColumnOrName"] = None,
    mins: Optional["ColumnOrName"] = None,
    secs: Optional["ColumnOrName"] = None,
    timezone: Optional["ColumnOrName"] = None,
    date: Optional["ColumnOrName"] = None,
    time: Optional["ColumnOrName"] = None,
) -> Column:
    """
    Try to create timestamp from years, months, days, hours, mins, secs and (optional) timezone
    fields. Alternatively, try to create timestamp from date, time, and (optional) timezone
    fields. The result data type is consistent with the value of configuration
    `spark.sql.timestampType`. The function returns NULL on invalid inputs.

    .. versionadded:: 4.0.0

    .. versionchanged:: 4.1.0
        Added support for creating timestamps from date and time.

    Parameters
    ----------
    years : :class:`~pyspark.sql.Column` or column name, optional
        The year to represent, from 1 to 9999.
        Required when creating timestamps from individual components.
        Must be used with months, days, hours, mins, and secs.
    months : :class:`~pyspark.sql.Column` or column name, optional
        The month-of-year to represent, from 1 (January) to 12 (December).
        Required when creating timestamps from individual components.
        Must be used with years, days, hours, mins, and secs.
    days : :class:`~pyspark.sql.Column` or column name, optional
        The day-of-month to represent, from 1 to 31.
        Required when creating timestamps from individual components.
        Must be used with years, months, hours, mins, and secs.
    hours : :class:`~pyspark.sql.Column` or column name, optional
        The hour-of-day to represent, from 0 to 23.
        Required when creating timestamps from individual components.
        Must be used with years, months, days, mins, and secs.
    mins : :class:`~pyspark.sql.Column` or column name, optional
        The minute-of-hour to represent, from 0 to 59.
        Required when creating timestamps from individual components.
        Must be used with years, months, days, hours, and secs.
    secs : :class:`~pyspark.sql.Column` or column name, optional
        The second-of-minute and its micro-fraction to represent, from 0 to 60.
        The value can be either an integer like 13, or a fraction like 13.123.
        If the sec argument equals to 60, the seconds field is set
        to 0 and 1 minute is added to the final timestamp.
        Required when creating timestamps from individual components.
        Must be used with years, months, days, hours, and mins.
    timezone : :class:`~pyspark.sql.Column` or column name, optional
        The time zone identifier. For example, CET, UTC, etc.
    date : :class:`~pyspark.sql.Column` or column name, optional
        The date to represent, in valid DATE format.
        Required when creating timestamps from date and time components.
        Must be used with time parameter only.
    time : :class:`~pyspark.sql.Column` or column name, optional
        The time to represent, in valid TIME format.
        Required when creating timestamps from date and time components.
        Must be used with date parameter only.

    Returns
    -------
    :class:`~pyspark.sql.Column`
        A new column that contains a timestamp, or NULL in case of an error.

    Examples
    --------
    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")

    Example 1: Make timestamp from years, months, days, hours, mins, secs, and timezone.

    >>> import pyspark.sql.functions as sf
    >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']],
    ...     ["year", "month", "day", "hour", "min", "sec", "timezone"])
    >>> df.select(sf.try_make_timestamp(
    ...     df.year, df.month, df.day, df.hour, df.min, df.sec, df.timezone)
    ... ).show(truncate=False)
    +--------------------------------------------------------------+
    |try_make_timestamp(year, month, day, hour, min, sec, timezone)|
    +--------------------------------------------------------------+
    |2014-12-27 21:30:45.887                                       |
    +--------------------------------------------------------------+

    Example 2: Make timestamp from years, months, days, hours, mins, and secs (without timezone).

    >>> import pyspark.sql.functions as sf
    >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']],
    ...     ["year", "month", "day", "hour", "min", "sec", "timezone"])
    >>> df.select(sf.try_make_timestamp(
    ...     df.year, df.month, df.day, df.hour, df.min, df.sec)
    ... ).show(truncate=False)
    +----------------------------------------------------+
    |try_make_timestamp(year, month, day, hour, min, sec)|
    +----------------------------------------------------+
    |2014-12-28 06:30:45.887                             |
    +----------------------------------------------------+

    Example 3: Make timestamp with invalid input.

    >>> import pyspark.sql.functions as sf
    >>> df = spark.createDataFrame([[2014, 13, 28, 6, 30, 45.887]],
    ...     ["year", "month", "day", "hour", "min", "sec"])
    >>> df.select(sf.try_make_timestamp(
    ...     df.year, df.month, df.day, df.hour, df.min, df.sec)
    ... ).show(truncate=False)
    +----------------------------------------------------+
    |try_make_timestamp(year, month, day, hour, min, sec)|
    +----------------------------------------------------+
    |NULL                                                |
    +----------------------------------------------------+

    Example 4: Make timestamp from date, time, and timezone.

    >>> import pyspark.sql.functions as sf
    >>> from datetime import date, time
    >>> df = spark.range(1).select(
    ...     sf.lit(date(2014, 12, 28)).alias("date"),
    ...     sf.lit(time(6, 30, 45, 887000)).alias("time"),
    ...     sf.lit("CET").alias("tz")
    ... )
    >>> df.select(
    ...     sf.try_make_timestamp(date=df.date, time=df.time, timezone=df.tz)
    ... ).show(truncate=False)
    +----------------------------------+
    |try_make_timestamp(date, time, tz)|
    +----------------------------------+
    |2014-12-27 21:30:45.887           |
    +----------------------------------+

    Example 5: Make timestamp from date and time (without timezone).

    >>> import pyspark.sql.functions as sf
    >>> from datetime import date, time
    >>> df = spark.range(1).select(
    ...     sf.lit(date(2014, 12, 28)).alias("date"),
    ...     sf.lit(time(6, 30, 45, 887000)).alias("time")
    ... )
    >>> df.select(sf.try_make_timestamp(date=df.date, time=df.time)).show(truncate=False)
    +------------------------------+
    |try_make_timestamp(date, time)|
    +------------------------------+
    |2014-12-28 06:30:45.887       |
    +------------------------------+

    >>> spark.conf.unset("spark.sql.session.timeZone")
    """
    # Exactly one construction mode may be used:
    #   components: (years, months, days, hours, mins, secs[, timezone])
    #   date/time:  (date, time[, timezone])
    # `years` acts as the mode discriminator, mirroring the typing overloads above.
    if years is not None:
        if date is not None or time is not None:
            raise PySparkValueError(
                errorClass="CANNOT_SET_TOGETHER",
                messageParameters={"arg_list": "years|months|days|hours|mins|secs and date|time"},
            )
        args = [years, months, days, hours, mins, secs]
    else:
        # Any stray component argument without `years` means the caller mixed modes.
        if any(arg is not None for arg in (months, days, hours, mins, secs)):
            raise PySparkValueError(
                errorClass="CANNOT_SET_TOGETHER",
                messageParameters={"arg_list": "years|months|days|hours|mins|secs and date|time"},
            )
        args = [date, time]
    # The optional timezone is simply appended in either mode; the server-side
    # function resolves the arity.
    if timezone is not None:
        args.append(timezone)
    return _invoke_function_over_columns(
        "try_make_timestamp", *(cast("ColumnOrName", arg) for arg in args)
    )
2500125132
2500225133
2500325134@_try_remote_functions
0 commit comments