@@ -24790,43 +24790,106 @@ def make_time(hour: "ColumnOrName", minute: "ColumnOrName", second: "ColumnOrNam
2479024790 return _invoke_function_over_columns("make_time", hour, minute, second)
2479124791
2479224792
24793- @_try_remote_functions
24793+ @overload
2479424794def make_timestamp(
2479524795 years: "ColumnOrName",
2479624796 months: "ColumnOrName",
2479724797 days: "ColumnOrName",
2479824798 hours: "ColumnOrName",
2479924799 mins: "ColumnOrName",
2480024800 secs: "ColumnOrName",
24801+ ) -> Column:
24802+ ...
24803+
24804+
24805+ @overload
24806+ def make_timestamp(
24807+ years: "ColumnOrName",
24808+ months: "ColumnOrName",
24809+ days: "ColumnOrName",
24810+ hours: "ColumnOrName",
24811+ mins: "ColumnOrName",
24812+ secs: "ColumnOrName",
24813+ timezone: "ColumnOrName",
24814+ ) -> Column:
24815+ ...
24816+
24817+
24818+ @overload
24819+ def make_timestamp(*, date: "ColumnOrName", time: "ColumnOrName") -> Column:
24820+ ...
24821+
24822+
24823+ @overload
24824+ def make_timestamp(
24825+ *, date: "ColumnOrName", time: "ColumnOrName", timezone: "ColumnOrName"
24826+ ) -> Column:
24827+ ...
24828+
24829+
24830+ @_try_remote_functions
24831+ def make_timestamp(
24832+ years: Optional["ColumnOrName"] = None,
24833+ months: Optional["ColumnOrName"] = None,
24834+ days: Optional["ColumnOrName"] = None,
24835+ hours: Optional["ColumnOrName"] = None,
24836+ mins: Optional["ColumnOrName"] = None,
24837+ secs: Optional["ColumnOrName"] = None,
2480124838 timezone: Optional["ColumnOrName"] = None,
24839+ date: Optional["ColumnOrName"] = None,
24840+ time: Optional["ColumnOrName"] = None,
2480224841) -> Column:
2480324842 """
24804- Create timestamp from years, months, days, hours, mins, secs and timezone fields.
24843+ Create timestamp from years, months, days, hours, mins, secs, and (optional) timezone fields.
24844+ Alternatively, create timestamp from date, time, and (optional) timezone fields.
2480524845 The result data type is consistent with the value of configuration `spark.sql.timestampType`.
2480624846 If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL
2480724847 on invalid inputs. Otherwise, it will throw an error instead.
2480824848
2480924849 .. versionadded:: 3.5.0
2481024850
24851+ .. versionchanged:: 4.1.0
24852+ Added support for creating timestamps from date and time.
24853+
2481124854 Parameters
2481224855 ----------
24813- years : :class:`~pyspark.sql.Column` or column name
24814- The year to represent, from 1 to 9999
24815- months : :class:`~pyspark.sql.Column` or column name
24816- The month-of-year to represent, from 1 (January) to 12 (December)
24817- days : :class:`~pyspark.sql.Column` or column name
24818- The day-of-month to represent, from 1 to 31
24819- hours : :class:`~pyspark.sql.Column` or column name
24820- The hour-of-day to represent, from 0 to 23
24821- mins : :class:`~pyspark.sql.Column` or column name
24822- The minute-of-hour to represent, from 0 to 59
24823- secs : :class:`~pyspark.sql.Column` or column name
24856+ years : :class:`~pyspark.sql.Column` or column name, optional
24857+ The year to represent, from 1 to 9999.
24858+ Required when creating timestamps from individual components.
24859+ Must be used with months, days, hours, mins, and secs.
24860+ months : :class:`~pyspark.sql.Column` or column name, optional
24861+ The month-of-year to represent, from 1 (January) to 12 (December).
24862+ Required when creating timestamps from individual components.
24863+ Must be used with years, days, hours, mins, and secs.
24864+ days : :class:`~pyspark.sql.Column` or column name, optional
24865+ The day-of-month to represent, from 1 to 31.
24866+ Required when creating timestamps from individual components.
24867+ Must be used with years, months, hours, mins, and secs.
24868+ hours : :class:`~pyspark.sql.Column` or column name, optional
24869+ The hour-of-day to represent, from 0 to 23.
24870+ Required when creating timestamps from individual components.
24871+ Must be used with years, months, days, mins, and secs.
24872+ mins : :class:`~pyspark.sql.Column` or column name, optional
24873+ The minute-of-hour to represent, from 0 to 59.
24874+ Required when creating timestamps from individual components.
24875+ Must be used with years, months, days, hours, and secs.
24876+ secs : :class:`~pyspark.sql.Column` or column name, optional
2482424877 The second-of-minute and its micro-fraction to represent, from 0 to 60.
24825- The value can be either an integer like 13 , or a fraction like 13.123.
24878+ The value can be either an integer like 13, or a fraction like 13.123.
2482624879 If the sec argument equals to 60, the seconds field is set
2482724880 to 0 and 1 minute is added to the final timestamp.
24881+ Required when creating timestamps from individual components.
24882+ Must be used with years, months, days, hours, and mins.
2482824883 timezone : :class:`~pyspark.sql.Column` or column name, optional
24829- The time zone identifier. For example, CET, UTC and etc.
24884+ The time zone identifier. For example, CET, UTC, etc.
24885+ date : :class:`~pyspark.sql.Column` or column name, optional
24886+ The date to represent, in valid DATE format.
24887+ Required when creating timestamps from date and time components.
24888+ Must be used with the time parameter, and optionally timezone.
24889+ time : :class:`~pyspark.sql.Column` or column name, optional
24890+ The time to represent, in valid TIME format.
24891+ Required when creating timestamps from date and time components.
24892+ Must be used with the date parameter, and optionally timezone.
2483024893
2483124894 Returns
2483224895 -------
@@ -24848,7 +24911,7 @@ def make_timestamp(
2484824911 --------
2484924912 >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
2485024913
24851- Example 1: Make timestamp from years, months, days, hours, mins and secs .
24914+ Example 1: Make timestamp from years, months, days, hours, mins, secs, and timezone.
2485224915
2485324916 >>> import pyspark.sql.functions as sf
2485424917 >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']],
@@ -24862,11 +24925,11 @@ def make_timestamp(
2486224925 |2014-12-27 21:30:45.887 |
2486324926 +----------------------------------------------------+
2486424927
24865- Example 2: Make timestamp without timezone.
24928+ Example 2: Make timestamp from years, months, days, hours, mins, and secs (without timezone).
2486624929
2486724930 >>> import pyspark.sql.functions as sf
24868- >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET' ]],
24869- ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz' ])
24931+ >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887]],
24932+ ... ['year', 'month', 'day', 'hour', 'min', 'sec'])
2487024933 >>> df.select(
2487124934 ... sf.make_timestamp(df.year, df.month, df.day, 'hour', df.min, df.sec)
2487224935 ... ).show(truncate=False)
@@ -24876,16 +24939,85 @@ def make_timestamp(
2487624939 |2014-12-28 06:30:45.887 |
2487724940 +------------------------------------------------+
2487824941
24942+ Example 3: Make timestamp from date, time, and timezone.
24943+
24944+ >>> import pyspark.sql.functions as sf
24945+ >>> from datetime import date, time
24946+ >>> df = spark.range(1).select(
24947+ ... sf.lit(date(2014, 12, 28)).alias("date"),
24948+ ... sf.lit(time(6, 30, 45, 887000)).alias("time"),
24949+ ... sf.lit("CET").alias("tz")
24950+ ... )
24951+ >>> df.select(
24952+ ... sf.make_timestamp(date=df.date, time=df.time, timezone=df.tz)
24953+ ... ).show(truncate=False)
24954+ +------------------------------+
24955+ |make_timestamp(date, time, tz)|
24956+ +------------------------------+
24957+ |2014-12-27 21:30:45.887 |
24958+ +------------------------------+
24959+
24960+ Example 4: Make timestamp from date and time (without timezone).
24961+
24962+ >>> import pyspark.sql.functions as sf
24963+ >>> from datetime import date, time
24964+ >>> df = spark.range(1).select(
24965+ ... sf.lit(date(2014, 12, 28)).alias("date"),
24966+ ... sf.lit(time(6, 30, 45, 887000)).alias("time")
24967+ ... )
24968+ >>> df.select(sf.make_timestamp(date=df.date, time=df.time)).show(truncate=False)
24969+ +--------------------------+
24970+ |make_timestamp(date, time)|
24971+ +--------------------------+
24972+ |2014-12-28 06:30:45.887 |
24973+ +--------------------------+
24974+
2487924975 >>> spark.conf.unset("spark.sql.session.timeZone")
2488024976 """
24881- if timezone is not None:
24882- return _invoke_function_over_columns(
24883- "make_timestamp", years, months, days, hours, mins, secs, timezone
24884- )
24977+ if years is not None:
24978+ if any(arg is not None for arg in [date, time]):
24979+ raise PySparkValueError(
24980+ errorClass="CANNOT_SET_TOGETHER",
24981+ messageParameters={"arg_list": "years|months|days|hours|mins|secs and date|time"},
24982+ )
24983+ if timezone is not None:
24984+ return _invoke_function_over_columns(
24985+ "make_timestamp",
24986+ cast("ColumnOrName", years),
24987+ cast("ColumnOrName", months),
24988+ cast("ColumnOrName", days),
24989+ cast("ColumnOrName", hours),
24990+ cast("ColumnOrName", mins),
24991+ cast("ColumnOrName", secs),
24992+ cast("ColumnOrName", timezone),
24993+ )
24994+ else:
24995+ return _invoke_function_over_columns(
24996+ "make_timestamp",
24997+ cast("ColumnOrName", years),
24998+ cast("ColumnOrName", months),
24999+ cast("ColumnOrName", days),
25000+ cast("ColumnOrName", hours),
25001+ cast("ColumnOrName", mins),
25002+ cast("ColumnOrName", secs),
25003+ )
2488525004 else:
24886- return _invoke_function_over_columns(
24887- "make_timestamp", years, months, days, hours, mins, secs
24888- )
25005+ if any(arg is not None for arg in [years, months, days, hours, mins, secs]):
25006+ raise PySparkValueError(
25007+ errorClass="CANNOT_SET_TOGETHER",
25008+ messageParameters={"arg_list": "years|months|days|hours|mins|secs and date|time"},
25009+ )
25010+ if timezone is not None:
25011+ return _invoke_function_over_columns(
25012+ "make_timestamp",
25013+ cast("ColumnOrName", date),
25014+ cast("ColumnOrName", time),
25015+ cast("ColumnOrName", timezone),
25016+ )
25017+ else:
25018+ return _invoke_function_over_columns(
25019+ "make_timestamp", cast("ColumnOrName", date), cast("ColumnOrName", time)
25020+ )
2488925021
2489025022
2489125023@overload
0 commit comments