XML Word Printable JSON

Details

    • Type: Sub-task
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 3.4.0
    • Fix Version/s: 3.4.0
    • Component/s: Connect
    • Labels: None

    Description

      from pyspark.sql import functions
      
      funs = [
          (functions.acosh, "ACOSH"),
          (functions.asinh, "ASINH"),
          (functions.atanh, "ATANH"),
      ]
      
      cols = ["a", functions.col("a")]
      
      for f, alias in funs:
          for c in cols:
              self.assertIn(f"{alias}(a)", repr(f(c)))
      Traceback (most recent call last):
        File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 271, in test_inverse_trig_functions
          self.assertIn(f"{alias}(a)", repr(f(c)))
      AssertionError: 'ACOSH(a)' not found in "Column<'acosh(ColumnReference(a))'>"

       

       

      from pyspark.sql.functions import col, lit, overlay
      from itertools import chain
      import re
      
      actual = list(
          chain.from_iterable(
              [
                  re.findall("(overlay\\(.*\\))", str(x))
                  for x in [
                      overlay(col("foo"), col("bar"), 1),
                      overlay("x", "y", 3),
                      overlay(col("x"), col("y"), 1, 3),
                      overlay("x", "y", 2, 5),
                      overlay("x", "y", lit(11)),
                      overlay("x", "y", lit(2), lit(5)),
                  ]
              ]
          )
      )
      
      expected = [
          "overlay(foo, bar, 1, -1)",
          "overlay(x, y, 3, -1)",
          "overlay(x, y, 1, 3)",
          "overlay(x, y, 2, 5)",
          "overlay(x, y, 11, -1)",
          "overlay(x, y, 2, 5)",
      ]
      
      self.assertListEqual(actual, expected)
      
      df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y", "pos", "len"))
      
      exp = [Row(ol="SPARK_CORESQL")]
      self.assertTrue(
          all(
              [
                  df.select(overlay(df.x, df.y, 7, 0).alias("ol")).collect() == exp,
                  df.select(overlay(df.x, df.y, lit(7), lit(0)).alias("ol")).collect() == exp,
                  df.select(overlay("x", "y", "pos", "len").alias("ol")).collect() == exp,
              ]
          )
      ) 
      Traceback (most recent call last):
        File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 675, in test_overlay
          self.assertListEqual(actual, expected)
      AssertionError: Lists differ: ['overlay(ColumnReference(foo), ColumnReference(bar[402 chars]5))'] != ['overlay(foo, bar, 1, -1)', 'overlay(x, y, 3, -1)'[90 chars] 5)']
      
      First differing element 0:
      'overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))'
      'overlay(foo, bar, 1, -1)'
      
      - ['overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))',
      -  'overlay(ColumnReference(x), ColumnReference(y), Literal(3), Literal(-1))',
      -  'overlay(ColumnReference(x), ColumnReference(y), Literal(1), Literal(3))',
      -  'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))',
      -  'overlay(ColumnReference(x), ColumnReference(y), Literal(11), Literal(-1))',
      -  'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))']
      + ['overlay(foo, bar, 1, -1)',
      +  'overlay(x, y, 3, -1)',
      +  'overlay(x, y, 1, 3)',
      +  'overlay(x, y, 2, 5)',
      +  'overlay(x, y, 11, -1)',
      +  'overlay(x, y, 2, 5)']
       

      Attachments

        Activity

          People

            gurwls223 Hyukjin Kwon
            techaddict Sandeep Singh
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: