Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Not A Problem
-
2.3.0
-
None
-
None
-
EMR 5.13.0 and Databricks Cloud 4.0
Description
Similar to other "grows beyond 64 KB" errors, this one happens with a large CASE statement:
import org.apache.spark.sql.functions._
import scala.collection.mutable
import org.apache.spark.sql.Column

var rdd = sc.parallelize(Array("""{ "event": { "timestamp": 1521086591110, "event_name": "yu", "page": { "page_url": "https://", "page_name": "es" }, "properties": { "id": "87", "action": "action", "navigate_action": "navigate_action" } } } """))
var df = spark.read.json(rdd)
df = df.select("event.properties.id","event.timestamp","event.page.page_url","event.properties.action","event.page.page_name","event.event_name","event.properties.navigate_action")
  .toDF("id","event_time","url","action","page_name","event_name","navigation_action")

var a = "case "
for(i <- 1 to 300){
  a = a + s"when action like '$i%' THEN '$i' "
}
a = a + " else null end as task_id"
val expression = expr(a)

df = df.filter("id is not null and id <> '' and event_time is not null")

val transformationExpressions: mutable.HashMap[String, Column] = mutable.HashMap(
  "action" -> expr("coalesce(action, navigation_action) as action"),
  "task_id" -> expression
)

for((col, expr) <- transformationExpressions)
  df = df.withColumn(col, expr)

df = df.filter("(action is not null and action <> '') or (page_name is not null and page_name <> '')")
df.show
Exception:
18/06/07 01:06:34 ERROR CodeGenerator: failed to compile: org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" grows beyond 64 KB org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" grows beyond 64 KB at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:361) at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:234) at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:446) at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313) at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235) at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:204) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1444) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1523) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1520) at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3522) at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2315) at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2278) at 
com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2193) at com.google.common.cache.LocalCache.get(LocalCache.java:3932) at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3936) at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4806) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1392) at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:579) at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:578) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$3.apply(SparkPlan.scala:167) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:164) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:61) at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:70) at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:45) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectResult(Dataset.scala:2759) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3331) at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2488) at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2488) at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3315) at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:88) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:124) at 
org.apache.spark.sql.Dataset.withAction(Dataset.scala:3314) at org.apache.spark.sql.Dataset.head(Dataset.scala:2488) at org.apache.spark.sql.Dataset.take(Dataset.scala:2702) at org.apache.spark.sql.Dataset.showString(Dataset.scala:258) at org.apache.spark.sql.Dataset.show(Dataset.scala:727) at org.apache.spark.sql.Dataset.show(Dataset.scala:686) at org.apache.spark.sql.Dataset.show(Dataset.scala:695) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:1) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:51) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:53) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:55) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw.<init>(command-687647945500165:57) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw.<init>(command-687647945500165:59) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw.<init>(command-687647945500165:61) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw.<init>(command-687647945500165:63) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read.<init>(command-687647945500165:65) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$.<init>(command-687647945500165:69) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$.<clinit>(command-687647945500165) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval$.$print$lzycompute(<notebook>:7) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval$.$print(<notebook>:6) at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval.$print(<notebook>) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at 
scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786) at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047) at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638) at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637) at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31) at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19) at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637) at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569) at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565) at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:186) at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply$mcV$sp(ScalaDriverLocal.scala:189) at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:189) at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:189) at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:500) at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:456) at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:189) at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$3.apply(DriverLocal.scala:249) at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$3.apply(DriverLocal.scala:229) at com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:188) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) at com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:183) at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:43) at 
com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:221) at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:43) at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:229) at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:601) at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:601) at scala.util.Try$.apply(Try.scala:192) at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:596) at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:486) at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:554) at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:391) at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:348) at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:215) at java.lang.Thread.run(Thread.java:748)
The log file is attached.
Attachments
Attachments
Issue Links
- relates to
-
SPARK-22600 Fix 64kb limit for deeply nested expressions under wholestage codegen
- Resolved