Description
import org.apache.commons.io.IOUtils
import java.net.URL
import java.nio.charset.Charset

// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SQLContext),
// so you don't need to create them manually.

// Load the bank data.
val bankText = sc.parallelize(
  IOUtils.toString(
    new URL("https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv"),
    Charset.forName("utf8")).split("\n"))

case class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)

val bank = bankText.map(s => s.split(";")).filter(s => s(0) != "\"age\"").map(
  s => Bank(s(0).toInt,
            s(1).replaceAll("\"", ""),
            s(2).replaceAll("\"", ""),
            s(3).replaceAll("\"", ""),
            s(5).replaceAll("\"", "").toInt
  )
)

bank.collect()
org.apache.spark.SparkDriverExecutionException: Execution error
  at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1690)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2588)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
  at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
  at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
  ... 44 elided
Caused by: java.lang.ArrayStoreException: [LBank;
  at scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:74)
  at org.apache.spark.SparkContext.$anonfun$runJob$4(SparkContext.scala:2235)
  at org.apache.spark.SparkContext.$anonfun$runJob$4$adapted(SparkContext.scala:2235)
  at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:59)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1686)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2588)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
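The ArrayStoreException: [LBank; appears to come from collect() building an Array[Bank] on the driver while the deserialized task results carry a Bank class loaded by a different REPL classloader (the case class is defined in the same Zeppelin paragraph as the job). A minimal workaround sketch, not a fix for the underlying issue: collect the results in a shape that does not reference the REPL-defined case class. Names below are taken from the code above; the toDF() route assumes the Zeppelin-injected sqlContext.

// Workaround sketch: collect plain tuples so the driver builds an
// Array[Tuple5] rather than an array of the REPL-defined Bank class.
val rows = bank
  .map(b => (b.age, b.job, b.marital, b.education, b.balance))
  .collect()

// Alternative sketch: go through a DataFrame, whose collect() returns
// Array[Row]. Assumes the sqlContext injected by Zeppelin.
import sqlContext.implicits._
val bankDF = bank.toDF()
bankDF.collect()

Either form keeps the transformation logic unchanged and only changes what type is materialized on the driver.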