Details
Description
var data01 = sqlContext.sql("select 1 as id, \"{\\\"animal\\\":{\\\"type\\\": \\\"cat\\\"}},{\\\"animal\\\":{\\\"type\\\": \\\"dog\\\"}},{\\\"animal\\\":{\\\"type\\\": \\\"donkey\\\"}},{\\\"animal\\\":{\\\"type\\\": \\\"turkey\\\"}},{\\\"animal\\\":{\\\"type\\\": \\\"cat\\\"}},{\\\"animal\\\":{\\\"NOTANIMAL\\\": \\\"measuring tape\\\"}}\" as field")

case class SubField(fieldling: String)

var data02 = data01.explode(data01("field")){ case Row(field: String) => field.split(",").map(SubField(_))}
  .selectExpr("id","fieldling","get_json_object(fieldling,\"$.animal.type\") as animal")

var data03 = data01.explode(data01("field")){ case Row(field: String) => field.split(",").map(SubField(_))}
  .selectExpr("id","fieldling","get_json_object(fieldling,\"$.animal.type\") as animal")

data02.cache()

data02.select($"animal" === "cat").explain
== Physical Plan ==
Project [(animal#25 = cat) AS (animal = cat)#263]
 InMemoryColumnarTableScan [animal#25], (InMemoryRelation [id#20,fieldling#24,animal#25], true, 10000, StorageLevel(true, true, false, true, 1), (TungstenProject [id#20,fieldling#24,get_json_object(fieldling#24,$.animal.type) AS animal#25]), None)

data02.select($"animal" === "cat").show
+--------------+
|(animal = cat)|
+--------------+
|          true|
|         false|
|         false|
|         false|
|          true|
|          null|
+--------------+

data02.filter($"animal" === "cat").explain
== Physical Plan ==
Filter (animal#25 = cat)
 InMemoryColumnarTableScan [id#20,fieldling#24,animal#25], [(animal#25 = cat)], (InMemoryRelation [id#20,fieldling#24,animal#25], true, 10000, StorageLevel(true, true, false, true, 1), (TungstenProject [id#20,fieldling#24,get_json_object(fieldling#24,$.animal.type) AS animal#25]), None)

data02.filter($"animal" === "cat").show
+---+---------+------+
| id|fieldling|animal|
+---+---------+------+
+---+---------+------+
Attachments
Issue Links
- duplicates: SPARK-11330 Filter operation on StringType after groupBy PERSISTED brings no results (Closed)
- links to