In this article, we will explore how to convert a JSON string into a Spark DataFrame. In the code below we have an employeeSystemStruct, which is a JSON string that we want to convert into a Spark DataFrame.
Below is the code
object ConvertJsonStringToDataFrame extends App {

  // Databricks notebooks provide a ready-made `spark` session; a standalone
  // App must build its own SparkSession or the code will not compile.
  val spark = org.apache.spark.sql.SparkSession.builder()
    .appName("ConvertJsonStringToDataFrame")
    .master("local[*]")
    .getOrCreate()

  // Required for the .toDS() extension method on Seq.
  import spark.implicits._

  // Triple-quoted string: the embedded double quotes in the JSON would
  // terminate an ordinary "..." literal and break compilation.
  val employeeSystemStruct =
    """[
      |  { "id": "343434",  "name": "Mark",    "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "56565",   "name": "Steve",   "dob": "2019-01-01", "company": "google" },
      |  { "id": "787878",  "name": "Cummins", "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "7872323", "name": "nassir",  "dob": "2019-01-01", "company": "microsoft" }
      |]""".stripMargin

  // spark.read.json accepts a Dataset[String] whose elements are JSON text.
  val df = Seq(employeeSystemStruct).toDS()
  val jsondf = spark.read.json(df)

  // `display(...)` exists only inside Databricks notebooks; show() is the
  // portable equivalent for a plain Spark application.
  jsondf.show()
}
object ConvertJsonStringToDataFrame extends App {

  // Databricks notebooks provide a ready-made `spark` session; a standalone
  // App must build its own SparkSession or the code will not compile.
  val spark = org.apache.spark.sql.SparkSession.builder()
    .appName("ConvertJsonStringToDataFrame")
    .master("local[*]")
    .getOrCreate()

  // Required for the .toDS() extension method on Seq.
  import spark.implicits._

  // Triple-quoted string: the embedded double quotes in the JSON would
  // terminate an ordinary "..." literal and break compilation.
  val employeeSystemStruct =
    """[
      |  { "id": "343434",  "name": "Mark",    "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "56565",   "name": "Steve",   "dob": "2019-01-01", "company": "google" },
      |  { "id": "787878",  "name": "Cummins", "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "7872323", "name": "nassir",  "dob": "2019-01-01", "company": "microsoft" }
      |]""".stripMargin

  // spark.read.json accepts a Dataset[String] whose elements are JSON text.
  val df = Seq(employeeSystemStruct).toDS()
  val jsondf = spark.read.json(df)

  // `display(...)` exists only inside Databricks notebooks; show() is the
  // portable equivalent for a plain Spark application.
  jsondf.show()
}
object ConvertJsonStringToDataFrame extends App {

  // Databricks notebooks provide a ready-made `spark` session; a standalone
  // App must build its own SparkSession or the code will not compile.
  val spark = org.apache.spark.sql.SparkSession.builder()
    .appName("ConvertJsonStringToDataFrame")
    .master("local[*]")
    .getOrCreate()

  // Required for the .toDS() extension method on Seq.
  import spark.implicits._

  // Triple-quoted string: the embedded double quotes in the JSON would
  // terminate an ordinary "..." literal and break compilation.
  val employeeSystemStruct =
    """[
      |  { "id": "343434",  "name": "Mark",    "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "56565",   "name": "Steve",   "dob": "2019-01-01", "company": "google" },
      |  { "id": "787878",  "name": "Cummins", "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "7872323", "name": "nassir",  "dob": "2019-01-01", "company": "microsoft" }
      |]""".stripMargin

  // spark.read.json accepts a Dataset[String] whose elements are JSON text.
  val df = Seq(employeeSystemStruct).toDS()
  val jsondf = spark.read.json(df)

  // `display(...)` exists only inside Databricks notebooks; show() is the
  // portable equivalent for a plain Spark application.
  jsondf.show()
}
We can also convert this JSON into a list of instances of a custom case class using the code below.
object ConvertJsonStringToDataFrame extends App {

  // One row of the employee JSON. The original declared `name` twice and
  // never declared `company`, which is a compile error.
  final case class EmployeeStruct(id: String, name: String, dob: String, company: String)

  // Databricks notebooks provide a ready-made `spark` session; a standalone
  // App must build its own SparkSession or the code will not compile.
  val spark = org.apache.spark.sql.SparkSession.builder()
    .appName("ConvertJsonStringToDataFrame")
    .master("local[*]")
    .getOrCreate()

  // Required for the .toDS() extension method on Seq.
  import spark.implicits._

  // Triple-quoted string: the embedded double quotes in the JSON would
  // terminate an ordinary "..." literal and break compilation.
  val employeeSystemStruct =
    """[
      |  { "id": "343434",  "name": "Mark",    "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "56565",   "name": "Steve",   "dob": "2019-01-01", "company": "google" },
      |  { "id": "787878",  "name": "Cummins", "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "7872323", "name": "nassir",  "dob": "2019-01-01", "company": "microsoft" }
      |]""".stripMargin

  val df = Seq(employeeSystemStruct).toDS()
  val jsondf = spark.read.json(df)

  // collect() pulls all rows to the driver — fine for a tiny dataset like
  // this one. getAs needs an explicit type parameter, and the fourth field
  // must read the "company" column (the original re-read "name").
  val employeeList = jsondf.collect().map { row =>
    EmployeeStruct(
      id      = row.getAs[String]("id"),
      name    = row.getAs[String]("name"),
      dob     = row.getAs[String]("dob"),
      company = row.getAs[String]("company"))
  }

  // Print each employee; the original printed the entire list once per
  // element instead of the element itself.
  employeeList.foreach(println)
}
object ConvertJsonStringToDataFrame extends App {

  // One row of the employee JSON. The original declared `name` twice and
  // never declared `company`, which is a compile error.
  final case class EmployeeStruct(id: String, name: String, dob: String, company: String)

  // Databricks notebooks provide a ready-made `spark` session; a standalone
  // App must build its own SparkSession or the code will not compile.
  val spark = org.apache.spark.sql.SparkSession.builder()
    .appName("ConvertJsonStringToDataFrame")
    .master("local[*]")
    .getOrCreate()

  // Required for the .toDS() extension method on Seq.
  import spark.implicits._

  // Triple-quoted string: the embedded double quotes in the JSON would
  // terminate an ordinary "..." literal and break compilation.
  val employeeSystemStruct =
    """[
      |  { "id": "343434",  "name": "Mark",    "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "56565",   "name": "Steve",   "dob": "2019-01-01", "company": "google" },
      |  { "id": "787878",  "name": "Cummins", "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "7872323", "name": "nassir",  "dob": "2019-01-01", "company": "microsoft" }
      |]""".stripMargin

  val df = Seq(employeeSystemStruct).toDS()
  val jsondf = spark.read.json(df)

  // collect() pulls all rows to the driver — fine for a tiny dataset like
  // this one. getAs needs an explicit type parameter, and the fourth field
  // must read the "company" column (the original re-read "name").
  val employeeList = jsondf.collect().map { row =>
    EmployeeStruct(
      id      = row.getAs[String]("id"),
      name    = row.getAs[String]("name"),
      dob     = row.getAs[String]("dob"),
      company = row.getAs[String]("company"))
  }

  // Print each employee; the original printed the entire list once per
  // element instead of the element itself.
  employeeList.foreach(println)
}
object ConvertJsonStringToDataFrame extends App {

  // One row of the employee JSON. The original declared `name` twice and
  // never declared `company`, which is a compile error.
  final case class EmployeeStruct(id: String, name: String, dob: String, company: String)

  // Databricks notebooks provide a ready-made `spark` session; a standalone
  // App must build its own SparkSession or the code will not compile.
  val spark = org.apache.spark.sql.SparkSession.builder()
    .appName("ConvertJsonStringToDataFrame")
    .master("local[*]")
    .getOrCreate()

  // Required for the .toDS() extension method on Seq.
  import spark.implicits._

  // Triple-quoted string: the embedded double quotes in the JSON would
  // terminate an ordinary "..." literal and break compilation.
  val employeeSystemStruct =
    """[
      |  { "id": "343434",  "name": "Mark",    "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "56565",   "name": "Steve",   "dob": "2019-01-01", "company": "google" },
      |  { "id": "787878",  "name": "Cummins", "dob": "2019-01-01", "company": "microsoft" },
      |  { "id": "7872323", "name": "nassir",  "dob": "2019-01-01", "company": "microsoft" }
      |]""".stripMargin

  val df = Seq(employeeSystemStruct).toDS()
  val jsondf = spark.read.json(df)

  // collect() pulls all rows to the driver — fine for a tiny dataset like
  // this one. getAs needs an explicit type parameter, and the fourth field
  // must read the "company" column (the original re-read "name").
  val employeeList = jsondf.collect().map { row =>
    EmployeeStruct(
      id      = row.getAs[String]("id"),
      name    = row.getAs[String]("name"),
      dob     = row.getAs[String]("dob"),
      company = row.getAs[String]("company"))
  }

  // Print each employee; the original printed the entire list once per
  // element instead of the element itself.
  employeeList.foreach(println)
}