We will use the hadoopFile method of the Spark context to read the ORC file.
Below is the method signature, which returns an RDD for a Hadoop file with an arbitrary InputFormat:
<K,V> RDD<scala.Tuple2<K,V>> hadoopFile(String path, Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, Class<K> keyClass, Class<V> valueClass, int minPartitions)
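For an ORC file, the generic parameters resolve to NullWritable keys and OrcStruct values, with OrcInputFormat as the input format. As a rough sketch (path and minPartitions are placeholders, and jsc is an existing JavaSparkContext), the concrete call therefore looks like this:

JavaPairRDD<NullWritable, OrcStruct> rdd =
    jsc.hadoopFile(path, OrcInputFormat.class, NullWritable.class, OrcStruct.class, 1);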
Below is the Java code that reads an ORC file and saves it in text file format.
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.io.NullWritable;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

import scala.Tuple2;

public class ORCReaderDriver {

  @SuppressWarnings("resource")
  public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("test").setMaster("local");
    // Replace these placeholders with the actual ORC input path and output directory.
    args = new String[] { "ORC FILE INPUT PATH", "OUTPUT_PATH" };
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);

    // OrcInputFormat yields one (NullWritable, OrcStruct) pair per row of the ORC file.
    JavaPairRDD<NullWritable, OrcStruct> orcSourceRdd = jsc.hadoopFile(args[0],
        OrcInputFormat.class, NullWritable.class, OrcStruct.class, 1);

    // Convert each OrcStruct row to its string representation and write it out as text.
    orcSourceRdd.map(new Function<Tuple2<NullWritable, OrcStruct>, String>() {
      private static final long serialVersionUID = 5454545L;

      public String call(Tuple2<NullWritable, OrcStruct> orcStruct) throws Exception {
        OrcStruct struct = orcStruct._2();
        return struct.toString();
      }
    }).saveAsTextFile(args[1]);
  }
}
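On Java 8 or later, the anonymous Function class can also be written as a lambda. This is only a sketch that assumes the same orcSourceRdd and args as in the driver above; the behaviour is unchanged, since map still receives a Tuple2<NullWritable, OrcStruct> per row:

// Equivalent to the anonymous Function above: keep only the row and render it as text.
orcSourceRdd.map(row -> row._2().toString()).saveAsTextFile(args[1]);

OrcStruct.toString() renders the whole row, so each line in the output text file corresponds to one row of the source ORC file.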