public class FrameRDDConverterUtils extends Object
| Modifier and Type | Class and Description | 
|---|---|
| static class  | FrameRDDConverterUtils.LongFrameToLongWritableFrameFunction | 
| static class  | FrameRDDConverterUtils.LongWritableFrameToLongFrameFunction | 
| static class  | FrameRDDConverterUtils.LongWritableTextToLongTextFunction | 
| static class  | FrameRDDConverterUtils.LongWritableToSerFunction | 
| Constructor and Description | 
|---|
| FrameRDDConverterUtils() | 
| Modifier and Type | Method and Description | 
|---|---|
| static org.apache.spark.api.java.JavaRDD<String> | binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
                DataCharacteristics mcIn,
                FileFormatPropertiesCSV props,
                boolean strict) | 
| static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession,
                      org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
                      DataCharacteristics mc,
                      Types.ValueType[] schema) | 
| static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext,
                      org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
                      DataCharacteristics mc,
                      Types.ValueType[] schema) — Deprecated.  | 
| static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> | binaryBlockToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input,
                        DataCharacteristics mcIn,
                        DataCharacteristics mcOut) | 
| static org.apache.spark.api.java.JavaRDD<String> | binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input,
                     DataCharacteristics mcIn) | 
| static int | convertDFSchemaToFrameSchema(org.apache.spark.sql.types.StructType dfschema,
                            String[] colnames,
                            Types.ValueType[] fschema,
                            boolean containsID) — NOTE: regarding the support of vector columns, we make the following
 schema restriction: a single vector column, which allows inference of
 the vector length without data access and covers the common case. | 
| static org.apache.spark.sql.types.StructType | convertFrameSchemaToDFSchema(Types.ValueType[] fschema,
                            boolean containsID) — Converts a Frame schema into a DataFrame schema. | 
| static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input,
                DataCharacteristics mc,
                Types.ValueType[] schema,
                boolean hasHeader,
                String delim,
                boolean fill,
                double fillValue,
                Set<String> naStrings) | 
| static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                org.apache.spark.api.java.JavaRDD<String> input,
                DataCharacteristics mcOut,
                Types.ValueType[] schema,
                boolean hasHeader,
                String delim,
                boolean fill,
                double fillValue,
                Set<String> naStrings) | 
| static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> | csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
           org.apache.spark.api.java.JavaRDD<String> dataRdd,
           String delim,
           Types.ValueType[] schema) | 
| static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> | csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
           String fnameIn,
           String delim,
           Types.ValueType[] schema) | 
| static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                      org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
                      DataCharacteristics mc,
                      boolean containsID) | 
| static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                      org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
                      DataCharacteristics mc,
                      boolean containsID,
                      Pair<String[],Types.ValueType[]> out) | 
| static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,FrameBlock> | matrixBlockToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                        org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input,
                        DataCharacteristics mcIn) | 
| static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | matrixBlockToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc,
                                 org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input,
                                 DataCharacteristics dcIn) | 
| static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                     org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> in,
                     DataCharacteristics mcOut,
                     Types.ValueType[] schema) | 
| static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | textCellToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc,
                              org.apache.spark.api.java.JavaPairRDD<Long,org.apache.hadoop.io.Text> input,
                              DataCharacteristics mc,
                              Types.ValueType[] schema) | 
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input, DataCharacteristics mc, Types.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaRDD<String> input, DataCharacteristics mcOut, Types.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, DataCharacteristics mcIn, FileFormatPropertiesCSV props, boolean strict)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> in, DataCharacteristics mcOut, Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> textCellToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<Long,org.apache.hadoop.io.Text> input, DataCharacteristics mc, Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input, DataCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,FrameBlock> matrixBlockToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input, DataCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> matrixBlockToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input, DataCharacteristics dcIn)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlockToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input, DataCharacteristics mcIn, DataCharacteristics mcOut)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, DataCharacteristics mc, boolean containsID)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, DataCharacteristics mc, boolean containsID, Pair<String[],Types.ValueType[]> out)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession,
                                                                                            org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
                                                                                            DataCharacteristics mc,
                                                                                            Types.ValueType[] schema)
@Deprecated public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext, org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, DataCharacteristics mc, Types.ValueType[] schema)
public static org.apache.spark.sql.types.StructType convertFrameSchemaToDFSchema(Types.ValueType[] fschema, boolean containsID)
Parameters: fschema - frame schema; containsID - true if the frame contains an ID column

public static int convertDFSchemaToFrameSchema(org.apache.spark.sql.types.StructType dfschema,
                                               String[] colnames,
                                               Types.ValueType[] fschema,
                                               boolean containsID)
Parameters: dfschema - schema as StructType; colnames - column names; fschema - array of SystemDS ValueTypes; containsID - if true, contains an ID column

public static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
                                                                                      String fnameIn,
                                                                                      String delim,
                                                                                      Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
                                                                                      org.apache.spark.api.java.JavaRDD<String> dataRdd,
                                                                                      String delim,
                                                                                      Types.ValueType[] schema)
Copyright © 2021 The Apache Software Foundation. All rights reserved.