|
| 1 | +/******************************************************************************* |
| 2 | + * |
| 3 | + * |
| 4 | + * |
| 5 | + * This program and the accompanying materials are made available under the |
| 6 | + * terms of the Apache License, Version 2.0 which is available at |
| 7 | + * https://www.apache.org/licenses/LICENSE-2.0. |
| 8 | + * See the NOTICE file distributed with this work for additional |
| 9 | + * information regarding copyright ownership. |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 13 | + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 14 | + * License for the specific language governing permissions and limitations |
| 15 | + * under the License. |
| 16 | + * |
| 17 | + * SPDX-License-Identifier: Apache-2.0 |
| 18 | + ******************************************************************************/ |
| 19 | +package org.deeplearning4j.datapipelineexamples.tablesaw; |
| 20 | + |
| 21 | +import com.google.common.primitives.Doubles; |
| 22 | +import com.google.common.primitives.Ints; |
| 23 | +import org.deeplearning4j.datapipelineexamples.utils.DownloaderUtility; |
| 24 | +import org.nd4j.linalg.api.ndarray.INDArray; |
| 25 | +import org.nd4j.linalg.dataset.DataSet; |
| 26 | +import org.nd4j.linalg.factory.Nd4j; |
| 27 | +import org.nd4j.linalg.util.FeatureUtil; |
| 28 | +import tech.tablesaw.api.CategoricalColumn; |
| 29 | +import tech.tablesaw.api.DoubleColumn; |
| 30 | +import tech.tablesaw.api.Table; |
| 31 | +import tech.tablesaw.io.csv.CsvReadOptions; |
| 32 | + |
| 33 | +import java.io.File; |
| 34 | +import java.util.Arrays; |
| 35 | +import java.util.stream.Collectors; |
| 36 | + |
| 37 | +/** |
| 38 | + * This example uses the table saw library to prepare csv data for conversion to a neural network. |
| 39 | + * If you would like more information on tablesaw, please look at the table saw quickstart: |
| 40 | + * https://jtablesaw.github.io/tablesaw/gettingstarted |
| 41 | + * |
| 42 | + * This example leverages tablesaw to load a csv and convert it to a dataset object. |
| 43 | + * |
| 44 | + * @author Adam Gibson |
| 45 | + */ |
| 46 | +public class TablesawCSVExample { |
| 47 | + |
| 48 | + public static void main(String...args) throws Exception { |
| 49 | + //download the data |
| 50 | + String directory = DownloaderUtility.IRISDATA.Download(); |
| 51 | + //note our downloaded csv has no headers, so we want auto generated column names |
| 52 | + CsvReadOptions csvReadOptions = CsvReadOptions |
| 53 | + .builder(new File(directory, "iris.txt")).header(false).build(); |
| 54 | + Table table = Table.read().csv(csvReadOptions); |
| 55 | + System.out.println(table.columnNames()); |
| 56 | + //Convert the data without the label column to get just the raw input data out. |
| 57 | + Table justLabel = Table.create(table.column(4)); |
| 58 | + Table withoutLabel = table.removeColumns(table.column(4)); |
| 59 | + //convert the data to a double array filtering the column without |
| 60 | + double[][] data = Arrays.stream(withoutLabel.columnArray()) |
| 61 | + .map(column -> (DoubleColumn) column) |
| 62 | + .map(input -> input.asList()) |
| 63 | + .map(input -> Doubles.toArray(input)) |
| 64 | + .collect(Collectors.toList()) |
| 65 | + .toArray(new double[table.columnNames().size()][]); |
| 66 | + |
| 67 | + //create the data from the array and print the data |
| 68 | + INDArray arr = Nd4j.create(data); |
| 69 | + System.out.println(arr.toStringFull()); |
| 70 | + |
| 71 | + //print the categories |
| 72 | + CategoricalColumn<?> objects = justLabel.categoricalColumn(0); |
| 73 | + System.out.println("List " + objects.asList()); |
| 74 | + System.out.println(objects.countByCategory()); |
| 75 | + |
| 76 | + |
| 77 | + //create an ndarray of the outcomes converted to categorical 0 1 labels |
| 78 | + int[] outcomes = Ints.toArray(justLabel.longColumn(0).asList()); |
| 79 | + INDArray labels = FeatureUtil.toOutcomeMatrix(outcomes, 3); |
| 80 | + |
| 81 | + |
| 82 | + //create a dataset object containing the input and the labels |
| 83 | + DataSet dataSet = new DataSet(arr,labels); |
| 84 | + |
| 85 | + |
| 86 | + |
| 87 | + } |
| 88 | + |
| 89 | + |
| 90 | +} |
0 commit comments