Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add concatenate function in Dataframe.java #102

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions .idea/compiler.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/encodings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions .idea/jarRepositories.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/qaplug_profiles.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions .idea/runConfigurations.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions df1_origin.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
RowName,ID,Name,Age
11,1,A,10
22,2,B,20
33,3,C,30
44,4,D,40
128 changes: 127 additions & 1 deletion src/main/java/joinery/DataFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Array;
import java.security.SecureRandom;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
Expand Down Expand Up @@ -602,6 +603,86 @@ public DataFrame<V> append(final Object name, final List<? extends V> row) {
return this;
}

// CS427 Issue link: https://github.com/cardillo/joinery/issues/83
/**
* Concatenate two dataframes, either vertically or horizontally (Developed by Dongming Xia)
* If the input axis value is invalid (not 0 or 1), print out error message and return the main dataframe
* For vertical concatenate, if the number of columns or type of columns does not match,
* print out error message and return the main dataframe
* For horizontal concatenate, if the number of rows does not match,
* print out error message and return the main dataframe
*
* <pre> {@code
* > DataFrame<Object> df1 = new DataFrame<>("a", "b", "c");
* > df1.append(Arrays.asList(1, 2, 3));
* > df1.append(Arrays.asList(4, 5, 6));
* > df1.append(Arrays.asList(7, 8, 9));
* > DataFrame<Object> df2 = new DataFrame<>("d", "e", "f");
* > df2.append(Arrays.asList(10, 20, 30));
* > df2.append(Arrays.asList(40, 50, 60));
* > df2.append(Arrays.asList(70, 80, 90));
* > df1.concatenate(df2, 0).length();
* 6 }</pre>
*
* @param df2 - The dataframe to be concatenated after the main dataframe
* @param axis - The axis to concatenate along: 0 for vertical, 1 for horizontal
*/
public final DataFrame<V> concatenate(final DataFrame<V> df2, final int axis) {
    switch (axis) {
        case 0:
            // axis 0 selects the vertical path (rows of df2 are appended to this frame)
            return this.verticalConcat(df2);
        case 1:
            // axis 1 selects the horizontal path
            return this.horizontalConcat(df2);
        default:
            // any other axis value is rejected: report it and hand back this frame
            System.out.println("Please put 0 or 1 for the value of axis");
            return this;
    }
}

// CS427 Issue link: https://github.com/cardillo/joinery/issues/83
/**
* Concatenate two dataframes vertically; this is the same as calling {@code concatenate(df2, 0)}.
*
* @param df2 - The dataframe to be concatenated vertically after the main dataframe
*/
public final DataFrame<V> concatenate(final DataFrame<V> df2) {
    // No axis was supplied, so use axis 0 (the vertical direction).
    final int defaultAxis = 0;
    return this.concatenate(df2, defaultAxis);
}

// CS427 Issue link: https://github.com/cardillo/joinery/issues/83
/**
 * Append every row of {@code df2} to this data frame.
 *
 * <p>If the two frames differ in column count or in column types, nothing is
 * appended: a message is printed to standard output and this frame is returned.
 *
 * @param df2 the data frame whose rows are appended to this one
 * @return this data frame
 */
private DataFrame<V> verticalConcat(final DataFrame<V> df2) {
    // the column counts must agree before any row is copied
    if (this.size() != df2.size()) {
        System.out.println("The numbers of columns between two dataframes does not match");
        return this;
    }
    // the column types must agree position by position
    if (!Arrays.equals(this.types().toArray(), df2.types().toArray())) {
        System.out.println("The column types between two dataframes does not match");
        return this;
    }

    Iterable<? extends List<V>> rows = df2;
    if (df2 == this) {
        // Self-concatenation: appending below would modify the very frame that
        // is being iterated, so take a copy of the current rows first.
        final List<List<V>> snapshot = new java.util.ArrayList<List<V>>();
        for (final List<V> row : this) {
            snapshot.add(new java.util.ArrayList<V>(row));
        }
        rows = snapshot;
    }

    for (final List<V> row : rows) {
        this.append(row);
    }

    return this;
}

// CS427 Issue link: https://github.com/cardillo/joinery/issues/83
/**
 * Join this data frame with {@code df2} after calling {@code resetIndex()} on both.
 *
 * <p>If the two frames do not have the same number of rows, a message is
 * printed to standard output and this frame is returned instead.
 *
 * @param df2 the data frame to join onto this one
 * @return the result of the join, or this data frame when the row counts differ
 */
private DataFrame<V> horizontalConcat(final DataFrame<V> df2) {
    final boolean sameLength = this.length() == df2.length();
    if (sameLength) {
        final DataFrame<V> left = this.resetIndex();
        final DataFrame<V> right = df2.resetIndex();
        return left.join(right);
    }

    // row counts differ: report it and leave this frame as the result
    System.out.println("The number of rows between two dataframes does not match");
    return this;
}

/**
* Reshape a data frame to the specified dimensions.
*
Expand Down Expand Up @@ -2175,6 +2256,7 @@ public static final DataFrame<Object> readCsv(final InputStream input, final Str
public final void writeCsv(final String file) throws IOException {
    // Open the named file, then hand the stream to the CSV serializer.
    final OutputStream target = new FileOutputStream(file);
    Serialization.writeCsv(this, target);
}

/**
Expand All @@ -2189,6 +2271,48 @@ public final void writeCsv(final OutputStream output)
Serialization.writeCsv(this, output);
}

// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
/**
* Write the data from this data frame to
* the specified file as comma separated values.
* The user can specify if row names should also be written as the first column in the csv file
*
* @param file the file to write
* @param withColName if row names should also be written in the csv file
* @throws IOException if an error occurs writing the file
*/
public final void writeCsv(final String file, final boolean withColName) throws IOException {
    // The file is opened in either case; only the serializer entry point differs.
    final OutputStream target = new FileOutputStream(file);
    if (withColName) {
        // flag set: use the variant that also writes the row names
        Serialization.writeCsvWithRowName(this, target);
    } else {
        Serialization.writeCsv(this, target);
    }
}

// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
/**
* Write the data from this data frame to
* the provided output stream as comma separated values.
* The user can specify if row names should also be written as the first column in the csv file
*
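* @param output the output stream to write to
* @param withColName if row names should also be written in the csv output
* @throws IOException if an error occurs writing to the stream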
*/
public final void writeCsv(final OutputStream output, final boolean withColName) throws IOException {
    if (withColName) {
        // flag set: use the variant that also writes the row names
        Serialization.writeCsvWithRowName(this, output);
        return;
    }
    Serialization.writeCsv(this, output);
}



/**
* Read data from the specified excel
* workbook into a new data frame.
Expand Down Expand Up @@ -2231,7 +2355,7 @@ public final void writeXls(final String file)
* Write the data from the data frame
* to the provided output stream as an excel workbook.
*
* @param file the file to write
* @param output the output stream to write to
* @throws IOException if an error occurs writing the file
*/
public final void writeXls(final OutputStream output)
Expand Down Expand Up @@ -2471,4 +2595,6 @@ public static final void main(final String[] args)
);
System.exit(255);
}


}
55 changes: 46 additions & 9 deletions src/main/java/joinery/impl/Serialization.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,7 @@
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.*;

import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
Expand Down Expand Up @@ -296,7 +289,7 @@ public static <V> void writeCsv(final DataFrame<V> df, final String output)
}

public static <V> void writeCsv(final DataFrame<V> df, final OutputStream output)
throws IOException {
throws IOException {
try (CsvListWriter writer = new CsvListWriter(new OutputStreamWriter(output), CsvPreference.STANDARD_PREFERENCE)) {
final String[] header = new String[df.size()];
final Iterator<Object> it = df.columns().iterator();
Expand All @@ -320,6 +313,50 @@ public static <V> void writeCsv(final DataFrame<V> df, final OutputStream output
}
}

// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
/**
 * Write a data frame, including its row names, as comma separated values
 * to the named file.
 *
 * @param df the data frame to write
 * @param output the path of the file to write
 * @throws IOException if an error occurs opening or writing the file
 */
public static <V> void writeCsvWithRowName(final DataFrame<V> df, final String output) throws IOException {
    // Open the file and delegate to the stream-based overload.
    final OutputStream target = new FileOutputStream(output);
    writeCsvWithRowName(df, target);
}

// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
/**
 * Write a data frame as comma separated values, with each row's name from
 * the frame's index written in a leading {@code RowName} column.
 *
 * <p>The CSV writer wrapped around {@code output} is closed when this
 * method returns.
 *
 * @param df the data frame to write
 * @param output the stream to write to
 * @throws IOException if an error occurs writing to the stream
 */
public static <V> void writeCsvWithRowName(final DataFrame<V> df, final OutputStream output)
        throws IOException {
    try (CsvListWriter writer = new CsvListWriter(new OutputStreamWriter(output), CsvPreference.STANDARD_PREFERENCE)) {
        // one extra leading column holds the row name
        final int width = df.size() + 1;

        final String[] header = new String[width];
        header[0] = "RowName";
        final Iterator<Object> names = df.columns().iterator();
        for (int c = 1; c < width; c++) {
            // fall back to the column position when the frame has no more column names
            header[c] = String.valueOf(names.hasNext() ? names.next() : c);
        }
        writer.writeHeader(header);

        final CellProcessor[] procs = new CellProcessor[width];
        final List<Class<?>> types = df.types();
        procs[0] = new ConvertNullTo("");
        for (int c = 1; c < width; c++) {
            // processor c belongs to data column c - 1 because of the row-name column
            final Class<?> cls = types.get(c - 1);
            if (Date.class.isAssignableFrom(cls)) {
                procs[c] = new ConvertNullTo("", new FmtDate("yyyy-MM-dd'T'HH:mm:ssXXX"));
            } else {
                procs[c] = new ConvertNullTo("");
            }
        }

        // Walk the index in step with the rows; this avoids rebuilding an
        // array of the whole index for every row written.
        final Iterator<Object> rowNames = df.index().iterator();
        for (final List<V> row : df) {
            // the row name is not a V, so the output line is a list of Object
            final List<Object> line = new ArrayList<Object>(width);
            line.add(rowNames.next());
            line.addAll(row);
            writer.write(line, procs);
        }
    }
}

public static DataFrame<Object> readXls(final String file)
throws IOException {
return readXls(file.contains("://") ?
Expand Down
Loading