-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
7 changed files
with
256 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,95 @@ | ||
# unpivotdude | ||
Unpivotdude is a utility to pivot data from an input file in a user-desired way. | ||
# Unpivotdude | ||
|
||
Unpivotdude is a utility to unpivot data from an input file in a user-desired way. | ||
|
||
Unpivotdude can be run manually, or integrated into an automated-process/work-flow. In general it follows the [UNIX philosophy](https://en.wikipedia.org/wiki/Unix_philosophy) as much as possible. | ||
|
||
Note: This is the converse-utility of the tool *pivotdude*, which can be found [here](https://github.com/chipnetics/pivotdude) | ||
|
||
# Project Motivation | ||
|
||
We can accomplish pivoting data in multiple ways; however, for most individuals, they typically do it in Excel. The problem with Excel is that to pivot against a combination of columns, things become clunky fast; they append together values to get unique column combinations, pivot tables become nested/indented and need reformatting, processing hangs on vast data (and may not even work when beyond ~50,000 line items), and so forth. Of course, it also goes without saying this process is difficult to automate and is not workable on large data sets. | ||
|
||
Unpivotdude is a command line approach to easily take tab-separated values from a file and output to standard output (stdout) the data transformation. | ||
|
||
# Pre-Compiled Binaries | ||
|
||
Binaries (.exe) for Windows OS have been pre-compiled and can be found in the 'bin' folder. | ||
|
||
With git, you can download all the latest source and binaries with `git clone https://github.com/chipnetics/unpivotdude` | ||
|
||
Alternatively, if you don't have git installed: | ||
|
||
1. Download the latest release [here](https://github.com/chipnetics/unpivotdude/archive/refs/heads/main.zip) | ||
2. Unzip to a local directory. | ||
3. Navigate to 'bin' directory for executables. | ||
|
||
# Compiling from Source | ||
|
||
Utilities are written in the V programming language and will compile under Windows, Linux, and MacOS. | ||
|
||
V is syntactically similar to Go, while being as fast as C. You can read about V [here](https://vlang.io/). | ||
|
||
Each utility is its own .v file, so after installing the [latest V compiler](https://github.com/vlang/v/releases/), it's as easy as executing the below. _Be sure that the V compiler root directory is part of your PATH environment._ | ||
|
||
``` | ||
git clone https://github.com/chipnetics/unpivotdude | ||
cd unpivotdude/src | ||
v build unpivotdude.v | ||
``` | ||
Alternatively, if you don't have git installed: | ||
|
||
1. Download the bundled source [here](https://github.com/chipnetics/unpivotdude/archive/refs/heads/main.zip) | ||
2. Unzip to a local directory | ||
3. Navigate to src directory and run `v build unpivotdude.v` | ||
|
||
Please see the [V language documentation](https://github.com/vlang/v/blob/master/doc/docs.md) for further help if required. | ||
|
||
# Viewing Large Files | ||
|
||
As an aside, the author recommends the excellent tool _EmEditor_ by Emurasoft for manually editing or viewing large text-based files for data science & analytics. Check out the tool [here](https://www.emeditor.com/). _EmEditor_ is paid software but well worth the investment to increase efficiency. | ||
|
||
# Running Command Line Arguments | ||
|
||
For Windows users, if you want to pass optional command line arguments to an executable: | ||
|
||
1. Navigate to the directory of the utility. | ||
2. Hold Shift + Right Mouse Click. | ||
3. Select "Open PowerShell Window Here". | ||
4. Type the name of the exe along with the optional argument (i.e. `./unpivotdude.exe [OPTIONS]` ). | ||
|
||
# Command Line Options | ||
|
||
``` | ||
Usage: unpivotdude [options] [ARGS] | ||
Description: | ||
Unpivot input data on specific column combination. | ||
Note that columns are 0-index based. | ||
Options: | ||
-p, --pivot <string> Comma-separated list of pivot indexes. | ||
-h, --header <string> Comma-separated list of header indexes. | ||
-n, --no-header Indicate input file has no header. | ||
-f, --file-in <string> Input file to pivot. | ||
-h, --help display this help and exit | ||
--version output version information and exit | ||
``` | ||
|
||
_Note that the -p and -h options can be passed individual column indexes, or ranges of indexes (e.g. 3-5). See the example below._ | ||
|
||
*** | ||
|
||
# Examples | ||
|
||
---- | ||
|
||
`unpivotdude.exe -f ..\..\examples\sample1.txt -p 0,2 -h 3-5 > ..\..\examples\sample1_out.txt` | ||
|
||
**sample1.txt :::** | ||
|
||
![Example 1](img/sample1.png) | ||
|
||
**sample1_out.txt :::** | ||
|
||
![Example 1](img/sample1_out.png) |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
col_a col_b col_c head_1 head_2 head_3 | ||
100 300 400 aaa bbb ccc | ||
888 111 222 xxx yyy xxx | ||
444 555 666 ggg hhh iii |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
col_a col_c unpivot_col unpivot_val | ||
100 400 head_1 aaa | ||
100 400 head_2 bbb | ||
100 400 head_3 ccc | ||
888 222 head_1 xxx | ||
888 222 head_2 yyy | ||
888 222 head_3 xxx | ||
444 666 head_1 ggg | ||
444 666 head_2 hhh | ||
444 666 head_3 iii |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
// Copyright (c) 2022 jeffrey -at- ieee.org. All rights reserved. | ||
// Use of this source code (/program) is governed by an MIT license, | ||
// that can be found in the LICENSE file. Do not remove this header. | ||
import os | ||
import flag | ||
|
||
// main parses the command line, reads the tab-separated input file and writes
// the unpivoted table to stdout.
//
// For every data row, one output row is produced per header index: the values
// of the pivot columns, then the unpivoted column's name (or its index when
// the input has no header row), then the cell value.
//
// Usage is printed, and nothing else is done, when flag parsing fails or when
// any of --pivot, --header or --file-in is missing. An unreadable input file,
// or a header/data row with fewer columns than the highest requested index,
// is reported on stderr and ends the program with exit status 1. Blank lines
// that are too short to index are skipped.
fn main() {
	mut fp := flag.new_flag_parser(os.args)
	fp.application('unpivotdude')
	fp.version('v0.0.1\nCopyright (c) 2022 jeffrey -at- ieee.org. All rights ' +
		'reserved.\nUse of this source code (/program) is governed by an MIT ' +
		'license,\nthat can be found in the LICENSE file.')
	fp.description('\nUnpivot input data on specific column combination.\n' +
		'Note that columns are 0-index based.')
	fp.skip_executable()
	pivot_column_arg := fp.string('pivot', `p`, '', 'Comma-separated list of pivot indexes.')
	header_column_arg := fp.string('header', `h`, '', 'Comma-separated list of header indexes.')
	no_header := fp.bool('no-header', `n`, false, 'Indicate input file has no header.')
	file_in := fp.string('file-in', `f`, '', 'Input file to pivot.')

	// The flag is phrased negatively on the command line; invert it once so
	// the rest of the function reads positively (readability).
	has_header := !no_header

	additional_args := fp.finalize() or {
		eprintln(err)
		println(fp.usage())
		return
	}

	if pivot_column_arg.len == 0 || header_column_arg.len == 0 || file_in.len == 0 {
		println(fp.usage())
		return
	}

	// NOTE(review): the result is discarded; presumably this call only exists
	// to mark additional_args as used. Kept as in the original.
	additional_args.join_lines()

	pivot_column := expand_int_string(pivot_column_arg) // "0,4-6" --> ['0','4','5','6']
	header_column := expand_int_string(header_column_arg)

	// Smallest number of columns a row must have for every requested index
	// to be addressable.
	mut required_cols := 0
	for idx in pivot_column {
		if idx.int() >= required_cols {
			required_cols = idx.int() + 1
		}
	}
	for idx in header_column {
		if idx.int() >= required_cols {
			required_cols = idx.int() + 1
		}
	}

	lines := os.read_lines(file_in) or {
		eprintln('unpivotdude: cannot read "${file_in}": ${err}')
		exit(1)
	}

	mut delimited_header := []string{}
	if has_header {
		if lines.len > 0 {
			delimited_header = lines[0].split('\t')
		}
		if delimited_header.len < required_cols {
			eprintln('unpivotdude: header row has ${delimited_header.len} column(s), but column index ${required_cols - 1} was requested')
			exit(1)
		}
	}

	mut data_array := []Data{}

	for index, line in lines {
		if index == 0 && has_header {
			continue // already captured as the header row above
		}

		delimited_row := line.split('\t')
		if delimited_row.len < required_cols {
			if line.trim_space().len == 0 {
				continue // blank line: nothing to unpivot
			}
			eprintln('unpivotdude: line ${index + 1} has ${delimited_row.len} column(s), but column index ${required_cols - 1} was requested')
			exit(1)
		}

		// The pivot key is identical for every header of this row, so build
		// it once per row.
		mut pivot_values := []string{}
		for cols in pivot_column {
			pivot_values << delimited_row[cols.int()]
		}
		pivot_key := pivot_values.join('\t')

		for heads in header_column {
			// Without a header row the column index itself is the label.
			header_elem := if has_header { delimited_header[heads.int()] } else { heads }
			data_array << Data{
				header_elem: header_elem
				pivot_col: pivot_key
				value: delimited_row[heads.int()]
			}
		}
	}

	// Print out the header
	if has_header {
		for cols in pivot_column {
			print('${delimited_header[cols.int()]}\t')
		}
	} else {
		// Source data does not have a header; make one...
		for value in pivot_column {
			print('col_${value}\t')
		}
	}
	println('unpivot_col\tunpivot_val')
	// End of header creation...

	for data_pt in data_array {
		println('${data_pt.pivot_col}\t${data_pt.header_elem}\t${data_pt.value}')
	}
}
|
||
// expand_int_string expands a comma-separated list of column indexes and
// inclusive index ranges into one string per index, preserving the order and
// any duplicates of the input.
//
// "1,2,5-10,8" ==> ['1', '2', '5', '6', '7', '8', '9', '10', '8']
//
// Whitespace around each token and around range bounds is ignored, and empty
// tokens (e.g. from a trailing comma) are skipped, so "0, 2," ==> ['0', '2'].
// A range whose lower bound is greater than its upper bound contributes
// nothing to the result.
fn expand_int_string(ranges string) []string {
	mut return_arr := []string{}

	for raw_elem in ranges.split(',') {
		elem := raw_elem.trim_space()
		if elem.len == 0 {
			continue // stray or trailing comma
		}

		if elem.contains('-') {
			elem_split := elem.split('-')

			lower_bound := elem_split[0].trim_space().int()
			upper_bound := elem_split[1].trim_space().int()

			for i := lower_bound; i <= upper_bound; i++ {
				return_arr << i.str()
			}
		} else {
			return_arr << elem
		}
	}

	return return_arr
}
|
||
// Data is one row of the unpivoted output.
struct Data {
mut:
	// Label of the unpivoted column: the header text when the input has a
	// header row, otherwise the column index as given on the command line.
	header_elem string
	// Tab-joined values of the pivot columns for the source row.
	pivot_col string
	// Cell value found in the unpivoted column for the source row.
	value string
}