-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpython downsample script2.py
92 lines (78 loc) · 3.53 KB
/
python downsample script2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# -*- coding: utf-8 -*-
"""
Created on Fri May 3 12:47:01 2024
@author: 100575352
"""
import pandas as pd
import numpy as np
from tkinter import filedialog, Tk, simpledialog
import matplotlib.pyplot as plt
def select_excel_file():
    """Open a dialog to select an Excel file.

    A temporary, hidden Tk root window is created so that only the file
    dialog is visible, and it is destroyed again before returning.

    Returns:
        Whatever ``filedialog.askopenfilename`` returns: the selected path,
        or an empty value if the user cancels the dialog
        (NOTE(review): tkinter normally yields ``""`` on cancel -- confirm
        on the target platform).
    """
    root = Tk()
    root.withdraw()  # Hide the main window
    try:
        file_path = filedialog.askopenfilename(
            title="Select an Excel file",
            filetypes=[("Excel files", "*.xlsx")],
        )
    finally:
        # Always release the hidden root, even if the dialog raises.
        root.destroy()
    return file_path
def get_user_input(prompt, type_=None, min_=None, max_=None, range_=None):
    """Ask the user for a value, with type conversion and range validation.

    The dialog is shown again until the entry converts and validates, or
    until the user cancels.

    Args:
        prompt: Text displayed in the input dialog.
        type_: Callable used to convert the entered string (e.g. ``int``,
            ``float``). When ``None`` the raw string is returned.
        min_: Optional inclusive lower bound for the converted value.
        max_: Optional inclusive upper bound for the converted value.
        range_: Optional container; the converted value must be ``in`` it.

    Returns:
        The converted, validated value, or ``None`` if the user cancelled
        the dialog.
    """
    # Without an explicit converter, hand the entered text back unchanged.
    convert = str if type_ is None else type_
    type_name = getattr(convert, "__name__", repr(convert))

    root = Tk()
    root.withdraw()
    try:
        while True:
            input_str = simpledialog.askstring("Input", prompt, parent=root)
            if input_str is None:
                # askstring returns None when the dialog is cancelled.
                return None
            try:
                val = convert(input_str)
            except (TypeError, ValueError):
                print("Input type must be", type_name)
                continue
            out_of_range = (
                (min_ is not None and val < min_)
                or (max_ is not None and val > max_)
                or (range_ is not None and val not in range_)
            )
            if out_of_range:
                print("Input out of valid range.")
                continue
            return val
    finally:
        # One hidden root is created per call; release it on every exit path.
        root.destroy()
def downsample_data(df, original_freq, target_freq):
    """Downsample *df* by aggregating consecutive chunks of rows.

    Rows are grouped into consecutive chunks of ``factor`` rows, where
    ``factor`` is ``original_freq / target_freq`` rounded up (and never less
    than 1). Numeric columns are averaged per chunk; all other columns keep
    the first value of each chunk as returned by ``GroupBy.first``.

    Args:
        df: Input DataFrame, one row per original sample.
        original_freq: Sampling frequency of *df* (must be > 0).
        target_freq: Desired output frequency (must be > 0).

    Returns:
        A new DataFrame with one row per chunk, indexed by chunk number, with
        columns in the same order as *df* (when column labels are unique).

    Raises:
        ValueError: If either frequency is not strictly positive.
    """
    if original_freq <= 0 or target_freq <= 0:
        raise ValueError("Sampling frequencies must be positive.")

    ratio = original_freq / target_freq
    nearest = np.rint(ratio)
    # Floating-point division can land a hair above an exact integer ratio
    # (e.g. 2.1 / 0.7 -> 3.0000000000000004); a plain ceil would then inflate
    # the factor by one, so snap to the integer when within rounding error.
    if np.isclose(ratio, nearest, rtol=1e-9, atol=0.0):
        factor = int(nearest)
    else:
        factor = int(np.ceil(ratio))
    factor = max(factor, 1)  # target >= original: keep every row

    chunk_ids = np.arange(len(df)) // factor
    numeric_cols = df.select_dtypes(include=[np.number])
    non_numeric_cols = df.select_dtypes(exclude=[np.number])

    parts = []
    if numeric_cols.shape[1]:
        parts.append(numeric_cols.groupby(chunk_ids).mean())
    if non_numeric_cols.shape[1]:
        parts.append(non_numeric_cols.groupby(chunk_ids).first())
    if not parts:
        # No columns at all: return an empty frame with one row per chunk.
        return pd.DataFrame(index=np.unique(chunk_ids))

    result = pd.concat(parts, axis=1)
    if df.columns.is_unique:
        # Concatenation puts numeric columns first; restore the input order.
        result = result[list(df.columns)]
    return result
def save_dataframe(df):
    """Save DataFrame to an Excel file chosen by the user.

    A "save as" dialog is shown (defaulting to the ``.xlsx`` extension). If
    the user cancels, nothing is written and a message is printed instead.

    Args:
        df: DataFrame to write; the index is not written to the file.
    """
    root = Tk()
    root.withdraw()
    try:
        file_path = filedialog.asksaveasfilename(
            defaultextension=".xlsx",
            filetypes=[("Excel files", "*.xlsx")],
        )
    finally:
        # Release the hidden root before the (possibly failing) write.
        root.destroy()

    if not file_path:
        # The dialog yields an empty value on cancel; to_excel would fail on it.
        print("Save cancelled; no file was written.")
        return
    df.to_excel(file_path, index=False)
def plot_data(original_df, downsampled_df):
    """Plot original and downsampled data for each numeric column.

    One figure is created per numeric column of *original_df*. The original
    series is drawn against its own index; the downsampled series is drawn
    dashed, with its index stretched by ``len(original_df) /
    len(downsampled_df)`` so both curves span a similar x-range.

    Args:
        original_df: The full-resolution DataFrame.
        downsampled_df: The downsampled DataFrame; it must contain every
            numeric column of *original_df*.
    """
    if len(downsampled_df) == 0:
        # The x-axis stretch below would divide by zero.
        print("Nothing to plot: downsampled data is empty.")
        return

    numeric_cols = original_df.select_dtypes(include=[np.number]).columns
    # Same for every column, so compute the stretched x positions once.
    x_scale = len(original_df) / len(downsampled_df)
    downsampled_x = downsampled_df.index * x_scale

    for column in numeric_cols:
        plt.figure()
        plt.plot(original_df.index, original_df[column], label='Original Data')
        plt.plot(downsampled_x, downsampled_df[column], label='Downsampled Data', linestyle='--')
        plt.title(column)
        plt.xlabel('Sample Index')
        plt.ylabel('Value')
        plt.legend()
        # NOTE(review): shown per column; confirm this matches the intended
        # behaviour versus a single plt.show() after the loop.
        plt.show()
def main():
    """Run the interactive workflow: select, load, downsample, plot, save.

    The user picks an Excel file and enters the starting row and the original
    and target sampling frequencies. The workflow stops early, with a printed
    message, if no file is chosen or any of the inputs is missing.
    """
    # File selection and user input
    file_path = select_excel_file()
    if not file_path:
        print("No file selected; nothing to do.")
        return

    questions = (
        ("Enter the starting row (1-indexed):", int, 1),
        ("Enter the original sampling frequency (Hz):", float, 0.001),
        ("Enter the desired output frequency (Hz):", float, 0.001),
    )
    answers = []
    for text, kind, minimum in questions:
        value = get_user_input(text, kind, minimum)
        if value is None:
            # get_user_input yields None when no usable value was obtained.
            print("No valid input provided; aborting.")
            return
        answers.append(value)
    starting_row, original_freq, target_freq = answers

    # Load the data; rows above the 1-indexed starting row are skipped.
    df = pd.read_excel(file_path, skiprows=starting_row - 1)
    print("Data loaded successfully. Column names are:", df.columns.tolist())

    # Downsample the data
    downsampled_df = downsample_data(df, original_freq, target_freq)
    print("Data downsampled successfully.")

    # Plot the data
    plot_data(df, downsampled_df)

    # Save the downsampled data
    save_dataframe(downsampled_df)
# Run the interactive workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()