diff --git a/metadata/braninDemo.py b/metadata/braninDemo.py
index 274dfd7..001335d 100644
--- a/metadata/braninDemo.py
+++ b/metadata/braninDemo.py
@@ -1,245 +1,103 @@
+#
+# Copyright 2022 Ocean Protocol Foundation
+# SPDX-License-Identifier: Apache-2.0
+#
+import json
 import os
-import numpy as np
-import time
-from sklearn.datasets import make_classification, load_iris, fetch_california_housing
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.model_selection import train_test_split, cross_val_score
-from sklearn.metrics import accuracy_score, mean_squared_error, classification_report
-from sklearn.feature_selection import SelectKBest, f_classif
-from sklearn.decomposition import PCA
-from sklearn.preprocessing import StandardScaler
-
-# Create output directory
-output_dir = '/data/outputs'
-os.makedirs(output_dir, exist_ok=True)
-
-# Function to log progress
-def log_progress(message):
-    print(f"{message}")
-    with open(f"{output_dir}/log.txt", "a") as f:
-        f.write(f"{message}\n")
-
-log_progress("Starting advanced text-based ML demo...")
-
-# Function to create a text-based heatmap
-def text_heatmap(matrix, row_labels=None, col_labels=None, title="Heatmap"):
-    result = [title + "\n"]
-
-    # Add column headers if provided
-    if col_labels:
-        header = " "  # Space for row labels
-        for label in col_labels:
-            header += f"{label:8.8s} "
-        result.append(header)
-
-    # Add rows with labels if provided
-    for i, row in enumerate(matrix):
-        line = f"{row_labels[i]:4.4s} " if row_labels else f"{i:4d} "
-        for val in row:
-            # Use different symbols based on value
-            if val > 0.8:
-                symbol = "███"
-            elif val > 0.6:
-                symbol = "▓▓▓"
-            elif val > 0.4:
-                symbol = "▒▒▒"
-            elif val > 0.2:
-                symbol = "░░░"
-            else:
-                symbol = " "
-            line += f"{symbol} {val:.2f} "
-        result.append(line)
-
-    return "\n".join(result)
-
-# Function to create a text-based bar chart
-def text_bar_chart(values, labels=None, title="Bar Chart", max_width=40):
-    result = [title + "\n"]
-
-    # Find the maximum value for scaling
-    max_val = max(values)
-
-    # Create bars
-    for i, val in enumerate(values):
-        # Calculate bar length
-        bar_len = int((val / max_val) * max_width)
-        bar = "█" * bar_len
-
-        # Add label if provided
-        label = labels[i] if labels else f"Item {i+1}"
-        result.append(f"{label:15.15s} | {bar} {val:.4f}")
-
-    return "\n".join(result)
-
-# 1. Classification Task
-log_progress("PART 1: Classification Task")
-log_progress("Generating synthetic classification dataset...")
-X_class, y_class = make_classification(
-    n_samples=1000,
-    n_features=10,
-    n_informative=5,
-    n_redundant=2,
-    n_classes=3,
-    random_state=42
-)
-
-X_train, X_test, y_train, y_test = train_test_split(X_class, y_class, test_size=0.3, random_state=42)
-log_progress(f"Dataset created with {len(X_train)} training samples and {len(X_test)} test samples")
-
-# Train classifier
-log_progress("Training Random Forest classifier...")
-start_time = time.time()
-clf = RandomForestClassifier(n_estimators=100, random_state=42)
-clf.fit(X_train, y_train)
-training_time = time.time() - start_time
-
-# Evaluate
-y_pred = clf.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-log_progress(f"Model accuracy: {accuracy:.4f}")
-log_progress(f"Training time: {training_time:.2f} seconds")
-
-# Feature importance analysis
-log_progress("Analyzing feature importance...")
-importances = clf.feature_importances_
-feature_names = [f"Feature {i+1}" for i in range(X_class.shape[1])]
-
-# Create text-based bar chart for feature importance
-importance_chart = text_bar_chart(
-    importances,
-    feature_names,
-    title="Feature Importance"
-)
-log_progress("Feature importance analysis complete")
-
-# 2. Regression Task with Real Dataset
-log_progress("\nPART 2: Regression Task")
-log_progress("Loading California housing dataset...")
-housing = fetch_california_housing()
-X_reg, y_reg = housing.data, housing.target
-
-# Normalize features
-scaler = StandardScaler()
-X_reg_scaled = scaler.fit_transform(X_reg)
-
-X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
-    X_reg_scaled, y_reg, test_size=0.3, random_state=42
-)
-
-log_progress(f"Housing dataset loaded with {X_reg.shape[0]} samples and {X_reg.shape[1]} features")
-
-# Train regressor
-log_progress("Training Random Forest regressor...")
-start_time = time.time()
-reg = RandomForestRegressor(n_estimators=100, random_state=42)
-reg.fit(X_reg_train, y_reg_train)
-reg_training_time = time.time() - start_time
-
-# Evaluate
-y_reg_pred = reg.predict(X_reg_test)
-mse = mean_squared_error(y_reg_test, y_reg_pred)
-rmse = np.sqrt(mse)
-log_progress(f"Model RMSE: {rmse:.4f}")
-log_progress(f"Training time: {reg_training_time:.2f} seconds")
-
-# Feature importance for regression
-reg_importances = reg.feature_importances_
-reg_feature_names = housing.feature_names
-
-# Create text-based bar chart for regression feature importance
-reg_importance_chart = text_bar_chart(
-    reg_importances,
-    reg_feature_names,
-    title="Housing Feature Importance"
-)
-log_progress("Housing feature importance analysis complete")
-
-# 3. PCA Analysis
-log_progress("\nPART 3: PCA Dimensionality Reduction")
-log_progress("Performing PCA on classification dataset...")
-
-pca = PCA()
-pca.fit(X_class)
-explained_variance = pca.explained_variance_ratio_
-
-# Create text-based bar chart for explained variance
-variance_chart = text_bar_chart(
-    explained_variance,
-    [f"PC {i+1}" for i in range(len(explained_variance))],
-    title="PCA Explained Variance"
-)
-
-# Calculate cumulative explained variance
-cumulative_variance = np.cumsum(explained_variance)
-log_progress(f"Number of components for 90% variance: {np.argmax(cumulative_variance >= 0.9) + 1}")
-
-# 4. Feature Correlation Matrix
-log_progress("\nPART 4: Feature Correlation Analysis")
-log_progress("Calculating feature correlation matrix...")
-
-# Calculate correlation matrix
-corr_matrix = np.corrcoef(X_class.T)
-
-# Create text-based heatmap for correlation
-corr_heatmap = text_heatmap(
-    corr_matrix,
-    feature_names,
-    feature_names,
-    title="Feature Correlation Matrix"
-)
-
-# Save all results to a comprehensive report
-log_progress("\nSaving comprehensive analysis report...")
-with open(f"{output_dir}/ml_analysis_report.txt", "w") as f:
-    f.write("# Advanced Machine Learning Analysis Report\n\n")
-
-    f.write("## 1. Classification Task\n\n")
-    f.write(f"Dataset: Synthetic classification with {len(X_class)} samples, {X_class.shape[1]} features\n")
-    f.write(f"Classes: 3\n")
-    f.write(f"Training samples: {len(X_train)}\n")
-    f.write(f"Test samples: {len(X_test)}\n\n")
-    f.write(f"Model: Random Forest with 100 estimators\n")
-    f.write(f"Training time: {training_time:.2f} seconds\n")
-    f.write(f"Accuracy: {accuracy:.4f}\n\n")
-    f.write(importance_chart)
-    f.write("\n\n")
-
-    f.write("## 2. Regression Task (California Housing)\n\n")
-    f.write(f"Dataset: California Housing with {len(X_reg)} samples, {X_reg.shape[1]} features\n")
-    f.write(f"Training samples: {len(X_reg_train)}\n")
-    f.write(f"Test samples: {len(X_reg_test)}\n\n")
-    f.write(f"Model: Random Forest Regressor with 100 estimators\n")
-    f.write(f"Training time: {reg_training_time:.2f} seconds\n")
-    f.write(f"RMSE: {rmse:.4f}\n\n")
-    f.write(reg_importance_chart)
-    f.write("\n\n")
-
-    f.write("## 3. PCA Analysis\n\n")
-    f.write(variance_chart)
-    f.write("\n\n")
-    f.write(f"Cumulative explained variance:\n")
-    for i, var in enumerate(cumulative_variance):
-        f.write(f"PC 1-{i+1}: {var:.4f}\n")
-    f.write("\n\n")
-
-    f.write("## 4. Feature Correlation Analysis\n\n")
-    f.write(corr_heatmap)
-
-# Save a summary of the classification report
-with open(f"{output_dir}/classification_report.txt", "w") as f:
-    f.write("# Classification Report\n\n")
-    f.write(classification_report(y_test, y_pred))
-
-# Save feature importance data
-np.savetxt(f"{output_dir}/feature_importance.txt", importances)
-np.savetxt(f"{output_dir}/housing_feature_importance.txt", reg_importances)
-
-# Save PCA results
-np.savetxt(f"{output_dir}/pca_explained_variance.txt", explained_variance)
-np.savetxt(f"{output_dir}/pca_cumulative_variance.txt", cumulative_variance)
-
-# Save correlation matrix
-np.savetxt(f"{output_dir}/correlation_matrix.txt", corr_matrix)
-
-log_progress("Demo completed successfully!")
-log_progress(f"All results saved to {output_dir}")
\ No newline at end of file
+import pickle
+import sys
+
+import arff
+import matplotlib
+import numpy
+from matplotlib import pyplot
+from sklearn import gaussian_process
+
+matplotlib.use("agg")
+
+
+def branin_mesh(X0, X1):
+    # b,c,t = 5.1/(4.*(pi)**2), 5./pi, 1./(8.*pi)
+    b, c, t = 0.12918450914398066, 1.5915494309189535, 0.039788735772973836
+    u = X1 - b * X0**2 + c * X0 - 6
+    r = 10.0 * (1.0 - t) * numpy.cos(X0) + 10
+    Z = u**2 + r
+
+    return Z
+
+
+def create_mesh(npoints):
+    X0_vec = numpy.linspace(-5.0, 10.0, npoints)
+    X1_vec = numpy.linspace(0.0, 15.0, npoints)
+    X0, X1 = numpy.meshgrid(X0_vec, X1_vec)
+    Z = branin_mesh(X0, X1)
+
+    return X0, X1, Z
+
+
+def get_input(local=False):
+    if local:
+        print("Reading local file branin.arff.")
+
+        return "branin.arff"
+
+    dids = os.getenv("DIDS", None)
+
+    if not dids:
+        print("No DIDs found in environment. Aborting.")
+        return
+
+    dids = json.loads(dids)
+
+    for did in dids:
+        filename = f"data/inputs/{did}/0"  # 0 for metadata service
+        print(f"Reading asset file {filename}.")
+
+        return filename
+
+
+def plot(Zhat, npoints):
+    X0, X1, Z = create_mesh(npoints)
+    # plot data + model
+    fig, ax = pyplot.subplots(subplot_kw={"projection": "3d"})
+    ax.plot_wireframe(X0, X1, Z, linewidth=1)
+    ax.scatter(X0, X1, Zhat, c="r", label="model")
+    pyplot.title("Data + model")
+    pyplot.show()
+
+
+def run_gpr(local=False):
+    npoints = 15
+
+    filename = get_input(local)
+    if not filename:
+        print("Could not retrieve filename.")
+        return
+
+    with open(filename) as datafile:
+        datafile.seek(0)
+        res = arff.load(datafile)
+
+    print("Stacking data.")
+    mat = numpy.stack(res["data"])
+    [X, y] = numpy.split(mat, [2], axis=1)
+
+    print("Building Gaussian Process Regressor (GPR) model")
+    model = gaussian_process.GaussianProcessRegressor()
+    model.fit(X, y)
+    yhat = model.predict(X, return_std=False)
+    Zhat = numpy.reshape(yhat, (npoints, npoints))
+
+    if local:
+        print("Plotting results")
+        plot(Zhat, npoints)
+
+    filename = "gpr.pickle" if local else "/data/outputs/result"
+    with open(filename, "wb") as pickle_file:
+        print(f"Pickling results in {filename}")
+        pickle.dump(Zhat, pickle_file)
+
+
+if __name__ == "__main__":
+    local = len(sys.argv) == 2 and sys.argv[1] == "local"
+    run_gpr(local)
\ No newline at end of file
diff --git a/metadata/braninDemo2.py b/metadata/braninDemo2.py
new file mode 100644
index 0000000..001335d
--- /dev/null
+++ b/metadata/braninDemo2.py
@@ -0,0 +1,103 @@
+#
+# Copyright 2022 Ocean Protocol Foundation
+# SPDX-License-Identifier: Apache-2.0
+#
+import json
+import os
+import pickle
+import sys
+
+import arff
+import matplotlib
+import numpy
+from matplotlib import pyplot
+from sklearn import gaussian_process
+
+matplotlib.use("agg")
+
+
+def branin_mesh(X0, X1):
+    # b,c,t = 5.1/(4.*(pi)**2), 5./pi, 1./(8.*pi)
+    b, c, t = 0.12918450914398066, 1.5915494309189535, 0.039788735772973836
+    u = X1 - b * X0**2 + c * X0 - 6
+    r = 10.0 * (1.0 - t) * numpy.cos(X0) + 10
+    Z = u**2 + r
+
+    return Z
+
+
+def create_mesh(npoints):
+    X0_vec = numpy.linspace(-5.0, 10.0, npoints)
+    X1_vec = numpy.linspace(0.0, 15.0, npoints)
+    X0, X1 = numpy.meshgrid(X0_vec, X1_vec)
+    Z = branin_mesh(X0, X1)
+
+    return X0, X1, Z
+
+
+def get_input(local=False):
+    if local:
+        print("Reading local file branin.arff.")
+
+        return "branin.arff"
+
+    dids = os.getenv("DIDS", None)
+
+    if not dids:
+        print("No DIDs found in environment. Aborting.")
Aborting.") + return + + dids = json.loads(dids) + + for did in dids: + filename = f"data/inputs/{did}/0" # 0 for metadata service + print(f"Reading asset file {filename}.") + + return filename + + +def plot(Zhat, npoints): + X0, X1, Z = create_mesh(npoints) + # plot data + model + fig, ax = pyplot.subplots(subplot_kw={"projection": "3d"}) + ax.plot_wireframe(X0, X1, Z, linewidth=1) + ax.scatter(X0, X1, Zhat, c="r", label="model") + pyplot.title("Data + model") + pyplot.show() + + +def run_gpr(local=False): + npoints = 15 + + filename = get_input(local) + if not filename: + print("Could not retrieve filename.") + return + + with open(filename) as datafile: + datafile.seek(0) + res = arff.load(datafile) + + print("Stacking data.") + mat = numpy.stack(res["data"]) + [X, y] = numpy.split(mat, [2], axis=1) + + print("Building Gaussian Process Regressor (GPR) model") + model = gaussian_process.GaussianProcessRegressor() + model.fit(X, y) + yhat = model.predict(X, return_std=False) + Zhat = numpy.reshape(yhat, (npoints, npoints)) + + if local: + print("Plotting results") + plot(Zhat, npoints) + + filename = "gpr.pickle" if local else "/data/outputs/result" + with open(filename, "wb") as pickle_file: + print(f"Pickling results in {filename}") + pickle.dump(Zhat, pickle_file) + + +if __name__ == "__main__": + local = len(sys.argv) == 2 and sys.argv[1] == "local" + run_gpr(local) \ No newline at end of file diff --git a/metadata/exampleDataset.json b/metadata/exampleDataset.json new file mode 100644 index 0000000..f26ee99 --- /dev/null +++ b/metadata/exampleDataset.json @@ -0,0 +1,7 @@ +{ + "fileObject": { + "type": "url", + "url": "https://raw.githubusercontent.com/oceanprotocol/ocean-cli/refs/heads/main/metadata/simpleComputeDataset.json", + "method": "GET" + } +} diff --git a/ocean-protocol-0.0.1.vsix b/ocean-protocol-vscode-extension-0.0.1.vsix similarity index 65% rename from ocean-protocol-0.0.1.vsix rename to ocean-protocol-vscode-extension-0.0.1.vsix index 0b1c0b3..7534b17 100644 Binary files a/ocean-protocol-0.0.1.vsix and b/ocean-protocol-vscode-extension-0.0.1.vsix differ diff --git a/src/extension.ts b/src/extension.ts index 64ffa97..3bd19fe 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -225,40 +225,6 @@ export async function activate(context: vscode.ExtensionContext) { message: 'Generating signature for second result...' }) outputChannel.appendLine('Generating signature for second result...') - // const signatureResult2 = await generateOceanSignature({ - // signer, - // consumerAddress: signer.address, - // jobId, - // index: 1, - // nonce: Date.now() // nonce equals date in milliseconds - // }) - - // Try to retrieve second result (index 1) - // progress.report({ message: 'Retrieving compute results (2/2)...' }) - // outputChannel.appendLine('Retrieving second result...') - // const results2 = await getComputeResult( - // nodeUrl, - // jobId, - // signer.address, - // signatureResult2.signature, - // 1, - // Date.now() // nonce equals date in milliseconds - // ) - - // // Save second result if it exists - // progress.report({ message: 'Saving second result...' 
-        // outputChannel.appendLine('Saving second result...')
-        // console.log('Saving second result to folder path:', resultsFolderPath)
-        // filePath2 = await saveOutput(results2, resultsFolderPath, 'output')
-
-        // // After getting the second result
-        // console.log('Second result content type:', typeof results2)
-        // console.log('Second result keys:', Object.keys(results2))
-        // console.log('File extension:', path.extname(filePath2))
-
-        // // Check file contents
-        // const fileStats = await fs.promises.stat(filePath2)
-        // console.log('File size:', fileStats.size)
       } catch (error) {
         console.log('No second result available:', error)
         outputChannel.appendLine('No second result available')
diff --git a/src/viewProvider.ts b/src/viewProvider.ts
index 53b6194..1aa9269 100644
--- a/src/viewProvider.ts
+++ b/src/viewProvider.ts
@@ -74,8 +74,21 @@ export class OceanProtocolViewProvider implements vscode.WebviewViewProvider {
         canSelectMany: false,
         openLabel: 'Select',
         filters: {
+          'Algorithm Files': ['js', 'py'],
+          'Dataset Files': ['json']
+        }
+      }
+
+      if (data.elementId === 'selectedDatasetPath') {
+        options.filters = {
+          'Dataset Files': ['json']
+        }
+        options.openLabel = 'Select Dataset'
+      } else if (data.elementId === 'selectedAlgorithmPath') {
+        options.filters = {
          'Algorithm Files': ['js', 'py']
+        }
+        options.openLabel = 'Select Algorithm'
       }
 
       const fileUri = await vscode.window.showOpenDialog(options)

@@ -254,7 +267,7 @@ export class OceanProtocolViewProvider implements vscode.WebviewViewProvider {
-
+
@@ -321,7 +334,7 @@ export class OceanProtocolViewProvider implements vscode.WebviewViewProvider {
       document.getElementById('selectResultsFolderBtn').addEventListener('click', () => {
         console.log('Results folder button clicked');
         vscode.postMessage({
-          type: 'selectResultsFolder'
+          type: 'selectResultsFolder'
         });
       });
     }
@@ -329,7 +342,7 @@ export class OceanProtocolViewProvider implements vscode.WebviewViewProvider {
     if (document.getElementById('startComputeBtn')) {
       document.getElementById('startComputeBtn').addEventListener('click', () => {
         const privateKey = document.getElementById('privateKeyInput').value;
-        const nodeUrl = document.getElementById('nodeUrlInput').value || 'http://34.159.64.236:8001';
+        const nodeUrl = document.getElementById('nodeUrlInput').value || 'https://1.c2d.nodes.oceanprotocol.com:8000';
         const dockerImage = document.getElementById('dockerImageInput').value;
         const dockerTag = document.getElementById('dockerTagInput').value;
         // Only require algorithm to be selected