Skip to content

Commit

Permalink
Merge branch 'feature/experiments1' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
neoneye committed Jan 3, 2024
2 parents a123919 + 58dd164 commit 26190f7
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 7 deletions.
2 changes: 1 addition & 1 deletion rust_project/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion rust_project/loda-rust-cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "loda-rust-cli"
version = "2023.12.27"
version = "2024.1.3"
authors = ["Simon Strandgaard <[email protected]>"]
description = "Command line interface for LODA Rust"
repository = "https://github.com/loda-lang/loda-rust"
Expand Down
19 changes: 17 additions & 2 deletions rust_project/loda-rust-cli/src/arc/generate_dataset_histogram.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
//! Generate a dataset with histogram comparisons and a summary.
//!
//! This dataset is available here:
//! The full (huge) dataset, 3M rows, is available here:
//! https://huggingface.co/datasets/neoneye/histogram-comparisons-v1
//!
//! The small dataset, 150k rows, is available here:
//! https://huggingface.co/datasets/neoneye/histogram-comparisons-small-v1
//!
//! Given a prompt similar to ARC input/output raw pixel data,
//! the response is a histogram for every image, comparisons of input/output histograms, and a summary of the histograms.
//!
Expand Down Expand Up @@ -787,7 +790,7 @@ impl GenerateDataset {
}

#[allow(dead_code)]
pub fn generate_fulldataset(path: &Path) -> anyhow::Result<()> {
pub fn generate_dataset_huge(path: &Path) -> anyhow::Result<()> {
let mut generator = GenerateDataset::new();
let number_of_items: u32 = 1000000;
generator.populate(Curriculum::Small, number_of_items, false)?;
Expand All @@ -797,6 +800,18 @@ impl GenerateDataset {
generator.save(&path)?;
Ok(())
}

/// Generate the small variant of the histogram-comparisons dataset and write it to `path`.
///
/// Populates a fresh generator with the same number of items for each of the three
/// curricula (`Small`, `SmallMedium`, `SmallMediumBig`, in that order), shuffles the
/// collected items and saves them.
///
/// # Errors
///
/// Returns the first error reported by `populate` or `save`.
// NOTE(review): no active call site is visible for this function, hence the lint
// suppression — presumably it is enabled by hand when regenerating the dataset.
#[allow(dead_code)]
pub fn generate_dataset_small(path: &Path) -> anyhow::Result<()> {
    // 3 curricula x 50_000 items; presumably the ~150k rows of the published small dataset.
    let items_per_curriculum: u32 = 50_000;
    let curricula = [
        Curriculum::Small,
        Curriculum::SmallMedium,
        Curriculum::SmallMediumBig,
    ];

    let mut dataset = GenerateDataset::new();
    for curriculum in curricula {
        dataset.populate(curriculum, items_per_curriculum, false)?;
    }
    dataset.shuffle();
    dataset.save(path)?;
    Ok(())
}
}

#[cfg(test)]
Expand Down
9 changes: 6 additions & 3 deletions rust_project/loda-rust-cli/src/subcommand_arc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use std::path::PathBuf;

#[cfg(feature = "loda-rust-arc")]
use crate::arc::{SubcommandARCSize, SubcommandARCWeb, TraverseProgramsAndModels};
// use crate::arc::GenerateDataset;

#[derive(Debug)]
pub enum SubcommandARCMode {
Expand Down Expand Up @@ -46,6 +45,9 @@ impl SubcommandARC {

#[cfg(feature = "loda-rust-arc")]
pub fn run(mode: SubcommandARCMode) -> anyhow::Result<()> {
#[allow(unused_imports)]
use crate::arc::GenerateDataset;

match mode {
SubcommandARCMode::CheckAllExistingSolutions => {
return TraverseProgramsAndModels::check_all_existing_solutions();
Expand All @@ -63,8 +65,9 @@ impl SubcommandARC {
return TraverseProgramsAndModels::label_all_puzzles();
},
SubcommandARCMode::ExportDataset => {
// let path: PathBuf = PathBuf::from("/Users/neoneye/Downloads/histograms.jsonl");
// GenerateDataset::generate_fulldataset(&path)?;
// let path: PathBuf = PathBuf::from("/Users/neoneye/Downloads/histogram-comparisons.jsonl");
// GenerateDataset::generate_dataset_huge(&path)?;
// GenerateDataset::generate_dataset_small(&path)?;
// return Ok(());
return TraverseProgramsAndModels::export_dataset();
},
Expand Down

0 comments on commit 26190f7

Please sign in to comment.