diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 00000000..ce654236 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. +# +# SPDX-License-Identifier: MIT + +name: "Publish to PyPI" + +on: + workflow_dispatch: + inputs: + target: + description: 'Target' + required: true + default: 'PyPI' + type: choice + options: + - PyPI + - TestPyPi + # push: + # tags: + # # Publish on any tag starting with a `v`, e.g., v0.1.0 + # - v* + +run-name: Publish to ${{ inputs.target }} + +jobs: + run: + runs-on: ubuntu-latest + environment: + name: pypi + permissions: + id-token: write + contents: read + steps: + - name: Checkout + uses: actions/checkout@v5 + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Build + run: uv build + # Check that basic features work and we didn't forget to include crucial files + - name: Smoke test (wheel) + run: uv run --isolated --no-project --with dist/*.whl tests/smoke_test.py + - name: Smoke test (source distribution) + run: uv run --isolated --no-project --with dist/*.tar.gz tests/smoke_test.py + - name: Publish + run: uv publish ${{ inputs.target == 'TestPyPi' && '--index testpypi' || '' }} + - name: Summary + run: | + echo "### Published OpenTSLM to ${{ inputs.target }} :rocket:" >> $GITHUB_STEP_SUMMARY + echo "Version: \`$(uv version --short)\`" >> $GITHUB_STEP_SUMMARY + echo "URL: https://${{ inputs.target == 'TestPyPi' && 'test.' 
|| '' }}pypi.org/project/opentslm/$(uv version --short)/" >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml index e94f5b5b..91f26de0 100644 --- a/.github/workflows/static-analysis.yml +++ b/.github/workflows/static-analysis.yml @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/.gitignore b/.gitignore index 7c660ca3..3f42fb9d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,18 +1,22 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT -venv +.venv +.vscode __pycache__ .DS_STORE -**/.DS_STORE raw_data +**/data/* +!**/data/.gitkeep *.ts *.zip -./__pycache__ upload_to_huggingface.py + +dist/ + +*.license \ No newline at end of file diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index a27cc5d4..00000000 --- a/.gitmodules +++ /dev/null @@ -1,12 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT - -[submodule "open_flamingo"] - path = open_flamingo - url = https://github.com/mlfoundations/open_flamingo.git -[submodule "src/open_flamingo"] - path = src/open_flamingo - url = https://github.com/mlfoundations/open_flamingo.git diff --git a/.linkspector.yml b/.linkspector.yml index 05471bf8..680e3f79 100644 --- a/.linkspector.yml +++ b/.linkspector.yml @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/model/__init__.py b/.python-version similarity index 69% rename from src/model/__init__.py rename to .python-version index 266b1365..d66339f8 100644 --- a/src/model/__init__.py +++ b/.python-version @@ -1,5 +1,7 @@ -# This source file is part of the OpenTSLM open-source project # # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) # # SPDX-License-Identifier: MIT +# + +3.12 diff --git a/.reuse/templates/opentslm.jinja2 b/.reuse/templates/opentslm.jinja2 deleted file mode 100644 index 17928cd4..00000000 --- a/.reuse/templates/opentslm.jinja2 +++ /dev/null @@ -1,9 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -{% for line in copyright_lines %} -{{ line }} -{% endfor %} - -{% for expr in spdx_expressions %} -SPDX-License-Identifier: {{ expr }} -{% endfor %} \ No newline at end of file diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index fa43bcdc..f908a9dc 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,7 +1,6 @@ diff --git a/README.md b/README.md index 88abc42f..296bd7b5 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # OpenTSLM: Time-Series Language Models for Reasoning over Multivariate Medical Text- and Time-Series Data +[![PyPI - Version](https://img.shields.io/pypi/v/opentslm)](https://pypi.org/project/opentslm) [![DOI](https://img.shields.io/badge/DOI-10.13140/RG.2.2.14827.60963-blue.svg)](https://doi.org/10.13140/RG.2.2.14827.60963) [![Static Analysis](https://github.com/StanfordBDHG/OpenTSLM/actions/workflows/static-analysis.yml/badge.svg)](https://github.com/StanfordBDHG/OpenTSLM/actions/workflows/static-analysis.yml) @@ -14,7 +14,7 @@ SPDX-License-Identifier: MIT Large Language Models (LLMs) have emerged as powerful tools for interpreting multimodal data (e.g., images, audio, text), often surpassing specialized models. 
In medicine, they hold particular promise for synthesizing large volumes of clinical information into actionable insights and patient-facing digital health applications. Yet, a major limitation remains their inability to handle time series data. To overcome this gap, we present OpenTSLM, a family of Time Series Language Models (TSLMs) created by integrating time series as a native modality to pretrained Large Language Models, enabling natural-language prompting and reasoning over multiple time series of any length [...] **[🔗 Read the full paper](https://doi.org/10.13140/RG.2.2.14827.60963)**

- Schematic Overview + Schematic Overview

@@ -23,24 +23,17 @@ Large Language Models (LLMs) have emerged as powerful tools for interpreting mul OpenTSLM models can reason over multiple time series of any length at once, generating findings, captions, and rationales in natural language. We tested these models across a wide range of tasks spanning Human Activity Recognition (HAR) from 3-axis acceleration data, sleep staging from EEG readings, 12-lead ECG question answering, and time series captioning. Some examples are shown below, more are available in the paper.

- ECG Rationale - HAR Rationale - M4 Caption + ECG Rationale + HAR Rationale + M4 Caption

## Installation -1. **Clone the Repository** - - ```bash - git clone https://github.com/StanfordBDHG/OpenTSLM.git --recurse-submodules - ``` - -2. **Install Dependencies** - ```bash - pip install -r requirements.txt - ``` +```bash +pip install opentslm +``` ## LLM Setup @@ -48,19 +41,19 @@ OpenTSLM models can reason over multiple time series of any length at once, gene OpenTSLM is designed to work with Llama and Gemma models, with Llama 3.2 1B as the default. These models are stored in Hugging Face repositories which may require access permissions. Follow these steps to gain access and download: 1. **Request Access (for Llama models)** - Visit the Llama model repository (e.g., https://huggingface.co/meta-llama/Llama-3.2-1B) or Gemma models repository (https://huggingface.co/google/gemma-3-270m) and request access from Meta. + Visit the Llama model repository (e.g., https://huggingface.co/meta-llama/Llama-3.2-1B) or Gemma models repository (https://huggingface.co/google/gemma-3-270m) and request access from Meta. 2. **Authenticate with Hugging Face** - Log in to your Hugging Face account and configure the CLI: + Log in to your Hugging Face account and configure the CLI: - ```bash - huggingface-cli login - ``` + ```bash + huggingface-cli login + ``` 3. **Create an API Token** - - Go to your Hugging Face settings: https://huggingface.co/settings/tokens - - Generate a new token with `read` scope. - - Copy the token for CLI login. + - Go to your Hugging Face settings: https://huggingface.co/settings/tokens + - Generate a new token with `read` scope. + - Copy the token for CLI login. ### Supported Models @@ -87,15 +80,11 @@ A factory class called `OpenTSLM` for easily loading pre-trained models from Hug There are [demo scripts](demo/huggingface/) available which use the following minimal code. 
If you want to create your own applications, create a new file in **this repo folder** and use the following code as start: ```python -import sys -import os -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src"))) - -from model.llm.OpenTSLM import OpenTSLM -from time_series_datasets.TSQADataset import TSQADataset -from time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate +from opentslm import OpenTSLM +from opentslm.time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate from torch.utils.data import DataLoader -from model_config import PATCH_SIZE +from opentslm.model_config import PATCH_SIZE REPO_ID = "OpenTSLM/llama-3.2-1b-tsqa-sp" @@ -104,22 +93,41 @@ model = OpenTSLM.load_pretrained(REPO_ID, device="cuda" if torch.cuda.is_availab test_dataset = TSQADataset("test", EOS_TOKEN=model.get_eos_token()) test_loader = DataLoader( - test_dataset, - shuffle=False, - batch_size=1, - collate_fn=lambda batch: extend_time_series_to_match_patch_size_and_aggregate( - batch, patch_size=PATCH_SIZE - ), + test_dataset, + shuffle=False, + batch_size=1, + collate_fn=lambda batch: extend_time_series_to_match_patch_size_and_aggregate( + batch, patch_size=PATCH_SIZE + ), ) for i, batch in enumerate(test_loader): - predictions = model.generate(batch, max_new_tokens=200) - for sample, pred in zip(batch, predictions): - print("Question:", sample.get("pre_prompt", "N/A")) - print("Answer:", sample.get("answer", "N/A")) - print("Output:", pred) - if i >= 4: - break + predictions = model.generate(batch, max_new_tokens=200) + for sample, pred in zip(batch, predictions): + print("Question:", sample.get("pre_prompt", "N/A")) + print("Answer:", sample.get("answer", "N/A")) + print("Output:", pred) + if i >= 4: + break +``` + +## Building and finetuning your own models + +To run the demos and use finetuning scripts **clone the repository** 
and set up all dependencies. We recommend using [uv](https://docs.astral.sh/uv/) to set up the environment, but you can also use pip: + +```bash +git clone https://github.com/StanfordBDHG/OpenTSLM.git + + +# uv environment management (recommended). Installs uv if it does not exist and creates the virtual environment +command -v uv > /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh +uv sync --all-groups +source .venv/bin/activate + + +# or alternatively install via pip: +pip install -r requirements.txt + ``` ### HuggingFace Demo Scripts @@ -166,9 +174,9 @@ REPO_ID = "OpenTSLM/llama-3.2-1b-tsqa-flamingo" # Flamingo model All pretrained models are available under the `OpenTSLM` organization on HuggingFace Hub. Model names follow the pattern: - `OpenTSLM/{base_model}-{dataset}-{model_type}` - - `base_model`: `llama-3.2-1b`, `llama-3.2-3b`, `gemma-3-1b-pt`, `gemma-3-270m` - - `dataset`: `tsqa`, `m4`, `har`, `sleep`, `ecg` - - `model_type`: `sp` (Soft Prompt) or `flamingo` (Flamingo) + - `base_model`: `llama-3.2-1b`, `llama-3.2-3b`, `gemma-3-1b-pt`, `gemma-3-270m` + - `dataset`: `tsqa`, `m4`, `har`, `sleep`, `ecg` + - `model_type`: `sp` (Soft Prompt) or `flamingo` (Flamingo) Example: `OpenTSLM/llama-3.2-1b-ecg-flamingo` @@ -229,6 +237,24 @@ python curriculum_learning.py --model OpenTSLMFlamingo --eval_only - `--gradient_checkpointing`: Enable gradient checkpointing for memory efficiency - `--verbose`: Enable verbose logging +### Helper Scripts + +Helper scripts for analysis, testing, and batch processing are available in the `scripts/` directory: + +**Shell Scripts:** +- **`run_all_memory.sh`** - Run comprehensive memory usage analysis across all stages +- **`run_all_memory_missing.sh`** - Run memory analysis for missing stages only + +**Python Scripts:** +- **`create_doctor_eval_dataset.py`** - Create evaluation dataset for doctor assessments +- **`get_memory_use.py`** - Analyze and report memory usage across stages +- **`plot_memory_usage.py`** - Visualize 
memory usage patterns +- **`plot_memory_simulation.py`** - Simulate and plot memory requirements +- **`plot_memory_simulation_per_length.py`** - Analyze memory usage by sequence length +- **`hf_test.py`** - Test HuggingFace model loading and inference + +These scripts can be customized by editing the parameters directly or by passing command-line arguments. + ### Repository Naming Convention - Repository IDs ending with `-sp` will load and return `OpenTSLMSP` models @@ -335,22 +361,24 @@ For researchers and project partners interested in collaboration opportunities, This project is licensed under the MIT License. -We use the [REUSE specification](https://reuse.software/spec/) to ensure consistent and machine-readable licensing across the repository. +OpenTSLM uses [REUSE specification](https://reuse.software/spec/) to ensure consistent and machine-readable licensing across the repository. To add or update license headers, run: ```bash reuse annotate --recursive \ - --template opentslm \ --copyright "Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)" \ + --copyright "This source file is part of the OpenTSLM open-source project." \ --license MIT \ - --skip-unrecognised \ + --skip-unrecognized \ . ``` + +
- Stanford Biodesign               - ETH Centre for Digital Health Interventions - ETH Agentic Systems Lab + Stanford Biodesign               + ETH Centre for Digital Health Interventions + ETH Agentic Systems Lab
diff --git a/REUSE.toml b/REUSE.toml index 9016f084..f370d80b 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -1,6 +1,6 @@ version = 1 [[annotations]] -path = ["data/**"] +path = ["assets/**", "data/**", "**/*.png", "*.svg", "*.png", "**/*.pt", "**/*.jsonl", "**/*.json", ".gitignore", "**/uv.lock", "LICENSE.md", "**/requirements.txt"] SPDX-FileCopyrightText = "2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)" SPDX-License-Identifier = "MIT" diff --git a/assets/ASL_Logo2.svg.license b/assets/ASL_Logo2.svg.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/ASL_Logo2.svg.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/ASLwhite.svg.license b/assets/ASLwhite.svg.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/ASLwhite.svg.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/CDHI_white.svg.license b/assets/CDHI_white.svg.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/CDHI_white.svg.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/cdhi_logo.png.license b/assets/cdhi_logo.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/cdhi_logo.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see 
CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/ecg_rationale.png.license b/assets/ecg_rationale.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/ecg_rationale.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/eth_cdhi_logo.png.license b/assets/eth_cdhi_logo.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/eth_cdhi_logo.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/eth_logo.png.license b/assets/eth_logo.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/eth_logo.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/har_rationale.png.license b/assets/har_rationale.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/har_rationale.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/m4_caption.png.license b/assets/m4_caption.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/m4_caption.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors 
(see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/schematic_overview_2.png.license b/assets/schematic_overview_2.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/schematic_overview_2.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/schematic_overview_3.png.license b/assets/schematic_overview_3.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/schematic_overview_3.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/sleep_rationale.png.license b/assets/sleep_rationale.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/sleep_rationale.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/assets/stanford_biodesign_logo.png.license b/assets/stanford_biodesign_logo.png.license deleted file mode 100644 index e83264aa..00000000 --- a/assets/stanford_biodesign_logo.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/curriculum_learning.py b/curriculum_learning.py index a0d3b05b..4b25fe25 100644 --- a/curriculum_learning.py +++ b/curriculum_learning.py @@ -1,24 +1,21 @@ -# This source file is part of the OpenTSLM open-source project -# # 
SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT -import sys -import os -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src"))) +import os import json import os as _os import argparse from typing import List, Optional, Dict, Any, Callable -from time_series_datasets.TSQADataset import TSQADataset -from time_series_datasets.m4.M4QADataset import M4QADataset -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset -from time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset -from time_series_datasets.util import ( +from opentslm.time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.m4.M4QADataset import M4QADataset +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +from opentslm.time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) import torch @@ -40,14 +37,14 @@ from tqdm.auto import tqdm from transformers import get_linear_schedule_with_warmup -from model.encoder.TransformerCNNEncoder import TransformerCNNEncoder -from model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo -from model.llm.OpenTSLMSP import OpenTSLMSP -from model.projector.MLPProjector import MLPProjector +from opentslm.model.encoder.TransformerCNNEncoder import TransformerCNNEncoder +from opentslm.model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo +from opentslm.model.llm.OpenTSLMSP import OpenTSLMSP +from opentslm.model.projector.MLPProjector import MLPProjector import datetime -from logger import 
get_logger, set_global_verbose +from opentslm.logger import get_logger, set_global_verbose -from model_config import ( +from opentslm.model_config import ( BATCH_SIZE, EARLY_STOP_PAT, GRAD_CLIP_NORM, diff --git a/data/.gitignore b/data/.gitignore deleted file mode 100644 index 2f75232d..00000000 --- a/data/.gitignore +++ /dev/null @@ -1,12 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT - -*.csv -*.json -*.jsonl -*.jsonl.gz -*.jsonl.gz.part -*.jsonl.gz.part.1 \ No newline at end of file diff --git a/notebooks/.gitignore b/data/.gitkeep similarity index 100% rename from notebooks/.gitignore rename to data/.gitkeep diff --git a/demo/huggingface/.gitignore b/demo/huggingface/.gitignore deleted file mode 100644 index 935582c2..00000000 --- a/demo/huggingface/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT - -data \ No newline at end of file diff --git a/demo/huggingface/01_test_hf_tsqa.py b/demo/huggingface/01_test_hf_tsqa.py index eab54d1a..d4545587 100755 --- a/demo/huggingface/01_test_hf_tsqa.py +++ b/demo/huggingface/01_test_hf_tsqa.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT """ @@ -13,17 +12,11 @@ 4. 
Prints model outputs """ -import sys -import os - -# Add src to path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) - -from model.llm.OpenTSLM import OpenTSLM -from time_series_datasets.TSQADataset import TSQADataset -from time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate +from opentslm.model.llm.OpenTSLM import OpenTSLM +from opentslm.time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate from torch.utils.data import DataLoader -from model_config import PATCH_SIZE +from opentslm.model_config import PATCH_SIZE import torch # Model repository ID - change this to test different models REPO_ID = "OpenTSLM/llama-3.2-1b-tsqa-sp" diff --git a/demo/huggingface/02_test_hf_m4.py b/demo/huggingface/02_test_hf_m4.py index ab010302..aa5e0672 100755 --- a/demo/huggingface/02_test_hf_m4.py +++ b/demo/huggingface/02_test_hf_m4.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT """ @@ -13,17 +12,11 @@ 4. 
Prints model outputs """ -import sys -import os - -# Add src to path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) - -from model.llm.OpenTSLM import OpenTSLM -from time_series_datasets.m4.M4QADataset import M4QADataset -from time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate +from opentslm.model.llm.OpenTSLM import OpenTSLM +from opentslm.time_series_datasets.m4.M4QADataset import M4QADataset +from opentslm.time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate from torch.utils.data import DataLoader -from model_config import PATCH_SIZE +from opentslm.model_config import PATCH_SIZE import torch # Model repository ID - change this to test different models diff --git a/demo/huggingface/03_test_hf_har_cot.py b/demo/huggingface/03_test_hf_har_cot.py index 762c0d5a..910f088d 100755 --- a/demo/huggingface/03_test_hf_har_cot.py +++ b/demo/huggingface/03_test_hf_har_cot.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT """ @@ -13,17 +12,11 @@ 4. 
Prints model outputs """ -import sys -import os - -# Add src to path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) - -from model.llm.OpenTSLM import OpenTSLM -from time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset -from time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate +from opentslm.model.llm.OpenTSLM import OpenTSLM +from opentslm.time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset +from opentslm.time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate from torch.utils.data import DataLoader -from model_config import PATCH_SIZE +from opentslm.model_config import PATCH_SIZE import torch # Model repository ID - change this to test different models diff --git a/demo/huggingface/04_test_hf_sleep_cot.py b/demo/huggingface/04_test_hf_sleep_cot.py index 1e7498a2..b878b0da 100755 --- a/demo/huggingface/04_test_hf_sleep_cot.py +++ b/demo/huggingface/04_test_hf_sleep_cot.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT """ @@ -13,17 +12,11 @@ 4. 
Prints model outputs """ -import sys -import os - -# Add src to path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) - -from model.llm.OpenTSLM import OpenTSLM -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset -from time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate +from opentslm.model.llm.OpenTSLM import OpenTSLM +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +from opentslm.time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate from torch.utils.data import DataLoader -from model_config import PATCH_SIZE +from opentslm.model_config import PATCH_SIZE import torch # Model repository ID - change this to test different models diff --git a/demo/huggingface/05_test_hf_ecg_qa_cot.py b/demo/huggingface/05_test_hf_ecg_qa_cot.py index c399b814..eacf2bfd 100755 --- a/demo/huggingface/05_test_hf_ecg_qa_cot.py +++ b/demo/huggingface/05_test_hf_ecg_qa_cot.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT """ @@ -13,17 +12,11 @@ 4. 
Prints model outputs """ -import sys -import os - -# Add src to path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) - -from model.llm.OpenTSLM import OpenTSLM -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset -from time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate +from opentslm.model.llm.OpenTSLM import OpenTSLM +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate from torch.utils.data import DataLoader -from model_config import PATCH_SIZE +from opentslm.model_config import PATCH_SIZE import torch # Model repository ID - change this to test different models diff --git a/evaluation/.gitignore b/evaluation/.gitignore deleted file mode 100644 index b33036a5..00000000 --- a/evaluation/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT - -*.jsonl \ No newline at end of file diff --git a/evaluation/baseline/README.md b/evaluation/baseline/README.md index c0bb9b3b..2a4c10e2 100644 --- a/evaluation/baseline/README.md +++ b/evaluation/baseline/README.md @@ -1,7 +1,6 @@ diff --git a/evaluation/baseline/common_evaluator.py b/evaluation/baseline/common_evaluator.py index 56287d10..1c265421 100644 --- a/evaluation/baseline/common_evaluator.py +++ b/evaluation/baseline/common_evaluator.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -21,13 +20,9 @@ import matplotlib.pyplot as plt from time import sleep -# Add src to path -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "src")) -) +from opentslm.logger import get_logger -# Import OpenAIPipeline -from openai_pipeline import OpenAIPipeline +from .openai_pipeline import OpenAIPipeline class CommonEvaluator: @@ -101,7 +96,7 @@ def load_dataset( print(f"Loading dataset: {dataset_class.__name__}") # Import the gruver formatters - from gruver_llmtime_tokenizer import gpt_formatter, llama_formatter + from .gruver_llmtime_tokenizer import gpt_formatter, llama_formatter # Choose formatter based on model type model_name = getattr(self, "current_model_name", None) @@ -528,9 +523,9 @@ def _get_existing_results_count(self, model_name: str, dataset_name: str) -> int ) if os.path.exists(jsonl_file): count = 0 with open(jsonl_file, "r") as f: for line in f: if line.strip(): count += 1 return count diff --git a/evaluation/baseline/common_evaluator_plot.py b/evaluation/baseline/common_evaluator_plot.py index fbe0c8f3..f2f80a71 100644 --- a/evaluation/baseline/common_evaluator_plot.py +++ b/evaluation/baseline/common_evaluator_plot.py @@ -1,12 +1,9 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT -import os import io -import sys import base64 from typing import Type, Callable, Dict, List, Any, Optional @@ -19,12 +16,7 @@ from PIL import Image import pandas as pd -# Add src to path -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "src")) -) -# Import OpenAIPipeline from openai_pipeline import OpenAIPipeline from common_evaluator import CommonEvaluator diff --git a/evaluation/baseline/eval.sh b/evaluation/baseline/eval.sh index eee8a90b..2924fca5 100755 --- a/evaluation/baseline/eval.sh +++ b/evaluation/baseline/eval.sh @@ -1,8 +1,7 @@ #!/usr/bin/env bash -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/baseline/eval_remaining.sh b/evaluation/baseline/eval_remaining.sh index c8d6e2ed..ce7095f2 100644 --- a/evaluation/baseline/eval_remaining.sh +++ b/evaluation/baseline/eval_remaining.sh @@ -1,8 +1,7 @@ #!/usr/bin/env bash -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/baseline/evaluate_all.py b/evaluation/baseline/evaluate_all.py index 6776ac41..db1a8914 100644 --- a/evaluation/baseline/evaluate_all.py +++ b/evaluation/baseline/evaluate_all.py @@ -1,14 +1,13 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT from typing import Dict, Any, Callable from common_evaluator import CommonEvaluator -from time_series_datasets.TSQADataset import TSQADataset -from time_series_datasets.pamap2.PAMAP2AccQADataset import PAMAP2AccQADataset -from time_series_datasets.pamap2.PAMAP2CoTQADataset import PAMAP2CoTQADataset +from opentslm.time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.pamap2.PAMAP2AccQADataset import PAMAP2AccQADataset +from opentslm.time_series_datasets.pamap2.PAMAP2CoTQADataset import PAMAP2CoTQADataset # Import evaluation functions from evaluate_tsqa import evaluate_tsqa diff --git a/evaluation/baseline/evaluate_ecg_qa.py b/evaluation/baseline/evaluate_ecg_qa.py index 9b4c3c5d..49eb8767 100644 --- a/evaluation/baseline/evaluate_ecg_qa.py +++ b/evaluation/baseline/evaluate_ecg_qa.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -10,7 +9,7 @@ from common_evaluator import CommonEvaluator -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset def extract_answer(text: str) -> str: diff --git a/evaluation/baseline/evaluate_ecg_qa_plot.py b/evaluation/baseline/evaluate_ecg_qa_plot.py index 305669e4..062c2302 100644 --- a/evaluation/baseline/evaluate_ecg_qa_plot.py +++ b/evaluation/baseline/evaluate_ecg_qa_plot.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -15,7 +14,7 @@ import matplotlib.pyplot as plt from common_evaluator_plot import CommonEvaluatorPlot -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset def extract_answer(text: str) -> str: diff --git a/evaluation/baseline/evaluate_har.py b/evaluation/baseline/evaluate_har.py index 84dcf222..8892a9aa 100644 --- a/evaluation/baseline/evaluate_har.py +++ b/evaluation/baseline/evaluate_har.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -8,7 +7,7 @@ import sys from typing import Dict, Any -from time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset +from opentslm.time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset def extract_label_from_prediction(prediction: str) -> str: diff --git a/evaluation/baseline/evaluate_har_plot.py b/evaluation/baseline/evaluate_har_plot.py index 73b2b51b..a5141fcb 100644 --- a/evaluation/baseline/evaluate_har_plot.py +++ b/evaluation/baseline/evaluate_har_plot.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -14,8 +13,8 @@ import matplotlib.pyplot as plt from common_evaluator_plot import CommonEvaluatorPlot -from time_series_datasets.pamap2.PAMAP2AccQADataset import PAMAP2AccQADataset -from time_series_datasets.har_cot.HARAccQADataset import HARAccQADataset +from opentslm.time_series_datasets.pamap2.PAMAP2AccQADataset import PAMAP2AccQADataset +from opentslm.time_series_datasets.har_cot.HARAccQADataset import HARAccQADataset def extract_label_from_prediction(prediction: str) -> str: """ diff --git a/evaluation/baseline/evaluate_pamap.py b/evaluation/baseline/evaluate_pamap.py index d966fa31..22eeeb58 100644 --- a/evaluation/baseline/evaluate_pamap.py +++ b/evaluation/baseline/evaluate_pamap.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -9,7 +8,7 @@ from typing import Dict, Any from common_evaluator import CommonEvaluator -from time_series_datasets.pamap2.PAMAP2AccQADataset import PAMAP2AccQADataset +from opentslm.time_series_datasets.pamap2.PAMAP2AccQADataset import PAMAP2AccQADataset def extract_label_from_prediction(prediction: str) -> str: diff --git a/evaluation/baseline/evaluate_sleep_cot.py b/evaluation/baseline/evaluate_sleep_cot.py index 5d4058e8..8ed5087a 100644 --- a/evaluation/baseline/evaluate_sleep_cot.py +++ b/evaluation/baseline/evaluate_sleep_cot.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -9,7 +8,7 @@ from typing import Dict, Any from common_evaluator import CommonEvaluator -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset def extract_sleep_stage_from_prediction(prediction: str) -> str: diff --git a/evaluation/baseline/evaluate_sleep_plot.py b/evaluation/baseline/evaluate_sleep_plot.py index 10029ba4..ea224088 100644 --- a/evaluation/baseline/evaluate_sleep_plot.py +++ b/evaluation/baseline/evaluate_sleep_plot.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -14,7 +13,7 @@ import numpy as np from common_evaluator_plot import CommonEvaluatorPlot -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset def extract_label_from_text(text: str) -> str: diff --git a/evaluation/baseline/evaluate_tsqa.py b/evaluation/baseline/evaluate_tsqa.py index 96118bcf..157bb77a 100644 --- a/evaluation/baseline/evaluate_tsqa.py +++ b/evaluation/baseline/evaluate_tsqa.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -9,7 +8,7 @@ from typing import Dict, Any from common_evaluator import CommonEvaluator -from time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.TSQADataset import TSQADataset def evaluate_tsqa(ground_truth: str, prediction: str) -> Dict[str, Any]: diff --git a/evaluation/baseline/evaluate_tsqa_plot.py b/evaluation/baseline/evaluate_tsqa_plot.py index 07ff3253..8e4cba28 100644 --- a/evaluation/baseline/evaluate_tsqa_plot.py +++ b/evaluation/baseline/evaluate_tsqa_plot.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -13,7 +12,7 @@ import matplotlib.pyplot as plt from common_evaluator_plot import CommonEvaluatorPlot -from time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.TSQADataset import TSQADataset def evaluate_tsqa(ground_truth: str, prediction: str) -> Dict[str, Any]: diff --git a/evaluation/baseline/gruver_llmtime_tokenizer.py b/evaluation/baseline/gruver_llmtime_tokenizer.py index ad08cacd..3f82ee0c 100644 --- a/evaluation/baseline/gruver_llmtime_tokenizer.py +++ b/evaluation/baseline/gruver_llmtime_tokenizer.py @@ -1,10 +1,8 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT -import sys import numpy as np from functools import partial from dataclasses import dataclass diff --git a/evaluation/baseline/openai_pipeline.py b/evaluation/baseline/openai_pipeline.py index e69588b6..1f4dd62a 100644 --- a/evaluation/baseline/openai_pipeline.py +++ b/evaluation/baseline/openai_pipeline.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/baseline/parse_predictions_baseline.py b/evaluation/baseline/parse_predictions_baseline.py index a77e892f..93b83c32 100644 --- a/evaluation/baseline/parse_predictions_baseline.py +++ b/evaluation/baseline/parse_predictions_baseline.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -15,13 +14,6 @@ from pathlib import Path from typing import Dict, List -import sys - -# Ensure repository root is on sys.path so 'evaluation' package is importable -REPO_ROOT = Path(__file__).resolve().parents[2] -if str(REPO_ROOT) not in sys.path: - sys.path.insert(0, str(REPO_ROOT)) - from evaluation.opentslm.parse_predictions import ( calculate_f1_score, calculate_f1_stats, diff --git a/evaluation/baseline/parse_predictions_sleep_baseline.py b/evaluation/baseline/parse_predictions_sleep_baseline.py index 4e090919..7d7c2b3b 100644 --- a/evaluation/baseline/parse_predictions_sleep_baseline.py +++ b/evaluation/baseline/parse_predictions_sleep_baseline.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -42,17 +41,6 @@ import json from pathlib import Path from typing import Dict, List -import sys - -# Ensure repository root is on sys.path so 'evaluation' package is importable -REPO_ROOT = Path(__file__).resolve().parents[2] -if str(REPO_ROOT) not in sys.path: - sys.path.insert(0, str(REPO_ROOT)) - -# Also ensure 'src' is on sys.path if needed in the future -SRC_ROOT = REPO_ROOT / "src" -if str(SRC_ROOT) not in sys.path: - sys.path.insert(0, str(SRC_ROOT)) # --- Inline minimal utilities (avoid importing modules that require extra packages) --- import re diff --git a/evaluation/baseline/parse_predictions_tsqa_baseline.py b/evaluation/baseline/parse_predictions_tsqa_baseline.py index 5db56c79..33a289d8 100644 --- a/evaluation/baseline/parse_predictions_tsqa_baseline.py +++ b/evaluation/baseline/parse_predictions_tsqa_baseline.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -43,12 +42,6 @@ import re from pathlib import Path from typing import Dict, List -import sys - -# Ensure repository root is on sys.path so 'evaluation' package is importable -REPO_ROOT = Path(__file__).resolve().parents[2] -if str(REPO_ROOT) not in sys.path: - sys.path.insert(0, str(REPO_ROOT)) from evaluation.opentslm.parse_predictions import ( calculate_f1_score, diff --git a/evaluation/clinician_eval/create_doctor_eval_dataset.py b/evaluation/clinician_eval/create_doctor_eval_dataset.py index 7cc2fa58..637dfa0d 100644 --- a/evaluation/clinician_eval/create_doctor_eval_dataset.py +++ b/evaluation/clinician_eval/create_doctor_eval_dataset.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -14,22 +13,17 @@ import json import os -import sys import re import pandas as pd import numpy as np import matplotlib.pyplot as plt import wfdb -from pathlib import Path -from typing import Dict, List, Set, Tuple +from typing import Dict, List, Tuple from collections import defaultdict from tqdm import tqdm -import shutil -# Add the src directory to the path -sys.path.append(os.path.join(os.path.dirname(__file__), "src")) -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset -from time_series_datasets.ecg_qa.plot_example import draw_ecg, get_ptbxl_ecg_path +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.time_series_datasets.ecg_qa.plot_example import get_ptbxl_ecg_path # Configuration MODEL_PREDICTIONS_FILE = "/Users/planger/Development/EmbedHealth/evaluation/opentslm/ecg_qa_cot/llama3b_flamingo_predictions.jsonl" diff --git a/evaluation/clinicianecg/.gitignore 
b/evaluation/clinicianecg/.gitignore deleted file mode 100644 index 572e1fe4..00000000 --- a/evaluation/clinicianecg/.gitignore +++ /dev/null @@ -1,19 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT - -.temp -.venv -venv -__pycache__ - -# ECG dataset - large files that should be stored separately (e.g., Git LFS, cloud storage) -data/ - -# Generated reviewer assignments and workbooks -reviewer_workbooks/ - -# Analysis results - these are generated and can be large -analysis_results/ \ No newline at end of file diff --git a/evaluation/clinicianecg/README.md b/evaluation/clinicianecg/README.md index ed8bdbf6..95b9e2f4 100644 --- a/evaluation/clinicianecg/README.md +++ b/evaluation/clinicianecg/README.md @@ -1,7 +1,6 @@ diff --git a/evaluation/clinicianecg/REVIEWER_INSTRUCTIONS.md b/evaluation/clinicianecg/REVIEWER_INSTRUCTIONS.md index b76b02e9..fbd9e849 100644 --- a/evaluation/clinicianecg/REVIEWER_INSTRUCTIONS.md +++ b/evaluation/clinicianecg/REVIEWER_INSTRUCTIONS.md @@ -1,7 +1,6 @@ diff --git a/evaluation/clinicianecg/pipeline/1_dataset_analyzer.py b/evaluation/clinicianecg/pipeline/1_dataset_analyzer.py index 317d6dcf..e8f1f838 100644 --- a/evaluation/clinicianecg/pipeline/1_dataset_analyzer.py +++ b/evaluation/clinicianecg/pipeline/1_dataset_analyzer.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/evaluation/clinicianecg/pipeline/2.5_demo_responses.py b/evaluation/clinicianecg/pipeline/2.5_demo_responses.py index d174fa06..f022ad6d 100644 --- a/evaluation/clinicianecg/pipeline/2.5_demo_responses.py +++ b/evaluation/clinicianecg/pipeline/2.5_demo_responses.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/clinicianecg/pipeline/2_excel_generator.py b/evaluation/clinicianecg/pipeline/2_excel_generator.py index 1dee6d31..a439abe8 100644 --- a/evaluation/clinicianecg/pipeline/2_excel_generator.py +++ b/evaluation/clinicianecg/pipeline/2_excel_generator.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/clinicianecg/pipeline/3_response_analyzer.py b/evaluation/clinicianecg/pipeline/3_response_analyzer.py index 024c80f6..0b03f631 100644 --- a/evaluation/clinicianecg/pipeline/3_response_analyzer.py +++ b/evaluation/clinicianecg/pipeline/3_response_analyzer.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/evaluation/clinicianecg/pipeline/config.json.license b/evaluation/clinicianecg/pipeline/config.json.license deleted file mode 100644 index e83264aa..00000000 --- a/evaluation/clinicianecg/pipeline/config.json.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/evaluation/clinicianecg/pipeline/requirements.txt b/evaluation/clinicianecg/pipeline/requirements.txt index 17ad0aba..672d181a 100644 --- a/evaluation/clinicianecg/pipeline/requirements.txt +++ b/evaluation/clinicianecg/pipeline/requirements.txt @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/memory/get_memory_use.py b/evaluation/memory/get_memory_use.py index c7cf943d..ec80ce1f 100644 --- a/evaluation/memory/get_memory_use.py +++ b/evaluation/memory/get_memory_use.py @@ -1,14 +1,11 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT import argparse import csv import os -import sys -import time from datetime import datetime from typing import Dict, List, Tuple @@ -20,31 +17,22 @@ import pynvml # type: ignore -_NVML_AVAILABLE = True - - -# Ensure src is on path -REPO_DIR = os.path.dirname(os.path.abspath(__file__)) -if REPO_DIR not in sys.path: - sys.path.append(REPO_DIR) -SRC_DIR = os.path.join(REPO_DIR, "src") -if SRC_DIR not in sys.path: - sys.path.append(SRC_DIR) # Models -from model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo -from model.llm.OpenTSLMSP import OpenTSLMSP +from opentslm.model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo +from opentslm.model.llm.OpenTSLMSP import OpenTSLMSP # Datasets -from time_series_datasets.TSQADataset import TSQADataset -from time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset -from time_series_datasets.simulation.SimulationQADataset import SimulationQADataset -from time_series_datasets.util import ( +from opentslm.time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.time_series_datasets.simulation.SimulationQADataset import SimulationQADataset +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) +_NVML_AVAILABLE = True def get_device(device_arg: str | None) -> str: if device_arg: diff --git a/evaluation/memory/plot_memory_scaling.py b/evaluation/memory/plot_memory_scaling.py index 93361a51..8d9c46ab 100644 --- a/evaluation/memory/plot_memory_scaling.py +++ b/evaluation/memory/plot_memory_scaling.py @@ -1,8 +1,7 @@ #!/usr/bin/env 
python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/memory/plot_memory_simulation.py b/evaluation/memory/plot_memory_simulation.py index c431da01..44567200 100644 --- a/evaluation/memory/plot_memory_simulation.py +++ b/evaluation/memory/plot_memory_simulation.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/memory/plot_memory_simulation_per_length.py b/evaluation/memory/plot_memory_simulation_per_length.py index cf3677a7..946394ad 100644 --- a/evaluation/memory/plot_memory_simulation_per_length.py +++ b/evaluation/memory/plot_memory_simulation_per_length.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/evaluation/memory/plot_memory_usage.py b/evaluation/memory/plot_memory_usage.py index d676a683..8d6b64b3 100644 --- a/evaluation/memory/plot_memory_usage.py +++ b/evaluation/memory/plot_memory_usage.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/memory/run_all_memory.sh b/evaluation/memory/run_all_memory.sh index e743d1ee..cf58b050 100644 --- a/evaluation/memory/run_all_memory.sh +++ b/evaluation/memory/run_all_memory.sh @@ -1,8 +1,7 @@ #!/usr/bin/env bash -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/opentslm/baseline_parse_predictions.py b/evaluation/opentslm/baseline_parse_predictions.py index 4b90f925..940606f0 100644 --- a/evaluation/opentslm/baseline_parse_predictions.py +++ b/evaluation/opentslm/baseline_parse_predictions.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -16,12 +15,8 @@ from collections import Counter from tqdm import tqdm -# Add the src directory to the path to import from the dataset class -project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) -sys.path.append(os.path.join(project_root, "src")) - # Import the dataset class to get labels -from time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset +from opentslm.time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset # Get the supported labels from the dataset class SUPPORTED_LABELS = HARCoTQADataset.get_labels() diff --git a/evaluation/opentslm/ecg_qa_cot/parse_ecg_qa_cot_data.py b/evaluation/opentslm/ecg_qa_cot/parse_ecg_qa_cot_data.py index bff237c1..c6b31115 100644 --- a/evaluation/opentslm/ecg_qa_cot/parse_ecg_qa_cot_data.py +++ b/evaluation/opentslm/ecg_qa_cot/parse_ecg_qa_cot_data.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -16,9 +15,8 @@ from collections import Counter, defaultdict from tqdm import tqdm -# Add the src directory to the path to import from the dataset class -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "..", "src")) -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +# Import dataset via package namespace +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset def calculate_f1_score(prediction, ground_truth, possible_answers): diff --git a/evaluation/opentslm/get_pamap_cot_predictions.py b/evaluation/opentslm/get_pamap_cot_predictions.py index 8db1ad4b..6d0c4540 100644 --- a/evaluation/opentslm/get_pamap_cot_predictions.py +++ b/evaluation/opentslm/get_pamap_cot_predictions.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -22,26 +21,19 @@ - CSV file with time series data, ground truth labels, and rationale """ -import sys -import os import torch import pandas as pd -import numpy as np import random from typing import List, Dict, Any import json -# Add src to path -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "src")) -) -from model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo -from time_series_datasets.pamap2.PAMAP2CoTQADataset import PAMAP2CoTQADataset -from prompt.full_prompt import FullPrompt -from prompt.text_prompt import TextPrompt -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.util import ( +from opentslm.model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo +from opentslm.time_series_datasets.pamap2.PAMAP2CoTQADataset import PAMAP2CoTQADataset +from opentslm.prompt.full_prompt import FullPrompt +from opentslm.prompt.text_prompt import TextPrompt +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) diff --git a/evaluation/opentslm/parse_predictions.py b/evaluation/opentslm/parse_predictions.py index fa0314ec..94802786 100644 --- a/evaluation/opentslm/parse_predictions.py +++ b/evaluation/opentslm/parse_predictions.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -15,12 +14,8 @@ from pathlib import Path from collections import Counter -# Add the src directory to the path to import from the dataset class -project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) -sys.path.append(os.path.join(project_root, "src")) - # Import the dataset class to get labels -from time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset +from opentslm.time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset # Get the supported labels from the dataset class SUPPORTED_LABELS = HARCoTQADataset.get_labels() diff --git a/evaluation/opentslm/sleep/baseline_parse_sleep_cot_data.py b/evaluation/opentslm/sleep/baseline_parse_sleep_cot_data.py index 118a4879..7d756180 100644 --- a/evaluation/opentslm/sleep/baseline_parse_sleep_cot_data.py +++ b/evaluation/opentslm/sleep/baseline_parse_sleep_cot_data.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -10,15 +9,10 @@ import json import re -import sys -import os from pathlib import Path -from collections import Counter from tqdm import tqdm -# Add the src directory to the path to import from the dataset class -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "..", "src")) -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset # We'll determine supported labels dynamically from the actual ground truth data # Start with the dataset class labels as a fallback diff --git a/evaluation/opentslm/sleep/get_sleep_predictions.py b/evaluation/opentslm/sleep/get_sleep_predictions.py index d7a524d6..bfb19356 100644 --- a/evaluation/opentslm/sleep/get_sleep_predictions.py +++ b/evaluation/opentslm/sleep/get_sleep_predictions.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -22,26 +21,18 @@ - CSV file with time series data, ground truth labels, and rationale """ -import sys -import os import torch import pandas as pd -import numpy as np import random from typing import List, Dict, Any import json -# Add src to path -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "src")) -) - -from model.llm.OpenTSLMSP import OpenTSLMSP -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset -from prompt.full_prompt import FullPrompt -from prompt.text_prompt import TextPrompt -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.util import ( +from opentslm.model.llm.OpenTSLMSP import OpenTSLMSP +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +from opentslm.prompt.full_prompt import FullPrompt +from opentslm.prompt.text_prompt import TextPrompt +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) diff --git a/evaluation/opentslm/sleep/parse_sleep_cot_data.py b/evaluation/opentslm/sleep/parse_sleep_cot_data.py index 4b4d6e4a..5eb9e6c8 100644 --- a/evaluation/opentslm/sleep/parse_sleep_cot_data.py +++ b/evaluation/opentslm/sleep/parse_sleep_cot_data.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -16,9 +15,8 @@ from collections import Counter from tqdm import tqdm -# Add the src directory to the path to import from the dataset class -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "..", "src")) -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +# Import dataset via package namespace +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset # We'll determine supported labels dynamically from the actual ground truth data # Start with the dataset class labels as a fallback diff --git a/evaluation/opentslm/sleep/plot_sleep_predictions.py b/evaluation/opentslm/sleep/plot_sleep_predictions.py index d0da4653..675ebfd2 100644 --- a/evaluation/opentslm/sleep/plot_sleep_predictions.py +++ b/evaluation/opentslm/sleep/plot_sleep_predictions.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/evaluation/opentslm/tsqa/baseline_parse_predictions.py b/evaluation/opentslm/tsqa/baseline_parse_predictions.py index 3c5101ec..f44e2bc7 100644 --- a/evaluation/opentslm/tsqa/baseline_parse_predictions.py +++ b/evaluation/opentslm/tsqa/baseline_parse_predictions.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/evaluation/opentslm/tsqa/parse_predictions.py b/evaluation/opentslm/tsqa/parse_predictions.py index cf2d9ec7..36953143 100644 --- a/evaluation/opentslm/tsqa/parse_predictions.py +++ b/evaluation/opentslm/tsqa/parse_predictions.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..6f7056a0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,102 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. +# +# SPDX-License-Identifier: MIT + +[project] +name = "opentslm" +version = "0.1.0" +description = "OpenTSLM: Open Time Series Language Model - Curriculum Learning for Multimodal Time Series Understanding with Large Language Models" +readme = "README.md" +requires-python = ">=3.12" +license = "MIT" +authors = [ + {name = "Patrick Langer"}, + {name = "Thomas Kaar"}, + {name = "Max Rosenblattl"}, + {name = "Maxwell A. 
Xu"}, + {name = "Winnie Chow"}, + {name = "Martin Maritsch"}, + {name = "Robert Jakob"}, + {name = "Aradhana Verma"}, + {name = "Brian Han"}, + {name = "Daniel Seung Kim"}, + {name = "Henry Chubb"}, + {name = "Scott Ceresnak"}, + {name = "Aydin Zahedivash"}, + {name = "Alexander Tarlochan Singh Sandhu"}, + {name = "Fatima Rodriguez"}, + {name = "Daniel McDuff"}, + {name = "Elgar Fleisch"}, + {name = "Oliver Aalami"}, + {name = "Filipe Barata"}, + {name = "Paul Schmiedmayer"} +] +keywords = [ + "time-series", + "large-language-models", + "curriculum-learning", + "multimodal", + "healthcare" +] + +dependencies = [ + "numpy>=1.21", + "pandas>=1.3", + "torch>=2.0", + "tqdm>=4.62", + "matplotlib>=3.3", + "scikit-learn>=1.0", + "transformers>=4.25", + "huggingface-hub>=0.16", + "datasets>=2.0", + "peft>=0.4", + "requests>=2.28", + "einops>=0.6", + "wfdb>=4.0", + "open-flamingo>=0.0.2", +] + +[project.urls] +Homepage = "https://opentslm.com" +Repository = "https://github.com/StanfordBDHG/OpenTSLM" +"Bug Tracker" = "https://github.com/StanfordBDHG/OpenTSLM/issues" + +[dependency-groups] +dev = [ + "reuse>=6.2.0", + "ruff>=0.14.0", +] + +eval = [ + "openai>=1.0", +] + +notebook = [ + "jupyter>=1.0", + "ipython>=8.0", + "seaborn>=0.13.2", +] + +[build-system] +requires = ["uv_build>=0.9.0,<0.10.0"] +build-backend = "uv_build" + +[tool.uv] +required-version = ">=0.9" + +[[tool.uv.index]] +name = "testpypi" +url = "https://test.pypi.org/simple/" +publish-url = "https://test.pypi.org/legacy/" +explicit = true + +[tool.uv.workspace] +members = [ + "test-install", +] + + +[tool.ruff] +line-length = 120 +target-version = "py312" diff --git a/requirements.txt b/requirements.txt index 857847ad..097fbbe2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,27 +1,20 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This 
source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT -numpy -pandas -torch -joblib -argparse -tqdm -matplotlib -scikit-learn -torchinfo -transformers -chronos-forecasting -huggingface-hub -datasets -peft -requests -open-clip-torch -einops -einops-exts -pyvim -openai -wfdb \ No newline at end of file +numpy>=2.3.5 +pandas>=2.3.3 +torch>=2.9.1 +tqdm>=4.67.1 +matplotlib>=3.10.7 +scikit-learn>=1.7.2 +transformers>=4.57.3 +huggingface-hub>=0.36.0 +datasets>=4.4.1 +peft>=0.18.0 +requests>=2.32.5 +einops>=0.8.1 +wfdb>=4.3.0 +open-flamingo>=0.0.2 +-e . \ No newline at end of file diff --git a/create_doctor_eval_dataset.py b/scripts/create_doctor_eval_dataset.py similarity index 97% rename from create_doctor_eval_dataset.py rename to scripts/create_doctor_eval_dataset.py index 59473faa..a7776238 100644 --- a/create_doctor_eval_dataset.py +++ b/scripts/create_doctor_eval_dataset.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT """ @@ -11,22 +10,18 @@ import json import os -import sys import re import pandas as pd import numpy as np import matplotlib.pyplot as plt import wfdb -from pathlib import Path -from typing import Dict, List, Set, Tuple +from typing import Dict, List, Tuple from collections import defaultdict from tqdm import tqdm -import shutil # Add the src directory to the path -sys.path.append(os.path.join(os.path.dirname(__file__), 'src')) -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset -from time_series_datasets.ecg_qa.plot_example import draw_ecg, get_ptbxl_ecg_path +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.time_series_datasets.ecg_qa.plot_example import get_ptbxl_ecg_path # Configuration MODEL_PREDICTIONS_FILE = "/Users/planger/Development/EmbedHealth/evaluation/embedhealth/ecg_qa_cot/llama3b_flamingo_predictions.jsonl" diff --git a/get_memory_use.py b/scripts/get_memory_use.py similarity index 94% rename from get_memory_use.py rename to scripts/get_memory_use.py index 852423e3..1995c689 100644 --- a/get_memory_use.py +++ b/scripts/get_memory_use.py @@ -1,13 +1,11 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT import argparse import csv import os -import sys import time from datetime import datetime from typing import Dict, List, Tuple @@ -22,25 +20,17 @@ _NVML_AVAILABLE = True -# Ensure src is on path -REPO_DIR = os.path.dirname(os.path.abspath(__file__)) -if REPO_DIR not in sys.path: - sys.path.append(REPO_DIR) -SRC_DIR = os.path.join(REPO_DIR, "src") -if SRC_DIR not in sys.path: - sys.path.append(SRC_DIR) - # Models -from model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo -from model.llm.OpenTSLMSP import OpenTSLMSP +from opentslm.model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo +from opentslm.model.llm.OpenTSLMSP import OpenTSLMSP # Datasets -from time_series_datasets.TSQADataset import TSQADataset -from time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset -from time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset -from time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset -from time_series_datasets.simulation.SimulationQADataset import SimulationQADataset -from time_series_datasets.util import ( +from opentslm.time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset +from opentslm.time_series_datasets.sleep.SleepEDFCoTQADataset import SleepEDFCoTQADataset +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.time_series_datasets.simulation.SimulationQADataset import SimulationQADataset +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) diff --git a/hf_test.py b/scripts/hf_test.py similarity index 78% rename from hf_test.py rename to scripts/hf_test.py index 318d2012..ba1ac3c5 100644 --- a/hf_test.py +++ b/scripts/hf_test.py @@ -1,9 +1,9 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see 
CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT -from src import OpenTSLM, TextPrompt, TextTimeSeriesPrompt, FullPrompt +from opentslm import OpenTSLM +from opentslm.prompt import TextPrompt, TextTimeSeriesPrompt, FullPrompt # Load model model = OpenTSLM.load_pretrained("OpenTSLM/gemma-3-270m-pt-har-flamingo") diff --git a/plot_memory_simulation.py b/scripts/plot_memory_simulation.py similarity index 98% rename from plot_memory_simulation.py rename to scripts/plot_memory_simulation.py index d033b663..ee587a4e 100644 --- a/plot_memory_simulation.py +++ b/scripts/plot_memory_simulation.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT """ diff --git a/plot_memory_simulation_per_length.py b/scripts/plot_memory_simulation_per_length.py similarity index 99% rename from plot_memory_simulation_per_length.py rename to scripts/plot_memory_simulation_per_length.py index 409028d6..2f09ab10 100644 --- a/plot_memory_simulation_per_length.py +++ b/scripts/plot_memory_simulation_per_length.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT """ diff --git a/plot_memory_usage.py b/scripts/plot_memory_usage.py similarity index 97% rename from plot_memory_usage.py rename to scripts/plot_memory_usage.py index 344cfb31..0140e89f 100644 --- a/plot_memory_usage.py +++ b/scripts/plot_memory_usage.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/run_all_memory.sh b/scripts/run_all_memory.sh similarity index 96% rename from run_all_memory.sh rename to scripts/run_all_memory.sh index e743d1ee..cf58b050 100644 --- a/run_all_memory.sh +++ b/scripts/run_all_memory.sh @@ -1,8 +1,7 @@ #!/usr/bin/env bash -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/run_all_memory_missing.sh b/scripts/run_all_memory_missing.sh similarity index 97% rename from run_all_memory_missing.sh rename to scripts/run_all_memory_missing.sh index a6c50ffe..7d0aa7b6 100644 --- a/run_all_memory_missing.sh +++ b/scripts/run_all_memory_missing.sh @@ -1,8 +1,7 @@ #!/usr/bin/env bash -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/model/projector/__init__.py b/src/model/projector/__init__.py deleted file mode 100644 index 266b1365..00000000 --- a/src/model/projector/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT diff --git a/src/open_flamingo b/src/open_flamingo deleted file mode 160000 index 914076ee..00000000 --- a/src/open_flamingo +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 914076ee07d0235943f1556e9aaf2fb974337f0e diff --git a/src/opentslm/__init__.py b/src/opentslm/__init__.py new file mode 100644 index 00000000..0078c70d --- /dev/null +++ b/src/opentslm/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. +# +# SPDX-License-Identifier: MIT + +from opentslm.model.llm.OpenTSLM import OpenTSLM + +__all__ = ["OpenTSLM"] \ No newline at end of file diff --git a/src/data.py b/src/opentslm/data.py similarity index 96% rename from src/data.py rename to src/opentslm/data.py index 20459947..15338b0f 100644 --- a/src/data.py +++ b/src/opentslm/data.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -10,7 +9,7 @@ import torch from datasets import load_dataset from torch.utils.data import DataLoader -from src.model_config import * +from opentslm.model_config import * # --------------------------- # Constants diff --git a/src/logger.py b/src/opentslm/logger.py similarity index 97% rename from src/logger.py rename to src/opentslm/logger.py index 669165f2..8d720b86 100644 --- a/src/logger.py +++ b/src/opentslm/logger.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/model/encoder/__init__.py b/src/opentslm/model/__init__.py similarity index 60% rename from src/model/encoder/__init__.py rename to src/opentslm/model/__init__.py index 266b1365..cbad1615 100644 --- a/src/model/encoder/__init__.py +++ b/src/opentslm/model/__init__.py @@ -1,5 +1,4 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/model/encoder/CNNTokenizer.py b/src/opentslm/model/encoder/CNNTokenizer.py similarity index 91% rename from src/model/encoder/CNNTokenizer.py rename to src/opentslm/model/encoder/CNNTokenizer.py index c622c0ab..be945cb0 100644 --- a/src/model/encoder/CNNTokenizer.py +++ b/src/opentslm/model/encoder/CNNTokenizer.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -8,8 +7,8 @@ import torch.nn as nn -from model_config import TRANSFORMER_INPUT_DIM, ENCODER_OUTPUT_DIM, PATCH_SIZE -from model.encoder.TimeSeriesEncoderBase import TimeSeriesEncoderBase +from opentslm.model_config import TRANSFORMER_INPUT_DIM, ENCODER_OUTPUT_DIM, PATCH_SIZE +from opentslm.model.encoder.TimeSeriesEncoderBase import TimeSeriesEncoderBase class CNNTokenizer(TimeSeriesEncoderBase): diff --git a/src/model/encoder/TimeSeriesEncoderBase.py b/src/opentslm/model/encoder/TimeSeriesEncoderBase.py similarity index 79% rename from src/model/encoder/TimeSeriesEncoderBase.py rename to src/opentslm/model/encoder/TimeSeriesEncoderBase.py index e49a4c7c..99204152 100644 --- a/src/model/encoder/TimeSeriesEncoderBase.py +++ b/src/opentslm/model/encoder/TimeSeriesEncoderBase.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -8,7 +7,7 @@ import torch import torch.nn as nn -from model_config import ENCODER_OUTPUT_DIM +from opentslm.model_config import ENCODER_OUTPUT_DIM class TimeSeriesEncoderBase(nn.Module): diff --git a/src/model/encoder/TransformerCNNEncoder.py b/src/opentslm/model/encoder/TransformerCNNEncoder.py similarity index 92% rename from src/model/encoder/TransformerCNNEncoder.py rename to src/opentslm/model/encoder/TransformerCNNEncoder.py index f22306ce..6cd01b04 100644 --- a/src/model/encoder/TransformerCNNEncoder.py +++ b/src/opentslm/model/encoder/TransformerCNNEncoder.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -8,8 +7,8 @@ import torch.nn as nn -from model_config import TRANSFORMER_INPUT_DIM, ENCODER_OUTPUT_DIM, PATCH_SIZE -from model.encoder.TimeSeriesEncoderBase import TimeSeriesEncoderBase +from opentslm.model_config import TRANSFORMER_INPUT_DIM, ENCODER_OUTPUT_DIM, PATCH_SIZE +from opentslm.model.encoder.TimeSeriesEncoderBase import TimeSeriesEncoderBase class TransformerCNNEncoder(TimeSeriesEncoderBase): diff --git a/src/model/encoder/TransformerMLPEncoder.py b/src/opentslm/model/encoder/TransformerMLPEncoder.py similarity index 92% rename from src/model/encoder/TransformerMLPEncoder.py rename to src/opentslm/model/encoder/TransformerMLPEncoder.py index bccc57f0..89403ea3 100644 --- a/src/model/encoder/TransformerMLPEncoder.py +++ b/src/opentslm/model/encoder/TransformerMLPEncoder.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM 
open-source project. # # SPDX-License-Identifier: MIT @@ -8,8 +7,8 @@ import torch.nn as nn -from src.model_config import TRANSFORMER_INPUT_DIM, ENCODER_OUTPUT_DIM, PATCH_SIZE -from model.encoder.TimeSeriesEncoderBase import TimeSeriesEncoderBase +from opentslm.model_config import TRANSFORMER_INPUT_DIM, ENCODER_OUTPUT_DIM, PATCH_SIZE +from opentslm.model.encoder.TimeSeriesEncoderBase import TimeSeriesEncoderBase class TransformerMLPEncoder(TimeSeriesEncoderBase): diff --git a/__init__.py b/src/opentslm/model/encoder/__init__.py similarity index 60% rename from __init__.py rename to src/opentslm/model/encoder/__init__.py index 266b1365..cbad1615 100644 --- a/__init__.py +++ b/src/opentslm/model/encoder/__init__.py @@ -1,5 +1,4 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/model/llm/OpenTSLM.py b/src/opentslm/model/llm/OpenTSLM.py similarity index 97% rename from src/model/llm/OpenTSLM.py rename to src/opentslm/model/llm/OpenTSLM.py index 19806258..b7e56a20 100644 --- a/src/model/llm/OpenTSLM.py +++ b/src/opentslm/model/llm/OpenTSLM.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT import torch @@ -39,7 +38,7 @@ class OpenTSLM: Example: >>> model = OpenTSLM.load_pretrained("OpenTSLM/gemma-3-270m-pt-sleep-flamingo") >>> - >>> from prompt.full_prompt import FullPrompt + >>> from opentslm.prompt.full_prompt import FullPrompt >>> prompt = FullPrompt(...) 
>>> response = model.eval_prompt(prompt) """ diff --git a/src/model/llm/OpenTSLMFlamingo.py b/src/opentslm/model/llm/OpenTSLMFlamingo.py similarity index 95% rename from src/model/llm/OpenTSLMFlamingo.py rename to src/opentslm/model/llm/OpenTSLMFlamingo.py index 39e5101a..b0f5676b 100644 --- a/src/model/llm/OpenTSLMFlamingo.py +++ b/src/opentslm/model/llm/OpenTSLMFlamingo.py @@ -1,30 +1,29 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT from types import SimpleNamespace -from model.encoder.CNNTokenizer import CNNTokenizer -from model.llm.TimeSeriesFlamingoWithTrainableEncoder import ( +from opentslm.model.encoder.CNNTokenizer import CNNTokenizer +from opentslm.model.llm.TimeSeriesFlamingoWithTrainableEncoder import ( TimeSeriesFlamingoWithTrainableEncoder, ) -from open_flamingo.open_flamingo.src.flamingo_lm import FlamingoLMMixin -from open_flamingo.open_flamingo.src.utils import extend_instance +from open_flamingo.src.flamingo_lm import FlamingoLMMixin +from open_flamingo.src.utils import extend_instance import torch import torch._dynamo from typing import List, Dict, Tuple from transformers import AutoTokenizer, AutoModelForCausalLM -from model_config import ENCODER_OUTPUT_DIM -from model.llm.TimeSeriesLLM import TimeSeriesLLM -from prompt.full_prompt import FullPrompt -from time_series_datasets.util import ( +from opentslm.model_config import ENCODER_OUTPUT_DIM +from opentslm.model.llm.TimeSeriesLLM import TimeSeriesLLM +from opentslm.prompt.full_prompt import FullPrompt +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) # Monkey-patch FlamingoLayer to add attention_type property for compatibility with newer transformers -from 
open_flamingo.open_flamingo.src.flamingo_lm import FlamingoLayer +from open_flamingo.src.flamingo_lm import FlamingoLayer def _attention_type_property(self): @@ -33,7 +32,7 @@ def _attention_type_property(self): # Add the attention_type property to FlamingoLayer -FlamingoLayer.attention_type = property(_attention_type_property) +FlamingoLayer.attention_type = property(_attention_type_property) # type: ignore class OpenTSLMFlamingo(TimeSeriesLLM): diff --git a/src/model/llm/OpenTSLMSP.py b/src/opentslm/model/llm/OpenTSLMSP.py similarity index 98% rename from src/model/llm/OpenTSLMSP.py rename to src/opentslm/model/llm/OpenTSLMSP.py index 59c290e5..4e7735d8 100644 --- a/src/model/llm/OpenTSLMSP.py +++ b/src/opentslm/model/llm/OpenTSLMSP.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -18,12 +17,12 @@ PEFT_AVAILABLE = False print("Warning: peft not available. 
LoRA fine-tuning will be disabled.") -from model_config import ENCODER_OUTPUT_DIM +from opentslm.model_config import ENCODER_OUTPUT_DIM from .TimeSeriesLLM import TimeSeriesLLM from ..encoder.TransformerCNNEncoder import TransformerCNNEncoder from ..projector.MLPProjector import MLPProjector -from prompt.full_prompt import FullPrompt -from time_series_datasets.util import ( +from opentslm.prompt.full_prompt import FullPrompt +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) diff --git a/src/model/llm/TimeSeriesFlamingoWithTrainableEncoder.py b/src/opentslm/model/llm/TimeSeriesFlamingoWithTrainableEncoder.py similarity index 95% rename from src/model/llm/TimeSeriesFlamingoWithTrainableEncoder.py rename to src/opentslm/model/llm/TimeSeriesFlamingoWithTrainableEncoder.py index 5ca67ebd..97a7b52e 100644 --- a/src/model/llm/TimeSeriesFlamingoWithTrainableEncoder.py +++ b/src/opentslm/model/llm/TimeSeriesFlamingoWithTrainableEncoder.py @@ -1,12 +1,11 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT import torch from torch import nn -from open_flamingo.open_flamingo.src.flamingo import Flamingo +from open_flamingo import Flamingo from einops import rearrange diff --git a/src/model/llm/TimeSeriesLLM.py b/src/opentslm/model/llm/TimeSeriesLLM.py similarity index 69% rename from src/model/llm/TimeSeriesLLM.py rename to src/opentslm/model/llm/TimeSeriesLLM.py index 55af6d2b..b3a568d5 100644 --- a/src/model/llm/TimeSeriesLLM.py +++ b/src/opentslm/model/llm/TimeSeriesLLM.py @@ -1,17 +1,14 @@ -# This source file is part of the OpenTSLM open-source project -# +from typing import List, Dict, Any + # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT import torch import torch.nn as nn -from typing import List, Dict, Tuple -from transformers import AutoTokenizer, AutoModelForCausalLM -from torch.nn.utils.rnn import pad_sequence -from model_config import ENCODER_OUTPUT_DIM -from prompt.full_prompt import FullPrompt +from opentslm.prompt.full_prompt import FullPrompt class TimeSeriesLLM(nn.Module): def __init__( @@ -23,12 +20,12 @@ def __init__( def generate( - self, batch: List[Dict[str, any]], max_new_tokens: int = 50, **generate_kwargs + self, batch: List[Dict[str, Any]], max_new_tokens: int = 50, **generate_kwargs ) -> List[str]: raise NotImplementedError("Generate method should be implemented by the subclass") - def compute_loss(self, batch: List[Dict[str, any]]) -> torch.Tensor: + def compute_loss(self, batch: List[Dict[str, Any]]) -> torch.Tensor: """ batch: same format as generate() answers: List[str] of length B diff --git a/src/model/llm/__init__.py b/src/opentslm/model/llm/__init__.py similarity index 60% rename from src/model/llm/__init__.py rename to src/opentslm/model/llm/__init__.py index 266b1365..cbad1615 100644 --- a/src/model/llm/__init__.py 
+++ b/src/opentslm/model/llm/__init__.py @@ -1,5 +1,4 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/model/projector/LinearProjector.py b/src/opentslm/model/projector/LinearProjector.py similarity index 81% rename from src/model/projector/LinearProjector.py rename to src/opentslm/model/projector/LinearProjector.py index 0e9c9e24..0dfab497 100644 --- a/src/model/projector/LinearProjector.py +++ b/src/opentslm/model/projector/LinearProjector.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/model/projector/MLPProjector.py b/src/opentslm/model/projector/MLPProjector.py similarity index 85% rename from src/model/projector/MLPProjector.py rename to src/opentslm/model/projector/MLPProjector.py index af07ca90..e8c9a3a1 100644 --- a/src/model/projector/MLPProjector.py +++ b/src/opentslm/model/projector/MLPProjector.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/opentslm/model/projector/__init__.py b/src/opentslm/model/projector/__init__.py new file mode 100644 index 00000000..cbad1615 --- /dev/null +++ b/src/opentslm/model/projector/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. +# +# SPDX-License-Identifier: MIT diff --git a/src/model_config.py b/src/opentslm/model_config.py similarity index 87% rename from src/model_config.py rename to src/opentslm/model_config.py index 0412f6b9..90228563 100644 --- a/src/model_config.py +++ b/src/opentslm/model_config.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/opentslm/prompt/__init__.py b/src/opentslm/prompt/__init__.py new file mode 100644 index 00000000..826feaad --- /dev/null +++ b/src/opentslm/prompt/__init__.py @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
+# +# SPDX-License-Identifier: MIT + +from .prompt import Prompt +from .full_prompt import FullPrompt +from .text_prompt import TextPrompt +from .text_time_series_prompt import TextTimeSeriesPrompt +from .prompt_with_answer import PromptWithAnswer + +__all__ = [ + "Prompt", + "FullPrompt", + "TextPrompt", + "TextTimeSeriesPrompt", + "PromptWithAnswer", +] diff --git a/src/prompt/full_prompt.py b/src/opentslm/prompt/full_prompt.py similarity index 85% rename from src/prompt/full_prompt.py rename to src/opentslm/prompt/full_prompt.py index 501d61f5..64ff6222 100644 --- a/src/prompt/full_prompt.py +++ b/src/opentslm/prompt/full_prompt.py @@ -1,12 +1,11 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT from typing import List -from prompt.text_prompt import TextPrompt -from prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.prompt.text_prompt import TextPrompt +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt class FullPrompt: diff --git a/src/prompt/prompt.py b/src/opentslm/prompt/prompt.py similarity index 73% rename from src/prompt/prompt.py rename to src/opentslm/prompt/prompt.py index 69b4fdd6..c57b6e64 100644 --- a/src/prompt/prompt.py +++ b/src/opentslm/prompt/prompt.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/prompt/prompt_with_answer.py b/src/opentslm/prompt/prompt_with_answer.py similarity index 87% rename from src/prompt/prompt_with_answer.py rename to src/opentslm/prompt/prompt_with_answer.py index 9df2d024..1f6ba78d 100644 --- a/src/prompt/prompt_with_answer.py +++ b/src/opentslm/prompt/prompt_with_answer.py @@ -1,12 +1,11 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT from typing import List -from prompt.text_prompt import TextPrompt -from prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.prompt.text_prompt import TextPrompt +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt class PromptWithAnswer: diff --git a/src/prompt/text_prompt.py b/src/opentslm/prompt/text_prompt.py similarity index 80% rename from src/prompt/text_prompt.py rename to src/opentslm/prompt/text_prompt.py index 11c19450..5e0ae72d 100644 --- a/src/prompt/text_prompt.py +++ b/src/opentslm/prompt/text_prompt.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/prompt/text_time_series_prompt.py b/src/opentslm/prompt/text_time_series_prompt.py similarity index 93% rename from src/prompt/text_time_series_prompt.py rename to src/opentslm/prompt/text_time_series_prompt.py index d38da3c9..bdcbadd8 100644 --- a/src/prompt/text_time_series_prompt.py +++ b/src/opentslm/prompt/text_time_series_prompt.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/QADataset.py b/src/opentslm/time_series_datasets/QADataset.py similarity index 94% rename from src/time_series_datasets/QADataset.py rename to src/opentslm/time_series_datasets/QADataset.py index 8375b9c7..7e78fb02 100644 --- a/src/time_series_datasets/QADataset.py +++ b/src/opentslm/time_series_datasets/QADataset.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -9,10 +8,9 @@ from typing import Callable, List, Literal, Tuple import numpy as np -import sys -from prompt.prompt_with_answer import PromptWithAnswer -from prompt.text_prompt import TextPrompt -from prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.prompt.prompt_with_answer import PromptWithAnswer +from opentslm.prompt.text_prompt import TextPrompt +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt from torch.utils.data import Dataset diff --git a/src/time_series_datasets/TSQADataset.py b/src/opentslm/time_series_datasets/TSQADataset.py similarity index 92% rename from src/time_series_datasets/TSQADataset.py rename to src/opentslm/time_series_datasets/TSQADataset.py index 9f1a4f3d..3b79e6d0 100644 --- a/src/time_series_datasets/TSQADataset.py +++ b/src/opentslm/time_series_datasets/TSQADataset.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -9,9 +8,9 @@ from datasets import Dataset, load_dataset -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.util import ( +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) from torch.utils.data import DataLoader diff --git a/src/opentslm/time_series_datasets/__init__.py b/src/opentslm/time_series_datasets/__init__.py new file mode 100644 index 00000000..cbad1615 --- /dev/null +++ b/src/opentslm/time_series_datasets/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. +# +# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/constants.py b/src/opentslm/time_series_datasets/constants.py similarity index 78% rename from src/time_series_datasets/constants.py rename to src/opentslm/time_series_datasets/constants.py index 97256616..0d160e1d 100644 --- a/src/time_series_datasets/constants.py +++ b/src/opentslm/time_series_datasets/constants.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/ecg_qa/ECGQACoTQADataset.py b/src/opentslm/time_series_datasets/ecg_qa/ECGQACoTQADataset.py similarity index 97% rename from src/time_series_datasets/ecg_qa/ECGQACoTQADataset.py rename to src/opentslm/time_series_datasets/ecg_qa/ECGQACoTQADataset.py index 1f15da9f..2f779528 100644 --- a/src/time_series_datasets/ecg_qa/ECGQACoTQADataset.py +++ b/src/opentslm/time_series_datasets/ecg_qa/ECGQACoTQADataset.py @@ -1,17 +1,14 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT from datasets import Dataset from typing import List, Tuple, Literal -import sys import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.ecg_qa.ecgqa_cot_loader import load_ecg_qa_cot_splits +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.ecg_qa.ecgqa_cot_loader import load_ecg_qa_cot_splits import numpy as np class ECGQACoTQADataset(QADataset): @@ -171,7 +168,7 @@ def _load_template_answers_cache(cls): try: import pandas as pd import ast - from time_series_datasets.ecg_qa.ecgqa_loader import ECG_QA_DIR + from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import ECG_QA_DIR # Load template answers directly template_answers_path = os.path.join(ECG_QA_DIR, "ecgqa", "ptbxl", "answers_for_each_template.csv") @@ -209,7 +206,7 @@ def get_labels() -> List[str]: try: import pandas as pd import ast - from time_series_datasets.ecg_qa.ecgqa_loader import ECG_QA_DIR + from 
opentslm.time_series_datasets.ecg_qa.ecgqa_loader import ECG_QA_DIR template_answers_path = os.path.join(ECG_QA_DIR, "ecgqa", "ptbxl", "answers_for_each_template.csv") df = pd.read_csv(template_answers_path) @@ -299,7 +296,7 @@ def preload_ecg_data(cls, dataset_splits: List[Dataset]): # Fallback: construct path from ecg_id ecg_id = sample.get("ecg_id") if ecg_id and isinstance(ecg_id, list) and len(ecg_id) > 0: - from time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path + from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path ecg_path = get_ptbxl_ecg_path(ecg_id[0]) + ".dat" ecg_paths.add(ecg_path) @@ -336,7 +333,7 @@ def preload_processed_ecg_data(cls, dataset_splits: List[Dataset]): # Fallback: construct path from ecg_id ecg_id = sample.get("ecg_id") if ecg_id and isinstance(ecg_id, list) and len(ecg_id) > 0: - from time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path + from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path ecg_path = get_ptbxl_ecg_path(ecg_id[0]) + ".dat" ecg_signal, _, _ = cls._load_ecg_data(ecg_path) n_leads = ecg_signal.shape[1] if len(ecg_signal.shape) > 1 else 1 @@ -439,7 +436,7 @@ def _get_text_time_series_prompt_list(self, row) -> List[TextTimeSeriesPrompt]: if not isinstance(ecg_id, list) or len(ecg_id) == 0: raise ValueError(f"Sample 'ecg_id' must be a non-empty list: {ecg_id}") - from time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path + from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path ecg_path = get_ptbxl_ecg_path(ecg_id[0]) + ".dat" ecg_paths = [ecg_path] diff --git a/src/time_series_datasets/ecg_qa/ECGQADataset.py b/src/opentslm/time_series_datasets/ecg_qa/ECGQADataset.py similarity index 97% rename from src/time_series_datasets/ecg_qa/ECGQADataset.py rename to src/opentslm/time_series_datasets/ecg_qa/ECGQADataset.py index fc3c17b6..3a93319e 100644 --- a/src/time_series_datasets/ecg_qa/ECGQADataset.py +++ 
b/src/opentslm/time_series_datasets/ecg_qa/ECGQADataset.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -18,9 +17,9 @@ "Please install it with: pip install wfdb" ) -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.ecg_qa.ecgqa_loader import ( +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import ( load_ecg_qa_ptbxl_splits, load_ecg_qa_answers, ) @@ -197,7 +196,7 @@ def get_possible_answers_for_template(self, template_id: int) -> List[str]: try: import pandas as pd import ast - from time_series_datasets.ecg_qa.ecgqa_loader import ECG_QA_DIR + from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import ECG_QA_DIR # Load template answers directly template_answers_path = os.path.join( @@ -231,7 +230,7 @@ def _get_text_time_series_prompt_list(self, row) -> List[TextTimeSeriesPrompt]: # Fallback: single ECG path ecg_id = row["ecg_id"][0] if row["ecg_id"] else None if ecg_id: - from time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path + from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path ecg_path = get_ptbxl_ecg_path(ecg_id) + ".dat" ecg_paths = [ecg_path] diff --git a/src/time_series_datasets/ecg_qa/README.md b/src/opentslm/time_series_datasets/ecg_qa/README.md similarity index 98% rename from src/time_series_datasets/ecg_qa/README.md rename to src/opentslm/time_series_datasets/ecg_qa/README.md index 50a97ebf..7d9f7b43 100644 --- a/src/time_series_datasets/ecg_qa/README.md +++ 
b/src/opentslm/time_series_datasets/ecg_qa/README.md @@ -1,7 +1,6 @@ @@ -99,7 +98,7 @@ yes ## Quick Start ```python -from time_series_datasets.ecg_qa.ECGQADataset import ECGQADataset +from opentslm.time_series_datasets.ecg_qa.ECGQADataset import ECGQADataset # Load ECG-QA splits (auto-downloads ~6GB of data on first run) train_dataset = ECGQADataset(split="train", EOS_TOKEN="") diff --git a/src/time_series_datasets/ecg_qa/__init__.py b/src/opentslm/time_series_datasets/ecg_qa/__init__.py similarity index 89% rename from src/time_series_datasets/ecg_qa/__init__.py rename to src/opentslm/time_series_datasets/ecg_qa/__init__.py index 4ba0c41a..277c1074 100644 --- a/src/time_series_datasets/ecg_qa/__init__.py +++ b/src/opentslm/time_series_datasets/ecg_qa/__init__.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -15,7 +14,7 @@ https://github.com/Jwoo5/ecg-qa Usage: - from time_series_datasets.ecg_qa.ECGQADataset import ECGQADataset + from opentslm.time_series_datasets.ecg_qa.ECGQADataset import ECGQADataset # Create dataset instance dataset = ECGQADataset(split="train", EOS_TOKEN="") diff --git a/src/time_series_datasets/ecg_qa/dataset_distribution.png b/src/opentslm/time_series_datasets/ecg_qa/dataset_distribution.png similarity index 100% rename from src/time_series_datasets/ecg_qa/dataset_distribution.png rename to src/opentslm/time_series_datasets/ecg_qa/dataset_distribution.png diff --git a/src/time_series_datasets/ecg_qa/ecg_example_real.png b/src/opentslm/time_series_datasets/ecg_qa/ecg_example_real.png similarity index 100% rename from src/time_series_datasets/ecg_qa/ecg_example_real.png rename to src/opentslm/time_series_datasets/ecg_qa/ecg_example_real.png diff --git a/src/time_series_datasets/ecg_qa/ecgqa_cot_loader.py b/src/opentslm/time_series_datasets/ecg_qa/ecgqa_cot_loader.py similarity index 95% rename from src/time_series_datasets/ecg_qa/ecgqa_cot_loader.py rename to src/opentslm/time_series_datasets/ecg_qa/ecgqa_cot_loader.py index b419d42d..b2cc38a0 100644 --- a/src/time_series_datasets/ecg_qa/ecgqa_cot_loader.py +++ b/src/opentslm/time_series_datasets/ecg_qa/ecgqa_cot_loader.py @@ -1,27 +1,18 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT -import os import subprocess -import json import requests import shutil -import pandas as pd -from pathlib import Path from typing import Tuple, Dict, List from datasets import Dataset -import sys import os import zipfile -import tempfile -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -from time_series_datasets.constants import RAW_DATA as RAW_DATA_PATH -from time_series_datasets.ecg_qa.ecgqa_loader import ( - load_ecg_qa_ptbxl_splits, +from opentslm.time_series_datasets.constants import RAW_DATA as RAW_DATA_PATH +from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import ( download_ecg_qa_if_not_exists, download_ptbxl_if_not_exists ) @@ -150,7 +141,7 @@ def load_ecg_qa_cot_splits() -> Tuple[Dataset, Dataset, Dataset]: download_ecg_qa_cot_if_not_exists() import pandas as pd - from time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path + from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import get_ptbxl_ecg_path def parse_ecg_id(ecg_id_raw: str) -> int: if ecg_id_raw is None: @@ -175,7 +166,7 @@ def load_split_from_csv(split_name: str) -> List[Dict]: raise FileNotFoundError(f"CoT split file not found: {split_file}") print(f"Loading CoT data for {split_name} split from {split_file}...") - print(f"Reading CSV file...") + print("Reading CSV file...") df = pd.read_csv(split_file) print(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") @@ -277,13 +268,13 @@ def get_label_distribution(dataset: Dataset) -> Dict[str, int]: try: train, val, test = load_ecg_qa_cot_splits() - print(f"Loaded ECG-QA CoT dataset:") + print("Loaded ECG-QA CoT dataset:") print(f" Train: {len(train)} samples") print(f" Validation: {len(val)} samples") print(f" Test: {len(test)} samples") if len(train) > 0: - print(f"\nSample from training set:") + print("\nSample from training set:") sample = train[0] for key, value in sample.items(): if isinstance(value, list) and len(value) > 3: diff --git 
a/src/time_series_datasets/ecg_qa/ecgqa_loader.py b/src/opentslm/time_series_datasets/ecg_qa/ecgqa_loader.py similarity index 98% rename from src/time_series_datasets/ecg_qa/ecgqa_loader.py rename to src/opentslm/time_series_datasets/ecg_qa/ecgqa_loader.py index 5f112c98..36053da4 100644 --- a/src/time_series_datasets/ecg_qa/ecgqa_loader.py +++ b/src/opentslm/time_series_datasets/ecg_qa/ecgqa_loader.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -14,11 +13,9 @@ from pathlib import Path from typing import Tuple, Dict, List from datasets import Dataset -import sys import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) -from time_series_datasets.constants import RAW_DATA as RAW_DATA_PATH +from opentslm.time_series_datasets.constants import RAW_DATA as RAW_DATA_PATH from tqdm import tqdm diff --git a/src/time_series_datasets/ecg_qa/example_prompt.md b/src/opentslm/time_series_datasets/ecg_qa/example_prompt.md similarity index 88% rename from src/time_series_datasets/ecg_qa/example_prompt.md rename to src/opentslm/time_series_datasets/ecg_qa/example_prompt.md index 0fd3bd56..593ecb9a 100644 --- a/src/time_series_datasets/ecg_qa/example_prompt.md +++ b/src/opentslm/time_series_datasets/ecg_qa/example_prompt.md @@ -1,7 +1,6 @@ diff --git a/src/time_series_datasets/ecg_qa/plot_example.py b/src/opentslm/time_series_datasets/ecg_qa/plot_example.py similarity index 96% rename from src/time_series_datasets/ecg_qa/plot_example.py rename to src/opentslm/time_series_datasets/ecg_qa/plot_example.py index d04cfdc3..06898d91 100644 --- a/src/time_series_datasets/ecg_qa/plot_example.py +++ b/src/opentslm/time_series_datasets/ecg_qa/plot_example.py @@ -1,8 +1,7 
@@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -12,16 +11,12 @@ """ import os -import sys import json import matplotlib.pyplot as plt import numpy as np import wfdb -# Add the src directory to path for imports -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) - -from time_series_datasets.ecg_qa.ECGQADataset import ECGQADataset +from opentslm.time_series_datasets.ecg_qa.ECGQADataset import ECGQADataset # ECG plotting configuration lead_names = ["I", "II", "III", "aVR", "aVL", "aVF", "V1", "V2", "V3", "V4", "V5", "V6"] diff --git a/src/time_series_datasets/har_cot/HARAccQADataset.py b/src/opentslm/time_series_datasets/har_cot/HARAccQADataset.py similarity index 91% rename from src/time_series_datasets/har_cot/HARAccQADataset.py rename to src/opentslm/time_series_datasets/har_cot/HARAccQADataset.py index 7d0a71e7..d5db27e4 100644 --- a/src/time_series_datasets/har_cot/HARAccQADataset.py +++ b/src/opentslm/time_series_datasets/har_cot/HARAccQADataset.py @@ -1,19 +1,18 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT from datasets import Dataset from typing import List, Tuple, Literal -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.har_cot.har_cot_loader import load_har_cot_splits +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.har_cot.har_cot_loader import load_har_cot_splits import torch from torch.utils.data import DataLoader from tqdm.auto import tqdm -from time_series_datasets.util import ( +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) diff --git a/src/time_series_datasets/har_cot/HARCoTQADataset.py b/src/opentslm/time_series_datasets/har_cot/HARCoTQADataset.py similarity index 94% rename from src/time_series_datasets/har_cot/HARCoTQADataset.py rename to src/opentslm/time_series_datasets/har_cot/HARCoTQADataset.py index 914018c8..da55653e 100644 --- a/src/time_series_datasets/har_cot/HARCoTQADataset.py +++ b/src/opentslm/time_series_datasets/har_cot/HARCoTQADataset.py @@ -1,21 +1,18 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT from datasets import Dataset from typing import List, Tuple, Literal -import sys import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.har_cot.har_cot_loader import load_har_cot_splits +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.har_cot.har_cot_loader import load_har_cot_splits import torch from torch.utils.data import DataLoader from tqdm.auto import tqdm -from time_series_datasets.util import ( +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) import numpy as np diff --git a/src/opentslm/time_series_datasets/har_cot/__init__.py b/src/opentslm/time_series_datasets/har_cot/__init__.py new file mode 100644 index 00000000..cbad1615 --- /dev/null +++ b/src/opentslm/time_series_datasets/har_cot/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
+# +# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/har_cot/har_cot_loader.py b/src/opentslm/time_series_datasets/har_cot/har_cot_loader.py similarity index 96% rename from src/time_series_datasets/har_cot/har_cot_loader.py rename to src/opentslm/time_series_datasets/har_cot/har_cot_loader.py index 4b0721ef..9d756359 100644 --- a/src/time_series_datasets/har_cot/har_cot_loader.py +++ b/src/opentslm/time_series_datasets/har_cot/har_cot_loader.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -11,12 +10,10 @@ import ast import urllib.request import zipfile -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -from time_series_datasets.constants import RAW_DATA +from opentslm.time_series_datasets.constants import RAW_DATA from tqdm.auto import tqdm import logging -from logger import get_logger +from opentslm.logger import get_logger HAR_COT_DATA_DIR = os.path.join(RAW_DATA, "har_cot") HAR_COT_ZIP = os.path.join(HAR_COT_DATA_DIR, "har_cot.zip") diff --git a/src/time_series_datasets/m4/M4QADataset.py b/src/opentslm/time_series_datasets/m4/M4QADataset.py similarity index 93% rename from src/time_series_datasets/m4/M4QADataset.py rename to src/opentslm/time_series_datasets/m4/M4QADataset.py index 1613c656..5b567916 100644 --- a/src/time_series_datasets/m4/M4QADataset.py +++ b/src/opentslm/time_series_datasets/m4/M4QADataset.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -17,9 +16,9 @@ import torch from datasets import Dataset -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.m4.m4_loader import load_all_m4_data, create_combined_dataset +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.m4.m4_loader import load_all_m4_data, create_combined_dataset class M4QADataset(QADataset): diff --git a/src/time_series_datasets/m4/__init__.py b/src/opentslm/time_series_datasets/m4/__init__.py similarity index 79% rename from src/time_series_datasets/m4/__init__.py rename to src/opentslm/time_series_datasets/m4/__init__.py index 5fadb4e6..2388535d 100644 --- a/src/time_series_datasets/m4/__init__.py +++ b/src/opentslm/time_series_datasets/m4/__init__.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/m4/m4_loader.py b/src/opentslm/time_series_datasets/m4/m4_loader.py similarity index 98% rename from src/time_series_datasets/m4/m4_loader.py rename to src/opentslm/time_series_datasets/m4/m4_loader.py index df144274..1f03720b 100644 --- a/src/time_series_datasets/m4/m4_loader.py +++ b/src/opentslm/time_series_datasets/m4/m4_loader.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -26,7 +25,7 @@ from typing import Dict, List, Literal, Optional, Tuple from datasets import Dataset from sklearn.model_selection import train_test_split -from time_series_datasets.constants import RAW_DATA +from opentslm.time_series_datasets.constants import RAW_DATA # --------------------------- # Constants diff --git a/src/time_series_datasets/monash/MonashDataset.py b/src/opentslm/time_series_datasets/monash/MonashDataset.py similarity index 93% rename from src/time_series_datasets/monash/MonashDataset.py rename to src/opentslm/time_series_datasets/monash/MonashDataset.py index 7d10e37f..6d0e1eff 100644 --- a/src/time_series_datasets/monash/MonashDataset.py +++ b/src/opentslm/time_series_datasets/monash/MonashDataset.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -12,7 +11,7 @@ from tqdm.auto import tqdm -from time_series_datasets.monash.monash_utils import ( +from opentslm.time_series_datasets.monash.monash_utils import ( download_and_extract_monash_ucr, load_from_tsfile_to_dataframe, ) diff --git a/src/time_series_datasets/monash/MonashSPO2QADataset.py b/src/opentslm/time_series_datasets/monash/MonashSPO2QADataset.py similarity index 86% rename from src/time_series_datasets/monash/MonashSPO2QADataset.py rename to src/opentslm/time_series_datasets/monash/MonashSPO2QADataset.py index 2f22427a..f3c44f37 100644 --- a/src/time_series_datasets/monash/MonashSPO2QADataset.py +++ b/src/opentslm/time_series_datasets/monash/MonashSPO2QADataset.py @@ -1,15 +1,14 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT from typing import List, Literal, Optional, Tuple from datasets import Dataset -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.monash.MonashDataset import MonashDataset -from time_series_datasets.QADataset import QADataset -from time_series_datasets.util import ( +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.monash.MonashDataset import MonashDataset +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) import torch diff --git a/src/opentslm/time_series_datasets/monash/__init__.py b/src/opentslm/time_series_datasets/monash/__init__.py new file mode 100644 index 00000000..cbad1615 --- /dev/null +++ b/src/opentslm/time_series_datasets/monash/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. +# +# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/monash/monash_utils.py b/src/opentslm/time_series_datasets/monash/monash_utils.py similarity index 99% rename from src/time_series_datasets/monash/monash_utils.py rename to src/opentslm/time_series_datasets/monash/monash_utils.py index f3d98e4a..cc6a5520 100644 --- a/src/time_series_datasets/monash/monash_utils.py +++ b/src/opentslm/time_series_datasets/monash/monash_utils.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/pamap2/BalancedBatchSampler.py b/src/opentslm/time_series_datasets/pamap2/BalancedBatchSampler.py similarity index 94% rename from src/time_series_datasets/pamap2/BalancedBatchSampler.py rename to src/opentslm/time_series_datasets/pamap2/BalancedBatchSampler.py index 47a795e0..efc2bb97 100644 --- a/src/time_series_datasets/pamap2/BalancedBatchSampler.py +++ b/src/opentslm/time_series_datasets/pamap2/BalancedBatchSampler.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/pamap2/PAMAP2AccQADataset.py b/src/opentslm/time_series_datasets/pamap2/PAMAP2AccQADataset.py similarity index 89% rename from src/time_series_datasets/pamap2/PAMAP2AccQADataset.py rename to src/opentslm/time_series_datasets/pamap2/PAMAP2AccQADataset.py index dea805eb..2dca534b 100644 --- a/src/time_series_datasets/pamap2/PAMAP2AccQADataset.py +++ b/src/opentslm/time_series_datasets/pamap2/PAMAP2AccQADataset.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -8,11 +7,11 @@ from typing import List, Tuple import numpy as np -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.pamap2.PAMAP2Dataset import PAMAP2Dataset, ACTIVITIY_ID_DICT -from time_series_datasets.pamap2.pamap2_loader import PAMAP2_DIR -from time_series_datasets.util import ( +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.pamap2.PAMAP2Dataset import PAMAP2Dataset, ACTIVITIY_ID_DICT +from opentslm.time_series_datasets.pamap2.pamap2_loader import PAMAP2_DIR +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) import torch diff --git a/src/time_series_datasets/pamap2/PAMAP2CoTQADataset.py b/src/opentslm/time_series_datasets/pamap2/PAMAP2CoTQADataset.py similarity index 94% rename from src/time_series_datasets/pamap2/PAMAP2CoTQADataset.py rename to src/opentslm/time_series_datasets/pamap2/PAMAP2CoTQADataset.py index 1b6c5a5e..540903f4 100644 --- a/src/time_series_datasets/pamap2/PAMAP2CoTQADataset.py +++ b/src/opentslm/time_series_datasets/pamap2/PAMAP2CoTQADataset.py @@ -1,27 +1,24 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT from datasets import Dataset from typing import List, Tuple, Literal -import sys import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.pamap2.pamap2_cot_loader import load_pamap2_cot_splits +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.pamap2.pamap2_cot_loader import load_pamap2_cot_splits import torch from torch.utils.data import DataLoader from tqdm.auto import tqdm -from time_series_datasets.util import ( +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) from collections import defaultdict import numpy as np from torch.utils.data import Sampler -from time_series_datasets.pamap2.BalancedBatchSampler import BalancedBatchSampler +from opentslm.time_series_datasets.pamap2.BalancedBatchSampler import BalancedBatchSampler TIME_SERIES_LABELS = [ diff --git a/src/time_series_datasets/pamap2/PAMAP2Dataset.py b/src/opentslm/time_series_datasets/pamap2/PAMAP2Dataset.py similarity index 97% rename from src/time_series_datasets/pamap2/PAMAP2Dataset.py rename to src/opentslm/time_series_datasets/pamap2/PAMAP2Dataset.py index bf485a9d..3337c94b 100644 --- a/src/time_series_datasets/pamap2/PAMAP2Dataset.py +++ b/src/opentslm/time_series_datasets/pamap2/PAMAP2Dataset.py @@ -1,12 +1,11 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT from typing import Tuple import pandas as pd -from time_series_datasets.pamap2.pamap2_loader import ensure_pamap2_data +from opentslm.time_series_datasets.pamap2.pamap2_loader import ensure_pamap2_data from torch.utils.data import Dataset ACTIVITIY_ID_DICT = { diff --git a/src/opentslm/time_series_datasets/pamap2/__init__.py b/src/opentslm/time_series_datasets/pamap2/__init__.py new file mode 100644 index 00000000..cbad1615 --- /dev/null +++ b/src/opentslm/time_series_datasets/pamap2/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. +# +# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/pamap2/pamap2_cot_loader.py b/src/opentslm/time_series_datasets/pamap2/pamap2_cot_loader.py similarity index 97% rename from src/time_series_datasets/pamap2/pamap2_cot_loader.py rename to src/opentslm/time_series_datasets/pamap2/pamap2_cot_loader.py index de38ec0c..1e11d93a 100644 --- a/src/time_series_datasets/pamap2/pamap2_cot_loader.py +++ b/src/opentslm/time_series_datasets/pamap2/pamap2_cot_loader.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -12,10 +11,10 @@ import urllib.request import zipfile import shutil -from time_series_datasets.constants import RAW_DATA +from opentslm.time_series_datasets.constants import RAW_DATA import math import logging -from logger import get_logger +from opentslm.logger import get_logger PAMAP_DATA_DIR = os.path.join(RAW_DATA, "pamap") diff --git a/src/time_series_datasets/pamap2/pamap2_loader.py b/src/opentslm/time_series_datasets/pamap2/pamap2_loader.py similarity index 94% rename from src/time_series_datasets/pamap2/pamap2_loader.py rename to src/opentslm/time_series_datasets/pamap2/pamap2_loader.py index 9f32ba0f..11361971 100644 --- a/src/time_series_datasets/pamap2/pamap2_loader.py +++ b/src/opentslm/time_series_datasets/pamap2/pamap2_loader.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -8,7 +7,7 @@ import zipfile import requests -from time_series_datasets.constants import RAW_DATA as RAW_DATA_PATH +from opentslm.time_series_datasets.constants import RAW_DATA as RAW_DATA_PATH # --------------------------- diff --git a/src/time_series_datasets/simulation/SimulationQADataset.py b/src/opentslm/time_series_datasets/simulation/SimulationQADataset.py similarity index 96% rename from src/time_series_datasets/simulation/SimulationQADataset.py rename to src/opentslm/time_series_datasets/simulation/SimulationQADataset.py index fb1fdbf6..7af24138 100644 --- a/src/time_series_datasets/simulation/SimulationQADataset.py +++ b/src/opentslm/time_series_datasets/simulation/SimulationQADataset.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -9,8 +8,8 @@ from typing import List, Tuple from datasets import Dataset -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset DATASET_SIZE = 200 diff --git a/src/time_series_datasets/sleep/SleepEDFCoTQADataset.py b/src/opentslm/time_series_datasets/sleep/SleepEDFCoTQADataset.py similarity index 90% rename from src/time_series_datasets/sleep/SleepEDFCoTQADataset.py rename to src/opentslm/time_series_datasets/sleep/SleepEDFCoTQADataset.py index 09217b6f..9dbdd257 100644 --- a/src/time_series_datasets/sleep/SleepEDFCoTQADataset.py +++ b/src/opentslm/time_series_datasets/sleep/SleepEDFCoTQADataset.py @@ -1,17 +1,14 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT from datasets import Dataset from typing import List, Tuple, Literal -import sys import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.sleep.sleepedf_cot_loader import load_sleepedf_cot_splits +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.sleep.sleepedf_cot_loader import load_sleepedf_cot_splits import numpy as np class SleepEDFCoTQADataset(QADataset): diff --git a/src/time_series_datasets/sleep/SleepEDFQADataset.py b/src/opentslm/time_series_datasets/sleep/SleepEDFQADataset.py similarity index 92% rename from src/time_series_datasets/sleep/SleepEDFQADataset.py rename to src/opentslm/time_series_datasets/sleep/SleepEDFQADataset.py index bac0f4ea..75230e4e 100644 --- a/src/time_series_datasets/sleep/SleepEDFQADataset.py +++ b/src/opentslm/time_series_datasets/sleep/SleepEDFQADataset.py @@ -1,20 +1,14 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT from datasets import Dataset from typing import List, Tuple, Literal -import sys import os - -sys.path.append( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -) -from prompt.text_time_series_prompt import TextTimeSeriesPrompt -from time_series_datasets.QADataset import QADataset -from time_series_datasets.sleep.sleepedf_cot_loader import load_sleepedf_cot_splits +from opentslm.prompt.text_time_series_prompt import TextTimeSeriesPrompt +from opentslm.time_series_datasets.QADataset import QADataset +from opentslm.time_series_datasets.sleep.sleepedf_cot_loader import load_sleepedf_cot_splits import numpy as np diff --git a/src/opentslm/time_series_datasets/sleep/__init__.py b/src/opentslm/time_series_datasets/sleep/__init__.py new file mode 100644 index 00000000..cbad1615 --- /dev/null +++ b/src/opentslm/time_series_datasets/sleep/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. +# +# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/sleep/sleepedf_cot_loader.py b/src/opentslm/time_series_datasets/sleep/sleepedf_cot_loader.py similarity index 97% rename from src/time_series_datasets/sleep/sleepedf_cot_loader.py rename to src/opentslm/time_series_datasets/sleep/sleepedf_cot_loader.py index c6f1db27..c11ca466 100644 --- a/src/time_series_datasets/sleep/sleepedf_cot_loader.py +++ b/src/opentslm/time_series_datasets/sleep/sleepedf_cot_loader.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -10,7 +9,7 @@ from typing import Tuple, Dict import ast import urllib.request -from time_series_datasets.constants import RAW_DATA +from opentslm.time_series_datasets.constants import RAW_DATA from tqdm.auto import tqdm from sklearn.model_selection import train_test_split diff --git a/src/time_series_datasets/ucr/__init__.py b/src/opentslm/time_series_datasets/ucr/__init__.py similarity index 70% rename from src/time_series_datasets/ucr/__init__.py rename to src/opentslm/time_series_datasets/ucr/__init__.py index 29c6eab4..5cd50446 100644 --- a/src/time_series_datasets/ucr/__init__.py +++ b/src/opentslm/time_series_datasets/ucr/__init__.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/ucr/ucr_loader.py b/src/opentslm/time_series_datasets/ucr/ucr_loader.py similarity index 98% rename from src/time_series_datasets/ucr/ucr_loader.py rename to src/opentslm/time_series_datasets/ucr/ucr_loader.py index 285f8833..0ccf44d9 100644 --- a/src/time_series_datasets/ucr/ucr_loader.py +++ b/src/opentslm/time_series_datasets/ucr/ucr_loader.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/util.py b/src/opentslm/time_series_datasets/util.py similarity index 93% rename from src/time_series_datasets/util.py rename to src/opentslm/time_series_datasets/util.py index 3afa6178..8b924db2 100644 --- a/src/time_series_datasets/util.py +++ b/src/opentslm/time_series_datasets/util.py @@ -1,12 +1,11 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT import math from typing import List -from model_config import PATCH_SIZE +from opentslm.model_config import PATCH_SIZE import torch.nn.functional as F diff --git a/src/prompt/__init__.py b/src/prompt/__init__.py deleted file mode 100644 index 6882dbc6..00000000 --- a/src/prompt/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT - -# from .prompt import Prompt -# from .full_prompt import FullPrompt -# from .text_prompt import TextPrompt -# from .text_time_series_prompt import TextTimeSeriesPrompt -# from .prompt_with_answer import PromptWithAnswer diff --git a/src/time_series_datasets/__init__.py b/src/time_series_datasets/__init__.py deleted file mode 100644 index 266b1365..00000000 --- a/src/time_series_datasets/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/ecg_qa/dataset_distribution.png.license 
b/src/time_series_datasets/ecg_qa/dataset_distribution.png.license deleted file mode 100644 index e83264aa..00000000 --- a/src/time_series_datasets/ecg_qa/dataset_distribution.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/ecg_qa/ecg_example_real.png.license b/src/time_series_datasets/ecg_qa/ecg_example_real.png.license deleted file mode 100644 index e83264aa..00000000 --- a/src/time_series_datasets/ecg_qa/ecg_example_real.png.license +++ /dev/null @@ -1,5 +0,0 @@ -This source file is part of the OpenTSLM open-source project - -SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/har_cot/__init__.py b/src/time_series_datasets/har_cot/__init__.py deleted file mode 100644 index 266b1365..00000000 --- a/src/time_series_datasets/har_cot/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/monash/__init__.py b/src/time_series_datasets/monash/__init__.py deleted file mode 100644 index 266b1365..00000000 --- a/src/time_series_datasets/monash/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/pamap2/__init__.py b/src/time_series_datasets/pamap2/__init__.py deleted file mode 100644 index 266b1365..00000000 --- 
a/src/time_series_datasets/pamap2/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/sleep/__init__.py b/src/time_series_datasets/sleep/__init__.py deleted file mode 100644 index 266b1365..00000000 --- a/src/time_series_datasets/sleep/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT diff --git a/src/time_series_datasets/tsqa.py b/src/time_series_datasets/tsqa.py deleted file mode 100644 index 2657d4dd..00000000 --- a/src/time_series_datasets/tsqa.py +++ /dev/null @@ -1,25 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT - -from typing import Literal, Optional - -from time_series_datasets.util import load_qa_dataset -from datasets import load_dataset -from src.model_config import * - - -def get_tsqa_dataset( - split: Literal["train", "validation", "test"] = "train", - *, - EOS_TOKEN, - max_samples: Optional[int] = None, -): - return load_qa_dataset( - load_dataset("ChengsenWang/TSQA", split="train"), - split=split, - max_samples=max_samples, - EOS_TOKEN=EOS_TOKEN, - ) diff --git a/test/.gitignore b/test/.gitignore deleted file mode 100644 index 935582c2..00000000 --- a/test/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -# This source file is part of the OpenTSLM open-source project -# -# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) -# -# SPDX-License-Identifier: MIT - -data \ No newline at end of file 
diff --git a/test/__init__.py b/test/__init__.py index 266b1365..cbad1615 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,5 +1,4 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/test/ecg_qa_cot_test.py b/test/ecg_qa_cot_test.py index f9a251d6..d7dff451 100644 --- a/test/ecg_qa_cot_test.py +++ b/test/ecg_qa_cot_test.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -11,16 +10,12 @@ """ import unittest -import sys -import os -sys.path.append(os.path.join(os.path.dirname(__file__), '..', "src")) - -# Import and set up global logger with verbose mode -from logger import get_logger, set_global_verbose +from opentslm.time_series_datasets.ecg_qa.ECGQACoTQADataset import ECGQACoTQADataset +from opentslm.logger import get_logger, set_global_verbose def pretty_print_label_distribution(dataset, name): """Pretty print label distribution for a dataset.""" - from time_series_datasets.ecg_qa.ecgqa_cot_loader import get_label_distribution + from opentslm.time_series_datasets.ecg_qa.ecgqa_cot_loader import get_label_distribution label_dist = get_label_distribution(dataset) total = len(dataset) print(f"\n{name} dataset:") @@ -38,7 +33,7 @@ def setUp(self): set_global_verbose(True) self.logger = get_logger() - from time_series_datasets.ecg_qa.ecgqa_cot_loader import load_ecg_qa_cot_splits + from opentslm.time_series_datasets.ecg_qa.ecgqa_cot_loader import load_ecg_qa_cot_splits self.load_ecg_qa_cot_splits = 
load_ecg_qa_cot_splits self.logger.loading("Loading ECG-QA CoT dataset splits...") @@ -60,7 +55,7 @@ def test_label_distribution(self): pretty_print_label_distribution(self.val, "Validation") pretty_print_label_distribution(self.test, "Test") - from time_series_datasets.ecg_qa.ecgqa_cot_loader import get_label_distribution + from opentslm.time_series_datasets.ecg_qa.ecgqa_cot_loader import get_label_distribution train_dist = get_label_distribution(self.train) val_dist = get_label_distribution(self.val) test_dist = get_label_distribution(self.test) @@ -192,7 +187,7 @@ def setUp(self): set_global_verbose(True) self.logger = get_logger() - from time_series_datasets.ecg_qa import ECGQACoTQADataset + from opentslm.time_series_datasets.ecg_qa import ECGQACoTQADataset self.ECGQACoTQADataset = ECGQACoTQADataset self.logger.loading("Initializing ECGQACoTQADataset...") @@ -387,7 +382,7 @@ def test_error_handling_missing_fields(self): self.logger.info("Testing error handling for missing fields...") # Create a sample with missing fields to test error handling - from time_series_datasets.ecg_qa import ECGQACoTQADataset + from opentslm.time_series_datasets.ecg_qa import ECGQACoTQADataset # This should work normally try: diff --git a/test/ecg_qa_test.py b/test/ecg_qa_test.py index 2451c4df..82bfc304 100644 --- a/test/ecg_qa_test.py +++ b/test/ecg_qa_test.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -18,17 +17,14 @@ Usage: python test_ecgqa.py """ - import sys -import sys -import os -sys.path.append(os.path.join(os.path.dirname(__file__), '..', "src")) + def test_ecgqa_loader(): """Test the ECG-QA loader functions.""" print("Testing ECG-QA loader...") try: - from time_series_datasets.ecg_qa.ecgqa_loader import ( + from opentslm.time_series_datasets.ecg_qa.ecgqa_loader import ( does_ecg_qa_exist, does_ptbxl_exist, download_ecg_qa_if_not_exists, @@ -57,7 +53,7 @@ def test_ecgqa_dataset(): print("\nTesting ECGQADataset...") try: - from time_series_datasets.ecg_qa.ECGQADataset import ECGQADataset + from opentslm.time_series_datasets.ecg_qa.ECGQADataset import ECGQADataset # Try to create dataset instances with limited samples for faster testing print("Creating dataset instances (limited to 5 samples each for testing)...") diff --git a/test/elastic_ddp.py b/test/elastic_ddp.py index 41ccbb6d..4d8fea5c 100644 --- a/test/elastic_ddp.py +++ b/test/elastic_ddp.py @@ -1,6 +1,5 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT diff --git a/test/eval_EmbedHealthFlamingo.py b/test/eval_EmbedHealthFlamingo.py index c92e4881..ecf9557c 100644 --- a/test/eval_EmbedHealthFlamingo.py +++ b/test/eval_EmbedHealthFlamingo.py @@ -1,23 +1,22 @@ -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT import json from typing import List -from time_series_datasets.TSQADataset import TSQADataset -from time_series_datasets.monash.MonashSPO2QADataset import MonashSPO2QADataset -from time_series_datasets.util import ( +from opentslm.time_series_datasets.TSQADataset import TSQADataset +from opentslm.time_series_datasets.monash.MonashSPO2QADataset import MonashSPO2QADataset +from opentslm.time_series_datasets.util import ( extend_time_series_to_match_patch_size_and_aggregate, ) import torch from torch.utils.data import ConcatDataset, DataLoader, Dataset from tqdm.auto import tqdm -from model.encoder.TransformerCNNEncoder import TransformerCNNEncoder -from model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo -from src.model_config import ( +from opentslm.model.encoder.TransformerCNNEncoder import TransformerCNNEncoder +from opentslm.model.llm.OpenTSLMFlamingo import OpenTSLMFlamingo +from opentslm.model_config import ( PATCH_SIZE, RESULTS_FILE, ) diff --git a/test/har_cot_test.py b/test/har_cot_test.py index 08de573e..017070e7 100644 --- a/test/har_cot_test.py +++ b/test/har_cot_test.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -11,16 +10,13 @@ """ import unittest -import sys -import os -sys.path.append(os.path.join(os.path.dirname(__file__), '..', "src")) # Import and set up global logger with verbose mode -from logger import get_logger, set_global_verbose +from opentslm.logger import get_logger, set_global_verbose def pretty_print_label_distribution(dataset, name): """Pretty print label distribution for a dataset.""" - from time_series_datasets.har_cot.har_cot_loader import get_label_distribution + from opentslm.time_series_datasets.har_cot.har_cot_loader import get_label_distribution label_dist = get_label_distribution(dataset) total = len(dataset) print(f"\n{name} dataset:") @@ -38,7 +34,7 @@ def setUp(self): set_global_verbose(True) self.logger = get_logger() - from time_series_datasets.har_cot.har_cot_loader import load_har_cot_splits + from opentslm.time_series_datasets.har_cot.har_cot_loader import load_har_cot_splits self.load_har_cot_splits = load_har_cot_splits self.logger.loading("Loading HAR CoT dataset splits...") @@ -60,7 +56,7 @@ def test_label_distribution(self): pretty_print_label_distribution(self.val, "Validation") pretty_print_label_distribution(self.test, "Test") - from time_series_datasets.har_cot.har_cot_loader import get_label_distribution + from opentslm.time_series_datasets.har_cot.har_cot_loader import get_label_distribution train_dist = get_label_distribution(self.train) val_dist = get_label_distribution(self.val) test_dist = get_label_distribution(self.test) @@ -150,7 +146,7 @@ def setUp(self): set_global_verbose(True) self.logger = get_logger() - from time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset + from opentslm.time_series_datasets.har_cot.HARCoTQADataset import HARCoTQADataset self.HARCoTQADataset = HARCoTQADataset self.logger.loading("Initializing HARCoTQADataset...") diff --git a/test/m4_loader_test.py b/test/m4_loader_test.py index 1df44235..dbc82f26 100644 --- a/test/m4_loader_test.py +++ 
b/test/m4_loader_test.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. # # SPDX-License-Identifier: MIT @@ -11,17 +10,14 @@ """ import unittest -import sys -import os -sys.path.append(os.path.join(os.path.dirname(__file__), '..', "src")) +from opentslm.time_series_datasets.m4.M4QADataset import M4QADataset as _M4QADataset class TestM4QADataset(unittest.TestCase): """ Unit tests for the M4QADataset class and loader. """ def setUp(self): - from time_series_datasets.m4.M4QADataset import M4QADataset - self.M4QADataset = M4QADataset + self.M4QADataset = _M4QADataset self.train_dataset = self.M4QADataset("train", "") self.val_dataset = self.M4QADataset("validation", "") self.test_dataset = self.M4QADataset("test", "") diff --git a/test/pamap_cot_loader_test.py b/test/pamap_cot_loader_test.py index ab4f55bb..fdb2e5f3 100644 --- a/test/pamap_cot_loader_test.py +++ b/test/pamap_cot_loader_test.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -# This source file is part of the OpenTSLM open-source project -# # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) +# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project. 
# # SPDX-License-Identifier: MIT @@ -11,13 +10,12 @@ """ import unittest -import sys -import os -sys.path.append(os.path.join(os.path.dirname(__file__), '..', "src")) +from opentslm.logger import get_logger, set_global_verbose +from opentslm.time_series_datasets.pamap2.BalancedBatchSampler import BalancedBatchSampler # Import and set up global logger with verbose mode -from logger import get_logger, set_global_verbose -from time_series_datasets.pamap2.BalancedBatchSampler import BalancedBatchSampler +from opentslm.logger import get_logger, set_global_verbose +from opentslm.time_series_datasets.pamap2.BalancedBatchSampler import BalancedBatchSampler class TestPAMAP2CoTLoader(unittest.TestCase): """ Unit tests for the PAMAP2 CoT loader functions. @@ -27,7 +25,7 @@ def setUp(self): set_global_verbose(True) self.logger = get_logger() - from time_series_datasets.pamap2.pamap2_cot_loader import load_pamap2_cot_splits + from opentslm.time_series_datasets.pamap2.pamap2_cot_loader import load_pamap2_cot_splits self.load_pamap2_cot_splits = load_pamap2_cot_splits self.logger.loading("Loading PAMAP2 CoT dataset splits...") @@ -90,7 +88,7 @@ def setUp(self): set_global_verbose(True) self.logger = get_logger() - from time_series_datasets.pamap2.PAMAP2CoTQADataset import PAMAP2CoTQADataset + from opentslm.time_series_datasets.pamap2.PAMAP2CoTQADataset import PAMAP2CoTQADataset self.PAMAP2CoTQADataset = PAMAP2CoTQADataset self.logger.loading("Initializing PAMAP2CoTQADataset...") @@ -196,8 +194,8 @@ def test_balanced_batches_pamap2cot(self): Test that BalancedBatchSampler produces balanced batches on the real PAMAP2CoTQADataset training split. Prints batch labels and class counts for each batch. 
""" - from time_series_datasets.pamap2.PAMAP2CoTQADataset import PAMAP2CoTQADataset - from time_series_datasets.pamap2.BalancedBatchSampler import BalancedBatchSampler + from opentslm.time_series_datasets.pamap2.PAMAP2CoTQADataset import PAMAP2CoTQADataset + from opentslm.time_series_datasets.pamap2.BalancedBatchSampler import BalancedBatchSampler # Helper to extract label from answer string def extract_label_from_answer(answer: str) -> str: # Assumes answer ends with 'Answer: