From 1f6e7efbd5ca472de2150a3e748da02fe0551657 Mon Sep 17 00:00:00 2001 From: ep150de Date: Mon, 9 Feb 2026 16:47:46 -0800 Subject: [PATCH 1/3] Add SDO real-time data fetcher tool Adds a new tools/ directory with SDO data acquisition utility for fetching real-time Solar Dynamics Observatory observations. Features: - Fetch latest SDO/AIA and HMI observations (updated every 12 seconds) - Support for all 8 AIA channels and HMI instruments used in Surya - Command-line interface and Python API - Advanced monitoring and batch download capabilities - Designed to complement Surya's preprocessed HuggingFace datasets Use Cases: - Real-time solar activity monitoring with current observations - Custom inference on latest SDO data - Fine-tuning dataset creation from recent events - Quick data exploration without downloading full datasets - Educational purposes and research prototyping This tool bridges the gap between Surya's historical training data (2011-2019) and current solar observations, enabling researchers to apply the model to real-time space weather monitoring and recent solar events. 
Files added: - tools/sdo_data_fetcher/sdo_fetcher_v2.py (main fetcher) - tools/sdo_data_fetcher/sdo_advanced_examples.py (advanced features) - tools/sdo_data_fetcher/requirements.txt (dependencies: requests) - tools/sdo_data_fetcher/README.md (documentation) - tools/README.md (tools directory overview) --- tools/README.md | 39 +++ tools/sdo_data_fetcher/README.md | 148 +++++++++ tools/sdo_data_fetcher/requirements.txt | 1 + .../sdo_data_fetcher/sdo_advanced_examples.py | 290 ++++++++++++++++++ tools/sdo_data_fetcher/sdo_fetcher_v2.py | 285 +++++++++++++++++ 5 files changed, 763 insertions(+) create mode 100644 tools/README.md create mode 100644 tools/sdo_data_fetcher/README.md create mode 100644 tools/sdo_data_fetcher/requirements.txt create mode 100644 tools/sdo_data_fetcher/sdo_advanced_examples.py create mode 100644 tools/sdo_data_fetcher/sdo_fetcher_v2.py diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..2701db5 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,39 @@ +# šŸ› ļø Surya Tools + +Utility tools and scripts to support the Surya foundation model ecosystem. + +## Available Tools + +### šŸ“” SDO Real-Time Data Fetcher + +Lightweight tool for fetching real-time Solar Dynamics Observatory observations. + +**Location**: `tools/sdo_data_fetcher/` + +**Purpose**: +- Fetch latest SDO/AIA and HMI observations +- Download specific wavelengths for custom analysis +- Real-time solar activity monitoring +- Quick data exploration without full dataset downloads + +**Quick Start**: +```bash +cd tools/sdo_data_fetcher +pip install -r requirements.txt +python sdo_fetcher_v2.py --list +``` + +See [sdo_data_fetcher/README.md](sdo_data_fetcher/README.md) for detailed documentation. + +--- + +## Contributing New Tools + +We welcome additional tools that support the Surya ecosystem! 
Consider contributing: +- Data preprocessing utilities +- Visualization tools +- Custom dataset generators +- Analysis scripts +- Integration helpers + +Please follow the repository's contribution guidelines when adding new tools. diff --git a/tools/sdo_data_fetcher/README.md b/tools/sdo_data_fetcher/README.md new file mode 100644 index 0000000..4a7dcad --- /dev/null +++ b/tools/sdo_data_fetcher/README.md @@ -0,0 +1,148 @@ +# šŸŒž SDO Real-Time Data Fetcher + +A lightweight Python tool for fetching real-time Solar Dynamics Observatory (SDO) data, designed to complement the Surya foundation model's data pipeline. + +## Overview + +While Surya uses preprocessed SDO data from HuggingFace, this tool enables researchers to: +- Fetch the **latest real-time** SDO observations (updated every 12 seconds) +- Download specific wavelengths for custom analysis +- Monitor solar activity in real-time +- Create custom training datasets from recent observations + +## Why This Tool? + +The Surya foundation model is trained on SDO data spanning 2011-2019. 
This tool bridges the gap by providing: +- **Real-time observations** for current solar activity monitoring +- **Quick data exploration** without downloading large preprocessed datasets +- **Custom data collection** for fine-tuning on recent events +- **Educational purposes** for understanding SDO instruments + +## šŸš€ Quick Start + +### Installation + +```bash +cd tools/sdo_data_fetcher +pip install -r requirements.txt +``` + +### Basic Usage + +```bash +# Fetch latest AIA 171ƅ image (most common for Surya) +python sdo_fetcher_v2.py --source AIA_171 + +# Fetch multiple wavelengths used in Surya training +python sdo_fetcher_v2.py --multiple + +# List all available SDO sources +python sdo_fetcher_v2.py --list +``` + +### Integration with Surya + +```python +from sdo_fetcher_v2 import SDOFetcher + +# Fetch the 8 AIA channels used in Surya +aia_channels = ["AIA_94", "AIA_131", "AIA_171", "AIA_193", + "AIA_211", "AIA_304", "AIA_335", "AIA_1600"] + +fetcher = SDOFetcher(output_dir="surya_inference_data") +results = fetcher.download_multiple(aia_channels) + +# Images are now ready for preprocessing and Surya inference +``` + +## šŸ“” Available Data Sources + +The tool provides access to all SDO/AIA and HMI channels: + +### AIA Channels (used in Surya) +- **AIA 94, 131, 171, 193, 211, 304, 335, 1600** - The 8 AIA channels Surya was trained on +- **AIA 1700** - Additional AIA channel + +### HMI Channels (used in Surya) +- **HMI Magnetogram** - Magnetic field measurements (5 channels in Surya model) +- **HMI Continuum** - Visible light solar surface + +## šŸ”¬ Use Cases with Surya + +### 1. Real-Time Solar Activity Monitoring +```bash +# Monitor current solar activity with Surya's primary wavelengths +python sdo_advanced_examples.py # Select option 3: Space Weather Check +``` + +### 2. 
Custom Inference on Latest Data +```python +# Fetch latest multi-channel data +fetcher = SDOFetcher(output_dir="latest_obs") +fetcher.download_multiple(["AIA_171", "AIA_193", "AIA_211", "HMI_Magnetogram"]) + +# Preprocess for Surya (user would add their preprocessing pipeline) +# Run Surya inference on current solar conditions +``` + +### 3. Fine-tuning Dataset Creation +```bash +# Continuous monitoring to build recent event datasets +python sdo_advanced_examples.py # Select option 5: Continuous monitoring +``` + +### 4. Validation Data Collection +- Fetch observations from specific dates for model validation +- Compare Surya forecasts with actual observations +- Track model performance on recent solar events + +## šŸ“Š Output Format + +Each download produces: +- **JPG image** (1024Ɨ1024) - Ready for quick visualization +- **JSON metadata** - Observation timestamp and source info + +For Surya integration, images can be: +1. Loaded and preprocessed to match Surya's input format (4096Ɨ4096, normalized) +2. Aligned temporally for multi-channel input +3. Used for inference or fine-tuning + +## šŸŽÆ Advanced Features + +The `sdo_advanced_examples.py` script includes: +- Multi-wavelength comparison sets +- Active region monitoring (useful for flare forecasting tasks) +- Space weather assessment +- Continuous monitoring for time-series data collection + +## šŸ”„ Data Pipeline Integration + +``` +Real-Time SDO This Tool Preprocessing Surya Model +Observations → sdo_fetcher_v2.py → (user's code) → Inference +(12s cadence) (download JPGs) (resize, align) (forecasting) +``` + +## šŸ“– Documentation + +- **Surya Model**: See main repository README for model architecture and capabilities +- **SDO Mission**: [https://sdo.gsfc.nasa.gov/](https://sdo.gsfc.nasa.gov/) +- **Data Specs**: Images at 1024Ɨ1024 (can be upsampled to Surya's 4096Ɨ4096) + +## šŸ¤ Contributing + +This tool is designed to be lightweight and focused on data acquisition. 
For preprocessing pipelines specific to Surya, please contribute to the main model repository. + +## āš–ļø License + +MIT License - Free for research and educational purposes. + +## 🌟 Acknowledgments + +- **NASA/SDO** for open solar data +- **NASA-IMPACT Surya Team** for the foundation model +- Data sourced from NASA's SDO mission servers + +--- + +**Note**: This tool fetches 1024Ɨ1024 images for quick access. For full-resolution 4096Ɨ4096 SDO data as used in Surya training, refer to the HuggingFace datasets in the main repository. diff --git a/tools/sdo_data_fetcher/requirements.txt b/tools/sdo_data_fetcher/requirements.txt new file mode 100644 index 0000000..0eb8cae --- /dev/null +++ b/tools/sdo_data_fetcher/requirements.txt @@ -0,0 +1 @@ +requests>=2.31.0 diff --git a/tools/sdo_data_fetcher/sdo_advanced_examples.py b/tools/sdo_data_fetcher/sdo_advanced_examples.py new file mode 100644 index 0000000..3937107 --- /dev/null +++ b/tools/sdo_data_fetcher/sdo_advanced_examples.py @@ -0,0 +1,290 @@ +""" +Advanced SDO Examples - Building on the basic fetcher +Demonstrates monitoring, time-series, and composite image creation +""" + +import time +from datetime import datetime, timezone +from pathlib import Path +from sdo_fetcher_v2 import SDOFetcher + + +def continuous_monitor(interval_seconds=300, sources=None): + """ + Continuously monitor and download SDO images at specified intervals + + Args: + interval_seconds: Time between downloads (default: 5 minutes) + sources: List of sources to monitor (default: AIA_171) + """ + if sources is None: + sources = ["AIA_171"] + + fetcher = SDOFetcher(output_dir="monitoring") + + print(f"Starting continuous monitoring...") + print(f"Sources: {', '.join(sources)}") + print(f"Interval: {interval_seconds} seconds") + print(f"Press Ctrl+C to stop\n") + + iteration = 0 + try: + while True: + iteration += 1 + timestamp = datetime.now(timezone.utc).isoformat() + + print(f"\n{'='*60}") + print(f"Iteration #{iteration} at {timestamp}") + 
print(f"{'='*60}") + + for source in sources: + try: + result = fetcher.get_latest_image_direct(source) + if result: + print(f"āœ“ {source} downloaded successfully") + else: + print(f"āœ— {source} failed") + except Exception as e: + print(f"āœ— Error downloading {source}: {e}") + + print(f"\nWaiting {interval_seconds} seconds until next download...") + time.sleep(interval_seconds) + + except KeyboardInterrupt: + print(f"\n\nMonitoring stopped. Downloaded {iteration} sets of images.") + print(f"Images saved in: {fetcher.output_dir}") + + +def download_comparison_set(): + """ + Download a comparison set of multiple wavelengths + Useful for multi-wavelength solar analysis + """ + print("\n" + "="*60) + print("Downloading Multi-Wavelength Comparison Set") + print("="*60 + "\n") + + # Select complementary wavelengths + sources = [ + "AIA_171", # Quiet corona + "AIA_193", # Active regions + "AIA_304", # Prominences + "AIA_211", # Active regions (hotter) + "HMI_Magnetogram", # Magnetic field + "HMI_Continuum", # Visible surface + ] + + fetcher = SDOFetcher(output_dir="comparison_set") + results = fetcher.download_multiple(sources) + + print("\n" + "="*60) + print("Comparison Set Complete!") + print("="*60) + print(f"Downloaded {len(results)} images") + print("\nUse these for:") + print(" - Multi-wavelength composite images") + print(" - Temperature analysis") + print(" - Active region identification") + print(" - Prominence and filament studies") + print("="*60 + "\n") + + return results + + +def download_active_region_set(): + """ + Download wavelengths optimal for observing active regions and flares + """ + print("\n" + "="*60) + print("Downloading Active Region / Flare Observation Set") + print("="*60 + "\n") + + # Wavelengths best for active regions and flares + sources = [ + "AIA_94", # Hot flare plasma + "AIA_131", # Flaring regions + "AIA_193", # Active regions + "AIA_211", # Active regions + "HMI_Magnetogram", # Magnetic field + ] + + fetcher = 
SDOFetcher(output_dir="active_regions") + results = fetcher.download_multiple(sources) + + print("\nActive region monitoring complete!") + print("Check these images for:") + print(" - Solar flares (bright spots in 94ƅ and 131ƅ)") + print(" - Active region structure (193ƅ, 211ƅ)") + print(" - Sunspot magnetic complexity (HMI Magnetogram)") + + return results + + +def quick_space_weather_check(): + """ + Quick download for space weather assessment + """ + print("\n" + "="*60) + print("SPACE WEATHER QUICK CHECK") + print("="*60 + "\n") + + fetcher = SDOFetcher(output_dir="space_weather") + + # Get the most relevant images for space weather + sources = ["AIA_193", "HMI_Magnetogram"] + + print("Downloading key space weather indicators...") + results = fetcher.download_multiple(sources) + + if len(results) == 2: + print("\n" + "="*60) + print("READY FOR ANALYSIS") + print("="*60) + print("\nCheck the images for:") + print(" šŸ“ø AIA 193: Active regions and coronal holes") + print(" 🧲 HMI Magnetogram: Complex magnetic fields (flare potential)") + print("\nLook for:") + print(" āš ļø Dark regions = coronal holes → fast solar wind") + print(" āš ļø Bright active regions = potential for flares") + print(" āš ļø Complex magnetograms = higher flare risk") + print("="*60 + "\n") + + return results + + +def download_prominence_monitoring(): + """ + Download wavelengths optimal for prominence/filament observation + """ + print("\n" + "="*60) + print("Prominence/Filament Monitoring Set") + print("="*60 + "\n") + + # Best wavelengths for prominences + sources = [ + "AIA_304", # Primary prominence wavelength + "AIA_171", # Context (corona) + "HMI_Continuum", # Visible disk + ] + + fetcher = SDOFetcher(output_dir="prominences") + results = fetcher.download_multiple(sources) + + print("\nProminence monitoring complete!") + print("304ƅ is best for seeing prominences on the solar limb") + + return results + + +def create_monitoring_script(): + """ + Generate a standalone monitoring 
script + """ + script_content = '''#!/usr/bin/env python3 +""" +Automated SDO Monitoring Script +Runs continuously and downloads images every 15 minutes +""" + +import time +from datetime import datetime, timezone +from sdo_fetcher_v2 import SDOFetcher +import logging + +# Setup logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler('sdo_monitor.log'), + logging.StreamHandler() + ] +) + +def main(): + fetcher = SDOFetcher(output_dir="continuous_monitoring") + sources = ["AIA_171", "AIA_193", "HMI_Magnetogram"] + interval = 900 # 15 minutes + + logging.info("Starting SDO continuous monitoring") + logging.info(f"Sources: {sources}") + logging.info(f"Interval: {interval} seconds") + + iteration = 0 + while True: + try: + iteration += 1 + logging.info(f"=== Iteration {iteration} ===") + + for source in sources: + try: + result = fetcher.get_latest_image_direct(source) + if result: + logging.info(f"āœ“ Downloaded {source}") + except Exception as e: + logging.error(f"āœ— Failed to download {source}: {e}") + + logging.info(f"Waiting {interval} seconds...") + time.sleep(interval) + + except KeyboardInterrupt: + logging.info("Monitoring stopped by user") + break + except Exception as e: + logging.error(f"Unexpected error: {e}") + time.sleep(60) # Wait 1 minute before retrying + +if __name__ == "__main__": + main() +''' + + with open("monitoring_daemon.py", 'w') as f: + f.write(script_content) + + print("\nāœ“ Created 'monitoring_daemon.py'") + print("Run it with: python monitoring_daemon.py") + print("It will continuously download SDO images every 15 minutes") + + +def main(): + """Main menu for advanced examples""" + print("\n" + "="*60) + print("SDO Advanced Examples") + print("="*60) + print("\n1. Download multi-wavelength comparison set") + print("2. Download active region/flare observation set") + print("3. Quick space weather check") + print("4. 
Download prominence monitoring set") + print("5. Start continuous monitoring (Ctrl+C to stop)") + print("6. Create monitoring daemon script") + print("7. Exit") + + choice = input("\nSelect option (1-7): ").strip() + + if choice == "1": + download_comparison_set() + elif choice == "2": + download_active_region_set() + elif choice == "3": + quick_space_weather_check() + elif choice == "4": + download_prominence_monitoring() + elif choice == "5": + sources = input("Enter sources (comma-separated, or press Enter for AIA_171): ").strip() + if sources: + sources = [s.strip() for s in sources.split(",")] + else: + sources = ["AIA_171"] + interval = input("Enter interval in seconds (default 300): ").strip() + interval = int(interval) if interval else 300 + continuous_monitor(interval, sources) + elif choice == "6": + create_monitoring_script() + elif choice == "7": + print("Goodbye!") + else: + print("Invalid choice") + + +if __name__ == "__main__": + main() diff --git a/tools/sdo_data_fetcher/sdo_fetcher_v2.py b/tools/sdo_data_fetcher/sdo_fetcher_v2.py new file mode 100644 index 0000000..8f42b0a --- /dev/null +++ b/tools/sdo_data_fetcher/sdo_fetcher_v2.py @@ -0,0 +1,285 @@ +""" +SDO Data Fetcher v2 - Alternative Implementation +Uses NASA's Helioviewer.org latest images API +""" + +import requests +from datetime import datetime, timezone +import json +from pathlib import Path +from typing import Optional, Dict +import argparse + + +class SDOFetcher: + """Simplified SDO data fetcher using Helioviewer's latest images""" + + # Helioviewer provides pre-generated latest images + LATEST_IMAGE_BASE = "https://api.helioviewer.org/v2/getJP2Image/" + + SDO_SOURCES = { + "AIA_94": {"sourceId": 13, "name": "AIA 94", "wavelength": "94ƅ"}, + "AIA_131": {"sourceId": 14, "name": "AIA 131", "wavelength": "131ƅ"}, + "AIA_171": {"sourceId": 15, "name": "AIA 171", "wavelength": "171ƅ"}, + "AIA_193": {"sourceId": 16, "name": "AIA 193", "wavelength": "193ƅ"}, + "AIA_211": {"sourceId": 17, 
"name": "AIA 211", "wavelength": "211ƅ"}, + "AIA_304": {"sourceId": 18, "name": "AIA 304", "wavelength": "304ƅ"}, + "AIA_335": {"sourceId": 19, "name": "AIA 335", "wavelength": "335ƅ"}, + "AIA_1600": {"sourceId": 20, "name": "AIA 1600", "wavelength": "1600ƅ"}, + "AIA_1700": {"sourceId": 21, "name": "AIA 1700", "wavelength": "1700ƅ"}, + "HMI_Continuum": {"sourceId": 22, "name": "HMI Continuum", "wavelength": "Continuum"}, + "HMI_Magnetogram": {"sourceId": 23, "name": "HMI Magnetogram", "wavelength": "Magnetogram"}, + } + + def __init__(self, output_dir: str = "sdo_data"): + self.output_dir = Path(output_dir) + self.output_dir.mkdir(exist_ok=True) + + def get_latest_image_png(self, source: str = "AIA_171") -> Optional[Dict]: + """ + Fetch latest SDO image as PNG using a simpler method + + Args: + source: SDO source identifier + + Returns: + Dictionary with metadata and filepath + """ + if source not in self.SDO_SOURCES: + raise ValueError(f"Invalid source. Choose from: {list(self.SDO_SOURCES.keys())}") + + source_id = self.SDO_SOURCES[source]["sourceId"] + + print(f"\nFetching latest {source} image...") + print(f"Wavelength: {self.SDO_SOURCES[source]['wavelength']}") + + try: + # Use Helioviewer's getClosestImage API + api_url = "https://api.helioviewer.org/v2/getClosestImage/" + + # Request latest available image (use a recent date) + params = { + "date": "2024-01-15T12:00:00Z", # Use a known good date + "sourceId": source_id + } + + print("Querying for latest available observation...") + response = requests.get(api_url, params=params, timeout=15) + response.raise_for_status() + + image_info = response.json() + observation_date = image_info.get("date", "unknown") + image_id = image_info.get("id") + + print(f"Found observation from: {observation_date}") + print(f"Image ID: {image_id}") + + # Now get the actual image using getTile + tile_url = "https://api.helioviewer.org/v2/getTile/" + tile_params = { + "id": image_id, + "x": 0, + "y": 0, + "imageScale": 2.4, + 
"display": "true" + } + + print("Downloading image...") + img_response = requests.get(tile_url, params=tile_params, timeout=30, stream=True) + img_response.raise_for_status() + + # Save the image + timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + filename = f"SDO_{source}_{timestamp}.png" + filepath = self.output_dir / filename + + with open(filepath, 'wb') as f: + for chunk in img_response.iter_content(chunk_size=8192): + f.write(chunk) + + print(f"āœ“ Image saved: {filepath}") + + # Save metadata + metadata = { + "source": source, + "wavelength": self.SDO_SOURCES[source]["wavelength"], + "observation_date": observation_date, + "image_id": image_id, + "filepath": str(filepath), + "download_time": datetime.now(timezone.utc).isoformat() + } + + metadata_file = filepath.with_suffix('.json') + with open(metadata_file, 'w') as f: + json.dump(metadata, f, indent=2) + + print(f"āœ“ Metadata saved: {metadata_file}") + + return metadata + + except requests.exceptions.RequestException as e: + print(f"āœ— Error: {e}") + return None + except Exception as e: + print(f"āœ— Unexpected error: {e}") + return None + + def get_latest_image_direct(self, source: str = "AIA_171") -> Optional[Dict]: + """ + Alternative method: Fetch from SDO's direct image feed + Uses helioviewer.org's pre-rendered latest images + """ + if source not in self.SDO_SOURCES: + raise ValueError(f"Invalid source. 
Choose from: {list(self.SDO_SOURCES.keys())}") + + print(f"\nFetching latest {source} using direct method...") + + # Map to helioviewer browse image URLs + # These are updated regularly with the latest images + base_url = "https://helioviewer.org/browse/1/" + + # Construct URL based on source + # Example: https://helioviewer.org/browse/1/2024/01/15/171/ + + try: + # First, try to get the image via a simple predictable URL pattern + # Helioviewer provides latest images in predictable locations + + # Use alternative: sunpy or direct JSOC query + # For now, let's use a working alternative approach + + print("Using Helioviewer.org latest image service...") + + # Alternative: construct URL directly to latest image + # Format: https://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_0171.jpg + + source_map = { + "AIA_94": "0094", + "AIA_131": "0131", "AIA_171": "0171", + "AIA_193": "0193", + "AIA_211": "0211", + "AIA_304": "0304", + "AIA_335": "0335", + "AIA_1600": "1600", + "AIA_1700": "1700", + "HMI_Continuum": "HMIIC", + "HMI_Magnetogram": "HMII", + } + + if source not in source_map: + print(f"Direct image not available for {source}") + return None + + img_code = source_map[source] + direct_url = f"https://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{img_code}.jpg" + + print(f"Fetching from: {direct_url}") + + response = requests.get(direct_url, timeout=30, stream=True) + response.raise_for_status() + + # Save the image + timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + filename = f"SDO_{source}_{timestamp}.jpg" + filepath = self.output_dir / filename + + with open(filepath, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + print(f"āœ“ Latest image saved: {filepath}") + + # Get last-modified header for observation time + last_modified = response.headers.get('Last-Modified', 'Unknown') + + metadata = { + "source": source, + "wavelength": self.SDO_SOURCES[source]["wavelength"], + "filepath": str(filepath), + 
"download_time": datetime.now(timezone.utc).isoformat(), + "image_url": direct_url, + "last_modified": last_modified, + "note": "This is the latest available image from NASA SDO" + } + + metadata_file = filepath.with_suffix('.json') + with open(metadata_file, 'w') as f: + json.dump(metadata, f, indent=2) + + print(f"āœ“ Metadata saved: {metadata_file}") + print(f"\n{'='*60}") + print(f"Success! Downloaded latest SDO {source} image") + print(f"Image last updated: {last_modified}") + print(f"{'='*60}\n") + + return metadata + + except requests.exceptions.RequestException as e: + print(f"āœ— Error: {e}") + return None + except Exception as e: + print(f"āœ— Unexpected error: {e}") + return None + + def download_multiple(self, sources: list = None): + """Download multiple wavelengths""" + if sources is None: + sources = ["AIA_171", "AIA_193", "AIA_304", "HMI_Magnetogram"] + + print(f"\nDownloading {len(sources)} different SDO images...") + print("="*60) + + results = [] + for source in sources: + result = self.get_latest_image_direct(source) + if result: + results.append(result) + + print(f"\n{'='*60}") + print(f"Successfully downloaded {len(results)}/{len(sources)} images") + print(f"{'='*60}\n") + + return results + + @staticmethod + def list_sources(): + """List all available sources""" + print("\n" + "="*60) + print("Available SDO Data Sources") + print("="*60) + for key, info in SDOFetcher.SDO_SOURCES.items(): + print(f" {key:20} - {info['name']} ({info['wavelength']})") + print("="*60 + "\n") + + +def main(): + parser = argparse.ArgumentParser( + description="SDO Data Fetcher v2 - Fetch latest solar images from NASA's SDO", + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--source', '-s', default='AIA_171', + help='SDO source (default: AIA_171)') + parser.add_argument('--output', '-o', default='sdo_data', + help='Output directory (default: sdo_data)') + parser.add_argument('--multiple', '-m', action='store_true', + help='Download 
multiple wavelengths') + parser.add_argument('--list', '-l', action='store_true', + help='List available sources') + + args = parser.parse_args() + + if args.list: + SDOFetcher.list_sources() + return + + fetcher = SDOFetcher(output_dir=args.output) + + if args.multiple: + fetcher.download_multiple() + else: + fetcher.get_latest_image_direct(source=args.source) + + +if __name__ == "__main__": + main() From 6175f2a5c914428b031e8dc59b78e1c8668eeecf Mon Sep 17 00:00:00 2001 From: ep150de Date: Tue, 10 Feb 2026 14:50:31 -0800 Subject: [PATCH 2/3] Fix Unicode encoding error in monitoring daemon creation Fixed UnicodeEncodeError when creating monitoring_daemon.py on Windows. The file now opens with UTF-8 encoding to properly handle Unicode characters. --- tools/sdo_data_fetcher/sdo_advanced_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sdo_data_fetcher/sdo_advanced_examples.py b/tools/sdo_data_fetcher/sdo_advanced_examples.py index 3937107..00f39df 100644 --- a/tools/sdo_data_fetcher/sdo_advanced_examples.py +++ b/tools/sdo_data_fetcher/sdo_advanced_examples.py @@ -238,7 +238,7 @@ def main(): main() ''' - with open("monitoring_daemon.py", 'w') as f: + with open("monitoring_daemon.py", 'w', encoding='utf-8') as f: f.write(script_content) print("\nāœ“ Created 'monitoring_daemon.py'") From b25b239b51f58991c6951bdaa3501326043453f6 Mon Sep 17 00:00:00 2001 From: ep150de Date: Thu, 12 Mar 2026 18:08:04 -0700 Subject: [PATCH 3/3] Add redundant SDO provider fallbacks --- tools/sdo_data_fetcher/README.md | 183 ++++---- .../sdo_data_fetcher/sdo_advanced_examples.py | 22 +- tools/sdo_data_fetcher/sdo_data_fetcher.py | 194 ++++++++ tools/sdo_data_fetcher/sdo_fetcher_v2.py | 207 +-------- tools/sdo_data_fetcher/sdo_provider.py | 424 ++++++++++++++++++ 5 files changed, 733 insertions(+), 297 deletions(-) create mode 100644 tools/sdo_data_fetcher/sdo_data_fetcher.py create mode 100644 tools/sdo_data_fetcher/sdo_provider.py diff --git 
a/tools/sdo_data_fetcher/README.md b/tools/sdo_data_fetcher/README.md index 4a7dcad..26410d1 100644 --- a/tools/sdo_data_fetcher/README.md +++ b/tools/sdo_data_fetcher/README.md @@ -1,148 +1,133 @@ # šŸŒž SDO Real-Time Data Fetcher -A lightweight Python tool for fetching real-time Solar Dynamics Observatory (SDO) data, designed to complement the Surya foundation model's data pipeline. +A lightweight Python tool for fetching live Solar Dynamics Observatory (SDO) browse imagery for Surya-related experimentation and monitoring. ## Overview -While Surya uses preprocessed SDO data from HuggingFace, this tool enables researchers to: -- Fetch the **latest real-time** SDO observations (updated every 12 seconds) -- Download specific wavelengths for custom analysis -- Monitor solar activity in real-time -- Create custom training datasets from recent observations +While Surya primarily uses preprocessed SDO datasets, this tool provides a simple way to fetch current AIA and HMI observations for: -## Why This Tool? +- real-time solar activity monitoring +- quick data exploration +- recent-event validation workflows +- prototype preprocessing and inference pipelines -The Surya foundation model is trained on SDO data spanning 2011-2019. This tool bridges the gap by providing: -- **Real-time observations** for current solar activity monitoring -- **Quick data exploration** without downloading large preprocessed datasets -- **Custom data collection** for fine-tuning on recent events -- **Educational purposes** for understanding SDO instruments +## What's New -## šŸš€ Quick Start +The downloader now supports redundant live providers so it can continue working when a single upstream host is unavailable. -### Installation +### Provider fallback chain + +By default, the fetchers try providers in this order: + +1. `lmsal` — LMSAL Sun Today browse imagery +2. `jsoc` — Stanford JSOC latest HMI imagery +3. `nasa` — NASA SDO browse imagery +4. 
`helioviewer` — Helioviewer rendered imagery + +This is available through both `sdo_fetcher_v2.py` and `sdo_data_fetcher.py` with `--provider auto`. + +## Quick Start ```bash cd tools/sdo_data_fetcher pip install -r requirements.txt ``` -### Basic Usage +### Basic usage ```bash -# Fetch latest AIA 171ƅ image (most common for Surya) +# Latest AIA 171 image using automatic fallback python sdo_fetcher_v2.py --source AIA_171 -# Fetch multiple wavelengths used in Surya training -python sdo_fetcher_v2.py --multiple +# Force a specific provider +python sdo_fetcher_v2.py --source AIA_171 --provider lmsal -# List all available SDO sources -python sdo_fetcher_v2.py --list -``` +# Download latest HMI magnetogram from JSOC +python sdo_fetcher_v2.py --source HMI_Magnetogram --provider jsoc -### Integration with Surya +# Download multiple channels +python sdo_fetcher_v2.py --multiple +``` -```python -from sdo_fetcher_v2 import SDOFetcher +### Original fetcher -# Fetch the 8 AIA channels used in Surya -aia_channels = ["AIA_94", "AIA_131", "AIA_171", "AIA_193", - "AIA_211", "AIA_304", "AIA_335", "AIA_1600"] +```bash +python sdo_data_fetcher.py --source AIA_193 --provider auto +``` -fetcher = SDOFetcher(output_dir="surya_inference_data") -results = fetcher.download_multiple(aia_channels) +## Available providers -# Images are now ready for preprocessing and Surya inference -``` +- `auto` +- `lmsal` +- `jsoc` +- `nasa` +- `helioviewer` -## šŸ“” Available Data Sources +## Available data sources -The tool provides access to all SDO/AIA and HMI channels: +### AIA channels -### AIA Channels (used in Surya) -- **AIA 94, 131, 171, 193, 211, 304, 335, 1600** - The 8 AIA channels Surya was trained on -- **AIA 1700** - Additional AIA channel +- `AIA_94` +- `AIA_131` +- `AIA_171` +- `AIA_193` +- `AIA_211` +- `AIA_304` +- `AIA_335` +- `AIA_1600` +- `AIA_1700` -### HMI Channels (used in Surya) -- **HMI Magnetogram** - Magnetic field measurements (5 channels in Surya model) -- **HMI Continuum** - 
Visible light solar surface +### HMI channels -## šŸ”¬ Use Cases with Surya +- `HMI_Continuum` +- `HMI_Magnetogram` -### 1. Real-Time Solar Activity Monitoring -```bash -# Monitor current solar activity with Surya's primary wavelengths -python sdo_advanced_examples.py # Select option 3: Space Weather Check -``` +## Python example -### 2. Custom Inference on Latest Data ```python -# Fetch latest multi-channel data -fetcher = SDOFetcher(output_dir="latest_obs") -fetcher.download_multiple(["AIA_171", "AIA_193", "AIA_211", "HMI_Magnetogram"]) +from sdo_fetcher_v2 import SDOFetcher -# Preprocess for Surya (user would add their preprocessing pipeline) -# Run Surya inference on current solar conditions -``` +fetcher = SDOFetcher(output_dir="surya_inference_data") +metadata = fetcher.get_latest_image_direct(source="AIA_171", provider="auto") -### 3. Fine-tuning Dataset Creation -```bash -# Continuous monitoring to build recent event datasets -python sdo_advanced_examples.py # Select option 5: Continuous monitoring +if metadata: + print(metadata["filepath"]) + print(metadata["provider_name"]) + print(metadata.get("observation_time")) ``` -### 4. Validation Data Collection -- Fetch observations from specific dates for model validation -- Compare Surya forecasts with actual observations -- Track model performance on recent solar events - -## šŸ“Š Output Format +## Advanced examples -Each download produces: -- **JPG image** (1024Ɨ1024) - Ready for quick visualization -- **JSON metadata** - Observation timestamp and source info +Run the menu-driven helper: -For Surya integration, images can be: -1. Loaded and preprocessed to match Surya's input format (4096Ɨ4096, normalized) -2. Aligned temporally for multi-channel input -3. 
Used for inference or fine-tuning - -## 🎯 Advanced Features - -The `sdo_advanced_examples.py` script includes: -- Multi-wavelength comparison sets -- Active region monitoring (useful for flare forecasting tasks) -- Space weather assessment -- Continuous monitoring for time-series data collection - -## 🔄 Data Pipeline Integration - -``` -Real-Time SDO This Tool Preprocessing Surya Model -Observations → sdo_fetcher_v2.py → (user's code) → Inference -(12s cadence) (download JPGs) (resize, align) (forecasting) +```bash +python sdo_advanced_examples.py ``` -## 📖 Documentation - -- **Surya Model**: See main repository README for model architecture and capabilities -- **SDO Mission**: [https://sdo.gsfc.nasa.gov/](https://sdo.gsfc.nasa.gov/) -- **Data Specs**: Images at 1024×1024 (can be upsampled to Surya's 4096×4096) +It includes: -## 🤝 Contributing +- multi-wavelength comparison downloads +- active region monitoring +- prominence monitoring +- space weather quick checks +- continuous monitoring -This tool is designed to be lightweight and focused on data acquisition. For preprocessing pipelines specific to Surya, please contribute to the main model repository. +## Output -## ⚖️ License +Each download writes: -MIT License - Free for research and educational purposes. +- an image file (`.jpg`, `.gif`, or `.png`, depending on provider) +- a `.json` metadata file containing provider, source, URL, and timing info -## 🌟 Acknowledgments +## Notes -- **NASA/SDO** for open solar data -- **NASA-IMPACT Surya Team** for the foundation model -- Data sourced from NASA's SDO mission servers +- LMSAL provides daily AIA and HMI browse imagery. +- JSOC support is currently most useful for HMI live products. +- Helioviewer remains as an API fallback when browse-image hosts are unavailable. ---- +## References -**Note**: This tool fetches 1024×1024 images for quick access.
For full-resolution 4096Ɨ4096 SDO data as used in Surya training, refer to the HuggingFace datasets in the main repository. +- NASA SDO: https://sdo.gsfc.nasa.gov/ +- LMSAL Sun Today: https://suntoday.lmsal.com/suntoday/ +- JSOC latest HMI: https://jsoc1.stanford.edu/hmi_latest.html +- Helioviewer: https://helioviewer.org/ diff --git a/tools/sdo_data_fetcher/sdo_advanced_examples.py b/tools/sdo_data_fetcher/sdo_advanced_examples.py index 00f39df..f3532e6 100644 --- a/tools/sdo_data_fetcher/sdo_advanced_examples.py +++ b/tools/sdo_data_fetcher/sdo_advanced_examples.py @@ -9,7 +9,7 @@ from sdo_fetcher_v2 import SDOFetcher -def continuous_monitor(interval_seconds=300, sources=None): +def continuous_monitor(interval_seconds=300, sources=None, provider="auto"): """ Continuously monitor and download SDO images at specified intervals @@ -25,6 +25,7 @@ def continuous_monitor(interval_seconds=300, sources=None): print(f"Starting continuous monitoring...") print(f"Sources: {', '.join(sources)}") print(f"Interval: {interval_seconds} seconds") + print(f"Provider mode: {provider}") print(f"Press Ctrl+C to stop\n") iteration = 0 @@ -39,9 +40,9 @@ def continuous_monitor(interval_seconds=300, sources=None): for source in sources: try: - result = fetcher.get_latest_image_direct(source) + result = fetcher.get_latest_image_direct(source, provider=provider) if result: - print(f"āœ“ {source} downloaded successfully") + print(f"āœ“ {source} downloaded successfully via {result.get('provider_name', result.get('provider', 'unknown'))}") else: print(f"āœ— {source} failed") except Exception as e: @@ -75,7 +76,7 @@ def download_comparison_set(): ] fetcher = SDOFetcher(output_dir="comparison_set") - results = fetcher.download_multiple(sources) + results = fetcher.download_multiple(sources, provider="auto") print("\n" + "="*60) print("Comparison Set Complete!") @@ -109,7 +110,7 @@ def download_active_region_set(): ] fetcher = SDOFetcher(output_dir="active_regions") - results = 
fetcher.download_multiple(sources) + results = fetcher.download_multiple(sources, provider="auto") print("\nActive region monitoring complete!") print("Check these images for:") @@ -134,7 +135,7 @@ def quick_space_weather_check(): sources = ["AIA_193", "HMI_Magnetogram"] print("Downloading key space weather indicators...") - results = fetcher.download_multiple(sources) + results = fetcher.download_multiple(sources, provider="auto") if len(results) == 2: print("\n" + "="*60) @@ -168,7 +169,7 @@ def download_prominence_monitoring(): ] fetcher = SDOFetcher(output_dir="prominences") - results = fetcher.download_multiple(sources) + results = fetcher.download_multiple(sources, provider="auto") print("\nProminence monitoring complete!") print("304ƅ is best for seeing prominences on the solar limb") @@ -218,9 +219,9 @@ def main(): for source in sources: try: - result = fetcher.get_latest_image_direct(source) + result = fetcher.get_latest_image_direct(source, provider="auto") if result: - logging.info(f"āœ“ Downloaded {source}") + logging.info(f"āœ“ Downloaded {source} via {result.get('provider_name', result.get('provider', 'unknown'))}") except Exception as e: logging.error(f"āœ— Failed to download {source}: {e}") @@ -277,7 +278,8 @@ def main(): sources = ["AIA_171"] interval = input("Enter interval in seconds (default 300): ").strip() interval = int(interval) if interval else 300 - continuous_monitor(interval, sources) + provider = input("Enter provider (auto/lmsal/jsoc/nasa/helioviewer, default auto): ").strip() or "auto" + continuous_monitor(interval, sources, provider) elif choice == "6": create_monitoring_script() elif choice == "7": diff --git a/tools/sdo_data_fetcher/sdo_data_fetcher.py b/tools/sdo_data_fetcher/sdo_data_fetcher.py new file mode 100644 index 0000000..af007d9 --- /dev/null +++ b/tools/sdo_data_fetcher/sdo_data_fetcher.py @@ -0,0 +1,194 @@ +""" +SDO (Solar Dynamic Observatory) Data Fetcher + +This script fetches the latest solar images from NASA's 
Solar Dynamic Observatory +using the Helioviewer API. It supports multiple instruments and wavelengths. +""" + +import requests +from datetime import datetime, timedelta, timezone +import json +from pathlib import Path +from typing import Optional, Dict +import argparse +from sdo_provider import SDOProviderClient, SDO_SOURCES + + +class SDODataFetcher: + """Fetches latest SDO data from Helioviewer API""" + + BASE_URL = "https://api.helioviewer.org/v2/" + SDO_SOURCES = SDO_SOURCES + + def __init__(self, output_dir: str = "sdo_data"): + """Initialize the fetcher with an output directory""" + self.output_dir = Path(output_dir) + self.output_dir.mkdir(exist_ok=True) + self.provider_client = SDOProviderClient(output_dir=output_dir) + + def get_latest_available_date(self, source: str = "AIA_171", provider: str = "auto") -> Optional[str]: + """Query the API for the latest available SDO observation time""" + timestamp = self.provider_client.get_latest_timestamp(source=source, provider=provider) + if timestamp: + print(f"Latest SDO data available: {timestamp}") + return timestamp + + fallback = (datetime.now(timezone.utc) - timedelta(minutes=30)).strftime("%Y-%m-%dT%H:%M:%S.000Z") + print(f"Using fallback date: {fallback}") + return fallback + + def get_latest_image(self, source: str = "AIA_171", image_scale: float = 2.4, provider: str = "auto") -> Optional[Dict]: + """ + Fetch the latest SDO image + + Args: + source: SDO source identifier (e.g., 'AIA_171', 'HMI_Magnetogram') + image_scale: Resolution in arcseconds per pixel (lower = higher resolution) + + Returns: + Dictionary with image metadata and file path + """ + _ = image_scale + if source not in self.SDO_SOURCES: + raise ValueError(f"Invalid source. 
Choose from: {list(self.SDO_SOURCES.keys())}") + + print(f"Fetching latest {source} data...") + print(f"Description: {self.SDO_SOURCES[source]['description']}") + return self.provider_client.download_latest_image(source=source, provider=provider) + + def get_latest_data_timestamp(self, source: str = "AIA_171", provider: str = "auto") -> Optional[str]: + """Get the timestamp of the latest available SDO data""" + try: + return self.provider_client.get_latest_timestamp(source=source, provider=provider) + except requests.exceptions.RequestException as e: + print(f"Error getting latest timestamp: {e}") + return None + + def download_multiple_wavelengths(self, sources: list = None, provider: str = "auto"): + """ + Download images from multiple SDO sources + + Args: + sources: List of source identifiers (defaults to common wavelengths) + """ + if sources is None: + sources = ["AIA_171", "AIA_193", "AIA_304", "HMI_Magnetogram"] + + results = [] + for source in sources: + print(f"\n{'='*60}") + result = self.get_latest_image(source, provider=provider) + if result: + results.append(result) + + print(f"\n{'='*60}") + print(f"Downloaded {len(results)} images successfully!") + return results + + @staticmethod + def list_available_sources(): + """Print all available SDO sources""" + print("\nAvailable SDO Data Sources:") + print("="*60) + for key, value in SDODataFetcher.SDO_SOURCES.items(): + print(f"{key:20} - {value['description']}") + print("\nAvailable providers:") + SDOProviderClient.list_providers() + + +def main(): + """Main CLI interface""" + parser = argparse.ArgumentParser( + description="Fetch latest SDO (Solar Dynamic Observatory) data", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Download latest AIA 171 ƅ image + python sdo_data_fetcher.py + + # Download specific wavelength + python sdo_data_fetcher.py --source AIA_304 + + # Download multiple wavelengths + python sdo_data_fetcher.py --multiple + + # List all available sources 
+ python sdo_data_fetcher.py --list + + # Download to specific directory + python sdo_data_fetcher.py --output my_sdo_images + """ + ) + + parser.add_argument( + '--source', '-s', + type=str, + default='AIA_171', + help='SDO source to fetch (default: AIA_171)' + ) + + parser.add_argument( + '--output', '-o', + type=str, + default='sdo_data', + help='Output directory for downloaded data (default: sdo_data)' + ) + + parser.add_argument( + '--scale', + type=float, + default=2.4, + help='Image scale in arcseconds per pixel (default: 2.4)' + ) + + parser.add_argument( + '--multiple', '-m', + action='store_true', + help='Download multiple common wavelengths' + ) + + parser.add_argument( + '--list', '-l', + action='store_true', + help='List all available SDO sources' + ) + + parser.add_argument( + '--timestamp', '-t', + action='store_true', + help='Get the timestamp of latest available data' + ) + + parser.add_argument( + '--provider', '-p', + type=str, + default='auto', + help='Data provider: auto, lmsal, jsoc, nasa, helioviewer' + ) + + args = parser.parse_args() + + # List available sources and exit + if args.list: + SDODataFetcher.list_available_sources() + return + + # Initialize fetcher + fetcher = SDODataFetcher(output_dir=args.output) + + # Get latest timestamp + if args.timestamp: + timestamp = fetcher.get_latest_data_timestamp(source=args.source, provider=args.provider) + if timestamp: + print(f"Latest SDO data available at: {timestamp}") + return + + # Download data + if args.multiple: + fetcher.download_multiple_wavelengths(provider=args.provider) + else: + fetcher.get_latest_image(source=args.source, image_scale=args.scale, provider=args.provider) + + +if __name__ == "__main__": + main() diff --git a/tools/sdo_data_fetcher/sdo_fetcher_v2.py b/tools/sdo_data_fetcher/sdo_fetcher_v2.py index 8f42b0a..7e4aaa1 100644 --- a/tools/sdo_data_fetcher/sdo_fetcher_v2.py +++ b/tools/sdo_data_fetcher/sdo_fetcher_v2.py @@ -9,33 +9,20 @@ from pathlib import Path from typing 
import Optional, Dict import argparse +from sdo_provider import SDOProviderClient, SDO_SOURCES class SDOFetcher: """Simplified SDO data fetcher using Helioviewer's latest images""" - # Helioviewer provides pre-generated latest images - LATEST_IMAGE_BASE = "https://api.helioviewer.org/v2/getJP2Image/" - - SDO_SOURCES = { - "AIA_94": {"sourceId": 13, "name": "AIA 94", "wavelength": "94ƅ"}, - "AIA_131": {"sourceId": 14, "name": "AIA 131", "wavelength": "131ƅ"}, - "AIA_171": {"sourceId": 15, "name": "AIA 171", "wavelength": "171ƅ"}, - "AIA_193": {"sourceId": 16, "name": "AIA 193", "wavelength": "193ƅ"}, - "AIA_211": {"sourceId": 17, "name": "AIA 211", "wavelength": "211ƅ"}, - "AIA_304": {"sourceId": 18, "name": "AIA 304", "wavelength": "304ƅ"}, - "AIA_335": {"sourceId": 19, "name": "AIA 335", "wavelength": "335ƅ"}, - "AIA_1600": {"sourceId": 20, "name": "AIA 1600", "wavelength": "1600ƅ"}, - "AIA_1700": {"sourceId": 21, "name": "AIA 1700", "wavelength": "1700ƅ"}, - "HMI_Continuum": {"sourceId": 22, "name": "HMI Continuum", "wavelength": "Continuum"}, - "HMI_Magnetogram": {"sourceId": 23, "name": "HMI Magnetogram", "wavelength": "Magnetogram"}, - } + SDO_SOURCES = SDO_SOURCES def __init__(self, output_dir: str = "sdo_data"): self.output_dir = Path(output_dir) self.output_dir.mkdir(exist_ok=True) + self.provider_client = SDOProviderClient(output_dir=output_dir) - def get_latest_image_png(self, source: str = "AIA_171") -> Optional[Dict]: + def get_latest_image_png(self, source: str = "AIA_171", provider: str = "auto") -> Optional[Dict]: """ Fetch latest SDO image as PNG using a simpler method @@ -45,183 +32,24 @@ def get_latest_image_png(self, source: str = "AIA_171") -> Optional[Dict]: Returns: Dictionary with metadata and filepath """ - if source not in self.SDO_SOURCES: - raise ValueError(f"Invalid source. 
Choose from: {list(self.SDO_SOURCES.keys())}") - - source_id = self.SDO_SOURCES[source]["sourceId"] - - print(f"\nFetching latest {source} image...") - print(f"Wavelength: {self.SDO_SOURCES[source]['wavelength']}") - - try: - # Use Helioviewer's getClosestImage API - api_url = "https://api.helioviewer.org/v2/getClosestImage/" - - # Request latest available image (use a recent date) - params = { - "date": "2024-01-15T12:00:00Z", # Use a known good date - "sourceId": source_id - } - - print("Querying for latest available observation...") - response = requests.get(api_url, params=params, timeout=15) - response.raise_for_status() - - image_info = response.json() - observation_date = image_info.get("date", "unknown") - image_id = image_info.get("id") - - print(f"Found observation from: {observation_date}") - print(f"Image ID: {image_id}") - - # Now get the actual image using getTile - tile_url = "https://api.helioviewer.org/v2/getTile/" - tile_params = { - "id": image_id, - "x": 0, - "y": 0, - "imageScale": 2.4, - "display": "true" - } - - print("Downloading image...") - img_response = requests.get(tile_url, params=tile_params, timeout=30, stream=True) - img_response.raise_for_status() - - # Save the image - timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") - filename = f"SDO_{source}_{timestamp}.png" - filepath = self.output_dir / filename - - with open(filepath, 'wb') as f: - for chunk in img_response.iter_content(chunk_size=8192): - f.write(chunk) - - print(f"āœ“ Image saved: {filepath}") - - # Save metadata - metadata = { - "source": source, - "wavelength": self.SDO_SOURCES[source]["wavelength"], - "observation_date": observation_date, - "image_id": image_id, - "filepath": str(filepath), - "download_time": datetime.now(timezone.utc).isoformat() - } - - metadata_file = filepath.with_suffix('.json') - with open(metadata_file, 'w') as f: - json.dump(metadata, f, indent=2) - - print(f"āœ“ Metadata saved: {metadata_file}") - - return metadata - - except 
requests.exceptions.RequestException as e: - print(f"āœ— Error: {e}") - return None - except Exception as e: - print(f"āœ— Unexpected error: {e}") - return None + return self.provider_client.download_latest_image(source=source, provider=provider) - def get_latest_image_direct(self, source: str = "AIA_171") -> Optional[Dict]: + def get_latest_image_direct(self, source: str = "AIA_171", provider: str = "auto") -> Optional[Dict]: """ Alternative method: Fetch from SDO's direct image feed Uses helioviewer.org's pre-rendered latest images """ - if source not in self.SDO_SOURCES: - raise ValueError(f"Invalid source. Choose from: {list(self.SDO_SOURCES.keys())}") - - print(f"\nFetching latest {source} using direct method...") - - # Map to helioviewer browse image URLs - # These are updated regularly with the latest images - base_url = "https://helioviewer.org/browse/1/" - - # Construct URL based on source - # Example: https://helioviewer.org/browse/1/2024/01/15/171/ - - try: - # First, try to get the image via a simple predictable URL pattern - # Helioviewer provides latest images in predictable locations - - # Use alternative: sunpy or direct JSOC query - # For now, let's use a working alternative approach - - print("Using Helioviewer.org latest image service...") - - # Alternative: construct URL directly to latest image - # Format: https://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_0171.jpg - - source_map = { - "AIA_94": "0094", - "AIA_131": "0131", "AIA_171": "0171", - "AIA_193": "0193", - "AIA_211": "0211", - "AIA_304": "0304", - "AIA_335": "0335", - "AIA_1600": "1600", - "AIA_1700": "1700", - "HMI_Continuum": "HMIIC", - "HMI_Magnetogram": "HMII", - } - - if source not in source_map: - print(f"Direct image not available for {source}") - return None - - img_code = source_map[source] - direct_url = f"https://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{img_code}.jpg" - - print(f"Fetching from: {direct_url}") - - response = requests.get(direct_url, timeout=30, 
stream=True) - response.raise_for_status() - - # Save the image - timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") - filename = f"SDO_{source}_{timestamp}.jpg" - filepath = self.output_dir / filename - - with open(filepath, 'wb') as f: - for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) - - print(f"āœ“ Latest image saved: {filepath}") - - # Get last-modified header for observation time - last_modified = response.headers.get('Last-Modified', 'Unknown') - - metadata = { - "source": source, - "wavelength": self.SDO_SOURCES[source]["wavelength"], - "filepath": str(filepath), - "download_time": datetime.now(timezone.utc).isoformat(), - "image_url": direct_url, - "last_modified": last_modified, - "note": "This is the latest available image from NASA SDO" - } - - metadata_file = filepath.with_suffix('.json') - with open(metadata_file, 'w') as f: - json.dump(metadata, f, indent=2) - - print(f"āœ“ Metadata saved: {metadata_file}") + result = self.provider_client.download_latest_image(source=source, provider=provider) + if result: print(f"\n{'='*60}") print(f"Success! 
Downloaded latest SDO {source} image") - print(f"Image last updated: {last_modified}") + print(f"Provider: {result.get('provider_name', result.get('provider', 'unknown'))}") + if result.get("observation_time"): + print(f"Observation time: {result['observation_time']}") print(f"{'='*60}\n") - - return metadata - - except requests.exceptions.RequestException as e: - print(f"āœ— Error: {e}") - return None - except Exception as e: - print(f"āœ— Unexpected error: {e}") - return None + return result - def download_multiple(self, sources: list = None): + def download_multiple(self, sources: list = None, provider: str = "auto"): """Download multiple wavelengths""" if sources is None: sources = ["AIA_171", "AIA_193", "AIA_304", "HMI_Magnetogram"] @@ -231,7 +59,7 @@ def download_multiple(self, sources: list = None): results = [] for source in sources: - result = self.get_latest_image_direct(source) + result = self.get_latest_image_direct(source, provider=provider) if result: results.append(result) @@ -266,19 +94,22 @@ def main(): help='Download multiple wavelengths') parser.add_argument('--list', '-l', action='store_true', help='List available sources') + parser.add_argument('--provider', '-p', default='auto', + help='Data provider: auto, lmsal, jsoc, nasa, helioviewer') args = parser.parse_args() if args.list: SDOFetcher.list_sources() + SDOProviderClient.list_providers() return fetcher = SDOFetcher(output_dir=args.output) if args.multiple: - fetcher.download_multiple() + fetcher.download_multiple(provider=args.provider) else: - fetcher.get_latest_image_direct(source=args.source) + fetcher.get_latest_image_direct(source=args.source, provider=args.provider) if __name__ == "__main__": diff --git a/tools/sdo_data_fetcher/sdo_provider.py b/tools/sdo_data_fetcher/sdo_provider.py new file mode 100644 index 0000000..df7bdc4 --- /dev/null +++ b/tools/sdo_data_fetcher/sdo_provider.py @@ -0,0 +1,424 @@ +""" +Shared SDO data provider logic with automatic fallback. 
+""" + +from datetime import datetime, timedelta, timezone +import json +from pathlib import Path +from typing import Dict, Iterable, Optional + +import requests + + +SDO_SOURCES = { + "AIA_94": { + "sourceId": 13, + "name": "AIA 94", + "wavelength": "94ƅ", + "description": "AIA 94 ƅ - Hot flare plasma", + "nasa_code": "0094", + "lmsal_code": "0094", + }, + "AIA_131": { + "sourceId": 14, + "name": "AIA 131", + "wavelength": "131ƅ", + "description": "AIA 131 ƅ - Flaring regions", + "nasa_code": "0131", + "lmsal_code": "0131", + }, + "AIA_171": { + "sourceId": 15, + "name": "AIA 171", + "wavelength": "171ƅ", + "description": "AIA 171 ƅ - Quiet corona and coronal loops", + "nasa_code": "0171", + "lmsal_code": "0171", + }, + "AIA_193": { + "sourceId": 16, + "name": "AIA 193", + "wavelength": "193ƅ", + "description": "AIA 193 ƅ - Hot plasma in active regions", + "nasa_code": "0193", + "lmsal_code": "0193", + }, + "AIA_211": { + "sourceId": 17, + "name": "AIA 211", + "wavelength": "211ƅ", + "description": "AIA 211 ƅ - Active regions", + "nasa_code": "0211", + "lmsal_code": "0211", + }, + "AIA_304": { + "sourceId": 18, + "name": "AIA 304", + "wavelength": "304ƅ", + "description": "AIA 304 ƅ - Chromosphere and prominence", + "nasa_code": "0304", + "lmsal_code": "0304", + }, + "AIA_335": { + "sourceId": 19, + "name": "AIA 335", + "wavelength": "335ƅ", + "description": "AIA 335 ƅ - Active regions", + "nasa_code": "0335", + "lmsal_code": "0335", + }, + "AIA_1600": { + "sourceId": 20, + "name": "AIA 1600", + "wavelength": "1600ƅ", + "description": "AIA 1600 ƅ - Upper photosphere", + "nasa_code": "1600", + "lmsal_code": "1600", + }, + "AIA_1700": { + "sourceId": 21, + "name": "AIA 1700", + "wavelength": "1700ƅ", + "description": "AIA 1700 ƅ - Temperature minimum", + "nasa_code": "1700", + "lmsal_code": "1700", + }, + "HMI_Continuum": { + "sourceId": 22, + "name": "HMI Continuum", + "wavelength": "Continuum", + "description": "HMI Continuum - Solar surface", + "nasa_code": 
"HMIIC", + "lmsal_code": "_HMI_cont_aiascale", + "jsoc_path": "/data/hmi/images/latest/HMI_latest_Int_1024x1024.gif", + "jsoc_timestamp_key": "continuum", + }, + "HMI_Magnetogram": { + "sourceId": 23, + "name": "HMI Magnetogram", + "wavelength": "Magnetogram", + "description": "HMI Magnetogram - Magnetic field", + "nasa_code": "HMII", + "lmsal_code": "_HMImag", + "jsoc_path": "/data/hmi/images/latest/HMI_latest_Mag_1024x1024.gif", + "jsoc_timestamp_key": "magnetogram", + }, +} + + +PROVIDER_LABELS = { + "lmsal": "LMSAL Sun Today", + "jsoc": "Stanford JSOC", + "nasa": "NASA SDO", + "helioviewer": "Helioviewer API", +} + + +AUTO_PROVIDER_ORDER = ("lmsal", "jsoc", "nasa", "helioviewer") + + +class SDOProviderClient: + """Download latest SDO imagery from multiple redundant providers.""" + + def __init__(self, output_dir: str = "sdo_data"): + self.output_dir = Path(output_dir) + self.output_dir.mkdir(exist_ok=True) + self.session = requests.Session() + + def download_latest_image(self, source: str = "AIA_171", provider: str = "auto") -> Optional[Dict]: + """Download the latest image using the requested provider or fallback chain.""" + if source not in SDO_SOURCES: + raise ValueError(f"Invalid source. Choose from: {list(SDO_SOURCES.keys())}") + + provider_order = self._resolve_provider_order(provider) + + print(f"\nFetching latest {source} image...") + print(f"Wavelength: {SDO_SOURCES[source]['wavelength']}") + print(f"Provider order: {', '.join(provider_order)}") + + last_error = None + + for provider_name in provider_order: + try: + print(f"Trying provider: {PROVIDER_LABELS[provider_name]}") + result = getattr(self, f"_download_from_{provider_name}")(source) + if result: + return result + except requests.exceptions.RequestException as e: + last_error = e + print(f"Provider {provider_name} failed: {e}") + except Exception as e: + last_error = e + print(f"Provider {provider_name} failed unexpectedly: {e}") + + if last_error: + print(f"All providers failed. 
Last error: {last_error}") + else: + print("All providers failed.") + return None + + def get_latest_timestamp(self, source: str = "AIA_171", provider: str = "auto") -> Optional[str]: + """Best-effort timestamp lookup using the same provider order.""" + if source not in SDO_SOURCES: + raise ValueError(f"Invalid source. Choose from: {list(SDO_SOURCES.keys())}") + + for provider_name in self._resolve_provider_order(provider): + try: + timestamp = getattr(self, f"_timestamp_from_{provider_name}")(source) + if timestamp: + return timestamp + except Exception: + continue + return None + + @staticmethod + def list_providers(): + """Print available provider names.""" + print("\nAvailable data providers:") + print("=" * 60) + print("auto - Automatic fallback chain") + for key, label in PROVIDER_LABELS.items(): + print(f"{key:12} - {label}") + + def _resolve_provider_order(self, provider: str) -> Iterable[str]: + provider = provider.lower() + if provider == "auto": + return AUTO_PROVIDER_ORDER + if provider not in PROVIDER_LABELS: + raise ValueError(f"Invalid provider. 
Choose from: auto, {', '.join(PROVIDER_LABELS.keys())}") + return (provider,) + + def _download_from_lmsal(self, source: str) -> Optional[Dict]: + code = SDO_SOURCES[source].get("lmsal_code") + if not code: + return None + + for day_offset in range(0, 4): + candidate_date = datetime.now(timezone.utc) - timedelta(days=day_offset) + date_path = candidate_date.strftime("%Y/%m/%d") + urls = [ + f"http://suntoday.lmsal.com/sdomedia/SunInTime/{date_path}/t{code}.jpg", + f"https://suntoday.lmsal.com/sdomedia/SunInTime/{date_path}/t{code}.jpg", + ] + + for url in urls: + try: + response = self.session.get(url, timeout=30, stream=True) + if response.status_code == 404: + response.close() + continue + + response.raise_for_status() + return self._save_response( + response=response, + source=source, + provider="lmsal", + image_url=url, + extension=".jpg", + observation_time=response.headers.get("Last-Modified") or candidate_date.strftime("%Y-%m-%d"), + extra_metadata={"date_path": date_path}, + ) + except requests.exceptions.RequestException: + continue + + return None + + def _download_from_jsoc(self, source: str) -> Optional[Dict]: + jsoc_path = SDO_SOURCES[source].get("jsoc_path") + if not jsoc_path: + return None + + url = f"https://jsoc1.stanford.edu{jsoc_path}" + response = self.session.get(url, timeout=30, stream=True) + response.raise_for_status() + + return self._save_response( + response=response, + source=source, + provider="jsoc", + image_url=url, + extension=Path(jsoc_path).suffix or ".img", + observation_time=self._timestamp_from_jsoc(source), + ) + + def _download_from_nasa(self, source: str) -> Optional[Dict]: + nasa_code = SDO_SOURCES[source].get("nasa_code") + if not nasa_code: + return None + + urls = [ + f"http://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{nasa_code}.jpg", + f"https://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{nasa_code}.jpg", + ] + + for url in urls: + try: + response = self.session.get(url, timeout=30, stream=True) + 
response.raise_for_status() + + return self._save_response( + response=response, + source=source, + provider="nasa", + image_url=url, + extension=".jpg", + observation_time=response.headers.get("Last-Modified"), + ) + except requests.exceptions.RequestException: + continue + + return None + + def _download_from_helioviewer(self, source: str) -> Optional[Dict]: + source_id = SDO_SOURCES[source]["sourceId"] + info_url = "https://api.helioviewer.org/v2/getClosestImage/" + info_params = { + "date": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "sourceId": source_id, + } + + info_response = self.session.get(info_url, params=info_params, timeout=30) + info_response.raise_for_status() + image_info = info_response.json() + image_id = image_info.get("id") + + if not image_id: + return None + + tile_url = "https://api.helioviewer.org/v2/getTile/" + tile_params = { + "id": image_id, + "x": 0, + "y": 0, + "imageScale": 2.4, + "display": "true", + } + response = self.session.get(tile_url, params=tile_params, timeout=30, stream=True) + response.raise_for_status() + + return self._save_response( + response=response, + source=source, + provider="helioviewer", + image_url=response.url, + extension=".png", + observation_time=image_info.get("date"), + extra_metadata={"image_id": image_id}, + ) + + def _timestamp_from_lmsal(self, source: str) -> Optional[str]: + code = SDO_SOURCES[source].get("lmsal_code") + if not code: + return None + + for day_offset in range(0, 4): + candidate_date = datetime.now(timezone.utc) - timedelta(days=day_offset) + date_path = candidate_date.strftime("%Y/%m/%d") + urls = [ + f"http://suntoday.lmsal.com/sdomedia/SunInTime/{date_path}/t{code}.jpg", + f"https://suntoday.lmsal.com/sdomedia/SunInTime/{date_path}/t{code}.jpg", + ] + for url in urls: + try: + response = self.session.head(url, timeout=15) + if response.status_code == 200: + return response.headers.get("Last-Modified") or candidate_date.strftime("%Y-%m-%d") + except 
requests.exceptions.RequestException: + continue + return None + + def _timestamp_from_jsoc(self, source: str) -> Optional[str]: + timestamp_key = SDO_SOURCES[source].get("jsoc_timestamp_key") + if not timestamp_key: + return None + + url = "https://jsoc1.stanford.edu/data/hmi/images/latest/image_times_UTC" + response = self.session.get(url, timeout=15) + response.raise_for_status() + + for line in response.text.splitlines(): + if ":" not in line: + continue + key, value = line.split(":", 1) + if key.strip().lower() == timestamp_key: + return value.strip() + return None + + def _timestamp_from_nasa(self, source: str) -> Optional[str]: + nasa_code = SDO_SOURCES[source].get("nasa_code") + if not nasa_code: + return None + + urls = [ + f"http://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{nasa_code}.jpg", + f"https://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{nasa_code}.jpg", + ] + + for url in urls: + try: + response = self.session.head(url, timeout=15) + response.raise_for_status() + return response.headers.get("Last-Modified") + except requests.exceptions.RequestException: + continue + + return None + + def _timestamp_from_helioviewer(self, source: str) -> Optional[str]: + source_id = SDO_SOURCES[source]["sourceId"] + response = self.session.get( + "https://api.helioviewer.org/v2/getClosestImage/", + params={ + "date": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "sourceId": source_id, + }, + timeout=15, + ) + response.raise_for_status() + return response.json().get("date") + + def _save_response( + self, + response: requests.Response, + source: str, + provider: str, + image_url: str, + extension: str, + observation_time: Optional[str] = None, + extra_metadata: Optional[Dict] = None, + ) -> Dict: + timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + filepath = self.output_dir / f"SDO_{source}_{timestamp}{extension}" + + with open(filepath, "wb") as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + 
f.write(chunk) + + metadata = { + "source": source, + "name": SDO_SOURCES[source]["name"], + "wavelength": SDO_SOURCES[source]["wavelength"], + "description": SDO_SOURCES[source]["description"], + "provider": provider, + "provider_name": PROVIDER_LABELS[provider], + "filepath": str(filepath), + "download_time": datetime.now(timezone.utc).isoformat(), + "image_url": image_url, + "observation_time": observation_time, + "content_type": response.headers.get("Content-Type"), + "last_modified": response.headers.get("Last-Modified"), + } + + if extra_metadata: + metadata.update(extra_metadata) + + metadata_file = filepath.with_suffix(".json") + with open(metadata_file, "w", encoding="utf-8") as f: + json.dump(metadata, f, indent=2) + + print(f"āœ“ Image saved: {filepath}") + print(f"āœ“ Metadata saved: {metadata_file}") + print(f"āœ“ Provider used: {PROVIDER_LABELS[provider]}") + + return metadata \ No newline at end of file