Source code for swvo.io.plasmasphere.read_plasmasphere_combined_inputs
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
#
# SPDX-License-Identifier: Apache-2.0
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
import pandas as pd
logger = logging.getLogger(__name__)
[docs]
class PlasmasphereCombinedInputsReader:
"""Reads one of the available combined inputs for plasmasphere density prediction.
Parameters
----------
folder : str
The folder where the combined inputs files are stored.
Raises
------
FileNotFoundError
If the data folder does not exist.
RuntimeError
If the source of data requested is not among the available ones.
"""
ENV_VAR_NAME = "PLASMASPHERE_COMBINED_INPUTS_DIR"
LABEL = "plasmsphere_combined_inputs"
def __init__(self, data_dir: Optional[Path] = None) -> None:
if data_dir is None:
if self.ENV_VAR_NAME not in os.environ:
raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")
data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment]
self.data_dir: Path = Path(data_dir) # ty:ignore[invalid-argument-type]
logger.info(f"Plasmasphere combined inputs directory: {self.data_dir}")
if not self.data_dir.exists():
msg = f"Plasmasphere combined inputs directory {self.data_dir} does not exist! Impossible to retrive data!"
logger.error(msg)
raise FileNotFoundError(msg)
def _read_single_file(self, date: datetime, source: str) -> pd.DataFrame | None:
"""Read a single file from the specified folder for the given date and source.
Parameters
----------
date : datetime
date of the plasmasphere prediction we want to read
source : str
source of the combined input we want to read. Available "kp" and "solar_wind"
Returns
-------
pd.DataFrame or None
pandas.DataFrame with the data read from the file, or None if the file does not exist.
"""
file_name = f"combined_inputs/{source}/{source}_{date.year}{str(date.month).zfill(2)}{str(date.day).zfill(2)}T{str(date.hour).zfill(2)}00.csv"
file_path = os.path.join(self.data_dir, file_name)
logger.info(f"Looking for file {file_path} for source {source} and date {date}")
if not os.path.isfile(file_path):
msg = f"No suitable files found in the folder {self.data_dir} for the requested date {date}"
logger.warning(msg)
return None
if source == "solar_wind":
data = pd.read_csv(file_path, parse_dates=["date"])
data["t"] = data["date"]
data.drop(labels=["date"], axis=1, inplace=True)
if source == "kp":
data = pd.read_csv(file_path, parse_dates=["t"])
return data
[docs]
def read(self, source: str, requested_date: datetime | None = None) -> pd.DataFrame | None:
"""Read one of the available combined inputs for plasmasphere density prediction.
Parameters
----------
source : str
The source of combined input requested. Available "kp" and "solar_wind".
requested_date : datetime | None, optional
Date of combined input thar we want to read up to hour precision, by default None which means current date.
Returns
-------
pd.DataFrame|None
pandas.DataFrame with the data read from the file, or None if the file does not exist.
Raises
------
RuntimeError
If the source of data requested is not among the available ones.
"""
if requested_date is None:
requested_date = datetime.now(timezone.utc).replace(microsecond=0, minute=0, second=0)
if source == "kp":
requested_date = requested_date.replace(minute=0, second=0, microsecond=0)
return self._read_single_file(requested_date, "kp")
elif source == "solar_wind":
requested_date = requested_date.replace(minute=0, second=0, microsecond=0)
return self._read_single_file(requested_date, "solar_wind")
else:
msg = f"Combined input {source} requested not available..."
logger.error(msg)
raise RuntimeError(msg)