# Source code for swvo.io.dst.omni
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
#
# SPDX-License-Identifier: Apache-2.0
"""
Module for handling OMNI Dst data.
"""
from __future__ import annotations
import logging
import warnings
from datetime import datetime, timedelta, timezone
from pathlib import Path
import numpy as np
import pandas as pd
from swvo.io.omni import OMNILowRes
from swvo.io.utils import enforce_utc_timezone
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Route messages issued through the `warnings` module into the logging system.
# NOTE(review): this is a process-wide switch applied as a side effect of
# importing this module -- confirm that is intended for library code.
logging.captureWarnings(True)
# [docs]
class DSTOMNI(OMNILowRes):
    """
    Class for reading Dst data from OMNI low resolution files.

    Inherits the `download_and_process`, other private methods and attributes from OMNILowRes.
    """

    # data is downloaded along with OMNI data, check file name in parent class

    def read(self, start_time: datetime, end_time: datetime, download: bool = False) -> pd.DataFrame:
        """
        Read OMNI DST data for the given time range.

        Parameters
        ----------
        start_time : datetime
            Start time of the data to read. Must be timezone-aware.
        end_time : datetime
            End time of the data to read. Must be timezone-aware.
        download : bool, optional
            Download data on the go, defaults to False.

        Returns
        -------
        :class:`pandas.DataFrame`
            OMNI DST data on an hourly UTC index named ``t``. The ``dst`` column is
            NaN and ``file_name`` is None wherever no data could be read.

        Raises
        ------
        ValueError
            If ``start_time`` is later than ``end_time``.
        """
        # Pass both bounds through the timezone helper *before* comparing them, so
        # the comparison always sees two values that went through the same handling
        # (comparing a naive with an aware datetime raises TypeError).
        start_time = enforce_utc_timezone(start_time)
        end_time = enforce_utc_timezone(end_time)

        if start_time > end_time:
            msg = "start_time must be before end_time"
            logger.error(msg)
            raise ValueError(msg)

        file_paths, _ = self._get_processed_file_list(start_time, end_time)

        # Hourly grid covering every full day touched by the request; it is the
        # NaN-filled fallback that the per-file data is merged onto.
        t = pd.date_range(
            datetime(start_time.year, start_time.month, start_time.day),
            datetime(end_time.year, end_time.month, end_time.day, 23, 0, 0),
            freq=timedelta(hours=1),
            tz=timezone.utc,
        )
        data_out = pd.DataFrame(index=t)
        data_out["dst"] = np.array([np.nan] * len(t))
        data_out["file_name"] = np.array([None] * len(t))

        for file_path in file_paths:
            if not file_path.exists():
                if download:
                    self.download_and_process(start_time, end_time)
                # Re-check: the file is missing either because downloading was not
                # requested or because the download did not produce it. In both
                # cases skip it and leave the NaN placeholders in place instead of
                # failing inside the CSV reader.
                if not file_path.exists():
                    warnings.warn(f"File {file_path} not found")
                    continue

            df_one_file = self._read_single_file(file_path)
            # Values read from the file take precedence; gaps fall back to data_out.
            data_out = df_one_file.combine_first(data_out)

        # Widen the window by just under one hour on each side so that the hourly
        # sample right before a non-aligned start_time (and right after a
        # non-aligned end_time) is kept, without pulling in an extra sample when
        # the bounds sit exactly on the hour.
        data_out = data_out.truncate(
            before=start_time - timedelta(hours=0.9999),
            after=end_time + timedelta(hours=0.9999),
        )
        data_out.index.name = "t"
        # Helper columns carried over from the per-file frames are not part of the result.
        data_out.drop(columns=["timestamp", "t"], inplace=True, errors="ignore")
        return data_out

    def _read_single_file(self, file_path: Path) -> pd.DataFrame:
        """Read yearly OMNI DST file to a DataFrame.

        Parameters
        ----------
        file_path : Path
            Path to the file.

        Returns
        -------
        pd.DataFrame
            Data from yearly OMNI DST file, indexed by the UTC-parsed ``timestamp``
            column, with ``file_name`` set to ``file_path`` for rows that have a
            ``dst`` value and to None for rows that do not.
        """
        df = pd.read_csv(file_path)
        # The processed file is shared with other indices; only Dst is wanted here.
        df.drop(columns=["kp", "f107"], inplace=True)

        df["t"] = pd.to_datetime(df["timestamp"], utc=True)
        df.index = df["t"]

        # Record the originating file only for rows that actually carry a Dst value.
        df["file_name"] = file_path
        df.loc[df["dst"].isna(), "file_name"] = None
        return df