# Source code for speasy.webservices.cda

# -*- coding: utf-8 -*-

"""CDA_Webservice package for Space Physics WebServices Client."""

__author__ = """Alexis Jeandet"""
__email__ = 'alexis.jeandet@member.fsf.org'
__version__ = '0.1.0'

import logging
import re
from datetime import datetime, timedelta, timezone
from email.utils import format_datetime
from typing import Dict, Optional, Tuple, Union

from speasy.core import AllowedKwargs
from speasy.core import any_files, http, url_utils
from speasy.core import cdf
from speasy.core.cache import CACHE_ALLOWED_KWARGS, UnversionedProviderCache
from speasy.core.dataprovider import (GET_DATA_ALLOWED_KWARGS, DataProvider,
                                      ParameterRangeCheck)
from speasy.core.datetime_range import DateTimeRange
from speasy.core.inventory.indexes import (DatasetIndex, ParameterIndex,
                                           SpeasyIndex)
from speasy.core.proxy import PROXY_ALLOWED_KWARGS, GetProduct, Proxyfiable
from speasy.core.requests_scheduling import SplitLargeRequests
from speasy.products.variable import SpeasyVariable

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Pattern tested by _is_burst_product(): an identifier matches when it contains,
# in this order, "MMS", then "FPI" or "SCM", then "BRST" (case-sensitive).
_burst_regex = re.compile("(.*MMS.*FPI.*BRST.*|.*MMS.*SCM.*BRST.*)")


def _is_burst_product(product: Union[ParameterIndex, str]) -> bool:
    """Tell whether a product identifier matches the module's burst pattern.

    Parameters
    ----------
    product: ParameterIndex or str
        Either an inventory index, in which case its ``spz_uid()`` is used, or
        any object whose ``str()`` is the product identifier.

    Returns
    -------
    bool
        True when the identifier matches ``_burst_regex`` (MMS ... FPI/SCM ... BRST),
        False otherwise.
    """
    # The previous annotation ``ParameterIndex or str`` evaluated to plain
    # ``ParameterIndex`` at definition time; ``Union`` states the real contract.
    if isinstance(product, ParameterIndex):
        product = product.spz_uid()
    return _burst_regex.match(str(product)) is not None


def _large_request_max_duration(product):
    """Return the duration threshold handed to ``SplitLargeRequests`` for ``product``.

    Products recognised by ``_is_burst_product`` get a 2 hour threshold, every
    other product gets 7 days.
    """
    return timedelta(hours=2) if _is_burst_product(product) else timedelta(days=7)


def _cache_fragment_size(product):
    """Return the ``fragment_hours`` value given to the provider cache for ``product``.

    Products recognised by ``_is_burst_product`` use 2, every other product uses 12.
    """
    return 2 if _is_burst_product(product) else 12


class CdaWebException(Exception):
    """Error raised when a CDAWeb request cannot be fulfilled.

    The class derives from ``Exception`` rather than ``BaseException``:
    ``BaseException`` is reserved for interpreter-level exits such as
    ``KeyboardInterrupt``/``SystemExit``, and deriving from it made this error
    slip through ordinary ``except Exception`` handlers. Code catching
    ``CdaWebException`` (or ``BaseException``) keeps working unchanged.

    Parameters
    ----------
    text:
        Human readable description of the failure.
    """

    def __init__(self, text):
        super().__init__(text)
def get_parameter_args(start_time: datetime, stop_time: datetime, product: str, **kwargs):
    """Build the request arguments for a CDA product (passed to ``Proxyfiable`` by ``get_data``).

    Parameters
    ----------
    start_time: datetime
        Beginning of the requested interval.
    stop_time: datetime
        End of the requested interval.
    product: str
        Product identifier, appended to the ``cdaweb/`` prefix.
    kwargs:
        Accepted and ignored.

    Returns
    -------
    dict
        Mapping with the keys ``path``, ``start_time`` and ``stop_time``; both
        times are ISO-8601 strings.
    """
    begin = f'{start_time.isoformat()}'
    end = f'{stop_time.isoformat()}'
    return {
        'path': f"cdaweb/{product}",
        'start_time': begin,
        'stop_time': end,
    }
class CDA_Webservice(DataProvider):
    """Data provider for the CDAWeb REST web service.

    The provider registers itself under the name ``cda`` with the alternative
    name ``cdaweb`` and sends its requests to ``{BASE_URL}/WS/cdasr/1``.
    """

    BASE_URL = "https://cdaweb.gsfc.nasa.gov"

    def __init__(self):
        # The service URL is set before the base-class initialisation runs,
        # as in the original implementation.
        self.__url = f"{self.BASE_URL}/WS/cdasr/1"
        DataProvider.__init__(self, provider_name='cda', provider_alt_names=['cdaweb'])

    def build_inventory(self, root: SpeasyIndex):
        """Fill ``root`` using the package's ``_inventory_builder.build_inventory``.

        Parameters
        ----------
        root: SpeasyIndex
            Inventory node handed to the builder.

        Returns
        -------
        The value returned by the inventory builder.
        """
        # Imported here, as in the original code, rather than at module level.
        from ._inventory_builder import build_inventory
        return build_inventory(root=root)

    def parameter_range(self, parameter_id: Union[str, ParameterIndex]) -> Optional[DateTimeRange]:
        """Get product time range.

        Parameters
        ----------
        parameter_id: str or ParameterIndex
            parameter id

        Returns
        -------
        Optional[DateTimeRange]
            Data time range

        Examples
        --------

        >>> import speasy as spz
        >>> spz.cda.parameter_range("AC_H0_MFI/BGSEc")
        <DateTimeRange: 1997-09-02T00:00:12+00:00 -> ...>

        """
        return self._parameter_range(parameter_id)

    def dataset_range(self, dataset_id: Union[str, DatasetIndex]) -> Optional[DateTimeRange]:
        """Get dataset time range.

        Parameters
        ----------
        dataset_id: str or DatasetIndex
            dataset id

        Returns
        -------
        Optional[DateTimeRange]
            Data time range

        Examples
        --------

        >>> import speasy as spz
        >>> spz.cda.dataset_range("AC_H0_MFI")
        <DateTimeRange: 1997-09-02T00:00:12+00:00 -> ...>

        """
        return self._dataset_range(dataset_id)

    def _to_dataset_and_variable(self, index_or_str: Union[ParameterIndex, str]) -> Tuple[str, str]:
        """Split a product identifier into its dataset and variable names.

        Parameters
        ----------
        index_or_str: ParameterIndex or str
            Either an inventory index (its ``spz_uid()`` is used) or a
            ``"dataset/variable"`` string.

        Returns
        -------
        Tuple[str, str]
            ``(dataset, variable)``.

        Raises
        ------
        TypeError
            If the argument is neither a ``ParameterIndex`` nor a string.
        ValueError
            If the string contains no ``/``, or contains several and no split
            matches the inventory.
        """
        if isinstance(index_or_str, ParameterIndex):
            index_or_str = index_or_str.spz_uid()
        if not isinstance(index_or_str, str):
            raise TypeError(f"Wrong type for {index_or_str}, expecting a string or a SpeasyIndex, got {type(index_or_str)}")
        if '/' not in index_or_str:
            raise ValueError(f"Given string does not look like a CDA dataset/variable pair: {index_or_str}")

        parts = index_or_str.split('/')
        if len(parts) == 2:
            return parts[0], parts[1]

        # More than one '/': either name may itself contain slashes, so every
        # split position is tried against the flat inventory.
        for pos in range(1, len(parts)):
            ds = '/'.join(parts[:pos])
            var = '/'.join(parts[pos:])
            if (ds in self.flat_inventory.datasets) and (index_or_str in self.flat_inventory.parameters):
                return ds, var
        raise ValueError(
            f"Given string is ambiguous, it contains several '/', tried all combinations but failed to find a matching dataset/variable pair in inventory: {index_or_str}")

    @staticmethod
    def _http_date(moment: datetime) -> str:
        """Format ``moment`` as an HTTP date (IMF-fixdate, always expressed in GMT).

        Naive datetimes are taken as already being UTC, which keeps the wall
        clock value the previous ``ctime()`` based header carried; aware
        datetimes are converted to UTC first.
        """
        if moment.utcoffset() is None:
            moment = moment.replace(tzinfo=timezone.utc)
        else:
            moment = moment.astimezone(timezone.utc)
        return format_datetime(moment, usegmt=True)

    @staticmethod
    def _is_no_data_response(resp) -> bool:
        """Tell whether an error response body carries a "No data available" message.

        The body is expected to be a JSON object whose ``Message`` entry is a
        list; only its first element is inspected. Any other shape, including
        a body that is not valid JSON, yields False instead of raising.
        """
        try:
            payload = resp.json()
        except ValueError:  # json decoding errors derive from ValueError
            return False
        messages = payload.get('Message', [""]) if isinstance(payload, dict) else None
        if not isinstance(messages, (list, tuple)) or not messages:
            return False
        first = messages[0]
        return isinstance(first, str) and "No data available" in first

    def _dl_variable(self,
                     dataset: str, variable: str,
                     start_time: datetime, stop_time: datetime,
                     if_newer_than: Optional[datetime] = None,
                     extra_http_headers: Optional[Dict] = None) -> Optional[SpeasyVariable]:
        """Request one variable of one dataset from the web service and load the result.

        Parameters
        ----------
        dataset: str
            Dataset name, percent-quoted into the URL.
        variable: str
            Variable name, percent-quoted into the URL.
        start_time: datetime
            Beginning of the requested interval.
        stop_time: datetime
            End of the requested interval.
        if_newer_than: datetime, optional
            When given, sent as an ``If-Modified-Since`` header.
        extra_http_headers: dict, optional
            Additional headers; they override the ones built here.

        Returns
        -------
        Optional[SpeasyVariable]
            The result of ``cdf.load_variable`` on the first file described in
            the response, or None when the response is successful but
            describes no file, or is a 404 stating "No data available".

        Raises
        ------
        CdaWebException
            For any other unsuccessful response.
        """
        # Times are formatted as-is; no timezone conversion happens here and
        # the trailing 'Z' is a literal of the format string.
        time_format = '%Y%m%dT%H%M%SZ'
        start_time, stop_time = start_time.strftime(time_format), stop_time.strftime(time_format)
        fmt = "cdf"
        quoted_dataset = url_utils.quote(dataset, safe='')
        quoted_variable = url_utils.quote(variable, safe='')
        url = (f"{self.__url}/dataviews/sp_phys/datasets/{quoted_dataset}"
               f"/data/{start_time},{stop_time}/{quoted_variable}?format={fmt}")

        headers = {"Accept": "application/json"}
        if if_newer_than is not None:
            headers["If-Modified-Since"] = self._http_date(if_newer_than)
        if extra_http_headers is not None:
            headers.update(extra_http_headers)

        resp = http.get(url, headers=headers)
        log.debug(resp.url)

        if resp.status_code == 200:
            # Decode the body once and reuse it for both the test and the lookup.
            payload = resp.json()
            if 'FileDescription' in payload:
                return cdf.load_variable(file=payload['FileDescription'][0]['Name'], variable=variable)
            return None
        if resp.ok:
            # Any other successful status carries nothing to load.
            return None
        if resp.status_code == 404 and self._is_no_data_response(resp):
            log.warning(f"Got 404 'No data available' from CDAWeb with {url}")
            return None
        raise CdaWebException(f'Failed to get data with request: {url}, got {resp.status_code} HTTP response')

    @AllowedKwargs(
        PROXY_ALLOWED_KWARGS + CACHE_ALLOWED_KWARGS + GET_DATA_ALLOWED_KWARGS + ['if_newer_than'])
    @ParameterRangeCheck()
    @UnversionedProviderCache(prefix="cda", fragment_hours=_cache_fragment_size, cache_retention=timedelta(days=7))
    @SplitLargeRequests(threshold=_large_request_max_duration)
    @Proxyfiable(GetProduct, get_parameter_args)
    def get_data(self, product, start_time: datetime, stop_time: datetime,
                 if_newer_than: Optional[datetime] = None,
                 extra_http_headers: Optional[Dict] = None):
        """Get the data of one product over a time interval.

        The undecorated body resolves ``product`` into a dataset/variable pair
        and downloads it with ``_dl_variable``. The decorators listed above
        wrap that call; their behaviour is defined in ``speasy.core``.

        Parameters
        ----------
        product: str or ParameterIndex
            ``"dataset/variable"`` identifier or inventory index.
        start_time: datetime
            Beginning of the requested interval.
        stop_time: datetime
            End of the requested interval.
        if_newer_than: datetime, optional
            Forwarded to ``_dl_variable``.
        extra_http_headers: dict, optional
            Forwarded to ``_dl_variable``.

        Returns
        -------
        Optional[SpeasyVariable]
            What ``_dl_variable`` returns for the resolved dataset and variable.
        """
        dataset, variable = self._to_dataset_and_variable(product)
        return self._dl_variable(start_time=start_time, stop_time=stop_time,
                                 dataset=dataset, variable=variable,
                                 if_newer_than=if_newer_than,
                                 extra_http_headers=extra_http_headers)

    def get_variable(self, dataset: str, variable: str,
                     start_time: Union[datetime, str], stop_time: Union[datetime, str],
                     **kwargs) -> Optional[SpeasyVariable]:
        """Get one variable of a dataset; shorthand for ``get_data(f"{dataset}/{variable}", ...)``.

        Parameters
        ----------
        dataset: str
            Dataset name.
        variable: str
            Variable name within the dataset.
        start_time: datetime or str
            Beginning of the requested interval.
        stop_time: datetime or str
            End of the requested interval.
        kwargs:
            Forwarded unchanged to ``get_data``.

        Returns
        -------
        Optional[SpeasyVariable]
            Whatever ``get_data`` returns.
        """
        return self.get_data(f"{dataset}/{variable}", start_time, stop_time, **kwargs)