Source code for kkpy.io

"""
kkpy.io
========================

Functions to read and write files

.. currentmodule:: kkpy.io

.. autosummary::
    kkpy.io.get_fname
    kkpy.io.read_aws
    kkpy.io.read_2dvd_rho
    kkpy.io.read_mxpol_rhi_with_hc
    kkpy.io.read_dem
    kkpy.io.read_wissdom
    kkpy.io.read_vertix
    kkpy.io.read_vet
    kkpy.io.read_hsr
    kkpy.io.read_d3d
    kkpy.io.read_r3d
    kkpy.io.read_sounding
    kkpy.io.read_lidar_wind
    kkpy.io.read_wpr_kma
    kkpy.io.read_pluvio_raw
    kkpy.io.read_2dvd
    kkpy.io.read_wxt520

"""
import numpy as np
import pandas as pd
import xarray as xr
import datetime
import glob
import os
import sys

def read_aws(time, date_range=True, datadir='/disk/STORAGE/OBS/AWS/', stnid=None, dask=True):
    """
    Read AWS (AWS_MIN).

    Examples
    ---------
    >>> import datetime
    >>> df_aws = kkpy.io.read_aws(time=datetime.datetime(2018,2,28,6,0))

    >>> df_aws = kkpy.io.read_aws(time=[datetime.datetime(2018,2,28,6,0),datetime.datetime(2018,3,1,12,0)], datadir='/path/to/aws/files/')

    Parameters
    ----------
    time : datetime or array_like of datetime
        Datetime of the data you want to read. If this is an array of two elements, all data within the two datetimes are read by default. If this is an array of elements and the keyword *date_range* is False, the data at the specific time of each element are read.
    date_range : bool, optional
        False if the argument *time* contains elements of the specific times you want to read.
    datadir : str, optional
        Directory of the data.
    stnid : list, optional
        List of station ids you want to read. All sites are read if None.
    dask : bool, optional
        Return a dask dataframe if True, otherwise return a pandas dataframe.

    Returns
    ---------
    df_aws : dataframe
        Return dataframe of AWS data.

    Warnings
    ---------
    This routine will be updated to receive an argument for filenames only.
    The syntax is likely to change in the near future.
    """
    import dask.dataframe as dd

    if time is None:
        sys.exit(f'{__name__}: Check time argument')

    if len(time) == 1:
        date_range = False

    if date_range:
        if len(time) != 2:
            sys.exit(f'{__name__}: Check time and date_range arguments')
        if time[0] >= time[1]:
            sys.exit(f'{__name__}: time[1] must be greater than time[0]')

        dt_start = datetime.datetime(time[0].year, time[0].month, time[0].day, time[0].hour, time[0].minute)
        dt_finis = datetime.datetime(time[1].year, time[1].month, time[1].day, time[1].hour, time[1].minute)

        # Get file list (one AWS_MIN file per minute)
        filearr = np.array([])
        _dt = dt_start
        while _dt <= dt_finis:
            _filearr = np.sort(glob.glob(f'{datadir}/{_dt:%Y%m}/{_dt:%d}/AWS_MIN_{_dt:%Y%m%d%H%M}'))
            filearr = np.append(filearr, _filearr)
            _dt = _dt + datetime.timedelta(minutes=1)
        yyyy_filearr = [int(os.path.basename(x)[-12:-8]) for x in filearr]
        mm_filearr = [int(os.path.basename(x)[-8:-6]) for x in filearr]
        dd_filearr = [int(os.path.basename(x)[-6:-4]) for x in filearr]
        hh_filearr = [int(os.path.basename(x)[-4:-2]) for x in filearr]
        ii_filearr = [int(os.path.basename(x)[-2:]) for x in filearr]
        dt_filearr = np.array([datetime.datetime(yyyy, mm, dd, hh, ii) for (yyyy, mm, dd, hh, ii) in zip(yyyy_filearr, mm_filearr, dd_filearr, hh_filearr, ii_filearr)])

        filearr = filearr[(dt_filearr >= dt_start) & (dt_filearr <= dt_finis)]
        dt_filearr = dt_filearr[(dt_filearr >= dt_start) & (dt_filearr <= dt_finis)]
    else:
        list_dt_yyyymmddhhii = np.unique(np.array([datetime.datetime(_time.year, _time.month, _time.day, _time.hour, _time.minute) for _time in time]))
        filearr = np.array([f'{datadir}/{_dt:%Y%m}/{_dt:%d}/AWS_MIN_{_dt:%Y%m%d%H%M}' for _dt in list_dt_yyyymmddhhii])
        dt_filearr = list_dt_yyyymmddhhii

    if len(filearr) == 0:
        sys.exit(f'{__name__}: No matched data for the given time period')

    names = ['ID', 'YMDHI', 'LON', 'LAT', 'HGT',
             'WD', 'WS', 'T', 'RH',
             'PA', 'PS', 'RE',
             'R60mAcc', 'R1d', 'R15m', 'R60m',
             'WDS', 'WSS', 'dummy']

    df_aws = dd.read_csv(filearr.tolist(), delimiter='#', names=names, header=None, na_values=[-999, -997])
    df_aws = df_aws.drop('dummy', axis=1)

    # Scale raw integer values (stored as value*10) to physical units.
    # Bracket indexing is used because attribute assignment clashes with
    # reserved DataFrame attributes (e.g. df.T is the transpose).
    for col in ['WD', 'WS', 'T', 'RH', 'PA', 'PS', 'RE',
                'R60mAcc', 'R1d', 'R15m', 'R60m', 'WDS', 'WSS']:
        df_aws[col] = df_aws[col] / 10.

    if stnid:
        df_aws = df_aws[df_aws['ID'].isin(stnid)]

    df_aws = df_aws.set_index(dd.to_datetime(df_aws['YMDHI'], format='%Y%m%d%H%M'))
    df_aws = df_aws.drop('YMDHI', axis=1)

    if dask:
        return df_aws
    else:
        return df_aws.compute()

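# A minimal usage sketch (hedged): the directory layout follows the default
# `datadir` above, and the station id 100 is hypothetical.
#
# >>> import datetime
# >>> df = kkpy.io.read_aws(
# ...     time=[datetime.datetime(2018,2,28,6,0), datetime.datetime(2018,2,28,7,0)],
# ...     stnid=[100], dask=False)
# >>> df.groupby('ID')['T'].mean()   # mean temperature per station
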
def read_2dvd_rho(time, date_range=True, datadir='/disk/common/kwonil_rainy/RHO_2DVD/', filename='2DVD_Dapp_v_rho_201*Deq.txt'):
    """
    Read 2DVD density files into dataframe.

    Examples
    ---------
    >>> import datetime
    >>> df_2dvd_drop = kkpy.io.read_2dvd_rho(time=datetime.datetime(2018,2,28)) # automatically date_range=False

    >>> df_2dvd_drop = kkpy.io.read_2dvd_rho(time=[datetime.datetime(2018,2,28,6),datetime.datetime(2018,3,1,12)], datadir='/path/to/2dvd/files/')

    >>> df_2dvd_drop = kkpy.io.read_2dvd_rho(time=list_of_many_datetimes, date_range=False)

    >>> df_2dvd_drop = kkpy.io.read_2dvd_rho(time=datetime.datetime(2018,2,28), filename='2DVD_rho_test_*.txt')

    Parameters
    ----------
    time : datetime or array_like of datetime
        Datetime of the data you want to read. If this is an array of two elements, all data within the two datetimes are read by default. If this is an array of elements and the keyword *date_range* is False, the data at the specific time of each element are read.
    date_range : bool, optional
        False if the argument *time* contains elements of the specific times you want to read.
    datadir : str, optional
        Directory of the data.
    filename : str, optional
        File naming of the data.

    Returns
    ---------
    df_2dvd_drop : dataframe
        Return dataframe of 2DVD data.
    """
    # Get file list
    filearr = np.array(np.sort(glob.glob(f'{datadir}/**/{filename}', recursive=True)))
    yyyy_filearr = [int(os.path.basename(x)[-27:-23]) for x in filearr]
    mm_filearr = [int(os.path.basename(x)[-23:-21]) for x in filearr]
    dd_filearr = [int(os.path.basename(x)[-21:-19]) for x in filearr]
    dt_filearr = np.array([datetime.datetime(yyyy, mm, dd) for (yyyy, mm, dd) in zip(yyyy_filearr, mm_filearr, dd_filearr)])

    if time is None:
        sys.exit(f'{__name__}: Check time argument')

    if len(time) == 1:
        date_range = False

    if date_range:
        if len(time) != 2:
            sys.exit(f'{__name__}: Check time and date_range arguments')
        if time[0] >= time[1]:
            sys.exit(f'{__name__}: time[1] must be greater than time[0]')

        dt_start = datetime.datetime(time[0].year, time[0].month, time[0].day)
        dt_finis = datetime.datetime(time[1].year, time[1].month, time[1].day)

        filearr = filearr[(dt_filearr >= dt_start) & (dt_filearr <= dt_finis)]
        dt_filearr = dt_filearr[(dt_filearr >= dt_start) & (dt_filearr <= dt_finis)]
    else:
        list_dt_yyyymmdd = np.unique(np.array([datetime.datetime(_time.year, _time.month, _time.day) for _time in time]))
        filearr = filearr[np.isin(dt_filearr, list_dt_yyyymmdd)]
        dt_filearr = dt_filearr[np.isin(dt_filearr, list_dt_yyyymmdd)]

    if len(filearr) == 0:
        sys.exit(f'{__name__}: No matched data for the given time period')

    # READ DATA
    columns = ['hhmm', 'Dapp', 'VEL', 'RHO', 'AREA', 'WA', 'HA', 'WB', 'HB', 'Deq']
    dflist = []
    for i_file, (file, dt) in enumerate(zip(filearr, dt_filearr)):
        _df = pd.read_csv(file, skiprows=1, names=columns, header=None, delim_whitespace=True)
        _df['year'] = dt.year
        _df['month'] = dt.month
        _df['day'] = dt.day
        _df['hour'] = np.int_(_df['hhmm'] / 100)
        _df['minute'] = _df['hhmm'] % 100
        _df['jultime'] = pd.to_datetime(_df[['year','month','day','hour','minute']])
        _df = _df.drop(['hhmm','year','month','day','hour','minute'], axis=1)
        dflist.append(_df)
        print(i_file+1, filearr.size, file)

    df_2dvd_drop = pd.concat(dflist, sort=False, ignore_index=True)
    df_2dvd_drop.set_index('jultime', inplace=True)

    if date_range:
        if np.sum([np.sum([_time.hour, _time.minute, _time.second]) for _time in time]) != 0:
            df_2dvd_drop = df_2dvd_drop.loc[time[0]:time[1]]

    return df_2dvd_drop

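# A minimal sketch (hedged): count drops per minute from the returned
# time-indexed dataframe; the data path is hypothetical.
#
# >>> df = kkpy.io.read_2dvd_rho(time=datetime.datetime(2018,2,28),
# ...                            datadir='/path/to/2dvd/files/')
# >>> n_drops = df['Deq'].resample('1min').count()
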
def read_mxpol_rhi_with_hc(rhifile_nc, hcfile_mat):
    """
    Read MXPOL RHI with hydrometeor classification into a Py-ART radar object.

    Examples
    ---------
    >>> rhifile = '/disk/WORKSPACE/kwonil/MXPOL/RAW/2018/02/28/MXPol-polar-20180228-065130-RHI-225_8.nc'
    >>> hcfile = '/disk/WORKSPACE/kwonil/MXPOL/HID/2018/02/28/MXPol-polar-20180228-065130-RHI-225_8_zdrcorr_demix.mat'
    >>> radar_mxp = kkpy.io.read_mxpol_rhi_with_hc(rhifile, hcfile)

    Parameters
    ----------
    rhifile_nc : str or array_like of str
        Filepath of the RHI data to read.
        The number and the order of elements should match `hcfile_mat`.
    hcfile_mat : str or array_like of str
        Filepath of the hydrometeor classification file to read.
        The number and the order of elements should match `rhifile_nc`.

    Returns
    ---------
    radar : Py-ART radar object
        Return Py-ART radar object.
    """
    os.environ['PYART_QUIET'] = "True"
    import pyart
    import scipy.io
    from netCDF4 import Dataset

    # HC file
    HC_proportion = scipy.io.loadmat(hcfile_mat)

    # RHI file
    mxpol = Dataset(rhifile_nc, 'r')
    El = mxpol.variables['Elevation'][:]
    wh_hc = np.logical_and(El > 5, El < 175)
    El = El[wh_hc]
    R = mxpol.variables['Range'][:]

    radar = pyart.testing.make_empty_rhi_radar(HC_proportion['AG'].shape[1], HC_proportion['AG'].shape[0], 1)

    ######## HIDs ########
    # find the most probable habit
    for i, _HC in HC_proportion.items():
        if '_' in i:
            continue
        if i == 'AG':
            HC3d_proportion = np.array(HC_proportion[i])
        else:
            HC3d_proportion = np.dstack([HC3d_proportion, HC_proportion[i]])
    HC = np.float_(np.argmax(HC3d_proportion, axis=2))
    HC[np.isnan(HC3d_proportion[:,:,0])] = np.nan

    # add to Py-ART radar fields
    list_str = ['AG', 'CR', 'IH', 'LR', 'MH', 'RN', 'RP', 'WS']
    list_standard = ['Aggregation', 'Crystal', 'Ice hail / Graupel', 'Light rain',
                     'Melting hail', 'Rain', 'Rimed particles', 'Wet snow']
    for _str, _standard in zip(list_str, list_standard):
        mask_dict = {
            'data': HC_proportion[_str],
            'unit': '-',
            'long_name': f'Proportion of the {_str}',
            '_FillValue': -9999,
            'standard_name': _standard}
        radar.add_field(_str, mask_dict, replace_existing=True)
    radar.add_field(
        'HC',
        {'data': HC,
         'unit': '-',
         'long_name': 'Most probable habit. AG(0), CR(1), IH(2), LR(3), MH(4), RN(5), RP(6), WS(7)',
         '_FillValue': -9999,
         'standard_name': 'Hydrometeor classification'},
        replace_existing=True)

    ######## Radar variables ########
    ZDR = mxpol.variables['Zdr'][:].T[wh_hc]
    Z = mxpol.variables['Zh'][:].T[wh_hc]
    KDP = mxpol.variables['Kdp'][:].T[wh_hc]
    mask_dict = {
        'data': KDP,
        'unit': 'deg/km',
        'long_name': 'differential phase shift',
        '_FillValue': -9999,
        'standard_name': 'KDP'}
    radar.add_field('KDP', mask_dict)
    mask_dict = {
        'data': ZDR - 4.5,
        'unit': 'dB',
        'long_name': 'differential reflectivity',
        '_FillValue': -9999,
        'standard_name': 'ZDR'}
    radar.add_field('ZDR', mask_dict)
    mask_dict = {
        'data': Z,
        'unit': 'dBZ',
        'long_name': 'horizontal reflectivity',
        '_FillValue': -9999,
        'standard_name': 'ZHH'}
    radar.add_field('ZHH', mask_dict)

    radar.range['data'] = R
    radar.elevation['data'] = El
    azimuth = np.array(mxpol['Azimuth'][:][wh_hc])
    if azimuth[0] < 0:
        azimuth += 360
    radar.azimuth['data'] = azimuth
    radar.fixed_angle['data'] = azimuth
    radar.time['data'] = np.array(mxpol.variables['Time'][:])
    radar.time['units'] = "seconds since 1970-01-01T00:00:00Z"
    radar.longitude['data'] = np.array([mxpol.getncattr('Longitude-value')])
    radar.latitude['data'] = np.array([mxpol.getncattr('Latitude-value')])
    radar.metadata['instrument_name'] = 'MXPol'
    radar.altitude['data'] = np.array([mxpol.getncattr('Altitude-value')])

    return radar

def read_dem(file=None, area='pyeongchang'):
    """
    Read the NASA SRTM 3-arcsec (90 m) digital elevation model of South Korea.

    Examples
    ---------
    >>> dem, lon_dem, lat_dem, proj_dem = kkpy.io.read_dem(area='pyeongchang')
    >>> ax = plt.subplot(projection=ccrs.PlateCarree())
    >>> pm = ax.pcolormesh(lon_dem, lat_dem, dem, cmap=cmap, vmin=0, transform=ccrs.PlateCarree())

    >>> dem, lon_dem, lat_dem, proj_dem = kkpy.io.read_dem(area='korea')

    Parameters
    ----------
    file : str, optional
        Filepath of the .tif DEM file to read. Currently not implemented and ignored.
    area : str, optional
        Region of interest. Possible options are 'pyeongchang' and 'korea'. Default is 'pyeongchang'.

    Returns
    ---------
    dem : float 2D array
        Return DEM elevation.
    lon_dem : float 2D array
        Return longitude of each DEM pixel.
    lat_dem : float 2D array
        Return latitude of each DEM pixel.
    proj_dem : osr object
        Spatial reference system of the used coordinates.
    """
    import cartopy.crs as ccrs

    # note: the *file* argument is currently ignored;
    # only the precomputed .npy files below are read
    if area == 'pyeongchang':
        dem = np.load('/disk/WORKSPACE/kwonil/SRTM3_V2.1/NPY/pyeongchang_90m_dem.npy')
        lon_dem = np.load('/disk/WORKSPACE/kwonil/SRTM3_V2.1/NPY/pyeongchang_90m_lon.npy')
        lat_dem = np.load('/disk/WORKSPACE/kwonil/SRTM3_V2.1/NPY/pyeongchang_90m_lat.npy')
    elif area == 'korea':
        dem = np.load('/disk/WORKSPACE/kwonil/SRTM3_V2.1/NPY/korea_90m_dem.npy')
        lon_dem = np.load('/disk/WORKSPACE/kwonil/SRTM3_V2.1/NPY/korea_90m_lon.npy')
        lat_dem = np.load('/disk/WORKSPACE/kwonil/SRTM3_V2.1/NPY/korea_90m_lat.npy')
    else:
        sys.exit(f'{__name__}: Check area argument')

    proj_dem = ccrs.PlateCarree()

    return dem, lon_dem, lat_dem, proj_dem

def get_fname(indir, pattern, dt, date_range=True, verbose=True):
    """
    Get filenames corresponding to the given datetime(s) and format.

    Examples
    ---------
    >>> # Get radar filenames
    >>> fname, fdatetime = get_fname('/disk/STORAGE/OBS/Radar/ICE-POP/PPI/NOQC/KST/',
    >>>                              '%Y%m/%d/RDR_GNG_%Y%m%d%H%M.uf',
    >>>                              [datetime.datetime(2018,2,28,15,0), datetime.datetime(2018,3,2,16,0)])

    >>> # Get AWS filenames (no extension)
    >>> fname, fdatetime = get_fname('/disk/STORAGE/OBS/AWS/',
    >>>                              '%Y%m/%d/AWS_MIN_%Y%m%d%H%M',
    >>>                              [datetime.datetime(2018,1,22,5,30), datetime.datetime(2018,1,23,4,28)])

    >>> # Get MRR filenames (duplicate format - %m)
    >>> fname, fdatetime = get_fname('/disk/STORAGE/OBS/MRR/AveData/',
    >>>                              '%Y%m/%m%d.ave',
    >>>                              [datetime.datetime(2015,8,15,5,30), datetime.datetime(2015,8,17,4,28)])

    >>> # Get 2DVD filename (one datetime, using DOY - %j)
    >>> fname, fdatetime = get_fname('/disk/STORAGE/OBS/2DVD/2dvddata/hyd/',
    >>>                              'V%y%j_1.txt',
    >>>                              datetime.datetime(2012,2,3))

    >>> # Get MRR-PRO filenames (multiple datetimes with pandas)
    >>> import pandas as pd
    >>> fname, fdatetime = get_fname('/disk/STORAGE/OBS/MRR-PRO/',
    >>>                              '%Y%m/%Y%m%d/%Y%m%d_%H%M%S.nc',
    >>>                              pd.date_range(start='2020-06-01', end='2020-08-31', freq='1D'),
    >>>                              date_range=False,
    >>>                              verbose=True)

    Parameters
    ----------
    indir : str
        Path of the root directory. This should **not** contain any format string.
    pattern : str
        Datetime pattern to match. The directory part can also be formatted here (eg. %Y%m/%d/sitename/data_%Y%m%d%H%M%S.csv). See the format code description: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes.
    dt : datetime or list of datetime
        Datetime(s) of interest to match. If a datetime object, find one matched file. If a list of datetime objects, find matched files for the specific datetimes if **`date_range` is False**. If a list of two datetime objects and **`date_range` is True**, find all matched files within the two datetimes.
    date_range : bool, optional
        True to find all matched files within the two datetimes. The number of elements in `dt` should be two if `date_range` is True. If False, return matched files for the specific datetimes in `dt`. Default is True.
    verbose : bool, optional
        If True, print a 'File does not exist' warning if **`date_range` is False**.

    Returns
    ---------
    fname : str or list of str
        Return filenames of the matched files.
    fdt : datetime or list of datetime
        Return datetimes of the matched files.
    """
    import warnings

    # check indir
    if indir.find('%') != -1:
        raise UserWarning('indir should not have datetime format')

    # check pattern
    if pattern.find('%') == -1:
        raise UserWarning('pattern should have at least one datetime format (eg. %Y%m%d)')

    # check dt
    is_dt_list = isinstance(dt, list) | isinstance(dt, np.ndarray)
    if not is_dt_list:
        if isinstance(dt, pd.DatetimeIndex):
            dt = dt.to_pydatetime()
    else:
        if isinstance(dt[0], pd.DatetimeIndex):
            dt = dt.to_pydatetime()

    # check if dt is a single variable or a list
    dt = np.array(dt)
    is_dt_single = dt.size == 1
    is_dt_two = dt.size == 2
    is_dt_multiple = dt.size > 2

    # check date_range
    if is_dt_single or is_dt_multiple:
        date_range = False
    if date_range and dt[0] >= dt[1]:
        raise UserWarning('dt[0] should be earlier than dt[1] if date_range is True')

    # split into the patterns of filename and directory
    pattern_fn = pattern.split('/')[-1]
    pattern_dir = '/'.join(pattern.split('/')[:-1])

    if is_dt_single:
        # the easiest case
        fname = [f'{indir}/{dt:{pattern}}']
    else:
        if not date_range:
            # the second easiest case
            fname = [f'{indir}/{_dt:{pattern}}' for _dt in dt]
        else:
            # check if only the last part of the pattern contains a format string
            wh_fmt = ['%' in x for x in pattern.split('/')]
            cnt_fmtstr = np.sum(wh_fmt)
            is_dir_clean = cnt_fmtstr == 1

            if is_dir_clean:
                # the directory doesn't contain a format string:
                # find all files in the directory
                candidate_fn = np.array(glob.glob(f'{indir}/{_pattern_as_asterisk(pattern)}'))
                # filename to datetime
                candidate_dt = _fname2dt(candidate_fn, f'{indir}/{pattern}')
            else:
                # the directory contains a format string:
                # find candidate paths
                dtlist = pd.date_range(start=dt[0], end=dt[1], freq='1H')
                candidate_paths = np.unique([t.strftime(f'{indir}{pattern_dir}') for t in dtlist])

                candidate_fn = []
                for candidate_path in candidate_paths:
                    candidate_fn.append(glob.glob(f'{candidate_path}/{_pattern_as_asterisk(pattern_fn)}'))
                # flatten the nested list
                candidate_fn = np.array(sum(candidate_fn, []))

                # filename to datetime
                candidate_dt = _fname2dt(candidate_fn, f'{indir}/{pattern}')

            # check if any file was found
            if len(candidate_dt) == 0:
                raise UserWarning('No matched file found')

            # match with the datetime range
            lowest_dtfmt = _get_lowest_order_datetimeformat(pattern_fn)
            one_order_lower = {
                '%Y': '%m',
                '%m': '%d',
                '%d': '%H%M%S',
                '%j': '%H%M%S',
                '%H': '%M%S',
                '%M': '%S',
                '%S': '%f'
            }
            default_dtvalue = {
                '%m': 1,
                '%d': 1,
                '%H%M%S': 0,
                '%M%S': 0,
                '%S': 0,
                '%f': 0
            }
            lower_dtfmt = one_order_lower[lowest_dtfmt]

            candidate_dt = np.array(candidate_dt)
            if int(datetime.datetime.strftime(dt[0], lower_dtfmt)) != default_dtvalue[lower_dtfmt]:
                # Truncate the unnecessary start time (eg. filename: 20180228, start_datetime: 20180228 13:00 --> 20180228 00:00)
                dt0_trunc = _truncate_unnecessary_datetime(dt[0], lower_dtfmt)
                wh = np.where(np.logical_and(candidate_dt >= dt0_trunc, candidate_dt <= dt[1]))[0]
            else:
                wh = np.where(np.logical_and(candidate_dt >= dt[0], candidate_dt <= dt[1]))[0]

            # check if any file was found
            if wh.size == 0:
                raise UserWarning('No matched file found')

            # store matched files only
            fname = candidate_fn[wh]

    # get rid of duplicated files
    fname = np.unique(fname)

    # check if the files exist
    fname_exist = []
    for _fname in fname:
        if os.path.isfile(_fname):
            fname_exist.append(_fname)
        else:
            if verbose:
                warnings.warn(f'File does not exist: {_fname}')
    fname = np.array(fname_exist)

    # prepare the return values
    if fname.size == 0:
        raise UserWarning('No matched file found')

    fdt = _fname2dt(fname, f'{indir}/{pattern}')
    if is_dt_single:
        fname = fname[0]

    return fname, fdt

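# A minimal sketch (hedged): pair get_fname() with a reader; the root
# directory and filename pattern are hypothetical.
#
# >>> fnames, fdts = kkpy.io.get_fname(
# ...     '/path/to/HSR/', '%Y%m/%d/RDR_CMP_HSR_PUB_%Y%m%d%H%M.bin.gz',
# ...     [datetime.datetime(2020,7,1,7,0), datetime.datetime(2020,7,1,8,0)])
# >>> ds = kkpy.io.read_hsr(list(fnames))
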
def _fname2dt(fnames, pattern):
    """
    Get datetime from the filename.
    """
    import re
    import parse

    fnames = np.array(fnames)

    dt = []
    clean_pattern = re.sub('//', '/', f'{pattern}')
    dtfmt = re.findall('(\\%\D)', clean_pattern)
    # for duplicated datetime formats
    is_duplicate = len(set(dtfmt)) != len(dtfmt)

    replace = {
        '(%Y)': '{Y:04d}',
        '(%y)': '{y:02d}',
        '(%m)': '{m:02d}',
        '(%d)': '{d:02d}',
        '(%j)': '{j:03d}',
        '(%H)': '{H:02d}',
        '(%M)': '{M:02d}',
        '(%S)': '{S:02d}',
    }
    default = {
        'Y': 2020,
        'm': 1,
        'd': 1,
        'H': 0,
        'M': 0,
        'S': 0,
    }

    for fname in fnames:
        clean_fname = re.sub('//', '/', f'{fname}')

        if not is_duplicate:
            _dt = datetime.datetime.strptime(clean_fname, clean_pattern)
        else:
            parse_pattern = clean_pattern
            for key in replace.keys():
                parse_pattern = re.sub(key, replace[key], parse_pattern)
            parsed = parse.parse(parse_pattern, clean_fname)

            if 'j' not in parsed.named.keys():
                # set default values if a datetime key is missing
                for key in default.keys():
                    if key not in parsed.named.keys():
                        parsed.named[key] = default[key]
                _dt = datetime.datetime(parsed['Y'], parsed['m'], parsed['d'], parsed['H'], parsed['M'], parsed['S'])
            else:
                # set default values if a datetime key is missing
                for key in ['Y', 'H', 'M', 'S']:
                    if key not in parsed.named.keys():
                        parsed.named[key] = default[key]
                _dt = datetime.datetime(parsed['Y'], 1, 1, parsed['H'], parsed['M'], parsed['S']) + datetime.timedelta(parsed['j'] - 1)

        dt.append(_dt)
    dt = np.array(dt)

    if fnames.size == 1:
        dt = dt[0]

    return dt

def _pattern_as_asterisk(pattern):
    """
    Replace recurring %D (D: character) in the datetime pattern with an asterisk.
    """
    import re
    return re.sub("(\\%\D)+", "*", pattern)

def _get_lowest_order_datetimeformat(pattern):
    """
    Get the lowest order of datetime format in the pattern.
    """
    import re
    dtfmts = re.findall('(\\%\D)', pattern)

    fmt2new = {
        '%Y': '%Y',  # year
        '%y': '%Y',
        '%G': '%Y',
        '%m': '%m',  # month
        '%B': '%m',
        '%b': '%m',
        '%d': '%d',  # day
        '%j': '%j',  # day of the year
        '%H': '%H',  # hour
        '%I': '%H',
        '%M': '%M',  # minute
        '%S': '%S',  # second
    }
    # the formats appear from the highest to the lowest order,
    # so the last match wins
    for _fmt in dtfmts:
        lowest_fmt = fmt2new[_fmt]

    return lowest_fmt

def _truncate_unnecessary_datetime(dt, highest_fmt_unnecessary):
    """
    Truncate unnecessary datetime.
    """
    # note: datetime.replace() requires month/day >= 1,
    # so truncation resets them to 1 rather than 0
    if '%m' in highest_fmt_unnecessary:
        dt_trunc = dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
    elif '%d' in highest_fmt_unnecessary:
        dt_trunc = dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    elif '%H%M%S' in highest_fmt_unnecessary:
        dt_trunc = dt.replace(hour=0, minute=0, second=0, microsecond=0)
    elif '%M%S' in highest_fmt_unnecessary:
        dt_trunc = dt.replace(minute=0, second=0, microsecond=0)
    elif '%S' in highest_fmt_unnecessary:
        dt_trunc = dt.replace(second=0, microsecond=0)
    elif '%f' in highest_fmt_unnecessary:
        dt_trunc = dt.replace(microsecond=0)
    else:
        raise UserWarning('Something wrong!! Please report an issue to GitHub with the error log')

    return dt_trunc

def _get_proj_from_KNUwissdom(ds):
    import cartopy.crs as ccrs

    lcc = ds['lcc_projection']
    clon = lcc.longitude_of_central_meridian
    clat = lcc.latitude_of_projection_origin
    false_easting = lcc.false_easting
    false_northing = lcc.false_northing
    standard_parallel = lcc.standard_parallel

    proj = ccrs.LambertConformal(
        central_longitude=clon,
        central_latitude=clat,
        standard_parallels=standard_parallel,
        false_easting=false_easting,
        false_northing=false_northing,
        globe=ccrs.Globe(
            ellipse=None,
            semimajor_axis=6371008.77,
            semiminor_axis=6371008.77)
    )
    return proj

def _get_proj_from_KMAwissdom():
    import cartopy.crs as ccrs

    proj = ccrs.LambertConformal(
        central_longitude=126.0,
        central_latitude=38.0,
        standard_parallels=(30,60),
        false_easting=440000,
        false_northing=700000,
        globe=ccrs.Globe(
            ellipse=None,
            semimajor_axis=6371008.77,
            semiminor_axis=6371008.77)
    )
    return proj

def _read_wissdom_KNU1(fnames, degree='essential'):
    import xarray as xr
    ds = xr.open_mfdataset(fnames, concat_dim='NT', combine='nested')
    ds['proj'] = _get_proj_from_KNUwissdom(ds)

    ds = ds.rename(
        {'uu2': 'u',
         'vv2': 'v',
         'ww2': 'w'})
    if degree in ['extensive', 'debug']:
        ds = ds.rename(
            {'rvort2': 'vor',
             'rdivg2': 'div'})

    if degree in ['essential', 'extensive']:
        ds.attrs = {}

    list_vars = []
    for x in ds.variables.__iter__():
        list_vars.append(x)
    if degree in ['essential']:
        for var in ['u', 'v', 'w', 'x', 'y', 'lev', 'proj']:
            list_vars.remove(var)
        ds = ds.drop(list_vars)
    if degree in ['extensive']:
        for var in ['u', 'v', 'w', 'vor', 'div', 'x', 'y', 'lev', 'proj']:
            list_vars.remove(var)
        ds = ds.drop(list_vars)

    ds = ds.rename_dims({'X': 'nx', 'Y': 'ny', 'lev': 'nz'})

    return ds

def _read_wissdom_KMAnc(fnames, degree='essential'):
    import xarray as xr
    ds = xr.open_mfdataset(fnames, concat_dim='NT', combine='nested')
    ds['proj'] = _get_proj_from_KMAwissdom()

    dataminus = ds.data_minus
    datascale = ds.data_scale
    dataout = ds.data_out

    ds['x'] = ds['nx'].values * ds.grid_size
    ds['y'] = ds['ny'].values * ds.grid_size
    ds['height'] = ds['height'][0,:]
    ds = ds.set_coords(("height"))  # variable to coord

    ds = ds.rename(
        {'u_component': 'u',
         'v_component': 'v',
         'w_component': 'w',
         'height': 'lev'})
    for f in ['u', 'v', 'w']:
        ds[f] = xr.where(ds[f] == dataout, np.nan, ds[f])
        ds[f] = (ds[f] - dataminus) / datascale

    if degree in ['extensive', 'debug']:
        ds = ds.rename(
            {'vertical_vorticity': 'vor',
             'divergence': 'div'})
        for f in ['div', 'vor']:
            ds[f] = xr.where(ds[f] == dataout, np.nan, ds[f])
            ds[f] = xr.where(
                ds[f] <= 0,
                10**((ds[f] - dataminus) / datascale),
                -10**(-(ds[f] - dataminus) / datascale))

    if degree in ['debug']:
        ds = ds.rename(
            {'vertical_velocity': 'vt',
             'reflectivity': 'z'})
        for f in ['vt', 'z']:
            ds[f] = xr.where(ds[f] == dataout, np.nan, ds[f])
            ds[f] = (ds[f] - dataminus) / datascale

    if degree in ['essential']:
        ds = ds.drop_vars([
            'vertical_vorticity', 'divergence',
            'vertical_velocity', 'reflectivity'])
    if degree in ['extensive']:
        ds = ds.drop_vars([
            'vertical_velocity', 'reflectivity'])
    if degree in ['essential', 'extensive']:
        ds.attrs = {}

    ds = ds.transpose('NT', 'ny', 'nx', 'nz', 'x', 'y')
    ds['x'] = ds['x'].rename({'x': 'nx'})
    ds['y'] = ds['y'].rename({'y': 'ny'})
    ds = ds.drop(['nx', 'ny'])

    return ds

def _read_wissdom_KMAbin(fname, degree='essential'):
    import xarray as xr
    import gzip
    from numba import njit

    @njit(fastmath=True)
    def fastpow(value):
        return 10.0**value

    @njit(fastmath=True)
    def invert_scaling1(arr, data_minus, data_scale):
        res = np.empty(arr.shape)
        for x in range(arr.shape[0]):
            for y in range(arr.shape[1]):
                for z in range(arr.shape[2]):
                    res[x,y,z] = (arr[x,y,z] - data_minus) / data_scale
        return res

    @njit(fastmath=True)
    def invert_scaling2(arr, data_minus, data_scale):
        res = np.empty(arr.shape)
        for x in range(arr.shape[0]):
            for y in range(arr.shape[1]):
                for z in range(arr.shape[2]):
                    if arr[x,y,z] <= 0:
                        res[x,y,z] = fastpow((arr[x,y,z] - data_minus) / data_scale)
                    else:
                        res[x,y,z] = -fastpow(-(arr[x,y,z] - data_minus) / data_scale)
        return res

    def bin2str(binary):
        return [ord(c) for c in binary.decode('latin-1')]  # why not ascii?

    def timestr2dt(file):
        yy = np.frombuffer(file.read(2), dtype=np.int16)[0]
        mm = ord(file.read(1))
        dd = ord(file.read(1))
        hh = ord(file.read(1))
        mi = ord(file.read(1))
        ss = ord(file.read(1))
        try:
            return datetime.datetime(yy,mm,dd,hh,mi,ss)
        except:
            return -1

    with gzip.open(fname,'rb') as f:
        version = ord(f.read(1))  # char
        ptype = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        tm = timestr2dt(f)  # struct
        tm_in = timestr2dt(f)  # struct
        num_stn = ord(f.read(1))  # char
        map_code = ord(f.read(1))  # char
        map_etc = ord(f.read(1))  # char
        nx = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        ny = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        nz = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        dxy = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        dz = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        z_min = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        num_data = ord(f.read(1))  # char
        dz2 = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        z_min2 = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_out = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_in = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_min = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_minus = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_scale = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_unit = ord(f.read(1))  # char
        etc = np.frombuffer(f.read(16), dtype=np.int16)  # short

        u = np.frombuffer(f.read(2*nx*ny*nz), dtype=np.int16).copy().reshape(nz,ny,nx)
        v = np.frombuffer(f.read(2*nx*ny*nz), dtype=np.int16).copy().reshape(nz,ny,nx)
        w = np.frombuffer(f.read(2*nx*ny*nz), dtype=np.int16).copy().reshape(nz,ny,nx)
        if degree in ['extensive','debug']:
            div = np.frombuffer(f.read(2*nx*ny*nz), dtype=np.int16).copy().reshape(nz,ny,nx)
            vor = np.frombuffer(f.read(2*nx*ny*nz), dtype=np.int16).copy().reshape(nz,ny,nx)
        if degree in ['debug']:
            dbz = np.frombuffer(f.read(2*nx*ny*nz), dtype=np.int16).copy().reshape(nz,ny,nx)
            vt = np.frombuffer(f.read(2*nx*ny*nz), dtype=np.int16).copy().reshape(nz,ny,nx)

    mask_u = u == data_out
    mask_v = v == data_out
    mask_w = w == data_out
    if degree in ['extensive','debug']:
        mask_div = div == data_out
        mask_vor = vor == data_out
    if degree in ['debug']:
        mask_dbz = dbz == data_out
        mask_vt = vt == data_out

    u = invert_scaling1(u, data_minus, data_scale)
    v = invert_scaling1(v, data_minus, data_scale)
    w = invert_scaling1(w, data_minus, data_scale)
    if degree in ['extensive','debug']:
        div = invert_scaling2(div, data_minus, data_scale)
        vor = invert_scaling2(vor, data_minus, data_scale)
    if degree in ['debug']:
        dbz = invert_scaling1(dbz, data_minus, data_scale)
        vt = invert_scaling1(vt, data_minus, data_scale)

    u[mask_u] = np.nan
    v[mask_v] = np.nan
    w[mask_w] = np.nan
    if degree in ['extensive','debug']:
        div[mask_div] = np.nan
        vor[mask_vor] = np.nan
    if degree in ['debug']:
        dbz[mask_dbz] = np.nan
        vt[mask_vt] = np.nan

    lev = np.arange(nz)*dz
    x = np.arange(nx)*dxy
    y = np.arange(ny)*dxy

    ds = xr.Dataset(
        {
            'u': (["NT","ny","nx","nz"], np.expand_dims(np.swapaxes(np.swapaxes(u,0,2),0,1),0)),
            'v': (["NT","ny","nx","nz"], np.expand_dims(np.swapaxes(np.swapaxes(v,0,2),0,1),0)),
            'w': (["NT","ny","nx","nz"], np.expand_dims(np.swapaxes(np.swapaxes(w,0,2),0,1),0)),
        },
        coords={
            'lev': (["nz"], (lev)),
            'x': (["nx"], (x)),
            'y': (["ny"], (y)),
        }
    )
    if degree in ['extensive', 'debug']:
        ds['div'] = (["NT","ny","nx","nz"], np.expand_dims(np.swapaxes(np.swapaxes(div,0,2),0,1),0))
        ds['vor'] = (["NT","ny","nx","nz"], np.expand_dims(np.swapaxes(np.swapaxes(vor,0,2),0,1),0))
    if degree in ['debug']:
        # placeholder: dbz and vt are read above but not yet stored in the dataset
        pass
    ds['proj'] = _get_proj_from_KMAwissdom()

    return ds

def read_wissdom(fnames, kind='KNUv2', degree='essential'):
    """
    Read WISSDOM wind field.

    Examples
    ---------
    >>> ds_wissdom = kkpy.io.read_wissdom('WISSDOM_VAR_201802280600.nc')

    >>> ds_wissdom = kkpy.io.read_wissdom('RDR_R3D_KMA_WD_201802280600.bin.gz', kind='KMAbin')

    >>> ds_wissdom = kkpy.io.read_wissdom('RDR_R3D_KMA_WD_201802280600.nc', kind='KMAnc', degree='extensive')

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of WISSDOM to read.
    kind : str, optional
        Data format. Possible options are 'KNUv2', 'KMAnc', and 'KMAbin'. Default is 'KNUv2'.
    degree : str, optional
        Degree of variable type to read. Possible options are 'essential', 'extensive', and 'debug'. Default is 'essential'. 'essential' includes u, v, and w, while 'extensive' further includes divergence and vorticity. 'debug' returns all available variables. Note that the time efficiency for 'extensive' is low when kind='KMAbin'.

    Returns
    ---------
    ds : xarray dataset object
        Return WISSDOM wind field.
    """
    import xarray as xr

    if kind in ['KNUv2']:
        ds = _read_wissdom_KNU1(fnames, degree=degree)
    elif kind in ['KMAnc']:
        ds = _read_wissdom_KMAnc(fnames, degree=degree)
    elif kind in ['KMAbin']:
        if isinstance(fnames, (list, np.ndarray)):
            dslist = []
            for fname in fnames:
                dslist.append(_read_wissdom_KMAbin(fname, degree=degree))
            ds = xr.combine_nested(dslist, 'NT')
        else:
            ds = _read_wissdom_KMAbin(fnames, degree=degree)
    else:
        raise UserWarning(f'Not supported: kind={kind}')

    return ds

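# A minimal sketch (hedged): horizontal wind speed from the returned dataset;
# the filename is hypothetical.
#
# >>> ds = kkpy.io.read_wissdom('WISSDOM_VAR_201802280600.nc')
# >>> ws = np.hypot(ds['u'], ds['v'])     # same dims as u and v
# >>> ws.isel(NT=0, nz=5).plot()          # one level as a quick-look map
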
def read_vertix(fnames):
    """
    Read VertiX (nc).

    Examples
    ---------
    >>> ds_vtx = kkpy.io.read_vertix(fnames)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of VertiX to read.

    Returns
    ---------
    ds : xarray dataset object
        Return VertiX dataset.
    """
    from cftime import num2date
    import xarray as xr

    # read data
    ds = xr.open_mfdataset(
        fnames,
        decode_times=False,
        combine='nested'
    )

    # calculate datetime
    for time_var in ['time_dwell', 'time_spec']:
        ds[time_var] = xr.DataArray(
            num2date(
                ds[time_var].data,
                'seconds since 1970-01-01 00:00:00.0',
                only_use_cftime_datetimes=False,
                only_use_python_datetimes=True
            ),
            dims=[time_var]
        )

    # variable to coordinate
    ds = ds.set_coords('range')
    ds = ds.set_coords('spec_vms')

    # replace nodata with NaN
    for var in list(ds.keys()):
        ds[var] = ds[var].where(ds[var] > ds.nodata)

    return ds

def read_vet(fnames):
    """
    Read WRC VET motion vectors.

    Examples
    ---------
    >>> ds_vet = kkpy.io.read_vet(fnames)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of VET motion vectors to read.

    Returns
    ---------
    ds : xarray dataset object
        Return WRC VET motion vector dataset.
    """
    import xarray as xr

    dss = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            _ds = _read_vet(fname)
            dss.append(_ds)
        ds = xr.concat(dss, dim='t')
    else:
        ds = _read_vet(fnames)

    return ds

def _read_vet(fname):
    import cartopy.crs as ccrs

    nx_vet = 25
    ny_vet = 25
    dxy_vet = 32000  # meter
    border_vet = 112
    clon_vet = 126.0
    clat_vet = 38.0
    fe_vet = 440000
    fn_vet = 770000
    proj_vet = ccrs.LambertConformal(
        central_longitude=clon_vet,
        central_latitude=clat_vet,
        false_easting=fe_vet,
        false_northing=fn_vet,
        standard_parallels=(30,60)
    )

    # READ DATA
    dt = np.dtype([
        ('t','<6i2'),
        ('pixel_res_km','<f4'),
        ('border_pix','<i4'),
        ('ew_arsize','<i4'),
        ('sn_arsize','<i4'),
        ('velf_ew_size','<i4'),
        ('velf_sn_size','<i4'),
        ('uv', '(25,25,2)f4')]
    )
    data = np.fromfile(fname, dtype=dt)

    # Resolution of the vector data, assuming the N-S and E-W grids are the same
    cnt_vec = data['velf_ew_size'][0]
    cnt_border = data['border_pix'][0]
    cnt_pixel = data['ew_arsize'][0]
    res_vec = (cnt_pixel - 2*cnt_border)/cnt_vec

    # DEFINE DATASET
    ds = xr.Dataset(
        {
            'u': (["t","y","x"], data['uv'][0,:,:,0][None,:,:]),
            'v': (["t","y","x"], data['uv'][0,:,:,1][None,:,:]),
        },
        coords={
            'x': (["x"], (np.arange(cnt_vec)*res_vec + res_vec/2. + cnt_border)*1e3),
            'y': (["y"], (np.arange(cnt_vec)*res_vec + res_vec/2. + cnt_border)*1e3),
            't': (["t"], [pd.Timestamp(os.path.basename(fname)[-16:-4])]),
        },
        attrs={
            'title': 'Output of variational echo tracking by KMA',
            'time_unit': 'KST',
            'vector_resolution': f'{res_vec} km',
            'crs': proj_vet.proj4_init,
            'projection': proj_vet,
        }
    )
    return ds

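# A minimal sketch (hedged): quick-look of the motion vectors on the native
# Lambert conformal grid; the filename is hypothetical.
#
# >>> import matplotlib.pyplot as plt
# >>> ds = kkpy.io.read_vet('VET_KMA_202007010740.bin')
# >>> ax = plt.subplot(projection=ds.projection)
# >>> ax.quiver(ds['x'], ds['y'], ds['u'].isel(t=0), ds['v'].isel(t=0))
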
def read_hsr(fnames):
    """
    Read WRC HSR reflectivity.

    Examples
    ---------
    >>> ds_hsr = kkpy.io.read_hsr(fnames)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of HSR reflectivity to read.

    Returns
    ---------
    ds : xarray dataset object
        Return WRC HSR reflectivity dataset.
    """
    import xarray as xr

    dss = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            _ds = _read_hsr(fname)
            dss.append(_ds)
        ds = xr.concat(dss, dim='t')
    else:
        ds = _read_hsr(fnames)

    return ds

def _read_hsr(file):
    import gzip
    import cartopy.crs as ccrs

    def bin2str(binary):
        return [ord(c) for c in binary.decode('ascii')]

    def timestr2dt(file):
        yy = np.frombuffer(file.read(2), dtype=np.int16)[0]
        mm = ord(file.read(1))
        dd = ord(file.read(1))
        hh = ord(file.read(1))
        mi = ord(file.read(1))
        ss = ord(file.read(1))
        try:
            return datetime.datetime(yy,mm,dd,hh,mi,ss)
        except:
            return -1

    with gzip.open(file,'rb') as f:
        version = ord(f.read(1))  # char
        ptype = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        tm = timestr2dt(f)  # struct
        tm_in = timestr2dt(f)  # struct
        num_stn = ord(f.read(1))  # char
        map_code = ord(f.read(1))  # char
        map_etc = ord(f.read(1))  # char
        nx = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        ny = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        nz = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        dxy = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        dz = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        z_min = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        num_data = ord(f.read(1))  # char
        data_code = bin2str(f.read(16))  # char
        etc = bin2str(f.read(15))  # char

        for ii in range(48):
            stn_cd = bin2str(f.read(6))  # char
            _tm = timestr2dt(f)  # struct
            _tm_in = timestr2dt(f)  # struct

        refl = np.frombuffer(f.read(2*nx*ny), dtype=np.int16)
        refl = refl.copy().reshape(ny,nx) / 100
        refl[refl == -300] = -9999  # outside radar observable range
        refl[refl == -250] = -9998  # no echo
        refl[(refl < -100) & (refl > -9000)] = -9997  # null

    clon_hsr = 126.0
    clat_hsr = 38.0
    fe_hsr = 560500  # meter
    fn_hsr = 840500  # meter
    proj_hsr = ccrs.LambertConformal(
        central_longitude=clon_hsr,
        central_latitude=clat_hsr,
        false_easting=fe_hsr,
        false_northing=fn_hsr,
        standard_parallels=(30,60)
    )

    # DEFINE DATASET
    ds = xr.Dataset(
        {
            'reflectivity': (["t","y","x"], refl[None,:,:]),
        },
        coords={
            'x': (["x"], np.arange(0, np.int64(nx)*dxy, dxy)),
            'y': (["y"], np.arange(0, np.int64(ny)*dxy, dxy)),
            't': (["t"], [pd.Timestamp(os.path.basename(file)[-19:-7])]),
        },
        attrs={
            'title': 'HSR product generated by KMA',
            'time_unit': 'KST',
            'cnt_radar': num_stn,
            'outside_radar_observable_range': -9999,
            'no_echo': -9998,
            'null': -9997,
            'crs': proj_hsr.proj4_init,
            'projection': proj_hsr,
        }
    )
    return ds

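# A minimal sketch (hedged): mask the sentinel values documented in the
# dataset attributes before plotting; the filename is hypothetical.
#
# >>> ds = kkpy.io.read_hsr('RDR_CMP_HSR_PUB_202007010740.bin.gz')
# >>> refl = ds['reflectivity'].where(ds['reflectivity'] > -9000)
# >>> refl.isel(t=0).plot(vmin=0, vmax=50)
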
def read_d3d(fnames):
    """
    Read WRC D3D product.

    Examples
    ---------
    >>> ds_d3d = kkpy.io.read_d3d(fnames)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of D3D product to read. Variables in all files must be of the same type.

    Returns
    ---------
    ds : xarray dataset object
        Return WRC D3D product dataset.
    """
    import xarray as xr

    dss = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            _ds = _read_d3d(fname)
            dss.append(_ds)
        ds = xr.concat(dss, dim='t')
    else:
        ds = _read_d3d(fnames)

    return ds

def _read_d3d(fname):
    import gzip
    import cartopy.crs as ccrs

    def bin2str(binary):
        return [ord(c) for c in binary.decode('ascii')]

    def timestr2dt(file):
        yy = np.frombuffer(file.read(2), dtype=np.int16)[0]
        mm = ord(file.read(1))
        dd = ord(file.read(1))
        hh = ord(file.read(1))
        mi = ord(file.read(1))
        ss = ord(file.read(1))
        try:
            return datetime.datetime(yy,mm,dd,hh,mi,ss)
        except:
            return -1

    if 'ta' in os.path.basename(fname):
        varname = 'air_temperature'
    elif 'td' in os.path.basename(fname):
        varname = 'dew_point_temperature'
    elif 'pa' in os.path.basename(fname):  # fixed: `elif 'pa':` was always true
        varname = 'air_pressure'
    else:
        raise UserWarning('Variable type is not recognized')

    with gzip.open(fname,'rb') as f:
        version = ord(f.read(1))  # char
        ptype = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        tm = timestr2dt(f)  # struct
        tm_in = timestr2dt(f)  # struct
        num_stn = ord(f.read(1))  # char
        map_code = ord(f.read(1))  # char
        map_etc = ord(f.read(1))  # char
        nx = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        ny = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        nz = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        dxy = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        dz = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        z_min = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        num_data = ord(f.read(1))  # char
        dz2 = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        z_min2 = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_out = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_in = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_min = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_minus = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_scale = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_unit = ord(f.read(1))  # char
        etc = np.frombuffer(f.read(16), dtype=np.int16)  # short

        var3d = (np.frombuffer(f.read(2*nz*ny*nx), dtype=np.int16) - data_minus) / data_scale
        var3d = var3d.reshape(nz,ny,nx)

    proj = ccrs.LambertConformal(
        central_longitude=126.0,
        central_latitude=38.0,
        false_easting=444000,
        false_northing=772000,
        standard_parallels=(30,60)
    )

    # DEFINE DATASET
    ds = xr.Dataset(
        {
            varname: (["t","z","y","x"], var3d[None, :, :, :]),
        },
        coords={
            'x': (["x"], np.arange(0, np.int64(nx)*dxy, dxy)),
            'y': (["y"], np.arange(0, np.int64(ny)*dxy, dxy)),
            'z': (["z"], np.append(np.arange(0,2000,100), np.arange(2000,10001,200))),
            't': (["t"], [pd.Timestamp(os.path.basename(fname)[-19:-7])]),
        },
        attrs={
            'title': 'D3D product generated by KMA',
            'time_unit': 'KST',
            'crs': proj.proj4_init,
            'projection': proj,
        }
    )
    return ds

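# A minimal sketch (hedged): extract a CAPPI-like temperature slice at 1 km;
# the filename is hypothetical.
#
# >>> ds = kkpy.io.read_d3d('RDR_D3D_KMA_ta_202007010740.bin.gz')
# >>> t1km = ds['air_temperature'].isel(t=0).sel(z=1000)
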
def read_r3d(fnames, kind='nc'):
    """
    Read WRC R3D product.

    Examples
    ---------
    >>> ds_r3d = kkpy.io.read_r3d('RDR_R3D_EXT_KD_202301011740.nc')

    >>> ds_r3d = kkpy.io.read_r3d('RDR_R3D_EXT_HCI_202007010950.bin.gz', kind='bin')

    >>> ds_r3d = kkpy.io.read_r3d(['RDR_R3D_EXT_CZ_202007010740.bin.gz','RDR_R3D_EXT_CZ_202007010745.bin.gz'], kind='bin')

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of R3D product to read. Variables in all files must be of the same type.
    kind : str, optional
        Data format. Possible options are 'nc' and 'bin'. Default is 'nc'.

    Returns
    ---------
    ds : xarray dataset object
        Return WRC R3D product dataset.
    """
    import xarray as xr

    if kind in ['nc']:
        ds = _read_r3d_nc(fnames)
    elif kind in ['bin']:
        if isinstance(fnames, (list, np.ndarray)):
            dss = []
            for fname in fnames:
                dss.append(_read_r3d_bin(fname))
            ds = xr.combine_nested(dss, 't')
        else:
            ds = _read_r3d_bin(fnames)
    else:
        raise UserWarning(f'Not supported: kind={kind}')

    return ds

def _read_r3d_nc(fname):
    import xarray as xr
    import cartopy.crs as ccrs

    ds = xr.open_mfdataset(fname, concat_dim='t', combine='nested')

    fnames = None
    if isinstance(fname, (list, np.ndarray)):
        fnames = fname
        fname = fnames[0]

    if '_CZ' in os.path.basename(fname):
        varname = 'reflectivity'
    elif '_DR' in os.path.basename(fname):
        varname = 'differential_reflectivity'
    elif '_KD' in os.path.basename(fname):
        varname = 'specific_differential_phase'
    elif '_RH' in os.path.basename(fname):
        varname = 'CopolarCorrelation'
    elif '_HCI' in os.path.basename(fname):
        varname = 'radar_echo_classification'
    else:
        raise UserWarning('Variable type is not recognized')

    proj = ccrs.LambertConformal(
        central_longitude=126.0,
        central_latitude=38.0,
        false_easting=440500,
        false_northing=770500,
        standard_parallels=(30,60)
    )

    dataminus = ds.data.data_minus
    datascale = ds.data.data_scale
    dataout = ds.data.data_out

    ds = ds.rename({'data': varname})
    ds[varname] = xr.where(ds[varname] == dataout, np.nan, ds[varname])
    ds[varname] = (ds[varname] - dataminus) / datascale

    if fnames is not None:
        times = [pd.Timestamp(os.path.basename(fn)[-15:-3]) for fn in fnames]
    else:
        times = [pd.Timestamp(os.path.basename(fname)[-15:-3])]

    ds = ds.rename_dims({'nx':'x','ny':'y','nz':'z'})
    ds = ds.assign_coords({
        'x': (["x"], np.arange(0, np.int64(ds.grid_nx)*ds.grid_size, ds.grid_size)),
        'y': (["y"], np.arange(0, np.int64(ds.grid_ny)*ds.grid_size, ds.grid_size)),
        'z': (["z"], ds.height.values[0,:]),
        't': (["t"], times),
    })
    ds = ds.drop_vars('height')

    ds.attrs = {
        'title': 'R3D product generated by KMA',
        'time_unit': 'KST',
        'crs': proj.proj4_init,
        'projection': proj,
    }

    return ds

def _read_r3d_bin(fname):
    import cartopy.crs as ccrs
    import gzip

    def bin2str(binary):
        return [ord(c) for c in binary.decode('ascii')]

    def timestr2dt(fname):
        yy = np.frombuffer(fname.read(2), dtype=np.int16)[0]
        mm = ord(fname.read(1))
        dd = ord(fname.read(1))
        hh = ord(fname.read(1))
        mi = ord(fname.read(1))
        ss = ord(fname.read(1))
        try:
            return datetime.datetime(yy,mm,dd,hh,mi,ss)
        except:
            return -1

    if '_CZ' in os.path.basename(fname):
        varname = 'reflectivity'
    elif '_DR' in os.path.basename(fname):
        varname = 'differential_reflectivity'
    elif '_KD' in os.path.basename(fname):
        varname = 'specific_differential_phase'
    elif '_RH' in os.path.basename(fname):
        varname = 'CopolarCorrelation'
    elif '_HCI' in os.path.basename(fname):
        varname = 'radar_echo_classification'
    else:
        raise UserWarning('Variable type is not recognized')

    with gzip.open(fname,'rb') as f:
        version = ord(f.read(1))  # char
        ptype = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        tm = timestr2dt(f)  # struct
        tm_in = timestr2dt(f)  # struct
        num_stn = ord(f.read(1))  # char
        map_code = ord(f.read(1))  # char
        map_etc = ord(f.read(1))  # char
        nx = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        ny = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        nz = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        dxy = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        dz = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        z_min = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        num_data = ord(f.read(1))  # char
        dz2 = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        z_min2 = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_out = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_in = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_min = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_minus = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_scale = np.frombuffer(f.read(2), dtype=np.int16)[0]  # short
        data_unit = ord(f.read(1))  # char
        etc = np.frombuffer(f.read(16), dtype=np.int16)  # short

        for ii in range(48):
            stn_cd = bin2str(f.read(6))  # char
            _tm = timestr2dt(f)  # struct
            _tm_in = timestr2dt(f)  # struct

        data = np.frombuffer(f.read(2*nx*ny*nz), dtype=np.int16)

    r3d_1d_scaled = (data & 0x07FF)
    mask_outside_1d = (r3d_1d_scaled == data_out)
    mask_qced_1d = (r3d_1d_scaled == data_in)
    r3d_1d = (r3d_1d_scaled - data_minus) / data_scale

    _r3d = r3d_1d.reshape(nz,ny,nx)
    mask_outside = mask_outside_1d.reshape(nz,ny,nx)
    mask_qced = mask_qced_1d.reshape(nz,ny,nx)
    _r3d[mask_outside] = -9999
    _r3d[mask_qced] = -9998

    proj = ccrs.LambertConformal(
        central_longitude=126.0,
        central_latitude=38.0,
        false_easting=440500,
        false_northing=770500,
        standard_parallels=(30,60)
    )

    # DEFINE DATASET
    ds = xr.Dataset(
        {
            varname: (["t","z","y","x"], _r3d[None, :, :, :]),
        },
        coords={
            'x': (["x"], np.arange(0, np.int64(nx)*dxy, dxy)),
            'y': (["y"], np.arange(0, np.int64(ny)*dxy, dxy)),
            'z': (["z"], np.arange(0, np.int64(nz)*dz, dz)),
            't': (["t"], [pd.Timestamp(os.path.basename(fname)[-19:-7])]),
        },
        attrs={
            'title': 'R3D product generated by KMA',
            'time_unit': 'KST',
            'crs': proj.proj4_init,
            'projection': proj,
        }
    )
    return ds

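# A minimal sketch (hedged): for kind='bin', mask the sentinels (-9999 outside
# the observable range, -9998 removed by QC) before use; the filename is hypothetical.
#
# >>> ds = kkpy.io.read_r3d('RDR_R3D_EXT_CZ_202007010740.bin.gz', kind='bin')
# >>> cz = ds['reflectivity'].where(ds['reflectivity'] > -9000)
# >>> cz.isel(t=0, z=0).plot()
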
def read_sounding(fnames):
    """
    Read sounding.

    Examples
    ---------
    >>> df = kkpy.io.read_sounding(fnames)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of sounding to read.

    Returns
    ---------
    df : pandas dataframe object
        Return sounding dataframe.

    Notes
    ---------
    Columns:
     - 'P': pressure [hPa]
     - 'T': temperature [degC]
     - 'RH': relative humidity [%]
     - 'WS': wind speed [m s-1]
     - 'WD': wind direction [deg]
     - 'Lon': longitude [oE]
     - 'Lat': latitude [oN]
     - 'Alt': altitude above mean sea level [m]
     - 'Geo': geopotential height [gpm]
     - 'Dew': dew-point temperature [degC]
     - 'U': u-wind [m s-1]
     - 'V': v-wind [m s-1]
     - 'Uknot': u-wind [knot]
     - 'Vknot': v-wind [knot]
     - 'time': launch time [file-dependent, generally UTC]
    """
    import pandas as pd

    dfs = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            _df = _read_sounding(fname)
            dfs.append(_df)
        df = pd.concat(dfs).reset_index(drop=True)
    else:
        df = _read_sounding(fnames)

    return df

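# A minimal sketch (hedged): estimate the freezing-level altitude from the
# returned dataframe; the filename is hypothetical.
#
# >>> df = kkpy.io.read_sounding('sounding_201802280600.txt')
# >>> frz_alt = df.loc[df['T'] <= 0, 'Alt'].min()
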
def _read_sounding(filename):
    from . import util

    def read_sounding_v1(filename):
        df = pd.read_csv(filename,
                         delim_whitespace=True,
                         skiprows=10,
                         on_bad_lines='skip',
                         encoding='euc-kr')
        df = df.drop(['Time(min:sec)', 'Asc(m/m)'], axis=1)
        df.columns = ['P', 'T', 'RH', 'WS', 'WD', 'Lon', 'Lat', 'Alt', 'Geo', 'Dew']
        df['WS'] = util.knot2ms(df['WS'])
        return df

    def read_sounding_v2(filename):
        df = pd.read_csv(filename,
                         delim_whitespace=True,
                         skiprows=5,
                         on_bad_lines='skip',
                         encoding='ISO-8859-1')
        df = df.drop(['min', 's', 'm/s.1'], axis=1)
        df.columns = ['P', 'T', 'RH', 'WS', 'WD', 'Alt', 'Geo', 'Dew', 'Lat', 'Lon']
        # df['WS'] = util.ms2knot(df['WS'])
        return df

    def read_sounding_v3(filename):
        df = pd.read_csv(filename,
                         delim_whitespace=True,
                         skiprows=10,
                         on_bad_lines='skip',
                         encoding='euc-kr')
        df = df.drop(['Time(mm:ss)', 'Asc(m/m)'], axis=1)
        df.columns = ['P', 'T', 'RH', 'WS', 'WD', 'Lon', 'Lat', 'Alt', 'Geo', 'Dew']
        df['WS'] = util.knot2ms(df['WS'])
        return df

    def clean_df(df):
        def to_float(x):
            try:
                return float(x)
            except ValueError:
                return None

        list_cols = ['P', 'T', 'RH', 'WS', 'WD', 'Alt', 'Geo', 'Dew', 'Lat', 'Lon']
        _df = df
        for col in list_cols:
            valid_index = _df[col].apply(to_float).dropna().index
            _df = _df.loc[valid_index]
        for col in list_cols:
            _df[col] = _df[col].astype('float64')
        return _df

    def get_launch_time(fname):
        try:
            with open(fname, encoding='CP949') as f:
                hdr = f.readline()
            launch_time = datetime.datetime.strptime(hdr[31:50], '%Y-%m-%d %H:%M:%S')
        except:
            try:
                with open(fname, encoding='CP949') as f:
                    hdr = f.readline()
                launch_time = datetime.datetime.strptime(hdr[29:48], '%Y-%m-%d %H:%M:%S')
            except:
                try:
                    with open(fname, encoding='ISO-8859-1') as f:
                        mdy = f.readline()[-9:-1]
                        his = f.readline()[-9:-1]
                    launch_time = pd.to_datetime(f'{mdy} {his}', format='%d/%m/%y %H:%M:%S')
                except:
                    raise UserWarning(f'Failed reading launch time : {fname}')
        return launch_time

    try:
        df = read_sounding_v1(filename)
    except:
        try:
            df = read_sounding_v2(filename)
        except:
            try:
                df = read_sounding_v3(filename)
            except:
                raise UserWarning(f'Cannot read this kind of format: {filename}')

    df = clean_df(df)
    df['U'], df['V'] = util.wind2uv(wd=df['WD'], ws=df['WS'])
    df['Uknot'], df['Vknot'] = util.ms2knot(df['U']), util.ms2knot(df['V'])
    df['time'] = get_launch_time(filename)

    return df

def _read_lidar_wind(fname):
    from . import util

    df = pd.read_csv(
        fname,
        skiprows=2,
        delim_whitespace=True,
        names=['Alt', 'U', 'V', 'W', 'WS', 'WD', 'Valid'],
        na_values=-999,
    )
    df['Uknot'], df['Vknot'] = util.ms2knot(df['U']), util.ms2knot(df['V'])
    return df

def read_lidar_wind(fnames, ftimes, dropna=True):
    """
    Read lidar wind profiles.

    Examples
    ---------
    >>> df = kkpy.io.read_lidar_wind(fnames, ftimes)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of the lidar wind profiles to read.
    ftimes : str or array_like
        Datetime(s) of the lidar wind profiles to read (see: kkpy.io.get_fname).
    dropna : bool, optional
        True to drop rows containing NaN values.

    Returns
    ---------
    df : pandas dataframe object
        Return lidar wind profile dataframe.

    Notes
    ---------
    Columns:
     - 'Alt': altitude above the lidar [m]
     - 'U': u-wind [m s-1]
     - 'V': v-wind [m s-1]
     - 'W': w-wind [m s-1]
     - 'WS': wind speed [m s-1]
     - 'WD': wind direction [deg]
     - 'Valid': proportion of the data available for the wind calculation along the azimuthal direction [%]
     - 'Uknot': u-wind [knot]
     - 'Vknot': v-wind [knot]
     - 'time': measurement time [file-dependent, generally KST]
    """
    dfs = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            _df = _read_lidar_wind(fname)
            _df['time'] = ftimes[i_f]
            dfs.append(_df)
        df = pd.concat(dfs)
    else:
        df = _read_lidar_wind(fnames)
        df['time'] = ftimes

    if dropna:
        df = df.dropna()
    df = df.reset_index(drop=True)

    return df

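# A minimal sketch (hedged): build a time-height section of wind speed;
# fnames/ftimes would typically come from kkpy.io.get_fname.
#
# >>> df = kkpy.io.read_lidar_wind(fnames, ftimes)
# >>> ws_th = df.pivot(index='time', columns='Alt', values='WS')
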
def _read_wpr_kma(fname):
    from . import util

    with open(fname) as f:
        _ = f.readline()
        data = f.readline()

    split = data.split('#')
    df = pd.DataFrame({
        'Alt': np.float_(split[2:-1:7]),
        'WD': np.float_(split[3::7]),
        'WS': np.float_(split[4::7]),
        'U': np.float_(split[5::7]),
        'V': np.float_(split[6::7]),
        'Uknot': util.ms2knot(np.float_(split[5::7])),
        'Vknot': util.ms2knot(np.float_(split[6::7])),
        'time': None
    })
    return df

def read_wpr_kma(fnames, ftimes, dropna=True):
    """
    Read KMA wind profiler.

    Examples
    ---------
    >>> df = kkpy.io.read_wpr_kma(fnames, ftimes)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of KMA wind profiler data to read.
    ftimes : str or array_like
        Datetime(s) of KMA wind profiler data to read (see: kkpy.io.get_fname).
    dropna : bool, optional
        True to drop rows containing NaN values.

    Returns
    ---------
    df : pandas dataframe object
        Return KMA wind profiler dataframe.

    Notes
    ---------
    Columns:
     - 'Alt': altitude above the wind profiler [m]
     - 'WD': wind direction [deg]
     - 'WS': wind speed [m s-1]
     - 'U': u-wind [m s-1]
     - 'V': v-wind [m s-1]
     - 'Uknot': u-wind [knot]
     - 'Vknot': v-wind [knot]
     - 'time': measurement time [file-dependent, generally KST]
    """
    dfs = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            _df = _read_wpr_kma(fname)
            _df['time'] = ftimes[i_f]
            dfs.append(_df)
        df = pd.concat(dfs)
    else:
        df = _read_wpr_kma(fnames)
        df['time'] = ftimes

    if dropna:
        df = df.dropna()
    df = df.reset_index(drop=True)

    return df

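# A minimal sketch (hedged): time-height section of the u-component;
# fnames/ftimes would typically come from kkpy.io.get_fname.
#
# >>> df = kkpy.io.read_wpr_kma(fnames, ftimes)
# >>> u_th = df.pivot_table(index='time', columns='Alt', values='U')
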
def _read_pluvio_raw(fname):
    df = pd.read_csv(
        fname,
        skiprows=26,
        names=[
            'Date', 'Time', 'STIN', 'PRT', 'PA', 'PNRT',
            'PATNRT', 'BRT', 'BNRT', 'T', 'HStatus', 'Status',
            'dummy1', 'dummy2', 'dummy3'],
        delim_whitespace=True,
    )
    df['time'] = pd.to_datetime(
        df['Date'] + df['Time'],
        format='%Y/%m/%d%H:%M:%S'
    )
    df = df.drop([
        'STIN', 'HStatus', 'Status',
        'Date', 'Time',
        'dummy1', 'dummy2', 'dummy3'], axis=1)
    return df

def read_pluvio_raw(fnames, dropna=True):
    """
    Read Pluvio raw data.

    Examples
    ---------
    >>> df = kkpy.io.read_pluvio_raw(fnames)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of Pluvio raw data to read.
    dropna : bool, optional
        True to drop rows containing NaN values.

    Returns
    ---------
    df : pandas dataframe object
        Return Pluvio raw data dataframe.

    Notes
    ---------
    Columns:
     - 'PRT': Intensity RT [mm hr-1]
     - 'PA': Accu RT-NRT [mm]
     - 'PNRT': Accu NRT [mm]
     - 'PATNRT': Accu total NRT [mm]
     - 'BRT': Bucket RT [mm]
     - 'BNRT': Bucket NRT [mm]
     - 'T': Temperature of load cell [degC]
     - 'time': measurement time [file-dependent, generally UTC]

    See details in the OTT manual (Pluvio operating instructions).
    The variable names are based on the document number 70.020.000.B.E 04-0515.
    """
    dfs = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            _df = _read_pluvio_raw(fname)
            dfs.append(_df)
        df = pd.concat(dfs)
    else:
        df = _read_pluvio_raw(fnames)

    if dropna:
        df = df.dropna()
    df = df.reset_index(drop=True)

    return df

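# A minimal sketch (hedged): 1-minute mean rain intensity from the raw log;
# the filename is hypothetical.
#
# >>> df = kkpy.io.read_pluvio_raw('pluvio_20180228.log')
# >>> rr = df.set_index('time')['PRT'].resample('1min').mean()
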
def _read_2dvd(filename):
    def _yydoy2dt(yydoy):
        return datetime.datetime.strptime(yydoy, '%y%j')

    def _2dvd_set_index_dt1(df, fname):
        yydoy = os.path.basename(fname)[1:6]
        dt_date = _yydoy2dt(yydoy)
        df['year'] = dt_date.year
        df['month'] = dt_date.month
        df['day'] = dt_date.day
        columns_date = ['year','month','day','hour','minute','second','millisecond']
        df = df.set_index(pd.to_datetime(df[columns_date]))
        return df.drop(columns_date, axis=1)

    def _2dvd_set_index_dt2(df, fname):
        dt_date = datetime.datetime.strptime(os.path.basename(fname)[16:24], '%Y%m%d')
        df['year'] = dt_date.year
        df['month'] = dt_date.month
        df['day'] = dt_date.day
        df['hour'] = df['HHMM'] // 100
        df['minute'] = df['HHMM'] % 100
        df = df.drop('HHMM', axis=1)
        columns_date = ['year','month','day','hour','minute']
        df = df.set_index(pd.to_datetime(df[columns_date]))
        return df.drop(columns_date, axis=1)

    def read_2dvd_asc1(fname):
        """
        P R I N T O U T   O F  .\DATASET7\V20231_1.hyd
        TIME STAMP      DIAM    VOL     VEL     OBL    AREA     A>=  A<=  B>=  B<=
        hr mi sc msc     mm     mm3     m/s             mm2
        03 20 36 563    0.40    0.03    0.67   0.27  10907.40   457  461  104  111
        """
        df = pd.read_csv(
            fname,
            header=None,
            delim_whitespace=True,
            skiprows=3,
            names=['hour','minute','second','millisecond','D_mm',
                   'VOL_mm3','VEL_ms','OBL','AREA_mm2',
                   'A1','A2','B1','B2'],
            engine='python',
        )
        if isinstance(df.index, pd.MultiIndex):
            raise UserWarning('Failed to read the format')
        df = _2dvd_set_index_dt1(df, fname)
        return df

    def read_2dvd_asc2(fname):
        """
        ^[[2J   ====================================================================
                |                                                                  |
                |        T H E   2 D - V I D E O - D I S T R O M E T E R           |
                |                                                                  |
                |   HYD2ASC                                                        |
                |   sample how to print some hydrometeor-parameters                |
                |   to an ASCII file                                               |
                |                                                                  |
                |   (Feb. 09, 2004, internal version no. 7.002)                    |
                |                                                                  |
                |                                                                  |
                |   JOANNEUM RESEARCH, GRAZ/AUSTRIA                                |
                ====================================================================
        P R I N T O U T   O F  C:\2dvddata\before_201806\hyd\V18100_1.hyd
        09 51 59 787    0.39    0.03    3.99   0.89  10977.31   540  544  319  322
        """
        df = pd.read_csv(
            fname,
            header=None,
            delim_whitespace=True,
            skiprows=20,
            skipfooter=1,
            names=['hour','minute','second','millisecond','D_mm',
                   'VOL_mm3','VEL_ms','OBL','AREA_mm2',
                   'A1','A2','B1','B2'],
            engine='python',
        )
        if isinstance(df.index, pd.MultiIndex):
            raise UserWarning('Failed to read the format')
        df = _2dvd_set_index_dt1(df, fname)
        return df

    def read_2dvd_asc3(fname):
        """
        TYPE_SNO  printout of V17340_1.sno
        TIME STAMP      DIAM      VOL     VEL     OBL    AREA     A>=  A<=  B>=  B<=  wid_A  obl_A  wid_B  obl_B
        hr mi sc msc     mm       mm3     m/s             mm2
        00 01 42 891    3.97   32.7610   0.92   0.00   9819.97   343  380   11   49   4.36   0.76   5.86   0.59
        """
        df = pd.read_csv(
            fname,
            header=None,
            delim_whitespace=True,
            skiprows=3,
            names=['hour','minute','second','millisecond','D_mm',
                   'VOL_mm3','VEL_ms','OBL','AREA_mm2',
                   'A1','A2','B1','B2','WA','OA','WB','OB'],
            engine='python',
        )
        if isinstance(df.index, pd.MultiIndex):
            raise UserWarning('Failed to read the format')
        df = _2dvd_set_index_dt1(df, fname)
        return df

    def read_2dvd_rho1(fname):
        """
        HOUR MINUTE SEC MSEC [UTC]  APPARENT_DIAMETER  VELOCIY  DENSITY  WA  HA  WB  HB  Deq
        0139  0.4257243  1.1390000  0.7007125  0.0109990  0.5010000  0.3910000  0.5010000  0.4330000  0.4250000
        """
        df = pd.read_csv(
            fname,
            delim_whitespace=True,
            names=['HHMM', 'Dapp_mm', 'VEL_ms', 'Rho_gcm3', 'AREA_m2',
                   'WA', 'HA', 'WB', 'HB', 'D_mm'],
            skiprows=1
        )
        if isinstance(df.index, pd.MultiIndex):
            raise UserWarning('Failed to read the format')
        df['AREA_mm2'] = df['AREA_m2'] * 1e6
        df = df.drop('AREA_m2', axis=1)
        df = _2dvd_set_index_dt2(df, fname)
        return df

    try:
        df = read_2dvd_asc1(filename)
    except:
        try:
            df = read_2dvd_asc2(filename)
        except:
            try:
                df = read_2dvd_asc3(filename)
            except:
                try:
                    df = read_2dvd_rho1(filename)
                except:
                    raise UserWarning(f'Cannot read this kind of format: {filename}')

    return df

def read_2dvd(fnames, verbose=False):
    """
    Read 2DVD ascii files.

    Examples
    ---------
    >>> df_2dvd = kkpy.io.read_2dvd(fnames)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of 2DVD to read.
    verbose : bool, optional
        True to print the reading status.

    Returns
    ---------
    df : pandas dataframe object
        Return 2DVD dataframe.
    """
    import pandas as pd

    dfs = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            if verbose:
                print(i_f, fname)
            _df = _read_2dvd(fname)
            dfs.append(_df)
        df = pd.concat(dfs)
    else:
        df = _read_2dvd(fnames)

    return df

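# A minimal sketch (hedged): diameter-velocity scatter from the drop-by-drop
# records; the filename is hypothetical.
#
# >>> df = kkpy.io.read_2dvd('V18100_1.hyd')
# >>> df.plot.scatter(x='D_mm', y='VEL_ms', s=1)
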
def _read_wxt520(fname):
    import pandas as pd
    from . import util

    df = pd.read_csv(fname)
    df = df.set_index(pd.to_datetime(df['Timestamp'], format='%Y/%m/%d %H:%M:%S.%f'))
    df = df.drop(['Timestamp','TZ','Hail Accum. (hits/cm2)'], axis=1)
    df.columns = ['WD', 'WS', 'MWS', 'T', 'RH', 'P', 'R_ACC']
    df['U'], df['V'] = util.wind2uv(wd=df['WD'], ws=df['WS'])

    return df

def read_wxt520(fnames, verbose=False):
    """
    Read WXT520 ascii files.

    Examples
    ---------
    >>> df_wxt = kkpy.io.read_wxt520(fnames)

    Parameters
    ----------
    fnames : str or array_like
        Filename(s) of WXT520 to read.
    verbose : bool, optional
        Reserved; currently unused.

    Returns
    ---------
    df : pandas dataframe object
        Return WXT520 dataframe.
    """
    import pandas as pd

    dfs = []
    if isinstance(fnames, (list, np.ndarray)):
        for i_f, fname in enumerate(fnames):
            _df = _read_wxt520(fname)
            dfs.append(_df)
        df = pd.concat(dfs)
    else:
        df = _read_wxt520(fnames)

    return df

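# A minimal sketch (hedged): 10-minute mean temperature from the returned
# time-indexed dataframe; the filename is hypothetical.
#
# >>> df = kkpy.io.read_wxt520('WXT520_20180228.csv')
# >>> t10 = df['T'].resample('10min').mean()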