Commit 9be010df authored by andy.disch's avatar andy.disch
Browse files

initial commit

parent 8334e65e
This diff is collapsed.
clear all;
close all;
%% File prep
Site = 5;
testfolder = ''; % Results will be saved in ./Data/Site[Site][testfolder]/
filepath = strcat('./Data/');
mkdir(filepath);
filepath = strcat('./Data/', 'Site', num2str(Site,'%02.0f'), testfolder, '/');
mkdir(filepath);
addpath('./DryWeather');
%import data and aggregating if necessary
TTF = readtimetable('C:/Users/user/Desktop/RICHI/data/rafis_flowrate_liter_s.csv');
TTF2 = retime(TTF,'regular','mean','TimeStep',minutes(5));
F = timetable2table(TTF2);
F = fillmissing(F,'previous');
TTR = readtimetable('C:/Users/user/Desktop/RICHI/data/rafis_precipitation_mm_h.csv');
TTR2 = retime(TTR,'regular','mean','TimeStep',minutes(5));
R = timetable2table(TTR2);
R = fillmissing(R,'previous');
%for histData
t_flow = F{:,1}.';
Flow = F{:,2}.';
t_rain = R{:,1}.';
Rain1 = R{:,2}.';
Starttime = datetime('2020-02-15 00:00:00');
Endtime = datetime('2020-10-01 23:55:00');
Time = t_flow(t_flow > Starttime & t_flow < Endtime);
Flow = Flow(t_flow > Starttime & t_flow < Endtime);
Rain1 = Rain1(t_rain > Starttime & t_rain < Endtime);
Rain1(Rain1 <0) = 0;
FlowTime = convertTo(Time,'posixtime');
Rain2 = zeros(1,length(Rain1)); %create zero-vector (in case of other data source)
Rain3 = zeros(1,length(Rain1));
histData = strcat('./Examples/Site',num2str(Site),'_hist.mat'); %choose a proper folder to save
save(histData,'Flow','FlowTime','Rain1','Rain2','Rain3','Site','Starttime','Endtime');
% for predData
t_flow = F{:,1}.';
Flow = F{:,2}.';
t_rain = R{:,1}.';
Rain1 = R{:,2}.';
Starttime = datetime('2020-10-02 00:00:00');
Endtime = datetime('2021-02-11 23:55:00');
Time = t_flow(t_flow > Starttime & t_flow < Endtime);
Flow = Flow(t_flow > Starttime & t_flow < Endtime);
Rain1 = Rain1(t_rain > Starttime & t_rain < Endtime);
Rain1(Rain1 <0) = 0;
FlowTime = convertTo(Time,'posixtime');
Rain2 = zeros(1,length(Rain1)); %create zero-vector (in case of other data source)
Rain3 = zeros(1,length(Rain1));
predData = strcat('./Examples/Site',num2str(Site),'_pred.mat');
save(predData,'Flow','FlowTime','Rain1','Rain2','Rain3','Site','Starttime','Endtime');
clear all;
close all;
%% User inputs
Site = 5;
diurnal_lookback = 4; % Enter one diurnal/dry-weather lookback period (in months)
testfolder = ''; % Results will be saved in ./Data/Site[Site][testfolder]/
Fs = 288; % Sensor sampling frequency (number per day)
% Threshold criteria
slope = 10; % Threshold for slope of a dry-weather diurnal pattern (trough-to-trough)
timeSlack = 3; % Threshold for length of a dry-weather diurnal pattern (hours +/- 24 hours)
stdMax = 8; % Threshold for maximum standard deviation of a dry-weather diurnal pattern
stdMin = 0.25; % Threshold for minimum standard deviation of a dry-weather diurnal pattern
% Dry-weather section for learning hyperparameters
starti = 1; % Start index
chunk = round(diurnal_lookback*Fs*365/12);
endi = starti+chunk;
section = starti:endi;
%% File load
load(strcat('./Examples/', 'Site', num2str(Site),'_hist.mat')); % Historical data
filepath = strcat('./Data/');
mkdir(filepath);
filepath = strcat('./Data/', 'Site', num2str(Site,'%02.0f'), testfolder, '/');
mkdir(filepath);
addpath('./DryWeather');
all_datetime = datetime(FlowTime,'ConvertFrom','epochtime','Epoch','1970-01-01');
%% GP prep
run('./gpml-matlab-v3.6-2015-07-07/startup.m')
k2 = @covPeriodic;
k3 = @covRQard;
covfunc = {@covSum, {k2, k3}};
%% Flow filters
filter_diurnal_SOS = load('./Filter/diurn_butter_SOS.mat');
filter_diurnal_G = load('./Filter/diurn_butter_G.mat');
%% Remove high frequency noise
no_noise = smoothts(Flow,'g',300,100); %Gaussian smoother if necessary
%% Extract diurnal
diurnals = filtfilt(filter_diurnal_SOS.SOS,filter_diurnal_G.G,Flow);
%% Data to be used
timeSection = all_datetime(section);
FlowSection = FlowTime(section);
diurnalSection = diurnals(section);
all_good = selectTrainingDays(FlowTime, diurnals, Fs, slope, timeSlack, stdMin, stdMax);
good_i = selectTrainingDays(FlowSection, diurnalSection, Fs, slope, timeSlack, stdMin, stdMax);
all_good_ind = [];
for i = 1:length(all_good)
all_good_ind = [all_good_ind, all_good(i,1):all_good(i,2)];
end
good_ind = [];
for i = 1:length(good_i)
good_ind = [good_ind, good_i(i,1):good_i(i,2)];
end
figure(1)
hold on
plot(all_datetime, Flow)
plot(all_datetime, diurnals,'k')
plot(all_datetime(all_good_ind),diurnals(all_good_ind),'linewidth',2);
plot(timeSection(good_ind), diurnalSection(good_ind));
xtickformat ( 'yyyy-MM-dd')
ylabel('flowrate [liter/s]')
legend('Raw data','all diurnals','good diurnals','selection')
set(gca,'FontSize',14)
disp('Check good diurnals which will be used for training; adjust criteria thresholds as needed')
%pause
%% Run GP
hyp = [];
hyp.cov = [0 0 1 0 0 -1]; hyp.lik = -2;
y = diurnalSection(good_ind)';
x = timeToWeekdayDecimal(timeSection(good_ind)');
step = floor(length(y)/3500);
if step == 0
step = 1;
end
y = y(1:step:length(x));
x = x(1:step:length(x));
x_star = timeToWeekdayDecimal(timeSection)';
[hyp fX i] = minimize(hyp, @gp, -100, @infExact, [], covfunc, @likGauss, x, y);
[mu s2] = gp(hyp, @infExact, [], covfunc, @likGauss, x, y, x_star);
%% Plot results for final check
figure(1)
plot(timeSection,mu)
legend('filtered raw data','all diurnals','good diurnals','selection','results')
%% Save hyperparameters
no_noise = no_noise(section);
diurnals = diurnalSection;
all_datetime = timeSection;
FlowTime = FlowSection;
HyperFile = strcat(filepath,'HypInit',num2str(Site,'%02.0f'),'.mat');
save(HyperFile,'hyp','slope','timeSlack','stdMax','stdMin');
# InfiltrationDetection
# Infiltration
# -*- coding: utf-8 -*-
"""
Created on Wed May 12 2021
@author: M.Zhu
Module of identifying rain-affected time and independent rain events.
This module aims to identify rain-affected time and index independent rain
events for further anaylsis. A function of filling missing data and filtering
out negative values is also provided.
Typical usage examples:
Rain = pd.read_csv('C:/Users/user/Data/data.csv',
sep=',', parse_dates=[0], float_precision='high')
Rain_filtered = data_filt(Rain,60,'2020-05-20','2020-07-31')
rain_or_not = rainy_t(Rain_filtered,'1D', 1, 2)
rain_or_not = rainy_t(Rain_filtered,'30T', 0.5, 48)
events = event_id(Rain_filtered, 0.01, 24)
"""
def data_filt(ts:DataFrame, unit_t:int, start_time:str, end_time:str
)-> DataFrame:
"""Minute-based function to fill data gap and filter error
The function is built up based on minute resolution since rainfall and flow
measurement are usually recorded with 1-10 min intervals. Sampling time
will be checked automatically, and the negative values will be replaced
into 0. All the values will be transfromed into depth/volume per sampling
timestep in favor of aggregation.
Args:
ts: imported dataframe of rainfall or flow measurement
unit_t: unit time of the data; e.g. "unit_t" of "mm/h" is 60 (min)
start_time: start datetime of interest; e.g. '2020-02-15'
end_time: end datetime of interest; e.g. '2021-02-15'
Returns:
ts_cut: dataframe with index of 'time' and columns name of 'value';
the unit of 'value' is depth/volume per sampling interval/timestep;
e.g. (mm/5min) with sampling time of 5 min.
"""
ts.columns = ['time', 'value']
ts.set_index('time',inplace = True)
ts_cut = ts[start_time:end_time].copy()
# check sampling time
freq = ts_cut.index.to_series().diff().astype('timedelta64[m]')
f = freq.min()
# filter out negative values and transform unit
ts_cut.loc[ts_cut.value < 0, 'value'] = 0
ts_cut = ts_cut.resample(str(f)+ 'T').pad()
ts_cut = ts_cut * f / unit_t
return ts_cut
def rainy_t(
ts:DataFrame, resolution:str, threshold:int or float, aff_time:int
)-> DataFrame:
"""Identification of rain influenced time
Args:
ts: dataframe of rainfall; preferably uses the outcome of "data_filt"
function; with time index and value unit of depth per sampling
timestep.
resolution: data resolution of interest, e.g. '1H', '1D','30T'.
threshold: rainfall threshold to identify rainy or not (mm/resolution).
aff_time: rain-affected timestep corresponding with"resolution",
e.g. 4(H), 2(D), 8(*30T).
Returns:
ts_t: time indexed dataframe and value of 0 (not rainy) or 1 (rainy).
"""
ts_t = ts.resample(resolution).sum()
rt = ts_t > threshold
aff_time = aff_time + 1
eva_str = rt
for t in range(1,aff_time):
eva_str = eva_str | rt.shift(t)
ts_t[eva_str] = 1
ts_t[~eva_str] = 0
return ts_t
def event_id(ts:DataFrame, threshold:int or float, gap_mins:int)-> DataFrame:
"""Identification of independent rain events
Args:
ts: dataframe of rainfall; preferably uses the outcome of "data_filt"
function; with time index and value unit of depth per sampling
timestep.
threshold: rainfall threshold to identify rainy or not (mm/timestep).
gap_mins: minimum duration of dry periods between 2 independent rain
events (minutes).
Returns:
events: dataframe of rainfall with columns of "time", "value", and
"event_id" to indicate certain timestep of which rain events it
belongs to.
"""
freq = ts.index.to_series().diff().astype('timedelta64[m]')
f = freq.min()
gap = int(gap_mins / f)
events = ts.reset_index()
events['event_id'] = 0
a = 1
c = len(ts)
for i in range(0,c):
if events.value.iloc[i] > threshold:
b = i + gap
if b < c:
events.iloc[i:b, events.columns.get_loc('event_id')] = a
else:
events.iloc[i:c, events.columns.get_loc('event_id')] = a
idx = i + 1
b = b + 1
if events.value.iloc[idx:b].max() > threshold:
a = a
else:
a = a + 1
return events
# -*- coding: utf-8 -*-
"""
Created on Thu May 20 12:25:08 2021
@author: user
"""
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 25 2021
@author: M.Zhu
Module of data import and pre-processing
This module aims to provide examples of importing and processing raw data.
Detailed setting such as resampling interval could be adjusted according to
data sources.
"""
import pandas as pd
import rain_flow_analysis as rfa
ts_f = pd.read_csv('C:/Users/user/Desktop/RICHI/data/rafis_flowrate_liter_s.csv',
sep=';', parse_dates=[0], float_precision='high')
"""
DataFrame: read flow data from csv file
"""
ts_f_2 = rfa.data_filt(ts_f,1/60,'2020-02-15','2021-02-11')
"""
DataFrame: pre-processing of flow data.
unit "l/s" is transformed to "l/sampling timestep" in favor of aggregation.
"""
ts_f_3 = (ts_f_2.resample('5T').sum())/(1000 * 300)
"""
DataFrame: aggregation to resolution of interest.
Here is an example of aggregating to 5-minute resoultion.
Unit transformation to "m3/s" in favor of optimization.
"""
ts_r = pd.read_csv('C:/Users/user/Desktop/RICHI/data/rafis_precipitation_mm_h.csv',
sep=';', parse_dates=[0], float_precision='high')
"""
DataFrame: read rain data from csv file
"""
ts_r_2 = rfa.data_filt(ts_r,60,'2020-02-15','2021-02-11')
"""
DataFrame: pre-processing of flow data.
unit "mm/h" is transformed to "mm/sampling timestep" in favor of aggregation.
"""
ts_r_3 = ts_r_2.resample('5T').sum()
"""
DataFrame: aggregation to resolution of interest.
Here is an example of aggregating to 5-minute resoultion (mm/5min).
"""
q_obs = rfa.flow_separate(ts_r_3, ts_f_3, 1, 2)
q_obs = q_obs.dropna()
"""
DataFrame: flow without GWI and FS components, i.e. RDII.
Here is an example with daily rainfall threshold of 1 mm/d and rain affected
days of 2.
"""
flow_rain = q_obs.merge(ts_r_3, on = 'time', how = 'left')
flow_rain.columns = ['flow', 'rain']
"""
DataFrame: compile flow and rain data after processing while ensure them to have
same sampling timestep and length.
Note:
It is also possible to further select a short period of interest or specific
rain events:
flow_rain = flow_rain['2020-08-29':'2020-9-10']
"""
flow_rain = flow_rain.reset_index()
Qobs = flow_rain.flow.to_numpy()
Rain = flow_rain.rain.to_numpy()
"""
Qobs(numpy.ndarray): flow data formatted in favor of hydrological model (m3/s)
Rain(numpy.ndarray): rain data formatted in favor of hydrological model (mm/dt)
"""
# -*- coding: utf-8 -*-
"""
Created on Thu May 13 2021
@author: M.Zhu
One-dimension lumped hydrological model with Muskingum routing.
The Muskingum routing method uses mass balance approach to route an inflow
hydrograph.The lumped hydrological model includes routing of rain-derived
direct inflow to sewer inlet, routing of rain-induced infiltration to sewer
inlet, and routing of sewer flow from inlet to outlet.
A wrapped-up version of hydrological model is built in favor of optimization.
"""
import numpy as np
def muskingum(Q_in:numpy.ndarray,dt: int,K: float,X: float,V0: float
) -> numpy.ndarray:
"""1-D simplified numerical Muskingum routing with ONE subreach.
Args:
Q_in: inflow/stormwater at each timestep (m3/s).
dt: sampling time (second).
K: flow travel time through the reach (second).
X: dimensionless coefficient in range of 0.0~0.5;
0.0 means storage within the reach is computed as linear reservoir;
Typical value is 0.2.
V0: intial storage volume within the reach(m3).
For numerical stability: K>=dt, X<=dt/(2*K).
The storage volume 'Vol'(m3) would be updated at each timestep.
Returns:
Q_out: outflow at corresponding timestep (m3/s).
"""
nT = np.size(Q_in)
# initialize the output shape
Q_out = np.zeros_like(Q_in,dtype=float)
Vol = np.zeros_like(Q_in,dtype=float)
# coefficients Cx and Cy for muskingum routing
Cx = (dt/2 - K * X)/ (dt/2 + K * (1-X))
Cy = 1 / (dt/2 + K * (1-X))
for i in range(0, nT):
if i == 0 :
iVV = V0
else:
iVV = Vol[i-1]
Q_out[i] = Cx * Q_in[i] + Cy * iVV
Vol[i] = (Q_in[i] - Q_out[i])* dt + iVV
return Q_out
def hydro_mdl(
Rain:numpy.ndarray,dt:int,Area:int or float,c_dir:float,c_inf:float,
K_dir:float,K_inf:float,K_sew:float,X:float,V0:float
)-> tuple of numpy.ndarray:
""" The lumped hydrological model.
Rational formula (Q=c*depth*A) and Muskingum routing are used.
(CityDrain manual, 2007)
Args:
Rain: rainfall depth per timestep (mm/dt)
dt: sampling time/timestep (second).
Area: catchment drainage area (m2).
c_dir: runoff coefficient; fraction of rain that produces direct inflow.
c_inf: runoff coefficient; fraction of rain that prduces infiltration.
K_dir: direct inflow's travel time to sewer inlet (second).
K_inf: infiltration flow's travel time to sewer inlet (second).
K_sew: flow travel time through the reach (second).
X: dimensionless coefficient in range of 0.0~0.5;
V0: intial storage volume within the reach (m3)
Returns:
Qsew_out: combined outflow at sewer outlet (m3/s)
Qdir_out: routed inflow at sewer inlet (m3/s)
Qinf_out: routed infiltration flow at sewer inlet (m3/s)
"""
Qdir_in = Rain * Area * c_dir /(1000 * dt)
Qdir_out = muskingum(Qdir_in, dt, K_dir, X, V0)
Qinf_in = Rain * Area * c_inf /(1000 * dt)
Qinf_out = muskingum(Qinf_in, dt, K_inf, X, V0)
Qsew_in = Qdir_out + Qinf_out
Qsew_out = muskingum(Qsew_in, dt, K_sew, X, V0)
return Qsew_out, Qdir_out, Qinf_out
def mdl_wrap(
U:numpy.ndarray,Rain:numpy.ndarray,dt:int,Area:int or float,X:float,V0:float
) -> numpy.ndarray:
"""A wrapped function to help out with the optimisation.
Args:
U: parameter set including c_dir, c_inf, K_dir, K_inf, and K_sew
Rain: rainfall depth per timestep (mm/dt)
dt: sampling time/timestep (second).
Area: catchment drainage area (m2).
X: dimensionless coefficient in range of 0.0~0.5;
V0: intial storage volume within the reach (m3)
Returns:
Qtot: combined outflow at sewer outlet (m3/s)
"""
[c_dir,c_inf,K_dir,K_inf,K_sew] = U
Qtot, Qdir, Qinf = hydro_mdl(Rain,dt,Area,c_dir,c_inf,K_dir,K_inf,K_sew,X,V0)
return Qtot
# -*- coding: utf-8 -*-
"""
Created on Thu May 13 2021
@author: M.Zhu
Module of optimization using non-linear least square algorithm.
This auto-optimization program aims to calibrate parameters of hydrological
model with Muskingum routing.
Parameters to be optimized are: c_dir,c_inf,K_dir,K_inf,K_sew. The constraints
of the optimized parameters can be adjusted based on catchment characteristics,
as well as fixed parameters such as drainage area.
This module required numpy,scipy.optimize, and timeit package.
"""
import numpy as np
from scipy.optimize import least_squares
import timeit
import hydro_model as hmd
from data_input import Rain, Qobs
dt = 300
Area = 500000
X = 0
V0 = 0
"""
Fixed parameters:
dt(int): sampling timestep (second).
Area(int or float): catchment drainage area (m2).
X(float): dimensionless coefficient of Muskingum routing [0~0.5].
V0 (float): intial storage volume of Muskingum routing (m3).
"""
rng = np.random.default_rng(seed = 3)
IC = rng.random(5)
c_dir = IC[0]
c_inf = IC[1]
K_dir = 1000*IC[2]
K_inf = 100000*IC[3]
K_sew = 1000*IC[4]
U0 = np.array([c_dir,c_inf,K_dir,K_inf,K_sew])
lb = np.array([0.01,0,0,0,0])
ub = np.array([1,1,np.inf,np.inf,np.inf])
"""
Constructs initial condition of parameters to be optimized.
rng: random number generator.
IC(numpy.ndarray, float): array of random floats in the half-open interval [0.0, 1.0).
c_dir(float): initial condition of direct rain-runoff coefficient (0.0, 1.0).
c_inf(float): initial condition of RDI runoff coefficient (0.0, 1.0).
K_dir(float): initial condition of direct inflow travel time (second).
K_inf(float): initial condition of infiltration flow travel time (second).
K_sew(float): initial condition of combined sewer flow travel time (second).
U0(numpy.ndarray, float): array of initial set of parameters to be optimized.
lb(numpy.ndarray, float): array of lower bound of parameters to be optimized.
ub(numpy.ndarray, float): array of upper bound of parameters to be optimized.
"""