Quick Start¶
Example 1: one time series as a single column dataframe¶
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from io import StringIO
import importlib_resources

# import collect_result for handling the process
from foresee.scripts.main import collect_result

# 'basic_time_series_data.csv' file has only one column containing time series values
basic_time_series_data_txt = importlib_resources.files('foresee.data').joinpath('basic_time_series_data.csv').read_text()
ts_df = pd.read_csv(StringIO(basic_time_series_data_txt))

ts_df.head()
# present data here

# user defined parameters

# if input dataframe has more than one column, provide column name containing time series data
endog_colname = None

if len(ts_df.columns) > 1 and endog_colname is None:
    raise ValueError('time series column name is required!!!')

# if uploading your own sample data, update the following parameters if needed
freq = 5
fcst_length = 10
model_list = ['ewm_model', 'fft', 'holt_winters', 'prophet', 'sarimax']

# available run types: 'all_models', 'best_model', 'all_best'
#   all_models: no holdout, no tuning, no model competition. return results for all models
#   best_model: compare models forecast accuracy and return the result of the best model
#   all_best: compute forecast accuracy for all models and return the result for all models
run_type = 'all_models'

# if comparing models results, holdout length is required
if run_type == 'all_models':
    holdout_length = None
else:
    holdout_length = 20

# we are working with one time series and no date-time column,
# so time series id and date-time column name are set to None.
gbkey = None
ds_column = None

# NOTE(review): presumably toggles model tuning inside collect_result -- confirm against its signature
tune = False

# we are fitting one time series in this example so no need to parallelize.
fit_execution_method = 'non_parallel'

# result: dataframe containing fitted values and future forecasts
# fit_result_list: list of dictionaries containing fitted values, forecasts, and errors (useful for debugging)
#
# arguments are passed positionally, so their order must match collect_result's signature;
# a copy of ts_df is passed so the original dataframe object is not handed to collect_result
result, fit_result_list = collect_result(
    ts_df.copy(),
    endog_colname,
    gbkey,
    ds_column,
    freq,
    fcst_length,
    run_type,
    holdout_length,
    model_list,
    fit_execution_method,
    tune
)

result.head()
# present data here
Example 2: multiple time series as a dataframe with a time series id column¶
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from io import StringIO
import importlib_resources

# import main from foresee.scripts
from foresee.scripts import main

# upload sample time-series dataframe with columns(id, date_stamp, y)
test_data_light_txt = importlib_resources.files('foresee.data').joinpath('test_data_light.csv').read_text()
ts_df = pd.read_csv(StringIO(test_data_light_txt))

# parse the date column so it holds datetime values instead of raw strings
ts_df['date_stamp'] = pd.to_datetime(ts_df['date_stamp'])
ts_df.head()

# user defined parameters

# time series values column name: required if input dataframe has more than one column
endog_colname = 'y'

if len(ts_df.columns) > 1 and endog_colname is None:
    raise ValueError('time series column name is required!!!')

# time series frequency
freq = 5

# out of sample forecast length
fcst_length = 10

# available forecasting models
model_list = ['ewm_model', 'fft', 'holt_winters', 'prophet', 'sarimax']

# available run types: 'best_model', 'all_best', 'all_models'
run_type = 'all_best'

# if comparing models (run_type in 'best_model' or 'all_best') then holdout length is required
if run_type == 'all_models':
    holdout_length = None
else:
    holdout_length = 20

# fit-forecast computations can be done in parallel for each time series. requires dask library!!!
# for sequential processing set fit_execution_method to 'non_parallel'
fit_execution_method = 'parallel'

# since we have two time series in this dataset, time series id column name and date-time column name are required.
gbkey = 'id'
ds_column = 'date_stamp'

# NOTE(review): presumably toggles model tuning inside collect_result -- confirm against its signature
tune = True

# result: dataframe containing fitted values and future forecasts
# fit_result_list: list of dictionaries containing fitted values, forecasts, and errors (useful for debugging)
#
# arguments are passed positionally, so their order must match collect_result's signature;
# a copy of ts_df is passed so the original dataframe object is not handed to collect_result
result, fit_result_list = main.collect_result(
    ts_df.copy(),
    endog_colname,
    gbkey,
    ds_column,
    freq,
    fcst_length,
    run_type,
    holdout_length,
    model_list,
    fit_execution_method,
    tune
)

result.head()
Example 3: run forecasts with UI app¶
This simple UI accepts a csv file as input data and provides checklists for setting the necessary parameters. The application runs at this URL: http://localhost:8050/dash
Execute the following block of code, then navigate to the URL above, fill out the time series information, and drop your file to be processed. Results will be returned as a table and can be downloaded.
import flask
import dash

# Flask server exposing a plain-text root route.
# NOTE(review): `server` is not referenced again in this snippet; presumably
# foresee.webapp.dash_app sets up its own server -- confirm before relying on this route.
server = flask.Flask(__name__)


@server.route('/')
def index():
    """Return a placeholder response for the Flask root URL."""
    return 'Flask root.'


# the Dash application object that serves the forecasting UI
from foresee.webapp.dash_app import app

# start the UI only when this code is executed directly, not when it is imported
if __name__ == '__main__':
    app.run_server()