Idea: learn about the way that macroeconomic data is published and revised in releases
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import datetime
from data_apps_aws.sql import get_db_engine, get_db_data
from data_apps_aws.src_data_pipes.fred_config import get_nowcast_ticker_list
print(f'The time at execution is: {datetime.datetime.now()}')
The time at execution is: 2021-03-28 14:02:17.353464
plt.rcParams['figure.figsize'] = 14, 6
sns.set()
db_con = get_db_engine('econ_data_read')
Get all release data for multiple series
ticker_list = get_nowcast_ticker_list()
tuple_ticker_list = tuple(ticker_list)
query=f"""
SELECT *
FROM archival_data
WHERE ticker in {tuple_ticker_list}
"""
all_ticker_release_data = get_db_data(query, db_con)
all_ticker_release_data.tail()
| realtime_start | date | value | ticker | last_updated_here | |
|---|---|---|---|---|---|
| 148131 | 2021-03-26 | 2021-01-01 | 114.622 | PCEPILFE | 2021-03-26 13:01:34 |
| 148132 | 2021-03-26 | 2021-02-01 | 15681.000 | DSPIC96 | 2021-03-26 13:01:34 |
| 148133 | 2021-03-26 | 2021-02-01 | 13119.200 | PCEC96 | 2021-03-26 13:01:35 |
| 148134 | 2021-03-26 | 2021-02-01 | 112.740 | PCEPI | 2021-03-26 13:01:34 |
| 148135 | 2021-03-26 | 2021-02-01 | 114.720 | PCEPILFE | 2021-03-26 13:01:34 |
Min / max observation dates per ticker:
min_max_obs_dates = all_ticker_release_data.groupby('ticker')['date'].agg(['min', 'max'])
min_max_obs_dates
| min | max | |
|---|---|---|
| ticker | ||
| A261RX1Q020SBEA | 1947-01-01 | 2020-10-01 |
| BOPTEXP | 1992-01-01 | 2021-01-01 |
| BOPTIMP | 1992-01-01 | 2021-01-01 |
| BUSINV | 1980-12-01 | 2021-01-01 |
| CPIAUCSL | 1947-01-01 | 2021-02-01 |
| CPILFESL | 1957-01-01 | 2021-02-01 |
| DGORDER | 1958-02-01 | 2021-02-01 |
| DSPIC96 | 1959-01-01 | 2021-02-01 |
| GACDFSA066MSFRBPHI | 1968-05-01 | 2021-03-01 |
| GACDISA066MSFRBNY | 2001-07-01 | 2021-03-01 |
| GDPC1 | 1947-01-01 | 2020-10-01 |
| HOUST | 1959-01-01 | 2021-02-01 |
| HSN1F | 1963-01-01 | 2021-02-01 |
| INDPRO | 1919-01-01 | 2021-02-01 |
| IQ | 1983-09-01 | 2021-02-01 |
| IR | 1982-09-01 | 2021-02-01 |
| JTSJOL | 2000-12-01 | 2021-01-01 |
| PAYEMS | 1939-01-01 | 2021-02-01 |
| PCEC96 | 1959-01-01 | 2021-02-01 |
| PCEPI | 1959-01-01 | 2021-02-01 |
| PCEPILFE | 1959-01-01 | 2021-02-01 |
| PERMIT | 1960-01-01 | 2021-02-01 |
| PPIFIS | 2009-11-01 | 2021-02-01 |
| RSAFS | 1992-01-01 | 2021-02-01 |
| TCU | 1967-01-01 | 2021-02-01 |
| TTLCONS | 1993-01-01 | 2021-01-01 |
| ULCNFB | 1947-01-01 | 2020-10-01 |
| UNRATE | 1948-01-01 | 2021-02-01 |
| WHLSLRIMSA | 1992-01-01 | 2021-01-01 |
Min / max release dates per ticker
min_max_release_dates = all_ticker_release_data.groupby('ticker')['realtime_start'].agg(['min', 'max'])
min_max_release_dates
| min | max | |
|---|---|---|
| ticker | ||
| A261RX1Q020SBEA | 2013-02-28 | 2021-03-25 |
| BOPTEXP | 2010-04-13 | 2021-03-05 |
| BOPTIMP | 2010-04-13 | 2021-03-05 |
| BUSINV | 1996-11-15 | 2021-03-16 |
| CPIAUCSL | 1972-07-21 | 2021-03-10 |
| CPILFESL | 1996-12-12 | 2021-03-10 |
| DGORDER | 1999-08-04 | 2021-03-24 |
| DSPIC96 | 1979-12-18 | 2021-03-26 |
| GACDFSA066MSFRBPHI | 2015-04-16 | 2021-03-18 |
| GACDISA066MSFRBNY | 2014-03-17 | 2021-03-15 |
| GDPC1 | 1991-12-04 | 2021-03-25 |
| HOUST | 1960-07-21 | 2021-03-17 |
| HSN1F | 1999-07-30 | 2021-03-23 |
| INDPRO | 1927-01-26 | 2021-03-16 |
| IQ | 2010-03-16 | 2021-03-16 |
| IR | 2010-03-16 | 2021-03-16 |
| JTSJOL | 2010-08-11 | 2021-03-11 |
| PAYEMS | 1955-05-06 | 2021-03-05 |
| PCEC96 | 1979-11-19 | 2021-03-26 |
| PCEPI | 2000-08-01 | 2021-03-26 |
| PCEPILFE | 2000-08-01 | 2021-03-26 |
| PERMIT | 1999-08-17 | 2021-03-23 |
| PPIFIS | 2014-02-19 | 2021-03-12 |
| RSAFS | 2001-06-13 | 2021-03-16 |
| TCU | 1996-11-15 | 2021-03-16 |
| TTLCONS | 2011-07-01 | 2021-03-01 |
| ULCNFB | 1968-05-27 | 2021-03-04 |
| UNRATE | 1960-03-15 | 2021-03-05 |
| WHLSLRIMSA | 2013-06-13 | 2021-03-16 |
Number of releases per ticker
n_releases = all_ticker_release_data.groupby('ticker')['realtime_start'].nunique()
n_releases
ticker A261RX1Q020SBEA 75 BOPTEXP 131 BOPTIMP 131 BUSINV 295 CPIAUCSL 601 CPILFESL 308 DGORDER 533 DSPIC96 501 GACDFSA066MSFRBPHI 76 GACDISA066MSFRBNY 85 GDPC1 354 HOUST 727 HSN1F 260 INDPRO 1153 IQ 133 IR 133 JTSJOL 128 PAYEMS 794 PCEC96 498 PCEPI 250 PCEPILFE 250 PERMIT 502 PPIFIS 89 RSAFS 253 TCU 315 TTLCONS 116 ULCNFB 390 UNRATE 733 WHLSLRIMSA 94 Name: realtime_start, dtype: int64
Number of observations per release
n_obs_per_release = all_ticker_release_data.groupby(['ticker', 'realtime_start'])['date'].count().to_frame(name='n_obs').reset_index()
n_obs_per_release
| ticker | realtime_start | n_obs | |
|---|---|---|---|
| 0 | A261RX1Q020SBEA | 2013-02-28 | 263 |
| 1 | A261RX1Q020SBEA | 2013-03-28 | 1 |
| 2 | A261RX1Q020SBEA | 2013-05-30 | 2 |
| 3 | A261RX1Q020SBEA | 2013-06-26 | 1 |
| 4 | A261RX1Q020SBEA | 2013-07-31 | 265 |
| ... | ... | ... | ... |
| 9903 | WHLSLRIMSA | 2020-11-17 | 3 |
| 9904 | WHLSLRIMSA | 2020-12-16 | 3 |
| 9905 | WHLSLRIMSA | 2021-01-15 | 1 |
| 9906 | WHLSLRIMSA | 2021-02-17 | 5 |
| 9907 | WHLSLRIMSA | 2021-03-16 | 1 |
9908 rows × 3 columns
n_obs_per_release.pivot(index='realtime_start', columns='ticker', values='n_obs').fillna(0).plot()
plt.title('Observations per release')
plt.show()
this_real_time_date = '2016-07-19'
max_release_per_obs_date = all_ticker_release_data.query('realtime_start <= @this_real_time_date').groupby(['date', 'ticker'])['realtime_start'].max().to_frame('realtime_start').reset_index()
current_data_view = max_release_per_obs_date.merge(all_ticker_release_data)
current_data_view
| date | ticker | realtime_start | value | last_updated_here | |
|---|---|---|---|---|---|
| 0 | 1919-01-01 | INDPRO | 2016-04-01 | 5.0585 | 2021-03-26 13:01:33 |
| 1 | 1919-02-01 | INDPRO | 2016-04-01 | 4.8349 | 2021-03-26 13:01:33 |
| 2 | 1919-03-01 | INDPRO | 2016-04-01 | 4.6952 | 2021-03-26 13:01:33 |
| 3 | 1919-04-01 | INDPRO | 2016-04-01 | 4.7790 | 2021-03-26 13:01:33 |
| 4 | 1919-05-01 | INDPRO | 2016-04-01 | 4.8070 | 2021-03-26 13:01:33 |
| ... | ... | ... | ... | ... | ... |
| 15060 | 2016-06-01 | PPIFIS | 2016-07-14 | 110.7000 | 2021-03-26 13:01:34 |
| 15061 | 2016-06-01 | RSAFS | 2016-07-15 | 456978.0000 | 2021-03-26 13:01:33 |
| 15062 | 2016-06-01 | TCU | 2016-07-15 | 75.4000 | 2021-03-26 13:01:35 |
| 15063 | 2016-06-01 | UNRATE | 2016-07-08 | 4.9000 | 2021-03-26 13:01:33 |
| 15064 | 2016-07-01 | GACDISA066MSFRBNY | 2016-07-15 | 0.5500 | 2021-03-26 13:01:35 |
15065 rows × 5 columns
df_wide = current_data_view.pivot(index='date', columns='ticker', values='value')
df_wide.columns = df_wide.columns.values
df_wide = df_wide.loc[:, ticker_list]
df_wide.tail()
| PAYEMS | JTSJOL | CPIAUCSL | DGORDER | HSN1F | RSAFS | UNRATE | HOUST | INDPRO | PPIFIS | ... | PERMIT | TCU | BUSINV | IQ | GACDISA066MSFRBNY | PCEC96 | GACDFSA066MSFRBPHI | GDPC1 | ULCNFB | A261RX1Q020SBEA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | |||||||||||||||||||||
| 2016-03-01 | 143733.0 | 5670.0 | 237.920 | 228499.0 | 522.0 | 447907.0 | 5.0 | 1113.0 | 103.3557 | 109.5 | ... | 1077.0 | 74.8 | 1804490.0 | 118.1 | 0.62 | 11377.6 | 12.4 | NaN | NaN | NaN |
| 2016-04-01 | 143877.0 | 5845.0 | 238.890 | 235729.0 | 586.0 | 453397.0 | 5.0 | 1155.0 | 103.8246 | 109.7 | ... | 1130.0 | 75.2 | 1807046.0 | 118.6 | 9.56 | 11470.9 | -1.6 | NaN | NaN | NaN |
| 2016-05-01 | 143888.0 | 5500.0 | 239.410 | 230352.0 | 551.0 | 454359.0 | 4.7 | 1135.0 | 103.5110 | 110.1 | ... | 1136.0 | 74.9 | 1809981.0 | 120.0 | -9.02 | 11500.2 | -1.8 | NaN | NaN | NaN |
| 2016-06-01 | 144175.0 | NaN | 239.927 | NaN | NaN | 456978.0 | 4.9 | 1189.0 | 104.1342 | 110.7 | ... | 1153.0 | 75.4 | NaN | 120.9 | 6.01 | NaN | 4.7 | NaN | NaN | NaN |
| 2016-07-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | 0.55 | NaN | NaN | NaN | NaN | NaN |
5 rows × 29 columns
df_wide = df_wide.ffill(axis=1)
df_wide.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f133de6ca50>