First Notebook
This first activity will use basic Python to load and visualize data. We will:
- Load two datasets
- Perform any necessary data cleaning
- Reshape/Resample/Merge as needed
- Calculate basic descriptives
- Plot the data
The data is available in the raw folder.
Make sure the raw folder sits one level above this notebook, since the code uses relative paths like ../raw/.
This notebook has parts of the code you need for the tasks above. Follow the notes to complete the existing code or write the pieces you need to finish.
# Verify this line runs correctly.
# Otherwise, go to s1_env_check.ipynb
import pandas as pd # import the pandas library and give it the alias "pd"
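If you want to double-check the environment before moving on, a minimal sketch (printing the version is just a convenience, not a course requirement):

```python
import pandas as pd

# If this cell raises ImportError, revisit s1_env_check.ipynb before continuing.
print(pd.__version__)  # e.g. '2.2.3' -- any recent version should work
```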
Load the GDP data
df_gdp = pd.read_csv('../raw/gdp.csv', # read a CSV file into a DataFrame; path is relative to this notebook
index_col='date', # use the 'date' column as the row index instead of 0, 1, 2, ...
parse_dates=True) # convert the index to datetime objects automatically
# Observe the data
df_gdp.tail(5) # show the last 5 rows of the DataFrame
| gdp | |
|---|---|
| date | |
| 2024-12-01 | 431334 |
| 2025-03-01 | 436021 |
| 2025-06-01 | 437133 |
| 2025-09-01 | 438665 |
| 2025-12-01 | 406765 |
COMPLETE.
Load the stock data.
# YOUR CODE HERE: call pd.read_csv(...) with the stock file path, index_col='date', parse_dates=True
df_stock = pd.read_csv('../raw/sp500.csv', index_col='date', parse_dates=True)
df_stock.tail(5) # show the last 5 rows to verify the data loaded correctly
| sp500 | |
|---|---|
| date | |
| 2025-12-01 | 6853.025455 |
| 2026-01-01 | 6929.122000 |
| 2026-02-01 | 6893.806316 |
| 2026-03-01 | 6654.419091 |
| 2026-04-01 | 6908.205294 |
Load the NTL (nighttime lights) data
df_ntl = pd.read_csv('../raw/data_ntl_shape.csv', # read the nighttime lights CSV file
index_col='date', # use the 'date' column as the row index
parse_dates=True) # parse dates so Python recognizes them as dates
df_ntl.head(10) # show the first 10 rows (.tail shows last; .head shows first)
| ntl_mean | ntl_sum | |
|---|---|---|
| date | ||
| 2012-01-19 | 1.272979 | 68866.872397 |
| 2012-01-20 | 1.424077 | 77041.130328 |
| 2012-01-21 | 2.092794 | 113218.056544 |
| 2012-01-22 | 2.362034 | 127783.671390 |
| 2012-01-23 | 1.650252 | 89276.972640 |
| 2012-01-24 | 1.233973 | 66756.729011 |
| 2012-01-25 | 1.416948 | 76655.464130 |
| 2012-01-26 | 1.783512 | 96486.233810 |
| 2012-01-27 | 2.472152 | 133740.959514 |
| 2012-01-28 | 1.884260 | 101936.559525 |
Keep (select) only the average column
# Double brackets [[ ]] select a subset of columns and return a DataFrame (single brackets return a Series)
# YOUR CODE HERE: put the column name you want to keep inside the inner brackets, e.g. df_ntl[['average']]
df_ntl = df_ntl[['ntl_mean']]
df_ntl.head(5)
| ntl_mean | |
|---|---|
| date | |
| 2012-01-19 | 1.272979 |
| 2012-01-20 | 1.424077 |
| 2012-01-21 | 2.092794 |
| 2012-01-22 | 2.362034 |
| 2012-01-23 | 1.650252 |
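The single-vs-double-bracket distinction is easy to verify on a toy DataFrame (the column names here are made up):

```python
import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})

s = df['a']      # single brackets -> a one-dimensional Series
sub = df[['a']]  # double brackets -> a DataFrame with one column

print(type(s).__name__)    # Series
print(type(sub).__name__)  # DataFrame
```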
Let's start by visualizing
# YOUR CODE HERE: call .plot() on df_ntl to visualize the series.
df_ntl.plot()
<Axes: xlabel='date'>
Notice the data has many missing values; the reason is mixed frequency. Resample the data into quarterly frequency.
COMPLETE: QS stands for Quarter Start; QE for Quarter End.
# YOUR CODE HERE: pass a frequency string inside resample(), e.g. resample('QS') for Quarter Start
# .mean() then averages all monthly values that fall within each quarter
df_ntl = df_ntl.resample('QS').mean()
df_ntl.tail(10)
| ntl_mean | |
|---|---|
| date | |
| 2024-01-01 | 1.866945 |
| 2024-04-01 | 1.883133 |
| 2024-07-01 | 1.758757 |
| 2024-10-01 | 1.852094 |
| 2025-01-01 | 1.898161 |
| 2025-04-01 | 2.087190 |
| 2025-07-01 | 1.948137 |
| 2025-10-01 | 1.629562 |
| 2026-01-01 | 1.765616 |
| 2026-04-01 | 1.777356 |
You will notice your data has many missing values because GDP data finishes in 2025 but stocks are available until 2026. Let's keep only the rows where there are no missing values.
# dropna removes rows with missing values (NaN)
# how='all' means: only drop a row if ALL its columns are NaN (keeps rows missing just one value)
# axis=0 means operate on rows (axis=1 would operate on columns)
df_ntl = df_ntl.dropna(how='all', axis=0)
df_ntl.tail(10)
| ntl_mean | |
|---|---|
| date | |
| 2024-01-01 | 1.866945 |
| 2024-04-01 | 1.883133 |
| 2024-07-01 | 1.758757 |
| 2024-10-01 | 1.852094 |
| 2025-01-01 | 1.898161 |
| 2025-04-01 | 2.087190 |
| 2025-07-01 | 1.948137 |
| 2025-10-01 | 1.629562 |
| 2026-01-01 | 1.765616 |
| 2026-04-01 | 1.777356 |
This DataFrame has just one column, but if there were more, there could still be missing values because we used the how='all' parameter. Let's use how='any' instead.
COMPLETE
# dropna() with no arguments defaults to how='any': drops a row if ANY column has NaN
df_ntl = df_ntl.dropna()
df_ntl.tail(5)
| ntl_mean | |
|---|---|
| date | |
| 2025-04-01 | 2.087190 |
| 2025-07-01 | 1.948137 |
| 2025-10-01 | 1.629562 |
| 2026-01-01 | 1.765616 |
| 2026-04-01 | 1.777356 |
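The difference between the two modes is easiest to see on a toy two-column frame (the values are invented):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({'x': [1.0, np.nan, np.nan],
                   'y': [4.0, 5.0, np.nan]})

all_dropped = df.dropna(how='all')  # drops only the last row (both values NaN)
any_dropped = df.dropna(how='any')  # drops every row with at least one NaN

print(len(all_dropped), len(any_dropped))  # 2 1
```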
Descriptive statistics
df_ntl.describe() # returns count, mean, std, min, quartiles, and max for each numeric column
| ntl_mean | |
|---|---|
| count | 58.000000 |
| mean | 1.635601 |
| std | 0.166790 |
| min | 1.365451 |
| 25% | 1.516362 |
| 50% | 1.593312 |
| 75% | 1.743566 |
| max | 2.087190 |
COMPLETE: Can we plot?
# YOUR CODE HERE: call .plot() on df_ntl to draw a line chart, e.g. df_ntl.plot()
df_ntl.plot()
<Axes: xlabel='date'>
Merge data
We have 3 series
# Calling .plot() on each DataFrame opens a separate chart for each series
# Expressions separated by commas inside a cell return a tuple of results — each plot displays inline
df_ntl.plot(), df_gdp.plot(), df_stock.plot()
(<Axes: xlabel='date'>, <Axes: xlabel='date'>, <Axes: xlabel='date'>)
Resample stock data into quarters
df_stock = df_stock.resample('QS').mean() # group daily/monthly data into quarters (Quarter Start) and average
df_stock.tail(10)
| sp500 | |
|---|---|
| date | |
| 2024-01-01 | 4995.675143 |
| 2024-04-01 | 5254.286236 |
| 2024-07-01 | 5545.826530 |
| 2024-10-01 | 5911.048045 |
| 2025-01-01 | 5900.729611 |
| 2025-04-01 | 5736.788913 |
| 2025-07-01 | 6429.821934 |
| 2025-10-01 | 6776.534328 |
| 2026-01-01 | 6825.782469 |
| 2026-04-01 | 6908.205294 |
Now merge, but...
# .merge() joins two DataFrames — like a SQL JOIN
# left_index=True, right_index=True means use the row index (date) as the join key on both sides
df_merge = df_ntl.merge(df_gdp, left_index=True, right_index=True)
df_merge = df_merge.merge(df_stock, left_index=True, right_index=True) # chain a second merge onto the result
df_merge
| ntl_mean | gdp | sp500 | |
|---|---|---|---|
| date |
It's not working. This is a common problem. The date indexes do not align. Look at GDP again:
df_gdp.tail(5)
| gdp | |
|---|---|
| date | |
| 2024-12-01 | 431334 |
| 2025-03-01 | 436021 |
| 2025-06-01 | 437133 |
| 2025-09-01 | 438665 |
| 2025-12-01 | 406765 |
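What happened is that .merge() defaults to an inner join: a row survives only if the exact same index value appears on both sides. A minimal sketch with made-up dates reproduces the empty result:

```python
import pandas as pd

left = pd.DataFrame({'a': [1, 2]},
                    index=pd.to_datetime(['2025-01-01', '2025-04-01']))  # quarter-start months
right = pd.DataFrame({'b': [3, 4]},
                     index=pd.to_datetime(['2025-03-01', '2025-06-01']))  # quarter-end months

merged = left.merge(right, left_index=True, right_index=True)
print(len(merged))  # 0 -- no index values coincide, so the inner join is empty
```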
There are several fixes, such as resampling GDP to 'QS'. In this case, let's shift the other two indexes with a date offset instead:
# pd.DateOffset shifts dates by a fixed amount; here we add 2 months to move QS (Jan/Apr/Jul/Oct) to QE (Mar/Jun/Sep/Dec)
# Reassigning .index replaces the entire index of the DataFrame in place
df_ntl.index = df_ntl.index + pd.DateOffset(months=2)
df_stock.index = df_stock.index + pd.DateOffset(months=2)
df_stock.tail(5)
| sp500 | |
|---|---|
| date | |
| 2025-06-01 | 5736.788913 |
| 2025-09-01 | 6429.821934 |
| 2025-12-01 | 6776.534328 |
| 2026-03-01 | 6825.782469 |
| 2026-06-01 | 6908.205294 |
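pd.DateOffset does calendar-aware date arithmetic, so adding two months to a quarter-start label lands exactly on the month GDP uses:

```python
import pandas as pd

ts = pd.Timestamp('2025-01-01')         # a 'QS' label
shifted = ts + pd.DateOffset(months=2)  # calendar-aware: same day, two months later
print(shifted.date())  # 2025-03-01
```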
# Now that the date indexes match, the merge should produce non-empty rows
df_merge = df_ntl.merge(df_gdp, left_index=True, right_index=True)
df_merge = df_merge.merge(df_stock, left_index=True, right_index=True)
df_merge
| ntl_mean | gdp | sp500 | |
|---|---|---|---|
| date | |||
| 2016-06-01 | 1.578983 | 380452 | 2077.530772 |
| 2016-09-01 | 1.517159 | 382285 | 2161.358375 |
| 2016-12-01 | 1.521077 | 383727 | 2184.878571 |
| 2017-03-01 | 1.459510 | 385870 | 2323.949567 |
| 2017-06-01 | 1.602375 | 386945 | 2396.213613 |
| 2017-09-01 | 1.532358 | 393308 | 2467.722181 |
| 2017-12-01 | 1.489896 | 394347 | 2604.981162 |
| 2018-03-01 | 1.516096 | 395365 | 2732.577628 |
| 2018-06-01 | 1.606802 | 398167 | 2703.157244 |
| 2018-09-01 | 1.544570 | 401787 | 2850.988098 |
| 2018-12-01 | 1.467081 | 404410 | 2692.000558 |
| 2019-03-01 | 1.478753 | 405172 | 2722.079340 |
| 2019-06-01 | 1.600551 | 406856 | 2882.890636 |
| 2019-09-01 | 1.577211 | 408132 | 2958.589273 |
| 2019-12-01 | 1.528719 | 408515 | 3086.443080 |
| 2020-03-01 | 1.501864 | 406098 | 3069.303568 |
| 2020-06-01 | 1.549749 | 338323 | 2928.750382 |
| 2020-09-01 | 1.405559 | 370658 | 3321.615253 |
| 2020-12-01 | 1.365451 | 378343 | 3554.334621 |
| 2021-03-01 | 1.438901 | 381323 | 3862.562929 |
| 2021-06-01 | 1.491602 | 391979 | 4182.505078 |
| 2021-09-01 | 1.545995 | 395905 | 4421.154185 |
| 2021-12-01 | 1.588242 | 409411 | 4600.955512 |
| 2022-03-01 | 1.626595 | 410800 | 4467.020414 |
| 2022-06-01 | 1.872416 | 417863 | 4110.200889 |
| 2022-09-01 | 1.665564 | 425874 | 3973.604340 |
| 2022-12-01 | 1.797160 | 425732 | 3851.973492 |
| 2023-03-01 | 1.918031 | 428412 | 4002.966789 |
| 2023-06-01 | 1.998197 | 428126 | 4204.337802 |
| 2023-09-01 | 1.943247 | 436638 | 4458.176399 |
| 2023-12-01 | 1.854829 | 433240 | 4471.505247 |
| 2024-03-01 | 1.866945 | 433003 | 4995.675143 |
| 2024-06-01 | 1.883133 | 428684 | 5254.286236 |
| 2024-09-01 | 1.758757 | 424159 | 5545.826530 |
| 2024-12-01 | 1.852094 | 431334 | 5911.048045 |
| 2025-03-01 | 1.898161 | 436021 | 5900.729611 |
| 2025-06-01 | 2.087190 | 437133 | 5736.788913 |
| 2025-09-01 | 1.948137 | 438665 | 6429.821934 |
| 2025-12-01 | 1.629562 | 406765 | 6776.534328 |
Plot
df_merge.plot()
<Axes: xlabel='date'>
Yes, but we may want to make the comparison meaningful. Let's normalize the series to the quarter-over-quarter (QoQ) rate of change.
df_change = df_merge.pct_change() # compute period-over-period percentage change: (current - previous) / previous
df_change.head(5) # first row will be NaN because there is no previous value to compare against
| ntl_mean | gdp | sp500 | |
|---|---|---|---|
| date | |||
| 2016-06-01 | NaN | NaN | NaN |
| 2016-09-01 | -0.039154 | 0.004818 | 0.040350 |
| 2016-12-01 | 0.002582 | 0.003772 | 0.010882 |
| 2017-03-01 | -0.040476 | 0.005585 | 0.063652 |
| 2017-06-01 | 0.097885 | 0.002786 | 0.031095 |
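You can verify pct_change by hand: each entry is (current − previous) / previous, which is why the first row is NaN. A toy check with invented numbers:

```python
import pandas as pd

s = pd.Series([100.0, 110.0, 99.0])

auto = s.pct_change()                   # pandas' built-in QoQ-style change
manual = (s - s.shift(1)) / s.shift(1)  # the same formula written out

# Both give NaN, then roughly +0.1 and -0.1 (up to floating-point rounding).
print(auto.round(6).tolist())
```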
Plot
df_change.plot()
<Axes: xlabel='date'>