Daily NTL Data¶
Import all required libraries and set up the folder paths. Also lists the satellite files already downloaded and sets the date range to process.
Instructions. In Anaconda Prompt:
- First, install
{python}
conda create -n geo python=3.11 pip numpy pandas matplotlib seaborn tqdm python-dateutil geopandas jupyterlab gdal rasterio folium mapclassify fiona shapely pyproj colorcet h5py libgdal -y
conda activate geo
pip install blackmarblepy
conda install -c conda-forge contextily
Then:
- Visit https://ladsweb.modaps.eosdis.nasa.gov/
- Enter NASA profile
- In Eulas: Accept New Eulas, agree to: Meris EULA and Sentinel-3 EULA
- Make sure Profile includes Institution and all data
Download: 6. (In workshop: skip the automatic download; use the VNP files provided in the GitHub repository.) 7. Notebook: change the date range to start on 2025-11-02. 8. Run the code.
#Set File Paths
# Importing multiple libraries at once using commas
import struct, os, sys, time, requests, os, glob, datetime, h5py, re
import numpy as np, numpy.ma as ma # numpy: fast numerical arrays; 'np' is the standard alias
from osgeo import gdal, ogr, gdalnumeric # gdal: reads/writes geospatial raster files
import matplotlib.pyplot as plt # plt: standard alias for plotting
import statistics as stat
import pandas as pd # pd: standard alias for tabular data (DataFrames)
from datetime import datetime # imports just the datetime class from the datetime module
from pathlib import Path # Path: object-oriented file path handling
from tqdm import tqdm # tqdm: adds a progress bar to any loop
# ---------------------------------------------------------------------------
# Folder setup: resolve the working directory, the raw-data source folder,
# the .h5 download folder, and the date range to process.
# ---------------------------------------------------------------------------
# 'locals()' returns a dict of all variables defined so far in this session.
# On a notebook re-run 'd' already exists, so jump back to the starting
# directory first; otherwise the relative paths below would drift on re-runs.
if 'd' in locals(): os.chdir(d)
else : d = f"{os.getcwd()}" # first run: remember the starting directory
path_source = f"{d}/../raw" # '../' means "go up one folder level"
PATH_NL = f"{d}/../hfiles/" # destination folder for downloaded VNP46A2 .h5 tiles
# os.path.exists() returns True/False; os.makedirs() creates the folder if missing
if not os.path.exists(PATH_NL):
    os.makedirs(PATH_NL)
    print(f"Folder '{PATH_NL}' created.")
else:
    print(f"Folder '{PATH_NL}' exists.")
os.chdir(PATH_NL) # NOTE: all relative paths from here on resolve against PATH_NL
# NOTE(review): PATH_NL already ends in '/', so this yields 'hfiles//output/';
# harmless on Windows and Unix, but worth tidying.
outputFolder = f"{PATH_NL}/output/"
if not os.path.exists(outputFolder):
    os.makedirs(outputFolder)
    print(f"Folder '{outputFolder}' created.")
else:
    print(f"Folder '{outputFolder}' exists.")
rasterFiles = os.listdir(PATH_NL) # list of filenames in the download folder
# Keep only VNP46A2 satellite files
rasterFiles = [ x for x in rasterFiles if "VNP46A2" in x ]
rasterFiles.sort() # in-place sort (alphabetical = chronological for these names)
totalFiles = len(rasterFiles) # number of files already downloaded
# list(set(...)) removes duplicates; this tile (h10v07) covers Jamaica
the_tiles = list(set(['h10v07']))
# Download mode:
#   None → incremental (default): picks up from last h5 file or data_ntl.csv
#   "YYYY-MM-DD YYYY-MM-DD" → download that exact range (start inclusive, end exclusive)
date_range = "2025-10-02 2025-11-15"
Folder 'c:\Users\guerr\Dropbox\02_Work\Consulting\2026_NWCST_CAR_Research-Workshop\JAM Workshop\workshop_code/../hfiles/' exists. Folder 'C:\Users\guerr\Dropbox\02_Work\Consulting\2026_NWCST_CAR_Research-Workshop\jamaica\hfiles\/output/' exists.
Download h5 files¶
(Warning. This can have a long runtime depending on the number of files required)
Set the NASA Earthdata API key. Determine the date window to download (custom range or incremental). Define helper functions to list and download VNP46A2 satellite files.
#### Replace API Key with your own
# SECURITY NOTE(review): this is a long-lived NASA Earthdata bearer token
# committed in plain text. Rotate it and load it from an environment variable
# or a git-ignored file instead of hard-coding it in the notebook.
api_key = "eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiVXNlciIsInVpZCI6ImRndWVycmVybyIsImV4cCI6MTc4MjI3MDk3MiwiaWF0IjoxNzc3MDg2OTcyLCJpc3MiOiJodHRwczovL3Vycy5lYXJ0aGRhdGEubmFzYS5nb3YiLCJpZGVudGl0eV9wcm92aWRlciI6ImVkbF9vcHMiLCJhY3IiOiJlZGwiLCJhc3N1cmFuY2VfbGV2ZWwiOjN9.UnjYQaZfBoEOdW_JyVY1009tYOHByX4z_e0dvU3RGuJd0rxbJtgb5yxOLFzt0Qb1d7KazQQMDQbQ4rF6QNeWkSY7F6duOwYB5GHcepDzPBCPbqF-5h-6okvWtR20v2p1Uh6oA1xB7J1rnP6ErdLWJHD1yJTaEKRfCAhOZBl5viRG57j33tYDRooBItI1yzxWYjhr5A-ka77BKZf5T7GZcM2A6lH_91WLp7yrtUyIp-9KhL_z9wOho2jMzeyDMU131n1wzlLssrwZD3k-uSzsC5zCQ7JjjCRv57vcjSv-FoFzuEB885E6zjN9CZOhbgQwy2l_x3QAqKJ2rVZgCiH7Uw"
#Download from https://ladsweb.modaps.eosdis.nasa.gov/
# Dict with one key; used to authenticate all HTTP requests to the NASA API
headers = {'Authorization': f'Bearer {api_key}'}
# --- Determine start/end of download window ---
if date_range is not None:
    # Custom mode: "YYYY-MM-DD YYYY-MM-DD" (start inclusive, end exclusive).
    parts = date_range.strip().split()          # two date strings
    start_dt = datetime.strptime(parts[0], "%Y-%m-%d")
    end_dt = datetime.strptime(parts[1], "%Y-%m-%d")
    last_year = start_dt.strftime("%Y")
    last_day = start_dt.strftime("%j")          # %j = day-of-year (001–366)
    end_date = end_dt
    print(f"Custom range: {start_dt.date()} → {end_dt.date()}")
else:
    # Incremental mode: resume right after the newest date already collected.
    end_date = None
    try:
        data = pd.read_csv(f"{path_source}/data_ntl.csv", parse_dates=True)
        # BUGFIX: the groupby result has columns ['location', 'date'] only; the
        # old code read a non-existent '.JD' attribute, which raised and silently
        # forced every run into the filename-scan fallback below. We want the
        # minimum of the per-location maxima, so no location is left behind.
        last_date = pd.to_datetime(data.groupby('location')['date'].max().min())
        print("Last date in the directory:", last_date)
        last_date = last_date.strftime("%Y%j")
        last_year, last_day = last_date[:4], last_date[4:]  # YYYY, DDD
        print("In Julian Days:", last_date)
    except Exception:  # narrowed from a bare 'except:'; keep the best-effort fallback
        # Fallback: scan the .h5 filenames for the newest '.AYYYYDDD.' stamp.
        PATH_NL_file_list = glob.glob(os.path.join(PATH_NL, '*.h5'))
        last_file = '.A2001001.'  # sentinel stamp older than any real file
        if PATH_NL_file_list:
            for file in PATH_NL_file_list:
                # Capture year (4 digits) and day-of-year (3 digits) from the name
                match = re.search(r'\.A(\d{4})(\d{3})\.', file)
                if match:
                    year = match.group(1)
                    julian_date = match.group(2)
                    start_year_day = f"{year}/{julian_date}"
                    # Keep whichever file has the larger YYYYDDD stamp
                    if int(start_year_day.replace("/", "")) > int(re.search(r'\.A(\d{7})\.', last_file).group(1)):
                        last_file = file
                else:
                    print("No Julian date and year found in the filename.")
            print("Last file in the directory:", last_file)
        else:
            print("The directory is empty.")
        match = re.search(r'\.A(\d{4})(\d{3})\.', last_file)
        last_year, last_day = match.group(1), match.group(2)
print(f"Last NTL collected: {last_year, last_day}")
# ---------------------------------------------------------------
# Helper functions
# Snapshot of filenames already present locally (files only, not folders);
# used as the default "already downloaded" skip-list for download_vnp46a2.
available_files = [f for f in os.listdir(PATH_NL) if os.path.isfile(os.path.join(PATH_NL, f))]
# -------------------------------------------------
def download_vnp46a2(year, day_of_year, tile, output_dir=PATH_NL, file_list=available_files):
    """
    Download all VNP46A2 files for a specific date and tile.

    Args:
        year: Year (e.g., 2023)
        day_of_year: Day of year (1-365/366)
        tile: Tile identifier (e.g., "h08v05")
        output_dir: Directory to save files
        file_list: Filenames already present locally; matching files are skipped
    """
    base_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/VNP46A2"
    url = f"{base_url}/{year}/{day_of_year:03d}"  # :03d pads the day to width 3
    headers = {"Authorization": f"Bearer {api_key}"}
    response = requests.get(f"{url}.json", headers=headers)
    if response.status_code != 200:  # 200 means "OK"; anything else is an error
        print(f"Error getting file list: {response.status_code}")
        return  # BUGFIX: old code fell through and crashed parsing the error body
    files = response.json()  # parse the response body as JSON
    # The API has returned the listing under different keys over time.
    if isinstance(files, dict):
        if 'content' in files:
            filenames = files['content']
        elif 'files' in files:
            filenames = files['files']
        else:
            filenames = list(files.keys())
    else:
        filenames = files  # BUGFIX: 'filenames' was undefined for list responses
    # Keep only download links that match our tile and end in '.h5'
    matching_files = [f['downloadsLink'] for f in filenames
                      if tile in f['downloadsLink'] and f['downloadsLink'].endswith('.h5')]
    if not matching_files:
        print(f"No files found for tile {tile}")
        print(f"Available files: {filenames[:5]}")  # show only the first 5
        return
    for file_url in matching_files:
        filename = file_url.split("/")[-1]  # last path segment = the filename
        output_path = os.path.join(output_dir, filename)
        if filename in file_list:
            print(f"Skipped {filename}")  # BUGFIX: old message had no placeholder
            continue
        # stream=True: download in chunks instead of whole file in memory
        file_response = requests.get(file_url, headers=headers, stream=True)
        if file_response.status_code == 200:
            with open(output_path, 'wb') as f:  # 'wb' = write binary; auto-closed
                for chunk in file_response.iter_content(chunk_size=8192):
                    f.write(chunk)
        else:
            print(f"Error downloading {filename}: {file_response.status_code}")  # BUGFIX: name the file
# -------------------------------------------------
def list_years(last_year, last_day, tile, end_date=None, output_dir=PATH_NL, collection="5200", product='VNP46A2'):
    """
    List the archive years that still need checking.

    Returns years >= last_year (and <= end_date.year when end_date is given).
    Returns an empty list if the archive listing request fails, so callers
    can continue gracefully (consistent with get_target's error handling).
    """
    base_url = f"https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/{collection}/{product}"
    headers = {"Authorization": f"Bearer {api_key}"}
    response = requests.get(f"{base_url}.json", headers=headers)
    # BUGFIX: old code parsed the body unconditionally and crashed (KeyError)
    # on any non-200 response.
    if response.status_code != 200:
        print(f"Error getting year list: {response.status_code}")
        return []
    listing = response.json()
    # Extract the 'name' field from each item in the 'content' list
    all_years = [item['name'] for item in listing['content']]
    # Keep only years >= last_year (compared as integers)
    years_to_download = [year for year in all_years if int(year) >= int(last_year)]
    if end_date is not None:
        years_to_download = [year for year in years_to_download if int(year) <= end_date.year]
    return years_to_download
# -------------------------------------------------
def get_target(target, last_year, last_day, tile=the_tiles[0], end_date=None, output_dir=PATH_NL, collection="5200", product='VNP46A2'):
    """
    Download all available days from target years that are after last observation
    (and before end_date if provided).

    Args:
        target: List of years to check (e.g., ['2024', '2025'])
        last_year: Last year you have (e.g., 2024)
        last_day: Last day of year you have (e.g., 100)
        tile: Tile identifier
        end_date: Optional datetime; stop before this date (used for custom ranges)
        output_dir: Output directory

    Returns:
        List of (year, day) tuples still to download, in chronological order.
    """
    base_url = f"https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/{collection}/{product}"
    headers = {"Authorization": f"Bearer {api_key}"}
    # Last date already collected, as a datetime for strict comparisons.
    last_observation = datetime.strptime(f"{last_year}-{last_day}", "%Y-%j")
    pending = []
    for year in target:
        listing = requests.get(f"{base_url}/{year}.json", headers=headers)
        if listing.status_code != 200:
            print(f"Error getting days for year {year}: {listing.status_code}")
            continue
        day_names = [entry['name'] for entry in listing.json()['content']]
        # Numeric day-of-year values, ascending; non-numeric entries are skipped.
        for day in sorted(int(name) for name in day_names if name.isdigit()):
            stamp = datetime.strptime(f"{year}-{day}", "%Y-%j")
            # Strictly newer than what we have, and inside the window if one is set.
            if stamp > last_observation and (end_date is None or stamp < end_date):
                pending.append((year, day))
    return pending
Build the list of satellite files that are missing locally and download them from NASA one by one.
# -------------------------------------------------
# Build the download target list: one (tile, year, day) triple per missing file.
target = []
for tile in the_tiles:
    # list_years picks which years to check; get_target narrows to (year, day) pairs
    tile_target = list_years(last_year, last_day, tile, end_date=end_date)
    tile_target = get_target(tile_target, last_year, last_day, tile, end_date=end_date)
    # Prepend the tile name to each (year, day) tuple → (tile, year, day)
    target.append([(tile,) + t for t in tile_target])
# Flatten the list-of-lists into a single list
target = [x for sublist in target for x in sublist]
if target:
    print(f"Last date available: {target[-1]}")  # last = newest (chronological order)
    print("Downloading...")
    for tile_name, year, day in tqdm(target, desc="Processing"):  # tuple unpacking for clarity
        download_vnp46a2(year, day, tile_name)
        time.sleep(0.01)  # pause 10 ms between requests to avoid overloading the server
else:
    print("Nothing new to download.")
# BUGFIX: re-list the folder so the files just downloaded are included; the old
# code only re-filtered the stale pre-download listing, which was a no-op.
rasterFiles = os.listdir(PATH_NL)
rasterFiles = [x for x in rasterFiles if "VNP46A2" in x]
rasterFiles.sort()
totalFiles = len(rasterFiles)
print("Finalized")
Analysis¶
Define helper functions used to read pixel values from the satellite raster files (convert Julian dates, extract a point value, process a full h5 file).
# HELPER FUNCTIONS
def conJDtoDate(JD):
    '''
    Convert a 5-character Julian date string (YYDDD), as embedded in the
    VNP46A2 filenames, into a datetime.date.

    Example: "25301" → 2025-10-28
    '''
    # %y = two-digit year, %j = day-of-year; .date() keeps only the calendar date.
    return datetime.strptime(JD, '%y%j').date()
def getRasterData(lat, lon, window, xOrigin, yOrigin, pixelWidth, pixelHeight, data):
    '''
    Return the pixel value at a latitude/longitude, or (window=3) the mean of
    the 3x3 neighborhood around it, rounded to 2 decimals.

    Any window other than 3 falls back to the single-pixel lookup.
    '''
    # Geographic coordinates → raster row/column indices.
    col = int((lon - xOrigin) / pixelWidth)
    row = int((yOrigin - lat) / pixelHeight)
    if window == 3:
        # Collect the nine values of the 3x3 grid centered on (row, col).
        neighborhood = [
            float(data[row + dr][col + dc])
            for dr in (-1, 0, 1)
            for dc in (-1, 0, 1)
        ]
        # Mean, rounded to 2 decimal places via the same format round-trip
        # as before, then returned as float.
        return float(format(stat.mean(neighborhood), '.2f'))
    # Single-pixel lookup.
    return data[row][col]
def processHD5(inputHD5, layer, OutputFolder, coords, Date ):
    '''
    Georeference one VNP46A2 h5 sub-dataset into a temporary GeoTIFF and
    sample it at every coordinate in `coords`.

    Args:
        inputHD5: h5 filename (opened via GDAL, relative to the CWD)
        layer: index of the sub-dataset to extract (callers pass 2)
        OutputFolder: folder where the temporary .tif is written
        coords: iterable of dicts with keys 'lat', 'lon', 'sector',
                'location', 'city'
        Date: date stored in each output record

    Returns:
        List of dicts, one per in-bounds coordinate, with single-pixel
        ('DNBvalue1') and 3x3-averaged ('DNBvalue3') values.
    '''
    rasterFilePre = inputHD5[:-3] # strip the last 3 characters (".h5") from the filename
    ## Open HDF file
    hdflayer = gdal.Open(inputHD5, gdal.GA_ReadOnly) # open the h5 file as read-only
    subhdflayer = hdflayer.GetSubDatasets()[layer][0] # path of the Nth sub-dataset
    rlayer = gdal.Open(subhdflayer, gdal.GA_ReadOnly)
    #Subset the Layer Name
    # NOTE(review): the 92-character slice assumes a fixed-length sub-dataset
    # path prefix — confirm it still holds if the folder layout changes.
    outputLayerName = subhdflayer[92:]
    clean_layer_name = re.sub(r'[^\w\-_.]', '_', outputLayerName) # replace special characters with '_'
    #outputFile
    outputRaster = OutputFolder + rasterFilePre + "_" + clean_layer_name + ".tif"
    # Read tile position metadata to compute the geographic bounding box;
    # each VNP46A2 tile spans 10°x10° on the global grid.
    HorizontalTileNumber = int(rlayer.GetMetadata_Dict()["HorizontalTileNumber"])
    VerticalTileNumber = int(rlayer.GetMetadata_Dict()["VerticalTileNumber"])
    WestBoundCoord = (10*HorizontalTileNumber) - 180
    NorthBoundCoord = 90-(10*VerticalTileNumber)
    EastBoundCoord = WestBoundCoord + 10
    SouthBoundCoord = NorthBoundCoord - 10
    EPSG = "-a_srs EPSG:4326" # WGS84 coordinate reference system (standard lat/lon)
    # Build the gdal.Translate options string: set projection + bounding box corners
    translateOptionText = EPSG+" -a_ullr " + str(WestBoundCoord) + " " + str(NorthBoundCoord) + " " + str(EastBoundCoord) + " " + str(SouthBoundCoord)
    translateoptions = gdal.TranslateOptions(gdal.ParseCommandLine(translateOptionText))
    result = gdal.Translate(outputRaster, rlayer, options=translateoptions) # write the georeferenced .tif file
    raster = gdal.Open(outputRaster,gdal.GA_ReadOnly)
    if raster is None:
        # NOTE(review): execution continues after this print; the band access
        # below would raise AttributeError on a None raster.
        print ("Could not open image " , Date )
    band = raster.GetRasterBand(1) # get the first (and only) data band
    cols = raster.RasterXSize # number of pixels wide
    rows = raster.RasterYSize # number of pixels tall
    # GetGeoTransform returns 6 numbers defining pixel size and the top-left corner
    transform = raster.GetGeoTransform()
    xOrigin = transform[0] # longitude of the top-left corner
    yOrigin = transform[3] # latitude of the top-left corner
    pixelWidth = transform[1] # degrees per pixel (east-west)
    pixelHeight = -transform[5] # stored negative in the file, so negate
    data = band.ReadAsArray(0, 0, cols, rows) # all pixel values as a 2D numpy array
    _datalist_ = []
    # Sample every requested coordinate.
    for coord in coords :
        lat = coord['lat']
        lon = coord['lon']
        # Skip coordinates outside this tile's geographic extent
        if lat < SouthBoundCoord or lat > NorthBoundCoord or lon < WestBoundCoord or lon > EastBoundCoord:
            continue
        value1 = getRasterData(lat, lon, 1 , xOrigin, yOrigin, pixelWidth, pixelHeight, data)
        value3 = getRasterData(lat, lon, 3 , xOrigin, yOrigin, pixelWidth, pixelHeight, data)
        # One record per location; keys become DataFrame column names later
        _d_ = {
            'sector' : coord['sector'],
            'location' : coord['location'],
            'city' : coord['city'],
            'date' : Date,
            'DNBvalue1' : value1 , # single-pixel NTL value
            'DNBvalue3' : value3 , # 3x3 averaged NTL value
        }
        _datalist_.append(_d_)
    raster = None # release the GDAL file handle (equivalent to closing the file)
    return _datalist_
Coordinates¶
Load the list of locations (coordinates) from the Excel file. Compare with already-processed dates to identify only the new satellite files that need processing.
# Refresh the file listing (the download step may have added files).
rasterFiles = os.listdir(PATH_NL)
rasterFiles = [x for x in rasterFiles if "VNP46A2" in x]
rasterFiles.sort()
totalFiles = len(rasterFiles)

# Locations to sample, one row per point of interest.
locations_df = pd.read_excel(f'{path_source}/Coordinates.xlsx')
# Normalize unicode (accents → ASCII) and strip whitespace so names match data_ntl.csv
locations_df['location'] = locations_df['location'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.strip()
locations_df['city'] = locations_df['city'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.strip()
locations_df['thefile'] = the_tiles[0]  # same tile for every row

try:
    # parse_dates=True: attempt to parse any date-looking columns automatically
    data = pd.read_csv(f"{path_source}/data_ntl.csv", parse_dates=True)
except Exception as e:  # BUGFIX: narrowed from a bare 'except:'; say why we reset
    print(f"No existing data_ntl.csv loaded ({e}); starting fresh.")
    data = pd.DataFrame(columns=['location', 'city', 'date'])

if len(data) > 0:
    # Clean up text columns in existing data to match locations_df
    data['location'] = data['location'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.strip()
    data['city'] = data['city'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.strip()
    # Left merge: keep all locations; add each location's latest processed date
    locations_df = pd.merge(locations_df,
                            data.groupby('location')['date'].max().reset_index(),
                            on='location',
                            how='left')
else:
    locations_df['date'] = np.nan  # no history yet for any location

print(f"Last observation: {data.date.max()}")

# Keep only files whose embedded date is not already in the data.
# (Technically we should keep every date after the minimum last date.)
_the_dates = pd.to_datetime(data.date.unique())
rasterNew = [F for F in rasterFiles if pd.to_datetime(conJDtoDate(F[11:16])) not in _the_dates]
# Further restrict to the date_range window if one is set
if date_range is not None:
    dr_start = pd.to_datetime(date_range.split()[0])
    dr_end = pd.to_datetime(date_range.split()[1])
    rasterNew = [F for F in rasterNew if dr_start <= pd.to_datetime(conJDtoDate(F[11:16])) <= dr_end]
totalFiles = len(rasterNew)
print(f"Total Files: {totalFiles}")
Last observation: 2025-12-30 Total Files: 2
Loop through each new satellite file, extract the nighttime light value at every location, and append the results to the master CSV.
if not os.path.exists(outputFolder): os.makedirs(outputFolder)
# Remove any leftover .tif temp files from a previous run
for T in os.listdir(outputFolder):
    if ".tif" in T:
        os.remove(f"{outputFolder}/{T}")
# {location_name: [already-processed dates]} for the skip check below
records = data.groupby('location')['date'].agg(list).to_dict()
mydata = []  # will accumulate one dict per location per date
for NOF in tqdm(range(0, totalFiles), desc="Processing"):
    # 5-char Julian date code from the filename (characters 11–15)
    dateOfYear = conJDtoDate(rasterNew[NOF][11:16])
    # BUGFIX: check the tile of the file being processed; the old code always
    # inspected rasterFiles[0], making the check a constant.
    if rasterNew[NOF].split(".")[2] not in the_tiles:
        continue
    # Locations that still need this date processed.  records.get(...) guards
    # against locations with no history yet (old code raised KeyError there).
    target = [
        point for point in locations_df.to_dict(orient='records')
        if pd.isnull(point['date'])
        or pd.to_datetime(dateOfYear) not in records.get(point['location'], [])
    ]
    if len(target) == 0:
        continue  # nothing to do for this file
    # BUGFIX: process the *new* file (rasterNew[NOF]); the old code indexed the
    # full rasterFiles listing with NOF, processing the wrong file entirely.
    _toappend_ = processHD5(rasterNew[NOF], 2, outputFolder, target, dateOfYear)
    mydata.extend(_toappend_)
    # Clean up the .tif temp files created during processing
    for T in os.listdir(outputFolder):
        if ".tif" in T:
            os.remove(f"{outputFolder}/{T}")
# Combine existing data with the newly extracted records
data = pd.concat([data, pd.DataFrame(mydata)], axis=0)  # stack rows vertically
data['date'] = pd.to_datetime(data['date'])
data.set_index('date', inplace=True)
data.sort_index(inplace=True)  # BUGFIX: the old call discarded the sorted copy
data.index = pd.to_datetime(data.index)  # re-confirm index dtype (handles mixed types)
data.to_csv(f'{path_source}/data_ntl.csv')  # overwrite the master CSV
print(f"Saved. Last update: {data.index.max()}")
data
Processing: 100%|██████████| 2/2 [00:00<00:00, 6.57it/s]
Saved. Last update: 2025-12-30 00:00:00
| location | city | sector | DNBvalue1 | DNBvalue3 | |
|---|---|---|---|---|---|
| date | |||||
| 2025-10-02 | Sangster International Airport (MBJ) | Montego Bay | Airport | 30.194336 | 29.51 |
| 2025-10-02 | Norman Manley International Airport (KIN) | Kingston | Airport | 141.848540 | 73.05 |
| 2025-10-02 | Ian Fleming International Airport (OCJ) | Boscobel | Airport | 12.989580 | 8.97 |
| 2025-10-02 | Ken Jones Aerodrome (Port Antonio) | St. Margarets Bay | Airport | 2.032099 | 1.06 |
| 2025-10-02 | Tinson Pen Aerodrome (KTP) | Kingston | Airport | 98.617030 | 72.75 |
| ... | ... | ... | ... | ... | ... |
| 2025-11-12 | Trench Town Culture Yard Museum | Kingston | Cultural | 26.936199 | 34.22 |
| 2025-11-12 | Sam Sharpe Square | Montego Bay | Cultural | 75.191666 | 35.63 |
| 2025-11-12 | Fort Charles (Port Royal) | Port Royal | Cultural | 19.353334 | 3.30 |
| 2025-11-12 | Fort Montego | Montego Bay | Cultural | 8.931674 | 25.80 |
| 2025-11-12 | Lovers Leap Lighthouse | NaN | Cultural | 1.455617 | 0.99 |
9810 rows × 5 columns
Country Area¶
Define a function that computes the mean and total nighttime light over the entire island from one satellite file. Load the Jamaica boundary shapefile from GADM.
import geopandas as gpd # geopandas: extends pandas for geospatial (shapefile) data
import pandas as pd
import rasterio # rasterio: reads/writes raster (grid) files like .tif
from rasterio.mask import mask as rio_mask # masks a raster to a polygon shape
from shapely.geometry import mapping # converts a shapely geometry to a GeoJSON-style dict
def processHD5_island(inputHD5, layer, OutputFolder, gdf_boundary, Date):
    """
    Extract island-wide NTL radiance statistics from one h5 file.

    Args:
        inputHD5: path to the VNP46A2 .h5 file
        layer: sub-dataset index to read (callers pass 2)
        OutputFolder: folder for the temporary georeferenced GeoTIFF
        gdf_boundary: GeoDataFrame with the (dissolved) island polygon(s)
        Date: date stored in the returned record

    Returns:
        Dict with 'date', 'ntl_mean' and 'ntl_sum' over valid in-boundary
        pixels (ntl_mean is NaN when no valid pixel exists).
    """
    hdflayer = gdal.Open(inputHD5, gdal.GA_ReadOnly)
    subhdflayer = hdflayer.GetSubDatasets()[layer][0] # select layer by index (callers pass 2)
    rlayer = gdal.Open(subhdflayer, gdal.GA_ReadOnly)
    # Tile H/V indices → 10°x10° geographic bounding box
    meta = rlayer.GetMetadata_Dict()
    H = int(meta["HorizontalTileNumber"])
    V = int(meta["VerticalTileNumber"])
    west, north = (10 * H) - 180, 90 - (10 * V)
    east, south = west + 10, north - 10
    tmp = os.path.join(OutputFolder, "_island_tmp.tif") # temporary georeferenced raster
    opts = gdal.TranslateOptions(gdal.ParseCommandLine(
        f"-a_srs EPSG:4326 -a_ullr {west} {north} {east} {south}"
    ))
    gdal.Translate(tmp, rlayer, options=opts)
    rlayer = None # release the GDAL file handle
    # GeoJSON-style dicts for rasterio masking
    shapes = [mapping(geom) for geom in gdf_boundary.geometry]
    with rasterio.open(tmp) as src:
        # Clip the raster to the island polygon; pixels outside become nodata
        out_image, _ = rio_mask(src, shapes, crop=True, nodata=np.nan, filled=True)
    try:
        os.remove(tmp) # best-effort cleanup of the temporary .tif
    except Exception:
        pass # if deletion fails, silently continue
    data = out_image[0].astype(float) # (1, rows, cols) → drop the band dimension
    data[data >= 65535] = np.nan # 65535 is the VIIRS fill/no-data sentinel value
    data[data < 0] = np.nan # negative radiance is invalid
    valid = ~np.isnan(data) # True where data is a real number
    return {
        "date" : Date,
        "ntl_mean": float(np.nanmean(data)) if valid.any() else np.nan, # ignore NaNs
        "ntl_sum" : float(np.nansum(data)),
    }
# Load Jamaica boundary shapefile directly from a URL (zipped shapefile)
shape = "https://geodata.ucdavis.edu/gadm/gadm4.1/shp/gadm41_JAM_shp.zip"
gdf = gpd.read_file(shape)
gdf.explore(tiles="CartoDB dark_matter", zoom=9) # .explore() renders an interactive map in the notebook
c:\Users\guerr\anaconda3\envs\geo\Lib\site-packages\pyogrio\geopandas.py:265: UserWarning: More than one layer found in 'gadm41_JAM_shp.zip': 'gadm41_JAM_0' (default), 'gadm41_JAM_1'. Specify layer parameter to avoid this warning. result = read_func(
Filter satellite files to the selected date range and run the island-level extraction for each one, producing a daily time series of mean NTL.
# ── Parameters ────────────────────────────────────────────────────────────────
# Unpack "YYYY-MM-DD YYYY-MM-DD" into its two date strings.
date_start, date_end = date_range.split()
# ── Dissolve GDF to a single island polygon ───────────────────────────────────
# .dissolve() merges all sub-polygons (parishes) into one outline; .to_crs() → WGS84
gdf_jam = gdf[gdf.geometry.notnull()].dissolve().to_crs("EPSG:4326")
# ── Filter h5 files to the requested time window and tile ────────────────────
start_dt = pd.to_datetime(date_start)
end_dt = pd.to_datetime(date_end)
# All VNP46A2 .h5 files, sorted chronologically
all_h5 = sorted([f for f in os.listdir(PATH_NL) if "VNP46A2" in f and f.endswith(".h5")])
# Keep only files that match the tile and fall within the date range
rasters_range = [
    f for f in all_h5
    if the_tiles[0] in f
    and start_dt <= pd.to_datetime(conJDtoDate(f[11:16])) <= end_dt
]
print(f"Files in range [{date_start} → {date_end}]: {len(rasters_range)}")
# ── Process ───────────────────────────────────────────────────────────────────
os.makedirs(outputFolder, exist_ok=True)  # idempotent folder creation
records = []  # one dict per file
for f in tqdm(rasters_range, desc="Island NTL"):
    date = conJDtoDate(f[11:16])  # Julian date embedded in the filename
    rec = processHD5_island(os.path.join(PATH_NL, f), 2, outputFolder, gdf_jam, date)
    records.append(rec)
df = pd.DataFrame(records)
# BUGFIX guard: with zero files in range, pd.DataFrame([]) has no 'date'
# column and set_index would raise KeyError.
if not df.empty:
    df["date"] = pd.to_datetime(df["date"])
    df.set_index("date", inplace=True)  # date index for time-series plotting
    df.sort_index(inplace=True)
# BUGFIX: nothing is written to disk in this cell, so don't claim "Saved".
print(f"Processed {len(df)} rows. Last date: {df.index.max() if not df.empty else 'n/a'}")
df.head()  # first 5 rows
Files in range [2025-10-02 → 2025-11-15]: 43
Island NTL: 100%|██████████| 43/43 [00:08<00:00, 5.02it/s]
Saved 43 rows. Last date: 2025-11-15 00:00:00
| ntl_mean | ntl_sum | |
|---|---|---|
| date | ||
| 2025-10-02 | 2.491267 | 134775.036133 |
| 2025-10-03 | 2.328361 | 125962.024996 |
| 2025-10-04 | 2.011105 | 108798.744972 |
| 2025-10-05 | 1.444410 | 78141.144171 |
| 2025-10-06 | 1.575248 | 85219.365989 |
Plots¶
Time series¶
Plot the daily island-mean NTL time series and mark Hurricane Melissa with a vertical line.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates  # date-axis formatting helpers

event_date = pd.Timestamp("2025-10-28")  # a single point in time (like datetime)
event_label = "H. Melissa"

fig, ax = plt.subplots(figsize=(12, 4))  # one panel; figsize in inches

# Daily island-mean series
ax.plot(df.index, df["ntl_mean"],
        color="#005BAC", linewidth=1.8, label="NTL mean (nW/cm²/sr)")

# Mark the hurricane with a dashed vertical line plus a small label at the top.
ax.axvline(event_date, color="#CC0000", linewidth=1.5, linestyle="--", zorder=3)
y_top = ax.get_ylim()[1]
ax.text(event_date, y_top, f" {event_label}", color="#CC0000",
        fontsize=9, va="top", ha="left", rotation=0)

ax.set_title("Jamaica Nighttime Lights — Island Mean", fontsize=13, weight="bold")
ax.set_ylabel("NTL mean (nW/cm²/sr)")
ax.grid(axis="y", linestyle=":", alpha=0.5)  # faint horizontal gridlines only
ax.legend(frameon=False)  # legend without a box border
plt.tight_layout()  # avoid label overlap
plt.show()
Mapping event day¶
Load the rasters for 7 days before and after the hurricane. Plot side-by-side maps showing the spatial pattern of nighttime lights before and after the event.
import contextily as cx # contextily: adds web map tiles (basemaps) to matplotlib axes
import colorcet as cc # colorcet: perceptually uniform colormaps

event_date = pd.Timestamp("2025-10-28")
# ── Pick the two target dates ─────────────────────────────────────────────────
# pd.Timedelta: a duration; shift the event date by ±7 days
before_date = event_date - pd.Timedelta(days=7)
after_date = event_date + pd.Timedelta(days=7)
# dir() with no arguments lists the names defined in the current scope;
# rebuild the dissolved island polygon only if an earlier cell didn't already.
if 'gdf_jam' not in dir():
    gdf_jam = gdf[gdf.geometry.notnull()].dissolve().to_crs("EPSG:4326")
def find_h5_for_date(target_date, path=PATH_NL, tile=the_tiles[0]):
    """Return the VNP46A2 h5 filename matching a given date and tile, or None."""
    # Chronologically sorted candidate files for this tile.
    candidates = sorted(
        name for name in os.listdir(path)
        if "VNP46A2" in name and name.endswith(".h5") and tile in name
    )
    wanted = target_date.date()  # compare calendar dates only, ignore time
    for name in candidates:
        if pd.Timestamp(conJDtoDate(name[11:16])).date() == wanted:
            return name
    return None  # explicit None signals "not found"
def raster_to_array(h5_filename, layer, gdf_boundary, out_folder):
    """
    Clip one h5 sub-dataset to a boundary and return the pixel array plus axes.

    Args:
        h5_filename: filename inside the module-level PATH_NL folder (note:
            the folder is NOT a parameter here)
        layer: sub-dataset index to read (callers pass 2)
        gdf_boundary: GeoDataFrame polygon(s) used to mask the raster
        out_folder: folder for the temporary GeoTIFF

    Returns:
        (data, lons, lats): 2-D float array with invalid pixels set to NaN,
        plus 1-D arrays of pixel-center longitudes and latitudes.
    """
    h5_path = os.path.join(PATH_NL, h5_filename)
    hdflayer = gdal.Open(h5_path, gdal.GA_ReadOnly)
    subhdflayer = hdflayer.GetSubDatasets()[layer][0]
    rlayer = gdal.Open(subhdflayer, gdal.GA_ReadOnly)
    # Tile H/V indices → 10°x10° geographic bounding box
    meta = rlayer.GetMetadata_Dict()
    H, V = int(meta["HorizontalTileNumber"]), int(meta["VerticalTileNumber"])
    west, north = (10 * H) - 180, 90 - (10 * V)
    east, south = west + 10, north - 10
    tmp = os.path.join(out_folder, "_map_tmp.tif") # temporary georeferenced raster
    opts = gdal.TranslateOptions(gdal.ParseCommandLine(
        f"-a_srs EPSG:4326 -a_ullr {west} {north} {east} {south}"
    ))
    gdal.Translate(tmp, rlayer, options=opts)
    rlayer = None # release the GDAL handle
    shapes = [mapping(geom) for geom in gdf_boundary.geometry]
    with rasterio.open(tmp) as src:
        out_image, out_transform = rio_mask(src, shapes, crop=True, nodata=np.nan, filled=True)
    nrows, ncols = out_image.shape[1], out_image.shape[2]
    # Pixel-center coordinates from the affine transform: a/e are the pixel
    # sizes (e negative, so latitudes descend), c/f the top-left corner.
    lons = out_transform.c + (np.arange(ncols) + 0.5) * out_transform.a # x-center of each column
    lats = out_transform.f + (np.arange(nrows) + 0.5) * out_transform.e # y-center of each row
    try:
        os.remove(tmp) # best-effort cleanup of the temp file
    except Exception:
        pass
    data = out_image[0].astype(float)
    data[data >= 65535] = np.nan # mask the VIIRS fill value
    data[data < 0] = np.nan # negative radiance is invalid
    return data, lons, lats
# ── Load the two rasters ──────────────────────────────────────────────────────
f_before = find_h5_for_date(before_date)
f_after = find_h5_for_date(after_date)
print(f"Before : {f_before} ({before_date.date()})")
print(f"After : {f_after} ({after_date.date()})")
# NOTE(review): find_h5_for_date returns None when no file matches; that would
# make raster_to_array fail on os.path.join — consider asserting non-None here.
arr_before, lons, lats = raster_to_array(f_before, 2, gdf_jam, outputFolder)
arr_after, _, _ = raster_to_array(f_after, 2, gdf_jam, outputFolder) # same grid; discard repeated lons/lats
# ── Shared color scale (robust 2nd-98th percentile across both panels) ────────
# Pool the valid (non-NaN) pixels of both panels into one distribution
both = np.concatenate([arr_before[~np.isnan(arr_before)], arr_after[~np.isnan(arr_after)]])
# Percentiles (not min/max) keep extreme outliers from stretching the scale
vmin, vmax = np.nanpercentile(both, 2), np.nanpercentile(both, 98)
# np.meshgrid turns the 1-D lon/lat axes into the 2-D grids pcolormesh needs
LON, LAT = np.meshgrid(lons, lats)
# ── Plot ──────────────────────────────────────────────────────────────────────
fig, axes = plt.subplots(1, 2, figsize=(16, 8)) # 1 row, 2 columns of subplots
fig.subplots_adjust(bottom=0.15) # reserve space below the maps for the colorbar
panel_titles = [
    f"7 days before Hurricane Melissa\n({before_date.strftime('%b %d, %Y')})",
    f"7 days after Hurricane Melissa\n({after_date.strftime('%b %d, %Y')})",
]
# zip() pairs each axis with its array and title so one loop draws both panels
for ax, arr, title in zip(axes, [arr_before, arr_after], panel_titles):
    # pcolormesh renders the 2-D array as a color grid; shading="auto" avoids warnings
    pcm = ax.pcolormesh(LON, LAT, arr, cmap=cc.cm.bmy, vmin=vmin, vmax=vmax, shading="auto")
    ax.set_title(title, fontsize=13, weight="bold")
    # Overlay a web map tile layer on the same axes
    cx.add_basemap(ax, source=cx.providers.CartoDB.Positron, crs="EPSG:4326")
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
# Manually placed horizontal colorbar shared by both panels
# fig.add_axes([left, bottom, width, height]) in figure coordinates
cbar_ax = fig.add_axes([0.25, 0.05, 0.5, 0.025])
fig.colorbar(pcm, cax=cbar_ax, orientation="horizontal",
             label="NTL Radiance (nW/cm²/sr)")
fig.suptitle("Daily Aggregate Nighttime Light Radiance - Jamaica", fontsize=15, weight="bold")
# fig.text places text at figure coordinates (0=left/bottom, 1=right/top)
fig.text(0.01, 0.01, "Source: NASA Black Marble VNP46A2", fontsize=11)
plt.show()
Before : VNP46A2.A2025294.h10v07.002.2025302162350.h5 (2025-10-21) After : VNP46A2.A2025308.h10v07.002.2025322075732.h5 (2025-11-04)