#Set File Paths
# Importing multiple libraries at once using commas
import struct, os, sys, time, requests, os, glob, datetime, h5py, re
import numpy as np, numpy.ma as ma          # numpy: fast numerical arrays; 'np' is the standard alias
from osgeo import gdal, ogr, gdalnumeric    # gdal: reads/writes geospatial raster files
import matplotlib.pyplot as plt             # plt: standard alias for plotting
import statistics as stat
import pandas as pd                         # pd: standard alias for tabular data (DataFrames)
from datetime import datetime               # imports just the datetime class from the datetime module
from pathlib import Path                    # Path: object-oriented file path handling
from tqdm import tqdm                       # tqdm: adds a progress bar to any loop

# Folders
# 'locals()' returns a dict of all variables defined so far in this session
if 'd' in locals(): os.chdir(d)
else : d = f"{os.getcwd()}"                # f-string: embeds the expression in {} into the string

path_source = f"{d}/../raw"                # '../' means "go up one folder level"
PATH_NL = f"{d}/../hfiles/"
# os.path.exists() returns True/False; os.makedirs() creates the folder if missing
if not os.path.exists(PATH_NL):
    os.makedirs(PATH_NL)
    print(f"Folder '{PATH_NL}' created.")
else:
    print(f"Folder '{PATH_NL}' exists.")

os.chdir(PATH_NL)                          # change the working directory to PATH_NL

outputFolder = f"{PATH_NL}/output/"
if not os.path.exists(outputFolder):
    os.makedirs(outputFolder)
    print(f"Folder '{outputFolder}' created.")
else:
    print(f"Folder '{outputFolder}' exists.")
    
rasterFiles = os.listdir(PATH_NL)         # returns a list of filenames in the folder
# List comprehension: keep only filenames that contain "VNP46A2"
rasterFiles = [ x for x in rasterFiles if "VNP46A2" in x ]
rasterFiles.sort()                         # sorts the list in-place (alphabetical = chronological here)
totalFiles = len(rasterFiles)              # len() returns the number of items in a list

# list(set(...)) removes duplicates; here we define which satellite tile covers Jamaica
the_tiles = list(set(['h10v07']))

# Download mode
# Date ranges of interest to download
#   None                       → incremental (default): picks up from last h5 file or data_ntl.csv
#   "YYYY-MM-DD YYYY-MM-DD"    → download that exact date range (start inclusive, end exclusive)
date_range = "2025-10-02 2025-11-15"

Folder 'c:\Users\guerr\Dropbox\02_Work\Consulting\2026_NWCST_CAR_Research-Workshop\JAM Workshop\workshop_code/../hfiles/' exists.
Folder 'C:\Users\guerr\Dropbox\02_Work\Consulting\2026_NWCST_CAR_Research-Workshop\jamaica\hfiles\/output/' exists.

#### Replace API Key with your own
api_key = "eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiVXNlciIsInVpZCI6ImRndWVycmVybyIsImV4cCI6MTc4MjI3MDk3MiwiaWF0IjoxNzc3MDg2OTcyLCJpc3MiOiJodHRwczovL3Vycy5lYXJ0aGRhdGEubmFzYS5nb3YiLCJpZGVudGl0eV9wcm92aWRlciI6ImVkbF9vcHMiLCJhY3IiOiJlZGwiLCJhc3N1cmFuY2VfbGV2ZWwiOjN9.UnjYQaZfBoEOdW_JyVY1009tYOHByX4z_e0dvU3RGuJd0rxbJtgb5yxOLFzt0Qb1d7KazQQMDQbQ4rF6QNeWkSY7F6duOwYB5GHcepDzPBCPbqF-5h-6okvWtR20v2p1Uh6oA1xB7J1rnP6ErdLWJHD1yJTaEKRfCAhOZBl5viRG57j33tYDRooBItI1yzxWYjhr5A-ka77BKZf5T7GZcM2A6lH_91WLp7yrtUyIp-9KhL_z9wOho2jMzeyDMU131n1wzlLssrwZD3k-uSzsC5zCQ7JjjCRv57vcjSv-FoFzuEB885E6zjN9CZOhbgQwy2l_x3QAqKJ2rVZgCiH7Uw"
#Download from https://ladsweb.modaps.eosdis.nasa.gov/

# Dict with one key; used to authenticate all HTTP requests to the NASA API
headers = {'Authorization': f'Bearer {api_key}'}

# --- Determine start/end of download window ---
if date_range is not None:
    parts = date_range.strip().split()          # .split() splits a string on whitespace → list of 2 strings
    start_dt = datetime.strptime(parts[0], "%Y-%m-%d")   # parses a string into a datetime object
    end_dt   = datetime.strptime(parts[1], "%Y-%m-%d")
    last_year = start_dt.strftime("%Y")         # .strftime() formats a datetime back to a string
    last_day  = start_dt.strftime("%j")         # %j = day-of-year (001–366)
    end_date  = end_dt
    print(f"Custom range: {start_dt.date()} → {end_dt.date()}")
else:
    end_date = None
    try :                                        # try/except: run code that might fail; handle errors gracefully
        data = pd.read_csv(f"{path_source}/data_ntl.csv", parse_dates=True)
        last_date = pd.to_datetime(data.groupby('location')['date'].max().reset_index().JD.min())
        print("Last date in the directory:", last_date)
        last_date = last_date.strftime("%Y%j")
        last_year, last_day = last_date[:4], last_date[4:]   # string slicing: first 4 chars, then the rest
        print("In Julian Days:", last_date)
    except :
        #Get last file in the directory
        PATH_NL_file_list = glob.glob(os.path.join(PATH_NL, '*.h5'))   # glob finds files matching a pattern

        last_file = '.A2001001.'

        if PATH_NL_file_list:
            for file in PATH_NL_file_list :
                # re.search() finds a regex pattern anywhere in the string; returns None if not found
                match = re.search(r'\.A(\d{4})(\d{3})\.', file)
                if match:
                    year = match.group(1)          # .group(1) returns the first captured () group
                    julian_date = match.group(2)
                    start_year_day = f"{year}/{julian_date}"
                    if int(start_year_day.replace("/","")) > int( re.search(r'\.A(\d{7})\.', last_file).group(1) ) :
                        last_file = file
                else:
                    print("No Julian date and year found in the filename.")
            print("Last file in the directory:", last_file)
        else:
            print("The directory is empty.")

        match = re.search(r'\.A(\d{4})(\d{3})\.', last_file)
        last_year, last_day = match.group(1), match.group(2)

print(f"Last NTL collected: {last_year, last_day}")

# ---------------------------------------------------------------
# Helper functions
# List comprehension: keep only entries that are actual files (not subdirectories)
available_files = [f for f in os.listdir(PATH_NL) if os.path.isfile(os.path.join(PATH_NL, f))]

# -------------------------------------------------
def download_vnp46a2(year, day_of_year, tile, output_dir=PATH_NL, file_list=available_files):
    """
    Download VNP46A2 file for specific date and tile

    Args:
        year: Year (e.g., 2023)
        day_of_year: Day of year (1-365/366)
        tile: Tile identifier (e.g., "h08v05")
        output_dir: Directory to save files
    """
    base_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/VNP46A2"
    url = f"{base_url}/{year}/{day_of_year:03d}"    # :03d formats the integer with leading zeros to width 3
    headers = {"Authorization": f"Bearer {api_key}"}
    response = requests.get(f"{url}.json", headers=headers)   # sends an HTTP GET request
    if response.status_code != 200:                            # 200 means "OK"; anything else is an error
        print(f"Error getting file list: {response.status_code}")

    files = response.json()                         # .json() parses the response body as JSON → Python dict/list

    # Check what key the API used to return the file list
    if isinstance(files, dict):                     # isinstance() checks if a variable is of a given type
        if 'content' in files:
            filenames = files['content']
        elif 'files' in files:
            filenames = files['files']
        else:
            filenames = list(files.keys())          # .keys() returns all keys of a dict

    # Keep only download links that match our tile and end in '.h5'
    matching_files = [f['downloadsLink'] for f in filenames if tile in f['downloadsLink'] and f['downloadsLink'].endswith('.h5')]

    if not matching_files:
        print(f"No files found for tile {tile}")
        print(f"Available files: {filenames[:5]}")  # slice: show only the first 5 items
        return                                       # 'return' with no value exits the function early

    for file_url in matching_files:
        filename = file_url.split("/")[-1]           # .split("/") splits on "/" → [-1] gets the last element
        output_path = os.path.join(output_dir, filename)

        if filename in file_list :                   # 'in' checks membership in a list
            print(f"Skipped {filename}")
            continue                                 # 'continue' skips the rest of this loop iteration
        # stream=True: download in chunks instead of loading the whole file into memory at once
        file_response = requests.get(file_url, headers=headers, stream=True)

        if file_response.status_code == 200:
            with open(output_path, 'wb') as f:       # 'wb' = write binary mode; 'with' auto-closes the file
                for chunk in file_response.iter_content(chunk_size=8192):   # read 8 KB at a time
                    f.write(chunk)
        else:
            print(f"Error downloading {filename}: {file_response.status_code}")


# -------------------------------------------------
def list_years(last_year, last_day, tile, end_date=None, output_dir=PATH_NL, collection="5200", product='VNP46A2'):
    base_url = f"https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/{collection}/{product}"
    headers = {"Authorization": f"Bearer {api_key}"}

    response = requests.get(f"{base_url}.json", headers=headers)
    response = response.json()
    # Extract the 'name' field from each item in the 'content' list
    all_years = [item['name'] for item in response['content']]

    # Keep only years >= last_year (comparing as integers)
    years_to_download = [year for year in all_years if int(year) >= int(last_year)]
    if end_date is not None:
        years_to_download = [year for year in years_to_download if int(year) <= end_date.year]

    return years_to_download


# -------------------------------------------------
def get_target(target, last_year, last_day, tile=the_tiles[0], end_date=None, output_dir=PATH_NL, collection="5200", product='VNP46A2'):
    """
    Download all available days from target years that are after last observation
    (and before end_date if provided).

    Args:
        target: List of years to check (e.g., ['2024', '2025'])
        last_year: Last year you have (e.g., 2024)
        last_day: Last day of year you have (e.g., 100)
        tile: Tile identifier
        end_date: Optional datetime; stop before this date (used for custom ranges)
        output_dir: Output directory
    """
    base_url = f"https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/{collection}/{product}"
    headers = {"Authorization": f"Bearer {api_key}"}

    # Convert year + day-of-year string into a proper datetime object for comparisons
    last_observation = datetime.strptime(f"{last_year}-{last_day}", "%Y-%j")

    my_list = []
    for year in target:
        response = requests.get(f"{base_url}/{year}.json", headers=headers)

        if response.status_code != 200:
            print(f"Error getting days for year {year}: {response.status_code}")
            continue

        response_data = response.json()
        all_days = [item['name'] for item in response_data['content']]
        # sorted() returns a new sorted list; int() converts string "001" → 1 for numeric sorting
        day_numbers = sorted([int(day) for day in all_days if day.isdigit()])   # .isdigit() checks if string is a number

        for day in day_numbers:
            dt = datetime.strptime(f"{year}-{day}", "%Y-%j")
            # Only add days that are strictly after our last observation and before end_date
            if dt > last_observation:
                if end_date is None or dt < end_date:
                    my_list.append((year, day))     # appending a tuple (pair of values) to the list

    return my_list

# -------------------------------------------------
# target to download
target = []
for tile in the_tiles :
    # list_years returns which years to check; get_target narrows to specific (year, day) pairs
    tile_target = list_years(last_year, last_day, tile, end_date=end_date)
    tile_target = get_target(tile_target, last_year, last_day, tile, end_date=end_date)
    # Prepend the tile name to each (year, day) tuple → (tile, year, day)
    target.append( [ (tile ,) + t for t in tile_target ] )
# Flatten list-of-lists into a single list using a list comprehension
target = [x for sublist in target for x in sublist]

if target:
    print(f"Last date available: {target[-1]}")   # [-1] accesses the last element of the list
    print("Downloading...")
    for t in tqdm( target , desc="Processing") :  # tqdm wraps the loop to show a progress bar
        download_vnp46a2( t[1] , t[2], t[0])      # t[0]=tile, t[1]=year, t[2]=day
        time.sleep(0.01)                           # pause 10 ms between requests to avoid overloading the server
else:
    print("Nothing new to download.")

rasterFiles = [ x for x in rasterFiles if "VNP46A2" in x ]
rasterFiles.sort()
totalFiles = len(rasterFiles)
print("Finalized")

# HELPER FUNCTIONS

def conJDtoDate(JD):
    '''
    Converts a 5-character Julian date string (YYDDD) found in filenames to a Python date object.
    Example: "25301" → 2025-10-28
    '''
    # %y = 2-digit year, %j = day of year; strptime parses the string into a datetime
    date = datetime.strptime(JD, '%y%j').date()   # .date() drops the time part, keeps only year/month/day
    return date

def getRasterData(lat, lon, window, xOrigin, yOrigin, pixelWidth, pixelHeight, data):
    '''
    Returns the pixel value (or average of a 3x3 grid) at a given latitude/longitude.
    window=1: single pixel; window=3: average over a 3x3 neighborhood.
    '''
    
    # Convert geographic coordinates to pixel row/column indices
    col = int((lon - xOrigin) / pixelWidth )
    row = int((yOrigin - lat) / pixelHeight)
    
    #Data AT THAT ROW COLUMN
    if(window == 3):
        '''
        This is a grid of 3x3 around the lat,lon. 
        '''
        # np.array creates a 2D array of index offsets for a 3x3 neighborhood
        indexX = np.array([[-1,0,1],[-1,0,1],[-1,0,1]])
        indexY = np.array([[1,1,1],[0,0,0],[-1,-1,-1]])
        newIndexX = indexX + row    # shift offsets by the actual row
        newIndexY = indexY + col    # shift offsets by the actual column
        
        Totalvalue = []
        for i in range(0, 3):
            for j in range(0, 3):
                Totalvalue.append(data[newIndexX[i][j]][newIndexY[i][j]])   # index into 2D array
                
        # stat.mean() computes the average; map(float, ...) converts all values to floats first
        value = format(stat.mean(map(float, Totalvalue)),'.2f')   # format to 2 decimal places
        return float(value)

    else:
        value = data[row][col]   # direct 2D array lookup at [row][col]
        return value
    

def processHD5(inputHD5, layer, OutputFolder, coords, Date ):
    rasterFilePre = inputHD5[:-3]             # strip the last 3 characters (".h5") from the filename
    ## Open HDF file
    hdflayer = gdal.Open(inputHD5, gdal.GA_ReadOnly)           # open the h5 file as read-only
    subhdflayer = hdflayer.GetSubDatasets()[layer][0]           # get the path to the Nth sub-dataset (layer index)
    rlayer = gdal.Open(subhdflayer, gdal.GA_ReadOnly)
    #Subset the Layer Name
    outputLayerName = subhdflayer[92:]                          # slice off the first 92 characters (file path prefix)
    clean_layer_name = re.sub(r'[^\w\-_.]', '_', outputLayerName)    # replace any special characters with '_'
    
    #outputFile
    outputRaster = OutputFolder + rasterFilePre + "_" + clean_layer_name + ".tif"
    
    # Read tile position metadata to compute geographic bounding box
    HorizontalTileNumber = int(rlayer.GetMetadata_Dict()["HorizontalTileNumber"])
    VerticalTileNumber = int(rlayer.GetMetadata_Dict()["VerticalTileNumber"])
    WestBoundCoord = (10*HorizontalTileNumber) - 180
    NorthBoundCoord = 90-(10*VerticalTileNumber)
    
    EastBoundCoord = WestBoundCoord + 10
    SouthBoundCoord = NorthBoundCoord - 10
    
    EPSG = "-a_srs EPSG:4326"   # WGS84 coordinate reference system (standard lat/lon)
    
    # Build the gdal.Translate options string: set projection + bounding box corners
    translateOptionText = EPSG+" -a_ullr " + str(WestBoundCoord) + " " + str(NorthBoundCoord) + " " + str(EastBoundCoord) + " " + str(SouthBoundCoord)
    translateoptions = gdal.TranslateOptions(gdal.ParseCommandLine(translateOptionText))
    result = gdal.Translate(outputRaster, rlayer, options=translateoptions)   # write the georeferenced .tif file
    
    raster = gdal.Open(outputRaster,gdal.GA_ReadOnly)
    
    if raster is None:
        print ("Could not open image " , Date )

    band = raster.GetRasterBand(1)     # get the first (and only) data band from the raster

    cols = raster.RasterXSize          # number of pixels wide
    rows = raster.RasterYSize          # number of pixels tall

    # GetGeoTransform returns 6 numbers that define pixel size and top-left corner
    transform = raster.GetGeoTransform()
    xOrigin = transform[0]             # longitude of the top-left corner
    yOrigin = transform[3]             # latitude of the top-left corner
    pixelWidth = transform[1]          # degrees per pixel (east-west)
    pixelHeight = -transform[5]        # degrees per pixel (north-south); negative in the file, so we negate it
    data = band.ReadAsArray(0, 0, cols, rows)    # read all pixel values into a 2D numpy array
    
    _datalist_ = []
    
    # Here starts the lat lon part.
    for coord in coords :
        
        lat = coord['lat']
        lon = coord['lon']
        
        # Skip this coordinate if it falls outside the tile's geographic extent
        if lat < SouthBoundCoord or lat > NorthBoundCoord or lon < WestBoundCoord or lon > EastBoundCoord:
            continue

        value1 = getRasterData(lat, lon, 1 , xOrigin, yOrigin, pixelWidth, pixelHeight, data)
        value3 = getRasterData(lat, lon, 3 , xOrigin, yOrigin, pixelWidth, pixelHeight, data)
        
        # Build a dict for this location's record; keys become column names when converted to a DataFrame
        _d_ = {
            'sector' : coord['sector'],
            'location' : coord['location'],
            'city' : coord['city'],
            'date' : Date,            
            'DNBvalue1' : value1 ,     # single-pixel NTL value
            'DNBvalue3' : value3 ,     # 3x3 averaged NTL value
        }        
        
        _datalist_.append(_d_)
    
    raster = None    # release the file handle (equivalent to closing the file)
    
    return _datalist_

rasterFiles = os.listdir(PATH_NL)
rasterFiles = [ x for x in rasterFiles if "VNP46A2" in x ]
rasterFiles.sort()
totalFiles = len(rasterFiles)

locations_df = pd.read_excel(f'{path_source}/Coordinates.xlsx')   # reads an Excel file into a DataFrame
# Normalize unicode (e.g., accented characters), encode to ASCII dropping unknowns, then strip whitespace
locations_df['location'] = locations_df['location'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.strip()
locations_df['city'] = locations_df['city'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.strip()
locations_df['thefile'] = the_tiles[0]   # add a column with the same tile name for every row

try :
    # parse_dates=True: attempt to parse any date-looking columns automatically
    data = pd.read_csv(f"{path_source}/data_ntl.csv", parse_dates=True)
except :
    # If the file doesn't exist yet, create an empty DataFrame with the needed columns
    data = pd.DataFrame()
    data['location']  = np.nan
    data['city'] = np.nan
    data['date'] = np.nan

if len(data)>0 :
    # Clean up text columns in existing data to match the format in locations_df
    data['location'] = data['location'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.strip()
    data['city'] = data['city'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.strip()
    # Left merge: keeps all rows from locations_df; adds 'date' column showing the latest date per location
    locations_df = pd.merge(locations_df, 
                         data.groupby('location')['date'].max().reset_index() ,   # latest date per location
                        on='location',
                        how='left')
else :
    locations_df['date'] = np.nan    # np.nan is the standard "missing value" for numeric/date columns

print(f"Last observation: {data.date.max()}")

# Keep only relevant dates file in iteration
# Technically we should keep only every date after the minimum last Date.
_the_dates = pd.to_datetime(data.date.unique())   # .unique() returns each date only once; pd.to_datetime converts them
# Filter rasterFiles to only those whose embedded date is NOT already in the data
rasterNew = [ F for F in rasterFiles if pd.to_datetime(conJDtoDate( F[11:16] )) not in _the_dates ]
# Further restrict to the date_range window if one is set
if date_range is not None:
    dr_start = pd.to_datetime(date_range.split()[0])
    dr_end   = pd.to_datetime(date_range.split()[1])
    rasterNew = [F for F in rasterNew if dr_start <= pd.to_datetime(conJDtoDate(F[11:16])) <= dr_end]
totalFiles = len(rasterNew)
print(f"Total Files: {totalFiles}")

Last observation: 2025-12-30
Total Files: 2

if not os.path.exists(outputFolder): os.makedirs(outputFolder)

# List comprehension used as a one-liner loop: delete any leftover .tif temp files
[ os.remove(f"{outputFolder}/{T}") for T in os.listdir(outputFolder) if ".tif" in T ]
# Build a dict: {location_name: [list of already-processed dates]}
records = data.groupby('location')['date'].agg(list).to_dict()

mydata = []   # will accumulate one dict per location per date

for NOF in tqdm(range(0, totalFiles), desc="Processing") :
    _definelist_ = []
    # Extract the 5-char Julian date code from the filename (characters 11–15)
    dateOfYear = conJDtoDate(rasterNew[NOF][11:16])
    
    # Build the list of locations that still need this date processed
    target = [
        point for point in locations_df.to_dict(orient='records')   # .to_dict('records') → list of row dicts
        if
        # Include the point if: it has no date yet, OR this dateOfYear isn't already in its record
        ( pd.isnull(point['date']) or pd.to_datetime(dateOfYear) not in records[point['location']] )
        and rasterFiles[0].split(".")[2] in the_tiles
    ]
    
    if len(target) == 0 : continue   # nothing to do for this file; skip to the next iteration
    
    _toappend_ = processHD5(rasterFiles[NOF], 2, outputFolder, target, dateOfYear )
    
    # Extend mydata with the returned list of dicts (one per location)
    [ mydata.append(f) for f in _toappend_ ]

# Clean up any .tif temp files created during processing
for T in os.listdir(outputFolder) :
    if ".tif" in T : os.remove(f"{outputFolder}/{T}")


# Combine the existing data with newly extracted records
data = pd.concat([data, pd.DataFrame(mydata)], axis=0)   # axis=0: stack rows vertically
data['date'] = pd.to_datetime(data['date'])               # ensure 'date' column is datetime type
data.set_index('date', inplace=True)   # make 'date' the row index; inplace=True modifies the DataFrame directly
data.sort_index()                      # returns a sorted copy (not saved here; used for display only)

data.index = pd.to_datetime(data.index)   # re-confirm index is datetime (handles mixed types)

data.to_csv(f'{path_source}/data_ntl.csv')   # save to CSV; overwrites the file if it already exists
print(f"Saved. Last update: {data.index.max()}")
data

Processing: 100%|██████████| 2/2 [00:00<00:00,  6.57it/s]

Saved. Last update: 2025-12-30 00:00:00

import geopandas as gpd          # geopandas: extends pandas for geospatial (shapefile) data
import pandas as pd
import rasterio                   # rasterio: reads/writes raster (grid) files like .tif
from rasterio.mask import mask as rio_mask   # masks a raster to a polygon shape
from shapely.geometry import mapping         # converts a shapely geometry to a GeoJSON-style dict

def processHD5_island(inputHD5, layer, OutputFolder, gdf_boundary, Date):
    """
    Extract mean NTL radiance over the full island boundary from one h5 file.
    Returns a dict with date, mean, sum, and pixel count.
    """
    hdflayer    = gdal.Open(inputHD5, gdal.GA_ReadOnly)
    subhdflayer = hdflayer.GetSubDatasets()[layer][0]   # select layer by index (2 = DNB_At_Sensor_Radiance)
    rlayer      = gdal.Open(subhdflayer, gdal.GA_ReadOnly)

    # Compute the tile's geographic bounding box from its H/V tile indices
    meta = rlayer.GetMetadata_Dict()
    H = int(meta["HorizontalTileNumber"])
    V = int(meta["VerticalTileNumber"])
    west, north = (10 * H) - 180, 90 - (10 * V)
    east, south = west + 10, north - 10

    tmp = os.path.join(OutputFolder, "_island_tmp.tif")   # temporary file path for the georeferenced raster
    opts = gdal.TranslateOptions(gdal.ParseCommandLine(
        f"-a_srs EPSG:4326 -a_ullr {west} {north} {east} {south}"
    ))
    gdal.Translate(tmp, rlayer, options=opts)
    rlayer = None   # release the GDAL file handle

    # Convert each geometry in the GeoDataFrame to a GeoJSON dict for rasterio masking
    shapes = [mapping(geom) for geom in gdf_boundary.geometry]
    with rasterio.open(tmp) as src:
        # rio_mask clips the raster to the island polygon; nodata=np.nan fills outside pixels
        out_image, _ = rio_mask(src, shapes, crop=True, nodata=np.nan, filled=True)

    try:
        os.remove(tmp)    # delete the temporary .tif file
    except Exception:
        pass              # if deletion fails, silently continue

    data = out_image[0].astype(float)   # out_image has shape (1, rows, cols); [0] drops the band dimension
    data[data >= 65535] = np.nan        # 65535 is the VIIRS fill/no-data sentinel value
    data[data < 0]      = np.nan        # negative values are also invalid

    valid = ~np.isnan(data)             # boolean array: True where data is a real number
    return {
        "date"    : Date,
        "ntl_mean": float(np.nanmean(data)) if valid.any() else np.nan,   # np.nanmean ignores NaN values
        "ntl_sum" : float(np.nansum(data)),
    }

# Load Jamaica boundary shapefile directly from a URL (zipped shapefile)
shape = "https://geodata.ucdavis.edu/gadm/gadm4.1/shp/gadm41_JAM_shp.zip"
gdf = gpd.read_file(shape)
gdf.explore(tiles="CartoDB dark_matter", zoom=9)    # .explore() renders an interactive map in the notebook

c:\Users\guerr\anaconda3\envs\geo\Lib\site-packages\pyogrio\geopandas.py:265: UserWarning: More than one layer found in 'gadm41_JAM_shp.zip': 'gadm41_JAM_0' (default), 'gadm41_JAM_1'. Specify layer parameter to avoid this warning.
  result = read_func(

# ── Parameters ────────────────────────────────────────────────────────────────
# .split() splits the string on the space → list of 2 strings; [0] and [1] pick each part
date_start, date_end = date_range.split()[0], date_range.split()[1]

# ── Dissolve GDF to a single island polygon ───────────────────────────────────
# .dissolve() merges all sub-polygons (parishes) into one island outline; .to_crs() reprojects to WGS84
gdf_jam = gdf[gdf.geometry.notnull()].dissolve().to_crs("EPSG:4326")

# ── Filter h5 files to the requested time window and tile ────────────────────
start_dt = pd.to_datetime(date_start)   # convert "YYYY-MM-DD" string to a pandas Timestamp
end_dt   = pd.to_datetime(date_end)

# List all VNP46A2 .h5 files in the folder, sorted chronologically
all_h5 = sorted([f for f in os.listdir(PATH_NL) if "VNP46A2" in f and f.endswith(".h5")])
# Keep only files that match the tile and fall within the date range
rasters_range = [
    f for f in all_h5
    if the_tiles[0] in f                                          # correct tile only
    and start_dt <= pd.to_datetime(conJDtoDate(f[11:16])) <= end_dt
]
print(f"Files in range [{date_start} → {date_end}]: {len(rasters_range)}")

# ── Process ───────────────────────────────────────────────────────────────────
if not os.path.exists(outputFolder):
    os.makedirs(outputFolder)

records = []    # will collect one dict per file
for f in tqdm(rasters_range, desc="Island NTL"):
    date = conJDtoDate(f[11:16])   # extract and convert the Julian date from the filename
    rec  = processHD5_island(os.path.join(PATH_NL, f), 2, outputFolder, gdf_jam, date)
    records.append(rec)

# Convert list of dicts into a DataFrame
df = pd.DataFrame(records)
df["date"] = pd.to_datetime(df["date"])
df.set_index("date", inplace=True)    # set 'date' as the index for easy time-series plotting
df.sort_index(inplace=True)           # sort rows by date ascending

print(f"Saved {len(df)} rows.  Last date: {df.index.max()}")
df.head()   # .head() shows the first 5 rows

Files in range [2025-10-02 → 2025-11-15]: 43

Island NTL: 100%|██████████| 43/43 [00:08<00:00,  5.02it/s]

Saved 43 rows.  Last date: 2025-11-15 00:00:00

import matplotlib.pyplot as plt
import matplotlib.dates as mdates    # mdates: helpers for formatting date axes

event_date  = pd.Timestamp("2025-10-28")   # pd.Timestamp: a single point in time (like datetime)
event_label = "H. Melissa"

fig, ax = plt.subplots(figsize=(12, 4))   # create a figure and one set of axes; figsize in inches

# Plot the time series: df.index on x-axis, ntl_mean column on y-axis
ax.plot(df.index, df["ntl_mean"],
        color="#005BAC", linewidth=1.8, label="NTL mean (nW/cm²/sr)")

# Draw a vertical dashed line at the hurricane date
ax.axvline(event_date, color="#CC0000", linewidth=1.5, linestyle="--", zorder=3)
# Add a text label just above the line; va/ha control vertical/horizontal alignment
ax.text(event_date, ax.get_ylim()[1],
        f" {event_label}", color="#CC0000",
        fontsize=9, va="top", ha="left", rotation=0)

ax.set_title("Jamaica Nighttime Lights — Island Mean", fontsize=13, weight="bold")
ax.set_ylabel("NTL mean (nW/cm²/sr)")
ax.grid(axis="y", linestyle=":", alpha=0.5)   # horizontal gridlines only; alpha=0.5 makes them semi-transparent
ax.legend(frameon=False)    # show the legend without a box border
plt.tight_layout()          # automatically adjust spacing so labels don't overlap
plt.show()

import contextily as cx    # contextily: adds web map tiles (basemaps) to matplotlib axes
import colorcet as cc       # colorcet: perceptually uniform colormaps

event_date = pd.Timestamp("2025-10-28")

# ── Pick the two target dates ─────────────────────────────────────────────────
# pd.Timedelta: represents a duration; used here to shift a date by 7 days
before_date = event_date - pd.Timedelta(days=7)
after_date  = event_date + pd.Timedelta(days=7)

# 'dir()' returns a list of all variable names currently defined; check if gdf_jam already exists
if 'gdf_jam' not in dir():
    gdf_jam = gdf[gdf.geometry.notnull()].dissolve().to_crs("EPSG:4326")

def find_h5_for_date(target_date, path=PATH_NL, tile=the_tiles[0]):
    """Find the h5 filename that matches a given date and tile, or return None."""
    all_h5 = sorted([f for f in os.listdir(path) if "VNP46A2" in f and f.endswith(".h5") and tile in f])
    for f in all_h5:
        file_date = pd.Timestamp(conJDtoDate(f[11:16]))
        if file_date.date() == target_date.date():   # .date() compares only the date part, ignoring time
            return f
    return None   # explicit None return signals "not found"

def raster_to_array(h5_filename, layer, gdf_boundary, out_folder):
    """Extract clipped raster array + 1-D lon/lat arrays from an h5 file."""
    h5_path     = os.path.join(PATH_NL, h5_filename)
    hdflayer    = gdal.Open(h5_path, gdal.GA_ReadOnly)
    subhdflayer = hdflayer.GetSubDatasets()[layer][0]
    rlayer      = gdal.Open(subhdflayer, gdal.GA_ReadOnly)

    meta  = rlayer.GetMetadata_Dict()
    H, V  = int(meta["HorizontalTileNumber"]), int(meta["VerticalTileNumber"])
    west, north = (10 * H) - 180, 90 - (10 * V)
    east, south = west + 10, north - 10

    tmp  = os.path.join(out_folder, "_map_tmp.tif")
    opts = gdal.TranslateOptions(gdal.ParseCommandLine(
        f"-a_srs EPSG:4326 -a_ullr {west} {north} {east} {south}"
    ))
    gdal.Translate(tmp, rlayer, options=opts)
    rlayer = None

    shapes = [mapping(geom) for geom in gdf_boundary.geometry]
    with rasterio.open(tmp) as src:
        out_image, out_transform = rio_mask(src, shapes, crop=True, nodata=np.nan, filled=True)
        nrows, ncols = out_image.shape[1], out_image.shape[2]
        # Build 1-D arrays of longitude and latitude at the center of each pixel
        lons = out_transform.c + (np.arange(ncols) + 0.5) * out_transform.a   # x-center of each column
        lats = out_transform.f + (np.arange(nrows) + 0.5) * out_transform.e   # y-center of each row

    try:
        os.remove(tmp)
    except Exception:
        pass

    data = out_image[0].astype(float)
    data[data >= 65535] = np.nan   # mask VIIRS fill value
    data[data < 0]      = np.nan
    return data, lons, lats

# ── Load the two rasters ──────────────────────────────────────────────────────
f_before = find_h5_for_date(before_date)
f_after  = find_h5_for_date(after_date)
print(f"Before : {f_before}  ({before_date.date()})")
print(f"After  : {f_after}  ({after_date.date()})")

arr_before, lons, lats = raster_to_array(f_before, 2, gdf_jam, outputFolder)
arr_after,  _,    _    = raster_to_array(f_after,  2, gdf_jam, outputFolder)   # _ discards repeated lons/lats

# ── Shared color scale (robust 2nd-98th percentile across both panels) ────────
# Concatenate both arrays (flattened to 1-D) to get a single distribution for the color scale
both = np.concatenate([arr_before[~np.isnan(arr_before)], arr_after[~np.isnan(arr_after)]])
# Use percentiles (not min/max) to avoid extreme outliers distorting the color scale
vmin, vmax = np.nanpercentile(both, 2), np.nanpercentile(both, 98)

# np.meshgrid turns 1-D lon/lat arrays into 2-D grids needed for pcolormesh
LON, LAT = np.meshgrid(lons, lats)

# ── Plot ──────────────────────────────────────────────────────────────────────
fig, axes = plt.subplots(1, 2, figsize=(16, 8))   # 1 row, 2 columns of subplots
fig.subplots_adjust(bottom=0.15)   # reserve space below maps for colorbar

panel_titles = [
    f"7 days before Hurricane Melissa\n({before_date.strftime('%b %d, %Y')})",
    f"7 days after Hurricane Melissa\n({after_date.strftime('%b %d, %Y')})",
]

# zip() pairs each axis with its array and title so we can loop over all three together
for ax, arr, title in zip(axes, [arr_before, arr_after], panel_titles):
    # pcolormesh plots a 2-D array as a color grid; shading="auto" avoids deprecation warnings
    pcm = ax.pcolormesh(LON, LAT, arr, cmap=cc.cm.bmy, vmin=vmin, vmax=vmax, shading="auto")
    ax.set_title(title, fontsize=13, weight="bold")
    # cx.add_basemap overlays a web map tile layer on the existing axes
    cx.add_basemap(ax, source=cx.providers.CartoDB.Positron, crs="EPSG:4326")
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")

# fig.add_axes([left, bottom, width, height]): manually place a new axes for the colorbar
cbar_ax = fig.add_axes([0.25, 0.05, 0.5, 0.025])
fig.colorbar(pcm, cax=cbar_ax, orientation="horizontal",
             label="NTL Radiance (nW/cm²/sr)")

fig.suptitle("Daily Aggregate Nighttime Light Radiance - Jamaica", fontsize=15, weight="bold")
# fig.text places text at figure coordinates (0=left/bottom, 1=right/top)
fig.text(0.01, 0.01, "Source: NASA Black Marble VNP46A2", fontsize=11)

plt.show()

Before : VNP46A2.A2025294.h10v07.002.2025302162350.h5  (2025-10-21)
After  : VNP46A2.A2025308.h10v07.002.2025322075732.h5  (2025-11-04)

	location	city	sector	DNBvalue1	DNBvalue3
date
2025-10-02	Sangster International Airport (MBJ)	Montego Bay	Airport	30.194336	29.51
2025-10-02	Norman Manley International Airport (KIN)	Kingston	Airport	141.848540	73.05
2025-10-02	Ian Fleming International Airport (OCJ)	Boscobel	Airport	12.989580	8.97
2025-10-02	Ken Jones Aerodrome (Port Antonio)	St. Margarets Bay	Airport	2.032099	1.06
2025-10-02	Tinson Pen Aerodrome (KTP)	Kingston	Airport	98.617030	72.75
...	...	...	...	...	...
2025-11-12	Trench Town Culture Yard Museum	Kingston	Cultural	26.936199	34.22
2025-11-12	Sam Sharpe Square	Montego Bay	Cultural	75.191666	35.63
2025-11-12	Fort Charles (Port Royal)	Port Royal	Cultural	19.353334	3.30
2025-11-12	Fort Montego	Montego Bay	Cultural	8.931674	25.80
2025-11-12	Lovers Leap Lighthouse	NaN	Cultural	1.455617	0.99

	ntl_mean	ntl_sum
date
2025-10-02	2.491267	134775.036133
2025-10-03	2.328361	125962.024996
2025-10-04	2.011105	108798.744972
2025-10-05	1.444410	78141.144171
2025-10-06	1.575248	85219.365989

Daily NTL Data¶

Download h5 files¶

Analysis¶

Coordinates¶

Country Area¶

Plots¶

Time series¶

Maping event day¶