%%bash
# Install pygbif, the GBIF client package imported further down in this notebook
pip install pygbif

Requirement already satisfied: pygbif in /opt/conda/lib/python3.11/site-packages (0.6.4)
Requirement already satisfied: requests>2.7 in /opt/conda/lib/python3.11/site-packages (from pygbif) (2.31.0)
Requirement already satisfied: requests-cache in /opt/conda/lib/python3.11/site-packages (from pygbif) (1.2.0)
Requirement already satisfied: geojson-rewind in /opt/conda/lib/python3.11/site-packages (from pygbif) (1.1.0)
Requirement already satisfied: geomet in /opt/conda/lib/python3.11/site-packages (from pygbif) (1.1.0)
Requirement already satisfied: appdirs>=1.4.3 in /opt/conda/lib/python3.11/site-packages (from pygbif) (1.4.4)
Requirement already satisfied: matplotlib in /opt/conda/lib/python3.11/site-packages (from pygbif) (3.8.4)
Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests>2.7->pygbif) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests>2.7->pygbif) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests>2.7->pygbif) (2.2.1)
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests>2.7->pygbif) (2024.2.2)
Requirement already satisfied: click in /opt/conda/lib/python3.11/site-packages (from geomet->pygbif) (8.1.7)
Requirement already satisfied: contourpy>=1.0.1 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (1.2.0)
Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (1.4.4)
Requirement already satisfied: numpy>=1.21 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (1.24.3)
Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (24.0)
Requirement already satisfied: pillow>=8 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.11/site-packages (from matplotlib->pygbif) (2.9.0)
Requirement already satisfied: attrs>=21.2 in /opt/conda/lib/python3.11/site-packages (from requests-cache->pygbif) (23.2.0)
Requirement already satisfied: cattrs>=22.2 in /opt/conda/lib/python3.11/site-packages (from requests-cache->pygbif) (23.2.3)
Requirement already satisfied: platformdirs>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests-cache->pygbif) (4.2.0)
Requirement already satisfied: url-normalize>=1.4 in /opt/conda/lib/python3.11/site-packages (from requests-cache->pygbif) (1.4.3)
Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib->pygbif) (1.16.0)

import calendar
import os
import pathlib
import requests
import time
import zipfile
from getpass import getpass

import cartopy.crs as ccrs
import panel as pn
import pygbif.occurrences as occ
from glob import glob

import pandas as pd
import geopandas as gpd
import hvplot.pandas
import pygbif.species as species

INFO:NumExpr defaulting to 2 threads.

# Project data folder, kept under the user's home directory:
#   <home>/earth-analytics/data/species-distribution-ESIIL
data_dir = str(
    pathlib.Path.home()
    / 'earth-analytics'
    / 'data'
    / 'species-distribution-ESIIL'
)
# Create the folder together with any missing parents; fine if it exists
pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)

# Sub-folder that holds the GBIF monarch occurrence data
gbif_dir = os.path.join(data_dir, 'monarch-data')

# Set up the ecoregions level III boundary URL
ecoregion_url = ("https://gaftp.epa.gov/EPADataCommons/ORD/Ecoregions/"
                  "cec_na/NA_CEC_Eco_Level3.zip")
# Set up a path to save the data on your machine
ecoregionpath = os.path.join(data_dir, 'NA_CEC_Eco_Level3.zip')

# Don't download twice
if not os.path.exists(ecoregionpath):
    # Download, and don't check the certificate for the EPA
    # NOTE(review): verify=False turns off TLS certificate validation for
    # this request; kept as-is because it was a deliberate choice here.
    ecoregions_response = requests.get(ecoregion_url, verify=False)
    # Fail loudly on an HTTP error instead of saving the error page as the
    # zip: a bad file on disk would be reused forever by the check above
    ecoregions_response.raise_for_status()
    # Save the binary data to a file
    with open(ecoregionpath, 'wb') as ecoregions_file:
        ecoregions_file.write(ecoregions_response.content)

# Read the downloaded ecoregion boundaries into a GeoDataFrame
ecoregions_gdf = gpd.read_file(ecoregionpath)
# Give the name and area columns shorter labels
ecoregions_gdf = ecoregions_gdf.rename(
    columns={'NA_L3NAME': 'name', 'Shape_Area': 'area'})
# Keep only the columns used in the rest of the notebook
ecoregions_gdf = ecoregions_gdf[['name', 'area', 'geometry']]
# Label the index 'ecoregion' so it lines up with the occurrence data later
ecoregions_gdf.index.name = 'ecoregion'

# Quick plot to confirm the boundaries loaded
ecoregions_gdf.plot()

<Axes: >

# Work on a separate copy so the full-resolution boundaries stay untouched
ecoregion_plot = ecoregions_gdf.copy()

# Coarsen the shapes (tolerance of 1000, in the layer's CRS units) so that
# later plotting is faster
ecoregion_plot.geometry = ecoregion_plot.geometry.simplify(tolerance=1000)

# Reproject to Mercator, the projection used for the map
ecoregion_plot = ecoregion_plot.to_crs(crs=ccrs.Mercator())

# Draw it once to make sure the simplified layer renders
ecoregion_plot.hvplot(crs=ccrs.Mercator(), geo=True)

# Set to True to discard any stored GBIF credentials and be asked again
reset_credentials = False
# GBIF needs a username, password, and email.
# Each entry maps an environment variable name to the function used to ask
# for its value and the prompt text shown (getpass hides the password).
credentials = dict(
    GBIF_USER=(input, 'GBIF username:'),
    GBIF_PWD=(getpass, 'GBIF password'),
    GBIF_EMAIL=(input, 'GBIF email'),
)
for env_variable, (prompt_func, prompt_text) in credentials.items():
    # Delete credential from environment if requested
    if reset_credentials:
        os.environ.pop(env_variable, None)
    # Ask for the credential when it is missing or blank and save it to the
    # environment; treating a blank value as missing means an accidental
    # empty answer is asked for again on the next run instead of being kept
    if not os.environ.get(env_variable):
        os.environ[env_variable] = prompt_func(prompt_text)

# Search GBIF for the monarch by scientific name, limited to species rank
species_info = species.name_lookup(q='danaus plexippus', rank='SPECIES')

# Take the top entry of the returned results
first_result = species_info['results'][0]

# Pull out its nubKey to use as the species key
species_key = first_result['nubKey']

# Show the matched species name next to the key as a sanity check
(first_result['species'], species_key)

('Danaus plexippus', 5133088)

# Only download once: skip everything if a .csv is already extracted
gbif_pattern = os.path.join(gbif_dir, '*.csv')
if not glob(gbif_pattern):
    # Submit a query to GBIF only when no earlier request is recorded in the
    # environment, so re-running this cell does not queue duplicate downloads
    if 'GBIF_DOWNLOAD_KEY' not in os.environ:
        gbif_query = occ.download([
            "continent = NORTH_AMERICA",
            # Use the key found by the species lookup, not a hard-coded copy
            f"speciesKey = {species_key}",
            "year = 2023",
            "hasCoordinate = TRUE",
        ])
        os.environ['GBIF_DOWNLOAD_KEY'] = gbif_query[0]
    # Always work with the recorded key, whether new or from an earlier run
    download_key = os.environ['GBIF_DOWNLOAD_KEY']

    # Wait for the download to build, polling every 5 seconds. This also
    # runs for a key left over from an interrupted run, which may not be
    # ready yet.
    while True:
        wait = occ.download_meta(download_key)['status']
        if wait == 'SUCCEEDED':
            break
        if wait in ('CANCELLED', 'FAILED', 'KILLED', 'FILE_ERASED'):
            # The request can never succeed: forget it so the next run
            # submits a fresh one, and stop instead of polling forever
            os.environ.pop('GBIF_DOWNLOAD_KEY', None)
            raise RuntimeError(
                f"GBIF download {download_key} ended with status {wait}")
        time.sleep(5)

    # Download GBIF data (a zip archive saved into data_dir)
    download_info = occ.download_get(download_key, path=data_dir)

    # Unzip GBIF data
    with zipfile.ZipFile(download_info['path']) as download_zip:
        download_zip.extractall(path=gbif_dir)

# Find the extracted .csv file path
gbif_path = glob(gbif_pattern)[0]

# Shell out to `head` to peek at the first lines of the extracted GBIF file
!head $gbif_path

gbifID	datasetKey	occurrenceID	kingdom	phylum	class	order	family	genus	species	infraspecificEpithet	taxonRank	scientificName	verbatimScientificName	verbatimScientificNameAuthorship	countryCode	locality	stateProvince	occurrenceStatus	individualCount	publishingOrgKey	decimalLatitude	decimalLongitude	coordinateUncertaintyInMeters	coordinatePrecision	elevation	elevationAccuracy	depth	depthAccuracy	eventDate	day	month	year	taxonKey	speciesKey	basisOfRecord	institutionCode	collectionCode	catalogNumber	recordNumber	identifiedBy	dateIdentified	license	rightsHolder	recordedBy	typeStatus	establishmentMeans	lastInterpreted	mediaType	issue
4868184771	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/217003637	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		CA		Ontario	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	43.27179	-79.903956	800.0						2023-08-14T08:52	14	8	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	217003637		leewarren	2024-05-20T07:33:39	CC_BY_NC_4_0	leewarren	leewarren			2024-05-28T03:40:01.071Z	StillImage	COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
4868152100	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/153340665	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		US		Georgia	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	31.620569	-81.265035	5.0						2023-03-27T16:14:37	27	3	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	153340665		Christa F. Hayes	2023-04-03T16:49:40	CC_BY_NC_4_0	Christa F. Hayes	Christa F. Hayes			2024-05-28T03:32:03.160Z	StillImage	COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
4868139350	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/216122131	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		US		Ohio	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	40.044079	-83.065383	3.0						2023-09-14T09:29:28	14	9	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	216122131		Craig	2024-05-16T15:15:27	CC_BY_NC_4_0	Craig	Craig			2024-05-28T03:43:39.432Z	StillImage	COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
4868122462	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/215824428	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		US		Wisconsin	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	43.08413	-89.375661	50.0						2023-09-23T17:06:53	23	9	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	215824428		gmzelle	2024-05-15T01:16:46	CC_BY_NC_4_0	gmzelle	gmzelle			2024-05-28T04:07:43.933Z	StillImage	COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
4868121692	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/215795948	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		US		Texas	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	32.624855	-97.559147	29066.0						2023-08-22T10:00	22	8	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	215795948		angel00	2024-05-14T22:11:25	CC_BY_NC_4_0	angel00	angel00			2024-05-28T03:39:58.290Z	StillImage	COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
4868115887	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/182977421	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		US		Maryland	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	39.049817	-76.512772	10.0						2023-09-12T14:11:45	12	9	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	182977421		Cassidy Martin	2023-09-12T18:30:58	CC_BY_4_0	Cassidy Martin	Cassidy Martin			2024-05-28T04:03:36.876Z		COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
4868110167	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/217163459	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		US		California	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	38.685764	-119.813653	17.0						2023-09-27T11:30	27	9	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	217163459		Alpine Watershed Group	2024-05-20T21:57:51	CC_BY_NC_4_0	Alpine Watershed Group	Alpine Watershed Group			2024-05-28T03:40:27.629Z	StillImage	COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
4868082186	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/215922552	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		US		Texas	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	32.899845	-96.78312	12.0						2023-10-10T10:26:45	10	10	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	215922552		pacogomez	2024-05-15T15:20:08	CC_BY_NC_4_0	pacogomez	pacogomez			2024-05-28T04:07:57.848Z	StillImage	CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
4868068554	50c9509d-22c7-4a22-a47d-8c48425ef4a7	https://www.inaturalist.org/observations/181231441	Animalia	Arthropoda	Insecta	Lepidoptera	Nymphalidae	Danaus	Danaus plexippus		SPECIES	Danaus plexippus (Linnaeus, 1758)	Danaus plexippus		CA		Ontario	PRESENT		28eb1a3f-1c15-4a95-931a-4af90ecb574d	43.125176	-81.268889	1871.0						2023-09-01T12:45:13	1	9	2023	5133088	5133088	HUMAN_OBSERVATION	iNaturalist	Observations	181231441		paul_dennehy	2024-05-16T02:04:36	CC_BY_NC_4_0	barnesmike41	barnesmike41			2024-05-28T04:09:20.647Z	StillImage	COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED

# Read the tab-separated GBIF file, keeping only the record ID (used as the
# index), the coordinates and the observation month
gbif_df = pd.read_csv(
    gbif_path,
    sep='\t',
    usecols=['gbifID', 'month', 'decimalLatitude', 'decimalLongitude'],
    index_col='gbifID',
)
# Preview the first rows
gbif_df.head()

# Build a point geometry from each longitude/latitude pair (EPSG:4326)
gbif_gdf = gpd.GeoDataFrame(
    gbif_df,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(
        x=gbif_df['decimalLongitude'],
        y=gbif_df['decimalLatitude'],
    ),
)
# Keep just the observation month and the point geometry
gbif_gdf = gbif_gdf[['month', 'geometry']]
gbif_gdf

# Spatially join observations to ecoregions: each output row pairs an
# ecoregion with one observation point it contains
gbif_ecoregion_gdf = gpd.sjoin(
    # Reproject the ecoregions to the CRS of the GBIF points first
    ecoregions_gdf.to_crs(gbif_gdf.crs),
    gbif_gdf,
    how='inner',
    predicate='contains',
)
# Only the month and the ecoregion name are needed from here on
gbif_ecoregion_gdf = gbif_ecoregion_gdf[['month', 'name']]
gbif_ecoregion_gdf

# Count the observations in every (ecoregion, month) pair
occurrence_df = (
    gbif_ecoregion_gdf
    .groupby(['ecoregion', 'month'])
    .agg(occurrences=('name', 'count'))
)

# Keep only pairs with more than one observation
# (a lone sighting may be a misidentification)
occurrence_df = occurrence_df.loc[occurrence_df['occurrences'] > 1]

# Average count for each ecoregion, taken over its months
mean_occurrences_by_ecoregion = occurrence_df.groupby(level='ecoregion').mean()

# Average count for each month, taken over the ecoregions
mean_occurrences_by_month = occurrence_df.groupby(level='month').mean()

# Normalize each count by both its ecoregion's mean and its month's mean;
# the divisions align on the matching index level
occurrence_df['norm_occurrences'] = (
    occurrence_df['occurrences']
    / mean_occurrences_by_ecoregion['occurrences']
    / mean_occurrences_by_month['occurrences']
)
occurrence_df

from bokeh.models import HoverTool
import holoviews as hv

# This version of the plot aims to show hover values without
# scientific notation

# Attach the monthly occurrence statistics to the simplified ecoregions
occurrence_gdf = ecoregion_plot.join(occurrence_df)

# Fix the map extent up front so it stays put while the slider moves
xmin, ymin, xmax, ymax = occurrence_gdf.total_bounds

# Month slider: labelled with month names, valued 1 through 12
slider = pn.widgets.DiscreteSlider(
    options={
        month_name: month_number
        for month_number, month_name in enumerate(calendar.month_name)
        if month_number > 0
    },
    name='month',
)

# Hover tool intended to print the normalized value as a plain decimal
# NOTE(review): confirm that the {'.0f'} format is what Bokeh expects here
# and that zero decimal places is intended for values this small.
hover = HoverTool(
    tooltips=[("norm_occurrences", "@norm_occurrences{'.0f'}")],
)

# Map of normalized occurrences per ecoregion, one frame per month
migration_plot = occurrence_gdf.hvplot(
    title="Monarch Butterfly Observations by Month",
    # Colour each ecoregion by its normalized occurrence value
    c='norm_occurrences',
    colormap='reds',
    # One frame per month, driven by the slider placed under the map
    groupby='month',
    widgets={'month': slider},
    widget_location='bottom',
    # Geographic plot over background tiles
    geo=True,
    crs=ccrs.Mercator(),
    tiles='EsriWorldLightGrayBase',
    # Constant extent and size
    xlim=(xmin, xmax),
    ylim=(ymin, ymax),
    frame_height=550,
    yformatter='%.0f',
    tools=[hover],
)

# Write a standalone HTML copy with the widget states embedded
migration_plot.save('monarch-migration_no_sci.html', embed=True)

# Display the plot in the notebook
migration_plot

WARNING:bokeh.core.validation.check:W-1005 (FIXED_SIZING_MODE): 'fixed' sizing mode requires width and height to be set: figure(id='p11664', ...)

BokehModel(combine_events=True, render_bundle={'docs_json': {'37debd06-2710-4609-b5b3-74de1e6e8542': {'version…

%%capture
%%bash
# Convert every notebook in the current directory to HTML; the enclosing
# %%capture magic keeps the converter's output out of the notebook
jupyter nbconvert *.ipynb --to html

	month	geometry
gbifID
4868184771	8	POINT (-79.90396 43.27179)
4868152100	3	POINT (-81.26503 31.62057)
4868139350	9	POINT (-83.06538 40.04408)
4868122462	9	POINT (-89.37566 43.08413)
4868121692	8	POINT (-97.55915 32.62485)
...	...	...
4011504290	1	POINT (-95.49225 30.16971)
4011503220	1	POINT (-122.06137 37.58708)
4011499223	1	POINT (-122.03414 37.51129)
4011493281	1	POINT (-97.40997 25.82311)
4011490187	1	POINT (-122.01204 37.45983)

	month	name
ecoregion
57	7	Thompson-Okanogan Plateau
57	6	Thompson-Okanogan Plateau
57	7	Thompson-Okanogan Plateau
57	6	Thompson-Okanogan Plateau
57	8	Thompson-Okanogan Plateau
...	...	...
2545	6	Eastern Cascades Slopes and Foothills
2545	7	Eastern Cascades Slopes and Foothills
2545	6	Eastern Cascades Slopes and Foothills
2545	9	Eastern Cascades Slopes and Foothills
2545	9	Eastern Cascades Slopes and Foothills

Mapping Monarch Butterfly migration¶

Create a folder for your data¶

Define your study area – the ecoregions of North America¶

Download and save ecoregion boundaries¶

Load the ecoregions into Python¶

Create a simplified `GeoDataFrame` for plotting¶

Access locations and times of Monarch Butterfly encounters¶

Register and log in to GBIF¶

Get the species key¶

Download data from GBIF¶

Load the GBIF data into Python¶

Convert the GBIF data to a GeoDataFrame¶

Count the number of observations in each ecoregion, during each month of 2023¶

Identify the ecoregion for each observation¶

Count the observations in each ecoregion each month¶

Plot the Danaus plexippus observations by month¶

	decimalLatitude	decimalLongitude	month
gbifID
4868184771	43.271790	-79.903956	8
4868152100	31.620569	-81.265035	3
4868139350	40.044079	-83.065383	9
4868122462	43.084130	-89.375661	9
4868121692	32.624855	-97.559147	8

		occurrences	norm_occurrences
ecoregion	month
57	6	16	0.052299
	7	14	0.022974
	8	6	0.008173
60	6	2	0.016343
60	7	3	0.012308
...	...	...	...
2528	8	2	0.016346
2545	6	4	0.015690
	7	18	0.035446
	8	15	0.024519
	9	3	0.005624

Mapping Monarch Butterfly migration¶

Create a folder for your data¶

Define your study area – the ecoregions of North America¶

Download and save ecoregion boundaries¶

Load the ecoregions into Python¶

Create a simplified GeoDataFrame for plotting¶

Access locations and times of Monarch Butterfly encounters¶

Register and log in to GBIF¶

Get the species key¶

Download data from GBIF¶

Load the GBIF data into Python¶

Convert the GBIF data to a GeoDataFrame¶

Count the number of observations in each ecoregion, during each month of 2023¶

Identify the ecoregion for each observation¶

Count the observations in each ecoregion each month¶

Plot the Danaus plexippus observations by month¶

Create a simplified `GeoDataFrame` for plotting¶