Albuquerque Urban Greenspace¶


This project explores possible correlative relationships between various socioeconomic factors and greenspace in Albuquerque, New Mexico. Albuquerque is the largest city in New Mexico, with a population of 500,000 not including the surrounding communities. It has a semi-arid climate with a monsoon seasonal pattern, bringing brutal summers but relatively mild winters. It also sits at a fairly high elevation, which further increases its aridity.

Economic indicators such as median income and property value will be queried through the US Census Bureau API for the American Community Survey (ACS). The geometries from the resulting GeoDataFrames will then be used to query the Microsoft Planetary Computer API, which returns a list of URLs of aerial-imagery GeoTIFFs covering each census tract. These can be loaded, clipped, and used to calculate NDVI from the band data contained in the rasters.
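
As a preview of the core computation, below is a minimal sketch of the NDVI calculation used later in this notebook. The file name naip_tile.tif is a placeholder, and the band order (red in band 1, near-infrared in band 4) follows the NAIP convention used in the processing code further down.

    import rioxarray as rxr
    import xarray as xr

    # Open a NAIP tile with the nodata mask applied (file name is a placeholder)
    naip = rxr.open_rasterio("naip_tile.tif", masked=True).squeeze()

    red = naip.sel(band=1)   # red band
    nir = naip.sel(band=4)   # near-infrared band

    # NDVI = (NIR - Red) / (NIR + Red), guarding against division by zero
    ndvi = xr.where((nir + red) == 0, 0, (nir - red) / (nir + red))
    print(float(ndvi.mean()))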

Setting up analysis¶

In [ ]:
import os
import sys
print(sys.path)

# Import modules
import census
import earthpy as et
import geopandas as gpd
import hashlib
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pystac_client
import rioxarray as rxr
import rioxarray.merge as rxrmerge
import shapely.geometry
import us
import xarray as xr

from shapely.geometry import shape

from hvplot.plotting import scatter_matrix
from ipywidgets import interact, Dropdown
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

data_dir = os.path.join(et.io.HOME, et.io.DATA_NAME)
abq_dir = os.path.join(data_dir, 'albuquerque')


if not os.path.exists(abq_dir):
    os.makedirs(abq_dir)
In [ ]:
# Load the city boundary shapefile and cache it locally
abq_path = os.path.join(abq_dir, 'cityboundary.shp')
if not os.path.exists(abq_path):
    abq_url = ('jurisdiction.zip')
    gpd.read_file(abq_url).to_file(abq_path)
In [ ]:
NM_fips = us.states.NM.fips

year = "2021"  # Example year, change as needed
census_url = (f"https://www2.census.gov/geo/tiger/TIGER{year}/TRACT/tl_{year}_{NM_fips}_tract.zip")

NM_census_path = os.path.join(data_dir, 'NM2020census.shp')
if not os.path.exists(NM_census_path):
    gpd.read_file(census_url).to_file(NM_census_path)
In [ ]:
tract_gdf = gpd.read_file(NM_census_path)  # Census tracts
abq_gdf = gpd.read_file(abq_path).to_crs('epsg:4269')  # ABQ boundary GDF
abq_gdf = abq_gdf[abq_gdf['JURISDICTI'] == 'ALBUQUERQUE']
# Reproject tract_gdf to match the CRS
tract_gdf = tract_gdf.to_crs(abq_gdf.crs)

# Perform the spatial join
abq_tracts = gpd.sjoin(tract_gdf, abq_gdf, how="inner", predicate='within')

print(abq_gdf.crs)
print(abq_tracts.crs)
epsg:4269
EPSG:4269
In [ ]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 10))
abq_gdf.plot(ax=ax, color='none', edgecolor='blue', linewidth=2)  # Plot city boundaries in blue
abq_tracts.plot(ax=ax, color='green', alpha=0.5, edgecolor = 'white')  # Plot tracts in green
plt.show()
[Figure: Albuquerque city boundary (blue) with the census tracts that fall within it (green)]

API KEY¶

This step will require a valid API key from the US Census Bureau. One can be obtained for free at https://api.census.gov/data/key_signup.html
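
As a quick sanity check (separate from the keyring-based workflow below), the key can be tested with a raw request to the ACS 5-year endpoint. The year, variable, and placeholder key in this sketch are for illustration only.

    import requests

    # Hypothetical check: request total population (B01003_001E) for New Mexico (state FIPS 35)
    test_key = "YOUR_CENSUS_API_KEY"  # placeholder; use the key obtained from the signup page
    url = (
        "https://api.census.gov/data/2021/acs/acs5"
        f"?get=NAME,B01003_001E&for=state:35&key={test_key}"
    )
    response = requests.get(url)
    print(response.status_code)
    print(response.json()[:2])  # header row plus the first data row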

In [ ]:
#Code from Ed

import getpass
import keyring



def get_api_key(service_name, username, override=False):
    # Get the API key from the keyring
    api_key = keyring.get_password(service_name, username)

    # If the API key is None or override is True, prompt for a new key
    if api_key is None or override:
        api_key = getpass.getpass(prompt='Enter U.S. Census Bureau API Key: ')
        keyring.set_password(service_name, username, api_key)

    return api_key


service_name = "US_Census_Bureau_ACS"
username = "api_key"

# API key expires in 48 hours. Set override to True to force a new key.
api_key = get_api_key(service_name, username, override=True)
In [ ]:
from census import Census
from us import states

c = Census(api_key)
In [ ]:
def get_all_tracts_for_state(api_key, state_fips, year):
    c = Census(api_key)
    try:
        # Fetching data for a basic variable, but we're interested in the tract IDs
        data = c.acs5.get(('B01003_001E',), {'for': 'tract:*', 'in': f'state:{state_fips}'}, year=year)
        tracts = set([d['tract'] for d in data])  # Extracting unique tract IDs
        return list(tracts)
    except Exception as e:
        print(f"Error occurred during fetching tracts: {e}")
        return []

# Example usage
state_fips = states.NM.fips
year = 2021
all_tracts = get_all_tracts_for_state(api_key, state_fips, year)
print(f"Number of tracts in New Mexico: {len(all_tracts)}")
Number of tracts in New Mexico: 466
In [ ]:
# Step 1: Define Basic Parameters
variables = ["B06011_001E",'B19001_001E','B25077_001E','B25064_001E','B25034_001E' ]  
# Median income, income in the last 12 months, median property value, median gross rent, and year structure built
year = 2021
cache_dir = os.path.join(data_dir, "census_dir")
if not os.path.exists(cache_dir):
    os.mkdir(cache_dir)
In [ ]:
from census import Census
from us import states

def get_all_tracts_for_state(api_key, state_fips, year):
    c = Census(api_key)
    filters = {"for": "tract:*", "in": f"state:{state_fips}"}
    all_tracts = c.acs5.get(('NAME',), filters, year=year)
    return [(tract["state"], tract["county"], tract["tract"]) for tract in all_tracts]

# Get all tracts for New Mexico
all_tracts = get_all_tracts_for_state(api_key, state_fips, year)

print(all_tracts)
In [ ]:
# Step 3: Batch Processing
def create_batches(data_list, batch_size):
    for i in range(0, len(data_list), batch_size):
        yield data_list[i:i + batch_size]

batch_size = 50  # Adjust as needed
tract_batches = list(create_batches(all_tracts, batch_size))
In [ ]:
def download_state_tract_data(api_key, state_fips, tract_batches, variables, year, cache_dir):
    c = Census(api_key)
    all_data = []  # List to store data from all batches

    for batch_index, batch in enumerate(tract_batches):
        batch_id = hashlib.md5('_'.join([f"{state}-{county}-{tract}" for state, county, tract in batch]).encode()).hexdigest()
        cache_file = os.path.join(cache_dir, f"{state_fips}_{batch_id}.csv")

        print(f"Processing batch {batch_index + 1}/{len(tract_batches)}: {batch_id}")

        if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
            print(f"Cache file found for batch {batch_id}, loading data from cache.")
            batch_data = pd.read_csv(cache_file)
            print(f"Loaded {len(batch_data)} rows from cache.")
        else:
            print(f"No cache file found for batch {batch_id}, downloading data.")
            batch_data_list = []
            for state, county, tract in batch:
                print(f"Querying API for state: {state}, county: {county}, tract: {tract}")
                data = c.acs5.get(variables, {'for': f'tract:{tract}', 'in': f'state:{state} county:{county}'}, year=year)
                if data:
                    batch_data_list.append(pd.DataFrame(data))
                    print(f"Saving to {cache_file} in {cache_dir}")

            if batch_data_list:
                batch_data = pd.concat(batch_data_list, ignore_index=True)
                print(f"Batch {batch_id} data size: {len(batch_data)} rows. Saving to cache.")
                batch_data.to_csv(cache_file, index=False)

        all_data.append(batch_data)  # Append the batch data to the all_data list

    # Concatenate all batch data into a single DataFrame
    state_tract_data = pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()
    print(f"Total data size: {len(state_tract_data)} rows")

    return state_tract_data
In [ ]:
state_fips = us.states.NM.fips  # New Mexico FIPS code
year = 2021
variables = ["B06011_001E",'B19001_001E','B25077_001E','B25064_001E','B25034_001E' ]  

# tract_batches is a list of batches, each batch being a list of (state, county, tract) tuples
# cache_dir was created above

state_tract_data = download_state_tract_data(api_key, state_fips, tract_batches, variables, year, cache_dir)
 
In [ ]:
import pandas as pd
# Convert both values to int
abq_tracts['TRACTCE'] = abq_tracts['TRACTCE'].astype(int)
state_tract_data['tract'] = state_tract_data['tract'].astype(int)


# Merging the DataFrames on columns with different names
abq_gdf_merged = pd.merge(abq_tracts, 
                      state_tract_data, 
                      how='inner', 
                      left_on='TRACTCE', 
                      right_on='tract')

# Check for missing data and remove tracts without income data 
abq_gdf_merged = abq_gdf_merged.dropna(subset=['B06011_001E',
                                               'B19001_001E', 
                                               'B25077_001E',
                                                'B25064_001E',
                                                'B25034_001E' ] ) 

# Remove tracts with nonsensical values below 0
abq_gdf_merged = abq_gdf_merged[(abq_gdf_merged['B06011_001E'] >= 0) 
                                & (abq_gdf_merged['B25077_001E']>=0)
                                & (abq_gdf_merged['B19001_001E']>=0)
                                & (abq_gdf_merged['B25064_001E']>=0)
                                & (abq_gdf_merged['B25034_001E']>=0)]

abq_gdf_merged = abq_gdf_merged.to_crs('EPSG:4326')


print(abq_gdf_merged.head())
In [ ]:
min_income = abq_gdf_merged['B06011_001E'].min()
max_income = abq_gdf_merged['B06011_001E'].max()
print(f"Minimum Income: {min_income}, Maximum Income: {max_income}")

abq_gdf_merged.plot(column='B06011_001E',
                    legend=True,
                    figsize=(10, 10),
                    cmap='viridis',
                    vmin=min_income,  # Set the minimum value
                    vmax=max_income)  # Set the maximum value
plt.title('Median Income by Census Tract in Albuquerque')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.show()
Minimum Income: 7437.0, Maximum Income: 64207.0
[Figure: Median Income by Census Tract in Albuquerque]
In [ ]:
import pystac_client

# Initialize STAC client
e84_catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")

# Define a simple GeoJSON geometry for testing
test_geometry = {
    "type": "Point",
    "coordinates": [-90.199402, 38.627003]  # Coordinates for St. Louis, MO
}

# Define a time range (you can adjust this to your needs)
test_datetime = "2020-01-01/2021-01-01"

# Perform the test search
try:
    search = e84_catalog.search(
        collections=["naip"],
        intersects=test_geometry,
        datetime=test_datetime
    )

    # Fetch items
    items = list(search.items())
    print(f"Found {len(items)} items.")

    # Optionally, print the IDs of the first few items
    for item in items[:5]:
        print(item.id)

except Exception as e:
    print(f"An error occurred: {e}")
Found 2 items.
mo_m_3809031_nw_15_060_20200725
mo_m_3809023_sw_15_060_20200725
In [ ]:
# Initialize the STAC client
client = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")

# Path to cache file
cache_file_path = 'stac_results.json'

# Function to save stac_results to cache
def save_to_cache(stac_results, cache_file_path):
    with open(cache_file_path, 'w') as file:
        json.dump(stac_results, file)

# Try to load cached results
if os.path.exists(cache_file_path):
    with open(cache_file_path, 'r') as file:
        stac_results = json.load(file)
else:
    stac_results = []

# Initialize cached_df with a 'tract_id' column if stac_results is empty or the file doesn't exist
if not stac_results:
    cached_df = pd.DataFrame(columns=['tract_id'])
else:
    # Convert list of dicts to DataFrame for easier checking
    cached_df = pd.DataFrame(stac_results)

# Iterate over each tract in the GeoDataFrame
for index, row in abq_gdf_merged.iterrows():
    tract_id = row['tract']
    if tract_id in cached_df['tract_id'].values:
        print(f"Skipping tract ID: {tract_id}, already processed.")
        continue  # Skip this tract if already processed
    
    geometry = row['geometry']  # The geometry column from the GeoDataFrame

    # Ensure the geometry is in a format that can be serialized to GeoJSON
    if not isinstance(geometry, dict):
        geometry = shapely.geometry.mapping(geometry)  # Convert Shapely object to GeoJSON
    
    # Perform the search
    search = client.search(
        collections=["naip"],
        intersects=geometry,
        datetime="2020"
    )

    # Process the search results
    items = list(search.items())
    print(f"Found {len(items)} items for tract ID: {tract_id}")

    if items:
        item_urls = [i.assets['image'].href for i in items]
        stac_results.append({'tract_id': tract_id, 'urls': item_urls, 'geometry': geometry})
        # Save updated results to cache after each tract is processed
        save_to_cache(stac_results, cache_file_path)

print("Updated stac_results cached.")
In [ ]:
stac_results_df = pd.DataFrame(stac_results)  # Convert list of dicts to DataFrame for easier processing

# Load existing stats from CSV if it exists
stats_path = 'abq_ndvi_stats.csv'


    
def compute_ndvi_by_tract(tract_data, stats_path):
    '''Iterates through all tracts and loads in each url, merging rasters
    if there are more than one for a particular track, then computes NDVI
    and appends tract info to stats csv file.
    '''

    # Load existing stats from CSV if it exists, otherwise create an empty DataFrame with the correct columns
    if os.path.exists(stats_path):
        existing_stats_df = pd.read_csv(stats_path)
    else:
        columns = ['tract_id', 
                'ndvi_min', 
                'ndvi_max', 
                'ndvi_mean', 
                'ndvi_25pctl', 
                'ndvi_50pctl', 
                'ndvi_75pctl', 
                'fraction_ndvi_gt_0_12']
        existing_stats_df = pd.DataFrame(columns=columns)
        existing_stats_df.to_csv(stats_path, index=False) 


    tract_id = tract_data['tract_id']
    urls = list(tract_data['urls'])
    
    if not urls:
        print("No URLs to process for tract", tract_id)
        return

    geometry_dict = tract_data['geometry']

    # Convert the dictionary to a Shapely geometry object
    geometry_shapely = shape(geometry_dict)
    
    # Wrap the Shapely geometry object into a GeoSeries
    geometry_gseries = gpd.GeoSeries([geometry_shapely], crs="EPSG:4326")    
    
    # Skip processing if NDVI stats for this tract already exist
    if tract_id in existing_stats_df['tract_id'].values:
        print(f"NDVI stats for tract {tract_id} already exist. Skipping...")
        return
    
    # Load and merge rasters from url
    tract_rasters = None
    for url in urls:
        print(f'Loading raster from {url}')
        raster_data = rxr.open_rasterio(url, masked=True).squeeze()
        print(raster_data.shape)
        tract_geometry = geometry_gseries.to_crs(raster_data.rio.crs)
        print(tract_geometry)
        
        minx, miny, maxx, maxy = tract_geometry.total_bounds
        print(f'Cropping raster...')
        cropped_raster_data = raster_data.rio.clip_box(minx=minx,
                                                        miny=miny, 
                                                        maxx=maxx, 
                                                        maxy=maxy,
                                                         auto_expand=True)
   
        
        print(cropped_raster_data.shape)
        
        print(f'Clipping raster...')
        clipped_raster_data =  cropped_raster_data.rio.clip(tract_geometry, all_touched=True).fillna(0)
        if tract_rasters is None:
            tract_rasters = clipped_raster_data
        else: 
            tract_rasters = rxrmerge.merge_arrays([tract_rasters, clipped_raster_data])
        
    if tract_rasters.shape[0] < 4:
        print(f"Raster for tract {tract_id} does not contain the necessary bands for NDVI calculation.")
        return
    
        
    # Calculate NDVI stats
    nir = tract_rasters.sel(band=4)
    print(nir)
    red =  tract_rasters.sel(band=1)
    print(red)
    ndvi_da = xr.where((nir + red) == 0, 0, (nir - red) / (nir + red))

    # Eliminate infinite values
    ndvi_da = ndvi_da.where(np.isfinite(ndvi_da), np.nan)

    print(ndvi_da)

    new_ndvi_stats = pd.DataFrame([{
        'tract_id': tract_id,
        'ndvi_min': float(ndvi_da.min(skipna=True).values),
        'ndvi_max': float(ndvi_da.max(skipna=True).values),
        'ndvi_mean': float(ndvi_da.mean(skipna=True).values),
        'ndvi_25pctl': float(np.nanpercentile(ndvi_da, 25)),
        'ndvi_50pctl': float(np.nanpercentile(ndvi_da, 50)),
        'ndvi_75pctl': float(np.nanpercentile(ndvi_da, 75)),
        'fraction_ndvi_gt_0_12': float(((ndvi_da > 0.12).sum(skipna=True) / ndvi_da.count()).values)}])
    
    print(new_ndvi_stats)
    
    # Append new stats to the existing DataFrame and save to CSV
    updated_stats_df = pd.concat([existing_stats_df, new_ndvi_stats], ignore_index=True)
    updated_stats_df.to_csv(stats_path, index=False)
    print(f'Stats for tract {tract_id} updated in {stats_path}.')

    

    

    
In [ ]:
for tract in stac_results:
    compute_ndvi_by_tract(tract, stats_path)

DROPDOWN PLOT COMPARISON¶

In [ ]:
# Load in full stats file as pandas DataFrame
ndvi_stats_df = pd.read_csv(stats_path)

# Prep GeoDataFrame for merging and rename columns
abq_gdf_merged.rename(columns={'B06011_001E': 'Median Income',
                                'B19001_001E': 'Income sources in last 12 Months', 
                                'B25077_001E': 'Estimated Property Value',
                                'B25064_001E': 'Monthly Gross Rent',
                                'B25034_001E': 'Infrastructure Years Built (Total)',
                                "TRACTCE": 'tract_id'}, inplace=True)

# Merge census data with NDVI stats
abq_ndvi_gdf = abq_gdf_merged.merge(ndvi_stats_df, on='tract_id')

# Function to plot selected data
def plot_data(selected_column):
    fig, ax = plt.subplots(figsize=(10, 6))
    abq_ndvi_gdf.plot(column=selected_column, 
                      legend=True, 
                      ax=ax, 
                      cmap='viridis',
                      legend_kwds={'label': f"{selected_column} by Census Tract"},
                      vmin=abq_ndvi_gdf[selected_column].min(), 
                      vmax=abq_ndvi_gdf[selected_column].max())
    ax.set_title(f'{selected_column} by Census Tract in Albuquerque')
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    plt.tight_layout()
    plt.show()

# Create dropdown and interact function call
column_dropdown = Dropdown(options=['Median Income', 
                                    'Income sources in last 12 Months', 
                                    'Estimated Property Value', 
                                    'Monthly Gross Rent',
                                    'Infrastructure Years Built (Total)',
                                      'ndvi_mean'])
interact(plot_data, selected_column=column_dropdown)
interactive(children=(Dropdown(description='selected_column', options=('Median Income', 'Income sources in las…
Out[ ]:
<function __main__.plot_data(selected_column)>

Model Description¶

In order to evaluate whether the data can reasonably be fit with an ordinary least squares regression, the following assumptions are necessary:

  • The chosen economic variables are representative of the actual socioeconomic demographics of each census tract, with a sufficient sample size.
  • That the reflectance values used to calculate NDVI come from actual greenspace and not from other reflective surfaces (e.g., that there are not a bunch of red buildings everywhere).
  • That NDVI and economic values per census tract are not being influenced by additional factors that lie outside the scope of the analysis.
  • That the datasets for the economic indicators and the imagery used to compute NDVI are aligned spatially and temporally.

The objective of this model is to explore the relationship between urban greenspace and the socioeconomic level of a neighborhood, to determine whether the NDVI of the greenspace can be predicted from the economic indicators derived from census data. To evaluate fit, we need metrics that are useful for predicting socioeconomic level, such as median income, median property values, and tax brackets, assuming that these follow a roughly Gaussian distribution. We also seek to identify collinearity between these indicators and metrics describing the amount and health of greenspace in the urban area, to measure the likelihood that either the economic level or the vegetation could reasonably be predicted from the other for any particular census tract.
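
As a quick first look at that collinearity question before fitting anything, a pairwise correlation matrix of the merged data can be computed directly. This is a minimal sketch assuming the renamed columns produced in the merge step above.

    # Pairwise Pearson correlations between the economic indicators and mean NDVI
    cols = ['Median Income', 'Estimated Property Value', 'Monthly Gross Rent', 'ndvi_mean']
    print(abq_ndvi_gdf[cols].corr().round(2))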

Advantages:

  • If there is a clear relationship between the dependent and independent variables, a least squares linear regression will make it evident.
  • It allows the dependent variable to be plotted as a function of the independent variable to show linearity.
  • If there are corroborating factors, a least squares linear regression can serve as a basis for exploring their relationship to the variables.

Disadvantages:

  • If the data points are not strongly correlated, a least squares linear regression will not show the existing relationship well.
  • A least squares regression may overshadow variable relationships that would be better explored with a different model.
  • Even if the variables can be mapped in a linear relationship, this does not necessarily imply causality.
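
For reference, the model fit to each indicator below is a one-variable ordinary least squares regression: for tract i with indicator value x_i and mean NDVI y_i,

    \hat{y}_i = \beta_0 + \beta_1 x_i,
    \qquad
    (\hat{\beta}_0, \hat{\beta}_1) = \arg\min_{\beta_0,\, \beta_1} \sum_i \left( y_i - \beta_0 - \beta_1 x_i \right)^2

The coefficient of determination (R^2) reported later measures how much of the variance in y is explained by this fitted line.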

Data Transformations¶

  • I applied a logarithmic transformation to the economic indicators and the NDVI mean to reduce skew that could affect the model.
  • I applied both normalization and standardization to the log-transformed economic indicators and NDVI mean, storing the results in separate columns to compare through a scatterplot matrix. This ensures that the variables are compared on similar scales relative to each other.
  • Tracts with no data were already dropped when creating the dataframe.
In [ ]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Initialize scalers
scaler = MinMaxScaler()
std_scaler = StandardScaler()

# Existing code
abq_ndvi_gdf['log_median_income'] = np.log(abq_ndvi_gdf['Median Income'] + 1)  # Adding 1 to avoid log(0)
abq_ndvi_gdf['log_ndvi_mean'] = np.log(abq_ndvi_gdf['ndvi_mean'] + 1) 
abq_ndvi_gdf['log_ndvi_75pctl'] = np.log(abq_ndvi_gdf['ndvi_75pctl'] + 1) 

# New variables: log transformation
abq_ndvi_gdf['log_income_sources_last_12_months'] = np.log(abq_ndvi_gdf['Income sources in last 12 Months'] + 1)
abq_ndvi_gdf['log_estimated_property_value'] = np.log(abq_ndvi_gdf['Estimated Property Value'] + 1)
abq_ndvi_gdf['log_ndvi_min'] = np.log(abq_ndvi_gdf['ndvi_min'] + 1)
abq_ndvi_gdf['log_monthly_rent'] = np.log(abq_ndvi_gdf['Monthly Gross Rent'] + 1)


# Normalizing new and existing variables
abq_ndvi_gdf['Normalized Median Income'] = scaler.fit_transform(abq_ndvi_gdf[['log_median_income']].values.reshape(-1, 1))
abq_ndvi_gdf['Normalized NDVI Mean'] = scaler.fit_transform(abq_ndvi_gdf[['log_ndvi_mean']].values.reshape(-1, 1))
abq_ndvi_gdf['Normalized NDVI 75ptcl'] = scaler.fit_transform(abq_ndvi_gdf[['log_ndvi_75pctl']].values.reshape(-1, 1))
abq_ndvi_gdf['Normalized Income Sources'] = scaler.fit_transform(abq_ndvi_gdf[['log_income_sources_last_12_months']].values.reshape(-1, 1))
abq_ndvi_gdf['Normalized Property Value'] = scaler.fit_transform(abq_ndvi_gdf[['log_estimated_property_value']].values.reshape(-1, 1))
abq_ndvi_gdf['Normalized Monthly Rent'] = scaler.fit_transform(abq_ndvi_gdf[['log_monthly_rent']].values.reshape(-1, 1))
abq_ndvi_gdf['Normalized NDVI Min'] = scaler.fit_transform(abq_ndvi_gdf[['log_ndvi_min']].values.reshape(-1, 1))

# Standardizing new and existing variables
abq_ndvi_gdf['Standardized Median Income'] = std_scaler.fit_transform(abq_ndvi_gdf[['log_median_income']].values.reshape(-1, 1))
abq_ndvi_gdf['Standardized NDVI Mean'] = std_scaler.fit_transform(abq_ndvi_gdf[['log_ndvi_mean']].values.reshape(-1, 1))
abq_ndvi_gdf['Standardized NDVI 75ptcl'] = std_scaler.fit_transform(abq_ndvi_gdf[['log_ndvi_75pctl']].values.reshape(-1, 1))
abq_ndvi_gdf['Standardized Income Sources'] = std_scaler.fit_transform(abq_ndvi_gdf[['log_income_sources_last_12_months']].values.reshape(-1, 1))
abq_ndvi_gdf['Standardized Property Value'] = std_scaler.fit_transform(abq_ndvi_gdf[['log_estimated_property_value']].values.reshape(-1, 1))
abq_ndvi_gdf['Standardized NDVI Min'] = std_scaler.fit_transform(abq_ndvi_gdf[['log_ndvi_min']].values.reshape(-1, 1))

# Select columns of interest for the scatter matrix
columns_of_interest = ['Normalized Median Income', 'Normalized Property Value', 'Normalized Monthly Rent', 'Normalized NDVI Mean', 'Standardized NDVI 75ptcl']

# Create the scatter matrix
scatter_matrix(abq_ndvi_gdf[columns_of_interest])
Out[ ]:

Fitting to a Least Squares Regression model¶

Here is where we can finally train the model to predict what the NDVI might be for the given economic indicators. We create a training/test split using 20% of the data for testing and feed the training data into the regression model. The fitted model will output a predicted NDVI for any test values (x values) of the income statistics fed to it. We then plot the actual measured results against the line predicted by the least squares regression, and finally store the predicted and measured results in a dataframe for comparison.

In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

# Feature list: these should correspond to the normalized and logged columns created earlier
features = ['Normalized Median Income', 'Normalized Income Sources', 'Normalized Property Value']
target = 'Normalized NDVI Mean'

# Initialize scalers for each feature for normalization reversal
scalers = {feature: MinMaxScaler() for feature in features}
scalers[target] = MinMaxScaler()  # Adding a scaler for the target as well

# Fit scalers on the full dataset to be able to inverse transform later
for feature in features + [target]:
    scalers[feature].fit(abq_ndvi_gdf[[feature]])

# Preparing DataFrames and fitting models
results = []
fig, axes = plt.subplots(nrows=1, ncols=len(features), figsize=(15, 5), sharey=True)

for i, feature in enumerate(features):
    X = abq_ndvi_gdf[[feature]].values
    y = abq_ndvi_gdf[target].values  # Target vector is the normalized NDVI

    # Splitting the data into training/testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Creating and training the linear regression model
    regr = LinearRegression()
    regr.fit(X_train, y_train)

    # Predictions
    y_pred = regr.predict(X_test)

    # Reversing normalization for actual and predicted values
    y_test_actual = scalers[target].inverse_transform(y_test.reshape(-1, 1)).flatten()
    y_pred_actual = scalers[target].inverse_transform(y_pred.reshape(-1, 1)).flatten()

    # Reversing log transformation
    y_test_exp = np.exp(y_test_actual) - 1  # Assuming log(x + 1) was used
    y_pred_exp = np.exp(y_pred_actual) - 1

    # Collecting results in a DataFrame
    df_results = pd.DataFrame({'Actual': y_test_exp, 'Predicted': y_pred_exp})
    results.append(df_results)

    # Plotting results
    ax = axes[i]
    ax.scatter(X_test, y_test_exp, color="black", label='Actual data')
    ax.plot(X_test, y_pred_exp, color="blue", linewidth=2, label='Prediction')
    ax.set_title(f'Model for {feature}')
    ax.set_xlabel(f'{feature}')
    ax.set_ylabel('NDVI Mean' if i == 0 else "")
    ax.legend()

plt.tight_layout()
plt.show()

# Combining and saving results
combined_results = pd.concat(results, keys=features)
combined_results.to_csv('predicted_vs_measured.csv', index=True)
predict_df = pd.read_csv('predicted_vs_measured.csv')

print(predict_df.head())

# Example and performance metrics
print("Example results from the first model:")
print(results[0].head())
print("\nCoefficients: \n", regr.coef_)
print("Mean squared error: %.2f" % mean_squared_error(y_test_exp, y_pred_exp))
print("Coefficient of determination (R^2): %.2f" % r2_score(y_test_exp, y_pred_exp))
[Figure: actual vs. predicted NDVI mean for each normalized economic indicator]
                 Unnamed: 0  Unnamed: 1    Actual  Predicted
0  Normalized Median Income           0  0.986380   0.728381
1  Normalized Median Income           1  0.749880   0.712661
2  Normalized Median Income           2  0.786089   0.699502
3  Normalized Median Income           3  0.741052   0.715726
4  Normalized Median Income           4  0.293868   0.704962
Example results from the first model:
     Actual  Predicted
0  0.986380   0.728381
1  0.749880   0.712661
2  0.786089   0.699502
3  0.741052   0.715726
4  0.293868   0.704962

Coefficients: 
 [-0.14234052]
Mean squared error: 0.14
Coefficient of determination (R^2): 0.02

Predictive Model Shows a Weak Relationship Between Parameters¶

Greenspace data for a city like Albuquerque is discontiguous because the aridity of the climate confines most greenspace to the area closest to the river. Elsewhere it is spotty, although there are several parks and golf courses throughout the city. As the plots show, many of the higher-income neighborhoods are toward the edge of the city limits in the open desert, and these houses typically have xeriscaped properties.

In [ ]:
%%capture
%%bash
jupyter nbconvert abq-ndvi.ipynb --to html --no-input