4. Predicting the whole Kano and Yoshii River¶

Written by Men Vuthy, 2022

Import modules

[1]:

import os
import pandas as pd
import numpy as np
np.random.seed(0)

import rasterio
import geopandas as gpd

# Import scikit-learn modules
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
rc('text', usetex=True)

[2]:

# Input classified data of each river
Kano_classified = pd.read_csv('data/kano_river/out_img/classified/kano_classified_index.csv')
Yoshii_classified = pd.read_csv('data/yoshii_river/out_img/classified/yoshii_classified_index.csv')

[3]:

# Get index of dataframe
kano_index = Kano_classified.iloc[:,0]
yoshii_index = Yoshii_classified.iloc[:,0]

# Create a list of the feature column's names
features = Kano_classified.columns[1:29]

features

[3]:

Index(['B1', 'G1', 'R1', 'NIR1', 'NDVI1', 'NDWI1', 'BSI1', 'B2', 'G2', 'R2',
       'NIR2', 'NDVI2', 'NDWI2', 'BSI2', 'B3', 'G3', 'R3', 'NIR3', 'NDVI3',
       'NDWI3', 'BSI3', 'B4', 'G4', 'R4', 'NIR4', 'NDVI4', 'NDWI4', 'BSI4'],
      dtype='object')

[4]:

# load, no need to initialize the loaded_rf
rfc_model = joblib.load("./random_forest.joblib")

[5]:

# Apply the trained Classifier to the prediction dataframe
preds_kano = rfc_model.predict(Kano_classified[features])

[6]:

# View accuracy classification (cross-validation) score
print('Our classification accuracy is: {cv}%'.format(cv=accuracy_score(Kano_classified['label'], preds_kano)* 100))

Our classification accuracy is: 96.09589654544772%

Visualizing confusion matrixdata/kano_river/out_img/predicted

[7]:

# Get and reshape confusion matrix data
Matrix = confusion_matrix(Kano_classified['label'], preds_kano)
matrix = Matrix.astype('float') / Matrix.sum(axis=1)[:, np.newaxis]

[8]:

# Build the plot
plt.figure(figsize=(15,5))
sns.heatmap(matrix, annot=True, annot_kws={'size':10},
            cmap=plt.cm.Greens, linewidths=0.2)

# Add labels to the plot
class_names = ['1', '2', '3', '4', '5', '6', '7']
tick_marks = np.arange(len(class_names))
tick_marks2 = tick_marks + 0.5
plt.xticks(tick_marks+ 0.5, class_names, rotation=25, fontsize=10)
plt.yticks(tick_marks2, class_names, rotation=0, fontsize=10)
plt.xlabel('Predicted label', fontsize=12)
plt.ylabel('True label', fontsize=12)
plt.title('Confusion Matrix for Random Forest Model - Kano River\nTraining data - Kano\&Yoshii River, 75\% training and 25\% testing, Accuracy score is 96.09\%')
plt.savefig('data/kano_river/out_img/predicted/confusion-matrix-kano&yoshii-data.png', dpi=300)
plt.show()

../../../../_images/Content_Project_2022_kano-and-yoshii-river_4-Prediction_on_Kano,_Yoshii_&_Muromi_River_11_0.png

[9]:

# Apply the trained Classifier to the prediction dataframe
preds_yoshii = rfc_model.predict(Yoshii_classified[features])

[10]:

# View accuracy classification (cross-validation) score
print('Our classification accuracy is: {cv}%'.format(cv=accuracy_score(Yoshii_classified['label'], preds_yoshii)* 100))

Our classification accuracy is: 97.14533764790879%

Visualizing confusion matrix

[11]:

# Get and reshape confusion matrix data
Matrix = confusion_matrix(Yoshii_classified['label'], preds_yoshii)
matrix = Matrix.astype('float') / Matrix.sum(axis=1)[:, np.newaxis]

[12]:

# Build the plot
plt.figure(figsize=(15,5))
sns.heatmap(matrix, annot=True, annot_kws={'size':10},
            cmap=plt.cm.Greens, linewidths=0.2)

# Add labels to the plot
class_names = ['1', '2', '3', '4', '5', '6', '7']
tick_marks = np.arange(len(class_names))
tick_marks2 = tick_marks + 0.5
plt.xticks(tick_marks+ 0.5, class_names, rotation=25, fontsize=10)
plt.yticks(tick_marks2, class_names, rotation=0, fontsize=10)
plt.xlabel('Predicted label', fontsize=12)
plt.ylabel('True label', fontsize=12)
plt.title('Confusion Matrix for Random Forest Model - Yoshii River\nTraining data - Kano\&Yoshii River, 75\% training and 25\% testing, Accuracy score is 97.14\%')
plt.savefig('data/yoshii_river/out_img/predicted/confusion-matrix-kano&yoshii-data.png', dpi=300)
plt.show()

../../../../_images/Content_Project_2022_kano-and-yoshii-river_4-Prediction_on_Kano,_Yoshii_&_Muromi_River_16_0.png

Create raster of predicted data - Kano river

[13]:

# Add one image for projection and shape reference
kano_img = rasterio.open("data/kano_river/out_img/class/kano_class.tiff")

[14]:

# Create new dataframe for predicted class and its index
rfc_class = pd.DataFrame()
rfc_class['id'] = kano_index
rfc_class['class'] = preds_kano

[15]:

# Rearrange index into shape of reference raster
indx = list(range(kano_img.read().reshape(-1).shape[0]))
Index = pd.DataFrame()
Index['id'] = indx
df1 = Index.set_index('id')
df2 = rfc_class.set_index('id')
df2 = rfc_class.set_index(df2.index.astype('int64')).drop(columns=['id'])
mask = df2.index.isin(df1.index)
df1['cluster'] = df2.loc[mask, 'class']

[16]:

# Reshape the cluster array
array = np.array(df1['cluster'])
n_array = array.reshape(kano_img.read().shape)
class_array = np.where(np.isnan(n_array), 0, n_array)

[17]:

# Data dir
data_dir = "data/kano_river/out_img/predicted"

# Output raster
out_tif = os.path.join(data_dir, "kano_predicted.tif")

# Copy the metadata
out_meta = kano_img.meta.copy()
out_meta

# Update the metadata
out_meta.update({'driver': 'GTiff',
                 'dtype': 'float32',
                 'nodata': None,
                 'width': kano_img.shape[1],
                 'height': kano_img.shape[0],
                 'crs': kano_img.crs,
                 'count':1,
                 'transform': kano_img.transform
                })

with rasterio.open(out_tif, "w", **out_meta) as dest:
    dest.write(class_array.astype(np.float32))

Create raster of predicted data - Yoshii river

[31]:

# Add one image for projection and shape reference
yoshii_img = rasterio.open("data/yoshii_river/out_img/class/yoshii_class.tiff")

[32]:

# Create new dataframe for predicted class and its index
rfc_class = pd.DataFrame()
rfc_class['id'] = yoshii_index
rfc_class['class'] = preds_yoshii

[33]:

# Rearrange index into shape of reference raster
indx = list(range(yoshii_img.read().reshape(-1).shape[0]))
Index = pd.DataFrame()
Index['id'] = indx
df1 = Index.set_index('id')
df2 = rfc_class.set_index('id')
df2 = rfc_class.set_index(df2.index.astype('int64')).drop(columns=['id'])
mask = df2.index.isin(df1.index)
df1['cluster'] = df2.loc[mask, 'class']

[34]:

# Reshape the cluster array
array = np.array(df1['cluster'])
n_array = array.reshape(yoshii_img.read().shape)
class_array = np.where(np.isnan(n_array), 0, n_array)

[22]:

# Data dir
data_dir = "data/yoshii_river/out_img/predicted"

# Output raster
out_tif = os.path.join(data_dir, "yoshii_predicted.tif")

# Copy the metadata
out_meta = yoshii_img.meta.copy()
out_meta

# Update the metadata
out_meta.update({'driver': 'GTiff',
                 'dtype': 'float32',
                 'nodata': None,
                 'width': yoshii_img.shape[1],
                 'height': yoshii_img.shape[0],
                 'crs': yoshii_img.crs,
                 'count':1,
                 'transform': yoshii_img.transform
                })

with rasterio.open(out_tif, "w", **out_meta) as dest:
    dest.write(class_array.astype(np.float32))