5. Validation Data

Written by Men Vuthy, 2021


Import packages

[1]:
import os
import rasterio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd

5.1 Paddy Points

Create raster image based on no-noise data

[2]:
# Load the no-noise NDVI table from CSV
DF_NDVI = pd.read_csv(
    'output/3/no_noise_data/no_noise_ndvi.csv'
)
[3]:
# Discard the first four rows and renumber the remaining rows from 0.
# NOTE(review): DF_NDVI is overwritten here, so re-running this cell alone
# removes four more rows each time; re-run the loading cell first.
DF_NDVI = DF_NDVI.iloc[4:].reset_index(drop=True)
[4]:
# Flip the table so the former column labels become the row index, then copy
# that index into an explicit leading 'id' column.
NDVI = DF_NDVI.transpose()
NDVI.insert(loc=0, column='id', value=NDVI.index)
[5]:
# Open one source image read-only; later cells reuse its shape, CRS and
# transform as the spatial reference for the exported raster.
raster = rasterio.open("input/ndvi_2011/2010_01_01.tif", mode="r")
[6]:
# Re-arrange cluster range
# Put the NDVI rows onto a dense 0..n_pixels-1 integer index so that every cell
# of the reference grid has exactly one row. Ids that are missing from the
# table become all-NaN rows; ids outside the range are dropped.

# Number of rows needed to fill the reference grid. Derived from the raster
# instead of being hard-coded, so it always matches the reshape target.
n_pixels = raster.count * raster.height * raster.width

# Re-index the table by the 'id' column cast to int64 (and drop that column),
# so the labels can be matched against the integer pixel index.
pixel_ids = pd.Index(NDVI['id'].astype('int64'), name='id')
ndvi_by_pixel = NDVI.drop(columns=['id'])
ndvi_by_pixel.index = pixel_ids

# A single reindex replaces the column-by-column copy into an empty frame.
Smooth_NDVI = ndvi_by_pixel.reindex(pd.RangeIndex(n_pixels, name='id'))
[7]:
# Reshape the cluster array
# Target shape is (bands, rows, cols) of the reference raster, i.e. the shape
# that raster.read() would return. Taking it from the dataset attributes once
# avoids re-reading the whole image from disk on every loop iteration.
grid_shape = (raster.count, raster.height, raster.width)

# One grid per column of Smooth_NDVI, in column order.
Array_list = [
    np.array(Smooth_NDVI[column]).reshape(grid_shape)
    for column in Smooth_NDVI.columns
]
[8]:
# Create array of smooth ndvi
# Stack the per-column grids, exchange the first two axes, and keep the first
# entry of the new leading axis so the result is a 3-D array.
NDVI_array = np.swapaxes(np.asarray(Array_list), 0, 1)[0]
NDVI_array.shape
[8]:
(65, 521, 753)

Export image as tiff

[9]:
# Data dir
data_dir = "output/4/no_noise_img"

# Create the folder up front so the export does not fail on a fresh checkout
os.makedirs(data_dir, exist_ok=True)

# Output raster
out_tif = os.path.join(data_dir, "no_noise_ndvi.tif")

# Copy the metadata of the reference image
out_meta = raster.meta.copy()

# Update the metadata: same grid and georeferencing as the reference image,
# float32 pixels, and one band per layer of the stacked NDVI array.
out_meta.update({
    'driver': 'GTiff',
    'dtype': 'float32',
    'nodata': None,
    'width': raster.width,
    'height': raster.height,
    'crs': raster.crs,
    # Band count follows the array being written instead of a fixed number
    'count': NDVI_array.shape[0],
    'transform': raster.transform,
})
[10]:
# Write the stacked array to the output GeoTIFF as float32; the context
# manager closes the file once the write is done.
with rasterio.open(out_tif, mode="w", **out_meta) as dst:
    dst.write(NDVI_array.astype("float32"))

NDVI time series at all paddy points

[11]:
# Re-open the exported GeoTIFF read-only and load all of its bands into memory.
# The dataset handle stays open because a later cell uses it to map point
# coordinates to row/column positions.
NDVI_img = rasterio.open('output/4/no_noise_img/no_noise_ndvi.tif', mode='r')
NN_NDVI = NDVI_img.read()
[12]:
# Preview the first band of the exported stack (semicolon hides the repr)
plt.imshow(NN_NDVI[0, :, :]);
../../../../_images/Content_Project_2021_paddy-area-classification_5._validation-data_18_0.png

Paddy Points

[13]:
# Load the paddy points from the shapefile; the 'geometry' column is what the
# extraction loop below iterates over.
paddy = gpd.read_file('input/paddy_point/paddy_points.shp')
[14]:
# Quick look at the point locations; the trailing semicolon hides the Axes repr
paddy.plot();
../../../../_images/Content_Project_2021_paddy-area-classification_5._validation-data_21_0.png

Extract ndvi based on paddy points

[15]:
# Flat list of NDVI samples, ordered point by point and, within each point,
# band by band (the next cell reshapes it to points x bands).
NDVI_List = []

for point in paddy['geometry']:

    # Coordinates of the point
    x = point.xy[0][0]
    y = point.xy[1][0]

    # Locate x and y of point to get row and col of raster.
    # This depends only on the point, so it is done once per point rather
    # than once per band.
    row, col = NDVI_img.index(x, y)

    # Take the value of every band at that pixel from the in-memory stack
    # (NN_NDVI holds all bands of NDVI_img) instead of re-reading a full band
    # from disk for every point/band combination.
    NDVI_List.extend(NN_NDVI[:, row, col])

[16]:
# Dimensions of the sample table: one entry per paddy point and per band
n_points = len(paddy['geometry'])
n_bands = NN_NDVI.shape[0]

# Flat sample list -> 1-D array
ndvi_array = np.array(NDVI_List)

# Fold into a (points, bands) matrix
ndvi_reshape = ndvi_array.reshape(n_points, n_bands)

# Data frame with one row per band and one column per point
DF_Validation = pd.DataFrame(ndvi_reshape).transpose()
DF_Validation
[16]:
0 1 2 3 4 5 6 7 8 9 ... 240 241 242 243 244 245 246 247 248 249
0 0.30656 0.30070 0.30872 0.300300 0.37614 0.31070 0.30242 0.29408 0.29544 0.26586 ... 0.30484 0.29380 0.27422 0.27342 0.31036 0.304780 0.31180 0.27698 0.30098 0.29138
1 0.29246 0.28526 0.29392 0.284140 0.32442 0.30164 0.28660 0.27876 0.27948 0.25624 ... 0.29132 0.28620 0.27160 0.26406 0.29812 0.294360 0.29170 0.26322 0.28980 0.27682
2 0.27646 0.26858 0.27812 0.266300 0.28486 0.29054 0.27436 0.25870 0.26574 0.24814 ... 0.27436 0.27408 0.25940 0.25310 0.28879 0.277280 0.27496 0.25308 0.28497 0.26114
3 0.25586 0.25887 0.26424 0.254440 0.26746 0.28112 0.26374 0.24062 0.25246 0.24176 ... 0.26446 0.26924 0.25938 0.25504 0.28456 0.281160 0.27192 0.24560 0.28063 0.25934
4 0.24462 0.24799 0.25640 0.248320 0.25348 0.28048 0.26200 0.23238 0.24636 0.24074 ... 0.25602 0.26310 0.25464 0.24824 0.28728 0.292160 0.27214 0.24758 0.28195 0.25890
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
60 0.41578 0.56593 0.68400 0.680193 0.60364 0.64858 0.67000 0.49599 0.63586 0.54138 ... 0.41742 0.43984 0.28020 0.66948 0.37100 0.314643 0.38560 0.30576 0.38422 0.46544
61 0.48096 0.57599 0.58410 0.556433 0.47780 0.53500 0.54704 0.49667 0.57608 0.58816 ... 0.50790 0.50404 0.36424 0.63130 0.42864 0.380443 0.42784 0.36670 0.45080 0.47698
62 0.53590 0.58783 0.52778 0.427653 0.38446 0.45966 0.44826 0.50061 0.52678 0.60048 ... 0.56946 0.51574 0.37734 0.58740 0.46686 0.452003 0.42184 0.43670 0.51632 0.48336
63 0.56918 0.55715 0.44346 0.378940 0.36458 0.44692 0.39998 0.50132 0.45886 0.63756 ... 0.58910 0.54578 0.42698 0.54918 0.58010 0.549100 0.46714 0.56082 0.57142 0.53050
64 0.57440 0.53922 0.36418 0.380320 0.46214 0.44940 0.39912 0.48398 0.43058 0.57456 ... 0.59070 0.51490 0.45468 0.48188 0.61056 0.603920 0.51218 0.60378 0.56238 0.52114

65 rows × 250 columns

Save result

[17]:
# Make sure the target folder exists so the export also works on a fresh checkout
validation_csv = 'output/4/validation_data/validation_paddy.csv'
os.makedirs(os.path.dirname(validation_csv), exist_ok=True)

# index=False: the positional row index is not written to the file
DF_Validation.to_csv(validation_csv, index=False)

Visualize data

[18]:
# Reload the saved validation table from disk
DF_Validation = pd.read_csv(
    'output/4/validation_data/validation_paddy.csv'
)
[19]:
i = 110 # column number, i.e. which paddy point to plot
j = i+1

# Plot the NDVI time series of the selected paddy point.
# iloc[:, i:j] keeps the selection a one-column frame so the legend shows the
# column label.
fig, ax = plt.subplots(figsize=(12,5))
DF_Validation.iloc[:, i:j].plot(ax=ax)
ax.set_title('2011 NDVI Timeseries')
ax.set_xlabel('Time step')
# Trailing semicolon keeps the Text(...) repr out of the cell output
ax.set_ylabel('NDVI');
[19]:
Text(0.5, 1.0, '2011 NDVI Timeseries')
../../../../_images/Content_Project_2021_paddy-area-classification_5._validation-data_29_1.png

5.2 Paddy Classes

Visualize data

[20]:
# Arrange dataframe for plot
# Reload the validation table (one row per time step, one column per point)
DF_Validation = pd.read_csv('output/4/validation_data/validation_paddy.csv')

# Take as many date rows as there are time steps, rather than a fixed count,
# so the labels always line up with the table.
Date = pd.read_csv('output/2/date_img/date_img.csv').iloc[:len(DF_Validation)]

# First column of the date table, one label per time step
date = [row[0] for row in Date.values]

# Label the rows directly; no transpose round-trip is needed for this
DF_Validation.index = date
[21]:
i = 206 # column number, i.e. which paddy point to plot
j = i+1

# Plot the NDVI time series of the selected paddy point against the date labels.
# iloc[:, i:j] keeps the selection a one-column frame so the legend shows the
# column label.
fig, ax = plt.subplots(figsize=(15,5))
DF_Validation.iloc[:, i:j].plot(ax=ax)
ax.set_title('2011 NDVI Timeseries')
ax.set_xlabel('Date')
# Trailing semicolon keeps the Text(...) repr out of the cell output
ax.set_ylabel('NDVI');
[21]:
Text(0.5, 1.0, '2011 NDVI Timeseries')
../../../../_images/Content_Project_2021_paddy-area-classification_5._validation-data_34_1.png
[22]:
# Reload the image dates, keeping only the first 65 rows
Date = pd.read_csv('output/2/date_img/date_img.csv').head(65)