Machine Learning - Knot Classification¶

Author of this Jupyter Notebook: Djordje Mihajlovic¶

Welcome to PyKnot basic! This is a Jupyter notebook exercise to introduce the user to machine learning and knots based on work done by the TAPLab. This code is built on previous work by Davide Michieletto, Joseph Sleiman, Filippo Conforto, Yair Gutierrez Fosado and Djordje Mihajlovic.

The code highlights key aspects of machine learning, with minimal coding skills required, notably:

  • Data loading
  • Data visualization and exploration
  • Neural networks - Feed Forward & LSTM (PyTorch)
  • Reducing boilerplate code (using PyTorch Lightning)
  • Mitigating overfitting and improving generality (validation datasets and early stopping)
  • Model performance analysis (Confusion matrices)
  • Other machine learning models and methods - Decision Trees & K-Nearest Neighbours (scikit-learn)

This code can be generalized to other projects with appropriate changes to the data loading and model architecture. For each code block, an explanation is included to guide the user through what is happening.

Package loading¶

A .yaml file is provided from which you should be able to create a conda environment that can run all the code below, via:

conda env create --name ml-knots-tutorial --file=requirements.yaml

For the user with any experience in Python, the subsequent code block is trivial.

For anyone with minimal experience in Python: the code block below simply loads the required packages. These are essentially other sets of code that let us use functions built by other people without needing to redefine them. In the case of machine learning, for example, the whole training process has been automated, so we just need to know how to work with a prebuilt set of functions rather than reimplementing them ourselves (e.g. backpropagation).

In [ ]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import ConcatDataset, Subset

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping

import matplotlib.pyplot as plt

from sklearn import tree
from sklearn import neighbors
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score

from functions.helper_basic import *
from functions.loader_basic import *
from functions.writhe_code import *

import itertools
import pickle
from pathlib import Path

Basic Data Loading¶

In the next block of code we would like to load in some data and play around with it a bit to learn more about its structure. Understanding the structure of your data is essential in machine learning!

Running the code, you should get an output that tells you something about how the data is stored, specifically its shape. Data is (usually) stored in arrays (multi-dimensional matrices), so evaluating the shape of a 2D matrix we should get (height, width).

Additionally, we print the first row of our data; for the data specifically being loaded, this represents a point in $\mathbb{R}^{3}$ given by $x$, $y$ and $z$ coordinates.

In [ ]:
knot = ### FINISH ME ###

file = f"data/XYZ/XYZ_{knot}_small.dat.nos"
data = np.loadtxt(file)

print(f"The data has the following dimensions:  {### FINISH ME ###}")
print(f"The first row in the data is given: {### FINISH ME ###}")

Data visualization¶

Having loaded in our data and (hopefully) appreciated its structure, we can visualize some of the data points and additionally calculate some useful inputs for later. Clearly we still need to do something to plot each knot individually: from above we have a 2D matrix of some height and width, where the width holds our coordinates and the height is the full sequence of coordinates. Each knot is composed of 100 coordinates, so we need to break our matrix up into 3 dimensions, where the height is 100, the width is the number of coordinates, and the depth is $\frac{\text{original height}}{\text{height}}$, effectively the number of data points (1 per knot).
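
As a concrete sketch of this reshape (assuming 100 beads per knot and 3 coordinates per bead, as described above):

# Sketch: reshape flat (N*100, 3) coordinates into (N, 100, 3), one slice per knot
data_3d = data.reshape(-1, 100, 3)   # -1 lets NumPy infer the number of knots
print(data_3d.shape)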

Once the data has been restructured appropriately, the code block below creates 3 plots:

  • The first is a 3D plot of one knot embedding from the dataset
  • The next is the 'self-linking' of the knot, given via a separate function beyond the scope of this tutorial (based on this paper). The data is presented via a matrix $\omega$ where element $\omega_{ij}$ is the self-linking between components $i$ and $j$ of our discretized knot:
$$\omega(x_{1},x_{2}) = \dfrac{(\dot{\bm{r}}(x_{1}) \times \dot{\bm{r}}(x_{2})) \cdot (\bm{r}(x_{1}) - \bm{r}(x_{2}))}{|\bm{r}(x_{1}) - \bm{r}(x_{2})|^3}$$
  • Lastly, a line plot is displayed of the value
$$\omega(i) = \sum_{j}\omega(i, j)$$

This takes a while to run so give it some time!

In [ ]:
Nbeads = ### FINISH ME ###
n_cols = ### FINISH ME ###

data = data.reshape(-1, ### FINISH ME ###, ### FINISH ME ###)

sample = 0

for i in range(0, 5):
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))
    # Loop through each subplot

    axes_1 = fig.add_subplot(1, 3, 1, projection='3d') 

    single_knot = data[sample]

    single_knot_x = single_knot[:, 0]
    single_knot_y = single_knot[:, 1]
    single_knot_z = single_knot[:, 2]

    axes_1.plot(single_knot_x, single_knot_y, single_knot_z, color='b')

    last_seg_x = [single_knot_x[-1], single_knot_x[0]]
    last_seg_y = [single_knot_y[-1], single_knot_y[0]]
    last_seg_z = [single_knot_z[-1], single_knot_z[0]]

    axes_1.plot(last_seg_x, last_seg_y, last_seg_z, color='b')
    axes[0].set_title(f'Embedding {sample} of {knot}')

    axes[0].set_xticks([])
    axes[0].set_yticks([])


    writhe_matrix = compute_sts_writhe(single_knot, single_knot, 10)
    total = np.sum(writhe_matrix)

    im = axes[1].imshow(writhe_matrix)  # Display the matrix
    axes[1].set_title(f'StS writhe')


    STA = []
    for row in writhe_matrix:
        STA_point = 0

        for element in row:
            STA_point += element

        STA.append(STA_point)

    axes[2].plot(STA)
    axes[2].set_title(f"StA writhe")


    sample += 10

plt.show()

Neural Networks¶

Now that we understand our data and its shape we can move on to building machine learning models to try and do meaningful things with our data.

We want to build a classic Feed-Forward Neural Network. For this to work we need to consider two things:

  • The architecture of the network - its shape and size such that it can parse the data we give it properly.
    • We can initialize our architecture so that it has a general structure, defining the input and output sizes later; this lets us recycle the model architecture for different problems.
    • nn.Flatten() lets us turn our data, whatever its shape, into a 1D 'flattened' vector, which is essential for the FFNN
  • Activation functions between layers so that the network can learn non-linearly.
In [ ]:
class FFNNModel(nn.Module):

    def __init__(self, ### FINISH ME ###, ### FINISH ME ###):
        super(FFNNModel, self).__init__()

        self.flatten_layer = nn.Flatten()
        self.dense_layer1 = nn.Linear(### FINISH ME ###, ### FINISH ME ###)
        self.dense_layer2 = nn.Linear(### FINISH ME ###, ### FINISH ME ###)
        self.output_layer = nn.Linear(### FINISH ME ###, ### FINISH ME ###)

    def forward(self, x):

        x = self.flatten_layer(x)
        x = F.relu(### FINISH ME ###)
        x = ### FINISH ME ###

        x = ### FINISH ME ###

        return x
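
For orientation, a possible completion is sketched below; the hidden-layer sizes (512 and 256) are illustrative choices, not prescribed values:

class FFNNModelExample(nn.Module):

    def __init__(self, input_shape, output_shape):
        super().__init__()
        in_features = int(np.prod(input_shape))  # e.g. (100, 3) -> 300 after flattening

        self.flatten_layer = nn.Flatten()
        self.dense_layer1 = nn.Linear(in_features, 512)
        self.dense_layer2 = nn.Linear(512, 256)
        self.output_layer = nn.Linear(256, output_shape)

    def forward(self, x):
        x = self.flatten_layer(x)            # (batch, 100, 3) -> (batch, 300)
        x = F.relu(self.dense_layer1(x))     # non-linear activation between layers
        x = F.relu(self.dense_layer2(x))
        x = self.output_layer(x)             # raw logits, one per knot class
        return x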

Training, Validating and Testing¶

Now that our neural network model is built, we want to specify how the training is done; luckily this is pretty simple.

Here we use PyTorch Lightning which reduces boilerplate code (essentially it reduces the amount of code we need to write if we want to build other neural network models by generalizing 'training', 'validating' and 'testing').

The steps here are to tell the neural network:

  • How our data will be structured. This is different to the structure before: here we want to tell our model how the features (i.e. knot coordinates) and their labels (i.e. associated knot types) are to be passed into the model.
  • What the forward step is: what set of layers and sizes are we passing our data through? (This is what we defined earlier)
  • How and when do we define and apply the 'loss' for our problem: is it MSE, categorical cross-entropy?
  • How do we evaluate the model post training?
  • What optimizer do we need to use?
In [ ]:
class NN(pl.LightningModule):
    def __init__(self, model, loss, opt):
        super().__init__()
        
        self.model = model
        self.loss = loss
        self.optimiser = opt

    def forward(self, x):
        # apply model layers
        x = self.model(x)
        return x

    def training_step(self, batch, batch_idx, loss_name = 'train_loss'):
        # training
        ### FINISH ME ### = batch
        z = self.forward(### FINISH ME ###)
        loss = self.loss(### FINISH ME ###)
        self.log(loss_name, loss, on_epoch=True, on_step=True)
        return loss
    
    def validation_step(self, batch, batch_idx, loss_name = 'val_loss'):
        # validation
        ### FINISH ME ### = batch
        z = ### FINISH ME ###
        loss = ### FINISH ME ###
        self.log(loss_name, loss, prog_bar=True, on_epoch=True, on_step=True)
        return loss
    
    def test_step(self, batch, batch_idx, loss_name ='test_loss'):

        # test
        # Now, revisit the structure we had for training and validation steps and replicate, specifically we need to pass the batch
        # and its x, y data into a forward pass and evaluate the loss

        ### FINISH ME ###

        _, predicted = torch.max(z.data, 1) 
        test_acc = torch.sum(y == predicted).item() / (len(y)*1.0) 

        # log outputs
        self.log_dict({'test_loss': loss, 'test_acc': test_acc})

        return loss

    def configure_optimizers(self):
        return self.optimiser
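
If you get stuck, the batch handling could be completed along the following lines; this is a sketch assuming each batch is an (x, y) pair of features and integer class labels, with validation_step analogous:

    def training_step(self, batch, batch_idx, loss_name='train_loss'):
        x, y = batch                 # features and integer class labels
        z = self.forward(x)          # logits of shape (batch, n_classes)
        loss = self.loss(z, y)       # e.g. cross-entropy between logits and labels
        self.log(loss_name, loss, on_epoch=True, on_step=True)
        return loss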

Initializing our newly built model¶

Now that we've built a model, and told it how to generally update, we can specifically define the following depending on what problem we would like to train our model on:

  • The input shape of the model: is it a matrix of size (100, 3) defining the coordinates of a knot embedding? Or do we want to pass in a measure we took on the knot?
  • What loss function we want to use - defining how our model performance is evaluated.
  • What optimizer we want to use - defining how our model is updated.

Let's start by training on the XYZ coordinates of the knot embeddings.

In [ ]:
dtype = "XYZ"
knots = ["0_1", "3_1", "4_1", "5_1", "5_2"]

input_shape = ### FINISH ME ###
output_shape = ### FINISH ME ###

FFNN = FFNNModel(input_shape, output_shape)
loss_fn = ### FINISH ME ###  # Make the appropriate loss function choice for a CLASSIFICATION problem: refer to the PyTorch documentation if unsure
optimizer = optim.Adam(FFNN.parameters(), lr=### FINISH ME ###)
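
As a reference point, one reasonable set of choices (assuming 100 beads per knot with 3 coordinates each, and the 5 knot classes above; the learning rate is just a common starting value) would be:

input_shape = (100, 3)          # 100 beads x (x, y, z)
output_shape = len(knots)       # 5 knot classes

FFNN = FFNNModel(input_shape, output_shape)
loss_fn = nn.CrossEntropyLoss()                       # standard multi-class classification loss
optimizer = optim.Adam(FFNN.parameters(), lr=1e-3)    # lr=1e-3 is a common default, worth tuning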

Avoiding overfitting¶

A common problem in machine learning is overfitting to the data the model is being trained on, and therefore failing to generalize beyond it. To mitigate this we implement 'early stopping'. Early stopping checks our validation loss every epoch; this is the loss on a small partition of unseen data that is not in our training dataset. It ensures that the validation loss decreases along with the training loss, preserving the model's ability to generalize. Specifically, here we define a 'patience' (a number of epochs) within which our validation loss must decrease; if this doesn't occur, the model terminates training.

Once this is addressed, we can pass our data through .fit(), training the model to predict based on training data and evaluating using validation data.

Next we pass our trained model through .test() to evaluate our model on unseen data.

In [ ]:
def train(model, loss_fn, optimizer, train_loader, val_loader, test_loader, epochs):

    neural = NN(model=model, loss=loss_fn, opt=optimizer)

    # Early stopping defines metrics to ensure model doesn't overfit
    # min_delta is the minimum change in validation loss we require to say that the model has decreased loss in an epoch
    # patience is how many epochs we allow the model to run if the val_loss isn't going down. This count is reset each time the val_loss goes down
    # both these parameters will allow for better convergence

    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=### FINISH ME ###, patience=### FINISH ME ###, verbose=True, mode="min")
    trainer = Trainer(max_epochs=epochs, limit_train_batches=250, callbacks=[early_stop_callback])  # steps per epoch = 250

    # trainer.fit() fits data using training data and evaluates training using validation data
    # it has the input requirements (pytorch lightning model, training data, validation data)

    trainer.fit(### FINISH ME ###, ### FINISH ME ###, ### FINISH ME ###)
                
    # trainer.test() tests data using test data 
    # requires the testing data

    trainer.test(dataloaders= ### FINISH ME ###)

    all_predicted = []
    all_y = []

    for x, y in test_loader:
        z = neural.forward(x)

        _, predicted = torch.max(z.data, 1) 
        test_acc = torch.sum(y == predicted).item() / (len(y)*1.0) 

        predicted_np = predicted.cpu().numpy()
        y_np = y.cpu().numpy()

        # Accumulate predictions
        all_predicted.extend(predicted_np)
        all_y.extend(y_np)

    # Calculate confusion matrix over all batches
    conf_mat = confusion_matrix(all_y, all_predicted)
    ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=knots).plot(include_values=False, cmap='Blues')
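
If you get stuck, one possible way to fill in the blanks inside train() is sketched below; the min_delta and patience values are illustrative and worth experimenting with:

    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.0,
                                        patience=5, verbose=True, mode="min")
    trainer.fit(neural, train_loader, val_loader)    # model, training data, validation data
    trainer.test(dataloaders=test_loader)            # evaluate on held-out test data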

Putting everything together¶

Finally, we can train a neural network to try and classify knots based on some input. First we will try with our coordinate data.

The code below tells us how to load our data and how to split it into train, validate and test subsets, before passing the required subsets into the training function defined above. The code calls on another function defined in loader_basic.py that tells us how to load in all the data and structure it according to our needs.

In [ ]:
len_db = 1000
bs = 256
directory = "data"
device = torch.device("cpu") 
datasets = []

for i, knot in enumerate(knots): 
    indices = np.arange(0, len_db)
    datasets.append(Subset(KnotDataset(directory, dtype, knot, Nbeads, i), indices))

dataset = ConcatDataset(datasets)

ninputs = len(dataset)

# Make an effective train/val/test split
train_len = int(ninputs * (### FINISH ME ###)) 
test_len = int(ninputs * (### FINISH ME ###))
val_len = ninputs - (train_len + test_len)

train_dataset, test_dataset, val_dataset = split_train_test_validation(dataset, train_len, test_len, val_len, bs)

train(model = FFNN, loss_fn = loss_fn, optimizer = optimizer, train_loader = train_dataset, val_loader = val_dataset, test_loader= test_dataset, epochs = ### FINISH ME ###)
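
A common convention (illustrative, not mandatory) is roughly an 80/10/10 split:

train_len = int(ninputs * 0.8)   # 80% training
test_len = int(ninputs * 0.1)    # 10% testing; the remainder becomes validation
# ...with e.g. epochs=30 in the train(...) call; early stopping will usually end training sooner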

Trying another input¶

You should've seen that using the coordinates of the knot as input to the machine learning network didn't perform particularly well (~80%). Now we will do everything again, but instead we will train on the calculated StA writhe we saw earlier.

  • Key thing to note: we will need to change the shape of our neural network architecture, as the StA data is a 1-dimensional array (a line plot) as opposed to the 3D knot coordinate data.
In [ ]:
dtype = "SIGWRITHE" # loading StA
knots = ["0_1", "3_1", "4_1", "5_1", "5_2"]

input_shape = ### FINISH ME ###
output_shape = ### FINISH ME ###

FFNN = FFNNModel(### FINISH ME ###, ### FINISH ME ###)
loss_fn = ### FINISH ME ###
optimizer_ffnn = optim.Adam(FFNN.parameters(), lr=### FINISH ME ###)
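
Since the StA writhe is a sequence of 100 scalar values per knot, one plausible completion (assuming the loader returns each sample with shape (100, 1)) is:

input_shape = (100, 1)      # 100 beads x 1 writhe value
output_shape = len(knots)

FFNN = FFNNModel(input_shape, output_shape)
loss_fn = nn.CrossEntropyLoss()
optimizer_ffnn = optim.Adam(FFNN.parameters(), lr=1e-3)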

Retrain as required¶

With the new FFNN model we can retrain on our new data.

In [ ]:
datasets = []

for i, knot in enumerate(knots): 
    indices = np.arange(0, len_db)
    datasets.append(Subset(KnotDataset(directory, dtype, knot, Nbeads, i), indices))

dataset = ConcatDataset(datasets)

ninputs = len(dataset)

# Make an effective train/val/test split
train_len = int(ninputs * 0.9)
test_len = int(ninputs * 0.075)
val_len = ninputs - (train_len + test_len)

train_dataset, test_dataset, val_dataset = split_train_test_validation(dataset, train_len, test_len, val_len, bs)

train(### FINISH ME ###)

Using "better" ML: LSTMs¶

The previous two times we ran our machine learning networks we varied the data being input while using a fixed FFNN. Unfortunately, FFNNs have limitations: one key limitation is their inability to capture sequential information.

The knot coordinates and StA writhe both encode sequential information: each value depends on those before it in the sequence. Long Short-Term Memory (LSTM) neural networks mitigate this issue by introducing a 'memory' state into the neural network.

Below we build an LSTM and test its performance on the StA data. Although in practice this should be a better model than the FFNN for this data, we should actually observe a decrease in accuracy here. This is because we are using a small dataset, and LSTMs are particularly 'data hungry' in comparison to FFNNs.

This exercise shows the nuance in choosing the right model in accordance with the several variables at play, e.g. data type and dataset size in this case.

Additionally, we kept our learning rate the same as that used in the FFNN - try and explore other learning rates and see what happens to:

  • The time taken to train
  • Number of epochs until convergence
  • Accuracy
In [ ]:
class LSTMModel(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(LSTMModel, self).__init__()

        self.hidden_size = ### FINISH ME ###
        self.num_layers = ### FINISH ME ###
        self.seq = self.inp = input_shape[0]

        self.lstm = nn.LSTM(input_shape[1], self.hidden_size, self.num_layers, batch_first=True, bidirectional= False)
        self.fc = nn.Linear(### FINISH ME ###, ### FINISH ME ###)

    def forward(self, x):

        hidden = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) 
        cell = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.lstm(x, (hidden, cell)) 
        out = self.fc(out[:, -1, :])  
        
        return out
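
For guidance, a possible completion of the constructor is sketched below; the hidden size and layer count are illustrative choices, and forward() is unchanged from the class above:

class LSTMModelExample(nn.Module):
    def __init__(self, input_shape, output_shape):
        super().__init__()

        self.hidden_size = 64    # illustrative choice
        self.num_layers = 2      # illustrative choice
        self.seq = self.inp = input_shape[0]

        # with batch_first=True the LSTM expects input of shape (batch, seq_len, features)
        self.lstm = nn.LSTM(input_shape[1], self.hidden_size, self.num_layers, batch_first=True, bidirectional=False)
        # out[:, -1, :] (the final hidden state) has hidden_size features
        self.fc = nn.Linear(self.hidden_size, output_shape)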
In [ ]:
LSTM = LSTMModel(### FINISH ME ###, ### FINISH ME ###)
loss_fn = ### FINISH ME ###
optimizer_lstm = optim.Adam(LSTM.parameters(), lr=### FINISH ME ###)

train(### FINISH ME ###)

Using "worse" ML: DT and KNN¶

Although we haven't covered the theory behind other machine learning models in this class, it is very important to acknowledge them. In many cases neural networks aren't the optimal solution, and they often lead to overfitting and unnecessarily long, resource-intensive training.

We won't cover too much theory here; however, if you want to explore it a bit more there are plenty of very good resources online. The main idea we would like to investigate is that ML is not restricted to neural networks, and in many cases neural networks can be suboptimal compared to simpler, easier-to-train algorithms.

Decision Trees¶

One type of machine learning method is the decision tree. The theory/mathematics is simple and can be covered in a short amount of time, the general idea being that we separate our data by creating a sequence of 'if/else' statements that best split it.

Decision Tree Example:

                     [Start]
                        |
               feature X_1 > 0.5 ?
                |               |
               Yes              No
                |               |
               0_1      feature X_2 < 0.2 ?
                         |              |
                        Yes             No
                         |              |
                        3_1            4_1

Without going into details: an algorithm is iterated (the model is trained) to minimize either the Gini impurity or the entropy of the data at each node of the decision tree.

$$\text{Gini} = 1 - \sum_{i} p_{i}^{2}$$

$$\text{Entropy} = -\sum_{i} p_{i}\log p_{i}$$

$p_{i}$ is the probability of class $i$ at a given node.
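
As a quick worked example (hypothetical numbers): a node containing 8 samples of class 3_1 and 2 samples of class 4_1 has $p = (0.8, 0.2)$, so

import numpy as np

p = np.array([0.8, 0.2])             # class probabilities at a node
gini = 1 - np.sum(p**2)              # 1 - (0.64 + 0.04) = 0.32
entropy = -np.sum(p * np.log2(p))    # ~0.72 (using log base 2; the base only rescales the value)
print(gini, entropy)

A purer node gives lower values of both measures, with 0 for a node containing a single class.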

There are several different algorithms for how the model evaluates candidate splits and their entropy or Gini impurity; for the interested student, see the splitting algorithms CART and ID3: https://www.geeksforgeeks.org/decision-tree-algorithms/

The parameter we will be interested in changing is the depth of the tree, that is, how many subsequent if/else statements will we allow the tree to have.
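
As a standalone illustration of the scikit-learn API (on hypothetical toy arrays, not our knot data):

from sklearn import tree

X_toy = [[0.1, 0.9], [0.8, 0.3], [0.7, 0.6], [0.2, 0.2]]   # hypothetical feature vectors
y_toy = [0, 1, 1, 0]                                       # hypothetical labels

clf = tree.DecisionTreeClassifier(max_depth=2)   # max_depth limits the number of nested if/else splits
clf.fit(X_toy, y_toy)
print(clf.predict([[0.75, 0.4]]))                # the tree assigns the new point to class 1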

K-Nearest Neighbours¶

K-Nearest Neighbours is another type of machine learning method. Again, the theory behind it is not difficult and can be covered easily: instead of using 'if/else' statements like decision trees, the K-Nearest Neighbours algorithm classifies data points by comparing them to nearby points with similar features, using some chosen metric (for example Euclidean distance).

The general idea is that, when trying to classify a new data point, we check its $n$ nearest neighbours in feature space and assign it the label of whichever class makes up the majority of those neighbours.

K-Nearest Neighbours Example:


y   ^
    |        x  Z       o o
    |                o          o  o
    |     x                 o    o
    |                    o
    |    x   xx               o  o
    |  x    x  x            o  o  o
    |      x   x             oo  o
    |   x x
    |      x  x
    |           x   x
    -------------------------------> x

In the above example, the 5 nearest neighbours of the new point $Z$ under Euclidean distance would be (x, x, o, o, o). Hence, as the majority of the neighbours are of class o, $Z$ will be classified by the KNN model as having the label o. Clearly the choice of the number of neighbours is critical to the classification: had we specified 1 nearest neighbour, $Z$ would have been classified as being in the class x.

See https://www.geeksforgeeks.org/k-nearest-neighbours/ for more information.
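
And similarly for KNN, a minimal standalone sketch (again on hypothetical toy data):

from sklearn import neighbors

X_toy = [[0.0, 0.0], [0.1, 0.1], [0.9, 0.9], [1.0, 1.0], [0.95, 0.8]]   # hypothetical points
y_toy = [0, 0, 1, 1, 1]

clf = neighbors.KNeighborsClassifier(n_neighbors=3)   # the 3 nearest neighbours vote
clf.fit(X_toy, y_toy)
print(clf.predict([[0.85, 0.9]]))                     # all 3 nearest points are class 1 -> [1]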

In [ ]:
class AlternativeML:
    '''
    
    This class builds a Decision Tree classifier (https://scikit-learn.org/stable/modules/tree.html)
    and a K-Nearest Neighbours classifier (https://scikit-learn.org/stable/modules/neighbors.html).
    The __init__ function ensures that our pre-loaded data has the right shapes and data types; for our purposes we will ignore it.
    The classification functions are where the Decision Tree and KNN code live; all of the details are included in the links above and we just need to provide
    the training and testing data as required.

    '''
    def __init__(self, train_data, test_data):
        self.X_train = []
        self.y_train = []
        self.train_data = train_data
        for X, y in self.train_data:
            X = [i.numpy().flatten() for i in X]
            self.X_train.append(X)
            self.y_train.append(y.numpy())

        self.X_train = list(itertools.chain.from_iterable(self.X_train))
        self.y_train = list(itertools.chain.from_iterable(self.y_train))

        self.X_test = []
        self.y_test = []
        self.test_data = test_data
        for X, y in self.test_data:
            X = [i.numpy().flatten() for i in X]
            self.X_test.append(X)
            self.y_test.append(y.numpy())

        self.X_test = list(itertools.chain.from_iterable(self.X_test))
        self.y_test = list(itertools.chain.from_iterable(self.y_test))

    def DTclassifier(self):

        print("training decision tree model...")
        clf = tree.DecisionTreeClassifier(max_depth=### FINISH ME ###)
        clf = clf.fit(### FINISH ME ###, ### FINISH ME ###)
        filename = 'DT.sav'
        pickle.dump(clf, open(filename, 'wb'))

        print("loading decision tree model...")

        clf = pickle.load(open(f'DT.sav', 'rb'))

        y_pred = clf.predict(### FINISH ME ###)
        score = accuracy_score(y_pred=### FINISH ME ###, y_true=### FINISH ME ###)
        conf_mat = confusion_matrix(y_pred=### FINISH ME ###, y_true=### FINISH ME ###)
        print(f"DT Accuracy: {score*100}%")
        print(conf_mat)
        ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=knots).plot(include_values=False, cmap='Blues')
        plt.show()

    def KNNclassifier(self):
        
        print("training...")
        clf = neighbors.KNeighborsClassifier(n_neighbors= ### FINISH ME ###)
        clf = clf.fit(### FINISH ME ###, ### FINISH ME ###)
        filename = 'KNN.sav'
        pickle.dump(clf, open(filename, 'wb'))

        print("loading model...")

        clf = pickle.load(open(f'KNN.sav', 'rb'))

        y_pred = clf.predict(### FINISH ME ###)
        score = accuracy_score(y_pred=### FINISH ME ###, y_true=### FINISH ME ###)
        conf_mat = confusion_matrix(y_pred=### FINISH ME ###, y_true=### FINISH ME ###)
        print(f"KNN Accuracy: {score*100}%")
        print(conf_mat)
        ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=knots).plot(include_values=False, cmap='Blues')
        plt.show()
In [ ]:
AltML = AlternativeML(### FINISH ME ###, ### FINISH ME ###)
AltML.DTclassifier()
In [ ]:
AltML.KNNclassifier()