#!/usr/bin/env python
# coding: utf-8

# ## Problem Statement

# LLMs play a major role in AI, but they need GPUs and heavy computation. That is not only
# expensive; it also blocks the democratisation of AI, which is what actually drives the rapid
# growth of open-source AI. There is therefore a need for a system in which one can create a
# billion-parameter, 100-billion-parameter, or even 1-trillion-parameter model without GPUs,
# massive RAM, or other heavy computational resources: for example, the model should run on a
# machine with 16 GB RAM and an i5 processor. The Kratim Budhimata model provides that key
# capability. You can create any number of models inside a single model, which makes managing a
# large collection of models feasible, and you can train and run only the models that actually
# contribute to answering a particular user query. As a result, the full set of models never has
# to be loaded into RAM, not even once. You never need to load the whole model at any time; that
# is the core of this solution. You run the multi-model system, use a classifier to predict which
# model should answer a query, and then load only that model. The system therefore scales
# horizontally rather than vertically: instead of one big model you can have thousands of small
# models, which can give better results and also help with the hallucination problem.

# ### Solution
#
# 1. Create a Kratim Budhimata model class, initialise it, and call it the first model.
# 2. Train the classification model on the prompts, creating the labels from the model number or
#    dataset number: 1 for the text-summarisation model in the example below.
# 3. Save the weights of the classification model and the summarisation model.
# 4. Initialise another instance of the Kratim Budhimata model class and call it the second model.
# 5. Load the first model's trained classification and summarisation weights into the second model.
# 6. Predict the class using the classification model.
# 7. Based on that class, initialise the relevant model and load its weights to predict the
#    response; in our case that is the summarisation model.
# 8. After the weights are saved, the first model can be set to None to save resources in a
#    production environment or wherever required.
#
# Steps 6 and 7 are sketched in code right after this list.
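# In[ ]:

# A minimal sketch of the routing loop in steps 6 and 7. `build_sub_model` and `weight_paths`
# are hypothetical placeholders for this illustration; the real registration logic and weight
# files are defined later in this notebook.
import numpy as np

def route_and_predict(encoded_prompt, classifier, build_sub_model, weight_paths):
    # Step 6: a lightweight classifier picks the model number for this query.
    predicted_class = int(np.argmax(classifier.predict(encoded_prompt), axis=-1)[0])
    # Step 7: build only that sub-model's architecture and pull only its weights into RAM.
    sub_model = build_sub_model(predicted_class)
    sub_model.load_weights(weight_paths[predicted_class])
    return sub_model.predict(encoded_prompt)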
# ### Import

# In[6]:

import tensorflow as kratim_budhimata_tf
import numpy as np
import pandas as pd
import keras
from sklearn.metrics import accuracy_score
from duckduckgo_search import DDGS

# ### Load the Data

# In[8]:

prompts = [
    "Kratim Budhimata is evolving the AI field by Long term innovations which can make real difference.",
    "When it comes to better results in low cost one should connect with Kratim Budhimata.",
    "If one needs best results with low computation limitation then Kratim Budhimata is best place to reach out.",
    "Weather is very unpredictible in most of the areas now a days mostly in rainy seasons.",
    "Relationship is personal thing which needs to be respected for privacy.",
    "Global warming is one of the areas where the world should look into it.",
    "Best time to do the Great thing is now",
    "Bravery cannot be replaced by anything which should be present",
    "Food and Health is basic requirement for human being which should be fulfilled for better world",
    "Direction matters more than speed when it comes to take critical decisions."
]

responses = [
    "Kratim Budhimata is evolving AI by cutting edge Innovations",
    "Kratim Budhimata is building innovative solutions cost effectively",
    "Low compute and more results meaning Kratim Budhimata",
    "Weather remains unpredictible when rains",
    "Relationship is private thing which should be respected",
    "Global warming should be addressed",
    "Always do the best thing first.",
    "Bravery matters when going gets tough",
    "Food and Health is need and its not a priviledge",
    "Good direction often leads to the better places"
]

# In[9]:

labels = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

# ### Data Preprocessing

# In[11]:

tokenizer = kratim_budhimata_tf.keras.preprocessing.text.Tokenizer(filters='')
tokenizer.fit_on_texts(prompts + responses)
vocab_size = len(tokenizer.word_index) + 1

# In[12]:

max_prompt_len = max(len(p.split()) for p in prompts)
max_summary_len = max(len(r.split()) for r in responses)

# In[13]:

def encoding_function(texts, max_len):
    seqs = tokenizer.texts_to_sequences(texts)
    return kratim_budhimata_tf.keras.preprocessing.sequence.pad_sequences(seqs, maxlen=max_len, padding='post')

# In[14]:

prompts_x = encoding_function(prompts, max_prompt_len)
summary_y = encoding_function(responses, max_summary_len)
summary_onehot_y = kratim_budhimata_tf.keras.utils.to_categorical(summary_y, num_classes=vocab_size)
labels_y = kratim_budhimata_tf.keras.utils.to_categorical(labels, num_classes=2)

# In[15]:

embed_dim = 64
lstm_units = 128
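# In[ ]:

# A quick sanity check of the preprocessing (an illustrative addition): every prompt becomes a
# fixed-length integer sequence, and the one-hot summary targets gain a vocab-sized axis.
print(prompts_x.shape)          # (10, max_prompt_len)
print(summary_onehot_y.shape)   # (10, max_summary_len, vocab_size)
print(labels_y.shape)           # (10, 2)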
# ### Model Creation

# In[17]:

@keras.saving.register_keras_serializable()
class KratimBudhimataModel(kratim_budhimata_tf.keras.Model):
    def __init__(self, model_num=None, model_type=None, use_search=False,
                 max_prompt_len=256, max_summary_len=256, lstm_units=128,
                 num_classes=2, vocab_size=1500, embed_dim=128):
        super().__init__()
        # Store the configuration passed in so it round-trips through get_config/from_config.
        self.model_num = model_num
        self.model_type = model_type
        self.use_search = use_search
        self.max_prompt_len = max_prompt_len
        self.max_summary_len = max_summary_len
        self.lstm_units = lstm_units
        self.num_classes = num_classes
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.model_paths = {}     # registry: model number -> sub-model
        self.optimizers = {}      # registry: model number -> optimizer
        self.compiled_ids = set()
        self.trainable = True

    def model_register(self, model_num, model_type, use_search, query=None):
        """Build the requested sub-model type and register it under `model_num`."""
        if use_search:
            # Answer from a web search instead of a registered sub-model.
            with DDGS() as ddgs:
                results = ddgs.text(query or "")
            return results[0]['body'] if results else "No response found from search."

        if model_num in self.model_paths:
            return

        kl = kratim_budhimata_tf.keras.layers

        if model_type == "text_classification":
            input_ids = kratim_budhimata_tf.keras.Input(shape=(self.max_prompt_len,), name=f'input_ids_{model_num}')
            embedded = kl.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim,
                                    name=f'embedding_task_number_{model_num}')(input_ids)
            encoded = kl.LSTM(self.lstm_units, name=f'LSTM_Layer_{model_num}')(embedded)
            outcome = kl.Dense(self.num_classes, activation='softmax',
                               name=f'classifier_dense_{model_num}')(encoded)
            model = kratim_budhimata_tf.keras.models.Model(inputs=input_ids, outputs=outcome,
                                                           name=f"classification_model_{model_num}")

        elif model_type == "text_summarisation":
            # Encoder-decoder: encode the prompt, repeat the context vector, decode a summary.
            input_ids = kratim_budhimata_tf.keras.Input(shape=(self.max_prompt_len,), name=f'input_ids_{model_num}')
            embedded = kl.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim,
                                    name=f'embedding_task_number_{model_num}')(input_ids)
            encoded = kl.LSTM(self.lstm_units, name=f'LSTM_{model_num}')(embedded)
            repeated = kl.RepeatVector(self.max_summary_len, name=f'Repeat_Vector_{model_num}')(encoded)
            decoded = kl.LSTM(self.lstm_units, return_sequences=True, name=f'decode_LSTM_{model_num}')(repeated)
            outcome = kl.Dense(self.vocab_size, activation='softmax',
                               name=f'summary_dense_{model_num}')(decoded)
            model = kratim_budhimata_tf.keras.models.Model(inputs=input_ids, outputs=outcome,
                                                           name=f"summary_model_{model_num}")
        else:
            # Placeholder multimodal heads sharing one embedding. The image and video heads
            # pool over the sequence so their dense layers receive a fixed-size vector.
            input_ids = kratim_budhimata_tf.keras.Input(shape=(None,), dtype='int32', name=f'input_ids_{model_num}')
            embedded = kl.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim,
                                    name=f'embedding_task_number_{model_num}')(input_ids)
            text_outcome = kl.Dense(self.vocab_size, activation='softmax',
                                    name=f'text_output_{model_num}')(embedded)
            wants_image = "image" in str(model_type)
            wants_video = "video" in str(model_type)
            if wants_image or wants_video:
                pooled = kl.GlobalAveragePooling1D(name=f'pooled_{model_num}')(text_outcome)
            if wants_image:
                image = kl.Dense(224 * 224 * 3, activation='sigmoid',
                                 name=f"image_dense_{model_num}")(pooled)
                image_outcome = kl.Reshape((224, 224, 3), name=f'outcome_image_{model_num}')(image)
            if wants_video:
                video = kl.Dense(16 * 112 * 112 * 3, activation='sigmoid',
                                 name=f"video_dense_{model_num}")(pooled)
                video_outcome = kl.Reshape((16, 112, 112, 3), name=f'outcome_video_{model_num}')(video)
            if model_type == "text_to_image":
                outputs = image_outcome
            elif model_type == "text_to_video":
                outputs = video_outcome
            elif model_type == "text_to_image_and_video":
                outputs = [image_outcome, video_outcome]
            elif model_type == "text_to_text_and_image_and_video":
                outputs = [text_outcome, image_outcome, video_outcome]
            else:
                outputs = text_outcome  # plain text-to-text fallback
            model = kratim_budhimata_tf.keras.models.Model(inputs=input_ids, outputs=outputs,
                                                           name=f"model_{model_num}")

        print(f"Registered Model Num: {model_num}")
        self.model_paths[model_num] = model
        self.optimizers[model_num] = kratim_budhimata_tf.keras.optimizers.Adam(learning_rate=0.002)

    def model_creation_process(self, model_num, model_type, use_search):
        self.model_register(model_num, model_type, use_search)
        self.model_num = model_num
        sub_model = self.model_paths.get(model_num)
        if model_num not in self.compiled_ids:
            # Compile the registered sub-model with a loss matching its output type.
            if model_type in ("text_classification", "text_summarisation"):
                loss = 'categorical_crossentropy'
            elif "image" in str(model_type) or "video" in str(model_type):
                loss = 'mse'  # placeholder loss for the multimodal stubs
            else:
                loss = 'sparse_categorical_crossentropy'
            sub_model.compile(optimizer=self.optimizers[model_num], loss=loss, metrics=['accuracy'])
            print(self.optimizers[model_num])
            print(f"compilation done for {model_num}")
            self.compiled_ids.add(model_num)

    def call(self, inputs, training=False):
        if self.model_num is None:
            raise ValueError("Please provide a model number.")
        # Run only the currently selected sub-model.
        return self.model_paths[self.model_num](inputs, training=training)

    def get_config(self):
        return {
            "model_num": self.model_num,
            "model_type": self.model_type,
            "use_search": self.use_search,
            "max_prompt_len": self.max_prompt_len,
            "max_summary_len": self.max_summary_len,
            "lstm_units": self.lstm_units,
            "num_classes": self.num_classes,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)
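# In[ ]:

# Because the constructor accepts the configuration as keyword arguments, the per-attribute
# assignments in the initialization cell below can equivalently be passed in one call.
# `example_model` is only an illustration and is not used afterwards.
example_model = KratimBudhimataModel(num_classes=2, vocab_size=vocab_size, embed_dim=embed_dim,
                                     lstm_units=lstm_units, max_prompt_len=max_prompt_len,
                                     max_summary_len=max_summary_len)
del example_model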
# ### Model Initialization

# In[19]:

first_model = KratimBudhimataModel()
first_model.use_search = False
first_model.num_classes = 2
first_model.vocab_size = vocab_size
first_model.embed_dim = embed_dim
first_model.lstm_units = lstm_units
first_model.max_prompt_len = max_prompt_len
first_model.max_summary_len = max_summary_len

# ### Model number 1 is for classification and 2 is for summarisation. Model numbers exist so that, with many models registered, we can load just the one that can answer a particular user query: the classification model first predicts a label, that label is the model number, and that model is then loaded to predict the response. Prediction therefore involves two steps.

# In[21]:

first_model.model_creation_process(1, model_type="text_classification", use_search=False)

# In[22]:

first_model.model_creation_process(2, model_type="text_summarisation", use_search=False)
first_model.summary()

# ### Model Exploration

# In[24]:

first_model.layers

# ### Classification Model

# In[26]:

model_classification = first_model.layers[0]
model_classification

# ### Summarisation Model

# In[28]:

model_summary = first_model.layers[1]
model_summary

# In[29]:

model_classification.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# In[30]:

model_summary.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# ### Classification Model Training

# In[32]:

model_classification.fit(prompts_x, labels_y, epochs=200)

# ### Summarisation Model Training

# In[34]:

model_summary.fit(prompts_x, summary_onehot_y, epochs=300)

# ### Classification Model Summary

# In[36]:

model_classification.summary()

# ### Summarisation Model Summary

# In[38]:

model_summary.summary()

# ### Classification Model Weight Saving

# In[40]:

model_classification.save_weights("model_classification_trained.weights.h5")

# ### Summarisation Model Weight Saving

# In[42]:

model_summary.save_weights("model_summary_trained.weights.h5")
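# In[ ]:

# Step 8 of the solution (an illustrative sketch): with the weights safely on disk, the first
# model can be released to free RAM. For the memory to actually be reclaimed, every reference
# to the sub-models must be dropped, not just the registry entries.
import gc

first_model.model_paths = {}
model_classification = None
model_summary = None
first_model = None
gc.collect()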
# ### Second model creation: classification model initialisation and classification model weight loading
# #### The config parameters must match the first model because we load the first model's weights.

# In[45]:

second_model = KratimBudhimataModel()
second_model.use_search = False
second_model.num_classes = 2
second_model.vocab_size = vocab_size
second_model.embed_dim = embed_dim
second_model.lstm_units = lstm_units
second_model.max_prompt_len = max_prompt_len
second_model.max_summary_len = max_summary_len

# In[46]:

second_model.model_creation_process(1, model_type="text_classification", use_search=False)
second_model.summary()

# ### Load the classification model's trained and saved weights into the second model's layer 0 for prediction

# In[48]:

second_model.layers[0].load_weights("model_classification_trained.weights.h5")
second_model.layers[0].summary()

# ### Prediction: the classification model first predicts the class of the prompt, and the relevant model is then used for the response; for example, if the class is 1, the summarisation model is called. Only the relevant model runs at prediction time, which saves a huge amount of RAM and CPU/GPU computation.

# In[50]:

index_word = {i: w for w, i in tokenizer.word_index.items()}

def decoder_function_sequence(seq):
    return ' '.join([index_word.get(i, '') for i in seq if i != 0])

# In[51]:

prediction_class = second_model.layers[0].predict(prompts_x)

# ### Predicted Classes and Classification Model Accuracy

# In[54]:

predicted_labels = np.argmax(prediction_class, axis=1)

# In[55]:

true_classes = np.array(labels)

# In[56]:

classification_model_accuracy = accuracy_score(true_classes, predicted_labels)

# In[57]:

classification_model_accuracy

# ### Check which prompts have class 1 and build the subset of prompts with that class

# In[59]:

true_classes

# In[61]:

predicted_labels

# ### The first five labels are class 1, which means those prompts should be answered by the summarisation model. So create the subset of prompts for prediction with the summarisation model.

# In[63]:

second_model.model_creation_process(2, model_type="text_summarisation", use_search=False)

# ### Second model: initialise the summarisation model (layer 1) and load the weights that were trained and saved by the first model's summarisation model

# In[66]:

second_model.layers[1].load_weights("model_summary_trained.weights.h5")

# ### Create a subset of the prompts whose label is 1

# In[68]:

prompts_x[0:5]

# In[69]:

summary_y[0:5]

# ### Summarisation Model Prediction

# In[71]:

prediction_summary_probs = second_model.layers[1].predict(prompts_x[0:5])
predicted_summary_indices = np.argmax(prediction_summary_probs, axis=-1)
predicted_summary_texts = [decoder_function_sequence(seq) for seq in predicted_summary_indices]

# In[72]:

predicted_summary_texts

# In[73]:

actual_summary_texts = [decoder_function_sequence(seq) for seq in summary_y[0:5]]
actual_summary_texts

# ### Summarisation Model Accuracy

# In[75]:

summary_match_outcome = [p.strip() == t.strip() for p, t in zip(predicted_summary_texts, actual_summary_texts)]
summary_accuracy = sum(summary_match_outcome) / len(summary_match_outcome)
print("Summary Model Accuracy (Pass@1):", round(summary_accuracy * 100, 2), "%")

# In[76]:

print("\n Predicted and Actual Comparison")
for i in range(len(prompts[0:5])):
    print(f"- Prompt: {prompts[i]}")
    print(f"  Predicted Class: {predicted_labels[i]} | Actual Class: {true_classes[i]}")
    print(f"  Predicted Summary: {predicted_summary_texts[i]}")
    print(f"  Actual Summary   : {actual_summary_texts[i]}")
    print(f"  Summary Match Outcome : {'True' if summary_match_outcome[i] else 'False'}\n")
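# In[ ]:

# Putting the two prediction steps together as one helper (a sketch built from the objects
# defined above; the name `answer_query` is introduced here for illustration): classify the
# prompt first, then run only the sub-model the classifier selected.
def answer_query(raw_prompt):
    x = encoding_function([raw_prompt], max_prompt_len)
    predicted = int(np.argmax(second_model.layers[0].predict(x), axis=1)[0])
    if predicted == 1:  # class 1 was labelled as the summarisation task
        probs = second_model.layers[1].predict(x)
        return decoder_function_sequence(np.argmax(probs, axis=-1)[0])
    return "No sub-model is registered for this class in this demo."

answer_query(prompts[0])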
# ## Summary model accuracy for all prompts

# In[78]:

prediction_summary_probs = second_model.layers[1].predict(prompts_x)

# In[79]:

predicted_summary_indices = np.argmax(prediction_summary_probs, axis=-1)
predicted_summary_texts = [decoder_function_sequence(seq) for seq in predicted_summary_indices]

# In[80]:

actual_summary_texts = [decoder_function_sequence(seq) for seq in summary_y]

# In[81]:

predicted_summary_texts

# In[82]:

actual_summary_texts

# In[83]:

summary_match_outcome = [p.strip() == t.strip() for p, t in zip(predicted_summary_texts, actual_summary_texts)]
summary_accuracy = sum(summary_match_outcome) / len(summary_match_outcome)
print("Summary Model Accuracy (Pass@1):", round(summary_accuracy * 100, 2), "%")

# In[84]:

print("\n Predicted and Actual Comparison")
for i in range(len(prompts)):
    print(f"- Prompt: {prompts[i]}")
    print(f"  Predicted Class: {predicted_labels[i]} | Actual Class: {true_classes[i]}")
    print(f"  Predicted Summary: {predicted_summary_texts[i]}")
    print(f"  Actual Summary   : {actual_summary_texts[i]}")
    print(f"  Summary Match Outcome : {'True' if summary_match_outcome[i] else 'False'}\n")
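# In[ ]:

# An illustrative complement to the exact-match score above (an addition, not part of the
# original evaluation): per-token accuracy over the non-padding positions of the targets.
mask = summary_y != 0
per_token_accuracy = (predicted_summary_indices == summary_y)[mask].mean()
print("Per-token Summary Accuracy:", round(float(per_token_accuracy) * 100, 2), "%")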