#!/usr/bin/env python
# coding: utf-8

# ## Problem Statement

# LLMs play a major role in AI, but they need GPUs and heavy computation. That is not only
# expensive; it also blocks the democratisation of AI, which is what actually drives the rapid
# growth of open-source AI. There is therefore a need for a system in which one can create a
# billion-parameter, 100-billion-parameter, or even 1-trillion-parameter model without GPUs,
# massive RAM, or other heavy computational resources: for example, the model should run on a
# machine with 16 GB RAM and an i5 processor. The Kratim Budhimata model provides that key
# capability. You can create any number of models inside a single model, which makes managing a
# large collection of models feasible, and you can train and run only the models that actually
# contribute to answering a particular user query. As a result, the full set of models never has
# to be loaded into RAM, not even once. You never need to load the whole model at any time; that
# is the core of this solution. You run the multi-model system, use a classifier to predict which
# model should answer a query, and then load only that model. The system therefore scales
# horizontally rather than vertically: instead of one big model you can have thousands of small
# models, which can give better results and also help with the hallucination problem.

# ### Solution
#
# 1. Create a Kratim Budhimata model class, initialise it, and call it the first model.
# 2. Train the classification model on the prompts, creating the labels from the model number or
#    dataset number: 1 for the text-summarisation model in the example below.
# 3. Save the weights of the classification model and the summarisation model.
# 4. Initialise another instance of the Kratim Budhimata model class and call it the second model.
# 5. Load the first model's trained classification and summarisation weights into the second model.
# 6. Predict the class using the classification model.
# 7. Based on that class, initialise the relevant model and load its weights to predict the
#    response; in our case that is the summarisation model.
# 8. After the weights are saved, the first model can be set to None to save resources in a
#    production environment or wherever required.
#
# Steps 6 and 7 are sketched in code right after this list.
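# In[ ]:

# A minimal sketch of the routing loop in steps 6 and 7. `build_sub_model` and `weight_paths`
# are hypothetical placeholders for this illustration; the real registration logic and weight
# files are defined later in this notebook.
import numpy as np

def route_and_predict(encoded_prompt, classifier, build_sub_model, weight_paths):
    # Step 6: a lightweight classifier picks the model number for this query.
    predicted_class = int(np.argmax(classifier.predict(encoded_prompt), axis=-1)[0])
    # Step 7: build only that sub-model's architecture and pull only its weights into RAM.
    sub_model = build_sub_model(predicted_class)
    sub_model.load_weights(weight_paths[predicted_class])
    return sub_model.predict(encoded_prompt)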
# ### Import

# In[6]:

import tensorflow as kratim_budhimata_tf
import numpy as np
import pandas as pd
import keras
from sklearn.metrics import accuracy_score
from duckduckgo_search import DDGS

# ### Load the Data

# In[8]:

prompts = [
    "Kratim Budhimata is evolving the AI field by Long term innovations which can make real difference.",
    "When it comes to better results in low cost one should connect with Kratim Budhimata.",
    "If one needs best results with low computation limitation then Kratim Budhimata is best place to reach out.",
    "Weather is very unpredictible in most of the areas now a days mostly in rainy seasons.",
    "Relationship is personal thing which needs to be respected for privacy.",
    "Global warming is one of the areas where the world should look into it.",
    "Best time to do the Great thing is now",
    "Bravery cannot be replaced by anything which should be present",
    "Food and Health is basic requirement for human being which should be fulfilled for better world",
    "Direction matters more than speed when it comes to take critical decisions."
]

responses = [
    "Kratim Budhimata is evolving AI by cutting edge Innovations",
    "Kratim Budhimata is building innovative solutions cost effectively",
    "Low compute and more results meaning Kratim Budhimata",
    "Weather remains unpredictible when rains",
    "Relationship is private thing which should be respected",
    "Global warming should be addressed",
    "Always do the best thing first.",
    "Bravery matters when going gets tough",
    "Food and Health is need and its not a priviledge",
    "Good direction often leads to the better places"
]

# In[9]:

labels = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

# ### Data Preprocessing

# In[11]:

tokenizer = kratim_budhimata_tf.keras.preprocessing.text.Tokenizer(filters='')
tokenizer.fit_on_texts(prompts + responses)
vocab_size = len(tokenizer.word_index) + 1

# In[12]:

max_prompt_len = max(len(p.split()) for p in prompts)
max_summary_len = max(len(r.split()) for r in responses)

# In[13]:

def encoding_function(texts, max_len):
    seqs = tokenizer.texts_to_sequences(texts)
    return kratim_budhimata_tf.keras.preprocessing.sequence.pad_sequences(seqs, maxlen=max_len, padding='post')

# In[14]:

prompts_x = encoding_function(prompts, max_prompt_len)
summary_y = encoding_function(responses, max_summary_len)
summary_onehot_y = kratim_budhimata_tf.keras.utils.to_categorical(summary_y, num_classes=vocab_size)
labels_y = kratim_budhimata_tf.keras.utils.to_categorical(labels, num_classes=2)

# In[15]:

embed_dim = 64
lstm_units = 128
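# In[ ]:

# A quick sanity check of the preprocessing (an illustrative addition): every prompt becomes a
# fixed-length integer sequence, and the one-hot summary targets gain a vocab-sized axis.
print(prompts_x.shape)          # (10, max_prompt_len)
print(summary_onehot_y.shape)   # (10, max_summary_len, vocab_size)
print(labels_y.shape)           # (10, 2)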
# ### Model Creation

# In[17]:

@keras.saving.register_keras_serializable()
class KratimBudhimataModel(kratim_budhimata_tf.keras.Model):
    def __init__(self, model_num=None, model_type=None, use_search=False,
                 max_prompt_len=256, max_summary_len=256, lstm_units=128,
                 num_classes=2, vocab_size=1500, embed_dim=128):
        super().__init__()
        # Store the configuration passed in so it round-trips through get_config/from_config.
        self.model_num = model_num
        self.model_type = model_type
        self.use_search = use_search
        self.max_prompt_len = max_prompt_len
        self.max_summary_len = max_summary_len
        self.lstm_units = lstm_units
        self.num_classes = num_classes
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.model_paths = {}     # registry: model number -> sub-model
        self.optimizers = {}      # registry: model number -> optimizer
        self.compiled_ids = set()
        self.trainable = True

    def model_register(self, model_num, model_type, use_search, query=None):
        """Build the requested sub-model type and register it under `model_num`."""
        if use_search:
            # Answer from a web search instead of a registered sub-model.
            with DDGS() as ddgs:
                results = ddgs.text(query or "")
            return results[0]['body'] if results else "No response found from search."

        if model_num in self.model_paths:
            return

        kl = kratim_budhimata_tf.keras.layers

        if model_type == "text_classification":
            input_ids = kratim_budhimata_tf.keras.Input(shape=(self.max_prompt_len,), name=f'input_ids_{model_num}')
            embedded = kl.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim,
                                    name=f'embedding_task_number_{model_num}')(input_ids)
            encoded = kl.LSTM(self.lstm_units, name=f'LSTM_Layer_{model_num}')(embedded)
            outcome = kl.Dense(self.num_classes, activation='softmax',
                               name=f'classifier_dense_{model_num}')(encoded)
            model = kratim_budhimata_tf.keras.models.Model(inputs=input_ids, outputs=outcome,
                                                           name=f"classification_model_{model_num}")

        elif model_type == "text_summarisation":
            # Encoder-decoder: encode the prompt, repeat the context vector, decode a summary.
            input_ids = kratim_budhimata_tf.keras.Input(shape=(self.max_prompt_len,), name=f'input_ids_{model_num}')
            embedded = kl.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim,
                                    name=f'embedding_task_number_{model_num}')(input_ids)
            encoded = kl.LSTM(self.lstm_units, name=f'LSTM_{model_num}')(embedded)
            repeated = kl.RepeatVector(self.max_summary_len, name=f'Repeat_Vector_{model_num}')(encoded)
            decoded = kl.LSTM(self.lstm_units, return_sequences=True, name=f'decode_LSTM_{model_num}')(repeated)
            outcome = kl.Dense(self.vocab_size, activation='softmax',
                               name=f'summary_dense_{model_num}')(decoded)
            model = kratim_budhimata_tf.keras.models.Model(inputs=input_ids, outputs=outcome,
                                                           name=f"summary_model_{model_num}")
        else:
            # Placeholder multimodal heads sharing one embedding. The image and video heads
            # pool over the sequence so their dense layers receive a fixed-size vector.
            input_ids = kratim_budhimata_tf.keras.Input(shape=(None,), dtype='int32', name=f'input_ids_{model_num}')
            embedded = kl.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim,
                                    name=f'embedding_task_number_{model_num}')(input_ids)
            text_outcome = kl.Dense(self.vocab_size, activation='softmax',
                                    name=f'text_output_{model_num}')(embedded)
            wants_image = "image" in str(model_type)
            wants_video = "video" in str(model_type)
            if wants_image or wants_video:
                pooled = kl.GlobalAveragePooling1D(name=f'pooled_{model_num}')(text_outcome)
            if wants_image:
                image = kl.Dense(224 * 224 * 3, activation='sigmoid',
                                 name=f"image_dense_{model_num}")(pooled)
                image_outcome = kl.Reshape((224, 224, 3), name=f'outcome_image_{model_num}')(image)
            if wants_video:
                video = kl.Dense(16 * 112 * 112 * 3, activation='sigmoid',
                                 name=f"video_dense_{model_num}")(pooled)
                video_outcome = kl.Reshape((16, 112, 112, 3), name=f'outcome_video_{model_num}')(video)
            if model_type == "text_to_image":
                outputs = image_outcome
            elif model_type == "text_to_video":
                outputs = video_outcome
            elif model_type == "text_to_image_and_video":
                outputs = [image_outcome, video_outcome]
            elif model_type == "text_to_text_and_image_and_video":
                outputs = [text_outcome, image_outcome, video_outcome]
            else:
                outputs = text_outcome  # plain text-to-text fallback
            model = kratim_budhimata_tf.keras.models.Model(inputs=input_ids, outputs=outputs,
                                                           name=f"model_{model_num}")

        print(f"Registered Model Num: {model_num}")
        self.model_paths[model_num] = model
        self.optimizers[model_num] = kratim_budhimata_tf.keras.optimizers.Adam(learning_rate=0.002)

    def model_creation_process(self, model_num, model_type, use_search):
        self.model_register(model_num, model_type, use_search)
        self.model_num = model_num
        sub_model = self.model_paths.get(model_num)
        if model_num not in self.compiled_ids:
            # Compile the registered sub-model with a loss matching its output type.
            if model_type in ("text_classification", "text_summarisation"):
                loss = 'categorical_crossentropy'
            elif "image" in str(model_type) or "video" in str(model_type):
                loss = 'mse'  # placeholder loss for the multimodal stubs
            else:
                loss = 'sparse_categorical_crossentropy'
            sub_model.compile(optimizer=self.optimizers[model_num], loss=loss, metrics=['accuracy'])
            print(self.optimizers[model_num])
            print(f"compilation done for {model_num}")
            self.compiled_ids.add(model_num)

    def call(self, inputs, training=False):
        if self.model_num is None:
            raise ValueError("Please provide a model number.")
        # Run only the currently selected sub-model.
        return self.model_paths[self.model_num](inputs, training=training)

    def get_config(self):
        return {
            "model_num": self.model_num,
            "model_type": self.model_type,
            "use_search": self.use_search,
            "max_prompt_len": self.max_prompt_len,
            "max_summary_len": self.max_summary_len,
            "lstm_units": self.lstm_units,
            "num_classes": self.num_classes,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)
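# In[ ]:

# Because the constructor accepts the configuration as keyword arguments, the per-attribute
# assignments in the initialization cell below can equivalently be passed in one call.
# `example_model` is only an illustration and is not used afterwards.
example_model = KratimBudhimataModel(num_classes=2, vocab_size=vocab_size, embed_dim=embed_dim,
                                     lstm_units=lstm_units, max_prompt_len=max_prompt_len,
                                     max_summary_len=max_summary_len)
del example_model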
# ### Model Initialization

# In[19]:

first_model = KratimBudhimataModel()
first_model.use_search = False
first_model.num_classes = 2
first_model.vocab_size = vocab_size
first_model.embed_dim = embed_dim
first_model.lstm_units = lstm_units
first_model.max_prompt_len = max_prompt_len
first_model.max_summary_len = max_summary_len

# ### Model number 1 is for classification and 2 is for summarisation. Model numbers exist so that, with many models registered, we can load just the one that can answer a particular user query: the classification model first predicts a label, that label is the model number, and that model is then loaded to predict the response. Prediction therefore involves two steps.

# In[21]:

first_model.model_creation_process(1, model_type="text_classification", use_search=False)

# In[22]:

first_model.model_creation_process(2, model_type="text_summarisation", use_search=False)
first_model.summary()

# ### Model Exploration

# In[24]:

first_model.layers

# ### Classification Model

# In[26]:

model_classification = first_model.layers[0]
model_classification

# ### Summarisation Model

# In[28]:

model_summary = first_model.layers[1]
model_summary

# In[29]:

model_classification.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# In[30]:

model_summary.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# ### Classification Model Training

# In[32]:

model_classification.fit(prompts_x, labels_y, epochs=200)

# ### Summarisation Model Training

# In[34]:

model_summary.fit(prompts_x, summary_onehot_y, epochs=300)

# ### Classification Model Summary

# In[36]:

model_classification.summary()

# ### Summarisation Model Summary

# In[38]:

model_summary.summary()

# ### Classification Model Weight Saving

# In[40]:

model_classification.save_weights("model_classification_trained.weights.h5")

# ### Summarisation Model Weight Saving

# In[42]:

model_summary.save_weights("model_summary_trained.weights.h5")
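# In[ ]:

# Step 8 of the solution (an illustrative sketch): with the weights safely on disk, the first
# model can be released to free RAM. For the memory to actually be reclaimed, every reference
# to the sub-models must be dropped, not just the registry entries.
import gc

first_model.model_paths = {}
model_classification = None
model_summary = None
first_model = None
gc.collect()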
# ### Second model creation: classification model initialisation and classification model weight loading
# #### The config parameters must match the first model because we load the first model's weights.

# In[45]:

second_model = KratimBudhimataModel()
second_model.use_search = False
second_model.num_classes = 2
second_model.vocab_size = vocab_size
second_model.embed_dim = embed_dim
second_model.lstm_units = lstm_units
second_model.max_prompt_len = max_prompt_len
second_model.max_summary_len = max_summary_len

# In[46]:

second_model.model_creation_process(1, model_type="text_classification", use_search=False)
second_model.summary()

# ### Load the classification model's trained and saved weights into the second model's layer 0 for prediction

# In[48]:

second_model.layers[0].load_weights("model_classification_trained.weights.h5")
second_model.layers[0].summary()

# ### Prediction: the classification model first predicts the class of the prompt, and the relevant model is then used for the response; for example, if the class is 1, the summarisation model is called. Only the relevant model runs at prediction time, which saves a huge amount of RAM and CPU/GPU computation.

# In[50]:

index_word = {i: w for w, i in tokenizer.word_index.items()}

def decoder_function_sequence(seq):
    return ' '.join([index_word.get(i, '') for i in seq if i != 0])

# In[51]:

prediction_class = second_model.layers[0].predict(prompts_x)

# ### Predicted Classes and Classification Model Accuracy

# In[54]:

predicted_labels = np.argmax(prediction_class, axis=1)

# In[55]:

true_classes = np.array(labels)

# In[56]:

classification_model_accuracy = accuracy_score(true_classes, predicted_labels)

# In[57]:

classification_model_accuracy

# ### Check which prompts have class 1 and build the subset of prompts with that class

# In[59]:

true_classes

# In[61]:

predicted_labels

# ### The first five labels are class 1, which means those prompts should be answered by the summarisation model. So create the subset of prompts for prediction with the summarisation model.

# In[63]:

second_model.model_creation_process(2, model_type="text_summarisation", use_search=False)

# ### Second model: initialise the summarisation model (layer 1) and load the weights that were trained and saved by the first model's summarisation model

# In[66]:

second_model.layers[1].load_weights("model_summary_trained.weights.h5")

# ### Create a subset of the prompts whose label is 1

# In[68]:

prompts_x[0:5]

# In[69]:

summary_y[0:5]

# ### Summarisation Model Prediction

# In[71]:

prediction_summary_probs = second_model.layers[1].predict(prompts_x[0:5])
predicted_summary_indices = np.argmax(prediction_summary_probs, axis=-1)
predicted_summary_texts = [decoder_function_sequence(seq) for seq in predicted_summary_indices]

# In[72]:

predicted_summary_texts

# In[73]:

actual_summary_texts = [decoder_function_sequence(seq) for seq in summary_y[0:5]]
actual_summary_texts

# ### Summarisation Model Accuracy

# In[75]:

summary_match_outcome = [p.strip() == t.strip() for p, t in zip(predicted_summary_texts, actual_summary_texts)]
summary_accuracy = sum(summary_match_outcome) / len(summary_match_outcome)
print("Summary Model Accuracy (Pass@1):", round(summary_accuracy * 100, 2), "%")

# In[76]:

print("\n Predicted and Actual Comparison")
for i in range(len(prompts[0:5])):
    print(f"- Prompt: {prompts[i]}")
    print(f"  Predicted Class: {predicted_labels[i]} | Actual Class: {true_classes[i]}")
    print(f"  Predicted Summary: {predicted_summary_texts[i]}")
    print(f"  Actual Summary   : {actual_summary_texts[i]}")
    print(f"  Summary Match Outcome : {'True' if summary_match_outcome[i] else 'False'}\n")
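# In[ ]:

# Putting the two prediction steps together as one helper (a sketch built from the objects
# defined above; the name `answer_query` is introduced here for illustration): classify the
# prompt first, then run only the sub-model the classifier selected.
def answer_query(raw_prompt):
    x = encoding_function([raw_prompt], max_prompt_len)
    predicted = int(np.argmax(second_model.layers[0].predict(x), axis=1)[0])
    if predicted == 1:  # class 1 was labelled as the summarisation task
        probs = second_model.layers[1].predict(x)
        return decoder_function_sequence(np.argmax(probs, axis=-1)[0])
    return "No sub-model is registered for this class in this demo."

answer_query(prompts[0])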
# ## Summary model accuracy for all prompts

# In[78]:

prediction_summary_probs = second_model.layers[1].predict(prompts_x)

# In[79]:

predicted_summary_indices = np.argmax(prediction_summary_probs, axis=-1)
predicted_summary_texts = [decoder_function_sequence(seq) for seq in predicted_summary_indices]

# In[80]:

actual_summary_texts = [decoder_function_sequence(seq) for seq in summary_y]

# In[81]:

predicted_summary_texts

# In[82]:

actual_summary_texts

# In[83]:

summary_match_outcome = [p.strip() == t.strip() for p, t in zip(predicted_summary_texts, actual_summary_texts)]
summary_accuracy = sum(summary_match_outcome) / len(summary_match_outcome)
print("Summary Model Accuracy (Pass@1):", round(summary_accuracy * 100, 2), "%")

# In[84]:

print("\n Predicted and Actual Comparison")
for i in range(len(prompts)):
    print(f"- Prompt: {prompts[i]}")
    print(f"  Predicted Class: {predicted_labels[i]} | Actual Class: {true_classes[i]}")
    print(f"  Predicted Summary: {predicted_summary_texts[i]}")
    print(f"  Actual Summary   : {actual_summary_texts[i]}")
    print(f"  Summary Match Outcome : {'True' if summary_match_outcome[i] else 'False'}\n")
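# In[ ]:

# An illustrative complement to the exact-match score above (an addition, not part of the
# original evaluation): per-token accuracy over the non-padding positions of the targets.
mask = summary_y != 0
per_token_accuracy = (predicted_summary_indices == summary_y)[mask].mean()
print("Per-token Summary Accuracy:", round(float(per_token_accuracy) * 100, 2), "%")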