#!/bin/bash # === SYSTEM PROMPT === # This script builds a Bengali language model using a multi-agent system with human-in-the-loop (HIL) capabilities. # Advanced Features: # - Real-Time Streaming: Displays a colorful, dynamic status dashboard in the terminal. # - Robust Error Handling: Validates setup, API calls, and file operations with detailed logging. # - Modern Interface: Uses ANSI colors, progress bars, and a boxed header for a polished look. # - Loop and Iteration: Monitors execution, retries on failure, and ensures task completion. # - Code Execution: Executes Python and Node.js code locally for preprocessing and evaluation. # - Tools: Provides Python and Node.js REPLs for file operations and analysis. # - Time Travel: Logs actions with timestamps for debugging and auditing. # - Subgraph Support: Encapsulates tasks (data collection, preprocessing, training, evaluation) as reusable nodes. # - Memory: Persists state across agent interactions using a key-value store. # - API Integrations: Uses Together, Cohere, and Gemini APIs (Together as primary for text generation). # - File Operations: Creates, edits, and validates files with error checking. # - Output: Saves the model to /storage/BA73-022B/bd/bd-model-genaretions/model.pt. # === CONFIGURATION === PROJECT_DIR="/storage/BA73-022B/bd/bd-model-genaretions" # Updated as per user request LOG_FILE="$PROJECT_DIR/logs/actions.log" MEMORY_FILE="$PROJECT_DIR/memory.txt" REQUESTS_DIR="$PROJECT_DIR/requests" RESPONSES_DIR="$PROJECT_DIR/responses" DATA_DIR="$PROJECT_DIR/data" STATUS_DIR="$PROJECT_DIR/status" # API Keys TOGETHER_API_KEY="07f08ca73c50496a3406ff621912254a67370d576822f1921f77eed47e649545" COHERE_API_KEY="rvpLjkuzZPsoHGeIxqQxttTTIt4IxGUS5FOINU4L" GEMINI_API_KEY="AIzaSyAQNxQU0WnegEnMfP6LCwkVw-PUtR11qaI" # === SETUP === echo "Initializing project directories..." for dir in "$PROJECT_DIR" "$DATA_DIR" "$REQUESTS_DIR" "$RESPONSES_DIR" "$PROJECT_DIR/logs" "$STATUS_DIR"; do mkdir -p "$dir" if [ $? -ne 0 ]; then echo -e "\033[1;31mError: Failed to create directory $dir\033[0m" exit 1 fi done touch "$LOG_FILE" "$MEMORY_FILE" if [ ! -f "$LOG_FILE" ] || [ ! -f "$MEMORY_FILE" ]; then echo -e "\033[1;31mError: Failed to create log or memory file\033[0m" exit 1 fi echo "[$(date)] Starting Bengali language model generation" >> "$LOG_FILE" # === UTILITY FUNCTIONS === # Memory Management function set_memory { local key="$1" local value="$2" grep -v "^$key=" "$MEMORY_FILE" > "$MEMORY_FILE.tmp" && mv "$MEMORY_FILE.tmp" "$MEMORY_FILE" echo "$key=$value" >> "$MEMORY_FILE" } function get_memory { local key="$1" local value=$(grep "^$key=" "$MEMORY_FILE" | cut -d'=' -f2) echo "${value:-false}" } # Logging (Time Travel) function log_action { local agent_id="$1" local action="$2" echo "[$(date)] [Agent $agent_id] $action" >> "$LOG_FILE" } # Status Updates function set_status { local agent_id="$1" local status="$2" echo "$status" > "$STATUS_DIR/agent$agent_id.status" } # === TOOL CALLING FUNCTIONS === function run_python { local code="$1" log_action "Tool" "Running Python code: $code" local output=$(python3 -c "$code" 2>> "$LOG_FILE") local exit_code=$? if [ $exit_code -ne 0 ]; then log_action "Tool" "Python execution failed with exit code $exit_code" return $exit_code fi echo "$output" } function run_node { local code="$1" log_action "Tool" "Running Node.js code: $code" local output=$(node -e "$code" 2>> "$LOG_FILE") local exit_code=$? if [ $exit_code -ne 0 ]; then log_action "Tool" "Node.js execution failed with exit code $exit_code" return $exit_code fi echo "$output" } # === API CALLING FUNCTIONS === function call_together_api { local prompt="$1" curl -s -m 10 -X POST "https://api.together.ai/v1/completions" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -d "{\"prompt\": \"$prompt\", \"model\": \"some_model\", \"max_tokens\": 100}" } function call_cohere_api { local prompt="$1" curl -s -m 10 -X POST "https://api.cohere.ai/generate" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $COHERE_API_KEY" \ -d "{\"prompt\": \"$prompt\", \"max_tokens\": 100}" } function call_gemini_api { local prompt="$1" curl -s -m 10 -X POST "https://api.gemini.ai/v1/completions" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $GEMINI_API_KEY" \ -d "{\"prompt\": \"$prompt\", \"model\": \"some_model\"}" } function generate_text { local prompt="$1" local api="$2" local attempts=3 for ((i=1; i<=attempts; i++)); do local response text case "$api" in together) response=$(call_together_api "$prompt") text=$(echo "$response" | jq -r '.choices[0].text' 2>/dev/null) ;; cohere) response=$(call_cohere_api "$prompt") text=$(echo "$response" | jq -r '.generations[0].text' 2>/dev/null) ;; gemini) response=$(call_gemini_api "$prompt") text=$(echo "$response" | jq -r '.choices[0].text' 2>/dev/null) ;; *) text="Unknown API" ;; esac if [ -n "$text" ] && [ "$text" != "null" ] && [[ ! "$text" =~ "Error" ]]; then echo "$text" return 0 fi log_action "API" "Attempt $i failed for $api API, retrying..." sleep 2 done log_action "API" "Failed to generate text with $api after $attempts attempts" return 1 } # === HUMAN-IN-THE-LOOP REQUEST FUNCTION === function request_human_input { local agent_id="$1" local request="$2" log_action "$agent_id" "Requesting human input: $request" set_status "$agent_id" "Waiting for human input" echo "$request" > "$REQUESTS_DIR/agent$agent_id.txt" while [ ! -f "$RESPONSES_DIR/agent$agent_id.txt" ]; do sleep 1 done local response=$(cat "$RESPONSES_DIR/agent$agent_id.txt") rm "$RESPONSES_DIR/agent$agent_id.txt" log_action "$agent_id" "Received human response: $response" set_status "$agent_id" "Processing human input" echo "$response" } # === SUBGRAPH FUNCTIONS === function collect_data { set_status 1 "Generating Bengali text via API" local prompt="Generate a sample of Bengali text for language model training." local text=$(generate_text "$prompt" "together") if [ $? -eq 0 ]; then set_status 1 "Saving data to file" echo "$text" > "$DATA_DIR/bengali_text.txt" if [ $? -ne 0 ]; then set_status 1 "Error: Failed to save data" log_action 1 "Failed to write to $DATA_DIR/bengali_text.txt" return 1 fi log_action 1 "Data saved to $DATA_DIR/bengali_text.txt" set_memory "data_collected" "true" else set_status 1 "API error" log_action 1 "Failed to collect data due to API error" return 1 fi set_status 1 "Data collection completed" } function preprocess_data { set_status 2 "Waiting for data collection" while [ "$(get_memory 'data_collected')" != "true" ]; do sleep 1 done set_status 2 "Analyzing data" if [ ! -f "$DATA_DIR/bengali_text.txt" ]; then set_status 2 "Error: Data file missing" log_action 2 "Error: No data file found at $DATA_DIR/bengali_text.txt" return 1 fi local output=$(run_python "with open('$DATA_DIR/bengali_text.txt', 'r') as f: text = f.read(); print(f'Text length: {len(text)} characters')") if [ $? -ne 0 ]; then set_status 2 "Error: Analysis failed" log_action 2 "Preprocessing analysis failed" return 1 fi log_action 2 "Analysis result: $output" set_status 2 "Awaiting human review" local response=$(request_human_input 2 "Review the Bengali text in $DATA_DIR/bengali_text.txt (approve/reject/edit)") case "$response" in approve) set_status 2 "Saving preprocessed data" echo "Data preprocessed" > "$DATA_DIR/preprocessed_text.txt" log_action 2 "Preprocessing approved, saved to $DATA_DIR/preprocessed_text.txt" set_memory "data_preprocessed" "true" ;; edit) set_status 2 "Editing data" log_action 2 "Human requested edit; applying transformation" run_python "with open('$DATA_DIR/bengali_text.txt', 'r') as f: text = f.read(); with open('$DATA_DIR/preprocessed_text.txt', 'w') as f: f.write(text.upper())" if [ $? -eq 0 ]; then set_memory "data_preprocessed" "true" else set_status 2 "Error: Edit failed" return 1 fi ;; *) set_status 2 "Preprocessing rejected" log_action 2 "Preprocessing rejected by human" return 1 ;; esac } function train_model { set_status 3 "Waiting for preprocessing" while [ "$(get_memory 'data_preprocessed')" != "true" ]; do sleep 1 done set_status 3 "Training model" if [ ! -f "$DATA_DIR/preprocessed_text.txt" ]; then set_status 3 "Error: Preprocessed data missing" log_action 3 "Error: No preprocessed data found at $DATA_DIR/preprocessed_text.txt" return 1 fi echo "Training Bengali model..." sleep 2 # Simulate training echo "Model trained" > "$PROJECT_DIR/model.pt" if [ $? -ne 0 ]; then set_status 3 "Error: Failed to save model" log_action 3 "Failed to save model to $PROJECT_DIR/model.pt" return 1 fi log_action 3 "Model saved to $PROJECT_DIR/model.pt" set_memory "model_trained" "true" set_status 3 "Training completed" } function evaluate_model { set_status 4 "Waiting for model training" while [ "$(get_memory 'model_trained')" != "true" ]; do sleep 1 done set_status 4 "Evaluating model" if [ ! -f "$PROJECT_DIR/model.pt" ]; then set_status 4 "Error: Model file missing" log_action 4 "Error: No model file found at $PROJECT_DIR/model.pt" return 1 fi local output=$(run_python "print('Simulated accuracy: 85%')") if [ $? -ne 0 ]; then set_status 4 "Error: Evaluation failed" log_action 4 "Evaluation failed" return 1 fi set_status 4 "Awaiting human review" local response=$(request_human_input 4 "Review model performance: $output (approve/reject/fix)") case "$response" in approve) log_action 4 "Evaluation approved" set_memory "evaluation_completed" "true" ;; fix) set_status 4 "Fixing model" log_action 4 "Human requested fix; simulating correction" echo "Fixed model" > "$PROJECT_DIR/model.pt" set_memory "evaluation_completed" "true" ;; *) set_status 4 "Evaluation rejected" log_action 4 "Evaluation rejected by human" return 1 ;; esac set_status 4 "Evaluation completed" } # === AGENT FUNCTIONS === function agent1 { set_status 1 "Starting data collection" until collect_data; do set_status 1 "Retrying data collection" log_action 1 "Retrying data collection after failure" sleep 2 done set_status 1 "Data collection completed" set_memory "agent1_completed" "true" } function agent2 { set_status 2 "Starting preprocessing" until preprocess_data; do set_status 2 "Retrying preprocessing" log_action 2 "Retrying preprocessing after failure" sleep 2 done set_status 2 "Preprocessing completed" set_memory "agent2_completed" "true" } function agent3 { set_status 3 "Starting training" until train_model; do set_status 3 "Retrying training" log_action 3 "Retrying training after failure" sleep 2 done set_status 3 "Training completed" set_memory "agent3_completed" "true" } function agent4 { set_status 4 "Starting evaluation" until evaluate_model; do set_status 4 "Retrying evaluation" log_action 4 "Retrying evaluation after failure" sleep 2 done set_status 4 "Evaluation completed" set_memory "agent4_completed" "true" } # === STATUS DISPLAY === function display_status { echo -e "\033[1;34m┌─────────────────────── STATUS DASHBOARD ──────────────────────┐\033[0m" echo -e "\033[1;34m│ Bengali Language Model Generation - $(date +%H:%M:%S) │\033[0m" echo -e "\033[1;34m└───────────────────────────────────────────────────────────────┘\033[0m" local completed=0 for agent in 1 2 3 4; do local status="Not started" if [ -f "$STATUS_DIR/agent$agent.status" ]; then status=$(cat "$STATUS_DIR/agent$agent.status") fi if [ "$(get_memory "agent${agent}_completed")" == "true" ]; then status="Completed" ((completed++)) fi case $agent in 1) color="\033[1;32m" ;; # Green 2) color="\033[1;33m" ;; # Yellow 3) color="\033[1;34m" ;; # Blue 4) color="\033[1;35m" ;; # Magenta esac echo -e "${color}Agent $agent: $status\033[0m" done local progress=$((completed * 25)) # 25% per agent echo -e "\033[1;36mProgress: [$completed/4] ${progress}%\033[0m" } # === HUMAN-IN-THE-LOOP HANDLER === function hil_handler { while true; do clear display_status if [ "$(get_memory 'agent1_completed')" == "true" ] && \ [ "$(get_memory 'agent2_completed')" == "true" ] && \ [ "$(get_memory 'agent3_completed')" == "true" ] && \ [ "$(get_memory 'agent4_completed')" == "true" ]; then log_action "HIL" "All agents completed successfully" echo -e "\033[1;32m✓ All agents completed! Model generation successful.\033[0m" break fi for req_file in "$REQUESTS_DIR"/*; do if [ -f "$req_file" ]; then local agent_id=$(basename "$req_file" .txt | sed 's/agent//') local request=$(cat "$req_file") echo -e "\n\033[1;33mAgent $agent_id requests your input:\033[0m $request" echo -e "\033[1;33mEnter response (e.g., approve/reject/edit/fix):\033[0m" read -r human_input if [ -z "$human_input" ]; then echo -e "\033[1;31mError: Input cannot be empty. Try again.\033[0m" continue fi echo "$human_input" > "$RESPONSES_DIR/agent$agent_id.txt" rm "$req_file" fi done sleep 1 done } # === CLEANUP ON EXIT === function cleanup { echo -e "\033[1;31mScript interrupted. Cleaning up...\033[0m" rm -f "$REQUESTS_DIR"/* "$RESPONSES_DIR"/* 2>/dev/null log_action "Main" "Script terminated by user" exit 1 } trap cleanup INT TERM # === MAIN EXECUTION === echo -e "\033[1;32mStarting Bengali language model generation...\033[0m" log_action "Main" "Script execution started" agent1 & agent2 & agent3 & agent4 & hil_handler echo -e "\033[1;32mProcess completed successfully!\033[0m" echo "Model saved at: $PROJECT_DIR/model.pt" echo "Detailed logs available at: $LOG_FILE"