Commit 49f4666
1 Parent(s): a39812a
Revised docker file
.ipynb_checkpoints/Dockerfile-checkpoint
ADDED
@@ -0,0 +1,30 @@
+# Use an official Python runtime as a parent image
+FROM python:3.8-slim-buster
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Install system and Python dependencies
+RUN apt-get update && \
+    apt-get install -y build-essential openslide-tools libgl1-mesa-glx && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Add a non-root user with a specified UID
+ARG USER_ID
+RUN adduser --disabled-password --gecos '' --uid $USER_ID myuser
+
+# Copy the entire genomic_plip_model directory contents into the container at /app
+COPY ./ /app/
+
+# Install Python dependencies
+RUN pip install -r requirements.txt
+
+# Set the user to the newly created non-root user
+USER myuser
+
+# Expose a port (if necessary for your application)
+EXPOSE 8888
+
+# Set the entrypoint to a shell command
+ENTRYPOINT ["/bin/bash"]
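Since the image takes the UID as a build argument, a typical build-and-run sequence looks like the sketch below. The image tag genomic-plip and the port mapping are illustrative assumptions, not part of this commit:

    # Pass the host UID so files written from the container keep your ownership
    docker build --build-arg USER_ID=$(id -u) -t genomic-plip .

    # Start an interactive shell, publishing the exposed JupyterLab port
    docker run -it -p 8888:8888 genomic-plip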
.ipynb_checkpoints/requirements-checkpoint.txt
ADDED
@@ -0,0 +1,12 @@
+numpy==1.19.2
+pandas==1.3.4
+matplotlib==3.5.2
+openslide-python==1.1.2
+scikit-image==0.18.1
+scikit-learn==1.2.1
+tqdm==4.62.3
+Pillow==9.4.0
+transformers==4.33.2
+torch==2.0.1
+jupyterlab==3.2.1
+tensorflow==2.6.1
.ipynb_checkpoints/train_omics_plip_model-checkpoint.py
ADDED
@@ -0,0 +1,89 @@
+import torch
+import argparse
+from torch import optim
+from torch.utils.data import DataLoader
+from scripts.genomic_plip_model import GenomicPLIPModel
+from scripts.tile_file_dataloader import FlatTileDataset
+from transformers import CLIPVisionModel
+
+def train_model(data_dir, model_save_path, pretrained_model_path, lr, num_epochs, train_batch_size, validation_batch_size, num_workers):
+
+    # Load datasets
+    train_dataset = FlatTileDataset(data_dir=f'{data_dir}/train')
+    train_data_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=num_workers)
+
+    validation_dataset = FlatTileDataset(data_dir=f'{data_dir}/validate')
+    validation_data_loader = DataLoader(validation_dataset, batch_size=validation_batch_size, shuffle=False, num_workers=num_workers)
+
+    # Initialize the model
+    base_model = CLIPVisionModel.from_pretrained(pretrained_model_path)
+    custom_model = GenomicPLIPModel(base_model)
+
+    criterion = torch.nn.CosineSimilarity(dim=1)
+    optimizer = optim.Adam(custom_model.parameters(), lr=lr)
+
+
+    for epoch in range(num_epochs):
+        # Training loop
+        custom_model.train()
+        train_loss = 0.0
+
+        for batch_images, batch_scores in train_data_loader:
+            optimizer.zero_grad()
+
+            batch_loss = 0
+            for img, score in zip(batch_images, batch_scores):
+                vision_features, score_features = custom_model(img.unsqueeze(0), score.unsqueeze(0))
+                cos_sim = criterion(score_features, vision_features)
+                loss = 1-cos_sim.mean()
+
+                batch_loss += loss.item()
+                loss.backward()
+
+            optimizer.step()
+            train_loss += batch_loss
+            print(f"Batch Cosine Similarity {batch_loss:.4f}")
+
+        avg_train_loss = train_loss / len(train_data_loader)
+        print(f"Epoch [{epoch+1}/{num_epochs}], Training Cosine Similarity: {avg_train_loss:.4f}")
+
+        # Validation loop
+        custom_model.eval()
+        validation_loss = 0.0
+
+        with torch.no_grad():
+            for batch_images, batch_scores in validation_data_loader:
+                batch_loss = 0
+                for img, score in zip(batch_images, batch_scores):
+                    vision_features, score_features = custom_model(img.unsqueeze(0), score.unsqueeze(0))
+                    cos_sim = criterion(score_features, vision_features)
+                    loss = 1-cos_sim.mean()
+
+                    batch_loss += loss.item()
+
+                validation_loss += batch_loss
+                print(f"Validation Batch Cosine Similarity {batch_loss:.4f}")
+
+        avg_validation_loss = validation_loss / len(validation_data_loader)
+        print(f"Epoch [{epoch+1}/{num_epochs}], Validation Cosine Similarity: {avg_validation_loss:.4f}")
+
+    # Save the trained model
+    torch.save(custom_model.state_dict(), model_save_path)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Train the Genomic PLIP Model')
+    parser.add_argument('--data_dir', type=str, default='Datasets/train_03', help='Directory containing the train, validate, and test datasets.')
+    parser.add_argument('--model_save_path', type=str, default='genomic_plip.pth', help='Path to save the trained model.')
+    parser.add_argument('--pretrained_model_path', type=str, default='./plip', help='Path to the pretrained CLIP model.')
+
+    parser.add_argument('--lr', type=float, default=0.00001, help='Learning rate for the optimizer.')
+    parser.add_argument('--num_epochs', type=int, default=1, help='Number of epochs to train for.')
+    parser.add_argument('--train_batch_size', type=int, default=128, help='Batch size for the training data loader.')
+    parser.add_argument('--validation_batch_size', type=int, default=128, help='Batch size for the validation data loader.')
+    parser.add_argument('--num_workers', type=int, default=32, help='Number of worker threads for data loading.')
+
+
+    args = parser.parse_args()
+
+    train_model(args.data_dir, args.model_save_path, args.pretrained_model_path, args.lr, args.num_epochs, args.train_batch_size, args.validation_batch_size, args.num_workers)
+
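For intuition about the objective in this script: torch.nn.CosineSimilarity(dim=1) returns values in [-1, 1], so 1 - cos_sim is a non-negative distance that is 0 when the score and vision embeddings are aligned and 2 when they are opposed. A minimal standalone sketch with dummy tensors (no model involved):

    import torch

    criterion = torch.nn.CosineSimilarity(dim=1)
    a = torch.tensor([[1.0, 0.0]])

    print(1 - criterion(a, torch.tensor([[2.0, 0.0]])).mean())   # tensor(0.), aligned
    print(1 - criterion(a, torch.tensor([[0.0, 1.0]])).mean())   # tensor(1.), orthogonal
    print(1 - criterion(a, torch.tensor([[-1.0, 0.0]])).mean())  # tensor(2.), opposed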
Dockerfile
CHANGED
@@ -10,16 +10,20 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-#
-
-
+# Add a non-root user with a specified UID
+ARG USER_ID
+RUN adduser --disabled-password --gecos '' --uid $USER_ID myuser
 
+# Copy the entire genomic_plip_model directory contents into the container at /app
 COPY ./ /app/
+
 # Install Python dependencies
-RUN pip install
+RUN pip install -r requirements.txt
 
-#
+# Set the user to the newly created non-root user
+USER myuser
 
+# Expose a port (if necessary for your application)
 EXPOSE 8888
 
 # Set the entrypoint to a shell command
train_omics_plip_model.py
CHANGED
@@ -35,7 +35,7 @@ def train_model(data_dir, model_save_path, pretrained_model_path, lr, num_epochs
             for img, score in zip(batch_images, batch_scores):
                 vision_features, score_features = custom_model(img.unsqueeze(0), score.unsqueeze(0))
                 cos_sim = criterion(score_features, vision_features)
-                loss = -cos_sim.mean()
+                loss = 1-cos_sim.mean()
 
                 batch_loss += loss.item()
                 loss.backward()
@@ -57,7 +57,7 @@ def train_model(data_dir, model_save_path, pretrained_model_path, lr, num_epochs
                 for img, score in zip(batch_images, batch_scores):
                     vision_features, score_features = custom_model(img.unsqueeze(0), score.unsqueeze(0))
                     cos_sim = criterion(score_features, vision_features)
-                    loss = -cos_sim.mean()
+                    loss = 1-cos_sim.mean()
 
                     batch_loss += loss.item()
 
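Note that this change only adds a constant to the loss: the gradients, and hence the optimization trajectory, are identical for -cos_sim.mean() and 1-cos_sim.mean(); the +1 shifts the printed values into the non-negative range [0, 2] so they read as a distance. A quick sketch verifying this with dummy tensors:

    import torch

    cos = torch.nn.CosineSimilarity(dim=1)
    x = torch.randn(1, 8, requires_grad=True)
    y = torch.randn(1, 8)

    g_old, = torch.autograd.grad(-cos(x, y).mean(), x)
    g_new, = torch.autograd.grad(1 - cos(x, y).mean(), x)
    print(torch.allclose(g_old, g_new))  # True: the constant does not affect gradients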