Spaces:

Chintan-Shah
/

CLIPInference

Sleeping

App Files Files Community

CLIPInference / app.py

Chintan-Shah

Update app.py

18dbf41 verified over 1 year ago

raw

history blame contribute delete

1.9 kB

	import os
	import clip
	import torch
	from torchvision.datasets import CIFAR100
	from PIL import Image
	import gradio as gr

	# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
	device = "cpu"
	if torch.cuda.is_available():
	    device = "cuda"

	# Load the ViT-B/32 CLIP model together with its image preprocessing transform.
	model, preprocess = clip.load('ViT-B/32', device=device)

	# Fetch the CIFAR-100 test split (downloaded into ~/.cache when missing); only its
	# class names are used below, to build one text prompt per class.
	_dataset_root = os.path.expanduser("~/.cache")
	cifar100 = CIFAR100(root=_dataset_root, train=False, download=True)

	# Tokenise every class prompt in a single call and move the batch to the device.
	_class_prompts = [f"a photo of a {c}" for c in cifar100.classes]
	text_inputs = clip.tokenize(_class_prompts).to(device)

	# Filled on first use with the normalised prompt embeddings. The prompts and
	# the model are fixed at module load, so they only need encoding once.
	_TEXT_FEATURE_CACHE = []


	def _normalized_text_features():
	    """Return the unit-length CLIP embeddings of ``text_inputs``, computed once."""
	    if not _TEXT_FEATURE_CACHE:
	        with torch.no_grad():
	            features = model.encode_text(text_inputs)
	        features /= features.norm(dim=-1, keepdim=True)
	        _TEXT_FEATURE_CACHE.append(features)
	    return _TEXT_FEATURE_CACHE[0]


	def generateOutput(source):
	    """Classify an image against the CIFAR-100 class prompts with CLIP.

	    Args:
	        source: Image pixels as an array exposing ``astype`` (what the
	            ``gr.Image`` input hands over), or ``None`` when nothing was
	            uploaded.

	    Returns:
	        A text block starting with "Top predictions:" followed by the five
	        best-matching class names and their softmax percentages, or a short
	        notice when ``source`` is ``None``.
	    """
	    if source is None:
	        # Submitting without an image used to fail on ``None.astype``.
	        return "No image provided. Please upload an image to classify."

	    # Let Pillow infer the mode from the array shape and convert afterwards,
	    # so grayscale and RGBA arrays are handled as well as plain RGB ones.
	    image = Image.fromarray(source.astype('uint8')).convert('RGB')
	    image_input = preprocess(image).unsqueeze(0).to(device)

	    with torch.no_grad():
	        image_features = model.encode_image(image_input)
	    image_features /= image_features.norm(dim=-1, keepdim=True)
	    text_features = _normalized_text_features()

	    # Scaled cosine similarity turned into a distribution over the classes;
	    # keep the five most likely ones.
	    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
	    values, indices = similarity[0].topk(5)

	    # Convert to plain Python numbers before indexing and formatting.
	    lines = [
	        f"{cifar100.classes[index]:>16s}: {100 * value:.2f}% \n"
	        for value, index in zip(values.tolist(), indices.tolist())
	    ]
	    return "\nTop predictions:\n" + "".join(lines)

	# Texts shown at the top of the Gradio page.
	title = "CLIP Classification Inference Trials"
	description = "Shows the CLIP Classification based on CIFAR100 data with your own image"

	# Sample images offered below the input; one single-element row per example.
	examples = [
	    ["Elephants.jpg"],
	    ["bloom-blooming-blossom-462118.jpg"],
	    ["Puppies.jpg"],
	    ["photo2.JPG"],
	    ["MultipleItems.jpg"],
	]

	# One image in, one text box out, wired to generateOutput.
	demo = gr.Interface(
	    fn=generateOutput,
	    inputs=[gr.Image(width=256, height=256, label="Input Image")],
	    outputs=[gr.Text()],
	    title=title,
	    description=description,
	    examples=examples,
	    cache_examples=False,
	)
	demo.launch()