<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>DocuSniffer Pro</title>
<link rel="icon" type="image/x-icon" href="/static/favicon.ico">
<script src="https://cdn.tailwindcss.com"></script>
<!-- Feather icons: load the library once (it was previously included twice from two CDNs). -->
<script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/vanta@latest/dist/vanta.net.min.js"></script>
<style>
/* Animate width changes of the progress bar fill. */
.progress-bar {
  transition: width 0.3s ease;
}

/* Fixed-height, vertically scrollable document preview area. */
.preview-pane {
  height: 60vh;
  overflow-y: auto;
}

/* Hover feedback for rows in the search results list. */
.search-result:hover {
  background-color: rgba(59, 130, 246, 0.1);
  cursor: pointer;
}
</style>
</head>
<body class="bg-gray-50" id="vanta-bg">
<div class="container mx-auto px-4 py-8">
<!-- Header: application title with Help and Settings actions -->
<header class="flex justify-between items-center mb-8">
  <div class="flex items-center space-x-2">
    <!-- Icons next to visible text are decorative, so they are hidden from assistive tech. -->
    <i data-feather="search" class="text-blue-500" aria-hidden="true"></i>
    <h1 class="text-3xl font-bold text-gray-800">DocuSniffer Pro</h1>
  </div>
  <div class="flex space-x-4">
    <button type="button" class="bg-blue-500 hover:bg-blue-600 text-white px-4 py-2 rounded-lg flex items-center space-x-2">
      <i data-feather="help-circle" aria-hidden="true"></i>
      <span>Help</span>
    </button>
    <button type="button" class="bg-gray-200 hover:bg-gray-300 px-4 py-2 rounded-lg flex items-center space-x-2">
      <i data-feather="settings" aria-hidden="true"></i>
      <span>Settings</span>
    </button>
  </div>
</header>
<!-- Main Menu: the five workflow steps, in order. Steps 2-5 start out styled as
     inactive (opacity-50 + cursor-not-allowed classes). -->
<div class="bg-white rounded-xl shadow-md p-6 mb-8">
  <div class="flex flex-wrap justify-between gap-4">
    <!-- Step 1 -->
    <div class="flex-1 min-w-[200px]">
      <button type="button" id="selectFolderBtn" class="w-full bg-blue-100 hover:bg-blue-200 text-blue-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all">
        <i data-feather="folder" class="text-2xl mb-2" aria-hidden="true"></i>
        <span class="font-medium">1. Select Image Folder</span>
      </button>
    </div>
    <!-- Step 2 -->
    <div class="flex-1 min-w-[200px]">
      <button type="button" id="convertToPdfBtn" class="w-full bg-gray-100 hover:bg-gray-200 text-gray-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all opacity-50 cursor-not-allowed">
        <i data-feather="file-text" class="text-2xl mb-2" aria-hidden="true"></i>
        <span class="font-medium">2. Convert to PDF</span>
      </button>
    </div>
    <!-- Step 3 -->
    <div class="flex-1 min-w-[200px]">
      <button type="button" id="selectPdfBtn" class="w-full bg-gray-100 hover:bg-gray-200 text-gray-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all opacity-50 cursor-not-allowed">
        <i data-feather="file" class="text-2xl mb-2" aria-hidden="true"></i>
        <span class="font-medium">3. Select PDF</span>
      </button>
    </div>
    <!-- Step 4 -->
    <div class="flex-1 min-w-[200px]">
      <button type="button" id="searchContentBtn" class="w-full bg-gray-100 hover:bg-gray-200 text-gray-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all opacity-50 cursor-not-allowed">
        <i data-feather="search" class="text-2xl mb-2" aria-hidden="true"></i>
        <span class="font-medium">4. Search Content</span>
      </button>
    </div>
    <!-- Step 5 -->
    <div class="flex-1 min-w-[200px]">
      <button type="button" id="exportReportBtn" class="w-full bg-gray-100 hover:bg-gray-200 text-gray-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all opacity-50 cursor-not-allowed">
        <i data-feather="download" class="text-2xl mb-2" aria-hidden="true"></i>
        <span class="font-medium">5. Export Report</span>
      </button>
    </div>
  </div>
</div>
<!-- Status Messages: role="status" makes this a polite live region so that
     changes to the status text are announced by screen readers. -->
<div id="statusMessages" class="bg-white rounded-xl shadow-md p-4 mb-8 space-y-2" role="status">
  <div class="flex items-center text-gray-600">
    <i data-feather="info" class="mr-2" aria-hidden="true"></i>
    <span id="statusText">Select an image folder to begin</span>
  </div>
</div>
<!-- Main Content Area -->
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
<!-- Preview Pane: document preview with zoom controls and a progress bar -->
<div class="lg:col-span-2 bg-white rounded-xl shadow-md p-6">
  <div class="flex justify-between items-center mb-4">
    <h2 class="text-xl font-semibold text-gray-800">Document Preview</h2>
    <div class="flex space-x-2">
      <!-- Icon-only buttons need an explicit accessible name. -->
      <button type="button" class="bg-gray-100 hover:bg-gray-200 p-2 rounded-lg" aria-label="Zoom in">
        <i data-feather="zoom-in" aria-hidden="true"></i>
      </button>
      <button type="button" class="bg-gray-100 hover:bg-gray-200 p-2 rounded-lg" aria-label="Zoom out">
        <i data-feather="zoom-out" aria-hidden="true"></i>
      </button>
    </div>
  </div>
  <div class="preview-pane border-2 border-dashed border-gray-300 rounded-lg flex items-center justify-center">
    <!-- Placeholder shown in the pane's initial markup -->
    <div class="text-center p-8 text-gray-500">
      <i data-feather="file" class="mx-auto text-4xl mb-4" aria-hidden="true"></i>
      <p>Preview will appear here</p>
    </div>
  </div>
  <!-- Progress Bar: starts hidden; the fill width is set inline because it is a dynamic value -->
  <div id="progressContainer" class="mt-4 hidden">
    <div class="flex justify-between mb-1">
      <span class="text-sm font-medium text-gray-700">Processing...</span>
      <span id="progressPercent" class="text-sm font-medium text-gray-700">0%</span>
    </div>
    <div class="w-full bg-gray-200 rounded-full h-2.5">
      <div id="progressBar" class="progress-bar bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
    </div>
  </div>
</div>
<!-- Search Results: search box, result count badge, result list and Excel export -->
<div class="bg-white rounded-xl shadow-md p-6">
  <div class="flex justify-between items-center mb-4">
    <h2 class="text-xl font-semibold text-gray-800">Search Results</h2>
    <span id="resultCount" class="text-sm bg-blue-100 text-blue-800 px-2 py-1 rounded-full">0 results</span>
  </div>
  <div class="mb-4">
    <div class="relative">
      <!-- A placeholder is not a label; aria-label gives the field an accessible name. -->
      <input id="searchInput" type="text" placeholder="Enter search term..." class="w-full pl-10 pr-4 py-2 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" aria-label="Search term" disabled>
      <i data-feather="search" class="absolute left-3 top-2.5 text-gray-400" aria-hidden="true"></i>
    </div>
  </div>
  <!-- Container for result rows; initially holds the empty-state message below. -->
  <div id="searchResults" class="border border-gray-200 rounded-lg overflow-hidden">
    <div class="max-h-[300px] overflow-y-auto">
      <div class="flex items-center justify-center p-8 text-gray-500">
        <div class="text-center">
          <i data-feather="alert-circle" class="mx-auto text-2xl mb-2" aria-hidden="true"></i>
          <p>No search results yet</p>
        </div>
      </div>
    </div>
  </div>
  <button type="button" id="exportBtn" class="mt-4 w-full bg-green-500 hover:bg-green-600 text-white px-4 py-2 rounded-lg flex items-center justify-center space-x-2 opacity-50 cursor-not-allowed">
    <i data-feather="download" aria-hidden="true"></i>
    <span>Export to Excel</span>
  </button>
</div>
</div>
</div>
<script>
// Initialize the Vanta.js animated "net" background on the <body> (#vanta-bg).
// The call is guarded: the library comes from a CDN, and an unguarded
// reference would throw and abort everything that follows in this script
// if that download failed.
// NOTE(review): Vanta effects normally expect three.js to be loaded first and
// no three.js <script> tag is visible in this file's <head> — confirm.
if (typeof VANTA !== 'undefined' && typeof VANTA.NET === 'function') {
  VANTA.NET({
    el: "#vanta-bg",
    mouseControls: true,
    touchControls: true,
    gyroControls: false,
    minHeight: 200.00,
    minWidth: 200.00,
    scale: 1.00,
    scaleMobile: 1.00,
    color: 0x9ca3af,           // line/point colour (Tailwind gray-400)
    backgroundColor: 0xf9fafb, // matches the bg-gray-50 page background
    points: 10.00,
    maxDistance: 20.00,
    spacing: 17.00
  });
} else {
  console.warn('Vanta.js is not available; skipping animated background.');
}
// Double-clicking anywhere inside a .search-result row previews the page
// named by that row's data-page attribute. The listener is delegated to the
// #searchResults container.
document.getElementById('searchResults').addEventListener('dblclick', (event) => {
  const row = event.target.closest('.search-result');
  if (!row) {
    return;
  }
  const pageNum = row.getAttribute('data-page');
  if (pageNum) {
    showPagePreview(pageNum);
  }
});
// In
- README.md +9 -5
- index.html +174 -19
|
@@ -1,10 +1,14 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: undefined
|
| 3 |
+
colorFrom: pink
|
| 4 |
+
colorTo: green
|
| 5 |
+
emoji: 🐳
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
+
tags:
|
| 9 |
+
- deepsite-v3
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Welcome to your new DeepSite project!
|
| 13 |
+
This project was created with [DeepSite](https://deepsite.hf.co).
|
| 14 |
+
|
|
@@ -1,19 +1,174 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import threading
|
| 3 |
+
import tkinter as tk
|
| 4 |
+
from tkinter import filedialog, messagebox, ttk
|
| 5 |
+
from PIL import Image, ImageTk, ImageDraw
|
| 6 |
+
from pdf2image import convert_from_path
|
| 7 |
+
import pytesseract
|
| 8 |
+
from openpyxl import Workbook
|
| 9 |
+
|
| 10 |
+
# Tesseract executable path (Windows example). It can be overridden with the
# TESSERACT_CMD environment variable. pytesseract's own default is only
# replaced when the file actually exists, so a missing path no longer
# overwrites it.
_tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
if os.path.exists(_tesseract_cmd):
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd

# Poppler "bin" directory used by pdf2image. It can be overridden with the
# POPPLER_PATH environment variable; the default is the original author's
# local install location.
poppler_path = os.environ.get(
    "POPPLER_PATH", r"C:\Users\Murat\poppler\poppler-25.07.0\Library\bin"
)

if not os.path.exists(poppler_path):
    # None tells the rest of the script that Poppler is unavailable.
    poppler_path = None
    print("⚠️ Poppler yolu bulunamadı. PDF işlemleri çalışmayabilir.")
|
| 19 |
+
|
| 20 |
+
class PDFImageAnalyzer(tk.Tk):
    """Main window: image folder -> PDF, OCR keyword search in a PDF, Excel export.

    Workflow offered by the toolbar:
      1. pick a folder of images,
      2. merge those images into a single PDF,
      3. pick a PDF,
      4. OCR every page and list the lines containing a keyword,
      5. save the matches to an .xlsx report.
    Double-clicking a row in the result list opens a preview of the matching
    page with the keyword outlined in red.
    """

    def __init__(self):
        super().__init__()
        self.title("Resim → PDF → PDF Arama")
        self.geometry("900x600")

        self.image_folder = ""
        self.image_list = []
        self.pdf_path = ""
        # [(zero-based page index, [matching lines])], in listbox order.
        self.search_results = []
        self.keyword = ""

        self.create_widgets()

    def create_widgets(self):
        """Build the toolbar, the progress bar and the result list."""
        frame = tk.Frame(self)
        frame.pack(pady=10, fill=tk.X)

        tk.Button(frame, text="Resim Klasörü Seç", command=self.select_folder).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Resimleri PDF’ye Çevir", command=self.images_to_pdf).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="PDF Seç", command=self.select_pdf).pack(side=tk.LEFT, padx=5)

        tk.Label(frame, text="Aranacak Kelime:").pack(side=tk.LEFT, padx=5)
        self.search_entry = tk.Entry(frame)
        self.search_entry.pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Ara", command=self.start_search_thread).pack(side=tk.LEFT, padx=5)

        tk.Button(frame, text="Excel Raporu Kaydet", command=self.save_excel).pack(side=tk.LEFT, padx=5)

        self.progress = ttk.Progressbar(self, orient="horizontal", length=800, mode="determinate")
        self.progress.pack(pady=10)

        self.result_listbox = tk.Listbox(self, height=20)
        self.result_listbox.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)
        self.result_listbox.bind("<Double-Button-1>", self.preview_page)

    def select_folder(self):
        """Ask for a folder and collect the image files directly inside it."""
        self.image_folder = filedialog.askdirectory()
        if self.image_folder:
            # os.listdir returns names in arbitrary order; sort them so the
            # page order of the generated PDF is deterministic.
            self.image_list = [
                os.path.join(self.image_folder, name)
                for name in sorted(os.listdir(self.image_folder))
                if name.lower().endswith((".png", ".jpg", ".jpeg", ".bmp"))
            ]
            messagebox.showinfo("Bilgi", f"{len(self.image_list)} resim bulundu.")

    def images_to_pdf(self):
        """Merge the selected images into one multi-page PDF chosen by the user."""
        if not self.image_list:
            messagebox.showerror("Hata", "Lütfen önce bir klasör seçin.")
            return
        # Ask for the destination first so no image is decoded if the user cancels.
        save_path = filedialog.asksaveasfilename(defaultextension=".pdf", filetypes=[("PDF dosyaları", "*.pdf")])
        if not save_path:
            return
        images = []
        for path in self.image_list:
            # The context manager closes the source file; convert() returns an
            # independent RGB copy that stays valid afterwards.
            with Image.open(path) as src:
                images.append(src.convert("RGB"))
        images[0].save(save_path, save_all=True, append_images=images[1:])
        messagebox.showinfo("Başarılı", f"PDF kaydedildi: {save_path}")

    def select_pdf(self):
        """Ask for the PDF file that will be searched."""
        self.pdf_path = filedialog.askopenfilename(filetypes=[("PDF dosyaları", "*.pdf")])
        if self.pdf_path:
            messagebox.showinfo("Bilgi", f"Seçilen PDF: {self.pdf_path}")

    def start_search_thread(self):
        """Validate the inputs on the UI thread, then run the OCR search in the background."""
        if not poppler_path:
            messagebox.showerror("Hata", "Poppler bulunamadı. Lütfen sistemine kur ve tekrar çalıştır.")
            return
        self.keyword = self.search_entry.get()
        if not self.pdf_path or not self.keyword:
            messagebox.showerror("Hata", "Lütfen PDF seçin ve arama kelimesi girin.")
            return
        # daemon=True: a long OCR run must not keep the process alive after
        # the window has been closed.
        thread = threading.Thread(target=self.search_pdf, daemon=True)
        thread.start()

    def search_pdf(self):
        """OCR every page of the selected PDF and collect lines containing the keyword.

        Intended to run on a worker thread (see start_search_thread). It never
        touches widgets directly: every UI change is scheduled onto the Tk
        event loop with ``self.after(0, ...)``.
        """
        if not self.pdf_path or not self.keyword:
            self.after(0, messagebox.showerror, "Hata", "Lütfen PDF seçin ve arama kelimesi girin.")
            return
        # Snapshot the inputs so edits made in the UI during the run do not
        # change what is being searched.
        keyword = self.keyword
        pdf_path = self.pdf_path

        try:
            pages = convert_from_path(pdf_path, poppler_path=poppler_path)
        except Exception as e:
            self.after(0, messagebox.showerror, "Hata", f"PDF okunamadı: {str(e)}")
            return

        self.after(0, self._begin_search, len(pages))
        for i, page in enumerate(pages):
            text = pytesseract.image_to_string(page)
            self.after(0, self._report_page, i, self._matching_lines(text, keyword))
        self.after(0, self._finish_search)

    @staticmethod
    def _matching_lines(text, keyword):
        """Return the stripped lines of ``text`` that contain ``keyword``, ignoring case."""
        needle = keyword.lower()
        return [line.strip() for line in text.split("\n") if needle in line.lower()]

    @staticmethod
    def _row_to_page(search_results, row):
        """Map a listbox row index to the zero-based page index it belongs to.

        Each entry of ``search_results`` occupies one header row followed by
        one row per matching line. Returns None when ``row`` is out of range.
        """
        first_row = 0
        for page_num, lines in search_results:
            next_first_row = first_row + 1 + len(lines)
            if first_row <= row < next_first_row:
                return page_num
            first_row = next_first_row
        return None

    def _begin_search(self, total_pages):
        """UI thread: clear the previous results and reset the progress bar."""
        self.search_results.clear()
        self.result_listbox.delete(0, tk.END)
        self.progress["maximum"] = total_pages
        self.progress["value"] = 0

    def _report_page(self, page_index, found_lines):
        """UI thread: record one page's matches and advance the progress bar."""
        if found_lines:
            self.search_results.append((page_index, found_lines))
            self.result_listbox.insert(tk.END, f"Sayfa {page_index+1}: {len(found_lines)} adet bulundu")
            for found_line in found_lines:
                self.result_listbox.insert(tk.END, f" → {found_line}")
        self.progress["value"] = page_index + 1

    def _finish_search(self):
        """UI thread: tell the user how the search ended."""
        if not self.search_results:
            messagebox.showinfo("Sonuç Yok", "Kelime bulunamadı.")
        else:
            messagebox.showinfo("Tamamlandı", f"{len(self.search_results)} sayfada sonuç bulundu.")

    def save_excel(self):
        """Write the current search results to an .xlsx file chosen by the user."""
        if not self.search_results:
            messagebox.showerror("Hata", "Önce arama yapın.")
            return
        file_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel dosyaları", "*.xlsx")])
        if file_path:
            wb = Workbook()
            ws = wb.active
            ws.append(["Sayfa Numarası", "Bulunan Satırlar"])
            for page_num, lines in self.search_results:
                # Pages are stored zero-based; the report shows them one-based.
                ws.append([page_num+1, "\n".join(lines)])
            wb.save(file_path)
            messagebox.showinfo("Başarılı", f"Excel raporu kaydedildi: {file_path}")

    def preview_page(self, event):
        """Open a window showing the page of the double-clicked result row.

        Words containing the keyword are outlined in red.
        """
        selection = self.result_listbox.curselection()
        if not selection or not self.search_results:
            return

        # Find which page the clicked row belongs to.
        page_num = self._row_to_page(self.search_results, selection[0])
        if page_num is None:
            return

        try:
            # Render only the requested page (first_page/last_page are
            # one-based) instead of rasterising the whole document.
            pages = convert_from_path(
                self.pdf_path,
                poppler_path=poppler_path,
                first_page=page_num + 1,
                last_page=page_num + 1,
            )
        except Exception as e:
            messagebox.showerror("Hata", f"PDF okunamadı: {str(e)}")
            return
        if not pages:
            return
        image = pages[0]

        # Highlighting: OCR word boxes whose text contains the keyword.
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
        draw = ImageDraw.Draw(image)
        needle = self.keyword.lower()
        for i, word in enumerate(data['text']):
            if needle in word.lower():
                (x, y, w, h) = (data['left'][i], data['top'][i], data['width'][i], data['height'][i])
                draw.rectangle([x, y, x + w, y + h], outline="red", width=3)

        preview_win = tk.Toplevel(self)
        preview_win.title(f"Sayfa Önizleme: {page_num + 1}")
        # thumbnail() shrinks in place to fit within 800x1000 while keeping
        # the aspect ratio.
        preview = image.copy()
        preview.thumbnail((800, 1000))
        img = ImageTk.PhotoImage(preview)
        label = tk.Label(preview_win, image=img)
        # Keep a reference on the widget so the PhotoImage is not garbage-collected.
        label.image = img
        label.pack()
|
| 171 |
+
|
| 172 |
+
if __name__ == "__main__":
    # Create the main window and hand control to the Tk event loop.
    PDFImageAnalyzer().mainloop()
|