ers01 committed on
Commit
cfa9958
·
verified ·
1 Parent(s): cf16f03

<!DOCTYPE html>

Browse files

<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>DocuSniffer Pro</title>
<link rel="icon" type="image/x-icon" href="/static/favicon.ico">
<!-- Tailwind CSS (CDN build) provides the utility classes used throughout the page. -->
<script src="https://cdn.tailwindcss.com"></script>
<!-- Feather icons: loaded once; exposes the global `feather` used by feather.replace(). -->
<script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>
<!-- Vanta's NET effect needs a global THREE, so three.js must be loaded before it. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r134/three.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/vanta@latest/dist/vanta.net.min.js"></script>
<style>
  /* Animate width changes of the progress bar fill. */
  .progress-bar {
    transition: width 0.3s ease;
  }

  /* Fixed-height preview area that scrolls vertically when content overflows. */
  .preview-pane {
    height: 60vh;
    overflow-y: auto;
  }

  /* Hover highlight for elements carrying the .search-result class. */
  .search-result:hover {
    background-color: rgba(59, 130, 246, 0.1);
    cursor: pointer;
  }
</style>
</head>
<body class="bg-gray-50" id="vanta-bg">
<div class="container mx-auto px-4 py-8">
<!-- Header -->
<header class="flex justify-between items-center mb-8">
  <div class="flex items-center space-x-2">
    <!-- Decorative icon: hidden from assistive technology, the heading carries the name. -->
    <i class="text-blue-500" data-feather="search" aria-hidden="true"></i>
    <h1 class="text-3xl font-bold text-gray-800">DocuSniffer Pro</h1>
  </div>
  <div class="flex space-x-4">
    <!-- type="button" so these never act as submit buttons if wrapped in a form. -->
    <button class="bg-blue-500 hover:bg-blue-600 text-white px-4 py-2 rounded-lg flex items-center space-x-2" type="button">
      <i data-feather="help-circle" aria-hidden="true"></i>
      <span>Help</span>
    </button>
    <button class="bg-gray-200 hover:bg-gray-300 px-4 py-2 rounded-lg flex items-center space-x-2" type="button">
      <i data-feather="settings" aria-hidden="true"></i>
      <span>Settings</span>
    </button>
  </div>
</header>

<!-- Main Menu -->
<div class="bg-white rounded-xl shadow-md p-6 mb-8">
  <!-- Five workflow steps. Steps 2 to 5 only LOOK unavailable (opacity-50 /
       cursor-not-allowed classes); they carry no disabled attribute. -->
  <div class="flex flex-wrap justify-between gap-4">
    <!-- Step 1 -->
    <div class="flex-1 min-w-[200px]">
      <button class="w-full bg-blue-100 hover:bg-blue-200 text-blue-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all" id="selectFolderBtn" type="button">
        <i class="text-2xl mb-2" data-feather="folder" aria-hidden="true"></i>
        <span class="font-medium">1. Select Image Folder</span>
      </button>
    </div>

    <!-- Step 2 -->
    <div class="flex-1 min-w-[200px]">
      <button class="w-full bg-gray-100 hover:bg-gray-200 text-gray-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all opacity-50 cursor-not-allowed" id="convertToPdfBtn" type="button">
        <i class="text-2xl mb-2" data-feather="file-text" aria-hidden="true"></i>
        <span class="font-medium">2. Convert to PDF</span>
      </button>
    </div>

    <!-- Step 3 -->
    <div class="flex-1 min-w-[200px]">
      <button class="w-full bg-gray-100 hover:bg-gray-200 text-gray-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all opacity-50 cursor-not-allowed" id="selectPdfBtn" type="button">
        <i class="text-2xl mb-2" data-feather="file" aria-hidden="true"></i>
        <span class="font-medium">3. Select PDF</span>
      </button>
    </div>

    <!-- Step 4 -->
    <div class="flex-1 min-w-[200px]">
      <button class="w-full bg-gray-100 hover:bg-gray-200 text-gray-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all opacity-50 cursor-not-allowed" id="searchContentBtn" type="button">
        <i class="text-2xl mb-2" data-feather="search" aria-hidden="true"></i>
        <span class="font-medium">4. Search Content</span>
      </button>
    </div>

    <!-- Step 5 -->
    <div class="flex-1 min-w-[200px]">
      <button class="w-full bg-gray-100 hover:bg-gray-200 text-gray-800 px-4 py-3 rounded-lg flex flex-col items-center transition-all opacity-50 cursor-not-allowed" id="exportReportBtn" type="button">
        <i class="text-2xl mb-2" data-feather="download" aria-hidden="true"></i>
        <span class="font-medium">5. Export Report</span>
      </button>
    </div>
  </div>
</div>

<!-- Status Messages -->
<!-- role="status" makes this a polite live region, so text changes are announced. -->
<div class="bg-white rounded-xl shadow-md p-4 mb-8 space-y-2" id="statusMessages" role="status">
  <div class="flex items-center text-gray-600">
    <i class="mr-2" data-feather="info" aria-hidden="true"></i>
    <span id="statusText">Select an image folder to begin</span>
  </div>
</div>

<!-- Main Content Area -->
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
<!-- Preview Pane -->
<div class="lg:col-span-2 bg-white rounded-xl shadow-md p-6">
  <div class="flex justify-between items-center mb-4">
    <h2 class="text-xl font-semibold text-gray-800">Document Preview</h2>
    <div class="flex space-x-2">
      <!-- Icon-only buttons: aria-label supplies the accessible name. -->
      <button class="bg-gray-100 hover:bg-gray-200 p-2 rounded-lg" type="button" aria-label="Zoom in">
        <i data-feather="zoom-in" aria-hidden="true"></i>
      </button>
      <button class="bg-gray-100 hover:bg-gray-200 p-2 rounded-lg" type="button" aria-label="Zoom out">
        <i data-feather="zoom-out" aria-hidden="true"></i>
      </button>
    </div>
  </div>

  <!-- Placeholder shown inside the scrollable preview area. -->
  <div class="preview-pane border-2 border-dashed border-gray-300 rounded-lg flex items-center justify-center">
    <div class="text-center p-8 text-gray-500">
      <i class="mx-auto text-4xl mb-4" data-feather="file" aria-hidden="true"></i>
      <p>Preview will appear here</p>
    </div>
  </div>

  <!-- Progress Bar (starts hidden via the "hidden" utility class) -->
  <div class="mt-4 hidden" id="progressContainer">
    <div class="flex justify-between mb-1">
      <span class="text-sm font-medium text-gray-700">Processing...</span>
      <span class="text-sm font-medium text-gray-700" id="progressPercent">0%</span>
    </div>
    <div class="w-full bg-gray-200 rounded-full h-2.5">
      <!-- Inline width is the dynamic fill value; .progress-bar animates its changes. -->
      <div class="progress-bar bg-blue-600 h-2.5 rounded-full" id="progressBar" style="width: 0%"></div>
    </div>
  </div>
</div>

<!-- Search Results -->
<div class="bg-white rounded-xl shadow-md p-6">
  <div class="flex justify-between items-center mb-4">
    <h2 class="text-xl font-semibold text-gray-800">Search Results</h2>
    <span class="text-sm bg-blue-100 text-blue-800 px-2 py-1 rounded-full" id="resultCount">0 results</span>
  </div>

  <div class="mb-4">
    <div class="relative">
      <!-- A placeholder is not a label; aria-label gives the field an accessible name. -->
      <input class="w-full pl-10 pr-4 py-2 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" id="searchInput" type="text" placeholder="Enter search term..." aria-label="Search term" disabled>
      <i class="absolute left-3 top-2.5 text-gray-400" data-feather="search" aria-hidden="true"></i>
    </div>
  </div>

  <!-- Result list container; initially holds the empty-state placeholder. -->
  <div class="border border-gray-200 rounded-lg overflow-hidden" id="searchResults">
    <div class="max-h-[300px] overflow-y-auto">
      <div class="flex items-center justify-center p-8 text-gray-500">
        <div class="text-center">
          <i class="mx-auto text-2xl mb-2" data-feather="alert-circle" aria-hidden="true"></i>
          <p>No search results yet</p>
        </div>
      </div>
    </div>
  </div>

  <button class="mt-4 w-full bg-green-500 hover:bg-green-600 text-white px-4 py-2 rounded-lg flex items-center justify-center space-x-2 opacity-50 cursor-not-allowed" id="exportBtn" type="button">
    <i data-feather="download" aria-hidden="true"></i>
    <span>Export to Excel</span>
  </button>
</div>
</div>
</div>

<script>
// Start the Vanta.js "NET" animated background on the element with id "vanta-bg".
VANTA.NET({
  el: "#vanta-bg",
  // Input handling
  mouseControls: true,
  touchControls: true,
  gyroControls: false,
  // Sizing
  minHeight: 200,
  minWidth: 200,
  scale: 1,
  scaleMobile: 1,
  // Colours
  color: 0x9ca3af,
  backgroundColor: 0xf9fafb,
  // Net geometry
  points: 10,
  maxDistance: 20,
  spacing: 17
});

// Delegated double-click handling: one listener on the results container
// covers every .search-result row, including rows inserted later.
document.getElementById('searchResults').addEventListener('dblclick', (event) => {
  const row = event.target.closest('.search-result');
  if (!row) {
    return;
  }
  // Rows without a data-page attribute (or with an empty one) are ignored.
  const pageNum = row.getAttribute('data-page');
  if (pageNum) {
    showPagePreview(pageNum);
  }
});
// In

Files changed (2) hide show
  1. README.md +9 -5
  2. index.html +174 -19
README.md CHANGED
@@ -1,10 +1,14 @@
1
  ---
2
- title: Undefined
3
- emoji: 🔥
4
- colorFrom: gray
5
- colorTo: gray
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: undefined
3
+ colorFrom: pink
4
+ colorTo: green
5
+ emoji: 🐳
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite-v3
10
  ---
11
 
12
+ # Welcome to your new DeepSite project!
13
+ This project was created with [DeepSite](https://deepsite.hf.co).
14
+
index.html CHANGED
@@ -1,19 +1,174 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import threading
3
+ import tkinter as tk
4
+ from tkinter import filedialog, messagebox, ttk
5
+ from PIL import Image, ImageTk, ImageDraw
6
+ from pdf2image import convert_from_path
7
+ import pytesseract
8
+ from openpyxl import Workbook
9
+
10
# Tesseract executable location (example for a default Windows install).
# Only override pytesseract's own setting when this file actually exists, so a
# machine without it is not pointed at a non-existent path.
_tesseract_exe = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.exists(_tesseract_exe):
    pytesseract.pytesseract.tesseract_cmd = _tesseract_exe

# Poppler "bin" directory passed to pdf2image. The POPPLER_PATH environment
# variable, when set, takes precedence over the machine-specific default.
poppler_path = os.environ.get(
    "POPPLER_PATH",
    r"C:\Users\Murat\poppler\poppler-25.07.0\Library\bin",
)

# None signals "Poppler not found" to the rest of the module.
if not os.path.exists(poppler_path):
    poppler_path = None
    print("⚠️ Poppler yolu bulunamadı. PDF işlemleri çalışmayabilir.")
19
+
20
class PDFImageAnalyzer(tk.Tk):
    """Tkinter window that merges images into a PDF, OCR-searches a PDF and
    exports the matches to an Excel workbook.

    State kept on the instance:
        image_folder / image_list -- folder picked by the user and the image
            files found in it.
        pdf_path -- PDF chosen for searching.
        search_results -- list of ``(zero_based_page_index, [matching lines])``,
            one entry per page that contains the keyword.
        keyword -- keyword used by the most recent search.
    """

    # File extensions (lower-case) accepted as images when scanning a folder.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp")

    def __init__(self):
        super().__init__()
        self.title("Resim → PDF → PDF Arama")
        self.geometry("900x600")

        self.image_folder = ""
        self.image_list = []
        self.pdf_path = ""
        self.search_results = []  # [(page_index, [lines])]
        self.keyword = ""
        self._search_thread = None  # worker started by start_search_thread

        self.create_widgets()

    def create_widgets(self):
        """Build the toolbar, the progress bar and the result list."""
        frame = tk.Frame(self)
        frame.pack(pady=10, fill=tk.X)

        tk.Button(frame, text="Resim Klasörü Seç", command=self.select_folder).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Resimleri PDF’ye Çevir", command=self.images_to_pdf).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="PDF Seç", command=self.select_pdf).pack(side=tk.LEFT, padx=5)

        tk.Label(frame, text="Aranacak Kelime:").pack(side=tk.LEFT, padx=5)
        self.search_entry = tk.Entry(frame)
        self.search_entry.pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Ara", command=self.start_search_thread).pack(side=tk.LEFT, padx=5)

        tk.Button(frame, text="Excel Raporu Kaydet", command=self.save_excel).pack(side=tk.LEFT, padx=5)

        self.progress = ttk.Progressbar(self, orient="horizontal", length=800, mode="determinate")
        self.progress.pack(pady=10)

        self.result_listbox = tk.Listbox(self, height=20)
        self.result_listbox.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)
        self.result_listbox.bind("<Double-Button-1>", self.preview_page)

    def select_folder(self):
        """Ask for a folder and collect the image files directly inside it."""
        self.image_folder = filedialog.askdirectory()
        if self.image_folder:
            # os.listdir order is arbitrary; sort so PDF page order is stable.
            self.image_list = [
                os.path.join(self.image_folder, name)
                for name in sorted(os.listdir(self.image_folder))
                if name.lower().endswith(self.IMAGE_EXTENSIONS)
            ]
            messagebox.showinfo("Bilgi", f"{len(self.image_list)} resim bulundu.")

    def images_to_pdf(self):
        """Write every collected image as one page of a user-chosen PDF."""
        if not self.image_list:
            messagebox.showerror("Hata", "Lütfen önce bir klasör seçin.")
            return
        # Ask for the target first so nothing is decoded if the user cancels.
        save_path = filedialog.asksaveasfilename(defaultextension=".pdf", filetypes=[("PDF dosyaları", "*.pdf")])
        if not save_path:
            return
        try:
            images = []
            for img_path in self.image_list:
                # The context manager closes the source file; convert() returns
                # an independent in-memory copy.
                with Image.open(img_path) as src:
                    images.append(src.convert("RGB"))
            images[0].save(save_path, save_all=True, append_images=images[1:])
        except Exception as e:
            messagebox.showerror("Hata", f"PDF oluşturulamadı: {str(e)}")
            return
        messagebox.showinfo("Başarılı", f"PDF kaydedildi: {save_path}")

    def select_pdf(self):
        """Ask for the PDF that later searches and previews operate on."""
        self.pdf_path = filedialog.askopenfilename(filetypes=[("PDF dosyaları", "*.pdf")])
        if self.pdf_path:
            messagebox.showinfo("Bilgi", f"Seçilen PDF: {self.pdf_path}")

    def start_search_thread(self):
        """Read the keyword and run search_pdf on a background thread."""
        if not poppler_path:
            messagebox.showerror("Hata", "Poppler bulunamadı. Lütfen sistemine kur ve tekrar çalıştır.")
            return
        # Two concurrent searches would both mutate search_results.
        if self._search_thread is not None and self._search_thread.is_alive():
            messagebox.showinfo("Bilgi", "Arama zaten devam ediyor.")
            return
        self.keyword = self.search_entry.get()
        # Daemon thread: closing the window must not leave the process hanging.
        self._search_thread = threading.Thread(target=self.search_pdf, daemon=True)
        self._search_thread.start()

    def _on_ui(self, func, *args):
        """Schedule ``func(*args)`` on the Tk event loop.

        search_pdf runs on a worker thread; widget updates and message boxes
        are queued here so they execute from the event loop instead.
        """
        self.after(0, func, *args)

    def _reset_progress(self, maximum):
        """Set the progress bar range to ``maximum`` and rewind it to zero."""
        self.progress["maximum"] = maximum
        self.progress["value"] = 0

    def _set_progress(self, value):
        """Move the progress bar to ``value``."""
        self.progress["value"] = value

    def _show_page_hits(self, page_index, found_lines):
        """Append one header row plus one row per matching line to the list."""
        self.result_listbox.insert(tk.END, f"Sayfa {page_index+1}: {len(found_lines)} adet bulundu")
        for found_line in found_lines:
            self.result_listbox.insert(tk.END, f" → {found_line}")

    def search_pdf(self):
        """OCR every page of the selected PDF and record lines containing the keyword.

        Matching is case-insensitive and line based. Results are stored in
        ``self.search_results`` and mirrored into the list box; all UI work is
        routed through ``_on_ui``.
        """
        if not self.pdf_path or not self.keyword:
            self._on_ui(messagebox.showerror, "Hata", "Lütfen PDF seçin ve arama kelimesi girin.")
            return
        needle = self.keyword.lower()
        self.search_results.clear()
        self._on_ui(self.result_listbox.delete, 0, tk.END)

        try:
            pages = convert_from_path(self.pdf_path, poppler_path=poppler_path)
        except Exception as e:
            self._on_ui(messagebox.showerror, "Hata", f"PDF okunamadı: {str(e)}")
            return

        self._on_ui(self._reset_progress, len(pages))

        try:
            for i, page in enumerate(pages):
                text = pytesseract.image_to_string(page)
                found_lines = [line.strip() for line in text.split("\n") if needle in line.lower()]

                if found_lines:
                    self.search_results.append((i, found_lines))
                    self._on_ui(self._show_page_hits, i, found_lines)

                self._on_ui(self._set_progress, i + 1)
        except Exception as e:
            # Without this, an OCR failure would end the worker thread silently.
            self._on_ui(messagebox.showerror, "Hata", f"OCR hatası: {str(e)}")
            return

        if not self.search_results:
            self._on_ui(messagebox.showinfo, "Sonuç Yok", "Kelime bulunamadı.")
        else:
            self._on_ui(messagebox.showinfo, "Tamamlandı", f"{len(self.search_results)} sayfada sonuç bulundu.")

    def save_excel(self):
        """Export the search results to an .xlsx file (one row per page)."""
        if not self.search_results:
            messagebox.showerror("Hata", "Önce arama yapın.")
            return
        file_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel dosyaları", "*.xlsx")])
        if file_path:
            wb = Workbook()
            ws = wb.active
            ws.append(["Sayfa Numarası", "Bulunan Satırlar"])
            for page_num, lines in self.search_results:
                # Stored page indices are zero-based; the report is one-based.
                ws.append([page_num+1, "\n".join(lines)])
            wb.save(file_path)
            messagebox.showinfo("Başarılı", f"Excel raporu kaydedildi: {file_path}")

    def _page_for_listbox_index(self, idx):
        """Return the zero-based page index shown at list box row ``idx``.

        Each result occupies one header row followed by one row per matching
        line, in ``self.search_results`` order. Returns None when ``idx`` is
        past the last row.
        """
        rows_so_far = 0
        for page_num, lines in self.search_results:
            rows_so_far += 1 + len(lines)
            if idx < rows_so_far:
                return page_num
        return None

    def preview_page(self, event):
        """Open a window showing the page of the double-clicked row, with
        every OCR word that contains the keyword outlined in red."""
        selection = self.result_listbox.curselection()
        if not selection or not self.search_results:
            return

        # Work out which page the clicked row belongs to.
        page_num = self._page_for_listbox_index(selection[0])
        if page_num is None:
            return

        try:
            # Rasterise only the requested page (pdf2image pages are one-based).
            pages = convert_from_path(
                self.pdf_path,
                first_page=page_num + 1,
                last_page=page_num + 1,
                poppler_path=poppler_path,
            )
        except Exception as e:
            messagebox.showerror("Hata", f"PDF okunamadı: {str(e)}")
            return
        if not pages:
            return
        image = pages[0]

        # Highlighting
        needle = self.keyword.lower()
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
        draw = ImageDraw.Draw(image)
        for i, word in enumerate(data['text']):
            if needle in word.lower():
                (x, y, w, h) = (data['left'][i], data['top'][i], data['width'][i], data['height'][i])
                draw.rectangle([x, y, x + w, y + h], outline="red", width=3)

        preview_win = tk.Toplevel(self)
        preview_win.title(f"Sayfa Önizleme: {page_num + 1}")
        img = ImageTk.PhotoImage(image.resize((800, 1000)))
        label = tk.Label(preview_win, image=img)
        # Keep a reference on the widget so the image is not garbage-collected.
        label.image = img
        label.pack()
171
+
172
if __name__ == "__main__":
    # Create the main window and hand control to Tk's event loop.
    PDFImageAnalyzer().mainloop()