# File size: 7,012 Bytes
# cfa9958
import os
import queue
import threading
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

from PIL import Image, ImageTk, ImageDraw
from pdf2image import convert_from_path
import pytesseract
from openpyxl import Workbook
# Tesseract path (example for Windows). pytesseract's default command is
# looked up on PATH, so only override it when this executable really exists;
# otherwise installations that already have Tesseract on PATH would be broken
# by pointing at a file that is not there.
_TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _TESSERACT_CMD

# Your Poppler bin path. It is reset to None (and a warning is printed) when
# the directory does not exist.
poppler_path = r"C:\Users\Murat\poppler\poppler-25.07.0\Library\bin"
if not os.path.exists(poppler_path):
    poppler_path = None
    print("⚠️ Poppler yolu bulunamadı. PDF işlemleri çalışmayabilir.")
class PDFImageAnalyzer(tk.Tk):
    """Main window: merge images into a PDF and OCR-search a PDF for a keyword.

    The window offers four actions: pick an image folder, write those images
    into a single PDF, pick a PDF and search it for a keyword via OCR, and
    export the matches to an Excel workbook. Double-clicking a result row
    opens a preview of the matching page with the keyword outlined in red.

    OCR runs in a background thread. That thread never touches Tk widgets; it
    posts events to a queue which the main thread drains on a timer.
    """

    # File name suffixes (compared lower-cased) accepted as images.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp")
    # Bounding box (width, height) the page preview is scaled to fit into.
    PREVIEW_MAX_SIZE = (800, 1000)
    # How often the main thread polls the worker's event queue.
    POLL_INTERVAL_MS = 50

    def __init__(self):
        super().__init__()
        self.title("Resim → PDF → PDF Arama")
        self.geometry("900x600")
        self.image_folder = ""
        self.image_list = []
        self.pdf_path = ""
        self.search_results = []  # [(zero-based page index, [matching lines])]
        self.keyword = ""
        # One entry per listbox row: the zero-based page index that row
        # belongs to. Lets a double-click be resolved to the right page.
        self._row_pages = []
        # PDF the current results were produced from (self.pdf_path may be
        # changed by the user after a search has finished).
        self._searched_pdf_path = ""
        # Worker -> main thread messages as (kind, payload) tuples.
        self._search_events = queue.Queue()
        self._search_thread = None
        self.create_widgets()

    def create_widgets(self):
        """Build the toolbar, the progress bar and the result list."""
        frame = tk.Frame(self)
        frame.pack(pady=10, fill=tk.X)
        tk.Button(frame, text="Resim Klasörü Seç", command=self.select_folder).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Resimleri PDF’ye Çevir", command=self.images_to_pdf).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="PDF Seç", command=self.select_pdf).pack(side=tk.LEFT, padx=5)
        tk.Label(frame, text="Aranacak Kelime:").pack(side=tk.LEFT, padx=5)
        self.search_entry = tk.Entry(frame)
        self.search_entry.pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Ara", command=self.start_search_thread).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Excel Raporu Kaydet", command=self.save_excel).pack(side=tk.LEFT, padx=5)
        self.progress = ttk.Progressbar(self, orient="horizontal", length=800, mode="determinate")
        self.progress.pack(pady=10)
        self.result_listbox = tk.Listbox(self, height=20)
        self.result_listbox.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)
        self.result_listbox.bind("<Double-Button-1>", self.preview_page)

    # ------------------------------------------------------------------
    # Pure helpers (no widget access)
    # ------------------------------------------------------------------
    @staticmethod
    def _list_images(folder):
        """Return the image files in *folder* as full paths, sorted by name.

        Sorting makes the page order of the generated PDF deterministic;
        os.listdir() returns entries in arbitrary order.
        """
        return [
            os.path.join(folder, name)
            for name in sorted(os.listdir(folder))
            if name.lower().endswith(PDFImageAnalyzer.IMAGE_EXTENSIONS)
        ]

    @staticmethod
    def _matching_lines(text, keyword):
        """Return the stripped lines of *text* containing *keyword*.

        The comparison is case-insensitive (both sides are lower-cased).
        """
        needle = keyword.lower()
        return [line.strip() for line in text.split("\n") if needle in line.lower()]

    @staticmethod
    def _page_for_row(row_pages, row):
        """Return the page index stored for listbox *row*, or None if out of range."""
        if 0 <= row < len(row_pages):
            return row_pages[row]
        return None

    # ------------------------------------------------------------------
    # Images -> PDF
    # ------------------------------------------------------------------
    def select_folder(self):
        """Ask for a folder and collect the image files it contains."""
        self.image_folder = filedialog.askdirectory()
        if not self.image_folder:
            return
        self.image_list = self._list_images(self.image_folder)
        messagebox.showinfo("Bilgi", f"{len(self.image_list)} resim bulundu.")

    def images_to_pdf(self):
        """Write every collected image as one page of a user-chosen PDF."""
        if not self.image_list:
            messagebox.showerror("Hata", "Lütfen önce bir klasör seçin.")
            return
        # Ask for the destination first so that cancelling the dialog does not
        # cost the time and memory of decoding every image.
        save_path = filedialog.asksaveasfilename(defaultextension=".pdf", filetypes=[("PDF dosyaları", "*.pdf")])
        if not save_path:
            return
        try:
            pages = []
            for path in self.image_list:
                # convert() returns an independent in-memory copy, so the
                # source file can be closed as soon as the copy exists.
                with Image.open(path) as source:
                    pages.append(source.convert("RGB"))
            pages[0].save(save_path, save_all=True, append_images=pages[1:])
        except OSError as e:
            # Covers unreadable/corrupt images and write failures.
            messagebox.showerror("Hata", f"PDF oluşturulamadı: {e}")
            return
        messagebox.showinfo("Başarılı", f"PDF kaydedildi: {save_path}")

    # ------------------------------------------------------------------
    # OCR search
    # ------------------------------------------------------------------
    def select_pdf(self):
        """Ask for the PDF that will be searched."""
        self.pdf_path = filedialog.askopenfilename(filetypes=[("PDF dosyaları", "*.pdf")])
        if self.pdf_path:
            messagebox.showinfo("Bilgi", f"Seçilen PDF: {self.pdf_path}")

    def start_search_thread(self):
        """Validate the input, reset the result view and start the OCR worker."""
        if not poppler_path:
            messagebox.showerror("Hata", "Poppler bulunamadı. Lütfen sistemine kur ve tekrar çalıştır.")
            return
        # Two workers would interleave their results in the same list.
        if self._search_thread is not None and self._search_thread.is_alive():
            messagebox.showinfo("Bilgi", "Arama zaten devam ediyor.")
            return
        # A whitespace-only keyword would match almost every line.
        self.keyword = self.search_entry.get().strip()
        if not self.pdf_path or not self.keyword:
            messagebox.showerror("Hata", "Lütfen PDF seçin ve arama kelimesi girin.")
            return
        # All widget and result-list mutation happens here on the main thread.
        self.search_results.clear()
        self._row_pages.clear()
        self.result_listbox.delete(0, tk.END)
        self.progress["value"] = 0
        # Daemon: an unfinished OCR run must not keep the process alive after
        # the window has been closed.
        self._search_thread = threading.Thread(target=self.search_pdf, daemon=True)
        self._search_thread.start()
        self.after(self.POLL_INTERVAL_MS, self._drain_search_events)

    def search_pdf(self):
        """OCR every page of the selected PDF and report keyword matches.

        Intended to run in the worker thread started by start_search_thread().
        It does not touch any widget: everything is reported through
        self._search_events as (kind, payload) tuples, and exactly one
        terminal event ("done" or "error") is posted last.
        """
        post = self._search_events.put
        pdf_path = self.pdf_path
        keyword = self.keyword
        if not pdf_path or not keyword:
            post(("error", "Lütfen PDF seçin ve arama kelimesi girin."))
            return
        self._searched_pdf_path = pdf_path
        try:
            pages = convert_from_path(pdf_path, poppler_path=poppler_path)
        except Exception as e:
            post(("error", f"PDF okunamadı: {str(e)}"))
            return
        post(("total", len(pages)))
        try:
            for i, page in enumerate(pages):
                text = pytesseract.image_to_string(page)
                found_lines = self._matching_lines(text, keyword)
                if found_lines:
                    post(("match", (i, found_lines)))
                post(("progress", i + 1))
        except Exception as e:
            # Thread boundary: without a terminal event the main thread would
            # poll forever and the user would never learn that OCR failed.
            post(("error", f"OCR hatası: {e}"))
            return
        post(("done", None))

    def _drain_search_events(self):
        """Apply queued worker events to the UI; runs on the main thread.

        Re-schedules itself until a terminal event has been handled.
        """
        finished = False
        while True:
            try:
                kind, payload = self._search_events.get_nowait()
            except queue.Empty:
                break
            if kind == "total":
                self.progress["maximum"] = payload
                self.progress["value"] = 0
            elif kind == "progress":
                self.progress["value"] = payload
            elif kind == "match":
                self._add_result(*payload)
            elif kind == "error":
                finished = True
                messagebox.showerror("Hata", payload)
            elif kind == "done":
                finished = True
                if not self.search_results:
                    messagebox.showinfo("Sonuç Yok", "Kelime bulunamadı.")
                else:
                    messagebox.showinfo("Tamamlandı", f"{len(self.search_results)} sayfada sonuç bulundu.")
        if not finished:
            self.after(self.POLL_INTERVAL_MS, self._drain_search_events)

    def _add_result(self, page_index, found_lines):
        """Store one page's matches and append the corresponding listbox rows."""
        self.search_results.append((page_index, found_lines))
        self.result_listbox.insert(tk.END, f"Sayfa {page_index+1}: {len(found_lines)} adet bulundu")
        self._row_pages.append(page_index)
        for found_line in found_lines:
            self.result_listbox.insert(tk.END, f" → {found_line}")
            self._row_pages.append(page_index)

    # ------------------------------------------------------------------
    # Report and preview
    # ------------------------------------------------------------------
    def save_excel(self):
        """Export the search results to an .xlsx file chosen by the user."""
        if not self.search_results:
            messagebox.showerror("Hata", "Önce arama yapın.")
            return
        file_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel dosyaları", "*.xlsx")])
        if not file_path:
            return
        wb = Workbook()
        ws = wb.active
        ws.append(["Sayfa Numarası", "Bulunan Satırlar"])
        for page_num, lines in self.search_results:
            # Page numbers are stored zero-based; the report is one-based.
            ws.append([page_num + 1, "\n".join(lines)])
        try:
            wb.save(file_path)
        except OSError as e:
            # e.g. the target file is currently open in another program.
            messagebox.showerror("Hata", f"Excel kaydedilemedi: {e}")
            return
        messagebox.showinfo("Başarılı", f"Excel raporu kaydedildi: {file_path}")

    def preview_page(self, event):
        """Open a window showing the page of the double-clicked result row.

        Words containing the keyword are outlined in red. Highlighting works
        on single OCR words, so a multi-word keyword may match a line without
        any word being outlined.
        """
        selection = self.result_listbox.curselection()
        if not selection or not self.search_results:
            return
        # Find which page was clicked
        page_num = self._page_for_row(self._row_pages, selection[0])
        if page_num is None:
            return
        # Preview the PDF the results came from, even if another PDF has been
        # selected since the search.
        pdf_path = self._searched_pdf_path or self.pdf_path
        try:
            # Rasterize only the requested page (pdf2image pages are 1-based).
            pages = convert_from_path(
                pdf_path,
                poppler_path=poppler_path,
                first_page=page_num + 1,
                last_page=page_num + 1,
            )
            if not pages:
                return
            image = pages[0]
            # Highlighting
            data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
        except Exception as e:
            messagebox.showerror("Hata", f"Önizleme oluşturulamadı: {e}")
            return
        needle = self.keyword.lower()
        draw = ImageDraw.Draw(image)
        for i, word in enumerate(data['text']):
            if word and needle in word.lower():
                (x, y, w, h) = (data['left'][i], data['top'][i], data['width'][i], data['height'][i])
                draw.rectangle([x, y, x + w, y + h], outline="red", width=3)
        # Scale down in place to fit the preview box while keeping the page's
        # aspect ratio (a fixed 800x1000 resize distorts most page formats).
        image.thumbnail(self.PREVIEW_MAX_SIZE)
        preview_win = tk.Toplevel(self)
        preview_win.title(f"Sayfa Önizleme: {page_num + 1}")
        img = ImageTk.PhotoImage(image)
        label = tk.Label(preview_win, image=img)
        # Keep a reference; otherwise the image is garbage-collected and the
        # label shows nothing.
        label.image = img
        label.pack()
if __name__ == "__main__":
    # Script entry point: build the main window and hand control to the Tk
    # event loop until the window is closed.
    PDFImageAnalyzer().mainloop()
|