import os
import queue
import shutil
import threading
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

import pytesseract
from openpyxl import Workbook
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageTk
# Tesseract executable: prefer the conventional Windows install location, but
# only override pytesseract's default when that file actually exists.
# Otherwise pytesseract keeps its own default command, so an installation
# reachable through PATH (or a non-Windows machine) still works.
_WINDOWS_TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.exists(_WINDOWS_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_EXE

# Poppler "bin" directory handed to pdf2image. The developer-machine location
# is tried first; if it is missing, fall back to wherever `pdftoppm` (one of
# Poppler's command-line tools) is found on PATH. `poppler_path` stays None
# only when neither lookup succeeds, which the GUI treats as "Poppler missing".
poppler_path = r"C:\Users\Murat\poppler\poppler-25.07.0\Library\bin"
if not os.path.exists(poppler_path):
    _pdftoppm_exe = shutil.which("pdftoppm")
    poppler_path = os.path.dirname(_pdftoppm_exe) if _pdftoppm_exe else None
    if poppler_path is None:
        print("⚠️ Poppler yolu bulunamadı. PDF işlemleri çalışmayabilir.")
class PDFImageAnalyzer(tk.Tk):
    """Main window: build a PDF from images, OCR-search a PDF, export hits.

    Workflow offered by the toolbar:

    1. Pick a folder of images and merge them into a single PDF.
    2. Pick a PDF, type a keyword and run an OCR search over every page.
    3. Double-click a result row to preview the page with hits outlined.
    4. Save the search results as an Excel workbook.

    Threading: the OCR search runs in a background thread so the window
    stays responsive. The worker never touches a widget; it posts
    ``(kind, payload)`` tuples to ``self._events`` and the main thread
    applies them from ``_drain_events`` (scheduled with ``after``).
    """

    # File-name suffixes (compared lower-cased) accepted as input images.
    _IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp")
    # How often the main thread polls the worker's event queue.
    _POLL_INTERVAL_MS = 100

    def __init__(self):
        """Create the window, initialise state and build the widgets."""
        super().__init__()
        self.title("Resim → PDF → PDF Arama")
        self.geometry("900x600")
        self.image_folder = ""
        self.image_list = []
        self.pdf_path = ""
        self.search_results = []  # [(zero-based page index, [matching lines])]
        self.keyword = ""
        # PDF the current search_results belong to; the preview must use this
        # file even if the user selects another PDF after searching.
        self._results_pdf_path = ""
        # Worker -> main-thread messages: (kind, payload) tuples.
        self._events = queue.Queue()
        self._searching = False
        self.create_widgets()

    def create_widgets(self) -> None:
        """Build the toolbar, the progress bar and the result list."""
        frame = tk.Frame(self)
        frame.pack(pady=10, fill=tk.X)
        tk.Button(frame, text="Resim Klasörü Seç", command=self.select_folder).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Resimleri PDF’ye Çevir", command=self.images_to_pdf).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="PDF Seç", command=self.select_pdf).pack(side=tk.LEFT, padx=5)
        tk.Label(frame, text="Aranacak Kelime:").pack(side=tk.LEFT, padx=5)
        self.search_entry = tk.Entry(frame)
        self.search_entry.pack(side=tk.LEFT, padx=5)
        # Kept as an attribute so it can be disabled while a search is running.
        self.search_button = tk.Button(frame, text="Ara", command=self.start_search_thread)
        self.search_button.pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Excel Raporu Kaydet", command=self.save_excel).pack(side=tk.LEFT, padx=5)
        self.progress = ttk.Progressbar(self, orient="horizontal", length=800, mode="determinate")
        self.progress.pack(pady=10)
        self.result_listbox = tk.Listbox(self, height=20)
        self.result_listbox.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)
        self.result_listbox.bind("<Double-Button-1>", self.preview_page)

    def select_folder(self) -> None:
        """Ask for a folder and collect the image files directly inside it.

        Cancelling the dialog keeps the previous selection. File names are
        sorted so the page order of the generated PDF is deterministic
        (``os.listdir`` returns entries in arbitrary order).
        """
        folder = filedialog.askdirectory()
        if not folder:
            return
        self.image_folder = folder
        self.image_list = [
            os.path.join(folder, name)
            for name in sorted(os.listdir(folder))
            if name.lower().endswith(self._IMAGE_EXTENSIONS)
        ]
        messagebox.showinfo("Bilgi", f"{len(self.image_list)} resim bulundu.")

    def images_to_pdf(self) -> None:
        """Merge the selected images into one multi-page PDF chosen by the user."""
        if not self.image_list:
            messagebox.showerror("Hata", "Lütfen önce bir klasör seçin.")
            return
        # Ask for the destination first: no image is decoded if the user cancels.
        save_path = filedialog.asksaveasfilename(defaultextension=".pdf", filetypes=[("PDF dosyaları", "*.pdf")])
        if not save_path:
            return
        try:
            pages = []
            for image_path in self.image_list:
                # The context manager closes the source file; convert() returns
                # an independent in-memory image that outlives it.
                with Image.open(image_path) as source:
                    pages.append(source.convert("RGB"))
            pages[0].save(save_path, save_all=True, append_images=pages[1:])
        except (OSError, ValueError) as e:
            messagebox.showerror("Hata", f"PDF oluşturulamadı: {e}")
            return
        messagebox.showinfo("Başarılı", f"PDF kaydedildi: {save_path}")

    def select_pdf(self) -> None:
        """Ask for the PDF to search; cancelling keeps the previous choice."""
        chosen = filedialog.askopenfilename(filetypes=[("PDF dosyaları", "*.pdf")])
        if not chosen:
            return
        self.pdf_path = chosen
        messagebox.showinfo("Bilgi", f"Seçilen PDF: {self.pdf_path}")

    def start_search_thread(self) -> None:
        """Validate the inputs on the main thread and launch the OCR worker.

        Does nothing while a search is already running, so two workers can
        never fill ``search_results`` at the same time.
        """
        if self._searching:
            return
        if not poppler_path:
            messagebox.showerror("Hata", "Poppler bulunamadı. Lütfen sistemine kur ve tekrar çalıştır.")
            return
        keyword = self.search_entry.get().strip()
        if not self.pdf_path or not keyword:
            messagebox.showerror("Hata", "Lütfen PDF seçin ve arama kelimesi girin.")
            return
        self.keyword = keyword
        self.search_results.clear()
        self.result_listbox.delete(0, tk.END)
        self.progress["value"] = 0
        self._searching = True
        self.search_button.config(state=tk.DISABLED)
        # Daemon thread: closing the window must not leave the process alive
        # until the OCR of a long document finishes.
        threading.Thread(target=self.search_pdf, daemon=True).start()
        self.after(self._POLL_INTERVAL_MS, self._drain_events)

    def search_pdf(self) -> None:
        """Worker body: OCR every page and report matches through the queue.

        Intended to run in the background thread started by
        ``start_search_thread``. It only reads plain attributes and puts
        events on ``self._events``; every widget update is left to
        ``_drain_events`` on the main thread.

        Events posted: ``("total", page_count)``, ``("page", (index, lines))``
        once per page, then exactly one of ``("done", None)`` or
        ``("error", message)``.
        """
        pdf_path = self.pdf_path
        keyword = self.keyword
        if not pdf_path or not keyword:
            self._events.put(("error", "Lütfen PDF seçin ve arama kelimesi girin."))
            return
        self._results_pdf_path = pdf_path
        try:
            pages = convert_from_path(pdf_path, poppler_path=poppler_path)
        except Exception as e:
            self._events.put(("error", f"PDF okunamadı: {str(e)}"))
            return
        self._events.put(("total", len(pages)))
        try:
            for index, page in enumerate(pages):
                text = pytesseract.image_to_string(page)
                self._events.put(("page", (index, self._find_matching_lines(text, keyword))))
        except Exception as e:
            # Without this an OCR failure would end the thread silently and
            # leave the UI waiting forever.
            self._events.put(("error", f"OCR işlemi başarısız: {e}"))
            return
        self._events.put(("done", None))

    def _drain_events(self) -> None:
        """Apply queued worker events to the widgets (main thread only).

        Re-schedules itself until a terminal ``"done"`` or ``"error"`` event
        has been handled.
        """
        while True:
            try:
                kind, payload = self._events.get_nowait()
            except queue.Empty:
                break
            if kind == "total":
                self.progress["maximum"] = payload
            elif kind == "page":
                index, found_lines = payload
                if found_lines:
                    self.search_results.append((index, found_lines))
                    self.result_listbox.insert(tk.END, f"Sayfa {index+1}: {len(found_lines)} adet bulundu")
                    for found_line in found_lines:
                        self.result_listbox.insert(tk.END, f" → {found_line}")
                self.progress["value"] = index + 1
            elif kind == "error":
                self._finish_search()
                messagebox.showerror("Hata", payload)
                return
            elif kind == "done":
                self._finish_search()
                if not self.search_results:
                    messagebox.showinfo("Sonuç Yok", "Kelime bulunamadı.")
                else:
                    messagebox.showinfo("Tamamlandı", f"{len(self.search_results)} sayfada sonuç bulundu.")
                return
        self.after(self._POLL_INTERVAL_MS, self._drain_events)

    def _finish_search(self) -> None:
        """Mark the search as finished and re-enable the search button."""
        self._searching = False
        self.search_button.config(state=tk.NORMAL)

    @staticmethod
    def _find_matching_lines(text, keyword):
        """Return the stripped lines of *text* containing *keyword*, ignoring case."""
        needle = keyword.casefold()
        return [line.strip() for line in text.splitlines() if needle in line.casefold()]

    @staticmethod
    def _page_for_row(search_results, row):
        """Map a listbox row index to the page index it belongs to.

        Each ``(page, lines)`` entry occupies one header row followed by one
        row per matching line. Returns ``None`` when *row* is outside every
        entry.
        """
        first_row = 0
        for page_num, lines in search_results:
            next_first_row = first_row + 1 + len(lines)
            if first_row <= row < next_first_row:
                return page_num
            first_row = next_first_row
        return None

    def save_excel(self) -> None:
        """Write the search results to an .xlsx file chosen by the user."""
        if not self.search_results:
            messagebox.showerror("Hata", "Önce arama yapın.")
            return
        file_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel dosyaları", "*.xlsx")])
        if not file_path:
            return
        wb = Workbook()
        ws = wb.active
        ws.append(["Sayfa Numarası", "Bulunan Satırlar"])
        for page_num, lines in self.search_results:
            # Pages are stored zero-based; the report shows them one-based.
            ws.append([page_num + 1, "\n".join(lines)])
        try:
            wb.save(file_path)
        except OSError as e:
            # E.g. the target file is locked or the location is not writable.
            messagebox.showerror("Hata", f"Excel raporu kaydedilemedi: {e}")
            return
        messagebox.showinfo("Başarılı", f"Excel raporu kaydedildi: {file_path}")

    def preview_page(self, event) -> None:
        """Open a window showing the page of the double-clicked result row.

        Words containing the keyword (case-insensitive) are outlined in red.
        *event* is the Tk event object supplied by the binding; it is unused.
        """
        selection = self.result_listbox.curselection()
        if not selection or not self.search_results:
            return
        # Find which page the clicked row (header or hit line) belongs to.
        page_num = self._page_for_row(self.search_results, selection[0])
        if page_num is None:
            return
        pdf_path = self._results_pdf_path or self.pdf_path
        try:
            # Render only the requested page (pdf2image page numbers are
            # one-based) instead of rasterising the whole document again.
            pages = convert_from_path(
                pdf_path,
                poppler_path=poppler_path,
                first_page=page_num + 1,
                last_page=page_num + 1,
            )
        except Exception as e:
            messagebox.showerror("Hata", f"PDF okunamadı: {str(e)}")
            return
        if not pages:
            return
        image = pages[0]
        # Highlighting: outline every OCR word that contains the keyword.
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
        draw = ImageDraw.Draw(image)
        needle = self.keyword.casefold()
        for i, word in enumerate(data['text']):
            if needle in word.casefold():
                (x, y, w, h) = (data['left'][i], data['top'][i], data['width'][i], data['height'][i])
                draw.rectangle([x, y, x + w, y + h], outline="red", width=3)
        preview_win = tk.Toplevel(self)
        preview_win.title(f"Sayfa Önizleme: {page_num + 1}")
        img = ImageTk.PhotoImage(image.resize((800, 1000)))
        label = tk.Label(preview_win, image=img)
        # Keep a reference on the widget so the photo is not garbage-collected.
        label.image = img
        label.pack()
if __name__ == "__main__":
    # Script entry point: build the main window and run the Tk event loop
    # until the window is closed.
    PDFImageAnalyzer().mainloop()