Spaces:
Sleeping
Sleeping
| import base64 | |
| import mimetypes | |
| import markdown | |
| from weasyprint import HTML | |
| import fitz # PyMuPDF | |
| import os | |
def encode_image_v2(image_path: str) -> str:
    """Encode an image file as a base64 ``data:`` URI.

    The MIME type is guessed from the file name via
    ``mimetypes.guess_type``; the file contents are not inspected. The
    MIME check runs before the file is opened, so an unrecognised name
    raises ``ValueError`` even if the file does not exist.

    Args:
        image_path: Path to the image file.

    Returns:
        A string of the form ``data:<mime-type>;base64,<payload>``.

    Raises:
        ValueError: If no MIME type can be guessed from ``image_path``.
        OSError: If the file cannot be opened or read.
    """
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None:
        raise ValueError(f"Cannot determine MIME type for {image_path}")

    with open(image_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode("utf-8")

    return f"data:{mime_type};base64,{encoded_string}"
| # Function to encode the image | |
def encode_image(image_path: str) -> str:
    """Return the contents of a file as a base64-encoded string.

    Unlike a ``data:`` URI, the result carries no MIME type prefix; it is
    only the base64 text of the file's raw bytes.

    Args:
        image_path: Path to the file to encode.

    Returns:
        The base64 encoding of the file contents, as ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")
| # Function to convert Markdown to PDF | |
def convert_markdown_to_pdf(md_content, output_pdf_path):
    """Render Markdown text to a PDF file.

    The Markdown is first converted to HTML with ``markdown.markdown`` and
    that HTML string is then written out as a PDF by WeasyPrint. A
    confirmation message is printed to stdout once the PDF is written.

    Args:
        md_content: Markdown source text.
        output_pdf_path: Destination passed to ``HTML.write_pdf``.
    """
    # Convert Markdown to HTML
    html_content = markdown.markdown(md_content)
    # Render the HTML string and save it as a PDF file
    HTML(string=html_content).write_pdf(output_pdf_path)
    print("Markdown has been successfully converted to PDF!")
def pdf_to_images(pdf_path, output_folder, zoom_x=2.0, zoom_y=2.0):
    """Render every page of a PDF to a PNG file.

    Pages are written to ``output_folder`` as ``page_1.png``,
    ``page_2.png``, ... (1-based), creating the folder if needed.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        output_folder: Directory that receives the PNG files.
        zoom_x: Horizontal zoom factor for the render matrix; larger
            values give higher-resolution images.
        zoom_y: Vertical zoom factor for the render matrix.

    Returns:
        The list of written image paths, in page order.
    """
    image_paths = []
    # The context manager closes the document even if rendering or saving
    # a page raises; the original left the document open.
    with fitz.open(pdf_path) as pdf_document:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(output_folder, exist_ok=True)
        # The zoom matrix is identical for every page, so build it once.
        matrix = fitz.Matrix(zoom_x, zoom_y)
        for page_num, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(matrix=matrix)
            image_path = os.path.join(output_folder, f"page_{page_num}.png")
            pix.save(image_path)
            image_paths.append(image_path)
    return image_paths