Alyosha11
/

Phoneme

Alyosha11 committed on May 10, 2024

Commit

c4d0a5f

verified ·

1 Parent(s): 6455306

Upload extract.py with huggingface_hub

Files changed (1) hide show

extract.py ADDED Viewed

+import os
+import pyarrow.parquet as pq
def extract_parquet_files(directory, output_directory="extracted_csv_files"):
    """Convert every ``.parquet`` file found in *directory* to a CSV file.

    Only the entries directly inside *directory* are examined (no recursion).
    Each ``<name>.parquet`` is read with ``pyarrow.parquet.read_table``,
    converted to a pandas DataFrame and written to
    ``<output_directory>/<name>.csv`` without the index column.

    Args:
        directory: Folder to scan for parquet files.
        output_directory: Folder that receives the CSV files. It is created
            if it does not exist. Defaults to ``"extracted_csv_files"``,
            resolved against the current working directory.

    Returns:
        A list with the path of every CSV file written, in processing order.
        The list is empty when *directory* holds no parquet files.

    Raises:
        OSError: Propagated from ``os.listdir`` when *directory* cannot be
            listed (for example, it does not exist).
    """
    os.makedirs(output_directory, exist_ok=True)

    written_paths = []
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # processing order (and the log output) reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        # Compare case-insensitively so "DATA.PARQUET" is picked up as well.
        if not filename.lower().endswith(".parquet"):
            continue

        file_path = os.path.join(directory, filename)
        # Load the whole parquet file and hand it to pandas for CSV export.
        data = pq.read_table(file_path).to_pandas()

        # Same base name as the source file, with the extension swapped.
        csv_filename = os.path.splitext(filename)[0] + ".csv"
        csv_file_path = os.path.join(output_directory, csv_filename)
        data.to_csv(csv_file_path, index=False)
        written_paths.append(csv_file_path)
        print(f"Extracted data from {filename} saved as {csv_filename}")

    return written_paths
# Directory containing the parquet files
parquet_directory = "hindi"

# Run the conversion only when this file is executed as a script, so that
# importing the module does not trigger file-system work as a side effect.
if __name__ == "__main__":
    extract_parquet_files(parquet_directory)