# Python: PDF Image Extractor
#
# From delarco wiki
import os
import sys
import logging
import subprocess

from PIL import Image, ImageSequence


def extract_pdf_images(filepath, directory, ghostscript_path):
    """Render the pages of a PDF to PNG files by running Ghostscript.

    Ghostscript is invoked with the ``png16m`` device at ``-r300`` and
    writes its output to ``<directory>/p-%03d-000.png``; the ``%03d``
    placeholder is filled in by Ghostscript itself with the page number.

    Args:
        filepath: Path of the PDF file to rasterize.
        directory: Folder that receives the PNG files. It is created
            (including parents) when it does not exist yet.
        ghostscript_path: Path of the Ghostscript console executable.

    Raises:
        subprocess.CalledProcessError: Ghostscript exited with a non-zero
            status.
        OSError: The executable could not be started or the output folder
            could not be created.
    """
    # Ghostscript does not create the output folder on its own.
    if directory:
        os.makedirs(directory, exist_ok=True)

    output_template = os.path.join(directory, 'p-%03d-000.png')

    # Pass an argument list and skip the shell so that paths containing
    # spaces or shell metacharacters reach Ghostscript unchanged.
    cmd = [
        ghostscript_path,
        '-o', output_template,
        '-sDEVICE=png16m',
        # NOTE: -dNOSAFER switches off Ghostscript's file-access sandbox.
        # Kept for compatibility; only feed this function trusted PDFs.
        '-dNOSAFER',
        '-r300',
        filepath,
    ]
    subprocess.check_output(cmd)
    
#def extract_tif_images(filepath, directory):
#    im = Image.open(filepath)
#    for i, page in enumerate(ImageSequence.Iterator(im)):
#        page.save(r'{directory}/p-{page}-000.png'.format(directory=directory, page=str(i+1).zfill(3)))

# Location of the Ghostscript console executable used for rendering.
ghostscript_path = r'C:\tools\gs\gs9.53.3\bin\gswin64c.exe'
# Folder, relative to the working directory, that receives the page images.
output_directory = r'.\output'

if __name__ == '__main__':
    # Exit with a usage hint instead of an IndexError when no PDF is named.
    if len(sys.argv) < 2:
        sys.exit('usage: {} <pdf-file>'.format(os.path.basename(sys.argv[0])))

    # The first command-line argument names the PDF; a relative name is
    # resolved against the current directory.
    filename = os.path.join('.', sys.argv[1])

    extract_pdf_images(filename, output_directory, ghostscript_path)