User:Sagsaw
Appearance
Tesseract Gimp Plug-in
[edit] I threw together this Python-Fu GIMP plug-in, which OCRs the selected portion of an image using Tesseract and then opens the resulting text in OpenOffice.
#!/usr/bin/env python
import os
import tempfile
import time
import gimpfu
import gimp
import subprocess
def do_ocr(Image, Drawable):
    """OCR the current selection with tesseract and open the text in OpenOffice.

    The selection of ``Drawable`` is copied into a scratch image, converted to
    grayscale if necessary, flattened and saved as ``image.tif`` in a fresh
    temporary directory.  ``tesseract`` is then run on that file.  On success
    the resulting ``image.txt`` is handed to ``/usr/bin/ooffice -write``; on a
    non-zero exit status the code is reported through ``gimp_message``.

    The temporary directory and the scratch image are always released, even
    when saving or the OCR step fails.

    Args:
        Image: The image the plug-in was invoked on (not used by this function).
        Drawable: The drawable whose selection is copied and recognised.

    Raises:
        OSError: If the ``tesseract`` or ``ooffice`` executable cannot be
            started by ``subprocess``.
    """
    # Local import: only the cleanup below needs it, and the block must not
    # depend on a module-level import that may not be present.
    import shutil

    pdb = gimpfu.pdb

    # Create a new image from the selection.
    pdb.gimp_edit_copy(Drawable)
    scratch_image = pdb.gimp_edit_paste_as_new()
    try:
        # Convert image to grayscale and flatten.
        if not pdb.gimp_drawable_is_gray(scratch_image.active_layer):
            pdb.gimp_image_convert_grayscale(scratch_image)
        flat_layer = pdb.gimp_image_flatten(scratch_image)

        # Save as TIFF in a temporary directory.
        temp_dir = tempfile.mkdtemp()
        print(temp_dir)
        try:
            tiff_path = os.path.join(temp_dir, "image.tif")
            # tesseract is given the output name without extension; the code
            # below expects it to write "<output_base>.txt".
            output_base = os.path.join(temp_dir, "image")
            text_path = output_base + ".txt"

            pdb.file_tiff_save(scratch_image, flat_layer, tiff_path, tiff_path, 0)

            command = ["tesseract", tiff_path, output_base]
            print(command)
            pdb.gimp_progress_set_text("Performing optical character recognition.")
            retval = subprocess.call(command)
            if retval != 0:
                pdb.gimp_message("OCR failed with error code %d" % retval)
                return

            subprocess.Popen(["/usr/bin/ooffice", "-write", text_path])
            # Allow time for OpenOffice to open the file before it is removed.
            # NOTE: this is a best-effort delay; a slow start-up can still
            # lose the race against the cleanup below.
            time.sleep(5)
        finally:
            # Remove everything in one go.  Unlike per-file os.remove calls,
            # this does not raise when image.txt was never produced (failed
            # OCR) or when unexpected extra files are left in the directory.
            shutil.rmtree(temp_dir, ignore_errors=True)
    finally:
        # The scratch image has no display attached, so it must be deleted
        # explicitly or it stays in GIMP's memory after every run.
        pdb.gimp_image_delete(scratch_image)
# Plug-in registration.  The description and the author name are each used
# for two consecutive positional arguments, so they are spelled out once here.
_OCR_DESCRIPTION = "Extract text from the selection using OCR."
_OCR_AUTHOR = "Mark Moss"

gimpfu.register(
    "tesseract_ocr",        # procedure name
    _OCR_DESCRIPTION,       # blurb
    _OCR_DESCRIPTION,       # help text
    _OCR_AUTHOR,            # author
    _OCR_AUTHOR,            # copyright holder
    "2008-2009",            # date
    "<Image>/Tools/OC_R",   # menu entry
    "RGB*, GRAY*",          # accepted image types
    [],                     # extra parameters: none declared here
    [],                     # return values: none
    do_ocr,                 # callback, defined above as do_ocr(Image, Drawable)
)

# Hand control to gimpfu.
gimpfu.main()