OCR Code cleanup, preparing for adding the word-rectangles to assist other Greenshot functionality (like blur/pixelate etc)

This commit is contained in:
Robin 2016-10-01 22:22:32 +02:00
commit cab0f21e26
19 changed files with 672 additions and 111 deletions

View file

@ -0,0 +1,29 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Compression level of a MODI image. It is the type of the compressionLevel argument of
    /// IDocument.SaveAs and of the IImage.Compression property.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the member names and numeric values presumably mirror the constants of the
    /// MODI type library - confirm before renaming or renumbering anything here.
    /// </remarks>
    public enum CompressionLevel
    {
        /// <summary>Low compression level (value 0).</summary>
        miCOMP_LEVEL_LOW = 0,

        /// <summary>Medium compression level (value 1).</summary>
        miCOMP_LEVEL_MEDIUM = 1,

        /// <summary>High compression level (value 2).</summary>
        miCOMP_LEVEL_HIGH = 2
    }
}

View file

@ -0,0 +1,30 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace GreenshotOCR
{
    /// <summary>
    /// Image file format selector; it is the type of the fileFormat argument of IDocument.SaveAs.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the member names and numeric values presumably mirror the constants of the
    /// MODI type library - confirm before renaming or renumbering anything here.
    /// </remarks>
    public enum FileFormat
    {
        /// <summary>Default format (value -1).</summary>
        miFILE_FORMAT_DEFAULTVALUE = -1,

        /// <summary>TIFF format (value 1).</summary>
        miFILE_FORMAT_TIFF = 1,

        /// <summary>Lossless TIFF format (value 2).</summary>
        miFILE_FORMAT_TIFF_LOSSLESS = 2,

        /// <summary>MDI format (value 4).</summary>
        miFILE_FORMAT_MDI = 4
    }
}

View file

@ -0,0 +1,32 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
using System;
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Base interface carrying the members shared by the MODI wrapper interfaces.
    /// It extends <see cref="IDisposable"/>, so every derived wrapper can be disposed.
    /// </summary>
    public interface ICommon : IDisposable
    {
        /// <summary>
        /// The Application property common to the MODI objects.
        /// NOTE(review): it is typed as IDocument here - confirm this matches the object MODI actually returns.
        /// </summary>
        IDocument Application { get; }
    }
}

View file

@ -0,0 +1,33 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
using System;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.CustomMarshalers;
namespace GreenshotOCRCommand.Modi {
/// <summary>
/// Partial managed declaration of the COM IDispatch interface (the Guid below is the standard IID of IDispatch).
/// It is imported as an IUnknown-based interface, so the members map to vtable slots in declaration order;
/// do not reorder, insert or remove members.
/// </summary>
[ComImport, Guid("00020400-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
public interface IDispatch {
// Placeholder for the first slot; presumably stands in for the native GetTypeInfoCount so that
// GetTypeInfo lands on the correct slot - not meant to be called (TODO confirm).
void Reserved();
/// <summary>
/// Retrieves the type information of the COM object as a managed <see cref="Type"/>.
/// </summary>
/// <param name="nInfo">Index of the type information to return.</param>
/// <param name="lcid">Locale identifier for the type information.</param>
/// <param name="typeInfo">Receives the type, converted by the TypeToTypeInfoMarshaler custom marshaler.</param>
/// <returns>The raw HRESULT of the call; because of PreserveSig a failure is not turned into an exception.</returns>
[PreserveSig]
int GetTypeInfo(uint nInfo, int lcid, [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(TypeToTypeInfoMarshaler))] out Type typeInfo);
}
}

View file

@ -0,0 +1,82 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
using Greenshot.Interop;
using GreenshotOCR;
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Wrapper interface for the MODI Document object (ProgId "MODI.Document"), which represents an
    /// ordered collection of document images saved as a single file.
    /// Create loads an existing MDI or TIF file or starts an empty document, OCR runs the recognition on
    /// all pages, Dirty tells whether there are unsaved OCR results or changes, and SaveAs writes the
    /// document with a chosen file format and compression level.
    /// The OnOCRProgress event and the PrintOut method of the MODI object are not declared by this interface.
    /// </summary>
    [ComProgId("MODI.Document")]
    public interface IDocument : ICommon
    {
        /// <summary>
        /// Closes the document.
        /// </summary>
        /// <param name="saveCall">NOTE(review): presumably whether changes are saved while closing - confirm against the MODI documentation.</param>
        void Close(bool saveCall);

        /// <summary>
        /// The document's collection of pages.
        /// </summary>
        IImages Images { get; }

        // Not mapped: the OnOCRProgress event. According to the original note it occurs periodically
        // during an optical character recognition (OCR) operation, returns the estimated percentage of
        // the OCR operation that is complete, and allows the user to cancel the operation.
        // event OnOCRProgress { get; }

        /// <summary>
        /// Indicates whether the active document has unsaved changes.
        /// </summary>
        bool Dirty { get; }

        /// <summary>
        /// Creates a new document.
        /// </summary>
        /// <param name="file">The path and filename of the document file that is to be loaded into the new document. Optional in MODI, but required by this declaration.</param>
        void Create(string file);

        /// <summary>
        /// Performs optical character recognition (OCR) on the specified document or image.
        /// </summary>
        /// <param name="language">The ModiLanguage to use for the recognition.</param>
        /// <param name="orientimage">Specifies whether the OCR engine attempts to determine the orientation of the page. Optional in MODI with a default of true; required by this declaration.</param>
        /// <param name="straightenImage">Specifies whether the OCR engine attempts to "de-skew" the page to correct for small angles of misalignment from the vertical. Optional in MODI with a default of true; required by this declaration.</param>
        void OCR(ModiLanguage language, bool orientimage, bool straightenImage);

        /// <summary>
        /// Saves the document, allowing the image file format and the compression level to be specified.
        /// </summary>
        /// <param name="filename">Name of the file to save to.</param>
        /// <param name="fileFormat">The image file format to save in.</param>
        /// <param name="compressionLevel">The compression level to save with.</param>
        void SaveAs(string filename, FileFormat fileFormat, CompressionLevel compressionLevel);
    }
}

View file

@ -0,0 +1,41 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Wrapper interface describing a single page (image) of a scan.
    /// </summary>
    public interface IImage : ICommon
    {
        /// <summary>
        /// The layout object belonging to this image.
        /// </summary>
        ILayout Layout { get; }

        /// <summary>
        /// Bits per pixel of the image.
        /// NOTE(review): declared as a 64-bit long; if the underlying MODI property is a 32-bit value this may need to be int - confirm.
        /// </summary>
        long BitsPerPixel { get; }

        /// <summary>
        /// Compression level of the image.
        /// </summary>
        CompressionLevel Compression { get; }

        // Not mapped:
        //IPictureDisp Picture { get; }

        /// <summary>
        /// Height of the image in pixels.
        /// </summary>
        int PixelHeight { get; }

        /// <summary>
        /// Width of the image in pixels.
        /// </summary>
        int PixelWidth { get; }

        // Not mapped:
        //IPictureDisp Thumbnail { get; }

        /// <summary>
        /// Resolution along the X axis (presumably dots per inch - confirm).
        /// </summary>
        int XDPI { get; }

        /// <summary>
        /// Resolution along the Y axis (presumably dots per inch - confirm).
        /// </summary>
        int YDPI { get; }
    }
}

View file

@ -0,0 +1,41 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
using System.Collections;
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Wrapper interface for the Images collection, which is obtained through the Images property of the Document object.
    /// Each item is an Image object giving access to its OCR Layout (including the recognized Text and Words)
    /// and to the properties describing its dimensions and format
    /// (BitsPerPixel, Compression, PixelHeight, PixelWidth, XDPI and YDPI).
    /// The Picture and Thumbnail images of the MODI object are not mapped by IImage.
    /// </summary>
    public interface IImages : ICommon, IEnumerable
    {
        /// <summary>
        /// Number of images in the collection.
        /// </summary>
        int Count { get; }

        /// <summary>
        /// Returns the image at the given position.
        /// </summary>
        /// <param name="index">Position of the image in the collection.</param>
        IImage this[int index] { get; }

        /// <summary>
        /// Returns an enumerator over the collection; declared with "new", so it hides IEnumerable.GetEnumerator.
        /// </summary>
        new IEnumerator GetEnumerator();
    }
}

View file

@ -0,0 +1,55 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Wrapper interface for the layout of an IImage, giving access to the recognition results.
    /// </summary>
    public interface ILayout : ICommon
    {
        /// <summary>
        /// The recognized text, as a Unicode string.
        /// </summary>
        string Text { get; }

        /// <summary>
        /// Accessor for the Words collection that was recognized in the text during an optical character recognition (OCR) operation.
        /// </summary>
        IWords Words { get; }

        /// <summary>
        /// The number of characters in the recognized text.
        /// </summary>
        int NumChars { get; }

        /// <summary>
        /// The number of words in the recognized text.
        /// </summary>
        int NumWords { get; }

        /// <summary>
        /// The language identifier of the recognized text (a read-only Long in MODI, exposed here as ModiLanguage).
        /// </summary>
        ModiLanguage Language { get; }
    }
}

View file

@ -0,0 +1,48 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Wrapper interface for a bounding rectangle in the optical character recognition (OCR) layout.
    /// All four values are distances in pixels, measured from the top or the left edge of the containing image.
    /// </summary>
    public interface IMiRect : ICommon
    {
        /// <summary>
        /// The Bottom property represents the distance in pixels from the top edge of the containing image.
        /// </summary>
        int Bottom { get; }

        /// <summary>
        /// The Left property represents the distance in pixels from the left edge of the containing image.
        /// </summary>
        int Left { get; }

        /// <summary>
        /// The Right property represents the distance in pixels from the left edge of the containing image.
        /// </summary>
        int Right { get; }

        /// <summary>
        /// The Top property represents the distance in pixels from the top edge of the containing image.
        /// </summary>
        int Top { get; }
    }
}

View file

@ -0,0 +1,38 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
using System.Collections;
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Wrapper interface for the collection of bounding rectangles (MiRect objects) in the
    /// optical character recognition (OCR) layout.
    /// </summary>
    public interface IMiRects : ICommon, IEnumerable
    {
        /// <summary>
        /// Number of rectangles in the collection.
        /// </summary>
        int Count { get; }

        /// <summary>
        /// Returns the rectangle at the given position.
        /// </summary>
        /// <param name="index">Position of the rectangle in the collection.</param>
        IMiRect this[int index] { get; }

        /// <summary>
        /// Returns an enumerator over the collection; declared with "new", so it hides IEnumerable.GetEnumerator.
        /// </summary>
        new IEnumerator GetEnumerator();
    }
}

View file

@ -0,0 +1,65 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Wrapper interface for a word that was recognized in the text during an optical character recognition (OCR) operation.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the id properties are declared as 64-bit long; if the underlying MODI properties are
    /// 32-bit values they may need to be int - confirm.
    /// </remarks>
    public interface IWord : ICommon
    {
        /// <summary>
        /// The index of this word in the Words collection of the Layout or IMiSelectableItem object.
        /// </summary>
        long Id { get; }

        /// <summary>
        /// The number of the region in the optical character recognition (OCR) layout where the word occurs.
        /// </summary>
        long RegionId { get; }

        /// <summary>
        /// The number of the line in the optical character recognition (OCR) layout where the word occurs.
        /// </summary>
        long LineId { get; }

        /// <summary>
        /// The recognized text, as a Unicode string.
        /// </summary>
        string Text { get; }

        /// <summary>
        /// The relative confidence factor (on a scale of 0 to 999) reported by the optical character recognition (OCR) engine after recognizing this word.
        /// </summary>
        short RecognitionConfidence { get; }

        /// <summary>
        /// The index of the font used by this word; this is the font that was recognized in the text during an optical character recognition (OCR) operation.
        /// </summary>
        long FontId { get; }

        /// <summary>
        /// The collection of rectangles (IMiRects) belonging to this word.
        /// </summary>
        IMiRects Rects { get; }
    }
}

View file

@ -0,0 +1,43 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
using System.Collections;
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Wrapper interface for the Words collection that was recognized in the text during an
    /// optical character recognition (OCR) operation.
    /// </summary>
    public interface IWords : ICommon, IEnumerable
    {
        /// <summary>
        /// Number of words in the collection.
        /// </summary>
        int Count { get; }

        /// <summary>
        /// Returns the word at the given position.
        /// </summary>
        /// <param name="index">Position of the word in the collection.</param>
        IWord this[int index] { get; }

        /// <summary>
        /// Returns an enumerator over the collection; declared with "new", so it hides IEnumerable.GetEnumerator.
        /// </summary>
        new IEnumerator GetEnumerator();
    }
}

View file

@ -0,0 +1,48 @@
/*
* Greenshot - a free and open source screenshot tool
* Copyright (C) 2007-2016 Thomas Braun, Jens Klingen, Robin Krom
*
* For more information see: http://getgreenshot.org/
* The Greenshot project is hosted on GitHub https://github.com/greenshot/greenshot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace GreenshotOCRCommand.Modi
{
    /// <summary>
    /// Language identifiers for MODI; it is the type of the language argument of IDocument.OCR
    /// and of the ILayout.Language property.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the numeric values presumably have to match the language constants of the
    /// MODI type library - confirm before changing any of them.
    /// </remarks>
    public enum ModiLanguage
    {
        CHINESE_SIMPLIFIED = 2052,
        CHINESE_TRADITIONAL = 1028,
        CZECH = 5,
        DANISH = 6,
        DUTCH = 19,
        ENGLISH = 9,
        FINNISH = 11,
        FRENCH = 12,
        GERMAN = 7,
        GREEK = 8,
        HUNGARIAN = 14,
        ITALIAN = 16,
        JAPANESE = 17,
        KOREAN = 18,
        NORWEGIAN = 20,
        POLISH = 21,
        PORTUGUESE = 22,
        RUSSIAN = 25,
        SPANISH = 10,
        SWEDISH = 29,
        TURKISH = 31,

        /// <summary>The system default language (value 2048).</summary>
        SYSDEFAULT = 2048
    }
}