Reworked the OCR code to make it easier to use; it should now be possible to add a processor which scans all captures.

This commit is contained in:
Robin Krom 2020-02-20 22:59:09 +01:00
parent a4e4ae86cc
commit b49b01c9be
5 changed files with 215 additions and 74 deletions

View file

@ -317,6 +317,9 @@ namespace Greenshot {
private readonly Timer _doubleClickTimer = new Timer();
public MainForm(CopyDataTransport dataTransport) {
var uiContext = TaskScheduler.FromCurrentSynchronizationContext();
SimpleServiceProvider.Current.AddService(uiContext);
DpiChanged += (e,o) => ApplyDpiScaling();
// The most important form is this

View file

@ -175,6 +175,11 @@ namespace GreenshotPlugin.Core {
}
}
/// <summary>
/// The information which OCR brings
/// </summary>
public OcrInformation OcrInformation { get; set; }
/// <summary>
/// Set if the cursor is visible
/// </summary>
@ -387,7 +392,7 @@ namespace GreenshotPlugin.Core {
/// <summary>
/// The Window Capture code
/// </summary>
public class WindowCapture {
public static class WindowCapture {
private static readonly ILog Log = LogManager.GetLogger(typeof(WindowCapture));
private static readonly CoreConfiguration Configuration = IniConfig.GetIniSection<CoreConfiguration>();
@ -400,9 +405,6 @@ namespace GreenshotPlugin.Core {
[return: MarshalAs(UnmanagedType.Bool)]
private static extern bool DeleteObject(IntPtr hObject);
private WindowCapture() {
}
/// <summary>
/// Get the bounds of all screens combined.
/// </summary>

View file

@ -156,6 +156,11 @@ namespace GreenshotPlugin.Interfaces {
/// <param name="y">y coordinates to move the mouse</param>
void MoveMouseLocation(int x, int y);
/// <summary>
/// Store the OCR information for this capture
/// </summary>
OcrInformation OcrInformation { get; set; }
// TODO: Enable when the elements are usable again.
///// <summary>
///// Apply a translate to the elements e.g. needed for crop

View file

@ -25,15 +25,103 @@ using System.Threading.Tasks;
namespace GreenshotPlugin.Interfaces
{
/// <summary>
/// Contract for a service which is able to run OCR (text recognition) on bitmap content
/// </summary>
public interface IOcrProvider
{
    /// <summary>
    /// Run the OCR on the content of a surface
    /// </summary>
    /// <param name="surface">ISurface to scan for text</param>
    /// <returns>Task of OcrInformation describing what was recognized</returns>
    Task<OcrInformation> DoOcrAsync(ISurface surface);

    /// <summary>
    /// Run the OCR on an image
    /// </summary>
    /// <param name="image">Image to scan for text</param>
    /// <returns>Task of OcrInformation describing what was recognized</returns>
    Task<OcrInformation> DoOcrAsync(Image image);
}
/// <summary>
/// A single word, consisting of its text and the rectangle it occupies
/// </summary>
public class Word
{
    /// <summary>
    /// The rectangle describing where the word is located
    /// </summary>
    public Rectangle Location { get; set; }

    /// <summary>
    /// The text of the word
    /// </summary>
    public string Text { get; set; }
}
/// <summary>
/// A line of text, made up of a fixed number of words
/// </summary>
public class Line
{
    // Cache for CalculatedBounds, stays null until the bounds are requested for the first time
    private Rectangle? _calculatedBounds;

    /// <summary>
    /// Creates the line with an array of <paramref name="wordCount"/> words,
    /// every slot is filled with a new (empty) Word which the caller can fill in
    /// </summary>
    /// <param name="wordCount">int with the number of words in this line</param>
    public Line(int wordCount)
    {
        var words = new Word[wordCount];
        for (var slot = 0; slot < words.Length; slot++)
        {
            words[slot] = new Word();
        }
        Words = words;
    }

    /// <summary>
    /// The words of this line
    /// </summary>
    public Word[] Words { get; }

    /// <summary>
    /// The bounds of the whole line, this is the union of the locations of all words.
    /// The value is calculated on first access and cached after that,
    /// so changes made to the words afterwards are not reflected.
    /// </summary>
    public Rectangle CalculatedBounds => _calculatedBounds ??= CalculateBounds();

    /// <summary>
    /// Build the union of the locations of all the words
    /// </summary>
    /// <returns>Rectangle, Rectangle.Empty when the line has no words</returns>
    private Rectangle CalculateBounds()
    {
        // Nothing to combine when there are no words
        if (Words.Length == 0)
        {
            return Rectangle.Empty;
        }

        // Start with the first word and grow the rectangle with every word
        var bounds = Words[0].Location;
        foreach (var word in Words)
        {
            bounds = Rectangle.Union(bounds, word.Location);
        }
        return bounds;
    }
}
/// <summary>
/// Holds the complete result of an OCR scan
/// </summary>
public class OcrInformation
{
    /// <summary>
    /// The text of the scan
    /// </summary>
    public string Text { get; set; }

    /// <summary>
    /// The lines of the scan, every Line carries its own words
    /// </summary>
    public IList<Line> Lines { get; } = new List<Line>();

    /// <summary>
    /// The words of the scan as (word, location) pairs
    /// </summary>
    public IList<(string word, Rectangle location)> Words { get; } = new List<(string word, Rectangle location)>();
}
}

View file

@ -27,6 +27,7 @@ using Windows.Graphics.Imaging;
using Windows.Media.Ocr;
using GreenshotPlugin.Core;
using System.Text;
using Windows.Storage.Streams;
using GreenshotPlugin.Interfaces;
using GreenshotPlugin.Interfaces.Plugin;
@ -52,6 +53,8 @@ namespace GreenshotWin10Plugin
/// </summary>
public Win10OcrDestination()
{
// Set this as IOcrProvider
SimpleServiceProvider.Current.AddService<IOcrProvider>(this);
var languages = OcrEngine.AvailableRecognizerLanguages;
foreach (var language in languages)
{
@ -63,21 +66,54 @@ namespace GreenshotWin10Plugin
/// Scan the surface bitmap for text, and get the OcrResult
/// </summary>
/// <param name="surface">ISurface</param>
/// <returns>OcrResult</returns>
public async Task<OcrInformation> DoOcrAsync(ISurface surface)
/// <returns>OcrResult sync</returns>
public async Task<OcrInformation> DoOcrAsync(ISurface surface)
{
    // The surface is written to an in-memory stream, which is then handed to the stream based OCR overload.
    // This method has to be async and await the OCR: when the task is returned without awaiting,
    // the using declaration disposes the stream as soon as this method returns,
    // which is before the asynchronous decoding / recognition has finished reading from it.
    using var imageStream = new MemoryStream();
    ImageOutput.SaveToStream(surface, imageStream, new SurfaceOutputSettings());
    // Rewind, so the reader starts at the beginning of what was just written
    imageStream.Position = 0;
    var randomAccessStream = imageStream.AsRandomAccessStream();

    return await DoOcrAsync(randomAccessStream);
}
var decoder = await BitmapDecoder.CreateAsync(imageStream.AsRandomAccessStream());
/// <summary>
/// Run the OCR on an Image, and return what was recognized
/// </summary>
/// <param name="image">Image to scan for text</param>
/// <returns>Task of OcrInformation, as produced by the stream based overload</returns>
public async Task<OcrInformation> DoOcrAsync(Image image)
{
    // The stream is disposed when the method is left, which is after the awaited OCR has completed
    using var buffer = new MemoryStream();
    ImageOutput.SaveToStream(image, null, buffer, new SurfaceOutputSettings());
    // Rewind, so the reader starts at the beginning of what was just written
    buffer.Position = 0;

    return await DoOcrAsync(buffer.AsRandomAccessStream());
}
/// <summary>
/// Scan the surface bitmap for text, and get the OcrResult
/// </summary>
/// <param name="randomAccessStream">IRandomAccessStream</param>
/// <returns>OcrResult sync</returns>
public async Task<OcrInformation> DoOcrAsync(IRandomAccessStream randomAccessStream)
{
var ocrEngine = OcrEngine.TryCreateFromUserProfileLanguages();
if (ocrEngine is null)
{
return null;
}
var decoder = await BitmapDecoder.CreateAsync(randomAccessStream);
var softwareBitmap = await decoder.GetSoftwareBitmapAsync();
var ocrResult = await ocrEngine.RecognizeAsync(softwareBitmap);
var result = new OcrInformation();
// Build the text from the lines, otherwise it's just everything concated together
// Build the text from the lines, otherwise it's just everything concatenated together
var text = new StringBuilder();
foreach (var line in ocrResult.Lines)
{
@ -85,13 +121,20 @@ namespace GreenshotWin10Plugin
}
result.Text = text.ToString();
foreach (var line in ocrResult.Lines)
foreach (var ocrLine in ocrResult.Lines)
{
foreach (var word in line.Words)
{
var rectangle = new Rectangle((int)word.BoundingRect.X, (int)word.BoundingRect.Y, (int)word.BoundingRect.Width, (int)word.BoundingRect.Height);
var line = new Line(ocrLine.Words.Count);
result.Lines.Add(line);
result.Words.Add((word.Text, rectangle));
for (var index = 0; index < ocrLine.Words.Count; index++)
{
var ocrWord = ocrLine.Words[index];
var location = new Rectangle((int) ocrWord.BoundingRect.X, (int) ocrWord.BoundingRect.Y,
(int) ocrWord.BoundingRect.Width, (int) ocrWord.BoundingRect.Height);
var word = line.Words[index];
word.Text = ocrWord.Text;
word.Location = location;
}
}
return result;