From 7cee8fd87a9bb8bee91d7e4db141375ecafd46ee Mon Sep 17 00:00:00 2001 From: zeyu xu <286513187@qq.com> Date: Mon, 8 Apr 2024 12:10:32 +0800 Subject: [PATCH] =?UTF-8?q?add=20OCR=20=E5=92=8C=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E6=9F=A5=E8=AF=A2=E4=B8=8A=E9=99=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AntSK.Domain/AntSK.Domain.csproj | 1 + src/AntSK.Domain/Domain/Service/KMService.cs | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/AntSK.Domain/AntSK.Domain.csproj b/src/AntSK.Domain/AntSK.Domain.csproj index f624e2a..d67fed3 100644 --- a/src/AntSK.Domain/AntSK.Domain.csproj +++ b/src/AntSK.Domain/AntSK.Domain.csproj @@ -46,6 +46,7 @@ + diff --git a/src/AntSK.Domain/Domain/Service/KMService.cs b/src/AntSK.Domain/Domain/Service/KMService.cs index 7940312..e69c31f 100644 --- a/src/AntSK.Domain/Domain/Service/KMService.cs +++ b/src/AntSK.Domain/Domain/Service/KMService.cs @@ -8,6 +8,7 @@ using AntSK.Domain.Domain.Other; using AntSK.Domain.Options; using AntSK.Domain.Repositories; using AntSK.Domain.Utils; +using AntSK.OCR; using DocumentFormat.OpenXml.Drawing.Diagrams; using LLama; using LLamaSharp.KernelMemory; @@ -16,6 +17,7 @@ using Microsoft.AspNetCore.Components; using Microsoft.Extensions.Configuration; using Microsoft.KernelMemory; using Microsoft.KernelMemory.Configuration; +using Microsoft.KernelMemory.DataFormats; using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.MemoryStorage.DevTools; @@ -110,7 +112,8 @@ namespace AntSK.Domain.Domain.Service WithTextEmbeddingGenerationByAIType(memoryBuild, embedModel, embeddingHttpClient); //加载向量库 WithMemoryDbByVectorDB(memoryBuild); - + //加载OCR + WithOcr(memoryBuild, kms); _memory = memoryBuild.Build(); return _memory; } @@ -119,6 +122,14 @@ namespace AntSK.Domain.Domain.Service //} } + private static void WithOcr(IKernelMemoryBuilder memoryBuild, Kmss kms) + { + if (kms.IsOCR == 1) + { + memoryBuild.WithCustomImageOcr(new AntSKOcrEngine()); + } + } + private void WithTextEmbeddingGenerationByAIType(IKernelMemoryBuilder memory, AIModels embedModel, HttpClient embeddingHttpClient) { @@ -262,7 +273,7 @@ namespace AntSK.Domain.Domain.Service { foreach (var memoryDb in memoryDbs) { - var items = await memoryDb.GetListAsync(memoryIndex.Name, new List() { new MemoryFilter().ByDocument(fileId) }, 100, true).ToListAsync(); + var items = await memoryDb.GetListAsync(memoryIndex.Name, new List() { new MemoryFilter().ByDocument(fileId) }, 1000, true).ToListAsync(); docTextList.AddRange(items.Select(item => new KMFile() { DocumentId = item.GetDocumentId(),