add OCR 和文档查询上限

This commit is contained in:
zeyu xu
2024-04-08 12:10:32 +08:00
parent 8ce0e5d348
commit 7cee8fd87a
2 changed files with 14 additions and 2 deletions

View File

@@ -46,6 +46,7 @@
<ItemGroup>
<ProjectReference Include="..\AntSK.LLamaFactory\AntSK.LLamaFactory.csproj" />
<ProjectReference Include="..\AntSk.LLM\AntSK.LLM.csproj" />
<ProjectReference Include="..\AntSK.OCR\AntSK.OCR.csproj" />
<ProjectReference Include="..\MiddleWare\AntSK.BackgroundTask\AntSK.BackgroundTask.csproj" />
</ItemGroup>

View File

@@ -8,6 +8,7 @@ using AntSK.Domain.Domain.Other;
using AntSK.Domain.Options;
using AntSK.Domain.Repositories;
using AntSK.Domain.Utils;
using AntSK.OCR;
using DocumentFormat.OpenXml.Drawing.Diagrams;
using LLama;
using LLamaSharp.KernelMemory;
@@ -16,6 +17,7 @@ using Microsoft.AspNetCore.Components;
using Microsoft.Extensions.Configuration;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.Configuration;
using Microsoft.KernelMemory.DataFormats;
using Microsoft.KernelMemory.FileSystem.DevTools;
using Microsoft.KernelMemory.MemoryStorage;
using Microsoft.KernelMemory.MemoryStorage.DevTools;
@@ -110,7 +112,8 @@ namespace AntSK.Domain.Domain.Service
WithTextEmbeddingGenerationByAIType(memoryBuild, embedModel, embeddingHttpClient);
//加载向量库
WithMemoryDbByVectorDB(memoryBuild);
//加载OCR
WithOcr(memoryBuild, kms);
_memory = memoryBuild.Build<MemoryServerless>();
return _memory;
}
@@ -119,6 +122,14 @@ namespace AntSK.Domain.Domain.Service
//}
}
private static void WithOcr(IKernelMemoryBuilder memoryBuild, Kmss kms)
{
if (kms.IsOCR == 1)
{
memoryBuild.WithCustomImageOcr(new AntSKOcrEngine());
}
}
private void WithTextEmbeddingGenerationByAIType(IKernelMemoryBuilder memory, AIModels embedModel,
HttpClient embeddingHttpClient)
{
@@ -262,7 +273,7 @@ namespace AntSK.Domain.Domain.Service
{
foreach (var memoryDb in memoryDbs)
{
var items = await memoryDb.GetListAsync(memoryIndex.Name, new List<MemoryFilter>() { new MemoryFilter().ByDocument(fileId) }, 100, true).ToListAsync();
var items = await memoryDb.GetListAsync(memoryIndex.Name, new List<MemoryFilter>() { new MemoryFilter().ByDocument(fileId) }, 1000, true).ToListAsync();
docTextList.AddRange(items.Select(item => new KMFile()
{
DocumentId = item.GetDocumentId(),