From e6f2c5c2fe424307634e985e6ca36d4dc7fe98cd Mon Sep 17 00:00:00 2001 From: zyxucp <286513187@qq.com> Date: Thu, 23 May 2024 11:29:23 +0800 Subject: [PATCH] =?UTF-8?q?update=20=E5=8D=87=E7=BA=A7SK=20KM=E7=89=88?= =?UTF-8?q?=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/AntSK.Domain/AntSK.Domain.csproj | 10 +++++----- src/AntSK.Domain/Directory.Build.props | 5 ++--- .../Domain/Other/Bge/BgeEmbeddingConfig.cs | 5 +++-- src/AntSK.Domain/Domain/Other/KMExcelHandler.cs | 3 ++- src/AntSK.Domain/Domain/Other/LLamaConfig.cs | 2 +- src/AntSK.Domain/Domain/Other/QAHandler.cs | 2 +- src/AntSK.Domain/Domain/Service/KMService.cs | 2 +- src/AntSK.Domain/Utils/ConvertUtils.cs | 9 ++++++++- src/AntSK.OCR/Directory.Build.props | 5 ++--- 9 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/AntSK.Domain/AntSK.Domain.csproj b/src/AntSK.Domain/AntSK.Domain.csproj index 70c1cc9..a4b8677 100644 --- a/src/AntSK.Domain/AntSK.Domain.csproj +++ b/src/AntSK.Domain/AntSK.Domain.csproj @@ -5,11 +5,11 @@ enable enable AntSK.Domain.xml - CA1050,CA1707,CA2007,VSTHRD111,CS1591,RCS1110,CA5394,SKEXP0001,SKEXP0002,SKEXP0003,SKEXP0004,SKEXP0010,SKEXP0011,,SKEXP0012,SKEXP0020,SKEXP0021,SKEXP0022,SKEXP0023,SKEXP0024,SKEXP0025,SKEXP0026,SKEXP0027,SKEXP0028,SKEXP0029,SKEXP0030,SKEXP0031,SKEXP0032,SKEXP0040,SKEXP0041,SKEXP0042,SKEXP0050,SKEXP0051,SKEXP0052,SKEXP0053,SKEXP0054,SKEXP0055,SKEXP0060,SKEXP0061,SKEXP0101,SKEXP0102 + CA1050,CA1707,CA2007,VSTHRD111,CS1591,RCS1110,CA5394,SKEXP0001,SKEXP0002,SKEXP0003,SKEXP0004,SKEXP0010,SKEXP0011,,SKEXP0012,SKEXP0020,SKEXP0021,SKEXP0022,SKEXP0023,SKEXP0024,SKEXP0025,SKEXP0026,SKEXP0027,SKEXP0028,SKEXP0029,SKEXP0030,SKEXP0031,SKEXP0032,SKEXP0040,SKEXP0041,SKEXP0042,SKEXP0050,SKEXP0051,SKEXP0052,SKEXP0053,SKEXP0054,SKEXP0055,SKEXP0060,SKEXP0061,SKEXP0101,SKEXP0102,KMEXP00 - + @@ -26,9 +26,9 @@ - - - + + + diff --git a/src/AntSK.Domain/Directory.Build.props b/src/AntSK.Domain/Directory.Build.props index 60bfe86..00d2bf2 100644 --- a/src/AntSK.Domain/Directory.Build.props +++ b/src/AntSK.Domain/Directory.Build.props @@ -1,8 +1,7 @@ - - 0.39.240427.1 - 0.11.2 + 0.61.240519.2 + 0.12.0 diff --git a/src/AntSK.Domain/Domain/Other/Bge/BgeEmbeddingConfig.cs b/src/AntSK.Domain/Domain/Other/Bge/BgeEmbeddingConfig.cs index 9923390..1711418 100644 --- a/src/AntSK.Domain/Domain/Other/Bge/BgeEmbeddingConfig.cs +++ b/src/AntSK.Domain/Domain/Other/Bge/BgeEmbeddingConfig.cs @@ -1,4 +1,5 @@ -using Microsoft.KernelMemory.AI.OpenAI.GPT3; +using Microsoft.KernelMemory.AI.OpenAI; +using Microsoft.KernelMemory.AI.OpenAI.GPT3; using Python.Runtime; using System; using System.Collections.Generic; @@ -85,7 +86,7 @@ namespace AntSK.Domain.Domain.Other.Bge // return len; //} - var tokenCount1 = GPT3Tokenizer.Encode(queryStr).Count; + var tokenCount1 = DefaultGPTTokenizer.StaticCountTokens(queryStr); return tokenCount1; } diff --git a/src/AntSK.Domain/Domain/Other/KMExcelHandler.cs b/src/AntSK.Domain/Domain/Other/KMExcelHandler.cs index 6f22473..dcfe473 100644 --- a/src/AntSK.Domain/Domain/Other/KMExcelHandler.cs +++ b/src/AntSK.Domain/Domain/Other/KMExcelHandler.cs @@ -1,4 +1,5 @@ using AntSK.Domain.Domain.Model.Constant; +using AntSK.Domain.Utils; using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.AI.OpenAI; using Microsoft.KernelMemory.Configuration; @@ -134,7 +135,7 @@ namespace AntSK.Domain.Domain.Other PartitionNumber = partitionNumber, SectionNumber = sectionNumber, Tags = pipeline.Tags, - ContentSHA256 = textData.CalculateSHA256(), + ContentSHA256 = textData.AntSKCalculateSHA256(), }; newFiles.Add(destFile, destFileDetails); destFileDetails.MarkProcessedBy(this); diff --git a/src/AntSK.Domain/Domain/Other/LLamaConfig.cs b/src/AntSK.Domain/Domain/Other/LLamaConfig.cs index 00b65f0..cbfb780 100644 --- a/src/AntSK.Domain/Domain/Other/LLamaConfig.cs +++ b/src/AntSK.Domain/Domain/Other/LLamaConfig.cs @@ -32,7 +32,7 @@ namespace AntSK.Domain.Domain.Other ContextSize = lsConfig?.ContextSize ?? 2048, Seed = lsConfig?.Seed ?? 0, GpuLayerCount = lsConfig?.GpuLayerCount ?? 20, - EmbeddingMode = true + Embeddings = true }; var weights = LLamaWeights.LoadFromFile(parameters); dicLLamaWeights.Add(modelPath, (weights, parameters)); diff --git a/src/AntSK.Domain/Domain/Other/QAHandler.cs b/src/AntSK.Domain/Domain/Other/QAHandler.cs index 7247455..5fb5484 100644 --- a/src/AntSK.Domain/Domain/Other/QAHandler.cs +++ b/src/AntSK.Domain/Domain/Other/QAHandler.cs @@ -151,7 +151,7 @@ namespace AntSK.Domain.Domain.Other PartitionNumber = partitionNumber, SectionNumber = sectionNumber, Tags = pipeline.Tags, - ContentSHA256 = textData.CalculateSHA256(), + ContentSHA256 = textData.AntSKCalculateSHA256(), }; newFiles.Add(destFile, destFileDetails); destFileDetails.MarkProcessedBy(this); diff --git a/src/AntSK.Domain/Domain/Service/KMService.cs b/src/AntSK.Domain/Domain/Service/KMService.cs index 7555e05..25a1b4a 100644 --- a/src/AntSK.Domain/Domain/Service/KMService.cs +++ b/src/AntSK.Domain/Domain/Service/KMService.cs @@ -296,7 +296,7 @@ namespace AntSK.Domain.Domain.Service { DocumentId = item.GetDocumentId(), Text = item.GetPartitionText(), - Url = item.GetWebPageUrl(), + Url = item.GetWebPageUrl(KmsConstantcs.KmsIndex), LastUpdate = item.GetLastUpdate().LocalDateTime.ToString("yyyy-MM-dd HH:mm:ss"), File = item.GetFileName() })); diff --git a/src/AntSK.Domain/Utils/ConvertUtils.cs b/src/AntSK.Domain/Utils/ConvertUtils.cs index 2661828..850052d 100644 --- a/src/AntSK.Domain/Utils/ConvertUtils.cs +++ b/src/AntSK.Domain/Utils/ConvertUtils.cs @@ -1,4 +1,5 @@ -using System.Web; +using System.Security.Cryptography; +using System.Web; namespace AntSK.Domain.Utils { @@ -261,5 +262,11 @@ namespace AntSK.Domain.Utils { return s.Equals(value, StringComparison.OrdinalIgnoreCase); } + + public static string AntSKCalculateSHA256(this BinaryData binaryData) + { + byte[] byteArray = SHA256.HashData(binaryData.ToMemory().Span); + return Convert.ToHexString(byteArray).ToLowerInvariant(); + } } } diff --git a/src/AntSK.OCR/Directory.Build.props b/src/AntSK.OCR/Directory.Build.props index 869bf4e..00d2bf2 100644 --- a/src/AntSK.OCR/Directory.Build.props +++ b/src/AntSK.OCR/Directory.Build.props @@ -1,8 +1,7 @@ - - 0.36.240416.1 - 0.11.2 + 0.61.240519.2 + 0.12.0