mirror of
https://github.com/AIDotNet/AntSK.git
synced 2026-02-18 06:20:11 +08:00
Compare commits
162 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb0df4d4af | ||
|
|
c810f85cea | ||
|
|
6700b75684 | ||
|
|
ab15dd6e99 | ||
|
|
13a4419705 | ||
|
|
cf03465e23 | ||
|
|
a441730508 | ||
|
|
6e30886bd2 | ||
|
|
bc2af42724 | ||
|
|
919fc5dea7 | ||
|
|
9ab287fabd | ||
|
|
cf02efc2da | ||
|
|
1c34ad5987 | ||
|
|
4e8039703e | ||
|
|
c2b97c7f82 | ||
|
|
1a621f5cbc | ||
|
|
836f898ffe | ||
|
|
0a9a737709 | ||
|
|
0df8c74ec2 | ||
|
|
5e3ff74eaa | ||
|
|
7c49ff0a6c | ||
|
|
63f5267bca | ||
|
|
e0c35aac06 | ||
|
|
27d52d3331 | ||
|
|
e7b2c6e193 | ||
|
|
7600397b79 | ||
|
|
874b8e5d7f | ||
|
|
3ac18086a1 | ||
|
|
16049c7413 | ||
|
|
7bb7a41bb3 | ||
|
|
6c37ed66b2 | ||
|
|
bc86f96159 | ||
|
|
8eb09fb783 | ||
|
|
eff5f69f0f | ||
|
|
e3f966d4f2 | ||
|
|
015f51b99c | ||
|
|
cd66b61014 | ||
|
|
f0bef7d2fa | ||
|
|
de051b047d | ||
|
|
ed6f5dada2 | ||
|
|
d2e3fde829 | ||
|
|
195551e9c1 | ||
|
|
855103c2a4 | ||
|
|
6150d543d3 | ||
|
|
d968d78982 | ||
|
|
0ec5d1f1cf | ||
|
|
31f44c1758 | ||
|
|
e5f63d605d | ||
|
|
7db62e3dc6 | ||
|
|
4408fa4345 | ||
|
|
c5e952b98e | ||
|
|
bedfeaf53d | ||
|
|
d605fd6685 | ||
|
|
e5e3f7cd8f | ||
|
|
657949694c | ||
|
|
10b6035f84 | ||
|
|
3f9fe27456 | ||
|
|
da3a0681e5 | ||
|
|
57b7948d86 | ||
|
|
40b8bd0439 | ||
|
|
6ed9cc9b70 | ||
|
|
e51bf35217 | ||
|
|
28f88438e7 | ||
|
|
85f4a330d5 | ||
|
|
21d7c719f1 | ||
|
|
4ef398bd57 | ||
|
|
dc70270362 | ||
|
|
97b7211cce | ||
|
|
3e762e13af | ||
|
|
e084317a46 | ||
|
|
531b4473e8 | ||
|
|
aefd0d2775 | ||
|
|
960468edf0 | ||
|
|
07ad1f58b5 | ||
|
|
095428be50 | ||
|
|
87fc8911fa | ||
|
|
58272e1ce8 | ||
|
|
700bbcb63f | ||
|
|
dde1d68876 | ||
|
|
71553a6153 | ||
|
|
d4f8de3e21 | ||
|
|
6cf5dea10d | ||
|
|
05379dfee6 | ||
|
|
5a6d49ff64 | ||
|
|
64ab940a26 | ||
|
|
55982ea36d | ||
|
|
21efcf2479 | ||
|
|
0dc7bfcadb | ||
|
|
22d99091e1 | ||
|
|
7558d3ffdc | ||
|
|
85ae41c44c | ||
|
|
91193850dd | ||
|
|
7cc04e3364 | ||
|
|
3da28090c6 | ||
|
|
1595ef2c0a | ||
|
|
83e3d81de7 | ||
|
|
18437ddda4 | ||
|
|
fd503171a1 | ||
|
|
7022139780 | ||
|
|
1e508e45af | ||
|
|
03d9ec2cad | ||
|
|
86fb48bab7 | ||
|
|
a4bc1e4a55 | ||
|
|
8681e15da5 | ||
|
|
ebc82f8b1b | ||
|
|
3bcd7bd7e1 | ||
|
|
b64d8669b1 | ||
|
|
0489044098 | ||
|
|
17e2062b72 | ||
|
|
4e4f5a698d | ||
|
|
b879d04bcd | ||
|
|
95f918f4c7 | ||
|
|
f0e1ad6088 | ||
|
|
61773af48d | ||
|
|
54cd04c3bf | ||
|
|
cd9f4ae11b | ||
|
|
3f9c748b41 | ||
|
|
d483005531 | ||
|
|
1d2db6a896 | ||
|
|
9a7a263055 | ||
|
|
6beb0b52c7 | ||
|
|
0ea167a204 | ||
|
|
6e6afa2a7c | ||
|
|
7a2a5d86bb | ||
|
|
a1a36c3494 | ||
|
|
4f350081dd | ||
|
|
b3ea0c4e1a | ||
|
|
e72a6acd03 | ||
|
|
9bb8ab89fe | ||
|
|
e78da66d1a | ||
|
|
9ee21fd5e5 | ||
|
|
a22c04c9b2 | ||
|
|
3bb5bfaca7 | ||
|
|
c4bf5ee7e5 | ||
|
|
5e1e688f84 | ||
|
|
80d9bf68f3 | ||
|
|
65f2e3e363 | ||
|
|
68d27ff2bc | ||
|
|
034da30811 | ||
|
|
3db0cdcd19 | ||
|
|
42181a6f1d | ||
|
|
ec8cbf2550 | ||
|
|
9a1bd079da | ||
|
|
4213c4379c | ||
|
|
05cda17e2e | ||
|
|
cda6e54f0b | ||
|
|
51d8ba6408 | ||
|
|
b571c7d22d | ||
|
|
a0c91f565e | ||
|
|
280c750165 | ||
|
|
fec9337fda | ||
|
|
b84f252f2f | ||
|
|
5c998ccce2 | ||
|
|
0e3cfd2cfb | ||
|
|
4040831a23 | ||
|
|
a3a2308659 | ||
|
|
6d43c71d13 | ||
|
|
8315b6f37f | ||
|
|
7bc708e6ae | ||
|
|
e6f2c5c2fe | ||
|
|
5cab781362 | ||
|
|
02d7994bae |
@@ -1,8 +1,4 @@
|
||||
# 1. Define the Python image to use for getting pip
|
||||
FROM pytorch/pytorch AS python-base
|
||||
|
||||
# 2. Define the .NET SDK image to build your application
|
||||
FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
|
||||
FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
|
||||
WORKDIR /src
|
||||
COPY ["src/AntSK/AntSK.csproj", "AntSK/"]
|
||||
RUN dotnet restore "AntSK/AntSK.csproj"
|
||||
@@ -11,19 +7,11 @@ WORKDIR "/src/AntSK"
|
||||
RUN dotnet build "AntSK.csproj" -c Release -o /app/build
|
||||
RUN dotnet publish "AntSK.csproj" -c Release -o /app/publish
|
||||
|
||||
# 3. Define the final image that will contain both .NET runtime and Python
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:8.0 AS final
|
||||
|
||||
# Copy the Python/pip installation from the official Python image
|
||||
COPY --from=python-base /usr/local /usr/local
|
||||
COPY --from=python-base /opt/conda/ /opt/conda/
|
||||
FROM registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk-base:v1.0.0 AS final
|
||||
WORKDIR /app
|
||||
COPY --from=build /app/publish .
|
||||
# Make sure the app and Python directories are in PATH
|
||||
ENV PATH="/app:/opt/conda/bin:/usr/local/bin:${PATH}"
|
||||
|
||||
ENV PATH="/app:/opt/conda/bin:/usr/local/bin:${PATH}"
|
||||
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
|
||||
RUN echo 'Asia/Shanghai' >/etc/timezone
|
||||
RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
RUN apt update && apt install -y libpugixml-dev libtbb-dev
|
||||
ENTRYPOINT ["dotnet", "AntSK.dll"]
|
||||
|
||||
10
LICENSE
10
LICENSE
@@ -1,9 +1,17 @@
|
||||
Apache License
|
||||
AntSK License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
This project follows the Apache 2.0 agreement, in addition to the following additional terms
|
||||
1.This project can be used for commercial purposes, but it has the right to prohibit you from using it if it violates the following provisions
|
||||
2. Without authorization, you are not allowed to modify AntSK's logo and title information
|
||||
3. Without authorization, you are not allowed to modify the copyright information at the bottom of the page
|
||||
4. If you need authorization, you can contact WeChat: xuzeyu91 or Email:antskpro@qq.com
|
||||
|
||||
|
||||
Apache 2.0 License
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
|
||||
215
README.en.md
215
README.en.md
@@ -1,215 +0,0 @@
|
||||
[简体中文](./README.md) | English
|
||||
# AntSK
|
||||
## AI Knowledge Base/Intelligent Agent built on .Net8+AntBlazor+SemanticKernel
|
||||
|
||||
## ⭐Core Features
|
||||
|
||||
- **Semantic Kernel**: Utilizes advanced natural language processing technology to accurately understand, process, and respond to complex semantic queries, providing users with precise information retrieval and recommendation services.
|
||||
|
||||
- **Kernel Memory**: Capable of continuous learning and storing knowledge points, AntSK has long-term memory function, accumulates experience, and provides a more personalized interaction experience.
|
||||
|
||||
- **Knowledge Base**: Import knowledge base through documents (Word, PDF, Excel, Txt, Markdown, Json, PPT) and perform knowledge base Q&A.
|
||||
|
||||
- **GPT Generation**: This platform supports creating personalized GPT models, enabling users to build their own GPT models.
|
||||
|
||||
- **API Interface Publishing**: Exposes internal functions in the form of APIs, enabling developers to integrate AntSK into other applications and enhance application intelligence.
|
||||
|
||||
- **API Plugin System**: Open API plugin system that allows third-party developers or service providers to easily integrate their services into AntSK, continuously enhancing application functionality.
|
||||
|
||||
- **.Net Plugin System**: Open dll plugin system that allows third-party developers or service providers to easily integrate their business functions by generating dll in standard format code, continuously enhancing application functionality.
|
||||
|
||||
- **Online Search**: AntSK, real-time access to the latest information, ensuring users receive the most timely and relevant data.
|
||||
|
||||
- **Model Management**: Adapts and manages integration of different models from different manufacturers, including gguf types supported by **llama.cpp** and models offline running supported by **llamafactory**.
|
||||
|
||||
- **Domestic Innovation**: AntSK supports domestic models and databases and can run under domestic innovation conditions.
|
||||
|
||||
- **Model Fine-Tuning**: Planned based on llamafactory for model fine-tuning.
|
||||
|
||||
## ⛪Application Scenarios
|
||||
|
||||
AntSK is suitable for various business scenarios, such as:
|
||||
- Enterprise knowledge management system
|
||||
- Automatic customer service and chatbots
|
||||
- Enterprise search engine
|
||||
- Personalized recommendation system
|
||||
- Intelligent writing assistance
|
||||
- Education and online learning platforms
|
||||
- Other interesting AI Apps
|
||||
|
||||
## ✏️Function Examples
|
||||
### Online Demo
|
||||
[document](http://antsk.cn/)
|
||||
|
||||
[demo](https://antsk.ai-dotnet.com/)
|
||||
|
||||
```
|
||||
Default account: test
|
||||
|
||||
Default password: test
|
||||
|
||||
Due to the low configuration of the cloud server, the local model cannot be run, so the system settings permissions have been closed. You can simply view the interface. If you want to use the local model, please download and use it on your own.
|
||||
```
|
||||
|
||||
### Other Function Examples
|
||||
[Video Demonstration](https://www.bilibili.com/video/BV1zH4y1h7Y9/)
|
||||
|
||||
## ❓How to get started?
|
||||
|
||||
Here I am using Postgres as the data and vector storage because Semantic Kernel and Kernel Memory support it, but you can also use other options.
|
||||
|
||||
The model by default supports the local model of openai, azure openai, and llama. If you need to use other models, you can integrate them using one-api.
|
||||
|
||||
The Login configuration in the configuration file is the default login account and password.
|
||||
|
||||
The following configuration file needs to be configured
|
||||
|
||||
## 1️⃣Using docker-compose
|
||||
|
||||
Provided the pg version **appsettings.json** and simplified version (Sqlite+disk) **docker-compose.simple.yml**
|
||||
|
||||
Download **docker-compose.yml** from the project root directory and place the configuration file **appsettings.json** in the same directory.
|
||||
|
||||
The pg image has already been prepared. You can modify the default username and password in docker-compose.yml, and then the database connection in your **appsettings.json** needs to be consistent.
|
||||
|
||||
Then you can execute the following command in the directory to start AntSK
|
||||
```
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
## 2️⃣How to mount local models and model download directory in docker
|
||||
```
|
||||
# Non-host version, do not use local proxy
|
||||
version: '3.8'
|
||||
services:
|
||||
antsk:
|
||||
container_name: antsk
|
||||
image: registry.cn-hangzhou.aliyuncs.com/AIDotNet/antsk:v0.1.5ports:
|
||||
- 5000:5000
|
||||
networks:
|
||||
- antsk
|
||||
depends_on:
|
||||
- antskpg
|
||||
restart: always
|
||||
environment:
|
||||
- ASPNETCORE_URLS=http://*:5000
|
||||
volumes:
|
||||
- ./appsettings.json:/app/appsettings.json # Local configuration file needs to be placed in the same directory
|
||||
- D://model:/app/model
|
||||
networks:
|
||||
antsk:
|
||||
```
|
||||
Taking this as an example, it means mounting the local D://model folder of Windows into the container /app/model. If so, the model address in your appsettings.json should be configured as
|
||||
```
|
||||
model/xxx.gguf
|
||||
```
|
||||
|
||||
## 3️⃣Some meanings of configuration file
|
||||
```
|
||||
{
|
||||
"DBConnection": {
|
||||
"DbType": "Sqlite",
|
||||
"ConnectionStrings": "Data Source=AntSK.db;"
|
||||
},
|
||||
"KernelMemory": {
|
||||
"VectorDb": "Disk",
|
||||
"ConnectionString": "Host=;Port=;Database=antsk;Username=;Password=",
|
||||
"TableNamePrefix": "km-"
|
||||
},
|
||||
"LLamaSharp": {
|
||||
"RunType": "GPU",
|
||||
"FileDirectory": "D:\\Code\\AI\\AntBlazor\\model\\"
|
||||
},
|
||||
"Login": {
|
||||
"User": "admin",
|
||||
"Password": "xuzeyu"
|
||||
},
|
||||
"BackgroundTaskBroker": {
|
||||
"ImportKMSTask": {
|
||||
"WorkerCount": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
```
|
||||
// Supports various databases, you can check SqlSugar, MySql, SqlServer, Sqlite, Oracle, PostgreSQL, Dm, Kdbndp, Oscar, MySqlConnector, Access, OpenGauss, QuestDB, HG, ClickHouse, GBase, Odbc, OceanBaseForOracle, TDengine, GaussDB, OceanBase, Tidb, Vastbase, PolarDB, Custom
|
||||
DBConnection.DbType
|
||||
|
||||
// Connection string, need to use the corresponding string according to the different DB types
|
||||
DBConnection.ConnectionStrings
|
||||
|
||||
//The type of vector storage, supporting Postgres, Disk, Memory, Qdrant, Redis, AzureAISearch
|
||||
//Postgres and Redis require ConnectionString configuration
|
||||
//The ConnectionString of Qdrant and AzureAISearch uses Endpoint | APIKey
|
||||
KernelMemory.VectorDb
|
||||
|
||||
//Local model execution options: GPU and CPU. When using the online API, any option can be used.
|
||||
LLamaSharp.RunType
|
||||
|
||||
//Local model path, used for quick selection of models under llama, as well as saving downloaded models.
|
||||
LLamaSharp.FileDirectory
|
||||
|
||||
//Default admin account password
|
||||
Login
|
||||
|
||||
//Import asynchronous processing thread count. A higher count can be used for online API, but for local models, 1 is recommended to avoid memory overflow issues.
|
||||
BackgroundTaskBroker.ImportKMSTask.WorkerCount
|
||||
|
||||
```
|
||||
|
||||
## ⚠️Fixing Style Issues:
|
||||
Run the following in AntSK/src/AntSK:
|
||||
```
|
||||
dotnet clean
|
||||
dotnet build
|
||||
dotnet publish "AntSK.csproj"
|
||||
```
|
||||
Then navigate to AntSK/src/AntSK/bin/Release/net8.0/publish and run:
|
||||
```
|
||||
dotnet AntSK.dll
|
||||
```
|
||||
The styles should now be applied after starting.
|
||||
|
||||
I'm using CodeFirst mode for the database, so as long as the database connection is properly configured, the table structure will be created automatically.
|
||||
|
||||
## ✔️Using llamafactory
|
||||
```
|
||||
1. First, ensure that Python and pip are installed in your environment. This step is not necessary if using an image, such as version v0.2.3.2, which already includes the complete Python environment.
|
||||
2. Go to the model add page and select llamafactory.
|
||||
3. Click "Initialize" to check whether the 'pip install' environment setup is complete.
|
||||
4. Choose a model that you like.
|
||||
5. Click "Start" to begin downloading the model from the tower. This may involve a somewhat lengthy wait.
|
||||
6. After the model has finished downloading, enter http://localhost:8000/ in the request address. The default port is 8000.
|
||||
7. Click "Save" and start chatting.
|
||||
8. Many people ask about the difference between LLamaSharp and llamafactory. In fact, LLamaSharp is a .NET implementation of llama.cpp, but only supports local gguf models, while llamafactory supports a wider variety of models and uses Python implementation. The main difference lies here. Additionally, llamafactory has the ability to fine-tune models, which is an area we will focus on integrating in the future.
|
||||
```
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
[](https://github.com/AIDotNet/AntSK/pulls)
|
||||
|
||||
If you would like to contribute, feel free to create a [Pull Request](https://github.com/AIDotNet/AntSK/pulls), or give us [Bug Report](https://github.com/AIDotNet/AntSK/issues/new).
|
||||
|
||||
|
||||
## 💕 Contributors
|
||||
|
||||
This project exists thanks to all the people who contribute.
|
||||
|
||||
<a href="https://github.com/AIDotNet/AntSK/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=AIDotNet/AntSK&max=1000&columns=15&anon=1" />
|
||||
</a>
|
||||
|
||||
## 🚨 Use Protocol
|
||||
This warehouse follows the [Apache-2.0 License]( https://github.com/AIDotNet/AntSK?tab=Apache -2.0-1-ov file) open source protocol.
|
||||
The Apache open source license allows the use of AntSK in commercial environments, provided that the license terms are followed. One of the main terms is to retain the copyright and license statements.
|
||||
If you plan to use AntSK in commercial projects, you need to ensure that you follow the following steps:
|
||||
1. Copyright statement containing Apache license. [Apache-2.0 License]( https://github.com/AIDotNet/AntSK?tab=Apache -2.0-1-ov file).
|
||||
2. If you modify the software source code, you need to clearly indicate these modifications in the source code.
|
||||
|
||||
## ☎️Contact Me
|
||||
If you have any questions or suggestions, please contact me through my official WeChat account. We also have a discussion group where you can send a message to join, and then I will add you to the group.
|
||||

|
||||
|
||||
---
|
||||
|
||||
We appreciate your interest in **AntSK** and look forward to collaborating with you to create an intelligent future!
|
||||
254
README.md
254
README.md
@@ -1,99 +1,92 @@
|
||||
中文|[English](https://github.com/AIDotNet/AntSK/blob/main/README.en.md)
|
||||
[简体中文](./README.zh.md) | English
|
||||
# AntSK
|
||||
## 使用.Net8+Blazor+SemanticKernel 打造的AI知识库/智能体
|
||||
## AI Knowledge Base/Intelligent Agent built on .Net8+AntBlazor+SemanticKernel
|
||||
|
||||
## ⭐核心功能
|
||||
## ⭐Core Features
|
||||
|
||||
- **语义内核 (Semantic Kernel)**:采用领先的自然语言处理技术,准确理解、处理和响应复杂的语义查询,为用户提供精确的信息检索和推荐服务。
|
||||
- **Semantic Kernel**: Utilizes advanced natural language processing technology to accurately understand, process, and respond to complex semantic queries, providing users with precise information retrieval and recommendation services.
|
||||
|
||||
- **内存内核 (Kernel Memory)**:具备持续学习和存储知识点的能力,AntSK 拥有长期记忆功能,累积经验,提供更个性化的交互体验。
|
||||
- **Kernel Memory**: Capable of continuous learning and storing knowledge points, AntSK has long-term memory function, accumulates experience, and provides a more personalized interaction experience.
|
||||
|
||||
- **知识库**:通过文档(Word、PDF、Excel、Txt、Markdown、Json、PPT)等形式导入知识库,可以进行知识库问答,支持本地bge-embedding 向量模型 ,以及bge-rerank 重排模型。
|
||||
- **Knowledge Base**: Import knowledge base through documents (Word, PDF, Excel, Txt, Markdown, Json, PPT) and perform knowledge base Q&A.
|
||||
|
||||
- **文生图**:集成**StableDiffusion** 本地模型,可以进行文生图。
|
||||
- **GPT Generation**: This platform supports creating personalized GPT models, enabling users to build their own GPT models.
|
||||
|
||||
- **GPTs 生成**:此平台支持创建个性化的GPT模型,尝试构建您自己的GPT模型。
|
||||
- **API Interface Publishing**: Exposes internal functions in the form of APIs, enabling developers to integrate AntSK into other applications and enhance application intelligence.
|
||||
|
||||
- **API接口发布**:将内部功能以API的形式对外提供,便于开发者将AntSK 集成进其他应用,增强应用智慧。
|
||||
- **API Plugin System**: Open API plugin system that allows third-party developers or service providers to easily integrate their services into AntSK, continuously enhancing application functionality.
|
||||
|
||||
- **API插件系统**:开放式API插件系统,允许第三方开发者或服务商轻松将其服务集成到AntSK,不断增强应用功能。
|
||||
- **.Net Plugin System**: Open dll plugin system that allows third-party developers or service providers to easily integrate their business functions by generating dll in standard format code, continuously enhancing application functionality.
|
||||
|
||||
- **.Net插件系统**:开放式dll插件系统,允许第三方开发者或服务商轻松将其业务功能通过标准格式的代码生成dll后集成到AntSK,不断增强应用功能。
|
||||
- **Online Search**: AntSK, real-time access to the latest information, ensuring users receive the most timely and relevant data.
|
||||
|
||||
- **联网搜索**:AntSK,实时获取最新信息,确保用户接受到的资料总是最及时、最相关的。
|
||||
- **Model Management**: Adapts and manages integration of different models from different manufacturers, including gguf types supported by **llama.cpp** and models offline running supported by **llamafactory** and **ollama**.
|
||||
|
||||
- **模型管理**:适配和管理集成不同厂商的不同模型。并且支持**llama.cpp**所支持的gguf类型,以及**llamafactory**所支持的模型离线运行
|
||||
- **Domestic Innovation**: AntSK supports domestic models and databases and can run under domestic innovation conditions.
|
||||
|
||||
- **国产信创**:AntSK支持国产模型,和国产数据库,可以在信创条件下运行
|
||||
- **Model Fine-Tuning**: Planned based on llamafactory for model fine-tuning.
|
||||
|
||||
- **模型微调**:规划中,基于llamafactory进行模型微调
|
||||
|
||||
## ⛪Application Scenarios
|
||||
|
||||
## ⛪应用场景
|
||||
AntSK is suitable for various business scenarios, such as:
|
||||
- Enterprise knowledge management system
|
||||
- Automatic customer service and chatbots
|
||||
- Enterprise search engine
|
||||
- Personalized recommendation system
|
||||
- Intelligent writing assistance
|
||||
- Education and online learning platforms
|
||||
- Other interesting AI Apps
|
||||
|
||||
AntSK 适用于多种业务场景,例如:
|
||||
- 企业级知识管理系统
|
||||
- 自动客服与聊天机器人
|
||||
- 企业级搜索引擎
|
||||
- 个性化推荐系统
|
||||
- 智能辅助写作
|
||||
- 教育与在线学习平台
|
||||
- 其他有意思的AI App
|
||||
## ✏️Function Examples
|
||||
### Online Demo
|
||||
[document](http://antsk.cn/)
|
||||
|
||||
## ✏️功能示例
|
||||
### 在线演示
|
||||
|
||||
[文档地址](http://antsk.cn/)
|
||||
|
||||
[体验地址](https://antsk.ai-dotnet.com/)
|
||||
[demo](https://demo.antsk.cn/)
|
||||
and
|
||||
[demo1](https://antsk.ai-dotnet.com/)
|
||||
|
||||
```
|
||||
默认账号:test
|
||||
Default account: test
|
||||
|
||||
默认密码:test
|
||||
Default password: test
|
||||
|
||||
由于云服务器配置较低,无法运行本地模型,所以把系统设置权限关闭了,大家看看界面即可,要使用本地模型,请下载自行使用
|
||||
|
||||
请勿在演示站点上传敏感信息
|
||||
Due to the low configuration of the cloud server, the local model cannot be run, so the system settings permissions have been closed. You can simply view the interface. If you want to use the local model, please download and use it on your own.
|
||||
```
|
||||
|
||||
### 其他功能示例
|
||||
[视频示例](https://www.bilibili.com/video/BV1zH4y1h7Y9/)
|
||||
### Other Function Examples
|
||||
[Video Demonstration](https://www.bilibili.com/video/BV1zH4y1h7Y9/)
|
||||
|
||||
[在线文档:http://antsk.cn](http://antsk.cn)
|
||||
## ❓How to get started?
|
||||
|
||||
## ❓如何开始?
|
||||
Here I am using Postgres as the data and vector storage because Semantic Kernel and Kernel Memory support it, but you can also use other options.
|
||||
|
||||
在这里我使用的是Postgres 作为数据存储和向量存储,因为Semantic Kernel和Kernel Memory都支持他,当然你也可以换成其他的。
|
||||
The model by default supports the local model of openai, azure openai, and llama. If you need to use other models, you can integrate them using one-api.
|
||||
|
||||
模型默认支持openai、azure openai、讯飞星火、阿里云积、 和llama支持的gguf本地模型 以及llamafactory的本地模型,如果需要使用其他模型,可以使用one-api进行集成。
|
||||
The Login configuration in the configuration file is the default login account and password.
|
||||
|
||||
配置文件中的Login配置是默认的登录账号和密码
|
||||
The following configuration file needs to be configured
|
||||
|
||||
需要配置如下的配置文件
|
||||
## 1️⃣Using docker-compose
|
||||
|
||||
## 1️⃣使用docker-compose
|
||||
Provided the pg version **appsettings.json** and simplified version (Sqlite+disk) **docker-compose.simple.yml**
|
||||
|
||||
提供了pg版本 **appsettings.json** 和 简化版本(**Sqlite+disk**) **docker-compose.simple.yml**
|
||||
Download **docker-compose.yml** from the project root directory and place the configuration file **appsettings.json** in the same directory.
|
||||
|
||||
从项目根目录下载**docker-compose.yml**,然后把配置文件**appsettings.json**和它放在统一目录,
|
||||
The pg image has already been prepared. You can modify the default username and password in docker-compose.yml, and then the database connection in your **appsettings.json** needs to be consistent.
|
||||
|
||||
这里已经把pg的镜像做好了。在docker-compose.yml中可以修改默认账号密码,然后你的**appsettings.json**的数据库连接需要保持一致。
|
||||
|
||||
然后你可以进入到目录后执行
|
||||
Then you can execute the following command in the directory to start AntSK
|
||||
```
|
||||
docker-compose up -d
|
||||
```
|
||||
来启动AntSK
|
||||
|
||||
## 2️⃣如何在docker中挂载本地模型,和模型下载的目录
|
||||
## 2️⃣How to mount local models and model download directory in docker
|
||||
```
|
||||
# 非 host 版本, 不使用本机代理
|
||||
# Non-host version, do not use local proxy
|
||||
version: '3.8'
|
||||
services:
|
||||
antsk:
|
||||
container_name: antsk
|
||||
image: registry.cn-hangzhou.aliyuncs.com/AIDotNet/antsk:v0.3.1
|
||||
image: registry.cn-hangzhou.aliyuncs.com/AIDotNet/antsk:v0.5.0
|
||||
ports:
|
||||
- 5000:5000
|
||||
networks:
|
||||
@@ -104,32 +97,36 @@ services:
|
||||
environment:
|
||||
- ASPNETCORE_URLS=http://*:5000
|
||||
volumes:
|
||||
- ./appsettings.json:/app/appsettings.json # 本地配置文件 需要放在同级目录
|
||||
- ./appsettings.json:/app/appsettings.json # Local configuration file needs to be placed in the same directory
|
||||
- D://model:/app/model
|
||||
- D://model:/root/.cache/modelscope/hub/AI-ModelScope #使用Llamafactory时需要挂载 否则初始化的环境重启后会丢失
|
||||
networks:
|
||||
antsk:
|
||||
```
|
||||
以这个为示例,意思是把windows本地D://model的文件夹挂载进 容器内/app/model 如果是这样你的appsettings.json中的模型地址应该配置为
|
||||
```
|
||||
model/xxx.gguf
|
||||
```
|
||||
Taking this as an example, it means mounting the local D://model folder of Windows into the container /app/model. If so, the model address in your appsettings.json should be configured as
|
||||
|
||||
## 3️⃣配置文件的一些含义
|
||||
[LiteDockerCompose](https://github.com/AIDotNet/AntSK/blob/main/docker-compose.simple.yml)
|
||||
|
||||
The compact version is deployed with sqlite-disk by one click
|
||||
|
||||
[FullDockerCompose](https://github.com/AIDotNet/AntSK/blob/main/docker-compose.yml)
|
||||
|
||||
The full version uses pg+aspire
|
||||
|
||||
|
||||
## 3️⃣Some meanings of configuration file
|
||||
```
|
||||
{
|
||||
"DBConnection": {
|
||||
"DbType": "Sqlite",
|
||||
"DbType": "Sqlite",
|
||||
"ConnectionStrings": "Data Source=AntSK.db;"
|
||||
},
|
||||
"KernelMemory": {
|
||||
"VectorDb": "Disk",
|
||||
"VectorDb": "Disk",
|
||||
"ConnectionString": "Host=;Port=;Database=antsk;Username=;Password=",
|
||||
"TableNamePrefix": "km-"
|
||||
},
|
||||
"LLamaSharp": {
|
||||
"RunType": "GPU",
|
||||
"FileDirectory": "D:\\Code\\AI\\AntBlazor\\model\\"
|
||||
"FileDir": {
|
||||
"DirectoryPath": "D:\\git\\AntBlazor\\model"
|
||||
},
|
||||
"Login": {
|
||||
"User": "admin",
|
||||
@@ -143,92 +140,95 @@ model/xxx.gguf
|
||||
}
|
||||
```
|
||||
```
|
||||
//支持多种数据库,具体可以查看SqlSugar,MySql,SqlServer,Sqlite,Oracle,PostgreSQL,Dm,Kdbndp,Oscar,MySqlConnector,Access,OpenGauss,QuestDB,HG,ClickHouse,GBase,Odbc,OceanBaseForOracle,TDengine,GaussDB,OceanBase,Tidb,Vastbase,PolarDB,Custom
|
||||
// Supports various databases, you can check SqlSugar, MySql, SqlServer, Sqlite, Oracle, PostgreSQL, Dm, Kdbndp, Oscar, MySqlConnector, Access, OpenGauss, QuestDB, HG, ClickHouse, GBase, Odbc, OceanBaseForOracle, TDengine, GaussDB, OceanBase, Tidb, Vastbase, PolarDB, Custom
|
||||
DBConnection.DbType
|
||||
//连接字符串,需要根据不同DB类型,用对应的字符串
|
||||
|
||||
// Connection string, need to use the corresponding string according to the different DB types
|
||||
DBConnection.ConnectionStrings
|
||||
|
||||
//向量存储的类型,支持 Postgres、Disk、Memory、Qdrant、Redis、AzureAISearch
|
||||
//Postgres、Redis需要配置 ConnectionString
|
||||
//Qdrant 和AzureAISearch 的 ConnectionString 使用 Endpoint|APIKey
|
||||
//The type of vector storage, supporting Postgres, Disk, Memory, Qdrant, Redis, AzureAISearch
|
||||
//Postgres and Redis require ConnectionString configuration
|
||||
//The ConnectionString of Qdrant and AzureAISearch uses Endpoint | APIKey
|
||||
KernelMemory.VectorDb
|
||||
|
||||
//本地模型使用的运行方式 GUP CPU ,如果用在线API 这个随意使用一个即可
|
||||
LLamaSharp.RunType
|
||||
//Local model path, used for quick selection of models under llama, as well as saving downloaded models.
|
||||
FileDir.DirectoryPath
|
||||
|
||||
//本地模型路径,用于在选择llama时可以快速选择目录下的模型,以及保存下载的模型
|
||||
LLamaSharp.FileDirectory
|
||||
|
||||
//默认管理员账号密码
|
||||
//Default admin account password
|
||||
Login
|
||||
//导入异步处理的线程数,使用在线API可以高一点,本地模型建议1 否则容易内存溢出崩掉
|
||||
|
||||
//Import asynchronous processing thread count. A higher count can be used for online API, but for local models, 1 is recommended to avoid memory overflow issues.
|
||||
BackgroundTaskBroker.ImportKMSTask.WorkerCount
|
||||
|
||||
```
|
||||
|
||||
## ⚠️找不到样式问题解决:
|
||||
AntSK/src/AntSK下执行:
|
||||
## ⚠️Fixing Style Issues:
|
||||
Run the following in AntSK/src/AntSK:
|
||||
```
|
||||
dotnet clean
|
||||
dotnet build
|
||||
dotnet publish "AntSK.csproj"
|
||||
```
|
||||
再去AntSK/src/AntSK/bin/Release/net8.0/publish下
|
||||
Then navigate to AntSK/src/AntSK/bin/Release/net8.0/publish and run:
|
||||
```
|
||||
dotnet AntSK.dll
|
||||
```
|
||||
然后启动就有样式了
|
||||
The styles should now be applied after starting.
|
||||
|
||||
DB我使用的是CodeFirst模式,只要配置好数据库链接,表结构是自动创建的
|
||||
I'm using CodeFirst mode for the database, so as long as the database connection is properly configured, the table structure will be created automatically.
|
||||
|
||||
## ✔️使用llamafactory
|
||||
## ✔️Using llamafactory
|
||||
```
|
||||
1、首先需要确保你的环境已经安装了python和pip,如果使用镜像,例如p0.2.4版本已经包含了 python全套环境则无需此步骤
|
||||
2、进入模型添加页面选择llamafactory
|
||||
3、点击初始化,可以检查pip install 环境是否完成
|
||||
4、选择一个喜欢的模型
|
||||
5、点击启动,这会开始从魔塔下载模型,你可能需要有一个较为漫长的等待
|
||||
6、等待模型下载完毕后,在请求地址输入 http://localhost:8000/ 这里默认是使用8000端口
|
||||
7、点击保存,然后就可以开始聊天了
|
||||
8、很多人会问 LLamaSharp与llamafactory有什么区别?其实这两者LLamaSharp是llama.cpp的 dotnet实现,但是只支持本地gguf模型, 而llamafactory 支持的模型种类更多,但使用的是python的实现,其主要差异在这里,另外llamafactory具有模型微调的能力,这也是我们下一步需要重点集成的部分。
|
||||
1. First, ensure that Python and pip are installed in your environment. This step is not necessary if using an image, such as version v0.2.3.2, which already includes the complete Python environment.
|
||||
2. Go to the model add page and select llamafactory.
|
||||
3. Click "Initialize" to check whether the 'pip install' environment setup is complete.
|
||||
4. Choose a model that you like.
|
||||
5. Click "Start" to begin downloading the model from the tower. This may involve a somewhat lengthy wait.
|
||||
6. After the model has finished downloading, enter http://localhost:8000/ in the request address. The default port is 8000.
|
||||
7. Click "Save" and start chatting.
|
||||
8. Many people ask about the difference between LLamaSharp and llamafactory. In fact, LLamaSharp is a .NET implementation of llama.cpp, but only supports local gguf models, while llamafactory supports a wider variety of models and uses Python implementation. The main difference lies here. Additionally, llamafactory has the ability to fine-tune models, which is an area we will focus on integrating in the future.
|
||||
```
|
||||
|
||||
## 🤝 贡献
|
||||
## 💕 Contributors
|
||||
|
||||
[](https://github.com/AIDotNet/AntSK/pulls)
|
||||
|
||||
如果你想贡献,可以创建一个[拉取请求](https://github.com/AIDotNet/AntSK/pulls), 或给我们[错误报告](https://github.com/AIDotNet/AntSK/issues/new).
|
||||
|
||||
|
||||
## 💕 贡献者
|
||||
This project exists thanks to all the people who contribute.
|
||||
|
||||
这个项目的存在要感谢所有的贡献者。
|
||||
|
||||
<a href="https://github.com/AIDotNet/AntSK/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=AIDotNet/AntSK&max=1000&columns=15&anon=1" />
|
||||
</a>
|
||||
|
||||
## 🚨 使用协议
|
||||
|
||||
本仓库遵循 [Apache-2.0 License](https://github.com/AIDotNet/AntSK?tab=Apache-2.0-1-ov-file) 开源协议。
|
||||
Apache开源许可证允许在商业环境中使用AntSK,前提是需要遵守许可证的条款。主要条款之一是要保留版权声明和许可证声明。
|
||||
|
||||
如果您打算在商业项目中使用AntSK,您需要确保遵守以下步骤:
|
||||
|
||||
1、包含Apache许可证的版权声明。 [Apache-2.0 License](https://github.com/AIDotNet/AntSK?tab=Apache-2.0-1-ov-file) 。
|
||||
|
||||
2、如果您修改了软件源代码,您需要在源代码中明确标明这些修改。
|
||||
|
||||
|
||||
## ☎️联系我
|
||||
如有任何问题或建议,请通过以下方式关注我的公众号《许泽宇的技术分享》,发消息与我联系,我们也有AIDotnet交流群,可以发送进群等消息,然后我会拉你进交流群
|
||||

|
||||
|
||||
## 🌟 Star History
|
||||
<a href="https://github.com/AIDotNet/AntSK/stargazers" target="_blank" style="display: block" align="center">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=AIDotNet/AntSK&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=AIDotNet/AntSK&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=AIDotNet/AntSK&type=Date" />
|
||||
</picture>
|
||||
<img src="https://contrib.rocks/image?repo=AIDotNet/AntSK&max=1000&columns=15&anon=1" />
|
||||
</a>
|
||||
|
||||
## 🚨 Use Protocol
|
||||
|
||||
This warehouse follows the [AntSK License](https://github.com/AIDotNet/AntSK?tab=Apache-2.0-1-ov-file) open source protocol.
|
||||
|
||||
This project follows the Apache 2.0 agreement, in addition to the following additional terms
|
||||
|
||||
1. This project can be used for commercial purposes, but it has the right to prohibit you from using it if it violates the following provisions
|
||||
|
||||
2. Without authorization, you are not allowed to modify AntSK's logo and title information
|
||||
|
||||
4. Without authorization, you are not allowed to modify the copyright information at the bottom of the page
|
||||
|
||||
6. If you need authorization, you can contact WeChat: **xuzeyu91**
|
||||
|
||||
If you plan to use AntSK in commercial projects, you need to ensure that you follow the following steps:
|
||||
|
||||
1. Copyright statement containing AntSK license. [AntSK License](https://github.com/AIDotNet/AntSK?tab=Apache-2.0-1-ov-file).
|
||||
|
||||
2. If you modify the software source code, you need to clearly indicate these modifications in the source code.
|
||||
|
||||
3. Meet the above requirements
|
||||
|
||||
## 💕 Special thanks
|
||||
Helping enterprise AI application development, we recommend [AntBlazor](https://antblazor.com)
|
||||
|
||||
## ☎️Contact Me
|
||||
If you have any questions or suggestions, please contact me through my official WeChat account. We also have a discussion group where you can send a message to join, and then I will add you to the group.
|
||||
|
||||
Additionally, you can also contact me via email: antskpro@qq.com
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
We appreciate your interest in **AntSK** and look forward to collaborating with you to create an intelligent future!
|
||||
|
||||
247
README.zh.md
Normal file
247
README.zh.md
Normal file
@@ -0,0 +1,247 @@
|
||||
中文|[English](./README.md)
|
||||
# AntSK
|
||||
## 使用.Net8+Blazor+SemanticKernel 打造的AI知识库/智能体
|
||||
|
||||
## ⭐核心功能
|
||||
|
||||
- **语义内核 (Semantic Kernel)**:采用领先的自然语言处理技术,准确理解、处理和响应复杂的语义查询,为用户提供精确的信息检索和推荐服务。
|
||||
|
||||
- **内存内核 (Kernel Memory)**:具备持续学习和存储知识点的能力,AntSK 拥有长期记忆功能,累积经验,提供更个性化的交互体验。
|
||||
|
||||
- **知识库**:通过文档(Word、PDF、Excel、Txt、Markdown、Json、PPT)等形式导入知识库,可以进行知识库问答,支持本地bge-embedding 向量模型 ,以及bge-rerank 重排模型。
|
||||
|
||||
- **文生图**:集成**StableDiffusion** 本地模型,可以进行文生图。
|
||||
|
||||
- **GPTs 生成**:此平台支持创建个性化的GPT模型,尝试构建您自己的GPT模型。
|
||||
|
||||
- **API接口发布**:将内部功能以API的形式对外提供,便于开发者将AntSK 集成进其他应用,增强应用智慧。
|
||||
|
||||
- **API插件系统**:开放式API插件系统,允许第三方开发者或服务商轻松将其服务集成到AntSK,不断增强应用功能。
|
||||
|
||||
- **.Net插件系统**:开放式dll插件系统,允许第三方开发者或服务商轻松将其业务功能通过标准格式的代码生成dll后集成到AntSK,不断增强应用功能。
|
||||
|
||||
- **联网搜索**:AntSK,实时获取最新信息,确保用户接受到的资料总是最及时、最相关的。
|
||||
|
||||
- **模型管理**:适配和管理集成不同厂商的不同模型。并且支持**llama.cpp**所支持的gguf类型,以及**llamafactory** 和 **ollama** 所支持的模型离线运行
|
||||
|
||||
- **国产信创**:AntSK支持国产模型,和国产数据库,可以在信创条件下运行
|
||||
|
||||
- **模型微调**:规划中,基于llamafactory进行模型微调
|
||||
|
||||
|
||||
## ⛪应用场景
|
||||
|
||||
AntSK 适用于多种业务场景,例如:
|
||||
- 企业级知识管理系统
|
||||
- 自动客服与聊天机器人
|
||||
- 企业级搜索引擎
|
||||
- 个性化推荐系统
|
||||
- 智能辅助写作
|
||||
- 教育与在线学习平台
|
||||
- 其他有意思的AI App
|
||||
|
||||
## ✏️功能示例
|
||||
### 在线演示
|
||||
|
||||
[体验地址1](https://demo.antsk.cn/)
|
||||
|
||||
和
|
||||
|
||||
[体验地址2](https://antsk.ai-dotnet.com/)
|
||||
```
|
||||
默认账号:test
|
||||
|
||||
默认密码:test
|
||||
|
||||
由于云服务器配置较低,无法运行本地模型,所以把系统设置权限关闭了,大家看看界面即可,要使用本地模型,请下载自行使用
|
||||
|
||||
请勿在演示站点上传敏感信息
|
||||
```
|
||||
|
||||
### 其他功能示例
|
||||
[视频示例](https://www.bilibili.com/video/BV1zH4y1h7Y9/)
|
||||
|
||||
[在线文档:http://antsk.cn](http://antsk.cn)
|
||||
|
||||
## ❓如何开始?
|
||||
|
||||
在这里我使用的是Postgres 作为数据存储和向量存储,因为Semantic Kernel和Kernel Memory都支持他,当然你也可以换成其他的。
|
||||
|
||||
模型默认支持openai、azure openai、讯飞星火、阿里云积、 和llama支持的gguf本地模型 以及llamafactory的本地模型,如果需要使用其他模型,可以使用one-api进行集成。
|
||||
|
||||
配置文件中的Login配置是默认的登录账号和密码
|
||||
|
||||
需要配置如下的配置文件
|
||||
|
||||
## 1️⃣使用docker-compose
|
||||
|
||||
提供了pg版本 **appsettings.json** 和 简化版本(**Sqlite+disk**) **docker-compose.simple.yml**
|
||||
|
||||
从项目根目录下载**docker-compose.yml**,然后把配置文件**appsettings.json**和它放在统一目录,
|
||||
|
||||
这里已经把pg的镜像做好了。在docker-compose.yml中可以修改默认账号密码,然后你的**appsettings.json**的数据库连接需要保持一致。
|
||||
|
||||
然后你可以进入到目录后执行
|
||||
```
|
||||
docker-compose up -d
|
||||
```
|
||||
来启动AntSK
|
||||
|
||||
## 2️⃣如何在docker中挂载本地模型,和模型下载的目录
|
||||
```
|
||||
# 非 host 版本, 不使用本机代理
|
||||
version: '3.8'
|
||||
services:
|
||||
antsk:
|
||||
container_name: antsk
|
||||
image: registry.cn-hangzhou.aliyuncs.com/AIDotNet/antsk:v0.3.1
|
||||
ports:
|
||||
- 5000:5000
|
||||
networks:
|
||||
- antsk
|
||||
depends_on:
|
||||
- antskpg
|
||||
restart: always
|
||||
environment:
|
||||
- ASPNETCORE_URLS=http://*:5000
|
||||
volumes:
|
||||
- ./appsettings.json:/app/appsettings.json # 本地配置文件 需要放在同级目录
|
||||
- D://model:/app/model
|
||||
- D://model:/root/.cache/modelscope/hub/AI-ModelScope #使用Llamafactory时需要挂载 否则初始化的环境重启后会丢失
|
||||
networks:
|
||||
antsk:
|
||||
```
|
||||
以这个为示例,意思是把windows本地D://model的文件夹挂载进 容器内/app/model 如果是这样你的appsettings.json中的模型地址应该配置为
|
||||
|
||||
[LiteDockerCompose](https://github.com/AIDotNet/AntSK/blob/main/docker-compose.simple.yml)
|
||||
|
||||
精简版使用sqlite+disk向量模式,简化部署配置
|
||||
|
||||
[FullDockerCompose](https://github.com/AIDotNet/AntSK/blob/main/docker-compose.yml)
|
||||
|
||||
完整版使用pg+aspire 功能更完整,配置文件需要参考如下配置含义进行配置
|
||||
|
||||
|
||||
## 3️⃣配置文件的一些含义
|
||||
```
|
||||
{
|
||||
"DBConnection": {
|
||||
"DbType": "Sqlite",
|
||||
"ConnectionStrings": "Data Source=AntSK.db;"
|
||||
},
|
||||
"KernelMemory": {
|
||||
"VectorDb": "Disk",
|
||||
"ConnectionString": "Host=;Port=;Database=antsk;Username=;Password=",
|
||||
"TableNamePrefix": "km-"
|
||||
},
|
||||
"FileDir": {
|
||||
"DirectoryPath": "D:\\git\\AntBlazor\\model"
|
||||
},
|
||||
"Login": {
|
||||
"User": "admin",
|
||||
"Password": "xuzeyu"
|
||||
},
|
||||
"BackgroundTaskBroker": {
|
||||
"ImportKMSTask": {
|
||||
"WorkerCount": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
```
|
||||
//支持多种数据库,具体可以查看SqlSugar,MySql,SqlServer,Sqlite,Oracle,PostgreSQL,Dm,Kdbndp,Oscar,MySqlConnector,Access,OpenGauss,QuestDB,HG,ClickHouse,GBase,Odbc,OceanBaseForOracle,TDengine,GaussDB,OceanBase,Tidb,Vastbase,PolarDB,Custom
|
||||
DBConnection.DbType
|
||||
//连接字符串,需要根据不同DB类型,用对应的字符串
|
||||
DBConnection.ConnectionStrings
|
||||
|
||||
//向量存储的类型,支持 Postgres、Disk、Memory、Qdrant、Redis、AzureAISearch
|
||||
//Postgres、Redis需要配置 ConnectionString
|
||||
//Qdrant 和AzureAISearch 的 ConnectionString 使用 Endpoint|APIKey
|
||||
KernelMemory.VectorDb
|
||||
|
||||
//本地模型路径,用于在选择llama时可以快速选择目录下的模型,以及保存下载的模型
|
||||
FileDir.DirectoryPath
|
||||
|
||||
//默认管理员账号密码
|
||||
Login
|
||||
//导入异步处理的线程数,使用在线API可以高一点,本地模型建议1 否则容易内存溢出崩掉
|
||||
BackgroundTaskBroker.ImportKMSTask.WorkerCount
|
||||
```
|
||||
|
||||
## ⚠️找不到样式问题解决:
|
||||
AntSK/src/AntSK下执行:
|
||||
```
|
||||
dotnet clean
|
||||
dotnet build
|
||||
dotnet publish "AntSK.csproj"
|
||||
```
|
||||
再去AntSK/src/AntSK/bin/Release/net8.0/publish下
|
||||
```
|
||||
dotnet AntSK.dll
|
||||
```
|
||||
然后启动就有样式了
|
||||
|
||||
DB我使用的是CodeFirst模式,只要配置好数据库链接,表结构是自动创建的
|
||||
|
||||
## ✔️使用llamafactory
|
||||
```
|
||||
1、首先需要确保你的环境已经安装了python和pip,如果使用镜像,例如p0.2.4版本已经包含了 python全套环境则无需此步骤
|
||||
2、进入模型添加页面选择llamafactory
|
||||
3、点击初始化,可以检查pip install 环境是否完成
|
||||
4、选择一个喜欢的模型
|
||||
5、点击启动,这会开始从魔塔下载模型,你可能需要有一个较为漫长的等待
|
||||
6、等待模型下载完毕后,在请求地址输入 http://localhost:8000/ 这里默认是使用8000端口
|
||||
7、点击保存,然后就可以开始聊天了
|
||||
8、很多人会问 LLamaSharp与llamafactory有什么区别?其实这两者LLamaSharp是llama.cpp的 dotnet实现,但是只支持本地gguf模型, 而llamafactory 支持的模型种类更多,但使用的是python的实现,其主要差异在这里,另外llamafactory具有模型微调的能力,这也是我们下一步需要重点集成的部分。
|
||||
```
|
||||
|
||||
## 💕 贡献者
|
||||
|
||||
这个项目的存在要感谢所有的贡献者。
|
||||
|
||||
<a href="https://github.com/AIDotNet/AntSK/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=AIDotNet/AntSK&max=1000&columns=15&anon=1" />
|
||||
</a>
|
||||
|
||||
## 🚨 使用协议
|
||||
|
||||
本仓库遵循 [AntSK License](https://github.com/AIDotNet/AntSK?tab=Apache-2.0-1-ov-file) 开源协议。
|
||||
|
||||
除以下附加条款外,该项目遵循Apache 2.0协议
|
||||
|
||||
1. 本项目可以用于商业目的,但如果违反以下规定,它有权禁止您使用
|
||||
|
||||
2. 未经授权,您不允许修改AntSK的徽标和标题信息
|
||||
|
||||
3. 未经授权,您不能修改页面底部的版权信息
|
||||
|
||||
4. 如果您需要授权,可以联系微信:xuzeyu91
|
||||
|
||||
如果您打算在商业项目中使用AntSK,您需要确保遵守以下步骤:
|
||||
|
||||
1. 包含AntSK许可证的版权声明。 [AntSK License](https://github.com/AIDotNet/AntSK?tab=Apache-2.0-1-ov-file) 。
|
||||
|
||||
2. 如果您修改了软件源代码,您需要在源代码中明确标明这些修改。
|
||||
|
||||
3. 满足以上要求
|
||||
|
||||
## 💕 特别感谢
|
||||
助力企业级AI应用开发,推荐使用 [AntBlazor](https://antblazor.com)
|
||||
|
||||
|
||||
## ☎️联系我
|
||||
如有任何问题或建议,请通过以下方式关注我的公众号《许泽宇的技术分享》,发消息与我联系,我们也有AIDotnet交流群,可以发送进群等消息,然后我会拉你进交流群
|
||||
|
||||
另外您也可以通过邮箱与我联系:antskpro@qq.com
|
||||
|
||||

|
||||
|
||||
## 🌟 Star History
|
||||
<a href="https://github.com/AIDotNet/AntSK/stargazers" target="_blank" style="display: block" align="center">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=AIDotNet/AntSK&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=AIDotNet/AntSK&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=AIDotNet/AntSK&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
@@ -3,9 +3,9 @@ version: '3.8'
|
||||
services:
|
||||
antsk:
|
||||
container_name: antsk
|
||||
image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk:v0.3.1
|
||||
image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk:v0.5.2
|
||||
# 如果需要pytorch环境需要使用下面这个镜像,镜像比较大
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk:p0.3.1
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk:p0.5.2
|
||||
ports:
|
||||
- 5000:5000
|
||||
networks:
|
||||
@@ -15,5 +15,7 @@ services:
|
||||
- ASPNETCORE_URLS=http://*:5000
|
||||
volumes:
|
||||
- ./appsettings.json:/app/appsettings.json # 本地配置文件 需要放在同级目录
|
||||
- /AntSK/model:/app/model
|
||||
- /AntSK/model:/root/.cache/modelscope/hub/AI-ModelScope # LLamaFactory模型文件
|
||||
networks:
|
||||
antsk:
|
||||
|
||||
@@ -1,6 +1,20 @@
|
||||
# 非 host 版本, 不使用本机代理
|
||||
version: '3.8'
|
||||
services:
|
||||
aspire-dashboard:
|
||||
container_name: aspire-dashboard
|
||||
image: mcr.microsoft.com/dotnet/aspire-dashboard:8.0
|
||||
networks:
|
||||
- antsk
|
||||
environment:
|
||||
- DOTNET_DASHBOARD_UNSECURED_ALLOW_ANONYMOUS=true
|
||||
- ASPIRE_ALLOW_UNSECURED_TRANSPORT=true
|
||||
- DASHBOARD_OTLP_AUTHMODE=ApiKey
|
||||
- DASHBOARD_OTLP_PRIMARYAPIKEY=antsk
|
||||
ports:
|
||||
- 18888:18888
|
||||
- 18889:18889
|
||||
restart: unless-stopped
|
||||
antskpg:
|
||||
image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/pg:v0.5.0
|
||||
container_name: antskpg
|
||||
@@ -18,9 +32,9 @@ services:
|
||||
- ./pg/data:/var/lib/postgresql/data
|
||||
antsk:
|
||||
container_name: antsk
|
||||
image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk:v0.3.1
|
||||
image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk:v0.5.2
|
||||
# 如果需要pytorch环境需要使用下面这个镜像,镜像比较大
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk:p0.3.1
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/xuzeyu91/antsk:p0.5.2
|
||||
ports:
|
||||
- 5000:5000
|
||||
networks:
|
||||
@@ -30,7 +44,15 @@ services:
|
||||
restart: always
|
||||
environment:
|
||||
- ASPNETCORE_URLS=http://*:5000
|
||||
- ASPNETCORE_FORWARDEDHEADERS_ENABLED=true
|
||||
- OTEL_DOTNET_EXPERIMENTAL_OTLP_EMIT_EXCEPTION_LOG_ATTRIBUTES=true
|
||||
- OTEL_DOTNET_EXPERIMENTAL_OTLP_EMIT_EVENT_LOG_ATTRIBUTES= true
|
||||
- OTEL_DOTNET_EXPERIMENTAL_OTLP_RETRY=in_memory
|
||||
- OTEL_EXPORTER_OTLP_ENDPOINT=http://aspire-dashboard:18889
|
||||
- OTEL_SERVICE_NAME=antsk
|
||||
volumes:
|
||||
- ./appsettings.json:/app/appsettings.json # 本地配置文件 需要放在同级目录
|
||||
- /AntSK/model:/app/model
|
||||
- /AntSK/model:/root/.cache/modelscope/hub/AI-ModelScope # LLamaFactory模型文件
|
||||
networks:
|
||||
antsk:
|
||||
|
||||
20
src/AntSK.AppHost/AntSK.AppHost.csproj
Normal file
20
src/AntSK.AppHost/AntSK.AppHost.csproj
Normal file
@@ -0,0 +1,20 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net8.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<IsAspireHost>true</IsAspireHost>
|
||||
<UserSecretsId>32ac67c8-178a-4eeb-871d-879023582e06</UserSecretsId>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Aspire.Hosting.AppHost" Version="8.0.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\AntSK\AntSK.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
5
src/AntSK.AppHost/Program.cs
Normal file
5
src/AntSK.AppHost/Program.cs
Normal file
@@ -0,0 +1,5 @@
|
||||
var builder = DistributedApplication.CreateBuilder(args);
|
||||
|
||||
builder.AddProject<Projects.AntSK>("antsk");
|
||||
|
||||
builder.Build().Run();
|
||||
8
src/AntSK.AppHost/appsettings.Development.json
Normal file
8
src/AntSK.AppHost/appsettings.Development.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning"
|
||||
}
|
||||
}
|
||||
}
|
||||
9
src/AntSK.AppHost/appsettings.json
Normal file
9
src/AntSK.AppHost/appsettings.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"Aspire.Hosting.Dcp": "Warning"
|
||||
}
|
||||
}
|
||||
}
|
||||
26
src/AntSK.AppHost/aspirate-output/docker-compose.yaml
Normal file
26
src/AntSK.AppHost/aspirate-output/docker-compose.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
services:
|
||||
aspire-dashboard:
|
||||
container_name: "aspire-dashboard"
|
||||
image: "mcr.microsoft.com/dotnet/aspire-dashboard:8.0"
|
||||
environment:
|
||||
DOTNET_DASHBOARD_UNSECURED_ALLOW_ANONYMOUS: "true"
|
||||
ports:
|
||||
- target: 18888
|
||||
published: 18888
|
||||
restart: unless-stopped
|
||||
antsk:
|
||||
container_name: "antsk"
|
||||
image: "antsk:latest"
|
||||
environment:
|
||||
OTEL_DOTNET_EXPERIMENTAL_OTLP_EMIT_EXCEPTION_LOG_ATTRIBUTES: "true"
|
||||
OTEL_DOTNET_EXPERIMENTAL_OTLP_EMIT_EVENT_LOG_ATTRIBUTES: "true"
|
||||
OTEL_DOTNET_EXPERIMENTAL_OTLP_RETRY: "in_memory"
|
||||
ASPNETCORE_FORWARDEDHEADERS_ENABLED: "true"
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "http://aspire-dashboard:18889"
|
||||
OTEL_SERVICE_NAME: "antsk"
|
||||
ports:
|
||||
- target: 8080
|
||||
published: 10000
|
||||
- target: 8443
|
||||
published: 10001
|
||||
restart: unless-stopped
|
||||
17
src/AntSK.AppHost/aspirate-state.json
Normal file
17
src/AntSK.AppHost/aspirate-state.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"projectPath": ".",
|
||||
"outputPath": "aspirate-output",
|
||||
"containerImageTags": [
|
||||
"latest"
|
||||
],
|
||||
"containerBuilder": "docker",
|
||||
"outputFormat": "compose",
|
||||
"privateRegistryEmail": "aspir8@aka.ms",
|
||||
"includeDashboard": true,
|
||||
"secrets": {
|
||||
"salt": "fjamZa3pQbM1UyY4",
|
||||
"hash": "QR\u002BSEr3p2SwD/w2oPE21vrWh/EerhNyVyTkr0atIREw=",
|
||||
"secrets": {}
|
||||
},
|
||||
"processAllComponents": true
|
||||
}
|
||||
26
src/AntSK.AppHost/manifest.json
Normal file
26
src/AntSK.AppHost/manifest.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"resources": {
|
||||
"antsk": {
|
||||
"type": "project.v0",
|
||||
"path": "../AntSK/AntSK.csproj",
|
||||
"env": {
|
||||
"OTEL_DOTNET_EXPERIMENTAL_OTLP_EMIT_EXCEPTION_LOG_ATTRIBUTES": "true",
|
||||
"OTEL_DOTNET_EXPERIMENTAL_OTLP_EMIT_EVENT_LOG_ATTRIBUTES": "true",
|
||||
"OTEL_DOTNET_EXPERIMENTAL_OTLP_RETRY": "in_memory",
|
||||
"ASPNETCORE_FORWARDEDHEADERS_ENABLED": "true"
|
||||
},
|
||||
"bindings": {
|
||||
"http": {
|
||||
"scheme": "http",
|
||||
"protocol": "tcp",
|
||||
"transport": "http"
|
||||
},
|
||||
"https": {
|
||||
"scheme": "https",
|
||||
"protocol": "tcp",
|
||||
"transport": "http"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5,47 +5,49 @@
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<DocumentationFile>AntSK.Domain.xml</DocumentationFile>
|
||||
<NoWarn>CA1050,CA1707,CA2007,VSTHRD111,CS1591,RCS1110,CA5394,SKEXP0001,SKEXP0002,SKEXP0003,SKEXP0004,SKEXP0010,SKEXP0011,,SKEXP0012,SKEXP0020,SKEXP0021,SKEXP0022,SKEXP0023,SKEXP0024,SKEXP0025,SKEXP0026,SKEXP0027,SKEXP0028,SKEXP0029,SKEXP0030,SKEXP0031,SKEXP0032,SKEXP0040,SKEXP0041,SKEXP0042,SKEXP0050,SKEXP0051,SKEXP0052,SKEXP0053,SKEXP0054,SKEXP0055,SKEXP0060,SKEXP0061,SKEXP0101,SKEXP0102</NoWarn>
|
||||
<NoWarn>CA1050,CA1707,CA2007,VSTHRD111,CS1591,RCS1110,CA5394,SKEXP0001,SKEXP0002,SKEXP0003,SKEXP0004,SKEXP0010,SKEXP0011,,SKEXP0012,SKEXP0020,SKEXP0021,SKEXP0022,SKEXP0023,SKEXP0024,SKEXP0025,SKEXP0026,SKEXP0027,SKEXP0028,SKEXP0029,SKEXP0030,SKEXP0031,SKEXP0032,SKEXP0040,SKEXP0041,SKEXP0042,SKEXP0050,SKEXP0051,SKEXP0052,SKEXP0053,SKEXP0054,SKEXP0055,SKEXP0060,SKEXP0061,SKEXP0101,SKEXP0102,KMEXP00</NoWarn>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="AntDesign.Charts" Version="0.5.1" />
|
||||
<PackageReference Include="AntDesign.ProLayout" Version="0.19.0" />
|
||||
<PackageReference Include="AntDesign.Charts" Version="0.5.6" />
|
||||
<PackageReference Include="AntDesign.ProLayout" Version="0.20.3" />
|
||||
<PackageReference Include="BlazorComponents.Terminal" Version="0.6.0" />
|
||||
<PackageReference Include="Blazored.LocalStorage" Version="4.5.0" />
|
||||
|
||||
<PackageReference Include="pythonnet" Version="3.0.3" />
|
||||
|
||||
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
|
||||
<PackageReference Include="pythonnet" Version="3.0.4" />
|
||||
|
||||
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.9.0" />
|
||||
|
||||
<PackageReference Include="AutoMapper" Version="8.1.0" />
|
||||
<PackageReference Include="BCrypt.Net-Next" Version="4.0.3" />
|
||||
<PackageReference Include="Markdig" Version="0.37.0" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
|
||||
<PackageReference Include="SqlSugarCore" Version="5.1.4.154" />
|
||||
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.118" />
|
||||
<PackageReference Include="RestSharp" Version="110.2.0" />
|
||||
<PackageReference Include="NPOI" Version="2.7.0" />
|
||||
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="1.11.1" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel.Core" Version="1.11.1" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Core" Version="1.11.1-alpha" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="$(NewtonsoftVersion)" />
|
||||
<PackageReference Include="SqlSugarCore" Version="5.1.4.169" />
|
||||
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.119" />
|
||||
<PackageReference Include="RestSharp" Version="$(RestSharpVersion)" />
|
||||
<PackageReference Include="NPOI" Version="2.7.1" />
|
||||
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="$(SKVersion)" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel.Core" Version="$(SKVersion)" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Core" Version="$(SKVersion)-alpha" />
|
||||
<PackageReference Include="Microsoft.KernelMemory.AI.OpenAI" Version="$(KMVersion)" />
|
||||
<PackageReference Include="Microsoft.KernelMemory.AI.AzureOpenAI" Version="$(KMVersion)" />
|
||||
<PackageReference Include="Microsoft.KernelMemory.Core" Version="$(KMVersion)" />
|
||||
<PackageReference Include="Microsoft.KernelMemory.MemoryDb.Postgres" Version="$(KMVersion)" />
|
||||
<PackageReference Include="Microsoft.KernelMemory.MemoryDb.Qdrant" Version="$(KMVersion)" />
|
||||
<PackageReference Include="Microsoft.KernelMemory.MemoryDb.Redis" Version="$(KMVersion)" />
|
||||
<PackageReference Include="Microsoft.KernelMemory.MemoryDb.AzureAISearch" Version="$(KMVersion)" />
|
||||
|
||||
<PackageReference Include="LLamaSharp" Version="$(LLamaSharpVersion)" />
|
||||
<PackageReference Include="LLamaSharp.Backend.Cpu" Version="$(LLamaSharpVersion)" />
|
||||
<PackageReference Include="LLamaSharp.Backend.Cuda12" Version="$(LLamaSharpVersion)" />
|
||||
<PackageReference Include="LLamaSharp.kernel-memory" Version="$(LLamaSharpVersion)" />
|
||||
<PackageReference Include="LLamaSharp.semantic-kernel" Version="$(LLamaSharpVersion)" />
|
||||
|
||||
|
||||
<PackageReference Include="Serilog" Version="4.1.0" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="6.0.0" />
|
||||
<PackageReference Include="Serilog.Sinks.File" Version="6.0.0" />
|
||||
<PackageReference Include="Serilog.Extensions.Logging" Version="8.0.1-dev-10391" />
|
||||
<PackageReference Include="Serilog.Settings.Configuration" Version="8.0.4" />
|
||||
<PackageReference Include="Serilog.Sinks.Seq" Version="8.0.0" />
|
||||
<PackageReference Include="Serilog.Sinks.OpenTelemetry" Version="4.1.1" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\AntSK.LLamaFactory\AntSK.LLamaFactory.csproj" />
|
||||
<ProjectReference Include="..\AntSk.LLM\AntSK.LLM.csproj" />
|
||||
<ProjectReference Include="..\AntSK.LLM\AntSK.LLM.csproj" />
|
||||
<ProjectReference Include="..\AntSK.OCR\AntSK.OCR.csproj" />
|
||||
<ProjectReference Include="..\MiddleWare\AntSK.BackgroundTask\AntSK.BackgroundTask.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
@@ -188,11 +188,6 @@
|
||||
<member name="M:AntSK.Domain.Domain.Other.KMExcelHandler.InvokeAsync(Microsoft.KernelMemory.Pipeline.DataPipeline,System.Threading.CancellationToken)">
|
||||
<inheritdoc />
|
||||
</member>
|
||||
<member name="F:AntSK.Domain.Domain.Other.LLamaConfig.dicLLamaWeights">
|
||||
<summary>
|
||||
避免模型重复加载,本地缓存
|
||||
</summary>
|
||||
</member>
|
||||
<member name="P:AntSK.Domain.Domain.Other.QAHandler.StepName">
|
||||
<inheritdoc />
|
||||
</member>
|
||||
@@ -924,6 +919,20 @@
|
||||
<param name="value"></param>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="M:AntSK.Domain.Utils.ConvertUtils.Unescape(System.String)">
|
||||
<summary>
|
||||
\uxxxx转中文,保留换行符号
|
||||
</summary>
|
||||
<param name="unicodeString"></param>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="M:AntSK.Domain.Utils.ConvertUtils.IsStream(System.String)">
|
||||
<summary>
|
||||
是否为流式请求
|
||||
</summary>
|
||||
<param name="value"></param>
|
||||
<returns></returns>
|
||||
</member>
|
||||
<member name="M:AntSK.Domain.Utils.RepoFiles.SamplePluginsPath">
|
||||
<summary>
|
||||
Scan the local folders from the repo, looking for "samples/plugins" folder.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
namespace AntSK.Domain.Common
|
||||
{
|
||||
[AttributeUsage(AttributeTargets.Method)]
|
||||
public class AntSkFunctionAttribute : Attribute
|
||||
public class AntSKFunctionAttribute : Attribute
|
||||
{
|
||||
// 自定义的ActionAttribute
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ using DocumentFormat.OpenXml.Office2016.Drawing.ChartDrawing;
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.OpenApi.Models;
|
||||
using SqlSugar;
|
||||
using Swashbuckle.AspNetCore.SwaggerGen;
|
||||
@@ -19,6 +20,12 @@ namespace AntSK.Domain.Common.DependencyInjection
|
||||
{
|
||||
public static class InitExtensions
|
||||
{
|
||||
private static ILogger _logger;
|
||||
|
||||
public static void InitLog(ILogger logger)
|
||||
{
|
||||
_logger = logger;
|
||||
}
|
||||
/// <summary>
|
||||
/// 使用codefirst创建数据库表
|
||||
/// </summary>
|
||||
@@ -52,6 +59,8 @@ namespace AntSK.Domain.Common.DependencyInjection
|
||||
}
|
||||
//安装向量插件
|
||||
_repository.GetDB().Ado.ExecuteCommandAsync($"CREATE EXTENSION IF NOT EXISTS vector;");
|
||||
|
||||
_logger.LogInformation("初始化表结构完成");
|
||||
}
|
||||
return app;
|
||||
}
|
||||
@@ -72,7 +81,7 @@ namespace AntSK.Domain.Common.DependencyInjection
|
||||
llamafactoryStart.Value = "false";
|
||||
_dic_Repository.Insert(llamafactoryStart);
|
||||
}
|
||||
|
||||
_logger.LogInformation("初始化数据库初始数据完成");
|
||||
}
|
||||
return app;
|
||||
}
|
||||
@@ -99,7 +108,7 @@ namespace AntSK.Domain.Common.DependencyInjection
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Console.WriteLine(ex.Message + " ---- " + ex.StackTrace);
|
||||
_logger.LogError(ex.Message + " ---- " + ex.StackTrace);
|
||||
}
|
||||
return app;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
using LLamaSharp.KernelMemory;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using Microsoft.KernelMemory;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
@@ -1,12 +1,4 @@
|
||||
using LLama.Common;
|
||||
using LLama;
|
||||
using LLamaSharp.KernelMemory;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using AntSK.Domain.Domain.Other.Bge;
|
||||
|
||||
namespace AntSK.Domain.Common.Embedding
|
||||
@@ -52,5 +44,10 @@ namespace AntSK.Domain.Common.Embedding
|
||||
{
|
||||
return BgeEmbeddingConfig.TokenCount(text);
|
||||
}
|
||||
|
||||
public IReadOnlyList<string> GetTokens(string text)
|
||||
{
|
||||
return new List<string>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
using System;
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
@@ -7,7 +9,7 @@ using System.Threading.Tasks;
|
||||
|
||||
namespace AntSK.Domain.Common.LLamaFactory
|
||||
{
|
||||
public class ProcessWrapper
|
||||
public class ProcessWrapper(ILogger<ProcessWrapper> _logger)
|
||||
{
|
||||
private Process process;
|
||||
|
||||
@@ -41,7 +43,7 @@ namespace AntSK.Domain.Common.LLamaFactory
|
||||
isProcessComplete = true;
|
||||
}
|
||||
}
|
||||
Console.WriteLine(result);
|
||||
_logger.LogInformation(result);
|
||||
}
|
||||
start.WaitForExit();
|
||||
}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
<Project>
|
||||
<!-- See https://aka.ms/dotnet/msbuild/customize for more details on customizing your build -->
|
||||
<PropertyGroup>
|
||||
|
||||
<KMVersion>0.40.240501.1</KMVersion>
|
||||
<LLamaSharpVersion>0.11.2</LLamaSharpVersion>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
@@ -12,7 +12,9 @@ namespace AntSK.Domain.Domain.Interface
|
||||
{
|
||||
public event LogMessageHandler LogMessageReceived;
|
||||
Task PipInstall();
|
||||
Task StartLLamaFactory(string modelName, string templateName);
|
||||
|
||||
Task PipInstallName(string name);
|
||||
Task StartLLamaFactory(string modelName);
|
||||
|
||||
void KillProcess();
|
||||
|
||||
|
||||
15
src/AntSK.Domain/Domain/Interface/IOllamaService.cs
Normal file
15
src/AntSK.Domain/Domain/Interface/IOllamaService.cs
Normal file
@@ -0,0 +1,15 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using static AntSK.Domain.Domain.Service.OllamaService;
|
||||
|
||||
namespace AntSK.Domain.Domain.Interface
|
||||
{
|
||||
public interface IOllamaService
|
||||
{
|
||||
public event LogMessageHandler LogMessageReceived;
|
||||
Task StartOllama(string modelName);
|
||||
}
|
||||
}
|
||||
@@ -13,9 +13,6 @@ namespace AntSK.Domain.Domain.Model.Enum
|
||||
[Display(Name = "Azure Open AI")]
|
||||
AzureOpenAI = 2,
|
||||
|
||||
[Display(Name = "LLama本地模型")]
|
||||
LLamaSharp = 3,
|
||||
|
||||
[Display(Name = "星火大模型")]
|
||||
SparkDesk = 4,
|
||||
|
||||
@@ -28,8 +25,11 @@ namespace AntSK.Domain.Domain.Model.Enum
|
||||
BgeEmbedding = 7,
|
||||
[Display(Name = "Bge Rerank")]
|
||||
BgeRerank = 8,
|
||||
[Display(Name = "StableDiffusion")]
|
||||
StableDiffusion = 9,
|
||||
|
||||
[Display(Name = "Ollama")]
|
||||
Ollama = 10,
|
||||
[Display(Name = "OllamaEmbedding")]
|
||||
OllamaEmbedding = 11,
|
||||
[Display(Name = "模拟输出")]
|
||||
Mock = 100,
|
||||
|
||||
@@ -42,7 +42,6 @@ namespace AntSK.Domain.Domain.Model.Enum
|
||||
{
|
||||
Chat = 1,
|
||||
Embedding = 2,
|
||||
Image=3,
|
||||
Rerank=4
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,26 +2,29 @@
|
||||
using AntSK.Domain.Domain.Interface;
|
||||
using AntSK.Domain.Domain.Model;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace AntSK.Domain.Domain.Other
|
||||
{
|
||||
public class BackGroundTaskHandler : IBackgroundTaskHandler<ImportKMSTaskReq>
|
||||
{
|
||||
private readonly IServiceScopeFactory _scopeFactory;
|
||||
private readonly ILogger<BackGroundTaskHandler> _logger;
|
||||
|
||||
public BackGroundTaskHandler(IServiceScopeFactory scopeFactory)
|
||||
public BackGroundTaskHandler(IServiceScopeFactory scopeFactory, ILogger<BackGroundTaskHandler> logger)
|
||||
{
|
||||
_scopeFactory = scopeFactory;
|
||||
_logger = logger;
|
||||
}
|
||||
public async Task ExecuteAsync(ImportKMSTaskReq item)
|
||||
{
|
||||
using (var scope = _scopeFactory.CreateScope())
|
||||
{
|
||||
Console.WriteLine("ExecuteAsync.开始执行后台任务");
|
||||
_logger.LogInformation("ExecuteAsync.开始执行后台任务");
|
||||
var importKMSService = scope.ServiceProvider.GetRequiredService<IImportKMSService>();
|
||||
//不能使用异步
|
||||
importKMSService.ImportKMSTask(item);
|
||||
Console.WriteLine("ExecuteAsync.后台任务执行完成");
|
||||
_logger.LogInformation("ExecuteAsync.后台任务执行完成");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using static Python.Runtime.Py;
|
||||
using AntSK.Domain.Utils;
|
||||
|
||||
namespace AntSK.Domain.Domain.Other.Bge
|
||||
{
|
||||
@@ -26,11 +27,7 @@ namespace AntSK.Domain.Domain.Other.Bge
|
||||
{
|
||||
if (model == null)
|
||||
{
|
||||
if (string.IsNullOrEmpty(Runtime.PythonDLL))
|
||||
{
|
||||
Runtime.PythonDLL = pythondllPath;
|
||||
}
|
||||
PythonEngine.Initialize();
|
||||
PyRunTime.InitRunTime(pythondllPath);
|
||||
try
|
||||
{
|
||||
using (GIL())// 初始化Python环境的Global Interpreter Lock)
|
||||
@@ -39,7 +36,7 @@ namespace AntSK.Domain.Domain.Other.Bge
|
||||
dynamic flagEmbedding = Py.Import("FlagEmbedding");
|
||||
|
||||
dynamic model_dir = modelscope.snapshot_download(modelName, revision: "master");
|
||||
dynamic flagReranker = flagEmbedding.FlagReranker(model_dir, use_fp16: true);
|
||||
dynamic flagReranker = flagEmbedding.FlagReranker(model_dir, use_fp16: false);
|
||||
model = flagReranker;
|
||||
return model;
|
||||
}
|
||||
@@ -69,7 +66,7 @@ namespace AntSK.Domain.Domain.Other.Bge
|
||||
pyList.Append(item.ToPython()); // 将C# string转换为Python对象并添加到PyList中
|
||||
}
|
||||
PyObject result = model.compute_score(pyList, normalize: true);
|
||||
return result.As<double>();
|
||||
return result.ConvertToString().Trim('[').Trim(']').ConvertToDouble();
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using Microsoft.KernelMemory.AI.OpenAI.GPT3;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using Python.Runtime;
|
||||
using Serilog;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
@@ -26,13 +27,7 @@ namespace AntSK.Domain.Domain.Other.Bge
|
||||
{
|
||||
if (model == null)
|
||||
{
|
||||
//Runtime.PythonDLL = @"D:\Programs\Python\Python311\python311.dll";
|
||||
if (string.IsNullOrEmpty(Runtime.PythonDLL))
|
||||
{
|
||||
Runtime.PythonDLL = pythondllPath;
|
||||
}
|
||||
PythonEngine.Initialize();
|
||||
PythonEngine.BeginAllowThreads();
|
||||
PyRunTime.InitRunTime(pythondllPath);
|
||||
try
|
||||
{
|
||||
using (GIL())// 初始化Python环境的Global Interpreter Lock)
|
||||
@@ -85,13 +80,13 @@ namespace AntSK.Domain.Domain.Other.Bge
|
||||
// return len;
|
||||
|
||||
//}
|
||||
var tokenCount1 = GPT3Tokenizer.Encode(queryStr).Count;
|
||||
var tokenCount1 = DefaultGPTTokenizer.StaticCountTokens(queryStr);
|
||||
return tokenCount1;
|
||||
}
|
||||
|
||||
public static void Dispose()
|
||||
{
|
||||
Console.WriteLine("python dispose");
|
||||
Log.Information("python dispose");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
28
src/AntSK.Domain/Domain/Other/Bge/PyRunTime.cs
Normal file
28
src/AntSK.Domain/Domain/Other/Bge/PyRunTime.cs
Normal file
@@ -0,0 +1,28 @@
|
||||
using Python.Runtime;
|
||||
|
||||
namespace AntSK.Domain.Domain.Other.Bge
|
||||
{
|
||||
public static class PyRunTime
|
||||
{
|
||||
static object lockobj = new object();
|
||||
|
||||
static bool isInit = false;
|
||||
|
||||
public static void InitRunTime(string pythonPath)
|
||||
{
|
||||
lock (lockobj)
|
||||
{
|
||||
if (!isInit)
|
||||
{
|
||||
if (string.IsNullOrEmpty(Runtime.PythonDLL))
|
||||
{
|
||||
Runtime.PythonDLL = pythonPath;
|
||||
}
|
||||
PythonEngine.Initialize();
|
||||
PythonEngine.BeginAllowThreads();
|
||||
isInit = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
using AntSK.Domain.Domain.Model.Constant;
|
||||
using AntSK.Domain.Utils;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.KernelMemory.AI.OpenAI;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using Microsoft.KernelMemory.Configuration;
|
||||
using Microsoft.KernelMemory.DataFormats.Text;
|
||||
using Microsoft.KernelMemory.Diagnostics;
|
||||
@@ -134,7 +135,7 @@ namespace AntSK.Domain.Domain.Other
|
||||
PartitionNumber = partitionNumber,
|
||||
SectionNumber = sectionNumber,
|
||||
Tags = pipeline.Tags,
|
||||
ContentSHA256 = textData.CalculateSHA256(),
|
||||
ContentSHA256 = textData.AntSKCalculateSHA256(),
|
||||
};
|
||||
newFiles.Add(destFile, destFileDetails);
|
||||
destFileDetails.MarkProcessedBy(this);
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
using LLama;
|
||||
using LLama.Common;
|
||||
using LLamaSharp.KernelMemory;
|
||||
|
||||
namespace AntSK.Domain.Domain.Other
|
||||
{
|
||||
public static class LLamaConfig
|
||||
{
|
||||
static object lockobj = new object();
|
||||
/// <summary>
|
||||
/// 避免模型重复加载,本地缓存
|
||||
/// </summary>
|
||||
static Dictionary<string, (LLamaWeights, ModelParams)> dicLLamaWeights = new Dictionary<string, (LLamaWeights, ModelParams)>();
|
||||
public static (LLamaWeights, ModelParams) GetLLamaConfig(string modelPath, LLamaSharpConfig config = null)
|
||||
{
|
||||
lock (lockobj)
|
||||
{
|
||||
if (dicLLamaWeights.ContainsKey(modelPath))
|
||||
{
|
||||
return dicLLamaWeights.GetValueOrDefault(modelPath);
|
||||
}
|
||||
else
|
||||
{
|
||||
InferenceParams infParams = new() { AntiPrompts = ["\n\n"] };
|
||||
LLamaSharpConfig lsConfig = new(modelPath) { DefaultInferenceParams = infParams };
|
||||
if (config != null)
|
||||
{
|
||||
lsConfig = config;
|
||||
}
|
||||
var parameters = new ModelParams(lsConfig.ModelPath)
|
||||
{
|
||||
ContextSize = lsConfig?.ContextSize ?? 2048,
|
||||
Seed = lsConfig?.Seed ?? 0,
|
||||
GpuLayerCount = lsConfig?.GpuLayerCount ?? 20,
|
||||
EmbeddingMode = true
|
||||
};
|
||||
var weights = LLamaWeights.LoadFromFile(parameters);
|
||||
dicLLamaWeights.Add(modelPath, (weights, parameters));
|
||||
return (weights, parameters);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,7 +3,7 @@ using AntSK.Domain.Domain.Model;
|
||||
using AntSK.Domain.Utils;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.KernelMemory.AI.OpenAI;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using Microsoft.KernelMemory.Configuration;
|
||||
using Microsoft.KernelMemory.DataFormats.Text;
|
||||
using Microsoft.KernelMemory.Diagnostics;
|
||||
@@ -151,7 +151,7 @@ namespace AntSK.Domain.Domain.Other
|
||||
PartitionNumber = partitionNumber,
|
||||
SectionNumber = sectionNumber,
|
||||
Tags = pipeline.Tags,
|
||||
ContentSHA256 = textData.CalculateSHA256(),
|
||||
ContentSHA256 = textData.AntSKCalculateSHA256(),
|
||||
};
|
||||
newFiles.Add(destFile, destFileDetails);
|
||||
destFileDetails.MarkProcessedBy(this);
|
||||
|
||||
@@ -330,22 +330,18 @@ namespace AntSK.Domain.Domain.Service
|
||||
|
||||
public async Task<ChatHistory> GetChatHistory(List<Chats> MessageList, ChatHistory history)
|
||||
{
|
||||
if (MessageList.Count > 1)
|
||||
foreach (var item in MessageList)
|
||||
{
|
||||
|
||||
foreach (var item in MessageList)
|
||||
if (item.IsSend)
|
||||
{
|
||||
if (item.IsSend)
|
||||
{
|
||||
history.AddUserMessage(item.Context);
|
||||
}
|
||||
else
|
||||
{
|
||||
history.AddAssistantMessage(item.Context);
|
||||
}
|
||||
history.AddUserMessage(item.Context);
|
||||
}
|
||||
else
|
||||
{
|
||||
history.AddAssistantMessage(item.Context);
|
||||
}
|
||||
}
|
||||
return history;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ using System.Text.RegularExpressions;
|
||||
using Microsoft.SemanticKernel;
|
||||
using HtmlAgilityPack;
|
||||
using System.Collections.Generic;
|
||||
using Serilog;
|
||||
|
||||
namespace AntSK.Domain.Domain.Service
|
||||
{
|
||||
@@ -115,7 +116,7 @@ namespace AntSK.Domain.Domain.Service
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Console.WriteLine(ex.Message + " ---- " + ex.StackTrace);
|
||||
Log.Error(ex.Message + " ---- " + ex.StackTrace);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ using AntSK.Domain.Domain.Model.Constant;
|
||||
using AntSK.Domain.Domain.Model.Excel;
|
||||
using AntSK.Domain.Domain.Other;
|
||||
using AntSK.Domain.Repositories;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.KernelMemory;
|
||||
using Microsoft.KernelMemory.Handlers;
|
||||
using System.Text;
|
||||
@@ -15,7 +16,8 @@ namespace AntSK.Domain.Domain.Service
|
||||
public class ImportKMSService(
|
||||
IKMService _kMService,
|
||||
IKmsDetails_Repositories _kmsDetails_Repositories,
|
||||
IKmss_Repositories _kmss_Repositories
|
||||
IKmss_Repositories _kmss_Repositories,
|
||||
ILogger<ImportKMSService> _logger
|
||||
) : IImportKMSService
|
||||
{
|
||||
|
||||
@@ -140,13 +142,13 @@ namespace AntSK.Domain.Domain.Service
|
||||
req.KmsDetail.Status = Model.Enum.ImportKmsStatus.Success;
|
||||
_kmsDetails_Repositories.Update(req.KmsDetail);
|
||||
//_kmsDetails_Repositories.GetList(p => p.KmsId == req.KmsId);
|
||||
Console.WriteLine("后台导入任务成功:" + req.KmsDetail.DataCount);
|
||||
_logger.LogInformation("后台导入任务成功:" + req.KmsDetail.DataCount);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
req.KmsDetail.Status = Model.Enum.ImportKmsStatus.Fail;
|
||||
_kmsDetails_Repositories.Update(req.KmsDetail);
|
||||
Console.WriteLine("后台导入任务异常:" + ex.Message);
|
||||
_logger.LogError("后台导入任务异常:" + ex.Message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,8 +10,6 @@ using AntSK.Domain.Repositories;
|
||||
using AntSK.Domain.Utils;
|
||||
using AntSK.OCR;
|
||||
using DocumentFormat.OpenXml.Drawing.Diagrams;
|
||||
using LLama;
|
||||
using LLamaSharp.KernelMemory;
|
||||
using Markdig;
|
||||
using Microsoft.AspNetCore.Components;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
@@ -171,12 +169,6 @@ namespace AntSK.Domain.Domain.Service
|
||||
APIType = AzureOpenAIConfig.APITypes.EmbeddingGeneration,
|
||||
});
|
||||
break;
|
||||
|
||||
case Model.Enum.AIType.LLamaSharp:
|
||||
var (weights, parameters) = LLamaConfig.GetLLamaConfig(embedModel.ModelName);
|
||||
var embedder = new LLamaEmbedder(weights, parameters);
|
||||
memory.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder));
|
||||
break;
|
||||
case Model.Enum.AIType.BgeEmbedding:
|
||||
string pyDll = embedModel.EndPoint;
|
||||
string bgeEmbeddingModelName = embedModel.ModelName;
|
||||
@@ -185,6 +177,13 @@ namespace AntSK.Domain.Domain.Service
|
||||
case Model.Enum.AIType.DashScope:
|
||||
memory.WithDashScopeDefaults(embedModel.ModelKey);
|
||||
break;
|
||||
case Model.Enum.AIType.OllamaEmbedding:
|
||||
memory.WithOpenAITextEmbeddingGeneration(new OpenAIConfig()
|
||||
{
|
||||
APIKey = "NotNull",
|
||||
EmbeddingModel = embedModel.ModelName
|
||||
}, null, false, embeddingHttpClient);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,15 +210,15 @@ namespace AntSK.Domain.Domain.Service
|
||||
APIType = AzureOpenAIConfig.APITypes.TextCompletion,
|
||||
});
|
||||
break;
|
||||
|
||||
case Model.Enum.AIType.LLamaSharp:
|
||||
var (weights, parameters) = LLamaConfig.GetLLamaConfig(chatModel.ModelName);
|
||||
var context = weights.CreateContext(parameters);
|
||||
var executor = new StatelessExecutor(weights, parameters);
|
||||
memory.WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor));
|
||||
break;
|
||||
case Model.Enum.AIType.LLamaFactory:
|
||||
|
||||
memory.WithOpenAITextGeneration(new OpenAIConfig()
|
||||
{
|
||||
APIKey = "NotNull",
|
||||
TextModel = chatModel.ModelName
|
||||
}, null, chatHttpClient);
|
||||
break;
|
||||
case Model.Enum.AIType.Ollama:
|
||||
memory.WithOpenAITextGeneration(new OpenAIConfig()
|
||||
{
|
||||
APIKey = "NotNull",
|
||||
|
||||
@@ -4,24 +4,16 @@ using AntSK.Domain.Domain.Interface;
|
||||
using AntSK.Domain.Domain.Other;
|
||||
using AntSK.Domain.Repositories;
|
||||
using AntSK.Domain.Utils;
|
||||
using LLama;
|
||||
using LLamaSharp.SemanticKernel.TextCompletion;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.SemanticKernel;
|
||||
using Microsoft.SemanticKernel.Plugins.Core;
|
||||
using Microsoft.SemanticKernel.TextGeneration;
|
||||
using RestSharp;
|
||||
using System;
|
||||
using ServiceLifetime = AntSK.Domain.Common.DependencyInjection.ServiceLifetime;
|
||||
using AntSK.LLM.Mock;
|
||||
using AntSK.Domain.Domain.Model.Enum;
|
||||
using AntSK.LLM.LLamaFactory;
|
||||
using System.Reflection;
|
||||
using DocumentFormat.OpenXml.Drawing;
|
||||
using Microsoft.KernelMemory;
|
||||
using OpenCvSharp.ML;
|
||||
using LLamaSharp.SemanticKernel.ChatCompletion;
|
||||
using Microsoft.SemanticKernel.ChatCompletion;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace AntSK.Domain.Domain.Service
|
||||
{
|
||||
@@ -33,17 +25,20 @@ namespace AntSK.Domain.Domain.Service
|
||||
private readonly FunctionService _functionService;
|
||||
private readonly IServiceProvider _serviceProvider;
|
||||
private Kernel _kernel;
|
||||
private readonly ILogger<KernelService> _logger;
|
||||
|
||||
public KernelService(
|
||||
IApis_Repositories apis_Repositories,
|
||||
IAIModels_Repositories aIModels_Repositories,
|
||||
FunctionService functionService,
|
||||
IServiceProvider serviceProvider)
|
||||
IServiceProvider serviceProvider,
|
||||
ILogger<KernelService> logger)
|
||||
{
|
||||
_apis_Repositories = apis_Repositories;
|
||||
_aIModels_Repositories = aIModels_Repositories;
|
||||
_functionService = functionService;
|
||||
_serviceProvider = serviceProvider;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -103,15 +98,30 @@ namespace AntSK.Domain.Domain.Service
|
||||
);
|
||||
break;
|
||||
|
||||
case Model.Enum.AIType.LLamaSharp:
|
||||
var (weights, parameters) = LLamaConfig.GetLLamaConfig(chatModel.ModelName);
|
||||
var ex = new StatelessExecutor(weights, parameters);
|
||||
builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
|
||||
builder.Services.AddKeyedSingleton<IChatCompletionService>("local-llama-chat", new LLamaSharpChatCompletion(ex));
|
||||
break;
|
||||
|
||||
case Model.Enum.AIType.SparkDesk:
|
||||
var options = new SparkDeskOptions { AppId = chatModel.EndPoint, ApiSecret = chatModel.ModelKey, ApiKey = chatModel.ModelName, ModelVersion = Sdcb.SparkDesk.ModelVersion.V3_5 };
|
||||
|
||||
var settings = chatModel.ModelKey.Split("|");
|
||||
|
||||
Sdcb.SparkDesk.ModelVersion modelVersion = Sdcb.SparkDesk.ModelVersion.V3_5;
|
||||
|
||||
switch (chatModel.ModelName)
|
||||
{
|
||||
case "V3_5":
|
||||
modelVersion = Sdcb.SparkDesk.ModelVersion.V3_5;
|
||||
break;
|
||||
case "V3":
|
||||
modelVersion = Sdcb.SparkDesk.ModelVersion.V3;
|
||||
break;
|
||||
case "V2":
|
||||
modelVersion = Sdcb.SparkDesk.ModelVersion.V2;
|
||||
break;
|
||||
case "V1_5":
|
||||
modelVersion = Sdcb.SparkDesk.ModelVersion.V1_5;
|
||||
break;
|
||||
}
|
||||
|
||||
SparkDeskOptions options = new SparkDeskOptions { AppId = settings[0], ApiSecret = settings[1], ApiKey = settings[2], ModelVersion = modelVersion };
|
||||
|
||||
builder.Services.AddKeyedSingleton<ITextGenerationService>("spark-desk", new SparkDeskTextCompletion(options, chatModel.Id));
|
||||
builder.Services.AddKeyedSingleton<IChatCompletionService>("spark-desk-chat", new SparkDeskChatCompletion(options, chatModel.Id));
|
||||
break;
|
||||
@@ -127,7 +137,14 @@ namespace AntSK.Domain.Domain.Service
|
||||
case Model.Enum.AIType.LLamaFactory:
|
||||
builder.AddOpenAIChatCompletion(
|
||||
modelId: chatModel.ModelName,
|
||||
apiKey: "123",
|
||||
apiKey: "NotNull",
|
||||
httpClient: chatHttpClient
|
||||
);
|
||||
break;
|
||||
case AIType.Ollama:
|
||||
builder.AddOpenAIChatCompletion(
|
||||
modelId: chatModel.ModelName,
|
||||
apiKey: "NotNull",
|
||||
httpClient: chatHttpClient
|
||||
);
|
||||
break;
|
||||
@@ -142,7 +159,7 @@ namespace AntSK.Domain.Domain.Service
|
||||
public void ImportFunctionsByApp(Apps app, Kernel _kernel)
|
||||
{
|
||||
//插件不能重复注册,否则会异常
|
||||
if (_kernel.Plugins.Any(p => p.Name == "AntSkFunctions"))
|
||||
if (_kernel.Plugins.Any(p => p.Name == "AntSKFunctions"))
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -153,7 +170,7 @@ namespace AntSK.Domain.Domain.Service
|
||||
//本地函数插件
|
||||
ImportNativeFunction(app, functions);
|
||||
|
||||
_kernel.ImportPluginFromFunctions("AntSkFunctions", functions);
|
||||
_kernel.ImportPluginFromFunctions("AntSKFunctions", functions);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -224,7 +241,7 @@ namespace AntSK.Domain.Domain.Service
|
||||
{
|
||||
try
|
||||
{
|
||||
Console.WriteLine(jsonBody);
|
||||
_logger.LogInformation(jsonBody);
|
||||
RestClient client = new RestClient();
|
||||
RestRequest request = new RestRequest(api.Url, Method.Post);
|
||||
foreach (var header in api.Header.ConvertToString().Split("\n"))
|
||||
|
||||
@@ -4,7 +4,9 @@ using AntSK.Domain.Domain.Model.Dto;
|
||||
using AntSK.Domain.Options;
|
||||
using AntSK.LLamaFactory.Model;
|
||||
using Microsoft.AspNetCore.Mvc.ModelBinding;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Newtonsoft.Json;
|
||||
using Serilog;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
@@ -17,7 +19,7 @@ using System.Threading.Tasks;
|
||||
namespace AntSK.Domain.Domain.Service
|
||||
{
|
||||
[ServiceDescription(typeof(ILLamaFactoryService), ServiceLifetime.Singleton)]
|
||||
public class LLamaFactoryService : ILLamaFactoryService
|
||||
public class LLamaFactoryService(ILogger<LLamaFactoryService> _logger) : ILLamaFactoryService
|
||||
{
|
||||
private Process process;
|
||||
|
||||
@@ -26,7 +28,7 @@ namespace AntSK.Domain.Domain.Service
|
||||
private readonly object _syncLock = new object();
|
||||
private List<LLamaModel> modelList = new List<LLamaModel>();
|
||||
|
||||
public LLamaFactoryService() { }
|
||||
|
||||
public delegate Task LogMessageHandler(string message);
|
||||
public event LogMessageHandler LogMessageReceived;
|
||||
protected virtual async Task OnLogMessageReceived(string message)
|
||||
@@ -56,12 +58,12 @@ namespace AntSK.Domain.Domain.Service
|
||||
};
|
||||
process.OutputDataReceived += (sender, eventArgs) =>
|
||||
{
|
||||
Console.WriteLine($"{eventArgs.Data}");
|
||||
_logger.LogInformation($"{eventArgs.Data}");
|
||||
OnLogMessageReceived(eventArgs.Data);
|
||||
};
|
||||
process.ErrorDataReceived += (sender, eventArgs) =>
|
||||
{
|
||||
Console.WriteLine($"{eventArgs.Data}");
|
||||
_logger.LogInformation($"{eventArgs.Data}");
|
||||
OnLogMessageReceived(eventArgs.Data);
|
||||
};
|
||||
process.Start();
|
||||
@@ -72,8 +74,45 @@ namespace AntSK.Domain.Domain.Service
|
||||
}, TaskCreationOptions.LongRunning);
|
||||
await cmdTask;
|
||||
}
|
||||
public async Task PipInstallName(string name)
|
||||
{
|
||||
|
||||
public async Task StartLLamaFactory(string modelName, string templateName)
|
||||
var cmdTask = Task.Factory.StartNew(() =>
|
||||
{
|
||||
|
||||
var isProcessComplete = false;
|
||||
|
||||
process = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "pip",
|
||||
Arguments = $"install {name} -i https://pypi.tuna.tsinghua.edu.cn/simple",
|
||||
UseShellExecute = false,
|
||||
RedirectStandardOutput = true,
|
||||
RedirectStandardError = true,
|
||||
WorkingDirectory = AppDomain.CurrentDomain.BaseDirectory,
|
||||
}
|
||||
};
|
||||
process.OutputDataReceived += (sender, eventArgs) =>
|
||||
{
|
||||
Log.Information($"{eventArgs.Data}");
|
||||
OnLogMessageReceived(eventArgs.Data);
|
||||
};
|
||||
process.ErrorDataReceived += (sender, eventArgs) =>
|
||||
{
|
||||
Log.Information($"{eventArgs.Data}");
|
||||
OnLogMessageReceived(eventArgs.Data);
|
||||
};
|
||||
process.Start();
|
||||
process.BeginOutputReadLine();
|
||||
process.BeginErrorReadLine();
|
||||
process.WaitForExit();
|
||||
OnLogMessageReceived("--------------------完成--------------------");
|
||||
}, TaskCreationOptions.LongRunning);
|
||||
await cmdTask;
|
||||
}
|
||||
public async Task StartLLamaFactory(string modelName)
|
||||
{
|
||||
var cmdTask = Task.Factory.StartNew(() =>
|
||||
{
|
||||
@@ -85,7 +124,7 @@ namespace AntSK.Domain.Domain.Service
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "python",
|
||||
Arguments = "api_demo.py --model_name_or_path " + modelName + " --template " + templateName + " ",
|
||||
Arguments = "api_antsk.py --model_name_or_path " + modelName + " --template default ",
|
||||
UseShellExecute = false,
|
||||
RedirectStandardOutput = true,
|
||||
RedirectStandardError=true,
|
||||
@@ -97,12 +136,12 @@ namespace AntSK.Domain.Domain.Service
|
||||
process.StartInfo.EnvironmentVariables["USE_MODELSCOPE_HUB"] = Environment.GetEnvironmentVariable("USE_MODELSCOPE_HUB") ?? "1";
|
||||
process.OutputDataReceived += (sender, eventArgs) =>
|
||||
{
|
||||
Console.WriteLine($"{eventArgs.Data}");
|
||||
_logger.LogInformation($"{eventArgs.Data}");
|
||||
OnLogMessageReceived(eventArgs.Data);
|
||||
};
|
||||
process.ErrorDataReceived += (sender, eventArgs) =>
|
||||
{
|
||||
Console.WriteLine($"{eventArgs.Data}");
|
||||
_logger.LogInformation($"{eventArgs.Data}");
|
||||
OnLogMessageReceived(eventArgs.Data);
|
||||
};
|
||||
process.Start();
|
||||
@@ -137,7 +176,7 @@ namespace AntSK.Domain.Domain.Service
|
||||
if (process1.ProcessName.ToLower() == "python")
|
||||
{
|
||||
process1.Kill();
|
||||
System.Console.WriteLine("kill python");
|
||||
_logger.LogInformation("kill python");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
74
src/AntSK.Domain/Domain/Service/OllamaService.cs
Normal file
74
src/AntSK.Domain/Domain/Service/OllamaService.cs
Normal file
@@ -0,0 +1,74 @@
|
||||
using AntSK.Domain.Common.DependencyInjection;
|
||||
using AntSK.Domain.Domain.Interface;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Serilog;
|
||||
using AntSK.Domain.Utils;
|
||||
|
||||
namespace AntSK.Domain.Domain.Service
|
||||
{
|
||||
[ServiceDescription(typeof(IOllamaService), ServiceLifetime.Singleton)]
|
||||
public class OllamaService : IOllamaService
|
||||
{
|
||||
private Process process;
|
||||
public delegate Task LogMessageHandler(string message);
|
||||
public event LogMessageHandler LogMessageReceived;
|
||||
protected virtual async Task OnLogMessageReceived(string message)
|
||||
{
|
||||
LogMessageReceived?.Invoke(message);
|
||||
}
|
||||
|
||||
public async Task StartOllama(string modelName)
|
||||
{
|
||||
Console.OutputEncoding = Encoding.UTF8;
|
||||
var cmdTask = Task.Factory.StartNew(() =>
|
||||
{
|
||||
|
||||
var isProcessComplete = false;
|
||||
|
||||
process = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "ollama",
|
||||
Arguments = "run " + modelName,
|
||||
UseShellExecute = false,
|
||||
RedirectStandardOutput = true,
|
||||
RedirectStandardError = true,
|
||||
}
|
||||
};
|
||||
process.OutputDataReceived += (sender, eventArgs) =>
|
||||
{
|
||||
Log.Information($"{eventArgs.Data.ConvertToString()}");
|
||||
if (!eventArgs.Data.ConvertToString().Contains("The handle is invalid"))
|
||||
{
|
||||
OnLogMessageReceived(eventArgs.Data.ConvertToString());
|
||||
}
|
||||
};
|
||||
process.ErrorDataReceived += (sender, eventArgs) =>
|
||||
{
|
||||
Log.Error($"{eventArgs.Data.ConvertToString()}");
|
||||
if (!eventArgs.Data.ConvertToString().Contains("The handle is invalid"))
|
||||
{
|
||||
OnLogMessageReceived(eventArgs.Data.ConvertToString());
|
||||
}
|
||||
};
|
||||
process.StartInfo.StandardOutputEncoding = Encoding.UTF8;
|
||||
process.StartInfo.StandardErrorEncoding = Encoding.UTF8;
|
||||
|
||||
process.Start();
|
||||
process.BeginOutputReadLine();
|
||||
process.BeginErrorReadLine();
|
||||
process.WaitForExit();
|
||||
|
||||
OnLogMessageReceived("--------------------完成--------------------");
|
||||
}, TaskCreationOptions.LongRunning);
|
||||
await cmdTask;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
13
src/AntSK.Domain/Options/FileDirOption.cs
Normal file
13
src/AntSK.Domain/Options/FileDirOption.cs
Normal file
@@ -0,0 +1,13 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace AntSK.Domain.Options
|
||||
{
|
||||
public class FileDirOption
|
||||
{
|
||||
public static string DirectoryPath { get; set; } = Directory.GetCurrentDirectory();
|
||||
}
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
namespace AntSK.Domain.Options
|
||||
{
|
||||
public class LLamaSharpOption
|
||||
{
|
||||
public static string RunType { get; set; }
|
||||
public static string FileDirectory { get; set; } = Directory.GetCurrentDirectory();
|
||||
}
|
||||
}
|
||||
@@ -25,7 +25,7 @@ namespace AntSK.Domain.Repositories
|
||||
/// 图标
|
||||
/// </summary>
|
||||
[Required]
|
||||
public string Icon { get; set; }
|
||||
public string Icon { get; set; } = "windows";
|
||||
|
||||
/// <summary>
|
||||
/// 类型
|
||||
@@ -75,6 +75,7 @@ namespace AntSK.Domain.Repositories
|
||||
/// <summary>
|
||||
/// 知识库ID列表
|
||||
/// </summary>
|
||||
[SugarColumn(ColumnDataType = "varchar(1000)")]
|
||||
public string? KmsIdList { get; set; }
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -12,7 +12,7 @@ namespace AntSK.Domain.Repositories
|
||||
/// 图标
|
||||
/// </summary>
|
||||
[Required]
|
||||
public string Icon { get; set; }
|
||||
public string Icon { get; set; } = "question-circle";
|
||||
/// <summary>
|
||||
/// 名称
|
||||
/// </summary>
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
using System.Web;
|
||||
using Newtonsoft.Json;
|
||||
using Serilog;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Web;
|
||||
|
||||
namespace AntSK.Domain.Utils
|
||||
{
|
||||
@@ -261,5 +265,55 @@ namespace AntSK.Domain.Utils
|
||||
{
|
||||
return s.Equals(value, StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// \uxxxx转中文,保留换行符号
|
||||
/// </summary>
|
||||
/// <param name="unicodeString"></param>
|
||||
/// <returns></returns>
|
||||
public static string Unescape(this string value)
|
||||
{
|
||||
if (value.IsNull())
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Formatting formatting = Formatting.None;
|
||||
|
||||
object jsonObj = JsonConvert.DeserializeObject(value);
|
||||
string unescapeValue = JsonConvert.SerializeObject(jsonObj, formatting);
|
||||
return unescapeValue;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Log.Error(ex.ToString());
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// 是否为流式请求
|
||||
/// </summary>
|
||||
/// <param name="value"></param>
|
||||
/// <returns></returns>
|
||||
public static bool IsStream(this string value)
|
||||
{
|
||||
// 正则表达式忽略空格的情况
|
||||
string pattern = @"\s*""stream""\s*:\s*true\s*";
|
||||
|
||||
// 使用正则表达式匹配
|
||||
bool contains = Regex.IsMatch(value, pattern);
|
||||
return contains;
|
||||
}
|
||||
|
||||
public static string AntSKCalculateSHA256(this BinaryData binaryData)
|
||||
{
|
||||
byte[] byteArray = SHA256.HashData(binaryData.ToMemory().Span);
|
||||
return Convert.ToHexString(byteArray).ToLowerInvariant();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using Serilog;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace AntSK.Domain.Utils
|
||||
{
|
||||
@@ -15,12 +17,19 @@ namespace AntSK.Domain.Utils
|
||||
UriBuilder uriBuilder;
|
||||
Regex regex = new Regex(@"(https?)://([^/:]+)(:\d+)?/(.*)");
|
||||
Match match = regex.Match(_endPoint);
|
||||
if (Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") == "Development" && request.Content != null)
|
||||
string guid = Guid.NewGuid().ToString();
|
||||
var mediaType = request.Content.Headers.ContentType.MediaType;
|
||||
string requestBody = (await request.Content.ReadAsStringAsync()).Unescape();
|
||||
var uncaseBody = new StringContent(requestBody, Encoding.UTF8, mediaType);
|
||||
request.Content = uncaseBody;
|
||||
|
||||
if (Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT").ConvertToString() != "Production")
|
||||
{
|
||||
string requestBody = await request.Content.ReadAsStringAsync();
|
||||
//生产环境根据环境变量可去关闭日志
|
||||
//便于调试查看请求prompt
|
||||
Console.WriteLine(requestBody);
|
||||
Log.Information("{Message}", $"【模型服务接口调用-{guid},host:{_endPoint}】:{Environment.NewLine}{requestBody}");
|
||||
}
|
||||
|
||||
if (match.Success)
|
||||
{
|
||||
string xieyi = match.Groups[1].Value;
|
||||
@@ -70,7 +79,11 @@ namespace AntSK.Domain.Utils
|
||||
|
||||
// 接着,调用基类的 SendAsync 方法将你的修改后的请求发出去
|
||||
HttpResponseMessage response = await base.SendAsync(request, cancellationToken);
|
||||
|
||||
if (Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT").ConvertToString() != "Production")
|
||||
{
|
||||
string responseContent = requestBody.IsStream() ? response.Content.ReadAsStringAsync().Result : response.Content.ReadAsStringAsync().Result.Unescape();
|
||||
Log.Information("{Message}", $"【模型服务接口返回-{guid},host:{_endPoint}】:{Environment.NewLine}{responseContent}");
|
||||
}
|
||||
return response;
|
||||
}
|
||||
}
|
||||
@@ -82,7 +95,7 @@ namespace AntSK.Domain.Utils
|
||||
{
|
||||
var handler = new OpenAIHttpClientHandler(endPoint.ConvertToString());
|
||||
var httpClient = new HttpClient(handler);
|
||||
httpClient.Timeout = TimeSpan.FromMinutes(5);
|
||||
httpClient.Timeout = TimeSpan.FromMinutes(10);
|
||||
return httpClient;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,16 +7,22 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
|
||||
<PackageReference Include="RestSharp" Version="110.2.0" />
|
||||
<PackageReference Include="Cnblogs.KernelMemory.AI.DashScope" Version="0.1.0" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="$(SKVersion)" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="$(NewtonsoftVersion)" />
|
||||
<PackageReference Include="RestSharp" Version="$(RestSharpVersion)" />
|
||||
<PackageReference Include="Cnblogs.KernelMemory.AI.DashScope" Version="0.3.0" />
|
||||
<PackageReference Include="Cnblogs.SemanticKernel.Connectors.DashScope" Version="0.3.2" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="1.10.0" />
|
||||
<PackageReference Include="Sdcb.SparkDesk" Version="3.0.0" />
|
||||
<PackageReference Include="System.Drawing.Common" Version="8.0.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="OllamaEmbeddingModelList.txt">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="OllamaModelList.txt">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="StableDiffusion\Backend\CPU\stable-diffusion.dll">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
@@ -42,7 +48,7 @@
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="StableDiffusionModelList.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
105
src/AntSK.LLM/OllamaModelList.txt
Normal file
105
src/AntSK.LLM/OllamaModelList.txt
Normal file
@@ -0,0 +1,105 @@
|
||||
gemma2
|
||||
gemma2:27b
|
||||
gemma:2b
|
||||
gemma:7b
|
||||
llama3
|
||||
llama3:70b
|
||||
yi:6b
|
||||
yi:9B
|
||||
yi:34B
|
||||
qwen2:0.5b
|
||||
qwen2:1.5b
|
||||
qwen2:7b
|
||||
qwen2:72b
|
||||
qwen:0.5b
|
||||
qwen:1.8b
|
||||
qwen:4b
|
||||
qwen:7b
|
||||
qwen:14b
|
||||
qwen:32b
|
||||
qwen:72b
|
||||
qwen:110b
|
||||
deepseek-coder:1.3b
|
||||
deepseek-coder:6.7b
|
||||
deepseek-coder:33b
|
||||
deepseek-coder-v2:16b
|
||||
deepseek-coder-v2:236b
|
||||
phi:2.7b
|
||||
phi3:mini
|
||||
phi3:medium
|
||||
phi3:medium-128k
|
||||
aya:8b
|
||||
aya:35b
|
||||
mistral:7b
|
||||
mixtral:8x22b
|
||||
mixtral:8x7b
|
||||
codegemma:2b
|
||||
codegemma:7b
|
||||
command-r:35b
|
||||
llava
|
||||
gemma:2b
|
||||
gemma:7b
|
||||
llama2:7b
|
||||
llama2:13b
|
||||
llama2:70b
|
||||
llama2-chinese:7b
|
||||
llama2-chinese:13b
|
||||
llama3.1:8b
|
||||
llama3.1:70b
|
||||
llama3.1:405b
|
||||
codellama:7b
|
||||
codellama:13b
|
||||
codellama:34b
|
||||
codellama:70b
|
||||
dolphin-mistral:7b
|
||||
dolphin-mixtral:8x22b
|
||||
dolphin-mixtral:8x7b
|
||||
llama2-uncensored:7b
|
||||
llama2-uncensored:70b
|
||||
tinyllama:1.1b
|
||||
openchat:7b
|
||||
orca-mini:3b
|
||||
orca-mini:7b
|
||||
orca-mini:13b
|
||||
orca-mini:70b
|
||||
mistral-openorca:7b
|
||||
dolphin-llama3:8b
|
||||
dolphin-llama3:70b
|
||||
starcoder:1b
|
||||
starcoder:3b
|
||||
starcoder:7b
|
||||
starcoder:15b
|
||||
starcoder2:3b
|
||||
starcoder2:7b
|
||||
starcoder2:15b
|
||||
zephyr:7b
|
||||
zephyr:141b
|
||||
nous-hermes2:10.7b
|
||||
nous-hermes2:34b
|
||||
vicuna:7b
|
||||
vicuna:13b
|
||||
vicuna:33b
|
||||
wizard-vicuna-uncensored:7b
|
||||
wizard-vicuna-uncensored:13b
|
||||
wizard-vicuna-uncensored:30b
|
||||
wizardlm2:7b
|
||||
codestral:22b
|
||||
tinydolphin:1.1b
|
||||
openhermes:v2.5
|
||||
neural-chat:7b
|
||||
codeqwen:7b
|
||||
phind-codellama:34b
|
||||
nous-hermes:7b
|
||||
nous-hermes:13b
|
||||
nous-hermes:13b
|
||||
starling-lm:7b
|
||||
llama3-gradient:8b
|
||||
llama3-gradient:70b
|
||||
yarn-llama2:7b
|
||||
yarn-llama2:13b
|
||||
llava-llama3:8b
|
||||
llama-pro:instruct
|
||||
everythinglm:13b
|
||||
llava-phi3:3.8b
|
||||
mistrallite:7b
|
||||
notus:7b
|
||||
@@ -50,7 +50,7 @@ namespace AntSK.LLM.SparkDesk
|
||||
parameters.Temperature = (float)chatExecutionSettings.Temperature;
|
||||
parameters.MaxTokens = chatExecutionSettings.MaxTokens ?? parameters.MaxTokens;
|
||||
|
||||
IList<KernelFunctionMetadata> functions = kernel?.Plugins.GetFunctionsMetadata().Where(x => x.PluginName == "AntSkFunctions").ToList() ?? [];
|
||||
IList<KernelFunctionMetadata> functions = kernel?.Plugins.GetFunctionsMetadata().Where(x => x.PluginName == "AntSKFunctions").ToList() ?? [];
|
||||
var functionDefs = functions.Select(func => new FunctionDef(func.Name, func.Description, func.Parameters.Select(p => new FunctionParametersDef(p.Name, p.ParameterType?.IsClass == true ? "object" : "string", p.Description, p.IsRequired)).ToList())).ToList();
|
||||
|
||||
List<ChatMessage> messages = GetSparkMessage(chatHistory);
|
||||
@@ -133,7 +133,7 @@ namespace AntSK.LLM.SparkDesk
|
||||
parameters.Temperature = (float)chatExecutionSettings.Temperature;
|
||||
parameters.MaxTokens = chatExecutionSettings.MaxTokens ?? parameters.MaxTokens;
|
||||
|
||||
IList<KernelFunctionMetadata> functions = kernel?.Plugins.GetFunctionsMetadata().Where(x => x.PluginName == "AntSkFunctions").ToList() ?? [];
|
||||
IList<KernelFunctionMetadata> functions = kernel?.Plugins.GetFunctionsMetadata().Where(x => x.PluginName == "AntSKFunctions").ToList() ?? [];
|
||||
var functionDefs = functions.Select(func => new FunctionDef(func.Name, func.Description, func.Parameters.Select(p => new FunctionParametersDef(p.Name, p.ParameterType?.IsClass == true ? "object" : "string", p.Description, p.IsRequired)).ToList())).ToList();
|
||||
List<ChatMessage> messages = GetSparkMessage(chatHistory);
|
||||
await foreach (StreamedChatResponse msg in _client.ChatAsStreamAsync(_options.ModelVersion, messages.ToArray(), parameters, functionDefs.Count > 0 ? [.. functionDefs] : null, cancellationToken: cancellationToken))
|
||||
@@ -67,7 +67,7 @@ namespace AntSK.LLM.SparkDesk
|
||||
parameters.Temperature = (float)chatExecutionSettings.Temperature;
|
||||
parameters.MaxTokens = chatExecutionSettings.MaxTokens ?? parameters.MaxTokens;
|
||||
|
||||
IList<KernelFunctionMetadata> functions = kernel?.Plugins.GetFunctionsMetadata().Where(x => x.PluginName == "AntSkFunctions").ToList() ?? [];
|
||||
IList<KernelFunctionMetadata> functions = kernel?.Plugins.GetFunctionsMetadata().Where(x => x.PluginName == "AntSKFunctions").ToList() ?? [];
|
||||
var functionDefs = functions.Select(func => new FunctionDef(func.Name, func.Description, func.Parameters.Select(p => new FunctionParametersDef(p.Name, p.ParameterType?.IsClass == true ? "object" : "string", p.Description, p.IsRequired)).ToList())).ToList();
|
||||
|
||||
//var messages = GetHistories(prompt);
|
||||
19
src/AntSK.LLamaFactory/llamafactory/api_antsk.py
Normal file
19
src/AntSK.LLamaFactory/llamafactory/api_antsk.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import os
|
||||
|
||||
import uvicorn
|
||||
|
||||
from llamafactory.api.app import create_app
|
||||
from llamafactory.chat import ChatModel
|
||||
|
||||
|
||||
def main():
|
||||
chat_model = ChatModel()
|
||||
app = create_app(chat_model)
|
||||
api_host = os.environ.get("API_HOST", "0.0.0.0")
|
||||
api_port = int(os.environ.get("API_PORT", "8000"))
|
||||
print("Visit http://localhost:{}/docs for API document.".format(api_port))
|
||||
uvicorn.run(app, host=api_host, port=api_port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,16 +0,0 @@
|
||||
import os
|
||||
|
||||
import uvicorn
|
||||
|
||||
from llmtuner import ChatModel, create_app
|
||||
|
||||
|
||||
def main():
|
||||
chat_model = ChatModel()
|
||||
app = create_app(chat_model)
|
||||
print("Visit http://localhost:{}/docs for API document.".format(os.environ.get("API_PORT", 8000)))
|
||||
uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("API_PORT", 8000)), workers=1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,6 @@
|
||||
# Level: api, webui > chat, eval, train > data, model > hparams > extras
|
||||
|
||||
from .cli import VERSION
|
||||
|
||||
|
||||
__version__ = VERSION
|
||||
108
src/AntSK.LLamaFactory/llamafactory/llamafactory/api/app.py
Normal file
108
src/AntSK.LLamaFactory/llamafactory/llamafactory/api/app.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Optional
|
||||
|
||||
from typing_extensions import Annotated
|
||||
|
||||
from ..chat import ChatModel
|
||||
from ..extras.misc import torch_gc
|
||||
from ..extras.packages import is_fastapi_available, is_starlette_available, is_uvicorn_available
|
||||
from .chat import (
|
||||
create_chat_completion_response,
|
||||
create_score_evaluation_response,
|
||||
create_stream_chat_completion_response,
|
||||
)
|
||||
from .protocol import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ModelCard,
|
||||
ModelList,
|
||||
ScoreEvaluationRequest,
|
||||
ScoreEvaluationResponse,
|
||||
)
|
||||
|
||||
|
||||
if is_fastapi_available():
|
||||
from fastapi import Depends, FastAPI, HTTPException, status
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
|
||||
|
||||
|
||||
if is_starlette_available():
|
||||
from sse_starlette import EventSourceResponse
|
||||
|
||||
|
||||
if is_uvicorn_available():
|
||||
import uvicorn
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: "FastAPI"): # collects GPU memory
|
||||
yield
|
||||
torch_gc()
|
||||
|
||||
|
||||
def create_app(chat_model: "ChatModel") -> "FastAPI":
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
api_key = os.environ.get("API_KEY")
|
||||
security = HTTPBearer(auto_error=False)
|
||||
|
||||
async def verify_api_key(auth: Annotated[Optional[HTTPAuthorizationCredentials], Depends(security)]):
|
||||
if api_key and (auth is None or auth.credentials != api_key):
|
||||
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API key.")
|
||||
|
||||
@app.get(
|
||||
"/v1/models",
|
||||
response_model=ModelList,
|
||||
status_code=status.HTTP_200_OK,
|
||||
dependencies=[Depends(verify_api_key)],
|
||||
)
|
||||
async def list_models():
|
||||
model_card = ModelCard(id="gpt-3.5-turbo")
|
||||
return ModelList(data=[model_card])
|
||||
|
||||
@app.post(
|
||||
"/v1/chat/completions",
|
||||
response_model=ChatCompletionResponse,
|
||||
status_code=status.HTTP_200_OK,
|
||||
dependencies=[Depends(verify_api_key)],
|
||||
)
|
||||
async def create_chat_completion(request: ChatCompletionRequest):
|
||||
if not chat_model.engine.can_generate:
|
||||
raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")
|
||||
|
||||
if request.stream:
|
||||
generate = create_stream_chat_completion_response(request, chat_model)
|
||||
return EventSourceResponse(generate, media_type="text/event-stream")
|
||||
else:
|
||||
return await create_chat_completion_response(request, chat_model)
|
||||
|
||||
@app.post(
|
||||
"/v1/score/evaluation",
|
||||
response_model=ScoreEvaluationResponse,
|
||||
status_code=status.HTTP_200_OK,
|
||||
dependencies=[Depends(verify_api_key)],
|
||||
)
|
||||
async def create_score_evaluation(request: ScoreEvaluationRequest):
|
||||
if chat_model.engine.can_generate:
|
||||
raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")
|
||||
|
||||
return await create_score_evaluation_response(request, chat_model)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def run_api() -> None:
|
||||
chat_model = ChatModel()
|
||||
app = create_app(chat_model)
|
||||
api_host = os.environ.get("API_HOST", "0.0.0.0")
|
||||
api_port = int(os.environ.get("API_PORT", "8000"))
|
||||
print("Visit http://localhost:{}/docs for API document.".format(api_port))
|
||||
uvicorn.run(app, host=api_host, port=api_port)
|
||||
219
src/AntSK.LLamaFactory/llamafactory/llamafactory/api/chat.py
Normal file
219
src/AntSK.LLamaFactory/llamafactory/llamafactory/api/chat.py
Normal file
@@ -0,0 +1,219 @@
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, Tuple
|
||||
|
||||
from ..data import Role as DataRole
|
||||
from ..extras.logging import get_logger
|
||||
from ..extras.packages import is_fastapi_available, is_pillow_available, is_requests_available
|
||||
from .common import dictify, jsonify
|
||||
from .protocol import (
|
||||
ChatCompletionMessage,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionResponseChoice,
|
||||
ChatCompletionResponseUsage,
|
||||
ChatCompletionStreamResponse,
|
||||
ChatCompletionStreamResponseChoice,
|
||||
Finish,
|
||||
Function,
|
||||
FunctionCall,
|
||||
Role,
|
||||
ScoreEvaluationResponse,
|
||||
)
|
||||
|
||||
|
||||
if is_fastapi_available():
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
|
||||
if is_pillow_available():
|
||||
from PIL import Image
|
||||
|
||||
|
||||
if is_requests_available():
|
||||
import requests
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from numpy.typing import NDArray
|
||||
|
||||
from ..chat import ChatModel
|
||||
from .protocol import ChatCompletionRequest, ScoreEvaluationRequest
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
ROLE_MAPPING = {
|
||||
Role.USER: DataRole.USER.value,
|
||||
Role.ASSISTANT: DataRole.ASSISTANT.value,
|
||||
Role.SYSTEM: DataRole.SYSTEM.value,
|
||||
Role.FUNCTION: DataRole.FUNCTION.value,
|
||||
Role.TOOL: DataRole.OBSERVATION.value,
|
||||
}
|
||||
|
||||
|
||||
def _process_request(
|
||||
request: "ChatCompletionRequest",
|
||||
) -> Tuple[List[Dict[str, str]], Optional[str], Optional[str], Optional["NDArray"]]:
|
||||
logger.info("==== request ====\n{}".format(json.dumps(dictify(request), indent=2, ensure_ascii=False)))
|
||||
|
||||
if len(request.messages) == 0:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length")
|
||||
|
||||
if request.messages[0].role == Role.SYSTEM:
|
||||
system = request.messages.pop(0).content
|
||||
else:
|
||||
system = None
|
||||
|
||||
if len(request.messages) % 2 == 0:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...")
|
||||
|
||||
input_messages = []
|
||||
image = None
|
||||
for i, message in enumerate(request.messages):
|
||||
if i % 2 == 0 and message.role not in [Role.USER, Role.TOOL]:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role")
|
||||
elif i % 2 == 1 and message.role not in [Role.ASSISTANT, Role.FUNCTION]:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role")
|
||||
|
||||
if message.role == Role.ASSISTANT and isinstance(message.tool_calls, list) and len(message.tool_calls):
|
||||
name = message.tool_calls[0].function.name
|
||||
arguments = message.tool_calls[0].function.arguments
|
||||
content = json.dumps({"name": name, "argument": arguments}, ensure_ascii=False)
|
||||
input_messages.append({"role": ROLE_MAPPING[Role.FUNCTION], "content": content})
|
||||
elif isinstance(message.content, list):
|
||||
for input_item in message.content:
|
||||
if input_item.type == "text":
|
||||
input_messages.append({"role": ROLE_MAPPING[message.role], "content": input_item.text})
|
||||
else:
|
||||
image_url = input_item.image_url.url
|
||||
if image_url.startswith("data:image"): # base64 image
|
||||
image_data = base64.b64decode(image_url.split(",", maxsplit=1)[1])
|
||||
image_path = io.BytesIO(image_data)
|
||||
elif os.path.isfile(image_url): # local file
|
||||
image_path = open(image_url, "rb")
|
||||
else: # web uri
|
||||
image_path = requests.get(image_url, stream=True).raw
|
||||
|
||||
image = Image.open(image_path).convert("RGB")
|
||||
else:
|
||||
input_messages.append({"role": ROLE_MAPPING[message.role], "content": message.content})
|
||||
|
||||
tool_list = request.tools
|
||||
if isinstance(tool_list, list) and len(tool_list):
|
||||
try:
|
||||
tools = json.dumps([dictify(tool.function) for tool in tool_list], ensure_ascii=False)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools")
|
||||
else:
|
||||
tools = None
|
||||
|
||||
return input_messages, system, tools, image
|
||||
|
||||
|
||||
def _create_stream_chat_completion_chunk(
|
||||
completion_id: str,
|
||||
model: str,
|
||||
delta: "ChatCompletionMessage",
|
||||
index: Optional[int] = 0,
|
||||
finish_reason: Optional["Finish"] = None,
|
||||
) -> str:
|
||||
choice_data = ChatCompletionStreamResponseChoice(index=index, delta=delta, finish_reason=finish_reason)
|
||||
chunk = ChatCompletionStreamResponse(id=completion_id, model=model, choices=[choice_data])
|
||||
return jsonify(chunk)
|
||||
|
||||
|
||||
async def create_chat_completion_response(
|
||||
request: "ChatCompletionRequest", chat_model: "ChatModel"
|
||||
) -> "ChatCompletionResponse":
|
||||
completion_id = "chatcmpl-{}".format(uuid.uuid4().hex)
|
||||
input_messages, system, tools, image = _process_request(request)
|
||||
responses = await chat_model.achat(
|
||||
input_messages,
|
||||
system,
|
||||
tools,
|
||||
image,
|
||||
do_sample=request.do_sample,
|
||||
temperature=request.temperature,
|
||||
top_p=request.top_p,
|
||||
max_new_tokens=request.max_tokens,
|
||||
num_return_sequences=request.n,
|
||||
stop=request.stop,
|
||||
)
|
||||
|
||||
prompt_length, response_length = 0, 0
|
||||
choices = []
|
||||
for i, response in enumerate(responses):
|
||||
if tools:
|
||||
result = chat_model.engine.template.format_tools.extract(response.response_text)
|
||||
else:
|
||||
result = response.response_text
|
||||
|
||||
if isinstance(result, tuple):
|
||||
name, arguments = result
|
||||
function = Function(name=name, arguments=arguments)
|
||||
tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function)
|
||||
response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=[tool_call])
|
||||
finish_reason = Finish.TOOL
|
||||
else:
|
||||
response_message = ChatCompletionMessage(role=Role.ASSISTANT, content=result)
|
||||
finish_reason = Finish.STOP if response.finish_reason == "stop" else Finish.LENGTH
|
||||
|
||||
choices.append(ChatCompletionResponseChoice(index=i, message=response_message, finish_reason=finish_reason))
|
||||
prompt_length = response.prompt_length
|
||||
response_length += response.response_length
|
||||
|
||||
usage = ChatCompletionResponseUsage(
|
||||
prompt_tokens=prompt_length,
|
||||
completion_tokens=response_length,
|
||||
total_tokens=prompt_length + response_length,
|
||||
)
|
||||
|
||||
return ChatCompletionResponse(id=completion_id, model=request.model, choices=choices, usage=usage)
|
||||
|
||||
|
||||
async def create_stream_chat_completion_response(
|
||||
request: "ChatCompletionRequest", chat_model: "ChatModel"
|
||||
) -> AsyncGenerator[str, None]:
|
||||
completion_id = "chatcmpl-{}".format(uuid.uuid4().hex)
|
||||
input_messages, system, tools, image = _process_request(request)
|
||||
if tools:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.")
|
||||
|
||||
if request.n > 1:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream multiple responses.")
|
||||
|
||||
yield _create_stream_chat_completion_chunk(
|
||||
completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(role=Role.ASSISTANT, content="")
|
||||
)
|
||||
async for new_token in chat_model.astream_chat(
|
||||
input_messages,
|
||||
system,
|
||||
tools,
|
||||
image,
|
||||
do_sample=request.do_sample,
|
||||
temperature=request.temperature,
|
||||
top_p=request.top_p,
|
||||
max_new_tokens=request.max_tokens,
|
||||
stop=request.stop,
|
||||
):
|
||||
if len(new_token) != 0:
|
||||
yield _create_stream_chat_completion_chunk(
|
||||
completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(content=new_token)
|
||||
)
|
||||
|
||||
yield _create_stream_chat_completion_chunk(
|
||||
completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(), finish_reason=Finish.STOP
|
||||
)
|
||||
yield "[DONE]"
|
||||
|
||||
|
||||
async def create_score_evaluation_response(
|
||||
request: "ScoreEvaluationRequest", chat_model: "ChatModel"
|
||||
) -> "ScoreEvaluationResponse":
|
||||
if len(request.messages) == 0:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request")
|
||||
|
||||
scores = await chat_model.aget_scores(request.messages, max_length=request.max_length)
|
||||
return ScoreEvaluationResponse(model=request.model, scores=scores)
|
||||
@@ -0,0 +1,20 @@
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Dict
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
def dictify(data: "BaseModel") -> Dict[str, Any]:
|
||||
try: # pydantic v2
|
||||
return data.model_dump(exclude_unset=True)
|
||||
except AttributeError: # pydantic v1
|
||||
return data.dict(exclude_unset=True)
|
||||
|
||||
|
||||
def jsonify(data: "BaseModel") -> str:
|
||||
try: # pydantic v2
|
||||
return json.dumps(data.model_dump(exclude_unset=True), ensure_ascii=False)
|
||||
except AttributeError: # pydantic v1
|
||||
return data.json(exclude_unset=True, ensure_ascii=False)
|
||||
@@ -1,6 +1,6 @@
|
||||
import time
|
||||
from enum import Enum, unique
|
||||
from typing import List, Optional
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import Literal
|
||||
@@ -39,15 +39,37 @@ class Function(BaseModel):
|
||||
arguments: str
|
||||
|
||||
|
||||
class FunctionDefinition(BaseModel):
|
||||
name: str
|
||||
description: str
|
||||
parameters: Dict[str, Any]
|
||||
|
||||
|
||||
class FunctionAvailable(BaseModel):
|
||||
type: Literal["function", "code_interpreter"] = "function"
|
||||
function: Optional[FunctionDefinition] = None
|
||||
|
||||
|
||||
class FunctionCall(BaseModel):
|
||||
id: Literal["call_default"] = "call_default"
|
||||
id: str
|
||||
type: Literal["function"] = "function"
|
||||
function: Function
|
||||
|
||||
|
||||
class ImageURL(BaseModel):
|
||||
url: str
|
||||
|
||||
|
||||
class MultimodalInputItem(BaseModel):
|
||||
type: Literal["text", "image_url"]
|
||||
text: Optional[str] = None
|
||||
image_url: Optional[ImageURL] = None
|
||||
|
||||
|
||||
class ChatMessage(BaseModel):
|
||||
role: Role
|
||||
content: str
|
||||
content: Optional[Union[str, List[MultimodalInputItem]]] = None
|
||||
tool_calls: Optional[List[FunctionCall]] = None
|
||||
|
||||
|
||||
class ChatCompletionMessage(BaseModel):
|
||||
@@ -59,12 +81,13 @@ class ChatCompletionMessage(BaseModel):
|
||||
class ChatCompletionRequest(BaseModel):
|
||||
model: str
|
||||
messages: List[ChatMessage]
|
||||
tools: list = []
|
||||
tools: Optional[List[FunctionAvailable]] = None
|
||||
do_sample: bool = True
|
||||
temperature: Optional[float] = None
|
||||
top_p: Optional[float] = None
|
||||
n: int = 1
|
||||
max_tokens: Optional[int] = None
|
||||
stop: Optional[Union[str, List[str]]] = None
|
||||
stream: bool = False
|
||||
|
||||
|
||||
@@ -74,7 +97,7 @@ class ChatCompletionResponseChoice(BaseModel):
|
||||
finish_reason: Finish
|
||||
|
||||
|
||||
class ChatCompletionResponseStreamChoice(BaseModel):
|
||||
class ChatCompletionStreamResponseChoice(BaseModel):
|
||||
index: int
|
||||
delta: ChatCompletionMessage
|
||||
finish_reason: Optional[Finish] = None
|
||||
@@ -87,7 +110,7 @@ class ChatCompletionResponseUsage(BaseModel):
|
||||
|
||||
|
||||
class ChatCompletionResponse(BaseModel):
|
||||
id: Literal["chatcmpl-default"] = "chatcmpl-default"
|
||||
id: str
|
||||
object: Literal["chat.completion"] = "chat.completion"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
@@ -96,11 +119,11 @@ class ChatCompletionResponse(BaseModel):
|
||||
|
||||
|
||||
class ChatCompletionStreamResponse(BaseModel):
|
||||
id: Literal["chatcmpl-default"] = "chatcmpl-default"
|
||||
id: str
|
||||
object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
choices: List[ChatCompletionResponseStreamChoice]
|
||||
choices: List[ChatCompletionStreamResponseChoice]
|
||||
|
||||
|
||||
class ScoreEvaluationRequest(BaseModel):
|
||||
@@ -110,7 +133,7 @@ class ScoreEvaluationRequest(BaseModel):
|
||||
|
||||
|
||||
class ScoreEvaluationResponse(BaseModel):
|
||||
id: Literal["scoreeval-default"] = "scoreeval-default"
|
||||
id: str
|
||||
object: Literal["score.evaluation"] = "score.evaluation"
|
||||
model: str
|
||||
scores: List[float]
|
||||
@@ -4,15 +4,13 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Literal, Opti
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from numpy.typing import NDArray
|
||||
from transformers import PreTrainedModel, PreTrainedTokenizer
|
||||
from vllm import AsyncLLMEngine
|
||||
|
||||
from ..data import Template
|
||||
from ..extras.packages import is_vllm_available
|
||||
from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments
|
||||
|
||||
if is_vllm_available():
|
||||
from vllm import AsyncLLMEngine
|
||||
|
||||
|
||||
@dataclass
|
||||
class Response:
|
||||
@@ -49,6 +47,7 @@ class BaseEngine(ABC):
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> List["Response"]: ...
|
||||
|
||||
@@ -58,6 +57,7 @@ class BaseEngine(ABC):
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> AsyncGenerator[str, None]: ...
|
||||
|
||||
@@ -2,12 +2,15 @@ import asyncio
|
||||
from threading import Thread
|
||||
from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, Generator, List, Optional, Sequence
|
||||
|
||||
from ..extras.misc import torch_gc
|
||||
from ..hparams import get_infer_args
|
||||
from .hf_engine import HuggingfaceEngine
|
||||
from .vllm_engine import VllmEngine
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from numpy.typing import NDArray
|
||||
|
||||
from .base_engine import BaseEngine, Response
|
||||
|
||||
|
||||
@@ -36,9 +39,10 @@ class ChatModel:
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> List["Response"]:
|
||||
task = asyncio.run_coroutine_threadsafe(self.achat(messages, system, tools, **input_kwargs), self._loop)
|
||||
task = asyncio.run_coroutine_threadsafe(self.achat(messages, system, tools, image, **input_kwargs), self._loop)
|
||||
return task.result()
|
||||
|
||||
async def achat(
|
||||
@@ -46,18 +50,20 @@ class ChatModel:
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> List["Response"]:
|
||||
return await self.engine.chat(messages, system, tools, **input_kwargs)
|
||||
return await self.engine.chat(messages, system, tools, image, **input_kwargs)
|
||||
|
||||
def stream_chat(
|
||||
self,
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> Generator[str, None, None]:
|
||||
generator = self.astream_chat(messages, system, tools, **input_kwargs)
|
||||
generator = self.astream_chat(messages, system, tools, image, **input_kwargs)
|
||||
while True:
|
||||
try:
|
||||
task = asyncio.run_coroutine_threadsafe(generator.__anext__(), self._loop)
|
||||
@@ -70,9 +76,10 @@ class ChatModel:
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
async for new_token in self.engine.stream_chat(messages, system, tools, **input_kwargs):
|
||||
async for new_token in self.engine.stream_chat(messages, system, tools, image, **input_kwargs):
|
||||
yield new_token
|
||||
|
||||
def get_scores(
|
||||
@@ -89,3 +96,45 @@ class ChatModel:
|
||||
**input_kwargs,
|
||||
) -> List[float]:
|
||||
return await self.engine.get_scores(batch_input, **input_kwargs)
|
||||
|
||||
|
||||
def run_chat() -> None:
|
||||
try:
|
||||
import platform
|
||||
|
||||
if platform.system() != "Windows":
|
||||
import readline # noqa: F401
|
||||
except ImportError:
|
||||
print("Install `readline` for a better experience.")
|
||||
|
||||
chat_model = ChatModel()
|
||||
messages = []
|
||||
print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.")
|
||||
|
||||
while True:
|
||||
try:
|
||||
query = input("\nUser: ")
|
||||
except UnicodeDecodeError:
|
||||
print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.")
|
||||
continue
|
||||
except Exception:
|
||||
raise
|
||||
|
||||
if query.strip() == "exit":
|
||||
break
|
||||
|
||||
if query.strip() == "clear":
|
||||
messages = []
|
||||
torch_gc()
|
||||
print("History has been removed.")
|
||||
continue
|
||||
|
||||
messages.append({"role": "user", "content": query})
|
||||
print("Assistant: ", end="", flush=True)
|
||||
|
||||
response = ""
|
||||
for new_text in chat_model.stream_chat(messages):
|
||||
print(new_text, end="", flush=True)
|
||||
response += new_text
|
||||
print()
|
||||
messages.append({"role": "assistant", "content": response})
|
||||
@@ -2,25 +2,31 @@ import asyncio
|
||||
import concurrent.futures
|
||||
import os
|
||||
from threading import Thread
|
||||
from typing import TYPE_CHECKING, Any, AsyncGenerator, Callable, Dict, List, Optional, Sequence, Tuple
|
||||
from typing import TYPE_CHECKING, Any, AsyncGenerator, Callable, Dict, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
import torch
|
||||
from transformers import GenerationConfig, TextIteratorStreamer
|
||||
|
||||
from ..data import get_template_and_fix_tokenizer
|
||||
from ..extras.logging import get_logger
|
||||
from ..extras.misc import get_logits_processor
|
||||
from ..model import load_model_and_tokenizer
|
||||
from ..model import load_model, load_tokenizer
|
||||
from .base_engine import BaseEngine, Response
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers import PreTrainedModel, PreTrainedTokenizer
|
||||
from numpy.typing import NDArray
|
||||
from transformers import PreTrainedModel, PreTrainedTokenizer, ProcessorMixin
|
||||
from transformers.image_processing_utils import BaseImageProcessor
|
||||
from trl import PreTrainedModelWrapper
|
||||
|
||||
from ..data import Template
|
||||
from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class HuggingfaceEngine(BaseEngine):
|
||||
def __init__(
|
||||
self,
|
||||
@@ -30,55 +36,96 @@ class HuggingfaceEngine(BaseEngine):
|
||||
generating_args: "GeneratingArguments",
|
||||
) -> None:
|
||||
self.can_generate = finetuning_args.stage == "sft"
|
||||
self.model, self.tokenizer = load_model_and_tokenizer(
|
||||
model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate)
|
||||
)
|
||||
tokenizer_module = load_tokenizer(model_args)
|
||||
self.tokenizer = tokenizer_module["tokenizer"]
|
||||
self.processor = tokenizer_module["processor"]
|
||||
self.tokenizer.padding_side = "left" if self.can_generate else "right"
|
||||
self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template)
|
||||
self.model = load_model(
|
||||
self.tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate)
|
||||
) # must after fixing tokenizer to resize vocab
|
||||
self.generating_args = generating_args.to_dict()
|
||||
|
||||
@staticmethod
|
||||
def _process_args(
|
||||
model: "PreTrainedModel",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
template: "Template",
|
||||
generating_args: Dict[str, Any],
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
input_kwargs: Optional[Dict[str, Any]] = {},
|
||||
) -> Tuple[Dict[str, Any], int]:
|
||||
if (
|
||||
processor is not None
|
||||
and image is not None
|
||||
and not hasattr(processor, "image_seq_length")
|
||||
and template.image_token not in messages[0]["content"]
|
||||
): # llava-like models
|
||||
messages[0]["content"] = template.image_token + messages[0]["content"]
|
||||
|
||||
paired_messages = messages + [{"role": "assistant", "content": ""}]
|
||||
system = system or generating_args["default_system"]
|
||||
pixel_values = None
|
||||
prompt_ids, _ = template.encode_oneturn(
|
||||
tokenizer=tokenizer, messages=paired_messages, system=system, tools=tools
|
||||
)
|
||||
if processor is not None and image is not None: # add image features
|
||||
image_processor: "BaseImageProcessor" = getattr(processor, "image_processor")
|
||||
batch_feature = image_processor(image, return_tensors="pt")
|
||||
pixel_values = batch_feature.to(model.device)["pixel_values"] # shape (B, C, H, W)
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token)
|
||||
prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids
|
||||
|
||||
prompt_length = len(prompt_ids)
|
||||
inputs = torch.tensor([prompt_ids], device=model.device)
|
||||
attention_mask = torch.ones_like(inputs, dtype=torch.bool)
|
||||
|
||||
do_sample = input_kwargs.pop("do_sample", None)
|
||||
temperature = input_kwargs.pop("temperature", None)
|
||||
top_p = input_kwargs.pop("top_p", None)
|
||||
top_k = input_kwargs.pop("top_k", None)
|
||||
num_return_sequences = input_kwargs.pop("num_return_sequences", None)
|
||||
repetition_penalty = input_kwargs.pop("repetition_penalty", None)
|
||||
max_length = input_kwargs.pop("max_length", None)
|
||||
max_new_tokens = input_kwargs.pop("max_new_tokens", None)
|
||||
do_sample: Optional[bool] = input_kwargs.pop("do_sample", None)
|
||||
temperature: Optional[float] = input_kwargs.pop("temperature", None)
|
||||
top_p: Optional[float] = input_kwargs.pop("top_p", None)
|
||||
top_k: Optional[float] = input_kwargs.pop("top_k", None)
|
||||
num_return_sequences: int = input_kwargs.pop("num_return_sequences", 1)
|
||||
repetition_penalty: Optional[float] = input_kwargs.pop("repetition_penalty", None)
|
||||
length_penalty: Optional[float] = input_kwargs.pop("length_penalty", None)
|
||||
max_length: Optional[int] = input_kwargs.pop("max_length", None)
|
||||
max_new_tokens: Optional[int] = input_kwargs.pop("max_new_tokens", None)
|
||||
stop: Optional[Union[str, List[str]]] = input_kwargs.pop("stop", None)
|
||||
|
||||
if stop is not None:
|
||||
logger.warning("Stop parameter is not supported in Huggingface engine yet.")
|
||||
|
||||
generating_args = generating_args.copy()
|
||||
generating_args.update(
|
||||
dict(
|
||||
do_sample=do_sample if do_sample is not None else generating_args["do_sample"],
|
||||
temperature=temperature or generating_args["temperature"],
|
||||
top_p=top_p or generating_args["top_p"],
|
||||
top_k=top_k or generating_args["top_k"],
|
||||
num_return_sequences=num_return_sequences or 1,
|
||||
repetition_penalty=repetition_penalty or generating_args["repetition_penalty"],
|
||||
temperature=temperature if temperature is not None else generating_args["temperature"],
|
||||
top_p=top_p if top_p is not None else generating_args["top_p"],
|
||||
top_k=top_k if top_k is not None else generating_args["top_k"],
|
||||
num_return_sequences=num_return_sequences,
|
||||
repetition_penalty=repetition_penalty
|
||||
if repetition_penalty is not None
|
||||
else generating_args["repetition_penalty"],
|
||||
length_penalty=length_penalty if length_penalty is not None else generating_args["length_penalty"],
|
||||
eos_token_id=[tokenizer.eos_token_id] + tokenizer.additional_special_tokens_ids,
|
||||
pad_token_id=tokenizer.pad_token_id,
|
||||
)
|
||||
)
|
||||
|
||||
if isinstance(num_return_sequences, int) and num_return_sequences > 1:
|
||||
if isinstance(num_return_sequences, int) and num_return_sequences > 1: # do_sample needs temperature > 0
|
||||
generating_args["do_sample"] = True
|
||||
generating_args["temperature"] = generating_args["temperature"] or 1.0
|
||||
|
||||
if not generating_args["temperature"]:
|
||||
generating_args["do_sample"] = False
|
||||
|
||||
if not generating_args["do_sample"]:
|
||||
generating_args.pop("temperature", None)
|
||||
generating_args.pop("top_p", None)
|
||||
|
||||
if max_length:
|
||||
generating_args.pop("max_new_tokens", None)
|
||||
@@ -90,10 +137,14 @@ class HuggingfaceEngine(BaseEngine):
|
||||
|
||||
gen_kwargs = dict(
|
||||
inputs=inputs,
|
||||
attention_mask=attention_mask,
|
||||
generation_config=GenerationConfig(**generating_args),
|
||||
logits_processor=get_logits_processor(),
|
||||
)
|
||||
|
||||
if pixel_values is not None:
|
||||
gen_kwargs["pixel_values"] = pixel_values
|
||||
|
||||
return gen_kwargs, prompt_length
|
||||
|
||||
@staticmethod
|
||||
@@ -101,15 +152,17 @@ class HuggingfaceEngine(BaseEngine):
|
||||
def _chat(
|
||||
model: "PreTrainedModel",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
template: "Template",
|
||||
generating_args: Dict[str, Any],
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
input_kwargs: Optional[Dict[str, Any]] = {},
|
||||
) -> List["Response"]:
|
||||
gen_kwargs, prompt_length = HuggingfaceEngine._process_args(
|
||||
model, tokenizer, template, generating_args, messages, system, tools, input_kwargs
|
||||
model, tokenizer, processor, template, generating_args, messages, system, tools, image, input_kwargs
|
||||
)
|
||||
generate_output = model.generate(**gen_kwargs)
|
||||
response_ids = generate_output[:, prompt_length:]
|
||||
@@ -134,15 +187,17 @@ class HuggingfaceEngine(BaseEngine):
|
||||
def _stream_chat(
|
||||
model: "PreTrainedModel",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
template: "Template",
|
||||
generating_args: Dict[str, Any],
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
input_kwargs: Optional[Dict[str, Any]] = {},
|
||||
) -> Callable[[], str]:
|
||||
gen_kwargs, _ = HuggingfaceEngine._process_args(
|
||||
model, tokenizer, template, generating_args, messages, system, tools, input_kwargs
|
||||
model, tokenizer, processor, template, generating_args, messages, system, tools, image, input_kwargs
|
||||
)
|
||||
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
||||
gen_kwargs["streamer"] = streamer
|
||||
@@ -198,6 +253,7 @@ class HuggingfaceEngine(BaseEngine):
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> List["Response"]:
|
||||
if not self.can_generate:
|
||||
@@ -207,11 +263,13 @@ class HuggingfaceEngine(BaseEngine):
|
||||
input_args = (
|
||||
self.model,
|
||||
self.tokenizer,
|
||||
self.processor,
|
||||
self.template,
|
||||
self.generating_args,
|
||||
messages,
|
||||
system,
|
||||
tools,
|
||||
image,
|
||||
input_kwargs,
|
||||
)
|
||||
async with self._semaphore:
|
||||
@@ -223,6 +281,7 @@ class HuggingfaceEngine(BaseEngine):
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
if not self.can_generate:
|
||||
@@ -232,11 +291,13 @@ class HuggingfaceEngine(BaseEngine):
|
||||
input_args = (
|
||||
self.model,
|
||||
self.tokenizer,
|
||||
self.processor,
|
||||
self.template,
|
||||
self.generating_args,
|
||||
messages,
|
||||
system,
|
||||
tools,
|
||||
image,
|
||||
input_kwargs,
|
||||
)
|
||||
async with self._semaphore:
|
||||
@@ -0,0 +1,214 @@
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union
|
||||
|
||||
from ..data import get_template_and_fix_tokenizer
|
||||
from ..extras.logging import get_logger
|
||||
from ..extras.misc import get_device_count
|
||||
from ..extras.packages import is_vllm_available
|
||||
from ..model import load_config, load_tokenizer
|
||||
from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM
|
||||
from .base_engine import BaseEngine, Response
|
||||
|
||||
|
||||
if is_vllm_available():
|
||||
from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.sequence import MultiModalData
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from numpy.typing import NDArray
|
||||
from transformers.image_processing_utils import BaseImageProcessor
|
||||
|
||||
from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class VllmEngine(BaseEngine):
|
||||
def __init__(
|
||||
self,
|
||||
model_args: "ModelArguments",
|
||||
data_args: "DataArguments",
|
||||
finetuning_args: "FinetuningArguments",
|
||||
generating_args: "GeneratingArguments",
|
||||
) -> None:
|
||||
config = load_config(model_args) # may download model from ms hub
|
||||
|
||||
self.can_generate = finetuning_args.stage == "sft"
|
||||
tokenizer_module = load_tokenizer(model_args)
|
||||
self.tokenizer = tokenizer_module["tokenizer"]
|
||||
self.processor = tokenizer_module["processor"]
|
||||
self.tokenizer.padding_side = "left"
|
||||
self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template)
|
||||
self.generating_args = generating_args.to_dict()
|
||||
|
||||
engine_args = {
|
||||
"model": model_args.model_name_or_path,
|
||||
"trust_remote_code": True,
|
||||
"download_dir": model_args.cache_dir,
|
||||
"dtype": model_args.vllm_dtype,
|
||||
"max_model_len": model_args.vllm_maxlen,
|
||||
"tensor_parallel_size": get_device_count() or 1,
|
||||
"gpu_memory_utilization": model_args.vllm_gpu_util,
|
||||
"disable_log_stats": True,
|
||||
"disable_log_requests": True,
|
||||
"enforce_eager": model_args.vllm_enforce_eager,
|
||||
"enable_lora": model_args.adapter_name_or_path is not None,
|
||||
"max_lora_rank": model_args.vllm_max_lora_rank,
|
||||
}
|
||||
|
||||
if model_args.visual_inputs:
|
||||
image_size = config.vision_config.image_size
|
||||
patch_size = config.vision_config.patch_size
|
||||
self.image_feature_size = (image_size // patch_size) ** 2
|
||||
engine_args["image_input_type"] = "pixel_values"
|
||||
engine_args["image_token_id"] = self.tokenizer.convert_tokens_to_ids(self.template.image_token)
|
||||
engine_args["image_input_shape"] = "1,3,{},{}".format(image_size, image_size)
|
||||
engine_args["image_feature_size"] = self.image_feature_size
|
||||
if getattr(config, "is_yi_vl_derived_model", None):
|
||||
import vllm.model_executor.models.llava
|
||||
|
||||
logger.info("Detected Yi-VL model, applying projector patch.")
|
||||
vllm.model_executor.models.llava.LlavaMultiModalProjector = LlavaMultiModalProjectorForYiVLForVLLM
|
||||
|
||||
self.model = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(**engine_args))
|
||||
if model_args.adapter_name_or_path is not None:
|
||||
self.lora_request = LoRARequest("default", 1, model_args.adapter_name_or_path[0])
|
||||
else:
|
||||
self.lora_request = None
|
||||
|
||||
async def _generate(
|
||||
self,
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> AsyncIterator["RequestOutput"]:
|
||||
request_id = "chatcmpl-{}".format(uuid.uuid4().hex)
|
||||
|
||||
if (
|
||||
self.processor is not None
|
||||
and image is not None
|
||||
and not hasattr(self.processor, "image_seq_length")
|
||||
and self.template.image_token not in messages[0]["content"]
|
||||
): # llava-like models (TODO: paligemma models)
|
||||
messages[0]["content"] = self.template.image_token * self.image_feature_size + messages[0]["content"]
|
||||
|
||||
paired_messages = messages + [{"role": "assistant", "content": ""}]
|
||||
system = system or self.generating_args["default_system"]
|
||||
prompt_ids, _ = self.template.encode_oneturn(
|
||||
tokenizer=self.tokenizer, messages=paired_messages, system=system, tools=tools
|
||||
)
|
||||
|
||||
if self.processor is not None and image is not None: # add image features
|
||||
image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor")
|
||||
pixel_values = image_processor(image, return_tensors="pt")["pixel_values"]
|
||||
multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values)
|
||||
else:
|
||||
multi_modal_data = None
|
||||
|
||||
prompt_length = len(prompt_ids)
|
||||
|
||||
use_beam_search: bool = self.generating_args["num_beams"] > 1
|
||||
temperature: Optional[float] = input_kwargs.pop("temperature", None)
|
||||
top_p: Optional[float] = input_kwargs.pop("top_p", None)
|
||||
top_k: Optional[float] = input_kwargs.pop("top_k", None)
|
||||
num_return_sequences: int = input_kwargs.pop("num_return_sequences", 1)
|
||||
repetition_penalty: Optional[float] = input_kwargs.pop("repetition_penalty", None)
|
||||
length_penalty: Optional[float] = input_kwargs.pop("length_penalty", None)
|
||||
max_length: Optional[int] = input_kwargs.pop("max_length", None)
|
||||
max_new_tokens: Optional[int] = input_kwargs.pop("max_new_tokens", None)
|
||||
stop: Optional[Union[str, List[str]]] = input_kwargs.pop("stop", None)
|
||||
|
||||
if "max_new_tokens" in self.generating_args:
|
||||
max_tokens = self.generating_args["max_new_tokens"]
|
||||
elif "max_length" in self.generating_args:
|
||||
if self.generating_args["max_length"] > prompt_length:
|
||||
max_tokens = self.generating_args["max_length"] - prompt_length
|
||||
else:
|
||||
max_tokens = 1
|
||||
|
||||
if max_length:
|
||||
max_tokens = max_length - prompt_length if max_length > prompt_length else 1
|
||||
|
||||
if max_new_tokens:
|
||||
max_tokens = max_new_tokens
|
||||
|
||||
sampling_params = SamplingParams(
|
||||
n=num_return_sequences,
|
||||
repetition_penalty=(
|
||||
repetition_penalty if repetition_penalty is not None else self.generating_args["repetition_penalty"]
|
||||
)
|
||||
or 1.0, # repetition_penalty must > 0
|
||||
temperature=temperature if temperature is not None else self.generating_args["temperature"],
|
||||
top_p=(top_p if top_p is not None else self.generating_args["top_p"]) or 1.0, # top_p must > 0
|
||||
top_k=top_k if top_k is not None else self.generating_args["top_k"],
|
||||
use_beam_search=use_beam_search,
|
||||
length_penalty=length_penalty if length_penalty is not None else self.generating_args["length_penalty"],
|
||||
stop=stop,
|
||||
stop_token_ids=[self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids,
|
||||
max_tokens=max_tokens,
|
||||
skip_special_tokens=True,
|
||||
)
|
||||
|
||||
result_generator = self.model.generate(
|
||||
inputs={"prompt_token_ids": prompt_ids, "multi_modal_data": multi_modal_data},
|
||||
sampling_params=sampling_params,
|
||||
request_id=request_id,
|
||||
lora_request=self.lora_request,
|
||||
)
|
||||
return result_generator
|
||||
|
||||
async def start(self) -> None:
|
||||
pass
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> List["Response"]:
|
||||
final_output = None
|
||||
generator = await self._generate(messages, system, tools, image, **input_kwargs)
|
||||
async for request_output in generator:
|
||||
final_output = request_output
|
||||
|
||||
results = []
|
||||
for output in final_output.outputs:
|
||||
results.append(
|
||||
Response(
|
||||
response_text=output.text,
|
||||
response_length=len(output.token_ids),
|
||||
prompt_length=len(final_output.prompt_token_ids),
|
||||
finish_reason=output.finish_reason,
|
||||
)
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
async def stream_chat(
|
||||
self,
|
||||
messages: Sequence[Dict[str, str]],
|
||||
system: Optional[str] = None,
|
||||
tools: Optional[str] = None,
|
||||
image: Optional["NDArray"] = None,
|
||||
**input_kwargs,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
generated_text = ""
|
||||
generator = await self._generate(messages, system, tools, image, **input_kwargs)
|
||||
async for result in generator:
|
||||
delta_text = result.outputs[0].text[len(generated_text) :]
|
||||
generated_text = result.outputs[0].text
|
||||
yield delta_text
|
||||
|
||||
async def get_scores(
|
||||
self,
|
||||
batch_input: List[str],
|
||||
**input_kwargs,
|
||||
) -> List[float]:
|
||||
raise NotImplementedError("vLLM engine does not support get_scores.")
|
||||
106
src/AntSK.LLamaFactory/llamafactory/llamafactory/cli.py
Normal file
106
src/AntSK.LLamaFactory/llamafactory/llamafactory/cli.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
import sys
|
||||
from enum import Enum, unique
|
||||
|
||||
from . import launcher
|
||||
from .api.app import run_api
|
||||
from .chat.chat_model import run_chat
|
||||
from .eval.evaluator import run_eval
|
||||
from .extras.env import VERSION, print_env
|
||||
from .extras.logging import get_logger
|
||||
from .extras.misc import get_device_count
|
||||
from .train.tuner import export_model, run_exp
|
||||
from .webui.interface import run_web_demo, run_web_ui
|
||||
|
||||
|
||||
USAGE = (
|
||||
"-" * 70
|
||||
+ "\n"
|
||||
+ "| Usage: |\n"
|
||||
+ "| llamafactory-cli api -h: launch an OpenAI-style API server |\n"
|
||||
+ "| llamafactory-cli chat -h: launch a chat interface in CLI |\n"
|
||||
+ "| llamafactory-cli eval -h: evaluate models |\n"
|
||||
+ "| llamafactory-cli export -h: merge LoRA adapters and export model |\n"
|
||||
+ "| llamafactory-cli train -h: train models |\n"
|
||||
+ "| llamafactory-cli webchat -h: launch a chat interface in Web UI |\n"
|
||||
+ "| llamafactory-cli webui: launch LlamaBoard |\n"
|
||||
+ "| llamafactory-cli version: show version info |\n"
|
||||
+ "-" * 70
|
||||
)
|
||||
|
||||
WELCOME = (
|
||||
"-" * 58
|
||||
+ "\n"
|
||||
+ "| Welcome to LLaMA Factory, version {}".format(VERSION)
|
||||
+ " " * (21 - len(VERSION))
|
||||
+ "|\n|"
|
||||
+ " " * 56
|
||||
+ "|\n"
|
||||
+ "| Project page: https://github.com/hiyouga/LLaMA-Factory |\n"
|
||||
+ "-" * 58
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@unique
|
||||
class Command(str, Enum):
|
||||
API = "api"
|
||||
CHAT = "chat"
|
||||
ENV = "env"
|
||||
EVAL = "eval"
|
||||
EXPORT = "export"
|
||||
TRAIN = "train"
|
||||
WEBDEMO = "webchat"
|
||||
WEBUI = "webui"
|
||||
VER = "version"
|
||||
HELP = "help"
|
||||
|
||||
|
||||
def main():
|
||||
command = sys.argv.pop(1)
|
||||
if command == Command.API:
|
||||
run_api()
|
||||
elif command == Command.CHAT:
|
||||
run_chat()
|
||||
elif command == Command.ENV:
|
||||
print_env()
|
||||
elif command == Command.EVAL:
|
||||
run_eval()
|
||||
elif command == Command.EXPORT:
|
||||
export_model()
|
||||
elif command == Command.TRAIN:
|
||||
force_torchrun = os.environ.get("FORCE_TORCHRUN", "0").lower() in ["true", "1"]
|
||||
if force_torchrun or get_device_count() > 1:
|
||||
master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
|
||||
master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999)))
|
||||
logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port))
|
||||
subprocess.run(
|
||||
(
|
||||
"torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} "
|
||||
"--master_addr {master_addr} --master_port {master_port} {file_name} {args}"
|
||||
).format(
|
||||
nnodes=os.environ.get("NNODES", "1"),
|
||||
node_rank=os.environ.get("RANK", "0"),
|
||||
nproc_per_node=os.environ.get("NPROC_PER_NODE", str(get_device_count())),
|
||||
master_addr=master_addr,
|
||||
master_port=master_port,
|
||||
file_name=launcher.__file__,
|
||||
args=" ".join(sys.argv[1:]),
|
||||
),
|
||||
shell=True,
|
||||
)
|
||||
else:
|
||||
run_exp()
|
||||
elif command == Command.WEBDEMO:
|
||||
run_web_demo()
|
||||
elif command == Command.WEBUI:
|
||||
run_web_ui()
|
||||
elif command == Command.VER:
|
||||
print(WELCOME)
|
||||
elif command == Command.HELP:
|
||||
print(USAGE)
|
||||
else:
|
||||
raise NotImplementedError("Unknown command: {}".format(command))
|
||||
@@ -0,0 +1,16 @@
|
||||
from .collator import KTODataCollatorWithPadding, PairwiseDataCollatorWithPadding
|
||||
from .data_utils import Role, split_dataset
|
||||
from .loader import get_dataset
|
||||
from .template import TEMPLATES, Template, get_template_and_fix_tokenizer
|
||||
|
||||
|
||||
__all__ = [
|
||||
"KTODataCollatorWithPadding",
|
||||
"PairwiseDataCollatorWithPadding",
|
||||
"Role",
|
||||
"split_dataset",
|
||||
"get_dataset",
|
||||
"TEMPLATES",
|
||||
"Template",
|
||||
"get_template_and_fix_tokenizer",
|
||||
]
|
||||
221
src/AntSK.LLamaFactory/llamafactory/llamafactory/data/aligner.py
Normal file
221
src/AntSK.LLamaFactory/llamafactory/llamafactory/data/aligner.py
Normal file
@@ -0,0 +1,221 @@
|
||||
import os
|
||||
from functools import partial
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Union
|
||||
|
||||
from datasets import Features
|
||||
|
||||
from ..extras.logging import get_logger
|
||||
from .data_utils import Role
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from datasets import Dataset, IterableDataset
|
||||
|
||||
from ..hparams import DataArguments
|
||||
from .parser import DatasetAttr
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _convert_images(images: List[Any], dataset_attr: "DatasetAttr", data_args: "DataArguments") -> List[Any]:
|
||||
r"""
|
||||
Optionally concatenates image path to dataset dir when loading from local disk.
|
||||
"""
|
||||
outputs = []
|
||||
if dataset_attr.load_from in ["script", "file"]:
|
||||
for image in images:
|
||||
if isinstance(image, str) and os.path.isfile(os.path.join(data_args.dataset_dir, image)):
|
||||
outputs.append(os.path.join(data_args.dataset_dir, image))
|
||||
else:
|
||||
outputs.append(image)
|
||||
|
||||
return outputs
|
||||
|
||||
|
||||
def convert_alpaca(
|
||||
examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr", data_args: "DataArguments"
|
||||
) -> Dict[str, List[Any]]:
|
||||
r"""
|
||||
Converts alpaca format dataset to the standard format.
|
||||
"""
|
||||
outputs = {"prompt": [], "response": [], "system": [], "tools": [], "images": []}
|
||||
convert_images = partial(_convert_images, dataset_attr=dataset_attr, data_args=data_args)
|
||||
for i in range(len(examples[dataset_attr.prompt])):
|
||||
prompt = []
|
||||
if dataset_attr.history and isinstance(examples[dataset_attr.history][i], list):
|
||||
for old_prompt, old_response in examples[dataset_attr.history][i]:
|
||||
prompt.append({"role": Role.USER.value, "content": old_prompt})
|
||||
prompt.append({"role": Role.ASSISTANT.value, "content": old_response})
|
||||
|
||||
content = []
|
||||
if dataset_attr.prompt and examples[dataset_attr.prompt][i]:
|
||||
content.append(examples[dataset_attr.prompt][i])
|
||||
|
||||
if dataset_attr.query and examples[dataset_attr.query][i]:
|
||||
content.append(examples[dataset_attr.query][i])
|
||||
|
||||
prompt.append({"role": Role.USER.value, "content": "\n".join(content)}) # "prompt\nquery"
|
||||
|
||||
if dataset_attr.kto_tag and isinstance(examples[dataset_attr.kto_tag][i], bool): # kto example
|
||||
response = [{"role": Role.ASSISTANT.value, "content": examples[dataset_attr.response][i]}]
|
||||
if examples[dataset_attr.kto_tag][i]:
|
||||
response = response + [{"role": Role.ASSISTANT.value, "content": ""}]
|
||||
else:
|
||||
response = [{"role": Role.ASSISTANT.value, "content": ""}] + response
|
||||
elif (
|
||||
dataset_attr.ranking
|
||||
and isinstance(examples[dataset_attr.chosen][i], str)
|
||||
and isinstance(examples[dataset_attr.rejected][i], str)
|
||||
): # pairwise example
|
||||
response = [
|
||||
{"role": Role.ASSISTANT.value, "content": examples[dataset_attr.chosen][i]},
|
||||
{"role": Role.ASSISTANT.value, "content": examples[dataset_attr.rejected][i]},
|
||||
]
|
||||
elif dataset_attr.response and isinstance(examples[dataset_attr.response][i], str): # normal example
|
||||
response = [{"role": Role.ASSISTANT.value, "content": examples[dataset_attr.response][i]}]
|
||||
else: # unsupervised
|
||||
response = []
|
||||
|
||||
outputs["prompt"].append(prompt)
|
||||
outputs["response"].append(response)
|
||||
outputs["system"].append(examples[dataset_attr.system][i] if dataset_attr.system else "")
|
||||
outputs["tools"].append(examples[dataset_attr.tools][i] if dataset_attr.tools else "")
|
||||
outputs["images"].append(convert_images(examples[dataset_attr.images][i]) if dataset_attr.images else [])
|
||||
|
||||
return outputs
|
||||
|
||||
|
||||
def convert_sharegpt(
|
||||
examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr", data_args: "DataArguments"
|
||||
) -> Dict[str, List[Any]]:
|
||||
r"""
|
||||
Converts sharegpt format dataset to the standard format.
|
||||
"""
|
||||
outputs = {"prompt": [], "response": [], "system": [], "tools": [], "images": []}
|
||||
convert_images = partial(_convert_images, dataset_attr=dataset_attr, data_args=data_args)
|
||||
tag_mapping = {
|
||||
dataset_attr.user_tag: Role.USER.value,
|
||||
dataset_attr.assistant_tag: Role.ASSISTANT.value,
|
||||
dataset_attr.observation_tag: Role.OBSERVATION.value,
|
||||
dataset_attr.function_tag: Role.FUNCTION.value,
|
||||
dataset_attr.system_tag: Role.SYSTEM.value,
|
||||
}
|
||||
odd_tags = (dataset_attr.user_tag, dataset_attr.observation_tag)
|
||||
even_tags = (dataset_attr.assistant_tag, dataset_attr.function_tag)
|
||||
accept_tags = (odd_tags, even_tags)
|
||||
for i, messages in enumerate(examples[dataset_attr.messages]):
|
||||
if dataset_attr.system_tag and messages[0][dataset_attr.role_tag] == dataset_attr.system_tag:
|
||||
system = messages[0][dataset_attr.content_tag]
|
||||
messages = messages[1:]
|
||||
else:
|
||||
system = examples[dataset_attr.system][i] if dataset_attr.system else ""
|
||||
|
||||
if len(messages) == 0:
|
||||
continue
|
||||
|
||||
aligned_messages = []
|
||||
broken_data = False
|
||||
for turn_idx, message in enumerate(messages):
|
||||
if message[dataset_attr.role_tag] not in accept_tags[turn_idx % 2]:
|
||||
logger.warning("Invalid role tag in {}.".format(messages))
|
||||
broken_data = True
|
||||
|
||||
aligned_messages.append(
|
||||
{"role": tag_mapping[message[dataset_attr.role_tag]], "content": message[dataset_attr.content_tag]}
|
||||
)
|
||||
|
||||
if (not dataset_attr.ranking and len(aligned_messages) % 2 != 0) or (
|
||||
dataset_attr.ranking and len(aligned_messages) % 2 == 0
|
||||
):
|
||||
logger.warning("Invalid message count in {}.".format(messages))
|
||||
broken_data = True
|
||||
|
||||
if dataset_attr.kto_tag and isinstance(examples[dataset_attr.kto_tag][i], bool): # kto example
|
||||
prompt = aligned_messages[:-1]
|
||||
response = aligned_messages[-1:]
|
||||
if examples[dataset_attr.kto_tag][i]:
|
||||
response = response + [{"role": Role.ASSISTANT.value, "content": ""}]
|
||||
else:
|
||||
response = [{"role": Role.ASSISTANT.value, "content": ""}] + response
|
||||
elif (
|
||||
dataset_attr.ranking
|
||||
and isinstance(examples[dataset_attr.chosen][i], dict)
|
||||
and isinstance(examples[dataset_attr.rejected][i], dict)
|
||||
): # pairwise example
|
||||
chosen = examples[dataset_attr.chosen][i]
|
||||
rejected = examples[dataset_attr.rejected][i]
|
||||
if (
|
||||
chosen[dataset_attr.role_tag] not in accept_tags[-1]
|
||||
or rejected[dataset_attr.role_tag] not in accept_tags[-1]
|
||||
):
|
||||
logger.warning("Invalid role tag in {}.".format([chosen, rejected]))
|
||||
broken_data = True
|
||||
|
||||
prompt = aligned_messages
|
||||
response = [
|
||||
{"role": tag_mapping[chosen[dataset_attr.role_tag]], "content": chosen[dataset_attr.content_tag]},
|
||||
{"role": tag_mapping[rejected[dataset_attr.role_tag]], "content": rejected[dataset_attr.content_tag]},
|
||||
]
|
||||
else: # normal example
|
||||
prompt = aligned_messages[:-1]
|
||||
response = aligned_messages[-1:]
|
||||
|
||||
if broken_data:
|
||||
logger.warning("Skipping this abnormal example.")
|
||||
continue
|
||||
|
||||
outputs["prompt"].append(prompt)
|
||||
outputs["response"].append(response)
|
||||
outputs["system"].append(system)
|
||||
outputs["tools"].append(examples[dataset_attr.tools][i] if dataset_attr.tools else "")
|
||||
outputs["images"].append(convert_images(examples[dataset_attr.images][i]) if dataset_attr.images else [])
|
||||
|
||||
return outputs
|
||||
|
||||
|
||||
def align_dataset(
|
||||
dataset: Union["Dataset", "IterableDataset"], dataset_attr: "DatasetAttr", data_args: "DataArguments"
|
||||
) -> Union["Dataset", "IterableDataset"]:
|
||||
r"""
|
||||
Aligned dataset:
|
||||
prompt: [{"role": "user", "content": "..."}] * (2T - 1)
|
||||
response: [{"role": "assistant", "content": "..."}] * N (N > 1 for ranking dataset)
|
||||
system: "..."
|
||||
tools: "...",
|
||||
images: [],
|
||||
"""
|
||||
if dataset_attr.formatting == "alpaca":
|
||||
convert_func = partial(convert_alpaca, dataset_attr=dataset_attr, data_args=data_args)
|
||||
else:
|
||||
convert_func = partial(convert_sharegpt, dataset_attr=dataset_attr, data_args=data_args)
|
||||
|
||||
column_names = list(next(iter(dataset)).keys())
|
||||
features = Features.from_dict(
|
||||
{
|
||||
"prompt": [
|
||||
{"role": {"dtype": "string", "_type": "Value"}, "content": {"dtype": "string", "_type": "Value"}}
|
||||
],
|
||||
"response": [
|
||||
{"role": {"dtype": "string", "_type": "Value"}, "content": {"dtype": "string", "_type": "Value"}}
|
||||
],
|
||||
"system": {"dtype": "string", "_type": "Value"},
|
||||
"tools": {"dtype": "string", "_type": "Value"},
|
||||
"images": [{"_type": "Image"}],
|
||||
}
|
||||
)
|
||||
kwargs = {}
|
||||
if not data_args.streaming:
|
||||
kwargs = dict(
|
||||
num_proc=data_args.preprocessing_num_workers,
|
||||
load_from_cache_file=(not data_args.overwrite_cache),
|
||||
desc="Converting format of dataset",
|
||||
)
|
||||
|
||||
return dataset.map(
|
||||
convert_func,
|
||||
batched=True,
|
||||
remove_columns=column_names,
|
||||
features=features,
|
||||
**kwargs,
|
||||
)
|
||||
@@ -0,0 +1,81 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Sequence
|
||||
|
||||
import torch
|
||||
from transformers import DataCollatorForSeq2Seq
|
||||
|
||||
|
||||
@dataclass
|
||||
class PairwiseDataCollatorWithPadding(DataCollatorForSeq2Seq):
|
||||
r"""
|
||||
Data collator for pairwise data.
|
||||
"""
|
||||
|
||||
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
|
||||
r"""
|
||||
Pads batched data to the longest sequence in the batch.
|
||||
|
||||
We generate 2 * n examples where the first n examples represent chosen examples and
|
||||
the last n examples represent rejected examples.
|
||||
"""
|
||||
concatenated_features = []
|
||||
for key in ("chosen", "rejected"):
|
||||
for feature in features:
|
||||
target_feature = {
|
||||
"input_ids": feature["{}_input_ids".format(key)],
|
||||
"attention_mask": feature["{}_attention_mask".format(key)],
|
||||
"labels": feature["{}_labels".format(key)],
|
||||
}
|
||||
if "pixel_values" in feature:
|
||||
target_feature["pixel_values"] = feature["pixel_values"]
|
||||
|
||||
if "{}_token_type_ids".format(key) in feature:
|
||||
target_feature["token_type_ids"] = feature["{}_token_type_ids".format(key)]
|
||||
|
||||
concatenated_features.append(target_feature)
|
||||
|
||||
return super().__call__(concatenated_features)
|
||||
|
||||
|
||||
@dataclass
|
||||
class KTODataCollatorWithPadding(DataCollatorForSeq2Seq):
|
||||
r"""
|
||||
Data collator for KTO data.
|
||||
"""
|
||||
|
||||
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
|
||||
target_features = []
|
||||
kl_features = []
|
||||
kto_tags = []
|
||||
for feature in features:
|
||||
target_feature = {
|
||||
"input_ids": feature["input_ids"],
|
||||
"attention_mask": feature["attention_mask"],
|
||||
"labels": feature["labels"],
|
||||
}
|
||||
kl_feature = {
|
||||
"input_ids": feature["kl_input_ids"],
|
||||
"attention_mask": feature["kl_attention_mask"],
|
||||
"labels": feature["kl_labels"],
|
||||
}
|
||||
if "pixel_values" in feature:
|
||||
target_feature["pixel_values"] = feature["pixel_values"]
|
||||
|
||||
if "token_type_ids" in feature:
|
||||
target_feature["token_type_ids"] = feature["token_type_ids"]
|
||||
kl_feature["token_type_ids"] = feature["kl_token_type_ids"]
|
||||
|
||||
target_features.append(target_feature)
|
||||
kl_features.append(kl_feature)
|
||||
kto_tags.append(feature["kto_tags"])
|
||||
|
||||
batch = super().__call__(target_features)
|
||||
kl_batch = super().__call__(kl_features)
|
||||
batch["kl_input_ids"] = kl_batch["input_ids"]
|
||||
batch["kl_attention_mask"] = kl_batch["attention_mask"]
|
||||
batch["kl_labels"] = kl_batch["labels"]
|
||||
if "token_type_ids" in batch:
|
||||
batch["kl_token_type_ids"] = kl_batch["token_type_ids"]
|
||||
|
||||
batch["kto_tags"] = torch.tensor(kto_tags)
|
||||
return batch
|
||||
@@ -1,6 +1,5 @@
|
||||
import hashlib
|
||||
from enum import Enum, unique
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
|
||||
from typing import TYPE_CHECKING, Dict, List, Tuple, Union
|
||||
|
||||
from datasets import concatenate_datasets, interleave_datasets
|
||||
|
||||
@@ -11,7 +10,7 @@ if TYPE_CHECKING:
|
||||
from datasets import Dataset, IterableDataset
|
||||
from transformers import Seq2SeqTrainingArguments
|
||||
|
||||
from llmtuner.hparams import DataArguments
|
||||
from ..hparams import DataArguments
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@@ -26,25 +25,10 @@ class Role(str, Enum):
|
||||
OBSERVATION = "observation"
|
||||
|
||||
|
||||
def checksum(data_files: List[str], file_sha1: Optional[str] = None) -> None:
|
||||
if file_sha1 is None:
|
||||
logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.")
|
||||
return
|
||||
|
||||
if len(data_files) != 1:
|
||||
logger.warning("Checksum failed: too many files.")
|
||||
return
|
||||
|
||||
with open(data_files[0], "rb") as f:
|
||||
sha1 = hashlib.sha1(f.read()).hexdigest()
|
||||
if sha1 != file_sha1:
|
||||
logger.warning("Checksum failed: mismatched SHA-1 hash value at {}.".format(data_files[0]))
|
||||
|
||||
|
||||
def infer_max_len(source_len: int, target_len: int, max_len: int, reserved_label_len: int) -> Tuple[int, int]:
|
||||
max_target_len = int(max_len * (target_len / (source_len + target_len)))
|
||||
max_target_len = max(max_target_len, reserved_label_len)
|
||||
max_source_len = max_len - max_target_len
|
||||
max_source_len = max_len - min(max_target_len, target_len)
|
||||
return max_source_len, max_target_len
|
||||
|
||||
|
||||
@@ -78,9 +62,9 @@ def split_dataset(
|
||||
if training_args.do_train:
|
||||
if data_args.val_size > 1e-6: # Split the dataset
|
||||
if data_args.streaming:
|
||||
dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
|
||||
val_set = dataset.take(int(data_args.val_size))
|
||||
train_set = dataset.skip(int(data_args.val_size))
|
||||
dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
|
||||
return {"train_dataset": train_set, "eval_dataset": val_set}
|
||||
else:
|
||||
val_size = int(data_args.val_size) if data_args.val_size > 1 else data_args.val_size
|
||||
@@ -1,21 +1,24 @@
|
||||
import inspect
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Literal, Union
|
||||
import sys
|
||||
from typing import TYPE_CHECKING, Literal, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
from datasets import load_dataset, load_from_disk
|
||||
|
||||
from ..extras.constants import FILEEXT2TYPE
|
||||
from ..extras.logging import get_logger
|
||||
from ..extras.misc import has_tokenized_data
|
||||
from .aligner import align_dataset
|
||||
from .data_utils import merge_dataset
|
||||
from .parser import get_dataset_list
|
||||
from .preprocess import get_preprocess_and_print_func
|
||||
from .template import get_template_and_fix_tokenizer
|
||||
from .utils import checksum, merge_dataset
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from datasets import Dataset, IterableDataset
|
||||
from transformers import Seq2SeqTrainingArguments
|
||||
from transformers import ProcessorMixin, Seq2SeqTrainingArguments
|
||||
from transformers.tokenization_utils import PreTrainedTokenizer
|
||||
|
||||
from ..hparams import DataArguments, ModelArguments
|
||||
@@ -56,14 +59,12 @@ def load_single_dataset(
|
||||
data_files.append(local_path)
|
||||
data_path = FILEEXT2TYPE.get(local_path.split(".")[-1], None)
|
||||
else:
|
||||
raise ValueError("File not found.")
|
||||
raise ValueError("File {} not found.".format(local_path))
|
||||
|
||||
if data_path is None:
|
||||
raise ValueError("File extension must be txt, csv, json or jsonl.")
|
||||
|
||||
checksum(data_files, dataset_attr.file_sha1)
|
||||
raise ValueError("Allowed file types: {}.".format(",".join(FILEEXT2TYPE.keys())))
|
||||
else:
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError("Unknown load type: {}.".format(dataset_attr.load_from))
|
||||
|
||||
if dataset_attr.load_from == "ms_hub":
|
||||
try:
|
||||
@@ -80,7 +81,9 @@ def load_single_dataset(
|
||||
cache_dir=cache_dir,
|
||||
token=model_args.ms_hub_token,
|
||||
use_streaming=(data_args.streaming and (dataset_attr.load_from != "file")),
|
||||
).to_hf_dataset()
|
||||
)
|
||||
if isinstance(dataset, MsDataset):
|
||||
dataset = dataset.to_hf_dataset()
|
||||
except ImportError:
|
||||
raise ImportError("Please install modelscope via `pip install modelscope -U`")
|
||||
else:
|
||||
@@ -104,30 +107,43 @@ def load_single_dataset(
|
||||
if data_args.streaming and (dataset_attr.load_from == "file"): # faster than specifying streaming=True
|
||||
dataset = dataset.to_iterable_dataset() # TODO: add num shards parameter
|
||||
|
||||
if dataset_attr.num_samples is not None and not data_args.streaming:
|
||||
target_num = dataset_attr.num_samples
|
||||
indexes = np.random.permutation(len(dataset))[:target_num]
|
||||
target_num -= len(indexes)
|
||||
if target_num > 0:
|
||||
expand_indexes = np.random.choice(len(dataset), target_num)
|
||||
indexes = np.concatenate((indexes, expand_indexes), axis=0)
|
||||
|
||||
assert len(indexes) == dataset_attr.num_samples, "Sample num mismatched."
|
||||
dataset = dataset.select(indexes)
|
||||
logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr))
|
||||
|
||||
if data_args.max_samples is not None: # truncate dataset
|
||||
num_samples = min(data_args.max_samples, len(dataset))
|
||||
dataset = dataset.select(range(num_samples))
|
||||
max_samples = min(data_args.max_samples, len(dataset))
|
||||
dataset = dataset.select(range(max_samples))
|
||||
|
||||
return align_dataset(dataset, dataset_attr, data_args)
|
||||
|
||||
|
||||
def get_dataset(
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
model_args: "ModelArguments",
|
||||
data_args: "DataArguments",
|
||||
training_args: "Seq2SeqTrainingArguments",
|
||||
stage: Literal["pt", "sft", "rm", "ppo"],
|
||||
# split: Optional[str] = "train", # TODO: add split
|
||||
stage: Literal["pt", "sft", "rm", "ppo", "kto"],
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"] = None,
|
||||
) -> Union["Dataset", "IterableDataset"]:
|
||||
template = get_template_and_fix_tokenizer(tokenizer, data_args.template)
|
||||
if data_args.train_on_prompt and template.efficient_eos:
|
||||
raise ValueError("Current template does not support `train_on_prompt`.")
|
||||
|
||||
# Load from cache
|
||||
if data_args.cache_path is not None:
|
||||
if os.path.exists(data_args.cache_path):
|
||||
# Load tokenized dataset
|
||||
if data_args.tokenized_path is not None:
|
||||
if has_tokenized_data(data_args.tokenized_path):
|
||||
logger.warning("Loading dataset from disk will ignore other data arguments.")
|
||||
dataset = load_from_disk(data_args.cache_path)
|
||||
dataset = load_from_disk(data_args.tokenized_path)
|
||||
logger.info("Loaded tokenized dataset from {}.".format(data_args.tokenized_path))
|
||||
if data_args.streaming:
|
||||
dataset = dataset.to_iterable_dataset()
|
||||
return dataset
|
||||
@@ -138,12 +154,15 @@ def get_dataset(
|
||||
with training_args.main_process_first(desc="load dataset"):
|
||||
all_datasets = []
|
||||
for dataset_attr in get_dataset_list(data_args):
|
||||
if (stage == "rm" and dataset_attr.ranking is False) or (stage != "rm" and dataset_attr.ranking is True):
|
||||
raise ValueError("The dataset is not applicable in the current training stage.")
|
||||
|
||||
all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args))
|
||||
dataset = merge_dataset(all_datasets, data_args, training_args)
|
||||
|
||||
with training_args.main_process_first(desc="pre-process dataset"):
|
||||
preprocess_func, print_function = get_preprocess_and_print_func(
|
||||
tokenizer, template, data_args, training_args, stage
|
||||
data_args, training_args, stage, template, tokenizer, processor
|
||||
)
|
||||
column_names = list(next(iter(dataset)).keys())
|
||||
kwargs = {}
|
||||
@@ -156,15 +175,21 @@ def get_dataset(
|
||||
|
||||
dataset = dataset.map(preprocess_func, batched=True, remove_columns=column_names, **kwargs)
|
||||
|
||||
if data_args.cache_path is not None and not os.path.exists(data_args.cache_path):
|
||||
if data_args.tokenized_path is not None:
|
||||
if training_args.should_save:
|
||||
dataset.save_to_disk(data_args.cache_path)
|
||||
logger.info("Dataset cache saved at {}.".format(data_args.cache_path))
|
||||
dataset.save_to_disk(data_args.tokenized_path)
|
||||
logger.info("Tokenized dataset saved at {}.".format(data_args.tokenized_path))
|
||||
logger.info("Please restart the training with `tokenized_path: {}`.".format(data_args.tokenized_path))
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
if training_args.should_log:
|
||||
try:
|
||||
print_function(next(iter(dataset)))
|
||||
except StopIteration:
|
||||
raise RuntimeError("Cannot find valid samples, check `data/README.md` for the data format.")
|
||||
if stage == "pt":
|
||||
raise RuntimeError("Cannot find sufficient samples, consider increasing dataset size.")
|
||||
else:
|
||||
raise RuntimeError("Cannot find valid samples, check `data/README.md` for the data format.")
|
||||
|
||||
return dataset
|
||||
@@ -20,23 +20,28 @@ class DatasetAttr:
|
||||
""" basic configs """
|
||||
load_from: Literal["hf_hub", "ms_hub", "script", "file"]
|
||||
dataset_name: str
|
||||
formatting: Literal["alpaca", "sharegpt"] = "alpaca"
|
||||
ranking: bool = False
|
||||
""" extra configs """
|
||||
file_sha1: Optional[str] = None
|
||||
subset: Optional[str] = None
|
||||
folder: Optional[str] = None
|
||||
ranking: bool = False
|
||||
formatting: Literal["alpaca", "sharegpt"] = "alpaca"
|
||||
""" columns """
|
||||
num_samples: Optional[int] = None
|
||||
""" common columns """
|
||||
system: Optional[str] = None
|
||||
""" columns for the alpaca format """
|
||||
tools: Optional[str] = None
|
||||
images: Optional[str] = None
|
||||
""" rlhf columns """
|
||||
chosen: Optional[str] = None
|
||||
rejected: Optional[str] = None
|
||||
kto_tag: Optional[str] = None
|
||||
""" alpaca columns """
|
||||
prompt: Optional[str] = "instruction"
|
||||
query: Optional[str] = "input"
|
||||
response: Optional[str] = "output"
|
||||
history: Optional[str] = None
|
||||
""" columns for the sharegpt format """
|
||||
""" sharegpt columns """
|
||||
messages: Optional[str] = "conversations"
|
||||
tools: Optional[str] = None
|
||||
""" tags for the sharegpt format """
|
||||
""" sharegpt tags """
|
||||
role_tag: Optional[str] = "from"
|
||||
content_tag: Optional[str] = "value"
|
||||
user_tag: Optional[str] = "human"
|
||||
@@ -53,22 +58,35 @@ class DatasetAttr:
|
||||
|
||||
|
||||
def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]:
|
||||
dataset_names = [ds.strip() for ds in data_args.dataset.split(",")] if data_args.dataset is not None else []
|
||||
try:
|
||||
with open(os.path.join(data_args.dataset_dir, DATA_CONFIG), "r") as f:
|
||||
dataset_info = json.load(f)
|
||||
except Exception as err:
|
||||
if data_args.dataset is not None:
|
||||
raise ValueError(
|
||||
"Cannot open {} due to {}.".format(os.path.join(data_args.dataset_dir, DATA_CONFIG), str(err))
|
||||
)
|
||||
if data_args.dataset is not None:
|
||||
dataset_names = [ds.strip() for ds in data_args.dataset.split(",")]
|
||||
else:
|
||||
dataset_names = []
|
||||
|
||||
if data_args.dataset_dir == "ONLINE":
|
||||
dataset_info = None
|
||||
else:
|
||||
try:
|
||||
with open(os.path.join(data_args.dataset_dir, DATA_CONFIG), "r") as f:
|
||||
dataset_info = json.load(f)
|
||||
except Exception as err:
|
||||
if len(dataset_names) != 0:
|
||||
raise ValueError(
|
||||
"Cannot open {} due to {}.".format(os.path.join(data_args.dataset_dir, DATA_CONFIG), str(err))
|
||||
)
|
||||
dataset_info = None
|
||||
|
||||
if data_args.interleave_probs is not None:
|
||||
data_args.interleave_probs = [float(prob.strip()) for prob in data_args.interleave_probs.split(",")]
|
||||
|
||||
dataset_list: List[DatasetAttr] = []
|
||||
for name in dataset_names:
|
||||
if dataset_info is None:
|
||||
load_from = "ms_hub" if use_modelscope() else "hf_hub"
|
||||
dataset_attr = DatasetAttr(load_from, dataset_name=name)
|
||||
dataset_list.append(dataset_attr)
|
||||
continue
|
||||
|
||||
if name not in dataset_info:
|
||||
raise ValueError("Undefined dataset {} in {}.".format(name, DATA_CONFIG))
|
||||
|
||||
@@ -85,18 +103,18 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]:
|
||||
else:
|
||||
dataset_attr = DatasetAttr("file", dataset_name=dataset_info[name]["file_name"])
|
||||
|
||||
dataset_attr.set_attr("file_sha1", dataset_info[name])
|
||||
dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca")
|
||||
dataset_attr.set_attr("ranking", dataset_info[name], default=False)
|
||||
dataset_attr.set_attr("subset", dataset_info[name])
|
||||
dataset_attr.set_attr("folder", dataset_info[name])
|
||||
dataset_attr.set_attr("ranking", dataset_info[name], default=False)
|
||||
dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca")
|
||||
dataset_attr.set_attr("num_samples", dataset_info[name])
|
||||
|
||||
if "columns" in dataset_info[name]:
|
||||
column_names = ["system"]
|
||||
column_names = ["system", "tools", "images", "chosen", "rejected", "kto_tag"]
|
||||
if dataset_attr.formatting == "alpaca":
|
||||
column_names.extend(["prompt", "query", "response", "history"])
|
||||
else:
|
||||
column_names.extend(["messages", "tools"])
|
||||
column_names.extend(["messages"])
|
||||
|
||||
for column_name in column_names:
|
||||
dataset_attr.set_attr(column_name, dataset_info[name]["columns"])
|
||||
@@ -0,0 +1,84 @@
|
||||
from functools import partial
|
||||
from typing import TYPE_CHECKING, Callable, Literal, Optional, Tuple
|
||||
|
||||
from .processors.feedback import preprocess_feedback_dataset
|
||||
from .processors.pairwise import preprocess_pairwise_dataset, print_pairwise_dataset_example
|
||||
from .processors.pretrain import preprocess_pretrain_dataset
|
||||
from .processors.supervised import (
|
||||
preprocess_packed_supervised_dataset,
|
||||
preprocess_supervised_dataset,
|
||||
print_supervised_dataset_example,
|
||||
)
|
||||
from .processors.unsupervised import preprocess_unsupervised_dataset, print_unsupervised_dataset_example
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers import ProcessorMixin, Seq2SeqTrainingArguments
|
||||
from transformers.tokenization_utils import PreTrainedTokenizer
|
||||
|
||||
from ..hparams import DataArguments
|
||||
from .template import Template
|
||||
|
||||
|
||||
def get_preprocess_and_print_func(
|
||||
data_args: "DataArguments",
|
||||
training_args: "Seq2SeqTrainingArguments",
|
||||
stage: Literal["pt", "sft", "rm", "ppo", "kto"],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
) -> Tuple[Callable, Callable]:
|
||||
if stage == "pt":
|
||||
preprocess_func = partial(
|
||||
preprocess_pretrain_dataset,
|
||||
tokenizer=tokenizer,
|
||||
data_args=data_args,
|
||||
)
|
||||
print_function = partial(print_unsupervised_dataset_example, tokenizer=tokenizer)
|
||||
elif stage == "sft" and not training_args.predict_with_generate:
|
||||
if data_args.packing:
|
||||
preprocess_func = partial(
|
||||
preprocess_packed_supervised_dataset,
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
data_args=data_args,
|
||||
)
|
||||
else:
|
||||
preprocess_func = partial(
|
||||
preprocess_supervised_dataset,
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=processor,
|
||||
data_args=data_args,
|
||||
)
|
||||
|
||||
print_function = partial(print_supervised_dataset_example, tokenizer=tokenizer)
|
||||
elif stage == "rm":
|
||||
preprocess_func = partial(
|
||||
preprocess_pairwise_dataset,
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=processor,
|
||||
data_args=data_args,
|
||||
)
|
||||
print_function = partial(print_pairwise_dataset_example, tokenizer=tokenizer)
|
||||
elif stage == "kto":
|
||||
preprocess_func = partial(
|
||||
preprocess_feedback_dataset,
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=processor,
|
||||
data_args=data_args,
|
||||
)
|
||||
print_function = partial(print_supervised_dataset_example, tokenizer=tokenizer)
|
||||
else:
|
||||
preprocess_func = partial(
|
||||
preprocess_unsupervised_dataset,
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=processor,
|
||||
data_args=data_args,
|
||||
)
|
||||
print_function = partial(print_unsupervised_dataset_example, tokenizer=tokenizer)
|
||||
|
||||
return preprocess_func, print_function
|
||||
@@ -0,0 +1,126 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
from ...extras.constants import IGNORE_INDEX
|
||||
from ...extras.logging import get_logger
|
||||
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers import ProcessorMixin
|
||||
from transformers.tokenization_utils import PreTrainedTokenizer
|
||||
|
||||
from ...hparams import DataArguments
|
||||
from ..template import Template
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _encode_feedback_example(
|
||||
prompt: Sequence[Dict[str, str]],
|
||||
response: Sequence[Dict[str, str]],
|
||||
kl_response: Sequence[Dict[str, str]],
|
||||
system: Optional[str],
|
||||
tools: Optional[str],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
data_args: "DataArguments",
|
||||
) -> Tuple[List[int], List[int], List[int], List[int], bool]:
|
||||
if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models
|
||||
prompt[0]["content"] = template.image_token + prompt[0]["content"]
|
||||
|
||||
if response[0]["content"]: # desired example
|
||||
kto_tag = True
|
||||
messages = prompt + [response[0]]
|
||||
else: # undesired example
|
||||
kto_tag = False
|
||||
messages = prompt + [response[1]]
|
||||
|
||||
if kl_response[0]["content"]:
|
||||
kl_messages = prompt + [kl_response[0]]
|
||||
else:
|
||||
kl_messages = prompt + [kl_response[1]]
|
||||
|
||||
prompt_ids, response_ids = template.encode_oneturn(
|
||||
tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
|
||||
)
|
||||
_, kl_response_ids = template.encode_oneturn(
|
||||
tokenizer, kl_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
|
||||
)
|
||||
|
||||
if template.efficient_eos:
|
||||
response_ids += [tokenizer.eos_token_id]
|
||||
kl_response_ids += [tokenizer.eos_token_id]
|
||||
|
||||
if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models
|
||||
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token)
|
||||
prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids
|
||||
|
||||
input_ids = prompt_ids + response_ids
|
||||
labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids
|
||||
kl_input_ids = prompt_ids + kl_response_ids
|
||||
kl_labels = [IGNORE_INDEX] * len(prompt_ids) + kl_response_ids
|
||||
|
||||
return input_ids, labels, kl_input_ids, kl_labels, kto_tag
|
||||
|
||||
|
||||
def preprocess_feedback_dataset(
|
||||
examples: Dict[str, List[Any]],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
data_args: "DataArguments",
|
||||
) -> Dict[str, List[List[int]]]:
|
||||
# create unrelated input-output pairs for estimating the KL term by flipping the matched pairs
|
||||
kl_response = examples["response"][::-1]
|
||||
model_inputs = {
|
||||
"input_ids": [],
|
||||
"attention_mask": [],
|
||||
"labels": [],
|
||||
"kl_input_ids": [],
|
||||
"kl_attention_mask": [],
|
||||
"kl_labels": [],
|
||||
"kto_tags": [],
|
||||
}
|
||||
if processor is not None:
|
||||
model_inputs["pixel_values"] = []
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
model_inputs["token_type_ids"] = []
|
||||
model_inputs["kl_token_type_ids"] = []
|
||||
|
||||
for i in range(len(examples["prompt"])):
|
||||
if len(examples["prompt"][i]) % 2 != 1 or len(examples["response"][i]) < 2:
|
||||
logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i]))
|
||||
continue
|
||||
|
||||
input_ids, labels, kl_input_ids, kl_labels, kto_tag = _encode_feedback_example(
|
||||
prompt=examples["prompt"][i],
|
||||
response=examples["response"][i],
|
||||
kl_response=kl_response[i],
|
||||
system=examples["system"][i],
|
||||
tools=examples["tools"][i],
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=processor,
|
||||
data_args=data_args,
|
||||
)
|
||||
model_inputs["input_ids"].append(input_ids)
|
||||
model_inputs["attention_mask"].append([1] * len(input_ids))
|
||||
model_inputs["labels"].append(labels)
|
||||
model_inputs["kl_input_ids"].append(kl_input_ids)
|
||||
model_inputs["kl_attention_mask"].append([1] * len(kl_input_ids))
|
||||
model_inputs["kl_labels"].append(kl_labels)
|
||||
model_inputs["kto_tags"].append(kto_tag)
|
||||
if processor is not None:
|
||||
model_inputs["pixel_values"].append(get_pixel_values(examples["images"][i], processor))
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
model_inputs["token_type_ids"].append(get_paligemma_token_type_ids(len(input_ids), processor))
|
||||
model_inputs["kl_token_type_ids"].append(get_paligemma_token_type_ids(len(kl_input_ids), processor))
|
||||
|
||||
desirable_num = sum([1 for tag in model_inputs["kto_tags"] if tag])
|
||||
undesirable_num = len(model_inputs["kto_tags"]) - desirable_num
|
||||
if desirable_num == 0 or undesirable_num == 0:
|
||||
logger.warning("Your dataset only has one preference type.")
|
||||
|
||||
return model_inputs
|
||||
@@ -0,0 +1,123 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
from ...extras.constants import IGNORE_INDEX
|
||||
from ...extras.logging import get_logger
|
||||
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers import ProcessorMixin
|
||||
from transformers.tokenization_utils import PreTrainedTokenizer
|
||||
|
||||
from ...hparams import DataArguments
|
||||
from ..template import Template
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _encode_pairwise_example(
|
||||
prompt: Sequence[Dict[str, str]],
|
||||
response: Sequence[Dict[str, str]],
|
||||
system: Optional[str],
|
||||
tools: Optional[str],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
data_args: "DataArguments",
|
||||
) -> Tuple[List[int], List[int], List[int], List[int]]:
|
||||
if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models
|
||||
prompt[0]["content"] = template.image_token + prompt[0]["content"]
|
||||
|
||||
chosen_messages = prompt + [response[0]]
|
||||
rejected_messages = prompt + [response[1]]
|
||||
prompt_ids, chosen_ids = template.encode_oneturn(
|
||||
tokenizer, chosen_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
|
||||
)
|
||||
_, rejected_ids = template.encode_oneturn(
|
||||
tokenizer, rejected_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
|
||||
)
|
||||
|
||||
if template.efficient_eos:
|
||||
chosen_ids += [tokenizer.eos_token_id]
|
||||
rejected_ids += [tokenizer.eos_token_id]
|
||||
|
||||
if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models
|
||||
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token)
|
||||
prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids
|
||||
|
||||
chosen_input_ids = prompt_ids + chosen_ids
|
||||
chosen_labels = [IGNORE_INDEX] * len(prompt_ids) + chosen_ids
|
||||
rejected_input_ids = prompt_ids + rejected_ids
|
||||
rejected_labels = [IGNORE_INDEX] * len(prompt_ids) + rejected_ids
|
||||
|
||||
return chosen_input_ids, chosen_labels, rejected_input_ids, rejected_labels
|
||||
|
||||
|
||||
def preprocess_pairwise_dataset(
|
||||
examples: Dict[str, List[Any]],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
data_args: "DataArguments",
|
||||
) -> Dict[str, List[List[int]]]:
|
||||
# build input pairs with format `<bos> X`, `Y1 <eos>` and `Y2 <eos>`
|
||||
model_inputs = {
|
||||
"chosen_input_ids": [],
|
||||
"chosen_attention_mask": [],
|
||||
"chosen_labels": [],
|
||||
"rejected_input_ids": [],
|
||||
"rejected_attention_mask": [],
|
||||
"rejected_labels": [],
|
||||
}
|
||||
if processor is not None:
|
||||
model_inputs["pixel_values"] = []
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
model_inputs["chosen_token_type_ids"] = []
|
||||
model_inputs["rejected_token_type_ids"] = []
|
||||
|
||||
for i in range(len(examples["prompt"])):
|
||||
if len(examples["prompt"][i]) % 2 != 1 or len(examples["response"][i]) < 2:
|
||||
logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i]))
|
||||
continue
|
||||
|
||||
chosen_input_ids, chosen_labels, rejected_input_ids, rejected_labels = _encode_pairwise_example(
|
||||
prompt=examples["prompt"][i],
|
||||
response=examples["response"][i],
|
||||
system=examples["system"][i],
|
||||
tools=examples["tools"][i],
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=processor,
|
||||
data_args=data_args,
|
||||
)
|
||||
model_inputs["chosen_input_ids"].append(chosen_input_ids)
|
||||
model_inputs["chosen_attention_mask"].append([1] * len(chosen_input_ids))
|
||||
model_inputs["chosen_labels"].append(chosen_labels)
|
||||
model_inputs["rejected_input_ids"].append(rejected_input_ids)
|
||||
model_inputs["rejected_attention_mask"].append([1] * len(rejected_input_ids))
|
||||
model_inputs["rejected_labels"].append(rejected_labels)
|
||||
if processor is not None:
|
||||
model_inputs["pixel_values"].append(get_pixel_values(examples["images"][i], processor))
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
model_inputs["chosen_token_type_ids"].append(
|
||||
get_paligemma_token_type_ids(len(chosen_input_ids), processor)
|
||||
)
|
||||
model_inputs["rejected_token_type_ids"].append(
|
||||
get_paligemma_token_type_ids(len(rejected_input_ids), processor)
|
||||
)
|
||||
|
||||
return model_inputs
|
||||
|
||||
|
||||
def print_pairwise_dataset_example(example: Dict[str, List[int]], tokenizer: "PreTrainedTokenizer") -> None:
|
||||
valid_chosen_labels = list(filter(lambda x: x != IGNORE_INDEX, example["chosen_labels"]))
|
||||
valid_rejected_labels = list(filter(lambda x: x != IGNORE_INDEX, example["rejected_labels"]))
|
||||
print("chosen_input_ids:\n{}".format(example["chosen_input_ids"]))
|
||||
print("chosen_inputs:\n{}".format(tokenizer.decode(example["chosen_input_ids"], skip_special_tokens=False)))
|
||||
print("chosen_label_ids:\n{}".format(example["chosen_labels"]))
|
||||
print("chosen_labels:\n{}".format(tokenizer.decode(valid_chosen_labels, skip_special_tokens=False)))
|
||||
print("rejected_input_ids:\n{}".format(example["rejected_input_ids"]))
|
||||
print("rejected_inputs:\n{}".format(tokenizer.decode(example["rejected_input_ids"], skip_special_tokens=False)))
|
||||
print("rejected_label_ids:\n{}".format(example["rejected_labels"]))
|
||||
print("rejected_labels:\n{}".format(tokenizer.decode(valid_rejected_labels, skip_special_tokens=False)))
|
||||
@@ -0,0 +1,36 @@
|
||||
from itertools import chain
|
||||
from typing import TYPE_CHECKING, Any, Dict, List
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers.tokenization_utils import PreTrainedTokenizer
|
||||
|
||||
from ...hparams import DataArguments
|
||||
|
||||
|
||||
def preprocess_pretrain_dataset(
|
||||
examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments"
|
||||
) -> Dict[str, List[List[int]]]:
|
||||
# build grouped texts with format `X1 X2 X3 ...` if packing is enabled
|
||||
text_examples = [messages[0]["content"] + tokenizer.eos_token for messages in examples["prompt"]]
|
||||
|
||||
if not data_args.packing:
|
||||
if data_args.template == "gemma":
|
||||
text_examples = [tokenizer.bos_token + example for example in text_examples]
|
||||
|
||||
result = tokenizer(text_examples, add_special_tokens=False, max_length=data_args.cutoff_len, truncation=True)
|
||||
else:
|
||||
tokenized_examples = tokenizer(text_examples, add_special_tokens=False)
|
||||
concatenated_examples = {k: list(chain(*tokenized_examples[k])) for k in tokenized_examples.keys()}
|
||||
total_length = len(concatenated_examples[list(concatenated_examples.keys())[0]])
|
||||
block_size = data_args.cutoff_len
|
||||
total_length = (total_length // block_size) * block_size
|
||||
result = {
|
||||
k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
|
||||
for k, t in concatenated_examples.items()
|
||||
}
|
||||
if data_args.template == "gemma":
|
||||
for i in range(len(result["input_ids"])):
|
||||
result["input_ids"][i][0] = tokenizer.bos_token_id
|
||||
|
||||
return result
|
||||
@@ -0,0 +1,64 @@
|
||||
import bisect
|
||||
from typing import TYPE_CHECKING, List, Sequence
|
||||
|
||||
from ...extras.packages import is_pillow_available
|
||||
|
||||
|
||||
if is_pillow_available():
|
||||
from PIL import Image
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from numpy.typing import NDArray
|
||||
from PIL.Image import Image as ImageObject
|
||||
from transformers import ProcessorMixin
|
||||
from transformers.image_processing_utils import BaseImageProcessor
|
||||
|
||||
|
||||
def search_for_fit(numbers: Sequence[int], capacity: int) -> int:
|
||||
r"""
|
||||
Finds the index of largest number that fits into the knapsack with the given capacity.
|
||||
"""
|
||||
index = bisect.bisect(numbers, capacity)
|
||||
return -1 if index == 0 else (index - 1)
|
||||
|
||||
|
||||
def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]:
|
||||
r"""
|
||||
An efficient greedy algorithm with binary search for the knapsack problem.
|
||||
"""
|
||||
numbers.sort() # sort numbers in ascending order for binary search
|
||||
knapsacks = []
|
||||
|
||||
while numbers:
|
||||
current_knapsack = []
|
||||
remaining_capacity = capacity
|
||||
|
||||
while True:
|
||||
index = search_for_fit(numbers, remaining_capacity)
|
||||
if index == -1:
|
||||
break # no more numbers fit in this knapsack
|
||||
|
||||
remaining_capacity -= numbers[index] # update the remaining capacity
|
||||
current_knapsack.append(numbers.pop(index)) # add the number to knapsack
|
||||
|
||||
knapsacks.append(current_knapsack)
|
||||
|
||||
return knapsacks
|
||||
|
||||
|
||||
def get_pixel_values(images: Sequence["ImageObject"], processor: "ProcessorMixin") -> "NDArray":
|
||||
r"""
|
||||
Processes visual inputs. (currently only supports a single image)
|
||||
"""
|
||||
image_processor: "BaseImageProcessor" = getattr(processor, "image_processor")
|
||||
image = images[0] if len(images) != 0 else Image.new("RGB", (100, 100), (255, 255, 255))
|
||||
return image_processor(image, return_tensors="pt")["pixel_values"][0] # shape (C, H, W)
|
||||
|
||||
|
||||
def get_paligemma_token_type_ids(input_len: int, processor: "ProcessorMixin") -> List[int]:
|
||||
r"""
|
||||
Gets paligemma token type ids for computing loss.
|
||||
"""
|
||||
image_seq_length = getattr(processor, "image_seq_length")
|
||||
return [0] * image_seq_length + [1] * (input_len - image_seq_length)
|
||||
@@ -0,0 +1,169 @@
|
||||
from collections import defaultdict
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
from ...extras.constants import IGNORE_INDEX
|
||||
from ...extras.logging import get_logger
|
||||
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers import ProcessorMixin
|
||||
from transformers.tokenization_utils import PreTrainedTokenizer
|
||||
|
||||
from ...hparams import DataArguments
|
||||
from ..template import Template
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _encode_supervised_example(
|
||||
prompt: Sequence[Dict[str, str]],
|
||||
response: Sequence[Dict[str, str]],
|
||||
system: Optional[str],
|
||||
tools: Optional[str],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
data_args: "DataArguments",
|
||||
) -> Tuple[List[int], List[int]]:
|
||||
if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models
|
||||
prompt[0]["content"] = template.image_token + prompt[0]["content"]
|
||||
|
||||
messages = prompt + response
|
||||
input_ids, labels = [], []
|
||||
|
||||
if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models
|
||||
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token)
|
||||
input_ids += [image_token_id] * getattr(processor, "image_seq_length")
|
||||
labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length")
|
||||
|
||||
encoded_pairs = template.encode_multiturn(
|
||||
tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
|
||||
)
|
||||
for turn_idx, (source_ids, target_ids) in enumerate(encoded_pairs):
|
||||
if data_args.train_on_prompt:
|
||||
source_mask = source_ids
|
||||
elif turn_idx != 0 and template.efficient_eos:
|
||||
source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1)
|
||||
else:
|
||||
source_mask = [IGNORE_INDEX] * len(source_ids)
|
||||
|
||||
input_ids += source_ids + target_ids
|
||||
labels += source_mask + target_ids
|
||||
|
||||
if template.efficient_eos:
|
||||
input_ids += [tokenizer.eos_token_id]
|
||||
labels += [tokenizer.eos_token_id]
|
||||
|
||||
return input_ids, labels
|
||||
|
||||
|
||||
def preprocess_supervised_dataset(
|
||||
examples: Dict[str, List[Any]],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
data_args: "DataArguments",
|
||||
) -> Dict[str, List[List[int]]]:
|
||||
# build inputs with format `<bos> X Y <eos>` and labels with format `<ignore> ... <ignore> Y <eos>`
|
||||
# for multiturn examples, we only mask the prompt part in each prompt-response pair.
|
||||
model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
|
||||
if processor is not None:
|
||||
model_inputs["pixel_values"] = []
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
model_inputs["token_type_ids"] = []
|
||||
|
||||
for i in range(len(examples["prompt"])):
|
||||
if len(examples["prompt"][i]) % 2 != 1 or len(examples["response"][i]) != 1:
|
||||
logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i]))
|
||||
continue
|
||||
|
||||
input_ids, labels = _encode_supervised_example(
|
||||
prompt=examples["prompt"][i],
|
||||
response=examples["response"][i],
|
||||
system=examples["system"][i],
|
||||
tools=examples["tools"][i],
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=processor,
|
||||
data_args=data_args,
|
||||
)
|
||||
model_inputs["input_ids"].append(input_ids)
|
||||
model_inputs["attention_mask"].append([1] * len(input_ids))
|
||||
model_inputs["labels"].append(labels)
|
||||
if processor is not None:
|
||||
model_inputs["pixel_values"].append(get_pixel_values(examples["images"][i], processor))
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
model_inputs["token_type_ids"].append(get_paligemma_token_type_ids(len(input_ids), processor))
|
||||
|
||||
return model_inputs
|
||||
|
||||
|
||||
def preprocess_packed_supervised_dataset(
|
||||
examples: Dict[str, List[Any]],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
data_args: "DataArguments",
|
||||
) -> Dict[str, List[List[int]]]:
|
||||
# build inputs with format `<bos> X1 Y1 <eos> <bos> X2 Y2 <eos>`
|
||||
# and labels with format `<ignore> ... <ignore> Y1 <eos> <ignore> ... <ignore> Y2 <eos>`
|
||||
valid_num = 0
|
||||
batch_input_ids, batch_labels = [], []
|
||||
lengths = []
|
||||
length2indexes = defaultdict(list)
|
||||
for i in range(len(examples["prompt"])):
|
||||
if len(examples["prompt"][i]) % 2 != 1 or len(examples["response"][i]) != 1:
|
||||
logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i]))
|
||||
continue
|
||||
|
||||
input_ids, labels = _encode_supervised_example(
|
||||
prompt=examples["prompt"][i],
|
||||
response=examples["response"][i],
|
||||
system=examples["system"][i],
|
||||
tools=examples["tools"][i],
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=None,
|
||||
data_args=data_args,
|
||||
)
|
||||
length = len(input_ids)
|
||||
if length > data_args.cutoff_len:
|
||||
logger.warning("Dropped lengthy example with length {} > {}.".format(length, data_args.cutoff_len))
|
||||
else:
|
||||
lengths.append(length)
|
||||
length2indexes[length].append(valid_num)
|
||||
batch_input_ids.append(input_ids)
|
||||
batch_labels.append(labels)
|
||||
valid_num += 1
|
||||
|
||||
model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
|
||||
knapsacks = greedy_knapsack(lengths, data_args.cutoff_len)
|
||||
for knapsack in knapsacks:
|
||||
packed_input_ids, packed_labels = [], []
|
||||
for length in knapsack:
|
||||
index = length2indexes[length].pop()
|
||||
packed_input_ids += batch_input_ids[index]
|
||||
packed_labels += batch_labels[index]
|
||||
|
||||
if len(packed_input_ids) < data_args.cutoff_len:
|
||||
pad_length = data_args.cutoff_len - len(packed_input_ids)
|
||||
packed_input_ids += [tokenizer.pad_token_id] * pad_length
|
||||
packed_labels += [IGNORE_INDEX] * pad_length
|
||||
|
||||
if len(packed_input_ids) != data_args.cutoff_len:
|
||||
raise ValueError("The length of packed example should be identical to the cutoff length.")
|
||||
|
||||
model_inputs["input_ids"].append(packed_input_ids)
|
||||
model_inputs["attention_mask"].append([1] * data_args.cutoff_len)
|
||||
model_inputs["labels"].append(packed_labels)
|
||||
|
||||
return model_inputs
|
||||
|
||||
|
||||
def print_supervised_dataset_example(example: Dict[str, List[int]], tokenizer: "PreTrainedTokenizer") -> None:
|
||||
valid_labels = list(filter(lambda x: x != IGNORE_INDEX, example["labels"]))
|
||||
print("input_ids:\n{}".format(example["input_ids"]))
|
||||
print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False)))
|
||||
print("label_ids:\n{}".format(example["labels"]))
|
||||
print("labels:\n{}".format(tokenizer.decode(valid_labels, skip_special_tokens=False)))
|
||||
@@ -0,0 +1,92 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
from ...extras.logging import get_logger
|
||||
from ..data_utils import Role
|
||||
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers import ProcessorMixin
|
||||
from transformers.tokenization_utils import PreTrainedTokenizer
|
||||
|
||||
from ...hparams import DataArguments
|
||||
from ..template import Template
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _encode_unsupervised_example(
|
||||
prompt: Sequence[Dict[str, str]],
|
||||
response: Sequence[Dict[str, str]],
|
||||
system: Optional[str],
|
||||
tools: Optional[str],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
data_args: "DataArguments",
|
||||
) -> Tuple[List[int], List[int]]:
|
||||
if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models
|
||||
prompt[0]["content"] = template.image_token + prompt[0]["content"]
|
||||
|
||||
if len(response) == 1:
|
||||
messages = prompt + response
|
||||
else:
|
||||
messages = prompt + [{"role": Role.ASSISTANT.value, "content": ""}]
|
||||
|
||||
input_ids, labels = template.encode_oneturn(
|
||||
tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
|
||||
)
|
||||
if template.efficient_eos:
|
||||
labels += [tokenizer.eos_token_id]
|
||||
|
||||
if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models
|
||||
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token)
|
||||
input_ids = [image_token_id] * getattr(processor, "image_seq_length") + input_ids
|
||||
|
||||
return input_ids, labels
|
||||
|
||||
|
||||
def preprocess_unsupervised_dataset(
|
||||
examples: Dict[str, List[Any]],
|
||||
template: "Template",
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"],
|
||||
data_args: "DataArguments",
|
||||
) -> Dict[str, List[List[int]]]:
|
||||
# build inputs with format `<bos> X` and labels with format `Y <eos>`
|
||||
model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
|
||||
if processor is not None:
|
||||
model_inputs["pixel_values"] = []
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
model_inputs["token_type_ids"] = []
|
||||
|
||||
for i in range(len(examples["prompt"])):
|
||||
if len(examples["prompt"][i]) % 2 != 1:
|
||||
logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i]))
|
||||
continue
|
||||
|
||||
input_ids, labels = _encode_unsupervised_example(
|
||||
prompt=examples["prompt"][i],
|
||||
response=examples["response"][i],
|
||||
system=examples["system"][i],
|
||||
tools=examples["tools"][i],
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
processor=processor,
|
||||
data_args=data_args,
|
||||
)
|
||||
model_inputs["input_ids"].append(input_ids)
|
||||
model_inputs["attention_mask"].append([1] * len(input_ids))
|
||||
model_inputs["labels"].append(labels)
|
||||
if processor is not None:
|
||||
model_inputs["pixel_values"].append(get_pixel_values(examples["images"][i], processor))
|
||||
if hasattr(processor, "image_seq_length"): # paligemma models
|
||||
model_inputs["token_type_ids"].append(get_paligemma_token_type_ids(len(input_ids), processor))
|
||||
|
||||
return model_inputs
|
||||
|
||||
|
||||
def print_unsupervised_dataset_example(example: Dict[str, List[int]], tokenizer: "PreTrainedTokenizer") -> None:
|
||||
print("input_ids:\n{}".format(example["input_ids"]))
|
||||
print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False)))
|
||||
@@ -2,8 +2,8 @@ from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
from ..extras.logging import get_logger
|
||||
from .data_utils import Role, infer_max_len
|
||||
from .formatter import EmptyFormatter, FunctionFormatter, StringFormatter, ToolFormatter
|
||||
from .utils import Role, infer_max_len
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -26,6 +26,7 @@ class Template:
|
||||
format_separator: "Formatter"
|
||||
default_system: str
|
||||
stop_words: List[str]
|
||||
image_token: str
|
||||
efficient_eos: bool
|
||||
replace_eos: bool
|
||||
force_system: bool
|
||||
@@ -68,8 +69,8 @@ class Template:
|
||||
self,
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
messages: List[Dict[str, str]],
|
||||
system: str,
|
||||
tools: str,
|
||||
system: Optional[str],
|
||||
tools: Optional[str],
|
||||
cutoff_len: int,
|
||||
reserved_label_len: int,
|
||||
) -> Sequence[Tuple[List[int], List[int]]]:
|
||||
@@ -195,7 +196,7 @@ class Llama2Template(Template):
|
||||
return self._make_pairs(encoded_messages, cutoff_len, reserved_label_len)
|
||||
|
||||
|
||||
templates: Dict[str, Template] = {}
|
||||
TEMPLATES: Dict[str, Template] = {}
|
||||
|
||||
|
||||
def _register_template(
|
||||
@@ -209,6 +210,7 @@ def _register_template(
|
||||
format_separator: Optional["Formatter"] = None,
|
||||
default_system: str = "",
|
||||
stop_words: List[str] = [],
|
||||
image_token: str = "<image>",
|
||||
efficient_eos: bool = False,
|
||||
replace_eos: bool = False,
|
||||
force_system: bool = False,
|
||||
@@ -246,7 +248,7 @@ def _register_template(
|
||||
default_function_formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}"] + eos_slots)
|
||||
default_tool_formatter = ToolFormatter(tool_format="default")
|
||||
default_separator_formatter = EmptyFormatter()
|
||||
templates[name] = template_class(
|
||||
TEMPLATES[name] = template_class(
|
||||
format_user=format_user or default_user_formatter,
|
||||
format_assistant=format_assistant or default_assistant_formatter,
|
||||
format_system=format_system or default_user_formatter,
|
||||
@@ -256,6 +258,7 @@ def _register_template(
|
||||
format_separator=format_separator or default_separator_formatter,
|
||||
default_system=default_system,
|
||||
stop_words=stop_words,
|
||||
image_token=image_token,
|
||||
efficient_eos=efficient_eos,
|
||||
replace_eos=replace_eos,
|
||||
force_system=force_system,
|
||||
@@ -276,7 +279,7 @@ def _add_or_replace_eos_token(tokenizer: "PreTrainedTokenizer", eos_token: str)
|
||||
|
||||
|
||||
def _jinja_escape(content: str) -> str:
|
||||
return content.replace("\n", r"\n").replace("'", r"\'")
|
||||
return content.replace("'", r"\'")
|
||||
|
||||
|
||||
def _convert_slots_to_jinja(slots: "SLOTS", tokenizer: "PreTrainedTokenizer", placeholder: str = "content") -> str:
|
||||
@@ -290,10 +293,10 @@ def _convert_slots_to_jinja(slots: "SLOTS", tokenizer: "PreTrainedTokenizer", pl
|
||||
slot_items.append(placeholder)
|
||||
if slot_pieces[1]:
|
||||
slot_items.append("'" + _jinja_escape(slot_pieces[1]) + "'")
|
||||
elif isinstance(slot, set):
|
||||
if "bos_token" in slot:
|
||||
elif isinstance(slot, set): # do not use {{ eos_token }} since it may be replaced
|
||||
if "bos_token" in slot and tokenizer.bos_token_id is not None:
|
||||
slot_items.append("'" + tokenizer.bos_token + "'")
|
||||
elif "eos_token" in slot: # do not use {{ eos_token }} since it may be replaced
|
||||
elif "eos_token" in slot and tokenizer.eos_token_id is not None:
|
||||
slot_items.append("'" + tokenizer.eos_token + "'")
|
||||
elif isinstance(slot, dict):
|
||||
raise ValueError("Dict is not supported.")
|
||||
@@ -308,7 +311,7 @@ def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer")
|
||||
jinja_template += "{% set system_message = '" + _jinja_escape(template.default_system) + "' %}"
|
||||
|
||||
jinja_template += (
|
||||
"{% if messages[0]['role'] == 'system' %}" "{% set system_message = messages[0]['content'] %}" "{% endif %}"
|
||||
"{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}"
|
||||
)
|
||||
|
||||
system_message = _convert_slots_to_jinja(template.format_system.apply(), tokenizer, placeholder="system_message")
|
||||
@@ -325,9 +328,11 @@ def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer")
|
||||
jinja_template += "{% if loop.index0 == 0 and system_message is defined %}"
|
||||
jinja_template += "{% set content = " + system_message + " + message['content'] %}"
|
||||
jinja_template += "{% endif %}"
|
||||
|
||||
jinja_template += "{% if message['role'] == 'user' %}"
|
||||
user_message = _convert_slots_to_jinja(template.format_user.apply(), tokenizer)
|
||||
jinja_template += "{{ " + user_message + " }}"
|
||||
|
||||
jinja_template += "{% elif message['role'] == 'assistant' %}"
|
||||
assistant_message = _convert_slots_to_jinja(
|
||||
template.format_assistant.apply() + template.format_separator.apply(), tokenizer
|
||||
@@ -343,9 +348,9 @@ def get_template_and_fix_tokenizer(
|
||||
name: Optional[str] = None,
|
||||
) -> Template:
|
||||
if name is None:
|
||||
template = templates["vanilla"] # placeholder
|
||||
template = TEMPLATES["empty"] # placeholder
|
||||
else:
|
||||
template = templates.get(name, None)
|
||||
template = TEMPLATES.get(name, None)
|
||||
if template is None:
|
||||
raise ValueError("Template {} does not exist.".format(name))
|
||||
|
||||
@@ -385,7 +390,8 @@ _register_template(
|
||||
format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n\n### Response:\n"]),
|
||||
format_separator=EmptyFormatter(slots=["\n\n"]),
|
||||
default_system=(
|
||||
"Below is an instruction that describes a task. " "Write a response that appropriately completes the request."
|
||||
"Below is an instruction that describes a task. "
|
||||
"Write a response that appropriately completes the request.\n\n"
|
||||
),
|
||||
)
|
||||
|
||||
@@ -414,7 +420,7 @@ _register_template(
|
||||
|
||||
_register_template(
|
||||
name="baichuan",
|
||||
format_user=StringFormatter(slots=["<reserved_102>{{content}}<reserved_103>"]),
|
||||
format_user=StringFormatter(slots=[{"token": "<reserved_102>"}, "{{content}}", {"token": "<reserved_103>"}]),
|
||||
efficient_eos=True,
|
||||
)
|
||||
|
||||
@@ -441,6 +447,18 @@ _register_template(
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="breeze",
|
||||
format_user=StringFormatter(slots=["[INST] {{content}} [/INST] "]),
|
||||
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
|
||||
default_system=(
|
||||
"You are a helpful AI assistant built by MediaTek Research. "
|
||||
"The user you are helping speaks Traditional Chinese and comes from Taiwan."
|
||||
),
|
||||
efficient_eos=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="chatglm2",
|
||||
format_user=StringFormatter(slots=["[Round {{idx}}]\n\n问:{{content}}\n\n答:"]),
|
||||
@@ -490,6 +508,7 @@ _register_template(
|
||||
name="chatml",
|
||||
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
||||
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_separator=EmptyFormatter(slots=["\n"]),
|
||||
stop_words=["<|im_end|>", "<|im_start|>"],
|
||||
replace_eos=True,
|
||||
@@ -500,6 +519,7 @@ _register_template(
|
||||
name="chatml_de",
|
||||
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
||||
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_separator=EmptyFormatter(slots=["\n"]),
|
||||
default_system="Du bist ein freundlicher und hilfsbereiter KI-Assistent.",
|
||||
stop_words=["<|im_end|>", "<|im_start|>"],
|
||||
@@ -514,6 +534,26 @@ _register_template(
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="cohere",
|
||||
format_user=StringFormatter(
|
||||
slots=[
|
||||
(
|
||||
"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"
|
||||
"<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
|
||||
)
|
||||
]
|
||||
),
|
||||
format_system=StringFormatter(
|
||||
slots=[{"bos_token"}, "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"]
|
||||
),
|
||||
default_system=(
|
||||
"You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users "
|
||||
"by providing thorough responses. You are trained by Cohere."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="cpm",
|
||||
format_user=StringFormatter(slots=["<用户>{{content}}<AI>"]),
|
||||
@@ -522,6 +562,32 @@ _register_template(
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="dbrx",
|
||||
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
||||
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_separator=EmptyFormatter(slots=["\n"]),
|
||||
default_system=(
|
||||
"You are DBRX, created by Databricks. You were last updated in December 2023. "
|
||||
"You answer questions based on information available up to that point.\n"
|
||||
"YOU PROVIDE SHORT RESPONSES TO SHORT QUESTIONS OR STATEMENTS, but provide thorough "
|
||||
"responses to more complex and open-ended questions.\nYou assist with various tasks, "
|
||||
"from writing to coding (using markdown for code blocks — remember to use ``` with "
|
||||
"code, JSON, and tables).\n(You do not have real-time data access or code execution "
|
||||
"capabilities. You avoid stereotyping and provide balanced perspectives on "
|
||||
"controversial topics. You do not provide song lyrics, poems, or news articles and "
|
||||
"do not divulge details of your training data.)\nThis is your system prompt, "
|
||||
"guiding your responses. Do not reference it, just respond to the user. If you find "
|
||||
"yourself talking about this message, stop. You should be responding appropriately "
|
||||
"and usually that means not mentioning this.\nYOU DO NOT MENTION ANY OF THIS INFORMATION "
|
||||
"ABOUT YOURSELF UNLESS THE INFORMATION IS DIRECTLY PERTINENT TO THE USER'S QUERY."
|
||||
),
|
||||
stop_words=["<|im_end|>"],
|
||||
replace_eos=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="deepseek",
|
||||
format_user=StringFormatter(slots=["User: {{content}}\n\nAssistant:"]),
|
||||
@@ -554,6 +620,16 @@ _register_template(
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="empty",
|
||||
format_user=StringFormatter(slots=["{{content}}"]),
|
||||
format_assistant=StringFormatter(slots=["{{content}}"]),
|
||||
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
|
||||
efficient_eos=True,
|
||||
force_system=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="falcon",
|
||||
format_user=StringFormatter(slots=["User: {{content}}\nFalcon:"]),
|
||||
@@ -562,16 +638,39 @@ _register_template(
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="fewshot",
|
||||
format_separator=EmptyFormatter(slots=["\n\n"]),
|
||||
efficient_eos=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="gemma",
|
||||
format_user=StringFormatter(slots=["<start_of_turn>user\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]),
|
||||
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
|
||||
format_observation=StringFormatter(
|
||||
slots=["<start_of_turn>tool\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]
|
||||
),
|
||||
format_separator=EmptyFormatter(slots=["<end_of_turn>\n"]),
|
||||
efficient_eos=True,
|
||||
force_system=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="glm4",
|
||||
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
|
||||
format_assistant=StringFormatter(slots=["\n{{content}}"]),
|
||||
format_system=StringFormatter(slots=["[gMASK]<sop>{{content}}"]),
|
||||
format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]),
|
||||
format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
|
||||
stop_words=["<|user|>", "<|observation|>"],
|
||||
efficient_eos=True,
|
||||
force_system=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="intern",
|
||||
format_user=StringFormatter(slots=["<|User|>:{{content}}", {"token": "<eoh>"}, "\n<|Bot|>:"]),
|
||||
@@ -601,17 +700,8 @@ _register_template(
|
||||
_register_template(
|
||||
name="llama2",
|
||||
format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]),
|
||||
format_assistant=StringFormatter(slots=[" {{content}} ", {"eos_token"}]),
|
||||
format_system=StringFormatter(slots=["<<SYS>>\n{{content}}\n<</SYS>>\n\n"]),
|
||||
default_system=(
|
||||
"You are a helpful, respectful and honest assistant. "
|
||||
"Always answer as helpfully as possible, while being safe. "
|
||||
"Your answers should not include any harmful, unethical, "
|
||||
"racist, sexist, toxic, dangerous, or illegal content. "
|
||||
"Please ensure that your responses are socially unbiased and positive in nature.\n\n"
|
||||
"If a question does not make any sense, or is not factually coherent, "
|
||||
"explain why instead of answering something not correct. "
|
||||
"If you don't know the answer to a question, please don't share false information."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -623,6 +713,33 @@ _register_template(
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="llama3",
|
||||
format_user=StringFormatter(
|
||||
slots=[
|
||||
(
|
||||
"<|start_header_id|>user<|end_header_id|>\n\n{{content}}<|eot_id|>"
|
||||
"<|start_header_id|>assistant<|end_header_id|>\n\n"
|
||||
)
|
||||
]
|
||||
),
|
||||
format_system=StringFormatter(
|
||||
slots=[{"bos_token"}, "<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]
|
||||
),
|
||||
format_observation=StringFormatter(
|
||||
slots=[
|
||||
(
|
||||
"<|start_header_id|>tool<|end_header_id|>\n\n{{content}}<|eot_id|>"
|
||||
"<|start_header_id|>assistant<|end_header_id|>\n\n"
|
||||
)
|
||||
]
|
||||
),
|
||||
default_system="You are a helpful assistant.",
|
||||
stop_words=["<|eot_id|>"],
|
||||
replace_eos=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="mistral",
|
||||
format_user=StringFormatter(slots=["[INST] {{content}} [/INST]"]),
|
||||
@@ -633,8 +750,7 @@ _register_template(
|
||||
|
||||
_register_template(
|
||||
name="olmo",
|
||||
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
|
||||
format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}]),
|
||||
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>\n"]),
|
||||
format_system=StringFormatter(slots=[{"eos_token"}, "{{content}}"]),
|
||||
force_system=True,
|
||||
)
|
||||
@@ -643,12 +759,28 @@ _register_template(
|
||||
_register_template(
|
||||
name="openchat",
|
||||
format_user=StringFormatter(slots=["GPT4 Correct User: {{content}}", {"eos_token"}, "GPT4 Correct Assistant:"]),
|
||||
format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}]),
|
||||
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
|
||||
force_system=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="openchat-3.6",
|
||||
format_user=StringFormatter(
|
||||
slots=[
|
||||
(
|
||||
"<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>"
|
||||
"<|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n"
|
||||
)
|
||||
]
|
||||
),
|
||||
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
|
||||
stop_words=["<|eot_id|>"],
|
||||
replace_eos=True,
|
||||
force_system=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="orion",
|
||||
format_user=StringFormatter(slots=["Human: {{content}}\n\nAssistant: ", {"eos_token"}]),
|
||||
@@ -657,10 +789,22 @@ _register_template(
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="phi",
|
||||
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|end|>\n<|assistant|>\n"]),
|
||||
format_system=StringFormatter(slots=[{"bos_token"}, "<|system|>\n{{content}}<|end|>\n"]),
|
||||
format_separator=EmptyFormatter(slots=["\n"]),
|
||||
default_system="You are a helpful AI assistant.",
|
||||
stop_words=["<|end|>"],
|
||||
replace_eos=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="qwen",
|
||||
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
||||
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_separator=EmptyFormatter(slots=["\n"]),
|
||||
default_system="You are a helpful assistant.",
|
||||
stop_words=["<|im_end|>"],
|
||||
@@ -688,7 +832,11 @@ _register_template(
|
||||
|
||||
|
||||
_register_template(
|
||||
name="vanilla",
|
||||
name="telechat",
|
||||
format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]),
|
||||
format_system=StringFormatter(slots=["<_system>{{content}}<_end>"]),
|
||||
stop_words=["<_end>"],
|
||||
replace_eos=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -742,12 +890,29 @@ _register_template(
|
||||
_register_template(
|
||||
name="yi",
|
||||
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
||||
format_separator=EmptyFormatter(slots=["\n"]),
|
||||
stop_words=["<|im_end|>"],
|
||||
replace_eos=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="yi_vl",
|
||||
format_user=StringFormatter(slots=["### Human: {{content}}\n### Assistant:"]),
|
||||
format_separator=EmptyFormatter(slots=["\n"]),
|
||||
default_system=(
|
||||
"This is a chat between an inquisitive human and an AI assistant. "
|
||||
"Assume the role of the AI assistant. Read all the images carefully, "
|
||||
"and respond to the human's questions with informative, helpful, detailed and polite answers. "
|
||||
"这是一个好奇的人类和一个人工智能助手之间的对话。假设你扮演这个AI助手的角色。"
|
||||
"仔细阅读所有的图像,并对人类的问题做出信息丰富、有帮助、详细的和礼貌的回答。\n\n"
|
||||
),
|
||||
stop_words=["###"],
|
||||
efficient_eos=True,
|
||||
)
|
||||
|
||||
|
||||
_register_template(
|
||||
name="yuan",
|
||||
format_user=StringFormatter(slots=["{{content}}", {"token": "<sep>"}]),
|
||||
@@ -762,7 +927,7 @@ _register_template(
|
||||
format_user=StringFormatter(slots=["<|user|>\n{{content}}", {"eos_token"}, "<|assistant|>"]),
|
||||
format_assistant=StringFormatter(slots=["\n{{content}}", {"eos_token"}]),
|
||||
format_system=StringFormatter(slots=["<|system|>\n{{content}}", {"eos_token"}]),
|
||||
default_system="You are a friendly chatbot who always responds in the style of a pirate",
|
||||
default_system="You are Zephyr, a helpful assistant.",
|
||||
)
|
||||
|
||||
|
||||
@@ -14,16 +14,17 @@ from transformers.utils import cached_file
|
||||
from ..data import get_template_and_fix_tokenizer
|
||||
from ..extras.constants import CHOICES, SUBJECTS
|
||||
from ..hparams import get_eval_args
|
||||
from ..model import load_model_and_tokenizer
|
||||
from ..model import load_model, load_tokenizer
|
||||
from .template import get_eval_template
|
||||
|
||||
|
||||
class Evaluator:
|
||||
def __init__(self, args: Optional[Dict[str, Any]] = None) -> None:
|
||||
self.model_args, self.data_args, self.eval_args, finetuning_args = get_eval_args(args)
|
||||
self.model, self.tokenizer = load_model_and_tokenizer(self.model_args, finetuning_args)
|
||||
self.tokenizer = load_tokenizer(self.model_args)["tokenizer"]
|
||||
self.tokenizer.padding_side = "right" # avoid overflow issue in batched inference for llama2
|
||||
self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template)
|
||||
self.model = load_model(self.tokenizer, self.model_args, finetuning_args)
|
||||
self.eval_template = get_eval_template(self.eval_args.lang)
|
||||
self.choice_inputs = [
|
||||
self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES
|
||||
@@ -117,6 +118,5 @@ class Evaluator:
|
||||
f.write(score_info)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
evaluator = Evaluator()
|
||||
evaluator.eval()
|
||||
def run_eval() -> None:
|
||||
Evaluator().eval()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user