20250404 实现分词、向量化,检索增强
This commit is contained in:
parent
e81caa381e
commit
fb4e6e6065
6
rag/DWALK商城上架商品.txt
Normal file
6
rag/DWALK商城上架商品.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
DWALK商城上架商品
|
||||||
|
商品名称:机械革命旷世G16蛟龙16k/15K/15极光16pro定制4060游戏笔记本电脑,价格:8000元,店铺:机械革命官方旗舰店,商品分类:电脑/笔记本电脑 简介:32G,显卡型号NVIDIA GeForce RTX4060,CPU型号英特尔 酷睿 i7-13650HX,学生,家庭娱乐,高清游戏,设计制图,办公
|
||||||
|
商品名称:小米RedmiBook 16 2024英特尔酷睿标压轻薄商务笔记本办公商务电脑,价格:3000元,店铺:小米官方旗舰店,商品分类:电脑/笔记本电脑 简介:16G,显卡型号UHD Graphics,CPU型号英特尔 酷睿 i5-12450H,商务办公 女性定位 家庭影音
|
||||||
|
商品名称:ALIENWARE外星人m16 R2 Ultra,价格:12000元,店铺:外星人长虹佳华专卖店,商品分类:电脑/笔记本电脑 简介:16G,显卡型号NVIDIA GeForce RTX4060,CPU型号英特尔 酷睿 Ultra7 155H,AI助力高性能本,高清游戏
|
||||||
|
商品名称:HONOR/荣耀X60 5G智能手机,价格:1146元,店铺:荣耀官方旗舰店,商品分类:手机/智能手机 简介:一亿像素,6.8英寸,电池容量5800mAh高能量密度大电池超耐久使用,十面耐摔防水抗震,512GB超大存储空间,AI智慧助手魔法抠图
|
||||||
|
商品名称:REDMI K80红米k80,价格:2449元,店铺:小米官方旗舰店,商品分类:手机/智能手机 简介:Android/安卓系统,5000万像素,6.67英寸,电池容量6550mAh智慧充电引擎,性能满贯,狂暴进化,AI从芯片低层深度赋能,实现性能实力爆发,轻松应对游戏中高负载场景
|
68
src/main/java/com/ai/config/SegmentConfig.java
Normal file
68
src/main/java/com/ai/config/SegmentConfig.java
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
package com.ai.config;
|
||||||
|
|
||||||
|
import com.ai.service.SegmentAssist;
|
||||||
|
import dev.langchain4j.community.model.dashscope.QwenChatModel;
|
||||||
|
import dev.langchain4j.community.model.dashscope.QwenEmbeddingModel;
|
||||||
|
import dev.langchain4j.data.document.Document;
|
||||||
|
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
|
||||||
|
import dev.langchain4j.data.document.splitter.DocumentByCharacterSplitter;
|
||||||
|
import dev.langchain4j.data.document.splitter.DocumentByLineSplitter;
|
||||||
|
import dev.langchain4j.data.embedding.Embedding;
|
||||||
|
import dev.langchain4j.data.segment.TextSegment;
|
||||||
|
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
|
||||||
|
import dev.langchain4j.model.chat.ChatLanguageModel;
|
||||||
|
import dev.langchain4j.model.output.Response;
|
||||||
|
import dev.langchain4j.rag.content.retriever.ContentRetriever;
|
||||||
|
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
|
||||||
|
import dev.langchain4j.service.AiServices;
|
||||||
|
import dev.langchain4j.store.embedding.EmbeddingMatch;
|
||||||
|
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
||||||
|
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
|
||||||
|
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class SegmentConfig {
|
||||||
|
|
||||||
|
@Value("${langchain4j.api-key}")
|
||||||
|
private String apiKey;
|
||||||
|
|
||||||
|
@Value("${langchain4j.model}")
|
||||||
|
private String model;
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public SegmentAssist segmentAssist() {
|
||||||
|
ChatLanguageModel qwenModel = QwenChatModel.builder()
|
||||||
|
.apiKey(apiKey)
|
||||||
|
.modelName(model)
|
||||||
|
.build();
|
||||||
|
QwenEmbeddingModel embeddingModel = QwenEmbeddingModel.builder().apiKey(apiKey).build();
|
||||||
|
InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
|
||||||
|
List<Document> documents = FileSystemDocumentLoader.loadDocuments("E:\\ideaProject\\liang-ai\\rag");
|
||||||
|
for (Document document : documents) {
|
||||||
|
DocumentByLineSplitter splitter = new DocumentByLineSplitter(300,30);
|
||||||
|
List<TextSegment> segments = splitter.split(document);
|
||||||
|
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
|
||||||
|
embeddingStore.addAll(embeddings, segments);
|
||||||
|
}
|
||||||
|
|
||||||
|
ContentRetriever contentRetriever = EmbeddingStoreContentRetriever.builder()
|
||||||
|
.embeddingStore(embeddingStore)
|
||||||
|
.embeddingModel(embeddingModel)
|
||||||
|
.maxResults(5)
|
||||||
|
.minScore(0.6)
|
||||||
|
.build();
|
||||||
|
// for simplicity, we will use an in-memory one:
|
||||||
|
return AiServices.builder(SegmentAssist.class)
|
||||||
|
.chatMemoryProvider(memoryId -> MessageWindowChatMemory.withMaxMessages(10))
|
||||||
|
.chatLanguageModel(qwenModel)
|
||||||
|
.contentRetriever(contentRetriever)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
}
|
@ -1,9 +1,6 @@
|
|||||||
package com.ai.controller;
|
package com.ai.controller;
|
||||||
|
|
||||||
import com.ai.service.Assist;
|
import com.ai.service.*;
|
||||||
import com.ai.service.LangChainService;
|
|
||||||
import com.ai.service.NormalRequestService;
|
|
||||||
import com.ai.service.OllamaAssist;
|
|
||||||
import dev.langchain4j.community.model.dashscope.QwenChatModel;
|
import dev.langchain4j.community.model.dashscope.QwenChatModel;
|
||||||
import dev.langchain4j.community.model.zhipu.ZhipuAiImageModel;
|
import dev.langchain4j.community.model.zhipu.ZhipuAiImageModel;
|
||||||
import dev.langchain4j.data.image.Image;
|
import dev.langchain4j.data.image.Image;
|
||||||
@ -39,6 +36,12 @@ public class LangChainController {
|
|||||||
@Autowired
|
@Autowired
|
||||||
private NormalRequestService normalRequestService;
|
private NormalRequestService normalRequestService;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private SegmentAssist segmentAssist;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private EmbeddingService embeddingService;
|
||||||
|
|
||||||
@GetMapping("/normal/chat")
|
@GetMapping("/normal/chat")
|
||||||
public String normalChat(@RequestParam("input") String input) {
|
public String normalChat(@RequestParam("input") String input) {
|
||||||
System.out.println("start normal chat...");
|
System.out.println("start normal chat...");
|
||||||
@ -81,4 +84,11 @@ public class LangChainController {
|
|||||||
System.out.println("start highlevel memory chat...");
|
System.out.println("start highlevel memory chat...");
|
||||||
return ollamaAssist.chat(input);
|
return ollamaAssist.chat(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@GetMapping("/embedd/chat")
|
||||||
|
public String embeddChat(@RequestParam("input") String input) {
|
||||||
|
System.out.println("start embedd chat...");
|
||||||
|
embeddingService.embedding(input);
|
||||||
|
return segmentAssist.chat(input);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,3 +16,6 @@ GET http://localhost:8080/langchain/zhipu/img?input=请画一张魔兽世界里
|
|||||||
|
|
||||||
### 测试 LangChainController 的 highlevel chat 接口
|
### 测试 LangChainController 的 highlevel chat 接口
|
||||||
GET http://localhost:8080/langchain/high/call?input=用5个菲林抽取5个代理人
|
GET http://localhost:8080/langchain/high/call?input=用5个菲林抽取5个代理人
|
||||||
|
|
||||||
|
### 测试 LangChainController 的 embedd chat 接口
|
||||||
|
GET http://localhost:8080/langchain/embedd/chat?input=我想要个便宜的办公笔记本,有什么推荐吗
|
||||||
|
42
src/main/java/com/ai/service/EmbeddingService.java
Normal file
42
src/main/java/com/ai/service/EmbeddingService.java
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
package com.ai.service;
|
||||||
|
|
||||||
|
import dev.langchain4j.community.model.dashscope.QwenEmbeddingModel;
|
||||||
|
import dev.langchain4j.data.document.Document;
|
||||||
|
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
|
||||||
|
import dev.langchain4j.data.document.splitter.DocumentByLineSplitter;
|
||||||
|
import dev.langchain4j.data.embedding.Embedding;
|
||||||
|
import dev.langchain4j.data.segment.TextSegment;
|
||||||
|
import dev.langchain4j.model.output.Response;
|
||||||
|
import dev.langchain4j.store.embedding.EmbeddingMatch;
|
||||||
|
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
||||||
|
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
|
||||||
|
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
public class EmbeddingService {
|
||||||
|
|
||||||
|
@Value("${langchain4j.api-key}")
|
||||||
|
private String apiKey;
|
||||||
|
|
||||||
|
public void embedding(String input) {
|
||||||
|
QwenEmbeddingModel embeddingModel = QwenEmbeddingModel.builder().apiKey(apiKey).build();
|
||||||
|
InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
|
||||||
|
List<Document> documents = FileSystemDocumentLoader.loadDocuments("E:\\ideaProject\\liang-ai\\rag");
|
||||||
|
for (Document document : documents) {
|
||||||
|
DocumentByLineSplitter splitter = new DocumentByLineSplitter(200,30);
|
||||||
|
List<TextSegment> segments = splitter.split(document);
|
||||||
|
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
|
||||||
|
embeddingStore.addAll(embeddings, segments);
|
||||||
|
Response<Embedding> embed = embeddingModel.embed(input);
|
||||||
|
EmbeddingSearchRequest builder = EmbeddingSearchRequest.builder().queryEmbedding(embed.content()).build();
|
||||||
|
EmbeddingSearchResult<TextSegment> results = embeddingStore.search(builder);
|
||||||
|
for (EmbeddingMatch<TextSegment> match : results.matches()) {
|
||||||
|
System.out.println(match.embedded().text() + ", 分数为" + match.score());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
9
src/main/java/com/ai/service/SegmentAssist.java
Normal file
9
src/main/java/com/ai/service/SegmentAssist.java
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
package com.ai.service;
|
||||||
|
|
||||||
|
import dev.langchain4j.service.SystemMessage;
|
||||||
|
|
||||||
|
public interface SegmentAssist {
|
||||||
|
|
||||||
|
@SystemMessage("你是DWALK商城助手,你需要基于DWALK商城上架的商品来回答问题")
|
||||||
|
String chat(String userMessage);
|
||||||
|
}
|
16
个人知识库
16
个人知识库
@ -1,18 +1,4 @@
|
|||||||
梁靖林个人博客halo
|
梁靖林个人博客halo
|
||||||
地址:halo.liangjl.top
|
地址:halo.liangjl.top
|
||||||
账号:liangjinglin
|
账号:liangjinglin
|
||||||
密码: 353912918a
|
密码: 353912918a
|
||||||
|
|
||||||
DWALK商城 商品推荐优先级规则
|
|
||||||
1.根据本月销量推荐,销量越高,优先级越高,考量占比50%
|
|
||||||
2.根据评分,评分越高,优先级越高 考量占比50%
|
|
||||||
3.根据评论数,评论数越多,优先级越高 考量占比10%
|
|
||||||
4.根据收藏数,收藏数越多,优先级越高 考量占比5%
|
|
||||||
|
|
||||||
DWALK商城商品情况
|
|
||||||
1.茅台 销量:100 评分:4 评论数:100 收藏数:100
|
|
||||||
2.meta50 销量:200 评分:5 评论数:500 收藏数:5000
|
|
||||||
3.机械革命 销量:50 评分:4.5 评论数:80 收藏数:100
|
|
||||||
4.潮汕牛肉丸 销量:150 评分:3 评论数:100 收藏数:20
|
|
||||||
5.风筒 销量:80 评分:3.5 评论数:50 收藏数:10
|
|
||||||
6.口红 销量:30 评分:4.5 评论数:200 收藏数:200
|
|
Loading…
Reference in New Issue
Block a user