20250404 实现分词、向量化,检索增强

This commit is contained in:
liangjinglin 2025-04-04 22:12:31 +08:00
parent e81caa381e
commit fb4e6e6065
7 changed files with 143 additions and 19 deletions

View File

@ -0,0 +1,6 @@
DWALK商城上架商品
商品名称机械革命旷世G16蛟龙16k/15K/15极光16pro定制4060游戏笔记本电脑价格8000元店铺机械革命官方旗舰店商品分类电脑/笔记本电脑 简介32G显卡型号NVIDIA GeForce RTX4060CPU型号英特尔 酷睿 i7-13650HX学生,家庭娱乐,高清游戏,设计制图,办公
商品名称小米RedmiBook 16 2024英特尔酷睿标压轻薄商务笔记本办公商务电脑价格3000元店铺小米官方旗舰店商品分类电脑/笔记本电脑 简介16G显卡型号UHD GraphicsCPU型号英特尔 酷睿 i5-12450H商务办公 女性定位 家庭影音
商品名称ALIENWARE外星人m16 R2 Ultra价格12000元店铺外星人长虹佳华专卖店商品分类电脑/笔记本电脑 简介16G显卡型号NVIDIA GeForce RTX4060CPU型号英特尔 酷睿 Ultra7 155HAI助力高性能本高清游戏
商品名称HONOR/荣耀X60 5G智能手机价格1146元店铺荣耀官方旗舰店商品分类手机/智能手机 简介一亿像素6.8英寸电池容量5800mAh高能量密度大电池超耐久使用十面耐摔防水抗震512GB超大存储空间AI智慧助手魔法抠图
商品名称REDMI K80红米k80价格2449元店铺小米官方旗舰店商品分类手机/智能手机 简介Android/安卓系统5000万像素6.67英寸电池容量6550mAh智慧充电引擎性能满贯狂暴进化AI从芯片低层深度赋能实现性能实力爆发轻松应对游戏中高负载场景

View File

@ -0,0 +1,68 @@
package com.ai.config;
import com.ai.service.SegmentAssist;
import dev.langchain4j.community.model.dashscope.QwenChatModel;
import dev.langchain4j.community.model.dashscope.QwenEmbeddingModel;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.splitter.DocumentByCharacterSplitter;
import dev.langchain4j.data.document.splitter.DocumentByLineSplitter;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.rag.content.retriever.ContentRetriever;
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
import dev.langchain4j.service.AiServices;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
import lombok.RequiredArgsConstructor;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.List;
/**
 * Spring configuration that builds the retrieval-augmented {@link SegmentAssist} bean.
 *
 * <p>When the bean is created, every document found under {@link #RAG_DOCUMENT_DIR} is split
 * line by line, embedded with a {@link QwenEmbeddingModel} and stored in an in-memory embedding
 * store. The resulting store backs the content retriever attached to the assistant.
 */
@Configuration
@RequiredArgsConstructor
public class SegmentConfig {

    /** Directory whose documents are indexed for retrieval. */
    private static final String RAG_DOCUMENT_DIR = "E:\\ideaProject\\liang-ai\\rag";

    /** First argument passed to {@link DocumentByLineSplitter}: the maximum segment size. */
    private static final int MAX_SEGMENT_SIZE = 300;

    /** Second argument passed to {@link DocumentByLineSplitter}: the maximum segment overlap. */
    private static final int MAX_SEGMENT_OVERLAP = 30;

    /** Upper bound on the number of segments the retriever returns per query. */
    private static final int MAX_RESULTS = 5;

    /** Minimum score a segment needs to be returned by the retriever. */
    private static final double MIN_SCORE = 0.6;

    /** Number of messages kept in each conversation's memory window. */
    private static final int MEMORY_WINDOW_SIZE = 10;

    @Value("${langchain4j.api-key}")
    private String apiKey;

    @Value("${langchain4j.model}")
    private String model;

    /**
     * Creates the assistant backed by a Qwen chat model, a per-conversation message window and a
     * content retriever over the indexed documents.
     *
     * @return the assistant proxy generated by {@link AiServices}
     */
    @Bean
    public SegmentAssist segmentAssist() {
        ChatLanguageModel qwenModel = QwenChatModel.builder()
                .apiKey(apiKey)
                .modelName(model)
                .build();
        QwenEmbeddingModel embeddingModel = QwenEmbeddingModel.builder().apiKey(apiKey).build();

        InMemoryEmbeddingStore<TextSegment> embeddingStore = indexDocuments(embeddingModel);

        ContentRetriever contentRetriever = EmbeddingStoreContentRetriever.builder()
                .embeddingStore(embeddingStore)
                .embeddingModel(embeddingModel)
                .maxResults(MAX_RESULTS)
                .minScore(MIN_SCORE)
                .build();

        return AiServices.builder(SegmentAssist.class)
                .chatMemoryProvider(memoryId -> MessageWindowChatMemory.withMaxMessages(MEMORY_WINDOW_SIZE))
                .chatLanguageModel(qwenModel)
                .contentRetriever(contentRetriever)
                .build();
    }

    /**
     * Loads, splits and embeds every document under {@link #RAG_DOCUMENT_DIR}.
     *
     * @param embeddingModel model used to turn each text segment into an embedding
     * @return an in-memory store holding one embedding per segment
     */
    private InMemoryEmbeddingStore<TextSegment> indexDocuments(QwenEmbeddingModel embeddingModel) {
        // For simplicity the embeddings are kept in memory, so they are rebuilt on every startup.
        InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
        // The splitter does not depend on the document, so one instance serves the whole loop.
        DocumentByLineSplitter splitter = new DocumentByLineSplitter(MAX_SEGMENT_SIZE, MAX_SEGMENT_OVERLAP);

        List<Document> documents = FileSystemDocumentLoader.loadDocuments(RAG_DOCUMENT_DIR);
        for (Document document : documents) {
            List<TextSegment> segments = splitter.split(document);
            if (segments.isEmpty()) {
                // Nothing to index for this document; skip the embedding call entirely.
                continue;
            }
            List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
            embeddingStore.addAll(embeddings, segments);
        }
        return embeddingStore;
    }
}

View File

@ -1,9 +1,6 @@
package com.ai.controller; package com.ai.controller;
import com.ai.service.Assist; import com.ai.service.*;
import com.ai.service.LangChainService;
import com.ai.service.NormalRequestService;
import com.ai.service.OllamaAssist;
import dev.langchain4j.community.model.dashscope.QwenChatModel; import dev.langchain4j.community.model.dashscope.QwenChatModel;
import dev.langchain4j.community.model.zhipu.ZhipuAiImageModel; import dev.langchain4j.community.model.zhipu.ZhipuAiImageModel;
import dev.langchain4j.data.image.Image; import dev.langchain4j.data.image.Image;
@ -39,6 +36,12 @@ public class LangChainController {
@Autowired @Autowired
private NormalRequestService normalRequestService; private NormalRequestService normalRequestService;
@Autowired
private SegmentAssist segmentAssist;
@Autowired
private EmbeddingService embeddingService;
@GetMapping("/normal/chat") @GetMapping("/normal/chat")
public String normalChat(@RequestParam("input") String input) { public String normalChat(@RequestParam("input") String input) {
System.out.println("start normal chat..."); System.out.println("start normal chat...");
@ -81,4 +84,11 @@ public class LangChainController {
System.out.println("start highlevel memory chat..."); System.out.println("start highlevel memory chat...");
return ollamaAssist.chat(input); return ollamaAssist.chat(input);
} }
/**
 * Answers a question through the retrieval-augmented assistant.
 *
 * <p>Before asking the assistant, the input is passed to {@code embeddingService.embedding},
 * which returns nothing and only prints the matching segments and their scores.
 *
 * @param input the user's question, taken from the {@code input} request parameter
 * @return the assistant's reply
 */
@GetMapping("/embedd/chat")
public String embeddChat(@RequestParam("input") String input) {
    System.out.println("start embedd chat...");

    // Diagnostic step: its console output shows which segments match the question.
    embeddingService.embedding(input);

    String answer = segmentAssist.chat(input);
    return answer;
}
} }

View File

@ -16,3 +16,6 @@ GET http://localhost:8080/langchain/zhipu/img?input=请画一张魔兽世界里
### 测试 LangChainController 的 highlevel chat 接口 ### 测试 LangChainController 的 highlevel chat 接口
GET http://localhost:8080/langchain/high/call?input=用5个菲林抽取5个代理人 GET http://localhost:8080/langchain/high/call?input=用5个菲林抽取5个代理人
### 测试 LangChainController 的 embedd chat 接口
GET http://localhost:8080/langchain/embedd/chat?input=我想要个便宜的办公笔记本,有什么推荐吗

View File

@ -0,0 +1,42 @@
package com.ai.service;
import dev.langchain4j.community.model.dashscope.QwenEmbeddingModel;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.splitter.DocumentByLineSplitter;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.List;
/**
 * Diagnostic service that shows which indexed text segments best match a query.
 *
 * <p>Every call re-reads the documents under {@link #RAG_DOCUMENT_DIR}, embeds them into a fresh
 * in-memory store, searches that store with the embedded query and prints each match together
 * with its score. Nothing is cached between calls.
 */
@Service
public class EmbeddingService {

    /** Directory whose documents are indexed for the search. */
    private static final String RAG_DOCUMENT_DIR = "E:\\ideaProject\\liang-ai\\rag";

    /** First argument passed to {@link DocumentByLineSplitter}: the maximum segment size. */
    private static final int MAX_SEGMENT_SIZE = 200;

    /** Second argument passed to {@link DocumentByLineSplitter}: the maximum segment overlap. */
    private static final int MAX_SEGMENT_OVERLAP = 30;

    @Value("${langchain4j.api-key}")
    private String apiKey;

    /**
     * Indexes all documents, then prints the segments most similar to {@code input}.
     *
     * <p>If no segment could be indexed, the method returns without embedding the query or
     * printing anything.
     *
     * @param input the query text to embed and search for
     */
    public void embedding(String input) {
        QwenEmbeddingModel embeddingModel = QwenEmbeddingModel.builder().apiKey(apiKey).build();
        InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
        // The splitter does not depend on the document, so one instance serves the whole loop.
        DocumentByLineSplitter splitter = new DocumentByLineSplitter(MAX_SEGMENT_SIZE, MAX_SEGMENT_OVERLAP);

        boolean indexedAnything = false;
        List<Document> documents = FileSystemDocumentLoader.loadDocuments(RAG_DOCUMENT_DIR);
        for (Document document : documents) {
            List<TextSegment> segments = splitter.split(document);
            if (segments.isEmpty()) {
                // Nothing to index for this document; skip the embedding call entirely.
                continue;
            }
            List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
            embeddingStore.addAll(embeddings, segments);
            indexedAnything = true;
        }
        if (!indexedAnything) {
            return;
        }

        // Search once, after the store is complete. Searching inside the loop re-embedded the
        // query for every document and printed overlapping matches against a half-built store.
        Response<Embedding> queryEmbedding = embeddingModel.embed(input);
        EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder()
                .queryEmbedding(queryEmbedding.content())
                .build();
        EmbeddingSearchResult<TextSegment> results = embeddingStore.search(searchRequest);
        for (EmbeddingMatch<TextSegment> match : results.matches()) {
            System.out.println(match.embedded().text() + ", 分数为" + match.score());
        }
    }
}

View File

@ -0,0 +1,9 @@
package com.ai.service;
import dev.langchain4j.service.SystemMessage;
/**
 * Chat assistant for the DWALK mall.
 *
 * <p>No implementation is written by hand: SegmentConfig passes this interface to
 * {@code AiServices.builder(...)}, together with a chat model, a chat memory provider and a
 * content retriever.
 */
public interface SegmentAssist {
/**
 * Sends one user message to the assistant and returns its reply.
 *
 * <p>The system message below reads, in English: "You are the DWALK mall assistant; you need
 * to answer questions based on the products listed in the DWALK mall."
 *
 * @param userMessage the text typed by the user
 * @return the assistant's reply as plain text
 */
@SystemMessage("你是DWALK商城助手你需要基于DWALK商城上架的商品来回答问题")
String chat(String userMessage);
}

View File

@ -2,17 +2,3 @@
地址halo.liangjl.top 地址halo.liangjl.top
账号liangjinglin 账号liangjinglin
密码: 353912918a 密码: 353912918a
DWALK商城 商品推荐优先级规则
1.根据本月销量推荐销量越高优先级越高考量占比50%
2.根据评分,评分越高,优先级越高 考量占比50%
3.根据评论数,评论数越多,优先级越高 考量占比10%
4.根据收藏数,收藏数越多,优先级越高 考量占比5%
DWALK商城商品情况
1.茅台 销量100 评分4 评论数100 收藏数100
2.meta50 销量200 评分5 评论数500 收藏数5000
3.机械革命 销量50 评分4.5 评论数80 收藏数100
4.潮汕牛肉丸 销量150 评分3 评论数100 收藏数20
5.风筒 销量80 评分3.5 评论数50 收藏数10
6.口红 销量30 评分4.5 评论数200 收藏数200