houzhongjian
2025-05-29 41499fd3c28216c1526a72b10fa98eb8ffee78cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
package com.iailab.module.ai.service.knowledge;
 
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.ObjUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import com.iailab.framework.common.enums.CommonStatusEnum;
import com.iailab.framework.common.pojo.PageResult;
import com.iailab.framework.common.util.object.BeanUtils;
import com.iailab.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentCreateListReqVO;
import com.iailab.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
import com.iailab.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
import com.iailab.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateStatusReqVO;
import com.iailab.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
import com.iailab.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
import com.iailab.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.tokenizer.TokenCountEstimator;
import org.springframework.context.annotation.Lazy;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
 
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
 
import static com.iailab.framework.common.exception.util.ServiceExceptionUtil.exception;
import static com.iailab.framework.common.util.collection.CollectionUtils.convertList;
import static com.iailab.module.ai.enums.ErrorCodeConstants.*;
 
/**
 * AI 知识库文档 Service 实现类
 *
 * @author xiaoxin
 */
@Service
@Slf4j
public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentService {
 
    @Resource
    private AiKnowledgeDocumentMapper knowledgeDocumentMapper;
 
    @Resource
    private TokenCountEstimator tokenCountEstimator;
 
    @Resource
    private AiKnowledgeSegmentService knowledgeSegmentService;
    @Resource
    @Lazy // 延迟加载,避免循环依赖
    private AiKnowledgeService knowledgeService;
 
    @Override
    public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
        // 1. 校验参数
        knowledgeService.validateKnowledgeExists(createReqVO.getKnowledgeId());
 
        // 2. 下载文档
        String content = readUrl(createReqVO.getUrl());
 
        // 3. 文档记录入库
        AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
                .setContent(content).setContentLength(content.length()).setTokens(tokenCountEstimator.estimate(content))
                .setStatus(CommonStatusEnum.ENABLE.getStatus());
        knowledgeDocumentMapper.insert(documentDO);
 
        // 4. 文档切片入库(异步)
        knowledgeSegmentService.createKnowledgeSegmentBySplitContentAsync(documentDO.getId(), content);
        return documentDO.getId();
    }
 
    @Override
    public List<Long> createKnowledgeDocumentList(AiKnowledgeDocumentCreateListReqVO createListReqVO) {
        // 1. 校验参数
        knowledgeService.validateKnowledgeExists(createListReqVO.getKnowledgeId());
 
        // 2. 下载文档
        List<String> contents = convertList(createListReqVO.getList(), document -> readUrl(document.getUrl()));
 
        // 3. 文档记录入库
        List<AiKnowledgeDocumentDO> documentDOs = new ArrayList<>(createListReqVO.getList().size());
        for (int i = 0; i < createListReqVO.getList().size(); i++) {
            AiKnowledgeDocumentCreateListReqVO.Document documentVO = createListReqVO.getList().get(i);
            String content = contents.get(i);
            documentDOs.add(BeanUtils.toBean(documentVO, AiKnowledgeDocumentDO.class)
                    .setKnowledgeId(createListReqVO.getKnowledgeId())
                    .setContent(content).setContentLength(content.length())
                    .setTokens(tokenCountEstimator.estimate(content))
                    .setSegmentMaxTokens(createListReqVO.getSegmentMaxTokens())
                    .setStatus(CommonStatusEnum.ENABLE.getStatus()));
        }
        knowledgeDocumentMapper.insertBatch(documentDOs);
 
        // 4. 批量创建文档切片(异步)
        documentDOs.forEach(documentDO -> knowledgeSegmentService
                .createKnowledgeSegmentBySplitContentAsync(documentDO.getId(), documentDO.getContent()));
        return convertList(documentDOs, AiKnowledgeDocumentDO::getId);
    }
 
    @Override
    public PageResult<AiKnowledgeDocumentDO> getKnowledgeDocumentPage(AiKnowledgeDocumentPageReqVO pageReqVO) {
        return knowledgeDocumentMapper.selectPage(pageReqVO);
    }
 
    @Override
    public AiKnowledgeDocumentDO getKnowledgeDocument(Long id) {
        return knowledgeDocumentMapper.selectById(id);
    }
 
    @Override
    public void updateKnowledgeDocument(AiKnowledgeDocumentUpdateReqVO reqVO) {
        // 1. 校验文档是否存在
        AiKnowledgeDocumentDO oldDocument = validateKnowledgeDocumentExists(reqVO.getId());
 
        // 2. 更新文档
        AiKnowledgeDocumentDO document = BeanUtils.toBean(reqVO, AiKnowledgeDocumentDO.class);
        knowledgeDocumentMapper.updateById(document);
 
        // 3. 如果处于开启状态,并且最大 tokens 发生变化,则 segment 需要重新索引
        if (CommonStatusEnum.isEnable(oldDocument.getStatus())
                && reqVO.getSegmentMaxTokens() != null
                && ObjUtil.notEqual(reqVO.getSegmentMaxTokens(), oldDocument.getSegmentMaxTokens())) {
            // 删除旧的文档切片
            knowledgeSegmentService.deleteKnowledgeSegmentByDocumentId(reqVO.getId());
            // 重新创建文档切片
            knowledgeSegmentService.createKnowledgeSegmentBySplitContentAsync(reqVO.getId(), oldDocument.getContent());
        }
    }
 
    @Override
    public void updateKnowledgeDocumentStatus(AiKnowledgeDocumentUpdateStatusReqVO reqVO) {
        // 1. 校验存在
        AiKnowledgeDocumentDO document = validateKnowledgeDocumentExists(reqVO.getId());
 
        // 2. 更新状态
        knowledgeDocumentMapper.updateById(new AiKnowledgeDocumentDO()
                .setId(reqVO.getId()).setStatus(reqVO.getStatus()));
 
        // 3. 处理文档切片
        if (CommonStatusEnum.isEnable(reqVO.getStatus())) {
            knowledgeSegmentService.createKnowledgeSegmentBySplitContentAsync(reqVO.getId(), document.getContent());
        } else {
            knowledgeSegmentService.deleteKnowledgeSegmentByDocumentId(reqVO.getId());
        }
    }
 
    @Override
    @Transactional(rollbackFor = Exception.class)
    public void deleteKnowledgeDocument(Long id) {
        // 1. 校验存在
        validateKnowledgeDocumentExists(id);
 
        // 2. 删除
        knowledgeDocumentMapper.deleteById(id);
 
        // 3. 删除对应的段落
        knowledgeSegmentService.deleteKnowledgeSegmentByDocumentId(id);
    }
 
    @Override
    public void updateKnowledgeDocumentRetrievalCountIncr(Collection<Long> ids) {
        if (CollUtil.isEmpty(ids)) {
            return;
        }
        knowledgeDocumentMapper.updateRetrievalCountIncr(ids);
    }
 
    @Override
    public AiKnowledgeDocumentDO validateKnowledgeDocumentExists(Long id) {
        AiKnowledgeDocumentDO knowledgeDocument = knowledgeDocumentMapper.selectById(id);
        if (knowledgeDocument == null) {
            throw exception(KNOWLEDGE_DOCUMENT_NOT_EXISTS);
        }
        return knowledgeDocument;
    }
 
    @Override
    public String readUrl(String url) {
        // 下载文件
        ByteArrayResource resource;
        try {
            byte[] bytes = HttpUtil.downloadBytes(url);
            if (bytes.length == 0) {
                throw exception(KNOWLEDGE_DOCUMENT_FILE_EMPTY);
            }
            resource = new ByteArrayResource(bytes);
        } catch (Exception e) {
            log.error("[readUrl][url({}) 读取失败]", url, e);
            throw exception(KNOWLEDGE_DOCUMENT_FILE_DOWNLOAD_FAIL);
        }
 
        // 读取文件
        TikaDocumentReader loader = new TikaDocumentReader(resource);
        List<Document> documents = loader.get();
        Document document = CollUtil.getFirst(documents);
        if (document == null || StrUtil.isEmpty(document.getText())) {
            throw exception(KNOWLEDGE_DOCUMENT_FILE_READ_FAIL);
        }
        return document.getText();
    }
 
    @Override
    public List<AiKnowledgeDocumentDO> getKnowledgeDocumentList(Collection<Long> ids) {
        if (CollUtil.isEmpty(ids)) {
            return Collections.emptyList();
        }
        return knowledgeDocumentMapper.selectBatchIds(ids);
    }
 
}