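一、主要操作步骤
1. 申请文件上传租约(ApplyFileUploadLease)
2. 使用租约返回的预签名 URL 上传文档
3. 调用 AddFile 将已上传的文档添加到百炼数据中心,系统会自动解析文档
4. 调用 SubmitIndexAddDocumentsJob 将解析后的文档添加到索引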
二、参数获取
- categoryId:数据中心类目id
- workspaceId:业务空间id
- indexId:索引id
- ak、sk:阿里云账号的 AccessKey ID(ACCESS_KEY_ID)与 AccessKey Secret(ACCESS_KEY_SECRET)
ACCESS_KEY_ID、ACCESS_KEY_SECRET 参数获取地址:阿里云控制台的 AccessKey 管理页面
三、完整的 Java 代码示例
- pom.xml
<dependencies>
    <!-- https://mvnrepository.com/artifact/com.alibaba/dashscope-sdk-java -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>dashscope-sdk-java</artifactId>
        <version>2.15.1</version>
    </dependency>
    <dependency>
        <groupId>com.aliyun</groupId>
        <artifactId>bailian20231229</artifactId>
        <version>1.4.1</version>
    </dependency>
</dependencies>
- Code
import com.google.gson.internal.LinkedTreeMap; import java.io.DataOutputStream; import java.io.FileInputStream; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Formatter; import java.util.HashMap; public class apply { public static void main(String[] args_) throws Exception { // 上传文件的本地路径地址 String filePath = "C:\\Users\\*****\\1.0.01.04.36569.docx"; String categoryId = "cate_d78d9993**********";//数据中心类目id String workspaceId = "llm-w*******"; //业务空间id String indexId = "eev*******";// 索引库索引id String ak = "LT*********"; String sk = "8R**********"; // 初始化参数 String md5 = ""; String byteLength = ""; String fileName = ""; try { Path path = Paths.get(filePath); fileName = path.getFileName().toString(); // 读取文件内容到字节数组 byte[] fileBytes = Files.readAllBytes(path); // 获取文件字节内容长度 byteLength = fileBytes.length + ""; // 计算MD5值 md5 = calculateMD5(fileBytes); } catch (IOException | NoSuchAlgorithmException e) { e.printStackTrace(); } // 初始化Client对象 com.aliyun.teaopenapi.Client client = apply.createClient(ak, sk); //1、申请文件上传租约 HashMap<String,Object> response = apply.applyFileUploadLease(client, fileName,md5,byteLength,categoryId,workspaceId); // 从response中提取data,获取上传文件必要的url和 if(response.containsKey("body")){ LinkedTreeMap<String, Object> body1 = (LinkedTreeMap<String, Object>) response.get("body"); if (body1.containsKey("Data")) { LinkedTreeMap<String, Object> dataObject = (LinkedTreeMap<String, Object>) body1.get("Data"); // 这里是你需要的Data对象 String fileUploadLeaseId = (String) dataObject.get("FileUploadLeaseId"); String type = (String) dataObject.get("Type"); LinkedTreeMap<String, Object> param = (LinkedTreeMap<String, Object>) dataObject.get("Param"); String preSignedUrl = (String) param.get("Url"); LinkedTreeMap<String, Object> headers = (LinkedTreeMap<String, 
Object>) param.get("Headers"); String contentType = (String) headers.get("Content-Type"); String extra = (String) headers.get("X-bailian-extra"); //2、使用租约上传文档 uploadFile(preSignedUrl,filePath,extra,contentType,Integer.parseInt(byteLength)); //3、addfile,添加上传的文档到百炼系统,系统自动解析文档 String fileId = addFile(client, fileUploadLeaseId, workspaceId, categoryId); //4、将解析的文档添加到索引 AddDocumentsJob(client,indexId,workspaceId,fileId,categoryId); }else { System.out.println("body not found in response."); } } } /** * <b>description</b> : * <p>使用AK & SK初始化账号Client</p> * @return Client * * @throws Exception */ public static com.aliyun.teaopenapi.Client createClient(String ak, String sk) throws Exception { // 工程代码泄露可能会导致 AccessKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考。 // 建议使用更安全的 STS 方式,更多鉴权访问方式请参见:https://help.aliyun.com/document_detail/378657.html。 com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config() // 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_ID。 .setAccessKeyId(ak) // 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。 .setAccessKeySecret(sk); // Endpoint 请参考 https://api.aliyun.com/product/bailian config.endpoint = "bailian.cn-beijing.aliyuncs.com"; return new com.aliyun.teaopenapi.Client(config); } /** * <b>申请文档上传租约</b> : * <p>API 相关</p> * @return OpenApi.Params */ public static com.aliyun.teaopenapi.models.Params createApplyFileUploadLeaseApiInfo(String CategoryId, String WorkspaceId) throws Exception { com.aliyun.teaopenapi.models.Params params = new com.aliyun.teaopenapi.models.Params() // 接口名称 .setAction("ApplyFileUploadLease") // 接口版本 .setVersion("2023-12-29") // 接口协议 .setProtocol("HTTPS") // 接口 HTTP 方法 .setMethod("POST") .setAuthType("AK") .setStyle("ROA") // 接口 PATH .setPathname("/" + WorkspaceId + "/datacenter/category/" + CategoryId + "") // 接口请求体内容格式 .setReqBodyType("formData") // 接口响应体内容格式 .setBodyType("json"); return params; } public static HashMap<String,Object> applyFileUploadLease(com.aliyun.teaopenapi.Client client, String fileName, 
String md5, String byteLength, String categoryId, String workspaceId ) throws Exception{ com.aliyun.teaopenapi.models.Params params = apply.createApplyFileUploadLeaseApiInfo(categoryId, workspaceId); // body params java.util.Map<String, Object> body = new java.util.HashMap<>(); body.put("FileName", fileName); body.put("Md5", md5); body.put("SizeInBytes", byteLength); // runtime options com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); com.aliyun.teaopenapi.models.OpenApiRequest request = new com.aliyun.teaopenapi.models.OpenApiRequest() .setBody(body); // 返回值为 Map 类型,可从 Map 中获得三类数据:响应体 body、响应头 headers、HTTP 返回的状态码 statusCode。 HashMap<String,Object> response = (HashMap<String, java.lang.Object>) client.callApi(params, request, runtime); return response; } /** * 计算md5值 * @param bytes * @return * @throws NoSuchAlgorithmException */ private static String calculateMD5(byte[] bytes) throws NoSuchAlgorithmException { MessageDigest md = MessageDigest.getInstance("MD5"); byte[] digest = md.digest(bytes); return byteArrayToHexString(digest); } private static String byteArrayToHexString(byte[] bytes) { Formatter formatter = new Formatter(); for (byte b : bytes) { formatter.format("%02x", b); } String result = formatter.toString(); formatter.close(); return result; } /** * 使用获取到的租约上传实际文件到数据中心,注意这一步上传后在数据中心还看不到文档,AddFile之后才可以在数据中心看到添加的文档 * @param preSignedUrl * @param filePath * @param extra * @param contentType * @param length */ public static void uploadFile(String preSignedUrl, String filePath,String extra,String contentType,int length ) { HttpURLConnection connection = null; try { // 创建URL对象 URL url = new URL(preSignedUrl); connection = (HttpURLConnection) url.openConnection(); // 设置请求⽅法为PUT,预签名URL默认⽤于PUT操作进⾏⽂件上传 connection.setRequestMethod("PUT"); // 允许向connection输出,因为这个连接是⽤于上传⽂件的 connection.setDoOutput(true); // 设置请求头,这⾥可以根据需要设置⽐如Content-Type connection.setRequestProperty("X-bailian-extra", extra); 
connection.setRequestProperty("Content-Type", contentType); // 读取⽂件并通过连接上传 try (DataOutputStream outStream = new DataOutputStream(connection.getOutputStream()); FileInputStream fileInputStream = new FileInputStream(filePath)) { byte[] buffer = new byte[length]; int bytesRead; while ((bytesRead = fileInputStream.read(buffer)) != -1) { outStream.write(buffer, 0, bytesRead); } outStream.flush(); } // 检查响应代码 int responseCode = connection.getResponseCode(); if (responseCode == HttpURLConnection.HTTP_OK) { // ⽂件上传成功处理 System.out.println("File uploaded successfully."); } else { // ⽂件上传失败处理 System.out.println("Failed to upload the file. ResponseCod e: " + responseCode); } } catch (Exception e) { e.printStackTrace(); } finally { if (connection != null) { connection.disconnect(); } } } /** * 构建添加文档API * @param WorkspaceId * @return * @throws Exception */ public static com.aliyun.teaopenapi.models.Params createAddFileApiInfo(String WorkspaceId) throws Exception { com.aliyun.teaopenapi.models.Params params = new com.aliyun.teaopenapi.models.Params() // 接⼝名称 .setAction("AddFile") // 接⼝版本 .setVersion("2023-12-29") // 接⼝协议 .setProtocol("HTTPS") // 接⼝ HTTP ⽅法 .setMethod("PUT") .setAuthType("AK") .setStyle("ROA") // 接⼝ PATH .setPathname("/" + WorkspaceId + "/datacenter/file") // 接⼝请求体内容格式 .setReqBodyType("formData") // 接⼝响应体内容格式 .setBodyType("json"); return params; } /** * AddFile * 该接口用于将已经成功上传的文档添加到百炼系统数据中心,添加成功之后,系统会自动启动文件的解析,在数据中心可以看到文档 * @param client * @param leaseId * @param workSpaceId * @param categoryId * @throws Exception */ public static String addFile(com.aliyun.teaopenapi.Client client,String leaseId,String workSpaceId,String categoryId) throws Exception { com.aliyun.teaopenapi.models.Params params = apply.createAddFileApiInfo(workSpaceId); // body params java.util.Map<String, Object> body = new java.util.HashMap<>(); body.put("LeaseId", leaseId); body.put("Parser", "DASHSCOPE_DOCMIND"); body.put("CategoryId", categoryId); // runtime options 
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); com.aliyun.teaopenapi.models.OpenApiRequest request = new com.aliyun.teaopenapi.models.OpenApiRequest() .setBody(body); // 复制代码运行请自行打印 API 的返回值 // 返回值为 Map 类型,可从 Map 中获得三类数据:响应体 body、响应头 headers、HTTP 返回的状态码 statusCode。 HashMap<String,Object> response = (HashMap<String, Object>) client.callApi(params, request, runtime); LinkedTreeMap<String, Object> file_body = (LinkedTreeMap<String, Object>) response.get("body"); LinkedTreeMap<String, Object> data = (LinkedTreeMap<String, Object>) file_body.get("Data"); String fileId = (String) data.get("FileId"); System.out.println("addFile:" + response.get("statusCode")); return fileId; } public static com.aliyun.teaopenapi.models.Params createSubmitIndexAddDocumentsJobApiInfo(String WorkspaceId) throws Exception { com.aliyun.teaopenapi.models.Params params = new com.aliyun.teaopenapi.models.Params() // 接口名称 .setAction("SubmitIndexAddDocumentsJob") // 接口版本 .setVersion("2023-12-29") // 接口协议 .setProtocol("HTTPS") // 接口 HTTP 方法 .setMethod("POST") .setAuthType("AK") .setStyle("ROA") // 接口 PATH .setPathname("/" + WorkspaceId + "/index/add_documents_to_index") // 接口请求体内容格式 .setReqBodyType("json") // 接口响应体内容格式 .setBodyType("json"); return params; } /** * 追加以及添加到数据中心的文档到索引 * @param client * @param indexId * @param workSpaceId * @param documentIds * @param categoryIds * @throws Exception */ public static void AddDocumentsJob(com.aliyun.teaopenapi.Client client,String indexId,String workSpaceId,String documentIds, String categoryIds) throws Exception { com.aliyun.teaopenapi.models.Params params = apply.createSubmitIndexAddDocumentsJobApiInfo(workSpaceId); // query params java.util.Map<String, Object> queries = new java.util.HashMap<>(); queries.put("IndexId", indexId); queries.put("SourceType", "DATA_CENTER_FILE"); queries.put("DocumentIds", com.aliyun.openapiutil.Client.arrayToStringWithSpecifiedStyle(java.util.Arrays.asList( documentIds ), 
"DocumentIds", "json")); queries.put("CategoryIds", com.aliyun.openapiutil.Client.arrayToStringWithSpecifiedStyle(java.util.Arrays.asList( categoryIds ), "CategoryIds", "json")); // runtime options com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); com.aliyun.teaopenapi.models.OpenApiRequest request = new com.aliyun.teaopenapi.models.OpenApiRequest() .setQuery(com.aliyun.openapiutil.Client.query(queries)); // 复制代码运行请自行打印 API 的返回值 // 返回值为 Map 类型,可从 Map 中获得三类数据:响应体 body、响应头 headers、HTTP 返回的状态码 statusCode。 HashMap<String,Object> response = (HashMap<String, Object>) client.callApi(params, request, runtime); System.out.println("AddDocumentsJob: " + response.get("statusCode")); } }
- 上传效果查看