1 方法一:使用 JDK 自带的 URLConnection
<!-- 阿里JSON解析器 -->
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>2.0.16</version>
</dependency>
1.1 新建 HttpUtils 工具类,用于发送 POST 请求
package com.xinglong.spider.util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.net.ConnectException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
/**
 * Small HTTP helper built on {@link URLConnection}; currently it only offers a blocking POST.
 *
 * @author 胡云峰
 * @since 2022/12/7 10:43
 */
public class HttpUtils {
    private static final Logger log = LoggerFactory.getLogger(HttpUtils.class);

    /** Maximum time to wait for the TCP connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10_000;

    /** Maximum time to wait for response data, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 60_000;

    /**
     * Sends a POST request to the given URL and returns the response body.
     *
     * <p>The request body is written as UTF-8 and the response is decoded as UTF-8. Response
     * lines are concatenated without their line terminators. Failures are logged, not thrown:
     * in that case whatever was read so far (possibly the empty string) is returned.
     *
     * @param url         the URL to send the request to
     * @param param       the request body, e.g. {@code name1=value1&name2=value2} for form
     *                    posts; {@code null} is sent as an empty body
     * @param contentType the value of the {@code content-type} request header
     * @return the response body, or an empty/partial string if the request failed
     */
    public static String sendPost(String url, String param, String contentType) {
        StringBuilder result = new StringBuilder();
        try {
            log.info("sendPost - {}", url);
            URLConnection conn = new URL(url).openConnection();
            // Without explicit timeouts the connection blocks forever on a stalled server and the
            // SocketTimeoutException handler below could never be reached.
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);
            conn.setRequestProperty("accept", "*/*");
            conn.setRequestProperty("connection", "Keep-Alive");
            conn.setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
            conn.setRequestProperty("Accept-Charset", "utf-8");
            conn.setRequestProperty("content-type", contentType);
            conn.setDoOutput(true);
            conn.setDoInput(true);

            // Encode the body explicitly as UTF-8 instead of relying on the platform charset, and
            // use a plain Writer so that write failures surface as IOException.
            try (Writer out = new OutputStreamWriter(conn.getOutputStream(), StandardCharsets.UTF_8)) {
                out.write(param == null ? "" : param);
            }

            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
            }
            log.info("recv - {}", result);
        } catch (ConnectException e) {
            // NOTE(review): param is logged verbatim and may contain credentials - consider masking.
            log.error("调用HttpUtils.sendPost ConnectException, url={},param={}", url, param, e);
        } catch (SocketTimeoutException e) {
            log.error("调用HttpUtils.sendPost SocketTimeoutException, url={},param={}", url, param, e);
        } catch (IOException e) {
            log.error("调用HttpUtils.sendPost IOException, url={},param={}", url, param, e);
        } catch (Exception e) {
            log.error("调用HttpUtils.sendPost Exception, url={},param={}", url, param, e);
        }
        return result.toString();
    }
}
1.2 获取 access token
/**
 * Requests an OAuth access token from Baidu AI Cloud using the client-credentials grant.
 *
 * <p>Note from the original author: the access token is valid for 30 days and has to be
 * renewed periodically.
 *
 * @author 胡云峰
 * @return the value of the {@code access_token} field of the token response
 * @throws IllegalStateException if the response is empty or contains no {@code access_token}
 */
public static String getAccessToken() {
    // Token endpoint. The URL must not contain any whitespace, otherwise the host cannot be parsed.
    String url = "https://aip.baidubce.com/oauth/2.0/token";
    // The parameters are sent as a URL-encoded form body.
    String contentType = "application/x-www-form-urlencoded";
    // Fixed value required by the API.
    String grantType = "client_credentials";
    // API Key of the application - replace with your own.
    String clientId = "***********";
    // Secret Key of the application - replace with your own.
    String clientSecret = "************";
    String para = "grant_type=" + grantType + "&client_id=" + clientId + "&client_secret=" + clientSecret;

    String resp = HttpUtils.sendPost(url, para, contentType);

    // sendPost returns an empty string when the request failed, which parses to null.
    JSONObject jsonObject = JSONObject.parseObject(resp);
    if (jsonObject == null) {
        throw new IllegalStateException("Empty response from token endpoint: " + url);
    }
    Object accessToken = jsonObject.get("access_token");
    if (accessToken == null) {
        // Error responses carry a description instead of a token; surface it to the caller.
        throw new IllegalStateException("Token response contains no access_token: " + resp);
    }
    return accessToken.toString();
}
1.3 获取图片的文字信息
/**
* 获取图片文字信息
* @return
*/
public static String getOcrMessage(){
String url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic" + "?access_token=" +getAccessToken();
//设置contentType格式
String contentType = "application/x-www-form-urlencoded";
//image base64
//请求格式支持:PNG、JPG、JPEG、BMP、TIFF、PNM、WebP
String imgStr = "/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAA8AKADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDtrW1ga1hZoIySikkoOeKsCztv+feL/vgU2z/484P+ua/yqyKiMY8q0IjGPKtCIWdr/wA+0P8A3wKeLK1/59of+/YqUU7IHWnyx7D5Y9iIWVp/z6w/9+xThY2n/PrB/wB+xWb/AMJToQvfsbararcZxsaQDn0z0zWyjBgCDke1XOg4W542v3QcsX0IxYWf/PrB/wB+xThYWf8Az6Qf9+xVTWNcsdA09r3UJTHCGC5VSxJPQYFX7a4S5t45kztkUMMjBwRmk6Noqbjo+tg5Y9hBp9l/z6W//fsf4U4adZf8+dv/AN+l/wAKmBAoSeJ3KLIhYcFQ3IqeSPYOWPYYNOsf+fO3/wC/S/4U4abY/wDPlb/9+l/wqwKcKOWPYOWPYrjTLD/nytv+/S/4U8aZYf8APjbf9+l/wqwOKytV8U6JobqmpahDbu3IVjk/kKunRdSXLCN35IHGK3RfGl6f/wA+Nt/35X/CnjStP/58LX/vyv8AhUenatp+qwCewvILmL+9E4YD646Gr4qZU1F2asw5Y9isNK07/nwtf+/K/wCFOGk6d/0D7X/vyv8AhVoU8UuWPYOWPYqjSdN/6B9p/wB+V/wqtqel6fHpF66WNqrrA5VhCoIO08jitYVV1b/kC3//AF7yf+gmlKMeV6ClGPK9DkrP/jzg/wCua/yqyKr2f/HnB/1zX+VWRTj8KHH4UOFY3iOeWPR7tYGxM0TBD744raArJ1m1ae3YD0q4y5WmUfP+i2NrqN3NDeSSxyYyrKec985rt/C3i++8M6kmjaxMZrJ+IJ2OdnPHPp/KsnWvDdxFeNd2R2XCnOOgb/69YOralLqEUNvPaNHcxGvvI1lm8+kqUlqtL02luvL+mctvZrz/ADOy8f6prE/mxS3FsdPlmX7PEBljjoc/n+daHgLWtRbVJVvtULuE2vZyjBUDoyeormfD0o1uJdNvVD3FrzGX6kf4j+VdpB4bSR4p5Iz50JykinDD2z6V42MxKw1J4CrBJq/vJLys16pK73v6WNIrmfOmcp468R6tf+KJNM+2PBaKyoqqcAg9z69aypI9Q8I3VvqOn6g0qqwB6jnrgjJ4rc8X+H5L2cTr8s6jAJ/iHoa5a5GsTwLZ3EPy5GX9fqc4r1MvxtKdClGnKEYJWqR
lZX8/P/MicWm7/I9v/wCFi6XbaDY6vcRzta3LeWzxKG8p/RhnOOD0/rW3p/jLw7qUQktdYs2B/haUIw+qtgj8q8D8P6iNNnn0TUUWawuiDsf7obsfxwPyHpT9Q0rw3HePEbi5spFPMZ+ZcdiCR0/GvNq5VhI1HTblr70ZRXMnF7ab3WzLU5Wue7SeM9BfUI9Og1W2mu5chI4n39ATyRwOnc15F4h8LXV/4nuL++vFlhllJ2rncE7Csix1DwroMyXEFvc6jdxkMjO21VYd+38jXqXhiW18W6Ut8sJhlB2yRN1U/XuD2P8AhWdWniMr/f4RSUWrNySTv5LdIE1PSR53JpF74bY6r4dv542iG6SFm6gcnp1Hsa9Z+HnjpPFunOs6iO/t8CZR0bOcMPyrjviDDJomkNJbr947Cw/hzWX8HY2hvbu6zw+I8fTmtajljMqnisVZzi0oy6vun38gXu1OWJ9ALzTxUMB3Rg1OK+YNhwqrq3/IEv8A/r2k/wDQTVsVV1f/AJAl/wD9e0n/AKCamXwsmXws5Kz/AOPKD/rmv8qsiq9l/wAeUH/XNf5VZFEfhQR+FDhSPGHUg04U8VRRzWraMsisyrzXknia2v8ATL7zTGstvnrt5X2J/rXvssQdCMVyetaGZySq9a6cJXjQqc04Ka6p/wCfR+YpK60PJfDDTXnjG2uraIoitmTPTGOc/WvoCzt0khBwOlcXo/hxrecHZgA9hXoFpF5cSrW+Y45YucXGPLGKUUr30XdkwjymNqmiR3CEhea8u8WeGLsuJbWcoyc+WTgH/wCvXuTIGGKxNU0VbtTx1rmw2Inh6iqwtdd1dFNXVmfN95BqLzL58Db043KP612keiLrek2j3sDefGmNw4b8a7F/Bp+0btveuo0zQEiiCutenis7q16dOEIqDhs43W/bsRGmk31PONH8KQWk6vFZruB4ZxuP69K9T0WCVIQHzV6DSoY+iCtCOFUGAMV5NWtUqy5qkm33buWklscZ4w0sX9nJDJHvjcYYHvXmOiarD4K8UQ6e+nutvO6+ZOZGYlTwGC+x69+PpXvl5aLPGQRXMf2L5eoLMsS714DbeQPrXRhcUqSlConKEltdrXo9O3mmKUb6o7G1YNEuPSrIqlYoyxAHrV4VxlDhVXV/+QJf/wDXtJ/6Catiqur/APIEv/8Ar2k/9BNTL4WTL4WclZf8eVv/ANc1/lVkVzMWtXMUSRqkRCKFGQe341J/b91/zzh/75P+NZRrRsjONWNkdKKcK5n/AISG7/55wf8AfJ/xpf8AhIrv/nnB/wB8n/Gq9tEftonUAUGJX6iuY/4SS8/55Qf98n/Gl/4SW8/55Qf98n/Gj20Q9tE6dIEU8AVOoxXJf8JPe/8APK3/AO+W/wAaX/hKL3/nlb/98t/jR7aIe2ideKdtBrj/APhKr7/nlb/98t/jS/8ACV33/PK2/wC+W/xo9tEPbROvES+gqVUArjP+Etv/APnjbf8AfLf40v8Awl+of88bb/vlv/iqPbRD20TtgKeK4f8A4TDUP+eNr/3y3/xVL/wmWo/88bX/AL5b/wCKo9tEPbRO5xkUnkKTnFcR/wAJnqP/ADxtf++G/wDiqX/hNdS/54Wn/fDf/FUe2iHtoneIoUcVIK4D/hNtS/54Wn/fDf8AxVL/AMJxqf8AzwtP++G/+Ko9tEPbRPQRVXV/+QHqH/XtJ/6Ca4r/AITnU/8Anhaf98N/8VUdz4z1G6tZrd4bUJKjIxVWyARjj5qmVaNmKVWNmf/Z";
//注意:一定要urlencoder,否则会出现图片格式不对的报错
try {
//使用Postman工具或 Python、PHP 等请求库会自动进行urlencode,无需自行处理
imgStr = URLEncoder.encode(imgStr, "UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
//发起请求
String resp = HttpUtils.sendPost(url, "image=" + imgStr, contentType);
//利用阿里的fastjson解析
JSONObject jsonObject = JSONObject.parseObject(resp);
String wordsResult = jsonObject.get("words_result").toString();
//words_result是一个集合
JSONArray jsonArray = JSONArray.parseArray(wordsResult);
//组装解析后的结果
StringBuilder sb = new StringBuilder();
for (Object o : jsonArray) {
//Object强转成JSONObject
JSONObject jObject = (JSONObject) o;
//解析words
String words = jObject.get("words").toString();
//组装到sb
sb.append(words);
}
//返回结果
return sb.toString();
}
1.4 测试
/**
 * Demo entry point: runs OCR on the built-in sample image and prints the recognised text
 * to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    System.out.println(getOcrMessage());
}
1.5 结果
9*9=3
2 方法二:使用 jsoup
<!-- 阿里JSON解析器 -->
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>2.0.16</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
</dependency>
2.1 获取 access token
/**
 * Requests an OAuth access token from Baidu AI Cloud via jsoup, using the client-credentials grant.
 *
 * @return the value of the {@code access_token} field of the token response
 * @throws IOException if the request fails, or if the response is empty or contains no
 *                     {@code access_token}
 */
public static String getAccessToken() throws IOException {
    Connection con = Jsoup.connect("https://aip.baidubce.com/oauth/2.0/token");
    con.header("Content-Type", "application/x-www-form-urlencoded");
    // Fixed value required by the API.
    con.data("grant_type", "client_credentials");
    // API Key and Secret Key of the application - replace with your own.
    con.data("client_id", "************");
    con.data("client_secret", "****************");
    // The endpoint answers with JSON, which jsoup rejects unless content-type checking is disabled.
    Connection.Response response = con.ignoreContentType(true).method(Connection.Method.POST).execute();
    String body = response.body();

    JSONObject jsonObject = JSONObject.parseObject(body);
    if (jsonObject == null) {
        throw new IOException("Empty response from token endpoint");
    }
    Object accessToken = jsonObject.get("access_token");
    if (accessToken == null) {
        // Error responses carry a description instead of a token; surface it to the caller.
        throw new IOException("Token response contains no access_token: " + body);
    }
    return accessToken.toString();
}
2.2 获取图片的文字信息
/**
 * Sends a Base64-encoded image to Baidu's {@code accurate_basic} OCR endpoint via jsoup and
 * returns the recognised text.
 *
 * @param imgStr the Base64-encoded image, passed to jsoup as form field {@code image}
 * @return the {@code words} values of all {@code words_result} entries, concatenated in order
 * @throws IOException if the request fails, or if the response is empty or contains no
 *                     {@code words_result}
 */
public static String getOcrMessage(String imgStr) throws IOException {
    String url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic" + "?access_token=" + getAccessToken();
    Connection con = Jsoup.connect(url);
    con.header("Content-Type", "application/x-www-form-urlencoded");
    con.data("image", imgStr);
    // The endpoint answers with JSON, which jsoup rejects unless content-type checking is disabled.
    Connection.Response execute = con.ignoreContentType(true).method(Connection.Method.POST).execute();
    String body = execute.body();

    JSONObject jsonObject = JSONObject.parseObject(body);
    if (jsonObject == null) {
        throw new IOException("Empty response from OCR endpoint");
    }
    // words_result is an array of objects, each holding one recognised piece of text under "words".
    JSONArray wordsResult = jsonObject.getJSONArray("words_result");
    if (wordsResult == null) {
        // Error responses carry a description instead of words_result; surface it to the caller.
        throw new IOException("OCR response contains no words_result: " + body);
    }

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < wordsResult.size(); i++) {
        String words = wordsResult.getJSONObject(i).getString("words");
        if (words != null) {
            sb.append(words);
        }
    }
    return sb.toString();
}
2.3 测试
public static void main(String[] args) throws IOException {
//image base64 使用jsoup不需要urlencode
String imgStr = "/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAA8AKADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDtrW1ga1hZoIySikkoOeKsCztv+feL/vgU2z/484P+ua/yqyKiMY8q0IjGPKtCIWdr/wA+0P8A3wKeLK1/59of+/YqUU7IHWnyx7D5Y9iIWVp/z6w/9+xThY2n/PrB/wB+xWb/AMJToQvfsbararcZxsaQDn0z0zWyjBgCDke1XOg4W542v3QcsX0IxYWf/PrB/wB+xThYWf8Az6Qf9+xVTWNcsdA09r3UJTHCGC5VSxJPQYFX7a4S5t45kztkUMMjBwRmk6Noqbjo+tg5Y9hBp9l/z6W//fsf4U4adZf8+dv/AN+l/wAKmBAoSeJ3KLIhYcFQ3IqeSPYOWPYYNOsf+fO3/wC/S/4U4abY/wDPlb/9+l/wqwKcKOWPYOWPYrjTLD/nytv+/S/4U8aZYf8APjbf9+l/wqwOKytV8U6JobqmpahDbu3IVjk/kKunRdSXLCN35IHGK3RfGl6f/wA+Nt/35X/CnjStP/58LX/vyv8AhUenatp+qwCewvILmL+9E4YD646Gr4qZU1F2asw5Y9isNK07/nwtf+/K/wCFOGk6d/0D7X/vyv8AhVoU8UuWPYOWPYqjSdN/6B9p/wB+V/wqtqel6fHpF66WNqrrA5VhCoIO08jitYVV1b/kC3//AF7yf+gmlKMeV6ClGPK9DkrP/jzg/wCua/yqyKr2f/HnB/1zX+VWRTj8KHH4UOFY3iOeWPR7tYGxM0TBD744raArJ1m1ae3YD0q4y5WmUfP+i2NrqN3NDeSSxyYyrKec985rt/C3i++8M6kmjaxMZrJ+IJ2OdnPHPp/KsnWvDdxFeNd2R2XCnOOgb/69YOralLqEUNvPaNHcxGvvI1lm8+kqUlqtL02luvL+mctvZrz/ADOy8f6prE/mxS3FsdPlmX7PEBljjoc/n+daHgLWtRbVJVvtULuE2vZyjBUDoyeormfD0o1uJdNvVD3FrzGX6kf4j+VdpB4bSR4p5Iz50JykinDD2z6V42MxKw1J4CrBJq/vJLys16pK73v6WNIrmfOmcp468R6tf+KJNM+2PBaKyoqqcAg9z69aypI9Q8I3VvqOn6g0qqwB6jnrgjJ4rc8X+H5L2cTr8s6jAJ/iHoa5a5GsTwLZ3EPy5GX9fqc4r1MvxtKdClGnKEYJWqR
lZX8/P/MicWm7/I9v/wCFi6XbaDY6vcRzta3LeWzxKG8p/RhnOOD0/rW3p/jLw7qUQktdYs2B/haUIw+qtgj8q8D8P6iNNnn0TUUWawuiDsf7obsfxwPyHpT9Q0rw3HePEbi5spFPMZ+ZcdiCR0/GvNq5VhI1HTblr70ZRXMnF7ab3WzLU5Wue7SeM9BfUI9Og1W2mu5chI4n39ATyRwOnc15F4h8LXV/4nuL++vFlhllJ2rncE7Csix1DwroMyXEFvc6jdxkMjO21VYd+38jXqXhiW18W6Ut8sJhlB2yRN1U/XuD2P8AhWdWniMr/f4RSUWrNySTv5LdIE1PSR53JpF74bY6r4dv542iG6SFm6gcnp1Hsa9Z+HnjpPFunOs6iO/t8CZR0bOcMPyrjviDDJomkNJbr947Cw/hzWX8HY2hvbu6zw+I8fTmtajljMqnisVZzi0oy6vun38gXu1OWJ9ALzTxUMB3Rg1OK+YNhwqrq3/IEv8A/r2k/wDQTVsVV1f/AJAl/wD9e0n/AKCamXwsmXws5Kz/AOPKD/rmv8qsiq9l/wAeUH/XNf5VZFEfhQR+FDhSPGHUg04U8VRRzWraMsisyrzXknia2v8ATL7zTGstvnrt5X2J/rXvssQdCMVyetaGZySq9a6cJXjQqc04Ka6p/wCfR+YpK60PJfDDTXnjG2uraIoitmTPTGOc/WvoCzt0khBwOlcXo/hxrecHZgA9hXoFpF5cSrW+Y45YucXGPLGKUUr30XdkwjymNqmiR3CEhea8u8WeGLsuJbWcoyc+WTgH/wCvXuTIGGKxNU0VbtTx1rmw2Inh6iqwtdd1dFNXVmfN95BqLzL58Db043KP612keiLrek2j3sDefGmNw4b8a7F/Bp+0btveuo0zQEiiCutenis7q16dOEIqDhs43W/bsRGmk31PONH8KQWk6vFZruB4ZxuP69K9T0WCVIQHzV6DSoY+iCtCOFUGAMV5NWtUqy5qkm33buWklscZ4w0sX9nJDJHvjcYYHvXmOiarD4K8UQ6e+nutvO6+ZOZGYlTwGC+x69+PpXvl5aLPGQRXMf2L5eoLMsS714DbeQPrXRhcUqSlConKEltdrXo9O3mmKUb6o7G1YNEuPSrIqlYoyxAHrV4VxlDhVXV/+QJf/wDXtJ/6Catiqur/APIEv/8Ar2k/9BNTL4WTL4WclZf8eVv/ANc1/lVkVzMWtXMUSRqkRCKFGQe341J/b91/zzh/75P+NZRrRsjONWNkdKKcK5n/AISG7/55wf8AfJ/xpf8AhIrv/nnB/wB8n/Gq9tEftonUAUGJX6iuY/4SS8/55Qf98n/Gl/4SW8/55Qf98n/Gj20Q9tE6dIEU8AVOoxXJf8JPe/8APK3/AO+W/wAaX/hKL3/nlb/98t/jR7aIe2ideKdtBrj/APhKr7/nlb/98t/jS/8ACV33/PK2/wC+W/xo9tEPbROvES+gqVUArjP+Etv/APnjbf8AfLf40v8Awl+of88bb/vlv/iqPbRD20TtgKeK4f8A4TDUP+eNr/3y3/xVL/wmWo/88bX/AL5b/wCKo9tEPbRO5xkUnkKTnFcR/wAJnqP/ADxtf++G/wDiqX/hNdS/54Wn/fDf/FUe2iHtoneIoUcVIK4D/hNtS/54Wn/fDf8AxVL/AMJxqf8AzwtP++G/+Ko9tEPbRPQRVXV/+QHqH/XtJ/6Ca4r/AITnU/8Anhaf98N/8VUdz4z1G6tZrd4bUJKjIxVWyARjj5qmVaNmKVWNmf/Z";
String ocrMessage = getOcrMessage(imgStr);
System.out.println(ocrMessage);
}