AipNlp.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. <?php
  2. /*
  3. * Copyright (c) 2017 Baidu.com, Inc. All Rights Reserved
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. * use this file except in compliance with the License. You may obtain a copy of
  7. * the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. * License for the specific language governing permissions and limitations under
  15. * the License.
  16. */
  17. require_once 'lib/AipBase.php';
  /**
   * Client for the Baidu AIP natural-language-processing (NLP) endpoints.
   *
   * Every public method collects its required arguments plus the caller's
   * $options into one array, serialises that array to JSON, and hands the
   * resulting string to $this->request() together with the endpoint URL.
   *
   * NOTE(review): the upstream parameter notes say the text must be GBK
   * encoded, but json_encode() only accepts UTF-8 strings, so callers
   * presumably pass UTF-8 and "GBK" describes the wire format - confirm.
   */
  class AipNlp extends AipBase
  {
      /**
       * Lexical analysis (lexer) API URL.
       * @var string
       */
      private $lexerUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer';

      /**
       * Lexical analysis, custom edition (lexer_custom) API URL.
       * @var string
       */
      private $lexerCustomUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer_custom';

      /**
       * Dependency parsing (dep_parser) API URL.
       * @var string
       */
      private $depParserUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/depparser';

      /**
       * Word vector representation (word_embedding) API URL.
       * @var string
       */
      private $wordEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_vec';

      /**
       * DNN language model (dnnlm_cn) API URL.
       * @var string
       */
      private $dnnlmCnUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/dnnlm_cn';

      /**
       * Word similarity (word_sim_embedding) API URL.
       * @var string
       */
      private $wordSimEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_sim';

      /**
       * Short text similarity (simnet) API URL.
       * @var string
       */
      private $simnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/simnet';

      /**
       * Comment opinion extraction (comment_tag) API URL.
       * @var string
       */
      private $commentTagUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag';

      /**
       * Sentiment classification (sentiment_classify) API URL.
       * @var string
       */
      private $sentimentClassifyUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify';

      /**
       * Article tagging (keyword) API URL.
       * @var string
       */
      private $keywordUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/keyword';

      /**
       * Article classification (topic) API URL.
       * @var string
       */
      private $topicUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/topic';

      /**
       * Text error correction (ecnet) API URL.
       * @var string
       */
      private $ecnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/ecnet';

      /**
       * Dialogue emotion recognition (emotion) API URL.
       * @var string
       */
      private $emotionUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/emotion';

      /**
       * News summary (news_summary) API URL.
       * @var string
       */
      private $newsSummaryUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/news_summary';

      /**
       * Address recognition (address) API URL.
       * @var string
       */
      private $addressUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/address';

      /**
       * Decode a raw response body into PHP data.
       *
       * The body is first transcoded from GBK to UTF-8 and decoded as JSON.
       * If that attempt yields null, the untouched body is decoded instead.
       * JSON_BIGINT_AS_STRING keeps oversized integers as strings rather
       * than converting them to floats.
       *
       * NOTE(review): the misspelled method name is kept on purpose; it is
       * presumably a hook declared by AipBase - confirm before renaming.
       *
       * @param string $content raw response body
       * @return mixed decoded JSON (objects as associative arrays), or null
       *               when neither attempt produces a value
       */
      protected function proccessResult($content)
      {
          $decoded = json_decode(
              mb_convert_encoding($content, 'UTF8', 'GBK'),
              true,
              512,
              JSON_BIGINT_AS_STRING
          );

          // Strict check: only a real null means the first attempt failed.
          // A loose comparison would also match legitimate empty payloads
          // such as [] or 0.
          if ($decoded === null) {
              $decoded = json_decode($content, true, 512, JSON_BIGINT_AS_STRING);
          }

          return $decoded;
      }

      /**
       * Build the request body shared by every endpoint of this class.
       *
       * $options is merged after $fields, so an option that uses the same
       * string key as a required field replaces that field's value. The
       * merged array is JSON-encoded and the JSON text is converted from
       * UTF-8 to GBK. json_encode() is called without
       * JSON_UNESCAPED_UNICODE, so multibyte characters are emitted as
       * \uXXXX escapes.
       *
       * @param array $fields  required request fields for the endpoint
       * @param array $options caller-supplied optional fields
       * @return string request body to pass to $this->request()
       */
      private function encodeRequestBody(array $fields, $options)
      {
          $payload = array_merge($fields, $options);

          return mb_convert_encoding(json_encode($payload), 'GBK', 'UTF8');
      }

      /**
       * Lexical analysis.
       *
       * @param string $text    text to analyse (upstream note: currently only
       *                        GBK encoding is supported), at most 65536 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function lexer($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          return $this->request($this->lexerUrl, $body);
      }

      /**
       * Lexical analysis (custom edition).
       *
       * @param string $text    text to analyse (upstream note: currently only
       *                        GBK encoding is supported), at most 65536 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function lexerCustom($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          return $this->request($this->lexerCustomUrl, $body);
      }

      /**
       * Dependency parsing.
       *
       * @param string $text    text to analyse (upstream note: currently only
       *                        GBK encoding is supported), at most 256 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @description options:
       *   mode  model selection, default 0; mode=0 selects the web model,
       *         mode=1 selects the query model
       * @return array value returned by $this->request() for this endpoint
       */
      public function depParser($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          return $this->request($this->depParserUrl, $body);
      }

      /**
       * Word vector representation.
       *
       * @param string $word    word to look up (upstream note: GBK encoded),
       *                        at most 64 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function wordEmbedding($word, $options = array())
      {
          $body = $this->encodeRequestBody(['word' => $word], $options);

          return $this->request($this->wordEmbeddingUrl, $body);
      }

      /**
       * DNN language model.
       *
       * @param string $text    text content (upstream note: GBK encoded), at
       *                        most 512 bytes; no word segmentation required
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function dnnlm($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          return $this->request($this->dnnlmCnUrl, $body);
      }

      /**
       * Word similarity.
       *
       * @param string $word1   first word (upstream note: GBK encoded), at
       *                        most 64 bytes
       * @param string $word2   second word (upstream note: GBK encoded), at
       *                        most 64 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @description options:
       *   mode  reserved field for choosing a word-similarity model;
       *         default 0, and only mode=0 is currently supported
       * @return array value returned by $this->request() for this endpoint
       */
      public function wordSimEmbedding($word1, $word2, $options = array())
      {
          $fields = [
              'word_1' => $word1,
              'word_2' => $word2,
          ];
          $body = $this->encodeRequestBody($fields, $options);

          return $this->request($this->wordSimEmbeddingUrl, $body);
      }

      /**
       * Short text similarity.
       *
       * @param string $text1   first text to compare (upstream note: GBK
       *                        encoded), at most 512 bytes
       * @param string $text2   second text to compare (upstream note: GBK
       *                        encoded), at most 512 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @description options:
       *   model  default "BOW"; one of "BOW", "CNN" or "GRNN"
       * @return array value returned by $this->request() for this endpoint
       */
      public function simnet($text1, $text2, $options = array())
      {
          $fields = [
              'text_1' => $text1,
              'text_2' => $text2,
          ];
          $body = $this->encodeRequestBody($fields, $options);

          return $this->request($this->simnetUrl, $body);
      }

      /**
       * Comment opinion extraction.
       *
       * @param string $text    comment content (upstream note: GBK encoded),
       *                        at most 10240 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @description options:
       *   type  industry category of the comment, default 4 (food and dining)
       * @return array value returned by $this->request() for this endpoint
       */
      public function commentTag($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          return $this->request($this->commentTagUrl, $body);
      }

      /**
       * Sentiment classification.
       *
       * @param string $text    text content (upstream note: GBK encoded), at
       *                        most 102400 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function sentimentClassify($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          return $this->request($this->sentimentClassifyUrl, $body);
      }

      /**
       * Article tagging.
       *
       * @param string $title   article title, at most 80 bytes
       * @param string $content article body, at most 65535 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function keyword($title, $content, $options = array())
      {
          $fields = [
              'title' => $title,
              'content' => $content,
          ];
          $body = $this->encodeRequestBody($fields, $options);

          return $this->request($this->keywordUrl, $body);
      }

      /**
       * Article classification.
       *
       * @param string $title   article title, at most 80 bytes
       * @param string $content article body, at most 65535 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function topic($title, $content, $options = array())
      {
          $fields = [
              'title' => $title,
              'content' => $content,
          ];
          $body = $this->encodeRequestBody($fields, $options);

          return $this->request($this->topicUrl, $body);
      }

      /**
       * Text error correction.
       *
       * @param string $text    text to correct, input limited to 511 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function ecnet($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          return $this->request($this->ecnetUrl, $body);
      }

      /**
       * Dialogue emotion recognition.
       *
       * @param string $text    text whose emotion is to be recognised, input
       *                        limited to 512 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @description options:
       *   scene  default (no scene distinction), talk (chit-chat dialogue),
       *          task (task-oriented dialogue such as navigation),
       *          customer_service (customer-service dialogue such as
       *          telecom or banking)
       * @return array value returned by $this->request() for this endpoint
       */
      public function emotion($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          return $this->request($this->emotionUrl, $body);
      }

      /**
       * News summary.
       *
       * @param string  $content       article text (upstream note: GBK only),
       *                               fewer than 3000 characters (6000 bytes);
       *                               over-long input makes the service return
       *                               an error. Separate paragraphs with "\n"
       *                               and keep them where possible, because
       *                               paragraph information matters to the
       *                               algorithm
       * @param integer $maxSummaryLen maximum length of the summary; e.g. for
       *                               a 1000-character article and a value of
       *                               150 the summary is at most 150
       *                               characters. Recommended range: 200-500
       * @param array   $options       optional parameters; keys and values
       *                               are strings
       * @description options:
       *   title  article title (upstream note: GBK only), fewer than 200
       *          characters (400 bytes); over-long input makes the service
       *          return an error. The title matters to the algorithm; pass an
       *          empty value when the article has no title
       * @return array value returned by $this->request() for this endpoint
       */
      public function newsSummary($content, $maxSummaryLen, $options = array())
      {
          $fields = [
              'content' => $content,
              'max_summary_len' => $maxSummaryLen,
          ];
          $body = $this->encodeRequestBody($fields, $options);

          return $this->request($this->newsSummaryUrl, $body);
      }

      /**
       * Address recognition.
       *
       * Unlike the other endpoints, this call also passes a
       * Content-Encoding header with the value "GBK" to $this->request().
       *
       * @param string $text    text to recognise, at most 1000 bytes
       * @param array  $options optional parameters; keys and values are strings
       * @return array value returned by $this->request() for this endpoint
       */
      public function address($text, $options = array())
      {
          $body = $this->encodeRequestBody(['text' => $text], $options);

          // Built as an explicit array instead of relying on PHP creating
          // $headers implicitly on first write.
          $headers = ['Content-Encoding' => 'GBK'];

          return $this->request($this->addressUrl, $body, $headers);
      }
  }