// address-parse.js (14 KB)
  1. /**
  2. * 需要解析的地址,type是解析的方式,默认是正则匹配
  3. * @param address
  4. * @param options:type: 0:正则,1:树查找, textFilter: 清洗的字段
  5. * @returns {{}|({area: Array, province: Array, phone: string, city: Array, name: string, detail: Array} & {area: (*|string), province: (*|string), city: (*|string), detail: (Array|boolean|string|string)})}
  6. * @constructor
  7. */
  8. const AddressParse = (address, options, provinceString, cityString, areaString) => {
  9. const { type = 0, textFilter = [] } = typeof options === 'object' ? options : {};
  10. if (!address) {
  11. return {}
  12. }
  13. const parseResult = {
  14. phone: '',
  15. province: [],
  16. city: [],
  17. area: [],
  18. detail: [],
  19. name: '',
  20. };
  21. address = cleanAddress(address, textFilter);
  22. // 识别手机号
  23. const resultPhone = filterPhone(address);
  24. address = resultPhone.address;
  25. parseResult.phone = resultPhone.phone;
  26. const resultCode = filterPostalCode(address);
  27. address = resultCode.address;
  28. parseResult.postalCode = resultCode.postalCode;
  29. // 地址分割
  30. const splitAddress = address.split(' ').filter(item => item).map(item => item.trim());
  31. // 找省市区和详细地址
  32. splitAddress.forEach((item) => {
  33. // 识别地址
  34. if (!parseResult.province[0] || !parseResult.city[0] || !parseResult.area[0]) {
  35. // 两个方法都可以解析,正则和树查找
  36. let parse;
  37. type === 1 && (parse = parseRegion(item, parseResult));
  38. type === 0 && (parse = parseRegionWithRegexp(item, parseResult, provinceString, cityString, areaString));
  39. const {province, city, area, detail} = parse;
  40. parseResult.province = province || [];
  41. parseResult.area = area || [];
  42. parseResult.city = city || [];
  43. parseResult.detail = parseResult.detail.concat(detail || []);
  44. } else {
  45. parseResult.detail.push(item);
  46. }
  47. });
  48. // 地址都解析完了,如果还没有姓名,那么姓名应该是在详细地址里面,取详细地址里面长度最小的那个
  49. if (!parseResult.name) {
  50. const detail = JSON.parse(JSON.stringify(parseResult.detail));
  51. detail.sort((a, b) => a.length - b.length);
  52. parseResult.name = detail[0];
  53. const nameIndex = parseResult.detail.findIndex(item => item === parseResult.name);
  54. parseResult.detail.splice(nameIndex, 1)
  55. }
  56. const province = parseResult.province[0];
  57. const city = parseResult.city[0];
  58. const area = parseResult.area[0];
  59. const detail = parseResult.detail;
  60. return Object.assign(parseResult, {
  61. province: (province && province.name) || '',
  62. city: (city && city.name) || '',
  63. area: (area && area.name) || '',
  64. detail: (detail && detail.length > 0 && detail.join('')) || ''
  65. })
  66. };
  67. /**
  68. * 利用正则表达式解析
  69. * @param fragment
  70. * @param hasParseResult
  71. * @returns {{area: (Array|*|string), province: (Array|*|string), city: (Array|*|string|string), detail: (*|Array)}}
  72. */
  73. const parseRegionWithRegexp = (fragment, hasParseResult, provinceString, cityString, areaString) => {
  74. // log('----- 当前使用正则匹配模式 -----')
  75. let province = hasParseResult.province || [], city = hasParseResult.city || [], area = hasParseResult.area || [],
  76. detail = [];
  77. let matchStr = '';
  78. if (province.length === 0) {
  79. for (let i = 1; i < fragment.length; i++) {
  80. const str = fragment.substring(0, i + 1);
  81. const regexProvince = new RegExp(`\{\"code\":\"[0-9]{1,6}\",\"name\":\"${str}[\u4E00-\u9FA5]*?\"}`, 'g');
  82. const matchProvince = provinceString.match(regexProvince);
  83. if (matchProvince) {
  84. const provinceObj = JSON.parse(matchProvince[0]);
  85. if (matchProvince.length === 1) {
  86. province = [];
  87. matchStr = str;
  88. province.push(provinceObj)
  89. }
  90. } else {
  91. break
  92. }
  93. }
  94. if (province[0]) {
  95. fragment = fragment.replace(matchStr, '')
  96. }
  97. }
  98. if (city.length === 0) {
  99. for (let i = 1; i < fragment.length; i++) {
  100. const str = fragment.substring(0, i + 1);
  101. const regexCity = new RegExp(`\{\"code\":\"[0-9]{1,6}\",\"name\":\"${str}[\u4E00-\u9FA5]*?\",\"provinceCode\":\"${province[0] ? `${province[0].code}` : '[0-9]{1,6}'}\"\}`, 'g');
  102. const matchCity = cityString.match(regexCity);
  103. if (matchCity) {
  104. const cityObj = JSON.parse(matchCity[0]);
  105. if (matchCity.length === 1) {
  106. city = [];
  107. matchStr = str;
  108. city.push(cityObj);
  109. }
  110. } else {
  111. break;
  112. }
  113. }
  114. if (city[0]) {
  115. const {provinceCode} = city[0];
  116. fragment = fragment.replace(matchStr, '');
  117. if (province.length === 0) {
  118. const regexProvince = new RegExp(`\{\"code\":\"${provinceCode}\",\"name\":\"[\u4E00-\u9FA5]+?\"}`, 'g');
  119. const matchProvince = provinceString.match(regexProvince);
  120. province.push(JSON.parse(matchProvince[0]))
  121. }
  122. }
  123. }
  124. if (area.length === 0) {
  125. for (let i = 1; i < fragment.length; i++) {
  126. const str = fragment.substring(0, i + 1);
  127. const regexArea = new RegExp(`\{\"code\":\"[0-9]{1,6}\",\"name\":\"${str}[\u4E00-\u9FA5]*?\",\"cityCode\":\"${city[0] ? city[0].code : '[0-9]{1,6}'}\",\"provinceCode\":\"${province[0] ? `${province[0].code}` : '[0-9]{1,6}'}\"\}`, 'g');
  128. const matchArea = areaString.match(regexArea);
  129. if (matchArea) {
  130. const areaObj = JSON.parse(matchArea[0]);
  131. if (matchArea.length === 1) {
  132. area = [];
  133. matchStr = str;
  134. area.push(areaObj);
  135. }
  136. } else {
  137. break
  138. }
  139. }
  140. if (area[0]) {
  141. const {provinceCode, cityCode} = area[0];
  142. fragment = fragment.replace(matchStr, '');
  143. if (province.length === 0) {
  144. const regexProvince = new RegExp(`\{\"code\":\"${provinceCode}\",\"name\":\"[\u4E00-\u9FA5]+?\"}`, 'g');
  145. const matchProvince = provinceString.match(regexProvince);
  146. province.push(JSON.parse(matchProvince[0]));
  147. }
  148. if (city.length === 0) {
  149. const regexCity = new RegExp(`\{\"code\":\"${cityCode}\",\"name\":\"[\u4E00-\u9FA5]+?\",\"provinceCode\":\"${provinceCode}\"\}`, 'g');
  150. const matchCity = cityString.match(regexCity);
  151. city.push(JSON.parse(matchCity[0]));
  152. }
  153. }
  154. }
  155. // 解析完省市区如果还存在地址,则默认为详细地址
  156. if (fragment.length > 0) {
  157. detail.push(fragment)
  158. }
  159. return {
  160. province,
  161. city,
  162. area,
  163. detail,
  164. }
  165. };
  166. /**
  167. * 利用树向下查找解析
  168. * @param fragment
  169. * @param hasParseResult
  170. * @returns {{area: Array, province: Array, city: Array, detail: Array}}
  171. */
  172. const parseRegion = (fragment, hasParseResult) => {
  173. // log('----- 当前使用树查找模式 -----')
  174. let province = [], city = [], area = [], detail = [];
  175. if (hasParseResult.province[0]) {
  176. province = hasParseResult.province
  177. } else {
  178. // 从省开始查找
  179. for (const tempProvince of provinces) {
  180. const {name} = tempProvince;
  181. let replaceName = '';
  182. for (let i = name.length; i > 1; i--) {
  183. const temp = name.substring(0, i);
  184. if (fragment.indexOf(temp) === 0) {
  185. replaceName = temp;
  186. break;
  187. }
  188. }
  189. if (replaceName) {
  190. province.push(tempProvince);
  191. fragment = fragment.replace(replaceName, '');
  192. break
  193. }
  194. }
  195. }
  196. if (hasParseResult.city[0]) {
  197. city = hasParseResult.city
  198. } else {
  199. // 从市区开始查找
  200. for (const tempCity of cities) {
  201. const {name, provinceCode} = tempCity;
  202. const currentProvince = province[0];
  203. // 有省
  204. if (currentProvince) {
  205. if (currentProvince.code === provinceCode) {
  206. let replaceName = '';
  207. for (let i = name.length; i > 1; i--) {
  208. const temp = name.substring(0, i);
  209. if (fragment.indexOf(temp) === 0) {
  210. replaceName = temp;
  211. break;
  212. }
  213. }
  214. if (replaceName) {
  215. city.push(tempCity);
  216. fragment = fragment.replace(replaceName, '');
  217. break;
  218. }
  219. }
  220. } else {
  221. // 没有省,市不可能重名
  222. for (let i = name.length; i > 1; i--) {
  223. const replaceName = name.substring(0, i);
  224. if (fragment.indexOf(replaceName) === 0) {
  225. city.push(tempCity);
  226. province.push(provinces.find(item => item.code === provinceCode));
  227. fragment = fragment.replace(replaceName, '');
  228. break;
  229. }
  230. }
  231. if (city.length > 0) {
  232. break;
  233. }
  234. }
  235. }
  236. }
  237. // 从区市县开始查找
  238. for (const tempArea of areas) {
  239. const {name, provinceCode, cityCode} = tempArea;
  240. const currentProvince = province[0];
  241. const currentCity = city[0];
  242. // 有省或者市
  243. if (currentProvince || currentCity) {
  244. if ((currentProvince && currentProvince.code === provinceCode)
  245. || (currentCity && currentCity.code) === cityCode) {
  246. let replaceName = '';
  247. for (let i = name.length; i > 1; i--) {
  248. const temp = name.substring(0, i);
  249. if (fragment.indexOf(temp) === 0) {
  250. replaceName = temp;
  251. break
  252. }
  253. }
  254. if (replaceName) {
  255. area.push(tempArea);
  256. !currentCity && city.push(cities.find(item => item.code === cityCode));
  257. !currentProvince && province.push(provinces.find(item => item.code === provinceCode));
  258. fragment = fragment.replace(replaceName, '');
  259. break;
  260. }
  261. }
  262. } else {
  263. // 没有省市,区县市有可能重名,这里暂时不处理,因为概率极低,可以根据添加市解决
  264. for (let i = name.length; i > 1; i--) {
  265. const replaceName = name.substring(0, i);
  266. if (fragment.indexOf(replaceName) === 0) {
  267. area.push(tempArea);
  268. city.push(cities.find(item => item.code === cityCode));
  269. province.push(provinces.find(item => item.code === provinceCode));
  270. fragment = fragment.replace(replaceName, '');
  271. break;
  272. }
  273. }
  274. if (area.length > 0) {
  275. break;
  276. }
  277. }
  278. }
  279. // 解析完省市区如果还存在地址,则默认为详细地址
  280. if (fragment.length > 0) {
  281. detail.push(fragment)
  282. }
  283. return {
  284. province,
  285. city,
  286. area,
  287. detail,
  288. }
  289. };
  290. /**
  291. * 匹配电话
  292. * @param address
  293. * @returns {{address: *, phone: string}}
  294. */
  295. const filterPhone = (address) => {
  296. let phone = '';
  297. // 整理电话格式
  298. address = address.replace(/(\d{3})-(\d{4})-(\d{4})/g, '$1$2$3');
  299. address = address.replace(/(\d{3}) (\d{4}) (\d{4})/g, '$1$2$3');
  300. address = address.replace(/(\d{4}) \d{4} \d{4}/g, '$1$2$3');
  301. address = address.replace(/(\d{4})/g, '$1');
  302. const mobileReg = /(\d{7,12})|(\d{3,4}-\d{6,8})|(86-[1][0-9]{10})|(86[1][0-9]{10})|([1][0-9]{10})/g;
  303. const mobile = mobileReg.exec(address);
  304. if (mobile) {
  305. phone = mobile[0];
  306. address = address.replace(mobile[0], ' ')
  307. }
  308. return {address, phone}
  309. };
  310. /**
  311. * 匹配邮编
  312. * @param address
  313. * @returns {{address: *, postalCode: string}}
  314. */
  315. const filterPostalCode = (address) => {
  316. let postalCode = '';
  317. const postalCodeReg = /\d{6}/g;
  318. const code = postalCodeReg.exec(address);
  319. if (code) {
  320. postalCode = code[0];
  321. address = address.replace(code[0], ' ');
  322. }
  323. return {address, postalCode}
  324. };
  325. /**
  326. * 地址清洗
  327. * @param address
  328. * @returns {*}
  329. */
  330. const cleanAddress = (address, textFilter = []) => {
  331. // 去换行等
  332. address = address
  333. .replace(/\r\n/g, ' ')
  334. .replace(/\n/g, ' ')
  335. .replace(/\t/g, ' ');
  336. // 自定义去除关键字,可自行添加
  337. const search = [
  338. '详细地址',
  339. '收货地址',
  340. '收件地址',
  341. '地址',
  342. '所在地区',
  343. '地区',
  344. '姓名',
  345. '收货人',
  346. '收件人',
  347. '联系人',
  348. '收',
  349. '邮编',
  350. '联系电话',
  351. '电话',
  352. '联系人手机号码',
  353. '手机号码',
  354. '手机号',
  355. ].concat(textFilter);
  356. search.forEach(str => {
  357. address = address.replace(new RegExp(str, 'g'), ' ')
  358. });
  359. const pattern = new RegExp("[`~!@#$^&*()=|{}':;',\\[\\]\.<>/?~!@#¥……&*()——|{}【】‘;:”“’。,、?]", 'g');
  360. address = address.replace(pattern, ' ');
  361. // 多个空格replace为一个
  362. address = address.replace(/ {2,}/g, ' ');
  363. return address
  364. };
  365. export default AddressParse;