html2json.js 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. import wxDiscode from './wxDiscode';
  2. import HTMLParser from './htmlparser';
  /**
   * Build a membership lookup table from a comma-separated list of names.
   *
   * The table has no prototype, so names that every ordinary object inherits
   * (for example "constructor" or "toString") are not reported as members
   * unless they are explicitly listed.
   *
   * @param {string} str - Comma-separated names, e.g. 'div,p,span'.
   * @returns {Object<string, boolean>} Object whose keys are the listed names,
   *   each mapped to `true`.
   */
  function makeMap(str) {
    const lookup = Object.create(null);
    str.split(',').forEach((item) => {
      lookup[item] = true;
    });
    return lookup;
  }
  // Tag names classified as block-level. Some names (applet, button, del,
  // iframe, ins, map, object, script) also appear in `inline`; html2json tests
  // `block` first, so those tags end up with tagType 'block'.
  const block = makeMap(
    'br,code,address,article,applet,aside,audio,blockquote,button,canvas,center,dd,del,dir,div,dl,dt,fieldset,figcaption,figure,footer,form,frameset,h1,h2,h3,h4,h5,h6,header,hgroup,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,output,p,pre,section,script,table,tbody,td,tfoot,th,thead,tr,ul,video'
  );

  // Tag names classified as inline; html2json also appends an ' inline' class
  // to nodes that receive this tag type.
  const inline = makeMap(
    'a,abbr,acronym,applet,b,basefont,bdo,big,button,cite,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var'
  );

  // Tag names classified as 'closeSelf'. This table is consulted last, so a
  // name that is also listed in `block` never receives this tag type.
  // NOTE(review): 'options' looks like a typo for 'option' — confirm before changing.
  const closeSelf = makeMap(
    'colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr'
  );
  /**
   * Strip everything outside the <body> element, including any DOCTYPE and
   * <head> section.
   *
   * The opening tag is matched with `[^>]*` rather than `.*`, so the match
   * stops at the end of the <body ...> tag itself. A greedy `.*` would consume
   * the markup that follows on the same line and return an empty or truncated
   * body for single-line documents.
   *
   * @param {string} html - Full document or HTML fragment.
   * @returns {string} The text between <body ...> and the last </body>, or the
   *   input unchanged when no such pair is found.
   */
  function removeDOCTYPE(html) {
    // exec() returns the capture directly, avoiding the deprecated RegExp.$1.
    const bodyMatch = /<body[^>]*>([^]*)<\/body>/.exec(html);
    return bodyMatch ? bodyMatch[1] : html;
  }
  /**
   * Remove markup that must not reach the parser: HTML comments, single-line
   * slash-star comments, runs of spaces directly before a tag, and
   * <script> / <style> elements together with their contents.
   *
   * Comment, script and style matches are lazy and may span newlines, so each
   * element is removed individually and the content between two such elements
   * is preserved.
   *
   * @param {string} html - HTML text to clean.
   * @returns {string} The cleaned HTML text.
   */
  function trimHtml(html) {
    return html
      // HTML comments, including ones that span several lines.
      .replace(/<!--[^]*?-->/gi, '')
      // Slash-star comments contained on a single line.
      .replace(/\/\*.*?\*\//gi, '')
      // Spaces immediately preceding a tag.
      .replace(/[ ]+</gi, '<')
      // Each script / style element, matched up to its own closing tag.
      .replace(/<script[^]*?<\/script>/gi, '')
      .replace(/<style[^]*?<\/style>/gi, '');
  }
  /**
   * Request the window size from `wx.getSystemInfo`.
   *
   * The returned object is created empty and handed back right away; its
   * `width` and `height` are filled in when the `success` callback runs.
   * NOTE(review): whether that callback fires before this function returns is
   * not visible here — callers should not assume the fields are already set.
   *
   * @returns {{width: (number|undefined), height: (number|undefined)}} Object
   *   that receives `windowWidth` as `width` and `windowHeight` as `height`.
   */
  function getScreenInfo() {
    const dimensions = {};
    const onSuccess = (info) => {
      dimensions.width = info.windowWidth;
      dimensions.height = info.windowHeight;
    };
    wx.getSystemInfo({ success: onSuccess });
    return dimensions;
  }
  /**
   * Convert an HTML string into a tree of plain node objects.
   *
   * The input is reduced to its <body> contents, stripped of comments, scripts
   * and styles, passed through `wxDiscode.strDiscode`, and then fed to
   * `HTMLParser`. Element nodes carry `node: 'element'`, `tag`, `tagType`,
   * `attr`, `classStr`, `styleStr`, `$screen` and (when they have children)
   * `nodes`; text nodes carry `node: 'text'` and `text`.
   *
   * @param {string} html - HTML document or fragment.
   * @param {Object} [customHandler] - Optional hooks. `start(node, results)`,
   *   `end(node, results)` and `chars(node, results)` are called for each
   *   element start, element end and text node respectively, before the node
   *   is attached to its parent.
   * @param {Object} [imageProp] - Properties copied onto the `attr` of every
   *   <img> node; its `domain` is forwarded to `wxDiscode.urlToHttpUrl`.
   * @param {*} [host] - Accepted for interface compatibility; not used here.
   * @returns {{nodes: Array<Object>, imageUrls: Array<string>}} The top-level
   *   nodes and the resolved URL of every image encountered.
   */
  function html2json(html, customHandler, imageProp, host) {
    // Both option objects are optional; fall back to empty objects so the
    // property lookups below never dereference undefined or null.
    const handler = customHandler || {};
    const imgOptions = imageProp || {};

    // Normalize the input: keep only the <body> contents ...
    let markup = removeDOCTYPE(html);
    // ... drop comments, styles and scripts ...
    markup = trimHtml(markup);
    // ... and run the project's string decoding step.
    markup = wxDiscode.strDiscode(markup);

    // Stack of currently open elements; index 0 is the innermost one.
    const bufArray = [];
    const results = {
      nodes: [],
      imageUrls: [],
    };
    const screen = getScreenInfo();

    function Node(tag) {
      this.node = 'element';
      this.tag = tag;
      this.$screen = screen;
    }

    // Attach a finished node to the innermost open element, or to the result
    // root when no element is open.
    function appendToParent(child) {
      const parent = bufArray[0] || results;
      if (!parent.nodes) {
        parent.nodes = [];
      }
      parent.nodes.push(child);
    }

    HTMLParser(markup, {
      start(tag, attrs, unary) {
        const node = new Node(tag);

        // Give the enclosing element its `nodes` array as soon as a child
        // element starts.
        if (bufArray.length !== 0) {
          const parent = bufArray[0];
          if (parent.nodes === undefined) {
            parent.nodes = [];
          }
        }

        // `block` takes precedence over `inline`, which takes precedence over
        // `closeSelf`, for tags listed in more than one table.
        if (block[tag]) {
          node.tagType = 'block';
        } else if (inline[tag]) {
          node.tagType = 'inline';
        } else if (closeSelf[tag]) {
          node.tagType = 'closeSelf';
        }

        node.attr = attrs.reduce((pre, attr) => {
          const { name } = attr;
          let { value } = attr;
          // Keep the raw class / style text before the value is split.
          if (name === 'class') {
            node.classStr = value;
          }
          if (name === 'style') {
            node.styleStr = value;
          }
          // Values containing a space are stored as an array of words.
          if (value.match(/ /)) {
            value = value.split(' ');
          }
          // Merge repeated attributes instead of overwriting them.
          if (pre[name]) {
            if (Array.isArray(pre[name])) {
              // already an array: append
              pre[name].push(value);
            } else {
              // single value so far: turn it into an array
              pre[name] = [pre[name], value];
            }
          } else {
            // first occurrence
            pre[name] = value;
          }
          return pre;
        }, {});

        // Style helpers: the tag name always becomes part of the class string.
        if (node.classStr) {
          node.classStr += ` ${node.tag}`;
        } else {
          node.classStr = node.tag;
        }
        if (node.tagType === 'inline') {
          node.classStr += ' inline';
        }

        // Extra data for <img>: resolved URL plus the caller's image props.
        if (node.tag === 'img') {
          let imgUrl = node.attr.src;
          imgUrl = wxDiscode.urlToHttpUrl(imgUrl, imgOptions.domain);
          Object.assign(node.attr, imgOptions, {
            src: imgUrl || '',
          });
          if (imgUrl) {
            results.imageUrls.push(imgUrl);
          }
        }

        // <a> always gets an href, possibly empty.
        if (node.tag === 'a') {
          node.attr.href = node.attr.href || '';
        }

        // Translate the presentational attributes of <font> into inline CSS.
        if (node.tag === 'font') {
          // Index i holds the CSS keyword for size="i + 1".
          const fontSize = [
            'x-small',
            'small',
            'medium',
            'large',
            'x-large',
            'xx-large',
            '-webkit-xxx-large',
          ];
          const styleAttrs = {
            color: 'color',
            face: 'font-family',
            size: 'font-size',
          };
          if (!node.styleStr) node.styleStr = '';
          Object.keys(styleAttrs).forEach((key) => {
            const attrValue = node.attr[key];
            if (!attrValue) return;
            // Values with spaces were split into arrays above; join them back
            // so "Times New Roman" is not emitted as "Times,New,Roman".
            const raw = Array.isArray(attrValue) ? attrValue.join(' ') : attrValue;
            const value = key === 'size' ? fontSize[raw - 1] : raw;
            // Sizes outside 1-7 (or non-numeric ones) have no keyword; skip
            // them rather than emitting "font-size: undefined;".
            if (value === undefined) return;
            node.styleStr += `${styleAttrs[key]}: ${value};`;
          });
        }

        // Remember the latest <source> URL until the enclosing <video> closes.
        if (node.tag === 'source') {
          results.source = node.attr.src;
        }

        // NOTE(review): the two marker comments below look like uni-app
        // conditional-compilation directives — keep them verbatim.
        // #ifndef MP-BAIDU
        if (handler.start) {
          handler.start(node, results);
        }
        // #endif

        if (unary) {
          // No end tag will follow (e.g. <img src="hoge.png"/>): attach now.
          appendToParent(node);
        } else {
          bufArray.unshift(node);
        }
      },
      end(tag) {
        const node = bufArray.shift();
        if (!node) {
          // End tag with no open element: report it and ignore the tag.
          console.error('invalid state: mismatch end tag');
          return;
        }
        if (node.tag !== tag) {
          console.error('invalid state: mismatch end tag');
        }
        // Move the remembered <source> URL onto the closing <video>.
        if (node.tag === 'video' && results.source) {
          node.attr.src = results.source;
          delete results.source;
        }
        if (handler.end) {
          handler.end(node, results);
        }
        appendToParent(node);
      },
      chars(text) {
        // Whitespace-only text produces no node.
        if (!text.trim()) return;
        const node = {
          node: 'text',
          text,
        };
        if (handler.chars) {
          handler.chars(node, results);
        }
        appendToParent(node);
      },
    });
    return results;
  }
  202. export default html2json;