AutoP.php 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. <?php
  2. namespace Utils;
  3. /**
  4. * AutoP
  5. *
  6. * @copyright Copyright (c) 2012 Typecho Team. (http://typecho.org)
  7. * @author Joyqi <magike.net@gmail.com>
  8. * @license GNU General Public License 2.0
  9. */
  10. class AutoP
  11. {
  12. // 作为段落的标签
  13. private const BLOCK = 'p|pre|div|blockquote|form|ul|ol|dd|table|ins|h1|h2|h3|h4|h5|h6';
  14. /**
  15. * 唯一id
  16. *
  17. * @access private
  18. * @var integer
  19. */
  20. private $uniqueId = 0;
  21. /**
  22. * 存储的段落
  23. *
  24. * @access private
  25. * @var array
  26. */
  27. private $blocks = [];
  28. /**
  29. * 替换段落的回调函数
  30. *
  31. * @param array $matches 匹配值
  32. * @return string
  33. */
  34. public function replaceBlockCallback(array $matches): string
  35. {
  36. $tagMatch = '|' . $matches[1] . '|';
  37. $text = $matches[4];
  38. switch (true) {
  39. /** 用br处理换行 */
  40. case false !== strpos(
  41. '|li|dd|dt|td|p|a|span|cite|strong|sup|sub|small|del|u|i|b|ins|h1|h2|h3|h4|h5|h6|',
  42. $tagMatch
  43. ):
  44. $text = nl2br(trim($text));
  45. break;
  46. /** 用段落处理换行 */
  47. case false !== strpos('|div|blockquote|form|', $tagMatch):
  48. $text = $this->cutByBlock($text);
  49. if (false !== strpos($text, '</p><p>')) {
  50. $text = $this->fixParagraph($text);
  51. }
  52. break;
  53. default:
  54. break;
  55. }
  56. /** 没有段落能力的标签 */
  57. if (false !== strpos('|a|span|font|code|cite|strong|sup|sub|small|del|u|i|b|', $tagMatch)) {
  58. $key = '<b' . $matches[2] . '/>';
  59. } else {
  60. $key = '<p' . $matches[2] . '/>';
  61. }
  62. $this->blocks[$key] = "<{$matches[1]}{$matches[3]}>{$text}</{$matches[1]}>";
  63. return $key;
  64. }
  65. /**
  66. * 用段落方法处理换行
  67. *
  68. * @param string $text
  69. * @return string
  70. */
  71. private function cutByBlock(string $text): string
  72. {
  73. $space = "( | )";
  74. $text = str_replace("\r\n", "\n", trim($text));
  75. $text = preg_replace("/{$space}*\n{$space}*/is", "\n", $text);
  76. $text = preg_replace("/\s*<p:([0-9]{4})\/>\s*/is", "</p><p:\\1/><p>", $text);
  77. $text = preg_replace("/\n{2,}/", "</p><p>", $text);
  78. $text = nl2br($text);
  79. $text = preg_replace("/(<p>)?\s*<p:([0-9]{4})\/>\s*(<\/p>)?/is", "<p:\\2/>", $text);
  80. $text = preg_replace("/<p>{$space}*<\/p>/is", '', $text);
  81. $text = preg_replace("/\s*<p>\s*$/is", '', $text);
  82. $text = preg_replace("/^\s*<\/p>\s*/is", '', $text);
  83. return $text;
  84. }
  85. /**
  86. * 修复段落开头和结尾
  87. *
  88. * @param string $text
  89. * @return string
  90. */
  91. private function fixParagraph(string $text): string
  92. {
  93. $text = trim($text);
  94. if (!preg_match("/^<(" . self::BLOCK . ")(\s|>)/i", $text)) {
  95. $text = '<p>' . $text;
  96. }
  97. if (!preg_match("/<\/(" . self::BLOCK . ")>$/i", $text)) {
  98. $text = $text . '</p>';
  99. }
  100. return $text;
  101. }
  102. /**
  103. * 自动分段
  104. *
  105. * @param string $text
  106. * @return string
  107. */
  108. public function parse(string $text): string
  109. {
  110. /** 重置计数器 */
  111. $this->uniqueId = 0;
  112. $this->blocks = [];
  113. /** 将已有的段落后面的换行处理掉 */
  114. $text = preg_replace(["/<\/p>\s+<p(\s*)/is", "/\s*<br\s*\/?>\s*/is"], ["</p><p\\1", "<br />"], trim($text));
  115. /** 将所有非自闭合标签解析为唯一的字符串 */
  116. $foundTagCount = 0;
  117. $textLength = strlen($text);
  118. $uniqueIdList = [];
  119. if (preg_match_all("/<\/\s*([a-z0-9]+)>/is", $text, $matches, PREG_OFFSET_CAPTURE)) {
  120. foreach ($matches[0] as $key => $match) {
  121. $tag = $matches[1][$key][0];
  122. $leftOffset = $match[1] - $textLength;
  123. $posSingle = strrpos($text, '<' . $tag . '>', $leftOffset);
  124. $posFix = strrpos($text, '<' . $tag . ' ', $leftOffset);
  125. $pos = false;
  126. switch (true) {
  127. case (false !== $posSingle && false !== $posFix):
  128. $pos = max($posSingle, $posFix);
  129. break;
  130. case false === $posSingle && false !== $posFix:
  131. $pos = $posFix;
  132. break;
  133. case false !== $posSingle && false === $posFix:
  134. $pos = $posSingle;
  135. break;
  136. default:
  137. break;
  138. }
  139. if (false !== $pos) {
  140. $uniqueId = $this->makeUniqueId();
  141. $uniqueIdList[$uniqueId] = $tag;
  142. $tagLength = strlen($tag);
  143. $text = substr_replace($text, $uniqueId, $pos + 1 + $tagLength, 0);
  144. $text = substr_replace(
  145. $text,
  146. $uniqueId,
  147. $match[1] + 7 + $foundTagCount * 10 + $tagLength,
  148. 0
  149. ); // 7 = 5 + 2
  150. $foundTagCount++;
  151. }
  152. }
  153. }
  154. foreach ($uniqueIdList as $uniqueId => $tag) {
  155. $text = preg_replace_callback(
  156. "/<({$tag})({$uniqueId})([^>]*)>(.*)<\/\\1\\2>/is",
  157. [$this, 'replaceBlockCallback'],
  158. $text,
  159. 1
  160. );
  161. }
  162. $text = $this->cutByBlock($text);
  163. $blocks = array_reverse($this->blocks);
  164. foreach ($blocks as $blockKey => $blockValue) {
  165. $text = str_replace($blockKey, $blockValue, $text);
  166. }
  167. return $this->fixParagraph($text);
  168. }
  169. /**
  170. * 生成唯一的id, 为了速度考虑最多支持1万个tag的处理
  171. *
  172. * @return string
  173. */
  174. private function makeUniqueId(): string
  175. {
  176. return ':' . str_pad($this->uniqueId ++, 4, '0', STR_PAD_LEFT);
  177. }
  178. }