SyncBr.php 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. <?php
  2. namespace App\Console\Commands;
  3. use Carbon\Carbon;
  4. use Illuminate\Console\Command;
  5. use Illuminate\Support\Facades\DB;
  6. use Illuminate\Support\Facades\File;
  7. use Symfony\Component\DomCrawler\Crawler;
  /**
   * Imports the product catalogue of mietubloficial.com.br into the local database.
   *
   * For every configured category slug the command looks up the matching
   * `dist_product_category` row (dist_id 3), scrapes the category listing for product
   * links and, for each product page, inserts a `dist_product` row, stores the images
   * under public/static/tpl/screen_protector_solutions/product/{id} and rewrites the
   * image URLs so that they point at the OSS host.
   *
   * The command is not idempotent: every run inserts new product rows.
   *
   * Usage: php artisan sync:brsite
   */
  class SyncBr extends Command
  {
      /** Host that is written into the stored image URLs. */
      private const OSS_HOST = 'https://mietubl-website.oss-accelerate.aliyuncs.com';

      /** Site that is scraped; also used to resolve relative links. */
      private const BASE_URL = 'https://mietubloficial.com.br';

      /** Value of the `dist_id` column used for every lookup and insert. */
      private const DIST_ID = 3;

      /** Image directory, relative to both public_path() and the OSS host. */
      private const IMAGE_DIR = 'static/tpl/screen_protector_solutions/product';

      /** Extensions that may be used for files written below the public directory. */
      private const IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'webp', 'avif', 'bmp', 'svg'];

      /** Category slugs to import; the listing page of each one lives at /{slug}/. */
      private const CATEGORY_SLUGS = [
          'tws',
          'protetor-de-tela-de-vidro-temperado',
          'protetor-da-tela-do-tablet',
          'maquina-de-corte-de-protetor-de-tela',
          'folhas-de-protetor-de-tela-de-hidrogel',
          'lightning',
          'type-c',
          'micro-usb',
          'fones-de-ouvido-auricular-com-fio',
          'fones-de-ouvido',
          'alto-falantes-bluetooth',
          'carregador-de-parede',
          'produtos-perifericos',
      ];

      /** Categories whose product pages are listed here instead of being scraped. */
      private const FIXED_DETAIL_URLS = [
          'produtos-perifericos' => [
              'https://mietubloficial.com.br/produto/mini-impressora-de-pele-para-celular-mtb-pp01/',
          ],
      ];

      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'sync:brsite';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'Import the products of mietubloficial.com.br into the dist_product tables';

      /**
       * Runs the import for every configured category.
       *
       * A failure while importing one product is reported and does not stop the
       * remaining products or categories.
       *
       * @return int Process exit code (always 0; individual failures are only logged).
       */
      public function handle()
      {
          foreach (self::CATEGORY_SLUGS as $slug) {
              try {
                  $category = DB::table('dist_product_category')
                      ->where('slug', $slug)
                      ->where('dist_id', self::DIST_ID)
                      ->first();

                  if (!$category) {
                      $this->warn("分类未找到,slug: {$slug}");
                      continue;
                  }

                  $this->line("分类 {$category->name} ");
                  $detailUrls = $this->detailUrlsFor($slug);
              } catch (\Exception $e) {
                  $this->error('数据采集失败: ' . $e->getMessage());
                  continue;
              }

              foreach ($detailUrls as $detailUrl) {
                  // Caught per product so that one broken page does not abort the whole category.
                  try {
                      $this->importProduct($detailUrl, $category->id);
                  } catch (\Exception $e) {
                      $this->error("数据采集失败: {$detailUrl}: " . $e->getMessage());
                  }
              }
          }

          // dd() would terminate the process with exit status 1 even after a successful run.
          $this->info('所有处理完成');

          return 0;
      }

      /**
       * Returns the product page URLs of one category.
       *
       * @param string $slug Category slug.
       *
       * @return string[]
       */
      private function detailUrlsFor(string $slug): array
      {
          if (array_key_exists($slug, self::FIXED_DETAIL_URLS)) {
              return self::FIXED_DETAIL_URLS[$slug];
          }

          $listing = new Crawler($this->fetchRemote(self::BASE_URL . "/{$slug}/"));
          $hrefs = $listing->filter('.elementor-shortcode a')->extract(['href']);

          // The listing may link to the same product more than once; de-duplicate so
          // that a product is not inserted twice.
          return array_values(array_unique(array_filter($hrefs)));
      }

      /**
       * Scrapes one product page and stores the product together with its images.
       *
       * @param string     $detailUrl  URL of the product page.
       * @param int|string $categoryId Value written to `dist_product.category_id`.
       *
       * @throws \Exception When the page cannot be fetched, has no title, or a write fails.
       */
      private function importProduct(string $detailUrl, $categoryId): void
      {
          $page = new Crawler($this->fetchRemote($detailUrl));

          // text() without a default throws when the title is missing, which skips the product.
          $title = $page->filter('.product_title')->text();
          $content = $page
              ->filter('.elementor-widget-woocommerce-product-content .elementor-widget-container')
              ->html('');

          $shortDescription = '.woocommerce-product-details__short-description';
          // SEO description: text of the first paragraph of the short description.
          $seoDescription = $page->filter("{$shortDescription} p:first-child")->text('');
          // SKU: taken from the "Modelo:" entry of the second paragraph.
          $sku = $this->extractSku($page->filter("{$shortDescription} p:nth-child(2)")->html(''));

          $this->line("处理产品 {$title} ");

          $now = Carbon::now();
          $productId = DB::table('dist_product')->insertGetId([
              'title' => $title,
              'content' => $content, // raw HTML; replaced below once the images are stored
              'slug' => '',
              'dist_id' => self::DIST_ID,
              'enabled' => 1,
              'status' => 2,
              'seo_description' => $seoDescription,
              'sku' => $sku,
              'category_id' => $categoryId,
              'created_at' => $now,
              'updated_at' => $now,
              'parameters' => json_encode([]),
          ]);
          $this->line('插入产品获取ID完成');

          $this->line('处理content中的图片');
          $processedContent = $content === ''
              ? $content
              : $this->localizeContentImages($content, $productId);

          DB::table('dist_product')
              ->where('id', $productId)
              ->update(['content' => $processedContent, 'slug' => $productId]);
          $this->line('获取处理后的HTML并更新数据库');

          $this->line('处理主图');
          $this->importGalleryImages($page, $productId);

          $this->line('处理主图完成,下一个产品 ');
          $this->line('------------------------------------------');
      }

      /**
       * Extracts the value that follows "Modelo:" in an HTML fragment.
       *
       * Closing tags directly after the label (for example a bold label) are skipped;
       * the value ends at the next tag.
       *
       * @param string $html Inner HTML of the paragraph that holds the model line.
       *
       * @return string The trimmed value, or '' when the label is not present.
       */
      private function extractSku(string $html): string
      {
          if (preg_match('/Modelo:(?:\s*<\/[a-z0-9]+>)*\s*([^<]*)/', $html, $matches) !== 1) {
              return '';
          }

          return trim($matches[1]);
      }

      /**
       * Downloads every <img> of the product content and points its src at the OSS host.
       *
       * <source> elements and the srcset / sizes / data-mce-src attributes are removed
       * because they would still reference the original site.
       *
       * @param string $content   Product content HTML (UTF-8).
       * @param int    $productId Id of the product the images belong to.
       *
       * @return string The rewritten HTML.
       */
      private function localizeContentImages(string $content, int $productId): string
      {
          $content = preg_replace('/<source\b[^>]*>.*?<\/source>/is', '', $content) ?? $content;
          $content = preg_replace('/<source[^>]*>/', '', $content) ?? $content;

          // The 'HTML-ENTITIES' target of mb_convert_encoding() is deprecated since PHP 8.2;
          // numeric entities keep non-ASCII text intact for DOMDocument in the same way.
          $encoded = mb_encode_numericentity($content, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

          $previousSetting = libxml_use_internal_errors(true);
          $dom = new \DOMDocument();
          // With LIBXML_HTML_NOIMPLIED the parser needs a single root element, otherwise
          // sibling nodes get nested into the first one; hence the wrapper <div>.
          $dom->loadHTML("<div>{$encoded}</div>", LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
          libxml_clear_errors();
          libxml_use_internal_errors($previousSetting);

          $imageDir = $this->productImageDir($productId);

          foreach ($dom->getElementsByTagName('img') as $img) {
              $source = trim($img->getAttribute('src'));
              if ($source === '') {
                  continue;
              }

              try {
                  $bytes = $this->fetchRemote($source);
              } catch (\Exception $e) {
                  // Best effort: a missing image must not fail the whole product.
                  $this->warn("图片下载失败: {$source}");
                  continue;
              }

              $filename = md5(uniqid('', true) . microtime()) . $this->imageExtension($source);
              $this->storeImage($imageDir, $filename, $bytes);

              $img->setAttribute(
                  'src',
                  self::OSS_HOST . '/' . self::IMAGE_DIR . "/{$productId}/{$filename}"
              );
              $img->removeAttribute('srcset');
              $img->removeAttribute('data-mce-src');
              $img->removeAttribute('sizes');
          }

          // Serialise the children of the wrapper so that the <div> itself is not stored.
          $html = '';
          foreach ($dom->documentElement->childNodes as $node) {
              $html .= $dom->saveHTML($node);
          }

          return $html;
      }

      /**
       * Downloads the gallery images of a product page and registers them in
       * `dist_product_image`.
       *
       * @param Crawler $page      Crawler of the product page.
       * @param int     $productId Id of the product the images belong to.
       */
      private function importGalleryImages(Crawler $page, int $productId): void
      {
          $imageUrls = $page->filter('.woocommerce-product-gallery__wrapper a')->extract(['href']);
          $imageDir = $this->productImageDir($productId);

          foreach ($imageUrls as $index => $imageUrl) {
              $this->line("处理主图 {$imageUrl} ");

              try {
                  $bytes = $this->fetchRemote($imageUrl);
              } catch (\Exception $e) {
                  $this->warn("图片下载失败: {$imageUrl}");
                  continue;
              }

              // The index prefix keeps names unique within one product directory.
              $filename = "{$index}_" . md5((string) time()) . $this->imageExtension($imageUrl);
              $this->storeImage($imageDir, $filename, $bytes);

              $now = Carbon::now();
              DB::table('dist_product_image')->insert([
                  'product_id' => $productId,
                  'image_url' => self::OSS_HOST . '/' . self::IMAGE_DIR . "/{$productId}/{$filename}",
                  // NOTE(review): every image gets order 0; confirm whether the gallery
                  // position ($index) was intended here.
                  'order' => 0,
                  'created_at' => $now,
                  'updated_at' => $now,
              ]);
          }
      }

      /**
       * Fetches a remote resource over HTTP(S).
       *
       * Site-relative and protocol-relative URLs are resolved against the scraped site.
       * Other schemes are rejected because the URL comes from scraped HTML and
       * file_get_contents() would otherwise also read local paths and stream wrappers.
       *
       * @param string $url URL taken from configuration or from a scraped page.
       *
       * @return string The response body.
       *
       * @throws \RuntimeException When the URL is not HTTP(S) or the download fails.
       */
      private function fetchRemote(string $url): string
      {
          $url = trim($url);
          if (strncmp($url, '//', 2) === 0) {
              $url = 'https:' . $url;
          } elseif (strncmp($url, '/', 1) === 0) {
              $url = self::BASE_URL . $url;
          }

          if (preg_match('#^https?://#i', $url) !== 1) {
              throw new \RuntimeException("不支持的URL: {$url}");
          }

          $body = file_get_contents($url);
          if ($body === false) {
              throw new \RuntimeException("下载失败: {$url}");
          }

          return $body;
      }

      /**
       * Returns the file extension to use for a downloaded image, including the dot.
       *
       * The extension is read from the path part of the URL, so a query string does not
       * end up in the file name. Anything that is not a known image extension yields ''
       * so that no file with an arbitrary extension is written below the public directory.
       *
       * @param string $url Image URL.
       *
       * @return string For example ".jpg", or '' when there is no usable extension.
       */
      private function imageExtension(string $url): string
      {
          $path = (string) parse_url($url, PHP_URL_PATH);
          $extension = strtolower(pathinfo($path, PATHINFO_EXTENSION));

          return in_array($extension, self::IMAGE_EXTENSIONS, true) ? ".{$extension}" : '';
      }

      /**
       * Returns the absolute image directory of a product, creating it when necessary.
       *
       * @param int $productId Product id.
       *
       * @return string Absolute path without a trailing slash.
       */
      private function productImageDir(int $productId): string
      {
          $directory = public_path(self::IMAGE_DIR . "/{$productId}");
          if (!File::exists($directory)) {
              File::makeDirectory($directory, 0755, true, true);
          }

          return $directory;
      }

      /**
       * Writes downloaded image bytes to disk.
       *
       * @param string $directory Absolute target directory.
       * @param string $filename  File name inside the directory.
       * @param string $bytes     File contents.
       *
       * @throws \RuntimeException When the file cannot be written.
       */
      private function storeImage(string $directory, string $filename, string $bytes): void
      {
          if (file_put_contents("{$directory}/{$filename}", $bytes) === false) {
              throw new \RuntimeException("图片保存失败: {$directory}/{$filename}");
          }
      }
  }