SyncBr.php 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. <?php
  2. namespace App\Console\Commands;
  3. use Carbon\Carbon;
  4. use Illuminate\Console\Command;
  5. use Illuminate\Support\Facades\DB;
  6. use Illuminate\Support\Facades\File;
  7. use Symfony\Component\DomCrawler\Crawler;
  /**
   * Scrapes product pages from https://mietubloficial.com.br and imports them
   * into the `dist_product` / `dist_product_image` tables.
   *
   * Usage: php artisan sync:brsite
   */
  class SyncBr extends Command
  {
      /** Distributor id used for the category lookup and stored on every imported product. */
      private const DIST_ID = 3;

      /** Directory (relative to public/, mirrored on the OSS host) that receives downloaded images. */
      private const IMAGE_DIR = 'static/tpl/screen_protector_solutions/product';

      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'sync:brsite';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'Command description';

      /**
       * Walks the configured category listings and imports every product found.
       *
       * A failure while resolving a category or fetching its listing skips that
       * category; a failure on a single product skips only that product.
       *
       * @return int Process exit code (0 on completion).
       */
      public function handle()
      {
          //$ossHost = 'https://mietubl-website.oss-accelerate.aliyuncs.com';
          $ossHost = 'https://mietubl-dev.oss-accelerate.aliyuncs.com';
          $baseUrl = 'https://mietubloficial.com.br';

          // Categories to import; uncomment an entry to include it in the run.
          $urlList = [
              // ['slug'=>'tws', 'url'=>'/tws/'],
              // ['slug'=>'protetor-de-tela-de-vidro-temperado', 'url'=>'/protetor-de-tela-de-vidro-temperado/'],
              // ['slug'=>'protetor-da-tela-do-tablet', 'url'=>'/protetor-da-tela-do-tablet/'],
              // ['slug'=>'maquina-de-corte-de-protetor-de-tela', 'url'=>'/maquina-de-corte-de-protetor-de-tela/'],
              ['slug'=>'folhas-de-protetor-de-tela-de-hidrogel', 'url'=>'/folhas-de-protetor-de-tela-de-hidrogel/'],
              // ['slug'=>'lightning', 'url'=>'/lightning/'],
              // ['slug'=>'type-c', 'url'=>'/type-c/'],
              // ['slug'=>'micro-usb', 'url'=>'/micro-usb/'],
              // ['slug'=>'fones-de-ouvido-auricular-com-fio', 'url'=>'/fones-de-ouvido-auricular-com-fio/'],
              // ['slug'=>'fones-de-ouvido', 'url'=>'/fones-de-ouvido/'],
              // ['slug'=>'alto-falantes-bluetooth', 'url'=>'/alto-falantes-bluetooth/'],
              // ['slug'=>'carregador-de-parede', 'url'=>'/carregador-de-parede/'],
              // ['slug'=>'produtos-perifericos', 'url'=>'/produtos-perifericos/'],
          ];

          foreach ($urlList as $entry) {
              try {
                  $category = DB::table('dist_product_category')
                      ->where('slug', $entry['slug'])
                      ->where('dist_id', self::DIST_ID)
                      ->first();
                  if (!$category) {
                      echo "分类未找到,slug: {$entry['slug']}\n";
                      continue;
                  }
                  echo "分类 {$category->name} \n";

                  $detailUrls = $this->collectDetailUrls($baseUrl, $entry);
              } catch (\Exception $e) {
                  echo "数据采集失败: " . $e->getMessage() . "\n";
                  continue;
              }

              foreach ($detailUrls as $detailUrl) {
                  // Isolate failures per product so one broken page does not
                  // abandon the remaining products of the category.
                  try {
                      $this->importProduct($detailUrl, $category->id, $baseUrl, $ossHost);
                  } catch (\Exception $e) {
                      echo "数据采集失败: " . $e->getMessage() . "\n";
                  }
              }
          }

          // Report completion and exit cleanly; dd() would terminate the
          // process with a failure status even though the run succeeded.
          echo "所有处理完成\n";

          return 0;
      }

      /**
       * Returns the product detail URLs for one category entry.
       *
       * @param string $baseUrl Site origin the listing path is appended to.
       * @param array  $entry   Category entry with 'slug' and 'url' keys.
       * @return string[] De-duplicated, non-empty detail page URLs.
       * @throws \RuntimeException When the listing page cannot be downloaded.
       */
      private function collectDetailUrls(string $baseUrl, array $entry): array
      {
          // This category is imported from a single hard-coded product page
          // instead of its listing page.
          if ($entry['slug'] === 'produtos-perifericos') {
              return ['https://mietubloficial.com.br/produto/mini-impressora-de-pele-para-celular-mtb-pp01/'];
          }

          $html = $this->download($baseUrl . $entry['url']);
          $hrefs = (new Crawler($html))->filter('.elementor-shortcode a')->extract(['href']);

          // Several anchors may point at the same product; importing a URL
          // twice would insert a duplicate product row.
          return array_values(array_unique(array_filter($hrefs)));
      }

      /**
       * Scrapes one product detail page and stores the product, its content
       * images and its gallery images.
       *
       * @param string     $detailUrl  Absolute URL of the product page.
       * @param int|string $categoryId Id of the matching dist_product_category row.
       * @param string     $baseUrl    Site origin used to resolve relative image URLs.
       * @param string     $ossHost    Host prefix written into the stored image URLs.
       * @throws \Exception When the page cannot be fetched or has no `.product_title` node.
       */
      private function importProduct(string $detailUrl, $categoryId, string $baseUrl, string $ossHost): void
      {
          $detailCrawler = new Crawler($this->download($detailUrl));

          // Basic fields. text() without a default throws when the title is missing.
          $title = $detailCrawler->filter('.product_title')->text();
          try {
              $content = $detailCrawler
                  ->filter('.elementor-widget-woocommerce-product-content .elementor-widget-container')
                  ->html();
          } catch (\Exception $e) {
              // The content widget is optional; fall back to empty content.
              $content = '';
          }

          // SEO description: text of the first <p> of the short description.
          $shortDescription = '.woocommerce-product-details__short-description';
          $seoDescription = $detailCrawler->filter("{$shortDescription} p:first-child")->text('');
          // SKU: the text between "Modelo:" and the next <br> in the second <p>.
          $sku = $this->extractSku($detailCrawler->filter("{$shortDescription} p:nth-child(2)")->html(''));

          echo "处理产品 {$title} \n";

          // Insert first: the generated id names the image directory and the slug.
          $now = Carbon::now();
          $productId = DB::table('dist_product')->insertGetId([
              'title' => $title,
              'content' => $content, // raw content; replaced below once images are rewritten
              'slug' => '',
              'dist_id' => self::DIST_ID,
              'enabled' => 1,
              'status' => 2,
              'seo_description' => $seoDescription,
              'sku' => $sku,
              'category_id' => $categoryId,
              'created_at' => $now,
              'updated_at' => $now,
              'parameters' => json_encode([]),
          ]);
          echo '插入产品获取ID完成' . "\n";

          echo '处理content中的图片' . "\n";
          $processedContent = $content === ''
              ? $content
              : $this->localizeContentImages($content, $productId, $baseUrl, $ossHost);

          DB::table('dist_product')
              ->where('id', $productId)
              ->update(['content' => $processedContent, 'slug' => $productId]);
          echo '获取处理后的HTML并更新数据库' . "\n";

          echo '处理主图' . "\n";
          $this->importGalleryImages($detailCrawler, $productId, $baseUrl, $ossHost);

          echo "处理主图完成,下一个产品 \n";
          echo "------------------------------------------\n";
      }

      /**
       * Extracts the SKU that follows "Modelo:" in a short-description fragment.
       *
       * Reads up to the first <br> (any of <br>, <br/>, <br />) or, when there
       * is none, to the end of the fragment. Inline markup is stripped.
       *
       * @param string $html Inner HTML of the paragraph expected to hold the model.
       * @return string The trimmed SKU, or '' when "Modelo:" is absent.
       */
      private function extractSku(string $html): string
      {
          $marker = 'Modelo:';
          $markerPos = strpos($html, $marker);
          if ($markerPos === false) {
              return '';
          }

          // substr() returns false on PHP < 8 when the marker ends the string.
          $rest = (string) substr($html, $markerPos + strlen($marker));
          $parts = preg_split('/<br\s*\/?>/i', $rest, 2);
          $candidate = is_array($parts) ? $parts[0] : $rest;

          return trim(strip_tags($candidate));
      }

      /**
       * Downloads every <img> in the product content into the product's public
       * image directory and rewrites its src to the OSS URL.
       *
       * @param string $content   Product content HTML.
       * @param int    $productId Id of the product the images belong to.
       * @param string $baseUrl   Site origin used to resolve relative image URLs.
       * @param string $ossHost   Host prefix for the rewritten src attributes.
       * @return string The rewritten HTML.
       */
      private function localizeContentImages(string $content, int $productId, string $baseUrl, string $ossHost): string
      {
          // Drop <source> tags so only the <img> fallback remains.
          $content = preg_replace('/<source\b[^>]*>.*?<\/source>/is', '', $content);
          $content = preg_replace('/<source[^>]*>/', '', $content);

          // DOMDocument::loadHTML() rejects an empty document (ValueError on PHP 8).
          if (trim((string) $content) === '') {
              return (string) $content;
          }

          $dom = new \DOMDocument();
          $previousErrorMode = libxml_use_internal_errors(true);
          // Encode non-ASCII characters as numeric entities so libxml does not
          // misread the UTF-8 input; this replaces the 'HTML-ENTITIES'
          // conversion deprecated in PHP 8.2.
          // NOTE(review): LIBXML_HTML_NOIMPLIED may nest later top-level
          // siblings under the first element — confirm the stored markup when
          // the content has several root nodes.
          $dom->loadHTML(
              mb_encode_numericentity($content, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'),
              LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
          );
          libxml_clear_errors();
          libxml_use_internal_errors($previousErrorMode);

          $imageDirPath = self::IMAGE_DIR . "/{$productId}";
          $imageDir = public_path($imageDirPath);

          foreach ($dom->getElementsByTagName('img') as $img) {
              $absoluteSrc = $this->absoluteUrl($img->getAttribute('src'), $baseUrl);
              if ($absoluteSrc === '') {
                  // Nothing to download; an empty path would raise on PHP 8.
                  continue;
              }

              $imageContent = @file_get_contents($absoluteSrc);
              if ($imageContent === false) {
                  echo "图片下载失败: {$absoluteSrc}\n";
                  continue;
              }

              // Unique file name, keeping the source extension.
              $filename = $this->withExtension(md5(uniqid() . microtime()), $absoluteSrc);
              if (!File::exists($imageDir)) {
                  File::makeDirectory($imageDir, 0755, true, true);
              }
              file_put_contents("{$imageDir}/{$filename}", $imageContent);

              // Point the tag at the OSS copy and drop attributes that still
              // reference the original host.
              $img->setAttribute('src', "{$ossHost}/{$imageDirPath}/{$filename}");
              $img->removeAttribute('srcset');
              $img->removeAttribute('data-mce-src');
              $img->removeAttribute('sizes');
          }

          return $dom->saveHTML();
      }

      /**
       * Downloads the product gallery images and records them in
       * `dist_product_image`.
       *
       * @param Crawler $detailCrawler Crawler over the product detail page.
       * @param int     $productId     Id of the product the images belong to.
       * @param string  $baseUrl       Site origin used to resolve relative image URLs.
       * @param string  $ossHost       Host prefix for the stored image URLs.
       */
      private function importGalleryImages(Crawler $detailCrawler, int $productId, string $baseUrl, string $ossHost): void
      {
          $mainImages = $detailCrawler->filter('.woocommerce-product-gallery__wrapper a')->extract(['href']);
          $imageDirPath = self::IMAGE_DIR . "/{$productId}";
          $imageDir = public_path($imageDirPath);
          File::makeDirectory($imageDir, 0755, true, true);

          foreach ($mainImages as $index => $imgUrl) {
              echo "处理主图 {$imgUrl} \n";

              $absoluteUrl = $this->absoluteUrl((string) $imgUrl, $baseUrl);
              if ($absoluteUrl === '') {
                  continue;
              }

              // Skip a failed download instead of writing an empty file and
              // registering it, matching the content-image handling.
              $imageContent = @file_get_contents($absoluteUrl);
              if ($imageContent === false) {
                  echo "图片下载失败: {$absoluteUrl}\n";
                  continue;
              }

              // The index keeps names unique within the same second.
              $filename = $this->withExtension("{$index}_" . md5(time()), $absoluteUrl);
              file_put_contents("{$imageDir}/{$filename}", $imageContent);

              $now = Carbon::now();
              DB::table('dist_product_image')->insert([
                  'product_id' => $productId,
                  'image_url' => "{$ossHost}/{$imageDirPath}/{$filename}",
                  // NOTE(review): every image is stored with order 0; confirm
                  // whether the gallery index was intended here.
                  'order' => 0,
                  'created_at' => $now,
                  'updated_at' => $now,
              ]);
          }
      }

      /**
       * Fetches a page body.
       *
       * @param string $url Absolute URL to fetch.
       * @return string The response body.
       * @throws \RuntimeException When the download fails without the error
       *                           handler already having raised an exception.
       */
      private function download(string $url): string
      {
          $body = file_get_contents($url);
          if ($body === false) {
              throw new \RuntimeException("页面下载失败: {$url}");
          }

          return $body;
      }

      /**
       * Resolves protocol-relative and root-relative URLs against the site origin.
       *
       * @param string $url     URL taken from an src/href attribute.
       * @param string $baseUrl Site origin, e.g. "https://example.com".
       * @return string Absolute URL, '' for an empty input; other forms are returned trimmed.
       */
      private function absoluteUrl(string $url, string $baseUrl): string
      {
          $url = trim($url);
          if ($url === '') {
              return '';
          }
          if (strpos($url, '//') === 0) {
              $scheme = parse_url($baseUrl, PHP_URL_SCHEME) ?: 'https';

              return "{$scheme}:{$url}";
          }
          if ($url[0] === '/') {
              return rtrim($baseUrl, '/') . $url;
          }

          return $url;
      }

      /**
       * Appends the file extension found in a URL's path to a file name stem.
       *
       * The extension is read from the path component only, so a query string
       * or fragment never ends up in the file name.
       *
       * @param string $stem File name without extension.
       * @param string $url  URL whose path supplies the extension.
       * @return string "$stem.$ext", or just $stem when the path has no extension.
       */
      private function withExtension(string $stem, string $url): string
      {
          $path = parse_url($url, PHP_URL_PATH);
          $extension = is_string($path) ? pathinfo($path, PATHINFO_EXTENSION) : '';

          return $extension === '' ? $stem : "{$stem}.{$extension}";
      }
  }