SyncBr.php 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. <?php
  2. namespace App\Console\Commands;
  3. use Carbon\Carbon;
  4. use Illuminate\Console\Command;
  5. use Illuminate\Support\Facades\DB;
  6. use Illuminate\Support\Facades\File;
  7. use Symfony\Component\DomCrawler\Crawler;
  8. /**
  9. * 导入相册内容到产品表
  10. * php artisan sync:brsite
  11. */
  12. class SyncBr extends Command
  13. {
  14. /**
  15. * The name and signature of the console command.
  16. *
  17. * @var string
  18. */
  19. protected $signature = 'sync:brsite';
  20. /**
  21. * The console command description.
  22. *
  23. * @var string
  24. */
  25. protected $description = 'Command description';
  26. public function handle()
  27. {
  28. exit;
  29. $ossHost = 'https://mietubl-website.oss-accelerate.aliyuncs.com';
  30. //$ossHost = 'https://mietubl-dev.oss-accelerate.aliyuncs.com';
  31. $baseUrl = 'https://mietubloficial.com.br';
  32. $urlList = [
  33. ['slug'=>'tws', 'url'=>'/tws/'],
  34. ['slug'=>'protetor-de-tela-de-vidro-temperado', 'url'=>'/protetor-de-tela-de-vidro-temperado/'],
  35. ['slug'=>'protetor-da-tela-do-tablet', 'url'=>'/protetor-da-tela-do-tablet/'],
  36. ['slug'=>'maquina-de-corte-de-protetor-de-tela', 'url'=>'/maquina-de-corte-de-protetor-de-tela/'],
  37. ['slug'=>'folhas-de-protetor-de-tela-de-hidrogel', 'url'=>'/folhas-de-protetor-de-tela-de-hidrogel/'],
  38. ['slug'=>'lightning', 'url'=>'/lightning/'],
  39. ['slug'=>'type-c', 'url'=>'/type-c/'],
  40. ['slug'=>'micro-usb', 'url'=>'/micro-usb/'],
  41. ['slug'=>'fones-de-ouvido-auricular-com-fio', 'url'=>'/fones-de-ouvido-auricular-com-fio/'],
  42. ['slug'=>'fones-de-ouvido', 'url'=>'/fones-de-ouvido/'],
  43. ['slug'=>'alto-falantes-bluetooth', 'url'=>'/alto-falantes-bluetooth/'],
  44. ['slug'=>'carregador-de-parede', 'url'=>'/carregador-de-parede/'],
  45. ['slug'=>'produtos-perifericos', 'url'=>'/produtos-perifericos/'],
  46. ];
  47. foreach ($urlList as $entry) {
  48. try {
  49. $category = DB::table('dist_product_category')
  50. ->where('slug', $entry['slug'])
  51. ->where('dist_id', 3)
  52. ->first();
  53. if (!$category) {
  54. echo "分类未找到,slug: {$entry['slug']}\n";
  55. continue;
  56. }
  57. echo "分类 {$category->name} \n";
  58. //continue;
  59. if ($entry['slug'] == 'produtos-perifericos') {
  60. $detailUrls = ['https://mietubloficial.com.br/produto/mini-impressora-de-pele-para-celular-mtb-pp01/'];
  61. } else {
  62. $html = file_get_contents($baseUrl . $entry['url']);
  63. $listCrawler = new Crawler($html);
  64. $detailUrls = $listCrawler->filter('.elementor-shortcode a')->extract(['href']);
  65. }
  66. foreach ($detailUrls as $detailUrl) {
  67. $detailHtml = file_get_contents($detailUrl);
  68. $detailCrawler = new Crawler($detailHtml);
  69. // 解析基础数据
  70. $title = $detailCrawler->filter('.product_title')->text();
  71. $content = "";
  72. try {
  73. $content = $detailCrawler->filter('.elementor-widget-woocommerce-product-content .elementor-widget-container')->html();
  74. } catch (\Exception $e) {
  75. $content = "";
  76. }
  77. /******************** 新增内容提取逻辑 ​********************/
  78. // 提取 SEO 描述(第一个 p 标签内容)
  79. $seo_description = $detailCrawler->filter('.woocommerce-product-details__short-description p:first-child')->text('');
  80. // 提取 SKU(第二个 p 标签中 Modelo: 到 <br> 的内容)
  81. $sku_html = $detailCrawler->filter('.woocommerce-product-details__short-description p:nth-child(2)')->html('');
  82. $sku = '';
  83. if (strpos($sku_html, 'Modelo:') !== false) {
  84. $pos = strpos($sku_html, "<");
  85. if ($pos !== false) {
  86. $sku_html = substr($sku_html, 0, $pos);
  87. }
  88. $sku = str_replace("Modelo: ", "", $sku_html);
  89. $sku = trim($sku);
  90. }
  91. /******************** 新增内容提取结束 ​********************/
  92. echo "处理产品 {$title} \n";
  93. echo '插入产品获取ID完成' . "\n";
  94. // 插入产品获取ID
  95. $productId = DB::table('dist_product')->insertGetId([
  96. 'title' => $title,
  97. 'content' => $content, // 初始未处理的content
  98. 'slug' => '',
  99. 'dist_id' => 3,
  100. 'enabled' => 1,
  101. 'status' => 2,
  102. 'seo_description' => $seo_description,
  103. 'sku' => $sku,
  104. 'category_id' => $category->id,
  105. 'created_at' => Carbon::now(),
  106. 'updated_at' => Carbon::now(),
  107. 'parameters' => json_encode([])
  108. ]);
  109. echo '处理content中的图片' . "\n";
  110. if ($content != "") {
  111. //去除source标签
  112. $content = preg_replace('/<source\b[^>]*>.*?<\/source>/is', '', $content);
  113. $content = preg_replace('/<source[^>]*>/', '', $content);
  114. // 处理content中的图片
  115. $dom = new \DOMDocument();
  116. libxml_use_internal_errors(true);
  117. $dom->loadHTML(mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
  118. libxml_clear_errors();
  119. $images = $dom->getElementsByTagName('img');
  120. foreach ($images as $img) {
  121. $originalSrc = $img->getAttribute('src');
  122. $absoluteSrc = $originalSrc;
  123. // 下载图片
  124. $imageContent = @file_get_contents($absoluteSrc);
  125. if ($imageContent === false) {
  126. echo "图片下载失败: {$absoluteSrc}\n";
  127. continue;
  128. }
  129. // 生成唯一文件名
  130. $extension = pathinfo($absoluteSrc, PATHINFO_EXTENSION);
  131. $filename = md5(uniqid() . microtime()) . '.' . $extension;
  132. // 保存路径
  133. $imageDir = public_path("static/tpl/screen_protector_solutions/product/{$productId}");
  134. if (!File::exists($imageDir)) {
  135. File::makeDirectory($imageDir, 0755, true, true);
  136. }
  137. file_put_contents("{$imageDir}/{$filename}", $imageContent);
  138. // 替换为OSS路径
  139. $newSrc = "{$ossHost}/static/tpl/screen_protector_solutions/product/{$productId}/{$filename}";
  140. $img->setAttribute('src', $newSrc);
  141. // 新增:移除 srcset 和 data-mce-src 属性
  142. $img->removeAttribute('srcset');
  143. $img->removeAttribute('data-mce-src');
  144. $img->removeAttribute('sizes');
  145. }
  146. // 获取处理后的HTML并更新数据库
  147. $processedContent = $dom->saveHTML();
  148. } else {
  149. $processedContent = $content;
  150. }
  151. DB::table('dist_product')
  152. ->where('id', $productId)
  153. ->update(['content' => $processedContent,'slug'=> $productId]);
  154. echo '获取处理后的HTML并更新数据库' . "\n";
  155. echo '处理主图' . "\n";
  156. // 处理主图
  157. $mainImages = $detailCrawler->filter('.woocommerce-product-gallery__wrapper a')->extract(['href']);
  158. $imageDirPath = "static/tpl/screen_protector_solutions/product/{$productId}";
  159. $imageDir = public_path($imageDirPath);
  160. File::makeDirectory($imageDir, 0755, true, true);
  161. foreach ($mainImages as $index => $imgUrl) {
  162. echo "处理主图 {$imgUrl} \n";
  163. $extension = pathinfo($imgUrl, PATHINFO_EXTENSION);
  164. $filename = "{$index}_" . md5(time()) . ".{$extension}";
  165. file_put_contents("{$imageDir}/{$filename}", file_get_contents($imgUrl));
  166. // 插入数据库
  167. DB::table('dist_product_image')->insert([
  168. 'product_id' => $productId,
  169. 'image_url' => "{$ossHost}/{$imageDirPath}/{$filename}",
  170. 'order' => 0,
  171. 'created_at' => Carbon::now(),
  172. 'updated_at' => Carbon::now(),
  173. ]);
  174. }
  175. echo "处理主图完成,下一个产品 \n";
  176. echo "------------------------------------------\n";
  177. }
  178. } catch (\Exception $e) {
  179. echo "数据采集失败: " . $e->getMessage() . "\n";
  180. continue;
  181. }
  182. }
  183. dd('所有处理完成');
  184. }
  185. }