소스 검색

同步采集程序

moshaorui 2 주 전
부모
커밋
7c1366d286
1개의 변경된 파일, 220개의 추가작업 그리고 0개의 파일을 삭제
  1. 220 0
      app/Console/Commands/SyncBr.php

+ 220 - 0
app/Console/Commands/SyncBr.php

@@ -0,0 +1,220 @@
+<?php
+
+namespace App\Console\Commands;
+
+use Carbon\Carbon;
+use Illuminate\Console\Command;
+use Illuminate\Support\Facades\DB;
+use Illuminate\Support\Facades\File;
+use Symfony\Component\DomCrawler\Crawler;
+
+
+/**
+ * Scrapes product pages from the Brazilian site and imports them into the product tables.
+ *
+ * For every configured category slug the command loads the category listing page,
+ * follows each product link, inserts a row into `dist_product`, downloads the images
+ * referenced by the product content and the product gallery into the public directory,
+ * and rewrites/stores the image URLs so that they point at the OSS host.
+ *
+ * Usage: php artisan sync:brsite
+ */
+class SyncBr extends Command
+{
+    /** Value written to / matched against the `dist_id` column for every row touched here. */
+    private const DIST_ID = 3;
+
+    /** Directory (relative to public/ and to the OSS host) below which product images are stored. */
+    private const IMAGE_ROOT = 'static/tpl/screen_protector_solutions/product';
+
+    /**
+     * The name and signature of the console command.
+     *
+     * @var string
+     */
+    protected $signature = 'sync:brsite';
+
+    /**
+     * The console command description.
+     *
+     * NOTE(review): still the scaffold placeholder; consider replacing it with a real summary.
+     *
+     * @var string
+     */
+    protected $description = 'Command description';
+
+    /**
+     * Runs the import for every enabled entry of the category list.
+     *
+     * A failure while resolving a category skips that category; a failure while importing
+     * a single product skips only that product, so the remaining products are still processed.
+     *
+     * @return int 0 on completion
+     */
+    public function handle()
+    {
+        //$ossHost = 'https://mietubl-website.oss-accelerate.aliyuncs.com';
+        $ossHost = 'https://mietubl-dev.oss-accelerate.aliyuncs.com';
+        $baseUrl = 'https://mietubloficial.com.br';
+
+        // Categories are enabled by un-commenting their entry.
+        $urlList = [
+//            ['slug'=>'tws', 'url'=>'/tws/'],
+//            ['slug'=>'protetor-de-tela-de-vidro-temperado', 'url'=>'/protetor-de-tela-de-vidro-temperado/'],
+//            ['slug'=>'protetor-da-tela-do-tablet', 'url'=>'/protetor-da-tela-do-tablet/'],
+//            ['slug'=>'maquina-de-corte-de-protetor-de-tela', 'url'=>'/maquina-de-corte-de-protetor-de-tela/'],
+
+            ['slug'=>'folhas-de-protetor-de-tela-de-hidrogel', 'url'=>'/folhas-de-protetor-de-tela-de-hidrogel/'],
+
+//            ['slug'=>'lightning', 'url'=>'/lightning/'],
+//            ['slug'=>'type-c', 'url'=>'/type-c/'],
+//            ['slug'=>'micro-usb', 'url'=>'/micro-usb/'],
+//            ['slug'=>'fones-de-ouvido-auricular-com-fio', 'url'=>'/fones-de-ouvido-auricular-com-fio/'],
+//            ['slug'=>'fones-de-ouvido', 'url'=>'/fones-de-ouvido/'],
+//            ['slug'=>'alto-falantes-bluetooth', 'url'=>'/alto-falantes-bluetooth/'],
+//            ['slug'=>'carregador-de-parede', 'url'=>'/carregador-de-parede/'],
+//            ['slug'=>'produtos-perifericos', 'url'=>'/produtos-perifericos/'],
+        ];
+
+        foreach ($urlList as $entry) {
+            try {
+                $category = DB::table('dist_product_category')
+                    ->where('slug', $entry['slug'])
+                    ->where('dist_id', self::DIST_ID)
+                    ->first();
+
+                if (!$category) {
+                    echo "分类未找到,slug: {$entry['slug']}\n";
+                    continue;
+                }
+                echo "分类 {$category->name} \n";
+
+                $detailUrls = $this->collectDetailUrls($entry, $baseUrl);
+            } catch (\Exception $e) {
+                echo "数据采集失败: " . $e->getMessage() . "\n";
+                continue;
+            }
+
+            foreach ($detailUrls as $detailUrl) {
+                // Isolate each product so that one broken page does not abort the whole category.
+                try {
+                    $this->importProduct($detailUrl, $category, $baseUrl, $ossHost);
+                } catch (\Exception $e) {
+                    echo "数据采集失败: " . $e->getMessage() . "\n";
+                }
+            }
+        }
+
+        echo "所有处理完成\n";
+
+        // Return an explicit success code instead of terminating the process from inside the command.
+        return 0;
+    }
+
+    /**
+     * Returns the product detail URLs for one category entry.
+     *
+     * @param array  $entry   Entry of the category list with the keys `slug` and `url`
+     * @param string $baseUrl Scheme and host the listing path is appended to
+     * @return string[] Unique, non-empty hrefs in the order they appear on the listing page
+     */
+    private function collectDetailUrls(array $entry, string $baseUrl): array
+    {
+        // This category is not read from a listing page; its single product is hard-coded.
+        if ($entry['slug'] === 'produtos-perifericos') {
+            return ['https://mietubloficial.com.br/produto/mini-impressora-de-pele-para-celular-mtb-pp01/'];
+        }
+
+        $listCrawler = new Crawler($this->fetchUrl($baseUrl . $entry['url']));
+        $hrefs = $listCrawler->filter('.elementor-shortcode a')->extract(['href']);
+
+        // The selector matches every anchor inside the shortcode, so the same product may be
+        // linked more than once; de-duplicate to avoid inserting it several times.
+        return array_values(array_unique(array_filter($hrefs)));
+    }
+
+    /**
+     * Imports a single product page: inserts the product row, localises the images of its
+     * content and stores its gallery images.
+     *
+     * NOTE(review): the product row is inserted before the images are processed, so a failure
+     * further down leaves a partially imported product behind.
+     *
+     * @param string $detailUrl URL of the product page
+     * @param object $category  Row of `dist_product_category`; only `id` is read
+     * @param string $baseUrl   Scheme and host used to resolve relative image URLs
+     * @param string $ossHost   Host the stored image URLs are prefixed with
+     * @return void
+     * @throws \Exception when the page cannot be fetched or has no `.product_title` element
+     */
+    private function importProduct(string $detailUrl, $category, string $baseUrl, string $ossHost): void
+    {
+        $detailCrawler = new Crawler($this->fetchUrl($detailUrl));
+
+        // Basic fields. text() without a default throws when the title element is missing,
+        // which makes the caller skip this product.
+        $title = $detailCrawler->filter('.product_title')->text();
+        $content = $detailCrawler
+            ->filter('.elementor-widget-woocommerce-product-content .elementor-widget-container')
+            ->html('');
+
+        // SEO description: text of the first paragraph of the short description.
+        $seo_description = $detailCrawler
+            ->filter('.woocommerce-product-details__short-description p:first-child')
+            ->text('');
+
+        // SKU: taken from the "Modelo:" line of the second paragraph of the short description.
+        $sku_html = $detailCrawler
+            ->filter('.woocommerce-product-details__short-description p:nth-child(2)')
+            ->html('');
+        $sku = self::extractSku($sku_html);
+
+        echo "处理产品 {$title} \n";
+
+        // Insert first to obtain the id that names the image directory and the slug.
+        $productId = DB::table('dist_product')->insertGetId([
+            'title' => $title,
+            'content' => $content, // raw content; replaced below once the images are localised
+            'slug' => '',
+            'dist_id' => self::DIST_ID,
+            'enabled' => 1,
+            'status' => 2,
+            'seo_description' => $seo_description,
+            'sku' => $sku,
+            'category_id' => $category->id,
+            'created_at' => Carbon::now(),
+            'updated_at' => Carbon::now(),
+            'parameters' => json_encode([])
+        ]);
+        echo '插入产品获取ID完成' . "\n";
+
+        $imageDirPath = self::IMAGE_ROOT . "/{$productId}";
+        $imageDir = public_path($imageDirPath);
+        File::makeDirectory($imageDir, 0755, true, true);
+        $publicPrefix = "{$ossHost}/{$imageDirPath}";
+
+        echo '处理content中的图片' . "\n";
+        $processedContent = $content !== ''
+            ? $this->localizeContentImages($content, $imageDir, $publicPrefix, $baseUrl)
+            : $content;
+
+        DB::table('dist_product')
+            ->where('id', $productId)
+            ->update(['content' => $processedContent, 'slug' => $productId]);
+
+        echo '获取处理后的HTML并更新数据库' . "\n";
+
+        echo '处理主图' . "\n";
+        $this->importGalleryImages($detailCrawler, $productId, $imageDir, $publicPrefix, $baseUrl);
+
+        echo "处理主图完成,下一个产品 \n";
+        echo "------------------------------------------\n";
+    }
+
+    /**
+     * Downloads every image referenced by the content HTML and rewrites its `src`.
+     *
+     * `<source>` elements are removed and the `srcset`, `data-mce-src` and `sizes` attributes
+     * are dropped from each rewritten `<img>`. Images that cannot be downloaded or saved keep
+     * their original attributes.
+     *
+     * @param string $content      Product content HTML
+     * @param string $imageDir     Existing local directory the files are written to
+     * @param string $publicPrefix URL prefix that corresponds to $imageDir
+     * @param string $baseUrl      Scheme and host used to resolve relative image URLs
+     * @return string The rewritten HTML
+     */
+    private function localizeContentImages(string $content, string $imageDir, string $publicPrefix, string $baseUrl): string
+    {
+        // Remove <source> elements, both the paired and the void form.
+        $content = preg_replace('/<source\b[^>]*>.*?<\/source>/is', '', $content) ?? $content;
+        $content = preg_replace('/<source[^>]*>/', '', $content) ?? $content;
+
+        // loadHTML() rejects an empty string, so there is nothing to parse in that case.
+        if (trim($content) === '') {
+            return $content;
+        }
+
+        $dom = new \DOMDocument();
+        $previousErrorMode = libxml_use_internal_errors(true);
+        // Non-ASCII characters are turned into numeric entities so that libxml does not
+        // misread the UTF-8 input; this replaces the deprecated 'HTML-ENTITIES' conversion.
+        $dom->loadHTML(
+            mb_encode_numericentity($content, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'),
+            LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
+        );
+        libxml_clear_errors();
+        libxml_use_internal_errors($previousErrorMode);
+
+        foreach ($dom->getElementsByTagName('img') as $img) {
+            $originalSrc = trim($img->getAttribute('src'));
+            if ($originalSrc === '') {
+                continue;
+            }
+            $absoluteSrc = self::resolveUrl($originalSrc, $baseUrl);
+
+            // Suppress the warning and test the return value: a missing image is skipped.
+            $imageContent = @file_get_contents($absoluteSrc);
+            if ($imageContent === false) {
+                echo "图片下载失败: {$absoluteSrc}\n";
+                continue;
+            }
+
+            $filename = self::buildFilename(md5(uniqid() . microtime()), $absoluteSrc);
+            if (file_put_contents("{$imageDir}/{$filename}", $imageContent) === false) {
+                echo "图片保存失败: {$absoluteSrc}\n";
+                continue;
+            }
+
+            // Point the image at the stored copy and drop the attributes that still
+            // reference the source site.
+            $img->setAttribute('src', "{$publicPrefix}/{$filename}");
+            $img->removeAttribute('srcset');
+            $img->removeAttribute('data-mce-src');
+            $img->removeAttribute('sizes');
+        }
+
+        $html = $dom->saveHTML();
+
+        return $html === false ? $content : $html;
+    }
+
+    /**
+     * Downloads the gallery images of a product and records them in `dist_product_image`.
+     *
+     * An image that cannot be downloaded or saved is skipped and no row is inserted for it.
+     *
+     * @param Crawler    $detailCrawler Crawler of the product page
+     * @param int|string $productId     Id of the inserted product
+     * @param string     $imageDir      Existing local directory the files are written to
+     * @param string     $publicPrefix  URL prefix that corresponds to $imageDir
+     * @param string     $baseUrl       Scheme and host used to resolve relative image URLs
+     * @return void
+     */
+    private function importGalleryImages(Crawler $detailCrawler, $productId, string $imageDir, string $publicPrefix, string $baseUrl): void
+    {
+        $mainImages = $detailCrawler->filter('.woocommerce-product-gallery__wrapper a')->extract(['href']);
+
+        foreach ($mainImages as $index => $imgUrl) {
+            echo "处理主图 {$imgUrl} \n";
+
+            $imgUrl = trim((string) $imgUrl);
+            if ($imgUrl === '') {
+                continue;
+            }
+            $absoluteUrl = self::resolveUrl($imgUrl, $baseUrl);
+
+            $imageContent = @file_get_contents($absoluteUrl);
+            if ($imageContent === false) {
+                echo "图片下载失败: {$absoluteUrl}\n";
+                continue;
+            }
+
+            // The index prefix keeps the names distinct even when the timestamp is the same.
+            $filename = self::buildFilename("{$index}_" . md5((string) time()), $absoluteUrl);
+            if (file_put_contents("{$imageDir}/{$filename}", $imageContent) === false) {
+                echo "图片保存失败: {$absoluteUrl}\n";
+                continue;
+            }
+
+            // NOTE(review): every image is stored with order 0; confirm whether the gallery
+            // position ($index) should be written instead.
+            DB::table('dist_product_image')->insert([
+                'product_id' => $productId,
+                'image_url' => "{$publicPrefix}/{$filename}",
+                'order' => 0,
+                'created_at' => Carbon::now(),
+                'updated_at' => Carbon::now(),
+            ]);
+        }
+    }
+
+    /**
+     * Fetches a URL and returns its body.
+     *
+     * @param string $url
+     * @return string
+     * @throws \RuntimeException when the request fails
+     */
+    private function fetchUrl(string $url): string
+    {
+        // The warning is suppressed because the failure is reported through the exception.
+        $body = @file_get_contents($url);
+        if ($body === false) {
+            throw new \RuntimeException("页面下载失败: {$url}");
+        }
+
+        return $body;
+    }
+
+    /**
+     * Extracts the text that follows "Modelo:" up to the next `<br>` (or the end of the
+     * fragment when there is no `<br>`), with tags removed and whitespace trimmed.
+     *
+     * @param string $html HTML fragment of the paragraph
+     * @return string The SKU, or an empty string when the fragment has no "Modelo:" marker
+     */
+    private static function extractSku(string $html): string
+    {
+        if (!preg_match('/Modelo:(.*?)(?:<br\b[^>]*>|$)/s', $html, $match)) {
+            return '';
+        }
+
+        return trim(strip_tags($match[1]));
+    }
+
+    /**
+     * Turns a protocol-relative or root-relative URL into an absolute one; any other
+     * value is returned unchanged.
+     *
+     * @param string $url
+     * @param string $baseUrl Scheme and host, e.g. "https://example.com"
+     * @return string
+     */
+    private static function resolveUrl(string $url, string $baseUrl): string
+    {
+        if (strpos($url, '//') === 0) {
+            $scheme = parse_url($baseUrl, PHP_URL_SCHEME) ?: 'https';
+
+            return "{$scheme}:{$url}";
+        }
+        if (strpos($url, '/') === 0) {
+            return rtrim($baseUrl, '/') . $url;
+        }
+
+        return $url;
+    }
+
+    /**
+     * Appends the file extension found in the path of $url to $name.
+     *
+     * Only the path component is inspected so that a query string or fragment does not
+     * end up in the extension.
+     *
+     * @param string $name Base name without extension
+     * @param string $url  URL the file was downloaded from
+     * @return string $name followed by ".<extension>", or $name alone when the path has none
+     */
+    private static function buildFilename(string $name, string $url): string
+    {
+        $path = parse_url($url, PHP_URL_PATH);
+        $extension = pathinfo(is_string($path) ? $path : $url, PATHINFO_EXTENSION);
+
+        return $extension === '' ? $name : "{$name}.{$extension}";
+    }
+
+}