'tws', 'url'=>'/tws/'], ['slug'=>'protetor-de-tela-de-vidro-temperado', 'url'=>'/protetor-de-tela-de-vidro-temperado/'], ['slug'=>'protetor-da-tela-do-tablet', 'url'=>'/protetor-da-tela-do-tablet/'], ['slug'=>'maquina-de-corte-de-protetor-de-tela', 'url'=>'/maquina-de-corte-de-protetor-de-tela/'], ['slug'=>'folhas-de-protetor-de-tela-de-hidrogel', 'url'=>'/folhas-de-protetor-de-tela-de-hidrogel/'], ['slug'=>'lightning', 'url'=>'/lightning/'], ['slug'=>'type-c', 'url'=>'/type-c/'], ['slug'=>'micro-usb', 'url'=>'/micro-usb/'], ['slug'=>'fones-de-ouvido-auricular-com-fio', 'url'=>'/fones-de-ouvido-auricular-com-fio/'], ['slug'=>'fones-de-ouvido', 'url'=>'/fones-de-ouvido/'], ['slug'=>'alto-falantes-bluetooth', 'url'=>'/alto-falantes-bluetooth/'], ['slug'=>'carregador-de-parede', 'url'=>'/carregador-de-parede/'], ['slug'=>'produtos-perifericos', 'url'=>'/produtos-perifericos/'], ]; foreach ($urlList as $entry) { try { $category = DB::table('dist_product_category') ->where('slug', $entry['slug']) ->where('dist_id', 3) ->first(); if (!$category) { echo "分类未找到,slug: {$entry['slug']}\n"; continue; } echo "分类 {$category->name} \n"; //continue; if ($entry['slug'] == 'produtos-perifericos') { $detailUrls = ['https://mietubloficial.com.br/produto/mini-impressora-de-pele-para-celular-mtb-pp01/']; } else { $html = file_get_contents($baseUrl . $entry['url']); $listCrawler = new Crawler($html); $detailUrls = $listCrawler->filter('.elementor-shortcode a')->extract(['href']); } foreach ($detailUrls as $detailUrl) { $detailHtml = file_get_contents($detailUrl); $detailCrawler = new Crawler($detailHtml); // 解析基础数据 $title = $detailCrawler->filter('.product_title')->text(); $content = ""; try { $content = $detailCrawler->filter('.elementor-widget-woocommerce-product-content .elementor-widget-container')->html(); } catch (\Exception $e) { $content = ""; } /******************** 新增内容提取逻辑 ​********************/ // 提取 SEO 描述(第一个 p 标签内容) $seo_description = $detailCrawler->filter('.woocommerce-product-details__short-description p:first-child')->text(''); // 提取 SKU(第二个 p 标签中 Modelo: 到
的内容) $sku_html = $detailCrawler->filter('.woocommerce-product-details__short-description p:nth-child(2)')->html(''); $sku = ''; if (strpos($sku_html, 'Modelo:') !== false) { $pos = strpos($sku_html, "<"); if ($pos !== false) { $sku_html = substr($sku_html, 0, $pos); } $sku = str_replace("Modelo: ", "", $sku_html); $sku = trim($sku); } /******************** 新增内容提取结束 ​********************/ echo "处理产品 {$title} \n"; echo '插入产品获取ID完成' . "\n"; // 插入产品获取ID $productId = DB::table('dist_product')->insertGetId([ 'title' => $title, 'content' => $content, // 初始未处理的content 'slug' => '', 'dist_id' => 3, 'enabled' => 1, 'status' => 2, 'seo_description' => $seo_description, 'sku' => $sku, 'category_id' => $category->id, 'created_at' => Carbon::now(), 'updated_at' => Carbon::now(), 'parameters' => json_encode([]) ]); echo '处理content中的图片' . "\n"; if ($content != "") { //去除source标签 $content = preg_replace('/]*>.*?<\/source>/is', '', $content); $content = preg_replace('/]*>/', '', $content); // 处理content中的图片 $dom = new \DOMDocument(); libxml_use_internal_errors(true); $dom->loadHTML(mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); libxml_clear_errors(); $images = $dom->getElementsByTagName('img'); foreach ($images as $img) { $originalSrc = $img->getAttribute('src'); $absoluteSrc = $originalSrc; // 下载图片 $imageContent = @file_get_contents($absoluteSrc); if ($imageContent === false) { echo "图片下载失败: {$absoluteSrc}\n"; continue; } // 生成唯一文件名 $extension = pathinfo($absoluteSrc, PATHINFO_EXTENSION); $filename = md5(uniqid() . microtime()) . '.' . $extension; // 保存路径 $imageDir = public_path("static/tpl/screen_protector_solutions/product/{$productId}"); if (!File::exists($imageDir)) { File::makeDirectory($imageDir, 0755, true, true); } file_put_contents("{$imageDir}/{$filename}", $imageContent); // 替换为OSS路径 $newSrc = "{$ossHost}/static/tpl/screen_protector_solutions/product/{$productId}/{$filename}"; $img->setAttribute('src', $newSrc); // 新增:移除 srcset 和 data-mce-src 属性 $img->removeAttribute('srcset'); $img->removeAttribute('data-mce-src'); $img->removeAttribute('sizes'); } // 获取处理后的HTML并更新数据库 $processedContent = $dom->saveHTML(); } else { $processedContent = $content; } DB::table('dist_product') ->where('id', $productId) ->update(['content' => $processedContent,'slug'=> $productId]); echo '获取处理后的HTML并更新数据库' . "\n"; echo '处理主图' . "\n"; // 处理主图 $mainImages = $detailCrawler->filter('.woocommerce-product-gallery__wrapper a')->extract(['href']); $imageDirPath = "static/tpl/screen_protector_solutions/product/{$productId}"; $imageDir = public_path($imageDirPath); File::makeDirectory($imageDir, 0755, true, true); foreach ($mainImages as $index => $imgUrl) { echo "处理主图 {$imgUrl} \n"; $extension = pathinfo($imgUrl, PATHINFO_EXTENSION); $filename = "{$index}_" . md5(time()) . ".{$extension}"; file_put_contents("{$imageDir}/{$filename}", file_get_contents($imgUrl)); // 插入数据库 DB::table('dist_product_image')->insert([ 'product_id' => $productId, 'image_url' => "{$ossHost}/{$imageDirPath}/{$filename}", 'order' => 0, 'created_at' => Carbon::now(), 'updated_at' => Carbon::now(), ]); } echo "处理主图完成,下一个产品 \n"; echo "------------------------------------------\n"; } } catch (\Exception $e) { echo "数据采集失败: " . $e->getMessage() . "\n"; continue; } } dd('所有处理完成'); } }