'tws', 'url'=>'/tws/'],
// ['slug'=>'protetor-de-tela-de-vidro-temperado', 'url'=>'/protetor-de-tela-de-vidro-temperado/'],
// ['slug'=>'protetor-da-tela-do-tablet', 'url'=>'/protetor-da-tela-do-tablet/'],
// ['slug'=>'maquina-de-corte-de-protetor-de-tela', 'url'=>'/maquina-de-corte-de-protetor-de-tela/'],
['slug'=>'folhas-de-protetor-de-tela-de-hidrogel', 'url'=>'/folhas-de-protetor-de-tela-de-hidrogel/'],
// ['slug'=>'lightning', 'url'=>'/lightning/'],
// ['slug'=>'type-c', 'url'=>'/type-c/'],
// ['slug'=>'micro-usb', 'url'=>'/micro-usb/'],
// ['slug'=>'fones-de-ouvido-auricular-com-fio', 'url'=>'/fones-de-ouvido-auricular-com-fio/'],
// ['slug'=>'fones-de-ouvido', 'url'=>'/fones-de-ouvido/'],
// ['slug'=>'alto-falantes-bluetooth', 'url'=>'/alto-falantes-bluetooth/'],
// ['slug'=>'carregador-de-parede', 'url'=>'/carregador-de-parede/'],
// ['slug'=>'produtos-perifericos', 'url'=>'/produtos-perifericos/'],
];
foreach ($urlList as $entry) {
try {
$category = DB::table('dist_product_category')
->where('slug', $entry['slug'])
->where('dist_id', 3)
->first();
if (!$category) {
echo "分类未找到,slug: {$entry['slug']}\n";
continue;
}
echo "分类 {$category->name} \n";
//continue;
if ($entry['slug'] == 'produtos-perifericos') {
$detailUrls = ['https://mietubloficial.com.br/produto/mini-impressora-de-pele-para-celular-mtb-pp01/'];
} else {
$html = file_get_contents($baseUrl . $entry['url']);
$listCrawler = new Crawler($html);
$detailUrls = $listCrawler->filter('.elementor-shortcode a')->extract(['href']);
}
foreach ($detailUrls as $detailUrl) {
$detailHtml = file_get_contents($detailUrl);
$detailCrawler = new Crawler($detailHtml);
// 解析基础数据
$title = $detailCrawler->filter('.product_title')->text();
$content = "";
try {
$content = $detailCrawler->filter('.elementor-widget-woocommerce-product-content .elementor-widget-container')->html();
} catch (\Exception $e) {
$content = "";
}
/******************** 新增内容提取逻辑 ********************/
// 提取 SEO 描述(第一个 p 标签内容)
$seo_description = $detailCrawler->filter('.woocommerce-product-details__short-description p:first-child')->text('');
// Extract the SKU: the text between "Modelo:" and the following <br> in the second <p> tag
$sku_html = $detailCrawler->filter('.woocommerce-product-details__short-description p:nth-child(2)')->html('');
$sku = '';
if (strpos($sku_html, 'Modelo:') !== false) {
$start_pos = strpos($sku_html, 'Modelo:') + 7; // 7 是 "Modelo:" 的长度
$end_pos = strpos($sku_html, '
', $start_pos);
$sku = trim(substr($sku_html, $start_pos, $end_pos - $start_pos));
}
/******************** 新增内容提取结束 ********************/
echo "处理产品 {$title} \n";
echo '插入产品获取ID完成' . "\n";
// 插入产品获取ID
$productId = DB::table('dist_product')->insertGetId([
'title' => $title,
'content' => $content, // 初始未处理的content
'slug' => '',
'dist_id' => 3,
'enabled' => 1,
'status' => 2,
'seo_description' => $seo_description,
'sku' => $sku,
'category_id' => $category->id,
'created_at' => Carbon::now(),
'updated_at' => Carbon::now(),
'parameters' => json_encode([])
]);
echo '处理content中的图片' . "\n";
if ($content != "") {
//去除source标签
$content = preg_replace('/]*>.*?<\/source>/is', '', $content);
$content = preg_replace('/]*>/', '', $content);
// 处理content中的图片
$dom = new \DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML(mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
libxml_clear_errors();
$images = $dom->getElementsByTagName('img');
foreach ($images as $img) {
$originalSrc = $img->getAttribute('src');
$absoluteSrc = $originalSrc;
// 下载图片
$imageContent = @file_get_contents($absoluteSrc);
if ($imageContent === false) {
echo "图片下载失败: {$absoluteSrc}\n";
continue;
}
// 生成唯一文件名
$extension = pathinfo($absoluteSrc, PATHINFO_EXTENSION);
$filename = md5(uniqid() . microtime()) . '.' . $extension;
// 保存路径
$imageDir = public_path("static/tpl/screen_protector_solutions/product/{$productId}");
if (!File::exists($imageDir)) {
File::makeDirectory($imageDir, 0755, true, true);
}
file_put_contents("{$imageDir}/{$filename}", $imageContent);
// 替换为OSS路径
$newSrc = "{$ossHost}/static/tpl/screen_protector_solutions/product/{$productId}/{$filename}";
$img->setAttribute('src', $newSrc);
// 新增:移除 srcset 和 data-mce-src 属性
$img->removeAttribute('srcset');
$img->removeAttribute('data-mce-src');
$img->removeAttribute('sizes');
}
// 获取处理后的HTML并更新数据库
$processedContent = $dom->saveHTML();
} else {
$processedContent = $content;
}
DB::table('dist_product')
->where('id', $productId)
->update(['content' => $processedContent,'slug'=> $productId]);
echo '获取处理后的HTML并更新数据库' . "\n";
echo '处理主图' . "\n";
// 处理主图
$mainImages = $detailCrawler->filter('.woocommerce-product-gallery__wrapper a')->extract(['href']);
$imageDirPath = "static/tpl/screen_protector_solutions/product/{$productId}";
$imageDir = public_path($imageDirPath);
File::makeDirectory($imageDir, 0755, true, true);
foreach ($mainImages as $index => $imgUrl) {
echo "处理主图 {$imgUrl} \n";
$extension = pathinfo($imgUrl, PATHINFO_EXTENSION);
$filename = "{$index}_" . md5(time()) . ".{$extension}";
file_put_contents("{$imageDir}/{$filename}", file_get_contents($imgUrl));
// 插入数据库
DB::table('dist_product_image')->insert([
'product_id' => $productId,
'image_url' => "{$ossHost}/{$imageDirPath}/{$filename}",
'order' => 0,
'created_at' => Carbon::now(),
'updated_at' => Carbon::now(),
]);
}
echo "处理主图完成,下一个产品 \n";
echo "------------------------------------------\n";
}
} catch (\Exception $e) {
echo "数据采集失败: " . $e->getMessage() . "\n";
continue;
}
}
dd('所有处理完成');
}
}