定制 rabbyte/scraper 二次开发

按需修改功能、优化性能、对接业务系统,提供一站式技术支持

邮箱:yvsm@zunyunkeji.com | QQ:316430983 | 微信:yvsm316

rabbyte/scraper

Composer 安装命令:

composer require rabbyte/scraper

包简介

Scrape data from Iranian e-commerce sites such as Torob, Digikala and Divar.

README 文档

README

A scraper for Divar and Digikala Supermarket — for learning purposes only.

how to scrape?

Add this requirement to your composer.json:

    "require": {
        "rabbyte/scraper": "dev-master"
    }

Then run `composer install`.

Currently supported Divar categories (with example usage):

<?php
/**
 * Example: scraping Divar listings category by category.
 */
require 'vendor/autoload.php';

use Rabbyte\Scraper\divar\divarApi;

// Divar category names that the scrape() call below walks through.
$categories = [
    'stationery',
    'clothing',
    'health-beauty',
    'rhinestones',
    'shoes-belt-bag',
    'childrens-clothing-and-shoe',
];
/**
 * Scrape the given Divar categories page by page until no category remains.
 *
 * On every pass one async request per remaining category is built with
 * divarApi::asyncStruct() and all of them are executed together through
 * divarApi::asyncRequest(). The body of each fulfilled response is handed to
 * divarApi::parseExport(); when that call returns a falsy status the category
 * is dropped from all later passes. The function returns once the category
 * list is empty (an empty list on entry returns immediately).
 *
 * NOTE(review): a rejected request leaves its category in the list, so it is
 * requested again on the next page. A category that is rejected on every pass
 * therefore keeps this loop running indefinitely — confirm whether a retry
 * limit is wanted.
 *
 * @param string[] $categories  Category names to scrape.
 * @param int      $layerPage   Page index of the first pass; incremented after each pass.
 * @param int      $filterPrice Price filter, forwarded unchanged to parseExport().
 *
 * @return void
 */
function scrape($categories, $layerPage, $filterPrice)
{
    // Loop rather than recurse so that a long crawl does not deepen the call
    // stack by one frame per page.
    while (!empty($categories)) {
        // A fresh client is created for every pass.
        $divar = new divarApi();

        $promises = [];
        foreach ($categories as $category) {
            $promises[$category] = $divar->asyncStruct($category, $layerPage);
        }

        // Run requests concurrently
        $results = $divar->asyncRequest($promises);

        // Process responses
        foreach ($results as $categoryName => $response) {
            if ($response['state'] !== 'fulfilled') {
                // Terminate the line so consecutive failures stay readable.
                echo $categoryName . ": Failed - " . $response['reason'] . PHP_EOL;
                continue;
            }

            $rsp = $response['value']->getBody();
            $status = $divar->parseExport($filterPrice, $categoryName . "/simple/", $rsp);

            // A falsy status ends this category: keep every other category
            // for the next page and drop this one.
            if (!$status) {
                $categories = array_filter($categories, function ($value) use ($categoryName) {
                    return $value !== $categoryName;
                });
            }
        }

        // Pause five seconds before requesting the next page.
        sleep(5);
        $layerPage++;
    }
}

// Start at page 0 and forward 10000000 as the price filter.
$startPage = 0;
$filterPrice = 10000000;
scrape($categories, $startPage, $filterPrice);

Currently supported Digikala Supermarket categories (with example usage):

<?php
/**
 * Example: scraping Digikala Supermarket data category by category.
 *
 * One async request per category is built with asyncStruct(), all requests are
 * run together through asyncRequest(), and the `data` field of every fulfilled
 * JSON response is printed.
 */
require 'vendor/autoload.php';

use Rabbyte\Scraper\digikala\supermarket\spDigikalaApi;

$categories = [
    'oil',
    'chocolate-and-cocoa-products',
    'rice',
    'spaghetti-pasta',
    'sugar',
    'sugar-candy',
    'cereals',
    'bread',
    'types-paste',
];

// NOTE(review): the constructor takes a host:port string — presumably a proxy
// address; confirm against spDigikalaApi.
$digikala = new spDigikalaApi('127.0.0.1:8082');

$promises = [];
foreach ($categories as $category) {
    $promises[$category] = $digikala->asyncStruct($category, 1);
}

$results = $digikala->asyncRequest($promises);
foreach ($results as $categoryName => $response) {
    if ($response['state'] !== 'fulfilled') {
        // Terminate the line so consecutive failures stay readable.
        echo $categoryName . ": Failed - " . $response['reason'] . PHP_EOL;
        continue;
    }

    $rsp = (string)$response['value']->getBody();
    $json = json_decode($rsp);

    // json_decode() returns null for invalid JSON, and a valid payload may
    // still lack `data`; report that instead of reading a missing property.
    if (!is_object($json) || !property_exists($json, 'data')) {
        echo $categoryName . ": Failed - response has no 'data' field" . PHP_EOL;
        continue;
    }

    $data = $json->data;
    if ($data === null || is_scalar($data)) {
        echo $data;
    } else {
        // echo cannot print objects or arrays, so emit them as JSON text.
        echo json_encode($data, JSON_UNESCAPED_UNICODE);
    }
}

Currently supported Torob categories (with example usage):

<?php
/**
 * Example: scraping Torob data for several brands of one category.
 *
 * One async request per brand is built with asyncStruct(), all requests are
 * run together through asyncRequest(), and every fulfilled JSON response is
 * decoded and dumped.
 */
require 'vendor/autoload.php';

use Rabbyte\Scraper\torob\torobApi;

$categories = [
    'mobile'
];

$brands = [
  'apple', 'xiaomi','samsung'
];

$sort = [
    '', // sort by 'محبوب ترین' (most popular)
    'price', // sort by 'ارزان ترین' (cheapest)
    '-price', // sort by 'گران ترین' (most expensive)
    '-date' // sort by 'جدیدترین' (newest)
];

// NOTE(review): the constructor takes a host:port string — presumably a proxy
// address; confirm against torobApi.
$torob = new torobApi('127.0.0.1:8080');

$promises = [];
foreach ($brands as $i => $brand) {
    // Each brand is paired with the sort option at the same position. Brands
    // beyond the end of $sort fall back to '' (the first option) instead of
    // reading an undefined offset.
    $sortBy = isset($sort[$i]) ? $sort[$i] : '';

    $promises[$brand] = $torob->asyncStruct($categories[0], $brand, $sortBy, 2);
}

$results = $torob->asyncRequest($promises);
foreach ($results as $brandName => $response) {
    if ($response['state'] !== 'fulfilled') {
        // Terminate the line so consecutive failures stay readable.
        echo $brandName . ": Failed - " . $response['reason'] . PHP_EOL;
        continue;
    }

    $rsp = (string)$response['value']->getBody();
    $json = json_decode($rsp);

    var_dump($json);
}

统计信息

  • 总下载量: 7
  • 月度下载量: 0
  • 日度下载量: 0
  • 收藏数: 7
  • 点击次数: 0
  • 依赖项目数: 0
  • 推荐数: 0

GitHub 信息

  • Stars: 6
  • Watchers: 1
  • Forks: 0
  • 开发语言: PHP

其他信息

  • 授权协议: Unknown
  • 更新时间: 2025-06-28