定制 arefshojaei/spider 二次开发

按需修改功能、优化性能、对接业务系统,提供一站式技术支持

邮箱:yvsm@zunyunkeji.com | QQ:316430983 | 微信:yvsm316

arefshojaei/spider

最新稳定版本:v2.3.0

Composer 安装命令:

composer require arefshojaei/spider

包简介

PHP web spider

README 文档

README

PHP web spider

<?php

use Spider\Spider;

$spider = new Spider;

$page = $spider->loadHTML("http://google.com");

echo $page->find("title")->text() . PHP_EOL;

$page->findAll("a")->each(function($key, $link) {
    echo "[LINK] " . $link->attr("href") . PHP_EOL;
});

Installation

Using Composer

composer create-project arefshojaei/spider

Using GIT

git clone https://github.com/ArefShojaei/Spider

Find element

  • find()
  • findAll()
$page->find("a");

$page->findAll(".product");

Iterate over each element

  • each()
  • map()
  • filter()
$page->findAll("a")->each(function($key, $anchor) {
    echo "[LINK] " . $anchor->attr("href") . PHP_EOL;
    echo "[TITLE] " . $anchor->text() . PHP_EOL;
    echo "[HTML] " . $anchor->html() . PHP_EOL;
});

# ----------------------------------------
$anchors = $page->findAll("a")->map(function($key, $anchor) {
    $anchor->attr("data-id", rand());

    return $anchor;
});

var_dump($anchors);

# ----------------------------------------
$filteredAnchors = $page->findAll("a")->filter(fn($key, $anchor) => $anchor->attr("data-id"));

var_dump($filteredAnchors);

Element traversing

  • parent()
  • after()
  • before()
  • append()
  • prepend()
$parentNode = $page->find(".product")->parent();

# Add sibling element (outside the element, after / before it)
$page->find(".product")->after("<p>After Element</p>");
$page->find(".product")->before("<p>Before Element</p>");

# Add child element (inside the element, at the end / start)
$page->find(".product")->append("<p>Append Element</p>");
$page->find(".product")->prepend("<p>Prepend Element</p>");

Element cleaner

  • empty()
  • remove()
# Clean element content
$page->find("p")->empty();

# Remove element from the DOM
$page->find("p")->remove();

Element content

  • text()
  • html()
# Getter
$text = $page->find("p")->text();
$html = $page->find("p")->html();

# Setter
$newText = $page->find("p")->text("New text content");
$newHtml = $page->find("p")->html("<p id='spider'>New html content</p>");

Element attribute

  • attr()
  • addClass()
  • removeClass()
  • hasClass()
  • addId()
  • removeId()
  • hasId()
# Getter
$attributes = $page->find("a")->attr();

$link = $page->find("a")->attr("href");

# Setter
$page->find("a")->attr("data-id", rand());

# Class
$page->find("p")->addClass("spider");
$page->find("p")->removeClass("spider");
$page->find("p")->hasClass("spider");

# ID
$page->find("p")->addID("spider");
$page->find("p")->removeID("spider");
$page->find("p")->hasID("spider");

Export current page content

$filename = "app";

$path = __DIR__ . "\\html\\" . $filename . rand() . ".html";

$page->export($path);

统计信息

  • 总下载量: 6
  • 月度下载量: 0
  • 日度下载量: 0
  • 收藏数: 0
  • 点击次数: 0
  • 依赖项目数: 0
  • 推荐数: 0

GitHub 信息

  • Stars: 0
  • Watchers: 1
  • Forks: 0
  • 开发语言: PHP

其他信息

  • 授权协议: MIT
  • 更新时间: 2025-03-28