2017 © Pedro Peláez
 

library simple-crawler

LenonLeite Component - Simple Crawler, for read pages

image

lenonleite/simple-crawler

LenonLeite Component - Simple Crawler, for read pages

  • Friday, November 17, 2017
  • by lenonleite
  • Repository
  • 5 Watchers
  • 15 Stars
  • 13 Installations
  • PHP
  • 0 Dependents
  • 0 Suggesters
  • 2 Forks
  • 0 Open issues
  • 5 Versions
  • 0 % Grown

The README.md

Single Crawler

Single Crawler is a single method of crawling sites., (*1)

Beta Version

0.4, (*2)

New Features

  • PHP / Methods
  • Html
  • Form

Usage

Get Tags In Html

use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$result = $general->get_tags('div', $html );

Result, (*3)

array(4) {
  [0]=>
  string(29) "<div id="header" class="all">"
  [1]=>
  string(18) "<div id="content">"
  [2]=>
  string(32) "<div id="sidebar" class="right">"
  [3]=>
  string(17) "<div id='footer'>"
}

General Get Attribute Tag

use Lenonleite\SimpleCrawler;
$html = '<div id="header" class="all">';
$general = new SimpleCrawler\General();
$result = $general->get_atribute_tag( $html );

Result, (*4)

array(3) {
  ["full"]=>
  string(29) "<div id="header" class="all">"
  ["key"]=>
  string(3) "div"
  ["value"]=>
  string(23) "id="header" class="all""
}

General Get Attribute Tag In Array

use Lenonleite\SimpleCrawler;
$html[] = '<div id="header" class="all">';
$html[] = '<div id="content">';
$general = new SimpleCrawler\General();
$result = $general->get_attributes_array_tags( $html );

Result, (*5)

array(2) {
  [0]=>
  array(3) {
    ["full"]=>
    string(29) "<div id="header" class="all">"
    ["key"]=>
    string(3) "div"
    ["value"]=>
    string(23) "id="header" class="all""
  }
  [1]=>
  array(3) {
    ["full"]=>
    string(18) "<div id="content">"
    ["key"]=>
    string(3) "div"
    ["value"]=>
    string(12) "id="content""
  }
}

General Get Data Tags

use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$result = $general->get_data_tags( 'div', $html );

Result, (*6)

array(3) {
  ["tags"]=>
  array(4) {
    [0]=>
    string(29) "<div id="header" class="all">"
    [1]=>
    string(18) "<div id="content">"
    [2]=>
    string(32) "<div id="sidebar" class="right">"
    [3]=>
    string(17) "<div id='footer'>"
  }
 ["html"]=>
   string(322) "<html>...</html>"
 ["tags_atributes"]=>
   array(4) {
     [0]=>
     array(3) {
       ["full"]=>
       string(29) "<div id="header" class="all">"
       ["key"]=>
       string(3) "div"
       ["value"]=>
       string(23) "id="header" class="all""
     }
     [1]=>
     array(3) {
       ["full"]=>
       string(18) "<div id="content">"
       ["key"]=>
       string(3) "div"
       ["value"]=>
       string(12) "id="content""
     }
     [2]=>
     array(3) {
       ["full"]=>
       string(32) "<div id="sidebar" class="right">"
       ["key"]=>
       string(3) "div"
       ["value"]=>
       string(26) "id="sidebar" class="right""
     }
     [3]=>
     array(3) {
       ["full"]=>
       string(17) "<div id='footer'>"
       ["key"]=>
       string(3) "div"
       ["value"]=>
       string(11) "id='footer'"
     }
   }
 }

General Get Html between Tags By Tag/Attribute/Value

use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$tag       = 'div';
$attribute = 'id';
$value     = 'sidebar';
$result = $general->get_html_between_tag_attr_and_value( $html, $tag, $attribute, $value );

Result, (*7)

array(1) {
  [0]=>
  string(51) "



"
}

General Get Html between Tags By Tag/Value Id or Class

use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$tag    = 'div';
$value  = 'internal';
$result = $general->get_html_between_tag_attr_and_value( $html, $tag, $value );

Result, (*8)

array(2) {
  [0]=>
  string(64) "



"
  [1]=>
  string(60) "



"
}

General Get Html between Tags By Tag

use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$tag    = 'div';
$result = $general->get_html_between_tag( $html, $tag );

Result, (*9)

array(4) {
  [0]=>
  string(64) "



"
  [1]=>
  string(51) "

Center right, (*10)

"
  [2]=>
  string(60) " "
  [3]=>
  string(25) " "
}

General Get html between tag by tag and class or id

use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$tag = 'div';
$name_class_or_id = 'sidebar';
$result = $general->get_html_between_tag_attr_id_or_class( $html, $tag, $name_class_or_id );

Result, (*11)

array(1) {
  [0]=>
  string(60) "



"
}

General Get the parts of a tag structure

use Lenonleite\SimpleCrawler;
$general = new SimpleCrawler\General();
$tag = '<div id="header" class="all">';
$result = $general->get_attribute_tag( $tag );

Result, (*12)

array(3) {
  ["full"]=>
  string(29) "<div id="header" class="all">"
  ["key"]=>
  string(3) "div"
  ["value"]=>
  string(23) "id="header" class="all""
}

PHP / Methods Get data of Methods on Html.

use Lenonleite\SimpleCrawler;
$html_php = file_get_contents( 'teste_php_methods.html' );
$php = new SimpleCrawler\Php\Methods();
$result = $php->get_parameters( $html_php );

Result, (*13)

array(3) {
  [0]=>
  array(6) {
    ["type_methdd"]=>
    string(6) "public"
    ["static"]=>
    string(0) ""
    ["name_method"]=>
    string(6) " error"
    ["atributes"]=>
    array(1) {
      [0]=>
      string(8) "$message"
    }
    ["internal_context"]=>
    string(87) "
$this->CleanUp();
if (!isset($this->info['error'])) {
$this->info['error'] = array();
"
    ["all_context"]=>
    string(121) "public function error($message) {
$this->CleanUp();
if (!isset($this->info['error'])) {
$this->info['error'] = array();
}"
  }
  [1]=>
  array(6) {
    ["type_methdd"]=>
    string(0) ""
    ["static"]=>
    string(0) ""
    ["name_method"]=>
    string(8) " warning"
    ["atributes"]=>
    array(1) {
      [0]=>
      string(8) "$message"
    }
    ["internal_context"]=>
    string(51) "
$this->info['warning'][] = $message;
return true;
"
    ["all_context"]=>
    string(81) "
function warning($message) {
$this->info['warning'][] = $message;
return true;
}"
  }
  [2]=>
  array(6) {
    ["type_methdd"]=>
    string(7) "private"
    ["static"]=>
    string(7) "static "
    ["name_method"]=>
    string(8) " warning"
    ["atributes"]=>
    array(2) {
      [0]=>
      string(8) "$message"
      [1]=>
      string(6) "$error"
    }
    ["internal_context"]=>
    string(51) "
$this->info['warning'][] = $message;
return true;
"
    ["all_context"]=>
    string(102) "private static function warning($message,$error) {
$this->info['warning'][] = $message;
return true;
}"
  }
}

HTML Get all urls on Html.

use Lenonleite\SimpleCrawler;
$html_txt = '<a href="https://www.w3schools.com">Visit W3Schools</a>';
$html = new SimpleCrawler\Html();
$result = $html->get_parameters( $html_txt );

Result, (*14)

array(1) {
  [0]=>
  string(25) "https://www.w3schools.com"
}

LOGIN Get data about forms.

use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$login = new SimpleCrawler\Login();
$result = $login->get_forms( $html );

Result, (*15)

array(1) {
  [0]=>
  array(3) {
    ["html"]=>
    string(280) "

First name:

Last name:


"
    ["fields"]=>
    array(2) {
      ["tags"]=>
      array(3) {
        [0]=>
        string(55) "<input type="text" name="firstname" value="Mickey"><br>"
        [1]=>
        string(57) "<input type="text" name="lastname" value="Mouse"><br><br>"
        [2]=>
        string(36) "<input type="submit" value="Submit">"
      }
      ["tags_atributes"]=>
      array(3) {
        [0]=>
        array(3) {
          ["full"]=>
          string(51) "<input type="text" name="firstname" value="Mickey">"
          ["key"]=>
          string(5) "input"
          ["value"]=>
          string(43) "type="text" name="firstname" value="Mickey""
        }
        [1]=>
        array(3) {
          ["full"]=>
          string(49) "<input type="text" name="lastname" value="Mouse">"
          ["key"]=>
          string(5) "input"
          ["value"]=>
          string(41) "type="text" name="lastname" value="Mouse""
        }
        [2]=>
        array(3) {
          ["full"]=>
          string(36) "<input type="submit" value="Submit">"
          ["key"]=>
          string(5) "input"
          ["value"]=>
          string(28) "type="submit" value="Submit""
        }
      }
    }
    ["form"]=>
    array(2) {
      ["tags"]=>
      array(1) {
        [0]=>
        string(46) " < form action="/action_page.php" method="POST">"
      }
      ["tags_atributes"]=>
      array(1) {
        [0]=>
        array(3) {
          ["full"]=>
          string(46) " < form action="/action_page.php" method="POST">"
          ["key"]=>
          string(4) "form"
          ["value"]=>
          string(39) "action="/action_page.php" method="POST""
        }
      }
    }
  }
}

The Versions

17/11 2017

dev-master

9999999-dev http://www.lenonleite.com.br

LenonLeite Component - Simple Crawler, for read pages

  Sources   Download

MIT

The Requires

  • php >=7.0.0

 

The Development Requires

wordpress read php library crawler hacking hacker pentest body

16/11 2017

v0.4

0.4.0.0 http://www.lenonleite.com.br

LenonLeite Component - Simple Crawler, for read pages

  Sources   Download

MIT

The Requires

  • php >=7.0.0

 

wordpress read php library crawler hacking hacker pentest body

11/08 2017

v0.3

0.3.0.0 http://www.lenonleite.com.br

LenonLeite Component - Simple Crawler, for read pages

  Sources   Download

MIT

The Requires

  • php >=7.0.0

 

wordpress read php library crawler hacking hacker pentest body

02/08 2017

v0.2

0.2.0.0 http://www.lenonleite.com.br

LenonLeite Component - Simple Crawler, for read pages

  Sources   Download

MIT

The Requires

  • php >=7.0.0

 

wordpress read php library crawler hacking hacker pentest body

01/08 2017

v0.1

0.1.0.0 http://www.lenonleite.com.br

LenonLeite Component - Simple Crawler, for read pages

  Sources   Download

MIT

The Requires

  • php >=7.0.0

 

wordpress read php library crawler hacking hacker pentest body