(Grav GitSync) Automatic Commit from GitSync
This commit is contained in:
+257
@@ -0,0 +1,257 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser;
|
||||
|
||||
use PHPHtmlParser\Enum\StringToken;
|
||||
use PHPHtmlParser\Exceptions\ContentLengthException;
|
||||
use PHPHtmlParser\Exceptions\LogicalException;
|
||||
|
||||
/**
 * Class Content.
 *
 * A cursor over a string: holds the content, its byte length and a current
 * byte offset, and offers helpers to read, copy and skip relative to that
 * offset.
 */
class Content
{
    /**
     * The content string.
     *
     * @var string
     */
    protected $content;

    /**
     * The size of the content, as reported by strlen() at construction.
     *
     * @var int
     */
    protected $size;

    /**
     * The current position we are in the content.
     *
     * @var int
     */
    protected $pos;

    /**
     * The following 4 strings are character sets that are important to us.
     * They are not read by any method of this class.
     *
     * @var string
     */
    protected $blank = " \t\r\n";
    protected $equal = ' =/>';
    protected $slash = " />\r\n\t";
    protected $attr = ' >';

    /**
     * Content constructor.
     *
     * Stores the string, caches its length and places the cursor at offset 0.
     */
    public function __construct(string $content = '')
    {
        $this->content = $content;
        $this->size = \strlen($content);
        $this->pos = 0;
    }

    /**
     * Returns the current position of the content.
     */
    public function getPosition(): int
    {
        return $this->pos;
    }

    /**
     * Gets the character at the given offset, or at the current position
     * when no offset is given. Returns an empty string when the offset does
     * not exist in the content.
     *
     * @param ?int $char offset to read; null means the current position
     */
    public function char(?int $char = null): string
    {
        return $this->content[$char ?? $this->pos] ?? '';
    }

    /**
     * Gets a string from the current character position without moving it.
     *
     * The do/while always reads at least one character, so a length of zero
     * or less behaves like a length of one.
     *
     * @param int $length number of characters to read
     *
     * @return string
     */
    public function string(int $length = 1): string
    {
        $string = '';
        $position = $this->pos;
        do {
            $string .= $this->char($position++);
        } while ($position < $this->pos + $length);

        return $string;
    }

    /**
     * Moves the current position forward.
     *
     * @throws ContentLengthException when the move would pass the end of the content
     */
    public function fastForward(int $count): Content
    {
        if (!$this->canFastForward($count)) {
            // trying to go over the content length, throw exception
            throw new ContentLengthException('Attempt to fastForward pass the length of the content.');
        }
        $this->pos += $count;

        return $this;
    }

    /**
     * Checks if we can move the position forward by the given count
     * without passing the end of the content.
     */
    public function canFastForward(int $count): bool
    {
        return \strlen($this->content) >= $this->pos + $count;
    }

    /**
     * Moves the current position backward, never below offset 0.
     */
    public function rewind(int $count): Content
    {
        $this->pos -= $count;
        if ($this->pos < 0) {
            $this->pos = 0;
        }

        return $this;
    }

    /**
     * Copy the content until we find the given string, and move the
     * position to where it was found.
     *
     * Search modes, checked in this order:
     *  - $escape: search for $string as a whole, ignoring occurrences
     *    directly preceded by a backslash ($char is not consulted);
     *  - $char: treat $string as a set of characters and stop at the first
     *    one of them;
     *  - otherwise: search for $string as a whole.
     *
     * When nothing is found the remainder of the content is returned and the
     * position moves to the end.
     *
     * @throws LogicalException when substr() reports a failure
     */
    public function copyUntil(string $string, bool $char = false, bool $escape = false): string
    {
        if ($this->pos >= $this->size) {
            // nothing left
            return '';
        }

        if ($escape) {
            $position = $this->pos;
            $found = false;
            while (!$found) {
                $position = \strpos($this->content, $string, $position);
                if ($position === false) {
                    // reached the end
                    break;
                }

                // Only look behind when a previous character exists. At
                // offset 0, char(-1) would read the LAST character of the
                // content (negative string offset) and could wrongly report
                // the match as escaped.
                if ($position > 0 && $this->char($position - 1) == '\\') {
                    // this character is escaped
                    ++$position;
                    continue;
                }

                $found = true;
            }
        } elseif ($char) {
            // strcspn returns a length relative to the start offset
            $position = \strcspn($this->content, $string, $this->pos);
            $position += $this->pos;
        } else {
            $position = \strpos($this->content, $string, $this->pos);
        }

        if ($position === false) {
            // could not find character, just return the remaining of the content
            $return = \substr($this->content, $this->pos, $this->size - $this->pos);
            if ($return === false) {
                throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
            }
            $this->pos = $this->size;

            return $return;
        }

        if ($position === $this->pos) {
            // we are at the right place
            return '';
        }

        $return = \substr($this->content, $this->pos, $position - $this->pos);
        if ($return === false) {
            throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
        }
        // set the new position
        $this->pos = $position;

        return $return;
    }

    /**
     * Copies the content until the string is found and return it
     * unless the 'unless' is found in the substring.
     *
     * The position is first advanced by one character. If the copied text
     * contains none of the characters in $unless, $string followed by the
     * copied text is returned; otherwise the position is restored and an
     * empty string is returned.
     *
     * @throws ContentLengthException when the position cannot be advanced by one
     * @throws LogicalException
     */
    public function copyUntilUnless(string $string, string $unless): string
    {
        $lastPos = $this->pos;
        $this->fastForward(1);
        $foundString = $this->copyUntil($string, true, true);

        $position = \strcspn($foundString, $unless);
        if ($position == \strlen($foundString)) {
            return $string . $foundString;
        }
        // rewind changes and return nothing
        $this->pos = $lastPos;

        return '';
    }

    /**
     * Copies the content until it reaches the token string.
     *
     * @uses $this->copyUntil()
     *
     * @throws LogicalException
     */
    public function copyByToken(StringToken $stringToken, bool $char = false, bool $escape = false): string
    {
        $string = $stringToken->getValue();

        return $this->copyUntil($string, $char, $escape);
    }

    /**
     * Skip a given set of characters, starting at the current position.
     *
     * @param string $string the characters to skip over
     * @param bool   $copy   when true the skipped characters are returned,
     *                       otherwise an empty string is returned
     *
     * @throws LogicalException
     */
    public function skip(string $string, bool $copy = false): string
    {
        $len = \strspn($this->content, $string, $this->pos);
        if ($len === false) {
            throw new LogicalException('Strspn returned false with position ' . $this->pos . '.');
        }
        $return = '';
        if ($copy) {
            $return = \substr($this->content, $this->pos, $len);
            if ($return === false) {
                throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
            }
        }

        // update the position
        $this->pos += $len;

        return $return;
    }

    /**
     * Skip a given token of pre-defined characters.
     *
     * @uses $this->skip()
     *
     * @throws LogicalException
     */
    public function skipByToken(StringToken $skipToken, bool $copy = false): string
    {
        $string = $skipToken->getValue();

        return $this->skip($string, $copy);
    }
}
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
<?php
|
||||
|
||||
namespace PHPHtmlParser\Contracts\Dom;
|
||||
|
||||
use PHPHtmlParser\Exceptions\LogicalException;
|
||||
use PHPHtmlParser\Options;
|
||||
|
||||
interface CleanerInterface
{
    /**
     * Cleans the html of any non-html information.
     *
     * @param string  $str            the raw input to clean
     * @param Options $options        the options in effect for this call
     * @param string  $defaultCharset the charset to use by default
     *
     * @throws LogicalException
     *
     * @return string the cleaned string
     */
    public function clean(
        string $str,
        Options $options,
        string $defaultCharset
    ): string;
}
|
||||
+33
@@ -0,0 +1,33 @@
|
||||
<?php
|
||||
|
||||
namespace PHPHtmlParser\Contracts\Dom;
|
||||
|
||||
use PHPHtmlParser\Content;
|
||||
use PHPHtmlParser\Dom\Node\AbstractNode;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\CircularException;
|
||||
use PHPHtmlParser\Exceptions\ContentLengthException;
|
||||
use PHPHtmlParser\Exceptions\LogicalException;
|
||||
use PHPHtmlParser\Exceptions\StrictException;
|
||||
use PHPHtmlParser\Options;
|
||||
|
||||
interface ParserInterface
{
    /**
     * Attempts to parse the html in content.
     *
     * @param Options $options the options in effect for this parse
     * @param Content $content the content to parse
     * @param int     $size    a size supplied by the caller
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     *
     * @return AbstractNode the node produced by the parse
     */
    public function parse(
        Options $options,
        Content $content,
        int $size
    ): AbstractNode;

    /**
     * Attempts to detect the charset that the html was sent in.
     *
     * @param Options      $options        the options in effect
     * @param string       $defaultCharset the charset to use by default
     * @param AbstractNode $root           the node to inspect
     *
     * @throws ChildNotFoundException
     */
    public function detectCharset(
        Options $options,
        string $defaultCharset,
        AbstractNode $root
    ): bool;
}
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Contracts;
|
||||
|
||||
use PHPHtmlParser\Dom;
|
||||
use PHPHtmlParser\Options;
|
||||
use Psr\Http\Client\ClientInterface;
|
||||
use Psr\Http\Message\RequestInterface;
|
||||
|
||||
interface DomInterface
{
    /**
     * Loads the dom from a file.
     */
    public function loadFromFile(string $file, ?Options $options = null): Dom;

    /**
     * Loads the dom from a url.
     *
     * NOTE(review): unlike the other load methods, $options has no default
     * value here. It is left as-is because giving it one would tighten the
     * contract for existing implementers.
     */
    public function loadFromUrl(string $url, ?Options $options, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom;

    /**
     * Loads the dom from a string.
     */
    public function loadStr(string $str, ?Options $options = null): Dom;

    /**
     * Sets the options to use.
     */
    public function setOptions(Options $options): Dom;

    /**
     * Finds elements by selector.
     *
     * $nth is declared explicitly nullable; relying on the "= null" default
     * to make "int" nullable is deprecated as of PHP 8.4.
     *
     * @param ?int $nth optional index; null when not given
     */
    public function find(string $selector, ?int $nth = null);
}
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Contracts\Selector;
|
||||
|
||||
use PHPHtmlParser\DTO\Selector\ParsedSelectorCollectionDTO;
|
||||
|
||||
interface ParserInterface
{
    /**
     * Parses a selector string into a collection of parsed selectors.
     *
     * @param string $selector the selector string to parse
     */
    public function parseSelectorString(
        string $selector
    ): ParsedSelectorCollectionDTO;
}
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
<?php
|
||||
|
||||
namespace PHPHtmlParser\Contracts\Selector;
|
||||
|
||||
use PHPHtmlParser\DTO\Selector\RuleDTO;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
|
||||
interface SeekerInterface
{
    /**
     * Attempts to find all children that match the given rule.
     *
     * @param array   $nodes   the nodes to search from
     * @param RuleDTO $rule    the rule to match against
     * @param array   $options additional options for the search
     *
     * @throws ChildNotFoundException
     *
     * @return array the matches
     */
    public function seek(
        array $nodes,
        RuleDTO $rule,
        array $options
    ): array;
}
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Contracts\Selector;
|
||||
|
||||
use PHPHtmlParser\Dom\Node\AbstractNode;
|
||||
use PHPHtmlParser\Dom\Node\Collection;
|
||||
use PHPHtmlParser\DTO\Selector\ParsedSelectorCollectionDTO;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
|
||||
interface SelectorInterface
{
    /**
     * Constructs with the selector string.
     *
     * @param string           $selector the selector string
     * @param ?ParserInterface $parser   optional selector parser
     * @param ?SeekerInterface $seeker   optional seeker
     */
    public function __construct(
        string $selector,
        ?ParserInterface $parser = null,
        ?SeekerInterface $seeker = null
    );

    /**
     * Returns the selectors that were found.
     */
    public function getParsedSelectorCollectionDTO(): ParsedSelectorCollectionDTO;

    /**
     * Attempts to find the selectors starting from the given
     * node object.
     *
     * @param AbstractNode $node the node to start from
     *
     * @throws ChildNotFoundException
     */
    public function find(AbstractNode $node): Collection;
}
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\DTO\Selector;
|
||||
|
||||
final class ParsedSelectorCollectionDTO
{
    /**
     * The parsed selectors held by this collection, indexed from zero.
     *
     * @var ParsedSelectorDTO[]
     */
    private $parsedSelectorDTO = [];

    /**
     * Keeps only the entries that are ParsedSelectorDTO instances; anything
     * else in the input is silently dropped.
     *
     * @param ParsedSelectorDTO[] $parsedSelectorDTOs
     */
    private function __construct(array $parsedSelectorDTOs)
    {
        $accepted = \array_filter(
            $parsedSelectorDTOs,
            static function ($candidate): bool {
                return $candidate instanceof ParsedSelectorDTO;
            }
        );

        // array_filter preserves keys; reindex so the list starts at zero.
        $this->parsedSelectorDTO = \array_values($accepted);
    }

    /**
     * Builds a collection from the given parsed selectors.
     *
     * @param ParsedSelectorDTO[] $parsedSelectorDTOs
     */
    public static function makeCollection(array $parsedSelectorDTOs): ParsedSelectorCollectionDTO
    {
        $collection = new ParsedSelectorCollectionDTO($parsedSelectorDTOs);

        return $collection;
    }

    /**
     * @return ParsedSelectorDTO[]
     */
    public function getParsedSelectorDTO(): array
    {
        return $this->parsedSelectorDTO;
    }
}
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\DTO\Selector;
|
||||
|
||||
final class ParsedSelectorDTO
{
    /**
     * The rules that make up this selector, indexed from zero.
     *
     * @var RuleDTO[]
     */
    private $rules = [];

    /**
     * Keeps only the entries that are RuleDTO instances; anything else in
     * the input is silently dropped.
     *
     * @param RuleDTO[] $ruleDTOs
     */
    private function __construct(array $ruleDTOs)
    {
        $accepted = \array_filter(
            $ruleDTOs,
            static function ($candidate): bool {
                return $candidate instanceof RuleDTO;
            }
        );

        // array_filter preserves keys; reindex so the list starts at zero.
        $this->rules = \array_values($accepted);
    }

    /**
     * Builds a parsed selector from the given rules.
     *
     * @param RuleDTO[] $ruleDTOs
     */
    public static function makeFromRules(array $ruleDTOs): ParsedSelectorDTO
    {
        $selector = new ParsedSelectorDTO($ruleDTOs);

        return $selector;
    }

    /**
     * @return RuleDTO[]
     */
    public function getRules(): array
    {
        return $this->rules;
    }
}
|
||||
Vendored
+100
@@ -0,0 +1,100 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\DTO\Selector;
|
||||
|
||||
final class RuleDTO
{
    /**
     * @var string
     */
    private $tag;

    /**
     * @var string
     */
    private $operator;

    /**
     * @var string|array|null
     */
    private $key;

    /**
     * @var string|array|null
     */
    private $value;

    /**
     * @var bool
     */
    private $noKey;

    /**
     * @var bool
     */
    private $alterNext;

    /**
     * Fills every property from the entry of the same name in $values.
     */
    private function __construct(array $values)
    {
        [
            'tag'       => $this->tag,
            'operator'  => $this->operator,
            'key'       => $this->key,
            'value'     => $this->value,
            'noKey'     => $this->noKey,
            'alterNext' => $this->alterNext,
        ] = $values;
    }

    /**
     * Builds a rule from its individual parts.
     *
     * @param string|array|null $key
     * @param string|array|null $value
     */
    public static function makeFromPrimitives(string $tag, string $operator, $key, $value, bool $noKey, bool $alterNext): RuleDTO
    {
        // compact() builds the array keyed by the parameter names.
        return new RuleDTO(\compact('tag', 'operator', 'key', 'value', 'noKey', 'alterNext'));
    }

    public function getTag(): string
    {
        return $this->tag;
    }

    public function getOperator(): string
    {
        return $this->operator;
    }

    /**
     * @return string|array|null
     */
    public function getKey()
    {
        return $this->key;
    }

    /**
     * @return string|array|null
     */
    public function getValue()
    {
        return $this->value;
    }

    public function isNoKey(): bool
    {
        return $this->noKey;
    }

    public function isAlterNext(): bool
    {
        return $this->alterNext;
    }
}
|
||||
Vendored
+60
@@ -0,0 +1,60 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\DTO\Tag;
|
||||
|
||||
use stringEncode\Encode;
|
||||
use stringEncode\Exception;
|
||||
|
||||
final class AttributeDTO
{
    /**
     * The attribute value; null when there is none.
     *
     * @var ?string
     */
    private $value;

    /**
     * @var bool
     */
    private $doubleQuote;

    /**
     * Reads 'value' and 'doubleQuote' from $values; 'doubleQuote' falls
     * back to true when absent.
     */
    private function __construct(array $values)
    {
        $this->value = $values['value'];
        $this->doubleQuote = $values['doubleQuote'] ?? true;
    }

    /**
     * Builds an attribute from its value and quoting flag.
     */
    public static function makeFromPrimitives(?string $value, bool $doubleQuote = true): AttributeDTO
    {
        return new AttributeDTO([
            'value' => $value,
            'doubleQuote' => $doubleQuote,
        ]);
    }

    public function getValue(): ?string
    {
        return $this->value;
    }

    public function isDoubleQuote(): bool
    {
        return $this->doubleQuote;
    }

    /**
     * Decodes html special characters in the value, in place.
     * A null value is left untouched.
     */
    public function htmlspecialcharsDecode(): void
    {
        if (!\is_null($this->value)) {
            $this->value = \htmlspecialchars_decode($this->value);
        }
    }

    /**
     * Converts the value with the given encoder, in place.
     *
     * A null value is left untouched, mirroring htmlspecialcharsDecode().
     * NOTE(review): this assumes Encode::convert() expects a string, in
     * which case passing null under strict types would fail — confirm
     * against the stringEncode package.
     *
     * @throws Exception
     */
    public function encodeValue(Encode $encode): void
    {
        if (\is_null($this->value)) {
            return;
        }

        $this->value = $encode->convert($this->value);
    }
}
|
||||
+74
@@ -0,0 +1,74 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\DTO;
|
||||
|
||||
use PHPHtmlParser\Dom\Node\HtmlNode;
|
||||
|
||||
final class TagDTO
{
    /**
     * @var bool
     */
    private $status;

    /**
     * @var bool
     */
    private $closing;

    /**
     * @var ?HtmlNode
     */
    private $node;

    /**
     * @var ?string
     */
    private $tag;

    /**
     * Reads each property from $values, falling back to false for the two
     * flags and to null for the node and the tag.
     */
    private function __construct(array $values = [])
    {
        $this->tag = $values['tag'] ?? null;
        $this->node = $values['node'] ?? null;
        $this->closing = $values['closing'] ?? false;
        $this->status = $values['status'] ?? false;
    }

    /**
     * Builds a tag DTO from its individual parts.
     */
    public static function makeFromPrimitives(bool $status = false, bool $closing = false, ?HtmlNode $node = null, ?string $tag = null): TagDTO
    {
        // compact() builds the array keyed by the parameter names.
        return new TagDTO(\compact('status', 'closing', 'node', 'tag'));
    }

    public function isStatus(): bool
    {
        return $this->status;
    }

    public function isClosing(): bool
    {
        return $this->closing;
    }

    /**
     * @return ?HtmlNode
     */
    public function getNode(): ?HtmlNode
    {
        return $this->node;
    }

    /**
     * @return ?string
     */
    public function getTag(): ?string
    {
        return $this->tag;
    }
}
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Discovery;
|
||||
|
||||
use PHPHtmlParser\Contracts\Dom\CleanerInterface;
|
||||
use PHPHtmlParser\Dom\Cleaner;
|
||||
|
||||
class CleanerDiscovery
{
    /**
     * The cleaner created on the first call to find() and reused afterwards.
     *
     * @var Cleaner|null
     */
    private static $cleaner = null;

    /**
     * Returns the shared cleaner, creating it on first use.
     */
    public static function find(): CleanerInterface
    {
        if (null !== self::$cleaner) {
            return self::$cleaner;
        }

        self::$cleaner = new Cleaner();

        return self::$cleaner;
    }
}
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Discovery;
|
||||
|
||||
use PHPHtmlParser\Contracts\Dom\ParserInterface;
|
||||
use PHPHtmlParser\Dom\Parser;
|
||||
|
||||
class DomParserDiscovery
{
    /**
     * The parser created on the first call to find() and reused afterwards.
     *
     * @var ParserInterface|null
     */
    private static $parser = null;

    /**
     * Returns the shared dom parser, creating it on first use.
     */
    public static function find(): ParserInterface
    {
        if (null !== self::$parser) {
            return self::$parser;
        }

        self::$parser = new Parser();

        return self::$parser;
    }
}
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Discovery;
|
||||
|
||||
use PHPHtmlParser\Contracts\Selector\SeekerInterface;
|
||||
use PHPHtmlParser\Selector\Seeker;
|
||||
|
||||
class SeekerDiscovery
{
    /**
     * The seeker created on the first call to find() and reused afterwards.
     *
     * @var SeekerInterface|null
     */
    private static $seeker = null;

    /**
     * Returns the shared seeker, creating it on first use.
     */
    public static function find(): SeekerInterface
    {
        if (null !== self::$seeker) {
            return self::$seeker;
        }

        self::$seeker = new Seeker();

        return self::$seeker;
    }
}
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Discovery;
|
||||
|
||||
use PHPHtmlParser\Contracts\Selector\ParserInterface;
|
||||
use PHPHtmlParser\Selector\Parser;
|
||||
|
||||
class SelectorParserDiscovery
{
    /**
     * The parser created on the first call to find() and reused afterwards.
     *
     * @var ParserInterface|null
     */
    private static $parser = null;

    /**
     * Returns the shared selector parser, creating it on first use.
     */
    public static function find(): ParserInterface
    {
        if (null !== self::$parser) {
            return self::$parser;
        }

        self::$parser = new Parser();

        return self::$parser;
    }
}
|
||||
+251
@@ -0,0 +1,251 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser;
|
||||
|
||||
use GuzzleHttp\Psr7\Request;
|
||||
use GuzzleHttp\Client;
|
||||
use PHPHtmlParser\Contracts\Dom\CleanerInterface;
|
||||
use PHPHtmlParser\Contracts\Dom\ParserInterface;
|
||||
use PHPHtmlParser\Contracts\DomInterface;
|
||||
use PHPHtmlParser\Discovery\CleanerDiscovery;
|
||||
use PHPHtmlParser\Discovery\DomParserDiscovery;
|
||||
use PHPHtmlParser\Dom\Node\Collection;
|
||||
use PHPHtmlParser\Dom\RootAccessTrait;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\CircularException;
|
||||
use PHPHtmlParser\Exceptions\LogicalException;
|
||||
use PHPHtmlParser\Exceptions\NotLoadedException;
|
||||
use PHPHtmlParser\Exceptions\StrictException;
|
||||
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
|
||||
use Psr\Http\Client\ClientExceptionInterface;
|
||||
use Psr\Http\Client\ClientInterface;
|
||||
use Psr\Http\Message\RequestInterface;
|
||||
|
||||
/**
 * Class Dom.
 *
 * Entry point for loading html from a string, a file or a url, and for
 * querying the resulting tree.
 */
class Dom implements DomInterface
{
    use RootAccessTrait;

    /**
     * The charset we would like the output to be in.
     *
     * @var string
     */
    private $defaultCharset = 'UTF-8';

    /**
     * The document string; stays null until one of the load methods runs.
     *
     * @var Content
     */
    private $content;

    /**
     * Global options to be used by all load calls.
     *
     * @var ?Options
     */
    private $globalOptions;

    /**
     * @var ParserInterface
     */
    private $domParser;

    /**
     * @var CleanerInterface
     */
    private $domCleaner;

    /**
     * Uses the given parser and cleaner, falling back to the ones provided
     * by the discovery classes when either is omitted.
     */
    public function __construct(?ParserInterface $domParser = null, ?CleanerInterface $domCleaner = null)
    {
        if ($domParser === null) {
            $domParser = DomParserDiscovery::find();
        }
        if ($domCleaner === null) {
            $domCleaner = CleanerDiscovery::find();
        }

        $this->domParser = $domParser;
        $this->domCleaner = $domCleaner;
    }

    /**
     * Returns the inner html of the root node.
     *
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     * @throws NotLoadedException
     */
    public function __toString(): string
    {
        $this->isLoaded();

        return $this->root->innerHtml();
    }

    /**
     * Loads the dom from a document file.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws Exceptions\ContentLengthException
     * @throws LogicalException when the file cannot be read
     * @throws StrictException
     */
    public function loadFromFile(string $file, ?Options $options = null): Dom
    {
        // The warning is suppressed because failure is reported through the
        // exception below instead.
        $content = @\file_get_contents($file);
        if ($content === false) {
            throw new LogicalException('file_get_contents failed and returned false when trying to read "' . $file . '".');
        }

        return $this->loadStr($content, $options);
    }

    /**
     * Uses a PSR-18 http client to attempt to load the content from a url.
     *
     * When no client is given a Guzzle client is created; when no request
     * is given a GET request for $url is created. Note that a supplied
     * $request is sent as-is, so $url is only used to build the default
     * request.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws Exceptions\ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     * @throws ClientExceptionInterface
     */
    public function loadFromUrl(string $url, ?Options $options = null, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom
    {
        if ($client === null) {
            $client = new Client();
        }
        if ($request === null) {
            $request = new Request('GET', $url);
        }

        $response = $client->sendRequest($request);
        $content = $response->getBody()->getContents();

        return $this->loadStr($content, $options);
    }

    /**
     * Parses the html of the given string. Used by loadFromFile() and
     * loadFromUrl().
     *
     * Options are layered: defaults, then the global options set through
     * setOptions(), then the options passed to this call.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws Exceptions\ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     */
    public function loadStr(string $str, ?Options $options = null): Dom
    {
        $localOptions = new Options();
        if ($this->globalOptions !== null) {
            $localOptions = $localOptions->setFromOptions($this->globalOptions);
        }
        if ($options !== null) {
            $localOptions = $localOptions->setFromOptions($options);
        }

        $html = $this->domCleaner->clean($str, $localOptions, $this->defaultCharset);

        $this->content = new Content($html);

        // The size handed to the parser is the length of the raw input, not
        // of the cleaned html.
        $this->root = $this->domParser->parse($localOptions, $this->content, \strlen($str));
        $this->domParser->detectCharset($localOptions, $this->defaultCharset, $this->root);

        return $this;
    }

    /**
     * Sets global options to be used by all load calls.
     */
    public function setOptions(Options $options): Dom
    {
        $this->globalOptions = $options;

        return $this;
    }

    /**
     * Find elements by css selector on the root node.
     *
     * $nth is declared explicitly nullable; relying on the "= null" default
     * to make "int" nullable is deprecated as of PHP 8.4.
     *
     * @param ?int $nth passed through to the root node's find()
     *
     * @throws NotLoadedException
     * @throws ChildNotFoundException
     *
     * @return mixed|Collection|null
     */
    public function find(string $selector, ?int $nth = null)
    {
        $this->isLoaded();

        return $this->root->find($selector, $nth);
    }

    /**
     * Simple wrapper function that returns an element by the
     * id. Runs find() with '#' prepended to the id and an $nth of 0.
     *
     * @param $id
     *
     * @throws NotLoadedException
     * @throws ChildNotFoundException
     *
     * @return mixed|Collection|null
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        return $this->find('#' . $id, 0);
    }

    /**
     * Simple wrapper function that returns all elements by
     * tag name.
     *
     * @throws NotLoadedException
     * @throws ChildNotFoundException
     *
     * @return mixed|Collection|null
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();

        return $this->find($name);
    }

    /**
     * Simple wrapper function that returns all elements by
     * class name. Runs find() with '.' prepended to the class.
     *
     * @throws NotLoadedException
     * @throws ChildNotFoundException
     *
     * @return mixed|Collection|null
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();

        return $this->find('.' . $class);
    }

    /**
     * Checks if the load methods have been called.
     *
     * @throws NotLoadedException when no content has been loaded yet
     */
    private function isLoaded(): void
    {
        if (\is_null($this->content)) {
            throw new NotLoadedException('Content is not loaded!');
        }
    }
}
|
||||
+130
@@ -0,0 +1,130 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom;
|
||||
|
||||
use PHPHtmlParser\Contracts\Dom\CleanerInterface;
|
||||
use PHPHtmlParser\Exceptions\LogicalException;
|
||||
use PHPHtmlParser\Options;
|
||||
|
||||
class Cleaner implements CleanerInterface
|
||||
{
|
||||
/**
|
||||
* Cleans the html of any none-html information.
|
||||
*
|
||||
* @throws LogicalException
|
||||
*/
|
||||
public function clean(string $str, Options $options, string $defaultCharset): string
|
||||
{
|
||||
if (!$options->isCleanupInput()) {
|
||||
// skip entire cleanup step
|
||||
return $str;
|
||||
}
|
||||
|
||||
// check if the string is gziped
|
||||
$is_gzip = 0 === \mb_strpos($str, "\x1f" . "\x8b" . "\x08", 0, 'US-ASCII');
|
||||
if ($is_gzip) {
|
||||
$str = \gzdecode($str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
|
||||
}
|
||||
}
|
||||
|
||||
// we must handle character encoding
|
||||
$str = $this->setUpRegexEncoding($str, $options, $defaultCharset);
|
||||
|
||||
// remove white space before closing tags
|
||||
$str = \mb_eregi_replace("'\s+>", "'>", $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to clean single quotes.');
|
||||
}
|
||||
$str = \mb_eregi_replace('"\s+>', '">', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to clean double quotes.');
|
||||
}
|
||||
|
||||
// clean out the \n\r
|
||||
$replace = ' ';
|
||||
if ($options->isPreserveLineBreaks()) {
|
||||
$replace = ' ';
|
||||
}
|
||||
$str = \str_replace(["\r\n", "\r", "\n"], $replace, $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('str_replace returned false instead of a string. Error when attempting to clean input string.');
|
||||
}
|
||||
|
||||
// strip the doctype
|
||||
$str = \mb_eregi_replace('<!doctype(.*?)>', '', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to strip the doctype.');
|
||||
}
|
||||
|
||||
// strip out comments
|
||||
$str = \mb_eregi_replace('<!--(.*?)-->', '', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to strip comments.');
|
||||
}
|
||||
|
||||
// strip out cdata
|
||||
$str = \mb_eregi_replace("<!\[CDATA\[(.*?)\]\]>", '', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to strip out cdata.');
|
||||
}
|
||||
|
||||
// strip out <script> tags
|
||||
if ($options->isRemoveScripts()) {
|
||||
$str = \mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 1.');
|
||||
}
|
||||
$str = \mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 2.');
|
||||
}
|
||||
}
|
||||
|
||||
// strip out <style> tags
|
||||
if ($options->isRemoveStyles()) {
|
||||
$str = \mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 1.');
|
||||
}
|
||||
$str = \mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 2.');
|
||||
}
|
||||
}
|
||||
|
||||
// strip smarty scripts
|
||||
if ($options->isRemoveSmartyScripts()) {
|
||||
$str = \mb_eregi_replace("(\{\w)(.*?)(\})", '', $str);
|
||||
if ($str === false) {
|
||||
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to remove smarty scripts.');
|
||||
}
|
||||
}
|
||||
|
||||
return $str;
|
||||
}
|
||||
|
||||
/**
 * Sets up the mb_regex_encoding and converts the text to that encoding.
 *
 * The target encoding is $defaultCharset unless the options enforce an
 * encoding, in which case the enforced encoding is used instead.
 *
 * @throws LogicalException when the regex encoding cannot be changed or the
 *                          conversion does not yield a string
 */
private function setUpRegexEncoding(string $str, Options $options, string $defaultCharset): string
{
    // an enforced encoding takes precedence over the default charset
    $encoding = $options->getEnforceEncoding() ?? $defaultCharset;

    if (!\mb_regex_encoding($encoding)) {
        throw new LogicalException('Character encoding was not able to be changed to ' . $encoding . '.');
    }

    $converted = \mb_convert_encoding($str, $encoding);
    if (!\is_string($converted)) {
        // report a failed conversion explicitly rather than letting the
        // declared string return type raise a TypeError
        throw new LogicalException('mb_convert_encoding returned false instead of a string. Error when attempting to convert the text to ' . $encoding . '.');
    }

    return $converted;
}
|
||||
}
|
||||
plugins/automagic-images/vendor/paquettg/php-html-parser/src/PHPHtmlParser/Dom/Node/AbstractNode.php
Vendored
+495
@@ -0,0 +1,495 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom\Node;
|
||||
|
||||
use PHPHtmlParser\Contracts\Selector\SelectorInterface;
|
||||
use PHPHtmlParser\Dom\Tag;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\CircularException;
|
||||
use PHPHtmlParser\Exceptions\ParentNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException;
|
||||
use PHPHtmlParser\Finder;
|
||||
use PHPHtmlParser\Selector\Selector;
|
||||
use stringEncode\Encode;
|
||||
|
||||
/**
|
||||
* Dom node object.
|
||||
*
|
||||
* @property-read string $outerhtml
|
||||
* @property-read string $innerhtml
|
||||
* @property-read string $innerText
|
||||
* @property-read string $text
|
||||
* @property-read Tag $tag
|
||||
* @property-read InnerNode $parent
|
||||
*/
|
||||
abstract class AbstractNode
|
||||
{
|
||||
/**
|
||||
* Contains the tag name/type.
|
||||
*
|
||||
* @var ?Tag
|
||||
*/
|
||||
protected $tag;
|
||||
|
||||
/**
|
||||
* Contains a list of attributes on this tag.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $attr = [];
|
||||
|
||||
/**
|
||||
* Contains the parent Node.
|
||||
*
|
||||
* @var ?InnerNode
|
||||
*/
|
||||
protected $parent;
|
||||
|
||||
/**
|
||||
* The unique id of the class. Given by PHP.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected $id;
|
||||
|
||||
/**
|
||||
* The encoding class used to encode strings.
|
||||
*
|
||||
* @var mixed
|
||||
*/
|
||||
protected $encode;
|
||||
|
||||
/**
|
||||
* An array of all the children.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $children = [];
|
||||
|
||||
/**
|
||||
* @var bool
|
||||
*/
|
||||
protected $htmlSpecialCharsDecode = false;
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
private static $count = 0;
|
||||
|
||||
/**
 * Gives this node the current value of the class-wide counter as its id
 * and advances the counter for the next node.
 */
public function __construct()
{
    $this->id = self::$count++;
}
|
||||
|
||||
/**
 * Drops the references this node holds (tag, parent, attributes and
 * children) when the object is destroyed.
 */
public function __destruct()
{
    $this->children = [];
    $this->attr = [];
    $this->parent = null;
    $this->tag = null;
}
|
||||
|
||||
/**
 * Magic getter: resolves $key as a tag attribute first, then as one of the
 * virtual properties outerhtml, innerhtml, innertext, text, tag and parent
 * (matched case-insensitively).
 *
 * @return mixed the attribute value, the virtual property value, or null
 *               when $key matches neither
 */
public function __get(string $key)
{
    // attributes win over the virtual properties
    $attribute = $this->getAttribute($key);
    if ($attribute !== null) {
        return $attribute;
    }

    switch (\strtolower($key)) {
        case 'outerhtml':
            return $this->outerHtml();
        case 'innerhtml':
            return $this->innerHtml();
        case 'innertext':
            return $this->innerText();
        case 'text':
            return $this->text();
        case 'tag':
            return $this->getTag();
        case 'parent':
            return $this->getParent();
        default:
            return null;
    }
}
|
||||
|
||||
/**
 * String conversion of a node yields its outer html.
 *
 * @return string
 */
public function __toString()
{
    $html = $this->outerHtml();

    return $html;
}
|
||||
|
||||
/**
 * Stores the html-special-chars decode flag on this node.
 *
 * @param bool $htmlSpecialCharsDecode
 */
public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
{
    $this->htmlSpecialCharsDecode = $htmlSpecialCharsDecode;
}
|
||||
|
||||
/**
 * Returns the numeric id of this node.
 */
public function id(): int
{
    return $this->id;
}
|
||||
|
||||
/**
 * Returns the parent of this node.
 *
 * @return InnerNode|null null when no parent is set
 */
public function getParent(): ?InnerNode
{
    return $this->parent;
}
|
||||
|
||||
/**
 * Moves this node under the given parent.
 *
 * Nothing happens when $parent is already the parent. Otherwise the node
 * is removed from its current parent (if any), the new parent is stored
 * and this node is added to the new parent's children.
 *
 * @throws ChildNotFoundException
 * @throws CircularException
 */
public function setParent(InnerNode $parent): AbstractNode
{
    $current = $this->parent;

    if ($current !== null) {
        if ($current->id() == $parent->id()) {
            // already the parent
            return $this;
        }

        // detach from the old parent first
        $current->removeChild($this->id);
    }

    $this->parent = $parent;

    // register this node as a child of the new parent
    $parent->addChild($this);

    return $this;
}
|
||||
|
||||
/**
 * Removes this node and all its children from the
 * DOM tree.
 *
 * When the node has a parent it is removed from that parent's children and
 * the parent's cached output is cleared. A node without a parent only has
 * its own cache cleared (previously this case dereferenced a null parent).
 *
 * @return void
 */
public function delete()
{
    if ($this->parent !== null) {
        $this->parent->removeChild($this->id);
        // only touch the parent's cache when there actually is a parent
        $this->parent->clear();
    }

    $this->clear();
}
|
||||
|
||||
/**
 * Stores the encoding object on this node and hands it to the node's tag.
 *
 * @return void
 */
public function propagateEncoding(Encode $encode)
{
    $this->encode = $encode;
    $this->tag->setEncoding($encode);
}
|
||||
|
||||
/**
 * Tells whether the node with the given id is an ancestor of this node,
 * i.e. whether getAncestor() finds it.
 */
public function isAncestor(int $id): bool
{
    return $this->getAncestor($id) !== null;
}
|
||||
|
||||
/**
 * Looks up the parent chain for the node with the given id.
 *
 * @return AbstractNode|null the matching ancestor, or null when the chain
 *                           ends without a match
 */
public function getAncestor(int $id)
{
    if ($this->parent === null) {
        // top of the chain reached, nothing found
        return null;
    }

    if ($this->parent->id() == $id) {
        return $this->parent;
    }

    // keep searching one level up
    return $this->parent->getAncestor($id);
}
|
||||
|
||||
/**
 * Tells whether this node has a next sibling by attempting to fetch it.
 */
public function hasNextSibling(): bool
{
    try {
        $this->nextSibling();
    } catch (ParentNotFoundException | ChildNotFoundException $e) {
        // either there is no parent or the parent has no following child
        return false;
    }

    // the lookup succeeded, so a sibling exists
    return true;
}
|
||||
|
||||
/**
 * Returns the child that follows this node in its parent.
 *
 * @throws ChildNotFoundException
 * @throws ParentNotFoundException when this node has no parent
 */
public function nextSibling(): AbstractNode
{
    $parent = $this->parent;
    if ($parent === null) {
        throw new ParentNotFoundException('Parent is not set for this node.');
    }

    return $parent->nextChild($this->id);
}
|
||||
|
||||
/**
 * Returns the child that precedes this node in its parent.
 *
 * @throws ChildNotFoundException
 * @throws ParentNotFoundException when this node has no parent
 */
public function previousSibling(): AbstractNode
{
    $parent = $this->parent;
    if ($parent === null) {
        throw new ParentNotFoundException('Parent is not set for this node.');
    }

    return $parent->previousChild($this->id);
}
|
||||
|
||||
/**
 * Returns the tag object held by this node.
 */
public function getTag(): Tag
{
    return $this->tag;
}
|
||||
|
||||
/**
 * Replaces the tag of this node. A string is wrapped in a new Tag.
 *
 * @param string|Tag $tag
 */
public function setTag($tag): AbstractNode
{
    $this->tag = \is_string($tag) ? new Tag($tag) : $tag;

    // cached output no longer matches the new tag
    $this->clear();

    return $this;
}
|
||||
|
||||
/**
 * Returns every attribute of this node's tag as a name => value array,
 * unwrapping each attribute object via getValue().
 */
public function getAttributes(): array
{
    return \array_map(
        static function ($attributeDTO) {
            return $attributeDTO->getValue();
        },
        $this->tag->getAttributes()
    );
}
|
||||
|
||||
/**
 * Returns the value of the tag attribute named $key.
 *
 * @return string|null null when the tag reports that no such attribute
 *                     exists (or when the stored value itself is null)
 */
public function getAttribute(string $key): ?string
{
    try {
        $found = $this->tag->getAttribute($key);
    } catch (AttributeNotFoundException $notFound) {
        // an unknown attribute is reported as null instead of an exception
        return null;
    }

    return $found->getValue();
}
|
||||
|
||||
/**
 * Delegates to the tag's hasAttribute() to tell whether the attribute
 * named $key exists on this node.
 */
public function hasAttribute(string $key): bool
{
    return $this->tag->hasAttribute($key);
}
|
||||
|
||||
/**
 * Delegates to the tag's setAttribute() and then clears this node's cache.
 *
 * @return AbstractNode this node, for chaining
 */
public function setAttribute(string $key, ?string $value, bool $doubleQuote = true): AbstractNode
{
    $this->tag->setAttribute($key, $value, $doubleQuote);

    // cached output may contain the old attribute value
    $this->clear();

    return $this;
}
|
||||
|
||||
/**
 * Delegates to the tag's removeAttribute() and then clears this node's
 * cache.
 */
public function removeAttribute(string $key): void
{
    $this->tag->removeAttribute($key);

    // cached output may still contain the removed attribute
    $this->clear();
}
|
||||
|
||||
/**
 * Delegates to the tag's removeAllAttributes() and then clears this
 * node's cache.
 */
public function removeAllAttributes(): void
{
    $this->tag->removeAllAttributes();

    // cached output may still contain the removed attributes
    $this->clear();
}
|
||||
|
||||
/**
 * Walks from this node towards the root and returns the first node whose
 * tag name equals $tag. The node itself is included in the search.
 *
 * @throws ParentNotFoundException when no node on the path matches
 */
public function ancestorByTag(string $tag): AbstractNode
{
    // start with ourselves, then climb one parent per iteration
    for ($candidate = $this; $candidate !== null; $candidate = $candidate->getParent()) {
        if ($candidate->tag->name() == $tag) {
            return $candidate;
        }
    }

    throw new ParentNotFoundException('Could not find an ancestor with "' . $tag . '" tag');
}
|
||||
|
||||
/**
 * Find elements by css selector.
 *
 * When no selector object is supplied, a Selector is built from
 * $selectorString. With $nth given, only the result at that offset is
 * returned (null when it does not exist); otherwise the full result is
 * returned.
 *
 * @throws ChildNotFoundException
 *
 * @return mixed|Collection|null
 */
public function find(string $selectorString, ?int $nth = null, ?SelectorInterface $selector = null)
{
    $selector = $selector ?? new Selector($selectorString);
    $matches = $selector->find($this);

    if ($nth === null) {
        return $matches;
    }

    // single element requested: hand it back, or null if out of range
    return $matches[$nth] ?? null;
}
|
||||
|
||||
/**
 * Find node by id, searching from this node with a Finder.
 *
 * @throws ChildNotFoundException
 * @throws ParentNotFoundException
 *
 * @return bool|AbstractNode
 */
public function findById(int $id)
{
    return (new Finder($id))->find($this);
}
|
||||
|
||||
/**
|
||||
* Gets the inner html of this node.
|
||||
*/
|
||||
abstract public function innerHtml(): string;
|
||||
|
||||
/**
|
||||
* Gets the html of this node, including it's own
|
||||
* tag.
|
||||
*/
|
||||
abstract public function outerHtml(): string;
|
||||
|
||||
/**
|
||||
* Gets the text of this node (if there is any text).
|
||||
*/
|
||||
abstract public function text(): string;
|
||||
|
||||
/**
 * Tells whether this node is a text node. The base implementation always
 * answers false.
 */
public function isTextNode(): bool
{
    return false;
}
|
||||
|
||||
/**
|
||||
* Call this when something in the node tree has changed. Like a child has been added
|
||||
* or a parent has been changed.
|
||||
*/
|
||||
abstract protected function clear(): void;
|
||||
}
|
||||
Vendored
+45
@@ -0,0 +1,45 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom\Node;
|
||||
|
||||
use ArrayIterator;
|
||||
use Countable;
|
||||
use IteratorAggregate;
|
||||
use PHPHtmlParser\Dom\Tag;
|
||||
|
||||
/**
 * Dom node that can be iterated and counted like an array. Subclasses
 * supply the underlying array through getIteratorArray().
 *
 * @property-read string    $outerhtml
 * @property-read string    $innerhtml
 * @property-read string    $innerText
 * @property-read string    $text
 * @property-read Tag       $tag
 * @property-read InnerNode $parent
 */
abstract class ArrayNode extends AbstractNode implements IteratorAggregate, Countable
{
    /**
     * Builds an iterator over the array returned by getIteratorArray().
     */
    public function getIterator(): ArrayIterator
    {
        $items = $this->getIteratorArray();

        return new ArrayIterator($items);
    }

    /**
     * Number of elements in the array returned by getIteratorArray().
     */
    public function count(): int
    {
        $items = $this->getIteratorArray();

        return \count($items);
    }

    /**
     * Returns the array that backs both iteration and counting.
     */
    abstract protected function getIteratorArray(): array;
}
|
||||
Vendored
+156
@@ -0,0 +1,156 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom\Node;
|
||||
|
||||
use ArrayAccess;
|
||||
use ArrayIterator;
|
||||
use Countable;
|
||||
use IteratorAggregate;
|
||||
use PHPHtmlParser\Exceptions\EmptyCollectionException;
|
||||
|
||||
/**
 * Class Collection.
 *
 * Array-like container of nodes. Method calls, property reads and string
 * conversion are forwarded to the first element when that element is an
 * AbstractNode.
 */
class Collection implements IteratorAggregate, ArrayAccess, Countable
{
    /**
     * The collection of Nodes.
     *
     * @var array
     */
    protected $collection = [];

    /**
     * Attempts to call the method on the first node in
     * the collection.
     *
     * @throws EmptyCollectionException when the first element is not a node
     *
     * @return mixed
     */
    public function __call(string $method, array $arguments)
    {
        $node = \reset($this->collection);
        if ($node instanceof AbstractNode) {
            return \call_user_func_array([$node, $method], $arguments);
        }
        throw new EmptyCollectionException('The collection does not contain any Nodes.');
    }

    /**
     * Attempts to apply the magic get to the first node
     * in the collection.
     *
     * @param mixed $key
     *
     * @throws EmptyCollectionException when the first element is not a node
     *
     * @return mixed
     */
    public function __get($key)
    {
        $node = \reset($this->collection);
        if ($node instanceof AbstractNode) {
            return $node->$key;
        }
        throw new EmptyCollectionException('The collection does not contain any Nodes.');
    }

    /**
     * Applies the magic string method to the first node in
     * the collection. Yields an empty string when the first element is not
     * a node.
     */
    public function __toString(): string
    {
        $node = \reset($this->collection);
        if ($node instanceof AbstractNode) {
            return (string) $node;
        }

        return '';
    }

    /**
     * Returns the count of the collection.
     */
    public function count(): int
    {
        return \count($this->collection);
    }

    /**
     * Returns an iterator for the collection.
     */
    public function getIterator(): ArrayIterator
    {
        return new ArrayIterator($this->collection);
    }

    /**
     * Stores a value at the given offset, or appends it when the offset is
     * null (the `$collection[] = $value` form).
     *
     * @param mixed $offset
     * @param mixed $value
     */
    public function offsetSet($offset, $value): void
    {
        if (\is_null($offset)) {
            $this->collection[] = $value;
        } else {
            $this->collection[$offset] = $value;
        }
    }

    /**
     * Checks if an offset exists.
     *
     * @param mixed $offset
     */
    public function offsetExists($offset): bool
    {
        return isset($this->collection[$offset]);
    }

    /**
     * Unset a collection Node.
     *
     * @param mixed $offset
     */
    public function offsetUnset($offset): void
    {
        unset($this->collection[$offset]);
    }

    /**
     * Gets a node at the given offset, or null.
     *
     * The attribute below silences the PHP 8.1+ deprecation about the
     * missing `mixed` return type of ArrayAccess::offsetGet(); older PHP
     * versions parse the attribute line as a comment.
     *
     * @param mixed $offset
     *
     * @return mixed
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return $this->collection[$offset] ?? null;
    }

    /**
     * Returns this collection as an array.
     */
    public function toArray(): array
    {
        return $this->collection;
    }

    /**
     * Similar to jQuery "each" method. Calls the callback with each
     * Node in this collection, passing the value first and its key second.
     */
    public function each(callable $callback)
    {
        foreach ($this->collection as $key => $value) {
            $callback($value, $key);
        }
    }
}
|
||||
Vendored
+244
@@ -0,0 +1,244 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom\Node;
|
||||
|
||||
use PHPHtmlParser\Dom\Tag;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
|
||||
|
||||
/**
|
||||
* Class HtmlNode.
|
||||
*
|
||||
* @property-read string $outerhtml
|
||||
* @property-read string $innerhtml
|
||||
* @property-read string $innerText
|
||||
* @property-read string $text
|
||||
* @property-read Tag $tag
|
||||
* @property-read InnerNode $parent
|
||||
*/
|
||||
class HtmlNode extends InnerNode
|
||||
{
|
||||
/**
|
||||
* Remembers what the innerHtml was if it was scanned previously.
|
||||
*
|
||||
* @var ?string
|
||||
*/
|
||||
protected $innerHtml;
|
||||
|
||||
/**
|
||||
* Remembers what the outerHtml was if it was scanned previously.
|
||||
*
|
||||
* @var ?string
|
||||
*/
|
||||
protected $outerHtml;
|
||||
|
||||
/**
|
||||
* Remembers what the innerText was if it was scanned previously.
|
||||
*
|
||||
* @var ?string
|
||||
*/
|
||||
protected $innerText;
|
||||
|
||||
/**
|
||||
* Remembers what the text was if it was scanned previously.
|
||||
*
|
||||
* @var ?string
|
||||
*/
|
||||
protected $text;
|
||||
|
||||
/**
|
||||
* Remembers what the text was when we looked into all our
|
||||
* children nodes.
|
||||
*
|
||||
* @var ?string
|
||||
*/
|
||||
protected $textWithChildren;
|
||||
|
||||
/**
 * Stores the tag of this node (wrapping anything that is not already a
 * Tag in a new Tag) and then runs the parent constructor.
 *
 * @param string|Tag $tag
 */
public function __construct($tag)
{
    $this->tag = $tag instanceof Tag ? $tag : new Tag($tag);

    parent::__construct();
}
|
||||
|
||||
/**
 * Sets the html-special-chars decode flag on this node and forwards the
 * same flag to the node's tag.
 *
 * @param bool $htmlSpecialCharsDecode
 */
public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
{
    parent::setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);

    $this->tag->setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);
}
|
||||
|
||||
/**
 * Gets the inner html of this node by concatenating the text of text
 * children and the outer html of html children, in sibling order. The
 * result is cached until clear() resets it.
 *
 * @throws ChildNotFoundException
 * @throws UnknownChildTypeException when a child is neither a TextNode nor
 *                                   an HtmlNode
 */
public function innerHtml(): string
{
    if (!$this->hasChildren()) {
        // nothing to render
        return '';
    }

    if ($this->innerHtml !== null) {
        // cached from an earlier call
        return $this->innerHtml;
    }

    $html = '';
    $current = $this->firstChild();

    // follow the sibling chain until it ends
    while ($current !== null) {
        if ($current instanceof TextNode) {
            $html .= $current->text();
        } elseif ($current instanceof HtmlNode) {
            $html .= $current->outerHtml();
        } else {
            throw new UnknownChildTypeException('Unknown child type "' . \get_class($current) . '" found in node');
        }

        try {
            $current = $this->nextChild($current->id());
        } catch (ChildNotFoundException $endOfChain) {
            // last child handled
            $current = null;
        }
    }

    // cache for subsequent calls
    $this->innerHtml = $html;

    return $html;
}
|
||||
|
||||
/**
 * Gets the inner text of this node: the inner html with tags stripped.
 * The result is computed once and then served from the cache.
 *
 * @throws ChildNotFoundException
 * @throws UnknownChildTypeException
 */
public function innerText(): string
{
    if ($this->innerText !== null) {
        return $this->innerText;
    }

    $this->innerText = \strip_tags($this->innerHtml());

    return $this->innerText;
}
|
||||
|
||||
/**
 * Gets the html of this node, including its own tag.
 *
 * A node whose tag is named 'root' renders only its inner html, and a
 * self-closing tag renders only its opening tag. Other results are cached.
 *
 * @throws ChildNotFoundException
 * @throws UnknownChildTypeException
 */
public function outerHtml(): string
{
    // the root node has no markup of its own
    if ($this->tag->name() == 'root') {
        return $this->innerHtml();
    }

    if ($this->outerHtml !== null) {
        // cached from an earlier call
        return $this->outerHtml;
    }

    $opening = $this->tag->makeOpeningTag();
    if ($this->tag->isSelfClosing()) {
        // self-closing tags have neither content nor a closing tag
        return $opening;
    }

    // opening tag + content + closing tag
    $html = $opening . $this->innerHtml() . $this->tag->makeClosingTag();

    $this->outerHtml = $html;

    return $html;
}
|
||||
|
||||
/**
 * Gets the text of this node's direct text children, or, when
 * $lookInChildren is true, also the text of nested html children. Each
 * variant has its own cache.
 */
public function text(bool $lookInChildren = false): string
{
    // serve from the cache that matches the requested variant
    $cached = $lookInChildren ? $this->textWithChildren : $this->text;
    if ($cached !== null) {
        return $cached;
    }

    $collected = '';
    foreach ($this->children as $entry) {
        /** @var AbstractNode $node */
        $node = $entry['node'];

        if ($node instanceof TextNode) {
            $collected .= $node->text;
            continue;
        }

        if ($lookInChildren && $node instanceof HtmlNode) {
            // descend into nested html nodes
            $collected .= $node->text($lookInChildren);
        }
    }

    // remember the result for the requested variant
    if ($lookInChildren) {
        $this->textWithChildren = $collected;
    } else {
        $this->text = $collected;
    }

    return $collected;
}
|
||||
|
||||
/**
 * Call this when something in the node tree has changed. Like a child has been added
 * or a parent has been changed.
 *
 * Resets every cached rendering of this node, including the inner text
 * cache filled by innerText(), and then does the same for the parent so
 * that stale output is dropped all the way up.
 */
protected function clear(): void
{
    $this->innerHtml = null;
    $this->outerHtml = null;
    // innerText() caches its result too, so it has to be invalidated here
    $this->innerText = null;
    $this->text = null;
    $this->textWithChildren = null;

    if ($this->parent !== null) {
        $this->parent->clear();
    }
}
|
||||
|
||||
/**
 * Supplies the array used for iteration and counting: the children of
 * this html node as returned by getChildren().
 */
protected function getIteratorArray(): array
{
    return $this->getChildren();
}
|
||||
}
|
||||
Vendored
+442
@@ -0,0 +1,442 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom\Node;
|
||||
|
||||
use PHPHtmlParser\Dom\Tag;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\CircularException;
|
||||
use PHPHtmlParser\Exceptions\LogicalException;
|
||||
use stringEncode\Encode;
|
||||
|
||||
/**
|
||||
* Inner node of the html tree, might have children.
|
||||
*
|
||||
* @property-read string $outerhtml
|
||||
* @property-read string $innerhtml
|
||||
* @property-read string $innerText
|
||||
* @property-read string $text
|
||||
* @property-read Tag $tag
|
||||
* @property-read InnerNode $parent
|
||||
*/
|
||||
abstract class InnerNode extends ArrayNode
|
||||
{
|
||||
/**
|
||||
* An array of all the children.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $children = [];
|
||||
|
||||
/**
 * Stores the encoding object on this node and its tag, then passes it on
 * to every child node.
 */
public function propagateEncoding(Encode $encode): void
{
    $this->encode = $encode;
    $this->tag->setEncoding($encode);

    // hand the same encoder down to each child
    foreach ($this->children as $entry) {
        /** @var AbstractNode $childNode */
        $childNode = $entry['node'];
        $childNode->propagateEncoding($encode);
    }
}
|
||||
|
||||
/**
 * Tells whether the children array of this node is non-empty.
 */
public function hasChildren(): bool
{
    return !empty($this->children);
}
|
||||
|
||||
/**
 * Returns the child node stored under the given id.
 *
 * @throws ChildNotFoundException when no child with that id exists
 */
public function getChild(int $id): AbstractNode
{
    if (isset($this->children[$id])) {
        return $this->children[$id]['node'];
    }

    throw new ChildNotFoundException("Child '$id' not found in this node.");
}
|
||||
|
||||
/**
 * Returns a new array of child nodes, collected by starting at the first
 * child and following the next-child links until none is left.
 *
 * A CircularException is thrown when the chain leads back to a child that
 * was already collected.
 */
public function getChildren(): array
{
    $collected = [];
    $seenIds = [];

    try {
        $current = $this->firstChild();
        while (true) {
            $collected[] = $current;
            $seenIds[] = $current->id;

            // raises ChildNotFoundException after the last child
            $current = $this->nextChild($current->id());
            if (\in_array($current->id, $seenIds, true)) {
                throw new CircularException('Circular sibling referance found. Child with id ' . $current->id() . ' found twice.');
            }
        }
    } catch (ChildNotFoundException $endOfChain) {
        // the chain has ended, every child is collected
    }

    return $collected;
}
|
||||
|
||||
/**
 * Returns the number of entries in the children array.
 */
public function countChildren(): int
{
    return \count($this->children);
}
|
||||
|
||||
/**
 * Adds a child node to this node, either at the end or directly before
 * the existing child with id $before.
 *
 * Each entry of the children array is keyed by the child's id and holds
 * the node plus the ids of its previous and next sibling. Ids start at 0,
 * so a previous-sibling id of 0 is valid and is compared against null
 * explicitly rather than tested for truthiness.
 *
 * @param AbstractNode $child  the node to add
 * @param int          $before id of the child to insert in front of, or a
 *                             negative value to append
 *
 * @throws ChildNotFoundException
 * @throws CircularException when $child is this node or one of its ancestors
 * @throws LogicalException  when the children array cannot be rebuilt
 *
 * @return bool false when $child is already a child or $before is not a
 *              child of this node, true once the child has been added
 */
public function addChild(AbstractNode $child, int $before = -1): bool
{
    $childId = $child->id();

    // check integrity
    if ($this->isAncestor($childId)) {
        throw new CircularException('Can not add child. It is my ancestor.');
    }

    // check if child is itself
    if ($childId == $this->id) {
        throw new CircularException('Can not set itself as a child.');
    }

    $prev = null;
    $next = null;

    if ($this->hasChildren()) {
        if (isset($this->children[$childId])) {
            // we already have this child
            return false;
        }

        if ($before >= 0) {
            if (!isset($this->children[$before])) {
                return false;
            }

            // link the new child between $before and its old predecessor
            $prev = $this->children[$before]['prev'];
            if ($prev !== null) {
                $this->children[$prev]['next'] = $childId;
            }

            $this->children[$before]['prev'] = $childId;
            $next = $before;
        } else {
            // append after the current last child
            $prev = $this->lastChild()->id();
            $this->children[$prev]['next'] = $childId;
        }
    }

    $insert = [
        'node' => $child,
        'next' => $next,
        'prev' => $prev,
    ];

    // position right behind the previous sibling, or at the front if none
    $keys = \array_keys($this->children);
    $index = $prev !== null ? ((int) \array_search($prev, $keys, true)) + 1 : 0;
    \array_splice($keys, $index, 0, [$childId]);

    $children = \array_values($this->children);
    \array_splice($children, $index, 0, [$insert]);

    // add the child
    $combination = \array_combine($keys, $children);
    if ($combination === false) {
        // The number of elements for each array isn't equal or if the arrays are empty.
        throw new LogicalException('array combine failed during add child method call.');
    }
    $this->children = $combination;

    // tell child I am the new parent
    $child->setParent($this);

    // clear any cache
    $this->clear();

    return true;
}
|
||||
|
||||
/**
 * Inserts $child directly before the child with the given id by
 * delegating to addChild().
 *
 * @throws ChildNotFoundException
 * @throws CircularException
 */
public function insertBefore(AbstractNode $child, int $id): bool
{
    $added = $this->addChild($child, $id);

    return $added;
}
|
||||
|
||||
/**
 * Inserts $child directly after the child with the given id.
 *
 * When the reference child has a following sibling the new child is
 * inserted before that sibling; otherwise it is appended.
 *
 * @throws ChildNotFoundException
 * @throws CircularException
 *
 * @return bool false when $id is not a child of this node
 */
public function insertAfter(AbstractNode $child, int $id): bool
{
    if (!isset($this->children[$id])) {
        return false;
    }

    $followingId = $this->children[$id]['next'] ?? null;
    if (\is_int($followingId)) {
        // "after $id" is the same as "before the sibling that follows $id"
        return $this->addChild($child, (int) $followingId);
    }

    // clear cache
    $this->clear();

    // no following sibling: append at the end
    return $this->addChild($child);
}
|
||||
|
||||
/**
 * Removes the child with the given id and re-links its neighbours to each
 * other. Unknown ids are ignored.
 *
 * @return InnerNode this node, for chaining
 */
public function removeChild(int $id): InnerNode
{
    if (!isset($this->children[$id])) {
        return $this;
    }

    $entry = $this->children[$id];
    $nextId = $entry['next'];
    $prevId = $entry['prev'];

    // close the gap in the sibling chain
    if ($nextId !== null) {
        $this->children[$nextId]['prev'] = $prevId;
    }
    if ($prevId !== null) {
        $this->children[$prevId]['next'] = $nextId;
    }

    // remove the child
    unset($this->children[$id]);

    // clear any cache
    $this->clear();

    return $this;
}
|
||||
|
||||
/**
 * Returns the stored 'next' link of the child with the given id, i.e. the
 * id recorded for the following child, or null when none is recorded.
 *
 * @throws ChildNotFoundException when $id is not a child of this node
 *
 * @return mixed
 */
public function hasNextChild(int $id)
{
    // getChild() validates that the child exists
    $childId = $this->getChild($id)->id();

    return $this->children[$childId]['next'];
}
|
||||
|
||||
/**
 * Returns the child that follows the child with the given id.
 *
 * @throws ChildNotFoundException when $id is not a child or has no
 *                                following child
 *
 * @uses $this->getChild()
 */
public function nextChild(int $id): AbstractNode
{
    $current = $this->getChild($id);
    $nextId = $this->children[$current->id()]['next'];

    // anything but an int id (null included) means there is no next child
    if (!\is_int($nextId)) {
        throw new ChildNotFoundException("Child '$id' next sibling not found in this node.");
    }

    return $this->getChild($nextId);
}
|
||||
|
||||
/**
 * Returns the child that precedes the child with the given id.
 *
 * @throws ChildNotFoundException when $id is not a child or has no
 *                                preceding child
 *
 * @uses $this->getChild()
 */
public function previousChild(int $id): AbstractNode
{
    $current = $this->getChild($id);
    $prevId = $this->children[$current->id()]['prev'];

    // anything but an int id (null included) means there is no previous child
    if (!\is_int($prevId)) {
        throw new ChildNotFoundException("Child '$id' previous not found in this node.");
    }

    return $this->getChild($prevId);
}
|
||||
|
||||
/**
 * Checks if the given node id is a direct child of the current node.
 *
 * The children array is keyed by child id, so this is a single key
 * lookup rather than a scan over every key. isDescendant() calls this
 * once per inner node it visits, which makes the cost matter.
 */
public function isChild(int $id): bool
{
    // array_key_exists (not isset) so an entry is found whatever its value is
    return \array_key_exists($id, $this->children);
}
|
||||
|
||||
/**
 * Removes the child with id $childId and puts $newChild in its place,
 * keeping the position in the child list and the sibling links.
 *
 * @throws LogicalException when $childId is not a child of this node, or
 *                          when the child list could not be rebuilt
 */
public function replaceChild(int $childId, AbstractNode $newChild): void
{
    if (!isset($this->children[$childId])) {
        // Without this guard array_search() below returns false, which
        // silently overwrote the key of the first child instead.
        throw new LogicalException("Child '$childId' not found in this node, unable to replace it.");
    }

    $oldChild = $this->children[$childId];
    $prevId = $oldChild['prev'];
    $nextId = $oldChild['next'];
    $newId = $newChild->id();

    // Kept from the previous implementation for backward compatibility.
    // NOTE(review): confirm AbstractNode really exposes prev/next.
    $newChild->prev = (int) $prevId;
    $newChild->next = (int) $nextId;

    // rename the key in place so the order of the children is preserved
    $keys = \array_keys($this->children);
    $index = \array_search($childId, $keys, true);
    $keys[$index] = $newId;
    $combination = \array_combine($keys, $this->children);
    if ($combination === false) {
        // The number of elements for each array isn't equal or if the arrays are empty.
        throw new LogicalException('array combine failed during replace child method call.');
    }
    $this->children = $combination;
    $this->children[$newId] = [
        'prev' => $prevId,
        'node' => $newChild,
        'next' => $nextId,
    ];

    // Point the neighbours at the new child. Compare against null rather
    // than relying on truthiness: 0 is a valid node id.
    if ($prevId !== null && isset($this->children[$prevId])) {
        $this->children[$prevId]['next'] = $newId;
    }
    if ($nextId !== null && isset($this->children[$nextId])) {
        $this->children[$nextId]['prev'] = $newId;
    }

    // remove old child; when both ids are equal the slot now holds the
    // new entry and must be kept
    if ($newId !== $childId) {
        unset($this->children[$childId]);
    }

    // clean out cache
    $this->clear();
}
|
||||
|
||||
/**
 * Shortcut to return the first child in the child list.
 *
 * @throws ChildNotFoundException when this node has no children
 *
 * @uses $this->getChild()
 */
public function firstChild(): AbstractNode
{
    if (\count($this->children) === 0) {
        // no children
        throw new ChildNotFoundException('No children found in node.');
    }

    // rewind the array pointer; the key it lands on is the first child's id
    \reset($this->children);

    return $this->getChild((int) \key($this->children));
}
|
||||
|
||||
/**
 * Shortcut to return the last child in the child list.
 *
 * @throws ChildNotFoundException when this node has no children
 * @throws LogicalException       when the last key is not an int id
 *
 * @uses $this->getChild()
 */
public function lastChild(): AbstractNode
{
    if (\count($this->children) === 0) {
        // no children
        throw new ChildNotFoundException('No children found in node.');
    }

    // move the array pointer to the end; its key is the last child's id
    \end($this->children);
    $lastKey = \key($this->children);

    if (\is_int($lastKey)) {
        return $this->getChild($lastKey);
    }

    throw new LogicalException('Children array contain child with a key that is not an int.');
}
|
||||
|
||||
/**
 * Checks if the given node id belongs to a node anywhere below the
 * current node (a child, or a child of a child, and so on).
 */
public function isDescendant(int $id): bool
{
    if ($this->isChild($id)) {
        return true;
    }

    foreach ($this->children as $entry) {
        $candidate = $entry['node'];

        // only inner nodes can have children of their own
        if (!$candidate instanceof InnerNode) {
            continue;
        }
        if (!$candidate->hasChildren()) {
            continue;
        }
        if ($candidate->isDescendant($id)) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Sets the parent node, refusing a parent that is currently one of this
 * node's own descendants.
 *
 * @throws ChildNotFoundException
 * @throws CircularException when $parent is a descendant of this node
 */
public function setParent(InnerNode $parent): AbstractNode
{
    // check integrity
    $parentId = $parent->id();
    if ($this->isDescendant($parentId)) {
        throw new CircularException('Can not add descendant "' . $parentId . '" as my parent.');
    }

    // clear cache before handing over to the base implementation
    $this->clear();

    return parent::setParent($parent);
}
|
||||
}
|
||||
Vendored
+21
@@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom\Node;
|
||||
|
||||
use PHPHtmlParser\Dom\Tag;
|
||||
|
||||
/**
 * Class LeafNode.
 *
 * Abstract base type for leaf nodes. It adds no members of its own on top
 * of AbstractNode; it only gives leaf node classes a common type.
 *
 * @property-read string    $outerhtml
 * @property-read string    $innerhtml
 * @property-read string    $innerText
 * @property-read string    $text
 * @property-read Tag       $tag
 * @property-read InnerNode $parent
 */
abstract class LeafNode extends AbstractNode
{
}
|
||||
Vendored
+155
@@ -0,0 +1,155 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom\Node;
|
||||
|
||||
use PHPHtmlParser\Dom\Tag;
|
||||
use PHPHtmlParser\Exceptions\LogicalException;
|
||||
|
||||
/**
 * Class TextNode.
 *
 * A leaf node that holds a run of text. Its html representation is the
 * text itself.
 *
 * @property-read string    $outerhtml
 * @property-read string    $innerhtml
 * @property-read string    $innerText
 * @property-read string    $text
 * @property-read Tag       $tag
 * @property-read InnerNode $parent
 */
class TextNode extends LeafNode
{
    /**
     * The tag of this node; always a tag named "text".
     *
     * @var Tag
     */
    protected $tag;

    /**
     * This is the text in this node, as stored (not charset converted).
     *
     * @var string
     */
    protected $text;

    /**
     * Cached charset-converted version of the text, null when not computed.
     *
     * @var ?string
     */
    protected $convertedText;

    /**
     * Sets the text for this node.
     *
     * @param bool $removeDoubleSpace when true every run of whitespace is
     *                                collapsed into a single space
     *
     * @throws LogicalException when the whitespace clean up fails
     */
    public function __construct(string $text, $removeDoubleSpace = true)
    {
        if ($removeDoubleSpace) {
            // remove double spaces
            $replacedText = \mb_ereg_replace('\s+', ' ', $text);
            // mb_ereg_replace() reports failure as false or null depending on
            // the error, so accept only a string result
            if (!\is_string($replacedText)) {
                throw new LogicalException('mb_ereg_replace returns false when attempting to clean white space from "' . $text . '".');
            }
            $text = $replacedText;
        }

        // restore line breaks that were written as the numeric entity.
        // Searching for a plain space here turned every space into a newline.
        $text = \str_replace('&#10;', "\n", $text);

        $this->text = $text;
        $this->tag = new Tag('text');
        parent::__construct();
    }

    /**
     * Sets whether html special chars are decoded, on this node and on its tag.
     *
     * @param bool $htmlSpecialCharsDecode
     */
    public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
    {
        parent::setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);
        $this->tag->setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);

        // the cached conversion was built with the previous setting
        $this->convertedText = null;
    }

    /**
     * Returns the text of this node, html-special-chars decoded and charset
     * converted according to the node's settings.
     */
    public function text(): string
    {
        // a cached conversion is only meaningful while an encoder is set
        if ($this->encode !== null && $this->convertedText !== null) {
            return $this->convertedText;
        }

        $text = $this->htmlSpecialCharsDecode
            ? \htmlspecialchars_decode($this->text)
            : $this->text;

        if ($this->encode === null) {
            return $text;
        }

        // convert charset and remember the conversion
        $this->convertedText = $this->encode->convert($text);

        return $this->convertedText;
    }

    /**
     * Sets the text for this node.
     *
     * The conversion cache is dropped instead of being filled eagerly, so
     * the next text() call applies decoding and conversion the same way it
     * does for text given to the constructor.
     *
     * @param string $text
     */
    public function setText(string $text): void
    {
        $this->text = $text;
        $this->convertedText = null;
    }

    /**
     * This node has no html, just return the text.
     *
     * @uses $this->text()
     */
    public function innerHtml(): string
    {
        return $this->text();
    }

    /**
     * This node has no html, just return the text.
     *
     * @uses $this->text()
     */
    public function outerHtml(): string
    {
        return $this->text();
    }

    /**
     * Checks if the current node is a text node.
     */
    public function isTextNode(): bool
    {
        return true;
    }

    /**
     * Call this when something in the node tree has changed. Like a child has been added
     * or a parent has been changed. Drops the cached converted text.
     */
    protected function clear(): void
    {
        $this->convertedText = null;
    }
}
|
||||
+348
@@ -0,0 +1,348 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom;
|
||||
|
||||
use PHPHtmlParser\Content;
|
||||
use PHPHtmlParser\Contracts\Dom\ParserInterface;
|
||||
use PHPHtmlParser\Dom\Node\AbstractNode;
|
||||
use PHPHtmlParser\Dom\Node\HtmlNode;
|
||||
use PHPHtmlParser\Dom\Node\TextNode;
|
||||
use PHPHtmlParser\DTO\TagDTO;
|
||||
use PHPHtmlParser\Enum\StringToken;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\CircularException;
|
||||
use PHPHtmlParser\Exceptions\ContentLengthException;
|
||||
use PHPHtmlParser\Exceptions\LogicalException;
|
||||
use PHPHtmlParser\Exceptions\StrictException;
|
||||
use PHPHtmlParser\Options;
|
||||
use stringEncode\Encode;
|
||||
|
||||
class Parser implements ParserInterface
{
    /**
     * Attempts to parse the html in content into a node tree.
     *
     * Text and tags are read alternately. An opening tag becomes a child
     * of the active node and, unless self closing, the new active node. A
     * closing tag moves the active node back up the tree.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     *
     * @return AbstractNode the root node of the parsed tree
     */
    public function parse(Options $options, Content $content, int $size): AbstractNode
    {
        // add the root node
        $root = new HtmlNode('root');
        $root->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode());
        $activeNode = $root;
        while ($activeNode !== null) {
            // getTag() is used here like everywhere else in this method,
            // instead of going through the magic property accessor
            if ($activeNode->getTag()->name() === 'script'
                && $options->isCleanupInput() !== true
            ) {
                // inside a script only a closing tag ends the text run
                $str = $content->copyUntil('</');
            } else {
                $str = $content->copyUntil('<');
            }
            if ($str == '') {
                $tagDTO = $this->parseTag($options, $content, $size);
                if (!$tagDTO->isStatus()) {
                    // we are done here
                    $activeNode = null;
                    continue;
                }

                // check if it was a closing tag
                if ($tagDTO->isClosing()) {
                    $foundOpeningTag = true;
                    $originalNode = $activeNode;
                    // walk up the tree looking for the matching opening tag
                    while ($activeNode->getTag()->name() != $tagDTO->getTag()) {
                        $activeNode = $activeNode->getParent();
                        if ($activeNode === null) {
                            // we could not find opening tag
                            $activeNode = $originalNode;
                            $foundOpeningTag = false;
                            break;
                        }
                    }
                    if ($foundOpeningTag) {
                        // the matched element is finished; continue in its parent
                        $activeNode = $activeNode->getParent();
                    }
                    continue;
                }

                if ($tagDTO->getNode() === null) {
                    continue;
                }

                /** @var AbstractNode $node */
                $node = $tagDTO->getNode();
                $activeNode->addChild($node);

                // check if node is self closing
                if (!$node->getTag()->isSelfClosing()) {
                    $activeNode = $node;
                }
            } elseif ($options->isWhitespaceTextNode() ||
                \trim($str) != ''
            ) {
                // we found text we care about
                $textNode = new TextNode($str, $options->isRemoveDoubleSpace());
                $textNode->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode());
                $activeNode->addChild($textNode);
            }
        }

        return $root;
    }

    /**
     * Attempts to detect the charset that the html was sent in.
     *
     * @throws ChildNotFoundException
     *
     * @return bool true when a charset was found in a meta tag
     */
    public function detectCharset(Options $options, string $defaultCharset, AbstractNode $root): bool
    {
        // set the default
        $encode = new Encode();
        $encode->from($defaultCharset);
        $encode->to($defaultCharset);

        $enforceEncoding = $options->getEnforceEncoding();
        if ($enforceEncoding !== null) {
            // they want to enforce the given encoding
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            // NOTE(review): $encode is not propagated to $root on this path;
            // presumably fine because source and target are identical - confirm.
            return false;
        }

        /** @var AbstractNode $meta */
        $meta = $root->find('meta[http-equiv=Content-Type]', 0);
        if ($meta == null) {
            if (!$this->detectHTML5Charset($encode, $root)) {
                // could not find meta tag
                $root->propagateEncoding($encode);

                return false;
            }

            return true;
        }
        $content = $meta->getAttribute('content');
        if (\is_null($content)) {
            // could not find content
            $root->propagateEncoding($encode);

            return false;
        }
        $matches = [];
        if (\preg_match('/charset=([^;]+)/', $content, $matches)) {
            $encode->from(\trim($matches[1]));
            $root->propagateEncoding($encode);

            return true;
        }

        // no charset found
        $root->propagateEncoding($encode);

        return false;
    }

    /**
     * Attempt to parse a tag out of the content.
     *
     * Returns a TagDTO made without arguments when the content is not at a
     * usable tag.
     *
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     */
    private function parseTag(Options $options, Content $content, int $size): TagDTO
    {
        if ($content->char() != '<') {
            // we are not at the beginning of a tag
            return TagDTO::makeFromPrimitives();
        }

        // check if this is a closing tag
        try {
            $content->fastForward(1);
        } catch (ContentLengthException $exception) {
            // we are at the end of the file
            return TagDTO::makeFromPrimitives();
        }
        if ($content->char() == '/') {
            return $this->makeEndTag($content, $options);
        }
        if ($content->char() == '?') {
            // special setting tag
            $tag = $content->fastForward(1)
                ->copyByToken(StringToken::SLASH(), true);
            $tag = (new Tag($tag))
                ->setOpening('<?')
                ->setClosing(' ?>')
                ->selfClosing();
        } elseif ($content->string(3) == '!--') {
            // comment tag
            $tag = $content->fastForward(3)
                ->copyByToken(StringToken::CLOSECOMMENT(), true);
            $tag = (new Tag($tag))
                ->setOpening('<!--')
                ->setClosing('-->')
                ->selfClosing();
        } else {
            $tag = \strtolower($content->copyByToken(StringToken::SLASH(), true));
            if (\trim($tag) == '') {
                // no tag found, invalid < found
                return TagDTO::makeFromPrimitives();
            }
        }
        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode());
        $this->setUpAttributes($content, $size, $node, $options, $tag);

        $content->skipByToken(StringToken::BLANK());
        if ($content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $content->fastForward(1);
        } elseif (\in_array($node->getTag()->name(), $options->getSelfClosing(), true)) {
            // Should be a self closing tag, check if we are strict
            if ($options->isStrict()) {
                $character = $content->getPosition();
                throw new StrictException("Tag '" . $node->getTag()->name() . "' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (\in_array($node->getTag()->name(), $options->getNoSlash(), true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        // step over the '>' that ends the tag, when there is one left
        if ($content->canFastForward(1)) {
            $content->fastForward(1);
        }

        return TagDTO::makeFromPrimitives(true, false, $node);
    }

    /**
     * Looks for a meta tag with a charset attribute and, when it carries a
     * usable value, uses it as the source encoding and propagates it.
     *
     * @throws ChildNotFoundException
     *
     * @return bool true when a charset was found and applied
     */
    private function detectHTML5Charset(Encode $encode, AbstractNode $root): bool
    {
        /** @var AbstractNode|null $meta */
        $meta = $root->find('meta[charset]', 0);
        if ($meta == null) {
            return false;
        }

        // The attribute can be present without a value. trim(null) is a
        // TypeError under strict_types, so treat that (and an empty value)
        // as "no charset found" and let the caller fall back to the default.
        $charset = $meta->getAttribute('charset');
        if (!\is_string($charset) || \trim($charset) === '') {
            return false;
        }

        $encode->from(\trim($charset));
        $root->propagateEncoding($encode);

        return true;
    }

    /**
     * Reads a closing tag and reports which tag it closes.
     *
     * A closing tag for a tag listed as self closing in the options is
     * reported as a tag that neither opens nor closes anything.
     *
     * @throws ContentLengthException
     * @throws LogicalException
     */
    private function makeEndTag(Content $content, Options $options): TagDTO
    {
        $tag = $content->fastForward(1)
            ->copyByToken(StringToken::SLASH(), true);
        // move to end of tag
        $content->copyUntil('>');
        $content->fastForward(1);

        // check if this closing tag counts
        $tag = \strtolower($tag);
        if (\in_array($tag, $options->getSelfClosing(), true)) {
            return TagDTO::makeFromPrimitives(true);
        }

        return TagDTO::makeFromPrimitives(true, true, null, $tag);
    }

    /**
     * Reads the attributes of the tag at the current position and sets
     * them on the node's tag. Stops at '>' or '/'.
     *
     * @param string|Tag $tag only used in the strict mode error message
     *
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws StrictException when strict mode is on and an attribute has no value
     */
    private function setUpAttributes(Content $content, int $size, HtmlNode $node, Options $options, $tag): void
    {
        while (
            $content->char() != '>' &&
            $content->char() != '/'
        ) {
            $space = $content->skipByToken(StringToken::BLANK(), true);
            if (empty($space)) {
                // nothing was skipped: move one character on and try again
                try {
                    $content->fastForward(1);
                } catch (ContentLengthException $exception) {
                    // reached the end of the content
                    break;
                }
                continue;
            }

            $name = $content->copyByToken(StringToken::EQUAL(), true);
            if ($name == '/') {
                break;
            }

            if (empty($name)) {
                $content->skipByToken(StringToken::BLANK());
                continue;
            }

            $content->skipByToken(StringToken::BLANK());
            if ($content->char() == '=') {
                $content->fastForward(1)
                    ->skipByToken(StringToken::BLANK());
                switch ($content->char()) {
                    case '"':
                        $content->fastForward(1);
                        $string = $content->copyUntil('"', true);
                        // keep appending while copyUntilUnless() still yields text
                        do {
                            $moreString = $content->copyUntilUnless('"', '=>');
                            $string .= $moreString;
                        } while (\strlen($moreString) > 0 && $content->getPosition() < $size);
                        $content->fastForward(1);
                        $node->getTag()->setAttribute($name, $string);
                        break;
                    case "'":
                        $content->fastForward(1);
                        $string = $content->copyUntil("'", true);
                        do {
                            $moreString = $content->copyUntilUnless("'", '=>');
                            $string .= $moreString;
                        } while (\strlen($moreString) > 0 && $content->getPosition() < $size);
                        $content->fastForward(1);
                        // false: remember that this value was single quoted
                        $node->getTag()->setAttribute($name, $string, false);
                        break;
                    default:
                        // unquoted value
                        $node->getTag()->setAttribute($name, $content->copyByToken(StringToken::ATTR(), true));
                        break;
                }
            } else {
                // no value attribute
                if ($options->isStrict()) {
                    // can't have this in strict html
                    $character = $content->getPosition();
                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
                }
                $node->getTag()->setAttribute($name, null);
                if ($content->char() != '>') {
                    $content->rewind(1);
                }
            }
        }
    }
}
|
||||
Vendored
+100
@@ -0,0 +1,100 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom;
|
||||
|
||||
use PHPHtmlParser\Dom\Node\AbstractNode;
|
||||
use PHPHtmlParser\Dom\Node\HtmlNode;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\NotLoadedException;
|
||||
|
||||
trait RootAccessTrait
{
    /**
     * Contains the root node of this dom tree.
     *
     * @var HtmlNode
     */
    public $root;

    /**
     * Forwards reads of unknown properties to the root node.
     *
     * @param string $name
     *
     * @throws NotLoadedException
     *
     * @return mixed
     */
    public function __get($name)
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->$name;
    }

    /**
     * Returns the first child of the root node.
     *
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function firstChild(): AbstractNode
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->firstChild();
    }

    /**
     * Returns the last child of the root node.
     *
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function lastChild(): AbstractNode
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->lastChild();
    }

    /**
     * Returns the number of children the root node reports.
     *
     * @throws NotLoadedException
     */
    public function countChildren(): int
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->countChildren();
    }

    /**
     * Returns the children of the root node as an array.
     *
     * @throws NotLoadedException
     */
    public function getChildren(): array
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->getChildren();
    }

    /**
     * Tells whether the root node has any children.
     *
     * @throws NotLoadedException
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->hasChildren();
    }

    /**
     * Called by every accessor of this trait before the root node is touched.
     */
    abstract public function isLoaded(): void;
}
|
||||
+365
@@ -0,0 +1,365 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Dom;
|
||||
|
||||
use PHPHtmlParser\DTO\Tag\AttributeDTO;
|
||||
use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException;
|
||||
use stringEncode\Encode;
|
||||
|
||||
/**
 * Class Tag.
 *
 * Holds the name, attributes and rendering options of a single tag and
 * renders its opening and closing markup.
 */
class Tag
{
    /**
     * The name of the tag.
     *
     * @var string
     */
    protected $name;

    /**
     * The attributes of the tag, keyed by lower cased attribute name.
     *
     * @var AttributeDTO[]
     */
    protected $attr = [];

    /**
     * Is this tag self closing.
     *
     * @var bool
     */
    protected $selfClosing = false;

    /**
     * If self-closing, will this use a trailing slash. />.
     *
     * @var bool
     */
    protected $trailingSlash = true;

    /**
     * Tag noise.
     *
     * @var string
     */
    protected $noise = '';

    /**
     * The encoding class to... encode the tags.
     *
     * @var Encode|null
     */
    protected $encode;

    /**
     * Whether attribute values are html-special-chars decoded when set.
     *
     * @var bool
     */
    private $HtmlSpecialCharsDecode = false;

    /**
     * What the opening of this tag will be.
     *
     * @var string
     */
    private $opening = '<';

    /**
     * What the closing tag for self-closing elements should be.
     *
     * @var string
     */
    private $closing = ' />';

    /**
     * Sets up the tag with a name.
     */
    public function __construct(string $name)
    {
        $this->name = $name;
    }

    /**
     * Returns the name of this tag.
     */
    public function name(): string
    {
        return $this->name;
    }

    /**
     * Sets the tag to be self closing.
     *
     * Like the other fluent setters of this class, this changes the
     * current instance and returns a clone of it.
     */
    public function selfClosing(): Tag
    {
        $this->selfClosing = true;

        return clone $this;
    }

    /**
     * Sets the string the opening tag starts with (default '<').
     */
    public function setOpening(string $opening): Tag
    {
        $this->opening = $opening;

        return clone $this;
    }

    /**
     * Sets the string a self closing tag ends with (default ' />').
     */
    public function setClosing(string $closing): Tag
    {
        $this->closing = $closing;

        return clone $this;
    }

    /**
     * Sets the tag to not use a trailing slash.
     */
    public function noTrailingSlash(): Tag
    {
        $this->trailingSlash = false;

        return clone $this;
    }

    /**
     * Checks if the tag is self closing.
     */
    public function isSelfClosing(): bool
    {
        return $this->selfClosing;
    }

    /**
     * Sets the encoding type to be used.
     */
    public function setEncoding(Encode $encode): void
    {
        $this->encode = $encode;
    }

    /**
     * @param bool $htmlSpecialCharsDecode
     */
    public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
    {
        $this->HtmlSpecialCharsDecode = $htmlSpecialCharsDecode;
    }

    /**
     * Sets the noise for this tag (if any).
     */
    public function noise(string $noise): Tag
    {
        $this->noise = $noise;

        return clone $this;
    }

    /**
     * Set an attribute for this tag. The key is stored lower cased.
     *
     * @param bool $doubleQuote whether the value is rendered in double
     *                          (true) or single (false) quotes
     */
    public function setAttribute(string $key, ?string $attributeValue, bool $doubleQuote = true): Tag
    {
        $attributeDTO = AttributeDTO::makeFromPrimitives(
            $attributeValue,
            $doubleQuote
        );
        if ($this->HtmlSpecialCharsDecode) {
            $attributeDTO->htmlspecialcharsDecode();
        }
        $this->attr[\strtolower($key)] = $attributeDTO;

        return clone $this;
    }

    /**
     * Set inline style attribute value.
     *
     * The whole style attribute is rebuilt as "name:value;" pairs.
     *
     * @param mixed $attr_key
     * @param mixed $attr_value
     */
    public function setStyleAttributeValue($attr_key, $attr_value): void
    {
        $style_array = $this->getStyleAttributeArray();
        $style_array[$attr_key] = $attr_value;

        $style_string = '';
        foreach ($style_array as $key => $value) {
            $style_string .= $key . ':' . $value . ';';
        }

        $this->setAttribute('style', $style_string);
    }

    /**
     * Get style attribute in array, keyed by property name.
     *
     * A trailing ';' is optional, whitespace around names and values is
     * trimmed, colons inside a value are kept, and chunks without a
     * property name or a colon are skipped.
     */
    public function getStyleAttributeArray(): array
    {
        try {
            $value = $this->getAttribute('style')->getValue();
        } catch (AttributeNotFoundException $e) {
            return [];
        }
        if (\is_null($value)) {
            return [];
        }

        $result = [];
        foreach (\explode(';', $value) as $declaration) {
            // limit 2: only the first ':' separates name and value
            $parts = \explode(':', $declaration, 2);
            if (\count($parts) !== 2) {
                continue;
            }
            $property = \trim($parts[0]);
            if ($property === '') {
                continue;
            }
            $result[$property] = \trim($parts[1]);
        }

        return $result;
    }

    /**
     * Removes an attribute from this tag.
     *
     * @param mixed $key
     *
     * @return void
     */
    public function removeAttribute($key)
    {
        $key = \strtolower($key);
        unset($this->attr[$key]);
    }

    /**
     * Removes all attributes on this tag.
     *
     * @return void
     */
    public function removeAllAttributes()
    {
        $this->attr = [];
    }

    /**
     * Sets the attributes for this tag.
     *
     * Each entry is either a plain value or an array with the keys
     * 'value' and 'doubleQuote'.
     *
     * @return $this
     */
    public function setAttributes(array $attr)
    {
        foreach ($attr as $key => $info) {
            // array keys that look like integers are ints; setAttribute()
            // only accepts strings under strict_types
            $key = (string) $key;
            if (\is_array($info)) {
                $this->setAttribute($key, $info['value'], $info['doubleQuote']);
            } else {
                $this->setAttribute($key, $info);
            }
        }

        return $this;
    }

    /**
     * Returns all attributes of this tag.
     *
     * @throws \stringEncode\Exception
     *
     * @return AttributeDTO[]
     */
    public function getAttributes(): array
    {
        $return = [];
        foreach (\array_keys($this->attr) as $attr) {
            try {
                // cast: numeric attribute names come back as int keys
                $return[$attr] = $this->getAttribute((string) $attr);
            } catch (AttributeNotFoundException $e) {
                // attribute that was in the array was not found in the array....
                unset($e);
            }
        }

        return $return;
    }

    /**
     * Returns an attribute by the key (case insensitive).
     *
     * @throws AttributeNotFoundException
     * @throws \stringEncode\Exception
     */
    public function getAttribute(string $key): AttributeDTO
    {
        $key = \strtolower($key);
        if (!isset($this->attr[$key])) {
            throw new AttributeNotFoundException('Attribute with key "' . $key . '" not found.');
        }
        $attributeDTO = $this->attr[$key];
        if (!\is_null($this->encode)) {
            // convert charset
            $attributeDTO->encodeValue($this->encode);
        }

        return $attributeDTO;
    }

    /**
     * Returns TRUE if node has attribute.
     *
     * The key is lower cased first, like in setAttribute(),
     * getAttribute() and removeAttribute().
     *
     * @return bool
     */
    public function hasAttribute(string $key)
    {
        return isset($this->attr[\strtolower($key)]);
    }

    /**
     * Generates the opening tag for this object.
     *
     * @return string
     */
    public function makeOpeningTag()
    {
        $return = $this->opening . $this->name;

        // add the attributes
        foreach (\array_keys($this->attr) as $key) {
            // cast: numeric attribute names come back as int keys, which
            // getAttribute(string) rejects under strict_types
            $key = (string) $key;
            try {
                $attributeDTO = $this->getAttribute($key);
            } catch (AttributeNotFoundException $e) {
                // attribute that was in the array not found in the array... let's continue.
                continue;
            } catch (\TypeError $e) {
                // No DTO is available here, so render the attribute without
                // a value rather than reading an undefined or stale variable.
                $return .= ' ' . $key;
                continue;
            }
            $val = $attributeDTO->getValue();
            if (\is_null($val)) {
                $return .= ' ' . $key;
            } elseif ($attributeDTO->isDoubleQuote()) {
                $return .= ' ' . $key . '="' . $val . '"';
            } else {
                $return .= ' ' . $key . '=\'' . $val . '\'';
            }
        }

        if ($this->selfClosing && $this->trailingSlash) {
            return $return . $this->closing;
        }

        return $return . '>';
    }

    /**
     * Generates the closing tag for this object; empty for self closing tags.
     *
     * @return string
     */
    public function makeClosingTag()
    {
        if ($this->selfClosing) {
            return '';
        }

        return '</' . $this->name . '>';
    }
}
|
||||
Vendored
+23
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Enum;
|
||||
|
||||
use MyCLabs\Enum\Enum;
|
||||
|
||||
/**
 * Character sets the parser hands to the Content reader when it copies or
 * skips text.
 *
 * @method static StringToken BLANK()
 * @method static StringToken EQUAL()
 * @method static StringToken SLASH()
 * @method static StringToken ATTR()
 * @method static StringToken CLOSECOMMENT()
 */
class StringToken extends Enum
{
    // space, tab, carriage return and line feed
    private const BLANK = " \t\r\n";

    // space, '=', '/' and '>'
    private const EQUAL = ' =/>';

    // space, '/', '>' and the line break / tab characters
    private const SLASH = " />\r\n\t";

    // space and '>'
    private const ATTR = ' >';

    // the character sequence that ends an html comment
    private const CLOSECOMMENT = '-->';
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Class ChildNotFoundException.
 *
 * Thrown when a requested child, or a sibling of a child, does not exist
 * in a node.
 */
final class ChildNotFoundException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Class CircularException.
 *
 * Thrown when a change to the tree would create a cycle, such as making a
 * node's own descendant its parent.
 */
final class CircularException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Class ContentLengthException.
 *
 * Adds no members to the base Exception; it only provides a distinct, final
 * type to catch. (Presumably raised for content length problems; the throw
 * sites are outside this file.)
 */
final class ContentLengthException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Exception type named for cURL related failures.
 *
 * Adds no members to the base Exception. Unlike most sibling exception
 * classes it is not declared final, so it may be extended.
 * (Purpose inferred from the name; throw sites are elsewhere.)
 */
class CurlException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Exception type named for operations attempted on an empty collection.
 *
 * Adds no members to the base Exception; it only provides a distinct, final
 * type to catch. (Purpose inferred from the name; throw sites are elsewhere.)
 */
final class EmptyCollectionException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Class LogicalException.
 *
 * Adds no members to the base Exception; it only provides a distinct, final
 * type to catch. (Presumably raised for logic errors inside the parser; the
 * throw sites are outside this file.)
 */
final class LogicalException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Exception type named for use of a DOM that has not been loaded.
 *
 * Adds no members to the base Exception; it only provides a distinct, final
 * type to catch. (Purpose inferred from the name; throw sites are elsewhere.)
 */
final class NotLoadedException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Exception type named for parent-node lookups that fail.
 *
 * Adds no members to the base Exception; it only provides a distinct, final
 * type to catch. (Purpose inferred from the name; throw sites are elsewhere.)
 */
final class ParentNotFoundException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Exception type named for strict-mode violations.
 *
 * Adds no members to the base Exception; it only provides a distinct, final
 * type to catch. (Purpose inferred from the name; throw sites are elsewhere.)
 */
final class StrictException extends Exception
{
}
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions\Tag;
|
||||
|
||||
/**
 * Exception type named for tag-attribute lookups that fail.
 *
 * Adds no members to the base \Exception and is not declared final.
 * (Purpose inferred from the name; throw sites are elsewhere.)
 */
class AttributeNotFoundException extends \Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Exception type named for child nodes of an unrecognised type.
 *
 * Adds no members to the base Exception; it only provides a distinct, final
 * type to catch. (Purpose inferred from the name; throw sites are elsewhere.)
 */
final class UnknownChildTypeException extends Exception
{
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Exceptions;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Exception type named for unrecognised parser options.
 *
 * Adds no members to the base Exception; it only provides a distinct, final
 * type to catch. (Purpose inferred from the name; throw sites are elsewhere.)
 */
final class UnknownOptionException extends Exception
{
}
|
||||
+64
@@ -0,0 +1,64 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser;
|
||||
|
||||
use PHPHtmlParser\Dom\Node\AbstractNode;
|
||||
use PHPHtmlParser\Dom\Node\InnerNode;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\ParentNotFoundException;
|
||||
|
||||
/**
 * Locates a node inside a node tree by its id.
 */
class Finder
{
    /**
     * Id of the node this finder looks for.
     *
     * @var int
     */
    private $id;

    /**
     * Finder constructor.
     *
     * @param $id id of the node to search for
     */
    public function __construct($id)
    {
        $this->id = $id;
    }

    /**
     * Find node in tree by id, starting at the given node.
     *
     * The walk compares the wanted id with the id of the next sibling to
     * decide between descending into the current node and moving sideways
     * (this looks like it relies on ids growing in document order - confirm
     * against the node implementation).
     *
     * @throws ChildNotFoundException
     * @throws ParentNotFoundException
     *
     * @return bool|AbstractNode the matching node, or false when the walk ends without a match
     */
    public function find(AbstractNode $node)
    {
        // A node without an id cannot be the target; continue with its first child.
        if (!$node->id() && $node instanceof InnerNode) {
            return $this->find($node->firstChild());
        }

        if ($node->id() == $this->id) {
            return $node;
        }

        if (!$node->hasNextSibling()) {
            // Last sibling: the only place left to look is below this node.
            if (!$node->isTextNode() && $node instanceof InnerNode) {
                return $this->find($node->firstChild());
            }

            return false;
        }

        $sibling = $node->nextSibling();
        if ($sibling->id() == $this->id) {
            return $sibling;
        }

        // The sibling's id is already past the target: look inside the current node.
        if ($sibling->id() > $this->id && $node instanceof InnerNode) {
            return $this->find($node->firstChild());
        }

        // The sibling's id is still before the target: continue from the sibling.
        if ($sibling->id() < $this->id) {
            return $this->find($sibling);
        }

        return false;
    }
}
|
||||
+367
@@ -0,0 +1,367 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser;
|
||||
|
||||
/**
 * Holds the parser options.
 *
 * Setter contract: every set*/add*/remove*/clear* method stores the new value
 * on the instance it is called on AND returns a clone of that instance, so
 * calls can be chained.
 */
class Options
{
    /**
     * The whitespaceTextNode, by default true, option tells the parser to save text nodes even if the content of the
     * node is empty (only whitespace). Setting it to false will ignore all whitespace-only text nodes found in the
     * document.
     *
     * @var bool
     */
    private $whitespaceTextNode = true;

    /**
     * Strict, by default false, will throw a StrictException if it finds that the html is not strictly compliant
     * (all tags must have a closing tag, no attribute without a value, etc.).
     *
     * @var bool
     */
    private $strict = false;

    /**
     * The enforceEncoding, by default null, option will enforce a character set to be used for reading the content
     * and returning the content in that encoding. Setting it to null will trigger an attempt to figure out
     * the encoding from within the content of the string given instead.
     *
     * @var ?string
     */
    private $enforceEncoding;

    /**
     * Set this to false to skip the entire clean up phase of the parser. Defaults to true.
     *
     * @var bool
     */
    private $cleanupInput = true;

    /**
     * Set this to false to skip removing the script tags from the document body. This might have adverse effects.
     * Defaults to true.
     *
     * NOTE: Only relevant while cleanupInput is true; with cleanupInput set to false the whole clean up phase
     * is skipped.
     *
     * @var bool
     */
    private $removeScripts = true;

    /**
     * Set this to false to skip removing of style tags from the document body. This might have adverse effects.
     * Defaults to true.
     *
     * NOTE: Only relevant while cleanupInput is true; with cleanupInput set to false the whole clean up phase
     * is skipped.
     *
     * @var bool
     */
    private $removeStyles = true;

    /**
     * Preserves line breaks if set to true. If set to false line breaks are cleaned up
     * as part of the input clean up process. Defaults to false.
     *
     * NOTE: Only relevant while cleanupInput is true; with cleanupInput set to false the whole clean up phase
     * is skipped.
     *
     * @var bool
     */
    private $preserveLineBreaks = false;

    /**
     * Set this to false if you want to preserve whitespace inside of text nodes. It is set to true by default.
     *
     * @var bool
     */
    private $removeDoubleSpace = true;

    /**
     * Set this to false if you want to preserve smarty script found in the html content. It is set to true by default.
     *
     * @var bool
     */
    private $removeSmartyScripts = true;

    /**
     * By default this is set to false. Setting this to true will apply the php function htmlspecialchars_decode
     * to all attribute values and text nodes.
     *
     * @var bool
     */
    private $htmlSpecialCharsDecode = false;

    /**
     * A list of tags which will always be self closing.
     *
     * @var string[]
     */
    private $selfClosing = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'spacer',
        'track',
        'wbr',
    ];

    /**
     * A list of tags where there should be no /> at the end (html5 style).
     *
     * @var string[]
     */
    private $noSlash = [];

    public function isWhitespaceTextNode(): bool
    {
        return $this->whitespaceTextNode;
    }

    public function setWhitespaceTextNode(bool $whitespaceTextNode): Options
    {
        $this->whitespaceTextNode = $whitespaceTextNode;

        return clone $this;
    }

    public function isStrict(): bool
    {
        return $this->strict;
    }

    public function setStrict(bool $strict): Options
    {
        $this->strict = $strict;

        return clone $this;
    }

    public function getEnforceEncoding(): ?string
    {
        return $this->enforceEncoding;
    }

    public function setEnforceEncoding(?string $enforceEncoding): Options
    {
        $this->enforceEncoding = $enforceEncoding;

        return clone $this;
    }

    public function isCleanupInput(): bool
    {
        return $this->cleanupInput;
    }

    public function setCleanupInput(bool $cleanupInput): Options
    {
        $this->cleanupInput = $cleanupInput;

        return clone $this;
    }

    public function isRemoveScripts(): bool
    {
        return $this->removeScripts;
    }

    public function setRemoveScripts(bool $removeScripts): Options
    {
        $this->removeScripts = $removeScripts;

        return clone $this;
    }

    public function isRemoveStyles(): bool
    {
        return $this->removeStyles;
    }

    public function setRemoveStyles(bool $removeStyles): Options
    {
        $this->removeStyles = $removeStyles;

        return clone $this;
    }

    public function isPreserveLineBreaks(): bool
    {
        return $this->preserveLineBreaks;
    }

    public function setPreserveLineBreaks(bool $preserveLineBreaks): Options
    {
        $this->preserveLineBreaks = $preserveLineBreaks;

        return clone $this;
    }

    public function isRemoveDoubleSpace(): bool
    {
        return $this->removeDoubleSpace;
    }

    public function setRemoveDoubleSpace(bool $removeDoubleSpace): Options
    {
        $this->removeDoubleSpace = $removeDoubleSpace;

        return clone $this;
    }

    public function isRemoveSmartyScripts(): bool
    {
        return $this->removeSmartyScripts;
    }

    public function setRemoveSmartyScripts(bool $removeSmartyScripts): Options
    {
        $this->removeSmartyScripts = $removeSmartyScripts;

        return clone $this;
    }

    public function isHtmlSpecialCharsDecode(): bool
    {
        return $this->htmlSpecialCharsDecode;
    }

    public function setHtmlSpecialCharsDecode(bool $htmlSpecialCharsDecode): Options
    {
        $this->htmlSpecialCharsDecode = $htmlSpecialCharsDecode;

        return clone $this;
    }

    /**
     * @return string[]
     */
    public function getSelfClosing(): array
    {
        return $this->selfClosing;
    }

    /**
     * Replaces the whole list of self closing tags.
     *
     * @param string[] $selfClosing
     */
    public function setSelfClosing(array $selfClosing): Options
    {
        $this->selfClosing = $selfClosing;

        return clone $this;
    }

    /**
     * Adds the tag to the list of tags that will always be self closing.
     */
    public function addSelfClosingTag(string $tag): Options
    {
        $this->selfClosing[] = $tag;

        return clone $this;
    }

    /**
     * Adds the tags to the list of tags that will always be self closing.
     *
     * @param string[] $tags
     */
    public function addSelfClosingTags(array $tags): Options
    {
        foreach ($tags as $tag) {
            $this->selfClosing[] = $tag;
        }

        return clone $this;
    }

    /**
     * Removes the tag from the list of tags that will always be self closing.
     */
    public function removeSelfClosingTag(string $tag): Options
    {
        // array_diff() keeps the original keys; re-index so the list has no gaps.
        $this->selfClosing = \array_values(\array_diff($this->selfClosing, [$tag]));

        return clone $this;
    }

    /**
     * Sets the list of self closing tags to empty.
     */
    public function clearSelfClosingTags(): Options
    {
        $this->selfClosing = [];

        return clone $this;
    }

    /**
     * @return string[]
     */
    public function getNoSlash(): array
    {
        return $this->noSlash;
    }

    /**
     * Replaces the whole list of no-slash tags.
     *
     * @param string[] $noSlash
     */
    public function setNoSlash(array $noSlash): Options
    {
        $this->noSlash = $noSlash;

        return clone $this;
    }

    /**
     * Adds a tag to the list of self closing tags that should not have a trailing slash.
     */
    public function addNoSlashTag(string $tag): Options
    {
        $this->noSlash[] = $tag;

        return clone $this;
    }

    /**
     * Removes a tag from the list of no-slash tags.
     */
    public function removeNoSlashTag(string $tag): Options
    {
        // array_diff() keeps the original keys; re-index so the list has no gaps.
        $this->noSlash = \array_values(\array_diff($this->noSlash, [$tag]));

        return clone $this;
    }

    /**
     * Empties the list of no-slash tags.
     */
    public function clearNoSlashTags(): Options
    {
        $this->noSlash = [];

        return clone $this;
    }

    /**
     * Copies every option from the given object and returns the resulting Options.
     *
     * NOTE(review): each setter in the chain runs on the clone returned by the
     * previous one, so only the first value (cleanupInput) is written to this
     * instance. Callers must use the returned object to get the full copy.
     */
    public function setFromOptions(Options $options): Options
    {
        return $this->setCleanupInput($options->isCleanupInput())
            ->setEnforceEncoding($options->getEnforceEncoding())
            ->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode())
            ->setPreserveLineBreaks($options->isPreserveLineBreaks())
            ->setRemoveDoubleSpace($options->isRemoveDoubleSpace())
            ->setRemoveScripts($options->isRemoveScripts())
            ->setRemoveSmartyScripts($options->isRemoveSmartyScripts())
            ->setRemoveStyles($options->isRemoveStyles())
            ->setStrict($options->isStrict())
            ->setWhitespaceTextNode($options->isWhitespaceTextNode())
            ->setSelfClosing($options->getSelfClosing())
            ->setNoSlash($options->getNoSlash());
    }
}
|
||||
Vendored
+116
@@ -0,0 +1,116 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Selector;
|
||||
|
||||
use PHPHtmlParser\Contracts\Selector\ParserInterface;
|
||||
use PHPHtmlParser\DTO\Selector\ParsedSelectorCollectionDTO;
|
||||
use PHPHtmlParser\DTO\Selector\ParsedSelectorDTO;
|
||||
use PHPHtmlParser\DTO\Selector\RuleDTO;
|
||||
|
||||
/**
 * This is the default parser for the selector.
 *
 * It turns a CSS-like selector string into rule DTOs grouped per
 * comma-separated selector.
 */
class Parser implements ParserInterface
{
    /**
     * Pattern of CSS selectors, modified from 'mootools'.
     *
     * Capture groups: 1 tag, 2 id, 3 class list, 4 attribute key,
     * 5 operator, 6 attribute value, 7 separator following the rule.
     *
     * @var string
     */
    private $pattern = "/([\w\-:\*>]*)(?:\#([\w\-]+)|\.([\w\.\-]+))?(?:\[@?(!?[\w\-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";

    /**
     * Parses the selector string.
     *
     * Every pattern match becomes one rule; a separator containing a comma
     * closes the current selector and starts a new one.
     */
    public function parseSelectorString(string $selector): ParsedSelectorCollectionDTO
    {
        $selectors = [];
        $matches = [];
        $rules = [];
        // The pattern requires a separator after every rule, hence the appended space.
        \preg_match_all($this->pattern, \trim($selector) . ' ', $matches, \PREG_SET_ORDER);

        foreach ($matches as $match) {
            // default values
            $tag = \strtolower(\trim($match[1]));
            $operator = '=';
            $key = null;
            $value = null;
            $noKey = false;

            // '>' is not a tag itself; it alters the behavior of the next rule
            $alterNext = $tag === '>';

            // id selector; compared against '' (not empty()) so that "0" is kept
            if (($match[2] ?? '') !== '') {
                $key = 'id';
                $value = $match[2];
            }

            // class selector, possibly several classes chained with dots
            if (($match[3] ?? '') !== '') {
                $key = 'class';
                $value = \explode('.', $match[3]);
            }

            // and final attribute selector
            if (!empty($match[4])) {
                $key = \strtolower($match[4]);
            }
            if (!empty($match[5])) {
                $operator = $match[5];
            }
            // compared against '' (not empty()) so that the value "0" is kept
            if (($match[6] ?? '') !== '') {
                $value = $match[6];
                if (\strpos($value, '][') !== false) {
                    // several attribute selectors were chained: the captured value
                    // still contains the following "][key=value" parts
                    $key = [$key];
                    $parts = \explode('][', $value);
                    $value = [];
                    foreach ($parts as $part) {
                        if (\strpos($part, '=') !== false) {
                            // split on the first '=' only, the value may contain more
                            [$first, $second] = \explode('=', $part, 2);
                            $key[] = $first;
                            $value[] = $second;
                        } else {
                            $value[] = $part;
                        }
                    }
                }
            }

            // a leading '!' asks for elements that do NOT have the attribute
            if (\is_string($key) && isset($key[0]) && $key[0] === '!') {
                $key = \substr($key, 1);
                $noKey = true;
            }

            $rules[] = RuleDTO::makeFromPrimitives(
                $tag,
                $operator,
                $key,
                $value,
                $noKey,
                $alterNext
            );

            // a comma in the separator ends the current selector
            if (isset($match[7]) && \is_string($match[7]) && \trim($match[7]) === ',') {
                $selectors[] = ParsedSelectorDTO::makeFromRules($rules);
                $rules = [];
            }
        }

        // save last results
        if (\count($rules) > 0) {
            $selectors[] = ParsedSelectorDTO::makeFromRules($rules);
        }

        return ParsedSelectorCollectionDTO::makeCollection($selectors);
    }
}
|
||||
Vendored
+316
@@ -0,0 +1,316 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Selector;
|
||||
|
||||
use PHPHtmlParser\Contracts\Selector\SeekerInterface;
|
||||
use PHPHtmlParser\Dom\Node\AbstractNode;
|
||||
use PHPHtmlParser\Dom\Node\InnerNode;
|
||||
use PHPHtmlParser\Dom\Node\LeafNode;
|
||||
use PHPHtmlParser\DTO\Selector\RuleDTO;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
|
||||
/**
 * Default seeker: walks node trees and collects the nodes matching a rule.
 */
class Seeker implements SeekerInterface
{
    /**
     * Attempts to find all children that match the rule given.
     *
     * Unless the option 'checkGrandChildren' is present and falsy, the search
     * also descends into the children of every inspected child.
     *
     * @param AbstractNode[] $nodes   nodes whose children are inspected
     * @param array          $options list of option arrays, e.g. [['checkGrandChildren' => false]]
     *
     * @throws ChildNotFoundException
     *
     * @return AbstractNode[]
     */
    public function seek(array $nodes, RuleDTO $rule, array $options): array
    {
        // XPath index: a numeric key picks the n-th (1-based) given node with a matching tag
        if ($rule->getTag() !== null && \is_numeric($rule->getKey())) {
            $count = 0;
            foreach ($nodes as $node) {
                if ($rule->getTag() == '*'
                    || $rule->getTag() == $node->getTag()->name()
                ) {
                    ++$count;
                    if ($count == $rule->getKey()) {
                        // found the node we wanted
                        return [$node];
                    }
                }
            }

            return [];
        }

        $options = $this->flattenOptions($options);
        $checkGrandChildren = !isset($options['checkGrandChildren'])
            || $options['checkGrandChildren'];

        $return = [];
        foreach ($nodes as $node) {
            // leaves and childless nodes have nothing to inspect
            if ($node instanceof LeafNode || !$node->hasChildren()) {
                continue;
            }

            $child = $node->firstChild();
            while (!\is_null($child)) {
                // wild card without a key: grab every direct child as is
                if ($rule->getTag() == '*' && \is_null($rule->getKey())) {
                    $return[] = $child;
                    $child = $this->getNextChild($node, $child);
                    continue;
                }

                $pass = $this->checkTag($rule, $child);
                if ($pass && $rule->getKey() !== null) {
                    $pass = $this->checkKey($rule, $child);
                }
                if ($pass &&
                    $rule->getKey() !== null &&
                    $rule->getValue() !== null &&
                    $rule->getValue() != '*'
                ) {
                    $pass = $this->checkComparison($rule, $child);
                }

                if ($pass) {
                    // it passed all checks
                    $return[] = $child;
                }

                // whether or not the child matched, its own children may match too
                if ($child instanceof InnerNode && $child->hasChildren() && $checkGrandChildren) {
                    // seek() expects a list of option arrays, so wrap the flattened
                    // options again instead of passing the flat map
                    foreach ($this->seek([$child], $rule, [$options]) as $match) {
                        $return[] = $match;
                    }
                }

                $child = $this->getNextChild($node, $child);
            }
        }

        return $return;
    }

    /**
     * Checks comparison condition from rules against node.
     *
     * The key 'plaintext' compares against the node text; any other key
     * compares against the attribute(s) of that name. With several keys
     * every one of them has to match.
     */
    private function checkComparison(RuleDTO $rule, AbstractNode $node): bool
    {
        if ($rule->getKey() == 'plaintext') {
            // plaintext search
            return $this->checkNodeValue($node->text(), $rule, $node);
        }

        // normal search with a single key
        if (!\is_array($rule->getKey())) {
            return $this->checkNodeValue($node->getAttribute($rule->getKey()), $rule, $node);
        }

        // several keys: value number $index belongs to key number $index
        $result = true;
        foreach ($rule->getKey() as $index => $key) {
            $nodeValue = $node->getAttribute($key);
            $result = $result &&
                $this->checkNodeValue($nodeValue, $rule, $node, $index);
        }

        return $result;
    }

    /**
     * Flattens the option array: merges a list of option arrays into a single
     * key => value map, later entries overriding earlier ones.
     *
     * @return array
     */
    private function flattenOptions(array $optionsArray)
    {
        $options = [];
        foreach ($optionsArray as $optionArray) {
            foreach ($optionArray as $key => $option) {
                $options[$key] = $option;
            }
        }

        return $options;
    }

    /**
     * Returns the next child or null if no more children.
     *
     * @return AbstractNode|null
     */
    private function getNextChild(
        AbstractNode $node,
        AbstractNode $currentChild
    ) {
        if (!$node instanceof InnerNode) {
            return null;
        }

        try {
            return $node->nextChild($currentChild->id());
        } catch (ChildNotFoundException $e) {
            // no more children
            return null;
        }
    }

    /**
     * Checks tag condition from rules against node.
     *
     * An empty tag or '*' matches every node.
     */
    private function checkTag(RuleDTO $rule, AbstractNode $node): bool
    {
        if (!empty($rule->getTag()) && $rule->getTag() != $node->getTag()->name()
            && $rule->getTag() != '*'
        ) {
            return false;
        }

        return true;
    }

    /**
     * Checks key condition from rules against node.
     *
     * For a "no key" rule the node must have none of the attributes; otherwise
     * it must have all of them ('plaintext' is not an attribute and always passes).
     */
    private function checkKey(RuleDTO $rule, AbstractNode $node): bool
    {
        if (!\is_array($rule->getKey())) {
            if ($rule->isNoKey()) {
                return $node->getAttribute($rule->getKey()) === null;
            }

            return $rule->getKey() == 'plaintext' || $node->hasAttribute($rule->getKey());
        }

        if ($rule->isNoKey()) {
            foreach ($rule->getKey() as $key) {
                if (!\is_null($node->getAttribute($key))) {
                    return false;
                }
            }

            return true;
        }

        foreach ($rule->getKey() as $key) {
            if ($key != 'plaintext' && !$node->hasAttribute($key)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Compares one node value against the rule value.
     *
     * @param ?string $nodeValue attribute value or text taken from the node
     * @param ?int    $index     position of the key/value pair when the rule carries several keys
     */
    private function checkNodeValue(
        ?string $nodeValue,
        RuleDTO $rule,
        AbstractNode $node,
        ?int $index = null
    ): bool {
        $check = false;
        if (
            $rule->getValue() !== null &&
            \is_string($rule->getValue()) &&
            $nodeValue !== null
        ) {
            $check = $this->match($rule->getOperator(), $rule->getValue(), $nodeValue);
        }

        $key = $rule->getKey();
        if (
            !$check &&
            $key == 'class' &&
            \is_array($rule->getValue())
        ) {
            // handle multiple classes: every requested class must be present on the node
            $nodeClasses = \explode(' ', $node->getAttribute('class') ?? '');
            foreach ($rule->getValue() as $value) {
                // start from scratch for each requested class, otherwise an empty
                // token (e.g. from a leading or doubled space) would inherit the
                // result of the previous class
                $check = false;
                foreach ($nodeClasses as $class) {
                    if (
                        $class !== '' &&
                        \is_string($rule->getOperator())
                    ) {
                        $check = $this->match($rule->getOperator(), $value, $class);
                    }
                    if ($check) {
                        break;
                    }
                }
                if (!$check) {
                    break;
                }
            }
        } elseif (
            !$check &&
            \is_array($key) &&
            !\is_null($nodeValue) &&
            \is_string($rule->getOperator()) &&
            \is_string($rule->getValue()[$index])
        ) {
            // several attribute keys: compare with the value at the same position
            $check = $this->match($rule->getOperator(), $rule->getValue()[$index], $nodeValue);
        }

        return $check;
    }

    /**
     * Attempts to match the given arguments with the given operator.
     *
     * Both sides are lower-cased first. Supported operators: '=' equal,
     * '!=' not equal, '^=' starts with, '$=' ends with, '*=' regular
     * expression (a pattern starting with '/' is used verbatim, anything else
     * is wrapped in '/.../i'). Unknown operators never match.
     */
    private function match(
        string $operator,
        string $pattern,
        string $value
    ): bool {
        $value = \strtolower($value);
        $pattern = \strtolower($pattern);
        switch ($operator) {
            case '=':
                return $value === $pattern;
            case '!=':
                return $value !== $pattern;
            case '^=':
                return \preg_match('/^' . \preg_quote($pattern, '/') . '/',
                        $value) == 1;
            case '$=':
                return \preg_match('/' . \preg_quote($pattern, '/') . '$/',
                        $value) == 1;
            case '*=':
                // guard the offset access: an empty pattern has no first character
                if ($pattern !== '' && $pattern[0] === '/') {
                    return \preg_match($pattern, $value) == 1;
                }

                return \preg_match('/' . $pattern . '/i', $value) == 1;
            default:
                return false;
        }
    }
}
|
||||
Vendored
+105
@@ -0,0 +1,105 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser\Selector;
|
||||
|
||||
use PHPHtmlParser\Contracts\Selector\ParserInterface;
|
||||
use PHPHtmlParser\Contracts\Selector\SeekerInterface;
|
||||
use PHPHtmlParser\Contracts\Selector\SelectorInterface;
|
||||
use PHPHtmlParser\Discovery\SeekerDiscovery;
|
||||
use PHPHtmlParser\Discovery\SelectorParserDiscovery;
|
||||
use PHPHtmlParser\Dom\Node\AbstractNode;
|
||||
use PHPHtmlParser\Dom\Node\Collection;
|
||||
use PHPHtmlParser\DTO\Selector\ParsedSelectorCollectionDTO;
|
||||
use PHPHtmlParser\DTO\Selector\RuleDTO;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
|
||||
/**
 * Class Selector.
 *
 * Parses a selector string once and applies the resulting rules to node trees.
 */
class Selector implements SelectorInterface
{
    /**
     * Parsed form of the selector string passed to the constructor.
     *
     * @var ParsedSelectorCollectionDTO
     */
    private $ParsedSelectorCollectionDTO;

    /**
     * Performs the actual node matching for each rule.
     *
     * @var SeekerInterface
     */
    private $seeker;

    /**
     * Constructs with the selector string.
     *
     * A parser or seeker that is not supplied is obtained from the matching
     * discovery class.
     */
    public function __construct(string $selector, ?ParserInterface $parser = null, ?SeekerInterface $seeker = null)
    {
        $parser = $parser ?? SelectorParserDiscovery::find();
        $seeker = $seeker ?? SeekerDiscovery::find();

        $this->ParsedSelectorCollectionDTO = $parser->parseSelectorString($selector);
        $this->seeker = $seeker;
    }

    /**
     * Returns the selectors that were found in __construct.
     */
    public function getParsedSelectorCollectionDTO(): ParsedSelectorCollectionDTO
    {
        return $this->ParsedSelectorCollectionDTO;
    }

    /**
     * Attempts to find the selectors starting from the given
     * node object.
     *
     * Each parsed selector is evaluated on its own, rule by rule, feeding the
     * nodes found by one rule into the next; the final node sets of all
     * selectors are appended to one collection.
     *
     * @throws ChildNotFoundException
     */
    public function find(AbstractNode $node): Collection
    {
        $results = new Collection();
        foreach ($this->ParsedSelectorCollectionDTO->getParsedSelectorDTO() as $selector) {
            $nodes = [$node];
            // a selector without rules must not contribute the start node itself
            if (\count($selector->getRules()) == 0) {
                continue;
            }

            $pendingOptions = [];
            foreach ($selector->getRules() as $rule) {
                if (!$rule->isAlterNext()) {
                    $nodes = $this->seeker->seek($nodes, $rule, $pendingOptions);
                    // options only apply to the rule directly after them
                    $pendingOptions = [];
                    continue;
                }

                $pendingOptions[] = $this->alterNext($rule);
            }

            // this is the final set of nodes
            foreach ($nodes as $found) {
                $results[] = $found;
            }
        }

        return $results;
    }

    /**
     * Attempts to figure out what the alteration will be for
     * the next element.
     *
     * Only '>' is handled: it turns off the search in grand children.
     */
    private function alterNext(RuleDTO $rule): array
    {
        return $rule->getTag() == '>' ? ['checkGrandChildren' => false] : [];
    }
}
|
||||
+114
@@ -0,0 +1,114 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PHPHtmlParser;
|
||||
|
||||
use GuzzleHttp\Psr7\Request;
|
||||
use GuzzleHttp\Client;
|
||||
use PHPHtmlParser\Exceptions\ChildNotFoundException;
|
||||
use PHPHtmlParser\Exceptions\CircularException;
|
||||
use PHPHtmlParser\Exceptions\NotLoadedException;
|
||||
use PHPHtmlParser\Exceptions\StrictException;
|
||||
use Psr\Http\Client\ClientInterface;
|
||||
use Psr\Http\Message\RequestInterface;
|
||||
|
||||
/**
 * Class StaticDom.
 *
 * Static facade that keeps the most recently created Dom and forwards
 * unknown static calls to it.
 */
final class StaticDom
{
    /**
     * Dom set by the latest load*() call or by mount(); null when none is set.
     *
     * @var Dom|null
     */
    private static $dom = null;

    /**
     * Attempts to call the given method on the most recently created dom
     * from below.
     *
     * @throws NotLoadedException when no dom has been loaded or mounted yet
     *
     * @return mixed whatever the forwarded Dom method returns
     */
    public static function __callStatic(string $method, array $arguments)
    {
        if (self::$dom instanceof Dom) {
            return \call_user_func_array([self::$dom, $method], $arguments);
        }
        throw new NotLoadedException('The dom is not loaded. Can not call a dom method.');
    }

    /**
     * Call this to mount the static facade. The facade allows you to use
     * this object as a $className.
     *
     * @param ?Dom $dom dom to install as the current one; the current dom is left untouched when null
     *
     * @return bool false when a class named $className already exists (nothing is changed), true otherwise
     */
    public static function mount(string $className = 'Dom', ?Dom $dom = null): bool
    {
        if (\class_exists($className)) {
            return false;
        }
        \class_alias(__CLASS__, $className);
        if ($dom instanceof Dom) {
            self::$dom = $dom;
        }

        return true;
    }

    /**
     * Creates a new dom object and calls loadFromFile() on the
     * new object.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws Exceptions\LogicalException
     */
    public static function loadFromFile(string $file, ?Options $options = null): Dom
    {
        $dom = new Dom();
        self::$dom = $dom;

        return $dom->loadFromFile($file, $options);
    }

    /**
     * Creates a new dom object and calls loadFromUrl() on the
     * new object.
     *
     * A Guzzle client and a GET request for $url are created when the
     * corresponding argument is null.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws \Psr\Http\Client\ClientExceptionInterface
     */
    public static function loadFromUrl(string $url, ?Options $options = null, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom
    {
        $dom = new Dom();
        self::$dom = $dom;

        if (\is_null($client)) {
            $client = new Client();
        }
        if (\is_null($request)) {
            $request = new Request('GET', $url);
        }

        return $dom->loadFromUrl($url, $options, $client, $request);
    }

    /**
     * Creates a new dom object and calls loadStr() on the
     * new object.
     */
    public static function loadStr(string $str, ?Options $options = null): Dom
    {
        $dom = new Dom();
        self::$dom = $dom;

        return $dom->loadStr($str, $options);
    }

    /**
     * Sets the $dom variable to null.
     */
    public static function unload(): void
    {
        self::$dom = null;
    }
}
|
||||
Reference in New Issue
Block a user