(Grav GitSync) Automatic Commit from GitSync

This commit is contained in:
GitSync
2026-06-14 00:27:27 +00:00
parent a2920f812d
commit 3c1bfda80f
2933 changed files with 491625 additions and 0 deletions
@@ -0,0 +1,257 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser;
use PHPHtmlParser\Enum\StringToken;
use PHPHtmlParser\Exceptions\ContentLengthException;
use PHPHtmlParser\Exceptions\LogicalException;
/**
 * Class Content.
 *
 * A byte-oriented cursor over an html string: it keeps the string, its length
 * and a current position, and offers helpers to peek, copy and skip forward.
 */
class Content
{
    /**
     * The content string.
     *
     * @var string
     */
    protected $content;

    /**
     * The size of the content in bytes (computed once with strlen()).
     *
     * @var int
     */
    protected $size;

    /**
     * The current position we are in the content.
     *
     * @var int
     */
    protected $pos;

    /**
     * The following 4 strings are character sets that are important to us.
     * NOTE(review): they are not read anywhere in this class; presumably
     * subclasses or legacy callers use them -- confirm before removing.
     *
     * @var string
     */
    protected $blank = " \t\r\n";
    protected $equal = ' =/>';
    protected $slash = " />\r\n\t";
    protected $attr = ' >';

    /**
     * Content constructor.
     *
     * @param string $content the string to walk over; the cursor starts at offset 0
     */
    public function __construct(string $content = '')
    {
        $this->content = $content;
        $this->size = \strlen($content);
        $this->pos = 0;
    }

    /**
     * Returns the current position of the content.
     */
    public function getPosition(): int
    {
        return $this->pos;
    }

    /**
     * Gets the character at the given offset, or at the current position
     * when no offset is given.
     *
     * Returns an empty string when the offset is past the end. Note that a
     * negative offset is resolved by PHP relative to the end of the string.
     *
     * @param ?int $char absolute offset to read, or null for the current position
     */
    public function char(?int $char = null): string
    {
        return $this->content[$char ?? $this->pos] ?? '';
    }

    /**
     * Gets a string from the current character position.
     *
     * The position is not moved. At least one character is always read
     * (the loop body runs before the length is checked), and characters past
     * the end of the content contribute an empty string.
     *
     * @param int $length number of characters to read
     *
     * @return string
     */
    public function string(int $length = 1): string
    {
        $string = '';
        $position = $this->pos;
        $end = $this->pos + $length;
        do {
            $string .= $this->char($position++);
        } while ($position < $end);

        return $string;
    }

    /**
     * Moves the current position forward.
     *
     * @throws ContentLengthException when the move would pass the end of the content
     */
    public function fastForward(int $count): Content
    {
        if (!$this->canFastForward($count)) {
            // trying to go over the content length, throw exception
            throw new ContentLengthException('Attempt to fastForward pass the length of the content.');
        }
        $this->pos += $count;

        return $this;
    }

    /**
     * Checks if we can move the position forward by $count without passing
     * the end of the content (landing exactly on the end is allowed).
     */
    public function canFastForward(int $count): bool
    {
        return $this->size >= $this->pos + $count;
    }

    /**
     * Moves the current position backward, clamping at the start of the content.
     */
    public function rewind(int $count): Content
    {
        $this->pos = \max(0, $this->pos - $count);

        return $this;
    }

    /**
     * Copy the content until we find the given string.
     *
     * The position is advanced to the start of the match (the match itself is
     * not consumed). When nothing matches, the rest of the content is returned
     * and the position is moved to the end.
     *
     * @param string $string the delimiter to look for
     * @param bool   $char   treat $string as a set of single characters (strcspn)
     *                       instead of one substring; ignored when $escape is true
     * @param bool   $escape skip matches that are directly preceded by a backslash
     *
     * @throws LogicalException when substr() unexpectedly fails
     */
    public function copyUntil(string $string, bool $char = false, bool $escape = false): string
    {
        if ($this->pos >= $this->size) {
            // nothing left
            return '';
        }

        if ($escape) {
            $position = $this->pos;
            $found = false;
            while (!$found) {
                $position = \strpos($this->content, $string, $position);
                if ($position === false) {
                    // reached the end
                    break;
                }

                // Only look at the previous byte when there is one: offset -1
                // would be resolved by PHP to the LAST byte of the content and
                // could wrongly mark a match at offset 0 as escaped.
                if ($position > 0 && $this->char($position - 1) === '\\') {
                    // this character is escaped
                    ++$position;
                    continue;
                }

                $found = true;
            }
        } elseif ($char) {
            $position = \strcspn($this->content, $string, $this->pos);
            $position += $this->pos;
        } else {
            $position = \strpos($this->content, $string, $this->pos);
        }

        if ($position === false) {
            // could not find character, just return the remaining of the content
            $return = \substr($this->content, $this->pos, $this->size - $this->pos);
            if ($return === false) {
                throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
            }
            $this->pos = $this->size;

            return $return;
        }

        if ($position === $this->pos) {
            // we are at the right place
            return '';
        }

        $return = \substr($this->content, $this->pos, $position - $this->pos);
        if ($return === false) {
            throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
        }
        // set the new position
        $this->pos = $position;

        return $return;
    }

    /**
     * Copies the content until the string is found and return it
     * unless the 'unless' is found in the substring.
     *
     * The cursor first steps over one character, then copies up to the next
     * unescaped $string. If the copied part contains none of the characters in
     * $unless, $string followed by the copied part is returned; otherwise the
     * position is restored and an empty string is returned.
     *
     * @throws ContentLengthException when the cursor cannot step forward
     * @throws LogicalException
     */
    public function copyUntilUnless(string $string, string $unless): string
    {
        $lastPos = $this->pos;
        $this->fastForward(1);
        $foundString = $this->copyUntil($string, true, true);

        $position = \strcspn($foundString, $unless);
        if ($position === \strlen($foundString)) {
            return $string . $foundString;
        }
        // rewind changes and return nothing
        $this->pos = $lastPos;

        return '';
    }

    /**
     * Copies the content until it reaches the token string.
     *
     * @uses $this->copyUntil()
     *
     * @throws LogicalException
     */
    public function copyByToken(StringToken $stringToken, bool $char = false, bool $escape = false): string
    {
        $string = $stringToken->getValue();

        return $this->copyUntil($string, $char, $escape);
    }

    /**
     * Skip a given set of characters.
     *
     * Advances the position over the leading run of characters that all belong
     * to $string.
     *
     * @param string $string the set of characters to skip
     * @param bool   $copy   when true the skipped characters are returned,
     *                       otherwise an empty string is returned
     *
     * @throws LogicalException
     */
    public function skip(string $string, bool $copy = false): string
    {
        $len = \strspn($this->content, $string, $this->pos);
        if ($len === false) {
            throw new LogicalException('Strspn returned false with position ' . $this->pos . '.');
        }
        $return = '';
        if ($copy) {
            $return = \substr($this->content, $this->pos, $len);
            if ($return === false) {
                throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
            }
        }

        // update the position
        $this->pos += $len;

        return $return;
    }

    /**
     * Skip a given token of pre-defined characters.
     *
     * @uses $this->skip()
     *
     * @throws LogicalException
     */
    public function skipByToken(StringToken $skipToken, bool $copy = false): string
    {
        $string = $skipToken->getValue();

        return $this->skip($string, $copy);
    }
}
@@ -0,0 +1,16 @@
<?php
namespace PHPHtmlParser\Contracts\Dom;
use PHPHtmlParser\Exceptions\LogicalException;
use PHPHtmlParser\Options;
/**
* Contract for the component that pre-processes a raw html string before it is parsed.
*/
interface CleanerInterface
{
/**
* Cleans the html of any non-html information.
*
* @param string $str the raw html string to clean
* @param Options $options the options that control which cleanup steps run
* @param string $defaultCharset the charset to fall back on -- presumably used when the options do not enforce one; confirm against the implementation
*
* @return string the cleaned html
*
* @throws LogicalException
*/
public function clean(string $str, Options $options, string $defaultCharset): string;
}
@@ -0,0 +1,33 @@
<?php
namespace PHPHtmlParser\Contracts\Dom;
use PHPHtmlParser\Content;
use PHPHtmlParser\Dom\Node\AbstractNode;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\ContentLengthException;
use PHPHtmlParser\Exceptions\LogicalException;
use PHPHtmlParser\Exceptions\StrictException;
use PHPHtmlParser\Options;
/**
* Contract for the component that turns html content into a tree of nodes.
*/
interface ParserInterface
{
/**
* Attempts to parse the html in content.
*
* @param Options $options the options to parse with
* @param Content $content the content cursor holding the html to parse
* @param int $size a size supplied by the caller -- NOTE(review): its exact meaning is not visible from this interface; confirm against the implementation
*
* @return AbstractNode the node produced by the parse (presumably the root of the tree -- confirm)
*
* @throws ChildNotFoundException
* @throws CircularException
* @throws ContentLengthException
* @throws LogicalException
* @throws StrictException
*/
public function parse(Options $options, Content $content, int $size): AbstractNode;
/**
* Attempts to detect the charset that the html was sent in.
*
* @param Options $options the options to detect with
* @param string $defaultCharset the charset the caller considers its default
* @param AbstractNode $root the node to inspect
*
* @return bool NOTE(review): the meaning of the flag is not visible here -- presumably whether a charset was detected; confirm
*
* @throws ChildNotFoundException
*/
public function detectCharset(Options $options, string $defaultCharset, AbstractNode $root): bool;
}
@@ -0,0 +1,23 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Contracts;
use PHPHtmlParser\Dom;
use PHPHtmlParser\Options;
use Psr\Http\Client\ClientInterface;
use Psr\Http\Message\RequestInterface;
/**
 * Public contract of the Dom entry point: load html from a file, a url or a
 * string, configure options, and query the result with css selectors.
 */
interface DomInterface
{
    /**
     * Loads the dom from the file at the given path.
     */
    public function loadFromFile(string $file, ?Options $options = null): Dom;

    /**
     * Loads the dom from the given url, optionally with a caller supplied
     * http client and request.
     */
    public function loadFromUrl(string $url, ?Options $options, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom;

    /**
     * Loads the dom from an html string.
     */
    public function loadStr(string $str, ?Options $options = null): Dom;

    /**
     * Sets the options to be used by the following load calls.
     */
    public function setOptions(Options $options): Dom;

    /**
     * Find elements by css selector.
     *
     * The nullable type is spelled out explicitly; relying on `int $nth = null`
     * to imply nullability is deprecated as of PHP 8.4.
     *
     * @param ?int $nth when given, only the match at this index is requested
     *
     * @return mixed
     */
    public function find(string $selector, ?int $nth = null);
}
@@ -0,0 +1,12 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Contracts\Selector;
use PHPHtmlParser\DTO\Selector\ParsedSelectorCollectionDTO;
/**
* Contract for the component that parses a css selector string.
*/
interface ParserInterface
{
/**
* Parses the given selector string into a collection of parsed selectors.
*/
public function parseSelectorString(string $selector): ParsedSelectorCollectionDTO;
}
@@ -0,0 +1,17 @@
<?php
namespace PHPHtmlParser\Contracts\Selector;
use PHPHtmlParser\DTO\Selector\RuleDTO;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
/**
* Contract for the component that matches a single selector rule against nodes.
*/
interface SeekerInterface
{
/**
* Attempts to find all children that match the rule
* given.
*
* @param array $nodes the nodes to search from
* @param RuleDTO $rule the rule to match
* @param array $options NOTE(review): the expected keys are not visible from this interface -- confirm against the implementation
*
* @return array the matches found
*
* @throws ChildNotFoundException
*/
public function seek(array $nodes, RuleDTO $rule, array $options): array;
}
@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Contracts\Selector;
use PHPHtmlParser\Dom\Node\AbstractNode;
use PHPHtmlParser\Dom\Node\Collection;
use PHPHtmlParser\DTO\Selector\ParsedSelectorCollectionDTO;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
/**
* Contract for a css selector that can be run against a node.
*/
interface SelectorInterface
{
/**
* Constructs with the selector string.
*
* @param string $selector the css selector string
* @param ?ParserInterface $parser optional selector parser to use
* @param ?SeekerInterface $seeker optional seeker to use
*/
public function __construct(string $selector, ?ParserInterface $parser = null, ?SeekerInterface $seeker = null);
/**
* Returns the selectors that were found.
*/
public function getParsedSelectorCollectionDTO(): ParsedSelectorCollectionDTO;
/**
* Attempts to find the selectors starting from the given
* node object.
*
* @throws ChildNotFoundException
*/
public function find(AbstractNode $node): Collection;
}
@@ -0,0 +1,41 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\DTO\Selector;
/**
 * Immutable list of ParsedSelectorDTO objects.
 */
final class ParsedSelectorCollectionDTO
{
    /**
     * The accepted selectors, indexed from 0 in the order they were given.
     *
     * @var ParsedSelectorDTO[]
     */
    private $parsedSelectorDTO = [];

    /**
     * Keeps only the entries that really are ParsedSelectorDTO instances;
     * anything else is silently dropped and the original keys are discarded.
     *
     * @param ParsedSelectorDTO[] $parsedSelectorDTOs
     */
    private function __construct(array $parsedSelectorDTOs)
    {
        $accepted = \array_filter(
            $parsedSelectorDTOs,
            static function ($candidate): bool {
                return $candidate instanceof ParsedSelectorDTO;
            }
        );
        $this->parsedSelectorDTO = \array_values($accepted);
    }

    /**
     * Named constructor building the collection from an array of selectors.
     *
     * @param ParsedSelectorDTO[] $parsedSelectorDTOs
     */
    public static function makeCollection(array $parsedSelectorDTOs): ParsedSelectorCollectionDTO
    {
        return new self($parsedSelectorDTOs);
    }

    /**
     * @return ParsedSelectorDTO[]
     */
    public function getParsedSelectorDTO(): array
    {
        return $this->parsedSelectorDTO;
    }
}
@@ -0,0 +1,41 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\DTO\Selector;
/**
 * Immutable list of the rules that make up one parsed selector.
 */
final class ParsedSelectorDTO
{
    /**
     * The accepted rules, indexed from 0 in the order they were given.
     *
     * @var RuleDTO[]
     */
    private $rules = [];

    /**
     * Keeps only the entries that really are RuleDTO instances; anything
     * else is silently dropped and the original keys are discarded.
     *
     * @param RuleDTO[] $ruleDTOs
     */
    private function __construct(array $ruleDTOs)
    {
        $accepted = \array_filter(
            $ruleDTOs,
            static function ($candidate): bool {
                return $candidate instanceof RuleDTO;
            }
        );
        $this->rules = \array_values($accepted);
    }

    /**
     * Named constructor building the selector from an array of rules.
     *
     * @param RuleDTO[] $ruleDTOs
     */
    public static function makeFromRules(array $ruleDTOs): ParsedSelectorDTO
    {
        return new self($ruleDTOs);
    }

    /**
     * @return RuleDTO[]
     */
    public function getRules(): array
    {
        return $this->rules;
    }
}
@@ -0,0 +1,100 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\DTO\Selector;
/**
 * Immutable value object describing a single selector rule.
 */
final class RuleDTO
{
    /**
     * @var string
     */
    private $tag;

    /**
     * @var string
     */
    private $operator;

    /**
     * @var string|array|null
     */
    private $key;

    /**
     * @var string|array|null
     */
    private $value;

    /**
     * @var bool
     */
    private $noKey;

    /**
     * @var bool
     */
    private $alterNext;

    /**
     * Copies the six expected entries of $values onto the properties.
     *
     * @param array $values map with the keys tag, operator, key, value, noKey and alterNext
     */
    private function __construct(array $values)
    {
        [
            'tag' => $this->tag,
            'operator' => $this->operator,
            'key' => $this->key,
            'value' => $this->value,
            'noKey' => $this->noKey,
            'alterNext' => $this->alterNext,
        ] = $values;
    }

    /**
     * Named constructor building the rule from plain values.
     *
     * @param string|array|null $key
     * @param string|array|null $value
     */
    public static function makeFromPrimitives(string $tag, string $operator, $key, $value, bool $noKey, bool $alterNext): RuleDTO
    {
        $fields = [
            'tag' => $tag,
            'operator' => $operator,
            'key' => $key,
            'value' => $value,
            'noKey' => $noKey,
            'alterNext' => $alterNext,
        ];

        return new self($fields);
    }

    public function getTag(): string
    {
        return $this->tag;
    }

    public function getOperator(): string
    {
        return $this->operator;
    }

    /**
     * @return string|array|null
     */
    public function getKey()
    {
        return $this->key;
    }

    /**
     * @return string|array|null
     */
    public function getValue()
    {
        return $this->value;
    }

    public function isNoKey(): bool
    {
        return $this->noKey;
    }

    public function isAlterNext(): bool
    {
        return $this->alterNext;
    }
}
@@ -0,0 +1,60 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\DTO\Tag;
use stringEncode\Encode;
use stringEncode\Exception;
/**
 * Holds the value of a single tag attribute together with the quote style
 * it should be written with.
 */
final class AttributeDTO
{
    /**
     * The attribute value, or null when the attribute has no value.
     *
     * @var ?string
     */
    private $value;

    /**
     * Whether the value is wrapped in double quotes (as opposed to single quotes).
     *
     * @var bool
     */
    private $doubleQuote;

    /**
     * @param array $values map with the key value and, optionally, doubleQuote (defaults to true)
     */
    private function __construct(array $values)
    {
        $this->value = $values['value'];
        $this->doubleQuote = $values['doubleQuote'] ?? true;
    }

    /**
     * Named constructor building the attribute from plain values.
     */
    public static function makeFromPrimitives(?string $value, bool $doubleQuote = true): AttributeDTO
    {
        return new AttributeDTO([
            'value' => $value,
            'doubleQuote' => $doubleQuote,
        ]);
    }

    public function getValue(): ?string
    {
        return $this->value;
    }

    public function isDoubleQuote(): bool
    {
        return $this->doubleQuote;
    }

    /**
     * Decodes html special characters in the value, in place.
     * A null value is left untouched.
     */
    public function htmlspecialcharsDecode(): void
    {
        if (!\is_null($this->value)) {
            $this->value = \htmlspecialchars_decode($this->value);
        }
    }

    /**
     * Converts the value with the given encoder, in place.
     *
     * A null value (attribute without a value) is left untouched, mirroring
     * htmlspecialcharsDecode(): there is nothing to convert, and handing null
     * to the encoder would fail if it only accepts strings.
     *
     * @throws Exception
     */
    public function encodeValue(Encode $encode): void
    {
        if (\is_null($this->value)) {
            return;
        }
        $this->value = $encode->convert($this->value);
    }
}
@@ -0,0 +1,74 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\DTO;
use PHPHtmlParser\Dom\Node\HtmlNode;
/**
 * Immutable value object carrying the outcome of parsing one tag.
 */
final class TagDTO
{
    /**
     * @var bool
     */
    private $status;

    /**
     * @var bool
     */
    private $closing;

    /**
     * @var ?HtmlNode
     */
    private $node;

    /**
     * @var ?string
     */
    private $tag;

    /**
     * Reads the four fields from $values; a missing (or null) entry falls
     * back to false for the flags and to null for node and tag.
     */
    private function __construct(array $values = [])
    {
        $this->tag = $values['tag'] ?? null;
        $this->node = $values['node'] ?? null;
        $this->closing = $values['closing'] ?? false;
        $this->status = $values['status'] ?? false;
    }

    /**
     * Named constructor building the DTO from plain values.
     */
    public static function makeFromPrimitives(bool $status = false, bool $closing = false, ?HtmlNode $node = null, ?string $tag = null): TagDTO
    {
        $fields = [
            'status' => $status,
            'closing' => $closing,
            'node' => $node,
            'tag' => $tag,
        ];

        return new self($fields);
    }

    public function isStatus(): bool
    {
        return $this->status;
    }

    public function isClosing(): bool
    {
        return $this->closing;
    }

    /**
     * Returns the node carried by this DTO, or null when there is none.
     */
    public function getNode(): ?HtmlNode
    {
        return $this->node;
    }

    /**
     * Returns the tag string carried by this DTO, or null when there is none.
     */
    public function getTag(): ?string
    {
        return $this->tag;
    }
}
@@ -0,0 +1,25 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Discovery;
use PHPHtmlParser\Contracts\Dom\CleanerInterface;
use PHPHtmlParser\Dom\Cleaner;
/**
 * Hands out one shared Cleaner instance, created on first use.
 */
class CleanerDiscovery
{
    /**
     * The shared cleaner, or null until find() is called for the first time.
     *
     * @var CleanerInterface|null
     */
    private static $parser = null;

    /**
     * Returns the shared cleaner, creating it when it does not exist yet.
     */
    public static function find(): CleanerInterface
    {
        if (null !== self::$parser) {
            return self::$parser;
        }

        self::$parser = new Cleaner();

        return self::$parser;
    }
}
@@ -0,0 +1,25 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Discovery;
use PHPHtmlParser\Contracts\Dom\ParserInterface;
use PHPHtmlParser\Dom\Parser;
/**
 * Hands out one shared dom Parser instance, created on first use.
 */
class DomParserDiscovery
{
    /**
     * The shared parser, or null until find() is called for the first time.
     *
     * @var ParserInterface|null
     */
    private static $parser = null;

    /**
     * Returns the shared dom parser, creating it when it does not exist yet.
     */
    public static function find(): ParserInterface
    {
        if (null !== self::$parser) {
            return self::$parser;
        }

        self::$parser = new Parser();

        return self::$parser;
    }
}
@@ -0,0 +1,25 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Discovery;
use PHPHtmlParser\Contracts\Selector\SeekerInterface;
use PHPHtmlParser\Selector\Seeker;
/**
 * Hands out one shared Seeker instance, created on first use.
 */
class SeekerDiscovery
{
    /**
     * The shared seeker, or null until find() is called for the first time.
     *
     * @var SeekerInterface|null
     */
    private static $seeker = null;

    /**
     * Returns the shared seeker, creating it when it does not exist yet.
     */
    public static function find(): SeekerInterface
    {
        if (null !== self::$seeker) {
            return self::$seeker;
        }

        self::$seeker = new Seeker();

        return self::$seeker;
    }
}
@@ -0,0 +1,25 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Discovery;
use PHPHtmlParser\Contracts\Selector\ParserInterface;
use PHPHtmlParser\Selector\Parser;
/**
 * Hands out one shared selector Parser instance, created on first use.
 */
class SelectorParserDiscovery
{
    /**
     * The shared parser, or null until find() is called for the first time.
     *
     * @var ParserInterface|null
     */
    private static $parser = null;

    /**
     * Returns the shared selector parser, creating it when it does not exist yet.
     */
    public static function find(): ParserInterface
    {
        if (null !== self::$parser) {
            return self::$parser;
        }

        self::$parser = new Parser();

        return self::$parser;
    }
}
@@ -0,0 +1,251 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\Client;
use PHPHtmlParser\Contracts\Dom\CleanerInterface;
use PHPHtmlParser\Contracts\Dom\ParserInterface;
use PHPHtmlParser\Contracts\DomInterface;
use PHPHtmlParser\Discovery\CleanerDiscovery;
use PHPHtmlParser\Discovery\DomParserDiscovery;
use PHPHtmlParser\Dom\Node\Collection;
use PHPHtmlParser\Dom\RootAccessTrait;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\LogicalException;
use PHPHtmlParser\Exceptions\NotLoadedException;
use PHPHtmlParser\Exceptions\StrictException;
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
use Psr\Http\Client\ClientExceptionInterface;
use Psr\Http\Client\ClientInterface;
use Psr\Http\Message\RequestInterface;
/**
 * Class Dom.
 *
 * Entry point of the library: loads html from a file, a url or a string,
 * runs it through the cleaner and the parser, and exposes selector based
 * lookups on the resulting root node.
 */
class Dom implements DomInterface
{
    use RootAccessTrait;

    /**
     * The charset we would like the output to be in.
     *
     * @var string
     */
    private $defaultCharset = 'UTF-8';

    /**
     * The document content; stays null until one of the load methods ran.
     *
     * @var Content
     */
    private $content;

    /**
     * Global options to be used by all load calls.
     *
     * @var ?Options
     */
    private $globalOptions;

    /**
     * @var ParserInterface
     */
    private $domParser;

    /**
     * @var CleanerInterface
     */
    private $domCleaner;

    /**
     * @param ?ParserInterface  $domParser  parser to use; the discovered default when null
     * @param ?CleanerInterface $domCleaner cleaner to use; the discovered default when null
     */
    public function __construct(?ParserInterface $domParser = null, ?CleanerInterface $domCleaner = null)
    {
        $this->domParser = $domParser ?? DomParserDiscovery::find();
        $this->domCleaner = $domCleaner ?? CleanerDiscovery::find();
    }

    /**
     * Returns the inner html of the root node.
     *
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     * @throws NotLoadedException
     */
    public function __toString(): string
    {
        $this->isLoaded();

        return $this->root->innerHtml();
    }

    /**
     * Loads the dom from a document file/url.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws Exceptions\ContentLengthException
     * @throws LogicalException when the file could not be read
     * @throws StrictException
     */
    public function loadFromFile(string $file, ?Options $options = null): Dom
    {
        // Warnings are silenced on purpose: a failed read is reported through
        // the exception below instead.
        $content = @\file_get_contents($file);
        if ($content === false) {
            throw new LogicalException('file_get_contents failed and returned false when trying to read "' . $file . '".');
        }

        return $this->loadStr($content, $options);
    }

    /**
     * Uses an http client to attempt to load the content from a url.
     *
     * When no client is given a Guzzle client is created, and when no request
     * is given a GET request for $url is created.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws Exceptions\ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     * @throws ClientExceptionInterface
     */
    public function loadFromUrl(string $url, ?Options $options = null, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom
    {
        if ($client === null) {
            $client = new Client();
        }
        if ($request === null) {
            $request = new Request('GET', $url);
        }

        $response = $client->sendRequest($request);
        $content = $response->getBody()->getContents();

        return $this->loadStr($content, $options);
    }

    /**
     * Parses the html of the given string. Used by loadFromFile()
     * and loadFromUrl().
     *
     * Options are layered: library defaults, then the global options set with
     * setOptions(), then the options passed to this call.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws Exceptions\ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     */
    public function loadStr(string $str, ?Options $options = null): Dom
    {
        $localOptions = new Options();
        if ($this->globalOptions !== null) {
            $localOptions = $localOptions->setFromOptions($this->globalOptions);
        }
        if ($options !== null) {
            $localOptions = $localOptions->setFromOptions($options);
        }

        $html = $this->domCleaner->clean($str, $localOptions, $this->defaultCharset);

        $this->content = new Content($html);

        // NOTE(review): the size handed to the parser is the byte length of
        // the raw input, not of the cleaned html -- confirm this is intended.
        $this->root = $this->domParser->parse($localOptions, $this->content, \strlen($str));
        $this->domParser->detectCharset($localOptions, $this->defaultCharset, $this->root);

        return $this;
    }

    /**
     * Sets the global options to be used by all load calls.
     */
    public function setOptions(Options $options): Dom
    {
        $this->globalOptions = $options;

        return $this;
    }

    /**
     * Find elements by css selector on the root node.
     *
     * The nullable type of $nth is spelled out explicitly; relying on
     * `int $nth = null` to imply nullability is deprecated as of PHP 8.4.
     *
     * @param ?int $nth when given, only the match at this index is requested
     *
     * @throws NotLoadedException
     * @throws ChildNotFoundException
     *
     * @return mixed|Collection|null
     */
    public function find(string $selector, ?int $nth = null)
    {
        $this->isLoaded();

        return $this->root->find($selector, $nth);
    }

    /**
     * Simple wrapper function that returns an element by the
     * id.
     *
     * @param $id
     *
     * @throws NotLoadedException
     * @throws ChildNotFoundException
     *
     * @return mixed|Collection|null
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        return $this->find('#' . $id, 0);
    }

    /**
     * Simple wrapper function that returns all elements by
     * tag name.
     *
     * @throws NotLoadedException
     * @throws ChildNotFoundException
     *
     * @return mixed|Collection|null
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();

        return $this->find($name);
    }

    /**
     * Simple wrapper function that returns all elements by
     * class name.
     *
     * @throws NotLoadedException
     * @throws ChildNotFoundException
     *
     * @return mixed|Collection|null
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();

        return $this->find('.' . $class);
    }

    /**
     * Checks if the load methods have been called.
     *
     * @throws NotLoadedException when no content has been loaded yet
     */
    private function isLoaded(): void
    {
        if (\is_null($this->content)) {
            throw new NotLoadedException('Content is not loaded!');
        }
    }
}
@@ -0,0 +1,130 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom;
use PHPHtmlParser\Contracts\Dom\CleanerInterface;
use PHPHtmlParser\Exceptions\LogicalException;
use PHPHtmlParser\Options;
/**
 * Default cleaner: strips everything from an html string that the parser
 * should not see (doctype, comments, cdata and, depending on the options,
 * scripts, styles and smarty blocks).
 */
class Cleaner implements CleanerInterface
{
    /**
     * Cleans the html of any non-html information.
     *
     * Returns the input untouched when the options disable input cleanup.
     * Otherwise the string is gunzipped when it starts with the gzip magic
     * bytes, converted to the working encoding and run through the cleanup
     * steps below, in order.
     *
     * @throws LogicalException when decoding, converting or one of the replacements fails
     */
    public function clean(string $str, Options $options, string $defaultCharset): string
    {
        if (!$options->isCleanupInput()) {
            // skip entire cleanup step
            return $str;
        }

        // check if the string is gziped
        $is_gzip = 0 === \mb_strpos($str, "\x1f" . "\x8b" . "\x08", 0, 'US-ASCII');
        if ($is_gzip) {
            $str = \gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // we must handle character encoding
        $str = $this->setUpRegexEncoding($str, $options, $defaultCharset);

        // remove white space before closing tags
        $str = $this->regexReplace("'\s+>", "'>", $str, 'Error when attempting to clean single quotes.');
        $str = $this->regexReplace('"\s+>', '">', $str, 'Error when attempting to clean double quotes.');

        // clean out the \n\r (str_replace on a string subject always yields a string)
        $replace = ' ';
        if ($options->isPreserveLineBreaks()) {
            $replace = '&#10;';
        }
        $str = \str_replace(["\r\n", "\r", "\n"], $replace, $str);

        // strip the doctype
        $str = $this->regexReplace('<!doctype(.*?)>', '', $str, 'Error when attempting to strip the doctype.');

        // strip out comments
        $str = $this->regexReplace('<!--(.*?)-->', '', $str, 'Error when attempting to strip comments.');

        // strip out cdata
        $str = $this->regexReplace("<!\[CDATA\[(.*?)\]\]>", '', $str, 'Error when attempting to strip out cdata.');

        // strip out <script> tags
        if ($options->isRemoveScripts()) {
            $str = $this->regexReplace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'Error when attempting to remove scripts 1.');
            $str = $this->regexReplace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'Error when attempting to remove scripts 2.');
        }

        // strip out <style> tags
        if ($options->isRemoveStyles()) {
            $str = $this->regexReplace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'Error when attempting to strip out style tags 1.');
            $str = $this->regexReplace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'Error when attempting to strip out style tags 2.');
        }

        // strip smarty scripts
        if ($options->isRemoveSmartyScripts()) {
            $str = $this->regexReplace("(\{\w)(.*?)(\})", '', $str, 'Error when attempting to remove smarty scripts.');
        }

        return $str;
    }

    /**
     * Runs one case-insensitive multibyte regex replacement and insists on a string result.
     *
     * mb_eregi_replace() reports failure with false and, for input that is not
     * valid in the current encoding, with null; both are turned into an
     * exception so a non-string never reaches the next cleanup step.
     *
     * @param string $context sentence appended to the exception message to name the failing step
     *
     * @throws LogicalException
     */
    private function regexReplace(string $pattern, string $replacement, string $str, string $context): string
    {
        $result = \mb_eregi_replace($pattern, $replacement, $str);
        if (!\is_string($result)) {
            throw new LogicalException('mb_eregi_replace returned false instead of a string. ' . $context);
        }

        return $result;
    }

    /**
     * Sets up the mb_regex_encoding and converts the text to that encoding.
     *
     * The encoding enforced by the options wins; the default charset is used
     * when the options do not enforce one.
     *
     * @throws LogicalException
     */
    private function setUpRegexEncoding(string $str, Options $options, string $defaultCharset): string
    {
        $encoding = $defaultCharset;
        $enforceEncoding = $options->getEnforceEncoding();
        if ($enforceEncoding !== null) {
            //  they want to enforce the given encoding
            $encoding = $enforceEncoding;
        }

        if (!\mb_regex_encoding($encoding)) {
            throw new LogicalException('Character encoding was not able to be changed to ' . $encoding . '.');
        }

        $converted = \mb_convert_encoding($str, $encoding);
        if (!\is_string($converted)) {
            // a failed conversion would otherwise surface as a TypeError on the return type
            throw new LogicalException('mb_convert_encoding was not able to convert the string to ' . $encoding . '.');
        }

        return $converted;
    }
}
@@ -0,0 +1,495 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom\Node;
use PHPHtmlParser\Contracts\Selector\SelectorInterface;
use PHPHtmlParser\Dom\Tag;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\ParentNotFoundException;
use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException;
use PHPHtmlParser\Finder;
use PHPHtmlParser\Selector\Selector;
use stringEncode\Encode;
/**
* Dom node object.
*
* @property-read string $outerhtml
* @property-read string $innerhtml
* @property-read string $innerText
* @property-read string $text
* @property-read Tag $tag
* @property-read InnerNode $parent
*/
abstract class AbstractNode
{
/**
* Contains the tag name/type.
*
* @var ?Tag
*/
protected $tag;
/**
* Contains a list of attributes on this tag.
*
* @var array
*/
protected $attr = [];
/**
* Contains the parent Node.
*
* @var ?InnerNode
*/
protected $parent;
/**
* The unique id of the class. Given by PHP.
*
* @var int
*/
protected $id;
/**
* The encoding class used to encode strings.
*
* @var mixed
*/
protected $encode;
/**
* An array of all the children.
*
* @var array
*/
protected $children = [];
/**
* @var bool
*/
protected $htmlSpecialCharsDecode = false;
/**
* @var int
*/
private static $count = 0;
/**
* Creates a unique id for this node.
*/
public function __construct()
{
$this->id = self::$count;
++self::$count;
}
/**
* Attempts to clear out any object references.
*/
public function __destruct()
{
$this->tag = null;
$this->parent = null;
$this->attr = [];
$this->children = [];
}
/**
* Magic get method for attributes and certain methods.
*
* @return mixed
*/
public function __get(string $key)
{
// check attribute first
if ($this->getAttribute($key) !== null) {
return $this->getAttribute($key);
}
switch (\strtolower($key)) {
case 'outerhtml':
return $this->outerHtml();
case 'innerhtml':
return $this->innerHtml();
case 'innertext':
return $this->innerText();
case 'text':
return $this->text();
case 'tag':
return $this->getTag();
case 'parent':
return $this->getParent();
}
}
/**
* Simply calls the outer text method.
*
* @return string
*/
public function __toString()
{
return $this->outerHtml();
}
/**
* @param bool $htmlSpecialCharsDecode
*/
public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
{
$this->htmlSpecialCharsDecode = $htmlSpecialCharsDecode;
}
/**
* Returns the id of this object.
*/
public function id(): int
{
return $this->id;
}
/**
* Returns the parent of node.
*
* @return InnerNode
*/
public function getParent(): ?InnerNode
{
return $this->parent;
}
/**
* Sets the parent node.
*
* @throws ChildNotFoundException
* @throws CircularException
*/
public function setParent(InnerNode $parent): AbstractNode
{
// remove from old parent
if ($this->parent !== null) {
if ($this->parent->id() == $parent->id()) {
// already the parent
return $this;
}
$this->parent->removeChild($this->id);
}
$this->parent = $parent;
// assign child to parent
$this->parent->addChild($this);
return $this;
}
/**
* Removes this node and all its children from the
* DOM tree.
*
* @return void
*/
public function delete()
{
if ($this->parent !== null) {
$this->parent->removeChild($this->id);
}
$this->parent->clear();
$this->clear();
}
/**
* Sets the encoding class to this node.
*
* @return void
*/
public function propagateEncoding(Encode $encode)
{
$this->encode = $encode;
$this->tag->setEncoding($encode);
}
/**
* Checks if the given node id is an ancestor of
* the current node.
*/
public function isAncestor(int $id): bool
{
if ($this->getAncestor($id) !== null) {
return true;
}
return false;
}
/**
* Attempts to get an ancestor node by the given id.
*
* @return AbstractNode|null
*/
public function getAncestor(int $id)
{
if ($this->parent !== null) {
if ($this->parent->id() == $id) {
return $this->parent;
}
return $this->parent->getAncestor($id);
}
}
/**
* Checks if the current node has a next sibling.
*/
public function hasNextSibling(): bool
{
try {
$this->nextSibling();
// sibling found, return true;
return true;
} catch (ParentNotFoundException $e) {
// no parent, no next sibling
unset($e);
return false;
} catch (ChildNotFoundException $e) {
// no sibling found
unset($e);
return false;
}
}
/**
* Attempts to get the next sibling.
*
* @throws ChildNotFoundException
* @throws ParentNotFoundException
*/
public function nextSibling(): AbstractNode
{
if ($this->parent === null) {
throw new ParentNotFoundException('Parent is not set for this node.');
}
return $this->parent->nextChild($this->id);
}
/**
* Attempts to get the previous sibling.
*
* @throws ChildNotFoundException
* @throws ParentNotFoundException
*/
public function previousSibling(): AbstractNode
{
if ($this->parent === null) {
throw new ParentNotFoundException('Parent is not set for this node.');
}
return $this->parent->previousChild($this->id);
}
/**
* Gets the tag object of this node.
*/
public function getTag(): Tag
{
return $this->tag;
}
/**
* Replaces the tag for this node.
*
* @param string|Tag $tag
*/
public function setTag($tag): AbstractNode
{
if (\is_string($tag)) {
$tag = new Tag($tag);
}
$this->tag = $tag;
// clear any cache
$this->clear();
return $this;
}
/**
* A wrapper method that simply calls the getAttribute method
* on the tag of this node.
*/
public function getAttributes(): array
{
$attributes = $this->tag->getAttributes();
foreach ($attributes as $name => $attributeDTO) {
$attributes[$name] = $attributeDTO->getValue();
}
return $attributes;
}
/**
* A wrapper method that simply calls the getAttribute method
* on the tag of this node.
*/
public function getAttribute(string $key): ?string
{
try {
$attributeDTO = $this->tag->getAttribute($key);
} catch (AttributeNotFoundException $e) {
// no attribute with this key exists, returning null.
unset($e);
return null;
}
return $attributeDTO->getValue();
}
/**
* A wrapper method that simply calls the hasAttribute method
* on the tag of this node.
*/
public function hasAttribute(string $key): bool
{
return $this->tag->hasAttribute($key);
}
/**
* A wrapper method that simply calls the setAttribute method
* on the tag of this node.
*/
public function setAttribute(string $key, ?string $value, bool $doubleQuote = true): AbstractNode
{
$this->tag->setAttribute($key, $value, $doubleQuote);
//clear any cache
$this->clear();
return $this;
}
/**
* A wrapper method that simply calls the removeAttribute method
* on the tag of this node.
*/
public function removeAttribute(string $key): void
{
$this->tag->removeAttribute($key);
//clear any cache
$this->clear();
}
/**
* A wrapper method that simply calls the removeAllAttributes
* method on the tag of this node.
*/
public function removeAllAttributes(): void
{
$this->tag->removeAllAttributes();
//clear any cache
$this->clear();
}
/**
* Function to locate a specific ancestor tag in the path to the root.
*
* @throws ParentNotFoundException
*/
public function ancestorByTag(string $tag): AbstractNode
{
// Start by including ourselves in the comparison.
$node = $this;
do {
if ($node->tag->name() == $tag) {
return $node;
}
$node = $node->getParent();
} while ($node !== null);
throw new ParentNotFoundException('Could not find an ancestor with "' . $tag . '" tag');
}
/**
 * Finds elements below this node using a css selector.
 *
 * When no selector object is supplied, a Selector is built from
 * $selectorString. With $nth set, only the entry at that offset of the
 * result is returned, or null when there is no such entry; otherwise the
 * complete result of the selector is returned.
 *
 * @throws ChildNotFoundException
 *
 * @return mixed|Collection|null
 */
public function find(string $selectorString, ?int $nth = null, ?SelectorInterface $selector = null)
{
    $selector = $selector ?? new Selector($selectorString);
    $nodes = $selector->find($this);

    if ($nth === null) {
        return $nodes;
    }

    // a missing (or null) entry at the requested offset yields null
    return $nodes[$nth] ?? null;
}
/**
 * Finds a node by its id.
 *
 * Builds a Finder for the id and runs it against this node.
 *
 * @throws ChildNotFoundException
 * @throws ParentNotFoundException
 *
 * @return bool|AbstractNode
 */
public function findById(int $id)
{
    return (new Finder($id))->find($this);
}
/**
 * Returns the inner html of this node as a string.
 *
 * Abstract: every concrete node class supplies its own implementation.
 */
abstract public function innerHtml(): string;
/**
 * Returns the html of this node as a string, including its own tag.
 *
 * Abstract: every concrete node class supplies its own implementation.
 */
abstract public function outerHtml(): string;
/**
 * Returns the text of this node (if there is any text) as a string.
 *
 * Abstract: every concrete node class supplies its own implementation.
 */
abstract public function text(): string;
/**
 * Reports whether this node is a text node.
 *
 * The base implementation always answers false.
 */
public function isTextNode(): bool
{
    return false;
}
/**
 * Invalidates whatever this node has cached.
 *
 * Call this when something in the node tree has changed, for example when
 * a child has been added or the parent has been changed.
 */
abstract protected function clear(): void;
}
@@ -0,0 +1,45 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom\Node;
use ArrayIterator;
use Countable;
use IteratorAggregate;
use PHPHtmlParser\Dom\Tag;
/**
* Dom node object which will allow users to use it as
* an array.
*
* @property-read string $outerhtml
* @property-read string $innerhtml
* @property-read string $innerText
* @property-read string $text
* @property-read Tag $tag
* @property-read InnerNode $parent
*/
abstract class ArrayNode extends AbstractNode implements IteratorAggregate, Countable
{
    /**
     * Builds an iterator over the array supplied by getIteratorArray().
     */
    public function getIterator(): ArrayIterator
    {
        $items = $this->getIteratorArray();

        return new ArrayIterator($items);
    }

    /**
     * Number of entries in the array supplied by getIteratorArray().
     */
    public function count(): int
    {
        $items = $this->getIteratorArray();

        return \count($items);
    }

    /**
     * Supplies the array that iteration and counting operate on.
     */
    abstract protected function getIteratorArray(): array;
}
@@ -0,0 +1,156 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom\Node;
use ArrayAccess;
use ArrayIterator;
use Countable;
use IteratorAggregate;
use PHPHtmlParser\Exceptions\EmptyCollectionException;
/**
* Class Collection.
*/
class Collection implements IteratorAggregate, ArrayAccess, Countable
{
    /**
     * The collection of Nodes.
     *
     * @var array
     */
    protected $collection = [];

    /**
     * Forwards an unknown method call to the first node in the collection.
     *
     * @throws EmptyCollectionException when the first entry is not a node
     *
     * @return mixed whatever the forwarded method returns
     */
    public function __call(string $method, array $arguments)
    {
        $node = \reset($this->collection);
        if ($node instanceof AbstractNode) {
            return \call_user_func_array([$node, $method], $arguments);
        }

        throw new EmptyCollectionException('The collection does not contain any Nodes.');
    }

    /**
     * Forwards a property read to the first node in the collection.
     *
     * @param mixed $key
     *
     * @throws EmptyCollectionException when the first entry is not a node
     *
     * @return mixed
     */
    public function __get($key)
    {
        $node = \reset($this->collection);
        if ($node instanceof AbstractNode) {
            return $node->$key;
        }

        throw new EmptyCollectionException('The collection does not contain any Nodes.');
    }

    /**
     * Casts the first node in the collection to a string. Yields an empty
     * string when the first entry is not a node.
     */
    public function __toString(): string
    {
        $node = \reset($this->collection);
        if ($node instanceof AbstractNode) {
            return (string) $node;
        }

        return '';
    }

    /**
     * Returns the count of the collection.
     */
    public function count(): int
    {
        return \count($this->collection);
    }

    /**
     * Returns an iterator for the collection.
     */
    public function getIterator(): ArrayIterator
    {
        return new ArrayIterator($this->collection);
    }

    /**
     * Stores a value at the given offset, or appends it when the offset is
     * null (the `$collection[] = $value` form).
     *
     * @param mixed $offset
     * @param mixed $value
     */
    public function offsetSet($offset, $value): void
    {
        if (\is_null($offset)) {
            $this->collection[] = $value;
        } else {
            $this->collection[$offset] = $value;
        }
    }

    /**
     * Checks if an offset exists.
     *
     * @param mixed $offset
     */
    public function offsetExists($offset): bool
    {
        return isset($this->collection[$offset]);
    }

    /**
     * Removes the entry at the given offset.
     *
     * @param mixed $offset
     */
    public function offsetUnset($offset): void
    {
        unset($this->collection[$offset]);
    }

    /**
     * Gets the entry at the given offset, or null when there is none.
     *
     * The method keeps its untyped return for compatibility with subclasses;
     * the attribute below tells PHP 8.1+ not to emit the ArrayAccess
     * return-type deprecation. Older PHP versions read the line as a comment.
     *
     * @param mixed $offset
     *
     * @return mixed
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return $this->collection[$offset] ?? null;
    }

    /**
     * Returns this collection as an array.
     */
    public function toArray(): array
    {
        return $this->collection;
    }

    /**
     * Similar to jQuery "each" method. Calls the callback once per entry,
     * passing the entry first and its key second.
     */
    public function each(callable $callback)
    {
        foreach ($this->collection as $key => $value) {
            $callback($value, $key);
        }
    }
}
@@ -0,0 +1,244 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom\Node;
use PHPHtmlParser\Dom\Tag;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
/**
* Class HtmlNode.
*
* @property-read string $outerhtml
* @property-read string $innerhtml
* @property-read string $innerText
* @property-read string $text
* @property-read Tag $tag
* @property-read InnerNode $parent
*/
class HtmlNode extends InnerNode
{
    /**
     * Cached result of innerHtml(); null until computed and after clear().
     *
     * @var ?string
     */
    protected $innerHtml;

    /**
     * Cached result of outerHtml(); null until computed and after clear().
     *
     * @var ?string
     */
    protected $outerHtml;

    /**
     * Cached result of innerText(); null until computed and after clear().
     *
     * @var ?string
     */
    protected $innerText;

    /**
     * Cached result of text() without descending into child html nodes.
     *
     * @var ?string
     */
    protected $text;

    /**
     * Cached result of text(true), i.e. including the text of child html
     * nodes.
     *
     * @var ?string
     */
    protected $textWithChildren;

    /**
     * Sets up the tag of this node. A value that is not already a Tag is
     * wrapped in a new Tag.
     *
     * @param string|Tag $tag
     */
    public function __construct($tag)
    {
        $this->tag = $tag instanceof Tag ? $tag : new Tag($tag);
        parent::__construct();
    }

    /**
     * Applies the html-special-chars decoding flag to this node and to its
     * tag.
     *
     * @param bool $htmlSpecialCharsDecode
     */
    public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
    {
        parent::setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);
        $this->tag->setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);
    }

    /**
     * Gets the inner html of this node by concatenating the text of text
     * children and the outer html of html children, in sibling order. The
     * result is cached until clear() is called.
     *
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException when a child is neither a TextNode nor an HtmlNode
     */
    public function innerHtml(): string
    {
        if (!$this->hasChildren()) {
            // no children
            return '';
        }

        if ($this->innerHtml !== null) {
            // we already know the result.
            return $this->innerHtml;
        }

        $html = '';
        $child = $this->firstChild();
        while ($child !== null) {
            if ($child instanceof TextNode) {
                $html .= $child->text();
            } elseif ($child instanceof HtmlNode) {
                $html .= $child->outerHtml();
            } else {
                throw new UnknownChildTypeException('Unknown child type "' . \get_class($child) . '" found in node');
            }

            try {
                $child = $this->nextChild($child->id());
            } catch (ChildNotFoundException $e) {
                // no more children
                $child = null;
            }
        }

        // remember the results
        $this->innerHtml = $html;

        return $html;
    }

    /**
     * Gets the inner text of this node: the inner html with tags stripped.
     * The result is cached until clear() is called.
     *
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function innerText(): string
    {
        if ($this->innerText === null) {
            $this->innerText = \strip_tags($this->innerHtml());
        }

        return $this->innerText;
    }

    /**
     * Gets the html of this node, including its own tag.
     *
     * A node whose tag is named "root" yields only its inner html. A self
     * closing tag yields only its opening tag, and that result is not cached.
     *
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function outerHtml(): string
    {
        // special handling for root
        if ($this->tag->name() === 'root') {
            return $this->innerHtml();
        }

        if ($this->outerHtml !== null) {
            // we already know the results.
            return $this->outerHtml;
        }

        $html = $this->tag->makeOpeningTag();
        if ($this->tag->isSelfClosing()) {
            // ignore any children... there should not be any though
            return $html;
        }

        $html .= $this->innerHtml();
        $html .= $this->tag->makeClosingTag();

        // remember the results
        $this->outerHtml = $html;

        return $html;
    }

    /**
     * Gets the text of the direct text children of this node. With
     * $lookInChildren set, the text of child html nodes is included too,
     * recursively. Both variants are cached separately.
     */
    public function text(bool $lookInChildren = false): string
    {
        $cached = $lookInChildren ? $this->textWithChildren : $this->text;
        if ($cached !== null) {
            // we already know the results.
            return $cached;
        }

        $text = '';
        foreach ($this->children as $child) {
            /** @var AbstractNode $node */
            $node = $child['node'];
            if ($node instanceof TextNode) {
                // Deliberately the `text` property read, exactly as before.
                // NOTE(review): presumably resolved through the node's magic
                // getter; confirm against AbstractNode::__get.
                $text .= $node->text;
            } elseif ($lookInChildren && $node instanceof HtmlNode) {
                $text .= $node->text($lookInChildren);
            }
        }

        // remember our result
        if ($lookInChildren) {
            $this->textWithChildren = $text;
        } else {
            $this->text = $text;
        }

        return $text;
    }

    /**
     * Drops every cached value of this node and then does the same on the
     * parent, if there is one. Call this when something in the node tree has
     * changed, like a child being added or the parent being changed.
     *
     * The innerText cache is reset as well; leaving it in place would make
     * innerText() keep returning text from before the change.
     */
    protected function clear(): void
    {
        $this->innerHtml = null;
        $this->outerHtml = null;
        $this->innerText = null;
        $this->text = null;
        $this->textWithChildren = null;

        if ($this->parent !== null) {
            $this->parent->clear();
        }
    }

    /**
     * Returns all children of this html node.
     */
    protected function getIteratorArray(): array
    {
        return $this->getChildren();
    }
}
@@ -0,0 +1,442 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom\Node;
use PHPHtmlParser\Dom\Tag;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\LogicalException;
use stringEncode\Encode;
/**
* Inner node of the html tree, might have children.
*
* @property-read string $outerhtml
* @property-read string $innerhtml
* @property-read string $innerText
* @property-read string $text
* @property-read Tag $tag
* @property-read InnerNode $parent
*/
abstract class InnerNode extends ArrayNode
{
    /**
     * All children, keyed by child id. Every entry is an array with the keys
     * 'node' (the child node), 'prev' and 'next' (the ids of the neighbouring
     * siblings, or null at either end of the list).
     *
     * @var array
     */
    protected $children = [];

    /**
     * Sets the encoding class to this node and its tag, and propagates it
     * to all its children.
     */
    public function propagateEncoding(Encode $encode): void
    {
        $this->encode = $encode;
        $this->tag->setEncoding($encode);

        foreach ($this->children as $child) {
            /** @var AbstractNode $node */
            $node = $child['node'];
            $node->propagateEncoding($encode);
        }
    }

    /**
     * Checks if this node has children.
     */
    public function hasChildren(): bool
    {
        return !empty($this->children);
    }

    /**
     * Returns the child by id.
     *
     * @throws ChildNotFoundException when no child has the given id
     */
    public function getChild(int $id): AbstractNode
    {
        if (!isset($this->children[$id])) {
            throw new ChildNotFoundException("Child '$id' not found in this node.");
        }

        return $this->children[$id]['node'];
    }

    /**
     * Returns a new array of child nodes, in sibling order.
     *
     * The list is built by following the 'next' links from the first child.
     *
     * @throws CircularException when a child id is reached a second time
     */
    public function getChildren(): array
    {
        $nodes = [];
        $seenIds = [];
        try {
            $child = $this->firstChild();
            while (true) {
                $nodes[] = $child;
                $seenIds[] = $child->id;
                $child = $this->nextChild($child->id());
                if (\in_array($child->id, $seenIds, true)) {
                    throw new CircularException('Circular sibling referance found. Child with id ' . $child->id() . ' found twice.');
                }
            }
        } catch (ChildNotFoundException $e) {
            // we are done looking for children
        }

        return $nodes;
    }

    /**
     * Counts children.
     */
    public function countChildren(): int
    {
        return \count($this->children);
    }

    /**
     * Adds a child node to this node, either at the end or directly before
     * the child with id $before.
     *
     * Returns false without changing anything when the node is already a
     * child of this node, or when $before is given but is not a child id.
     * Returns true once the child has been linked in.
     *
     * Sibling ids are compared against null rather than tested for
     * truthiness, so a sibling whose id is 0 is linked like any other.
     *
     * @throws ChildNotFoundException
     * @throws CircularException when the child is this node or one of its ancestors
     * @throws LogicalException
     */
    public function addChild(AbstractNode $child, int $before = -1): bool
    {
        // check integrity
        if ($this->isAncestor($child->id())) {
            throw new CircularException('Can not add child. It is my ancestor.');
        }

        // check if child is itself
        if ($child->id() === $this->id) {
            throw new CircularException('Can not set itself as a child.');
        }

        $prevId = null;
        $nextId = null;
        if ($this->hasChildren()) {
            if (isset($this->children[$child->id()])) {
                // we already have this child
                return false;
            }

            if ($before >= 0) {
                if (!isset($this->children[$before])) {
                    return false;
                }

                // splice the child in between $before and its old predecessor
                $prevId = $this->children[$before]['prev'];
                if ($prevId !== null) {
                    $this->children[$prevId]['next'] = $child->id();
                }
                $this->children[$before]['prev'] = $child->id();
                $nextId = $before;
            } else {
                // append after the current last child
                $prevId = $this->lastChild()->id();
                $this->children[$prevId]['next'] = $child->id();
            }
        }

        $keys = \array_keys($this->children);
        $entry = [
            'node' => $child,
            'next' => $nextId,
            'prev' => $prevId,
        ];

        // the new entry goes right behind its predecessor, or first without one
        $index = $prevId !== null ? (int) (\array_search($prevId, $keys, true) + 1) : 0;
        \array_splice($keys, $index, 0, (string) $child->id());
        $children = \array_values($this->children);
        \array_splice($children, $index, 0, [$entry]);

        // add the child
        $combination = \array_combine($keys, $children);
        if ($combination === false) {
            // The number of elements for each array isn't equal or if the arrays are empty.
            throw new LogicalException('array combine failed during add child method call.');
        }
        $this->children = $combination;

        // tell child I am the new parent
        $child->setParent($this);

        //clear any cache
        $this->clear();

        return true;
    }

    /**
     * Inserts the element before the child with the provided id.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     */
    public function insertBefore(AbstractNode $child, int $id): bool
    {
        return $this->addChild($child, $id);
    }

    /**
     * Inserts the element after the child with the provided id. Returns
     * false when no child has that id.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     */
    public function insertAfter(AbstractNode $child, int $id): bool
    {
        if (!isset($this->children[$id])) {
            return false;
        }

        // "after $id" is the same as "before the sibling that follows $id"
        if (isset($this->children[$id]['next']) && \is_int($this->children[$id]['next'])) {
            return $this->addChild($child, (int) $this->children[$id]['next']);
        }

        // $id is the last child, so the new child is simply appended
        $this->clear();

        return $this->addChild($child);
    }

    /**
     * Removes the child by id and re-links its neighbours. Does nothing when
     * no child has that id.
     */
    public function removeChild(int $id): InnerNode
    {
        if (!isset($this->children[$id])) {
            return $this;
        }

        // handle moving next and previous assignments.
        $next = $this->children[$id]['next'];
        $prev = $this->children[$id]['prev'];
        if ($next !== null) {
            $this->children[$next]['prev'] = $prev;
        }
        if ($prev !== null) {
            $this->children[$prev]['next'] = $next;
        }

        // remove the child
        unset($this->children[$id]);

        //clear any cache
        $this->clear();

        return $this;
    }

    /**
     * Returns the 'next' link stored for the given child: the id of the
     * following sibling, or null when the child is the last one.
     *
     * @throws ChildNotFoundException
     *
     * @return mixed
     */
    public function hasNextChild(int $id)
    {
        $child = $this->getChild($id);

        return $this->children[$child->id()]['next'];
    }

    /**
     * Attempts to get the next child.
     *
     * @throws ChildNotFoundException when $id is not a child or has no next sibling
     *
     * @uses $this->getChild()
     */
    public function nextChild(int $id): AbstractNode
    {
        $child = $this->getChild($id);
        $next = $this->children[$child->id()]['next'];
        if (!\is_int($next)) {
            throw new ChildNotFoundException("Child '$id' next sibling not found in this node.");
        }

        return $this->getChild($next);
    }

    /**
     * Attempts to get the previous child.
     *
     * @throws ChildNotFoundException when $id is not a child or has no previous sibling
     *
     * @uses $this->getChild()
     */
    public function previousChild(int $id): AbstractNode
    {
        $child = $this->getChild($id);
        $prev = $this->children[$child->id()]['prev'];
        if (!\is_int($prev)) {
            throw new ChildNotFoundException("Child '$id' previous not found in this node.");
        }

        return $this->getChild($prev);
    }

    /**
     * Checks if the given node id is a child of the
     * current node.
     */
    public function isChild(int $id): bool
    {
        return \array_key_exists($id, $this->children);
    }

    /**
     * Removes the child with id $childId and replaces it with the new child
     * $newChild, which takes over the old child's position and sibling links.
     *
     * The neighbour links are held in local variables; nothing is written
     * onto $newChild itself.
     *
     * @throws ChildNotFoundException when $childId is not a child of this node
     * @throws LogicalException
     */
    public function replaceChild(int $childId, AbstractNode $newChild): void
    {
        if (!isset($this->children[$childId])) {
            // Without this guard the key lookup below fails and the first
            // child's key would be overwritten instead.
            throw new ChildNotFoundException("Child '$childId' not found in this node.");
        }

        $oldChild = $this->children[$childId];
        $prevId = $oldChild['prev'];
        $nextId = $oldChild['next'];

        // rename the key of the old child in place so the order is preserved
        $keys = \array_keys($this->children);
        $index = \array_search($childId, $keys, true);
        $keys[$index] = $newChild->id();
        $combination = \array_combine($keys, $this->children);
        if ($combination === false) {
            // The number of elements for each array isn't equal or if the arrays are empty.
            throw new LogicalException('array combine failed during replace child method call.');
        }
        $this->children = $combination;
        $this->children[$newChild->id()] = [
            'prev' => $prevId,
            'node' => $newChild,
            'next' => $nextId,
        ];

        // change previous child id to new child
        if ($prevId !== null && isset($this->children[$prevId])) {
            $this->children[$prevId]['next'] = $newChild->id();
        }

        // change next child id to new child
        if ($nextId !== null && isset($this->children[$nextId])) {
            $this->children[$nextId]['prev'] = $newChild->id();
        }

        // remove old child
        unset($this->children[$childId]);

        // clean out cache
        $this->clear();
    }

    /**
     * Shortcut to return the first child.
     *
     * @throws ChildNotFoundException when this node has no children
     *
     * @uses $this->getChild()
     */
    public function firstChild(): AbstractNode
    {
        if (\count($this->children) === 0) {
            // no children
            throw new ChildNotFoundException('No children found in node.');
        }

        \reset($this->children);
        $key = (int) \key($this->children);

        return $this->getChild($key);
    }

    /**
     * Attempts to get the last child.
     *
     * @throws ChildNotFoundException when this node has no children
     * @throws LogicalException when the last key of the children array is not an int
     *
     * @uses $this->getChild()
     */
    public function lastChild(): AbstractNode
    {
        if (\count($this->children) === 0) {
            // no children
            throw new ChildNotFoundException('No children found in node.');
        }

        \end($this->children);
        $key = \key($this->children);
        if (!\is_int($key)) {
            throw new LogicalException('Children array contain child with a key that is not an int.');
        }

        return $this->getChild($key);
    }

    /**
     * Checks if the given node id is a descendant of the
     * current node, searching the children recursively.
     */
    public function isDescendant(int $id): bool
    {
        if ($this->isChild($id)) {
            return true;
        }

        foreach ($this->children as $child) {
            /** @var InnerNode $node */
            $node = $child['node'];
            if ($node instanceof InnerNode
                && $node->hasChildren()
                && $node->isDescendant($id)
            ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Sets the parent node.
     *
     * @throws ChildNotFoundException
     * @throws CircularException when the new parent is a descendant of this node
     */
    public function setParent(InnerNode $parent): AbstractNode
    {
        // check integrity
        if ($this->isDescendant($parent->id())) {
            throw new CircularException('Can not add descendant "' . $parent->id() . '" as my parent.');
        }

        // clear cache
        $this->clear();

        return parent::setParent($parent);
    }
}
@@ -0,0 +1,21 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom\Node;
use PHPHtmlParser\Dom\Tag;
/**
 * Class LeafNode.
 *
 * Abstract node type that extends AbstractNode without declaring any
 * members of its own. TextNode is built on top of it.
 *
 * @property-read string $outerhtml
 * @property-read string $innerhtml
 * @property-read string $innerText
 * @property-read string $text
 * @property-read Tag $tag
 * @property-read InnerNode $parent
 */
abstract class LeafNode extends AbstractNode
{
}
@@ -0,0 +1,155 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom\Node;
use PHPHtmlParser\Dom\Tag;
use PHPHtmlParser\Exceptions\LogicalException;
/**
* Class TextNode.
*
* @property-read string $outerhtml
* @property-read string $innerhtml
* @property-read string $innerText
* @property-read string $text
* @property-read Tag $tag
* @property-read InnerNode $parent
*/
class TextNode extends LeafNode
{
    /**
     * The tag of this node; the constructor always sets a tag named "text".
     *
     * @var Tag
     */
    protected $tag;

    /**
     * The raw text of this node, before decoding or charset conversion.
     *
     * @var string
     */
    protected $text;

    /**
     * Cached charset-converted text; null until text() computes it.
     *
     * @var ?string
     */
    protected $convertedText;

    /**
     * Sets the text for this node.
     *
     * With $removeDoubleSpace set, every run of whitespace is collapsed into
     * a single space. Afterwards every "&#10;" entity is turned into a real
     * line break.
     *
     * @param bool $removeDoubleSpace
     *
     * @throws LogicalException when the whitespace replacement fails
     */
    public function __construct(string $text, $removeDoubleSpace = true)
    {
        if ($removeDoubleSpace) {
            // remove double spaces
            $collapsed = \mb_ereg_replace('\s+', ' ', $text);
            if ($collapsed === false) {
                throw new LogicalException('mb_ereg_replace returns false when attempting to clean white space from "' . $text . '".');
            }
            $text = $collapsed;
        }

        // restore line breaks
        $this->text = \str_replace('&#10;', "\n", $text);
        $this->tag = new Tag('text');
        parent::__construct();
    }

    /**
     * Applies the html-special-chars decoding flag to this node and its tag.
     *
     * The converted text cache is dropped because it was computed with the
     * previous flag value.
     *
     * @param bool $htmlSpecialCharsDecode
     */
    public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
    {
        parent::setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);
        $this->tag->setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);
        $this->convertedText = null;
    }

    /**
     * Returns the text of this node.
     *
     * The raw text is html-special-chars decoded when that flag is set, and
     * then passed through the encoder when one is present. The encoded
     * result is cached.
     */
    public function text(): string
    {
        $text = $this->htmlSpecialCharsDecode
            ? \htmlspecialchars_decode($this->text)
            : $this->text;

        if ($this->encode === null) {
            // no charset conversion requested
            return $text;
        }

        if ($this->convertedText === null) {
            // remember the conversion
            $this->convertedText = $this->encode->convert($text);
        }

        return $this->convertedText;
    }

    /**
     * Sets the text for this node.
     *
     * The converted text cache is only invalidated here; text() rebuilds it
     * on the next call and applies the same decoding and conversion steps as
     * for the original text.
     *
     * NOTE(review): caches held by a parent node are not invalidated by this
     * method - confirm whether callers rely on refreshing them separately.
     */
    public function setText(string $text): void
    {
        $this->text = $text;
        $this->convertedText = null;
    }

    /**
     * This node has no html, just return the text.
     *
     * @uses $this->text()
     */
    public function innerHtml(): string
    {
        return $this->text();
    }

    /**
     * This node has no html, just return the text.
     *
     * @uses $this->text()
     */
    public function outerHtml(): string
    {
        return $this->text();
    }

    /**
     * Checks if the current node is a text node; always true here.
     */
    public function isTextNode(): bool
    {
        return true;
    }

    /**
     * Drops the cached converted text. Call this when something in the node
     * tree has changed, like the parent being changed.
     */
    protected function clear(): void
    {
        $this->convertedText = null;
    }
}
@@ -0,0 +1,348 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom;
use PHPHtmlParser\Content;
use PHPHtmlParser\Contracts\Dom\ParserInterface;
use PHPHtmlParser\Dom\Node\AbstractNode;
use PHPHtmlParser\Dom\Node\HtmlNode;
use PHPHtmlParser\Dom\Node\TextNode;
use PHPHtmlParser\DTO\TagDTO;
use PHPHtmlParser\Enum\StringToken;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\ContentLengthException;
use PHPHtmlParser\Exceptions\LogicalException;
use PHPHtmlParser\Exceptions\StrictException;
use PHPHtmlParser\Options;
use stringEncode\Encode;
class Parser implements ParserInterface
{
    /**
     * Attempts to parse the html in content.
     *
     * Builds a tree below a node named "root": text between tags becomes
     * TextNode children of the currently active node, opening tags become
     * HtmlNode children (and the new active node unless self closing), and
     * closing tags move the active node back up the tree.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     */
    public function parse(Options $options, Content $content, int $size): AbstractNode
    {
        // add the root node
        $root = new HtmlNode('root');
        $root->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode());
        $activeNode = $root;
        while ($activeNode !== null) {
            // inside a script tag only "</" ends the text, unless the input
            // was cleaned up beforehand
            if ($activeNode->tag->name() === 'script'
                && $options->isCleanupInput() !== true
            ) {
                $str = $content->copyUntil('</');
            } else {
                $str = $content->copyUntil('<');
            }

            if ($str == '') {
                $tagDTO = $this->parseTag($options, $content, $size);
                if (!$tagDTO->isStatus()) {
                    // we are done here
                    $activeNode = null;
                    continue;
                }

                // check if it was a closing tag
                if ($tagDTO->isClosing()) {
                    $foundOpeningTag = true;
                    $originalNode = $activeNode;
                    // walk up until the node this closing tag belongs to
                    while ($activeNode->getTag()->name() != $tagDTO->getTag()) {
                        $activeNode = $activeNode->getParent();
                        if ($activeNode === null) {
                            // we could not find opening tag
                            $activeNode = $originalNode;
                            $foundOpeningTag = false;
                            break;
                        }
                    }
                    if ($foundOpeningTag) {
                        $activeNode = $activeNode->getParent();
                    }
                    continue;
                }

                if ($tagDTO->getNode() === null) {
                    continue;
                }

                /** @var AbstractNode $node */
                $node = $tagDTO->getNode();
                $activeNode->addChild($node);

                // check if node is self closing
                if (!$node->getTag()->isSelfClosing()) {
                    $activeNode = $node;
                }
            } elseif ($options->isWhitespaceTextNode() ||
                \trim($str) != ''
            ) {
                // we found text we care about
                $textNode = new TextNode($str, $options->isRemoveDoubleSpace());
                $textNode->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode());
                $activeNode->addChild($textNode);
            }
        }

        return $root;
    }

    /**
     * Attempts to detect the charset that the html was sent in.
     *
     * Returns true when a charset was found in a meta tag and propagated to
     * the tree, false otherwise. With an enforced encoding in the options no
     * detection takes place and nothing is propagated.
     *
     * @throws ChildNotFoundException
     */
    public function detectCharset(Options $options, string $defaultCharset, AbstractNode $root): bool
    {
        // set the default
        $encode = new Encode();
        $encode->from($defaultCharset);
        $encode->to($defaultCharset);

        $enforceEncoding = $options->getEnforceEncoding();
        if ($enforceEncoding !== null) {
            //  they want to enforce the given encoding
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            return false;
        }

        /** @var AbstractNode|null $meta */
        $meta = $root->find('meta[http-equiv=Content-Type]', 0);
        if ($meta === null) {
            if (!$this->detectHTML5Charset($encode, $root)) {
                // could not find meta tag
                $root->propagateEncoding($encode);

                return false;
            }

            return true;
        }

        $content = $meta->getAttribute('content');
        if ($content === null) {
            // could not find content
            $root->propagateEncoding($encode);

            return false;
        }

        $matches = [];
        if (\preg_match('/charset=([^;]+)/', $content, $matches)) {
            $encode->from(\trim($matches[1]));
            $root->propagateEncoding($encode);

            return true;
        }

        // no charset found
        $root->propagateEncoding($encode);

        return false;
    }

    /**
     * Attempt to parse a tag out of the content.
     *
     * Returns a DTO built without arguments when the content is not at a
     * "<", when the content ends right after it, or when no tag name follows
     * it.
     *
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     */
    private function parseTag(Options $options, Content $content, int $size): TagDTO
    {
        if ($content->char() != '<') {
            // we are not at the beginning of a tag
            return TagDTO::makeFromPrimitives();
        }

        // check if this is a closing tag
        try {
            $content->fastForward(1);
        } catch (ContentLengthException $exception) {
            // we are at the end of the file
            return TagDTO::makeFromPrimitives();
        }
        if ($content->char() == '/') {
            return $this->makeEndTag($content, $options);
        }
        if ($content->char() == '?') {
            // special setting tag
            $tag = $content->fastForward(1)
                ->copyByToken(StringToken::SLASH(), true);
            $tag = (new Tag($tag))
                ->setOpening('<?')
                ->setClosing(' ?>')
                ->selfClosing();
        } elseif ($content->string(3) == '!--') {
            // comment tag
            $tag = $content->fastForward(3)
                ->copyByToken(StringToken::CLOSECOMMENT(), true);
            $tag = (new Tag($tag))
                ->setOpening('<!--')
                ->setClosing('-->')
                ->selfClosing();
        } else {
            $tag = \strtolower($content->copyByToken(StringToken::SLASH(), true));
            if (\trim($tag) == '') {
                // no tag found, invalid < found
                return TagDTO::makeFromPrimitives();
            }
        }
        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode());
        $this->setUpAttributes($content, $size, $node, $options, $tag);

        $content->skipByToken(StringToken::BLANK());
        if ($content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $content->fastForward(1);
        } elseif (\in_array($node->getTag()->name(), $options->getSelfClosing(), true)) {
            // Should be a self closing tag, check if we are strict
            if ($options->isStrict()) {
                $character = $content->getPosition();
                throw new StrictException("Tag '" . $node->getTag()->name() . "' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (\in_array($node->getTag()->name(), $options->getNoSlash(), true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        // step over the closing ">" when there is still content left
        if ($content->canFastForward(1)) {
            $content->fastForward(1);
        }

        return TagDTO::makeFromPrimitives(true, false, $node);
    }

    /**
     * Looks for a meta tag carrying a charset attribute and, when it holds a
     * usable value, applies it as source charset and propagates the encoder.
     *
     * Returns false when there is no such meta tag, or when its charset
     * attribute has no value or only whitespace. The attribute value is
     * nullable and must not reach trim() as null, which would be a TypeError
     * under strict types.
     *
     * @throws ChildNotFoundException
     */
    private function detectHTML5Charset(Encode $encode, AbstractNode $root): bool
    {
        /** @var AbstractNode|null $meta */
        $meta = $root->find('meta[charset]', 0);
        if ($meta === null) {
            return false;
        }

        $charset = $meta->getAttribute('charset');
        if ($charset === null) {
            return false;
        }

        $charset = \trim($charset);
        if ($charset === '') {
            return false;
        }

        $encode->from($charset);
        $root->propagateEncoding($encode);

        return true;
    }

    /**
     * Consumes a closing tag from the content and describes it as a DTO.
     *
     * A closing tag whose name is listed as self closing in the options does
     * not count as a closing tag.
     *
     * @throws ContentLengthException
     * @throws LogicalException
     */
    private function makeEndTag(Content $content, Options $options): TagDTO
    {
        $tag = $content->fastForward(1)
            ->copyByToken(StringToken::SLASH(), true);

        // move to end of tag
        $content->copyUntil('>');
        $content->fastForward(1);

        // check if this closing tag counts
        $tag = \strtolower($tag);
        if (\in_array($tag, $options->getSelfClosing(), true)) {
            return TagDTO::makeFromPrimitives(true);
        }

        return TagDTO::makeFromPrimitives(true, true, null, $tag);
    }

    /**
     * Reads the attributes of the tag currently being parsed and sets them on
     * the tag of $node. Stops at ">" or "/", or at the end of the content.
     *
     * @param string|Tag $tag only used for the strict-mode error message
     *
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws StrictException when strict mode is on and an attribute has no value
     */
    private function setUpAttributes(Content $content, int $size, HtmlNode $node, Options $options, $tag): void
    {
        while (
            $content->char() != '>' &&
            $content->char() != '/'
        ) {
            $space = $content->skipByToken(StringToken::BLANK(), true);
            if (empty($space)) {
                // no whitespace skipped: step one character ahead and retry
                try {
                    $content->fastForward(1);
                } catch (ContentLengthException $exception) {
                    // reached the end of the content
                    break;
                }
                continue;
            }

            $name = $content->copyByToken(StringToken::EQUAL(), true);
            if ($name == '/') {
                break;
            }

            if (empty($name)) {
                $content->skipByToken(StringToken::BLANK());
                continue;
            }

            $content->skipByToken(StringToken::BLANK());
            if ($content->char() == '=') {
                $content->fastForward(1)
                    ->skipByToken(StringToken::BLANK());
                switch ($content->char()) {
                    case '"':
                        // double quoted value
                        $content->fastForward(1);
                        $string = $content->copyUntil('"', true);
                        do {
                            $moreString = $content->copyUntilUnless('"', '=>');
                            $string .= $moreString;
                        } while (\strlen($moreString) > 0 && $content->getPosition() < $size);
                        $content->fastForward(1);
                        $node->getTag()->setAttribute($name, $string);
                        break;
                    case "'":
                        // single quoted value, remembered as not double quoted
                        $content->fastForward(1);
                        $string = $content->copyUntil("'", true);
                        do {
                            $moreString = $content->copyUntilUnless("'", '=>');
                            $string .= $moreString;
                        } while (\strlen($moreString) > 0 && $content->getPosition() < $size);
                        $content->fastForward(1);
                        $node->getTag()->setAttribute($name, $string, false);
                        break;
                    default:
                        // unquoted value
                        $node->getTag()->setAttribute($name, $content->copyByToken(StringToken::ATTR(), true));
                        break;
                }
            } else {
                // no value attribute
                if ($options->isStrict()) {
                    // can't have this in strict html
                    $character = $content->getPosition();
                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
                }
                $node->getTag()->setAttribute($name, null);
                if ($content->char() != '>') {
                    $content->rewind(1);
                }
            }
        }
    }
}
@@ -0,0 +1,100 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom;
use PHPHtmlParser\Dom\Node\AbstractNode;
use PHPHtmlParser\Dom\Node\HtmlNode;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\NotLoadedException;
trait RootAccessTrait
{
    /**
     * Contains the root node of this dom tree.
     *
     * @var HtmlNode
     */
    public $root;

    /**
     * Forwards a property read to the root node, after isLoaded() was called.
     *
     * @param string $name
     *
     * @throws NotLoadedException
     *
     * @return mixed
     */
    public function __get($name)
    {
        $this->isLoaded();
        $root = $this->root;

        return $root->$name;
    }

    /**
     * Returns the first child of the root node.
     *
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function firstChild(): AbstractNode
    {
        $this->isLoaded();
        $root = $this->root;

        return $root->firstChild();
    }

    /**
     * Returns the last child of the root node.
     *
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function lastChild(): AbstractNode
    {
        $this->isLoaded();
        $root = $this->root;

        return $root->lastChild();
    }

    /**
     * Returns the number of children of the root node.
     *
     * @throws NotLoadedException
     */
    public function countChildren(): int
    {
        $this->isLoaded();
        $root = $this->root;

        return $root->countChildren();
    }

    /**
     * Returns the children of the root node as an array.
     *
     * @throws NotLoadedException
     */
    public function getChildren(): array
    {
        $this->isLoaded();
        $root = $this->root;

        return $root->getChildren();
    }

    /**
     * Tells whether the root node has any children.
     *
     * @throws NotLoadedException
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();
        $root = $this->root;

        return $root->hasChildren();
    }

    /**
     * Called at the start of every accessor in this trait, before the root
     * node is touched. Supplied by the class using the trait.
     */
    abstract public function isLoaded(): void;
}
@@ -0,0 +1,365 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Dom;
use PHPHtmlParser\DTO\Tag\AttributeDTO;
use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException;
use stringEncode\Encode;
/**
* Class Tag.
*/
class Tag
{
    /**
     * The name of the tag.
     *
     * @var string
     */
    protected $name;

    /**
     * The attributes of the tag, keyed by lower-cased attribute name.
     *
     * @var AttributeDTO[]
     */
    protected $attr = [];

    /**
     * Is this tag self closing.
     *
     * @var bool
     */
    protected $selfClosing = false;

    /**
     * If self-closing, will this use a trailing slash. />.
     *
     * @var bool
     */
    protected $trailingSlash = true;

    /**
     * Tag noise.
     *
     * @var string
     */
    protected $noise = '';

    /**
     * The encoding class to... encode the tags.
     *
     * @var Encode|null
     */
    protected $encode;

    /**
     * When true, setAttribute() calls htmlspecialcharsDecode() on each new attribute.
     *
     * @var bool
     */
    private $HtmlSpecialCharsDecode = false;

    /**
     * What the opening of this tag will be.
     *
     * @var string
     */
    private $opening = '<';

    /**
     * What the closing tag for self-closing elements should be.
     *
     * @var string
     */
    private $closing = ' />';

    /**
     * Sets up the tag with a name.
     */
    public function __construct(string $name)
    {
        $this->name = $name;
    }

    /**
     * Returns the name of this tag.
     */
    public function name(): string
    {
        return $this->name;
    }

    /**
     * Sets the tag to be self closing.
     *
     * Mutates this instance and returns a clone of it.
     */
    public function selfClosing(): Tag
    {
        $this->selfClosing = true;

        return clone $this;
    }

    /**
     * Sets the string emitted before the tag name (default '<').
     *
     * Mutates this instance and returns a clone of it.
     */
    public function setOpening(string $opening): Tag
    {
        $this->opening = $opening;

        return clone $this;
    }

    /**
     * Sets the string emitted at the end of a self-closing tag (default ' />').
     *
     * Mutates this instance and returns a clone of it.
     */
    public function setClosing(string $closing): Tag
    {
        $this->closing = $closing;

        return clone $this;
    }

    /**
     * Sets the tag to not use a trailing slash.
     *
     * Mutates this instance and returns a clone of it.
     */
    public function noTrailingSlash(): Tag
    {
        $this->trailingSlash = false;

        return clone $this;
    }

    /**
     * Checks if the tag is self closing.
     */
    public function isSelfClosing(): bool
    {
        return $this->selfClosing;
    }

    /**
     * Sets the encoding type to be used.
     */
    public function setEncoding(Encode $encode): void
    {
        $this->encode = $encode;
    }

    /**
     * @param bool $htmlSpecialCharsDecode
     */
    public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
    {
        $this->HtmlSpecialCharsDecode = $htmlSpecialCharsDecode;
    }

    /**
     * Sets the noise for this tag (if any).
     *
     * Mutates this instance and returns a clone of it.
     */
    public function noise(string $noise): Tag
    {
        $this->noise = $noise;

        return clone $this;
    }

    /**
     * Set an attribute for this tag. The key is stored lower-cased.
     *
     * Mutates this instance and returns a clone of it.
     */
    public function setAttribute(string $key, ?string $attributeValue, bool $doubleQuote = true): Tag
    {
        $attributeDTO = AttributeDTO::makeFromPrimitives(
            $attributeValue,
            $doubleQuote
        );
        if ($this->HtmlSpecialCharsDecode) {
            $attributeDTO->htmlspecialcharsDecode();
        }
        $this->attr[\strtolower($key)] = $attributeDTO;

        return clone $this;
    }

    /**
     * Set inline style attribute value.
     *
     * The whole style attribute is rebuilt as "property:value;" pairs.
     *
     * @param mixed $attr_key
     * @param mixed $attr_value
     */
    public function setStyleAttributeValue($attr_key, $attr_value): void
    {
        $style_array = $this->getStyleAttributeArray();
        $style_array[$attr_key] = $attr_value;
        $style_string = '';
        foreach ($style_array as $key => $value) {
            $style_string .= $key . ':' . $value . ';';
        }
        $this->setAttribute('style', $style_string);
    }

    /**
     * Get style attribute in array (property => value).
     *
     * Declarations are split on ';' and then on the FIRST ':' only, so values
     * such as "url(http://example.com/a.png)" stay intact. Property names and
     * values are trimmed; empty or colon-less fragments are skipped. A missing
     * trailing ';' is fine.
     *
     * @throws \stringEncode\Exception
     */
    public function getStyleAttributeArray(): array
    {
        try {
            $value = $this->getAttribute('style')->getValue();
        } catch (AttributeNotFoundException $e) {
            // no style attribute at all
            return [];
        }
        if (\is_null($value)) {
            return [];
        }

        $result = [];
        foreach (\explode(';', $value) as $declaration) {
            $parts = \explode(':', $declaration, 2);
            if (\count($parts) !== 2) {
                continue;
            }
            $property = \trim($parts[0]);
            if ($property === '') {
                continue;
            }
            $result[$property] = \trim($parts[1]);
        }

        return $result;
    }

    /**
     * Removes an attribute from this tag.
     *
     * @param mixed $key
     *
     * @return void
     */
    public function removeAttribute($key)
    {
        $key = \strtolower($key);
        unset($this->attr[$key]);
    }

    /**
     * Removes all attributes on this tag.
     *
     * @return void
     */
    public function removeAllAttributes()
    {
        $this->attr = [];
    }

    /**
     * Sets the attributes for this tag.
     *
     * Each entry is either a plain value or an array with a 'value' key and
     * an optional 'doubleQuote' key (defaults to true, like setAttribute()).
     *
     * @return $this
     */
    public function setAttributes(array $attr)
    {
        foreach ($attr as $key => $info) {
            if (\is_array($info)) {
                $this->setAttribute($key, $info['value'] ?? null, $info['doubleQuote'] ?? true);
            } else {
                $this->setAttribute($key, $info);
            }
        }

        return $this;
    }

    /**
     * Returns all attributes of this tag.
     *
     * @throws \stringEncode\Exception
     *
     * @return AttributeDTO[]
     */
    public function getAttributes(): array
    {
        $return = [];
        foreach (\array_keys($this->attr) as $attr) {
            try {
                // array_keys() yields ints for numeric names; getAttribute() needs a string
                $return[$attr] = $this->getAttribute((string) $attr);
            } catch (AttributeNotFoundException $e) {
                // attribute that was in the array was not found in the array....
                unset($e);
            }
        }

        return $return;
    }

    /**
     * Returns an attribute by the key (case-insensitive).
     *
     * If an encoder is set, the stored attribute's value is converted in place first.
     *
     * @throws AttributeNotFoundException
     * @throws \stringEncode\Exception
     */
    public function getAttribute(string $key): AttributeDTO
    {
        $key = \strtolower($key);
        if (!isset($this->attr[$key])) {
            throw new AttributeNotFoundException('Attribute with key "' . $key . '" not found.');
        }
        $attributeDTO = $this->attr[$key];
        if (!\is_null($this->encode)) {
            // convert charset
            $attributeDTO->encodeValue($this->encode);
        }

        return $attributeDTO;
    }

    /**
     * Returns TRUE if node has attribute (case-insensitive, like getAttribute()).
     *
     * @return bool
     */
    public function hasAttribute(string $key)
    {
        return isset($this->attr[\strtolower($key)]);
    }

    /**
     * Generates the opening tag for this object.
     *
     * Attributes with a null value are rendered as a bare name. An attribute
     * whose lookup raises a TypeError is also rendered as a bare name.
     *
     * @return string
     */
    public function makeOpeningTag()
    {
        $return = $this->opening . $this->name;
        // add the attributes
        foreach (\array_keys($this->attr) as $key) {
            $key = (string) $key;
            try {
                $attributeDTO = $this->getAttribute($key);
            } catch (AttributeNotFoundException $e) {
                // attribute that was in the array not found in the array... let's continue.
                continue;
            } catch (\TypeError $e) {
                // no usable value: emit the bare name rather than touching
                // an undefined or stale $attributeDTO
                $return .= ' ' . $key;
                continue;
            }
            $val = $attributeDTO->getValue();
            if (\is_null($val)) {
                $return .= ' ' . $key;
            } elseif ($attributeDTO->isDoubleQuote()) {
                $return .= ' ' . $key . '="' . $val . '"';
            } else {
                $return .= ' ' . $key . '=\'' . $val . '\'';
            }
        }
        if ($this->selfClosing && $this->trailingSlash) {
            return $return . $this->closing;
        }

        return $return . '>';
    }

    /**
     * Generates the closing tag for this object.
     *
     * Self-closing tags have no closing tag, so an empty string is returned.
     *
     * @return string
     */
    public function makeClosingTag()
    {
        if ($this->selfClosing) {
            return '';
        }

        return '</' . $this->name . '>';
    }
}
@@ -0,0 +1,23 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Enum;
use MyCLabs\Enum\Enum;
/**
 * Named character sets / token strings (enum built on MyCLabs\Enum).
 *
 * The constants are private; instances are obtained through the static
 * factory methods declared below.
 *
 * @method static StringToken BLANK()
 * @method static StringToken EQUAL()
 * @method static StringToken SLASH()
 * @method static StringToken ATTR()
 * @method static StringToken CLOSECOMMENT()
 */
class StringToken extends Enum
{
    // space, tab, carriage return, line feed
    private const BLANK = " \t\r\n";
    // space, '=', '/', '>'
    private const EQUAL = ' =/>';
    // space, '/', '>', carriage return, line feed, tab
    private const SLASH = " />\r\n\t";
    // space, '>'
    private const ATTR = ' >';
    // the three-character sequence that ends an HTML comment
    private const CLOSECOMMENT = '-->';
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class ChildNotFoundException.
 *
 * Signals that a requested child node does not exist. For example,
 * Selector\Seeker::getNextChild() catches it to detect that a node has no
 * more children.
 */
final class ChildNotFoundException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class CircularException.
 *
 * Declared as a possible failure of the StaticDom load methods.
 * NOTE(review): the throw sites are not visible here; presumably raised when
 * a node relationship would become circular — confirm against the node classes.
 */
final class CircularException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class ContentLengthException.
 *
 * Thrown by Content::fastForward() when asked to move the position past the
 * length of the content.
 */
final class ContentLengthException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class CurlException.
 *
 * Unlike most exceptions in this namespace, this class is not final.
 * NOTE(review): the throw sites are not visible here; presumably related to
 * HTTP/cURL transfer failures — confirm before relying on it.
 */
class CurlException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class EmptyCollectionException.
 *
 * NOTE(review): the throw sites are not visible here; presumably raised when
 * an operation needs at least one element in a node collection — confirm.
 */
final class EmptyCollectionException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class LogicalException.
 *
 * NOTE(review): the throw sites are not visible here; the name suggests an
 * internal state that should be impossible — confirm against the callers.
 */
final class LogicalException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class NotLoadedException.
 *
 * Signals that a dom method was used before any content was loaded;
 * StaticDom::__callStatic() throws it when no Dom instance is set.
 */
final class NotLoadedException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class ParentNotFoundException.
 *
 * Declared as a possible failure of Finder::find().
 * NOTE(review): the throw sites are not visible here; presumably raised when
 * a node's parent is requested but none is set — confirm.
 */
final class ParentNotFoundException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class StrictException.
 *
 * Per the Options::$strict documentation, thrown in strict mode when the
 * html is not strictly compliant (missing closing tag, attribute without a
 * value, etc.).
 */
final class StrictException extends Exception
{
}
@@ -0,0 +1,12 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions\Tag;
/**
 * Class AttributeNotFoundException.
 *
 * Thrown by Dom\Tag::getAttribute() when the requested attribute key is not
 * set on the tag. Not declared final, unlike most sibling exceptions.
 */
class AttributeNotFoundException extends \Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class UnknownChildTypeException.
 *
 * NOTE(review): the throw sites are not visible here; presumably raised when
 * a child node of an unsupported type is encountered — confirm.
 */
final class UnknownChildTypeException extends Exception
{
}
@@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Exceptions;
use Exception;
/**
 * Class UnknownOptionException.
 *
 * NOTE(review): the throw sites are not visible here; presumably raised when
 * an unrecognised parser option is supplied — confirm.
 */
final class UnknownOptionException extends Exception
{
}
@@ -0,0 +1,64 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser;
use PHPHtmlParser\Dom\Node\AbstractNode;
use PHPHtmlParser\Dom\Node\InnerNode;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\ParentNotFoundException;
class Finder
{
    /**
     * Id of the node this finder searches for.
     *
     * @var int
     */
    private $id;

    /**
     * Finder constructor.
     *
     * @param $id id of the node to locate
     */
    public function __construct($id)
    {
        $this->id = $id;
    }

    /**
     * Find node in tree by id.
     *
     * Walks from $node through first children and next siblings, using id
     * comparisons to decide whether to descend or move sideways. Returns the
     * matching node, or false when no branch applies.
     *
     * @throws ChildNotFoundException
     * @throws ParentNotFoundException
     *
     * @return bool|AbstractNode
     */
    public function find(AbstractNode $node)
    {
        $canDescend = $node instanceof InnerNode;

        // a node with a falsy id is never a match itself: go straight to its children
        if (!$node->id() && $canDescend) {
            return $this->find($node->firstChild());
        }

        if ($node->id() == $this->id) {
            return $node;
        }

        if (!$node->hasNextSibling()) {
            // last sibling: the only place left to look is below this node
            if (!$node->isTextNode() && $canDescend) {
                return $this->find($node->firstChild());
            }

            return false;
        }

        $sibling = $node->nextSibling();
        if ($sibling->id() == $this->id) {
            return $sibling;
        }

        // sibling id already past the target: look below the current node
        if ($sibling->id() > $this->id && $canDescend) {
            return $this->find($node->firstChild());
        }

        // sibling id still before the target: continue from the sibling
        if ($sibling->id() < $this->id) {
            return $this->find($sibling);
        }

        return false;
    }
}
@@ -0,0 +1,367 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser;
/*
 * Parser options.
 *
 * Every set*, add*, remove* and clear* method mutates the instance it is
 * called on AND returns a clone of it.
 */
class Options
{
    /**
     * The whitespaceTextNode, by default true, option tells the parser to save textnodes even if the content of the
     * node is empty (only whitespace). Setting it to false will ignore all whitespace only text node found in the document.
     *
     * @var bool
     */
    private $whitespaceTextNode = true;

    /**
     * Strict, by default false, will throw a StrictException if it finds that the html is not strictly compliant
     * (all tags must have a closing tag, no attribute with out a value, etc.).
     *
     * @var bool
     */
    private $strict = false;

    /**
     * The enforceEncoding, by default null, option will enforce an character set to be used for reading the content
     * and returning the content in that encoding. Setting it to null will trigger an attempt to figure out
     * the encoding from within the content of the string given instead.
     *
     * @var ?string
     */
    private $enforceEncoding;

    /**
     * Set this to false to skip the entire clean up phase of the parser. Defaults to true.
     *
     * @var bool
     */
    private $cleanupInput = true;

    /**
     * Set this to false to skip removing the script tags from the document body. This might have adverse effects.
     * Defaults to true.
     *
     * NOTE: Ignored if cleanupInput is false (the whole clean up phase is skipped then).
     *
     * @var bool
     */
    private $removeScripts = true;

    /**
     * Set this to false to skip removing of style tags from the document body. This might have adverse effects. Defaults to true.
     *
     * NOTE: Ignored if cleanupInput is false (the whole clean up phase is skipped then).
     *
     * @var bool
     */
    private $removeStyles = true;

    /**
     * Preserves Line Breaks if set to true. If set to false line breaks are cleaned up
     * as part of the input clean up process. Defaults to false.
     *
     * NOTE: Ignored if cleanupInput is false (the whole clean up phase is skipped then).
     *
     * @var bool
     */
    private $preserveLineBreaks = false;

    /**
     * Set this to false if you want to preserve whitespace inside of text nodes. It is set to true by default.
     *
     * @var bool
     */
    private $removeDoubleSpace = true;

    /**
     * Set this to false if you want to preserve smarty script found in the html content. It is set to true by default.
     *
     * @var bool
     */
    private $removeSmartyScripts = true;

    /**
     * By default this is set to false. Setting this to true will apply the php function htmlspecialchars_decode to all attribute values and text nodes.
     *
     * @var bool
     */
    private $htmlSpecialCharsDecode = false;

    /**
     * A list of tags which will always be self closing.
     *
     * @var string[]
     */
    private $selfClosing = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'spacer',
        'track',
        'wbr',
    ];

    /**
     * A list of tags where there should be no /> at the end (html5 style).
     *
     * @var string[]
     */
    private $noSlash = [];

    public function isWhitespaceTextNode(): bool
    {
        return $this->whitespaceTextNode;
    }

    public function setWhitespaceTextNode(bool $whitespaceTextNode): Options
    {
        $this->whitespaceTextNode = $whitespaceTextNode;

        return clone $this;
    }

    public function isStrict(): bool
    {
        return $this->strict;
    }

    public function setStrict(bool $strict): Options
    {
        $this->strict = $strict;

        return clone $this;
    }

    public function getEnforceEncoding(): ?string
    {
        return $this->enforceEncoding;
    }

    public function setEnforceEncoding(?string $enforceEncoding): Options
    {
        $this->enforceEncoding = $enforceEncoding;

        return clone $this;
    }

    public function isCleanupInput(): bool
    {
        return $this->cleanupInput;
    }

    public function setCleanupInput(bool $cleanupInput): Options
    {
        $this->cleanupInput = $cleanupInput;

        return clone $this;
    }

    public function isRemoveScripts(): bool
    {
        return $this->removeScripts;
    }

    public function setRemoveScripts(bool $removeScripts): Options
    {
        $this->removeScripts = $removeScripts;

        return clone $this;
    }

    public function isRemoveStyles(): bool
    {
        return $this->removeStyles;
    }

    public function setRemoveStyles(bool $removeStyles): Options
    {
        $this->removeStyles = $removeStyles;

        return clone $this;
    }

    public function isPreserveLineBreaks(): bool
    {
        return $this->preserveLineBreaks;
    }

    public function setPreserveLineBreaks(bool $preserveLineBreaks): Options
    {
        $this->preserveLineBreaks = $preserveLineBreaks;

        return clone $this;
    }

    public function isRemoveDoubleSpace(): bool
    {
        return $this->removeDoubleSpace;
    }

    public function setRemoveDoubleSpace(bool $removeDoubleSpace): Options
    {
        $this->removeDoubleSpace = $removeDoubleSpace;

        return clone $this;
    }

    public function isRemoveSmartyScripts(): bool
    {
        return $this->removeSmartyScripts;
    }

    public function setRemoveSmartyScripts(bool $removeSmartyScripts): Options
    {
        $this->removeSmartyScripts = $removeSmartyScripts;

        return clone $this;
    }

    public function isHtmlSpecialCharsDecode(): bool
    {
        return $this->htmlSpecialCharsDecode;
    }

    public function setHtmlSpecialCharsDecode(bool $htmlSpecialCharsDecode): Options
    {
        $this->htmlSpecialCharsDecode = $htmlSpecialCharsDecode;

        return clone $this;
    }

    /**
     * @return string[]
     */
    public function getSelfClosing(): array
    {
        return $this->selfClosing;
    }

    public function setSelfClosing(array $selfClosing): Options
    {
        $this->selfClosing = $selfClosing;

        return clone $this;
    }

    /**
     * Adds the tag to the list of tags that will always be self closing.
     */
    public function addSelfClosingTag(string $tag): Options
    {
        $this->selfClosing[] = $tag;

        return clone $this;
    }

    /**
     * Adds the tags to the list of tags that will always be self closing.
     *
     * @param string[] $tags
     */
    public function addSelfClosingTags(array $tags): Options
    {
        foreach ($tags as $tag) {
            $this->selfClosing[] = $tag;
        }

        return clone $this;
    }

    /**
     * Removes the tag from the list of tags that will always be self closing.
     */
    public function removeSelfClosingTag(string $tag): Options
    {
        // array_diff() preserves keys; re-index so the list has no holes
        $this->selfClosing = \array_values(\array_diff($this->selfClosing, [$tag]));

        return clone $this;
    }

    /**
     * Sets the list of self closing tags to empty.
     */
    public function clearSelfClosingTags(): Options
    {
        $this->selfClosing = [];

        return clone $this;
    }

    /**
     * @return string[]
     */
    public function getNoSlash(): array
    {
        return $this->noSlash;
    }

    /**
     * @param string[] $noSlash
     */
    public function setNoSlash(array $noSlash): Options
    {
        $this->noSlash = $noSlash;

        return clone $this;
    }

    /**
     * Adds a tag to the list of self closing tags that should not have a trailing slash.
     */
    public function addNoSlashTag(string $tag): Options
    {
        $this->noSlash[] = $tag;

        return clone $this;
    }

    /**
     * Removes a tag from the list of no-slash tags.
     */
    public function removeNoSlashTag(string $tag): Options
    {
        // array_diff() preserves keys; re-index so the list has no holes
        $this->noSlash = \array_values(\array_diff($this->noSlash, [$tag]));

        return clone $this;
    }

    /**
     * Empties the list of no-slash tags.
     */
    public function clearNoSlashTags(): Options
    {
        $this->noSlash = [];

        return clone $this;
    }

    /**
     * Copies every option from $options and returns the resulting Options.
     *
     * Because each setter returns a clone, only the first setter in the chain
     * touches this instance; the fully populated object is the RETURN VALUE,
     * which callers must use.
     */
    public function setFromOptions(Options $options): Options
    {
        return $this->setCleanupInput($options->isCleanupInput())
            ->setEnforceEncoding($options->getEnforceEncoding())
            ->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode())
            ->setPreserveLineBreaks($options->isPreserveLineBreaks())
            ->setRemoveDoubleSpace($options->isRemoveDoubleSpace())
            ->setRemoveScripts($options->isRemoveScripts())
            ->setRemoveSmartyScripts($options->isRemoveSmartyScripts())
            ->setRemoveStyles($options->isRemoveStyles())
            ->setStrict($options->isStrict())
            ->setWhitespaceTextNode($options->isWhitespaceTextNode())
            ->setSelfClosing($options->getSelfClosing())
            ->setNoSlash($options->getNoSlash());
    }
}
@@ -0,0 +1,116 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Selector;
use PHPHtmlParser\Contracts\Selector\ParserInterface;
use PHPHtmlParser\DTO\Selector\ParsedSelectorCollectionDTO;
use PHPHtmlParser\DTO\Selector\ParsedSelectorDTO;
use PHPHtmlParser\DTO\Selector\RuleDTO;
/**
* This is the default parser for the selector.
*/
class Parser implements ParserInterface
{
    /**
     * Pattern of CSS selectors, modified from 'mootools'.
     *
     * Capture groups: 1 tag, 2 id, 3 class list, 4 attribute name,
     * 5 attribute operator, 6 attribute value, 7 separator.
     *
     * @var string
     */
    private $pattern = "/([\w\-:\*>]*)(?:\#([\w\-]+)|\.([\w\.\-]+))?(?:\[@?(!?[\w\-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";

    /**
     * Parses the selector string.
     *
     * Every regex match becomes one RuleDTO; a ',' separator closes the
     * current rule list and starts a new ParsedSelectorDTO.
     */
    public function parseSelectorString(string $selector): ParsedSelectorCollectionDTO
    {
        $selectors = [];
        $matches = [];
        $rules = [];
        // the pattern requires a trailing separator, hence the appended space
        \preg_match_all($this->pattern, \trim($selector) . ' ', $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            // default values
            $tag = \strtolower(\trim($match[1]));
            $operator = '=';
            $key = null;
            $value = null;
            $noKey = false;
            // '>' alters the behavior of the next element
            $alterNext = $tag === '>';

            // check for id selector
            if (!empty($match[2])) {
                $key = 'id';
                $value = $match[2];
            }

            // check for class selector
            if (!empty($match[3])) {
                $key = 'class';
                $value = \explode('.', $match[3]);
            }

            // and final attribute selector
            if (!empty($match[4])) {
                $key = \strtolower($match[4]);
            }
            if (!empty($match[5])) {
                $operator = $match[5];
            }
            if (!empty($match[6])) {
                $value = $match[6];
                if (\strpos($value, '][') !== false) {
                    // several [attr=value] groups were captured as one value
                    list($key, $value) = $this->splitCompoundAttributes($key, $value);
                }
            }

            // check for elements that do not have a specified attribute
            if (\is_string($key) && isset($key[0]) && $key[0] === '!') {
                $key = \substr($key, 1);
                $noKey = true;
            }

            $rules[] = RuleDTO::makeFromPrimitives(
                $tag,
                $operator,
                $key,
                $value,
                $noKey,
                $alterNext
            );

            if (isset($match[7]) && \is_string($match[7]) && \trim($match[7]) === ',') {
                $selectors[] = ParsedSelectorDTO::makeFromRules($rules);
                $rules = [];
            }
        }

        // save last results
        if (\count($rules) > 0) {
            $selectors[] = ParsedSelectorDTO::makeFromRules($rules);
        }

        return ParsedSelectorCollectionDTO::makeCollection($selectors);
    }

    /**
     * Splits a captured value such as `x][title=y` (from `[href=x][title=y]`)
     * into parallel key and value lists.
     *
     * The first fragment is always the value of $firstKey, even if it contains
     * '='. Later fragments are split on their FIRST '=' only, so values may
     * contain '='. Their keys are lower-cased like the first key. Quote
     * characters left over from quoted values are stripped. Only the plain
     * `name=value` form is recognised for the additional attributes.
     *
     * @param mixed $firstKey key parsed for the first attribute group
     *
     * @return array two-element array: [list of keys, list of values]
     */
    private function splitCompoundAttributes($firstKey, string $compound): array
    {
        $keys = [$firstKey];
        $values = [];
        foreach (\explode('][', $compound) as $index => $part) {
            if ($index > 0 && \strpos($part, '=') !== false) {
                list($name, $partValue) = \explode('=', $part, 2);
                $keys[] = \strtolower($name);
                $values[] = \trim($partValue, "\"'");
            } else {
                $values[] = \trim($part, "\"'");
            }
        }

        return [$keys, $values];
    }
}
@@ -0,0 +1,316 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Selector;
use PHPHtmlParser\Contracts\Selector\SeekerInterface;
use PHPHtmlParser\Dom\Node\AbstractNode;
use PHPHtmlParser\Dom\Node\InnerNode;
use PHPHtmlParser\Dom\Node\LeafNode;
use PHPHtmlParser\DTO\Selector\RuleDTO;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
class Seeker implements SeekerInterface
{
    /**
     * Attempts to find all children that match the rule
     * given.
     *
     * When the rule has a tag and a numeric key, the key is treated as a
     * 1-based index into $nodes themselves. Otherwise the children of each
     * node are tested, and children with their own children are searched
     * recursively unless the 'checkGrandChildren' option is falsy.
     *
     * @param InnerNode[] $nodes
     * @param array       $options list of option arrays; later entries override earlier ones
     *
     * @throws ChildNotFoundException
     */
    public function seek(array $nodes, RuleDTO $rule, array $options): array
    {
        // XPath index
        if ($rule->getTag() !== null && \is_numeric($rule->getKey())) {
            $count = 0;
            foreach ($nodes as $node) {
                if ($rule->getTag() == '*'
                    || $rule->getTag() == $node->getTag()->name()
                ) {
                    ++$count;
                    if ($count == $rule->getKey()) {
                        // found the node we wanted
                        return [$node];
                    }
                }
            }

            return [];
        }

        $options = $this->flattenOptions($options);
        $checkGrandChildren = !isset($options['checkGrandChildren'])
            || $options['checkGrandChildren'];

        $return = [];
        foreach ($nodes as $node) {
            // check if we are a leaf
            if ($node instanceof LeafNode || !$node->hasChildren()) {
                continue;
            }

            $child = $node->firstChild();
            while (!\is_null($child)) {
                // wild card, grab all
                if ($rule->getTag() == '*' && \is_null($rule->getKey())) {
                    $return[] = $child;
                    $child = $this->getNextChild($node, $child);
                    continue;
                }

                $pass = $this->checkTag($rule, $child);
                if ($pass && $rule->getKey() !== null) {
                    $pass = $this->checkKey($rule, $child);
                }
                if ($pass &&
                    $rule->getKey() !== null &&
                    $rule->getValue() !== null &&
                    $rule->getValue() != '*'
                ) {
                    $pass = $this->checkComparison($rule, $child);
                }

                if ($pass) {
                    // it passed all checks
                    $return[] = $child;
                }

                // descend regardless of whether this child itself matched
                if ($child instanceof InnerNode && $child->hasChildren() && $checkGrandChildren) {
                    // $options is already flat here; re-wrap it so the nested
                    // call's flattenOptions() receives a list of option arrays
                    foreach ($this->seek([$child], $rule, [$options]) as $match) {
                        $return[] = $match;
                    }
                }

                $child = $this->getNextChild($node, $child);
            }
        }

        return $return;
    }

    /**
     * Checks comparison condition from rules against node.
     *
     * The 'plaintext' key compares against the node's text; any other key
     * compares against that attribute. With an array of keys, every key must pass.
     */
    private function checkComparison(RuleDTO $rule, AbstractNode $node): bool
    {
        if ($rule->getKey() == 'plaintext') {
            // plaintext search
            $nodeValue = $node->text();
            $result = $this->checkNodeValue($nodeValue, $rule, $node);
        } else {
            // normal search
            if (!\is_array($rule->getKey())) {
                $nodeValue = $node->getAttribute($rule->getKey());
                $result = $this->checkNodeValue($nodeValue, $rule, $node);
            } else {
                $result = true;
                foreach ($rule->getKey() as $index => $key) {
                    $nodeValue = $node->getAttribute($key);
                    $result = $result &&
                        $this->checkNodeValue($nodeValue, $rule, $node, $index);
                }
            }
        }

        return $result;
    }

    /**
     * Flattens the option array.
     *
     * Merges a list of option arrays into one key => value map; later
     * entries win.
     *
     * @return array
     */
    private function flattenOptions(array $optionsArray)
    {
        $options = [];
        foreach ($optionsArray as $optionArray) {
            foreach ($optionArray as $key => $option) {
                $options[$key] = $option;
            }
        }

        return $options;
    }

    /**
     * Returns the next child or null if no more children.
     *
     * @return AbstractNode|null
     */
    private function getNextChild(
        AbstractNode $node,
        AbstractNode $currentChild
    ) {
        try {
            $child = null;
            if ($node instanceof InnerNode) {
                // get next child
                $child = $node->nextChild($currentChild->id());
            }
        } catch (ChildNotFoundException $e) {
            // no more children
            unset($e);
            $child = null;
        }

        return $child;
    }

    /**
     * Checks tag condition from rules against node.
     *
     * An empty rule tag or '*' matches every node.
     */
    private function checkTag(RuleDTO $rule, AbstractNode $node): bool
    {
        if (!empty($rule->getTag()) && $rule->getTag() != $node->getTag()->name()
            && $rule->getTag() != '*'
        ) {
            return false;
        }

        return true;
    }

    /**
     * Checks key condition from rules against node.
     *
     * For a "no key" rule the attribute(s) must be absent; otherwise every
     * key except 'plaintext' must be present on the node.
     */
    private function checkKey(RuleDTO $rule, AbstractNode $node): bool
    {
        if (!\is_array($rule->getKey())) {
            if ($rule->isNoKey()) {
                if ($node->getAttribute($rule->getKey()) !== null) {
                    return false;
                }
            } else {
                if ($rule->getKey() != 'plaintext'
                    && !$node->hasAttribute($rule->getKey())
                ) {
                    return false;
                }
            }
        } else {
            if ($rule->isNoKey()) {
                foreach ($rule->getKey() as $key) {
                    if (!\is_null($node->getAttribute($key))) {
                        return false;
                    }
                }
            } else {
                foreach ($rule->getKey() as $key) {
                    if ($key != 'plaintext'
                        && !$node->hasAttribute($key)
                    ) {
                        return false;
                    }
                }
            }
        }

        return true;
    }

    /**
     * Compares a node value against the rule value(s).
     *
     * Three cases: a single string rule value; a 'class' rule with a list of
     * required classes, each of which must match one of the node's classes;
     * a rule with several keys, where $index selects the value for the
     * current key.
     */
    private function checkNodeValue(
        ?string $nodeValue,
        RuleDTO $rule,
        AbstractNode $node,
        ?int $index = null
    ): bool {
        $check = false;
        if (
            $rule->getValue() !== null &&
            \is_string($rule->getValue()) &&
            $nodeValue !== null
        ) {
            $check = $this->match($rule->getOperator(), $rule->getValue(), $nodeValue);
        }

        // handle multiple classes
        $key = $rule->getKey();
        if (
            !$check &&
            $key == 'class' &&
            \is_array($rule->getValue())
        ) {
            $operator = $rule->getOperator();
            $nodeClasses = \explode(' ', $node->getAttribute('class') ?? '');
            foreach ($rule->getValue() as $value) {
                // start each required class from "not found" so a hit for the
                // previous class cannot leak into this one
                $check = false;
                foreach ($nodeClasses as $class) {
                    // '' comes from repeated/leading spaces; "0" is a valid class
                    if (
                        $class !== '' &&
                        \is_string($operator) &&
                        $this->match($operator, $value, $class)
                    ) {
                        $check = true;
                        break;
                    }
                }
                if (!$check) {
                    break;
                }
            }
        } elseif (
            !$check &&
            \is_array($key) &&
            !\is_null($nodeValue) &&
            \is_string($rule->getOperator()) &&
            \is_string($rule->getValue()[$index])
        ) {
            $check = $this->match($rule->getOperator(), $rule->getValue()[$index], $nodeValue);
        }

        return $check;
    }

    /**
     * Attempts to match the given arguments with the given operator.
     *
     * Both sides are lower-cased first. '=' and '!=' compare whole strings,
     * '^=' and '$=' test a literal prefix/suffix, and '*=' treats the
     * pattern as a regular expression (used as-is if it starts with '/',
     * otherwise wrapped in '/…/i'). Unknown operators never match.
     */
    private function match(
        string $operator,
        string $pattern,
        string $value
    ): bool {
        $value = \strtolower($value);
        $pattern = \strtolower($pattern);
        switch ($operator) {
            case '=':
                return $value === $pattern;
            case '!=':
                return $value !== $pattern;
            case '^=':
                return \preg_match('/^' . \preg_quote($pattern, '/') . '/', $value) == 1;
            case '$=':
                return \preg_match('/' . \preg_quote($pattern, '/') . '$/', $value) == 1;
            case '*=':
                // guard the offset read: an empty pattern has no first character
                if ($pattern !== '' && $pattern[0] === '/') {
                    return \preg_match($pattern, $value) == 1;
                }

                return \preg_match('/' . $pattern . '/i', $value) == 1;
            default:
                return false;
        }
    }
}
@@ -0,0 +1,105 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser\Selector;
use PHPHtmlParser\Contracts\Selector\ParserInterface;
use PHPHtmlParser\Contracts\Selector\SeekerInterface;
use PHPHtmlParser\Contracts\Selector\SelectorInterface;
use PHPHtmlParser\Discovery\SeekerDiscovery;
use PHPHtmlParser\Discovery\SelectorParserDiscovery;
use PHPHtmlParser\Dom\Node\AbstractNode;
use PHPHtmlParser\Dom\Node\Collection;
use PHPHtmlParser\DTO\Selector\ParsedSelectorCollectionDTO;
use PHPHtmlParser\DTO\Selector\RuleDTO;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
/**
* Class Selector.
*/
class Selector implements SelectorInterface
{
    /**
     * Result of parsing the selector string passed to the constructor.
     *
     * @var ParsedSelectorCollectionDTO
     */
    private $ParsedSelectorCollectionDTO;

    /**
     * Strategy used to walk nodes for each rule.
     *
     * @var SeekerInterface
     */
    private $seeker;

    /**
     * Parses the selector string; a parser and/or seeker that is not
     * supplied is obtained from the discovery classes.
     */
    public function __construct(string $selector, ?ParserInterface $parser = null, ?SeekerInterface $seeker = null)
    {
        $parser = $parser ?? SelectorParserDiscovery::find();
        $this->seeker = $seeker ?? SeekerDiscovery::find();
        $this->ParsedSelectorCollectionDTO = $parser->parseSelectorString($selector);
    }

    /**
     * Returns the parsed selectors produced in __construct.
     */
    public function getParsedSelectorCollectionDTO(): ParsedSelectorCollectionDTO
    {
        return $this->ParsedSelectorCollectionDTO;
    }

    /**
     * Runs every parsed selector against the given node and gathers all
     * matches into a single collection.
     *
     * @throws ChildNotFoundException
     */
    public function find(AbstractNode $node): Collection
    {
        $results = new Collection();

        foreach ($this->ParsedSelectorCollectionDTO->getParsedSelectorDTO() as $selector) {
            if (\count($selector->getRules()) == 0) {
                continue;
            }

            $matched = [$node];
            $pendingOptions = [];
            foreach ($selector->getRules() as $rule) {
                if (!$rule->isAlterNext()) {
                    $matched = $this->seeker->seek($matched, $rule, $pendingOptions);
                    // options only apply to the rule directly after them
                    $pendingOptions = [];
                    continue;
                }

                // a modifier rule: remember its options for the next real rule
                $pendingOptions[] = $this->alterNext($rule);
            }

            // whatever is left after the last rule is this selector's result set
            foreach ($matched as $result) {
                $results[] = $result;
            }
        }

        return $results;
    }

    /**
     * Translates a modifier rule into seek options for the rule after it.
     * A '>' tag disables the grandchildren search.
     */
    private function alterNext(RuleDTO $rule): array
    {
        return $rule->getTag() == '>'
            ? ['checkGrandChildren' => false]
            : [];
    }
}
@@ -0,0 +1,114 @@
<?php
declare(strict_types=1);
namespace PHPHtmlParser;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\Client;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\NotLoadedException;
use PHPHtmlParser\Exceptions\StrictException;
use Psr\Http\Client\ClientInterface;
use Psr\Http\Message\RequestInterface;
/**
* Class StaticDom.
*/
final class StaticDom
{
    /**
     * The Dom most recently created by a load* method or passed to mount().
     *
     * @var Dom|null
     */
    private static $dom = null;

    /**
     * Attempts to call the given method on the most recently created dom
     * from below.
     *
     * @throws NotLoadedException if no dom has been loaded or mounted yet
     *
     * @return mixed
     */
    public static function __callStatic(string $method, array $arguments)
    {
        $dom = self::$dom;
        if ($dom instanceof Dom) {
            return $dom->{$method}(...$arguments);
        }
        throw new NotLoadedException('The dom is not loaded. Can not call a dom method.');
    }

    /**
     * Call this to mount the static facade. The facade allows you to use
     * this object as a $className.
     *
     * Returns false, and changes nothing, when a class named $className
     * already exists; otherwise the alias is registered, $dom (if given)
     * becomes the current dom, and true is returned.
     *
     * @param ?Dom $dom
     */
    public static function mount(string $className = 'Dom', ?Dom $dom = null): bool
    {
        if (\class_exists($className)) {
            return false;
        }
        \class_alias(__CLASS__, $className);
        if ($dom instanceof Dom) {
            self::$dom = $dom;
        }

        return true;
    }

    /**
     * Creates a new dom object and calls loadFromFile() on the
     * new object.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws Exceptions\LogicalException
     */
    public static function loadFromFile(string $file, ?Options $options = null): Dom
    {
        $dom = new Dom();
        self::$dom = $dom;

        return $dom->loadFromFile($file, $options);
    }

    /**
     * Creates a new dom object and calls loadFromUrl() on the
     * new object.
     *
     * A Guzzle client and a GET request for $url are created when the
     * caller supplies none.
     *
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws \Psr\Http\Client\ClientExceptionInterface
     */
    public static function loadFromUrl(string $url, ?Options $options = null, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom
    {
        $dom = new Dom();
        self::$dom = $dom;
        if (\is_null($client)) {
            $client = new Client();
        }
        if (\is_null($request)) {
            $request = new Request('GET', $url);
        }

        return $dom->loadFromUrl($url, $options, $client, $request);
    }

    /**
     * Creates a new dom object and calls loadStr() on the
     * new object.
     */
    public static function loadStr(string $str, ?Options $options = null): Dom
    {
        $dom = new Dom();
        self::$dom = $dom;

        return $dom->loadStr($str, $options);
    }

    /**
     * Sets the $dom variable to null.
     */
    public static function unload(): void
    {
        self::$dom = null;
    }
}