DOMDocumentWrapper.php 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. <?php
  2. /**
  3. * DOMDocumentWrapper class simplifies work with DOMDocument.
  4. *
  5. * Known bug:
  6. * - in XHTML fragments, <br /> changes to <br clear="none" />
  7. *
  8. * @todo check XML catalogs compatibility
  9. * @author Tobiasz Cudnik <tobiasz.cudnik/gmail.com>
  10. * @package phpQuery
  11. */
  12. class DOMDocumentWrapper {
  13. /**
  14. * @var DOMDocument
  15. */
  16. public $document;
  17. public $id;
  18. /**
  19. * @todo Rewrite as method and guess if null.
  20. * @var unknown_type
  21. */
  22. public $contentType = '';
  23. public $xpath;
  24. public $uuid = 0;
  25. public $data = array();
  26. public $dataNodes = array();
  27. public $events = array();
  28. public $eventsNodes = array();
  29. public $eventsGlobal = array();
  30. /**
  31. * @TODO iframes support http://code.google.com/p/phpquery/issues/detail?id=28
  32. * @var unknown_type
  33. */
  34. public $frames = array();
  35. /**
  36. * Document root, by default equals to document itself.
  37. * Used by documentFragments.
  38. *
  39. * @var DOMNode
  40. */
  41. public $root;
  42. public $isDocumentFragment;
  43. public $isXML = false;
  44. public $isXHTML = false;
  45. public $isHTML = false;
  46. public $charset;
  /**
   * Creates the wrapper and, when markup is given, loads it right away.
   *
   * @param mixed $markup Markup to load; nothing is loaded when null.
   * @param string|null $contentType Passed through to load().
   * @param string|null $newDocumentID Document id; when empty an md5 hash of microtime() is used.
   */
  public function __construct($markup = null, $contentType = null, $newDocumentID = null) {
      if ($markup !== null) {
          $this->load($markup, $contentType, $newDocumentID);
      }
      if ($newDocumentID) {
          $this->id = $newDocumentID;
      } else {
          $this->id = md5(microtime());
      }
  }
  /**
   * Loads markup (or adopts an existing DOMDocument) into this wrapper.
   *
   * On success the document keeps whitespace, a DOMXPath is created for it
   * and afterMarkupLoad() is run.
   *
   * @param mixed $markup DOMDocument instance or markup handed to loadMarkup().
   * @param string|null $contentType Content type; stored lower-cased.
   * @param string|null $newDocumentID Not used by this method.
   * @return bool True when a document was loaded, false otherwise.
   */
  public function load($markup, $contentType = null, $newDocumentID = null) {
      // Cast first: passing null to strtolower() is deprecated since PHP 8.1.
      $this->contentType = strtolower((string) $contentType);
      if ($markup instanceof DOMDocument) {
          $this->document = $markup;
          $this->root = $this->document;
          $this->charset = $this->document->encoding;
          // TODO isDocumentFragment
          $loaded = true;
      } else {
          $loaded = $this->loadMarkup($markup);
      }
      if (! $loaded) {
          return false;
      }
      $this->document->preserveWhiteSpace = true;
      $this->xpath = new DOMXPath($this->document);
      $this->afterMarkupLoad();
      return true;
  }
  /**
   * Post-load hook: registers the "html" XPath namespace prefix for XHTML documents.
   */
  protected function afterMarkupLoad() {
      if (! $this->isXHTML) {
          return;
      }
      $this->xpath->registerNamespace("html", "http://www.w3.org/1999/xhtml");
  }
  /**
   * Loads markup with the HTML or the XML loader.
   *
   * When $this->contentType is set it selects the loader; otherwise the
   * markup is inspected with isXML().
   *
   * @param string $markup
   * @return bool Result of the selected loader; false when no loader was selected.
   */
  protected function loadMarkup($markup) {
      $loaded = false;
      if ($this->contentType) {
          self::debug("Load markup for content type {$this->contentType}");
          // content determined by contentType
          list($contentType, $charset) = $this->contentTypeToArray($this->contentType);
          switch($contentType) {
              case 'text/html':
                  phpQuery::debug("Loading HTML, content type '{$this->contentType}'");
                  $loaded = $this->loadMarkupHTML($markup, $charset);
                  break;
              case 'text/xml':
              case 'application/xhtml+xml':
                  phpQuery::debug("Loading XML, content type '{$this->contentType}'");
                  $loaded = $this->loadMarkupXML($markup, $charset);
                  break;
              default:
                  // for feeds or anything that sometimes doesn't use text/xml;
                  // strpos() takes the haystack first, so search for 'xml'
                  // inside the content type, not the other way round
                  if (strpos($this->contentType, 'xml') !== false) {
                      phpQuery::debug("Loading XML, content type '{$this->contentType}'");
                      $loaded = $this->loadMarkupXML($markup, $charset);
                  } else {
                      phpQuery::debug("Could not determine document type from content type '{$this->contentType}'");
                  }
          }
      } else {
          // content type autodetection
          if ($this->isXML($markup)) {
              phpQuery::debug("Loading XML, isXML() == true");
              $loaded = $this->loadMarkupXML($markup);
              if (! $loaded && $this->isXHTML) {
                  phpQuery::debug('Loading as XML failed, trying to load as HTML, isXHTML == true');
                  $loaded = $this->loadMarkupHTML($markup);
              }
          } else {
              phpQuery::debug("Loading HTML, isXML() == false");
              $loaded = $this->loadMarkupHTML($markup);
          }
      }
      return $loaded;
  }
  /**
   * Clears the isXML / isXHTML / isHTML flags before a new load.
   */
  protected function loadMarkupReset() {
      $this->isHTML = false;
      $this->isXHTML = false;
      $this->isXML = false;
  }
  /**
   * Creates a fresh DOMDocument and stores it, together with its encoding, on the wrapper.
   *
   * @param string $charset Encoding passed to the DOMDocument constructor.
   * @param string $version XML version; '1.0' is used when empty.
   */
  protected function documentCreate($charset, $version = '1.0') {
      $dom = new DOMDocument($version ? $version : '1.0', $charset);
      $this->document = $dom;
      $this->charset = $dom->encoding;
      $dom->formatOutput = true;
      $dom->preserveWhiteSpace = true;
  }
  /**
   * Loads markup as HTML, either as a full document or as a document fragment.
   *
   * The charset comes from the document's Content-Type meta tag, then from
   * $requestedCharset, then from phpQuery::$defaultCharset. When the requested
   * and the document charset differ and mbstring is available, the markup is
   * converted to the requested charset.
   *
   * @param string $markup
   * @param string|null $requestedCharset
   * @return bool Result of the underlying load.
   */
  protected function loadMarkupHTML($markup, $requestedCharset = null) {
      if (phpQuery::$debug) {
          phpQuery::debug('Full markup load (HTML): '.substr($markup, 0, 250));
      }
      $this->loadMarkupReset();
      $this->isHTML = true;
      if (! isset($this->isDocumentFragment)) {
          $this->isDocumentFragment = self::isDocumentFragmentHTML($markup);
      }

      $documentCharset = $this->charsetFromHTML($markup);
      // without a meta charset one has to be added to full documents later on
      $addDocumentCharset = ! $documentCharset;
      $charset = null;
      if ($documentCharset) {
          $charset = $documentCharset;
          $markup = $this->charsetFixHTML($markup);
      } elseif ($requestedCharset) {
          $charset = $requestedCharset;
      }
      if (! $charset) {
          $charset = phpQuery::$defaultCharset;
      }
      // HTTP 1.1 says that the default charset is ISO-8859-1
      // @see http://www.w3.org/International/O-HTTP-charset
      if ($addDocumentCharset) {
          $documentCharset = 'ISO-8859-1';
      }

      // Should be careful here, still need 'magic encoding detection' since lots
      // of pages have other 'default encoding'. Mixed encodings are not handled.
      $requestedCharset = strtoupper($requestedCharset);
      $documentCharset = strtoupper($documentCharset);
      phpQuery::debug("DOC: $documentCharset REQ: $requestedCharset");
      $charsetsDiffer = $requestedCharset && $documentCharset && $requestedCharset !== $documentCharset;
      if ($charsetsDiffer) {
          phpQuery::debug("CHARSET CONVERT");
          // Document Encoding Conversion
          // http://code.google.com/p/phpquery/issues/detail?id=86
          if (! function_exists('mb_detect_encoding')) {
              phpQuery::debug("TODO: charset conversion without mbstring...");
          } else {
              $candidates = implode(', ', array($documentCharset, $requestedCharset, 'AUTO'));
              $docEncoding = mb_detect_encoding($markup, $candidates);
              if (! $docEncoding) {
                  // ok trust the document
                  $docEncoding = $documentCharset;
              }
              phpQuery::debug("DETECTED '$docEncoding'");
              // A detected encoding that differs from the document's claim is not handled specially.
              if ($docEncoding !== $requestedCharset) {
                  phpQuery::debug("CONVERT $docEncoding => $requestedCharset");
                  $markup = mb_convert_encoding($markup, $requestedCharset, $docEncoding);
                  $markup = $this->charsetAppendToHTML($markup, $requestedCharset);
                  $charset = $requestedCharset;
              }
          }
      }

      $return = false;
      if ($this->isDocumentFragment) {
          phpQuery::debug("Full markup load (HTML), DocumentFragment detected, using charset '$charset'");
          $return = $this->documentFragmentLoadMarkup($this, $charset, $markup);
      } else {
          if ($addDocumentCharset) {
              phpQuery::debug("Full markup load (HTML), appending charset: '$charset'");
              $markup = $this->charsetAppendToHTML($markup, $charset);
          }
          phpQuery::debug("Full markup load (HTML), documentCreate('$charset')");
          $this->documentCreate($charset);
          if (phpQuery::$debug === 2) {
              $return = $this->document->loadHTML($markup);
          } else {
              // parser warnings are silenced unless debug level 2 is on
              $return = @$this->document->loadHTML($markup);
          }
          if ($return) {
              $this->root = $this->document;
          }
      }
      if ($return && ! $this->contentType) {
          $this->contentType = 'text/html';
      }
      return $return;
  }
  /**
   * Loads markup as XML (including XHTML), as a full document or a fragment.
   *
   * @param string $markup
   * @param string|null $requestedCharset
   * @return bool True on success.
   * @throws Exception When the markup could not be loaded.
   */
  protected function loadMarkupXML($markup, $requestedCharset = null) {
      if (phpQuery::$debug)
          phpQuery::debug('Full markup load (XML): '.substr($markup, 0, 250));
      $this->loadMarkupReset();
      $this->isXML = true;
      // check against XHTML in contentType or markup
      $isContentTypeXHTML = $this->isXHTML();
      $isMarkupXHTML = $this->isXHTML($markup);
      if ($isContentTypeXHTML || $isMarkupXHTML) {
          self::debug('Full markup load (XML), XHTML detected');
          $this->isXHTML = true;
      }
      // determine document fragment
      if (! isset($this->isDocumentFragment))
          $this->isDocumentFragment = $this->isXHTML
              ? self::isDocumentFragmentXHTML($markup)
              : self::isDocumentFragmentXML($markup);
      // this charset will be used
      $charset = null;
      // charset from XML declaration @var string
      $documentCharset = $this->charsetFromXML($markup);
      if (! $documentCharset) {
          if ($this->isXHTML) {
              // this is XHTML, try to get charset from content-type meta header
              $documentCharset = $this->charsetFromHTML($markup);
              if ($documentCharset) {
                  phpQuery::debug("Full markup load (XML), appending XHTML charset '$documentCharset'");
                  // The declaration used to be built and thrown away. Keep it, but only
                  // for full documents that do not start with a declaration already:
                  // fragments get wrapped with their own declaration, and a second
                  // declaration would make the markup unparsable.
                  if (! $this->isDocumentFragment && stripos(ltrim($markup), '<'.'?xml') !== 0) {
                      $markup = $this->charsetAppendToXML($markup, $documentCharset);
                  }
                  $charset = $documentCharset;
              }
          }
          if (! $documentCharset) {
              // if still no document charset...
              $charset = $requestedCharset;
          }
      } else if ($requestedCharset) {
          $charset = $requestedCharset;
      }
      if (! $charset) {
          $charset = phpQuery::$defaultCharset;
      }
      // TODO charset conversion when $requestedCharset differs from $documentCharset
      $return = false;
      if ($this->isDocumentFragment) {
          phpQuery::debug("Full markup load (XML), DocumentFragment detected, using charset '$charset'");
          $return = $this->documentFragmentLoadMarkup($this, $charset, $markup);
      } else {
          // FIXME ???
          if ($isContentTypeXHTML && ! $isMarkupXHTML && ! $documentCharset) {
              phpQuery::debug("Full markup load (XML), appending charset '$charset'");
              $markup = $this->charsetAppendToXML($markup, $charset);
          }
          // see http://pl2.php.net/manual/en/book.dom.php#78929
          // LIBXML_DTDLOAD (>= PHP 5.1)
          // TODO check whether XML catalogues work with LIBXML_NONET
          // TODO test LIBXML_COMPACT for performance improvement
          // create document
          $this->documentCreate($charset);
          // version_compare() instead of comparing the version string with a float
          if (version_compare(PHP_VERSION, '5.1.0', '<')) {
              $this->document->resolveExternals = true;
              $return = phpQuery::$debug === 2
                  ? $this->document->loadXML($markup)
                  : @$this->document->loadXML($markup);
          } else {
              /** @link http://pl2.php.net/manual/en/libxml.constants.php */
              $libxmlStatic = phpQuery::$debug === 2
                  ? LIBXML_DTDLOAD|LIBXML_DTDATTR|LIBXML_NONET
                  : LIBXML_DTDLOAD|LIBXML_DTDATTR|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOERROR;
              $return = $this->document->loadXML($markup, $libxmlStatic);
          }
          if ($return)
              $this->root = $this->document;
      }
      if (! $return) {
          throw new Exception("Error loading XML markup");
      }
      if (! $this->contentType) {
          $this->contentType = $this->isXHTML
              ? 'application/xhtml+xml'
              : 'text/xml';
      }
      return $return;
  }
  /**
   * Tells whether the content type (no argument) or the given markup looks like XHTML.
   *
   * Without $markup the stored content type is searched for 'xhtml'; with
   * $markup the markup is searched for the literal "<!DOCTYPE html".
   *
   * @param string|null $markup
   * @return bool
   */
  protected function isXHTML($markup = null) {
      $checkMarkup = isset($markup);
      $haystack = $checkMarkup ? $markup : $this->contentType;
      // XXX ok ?
      $needle = $checkMarkup ? "<!DOCTYPE html" : 'xhtml';
      return strpos($haystack, $needle) !== false;
  }
  /**
   * Tells whether an XML declaration opener occurs within the first 100 bytes of the markup.
   *
   * @param string $markup
   * @return bool
   */
  protected function isXML($markup) {
      $head = substr($markup, 0, 100);
      $declaration = '<'.'?xml';
      return strpos($head, $declaration) !== false;
  }
  /**
   * Splits a content type string such as "text/html; charset=utf-8".
   *
   * The input is lower-cased. Index 0 holds the media type and index 1 the
   * charset (null when there is no second part). Surrounding whitespace is
   * removed from both, and quotes are removed from the charset, so that
   * "text/html ; charset= utf-8" still yields 'text/html' and 'utf-8'.
   *
   * @param string $contentType
   * @return array contentType, charset
   */
  protected function contentTypeToArray($contentType) {
      $matches = explode(';', trim(strtolower($contentType)));
      $matches[0] = trim($matches[0]);
      if (! isset($matches[1])) {
          $matches[1] = null;
          return $matches;
      }
      $parameter = explode('=', $matches[1]);
      // strip 'charset='; fall back to the bare value when there is no '='
      $value = isset($parameter[1]) && trim($parameter[1])
          ? $parameter[1]
          : $parameter[0];
      $matches[1] = trim($value, " \t\n\r\0\x0B\"'");
      return $matches;
  }
  /**
   * Reads the content type from a meta http-equiv="Content-Type" tag in the markup.
   *
   * Both the tag and its content attribute are matched case-insensitively.
   *
   * @param string $markup
   * @return array contentType, charset; both null when no usable meta tag is found.
   */
  protected function contentTypeFromHTML($markup) {
      $tag = array();
      // find meta tag
      preg_match('@<meta[^>]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i',
          $markup, $tag
      );
      if (! isset($tag[0]))
          return array(null, null);
      // get attr 'content'; HTML attribute names are case-insensitive, hence the i flag
      $content = array();
      preg_match('@content\\s*=\\s*(["|\'])(.+?)\\1@i', $tag[0], $content);
      if (! isset($content[0]))
          return array(null, null);
      return $this->contentTypeToArray($content[2]);
  }
  /**
   * Returns the charset part of what contentTypeFromHTML() finds in the markup.
   *
   * @param string $markup
   * @return string|null
   */
  protected function charsetFromHTML($markup) {
      list(, $charset) = $this->contentTypeFromHTML($markup);
      return $charset;
  }
  /**
   * Extracts the encoding named in the markup's XML declaration.
   *
   * @param string $markup
   * @return string|null Lower-cased encoding, or null when none is declared.
   */
  protected function charsetFromXML($markup) {
      $matches = array();
      // find declaration
      preg_match('@<'.'?xml[^>]+encoding\\s*=\\s*(["|\'])(.*?)\\1@i',
          $markup, $matches
      );
      if (! isset($matches[2])) {
          return null;
      }
      return strtolower($matches[2]);
  }
  /**
   * Repositions meta[http-equiv=Content-Type] at the start of head. Bypasses DOMDocument bug.
   *
   * The markup is returned unchanged when it has no such meta tag or no
   * head tag to move it into.
   *
   * @link http://code.google.com/p/phpquery/issues/detail?id=80
   * @param string $markup
   * @return string
   */
  protected function charsetFixHTML($markup) {
      $meta = array();
      // find meta tag
      $found = preg_match('@\s*<meta[^>]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i',
          $markup, $meta, PREG_OFFSET_CAPTURE
      );
      if (! $found)
          return $markup;
      $metaContentType = $meta[0][0];
      $metaOffset = $meta[0][1];
      // cut the meta tag out of its current place
      $stripped = substr($markup, 0, $metaOffset)
          .substr($markup, $metaOffset + strlen($metaContentType));
      // locate the opening head tag, with or without attributes, in any case
      $head = array();
      if (! preg_match('@<head(?:\s[^>]*)?>@i', $stripped, $head, PREG_OFFSET_CAPTURE))
          return $markup;
      $insertAt = $head[0][1] + strlen($head[0][0]);
      return substr($stripped, 0, $insertAt).$metaContentType
          .substr($stripped, $insertAt);
  }
  /**
   * Puts a Content-Type meta tag carrying $charset into the HTML markup.
   *
   * Existing Content-Type meta tags are removed first. The new tag goes right
   * after the first opening head tag; without a head it is wrapped in a new
   * head after the first opening html tag; without either it is prepended.
   * Tag names are matched case-insensitively and as whole names, so elements
   * such as header are left alone.
   *
   * @param string $html
   * @param string $charset
   * @param bool $xhtml When true the meta tag is self-closed.
   * @return string
   */
  protected function charsetAppendToHTML($html, $charset, $xhtml = false) {
      // remove existing meta[type=content-type]
      $html = preg_replace('@\s*<meta[^>]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i', '', $html);
      $meta = '<meta http-equiv="Content-Type" content="text/html;charset='
          .$charset.'" '
          .($xhtml ? '/' : '')
          .'>';
      // The tag ends at the first '>' that is not part of a PHP-style closing
      // marker; only the first occurrence is touched.
      if (preg_match('@<head(?=[\s/>])@i', $html)) {
          return preg_replace(
              '@<(head)(?=[\s/>])(.*?)(?<!\?)>@is',
              '<\\1\\2>'.$meta,
              $html,
              1
          );
      }
      if (preg_match('@<html(?=[\s/>])@i', $html)) {
          return preg_replace(
              '@<(html)(?=[\s/>])(.*?)(?<!\?)>@is',
              "<\\1\\2><head>{$meta}</head>",
              $html,
              1
          );
      }
      return $meta.$html;
  }
  /**
   * Prepends an XML 1.0 declaration naming $charset to the markup.
   *
   * @param string $markup
   * @param string $charset
   * @return string
   */
  protected function charsetAppendToXML($markup, $charset) {
      return '<'.'?xml version="1.0" encoding="'.$charset.'"?'.'>'.$markup;
  }
  /**
   * HTML markup counts as a fragment when it has neither an html tag nor a doctype.
   *
   * @param string $markup
   * @return bool
   */
  public static function isDocumentFragmentHTML($markup) {
      if (stripos($markup, '<html') !== false) {
          return false;
      }
      return stripos($markup, '<!doctype') === false;
  }
  /**
   * XML markup counts as a fragment when it contains no XML declaration opener.
   *
   * @param string $markup
   * @return bool
   */
  public static function isDocumentFragmentXML($markup) {
      $declaration = '<'.'?xml';
      return false === stripos($markup, $declaration);
  }
  /**
   * XHTML fragments are detected with the same rule as HTML fragments.
   *
   * @param string $markup
   * @return bool
   */
  public static function isDocumentFragmentXHTML($markup) {
      $isFragment = self::isDocumentFragmentHTML($markup);
      return $isFragment;
  }
  /**
   * Placeholder, not implemented yet: does nothing and returns nothing.
   *
   * @param mixed $value
   */
  public function importAttr($value) {
      // TODO
  }
  /**
   * Imports nodes, or the nodes parsed from a markup string, into this document.
   *
   * @param mixed $source DOMNode, DOMNodeList, array of nodes, or markup string.
   * @param string|null $sourceCharset Passed on when $source is markup.
   * @return array Array of imported nodes.
   * @throws Exception When the markup could not be loaded as a document fragment.
   */
  public function import($source, $sourceCharset = null) {
      // TODO charset conversions
      if ($source instanceof DOMNode) {
          $source = array($source);
      }
      $isNodeSet = is_array($source) || $source instanceof DOMNodeList;
      if (! $isNodeSet) {
          // string markup: parse it into a temporary fragment and import its children
          $fake = $this->documentFragmentCreate($source, $sourceCharset);
          if ($fake === false) {
              throw new Exception("Error loading documentFragment markup");
          }
          return $this->import($fake->root->childNodes);
      }
      // dom nodes
      self::debug('Importing nodes to document');
      $imported = array();
      foreach ($source as $node) {
          $imported[] = $this->document->importNode($node, true);
      }
      return $imported;
  }
  /**
   * Creates new document fragment.
   *
   * The fragment copies this wrapper's content type and XML/HTML/XHTML flags.
   * Nodes are imported into an empty fragment; a markup string is parsed
   * directly as the fragment's content.
   *
   * @param mixed $source DOMNode, DOMNodeList, array of nodes, or markup string.
   * @param string|null $charset Defaults to this wrapper's charset.
   * @return DOMDocumentWrapper|false False when the fragment could not be loaded.
   */
  protected function documentFragmentCreate($source, $charset = null) {
      $fake = new DOMDocumentWrapper();
      $fake->contentType = $this->contentType;
      $fake->isXML = $this->isXML;
      $fake->isHTML = $this->isHTML;
      $fake->isXHTML = $this->isXHTML;
      $fake->root = $fake->document;
      if (! $charset)
          $charset = $this->charset;
      if ($source instanceof DOMNode)
          $source = array($source);
      if (is_array($source) || $source instanceof DOMNodeList) {
          // dom nodes
          // load fake document
          if (! $this->documentFragmentLoadMarkup($fake, $charset))
              return false;
          $nodes = $fake->import($source);
          foreach($nodes as $node)
              $fake->root->appendChild($node);
      } else {
          // string markup; report a failed load like the node branch does,
          // so callers testing for false can actually see it
          if (! $this->documentFragmentLoadMarkup($fake, $charset, $source))
              return false;
      }
      return $fake;
  }
  /**
   * Loads markup into $fragment by wrapping it in a throw-away document.
   *
   * XML markup is wrapped in a fake element (with the XHTML namespace and
   * doctype for XHTML); HTML markup is wrapped in html/head/body. The
   * fragment's root is then pointed at the wrapping element.
   *
   * @param DOMDocumentWrapper $fragment
   * @param string $charset
   * @param string|null $markup Treated as empty when null.
   * @return bool False when no root node could be determined.
   */
  private function documentFragmentLoadMarkup($fragment, $charset, $markup = null) {
      // TODO error handling
      // TODO copy doctype
      // null markup means "empty fragment"; cast so string functions get a string
      $markup = (string) $markup;
      // temporarily turn off, so the loaders treat the wrapper as a full document
      $fragment->isDocumentFragment = false;
      if ($fragment->isXML) {
          $declaration = '<'.'?xml version="1.0" encoding="'.$charset.'"?'.'>';
          if ($fragment->isXHTML) {
              // add FAKE element to set default namespace
              $fragment->loadMarkupXML($declaration
                  .'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
                  .'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
                  .'<fake xmlns="http://www.w3.org/1999/xhtml">'.$markup.'</fake>');
              // first child is the doctype, the fake element follows it
              $first = $fragment->document->firstChild;
              $fragment->root = $first ? $first->nextSibling : null;
          } else {
              $fragment->loadMarkupXML($declaration.'<fake>'.$markup.'</fake>');
              $fragment->root = $fragment->document->firstChild;
          }
      } else {
          $markup2 = phpQuery::$defaultDoctype.'<html><head><meta http-equiv="Content-Type" content="text/html;charset='
              .$charset.'"></head>';
          // tag names are case-insensitive in HTML
          $noBody = stripos($markup, '<body') === false;
          if ($noBody)
              $markup2 .= '<body>';
          $markup2 .= $markup;
          if ($noBody)
              $markup2 .= '</body>';
          $markup2 .= '</html>';
          $loaded = $fragment->loadMarkupHTML($markup2);
          // TODO resolve body tag merging issue
          // walk doctype -> html -> head -> body, stopping at the first missing node
          $doctype = $loaded ? $fragment->document->firstChild : null;
          $html = $doctype ? $doctype->nextSibling : null;
          $head = $html ? $html->firstChild : null;
          $fragment->root = $head ? $head->nextSibling : null;
      }
      if (! $fragment->root)
          return false;
      $fragment->isDocumentFragment = true;
      return true;
  }
  /**
   * Serializes a fragment and strips the wrapper markup that was added when it was loaded.
   *
   * @param DOMDocumentWrapper $fragment
   * @return string
   */
  protected function documentFragmentToMarkup($fragment) {
      phpQuery::debug('documentFragmentToMarkup');
      // serialize as a full document, then restore the flag
      $wasFragment = $fragment->isDocumentFragment;
      $fragment->isDocumentFragment = false;
      $markup = $fragment->markup();
      if (! $fragment->isXML) {
          $markup = substr($markup, strpos($markup, '<body>')+6);
          $markup = substr($markup, 0, strrpos($markup, '</body>'));
      } else {
          $markup = substr($markup, 0, strrpos($markup, '</fake>'));
          // 43 is the length of the namespaced opening fake tag, 6 of the plain one
          $start = $fragment->isXHTML
              ? strpos($markup, '<fake')+43
              : strpos($markup, '<fake>')+6;
          $markup = substr($markup, $start);
      }
      $fragment->isDocumentFragment = $wasFragment;
      if (phpQuery::$debug) {
          phpQuery::debug('documentFragmentToMarkup: '.substr($markup, 0, 150));
      }
      return $markup;
  }
  /**
   * Return document markup, starting with optional $nodes as root.
   *
   * @param DOMNode|DOMNodeList|array|null $nodes Nodes to serialize; the whole
   *        document is serialized when null or when it is a single DOMDocument.
   * @param bool $innerMarkup When true, the children of $nodes are serialized
   *        instead of the nodes themselves.
   * @return string
   */
  public function markup($nodes = null, $innerMarkup = false) {
      if (isset($nodes)) {
          // normalize to a plain array first, so count() and array access are safe
          if ($nodes instanceof DOMNodeList) {
              $list = array();
              foreach($nodes as $node)
                  $list[] = $node;
              $nodes = $list;
          } else if (! is_array($nodes)) {
              $nodes = array($nodes);
          }
          if (count($nodes) == 1 && reset($nodes) instanceof DOMDocument)
              $nodes = null;
      }
      if (isset($nodes)) {
          $markup = '';
          if ($this->isDocumentFragment && ! $innerMarkup) {
              // replace the fragment root by its children, keeping every other
              // node and the overall order (array union would drop colliding keys)
              $expanded = array();
              foreach($nodes as $node) {
                  if ($node->isSameNode($this->root)) {
                      foreach($node->childNodes as $child)
                          $expanded[] = $child;
                  } else {
                      $expanded[] = $node;
                  }
              }
              $nodes = $expanded;
          }
          if ($this->isXML && ! $innerMarkup) {
              self::debug("Getting outerXML with charset '{$this->charset}'");
              // we need outerXML, so we can benefit from
              // $node param support in saveXML()
              foreach($nodes as $node)
                  $markup .= $this->document->saveXML($node);
          } else {
              $loop = array();
              if ($innerMarkup) {
                  foreach($nodes as $node) {
                      if ($node->childNodes)
                          foreach($node->childNodes as $child)
                              $loop[] = $child;
                      else
                          $loop[] = $node;
                  }
              } else {
                  $loop = $nodes;
              }
              self::debug("Getting markup, moving selected nodes (".count($loop).") to new DocumentFragment");
              $fake = $this->documentFragmentCreate($loop);
              $markup = $this->documentFragmentToMarkup($fake);
          }
          if ($this->isXHTML) {
              self::debug("Fixing XHTML");
              $markup = self::markupFixXHTML($markup);
          }
          self::debug("Markup: ".substr($markup, 0, 250));
          return $markup;
      }
      if ($this->isDocumentFragment) {
          // documentFragment, html only...
          self::debug("Getting markup, DocumentFragment detected");
          // no markupFixXHTML here, it is applied through the markup($nodes) path
          return $this->documentFragmentToMarkup($this);
      }
      self::debug("Getting markup (".($this->isXML?'XML':'HTML')."), final with charset '{$this->charset}'");
      $markup = $this->isXML
          ? $this->document->saveXML()
          : $this->document->saveHTML();
      if ($this->isXHTML) {
          self::debug("Fixing XHTML");
          $markup = self::markupFixXHTML($markup);
      }
      self::debug("Markup: ".substr($markup, 0, 250));
      return $markup;
  }
  /**
   * Expands self-closed script, select and textarea tags in the markup.
   *
   * @param string $markup
   * @return string
   */
  protected static function markupFixXHTML($markup) {
      foreach (array('script', 'select', 'textarea') as $tag) {
          $markup = self::expandEmptyTag($tag, $markup);
      }
      return $markup;
  }
  /**
   * Forwards a debug message to phpQuery::debug().
   *
   * @param mixed $text
   */
  public static function debug($text) {
      phpQuery::debug($text);
  }
  /**
   * expandEmptyTag
   *
   * Rewrites self-closed occurrences of $tag that carry attributes, e.g.
   * <script src="a"/>, into an explicit open/close pair. Only tags written
   * as the tag name followed by a space are considered. Scanning stops at an
   * opening tag that is never closed by '>'.
   *
   * @param string $tag
   * @param string $xml
   * @return string
   * @author mjaque at ilkebenson dot com
   * @link http://php.net/manual/en/domdocument.savehtml.php#81256
   */
  public static function expandEmptyTag($tag, $xml){
      $offset = 0;
      while ($offset < strlen($xml)) {
          $open = strpos($xml, "<$tag ", $offset);
          // compare with false: a tag at offset 0 is a valid match
          if ($open === false)
              break;
          $close = strpos($xml, ">", $open);
          // unterminated tag: nothing left to expand, and continuing would never advance
          if ($close === false)
              break;
          if ($xml[$close - 1] == "/") {
              $xml = substr_replace($xml, "></$tag>", $close - 1, 2);
          }
          $offset = $close;
      }
      return $xml;
  }
  680. }