以下代码用于采集页面时,获取网页中所有的链接,并循环输出:
$html = file_get_contents('http://www.w3cmap.com'); $dom = new DOMDocument(); @$dom->loadHTML($html); // grab all the on the page $xpath = new DOMXPath($dom); $hrefs = $xpath->evaluate("/html/body//a"); for ($i = 0; $i < $hrefs->length; $i++) { $href = $hrefs->item($i); $url = $href->getAttribute('href'); echo $url.'
'; }