*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

define("CACHE_SQLITE_FILE", "./data/cache.db");
define("CACHE_EXPIRY_SECONDS", 7 * 24 * 60 * 60); // 7 days...

/**
 * Downloads a URL with cURL, following redirects and accepting gzip encoding.
 *
 * @param string      $URL          URL to download.
 * @param string|null $effectiveURL Receives the final URL after redirects. Optional, so
 *                                  callers that do not need it may omit the argument.
 * @return string|false The response body, or FALSE when the transfer failed or the
 *                      body is empty.
 */
function fetchfile($URL, &$effectiveURL = NULL)
{
    $c = curl_init();
    if ($c === false) {
        // No handle could be created; report the requested URL as the effective one.
        $effectiveURL = $URL;
        return FALSE;
    }

    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_URL, $URL);
    curl_setopt($c, CURLOPT_ENCODING, "gzip");
    curl_setopt($c, CURLOPT_FOLLOWLOCATION, true);

    $contents = curl_exec($c);
    $effectiveURL = curl_getinfo($c, CURLINFO_EFFECTIVE_URL);
    curl_close($c);

    return $contents ? $contents : FALSE;
}

/**
 * Plain value holder describing one cover art hit returned by a provider.
 */
class CoverArtResult
{
    public $title;
    public $detailsURL;
    public $thumbnailImageURL;
    public $imageURL;
}

/**
 * Base class for all cover art providers. It supplies the HTML fetching and XPath
 * helpers that the concrete scrapers build on.
 */
abstract class AbstractCoverArtProvider
{
    /**
     * Searches for cover art.
     *
     * @param string $artist Artist name.
     * @param string $album  Album title.
     * @param int    $limit  Maximum number of results, 0 for no limit.
     * @return object Object with a "results" array of CoverArtResult; the built-in
     *                scrapers also add a "details" array with diagnostic information.
     */
    abstract public function getCoverArtList($artist, $album, $limit = 0);

    /**
     * Downloads the image of the first cover art hit.
     *
     * @param string $artist Artist name.
     * @param string $album  Album title.
     * @return array Always has 'success' (bool); on success also 'imagedata' and 'mimetype'.
     */
    public function getCoverArtImage($artist, $album)
    {
        $coverArtList = $this->getCoverArtList($artist, $album, 1);

        // Providers return CoverArtResult objects in ->results; pull out the image URLs.
        $imageURLs = array();
        if (isset($coverArtList->results) && is_array($coverArtList->results)) {
            foreach ($coverArtList->results as $coverArtItem) {
                if (!empty($coverArtItem->imageURL))
                    $imageURLs[] = $coverArtItem->imageURL;
            }
        }

        $result = array();
        $this->tryDownloadImage($result, $imageURLs, 'image/jpeg');
        return $result;
    }

    /**
     * @return string Provider name; defaults to the concrete class name.
     */
    public function name()
    {
        return get_class($this);
    }

    /**
     * Fetches a URL and parses the response as HTML.
     *
     * @param string $url URL to fetch.
     * @return object Object with "dom" (DomDocument), "loaded" (bool) and a "details"
     *                array containing 'fetchTime', 'url' and 'effectiveURL'.
     */
    protected function getHTMLDom($url)
    {
        $startTime = microtime(true);

        $dom = new DomDocument();
        $effectiveURL = $url;
        $html = fetchfile($url, $effectiveURL);

        $htmlLoaded = false;
        if ($html !== FALSE) {
            // Scraped pages are rarely valid HTML: collect the parser warnings
            // internally and discard them instead of emitting them.
            $previousSetting = libxml_use_internal_errors(true);
            $htmlLoaded = $dom->loadHTML($html);
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        $endTime = microtime(true);
        $fetchTime = $endTime - $startTime; // covers download and parsing

        $result = new stdClass();
        $result->dom = $dom;
        $result->details = array();
        $result->details['fetchTime'] = $fetchTime;
        $result->details['url'] = $url;
        $result->details['effectiveURL'] = $effectiveURL;
        $result->loaded = $htmlLoaded;
        return $result;
    }

    /**
     * Evaluates an XPath expression and collects the matches.
     *
     * @param object $dom                Either an object returned by getHTMLDom() (the
     *                                   expression is evaluated against the document), a
     *                                   DOMDocument, or a DOMNode used as context node.
     * @param string $xpath              Expression to evaluate.
     * @param int    $limit              Maximum number of nodes to collect, 0 for no limit.
     * @param string $xpathStopCondition Optional expression; when it matches at least one
     *                                   node, $xpath is not evaluated at all.
     * @return object Object with "items" (matched nodes, or a single scalar for
     *                expressions that do not yield a node list) and a "details" array.
     */
    protected function getDOMNodesByXPath($dom, $xpath, $limit = 0, $xpathStopCondition = "")
    {
        $result = new stdClass();
        $result->items = array();
        $result->details = array();
        $result->details['xpath'] = $xpath;
        if (!empty($xpathStopCondition))
            $result->details['xpathStopCondition'] = $xpathStopCondition;

        $domDoc = NULL;
        $domNode = NULL;
        if (isset($dom->dom))
            $domDoc = $dom->dom;
        else if ($dom instanceof DOMDocument)
            $domDoc = $dom;
        else if ($dom instanceof DOMNode) {
            $domDoc = $dom->ownerDocument;
            $domNode = $dom;
        }

        if (is_null($domDoc))
            return $result;

        $xp = new DomXPath($domDoc);

        $stopEval = false;
        if (!empty($xpathStopCondition)) {
            if (!is_null($domNode))
                $items = $xp->evaluate($xpathStopCondition, $domNode);
            else
                $items = $xp->evaluate($xpathStopCondition);

            // evaluate() returns FALSE for an invalid expression and a scalar for
            // typed expressions; only a non-empty node list stops the evaluation.
            $stopEval = is_object($items) && $items->length > 0;
            $result->details['xpathStopConditionMatched'] = $stopEval;
        }

        if (!$stopEval) {
            $startTime = microtime(true);

            if (!is_null($domNode))
                $items = $xp->evaluate($xpath, $domNode);
            else
                $items = $xp->evaluate($xpath);

            if (is_object($items)) {
                $i = 0;
                foreach ($items as $item) {
                    $result->items[] = $item;
                    ++$i;
                    if ($limit > 0 && $i == $limit)
                        break;
                }
            } else if (!empty($items))
                $result->items[] = $items; // scalar result, e.g. from substring-before()

            $endTime = microtime(true);
            $result->details['evalTime'] = $endTime - $startTime;
        }

        return $result;
    }

    /**
     * Like getDOMNodesByXPath(), but "items" holds the string values of the matches.
     * Empty values and values equal to $invalidItemFilter are dropped.
     *
     * @param object $dom                See getDOMNodesByXPath().
     * @param string $xpath              Expression to evaluate.
     * @param int    $limit              Maximum number of nodes to evaluate, 0 for no limit.
     * @param string $xpathStopCondition See getDOMNodesByXPath().
     * @param string $invalidItemFilter  Value to be excluded from the list.
     * @return object Same shape as getDOMNodesByXPath(); "items" is always an array.
     */
    protected function getListByXPath($dom, $xpath, $limit = 0, $xpathStopCondition = "", $invalidItemFilter = "")
    {
        $result = $this->getDOMNodesByXPath($dom, $xpath, $limit, $xpathStopCondition);

        $list = array(); // must exist even when nothing matched
        foreach ($result->items as $item) {
            if (is_object($item))
                $item = $item->nodeValue;
            if (!empty($item) && $item != $invalidItemFilter)
                $list[] = $item;
        }

        $result->items = $list;
        return $result;
    }

    /**
     * Returns the first entry of a list produced by getListByXPath().
     *
     * @param object $list    Object with an "items" array.
     * @param mixed  $default Value returned when the list is empty.
     * @return mixed
     */
    protected function firstItem($list, $default = '')
    {
        if (isset($list->items) && is_array($list->items) && isset($list->items[0]))
            return $list->items[0];
        return $default;
    }

    /**
     * Tries the given URLs in order until one of them can be downloaded.
     *
     * @param array  $result    Receives 'success' and, on success, 'imagedata' and 'mimetype'.
     * @param array  $imageURLs Candidate image URLs.
     * @param string $mimetype  MIME type reported for the downloaded data.
     */
    protected function tryDownloadImage(&$result, $imageURLs, $mimetype = 'image/jpeg')
    {
        $result['success'] = false;
        if (!is_array($imageURLs))
            return;

        foreach ($imageURLs as $imageURL) {
            $effectiveURL = NULL; // required by fetchfile(), not needed here
            $imageData = fetchfile($imageURL, $effectiveURL);
            if ($imageData !== false) {
                $result['imagedata'] = $imageData;
                $result['mimetype'] = $mimetype;
                $result['success'] = true;
                return;
            }
        }
    }
}

/**
 * Scrapes the Walmart music search; returns at most one result.
 */
class CoverArtProviderWalmart extends AbstractCoverArtProvider
{
    public function getCoverArtList($artist, $album, $limit = 0)
    {
        // Walmart returns an image even if it doesn't have a cover available
        // Currently there is no way to detect the placeholder image without inspecting the
        // image data...
        $list = new stdClass();
        $list->results = array();
        $list->details = array();

        $dom = $this->getHTMLDom('http://www.walmart.com/catalog/search-ng.gsp?search_constraint=4104&search_query=' . urlencode($artist . ', ' . $album));
        $list->details[] = $dom;

        $photoNodes = $this->getDOMNodesByXPath($dom, '//div[@class="BoxContent"]//div[starts-with(@class, "LargeItemPhoto")]');
        $list->details[] = $photoNodes;

        if (count($photoNodes->items) > 0) {
            $photoNode = $photoNodes->items[0];

            $titleList = $this->getListByXPath($photoNode, './a/img/@alt');
            $list->details[] = $titleList;
            $thumbnailImageList = $this->getListByXPath($photoNode, './a/img/@src');
            $list->details[] = $thumbnailImageList;
            $imageList = $this->getListByXPath($photoNode, 'substring-before(substring-after(./a/@href, "photo_opener(\'"), "&product_id=")');
            $list->details[] = $imageList;

            $coverArtItem = new CoverArtResult();
            $coverArtItem->title = $this->firstItem($titleList);
            $coverArtItem->detailsURL = $dom->details["effectiveURL"];
            $coverArtItem->thumbnailImageURL = $this->firstItem($thumbnailImageList);
            $coverArtItem->imageURL = $this->firstItem($imageList);
            $list->results[] = $coverArtItem;
        }

        return $list;
    }
}

/**
 * Scrapes the Buy.com music search. Every hit needs an extra request for its
 * details page, because only that page links to the large image.
 */
class CoverArtProviderBuyDotCom extends AbstractCoverArtProvider
{
    public function getCoverArtList($artist, $album, $limit = 0)
    {
        $list = new stdClass();
        $list->results = array();
        $list->details = array();

        $dom = $this->getHTMLDom('http://www.buy.com/retail/searchresults.asp?search_store=6&qu=' . urlencode($artist . ' ' . $album));
        $list->details[] = $dom;

        $resultNodes = $this->getDOMNodesByXPath(
            $dom,
            '//form[@name="frmSearchProducts"]//td[(@class="listTop" or @class="list") and count(./a/@href) = 1]',
            0,
            // Buy.com automatically estimates if no exact match is found, so stop as soon as we detect this...
            '//*[contains(text(), "We could not find an exact match")]'
        );
        $list->details[] = $resultNodes;

        $count = 0;
        foreach ($resultNodes->items as $resultNode) {
            $titleList = $this->getListByXPath($resultNode, './a/img/@alt');
            $list->details[] = $titleList;
            $detailsList = $this->getListByXPath($resultNode, 'concat("http://www.buy.com", ./a/@href)');
            $list->details[] = $detailsList;
            $thumbnailImageList = $this->getListByXPath($resultNode, './a/img/@src');
            $list->details[] = $thumbnailImageList;

            if (!isset($detailsList->items[0]))
                continue;

            $detailsDom = $this->getHTMLDom($detailsList->items[0]);
            $imageList = $this->getListByXPath($detailsDom, 'substring-before(substring-after(//div[@id="mainImgSection"]//a[@id="PROD_lrg_img_link"]/@href, "largeIMTop(\'"), "\',\'")');
            $list->details[] = $imageList;

            if (count($imageList->items) > 0) {
                $coverArtItem = new CoverArtResult();
                $coverArtItem->title = $this->firstItem($titleList);
                $coverArtItem->detailsURL = $detailsList->items[0];
                $coverArtItem->thumbnailImageURL = $this->firstItem($thumbnailImageList);
                $coverArtItem->imageURL = $imageList->items[0];
                $list->results[] = $coverArtItem;

                ++$count;
                if ($count == $limit)
                    break;
            }
        }

        return $list;
    }
}

/**
 * Scrapes the Amazon music search of the given top level domain.
 */
class CoverArtProviderAmazon extends AbstractCoverArtProvider
{
    private $topLevelDomain;

    /**
     * @param string $topLevelDomain Amazon site to query, e.g. "com" or "de".
     */
    public function __construct($topLevelDomain = "com")
    {
        $this->topLevelDomain = $topLevelDomain;
    }

    public function name()
    {
        return parent::name() . "_" . $this->topLevelDomain;
    }

    public function getCoverArtList($artist, $album, $limit = 0)
    {
        $list = new stdClass();
        $list->results = array();
        $list->details = array();

        $dom = $this->getHTMLDom('http://www.amazon.' . $this->topLevelDomain . '/gp/search/?search-alias=popular&unfiltered=1&sort=salesrank&field-keywords=&field-artist=' . urlencode(utf8_decode($artist)) . '&field-title=' . urlencode(utf8_decode($album)));
        $list->details[] = $dom;

        $resultNodes = $this->getDOMNodesByXPath(
            $dom,
            '//div[@id="Results"]//td[starts-with(@id, "search:Td")]//table[@class="n2"]//tr[.//td/@class="imageColumn" and not(contains(.//img/@src, "no-img-"))]',
            // '//div[@id="Results"]//div[@class="productImage"]//a[not(contains(img/@src, "no-img-"))]/img/@src',
            $limit
        );
        $list->details[] = $resultNodes;

        foreach ($resultNodes->items as $resultNode) {
            $titleList = $this->getListByXPath($resultNode, './/td[@class="dataColumn"]//span[@class="srTitle"]/text()');
            $list->details[] = $titleList;
            $detailsList = $this->getListByXPath($resultNode, './/td[@class="dataColumn"]//a[./span/@class="srTitle"]/@href');
            $list->details[] = $detailsList;
            $thumbnailImageList = $this->getListByXPath($resultNode, './/td[@class="imageColumn"]//a/img/@src');
            $list->details[] = $thumbnailImageList;

            $thumbnailImageURL = $this->firstItem($thumbnailImageList);

            // The large image shares the thumbnail's base name; only the part from
            // "._" onwards is replaced. Without that marker the thumbnail URL is the
            // best URL available, so it is used unchanged.
            $markerPos = strpos($thumbnailImageURL, "._");
            if ($markerPos !== false)
                $imageURL = substr($thumbnailImageURL, 0, $markerPos) . '._SL500_.jpg';
            else
                $imageURL = $thumbnailImageURL;

            $coverArtItem = new CoverArtResult();
            $coverArtItem->title = $this->firstItem($titleList);
            $coverArtItem->detailsURL = $this->firstItem($detailsList);
            $coverArtItem->thumbnailImageURL = $thumbnailImageURL;
            $coverArtItem->imageURL = $imageURL;
            $list->results[] = $coverArtItem;
        }

        return $list;
    }
}

/**
 * Scrapes the Google image search of the given top level domain.
 */
class CoverArtProviderGoogleImages extends AbstractCoverArtProvider
{
    private $topLevelDomain;

    /**
     * @param string $topLevelDomain Google site to query, e.g. "com" or "de".
     */
    public function __construct($topLevelDomain = "com")
    {
        $this->topLevelDomain = $topLevelDomain;
    }

    public function name()
    {
        return parent::name() . "_" . $this->topLevelDomain;
    }

    public function getCoverArtList($artist, $album, $limit = 0)
    {
        $list = new stdClass();
        $list->results = array();
        $list->details = array();

        $dom = $this->getHTMLDom('http://images.google.' . $this->topLevelDomain . '/images?q=Album+%22' . urlencode(utf8_decode($artist)) . '%22+%22' . urlencode(utf8_decode($album)) . '%22&hl=en&sa=G&gbv=1');
        $list->details[] = $dom;

        $resultNodes = $this->getDOMNodesByXPath($dom, '//div[@id="ImgCont"]//a[starts-with(@href, "/imgres?imgurl=")]', $limit);
        $list->details[] = $resultNodes;

        foreach ($resultNodes->items as $resultNode) {
            $imageList = $this->getListByXPath($resultNode, 'substring-before(substring-after(./@href, "/imgres?imgurl="), "&imgrefurl=")');
            $list->details[] = $imageList;
            $detailsList = $this->getListByXPath($resultNode, 'substring-before(substring-after(./@href, "&imgrefurl="), "&usg=")');
            $list->details[] = $detailsList;
            $thumbnailImageList = $this->getListByXPath($resultNode, '..//img/@src');
            $list->details[] = $thumbnailImageList;

            $coverArtItem = new CoverArtResult();
            $coverArtItem->title = "";
            $coverArtItem->detailsURL = $this->firstItem($detailsList);
            $coverArtItem->thumbnailImageURL = $this->firstItem($thumbnailImageList);
            $coverArtItem->imageURL = $this->firstItem($imageList);
            $list->results[] = $coverArtItem;
        }

        return $list;
    }
}

/**
 * Caching front end for a list of providers. Search results are kept in the
 * SQLite database CACHE_SQLITE_FILE for CACHE_EXPIRY_SECONDS.
 */
class CoverArtProviderCache extends AbstractCoverArtProvider
{
    private $coverArtProviders;
    private $coverArtProviderQueryLimit;
    private $dbh;
    private $selectStmt;
    private $insertSearchStmt;
    private $insertResultStmt;

    /**
     * @param array|null $coverArtProviders          Providers to wrap; NULL selects
     *                                               $globalCoverArtProviders.
     * @param int        $coverArtProviderQueryLimit Stop after this many providers have
     *                                               delivered results, 0 for no limit.
     */
    public function __construct($coverArtProviders = NULL, $coverArtProviderQueryLimit = 0)
    {
        if (is_null($coverArtProviders)) {
            global $globalCoverArtProviders;
            $coverArtProviders = $globalCoverArtProviders;
        }
        $this->coverArtProviders = $coverArtProviders;
        $this->coverArtProviderQueryLimit = $coverArtProviderQueryLimit;

        // Opening the connection creates the file, so check for it beforehand.
        $createDB = !file_exists(CACHE_SQLITE_FILE);
        $this->dbh = new PDO('sqlite:' . CACHE_SQLITE_FILE);
        if ($createDB) {
            $this->dbh->exec('
                CREATE TABLE "searches" (
                    "search_id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                    "provider_name" TEXT NOT NULL COLLATE NOCASE,
                    "artist" TEXT NOT NULL COLLATE NOCASE,
                    "album" TEXT NOT NULL COLLATE NOCASE,
                    "timestamp" INTEGER NOT NULL
                );
                CREATE TABLE "searches_results" (
                    "result_id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                    "search_id" INTEGER,
                    "index" INTEGER NOT NULL,
                    "title" TEXT NOT NULL,
                    "details_url" TEXT NOT NULL,
                    "thumbnail_image_url" TEXT NOT NULL,
                    "image_url" TEXT NOT NULL
                );
                CREATE INDEX "searches_provider_name" on searches (provider_name ASC);
                CREATE INDEX "searches_artist" on searches (artist ASC);
                CREATE INDEX "searches_album" on searches (album ASC);
                CREATE INDEX "searches_results_search_id" on searches_results (search_id ASC);
                CREATE TRIGGER delete_searches_results DELETE ON searches
                BEGIN
                    DELETE FROM searches_results WHERE search_id = OLD.search_id;
                END;
            ');
        }

        $this->selectStmt = $this->prepareStatement(
            "SELECT * FROM searches " .
            "LEFT JOIN searches_results AS sr ON sr.search_id = searches.search_id " .
            "WHERE provider_name = ? AND artist = ? AND album = ? ORDER BY `index`;"
        );
        $this->insertSearchStmt = $this->prepareStatement(
            "INSERT INTO searches (search_id, provider_name, artist, album, timestamp) " .
            "VALUES (NULL, ?, ?, ?, ?);"
        );
        $this->insertResultStmt = $this->prepareStatement(
            "INSERT INTO searches_results (result_id, search_id, `index`, title, details_url, thumbnail_image_url, image_url) " .
            "VALUES (NULL, ?, ?, ?, ?, ?, ?);"
        );
    }

    /**
     * Prepares a statement and prints the PDO error information when that fails.
     *
     * @param string $sql Statement text.
     * @return PDOStatement|false
     */
    private function prepareStatement($sql)
    {
        $stmt = $this->dbh->prepare($sql);
        if (!$stmt) {
            echo "\nPDO::errorInfo():\n";
            print_r($this->dbh->errorInfo());
        }
        return $stmt;
    }

    /**
     * Stores the results of one provider query. Caching is best effort: a failed
     * write is logged and rolled back, the lookup itself carries on.
     *
     * @param string $providerName Name of the queried provider.
     * @param string $artist       Artist that was searched for.
     * @param string $album        Album that was searched for.
     * @param array  $results      CoverArtResult objects to store.
     */
    private function storeSearch($providerName, $artist, $album, $results)
    {
        try {
            $this->dbh->beginTransaction();
            if ($this->insertSearchStmt->execute(array($providerName, $artist, $album, time()))) {
                $searchID = $this->dbh->lastInsertId();
                foreach ($results as $index => $result) {
                    // All result columns are NOT NULL, so missing values are stored as ''.
                    $this->insertResultStmt->execute(array(
                        $searchID,
                        $index,
                        (string) $result->title,
                        (string) $result->detailsURL,
                        (string) $result->thumbnailImageURL,
                        (string) $result->imageURL
                    ));
                }
            }
            $this->dbh->commit();
        } catch (PDOException $e) {
            if ($this->dbh->inTransaction())
                $this->dbh->rollBack();
            error_log("Cover art cache write failed: " . $e->getMessage());
        }
    }

    /**
     * Queries the wrapped providers in order, answering from the cache where a
     * search that has not expired is stored and filling the cache otherwise.
     *
     * @param string $artist Artist name.
     * @param string $album  Album title.
     * @param int    $limit  Maximum number of results over all providers, 0 for no limit.
     * @return object Object with a "results" array of CoverArtResult.
     */
    public function getCoverArtList($artist, $album, $limit = 0)
    {
        $listLimit = $limit; // results still wanted from the remaining providers

        $list = new stdClass();
        $list->results = array();

        $coverArtProviderQueried = 0;
        foreach ($this->coverArtProviders as $provider) {
            $success = $this->selectStmt->execute(array($provider->name(), $artist, $album));
            $rows = $this->selectStmt->fetchAll();

            $resultsValid = false;
            if ($success && count($rows) > 0) {
                $resultsValid = $rows[0]["timestamp"] > time() - CACHE_EXPIRY_SECONDS;
                if (!$resultsValid) {
                    // Column 0 is searches.search_id; the name "search_id" is ambiguous
                    // in the joined row. The delete trigger removes the result rows.
                    echo "Cached results expired, removing " . $rows[0][0];
                    $deleteStmt = $this->dbh->prepare("DELETE FROM searches WHERE search_id = ?;");
                    if ($deleteStmt)
                        $deleteStmt->execute(array($rows[0][0]));
                }
            }

            if ($resultsValid) {
                //echo "Something was found in the cache...";
                $coverArtList = new stdClass();
                $coverArtList->results = array();
                foreach ($rows as $row) {
                    // A cached search without hits yields one row of NULLs (LEFT JOIN).
                    if (is_null($row["image_url"]))
                        break;

                    $coverArtItem = new CoverArtResult();
                    $coverArtItem->title = $row["title"];
                    $coverArtItem->detailsURL = $row["details_url"];
                    $coverArtItem->thumbnailImageURL = $row["thumbnail_image_url"];
                    $coverArtItem->imageURL = $row["image_url"];
                    $coverArtList->results[] = $coverArtItem;

                    if ($limit > 0 && count($coverArtList->results) == $listLimit)
                        break;
                }
            } else {
                //echo "Nothing found in the cache...";
                $coverArtList = $provider->getCoverArtList($artist, $album, $listLimit);
                $this->storeSearch($provider->name(), $artist, $album, $coverArtList->results);
            }

            $list->results = array_merge($list->results, $coverArtList->results);

            if ($limit > 0) {
                $listLimit = max(0, $listLimit - count($coverArtList->results));
                if ($listLimit == 0)
                    break;
            }

            if (count($coverArtList->results) > 0)
                ++$coverArtProviderQueried;
            if ($this->coverArtProviderQueryLimit > 0 && count($coverArtList->results) > 0
                && $coverArtProviderQueried == $this->coverArtProviderQueryLimit)
                break;
        }

        return $list;
    }
}

// Default provider chain, queried in this order.
$globalCoverArtProviders = array(
    new CoverArtProviderGoogleImages("de"),
    new CoverArtProviderGoogleImages("com"),
    new CoverArtProviderAmazon("de"),
    new CoverArtProviderAmazon("com"),
    new CoverArtProviderBuyDotCom(),
    new CoverArtProviderWalmart()
);

/**
 * Queries the given providers in order and concatenates their results.
 *
 * @param string     $artist            Artist name.
 * @param string     $album             Album title.
 * @param int        $limit             Maximum number of results, 0 for no limit.
 * @param array|null $coverArtProviders Providers to query; NULL selects
 *                                      $globalCoverArtProviders.
 * @return array List of CoverArtResult.
 */
function getCoverArtResults($artist, $album, $limit = 0, $coverArtProviders = NULL)
{
    $results = array();
    $listLimit = $limit; // results still wanted from the remaining providers

    if (is_null($coverArtProviders)) {
        global $globalCoverArtProviders;
        $coverArtProviders = $globalCoverArtProviders;
    }

    foreach ($coverArtProviders as $provider) {
        $coverArtList = $provider->getCoverArtList($artist, $album, $listLimit);
        $results = array_merge($results, $coverArtList->results);

        if ($limit > 0) {
            $listLimit = max(0, $listLimit - count($coverArtList->results));
            if ($listLimit == 0)
                break;
        }
    }

    return $results;
}
?>