Skip to content

Commit

Permalink
initial crawler for cache warming
Browse files Browse the repository at this point in the history
  • Loading branch information
aheadley committed Jan 9, 2013
1 parent abb17ee commit 33794d6
Show file tree
Hide file tree
Showing 7 changed files with 479 additions and 52 deletions.
178 changes: 178 additions & 0 deletions app/code/community/Nexcessnet/Turpentine/Helper/Cron.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
<?php

/**
* Nexcess.net Turpentine Extension for Magento
* Copyright (C) 2012 Nexcess.net L.L.C.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

class Nexcessnet_Turpentine_Helper_Cron extends Mage_Core_Helper_Abstract {

    /**
     * Key to store the URL queue under in the cache
     *
     * @var string
     */
    const CRAWLER_URLS_CACHE_ID = 'turpentine_crawler_url_queue';

    /**
     * Crawler client singleton, lazily created by getCrawlerClient()
     *
     * @var Varien_Http_Client
     */
    protected $_crawlerClient = null;

    /**
     * Get the execution time used so far
     *
     * This is the whole-second part of the user-mode CPU time reported by
     * getrusage(); system time and fractions of a second are not included.
     *
     * @return int
     */
    public function getRunTime() {
        $usage = getrusage();
        return $usage['ru_utime.tv_sec'];
    }

    /**
     * Get the max execution time (or 0 if unlimited)
     *
     * @return int
     */
    public function getAllowedRunTime() {
        // ini_get() hands back a string (or false), so cast to honor the
        // documented integer return type
        return (int)ini_get( 'max_execution_time' );
    }

    /**
     * Add a single URL to the queue, returns whether it was actually added
     * to the queue or not (false if it was already in the queue)
     *
     * @param string $url
     * @return bool
     */
    public function addUrlToCrawlerQueue( $url ) {
        // the bulk method returns a count (0 or 1 here), callers of this
        // method are promised a boolean
        return (bool)$this->addUrlsToCrawlerQueue( array( $url ) );
    }

    /**
     * Add a list of URLs to the queue, returns how many unique URLs were
     * actually added to the queue
     *
     * @param array $urls
     * @return int
     */
    public function addUrlsToCrawlerQueue( array $urls ) {
        // De-duplicate the existing queue before counting: when no queue is
        // stored, _readUrlQueue() falls back to getAllUrls(), whose result is
        // not de-duplicated, which would otherwise skew (or even make
        // negative) the number of added URLs.
        $oldQueue = array_values( array_unique( $this->_readUrlQueue() ) );
        $newQueue = array_values(
            array_unique( array_merge( $oldQueue, $urls ) ) );
        $this->_writeUrlQueue( $newQueue );
        return count( $newQueue ) - count( $oldQueue );
    }

    /**
     * Pop a URL to crawl off the queue, or null if no URLs left
     *
     * The shortened queue is written back to the cache before returning.
     *
     * @return string|null
     */
    public function getNextUrl() {
        $urls = $this->_readUrlQueue();
        // array_shift() yields null on an empty queue
        $nextUrl = array_shift( $urls );
        $this->_writeUrlQueue( $urls );
        return $nextUrl;
    }

    /**
     * Get the crawler http client
     *
     * The client is created on first use and then reused for subsequent
     * calls. It identifies itself with the Turpentine and Magento versions
     * in the user agent, requests keep-alive connections and always sends a
     * 'frontend' cookie with the value 'no-session'.
     *
     * @return Varien_Http_Client
     */
    public function getCrawlerClient() {
        if( is_null( $this->_crawlerClient ) ) {
            $userAgent = sprintf(
                'Nexcessnet_Turpentine/%s Magento/%s Varien_Http_Client',
                Mage::helper( 'turpentine/data' )->getVersion(),
                Mage::getVersion() );
            $this->_crawlerClient = new Varien_Http_Client( null, array(
                'useragent' => $userAgent,
                'keepalive' => true,
            ) );
            $this->_crawlerClient->setCookie( 'frontend', 'no-session' );
        }
        return $this->_crawlerClient;
    }

    /**
     * Get if the crawler is enabled
     *
     * @return bool
     */
    public function getCrawlerEnabled() {
        // cast the raw config value so callers get the documented boolean
        return (bool)Mage::getStoreConfig(
            'turpentine_varnish/general/crawler_enable' );
    }

    /**
     * Get if crawler debugging is enabled
     *
     * @return bool
     */
    public function getCrawlerDebugEnabled() {
        return (bool)Mage::getStoreConfig(
            'turpentine_varnish/general/crawler_debug' );
    }

    /**
     * Get the list of all URLs
     *
     * For every store this is the store's base link URL followed by the
     * URLs of the items returned by the sitemap category, product and CMS
     * page resource models. The list is not de-duplicated.
     *
     * @return array
     */
    public function getAllUrls() {
        $urls = array();
        $models = array(
            'sitemap/catalog_category',
            'sitemap/catalog_product',
            'sitemap/cms_page',
        );
        foreach( Mage::app()->getStores() as $storeId => $store ) {
            $baseUrl = $store->getBaseUrl( Mage_Core_Model_Store::URL_TYPE_LINK );
            $urls[] = $baseUrl;
            foreach( $models as $model ) {
                $items = Mage::getResourceModel( $model )
                    ->getCollection( $storeId );
                // skip anything that can't be iterated (e.g. a false result)
                // instead of raising a warning in the middle of a cron run
                if( !is_array( $items ) && !( $items instanceof Traversable ) ) {
                    continue;
                }
                foreach( $items as $item ) {
                    $urls[] = $baseUrl . $item->getUrl();
                }
            }
        }
        return $urls;
    }

    /**
     * Get the crawler URL queue from the cache
     *
     * If nothing usable is stored in the cache the queue is seeded with
     * the complete URL list from getAllUrls().
     *
     * @return array
     */
    protected function _readUrlQueue() {
        // unserialize() emits a notice for missing or garbage cache data,
        // that case is handled by the is_array() check below
        $readQueue = @unserialize(
            Mage::app()->loadCache( self::CRAWLER_URLS_CACHE_ID ) );
        if( is_array( $readQueue ) ) {
            return $readQueue;
        }
        return $this->getAllUrls();
    }

    /**
     * Save the crawler URL queue to the cache
     *
     * @param array $urls
     * @return mixed the result of Mage::app()->saveCache()
     */
    protected function _writeUrlQueue( array $urls ) {
        return Mage::app()->saveCache(
            serialize( $urls ), self::CRAWLER_URLS_CACHE_ID );
    }
}
50 changes: 50 additions & 0 deletions app/code/community/Nexcessnet/Turpentine/Helper/Data.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@

class Nexcessnet_Turpentine_Helper_Data extends Mage_Core_Helper_Abstract {

/**
* Contains a newly generated v4 uuid whenever read, possibly not available
* on all kernels
*/
const UUID_SOURCE = '/proc/sys/kernel/random/uuid';

/**
* encryption singleton thing
*
Expand All @@ -41,6 +47,50 @@ public function cleanExplode( $token, $data ) {
explode( $token, trim( $data ) ) ) );
}

/**
 * Generate a version 4 style UUID string
 *
 * Sources are tried in order: the kernel UUID source (self::UUID_SOURCE),
 * then mt_rand(), then a hash of uniqid() as a last resort. Every path
 * returns a non-empty string in the 8-4-4-4-12 hex layout when the source
 * behaves.
 *
 * @return string
 */
public function generateUuid() {
    $uuid = false;
    if( is_readable( self::UUID_SOURCE ) ) {
        // the read can still fail or come back empty even though
        // is_readable() passed, so the result is checked below
        $uuid = file_get_contents( self::UUID_SOURCE );
    }
    if( is_string( $uuid ) && trim( $uuid ) !== '' ) {
        return trim( $uuid );
    }
    if( function_exists( 'mt_rand' ) ) {
        /**
         * Taken from stackoverflow answer, possibly not the fastest or
         * strictly standards compliant
         * @link https://stackoverflow.com/a/2040279
         */
        return sprintf( '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
            // 32 bits for "time_low"
            mt_rand( 0, 0xffff ), mt_rand( 0, 0xffff ),

            // 16 bits for "time_mid"
            mt_rand( 0, 0xffff ),

            // 16 bits for "time_hi_and_version",
            // four most significant bits holds version number 4
            mt_rand( 0, 0x0fff ) | 0x4000,

            // 16 bits, 8 bits for "clk_seq_hi_res",
            // 8 bits for "clk_seq_low",
            // two most significant bits holds zero and one for variant DCE1.1
            mt_rand( 0, 0x3fff ) | 0x8000,

            // 48 bits for "node"
            mt_rand( 0, 0xffff ), mt_rand( 0, 0xffff ), mt_rand( 0, 0xffff )
        );
    }
    // Last resort: derive the digits from a hash of uniqid() so the value
    // still differs between calls instead of being a fixed constant.
    $hash = md5( uniqid( '', true ) );
    return sprintf( '%s-%s-4%s-%x%s-%s',
        substr( $hash, 0, 8 ),
        substr( $hash, 8, 4 ),
        // version nibble is forced to 4
        substr( $hash, 13, 3 ),
        // two most significant bits of this nibble are forced to 1 and 0
        ( hexdec( substr( $hash, 16, 1 ) ) & 0x3 ) | 0x8,
        substr( $hash, 17, 3 ),
        substr( $hash, 20, 12 )
    );
}

/**
 * Get the Turpentine version
 *
 * Reads the "version" node of the Nexcessnet_Turpentine module config and
 * casts it, so callers receive a plain string rather than the config
 * element object.
 *
 * @return string
 */
public function getVersion() {
    return (string)Mage::getConfig()
        ->getModuleConfig( 'Nexcessnet_Turpentine' )->version;
}

/**
* Encrypt using Magento CE standard encryption (even on Magento EE)
*
Expand Down
105 changes: 105 additions & 0 deletions app/code/community/Nexcessnet/Turpentine/Helper/Debug.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
<?php

/**
* Nexcess.net Turpentine Extension for Magento
* Copyright (C) 2012 Nexcess.net L.L.C.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

class Nexcessnet_Turpentine_Helper_Debug extends Mage_Core_Helper_Abstract {

    /**
     * Log message through Magento's logging facility, works like sprintf
     *
     * The message is prefixed with 'TURPENTINE: ' and used as the format
     * string, so literal percent signs in it must be written as '%%'.
     *
     * @param string $message
     * @param mixed ...
     * @return null
     */
    public function log( $message ) {
        $args = func_get_args();
        // drop $message itself, the rest are the format arguments
        array_shift( $args );
        Mage::log( vsprintf( 'TURPENTINE: ' . $message, $args ) );
    }

    /**
     * Log a backtrace, can pass a already generated backtrace to use
     *
     * Each frame is logged as two lines (location, then call), bracketed by
     * START/END lines that share a generated UUID so the lines of a single
     * trace can be matched up in the log.
     *
     * @param array $backTrace=null
     * @return null
     */
    public function logBackTrace( $backTrace=null ) {
        if( is_null( $backTrace ) ) {
            $backTrace = debug_backtrace();
            // remove the frame for this method itself
            array_shift( $backTrace );
        }
        $btuuid = Mage::helper( 'turpentine/data' )->generateUuid();
        $this->log( 'TRACEBACK: START ** %s **', $btuuid );
        // REQUEST_URI is not set when running from cron / the command line
        $this->log( 'TRACEBACK: URL: %s',
            isset( $_SERVER['REQUEST_URI'] ) ? $_SERVER['REQUEST_URI'] : 'N/A' );
        // array_values() so frames are numbered from 0 even if the caller
        // passed a filtered or sliced backtrace
        foreach( array_values( $backTrace ) as $i => $line ) {
            // 'file' and 'line' are missing for frames invoked from inside
            // PHP itself (e.g. callbacks)
            $this->log( 'TRACEBACK: #%02d: %s:%d',
                $i,
                isset( $line['file'] ) ? $line['file'] : '[internal function]',
                isset( $line['line'] ) ? $line['line'] : 0 );
            if( isset( $line['object'] ) && is_object( $line['object'] ) ) {
                $owner = get_class( $line['object'] );
            } elseif( isset( $line['class'] ) ) {
                $owner = $line['class'];
            } else {
                // plain function call, no class involved
                $owner = '';
            }
            $this->log( 'TRACEBACK: ==> %s%s%s(%s)',
                $owner,
                isset( $line['type'] ) ? $line['type'] : '',
                $line['function'],
                $this->_backtrace_formatArgs(
                    isset( $line['args'] ) ? $line['args'] : array() ) );
        }
        $this->log( 'TRACEBACK: END ** %s **', $btuuid );
    }

    /**
     * Format a list of function arguments for the backtrace
     *
     * @param array $args
     * @return string comma separated list, empty string if there are no args
     */
    protected function _backtrace_formatArgs( $args ) {
        if( !is_array( $args ) ) {
            // frames are not guaranteed to carry an argument list
            return '';
        }
        return implode( ', ',
            array_map(
                array( $this, '_backtrace_formatArgsHelper' ),
                $args
            )
        );
    }

    /**
     * Format a value for inclusion in the backtrace
     *
     * Objects are shown as OBJECT(class), resources as RESOURCE, arrays as
     * ARRAY[count](key => value, ...) with values formatted recursively,
     * strings in single quotes, null and booleans as NULL / TRUE / FALSE.
     * Any other value (ints, floats) is returned unchanged.
     *
     * @param mixed $arg
     * @return mixed string for everything except ints and floats
     */
    protected function _backtrace_formatArgsHelper( $arg ) {
        $value = $arg;
        if( is_object( $arg ) ) {
            $value = sprintf( 'OBJECT(%s)', get_class( $arg ) );
        } elseif( is_resource( $arg ) ) {
            $value = 'RESOURCE';
        } elseif( is_array( $arg ) ) {
            $c = array();
            foreach( $arg as $k => $v ) {
                $c[] = sprintf( '%s => %s', $k,
                    $this->_backtrace_formatArgsHelper( $v ) );
            }
            $value = sprintf( 'ARRAY[%s](%s)',
                count( $arg ), implode( ', ', $c ) );
        } elseif( is_string( $arg ) ) {
            $value = sprintf( '\'%s\'', $arg );
        } elseif( is_null( $arg ) ) {
            // null and false would otherwise both show up as nothing at all
            $value = 'NULL';
        } elseif( is_bool( $arg ) ) {
            $value = $arg ? 'TRUE' : 'FALSE';
        }
        return $value;
    }
}
Loading

0 comments on commit 33794d6

Please sign in to comment.