├── .gitignore ├── README.md ├── app ├── code │ └── community │ │ └── MageHost │ │ └── RewriteFix │ │ ├── Helper │ │ └── Data.php │ │ ├── Model │ │ ├── Catalog │ │ │ └── Url.php │ │ └── Observer.php │ │ └── etc │ │ └── config.xml └── etc │ └── modules │ └── MageHost_RewriteFix.xml ├── composer.json ├── modman ├── shell └── mh_rewrite_cleanup.php └── var └── connect └── MageHost_RewriteFix.xml /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /.idea/ 3 | /var/connect/*.tgz 4 | /var/connect/package.xml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## MageHost_RewriteFix 2 | 3 | **We are sorry but we cannot offer customer support for this extension, and it is provided "as-is" for free. We use it at a number of big shops and it works well.** 4 | 5 | Due to bugs in Magento, once a rewrite URL ends with -[number] you get more and more rewrite URLs to the same target. The number gets higher and higher. Indexing gets slower and slower. 6 | 7 | This extension is a workaround for this problem. 8 | Requires Magento 1.7.0.2 or greater. 9 | 10 | #### How to install using Modman (--copy) 11 | 1. Make sure you run Magento 1.7 or newer 12 | 1. Install [Modman](https://github.com/colinmollenhour/modman) 13 | 1. `cd` to your Magento root dir 14 | 1. `test -d .modman || modman init` 15 | 1. `modman clone --copy --force https://github.com/magehost/magehost_rewritefix` 16 | 1. If you keep your Magento code in Git: Add `.modman` to your `.gitignore` 17 | 1. Run `php shell/mh_rewrite_cleanup.php -- cleanup` once 18 | 1. Reindex the `catalog_url` index 19 | 20 | #### Verify if needed 21 | You can verify if your installation has this problem by using this query (presuming no DB prefix). If the largest count is > the number of store views then this is a hint that something is wrong. 
22 | 23 | ```sql 24 | SELECT `id_path`, COUNT(*) as `total` 25 | FROM `core_url_rewrite` 26 | GROUP BY `id_path` 27 | ORDER BY `total` DESC 28 | ``` 29 | 30 | For deeper investigation you can inspect the rows with the highest count using ```SELECT * FROM `core_url_rewrite` WHERE `id_path`='[id_path from previous query]'``` 31 | There should be 1 entry per id_path for every store view - and not more - unless you renamed the product URI yourself. 32 | 33 | 34 | -------------------------------------------------------------------------------- /app/code/community/MageHost/RewriteFix/Helper/Data.php: -------------------------------------------------------------------------------- 1 | getRequest()->getControllerName() ) { 13 | // Shell script 14 | echo $message . "\n"; 15 | } else { 16 | Mage::getSingleton( 'adminhtml/session' )->addSuccess( $message ); 17 | } 18 | } 19 | 20 | } -------------------------------------------------------------------------------- /app/code/community/MageHost/RewriteFix/Model/Catalog/Url.php: -------------------------------------------------------------------------------- 1 | getStoreId(); 34 | $idPath = $this->generatePath( 'id', null, $category ); 35 | $suffix = $this->getCategoryUrlSuffix( $storeId ); 36 | 37 | if (isset( $this->_rewrites[ $idPath ] )) { 38 | $this->_rewrite = $this->_rewrites[ $idPath ]; 39 | $existingRequestPath = $this->_rewrites[ $idPath ]->getRequestPath(); 40 | } 41 | 42 | if ($category->getUrlKey() == '') { 43 | $urlKey = $this->getCategoryModel()->formatUrlKey( $category->getName() ); 44 | } else { 45 | $urlKey = $this->getCategoryModel()->formatUrlKey( $category->getUrlKey() ); 46 | } 47 | 48 | $categoryUrlSuffix = $this->getCategoryUrlSuffix( $category->getStoreId() ); 49 | if (null === $parentPath) { 50 | $parentPath = $this->getResource()->getCategoryParentPath( $category ); 51 | } elseif ($parentPath == '/') { 52 | $parentPath = ''; 53 | } 54 | $parentPath = 
/**
 * Build the unique request path for a product, optionally inside a category.
 *
 * On Magento versions below 1.7.0.0 the call is handed to the parent class.
 * Otherwise a rewrite already stored for the same id path is reused when it
 * equals the freshly generated path or only differs from it by a trailing
 * "-[number]", so that re-indexing does not keep producing new rewrites.
 *
 * @param Varien_Object $product
 * @param Varien_Object $category
 * @return string
 */
public function getProductRequestPath($product, $category)
{
    // The logic below is not compatible with older Magento versions;
    // there only the shell command of this extension is useful.
    if (version_compare(Mage::getVersion(), '1.7.0.0', '<')) {
        return parent::getProductRequestPath($product, $category);
    }

    // Fall back to the product name when no URL key has been set.
    $hasOwnKey = $product->getUrlKey() != '';
    $formatter = $this->getProductModel();
    $urlKey    = $formatter->formatUrlKey($hasOwnKey ? $product->getUrlKey() : $product->getName());

    $storeId = $category->getStoreId();
    $suffix  = $this->getProductUrlSuffix($storeId);
    $idPath  = $this->generatePath('id', $product, $category);

    // Base request path: prefixed with the category path below the root level.
    $requestPath = $urlKey;
    if ($category->getLevel() > 1) {
        // Make sure the category has a URL path, from attribute or generated now.
        $this->_addCategoryUrlPath($category);
        $categoryPath = Mage::helper('catalog/category')->getCategoryUrlPath(
            $category->getUrlPath(),
            false,
            $storeId
        );
        $requestPath = $categoryPath . '/' . $urlKey;
    }

    $lengthLimit = self::MAX_REQUEST_PATH_LENGTH + self::ALLOWED_REQUEST_PATH_OVERFLOW;
    if (strlen($requestPath) > $lengthLimit) {
        $requestPath = substr($requestPath, 0, self::MAX_REQUEST_PATH_LENGTH);
    }

    $candidate      = $requestPath . $suffix;
    $this->_rewrite = null;

    if (isset($this->_rewrites[$idPath])) {
        $stored         = $this->_rewrites[$idPath];
        $this->_rewrite = $stored;
        $storedPath     = $stored->getRequestPath();

        if ($storedPath == $candidate) {
            return $storedPath;
        }

        // From here on the stored path is compared without its suffix.
        $storedPath = preg_replace('/' . preg_quote($suffix, '/') . '$/', '', $storedPath);

        // Without a URL key: keep the stored path if it is the generated
        // path followed by "-[number]".
        if ($product->getUrlKey() == '' && !empty($requestPath)
            && strpos($storedPath, $requestPath) === 0
        ) {
            $storedPath = preg_replace('/^' . preg_quote($requestPath, '/') . '/', '', $storedPath);
            if (preg_match('#^-([0-9]+)$#i', $storedPath)) {
                return $stored->getRequestPath();
            }
        }

        $fullPath = $candidate;

        // PATCH TO FIX MAGENTO BUG THAT DUPLICATES REWRITES WHEN URL_KEY IS EQUAL BETWEEN PRODUCTS
        $numberedPattern = '/^' . preg_quote($requestPath, '/') . '-\d{1,}$/';
        if (preg_match($numberedPattern, $storedPath)) {
            $fullPath = $storedPath . $suffix;
        }
        // END OF PATCH

        if ($this->_deleteOldTargetPath($fullPath, $idPath, $storeId)) {
            return $fullPath;
        }
    }

    // Try two variants: the plain path and the path with "-[product id]".
    $validatedPath = $this->getResource()->checkRequestPaths(
        array($candidate, $requestPath . '-' . $product->getId() . $suffix),
        $storeId
    );
    if ($validatedPath) {
        return $validatedPath;
    }

    // Last resort: let the unique path generator pick a free path.
    return $this->getUnusedPath($storeId, $candidate, $idPath);
}
/**
 * Observer for requests that ended up in the CMS "no route" (404) action.
 *
 * Two redirect candidates are tried, in this order:
 *  - When 'catalog/seo/product_use_categories' is disabled for the current
 *    store and the path has more than one segment, the last segment is looked
 *    up as a rewrite request path; a hit becomes the redirect target.
 *      /category-name/product-name.html  =301=>  /product-name.html
 *  - Otherwise, when the path ends with "-[number]" (optionally followed by
 *    ".html" or "/"), the number is cut off.
 *      /category-name/product-name-123.html  =301=>  /category-name/product-name.html
 *
 * If a target was found and it differs from the requested URL, a 301 is sent
 * and dispatching of the no-route action is suppressed.
 *
 * @param Varien_Event_Observer $observer
 */
public function controllerActionPredispatchCmsIndexNoRoute( $observer )
{
    /** @var $action Mage_Cms_IndexController */
    $action       = $observer->getControllerAction();
    $request      = Mage::app()->getRequest();
    $response     = Mage::app()->getResponse();
    $pathInfo     = $request->getOriginalPathInfo();
    $baseUrl      = rtrim( Mage::getBaseUrl(), '/' ); // Remove trailing slash
    $requestedUrl = $baseUrl . $pathInfo;
    $target       = false;

    // Category paths are not used for product URLs: retry with the last path segment only.
    if ( ! Mage::getStoreConfigFlag( 'catalog/seo/product_use_categories' ) ) {
        $segments = explode( '/', trim( parse_url( $requestedUrl, PHP_URL_PATH ), '/' ) );
        if ( count( $segments ) > 1 ) {
            $lastSegment = end( $segments );
            $urlResource = Mage::getResourceModel( 'catalog/url' );
            $storeId     = Mage::app()->getStore()->getId();
            $rewrite     = $urlResource->getRewriteByRequestPath( $lastSegment, $storeId );
            if ( $rewrite ) {
                $target = $baseUrl . '/' . $rewrite->getRequestPath();
            }
        }
    }

    // Path ends with a number: cut it off, keeping an optional ".html" or "/".
    if ( !$target && preg_match( '#^([/\w\-]+)\-\d+(\.html|/)?$#', $pathInfo, $parts ) ) {
        $target = $baseUrl . $parts[1] . ( isset( $parts[2] ) ? $parts[2] : '' );
    }

    // Nothing to redirect to, or the target equals the request (would loop).
    if ( !$target || $requestedUrl == $target ) {
        return;
    }

    $response->setRedirect( $target, 301 );
    $response->sendHeaders();
    $action->setFlag( '', Mage_Core_Controller_Varien_Action::FLAG_NO_DISPATCH, true );
}

/**
 * Delete rewrites that combine a category and a product for every store
 * where 'catalog/seo/product_use_categories' is disabled.
 *
 * The DELETE is limited to the affected store ids unless the setting is
 * disabled for all stores. When rows were removed, a success message is
 * passed to the module helper.
 *
 * @param Varien_Event_Observer $observer
 */
public function afterReindexProcessCatalogUrl( $observer )
{
    $storeIds       = array();
    $stores         = Mage::app()->getStores( true );
    $helper         = Mage::helper( 'magehost_rewritefix' );
    $everyStoreHit  = true;

    /** @var Mage_Core_Model_Store $store */
    foreach ( $stores as $store ) {
        if ( Mage::getStoreConfigFlag( 'catalog/seo/product_use_categories', $store->getId() ) ) {
            $everyStoreHit = false;
            continue;
        }
        $storeIds[] = (int) $store->getId();
    }

    if ( empty( $storeIds ) ) {
        return;
    }

    $writeAdapter = Mage::getSingleton( 'core/resource' )->getConnection( 'core_write' );
    $table        = Mage::getResourceModel( 'core/url_rewrite' )->getMainTable();

    // No store filter is needed when every store is affected.
    $storeFilter = $everyStoreHit
        ? '1'
        : sprintf( '`store_id` IN (%s)', $writeAdapter->quote( $storeIds ) );

    $sql = sprintf(
        'DELETE FROM %s
                     WHERE %s
                     AND `category_id` IS NOT NULL
                     AND `product_id` IS NOT NULL',
        $writeAdapter->quoteIdentifier( $table ),
        $storeFilter
    );

    $removed = $writeAdapter->query( $sql )->rowCount();
    if ( $removed ) {
        $message = $helper->__(
            "MageHost RewriteFix: Cleaned up %d records from '%s' index because '%s' is disabled.",
            $removed,
            Mage::helper( 'catalog' )->__( "Catalog URL Rewrites" ),
            Mage::helper( 'catalog' )->__( "Use Categories Path for Product URLs" )
        );
        $helper->successMessage( $message );
    }
}


/**
 * Observer for the event 'adminhtml_block_html_before'.
 *
 * When the block is the "Index Management" grid, the number of rows in the
 * URL rewrite table is appended to the description of the 'catalog_url'
 * indexer.
 *
 * @param Varien_Event_Observer $observer
 */
public function adminhtmlBlockHtmlBefore( $observer )
{
    $block = $observer->getData( 'block' );
    if ( ! ( $block instanceof Mage_Index_Block_Adminhtml_Process_Grid ) ) {
        return;
    }

    /** @var Mage_Index_Block_Adminhtml_Process_Grid $block */
    $processes   = $block->getCollection();
    $readAdapter = Mage::getSingleton( 'core/resource' )->getConnection( 'core_read' );
    $table       = Mage::getResourceModel( 'core/url_rewrite' )->getMainTable();

    foreach ( $processes as $process ) {
        /** @var Mage_Index_Model_Process $process */
        if ( 'catalog_url' != $process->getIndexerCode() ) {
            continue;
        }
        $select   = $readAdapter->select()->from( $table, array( 'count' => 'COUNT(*)' ) );
        $rowCount = number_format( $readAdapter->fetchOne( $select ) );
        $process->setDescription( $process->getDescription() . ' - ' . $block->__( '%s records', $rowCount ) );
    }
}
/**
 * Run script.
 *
 * Without the `cleanup` argument only the usage help is printed.
 * With it, the observer's afterReindexProcessCatalogUrl() is invoked first.
 * After that the rewrite table is walked in id ranges of `selectChunkSize`,
 * and rows with options 'RP' and a product id are deleted when their request
 * path and target path are equal once a trailing "-[number]" (before an
 * optional ".html") is removed from both.
 *
 * @throws Zend_Db_Statement_Exception
 */
public function run()
{
    if ( ! $this->getArg('cleanup') ) {
        echo $this->usageHelp();
        return;
    }

    echo "==== MageHost RewriteFix - https://MagentoHosting.pro ====\n\n";
    echo "Checking for unnecessary product URLs with category path...\n";
    $dummyObserver = new Varien_Event_Observer();
    Mage::getSingleton('magehost_rewritefix/observer')->afterReindexProcessCatalogUrl($dummyObserver);

    echo "\nChecking if we can cleanup rewrites which only add/remove '-[number]' in the URL...\n";

    // Process select and deletes in chunks to prevent "Allowed memory size" error.
    $deleteCount  = 0;
    $chunkSize    = max( 1, (int) $this->selectChunkSize ); // never divide by zero
    $maxSql       = sprintf("SELECT MAX(url_rewrite_id) as `max` FROM %s", $this->quotedTable);
    $maxRewriteId = (int) $this->readAdapter->raw_fetchRow( $maxSql, 'max' );

    // Chunk k covers the ids [k * size, (k + 1) * size). Using ceil(max / size)
    // chunks stops one id short when max is an exact multiple of size, so the
    // chunk containing max itself is counted explicitly.
    // An empty table (MAX() is NULL, cast to 0) needs no chunks at all.
    $selectChunks = ( $maxRewriteId > 0 ) ? (int) floor( $maxRewriteId / $chunkSize ) + 1 : 0;

    // Strips "-[number]" at the end of a path while keeping an optional ".html".
    $pregFilter = '/\-\d+(\.html)?$/';

    for ( $chunkNr = 0; $chunkNr < $selectChunks; $chunkNr++ ) {
        $firstId = $chunkNr * $chunkSize;
        $sql = sprintf( " SELECT `url_rewrite_id`, `request_path`, `target_path`
                          FROM %s
                          WHERE url_rewrite_id >= %d AND url_rewrite_id < %d
                            AND `options` = 'RP'
                            AND `product_id` IS NOT NULL
                            AND id_path LIKE '%%\_%%' ",
                        $this->quotedTable,
                        $firstId,
                        $firstId + $chunkSize );
        /** @var Varien_Db_Statement_Pdo_Mysql $stmt */
        $stmt = $this->readAdapter->query( $sql );
        $deleteList = array();
        while ( $row = $stmt->fetch() ) {
            $strippedRequest = preg_replace( $pregFilter, '$1', $row['request_path'] );
            $strippedTarget  = preg_replace( $pregFilter, '$1', $row['target_path'] );
            if ( $strippedRequest == $strippedTarget ) {
                $deleteList[] = intval( $row['url_rewrite_id'] );
            }
            // Flush the pending deletes as soon as a full delete chunk is collected.
            if ( $this->deleteChunkSize <= count($deleteList) ) {
                $deleteCount += $this->cleanRewrites( $deleteList );
                $deleteList = array();
            }
        }
        // Remainder of this select chunk.
        $deleteCount += $this->cleanRewrites( $deleteList );
    }

    if ( $deleteCount ) {
        printf( "\nCleaned up %d records.\n", $deleteCount );
    } else {
        echo "Found no records to clean.\n";
    }
    echo "\nDone.\n";
}
133 | } 134 | echo "."; 135 | flush(); 136 | } 137 | return $count; 138 | } 139 | 140 | } 141 | 142 | $shell = new Mage_Shell_RewriteCleanup(); 143 | $shell->run(); 144 | -------------------------------------------------------------------------------- /var/connect/MageHost_RewriteFix.xml: -------------------------------------------------------------------------------- 1 | <_> 2 | Qd22X9LMi8WmnSGW 3 | MageHost_RewriteFix 4 | community 5 | 6 | 2 7 | 8 | Due to bugs in Magento, once an rewrite URL ends with -[number] you get more and more rewrite URLs to the same target. The number gets higher and higher. 9 | Due to bugs in Magento, once an rewrite URL ends with -[number] you get more and more rewrite URLs to the same target. The number gets higher and higher. 10 | 11 | OSL-3.0 12 | http://opensource.org/licenses/osl-3.0.php 13 | 1.6.0 14 | stable 15 | Added shell script to cleanup. 16 | 17 | 18 | MageHost BVBA 19 | 20 | 21 | jeroenvermeulen 22 | 23 | 24 | info@magehost.pro 25 | 26 | 27 | 5.3.0 28 | 6.0.0 29 | 30 | 31 | 32 | 33 | Mage_Core_Modules 34 | 35 | 36 | 37 | community 38 | 39 | 40 | 41 | 1.7.0.2 42 | 43 | 44 | 45 | 2.0.0.0 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | Core 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | mageetc 67 | magecommunity 68 | mage 69 | 70 | 71 | modules/MageHost_RewriteFix.xml 72 | MageHost/RewriteFix 73 | shell/mh_rewrite_cleanup.php 74 | 75 | 76 | file 77 | dir 78 | dir 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 1 92 | 200 93 | 94 | 95 | 96 | --------------------------------------------------------------------------------