├── .gitignore
├── README.md
├── app
├── code
│ └── community
│ │ └── MageHost
│ │ └── RewriteFix
│ │ ├── Helper
│ │ └── Data.php
│ │ ├── Model
│ │ ├── Catalog
│ │ │ └── Url.php
│ │ └── Observer.php
│ │ └── etc
│ │ └── config.xml
└── etc
│ └── modules
│ └── MageHost_RewriteFix.xml
├── composer.json
├── modman
├── shell
└── mh_rewrite_cleanup.php
└── var
└── connect
└── MageHost_RewriteFix.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | /.idea/
3 | /var/connect/*.tgz
4 | /var/connect/package.xml
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## MageHost_RewriteFix
2 |
3 | **We are sorry but we cannot offer customer support for this extension, and it is provided "as-is" for free. We use it at a number of big shops and it works well.**
4 |
5 | Due to bugs in Magento, once a rewrite URL ends with -[number], Magento keeps generating more and more rewrite URLs to the same target. The number gets higher and higher, and indexing gets slower and slower.
6 |
7 | This extension is a workaround for this problem.
8 | Requires Magento 1.7.0.2 or greater.
9 |
10 | #### How to install using Modman (--copy)
11 | 1. Make sure you run Magento 1.7 or newer
12 | 1. Install [Modman](https://github.com/colinmollenhour/modman)
13 | 1. `cd` to your Magento root dir
14 | 1. `test -d .modman || modman init`
15 | 1. `modman clone --copy --force https://github.com/magehost/magehost_rewritefix`
16 | 1. If you keep your Magento code in Git: Add `.modman` to your `.gitignore`
17 | 1. Run `php shell/mh_rewrite_cleanup.php -- cleanup` once
18 | 1. Reindex the `catalog_url` index
19 |
20 | #### Verify if needed
21 | You can check whether your installation has this problem by running the query below (assuming no DB table prefix). If the largest count is greater than the number of store views, this is a hint that something is wrong.
22 |
23 | ```sql
24 | SELECT `id_path`, COUNT(*) as `total`
25 | FROM `core_url_rewrite`
26 | GROUP BY `id_path`
27 | ORDER BY `total` DESC
28 | ```
29 |
30 | For deeper investigation you can inspect the rows with the highest count using ```SELECT * FROM `core_url_rewrite` WHERE `id_path`='[id_path from previous query]'```
31 | There should be 1 entry per id_path for every store view - and not more - unless you renamed the product URI yourself.
32 |
33 |
34 |
--------------------------------------------------------------------------------
/app/code/community/MageHost/RewriteFix/Helper/Data.php:
--------------------------------------------------------------------------------
1 | getRequest()->getControllerName() ) {
13 | // Shell script
14 | echo $message . "\n";
15 | } else {
16 | Mage::getSingleton( 'adminhtml/session' )->addSuccess( $message );
17 | }
18 | }
19 |
20 | }
--------------------------------------------------------------------------------
/app/code/community/MageHost/RewriteFix/Model/Catalog/Url.php:
--------------------------------------------------------------------------------
1 | getStoreId();
34 | $idPath = $this->generatePath( 'id', null, $category );
35 | $suffix = $this->getCategoryUrlSuffix( $storeId );
36 |
37 | if (isset( $this->_rewrites[ $idPath ] )) {
38 | $this->_rewrite = $this->_rewrites[ $idPath ];
39 | $existingRequestPath = $this->_rewrites[ $idPath ]->getRequestPath();
40 | }
41 |
42 | if ($category->getUrlKey() == '') {
43 | $urlKey = $this->getCategoryModel()->formatUrlKey( $category->getName() );
44 | } else {
45 | $urlKey = $this->getCategoryModel()->formatUrlKey( $category->getUrlKey() );
46 | }
47 |
48 | $categoryUrlSuffix = $this->getCategoryUrlSuffix( $category->getStoreId() );
49 | if (null === $parentPath) {
50 | $parentPath = $this->getResource()->getCategoryParentPath( $category );
51 | } elseif ($parentPath == '/') {
52 | $parentPath = '';
53 | }
54 | $parentPath = Mage::helper('catalog/category')->getCategoryUrlPath($parentPath,
55 | true, $category->getStoreId());
56 |
57 | $requestPath = $parentPath . $urlKey . $categoryUrlSuffix;
58 |
59 | // PATCH TO FIX MAGENTO BUG THAT DUPLICATES REWRITES WHEN URL_KEY IS EQUAL BETWEEN CATEGORIES
60 | $pattern = '/^' . preg_quote($parentPath.$urlKey, '/') . '-\d{1,}' . preg_quote($categoryUrlSuffix, '/') . '$/';
61 | if(isset($existingRequestPath) && (preg_match($pattern, $existingRequestPath) || $requestPath === $existingRequestPath)) {
62 | return $existingRequestPath;
63 | }
64 | // END OF PATCH
65 |
66 | if (isset( $existingRequestPath ) && $existingRequestPath == $requestPath . $suffix) {
67 | return $existingRequestPath;
68 | }
69 |
70 | if ($this->_deleteOldTargetPath( $requestPath, $idPath, $storeId )) {
71 | return $requestPath;
72 | }
73 |
74 | return $this->getUnusedPath($category->getStoreId(), $requestPath,
75 | $this->generatePath( 'id', null, $category )
76 | );
77 | }
78 | }
79 |
/**
 * Determine the unique request path of a product in the context of a category / store.
 *
 * On Magento versions older than 1.7.0.0 the parent implementation is used unchanged.
 * Otherwise the path is built from the formatted URL key (prefixed with the category URL
 * path when the category level is above 1). An already registered rewrite for the same
 * id path is reused when it equals the new path, or when it only differs from it by a
 * trailing "-<number>"; this prevents a new rewrite from being created on every reindex.
 *
 * @param Varien_Object $product
 * @param Varien_Object $category
 * @return string
 */
public function getProductRequestPath($product, $category)
{
    // The fix below is not compatible with older Magento versions.
    // There only the shell command of this extension is useful.
    if (version_compare(Mage::getVersion(), '1.7.0.0', '<')) {
        return parent::getProductRequestPath($product, $category);
    }

    // Fall back to the product name when no URL key has been set.
    $keySource = ($product->getUrlKey() == '') ? $product->getName() : $product->getUrlKey();
    $urlKey    = $this->getProductModel()->formatUrlKey($keySource);

    $storeId = $category->getStoreId();
    $suffix  = $this->getProductUrlSuffix($storeId);
    $idPath  = $this->generatePath('id', $product, $category);

    // Base request path: plain URL key, or "<category url path>/<url key>" below the root level.
    $requestPath = $urlKey;
    if ($category->getLevel() > 1) {
        // Ensure the category has a URL path, either from its attribute or generated now.
        $this->_addCategoryUrlPath($category);
        $categoryUrl = Mage::helper('catalog/category')
            ->getCategoryUrlPath($category->getUrlPath(), false, $storeId);
        $requestPath = $categoryUrl . '/' . $urlKey;
    }

    // Truncate only when the path exceeds the limit by more than the allowed overflow.
    $maxLength = self::MAX_REQUEST_PATH_LENGTH;
    if (strlen($requestPath) > $maxLength + self::ALLOWED_REQUEST_PATH_OVERFLOW) {
        $requestPath = substr($requestPath, 0, $maxLength);
    }

    $defaultPath    = $requestPath . $suffix;
    $this->_rewrite = null;

    if (isset($this->_rewrites[$idPath])) {
        $currentRewrite = $this->_rewrites[$idPath];
        $this->_rewrite = $currentRewrite;
        $storedPath     = $currentRewrite->getRequestPath();

        // Nothing changed: keep the rewrite as it is.
        if ($storedPath == $defaultPath) {
            return $storedPath;
        }

        // Work on the stored path without its URL suffix from here on.
        $strippedPath = preg_replace('/' . preg_quote($suffix, '/') . '$/', '', $storedPath);

        // Without an explicit URL key, an existing "<request path>-<number>" can be kept.
        if ($product->getUrlKey() == '' && !empty($requestPath)
            && strpos($strippedPath, $requestPath) === 0
        ) {
            $strippedPath = preg_replace('/^' . preg_quote($requestPath, '/') . '/', '', $strippedPath);
            if (preg_match('#^-([0-9]+)$#i', $strippedPath)) {
                return $currentRewrite->getRequestPath();
            }
        }

        // PATCH TO FIX MAGENTO BUG THAT DUPLICATES REWRITES WHEN URL_KEY IS EQUAL BETWEEN PRODUCTS:
        // when the stored path is the request path plus "-<number>", stick to the stored path.
        $pattern  = '/^' . preg_quote($requestPath, '/') . '-\d{1,}$/';
        $fullPath = preg_match($pattern, $strippedPath) ? $strippedPath . $suffix : $defaultPath;
        // END OF PATCH

        if ($this->_deleteOldTargetPath($fullPath, $idPath, $storeId)) {
            return $fullPath;
        }
    }

    // Check 2 variants: $requestPath and $requestPath . '-' . $productId
    $validatedPath = $this->getResource()->checkRequestPaths(
        array($defaultPath, $requestPath . '-' . $product->getId() . $suffix),
        $storeId
    );
    if ($validatedPath) {
        return $validatedPath;
    }

    // Last resort: let the unique path generator pick an unused path.
    return $this->getUnusedPath($storeId, $defaultPath, $idPath);
}
175 |
176 | }
177 |
--------------------------------------------------------------------------------
/app/code/community/MageHost/RewriteFix/Model/Observer.php:
--------------------------------------------------------------------------------
1 | /product-name
15 | * /category-name/product-name.html =301=> /product-name.html
16 | *
17 | * When a URL ending with a number is requested and causes a 404 error, do a 301 redirect to the URL without the number.
18 | * This helps when you are cleaning up old URLs ending with a number.
19 | * /category-name/product-name-123 =301=> /category-name/product-name
20 | * /category-name/product-name-123/ =301=> /category-name/product-name/
21 | * /category-name/product-name-123.html =301=> /category-name/product-name.html
22 | *
23 | * @param Varien_Event_Observer $observer
24 | */
public function controllerActionPredispatchCmsIndexNoRoute( $observer ) {
    /** @var $controllerAction Mage_Cms_IndexController */
    $controllerAction = $observer->getControllerAction();
    $request          = Mage::app()->getRequest();
    $response         = Mage::app()->getResponse();
    $originalPath     = $request->getOriginalPathInfo();
    $baseUrl          = rtrim( Mage::getBaseUrl(), '/' ); // Base URL without trailing slash
    $currentUrl       = $baseUrl . $originalPath;
    $redirectUrl      = false;

    // With 'Use Categories Path for Product URLs' disabled: look up a rewrite for the last
    // path segment only, so a category-prefixed product URL can be sent to the plain product URL.
    if ( ! Mage::getStoreConfigFlag('catalog/seo/product_use_categories') ) {
        $segments = explode( '/', trim( parse_url( $currentUrl, PHP_URL_PATH ), '/' ) );
        if ( count($segments) > 1 ) {
            $lastSegment = end( $segments );
            $urlResource = Mage::getResourceModel('catalog/url');
            $storeId     = Mage::app()->getStore()->getId();
            $rewrite     = $urlResource->getRewriteByRequestPath( $lastSegment, $storeId );
            if ( $rewrite ) {
                $redirectUrl = $baseUrl . '/' . $rewrite->getRequestPath();
            }
        }
    }

    // Still no target: if the path ends with "-<number>" (optionally followed by ".html" or "/"),
    // redirect to the same path with the number cut off.
    if ( empty($redirectUrl) && preg_match( '#^([/\w\-]+)\-\d+(\.html|/)?$#', $originalPath, $matches ) ) {
        $redirectUrl = $baseUrl . $matches[1] . ( isset($matches[2]) ? $matches[2] : '' );
    }

    // No target found, or the target equals the current URL (would loop): leave the request alone.
    if ( empty($redirectUrl) || $currentUrl == $redirectUrl ) {
        return;
    }

    $response->setRedirect( $redirectUrl, 301 );
    $response->sendHeaders();
    $controllerAction->setFlag( '', Mage_Core_Controller_Varien_Action::FLAG_NO_DISPATCH, true );
}
67 |
/**
 * For stores that have the config setting 'Use Categories Path for Product URLs' disabled:
 * delete the URL rewrite records that belong to a category and product combination
 * (both `category_id` and `product_id` filled in).
 *
 * When the setting is disabled for every store the delete is not restricted by store id.
 * The number of removed records is reported through the module helper's successMessage().
 *
 * @param Varien_Event_Observer $observer
 */
public function afterReindexProcessCatalogUrl( $observer ) {
    $helper    = Mage::helper( 'magehost_rewritefix' );
    $storeIds  = array();
    $allStores = true;

    /** @var Mage_Core_Model_Store $store */
    foreach ( Mage::app()->getStores( true ) as $store ) {
        if ( Mage::getStoreConfigFlag( 'catalog/seo/product_use_categories', $store->getId() ) ) {
            // At least one store keeps category paths, so the delete must be limited by store id.
            $allStores = false;
            continue;
        }
        $storeIds[] = intval( $store->getId() );
    }

    if ( empty($storeIds) ) {
        return;
    }

    $writeAdapter = Mage::getSingleton('core/resource')->getConnection('core_write');
    $table        = Mage::getResourceModel('core/url_rewrite')->getMainTable();
    $storeFilter  = $allStores
        ? '1'
        : sprintf( '`store_id` IN (%s)', $writeAdapter->quote( $storeIds ) );

    $sql = sprintf( 'DELETE FROM %s
                     WHERE %s
                     AND `category_id` IS NOT NULL
                     AND `product_id` IS NOT NULL',
        $writeAdapter->quoteIdentifier( $table ),
        $storeFilter );

    $count = $writeAdapter->query( $sql )->rowCount();
    if ( $count ) {
        $message = $helper->__(
            "MageHost RewriteFix: Cleaned up %d records from '%s' index because '%s' is disabled.",
            $count,
            Mage::helper('catalog')->__("Catalog URL Rewrites"),
            Mage::helper('catalog')->__("Use Categories Path for Product URLs")
        );
        $helper->successMessage( $message );
    }
}
106 |
107 |
/**
 * Observer function for the event 'adminhtml_block_html_before'.
 * When the block is the grid of "Index Management", the description of the process with
 * indexer code 'catalog_url' is extended with the number of rows in the URL rewrite table.
 *
 * @param Varien_Event_Observer $observer
 */
public function adminhtmlBlockHtmlBefore( $observer ) {
    $block = $observer->getData( 'block' );
    if ( ! is_a( $block, 'Mage_Index_Block_Adminhtml_Process_Grid' ) ) {
        return;
    }

    /** @var Mage_Index_Block_Adminhtml_Process_Grid $block */
    $processes   = $block->getCollection();
    $readAdapter = Mage::getSingleton('core/resource')->getConnection('core_read');
    $table       = Mage::getResourceModel('core/url_rewrite')->getMainTable();

    foreach ( $processes as $process ) {
        /** @var Mage_Index_Model_Process $process */
        if ( 'catalog_url' != $process->getIndexerCode() ) {
            continue;
        }
        // Count all rewrite rows and append the formatted total to the indexer description.
        $select = $readAdapter->select()->from( $table, array( 'count' => 'COUNT(*)' ) );
        $total  = number_format( $readAdapter->fetchOne( $select ) );
        $process->setDescription( $process->getDescription() . ' - ' . $block->__( '%s records', $total ) );
    }
}
131 | }
132 |
--------------------------------------------------------------------------------
/app/code/community/MageHost/RewriteFix/etc/config.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 1.6.0
6 |
7 |
8 |
9 |
10 |
11 | MageHost_RewriteFix_Model
12 |
13 |
14 |
15 | MageHost_RewriteFix_Model_Catalog_Url
16 |
17 |
18 |
19 |
20 |
21 | MageHost_RewriteFix_Helper
22 |
23 |
24 |
25 |
26 |
27 |
28 | singleton
29 | magehost_rewritefix/observer
30 | controllerActionPredispatchCmsIndexNoRoute
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | singleton
39 | magehost_rewritefix/observer
40 | controllerActionPredispatchCmsIndexNoRoute
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 | singleton
49 | magehost_rewritefix/observer
50 | afterReindexProcessCatalogUrl
51 |
52 |
53 |
54 |
55 |
56 |
57 | singleton
58 | magehost_rewritefix/observer
59 | adminhtmlBlockHtmlBefore
60 |
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/app/etc/modules/MageHost_RewriteFix.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | true
6 | community
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "magehost/magehost_rewritefix",
3 | "type": "magento-module",
4 | "license": "GPL-3.0",
5 | "homepage":"https://github.com/magehost/magehost_rewritefix",
6 | "description":"MageHost_RewriteFix: Solves a problem most big #Magento 1.x shops face: an ever growing huge URL Rewrite table.",
7 | "authors":[
8 | {
9 | "name":"MagentoHosting.pro - Jeroen Vermeulen",
10 | "email":"jeroen@magehost.pro"
11 | }
12 | ],
13 | "suggest":{
14 | "magento-hackathon/magento-composer-installer":"Makes it possible to manage this package as a dependency"
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/modman:
--------------------------------------------------------------------------------
1 | ## MageHost_RewriteFix Extension - Fix for bugs in Magento causing the core_url_rewrite table to grow very big.
2 |
3 | # ---- Module enable XML
4 | # Magento Global Config - File - modules/MageHost_RewriteFix.xml
5 | app/etc/modules/MageHost_RewriteFix.xml app/etc/modules/MageHost_RewriteFix.xml
6 |
7 | # ---- Extension code dir
8 | # Magento Community Module - Recursive Dir - MageHost/RewriteFix
9 | app/code/community/MageHost/RewriteFix/ app/code/community/MageHost/RewriteFix/
10 |
11 | # ---- Shell script to cleanup
12 | # Magento other - File - shell/mh_rewrite_cleanup.php
13 | shell/mh_rewrite_cleanup.php shell/mh_rewrite_cleanup.php
14 |
15 | ## When you update this file, don't forget composer.json
16 |
--------------------------------------------------------------------------------
/shell/mh_rewrite_cleanup.php:
--------------------------------------------------------------------------------
1 | readAdapter = Mage::getSingleton('core/resource')->getConnection('core_read');
34 | $this->writeAdapter = Mage::getSingleton('core/resource')->getConnection('core_write');
35 | $this->table = Mage::getResourceModel('core/url_rewrite')->getMainTable();
36 | $this->quotedTable = $this->readAdapter->quoteIdentifier($this->table);
37 | }
38 |
/**
 * Run script.
 *
 * Without the 'cleanup' argument only the usage help is printed. With it:
 *  1. the observer cleanup for product URLs with a category path is executed;
 *  2. the rewrite table is scanned in ranges of `selectChunkSize` ids for product rewrites
 *     with `options` = 'RP' whose request path and target path are equal once a trailing
 *     "-<number>" (before an optional ".html") is removed; those rows are handed to
 *     cleanRewrites() in batches of `deleteChunkSize` ids.
 *
 * @throws Zend_Db_Statement_Exception
 */
public function run()
{
    if ( ! $this->getArg('cleanup') ) {
        echo $this->usageHelp();
        return;
    }

    echo "==== MageHost RewriteFix - https://MagentoHosting.pro ====\n\n";
    echo "Checking for unnecessary product URLs with category path...\n";
    $dummyObserver = new Varien_Event_Observer();
    Mage::getSingleton('magehost_rewritefix/observer')->afterReindexProcessCatalogUrl($dummyObserver);

    echo "\nChecking if we can cleanup rewrites which only add/remove '-[number]' in the URL...\n";

    // Process select and deletes in chunks to prevent "Allowed memory size" error.
    $deleteCount  = 0;
    $maxSql       = sprintf("SELECT MAX(url_rewrite_id) as `max` FROM %s", $this->quotedTable);
    $maxRewriteId = $this->readAdapter->raw_fetchRow( $maxSql, 'max' );

    // Chunk k covers the ids [k * size, (k + 1) * size), so the chunk that contains the highest
    // id has index floor(max / size). Using ceil(max / size) as the chunk count skipped the
    // highest id whenever it was an exact multiple of the chunk size.
    $selectChunks = empty( $maxRewriteId )
        ? 0
        : (int) floor( $maxRewriteId / $this->selectChunkSize ) + 1;

    // Strips a trailing "-<number>", keeping an optional ".html" suffix (via '$1').
    $pregFilter = '/\-\d+(\.html)?$/';

    for ( $chunkNr = 0; $chunkNr < $selectChunks; $chunkNr++ ) {
        $rangeStart = $chunkNr * $this->selectChunkSize;
        $rangeEnd   = $rangeStart + $this->selectChunkSize;
        $sql = sprintf( " SELECT `url_rewrite_id`, `request_path`, `target_path`
                          FROM %s
                          WHERE url_rewrite_id >= %d AND url_rewrite_id < %d
                          AND `options` = 'RP'
                          AND `product_id` IS NOT NULL
                          AND id_path LIKE '%%\_%%' ",
            $this->quotedTable,
            $rangeStart,
            $rangeEnd );
        /** @var Varien_Db_Statement_Pdo_Mysql $stmt */
        $stmt       = $this->readAdapter->query( $sql );
        $deleteList = array();
        while ( $row = $stmt->fetch() ) {
            // Strict comparison: with '==' numeric-looking paths such as "0123" and "123"
            // would be compared as numbers and wrongly treated as the same path.
            $strippedRequest = preg_replace( $pregFilter, '$1', $row['request_path'] );
            $strippedTarget  = preg_replace( $pregFilter, '$1', $row['target_path'] );
            if ( $strippedRequest === $strippedTarget ) {
                $deleteList[] = intval( $row['url_rewrite_id'] );
            }
            if ( $this->deleteChunkSize <= count($deleteList) ) {
                $deleteCount += $this->cleanRewrites( $deleteList );
                $deleteList = array();
            }
        }
        // Flush whatever is left of this id range.
        $deleteCount += $this->cleanRewrites( $deleteList );
    }

    if ( $deleteCount ) {
        printf( "\nCleaned up %d records.\n", $deleteCount );
    } else {
        echo "Found no records to clean.\n";
    }
    echo "\nDone.\n";
}
95 |
96 | /**
97 | * Retrieve Usage Help Message
98 | */
99 | public function usageHelp()
100 | {
101 | return <<writeAdapter->quoteIdentifier( $this->table ),
129 | $this->writeAdapter->quote( $chunk ) );
130 | $stmt = $this->writeAdapter->query( $sql );
131 | $count += $stmt->rowCount();
132 | $stmt->closeCursor();
133 | }
134 | echo ".";
135 | flush();
136 | }
137 | return $count;
138 | }
139 |
140 | }
141 |
// Script entry point: instantiate the cleanup command and execute it.
$rewriteCleanup = new Mage_Shell_RewriteCleanup();
$rewriteCleanup->run();
144 |
--------------------------------------------------------------------------------
/var/connect/MageHost_RewriteFix.xml:
--------------------------------------------------------------------------------
1 | <_>
2 | Qd22X9LMi8WmnSGW
3 | MageHost_RewriteFix
4 | community
5 |
6 | 2
7 |
8 | Due to bugs in Magento, once an rewrite URL ends with -[number] you get more and more rewrite URLs to the same target. The number gets higher and higher.
9 | Due to bugs in Magento, once an rewrite URL ends with -[number] you get more and more rewrite URLs to the same target. The number gets higher and higher.
10 |
11 | OSL-3.0
12 | http://opensource.org/licenses/osl-3.0.php
13 | 1.6.0
14 | stable
15 | Added shell script to cleanup.
16 |
17 |
18 | MageHost BVBA
19 |
20 |
21 | jeroenvermeulen
22 |
23 |
24 | info@magehost.pro
25 |
26 |
27 | 5.3.0
28 | 6.0.0
29 |
30 |
31 |
32 |
33 | Mage_Core_Modules
34 |
35 |
36 |
37 | community
38 |
39 |
40 |
41 | 1.7.0.2
42 |
43 |
44 |
45 | 2.0.0.0
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 | Core
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 | mageetc
67 | magecommunity
68 | mage
69 |
70 |
71 | modules/MageHost_RewriteFix.xml
72 | MageHost/RewriteFix
73 | shell/mh_rewrite_cleanup.php
74 |
75 |
76 | file
77 | dir
78 | dir
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 | 1
92 | 200
93 |
94 |
95 |
96 |
--------------------------------------------------------------------------------