├── search_table.png ├── .gitignore ├── find_broken_link.py ├── README.md ├── local2github.py ├── LICENSE └── opencores_scraper.py /search_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabriziotappero/opencores-scraper/HEAD/search_table.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build and Release Folders 2 | bin/ 3 | bin-debug/ 4 | bin-release/ 5 | 6 | # Other files and folders 7 | .settings/ 8 | 9 | # Project files, i.e. `.project`, `.actionScriptProperties` and `.flexProperties` 10 | # should NOT be excluded as they contain compiler settings and other important 11 | # information for Eclipse / Flash Builder. 12 | -------------------------------------------------------------------------------- /find_broken_link.py: --------------------------------------------------------------------------------
#!/usr/bin/env python
#-*- coding:utf-8 -*-
'''
Python script to process the index.html file and validate the projects that
have a github link.
'''
# Input file that is scanned and output file that receives the kept lines.
_i = "index.html"
_o = "index_out.html"

import sys, re, requests

# Output handle stays open for the whole run and is closed on the last line.
_out = open(_o,"w")
_fnd=0    # number of links that answered with an OK status
_tot=0    # number of links that were checked
_whole=""

# Join the whole input file into a single string, dropping the newlines.
for line in open(_i, "r"):
    _whole += line.rstrip('\n')

# NOTE(review): as written, the search string is empty, so str.replace inserts
# '\n' between every character of _whole. The literal looks like it lost its
# original content (possibly an HTML tag) - confirm against the upstream file.
_whole=_whole.replace('','\n')
_whl = _whole.split('\n')

for ln in _whl:
    #print 'bingo'
    if len(ln)>0:
        # find the link inside this two marks (reg expression)
        # NOTE(review): the pattern "code" has no capture group, so the
        # m.group(1) call below raises IndexError whenever it matches. The
        # pattern appears truncated - confirm against the upstream file.
        m = re.search("code", ln)
        if m:
            _link = m.group(1)
            if len(_link)>0:
                _tot += 1

                # is it an existing page?
                print ("Checking link:", _link)
                anw = requests.get('https://github.com'+_link)
                if anw.ok:
                    # keep the line only when the github page answers OK
                    _out.write(ln+"\n")
                    print ("All good.")
                    _fnd += 1
                else:
                    # the line is dropped from the output in this case
                    print ("This link does not seem to exist.")
            else:
                _out.write(ln+"\n")
        else:
            # lines that do not match the pattern are copied unchanged
            _out.write(ln+"\n")


print ("Of a total of",_tot, "projects I have found and validated", _fnd)
_out.close()
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 4 | 5 |

6 | 7 | That's right, a simple searchable list of all VHDL projects: 8 | 9 | https://fabriziotappero.github.io/opencores-scraper/cores.html 10 | 11 | 12 | ## VHDL/Verilog IP CORES Scripts 13 | 14 | This repository contains a few Python scripts that connect to the website 15 | opencores.org and download from it approximately 4.5GB of VHDL/Verilog IP cores. 16 | Downloaded data is saved locally and, in a second step, uploaded to github. An 17 | index.html file containing an index of the available IP cores is generated. 18 | 19 | All downloaded IP cores, once saved locally, are then uploaded to this 20 | github repository: 21 | 22 | https://github.com/fabriziotappero/ip-cores 23 | 24 | The "ip-cores" repository accounts for approximately 800 projects spread across 16 25 | categories. There is a branch for each IP project. Since the whole repository 26 | is very large, you are advised to check out only the branch that you 27 | might be interested in. 28 | 29 | Available Python scripts: 30 | 31 | **opencores_scraper.py** downloads all opencores.org projects locally 32 | (in a local folder on your PC) and generates an index file, index.html. 33 | 34 | **local2github.py** analyzes a local folder and uploads its content to the github 35 | repository https://github.com/fabriziotappero/ip-cores 36 | 37 | 38 | -------------------------------------------------------------------------------- /local2github.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | # 4 | ''' 5 | This is a one-file python script that analyze the content of a local folder 6 | name ./cores and upload its content to github. 7 | 8 | This script is to be used after opencores_scraper.py for the purpose of getting 9 | all opencores.org code upload to a github account. 
10 | 11 | The Python libraries needed for this script can be installed with the command: 12 | 13 | sudo pip install tarfile 14 | 15 | HOW TO USE THIS SCRIPT 16 | 17 | 1) install python and its dependencies 18 | 2) configure the git address _github_addr 19 | 3) run this script with the command: ./local2git.py 20 | ''' 21 | _max_num_prjs = 1E99 # set to 1E99 if you are not debugging 22 | _github_addr = 'https://github.com/fabriziotappero/ip-cores.git' 23 | _cores_dir = "cores" 24 | 25 | import sys, os, shutil, glob 26 | import tarfile 27 | from distutils.dir_util import copy_tree 28 | 29 | prj_categ = next(os.walk(_cores_dir))[1] 30 | prjs = [] 31 | empty_prjs = 0 32 | for x in prj_categ: 33 | _path = os.path.join(_cores_dir,x) 34 | # get only projects with a tar.gz file in it(not empty) 35 | for y in next(os.walk(_path))[1]: 36 | z = os.listdir(_path + "/" + y) 37 | for elem in z: 38 | if elem.endswith(".tar.gz"): 39 | prjs.append([[x],[y]]) 40 | break 41 | 42 | # note that prjs stores both categories and projects 43 | print ("Number of local non empty projects: ", len(prjs)) 44 | 45 | # detect possible duplicates in branch names 46 | branches = [] 47 | for _ind,x in enumerate(prjs): 48 | prj_cat = x[0][0] 49 | prj_name = x[1][0] 50 | prj_branch = prj_cat+"_"+prj_name 51 | branches.append(prj_branch) 52 | dups = [x for x in branches if branches.count(x) > 1] 53 | if len(dups)>0: 54 | print ("ERROR. Projects with same branch name:", dups) 55 | sys.exit(0) 56 | 57 | 58 | _txt = ''' 59 | ## VHDL/Verilog IP Cores Repository 60 | 61 | This branch contains the following VHDL/Verilog IP Code: 62 | 63 | Project name: %s 64 | Project category: %s 65 | Project branch: %s 66 | 67 | This whole github repository is huge and, since IP cores are stored in separate 68 | branches, it is a good idea to just download the branch that you are interested 69 | in. This branch can be downloaded with the git command. 
70 | 71 | git clone -b %s --single-branch https://github.com/fabriziotappero/ip-cores.git 72 | 73 | A cool searchable index of the whole repo is available from www.freerangefactory.org. 74 | ''' 75 | 76 | _license=''' 77 | ### License 78 | 79 | The code of each IP core was taken "as is" from the website opencores.org. 80 | The copyright owner of each IP core is the author of the code itself. For 81 | more information refer to the website opencores.org 82 | 83 | Each branch of this repository is a SEPARATE and DISTINCT project. 84 | Although each project is licensed under one of the various open-source 85 | licenses, it is necessary to examine the project files to determine the 86 | specific terms of that project's license. 87 | 88 | ### DISCLAIMER 89 | 90 | I am not a lawyer and I do not represent this as something meeting any 91 | specific legal requirements. 92 | 93 | IF YOU BELIEVE THAT ANYTHING STORED IN THIS REPOSITORY IS INCORRECT OR 94 | IS THE CAUSE OF ANY PROBLEM, DO NO HESITATE TO CONTACT ME AND I WILL 95 | DO ALL I CAN TO FIX IT. 96 | 97 | This code is distributed in the hope that it will be useful, but WITHOUT 98 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 99 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 100 | for more details. 101 | ''' 102 | 103 | for _ind,x in enumerate(prjs): 104 | prj_cat = x[0][0] 105 | prj_name = x[1][0] 106 | prj_branch = prj_cat+"_"+prj_name 107 | _dir = os.path.join(_cores_dir, prj_cat, prj_name) 108 | 109 | if _ind>=_max_num_prjs: 110 | print (_max_num_prjs, "projects have been unzipped. Leaving...") 111 | break 112 | 113 | for _fl in os.listdir(_dir): 114 | if _fl.endswith('.tar.gz'): 115 | prj_real_name = _fl[: -7] 116 | # if project code is >180MB let's skip it 117 | if (os.path.getsize(os.path.join(_dir, _fl))/1.0E6) > 180:# MB 118 | print ("Project:",_fl, ">120MB. 
Skipping it") 119 | break 120 | try: 121 | tfile = tarfile.open(os.path.join(_dir, _fl), 'r:gz') 122 | tfile.extractall(os.path.join(_dir, 'tmp')) 123 | tfile.close() 124 | except: 125 | print ("ERROR. Problems unzipping repo:",os.path.join(_dir, _fl)) 126 | if os.path.exists(os.path.join(_dir, 'src')): 127 | shutil.rmtree(os.path.join(_dir, 'src')) 128 | 129 | # copy all svn trunk in fresh src folder. If trunk does not exist 130 | # copy the whole thing. 131 | if os.path.isdir(os.path.join(_dir, 'tmp', _fl[: -7], 'trunk')): 132 | #copy_tree(os.path.join(_dir, 'tmp', _fl[: -7], 'trunk'), os.path.join(_dir, 'src')) 133 | os.system('cp -Rf '+_dir+'/tmp/'+_fl[: -7]+'/trunk '+_dir+'/src') 134 | if os.path.isdir(os.path.join(_dir, 'tmp', _fl[: -7], 'web_uploads')): 135 | #copy_tree(os.path.join(_dir, 'tmp', _fl[: -7], 'web_uploads'), os.path.join(_dir, 'src')) 136 | os.system('cp -Rf '+_dir+'/tmp/'+_fl[: -7]+'/web_uploads '+_dir+'/src') 137 | 138 | # add README.md file and index file 139 | if os.path.isdir(os.path.join(_dir,'src')): 140 | with open(os.path.join(_dir,'src','README.md'), 'w') as _file: 141 | _file.write(_txt % (prj_name, prj_cat, prj_branch, prj_branch)+_license) 142 | if os.path.isfile(os.path.join(_dir, 'index.html')): 143 | if os.path.isdir(os.path.join(_dir,'src')): 144 | shutil.copyfile(os.path.join(_dir, 'index.html'), os.path.join(_dir, 'src','index.html')) 145 | 146 | # just in case you unzipped a zip file(one zip inside another) 147 | for _x in glob.glob(os.path.join(_dir, 'src', '*')): 148 | if _x.endswith('.tar.gz') or _x.endswith('.tgz'): 149 | tfile = tarfile.open(_x, 'r:gz') 150 | tfile.extractall(os.path.join(_dir, 'src')) 151 | tfile.close() 152 | os.remove(_x) 153 | 154 | # delete not needed files 155 | if os.path.isfile(os.path.join(_dir, _fl)): 156 | if False: 157 | # remove tar.gz file. Keep it if you like. 
158 | os.remove(os.path.join(_dir, _fl)) 159 | if os.path.isdir(os.path.join(_dir, 'tmp')): 160 | # remove original unzipped folder 161 | shutil.rmtree(os.path.join(_dir, 'tmp')) 162 | 163 | # proceed with git, created a local git folder 164 | # delete previous one 165 | _git_dir = os.path.join(_cores_dir, 'git_dir') 166 | if os.path.isdir(_git_dir): 167 | shutil.rmtree(_git_dir) 168 | os.mkdir(_git_dir) 169 | 170 | # download (locally) only master branch from the defaul github repository that 171 | # you specified at the beginning of this file 172 | os.system('git clone --depth=1 ' + _github_addr + ' '+_git_dir) 173 | 174 | # create a new branch per project. Copy the project content in it. 175 | for _ind,x in enumerate(prjs): 176 | prj_cat = x[0][0] 177 | prj_name = x[1][0] 178 | prj_branch = prj_cat+"_"+prj_name 179 | prj_dir = os.path.join(_cores_dir, prj_cat, prj_name) 180 | 181 | if _ind>=_max_num_prjs: 182 | print (_max_num_prjs, "projects have been unzipped. Leaving...") 183 | break 184 | 185 | if os.path.exists(os.path.join(prj_dir, 'src')) and len(os.listdir(os.path.join(prj_dir,'src')))>0: 186 | # this project is not empty 187 | os.chdir(_git_dir) 188 | # create new branch 189 | os.system('git checkout --orphan ' + prj_branch + ' >/dev/null') 190 | os.system('git rm --cached -r . 
>/dev/null') # empty new branch 191 | os.system('rm -Rf ./*') 192 | 193 | # add all project files into branch 194 | os.system('cp -Rf ../../'+prj_dir+'/src/* .') 195 | 196 | os.system('git add .') # add project into branch 197 | os.system("git commit -m 'added content for project'") # add project into branch 198 | os.system("git checkout master") 199 | os.chdir(os.path.join('..','..')) 200 | 201 | # build master branch 202 | os.chdir(_git_dir) 203 | os.system("git checkout master") 204 | os.system('rm -Rf ./*') 205 | with open("README.md", 'w') as _file: 206 | _file.writelines("## VHDL/Verilog IP Cores Repository\n\n") 207 | _file.writelines("This repository contains approximately 900 free and open-source VHDL/Verilog IP cores.\n") 208 | _file.write("Cores can be fetched idependently by downloading ony the branch\n") 209 | _file.write("you are interested in.\n\n") 210 | _file.write("A cool searchable index of the whole repo is available from www.freerangefactory.org.\n\n") 211 | _file.write("These are the available branches:\n\n") 212 | for _ind,x in enumerate(prjs): 213 | prj_cat = x[0][0] 214 | prj_name = x[1][0] 215 | prj_branch = prj_cat+"_"+prj_name 216 | _file.write(" "+prj_branch+"\n") 217 | _file.write(_license) 218 | 219 | os.system('git add .') 220 | os.system("git commit -m 'added content for project'") 221 | 222 | # if False: 223 | # # upload one by one all branches to github 224 | # for _ind,x in enumerate(prjs): 225 | # prj_cat = x[0][0] 226 | # prj_name = x[1][0] 227 | # prj_branch = prj_cat+"_"+prj_name 228 | # prj_dir = os.path.join(_cores_dir, prj_cat, prj_name) 229 | # 230 | # if len(os.listdir(os.path.join(prj_dir,'src')))>0: 231 | # os.chdir(_git_dir) 232 | # os.system('git checkout ' + prj_branch) 233 | # os.system('git push origin '+ prj_branch) 234 | # # manually enter login and password 235 | # os.chdir(os.path.join('..','..')) 236 | 237 | if False: 238 | # push all branches at once 239 | os.system('git push --force --all origin') 240 | # 
manually enter login and password 241 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 
32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /opencores_scraper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | # 4 | ''' 5 | This is a one-file python script that download locally the content of the WHOLE 6 | project section of the website opencores.org. 7 | The downloaded content is stored in a local folder. 8 | To use this script, an opencores.org account is needed. Also note that the whole 9 | opencores.org database is essily >4GB of data. 
10 | 11 | The Python libraries needed for this script can be installed with the command: 12 | 13 | sudo pip install beautifulsoup mechanize cssselect lxml 14 | ''' 15 | # 16 | # HOW TO USE THIS SCRIPT 17 | # 18 | # 0) install python and its dependencies: easy_install beautifulsoup meachanize 19 | # 1) make an account in opencores.org 20 | # 2) complete the "basic setup" section below with the login data 21 | # 3) run this script with the command: ./opencores_scraper.py >> oc.log 22 | # 23 | # 24 | #_______________________________ basic setup ___________________________________ 25 | # 26 | prj_per_cat_to_download = 1E99 # set to 1E99 to get all projects 27 | download_prj_svn = True # set to True to get opencores project svn (.zip) 28 | # your github repository 29 | oc_user='xxxxxxxx' # opencores.org login 30 | oc_pwd='xxxxxxxxxxxx' # opencores.org password 31 | 32 | #_______________________________ github link ________________________________ 33 | # 34 | # this link is just used for linking index.html with code source 35 | # 36 | _github_addr = 'https://github.com/fabriziotappero/ip-cores/' 37 | # 38 | #_______________________________________________________________________________ 39 | 40 | 41 | # import web scrape tools and other libs 42 | import re, sys, os, time 43 | import lxml.html, pickle, tarfile 44 | #import ftputil 45 | from bs4 import BeautifulSoup, Comment 46 | import mechanize 47 | import http.cookiejar as cookielib 48 | 49 | # function to get all opencores projects from a specific opencores URL 50 | def get_projects(_url): 51 | r = br.open(_url) 52 | _html_content = r.read() 53 | # convert the HTML into lxml object 54 | _lxml_content = lxml.html.fromstring(_html_content) 55 | 56 | # Extract all projects 57 | projects_name = [] 58 | projects_url = [] 59 | 60 | # Find all 'a' elements inside 'tbody tr.row1 td' 61 | for a in _lxml_content.cssselect('table tbody tr td a'): 62 | projects_name.append(a.text) 63 | 64 | # Find all 'a' elements inside 'tbody 
tr.row1 td' and 65 | # get the 'href' link 66 | links = _lxml_content.cssselect('tbody tr td a') 67 | for a in links: 68 | projects_url.append(a.get('href')) 69 | 70 | # make sure that number of projects is equal to the number of prj links 71 | if len(projects_name) != len(projects_url): 72 | print ('ERROR. some projects do not have a URL.') 73 | sys.exit(1) 74 | 75 | # clean up text with regular expressions because 76 | # project names contains unwanted spaces and carriage returns 77 | # replace/delete unwanted text 78 | for i,x in enumerate(projects_name): 79 | #x = x.encode('utf-8') 80 | x = x.lower() 81 | x = re.sub('(\\n *)','',x) 82 | x = re.sub(' +',' ',x) 83 | x = re.sub("'","",x) 84 | x = re.sub(',','',x) 85 | x = re.sub(':','',x) 86 | x = re.sub(';','',x) 87 | x = x.replace('(','') 88 | x = x.replace(')','') 89 | x = x.replace('[','') 90 | x = x.replace(']','') 91 | x = x.replace('.','') 92 | 93 | x = x.replace('&','and') 94 | x = x.replace('#','') 95 | x = x.replace('Ã','') 96 | x = x.replace('©','') 97 | 98 | x = re.sub(' - ','-',x) 99 | x = re.sub(' / ','/',x) 100 | x = x.lstrip().rstrip() 101 | if x.startswith('a '): x = x[2:] 102 | if len(x)>50: x=x[:50] 103 | projects_name[i] = x 104 | 105 | for i,x in enumerate(projects_url): 106 | #x = x.encode('utf-8') 107 | # x = re.sub('(\\n *)','',x) 108 | # x = re.sub(' +',' ',x) 109 | # x = x.lstrip().rstrip() 110 | projects_url[i]= "http://www.opencores.org" + x 111 | 112 | return projects_name, projects_url 113 | 114 | # structure to store everything 115 | class opencores(): 116 | def __init__(self,): 117 | self.categories=[] 118 | self.categories_num=0 119 | self.categories_url=[] 120 | self.projects_url=[] 121 | self.projects_name=[] 122 | self.projects_num=[] 123 | self.projects_html_info=[] 124 | self.projects_download_url=[] 125 | self.projects_can_be_downloaded=[] 126 | self.projects_created = [] 127 | self.projects_last_update = [] 128 | self.projects_archive_last_update = [] 129 | 
self.projects_lang = [] 130 | self.projects_license = [] 131 | self.projects_dev_status = [] 132 | 133 | # function to rename any multiple element of the list 'ar' 134 | # 'ar' must be a list of strings 135 | 136 | def rename_multiple(ar): 137 | #ar = ['a','er1','a4','erta','a','er'] 138 | for x in ar: 139 | i=[n for (n, e) in enumerate(ar) if e.lower() == x.lower()] 140 | #print i 141 | if len(i)>1: 142 | _ind=1 143 | for y in i: 144 | ar[y]=ar[y]+' '+str(_ind) 145 | _ind = _ind + 1 146 | print ('WARNING. '+\ 147 | 'Found two projects with same name. Will rename:', ar[y]) 148 | return ar 149 | 150 | # clean up html code from unwanted portions of the page 151 | def filter_html(in_html): 152 | doc = BeautifulSoup(in_html, features="lxml") 153 | 154 | #recs = doc.findAll("div", { "class": "class_name"}) 155 | 156 | # remove unwanted tags 157 | for div in doc.findAll('head'): 158 | div.extract() 159 | for div in doc.findAll(['i', 'h1', 'script']): 160 | div.extract() 161 | for div in doc.findAll('div','top'): 162 | div.extract() 163 | for div in doc.findAll('div','bot'): 164 | div.extract() 165 | for div in doc.findAll('div','line'): 166 | div.extract() 167 | for div in doc.findAll('div','mainmenu'): 168 | div.extract() 169 | for div in doc.findAll('div','banner'): 170 | div.extract() 171 | for div in doc.findAll('div','maintainers'): 172 | div.extract() 173 | 174 | #for div in doc.findAll('div', {'style':'clear:both;margin-left:200px;'}): 175 | # div.extract() 176 | 177 | # remove html comments 178 | comments = doc.findAll(string=lambda string:isinstance(string, Comment)) 179 | [comment.extract() for comment in comments] 180 | 181 | out_html = doc.body.prettify() 182 | 183 | # a little more cleaning up 184 | out_html = re.sub('(
)\\n','',out_html) 185 | out_html = re.sub('(
)\\n','',out_html) 186 | out_html = re.sub('
','
',out_html) 187 | out_html = re.sub('
\\n *
','
',out_html) 188 | out_html = re.sub('\\n *\\n','\\n',out_html) 189 | return out_html 190 | 191 | # get folder size 192 | def getFolderSize(folder='.'): 193 | total_size = os.path.getsize(folder) 194 | for item in os.listdir(folder): 195 | itempath = os.path.join(folder, item) 196 | if os.path.isfile(itempath): 197 | total_size += os.path.getsize(itempath) 198 | elif os.path.isdir(itempath): 199 | total_size += getFolderSize(itempath) 200 | return total_size 201 | 202 | def get_size(_path = '.'): 203 | total_size = getFolderSize(_path) 204 | if total_size >= 1.0E9: 205 | _out = str(round(total_size/1.0E9,2))+' GB' # return size in GB 206 | else: 207 | _out = str(round(total_size/1.0E6,2))+' MB' # return size in MB 208 | return _out 209 | 210 | 211 | 212 | ################################ MAIN ########################################## 213 | 214 | # create a structure to save all information from opencores.org 215 | opencores_mem = opencores() 216 | 217 | # Browser 218 | br = mechanize.Browser() 219 | 220 | # Cookie Jar 221 | cj = cookielib.LWPCookieJar() 222 | br.set_cookiejar(cj) 223 | 224 | # Browser options 225 | br.set_handle_equiv(True) 226 | #br.set_handle_gzip(True) 227 | br.set_handle_redirect(True) 228 | br.set_handle_referer(True) 229 | br.set_handle_robots(False) 230 | 231 | # Follows refresh 0 but not hangs on refresh > 0 232 | br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1) 233 | 234 | # Want debugging messages? 235 | #br.set_debug_http(True) 236 | #br.set_debug_redirects(True) 237 | #br.set_debug_responses(True) 238 | 239 | # User-Agent (this is cheating, ok?) 
br.addheaders = [('User-agent',
                  'Mozilla/5.0 (X11; U; Linux i686; en-US; '
                  'rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

# Open opencores.org login page and select the first form in the page
# maybe a better method to search for the form would be better
r = br.open("http://www.opencores.org/login")
br.select_form(nr=0)

# Authenticate and submit
br['user'] = oc_user
br['pass'] = oc_pwd

# TODO check that you have successfully authenticated
res = br.submit()
#print res.get_data()

# Access a password protected site
# (the timestamp has to be an argument of print(); written after the closing
# parenthesis it was evaluated and thrown away, so only 'Time:' was shown)
print ('Time:', time.asctime())
r = br.open("http://www.opencores.org/projects")
print ('Opening website: http://www.opencores.org/projects\n')

# Open page
_html_content = r.read()
_lxml_content = lxml.html.fromstring(_html_content) # turn HTML into lxml object

# Extract all project categories. The label is later pasted into a query
# string, so blanks are percent-encoded here. Title elements without any
# text cannot name a category and are skipped.
for el in _lxml_content.cssselect("span.title"):
    if el.text is None:
        continue
    opencores_mem.categories.append(el.text.replace(' ', '%20'))

# Build the listing url of each project category
opencores_mem.categories_url.extend(
    'http://www.opencores.org/projects?expanded=' + str(category)
    for category in opencores_mem.categories
)
print(opencores_mem.categories_url)

# Extract all project names for each url that defines a category
for i, x in enumerate(opencores_mem.categories_url):
    prjs_name, prjs_url = get_projects(x)
    opencores_mem.projects_url.append(prjs_url)
    opencores_mem.projects_name.append(prjs_name)
    opencores_mem.projects_num.append(len(prjs_url))

    # count how many projects there are in this specific category
    print ('Grand total of', len(prjs_url),
           'projects in the category:',
           opencores_mem.categories[i])

# count how many projects and categories there are
opencores_mem.categories_num = len(opencores_mem.categories)
print ('\n',
       'Total number of available projects:', sum(opencores_mem.projects_num))
print ('Total number of available categories:', opencores_mem.categories_num,'\n')
print ('Time:', time.asctime())


# create a structure used to store everything from opencores.org:
# one placeholder per project, grouped per category like projects_name
print ('Allocating memory to store opencores.org content.')
for x in opencores_mem.projects_name:
    opencores_mem.projects_html_info.append(['None']*len(x))
    opencores_mem.projects_download_url.append(['Unknown']*len(x))
    opencores_mem.projects_can_be_downloaded.append([True]*len(x))
    opencores_mem.projects_created.append(['Unknown']*len(x))
    opencores_mem.projects_last_update.append(['Unknown']*len(x))
    opencores_mem.projects_archive_last_update.append(['Unknown']*len(x))
    opencores_mem.projects_lang.append(['Unknown']*len(x))
    opencores_mem.projects_license.append(['Unknown']*len(x))
    opencores_mem.projects_dev_status.append(['Unknown']*len(x))

# Extract html info page and its latest SVN download link.
Do this for each project 319 | # since there is an html page for each projct, this routine will need some time 320 | prj_without_svn_count = 0 321 | for i,x in enumerate(opencores_mem.projects_name): 322 | 323 | print ('Project category:',opencores_mem.categories[i].upper()) 324 | # go throuh all the projects in each category 325 | for ii,y in enumerate(x): 326 | 327 | # exit if exiding max project per category 328 | if ii>prj_per_cat_to_download: 329 | break 330 | 331 | _url=opencores_mem.projects_url[i][ii] 332 | # let's download the content of the page handling a possible error 333 | errors = 0 334 | while errors < 3: 335 | try: 336 | print ('[' + time.asctime() + ']','\nDownloading HTML from:', _url) 337 | whole_html = br.open(_url).read() 338 | break 339 | except: 340 | print ("WARNING. Getting some http error. Trying again...") 341 | whole_html = None 342 | errors = errors +1 343 | if whole_html is None: 344 | continue 345 | 346 | _html = filter_html(whole_html) 347 | opencores_mem.projects_html_info[i][ii] = _html 348 | 349 | #extract project download link for each project 350 | _lxml_content = lxml.html.fromstring(whole_html) #turn the HTML into lxml object 351 | links = _lxml_content.cssselect('body a') #TODO this is maybe not so unique... 352 | # TODO find a better way to create the array: opencores_mem.projects_download_url 353 | found_flag = False 354 | for x in links: 355 | if x.text == 'download': 356 | # if it's not an empty link 357 | if x.get('href').replace('download,','') != '': 358 | opencores_mem.projects_download_url[i][ii] = 'http://www.opencores.org' + x.get('href') 359 | print ('Latest download link found at:\nhttp://www.opencores.org' + x.get('href')+'\n') 360 | found_flag = True 361 | break 362 | if not found_flag: 363 | opencores_mem.projects_download_url[i][ii] = 'No_svn_archive_link_available' 364 | print ('WARNING. LATEST SVN DOWNLOAD LINK NOT FOUND\n') 365 | prj_without_svn_count += 1 366 | 367 | # extract some info from the page. 
Because of the complicated structure 368 | # of these html pages, this info extraction is not so easy. 369 | # 370 | # created data 371 | try: 372 | _txt = _lxml_content.xpath("//*[contains(text(),'Details')]/following-sibling::*")[0].cssselect('br')[0].tail 373 | _txt = _txt.split(':')[-1] 374 | if _txt == None: _txt = 'Unknow' 375 | opencores_mem.projects_created[i][ii] = _txt 376 | except: 377 | pass 378 | # 379 | # last update date 380 | try: 381 | _txt = _lxml_content.xpath("//*[contains(text(),'Details')]/following-sibling::*")[0].cssselect('br')[1].tail 382 | if _txt == None or _txt == '': _txt = 'Unknow' 383 | _txt = _txt.split(':')[-1] 384 | _txt = re.sub(' +',' ',_txt) 385 | _txt = _txt.lstrip().rstrip() 386 | opencores_mem.projects_last_update[i][ii] = _txt 387 | except: 388 | pass 389 | # 390 | # archive last update date 391 | try: 392 | _txt = _lxml_content.xpath("//*[contains(text(),'Details')]/following-sibling::*")[0].cssselect('br')[2].tail 393 | if not 'SVN Updated:' in _txt: _txt = 'Unknow' 394 | if _txt == None or _txt == '': _txt = 'Unknow' 395 | _txt = _txt.split(':')[-1] 396 | _txt = re.sub(' +',' ',_txt) 397 | _txt = _txt.lstrip().rstrip() 398 | opencores_mem.projects_archive_last_update[i][ii] = _txt 399 | except: 400 | pass 401 | # 402 | # language 403 | try: 404 | #if _lxml_content.xpath("//h2[contains(text(),'Other project properties')]/following-sibling::*")[0].cssselect('a'): 405 | _txt = _lxml_content.xpath("//*[contains(text(),'Other project properties')]/following-sibling::*")[0].cssselect('a')[1].text 406 | if _txt == None: _txt = 'Unknow' 407 | opencores_mem.projects_lang[i][ii] = _txt 408 | except: 409 | pass 410 | # 411 | # development status 412 | try: 413 | _txt = _lxml_content.xpath("//*[contains(text(),'Other project properties')]/following-sibling::*")[0].cssselect('a')[2].text 414 | if _txt == None: _txt = 'Unknow' 415 | opencores_mem.projects_dev_status[i][ii] = _txt 416 | except: 417 | pass 418 | # 419 | # License 420 | 
try: 421 | _txt = _lxml_content.xpath("//*[contains(text(),'Other project properties')]/following-sibling::*")[0].cssselect('br')[4].tail 422 | _txt = _txt.replace('\n','') 423 | _txt = _txt.replace(' ','') 424 | if _txt == None or len(_txt)<=8: _txt = ':Unknown' 425 | _txt = _txt.split(':')[-1] 426 | opencores_mem.projects_license[i][ii] = _txt 427 | except: 428 | pass 429 | 430 | # REFERENCE. this is an other method to select elements inside an xml document 431 | # 432 | # created_date = _lxml_content.cssselect('div.content p')[0].cssselect('br')[0].tail 433 | # svn_link = _lxml_content.cssselect('div.content p')[0].cssselect('a')[2].get('href') 434 | # category = _lxml_content.cssselect('div.content p')[1].cssselect('a')[0].text 435 | 436 | ###################### this will download only some info files per category 437 | 438 | # based on the html information extracted for each project mark with False each 439 | # projects without an SVN link 440 | # TEST THIS 441 | for i,x in enumerate(opencores_mem.projects_name): 442 | for ii,y in enumerate(x): 443 | if 'No_svn_archive_link_available' in opencores_mem.projects_download_url[i][ii]: 444 | opencores_mem.projects_can_be_downloaded[i][ii] = False 445 | 446 | # rename any project name that appears double 447 | for i,x in enumerate(opencores_mem.projects_name): 448 | opencores_mem.projects_name[i] = rename_multiple(opencores_mem.projects_name[i]) 449 | 450 | # store locally all info about the latest content of opencores website 451 | # this file is not really used. pickle is a good way to store python stuff 452 | if os.path.isdir('./cores'): 453 | fl=open('cores/opencores_web_latest.pkl','wb') 454 | pickle.dump(opencores_mem, fl) 455 | fl.close() 456 | 457 | # create local folder structure 458 | if not os.path.exists('./cores'): 459 | os.makedirs('./cores') 460 | print ('Creating folder structure.') 461 | else: 462 | print ('WARNING. Local directory "./cores" already exists. 
Its content will be updated') 463 | 464 | for i,x in enumerate(opencores_mem.categories): 465 | x = re.sub(' ','_',x) 466 | x = re.sub('/','-',x) 467 | x = re.sub('%20','_',x) 468 | try: 469 | os.makedirs('./cores/'+x) 470 | print ('Creating folder:','./cores/'+x) 471 | except: 472 | pass 473 | for y in opencores_mem.projects_name[i]: 474 | y = re.sub(' ','_',y) 475 | y = re.sub('/','-',y) 476 | try: 477 | os.makedirs('./cores/'+x+'/'+y) 478 | print ('Creating folder:','./cores/'+x+'/'+y) 479 | except: 480 | pass 481 | 482 | # copying project html information in each project folder EVEN IF ALREADY EXISTS 483 | for i,x in enumerate(opencores_mem.categories): 484 | x = re.sub(' ','_',x) 485 | x = re.sub('/','-',x) 486 | for ii,y in enumerate(opencores_mem.projects_name[i]): 487 | y = re.sub(' ','_',y) 488 | y = re.sub('/','-',y) 489 | try: 490 | fl_nm = './cores/'+x+'/'+y+'/index.html' 491 | print ('Writing file:', fl_nm) 492 | fl=open(fl_nm,'w') 493 | 494 | # add style.css link 495 | _header = '\n'+'\n'+'\n' 496 | fl.write(_header) 497 | 498 | # clean up all links TODO this will actually delete all links... a more selective method could be better 499 | from lxml import etree 500 | tree = etree.fromstring(opencores_mem.projects_html_info[i][ii]) 501 | etree.strip_tags(tree,'a') 502 | _out = etree.tostring(tree,pretty_print=True) 503 | 504 | # delete the three links 505 | _out = re.sub('
\n *SVN:\n *\n *Browse','',_out) 506 | _out = re.sub('
\n *Latest version:\n *\n *download','',_out) 507 | _out = re.sub('
\n *Statistics:\n *\n *View','',_out) 508 | 509 | # add source code link at the top 510 | _link = opencores_mem.projects_download_url[i][ii].encode('utf-8') 511 | #source_ln = re.sub('http://www.opencores.org/download,', '', _link) 512 | #source_ln = source_ln +'.tar.gz' 513 | source_ln = _github_addr +"tree/"+x+"_"+y # encode project branch name 514 | 515 | fl.write('Go Back\n') 516 | fl.write("

Source code

\n") 517 | 518 | fl.write(_out) 519 | fl.write("\n") 520 | fl.close() 521 | except: 522 | pass 523 | 524 | # count how many projects actually have a downloadable source code file. 525 | av_size = 0 526 | for x in opencores_mem.projects_download_url: 527 | for y in x: 528 | if 'http://www.opencores.org/download' in y: 529 | av_size =av_size +1 530 | print ('\n','Total number of downloadable SVN project archives:', av_size) 531 | print ('NOTE. Of the', sum(opencores_mem.projects_num), \ 532 | 'project folders available on opencores.com only\n', \ 533 | av_size,'SVN project archives are available for download.') 534 | 535 | print ('Time:', time.asctime()) 536 | 537 | # load info about what was downloaded last time from local file and flag 538 | # what needs to be update/downloaded 539 | 540 | # let's begin from a download all configuration. Remember that 541 | # all flags are in fact set to "True" during the creation 542 | # of the list "opencores_mem.projects_can_be_downloaded" 543 | #DOWNLOAD_TYPE = 'total' 544 | 545 | # let's see now if we can avoid some downloads 546 | if os.path.isfile('./cores/opencores_local.pkl'): 547 | fl=open('./cores/opencores_local.pkl','r') 548 | opencores_mem_local = pickle.load(fl) 549 | fl.close() 550 | for i,x in enumerate(opencores_mem.projects_name): 551 | for ii,y in enumerate(x): 552 | # search for y project name in local project list of same category 553 | if y in opencores_mem_local.projects_name[i]: 554 | ind = opencores_mem_local.projects_name[i].index(y) # position of the project that might not need to be upgraded 555 | # compare the last update date and the last archive update date 556 | if opencores_mem.projects_last_update[i][ii] == opencores_mem_local.projects_last_update[i][ind]: 557 | if opencores_mem.projects_archive_last_update[i][ind] == opencores_mem_local.projects_archive_last_update[i][ind]: 558 | # bingo ! this project y does not need to be upgraded 559 | #DOWNLOAD_TYPE = 'partial' 560 | print ("WARNING. 
the project", y, "doesn't need to be downloaded.") 561 | opencores_mem.projects_can_be_downloaded[i][ii]=False 562 | del opencores_mem_local 563 | 564 | # let's download all project archives flagged as "True" in "opencores_mem.projects_can_be_downloaded" 565 | if download_prj_svn: 566 | _iii = 1 567 | print ('Ready to download', av_size,'.zip project archives.') 568 | dw_cnt = 0 569 | for i,x in enumerate(opencores_mem.projects_download_url): 570 | for ii,y in enumerate(x): 571 | 572 | # exit if exiding max project per category 573 | if ii>prj_per_cat_to_download: 574 | break 575 | 576 | #y = y.encode('utf-8') 577 | if ('http://www.opencores.org/download' in y) and opencores_mem.projects_can_be_downloaded[i][ii]==True: 578 | 579 | # download svn file. Here we do some error handling as done 580 | # when we downloaded the project html content 581 | errors = 0 582 | while errors < 3: 583 | try: 584 | r = br.open(y) 585 | tar_gz_content = r.read() 586 | print ("Downloaded repository", y) 587 | break 588 | except: 589 | print ("WARNING. Getting some http error. 
Trying again...") 590 | 591 | tar_gz_content = None 592 | errors = errors + 1 593 | if tar_gz_content is None: 594 | continue 595 | 596 | fl_nm = re.sub('http://www.opencores.org/download/','',y) 597 | a = re.sub(' ','_',opencores_mem.categories[i]) 598 | b = re.sub(' ','_',opencores_mem.projects_name[i][ii]) 599 | a = re.sub('/','-',a) 600 | a = re.sub('%20','_',a) 601 | b = re.sub('/','-',b) 602 | # let's make the file name unique so that later we can use it 603 | # as repository name (not used now) 604 | #fl_nm = str(_iii) + fl_nm 605 | #_iii +=1 606 | fl_nm = './cores/'+a+'/'+b+'/'+fl_nm+'.tar.gz' 607 | print ('Saving file:', fl_nm) 608 | fl=open(fl_nm, 'wb') 609 | fl.write(tar_gz_content) 610 | fl.close() 611 | dw_cnt = dw_cnt + 1 612 | print (dw_cnt, 'of',av_size,'.zip files downloaded.') 613 | print ('Total number of opencores.org projects:', sum(opencores_mem.projects_num)) 614 | print ('Total number of downloaded .zip projects:', dw_cnt) 615 | print ('Total number of project without .zip archive:', prj_without_svn_count) 616 | 617 | # now all projects must have been downloaded. We can now update the local 618 | # log file 619 | print ('Saving local log file: "./cores/opencores_local.pkl".') 620 | fl=open('./cores/opencores_local.pkl','wb') 621 | pickle.dump(opencores_mem, fl) 622 | fl.close() 623 | 624 | # create a global index.html with a list of all projects in a table format 625 | if not os.path.exists('./cores'): 626 | os.makedirs('./cores') 627 | fl=open('./cores/index.html','w') 628 | fl.writelines(''' 629 | 630 | 631 | 632 | 633 | Open-Source IP Core Server 634 | 635 | 636 | 637 | 638 | 647 | 648 | 649 | ''') 650 | fl.write('

About this PageLicense and disclaimer

') 651 | fl.write('

Database size: '+get_size('./cores')+'

\n') 652 | fl.write('

Available projects: '+str(sum(opencores_mem.projects_num))+'

\n') 653 | fl.write('

Project categories: '+str(len(opencores_mem.categories))+'

\n') 654 | fl.write(''' 655 |
656 |
Search: 657 | 658 |
659 |
660 | ''') 661 | 662 | fl.write(''' 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | 675 | ''') 676 | 677 | for i,x in enumerate(opencores_mem.projects_download_url): 678 | _c = opencores_mem.categories[i] 679 | fl.write("\n") 682 | for ii,y in enumerate(opencores_mem.projects_download_url[i]): 683 | #y = y.encode('utf-8') 684 | _n = opencores_mem.projects_name[i][ii] 685 | 686 | # skip this project if empty 687 | # It does NOT seem to work! 688 | if opencores_mem.projects_can_be_downloaded[i][ii]==False: 689 | #print "Project", _n, "seems empty. Skipping it." 690 | #break 691 | pass 692 | 693 | a = re.sub(' ','_',_c) 694 | b = re.sub(' ','_',_n) 695 | a = re.sub('/','-',a) 696 | b = re.sub('/','-',b) 697 | link = a+'/'+b+'/'+'index.html' 698 | source_ln = re.sub('http://www.opencores.org/download,', '', y) 699 | source_ln = a+'/'+b+'/'+ source_ln +'.tar.gz' 700 | 701 | # let´s link this iwith project source on the github webpage 702 | source_ln = _github_addr +"tree/"+a+"_"+b 703 | 704 | # shorten the language label if too long 705 | if len(opencores_mem.projects_lang[i][ii])>7: 706 | opencores_mem.projects_lang[i][ii]=opencores_mem.projects_lang[i][ii][:7] 707 | 708 | # lets put in the table a hidden field for each project with the info 709 | # from the project html page 710 | soup = BeautifulSoup(opencores_mem.projects_html_info[i][ii]) 711 | html_info = soup.text.encode('ascii','ignore') # you need to convert from unicode to text 712 | html_info = html_info[250:850] # trip it and just get the last 600 characters 713 | 714 | fl.write("\n") 729 | fl.write("
Project NameRepositoryLast UpdateLanguageDev. StatusLicense
") 680 | fl.write(' '+str(_c.upper())+''+'\n') 681 | fl.write("
") 715 | # here the use of a hidden field allows to bind this project with its 716 | # group. Very good for the search function. 717 | fl.write(""+_n+"") # project name 718 | fl.write("") 719 | fl.write("code") # source code link 720 | fl.write("") 721 | fl.write(opencores_mem.projects_last_update[i][ii]) # last update 722 | fl.write("") 723 | fl.write(opencores_mem.projects_lang[i][ii]) # language 724 | fl.write("") 725 | fl.write(opencores_mem.projects_dev_status[i][ii]) # dev. status 726 | fl.write("") 727 | fl.write(opencores_mem.projects_license[i][ii]) # license type 728 | fl.write("
\n") 730 | fl.write("\n") 731 | fl.write(' \n\n') 732 | fl.close() 733 | 734 | # created css file 735 | fl=open('./cores/style.css','w') 736 | fl.write(''' 737 | 738 | p { line-height: 1.2em; 739 | margin-bottom: 2px; 740 | margin-top: 2px;} 741 | 742 | 743 | body {min-width:820px; 744 | color: #333333; 745 | font-family: Arial,Helvetica,sans-serif; 746 | font-size : 11pt; 747 | margin-left: 10px; 748 | margin-right: 10px; 749 | margin-bottom: 10px; 750 | margin-top: 10px;} 751 | 752 | a {text-decoration: none; color: #1F7171;} 753 | a:hover {text-decoration: underline;} 754 | 755 | #h1,h2,h3 {margin:10px 0px 5px 0px;} 756 | 757 | form { margin: 50px 10px;} 758 | table { width: 100%; border-collapse: collapse; margin: 1em 0; } 759 | 760 | #id_search { 761 | -webkit-box-sizing: border-box; 762 | -moz-box-sizing: border-box; 763 | box-sizing: border-box; 764 | display: block; 765 | padding: 11px 7px; 766 | padding-right: 43px; 767 | background-color: #fff; 768 | font-size: 1.6em; 769 | color: #ccc; 770 | border: 1px solid #c8c8c8; 771 | border-bottom-color: #d2e2e7; 772 | -webkit-border-radius: 1px; 773 | -moz-border-radius: 1px; 774 | border-radius: 1px; 775 | -webkit-box-shadow: inset 0 1px 2px rgba(0,0,0,0.1), 0 0 0 6px #f0f0f0; 776 | -moz-box-shadow: inset 0 1px 2px rgba(0,0,0,0.1), 0 0 0 6px #f0f0f0; 777 | box-shadow: inset 0 1px 2px rgba(0,0,0,0.1), 0 0 0 6px #f0f0f0; 778 | -webkit-transition: all 0.4s linear; 779 | -moz-transition: all 0.4s linear; 780 | transition: all 0.4s linear; 781 | width: 100%; } 782 | 783 | .odd, .r1 { background: #fff; } 784 | .even, .r2 { background: #eee; } 785 | .r3 { background: #ebebeb; } 786 | .search { font-weight: bold; } 787 | .new { color: #f34105; text-transform: uppercase; font-size: 85%; margin-left: 3px; } 788 | 789 | 790 | thead th { background: #077474; color: #fff; } 791 | 792 | tbody th { text-align: left; } 793 | table th, table td { border: 1px solid #ddd; padding: 2px 5px; font-size: 95%; font-weight: normal; } 
794 | pre { font-size: 130%; background: #f7f7f7; padding: 10px 10px; font-weight: bold; } 795 | 796 | 797 | fieldset { border: 0px solid #ccc; padding: 5px;} 798 | #form input { font-size: 16px; border: 1px solid #ccc;} 799 | 800 | #foot{margin-top: 10px; 801 | text-align: center; 802 | color:#A8A8A8; 803 | font-size : 90%;} 804 | ''') 805 | fl.close() 806 | print ('Local style.css file created.') 807 | 808 | 809 | # created license.html file 810 | fl=open('./cores/license.html','w') 811 | fl.write(''' 812 | 813 | 814 | 815 | 816 | IP Cores - license 817 | 818 | 819 |

Disclaimer

820 |

We make no warranties regarding the correctness of the data and disclaim 821 | liability for damages resulting from its use.

822 |

This database is distributed in the hope that it will be useful, 823 | but WITHOUT ANY WARRANTY; without even the implied warranty of 824 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

825 |

We cannot provide unrestricted permission regarding the use of the data, 826 | as some data may be covered by a specific license or other rights. Please 827 | refer to the license notice that comes with each core project description.

828 | 829 | 830 | 831 | ''') 832 | fl.close() 833 | print ('Local license.html file created.') 834 | 835 | # created example.json file 836 | fl=open('./cores/example.json','w') 837 | fl.write(''' 838 | { 839 | "list_items": ["Loaded with Ajax", "Loaded with Ajax too"] 840 | } 841 | ''') 842 | fl.close() 843 | print ('Local example.json file created.') 844 | 845 | # created jquery.quicksearch.js file 846 | fl=open('./cores/jquery.quicksearch.js','w') 847 | fl.write(''' 848 | (function($, window, document, undefined) { 849 | $.fn.quicksearch = function (target, opt) { 850 | 851 | var timeout, cache, rowcache, jq_results, val = '', e = this, options = $.extend({ 852 | delay: 100, 853 | selector: null, 854 | stripeRows: null, 855 | loader: null, 856 | noResults: '', 857 | bind: 'keyup', 858 | onBefore: function () { 859 | return; 860 | }, 861 | onAfter: function () { 862 | return; 863 | }, 864 | show: function () { 865 | this.style.display = ""; 866 | }, 867 | hide: function () { 868 | this.style.display = "none"; 869 | }, 870 | prepareQuery: function (val) { 871 | return val.toLowerCase().split(' '); 872 | }, 873 | testQuery: function (query, txt, _row) { 874 | for (var i = 0; i < query.length; i += 1) { 875 | if (txt.indexOf(query[i]) === -1) { 876 | return false; 877 | } 878 | } 879 | return true; 880 | } 881 | }, opt); 882 | 883 | this.go = function () { 884 | 885 | var i = 0, 886 | noresults = true, 887 | query = options.prepareQuery(val), 888 | val_empty = (val.replace(' ', '').length === 0); 889 | 890 | for (var i = 0, len = rowcache.length; i < len; i++) { 891 | if (val_empty || options.testQuery(query, cache[i], rowcache[i])) { 892 | options.show.apply(rowcache[i]); 893 | noresults = false; 894 | } else { 895 | options.hide.apply(rowcache[i]); 896 | } 897 | } 898 | 899 | if (noresults) { 900 | this.results(false); 901 | } else { 902 | this.results(true); 903 | this.stripe(); 904 | } 905 | 906 | this.loader(false); 907 | options.onAfter(); 908 | 909 | return 
this; 910 | }; 911 | 912 | this.stripe = function () { 913 | 914 | if (typeof options.stripeRows === "object" && options.stripeRows !== null) 915 | { 916 | var joined = options.stripeRows.join(' '); 917 | var stripeRows_length = options.stripeRows.length; 918 | 919 | jq_results.not(':hidden').each(function (i) { 920 | $(this).removeClass(joined).addClass(options.stripeRows[i % stripeRows_length]); 921 | }); 922 | } 923 | 924 | return this; 925 | }; 926 | 927 | this.strip_html = function (input) { 928 | var output = input.replace(new RegExp('<[^<]+\>', 'g'), ""); 929 | output = $.trim(output.toLowerCase()); 930 | return output; 931 | }; 932 | 933 | this.results = function (bool) { 934 | if (typeof options.noResults === "string" && options.noResults !== "") { 935 | if (bool) { 936 | $(options.noResults).hide(); 937 | } else { 938 | $(options.noResults).show(); 939 | } 940 | } 941 | return this; 942 | }; 943 | 944 | this.loader = function (bool) { 945 | if (typeof options.loader === "string" && options.loader !== "") { 946 | (bool) ? $(options.loader).show() : $(options.loader).hide(); 947 | } 948 | return this; 949 | }; 950 | 951 | this.cache = function () { 952 | 953 | jq_results = $(target); 954 | 955 | if (typeof options.noResults === "string" && options.noResults !== "") { 956 | jq_results = jq_results.not(options.noResults); 957 | } 958 | 959 | var t = (typeof options.selector === "string") ? 
jq_results.find(options.selector) : $(target).not(options.noResults); 960 | cache = t.map(function () { 961 | return e.strip_html(this.innerHTML); 962 | }); 963 | 964 | rowcache = jq_results.map(function () { 965 | return this; 966 | }); 967 | 968 | return this.go(); 969 | }; 970 | 971 | this.trigger = function () { 972 | this.loader(true); 973 | options.onBefore(); 974 | 975 | window.clearTimeout(timeout); 976 | timeout = window.setTimeout(function () { 977 | e.go(); 978 | }, options.delay); 979 | 980 | return this; 981 | }; 982 | 983 | this.cache(); 984 | this.results(true); 985 | this.stripe(); 986 | this.loader(false); 987 | 988 | return this.each(function () { 989 | $(this).bind(options.bind, function () { 990 | val = $(this).val(); 991 | e.trigger(); 992 | }); 993 | }); 994 | }; 995 | }(jQuery, this, document)); 996 | ''') 997 | fl.close() 998 | print ('Local jquery.quicksearch.js file created.') 999 | --------------------------------------------------------------------------------