├── search_table.png
├── .gitignore
├── find_broken_link.py
├── README.md
├── local2github.py
├── LICENSE
└── opencores_scraper.py
/search_table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabriziotappero/opencores-scraper/HEAD/search_table.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build and Release Folders
2 | bin/
3 | bin-debug/
4 | bin-release/
5 |
6 | # Other files and folders
7 | .settings/
8 |
9 | # Project files, i.e. `.project`, `.actionScriptProperties` and `.flexProperties`
10 | # should NOT be excluded as they contain compiler settings and other important
11 | # information for Eclipse / Flash Builder.
12 |
--------------------------------------------------------------------------------
/find_broken_link.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 | '''
4 | Python script to process the index.html file and validate the projects that
5 | have a github link.
6 | '''
7 | _i = "index.html"  # input file, read line by line below
8 | _o = "index_out.html"  # output file, receives every line that is kept
9 | # NOTE(review): indentation was lost in this listing; the nesting described in the comments below is inferred from the if/else order.
10 | import sys, re, requests
11 |
12 | _out = open(_o,"w")  # closed explicitly by the last statement of the script
13 | _fnd=0  # links whose HTTP response reported ok
14 | _tot=0  # non-empty links extracted from the input
15 | _whole=""
16 | # join the whole input into a single string with the line breaks removed
17 | for line in open(_i, "r"):  # NOTE(review): this read handle is never closed explicitly
18 | _whole += line.rstrip('\n')
19 | # re-split the joined text into the records that are processed one by one
20 | _whole=_whole.replace('','\n')  # NOTE(review): as written the search string is empty; presumably a markup tag was lost from this literal - confirm against upstream
21 | _whl = _whole.split('\n')
22 |
23 | for ln in _whl:
24 | #print 'bingo'
25 | if len(ln)>0:  # empty records are not copied to the output
26 | # find the link between the two marks (regular expression). NOTE(review): the pattern below is split over two lines and has no capture group although m.group(1) is read - the literal looks truncated, confirm against upstream
27 | m = re.search("
code ", ln)
28 | if m:
29 | _link = m.group(1)
30 | if len(_link)>0:
31 | _tot += 1
32 |
33 | # is it an existing page? The link is appended to https://github.com and fetched.
34 | print ("Checking link:", _link)
35 | anw = requests.get('https://github.com'+_link)  # NOTE(review): no timeout is passed - confirm that a stalled request is acceptable here
36 | if anw.ok:
37 | _out.write(ln+"\n")  # link answered ok: keep the record
38 | print ("All good.")
39 | _fnd += 1
40 | else:  # request not ok: the record is reported and not written to the output
41 | print ("This link does not seem to exist.")
42 | else:  # presumably pairs with the empty-link test: record copied unchanged
43 | _out.write(ln+"\n")
44 | else:  # presumably pairs with `if m`: no link in this record, copied unchanged
45 | _out.write(ln+"\n")
46 |
47 |
48 | print ("Of a total of",_tot, "projects I have found and validated", _fnd)
49 | _out.close()
50 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | That's right, a simple searchable list of all VHDL projects:
8 |
9 | https://fabriziotappero.github.io/opencores-scraper/cores.html
10 |
11 |
12 | ## VHDL/Verilog IP CORES Scripts
13 |
14 | This repository contains a few Python scripts that connect to the website
15 | opencores.org and download from it approximately 4.5GB of VHDL/Verilog IP cores.
16 | The downloaded data is saved locally and, in a second step, uploaded to GitHub. An
17 | index.html file containing an index of the available IP cores is also generated.
18 |
19 | All downloaded IP cores, once saved locally, are then uploaded to this
20 | GitHub repository:
21 |
22 | https://github.com/fabriziotappero/ip-cores
23 |
24 | The "ip-cores" repository accounts for approximately 800 projects spread across 16
25 | categories. There is a branch for each IP project. Since the whole repository
26 | is very large, you are advised to check out only the branch that you
27 | might be interested in.
28 |
29 | Available Python scripts:
30 |
31 | **opencores_scraper.py** downloads all opencores.org projects locally
32 | (to a local folder on your PC) and generates an index file, index.html.
33 |
34 | **local2github.py** analyzes a local folder and uploads its content to the GitHub
35 | repository https://github.com/fabriziotappero/ip-cores
36 |
37 |
38 |
--------------------------------------------------------------------------------
/local2github.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 | #
4 | '''
5 | This is a one-file Python script that analyzes the content of a local folder
6 | named ./cores and uploads its content to GitHub.
7 |
8 | This script is to be used after opencores_scraper.py for the purpose of getting
9 | all opencores.org code uploaded to a GitHub account.
10 |
11 | The Python libraries needed for this script can be installed with the command:
12 |
13 | sudo pip install tarfile
14 |
15 | HOW TO USE THIS SCRIPT
16 |
17 | 1) install python and its dependencies
18 | 2) configure the git address _github_addr
19 | 3) run this script with the command: ./local2github.py
20 | '''
21 | _max_num_prjs = 1E99 # cap on how many projects the loops below process; set to 1E99 if you are not debugging
22 | _github_addr = 'https://github.com/fabriziotappero/ip-cores.git' # repository handed to `git clone` further down
23 | _cores_dir = "cores" # local folder that is walked for category/project sub-folders
24 |
25 | import sys, os, shutil, glob
26 | import tarfile
27 | from distutils.dir_util import copy_tree
28 |
# The first-level folders of the cores directory are the project categories.
prj_categ = next(os.walk(_cores_dir))[1]
prjs = []
empty_prjs = 0
for category in prj_categ:
    category_dir = os.path.join(_cores_dir, category)
    for project in next(os.walk(category_dir))[1]:
        entries = os.listdir(category_dir + "/" + project)
        # keep only the projects that hold at least one .tar.gz archive
        if any(name.endswith(".tar.gz") for name in entries):
            # one entry per project, shaped [[category], [project]];
            # the later loops read it back as x[0][0] and x[1][0]
            prjs.append([[category], [project]])

# prjs therefore carries both the category and the project name
print ("Number of local non empty projects: ", len(prjs))
44 |
# Detect branch-name collisions before anything is unpacked or committed:
# every project becomes the branch "<category>_<project>", so two projects
# must never map to the same name.
branches = []
for x in prjs:
    prj_cat = x[0][0]
    prj_name = x[1][0]
    prj_branch = prj_cat+"_"+prj_name
    branches.append(prj_branch)

# single pass with a "seen" set instead of calling list.count() per element;
# each colliding name is reported once, in first-collision order
_seen = set()
dups = []
for _name in branches:
    if _name in _seen and _name not in dups:
        dups.append(_name)
    _seen.add(_name)

if dups:
    print ("ERROR. Projects with same branch name:", dups)
    # exit with a non-zero status so a calling shell can tell the run failed
    sys.exit(1)
56 |
57 | # README template for one project branch; filled further down with (name, category, branch, branch) through the % operator
58 | _txt = '''
59 | ## VHDL/Verilog IP Cores Repository
60 |
61 | This branch contains the following VHDL/Verilog IP Code:
62 |
63 | Project name: %s
64 | Project category: %s
65 | Project branch: %s
66 |
67 | This whole github repository is huge and, since IP cores are stored in separate
68 | branches, it is a good idea to just download the branch that you are interested
69 | in. This branch can be downloaded with the git command.
70 |
71 | git clone -b %s --single-branch https://github.com/fabriziotappero/ip-cores.git
72 |
73 | A cool searchable index of the whole repo is available from www.freerangefactory.org.
74 | '''
75 | # license/disclaimer text appended to every README.md this script writes
76 | _license='''
77 | ### License
78 |
79 | The code of each IP core was taken "as is" from the website opencores.org.
80 | The copyright owner of each IP core is the author of the code itself. For
81 | more information refer to the website opencores.org
82 |
83 | Each branch of this repository is a SEPARATE and DISTINCT project.
84 | Although each project is licensed under one of the various open-source
85 | licenses, it is necessary to examine the project files to determine the
86 | specific terms of that project's license.
87 |
88 | ### DISCLAIMER
89 |
90 | I am not a lawyer and I do not represent this as something meeting any
91 | specific legal requirements.
92 |
93 | IF YOU BELIEVE THAT ANYTHING STORED IN THIS REPOSITORY IS INCORRECT OR
94 | IS THE CAUSE OF ANY PROBLEM, DO NO HESITATE TO CONTACT ME AND I WILL
95 | DO ALL I CAN TO FIX IT.
96 |
97 | This code is distributed in the hope that it will be useful, but WITHOUT
98 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
99 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
100 | for more details.
101 | '''
102 |
# Unpack every project archive into <project>/src and add a generated
# README.md (plus index.html, when the project has one) next to the code.
import subprocess  # imported here: only the copy steps below spawn `cp`

_max_archive_mb = 180   # archives larger than this are not unpacked
_keep_archives = True   # set to False to delete each .tar.gz once unpacked

for _ind,x in enumerate(prjs):
    prj_cat = x[0][0]
    prj_name = x[1][0]
    prj_branch = prj_cat+"_"+prj_name
    _dir = os.path.join(_cores_dir, prj_cat, prj_name)

    if _ind>=_max_num_prjs:
        print (_max_num_prjs, "projects have been unzipped. Leaving...")
        break

    for _fl in os.listdir(_dir):
        if not _fl.endswith('.tar.gz'):
            continue

        prj_real_name = _fl[: -7]  # archive name without ".tar.gz"
        _archive = os.path.join(_dir, _fl)
        _tmp = os.path.join(_dir, 'tmp')
        _src = os.path.join(_dir, 'src')

        # very large projects are skipped; the message is built from the
        # same constant as the test so the two cannot drift apart again
        if (os.path.getsize(_archive)/1.0E6) > _max_archive_mb:
            print ("Project:",_fl, ">%dMB. Skipping it" % _max_archive_mb)
            break  # also stops looking at this project's remaining files

        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; the archives come from an external website, so consider
        # an extraction filter once the minimum Python version allows it.
        try:
            # `with` closes the tar handle even when extraction fails
            with tarfile.open(_archive, 'r:gz') as tfile:
                tfile.extractall(_tmp)
        except Exception as err:
            # best effort, as before: report the broken archive and go on
            print ("ERROR. Problems unzipping repo:", _archive, err)

        # always start from a fresh src folder
        if os.path.exists(_src):
            shutil.rmtree(_src)

        # copy the svn trunk and/or the web uploads into src. The first copy
        # creates src itself; a second one lands inside it (cp -R semantics).
        # Arguments are passed as a list, so no shell parses the paths and
        # names with spaces or shell metacharacters are copied correctly.
        _unpacked = os.path.join(_tmp, prj_real_name)
        for _sub in ('trunk', 'web_uploads'):
            _from = os.path.join(_unpacked, _sub)
            if os.path.isdir(_from):
                subprocess.call(['cp', '-Rf', _from, _src])

        # add README.md file and index file
        if os.path.isdir(_src):
            with open(os.path.join(_src, 'README.md'), 'w') as _file:
                _file.write(_txt % (prj_name, prj_cat, prj_branch, prj_branch)+_license)
            _index = os.path.join(_dir, 'index.html')
            if os.path.isfile(_index):
                shutil.copyfile(_index, os.path.join(_src, 'index.html'))

        # just in case the archive contained further archives (one inside
        # another): unpack them in place and remove them when that worked
        for _x in glob.glob(os.path.join(_src, '*')):
            if _x.endswith(('.tar.gz', '.tgz')):
                try:
                    with tarfile.open(_x, 'r:gz') as tfile:
                        tfile.extractall(_src)
                except Exception as err:
                    print ("ERROR. Problems unzipping repo:", _x, err)
                else:
                    os.remove(_x)

        # delete not needed files; the downloaded archive is kept by default
        if not _keep_archives and os.path.isfile(_archive):
            os.remove(_archive)
        if os.path.isdir(_tmp):
            # remove original unzipped folder
            shutil.rmtree(_tmp)
162 |
# Proceed with git: work in a local git folder inside the cores directory,
# deleting the one left over from a previous run first.
_git_dir = os.path.join(_cores_dir, 'git_dir')
if os.path.isdir(_git_dir):
    shutil.rmtree(_git_dir)
os.mkdir(_git_dir)

# download (locally) only the master branch from the default GitHub
# repository that you specified at the beginning of this file
os.system(' '.join(['git clone --depth=1', _github_addr, _git_dir]))
173 |
# Create one orphan branch per project and commit the project's src folder
# into it.
for _ind, x in enumerate(prjs):
    prj_cat = x[0][0]
    prj_name = x[1][0]
    prj_branch = prj_cat+"_"+prj_name
    prj_dir = os.path.join(_cores_dir, prj_cat, prj_name)

    if _ind >= _max_num_prjs:
        print (_max_num_prjs, "projects have been unzipped. Leaving...")
        break

    # projects without a populated src folder get no branch
    _src = os.path.join(prj_dir, 'src')
    if not os.path.exists(_src) or len(os.listdir(_src)) == 0:
        continue

    os.chdir(_git_dir)
    for _cmd in (
        # create new branch
        'git checkout --orphan ' + prj_branch + ' >/dev/null',
        # empty new branch
        'git rm --cached -r . >/dev/null',
        'rm -Rf ./*',
        # add all project files into branch
        'cp -Rf ../../' + prj_dir + '/src/* .',
        'git add .',
        "git commit -m 'added content for project'",
        "git checkout master",
    ):
        os.system(_cmd)
    # back to the folder the script was started from
    os.chdir(os.path.join('..', '..'))
200 |
# Build the master branch: wipe the working tree and write a README.md that
# lists every project branch, followed by the license text.
os.chdir(_git_dir)
os.system("git checkout master")
os.system('rm -Rf ./*')

_readme_head = (
    "## VHDL/Verilog IP Cores Repository\n\n"
    "This repository contains approximately 900 free and open-source VHDL/Verilog IP cores.\n"
    "Cores can be fetched idependently by downloading ony the branch\n"
    "you are interested in.\n\n"
    "A cool searchable index of the whole repo is available from www.freerangefactory.org.\n\n"
    "These are the available branches:\n\n"
)
with open("README.md", 'w') as _file:
    _file.write(_readme_head)
    for _ind, x in enumerate(prjs):
        prj_cat = x[0][0]
        prj_name = x[1][0]
        prj_branch = prj_cat+"_"+prj_name
        _file.write(" " + prj_branch + "\n")
    _file.write(_license)

for _cmd in ('git add .', "git commit -m 'added content for project'"):
    os.system(_cmd)
221 |
222 | # if False:
223 | # # upload one by one all branches to github
224 | # for _ind,x in enumerate(prjs):
225 | # prj_cat = x[0][0]
226 | # prj_name = x[1][0]
227 | # prj_branch = prj_cat+"_"+prj_name
228 | # prj_dir = os.path.join(_cores_dir, prj_cat, prj_name)
229 | #
230 | # if len(os.listdir(os.path.join(prj_dir,'src')))>0:
231 | # os.chdir(_git_dir)
232 | # os.system('git checkout ' + prj_branch)
233 | # os.system('git push origin '+ prj_branch)
234 | # # manually enter login and password
235 | # os.chdir(os.path.join('..','..'))
236 |
237 | if False: # push is disabled: the statement below never runs unless this condition is edited by hand
238 | # push all branches at once
239 | os.system('git push --force --all origin')
240 | # manually enter login and password
241 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
341 |
--------------------------------------------------------------------------------
/opencores_scraper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 | #
4 | '''
5 | This is a one-file Python script that downloads the content of the WHOLE
6 | project section of the website opencores.org.
7 | The downloaded content is stored in a local folder.
8 | To use this script, an opencores.org account is needed. Also note that the whole
9 | opencores.org database is easily >4GB of data.
10 |
11 | The Python libraries needed for this script can be installed with the command:
12 |
13 | sudo pip install beautifulsoup mechanize cssselect lxml
14 | '''
15 | #
16 | # HOW TO USE THIS SCRIPT
17 | #
18 | # 0) install python and its dependencies: easy_install beautifulsoup mechanize
19 | # 1) make an account in opencores.org
20 | # 2) complete the "basic setup" section below with the login data
21 | # 3) run this script with the command: ./opencores_scraper.py >> oc.log
22 | #
23 | #
24 | #_______________________________ basic setup ___________________________________
25 | #
26 | prj_per_cat_to_download = 1E99 # set to 1E99 to get all projects
27 | download_prj_svn = True # set to True to get opencores project svn (.zip)
28 | # your github repository
29 | oc_user='xxxxxxxx' # opencores.org login
30 | oc_pwd='xxxxxxxxxxxx' # opencores.org password
31 |
32 | #_______________________________ github link ________________________________
33 | #
34 | # this link is just used for linking index.html with code source
35 | #
36 | _github_addr = 'https://github.com/fabriziotappero/ip-cores/'
37 | #
38 | #_______________________________________________________________________________
39 |
40 |
41 | # import web scrape tools and other libs
42 | import re, sys, os, time
43 | import lxml.html, pickle, tarfile
44 | #import ftputil
45 | from bs4 import BeautifulSoup, Comment
46 | import mechanize
47 | import http.cookiejar as cookielib
48 |
49 | # function to get all opencores projects from a specific opencores URL
def _clean_project_name(x):
    """Normalise a raw project title scraped from the project table.

    The title is lower-cased, stripped of line breaks, repeated spaces and
    punctuation, has a leading article ``'a '`` removed and is cut to at most
    50 characters.
    """
    x = x.lower()
    x = re.sub('(\\n *)', '', x)      # drop line breaks and the indentation after them
    x = re.sub(' +', ' ', x)          # collapse runs of spaces
    for unwanted in ("'", ',', ':', ';', '(', ')', '[', ']', '.'):
        x = x.replace(unwanted, '')

    x = x.replace('&', 'and')
    for unwanted in ('#', 'Ã', '©'):
        x = x.replace(unwanted, '')

    x = x.replace(' - ', '-')
    x = x.replace(' / ', '/')
    x = x.strip()
    if x.startswith('a '):
        x = x[2:]
    return x[:50]


# function to get all opencores projects from a specific opencores URL
def get_projects(_url):
    """Return the projects listed on the opencores page at ``_url``.

    The page is fetched through the module-level mechanize browser ``br``.

    Parameters:
        _url: address of a (category) project listing page.

    Returns:
        A tuple ``(projects_name, projects_url)`` of two lists of equal
        length: the cleaned-up project names and the matching absolute
        project URLs.
    """
    _html_content = br.open(_url).read()
    # convert the HTML into lxml object
    _lxml_content = lxml.html.fromstring(_html_content)

    projects_name = []
    projects_url = []

    # Take the name and the link from the SAME anchor element so that the two
    # lists can never get out of step with each other.
    for a in _lxml_content.cssselect('table tbody tr td a'):
        href = a.get('href')
        if not href:
            # an anchor without target cannot be turned into a project URL
            continue
        # a.text is None when the anchor only wraps child elements
        projects_name.append(_clean_project_name(a.text or ''))
        projects_url.append("http://www.opencores.org" + href)

    return projects_name, projects_url
113 |
114 | # structure to store everything
class opencores:
    """Plain container for everything scraped from opencores.org.

    The ``projects_*`` attributes are filled by the main script as lists of
    lists, addressed as ``[category index][project index]``.
    """

    def __init__(self):
        # --- categories ---
        self.categories = []
        self.categories_num = 0
        self.categories_url = []

        # --- projects: location and name ---
        self.projects_url = []
        self.projects_name = []
        self.projects_num = []

        # --- projects: page content and archive ---
        self.projects_html_info = []
        self.projects_download_url = []
        self.projects_can_be_downloaded = []

        # --- projects: metadata read from the project page ---
        self.projects_created = []
        self.projects_last_update = []
        self.projects_archive_last_update = []
        self.projects_lang = []
        self.projects_license = []
        self.projects_dev_status = []
132 |
133 | # function to rename any multiple element of the list 'ar'
134 | # 'ar' must be a list of strings
135 |
def rename_multiple(ar):
    """Make the names in ``ar`` unique by numbering the duplicates.

    Names are compared case-insensitively. Every member of a group of equal
    names gets ' 1', ' 2', ... appended, in list order. The list is modified
    in place and also returned.

    Example: ['a', 'er1', 'a4', 'erta', 'a', 'er'] becomes
    ['a 1', 'er1', 'a4', 'erta', 'a 2', 'er'].
    """
    for position in range(len(ar)):
        # read the element at visit time: it may already have been renamed
        wanted = ar[position].lower()
        twins = [k for k, name in enumerate(ar) if name.lower() == wanted]
        if len(twins) <= 1:
            continue
        for suffix, k in enumerate(twins, 1):
            ar[k] = ar[k] + ' ' + str(suffix)
            print('WARNING. ' +
                  'Found two projects with same name. Will rename:', ar[k])
    return ar
149 |
150 | # clean up html code from unwanted portions of the page
def filter_html(in_html):
    """Return a trimmed-down version of a project page.

    ``in_html`` is parsed with BeautifulSoup using the lxml parser. The
    ``head`` element, all ``i``/``h1``/``script`` elements, a fixed set of
    ``div`` elements and all HTML comments are removed. The remaining
    ``body`` is pretty-printed and run through a few regular-expression
    substitutions.

    Parameters:
        in_html: markup of the page to clean.

    Returns:
        The cleaned markup as a string.

    NOTE(review): ``doc.body`` is dereferenced without a check; presumably
    the parser always supplies a body element -- confirm.
    """
    doc = BeautifulSoup(in_html, features="lxml")

    #recs = doc.findAll("div", { "class": "class_name"})

    # remove unwanted tags
    for div in doc.findAll('head'):
        div.extract()
    for div in doc.findAll(['i', 'h1', 'script']):
        div.extract()
    # for the calls below the second positional argument of findAll is the
    # CSS class the div must carry
    for div in doc.findAll('div','top'):
        div.extract()
    for div in doc.findAll('div','bot'):
        div.extract()
    for div in doc.findAll('div','line'):
        div.extract()
    for div in doc.findAll('div','mainmenu'):
        div.extract()
    for div in doc.findAll('div','banner'):
        div.extract()
    for div in doc.findAll('div','maintainers'):
        div.extract()

    #for div in doc.findAll('div', {'style':'clear:both;margin-left:200px;'}):
    #    div.extract()

    # remove html comments
    comments = doc.findAll(string=lambda string:isinstance(string, Comment))
    # the list built here is discarded; only the extract() side effect matters
    [comment.extract() for comment in comments]

    out_html = doc.body.prettify()

    # a little more cleaning up
    # NOTE(review): the first four patterns below look as if they once
    # contained HTML tags/entities; as written, the first one matches every
    # newline. Verify them against the upstream file before relying on them.
    out_html = re.sub('()\\n','',out_html)
    out_html = re.sub('( )\\n','',out_html)
    out_html = re.sub(' ',' ',out_html)
    out_html = re.sub(' \\n * ',' ',out_html)
    # collapse a blank (whitespace-only) line into a single line break
    out_html = re.sub('\\n *\\n','\\n',out_html)
    return out_html
190 |
191 | # get folder size
def getFolderSize(folder='.'):
    """Return the total size in bytes of ``folder`` and everything below it.

    The result is the size reported for the directory entry itself plus the
    size of every regular file and, recursively, of every sub-directory.

    Symbolic links pointing to files are counted with the size of their
    target. Symbolic links pointing to directories are NOT descended into:
    following them counts data twice and never terminates properly when a
    link points back to one of its own parent folders.

    Parameters:
        folder: path of the directory to measure (default: current folder).

    Returns:
        The accumulated size in bytes as an integer.
    """
    total_size = os.path.getsize(folder)
    for item in os.listdir(folder):
        itempath = os.path.join(folder, item)
        if os.path.isfile(itempath):
            total_size += os.path.getsize(itempath)
        elif os.path.isdir(itempath) and not os.path.islink(itempath):
            total_size += getFolderSize(itempath)
    return total_size
201 |
def get_size(_path = '.'):
    """Return the size of the folder ``_path`` as human readable text.

    The size comes from getFolderSize(). It is rounded to two decimals and
    expressed in GB from 1.0E9 bytes upwards, otherwise in MB.
    """
    size_in_bytes = getFolderSize(_path)
    if size_in_bytes < 1.0E9:
        divisor, unit = 1.0E6, ' MB'    # below one GB: report in MB
    else:
        divisor, unit = 1.0E9, ' GB'
    return str(round(size_in_bytes / divisor, 2)) + unit
209 |
210 |
211 |
212 | ################################ MAIN ##########################################
213 |
214 | # create a structure to save all information from opencores.org
215 | opencores_mem = opencores()
216 |
217 | # Browser
218 | br = mechanize.Browser()
219 |
220 | # Cookie Jar
221 | cj = cookielib.LWPCookieJar()
222 | br.set_cookiejar(cj)
223 |
224 | # Browser options
225 | br.set_handle_equiv(True)
226 | #br.set_handle_gzip(True)
227 | br.set_handle_redirect(True)
228 | br.set_handle_referer(True)
229 | br.set_handle_robots(False)
230 |
231 | # Follows refresh 0 but not hangs on refresh > 0
232 | br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
233 |
234 | # Want debugging messages?
235 | #br.set_debug_http(True)
236 | #br.set_debug_redirects(True)
237 | #br.set_debug_responses(True)
238 |
239 | # User-Agent (this is cheating, ok?)
240 | br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; '+\
241 | 'rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
242 |
243 | # Open opencores.org login page and select the first form in the page
244 | # maybe a better method to search for the form would be better
245 | r = br.open("http://www.opencores.org/login")
246 | br.select_form(nr=0)
247 |
248 | # Authenticate and submit
249 | br['user'] = oc_user
250 | br['pass'] = oc_pwd
251 |
252 | # TODO check that you have successfully authenticated
253 | res = br.submit()
254 | #print res.get_data()
255 |
# Access a password protected site
# (the timestamp has to be passed to print() itself; written as
# "print ('Time:'), time.asctime()" it is computed but never shown)
print('Time:', time.asctime())
r = br.open("http://www.opencores.org/projects")
print('Opening website: http://www.opencores.org/projects\n')
260 |
261 | # Open page
262 | _html_content = r.read()
263 | _lxml_content = lxml.html.fromstring(_html_content) # turn HTML into lxml object
264 |
265 | # Extract all project categories with some cleaning
266 | for el in _lxml_content.cssselect("span.title"):
267 | x = el.text
268 | #x = x.decode('utf-8')
269 | #x = str(x)
270 | #x = x.lower()
271 | #x = re.sub(' +',' ',x)
272 | #x = re.sub(' - ','-',x)
273 | #x = re.sub(' / ','/',x)
274 | #x = x.lstrip().rstrip()
275 | x = re.sub(' ', '%20',x)
276 | # if x.startswith('a '): x = x[2:]
277 | # if len(x)>50: x=x[:50]
278 | opencores_mem.categories.append(x)
279 |
280 | # Extract all url for each project category
281 | # with: "GET http://opencores.org/projects,category,0"
282 | for x in range(len(opencores_mem.categories)):
283 | opencores_mem.categories_url.append('http://www.opencores.org/projects?expanded='+str(opencores_mem.categories[x]))
284 | print(opencores_mem.categories_url)
285 | # Extract all project names for each url that defines a category
286 | for i,x in enumerate(opencores_mem.categories_url):
287 | prjs_name, prjs_url = get_projects(x)
288 | opencores_mem.projects_url.append(prjs_url)
289 | opencores_mem.projects_name.append(prjs_name)
290 | opencores_mem.projects_num.append(len(prjs_url))
291 |
292 | # count how many projects there are in this specific category
293 | print ('Grand total of',len(prjs_url),\
294 | 'projects in the category:',\
295 | opencores_mem.categories[i])
296 |
297 | # count how many projects and categories there are
298 | opencores_mem.categories_num = len(opencores_mem.categories)
299 | print ('\n',\
300 | 'Total number of available projects:', sum(opencores_mem.projects_num))
301 | print ('Total number of available categories:', opencores_mem.categories_num,'\n')
302 | print ('Time:', time.asctime())
303 |
304 |
305 | # create a structure used to store everything from opencores.org
306 | print ('Allocating memory to store opencores.org content.')
307 | for x in opencores_mem.projects_name:
308 | opencores_mem.projects_html_info.append(['None']*len(x))
309 | opencores_mem.projects_download_url.append(['Unknown']*len(x))
310 | opencores_mem.projects_can_be_downloaded.append([True]*len(x))
311 | opencores_mem.projects_created.append(['Unknown']*len(x))
312 | opencores_mem.projects_last_update.append(['Unknown']*len(x))
313 | opencores_mem.projects_archive_last_update.append(['Unknown']*len(x))
314 | opencores_mem.projects_lang.append(['Unknown']*len(x))
315 | opencores_mem.projects_license.append(['Unknown']*len(x))
316 | opencores_mem.projects_dev_status.append(['Unknown']*len(x))
317 |
318 | # Extract html info page and its latest SVN download link. Do this for each project
319 | # since there is an html page for each project, this routine will need some time
320 | prj_without_svn_count = 0
321 | for i,x in enumerate(opencores_mem.projects_name):
322 |
323 | print ('Project category:',opencores_mem.categories[i].upper())
324 | # go throuh all the projects in each category
325 | for ii,y in enumerate(x):
326 |
327 | # exit if exiding max project per category
328 | if ii>prj_per_cat_to_download:
329 | break
330 |
331 | _url=opencores_mem.projects_url[i][ii]
332 | # let's download the content of the page handling a possible error
333 | errors = 0
334 | while errors < 3:
335 | try:
336 | print ('[' + time.asctime() + ']','\nDownloading HTML from:', _url)
337 | whole_html = br.open(_url).read()
338 | break
339 | except:
340 | print ("WARNING. Getting some http error. Trying again...")
341 | whole_html = None
342 | errors = errors +1
343 | if whole_html is None:
344 | continue
345 |
346 | _html = filter_html(whole_html)
347 | opencores_mem.projects_html_info[i][ii] = _html
348 |
349 | #extract project download link for each project
350 | _lxml_content = lxml.html.fromstring(whole_html) #turn the HTML into lxml object
351 | links = _lxml_content.cssselect('body a') #TODO this is maybe not so unique...
352 | # TODO find a better way to create the array: opencores_mem.projects_download_url
353 | found_flag = False
354 | for x in links:
355 | if x.text == 'download':
356 | # if it's not an empty link
357 | if x.get('href').replace('download,','') != '':
358 | opencores_mem.projects_download_url[i][ii] = 'http://www.opencores.org' + x.get('href')
359 | print ('Latest download link found at:\nhttp://www.opencores.org' + x.get('href')+'\n')
360 | found_flag = True
361 | break
362 | if not found_flag:
363 | opencores_mem.projects_download_url[i][ii] = 'No_svn_archive_link_available'
364 | print ('WARNING. LATEST SVN DOWNLOAD LINK NOT FOUND\n')
365 | prj_without_svn_count += 1
366 |
367 | # extract some info from the page. Because of the complicated structure
368 | # of these html pages, this info extraction is not so easy.
369 | #
370 | # created data
371 | try:
372 | _txt = _lxml_content.xpath("//*[contains(text(),'Details')]/following-sibling::*")[0].cssselect('br')[0].tail
373 | _txt = _txt.split(':')[-1]
374 | if _txt == None: _txt = 'Unknow'
375 | opencores_mem.projects_created[i][ii] = _txt
376 | except:
377 | pass
378 | #
379 | # last update date
380 | try:
381 | _txt = _lxml_content.xpath("//*[contains(text(),'Details')]/following-sibling::*")[0].cssselect('br')[1].tail
382 | if _txt == None or _txt == '': _txt = 'Unknow'
383 | _txt = _txt.split(':')[-1]
384 | _txt = re.sub(' +',' ',_txt)
385 | _txt = _txt.lstrip().rstrip()
386 | opencores_mem.projects_last_update[i][ii] = _txt
387 | except:
388 | pass
389 | #
390 | # archive last update date
391 | try:
392 | _txt = _lxml_content.xpath("//*[contains(text(),'Details')]/following-sibling::*")[0].cssselect('br')[2].tail
393 | if not 'SVN Updated:' in _txt: _txt = 'Unknow'
394 | if _txt == None or _txt == '': _txt = 'Unknow'
395 | _txt = _txt.split(':')[-1]
396 | _txt = re.sub(' +',' ',_txt)
397 | _txt = _txt.lstrip().rstrip()
398 | opencores_mem.projects_archive_last_update[i][ii] = _txt
399 | except:
400 | pass
401 | #
402 | # language
403 | try:
404 | #if _lxml_content.xpath("//h2[contains(text(),'Other project properties')]/following-sibling::*")[0].cssselect('a'):
405 | _txt = _lxml_content.xpath("//*[contains(text(),'Other project properties')]/following-sibling::*")[0].cssselect('a')[1].text
406 | if _txt == None: _txt = 'Unknow'
407 | opencores_mem.projects_lang[i][ii] = _txt
408 | except:
409 | pass
410 | #
411 | # development status
412 | try:
413 | _txt = _lxml_content.xpath("//*[contains(text(),'Other project properties')]/following-sibling::*")[0].cssselect('a')[2].text
414 | if _txt == None: _txt = 'Unknow'
415 | opencores_mem.projects_dev_status[i][ii] = _txt
416 | except:
417 | pass
418 | #
419 | # License
420 | try:
421 | _txt = _lxml_content.xpath("//*[contains(text(),'Other project properties')]/following-sibling::*")[0].cssselect('br')[4].tail
422 | _txt = _txt.replace('\n','')
423 | _txt = _txt.replace(' ','')
424 | if _txt == None or len(_txt)<=8: _txt = ':Unknown'
425 | _txt = _txt.split(':')[-1]
426 | opencores_mem.projects_license[i][ii] = _txt
427 | except:
428 | pass
429 |
430 | # REFERENCE. this is an other method to select elements inside an xml document
431 | #
432 | # created_date = _lxml_content.cssselect('div.content p')[0].cssselect('br')[0].tail
433 | # svn_link = _lxml_content.cssselect('div.content p')[0].cssselect('a')[2].get('href')
434 | # category = _lxml_content.cssselect('div.content p')[1].cssselect('a')[0].text
435 |
436 | ###################### this will download only some info files per category
437 |
438 | # based on the html information extracted for each project mark with False each
439 | # projects without an SVN link
440 | # TEST THIS
441 | for i,x in enumerate(opencores_mem.projects_name):
442 | for ii,y in enumerate(x):
443 | if 'No_svn_archive_link_available' in opencores_mem.projects_download_url[i][ii]:
444 | opencores_mem.projects_can_be_downloaded[i][ii] = False
445 |
446 | # rename any project name that appears double
447 | for i,x in enumerate(opencores_mem.projects_name):
448 | opencores_mem.projects_name[i] = rename_multiple(opencores_mem.projects_name[i])
449 |
450 | # store locally all info about the latest content of opencores website
451 | # this file is not really used. pickle is a good way to store python stuff
452 | if os.path.isdir('./cores'):
453 | fl=open('cores/opencores_web_latest.pkl','wb')
454 | pickle.dump(opencores_mem, fl)
455 | fl.close()
456 |
457 | # create local folder structure
458 | if not os.path.exists('./cores'):
459 | os.makedirs('./cores')
460 | print ('Creating folder structure.')
461 | else:
462 | print ('WARNING. Local directory "./cores" already exists. Its content will be updated')
463 |
464 | for i,x in enumerate(opencores_mem.categories):
465 | x = re.sub(' ','_',x)
466 | x = re.sub('/','-',x)
467 | x = re.sub('%20','_',x)
468 | try:
469 | os.makedirs('./cores/'+x)
470 | print ('Creating folder:','./cores/'+x)
471 | except:
472 | pass
473 | for y in opencores_mem.projects_name[i]:
474 | y = re.sub(' ','_',y)
475 | y = re.sub('/','-',y)
476 | try:
477 | os.makedirs('./cores/'+x+'/'+y)
478 | print ('Creating folder:','./cores/'+x+'/'+y)
479 | except:
480 | pass
481 |
482 | # copying project html information in each project folder EVEN IF ALREADY EXISTS
483 | for i,x in enumerate(opencores_mem.categories):
484 | x = re.sub(' ','_',x)
485 | x = re.sub('/','-',x)
486 | for ii,y in enumerate(opencores_mem.projects_name[i]):
487 | y = re.sub(' ','_',y)
488 | y = re.sub('/','-',y)
489 | try:
490 | fl_nm = './cores/'+x+'/'+y+'/index.html'
491 | print ('Writing file:', fl_nm)
492 | fl=open(fl_nm,'w')
493 |
494 | # add style.css link
495 | _header = '\n'+' \n'+'\n'
496 | fl.write(_header)
497 |
498 | # clean up all links TODO this will actually delete all links... a more selective method could be better
499 | from lxml import etree
500 | tree = etree.fromstring(opencores_mem.projects_html_info[i][ii])
501 | etree.strip_tags(tree,'a')
502 | _out = etree.tostring(tree,pretty_print=True)
503 |
504 | # delete the three links
505 | _out = re.sub(' \n *SVN:\n *\n *Browse','',_out)
506 | _out = re.sub(' \n *Latest version:\n *\n *download','',_out)
507 | _out = re.sub(' \n *Statistics:\n *\n *View','',_out)
508 |
509 | # add source code link at the top
510 | _link = opencores_mem.projects_download_url[i][ii].encode('utf-8')
511 | #source_ln = re.sub('http://www.opencores.org/download,', '', _link)
512 | #source_ln = source_ln +'.tar.gz'
513 | source_ln = _github_addr +"tree/"+x+"_"+y # encode project branch name
514 |
515 | fl.write('Go Back \n')
516 | fl.write("Source code
\n")
517 |
518 | fl.write(_out)
519 | fl.write("\n")
520 | fl.close()
521 | except:
522 | pass
523 |
524 | # count how many projects actually have a downloadable source code file.
525 | av_size = 0
526 | for x in opencores_mem.projects_download_url:
527 | for y in x:
528 | if 'http://www.opencores.org/download' in y:
529 | av_size =av_size +1
530 | print ('\n','Total number of downloadable SVN project archives:', av_size)
531 | print ('NOTE. Of the', sum(opencores_mem.projects_num), \
532 | 'project folders available on opencores.com only\n', \
533 | av_size,'SVN project archives are available for download.')
534 |
535 | print ('Time:', time.asctime())
536 |
537 | # load info about what was downloaded last time from local file and flag
538 | # what needs to be update/downloaded
539 |
540 | # let's begin from a download all configuration. Remember that
541 | # all flags are in fact set to "True" during the creation
542 | # of the list "opencores_mem.projects_can_be_downloaded"
543 | #DOWNLOAD_TYPE = 'total'
544 |
545 | # let's see now if we can avoid some downloads
if os.path.isfile('./cores/opencores_local.pkl'):
    # The snapshot is written by this script with pickle.dump() on a file
    # opened in 'wb' mode, so it has to be read back in binary mode as well.
    # NOTE: pickle.load() executes whatever the file describes; only load a
    # snapshot that this script produced itself.
    with open('./cores/opencores_local.pkl', 'rb') as fl:
        opencores_mem_local = pickle.load(fl)

    for i, x in enumerate(opencores_mem.projects_name):
        # a category that did not exist at the time of the last run has
        # nothing to compare against: everything in it must be downloaded
        if i >= len(opencores_mem_local.projects_name):
            continue
        for ii, y in enumerate(x):
            # search for y project name in local project list of same category
            if y not in opencores_mem_local.projects_name[i]:
                continue
            # position of the project that might not need to be upgraded
            ind = opencores_mem_local.projects_name[i].index(y)
            # compare the last update date and the last archive update date;
            # the freshly scraped project lives at [i][ii], the stored one at
            # [i][ind]
            same_update = (opencores_mem.projects_last_update[i][ii] ==
                           opencores_mem_local.projects_last_update[i][ind])
            same_archive = (opencores_mem.projects_archive_last_update[i][ii] ==
                            opencores_mem_local.projects_archive_last_update[i][ind])
            if same_update and same_archive:
                # bingo ! this project y does not need to be upgraded
                print("WARNING. the project", y, "doesn't need to be downloaded.")
                opencores_mem.projects_can_be_downloaded[i][ii] = False
    del opencores_mem_local
563 |
564 | # let's download all project archives flagged as "True" in "opencores_mem.projects_can_be_downloaded"
565 | if download_prj_svn:
566 | _iii = 1
567 | print ('Ready to download', av_size,'.zip project archives.')
568 | dw_cnt = 0
569 | for i,x in enumerate(opencores_mem.projects_download_url):
570 | for ii,y in enumerate(x):
571 |
572 | # exit if exiding max project per category
573 | if ii>prj_per_cat_to_download:
574 | break
575 |
576 | #y = y.encode('utf-8')
577 | if ('http://www.opencores.org/download' in y) and opencores_mem.projects_can_be_downloaded[i][ii]==True:
578 |
579 | # download svn file. Here we do some error handling as done
580 | # when we downloaded the project html content
581 | errors = 0
582 | while errors < 3:
583 | try:
584 | r = br.open(y)
585 | tar_gz_content = r.read()
586 | print ("Downloaded repository", y)
587 | break
588 | except:
589 | print ("WARNING. Getting some http error. Trying again...")
590 |
591 | tar_gz_content = None
592 | errors = errors + 1
593 | if tar_gz_content is None:
594 | continue
595 |
596 | fl_nm = re.sub('http://www.opencores.org/download/','',y)
597 | a = re.sub(' ','_',opencores_mem.categories[i])
598 | b = re.sub(' ','_',opencores_mem.projects_name[i][ii])
599 | a = re.sub('/','-',a)
600 | a = re.sub('%20','_',a)
601 | b = re.sub('/','-',b)
602 | # let's make the file name unique so that later we can use it
603 | # as repository name (not used now)
604 | #fl_nm = str(_iii) + fl_nm
605 | #_iii +=1
606 | fl_nm = './cores/'+a+'/'+b+'/'+fl_nm+'.tar.gz'
607 | print ('Saving file:', fl_nm)
608 | fl=open(fl_nm, 'wb')
609 | fl.write(tar_gz_content)
610 | fl.close()
611 | dw_cnt = dw_cnt + 1
612 | print (dw_cnt, 'of',av_size,'.zip files downloaded.')
613 | print ('Total number of opencores.org projects:', sum(opencores_mem.projects_num))
614 | print ('Total number of downloaded .zip projects:', dw_cnt)
615 | print ('Total number of project without .zip archive:', prj_without_svn_count)
616 |
617 | # now all projects must have been downloaded. We can now update the local
618 | # log file
619 | print ('Saving local log file: "./cores/opencores_local.pkl".')
620 | fl=open('./cores/opencores_local.pkl','wb')
621 | pickle.dump(opencores_mem, fl)
622 | fl.close()
623 |
624 | # create a global index.html with a list of all projects in a table format
625 | if not os.path.exists('./cores'):
626 | os.makedirs('./cores')
627 | fl=open('./cores/index.html','w')
628 | fl.writelines('''
629 |
630 |
631 |
632 |
633 | Open-Source IP Core Server
634 |
635 |
636 |
637 |
638 |
647 |
648 |
649 | ''')
650 | fl.write('About this Page • License and disclaimer
')
651 | fl.write('Database size: '+get_size('./cores')+'
\n')
652 | fl.write('Available projects: '+str(sum(opencores_mem.projects_num))+'
\n')
653 | fl.write('Project categories: '+str(len(opencores_mem.categories))+'
\n')
654 | fl.write('''
655 |
660 | ''')
661 |
662 | fl.write('''
663 |
664 |
665 |
666 | Project Name
667 | Repository
668 | Last Update
669 | Language
670 | Dev. Status
671 | License
672 |
673 |
674 |
675 | ''')
676 |
677 | for i,x in enumerate(opencores_mem.projects_download_url):
678 | _c = opencores_mem.categories[i]
679 | fl.write("")
680 | fl.write(' '+str(_c.upper())+' '+'\n')
681 | fl.write(" \n")
682 | for ii,y in enumerate(opencores_mem.projects_download_url[i]):
683 | #y = y.encode('utf-8')
684 | _n = opencores_mem.projects_name[i][ii]
685 |
686 | # skip this project if empty
687 | # It does NOT seem to work!
688 | if opencores_mem.projects_can_be_downloaded[i][ii]==False:
689 | #print "Project", _n, "seems empty. Skipping it."
690 | #break
691 | pass
692 |
693 | a = re.sub(' ','_',_c)
694 | b = re.sub(' ','_',_n)
695 | a = re.sub('/','-',a)
696 | b = re.sub('/','-',b)
697 | link = a+'/'+b+'/'+'index.html'
698 | source_ln = re.sub('http://www.opencores.org/download,', '', y)
699 | source_ln = a+'/'+b+'/'+ source_ln +'.tar.gz'
700 |
701 | # let´s link this iwith project source on the github webpage
702 | source_ln = _github_addr +"tree/"+a+"_"+b
703 |
704 | # shorten the language label if too long
705 | if len(opencores_mem.projects_lang[i][ii])>7:
706 | opencores_mem.projects_lang[i][ii]=opencores_mem.projects_lang[i][ii][:7]
707 |
708 | # lets put in the table a hidden field for each project with the info
709 | # from the project html page
710 | soup = BeautifulSoup(opencores_mem.projects_html_info[i][ii])
711 | html_info = soup.text.encode('ascii','ignore') # you need to convert from unicode to text
712 | html_info = html_info[250:850] # trip it and just get the last 600 characters
713 |
714 | fl.write("")
715 | # here the use of a hidden field allows to bind this project with its
716 | # group. Very good for the search function.
717 | fl.write(""+_c+' '+html_info+"
"+_n+" ") # project name
718 | fl.write(" ")
719 | fl.write("code ") # source code link
720 | fl.write(" ")
721 | fl.write(opencores_mem.projects_last_update[i][ii]) # last update
722 | fl.write(" ")
723 | fl.write(opencores_mem.projects_lang[i][ii]) # language
724 | fl.write(" ")
725 | fl.write(opencores_mem.projects_dev_status[i][ii]) # dev. status
726 | fl.write(" ")
727 | fl.write(opencores_mem.projects_license[i][ii]) # license type
728 | fl.write(" \n")
729 | fl.write("
\n")
730 | fl.write("\n")
731 | fl.write(' \n\n')
732 | fl.close()
733 |
734 | # created css file
735 | fl=open('./cores/style.css','w')
736 | fl.write('''
737 |
738 | p { line-height: 1.2em;
739 | margin-bottom: 2px;
740 | margin-top: 2px;}
741 |
742 |
743 | body {min-width:820px;
744 | color: #333333;
745 | font-family: Arial,Helvetica,sans-serif;
746 | font-size : 11pt;
747 | margin-left: 10px;
748 | margin-right: 10px;
749 | margin-bottom: 10px;
750 | margin-top: 10px;}
751 |
752 | a {text-decoration: none; color: #1F7171;}
753 | a:hover {text-decoration: underline;}
754 |
755 | #h1,h2,h3 {margin:10px 0px 5px 0px;}
756 |
757 | form { margin: 50px 10px;}
758 | table { width: 100%; border-collapse: collapse; margin: 1em 0; }
759 |
760 | #id_search {
761 | -webkit-box-sizing: border-box;
762 | -moz-box-sizing: border-box;
763 | box-sizing: border-box;
764 | display: block;
765 | padding: 11px 7px;
766 | padding-right: 43px;
767 | background-color: #fff;
768 | font-size: 1.6em;
769 | color: #ccc;
770 | border: 1px solid #c8c8c8;
771 | border-bottom-color: #d2e2e7;
772 | -webkit-border-radius: 1px;
773 | -moz-border-radius: 1px;
774 | border-radius: 1px;
775 | -webkit-box-shadow: inset 0 1px 2px rgba(0,0,0,0.1), 0 0 0 6px #f0f0f0;
776 | -moz-box-shadow: inset 0 1px 2px rgba(0,0,0,0.1), 0 0 0 6px #f0f0f0;
777 | box-shadow: inset 0 1px 2px rgba(0,0,0,0.1), 0 0 0 6px #f0f0f0;
778 | -webkit-transition: all 0.4s linear;
779 | -moz-transition: all 0.4s linear;
780 | transition: all 0.4s linear;
781 | width: 100%; }
782 |
783 | .odd, .r1 { background: #fff; }
784 | .even, .r2 { background: #eee; }
785 | .r3 { background: #ebebeb; }
786 | .search { font-weight: bold; }
787 | .new { color: #f34105; text-transform: uppercase; font-size: 85%; margin-left: 3px; }
788 |
789 |
790 | thead th { background: #077474; color: #fff; }
791 |
792 | tbody th { text-align: left; }
793 | table th, table td { border: 1px solid #ddd; padding: 2px 5px; font-size: 95%; font-weight: normal; }
794 | pre { font-size: 130%; background: #f7f7f7; padding: 10px 10px; font-weight: bold; }
795 |
796 |
797 | fieldset { border: 0px solid #ccc; padding: 5px;}
798 | #form input { font-size: 16px; border: 1px solid #ccc;}
799 |
800 | #foot{margin-top: 10px;
801 | text-align: center;
802 | color:#A8A8A8;
803 | font-size : 90%;}
804 | ''')
805 | fl.close()
806 | print ('Local style.css file created.')
807 |
808 |
809 | # created license.html file
810 | fl=open('./cores/license.html','w')
811 | fl.write('''
812 |
813 |
814 |
815 |
816 | IP Cores - license
817 |
818 |
819 | Disclaimer
820 | We make no warranties regarding the correctness of the data and disclaim
821 | liability for damages resulting from its use.
822 | This database is distributed in the hope that it will be useful,
823 | but WITHOUT ANY WARRANTY; without even the implied warranty of
824 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
825 | We cannot provide unrestricted permission regarding the use of the data,
826 | as some data may be covered by a specific license or other rights. Please
827 | refer to the license notice that comes with each core project description.
828 |
829 |
830 |
831 | ''')
832 | fl.close()
833 | print ('Local license.html file created.')
834 |
835 | # created example.json file
836 | fl=open('./cores/example.json','w')
837 | fl.write('''
838 | {
839 | "list_items": ["Loaded with Ajax", "Loaded with Ajax too"]
840 | }
841 | ''')
842 | fl.close()
843 | print ('Local example.json file created.')
844 |
# Create the local jquery.quicksearch.js file. The payload defines the
# $.fn.quicksearch jQuery plugin, which shows or hides the targeted elements
# depending on whether their text matches the value typed in the bound input.
#
# The literal is a raw string because the JavaScript contains the sequence
# "\>" (inside a RegExp pattern). In a normal Python string that is an invalid
# escape, which triggers a DeprecationWarning/SyntaxWarning on recent Python
# versions. The raw prefix writes exactly the same bytes without the warning.
#
# The context manager guarantees the handle is closed even if write() raises,
# which the former open()/write()/close() sequence did not.
with open('./cores/jquery.quicksearch.js', 'w') as fl:
    fl.write(r'''
(function($, window, document, undefined) {
$.fn.quicksearch = function (target, opt) {

var timeout, cache, rowcache, jq_results, val = '', e = this, options = $.extend({
delay: 100,
selector: null,
stripeRows: null,
loader: null,
noResults: '',
bind: 'keyup',
onBefore: function () {
return;
},
onAfter: function () {
return;
},
show: function () {
this.style.display = "";
},
hide: function () {
this.style.display = "none";
},
prepareQuery: function (val) {
return val.toLowerCase().split(' ');
},
testQuery: function (query, txt, _row) {
for (var i = 0; i < query.length; i += 1) {
if (txt.indexOf(query[i]) === -1) {
return false;
}
}
return true;
}
}, opt);

this.go = function () {

var i = 0,
noresults = true,
query = options.prepareQuery(val),
val_empty = (val.replace(' ', '').length === 0);

for (var i = 0, len = rowcache.length; i < len; i++) {
if (val_empty || options.testQuery(query, cache[i], rowcache[i])) {
options.show.apply(rowcache[i]);
noresults = false;
} else {
options.hide.apply(rowcache[i]);
}
}

if (noresults) {
this.results(false);
} else {
this.results(true);
this.stripe();
}

this.loader(false);
options.onAfter();

return this;
};

this.stripe = function () {

if (typeof options.stripeRows === "object" && options.stripeRows !== null)
{
var joined = options.stripeRows.join(' ');
var stripeRows_length = options.stripeRows.length;

jq_results.not(':hidden').each(function (i) {
$(this).removeClass(joined).addClass(options.stripeRows[i % stripeRows_length]);
});
}

return this;
};

this.strip_html = function (input) {
var output = input.replace(new RegExp('<[^<]+\>', 'g'), "");
output = $.trim(output.toLowerCase());
return output;
};

this.results = function (bool) {
if (typeof options.noResults === "string" && options.noResults !== "") {
if (bool) {
$(options.noResults).hide();
} else {
$(options.noResults).show();
}
}
return this;
};

this.loader = function (bool) {
if (typeof options.loader === "string" && options.loader !== "") {
(bool) ? $(options.loader).show() : $(options.loader).hide();
}
return this;
};

this.cache = function () {

jq_results = $(target);

if (typeof options.noResults === "string" && options.noResults !== "") {
jq_results = jq_results.not(options.noResults);
}

var t = (typeof options.selector === "string") ? jq_results.find(options.selector) : $(target).not(options.noResults);
cache = t.map(function () {
return e.strip_html(this.innerHTML);
});

rowcache = jq_results.map(function () {
return this;
});

return this.go();
};

this.trigger = function () {
this.loader(true);
options.onBefore();

window.clearTimeout(timeout);
timeout = window.setTimeout(function () {
e.go();
}, options.delay);

return this;
};

this.cache();
this.results(true);
this.stripe();
this.loader(false);

return this.each(function () {
$(this).bind(options.bind, function () {
val = $(this).val();
e.trigger();
});
});
};
}(jQuery, this, document));
''')
print('Local jquery.quicksearch.js file created.')
999 |
--------------------------------------------------------------------------------