├── .gitignore ├── LICENSE ├── README.md ├── data ├── Femnist │ ├── README.md │ ├── data │ │ ├── my_sample.py │ │ └── nist_generator.py │ ├── preprocess.sh │ ├── preprocess │ │ ├── data_to_json.py │ │ ├── data_to_json.sh │ │ ├── get_data.sh │ │ ├── get_file_dirs.py │ │ ├── get_hashes.py │ │ ├── group_by_writer.py │ │ └── match_hashes.py │ └── stats.sh ├── Linear_synthetic │ ├── data │ │ └── README.md │ ├── generate_linear_regession.py │ ├── generate_linear_regession_updated.py │ ├── generate_linear_synthetic_backup.py │ └── optimal_solution_finding.py ├── Logistic_synthetic │ ├── README.md │ └── logistic_regression.py └── Mnist │ ├── data │ └── mldata │ │ └── mnist-original.mat │ ├── generate_iid_20users.py │ ├── generate_niid_100users_updated.py │ ├── generate_niid_20users.py │ └── generate_niid_mnist_100users.py ├── flearn ├── optimizers │ └── fedoptimizer.py ├── servers │ ├── serveravg.py │ ├── serverbase.py │ └── serverfedl.py ├── trainmodel │ └── models.py └── users │ ├── useravg.py │ ├── userbase.py │ └── userfedl.py ├── main.py ├── plot_femnist.py ├── plot_linear.py ├── plot_mnist.py ├── requirements.txt ├── results ├── Mnist_FedAvg_0.005_0.2_15_10u_20b_20_avg.h5 └── README.md └── utils ├── model_utils.py └── plot_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. 
This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 
174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 
234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 
296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 
414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. 
The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program>  Copyright (C) <year>  <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Federated Learning over Wireless Networks: Convergence Analysis and Resource Allocation (Accepted by IEEE/ACM Transactions on Networking (TON)) 2 | 3 | This repository is for the Experiment Section of the paper: 4 | "Federated Learning over Wireless Networks: Convergence Analysis and Resource Allocation" 5 | 6 | Authors: 7 | Canh T. Dinh, Nguyen H. Tran, Minh N. H. Nguyen, Choong Seon Hong, Wei Bao, Albert Zomaya, Vincent Gramoli 8 | 9 | Paper Link: https://arxiv.org/abs/1910.13067 10 | 11 | Source Code (TensorFlow version): https://github.com/CharlieDinh/FEDL 12 | 13 | # Software requirements: 14 | - numpy, scipy, pytorch, Pillow, matplotlib. 15 | 16 | - To download the dependencies: **pip3 install -r requirements.txt** 17 | 18 | - The code can be run on any PC. 19 | 20 | # Dataset: We use 3 datasets: MNIST, FEMNIST, and Synthetic 21 | 22 | - To generate non-iid MNIST data: in folder data/Mnist, run: "python3 generate_niid_mnist_100users.py" 23 | - To generate FEMNIST data: first, in folder data/Femnist, run preprocess.sh to obtain all raw data (or download it from the link below), then run "python3 generate_niid_femnist_100users.py" 24 | - To generate non-iid Linear Synthetic data: in folder data/Linear_synthetic, run: "python3 generate_linear_regession.py" 25 | - The datasets are available to download at: https://drive.google.com/drive/folders/1Q91NCGcpHQjB3bXJTvtx5qZ-TrIZ9WzT?usp=sharing 26 | 27 | 28 | # Produce figures in the paper: 29 | - There is a main file "main.py" which allows running all experiments, and 3 plotting files "plot_mnist.py, plot_femnist.py, plot_linear.py" to produce the figures for the 3 datasets. Note that each experiment is run at least 10 times and the results are then averaged. 30 | 31 | - To produce the experiments for Linear Regression: 32 |

33 | 34 |

35 | 36 | - In folder data/Linear_synthetic, before generating the linear data set, configure the value of $\rho$, for example rho = 1.4 (in the paper we use 3 different values of $\rho$: 1.4, 2, and 5), then run: "python3 generate_linear_regession_updated.py" to generate data corresponding to the chosen value of $\rho$ (a self-contained sketch of such a generator is given after the command block below). 37 | - To find the optimal solution: In folder data/Linear_synthetic, run "python3 optimal_solution_finding.py" (the value of $\rho$ also needs to be configured to find the optimal solution). 38 | - To generate results for the training process, run the commands below: 39 |

 40 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.01 --rho 1.4 --times  1
 41 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.03 --rho 1.4 --times  1
 42 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.05 --rho 1.4 --times  1
 43 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.07 --rho 1.4 --times  1 
 44 | 
 45 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.01 --rho 2 --times  1
 46 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.03 --rho 2 --times  1 
 47 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.05 --rho 2 --times  1
 48 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.07 --rho 2 --times  1 
 49 | 
 50 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.01 --rho 5 --times  1
 51 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.03 --rho 5 --times  1 
 52 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.05 --rho 5 --times  1
 53 |     python3 -u main.py --dataset Linear_synthetic --algorithm FEDL --model linear --num_global_iters  200 --clients_per_round 100 --batch_size 0 --local_epochs  20 --learning_rate  0.04 --hyper_learning_rate  0.07 --rho 5 --times  1 
 54 |     
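- As noted above, the linear synthetic data are generated for a configurable $\rho$. The sketch below illustrates one plausible way to synthesize linear-regression data whose per-user Hessian has condition number roughly $\rho$; the dimension, sample counts, and output path are assumptions, and the repository's generate_linear_regession_updated.py may construct the data differently.

```python
import json
import numpy as np

def generate_user(num_samples, dim, rho, rng):
    """One user's linear-regression data whose feature covariance has
    eigenvalues spread between 1 and rho, i.e. condition number ~ rho."""
    eigvals = np.linspace(1.0, rho, dim)
    Q, _ = np.linalg.qr(rng.normal(size=(dim, dim)))      # random orthonormal basis
    cov = Q @ np.diag(eigvals) @ Q.T
    X = rng.multivariate_normal(np.zeros(dim), cov, size=num_samples)
    w_true = rng.normal(size=dim)
    y = X @ w_true + 0.1 * rng.normal(size=num_samples)   # noisy linear labels
    return X.tolist(), y.tolist()

rho, dim, num_users = 1.4, 40, 100                        # dim is an assumption
rng = np.random.default_rng(0)
data = {"users": [], "user_data": {}, "num_samples": []}
for u in range(num_users):
    n = int(rng.lognormal(4.0, 0.5)) + 50                 # heterogeneous sample counts
    X, y = generate_user(n, dim, rho, rng)
    uname = "f_{0:05d}".format(u)
    data["users"].append(uname)
    data["user_data"][uname] = {"x": X, "y": y}
    data["num_samples"].append(n)

with open("mytrain_rho1.4.json", "w") as f:               # output path is illustrative
    json.dump(data, f)
```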
55 | - All the training loss, testing accuracy, and training accuracy values will be stored as .h5 files in the folder "results" (a minimal sketch of loading these files is given after the plotting command below). 56 | - To produce the figure for linear regression, run
 python3 plot_linear.py
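- The plotting scripts read the .h5 files saved in "results" and average over the repeated runs (--times). A minimal loading/averaging sketch is shown below; the dataset key name rs_glob_acc is an assumption, so inspect f.keys() or utils/plot_utils.py for the names this repository actually writes.

```python
import glob
import h5py
import numpy as np

def load_metric(pattern, key="rs_glob_acc"):
    """Average one recorded metric over the .h5 files of repeated runs.

    `key` is an assumed dataset name -- check f.keys() or utils/plot_utils.py
    for the names this repository actually writes."""
    curves = []
    for path in sorted(glob.glob(pattern)):
        with h5py.File(path, "r") as f:
            curves.append(np.array(f[key]))
    return np.mean(np.stack(curves), axis=0)   # average over runs, per global round

# e.g. all matching FedAvg result files saved under results/
acc = load_metric("results/Mnist_FedAvg_*_avg.h5")
print(acc.shape, acc[-1])
```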
57 | - Note that all users are selected in the synthetic experiments, so each synthetic case only needs to be run once. 58 | 59 | - For MNIST, run the commands below: 60 |

 61 |     python3 -u main.py --dataset Mnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 20 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  0.2 --rho 0 --times  10
 62 |     python3 -u main.py --dataset Mnist --algorithm FedAvg --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 20 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  0 --rho 0 --times  10 
 63 | 
 64 |     python3 -u main.py --dataset Mnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 40 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  0.2 --rho 0 --times  10
 65 |     python3 -u main.py --dataset Mnist --algorithm FedAvg --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 40 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  0 --rho 0 --times  10 
 66 | 
 67 |     python3 -u main.py --dataset Mnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 0 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  0.2 --rho 0 --times  10
 68 |     python3 -u main.py --dataset Mnist --algorithm FedAvg --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 0 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  0 --rho 0 --times  10 
 69 | 
 70 |     python3 -u main.py --dataset Mnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 0 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  2 --rho 0 --times  10
 71 |     python3 -u main.py --dataset Mnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 0 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  4 --rho 0 --times  10
 72 |     
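- The hyper_learning_rate flag swept above is FEDL's $\eta$: each client works on a local surrogate objective that adds a linear correction term built from the server-aggregated gradient. The snippet below is a schematic, framework-free version of one such local step, not the exact implementation in flearn/optimizers/fedoptimizer.py.

```python
import numpy as np

def fedl_local_step(w, local_grad, global_grad, anchor_grad, lr, eta):
    """One gradient step on a FEDL-style local surrogate
    J_n(w) = F_n(w) + <eta * global_grad - anchor_grad, w>,
    where anchor_grad is grad F_n at the last global model and
    global_grad stands in for the server-aggregated gradient."""
    surrogate_grad = local_grad(w) + eta * global_grad - anchor_grad
    return w - lr * surrogate_grad

# toy quadratic local loss F_n(w) = 0.5 * ||w - c||^2, so grad F_n(w) = w - c
c = np.array([1.0, -2.0])
w_global = np.zeros(2)
anchor_grad = w_global - c                     # local gradient at the global model
global_grad = np.array([0.5, 0.5])             # placeholder for the aggregated gradient
w = w_global.copy()
for _ in range(20):                            # local epochs
    w = fedl_local_step(w, lambda v: v - c, global_grad, anchor_grad, lr=0.1, eta=0.2)
print(w)
```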
73 | 74 |

75 | 76 | 77 | 78 |

79 | 80 | - To produce the figure for the MNIST experiment, run
 python3 plot_mnist.py
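- For comparison, the FedAvg baselines above combine the selected users' models by a sample-count-weighted average (see flearn/servers/serveravg.py). A minimal, framework-agnostic sketch of that aggregation step:

```python
import numpy as np

def fedavg_aggregate(user_weights, user_num_samples):
    """Sample-count-weighted average of user models.

    user_weights: one list of layer arrays per selected user
    user_num_samples: number of training samples held by each user"""
    total = float(sum(user_num_samples))
    global_weights = [np.zeros_like(layer) for layer in user_weights[0]]
    for weights, n in zip(user_weights, user_num_samples):
        for agg, layer in zip(global_weights, weights):
            agg += (n / total) * layer
    return global_weights

# two toy "users", each holding a single 2x2 layer
w_users = [[np.ones((2, 2))], [3 * np.ones((2, 2))]]
print(fedavg_aggregate(w_users, [10, 30])[0])   # -> 2.5 everywhere
```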
81 | 82 | - For FEMNIST, run the commands below: 83 |

 84 |     python3 -u main.py --dataset Femnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 20 --local_epochs  10 --learning_rate  0.003 --hyper_learning_rate  0.2 --rho 0 --times  10 
 85 |     python3 -u main.py --dataset Femnist --algorithm FedAvg --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 20 --local_epochs  10 --learning_rate  0.003 --hyper_learning_rate  0 --rho 0 --times  10 
 86 |     python3 -u main.py --dataset Femnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 0 --local_epochs  10 --learning_rate  0.015 --hyper_learning_rate  0.5 --rho 0 --times  10 
 87 | 
 88 |     python3 -u main.py --dataset Femnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 20 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  0.2 --rho 0 --times  10 
 89 |     python3 -u main.py --dataset Femnist --algorithm FedAvg --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 20 --local_epochs  20 --learning_rate  0.003 --hyper_learning_rate  0 --rho 0 --times  10 
 90 |     python3 -u main.py --dataset Femnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 0 --local_epochs  20 --learning_rate  0.015 --hyper_learning_rate  0.5 --rho 0 --times  10 
 91 | 
 92 |     python3 -u main.py --dataset Femnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 20 --local_epochs  40 --learning_rate  0.003 --hyper_learning_rate  0.2 --rho 0 --times  10 
 93 |     python3 -u main.py --dataset Femnist --algorithm FedAvg --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 20 --local_epochs  40 --learning_rate  0.003 --hyper_learning_rate  0 --rho 0 --times  10 
 94 |     python3 -u main.py --dataset Femnist --algorithm FEDL --model mclr --num_global_iters  800 --clients_per_round 10 --batch_size 0 --local_epochs  40 --learning_rate  0.015 --hyper_learning_rate  0.5 --rho 0 --times  10 
 95 |     
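- The --model mclr flag used above selects a multinomial (multiclass) logistic regression model. A minimal PyTorch sketch of such a model for 28x28 FEMNIST images is shown below; the actual definition lives in flearn/trainmodel/models.py and may differ, e.g. in the number of output classes.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class MclrLogistic(nn.Module):
    """Multinomial logistic regression: a single linear layer + log-softmax."""
    def __init__(self, input_dim=784, num_classes=62):   # 62 FEMNIST classes assumed
        super().__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        x = torch.flatten(x, start_dim=1)                 # (batch, 28*28)
        return F.log_softmax(self.fc(x), dim=1)

model = MclrLogistic()
print(model(torch.randn(4, 1, 28, 28)).shape)             # torch.Size([4, 62])
```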
96 | 97 |

98 | 99 | 100 | 101 |

102 | 103 | - To produce the figure for the FEMNIST experiment, run
 python3 plot_femnist.py
104 | 105 | - For the non-convex experiment (on the MNIST dataset): 106 | Note that FEDL is unstable with a small mini-batch size, for example 20: 107 |

108 |     python3 -u main.py --dataset Mnist --algorithm FEDL --model dnn --num_global_iters  800 --clients_per_round 10 --batch_size 40 --local_epochs  20 --learning_rate  0.0015 --hyper_learning_rate  0.8 --rho 0 --times 10
109 |     python3 -u main.py --dataset Mnist --algorithm FEDL --model dnn --num_global_iters  800 --clients_per_round 10 --batch_size 0 --local_epochs  20 --learning_rate  0.0015 --hyper_learning_rate  4.0 --rho 0 --times 10
110 |   
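- The --model dnn runs above train a small non-convex fully connected network. A minimal sketch of such a model for MNIST is shown below; the hidden size is an assumption, and the actual architecture is defined in flearn/trainmodel/models.py.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleDNN(nn.Module):
    """A small fully connected network for 28x28 MNIST digits."""
    def __init__(self, input_dim=784, hidden_dim=100, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return F.log_softmax(self.fc2(x), dim=1)

model = SimpleDNN()
print(model(torch.randn(4, 1, 28, 28)).shape)   # torch.Size([4, 10])
```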
111 | -------------------------------------------------------------------------------- /data/Femnist/README.md: -------------------------------------------------------------------------------- 1 | # EMNIST Dataset 2 | 3 | ## Setup Instructions 4 | - pip3 install numpy 5 | - pip3 install pillow 6 | - Run ```./preprocess.sh``` with a choice of the following tags: 7 | - ```-s``` := 'iid' to sample in an i.i.d. manner, or 'niid' to sample in a non-i.i.d. manner; more information on i.i.d. versus non-i.i.d. is included in the 'Notes' section 8 | - ```--iu``` := number of users, if iid sampling; expressed as a fraction of the total number of users; default is 0.01 9 | - ```--sf``` := fraction of data to sample, written as a decimal; default is 0.1 10 | - ```-k``` := minimum number of samples per user 11 | - ```-t``` := 'user' to partition users into train-test groups, or 'sample' to partition each user's samples into train-test groups 12 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 13 | - ```--nu``` := The total number of users generated. 14 | 15 | Instruction used to generate EMNIST with 50 users: 16 | 17 | ``` 18 | ./preprocess.sh -s niid --sf 1.0 -k 0 -tf 0.8 -t sample --nu 100 19 | ``` 20 | 21 | 22 | 23 | 24 | (Make sure to delete the rem\_user\_data, sampled\_data, test, and train subfolders in the data directory before re-running preprocess.sh.) 25 | 26 | Or you can download the dataset [here](https://drive.google.com/open?id=1sHzD4IsgEI5xLy6cqwUjSGW0PwiduPHr), unzip it and put the `train` and `test` folder under `data`. 27 | -------------------------------------------------------------------------------- /data/Femnist/data/my_sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import json 3 | import math 4 | import numpy as np 5 | import os 6 | import sys 7 | import random 8 | from tqdm import trange 9 | 10 | from PIL import Image 11 | 12 | NUM_USER = 50 13 | CLASS_PER_USER = 19 14 | 15 | 16 | def relabel_class(c): 17 | ''' 18 | maps hexadecimal class value (string) to a decimal number 19 | returns: 20 | - 0 through 9 for classes representing respective numbers 21 | - 10 through 35 for classes representing respective uppercase letters 22 | - 36 through 61 for classes representing respective lowercase letters 23 | ''' 24 | if c.isdigit() and int(c) < 40: 25 | return (int(c) - 30) 26 | elif int(c, 16) <= 90: # uppercase 27 | return (int(c, 16) - 55) 28 | else: 29 | return (int(c, 16) - 61) 30 | 31 | def load_image(file_name): 32 | '''read in a png 33 | Return: a flatted list representing the image 34 | ''' 35 | size = (28, 28) 36 | img = Image.open(file_name) 37 | gray = img.convert('L') 38 | gray.thumbnail(size, Image.ANTIALIAS) 39 | arr = np.asarray(gray).copy() 40 | vec = arr.flatten() 41 | vec = vec / 255 # scale all pixel values to between 0 and 1 42 | vec = vec.tolist() 43 | 44 | return vec 45 | 46 | 47 | def main(): 48 | file_dir = "raw_data/by_class" 49 | 50 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 51 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 52 | 53 | train_path = "train/mytrain.json" 54 | test_path = "test/mytest.json" 55 | 56 | X = [[] for _ in range(NUM_USER)] 57 | y = [[] for _ in range(NUM_USER)] 58 | 59 | nist_data = {} 60 | 61 | 62 | for class_ in os.listdir(file_dir): 63 | 64 | real_class = relabel_class(class_) 65 | 66 | if real_class >= 36 and real_class <= 61: 67 | 68 | full_img_path = file_dir + "/" + 
class_ + "/train_" + class_ 69 | all_files_this_class = os.listdir(full_img_path) 70 | random.shuffle(all_files_this_class) 71 | sampled_files_this_class = all_files_this_class[:7000] 72 | imgs = [] 73 | for img in sampled_files_this_class: 74 | imgs.append(load_image(full_img_path + "/" + img)) 75 | class_ = relabel_class(class_) 76 | print(class_) 77 | nist_data[class_-36] = imgs # a list of list, key is (0, 25) 78 | print(len(imgs)) 79 | 80 | # assign samples to users by power law 81 | num_samples = np.random.lognormal(4, 2, (NUM_USER)) + 5 82 | 83 | idx = np.zeros(26, dtype=np.int64) 84 | 85 | for user in range(NUM_USER): 86 | num_sample_per_class = int(num_samples[user]/CLASS_PER_USER) 87 | if num_sample_per_class < 2: 88 | num_sample_per_class = 2 89 | 90 | for j in range(CLASS_PER_USER): 91 | class_id = (user + j) % 26 92 | if idx[class_id] + num_sample_per_class < len(nist_data[class_id]): 93 | idx[class_id] = 0 94 | X[user] += nist_data[class_id][idx[class_id] : (idx[class_id] + num_sample_per_class)] 95 | y[user] += (class_id * np.ones(num_sample_per_class)).tolist() 96 | idx[class_id] += num_sample_per_class 97 | 98 | # Create data structure 99 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 100 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 101 | 102 | for i in trange(NUM_USER, ncols=120): 103 | uname = 'f_{0:05d}'.format(i) 104 | 105 | combined = list(zip(X[i], y[i])) 106 | random.shuffle(combined) 107 | X[i][:], y[i][:] = zip(*combined) 108 | num_samples = len(X[i]) 109 | train_len = int(0.9 * num_samples) 110 | test_len = num_samples - train_len 111 | 112 | train_data['users'].append(uname) 113 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 114 | train_data['num_samples'].append(train_len) 115 | test_data['users'].append(uname) 116 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 117 | test_data['num_samples'].append(test_len) 118 | 119 | 120 | with open(train_path,'w') as outfile: 121 | json.dump(train_data, outfile) 122 | with open(test_path, 'w') as outfile: 123 | json.dump(test_data, outfile) 124 | 125 | 126 | if __name__ == "__main__": 127 | main() 128 | 129 | -------------------------------------------------------------------------------- /data/Femnist/data/nist_generator.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import json 3 | import math 4 | import numpy as np 5 | import os 6 | import sys 7 | import random 8 | from tqdm import trange 9 | 10 | from PIL import Image 11 | 12 | NUM_USER = 50 13 | CLASS_PER_USER = 50 14 | FEMNIST = True # True: generate data will full 62 label, False: only 26 labels for lowercase 15 | SAMPLE_NUM_MEAN = 400 16 | SAMPLE_NUM_STD = 110 17 | 18 | 19 | def relabel_class(c): 20 | ''' 21 | maps hexadecimal class value (string) to a decimal number 22 | returns: 23 | - 0 through 9 for classes representing respective numbers : total 10 24 | - 10 through 35 for classes representing respective uppercase letters : 26 25 | - 36 through 61 for classes representing respective lowercase letters : 26 26 | - in total we have 10 + 26 + 26 = 62 class for FEMIST tiwand only 36-61 for FEMIST* 27 | ''' 28 | if c.isdigit() and int(c) < 40: 29 | return (int(c) - 30) 30 | elif int(c, 16) <= 90: # uppercase 31 | return (int(c, 16) - 55) 32 | else: 33 | return (int(c, 16) - 61) 34 | 35 | 36 | def load_image(file_name): 37 | '''read in a png 38 | Return: a flatted list representing the image 
39 | ''' 40 | size = (28, 28) 41 | img = Image.open(file_name) 42 | gray = img.convert('L') 43 | gray.thumbnail(size, Image.ANTIALIAS) 44 | arr = np.asarray(gray).copy() 45 | vec = arr.flatten() 46 | vec = vec / 255 # scale all pixel values to between 0 and 1 47 | vec = vec.tolist() 48 | 49 | return vec 50 | 51 |
52 | def main(): 53 | file_dir = "raw_data/by_class" 54 | 55 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 56 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 57 | if(FEMNIST): 58 | train_path = "train/nisttrain.json" 59 | test_path = "test/nisttest.json" 60 | else: 61 | train_path = "train/femnisttrain.json" 62 | test_path = "test/femnisttest.json" 63 | 64 | X = [[] for _ in range(NUM_USER)] 65 | y = [[] for _ in range(NUM_USER)] 66 | 67 | nist_data = {} 68 |
69 | for class_ in os.listdir(file_dir): 70 | 71 | real_class = relabel_class(class_) 72 | 73 | if(FEMNIST): 74 | full_img_path = file_dir + "/" + class_ + "/train_" + class_ 75 | all_files_this_class = os.listdir(full_img_path) 76 | random.shuffle(all_files_this_class) 77 | sampled_files_this_class = all_files_this_class[:7000] 78 | imgs = [] 79 | for img in sampled_files_this_class: 80 | imgs.append(load_image(full_img_path + "/" + img)) 81 | class_ = relabel_class(class_) 82 | print("Class:", class_) 83 | nist_data[class_] = imgs # a list of lists; keys are 0..61 84 | print("Image len:", len(imgs)) 85 |
86 | else: 87 | if real_class >= 36 and real_class <= 61: 88 | full_img_path = file_dir + "/" + class_ + "/train_" + class_ 89 | all_files_this_class = os.listdir(full_img_path) 90 | random.shuffle(all_files_this_class) 91 | sampled_files_this_class = all_files_this_class[:7000] 92 | imgs = [] 93 | for img in sampled_files_this_class: 94 | imgs.append(load_image(full_img_path + "/" + img)) 95 | class_ = relabel_class(class_) 96 | print(class_) 97 | nist_data[class_-36] = imgs # a list of lists; keys are 0..25 98 | print(len(imgs)) 99 |
100 | # assign samples to users by power law: draw lognormal sample counts with mean SAMPLE_NUM_MEAN and std SAMPLE_NUM_STD (converted below to the underlying normal's parameters) 101 | normal_std = np.sqrt(np.log(1 + (SAMPLE_NUM_STD/SAMPLE_NUM_MEAN)**2)) 102 | normal_mean = np.log(SAMPLE_NUM_MEAN) - normal_std**2 / 2 103 | 104 | num_samples = np.random.lognormal(normal_mean, normal_std, (NUM_USER)) + 5 105 | #num_samples = np.random.normal(SAMPLE_NUM_MEAN,SAMPLE_NUM_STD,(NUM_USER)) 106 | 107 | if(FEMNIST): 108 | idx = np.zeros(62, dtype=np.int64) 109 | else: 110 | idx = np.zeros(26, dtype=np.int64) 111 |
112 | for user in range(NUM_USER): 113 | num_sample_per_class = int(num_samples[user]/CLASS_PER_USER) 114 | if num_sample_per_class < 2: 115 | num_sample_per_class = 2 116 | 117 | for j in range(CLASS_PER_USER): 118 | if(FEMNIST): 119 | class_id = (user + j) % 62 120 | else: 121 | class_id = (user + j) % 26 122 | 123 | if idx[class_id] + num_sample_per_class < len(nist_data[class_id]): 124 | idx[class_id] = 0 125 | X[user] += nist_data[class_id][idx[class_id] 126 | : (idx[class_id] + num_sample_per_class)] 127 | y[user] += (class_id * np.ones(num_sample_per_class)).tolist() 128 | idx[class_id] += num_sample_per_class 129 |
130 | # Create data structure 131 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 132 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 133 | 134 | for i in trange(NUM_USER, ncols=120): 135 | uname = 'f_{0:05d}'.format(i) 136 | 137 | combined = list(zip(X[i], y[i])) 138 | random.shuffle(combined) 139 | X[i][:], y[i][:] = zip(*combined) 140 | num_samples = len(X[i]) 141 | train_len = int(0.9 * num_samples) 142 | test_len = num_samples - train_len
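        # Each user's (x, y) pairs were shuffled together above; the first 90% become that
        # user's training split and the remaining 10% its test split, both stored under the
        # same 'f_00000'-style name in the {'users', 'user_data', 'num_samples'} schema
        # shared by all data generators in this repository.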
143 | 144 | train_data['users'].append(uname) 145 | train_data['user_data'][uname] = { 146 | 'x': X[i][:train_len], 'y': y[i][:train_len]} 147 | train_data['num_samples'].append(train_len) 148 | test_data['users'].append(uname) 149 | test_data['user_data'][uname] = { 150 | 'x': X[i][train_len:], 'y': y[i][train_len:]} 151 | test_data['num_samples'].append(test_len) 152 | 153 | with open(train_path, 'w') as outfile: 154 | json.dump(train_data, outfile) 155 | with open(test_path, 'w') as outfile: 156 | json.dump(test_data, outfile) 157 | 158 | 159 | if __name__ == "__main__": 160 | main() 161 | -------------------------------------------------------------------------------- /data/Femnist/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #rm -rf rem_user_data sampled_data test train 4 | 5 | # download data and convert to .json format 6 | 7 | if [ ! -d "data/all_data" ] || [ ! "$(ls -A data/all_data)" ]; then 8 | cd preprocess 9 | ./data_to_json.sh 10 | cd .. 11 | fi 12 | 13 | NAME="nist" # name of the dataset, equivalent to directory name 14 | 15 | cd ../../utils 16 | 17 | # ./preprocess.sh -s niid --sf 0.05 -k 64 -t sample 18 | # ./preprocess.sh --name nist -s niid --sf 1.0 -k 0 -t sample 19 | # ./preprocess.sh --name sent140 -s niid --sf 1.0 -k 1 -t sample 20 | ./preprocess.sh --name $NAME $@ 21 | 22 | cd ../data/$NAME 23 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/data_to_json.py: -------------------------------------------------------------------------------- 1 | # Converts a list of (writer, [list of (file,class)]) tuples into a json object 2 | # of the form: 3 | # {users: [bob, etc], num_samples: [124, etc.], 4 | # user_data: {bob : {x:[img1,img2,etc], y:[class1,class2,etc]}, etc}} 5 | # where 'img_' is a vectorized representation of the corresponding image 6 | 7 | from __future__ import division 8 | import json 9 | import math 10 | import numpy as np 11 | import os 12 | import sys 13 | 14 | from PIL import Image 15 | 16 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 17 | utils_dir = os.path.join(utils_dir, 'utils') 18 | sys.path.append(utils_dir) 19 | 20 | import utils 21 | 22 | 23 | MAX_WRITERS = 100 # max number of writers per json file. 
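# Writers are flushed to disk in chunks: each time MAX_WRITERS writers have been
# accumulated, one file named all_data_<json_index>.json is written to data/all_data/
# using the {users, num_samples, user_data} layout documented above. A minimal sketch
# for inspecting one chunk (assuming the default output path data/all_data/all_data_0.json
# relative to the Femnist folder):
#   import json
#   with open('data/all_data/all_data_0.json') as f:
#       chunk = json.load(f)
#   first = chunk['users'][0]
#   print(len(chunk['users']))                     # up to MAX_WRITERS writers per file
#   print(len(chunk['user_data'][first]['x'][0]))  # 28*28 = 784 pixel values per image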
24 | 25 | 26 | def relabel_class(c): 27 | ''' 28 | maps hexadecimal class value (string) to a decimal number 29 | returns: 30 | - 0 through 9 for classes representing respective numbers 31 | - 10 through 35 for classes representing respective uppercase letters 32 | - 36 through 61 for classes representing respective lowercase letters 33 | ''' 34 | if c.isdigit() and int(c) < 40: 35 | return (int(c) - 30) 36 | elif int(c, 16) <= 90: # uppercase 37 | return (int(c, 16) - 55) 38 | else: 39 | return (int(c, 16) - 61) 40 | 41 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 42 | 43 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer') 44 | writers = utils.load_obj(ibwd) 45 | 46 | num_json = int(math.ceil(len(writers) / MAX_WRITERS)) 47 | 48 | users = [[] for _ in range(num_json)] 49 | num_samples = [[] for _ in range(num_json)] 50 | user_data = [{} for _ in range(num_json)] 51 | 52 | writer_count = 0 53 | json_index = 0 54 | for (w, l) in writers: 55 | 56 | users[json_index].append(w) 57 | num_samples[json_index].append(len(l)) 58 | user_data[json_index][w] = {'x': [], 'y': []} 59 | 60 | size = 28, 28 # original image size is 128, 128 61 | for (f, c) in l: 62 | file_path = os.path.join(parent_path, f) 63 | img = Image.open(file_path) 64 | gray = img.convert('L') 65 | gray.thumbnail(size, Image.ANTIALIAS) 66 | arr = np.asarray(gray).copy() 67 | vec = arr.flatten() 68 | vec = vec / 255 # scale all pixel values to between 0 and 1 69 | vec = vec.tolist() 70 | 71 | nc = relabel_class(c) 72 | 73 | user_data[json_index][w]['x'].append(vec) 74 | user_data[json_index][w]['y'].append(nc) 75 | 76 | writer_count += 1 77 | if writer_count == MAX_WRITERS: 78 | 79 | all_data = {} 80 | all_data['users'] = users[json_index] 81 | all_data['num_samples'] = num_samples[json_index] 82 | all_data['user_data'] = user_data[json_index] 83 | 84 | file_name = 'all_data_%d.json' % json_index 85 | file_path = os.path.join(parent_path, 'data', 'all_data', file_name) 86 | 87 | print('writing %s' % file_name) 88 | 89 | with open(file_path, 'w') as outfile: 90 | json.dump(all_data, outfile) 91 | 92 | writer_count = 0 93 | json_index += 1 94 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/data_to_json.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # assumes that the script is run in the preprocess folder 4 | 5 | if [ ! -d "../data" ]; then 6 | mkdir ../data 7 | fi 8 | if [ ! -d "../data/raw_data" ]; then 9 | echo "------------------------------" 10 | echo "downloading data" 11 | mkdir ../data/raw_data 12 | ./get_data.sh 13 | echo "finished downloading data" 14 | fi 15 | 16 | if [ ! -d "../data/intermediate" ]; then # stores .pkl files during preprocessing 17 | mkdir ../data/intermediate 18 | fi 19 | 20 | if [ ! -f ../data/intermediate/class_file_dirs.pkl ]; then 21 | echo "------------------------------" 22 | echo "extracting file directories of images" 23 | python3 get_file_dirs.py 24 | echo "finished extracting file directories of images" 25 | fi 26 | 27 | if [ ! -f ../data/intermediate/class_file_hashes.pkl ]; then 28 | echo "------------------------------" 29 | echo "calculating image hashes" 30 | python3 get_hashes.py 31 | echo "finished calculating image hashes" 32 | fi 33 | 34 | if [ ! 
-f ../data/intermediate/write_with_class.pkl ]; then 35 | echo "------------------------------" 36 | echo "assigning class labels to write images" 37 | python3 match_hashes.py 38 | echo "finished assigning class labels to write images" 39 | fi 40 | 41 | if [ ! -f ../data/intermediate/images_by_writer.pkl ]; then 42 | echo "------------------------------" 43 | echo "grouping images by writer" 44 | python3 group_by_writer.py 45 | echo "finished grouping images by writer" 46 | fi 47 | 48 | if [ ! -d "../data/all_data" ]; then 49 | mkdir ../data/all_data 50 | fi 51 | if [ ! "$(ls -A ../data/all_data)" ]; then 52 | echo "------------------------------" 53 | echo "converting data to .json format" 54 | python3 data_to_json.py 55 | echo "finished converting data to .json format" 56 | fi 57 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/get_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # assumes that the script is run in the preprocess folder 4 | 5 | cd ../data/raw_data 6 | wget https://s3.amazonaws.com/nist-srd/SD19/by_class.zip 7 | wget https://s3.amazonaws.com/nist-srd/SD19/by_write.zip 8 | unzip by_class.zip 9 | rm by_class.zip 10 | unzip by_write.zip 11 | rm by_write.zip 12 | cd ../../preprocess 13 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/get_file_dirs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Creates .pkl files for: 3 | 1. list of directories of every image in 'by_class' 4 | 2. list of directories of every image in 'by_write' 5 | the hierarchal structure of the data is as follows: 6 | - by_class -> classes -> folders containing images -> images 7 | - by_write -> folders containing writers -> writer -> types of images -> images 8 | the directories written into the files are of the form 'raw_data/...' 
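    the two lists are saved with utils.save_obj under data/intermediate/
    (class_file_dirs and write_file_dirs; the later preprocessing steps
    expect them there as .pkl files)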
9 | ''' 10 | 11 | import os 12 | import sys 13 | 14 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 15 | utils_dir = os.path.join(utils_dir, 'utils') 16 | sys.path.append(utils_dir) 17 | 18 | import utils 19 | 20 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 21 | 22 | class_files = [] # (class, file directory) 23 | write_files = [] # (writer, file directory) 24 | 25 | class_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_class') 26 | rel_class_dir = os.path.join('data', 'raw_data', 'by_class') 27 | classes = os.listdir(class_dir) 28 | 29 | for cl in classes: 30 | cldir = os.path.join(class_dir, cl) 31 | rel_cldir = os.path.join(rel_class_dir, cl) 32 | subcls = os.listdir(cldir) 33 | 34 | subcls = [s for s in subcls if (('hsf' in s) and ('mit' not in s))] 35 | 36 | for subcl in subcls: 37 | subcldir = os.path.join(cldir, subcl) 38 | rel_subcldir = os.path.join(rel_cldir, subcl) 39 | images = os.listdir(subcldir) 40 | image_dirs = [os.path.join(rel_subcldir, i) for i in images] 41 | 42 | for image_dir in image_dirs: 43 | class_files.append((cl, image_dir)) 44 | 45 | write_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_write') 46 | rel_write_dir = os.path.join('data', 'raw_data', 'by_write') 47 | write_parts = os.listdir(write_dir) 48 | 49 | for write_part in write_parts: 50 | writers_dir = os.path.join(write_dir, write_part) 51 | rel_writers_dir = os.path.join(rel_write_dir, write_part) 52 | writers = os.listdir(writers_dir) 53 | 54 | for writer in writers: 55 | writer_dir = os.path.join(writers_dir, writer) 56 | rel_writer_dir = os.path.join(rel_writers_dir, writer) 57 | wtypes = os.listdir(writer_dir) 58 | 59 | for wtype in wtypes: 60 | type_dir = os.path.join(writer_dir, wtype) 61 | rel_type_dir = os.path.join(rel_writer_dir, wtype) 62 | images = os.listdir(type_dir) 63 | image_dirs = [os.path.join(rel_type_dir, i) for i in images] 64 | 65 | for image_dir in image_dirs: 66 | write_files.append((writer, image_dir)) 67 | 68 | utils.save_obj( 69 | class_files, 70 | os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs')) 71 | utils.save_obj( 72 | write_files, 73 | os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs')) 74 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/get_hashes.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import sys 4 | 5 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | utils_dir = os.path.join(utils_dir, 'utils') 7 | sys.path.append(utils_dir) 8 | 9 | import utils 10 | 11 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 12 | 13 | cfd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs') 14 | wfd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs') 15 | class_file_dirs = utils.load_obj(cfd) 16 | write_file_dirs = utils.load_obj(wfd) 17 | 18 | class_file_hashes = [] 19 | write_file_hashes = [] 20 | 21 | count = 0 22 | for tup in class_file_dirs: 23 | if (count%100000 == 0): 24 | print('hashed %d class images' % count) 25 | 26 | (cclass, cfile) = tup 27 | file_path = os.path.join(parent_path, cfile) 28 | 29 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 30 | 31 | class_file_hashes.append((cclass, cfile, chash)) 32 | 33 | count += 1 34 | 35 | cfhd = 
os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes') 36 | utils.save_obj(class_file_hashes, cfhd) 37 | 38 | count = 0 39 | for tup in write_file_dirs: 40 | if (count%100000 == 0): 41 | print('hashed %d write images' % count) 42 | 43 | (cclass, cfile) = tup 44 | file_path = os.path.join(parent_path, cfile) 45 | 46 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 47 | 48 | write_file_hashes.append((cclass, cfile, chash)) 49 | 50 | count += 1 51 | 52 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes') 53 | utils.save_obj(write_file_hashes, wfhd) 54 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/group_by_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | utils_dir = os.path.join(utils_dir, 'utils') 6 | sys.path.append(utils_dir) 7 | 8 | import utils 9 | 10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class') 13 | write_class = utils.load_obj(wwcd) 14 | 15 | writers = [] # each entry is a (writer, [list of (file, class)]) tuple 16 | cimages = [] 17 | (cw, _, _) = write_class[0] 18 | for (w, f, c) in write_class: 19 | if w != cw: 20 | writers.append((cw, cimages)) 21 | cw = w 22 | cimages = [(f, c)] 23 | cimages.append((f, c)) 24 | writers.append((cw, cimages)) 25 | 26 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer') 27 | utils.save_obj(writers, ibwd) 28 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/match_hashes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | utils_dir = os.path.join(utils_dir, 'utils') 6 | sys.path.append(utils_dir) 7 | 8 | import utils 9 | 10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | cfhd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes') 13 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes') 14 | class_file_hashes = utils.load_obj(cfhd) # each elem is (class, file dir, hash) 15 | write_file_hashes = utils.load_obj(wfhd) # each elem is (writer, file dir, hash) 16 | 17 | class_hash_dict = {} 18 | for i in range(len(class_file_hashes)): 19 | (c, f, h) = class_file_hashes[len(class_file_hashes)-i-1] 20 | class_hash_dict[h] = (c, f) 21 | 22 | write_classes = [] 23 | for tup in write_file_hashes: 24 | (w, f, h) = tup 25 | write_classes.append((w, f, class_hash_dict[h][0])) 26 | 27 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class') 28 | utils.save_obj(write_classes, wwcd) 29 | -------------------------------------------------------------------------------- /data/Femnist/stats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NAME="nist" 4 | 5 | cd ../../utils 6 | 7 | python3 stats.py --name $NAME 8 | 9 | cd ../data/$NAME -------------------------------------------------------------------------------- /data/Linear_synthetic/data/README.md: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlieDinh/FEDL_pytorch/4db34e5b698d46e2f73b94fb9c0ce00ef9b464f4/data/Linear_synthetic/data/README.md -------------------------------------------------------------------------------- /data/Linear_synthetic/generate_linear_regession.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import json 4 | import random 5 | import os 6 | np.random.seed(0) 7 | 8 | NUM_USER = 100 9 | Kappa = 1.4 10 | Dim = 40 11 | Noise = 0.05 12 | 13 | def generate_x(n_samples = 100, dim= 40, kappa= 10): 14 | '''Helper function to generate data''' 15 | 16 | powers = - np.log(kappa) / np.log(dim) / 2 17 | 18 | S = np.power(np.arange(dim)+1, powers) 19 | X = np.random.randn(n_samples, dim) # Random standard Gaussian data 20 | X *= S 21 | covarient_matrix = np.cov(X) 22 | print("Covarient matrix:",covarient_matrix) # Conditioning 23 | print("np.diag(S)", np.diag(S)) 24 | return X, 1, 1/kappa, np.diag(S) 25 | 26 | def generate_linear_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05): 27 | 28 | '''Helper function to generate data''' 29 | # generate power S 30 | powers = - np.log(kappa) / np.log(dim) / 2 31 | DIM = np.arange(dim) 32 | 33 | # Covariance matrix for X 34 | S = np.power(DIM+1, powers) 35 | 36 | # Creat list data for all users 37 | X_split = [[] for _ in range(num_users)] # X for each user 38 | y_split = [[] for _ in range(num_users)] # y for each user 39 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500 40 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0) 41 | num_total_samples = indices_per_user[-1] 42 | 43 | # Create mean of data for each user, each user will have different distribution 44 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)]) 45 | 46 | 47 | X_total = np.zeros((num_total_samples, dim)) 48 | y_total = np.zeros(num_total_samples) 49 | 50 | for n in range(num_users): 51 | # Generate data 52 | X_n = np.random.multivariate_normal(mean_X[n], np.diag(S), samples_per_user[n]) 53 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n 54 | 55 | # Normalize all X's using LAMBDA 56 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples) 57 | X_total /= norm 58 | 59 | # Generate weights and labels 60 | W = np.random.rand(dim) 61 | y_total = X_total.dot(W) 62 | noise_variance = 0.01 63 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples) 64 | 65 | for n in range(num_users): 66 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :] 67 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]] 68 | X_split[n] = X_n.tolist() 69 | y_split[n] = y_n.tolist() 70 | 71 | # print("User {} has {} samples.".format(n, samples_per_user[n])) 72 | 73 | print("=" * 80) 74 | print("Generated synthetic data for logistic regression successfully.") 75 | print("Summary of the generated data:".format(kappa)) 76 | print(" Total # users : {}".format(num_users)) 77 | print(" Input dimension : {}".format(dim)) 78 | print(" rho : {}".format(kappa)) 79 | print(" Total # of samples : {}".format(num_total_samples)) 80 | print(" Minimum # of samples: {}".format(np.min(samples_per_user))) 81 | print(" Maximum # of samples: {}".format(np.max(samples_per_user))) 82 | print("=" * 80) 83 | 84 | return X_split, y_split 85 | 86 | 87 | def save_total_data(): 88 | train_data = {'users': [], 'user_data': {}, 
'num_samples': []} 89 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 90 | 91 | train_path = os.path.join("data", "train", "mytrain.json") 92 | test_path = os.path.join("data", "test", "mytest.json") 93 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]: 94 | if not os.path.exists(path): 95 | os.makedirs(path) 96 | 97 | X, y = generate_linear_data(NUM_USER, Kappa, Dim, Noise) 98 | 99 | # Create data structure 100 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 101 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 102 | 103 | for i in range(NUM_USER): 104 | uname = 'f_{0:05d}'.format(i) 105 | combined = list(zip(X[i], y[i])) 106 | random.shuffle(combined) 107 | X[i][:], y[i][:] = zip(*combined) 108 | num_samples = len(X[i]) 109 | train_len = int(0.75 * num_samples) 110 | test_len = num_samples - train_len 111 | print("User: ",uname, " Num Sample: ", num_samples ) 112 | train_data['users'].append(uname) 113 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 114 | train_data['num_samples'].append(train_len) 115 | test_data['users'].append(uname) 116 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 117 | test_data['num_samples'].append(test_len) 118 | 119 | with open(train_path, 'w') as outfile: 120 | json.dump(train_data, outfile) 121 | with open(test_path, 'w') as outfile: 122 | json.dump(test_data, outfile) 123 | 124 | print("=" * 80) 125 | print("Saved all users' data sucessfully.") 126 | print(" Train path:", os.path.join(os.curdir, train_path)) 127 | print(" Test path :", os.path.join(os.curdir, test_path)) 128 | print("=" * 80) 129 | 130 | 131 | def main(): 132 | #generate_x() 133 | save_total_data() 134 | 135 | 136 | if __name__ == '__main__': 137 | main() 138 | -------------------------------------------------------------------------------- /data/Linear_synthetic/generate_linear_regession_updated.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import json 4 | import random 5 | import os 6 | np.random.seed(0) 7 | 8 | NUM_USER = 100 9 | Kappa = 1.4 10 | Dim = 40 11 | Noise = 0.05 12 | 13 | def generate_x(n_samples = 100, dim= 40, kappa= 10): 14 | '''Helper function to generate data''' 15 | 16 | powers = - np.log(kappa) / np.log(dim) / 2 17 | 18 | S = np.power(np.arange(dim)+1, powers) 19 | X = np.random.randn(n_samples, dim) # Random standard Gaussian data 20 | X *= S 21 | covarient_matrix = np.cov(X) 22 | print("Covarient matrix:",covarient_matrix) # Conditioning 23 | print("np.diag(S)", np.diag(S)) 24 | return X, 1, 1/kappa, np.diag(S) 25 | 26 | def generate_linear_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05): 27 | 28 | '''Helper function to generate data''' 29 | # generate power S 30 | powers = - np.log(kappa) / np.log(dim) / 2 31 | DIM = np.arange(dim) 32 | 33 | # Covariance matrix for X 34 | S = np.power(DIM+1, powers) 35 | 36 | # Creat list data for all users 37 | X_split = [[] for _ in range(num_users)] # X for each user 38 | y_split = [[] for _ in range(num_users)] # y for each user 39 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500 40 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0) 41 | num_total_samples = indices_per_user[-1] 42 | 43 | # Create mean of data for each user, each user will have different distribution 44 | sig = np.random.uniform(0.1, 10) 45 | mean = np.random.uniform(low=-0.1, 
high=0.1) 46 | cov = np.random.uniform(low=0.0, high=0.01) 47 | #print("mean -cov", mean,cov) 48 | mean_X = np.random.normal(mean, cov, dim) 49 | 50 | X_total = np.zeros((num_total_samples, dim)) 51 | y_total = np.zeros(num_total_samples) 52 | 53 | for n in range(num_users): 54 | # Generate data 55 | X_n = np.random.multivariate_normal(mean_X, sig * np.diag(S), samples_per_user[n]) 56 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n 57 | 58 | # Normalize all X's using LAMBDA 59 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples) 60 | X_total /= norm 61 | 62 | # Generate weights and labels 63 | W = np.random.rand(dim) 64 | y_total = X_total.dot(W) 65 | noise_variance = 0.01 66 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples) 67 | 68 | for n in range(num_users): 69 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :] 70 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]] 71 | X_split[n] = X_n.tolist() 72 | y_split[n] = y_n.tolist() 73 | 74 | # print("User {} has {} samples.".format(n, samples_per_user[n])) 75 | 76 | print("=" * 80) 77 | print("Generated synthetic data for logistic regression successfully.") 78 | print("Summary of the generated data:".format(kappa)) 79 | print(" Total # users : {}".format(num_users)) 80 | print(" Input dimension : {}".format(dim)) 81 | print(" rho : {}".format(kappa)) 82 | print(" Total # of samples : {}".format(num_total_samples)) 83 | print(" Minimum # of samples: {}".format(np.min(samples_per_user))) 84 | print(" Maximum # of samples: {}".format(np.max(samples_per_user))) 85 | print("=" * 80) 86 | 87 | return X_split, y_split 88 | 89 | 90 | def save_total_data(): 91 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 92 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 93 | 94 | train_path = os.path.join("data", "train", "mytrain.json") 95 | test_path = os.path.join("data", "test", "mytest.json") 96 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]: 97 | if not os.path.exists(path): 98 | os.makedirs(path) 99 | 100 | X, y = generate_linear_data(NUM_USER, Kappa, Dim, Noise) 101 | 102 | # Create data structure 103 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 104 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 105 | 106 | for i in range(NUM_USER): 107 | uname = 'f_{0:05d}'.format(i) 108 | combined = list(zip(X[i], y[i])) 109 | random.shuffle(combined) 110 | X[i][:], y[i][:] = zip(*combined) 111 | num_samples = len(X[i]) 112 | train_len = int(0.75 * num_samples) 113 | test_len = num_samples - train_len 114 | print("User: ",uname, " Num Sample: ", num_samples ) 115 | train_data['users'].append(uname) 116 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 117 | train_data['num_samples'].append(train_len) 118 | test_data['users'].append(uname) 119 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 120 | test_data['num_samples'].append(test_len) 121 | 122 | with open(train_path, 'w') as outfile: 123 | json.dump(train_data, outfile) 124 | with open(test_path, 'w') as outfile: 125 | json.dump(test_data, outfile) 126 | 127 | print("=" * 80) 128 | print("Saved all users' data sucessfully.") 129 | print(" Train path:", os.path.join(os.curdir, train_path)) 130 | print(" Test path :", os.path.join(os.curdir, test_path)) 131 | print("=" * 80) 132 | 133 | 134 | def main(): 135 | #generate_x() 136 | save_total_data() 137 | 138 | 139 | if 
__name__ == '__main__': 140 | main() 141 | -------------------------------------------------------------------------------- /data/Linear_synthetic/generate_linear_synthetic_backup.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | 10 | 11 | NUM_USER = 100 12 | def normalize_data(X): 13 | 14 | #nomarlize all feature of data between (0 and 1) 15 | normX = X - X.min() 16 | normX = normX / (X.max() - X.min()) 17 | #normX = normX*2-1 between (-1 and 1) 18 | 19 | # nomarlize data with respect to -1 < X.X^T < 1. 20 | temp = normX.dot(normX.T) 21 | return normX/np.sqrt(temp.max()) 22 | 23 | def generate_synthetic(alpha = 0.5, beta = 0.5): 24 | 25 | # Generate parameters for controlling kappa 26 | dimension = 60 27 | NUM_CLASS = 1 28 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 100 29 | print(samples_per_user) 30 | num_samples = np.sum(samples_per_user) 31 | 32 | X_split = [[] for _ in range(NUM_USER)] 33 | y_split = [[] for _ in range(NUM_USER)] 34 | 35 | #### define some eprior #### 36 | mean_W = np.random.normal(0, alpha, NUM_USER) 37 | mean_b = mean_W 38 | B = np.random.normal(0, beta, NUM_USER) 39 | mean_x = np.zeros((NUM_USER, dimension)) 40 | 41 | diagonal = np.zeros(dimension) 42 | for j in range(dimension): 43 | diagonal[j] = np.power((j+1), -1.2) 44 | cov_x = np.diag(diagonal) 45 | 46 | for i in range(NUM_USER): 47 | mean_x[i] = np.random.normal(B[i], 1, dimension) 48 | print(mean_x[i]) 49 | 50 | for i in range(NUM_USER): 51 | 52 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS)) 53 | b = np.random.normal(mean_b[i], 1, NUM_CLASS) 54 | 55 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 56 | nom_xx = normalize_data(xx) 57 | yy = np.zeros(samples_per_user[i]) 58 | 59 | for j in range(samples_per_user[i]): 60 | yy[j] = np.dot(nom_xx[j], W) + b 61 | 62 | X_split[i] = nom_xx.tolist() 63 | y_split[i] = yy.tolist() 64 | 65 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 66 | 67 | return X_split, y_split 68 | 69 | 70 | 71 | def main(): 72 | 73 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 74 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 75 | 76 | train_path = "data/train/mytrain.json" 77 | test_path = "data/test/mytest.json" 78 | 79 | X, y = generate_synthetic(alpha=0.5, beta=0.5) # synthetic (0.5, 0.5) 80 | 81 | 82 | # Create data structure 83 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 84 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 85 | 86 | for i in trange(NUM_USER, ncols=120): 87 | 88 | uname = 'f_{0:05d}'.format(i) 89 | combined = list(zip(X[i], y[i])) 90 | random.shuffle(combined) 91 | X[i][:], y[i][:] = zip(*combined) 92 | num_samples = len(X[i]) 93 | train_len = int(0.75 * num_samples) 94 | test_len = num_samples - train_len 95 | 96 | train_data['users'].append(uname) 97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 98 | train_data['num_samples'].append(train_len) 99 | test_data['users'].append(uname) 100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 101 | test_data['num_samples'].append(test_len) 102 | 103 | 104 | with open(train_path,'w') as outfile: 105 | json.dump(train_data, outfile) 106 | with open(test_path, 'w') as outfile: 107 | json.dump(test_data, outfile) 108 | 109 | 110 | if 
__name__ == "__main__": 111 | main() 112 | 113 | -------------------------------------------------------------------------------- /data/Linear_synthetic/optimal_solution_finding.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | import numpy as np 10 | from sklearn.linear_model import LinearRegression 11 | import sklearn as sk 12 | np.random.seed(0) 13 | 14 | NUM_USER = 100 15 | 16 | def normalize_data(X): 17 | 18 | #nomarlize all feature of data between (-1 and 1) 19 | normX = X - X.min() 20 | normX = normX / (X.max() - X.min()) 21 | 22 | # nomarlize data with respect to -1 < X.X^T < 1. 23 | temp = normX.dot(normX.T) 24 | return normX/np.sqrt(temp.max()) 25 | 26 | 27 | def finding_optimal_synthetic(num_users=100, kappa=10, dim = 40, noise_ratio=0.05): 28 | 29 | powers = - np.log(kappa) / np.log(dim) / 2 30 | DIM = np.arange(dim) 31 | S = np.power(DIM+1, powers) 32 | 33 | # Creat list data for all users 34 | X_split = [[] for _ in range(num_users)] # X for each user 35 | y_split = [[] for _ in range(num_users)] # y for each user 36 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500 37 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0) 38 | num_total_samples = indices_per_user[-1] 39 | 40 | # Create mean of data for each user, each user will have different distribution 41 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)]) 42 | 43 | # Covariance matrix for X 44 | X_total = np.zeros((num_total_samples, dim)) 45 | y_total = np.zeros(num_total_samples) 46 | 47 | for n in range(num_users): 48 | # Generate data 49 | X_n = np.random.multivariate_normal(mean_X[n], np.diag(S), samples_per_user[n]) 50 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n 51 | 52 | # Normalize all X's using LAMBDA 53 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples) 54 | X_total /= norm 55 | 56 | # Generate weights and labels 57 | W = np.random.rand(dim) 58 | y_total = X_total.dot(W) 59 | noise_variance = 0.01 60 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples) 61 | 62 | for n in range(num_users): 63 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1],:] 64 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]] 65 | X_split[n] = X_n.tolist() 66 | y_split[n] = y_n.tolist() 67 | 68 | # split data to get training data 69 | train_x = [] 70 | train_y = [] 71 | test_x = [] 72 | test_y = [] 73 | for i in range(NUM_USER): 74 | num_samples = len(X_split[i]) 75 | train_len = int(0.75 * num_samples) 76 | test_len = num_samples - train_len 77 | train_x.append(X_split[i][:train_len]) 78 | train_y.append(y_split[i][:train_len]) 79 | test_x.append(X_split[i][train_len:]) 80 | test_y.append(y_split[i][train_len:]) 81 | 82 | train_xc = np.concatenate(train_x) 83 | train_yc = np.concatenate(train_y) 84 | test_xc = np.concatenate(test_x) 85 | test_yc = np.concatenate(test_y) 86 | 87 | # # finding optimal 88 | X_X_T = np.zeros(shape=(dim+1,dim+1)) 89 | X_Y = np.zeros(shape=(dim+1,1)) 90 | 91 | for n in range(num_users): 92 | X = np.array(train_x[i]) 93 | y = np.array(train_y[i]) 94 | one = np.ones((X.shape[0], 1)) 95 | Xbar = np.concatenate((one, X), axis = 1) 96 | X_X_T += Xbar.T.dot(Xbar)*len(y)/len(train_yc) 97 | X_Y += np.array(Xbar).T.dot(y).reshape((dim+1, 1))*len(y)/len(train_yc) 98 | 99 | # get optimal point. 
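    # The loop above accumulates the sample-weighted normal equations over all devices:
    #   X_X_T = sum_n p_n * Xbar_n^T Xbar_n,  X_Y = sum_n p_n * Xbar_n^T y_n,  p_n = len(y_n)/len(train_yc),
    # so the next line returns the global least-squares optimum w = X_X_T^{-1} X_Y (the
    # column of ones prepended to X carries the bias term). Note that both this loop and
    # the loss loop below index train_x[i] / train_y[i]; the loop variable n
    # (train_x[n] / train_y[n]) was presumably intended, otherwise every term reuses the
    # same user's data. np.linalg.solve(X_X_T, X_Y) would be a numerically safer
    # equivalent of the explicit inverse.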
100 | w = np.linalg.inv(X_X_T).dot(X_Y) 101 | 102 | # caculate loss over all devices 103 | loss = 0 104 | for n in range(num_users): 105 | X = np.array(train_x[i]) 106 | y = np.array(train_y[i]) 107 | one = np.ones((X.shape[0], 1)) 108 | Xbar = np.concatenate((one, X), axis = 1) 109 | y_predict = Xbar.dot(w) 110 | loss += sk.metrics.mean_squared_error(y,y_predict)*len(y)/len(train_yc) 111 | 112 | return loss 113 | 114 | def main(): 115 | loss = 0 116 | loss = finding_optimal_synthetic() 117 | print("loss for train data", loss) 118 | 119 | if __name__ == "__main__": 120 | main() 121 | 122 | -------------------------------------------------------------------------------- /data/Logistic_synthetic/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlieDinh/FEDL_pytorch/4db34e5b698d46e2f73b94fb9c0ce00ef9b464f4/data/Logistic_synthetic/README.md -------------------------------------------------------------------------------- /data/Logistic_synthetic/logistic_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import json 4 | import random 5 | import os 6 | 7 | 8 | def logit(X, W): 9 | return 1 / (1 + np.exp(-np.dot(X, W))) 10 | 11 | 12 | def generate_logistic_regression_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05): 13 | # For consistent results 14 | np.random.seed(0) 15 | 16 | # Sanity check 17 | assert(kappa >= 1 and num_users > 0 and dim > 0) 18 | 19 | X_split = [[] for _ in range(num_users)] # X for each user 20 | y_split = [[] for _ in range(num_users)] # y for each user 21 | 22 | # Find users' sample sizes based on the power law (heterogeneity) 23 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 50 + 10000 24 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0) 25 | num_total_samples = indices_per_user[-1] 26 | 27 | # Each user's mean is drawn from N(0, 1) (non-i.i.d. data) 28 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)]) 29 | 30 | # Covariance matrix for X 31 | Sigma = np.eye(dim) 32 | 33 | # L = 1, hyper_learning_rate = LAMBDA 34 | LAMBDA = 100 if kappa == 1 else 1 / (kappa - 1) 35 | 36 | # Keep all users' inputs and labels in one array, 37 | # indexed according to indices_per_user. 38 | # (e.g. X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n) 39 | # (e.g. 
y_total[indices_per_user[n]:indices_per_user[n+1]] = y_n) 40 | X_total = np.zeros((num_total_samples, dim)) 41 | y_total = np.zeros(num_total_samples) 42 | 43 | for n in range(num_users): 44 | # Generate data 45 | X_n = np.random.multivariate_normal(mean_X[n], Sigma, samples_per_user[n]) 46 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n 47 | 48 | # Normalize all X's using LAMBDA 49 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples) 50 | X_total /= norm + LAMBDA 51 | 52 | # Generate weights and labels 53 | W = np.random.rand(dim) 54 | y_total = logit(X_total, W) 55 | y_total = np.where(y_total > 0.5, 1, 0) 56 | 57 | # Apply noise: randomly flip some of y_n with probability noise_ratio 58 | noise = np.random.binomial(1, noise_ratio, num_total_samples) 59 | y_total = np.multiply(noise - y_total, noise) + np.multiply(y_total, 1 - noise) 60 | 61 | # Save each user's data separately 62 | for n in range(num_users): 63 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :] 64 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]] 65 | X_split[n] = X_n.tolist() 66 | y_split[n] = y_n.tolist() 67 | 68 | # print("User {} has {} samples.".format(n, samples_per_user[n])) 69 | 70 | print("=" * 80) 71 | print("Generated synthetic data for logistic regression successfully.") 72 | print("Summary of the generated data:".format(kappa)) 73 | print(" Total # users : {}".format(num_users)) 74 | print(" Input dimension : {}".format(dim)) 75 | print(" rho : {}".format(kappa)) 76 | print(" Total # of samples : {}".format(num_total_samples)) 77 | print(" Minimum # of samples: {}".format(np.min(samples_per_user))) 78 | print(" Maximum # of samples: {}".format(np.max(samples_per_user))) 79 | print("=" * 80) 80 | 81 | return X_split, y_split 82 | 83 | 84 | def save_total_data(): 85 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 86 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 87 | 88 | train_path = os.path.join("data", "train", "mytrain.json") 89 | test_path = os.path.join("data", "test", "mytest.json") 90 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]: 91 | if not os.path.exists(path): 92 | os.makedirs(path) 93 | 94 | X, y = generate_logistic_regression_data(100, 2, 40, 0.05) 95 | 96 | # Create data structure 97 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 98 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 99 | 100 | for i in range(100): 101 | uname = 'f_{0:05d}'.format(i) 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.75 * num_samples) 107 | test_len = num_samples - train_len 108 | print("User: ",uname, " Num Sample: ", num_samples ) 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | with open(train_path, 'w') as outfile: 117 | json.dump(train_data, outfile) 118 | with open(test_path, 'w') as outfile: 119 | json.dump(test_data, outfile) 120 | 121 | print("=" * 80) 122 | print("Saved all users' data sucessfully.") 123 | print(" Train path:", os.path.join(os.curdir, train_path)) 124 | print(" Test path :", os.path.join(os.curdir, test_path)) 125 | 
print("=" * 80) 126 | 127 | 128 | def main(): 129 | save_total_data() 130 | #save_data_by_user() 131 | 132 | 133 | if __name__ == '__main__': 134 | main() 135 | -------------------------------------------------------------------------------- /data/Mnist/data/mldata/mnist-original.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlieDinh/FEDL_pytorch/4db34e5b698d46e2f73b94fb9c0ce00ef9b464f4/data/Mnist/data/mldata/mnist-original.mat -------------------------------------------------------------------------------- /data/Mnist/generate_iid_20users.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_mldata 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | random.seed(1) 9 | np.random.seed(1) 10 | NUM_USERS = 20 # should be muitiple of 10 11 | NUM_LABELS = 10 12 | # Setup directory for train/test data 13 | train_path = './data/train/mnist_train.json' 14 | test_path = './data/test/mnist_test.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | # Get MNIST data, normalize, and divide by level 23 | mnist = fetch_mldata('MNIST original', data_home='./data') 24 | mu = np.mean(mnist.data.astype(np.float32), 0) 25 | sigma = np.std(mnist.data.astype(np.float32), 0) 26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 27 | mnist_data = [] 28 | for i in trange(10): 29 | idx = mnist.target==i 30 | mnist_data.append(mnist.data[idx]) 31 | 32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data]) 33 | users_lables = [] 34 | 35 | print("idx",idx) 36 | # devide for label for each users: 37 | for user in trange(NUM_USERS): 38 | for j in range(NUM_LABELS): # 4 labels for each users 39 | l = (user + j) % 10 40 | users_lables.append(l) 41 | unique, counts = np.unique(users_lables, return_counts=True) 42 | print("--------------") 43 | print(unique, counts) 44 | 45 | def ram_dom_gen(total, size): 46 | print(total) 47 | nums = [] 48 | temp = [] 49 | for i in range(size - 1): 50 | val = np.random.randint(total//(size + 1), total//(size - 8)) 51 | temp.append(val) 52 | total -= val 53 | temp.append(total) 54 | print(temp) 55 | return temp 56 | number_sample = [] 57 | for total_value, count in zip(mnist_data, counts): 58 | temp = ram_dom_gen(len(total_value), count) 59 | number_sample.append(temp) 60 | print("--------------") 61 | print(number_sample) 62 | 63 | i = 0 64 | number_samples = [] 65 | for i in range(len(number_sample[0])): 66 | for sample in number_sample: 67 | print(sample) 68 | number_samples.append(sample[i]) 69 | 70 | print("--------------") 71 | print(number_samples) 72 | 73 | ###### CREATE USER DATA SPLIT ####### 74 | # Assign 100 samples to each user 75 | X = [[] for _ in range(NUM_USERS)] 76 | y = [[] for _ in range(NUM_USERS)] 77 | count = 0 78 | for user in trange(NUM_USERS): 79 | for j in range(NUM_LABELS): # 4 labels for each users 80 | l = (user + j) % 10 81 | print("value of L",l) 82 | print("value of count",count) 83 | num_samples = number_samples[count] # num sample 84 | count = count + 1 85 | if idx[l] + num_samples < len(mnist_data[l]): 86 | X[user] += mnist_data[l][idx[l]:num_samples].tolist() 87 | y[user] += (l*np.ones(num_samples)).tolist() 88 | idx[l] += num_samples 89 | 
print("check len os user:", user, j,"len data", len(X[user]), num_samples) 90 | 91 | print("IDX2:", idx) # counting samples for each labels 92 | 93 | # Create data structure 94 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 95 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 96 | 97 | # Setup 5 users 98 | # for i in trange(5, ncols=120): 99 | for i in range(NUM_USERS): 100 | uname = 'f_{0:05d}'.format(i) 101 | 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.75*num_samples) 107 | test_len = num_samples - train_len 108 | 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | print("Num_samples:", train_data['num_samples']) 117 | print("Total_samples:",sum(train_data['num_samples'] + test_data['num_samples'])) 118 | 119 | with open(train_path,'w') as outfile: 120 | json.dump(train_data, outfile) 121 | with open(test_path, 'w') as outfile: 122 | json.dump(test_data, outfile) 123 | 124 | print("Finish Generating Samples") 125 | -------------------------------------------------------------------------------- /data/Mnist/generate_niid_100users_updated.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_mldata 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | random.seed(1) 9 | np.random.seed(1) 10 | NUM_USERS = 100 11 | NUM_LABELS = 3 12 | # Setup directory for train/test data 13 | train_path = './data/train/mnist_train.json' 14 | test_path = './data/test/mnist_test.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | # Get MNIST data, normalize, and divide by level 23 | mnist = fetch_mldata('MNIST original', data_home='./data') 24 | mu = np.mean(mnist.data.astype(np.float32), 0) 25 | sigma = np.std(mnist.data.astype(np.float32), 0) 26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 27 | mnist_data = [] 28 | for i in trange(10): 29 | idx = mnist.target==i 30 | mnist_data.append(mnist.data[idx]) 31 | 32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data]) 33 | 34 | ###### CREATE USER DATA SPLIT ####### 35 | # Assign 100 samples to each user 36 | X = [[] for _ in range(NUM_USERS)] 37 | y = [[] for _ in range(NUM_USERS)] 38 | idx = np.zeros(10, dtype=np.int64) 39 | for user in range(NUM_USERS): 40 | for j in range(NUM_LABELS): # 3 labels for each users 41 | #l = (2*user+j)%10 42 | l = (user + j) % 10 43 | print("L:", l) 44 | X[user] += mnist_data[l][idx[l]:idx[l]+10].tolist() 45 | y[user] += (l*np.ones(10)).tolist() 46 | idx[l] += 10 47 | 48 | print("IDX1:", idx) # counting samples for each labels 49 | 50 | # Assign remaining sample by power law 51 | user = 0 52 | props = np.random.lognormal( 53 | 0, 2., (10, NUM_USERS, NUM_LABELS)) # last 5 is 5 labels 54 | props = np.array([[[len(v)-1000]] for v in mnist_data]) * \ 55 | props/np.sum(props, (1, 2), keepdims=True) 56 | # print("here:",props/np.sum(props,(1,2), 
keepdims=True)) 57 | #props = np.array([[[len(v)-100]] for v in mnist_data]) * \ 58 | # props/np.sum(props, (1, 2), keepdims=True) 59 | #idx = 1000*np.ones(10, dtype=np.int64) 60 | # print("here2:",props) 61 | for user in trange(NUM_USERS): 62 | for j in range(NUM_LABELS): # 4 labels for each users 63 | # l = (2*user+j)%10 64 | l = (user + j) % 10 65 | num_samples = int(props[l, user//int(NUM_USERS/10), j]) 66 | numran1 = random.randint(10, 200) 67 | numran2 = random.randint(1, 10) 68 | num_samples = (num_samples) * numran2 + numran1 69 | if(NUM_USERS <= 20): 70 | num_samples = num_samples * 2 71 | if idx[l] + num_samples < len(mnist_data[l]): 72 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist() 73 | y[user] += (l*np.ones(num_samples)).tolist() 74 | idx[l] += num_samples 75 | print("check len os user:", user, j, 76 | "len data", len(X[user]), num_samples) 77 | 78 | print("IDX2:", idx) # counting samples for each labels 79 | 80 | # Create data structure 81 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 82 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 83 | 84 | # Setup 5 users 85 | # for i in trange(5, ncols=120): 86 | for i in range(NUM_USERS): 87 | uname = 'f_{0:05d}'.format(i) 88 | 89 | combined = list(zip(X[i], y[i])) 90 | random.shuffle(combined) 91 | X[i][:], y[i][:] = zip(*combined) 92 | num_samples = len(X[i]) 93 | train_len = int(0.75*num_samples) 94 | test_len = num_samples - train_len 95 | 96 | train_data['users'].append(uname) 97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 98 | train_data['num_samples'].append(train_len) 99 | test_data['users'].append(uname) 100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 101 | test_data['num_samples'].append(test_len) 102 | 103 | print("Num_samples:", train_data['num_samples']) 104 | print("Total_samples:",sum(train_data['num_samples'])) 105 | 106 | with open(train_path,'w') as outfile: 107 | json.dump(train_data, outfile) 108 | with open(test_path, 'w') as outfile: 109 | json.dump(test_data, outfile) 110 | 111 | print("Finish Generating Samples") 112 | -------------------------------------------------------------------------------- /data/Mnist/generate_niid_20users.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_openml 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | random.seed(1) 9 | np.random.seed(1) 10 | NUM_USERS = 20 # should be muitiple of 10 11 | NUM_LABELS = 2 12 | # Setup directory for train/test data 13 | train_path = './data/train/mnist_train.json' 14 | test_path = './data/test/mnist_test.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | # Get MNIST data, normalize, and divide by level 23 | mnist = fetch_openml('mnist_784', data_home='./data') 24 | mu = np.mean(mnist.data.astype(np.float32), 0) 25 | sigma = np.std(mnist.data.astype(np.float32), 0) 26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 27 | mnist_data = [] 28 | for i in trange(10): 29 | idx = mnist.target==str(i) 30 | mnist_data.append(mnist.data[idx]) 31 | 32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data]) 33 | users_lables = [] 34 | 35 | print("idx",idx) 36 | # devide for label for each users: 37 | 
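# Labels are assigned round-robin: user u receives the NUM_LABELS consecutive digits
# (u + j) % 10 for j in range(NUM_LABELS), so with 20 users and 2 labels per user each
# digit ends up assigned to exactly 4 users (the `counts` printed below).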
for user in trange(NUM_USERS): 38 | for j in range(NUM_LABELS): # 4 labels for each users 39 | l = (user + j) % 10 40 | users_lables.append(l) 41 | unique, counts = np.unique(users_lables, return_counts=True) 42 | print("--------------") 43 | print(unique, counts) 44 | 45 | def ram_dom_gen(total, size): 46 | print(total) 47 | nums = [] 48 | temp = [] 49 | for i in range(size - 1): 50 | val = np.random.randint(total//(size + 1), total//2) 51 | temp.append(val) 52 | total -= val 53 | temp.append(total) 54 | print(temp) 55 | return temp 56 | number_sample = [] 57 | for total_value, count in zip(mnist_data, counts): 58 | temp = ram_dom_gen(len(total_value), count) 59 | number_sample.append(temp) 60 | print("--------------") 61 | print(number_sample) 62 | 63 | i = 0 64 | number_samples = [] 65 | for i in range(len(number_sample[0])): 66 | for sample in number_sample: 67 | print(sample) 68 | number_samples.append(sample[i]) 69 | 70 | print("--------------") 71 | print(number_samples) 72 | 73 | ###### CREATE USER DATA SPLIT ####### 74 | # Assign 100 samples to each user 75 | X = [[] for _ in range(NUM_USERS)] 76 | y = [[] for _ in range(NUM_USERS)] 77 | count = 0 78 | for user in trange(NUM_USERS): 79 | for j in range(NUM_LABELS): # 4 labels for each users 80 | l = (user + j) % 10 81 | print("value of L",l) 82 | print("value of count",count) 83 | num_samples = number_samples[count] # num sample 84 | count = count + 1 85 | if idx[l] + num_samples < len(mnist_data[l]): 86 | X[user] += mnist_data[l][idx[l]:num_samples].tolist() 87 | y[user] += (l*np.ones(num_samples)).tolist() 88 | idx[l] += num_samples 89 | print("check len os user:", user, j,"len data", len(X[user]), num_samples) 90 | 91 | print("IDX2:", idx) # counting samples for each labels 92 | 93 | # Create data structure 94 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 95 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 96 | 97 | # Setup 5 users 98 | # for i in trange(5, ncols=120): 99 | for i in range(NUM_USERS): 100 | uname = 'f_{0:05d}'.format(i) 101 | 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.75*num_samples) 107 | test_len = num_samples - train_len 108 | 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | print("Num_samples:", train_data['num_samples']) 117 | print("Total_samples:",sum(train_data['num_samples'] + test_data['num_samples'])) 118 | 119 | with open(train_path,'w') as outfile: 120 | json.dump(train_data, outfile) 121 | with open(test_path, 'w') as outfile: 122 | json.dump(test_data, outfile) 123 | 124 | print("Finish Generating Samples") 125 | -------------------------------------------------------------------------------- /data/Mnist/generate_niid_mnist_100users.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_mldata 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | random.seed(1) 9 | np.random.seed(1) 10 | NUM_USERS = 100 11 | NUM_LABELS = 3 12 | # Setup directory for train/test data 13 | train_path = './data/train/mnist_train.json' 14 | 
test_path = './data/test/mnist_test.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | # Get MNIST data, normalize, and divide by level 23 | mnist = fetch_mldata('MNIST original', data_home='./data') 24 | mu = np.mean(mnist.data.astype(np.float32), 0) 25 | sigma = np.std(mnist.data.astype(np.float32), 0) 26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 27 | mnist_data = [] 28 | for i in trange(10): 29 | idx = mnist.target==i 30 | mnist_data.append(mnist.data[idx]) 31 | 32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data]) 33 | 34 | ###### CREATE USER DATA SPLIT ####### 35 | # Assign 100 samples to each user 36 | X = [[] for _ in range(NUM_USERS)] 37 | y = [[] for _ in range(NUM_USERS)] 38 | idx = np.zeros(10, dtype=np.int64) 39 | for user in range(NUM_USERS): 40 | for j in range(NUM_LABELS): # 3 labels for each users 41 | #l = (2*user+j)%10 42 | l = (user + j) % 10 43 | print("L:", l) 44 | X[user] += mnist_data[l][idx[l]:idx[l]+10].tolist() 45 | y[user] += (l*np.ones(10)).tolist() 46 | idx[l] += 10 47 | 48 | print("IDX1:", idx) # counting samples for each labels 49 | 50 | # Assign remaining sample by power law 51 | user = 0 52 | props = np.random.lognormal( 53 | 0, 2., (10, NUM_USERS, NUM_LABELS)) # last 5 is 5 labels 54 | props = np.array([[[len(v)-1000]] for v in mnist_data]) * \ 55 | props/np.sum(props, (1, 2), keepdims=True) 56 | # print("here:",props/np.sum(props,(1,2), keepdims=True)) 57 | #props = np.array([[[len(v)-100]] for v in mnist_data]) * \ 58 | # props/np.sum(props, (1, 2), keepdims=True) 59 | #idx = 1000*np.ones(10, dtype=np.int64) 60 | # print("here2:",props) 61 | for user in trange(NUM_USERS): 62 | for j in range(NUM_LABELS): # 4 labels for each users 63 | # l = (2*user+j)%10 64 | l = (user + j) % 10 65 | num_samples = int(props[l, user//int(NUM_USERS/10), j]) 66 | numran1 = random.randint(10, 200) 67 | numran2 = random.randint(1, 10) 68 | num_samples = (num_samples) * numran2 + numran1 69 | if(NUM_USERS <= 20): 70 | num_samples = num_samples * 2 71 | if idx[l] + num_samples < len(mnist_data[l]): 72 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist() 73 | y[user] += (l*np.ones(num_samples)).tolist() 74 | idx[l] += num_samples 75 | print("check len os user:", user, j, 76 | "len data", len(X[user]), num_samples) 77 | 78 | print("IDX2:", idx) # counting samples for each labels 79 | 80 | # Create data structure 81 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 82 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 83 | 84 | # Setup 5 users 85 | # for i in trange(5, ncols=120): 86 | for i in range(NUM_USERS): 87 | uname = 'f_{0:05d}'.format(i) 88 | 89 | combined = list(zip(X[i], y[i])) 90 | random.shuffle(combined) 91 | X[i][:], y[i][:] = zip(*combined) 92 | num_samples = len(X[i]) 93 | train_len = int(0.75*num_samples) 94 | test_len = num_samples - train_len 95 | 96 | train_data['users'].append(uname) 97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 98 | train_data['num_samples'].append(train_len) 99 | test_data['users'].append(uname) 100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 101 | test_data['num_samples'].append(test_len) 102 | 103 | print("Num_samples:", train_data['num_samples']) 104 | print("Total_samples:",sum(train_data['num_samples'])) 
105 | 106 | with open(train_path,'w') as outfile: 107 | json.dump(train_data, outfile) 108 | with open(test_path, 'w') as outfile: 109 | json.dump(test_data, outfile) 110 | 111 | print("Finish Generating Samples") 112 | -------------------------------------------------------------------------------- /flearn/optimizers/fedoptimizer.py: -------------------------------------------------------------------------------- 1 | from torch.optim import Optimizer 2 | 3 | 4 | class MySGD(Optimizer): 5 | def __init__(self, params, lr): 6 | defaults = dict(lr=lr) 7 | super(MySGD, self).__init__(params, defaults) 8 | 9 | def step(self, closure=None, hyper_learning_rate = 0): 10 | loss = None 11 | if closure is not None: 12 | loss = closure 13 | 14 | for group in self.param_groups: 15 | # print(group) 16 | for p in group['params']: 17 | if p.grad is None: 18 | continue 19 | d_p = p.grad.data 20 | if(hyper_learning_rate != 0): 21 | p.data.add_(-hyper_learning_rate, d_p) 22 | else: 23 | p.data.add_(-group['lr'], d_p) 24 | return loss 25 | 26 | 27 | class FEDLOptimizer(Optimizer): 28 | def __init__(self, params, lr = 0.01, hyper_lr = 0.01, L = 0.1): 29 | if lr < 0.0: 30 | raise ValueError("Invalid learning rate: {}".format(lr)) 31 | defaults = dict(lr=lr,hyper_lr= hyper_lr, L = L) 32 | super(FEDLOptimizer, self).__init__(params, defaults) 33 | 34 | def step(self, server_grads, pre_grads, closure=None): 35 | loss = None 36 | if closure is not None: 37 | loss = closure 38 | for group in self.param_groups: 39 | for p, server_grad, pre_grad in zip(group['params'],server_grads, pre_grads): 40 | if(server_grad.grad != None and pre_grad.grad != None): 41 | p.data = p.data - group['lr'] * (p.grad.data + group['hyper_lr'] * server_grad.grad.data - pre_grad.grad.data) 42 | else: 43 | p.data = p.data - group['lr'] * p.grad.data 44 | return loss 45 | 46 | class pFedMeOptimizer(Optimizer): 47 | def __init__(self, params, lr=0.01, L=0.1 , mu = 0.001): 48 | #self.local_weight_updated = local_weight # w_i,K 49 | if lr < 0.0: 50 | raise ValueError("Invalid learning rate: {}".format(lr)) 51 | defaults = dict(lr=lr, L=L, mu = mu) 52 | super(pFedMeOptimizer, self).__init__(params, defaults) 53 | 54 | def step(self, local_weight_updated, closure=None): 55 | loss = None 56 | if closure is not None: 57 | loss = closure 58 | weight_update = local_weight_updated.copy() 59 | for group in self.param_groups: 60 | for p, localweight in zip( group['params'], weight_update): 61 | p.data = p.data - group['lr'] * (p.grad.data + group['L'] * (p.data - localweight.data) + group['mu']*p.data) 62 | return group['params'], loss 63 | 64 | def update_param(self, local_weight_updated, closure=None): 65 | loss = None 66 | if closure is not None: 67 | loss = closure 68 | weight_update = local_weight_updated.copy() 69 | for group in self.param_groups: 70 | for p, localweight in zip( group['params'], weight_update): 71 | p.data = localweight.data 72 | #return p.data 73 | return group['params'] -------------------------------------------------------------------------------- /flearn/servers/serveravg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | from flearn.users.useravg import UserAVG 5 | from flearn.servers.serverbase import Server 6 | from utils.model_utils import read_data, read_user_data 7 | import numpy as np 8 | 9 | # Implementation for FedAvg Server 10 | 11 | class FedAvg(Server): 12 | def __init__(self, dataset,algorithm, model, batch_size, learning_rate, 
hyper_learning_rate, L, num_glob_iters, 13 | local_epochs, optimizer, num_users, rho, times): 14 | super().__init__(dataset,algorithm, model[0], batch_size, learning_rate, hyper_learning_rate, L, num_glob_iters, 15 | local_epochs, optimizer, num_users, rho, times) 16 | 17 | # Initialize data for all users 18 | data = read_data(dataset) 19 | total_users = len(data[0]) 20 | for i in range(total_users): 21 | id, train , test = read_user_data(i, data, dataset) 22 | user = UserAVG(id, train, test, model, batch_size, learning_rate,hyper_learning_rate,L, local_epochs, optimizer) 23 | self.users.append(user) 24 | self.total_train_samples += user.train_samples 25 | 26 | print("Number of users / total users:",num_users, " / " ,total_users) 27 | print("Finished creating FedAvg server.") 28 | 29 | def send_grads(self): 30 | assert (self.users is not None and len(self.users) > 0) 31 | grads = [] 32 | for param in self.model.parameters(): 33 | if param.grad is None: 34 | grads.append(torch.zeros_like(param.data)) 35 | else: 36 | grads.append(param.grad) 37 | for user in self.users: 38 | user.set_grads(grads) 39 | 40 | def train(self): 41 | loss = [] 42 | for glob_iter in range(self.num_glob_iters): 43 | print("-------------Round number: ",glob_iter, " -------------") 44 | #loss_ = 0 45 | self.send_parameters() 46 | 47 | # Evaluate model each interation 48 | self.evaluate() 49 | 50 | self.selected_users = self.select_users(glob_iter,self.num_users) 51 | for user in self.selected_users: 52 | user.train(self.local_epochs) #* user.train_samples 53 | self.aggregate_parameters() 54 | #loss_ /= self.total_train_samples 55 | #loss.append(loss_) 56 | #print(loss_) 57 | #print(loss) 58 | self.save_results() 59 | self.save_model() -------------------------------------------------------------------------------- /flearn/servers/serverbase.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import numpy as np 4 | import h5py 5 | from utils.model_utils import Metrics 6 | import copy 7 | 8 | class Server: 9 | def __init__(self, dataset,algorithm, model, batch_size, learning_rate ,hyper_learning_rate, L, 10 | num_glob_iters, local_epochs, optimizer,num_users,rho, times): 11 | 12 | # Set up the main attributes 13 | self.dataset = dataset 14 | self.num_glob_iters = num_glob_iters 15 | self.local_epochs = local_epochs 16 | self.batch_size = batch_size 17 | self.learning_rate = learning_rate 18 | self.total_train_samples = 0 19 | self.model = copy.deepcopy(model) 20 | self.users = [] 21 | self.selected_users = [] 22 | self.num_users = num_users 23 | self.hyper_learning_rate = hyper_learning_rate 24 | self.L = L 25 | self.algorithm = algorithm 26 | self.rs_train_acc, self.rs_train_loss, self.rs_glob_acc= [], [], [] 27 | self.rho = rho 28 | self.times = times 29 | 30 | def aggregate_grads(self): 31 | assert (self.users is not None and len(self.users) > 0) 32 | for param in self.model.parameters(): 33 | param.grad = torch.zeros_like(param.data) 34 | for user in self.users: 35 | self.add_grad(user, user.train_samples / self.total_train_samples) 36 | 37 | def send_parameters(self): 38 | assert (self.users is not None and len(self.users) > 0) 39 | for user in self.users: 40 | user.set_parameters(self.model) 41 | 42 | def add_parameters(self, user, ratio): 43 | model = self.model.parameters() 44 | for server_param, user_param in zip(self.model.parameters(), user.get_parameters()): 45 | server_param.data = server_param.data + user_param.data.clone() * ratio 46 | 
if(user_param.grad != None): 47 | if(server_param.grad == None): 48 | server_param.grad = torch.zeros_like(user_param.grad) 49 | server_param.grad.data = server_param.grad.data + user_param.grad.data.clone() * ratio 50 | 51 | def aggregate_parameters(self): 52 | assert (self.users is not None and len(self.users) > 0) 53 | for param in self.model.parameters(): 54 | param.data = torch.zeros_like(param.data) 55 | if(param.grad != None): 56 | param.grad.data = torch.zeros_like(param.grad.data) 57 | total_train = 0 58 | #if(self.num_users = self.to) 59 | for user in self.selected_users: 60 | total_train += user.train_samples 61 | for user in self.selected_users: 62 | self.add_parameters(user, user.train_samples / total_train) 63 | #self.add_grad(user, user.train_samples / total_train) 64 | 65 | def save_model(self): 66 | model_path = os.path.join("models", self.dataset) 67 | if not os.path.exists(model_path): 68 | os.makedirs(model_path) 69 | torch.save(self.model, os.path.join(model_path, "server" + ".pt")) 70 | 71 | def load_model(self): 72 | model_path = os.path.join("models", self.dataset, "server" + ".pt") 73 | assert (os.path.exists(model_path)) 74 | self.model = torch.load(model_path) 75 | 76 | def model_exists(self): 77 | return os.path.exists(os.path.join("models", self.dataset, "server" + ".pt")) 78 | 79 | def select_users(self, round, num_users): 80 | if(num_users == len(self.users)): 81 | print("All users are selected") 82 | return self.users 83 | 84 | num_users = min(num_users, len(self.users)) 85 | # fix the list of user consistent 86 | np.random.seed(round * (self.times + 1)) 87 | return np.random.choice(self.users, num_users, replace=False) #, p=pk) 88 | 89 | 90 | # Save loss, accurancy to h5 fiel 91 | def save_results(self): 92 | alg = self.dataset + "_" + self.algorithm 93 | if(self.algorithm == "FEDL"): 94 | alg = alg + "_" + str(self.learning_rate) + "_" + str(self.hyper_learning_rate) + "_" + str(self.num_users) + "u" + "_" + str(self.batch_size) + "b" + "_" + str(self.local_epochs) 95 | else: 96 | alg = alg + "_" + str(self.learning_rate) + "_" + str(self.num_users) + "u" + "_" + str(self.batch_size) + "b" + "_" + str(self.local_epochs) 97 | if(self.L > 0): 98 | alg = alg + "_" + str(self.L) + "L" 99 | 100 | if(self.rho > 0): 101 | alg = alg + "_" + str(self.rho) + "p" 102 | 103 | alg = alg + "_" + str(self.times) 104 | if (len(self.rs_glob_acc) != 0 & len(self.rs_train_acc) & len(self.rs_train_loss)) : 105 | with h5py.File("./results/" + '{}.h5'.format(alg, self.local_epochs), 'w') as hf: 106 | hf.create_dataset('rs_glob_acc', data=self.rs_glob_acc) 107 | hf.create_dataset('rs_train_acc', data=self.rs_train_acc) 108 | hf.create_dataset('rs_train_loss', data=self.rs_train_loss) 109 | hf.close() 110 | 111 | def test(self): 112 | '''tests self.latest_model on given clients 113 | ''' 114 | num_samples = [] 115 | tot_correct = [] 116 | losses = [] 117 | for c in self.users: 118 | ct, ns = c.test() 119 | tot_correct.append(ct*1.0) 120 | num_samples.append(ns) 121 | ids = [c.id for c in self.users] 122 | 123 | return ids, num_samples, tot_correct 124 | 125 | def train_error_and_loss(self): 126 | num_samples = [] 127 | tot_correct = [] 128 | losses = [] 129 | for c in self.users: 130 | ct, cl, ns = c.train_error_and_loss() 131 | tot_correct.append(ct*1.0) 132 | num_samples.append(ns) 133 | losses.append(cl*1.0) 134 | 135 | ids = [c.id for c in self.users] 136 | #groups = [c.group for c in self.clients] 137 | 138 | return ids, num_samples, tot_correct, losses 139 | 140 | def 
evaluate(self): 141 | stats = self.test() 142 | stats_train = self.train_error_and_loss() 143 | glob_acc = np.sum(stats[2])*1.0/np.sum(stats[1]) 144 | train_acc = np.sum(stats_train[2])*1.0/np.sum(stats_train[1]) 145 | # train_loss = np.dot(stats_train[3], stats_train[1])*1.0/np.sum(stats_train[1]) 146 | train_loss = sum([x * y for (x, y) in zip(stats_train[1], stats_train[3])]).item() / np.sum(stats_train[1]) 147 | self.rs_glob_acc.append(glob_acc) 148 | self.rs_train_acc.append(train_acc) 149 | self.rs_train_loss.append(train_loss) 150 | #print("stats_train[1]",stats_train[3][0]) 151 | print("Average Global Accurancy: ", glob_acc) 152 | print("Average Global Trainning Accurancy: ", train_acc) 153 | print("Average Global Trainning Loss: ",train_loss) 154 | -------------------------------------------------------------------------------- /flearn/servers/serverfedl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | from flearn.users.userfedl import UserFEDL 5 | from flearn.servers.serverbase import Server 6 | from utils.model_utils import read_data, read_user_data 7 | import numpy as np 8 | 9 | # Implementation for FedAvg Server 10 | 11 | class FEDL(Server): 12 | def __init__(self, dataset,algorithm, model, batch_size, learning_rate, hyper_learning_rate, L, num_glob_iters, 13 | local_epochs, optimizer, num_users,rho, times): 14 | super().__init__(dataset,algorithm, model[0], batch_size, learning_rate, hyper_learning_rate, L, num_glob_iters, 15 | local_epochs, optimizer, num_users,rho, times) 16 | 17 | # Initialize data for all users 18 | data = read_data(dataset) 19 | total_users = len(data[0]) 20 | for i in range(total_users): 21 | id, train , test = read_user_data(i, data, dataset) 22 | user = UserFEDL(id, train, test, model, batch_size, learning_rate, hyper_learning_rate, L, local_epochs, optimizer) 23 | self.users.append(user) 24 | self.total_train_samples += user.train_samples 25 | 26 | print("Number of users / total users:",num_users, " / " ,total_users) 27 | print("Finished creating FEDL server.") 28 | 29 | def train(self): 30 | 31 | for glob_iter in range(self.num_glob_iters): 32 | print("-------------Round number: ",glob_iter, " -------------") 33 | 34 | self.send_parameters() 35 | self.evaluate() 36 | self.selected_users = self.select_users(glob_iter,self.num_users) 37 | for user in self.selected_users: 38 | user.train(self.local_epochs) #* user.train_samples 39 | self.aggregate_parameters() 40 | 41 | self.save_results() 42 | self.save_model() -------------------------------------------------------------------------------- /flearn/trainmodel/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Net(nn.Module): 6 | def __init__(self): 7 | super(Net, self).__init__() 8 | self.conv1 = nn.Conv2d(1, 16, 2, 1) 9 | self.conv2 = nn.Conv2d(16, 32, 2, 1) 10 | self.dropout1 = nn.Dropout(0.25) 11 | self.dropout2 = nn.Dropout(0.5) 12 | self.fc1 = nn.Linear(18432, 128) 13 | self.fc2 = nn.Linear(128, 10) 14 | 15 | def forward(self, x): 16 | x = self.conv1(x) 17 | x = nn.ReLU()(x) 18 | x = nn.MaxPool2d(2, 1)(x) 19 | x = self.dropout1(x) 20 | x = self.conv2(x) 21 | x = nn.ReLU()(x) 22 | x = nn.MaxPool2d(2, 1)(x) 23 | x = self.dropout2(x) 24 | x = torch.flatten(x, 1) 25 | x = self.fc1(x) 26 | x = nn.ReLU()(x) 27 | x = self.fc2(x) 28 | output = F.log_softmax(x, dim=1) 29 | return output 30 | 31 | 
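# --- Illustrative sketch (assumed, not part of the original file) ------------
# Net expects MNIST-shaped input (N, 1, 28, 28): two 2x2 convolutions and two
# stride-1 max-pools shrink 28 -> 24, so flattening gives 32 * 24 * 24 = 18432,
# which is why fc1 is nn.Linear(18432, 128). A quick shape check, run only when
# this module is executed directly:
if __name__ == "__main__":
    _demo_out = Net()(torch.zeros(2, 1, 28, 28))
    print(_demo_out.shape)  # expected: torch.Size([2, 10])
# -----------------------------------------------------------------------------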
class Mclr_Logistic(nn.Module): 32 | def __init__(self, input_dim = 784, output_dim = 10): 33 | super(Mclr_Logistic, self).__init__() 34 | self.fc1 = nn.Linear(input_dim, output_dim) 35 | 36 | def forward(self, x): 37 | x = torch.flatten(x, 1) 38 | x = self.fc1(x) 39 | output = F.log_softmax(x, dim=1) 40 | return output 41 | 42 | class Mclr_CrossEntropy(nn.Module): 43 | def __init__(self, input_dim = 784, output_dim = 10): 44 | super(Mclr_CrossEntropy, self).__init__() 45 | self.linear = torch.nn.Linear(input_dim, output_dim) 46 | 47 | def forward(self, x): 48 | x = torch.flatten(x, 1) 49 | outputs = self.linear(x) 50 | return outputs 51 | 52 | class DNN(nn.Module): 53 | def __init__(self, input_dim = 784, mid_dim = 100, output_dim = 10): 54 | super(DNN, self).__init__() 55 | # define network layers 56 | self.fc1 = nn.Linear(input_dim, mid_dim) 57 | self.fc2 = nn.Linear(mid_dim, output_dim) 58 | 59 | def forward(self, x): 60 | # define forward pass 61 | x = torch.flatten(x, 1) 62 | x = F.relu(self.fc1(x)) 63 | x = self.fc2(x) 64 | x = F.log_softmax(x, dim=1) 65 | return x 66 | 67 | class Linear_Regression(nn.Module): 68 | def __init__(self, input_dim = 60, output_dim = 1): 69 | super(Linear_Regression, self).__init__() 70 | self.linear = torch.nn.Linear(input_dim, output_dim) 71 | 72 | def forward(self, x): 73 | x = torch.flatten(x, 1) 74 | outputs = self.linear(x) 75 | return outputs 76 | 77 | class DNN(nn.Module): 78 | def __init__(self, input_dim = 784, mid_dim = 100, output_dim = 10): 79 | super(DNN, self).__init__() 80 | # define network layers 81 | self.fc1 = nn.Linear(input_dim, mid_dim) 82 | self.fc2 = nn.Linear(mid_dim, output_dim) 83 | 84 | def forward(self, x): 85 | # define forward pass 86 | x = torch.flatten(x, 1) 87 | x = F.relu(self.fc1(x)) 88 | x = self.fc2(x) 89 | x = F.log_softmax(x, dim=1) 90 | return x -------------------------------------------------------------------------------- /flearn/users/useravg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | import json 6 | from torch.utils.data import DataLoader 7 | from flearn.users.userbase import User 8 | from flearn.optimizers.fedoptimizer import * 9 | # Implementation for FedAvg clients 10 | 11 | class UserAVG(User): 12 | def __init__(self, numeric_id, train_data, test_data, model, batch_size, learning_rate, hyper_learning_rate, L, 13 | local_epochs, optimizer): 14 | super().__init__(numeric_id, train_data, test_data, model[0], batch_size, learning_rate, hyper_learning_rate, L, 15 | local_epochs) 16 | 17 | if(model[1] == "linear"): 18 | self.loss = nn.MSELoss() 19 | else: 20 | self.loss = nn.NLLLoss() 21 | 22 | self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate) 23 | 24 | def set_grads(self, new_grads): 25 | if isinstance(new_grads, nn.Parameter): 26 | for model_grad, new_grad in zip(self.model.parameters(), new_grads): 27 | model_grad.data = new_grad.data 28 | elif isinstance(new_grads, list): 29 | for idx, model_grad in enumerate(self.model.parameters()): 30 | model_grad.data = new_grads[idx] 31 | 32 | 33 | def train(self, epochs): 34 | self.model.train() 35 | for epoch in range(1, self.local_epochs + 1): 36 | self.model.train() 37 | #loss_per_epoch = 0 38 | for batch_idx, (X, y) in enumerate(self.trainloader): 39 | self.optimizer.zero_grad() 40 | output = self.model(X) 41 | loss = self.loss(output, y) 42 | loss.backward() 43 | self.optimizer.step() 44 | 
self.clone_model_paramenter(self.model.parameters(), self.local_model) 45 | return loss 46 | -------------------------------------------------------------------------------- /flearn/users/userbase.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | import json 6 | from torch.utils.data import DataLoader 7 | import numpy as np 8 | import copy 9 | 10 | class User: 11 | """ 12 | Base class for users in federated learning. 13 | """ 14 | def __init__(self, id, train_data, test_data, model, batch_size = 0, learning_rate = 0, hyper_learning_rate = 0 , L = 0, local_epochs = 0): 15 | # from fedprox 16 | self.model = copy.deepcopy(model) 17 | self.id = id # integer 18 | self.train_samples = len(train_data) 19 | self.test_samples = len(test_data) 20 | if(batch_size == 0): 21 | self.batch_size = len(train_data) 22 | else: 23 | self.batch_size = batch_size 24 | self.learning_rate = learning_rate 25 | self.hyper_learning_rate = hyper_learning_rate 26 | self.L = L 27 | self.local_epochs = local_epochs 28 | self.trainloader = DataLoader(train_data, self.batch_size) 29 | self.testloader = DataLoader(test_data, self.batch_size) 30 | self.testloaderfull = DataLoader(test_data, self.test_samples) 31 | self.trainloaderfull = DataLoader(train_data, self.train_samples) 32 | self.iter_trainloader = iter(self.trainloader) 33 | self.iter_testloader = iter(self.testloader) 34 | 35 | # those parameters are for FEDL. 36 | self.local_model = copy.deepcopy(list(self.model.parameters())) 37 | self.server_grad = copy.deepcopy(list(self.model.parameters())) 38 | self.pre_local_grad = copy.deepcopy(list(self.model.parameters())) 39 | 40 | def set_parameters(self, model): 41 | for old_param, new_param, local_param in zip(self.model.parameters(), model.parameters(), self.local_model): 42 | old_param.data = new_param.data.clone() 43 | local_param.data = new_param.data.clone() 44 | if(new_param.grad != None): 45 | if(old_param.grad == None): 46 | old_param.grad = torch.zeros_like(new_param.grad) 47 | 48 | if(local_param.grad == None): 49 | local_param.grad = torch.zeros_like(new_param.grad) 50 | 51 | old_param.grad.data = new_param.grad.data.clone() 52 | local_param.grad.data = new_param.grad.data.clone() 53 | #self.local_weight_updated = copy.deepcopy(self.optimizer.param_groups[0]['params']) 54 | 55 | def get_parameters(self): 56 | for param in self.model.parameters(): 57 | param.detach() 58 | return self.model.parameters() 59 | 60 | def clone_model_paramenter(self, param, clone_param): 61 | for param, clone_param in zip(param, clone_param): 62 | clone_param.data = param.data.clone() 63 | if(param.grad != None): 64 | if(clone_param.grad == None): 65 | clone_param.grad = torch.zeros_like(param.grad) 66 | clone_param.grad.data = param.grad.data.clone() 67 | 68 | return clone_param 69 | 70 | def get_updated_parameters(self): 71 | return self.local_weight_updated 72 | 73 | def update_parameters(self, new_params): 74 | for param , new_param in zip(self.model.parameters(), new_params): 75 | param.data = new_param.data.clone() 76 | param.grad.data = new_param.grad.data.clone() 77 | 78 | def get_grads(self, grads): 79 | 80 | self.optimizer.zero_grad() 81 | 82 | for x, y in self.trainloaderfull: 83 | output = self.model(x) 84 | loss = self.loss(output, y) 85 | loss.backward() 86 | self.clone_model_paramenter(self.model.parameters(), grads) 87 | #for param, grad in zip(self.model.parameters(), grads): 88 | # 
if(grad.grad == None): 89 | # grad.grad = torch.zeros_like(param.grad) 90 | # grad.grad.data = param.grad.data.clone() 91 | return grads 92 | 93 | def test(self): 94 | self.model.eval() 95 | test_acc = 0 96 | for x, y in self.testloaderfull: 97 | output = self.model(x) 98 | test_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item() 99 | #@loss += self.loss(output, y) 100 | #print(self.id + ", Test Accuracy:", test_acc / y.shape[0] ) 101 | #print(self.id + ", Test Loss:", loss) 102 | return test_acc, y.shape[0] 103 | 104 | def train_error_and_loss(self): 105 | self.model.eval() 106 | train_acc = 0 107 | loss = 0 108 | for x, y in self.trainloaderfull: 109 | output = self.model(x) 110 | train_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item() 111 | loss += self.loss(output, y) 112 | #print(self.id + ", Train Accuracy:", train_acc) 113 | #print(self.id + ", Train Loss:", loss) 114 | return train_acc, loss , self.train_samples 115 | 116 | 117 | def get_next_train_batch(self): 118 | try: 119 | # Samples a new batch for persionalizing 120 | (X, y) = next(self.iter_trainloader) 121 | except StopIteration: 122 | # restart the generator if the previous generator is exhausted. 123 | self.iter_trainloader = iter(self.trainloader) 124 | (X, y) = next(self.iter_trainloader) 125 | return (X, y) 126 | 127 | def get_next_test_batch(self): 128 | try: 129 | # Samples a new batch for persionalizing 130 | (X, y) = next(self.iter_testloader) 131 | except StopIteration: 132 | # restart the generator if the previous generator is exhausted. 133 | self.iter_testloader = iter(self.testloader) 134 | (X, y) = next(self.iter_testloader) 135 | return (X, y) 136 | 137 | def save_model(self): 138 | model_path = os.path.join("models", self.dataset) 139 | if not os.path.exists(model_path): 140 | os.makedirs(model_path) 141 | torch.save(self.model, os.path.join(model_path, "user_" + self.id + ".pt")) 142 | 143 | def load_model(self): 144 | model_path = os.path.join("models", self.dataset) 145 | self.model = torch.load(os.path.join(model_path, "server" + ".pt")) 146 | 147 | @staticmethod 148 | def model_exists(): 149 | return os.path.exists(os.path.join("models", "server" + ".pt")) -------------------------------------------------------------------------------- /flearn/users/userfedl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | import json 6 | from torch.utils.data import DataLoader 7 | from flearn.users.userbase import User 8 | from flearn.optimizers.fedoptimizer import * 9 | import copy 10 | # Implementation for FedAvg clients 11 | 12 | class UserFEDL(User): 13 | def __init__(self, numeric_id, train_data, test_data, model, batch_size, learning_rate, hyper_learning_rate, L, 14 | local_epochs, optimizer): 15 | super().__init__(numeric_id, train_data, test_data, model[0], batch_size, learning_rate, hyper_learning_rate, L, 16 | local_epochs) 17 | 18 | if(model[1] == "linear"): 19 | self.loss = nn.MSELoss() 20 | else: 21 | self.loss = nn.NLLLoss() 22 | 23 | self.optimizer = FEDLOptimizer(self.model.parameters(), lr=self.learning_rate, hyper_lr= hyper_learning_rate, L = L) 24 | 25 | def get_full_grad(self): 26 | for X, y in self.trainloaderfull: 27 | self.model.zero_grad() 28 | output = self.model(X) 29 | loss = self.loss(output, y) 30 | loss.backward() 31 | 32 | def set_grads(self, new_grads): 33 | if isinstance(new_grads, nn.Parameter): 34 | for model_grad, new_grad in 
zip(self.model.parameters(), new_grads): 35 | model_grad.data = new_grad.data 36 | elif isinstance(new_grads, list): 37 | for idx, model_grad in enumerate(self.model.parameters()): 38 | model_grad.data = new_grads[idx] 39 | 40 | def train(self, epochs): 41 | self.clone_model_paramenter(self.model.parameters(), self.server_grad) 42 | self.get_grads(self.pre_local_grad) 43 | self.model.train() 44 | for epoch in range(1, self.local_epochs + 1): 45 | loss_per_epoch = 0 46 | for batch_idx, (X, y) in enumerate(self.trainloader): 47 | self.optimizer.zero_grad() 48 | output = self.model(X) 49 | loss = self.loss(output, y) 50 | loss.backward() 51 | self.optimizer.step(self.server_grad, self.pre_local_grad) 52 | 53 | self.optimizer.zero_grad() 54 | self.get_full_grad() 55 | return loss 56 | 57 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import h5py 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import argparse 6 | import importlib 7 | import random 8 | import os 9 | from flearn.servers.serveravg import FedAvg 10 | from flearn.servers.serverfedl import FEDL 11 | from flearn.trainmodel.models import * 12 | from utils.plot_utils import * 13 | import torch 14 | torch.manual_seed(0) 15 | 16 | def main(dataset, algorithm, model, batch_size, learning_rate, hyper_learning_rate, L, num_glob_iters, 17 | local_epochs, optimizer, clients_per_round, rho, times): 18 | 19 | for i in range(times): 20 | print("---------------Running time:------------",i) 21 | 22 | # Generate model 23 | if(model == "mclr"): #for Mnist and Femnist datasets 24 | model = Mclr_Logistic(), model 25 | 26 | if(model == "linear"): # For Linear dataset 27 | model = Linear_Regression(40,1), model 28 | 29 | if(model == "dnn"): # for Mnist and Femnist datasets 30 | model = model = DNN(), model 31 | 32 | # select algorithm 33 | if(algorithm == "FedAvg"): 34 | server = FedAvg(dataset, algorithm, model, batch_size, learning_rate, hyper_learning_rate, L, num_glob_iters, local_epochs, optimizer, clients_per_round, rho, i) 35 | 36 | if(algorithm == "FEDL"): 37 | server = FEDL(dataset, algorithm, model, batch_size, learning_rate, hyper_learning_rate, L, num_glob_iters, local_epochs, optimizer, clients_per_round, rho, i) 38 | server.train() 39 | server.test() 40 | 41 | # Average data 42 | average_data(num_users=clients_per_round, loc_ep1=local_epochs, Numb_Glob_Iters=num_glob_iters, lamb=L,learning_rate=learning_rate, hyper_learning_rate = hyper_learning_rate, algorithms=algorithm, batch_size=batch_size, dataset=dataset, rho = rho, times = times) 43 | 44 | if __name__ == "__main__": 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument("--dataset", type=str, default="Mnist", choices=["Mnist", "Femnist", "Linear_synthetic", "Logistic_synthetic"]) 47 | parser.add_argument("--model", type=str, default="mclr", choices=["linear", "mclr", "dnn"]) 48 | parser.add_argument("--batch_size", type=int, default=20) 49 | parser.add_argument("--learning_rate", type=float, default=0.003, help="Local learning rate") 50 | parser.add_argument("--hyper_learning_rate", type=float, default = 0, help=" Learning rate of FEDL") 51 | parser.add_argument("--L", type=int, default=0, help="Regularization term") 52 | parser.add_argument("--num_global_iters", type=int, default=800) 53 | parser.add_argument("--local_epochs", type=int, default=20) 54 | parser.add_argument("--optimizer", type=str, 
default="SGD") 55 | parser.add_argument("--algorithm", type=str, default="FEDL",choices=["FEDL", "FedAvg"]) 56 | parser.add_argument("--clients_per_round", type=int, default=10, help="Number of Users per round") 57 | parser.add_argument("--rho", type=float, default=0, help="Conditon Number") 58 | parser.add_argument("--times", type=int, default=1, help="running time") 59 | args = parser.parse_args() 60 | 61 | print("=" * 80) 62 | print("Summary of training process:") 63 | print("Algorithm: {}".format(args.algorithm)) 64 | print("Batch size: {}".format(args.batch_size)) 65 | print("Learing rate : {}".format(args.learning_rate)) 66 | print("Hyper learing rate : {}".format(args.hyper_learning_rate)) 67 | print("Subset of users : {}".format(args.clients_per_round)) 68 | print("Number of local rounds : {}".format(args.local_epochs)) 69 | print("Number of global rounds : {}".format(args.num_global_iters)) 70 | print("Dataset : {}".format(args.dataset)) 71 | print("Local Model : {}".format(args.model)) 72 | print("=" * 80) 73 | 74 | main( 75 | dataset=args.dataset, 76 | algorithm = args.algorithm, 77 | model=args.model, 78 | batch_size=args.batch_size, 79 | learning_rate=args.learning_rate, 80 | hyper_learning_rate = args.hyper_learning_rate, 81 | L = args.L, 82 | num_glob_iters=args.num_global_iters, 83 | local_epochs=args.local_epochs, 84 | optimizer= args.optimizer, 85 | clients_per_round = args.clients_per_round, 86 | rho = args.rho, 87 | times = args.times 88 | ) 89 | -------------------------------------------------------------------------------- /plot_femnist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import h5py 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import argparse 6 | import importlib 7 | import random 8 | import os 9 | from flearn.servers.serveravg import FedAvg 10 | from flearn.servers.serverfedl import FEDL 11 | from flearn.trainmodel.models import * 12 | from utils.plot_utils import * 13 | import torch 14 | torch.manual_seed(0) 15 | 16 | algorithms_list = ["FEDL","FedAvg","FEDL", "FEDL","FedAvg","FEDL", "FEDL","FedAvg","FEDL"] 17 | rho = [0,0,0,0,0,0,0,0,0,0,0,0] 18 | lamb_value = [0, 0, 0, 0, 0, 0, 0, 0, 0] 19 | learning_rate = [0.003, 0.003, 0.015, 0.003, 0.003, 0.015, 0.003, 0.003, 0.015] 20 | hyper_learning_rate = [0.2, 0, 0.5, 0.2, 0, 0.5, 0.2, 0, 0.5] 21 | local_ep = [10, 10, 10, 20, 20, 20, 40, 40, 40] 22 | batch_size = [20, 20, 0, 20, 20, 0, 20, 20, 0] 23 | DATA_SET = "Femnist" 24 | number_users = 10 25 | 26 | plot_summary_nist(num_users=number_users, loc_ep1=local_ep, Numb_Glob_Iters=800, lamb=lamb_value, 27 | learning_rate=learning_rate, hyper_learning_rate = hyper_learning_rate, algorithms_list=algorithms_list, batch_size=batch_size, rho = rho, dataset=DATA_SET) 28 | print("-- FINISH -- :",) -------------------------------------------------------------------------------- /plot_linear.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import h5py 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from utils.plot_utils import * 6 | import torch 7 | torch.manual_seed(0) 8 | 9 | algorithms_list = ["FEDL","FEDL","FEDL","FEDL","FEDL","FEDL","FEDL","FEDL","FEDL","FEDL","FEDL","FEDL"] 10 | rho = [1.4, 1.4, 1.4, 1.4, 2 ,2 , 2, 2, 5, 5, 5, 5] 11 | lamb_value = [0, 0, 0, 0, 0, 0, 0, 0 , 0, 0, 0 ,0] 12 | learning_rate = [0.04,0.04,0.04,0.04, 0.04,0.04,0.04,0.04, 0.04,0.04,0.04,0.04] 13 | hyper_learning_rate = 
[0.01,0.03,0.05,0.07, 0.01,0.03,0.05,0.07, 0.01,0.03,0.05,0.07] 14 | local_ep = [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20] 15 | batch_size = [0,0,0,0 ,0,0,0,0, 0,0,0,0] 16 | DATA_SET = "Linear_synthetic" 17 | number_users = 100 18 | 19 | plot_summary_linear(num_users=number_users, loc_ep1=local_ep, Numb_Glob_Iters=200, lamb=lamb_value, learning_rate=learning_rate, hyper_learning_rate = hyper_learning_rate, algorithms_list=algorithms_list, batch_size=batch_size, rho = rho, dataset=DATA_SET) -------------------------------------------------------------------------------- /plot_mnist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import h5py 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import argparse 6 | import importlib 7 | import random 8 | import os 9 | from flearn.servers.serveravg import FedAvg 10 | from flearn.servers.serverfedl import FEDL 11 | from flearn.trainmodel.models import * 12 | from utils.plot_utils import * 13 | import torch 14 | torch.manual_seed(0) 15 | 16 | algorithms_list = ["FEDL","FedAvg","FEDL","FedAvg","FEDL","FedAvg","FEDL","FEDL"] 17 | rho = [0,0,0,0,0,0,0,0,0,0,0,0,0] 18 | lamb_value = [0, 0, 0, 0, 0, 0,0, 0, 0, 0] 19 | learning_rate = [0.003,0.003,0.003,0.003,0.003,0.003,0.003,0.003] 20 | hyper_learning_rate = [0.2,0,0.2,0,0.2,0,2.0,4.0] 21 | local_ep = [20, 20, 20, 20, 20, 20, 20, 20] 22 | batch_size = [20,20,40,40,0,0,0,0] 23 | DATA_SET = "Mnist" 24 | number_users = 10 25 | plot_summary_mnist(num_users=number_users, loc_ep1=local_ep, Numb_Glob_Iters=800, lamb=lamb_value,learning_rate=learning_rate, hyper_learning_rate = hyper_learning_rate, algorithms_list=algorithms_list, batch_size=batch_size, rho = rho, dataset=DATA_SET) 26 | print("-- FINISH -- :",) 27 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | Pillow 4 | torch 5 | torchvision 6 | matplotlib 7 | tqdm -------------------------------------------------------------------------------- /results/Mnist_FedAvg_0.005_0.2_15_10u_20b_20_avg.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlieDinh/FEDL_pytorch/4db34e5b698d46e2f73b94fb9c0ce00ef9b464f4/results/Mnist_FedAvg_0.005_0.2_15_10u_20b_20_avg.h5 -------------------------------------------------------------------------------- /results/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlieDinh/FEDL_pytorch/4db34e5b698d46e2f73b94fb9c0ce00ef9b464f4/results/README.md -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | 7 | IMAGE_SIZE = 28 8 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 9 | NUM_CHANNELS = 1 10 | 11 | def suffer_data(data): 12 | data_x = data['x'] 13 | data_y = data['y'] 14 | # randomly shuffle data 15 | np.random.seed(100) 16 | rng_state = np.random.get_state() 17 | np.random.shuffle(data_x) 18 | np.random.set_state(rng_state) 19 | np.random.shuffle(data_y) 20 | return (data_x, data_y) 21 | 22 | def batch_data(data, batch_size): 23 | ''' 24 | data is a dict := {'x': [numpy array], 'y': [numpy array]} (on one client) 25 | returns 
x, y, which are both numpy array of length: batch_size 26 | ''' 27 | data_x = data['x'] 28 | data_y = data['y'] 29 | 30 | # randomly shuffle data 31 | np.random.seed(100) 32 | rng_state = np.random.get_state() 33 | np.random.shuffle(data_x) 34 | np.random.set_state(rng_state) 35 | np.random.shuffle(data_y) 36 | 37 | # loop through mini-batches 38 | for i in range(0, len(data_x), batch_size): 39 | batched_x = data_x[i:i+batch_size] 40 | batched_y = data_y[i:i+batch_size] 41 | yield (batched_x, batched_y) 42 | 43 | 44 | def get_random_batch_sample(data_x, data_y, batch_size): 45 | num_parts = len(data_x)//batch_size + 1 46 | if(len(data_x) > batch_size): 47 | batch_idx = np.random.choice(list(range(num_parts +1))) 48 | sample_index = batch_idx*batch_size 49 | if(sample_index + batch_size > len(data_x)): 50 | return (data_x[sample_index:], data_y[sample_index:]) 51 | else: 52 | return (data_x[sample_index: sample_index+batch_size], data_y[sample_index: sample_index+batch_size]) 53 | else: 54 | return (data_x,data_y) 55 | 56 | 57 | def get_batch_sample(data, batch_size): 58 | data_x = data['x'] 59 | data_y = data['y'] 60 | 61 | np.random.seed(100) 62 | rng_state = np.random.get_state() 63 | np.random.shuffle(data_x) 64 | np.random.set_state(rng_state) 65 | np.random.shuffle(data_y) 66 | 67 | batched_x = data_x[0:batch_size] 68 | batched_y = data_y[0:batch_size] 69 | return (batched_x, batched_y) 70 | 71 | def read_data(dataset): 72 | '''parses data in given train and test data directories 73 | 74 | assumes: 75 | - the data in the input directories are .json files with 76 | keys 'users' and 'user_data' 77 | - the set of train set users is the same as the set of test set users 78 | 79 | Return: 80 | clients: list of client ids 81 | groups: list of group ids; empty list if none found 82 | train_data: dictionary of train data 83 | test_data: dictionary of test data 84 | ''' 85 | train_data_dir = os.path.join('data',dataset,'data', 'train') 86 | test_data_dir = os.path.join('data',dataset,'data', 'test') 87 | clients = [] 88 | groups = [] 89 | train_data = {} 90 | test_data = {} 91 | 92 | train_files = os.listdir(train_data_dir) 93 | train_files = [f for f in train_files if f.endswith('.json')] 94 | for f in train_files: 95 | file_path = os.path.join(train_data_dir, f) 96 | with open(file_path, 'r') as inf: 97 | cdata = json.load(inf) 98 | clients.extend(cdata['users']) 99 | if 'hierarchies' in cdata: 100 | groups.extend(cdata['hierarchies']) 101 | train_data.update(cdata['user_data']) 102 | 103 | test_files = os.listdir(test_data_dir) 104 | test_files = [f for f in test_files if f.endswith('.json')] 105 | for f in test_files: 106 | file_path = os.path.join(test_data_dir, f) 107 | with open(file_path, 'r') as inf: 108 | cdata = json.load(inf) 109 | test_data.update(cdata['user_data']) 110 | 111 | clients = list(sorted(train_data.keys())) 112 | 113 | return clients, groups, train_data, test_data 114 | 115 | def read_user_data(index,data,dataset): 116 | id = data[0][index] 117 | train_data = data[2][id] 118 | test_data = data[3][id] 119 | X_train, y_train, X_test, y_test = train_data['x'], train_data['y'], test_data['x'], test_data['y'] 120 | if(dataset == "Mnist"): 121 | X_train, y_train, X_test, y_test = train_data['x'], train_data['y'], test_data['x'], test_data['y'] 122 | X_train = torch.Tensor(X_train).view(-1, NUM_CHANNELS, IMAGE_SIZE, IMAGE_SIZE).type(torch.float32) 123 | y_train = torch.Tensor(y_train).type(torch.int64) 124 | X_test = torch.Tensor(X_test).view(-1, NUM_CHANNELS, IMAGE_SIZE, 
IMAGE_SIZE).type(torch.float32) 125 | y_test = torch.Tensor(y_test).type(torch.int64) 126 | elif(dataset == "Linear_synthetic"): 127 | X_train = torch.Tensor(X_train).type(torch.float32) 128 | y_train = torch.Tensor(y_train).type(torch.float32).unsqueeze(1) 129 | X_test = torch.Tensor(X_test).type(torch.float32) 130 | y_test = torch.Tensor(y_test).type(torch.float32).unsqueeze(1) 131 | #y_train = torch.flatten(y_train, 1) 132 | #y_test = torch.flatten(y_test, 1) 133 | #print(y_test.size(),y_train.size()) 134 | else: 135 | X_train = torch.Tensor(X_train).type(torch.float32) 136 | y_train = torch.Tensor(y_train).type(torch.int64) 137 | X_test = torch.Tensor(X_test).type(torch.float32) 138 | y_test = torch.Tensor(y_test).type(torch.int64) 139 | train_data = [(x, y) for x, y in zip(X_train, y_train)] 140 | test_data = [(x, y) for x, y in zip(X_test, y_test)] 141 | return id, train_data, test_data 142 | 143 | class Metrics(object): 144 | def __init__(self, clients, params): 145 | self.params = params 146 | num_rounds = params['num_rounds'] 147 | self.bytes_written = {c.id: [0] * num_rounds for c in clients} 148 | self.client_computations = {c.id: [0] * num_rounds for c in clients} 149 | self.bytes_read = {c.id: [0] * num_rounds for c in clients} 150 | self.accuracies = [] 151 | self.train_accuracies = [] 152 | 153 | def update(self, rnd, cid, stats): 154 | bytes_w, comp, bytes_r = stats 155 | self.bytes_written[cid][rnd] += bytes_w 156 | self.client_computations[cid][rnd] += comp 157 | self.bytes_read[cid][rnd] += bytes_r 158 | 159 | def write(self): 160 | metrics = {} 161 | metrics['dataset'] = self.params['dataset'] 162 | metrics['num_rounds'] = self.params['num_rounds'] 163 | metrics['eval_every'] = self.params['eval_every'] 164 | metrics['learning_rate'] = self.params['learning_rate'] 165 | metrics['mu'] = self.params['mu'] 166 | metrics['num_epochs'] = self.params['num_epochs'] 167 | metrics['batch_size'] = self.params['batch_size'] 168 | metrics['accuracies'] = self.accuracies 169 | metrics['train_accuracies'] = self.train_accuracies 170 | metrics['client_computations'] = self.client_computations 171 | metrics['bytes_written'] = self.bytes_written 172 | metrics['bytes_read'] = self.bytes_read 173 | metrics_dir = os.path.join('out', self.params['dataset'], 'metrics_{}_{}_{}_{}_{}.json'.format( 174 | self.params['seed'], self.params['optimizer'], self.params['learning_rate'], self.params['num_epochs'], self.params['mu'])) 175 | #os.mkdir(os.path.join('out', self.params['dataset'])) 176 | if not os.path.exists('out'): 177 | os.mkdir('out') 178 | if not os.path.exists(os.path.join('out', self.params['dataset'])): 179 | os.mkdir(os.path.join('out', self.params['dataset'])) 180 | with open(metrics_dir, 'w') as ouf: 181 | json.dump(metrics, ouf) 182 | -------------------------------------------------------------------------------- /utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import h5py 3 | import numpy as np 4 | from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset 5 | plt.rcParams.update({'font.size': 14}) 6 | 7 | def simple_read_data(loc_ep, alg): 8 | hf = h5py.File("./results/"+'{}_{}.h5'.format(alg, loc_ep), 'r') 9 | rs_glob_acc = np.array(hf.get('rs_glob_acc')[:]) 10 | rs_train_acc = np.array(hf.get('rs_train_acc')[:]) 11 | rs_train_loss = np.array(hf.get('rs_train_loss')[:]) 12 | return rs_train_acc, rs_train_loss, rs_glob_acc 13 | 14 | def 
get_training_data_value(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],hyper_learning_rate=[],algorithms_list=[], batch_size=0, rho=[], dataset=""): 15 | Numb_Algs = len(algorithms_list) 16 | train_acc = np.zeros((Numb_Algs, Numb_Glob_Iters)) 17 | train_loss = np.zeros((Numb_Algs, Numb_Glob_Iters)) 18 | glob_acc = np.zeros((Numb_Algs, Numb_Glob_Iters)) 19 | algs_lbl = algorithms_list.copy() 20 | for i in range(Numb_Algs): 21 | if(lamb[i] > 0): 22 | algorithms_list[i] = algorithms_list[i] + "_prox_" + str(lamb[i]) 23 | algs_lbl[i] = algs_lbl[i] + "_prox" 24 | 25 | string_learning_rate = str(learning_rate[i]) 26 | 27 | if(algorithms_list[i] == "FEDL"): 28 | string_learning_rate = string_learning_rate + "_" +str(hyper_learning_rate[i]) 29 | algorithms_list[i] = algorithms_list[i] + \ 30 | "_" + string_learning_rate + "_" + str(num_users) + \ 31 | "u" + "_" + str(batch_size[i]) + "b" + "_" + str(loc_ep1[i]) 32 | if(rho[i] > 0): 33 | algorithms_list[i] += "_" + str(rho[i])+"p" 34 | 35 | train_acc[i, :], train_loss[i, :], glob_acc[i, :] = np.array( 36 | simple_read_data("avg", dataset + "_" + algorithms_list[i]))[:, :Numb_Glob_Iters] 37 | algs_lbl[i] = algs_lbl[i] 38 | return glob_acc, train_acc, train_loss 39 | 40 | 41 | def get_data_label_style(input_data = [], linestyles= [], algs_lbl = [], lamb = [], loc_ep1 = 0, batch_size =0): 42 | data, lstyles, labels = [], [], [] 43 | for i in range(len(algs_lbl)): 44 | data.append(input_data[i, ::]) 45 | lstyles.append(linestyles[i]) 46 | labels.append(algs_lbl[i]+str(lamb[i])+"_" + 47 | str(loc_ep1[i])+"e" + "_" + str(batch_size[i]) + "b") 48 | 49 | return data, lstyles, labels 50 | 51 | def average_smooth(data, window_len=10, window='hanning'): 52 | results = [] 53 | if window_len<3: 54 | return data 55 | for i in range(len(data)): 56 | x = data[i] 57 | s=np.r_[x[window_len-1:0:-1],x,x[-2:-window_len-1:-1]] 58 | #print(len(s)) 59 | if window == 'flat': #moving average 60 | w=np.ones(window_len,'d') 61 | else: 62 | w=eval('numpy.'+window+'(window_len)') 63 | 64 | y=np.convolve(w/w.sum(),s,mode='valid') 65 | results.append(y[window_len-1:]) 66 | return np.array(results) 67 | 68 | def plot_summary_one_figure(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho = [], dataset = ""): 69 | Numb_Algs = len(algorithms_list) 70 | #glob_acc, train_acc, train_loss = get_training_data_value( 71 | # num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset) 72 | 73 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 74 | glob_acc = average_smooth(glob_acc_, window='flat') 75 | train_loss = average_smooth(train_loss_, window='flat') 76 | train_acc = average_smooth(train_acc_, window='flat') 77 | 78 | plt.figure(1) 79 | MIN = train_loss.min() - 0.001 80 | start = 0 81 | linestyles = ['-', '--', '-.', ':', '-', '--', '-.', ':', ':'] 82 | plt.grid(True) 83 | for i in range(Numb_Algs): 84 | plt.plot(train_acc[i, 1:], linestyle=linestyles[i], label=algorithms_list[i] + str(lamb[i])+ "_"+str(loc_ep1[i])+"e" + "_" + str(batch_size[i]) + "b") 85 | plt.legend(loc='lower right') 86 | plt.ylabel('Training Accuracy') 87 | plt.xlabel('Global rounds ' + '$K_g$') 88 | plt.title(dataset.upper()) 89 | #plt.ylim([0.8, glob_acc.max()]) 90 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 
'train_acc.png', bbox_inches="tight") 91 | #plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf') 92 | plt.figure(2) 93 | 94 | plt.grid(True) 95 | for i in range(Numb_Algs): 96 | plt.plot(train_loss[i, start:], linestyle=linestyles[i], label=algorithms_list[i] + str(lamb[i]) + 97 | "_"+str(loc_ep1[i])+"e" + "_" + str(batch_size[i]) + "b") 98 | #plt.plot(train_loss1[i, 1:], label=algs_lbl1[i]) 99 | plt.legend(loc='upper right') 100 | plt.ylabel('Training Loss') 101 | plt.xlabel('Global rounds') 102 | plt.title(dataset.upper()) 103 | #plt.ylim([train_loss.min(), 0.5]) 104 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight") 105 | #plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf') 106 | plt.figure(3) 107 | plt.grid(True) 108 | for i in range(Numb_Algs): 109 | plt.plot(glob_acc[i, start:], linestyle=linestyles[i], 110 | label=algorithms_list[i]+str(lamb[i])+"_"+str(loc_ep1[i])+"e" + "_" + str(batch_size[i]) + "b") 111 | #plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i]) 112 | plt.legend(loc='lower right') 113 | #plt.ylim([0.6, glob_acc.max()]) 114 | plt.ylabel('Test Accuracy') 115 | plt.xlabel('Global rounds ') 116 | plt.title(dataset.upper()) 117 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight") 118 | #plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf') 119 | 120 | def get_max_value_index(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], algorithms_list=[], batch_size=0, dataset=""): 121 | Numb_Algs = len(algorithms_list) 122 | glob_acc, train_acc, train_loss = get_training_data_value( 123 | num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, algorithms_list, batch_size, dataset) 124 | for i in range(Numb_Algs): 125 | print("Algorithm: ", algorithms_list[i], "Max testing Accurancy: ", glob_acc[i].max( 126 | ), "Index: ", np.argmax(glob_acc[i]), "local update:", loc_ep1[i]) 127 | 128 | def plot_summary_mnist(num_users=100, loc_ep1=[], Numb_Glob_Iters=10, lamb=[], learning_rate=[],hyper_learning_rate=[], algorithms_list=[], batch_size=0,rho = [], dataset=""): 129 | Numb_Algs = len(algorithms_list) 130 | 131 | #glob_acc, train_acc, train_loss = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 132 | 133 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 134 | glob_acc = average_smooth(glob_acc_, window='flat') 135 | train_loss = average_smooth(train_loss_, window='flat') 136 | train_acc = average_smooth(train_acc_, window='flat') 137 | 138 | for i in range(Numb_Algs): 139 | print(algorithms_list[i], "acc:", glob_acc[i].max()) 140 | print(algorithms_list[i], "loss:", train_loss[i].min()) 141 | 142 | plt.figure(1) 143 | 144 | linestyles = ['-', '--', '-.', ':'] 145 | algs_lbl = ["FEDL", "FedAvg", 146 | "FEDL", "FedAvg", 147 | "FEDL", "FedAvg", 148 | "FEDL", "FEDL"] 149 | 150 | fig = plt.figure(figsize=(12, 4)) 151 | ax = fig.add_subplot(111) # The big subplot 152 | ax1 = fig.add_subplot(131) 153 | ax2 = fig.add_subplot(132) 154 | ax3 = fig.add_subplot(133) 155 | ax1.grid(True) 156 | ax2.grid(True) 157 | ax3.grid(True) 158 | #min = train_loss.min() 159 | min = train_loss.min() - 0.001 160 | max = 0.46 161 | #max = train_loss.max() + 0.01 162 | num_al = 2 163 | # Turn off axis lines and ticks of the big subplot 164 | ax.spines['top'].set_color('none') 165 | 
ax.spines['bottom'].set_color('none') 166 | ax.spines['left'].set_color('none') 167 | ax.spines['right'].set_color('none') 168 | ax.tick_params(labelcolor='w', top='off', 169 | bottom='off', left='off', right='off') 170 | for i in range(num_al): 171 | stringbatch = str(batch_size[i]) 172 | if(stringbatch == '0'): 173 | stringbatch = '$\infty$' 174 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i], 175 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch+ ', $\eta = $'+ str(hyper_learning_rate[i])) 176 | ax1.set_ylim([min, max]) 177 | ax1.legend(loc='upper right', prop={'size': 10}) 178 | 179 | for i in range(num_al): 180 | stringbatch = str(batch_size[i+2]) 181 | if(stringbatch == '0'): 182 | stringbatch = '$\infty$' 183 | ax2.plot(train_loss[i+num_al, 1:], linestyle=linestyles[i], 184 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch+ ', $\eta = $'+ str(hyper_learning_rate[i+num_al])) 185 | ax2.set_ylim([min, max]) 186 | ax2.legend(loc='upper right', prop={'size': 10}) 187 | 188 | for i in range(4): 189 | stringbatch = str(batch_size[i+4]) 190 | if(stringbatch == '0'): 191 | stringbatch = '$\infty$' 192 | ax3.plot(train_loss[i+num_al*2, 1:], linestyle=linestyles[i], 193 | label=algs_lbl[i + num_al*2] + " : " + '$B = $' + stringbatch+ ', $\eta = $'+ str(hyper_learning_rate[i+num_al*2])) 194 | ax3.set_ylim([min, max]) 195 | ax3.legend(loc='upper right', prop={'size': 10}) 196 | 197 | ax.set_title('MNIST', y=1.02) 198 | ax.set_xlabel('Global rounds ' + '$K_g$') 199 | ax.set_ylabel('Training Loss', labelpad=15) 200 | plt.savefig(dataset + str(loc_ep1[1]) + 201 | 'train_loss.pdf', bbox_inches='tight') 202 | plt.savefig(dataset + str(loc_ep1[1]) + 203 | 'train_loss.png', bbox_inches='tight') 204 | 205 | fig = plt.figure(figsize=(12, 4)) 206 | ax = fig.add_subplot(111) # The big subplot 207 | ax1 = fig.add_subplot(131) 208 | ax2 = fig.add_subplot(132) 209 | ax3 = fig.add_subplot(133) 210 | ax1.grid(True) 211 | ax2.grid(True) 212 | ax3.grid(True) 213 | #min = train_loss.min() 214 | min = 0.82 215 | max = glob_acc.max() + 0.001 # train_loss.max() + 0.01 216 | num_al = 2 217 | # Turn off axis lines and ticks of the big subplot 218 | ax.spines['top'].set_color('none') 219 | ax.spines['bottom'].set_color('none') 220 | ax.spines['left'].set_color('none') 221 | ax.spines['right'].set_color('none') 222 | ax.tick_params(labelcolor='w', top='off', 223 | bottom='off', left='off', right='off') 224 | for i in range(num_al): 225 | stringbatch = str(batch_size[i]) 226 | if(stringbatch == '0'): 227 | stringbatch = '$\infty$' 228 | ax1.plot(glob_acc[i, 1:], linestyle=linestyles[i], 229 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $'+ str(hyper_learning_rate[i])) 230 | ax1.set_ylim([min, max]) 231 | ax1.legend(loc='lower right', prop={'size': 10}) 232 | 233 | for i in range(num_al): 234 | stringbatch = str(batch_size[i+2]) 235 | if(stringbatch == '0'): 236 | stringbatch = '$\infty$' 237 | ax2.plot(glob_acc[i+num_al, 1:], linestyle=linestyles[i], 238 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch+ ', $\eta = $'+ str(hyper_learning_rate[i+num_al*1])) 239 | ax2.set_ylim([min, max]) 240 | ax2.legend(loc='lower right', prop={'size': 10}) 241 | 242 | for i in range(4): 243 | stringbatch = str(batch_size[i+4]) 244 | if(stringbatch == '0'): 245 | stringbatch = '$\infty$' 246 | ax3.plot(glob_acc[i+num_al*2, 1:], linestyle=linestyles[i], 247 | label=algs_lbl[i + num_al*2] + " : " + '$B = $' + stringbatch + ', $\eta = $'+ str(hyper_learning_rate[i+num_al*2])) 248 | 
ax3.set_ylim([min, max]) 249 | ax3.legend(loc='lower right', prop={'size': 10}) 250 | 251 | ax.set_title('MNIST', y=1.02) 252 | ax.set_xlabel('Global rounds ' + '$K_g$') 253 | ax.set_ylabel('Testing Accuracy', labelpad=15) 254 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.pdf', bbox_inches='tight') 255 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.png', bbox_inches='tight') 256 | 257 | 258 | def plot_summary_nist(num_users=100, loc_ep1=[], Numb_Glob_Iters=10, lamb=[], learning_rate=[], hyper_learning_rate=[], algorithms_list=[], batch_size=0,rho = [], dataset=""): 259 | Numb_Algs = len(algorithms_list) 260 | #glob_acc, train_acc, train_loss = get_training_data_value( num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 261 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 262 | glob_acc = average_smooth(glob_acc_, window='flat') 263 | train_loss = average_smooth(train_loss_, window='flat') 264 | train_acc = average_smooth(train_acc_, window='flat') 265 | for i in range(Numb_Algs): 266 | print(algorithms_list[i], "acc:", glob_acc[i].max()) 267 | print(algorithms_list[i], "loss:", train_loss[i].max()) 268 | plt.figure(1) 269 | 270 | linestyles = ['-', '--', '-.', ':'] 271 | algs_lbl = ["FEDL","FedAvg", "FEDL", 272 | "FEDL", "FedAvg", "FEDL", 273 | "FEDL", "FedAvg", "FEDL"] 274 | fig = plt.figure(figsize=(12, 4)) 275 | 276 | ax = fig.add_subplot(111) # The big subplot 277 | ax1 = fig.add_subplot(131) 278 | ax2 = fig.add_subplot(132) 279 | ax3 = fig.add_subplot(133) 280 | ax1.grid(True) 281 | ax2.grid(True) 282 | ax3.grid(True) 283 | #min = train_loss.min() 284 | min = train_loss.min() - 0.01 285 | max = 3 # train_loss.max() + 0.01 286 | num_al = 3 287 | # Turn off axis lines and ticks of the big subplot 288 | ax.spines['top'].set_color('none') 289 | ax.spines['bottom'].set_color('none') 290 | ax.spines['left'].set_color('none') 291 | ax.spines['right'].set_color('none') 292 | ax.tick_params(labelcolor='w', top='off', 293 | bottom='off', left='off', right='off') 294 | for i in range(num_al): 295 | stringbatch = str(batch_size[i]) 296 | if(stringbatch == '0'): 297 | stringbatch = '$\infty$' 298 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i], 299 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i]) + ', $K_l = $' + str(loc_ep1[i])) 300 | ax1.set_ylim([min, max]) 301 | ax1.legend(loc='upper right', prop={'size': 10}) 302 | 303 | for i in range(num_al): 304 | stringbatch = str(batch_size[i+num_al]) 305 | if(stringbatch == '0'): 306 | stringbatch = '$\infty$' 307 | ax2.plot(train_loss[i+num_al, 1:], linestyle=linestyles[i], 308 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i+num_al]) + ', $K_l = $' + str(loc_ep1[i+ num_al])) 309 | ax2.set_ylim([min, max]) 310 | ax2.legend(loc='upper right', prop={'size': 10}) 311 | 312 | for i in range(num_al): 313 | stringbatch = str(batch_size[i+num_al*2]) 314 | if(stringbatch == '0'): 315 | stringbatch = '$\infty$' 316 | ax3.plot(train_loss[i+num_al*2, 1:], linestyle=linestyles[i], 317 | label=algs_lbl[i + num_al*2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i+num_al*2]) + ', $K_l = $' + str(loc_ep1[i + num_al*2])) 318 | ax3.set_ylim([min, max]) 319 | ax3.legend(loc='upper right', prop={'size': 10}) 320 | 
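    # The three panels above each overlay three runs (FEDL/FedAvg) whose batch
    # size B, hyper-learning rate eta, and local epochs K_l are read from the
    # corresponding *_avg.h5 result files; the invisible full-figure axis `ax`
    # (its spines and tick labels were switched off earlier) only carries the
    # shared 'FEMNIST' title and the common axis labels before the training-loss
    # figure is saved as both .pdf and .png.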
321 | ax.set_title('FEMNIST', y=1.02) 322 | ax.set_xlabel('Global rounds ' + '$K_g$') 323 | ax.set_ylabel('Training Loss', labelpad=15) 324 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches='tight') 325 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.png', bbox_inches='tight') 326 | 327 | fig = plt.figure(figsize=(12, 4)) 328 | ax = fig.add_subplot(111) # The big subplot 329 | ax1 = fig.add_subplot(131) 330 | ax2 = fig.add_subplot(132) 331 | ax3 = fig.add_subplot(133) 332 | ax1.grid(True) 333 | ax2.grid(True) 334 | ax3.grid(True) 335 | #min = train_loss.min() 336 | num_al = 3 337 | min = 0.3 338 | max = glob_acc.max() + 0.01 # train_loss.max() + 0.01 339 | # Turn off axis lines and ticks of the big subplot 340 | ax.spines['top'].set_color('none') 341 | ax.spines['bottom'].set_color('none') 342 | ax.spines['left'].set_color('none') 343 | ax.spines['right'].set_color('none') 344 | ax.tick_params(labelcolor='w', top='off', 345 | bottom='off', left='off', right='off') 346 | for i in range(num_al): 347 | stringbatch = str(batch_size[i]) 348 | if(stringbatch == '0'): 349 | stringbatch = '$\infty$' 350 | ax1.plot(glob_acc[i, 1:], linestyle=linestyles[i], 351 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i]) + ', $K_l = $' + str(loc_ep1[i])) 352 | ax1.set_ylim([min, max]) 353 | ax1.legend(loc='lower right', prop={'size': 10}) 354 | 355 | for i in range(num_al): 356 | stringbatch = str(batch_size[i+num_al]) 357 | if(stringbatch == '0'): 358 | stringbatch = '$\infty$' 359 | ax2.plot(glob_acc[i+num_al, 1:], linestyle=linestyles[i], 360 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i+num_al*1]) + ', $K_l = $' + str(loc_ep1[i + num_al])) 361 | ax2.set_ylim([min, max]) 362 | ax2.legend(loc='lower right', prop={'size': 10}) 363 | 364 | for i in range(num_al): 365 | stringbatch = str(batch_size[i+num_al*2]) 366 | if(stringbatch == '0'): 367 | stringbatch = '$\infty$' 368 | ax3.plot(glob_acc[i+num_al*2, 1:], linestyle=linestyles[i], 369 | label=algs_lbl[i + num_al*2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i+num_al*2]) + ', $K_l = $' + str(loc_ep1[i+ 2*num_al])) 370 | ax3.set_ylim([min, max]) 371 | ax3.legend(loc='lower right', prop={'size': 10}) 372 | 373 | ax.set_title('FEMNIST', y=1.02) 374 | ax.set_xlabel('Global rounds ' + '$K_g$') 375 | ax.set_ylabel('Testing Accuracy', labelpad=15) 376 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.pdf', bbox_inches='tight') 377 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.png', bbox_inches='tight') 378 | 379 | def plot_summary_linear(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],hyper_learning_rate=[], algorithms_list=[], batch_size=0,rho = [], dataset = ""): 380 | 381 | Numb_Algs = len(algorithms_list) 382 | glob_acc, train_acc, train_loss = get_training_data_value( num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 383 | for i in range(Numb_Algs): 384 | print(algorithms_list[i], "loss:", glob_acc[i].max()) 385 | plt.figure(1) 386 | 387 | linestyles = ['-', '-', '-', '-'] 388 | markers = ["o","v","s","*","x","P"] 389 | algs_lbl = ["FEDL","FEDL", "FEDL","FEDL", 390 | "FEDL", "FEDL", "FEDL","FEDL", 391 | "FEDL", "FEDL", "FEDL","FEDL"] 392 | fig = plt.figure(figsize=(12, 4)) 393 | ax = fig.add_subplot(111) # The big subplot 394 | ax1 = fig.add_subplot(131) 395 | ax2 = fig.add_subplot(132) 
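    # One subplot per condition number rho: each of the three panels below overlays
    # four FEDL runs that differ only in the hyper-learning rate eta, together with
    # a dashed horizontal reference line at y = 0.035 labelled "optimal solution"
    # for the synthetic linear-regression problem.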
396 | ax3 = fig.add_subplot(133) 397 | #min = train_loss.min() 398 | num_al = 4 399 | # Turn off axis lines and ticks of the big subplot 400 | ax.spines['top'].set_color('none') 401 | ax.spines['bottom'].set_color('none') 402 | ax.spines['left'].set_color('none') 403 | ax.spines['right'].set_color('none') 404 | ax.tick_params(labelcolor='w', top='off', 405 | bottom='off', left='off', right='off') 406 | for i in range(num_al): 407 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i], label=algs_lbl[i] + ": "+ '$\eta = $' + str(hyper_learning_rate[i]) ,marker = markers[i],markevery=0.4, markersize=5) 408 | 409 | ax1.hlines(y=0.035,xmin=0, xmax=200, linestyle='--',label = "optimal solution", color= "m" ) 410 | ax1.legend(loc='upper right', prop={'size': 10}) 411 | ax1.set_ylim([0.02, 0.5]) 412 | ax1.set_title('$\\rho = $' + str(rho[0])) 413 | ax1.grid(True) 414 | for i in range(num_al): 415 | str_rho = ', $\eta = $' + str(rho[i]) 416 | ax2.plot(train_loss[i+num_al, 1:], linestyle=linestyles[i], label=algs_lbl[i + num_al] + ": "+ '$\eta = $' + str(hyper_learning_rate[i+num_al]) ,marker = markers[i],markevery=0.4, markersize=5) 417 | 418 | ax2.hlines(y=0.035,xmin=0, xmax=200, linestyle='--',label = "optimal solution", color= "m" ) 419 | ax2.set_ylim([0.02, 0.5]) 420 | #ax2.legend(loc='upper right') 421 | ax2.set_title('$\\rho = $' + str(rho[0+ num_al])) 422 | ax2.grid(True) 423 | for i in range(num_al): 424 | str_rho = ', $\rho = $' + str(rho[i]) 425 | ax3.plot(train_loss[i+num_al*2, 1:], linestyle=linestyles[i], label=algs_lbl[i + num_al*2] + ": "+ '$\eta = $' + str(hyper_learning_rate[i+num_al*2]) ,marker = markers[i], markevery=0.4, markersize=5) 426 | 427 | ax3.hlines(y=0.035, xmin=0, xmax=200, linestyle='--', 428 | label="optimal solution", color="m") 429 | ax3.set_ylim([0.02, 0.5]) 430 | #ax3.legend(loc='upper right') 431 | ax3.set_title('$\\rho = $' + str(rho[0+ 2*num_al])) 432 | ax3.grid(True) 433 | ax.set_title('Synthetic dataset', y=1.1) 434 | ax.set_xlabel('Global rounds ' + '$K_g$') 435 | ax.set_ylabel('Training Loss') 436 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches='tight') 437 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.png', bbox_inches='tight') 438 | 439 | def get_all_training_data_value(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb = 0, learning_rate=0, hyper_learning_rate=0, algorithms="", batch_size=0, dataset="", rho= 0, times = 5): 440 | train_acc = np.zeros((times, Numb_Glob_Iters)) 441 | train_loss = np.zeros((times, Numb_Glob_Iters)) 442 | glob_acc = np.zeros((times, Numb_Glob_Iters)) 443 | algorithms_list = [algorithms] * times 444 | 445 | for i in range(times): 446 | if(lamb > 0): 447 | algorithms_list[i] = algorithms_list[i] + "_prox_" + str(lamb) 448 | 449 | string_learning_rate = str(learning_rate) 450 | 451 | if(algorithms_list[i] == "FEDL"): 452 | string_learning_rate = string_learning_rate + "_" +str(hyper_learning_rate) 453 | 454 | algorithms_list[i] = algorithms_list[i] + "_" + string_learning_rate + "_" + str(num_users) + "u" + "_" + str(batch_size) + "b" + "_" + str(loc_ep1) 455 | 456 | if(rho > 0): 457 | algorithms_list[i] += "_" + str(rho) + "p" 458 | 459 | train_acc[i, :], train_loss[i, :], glob_acc[i, :] = np.array( 460 | simple_read_data(str(i) , dataset + "_" + algorithms_list[i]))[:, :Numb_Glob_Iters] 461 | 462 | return glob_acc, train_acc, train_loss 463 | 464 | 465 | def average_data(num_users, loc_ep1, Numb_Glob_Iters, lamb,learning_rate, hyper_learning_rate, algorithms, batch_size, dataset, rho, times): 
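    # Average the metrics of `times` repeated runs: read every run with
    # get_all_training_data_value, print the mean and standard deviation of the
    # best test accuracy per run, and write the averaged curves to
    # ./results/<experiment name>_avg.h5 under 'rs_glob_acc', 'rs_train_acc'
    # and 'rs_train_loss'.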
466 |     glob_acc, train_acc, train_loss = get_all_training_data_value( num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms, batch_size, dataset, rho, times)
467 |     # store average value to h5 file
468 |     glob_acc_data = np.average(glob_acc, axis=0)
469 |     train_acc_data = np.average(train_acc, axis=0)
470 |     train_loss_data = np.average(train_loss, axis=0)
471 | 
472 |     max_accuracy = []
473 |     for i in range(times):
474 |         max_accuracy.append(glob_acc[i].max())
475 |     print("std:", np.std(max_accuracy))
476 |     print("Mean:", np.mean(max_accuracy))
477 | 
478 |     alg = dataset + "_" + algorithms
479 |     alg += "_" + str(learning_rate)
480 | 
481 |     if(algorithms == "FEDL"):
482 |         alg += "_" + str(hyper_learning_rate)
483 | 
484 |     alg += "_" + str(num_users) + "u" + "_" + str(batch_size) + "b" + "_" + str(loc_ep1)
485 | 
486 |     if(lamb > 0):
487 |         alg += "_" + str(lamb) + "L"
488 | 
489 |     if(rho > 0):
490 |         alg += "_" + str(rho) + "p"
491 | 
492 |     #alg = alg + "_" + str(learning_rate) + "_" + str(hyper_learning_rate) + "_" + str(lamb) + "_" + str(num_users) + "u" + "_" + str(batch_size) + "b" + "_" + str(loc_ep1)
493 |     alg = alg + "_" + "avg"
494 |     if (len(glob_acc) != 0 and len(train_acc) != 0 and len(train_loss) != 0):
495 |         with h5py.File("./results/" + '{}.h5'.format(alg), 'w') as hf:
496 |             hf.create_dataset('rs_glob_acc', data=glob_acc_data)
497 |             hf.create_dataset('rs_train_acc', data=train_acc_data)
498 |             hf.create_dataset('rs_train_loss', data=train_loss_data)
499 |             hf.close()
500 |     return 0
501 | 
502 | def plot_summary_one_mnist(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho = [], dataset = ""):
503 |     Numb_Algs = len(algorithms_list)
504 |     #glob_acc, train_acc, train_loss = get_training_data_value(
505 |     #    num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset)
506 | 
507 |     glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset)
508 |     glob_acc = average_smooth(glob_acc_, window='flat')
509 |     train_loss = average_smooth(train_loss_, window='flat')
510 |     train_acc = average_smooth(train_acc_, window='flat')
511 | 
512 |     plt.figure(1)
513 |     MIN = train_loss.min() - 0.001
514 |     start = 0
515 |     linestyles = ['-', '--', '-.', ':']
516 |     markers = ["o","v","s","*","x","P"]
517 |     algs_lbl = ["FEDL","FedAvg","FEDL","FedAvg"]
518 |     plt.grid(True)
519 |     for i in range(Numb_Algs):
520 |         stringbatch = str(batch_size[i])
521 |         if(stringbatch == '0'):
522 |             stringbatch = '$\infty$'
523 |         plt.plot(train_acc[i, 1:], linestyle=linestyles[i],marker = markers[i],label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5)
524 | 
525 |     plt.legend(loc='lower right')
526 |     plt.ylabel('Training Accuracy')
527 |     plt.xlabel('Global rounds ' + '$K_g$')
528 |     plt.title(dataset.upper())
529 |     plt.ylim([0.85, train_acc.max()])
530 |     plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.png', bbox_inches="tight")
531 |     plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.pdf', bbox_inches="tight")
532 |     #plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf')
533 |     plt.figure(2)
534 | 
535 |     plt.grid(True)
536 |     for i in range(Numb_Algs):
537 |         stringbatch = str(batch_size[i])
538 |         if(stringbatch == '0'):
539 |             stringbatch = '$\infty$'
540 |         plt.plot(train_loss[i, 1:], linestyle=linestyles[i],marker = 
markers[i],label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 541 | 542 | #plt.plot(train_loss1[i, 1:], label=algs_lbl1[i]) 543 | plt.legend(loc='upper right') 544 | plt.ylabel('Training Loss') 545 | plt.xlabel('Global rounds') 546 | plt.title(dataset.upper()) 547 | plt.ylim([train_loss.min() -0.01, 0.7]) 548 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight") 549 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches="tight") 550 | #plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf') 551 | plt.figure(3) 552 | plt.grid(True) 553 | for i in range(Numb_Algs): 554 | stringbatch = str(batch_size[i]) 555 | if(stringbatch == '0'): 556 | stringbatch = '$\infty$' 557 | plt.plot(glob_acc[i, 1:], linestyle=linestyles[i],marker = markers[i],label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 558 | #plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i]) 559 | plt.legend(loc='lower right') 560 | plt.ylim([0.8, glob_acc.max() + 0.005]) 561 | plt.ylabel('Test Accuracy') 562 | plt.xlabel('Global rounds ') 563 | plt.title(dataset.upper()) 564 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight") 565 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.pdf', bbox_inches="tight") 566 | #plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf') 567 | 568 | 569 | def plot_summary_one_nist(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho = [], dataset = ""): 570 | Numb_Algs = len(algorithms_list) 571 | #glob_acc, train_acc, train_loss = get_training_data_value( 572 | # num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset) 573 | 574 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 575 | glob_acc = average_smooth(glob_acc_, window='flat') 576 | train_loss = average_smooth(train_loss_, window='flat') 577 | train_acc = average_smooth(train_acc_, window='flat') 578 | 579 | plt.figure(1) 580 | MIN = train_loss.min() - 0.001 581 | start = 0 582 | linestyles = ['-', '--', '-.', ':'] 583 | markers = ["o","v","s","*","x","P"] 584 | algs_lbl = ["FEDL","FedAvg","FedAvg"] 585 | plt.grid(True) 586 | for i in range(Numb_Algs): 587 | stringbatch = str(batch_size[i]) 588 | if(stringbatch == '0'): 589 | stringbatch = '$\infty$' 590 | plt.plot(train_acc[i, 1:], linestyle=linestyles[i],marker = markers[i],label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 591 | 592 | plt.legend(loc='lower right') 593 | plt.ylabel('Training Accuracy') 594 | plt.xlabel('Global rounds ' + '$K_g$') 595 | plt.title('FEMNIST') 596 | #plt.ylim([0.85, train_acc.max()]) 597 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.png', bbox_inches="tight") 598 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.pdf', bbox_inches="tight") 599 | #plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf') 600 | plt.figure(2) 601 | 602 | plt.grid(True) 603 | for i in range(Numb_Algs): 604 | stringbatch = str(batch_size[i]) 605 | if(stringbatch == '0'): 606 | stringbatch = '$\infty$' 607 | plt.plot(train_loss[i, 1:], linestyle=linestyles[i],marker = markers[i],label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 608 | 609 | #plt.plot(train_loss1[i, 1:], 
label=algs_lbl1[i]) 610 | plt.legend(loc='upper right') 611 | plt.ylabel('Training Loss') 612 | plt.xlabel('Global rounds') 613 | plt.title('FEMNIST') 614 | #plt.ylim([train_loss.min(), 0.7]) 615 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight") 616 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches="tight") 617 | #plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf') 618 | plt.figure(3) 619 | plt.grid(True) 620 | for i in range(Numb_Algs): 621 | stringbatch = str(batch_size[i]) 622 | if(stringbatch == '0'): 623 | stringbatch = '$\infty$' 624 | plt.plot(glob_acc[i, 1:], linestyle=linestyles[i],marker = markers[i],label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 625 | #plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i]) 626 | plt.legend(loc='lower right') 627 | #plt.ylim([0.8, glob_acc.max() + 0.005]) 628 | plt.ylabel('Test Accuracy') 629 | plt.xlabel('Global rounds ') 630 | plt.title('FEMNIST') 631 | #ax.set_title('FEMNIST', y=1.02) 632 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight") 633 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.pdf', bbox_inches="tight") 634 | #plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf') 635 | --------------------------------------------------------------------------------
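A minimal usage sketch for the averaging helper above, assuming plot_utils.py is importable as utils.plot_utils and that the per-run result files produced by main.py already exist under ./results/; every hyper-parameter value shown is illustrative, not a setting taken from the repository's experiments:

import h5py
from utils.plot_utils import average_data

# Average five repeated FEDL runs on MNIST; with these (illustrative) arguments
# average_data builds the name "Mnist_FEDL_0.005_0.2_20u_20b_20_avg" and writes
# the averaged curves to ./results/Mnist_FEDL_0.005_0.2_20u_20b_20_avg.h5.
average_data(num_users=20, loc_ep1=20, Numb_Glob_Iters=800, lamb=0,
             learning_rate=0.005, hyper_learning_rate=0.2, algorithms="FEDL",
             batch_size=20, dataset="Mnist", rho=0, times=5)

# Inspect the averaged curves written by average_data.
with h5py.File("./results/Mnist_FEDL_0.005_0.2_20u_20b_20_avg.h5", "r") as hf:
    print(hf["rs_glob_acc"][:5])    # averaged test accuracy per global round
    print(hf["rs_train_loss"][:5])  # averaged training loss per global round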