├── .gitignore ├── LICENSE ├── README.md ├── data └── .gitkeep ├── graph_computation ├── pagerank.py └── transitive_closure.py ├── machine_learning ├── k-means.py └── logistic_regression.py ├── matrix_computation └── matrix_decomposition.py ├── optimization ├── asgd.py ├── bmuf.py ├── easgd.py ├── hogwild!.py ├── ma.py ├── ssgd.py └── ssgd_pytorch.py ├── pic └── DistributedML-cover.jpeg ├── randomized_algorithm └── monte_carlo.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Plot results 10 | *.png 11 | 12 | # VSCode 13 | .vscode/ 14 | 15 | # OS generated files 16 | .DS_Store 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | pip-wheel-metadata/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # data 40 | data/* 41 | !data/.gitkeep 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | db.sqlite3 75 | db.sqlite3-journal 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 108 | __pypackages__/ 109 | 110 | # Celery stuff 111 | celerybeat-schedule 112 | celerybeat.pid 113 | 114 | # SageMath parsed files 115 | *.sage.py 116 | 117 | # Environments 118 | .env 119 | .venv 120 | env/ 121 | venv/ 122 | ENV/ 123 | env.bak/ 124 | venv.bak/ 125 | 126 | # Spyder project settings 127 | .spyderproject 128 | .spyproject 129 | 130 | # Rope project settings 131 | .ropeproject 132 | 133 | # mkdocs documentation 134 | /site 135 | 136 | # mypy 137 | .mypy_cache/ 138 | .dmypy.json 139 | dmypy.json 140 | 141 | # Pyre type checker 142 | .pyre/ 143 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 HongYu Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 9 |

14 | 15 | # Distributed Machine Learning 16 | 📚 *If a captain's highest goal were to keep his ship safe, he would stay in port forever.* 17 | 18 | [![Open Source Love](https://badges.frapsoft.com/os/v2/open-source.svg?v=103)](https://github.com/orion-orion/Distributed-Algorithm-PySpark)[![](https://img.shields.io/github/license/orion-orion/Distributed-Algorithm-PySpark)](https://github.com/orion-orion/Distributed-Algorithm-PySpark/blob/master/LICENSE)[![](https://img.shields.io/github/stars/orion-orion/Distributed-ML-PySpark?style=social)](https://github.com/orion-orion/Distributed-ML-PySpark) 19 |
20 | [![](https://img.shields.io/github/directory-file-count/orion-orion/Distributed-ML-PySpark)](https://github.com/orion-orion/Distributed-ML-PySpark) [![](https://img.shields.io/github/languages/code-size/orion-orion/Distributed-ML-PySpark)](https://github.com/orion-orion/Distributed-ML-PySpark) 21 |
22 | 23 | ## 1 Introduction 24 | This project provides PySpark/PyTorch implementations of classic distributed machine learning algorithms, mainly based on Tie-Yan Liu's book *Distributed Machine Learning* and the course [CME 323: Distributed Algorithms and Optimization](https://stanford.edu/~rezab/classes/cme323/S17/). The main contents cover graph/matrix computation, randomized algorithms, optimization, and machine learning. 25 | 26 | ## 2 Dependencies 27 | 28 | Run the following command to install the dependencies: 29 | ``` 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | Note that my Python version is 3.8.13 and my Java version is 11.0.15. PySpark runs on the Java Virtual Machine and only supports Java 8/11, so do not use a newer Java version; I use Java 11 here. Run `java -version` to check your local Java version. 34 | ```shell 35 | (base) ➜  ~ java -version 36 | java version "11.0.15" 2022-04-19 LTS 37 | Java(TM) SE Runtime Environment 18.9 (build 11.0.15+8-LTS-149) 38 | Java HotSpot(TM) 64-Bit Server VM 18.9 (build 11.0.15+8-LTS-149, mixed mode) 39 | ``` 40 | Finally, PyTorch's `torch.distributed.rpc` module only supports Linux, so make sure you run the related code on a Linux system; otherwise it will raise an error (see [GitHub issues: torch.distributed.rpc](https://github.com/iffiX/machin/issues/17)). 41 | 42 | ## 3 Contents 43 | 44 | - Graph Computation 45 | - PageRank [[explanation]](https://www.cnblogs.com/orion-orion/p/16340839.html) 46 | - Transitive Closure 47 | - Machine Learning 48 | - K-means 49 | - Logistic Regression [[explanation]](https://www.cnblogs.com/orion-orion/p/16318810.html) 50 | - Matrix Computation 51 | - Matrix Decomposition 52 | - Numerical Optimization 53 | - Synchronous Algorithms 54 | - Synchronous Stochastic Gradient Descent (SSGD) [[explanation]](https://www.cnblogs.com/orion-orion/p/16413182.html) [[paper]](https://proceedings.neurips.cc/paper/2010/file/abea47ba24142ed16b7d8fbf2c740e0d-Paper.pdf) 55 | - SSGD in PyTorch [[explanation]](https://www.cnblogs.com/orion-orion/p/16413182.html) [[paper]](https://proceedings.neurips.cc/paper/2010/file/abea47ba24142ed16b7d8fbf2c740e0d-Paper.pdf) 56 | - Model Average (MA) [[explanation]](https://www.cnblogs.com/orion-orion/p/16426982.html) [[paper]](https://aclanthology.org/N10-1069.pdf) 57 | - Block-wise Model Update Filtering (BMUF) [[explanation]](https://www.cnblogs.com/orion-orion/p/16426982.html) [[paper]](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/08/0005880.pdf) 58 | - Elastic Averaging Stochastic Gradient Descent (EASGD) [[explanation]](https://www.cnblogs.com/orion-orion/p/16426982.html) [[paper]](https://proceedings.neurips.cc/paper/2015/file/d18f655c3fce66ca401d5f38b48c89af-Paper.pdf) 59 | - Asynchronous Algorithms 60 | - Asynchronous Stochastic Gradient Descent (ASGD) [[explanation]](https://www.cnblogs.com/orion-orion/p/17118029.html) [[paper]](https://proceedings.neurips.cc/paper/2011/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Paper.pdf) 61 | - Hogwild! 
[[explanation]](https://www.cnblogs.com/orion-orion/p/17118029.html) [[paper]](https://proceedings.neurips.cc/paper/2011/file/218a0aefd1d1a4be65601cc6ddc1520e-Paper.pdf) 62 | - Randomized Algorithms 63 | - Monte Carlo Method 64 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orion-orion/Distributed-ML-PySpark/051790d6bc8d034cfa6af19e7d4f820f4c1fa6d6/data/.gitkeep -------------------------------------------------------------------------------- /graph_computation/pagerank.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-05-31 14:14:35 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-02 11:48:23 8 | ''' 9 | import sys 10 | from operator import add 11 | from typing import Iterable, Tuple 12 | from pyspark.resultiterable import ResultIterable 13 | from pyspark.sql import SparkSession 14 | import os 15 | 16 | os.environ['PYSPARK_PYTHON'] = sys.executable 17 | 18 | n_threads = 4 # Number of local threads 19 | n_iterations = 10 # Number of iterations 20 | q = 0.15  # the default value of q is 0.15 21 | 22 | def computeContribs(neighbors: ResultIterable[int], rank: float) -> Iterable[Tuple[int, float]]: 23 | # Calculate the contribution (rank/num_neighbors) of each vertex and send it to its neighbours. 24 | num_neighbors = len(neighbors) 25 | for vertex in neighbors: 26 | yield (vertex, rank / num_neighbors) 27 | 28 | if __name__ == "__main__": 29 | # Initialize the spark context. 30 | spark = SparkSession\ 31 | .builder\ 32 | .appName("PageRank")\ 33 | .master("local[%d]" % n_threads)\ 34 | .getOrCreate() 35 | 36 | # link: (source_id, dest_id) 37 | links = spark.sparkContext.parallelize( 38 | [(1, 2), (1, 3), (2, 3), (3, 1)], 39 | ) 40 | 41 | # Drop duplicate links and convert the links to an adjacency list. 42 | adj_list = links.distinct().groupByKey().cache() 43 | 44 | # Count the number of vertices. 45 | n_vertexes = adj_list.count() 46 | 47 | # Initialize the rank of each vertex; the default is 1.0/n_vertexes. 48 | ranks = adj_list.map(lambda vertex_neighbors: (vertex_neighbors[0], 1.0/n_vertexes)) 49 | 50 | # Iteratively compute and update the vertex ranks with the PageRank algorithm. 51 | for t in range(n_iterations): 52 | # Calculate the contribution (rank/num_neighbors) of each vertex and send it to its neighbours. 53 | contribs = adj_list.join(ranks).flatMap(lambda vertex_neighbors_rank: computeContribs( 54 | vertex_neighbors_rank[1][0], vertex_neighbors_rank[1][1] # type: ignore[arg-type] 55 | )) 56 | 57 | # Re-calculate the rank of each vertex based on the contributions it received. 58 | ranks = contribs.reduceByKey(add).mapValues(lambda rank: q/n_vertexes + (1 - q)*rank) 59 | 60 | # Collect all vertex ranks and dump them to the console. 61 | for (vertex, rank) in ranks.collect(): 62 | print("%s has rank: %s." % (vertex, rank)) 63 | 64 | spark.stop() 65 | 66 | 67 | # 1 has rank: 0.38891305880091237. 68 | # 2 has rank: 0.214416470596171. 69 | # 3 has rank: 0.3966704706029163. 
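# Note: each round above implements the damped PageRank update
#     rank(v) = q / n_vertexes + (1 - q) * sum_{u -> v} rank(u) / out_degree(u),
# where out_degree(u) is num_neighbors in computeContribs. Since every vertex in this
# example graph receives contributions, the resulting ranks form a probability
# distribution: the three values above sum to 1 (0.3889 + 0.2144 + 0.3967 ≈ 1).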
-------------------------------------------------------------------------------- /graph_computation/transitive_closure.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-07-01 22:04:00 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-02 11:51:50 8 | ''' 9 | from pyspark.sql import SparkSession 10 | import sys 11 | import os 12 | 13 | os.environ['PYSPARK_PYTHON'] = sys.executable 14 | 15 | n_threads = 4 # Number of local threads 16 | 17 | if __name__ == "__main__": 18 | spark = SparkSession\ 19 | .builder\ 20 | .appName("Transitive Closure")\ 21 | .master("local[%d]" % n_threads)\ 22 | .getOrCreate() 23 | 24 | paths = spark.sparkContext.parallelize([(1, 2), (1, 3), (2, 3), (3, 1)]).cache() 25 | 26 | # Linear transitive closure: each round grows paths by one edge, 27 | # by joining the the already-discovered paths with graph's edges. 28 | # e.g. join the path (y, z) from the paths with the edge (x, y) from 29 | # the graph to obtain the new path (x, z). 30 | 31 | 32 | # The edges are stored in reversed order because they are about to be joined. 33 | edges = paths.map(lambda x_y: (x_y[1], x_y[0])) 34 | 35 | old_cnt = 0 36 | next_cnt = paths.count() 37 | while True: 38 | old_cnt = next_cnt 39 | # Perform the join, obtaining an RDD of (y, (z, x)) pairs, 40 | # then map the result to obtain the new (x, z) paths. 41 | new_paths = paths.join(edges).map(lambda vertexes: (vertexes[1][1], vertexes[1][0])) 42 | # union new paths 43 | paths = paths.union(new_paths).distinct().cache() 44 | next_cnt = paths.count() 45 | if next_cnt == old_cnt: 46 | break 47 | 48 | print("The original graph has %i paths" % paths.count()) 49 | 50 | spark.stop() -------------------------------------------------------------------------------- /machine_learning/k-means.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-06-30 21:53:37 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-02 11:52:42 8 | ''' 9 | import random 10 | from typing import List, Tuple 11 | import numpy as np 12 | from pyspark.sql import SparkSession 13 | import matplotlib.pyplot as plt 14 | import sys 15 | import os 16 | 17 | os.environ['PYSPARK_PYTHON'] = sys.executable 18 | 19 | k = 2 20 | convergeDist = 0.1 21 | n_threads = 4 # Number of local threads 22 | n_iterations = 5 23 | 24 | def closest_center(p: np.ndarray, centers: List[np.ndarray]) -> int: 25 | closest_cid = 0 26 | min_dist = float("+inf") 27 | for cid in range(len(centers)): 28 | dist = np.sqrt(np.sum((p - centers[cid]) ** 2)) 29 | if dist < min_dist: 30 | min_dist = dist 31 | closest_cid = cid 32 | return closest_cid 33 | 34 | def display_clusters(center_to_point: List[Tuple]): 35 | clusters = dict([ (c_id, []) for c_id in range(k)]) 36 | for c_id, (p, _) in center_to_point: 37 | clusters[c_id].append(p) 38 | 39 | for c_id, points in clusters.items(): 40 | points = np.array(points) 41 | color = "#"+''.join([random.choice('0123456789ABCDEF') for i in range(6)]) 42 | plt.scatter(points[:, 0], points[:, 1], c=color) 43 | 44 | plt.savefig("kmeans_clusters_display.png") 45 | 46 | 47 | if __name__ == "__main__": 48 | spark = SparkSession\ 49 | .builder\ 50 | .appName("K-means")\ 51 | .master("local[%d]" % n_threads)\ 52 | .getOrCreate() 53 | 54 | matrix = np.array([[1, 2], [1, 4], [1, 0], 55 | [10, 2], [10, 4], [10, 0]]) 56 | points = 
spark.sparkContext.parallelize(matrix).cache() 57 | 58 | k_centers = points.takeSample(False, k, 42) 59 | 60 | for t in range(n_iterations): 61 | # assign each point to the center closest to it. 62 | center_to_point = points.map( 63 | lambda p: (closest_center(p, k_centers), (p, 1))) 64 | 65 | # for each cluster(points shareing the some center), 66 | # compute the sum of vecters in it and the size of it. 67 | cluster_stats = center_to_point.reduceByKey( 68 | lambda p1_cnt1, p2_cnt2: (p1_cnt1[0] + p2_cnt2[0], p1_cnt1[1] + p2_cnt2[1])) 69 | 70 | # for each cluster, compute the mean vecter. 71 | mean_vecters = cluster_stats.map( 72 | lambda stat: (stat[0], stat[1][0] / stat[1][1])).collect() 73 | 74 | # update the centers. 75 | for (c_id, mean_vecter) in mean_vecters: 76 | k_centers[c_id] = mean_vecter 77 | 78 | print("Final centers: " + str(k_centers)) 79 | 80 | if matrix.shape[1] == 2: 81 | display_clusters(center_to_point.collect()) 82 | 83 | spark.stop() -------------------------------------------------------------------------------- /machine_learning/logistic_regression.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-05-26 21:02:38 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-01 16:22:53 8 | ''' 9 | from sklearn.datasets import load_breast_cancer 10 | import numpy as np 11 | from pyspark.sql import SparkSession 12 | from operator import add 13 | from sklearn.model_selection import train_test_split 14 | from sklearn.metrics import accuracy_score 15 | import matplotlib.pyplot as plt 16 | import sys 17 | import os 18 | 19 | os.environ['PYSPARK_PYTHON'] = sys.executable 20 | 21 | n_threads = 4 # Number of local threads 22 | n_iterations = 1500 # Number of iterations 23 | eta = 0.1 # iteration step_size 24 | 25 | def logistic_f(x, w): 26 | return 1 / (np.exp(-x.dot(w)) + 1) 27 | 28 | 29 | def gradient(point: np.ndarray, w: np.ndarray) -> np.ndarray: 30 | """ Compute linear regression gradient for a matrix of data points 31 | """ 32 | y = point[-1] # point label 33 | x = point[:-1] # point coordinate 34 | # For each point (x, y), compute gradient function, then sum these up 35 | return - (y - logistic_f(x, w)) * x 36 | 37 | def draw_acc_plot(accs, n_iterations): 38 | def ewma_smooth(accs, alpha=0.9): 39 | s_accs = np.zeros(n_iterations) 40 | for idx, acc in enumerate(accs): 41 | if idx == 0: 42 | s_accs[idx] = acc 43 | else: 44 | s_accs[idx] = alpha * s_accs[idx-1] + (1 - alpha) * acc 45 | return s_accs 46 | 47 | s_accs = ewma_smooth(accs, alpha=0.9) 48 | plt.plot(np.arange(1, n_iterations + 1), accs, color="C0", alpha=0.3) 49 | plt.plot(np.arange(1, n_iterations + 1), s_accs, color="C0") 50 | plt.title(label="Accuracy on test dataset") 51 | plt.xlabel("Round") 52 | plt.ylabel("Accuracy") 53 | plt.savefig("logistic_regression_acc_plot.png") 54 | 55 | 56 | if __name__ == "__main__": 57 | 58 | X, y = load_breast_cancer(return_X_y=True) 59 | 60 | D = X.shape[1] 61 | X_train, X_test, y_train, y_test = train_test_split( 62 | X, y, test_size=0.3, random_state=0) 63 | n_train, n_test = X_train.shape[0], X_test.shape[0] 64 | 65 | spark = SparkSession\ 66 | .builder\ 67 | .appName("Logistic Regression")\ 68 | .master("local[%d]" % n_threads)\ 69 | .getOrCreate() 70 | 71 | matrix = np.concatenate( 72 | [X_train, np.ones((n_train, 1)), y_train.reshape(-1, 1)], axis=1) 73 | 74 | points = spark.sparkContext.parallelize(matrix).cache() 75 | 76 | # Initialize w to a random value 77 
| w = 2 * np.random.ranf(size=D + 1) - 1 78 | print("Initial w: " + str(w)) 79 | 80 | accs = [] 81 | for t in range(n_iterations): 82 | print("On iteration %d" % (t + 1)) 83 | w_br = spark.sparkContext.broadcast(w) 84 | 85 | # g = points.map(lambda point: gradient(point, w)).reduce(add) 86 | # g = points.map(lambda point: gradient(point, w_br.value)).reduce(add) 87 | g = points.map(lambda point: gradient(point, w_br.value))\ 88 | .treeAggregate(0.0, add, add) 89 | 90 | w -= eta * g 91 | 92 | y_pred = logistic_f(np.concatenate( 93 | [X_test, np.ones((n_test, 1))], axis=1), w) 94 | pred_label = np.where(y_pred < 0.5, 0, 1) 95 | acc = accuracy_score(y_test, pred_label) 96 | accs.append(acc) 97 | print("iterations: %d, accuracy: %f" % (t, acc)) 98 | 99 | print("Final w: %s " % w) 100 | print("Final acc: %f" % acc) 101 | 102 | spark.stop() 103 | 104 | draw_acc_plot(accs, n_iterations) 105 | 106 | # Final w: [ 1.16200213e+04 1.30671054e+04 6.53960395e+04 2.13003287e+04 107 | # 8.92852998e+01 -1.09553416e+02 -2.98667851e+02 -1.26433988e+02 108 | # 1.59947852e+02 7.85600857e+01 -3.90622568e+01 8.09490631e+02 109 | # -1.29356637e+03 -4.02060982e+04 4.22124893e+00 -2.30863864e+01 110 | # -4.22144623e+01 -9.06373487e+00 1.16047444e+01 9.14892224e-01 111 | # 1.25920286e+04 1.53120086e+04 6.48615769e+04 -3.23661608e+04 112 | # 1.00625479e+02 -3.98123440e+02 -6.89846039e+02 -1.77214836e+02 113 | # 1.95991193e+02 5.96495248e+01 1.53245784e+03] 114 | # Final acc: 0.941520 115 | 116 | -------------------------------------------------------------------------------- /matrix_computation/matrix_decomposition.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-06-30 19:32:44 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-02 11:51:14 8 | ''' 9 | import numpy as np 10 | from pyspark.sql import SparkSession 11 | import sys 12 | import os 13 | 14 | os.environ['PYSPARK_PYTHON'] = sys.executable 15 | 16 | lam = 0.01 # regularization coefficient 17 | m = 100 # number of users 18 | n = 500 # number of items 19 | k = 10 # dim of the latent vectors of users and items 20 | n_iterations = 5 # number of iterations 21 | n_threads = 4 # Number of local threads 22 | 23 | def rmse(R: np.ndarray, U: np.ndarray, V: np.ndarray) -> np.float64: 24 | diff = R - U @ V.T 25 | return np.sqrt(np.sum(np.power(diff, 2)) / (m * n)) 26 | 27 | 28 | def update(i: int, mat: np.ndarray, ratings: np.ndarray) -> np.ndarray: 29 | X_dim = mat.shape[0] 30 | 31 | XtX = mat.T @ mat 32 | Xty = mat.T @ ratings[i, :].T 33 | 34 | for i in range(k): 35 | XtX[i, i] += lam * X_dim 36 | 37 | return np.linalg.solve(XtX, Xty) 38 | 39 | 40 | if __name__ == "__main__": 41 | spark = SparkSession\ 42 | .builder\ 43 | .appName("Matrix Decomposition")\ 44 | .master("local[%d]" % n_threads)\ 45 | .getOrCreate() 46 | 47 | R = np.random.rand(m, k) @ (np.random.rand(n, k).T) 48 | U = np.random.rand(m, k) 49 | V = np.random.rand(n, k) 50 | 51 | R_br = spark.sparkContext.broadcast(R) 52 | U_br = spark.sparkContext.broadcast(U) 53 | V_br = spark.sparkContext.broadcast(V) 54 | 55 | # we use the alternating least squares (ALS) to solve the SVD problem 56 | for t in range(n_iterations): 57 | U_ = spark.sparkContext.parallelize(range(m)) \ 58 | .map(lambda x: update(x, V_br.value, R_br.value)) \ 59 | .collect() 60 | 61 | # collect() returns a list, so we need to convert it to a 2-d array 62 | U = np.array(U_) 63 | U_br = spark.sparkContext.broadcast(U) 64 | 
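# With V fixed, each row of U above solves the regularized normal equations
#     (V^T V + lam * n * I) u_i = V^T R[i, :],
# as assembled in update(). The symmetric step below fixes U and re-solves the
# same kind of system for every row of V, using the transposed rating matrix.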
65 | V_ = spark.sparkContext.parallelize(range(n)) \ 66 | .map(lambda x: update(x, U_br.value, R_br.value.T)) \ 67 | .collect() 68 | V = np.array(V_) 69 | V_br = spark.sparkContext.broadcast(V) 70 | 71 | error = rmse(R, U, V) 72 | print("iterations: %d, rmse: %f" % (t, error)) 73 | 74 | spark.stop() -------------------------------------------------------------------------------- /optimization/asgd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import threading 3 | from datetime import datetime 4 | import torch 5 | import torch.distributed.rpc as rpc 6 | import torch.multiprocessing as mp 7 | import torch.nn as nn 8 | from torch import optim 9 | import torchvision 10 | from torchvision import datasets, transforms 11 | import torch.nn.functional as F 12 | from torch.utils.data import Subset 13 | 14 | 15 | batch_size = 20 16 | n_workers = 5 17 | epochs = 10 18 | seed = 1 19 | log_interval = 10 # how many epochs to wait before logging training status 20 | cuda = True # enables CUDA training 21 | mps = False # enables macOS GPU training 22 | use_cuda = cuda and torch.cuda.is_available() 23 | use_mps = mps and torch.backends.mps.is_available() 24 | if use_cuda: 25 | device = torch.device("cuda") 26 | elif use_mps: 27 | device = torch.device("mps") 28 | else: 29 | device = torch.device("cpu") 30 | 31 | 32 | class CustomSubset(Subset): 33 | '''A custom subset class with customizable data transformation''' 34 | def __init__(self, dataset, indices, subset_transform=None): 35 | super().__init__(dataset, indices) 36 | self.subset_transform = subset_transform 37 | 38 | def __getitem__(self, idx): 39 | x, y = self.dataset[self.indices[idx]] 40 | if self.subset_transform: 41 | x = self.subset_transform(x) 42 | return x, y 43 | 44 | def __len__(self): 45 | return len(self.indices) 46 | 47 | 48 | def dataset_split(dataset, n_workers): 49 | n_samples = len(dataset) 50 | n_sample_per_workers = n_samples // n_workers 51 | local_datasets = [] 52 | for w_id in range(n_workers): 53 | if w_id < n_workers - 1: 54 | local_datasets.append(CustomSubset(dataset, range(w_id * n_sample_per_workers, (w_id + 1) * n_sample_per_workers))) 55 | else: 56 | local_datasets.append(CustomSubset(dataset, range(w_id * n_sample_per_workers, n_samples))) 57 | return local_datasets 58 | 59 | 60 | class Net(nn.Module): 61 | def __init__(self): 62 | super(Net, self).__init__() 63 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 64 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 65 | self.conv2_drop = nn.Dropout2d() 66 | self.fc1 = nn.Linear(320, 50) 67 | self.fc2 = nn.Linear(50, 10) 68 | 69 | def forward(self, x): 70 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 71 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 72 | x = x.view(-1, 320) 73 | x = F.relu(self.fc1(x)) 74 | x = F.dropout(x, training=self.training) 75 | x = self.fc2(x) 76 | return F.log_softmax(x, dim=1) 77 | 78 | class ParameterServer(object): 79 | 80 | def __init__(self, n_workers=n_workers): 81 | self.model = Net().to(device) 82 | self.lock = threading.Lock() 83 | self.future_model = torch.futures.Future() 84 | self.n_workers = n_workers 85 | self.curr_update_size = 0 86 | self.optimizer = optim.SGD(self.model.parameters(), lr=0.001, momentum=0.9) 87 | for p in self.model.parameters(): 88 | p.grad = torch.zeros_like(p) 89 | self.test_loader = torch.utils.data.DataLoader( 90 | datasets.MNIST('../data', train=False, 91 | transform=transforms.Compose([ 92 | transforms.ToTensor(), 93 | 
transforms.Normalize((0.1307,), (0.3081,)) 94 | ])), 95 | batch_size=32, shuffle=True) 96 | 97 | 98 | def get_model(self): 99 | # TensorPipe RPC backend only supports CPU tensors, 100 | # so we move your tensors to CPU before sending them over RPC 101 | return self.model.to("cpu") 102 | 103 | @staticmethod 104 | @rpc.functions.async_execution 105 | def update_and_fetch_model(ps_rref, grads): 106 | self = ps_rref.local_value() 107 | for p, g in zip(self.model.parameters(), grads): 108 | p.grad += g 109 | with self.lock: 110 | self.curr_update_size += 1 111 | fut = self.future_model 112 | 113 | if self.curr_update_size >= self.n_workers: 114 | for p in self.model.parameters(): 115 | p.grad /= self.n_workers 116 | self.curr_update_size = 0 117 | self.optimizer.step() 118 | self.optimizer.zero_grad() 119 | fut.set_result(self.model) 120 | self.future_model = torch.futures.Future() 121 | 122 | return fut 123 | 124 | def evaluation(self): 125 | self.model.eval() 126 | self.model = self.model.to(device) 127 | test_loss = 0 128 | correct = 0 129 | with torch.no_grad(): 130 | for data, target in self.test_loader: 131 | output = self.model(data.to(device)) 132 | test_loss += F.nll_loss(output, target.to(device), reduction='sum').item() # sum up batch loss 133 | pred = output.max(1)[1] # get the index of the max log-probability 134 | correct += pred.eq(target.to(device)).sum().item() 135 | 136 | test_loss /= len(self.test_loader.dataset) 137 | print('\nTest result - Accuracy: {}/{} ({:.0f}%)\n'.format( 138 | correct, len(self.test_loader.dataset), 100. * correct / len(self.test_loader.dataset))) 139 | 140 | 141 | class Trainer(object): 142 | 143 | def __init__(self, ps_rref): 144 | self.ps_rref = ps_rref 145 | self.model = Net().to(device) 146 | 147 | def train(self, train_dataset): 148 | train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True) 149 | model = self.ps_rref.rpc_sync().get_model().cuda() 150 | pid = os.getpid() 151 | for epoch in range(epochs): 152 | for batch_idx, (data, target) in enumerate(train_loader): 153 | output = model(data.to(device)) 154 | loss = F.nll_loss(output, target.to(device)) 155 | loss.backward() 156 | model = rpc.rpc_sync( 157 | self.ps_rref.owner(), 158 | ParameterServer.update_and_fetch_model, 159 | args=(self.ps_rref, [p.grad for p in model.cpu().parameters()]), 160 | ).cuda() 161 | if batch_idx % log_interval == 0: 162 | print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 163 | pid, epoch + 1, batch_idx * len(data), len(train_loader.dataset), 164 | 100. 
* batch_idx / len(train_loader), loss.item())) 165 | 166 | 167 | 168 | def run_trainer(ps_rref, train_dataset): 169 | trainer = Trainer(ps_rref) 170 | trainer.train(train_dataset) 171 | 172 | 173 | def run_ps(trainers): 174 | transform=transforms.Compose([ 175 | transforms.ToTensor(), 176 | transforms.Normalize((0.1307,), (0.3081,)) 177 | ]) 178 | train_dataset = datasets.MNIST('../data', train=True, download=True, 179 | transform=transform) 180 | local_train_datasets = dataset_split(train_dataset, n_workers) 181 | 182 | 183 | print(f"{datetime.now().strftime('%H:%M:%S')} Start training") 184 | ps = ParameterServer() 185 | ps_rref = rpc.RRef(ps) 186 | futs = [] 187 | for idx, trainer in enumerate(trainers): 188 | futs.append( 189 | rpc.rpc_async(trainer, run_trainer, args=(ps_rref, local_train_datasets[idx])) 190 | ) 191 | 192 | torch.futures.wait_all(futs) 193 | print(f"{datetime.now().strftime('%H:%M:%S')} Finish training") 194 | ps.evaluation() 195 | # Test result - Accuracy: 9696/10000 (97%) 196 | 197 | def run(rank, world_size): 198 | os.environ['MASTER_ADDR'] = 'localhost' 199 | os.environ['MASTER_PORT'] = '29500' 200 | options=rpc.TensorPipeRpcBackendOptions( 201 | num_worker_threads=16, 202 | rpc_timeout=0 # infinite timeout 203 | ) 204 | if rank == 0: 205 | rpc.init_rpc( 206 | "ps", 207 | rank=rank, 208 | world_size=world_size, 209 | rpc_backend_options=options 210 | ) 211 | run_ps([f"trainer{r}" for r in range(1, world_size)]) 212 | else: 213 | rpc.init_rpc( 214 | f"trainer{rank}", 215 | rank=rank, 216 | world_size=world_size, 217 | rpc_backend_options=options 218 | ) 219 | # trainer passively waiting for ps to kick off training iterations 220 | 221 | # block until all rpcs finish 222 | rpc.shutdown() 223 | 224 | 225 | if __name__=="__main__": 226 | world_size = n_workers + 1 227 | mp.spawn(run, args=(world_size, ), nprocs=world_size, join=True) 228 | -------------------------------------------------------------------------------- /optimization/bmuf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-05-26 21:02:38 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-02 11:50:46 8 | ''' 9 | from typing import Tuple 10 | from sklearn.datasets import load_breast_cancer 11 | import numpy as np 12 | from pyspark.sql import SparkSession 13 | from operator import add 14 | from sklearn.model_selection import train_test_split 15 | from sklearn.metrics import accuracy_score 16 | import matplotlib.pyplot as plt 17 | import sys 18 | import os 19 | 20 | os.environ['PYSPARK_PYTHON'] = sys.executable 21 | 22 | n_threads = 4 # Number of local threads 23 | n_iterations = 300 # Number of iterations 24 | eta = 0.1 25 | mini_batch_fraction = 0.1 # the fraction of mini batch sample 26 | n_local_iterations = 5 # the number local epochs 27 | mu = 0.9 28 | zeta = 0.1 29 | 30 | def logistic_f(x, w): 31 | return 1 / (np.exp(-x.dot(w)) + 1 +1e-6) 32 | 33 | 34 | def gradient(pt_w: Tuple): 35 | """ Compute linear regression gradient for a matrix of data points 36 | """ 37 | idx, (point, w) = pt_w 38 | y = point[-1] # point label 39 | x = point[:-1] # point coordinate 40 | # For each point (x, y), compute gradient function, then sum these up 41 | return (idx, (w, - (y - logistic_f(x, w)) * x)) 42 | 43 | 44 | def update_local_w(iter): 45 | iter = list(iter) 46 | idx, (w, _) = iter[0] 47 | g_mean = np.mean(np.array([ g for _, (_, g) in iter]), axis=0) 48 | return [(idx, w - eta * g_mean)] 49 | 
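# Note: gradient() and update_local_w() above perform the local SGD steps on each
# partition. After n_local_iterations of them, the driver (in the main loop below)
# averages the worker models into w_avg and applies the BMUF block-momentum update:
#     delta_w = mu * delta_w + zeta * (w_avg - w);  w = w + delta_w.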
50 | 51 | def draw_acc_plot(accs, n_iterations): 52 | def ewma_smooth(accs, alpha=0.9): 53 | s_accs = np.zeros(n_iterations) 54 | for idx, acc in enumerate(accs): 55 | if idx == 0: 56 | s_accs[idx] = acc 57 | else: 58 | s_accs[idx] = alpha * s_accs[idx-1] + (1 - alpha) * acc 59 | return s_accs 60 | 61 | s_accs = ewma_smooth(accs, alpha=0.9) 62 | plt.plot(np.arange(1, n_iterations + 1), accs, color="C0", alpha=0.3) 63 | plt.plot(np.arange(1, n_iterations + 1), s_accs, color="C0") 64 | plt.title(label="Accuracy on test dataset") 65 | plt.xlabel("Round") 66 | plt.ylabel("Accuracy") 67 | plt.savefig("bmuf_acc_plot.png") 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | X, y = load_breast_cancer(return_X_y=True) 73 | 74 | D = X.shape[1] 75 | 76 | X_train, X_test, y_train, y_test = train_test_split( 77 | X, y, test_size=0.3, random_state=0, shuffle=True) 78 | n_train, n_test = X_train.shape[0], X_test.shape[0] 79 | 80 | spark = SparkSession\ 81 | .builder\ 82 | .appName("BMUF")\ 83 | .master("local[%d]" % n_threads)\ 84 | .getOrCreate() 85 | 86 | matrix = np.concatenate( 87 | [X_train, np.ones((n_train, 1)), y_train.reshape(-1, 1)], axis=1) 88 | 89 | points = spark.sparkContext.parallelize(matrix).cache() 90 | points = points.mapPartitionsWithIndex(lambda idx, iter: [ (idx, arr) for arr in iter]) 91 | 92 | ws = spark.sparkContext.parallelize(2 * np.random.ranf(size=(n_threads, D + 1)) - 1).cache() 93 | ws = ws.mapPartitionsWithIndex(lambda idx, iter: [(idx, next(iter))]) 94 | 95 | w = 2 * np.random.ranf(size=D + 1) - 1 96 | print("Initial w: " + str(w)) 97 | 98 | # weight update 99 | delta_w = 2 * np.random.ranf(size=D + 1) - 1 100 | 101 | accs = [] 102 | for t in range(n_iterations): 103 | print("On iteration %d" % (t + 1)) 104 | w_br = spark.sparkContext.broadcast(w) 105 | ws = ws.mapPartitions(lambda iter: [(iter[0][0], w_br.value)]) 106 | 107 | for local_t in range(n_local_iterations): 108 | ws = points.sample(False, mini_batch_fraction, 42 + t)\ 109 | .join(ws, numPartitions=n_threads)\ 110 | .map(lambda pt_w: gradient(pt_w))\ 111 | .mapPartitions(update_local_w) 112 | 113 | par_w_sum = ws.mapPartitions(lambda iter: [iter[0][1]]).treeAggregate(0.0, add, add) 114 | 115 | w_avg = par_w_sum / n_threads 116 | 117 | delta_w = mu * delta_w + zeta * (w_avg - w) 118 | w = w + delta_w 119 | 120 | y_pred = logistic_f(np.concatenate( 121 | [X_test, np.ones((n_test, 1))], axis=1), w) 122 | pred_label = np.where(y_pred < 0.5, 0, 1) 123 | acc = accuracy_score(y_test, pred_label) 124 | accs.append(acc) 125 | print("iterations: %d, accuracy: %f" % (t, acc)) 126 | 127 | print("Final w: %s " % w) 128 | print("Final acc: %f" % acc) 129 | 130 | spark.stop() 131 | 132 | draw_acc_plot(accs, n_iterations) 133 | 134 | 135 | # Final w: [ 3.41516794e+01 5.11372499e+01 2.04081002e+02 1.03632914e+02 136 | # -7.95309541e+00 6.00459407e+00 -9.58634353e+00 -4.56611790e+00 137 | # -3.12493046e+00 7.20375548e+00 -6.13087884e+00 5.02524913e+00 138 | # -9.99930137e+00 -1.26079312e+02 -7.53719022e+00 -4.93277200e-01 139 | # -9.28534294e+00 -7.81058362e+00 1.78073479e+00 -1.49910377e-01 140 | # 3.93256717e+01 7.52357494e+01 2.09020272e+02 -1.33107647e+02 141 | # 8.22423217e+00 7.29714646e+00 -8.21168535e+00 -4.55323584e-02 142 | # 2.08715673e+00 -9.04949770e+00 -9.35055238e-01] 143 | # Final acc: 0.929825 144 | -------------------------------------------------------------------------------- /optimization/easgd.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | 
Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-05-26 21:02:38 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-02 11:50:30 8 | ''' 9 | from typing import Tuple 10 | from sklearn.datasets import load_breast_cancer 11 | import numpy as np 12 | from pyspark.sql import SparkSession 13 | from operator import add 14 | from sklearn.model_selection import train_test_split 15 | from sklearn.metrics import accuracy_score 16 | import matplotlib.pyplot as plt 17 | import sys 18 | import os 19 | 20 | os.environ['PYSPARK_PYTHON'] = sys.executable 21 | 22 | n_threads = 4 # Number of local threads 23 | n_iterations = 1500 # Number of iterations 300 24 | eta = 0.1 25 | mini_batch_fraction = 0.1 # the fraction of mini batch sample 26 | rho = 0.1 # penalty constraint coefficient 27 | alpha = eta * rho # iterative constraint coefficient 28 | beta = n_threads * alpha # the parameter of history information 29 | 30 | def logistic_f(x, w): 31 | return 1 / (np.exp(-x.dot(w)) + 1 +1e-6) 32 | 33 | 34 | def gradient(pt_w: Tuple): 35 | """ Compute linear regression gradient for a matrix of data points 36 | """ 37 | idx, (point, w) = pt_w 38 | y = point[-1] # point label 39 | x = point[:-1] # point coordinate 40 | # For each point (x, y), compute gradient function, then sum these up 41 | return (idx, (w, - (y - logistic_f(x, w)) * x)) 42 | 43 | 44 | def update_local_w(iter, w): 45 | iter = list(iter) 46 | idx, (local_w, _) = iter[0] 47 | g_mean = np.mean(np.array([ g for _, (_, g) in iter]), axis=0) 48 | return [(idx, local_w - eta * g_mean - alpha * (local_w - w))] 49 | 50 | 51 | def draw_acc_plot(accs, n_iterations): 52 | def ewma_smooth(accs, alpha=0.9): 53 | s_accs = np.zeros(n_iterations) 54 | for idx, acc in enumerate(accs): 55 | if idx == 0: 56 | s_accs[idx] = acc 57 | else: 58 | s_accs[idx] = alpha * s_accs[idx-1] + (1 - alpha) * acc 59 | return s_accs 60 | 61 | s_accs = ewma_smooth(accs, alpha=0.9) 62 | plt.plot(np.arange(1, n_iterations + 1), accs, color="C0", alpha=0.3) 63 | plt.plot(np.arange(1, n_iterations + 1), s_accs, color="C0") 64 | plt.title(label="Accuracy on test dataset") 65 | plt.xlabel("Round") 66 | plt.ylabel("Accuracy") 67 | plt.savefig("easgd_acc_plot.png") 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | X, y = load_breast_cancer(return_X_y=True) 73 | 74 | D = X.shape[1] 75 | 76 | X_train, X_test, y_train, y_test = train_test_split( 77 | X, y, test_size=0.3, random_state=0, shuffle=True) 78 | n_train, n_test = X_train.shape[0], X_test.shape[0] 79 | 80 | spark = SparkSession\ 81 | .builder\ 82 | .appName("EASGD")\ 83 | .master("local[%d]" % n_threads)\ 84 | .getOrCreate() 85 | 86 | matrix = np.concatenate( 87 | [X_train, np.ones((n_train, 1)), y_train.reshape(-1, 1)], axis=1) 88 | 89 | points = spark.sparkContext.parallelize(matrix).cache() 90 | points = points.mapPartitionsWithIndex(lambda idx, iter: [ (idx, arr) for arr in iter]) 91 | 92 | ws = spark.sparkContext.parallelize(2 * np.random.ranf(size=(n_threads, D + 1)) - 1).cache() 93 | ws = ws.mapPartitionsWithIndex(lambda idx, iter: [(idx, next(iter))]) 94 | 95 | w = 2 * np.random.ranf(size=D + 1) - 1 96 | print("Initial w: " + str(w)) 97 | 98 | accs = [] 99 | for t in range(n_iterations): 100 | print("On iteration %d" % (t + 1)) 101 | w_br = spark.sparkContext.broadcast(w) 102 | 103 | ws = points.sample(False, mini_batch_fraction, 42 + t)\ 104 | .join(ws, numPartitions=n_threads)\ 105 | .map(lambda pt_w: gradient(pt_w))\ 106 | .mapPartitions(lambda iter: update_local_w(iter, w=w_br.value)) 107 | 108 | par_w_sum = 
ws.mapPartitions(lambda iter: [iter[0][1]]).treeAggregate(0.0, add, add) 109 | 110 | w = (1 - beta) * w + beta * par_w_sum / n_threads 111 | 112 | y_pred = logistic_f(np.concatenate( 113 | [X_test, np.ones((n_test, 1))], axis=1), w) 114 | pred_label = np.where(y_pred < 0.5, 0, 1) 115 | acc = accuracy_score(y_test, pred_label) 116 | accs.append(acc) 117 | print("iterations: %d, accuracy: %f" % (t, acc)) 118 | 119 | print("Final w: %s " % w) 120 | print("Final acc: %f" % acc) 121 | 122 | spark.stop() 123 | 124 | draw_acc_plot(accs, n_iterations) 125 | 126 | 127 | # Final w: [ 4.41003205e+01 6.87756972e+01 2.59527758e+02 1.43995756e+02 128 | # 1.13597321e-01 -2.85033742e-01 -5.97111145e-01 -2.77260275e-01 129 | # 4.96300761e-01 3.30914106e-01 -2.22883276e-01 4.26915865e+00 130 | # -2.62994199e+00 -1.43839576e+02 -1.78751529e-01 2.54613165e-01 131 | # -8.19158564e-02 4.12327013e-01 -1.13116759e-01 -2.01949538e-01 132 | # 4.56239359e+01 8.74703134e+01 2.62017432e+02 -1.77434224e+02 133 | # 3.78336511e-01 -4.12976475e-01 -1.31121349e+00 -3.16414474e-01 134 | # 9.83796876e-01 2.30045103e-01 5.34560392e+00] 135 | # Final acc: 0.929825 136 | 137 | -------------------------------------------------------------------------------- /optimization/hogwild!.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.multiprocessing as mp 6 | from torchvision import datasets, transforms 7 | import os 8 | import torch 9 | import torch.optim as optim 10 | import torch.nn.functional as F 11 | 12 | 13 | batch_size = 64 # input batch size for training 14 | test_batch_size = 1000 # input batch size for testing 15 | epochs = 10 # number of global epochs to train 16 | lr = 0.01 # learning rate 17 | momentum = 0.5 # SGD momentum 18 | seed = 1 # random seed 19 | log_interval = 10 # how many batches to wait before logging training status 20 | n_workers = 4 # how many training processes to use 21 | cuda = True # enables CUDA training 22 | mps = False # enables macOS GPU training 23 | dry_run = False # quickly check a single pass 24 | 25 | 26 | def train(rank, model, device, dataset, dataloader_kwargs): 27 | torch.manual_seed(seed + rank) 28 | 29 | train_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs) 30 | 31 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum) 32 | for epoch in range(1, epochs + 1): 33 | model.train() 34 | pid = os.getpid() 35 | for batch_idx, (data, target) in enumerate(train_loader): 36 | optimizer.zero_grad() 37 | output = model(data.to(device)) 38 | loss = F.nll_loss(output, target.to(device)) 39 | loss.backward() 40 | optimizer.step() 41 | if batch_idx % log_interval == 0: 42 | print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 43 | pid, epoch, batch_idx * len(data), len(train_loader.dataset), 44 | 100. 
* batch_idx / len(train_loader), loss.item())) 45 | if dry_run: 46 | break 47 | 48 | 49 | def test(model, device, dataset, dataloader_kwargs): 50 | torch.manual_seed(seed) 51 | test_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs) 52 | 53 | model.eval() 54 | test_loss = 0 55 | correct = 0 56 | with torch.no_grad(): 57 | for data, target in test_loader: 58 | output = model(data.to(device)) 59 | test_loss += F.nll_loss(output, target.to(device), reduction='sum').item() # sum up batch loss 60 | pred = output.max(1)[1] # get the index of the max log-probability 61 | correct += pred.eq(target.to(device)).sum().item() 62 | 63 | test_loss /= len(test_loader.dataset) 64 | print('\nTest set: Global loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 65 | test_loss, correct, len(test_loader.dataset), 66 | 100. * correct / len(test_loader.dataset))) 67 | 68 | 69 | class Net(nn.Module): 70 | def __init__(self): 71 | super(Net, self).__init__() 72 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 73 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 74 | self.conv2_drop = nn.Dropout2d() 75 | self.fc1 = nn.Linear(320, 50) 76 | self.fc2 = nn.Linear(50, 10) 77 | 78 | def forward(self, x): 79 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 80 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 81 | x = x.view(-1, 320) 82 | x = F.relu(self.fc1(x)) 83 | x = F.dropout(x, training=self.training) 84 | x = self.fc2(x) 85 | return F.log_softmax(x, dim=1) 86 | 87 | 88 | if __name__ == '__main__': 89 | use_cuda = cuda and torch.cuda.is_available() 90 | use_mps = mps and torch.backends.mps.is_available() 91 | if use_cuda: 92 | device = torch.device("cuda") 93 | elif use_mps: 94 | device = torch.device("mps") 95 | else: 96 | device = torch.device("cpu") 97 | 98 | print(device) 99 | 100 | transform=transforms.Compose([ 101 | transforms.ToTensor(), 102 | transforms.Normalize((0.1307,), (0.3081,)) 103 | ]) 104 | train_dataset = datasets.MNIST('../data', train=True, download=True, 105 | transform=transform) 106 | test_dataset = datasets.MNIST('../data', train=False, 107 | transform=transform) 108 | kwargs = {'batch_size': batch_size, 109 | 'shuffle': True} 110 | if use_cuda: 111 | kwargs.update({'num_workers': 1, 112 | 'pin_memory': True, 113 | }) 114 | 115 | torch.manual_seed(seed) 116 | mp.set_start_method('spawn', force=True) 117 | 118 | model = Net().to(device) 119 | model.share_memory() # gradients are allocated lazily, so they are not shared here 120 | 121 | processes = [] 122 | for rank in range(n_workers): 123 | p = mp.Process(target=train, args=(rank, model, device, 124 | train_dataset, kwargs)) 125 | # We first train the model across `n_workers` processes 126 | p.start() 127 | processes.append(p) 128 | 129 | for p in processes: 130 | p.join() 131 | 132 | # Once training is complete, we can test the model 133 | test(model, device, test_dataset, kwargs) 134 | # Test set: Global loss: 0.0325, Accuracy: 9898/10000 (99%) -------------------------------------------------------------------------------- /optimization/ma.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-05-26 21:02:38 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-01 16:25:54 8 | ''' 9 | from typing import Tuple 10 | from sklearn.datasets import load_breast_cancer 11 | import numpy as np 12 | from pyspark.sql import SparkSession 13 | from operator import add 14 | from sklearn.model_selection import 
train_test_split 15 | from sklearn.metrics import accuracy_score 16 | import matplotlib.pyplot as plt 17 | import sys 18 | import os 19 | 20 | os.environ['PYSPARK_PYTHON'] = sys.executable 21 | 22 | n_threads = 4 # Number of local threads 23 | n_iterations = 300 # Number of iterations 24 | eta = 0.1 25 | mini_batch_fraction = 0.1 # the fraction of mini batch sample 26 | n_local_iterations = 5 # the number local epochs 27 | 28 | def logistic_f(x, w): 29 | return 1 / (np.exp(-x.dot(w)) + 1 +1e-6) 30 | 31 | 32 | def gradient(pt_w: Tuple): 33 | """ Compute linear regression gradient for a matrix of data points 34 | """ 35 | idx, (point, w) = pt_w 36 | y = point[-1] # point label 37 | x = point[:-1] # point coordinate 38 | # For each point (x, y), compute gradient function, then sum these up 39 | return (idx, (w, - (y - logistic_f(x, w)) * x)) 40 | 41 | 42 | def update_local_w(iter): 43 | iter = list(iter) 44 | idx, (w, _) = iter[0] 45 | g_mean = np.mean(np.array([ g for _, (_, g) in iter]), axis=0) 46 | return [(idx, w - eta * g_mean)] 47 | 48 | 49 | def draw_acc_plot(accs, n_iterations): 50 | def ewma_smooth(accs, alpha=0.9): 51 | s_accs = np.zeros(n_iterations) 52 | for idx, acc in enumerate(accs): 53 | if idx == 0: 54 | s_accs[idx] = acc 55 | else: 56 | s_accs[idx] = alpha * s_accs[idx-1] + (1 - alpha) * acc 57 | return s_accs 58 | 59 | s_accs = ewma_smooth(accs, alpha=0.9) 60 | plt.plot(np.arange(1, n_iterations + 1), accs, color="C0", alpha=0.3) 61 | plt.plot(np.arange(1, n_iterations + 1), s_accs, color="C0") 62 | plt.title(label="Accuracy on test dataset") 63 | plt.xlabel("Round") 64 | plt.ylabel("Accuracy") 65 | plt.savefig("ma_acc_plot.png") 66 | 67 | 68 | if __name__ == "__main__": 69 | 70 | X, y = load_breast_cancer(return_X_y=True) 71 | 72 | D = X.shape[1] 73 | 74 | X_train, X_test, y_train, y_test = train_test_split( 75 | X, y, test_size=0.3, random_state=0, shuffle=True) 76 | n_train, n_test = X_train.shape[0], X_test.shape[0] 77 | 78 | spark = SparkSession\ 79 | .builder\ 80 | .appName("Model Average")\ 81 | .master("local[%d]" % n_threads)\ 82 | .getOrCreate() 83 | 84 | matrix = np.concatenate( 85 | [X_train, np.ones((n_train, 1)), y_train.reshape(-1, 1)], axis=1) 86 | 87 | points = spark.sparkContext.parallelize(matrix).cache() 88 | points = points.mapPartitionsWithIndex(lambda idx, iter: [ (idx, arr) for arr in iter]) 89 | 90 | ws = spark.sparkContext.parallelize(2 * np.random.ranf(size=(n_threads, D + 1)) - 1).cache() 91 | ws = ws.mapPartitionsWithIndex(lambda idx, iter: [(idx, next(iter))]) 92 | 93 | w = 2 * np.random.ranf(size=D + 1) - 1 94 | print("Initial w: " + str(w)) 95 | 96 | accs = [] 97 | for t in range(n_iterations): 98 | print("On iteration %d" % (t + 1)) 99 | w_br = spark.sparkContext.broadcast(w) 100 | ws = ws.mapPartitions(lambda iter: [(iter[0][0], w_br.value)]) 101 | 102 | for local_t in range(n_local_iterations): 103 | ws = points.sample(False, mini_batch_fraction, 42 + t)\ 104 | .join(ws, numPartitions=n_threads)\ 105 | .map(lambda pt_w: gradient(pt_w))\ 106 | .mapPartitions(update_local_w) 107 | 108 | par_w_sum = ws.mapPartitions(lambda iter: [iter[0][1]]).treeAggregate(0.0, add, add) 109 | 110 | w = par_w_sum / n_threads 111 | 112 | y_pred = logistic_f(np.concatenate( 113 | [X_test, np.ones((n_test, 1))], axis=1), w) 114 | pred_label = np.where(y_pred < 0.5, 0, 1) 115 | acc = accuracy_score(y_test, pred_label) 116 | accs.append(acc) 117 | print("iterations: %d, accuracy: %f" % (t, acc)) 118 | 119 | print("Final w: %s " % w) 120 | print("Final acc: %f" % 
acc) 121 | 122 | spark.stop() 123 | 124 | draw_acc_plot(accs, n_iterations) 125 | 126 | 127 | # Final w: [ 3.61341700e+01 5.45002149e+01 2.13992526e+02 1.09001657e+02 128 | # -1.51389834e-03 3.94825208e-01 -9.31372452e-01 -7.19189889e-01 129 | # 3.73256677e-01 4.47409722e-01 2.15583787e-01 3.54025928e+00 130 | # -2.36514711e+00 -1.33926557e+02 -3.50239176e-01 -3.85030823e-01 131 | # 6.86489587e-01 -9.21881175e-01 -5.91052918e-01 -6.89098538e-01 132 | # 3.72997343e+01 6.89626320e+01 2.16316126e+02 -1.45316947e+02 133 | # -5.57393906e-01 -2.76067571e-01 -1.97759353e+00 1.54739454e-01 134 | # 1.26245157e-01 7.73083761e-01 4.00455457e+00] 135 | # Final acc: 0.853801 -------------------------------------------------------------------------------- /optimization/ssgd.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-05-26 21:02:38 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-02 11:49:57 8 | ''' 9 | from sklearn.datasets import load_breast_cancer 10 | import numpy as np 11 | from pyspark.sql import SparkSession 12 | from operator import add 13 | from sklearn.model_selection import train_test_split 14 | from sklearn.metrics import accuracy_score 15 | import matplotlib.pyplot as plt 16 | import sys 17 | import os 18 | 19 | os.environ['PYSPARK_PYTHON'] = sys.executable 20 | 21 | n_threads = 4 # Number of local threads 22 | n_iterations = 1500 # Number of iterations 23 | eta = 0.1 24 | mini_batch_fraction = 0.1 # the fraction of mini batch sample 25 | lam = 0 # coefficient of regular term 26 | 27 | def logistic_f(x, w): 28 | return 1 / (np.exp(-x.dot(w)) + 1) 29 | 30 | 31 | def gradient(point: np.ndarray, w: np.ndarray): 32 | """ Compute linear regression gradient for a matrix of data points 33 | """ 34 | y = point[-1] # point label 35 | x = point[:-1] # point coordinate 36 | # For each point (x, y), compute gradient function, then sum these up 37 | return - (y - logistic_f(x, w)) * x 38 | 39 | 40 | def reg_gradient(w, reg_type="l2", alpha=0): 41 | """ gradient for reg_term 42 | """ 43 | assert(reg_type in ["none", "l2", "l1", "elastic_net"]) 44 | if reg_type == "none": 45 | return 0 46 | elif reg_type == "l2": 47 | return w 48 | elif reg_type == "l1": 49 | return np.sign(w) 50 | else: 51 | return alpha * np.sign(w) + (1 - alpha) * w 52 | 53 | 54 | def draw_acc_plot(accs, n_iterations): 55 | def ewma_smooth(accs, alpha=0.9): 56 | s_accs = np.zeros(n_iterations) 57 | for idx, acc in enumerate(accs): 58 | if idx == 0: 59 | s_accs[idx] = acc 60 | else: 61 | s_accs[idx] = alpha * s_accs[idx-1] + (1 - alpha) * acc 62 | return s_accs 63 | 64 | s_accs = ewma_smooth(accs, alpha=0.9) 65 | plt.plot(np.arange(1, n_iterations + 1), accs, color="C0", alpha=0.3) 66 | plt.plot(np.arange(1, n_iterations + 1), s_accs, color="C0") 67 | plt.title(label="Accuracy on test dataset") 68 | plt.xlabel("Round") 69 | plt.ylabel("Accuracy") 70 | plt.savefig("ssgd_acc_plot.png") 71 | 72 | 73 | if __name__ == "__main__": 74 | 75 | X, y = load_breast_cancer(return_X_y=True) 76 | 77 | D = X.shape[1] 78 | X_train, X_test, y_train, y_test = train_test_split( 79 | X, y, test_size=0.3, random_state=0, shuffle=True) 80 | n_train, n_test = X_train.shape[0], X_test.shape[0] 81 | 82 | spark = SparkSession\ 83 | .builder\ 84 | .appName("SSGD")\ 85 | .master("local[%d]" % n_threads)\ 86 | .getOrCreate() 87 | 88 | matrix = np.concatenate( 89 | [X_train, np.ones((n_train, 1)), y_train.reshape(-1, 1)], axis=1) 90 | 91 | 
points = spark.sparkContext.parallelize(matrix).cache() 92 | 93 | # Initialize w to a random value 94 | w = 2 * np.random.ranf(size=D + 1) - 1 95 | print("Initial w: " + str(w)) 96 | 97 | accs = [] 98 | for t in range(n_iterations): 99 | print("On iteration %d" % (t + 1)) 100 | w_br = spark.sparkContext.broadcast(w) 101 | 102 | (g, mini_batch_size) = points.sample(False, mini_batch_fraction, 42 + t)\ 103 | .map(lambda point: gradient(point, w_br.value))\ 104 | .treeAggregate( 105 | (0.0, 0),\ 106 | seqOp=lambda res, g: (res[0] + g, res[1] + 1),\ 107 | combOp=lambda res_1, res_2: (res_1[0] + res_2[0], res_1[1] + res_2[1]) 108 | ) 109 | 110 | w -= eta * (g/mini_batch_size + lam * reg_gradient(w, "l2")) 111 | 112 | y_pred = logistic_f(np.concatenate( 113 | [X_test, np.ones((n_test, 1))], axis=1), w) 114 | pred_label = np.where(y_pred < 0.5, 0, 1) 115 | acc = accuracy_score(y_test, pred_label) 116 | accs.append(acc) 117 | print("iterations: %d, accuracy: %f" % (t, acc)) 118 | 119 | print("Final w: %s " % w) 120 | print("Final acc: %f" % acc) 121 | 122 | spark.stop() 123 | 124 | draw_acc_plot(accs, n_iterations) 125 | 126 | 127 | # Final w: [ 3.58216967e+01 4.53599397e+01 2.07040135e+02 8.52414269e+01 128 | # 4.33038042e-01 -2.93986236e-01 1.43286366e-01 -2.95961229e-01 129 | # -7.63362321e-02 -3.93180625e-01 8.19325971e-01 3.30881477e+00 130 | # -3.25867503e+00 -1.24769634e+02 -8.52691792e-01 -5.18037887e-01 131 | # -1.34380402e-01 -7.49316038e-01 -8.76722455e-01 9.23748261e-01 132 | # 3.81531205e+01 5.56880612e+01 2.04895002e+02 -1.17586430e+02 133 | # 8.92355523e-01 -9.40611324e-01 -9.24082612e-01 -1.16210791e+00 134 | # 7.10117706e-01 -7.62921434e-02 4.48389687e+00] 135 | # Final acc: 0.929825 -------------------------------------------------------------------------------- /optimization/ssgd_pytorch.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.multiprocessing as mp 4 | from torch.multiprocessing import Barrier 5 | from torchvision import datasets, transforms 6 | from torch.utils.data import Subset 7 | import os 8 | import torch 9 | import torch.optim as optim 10 | import torch.nn.functional as F 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | 15 | batch_size = 64 # input batch size for training 16 | test_batch_size = 1000 # input batch size for testing 17 | epochs = 3 # number of global epochs to train 18 | lr = 0.01 # learning rate 19 | momentum = 0.5 # SGD momentum 20 | seed = 1 # random seed 21 | log_interval = 10 # how many batches to wait before logging training status 22 | n_workers = 4 # how many training processes to use 23 | cuda = True # enables CUDA training 24 | mps = False # enables macOS GPU training 25 | 26 | 27 | class CustomSubset(Subset): 28 | '''A custom subset class with customizable data transformation''' 29 | def __init__(self, dataset, indices, subset_transform=None): 30 | super().__init__(dataset, indices) 31 | self.subset_transform = subset_transform 32 | 33 | def __getitem__(self, idx): 34 | x, y = self.dataset[self.indices[idx]] 35 | if self.subset_transform: 36 | x = self.subset_transform(x) 37 | return x, y 38 | 39 | def __len__(self): 40 | return len(self.indices) 41 | 42 | 43 | def dataset_split(dataset, n_workers): 44 | n_samples = len(dataset) 45 | n_sample_per_workers = n_samples // n_workers 46 | local_datasets = [] 47 | for w_id in range(n_workers): 48 | if w_id < n_workers - 1: 49 | 
local_datasets.append(CustomSubset(dataset, range(w_id * n_sample_per_workers, (w_id + 1) * n_sample_per_workers))) 50 | else: 51 | local_datasets.append(CustomSubset(dataset, range(w_id * n_sample_per_workers, n_samples))) 52 | return local_datasets 53 | 54 | 55 | def pull_down(global_W, local_Ws, n_workers): 56 | # pull down global model to local 57 | for rank in range(n_workers): 58 | for name, value in local_Ws[rank].items(): 59 | local_Ws[rank][name].data = global_W[name].data 60 | 61 | 62 | def aggregate(global_W, local_Ws, n_workers): 63 | # init the global model 64 | for name, value in global_W.items(): 65 | global_W[name].data = torch.zeros_like(value) 66 | 67 | for rank in range(n_workers): 68 | for name, value in local_Ws[rank].items(): 69 | global_W[name].data += value.data 70 | 71 | for name in local_Ws[rank].keys(): 72 | global_W[name].data /= n_workers 73 | 74 | 75 | class Net(nn.Module): 76 | def __init__(self): 77 | super(Net, self).__init__() 78 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 79 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 80 | self.conv2_drop = nn.Dropout2d() 81 | self.fc1 = nn.Linear(320, 50) 82 | self.fc2 = nn.Linear(50, 10) 83 | 84 | def forward(self, x): 85 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 86 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 87 | x = x.view(-1, 320) 88 | x = F.relu(self.fc1(x)) 89 | x = F.dropout(x, training=self.training) 90 | x = self.fc2(x) 91 | return F.log_softmax(x, dim=1) 92 | 93 | 94 | def train_epoch(epoch, rank, local_model, device, dataset, synchronizer, dataloader_kwargs): 95 | torch.manual_seed(seed + rank) 96 | train_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs) 97 | optimizer = optim.SGD(local_model.parameters(), lr=lr, momentum=momentum) 98 | 99 | local_model.train() 100 | pid = os.getpid() 101 | for batch_idx, (data, target) in enumerate(train_loader): 102 | optimizer.zero_grad() 103 | output = local_model(data.to(device)) 104 | loss = F.nll_loss(output, target.to(device)) 105 | loss.backward() 106 | optimizer.step() 107 | if batch_idx % log_interval == 0: 108 | print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 109 | pid, epoch + 1, batch_idx * len(data), len(train_loader.dataset), 110 | 100. * batch_idx / len(train_loader), loss.item())) 111 | 112 | synchronizer.wait() 113 | 114 | 115 | def test(epoch, model, device, dataset, dataloader_kwargs): 116 | torch.manual_seed(seed) 117 | test_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs) 118 | 119 | model.eval() 120 | test_loss = 0 121 | correct = 0 122 | with torch.no_grad(): 123 | for data, target in test_loader: 124 | output = model(data.to(device)) 125 | test_loss += F.nll_loss(output, target.to(device), reduction='sum').item() # sum up batch loss 126 | pred = output.max(1)[1] # get the index of the max log-probability 127 | correct += pred.eq(target.to(device)).sum().item() 128 | 129 | test_loss /= len(test_loader.dataset) 130 | print('\nTest Epoch: {} Global loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 131 | epoch + 1, test_loss, correct, len(test_loader.dataset), 132 | 100. 
* correct / len(test_loader.dataset))) 133 | 134 | 135 | if __name__ == "__main__": 136 | use_cuda = cuda and torch.cuda.is_available() 137 | use_mps = mps and torch.backends.mps.is_available() 138 | if use_cuda: 139 | device = torch.device("cuda") 140 | elif use_mps: 141 | device = torch.device("mps") 142 | else: 143 | device = torch.device("cpu") 144 | 145 | transform=transforms.Compose([ 146 | transforms.ToTensor(), 147 | transforms.Normalize((0.1307,), (0.3081,)) 148 | ]) 149 | train_dataset = datasets.MNIST('../data', train=True, download=True, 150 | transform=transform) 151 | test_dataset = datasets.MNIST('../data', train=False, download=True, 152 | transform=transform) 153 | local_train_datasets = dataset_split(train_dataset, n_workers) 154 | 155 | kwargs = {'batch_size': batch_size, 156 | 'shuffle': True} 157 | if use_cuda: 158 | kwargs.update({'num_workers': 1, # num_workers to load data 159 | 'pin_memory': True, 160 | }) 161 | 162 | torch.manual_seed(seed) 163 | mp.set_start_method('spawn', force=True) 164 | # Very important, otherwise CUDA memory cannot be allocated in the child process 165 | 166 | local_models = [Net().to(device) for i in range(n_workers)] 167 | global_model = Net().to(device) 168 | local_Ws = [{key: value for key, value in local_models[i].named_parameters()} for i in range(n_workers)] 169 | global_W = {key: value for key, value in global_model.named_parameters()} 170 | 171 | synchronizer = Barrier(n_workers) 172 | for epoch in range(epochs): 173 | for rank in range(n_workers): 174 | # pull down global model to local 175 | pull_down(global_W, local_Ws, n_workers) 176 | 177 | processes = [] 178 | for rank in range(n_workers): 179 | p = mp.Process(target=train_epoch, args=(epoch, rank, local_models[rank], device, 180 | local_train_datasets[rank], synchronizer, kwargs)) 181 | # We first train the model across `num_processes` processes 182 | p.start() 183 | processes.append(p) 184 | 185 | for p in processes: 186 | p.join() 187 | 188 | aggregate(global_W, local_Ws, n_workers) 189 | 190 | # We test the model each epoch 191 | test(epoch, global_model, device, test_dataset, kwargs) 192 | # Test result for synchronous training:Test Epoch: 3 Global loss: 0.0732, Accuracy: 9796/10000 (98%) 193 | # Test result for asynchronous training:Test Epoch: 3 Global loss: 0.0742, Accuracy: 9789/10000 (98%) 194 | -------------------------------------------------------------------------------- /pic/DistributedML-cover.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orion-orion/Distributed-ML-PySpark/051790d6bc8d034cfa6af19e7d4f820f4c1fa6d6/pic/DistributedML-cover.jpeg -------------------------------------------------------------------------------- /randomized_algorithm/monte_carlo.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Descripttion: 3 | Version: 1.0 4 | Author: ZhangHongYu 5 | Date: 2022-07-01 21:28:32 6 | LastEditors: ZhangHongYu 7 | LastEditTime: 2022-07-01 21:48:31 8 | ''' 9 | from random import random 10 | from operator import add 11 | from pyspark.sql import SparkSession 12 | import sys 13 | import os 14 | 15 | os.environ['PYSPARK_PYTHON'] = sys.executable 16 | 17 | n_threads = 4 # Number of local threads 18 | # times of sampling 19 | n = 100000 * n_threads 20 | 21 | def is_accept(_: int) -> int: 22 | x = random() * 2 - 1 23 | y = random() * 2 - 1 24 | return 1 if x ** 2 + y ** 2 <= 1 else 0 25 | 26 | if __name__ == "__main__": 27 | spark = 
SparkSession\ 28 | .builder\ 29 | .appName("monte_carlo")\ 30 | .master("local[%d]" % n_threads)\ 31 | .getOrCreate() 32 | 33 | count = spark.sparkContext.parallelize(range(n)).map(is_accept).reduce(add) 34 | 35 | # The ratio of the area of the unit circle to the area of the enclosing 2x2 square is pi/4, so count/n ≈ pi/4. 36 | print("Pi is roughly %f" % (4.0 * count / n)) 37 | 38 | spark.stop() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.3 2 | matplotlib==3.4.3 3 | scikit-learn==1.1.0 4 | torch==1.8.0 5 | torchvision==0.9.0 6 | pyspark==3.3.2 --------------------------------------------------------------------------------