├── CPLEX MinCostMaxFlow ├── capacities.txt ├── costs.txt ├── cplex_mcmf.py └── flows.txt ├── Huffman_Encoding ├── demofile.txt ├── huffman.py └── init.txt ├── README.md ├── Sparse_Matrix ├── SparseMatrix_med.py ├── SparseMatrix_test_med.py ├── basic_usages.py └── linkedlist_med.py └── SubsetSum ├── init.txt ├── itunes.py ├── itunes_file.txt ├── subsetsum_med.py └── subsetsum_test_med.py /CPLEX MinCostMaxFlow/capacities.txt: -------------------------------------------------------------------------------- 1 | 1 2 10 2 | 1 3 90 3 | 1 4 90 4 | 2 3 90 5 | 3 5 80 6 | 4 5 90 7 | 5 4 90 8 | -------------------------------------------------------------------------------- /CPLEX MinCostMaxFlow/costs.txt: -------------------------------------------------------------------------------- 1 | 1 2 2 2 | 1 3 4 3 | 1 4 9 4 | 2 3 3 5 | 3 5 1 6 | 4 5 3 7 | 5 4 2 8 | -------------------------------------------------------------------------------- /CPLEX MinCostMaxFlow/cplex_mcmf.py: -------------------------------------------------------------------------------- 1 | from docplex.mp.model import Model 2 | 3 | def minimum_cost_flow_problem(costs_file, capacities_file, flows_file): 4 | file = open(costs_file, "r") 5 | 6 | # costs (c) 7 | costs = [] 8 | # capacities (u) 9 | capacities = [] 10 | # flows (b) 11 | flows = [] 12 | 13 | 14 | for line in file: 15 | costs.append(line.strip(" ")[2]) 16 | file.close() 17 | 18 | file = open(capacities_file, "r") 19 | for line in file: 20 | capacities.append(line.strip(" ")[2]) 21 | file.close() 22 | 23 | file = open(flows_file, "r") 24 | for line in file: 25 | flows.append(line.strip(" ")[1]) 26 | file.close() 27 | 28 | 29 | model = Model('Max Flow/Min Cost') 30 | 31 | # Node Names x[0], x[1], x[2], x[3], x[4], x[5], x[6] to solve 32 | names = ["12", "13", "14", "23", "35", "45", "54"] 33 | # costs (c) 34 | costs = [2, 4, 9, 3, 1, 3, 2] 35 | # capacities (u) 36 | capacities = [10, 90, 90, 90, 80, 90, 90] 37 | # flows (b) 38 | flows = [50, 40, 0,-30,-60] 39 | #x 40 | x = model.continuous_var_list(7, name="x") 41 | 42 | objective = sum(costs[i]*x[i] for i in range(7)) 43 | model.set_objective("min", objective) 44 | 45 | # constraints 46 | 47 | model.add_constraint(0 <= x[0]) 48 | model.add_constraint(x[0] <= capacities[0]) 49 | model.add_constraint(0 <= x[1]) 50 | model.add_constraint(x[1] <= capacities[1]) 51 | model.add_constraint(0 <= x[2]) 52 | model.add_constraint(x[2] <= capacities[2]) 53 | model.add_constraint(0 <= x[3]) 54 | model.add_constraint(x[3] <= capacities[3]) 55 | model.add_constraint(0 <= x[4]) 56 | model.add_constraint(x[4] <= capacities[4]) 57 | model.add_constraint(0 <= x[5]) 58 | model.add_constraint(x[5] <= capacities[5]) 59 | model.add_constraint(0 <= x[6]) 60 | model.add_constraint(x[6] <= capacities[6]) 61 | model.add_constraint((x[0] + x[1] + x[2]) == flows[0]) 62 | model.add_constraint((-x[0] + x[3]) == flows[1]) 63 | model.add_constraint((-x[1] + -x[3] + x[4]) == flows[2]) 64 | model.add_constraint((-x[2] + x[5] - x[6]) == flows[3]) 65 | model.add_constraint((-x[4] - x[5] + x[6]) == flows[4]) 66 | 67 | model.print_information() 68 | solution = model.solve() 69 | 70 | model.print_solution() 71 | 72 | objective = solution.get_objective_value() 73 | x = solution.get_value_list(x) 74 | 75 | return(x, objective) 76 | 77 | def _main(): 78 | 79 | minimum_cost_flow_problem("costs.txt", "capacities.txt", "flows.txt") 80 | 81 | 82 | if __name__ == "__main__": 83 | _main() -------------------------------------------------------------------------------- /CPLEX MinCostMaxFlow/flows.txt: -------------------------------------------------------------------------------- 1 | 1 50 2 | 2 40 3 | 3 0 4 | 4 -30 5 | 5 -60 6 | -------------------------------------------------------------------------------- /Huffman_Encoding/demofile.txt: -------------------------------------------------------------------------------- 1 | Data compression is a reduction in the number of bits needed to represent data. Compressing data can save storage capacity, speed up file transfer, and decrease costs for storage hardware and network bandwidth. 2 | 3 | How compression works 4 | Compression is performed by a program that uses a formula or algorithm to determine how to shrink the size of the data. For instance, an algorithm may represent a string of bits -- or 0s and 1s -- with a smaller string of 0s and 1s by using a dictionary for the conversion between them, or the formula may insert a reference or pointer to a string of 0s and 1s that the program has already seen. 5 | 6 | Text compression can be as simple as removing all unneeded characters, inserting a single repeat character to indicate a string of repeated characters and substituting a smaller bit string for a frequently occurring bit string. Data compression can reduce a text file to 50% or a significantly higher percentage of its original size. 7 | 8 | For data transmission, compression can be performed on the data content or on the entire transmission unit, including header data. When information is sent or received via the internet, larger files, either singly or with others as part of an archive file, may be transmitted in a ZIP, GZIP or other compressed format. 9 | 10 | Why is data compression important? 11 | Data compression can dramatically decrease the amount of storage a file takes up. For example, in a 2:1 compression ratio, a 20 megabyte (MB) file takes up 10 MB of space. As a result of compression, administrators spend less money and less time on storage. 12 | 13 | Compression optimizes backup storage performance and has recently shown up in primary storage data reduction. Compression will be an important method of data reduction as data continues to grow exponentially. 14 | 15 | Virtually any type of file can be compressed, but it's important to follow best practices when choosing which ones to compress. For example, some files may already come compressed, so compressing those files would not have a significant impact. 16 | 17 | Data compression methods: lossless and lossy compression 18 | Compressing data can be a lossless or lossy process. Lossless compression enables the restoration of a file to its original state, without the loss of a single bit of data, when the file is uncompressed. Lossless compression is the typical approach with executables, as well as text and spreadsheet files, where the loss of words or numbers would change the information. 19 | 20 | Lossy compression permanently eliminates bits of data that are redundant, unimportant or imperceptible. Lossy compression is useful with graphics, audio, video and images, where the removal of some data bits has little or no discernible effect on the representation of the content. 21 | 22 | Graphics image compression can be lossy or lossless. Graphic image file formats are typically designed to compress information since the files tend to be large. JPEG is an image file format that supports lossy image compression. Formats such as GIF and PNG use lossless compression. 23 | 24 | Compression vs. data deduplication 25 | Compression is often compared to data deduplication, but the two techniques operate differently. Deduplication is a type of compression that looks for redundant chunks of data across a storage or file system and then replaces each duplicate chunk with a pointer to the original. Data compression algorithms reduce the size of the bit strings in a data stream that is far smaller in scope and generally remembers no more than the last megabyte or less of data. 26 | 27 | OK 28 | Close Modal Dialog 29 | Taneja Group analyst Mike Matchett discussed the benefits of compression and deduplication and how the two differ. 30 | 31 | File-level deduplication eliminates redundant files and replaces them with stubs pointing to the original file. Block-level deduplication identifies duplicate data at the subfile level. The system saves unique instances of each block, uses a hash algorithm to process them and generates a unique identifier to store them in an index. Deduplication typically looks for larger chunks of duplicate data than compression, and systems can deduplicate using a fixed or variable-sized chunk. 32 | 33 | Deduplication is most effective in environments that have a high degree of redundant data, such as virtual desktop infrastructure or storage backup systems. Data compression tends to be more effective than deduplication in reducing the size of unique information, such as images, audio, videos, databases and executable files. Many storage systems support both compression and deduplication. 34 | 35 | Data compression and backup 36 | Compression is often used for data that's not accessed much, as the process can be intensive and slow down systems. Administrators, though, can seamlessly integrate compression in their backup systems. 37 | 38 | Backup is a redundant type of workload, as the process captures the same files frequently. An organization that performs full backups will often have close to the same data from backup to backup. 39 | 40 | There are major benefits to compressing data prior to backup: 41 | 42 | Data takes up less space, as a compression ratio can reach 100:1, but between 2:1 and 5:1 is common. 43 | If compression is done in a server prior to transmission, the time needed to transmit the data and the total network bandwidth are drastically reduced. 44 | On tape, the compressed, smaller file system image can be scanned faster to reach a particular file, reducing restore latency. 45 | Compression is supported by backup software and tape libraries, so there is a choice of data compression techniques. 46 | Pros and cons of compression 47 | The main advantages of compression are a reduction in storage hardware, data transmission time and communication bandwidth -- and the resulting cost savings. A compressed file requires less storage capacity than an uncompressed file, and the use of compression can lead to a significant decrease in expenses for disk and/or solid-state drives. A compressed file also requires less time for transfer, and it consumes less network bandwidth than an uncompressed file. 48 | 49 | The main disadvantage of data compression is the performance impact resulting from the use of CPU and memory resources to compress the data and perform decompression. Many vendors have designed their systems to try to minimize the impact of the processor-intensive calculations associated with compression. If the compression runs inline, before the data is written to disk, the system may offload compression to preserve system resources. For instance, IBM uses a separate hardware acceleration card to handle compression with some of its enterprise storage systems. 50 | 51 | If data is compressed after it is written to disk, or post-process, the compression may run in the background to reduce the performance impact. Although post-process compression can reduce the response time for each input/output (I/O), it still consumes memory and processor cycles and can affect the overall number of I/Os a storage system can handle. Also, because data initially must be written to disk or flash drives in an uncompressed form, the physical storage savings are not as great as they are with inline compression. 52 | 53 | Data compression techniques: File system compression 54 | File system compression takes a fairly straightforward approach to reducing the storage footprint of data by transparently compressing each file as it is written. 55 | 56 | Many of the popular Linux file systems -- including Reiser4, ZFS and btrfs -- and Microsoft NTFS have a compression option. The server compresses chunks of data in a file and then writes the smaller fragments to storage. 57 | 58 | Read-back involves a relatively small latency to expand each fragment, while writing adds substantial load to the server, so compression is usually not recommended for data that is volatile. File system compression can weaken performance, so it should be deployed selectively on files that are not accessed frequently. 59 | 60 | Historically, with the expensive hard drives of early computers, data compression software, such as DiskDoubler and SuperStor Pro, were popular and helped establish mainstream file system compression. 61 | 62 | Storage administrators can also apply the technique of using compression and deduplication for improved data reduction. 63 | 64 | Technologies and products that use data compression 65 | Compression is built into a wide range of technologies, including storage systems, databases, operating systems and software applications used by businesses and enterprise organizations. Compressing data is also common in consumer devices, such as laptops, PCs and mobile phones. 66 | 67 | Many systems and devices perform compression transparently, but some give users the option to turn compression on or off. It can be performed more than once on the same file or piece of data, but subsequent compressions result in little to no additional compression and may even increase the size of the file to a slight degree, depending on the data compression algorithms. 68 | 69 | WinZip is a popular Windows program that compresses files when it packages them in an archive. Archive file formats that support compression include ZIP and RAR. The BZIP2 and GZIP formats see widespread use for compressing individual files. 70 | 71 | Other vendors that offer compression include Dell EMC with its XtremIO all-flash array, Kaminario with its K2 all-flash array and RainStor with its data compression software. 72 | 73 | Data differencing 74 | Data differencing is a general term for comparing the contents of two data objects. In the context of compression, it involves repetitively searching through the target file to find similar blocks and replacing them with a reference to a library object. This process repeats until it finds no additional duplicate objects. Data differencing can result in many compressed files with just one element in the library representing each duplicated object. 75 | 76 | In virtual desktops, this technique can feature a compression ratio of as much as 100:1. The process is often more closely aligned with deduplication, which looks for identical files or objects, rather than within the content of each object. 77 | 78 | Data differencing is sometimes referred to as deduplication. -------------------------------------------------------------------------------- /Huffman_Encoding/huffman.py: -------------------------------------------------------------------------------- 1 | # A Huffman Tree Node 2 | class Node: 3 | def __init__(self, prob, symbol, left=None, right=None): 4 | # probability of symbol 5 | self.prob = prob 6 | 7 | # symbol 8 | self.symbol = symbol 9 | 10 | # left node 11 | self.left = left 12 | 13 | # right node 14 | self.right = right 15 | 16 | # tree direction (0/1) 17 | self.code = '' 18 | 19 | """ A helper function to print the codes of symbols by traveling Huffman Tree""" 20 | codes = dict() 21 | 22 | def Calculate_Codes(node, val=''): 23 | # huffman code for current node 24 | newVal = val + str(node.code) 25 | 26 | if(node.left): 27 | Calculate_Codes(node.left, newVal) 28 | if(node.right): 29 | Calculate_Codes(node.right, newVal) 30 | 31 | if(not node.left and not node.right): 32 | codes[node.symbol] = newVal 33 | 34 | return codes 35 | 36 | """ A helper function to calculate the probabilities of symbols in given data""" 37 | def Calculate_Probability(data): 38 | symbols = dict() 39 | for element in data: 40 | if symbols.get(element) == None: 41 | symbols[element] = 1 42 | else: 43 | symbols[element] += 1 44 | return symbols 45 | 46 | """ A helper function to obtain the encoded output""" 47 | def Output_Encoded(data, coding): 48 | encoding_output = [] 49 | for c in data: 50 | # print(coding[c], end = '') 51 | encoding_output.append(coding[c]) 52 | 53 | string = ''.join([str(item) for item in encoding_output]) 54 | return string 55 | 56 | """ A helper function to calculate the space difference between compressed and non compressed data""" 57 | def Total_Gain(data, coding): 58 | before_compression = len(data) * 8 # total bit space to stor the data before compression 59 | after_compression = 0 60 | symbols = coding.keys() 61 | for symbol in symbols: 62 | count = data.count(symbol) 63 | after_compression += count * len(coding[symbol]) #calculate how many bit is required for that symbol in total 64 | print("Space usage before compression (in bits):", before_compression) 65 | print("Space usage after compression (in bits):", after_compression) 66 | 67 | def Huffman_Encoding(data): 68 | symbol_with_probs = Calculate_Probability(data) 69 | symbols = symbol_with_probs.keys() 70 | probabilities = symbol_with_probs.values() 71 | print("symbols: ", symbols) 72 | print("probabilities: ", probabilities) 73 | 74 | nodes = [] 75 | 76 | # converting symbols and probabilities into huffman tree nodes 77 | for symbol in symbols: 78 | nodes.append(Node(symbol_with_probs.get(symbol), symbol)) 79 | 80 | while len(nodes) > 1: 81 | # sort all the nodes in ascending order based on their probability 82 | nodes = sorted(nodes, key=lambda x: x.prob) 83 | # for node in nodes: 84 | # print(node.symbol, node.prob) 85 | 86 | # pick 2 smallest nodes 87 | right = nodes[0] 88 | left = nodes[1] 89 | 90 | left.code = 0 91 | right.code = 1 92 | 93 | # combine the 2 smallest nodes to create new node 94 | newNode = Node(left.prob+right.prob, left.symbol+right.symbol, left, right) 95 | 96 | nodes.remove(left) 97 | nodes.remove(right) 98 | nodes.append(newNode) 99 | 100 | huffman_encoding = Calculate_Codes(nodes[0]) 101 | print("symbols with codes", huffman_encoding) 102 | Total_Gain(data, huffman_encoding) 103 | encoded_output = Output_Encoded(data,huffman_encoding) 104 | return encoded_output, nodes[0] 105 | 106 | 107 | def Huffman_Decoding(encoded_data, huffman_tree): 108 | tree_head = huffman_tree 109 | decoded_output = [] 110 | for x in encoded_data: 111 | if x == '1': 112 | huffman_tree = huffman_tree.right 113 | elif x == '0': 114 | huffman_tree = huffman_tree.left 115 | try: 116 | if huffman_tree.left.symbol == None and huffman_tree.right.symbol == None: 117 | pass 118 | except AttributeError: 119 | decoded_output.append(huffman_tree.symbol) 120 | huffman_tree = tree_head 121 | 122 | string = ''.join([str(item) for item in decoded_output]) 123 | return string 124 | 125 | 126 | """ First Test """ 127 | data = "AAAAAAABCCCCCCDDEEEEE" 128 | print(data) 129 | encoding, tree = Huffman_Encoding(data) 130 | print("Encoded output", encoding) 131 | print("Decoded Output", Huffman_Decoding(encoding,tree)) 132 | 133 | 134 | """ Second Test """ 135 | 136 | # f = open("demofile.txt", "r") 137 | 138 | # data = f.read() 139 | # print(data) 140 | # Huffman_Encoding(data) 141 | 142 | 143 | -------------------------------------------------------------------------------- /Huffman_Encoding/init.txt: -------------------------------------------------------------------------------- 1 | This repo contains a Huffman Encoding implementation. 2 | 3 | huffman.py : Main code including helper functions and using 3 different test cases 4 | 5 | demofile.txt : Data for 3. test 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-Structures-and-Algorithms-with-Python 2 | This repo contains various data structures and algorithms problems solved using Python. To reach more detailed explinications, you can see my blog https://medium.com/data-structures-and-algorithms-with-python !! 3 | -------------------------------------------------------------------------------- /Sparse_Matrix/SparseMatrix_med.py: -------------------------------------------------------------------------------- 1 | from linkedlist_med import LinkedList 2 | 3 | class MatrixEntry: 4 | def __init__(self, column_number, value): 5 | self.column_number = column_number 6 | self.value = value 7 | 8 | 9 | class SparseMatrix(): 10 | def __init__(self, nrows, ncols, default_value): 11 | self.nrows = nrows 12 | self.ncols = ncols 13 | self.default_value = default_value 14 | self.top_list = list() 15 | for i in range(nrows): 16 | self.top_list.append(LinkedList()) 17 | 18 | def set(self, row, col, value): 19 | for i, d in enumerate(self.top_list[row]): 20 | if d.column_number == col : 21 | if value == self.default_value: 22 | self.top_list[row].remove(self.top_list[row].__getitem__(i)) 23 | return 24 | else: 25 | self.top_list[row].__getitem__(i).value = value 26 | return 27 | new_entry = MatrixEntry(col, value) 28 | self.check_row(row) 29 | self.check_row(col) 30 | self.top_list[row].add_to_head(new_entry) 31 | 32 | def get(self, row, col): 33 | self.check_row(row) 34 | self.check_row(col) 35 | for i, d in enumerate(self.top_list[row]): 36 | if d.column_number == col : 37 | matrix_entry = self.top_list[row].__getitem__(i) 38 | return matrix_entry.value 39 | return self.default_value 40 | 41 | def clear(self): 42 | for k in self.top_list: 43 | for i in k: 44 | self.top_list[k].remove(i) 45 | 46 | def check_row(self,row): 47 | if type(row) is not int: 48 | raise TypeError("row should be int") 49 | if row < 0 or row >= self.nrows: 50 | raise IndexError("row number is invalid") 51 | 52 | def check_col(self,col): 53 | if type(col) is not int: 54 | raise TypeError("col should be int") 55 | if col < 0 or col >= self.ncols: 56 | raise IndexError("col number is invalid") 57 | 58 | 59 | def get_row(self, row): 60 | self.check_row(row) 61 | cur_col = 0 62 | for i, ll in enumerate(self.top_list): 63 | if i == row: 64 | while cur_col < self.ncols: 65 | yield self.get(row,cur_col) 66 | cur_col += 1 67 | break 68 | 69 | def get_col(self, col): 70 | self.check_col(col) 71 | for i, ll in enumerate(self.top_list): 72 | yield self.get(i,col) 73 | 74 | def __str__(self, starting_row=0, starting_col=0, nrows=None, ncols=None): 75 | self.check_col(starting_col) 76 | self.check_row(starting_row) 77 | if nrows == None: 78 | nrows = self.nrows 79 | if ncols == None: 80 | ncols = self.ncols 81 | """We should be sure if start points + row-col size doesnt exceed the matrix size """ 82 | self.check_col(starting_col+ncols-1) 83 | self.check_row(starting_row+nrows-1) 84 | for i, ll in enumerate(self.top_list): 85 | if i >= starting_row and i < starting_row+nrows: 86 | k = 0 87 | while starting_col+k < starting_col+ncols: 88 | cur_col = starting_col+k 89 | print(self.get(i,cur_col)," ", end = '') 90 | k += 1 91 | print(" ") 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /Sparse_Matrix/SparseMatrix_test_med.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import TestCase 3 | from SparseMatrix_med import SparseMatrix 4 | import SparseMatrix_med 5 | 6 | class Test(TestCase): 7 | def test(self): 8 | Sparse_Matrix = SparseMatrix(5,5,0) #5x5 sparse matrix with default value 0 9 | self.assertEqual(Sparse_Matrix.get(0,0), 0) 10 | Sparse_Matrix.set(0,1,3) 11 | self.assertEqual(Sparse_Matrix.get(0,1), 3) 12 | Sparse_Matrix.set(0,1,4) 13 | self.assertEqual(Sparse_Matrix.get(0,1), 4) 14 | with self.assertRaises(IndexError) : Sparse_Matrix.get(5,6) 15 | with self.assertRaises(IndexError) : Sparse_Matrix.set(5,6,3) 16 | with self.assertRaises(IndexError) : Sparse_Matrix.set(6,5,3) 17 | with self.assertRaises(TypeError) : Sparse_Matrix.set(2.3,1,3) 18 | with self.assertRaises(TypeError) : Sparse_Matrix.set(1,4.5,3) 19 | """ 20 | After adding different values at the same (rox,col) 21 | we see the length of that LinkedList is 1. So its true, we remove the node and then add the new one 22 | """ 23 | self.assertEqual(len(Sparse_Matrix.top_list[0]), 1) 24 | """ 25 | On the other hand, if we add replace the node with default value, the s 26 | the size of LL decrease, so we dont keep a Matrix Entry having default value 27 | """ 28 | Sparse_Matrix.set(0,1,0) 29 | self.assertEqual(len(Sparse_Matrix.top_list[0]), 0) 30 | Sparse_Matrix.clear() 31 | """ After clear() function, the length of all LinkedLists in top_list 32 | is 0 and all the values are 0 (default valus) 33 | """ 34 | for i in Sparse_Matrix.top_list: 35 | for k in i: 36 | self.assertEqual(k,0) 37 | self.assertEqual(len(i),0) 38 | """ get_row testing """ 39 | Sparse_Matrix.set(0,2,4) 40 | Sparse_Matrix.set(0,4,2) 41 | Sparse_Matrix.set(1,2,23) 42 | Sparse_Matrix.set(3,3,1) 43 | Sparse_Matrix.set(3,4,1) 44 | gen1 = Sparse_Matrix.get_row(2.3) 45 | gen2 = Sparse_Matrix.get_row(7) 46 | with self.assertRaises(TypeError) : print(next(gen1)) 47 | with self.assertRaises(IndexError) : print(next(gen2)) 48 | gen1 = Sparse_Matrix.get_row(0) 49 | gen2 = Sparse_Matrix.get_row(3) 50 | list1 = [] 51 | try: 52 | while True: 53 | list1.append(next(gen1)) 54 | except StopIteration: 55 | pass 56 | self.assertEqual(list1, [0,0,4,0,2]) 57 | list2 = [] 58 | try: 59 | while True: 60 | list2.append(next(gen2)) 61 | except StopIteration: 62 | pass 63 | self.assertEqual(list2, [0,0,0,1,1]) 64 | 65 | """ get_col testing """ 66 | gen1 = Sparse_Matrix.get_col(2.3) 67 | gen2 = Sparse_Matrix.get_col(7) 68 | with self.assertRaises(TypeError) : print(next(gen1)) 69 | with self.assertRaises(IndexError) : print(next(gen2)) 70 | gen1 = Sparse_Matrix.get_col(2) 71 | gen2 = Sparse_Matrix.get_col(4) 72 | list1 = [] 73 | try: 74 | while True: 75 | list1.append(next(gen1)) 76 | except StopIteration: 77 | pass 78 | self.assertEqual(list1, [4,23,0,0,0]) 79 | list2 = [] 80 | try: 81 | while True: 82 | list2.append(next(gen2)) 83 | except StopIteration: 84 | pass 85 | self.assertEqual(list2, [2,0,0,1,0]) 86 | 87 | """__str__ testing """ 88 | Sparse_Matrix.set(2,1,5) 89 | Sparse_Matrix.set(4,0,8) 90 | Sparse_Matrix.set(4,4,15) 91 | 92 | print("__str__() output") 93 | Sparse_Matrix.__str__() 94 | expected_str = str('0 0 4 0 2' + "\n" + '0 0 23 0 0' + "\n" + '0 5 0 0 0' + "\n" + '0 0 0 1 1' + "\n" + '8 0 0 0 15') 95 | print("expected output") 96 | print(expected_str) 97 | 98 | print("__str__() output") 99 | Sparse_Matrix.__str__(2,3,2,2) 100 | expected_str = str('0 0' + "\n" + '1 1') 101 | print("expected output") 102 | print(expected_str) 103 | 104 | print("__str__() output") 105 | Sparse_Matrix.__str__(2,3,2,2) 106 | expected_str = str('0 0' + "\n" + '1 1') 107 | print("expected output") 108 | print(expected_str) 109 | 110 | print("__str__() output") 111 | Sparse_Matrix.__str__(1,2,1,1) 112 | expected_str = str('23') 113 | print("expected output") 114 | print(expected_str) 115 | 116 | with self.assertRaises(IndexError) : Sparse_Matrix.__str__(1,2,6,3) 117 | 118 | if __name__=='__main__': 119 | unittest.main() 120 | 121 | -------------------------------------------------------------------------------- /Sparse_Matrix/basic_usages.py: -------------------------------------------------------------------------------- 1 | """ Basic Tests to use with main code (Just Copy and Paste it under the "SparseMatrix_med.py if you want to try!) """ 2 | 3 | Sparse_Matrix = SparseMatrix(5,5,0) # 5x5 sparse matrix with default value 0 4 | Sparse_Matrix.set(0,1,3) 5 | Sparse_Matrix.set(0,1,4) # check if the value is updated for a column having already a non default value 6 | Sparse_Matrix.get(0,1) 7 | 8 | 9 | Sparse_Matrix = SparseMatrix(5,5,0) # 5x5 sparse matrix with default value 0 10 | Sparse_Matrix.set(0,1,4) 11 | Sparse_Matrix.set(0,2,4) 12 | Sparse_Matrix.set(0,4,2) 13 | Sparse_Matrix.set(1,2,23) 14 | Sparse_Matrix.set(3,3,1) 15 | Sparse_Matrix.set(3,4,1) 16 | 17 | print("The Whole Sparse Matrix") 18 | Sparse_Matrix.__str__() # see the Sparse Matrix 19 | 20 | print("A spesific part of the Sparse Matrix") 21 | Sparse_Matrix.__str__(2,3,2,2) # see the Sparse Matrix 22 | 23 | 24 | gen1 = Sparse_Matrix.get_row(0) 25 | 26 | list1 = [] 27 | try: 28 | while True: 29 | list1.append(next(gen1)) 30 | except StopIteration: 31 | pass 32 | 33 | print("row 0") 34 | print(list1) 35 | 36 | gen2 = Sparse_Matrix.get_row(3) 37 | list2 = [] 38 | try: 39 | while True: 40 | list2.append(next(gen2)) 41 | except StopIteration: 42 | pass 43 | 44 | print("row 3") 45 | print(list2) 46 | 47 | 48 | gen1 = Sparse_Matrix.get_col(2) 49 | gen2 = Sparse_Matrix.get_col(4) 50 | list1 = [] 51 | try: 52 | while True: 53 | list1.append(next(gen1)) 54 | except StopIteration: 55 | pass 56 | 57 | list2 = [] 58 | try: 59 | while True: 60 | list2.append(next(gen2)) 61 | except StopIteration: 62 | pass 63 | 64 | print("column 0") 65 | print(list1) 66 | 67 | print("column 4") 68 | print(list2) -------------------------------------------------------------------------------- /Sparse_Matrix/linkedlist_med.py: -------------------------------------------------------------------------------- 1 | class LinkedListNode: 2 | def __init__(self, data): 3 | self._data = data 4 | self._next = None 5 | 6 | @property 7 | def data(self): 8 | return self._data 9 | 10 | @data.setter 11 | def data(self, new_data): 12 | self._data = new_data 13 | 14 | @property 15 | def next(self): 16 | return self._next 17 | 18 | @next.setter 19 | def next(self, new_next): 20 | self._next = new_next 21 | 22 | def __str__(self): 23 | return f"{self._data}" 24 | 25 | def __repr__(self): 26 | return f"data={self._data}, next={id(self._next)}" 27 | 28 | 29 | class LinkedList: 30 | def __init__(self, iterable=None): 31 | self._head = None 32 | self._size = 0 33 | # Added 4/5/21: made it easier to initialize the list 34 | if iterable is not None: 35 | for i in reversed(iterable): 36 | self.add_to_head(i) 37 | 38 | @property 39 | def size(self): 40 | return self._size 41 | 42 | def __len__(self): 43 | return self.size 44 | 45 | def add_to_head(self, data): 46 | node = LinkedListNode(data) 47 | node.next = self._head 48 | self._head = node 49 | self._size += 1 50 | 51 | def __iter__(self): 52 | curr = self._head 53 | while curr: 54 | yield curr.data 55 | curr = curr.next 56 | 57 | def __str__(self): 58 | return " ".join([str(d) for d in self]) 59 | 60 | def find(self, data): 61 | """ 62 | :param data: the data to look for in the list 63 | :return: data if it exists in the list 64 | :raise: KeyError if data is not in the list 65 | """ 66 | for d in self: 67 | if d == data: 68 | return d 69 | raise KeyError 70 | 71 | def __contains__(self, data): 72 | try: 73 | self.find(data) 74 | return True 75 | except KeyError: 76 | return False 77 | 78 | def __getitem__(self, index): 79 | self.validate_index(index) 80 | 81 | for i, d in enumerate(self): 82 | if i == index: 83 | return d 84 | 85 | def validate_index(self, index): 86 | if type(index) is not int: 87 | raise TypeError("index should be int") 88 | 89 | if index < 0 or index >= self.size: 90 | raise ValueError(f"index {index} is invalid") 91 | 92 | def remove(self, data): 93 | prev, curr = None, self._head 94 | while curr: 95 | if data == curr.data: 96 | if prev: 97 | prev.next = curr.next 98 | else: 99 | self._head = curr.next 100 | self._size -= 1 101 | return 102 | prev, curr = curr, curr.next 103 | raise KeyError 104 | -------------------------------------------------------------------------------- /SubsetSum/init.txt: -------------------------------------------------------------------------------- 1 | This repo contains an example implementation of Subset Sum Problem. 2 | 3 | subsetsum_med.py: Main code which includes the implementation 4 | 5 | subsetsum_test_med.py: That file includes a unit test implementation to check the correctness of the implemented functions 6 | and some additional tests for working with Itunes contenst 7 | 8 | itunes.py : a source code to read and use itunes entries given by itunes_file.txt This part is not implemented by me! 9 | 10 | itunes_file.txt : includes a song list with the song names and duration in seconds 11 | 12 | For more detailed explinication, you can see my blog about this algorithm https://medium.com/data-structures-and-algorithms-with-python/subset-sum-problem-implementation-with-python-504f17f6bf3a 13 | -------------------------------------------------------------------------------- /SubsetSum/itunes.py: -------------------------------------------------------------------------------- 1 | """ 2 | iTunes reader. 3 | Authored by Prof. Eric Reed. 4 | Modified by Zibin Yang. 5 | """ 6 | 7 | from enum import Enum 8 | from functools import total_ordering 9 | 10 | 11 | # zb: removed __gt__() etc, added @total_ordering 12 | @total_ordering 13 | class iTunesEntry: 14 | # zb: sort criteria determined by a class attribute is not a clean way 15 | # for a number of reasons (it cannot support concurrent sorting, and 16 | # changes the behavior of __eq__(), among other things), and there are 17 | # better ways to do this. So for now, we'll only support sorting by time. 18 | class Sort(Enum): 19 | # TITLE = 0 20 | # ARTIST = 1 21 | TIME = 2 22 | 23 | sort_by = Sort.TIME 24 | 25 | def __init__(self, artist, title, run_time): 26 | self._artist = artist 27 | self._title = title 28 | self._run_time = run_time 29 | 30 | """ MODIFIED FOR ASSIGNMENT #1 """ 31 | def __add__(self,x): 32 | return self._run_time + x 33 | 34 | def __radd__(self, other): 35 | if other == 0: 36 | return self 37 | else: 38 | return self.__add__(other) 39 | 40 | def __eq__(self, other): 41 | return (self.run_time == other) 42 | 43 | 44 | 45 | def __lt__(self, other): 46 | if self.sort_by is iTunesEntry.Sort.TIME: 47 | return self.run_time < other 48 | raise NotImplementedError 49 | 50 | @property 51 | def title(self): 52 | return self._title 53 | 54 | @property 55 | def artist(self): 56 | return self._artist 57 | 58 | @property 59 | def run_time(self): 60 | return self._run_time 61 | 62 | # zb: these two methods make the class hashable so that it can be used in, 63 | # say, set 64 | def __hash__(self): 65 | return hash((self.artist, self.title, self.run_time)) 66 | 67 | 68 | def __str__(self): 69 | return (self.artist + " -> " + self.title + ": " 70 | + iTunesEntry.convert_time_to_string(self.run_time)) 71 | 72 | @staticmethod 73 | def convert_time_to_string(tune_time): 74 | 75 | minutes = str(tune_time // 60) 76 | seconds = str(tune_time % 60) 77 | 78 | if len(seconds) < 2: 79 | seconds = "0" + seconds 80 | return minutes + ":" + seconds 81 | 82 | @classmethod 83 | def set_sort_type(cls, sort_type: Sort): 84 | if sort_type in iTunesEntry.Sort: 85 | cls.sort_by = sort_type 86 | else: 87 | raise ValueError 88 | 89 | 90 | class iTunesEntryReader: 91 | 92 | def __init__(self, filename): 93 | self._tunes = [] 94 | 95 | # zb: making sure fh is closed properly 96 | with open(filename, "r") as fh: 97 | while True: 98 | line = fh.readline() 99 | if iTunesEntryReader._is_data_line(line): 100 | self._tunes.append(iTunesEntry(*self._read_one_entry(fh))) 101 | elif line == "": 102 | break 103 | 104 | @staticmethod 105 | def _is_data_line(line): 106 | if len(line) < 1: 107 | return False 108 | if line[0] == "#": 109 | return True 110 | return False 111 | 112 | def _read_one_entry(self, fh): 113 | """ 114 | reads 3 lines from the input stream, for example 115 | 116 | Eric Clapton 117 | Pretending 118 | 283 119 | 120 | strip newline from each item 121 | """ 122 | 123 | artist = fh.readline()[:-1] 124 | title = fh.readline()[:-1] 125 | run_time = int(fh.readline()[:-1]) 126 | return artist, title, run_time 127 | 128 | def __iter__(self): 129 | self._pos = 0 130 | return self 131 | 132 | def __next__(self): 133 | if self._pos < len(self._tunes): 134 | self._pos += 1 135 | return self._tunes[self._pos - 1] 136 | else: 137 | raise StopIteration 138 | 139 | def __getitem__(self, item): 140 | if item >= len(self._tunes): 141 | raise IndexError 142 | else: 143 | return self._tunes[item] 144 | 145 | def __setitem__(self, key, value): 146 | if key >= len(self._tunes): 147 | self._tunes.extend(key + 1) 148 | self._tunes[key] = value 149 | 150 | def __len__(self): 151 | # zb: use @property num_tunes 152 | return self.num_tunes 153 | 154 | @property 155 | def num_tunes(self): 156 | return len(self._tunes) 157 | 158 | def insert_one_item(self, location, entry): 159 | self._tunes.insert(location, entry) 160 | 161 | 162 | 163 | 164 | if __name__ == '__main__': 165 | # Sample usage 166 | itunes = iTunesEntryReader("itunes_file.txt") 167 | for tune in itunes: 168 | print(tune) 169 | print(sum(itunes)) 170 | -------------------------------------------------------------------------------- /SubsetSum/itunes_file.txt: -------------------------------------------------------------------------------- 1 | # 2 | Carrie Underwood 3 | Cowboy Casanova 4 | 236 5 | 6 | # 7 | Carrie Underwood 8 | Quitter 9 | 220 10 | 11 | # 12 | Rihanna 13 | Russian Roulette 14 | 228 15 | 16 | # 17 | Foo Fighters 18 | All My Life 19 | 263 20 | 21 | # 22 | Foo Fighters 23 | Monkey Wrench 24 | 230 25 | 26 | # 27 | Eric Clapton 28 | Pretending 29 | 283 30 | 31 | # 32 | Eric Clapton 33 | Bad Love 34 | 308 35 | 36 | # 37 | Howlin' Wolf 38 | Everybody's In The Mood 39 | 178 40 | 41 | # 42 | Howlin' Wolf 43 | Well That's All Right 44 | 175 45 | 46 | # 47 | Reverend Gary Davis 48 | Samson and Delilah 49 | 216 50 | 51 | # 52 | Reverend Gary Davis 53 | Twelve Sticks 54 | 194 55 | 56 | # 57 | Roy Buchanan 58 | Hot Cha 59 | 208 60 | 61 | # 62 | Roy Buchanan 63 | Green Onions 64 | 443 65 | 66 | # 67 | Janiva Magness 68 | I'm Just a Prisoner 69 | 230 70 | 71 | # 72 | Janiva Magness 73 | You Were Never Mine 74 | 276 75 | 76 | # 77 | John Lee Hooker 78 | Hobo Blues 79 | 187 80 | 81 | # 82 | John Lee Hooker 83 | I Can't Quit You Baby 84 | 182 85 | 86 | # 87 | Snoop Dogg 88 | That's The Homie 89 | 343 90 | 91 | # 92 | Snoop Dogg 93 | Gangsta Luv 94 | 257 95 | 96 | # 97 | The Rubyz 98 | Ladies and Gentleman 99 | 201 100 | 101 | # 102 | The Rubyz 103 | Watch the Girl 104 | 192 105 | 106 | # 107 | Veggie Tales 108 | Donuts for Benny 109 | 184 110 | 111 | # 112 | Veggie Tales 113 | Our Big Break 114 | 69 115 | 116 | # 117 | Berliner Philharmoniker 118 | Brahms: Symphony No. 1 in C Minor Op. 68 119 | 839 120 | 121 | # 122 | Berliner Philharmoniker 123 | Brahms: Symphony No. 4 in E Minor Op. 98 124 | 800 125 | 126 | # 127 | Yo-yo Ma 128 | Bach: Suite for Cello No. 1 in G Major Prelude 129 | 141 130 | 131 | # 132 | Yo-yo Ma 133 | Simple Gifts 134 | 154 135 | 136 | # 137 | Ry Cooter 138 | Alimony 139 | 175 140 | 141 | # 142 | Ry Cooter 143 | France Chance 144 | 168 145 | 146 | # 147 | Aaron Watson 148 | The Road 149 | 204 150 | 151 | # 152 | Terra Incognita 153 | Clone 154 | 298 155 | 156 | # 157 | Terra Incogni 158 | Lizard Skin 159 | 270 160 | 161 | # 162 | Blue Record 163 | Bullhead's Psalm 164 | 79 165 | 166 | # 167 | Blue Record 168 | Ogeechee Hymnal 169 | 155 170 | 171 | # 172 | Mastadon 173 | Oblivion 174 | 348 175 | 176 | # 177 | Mastadon 178 | The Bit 179 | 295 180 | 181 | # 182 | Sean Kingston 183 | Fire Burning 184 | 239 185 | 186 | # 187 | Sean Kingston 188 | My Girlfriend 189 | 204 190 | 191 | # 192 | T-Pain 193 | Take Your Shirt Off 194 | 228 195 | 196 | # 197 | Lil Jon 198 | Give It All U Got 199 | 218 200 | 201 | # 202 | Jay-Z 203 | What We Talkin' About 204 | 243 205 | 206 | # 207 | Jay-Z 208 | Empire State of Mind 209 | 276 210 | 211 | # 212 | Snoop Dog 213 | Think About It 214 | 217 215 | 216 | # 217 | Snoop Dog 218 | Lil' Crips 219 | 195 220 | 221 | # 222 | Jeff Golub 223 | Shuffleboard 224 | 210 225 | 226 | # 227 | Jeff Golub 228 | Goin' On 229 | 356 230 | 231 | # 232 | Jeff Golub 233 | Fish Fare 234 | 299 235 | 236 | # 237 | Caraivana 238 | Noites Cariocas 239 | 252 240 | 241 | # 242 | Caraivana 243 | Tico-Tico No Fuba 244 | 147 245 | 246 | # 247 | John Patitucci 248 | Monk/Trane 249 | 434 250 | 251 | # 252 | John Patitucci 253 | Sonny Side 254 | 445 255 | 256 | # 257 | Nina Simone 258 | Pirate Jenny 259 | 402 260 | 261 | # 262 | Nina Simone 263 | The Other Woman 264 | 186 265 | 266 | # 267 | Nina Simone 268 | Feeling Good 269 | 177 270 | 271 | # 272 | John Coltrane 273 | A Love Supreme Part 1 274 | 462 275 | 276 | # 277 | John Coltrane 278 | In a Sentimental Mood 279 | 256 280 | 281 | # 282 | AOL Dejando Huellas 283 | Dime Si te Vas Con El 284 | 204 285 | 286 | # 287 | AOL Dejando Huella 288 | Te Amo Tanto 289 | 192 290 | 291 | # 292 | McCoy Tyner 293 | Blues On the Corner 294 | 367 295 | 296 | # 297 | McCoy Tyner 298 | Afro Blue 299 | 742 300 | 301 | # 302 | Kanye West 303 | Stronger 304 | 311 305 | 306 | # 307 | Kanye West 308 | Good Life 309 | 207 310 | 311 | # 312 | Steely Dan 313 | Black Cow 314 | 310 315 | 316 | # 317 | Steely Dan 318 | Kid Charlemagne 319 | 278 320 | 321 | # 322 | Steely Dan 323 | Haitian Divorce 324 | 351 325 | 326 | # 327 | Herbie Hancock 328 | Nefertiti 329 | 451 330 | 331 | # 332 | Herbie Hancock 333 | Rockit 334 | 325 335 | 336 | # 337 | Herbie Hancock 338 | Chameleon 339 | 941 340 | 341 | # 342 | Return to Forever 343 | Medieval Overture 344 | 313 345 | 346 | # 347 | Suzanne Vega 348 | Luka 349 | 231 350 | 351 | # 352 | Suzanne Vega 353 | Small Blue Thing 354 | 235 355 | 356 | # 357 | Bonnie Raitt 358 | Something to Talk About 359 | 227 360 | 361 | # 362 | Bonnie Raitt 363 | I Can't Make You Love Me 364 | 331 365 | 366 | # 367 | Natalie Cole 368 | This Will Be 369 | 171 370 | 371 | # 372 | Natalie Cole 373 | Unforgettable 374 | 211 375 | 376 | # 377 | Jet 378 | Timothy 379 | 260 380 | 381 | # 382 | Jet 383 | Rip It Up 384 | 200 385 | 386 | # 387 | Was (Not Was) 388 | Where Did Your Heart Go? 389 | 347 390 | 391 | 392 | -------------------------------------------------------------------------------- /SubsetSum/subsetsum_med.py: -------------------------------------------------------------------------------- 1 | """ 2 | checkIfDuplicates : the function to check if base set containes duplicate elements 3 | 4 | best_match : during adding the subsets to Col, if the subset sum is bigger than best_match and 5 | smaller than target, we take this subset as the best_subset_added until now. 6 | When we come to the end of algorithm, its enough to return this "best_subset_added" subset 7 | as the result 8 | 9 | """ 10 | def checkIfDuplicates(listOfElems): 11 | ''' Check if given list contains any duplicates ''' 12 | if len(listOfElems) == len(set(listOfElems)): 13 | return False 14 | else: 15 | return True 16 | 17 | def subset_sum(s, target): 18 | if checkIfDuplicates(s): 19 | raise ValueError('The list contains duplicates please use set') 20 | if len(s) == 0 : 21 | raise ValueError('The set is empty, subset cant be calculated') 22 | elif len(s) == 1 and sum(s) > target: 23 | raise ValueError('Being 1 value list, its element is higher than target, subset cant be calculated') 24 | if target >= sum(s) : # no need to calculate, we return the whole set directly 25 | return s 26 | 27 | best_match = 0 28 | Col = {tuple()} 29 | for x in s: 30 | for L in Col.copy(): 31 | if L == (): 32 | subset_sum = x 33 | else: 34 | subset_sum = sum(L) + x 35 | if subset_sum == target: 36 | return L + tuple({x}) 37 | if subset_sum < target : 38 | if L == (): 39 | Col.add(tuple({x})) 40 | if subset_sum > best_match: 41 | best_match = subset_sum 42 | best_subset_added = tuple({x}) 43 | else: 44 | Col.add(L + tuple({x})) 45 | if subset_sum > best_match: 46 | best_match = subset_sum 47 | best_subset_added = L + tuple({x}) 48 | return best_subset_added 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /SubsetSum/subsetsum_test_med.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import TestCase 3 | import subsetsum_med 4 | import itunes 5 | 6 | class Test(TestCase): 7 | def test(self): 8 | with self.assertRaises(ValueError) : subsetsum_med.subset_sum(List1,8) 9 | self.assertEqual(subsetsum_med.subset_sum(List2,12), (10, 2)) 10 | self.assertEqual(subsetsum_med.subset_sum(List2,50), {8, 10, 2, 4}) 11 | self.assertEqual(subsetsum_med.subset_sum(List2,3), (2,)) 12 | with self.assertRaises(ValueError) : subsetsum_med.subset_sum(List3,3) 13 | with self.assertRaises(ValueError) : subsetsum_med.subset_sum(List4,3) 14 | with self.assertRaises(ValueError) : subsetsum_med.subset_sum(List5,3) 15 | with self.assertRaises(ValueError) : subsetsum_med.subset_sum(List6,3) 16 | self.assertEqual(subsetsum_med.subset_sum(List6,10), {10}) 17 | self.assertEqual(subsetsum_med.subset_sum(List6,15), {10}) 18 | self.assertEqual(subsetsum_med.subset_sum(List7,200),(20, 12, 22, 15, 25, 19, 29, 18, 13, 17)) 19 | self.assertEqual(subsetsum_med.subset_sum(List8,50), (25, 6, 19)) 20 | reader = itunes.iTunesEntryReader("itunes_file.txt") 21 | itunes_subset = subsetsum_med.subset_sum(reader, 3600) 22 | for song in itunes_subset: 23 | print(song) 24 | self.assertEqual(sum(itunes_subset), 3600) 25 | 26 | 27 | 28 | List1 = [10,2,8,8] 29 | List2 = {10,2,8,4} # 12 , 50, 3 30 | List3 = [ ] 31 | List4 = { } 32 | List5 = set() 33 | List6 = {10} # 3, 10, 15 34 | List7 = [20, 12, 22, 15, 25, 19, 29, 18, 11, 13, 17] #200 35 | List8 = [25, 27, 3, 12, 6, 15, 9, 30, 21, 19] # 50 36 | 37 | 38 | if __name__=='__main__': 39 | unittest.main() 40 | 41 | 42 | 43 | --------------------------------------------------------------------------------