├── .gitignore ├── docs └── img │ ├── autoclassified-regions.png │ ├── autoclassified-sectors.png │ ├── autoclassified-stock-style.png │ └── top-10-holdings.png ├── isin2secid.json ├── portfolio-classifier.py ├── readme.md ├── requirements.txt └── test └── multifaktortest.xml /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | *.sqlite -------------------------------------------------------------------------------- /docs/img/autoclassified-regions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fizban99/pp-portfolio-classifier/ad94ea255cb8c6af137228d2e9972d1452f69724/docs/img/autoclassified-regions.png -------------------------------------------------------------------------------- /docs/img/autoclassified-sectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fizban99/pp-portfolio-classifier/ad94ea255cb8c6af137228d2e9972d1452f69724/docs/img/autoclassified-sectors.png -------------------------------------------------------------------------------- /docs/img/autoclassified-stock-style.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fizban99/pp-portfolio-classifier/ad94ea255cb8c6af137228d2e9972d1452f69724/docs/img/autoclassified-stock-style.png -------------------------------------------------------------------------------- /docs/img/top-10-holdings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fizban99/pp-portfolio-classifier/ad94ea255cb8c6af137228d2e9972d1452f69724/docs/img/top-10-holdings.png -------------------------------------------------------------------------------- /isin2secid.json: -------------------------------------------------------------------------------- 1 | { 2 | "IE00B8FHGS14": "0P0000Y2A1|etf|de", 3 | 
"IE00BP3QZ601": "0P00014G96|etf|de", 4 | "IE00BP3QZ825": "0P00014G97|etf|de", 5 | "IE00BP3QZB59": "0P00014G99|etf|de", 6 | "IE00BP3QZD73": "0P00014G98|etf|de" 7 | } -------------------------------------------------------------------------------- /portfolio-classifier.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | from xml.sax.saxutils import escape 3 | import uuid 4 | import argparse 5 | import re 6 | from jsonpath_ng import parse 7 | from typing import NamedTuple 8 | from itertools import cycle 9 | from collections import defaultdict 10 | from jinja2 import Environment, BaseLoader 11 | import requests 12 | import requests_cache 13 | from bs4 import BeautifulSoup 14 | import os 15 | import json 16 | 17 | 18 | requests_cache.install_cache(expire_after=86400) #cache downloaded files for a day 19 | requests_cache.remove_expired_responses() 20 | 21 | 22 | COLORS = [ 23 | "#EFDECD", 24 | "#CD9575", 25 | "#FDD9B5", 26 | "#78DBE2", 27 | "#87A96B", 28 | "#FFA474", 29 | "#FAE7B5", 30 | "#9F8170", 31 | "#FD7C6E", 32 | "#000000", 33 | "#ACE5EE", 34 | "#1F75FE", 35 | "#A2A2D0", 36 | "#6699CC", 37 | "#0D98BA", 38 | "#7366BD", 39 | "#DE5D83", 40 | "#CB4154", 41 | "#B4674D", 42 | "#FF7F49", 43 | "#EA7E5D", 44 | "#B0B7C6", 45 | "#FFFF99", 46 | "#1CD3A2", 47 | "#FFAACC", 48 | "#DD4492", 49 | "#1DACD6", 50 | "#BC5D58", 51 | "#DD9475", 52 | "#9ACEEB", 53 | "#FFBCD9", 54 | "#FDDB6D", 55 | "#2B6CC4", 56 | "#EFCDB8", 57 | "#6E5160", 58 | "#CEFF1D", 59 | "#71BC78", 60 | "#6DAE81", 61 | "#C364C5", 62 | "#CC6666", 63 | "#E7C697", 64 | "#FCD975", 65 | "#A8E4A0", 66 | "#95918C", 67 | "#1CAC78", 68 | "#1164B4", 69 | "#F0E891", 70 | "#FF1DCE", 71 | "#B2EC5D", 72 | "#5D76CB", 73 | "#CA3767", 74 | "#3BB08F", 75 | "#FEFE22", 76 | "#FCB4D5", 77 | "#FFF44F", 78 | "#FFBD88", 79 | "#F664AF", 80 | "#AAF0D1", 81 | "#CD4A4C", 82 | "#EDD19C", 83 | "#979AAA", 84 | "#FF8243", 85 | "#C8385A", 86 | "#EF98AA", 87 | "#FDBCB4", 88 
# Retrieval settings for every taxonomy the script generates. The outer key is
# the taxonomy name; insertion order matters because 'Asset-Type' must be
# processed first (it yields the long-equity share used to scale the others).
#
# Fields of each entry, as consumed by SecurityHoldingReport.load():
#   url       - API endpoint; '{type}' is replaced by the security type
#               and '<secid>/data' is appended
#   component - value sent as the 'component' request parameter
#   jsonpath  - jsonpath_ng expression locating the data in the JSON reply;
#               exactly one match means "the matched object holds the
#               categories as keys", otherwise every match is one category
#   category  - field holding the category name (multi-match case only)
#   percent   - field holding the weight ('' when the value itself is the weight)
#   table     - index of the `table.ms_data` table used by the x-ray fallback
#   column    - index of the <td> holding the weight in that table
#   map       - API key -> category name (keys missing here are reported
#               as "not mapped")
#   map2      - x-ray row label -> category name
taxonomies = {
    'Asset-Type': {
        'url': 'https://www.us-api.morningstar.com/sal/sal-service/{type}/process/asset/v2/',
        'component': 'sal-components-mip-asset-allocation',
        'jsonpath': '$.allocationMap',
        'category': '',
        'percent': 'netAllocation',
        'table': 0,
        'column': 2,
        'map': {
            "AssetAllocNonUSEquity": "Stocks",
            "CANAssetAllocCanEquity": "Stocks",
            "CANAssetAllocUSEquity": "Stocks",
            "CANAssetAllocInternationalEquity": "Stocks",
            "AssetAllocUSEquity": "Stocks",
            "AssetAllocCash": "Cash",
            # was "Stocks": a copy-paste slip that booked cash as equity
            "CANAssetAllocCash": "Cash",
            "AssetAllocBond": "Bonds",
            "CANAssetAllocFixedIncome": "Bonds",
            "UK bond": "Bonds",
            "AssetAllocNotClassified": "Other",
            "AssetAllocOther": "Other",
            "CANAssetAllocOther": "Other",
        },
    },
    'Stock-style': {
        'url': 'https://www.us-api.morningstar.com/sal/sal-service/{type}/process/weighting/',
        'component': 'sal-components-mip-style-weight',
        'jsonpath': '$',
        'category': '',
        'percent': '',
        'table': 9,
        'column': 2,
        'map': {
            "largeBlend": "Large Blend",
            "largeGrowth": "Large Growth",
            "largeValue": "Large Value",
            "middleBlend": "Mid-Cap Blend",
            "middleGrowth": "Mid-Cap Growth",
            "middleValue": "Mid-Cap Value",
            "smallBlend": "Small Blend",
            "smallGrowth": "Small Growth",
            "smallValue": "Small Value",
        },
    },
    'Sector': {
        'url': 'https://www.emea-api.morningstar.com/sal/sal-service/{type}/portfolio/v2/sector/',
        'component': 'sal-components-mip-sector-exposure',
        'jsonpath': '$.EQUITY.fundPortfolio',
        'category': '',
        'percent': '',
        'table': 1,
        'column': 0,
        'map': {
            "basicMaterials": "Basic Materials",
            "communicationServices": "Communication Services",
            "consumerCyclical": "Consumer Cyclical",
            "consumerDefensive": "Consumer Defensive",
            "energy": "Energy",
            "financialServices": "Financial Services",
            "healthcare": "Healthcare",
            "industrials": "Industrials",
            "realEstate": "Real Estate",
            "technology": "Technology",
            "utilities": "Utilities",
        },
    },
    'Holding': {
        'url': 'https://www.emea-api.morningstar.com/sal/sal-service/{type}/portfolio/holding/v2/',
        'component': 'sal-components-mip-holdings',
        'jsonpath': '$.equityHoldingPage.holdingList[*]',
        'category': 'securityName',
        'percent': 'weighting',
        'table': 6,
        'column': 4,
    },
    'Region': {
        'url': 'https://www.emea-api.morningstar.com/sal/sal-service/{type}/portfolio/regionalSector/',
        'component': 'sal-components-mip-region-exposure',
        'jsonpath': '$.fundPortfolio',
        'category': '',
        'percent': '',
        'table': 2,
        'column': 0,
        # the duplicated "europeDeveloped" entry was removed (same value,
        # so the resulting dict is unchanged)
        'map': {
            "northAmerica": "North America",
            "europeDeveloped": "Europe Developed",
            "asiaDeveloped": "Asia Developed",
            "asiaEmerging": "Asia Emerging",
            "australasia": "Australasia",
            "europeEmerging": "Europe Emerging / Russia",
            "japan": "Japan",
            "latinAmerica": "Central & Latin America",
            "unitedKingdom": "United Kingdom",
            "africaMiddleEast": "Middle East / Africa",
        },
        'map2': {
            "United States": "North America",
            "Canada": "North America",
            "Western Europe - Euro": "Europe Developed",
            "Western Europe - Non Euro": "Europe Developed",
            "Emerging 4 Tigers": "Asia Developed",
            "Emerging Asia - Ex 4 Tigers": "Asia Emerging",
            "Australasia": "Australasia",
            "Emerging Europe": "Europe Emerging / Russia",
            "Japan": "Japan",
            "Central & Latin America": "Central & Latin America",
            "United Kingdom": "United Kingdom",
            "Middle East / Africa": "Middle East / Africa",
            "Not Classified": "Not Classified",
        },
    },
    'Country': {
        'url': 'https://www.emea-api.morningstar.com/sal/sal-service/{type}/portfolio/regionalSectorIncludeCountries/',
        'component': 'sal-components-mip-country-exposure',
        'jsonpath': '$.fundPortfolio.countries[*]',
        'category': 'name',
        'percent': 'percent',
        'table': 2,
        'column': 0,
        # NOTE(review): "UnitedStates" / "SouthKorea" have no space; presumably
        # this mirrors the names produced by the API path (first letter
        # capitalised, no mapping) - confirm before "fixing" the spelling.
        'map2': {
            "United States": "UnitedStates",
            "Canada": "Canada",
            "Western Europe - Euro": "Western Europe - Euro",
            "Western Europe - Non Euro": "Western Europe - Non Euro",
            "Emerging 4 Tigers": "Hong Kong, Singapore, SouthKorea and Taiwan",
            "Emerging Asia - Ex 4 Tigers": "Asia Emerging",
            "Australasia": "Australasia",
            "Emerging Europe": "Europe Emerging / Russia",
            "Japan": "Japan",
            "Central & Latin America": "Central & Latin America",
            "United Kingdom": "United Kingdom",
            "Middle East / Africa": "Middle East / Africa",
            "Not Classified": "Not Classified",
        },
    },
}
class Isin2secid:
    """Translate ISINs into Morningstar security ids, with a JSON file cache.

    ``mapping`` maps an ISIN to a ``"secid|type|domain"`` string. It is
    shared class-wide and persisted in ``isin2secid.json`` in the current
    working directory.
    """

    mapping = dict()

    @staticmethod
    def load_cache():
        """Replace ``mapping`` with the content of isin2secid.json, if present.

        An unparsable file is reported and otherwise ignored, so the current
        mapping stays in place.
        """
        if os.path.exists("isin2secid.json"):
            with open("isin2secid.json", "r") as f:
                try:
                    Isin2secid.mapping = json.load(f)
                except json.JSONDecodeError:
                    print("Invalid json file")

    @staticmethod
    def save_cache():
        """Write ``mapping`` to isin2secid.json (sorted keys, indent 1)."""
        with open("isin2secid.json", "w") as f:
            json.dump(Isin2secid.mapping, f, indent=1, sort_keys=True)

    @staticmethod
    def _parse_search_response(response, domain):
        """Build the ``"secid|type|domain"`` cache value from a search reply.

        The secid is the value of the first ``{"i":"..."`` occurrence and the
        security type is the third ``|``-separated field, lower-cased.
        Returns ``None`` when the reply carries no secid or has fewer than
        three fields; such a reply used to raise AttributeError/IndexError.
        """
        # raw string: '\{' is an invalid escape in a normal string literal
        match = re.search(r'\{"i":"([^"]+)"', response)
        fields = response.split("|")
        if match is None or len(fields) < 3:
            return None
        return match.group(1) + "|" + fields[2].lower() + "|" + domain

    @staticmethod
    def get_secid(isin):
        """Return ``[secid, type, domain]`` for *isin*.

        A complete cache entry is answered from ``mapping``. Otherwise the
        SecuritySearch endpoint of morningstar.<DOMAIN> is queried (``DOMAIN``
        is a module-level global that must be set beforehand) and a successful
        lookup is cached. If the ISIN cannot be resolved, ``['', '', '']`` is
        returned and nothing is cached.
        """
        cached_secid = Isin2secid.mapping.get(isin, "-")
        # entries with fewer than three fields are incomplete and are
        # looked up again
        if cached_secid != "-" and len(cached_secid.split("|")) >= 3:
            return cached_secid.split("|")

        url = f"https://www.morningstar.{DOMAIN}/en/util/SecuritySearch.ashx"
        payload = {
            'q': isin,
            'preferedList': '',
            'source': 'nav',
            'moduleId': 6,
            'ifIncludeAds': False,
            'usrtType': 'v'
        }
        headers = {
            'accept': '*/*',
            'accept-encoding': 'gzip, deflate, br',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
        }
        resp = requests.post(url, data=payload, headers=headers)
        response = resp.content.decode('utf-8')

        secid_type_domain = Isin2secid._parse_search_response(response, DOMAIN)
        if secid_type_domain is None:
            # same shape as the original "not found" result: '||'.split('|')
            return ['', '', '']
        Isin2secid.mapping[isin] = secid_type_domain
        return secid_type_domain.split("|")
class SecurityHoldingReport:
    """Weights of one fund/ETF in every taxonomy, retrieved from Morningstar.

    After a successful ``load()`` the ``grouping`` attribute maps each
    taxonomy name to a ``{category: weight}`` dict. ``secid`` stays ``''``
    when the security could not be resolved or is a stock.
    """

    def __init__(self):
        self.secid = ''

    def get_bearer_token(self, secid, domain):
        """Return ``(bearer_token, retrieval_secid)`` for *secid*.

        The snapshot page may advertise a different secid (``var FC``) to be
        used for retrieval; if it does not, *secid* itself is used.

        Raises:
            IndexError: if the PortfolioSAL page contains no ``maasToken``.
        """
        headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'}

        # the secid can change for retrieval purposes: find the retrieval secid
        url = f'https://www.morningstar.{domain}/{domain}/funds/snapshot/snapshot.aspx?id={secid}'
        response = requests.get(url, headers=headers)
        secid_regexp = r"var FC = '(.*)';"
        matches = re.findall(secid_regexp, response.text)
        secid_to_search = matches[0] if matches else secid

        # get the bearer token for the retrieval secid
        url = f'https://www.morningstar.{domain}/Common/funds/snapshot/PortfolioSAL.aspx'
        payload = {'FC': secid_to_search}
        response = requests.get(url, headers=headers, params=payload)
        token_regex = r"const maasToken \=\s\"(.+)\""
        resultstringtoken = re.findall(token_regex, response.text)[0]
        return resultstringtoken, secid_to_search

    def calculate_grouping(self, categories, percentages, grouping_name, long_equity):
        """Add *percentages* to the categories of ``grouping[grouping_name]``.

        Category names are XML-escaped and weights of equal names are summed.
        Every grouping except 'Asset-Type' is then multiplied by
        *long_equity* (the long equity share as a fraction).
        """
        grouping = self.grouping[grouping_name]
        for category_name, percentage in zip(categories, percentages):
            name = escape(category_name)
            grouping[name] = grouping.get(name, 0) + percentage

        if grouping_name != 'Asset-Type':
            self.grouping[grouping_name] = {k: v * long_equity
                                            for k, v in grouping.items()}

    @staticmethod
    def _map_categories(keys, percentages, mapping):
        """Translate API keys to category names, keeping weights aligned.

        Returns ``(categories, percentages, unmapped)``. With a non-empty
        *mapping*, keys missing from it are dropped together with their
        weight and listed in ``unmapped``. With an empty *mapping* the first
        letter of every key is capitalised instead.
        """
        if len(mapping) == 0:
            return [key[0].upper() + key[1:] for key in keys], percentages, []
        mapped = [key in mapping for key in keys]
        categories = [mapping[key] for key, ok in zip(keys, mapped) if ok]
        # drop the weight of every unmapped key as well; otherwise zip() in
        # calculate_grouping pairs weights with the wrong category
        kept = [pct for pct, ok in zip(percentages, mapped) if ok]
        unmapped = [key for key, ok in zip(keys, mapped) if not ok]
        return categories, kept, unmapped

    def load(self, isin, secid):
        """Fill ``self.grouping`` for the security identified by *isin*.

        *secid* is accepted for interface compatibility but ignored: the id
        is always resolved through ``Isin2secid.get_secid``. Unresolvable
        ISINs and stocks are skipped, leaving ``self.secid`` empty. Taxonomies
        that cannot be read from the JSON API are retried against the x-ray
        page.
        """
        secid, secid_type, domain = Isin2secid.get_secid(isin)
        if secid == '':
            print(f"isin {isin} not found in Morningstar for domain '{DOMAIN}', skipping it... Try another domain with -d ")
            return
        elif secid_type == "stock":
            print(f"isin {isin} is a stock, skipping it...")
            return
        self.secid = secid
        bearer_token, secid = self.get_bearer_token(secid, domain)
        print(f"Retrieving data for {secid_type} {isin} ({secid}) using domain '{domain}'...")
        headers = {
            'accept': '*/*',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
            'Authorization': f'Bearer {bearer_token}',
        }

        params = {
            'premiumNum': '10',
            'freeNum': '10',
            'languageId': 'de-DE',
            'locale': 'en',
            'clientId': 'MDC_intl',
            'benchmarkId': 'category',
            'version': '3.60.0',
        }

        self.grouping = dict()
        for taxonomy in taxonomies:
            self.grouping[taxonomy] = defaultdict(float)

        non_categories = ['avgMarketCap', 'portfolioDate', 'name', 'masterPortfolioId']
        json_not_found = False
        # Long equity share as a fraction; set while processing 'Asset-Type'.
        # Initialised explicitly so a missing value is detected on purpose
        # instead of through a swallowed NameError.
        long_equity = None
        for grouping_name, taxonomy in taxonomies.items():
            params['component'] = taxonomy['component']
            url = taxonomy['url'] + secid + "/data"
            # use etf or fund endpoint
            url = url.replace("{type}", secid_type)
            resp = requests.get(url, params=params, headers=headers)
            if resp.status_code == 401:
                json_not_found = True
                print(f" {grouping_name} for secid {secid} will be retrieved from x-ray...")
                continue
            # reset per taxonomy: the values of the previous iteration must
            # never be combined with this taxonomy's categories
            keys = []
            percentages = []
            try:
                response = resp.json()
                found = parse(taxonomy['jsonpath']).find(response)
                percent_field = taxonomy['percent']
                if len(found) == 1:
                    # a single match means the matched object contains the categories
                    value = found[0].value
                    keys = [key for key in value if key not in non_categories]

                    if percent_field != "":
                        if value[keys[0]][percent_field] is not None:
                            percentages = [float(value[key][percent_field]) for key in keys]
                    else:
                        if value[keys[0]] is not None:
                            percentages = [float(value[key]) for key in keys]

                    if grouping_name == 'Asset-Type':
                        try:
                            long_equity = (float(value.get('assetAllocEquity', {}).get('longAllocation', 0)) +
                                           float(value.get('AssetAllocNonUSEquity', {}).get('longAllocation', 0)) +
                                           float(value.get('AssetAllocUSEquity', {}).get('longAllocation', 0))) / 100
                        except TypeError:
                            print(f" No information on {grouping_name} for {secid}")
                else:
                    # every match is a category
                    keys = [match.value[taxonomy['category']] for match in found]
                    if len(found) == 0 or found[0].value.get(taxonomy['percent'], "") == "":
                        print(f" percentages not found for {grouping_name} for {secid}")
                    else:
                        percentages = [float(match.value[taxonomy['percent']]) for match in found]

                # map names if there is a map, otherwise capitalise the first letter
                categories, percentages, unmapped = self._map_categories(
                    keys, percentages, taxonomy.get('map', {}))
                if unmapped:
                    print(f" Categories not mapped: {unmapped} for {secid}")

                if percentages:
                    if long_equity is None:
                        # handled like any other retrieval problem (below)
                        raise ValueError("long equity allocation is unknown")
                    self.calculate_grouping(categories, percentages, grouping_name, long_equity)

            except Exception:
                # deliberate best effort: whatever goes wrong with the JSON
                # reply, fall back to the x-ray page for this taxonomy
                print(f" Problem with {grouping_name} for secid {secid} in PortfolioSAL...")
                json_not_found = True

        if json_not_found:
            non_categories = ['Defensive', 'Cyclical', 'Sensitive',
                              'Greater Europe', 'Americas', 'Greater Asia',
                              ]
            url = "https://lt.morningstar.com/j2uwuwirpv/xray/default.aspx?LanguageId=en-EN&PortfolioType=2&SecurityTokenList=" + secid + "]2]0]FOESP%24%24ALL_1340&values=100"
            resp = requests.get(url, headers=headers)
            soup = BeautifulSoup(resp.text, 'html.parser')
            for grouping_name, taxonomy in taxonomies.items():
                # Skip what the API already delivered. The former test
                # `grouping_name in self.grouping` was always true (every
                # taxonomy is pre-seeded above), so this fallback never ran.
                if self.grouping[grouping_name]:
                    continue
                try:
                    table = soup.select("table.ms_data")[taxonomy['table']]
                    trs = table.select("tr")[1:]
                    if grouping_name == 'Asset-Type':
                        long_equity = float(trs[0].select("td")[0].text.replace(",", ".")) / 100
                    if long_equity is None:
                        print(f" No long equity allocation for {secid}, skipping {grouping_name}...")
                        continue
                    categories = []
                    percentages = []
                    for tr in trs:
                        header = tr.th if len(tr.select('th')) > 0 else tr.td
                        if tr.text != '' and header.text not in non_categories:
                            categories.append(header.text)
                            cells = tr.select("td")
                            if len(cells) > taxonomy['column']:
                                # leading '0' turns an empty or '-' cell into 0.0
                                percentages.append(float('0' + cells[taxonomy['column']].text.replace(",", ".").replace("-", "")))
                            else:
                                percentages.append(0.0)
                    if len(taxonomy.get('map2', {})) != 0:
                        categories = [taxonomy['map2'][key] for key in categories]
                except (IndexError, KeyError, ValueError, AttributeError):
                    # the page layout is not guaranteed; one unreadable table
                    # must not abort the whole run
                    print(f" {grouping_name} for secid {secid} could not be read from x-ray...")
                    continue

                self.calculate_grouping(categories, percentages, grouping_name, long_equity)

    def group_by_key(self, key):
        """Return the ``{category: weight}`` dict of taxonomy *key*."""
        return self.grouping[key]
def get_security_xpath_by_uuid(self, uuid):
    """Return the relative XPath reference of the security with *uuid*.

    Scans every ``securities/security`` element of ``self.pp`` in document
    order and returns ``"../../../../../../../../securities/security[N]"``,
    where N is the 1-based position of the first element whose ``<uuid>``
    text equals *uuid*. Returns ``None`` when no element matches.
    """
    for idx, security in enumerate(self.pp.findall(".//securities/security")):
        uuid_node = security.find('uuid')
        if uuid_node is None:
            # An element without a <uuid> child (presumably a reference-only
            # entry - TODO confirm) cannot match. It still counts towards
            # idx, exactly as before, but no longer raises AttributeError.
            continue
        if uuid_node.text == uuid:
            return f"../../../../../../../../securities/security[{idx + 1}]"
    return None
def build_parser():
    """Create the command-line parser of the classifier script.

    Arguments: ``input_file`` (required), ``output_file`` (optional,
    default ``pp_classified.xml``) and ``-d`` (Morningstar domain,
    default ``de``).
    """
    parser = argparse.ArgumentParser(
        description='\r\n'.join(["reads a portfolio performance xml file and auto-classifies",
                                 "the securities in it by asset-type, stock-style, sector, holdings, region and country weights",
                                 "For each security, you need to have an ISIN"])
    )

    # Morningstar domain where your securities can be found,
    # e.g. es for spain, de for germany, fr for france...
    # It is only used to find the corresponding secid from the ISIN.
    # The help text used to announce "default: es" although the default is 'de'.
    parser.add_argument('-d', default='de', dest='domain', type=str,
                        help='Morningstar domain from which to retrieve the secid (default: de)')

    parser.add_argument('input_file', metavar='input_file', type=str,
                        help='path to unencrypted pp.xml file')

    parser.add_argument('output_file', metavar='output_file', type=str, nargs='?',
                        help='path to auto-classified output file', default='pp_classified.xml')
    return parser


if __name__ == '__main__':
    # input_file is a required positional: argparse exits with a usage
    # message when it is missing, so no extra presence check is needed.
    args = build_parser().parse_args()

    # DOMAIN is read as a module-level global by Isin2secid.get_secid and
    # SecurityHoldingReport.load, so it has to be assigned at module level.
    DOMAIN = args.domain
    Isin2secid.load_cache()
    pp_file = PortfolioPerformanceFile(args.input_file)
    for taxonomy in taxonomies:
        pp_file.add_taxonomy(taxonomy)
    Isin2secid.save_cache()
    pp_file.write_xml(args.output_file)
`git clone` this repository 15 | 2. in the install directory run `pip3 install -r requirements.txt` 16 | 3. test the script by running `python portfolio-classifier.py test/multifaktortest.xml`. Then open the resulting file `pp_classified.xml` in Portfolio Performance. 17 | 18 | ## How it works: 19 | 20 | **Important: Never try this script on your original Portfolio Performance files -> risk of data loss. Always make a copy first that is safe to play around with or create a dummy portfolio like the one in the test folder.** 21 | 22 | 1. In Portfolio Performance, save a copy of your portfolio file as unencrypted xml. The script won't work with any other format. 23 | 2. The secid is the code at the end of the morningstar url of the security (the id of length 10 after the "?id=", something like 0P00012345). The script will try to get it from the morningstar website, but the script might have to be configured with the domain of your country, since not all securities are available in all countries. The domain is only important for the translation from isin to secid. Once the secid is obtained, the morningstar APIs are country-independent. The script caches the mapping between the isin and the secid plus the security id type and the domain of the security into a file called isin2secid.json in order to reduce the number of requests. 24 | 3. Run the script `python portfolio-classifier.py <input_file> [<output_file>] [-d domain]`. If output file is not specified, a file called pp_classified.xml will be created. If domain is not specified, 'de' will be used for morningstar.de. This is only used to retrieve the corresponding internal Morningstar id (secid) for each isin. 25 | 4. open pp_classified.xml (or the given output_file name) in Portfolio Performance and check out the additional classifications. 
26 | 27 | 28 | ## Gallery 29 | 30 | ### Autoclassified stock-style 31 | Autoclassified Security types 32 | 33 | 34 | 35 | ### Autoclassified Regions 36 | Autoclassified Regions 37 | 38 | 39 | 40 | ### Autoclassified Sectors 41 | Autoclassified Sectors 42 | 43 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.11.2 2 | requests==2.24.0 3 | requests-cache==0.5.2 4 | jsonpath_ng==1.5.3 5 | markupsafe==1.1.1 6 | beautifulsoup4==4.9.3 7 | --------------------------------------------------------------------------------