├── .gitignore ├── README.rst ├── circle.yml ├── mpworks ├── __init__.py ├── check_snl │ ├── README.asc │ ├── __init__.py │ ├── builders │ │ ├── __init__.py │ │ ├── base.py │ │ ├── core.py │ │ └── init_plotly.py │ ├── check_snl.py │ ├── etc │ │ └── check_snl_reqs.txt │ ├── icsd.py │ ├── logs │ │ ├── canonicals.pbs.e10425471 │ │ ├── canonicals.pbs.e11010157 │ │ ├── canonicals.pbs.o10425471 │ │ ├── canonicals.pbs.o11010157 │ │ ├── groupmembers.pbs.e10460446 │ │ ├── groupmembers.pbs.e11015910 │ │ ├── groupmembers.pbs.o10460446 │ │ ├── groupmembers.pbs.o11015910 │ │ ├── icsd.pbs.e12673701 │ │ ├── icsd.pbs.e12687121 │ │ ├── icsd.pbs.o12673701 │ │ ├── icsd.pbs.o12687121 │ │ ├── spacegroups.pbs.e10417417 │ │ ├── spacegroups.pbs.e10449725 │ │ ├── spacegroups.pbs.o10417417 │ │ └── spacegroups.pbs.o10449725 │ ├── plots.py │ ├── results │ │ ├── bad_snlgroups.csv │ │ ├── bad_snlgroups_2_in_matdb.csv │ │ ├── bad_snlgroups_2_notin_matdb.csv │ │ ├── bad_snls.csv │ │ ├── canonicals_crosscheck.png │ │ ├── canonicals_deltas.png │ │ ├── canonicals_rmsdist.png │ │ ├── canonicals_spacegroups.png │ │ ├── groupmembers_ceder_dahn.csv │ │ ├── groupmembers_check.png │ │ ├── groupmembers_mismatches.png │ │ ├── matching_snlgroups_sameSG.csv │ │ ├── matching_snlgroups_sameSG_setA.csv │ │ ├── matching_snlgroups_sameSG_setB.csv │ │ ├── shared_icsds.csv │ │ ├── spacegroup_changes.png │ │ ├── spacegroup_changes_2.png │ │ ├── spacegroup_consistency.png │ │ ├── spacegroup_consistency_2.png │ │ └── zero_occu_sites.csv │ ├── scripts │ │ ├── occu_sites_query.py │ │ ├── sg_changes_examples.py │ │ └── sg_default_bad_snls_check.py │ ├── submit │ │ ├── canonicals.pbs │ │ ├── check_snl.pbs │ │ ├── groupmembers.pbs │ │ ├── icsd.pbs │ │ ├── run_check_snl_spacegroups.sh │ │ └── spacegroups.pbs │ └── utils.py ├── docs │ ├── MPWorks_docs_old.docx │ ├── MPWorks_docs_old.pdf │ ├── MPWorks_figs.pptx │ ├── a.png │ ├── b.png │ ├── c.png │ ├── d.png │ ├── e.png │ ├── f.png │ ├── g.png │ ├── h.png │ └── original │ │ ├── a.png │ │ ├── b.png │ │ ├── c.png │ │ ├── d.png │ │ ├── e.png │ │ ├── f.png │ │ └── g.png ├── drones │ ├── README.md │ ├── __init__.py │ ├── mp_vaspdrone.py │ └── signals.py ├── dupefinders │ ├── README.md │ ├── __init__.py │ └── dupefinder_vasp.py ├── examples │ ├── README.md │ ├── Si_wf.json │ ├── __init__.py │ ├── firetasks_ex.py │ └── wf_ex.py ├── firetasks │ ├── README.md │ ├── __init__.py │ ├── bandstructure.json │ ├── boltztrap_tasks.py │ ├── bs_static.json │ ├── controller_tasks.py │ ├── custodian_task.py │ ├── elastic_tasks.py │ ├── snl_tasks.py │ ├── uniform.json │ ├── vasp_io_tasks.py │ └── vasp_setup_tasks.py ├── firetasks_staging │ ├── __init__.py │ └── surface_tasks.py ├── fix_scripts │ ├── README.md │ ├── __init__.py │ ├── add_icsd_materials.py │ ├── add_old_taskids.py │ ├── bad_crystals.txt │ ├── clear_FWs.py │ ├── find_missing_snl.py │ ├── fix_bad_crystals.py │ ├── fix_bs_controller_tasks.py │ ├── fix_fizzled_defused.py │ ├── fix_float_priorities.py │ ├── fix_mpcomplete.py │ ├── fix_unmoved_dirs.py │ ├── legacy │ │ ├── __init__.py │ │ ├── actions │ │ │ ├── __init__.py │ │ │ ├── add_snl_final.py │ │ │ ├── do_fw_conversion.py │ │ │ ├── do_icsd_to_snl.py │ │ │ ├── do_mps_to_snl.py │ │ │ ├── do_task_conversion.py │ │ │ └── do_task_conversion_fixes.py │ │ ├── mps_to_snl.py │ │ ├── old_task_drone.py │ │ ├── submit_snl.py │ │ └── task_to_fw.py │ ├── reparse_old_tasks.py │ ├── reparse_old_tasks_again.py │ ├── rerun_boltztrap.py │ └── submit_bo_jobs.py ├── maintenance_scripts │ ├── README.md │ ├── __init__.py │ ├── 
classify_fizzled.py │ ├── deprecate_snl.py │ ├── icsd2012_to_snl.py │ ├── modify_snl.py │ └── reparse_tasks.py ├── osti_doi │ ├── __init__.py │ ├── __main__.py │ ├── builders.py │ ├── dois.json │ ├── example0.xml │ ├── osti_record.py │ ├── requirements.txt │ └── run.sh ├── processors │ ├── README.md │ ├── __init__.py │ ├── process_submissions.py │ └── submit_canonical.py ├── scripts │ ├── __init__.py │ ├── submissions_run.py │ └── submit_canonical_run.py ├── snl_utils │ ├── README.md │ ├── __init__.py │ ├── mpsnl.py │ └── snl_mongo.py ├── submission │ ├── README.md │ ├── __init__.py │ └── submission_mongo.py └── workflows │ ├── README.md │ ├── __init__.py │ ├── snl_to_wf.py │ ├── snl_to_wf_elastic.py │ ├── surface_wf.py │ ├── test_wfs │ ├── FeO.cif │ ├── Si.cif │ ├── scancel │ │ └── srun_std_err_example.txt │ ├── wf_feo_dupes.json │ └── wf_si_dupes.json │ ├── tests │ ├── __init__.py │ └── test_scancel_job_step_terminator.py │ ├── wf_settings.py │ └── wf_utils.py ├── requirements.txt ├── scripts ├── go_submissions └── go_testing ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.py[cod] 3 | 4 | # doc builds 5 | docs/_build/* 6 | docs/_build/*/* 7 | docs/_build/*/*/* 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Packages 13 | *.egg 14 | *.egg-info 15 | dist 16 | build 17 | eggs 18 | parts 19 | bin 20 | var 21 | sdist 22 | develop-eggs 23 | .installed.cfg 24 | lib 25 | lib64 26 | 27 | # Installer logs 28 | pip-log.txt 29 | 30 | # Unit test / coverage reports 31 | .coverage 32 | .tox 33 | nosetests.xml 34 | 35 | # Translations 36 | *.mo 37 | 38 | # Mr Developer 39 | .mr.developer.cfg 40 | .project 41 | .pydevproject 42 | 43 | # Pycharm 44 | .idea/* 45 | mpworks/legacy/actions/*.yaml 46 | mpworks/fix_scripts/*.yaml 47 | gulptmp_4_1 48 | 49 | octave-core 50 | 51 | *.xml 52 | 53 | mpworks/.idea/mpworks.iml 54 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | override: 3 | - pip install --upgrade pip 4 | - pip install numpy --upgrade 5 | - pip install -r requirements.txt 6 | - python setup.py install 7 | -------------------------------------------------------------------------------- /mpworks/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1dev0.1' 2 | -------------------------------------------------------------------------------- /mpworks/check_snl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/__init__.py -------------------------------------------------------------------------------- /mpworks/check_snl/builders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/builders/__init__.py -------------------------------------------------------------------------------- /mpworks/check_snl/builders/base.py: -------------------------------------------------------------------------------- 1 | import sys, multiprocessing, time 2 | from mpworks.snl_utils.mpsnl import SNLGroup 3 | from matgendb.builders.core import Builder 4 | from matgendb.builders.util import get_builder_log 5 | from 
mpworks.check_snl.utils import div_plus_mod 6 | from pymatgen.analysis.structure_matcher import StructureMatcher, ElementComparator 7 | from init_plotly import py, stream_ids, categories 8 | if py is not None: 9 | from plotly.graph_objs import * 10 | 11 | _log = get_builder_log("snl_group_checks") 12 | 13 | class SNLGroupBaseChecker(Builder): 14 | def __init__(self, *args, **kwargs): 15 | self.checker_name = type(self).__name__ 16 | _log.info(self.checker_name) 17 | Builder.__init__(self, *args, **kwargs) 18 | 19 | def get_items(self, snls=None, snlgroups=None, ncols=None): 20 | """iterator over same-composition groups of SNLGroups rev-sorted by size 21 | 22 | :param snls: 'snl' collection in 'snl_mp_prod' DB 23 | :type snls: QueryEngine 24 | :param snlgroups: 'snlgroups' collection in 'snl_mp_prod' DB 25 | :type snlgroups: QueryEngine 26 | :param ncols: number of columns for 2D plotly 27 | :type ncols: int 28 | """ 29 | self._matcher = StructureMatcher( 30 | ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=True, scale=True, 31 | attempt_supercell=False, comparator=ElementComparator() 32 | ) 33 | self._lock = self._mgr.Lock() if not self._seq else None 34 | self._ncols = ncols if not self._seq else 1 35 | self._nrows = div_plus_mod(self._ncores, self._ncols) if not self._seq else 1 36 | self._counter = self.shared_list() 37 | self._counter.extend([[0]*self._ncols for i in range(self._nrows)]) 38 | self._counter_total = multiprocessing.Value('d', 0) 39 | self._mismatch_dict = self.shared_dict() 40 | self._mismatch_dict.update(dict((k,[]) for k in categories[self.checker_name])) 41 | self._mismatch_counter = self.shared_list() 42 | self._mismatch_counter.extend([0]*len(self._mismatch_dict.keys())) 43 | if py is not None: 44 | self._streams = [ py.Stream(stream_id) for stream_id in stream_ids ] 45 | for s in self._streams: s.open() 46 | self._snls = snls 47 | self._snlgroups = snlgroups 48 | if 'SNLGroup' in self.checker_name: 49 | _log.info('analyzing %d SNLGroups', self._snlgroups.collection.count()) 50 | # start pipeline to prepare aggregation of items 51 | pipeline = [{ '$project': { 52 | 'reduced_cell_formula_abc': 1, 'snlgroup_id': 1, '_id': 0 53 | }}] 54 | group_expression = { 55 | '_id': '$reduced_cell_formula_abc', 56 | 'num_snlgroups': { '$sum': 1 }, 57 | 'snlgroup_ids': { '$addToSet': "$snlgroup_id" } 58 | } 59 | pipeline.append({ '$group': group_expression }) 60 | pipeline.append({ '$match': { 'num_snlgroups': { '$gt': 1 } } }) 61 | pipeline.append({ '$sort': { 'num_snlgroups': -1 } }) 62 | pipeline.append({ '$project': { 'snlgroup_ids': 1 } }) 63 | return self._snlgroups.collection.aggregate(pipeline, cursor={}) 64 | else: 65 | _log.info('analyzing %d SNLs', snls.collection.count()) 66 | return self._snls.query(distinct_key='snl_id') 67 | 68 | def process_item(self, item, index): 69 | nrow, ncol = index/self._ncols, index%self._ncols 70 | snlgroups = {} # keep {snlgroup_id: SNLGroup} to avoid dupe queries 71 | if isinstance(item, dict) and 'snlgroup_ids' in item: 72 | for gid in item['snlgroup_ids']: 73 | try: 74 | snlgrp_dict = self._snlgroups.collection.find_one({ "snlgroup_id": gid }) 75 | snlgroups[gid] = SNLGroup.from_dict(snlgrp_dict) 76 | except: 77 | exc_type, exc_value, exc_traceback = sys.exc_info() 78 | _log.info('%r %r', exc_type, exc_value) 79 | self._increase_counter(nrow, ncol, {categories[self.checker_name]: [str(gid)]}) 80 | return nrow, ncol, snlgroups 81 | 82 | def _push_to_plotly(self): 83 | heatmap_z = self._counter._getvalue() if not self._seq else 
self._counter 84 | bar_x = self._mismatch_counter._getvalue() if not self._seq else self._mismatch_counter 85 | md = self._mismatch_dict._getvalue() if not self._seq else self._mismatch_dict 86 | try: 87 | self._streams[0].write(Heatmap(z=heatmap_z)) 88 | except: 89 | exc_type, exc_value, exc_traceback = sys.exc_info() 90 | _log.info('%r %r', exc_type, exc_value) 91 | _log.info('_push_to_plotly ERROR: heatmap=%r', heatmap_z) 92 | try: 93 | self._streams[1].write(Bar(x=bar_x)) 94 | except: 95 | exc_type, exc_value, exc_traceback = sys.exc_info() 96 | _log.info('%r %r', exc_type, exc_value) 97 | _log.info('_push_to_plotly ERROR: bar=%r', bar_x) 98 | for k,v in md.iteritems(): 99 | if len(v) < 1: continue 100 | try: 101 | self._streams[2].write(Scatter( 102 | x=self._mismatch_counter[categories[self.checker_name].index(k)], 103 | y=k, text='<br>
'.join(v) 104 | )) 105 | _log.info('_push_to_plotly: mismatch_dict[%r]=%r', k, v) 106 | self._mismatch_dict.update({k:[]}) # clean 107 | time.sleep(0.052) 108 | except: 109 | exc_type, exc_value, exc_traceback = sys.exc_info() 110 | _log.info('%r %r', exc_type, exc_value) 111 | _log.info('_push_to_plotly ERROR: mismatch_dict=%r', md) 112 | _log.info( 113 | 'self._mismatch_dict=%r', 114 | self._mismatch_dict._getvalue() if not self._seq 115 | else self._mismatch_dict 116 | ) 117 | 118 | def _increase_counter(self, nrow, ncol, mismatch_dict): 119 | # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.managers.SyncManager.list 120 | if self._lock is not None: self._lock.acquire() 121 | mc = self._mismatch_counter 122 | for k in categories[self.checker_name]: 123 | mc[categories[self.checker_name].index(k)] += len(mismatch_dict[k]) 124 | self._mismatch_counter = mc 125 | for k,v in mismatch_dict.iteritems(): 126 | self._mismatch_dict[k] += v 127 | currow = self._counter[nrow] 128 | currow[ncol] += 1 129 | self._counter[nrow] = currow 130 | self._counter_total.value += 1 131 | if py is not None and not \ 132 | self._counter_total.value % (10*self._ncols*self._nrows): 133 | self._push_to_plotly() 134 | if (not self._counter_total.value%2500): 135 | _log.info('processed %d items', self._counter_total.value) 136 | if self._lock is not None: self._lock.release() 137 | 138 | def finalize(self, errors): 139 | if py is not None: self._push_to_plotly() 140 | _log.info("%d items processed.", self._counter_total.value) 141 | return True 142 | 143 | Builder.register(SNLGroupBaseChecker) 144 | -------------------------------------------------------------------------------- /mpworks/check_snl/builders/core.py: -------------------------------------------------------------------------------- 1 | from matgendb.builders.util import get_builder_log 2 | from base import SNLGroupBaseChecker 3 | from init_plotly import categories 4 | from mpworks.snl_utils.mpsnl import MPStructureNL 5 | from pymatgen.symmetry.analyzer import SpacegroupAnalyzer 6 | 7 | _log = get_builder_log("snl_group_checks") 8 | 9 | class SNLGroupCrossChecker(SNLGroupBaseChecker): 10 | """cross-check all SNL Groups via StructureMatcher.fit of their canonical SNLs""" 11 | def process_item(self, item, index): 12 | nrow, ncol, snlgroups = super(SNLGroupCrossChecker, self).process_item(item, index) 13 | for idx,primary_id in enumerate(item['snlgroup_ids'][:-1]): 14 | cat_key = '' 15 | local_mismatch_dict = dict((k,[]) for k in categories[self.checker_name]) 16 | primary_group = snlgroups[primary_id] 17 | composition, primary_sg_num = primary_group.canonical_snl.snlgroup_key.split('--') 18 | for secondary_id in item['snlgroup_ids'][idx+1:]: 19 | secondary_group = snlgroups[secondary_id] 20 | secondary_sg_num = secondary_group.canonical_snl.snlgroup_key.split('--')[1] 21 | if not self._matcher.fit( 22 | primary_group.canonical_structure, 23 | secondary_group.canonical_structure 24 | ): continue 25 | cat_key = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs' 26 | local_mismatch_dict[cat_key].append('(%d,%d)' % (primary_id, secondary_id)) 27 | if cat_key: 28 | _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict) 29 | self._increase_counter(nrow, ncol, local_mismatch_dict) 30 | 31 | class SNLGroupIcsdChecker(SNLGroupBaseChecker): 32 | """check one-to-one mapping of SNLGroup to ICSD ID 33 | 34 | check if two different SNLGroups have any entries that share an ICSD id. 
35 | Should not happen at all due to 1-to-1 mapping of MP to ICSD material 36 | """ 37 | def get_snl_query(self, snl_ids): 38 | or_conds = [{'about._icsd.icsd_id': {'$type': i}} for i in [16, 18]] 39 | return [{'snl_id': {'$in': snl_ids}, '$or': or_conds}] 40 | 41 | def process_item(self, item, index): 42 | nrow, ncol, snlgroups = super(SNLGroupIcsdChecker, self).process_item(item, index) 43 | for idx,primary_id in enumerate(item['snlgroup_ids'][:-1]): 44 | cat_key = '' 45 | local_mismatch_dict = dict((k,[]) for k in categories[self.checker_name]) 46 | primary_group = snlgroups[primary_id] 47 | primary_mpsnl_dicts = self._snls.collection.find( 48 | *self.get_snl_query(primary_group.all_snl_ids)) 49 | for secondary_id in item['snlgroup_ids'][idx+1:]: 50 | secondary_group = snlgroups[secondary_id] 51 | secondary_mpsnl_dicts = self._snls.collection.find( 52 | *self.get_snl_query(secondary_group.all_snl_ids)) 53 | for primary_mpsnl_dict in primary_mpsnl_dicts: 54 | primary_icsd_id = primary_mpsnl_dict['about']['_icsd']['icsd_id'] 55 | for secondary_mpsnl_dict in secondary_mpsnl_dicts: 56 | secondary_icsd_id = secondary_mpsnl_dict['about']['_icsd']['icsd_id'] 57 | if primary_icsd_id != secondary_icsd_id: continue 58 | cat_key = 'same ICSDs' 59 | primary_structure = MPStructureNL.from_dict(primary_mpsnl_dict).structure 60 | secondary_structure = MPStructureNL.from_dict(secondary_mpsnl_dict).structure 61 | match = self._matcher.fit(primary_structure, secondary_structure) 62 | if match: 63 | primary_match = self._matcher.fit( 64 | primary_structure, primary_group.canonical_structure) 65 | secondary_match = self._matcher.fit( 66 | secondary_structure, secondary_group.canonical_structure) 67 | canonical_match = self._matcher.fit( 68 | primary_group.canonical_structure, 69 | secondary_group.canonical_structure) 70 | local_mismatch_dict[cat_key].append( 71 | '({}, {}): ({}, {}) -> {} ({}{})'.format( 72 | primary_id, secondary_id, 73 | primary_mpsnl_dict['snl_id'], 74 | secondary_mpsnl_dict['snl_id'], 75 | primary_icsd_id, match, 76 | '/{}/{}/{}'.format( 77 | primary_match, secondary_match, canonical_match 78 | ) if match else '' 79 | ) 80 | ) 81 | if cat_key: 82 | _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict) 83 | self._increase_counter(nrow, ncol, local_mismatch_dict) 84 | 85 | class SNLGroupMemberChecker(SNLGroupBaseChecker): 86 | """check whether SNLs in each SNLGroup still match resp. 
canonical SNL""" 87 | def process_item(self, item, index): 88 | nrow, ncol, snlgroups = super(SNLGroupMemberChecker, self).process_item(item, index) 89 | for snlgroup_id in item['snlgroup_ids']: 90 | local_mismatch_dict = dict((k,[]) for k in categories[self.checker_name]) 91 | snlgrp = snlgroups[snlgroup_id] 92 | mismatch_snls = [] 93 | entry = '%d,%d:' % (snlgrp.snlgroup_id, snlgrp.canonical_snl.snl_id) 94 | for idx,snl_id in enumerate(snlgrp.all_snl_ids): 95 | if snl_id == snlgrp.canonical_snl.snl_id: continue 96 | try: 97 | mpsnl_dict = self._snls.collection.find_one({'snl_id': snl_id}) 98 | mpsnl = MPStructureNL.from_dict(mpsnl_dict) 99 | except: 100 | exc_type, exc_value, exc_traceback = sys.exc_info() 101 | _log.info('%r %r', exc_type, exc_value) 102 | local_mismatch_dict[categories[self.checker_name][-1]].append('%s%d' % (entry, snl_id)) 103 | continue 104 | if self._matcher.fit(mpsnl.structure, snlgrp.canonical_structure): continue 105 | mismatch_snls.append(str(snl_id)) 106 | _log.info('%s %d', entry, snl_id) 107 | if len(mismatch_snls) > 0: 108 | full_entry = '%s%s' % (entry, ','.join(mismatch_snls)) 109 | local_mismatch_dict[categories[self.checker_name][0]].append(full_entry) 110 | _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict) 111 | self._increase_counter(nrow, ncol, local_mismatch_dict) 112 | 113 | class SNLSpaceGroupChecker(SNLGroupBaseChecker): 114 | """compare SG in db with SG from SpacegroupAnalyzer for all SNLs""" 115 | def process_item(self, item, index): 116 | nrow, ncol, snlgroups = super(SNLSpaceGroupChecker, self).process_item(item, index) 117 | local_mismatch_dict = dict((k,[]) for k in categories[self.checker_name]) 118 | category = '' 119 | try: 120 | mpsnl_dict = self._snls.collection.find_one({ 'snl_id': item }) 121 | mpsnl = MPStructureNL.from_dict(mpsnl_dict) 122 | mpsnl.structure.remove_oxidation_states() 123 | sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1) 124 | if sf.get_spacegroup_number() != mpsnl.sg_num: 125 | category = categories[self.checker_name][int(sf.get_spacegroup_number() == 0)] 126 | except: 127 | exc_type, exc_value, exc_traceback = sys.exc_info() 128 | category = categories[0][2] 129 | if category: 130 | local_mismatch_dict[category].append(str(item)) 131 | _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict) 132 | self._increase_counter(nrow, ncol, local_mismatch_dict) 133 | -------------------------------------------------------------------------------- /mpworks/check_snl/builders/init_plotly.py: -------------------------------------------------------------------------------- 1 | import os 2 | try: 3 | import plotly.plotly as py 4 | import plotly.tools as tls 5 | from plotly.graph_objs import * 6 | except ImportError: 7 | py, tls = None, None 8 | 9 | stream_ids = ['zotjax1o9n', '4r7oj2r35i', 'nobvv4bxvw'] 10 | if py is not None: 11 | py.sign_in( 12 | os.environ.get('MP_PLOTLY_USER'), 13 | os.environ.get('MP_PLOTLY_APIKEY'), 14 | stream_ids=stream_ids 15 | ) 16 | 17 | categories = { 18 | 'SNLSpaceGroupChecker': ['SG change', 'SG default', 'others'], 19 | 'SNLGroupMemberChecker': ['mismatch', 'others'], 20 | 'SNLGroupCrossChecker': ['diff. 
SGs', 'same SGs', 'others'], 21 | 'SNLGroupIcsdChecker': ['same ICSDs', 'others'], 22 | } 23 | titles = { 24 | 'SNLSpaceGroupChecker': 'Spacegroup Consistency Check', 25 | 'SNLGroupMemberChecker': 'SNLGroup Members Consistency Check', 26 | 'SNLGroupCrossChecker': 'Cross-Check of Canonical SNLs / SNLGroups', 27 | 'SNLGroupIcsdChecker': 'Cross-Check of 1-to-1 SNLGroup-ICSD mapping', 28 | } 29 | xtitles = { 30 | 'SNLSpaceGroupChecker': '# affected SNLs', 31 | 'SNLGroupMemberChecker': '# affected SNLGroups', 32 | 'SNLGroupCrossChecker': '# affected SNLGroups', 33 | 'SNLGroupIcsdChecker': '# affected SNLGroups', 34 | } 35 | colorbar_titles = { 36 | 'SNLSpaceGroupChecker': '#SNLs', 37 | 'SNLGroupMemberChecker': '#SNLGroups', 38 | 'SNLGroupCrossChecker': '#SNLGroups', 39 | 'SNLGroupIcsdChecker': '#SNLGroups', 40 | } 41 | 42 | if __name__ == '__main__': 43 | from argparse import ArgumentParser 44 | parser = ArgumentParser() 45 | parser.add_argument('name', help='checker name', type=str) 46 | parser.add_argument('ncols', help='number of columns', type=int) 47 | parser.add_argument('nrows', help='number of rows', type=int) 48 | args = parser.parse_args() 49 | if py is not None: 50 | maxpoints = args.ncols*args.nrows 51 | data = Data() 52 | data.append(Bar( 53 | y=categories[args.name], x=[0]*len(categories[args.name]), 54 | orientation='h', xaxis='x1', yaxis='y1', 55 | stream=Stream(token=stream_ids[1], maxpoints=2) 56 | )) 57 | data.append(Heatmap( 58 | z=[[0]*args.ncols for i in range(args.nrows)], 59 | stream=Stream(token=stream_ids[0], maxpoints=maxpoints), 60 | xaxis='x2', yaxis='y2', colorscale='Bluered', zauto=True, 61 | colorbar=ColorBar(title=colorbar_titles[args.name]) 62 | )) 63 | data.append(Scatter( 64 | y=[], x=[], xaxis='x1', yaxis='y1', mode='markers', 65 | stream=Stream(token=stream_ids[2], maxpoints=10000) 66 | )) 67 | fig = tls.make_subplots(rows=1, cols=2) 68 | layout = Layout( 69 | showlegend=False, hovermode='closest', 70 | title = titles[args.name], 71 | xaxis1=XAxis( 72 | domain=[0,0.49], showgrid=False, anchor='y1', 73 | title=xtitles[args.name], autorange=True 74 | ), 75 | yaxis1=YAxis( 76 | showgrid=False, title='error category', anchor='x1', 77 | autorange=True 78 | ), 79 | xaxis2=XAxis( 80 | domain=[0.51,1.], showgrid=False, anchor='y2', 81 | title='CPU index = x+%dy' % args.ncols, 82 | autotick=False, tick0=0, dtick=1 83 | ), 84 | yaxis2=YAxis( 85 | showgrid=False, anchor='x2', 86 | autotick=False, tick0=0, dtick=1 87 | ), 88 | ) 89 | fig['data'] = data 90 | fig['layout'] = layout 91 | py.plot(fig, filename='builder_stream', auto_open=False) 92 | else: 93 | print 'plotly ImportError' 94 | -------------------------------------------------------------------------------- /mpworks/check_snl/etc/check_snl_reqs.txt: -------------------------------------------------------------------------------- 1 | Django==1.7 2 | FireWorks==0.92 3 | Jinja2==2.7.3 4 | MarkupSafe==0.23 5 | PyYAML==3.11 6 | docutils==0.12 7 | enum34==1.0 8 | mongomock==2.0.0 9 | monty==0.5.6 10 | numpy==1.9.0 11 | plotly==1.2.9 12 | prettytable==0.7.2 13 | pybtex==0.18 14 | pyhull==1.5.3 15 | pymatgen==3.0.5 16 | -e git+github:materialsproject/pymatgen-db.git@ba22fa86f89bb902695cb9220d4186335b07bf40#egg=pymatgen_db-master 17 | pymongo==2.7.2 18 | python-dateutil==2.2 19 | requests==2.4.1 20 | sentinels==0.0.6 21 | six==1.8.0 22 | smoqe==0.1.3 23 | wsgiref==0.1.2 24 | -------------------------------------------------------------------------------- /mpworks/check_snl/icsd.py: 
-------------------------------------------------------------------------------- 1 | import os, csv 2 | from builders.init_plotly import py 3 | if py is not None: 4 | fig = py.get_figure("https://plot.ly/~plotly.materialsproject/112") 5 | with open('mpworks/check_snl/results/shared_icsds.csv', 'wb') as f: 6 | writer = csv.writer(f) 7 | writer.writerow([ 8 | 'snlgroup_id 1', 'snlgroup_id 2', 'snl_id 1', 'snl_id 2', 'shared icsd_id', 'matches' 9 | ]) 10 | for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']): 11 | for line in text.split('<br>
'): 12 | before_colon, after_colon = line.split(':') 13 | snlgroup1, snlgroup2 = map(int, before_colon[1:-1].split(',')) 14 | snls, icsd_matches = after_colon.split('->') 15 | snl1, snl2 = map(int, snls[2:-2].split(',')) 16 | icsd, matches = icsd_matches.strip().split(' ') 17 | writer.writerow([snlgroup1, snlgroup2, snl1, snl2, int(icsd), matches[1:-1]]) 18 | -------------------------------------------------------------------------------- /mpworks/check_snl/logs/canonicals.pbs.o10425471: -------------------------------------------------------------------------------- 1 | Loading torque prologue 2 | Done 3 | 4 | ---------------------------------------------------------------- 5 | Jobs exit status code is 0 6 | Job canonicals.pbs/10425471.cvrsvc09-ib completed Sun Oct 19 14:04:44 PDT 2014 7 | Submitted by huck/huck using matcomp 8 | Job Limits: neednodes=1:ppn=8,nodes=1:ppn=8,walltime=80:00:00 9 | Job Resources used: cput=49:34:02,mem=1900868kb,vmem=7360940kb,walltime=15:55:50 10 | Nodes used: mc0857 11 | 12 | Killing any leftover processes... 13 | 14 | Job completed. 15 | -------------------------------------------------------------------------------- /mpworks/check_snl/logs/canonicals.pbs.o11010157: -------------------------------------------------------------------------------- 1 | Loading torque prologue 2 | Done 3 | 4 | ---------------------------------------------------------------- 5 | Jobs exit status code is 0 6 | Job canonicals.pbs/11010157.cvrsvc09-ib completed Fri Dec 12 07:25:57 PST 2014 7 | Submitted by huck/huck using matcomp 8 | Job Limits: neednodes=1:ppn=8,nodes=1:ppn=8,walltime=80:00:00 9 | Job Resources used: cput=45:24:43,energy_used=0,mem=1859900kb,vmem=6966628kb,walltime=13:39:08 10 | Nodes used: mc0823 11 | 12 | Killing any leftover processes... 13 | 14 | Job completed. 15 | -------------------------------------------------------------------------------- /mpworks/check_snl/logs/groupmembers.pbs.o10460446: -------------------------------------------------------------------------------- 1 | Loading torque prologue 2 | Done 3 | 4 | ---------------------------------------------------------------- 5 | Jobs exit status code is 0 6 | Job groupmembers.pbs/10460446.cvrsvc09-ib completed Wed Oct 22 21:43:12 PDT 2014 7 | Submitted by huck/huck using matcomp 8 | Job Limits: neednodes=1:ppn=8,nodes=1:ppn=8,walltime=80:00:00 9 | Job Resources used: cput=21:51:14,mem=1184900kb,vmem=6660236kb,walltime=03:51:30 10 | Nodes used: mc0860 11 | 12 | Killing any leftover processes... 13 | 14 | Job completed. 15 | -------------------------------------------------------------------------------- /mpworks/check_snl/logs/groupmembers.pbs.o11015910: -------------------------------------------------------------------------------- 1 | Loading torque prologue 2 | Done 3 | 4 | ---------------------------------------------------------------- 5 | Jobs exit status code is 0 6 | Job groupmembers.pbs/11015910.cvrsvc09-ib completed Fri Dec 12 14:37:50 PST 2014 7 | Submitted by huck/huck using matcomp 8 | Job Limits: neednodes=1:ppn=8,nodes=1:ppn=8,walltime=80:00:00 9 | Job Resources used: cput=10:49:53,energy_used=0,mem=1209836kb,vmem=6313744kb,walltime=01:32:23 10 | Nodes used: mc0640 11 | 12 | Killing any leftover processes... 13 | 14 | Job completed. 
15 | -------------------------------------------------------------------------------- /mpworks/check_snl/logs/icsd.pbs.o12673701: -------------------------------------------------------------------------------- 1 | Loading torque prologue 2 | Done 3 | 4 | ---------------------------------------------------------------- 5 | Jobs exit status code is 0 6 | Job icsd.pbs/12673701.cvrsvc09-ib completed Mon Jun 1 20:12:48 PDT 2015 7 | Submitted by huck/huck using matcomp 8 | Job Limits: neednodes=1:ppn=8,nodes=1:ppn=8,walltime=80:00:00 9 | Job Resources used: cput=00:34:13,energy_used=0,mem=1562980kb,vmem=4700152kb,walltime=00:05:13 10 | Nodes used: mc0650 11 | 12 | Killing any leftover processes... 13 | 14 | Job completed. 15 | -------------------------------------------------------------------------------- /mpworks/check_snl/logs/icsd.pbs.o12687121: -------------------------------------------------------------------------------- 1 | Loading torque prologue 2 | Done 3 | 4 | ---------------------------------------------------------------- 5 | Jobs exit status code is 0 6 | Job icsd.pbs/12687121.cvrsvc09-ib completed Wed Jun 3 14:52:02 PDT 2015 7 | Submitted by huck/huck using matcomp 8 | Job Limits: neednodes=1:ppn=8,nodes=1:ppn=8,walltime=80:00:00 9 | Job Resources used: cput=00:50:38,energy_used=0,mem=1512356kb,vmem=4058860kb,walltime=00:07:19 10 | Nodes used: mc0637 11 | 12 | Killing any leftover processes... 13 | 14 | Job completed. 15 | -------------------------------------------------------------------------------- /mpworks/check_snl/logs/spacegroups.pbs.o10417417: -------------------------------------------------------------------------------- 1 | Loading torque prologue 2 | Done 3 | 4 | ---------------------------------------------------------------- 5 | Jobs exit status code is 0 6 | Job spacegroups.pbs/10417417.cvrsvc09-ib completed Sat Oct 18 09:36:17 PDT 2014 7 | Submitted by huck/huck using matcomp 8 | Job Limits: neednodes=1:ppn=8,nodes=1:ppn=8,walltime=01:30:00 9 | Job Resources used: cput=02:21:20,mem=1285940kb,vmem=6775360kb,walltime=00:27:52 10 | Nodes used: mc0816 11 | 12 | Killing any leftover processes... 13 | 14 | Job completed. 15 | -------------------------------------------------------------------------------- /mpworks/check_snl/logs/spacegroups.pbs.o10449725: -------------------------------------------------------------------------------- 1 | Loading torque prologue 2 | Done 3 | 4 | ---------------------------------------------------------------- 5 | Jobs exit status code is 0 6 | Job spacegroups.pbs/10449725.cvrsvc09-ib completed Tue Oct 21 16:41:06 PDT 2014 7 | Submitted by huck/huck using matcomp 8 | Job Limits: neednodes=1:ppn=8,nodes=1:ppn=8,walltime=01:30:00 9 | Job Resources used: cput=02:28:33,mem=1358216kb,vmem=6833228kb,walltime=00:22:59 10 | Nodes used: mc0822 11 | 12 | Killing any leftover processes... 13 | 14 | Job completed. 15 | -------------------------------------------------------------------------------- /mpworks/check_snl/plots.py: -------------------------------------------------------------------------------- 1 | import datetime, math 2 | from pandas.io.parsers import read_csv 3 | from pyana.ccsgp.ccsgp import make_plot 4 | from pyana.ccsgp.utils import getOpts 5 | from pandas import Series 6 | from collections import OrderedDict 7 | import numpy as np 8 | from itertools import tee, izip 9 | 10 | import plotly.plotly as py 11 | from plotly.graph_objs import * 12 | 13 | def pairwise(iterable): 14 | "s -> (s0,s1), (s1,s2), (s2, s3), ..." 
15 | a, b = tee(iterable) 16 | next(b, None) 17 | return izip(a, b) 18 | 19 | def _get_shades_of_gray(num_colors, every=3): 20 | colors=[] 21 | for i in range(0, 8*every*num_colors, 8*every): 22 | colors.append('rgb'+str((i, i, i))) 23 | return colors 24 | 25 | def sg1_vs_sg2(): 26 | """plot SG #1 vs #2 via ccsgp""" 27 | df = read_csv('mpworks/check_snl/results/bad_snlgroups_2.csv') 28 | df_view = df[['composition', 'sg_num 1', 'sg_num 2']] 29 | grouped = df_view.groupby('composition') 30 | data = OrderedDict() 31 | for i, (composition, group) in enumerate(grouped): 32 | del group['composition'] 33 | for col in ['dx', 'dy1', 'dy2']: 34 | group[col] = Series([0]*group.shape[0], index=group.index) 35 | data[composition] = group.as_matrix() 36 | #if i > 10: break 37 | nSets = len(data) 38 | make_plot( 39 | data = data.values(), 40 | properties = [ getOpts(i) for i in xrange(nSets) ], 41 | titles = data.keys(), 42 | xlabel = 'SG #2', ylabel = 'SG #1', 43 | title="Spacegroups of 1927 matching SNLGroups", 44 | xr = [-1,300], yr = [-1,300] 45 | ) 46 | 47 | def sg1_vs_sg2_plotly(): 48 | """plot SG #1 vs #2 via plotly""" 49 | out_fig = Figure() 50 | bisectrix = Scatter(x=[0,230], y=[0,230], mode='lines', name='bisectrix', showlegend=False) 51 | inmatdb_df = read_csv('mpworks/check_snl/results/bad_snlgroups_2_in_matdb.csv') 52 | inmatdb_text = map(','.join, zip( 53 | inmatdb_df['task_id 1'], inmatdb_df['task_id 2'] 54 | )) 55 | inmatdb_trace = Scatter( 56 | x=inmatdb_df['sg_num 2'].as_matrix(), y=inmatdb_df['sg_num 1'].as_matrix(), 57 | text=inmatdb_text, mode='markers', name='in MatDB' 58 | ) 59 | notinmatdb_df = read_csv('mpworks/check_snl/results/bad_snlgroups_2_notin_matdb.csv') 60 | notinmatdb_text = map(','.join, zip( 61 | map(str, notinmatdb_df['snlgroup_id 1']), map(str, notinmatdb_df['snlgroup_id 2']) 62 | )) 63 | notinmatdb_trace = Scatter( 64 | x=notinmatdb_df['sg_num 2'].as_matrix()+0.1, 65 | y=notinmatdb_df['sg_num 1'].as_matrix()+0.1, 66 | text=notinmatdb_text, mode='markers', name='not in MatDB' 67 | ) 68 | out_fig['data'] = Data([bisectrix, notinmatdb_trace, inmatdb_trace]) 69 | out_fig['layout'] = Layout( 70 | hovermode='closest', 71 | title='Spacegroup Assignment Comparison of matching Canonical SNLs', 72 | xaxis=XAxis(showgrid=False, title='SG #2', range=[0,230]), 73 | yaxis=YAxis(showgrid=False, title='SG #1', range=[0,230]), 74 | ) 75 | filename = 'spacegroup_canonicals_' 76 | filename += datetime.datetime.now().strftime('%Y-%m-%d') 77 | py.plot(out_fig, filename=filename, auto_open=False) 78 | py.image.save_as(out_fig, 'canonicals_spacegroups.png') 79 | 80 | def delta_bandgap_vs_delta_energy(): 81 | """different vs. similar selection and volume changes for matching diff. SGs""" 82 | out_fig = Figure() 83 | thr1 = Scatter(x=[0.01,0.01], y=[.1e-3,5], mode='lines', name='thr1', showlegend=False) 84 | thr2 = Scatter(x=[.4e-6,1.3], y=[0.1,0.1], mode='lines', name='thr2', showlegend=False) 85 | inmatdb_df = read_csv('mpworks/check_snl/results/bad_snlgroups_2_in_matdb.csv') 86 | inmatdb_df_view = inmatdb_df.loc[inmatdb_df['category']=='diff. SGs'] 87 | inmatdb_df_view = inmatdb_df_view.loc[inmatdb_df_view['delta_bandgap']>1e-6] 88 | inmatdb_text = map(','.join, zip( 89 | inmatdb_df_view['task_id 1'], inmatdb_df_view['task_id 2'] 90 | )) 91 | dvols = inmatdb_df_view['delta_volume_per_atom'] 92 | inmatdb_text = map('
<br>'.join, zip(inmatdb_text, map('dvol = {}'.format, dvols))) 93 | dvol_bins = np.logspace( 94 | math.log10(min(dvols)), math.log10(max(dvols)), num=10 95 | ) 96 | colorscale = _get_shades_of_gray(10) 97 | 98 | def get_dvol_index(dvol): 99 | for i, (a, b) in enumerate(pairwise(dvol_bins)): 100 | if dvol >= a and dvol < b: 101 | return i 102 | return len(dvol_bins)-1 # catch max 103 | 104 | colors = [colorscale[i] for i in map(get_dvol_index, dvols)] 105 | inmatdb_trace = Scatter( 106 | x=inmatdb_df_view['delta_energy'].as_matrix(), 107 | y=inmatdb_df_view['delta_bandgap'].as_matrix(), 108 | text=inmatdb_text, mode='markers', 109 | marker=Marker(color=colors), 110 | name='dVdBdE', showlegend=False 111 | ) 112 | out_fig['data'] = Data([thr1, thr2, inmatdb_trace]) 113 | out_fig['layout'] = Layout( 114 | hovermode='closest', 115 | title='Separation of different/similar matching SNLs w/ different SGs', 116 | xaxis=XAxis(showgrid=False, title='delta_energy', type='log', autorange=True), 117 | yaxis=YAxis(showgrid=False, title='delta_bandgap', type='log', autorange=True), 118 | ) 119 | filename = 'canonicals_deltas_' 120 | filename += datetime.datetime.now().strftime('%Y-%m-%d') 121 | py.plot(out_fig, filename=filename, auto_open=False) 122 | py.image.save_as(out_fig, 'canonicals_deltas.png') 123 | 124 | def rmsdist_histos(): 125 | """#different/similar vs rms_dist""" 126 | out_fig = Figure() 127 | inmatdb_df = read_csv('mpworks/check_snl/results/bad_snlgroups_2_in_matdb.csv') 128 | inmatdb_df_view = inmatdb_df.loc[inmatdb_df['category']=='diff. SGs'] 129 | different = inmatdb_df_view.loc[inmatdb_df_view['scenario']=='different'] 130 | similar = inmatdb_df_view.loc[inmatdb_df_view['scenario']=='similar'] 131 | 132 | def rmsdist(tupstr): 133 | if isinstance(tupstr, float) and math.isnan(tupstr): return None 134 | tup = map(float, tupstr[1:-1].split(',')) 135 | return math.sqrt(tup[0]*tup[0]+tup[1]*tup[1]) 136 | 137 | different_rmsdist = filter(None, map(rmsdist, different['rms_dist'])) 138 | similar_rmsdist = filter(None, map(rmsdist, similar['rms_dist'])) 139 | different_trace = Histogram(x=different_rmsdist, name='different', opacity=0.75) 140 | similar_trace = Histogram(x=similar_rmsdist, name='similar', opacity=0.75) 141 | out_fig['data'] = Data([different_trace,similar_trace]) 142 | out_fig['layout'] = Layout( 143 | title='rms_dist of different/similar matching SNLs w/ different SGs', 144 | xaxis=XAxis(showgrid=False, title='sqrt(rms_dist)'), 145 | barmode='overlay' 146 | ) 147 | filename = 'canonicals_rmsdist_' 148 | filename += datetime.datetime.now().strftime('%Y-%m-%d') 149 | py.plot(out_fig, filename=filename, auto_open=False) 150 | py.image.save_as(out_fig, 'canonicals_rmsdist.png') 151 | 152 | if __name__ == '__main__': 153 | #sg1_vs_sg2_plotly() 154 | #delta_bandgap_vs_delta_energy() 155 | rmsdist_histos() 156 | -------------------------------------------------------------------------------- /mpworks/check_snl/results/bad_snlgroups.csv: -------------------------------------------------------------------------------- 1 | snlgroup_id,snlgroup_key,mismatching snl_ids 2 | 8138,C1 Cd1 O3--166,172170 3 | 45845,Rh2 Sn1 V1--139,161832 4 | 55382,Co1 O6 Sr2 W1--14,342991 5 | 85534,B1 Gd1 O3--194,168755 6 | 88736,Ga1 H4 Sr1--62,181678 7 | 92448,O10 Sr1--130,346957 8 | 113268,Ca1 O6 Ti2--14,347840 9 | 122665,Ba2 Co3.2 Fe0.8 O9.32 Sr2--62,272920 10 | 161735,Li3 O29 P8 V3--1,406930 11 | 164633,Li3 O29 P8 V3--1,406968 12 | --------------------------------------------------------------------------------
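Aside on the results format above: each snlgroup_key in bad_snlgroups.csv encodes a composition and a spacegroup number joined by '--', the same convention that builders/base.py and builders/core.py take apart via snlgroup_key.split('--'). A minimal sketch (not part of the repo; file path taken from the listing above) for tallying mismatching SNLGroups per spacegroup:

```python
from collections import Counter
import csv

# Count mismatching SNLGroups per spacegroup number from bad_snlgroups.csv.
per_sg = Counter()
with open('mpworks/check_snl/results/bad_snlgroups.csv') as f:
    for row in csv.DictReader(f):
        # snlgroup_key looks like 'C1 Cd1 O3--166': composition--spacegroup
        composition, sg_num = row['snlgroup_key'].split('--')
        per_sg[int(sg_num)] += 1

for sg_num, count in per_sg.most_common():
    print('SG %d: %d mismatching group(s)' % (sg_num, count))
```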
/mpworks/check_snl/results/canonicals_crosscheck.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/canonicals_crosscheck.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/canonicals_deltas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/canonicals_deltas.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/canonicals_rmsdist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/canonicals_rmsdist.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/canonicals_spacegroups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/canonicals_spacegroups.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/groupmembers_ceder_dahn.csv: -------------------------------------------------------------------------------- 1 | snl_id 2 | 88073 3 | 88089 4 | 88135 5 | 88179 6 | 88126 7 | 88157 8 | 88233 9 | 88312 10 | 88260 11 | 88263 12 | 88270 13 | 88314 14 | 88391 15 | 88407 16 | 88475 17 | 88453 18 | 88444 19 | 88497 20 | 88551 21 | 88588 22 | 88581 23 | 88578 24 | 88632 25 | 88630 26 | 88709 27 | 88725 28 | 88771 29 | 88815 30 | 88762 31 | 88793 32 | 88869 33 | 88948 34 | 88896 35 | 88899 36 | 88906 37 | 88950 38 | 89598 39 | 89636 40 | 89717 41 | 89641 42 | 89687 43 | 89707 44 | 89739 45 | 89784 46 | 89889 47 | 89918 48 | 89932 49 | 90812 50 | 90879 51 | 90731 52 | 90834 53 | 91027 54 | 91788 55 | 91826 56 | 91907 57 | 91831 58 | 91877 59 | 91897 60 | 91929 61 | 91974 62 | 92079 63 | 92108 64 | 92122 65 | -------------------------------------------------------------------------------- /mpworks/check_snl/results/groupmembers_check.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/groupmembers_check.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/groupmembers_mismatches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/groupmembers_mismatches.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/spacegroup_changes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/spacegroup_changes.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/spacegroup_changes_2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/spacegroup_changes_2.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/spacegroup_consistency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/spacegroup_consistency.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/spacegroup_consistency_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/check_snl/results/spacegroup_consistency_2.png -------------------------------------------------------------------------------- /mpworks/check_snl/results/zero_occu_sites.csv: -------------------------------------------------------------------------------- 1 | snl_id,num_zero_occu_sites,icsd_id,is_valid,formula 2 | 42800,48,170585,False,B3 C16 H60 N12 O12 3 | 105472,32,,False,C8 H24 O9 Sn4 V2 4 | 121415,4,,False,Na64.956 O144 Si48 Y10.348 5 | 292835,2,246062,True,Bi4 Pb6 S12 6 | 292836,2,246063,True,Bi4 Pb6 S12 7 | 294087,2,247332,True,Fe4 Li16 Nd36 O78 Rh6 8 | 294705,2,249070,True,Er4 O14 Ti4 9 | 294718,2,249086,True,Lu5.344 O13.34 Ti2.656 10 | 310518,4,421537,True,B3 Eu5 F1 O9 11 | 318910,1,617916,True,C1 Ga1 Mn3 12 | 332323,1,710025,True,Al1.449 Ca0.226546906188 O6 Sm0.786644358342 Sr0.98680873547 Ta0.551 13 | 296534,8,260544,True,Pd41.02 Zn148.9 14 | 106075,2,,False,Al3.996 Ba2 N9.998 O4.002 Si3.998 Yb2 15 | 7626,2,246585,True,Co1 Na1 O2 16 | 219003,1,64642,True,Ba2 Cu2.8 Ni0.2 O6.84 Y1 17 | 108151,1,,False,Ba2 Cu2.8 Ni0.2 O6.84 Y1 18 | 120133,2,,False,Fe2 Zr1 19 | 106233,2,,False,Bi2 Pb3 S6 20 | 121937,48,,False,Ba7.012 Ca64.988 O192 P48 21 | 294712,2,249077,True,Ho2 O7 Ti2 22 | 291877,2,245097,True,Ba1 Mn2 O5 Tb1 23 | 292510,2,245736,True,Al3.996 Ba2 N9.992 O3.996 Si3.998 Yb2 24 | 252509,2,100538,True,Fe2 H0.38 Ti2 25 | 268691,44,158005,False,Ba9.52 Ca62.48 O192 P48 26 | 41108,1,644633,True,N1 Pr1 27 | 297582,2,261622,True,Fe1.4 Li2.8 O8 V1.8 28 | 309141,4,419479,True,Na64.956 O144 Si48 Y10.348 29 | 273729,32,163089,False,C8 H24 O9 Sn4 V2 30 | 294269,2,247516,True,Cu0.8 Ga1.2 In1.2 Se4 31 | 7351,4,246216,True,Na0.38 Si34 32 | 284680,2,174201,True,Mo14.966 Pt10.508 Si8.526 33 | 284870,8,174390,True,La1 Na1 O6 Zr2 34 | 293350,2,246585,True,Co1 Na1 O2 35 | 297511,12,261552,True,Al8.0004 Ca12 O48 Sn6.4 Ti5.5996 36 | 291895,20,245117,True,Al7.82 Ta22.18 37 | 291884,2,245104,True,Ba2 Mn4 O11 Tb2 38 | 291876,2,245096,True,Ba1 Mn2 O5 Tb1 39 | 281073,48,170584,False,B3 C16 H60 N12 O12 40 | 309254,12,419648,True,Ge16 Hf19.128 Zr0.872 41 | 292566,6,245793,True,O13.34 Ti2.884 Yb5.116 42 | 292641,1,245868,True,H1.5 Mg0.65 Sc0.35 43 | 292926,6,246153,True,Li2 Mn2 O8 Ti2 44 | 292928,8,246155,True,Li8 Mn7.9984 O32 Ti7.9992 45 | 294699,2,249064,True,Dy4.2 O13.896 Ti3.8 46 | 294270,2,247517,True,Cu0.5 Ga1.249 In1.25 Se4 47 | 294719,2,249087,True,O14 Tb4 Ti4 48 | 328292,1,644633,True,N1 Pr1 49 | 294730,2,249105,True,O13.34 Ti2.68 Yb5.32 50 | 297828,4,261867,True,Co5.26 O7.92 Zn0.74 51 | 322661,1,626364,True,Cr1 N1 52 | 325498,1,633751,True,Fe1 53 | 268690,48,158004,False,Ba7.012 Ca64.988 
O192 P48 54 | 292511,2,245737,True,Al3.996 Ba2 N9.998 O4.002 Si3.998 Yb2 55 | 326762,1,638608,True,Hf1 56 | 97570,2,,False,Bi2 Pb3 S6 57 | 105830,2,,False,Lu4 O14 Ti4 58 | 293122,24,246357,True,Fe2.008 Li16 Mn7.992 Nd36 O78 59 | 105832,2,,False,Lu5.344 O13.34 Ti2.656 60 | 281074,48,170585,False,B3 C16 H60 N12 O12 61 | 291789,4,245007,True,Bi4 Ca2 Nb3.436 O24 Sr2 Ti1.564 62 | 291878,2,245098,True,Ba1 Mn2 O5 Tb1 63 | 260319,24,110042,False,C5 H18 Mo1 N4 O4 64 | 292514,2,245740,True,Al3.996 Ba2 N9.992 O3.996 Si3.998 Yb2 65 | 296556,4,260567,True,Fe3.88 Li4 Mn0.12 O16 P4 66 | 40210,1,626364,True,Cr1 N1 67 | 105729,20,,False,Al7.82 Ta22.18 68 | 320830,1,622288,True,Th1 69 | 40329,1,629460,True,Cu1 70 | 106072,2,,False,Al3.996 Ba2 N9.992 O3.996 Si3.998 Yb2 71 | 294698,2,249063,True,Dy4 O14 Ti4 72 | 106322,6,,False,Al8 Mg8 O20 73 | 294084,2,247329,True,Fe6.928 La36 Li16.032 O78 Rh3.04 74 | 106234,2,,False,Bi2 Pb3 S6 75 | 329472,1,648970,True,Ta1 76 | 292513,2,245739,True,Al3.996 Ba2 N9.998 O4.002 Si3.998 Yb2 77 | 106235,2,,False,Bi2 Pb3 S6 78 | 39419,1,607999,True,Ge1 Ni3 79 | 40497,1,633751,True,Fe1 80 | 291883,2,245103,True,Ba2 Mn4 O11 Tb2 81 | 39763,1,617916,True,C1 Ga1 Mn3 82 | 41393,1,648970,True,Ta1 83 | 102106,24,,False,C5 Cr1 H18 N4 O4 84 | 120768,48,,False,B3 C16 H60 N12 O12 85 | 105813,2,,False,Dy4.2 O13.896 Ti3.8 86 | 106337,24,,False,Fe2.008 Li16 Mn7.992 Nd36 O69 87 | 304744,6,412847,True,Ga15.654 In92.373 Na36 Se180 88 | 292565,4,245792,True,Ho5.2 O13.32 Ti2.8 89 | 34840,1,288,True,Li1.44 O12 W4 90 | 260318,24,110041,False,C5 Cr1 H18 N4 O4 91 | 305188,2,413471,True,Ba2 Ca2 Co5.76 Fe2.24 O14 92 | 105845,2,,False,O13.34 Ti2.68 Yb5.32 93 | 6169,2,245103,True,Ba2 Mn4 O11 Tb2 94 | 315028,1,607999,True,Ge1 Ni3 95 | 102107,24,,False,C5 H18 Mo1 N4 O4 96 | 105833,2,,False,O14 Tb4 Ti4 97 | 325399,2,633464,True,Fe2 Zr1 98 | 105839,2,,False,O14 Ti4 Tm4 99 | 105826,2,,False,Ho2 O7 Ti2 100 | 106073,2,,False,Al3.996 Ba2 N9.998 O4.002 Si3.998 Yb2 101 | 6170,2,245104,True,Ba2 Mn4 O11 Tb2 102 | 105819,2,,False,Er4 O14 Ti4 103 | 40695,1,638608,True,Hf1 104 | 101391,2,,False,Ni3.046 Sn0.6668 105 | 292989,4,246216,True,Na0.38 Si34 106 | 105674,4,,False,Bi4 Ca2 Nb3.436 O24 Sr2 Ti1.564 107 | 300694,2,391238,True,Ni3.046 Sn2.0004 108 | 106284,8,,False,Li8 Mn7.9984 O32 Ti7.9992 109 | 121938,44,,False,Ba9.52 Ca62.48 O192 P48 110 | 292838,2,246065,True,Bi4 Pb6 S12 111 | 323827,1,629460,True,Cu1 112 | 106093,6,,False,O13.34 Ti2.884 Yb5.116 113 | 10585,4,39215,True,Co1 H8 O10 V2 114 | 105812,2,,False,Dy4 O14 Ti4 115 | 294716,2,249084,True,Lu4 O14 Ti4 116 | 277626,2,167022,True,Cu1.85 In1.85 Mn0.3 Se3.868 117 | 62761,6,,True,Ga15.654 In92.373 Na36 Se180 118 | 294724,2,249092,True,O14 Ti4 Tm4 119 | 292837,2,246064,True,Bi4 Pb6 S12 120 | 40070,1,622288,True,Th1 121 | 182119,1,288,True,Li1.44 O12 W4 122 | 293053,6,246279,False,Al8 Mg8 O20 123 | 200686,4,39215,True,Co1 H8 O10 V2 124 | 119246,2,,False,Ba2 Ca2 Co5.76 Fe2.24 O14 125 | 106076,2,,False,Al3.996 Ba2 N9.992 O3.996 Si3.998 Yb2 126 | 106092,4,,False,Ho5.2 O13.32 Ti2.8 127 | 121461,12,,False,Ge16 Hf19.128 Zr0.872 128 | 106282,6,,False,Li2 Mn2 O8 Ti2 129 | -------------------------------------------------------------------------------- /mpworks/check_snl/scripts/occu_sites_query.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 3 | sma = SNLMongoAdapter.auto_load() 4 | with open('mpworks/check_snl/results/zero_occu_sites.csv', 'wb') as f: 5 | writer = 
csv.writer(f) 6 | writer.writerow([ 7 | 'snl_id', 'num_zero_occu_sites', 'icsd_id', 'is_valid', 'formula' 8 | ]) 9 | for doc in sma.snl.aggregate([ 10 | #{ '$match': { 'about._icsd.icsd_id': { '$exists': True } } }, 11 | { '$unwind': '$sites' }, 12 | { '$unwind': '$sites.species' }, 13 | { '$project': { 14 | 'snl_id': 1, 'sites.species.occu': 1, '_id': 0, 15 | 'about._icsd.icsd_id': 1, 'is_valid': 1, 16 | 'reduced_cell_formula_abc': 1 17 | } }, 18 | { '$match': { 'sites.species.occu': 0.0 } }, 19 | { '$group': { 20 | '_id': '$snl_id', 21 | 'num_zero_occu_sites': { '$sum': 1 }, 22 | 'icsd_ids': { '$addToSet': '$about._icsd.icsd_id' }, 23 | 'is_valid': { '$addToSet': '$is_valid' }, 24 | 'formula': { '$addToSet': '$reduced_cell_formula_abc' } 25 | } }, 26 | ], cursor={}): 27 | icsd_id = doc['icsd_ids'][0] if len(doc['icsd_ids']) > 0 else '' 28 | row = [ 29 | doc['_id'], doc['num_zero_occu_sites'], icsd_id, doc['is_valid'][0], 30 | doc['formula'][0] 31 | ] 32 | writer.writerow(row) 33 | 34 | -------------------------------------------------------------------------------- /mpworks/check_snl/scripts/sg_changes_examples.py: -------------------------------------------------------------------------------- 1 | import os 2 | import plotly.plotly as py 3 | from pandas import DataFrame 4 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 5 | 6 | sma = SNLMongoAdapter.auto_load() 7 | sma2 = SNLMongoAdapter.from_file( 8 | os.path.join(os.environ['DB_LOC'], 'materials_db.yaml') 9 | ) 10 | 11 | 12 | def _get_snlgroup_id(snl_id): 13 | return sma.snlgroups.find_one( 14 | {'all_snl_ids': int(snl_id)}, 15 | {'snlgroup_id': 1, '_id': 0} 16 | )['snlgroup_id'] 17 | 18 | def _get_mp_id(snlgroup_id): 19 | mat = sma2.database.materials.find_one( 20 | {'snlgroup_id_final': snlgroup_id}, 21 | {'_id': 0, 'task_id': 1} 22 | ) 23 | if mat is not None: 24 | return mat['task_id'] 25 | return 'not found' 26 | 27 | def _get_mp_link(mp_id): 28 | if mp_id == 'not found': return mp_id 29 | url = 'link:$$https://materialsproject.org/materials/' 30 | url += mp_id 31 | url += '$$[%s]' % mp_id 32 | return url 33 | 34 | fig = py.get_figure('tschaume',11) 35 | df = DataFrame.from_dict(fig['data'][1]).filter(['x','y','text']) 36 | grouped_x = df.groupby('x') 37 | print '|===============================' 38 | print '| old SG | close to bisectrix | far from bisectrix' 39 | for n,g in grouped_x: 40 | if g.shape[0] < 2: continue # at least two entries at same old SG 41 | grouped_y = g.groupby('y') 42 | if len(grouped_y.groups) < 2: continue # at least two different entries 43 | g['diff'] = g['x'] - g['y'] 44 | gs = g.sort('diff') # first entry: closest to bisectrix, last entry: farthest 45 | first, last = gs.iloc[0], gs.iloc[-1] 46 | ratios = [ 47 | float(abs(first['diff']))/float(first['x']), 48 | float(abs(last['diff']))/float(last['x']) 49 | ] 50 | if ratios[0] > 0.2 or ratios[1] < 0.8: continue 51 | snlgroup_ids = _get_snlgroup_id(first['text']), _get_snlgroup_id(last['text']) 52 | mp_ids = _get_mp_id(snlgroup_ids[0]), _get_mp_id(snlgroup_ids[1]) 53 | print '| %d | %d (%d) -> %d -> %s | %d (%d) -> %d -> %s' % ( 54 | first['x'], 55 | first['text'], first['y'], snlgroup_ids[0], _get_mp_link(mp_ids[0]), 56 | last['text'], last['y'], snlgroup_ids[1], _get_mp_link(mp_ids[1]) 57 | ) 58 | print '|===============================' 59 | -------------------------------------------------------------------------------- /mpworks/check_snl/submit/canonicals.pbs: -------------------------------------------------------------------------------- 
1 | #!/bin/bash -login 2 | #PBS -A matcomp 3 | #PBS -q matgen_prior 4 | #PBS -l nodes=1:ppn=8 5 | #PBS -l walltime=80:00:00 6 | 7 | module load python/2.7.3 8 | module swap numpy numpy/1.8.1 9 | module load virtualenv/1.8.2 10 | module load virtualenvwrapper 11 | source $HOME/ph_playground/virtenv_ph_playground/bin/activate 12 | export VENV_LOC=/global/u1/h/huck/ph_playground/virtenv_ph_playground/bin/activate 13 | export SCRIPT_LOC=/global/u1/h/huck/ph_playground/config/scripts 14 | 15 | python -m mpworks.check_snl.builders 2 4 2 16 | sleep 5 17 | mpirun -np 1 mgbuild run -v mpworks.check_snl.builders.SNLGroupCrossChecker snlgroups=$HOME/snlgroups_mpirun.json ncols=4 -n 8 18 | -------------------------------------------------------------------------------- /mpworks/check_snl/submit/check_snl.pbs: -------------------------------------------------------------------------------- 1 | #!/bin/bash -login 2 | #PBS -A matcomp 3 | #PBS -q matgen_prior 4 | #PBS -l nodes=3:ppn=7 5 | #PBS -l walltime=01:00:00 6 | #PBS -l pvmem=2925MB 7 | 8 | cd $PBS_O_WORKDIR 9 | pbsdsh bash -l -c "$PBS_O_WORKDIR/mpworks/scripts/run_check_snl_spacegroups.sh" 10 | -------------------------------------------------------------------------------- /mpworks/check_snl/submit/groupmembers.pbs: -------------------------------------------------------------------------------- 1 | #!/bin/bash -login 2 | #PBS -A matcomp 3 | #PBS -q matgen_prior 4 | #PBS -l nodes=1:ppn=8 5 | #PBS -l walltime=80:00:00 6 | 7 | module load python/2.7.3 8 | module swap numpy numpy/1.8.1 9 | module load virtualenv/1.8.2 10 | module load virtualenvwrapper 11 | source $HOME/ph_playground/virtenv_ph_playground/bin/activate 12 | export VENV_LOC=/global/u1/h/huck/ph_playground/virtenv_ph_playground/bin/activate 13 | export SCRIPT_LOC=/global/u1/h/huck/ph_playground/config/scripts 14 | 15 | python -m mpworks.check_snl.builders 1 4 2 16 | sleep 5 17 | mpirun -np 1 mgbuild run -v mpworks.check_snl.builders.SNLGroupMemberChecker snlgroups=$HOME/snlgroups_mpirun.json snls=$HOME/snls_mpirun.json ncols=4 -n 8 18 | -------------------------------------------------------------------------------- /mpworks/check_snl/submit/icsd.pbs: -------------------------------------------------------------------------------- 1 | #!/bin/bash -login 2 | #PBS -A matcomp 3 | #PBS -q matgen_prior 4 | #PBS -l nodes=1:ppn=8 5 | #PBS -l walltime=80:00:00 6 | 7 | module load python/2.7.3 8 | module swap numpy numpy/1.8.1 9 | module load virtualenv/1.8.2 10 | module load virtualenvwrapper 11 | source $HOME/ph_playground/virtenv_ph_playground/bin/activate 12 | export VENV_LOC=/global/u1/h/huck/ph_playground/virtenv_ph_playground/bin/activate 13 | export SCRIPT_LOC=/global/u1/h/huck/ph_playground/config/scripts 14 | 15 | python -m mpworks.builders.init_plotly SNLGroupIcsdChecker 4 2 16 | sleep 5 17 | mpirun -np 1 mgbuild run -v mpworks.check_snl.builders.core.SNLGroupIcsdChecker snlgroups=$HOME/snlgroups_mpirun.json snls=$HOME/snls_mpirun.json ncols=4 -n 8 18 | -------------------------------------------------------------------------------- /mpworks/check_snl/submit/run_check_snl_spacegroups.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -login 2 | source /global/u1/h/huck/ph_playground/virtenv_ph_playground/bin/activate 3 | export FW_CONFIG_FILE=$FW_CONFIG_ph_playground 4 | export DB_LOC=/global/u1/h/huck/ph_playground/config/dbs 5 | export VENV_LOC=/global/u1/h/huck/ph_playground/virtenv_ph_playground/bin/activate 6 | export 
SCRIPT_LOC=/global/u1/h/huck/ph_playground/config/scripts 7 | cd $PBS_O_WORKDIR 8 | num_ids_per_job=20000 9 | start_id=$(echo "${PBS_VNODENUM}*$num_ids_per_job" | bc) 10 | end_id=$(echo "(${PBS_VNODENUM}+1)*$num_ids_per_job" | bc) 11 | sleep $(echo "(${PBS_VNODENUM}+1)*2" | bc) 12 | #logfile="${PBS_O_WORKDIR}/${PBS_JOBID}_${PBS_VNODENUM}.log" 13 | python -m mpworks.scripts.check_snl spacegroups --start $start_id --end $end_id #> $logfile 2>&1 14 | -------------------------------------------------------------------------------- /mpworks/check_snl/submit/spacegroups.pbs: -------------------------------------------------------------------------------- 1 | #!/bin/bash -login 2 | #PBS -A matcomp 3 | #PBS -q matgen_prior 4 | #PBS -l nodes=1:ppn=8 5 | #PBS -l walltime=01:30:00 6 | 7 | module load python/2.7.3 8 | module swap numpy numpy/1.8.1 9 | module load virtualenv/1.8.2 10 | module load virtualenvwrapper 11 | source $HOME/ph_playground/virtenv_ph_playground/bin/activate 12 | export VENV_LOC=/global/u1/h/huck/ph_playground/virtenv_ph_playground/bin/activate 13 | export SCRIPT_LOC=/global/u1/h/huck/ph_playground/config/scripts 14 | 15 | python -m mpworks.check_snl.builders 0 4 2 16 | sleep 5 17 | mpirun -np 1 mgbuild run -v mpworks.check_snl.builders.SNLSpaceGroupChecker snls=$HOME/snls_mpirun.json ncols=4 -n 8 18 | -------------------------------------------------------------------------------- /mpworks/check_snl/utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def div_plus_mod(a, b): 4 | return a/b + bool(a%b) 5 | 6 | def sleep(start_time): 7 | sleep_time = min_sleep - time.clock() + start_time 8 | if sleep_time > 0: time.sleep(sleep_time) 9 | 10 | -------------------------------------------------------------------------------- /mpworks/docs/MPWorks_docs_old.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/MPWorks_docs_old.docx -------------------------------------------------------------------------------- /mpworks/docs/MPWorks_docs_old.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/MPWorks_docs_old.pdf -------------------------------------------------------------------------------- /mpworks/docs/MPWorks_figs.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/MPWorks_figs.pptx -------------------------------------------------------------------------------- /mpworks/docs/a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/a.png -------------------------------------------------------------------------------- /mpworks/docs/b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/b.png -------------------------------------------------------------------------------- /mpworks/docs/c.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/c.png -------------------------------------------------------------------------------- /mpworks/docs/d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/d.png -------------------------------------------------------------------------------- /mpworks/docs/e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/e.png -------------------------------------------------------------------------------- /mpworks/docs/f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/f.png -------------------------------------------------------------------------------- /mpworks/docs/g.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/g.png -------------------------------------------------------------------------------- /mpworks/docs/h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/h.png -------------------------------------------------------------------------------- /mpworks/docs/original/a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/original/a.png -------------------------------------------------------------------------------- /mpworks/docs/original/b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/original/b.png -------------------------------------------------------------------------------- /mpworks/docs/original/c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/original/c.png -------------------------------------------------------------------------------- /mpworks/docs/original/d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/original/d.png -------------------------------------------------------------------------------- /mpworks/docs/original/e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/original/e.png -------------------------------------------------------------------------------- /mpworks/docs/original/f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/original/f.png 
-------------------------------------------------------------------------------- /mpworks/docs/original/g.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/docs/original/g.png -------------------------------------------------------------------------------- /mpworks/drones/README.md: -------------------------------------------------------------------------------- 1 | # Drones package 2 | 3 | The MPVaspDrone is currently used by the production workflow. 4 | 5 | The drones package is an extension of the pymatgen-db drone, which converts a VASP directory into a database dictionary. The MPVaspDrone adds a "post_process" method and modifies some of the default drone behavior. It would be better if this could extend the existing drone rather than repeating much of the pymatgen-db drone, but that was not workable at the time of its creation. 6 | 7 | For example, the signal detectors help tag extra things that might have gone wrong with the run, and record them under the analysis.signals and analysis.critical_signals keys. 8 | 9 | The custom drone also handles SNL management. In particular, for structure optimizations it adds a new SNL (the newly optimized structure) to the SNL database. For static runs (where the structure doesn't change), no new SNL is added. The package also adds keys like "snlgroup_changed", which records whether the new and old SNLs still match after the relaxation run. -------------------------------------------------------------------------------- /mpworks/drones/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Mar 26, 2013' -------------------------------------------------------------------------------- /mpworks/dupefinders/README.md: -------------------------------------------------------------------------------- 1 | # Dupefinders package 2 | 3 | The Dupefinders are currently used by the production workflow. They implement the duplicate checking within the production FireWorks, as sketched below.
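4 | 5 | Duplicate checking is opt-in per Firework: placing a DupeFinder in the reserved `_dupefinder` spec key makes FireWorks search for an equivalent past run (via the finder's `query` method) and confirm any candidate match (via `verify`) before launching. A minimal sketch of the wiring (the `task_type` and `snlgroup_id` values here are illustrative, not production values): 6 | 7 | ```python 8 | from fireworks.core.firework import Firework 9 | from fireworks.user_objects.firetasks.script_task import ScriptTask 10 | from mpworks.dupefinders.dupefinder_vasp import DupeFinderVasp 11 | 12 | # candidates are pre-filtered on task_type + snlgroup_id (query), then 13 | # confirmed by comparing their run_tags sets (verify) 14 | spec = {'task_type': 'GGA optimize structure (2x)', 15 | 'snlgroup_id': 12345,  # illustrative 16 | '_dupefinder': DupeFinderVasp()} 17 | fw = Firework([ScriptTask.from_str('echo "duplicate-checked"')], spec) 18 | ```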
-------------------------------------------------------------------------------- /mpworks/dupefinders/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Mar 22, 2013' -------------------------------------------------------------------------------- /mpworks/dupefinders/dupefinder_vasp.py: -------------------------------------------------------------------------------- 1 | from fireworks.features.dupefinder import DupeFinderBase 2 | 3 | __author__ = 'Anubhav Jain' 4 | __copyright__ = 'Copyright 2013, The Materials Project' 5 | __version__ = '0.1' 6 | __maintainer__ = 'Anubhav Jain' 7 | __email__ = 'ajain@lbl.gov' 8 | __date__ = 'Mar 22, 2013' 9 | 10 | 11 | class DupeFinderVasp(DupeFinderBase): 12 | """ 13 | Flags two VASP FireWorks as duplicates when they share a task_type and snlgroup_id (pre-filtered in query) and an identical set of run_tags (checked in verify). 14 | """ 15 | 16 | _fw_name = 'Dupe Finder Vasp' 17 | 18 | def verify(self, spec1, spec2): 19 | # assert: task_type and snlgroup_id have already been checked through query 20 | return set(spec1.get('run_tags', [])) == set(spec2.get('run_tags', [])) 21 | 22 | def query(self, spec): 23 | return {'spec.task_type': spec['task_type'], 24 | 'spec.snlgroup_id': spec['snlgroup_id']} 25 | 26 | 27 | class DupeFinderDB(DupeFinderBase): 28 | """ 29 | Flags two DB-insertion FireWorks as duplicates when they share a task_type, prev_task_type and prev_vasp_dir (pre-filtered in query) and an identical set of run_tags (checked in verify); falls back to a never-matching query for FWs with fizzled parents. 30 | """ 31 | 32 | _fw_name = 'Dupe Finder DB' 33 | 34 | def verify(self, spec1, spec2): 35 | # assert: task_type and prev_vasp_dir have already been checked through query 36 | return set(spec1.get('run_tags', [])) == set(spec2.get('run_tags', [])) 37 | 38 | def query(self, spec): 39 | if 'prev_task_type' in spec and 'prev_vasp_dir' in spec and '_fizzled_parents' not in spec: 40 | return {'spec.task_type': spec['task_type'], 'spec.prev_task_type': spec['prev_task_type'], 'spec.prev_vasp_dir': spec['prev_vasp_dir']} 41 | return {'fw_id': -1} -------------------------------------------------------------------------------- /mpworks/examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples package 2 | 3 | These are meant to be simpler examples of workflows than the production MP Workflows, and they are referred to in the MPWorks docs. They are learning tools only and are not used in production. The "fireworks-vasp" repo is a different attempt at the same goal.
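4 | 5 | One way to exercise the example end to end (a sketch, not a supported entry point: it assumes a local MongoDB reachable with LaunchPad defaults, and actually running the VASP steps additionally needs VASP binaries plus the DB_LOC environment variable the tasks read): 6 | 7 | ```python 8 | from fireworks.core.firework import Workflow 9 | from fireworks.core.launchpad import LaunchPad 10 | from fireworks.core.rocket_launcher import rapidfire 11 | 12 | # Si_wf.json is the serialized two-step (relax + static) workflow below 13 | wf = Workflow.from_file('Si_wf.json') 14 | 15 | lp = LaunchPad()  # defaults to a MongoDB on localhost 16 | lp.add_wf(wf) 17 | rapidfire(lp)  # pulls and runs the FireWorks in dependency order 18 | ```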
-------------------------------------------------------------------------------- /mpworks/examples/Si_wf.json: -------------------------------------------------------------------------------- 1 | {"name": "Si2", "links": {"1": [2], "3": [4], "2": [3], "4": []}, "created_on": "2016-07-22T18:16:08.259942", "updated_on": "2016-07-22T18:16:08.259945", "fws": [{"updated_on": "2016-07-22T18:16:08.258840", "fw_id": 1, "spec": {"vaspinputset_name": "MPRelaxSet", "vasp": {"incar": {"MAGMOM": [0.6, 0.6], "ENCUT": 520, "NELM": 100, "NSW": 99, "PREC": "Accurate", "@module": "pymatgen.io.vasp.inputs", "ISIF": 3, "ICHARG": 1, "IBRION": 2, "GGAU": false, "LREAL": "Auto", "EDIFF": 0.0001, "ISPIN": 2, "ISMEAR": -5, "LWAVE": false, "NPAR": 2, "SIGMA": 0.05, "LORBIT": 11, "@class": "Incar", "ALGO": "Fast"}, "kpoints": {"comment": "Si2", "selective_dynamics": null, "velocities": null, "@module": "pymatgen.io.vasp.inputs", "@class": "Poscar", "predictor_corrector": null, "structure": {"lattice": {"a": 3.866, "c": 3.866, "b": 3.866, "matrix": [[3.3480542110306395, 0.0, 1.9330000000000005], [1.1160180703435467, 3.1565757818665885, 1.9330000000000005], [0.0, 0.0, 3.866]], "volume": 40.85738351924835, "beta": 59.99999999999999, "alpha": 59.99999999999999, "gamma": 59.99999999999999}, "sites": [{"xyz": [0.5580090351717732, 0.39457197273332356, 0.9665000000000001], "abc": [0.125, 0.125, 0.125], "species": [{"occu": 1, "element": "Si"}], "label": "Si"}, {"xyz": [3.9060632462024127, 2.762003809133265, 6.765500000000001], "abc": [0.875, 0.875, 0.875], "species": [{"occu": 1, "element": "Si"}], "label": "Si"}], "@class": "Structure", "@module": "pymatgen.core.structure"}, "true_names": true}, "poscar": {"comment": "Si2", "selective_dynamics": null, "velocities": null, "@module": "pymatgen.io.vasp.inputs", "@class": "Poscar", "predictor_corrector": null, "structure": {"lattice": {"a": 3.866, "c": 3.866, "b": 3.866, "matrix": [[3.3480542110306395, 0.0, 1.9330000000000005], [1.1160180703435467, 3.1565757818665885, 1.9330000000000005], [0.0, 0.0, 3.866]], "volume": 40.85738351924835, "beta": 59.99999999999999, "alpha": 59.99999999999999, "gamma": 59.99999999999999}, "sites": [{"xyz": [0.5580090351717732, 0.39457197273332356, 0.9665000000000001], "abc": [0.125, 0.125, 0.125], "species": [{"occu": 1, "element": "Si"}], "label": "Si"}, {"xyz": [3.9060632462024127, 2.762003809133265, 6.765500000000001], "abc": [0.875, 0.875, 0.875], "species": [{"occu": 1, "element": "Si"}], "label": "Si"}], "@class": "Structure", "@module": "pymatgen.core.structure"}, "true_names": true}, "potcar": {"comment": "Si2", "selective_dynamics": null, "velocities": null, "@module": "pymatgen.io.vasp.inputs", "@class": "Poscar", "predictor_corrector": null, "structure": {"lattice": {"a": 3.866, "c": 3.866, "b": 3.866, "matrix": [[3.3480542110306395, 0.0, 1.9330000000000005], [1.1160180703435467, 3.1565757818665885, 1.9330000000000005], [0.0, 0.0, 3.866]], "volume": 40.85738351924835, "beta": 59.99999999999999, "alpha": 59.99999999999999, "gamma": 59.99999999999999}, "sites": [{"xyz": [0.5580090351717732, 0.39457197273332356, 0.9665000000000001], "abc": [0.125, 0.125, 0.125], "species": [{"occu": 1, "element": "Si"}], "label": "Si"}, {"xyz": [3.9060632462024127, 2.762003809133265, 6.765500000000001], "abc": [0.875, 0.875, 0.875], "species": [{"occu": 1, "element": "Si"}], "label": "Si"}], "@class": "Structure", "@module": "pymatgen.core.structure"}, "true_names": true}}, "task_type": "GGA optimize structure (2x) example", "_tasks": [{"_fw_name": "Vasp 
Writer Task"}, {"jobs": [{"settings_override": null, "suffix": ".relax1", "auto_gamma": true, "output_file": "vasp.out", "auto_npar": false, "@module": "custodian.vasp.jobs", "gamma_vasp_cmd": null, "vasp_cmd": "", "backup": true, "final": false, "@class": "VaspJob"}, {"settings_override": [{"action": {"_set": {"ISTART": 1, "EDIFFG": -0.05}}, "dict": "INCAR"}, {"action": {"_file_copy": {"dest": "POSCAR"}}, "file": "CONTCAR"}], "suffix": ".relax2", "auto_gamma": true, "output_file": "vasp.out", "auto_npar": false, "@module": "custodian.vasp.jobs", "gamma_vasp_cmd": null, "vasp_cmd": "", "backup": false, "final": true, "@class": "VaspJob"}], "max_errors": 5, "_fw_name": "Vasp Custodian Task (Example)", "handlers": [{"output_filename": "vasp.out", "@class": "VaspErrorHandler", "@module": "custodian.vasp.handlers"}, {"timeout": 21600, "output_filename": "vasp.out", "@class": "FrozenJobErrorHandler", "@module": "custodian.vasp.handlers"}, {"output_vasprun": "vasprun.xml", "output_filename": "vasp.out", "@class": "MeshSymmetryErrorHandler", "@module": "custodian.vasp.handlers"}, {"change_algo": false, "nionic_steps": 10, "output_filename": "OSZICAR", "@class": "NonConvergingErrorHandler", "@module": "custodian.vasp.handlers"}]}]}, "created_on": "2016-07-22T18:16:08.258829", "name": "Si2--GGA_optimize_structure_(2x)_example"}, {"updated_on": "2016-07-22T18:16:08.259015", "fw_id": 2, "spec": {"_tasks": [{"_fw_name": "Vasp to Database Task (Example)"}], "task_type": "VASP db insertion example"}, "created_on": "2016-07-22T18:16:08.259011", "name": "Si2--VASP_db_insertion_example"}, {"updated_on": "2016-07-22T18:16:08.259659", "fw_id": 3, "spec": {"_tasks": [{"_fw_name": "Vasp Copy Task", "use_CONTCAR": true, "skip_CHGCAR": true}, {"_fw_name": "Setup Static Task"}, {"jobs": [{"settings_override": null, "suffix": "", "auto_gamma": true, "output_file": "vasp.out", "auto_npar": false, "@module": "custodian.vasp.jobs", "gamma_vasp_cmd": null, "vasp_cmd": "", "backup": true, "final": true, "@class": "VaspJob"}], "max_errors": 5, "_fw_name": "Vasp Custodian Task (Example)", "handlers": [{"output_filename": "vasp.out", "@class": "VaspErrorHandler", "@module": "custodian.vasp.handlers"}, {"timeout": 21600, "output_filename": "vasp.out", "@class": "FrozenJobErrorHandler", "@module": "custodian.vasp.handlers"}, {"output_vasprun": "vasprun.xml", "output_filename": "vasp.out", "@class": "MeshSymmetryErrorHandler", "@module": "custodian.vasp.handlers"}, {"change_algo": false, "nionic_steps": 10, "output_filename": "OSZICAR", "@class": "NonConvergingErrorHandler", "@module": "custodian.vasp.handlers"}]}], "task_type": "GGA static example"}, "created_on": "2016-07-22T18:16:08.259656", "name": "Si2--GGA_static_example"}, {"updated_on": "2016-07-22T18:16:08.259800", "fw_id": 4, "spec": {"_tasks": [{"_fw_name": "Vasp to Database Task (Example)"}], "task_type": "VASP db insertion example"}, "created_on": "2016-07-22T18:16:08.259797", "name": "Si2--VASP_db_insertion_example"}], "metadata": {}} -------------------------------------------------------------------------------- /mpworks/examples/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Oct 03, 2013' -------------------------------------------------------------------------------- /mpworks/examples/firetasks_ex.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shlex 4 | import socket 5 | from monty.os.path import which 6 | from custodian import Custodian 7 | from custodian.vasp.jobs import VaspJob 8 | from fireworks.core.firework import FireTaskBase, FWAction 9 | from fireworks.core.launchpad import LaunchPad 10 | from fireworks.utilities.fw_serializers import FWSerializable 11 | from matgendb.creator import VaspToDbTaskDrone 12 | from mpworks.drones.mp_vaspdrone import MPVaspDrone 13 | from pymatgen import MontyDecoder 14 | 15 | __author__ = 'Anubhav Jain' 16 | __copyright__ = 'Copyright 2013, The Materials Project' 17 | __version__ = '0.1' 18 | __maintainer__ = 'Anubhav Jain' 19 | __email__ = 'ajain@lbl.gov' 20 | __date__ = 'Oct 03, 2013' 21 | 22 | class VaspCustodianTaskEx(FireTaskBase, FWSerializable): 23 | _fw_name = "Vasp Custodian Task (Example)" 24 | 25 | def __init__(self, parameters): 26 | parameters = parameters if parameters else {} 27 | self.update(parameters) 28 | # get VaspJob objects from 'jobs' parameter in Firework 29 | self.jobs = parameters['jobs'] 30 | # get VaspHandler objects from 'handlers' parameter in Firework 31 | self.handlers = parameters['handlers'] 32 | self.max_errors = parameters['max_errors'] 33 | 34 | def run_task(self, fw_spec): 35 | 36 | fw_env = fw_spec.get("_fw_env", {}) 37 | 38 | if "mpi_cmd" in fw_env: 39 | mpi_cmd = fw_spec["_fw_env"]["mpi_cmd"] 40 | elif which("mpirun"): 41 | mpi_cmd = "mpirun" 42 | elif which("aprun"): 43 | mpi_cmd = "aprun" 44 | else: 45 | raise ValueError("No MPI command found!") 46 | 47 | nproc = os.environ['PBS_NP'] 48 | 49 | v_exe = shlex.split('{} -n {} {}'.format(mpi_cmd, nproc, fw_env.get("vasp_cmd", "vasp"))) 50 | gv_exe = shlex.split('{} -n {} {}'.format(mpi_cmd, nproc, fw_env.get("gvasp_cmd", "gvasp"))) 51 | 52 | # override vasp executable in custodian jobs 53 | for job in self.jobs: 54 | job.vasp_cmd = v_exe 55 | job.gamma_vasp_cmd = gv_exe 56 | 57 | # run the custodian 58 | c = Custodian(self.handlers, self.jobs, self.max_errors) 59 | c.run() 60 | 61 | update_spec = {'prev_vasp_dir': os.getcwd(), 62 | 'prev_task_type': fw_spec['task_type']} 63 | 64 | return FWAction(update_spec=update_spec) 65 | 66 | class VaspToDBTaskEx(FireTaskBase, FWSerializable): 67 | """ 68 | Enter the VASP run directory in 'prev_vasp_dir' to the database. 
69 | """ 70 | 71 | _fw_name = "Vasp to Database Task (Example)" 72 | 73 | 74 | def run_task(self, fw_spec): 75 | prev_dir = fw_spec['prev_vasp_dir'] 76 | 77 | # get the db credentials 78 | db_dir = os.environ['DB_LOC'] 79 | db_path = os.path.join(db_dir, 'tasks_db.json') 80 | 81 | # use MPDrone to put it in the database 82 | with open(db_path) as f: 83 | db_creds = json.load(f) 84 | drone = VaspToDbTaskDrone( 85 | host=db_creds['host'], port=db_creds['port'], 86 | database=db_creds['database'], user=db_creds['admin_user'], 87 | password=db_creds['admin_password'], 88 | collection=db_creds['collection']) 89 | t_id = drone.assimilate(prev_dir) 90 | 91 | if t_id: 92 | print 'ENTERED task id:', t_id 93 | stored_data = {'task_id': t_id} 94 | update_spec = {'prev_vasp_dir': prev_dir, 'prev_task_type': fw_spec['prev_task_type']} 95 | return FWAction(stored_data=stored_data, update_spec=update_spec) 96 | else: 97 | raise ValueError("Could not parse entry for database insertion!") 98 | -------------------------------------------------------------------------------- /mpworks/examples/wf_ex.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from pprint import pprint 3 | from custodian.vasp.handlers import VaspErrorHandler, FrozenJobErrorHandler, MeshSymmetryErrorHandler, NonConvergingErrorHandler 4 | from custodian.vasp.jobs import VaspJob 5 | from fireworks.core.firework import Firework, Workflow 6 | from fireworks.utilities.fw_utilities import get_slug 7 | from mpworks.examples.firetasks_ex import VaspCustodianTaskEx, VaspToDBTaskEx 8 | from mpworks.firetasks.vasp_io_tasks import VaspWriterTask, VaspCopyTask 9 | from mpworks.firetasks.vasp_setup_tasks import SetupStaticRunTask 10 | from pymatgen import Composition, Lattice 11 | from pymatgen.core.structure import Structure 12 | from pymatgen.io.vasp.sets import MPRelaxSet 13 | 14 | __author__ = 'Anubhav Jain' 15 | __copyright__ = 'Copyright 2013, The Materials Project' 16 | __version__ = '0.1' 17 | __maintainer__ = 'Anubhav Jain' 18 | __email__ = 'ajain@lbl.gov' 19 | __date__ = 'Oct 03, 2013' 20 | 21 | 22 | def get_name(structure, task_type): 23 | return get_slug(structure.formula + '--' + task_type) 24 | 25 | 26 | def structure_to_wf(structure): 27 | """ 28 | This method starts with a Structure object and creates a Workflow object 29 | The workflow has two steps - a structure relaxation and a static run 30 | :param structure: 31 | :return: 32 | """ 33 | fws = [] # list of FireWorks to run 34 | connections = defaultdict(list) # dependencies between FireWorks 35 | 36 | # generate VASP input objects for 1st VASP run - this is put in the FW spec 37 | mpvis = MPRelaxSet(structure, user_incar_settings={'NPAR': 2, 38 | "GGAU":False}) 39 | incar = mpvis.incar 40 | poscar = mpvis.poscar 41 | kpoints = mpvis.poscar 42 | potcar = mpvis.poscar 43 | 44 | # serialize the VASP input objects to the FW spec 45 | spec = {} 46 | spec['vasp'] = {} 47 | spec['vasp']['incar'] = incar.as_dict() 48 | spec['vasp']['poscar'] = poscar.as_dict() 49 | spec['vasp']['kpoints'] = kpoints.as_dict() 50 | spec['vasp']['potcar'] = potcar.as_dict() 51 | spec['vaspinputset_name'] = mpvis.__class__.__name__ 52 | spec['task_type'] = 'GGA optimize structure (2x) example' 53 | 54 | # set up the custodian that we want to run 55 | jobs = VaspJob.double_relaxation_run('') 56 | for j in jobs: # turn off auto npar, it doesn't work for >1 node 57 | j.auto_npar = False 58 | handlers = [VaspErrorHandler(), 
FrozenJobErrorHandler(), MeshSymmetryErrorHandler(), 59 | NonConvergingErrorHandler()] 60 | c_params = {'jobs': [j.as_dict() for j in jobs], 'handlers': [h.as_dict() for h in handlers], 'max_errors': 5} 61 | custodiantask = VaspCustodianTaskEx(c_params) 62 | 63 | # 1st Firework - run GGA optimize structure 64 | # VaspWriterTask - write input files (INCAR, POSCAR, KPOINTS, POTCAR) based on spec 65 | # CustodianTaskEx - run VASP within a custodian 66 | tasks = [VaspWriterTask(), custodiantask] 67 | fws.append(Firework(tasks, spec, name=get_name(structure, spec['task_type']), fw_id=1)) 68 | 69 | # 2nd Firework - insert previous run into DB 70 | spec = {'task_type': 'VASP db insertion example'} 71 | fws.append( 72 | Firework([VaspToDBTaskEx()], spec, name=get_name(structure, spec['task_type']), fw_id=2)) 73 | connections[1] = [2] 74 | 75 | # 3rd Firework - static run. 76 | # VaspCopyTask - copy output from previous run to this directory 77 | # SetupStaticRunTask - override old parameters for static run 78 | # CustodianTaskEx - run VASP within a custodian 79 | spec = {'task_type': 'GGA static example'} 80 | copytask = VaspCopyTask({'use_CONTCAR': True, 'skip_CHGCAR': True}) 81 | setuptask = SetupStaticRunTask() 82 | custodiantask = VaspCustodianTaskEx({'jobs': [VaspJob('', auto_npar=False).as_dict()], 'handlers': [h.as_dict() for h in handlers], 'max_errors': 5}) 83 | fws.append(Firework([copytask, setuptask, custodiantask], spec, name=get_name(structure, spec['task_type']), fw_id=3)) 84 | connections[2] = [3] 85 | 86 | # 4th Firework - insert previous run into DB 87 | spec = {'task_type': 'VASP db insertion example'} 88 | fws.append( 89 | Firework([VaspToDBTaskEx()], spec, name=get_name(structure, spec['task_type']), fw_id=4)) 90 | connections[3] = [4] 91 | 92 | return Workflow(fws, connections, name=get_slug(structure.formula)) 93 | 94 | if __name__ == '__main__': 95 | l = Lattice.from_parameters(3.866, 3.866, 3.866, 60, 60, 60) 96 | s = Structure(l, ['Si', 'Si'], [[0.125,0.125,0.125], [0.875,0.875,0.875]]) 97 | 98 | my_wf = structure_to_wf(s) 99 | pprint(my_wf.to_dict(), indent=2) 100 | my_wf.to_file("Si_wf.json") 101 | -------------------------------------------------------------------------------- /mpworks/firetasks/README.md: -------------------------------------------------------------------------------- 1 | # Firetasks package 2 | 3 | This package is used by the production workflow. 4 | 5 | It contains implementations of FireTasks that are connected into workflows.
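6 | 7 | Every task here follows the same pattern: subclass FireTaskBase, set a unique `_fw_name` (the registry key FireWorks uses to rehydrate the task from the database), and implement `run_task(fw_spec)`, returning an FWAction that can store data and update the downstream spec. A toy sketch in the same old-style FireWorks API used throughout this repo (not itself part of the production package): 8 | 9 | ```python 10 | from fireworks.core.firework import FireTaskBase, FWAction 11 | from fireworks.utilities.fw_serializers import FWSerializable 12 | 13 | class HelloTask(FireTaskBase, FWSerializable): 14 | _fw_name = "Hello Task"  # must be unique across registered tasks 15 | 16 | def run_task(self, fw_spec): 17 | # read inputs from the spec, do work, pass results downstream 18 | name = fw_spec.get('name', 'world') 19 | print 'hello', name  # Python 2, matching the rest of this repo 20 | return FWAction(stored_data={'greeted': name}, update_spec={'greeted': name}) 21 | ```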
-------------------------------------------------------------------------------- /mpworks/firetasks/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Mar 15, 2013' -------------------------------------------------------------------------------- /mpworks/firetasks/bandstructure.json: -------------------------------------------------------------------------------- 1 | { 2 | "INCAR": { 3 | "IBRION": -1, 4 | "ISMEAR": 0, 5 | "LAECHG": true, 6 | "LCHARG": false, 7 | "LORBIT": 11, 8 | "LVHAR": true, 9 | "LWAVE": false, 10 | "NSW": 0, 11 | "ICHARG":11 12 | }, 13 | "KPOINTS": 1000 14 | } -------------------------------------------------------------------------------- /mpworks/firetasks/bs_static.json: -------------------------------------------------------------------------------- 1 | { 2 | "INCAR": { 3 | "IBRION": -1, 4 | "ISMEAR": -5, 5 | "LAECHG": true, 6 | "LCHARG": true, 7 | "LORBIT": 11, 8 | "LVHAR": true, 9 | "LWAVE": false, 10 | "NSW": 0 11 | }, 12 | "KPOINTS": 90 13 | } -------------------------------------------------------------------------------- /mpworks/firetasks/snl_tasks.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fireworks.core.firework import FireTaskBase, FWAction 3 | from fireworks.utilities.fw_serializers import FWSerializable 4 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 5 | from pymatgen.matproj.snl import StructureNL 6 | 7 | __author__ = 'Anubhav Jain' 8 | __copyright__ = 'Copyright 2013, The Materials Project' 9 | __version__ = '0.1' 10 | __maintainer__ = 'Anubhav Jain' 11 | __email__ = 'ajain@lbl.gov' 12 | __date__ = 'Apr 25, 2013' 13 | 14 | 15 | class AddSNLTask(FireTaskBase, FWSerializable): 16 | """ 17 | Add a new SNL into the SNL database, and build duplicate groups 18 | """ 19 | 20 | _fw_name = "Add SNL Task" 21 | 22 | def run_task(self, fw_spec): 23 | sma = SNLMongoAdapter.auto_load() 24 | snl = fw_spec['snl'] 25 | mpsnl, snlgroup_id, spec_group = sma.add_snl(snl) 26 | mod_spec = [{"_push": {"run_tags": "species_group={}".format(spec_group)}}] if spec_group else None 27 | 28 | return FWAction(update_spec={'mpsnl': mpsnl.as_dict(), 'snlgroup_id': snlgroup_id}, mod_spec=mod_spec) -------------------------------------------------------------------------------- /mpworks/firetasks/uniform.json: -------------------------------------------------------------------------------- 1 | { 2 | "INCAR": { 3 | "ICHARG":11, 4 | "NPAR":1, 5 | "NEDOS":601 6 | }, 7 | "KPOINTS": 1000 8 | } -------------------------------------------------------------------------------- /mpworks/firetasks/vasp_setup_tasks.py: -------------------------------------------------------------------------------- 1 | import os 2 | from monty.os.path import zpath 3 | from custodian.vasp.handlers import UnconvergedErrorHandler 4 | from fireworks.utilities.fw_serializers import FWSerializable 5 | from fireworks.core.firework import FireTaskBase, FWAction 6 | from pymatgen.io.vasp.outputs import Vasprun, Outcar 7 | from pymatgen.io.vasp.inputs import VaspInput, Incar, Poscar, Kpoints, Potcar 8 | from pymatgen.io.vasp.sets import MPRelaxSet, MPStaticSet, MPNonSCFSet 9 | from pymatgen.symmetry.bandstructure import HighSymmKpath 10 | from pymatgen.symmetry.analyzer import SpacegroupAnalyzer 11 | 12 | 
__author__ = 'Wei Chen, Anubhav Jain' 13 | __copyright__ = 'Copyright 2013, The Materials Project' 14 | __version__ = '0.1' 15 | __maintainer__ = 'Wei Chen' 16 | __email__ = 'weichen@lbl.gov' 17 | __date__ = 'Mar 20, 2013' 18 | 19 | module_dir = os.path.dirname(__file__) 20 | 21 | 22 | class SetupStaticRunTask(FireTaskBase, FWSerializable): 23 | """ 24 | Set VASP input sets for static runs, assuming vasp Outputs (vasprun.xml 25 | and OUTCAR) from relax runs are already in the directory 26 | """ 27 | 28 | _fw_name = "Setup Static Task" 29 | 30 | def __init__(self, parameters=None): 31 | """ 32 | 33 | :param parameters: 34 | """ 35 | parameters = parameters if parameters else {} 36 | self.update(parameters) 37 | self.kpoints_density = parameters.get('kpoints_density', 90) 38 | self.user_incar_settings = parameters.get('user_incar_settings', {}) 39 | 40 | def run_task(self, fw_spec): 41 | self.user_incar_settings.update({"NPAR": 2}) 42 | # Get kpoint density per vol 43 | vol = Poscar.from_file("POSCAR").structure.volume 44 | kppra_vol = self.kpoints_density / vol 45 | new_set = MPStaticSet.from_prev_calc( 46 | os.getcwd(), 47 | user_incar_settings=self.user_incar_settings, 48 | reciprocal_density=kppra_vol) 49 | new_set.write_input('.') 50 | structure = new_set.structure 51 | sga = SpacegroupAnalyzer(structure, 0.1) 52 | return FWAction(stored_data={ 53 | 'refined_structure': sga.get_refined_structure().as_dict(), 54 | 'conventional_standard_structure': sga.get_conventional_standard_structure().as_dict(), 55 | 'symmetry_dataset': sga.get_symmetry_dataset(), 56 | 'symmetry_operations': [x.as_dict() for x in sga.get_symmetry_operations()]}) 57 | 58 | 59 | class SetupUnconvergedHandlerTask(FireTaskBase, FWSerializable): 60 | """ 61 | Assumes the current directory contains an unconverged job. 
Fixes it and 62 | runs it 63 | """ 64 | 65 | _fw_name = "Unconverged Handler Task" 66 | 67 | def run_task(self, fw_spec): 68 | ueh = UnconvergedErrorHandler() 69 | custodian_out = ueh.correct() 70 | return FWAction(stored_data={'error_list': custodian_out['errors']}) 71 | 72 | 73 | class SetupNonSCFTask(FireTaskBase, FWSerializable): 74 | """ 75 | Set up vasp inputs for non-SCF calculations (Uniform [DOS] or band 76 | structure) 77 | """ 78 | _fw_name = "Setup non-SCF Task" 79 | 80 | def __init__(self, parameters=None): 81 | """ 82 | 83 | :param parameters: 84 | """ 85 | parameters = parameters if parameters else {} 86 | self.update(parameters) 87 | self.line = parameters.get('mode', 'line').lower() == 'line' 88 | self.kpoints_density = parameters.get('kpoints_density', 1000) 89 | self.kpoints_line_density = parameters.get('kpoints_line_density', 20) 90 | 91 | def run_task(self, fw_spec): 92 | user_incar_settings= {"NPAR": 2} 93 | vol = Poscar.from_file("POSCAR").structure.volume 94 | kppra_vol = self.kpoints_density / vol 95 | if self.line: 96 | MPNonSCFSet.from_prev_calc( 97 | os.getcwd(), mode="Line", copy_chgcar=False, 98 | user_incar_settings=user_incar_settings, 99 | kpoints_line_density=self.kpoints_line_density).write_input('.') 100 | kpath = HighSymmKpath(Poscar.from_file("POSCAR").structure) 101 | 102 | return FWAction(stored_data={"kpath": kpath.kpath, 103 | "kpath_name": kpath.name}) 104 | else: 105 | MPNonSCFSet.from_prev_calc( 106 | os.getcwd(), mode="Uniform", copy_chgcar=False, 107 | user_incar_settings=user_incar_settings, 108 | reciprocal_density=kppra_vol).write_input('.') 109 | return FWAction() 110 | 111 | 112 | class SetupGGAUTask(FireTaskBase, FWSerializable): 113 | """ 114 | Assuming that GGA inputs/outputs already exist in the directory, set up a 115 | GGA+U run. 116 | """ 117 | _fw_name = "Setup GGAU Task" 118 | 119 | def run_task(self, fw_spec): 120 | chgcar_start = False 121 | # read the VaspInput from the previous run 122 | 123 | poscar = Poscar.from_file(zpath('POSCAR')) 124 | incar = Incar.from_file(zpath('INCAR')) 125 | 126 | # figure out what GGA+U values to use and override them 127 | # LDAU values to use 128 | mpvis = MPRelaxSet(poscar.structure) 129 | ggau_incar = mpvis.incar.as_dict() 130 | incar_updates = {k: ggau_incar[k] for k in ggau_incar.keys() if 'LDAU' in k} 131 | 132 | for k in ggau_incar: 133 | # update any parameters not set explicitly in previous INCAR 134 | if k not in incar and k in ggau_incar: 135 | incar_updates[k] = ggau_incar[k] 136 | 137 | incar.update(incar_updates) # override the +U keys 138 | 139 | 140 | # start from the CHGCAR of previous run 141 | if os.path.exists('CHGCAR'): 142 | incar['ICHARG'] = 1 143 | chgcar_start = True 144 | 145 | # write back the new INCAR to the current directory 146 | incar.write_file('INCAR') 147 | return FWAction(stored_data={'chgcar_start': chgcar_start}) 148 | -------------------------------------------------------------------------------- /mpworks/firetasks_staging/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'ajain' 2 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/README.md: -------------------------------------------------------------------------------- 1 | # Fix scripts 2 | 3 | These are one-time use scripts that were used to fix the database at one time or another. It is expected that they will never need to be run again, but they are kept here for reference. 
There are countless other "fix_scripts" on Anubhav's computer...the criteria for inclusion here are arbitrary. -------------------------------------------------------------------------------- /mpworks/fix_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'May 29, 2013' -------------------------------------------------------------------------------- /mpworks/fix_scripts/add_icsd_materials.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 4 | 5 | __author__ = 'Anubhav Jain' 6 | __copyright__ = 'Copyright 2013, The Materials Project' 7 | __version__ = '0.1' 8 | __maintainer__ = 'Anubhav Jain' 9 | __email__ = 'ajain@lbl.gov' 10 | __date__ = 'Jul 30, 2013' 11 | 12 | __author__ = 'Anubhav Jain' 13 | __copyright__ = 'Copyright 2013, The Materials Project' 14 | __version__ = '0.1' 15 | __maintainer__ = 'Anubhav Jain' 16 | __email__ = 'ajain@lbl.gov' 17 | __date__ = 'Jul 16, 2013' 18 | 19 | import json 20 | import logging 21 | import os 22 | import sys 23 | from pymongo import MongoClient 24 | from fireworks.core.launchpad import LaunchPad 25 | from mpworks.drones.mp_vaspdrone import MPVaspDrone 26 | import multiprocessing 27 | import traceback 28 | 29 | __author__ = 'Anubhav Jain' 30 | __copyright__ = 'Copyright 2013, The Materials Project' 31 | __version__ = '0.1' 32 | __maintainer__ = 'Anubhav Jain' 33 | __email__ = 'ajain@lbl.gov' 34 | __date__ = 'May 13, 2013' 35 | 36 | ''' 37 | This script updates the ICSD ids of all materials 38 | ''' 39 | 40 | class ICSDBuilder(): 41 | 42 | @classmethod 43 | def setup(cls): 44 | module_dir = os.path.dirname(os.path.abspath(__file__)) 45 | snl_f = os.path.join(module_dir, 'snl.yaml') 46 | cls.snldb = SNLMongoAdapter.from_file(snl_f) 47 | 48 | tasks_f = os.path.join(module_dir, 'materials.yaml') 49 | with open(tasks_f) as f2: 50 | task_creds = yaml.load(f2) 51 | 52 | mc = MongoClient(task_creds['host'], task_creds['port']) 53 | db = mc[task_creds['database']] 54 | db.authenticate(task_creds['admin_user'], task_creds['admin_password']) 55 | cls.materials = db[task_creds['collection']] 56 | 57 | def process_material(self, material_id): 58 | 59 | try: 60 | d = self.materials.find_one({"task_ids": material_id}, {"snlgroup_id_final": 1}) 61 | snlgroup_id = d['snlgroup_id_final'] 62 | icsd_ids = self.get_icsd_ids_from_snlgroup(snlgroup_id) 63 | 64 | self.materials.find_and_modify({"task_ids": material_id}, {"$set": {"icsd_id": icsd_ids}}) 65 | print material_id, icsd_ids 66 | print 'FINISHED', material_id 67 | except: 68 | print '-----' 69 | print 'ENCOUNTERED AN EXCEPTION!!!', material_id 70 | traceback.print_exc() 71 | print '-----' 72 | 73 | 74 | def get_icsd_ids_from_snlgroup(self, snlgroup_id): 75 | snl_ids = self.snldb.snlgroups.find_one({"snlgroup_id": snlgroup_id}, {"all_snl_ids":1})["all_snl_ids"] 76 | 77 | icsd_ids = set() 78 | for snl in self.snldb.snl.find({"snl_id":{"$in": snl_ids}}, {"about._icsd.icsd_id": 1}): 79 | if '_icsd' in snl["about"] and snl["about"]["_icsd"].get("icsd_id"): 80 | icsd_ids.add(snl["about"]["_icsd"]["icsd_id"]) 81 | 82 | return list(icsd_ids) 83 | 84 | 85 | def _analyze(data): 86 | b = ICSDBuilder() 87 | return b.process_material(data) 88 | 89 | if __name__ == 
'__main__': 90 | 91 | logging.basicConfig(level=logging.INFO) 92 | 93 | o = ICSDBuilder() 94 | o.setup() 95 | materials = o.materials 96 | print materials.count() 97 | m_data = [] 98 | for d in materials.find({}, {'task_id': 1}, timeout=False): 99 | m_data.append(d['task_id']) 100 | 101 | pool = multiprocessing.Pool(8) 102 | pool.map(_analyze, m_data) 103 | print 'DONE' -------------------------------------------------------------------------------- /mpworks/fix_scripts/add_old_taskids.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Jul 16, 2013' 7 | 8 | import json 9 | import logging 10 | import os 11 | import sys 12 | from pymongo import MongoClient 13 | from fireworks.core.launchpad import LaunchPad 14 | from mpworks.drones.mp_vaspdrone import MPVaspDrone 15 | import multiprocessing 16 | import traceback 17 | 18 | __author__ = 'Anubhav Jain' 19 | __copyright__ = 'Copyright 2013, The Materials Project' 20 | __version__ = '0.1' 21 | __maintainer__ = 'Anubhav Jain' 22 | __email__ = 'ajain@lbl.gov' 23 | __date__ = 'May 13, 2013' 24 | 25 | ''' 26 | This script re-runs the MPVaspDrone over all the tasks and just enters the deprecated task_id (as int). 27 | ''' 28 | 29 | class TaskBuilder(): 30 | 31 | @classmethod 32 | def setup(cls): 33 | db_dir = os.environ['DB_LOC'] 34 | db_path = os.path.join(db_dir, 'tasks_db.json') 35 | with open(db_path) as f2: 36 | db_creds = json.load(f2) 37 | mc2 = MongoClient(db_creds['host'], db_creds['port']) 38 | db2 = mc2[db_creds['database']] 39 | db2.authenticate(db_creds['admin_user'], db_creds['admin_password']) 40 | 41 | cls.tasks = db2['tasks'] 42 | cls.host = db_creds['host'] 43 | cls.port = db_creds['port'] 44 | cls.database = db_creds['database'] 45 | cls.collection = db_creds['collection'] 46 | cls.admin_user = db_creds['admin_user'] 47 | cls.admin_password = db_creds['admin_password'] 48 | 49 | def process_task(self, task_id): 50 | 51 | try: 52 | task_id_deprecated = int(task_id.split('-')[-1]) 53 | self.tasks.update({"task_id": task_id}, {"$set": {"task_id_deprecated": task_id_deprecated}}) 54 | print 'FINISHED', task_id 55 | except: 56 | print '-----' 57 | print 'ENCOUNTERED AN EXCEPTION!!!', task_id 58 | traceback.print_exc() 59 | print '-----' 60 | 61 | 62 | def _analyze(data): 63 | b = TaskBuilder() 64 | return b.process_task(data) 65 | 66 | 67 | if __name__ == '__main__': 68 | 69 | logging.basicConfig(level=logging.INFO) 70 | logger = logging.getLogger('MPVaspDrone') 71 | logger.setLevel(logging.INFO) 72 | sh = logging.StreamHandler(stream=sys.stdout) 73 | sh.setLevel(getattr(logging, 'INFO')) 74 | logger.addHandler(sh) 75 | 76 | o = TaskBuilder() 77 | o.setup() 78 | tasks = TaskBuilder.tasks 79 | m_data = [] 80 | q = {} 81 | for d in tasks.find(q, {'task_id': 1}, timeout=False): 82 | o.process_task(d['task_id']) 83 | print 'DONE' -------------------------------------------------------------------------------- /mpworks/fix_scripts/clear_FWs.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | from fireworks.core.launchpad import LaunchPad 4 | 5 | __author__ = 'Anubhav Jain' 6 | __copyright__ = 'Copyright 2013, The Materials Project' 7 | __version__ = '0.1' 8 | __maintainer__ = 'Anubhav Jain' 9 | __email__ = 'ajain@lbl.gov' 10 | __date__ 
= 'Aug 01, 2013' 11 | 12 | 13 | class ClearFWs(): 14 | 15 | @classmethod 16 | def setup(cls): 17 | module_dir = os.path.dirname(__file__) 18 | cls.lp = LaunchPad.from_file(os.path.join(module_dir, 'my_launchpad.yaml')) 19 | 20 | def archive_fw(self, fw_id): 21 | self.lp.archive_wf(fw_id) 22 | return True 23 | 24 | def defuse_fw(self, fw_id): 25 | self.lp.defuse_fw(fw_id) 26 | return True 27 | 28 | 29 | def _archive_fw(data): 30 | b = ClearFWs() 31 | return b.archive_fw(data) 32 | 33 | 34 | def _defuse_fw(data): 35 | b = ClearFWs() 36 | return b.defuse_fw(data) 37 | 38 | if __name__ == '__main__': 39 | 40 | cfw = ClearFWs() 41 | cfw.setup() 42 | lp = ClearFWs.lp 43 | fw_ids = [] 44 | 45 | # archive READY WORKFLOWS 46 | for d in lp.workflows.find({"state": "READY"}, {'nodes': 1}): 47 | fw_ids.append(d['nodes'][0]) 48 | print 'GOT all fw_ids...' 49 | pool = multiprocessing.Pool(8) 50 | states = pool.map(_archive_fw, fw_ids) 51 | print 'DONE', all(states) 52 | 53 | # defuse any READY/WAITING FWs 54 | for d in lp.fireworks.find({"state": {"$in":["READY", "WAITING"]}}, {'fw_id': 1}): 55 | fw_ids.append(d['fw_id']) 56 | print 'GOT all fw_ids...' 57 | pool = multiprocessing.Pool(8) 58 | states = pool.map(_defuse_fw, fw_ids) 59 | print 'DONE', all(states) -------------------------------------------------------------------------------- /mpworks/fix_scripts/find_missing_snl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | from mpworks.snl_utils.mpsnl import MPStructureNL 4 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 5 | from pymatgen.matproj.snl import is_valid_bibtex 6 | 7 | __author__ = 'Anubhav Jain' 8 | __copyright__ = 'Copyright 2013, The Materials Project' 9 | __version__ = '0.1' 10 | __maintainer__ = 'Anubhav Jain' 11 | __email__ = 'ajain@lbl.gov' 12 | __date__ = 'Oct 16, 2013' 13 | 14 | # find SNL missing an SNLgroup 15 | 16 | if __name__ == '__main__': 17 | 18 | module_dir = os.path.dirname(os.path.abspath(__file__)) 19 | snl_f = os.path.join(module_dir, 'snl.yaml') 20 | snldb = SNLMongoAdapter.from_file(snl_f) 21 | 22 | all_snl_ids = [] # snl ids that have a group 23 | all_missing_ids = [] # snl ids missing a group 24 | idx = 0 25 | print 'GETTING GROUPS' 26 | for x in snldb.snlgroups.find({}, {"all_snl_ids": 1}): 27 | all_snl_ids.extend(x['all_snl_ids']) 28 | 29 | print 'CHECKING SNL' 30 | for x in snldb.snl.find({}, {'snl_id': 1}, timeout=False): 31 | print x['snl_id'] 32 | if x['snl_id'] not in all_snl_ids: 33 | print x['snl_id'], '*********' 34 | all_missing_ids.append(x['snl_id']) 35 | 36 | print 'FIXING / ADDING GROUPS' 37 | print all_missing_ids 38 | 39 | for snl_id in all_missing_ids: 40 | try: 41 | mpsnl = MPStructureNL.from_dict(snldb.snl.find_one({"snl_id": snl_id})) 42 | snldb.build_groups(mpsnl) 43 | print 'SUCCESSFUL', snl_id 44 | except: 45 | print 'ERROR with snl_id', snl_id 46 | traceback.print_exc() 47 | 48 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/fix_bad_crystals.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pymongo import MongoClient 3 | import yaml 4 | from fireworks.core.launchpad import LaunchPad 5 | from mpworks.snl_utils.mpsnl import MPStructureNL 6 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 7 | from mpworks.submission.submission_mongo import SubmissionMongoAdapter 8 | 9 | __author__ = 'Anubhav Jain' 10 | __copyright__ = 'Copyright 2013, The 
Materials Project' 11 | __version__ = '0.1' 12 | __maintainer__ = 'Anubhav Jain' 13 | __email__ = 'ajain@lbl.gov' 14 | __date__ = 'Jun 05, 2013' 15 | 16 | 17 | """ 18 | The purpose of this script is to detect instances where we have bad structures for ICSD 2007. It looks like back in the 'old days', some crystals were not converted correctly when migrating from the 'crystals' collection to the 'refactored crystals' collection. That bug was carried over to the MPS collection, and in turn the SNL collection. 19 | 20 | This script tries to detect these 'bad crystals' by cross-referencing with the new ICSD 2012 import, which was done much more cleanly. If an ICSD id from 2012 is present in multiple SNLgroups, we want to remove the old entry as it is probably incorrect. 21 | """ 22 | 23 | def detect(): 24 | module_dir = os.path.dirname(os.path.abspath(__file__)) 25 | snl_f = os.path.join(module_dir, 'snl.yaml') 26 | snldb = SNLMongoAdapter.from_file(snl_f) 27 | 28 | snl = snldb.snl 29 | snlgroups = snldb.snlgroups 30 | q = {"about._icsd.icsd_id":{"$exists":True}} # ICSD structures 31 | q["about._icsd.coll_code"] = {"$exists":False} # old ICSD structures 32 | q["about.history.description.fw_id"] = {"$exists":False} # not from structure relaxations 33 | 34 | for old_s in snl.find(q, {"snl_id": 1, 'about._icsd.icsd_id': 1, 'about._materialsproject.deprecated.crystal_id_deprecated': 1}): 35 | icsd_id = old_s['about']['_icsd']['icsd_id'] 36 | crystal_id = old_s['about']['_materialsproject']['deprecated']['crystal_id_deprecated'] 37 | 38 | new_s = snl.find_one({"about._icsd.icsd_id":icsd_id, "about._icsd.coll_code":{"$exists":True}}, {"snl_id": 1}) 39 | if new_s: 40 | n_groups = snlgroups.find({"all_snl_ids":{"$in":[old_s['snl_id'], new_s['snl_id']]}}).count() 41 | if n_groups != 1: 42 | # The crystal_id is bad 43 | print crystal_id 44 | 45 | 46 | def fix(): 47 | 48 | # initialize databases 49 | module_dir = os.path.dirname(os.path.abspath(__file__)) 50 | 51 | snl_f = os.path.join(module_dir, 'snl.yaml') 52 | snldb = SNLMongoAdapter.from_file(snl_f) 53 | snl = snldb.snl 54 | snlgroups = snldb.snlgroups 55 | 56 | tasks_f = os.path.join(module_dir, 'tasks.yaml') 57 | with open(tasks_f) as f2: 58 | task_creds = yaml.load(f2) 59 | 60 | mc = MongoClient(task_creds['host'], task_creds['port']) 61 | db = mc[task_creds['database']] 62 | db.authenticate(task_creds['admin_user'], task_creds['admin_password']) 63 | tasks = db['tasks'] 64 | 65 | tasks_f = os.path.join(module_dir, 'tasks.yaml') 66 | with open(tasks_f) as f2: 67 | task_creds = yaml.load(f2) 68 | 69 | mc = MongoClient(task_creds['host'], task_creds['port']) 70 | db = mc[task_creds['database']] 71 | db.authenticate(task_creds['admin_user'], task_creds['admin_password']) 72 | tasks = db['tasks'] 73 | 74 | lp_f = os.path.join(module_dir, 'my_launchpad.yaml') 75 | lpdb = LaunchPad.from_file(lp_f) 76 | fws = lpdb.fireworks 77 | launches = lpdb.launches 78 | 79 | sb_f = os.path.join(module_dir, 'submission.yaml') 80 | sbdb = SubmissionMongoAdapter.from_file(sb_f) 81 | submissions = sbdb.jobs 82 | 83 | bad_crystal_ids = [] 84 | 85 | crystals_file = os.path.join(module_dir, 'bad_crystals.txt') 86 | with open(crystals_file) as f: 87 | for line in f: 88 | bad_crystal_ids.append(int(line.strip())) 89 | 90 | 91 | for c_id in bad_crystal_ids: 92 | if c_id == 100892 or c_id == 100202: 93 | print 'SKIP' 94 | 95 | else: 96 | # FIX SNL 97 | for s in snl.find({'about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'snl_id': 1}): 98 | snl.update({'snl_id': 
s['snl_id']}, {'$pushAll': {"about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) 99 | 100 | # FIX SNLGROUPS 101 | for s in snlgroups.find({'canonical_snl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'snlgroup_id': 1}): 102 | snlgroups.update({'snlgroup_id': s['snlgroup_id']}, {'$pushAll': {"canonical_snl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) 103 | 104 | # FIX FWs pt 1 105 | for s in fws.find({'spec.mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'fw_id': 1}): 106 | fws.update({'fw_id': s['fw_id']}, {'$pushAll': {"spec.mpsnl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) 107 | 108 | # FIX FWs pt 2 109 | for s in fws.find({'spec.force_mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'fw_id': 1}): 110 | fws.update({'fw_id': s['fw_id']}, {'$pushAll': {"spec.force_mpsnl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) 111 | 112 | # FIX Launches 113 | for s in launches.find({'action.update_spec.mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'launch_id': 1}): 114 | launches.update({'launch_id': s['launch_id']}, {'$pushAll': {"action.update_spec.mpsnl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) 115 | 116 | # FIX TASKS 117 | for s in tasks.find({'snl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'task_id': 1}): 118 | tasks.update({'task_id': s['task_id']}, {'$pushAll': {"snl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) 119 | tasks.update({'task_id': s['task_id']}, {'$pushAll': {"snl_final.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) 120 | 121 | # FIX SUBMISSIONS 122 | for s in submissions.find({'about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'submission_id': 1}): 123 | submissions.update({'submission_id': s['submission_id']}, {'$pushAll': {"about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) 124 | 125 | print 'FIXED', c_id 126 | 127 | 128 | def find_alternate_canonical(): 129 | # see if we can replace a deprecated canonical SNL with a non-deprecated one 130 | 131 | module_dir = os.path.dirname(os.path.abspath(__file__)) 132 | 133 | snl_f = os.path.join(module_dir, 'snl.yaml') 134 | snldb = SNLMongoAdapter.from_file(snl_f) 135 | snl = snldb.snl 136 | snlgroups = snldb.snlgroups 137 | 138 | for g in snlgroups.find({"canonical_snl.about.remarks":"DEPRECATED"}, {"snlgroup_id": 1, "all_snl_ids": 1}): 139 | for s in snl.find({"snl_id": {"$in": g['all_snl_ids']}, "about.remarks": {"$ne": "DEPRECATED"}}): 140 | canonical_mpsnl = MPStructureNL.from_dict(s) 141 | snldb.switch_canonical_snl(g['snlgroup_id'], canonical_mpsnl) 142 | print g['snlgroup_id'] 143 | break 144 | 145 | print 'DONE' 146 | 147 | def archive_deprecated_fws(): 148 | # find all snlgroups that are deprecated, and archive all WFs that have deprecated fw_ids so we don't run them 149 | module_dir = os.path.dirname(os.path.abspath(__file__)) 150 | snl_f = os.path.join(module_dir, 'snl.yaml') 151 | snldb = SNLMongoAdapter.from_file(snl_f) 152 | snlgroups = snldb.snlgroups 153 | 154 | lp_f = os.path.join(module_dir, 'my_launchpad.yaml') 155 | lpdb = LaunchPad.from_file(lp_f) 156 | 157 | for g in snlgroups.find({'canonical_snl.about.remarks':'DEPRECATED'}, {'snlgroup_id': 1}): 158 | while lpdb.fireworks.find_one({'spec.snlgroup_id': g['snlgroup_id'], 'state': {'$ne': 'ARCHIVED'}}, {'fw_id': 1}): 159 | fw = lpdb.fireworks.find_one({'spec.snlgroup_id': 
g['snlgroup_id'], 'state': {'$ne': 'ARCHIVED'}}, {'fw_id': 1}) 160 | print fw['fw_id'] 161 | lpdb.archive_wf(fw['fw_id']) 162 | 163 | 164 | print 'DONE' 165 | 166 | 167 | 168 | if __name__ == '__main__': 169 | archive_deprecated_fws() -------------------------------------------------------------------------------- /mpworks/fix_scripts/fix_fizzled_defused.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fireworks.core.launchpad import LaunchPad 3 | 4 | __author__ = 'Anubhav Jain' 5 | __copyright__ = 'Copyright 2013, The Materials Project' 6 | __version__ = '0.1' 7 | __maintainer__ = 'Anubhav Jain' 8 | __email__ = 'ajain@lbl.gov' 9 | __date__ = 'Jun 05, 2013' 10 | 11 | 12 | def restart_fizzled(): 13 | module_dir = os.path.dirname(os.path.abspath(__file__)) 14 | lp_f = os.path.join(module_dir, 'my_launchpad.yaml') 15 | lpdb = LaunchPad.from_file(lp_f) 16 | 17 | for fw in lpdb.fireworks.find({"state": "FIZZLED"}, {"fw_id": 1, "spec.task_type": 1}): 18 | fw_id = fw['fw_id'] 19 | task_type = fw['spec']['task_type'] 20 | restart_id = fw_id 21 | if 'VASP db insertion' in task_type: 22 | restart_id = fw_id - 1 23 | elif 'Controller' in task_type: 24 | restart_id = fw_id - 2 25 | 26 | lpdb.rerun_fw(restart_id) 27 | 28 | 29 | if __name__ == '__main__': 30 | restart_fizzled() -------------------------------------------------------------------------------- /mpworks/fix_scripts/fix_float_priorities.py: -------------------------------------------------------------------------------- 1 | from _socket import timeout 2 | import os 3 | from fireworks.core.launchpad import LaunchPad 4 | 5 | __author__ = 'Anubhav Jain' 6 | __copyright__ = 'Copyright 2013, The Materials Project' 7 | __version__ = '0.1' 8 | __maintainer__ = 'Anubhav Jain' 9 | __email__ = 'ajain@lbl.gov' 10 | __date__ = 'Nov 26, 2013' 11 | 12 | 13 | if __name__ == '__main__': 14 | module_dir = os.path.dirname(os.path.abspath(__file__)) 15 | lp_f = os.path.join(module_dir, 'my_launchpad.yaml') 16 | lpdb = LaunchPad.from_file(lp_f) 17 | 18 | 19 | for fw in lpdb.fireworks.find({"spec._tasks.1.max_errors":{"$type": 1}}, {"fw_id": 1, "state": 1, "spec._tasks": 1}, timeout=False): 20 | print fw['fw_id'], fw['state'] 21 | lpdb.fireworks.find_and_modify({"fw_id": fw['fw_id']}, {"$set": {"spec._tasks.1.max_errors": int(5)}}) 22 | if fw['state'] == 'FIZZLED': 23 | lpdb.rerun_fw(fw['fw_id']) -------------------------------------------------------------------------------- /mpworks/fix_scripts/fix_mpcomplete.py: -------------------------------------------------------------------------------- 1 | import os, json 2 | from pymongo import DESCENDING, ASCENDING 3 | from fireworks.fw_config import CONFIG_FILE_DIR, SORT_FWS 4 | from fireworks.core.fworker import FWorker 5 | from fireworks.core.launchpad import LaunchPad 6 | from pymongo import ReturnDocument 7 | 8 | launchpad = LaunchPad.from_file(os.path.join(CONFIG_FILE_DIR, 'my_launchpad.yaml')) 9 | fworker = FWorker.from_file(os.path.join(CONFIG_FILE_DIR, 'my_fworker.yaml')) 10 | #print launchpad._get_a_fw_to_run(query=fworker.query, checkout=False) 11 | m_query = dict(fworker.query) 12 | m_query['state'] = 'READY' 13 | sortby = [("spec._priority", DESCENDING)] 14 | if SORT_FWS.upper() == "FIFO": 15 | sortby.append(("created_on", ASCENDING)) 16 | elif SORT_FWS.upper() == "FILO": 17 | sortby.append(("created_on", DESCENDING)) 18 | #print json.dumps(m_query, indent=4) 19 | projection = { 20 | '_id': 0, 'fw_id': 1, 'spec._fworker': 1, 'spec.task_type': 1, 
'spec._queueadapter': 1, 21 | 'spec.mpsnl.about.remarks': 1, 'spec.snl.about.remarks': 1, 'spec.prev_vasp_dir': 1, 22 | 'updated_on': 1, 'state': 1 23 | } 24 | 25 | fw_ids = [] 26 | for idoc, doc in enumerate(launchpad.fireworks.find(m_query, projection=projection, sort=sortby).limit(100)): 27 | #print doc 28 | if 'walltime' in doc['spec']['_queueadapter']: 29 | walltime = doc['spec']['_queueadapter']['walltime'] 30 | if int(walltime.split(':')[0]) > 48: 31 | launchpad.fireworks.find_one_and_update( 32 | {'fw_id': doc['fw_id']}, 33 | {'$set': {'spec._queueadapter.walltime': '48:00:00'}}, 34 | projection=projection, 35 | return_document=ReturnDocument.AFTER 36 | ) 37 | print doc['fw_id'], '----> walltime updated' 38 | if 'nnodes' in doc['spec']['_queueadapter'] and 'nodes' not in doc['spec']['_queueadapter']: 39 | launchpad.fireworks.find_one_and_update( 40 | {'fw_id': doc['fw_id']}, 41 | {'$rename': {'spec._queueadapter.nnodes': 'spec._queueadapter.nodes'}}, 42 | projection=projection, 43 | return_document=ReturnDocument.AFTER 44 | ) 45 | print doc['fw_id'], '----> nodes key renamed' 46 | if 'pre_rocket' in doc['spec']['_queueadapter']: 47 | launchpad.fireworks.find_one_and_update( 48 | {'fw_id': doc['fw_id']}, 49 | {'$unset': {'spec._queueadapter.pre_rocket': 1}}, 50 | projection=projection, 51 | return_document=ReturnDocument.AFTER 52 | ) 53 | print doc['fw_id'], '----> pre_rocket dropped' 54 | if 'prev_vasp_dir' in doc['spec'] and not os.path.exists(doc['spec']['prev_vasp_dir']): 55 | block_dir = doc['spec']['prev_vasp_dir'].split('/')[-2:] 56 | launch_dir = '/'.join('/oasis/projects/nsf/csd436/phuck/garden'.split('/') + block_dir) 57 | if not os.path.exists(launch_dir): 58 | print doc['fw_id'], '---->', '/'.join(block_dir), 'does not exist!' 59 | continue 60 | fw_ids.append(doc['fw_id']) 61 | print 'fixed', fw_ids 62 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/fix_unmoved_dirs.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from mpworks.workflows.wf_utils import get_block_part 4 | 5 | __author__ = 'Anubhav Jain' 6 | __copyright__ = 'Copyright 2013, The Materials Project' 7 | __version__ = '0.1' 8 | __maintainer__ = 'Anubhav Jain' 9 | __email__ = 'ajain@lbl.gov' 10 | __date__ = 'May 29, 2013' 11 | 12 | """ 13 | The purpose of this script is to detect whether any directories were only partially moved from $SCRATCH dirs to $GARDEN due to a disk space error. 14 | If it detects a directory that is in BOTH $SCRATCH and $GARDEN, it prints it. 
15 | 16 | Currently it is up to the user to manually move directories (for safety) 17 | """ 18 | 19 | 20 | SCRATCH_PATH = '/global/scratch/sd/matcomp' 21 | GARDEN_PATH = '/project/projectdirs/matgen/garden/' 22 | 23 | 24 | def detect(): 25 | for d in glob.glob(os.path.join(SCRATCH_PATH, 'block*/launch*')): 26 | block_part = get_block_part(d) 27 | garden_dir = os.path.join(GARDEN_PATH, block_part) 28 | if os.path.exists(garden_dir): 29 | print garden_dir 30 | 31 | 32 | if __name__ == '__main__': 33 | detect() -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'May 08, 2013' -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/actions/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'May 08, 2013' 7 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/actions/add_snl_final.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pymongo import MongoClient 3 | import yaml 4 | 5 | __author__ = 'Anubhav Jain' 6 | __copyright__ = 'Copyright 2013, The Materials Project' 7 | __version__ = '0.1' 8 | __maintainer__ = 'Anubhav Jain' 9 | __email__ = 'ajain@lbl.gov' 10 | __date__ = 'May 21, 2013' 11 | 12 | module_dir = os.path.dirname(os.path.abspath(__file__)) 13 | tasks_f = os.path.join(module_dir, 'tasks.yaml') 14 | 15 | with open(tasks_f) as f2: 16 | db_creds = yaml.load(f2) 17 | 18 | mc2 = MongoClient(db_creds['host'], db_creds['port']) 19 | db2 = mc2[db_creds['database']] 20 | db2.authenticate(db_creds['admin_user'], db_creds['admin_password']) 21 | new_tasks = db2['tasks'] 22 | 23 | count = 0 24 | for d in new_tasks.find({'snlgroup_id_final': {'$exists': False}}, {'task_id': 1, 'snl': 1, 'snlgroup_id': 1, 'snlgroup_changed': 1}): 25 | new_tasks.update({'task_id': d['task_id']}, {'$set': {'snl_final': d['snl'], 'snlgroup_id_final': d['snlgroup_id'], 'snlgroup_changed': False}}) 26 | count+=1 27 | print count 28 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/actions/do_fw_conversion.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from pymongo import MongoClient, ASCENDING 4 | import yaml 5 | 6 | __author__ = 'Anubhav Jain' 7 | __copyright__ = 'Copyright 2013, The Materials Project' 8 | __version__ = '0.1' 9 | __maintainer__ = 'Anubhav Jain' 10 | __email__ = 'ajain@lbl.gov' 11 | __date__ = 'May 14, 2013' 12 | 13 | if __name__ == '__main__': 14 | 15 | module_dir = os.path.dirname(os.path.abspath(__file__)) 16 | # lp_f = os.path.join(module_dir, 'my_launchpad.yaml') 17 | tasks_f = os.path.join(module_dir, 'tasks.yaml') 18 | 19 | #with open(lp_f) as f: 20 | # lp = LaunchPad.from_file(lp_f) 21 | # lp.reset(None, require_password=False) 22 | 23 | with open(tasks_f) as f2: 24 | db_creds = yaml.load(f2) 25 | 26 | mc2 = 
MongoClient(db_creds['host'], db_creds['port']) 27 | db2 = mc2[db_creds['database']] 28 | db2.authenticate(db_creds['admin_user'], db_creds['admin_password']) 29 | new_tasks = db2['tasks'] 30 | 31 | print new_tasks.count() 32 | 33 | new_tasks.ensure_index("task_id", unique=True) 34 | new_tasks.ensure_index("task_id_deprecated", unique=True) 35 | new_tasks.ensure_index("chemsys") 36 | new_tasks.ensure_index("analysis.e_above_hull") 37 | new_tasks.ensure_index("pretty_formula") 38 | new_tasks.ensure_index([("elements", ASCENDING), ("nelements", ASCENDING)]) 39 | new_tasks.ensure_index("state") 40 | new_tasks.ensure_index([("state", ASCENDING), ("task_type", ASCENDING)]) 41 | new_tasks.ensure_index([("state", ASCENDING), ("task_type", ASCENDING), ("submission_id", ASCENDING)]) 42 | new_tasks.ensure_index("is_compatible") 43 | new_tasks.ensure_index("snl.snl_id") 44 | new_tasks.ensure_index("snlgroup_id") 45 | 46 | """ 47 | for task_dict in new_tasks.find({"state":"successful"}, sort=[("task_id", ASCENDING)], timeout=False): 48 | fw_id = task_dict_to_wf(task_dict, lp) 49 | new_tasks.update({"task_id": task_dict["task_id"]}, {"$set": {"fw_id": fw_id}}) 50 | """ 51 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/actions/do_icsd_to_snl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | 4 | from pymongo import MongoClient, ASCENDING 5 | import yaml 6 | 7 | from mpworks.maintenance_scripts.icsd2012_to_snl import icsd_dict_to_snl 8 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 9 | 10 | __author__ = 'Anubhav Jain' 11 | __copyright__ = 'Copyright 2013, The Materials Project' 12 | __version__ = '0.1' 13 | __maintainer__ = 'Anubhav Jain' 14 | __email__ = 'ajain@lbl.gov' 15 | __date__ = 'May 12, 2013' 16 | 17 | if __name__ == '__main__': 18 | 19 | module_dir = os.path.dirname(os.path.abspath(__file__)) 20 | icsd_f = os.path.join(module_dir, 'mg_core_dev.yaml') 21 | snl_f = os.path.join(module_dir, 'snl.yaml') 22 | 23 | with open(icsd_f) as f: 24 | y = yaml.load(f) 25 | 26 | mc = MongoClient(y['host'], y['port']) 27 | db = mc[y['db']] 28 | 29 | db.authenticate(y['username'], y['password']) 30 | 31 | snldb = SNLMongoAdapter.from_file(snl_f) 32 | 33 | # query = {"icsd_id": {"$gte": 170623}} 34 | query = {} 35 | 36 | for icsd_dict in db.icsd_2012_crystals.find(query, sort=[("icsd_id", ASCENDING)], timeout=False): 37 | try: 38 | snl = icsd_dict_to_snl(icsd_dict) 39 | if snl: 40 | snldb.add_snl(snl) 41 | except: 42 | traceback.print_exc() 43 | print 'ERROR - icsd id:', icsd_dict['icsd_id'] 44 | 45 | print 'DONE' 46 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/actions/do_mps_to_snl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | import time 4 | 5 | from pymongo import MongoClient 6 | import yaml 7 | 8 | from mpworks.fix_scripts.legacy.mps_to_snl import mps_dict_to_snl 9 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 10 | 11 | __author__ = 'Anubhav Jain' 12 | __copyright__ = 'Copyright 2013, The Materials Project' 13 | __version__ = '0.1' 14 | __maintainer__ = 'Anubhav Jain' 15 | __email__ = 'ajain@lbl.gov' 16 | __date__ = 'May 08, 2013' 17 | 18 | 19 | RESET = False 20 | 21 | if __name__ == '__main__': 22 | 23 | module_dir = os.path.dirname(os.path.abspath(__file__)) 24 | automation_f = os.path.join(module_dir, 
'automation.yaml') 25 | snl_f = os.path.join(module_dir, 'snl.yaml') 26 | 27 | with open(automation_f) as f: 28 | y = yaml.load(f) 29 | 30 | mc = MongoClient(y['host'], y['port']) 31 | db = mc[y['db']] 32 | 33 | db.authenticate(y['username'], y['password']) 34 | 35 | snldb = SNLMongoAdapter.from_file(snl_f) 36 | 37 | prev_ids = [] # MPS ids that we already took care of 38 | 39 | print 'INITIALIZING' 40 | if RESET: 41 | snldb._reset() 42 | time.sleep(10) # makes me sleep better at night 43 | 44 | else: 45 | for mps in snldb.snl.find({}, {"about._materialsproject.deprecated.mps_ids": 1}): 46 | prev_ids.extend(mps['about']['_materialsproject']['deprecated']['mps_ids']) 47 | 48 | print 'PROCESSING' 49 | for mps in db.mps.find(timeout=False): 50 | try: 51 | if not mps['mps_id'] in prev_ids: 52 | snl = mps_dict_to_snl(mps) 53 | if snl: 54 | snldb.add_snl(snl) 55 | else: 56 | print 'SKIPPING', mps['mps_id'] 57 | except: 58 | traceback.print_exc() 59 | print 'ERROR - mps id:', mps['mps_id'] 60 | 61 | print 'DONE' 62 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/actions/do_task_conversion.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import json 3 | import logging 4 | import multiprocessing 5 | import os 6 | import traceback 7 | 8 | from pymongo import MongoClient 9 | import yaml 10 | 11 | from mpworks.fix_scripts.legacy import MPVaspDrone_CONVERSION 12 | 13 | __author__ = 'Anubhav Jain' 14 | __copyright__ = 'Copyright 2013, The Materials Project' 15 | __version__ = '0.1' 16 | __maintainer__ = 'Anubhav Jain' 17 | __email__ = 'ajain@lbl.gov' 18 | __date__ = 'May 13, 2013' 19 | 20 | class OldTaskBuilder(): 21 | 22 | @classmethod 23 | def setup(cls): 24 | module_dir = os.path.dirname(os.path.abspath(__file__)) 25 | tasks_f = os.path.join(module_dir, 'mg_core_dev.yaml') 26 | 27 | with open(tasks_f) as f: 28 | y = yaml.load(f) 29 | 30 | mc = MongoClient(y['host'], y['port']) 31 | db = mc[y['db']] 32 | db.authenticate(y['username'], y['password']) 33 | 34 | cls.old_tasks = db['tasks_dbv2'] 35 | 36 | db_dir = os.environ['DB_LOC'] 37 | db_path = os.path.join(db_dir, 'tasks_db.json') 38 | with open(db_path) as f2: 39 | db_creds = json.load(f2) 40 | 41 | mc2 = MongoClient(db_creds['host'], db_creds['port']) 42 | db2 = mc2[db_creds['database']] 43 | db2.authenticate(db_creds['admin_user'], db_creds['admin_password']) 44 | 45 | cls.new_tasks = db2['tasks'] 46 | 47 | cls.drone = MPVaspDrone_CONVERSION( 48 | host=db_creds['host'], port=db_creds['port'], 49 | database=db_creds['database'], user=db_creds['admin_user'], 50 | password=db_creds['admin_password'], 51 | collection=db_creds['collection'], parse_dos=False, 52 | additional_fields={}, 53 | update_duplicates=False) 54 | 55 | def process_task(self, task_id): 56 | # get the directory containing the db file 57 | if not self.new_tasks.find_one({'task_id': 'mp-{}'.format(task_id)}): 58 | t = self.old_tasks.find_one({'task_id': task_id}) 59 | try: 60 | t_id, d = self.drone.assimilate(t) 61 | print 'ENTERED', t_id 62 | except: 63 | print 'ERROR entering', t['task_id'] 64 | traceback.print_exc() 65 | else: 66 | print 'skip' 67 | 68 | 69 | def _analyze(task_id): 70 | b = OldTaskBuilder() 71 | return b.process_task(task_id) 72 | 73 | 74 | def parallel_build(min, max): 75 | tasks_old = OldTaskBuilder.old_tasks 76 | task_ids = [] 77 | for i in tasks_old.find({'task_id': {'$gte': min, '$lt': max}}, {'task_id': 1}): 78 | 
task_ids.append(i['task_id']) 79 | 80 | print 'GOT all tasks...' 81 | pool = multiprocessing.Pool(16) 82 | pool.map(_analyze, task_ids) 83 | print 'DONE' 84 | 85 | if __name__ == '__main__': 86 | o = OldTaskBuilder() 87 | o.setup() 88 | logging.basicConfig(level=logging.INFO) 89 | parser = ArgumentParser() 90 | parser.add_argument('min', help='min', type=int) 91 | parser.add_argument('max', help='max', type=int) 92 | args = parser.parse_args() 93 | parallel_build(args.min, args.max) 94 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/actions/do_task_conversion_fixes.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import traceback 5 | 6 | from pymongo import MongoClient 7 | import yaml 8 | 9 | from mpworks.fix_scripts.legacy import MPVaspDrone_CONVERSION 10 | 11 | __author__ = 'Anubhav Jain' 12 | __copyright__ = 'Copyright 2013, The Materials Project' 13 | __version__ = '0.1' 14 | __maintainer__ = 'Anubhav Jain' 15 | __email__ = 'ajain@lbl.gov' 16 | __date__ = 'May 13, 2013' 17 | 18 | class OldTaskBuilder(): 19 | 20 | @classmethod 21 | def setup(cls): 22 | module_dir = os.path.dirname(os.path.abspath(__file__)) 23 | tasks_f = os.path.join(module_dir, 'mg_core_dev.yaml') 24 | 25 | with open(tasks_f) as f: 26 | y = yaml.load(f) 27 | 28 | mc = MongoClient(y['host'], y['port']) 29 | db = mc[y['db']] 30 | db.authenticate(y['username'], y['password']) 31 | 32 | cls.old_tasks = db['tasks_dbv2'] 33 | 34 | db_dir = os.environ['DB_LOC'] 35 | db_path = os.path.join(db_dir, 'tasks_db.json') 36 | with open(db_path) as f2: 37 | db_creds = json.load(f2) 38 | 39 | mc2 = MongoClient(db_creds['host'], db_creds['port']) 40 | db2 = mc2[db_creds['database']] 41 | db2.authenticate(db_creds['admin_user'], db_creds['admin_password']) 42 | 43 | cls.new_tasks = db2['tasks'] 44 | 45 | cls.drone = MPVaspDrone_CONVERSION( 46 | host=db_creds['host'], port=db_creds['port'], 47 | database=db_creds['database'], user=db_creds['admin_user'], 48 | password=db_creds['admin_password'], 49 | collection=db_creds['collection'], parse_dos=False, 50 | additional_fields={}, 51 | update_duplicates=True) 52 | 53 | def process_task(self, task_id): 54 | t = self.old_tasks.find_one({'task_id': task_id}) 55 | try: 56 | t_id, d = self.drone.assimilate(t) 57 | print 'ENTERED', t_id 58 | except: 59 | print 'ERROR entering', t['task_id'] 60 | traceback.print_exc() 61 | 62 | 63 | if __name__ == '__main__': 64 | o = OldTaskBuilder() 65 | o.setup() 66 | logging.basicConfig(level=logging.INFO) 67 | """ 68 | tasks_old = OldTaskBuilder.old_tasks 69 | for i in tasks_old.find({'dir_name':{'$regex':'cathode_'}}, {'task_id': 1, 'dir_name': 1}): 70 | task_id = i['task_id'] 71 | dir_name = i['dir_name'] 72 | print 'FIXING', task_id 73 | # cut off the last part of the dir_name 74 | cutoff_path = os.path.dirname(dir_name) 75 | final_path = cutoff_path.replace('cathode_block', 'block') 76 | o.old_tasks.find_and_modify({'task_id': task_id}, {'$set': {'dir_name': final_path}}) 77 | # o.process_task(task_id) 78 | """ 79 | with open('to_fix.txt') as f: 80 | for line in f: 81 | old_task_id = int(line.split(' ')[1]) 82 | new_task_id = 'mp-'+str(old_task_id) 83 | t = o.new_tasks.find_one({"task_id": new_task_id}, {"state": 1}) 84 | if t: 85 | o.new_tasks.remove({'task_id': new_task_id}) 86 | print 'REPARSING', old_task_id 87 | o.process_task(old_task_id) 88 | 
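# A hypothetical example of a to_fix.txt line, inferred from the parsing above
# (the old integer task id must be the second whitespace-separated token):
#   task 12345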
-------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/mps_to_snl.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import traceback 3 | from pymongo import MongoClient 4 | from pymatgen.core.structure import Structure 5 | from pymatgen.matproj.snl import StructureNL 6 | 7 | __author__ = 'Anubhav Jain' 8 | __copyright__ = 'Copyright 2013, The Materials Project' 9 | __version__ = '0.1' 10 | __maintainer__ = 'Anubhav Jain' 11 | __email__ = 'ajain@lbl.gov' 12 | __date__ = 'May 08, 2013' 13 | 14 | 15 | def mps_dict_to_snl(mps_dict): 16 | m = mps_dict 17 | 18 | if 'deprecated' in m['about']['metadata']['info'] and m['about']['metadata']['info']['deprecated']: 19 | return None 20 | 21 | if 'Carbon Capture Storage Initiative (CCSI)' in m['about']['metadata']['project_names']: 22 | print 'rejected old CCSI' 23 | return None 24 | 25 | mps_ids = [m['mps_id']] 26 | 27 | remarks = [] 28 | for remark in m['about']['metadata']['remarks']: 29 | if 'This entry replaces deprecated mps_id' in remark: 30 | mps_ids.append(int(remark.split()[-1])) # add the deprecated MPS id to this SNL 31 | else: 32 | remarks.append(remark) 33 | for remark in m['about']['metadata']['keywords']: 34 | remarks.append(remark) 35 | 36 | projects = [] 37 | for project in m['about']['metadata']['project_names']: 38 | projects.append(project) 39 | 40 | data = {'_materialsproject': {'deprecated': {'mps_ids': mps_ids}}, '_icsd': {}} 41 | for k, v in m['about']['metadata']['info'].iteritems(): 42 | if k == 'icsd_comments': 43 | data['_icsd']['comments'] = v 44 | elif k == 'icsd_id': 45 | data['_icsd']['icsd_id'] = v 46 | elif k == 'remark': 47 | data['_materialsproject']['ordering_remark'] = v 48 | elif 'deprecated' in k or 'original_structure' in k: 49 | data['_materialsproject']['deprecated'][k] = v 50 | elif 'assert' in k or 'universe' in k or 'mp_duplicates' in k: 51 | pass 52 | else: 53 | data['_materialsproject'][k] = v 54 | 55 | authors = [] 56 | for a in m['about']['authors']: 57 | authors.append({'name': a['name'], 'email': a['email']}) 58 | for a in m['about']['acknowledgements']: 59 | authors.append({'name': a['name'], 'email': a['email']}) 60 | 61 | cites = [m['about']['please_cite']['bibtex']] 62 | if m['about']['supporting_info']: 63 | cites.append(m['about']['supporting_info']['bibtex']) 64 | references = '\n'.join(cites) 65 | 66 | history = [] 67 | for h in m['about']['links']: 68 | if 'direct_copy' in h['description']: 69 | del h['description']['direct_copy'] 70 | history.append({'name': h['name'], 'url': h['url'], 'description': h['description']}) 71 | 72 | struct = Structure.from_dict(m) 73 | 74 | created_at = datetime.datetime.strptime(m['about']['created_at'], "%Y-%m-%d %H:%M:%S") 75 | 76 | return StructureNL(struct, authors, projects, references, remarks, data, history, created_at) 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/submit_snl.py: -------------------------------------------------------------------------------- 1 | from mpworks.snl_utils.mpsnl import MPStructureNL 2 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 3 | from mpworks.submission.submission_mongo import SubmissionMongoAdapter 4 | from mpworks.workflows.wf_utils import NO_POTCARS 5 | 6 | __author__ = 'Anubhav Jain' 7 | __copyright__ = 'Copyright 2013, The Materials Project' 
8 | __version__ = '0.1' 9 | __maintainer__ = 'Anubhav Jain' 10 | __email__ = 'ajain@lbl.gov' 11 | __date__ = 'May 14, 2013' 12 | 13 | 14 | def submit_all_snl(snldb, sma, snlgroup_constraint=None): 15 | constraints = {'is_ordered': True, 'is_valid': True, 'nsites': {'$lte': 200}, 'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'}} 16 | constraints['elements'] = {'$nin': NO_POTCARS} 17 | constraints['canonical_snl.about.history.name'] = {"$ne":"Materials Project structure optimization"} 18 | constraints['canonical_snl.about.remarks'] = {"$ne": "DEPRECATED"} 19 | 20 | if snlgroup_constraint: 21 | constraints['snlgroup_id'] = snlgroup_constraint 22 | 23 | for result in snldb.snlgroups.find(constraints, {'canonical_snl': 1, 'snlgroup_id': 1}): 24 | snl = MPStructureNL.from_dict(result['canonical_snl']) 25 | parameters = {'snlgroup_id': result['snlgroup_id']} 26 | sma.submit_snl(snl, 'Anubhav Jain <ajain@lbl.gov>', parameters=parameters) 27 | 28 | if __name__ == '__main__': 29 | # config filenames below are assumed defaults (cf. snl.yaml / submission.yaml usage elsewhere in this repo); adjust as needed 30 | snldb = SNLMongoAdapter.from_file('snl.yaml') 31 | sma = SubmissionMongoAdapter.from_file('submission.yaml') 32 | submit_all_snl(snldb, sma) -------------------------------------------------------------------------------- /mpworks/fix_scripts/legacy/task_to_fw.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from fireworks.core.firework import Firework, Launch, FWAction, Workflow 3 | from fireworks.utilities.fw_utilities import get_slug 4 | from mpworks.firetasks.controller_tasks import DummyLegacyTask 5 | from mpworks.snl_utils.mpsnl import get_meta_from_structure 6 | from pymatgen import Composition, Structure 7 | 8 | 9 | __author__ = 'Anubhav Jain' 10 | __copyright__ = 'Copyright 2013, The Materials Project' 11 | __version__ = '0.1' 12 | __maintainer__ = 'Anubhav Jain' 13 | __email__ = 'ajain@lbl.gov' 14 | __date__ = 'May 14, 2013' 15 | 16 | # Before the FireWorks code existed, we had already run many jobs. This code was used to convert all those jobs into "fake" FireWorks runs that were entered into the database for legacy reasons and completeness.
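# A minimal usage sketch (hypothetical, not part of this module; it mirrors the
# commented-out conversion loop in do_fw_conversion.py and assumes a LaunchPad
# config plus an authenticated 'tasks' collection):
#
#   lp = LaunchPad.from_file('my_launchpad.yaml')
#   for task_dict in new_tasks.find({'state': 'successful'}, sort=[('task_id', ASCENDING)]):
#       fw_id = task_dict_to_wf(task_dict, lp)
#       new_tasks.update({'task_id': task_dict['task_id']}, {'$set': {'fw_id': fw_id}})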
17 | 18 | def task_dict_to_wf(task_dict, launchpad): 19 | fw_id = launchpad.get_new_fw_id() 20 | l_id = launchpad.get_new_launch_id() 21 | 22 | spec = {'task_type': task_dict['task_type'], 'run_tags': task_dict['run_tags'], 23 | 'vaspinputset_name': None, 'vasp': None, 'mpsnl': task_dict['snl'], 24 | 'snlgroup_id': task_dict['snlgroup_id']} 25 | tasks = [DummyLegacyTask()] 26 | 27 | launch_dir = task_dict['dir_name_full'] 28 | 29 | stored_data = {'error_list': []} 30 | update_spec = {'prev_vasp_dir': task_dict['dir_name'], 31 | 'prev_task_type': spec['task_type'], 32 | 'mpsnl': spec['mpsnl'], 'snlgroup_id': spec['snlgroup_id'], 33 | 'run_tags': spec['run_tags']} 34 | 35 | fwaction = FWAction(stored_data=stored_data, update_spec=update_spec) 36 | 37 | if task_dict['completed_at']: 38 | complete_date = datetime.datetime.strptime(task_dict['completed_at'], "%Y-%m-%d %H:%M:%S") 39 | state_history = [{"created_on": complete_date, 'state': 'COMPLETED'}] 40 | else: 41 | state_history = [] 42 | 43 | launches = [Launch('COMPLETED', launch_dir, fworker=None, host=None, ip=None, action=fwaction, 44 | state_history=state_history, launch_id=l_id, fw_id=fw_id)] 45 | 46 | f = Composition(task_dict['pretty_formula']).alphabetical_formula 47 | 48 | 49 | fw = Firework(tasks, spec, name=get_slug(f + '--' + spec['task_type']), launches=launches, state='COMPLETED', created_on=None, 50 | fw_id=fw_id) 51 | 52 | wf_meta = get_meta_from_structure(Structure.from_dict(task_dict['snl'])) 53 | wf_meta['run_version'] = 'preproduction (0)' 54 | 55 | wf = Workflow.from_FireWork(fw, name=f, metadata=wf_meta) 56 | 57 | launchpad.add_wf(wf, reassign_all=False) 58 | launchpad._upsert_launch(launches[0]) 59 | 60 | print 'ADDED', fw_id 61 | return fw_id -------------------------------------------------------------------------------- /mpworks/fix_scripts/reparse_old_tasks.py: -------------------------------------------------------------------------------- 1 | from monty.os.path import zpath 2 | from pymatgen.io.vasp import Outcar 3 | 4 | __author__ = 'Anubhav Jain' 5 | __copyright__ = 'Copyright 2013, The Materials Project' 6 | __version__ = '0.1' 7 | __maintainer__ = 'Anubhav Jain' 8 | __email__ = 'ajain@lbl.gov' 9 | __date__ = 'Jun 13, 2013' 10 | 11 | import json 12 | import logging 13 | import os 14 | import sys 15 | from pymongo import MongoClient 16 | from fireworks.core.launchpad import LaunchPad 17 | from mpworks.drones.mp_vaspdrone import MPVaspDrone 18 | import multiprocessing 19 | import traceback 20 | 21 | __author__ = 'Anubhav Jain' 22 | __copyright__ = 'Copyright 2013, The Materials Project' 23 | __version__ = '0.1' 24 | __maintainer__ = 'Anubhav Jain' 25 | __email__ = 'ajain@lbl.gov' 26 | __date__ = 'May 13, 2013' 27 | 28 | 29 | ''' 30 | This script adds selected information for all the *old-style* tasks.
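(Old-style tasks are identified below via the query {'submission_id': {'$exists': True}}, i.e. any task document carrying a submission_id key.)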
31 | 32 | A few notes: 33 | * The new-style tasks will be unaffected by this script 34 | 35 | ''' 36 | 37 | class OldTaskFixer(): 38 | 39 | @classmethod 40 | def setup(cls): 41 | db_dir = os.environ['DB_LOC'] 42 | db_path = os.path.join(db_dir, 'tasks_db.json') 43 | with open(db_path) as f2: 44 | db_creds = json.load(f2) 45 | mc2 = MongoClient(db_creds['host'], db_creds['port']) 46 | db2 = mc2[db_creds['database']] 47 | db2.authenticate(db_creds['admin_user'], db_creds['admin_password']) 48 | 49 | cls.tasks = db2['tasks'] 50 | 51 | def process_task(self, path): 52 | try: 53 | #Override incorrect outcar subdocs for two step relaxations 54 | if os.path.exists(os.path.join(path, "relax2")): 55 | try: 56 | run_stats = {} 57 | for i in [1,2]: 58 | outcar = Outcar(zpath(os.path.join(path,"relax"+str(i), "OUTCAR"))) 59 | m_key = "calculations."+str(i-1)+".output.outcar" 60 | self.tasks.update({'dir_name_full': path}, {'$set': {m_key: outcar.as_dict()}}) 61 | run_stats["relax"+str(i)] = outcar.run_stats 62 | except: 63 | logger.error("Bad OUTCAR for {}.".format(path)) 64 | 65 | try: 66 | overall_run_stats = {} 67 | for key in ["Total CPU time used (sec)", "User time (sec)", 68 | "System time (sec)", "Elapsed time (sec)"]: 69 | overall_run_stats[key] = sum([v[key] 70 | for v in run_stats.values()]) 71 | run_stats["overall"] = overall_run_stats 72 | except: 73 | logger.error("Bad run stats for {}.".format(path)) 74 | 75 | self.tasks.update({'dir_name_full': path}, {'$set': {"run_stats": run_stats}}) 76 | print 'FINISHED', path 77 | else: 78 | print 'SKIPPING', path 79 | except: 80 | print '-----' 81 | print 'ENCOUNTERED AN EXCEPTION!!!', path 82 | traceback.print_exc() 83 | print '-----' 84 | 85 | 86 | def _analyze(data): 87 | b = OldTaskFixer() 88 | return b.process_task(data) 89 | 90 | 91 | if __name__ == '__main__': 92 | 93 | logging.basicConfig(level=logging.INFO) 94 | logger = logging.getLogger('MPVaspDrone') 95 | logger.setLevel(logging.INFO) 96 | sh = logging.StreamHandler(stream=sys.stdout) 97 | sh.setLevel(getattr(logging, 'INFO')) 98 | logger.addHandler(sh) 99 | 100 | o = OldTaskFixer() 101 | o.setup() 102 | tasks = OldTaskFixer.tasks 103 | m_data = [] 104 | q = {'submission_id': {'$exists': True}} # these are all old-style tasks 105 | for d in tasks.find(q, {'dir_name_full': 1}): 106 | m_data.append(d['dir_name_full']) 107 | print 'GOT all tasks...' 
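# note: each pool worker re-instantiates OldTaskFixer in _analyze(); the class-level
# 'tasks' collection set up above reaches the workers only via fork-based
# multiprocessing (assumed here, as on the Linux machines this ran on)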
108 | pool = multiprocessing.Pool(16) 109 | pool.map(_analyze, m_data) 110 | print 'DONE' 111 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/reparse_old_tasks_again.py: -------------------------------------------------------------------------------- 1 | from monty.os.path import zpath 2 | from pymatgen.io.vasp import Outcar 3 | 4 | __author__ = 'Anubhav Jain' 5 | __copyright__ = 'Copyright 2013, The Materials Project' 6 | __version__ = '0.1' 7 | __maintainer__ = 'Anubhav Jain' 8 | __email__ = 'ajain@lbl.gov' 9 | __date__ = 'Jun 13, 2013' 10 | 11 | import json 12 | import logging 13 | import os 14 | import sys 15 | from pymongo import MongoClient 16 | from fireworks.core.launchpad import LaunchPad 17 | from mpworks.drones.mp_vaspdrone import MPVaspDrone 18 | import multiprocessing 19 | import traceback 20 | 21 | __author__ = 'Anubhav Jain' 22 | __copyright__ = 'Copyright 2013, The Materials Project' 23 | __version__ = '0.1' 24 | __maintainer__ = 'Anubhav Jain' 25 | __email__ = 'ajain@lbl.gov' 26 | __date__ = 'May 13, 2013' 27 | 28 | 29 | ''' 30 | This script adds selected information for all the *old-style* tasks. 31 | 32 | A few notes: 33 | * The new-style tasks will be unaffected by this script 34 | 35 | ''' 36 | 37 | class OldTaskFixer(): 38 | 39 | @classmethod 40 | def setup(cls): 41 | db_dir = os.environ['DB_LOC'] 42 | db_path = os.path.join(db_dir, 'tasks_db.json') 43 | with open(db_path) as f2: 44 | db_creds = json.load(f2) 45 | mc2 = MongoClient(db_creds['host'], db_creds['port']) 46 | db2 = mc2[db_creds['database']] 47 | db2.authenticate(db_creds['admin_user'], db_creds['admin_password']) 48 | 49 | cls.tasks = db2['tasks'] 50 | 51 | def process_task(self, path): 52 | try: 53 | #Override incorrect outcar subdocs for two step relaxations 54 | if os.path.exists(os.path.join(path, "relax2")): 55 | try: 56 | run_stats = {} 57 | for i in [1,2]: 58 | outcar = Outcar(zpath(os.path.join(path,"relax"+str(i), "OUTCAR"))) 59 | m_key = "calculations."+str(i-1)+".output.outcar" 60 | self.tasks.update({'dir_name_full': path}, {'$set': {m_key: outcar.as_dict()}}) 61 | run_stats["relax"+str(i)] = outcar.run_stats 62 | except: 63 | logger.error("Bad OUTCAR for {}.".format(path)) 64 | 65 | try: 66 | overall_run_stats = {} 67 | for key in ["Total CPU time used (sec)", "User time (sec)", 68 | "System time (sec)", "Elapsed time (sec)"]: 69 | overall_run_stats[key] = sum([v[key] 70 | for v in run_stats.values()]) 71 | run_stats["overall"] = overall_run_stats 72 | except: 73 | logger.error("Bad run stats for {}.".format(path)) 74 | 75 | self.tasks.update({'dir_name_full': path}, {'$set': {"run_stats": run_stats}}) 76 | print 'FINISHED', path 77 | else: 78 | print 'SKIPPING', path 79 | except: 80 | print '-----' 81 | print 'ENCOUNTERED AN EXCEPTION!!!', path 82 | traceback.print_exc() 83 | print '-----' 84 | 85 | 86 | def _analyze(data): 87 | b = OldTaskFixer() 88 | return b.process_task(data) 89 | 90 | 91 | if __name__ == '__main__': 92 | 93 | logging.basicConfig(level=logging.INFO) 94 | logger = logging.getLogger('MPVaspDrone') 95 | logger.setLevel(logging.INFO) 96 | sh = logging.StreamHandler(stream=sys.stdout) 97 | sh.setLevel(getattr(logging, 'INFO')) 98 | logger.addHandler(sh) 99 | 100 | o = OldTaskFixer() 101 | o.setup() 102 | tasks = OldTaskFixer.tasks 103 | m_data = [] 104 | with open('old_tasks.txt') as f: 105 | for line in f: 106 | old_task = line.split(' ')[1].strip() 107 | m_data.append(tasks.find_one({"task_id":old_task},
{'dir_name_full': 1})["dir_name_full"]) 108 | print 'GOT all tasks...' 109 | # print len(m_data) 110 | # print m_data[1] 111 | pool = multiprocessing.Pool(2) 112 | pool.map(_analyze, m_data) 113 | print 'DONE' 114 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/rerun_boltztrap.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from glob import glob 3 | from dateutil import parser 4 | from datetime import datetime 5 | from fireworks.core.launchpad import LaunchPad 6 | from collections import Counter 7 | from fnmatch import fnmatch 8 | 9 | lpdb = LaunchPad.from_file('/global/homes/m/matcomp/mp_prod/config/config_Mendel/my_launchpad.yaml') 10 | 11 | """ 12 | counter = Counter() 13 | for wf_idx, wf_doc in enumerate(lpdb.workflows.find( 14 | {'updated_on': {'$exists': 1}}, 15 | {'state': 1, 'updated_on': 1, 'nodes': 1} 16 | )): 17 | try: 18 | dt = parser.parse(wf_doc['updated_on']) 19 | except: 20 | dt = wf_doc['updated_on'] 21 | counter['ALL_WFS'] += 1 22 | if dt > datetime(2016, 1, 1): 23 | counter['ALL_RECENT_WFS'] += 1 24 | fws_fizzled = [] 25 | for fw_idx, fw_doc in enumerate(lpdb.fireworks.find( 26 | {'fw_id': {'$in': wf_doc['nodes']}, 'updated_on': {'$exists': 1}, 'spec.task_type': {'$ne': 'GGA Boltztrap'}}, 27 | {'fw_id': 1, 'state': 1, 'updated_on': 1, 'launches': 1, 'spec.task_type': 1} 28 | )): 29 | try: 30 | dt = parser.parse(fw_doc['updated_on']) 31 | except: 32 | dt = fw_doc['updated_on'] 33 | if dt > datetime(2016, 1, 1): 34 | counter['ALL_RECENT_FWS'] += 1 35 | counter['FW_' + fw_doc['state']] += 1 36 | if fw_doc['state'] == 'FIZZLED': 37 | fws_fizzled.append('_'.join([str(fw_doc['fw_id']), fw_doc['spec']['task_type']])) 38 | if fnmatch(fw_doc['spec']['task_type'], '*GGA optimize structure*'): 39 | lpdb.rerun_fw(fw_doc['fw_id']) 40 | print 'rerunning', fw_doc['fw_id'] 41 | if fws_fizzled: 42 | print '{}:{}> {}'.format(counter['ALL_RECENT_WFS'], wf_idx, fws_fizzled) 43 | if len(fws_fizzled) < 2: 44 | sys.exit(0) 45 | print counter 46 | """ 47 | 48 | counter = Counter() 49 | for fw_doc in lpdb.fireworks.find( 50 | {'updated_on': {'$exists': 1}, 'spec.task_type': 'GGA Boltztrap'}, 51 | {'fw_id': 1, 'state': 1, 'updated_on': 1, 'launches': 1} 52 | ): 53 | try: 54 | dt = parser.parse(fw_doc['updated_on']) 55 | except: 56 | dt = fw_doc['updated_on'] 57 | if dt > datetime(2016, 1, 1): 58 | counter['RECENT_BTZ_FWS_ALL'] += 1 59 | if fw_doc['state'] == 'RUNNING': 60 | launch_dir = lpdb.launches.find_one({'launch_id': fw_doc['launches'][0]}, {'launch_dir':1, '_id':0})['launch_dir'] 61 | with open(glob(os.path.join(launch_dir, '*.error'))[0]) as ferr: 62 | last_line = ferr.readlines()[-1].strip() 63 | if 'TIME LIMIT' in last_line: 64 | lpdb.rerun_fw(fw_doc['fw_id']) 65 | print '[{}] rerun due to TIME LIMIT'.format(fw_doc['fw_id']) 66 | else: 67 | counter['RECENT_BTZ_FWS_' + fw_doc['state']] += 1 68 | else: 69 | #wf = lpdb.workflows.find_one({'nodes': fw_doc['fw_id']}, {'parent_links':1}) 70 | #parent_fw_id = wf['parent_links'][str(fw_doc['fw_id'])][-1] 71 | #parent_fw = lpdb.fireworks.find_one({'fw_id': parent_fw_id}, {'state':1}) 72 | #if parent_fw['state'] == 'COMPLETED': 73 | counter['RECENT_BTZ_FWS_' + fw_doc['state']] += 1 74 | print counter 75 | 76 | nfws = 0 77 | for fw_doc in lpdb.fireworks.find( 78 | {'spec.task_type': 'GGA Boltztrap', 'state': 'FIZZLED'}, 79 | {'fw_id': 1, 'launches': 1, 'state': 1 } 80 | ): 81 | wf = lpdb.workflows.find_one({'nodes': fw_doc['fw_id']}, 
{'parent_links':1}) 82 | launch_dir = lpdb.launches.find_one({'launch_id': fw_doc['launches'][0]}, {'launch_dir':1, '_id':0})['launch_dir'] 83 | with open(glob(os.path.join(launch_dir, '*.error'))[0]) as ferr: 84 | last_line = ferr.readlines()[-1].strip() 85 | if 'parent job unsuccessful' in last_line or 'Could not find task' in last_line: 86 | parent_fw_id = wf['parent_links'][str(fw_doc['fw_id'])][-1] 87 | lpdb.rerun_fw(parent_fw_id) 88 | print '[{}] {} --> marked parent {} for rerun'.format(nfws, fw_doc['fw_id'], parent_fw_id) 89 | else: 90 | #lpdb.rerun_fw(fw_doc['fw_id']) 91 | print '[{}] {} --> {}'.format(nfws, fw_doc['fw_id'], last_line) 92 | nfws += 1 93 | -------------------------------------------------------------------------------- /mpworks/fix_scripts/submit_bo_jobs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from fireworks.core.launchpad import LaunchPad 4 | from mpworks.submission.submission_mongo import SubmissionMongoAdapter 5 | from pymatgen.matproj.snl import StructureNL 6 | 7 | __author__ = 'Anubhav Jain' 8 | __copyright__ = 'Copyright 2014, The Materials Project' 9 | __version__ = '0.1' 10 | __maintainer__ = 'Anubhav Jain' 11 | __email__ = 'ajain@lbl.gov' 12 | __date__ = 'Jan 24, 2014' 13 | 14 | if __name__ == "__main__": 15 | sma = SubmissionMongoAdapter.from_file('submission.yaml') 16 | 17 | module_dir = os.path.dirname(os.path.abspath(__file__)) 18 | lp_f = os.path.join(module_dir, 'my_launchpad.yaml') 19 | lpdb = LaunchPad.from_file(lp_f) 20 | 21 | for s in os.listdir(os.path.join(module_dir, "snls")): 22 | if '.json' in s: 23 | print 'submitting', s 24 | with open(os.path.join(module_dir, "snls",s)) as f: 25 | snl = StructureNL.from_dict(json.load(f)) 26 | sma.submit_snl(snl, 'anubhavster@gmail.com', {"priority": 10}) 27 | print 'DONE submitting', s 28 | 29 | 30 | print 'DONE!' -------------------------------------------------------------------------------- /mpworks/maintenance_scripts/README.md: -------------------------------------------------------------------------------- 1 | # Maintenance scripts 2 | 3 | These are scripts that are not needed to run the production workflow, but might be helpful for maintaining runs or in future coding. 
-------------------------------------------------------------------------------- /mpworks/maintenance_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'ajain' 2 | -------------------------------------------------------------------------------- /mpworks/maintenance_scripts/classify_fizzled.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import os 3 | from pymongo import MongoClient 4 | import yaml 5 | from fireworks.core.launchpad import LaunchPad 6 | 7 | __author__ = 'Anubhav Jain' 8 | __copyright__ = 'Copyright 2013, The Materials Project' 9 | __version__ = '0.1' 10 | __maintainer__ = 'Anubhav Jain' 11 | __email__ = 'ajain@lbl.gov' 12 | __date__ = 'Nov 11, 2013' 13 | 14 | # This script tries to examine the FIZZLED FWS and classify them into groups 15 | # This can be used to identify the greatest causes of failure and fix those first 16 | # The types of failure groups will need to be updated 17 | 18 | def get_parent_launch_locs(fw_id, lpdb): 19 | parent_fw_id = lpdb.workflows.find_one({"nodes": fw_id}, {"parent_links":1})['parent_links'][str(fw_id)][0] 20 | launch_ids = lpdb.fireworks.find_one({"fw_id": parent_fw_id},{'launches': 1})['launches'] 21 | locs = [] 22 | ran_fws = [] 23 | for l in launch_ids: 24 | d = lpdb.launches.find_one({"launch_id": l}, {'launch_dir': 1, 'fw_id': 1}) 25 | launch_loc = str(d['launch_dir']) 26 | ran_fws.append(d['fw_id']) 27 | locs.append("/project/projectdirs/matgen/garden/"+launch_loc[launch_loc.find('block_'):]) 28 | 29 | return locs, parent_fw_id, ran_fws 30 | 31 | def get_task_info(fw_id, tdb): 32 | x = tdb.tasks.find_one({"fw_id": fw_id}, {"analysis": 1}) 33 | warnings = x['analysis'].get('warnings', []) 34 | warnings.extend(x['analysis']['errors_MP']['signals']) 35 | errors = x['analysis'].get('errors', []) 36 | errors.extend(x['analysis']['errors_MP']['critical_signals']) 37 | 38 | warnings = set(warnings) 39 | errors = set(errors) 40 | warnings = warnings.difference(errors) 41 | return set(warnings), set(errors) 42 | 43 | 44 | if __name__ == '__main__': 45 | module_dir = os.path.dirname(os.path.abspath(__file__)) 46 | lp_f = os.path.join(module_dir, 'my_launchpad.yaml') 47 | lpdb = LaunchPad.from_file(lp_f) 48 | 49 | tasks_f = os.path.join(module_dir, 'tasks_read.yaml') 50 | creds = {} 51 | with open(tasks_f) as f: 52 | creds = yaml.load(f) 53 | 54 | connection = MongoClient(creds['host'], creds['port']) 55 | tdb = connection[creds['db']] 56 | tdb.authenticate(creds['username'], creds['password']) 57 | 58 | 59 | except_dict = defaultdict(int) 60 | fizzled_fws = [] 61 | 62 | 63 | 64 | for f in lpdb.fireworks.find({"state": "FIZZLED"}, {"fw_id":1}): 65 | fizzled_fws.append(f['fw_id']) 66 | 67 | for l in lpdb.launches.find({"state": "FIZZLED", "action":{"$ne": None}}, {"action":1, 'fw_id': 1, 'time_start': 1, 'launch_dir':1}, timeout=False): 68 | if l['fw_id'] in fizzled_fws: 69 | except_str = l['action']['stored_data'].get('_exception') 70 | if 'Disk quota exceeded' in except_str: 71 | except_dict['DISK_QUOTA_EXCEEDED'] = except_dict['DISK_QUOTA_EXCEEDED']+1 72 | print l['fw_id'], '*' 73 | lpdb.rerun_fw(l['fw_id']) 74 | elif 'No such file' in except_str: 75 | # this is due to missing CHGCAR from Michael's old runs 76 | except_dict['NO_SUCH_FILE'] = except_dict['NO_SUCH_FILE']+1 77 | elif 'IMPROPER PARSING' in except_str: 78 | except_dict['IMPROPER_PARSING'] = except_dict['IMPROPER_PARSING']+1 79 | elif 
'get valid results from relaxed run' in except_str: 80 | except_dict['INVALID_RESULTS'] = except_dict['INVALID_RESULTS']+1 81 | elif 'dir does not exist!' in except_str: 82 | except_dict['MISSING_DIR'] = except_dict['MISSING_DIR']+1 83 | elif 'Stale NFS file handle' in except_str: 84 | except_dict['STALE_NFS'] = except_dict['STALE_NFS']+1 85 | elif 'File exists' in except_str: 86 | except_dict['FILE_EXISTS'] = except_dict['FILE_EXISTS']+1 87 | elif 'MemoryError' in except_str: 88 | except_dict['MEMORY_ERROR'] = except_dict['MEMORY_ERROR']+1 89 | elif 'DB insertion successful, but don\'t know how to fix' in except_str: 90 | except_dict['NO_FIX'] = except_dict['NO_FIX']+1 91 | """ 92 | launches, pfw_id, ran_fws = get_parent_launch_locs(l['fw_id'], lpdb) 93 | print '--',l['fw_id'] 94 | for idx, l in enumerate(launches): 95 | print l 96 | print get_task_info(ran_fws[idx], tdb) 97 | """ 98 | 99 | 100 | elif 'Poscar.from_string' in except_str and 'chunks[0]' in except_str: 101 | except_dict['POSCAR_PARSE'] = except_dict['POSCAR_PARSE']+1 102 | elif 'TypeError: integer argument expected, got float' in except_str: 103 | except_dict['MAXRUN_TYPE'] = except_dict['MAXRUN_TYPE']+1 104 | elif 'cannot import name DupeFinderDB' in except_str: 105 | except_dict['DUPEFINDER_DB'] = except_dict['DUPEFINDER_DB']+1 106 | elif 'jinja2' in except_str: 107 | except_dict['JINJA2'] = except_dict['JINJA2']+1 108 | elif 'run_tags' in except_str: 109 | except_dict['RUN_TAGS'] = except_dict['RUN_TAGS']+1 110 | else: 111 | except_dict[except_str] = except_dict[except_str]+1 112 | 113 | print '-----' 114 | for k, v in except_dict.iteritems(): 115 | print "{}\t{}".format(v, k) 116 | 117 | -------------------------------------------------------------------------------- /mpworks/maintenance_scripts/deprecate_snl.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2014, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Oct 13, 2014' 7 | 8 | 9 | def deprecate_snl(snl_db, snl_id, remarks): 10 | remarks.append('DEPRECATED') 11 | # FIX SNL 12 | remarks.extend(snl_db.snl.find_one({'snl_id': snl_id}, {'about.remarks': 1})['about']['remarks']) 13 | remarks = list(set(remarks)) 14 | 15 | # write back the merged remarks 16 | snl_db.snl.update({'snl_id': snl_id}, {'$set': {"about.remarks": remarks}}) 17 | 18 | # FIX SNLGROUPS 19 | sg = snl_db.snlgroups.find_one({'canonical_snl.snl_id': snl_id}, {'snlgroup_id': 1}) 20 | if sg: 21 | snl_db.snlgroups.update({'snlgroup_id': sg['snlgroup_id']}, {'$set': {"canonical_snl.about.remarks": remarks}}) 22 | 23 | print('FINISHED deprecating {}'.format(snl_id)) -------------------------------------------------------------------------------- /mpworks/maintenance_scripts/icsd2012_to_snl.py: -------------------------------------------------------------------------------- 1 | import re 2 | import random 3 | import unicodedata 4 | import datetime 5 | from pymatgen import Structure 6 | from pymatgen.matproj.snl import StructureNL 7 | 8 | __author__ = 'Anubhav Jain' 9 | __copyright__ = 'Copyright 2013, The Materials Project' 10 | __version__ = '0.1' 11 | __maintainer__ = 'Anubhav Jain' 12 | __email__ = 'ajain@lbl.gov' 13 | __date__ = 'May 12, 2013' 14 | 15 | # Convert ICSD database (already converted to MongoDB by MIT from an SQL source) into SNL 16 | 17 | def icsd_dict_to_snl(icsd_dict): 18 | if 'structure' not in icsd_dict: 19 | return
None 20 | 21 | struct = Structure.from_dict(icsd_dict['structure']) 22 | references = _get_icsd_reference(icsd_dict) 23 | 24 | data = {'_icsd': {}} 25 | excluded_data = ['_id', 'a_len', 'b_len', 'c_len', 'alpha', 'beta', 'gamma', 'compostion', 'composition', 'created_at', 'crystal_id', 'idnum', 'journal', 'tstruct', 'updated_at', 'username'] 26 | for k, v in icsd_dict.iteritems(): 27 | if k not in excluded_data: 28 | if isinstance(v, datetime.datetime): 29 | v = v.strftime(format='%Y-%m-%d %H:%M:%S') 30 | data['_icsd'][k] = v 31 | 32 | projects = None 33 | remarks = None 34 | 35 | history = [{'name': 'Inorganic Crystal Structure Database (ICSD)', 'url': 'http://icsd.fiz-karlsruhe.de/', 'description': {'icsd_id': data['_icsd']['icsd_id']}}, {'name': 'pymatgen', 'url': 'https://pypi.python.org/pypi/pymatgen', 'description': {'comment': 'converted to explicit structure'}}] 36 | 37 | authors = 'William Davidson Richards , Shyue Ping Ong , Stephen Dacek , Anubhav Jain ' 38 | 39 | return StructureNL(struct, authors, projects, references, remarks, data, history) 40 | 41 | 42 | def _get_icsd_reference(icsd_dict): 43 | 44 | if icsd_dict and 'journal' in icsd_dict and icsd_dict['journal']['authors']: 45 | pages = "" 46 | if icsd_dict['journal']['PAGE_FIRST']: 47 | pages = str(icsd_dict['journal']['PAGE_FIRST']) 48 | 49 | if icsd_dict['journal']['PAGE_LAST']: 50 | pages = pages + "--" + str(icsd_dict['journal']['PAGE_LAST']) 51 | 52 | bibtex_str = "@article{" 53 | #author last name as key 54 | m_key = icsd_dict['journal']['authors'][0] 55 | m_key = re.sub(r'\s', '_', m_key) 56 | m_key = m_key[0:m_key.find(',')] 57 | bibtex_str += m_key 58 | #year + random 59 | bibtex_str += str(icsd_dict['journal']['YEAR']) + "_" + str(random.randrange(1, 1000)) + ",\n" 60 | 61 | bibtex_str += "title = {{" + icsd_dict['au_title']+ "}},\n" 62 | 63 | auth_str = "author = {" + " and ".join(icsd_dict['journal']['authors']) + "},\n" 64 | # sanitize authors so there are no parentheses (weird ICSD conversion thing) 65 | regex = re.compile('\(.+?\)') 66 | auth_str = regex.sub('', auth_str) 67 | bibtex_str += auth_str 68 | 69 | if icsd_dict['journal']['YEAR']: 70 | bibtex_str += "year = {" + str(icsd_dict['journal']['YEAR']) + "},\n" 71 | 72 | if icsd_dict['journal']['J_TITLE']: 73 | bibtex_str += "journal = {" + icsd_dict['journal']['J_TITLE'] + "},\n" 74 | 75 | if icsd_dict['journal']['VOLUME']: 76 | bibtex_str += "volume = {" + str(icsd_dict['journal']['VOLUME']) + "},\n" 77 | 78 | if icsd_dict['journal']['ISSUE']: 79 | bibtex_str += "issue = {" + str(icsd_dict['journal']['ISSUE']) + "},\n" 80 | bibtex_str += "pages = {" + pages + "},\n" 81 | 82 | if icsd_dict['journal']['ISSN']: 83 | bibtex_str += "issn = " + icsd_dict['journal']['ISSN'] + "\n" 84 | 85 | bibtex_str += "}" 86 | bibtex_str = unicodedata.normalize('NFKD', bibtex_str).encode('ascii','ignore') 87 | 88 | return bibtex_str 89 | 90 | 91 | return None -------------------------------------------------------------------------------- /mpworks/maintenance_scripts/modify_snl.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import os 3 | from pymongo import MongoClient 4 | import yaml 5 | from fireworks.core.launchpad import LaunchPad 6 | from mpworks.snl_utils.mpsnl import MPStructureNL 7 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 8 | from mpworks.submission.submission_mongo import SubmissionMongoAdapter 9 | from pymatgen.matproj.snl import StructureNL 10 | import datetime 11 
| 12 | __author__ = 'Anubhav Jain' 13 | __copyright__ = 'Copyright 2014, The Materials Project' 14 | __version__ = '0.1' 15 | __maintainer__ = 'Anubhav Jain' 16 | __email__ = 'ajain@lbl.gov' 17 | __date__ = 'Feb 20, 2014' 18 | 19 | 20 | """ 21 | This is used to modify SNL data. An SNL should not just be changed in the SNL collection because 22 | that SNL is referred to in many different databases. 23 | 24 | This code tries to properly update all relevant databases with the SNL 25 | changes, not just the basic SNL collection. 26 | 27 | Note that the lattice and sites of an SNL cannot be changed! This would be a different material 28 | altogether and have affected the runs / duplicate checking. 29 | """ 30 | 31 | 32 | module_dir = os.path.dirname(os.path.abspath(__file__)) 33 | snl_f = os.path.join(module_dir, 'snl.yaml') 34 | fw_f = os.path.join(module_dir, 'my_launchpad.yaml') 35 | tasks_f = os.path.join(module_dir, 'tasks.yaml') 36 | 37 | def get_colls(): 38 | colls = namedtuple('Collections', ['snl', 'snlgroups']) 39 | sma = SNLMongoAdapter.from_file(snl_f) 40 | lp = LaunchPad.from_file(fw_f) 41 | 42 | colls.snl = sma.snl 43 | colls.snlgroups = sma.snlgroups 44 | colls.fireworks = lp.fireworks 45 | colls.launches = lp.launches 46 | 47 | with open(tasks_f) as f2: 48 | task_creds = yaml.load(f2) 49 | 50 | mc = MongoClient(task_creds['host'], task_creds['port']) 51 | db = mc[task_creds['database']] 52 | db.authenticate(task_creds['admin_user'], task_creds['admin_password']) 53 | colls.tasks = db['tasks'] 54 | 55 | return colls 56 | 57 | 58 | def modify_snl(snl_id, new_snl, colls, reject_bad_tasks=False): 59 | # get the old SNL lattice and sites 60 | snl_old = colls.snl.find_one({'snl_id': snl_id}, {'lattice': 1, 'sites': 1, 'snl_timestamp': 1}) 61 | 62 | # enforce the new SNL's lattice/sites to be same as old 63 | snl_d = new_snl.as_dict() 64 | snl_d['lattice'] = snl_old['lattice'] 65 | snl_d['sites'] = snl_old['sites'] 66 | snl_d['snl_timestamp'] = snl_old['snl_timestamp'] 67 | 68 | # insert the new SNL into the snl collection 69 | print 'INSERTING SNL_ID', {'snl_id': snl_id}, snl_d 70 | colls.snl.update({'snl_id': snl_id}, snl_d) 71 | 72 | # update the canonical SNL of the group 73 | for s in colls.snlgroups.find({'canonical_snl.about._materialsproject.snl_id': snl_id}, {'snlgroup_id': 1}): 74 | print 'CHANGING SNLGROUP_ID', s['snlgroup_id'] 75 | colls.snlgroups.find_and_modify({'snlgroup_id': s['snlgroup_id']}, {'$set': {'canonical_snl': snl_d}}) 76 | 77 | # update FWs pt 1 78 | for f in colls.fireworks.find({'spec.mpsnl.about._materialsproject.snl_id': snl_id}, {'fw_id': 1}): 79 | print 'CHANGING FW_ID', f['fw_id'] 80 | colls.fireworks.find_and_modify({'fw_id': f['fw_id']}, {'$set': {'spec.mpsnl': snl_d}}) 81 | 82 | # update FWs pt 2 83 | for f in colls.fireworks.find({'spec.force_mpsnl.about._materialsproject.snl_id': snl_id}, {'fw_id': 1}): 84 | print 'CHANGING FW_ID', f['fw_id'] 85 | colls.fireworks.find_and_modify({'fw_id': f['fw_id']}, {'$set': {'spec.force_mpsnl': snl_d}}) 86 | 87 | # update Launches 88 | for l in colls.launches.find({'action.update_spec.mpsnl.about._materialsproject.snl_id': snl_id}, {'launch_id': 1}): 89 | print 'CHANGING LAUNCH_ID', l['launch_id'] 90 | colls.launches.find_and_modify({'launch_id': l['launch_id']}, {'$set': {'action.update_spec.mpsnl': snl_d}}) 91 | 92 | # update tasks initial 93 | for t in colls.tasks.find({'snl.about._materialsproject.snl_id': snl_id}, {'task_id': 1}): 94 | print 'CHANGING init TASK_ID', t['task_id'] 95 | 
colls.tasks.find_and_modify({'task_id': t['task_id']}, {'$set': {'snl': snl_d}}) 96 | if reject_bad_tasks: 97 | print 'REJECTING TASK_ID', t['task_id'] 98 | colls.tasks.find_and_modify({'task_id': t['task_id']}, {'$set': {'state': 'rejected'}}) 99 | colls.tasks.find_and_modify({'task_id': t['task_id']}, {'$push': {'analysis.errors_MP.critical_signals': 'BAD STRUCTURE SNL'}}) 100 | colls.tasks.find_and_modify({'task_id': t['task_id']}, {'$inc': {'analysis.errors_MP.num_critical': 1}}) 101 | 102 | 103 | # update tasks final 104 | for t in colls.tasks.find({'snl_final.about._materialsproject.snl_id': snl_id}, {'task_id': 1}): 105 | print 'CHANGING final TASK_ID', t['task_id'] 106 | colls.tasks.find_and_modify({'task_id': t['task_id']}, {'$set': {'snl_final': snl_d}}) 107 | if reject_bad_tasks: 108 | print 'REJECTING TASK_ID', t['task_id'] 109 | colls.tasks.find_and_modify({'task_id': t['task_id']}, {'$set': {'state': 'rejected'}}) 110 | colls.tasks.find_and_modify({'task_id': t['task_id']}, {'$push': {'analysis.errors_MP.critical_signals': 'BAD STRUCTURE SNL'}}) 111 | colls.tasks.find_and_modify({'task_id': t['task_id']}, {'$inc': {'analysis.errors_MP.num_critical': 1}}) 112 | 113 | # note: for now we are not fixing submissions in order to keep a record of submissions accurate, and also because the SNL assignment comes after submission 114 | 115 | print 'DONE PROCESSING', snl_id 116 | 117 | 118 | def get_deprecated_snl(snl_id, colls): 119 | snl_old = colls.snl.find_one({'snl_id': snl_id}) 120 | del snl_old['about']['_icsd'] 121 | snl_old['about']['remarks'].append('Record updated (about._icsd deleted) {}'.format(datetime.datetime.now().strftime('%Y-%m-%d'))) 122 | return MPStructureNL.from_dict(snl_old) 123 | 124 | 125 | if __name__ == '__main__': 126 | 127 | colls = get_colls() 128 | snl_id = 1579 129 | 130 | snl_new = get_deprecated_snl(snl_id, colls) 131 | print snl_new.as_dict() 132 | 133 | modify_snl(snl_id, snl_new, colls, reject_bad_tasks=True) -------------------------------------------------------------------------------- /mpworks/maintenance_scripts/reparse_tasks.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import json 3 | import logging 4 | import os 5 | import sys 6 | from pymongo import MongoClient 7 | from fireworks.core.launchpad import LaunchPad 8 | from mpworks.drones.mp_vaspdrone import MPVaspDrone 9 | import multiprocessing 10 | import traceback 11 | 12 | __author__ = 'Anubhav Jain' 13 | __copyright__ = 'Copyright 2013, The Materials Project' 14 | __version__ = '0.1' 15 | __maintainer__ = 'Anubhav Jain' 16 | __email__ = 'ajain@lbl.gov' 17 | __date__ = 'May 13, 2013' 18 | 19 | ''' 20 | This script re-runs the MPVaspDrone over all the *new-style* tasks. It can be used when the MPVaspDrone is updated. 21 | 22 | A few notes: 23 | * The old-style tasks will be unaffected by this script 24 | * The dos_fs and band_structure_fs collections should be completely deleted before running this script over the database. 25 | 26 | Note - AJ has not run this code since its inception in May 2013. Changes may be needed. 
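Hypothetical invocation (argument names per the ArgumentParser below): set the
DB_LOC environment variable to the directory containing tasks_db.json, then run
e.g. "python reparse_tasks.py 1000 2000" to reparse deprecated task ids in [1000, 2000].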
27 | ''' 28 | 29 | class TaskBuilder(): 30 | 31 | @classmethod 32 | def setup(cls): 33 | db_dir = os.environ['DB_LOC'] 34 | db_path = os.path.join(db_dir, 'tasks_db.json') 35 | with open(db_path) as f2: 36 | db_creds = json.load(f2) 37 | mc2 = MongoClient(db_creds['host'], db_creds['port']) 38 | db2 = mc2[db_creds['database']] 39 | db2.authenticate(db_creds['admin_user'], db_creds['admin_password']) 40 | 41 | cls.tasks = db2['tasks'] 42 | cls.host = db_creds['host'] 43 | cls.port = db_creds['port'] 44 | cls.database = db_creds['database'] 45 | cls.collection = db_creds['collection'] 46 | cls.admin_user = db_creds['admin_user'] 47 | cls.admin_password = db_creds['admin_password'] 48 | 49 | def process_task(self, data): 50 | 51 | try: 52 | dir_name = data[0] 53 | parse_dos = data[1] 54 | prev_info = self.tasks.find_one({'dir_name_full': dir_name}, {'task_type': 1, 'snl_final': 1, 'snlgroup_id_final': 1, 'snlgroup_changed': 1}) 55 | drone = MPVaspDrone( 56 | host=self.host, port=self.port, 57 | database=self.database, user=self.admin_user, 58 | password=self.admin_password, 59 | collection=self.collection, parse_dos=parse_dos, 60 | additional_fields={}, 61 | update_duplicates=True) 62 | t_id, d = drone.assimilate(dir_name, launches_coll=LaunchPad.auto_load().launches) 63 | 64 | 65 | self.tasks.update({"task_id": t_id}, {"$set": {"snl_final": prev_info['snl_final'], "snlgroup_id_final": prev_info['snlgroup_id_final'], "snlgroup_changed": prev_info['snlgroup_changed']}}) 66 | print 'FINISHED', t_id 67 | except: 68 | print '-----' 69 | print 'ENCOUNTERED AN EXCEPTION!!!', data[0] 70 | traceback.print_exc() 71 | print '-----' 72 | 73 | 74 | def _analyze(data): 75 | b = TaskBuilder() 76 | return b.process_task(data) 77 | 78 | 79 | if __name__ == '__main__': 80 | 81 | logging.basicConfig(level=logging.INFO) 82 | logger = logging.getLogger('MPVaspDrone') 83 | logger.setLevel(logging.INFO) 84 | sh = logging.StreamHandler(stream=sys.stdout) 85 | sh.setLevel(getattr(logging, 'INFO')) 86 | logger.addHandler(sh) 87 | 88 | finished_tasks = [] 89 | module_dir = os.path.dirname(os.path.abspath(__file__)) 90 | if os.path.exists(os.path.join(module_dir, 'finished_tasks.txt')): 91 | with open(os.path.join(module_dir, 'finished_tasks.txt')) as f: 92 | for line in f: 93 | task = line.split()[1].strip() 94 | finished_tasks.append(task) 95 | 96 | o = TaskBuilder() 97 | o.setup() 98 | tasks = TaskBuilder.tasks 99 | m_data = [] 100 | # q = {'submission_id': {'$exists': False}} # these are all new-style tasks 101 | #q = {"task_type":{"$regex":"band structure"}, "state":"successful", "calculations.0.band_structure_fs_id":{"$exists":False}} 102 | 103 | parser = ArgumentParser() 104 | parser.add_argument('min', help='min', type=int) 105 | parser.add_argument('max', help='max', type=int) 106 | args = parser.parse_args() 107 | q = {"task_id_deprecated": {"$lte": args.max, "$gte":args.min}, "is_deprecated": True} 108 | 109 | for d in tasks.find(q, {'dir_name_full': 1, 'task_type': 1, 'task_id': 1}, timeout=False): 110 | if d['task_id'] in finished_tasks: 111 | print 'DUPLICATE', d['task_id'] 112 | else: 113 | o.process_task((d['dir_name_full'], 'Uniform' in d['task_type'])) 114 | # m_data.append((d['dir_name_full'], 'Uniform' in d['task_type'])) 115 | print 'DONE' -------------------------------------------------------------------------------- /mpworks/osti_doi/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/osti_doi/__init__.py -------------------------------------------------------------------------------- /mpworks/osti_doi/__main__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import argparse 3 | from osti_record import OstiRecord, OstiMongoAdapter 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--log", help="show log output", action="store_true") 7 | parser.add_argument("--prod", action="store_true", help="""use production DB.""") 8 | group = parser.add_mutually_exclusive_group() 9 | group.add_argument("-n", default=0, type=int, help="""number of materials to 10 | submit to OSTI. The default (0) collects all materials not 11 | yet submitted.""") 12 | group.add_argument('-l', nargs='+', type=int, help="""list of material ids to 13 | submit. mp-prefix internally added, e.g. use `-l 4 1986 14 | 571567`.""") 15 | group.add_argument("--reset", action="store_true", help="""reset collections""") 16 | group.add_argument("--info", action="store_true", help="""retrieve materials 17 | already having a doi saved in materials collection""") 18 | group.add_argument("--plotly", action="store_true", help="""init plotly graph""") 19 | args = parser.parse_args() 20 | 21 | loglevel = 'DEBUG' if args.log else 'WARNING' 22 | logging.basicConfig(level=logging.ERROR) 23 | logger = logging.getLogger('mg.build.osti_doi') 24 | logger.setLevel(getattr(logging, loglevel)) 25 | 26 | db_yaml = 'materials_db_{}.yaml'.format('prod' if args.prod else 'dev') 27 | print db_yaml 28 | if args.reset or args.info or args.plotly: 29 | matad = OstiMongoAdapter.from_config(db_yaml=db_yaml) 30 | if args.reset: 31 | matad._reset() 32 | elif args.info: 33 | print '{} DOIs in DOI collection.'.format(matad.doicoll.count()) 34 | dois = matad.get_all_dois() 35 | print '{}/{} materials have DOIs.'.format(len(dois), matad.matcoll.count()) 36 | elif args.plotly: 37 | import os, datetime 38 | import plotly.plotly as py 39 | from plotly.graph_objs import * 40 | stream_ids = ['645h22ynck', '96howh4ip8', 'nnqpv5ra02'] 41 | py.sign_in( 42 | os.environ.get('MP_PLOTLY_USER'), 43 | os.environ.get('MP_PLOTLY_APIKEY'), 44 | stream_ids=stream_ids 45 | ) 46 | today = datetime.date.today() 47 | counts = [ 48 | matad.matcoll.count(), matad.doicoll.count(), 49 | len(matad.get_all_dois()) 50 | ] 51 | names = ['materials', 'requested DOIs', 'validated DOIs'] 52 | data = Data([ 53 | Scatter( 54 | x=[today], y=[counts[idx]], name=names[idx], 55 | stream=dict(token=stream_ids[idx], maxpoints=10000) 56 | ) for idx,count in enumerate(counts) 57 | ]) 58 | filename = 'dois_{}'.format(today) 59 | print py.plot(data, filename=filename, auto_open=False) 60 | else: 61 | # generate records for either n or all (n=0) not-yet-submitted materials 62 | # OR generate records for specific materials (submitted or not) 63 | osti = OstiRecord(l=args.l, n=args.n, db_yaml=db_yaml) 64 | osti.submit() 65 | -------------------------------------------------------------------------------- /mpworks/osti_doi/builders.py: -------------------------------------------------------------------------------- 1 | import requests, json, os, datetime, logging 2 | from matgendb.builders.core import Builder 3 | from osti_record import OstiRecord 4 | from bs4 import BeautifulSoup 5 | import plotly.plotly as py 6 | from plotly.graph_objs import * 7 | 8 | now = datetime.datetime.now() 9 | dirname =
os.path.dirname(os.path.realpath(__file__)) 10 | backupfile = os.path.join(dirname, 'dois.json') 11 | logfile = os.path.join(dirname, 'logs', 'dois_{}.log'.format(now)) 12 | _log = logging.getLogger('mg.build') 13 | _log.setLevel(logging.INFO) 14 | fh = logging.FileHandler(logfile) 15 | fh.setLevel(logging.INFO) 16 | formatter = logging.Formatter('####### %(asctime)s #######\n%(message)s') 17 | fh.setFormatter(formatter) 18 | _log.addHandler(fh) 19 | 20 | stream_ids = ['645h22ynck', '96howh4ip8', 'nnqpv5ra02'] 21 | py.sign_in( 22 | os.environ.get('MP_PLOTLY_USER'), 23 | os.environ.get('MP_PLOTLY_APIKEY'), 24 | stream_ids=stream_ids 25 | ) 26 | 27 | class DoiBuilder(Builder): 28 | """Builder to obtain DOIs for all/new materials""" 29 | 30 | def get_items(self, nmats=2, dois=None, materials=None): 31 | """DOIs + Materials iterator 32 | 33 | :param nmats: number of materials for which to request DOIs 34 | :type nmats: int 35 | :param dois: 'dois' collection in 'mg_core_dev/prod' 36 | :type dois: QueryEngine 37 | :param materials: 'materials' collection in 'mg_core_dev/prod' 38 | :type materials: QueryEngine 39 | """ 40 | self.nmats = nmats 41 | self.doi_qe = dois 42 | self.mat_qe = materials 43 | self.headers = {'Accept': 'text/bibliography; style=bibtex'} 44 | # loop the mp-id's 45 | # w/o valid DOI in doicoll *OR* 46 | # w/ valid DOI in doicoll but w/o doi key in matcoll 47 | mp_ids = [ 48 | {'_id': doc['_id'], 'doi': doc['doi'], 'valid': False} 49 | for doc in self.doi_qe.collection.find({'valid': False}) 50 | ] 51 | valid_mp_ids = self.doi_qe.collection.find({'valid': True}).distinct('_id') 52 | missing_mp_ids = self.mat_qe.collection.find( 53 | {'task_id': {'$in': valid_mp_ids}, 'doi': {'$exists': False}}, 54 | {'_id': 0, 'task_id': 1} 55 | ).distinct('task_id') 56 | mp_ids += list(self.doi_qe.collection.find( 57 | {'_id': {'$in': missing_mp_ids}}, 58 | {'doi': 1, 'valid': 1, 'bibtex': 1} 59 | )) 60 | return mp_ids 61 | 62 | def process_item(self, item): 63 | """validate DOI, save bibtex and build into matcoll""" 64 | if not item['valid']: 65 | #doi_url = 'http://doi.org/{}'.format(item['doi']) 66 | #doi_url = 'http://dx.doi.org/10.1038/nrd842' 67 | #r = requests.get(doi_url, headers=self.headers) 68 | if item['doi'] is None: 69 | # try loading doi from backup file, a.k.a reset item['doi'] (fixed manually) 70 | if os.path.exists(backupfile): 71 | with open(backupfile, 'r') as infile: 72 | data = json.load(infile) 73 | for d in data: 74 | if d['_id'] == item['_id'] and d['doi'] is not None: 75 | item['doi'] = d['doi'] 76 | _log.info(self.doi_qe.collection.update( 77 | {'_id': item['_id']}, {'$set': {'doi': item['doi']}} 78 | )) 79 | break 80 | # if mp-id not found in backup (not fixed manually) 81 | if item['doi'] is None: 82 | _log.warning('missing DOI for {}. 
Fix manually in dois.json and rerun!'.format(item['_id'])) 83 | return 0 84 | osti_id = item['doi'].split('/')[-1] 85 | doi_url = 'http://www.osti.gov/dataexplorer/biblio/{}/cite/bibtex'.format(osti_id) 86 | try: 87 | r = requests.get(doi_url) 88 | except Exception as ex: 89 | _log.warning('validation exception: {} -> {} -> {}'.format( 90 | item['_id'], item['doi'], ex 91 | )) 92 | return 0 93 | _log.info('validate {} -> {} -> {}'.format(item['_id'], item['doi'], r.status_code)) 94 | if r.status_code == 200: 95 | soup = BeautifulSoup(r.content, "html.parser") 96 | rows = soup.find_all('div', attrs={"class" : "csl-entry"}) 97 | if len(rows) == 1: 98 | bibtex = rows[0].text.strip() 99 | _log.info(self.doi_qe.collection.update( 100 | {'_id': item['_id']}, {'$set': { 101 | 'valid': True, 'bibtex': bibtex 102 | }} 103 | )) 104 | # only validated DOIs are ready to be built into matcoll 105 | _log.info(self.mat_qe.collection.update( 106 | {'task_id': item['_id']}, {'$set': { 107 | 'doi': item['doi'], 'doi_bibtex': bibtex 108 | }} 109 | )) 110 | else: 111 | _log.info('re-build {} -> {}'.format(item['_id'], item['doi'])) 112 | _log.info(self.mat_qe.collection.update( 113 | {'task_id': item['_id']}, {'$set': { 114 | 'doi': item['doi'], 'doi_bibtex': item['bibtex'] 115 | }} 116 | )) 117 | 118 | def finalize(self, errors): 119 | osti_record = OstiRecord( 120 | n=self.nmats, 121 | doicoll=self.doi_qe.collection, 122 | matcoll=self.mat_qe.collection 123 | ) 124 | osti_record.submit() 125 | with open(backupfile, 'w') as outfile: 126 | l = list(self.doi_qe.collection.find( 127 | fields={'created_at': True, 'doi': True} 128 | )) 129 | json.dump(l, outfile, indent=2) 130 | # push results to plotly streaming graph 131 | counts = [ 132 | self.mat_qe.collection.count(), 133 | self.doi_qe.collection.count(), 134 | len(osti_record.matad.get_all_dois()) 135 | ] 136 | for idx,stream_id in enumerate(stream_ids): 137 | s = py.Stream(stream_id) 138 | s.open() 139 | s.write(dict(x=now, y=counts[idx])) 140 | s.close() 141 | return True 142 | -------------------------------------------------------------------------------- /mpworks/osti_doi/example0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SM 6 | Materials Data on Nd (SG:229) by Materials Project 7 | Kristin Persson 8 | mp-4 9 | AC02-05CH11231; EDCBEE 10 | Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States) 11 | 11/02/2014 12 | English 13 | US 14 | USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22) 15 | https://materialsproject.org/materials/mp-4 16 | Kristin Persson 17 | LBNL 18 | kapersson@lbl.gov 19 | +1(510)486-7218 20 | https://materialsproject.org/citing 21 | MIT; UC Berkeley; Duke; U Louvain 22 | 36 MATERIALS SCIENCE 23 | crystal structure; Nd1; Nd; ICSD-76593; ICSD-43571 24 | Computed materials data using density functional theory calculations. These calculations determine the electronic structure of bulk materials by solving approximations to the Schrodinger equation. 
For more information, see https://materialsproject.org/docs/calculations 25 | 26 | 27 | 28 | SM 29 | Materials Data on ZnO (SG:216) by Materials Project 30 | Kristin Persson 31 | mp-1986 32 | AC02-05CH11231; EDCBEE 33 | Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States) 34 | 11/02/2014 35 | English 36 | US 37 | USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22) 38 | https://materialsproject.org/materials/mp-1986 39 | Kristin Persson 40 | LBNL 41 | kapersson@lbl.gov 42 | +1(510)486-7218 43 | https://materialsproject.org/citing 44 | MIT; UC Berkeley; Duke; U Louvain 45 | 36 MATERIALS SCIENCE 46 | crystal structure; O1 Zn1; O-Zn; ICSD-162753; ICSD-647683; ICSD-182357; ICSD-163383; electronic bandstructure 47 | Computed materials data using density functional theory calculations. These calculations determine the electronic structure of bulk materials by solving approximations to the Schrodinger equation. For more information, see https://materialsproject.org/docs/calculations 48 | 49 | 50 | 51 | SM 52 | Materials Data on CdI2 (SG:156) by Materials Project 53 | Kristin Persson 54 | mp-571567 55 | AC02-05CH11231; EDCBEE 56 | Lawrence Berkeley National Laboratory (LBNL), Berkeley, CA (United States) 57 | 11/02/2014 58 | English 59 | US 60 | USDOE Office of Science (SC), Basic Energy Sciences (BES) (SC-22) 61 | https://materialsproject.org/materials/mp-571567 62 | Kristin Persson 63 | LBNL 64 | kapersson@lbl.gov 65 | +1(510)486-7218 66 | https://materialsproject.org/citing 67 | MIT; UC Berkeley; Duke; U Louvain 68 | 36 MATERIALS SCIENCE 69 | crystal structure; Cd1 I2; Cd-I; ICSD-9194; ICSD-42260; electronic bandstructure 70 | Computed materials data using density functional theory calculations. These calculations determine the electronic structure of bulk materials by solving approximations to the Schrodinger equation. 
For more information, see https://materialsproject.org/docs/calculations 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /mpworks/osti_doi/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.3.2 2 | dicttoxml==1.6.4 3 | Django==1.7 4 | docutils==0.12 5 | enum34==0.9.23 6 | gnureadline==6.3.3 7 | ipython==3.1.0 8 | mongomock==1.2.0 9 | monty==0.5.9 10 | numpy==1.9.2 11 | plotly==1.6.17 12 | prettytable==0.7.2 13 | pybtex==0.18 14 | pyhull==1.5.4 15 | pymatgen==3.0.5 16 | -e git+git@github.com:materialsproject/pymatgen-db.git@4cc25978a22cd380988593149d504dd655ec9c81#egg=pymatgen_db-master 17 | pymongo==2.8 18 | pytz==2015.2 19 | PyYAML==3.11 20 | requests==2.6.0 21 | sentinels==0.0.6 22 | six==1.9.0 23 | smoqe==0.1.3 24 | xmltodict==0.9.2 25 | -------------------------------------------------------------------------------- /mpworks/osti_doi/run.sh: -------------------------------------------------------------------------------- 1 | source $HOME/.credentials 2 | source $HOME/.virtualenvs/env_mp_osti_doi/bin/activate 3 | cd $HOME/MPWorks 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | mgbuild run -v mpworks.osti_doi.builders.DoiBuilder nmats=25 dois=dois.json materials=materials.json 6 | git add mpworks/osti_doi/dois.json 7 | git commit -m "osti_doi: new dois backup" 8 | git push origin osti_doi 9 | -------------------------------------------------------------------------------- /mpworks/processors/README.md: -------------------------------------------------------------------------------- 1 | # Processors package 2 | 3 | This package manages the MPEnv strategy of runs - e.g., inserting structures into the submissions database, converting those submissions into FireWorks workflows, and back-updating the submissions with information on the resulting runs. 4 | 5 | It also contains a "canonical" set of test runs for testing changes to MPWorks/MPEnv/etc.
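6 | 7 | For example, a minimal sketch of the intended server-side loop, using the entry points this package already provides (the 60-second sleep interval is an arbitrary choice): 8 | 9 |     from mpworks.processors.process_submissions import SubmissionProcessor 10 | 11 |     # auto_load() connects to the submissions DB and the FireWorks LaunchPad 12 |     sp = SubmissionProcessor.auto_load() 13 |     # poll: turn new submissions into workflows, then sync run states back 14 |     sp.run(sleep_time=60, infinite=True)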
-------------------------------------------------------------------------------- /mpworks/processors/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'May 08, 2013' -------------------------------------------------------------------------------- /mpworks/processors/process_submissions.py: -------------------------------------------------------------------------------- 1 | import time 2 | import traceback 3 | from fireworks.core.launchpad import LaunchPad 4 | from mpworks.snl_utils.mpsnl import MPStructureNL 5 | from mpworks.submission.submission_mongo import SubmissionMongoAdapter 6 | from mpworks.workflows.snl_to_wf import snl_to_wf 7 | from mpworks.workflows.snl_to_wf_elastic import snl_to_wf_elastic 8 | from mpworks.workflows.wf_utils import NO_POTCARS 9 | from pymatgen.matproj.snl import StructureNL 10 | 11 | __author__ = 'Anubhav Jain' 12 | __copyright__ = 'Copyright 2013, The Materials Project' 13 | __version__ = '0.1' 14 | __maintainer__ = 'Anubhav Jain' 15 | __email__ = 'ajain@lbl.gov' 16 | __date__ = 'May 08, 2013' 17 | 18 | # Turns submissions into workflows and updates the state of the submissions DB 19 | 20 | class SubmissionProcessor(): 21 | MAX_SITES = 200 22 | 23 | # This is run on the server end 24 | def __init__(self, sma, launchpad): 25 | self.sma = sma 26 | self.jobs = sma.jobs 27 | self.launchpad = launchpad 28 | 29 | def run(self, sleep_time=None, infinite=False): 30 | sleep_time = sleep_time if sleep_time else 30 31 | while True: 32 | self.submit_all_new_workflows() 33 | print "Updating existing workflows..."
34 | self.update_existing_workflows() # for updating the display 35 | if not infinite: 36 | break 37 | print 'sleeping', sleep_time 38 | time.sleep(sleep_time) 39 | 40 | def submit_all_new_workflows(self): 41 | last_id = -1 42 | while last_id: 43 | last_id = self.submit_new_workflow() 44 | 45 | def submit_new_workflow(self): 46 | # finds a submitted job, creates a workflow, and submits it to FireWorks 47 | job = self.jobs.find_and_modify({'state': 'SUBMITTED'}, {'$set': {'state': 'WAITING'}}) 48 | if job: 49 | submission_id = job['submission_id'] 50 | try: 51 | if 'snl_id' in job: 52 | snl = MPStructureNL.from_dict(job) 53 | else: 54 | snl = StructureNL.from_dict(job) 55 | if len(snl.structure.sites) > SubmissionProcessor.MAX_SITES: 56 | self.sma.update_state(submission_id, 'REJECTED', 'too many sites', {}) 57 | print 'REJECTED WORKFLOW FOR {} - too many sites ({})'.format( 58 | snl.structure.formula, len(snl.structure.sites)) 59 | elif not job['is_valid']: 60 | self.sma.update_state(submission_id, 'REJECTED', 61 | 'invalid structure (atoms too close)', {}) 62 | print 'REJECTED WORKFLOW FOR {} - invalid structure'.format( 63 | snl.structure.formula) 64 | elif len(set(NO_POTCARS) & set(job['elements'])) > 0: 65 | self.sma.update_state(submission_id, 'REJECTED', 66 | 'invalid structure (no POTCAR)', {}) 67 | print 'REJECTED WORKFLOW FOR {} - invalid element (No POTCAR)'.format( 68 | snl.structure.formula) 69 | elif not job['is_ordered']: 70 | self.sma.update_state(submission_id, 'REJECTED', 71 | 'invalid structure (disordered)', {}) 72 | print 'REJECTED WORKFLOW FOR {} - invalid structure'.format( 73 | snl.structure.formula) 74 | else: 75 | snl.data['_materialsproject'] = snl.data.get('_materialsproject', {}) 76 | snl.data['_materialsproject']['submission_id'] = submission_id 77 | 78 | # create a workflow 79 | if "Elasticity" in snl.projects: 80 | wf=snl_to_wf_elastic(snl, job['parameters']) 81 | else: 82 | wf = snl_to_wf(snl, job['parameters']) 83 | self.launchpad.add_wf(wf) 84 | print 'ADDED WORKFLOW FOR {}'.format(snl.structure.formula) 85 | except: 86 | self.jobs.find_and_modify({'submission_id': submission_id}, 87 | {'$set': {'state': 'ERROR'}}) 88 | traceback.print_exc() 89 | 90 | return submission_id 91 | 92 | def update_existing_workflows(self): 93 | # updates the state of existing workflows by querying the FireWorks database 94 | # this is an optional step that updates the submissions db with jobs info 95 | # it is useful for the frontend display but not needed for workflow execution 96 | for submission in self.jobs.find({'state': {'$nin': ['COMPLETED', 'ERROR', 'REJECTED', 'CANCELLED']}}, 97 | {'submission_id': 1}): 98 | submission_id = submission['submission_id'] 99 | try: 100 | self.update_wf_state(submission_id) 101 | except: 102 | print 'ERROR while processing s_id', submission_id 103 | traceback.print_exc() 104 | 105 | 106 | def update_wf_state(self, submission_id): 107 | # state of the workflow 108 | tasks = {} 109 | 110 | wf = self.launchpad.workflows.find_one({'metadata.submission_id': submission_id}, 111 | sort=[('updated_on', -1)]) 112 | if not wf: 113 | # submission_id from jobs collection doesn't exist in workflows collection 114 | # workflow has probably been removed manually by user via `lpad delete_wflows` 115 | return 116 | 117 | details = '(none)' 118 | for e in self.launchpad.fireworks.find({'fw_id': {'$in' : wf['nodes']}}, 119 | {'spec.task_type': 1 ,'state': 1, 'launches': 1}): 120 | if e['spec']['task_type'] == 'VASP db insertion' and \ 121 | e['state'] == 
'COMPLETED': 122 | for launch in self.launchpad.launches.find({'launch_id': {'$in' : e['launches']}}, 123 | {'action.stored_data.task_id': 1, 124 | 'action.update_spec.prev_task_type' : 1}): 125 | try: 126 | tasks[launch['action']['update_spec']['prev_task_type']] \ 127 | = launch['action']['stored_data']['task_id'] 128 | break 129 | except: 130 | pass 131 | 132 | self.sma.update_state(submission_id, wf['state'], details, tasks) 133 | return wf['state'], details, tasks 134 | 135 | @classmethod 136 | def auto_load(cls): 137 | sma = SubmissionMongoAdapter.auto_load() 138 | lp = LaunchPad.auto_load() 139 | 140 | return SubmissionProcessor(sma, lp) -------------------------------------------------------------------------------- /mpworks/processors/submit_canonical.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import warnings 4 | from pymongo import MongoClient 5 | from fireworks.core.launchpad import LaunchPad 6 | from mpworks.snl_utils.snl_mongo import SNLMongoAdapter 7 | from mpworks.submission.submission_mongo import SubmissionMongoAdapter 8 | from pymatgen import MPRester 9 | from pymatgen.matproj.snl import StructureNL 10 | 11 | __author__ = 'Anubhav Jain' 12 | __copyright__ = 'Copyright 2013, The Materials Project' 13 | __version__ = '0.1' 14 | __maintainer__ = 'Anubhav Jain' 15 | __email__ = 'ajain@lbl.gov' 16 | __date__ = 'May 06, 2013' 17 | 18 | def clear_env(): 19 | sma = SubmissionMongoAdapter.auto_load() 20 | if 'prod' in sma.db: 21 | warnings.warn("Not clearing production db for safety reasons.") 22 | return 23 | 24 | lp = LaunchPad.auto_load() 25 | 26 | snl = SNLMongoAdapter.auto_load() 27 | 28 | db_dir = os.environ['DB_LOC'] 29 | db_path = os.path.join(db_dir, 'tasks_db.json') 30 | with open(db_path) as f: 31 | db_creds = json.load(f) 32 | 33 | sma._reset() 34 | lp.reset('', require_password=False) 35 | snl._reset() 36 | 37 | conn = MongoClient(db_creds['host'], db_creds['port']) 38 | db = conn[db_creds['database']] 39 | db.authenticate(db_creds['admin_user'], db_creds['admin_password']) 40 | db.tasks.remove() 41 | db.boltztrap.remove() 42 | db.counter.remove() 43 | db['dos_fs.chunks'].remove() 44 | db['dos_fs.files'].remove() 45 | db['band_structure_fs.chunks'].remove() 46 | db['band_structure_fs.files'].remove() 47 | 48 | 49 | def submit_tests(names=None, params=None): 50 | sma = SubmissionMongoAdapter.auto_load() 51 | 52 | # note: TiO2 is listed twice on purpose; the duplicate check should catch this 53 | compounds = {"Si": 149, "Al": 134, "ZnO": 2133, "FeO": 18905, 54 | "LiCoO2": 601860, "LiFePO4": 585433, "GaAs": 2534, "Ge": 32, "PbTe": 19717, 55 | "YbO": 1216, "SiC": 567551, "Fe3C": 510623, "SiO2": 547211, "Na2O": 2352, 56 | "InSb (unstable)": 10148, "Sb2O5": 1705, "N2O5": 554368, "BaTiO3": 5020, 57 | "Rb2O": 1394, "TiO2": 554278, "TiO2 (2)": 554278, 'BaNbTePO8': 560794, 58 | "AgCl": 22922, "AgCl (2)": 570858, "SiO2 (2)": 555211, "Mg2SiO4": 2895, "CO2": 20066, 59 | "PbSO4": 22298, "SrTiO3": 5532, "FeAl": 2658, "AlFeCo2": 10884, "NaCoO2": 554427, 60 | "ReO3": 547271, "LaH2": 24153, "SiH3I": 28538, "LiBH4": 30209, "H8S5N2": 28143, 61 | "LiOH": 23856, "SrO2": 2697, "Mn": 35, "Hg4Pt": 2312, 62 | "PdF4": 13868, "Gd2WO6": 651333, 'MnO2': 19395, 'VO2': 504800} 63 | 64 | mpr = MPRester() 65 | 66 | for name, sid in compounds.iteritems(): 67 | if not names or name in names: 68 | sid = mpr.get_materials_id_from_task_id("mp-{}".format(sid)) 69 | s = mpr.get_structure_by_material_id(sid, final=False) 70 | 71 | snl =
StructureNL(s, 'Anubhav Jain <anubhavster@gmail.com>') 72 | 73 | parameters = {'priority': 10} if name == 'Si' else {} 74 | if params: 75 | parameters.update(params) 76 | sma.submit_snl(snl, 'anubhavster@gmail.com', parameters=parameters) 77 | 78 | 79 | def clear_and_submit(clear=False, names=None, params=None): 80 | if clear: 81 | clear_env() 82 | submit_tests(names=names, params=params) 83 | -------------------------------------------------------------------------------- /mpworks/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'May 06, 2013' -------------------------------------------------------------------------------- /mpworks/scripts/submissions_run.py: -------------------------------------------------------------------------------- 1 | from mpworks.processors.process_submissions import SubmissionProcessor 2 | 3 | __author__ = 'Anubhav Jain' 4 | __copyright__ = 'Copyright 2013, The Materials Project' 5 | __version__ = '0.1' 6 | __maintainer__ = 'Anubhav Jain' 7 | __email__ = 'ajain@lbl.gov' 8 | __date__ = 'May 06, 2013' 9 | 10 | """ 11 | A runnable script for submissions 12 | """ 13 | 14 | from argparse import ArgumentParser 15 | 16 | __author__ = "Anubhav Jain" 17 | __copyright__ = "Copyright 2013, The Materials Project" 18 | __version__ = "0.1" 19 | __maintainer__ = "Anubhav Jain" 20 | __email__ = "ajain@lbl.gov" 21 | __date__ = "Jan 14, 2013" 22 | 23 | 24 | def go_submissions(): 25 | m_description = 'This program is used to pull jobs from the Submissions database, create FireWorks workflows from those submissions, and then monitor all previous submissions for updates to state (so that the submission database can be updated)' 26 | 27 | parser = ArgumentParser(description=m_description) 28 | parser.add_argument('--sleep', help='sleep time between loops', default=None, type=int) 29 | parser.add_argument('--infinite', help='loop indefinitely', action='store_true') 30 | args = parser.parse_args() 31 | 32 | sp = SubmissionProcessor.auto_load() 33 | sp.run(args.sleep, args.infinite) 34 | 35 | if __name__ == '__main__': 36 | go_submissions() -------------------------------------------------------------------------------- /mpworks/scripts/submit_canonical_run.py: -------------------------------------------------------------------------------- 1 | from mpworks.processors.submit_canonical import clear_and_submit 2 | 3 | __author__ = 'Anubhav Jain' 4 | __copyright__ = 'Copyright 2013, The Materials Project' 5 | __version__ = '0.1' 6 | __maintainer__ = 'Anubhav Jain' 7 | __email__ = 'ajain@lbl.gov' 8 | __date__ = 'May 06, 2013' 9 | 10 | """ 11 | A runnable script for submitting test jobs 12 | """ 13 | 14 | from argparse import ArgumentParser 15 | 16 | __author__ = "Anubhav Jain" 17 | __copyright__ = "Copyright 2013, The Materials Project" 18 | __version__ = "0.1" 19 | __maintainer__ = "Anubhav Jain" 20 | __email__ = "ajain@lbl.gov" 21 | __date__ = "Jan 14, 2013" 22 | 23 | 24 | def go_testing(): 25 | m_description = 'This program is used to clear and submit jobs from the database' 26 | 27 | parser = ArgumentParser(description=m_description) 28 | parser.add_argument('-c', '--clear', help='clear old databases', action='store_true') 29 | parser.add_argument('-n', '--names', help='csv of compound names', default=None) 30 | parser.add_argument('--noboltztrap', help='do
NOT run boltztrap', action='store_true') 31 | parser.add_argument('--exact', help='exact structure', action='store_true') 32 | args = parser.parse_args() 33 | 34 | names = [x.strip() for x in args.names.split(',')] if args.names else None 35 | 36 | params = {} 37 | if args.noboltztrap: 38 | params['boltztrap'] = False 39 | if args.exact: 40 | params['exact_structure'] = True 41 | clear_and_submit(args.clear, names, params) 42 | 43 | if __name__ == '__main__': 44 | go_testing() -------------------------------------------------------------------------------- /mpworks/snl_utils/README.md: -------------------------------------------------------------------------------- 1 | # snl_utils 2 | 3 | This package is poorly-named, but contains MP extensions to SNL that are needed for duplicate checking and database storage of SNL. 4 | 5 | This includes: 6 | - MPSNL, which adds snl_id and spacegroup info to an SNL 7 | - SNLGroup, which represents a "material" and can have several associated SNL 8 | - Routines for adding an SNL into the database, assigning an SNLGroup, etc. -------------------------------------------------------------------------------- /mpworks/snl_utils/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Apr 24, 2013' -------------------------------------------------------------------------------- /mpworks/submission/README.md: -------------------------------------------------------------------------------- 1 | # Submission package 2 | 3 | Contains an interface to a submissions database for managing jobs. It is part of the "MPenv" way of running things. -------------------------------------------------------------------------------- /mpworks/submission/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Mar 27, 2013' -------------------------------------------------------------------------------- /mpworks/workflows/README.md: -------------------------------------------------------------------------------- 1 | # Workflows package 2 | 3 | This package is used by the production workflow. Indeed, it *defines* the production workflows for various types of runs. 
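4 | 5 | For example, a sketch of how these workflow definitions are consumed upstream (assuming an already-built StructureNL `snl`; the priority value shown is arbitrary): 6 | 7 |     from fireworks.core.launchpad import LaunchPad 8 |     from mpworks.workflows.snl_to_wf_elastic import snl_to_wf_elastic 9 | 10 |     # build the elastic production workflow for this SNL... 11 |     wf = snl_to_wf_elastic(snl, {'priority': 10}) 12 |     # ...and hand it to FireWorks for execution 13 |     LaunchPad.auto_load().add_wf(wf)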
-------------------------------------------------------------------------------- /mpworks/workflows/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'Mar 15, 2013' -------------------------------------------------------------------------------- /mpworks/workflows/snl_to_wf_elastic.py: -------------------------------------------------------------------------------- 1 | from pymatgen.io.vasp import Poscar 2 | from mpworks.firetasks.elastic_tasks import SetupElastConstTask, SetupFConvergenceTask, SetupDeformedStructTask 3 | 4 | __author__ = 'weichen' 5 | 6 | from fireworks.core.firework import Firework, Workflow 7 | from fireworks.utilities.fw_utilities import get_slug 8 | from mpworks.firetasks.custodian_task import get_custodian_task 9 | from mpworks.firetasks.snl_tasks import AddSNLTask 10 | from mpworks.firetasks.vasp_io_tasks import VaspCopyTask, VaspWriterTask, \ 11 | VaspToDBTask 12 | from mpworks.firetasks.vasp_setup_tasks import SetupGGAUTask 13 | from mpworks.snl_utils.mpsnl import get_meta_from_structure, MPStructureNL 14 | from mpworks.workflows.wf_settings import QA_DB, QA_VASP, QA_CONTROL 15 | from pymatgen import Composition 16 | from mpworks.workflows import snl_to_wf 17 | from mpworks.firetasks.elastic_tasks import update_spec_force_convergence 18 | from collections import defaultdict 19 | 20 | 21 | def snl_to_wf_elastic(snl, parameters): 22 | # parameters["user_vasp_settings"] specifies user defined incar/kpoints parameters 23 | fws = [] 24 | connections = defaultdict(list) 25 | parameters = parameters if parameters else {} 26 | 27 | snl_priority = parameters.get('priority', 1) 28 | priority = snl_priority * 2 # once we start a job, keep going! 
29 | 30 | f = Composition(snl.structure.composition.reduced_formula).alphabetical_formula 31 | 32 | # add the SNL to the SNL DB and figure out duplicate group 33 | tasks = [AddSNLTask()] 34 | spec = {'task_type': 'Add to SNL database', 'snl': snl.as_dict(), 35 | '_queueadapter': QA_DB, '_priority': snl_priority} 36 | if 'snlgroup_id' in parameters and isinstance(snl, MPStructureNL): 37 | spec['force_mpsnl'] = snl.as_dict() 38 | spec['force_snlgroup_id'] = parameters['snlgroup_id'] 39 | del spec['snl'] 40 | fws.append(Firework(tasks, spec, 41 | name=get_slug(f + '--' + spec['task_type']), fw_id=0)) 42 | connections[0] = [1] 43 | 44 | parameters["exact_structure"] = True 45 | # run GGA structure optimization for force convergence 46 | spec = snl_to_wf._snl_to_spec(snl, parameters=parameters) 47 | user_vasp_settings = parameters.get("user_vasp_settings") 48 | spec = update_spec_force_convergence(spec, user_vasp_settings) 49 | spec['run_tags'].append("origin") 50 | spec['_priority'] = priority 51 | spec['_queueadapter'] = QA_VASP 52 | del spec['_dupefinder'] 53 | spec['task_type'] = "Vasp force convergence optimize structure (2x)" 54 | tasks = [VaspWriterTask(), get_custodian_task(spec)] 55 | fws.append(Firework(tasks, spec, 56 | name=get_slug(f + '--' + spec['task_type']), fw_id=1)) 57 | 58 | # insert into DB - GGA structure optimization 59 | spec = {'task_type': 'VASP db insertion', '_priority': priority, 60 | '_allow_fizzled_parents': True, '_queueadapter': QA_DB, 61 | 'clean_task_doc':True, 'elastic_constant':"force_convergence"} 62 | fws.append(Firework([VaspToDBTask()], spec, 63 | name=get_slug(f + '--' + spec['task_type']), fw_id=2)) 64 | connections[1] = [2] 65 | 66 | spec = {'task_type': 'Setup Deformed Struct Task', '_priority': priority, 67 | '_queueadapter': QA_CONTROL} 68 | fws.append(Firework([SetupDeformedStructTask()], spec, 69 | name=get_slug(f + '--' + spec['task_type']),fw_id=3)) 70 | connections[2] = [3] 71 | 72 | wf_meta = get_meta_from_structure(snl.structure) 73 | wf_meta['run_version'] = 'May 2013 (1)' 74 | 75 | if '_materialsproject' in snl.data and 'submission_id' in snl.data['_materialsproject']: 76 | wf_meta['submission_id'] = snl.data['_materialsproject']['submission_id'] 77 | 78 | return Workflow(fws, connections, name=Composition( 79 | snl.structure.composition.reduced_formula).alphabetical_formula, metadata=wf_meta) 80 | -------------------------------------------------------------------------------- /mpworks/workflows/test_wfs/FeO.cif: -------------------------------------------------------------------------------- 1 | #\#CIF1.1 2 | ########################################################################## 3 | # Crystallographic Information Format file 4 | # Produced by PyCifRW module 5 | # 6 | # This is a CIF file. CIF has been adopted by the International 7 | # Union of Crystallography as the standard for data archiving and 8 | # transmission. 
9 | # 10 | # For information on this file format, follow the CIF links at 11 | # http://www.iucr.org 12 | ########################################################################## 13 | 14 | data_FeO 15 | _symmetry_space_group_name_H-M 'P 1' 16 | _cell_length_a 3.05251370035 17 | _cell_length_b 3.05251369966 18 | _cell_length_c 3.0525137 19 | _cell_angle_alpha 59.999999996 20 | _cell_angle_beta 60.000000004 21 | _cell_angle_gamma 60.000000058 22 | _chemical_name_systematic 'Generated by pymatgen' 23 | _symmetry_Int_Tables_number 1 24 | _chemical_formula_structural FeO 25 | _chemical_formula_sum 'Fe1 O1' 26 | _cell_volume 20.112120746 27 | _cell_formula_units_Z 1 28 | loop_ 29 | _symmetry_equiv_pos_site_id 30 | _symmetry_equiv_pos_as_xyz 31 | 1 'x, y, z' 32 | 33 | loop_ 34 | _atom_site_type_symbol 35 | _atom_site_label 36 | _atom_site_symmetry_multiplicity 37 | _atom_site_fract_x 38 | _atom_site_fract_y 39 | _atom_site_fract_z 40 | _atom_site_attached_hydrogens 41 | _atom_site_B_iso_or_equiv 42 | _atom_site_occupancy 43 | O O1 1 0.499999 0.500001 0.499999 0 . 1 44 | Fe Fe2 1 0.000000 0.000000 0.000000 0 . 1 45 | 46 | -------------------------------------------------------------------------------- /mpworks/workflows/test_wfs/Si.cif: -------------------------------------------------------------------------------- 1 | #\#CIF1.1 2 | ########################################################################## 3 | # Crystallographic Information Format file 4 | # Produced by PyCifRW module 5 | # 6 | # This is a CIF file. CIF has been adopted by the International 7 | # Union of Crystallography as the standard for data archiving and 8 | # transmission. 9 | # 10 | # For information on this file format, follow the CIF links at 11 | # http://www.iucr.org 12 | ########################################################################## 13 | 14 | data_Si 15 | _symmetry_space_group_name_H-M 'P 1' 16 | _cell_length_a 3.86697462285 17 | _cell_length_b 3.86697462378 18 | _cell_length_c 3.86697462 19 | _cell_angle_alpha 60.000000032 20 | _cell_angle_beta 60.000000024 21 | _cell_angle_gamma 60.000000024 22 | _chemical_name_systematic 'Generated by pymatgen' 23 | _symmetry_Int_Tables_number 1 24 | _chemical_formula_structural Si 25 | _chemical_formula_sum Si2 26 | _cell_volume 40.8882918885 27 | _cell_formula_units_Z 2 28 | loop_ 29 | _symmetry_equiv_pos_site_id 30 | _symmetry_equiv_pos_as_xyz 31 | 1 'x, y, z' 32 | 33 | loop_ 34 | _atom_site_type_symbol 35 | _atom_site_label 36 | _atom_site_symmetry_multiplicity 37 | _atom_site_fract_x 38 | _atom_site_fract_y 39 | _atom_site_fract_z 40 | _atom_site_attached_hydrogens 41 | _atom_site_B_iso_or_equiv 42 | _atom_site_occupancy 43 | Si Si1 1 0.875000 0.875000 0.875000 0 . 1 44 | Si Si2 1 0.125000 0.125000 0.125000 0 . 
1 45 | 46 | -------------------------------------------------------------------------------- /mpworks/workflows/test_wfs/scancel/srun_std_err_example.txt: -------------------------------------------------------------------------------- 1 | srun: defined options for program `srun' 2 | srun: --------------- --------------------- 3 | srun: user : `xhqu' 4 | srun: uid : 58809 5 | srun: gid : 58809 6 | srun: cwd : /global/cscratch1/sd/xhqu/tt/software_test/test_vasp/fix_attempt/launcher_2016-07-07-11-10-02-640839 7 | srun: ntasks : 32 (set) 8 | srun: nodes : 1 (set) 9 | srun: jobid : 2667797 (default) 10 | srun: partition : default 11 | srun: profile : `NotSet' 12 | srun: job name : `vtest' 13 | srun: reservation : `(null)' 14 | srun: burst_buffer : `(null)' 15 | srun: wckey : `(null)' 16 | srun: cpu_freq_min : 4294967294 17 | srun: cpu_freq_max : 4294967294 18 | srun: cpu_freq_gov : 4294967294 19 | srun: switches : -1 20 | srun: wait-for-switches : -1 21 | srun: distribution : unknown 22 | srun: cpu_bind : default 23 | srun: mem_bind : default 24 | srun: verbose : 1 25 | srun: slurmd_debug : 0 26 | srun: immediate : false 27 | srun: label output : false 28 | srun: unbuffered IO : false 29 | srun: overcommit : false 30 | srun: threads : 60 31 | srun: checkpoint_dir : /var/slurm/checkpoint 32 | srun: wait : 0 33 | srun: nice : -2 34 | srun: account : (null) 35 | srun: comment : (null) 36 | srun: dependency : (null) 37 | srun: exclusive : false 38 | srun: bcast : false 39 | srun: qos : (null) 40 | srun: constraints : mem-per-node=124928M nodelist=nid00024 41 | srun: geometry : (null) 42 | srun: reboot : yes 43 | srun: rotate : no 44 | srun: preserve_env : false 45 | srun: network : (null) 46 | srun: propagate : NONE 47 | srun: prolog : (null) 48 | srun: epilog : (null) 49 | srun: mail_type : NONE 50 | srun: mail_user : (null) 51 | srun: task_prolog : (null) 52 | srun: task_epilog : (null) 53 | srun: multi_prog : no 54 | srun: sockets-per-node : -2 55 | srun: cores-per-socket : -2 56 | srun: threads-per-core : -2 57 | srun: ntasks-per-node : 32 58 | srun: ntasks-per-socket : -2 59 | srun: ntasks-per-core : -2 60 | srun: plane_size : 4294967294 61 | srun: core-spec : NA 62 | srun: power : 63 | srun: remote command : `vasp_std' 64 | srun: Consumable Resources (CR) Node Selection plugin loaded with argument 50 65 | srun: launching 2667797.4 on host nid00024, 32 tasks: [0-31] 66 | srun: route default plugin loaded 67 | srun: Node nid00024, 32 tasks started 68 | srun: Received task exit notification for 32 tasks (status=0x0009). 
69 | srun: error: nid00024: tasks 0-31: Killed 70 | srun: Terminating job step 2667797.4 71 | srun: Force Terminated job step 2667797.4 72 | srun: Complete job step 2667797.4 received 73 | -------------------------------------------------------------------------------- /mpworks/workflows/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsproject/MPWorks/17d024204a5dd5ef160b2b865e85d46d99e8001f/mpworks/workflows/tests/__init__.py -------------------------------------------------------------------------------- /mpworks/workflows/tests/test_scancel_job_step_terminator.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | 4 | from mpworks.workflows.wf_utils import ScancelJobStepTerminator 5 | 6 | test_dir = os.path.join(os.path.dirname(__file__), "..", 7 | 'test_wfs', "scancel") 8 | 9 | class TestScancelJobStepTerminator(TestCase): 10 | def test_parse_srun_step_number(self): 11 | std_err_file = os.path.join(test_dir, "srun_std_err_example.txt") 12 | terminator = ScancelJobStepTerminator(std_err_file) 13 | step_id = terminator.parse_srun_step_number() 14 | self.assertEqual(step_id, "2667797.4") 15 | 16 | -------------------------------------------------------------------------------- /mpworks/workflows/wf_settings.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Anubhav Jain' 2 | __copyright__ = 'Copyright 2013, The Materials Project' 3 | __version__ = '0.1' 4 | __maintainer__ = 'Anubhav Jain' 5 | __email__ = 'ajain@lbl.gov' 6 | __date__ = 'May 10, 2013' 7 | 8 | # don't change nnodes unless other parts of code are also changed 9 | # nodes configuration will be ignored on SLURM due to different naming convention (nnodes vs nodes) 10 | QA_VASP = {'nnodes': 2, 'nodes': 2, 'walltime': '48:00:00'} 11 | QA_VASP_SMALL = {'nnodes': 2, 'nodes': 2, 'walltime': '48:00:00'} # small walltime jobs 12 | QA_DB = {'nnodes': 1, 'nodes' : 1, 'walltime': '2:00:00'} 13 | QA_CONTROL = {'nnodes': 1, 'nodes': 1, 'walltime': '00:30:00'} 14 | 15 | MOVE_TO_GARDEN_DEV = False 16 | MOVE_TO_GARDEN_PROD = False 17 | 18 | GARDEN = '/project/projectdirs/matgen/garden' 19 | 20 | RUN_LOCS = [GARDEN, GARDEN+'/dev', 21 | '/project/projectdirs/matgen/garden/control_blocks', 22 | '/project/projectdirs/matgen/scratch', 23 | '/global/scratch/sd/matcomp/', '/global/homes/m/matcomp', 24 | '/scratch/scratchdirs/matcomp/', '/scratch2/scratchdirs/matcomp/', 25 | '/global/scratch/sd/matcomp/aj_tests/', 26 | '/global/scratch/sd/matcomp/wc_tests/', 27 | '/global/scratch/sd/matcomp/aj_prod/', 28 | '/global/scratch2/sd/matcomp/mp_prod/', 29 | '/global/scratch2/sd/matcomp/mp_prod_hopper/'] 30 | -------------------------------------------------------------------------------- /mpworks/workflows/wf_utils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import os 4 | import shlex 5 | import shutil 6 | import time 7 | import traceback 8 | 9 | import subprocess 10 | 11 | import re 12 | from monty.os.path import zpath 13 | from mpworks.workflows.wf_settings import RUN_LOCS, GARDEN 14 | 15 | 16 | __author__ = 'Anubhav Jain' 17 | __copyright__ = 'Copyright 2013, The Materials Project' 18 | __version__ = '0.1' 19 | __maintainer__ = 'Anubhav Jain' 20 | __email__ = 'ajain@lbl.gov' 21 | __date__ = 'May 06, 2013' 22 | 23 | 24 | NO_POTCARS = ['Po', 'At', 'Rn', 
'Fr', 'Ra', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'] 25 | 26 | 27 | def j_decorate(m_dict): 28 | m_dict['auto_npar'] = False 29 | return m_dict 30 | 31 | 32 | def last_relax(filename): 33 | # for old runs 34 | m_dir = os.path.dirname(filename) 35 | m_file = os.path.basename(filename) 36 | 37 | if os.path.exists(zpath(os.path.join(m_dir, 'relax2', m_file))): 38 | return zpath(os.path.join(m_dir, 'relax2', m_file)) 39 | 40 | elif os.path.exists(zpath(filename)): 41 | return zpath(filename) 42 | 43 | relaxations = glob.glob('%s.relax*' % filename) 44 | if relaxations: 45 | return sorted(relaxations)[-1] 46 | 47 | # backup for old runs 48 | elif os.path.exists(zpath(os.path.join(m_dir, 'relax1', m_file))): 49 | return zpath(os.path.join(m_dir, 'relax1', m_file)) 50 | 51 | return filename 52 | 53 | 54 | def orig(filename): 55 | orig = glob.glob('%s.orig' % filename) 56 | if orig: 57 | return orig[0] 58 | else: 59 | return filename 60 | 61 | 62 | def get_block_part(m_dir): 63 | if 'block_' in m_dir: 64 | return m_dir[m_dir.find('block_'):] 65 | return m_dir 66 | 67 | 68 | def get_loc(m_dir): 69 | if os.path.exists(m_dir): 70 | return m_dir 71 | block_part = get_block_part(m_dir) 72 | 73 | for preamble in RUN_LOCS: 74 | new_loc = os.path.join(preamble, block_part) 75 | if os.path.exists(new_loc): 76 | return new_loc 77 | 78 | raise ValueError('get_loc() -- dir does not exist!! Make sure your base directory is listed in RUN_LOCS of wf_settings.py') 79 | 80 | 81 | def move_to_garden(m_dir, prod=False): 82 | block_part = get_block_part(m_dir) 83 | garden_part = GARDEN if prod else GARDEN+'/dev' 84 | f_dir = os.path.join(garden_part, block_part) 85 | if os.path.exists(m_dir) and not os.path.exists(f_dir) and m_dir != f_dir: 86 | try: 87 | shutil.move(m_dir, f_dir) 88 | time.sleep(30) 89 | except: 90 | # double check the move error is not due to path existing 91 | # there is sometimes a race condition with duplicate check 92 | if os.path.exists(f_dir): 93 | return f_dir 94 | traceback.print_exc() 95 | raise ValueError('Could not move file to GARDEN! {}'.format(traceback.format_exc())) 96 | 97 | 98 | return f_dir 99 | 100 | class ScancelJobStepTerminator: 101 | """ 102 | A tool to cancel a job step in a SLURM srun job using scancel command. 103 | """ 104 | 105 | def __init__(self, stderr_filename): 106 | """ 107 | 108 | Args: 109 | stderr_filename: The file name of the stderr for srun job step. 
110 | """ 111 | self.stderr_filename = stderr_filename 112 | 113 | def cancel_job_step(self): 114 | step_id = self.parse_srun_step_number() 115 | scancel_cmd = shlex.split("scancel --signal=KILL {}".format(step_id)) 116 | logging.info("Terminate the job step using {}".format(' '.join(scancel_cmd))) 117 | subprocess.Popen(scancel_cmd) 118 | 119 | def parse_srun_step_number(self): 120 | step_pat_text = r"srun: launching (?P<step_id>\d+[.]\d+) on host \w+, \d+ tasks:" 121 | step_pat = re.compile(step_pat_text) 122 | step_id = None 123 | with open(self.stderr_filename) as f: 124 | err_text = f.readlines() 125 | for line in err_text: 126 | m = step_pat.search(line) 127 | if m is not None: 128 | step_id = m.group("step_id") 129 | if step_id is None: 130 | raise ValueError("Can't find SRUN job step number in STDERR file") 131 | return step_id 132 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dicttoxml==1.6.4 2 | monty==0.6.4 3 | pybtex==0.18 4 | PyYAML==3.11 5 | requests==2.6.0 6 | six==1.9.0 7 | xmltodict==0.9.2 8 | pymatgen>=4.0.0 9 | -------------------------------------------------------------------------------- /scripts/go_submissions: -------------------------------------------------------------------------------- 1 | from mpworks.scripts.submissions_run import go_submissions 2 | 3 | if __name__ == '__main__': 4 | go_submissions() -------------------------------------------------------------------------------- /scripts/go_testing: -------------------------------------------------------------------------------- 1 | from mpworks.scripts.submit_canonical_run import go_testing 2 | 3 | if __name__ == '__main__': 4 | go_testing() -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | upload_docs = upload_docs --upload-dir=docs/_build/html 3 | release = register sdist upload -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Anubhav Jain" 4 | __copyright__ = "Copyright 2013, The Materials Project" 5 | __version__ = "0.1" 6 | __maintainer__ = "Anubhav Jain" 7 | __email__ = "ajain@lbl.gov" 8 | __date__ = "Mar 15, 2013" 9 | 10 | from setuptools import setup, find_packages 11 | from mpworks import __version__ 12 | import os 13 | import multiprocessing, logging # AJ: for some reason this is needed to not have "python setup.py test" freak out 14 | 15 | module_dir = os.path.dirname(os.path.abspath(__file__)) 16 | 17 | if __name__ == "__main__": 18 | setup(name='MPWorks', 19 | version=__version__, 20 | description='Materials Project codes', 21 | long_description=open(os.path.join(module_dir, 'README.rst')).read(), 22 | url='https://github.com/materialsproject/MPWorks', 23 | author='Anubhav Jain', 24 | author_email='anubhavster@gmail.com', 25 | license='modified BSD', 26 | packages=find_packages(), 27 | zip_safe=False, 28 | install_requires=["pymatgen>=4.0", "FireWorks>=0.9", "custodian>=0.7"], 29 | classifiers=["Programming Language :: Python :: 2.7", "Development Status :: 2 - Pre-Alpha", 30 | "Intended Audience :: Science/Research", "Intended Audience :: System Administrators", 31 | "Intended Audience :: Information Technology", 32 | "Operating System :: OS Independent", 
"Topic :: Other/Nonlisted Topic", 33 | "Topic :: Scientific/Engineering"], 34 | test_suite='nose.collector', 35 | tests_require=['nose'], 36 | scripts=[os.path.join(os.path.join(module_dir, "scripts", f)) for f in 37 | os.listdir(os.path.join(module_dir, "scripts"))]) 38 | --------------------------------------------------------------------------------