├── zeolite_data ├── ge_synthesis_data.csv ├── ge_synthesis_data.xlsx └── readme.txt ├── tableextractor ├── bin │ ├── word_classifier.pkl │ └── word_classifier_python3.pkl ├── Table Extractor Tutorial.ipynb ├── table.py ├── table_extractor.py └── data │ └── 101039c3ta12829f.html ├── LICENSE ├── .gitignore └── README.md /zeolite_data/ge_synthesis_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olivettigroup/table_extractor/HEAD/zeolite_data/ge_synthesis_data.csv -------------------------------------------------------------------------------- /zeolite_data/ge_synthesis_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olivettigroup/table_extractor/HEAD/zeolite_data/ge_synthesis_data.xlsx -------------------------------------------------------------------------------- /tableextractor/bin/word_classifier.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olivettigroup/table_extractor/HEAD/tableextractor/bin/word_classifier.pkl -------------------------------------------------------------------------------- /tableextractor/bin/word_classifier_python3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olivettigroup/table_extractor/HEAD/tableextractor/bin/word_classifier_python3.pkl -------------------------------------------------------------------------------- /zeolite_data/readme.txt: -------------------------------------------------------------------------------- 1 | Ge containing zeolite synthesis data. If used please cite: 2 | Jensen et al. A Machine Learning Approach to Zeolite Synthesis Enabled by Automatic Data Extraction. ACS Central Science. April 19, 2019. 10.1021/acscentsci.9b00193 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tableextractor/Table Extractor Tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from table_extractor import TableExtractor\n", 10 | "import pprint as pp" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "files = ['101021acscgd8b00078.html']\n", 20 | "dois = ['10.1021/acs.cgd.8b00078'] " 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# Supported domain names = zeolites, geopolymers, steel, titanium, aluminum, alloys \n", 30 | "te = TableExtractor(domain_name='zeolites')\n", 31 | "tables = te.extract_and_save_all_tables(files, dois)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "for table in tables:\n", 41 | " pp.pprint(table)\n", 42 | " print('-----')" 43 | ] 44 | } 45 | ], 46 | "metadata": { 47 | "kernelspec": { 48 | "display_name": "Python 3", 49 | "language": "python", 50 | "name": "python3" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 3 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython3", 62 | "version": "3.6.5" 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 2 67 | } 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /tableextractor/table.py: -------------------------------------------------------------------------------- 1 | from bson.objectid import ObjectId 2 | from copy import deepcopy 3 | import pandas as pd 4 | import numpy as np 5 | 6 | 7 | class Document(dict): 8 | 9 | structure = dict() 10 | 11 | def __init__(self, initializer=None): 12 | for key, (struct, value) in self.structure.items(): 13 | dict.__setitem__(self, key, deepcopy(value)) 14 | 15 | if initializer is not None: 16 | for key, value in initializer.items(): 17 | self.__setitem__(self, key, deepcopy(value)) 18 | 19 | def __setitem__(self, key, value): 20 | if key not in self.structure: 21 | raise KeyError("Invalid key used: '" + key + "'.") 22 | 23 | expected = self.structure[key][0] 24 | 25 | if value is not None: 26 | if type(expected) == list: 27 | if type(value) != list: 28 | raise TypeError("Invalid type used: Expected '[" + 29 | self.structure[key][0][0].__name__ + "]' but key '" + key + "' is not array.") 30 | if not all(isinstance(x, expected[0]) for x in value): 31 | raise TypeError("Invalid type used: Expected '[" + 32 | self.structure[key][0][0].__name__ + "]' but got '" + type(value).__name__ 33 | + "' for item in key '" + key + "'.") 34 | elif not isinstance(value, expected): 35 | raise TypeError("Invalid type used: Expected '" + 36 | self.structure[key][0].__name__ + "' but got '" + 37 | type(value).__name__ + "' for key '" + key + "'.") 38 | 39 | return dict.__setitem__(self, key, value) 40 | 41 | 42 | class Link(Document): 43 | structure = dict({ 44 | 'name':(str, None), 45 | 'link_ref':(list, []) 46 | }) 47 | 48 | class Attribute(Document): 49 | structure = dict({ 50 | 'name':(str, None), 51 | 'value':(float, 0), 52 | 'links': ([Link], []), 53 | 'string_value' : (str, None), 54 | 'unit': (str, None), 55 | 'value_ref':(list, []), 56 | 'attr_ref':(list, []) 57 | }) 58 | 59 | # top level of graph structure 60 | class Entity(Document): 61 | structure = dict({ 62 | 'name':(str, None), 63 | 'attributes':([Attribute], []), 64 | 'links':([Link], []), 65 | 'descriptor':(str, None), 66 | 'ent_ref':(list, []) 67 | }) 68 | 69 | class Table(Document): 70 | structure = dict({ 71 | '_id': (ObjectId, None), 72 | 'paper_doi':(str, None), 73 | 'order':(int, 0), 74 | 'act_table':(list, None), 75 | 'entities':([Entity], []), 76 | 'caption':(str, None), 77 | 'caption_ref':(list, []), 78 | 'composition_table':(bool, False), 79 | 'footer':(dict, None) 80 | }) 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # table_extractor 2 | Code and data used in the paper, A Machine Learning Approach to Zeolite Synthesis Enabled by Automatic Literature Data Extraction 3 | 4 | There are two main components to this repository: 5 | 1. table_extractor code 6 | 2. zeolite synthesis data 7 | 8 | # 1. Table Extraction Code 9 | This code extracts tables into json format from HTML/XML files. These HTML/XML files need to be supplied by the researcher. The code is written in Python3. To run the code: 10 | 1. Fork this repository 11 | 2. Download the Olivetti group materials science FastText word embeddings 12 | - Available here: https://figshare.com/s/70455cfcd0084a504745 13 | - Download all 4 files and place in the tableextractor/bin folder 14 | 3. Install all dependencies 15 | - json, pandas, spacy, bs4, gensim, numpy, unidecode, sklearn, scipy, traceback 16 | 4. Place all files in tableextractor/data 17 | 5. Use Jupyter (Table Extractor Tutorial) to run the code 18 | 19 | The code takes in a list of files and corresponding DOIs and returns a list of all tables extracted from the files as JSON objects. Currently, the code supports files from ACS, APS, Elsevier, Wiley, Springer, and RSC. 20 | 21 | # 2. Zeolite Synthesis Data 22 | The germanium containing zeolite data set used in the paper is publicly available in both Excel and CSV formats. Here is a description of each feature: 23 | 24 | doi- DOI of the paper the synthesis route comes from 25 | 26 | Si:B- molar amount of each element/compound/molecule used in the synthesis. Amounts are normalized to give Si=1 or Ge=1 if Si=0 27 | 28 | Time- crystallization time in hours 29 | 30 | Temp- crystallization temperature in °C 31 | 32 | SDA Type- name given to the organic structure directing agent (OSDA) molecule in the paper 33 | 34 | SMILES- the SMILES representation of the OSDA molecule 35 | 36 | SDA_Vol- the DFT calculated molar volume of the OSDA molecule in bohr^3 37 | 38 | SDA_SA- the DFT calculated surface area of the OSDA molecule in bohr^2 39 | 40 | SDA_KFI- the DFT calculated Kier flexibility index of the OSDA molecule 41 | 42 | From?- the location within a paper the compositional information is extracted. Either Table, Text, or Supplemental 43 | 44 | Extracted- Products of the synthesis as they appear in the paper 45 | 46 | Zeo1- the primary zeolite (zeotype) material made in the synthesis 47 | 48 | Zeo2- the secondary zeolite (zeotype) material made in the synthesis 49 | 50 | Dense1- the primary dense phase made in the synthesis 51 | 52 | Dense2- the secondary dense phase made in the synthesis 53 | 54 | Am- whether an amorphous phase is made in (or remains after) the synthesis 55 | 56 | Other- any other unidentified phases made in the synthesis 57 | 58 | ITQ- whether the synthesis made a zeolite in the ITQ series 59 | 60 | FD1- the framework density of Zeo1 61 | 62 | MR1- the maximum ring size of Zeo1 63 | 64 | FD2- the framework density of Zeo2 65 | 66 | MR2- the framework density of Zeo2 67 | 68 | # Citing 69 | If you use this code or data, please cite the following as appropriate. 70 | 71 | A Machine Learning Approach to Zeolite Synthesis Enabled by Automatic Literature Data Extraction 72 | Zach Jensen, Edward Kim, Soonhyoung Kwon, Terry Z. H. Gani, Yuriy Román-Leshkov, Manuel Moliner, Avelino Corma, and Elsa Olivetti 73 | ACS Central Science Article ASAP 74 | DOI: 10.1021/acscentsci.9b00193 75 | -------------------------------------------------------------------------------- /tableextractor/table_extractor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding: utf-8 3 | 4 | import traceback 5 | from json import loads, dumps 6 | import pprint as pp 7 | import pandas as pd 8 | import pickle 9 | from bs4 import BeautifulSoup 10 | import spacy 11 | from spacy.tokens import Doc 12 | import re 13 | from gensim.models.deprecated import keyedvectors 14 | import numpy as np 15 | from unidecode import unidecode_expect_nonascii 16 | import unidecode 17 | from sklearn.externals import joblib 18 | from scipy import stats 19 | from table import (Table, Entity, Attribute, Link) 20 | from bson.objectid import (ObjectId) 21 | from autologging import (logged) 22 | from html.parser import HTMLParser 23 | import traceback 24 | 25 | class TableExtractor(object): 26 | def __init__(self, domain_name = None, embedding_loc = 'bin/fasttext_embeddings-MINIFIED.model'): 27 | try: 28 | self.nlp = spacy.load('en') 29 | except: 30 | self.nlp = spacy.load('en_core_web_sm') 31 | self.load_embeddings(file_loc = embedding_loc) 32 | self.load_units() 33 | self.unit_regex = re.compile('\(.*?\)') 34 | self.load_composition_elements(domain_name) 35 | 36 | # files- list of file locations to either html or xml files 37 | # dois- list of dois that match the order of the files 38 | def extract_and_save_all_tables(self, files = None, dois = None): 39 | with open('bin/word_classifier_python3.pkl', 'rb') as f: 40 | self.clf = pickle.load(f) 41 | if files is None: 42 | print('Need to provide list of files') 43 | if dois is None: 44 | print('Need to provide list of dois for files') 45 | all_tables = [] 46 | failures = 0 47 | for num, (doi, f) in enumerate(zip(dois, files)): 48 | tab = [] 49 | self.doi = doi 50 | if 'html' in f: 51 | print('Extracting Tables (HTML) from: ', doi) 52 | # try: 53 | problem = False 54 | tables, captions, footers, capt_refs, table_refs = self.get_tables(f) 55 | print(len(tables), len(captions), len(footers)) 56 | cols, rows, col_inds, row_inds = self.get_headers(tables) 57 | pred_cols, pred_rows = self.classify_table_headers(cols, rows) 58 | orients = [] 59 | composition_flags = [] 60 | for pred_col, pred_row, col, row in zip(pred_cols, pred_rows, cols, rows): 61 | orient, composition_flag = self.determine_table_orientation(pred_col, pred_row, col, row) 62 | orients.append(orient) 63 | composition_flags.append(composition_flag) 64 | tab = [] 65 | for table, row_ind, col_ind, orient, table_ref in zip(tables, row_inds, col_inds, orients, table_refs): 66 | tab.append(self.construct_table_object(orient, table, row_ind, col_ind, table_ref)) 67 | for i, (t, comp_flag, caption, footer, ref) in enumerate(zip(tab,composition_flags, captions, footers, capt_refs)): 68 | if t is not None: 69 | t['order'] = i 70 | t['_id'] = ObjectId() 71 | t['paper_doi'] = self.doi 72 | t['composition_table'] = comp_flag 73 | t['caption'] = caption 74 | if ref is not None: 75 | t['caption_ref'] = ref 76 | if footer is not None: 77 | t['footer'] = footer 78 | if comp_flag: 79 | t = self.clean_composition_table(t, remaining = self.remaining) 80 | all_tables.append(t) 81 | print('Success: Extracted Tables from ', doi) 82 | # except IOError as e: 83 | # print('Failure: No permission to read, DOI:', doi) 84 | # failures += 1 85 | elif 'xml' in f: 86 | print('Extracting Tables (XML) from ', doi) 87 | try: 88 | tables, captions, footers, table_refs, capt_refs = self.get_xml_tables(f) 89 | cols, rows, col_inds, row_inds = self.get_headers(tables) 90 | pred_cols, pred_rows = self.classify_table_headers(cols, rows) 91 | orients = [] 92 | composition_flags = [] 93 | for pred_col, pred_row, col, row in zip(pred_cols, pred_rows, cols, rows): 94 | orient, composition_flag = self.determine_table_orientation(pred_col, pred_row, col, row) 95 | orients.append(orient) 96 | composition_flags.append(composition_flag) 97 | tab = [] 98 | for table, row_ind, col_ind, orient, ref in zip(tables, row_inds, col_inds, orients, table_refs): 99 | try: 100 | curr = (self.construct_table_object(orient, table, row_ind, col_ind, ref)) 101 | tab.append(curr) 102 | except IndexError as e: 103 | print('Failure:', doi) 104 | failure += 1 105 | for i, (t, comp_flag, caption, footer, capt_ref) in enumerate(zip(tab,composition_flags, captions, footers, capt_refs)): 106 | if t is not None: 107 | t['order'] = i 108 | t['_id'] = ObjectId() 109 | t['caption'] = caption 110 | t['paper_doi'] = self.doi 111 | t['composition_table'] = comp_flag 112 | if capt_ref is not None: 113 | t['caption_ref'] = capt_ref 114 | if footer is not None: 115 | t['footer'] = footer 116 | if comp_flag: 117 | t = self.clean_composition_table(t, remaining = self.remaining) 118 | all_tables.append(t) 119 | print('Success: Extracted Tables from', doi) 120 | except IndexError as e: 121 | print('FAILURE: XML index error', doi) 122 | failures += 1 123 | else: 124 | print('Failure: File needs to be html or xml') 125 | failures += 1 126 | print('Finished Extracting all Papers') 127 | print('Number Attempted:', len(files)) 128 | print('Number Successful:', len(all_tables)) 129 | print('Number Failed:', failures) 130 | return all_tables 131 | def get_caption(self, table, format): 132 | if format == 'html': 133 | if '10.1016' in self.doi: 134 | up = table.parent 135 | table_root = up.parent 136 | caption = table_root.find('div', 'caption') 137 | caption = caption.find('p') 138 | caption, ref = self._search_for_reference(caption, format) 139 | caption = unidecode.unidecode(HTMLParser().unescape(caption.text)).strip() 140 | return caption, ref 141 | 142 | elif '10.1039' in self.doi: 143 | check = table.parent 144 | check = check.parent 145 | if check.get('class') == ['rtable__wrapper']: 146 | up = table.parent 147 | up = up.parent 148 | caption = up.previous_sibling 149 | if caption is None: return '', [] 150 | else: 151 | caption = caption.previous_sibling 152 | if caption is None: return '', [] 153 | else: 154 | caption = caption.find('span') 155 | caption, ref = self._search_for_reference(caption, format) 156 | caption = unidecode.unidecode(HTMLParser().unescape(caption.text)).strip() 157 | return caption, ref 158 | else: 159 | return '', [] 160 | elif '10.1002' in self.doi: 161 | up = table.parent 162 | caption = up.previous_sibling 163 | caption = caption.previous_sibling 164 | if caption is not None: 165 | caption.span.decompose() 166 | caption, ref = self._search_for_reference(caption, format) 167 | caption = unidecode.unidecode(HTMLParser().unescape(caption.text)).strip() 168 | return caption, ref 169 | else: 170 | print('No caption') 171 | return '', [] 172 | elif '10.1021' in self.doi: 173 | up = table.parent 174 | if up.get('class') == ['NLM_table-wrap']: 175 | caption = up.find('div', 'NLM_caption') 176 | else: 177 | caption = up.previous_sibling 178 | if caption == ' ': 179 | caption = caption.previous_sibling 180 | if caption is not None: 181 | caption, ref = self._search_for_reference(caption, format) 182 | caption = unidecode.unidecode(HTMLParser().unescape(caption.text)).strip() 183 | return caption, ref 184 | else: 185 | return '', None 186 | 187 | elif '10.1007' in self.doi: 188 | up = table.parent 189 | caption = up.previous_sibling 190 | caption = caption.find('p') 191 | caption, ref = self._search_for_reference(caption, format) 192 | caption = unidecode.unidecode(HTMLParser().unescape(caption.text)).strip() 193 | return caption, ref 194 | else: 195 | return '', [] 196 | elif format == 'xml': 197 | if '10.1016' in self.doi: 198 | caption = table.find('caption') 199 | caption, ref = self._search_for_reference(caption, format) 200 | caption = unidecode.unidecode(HTMLParser().unescape(caption.text)).strip() 201 | return caption, ref 202 | elif '10.1021' in self.doi: 203 | #up = table.parent 204 | #caption = up.find('title') 205 | caption = table.find('title') 206 | if caption is None: 207 | up = table.parent 208 | caption = table.find('title') 209 | if caption is None: 210 | caption = up.find('caption') 211 | caption, ref = self._search_for_reference(caption, format) 212 | caption = unidecode.unidecode(HTMLParser().unescape(caption.text)).strip() 213 | return caption, ref 214 | return '', [] 215 | 216 | def get_footer(self, table, format): 217 | footer_dict = dict() 218 | if format == 'html': 219 | if '10.1016' in self.doi: 220 | up = table.parent 221 | table_root = up.parent 222 | footer = table_root.find_all('dl') 223 | if len(footer) > 0: 224 | for f in footer: 225 | dds = f.find('dd') 226 | dts = f.find('dt') 227 | if dds is None or dts is None: 228 | print('Problem in Footer: Keys and paragraphs len dont match') 229 | return None 230 | else: 231 | footer_dict[dts.text.strip()] = unidecode.unidecode(HTMLParser().unescape(dds.text)).strip() 232 | else: return None 233 | elif '10.1039' in self.doi: 234 | footer = table.find('tfoot') 235 | if footer is None: return None 236 | else: 237 | dts = footer.find_all('span', 'tfootnote') 238 | dds = footer.find_all('span', 'sup_inf') 239 | if len(dds) != len(dts): 240 | print('Problem in Footer: Keys and paragraphs len dont match') 241 | return None 242 | else: 243 | for d, t in zip(dds, dts): 244 | footer_dict[t.text.strip()] = unidecode.unidecode(HTMLParser().unescape(d.text)).strip() 245 | elif '10.1002' in self.doi: 246 | up = table.parent 247 | next = up.next_sibling 248 | next = next.next_sibling 249 | if next is None: return None 250 | else: 251 | footer = next.find('li').text 252 | if footer is None: return None 253 | else: 254 | regrex = re.compile('\[\D\]') 255 | dts = regrex.findall(footer) 256 | inter = regrex.split(footer) 257 | dds = inter[1:] 258 | if len(dts) != len(dds): 259 | print('Problem in Footer: Keys and paragraphs len dont match') 260 | return None 261 | else: 262 | for d, t in zip(dds, dts): 263 | footer_dict[t.strip()] = unidecode.unidecode(HTMLParser().unescape(d)).strip() 264 | elif '10.1007' in self.doi: 265 | up = table.parent 266 | next = up.next_sibling 267 | if next is None: return None 268 | else: 269 | if next.get('class') == ['TableFooter']: 270 | footer = next.find_all('p') 271 | if len(footer) > 0: 272 | for f in footer: 273 | sup = f.find('sup') 274 | if sup is not None: 275 | dt = sup.text 276 | f.sup.decompose() 277 | else: 278 | dt = 'NA' 279 | footer_dict[dt.strip()] = unidecode.unidecode(HTMLParser().unescape(f.text)).strip() 280 | else: return None 281 | elif '10.1021' in self.doi: 282 | up = table.parent 283 | next = up.next_sibling 284 | if next == ' ': 285 | next = next.next_sibling 286 | if next is None: 287 | next = up 288 | if next is None: 289 | return None 290 | footer = next.find_all('div', 'footnote') 291 | if len(footer) > 0: 292 | for f in footer: 293 | sup = f.find('sup') 294 | if sup is not None: 295 | dt = sup.text 296 | f.sup.decompose() 297 | else: 298 | p = f.find('p') 299 | if p.text != f.text: 300 | p = f.p.extract() 301 | dt = f.text 302 | f = p 303 | else: 304 | dt = 'NA' 305 | footer_dict[dt.strip()] = unidecode.unidecode(HTMLParser().unescape(f.text)).strip() 306 | else: return None 307 | elif format == 'xml': 308 | if '10.1016' in self.doi: 309 | footer = table.find_all('table-footnote') 310 | if len(footer) > 0: 311 | for f in footer: 312 | sup = f.find('label') 313 | if sup is not None: 314 | dt = sup.text 315 | f.label.decompose() 316 | else: 317 | dt = 'NA' 318 | footer_dict[dt.strip()] = unidecode.unidecode(HTMLParser().unescape(f.text)).strip() 319 | else: 320 | footer = table.find('legend') 321 | if footer is None: return None 322 | else: 323 | all = footer.find_all('simple-para') 324 | for a in all: 325 | sup = a.find('sup') 326 | if sup is not None: 327 | dt = sup.text 328 | a.sup.decompose() 329 | else: 330 | dt = 'NA' 331 | footer_dict[dt.strip()] = unidecode.unidecode(HTMLParser().unescape(a.text)).strip() 332 | elif '10.1021' in self.doi: 333 | up = table.parent 334 | footer = up.find('table-wrap-foot') 335 | if footer is not None: 336 | dts = footer.find_all('label') 337 | dds = footer.find_all('p') 338 | if len(dts) != len(dds): 339 | ts = footer.find_all('sup') 340 | dts = [] 341 | for t in ts: 342 | if t.text != '': 343 | dts.append(t) 344 | if len(dds) == 1 and len(dts) > 1: 345 | para = dds[0] 346 | cont = para.contents 347 | c = [] 348 | for co in cont: 349 | try: 350 | c.append(co.text) 351 | except: 352 | c.append(co) 353 | ind = [i for i,x in enumerate(c) if x == ''] 354 | dts = [] 355 | dds = [] 356 | curr = ind[0] 357 | for i in ind[1:]: 358 | dts.append(c[curr-1]) 359 | dds.append(''.join(c[(curr+1):(i-1)])) 360 | curr = i 361 | dts.append(c[curr-1]) 362 | dds.append(''.join(c[(curr+1):])) 363 | for d, t in zip(dds, dts): 364 | footer_dict[t.strip()] = unidecode.unidecode(HTMLParser().unescape(d)).strip().replace('\n', ' ') 365 | 366 | elif len(dts) != len(dds): 367 | print('Problem in Footer: Keys and paragraphs len dont match') 368 | return None 369 | else: 370 | for d, t in zip(dds, dts): 371 | footer_dict[t.text.strip()] = unidecode.unidecode(HTMLParser().unescape(d.text)).strip().replace('\n', ' ') 372 | else: 373 | for d, t in zip(dds, dts): 374 | footer_dict[t.text.strip()] = unidecode.unidecode(HTMLParser().unescape(d.text)).strip().replace('\n', ' ') 375 | else: return None 376 | return footer_dict 377 | 378 | def get_xml_tables(self, xml): 379 | all_tables = [] 380 | all_captions = [] 381 | all_caption_refs = [] 382 | all_footers = [] 383 | all_ref_tables = [] 384 | soup = BeautifulSoup(open(('data/'+xml), 'r+'), 'xml') 385 | tables = soup.find_all('table') 386 | if len(tables) == 0: 387 | soup = BeautifulSoup(open('data/'+xml, 'r+'), 'lxml') 388 | tables = soup.find_all('table-wrap') 389 | for w, table in enumerate(tables): 390 | try: 391 | try: 392 | caption, ref = self.get_caption(table, format='xml') 393 | except Exception as e: print(e, 'Problem in caption') 394 | try: 395 | footer = self.get_footer(table, format='xml') 396 | except Exception as e: print(e, 'problem in footer') 397 | all_captions.append(caption) 398 | all_caption_refs.append(ref) 399 | all_footers.append(footer) 400 | tab = [] 401 | sup_tab = [] 402 | for t in range(150): 403 | tab.append([None]*150) 404 | sup_tab.append([None]*150) 405 | rows = table.find_all('row') 406 | if len(rows) == 0: 407 | rows = table.find_all('oasis:row') 408 | num_rows = len(rows) 409 | for i, row in enumerate(rows): 410 | counter = 0 411 | for ent in row: 412 | curr_col = 0 413 | beg = 0 414 | end = 0 415 | more_row = 0 416 | if type(ent) == type(row): 417 | if ent.has_attr('colname'): 418 | try: 419 | curr_col = int(ent['colname']) 420 | except: 421 | curr = list(ent['colname']) 422 | for c in curr: 423 | try: 424 | curr_col = int(c) 425 | except: 426 | continue 427 | if ent.has_attr('namest'): 428 | try: 429 | beg = int(ent['namest']) 430 | except: 431 | curr = list(ent['namest']) 432 | for c in curr: 433 | try: 434 | beg = int(c) 435 | except: 436 | continue 437 | if ent.has_attr('nameend'): 438 | try: 439 | end = int(ent['nameend']) 440 | except: 441 | curr = list(ent['nameend']) 442 | for c in curr: 443 | try: 444 | end = int(c) 445 | except: 446 | continue 447 | if ent.has_attr('morerows'): 448 | try: 449 | more_row = int(ent['morerows']) 450 | except: 451 | curr = list(ent['morerows']) 452 | for c in curr: 453 | try: 454 | more_row = int(c) 455 | except: 456 | continue 457 | ent, curr_ref = self._search_for_reference(ent, 'xml') 458 | if beg != 0 and end != 0 and more_row != 0: 459 | for j in range(beg, end+1): 460 | for k in range(more_row+1): 461 | tab[i+k][j-1] = unidecode.unidecode(HTMLParser().unescape(ent.get_text())).strip().replace('\n', ' ') 462 | sup_tab[i+k][j-1] = curr_ref 463 | elif beg != 0 and end != 0: 464 | for j in range(beg, end+1): 465 | tab[i][j-1] = unidecode.unidecode(HTMLParser().unescape(ent.get_text())).strip().replace('\n', ' ') 466 | sup_tab[i][j-1] = curr_ref 467 | elif more_row != 0: 468 | for j in range(more_row+1): 469 | tab[i+j][counter] = unidecode.unidecode(HTMLParser().unescape(ent.get_text())).strip().replace('\n', ' ') 470 | sup_tab[i+j][counter] = curr_ref 471 | elif curr_col != 0: 472 | tab[i][curr_col-1] = unidecode.unidecode(HTMLParser().unescape(ent.get_text())).strip().replace('\n', ' ') 473 | sup_tab[i][curr_col-1] = curr_ref 474 | else: 475 | counter_ent = counter 476 | found = False 477 | while not found: 478 | if tab[i][counter_ent] is None: 479 | tab[i][counter_ent] = unidecode.unidecode(HTMLParser().unescape(ent.get_text())).strip().replace('\n', ' ') 480 | sup_tab[i][counter_ent] = curr_ref 481 | found = True 482 | else: 483 | counter_ent+=1 484 | counter = counter_ent 485 | counter = counter + 1 + (end-beg) 486 | for t, s in zip(tab, sup_tab): 487 | for j,k in zip(reversed(t), reversed(s)): 488 | if j is None: 489 | t.remove(j) 490 | s.remove(k) 491 | for t, s in zip(reversed(tab), reversed(sup_tab)): 492 | if len(t) == 0: 493 | tab.remove(t) 494 | sup_tab.remove(s) 495 | lens = [] 496 | for t in tab: 497 | lens.append(len(t)) 498 | size = stats.mode(lens)[0][0] 499 | for t, s in zip(tab, sup_tab): 500 | if len(t) != size: 501 | for j in range(len(t), size): 502 | t.append('') 503 | s.append([]) 504 | all_tables.append(tab) 505 | all_ref_tables.append(sup_tab) 506 | except: 507 | print('Failed to extract XML table') 508 | table = [[0]] 509 | sup_table = [[None]] 510 | all_tables.append(table) 511 | all_ref_tables.append(sup_table) 512 | tb = sys.exc_info()[-1] 513 | print(traceback.extract_tb(tb, limit=1)[-1][1]) 514 | return all_tables, all_captions, all_footers, all_ref_tables, all_caption_refs 515 | 516 | def get_tables(self, html): 517 | all_tables = [] 518 | all_table_refs = [] 519 | all_captions = [] 520 | all_footers = [] 521 | all_caption_ref = [] 522 | try: 523 | soup = BeautifulSoup(open(('data/'+html), 'r+'), 'html.parser') 524 | except UnicodeDecodeError: 525 | soup = BeautifulSoup(open('data/'+html, 'rt', encoding = 'latin1'), 'html.parser') 526 | tables = soup.find_all('table') 527 | for i, table in enumerate(tables): 528 | try: 529 | try: 530 | caption, ref = self.get_caption(table, format='html') 531 | except: 532 | print('Failed to get captions') 533 | caption = '' 534 | if caption != '': 535 | all_captions.append(caption) 536 | all_caption_ref.append(ref) 537 | try: 538 | all_footers.append(self.get_footer(table, format='html')) 539 | except: print('Failed to get footer') 540 | num_rows = 0 541 | num_cols = 0 542 | thead = table.find('thead') 543 | tbody = table.find('tbody') 544 | if thead is None and tbody is None: 545 | continue 546 | elif thead is not None and tbody is not None: 547 | trs = thead.find_all('tr') 548 | trs.extend(tbody.find_all('tr')) 549 | ths = trs[0].find_all('th') 550 | elif thead is not None: 551 | trs = thead.find_all('tr') 552 | ths = trs[0].find_all('th') 553 | else: 554 | trs = tbody.find_all('tr') 555 | ths = trs[0].find_all('td') 556 | copy = list(trs) 557 | for j, tr in enumerate(copy): 558 | if tr.find_all('ol') or tr.find_all('ul'): 559 | trs.remove(tr) 560 | num_rows = len(trs) 561 | for j, th in enumerate(ths): 562 | if th.has_attr('colspan'): 563 | num_cols = num_cols + int(th['colspan']) 564 | else: 565 | num_cols+=1 566 | tab = [None]*num_rows 567 | sup_tab = [None]*num_rows 568 | for j, row in enumerate(tab): 569 | tab[j] = [None]*num_cols 570 | sup_tab[j] = [None]*num_cols 571 | for row in range(len(tab)): 572 | th_counter = 0 573 | td_counter = 0 574 | curr_trs = row 575 | ths = trs[curr_trs].find_all('th') 576 | tds = trs[curr_trs].find_all('td') 577 | for col in range(len(tab[0])): 578 | if len(tds) > 0 and len(ths) > 0: 579 | curr = th_counter 580 | if tab[row][col] is None: 581 | if th_counter < len(ths): 582 | ths[curr], curr_ref = self._search_for_reference(ths[curr], 'html') 583 | th_counter+=1 584 | tab[row][col] = unidecode.unidecode(HTMLParser().unescape(ths[curr].text)).strip().replace('\n', '') 585 | sup_tab[row][col] = curr_ref 586 | if ths[curr].has_attr('rowspan') and ths[curr].has_attr('colspan'): 587 | rs = int(ths[curr]['rowspan']) 588 | cs = int(ths[curr]['colspan']) 589 | for r in range(rs): 590 | for c in range(cs): 591 | tab[row+r][col+c] = unidecode.unidecode(HTMLParser().unescape(ths[curr].text)).strip().replace('\n', '') 592 | sup_tab[row+r][col+c] = curr_ref 593 | elif ths[curr].has_attr('rowspan'): 594 | rs = int(ths[curr]['rowspan']) 595 | for r in range(rs): 596 | tab[row+r][col] = unidecode.unidecode(HTMLParser().unescape(ths[curr].text)).strip().replace('\n', '') 597 | sup_tab[row+r][col] = curr_ref 598 | elif ths[curr].has_attr('colspan'): 599 | cs = int(ths[curr]['colspan']) 600 | for c in range(cs): 601 | tab[row][col+c] = unidecode.unidecode(HTMLParser().unescape(ths[curr].text)).strip().replace('\n', '') 602 | sup_tab[row][col+c] = curr_ref 603 | else: 604 | curr = td_counter 605 | tds[curr], curr_ref = self._search_for_reference(tds[curr], 'html') 606 | tab[row][col] = unidecode.unidecode(HTMLParser().unescape(tds[curr].text)).strip().replace('\n', '') 607 | sup_tab[row][col] = curr_ref 608 | if tds[curr].has_attr('rowspan') and tds[curr].has_attr('colspan'): 609 | rs = int(tds[curr]['rowspan']) 610 | cs = int(tds[curr]['colspan']) 611 | for r in range(rs): 612 | for c in range(cs): 613 | tab[row+r][col+c] = unidecode.unidecode(HTMLParser().unescape(tds[curr].text)).strip().replace('\n', '') 614 | sup_tab[row+r][col+c] = curr_ref 615 | elif tds[curr].has_attr('rowspan'): 616 | rs = int(tds[curr]['rowspan']) 617 | for r in range(rs): 618 | tab[row+r][col] = unidecode.unidecode(HTMLParser().unescape(tds[curr].text)).strip().replace('\n', '') 619 | sup_tab[row+r][col] = curr_ref 620 | elif tds[curr].has_attr('colspan'): 621 | cs = int(tds[curr]['colspan']) 622 | for c in range(cs): 623 | tab[row][col+c] = unidecode.unidecode(HTMLParser().unescape(tds[curr].text)).strip().replace('\n', '') 624 | sup_tab[row][col+c] = curr_ref 625 | td_counter+=1 626 | elif len(ths) > 0: 627 | curr_ths = th_counter 628 | if tab[row][col] is None: 629 | ths[curr_ths], curr_ref = self._search_for_reference(ths[curr_ths], 'html') 630 | th_counter+=1 631 | tab[row][col] = unidecode.unidecode(HTMLParser().unescape(ths[curr_ths].text)).strip().replace('\n', '') 632 | sup_tab[row][col] = curr_ref 633 | if ths[curr_ths].has_attr('rowspan') and ths[curr_ths].has_attr('colspan'): 634 | rs = int(ths[curr_ths]['rowspan']) 635 | cs = int(ths[curr_ths]['colspan']) 636 | for r in range(rs): 637 | for c in range(cs): 638 | tab[row+r][col+c] = unidecode.unidecode(HTMLParser().unescape(ths[curr_ths].text)).strip().replace('\n', '') 639 | sup_tab[row+r][col+c] = curr_ref 640 | elif ths[curr_ths].has_attr('rowspan'): 641 | rs = int(ths[curr_ths]['rowspan']) 642 | for r in range(rs): 643 | tab[row+r][col] = unidecode.unidecode(HTMLParser().unescape(ths[curr_ths].text)).strip().replace('\n', '') 644 | sup_tab[row+r][col] = curr_ref 645 | elif ths[curr_ths].has_attr('colspan'): 646 | cs = int(ths[curr_ths]['colspan']) 647 | for c in range(cs): 648 | tab[row][col+c] = unidecode.unidecode(HTMLParser().unescape(ths[curr_ths].text)).strip().replace('\n', '') 649 | sup_tab[row][col+c] = curr_ref 650 | elif len(tds) > 0: 651 | curr_tds = td_counter 652 | if tab[row][col] is None: 653 | tds[curr_tds], curr_ref = self._search_for_reference(tds[curr_tds], 'html') 654 | td_counter+=1 655 | tab[row][col] = unidecode.unidecode(HTMLParser().unescape(tds[curr_tds].text)).strip().replace('\n', '') 656 | sup_tab[row][col] = curr_ref 657 | if tds[curr_tds].has_attr('rowspan') and tds[curr_tds].has_attr('colspan'): 658 | rs = int(tds[curr_tds]['rowspan']) 659 | cs = int(tds[curr_tds]['colspan']) 660 | for r in range(rs): 661 | for c in range(cs): 662 | tab[row+r][col+c] = unidecode.unidecode(HTMLParser().unescape(tds[curr_tds].text)).strip().replace('\n', '') 663 | sup_tab[row+r][col+c] = curr_ref 664 | elif tds[curr_tds].has_attr('rowspan'): 665 | rs = int(tds[curr_tds]['rowspan']) 666 | for r in range(rs): 667 | tab[row+r][col] = unidecode.unidecode(HTMLParser().unescape(tds[curr_tds].text)).strip().replace('\n', '') 668 | sup_tab[row+r][col] = curr_ref 669 | elif tds[curr_tds].has_attr('colspan'): 670 | cs = int(tds[curr_tds]['colspan']) 671 | for c in range(cs): 672 | tab[row][col+c] = unidecode.unidecode(HTMLParser().unescape(tds[curr_tds].text)).strip().replace('\n', '') 673 | sup_tab[row][col+c] = curr_ref 674 | all_tables.append(tab) 675 | all_table_refs.append(sup_tab) 676 | except IndexError as e: 677 | self.fails+=1 678 | self.__log.info( "FAILURE: Index, Failed to extract table #" + str(i) + " from paper " + str(self.doi) ) 679 | print('Index Error in get_tables') 680 | tb = sys.exc_info()[-1] 681 | print(traceback.extract_tb(tb, limit=1)[-1][1]) 682 | except ValueError as e: 683 | self.values+=1 684 | self.__log.info( "FAILURE: Value, Failed to extract table #" + str(i) + " from paper " + str(self.doi) ) 685 | tb = sys.exc_info()[-1] 686 | print('Value Error') 687 | print(e) 688 | print(traceback.extract_tb(tb, limit=1)[-1][1]) 689 | return all_tables, all_captions, all_footers, all_caption_ref, all_table_refs 690 | 691 | def get_headers(self, tables): 692 | all_col_headers = [] 693 | all_row_headers = [] 694 | all_col_indexes = [] 695 | all_row_indexes = [] 696 | for num, table in enumerate(tables): 697 | col_ind, row_ind = self._get_easy_headers(table) 698 | if col_ind == -1 and row_ind == -1: 699 | try: 700 | curr = table[0] 701 | col_index = 0 702 | for i in range(len(table)-1): 703 | next = table[i+1] 704 | count_curr = 0 705 | count_next = 0 706 | for cell in curr: 707 | try: 708 | cell, _ = self.value_extractor(cell) 709 | fixed = float(cell) 710 | except: 711 | if cell != '': 712 | count_curr+=1 713 | for cell in next: 714 | try: 715 | cell, _ = self.value_extractor(cell) 716 | fixed = float(cell) 717 | except: 718 | if cell != '': 719 | count_next+=1 720 | if count_next >= count_curr: 721 | curr = next 722 | else: 723 | col_index = i 724 | break 725 | trans_table = list(map(list, zip(*table))) 726 | curr_row = trans_table[0] 727 | row_index = 0 728 | for i in range(len(trans_table)-1): 729 | next = trans_table[i+1] 730 | count_curr = 0 731 | count_next = 0 732 | for cell in curr: 733 | try: 734 | cell, _ = self.value_extractor(cell) 735 | fixed = float(cell) 736 | except: 737 | if cell != '': 738 | count_curr+=1 739 | for cell in next: 740 | try: 741 | cell, _ = self.value_extractor(cell) 742 | fixed = float(cell) 743 | except: 744 | if cell != '': 745 | count_next+=1 746 | if count_next >= count_curr: 747 | curr = next 748 | else: 749 | row_index = i 750 | break 751 | row_header = [] 752 | col_header = [] 753 | for i in range(col_index+1): 754 | col_header.extend(table[i]) 755 | for i in range(row_index+1): 756 | row_header.extend(trans_table[i]) 757 | indexes = [] 758 | curr = col_header[0] 759 | for i in range(len(col_header)-1): 760 | next = col_header[i+1] 761 | if curr == next: 762 | indexes.append(i) 763 | curr = next 764 | else: 765 | curr = next 766 | for i in reversed(indexes): 767 | col_header.pop(i) 768 | indexes = [] 769 | curr = row_header[0] 770 | for i in range(len(row_header)-1): 771 | next = row_header[i+1] 772 | if curr == next: 773 | indexes.append(i) 774 | curr = next 775 | else: 776 | curr = next 777 | for i in reversed(indexes): 778 | row_header.pop(i) 779 | all_col_headers.append(col_header) 780 | all_row_headers.append(row_header) 781 | all_col_indexes.append(col_index) 782 | all_row_indexes.append(row_index) 783 | except IndexError as e: 784 | self.other+=1 785 | self.__log.info( "FAILURE: Index get_headers table #" + str(num) + " from paper " + str(self.doi) ) 786 | print('IndexError in get headers') 787 | print(e) 788 | tb = sys.exc_info()[-1] 789 | print(traceback.extract_tb(tb, limit=1)[-1][1]) 790 | else: 791 | all_col_indexes.append(col_ind) 792 | all_row_indexes.append(row_ind) 793 | all_col_headers.append(table[col_ind]) 794 | trans = list(map(list, zip(*table))) 795 | all_row_headers.append(trans[row_ind]) 796 | return all_col_headers, all_row_headers, all_col_indexes, all_row_indexes 797 | 798 | def load_embeddings(self, file_loc=None): 799 | if file_loc == None: 800 | print('Need to specify path to word embedding model') 801 | print('Materials science training word2vec and fasttext are available for download') 802 | print('Check the read-me') 803 | else: 804 | self.embeddings = keyedvectors.KeyedVectors.load(file_loc) 805 | # self.embeddings.bucket = 2000000 806 | self.emb_vocab_ft = dict([('', 0), ('', 1)] + 807 | [(k, v.index+2) for k, v in self.embeddings.vocab.items()]) 808 | self.emb_weights_ft = np.vstack([np.zeros((1,100)), np.ones((1,100)), np.array(self.embeddings.syn0)]) 809 | 810 | def _normalize_string(self, string): 811 | ret_string = '' 812 | for char in string: 813 | if re.match(u'[Α-Ωα-ωÅ]', char) is not None: 814 | ret_string += str(char) 815 | else: 816 | ret_string += str(unidecode_expect_nonascii(str(char))) 817 | 818 | return ret_string 819 | 820 | def vectorize_words(self, words, labels): 821 | emb_vector = [] 822 | label_vector = [] 823 | for word, label in zip(words, labels): 824 | if word in self.emb_vocab_ft: 825 | ind = self.emb_vocab_ft[word] 826 | emb_vector.append(self.emb_weights_ft[ind]) 827 | # Old way of getting embeddings, does not work on a model trained on 828 | # gensim 3.4 when using in gensim 3.7 and up 829 | # if str(word) in self.embeddings: 830 | # print(word) 831 | # try: 832 | # print(self.embeddings[str(word)][:10]) 833 | # emb_vector.append(self.embeddings[str(word)]) 834 | # except: 835 | # spl = word.split() 836 | # curr = [] 837 | # for w in spl: 838 | # print(w) 839 | # curr.append(self.embeddings[str(w)]) 840 | # curr_array = np.mean(np.array(curr), axis=0) 841 | # print(curr_array.shape) 842 | # print(curr_array[:10]) 843 | # emb_vector.append(curr_array) 844 | 845 | label_vector.append(label) 846 | else: 847 | label_vector.append(label) 848 | emb_vector.append(np.zeros(100, dtype=np.float32)) 849 | emb_vector = np.array(emb_vector) 850 | return emb_vector, label_vector 851 | 852 | def classify_table_headers(self, cols, rows): 853 | vect_cols = [] 854 | vect_rows = [] 855 | for col, row in zip(cols, rows): 856 | vect_c, label = self.vectorize_words(col, [0]*len(col)) 857 | vect_r, label = self.vectorize_words(row, [0]*len(row)) 858 | vect_cols.append(self.clf.predict(vect_c)) 859 | vect_rows.append(self.clf.predict(vect_r)) 860 | return vect_cols, vect_rows 861 | 862 | def determine_table_orientation(self, pred_cols, pred_rows, cols, rows): 863 | # True- entities are row labels 864 | pred_cols = list(pred_cols) 865 | pred_rows = list(pred_rows) 866 | cols = list(cols) 867 | rows = list(rows) 868 | 869 | constituent_counter = 0 870 | for c in cols: 871 | if c in self.material_constituents: 872 | constituent_counter +=1 873 | if constituent_counter >= self.constituent_threshold: 874 | return True, True 875 | 876 | constituent_counter = 0 877 | for r in rows: 878 | if r in self.material_constituents: 879 | constituent_counter +=1 880 | if constituent_counter >= self.constituent_threshold: 881 | return False, True 882 | 883 | if stats.mode(pred_cols)[0][0] == 4 and pred_cols.count(4) >= (len(pred_cols)/2): 884 | return False, False 885 | if stats.mode(pred_rows)[0][0] == 4 and pred_rows.count(4) >= (len(pred_rows)/2): 886 | return True, False 887 | if stats.mode(pred_cols)[0][0] == 1 and pred_cols.count(1) >= (len(pred_cols)/2): 888 | return True, False 889 | if stats.mode(pred_rows)[0][0] == 1 and pred_rows.count(1) >= (len(pred_rows)/2): 890 | return False, False 891 | if stats.mode(pred_cols)[0][0] == 2 and pred_cols.count(2) >= (len(pred_cols)/2) and 2 in pred_rows: 892 | return True, False 893 | if stats.mode(pred_rows)[0][0] == 2 and pred_rows.count(2) >= (len(pred_rows)/2) and 2 in pred_cols: 894 | return False, False 895 | return True, False 896 | 897 | def construct_table_object(self, orientation, table, row_ind, col_ind, ref_table): 898 | #Orientation=True - entities are row labels 899 | new_table = Table() 900 | new_table['act_table'] = table 901 | mat_trans_table = np.array(table).T.tolist() 902 | trans_ref_table = list(map(list, zip(*ref_table))) 903 | mat_table = np.array(table).tolist() 904 | try: 905 | if orientation: 906 | for i, r in enumerate(mat_trans_table[row_ind][(col_ind+1):]): 907 | entity = Entity() 908 | entity['name'] = str(r) 909 | entity['ent_ref'] = trans_ref_table[row_ind][i+col_ind+1] 910 | descriptor = mat_trans_table[row_ind][col_ind] 911 | entity['descriptor'] = str(descriptor) 912 | if row_ind > 0: 913 | for j in range(row_ind): 914 | link = Link() 915 | link['name'] = str(mat_trans_table[row_ind-j-1][i+1]) 916 | link['link_ref'] = trans_ref_table[row_ind-j-1][i+1] 917 | if link['name'] != entity['name']: 918 | entity['links'].append(link) 919 | for j, c in enumerate(mat_table[col_ind][(row_ind+1):]): 920 | attr = Attribute() 921 | attr['name'] = str(c) 922 | attr['attr_ref'] = ref_table[col_ind][j+row_ind+1] 923 | try: 924 | potential_unit = self.unit_regex.search(c).group(0)[1:-1] 925 | found_units = [u for u in self.list_of_units if u in potential_unit] 926 | if len(found_units) > 0: 927 | attr['unit'] = potential_unit 928 | except: 929 | pass 930 | if col_ind > 0: 931 | for k in range(col_ind): 932 | link = Link() 933 | link['name'] = str(mat_table[col_ind-k-1][j+1]) 934 | link['link_ref'] = ref_table[col_ind-k-1][j+1] 935 | if link['name'] != attr['name']: 936 | attr['links'].append(link) 937 | val, unit = self.value_extractor(str(mat_table[col_ind+i+1][j+1])) 938 | attr['value_ref'] = ref_table[col_ind+i+1][j+1] 939 | if type(val) == float: 940 | attr['value'] = val 941 | else: 942 | attr['string_value'] = val 943 | if unit is not None: #overwrites previous unit 944 | attr['unit'] = unit 945 | entity['attributes'].append(attr) 946 | new_table['entities'].append(entity) 947 | else: 948 | for i, c in enumerate(mat_table[col_ind][(row_ind+1):]): 949 | entity = Entity() 950 | entity['name'] = str(c) 951 | entity['ent_ref'] = ref_table[col_ind][i+row_ind+1] 952 | entity['descriptor'] = str(mat_table[col_ind][row_ind]) 953 | if col_ind > 0: 954 | for j in range(col_ind): 955 | link = Link() 956 | link['name'] = str(mat_table[col_ind-j-1][i+1]) 957 | link['link_ref'] = ref_table[col_ind-j-1][i+1] 958 | if link['name'] != entity['name']: 959 | entity['links'] 960 | for j, r in enumerate(mat_trans_table[row_ind][(col_ind+1):]): 961 | attr = Attribute() 962 | try: 963 | potential_unit = self.unit_regex.search(r).group(0)[1:-1] 964 | found_units = [u for u in self.list_of_units if u in potential_units] 965 | if len(found_units) >0: 966 | attr['unit'] = unit 967 | except: 968 | pass 969 | attr['name'] = str(r) 970 | attr['attr_ref'] = trans_ref_table[row_ind][j+col_ind+1] 971 | if row_ind > 0: 972 | for k in range(row_ind): 973 | link = Link() 974 | link['name'] = str(mat_trans_table[row_ind-k-1][j+1]) 975 | link['link_ref'] = trans_ref_table[row_ind-k-1][j+1] 976 | if link['name'] != attr['name']: 977 | attr['links'].append(link) 978 | val, unit = self.value_extractor(str(mat_table[row_ind+j+1][i+1])) 979 | attr['value_ref'] = ref_table[row_ind+j+1][i+1] 980 | if type(val) == float: 981 | attr['value'] = val 982 | else: 983 | attr['string_value'] = val 984 | if unit is not None: #overwrites previous unit 985 | attr['unit'] = unit 986 | entity['attributes'].append(attr) 987 | new_table['entities'].append(entity) 988 | return new_table 989 | except IndexError as e: 990 | self.other+=1 991 | self.__log.info( "FAILURE: Index construct_table table from paper " + str(self.doi)) 992 | print('IndexError in construct object') 993 | print(e) 994 | tb = sys.exc_info()[-1] 995 | print(traceback.extract_tb(tb, limit=1)[-1][1]) 996 | 997 | def print_table_object(self, table): 998 | for ent in table['entities']: 999 | print ('Ent:', ent['name']) 1000 | print ('Links:') 1001 | for link in ent['links']: 1002 | print (' ', link['name']) 1003 | print ('Attr:') 1004 | for att in ent['attributes']: 1005 | print( ' ', att['name'], att['value']) 1006 | for link in att['links']: 1007 | print( ' ', link['name']) 1008 | print( '-------') 1009 | print( '--------------') 1010 | 1011 | def value_extractor(self,string): 1012 | original_string = string[:] 1013 | extracted_unit = None 1014 | balance_syn = ['balance', 'bal', 'bal.', 'other.','other'] 1015 | if string.lower() in balance_syn: 1016 | return 'balance', extracted_unit 1017 | 1018 | units = [u for u in self.list_of_units if u in string] 1019 | if units: 1020 | extracted_unit = max(units) 1021 | string = string.replace(extracted_unit,'') 1022 | 1023 | #e.g. already in int or float form: 12.5 -> 12.5 1024 | try: 1025 | return float(string), extracted_unit 1026 | except: 1027 | pass 1028 | 1029 | #e.g. 12.5 - 13.5 -> 13.0 1030 | range_regex = re.compile('\d+\.?\d*\s*-\s*\d+\.?\d*') 1031 | try: 1032 | ranges = range_regex.search(string).group().split('-') 1033 | average = (float(ranges[0]) + float(ranges[1])) / 2.0 1034 | return average, extracted_unit 1035 | except: 1036 | pass 1037 | 1038 | #e.g. 12.2 (5.2) -> 12.2 1039 | bracket_regex = re.compile('(\d+\.?\d*)\s*\(\d*.?\d*\)') 1040 | try: 1041 | extracted_value = float(bracket_regex.search(string).group(1)) 1042 | return float(extracted_value), extracted_unit 1043 | except: 1044 | pass 1045 | 1046 | #e.g. 12.3 ± 0.5 -> 12.3 1047 | plusmin_regex = re.compile('(\d+\.?\d*)(\s*[±+-]+\s*\d+\.?\d*)') 1048 | try: 1049 | extracted_value = float(plusmin_regex.search(string).group(1)) 1050 | return extracted_value, extracted_unit 1051 | except AttributeError: 1052 | pass 1053 | 1054 | #e.g. <0.05 -> 0.05 | >72.0 -> 72.0 | ~12 -> 12 1055 | lessthan_roughly_regex = re.compile('([<]|[~]|[>])=?\s*\d+\.*\d*') 1056 | try: 1057 | extracted_value = lessthan_roughly_regex.search(string).group() 1058 | num_regex = re.compile('\d+\.*\d*') 1059 | extracted_value = num_regex.search(extracted_value).group() 1060 | return float(extracted_value), extracted_unit 1061 | except: 1062 | pass 1063 | 1064 | # e.g. 0.4:0.6 (ratios) 1065 | if ':' in string: 1066 | split = string.split(":") 1067 | try: 1068 | extracted_value = round(float(split[0])/float(split[1]), 3) 1069 | return extracted_value, extracted_unit 1070 | except: 1071 | pass 1072 | return original_string, None 1073 | 1074 | def load_units(self): 1075 | self.list_of_units = ['GPa', 'MPa', 'kPa' 'Pa', 'mPa', 1076 | '%', 1077 | 'km', 'm', 'cm', 'mm', 'µm', 'nm', 'um', 1078 | 'kg', 'g', 'gr' 'mg', 1079 | 'hr', 'h', 'sec', 's', 1080 | '°', '°C', 'F', 'degC', 1081 | 'ppm'] 1082 | 1083 | def load_composition_elements(self, domain=None): 1084 | # Compositional elements to help in correclty identifiying the orientation of tables in specific domains 1085 | if domain == 'geopolymers': 1086 | self.material_constituents = ['Al2O3', 'SiO2'] 1087 | self.constituent_threshold = 2 1088 | self.remaining = None 1089 | elif domain == 'steel': 1090 | self.material_constituents = ['Fe', 'Cr', 'Cu', 'C', 'Ti', 'Ni', 'Mo','Mn'] 1091 | self.constituent_threshold = 4 1092 | self.remaining = ['Fe'] 1093 | elif domain == 'titanium': 1094 | self.material_constituents = ['Ti', 'Fe', 'C'] 1095 | self.constituent_threshold = 2 1096 | self.remaining = ['Fe'] 1097 | elif domain == 'zeolites': 1098 | self.material_constituents = (['Si/Ge','DMAP/T','HF/T','H2O/T','(Si + Ge)/Al','SiO2','GeO2','SDA','HF','H2O','Ge','Si','SiO2/Al2O3','Si/Al', 1099 | 'R(OH)2/Si','F-/Si','(Si + Ge)/Zr','Al','SDA/Si','H2O/Si','OH/Si','Si/H2O','Si/OH','Ge/Si','Si/Ti','MeO', 1100 | 'SiO2/GeO2','TMHDA','TMEDA','TEOS','NH4F','Al/T','N,N-Diethylethylenediamine','NaGaGeO4','NaGaO2','Na2GeO3*H2O', 1101 | 'SOD','NaNO2','NaOH']) 1102 | self.constituent_threshold = 2 1103 | self.remaining = None 1104 | elif domain == 'aluminum': 1105 | self.material_constituents = ['Al','Cu','Mn','Si','O','Mg'] 1106 | self.constituent_threshold = 2 1107 | self.remaining = None 1108 | elif domain == 'alloys': 1109 | self.material_constituents = ['Ag', 'Al', 'Ar', 'As', 'Au', 'B', 'Ba', 'Be', 'Bi', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cl', 'Co', 'Cr', 'Cs', 'Cu', 'Dy', 1110 | 'Er', 'Eu', 'F', 'Fe', 'Ga', 'Gd', 'Ge', 'H', 'Hf', 'Hg', 'Ho', 'I', 'In', 'Ir', 'K', 'La', 'Li', 'Lu', 'Md', 'Mg', 1111 | 'Mn', 'Mo', 'N', 'Na', 'Nb', 'Nd', 'Ni', 'O', 'Os', 'P', 'Pb', 'Pd', 'Pr', 'Pt', 'Rb', 'Re', 'Rh', 'Ru', 'S', 'Sb', 1112 | 'Sc', 'Se', 'Si', 'Sm', 'Sn', 'Sr', 'Ta', 'Tb', 'Te', 'Th', 'Ti', 'Tl', 'Tm', 'U', 'V', 'W', 'Y', 'Yb', 'Zn', 'Zr'] 1113 | self.constituent_threshold = 2 1114 | self.remaining = ['Fe', 'Al','Ti'] 1115 | 1116 | 1117 | def clean_composition_table(self, table, remaining = None): 1118 | entities_to_remove = [] 1119 | for entity_num, entity in enumerate(table['entities']): 1120 | #self.get_links(entity) 1121 | cumsum, balance_pos, elements_in_entity = self.get_balance(entity) 1122 | remaining_in_entity = None 1123 | try: 1124 | for check_element in self.remaining: 1125 | if check_element not in elements_in_entity: 1126 | remaining_in_entity = check_element 1127 | break 1128 | except: 1129 | pass 1130 | #if 'balance' in a cell, it finds the balance and enters this under the 'value' key under the right attributes. 1131 | #if no 'balance' but the total doesnt add up to 1.00 or 100, then there may be an implicit element missing. 1132 | #we try to impute this by adding the 'remaining' 1133 | if balance_pos: 1134 | self.set_balance(entity, balance_pos, cumsum) 1135 | continue 1136 | elif (not balance_pos 1137 | and not self.check_if_balanced(cumsum) 1138 | and type(remaining_in_entity) == str): 1139 | 1140 | new_attr = Attribute() 1141 | new_attr['name'] = str(remaining_in_entity) 1142 | if cumsum < 1: 1143 | new_attr['value'] = 1.0-cumsum 1144 | else: 1145 | new_attr['value'] = 100.0 - cumsum 1146 | new_attr['string_value'] = 'added by us' 1147 | table['entities'][entity_num]['attributes'].append(new_attr) 1148 | #if the cumsum == 0, it means that we can discard 1149 | #this entire entity out as it holds no useful information 1150 | if cumsum ==0: 1151 | entities_to_remove.append(entity_num) 1152 | 1153 | for i in sorted(entities_to_remove, reverse=True): 1154 | del(table['entities'][i]) 1155 | 1156 | return table 1157 | 1158 | def get_balance(self, entity): 1159 | cumsum = 0 1160 | elements_in_entity = [] 1161 | balance_pos = None 1162 | if entity['descriptor'] in self.material_constituents: 1163 | attr = Attribute() 1164 | attr['name'] = entity['descriptor'] 1165 | val, unit = self.value_extractor(entity['name']) 1166 | if str(val).isnumeric(): 1167 | attr['value'] = float(val) 1168 | elif type(val) == str: 1169 | attr['string_value'] = val 1170 | entity['attributes'].append(attr) 1171 | for counter, attr in enumerate(entity['attributes']): 1172 | if attr['name'] in self.material_constituents: 1173 | cumsum+= attr['value'] 1174 | elements_in_entity.append(attr['name']) 1175 | if type(attr['string_value']) == str: 1176 | if attr['string_value'].lower() in ['balance', 'bal','bal.']: 1177 | balance_pos = counter 1178 | print('found a balance') 1179 | return cumsum, balance_pos, elements_in_entity 1180 | 1181 | def set_balance(self, entity, balance_pos, cumsum): 1182 | if cumsum < 1: 1183 | entity['attributes'][balance_pos]['value'] = 1.0-cumsum 1184 | else: 1185 | entity['attributes'][balance_pos]['value'] = 100.0 - cumsum 1186 | 1187 | def get_links(self, entity): 1188 | list_of_names = [] 1189 | for attr in entity['attributes']: 1190 | list_of_names.append(attr['name']) 1191 | if len(set(list_of_names)) < 3: 1192 | for attr in entity['attributes']: 1193 | if len(attr['links']) >0: 1194 | swapped = attr['name'] 1195 | attr['name'] = attr['links'][0]['name'] 1196 | attr['links'][0]['name'] = swapped 1197 | 1198 | def check_if_balanced(self, cumsum): 1199 | if cumsum > 1: 1200 | if 100 - cumsum < 1.5: 1201 | return True 1202 | else: 1203 | return False 1204 | else: 1205 | if 1 - cumsum < 0.015: 1206 | return True 1207 | else: 1208 | return False 1209 | 1210 | def _get_easy_headers(self, table): 1211 | sample_names = ['sample name','sample','run no.','experiment','osda','entry','run','gel','catalyst'] 1212 | col_ind = -1 1213 | row_ind = -1 1214 | try: 1215 | for i, row in enumerate(table): 1216 | for j, col in enumerate(row): 1217 | if col.lower() in sample_names: 1218 | col_ind = i 1219 | row_ind = j 1220 | return col_ind, row_ind 1221 | except: 1222 | return -1, -1 1223 | def _search_for_reference(self, soup, format): 1224 | if format == 'html': 1225 | ref = soup.find_all('a') 1226 | tags = [] 1227 | if len(ref) == 0: 1228 | text = soup.text 1229 | refs = re.findall('\[\D\]', text) 1230 | if len(refs) == 0: 1231 | return soup, tags 1232 | else: 1233 | text = re.split('\[\D\]', text) 1234 | text = ''.join(text) 1235 | soup.string = text 1236 | return soup, refs 1237 | else: 1238 | for r in ref: 1239 | tag = soup.a.extract() 1240 | tags.append(tag.text) 1241 | return soup, tags 1242 | elif format == 'xml': 1243 | ref = soup.find_all('xref') 1244 | tags = [] 1245 | if len(ref) == 0: 1246 | if soup.name == 'caption': 1247 | return soup, tags 1248 | ref = soup.find_all('sup') 1249 | for r in ref: 1250 | text = r.text.split(',') 1251 | for t in text: 1252 | if len(t) == 1 and t.isalpha(): 1253 | tags.append(t) 1254 | soup.sup.decompose() 1255 | return soup, tags 1256 | else: 1257 | for r in ref: 1258 | if len(r.text) < 4: 1259 | tag = soup.xref.extract() 1260 | tags.append(tag.text) 1261 | return soup, tags 1262 | 1263 | 1264 | 1265 | -------------------------------------------------------------------------------- /tableextractor/data/101039c3ta12829f.html: -------------------------------------------------------------------------------- 1 | Aminothermal synthesis of CHA-type SAPO molecular sieves and their catalytic performance in methanol to olefins (MTO) reaction - Journal of Materials Chemistry A (RSC Publishing) DOI:10.1039/C3TA12829F
 

Aminothermal synthesis of CHA-type SAPO molecular sieves and their catalytic performance in methanol to olefins (MTO) reaction

Dong Fanabc, Peng Tianab, Xiong Suabc, Yangyang Yuanabc, Dehua Wangabc, Chan Wangabc, Miao Yangab, Linying Wangab, Shutao Xuab and Zhongmin Liu*ab
aDalian National Laboratory for Clean Energy, Dalian Institute of Chemical Physics, Chinese Academy of Sciences, Dalian, P. R. China. E-mail: liuzm@dicp.ac.cn
bNational Engineering Laboratory for Methanol to Olens, Dalian Institute of Chemical Physics, Chinese Academy of Sciences, Dalian, P. R. China
cUniversity of Chinese Academy of Sciences, Beijing, P. R. China

Received 22nd July 2013, Accepted 18th September 2013

First published on 18th September 2013


Aminothermal synthesis of SAPO molecular sieves, in which organic amines are used as both solvent and template, is explored based on a variety of amines. Di-iso-propylamine (DIPA) and N,N,N′,N′-tetramethylethylenediamine (TMEDA) are found to lead to the rapid crystallization of SAPO-34 with high solid yield. A solid yield of 96.2% could be acquired using the TMEDA system (200 °C, 12 h), which is the highest value ever reported for SAPO molecular sieves. SAPO-44 is obtained for the first time using the hexamethyleneimine (HMI) template. Detailed synthetic investigation shows that the silicon content in the initial gel has an important effect on the crystalline nature of the final products, and higher Si concentration favours the synthesis of pure SAPO-34 and SAPO-44. In addition, it is shown that the Si coordination environment in the samples is closely related to the choice of template. Among the three samples investigated, SAPO-34-DIPA has the lowest threshold of Si content for the formation of Si islands in the framework due to the smallest charge compensation centers occluded in its CHA cage. The catalytic performance of the synthesized samples is tested by the MTO reaction and a high olefin selectivity of 85.8% is obtained on SAPO-34 templated by DIPA.


1 Introduction

Silicoaluminophosphate (SAPO) molecular sieves were firstly reported by Union Carbide in 1984.1,2 The structures of SAPOs cover a range of different structure types; some are analogous to certain zeolites such as SAPO-34 (CHA topology), but a large number have unique structures without a zeolite counterpart. Among SAPOs, small-pore SAPO-34 has received great attention in recent years due to its good performance in methanol-to-olefins (MTO) reaction.3,4 The CHA framework topology, comprised of cylinder-like cages (6.7 × 6.7 × 10.0 Å) with 8-ring openings (3.8 × 3.8 Å), has been reported to be the ideal breeding ground for hydrocarbon pool intermediates (polymethylbenzenes as active species) in the MTO reaction.5–9 Recently, a commercial MTO process with a production capacity of 600[thin space (1/6-em)] 000 tons of light olefins per year has been successfully put into operation based on the SAPO-34 catalyst.10

The traditional synthesis of SAPO-34 is commonly carried out in a hydrothermal way, which involves the addition of large amounts of water as the mass and heat transfer medium. The presence of organic amines as structure directing agents (SDAs) or templates is essential for the successful synthesis.11,12 Many organic amines, including tetraethylammonium hydroxide (TEAOH),1,2 dipropylamine,2 isopropylamine,13 piperidine,14 morpholine,15 triethylamine (TEA),4 and diethylamine (DEA),16,17etc., have been reported to direct the crystallization of SAPO-34. According to the literature, the choice of SDAs for the synthesis of SAPO-34 is crucial for the properties of the final products, such as microscopic structures, elemental compositions and morphologies. For example, Barthomeuf et al.18 carried out comparative studies on the properties of SAPO-34 templated by TEAOH and morpholine, and concluded that the template could determine the maximum charge and govern the distribution of silicon in the framework. The same group also reported that the choice of template exerted a significant impact on the short- and long-term stability of SAPO-34.19 Gon et al.20 and Nishiyama et al.21 found the crystallite size variation of SAPO-34 crystals synthesized using different templates, respectively, and successfully correlated this with their catalytic performance in the MTO reaction. Our group systematically studied the synthesis of SAPO-34 templated by DEA and discovered that DEA tended to prompt higher silicon incorporation into the SAPO-34 framework than TEAOH and TEA.16

Novel synthetic methods are always attractive in the field of molecular sieves, which may have the possibility to create new materials or known phases with specific properties.22,23 One of the alternative synthetic methods for SAPO molecular sieves is the solvothermal synthesis.24–26 The methodology was firstly introduced into the zeolite synthesis by Bibby and Dale, in which organic solvents instead of water were used as the reaction medium.27 Xu and co-workers extended this method to the synthesis of aluminophosphate (AlPO) molecular sieves. By employing alcohols as solvents, they synthesized many novel AlPO materials with anionic frameworks, among which JDF-20 possessing 20-ring extra-large pores are the most attractive ones.24,28,29 Recently, we have proposed a novel solvothermal approach to prepare SAPO molecular sieves, designated as aminothermal synthesis, in which the organic amine acts as both the template and solvent.30 Solvothermal synthesis of SAPO-34 and SAPO-18 were realized for the first time with this method. The synthesized materials exhibited good adsorption capacities for CO2 and high CO2/CH4 ratio. Moreover, the organic amines could be easily collected and recycled after each synthesis, suggesting the environmental benignity of the methodology. It should be noted that the aminothermal synthesis is not truly anhydrous, and the presence of small quantity of water in the initial system is essential for the successful crystallization of SAPO molecular sieves.

In the present work, a variety of organic amines were employed for the aminothermal synthesis of SAPO molecular sieves. The investigated organic amines include n-propylamine (PA), tri-n-propylamine (TPA), di-iso-propylamine (DIPA), n-butylamine (BA), N,N-dimethylbenzylamine (DMBA), 1,2-ethylenediamine (EDA), N,N,N′,N′-tetramethylethylenediamine (TMEDA), cyclohexylamine (CHA) and hexamethyleneimine (HMI). Three amines, TMEDA, DIPA and HMI, rarely used for the synthesis of CHA-type SAPO molecular sieves (termed as CHA-SAPO), are found to direct the formation of SAPO-34 and SAPO-44. Detailed investigations on the synthesis, physiochemical properties and catalytic performance of CHA-SAPO samples are carried out.

2 Experimental

2.1 General procedure of aminothermal synthesis

Organic amines (chemical pure) were used as received. Pseudoboehmite (70.5 wt%), phosphoric acid (85 wt%), and silica sol (27.5 wt%) were used as inorganic precursors.

The molar ratio of the initial gel is R/Al2O3/P2O5/SiO2/H2O = X/1.2/0.9/0.5/14.6, in which the volume of R is kept constant (50 ml), and the masses of Al, P and Si sources are 8.4 g, 10.4 g and 5.3 g, respectively. Water from each resource has been calculated into the gel composition. A typical synthesis procedure is as follows. Organic amine, pseudoboehmite, silica sol and water were added in sequence into a glass beaker. The mixture was stirred at room temperature for 5 min, and then transferred into a stainless steel autoclave. After further addition of phosphoric acid, the autoclave was sealed quickly, placed in an oven and rotated at 60 rpm for 20 min to get a homogeneous mixture. Subsequently, the autoclave was heated in 60 min to the desired temperature under rotation and kept for a certain time. The solid product was obtained after centrifugation, washing and drying at 100 °C overnight.

The solid yield of samples is calculated by the following formula: yield (%) = (Msample × DW%) × 100/(MAl2O3 + MP2O5 + MSiO2)gel, where Msample, DW% and (MAl2O3 + MP2O5 + MSiO2)gel stand for the weight of as-synthesized samples, the weight percentage of inorganic oxides in the as-synthesized samples derived from TG analysis, and the dry mass of the three oxides in the starting mixture, respectively.

2.2 Characterization

The powder XRD pattern was recorded on a PANalytical X'Pert PRO X-ray diffractometer with CuKα radiation (λ = 1.54059 Å), operating at 40 kV and 40 mA. The chemical composition of the solid samples was determined with a Philips Magix-601 X-ray fluorescence (XRF) spectrometer. The crystal morphology was observed by scanning electron microscopy (SEM, Hitachi S-3400N). All NMR experiments were performed on a Varian Infinity plus 400WB spectrometer with a BBO MAS probe operating at a magnetic field strength of 9.4 T. The resonance frequencies were 104.2, 161.9, 79.4, and 100.5 MHz for 27Al, 31P, 29Si and 13C, respectively. Chemical shifts were referenced to 1.0 M Al(NO3)3 for 27Al, 85% H3PO4 for 31P, and 2,2-dimethyl-2-ilapentane-5-sulfonate sodium salt (DSS) for 29Si and 13C. The spinning rates of the samples at the magic angle were 4, 10, 6 and 8 kHz for 29Si, 27Al, 31P and 13C, respectively. Textural properties of the calcined samples were determined by N2 adsorption at 77 K on a Micromeritics ASAP 2020 system. The total surface area was calculated based on the BET equation. The micropore volume and micropore surface area were evaluated using the t-plot method. TG and DSC analysis were performed on a TA SDTQ600 analyzer with the temperature-programmed rate of 10 °C min−1 under an air flow of 100 ml min−1. The temperature-programmed desorption of ammonia (NH3-TPD) experiments were conducted in a Micromeritics Autochem II 2920 device. 0.2 grams of the sample particles (40–60 mesh) were loaded into a U-quartz tube and pretreated at 650 °C for 60 min under helium flow. After cooling down to 100 °C, a gas mixture of NH3 and He flow was introduced to saturate the sample surface with NH3 adsorption (60 min). After this, He flow was purged through the sample for 30 min to remove the weakly adsorbed NH3 molecules. The measurement of the desorbed NH3 was performed from 100 °C to 700 °C (10 °C min−1) under He flow (20 ml min−1).

2.3 Catalyst evaluation

MTO reaction was carried out with a fixed-bed reactor at atmospheric pressure. 1.2 grams of the calcined sample (40–60 mesh) were loaded into the reactor. The catalyst was pretreated in a flow of nitrogen atmosphere at 550 °C for 1 h. Nitrogen flow was turned off when the reactor was cooled down to 450 °C. A mixture of methanol and water with a CH3OH–H2O weight ratio of 40/60 was consequently pumped into the reactor. The weight hourly space velocity (WHSV) of methanol was 2 h−1. The products were analysed by an Agilent GC7890 gas chromatograph equipped with an FID detector and Poraplot Q-HT capillary column.

The methanol conversion is defined as the percentage of CH3OH consumed during the MTO reaction. The selectivity is defined as the weight percent of each compound in the total products. It should be noted that dimethylether (DME) is considered as a reactant instead of a product here.

3 Results and discussion

3.1 Aminothermal synthesis of SAPO molecular sieves based on a variety of organic amines

Fig. 1 displays the XRD patterns of the as-synthesized samples prepared with different amines as both the solvent and template. It was found that primary amines, including BA, CHA, PA and EDA, mainly led to the formation of lamellar materials (a–d, Fig. 1). SAPO-5 was obtained as the final product while employing TPA or DMBA amines (e and f, Fig. 1). Three amines, including HMI, DIPA and TMEDA, were found to direct the crystallization of SAPO molecular sieves with the CHA topology (g–i, Fig. 1). It should be noted that the samples templated by DIPA and TMEDA exhibit the typical diffraction pattern of SAPO-34, while some difference is observed for the sample templated by HMI. The distance between two peaks in the range of 23–26° (2 theta) and the relative intensities of the neighboring peaks around 30° (2 theta) in the pattern of SAPO-HMI sample obviously differ from those of SAPO-34-DIPA and SAPO-34-TMEDA, suggesting that SAPO-HMI was in fact SAPO-44, a silicoaluminophosphate also possessing chabasite topology.31
XRD patterns of the as-synthesized samples using BA (a), CHA (b), PA (c), EDA (d), TPA (e), DMBA (f), HMI (g), DIPA (h) and TMEDA (i) as the solvent and template. The crystallization conditions are 200 °C and 48 h for all except for sample i (200 °C, 12 h). The peak intensities of samples a–d shown in the figure have been reduced to 1/3 of the original.
Fig. 1 XRD patterns of the as-synthesized samples using BA (a), CHA (b), PA (c), EDA (d), TPA (e), DMBA (f), HMI (g), DIPA (h) and TMEDA (i) as the solvent and template. The crystallization conditions are 200 °C and 48 h for all except for sample i (200 °C, 12 h). The peak intensities of samples a–d shown in the figure have been reduced to 1/3 of the original.

Among the three organic amines for CHA-SAPO molecular sieves, DIPA was once reported for the dry-gel synthesis of SAPO-34.32 Although TMEDA has been patented to direct the formation of SAPO-34,33 the hydrothermal synthesis was conducted via a fluoride route. Without F, the product would become SAPO/AlPO-21. Moreover, Pastore et al.34 reported that the lamellar AlPO-kanemite precursor could be transformed into CHA-SAPO (designated as CAL-1 therein) by a hydrothermal reaction with silica in the presence of HMI. Their results showed that the synthesis of CAL-1 was restricted within a relatively narrow initial SiO2/Al2O3 ratio (0.8–1.2), and CAL-1 was actually co-templated by HMI and BA (the SDA for the AlPO-kanemite). To the best of our knowledge, there is no research on SAPO-44 templated by HMI until now.

3.2 Synthesis of SAPO-34 with DIPA as the solvent and template

The effect of silicon content in the initial gel on the synthesis was firstly investigated based on the DIPA system by fixing other synthetic conditions. The results are shown in Table 1. AlPO-11 instead of AlPO-34 was acquired as the final product in the absence of silicon. This is not surprising because DIPA is a well-known template for the hydrothermal synthesis of AlPO-11. On the other hand, AlPO-34 could only crystallize in a fluoride-containing system. By increasing the x(SiO2) to 0.15, SAPO-34 appeared together with small quantity of SAPO-11 as impurity. Pure SAPO-34 was readily synthesized when the silicon content reached 0.3 or higher. However, a small amount of the amorphous phase due to the unreacted silica residues started to appear in the product when the x(SiO2) became 1.0, implying the capacity limitation of silicon incorporation into the framework of SAPO-34-DIPA. The elemental composition of the obtained samples was determined by XRF and shown in Table 1. The Si concentration in SAPO-34 rises with the increasing content in the initial gel, though the Si incorporation degree shows a decline. 19 |
Table 1 The influence of synthetic parameters on the synthesis results based on the DIPA system 20 |
21 |
Sampleax(SiO2)t (h)ProductProduct compositionSi incorporationbRelative crystallinityc (%)Yield (%)
a All samples are prepared using 50 ml of DIPA as both the solvent and template under a crystallization temperature of 200 °C. The initial molar composition for the inorganic species is as follows: Al2O3/P2O5/SiO2/H2O = 1.2/0.9/x(SiO2)/14.6.b The silicon incorporation is defined as [Si/(Si + Al + P)]product/[Si/(Si + Al + P)]gel.c The relative crystallinity is calculated based on the relative intensity of the three strongest peaks (2θ = 9.5, 16.0 and 20.5°) in the XRD patterns.
1048AlPO-11
20.1548SAPO-34 + SAPO-11
30.348SAPO-34Al0.511P0.416Si0.0731.0992.3
40.548SAPO-34Al0.482P0.422Si0.0960.9110083.8
50.7548SAPO-34Al0.490P0.389Si0.1210.8087.3
61.048SAPO-34 + amorphous SiO2Al0.458P0.357Si0.18593.0
70.53SAPO-34Al0.516P0.413Si0.0710.6787.061.8
80.57SAPO-34Al0.507P0.409Si0.0840.7996.984.7
90.512SAPO-34Al0.501P0.407Si0.0920.8797.390.1


The crystallization process of SAPO-34 using DIPA as the solvent and template was studied based on sample 4 (Table 1). The process exhibits a feature of fast crystallization. The relative crystallinity of the product reaches 87% after a crystallization duration of 3 hours. This value further increases to 96.9% after 7 hours and keeps at a high level until the end of the crystallization. The Si content in the product shows an increasing trend with time, consistent with what we have found in the hydrothermal synthesis of SAPO-34 templated by TEA and DEA.17,35 It implies that both the initial Si content in the gel and the crystallization time can modify the Si concentration in the final product. Moreover, the solid yield during the crystallization shows a maximum of 90.1% at t = 12 h. Afterwards, it gradually drops and a yield of 83.8% is obtained for the 48 h sample.36 The possible reason is that molecular sieves are generally metastable materials, which crystallize through a kinetic control. Dissolution and phase transformation might occur after the optimal synthesis period of a certain phase.

The SEM images (Fig. 2) reveal that SAPO-34-DIPA crystals (samples 4 and 9) possess the typical rhombohedral morphology. The crystal size ranges from 1 to 2 μm. The N2 physisorption results of samples 3 and 4 are given in Table 2. Both samples exhibit a BET surface area of around 590 m2 g−1 and a micropore volume of 0.27 cm3 g−1. Both the SEM image and N2 physisorption results verify the high purity and crystallinity of the SAPO-34 templated by DIPA.


SEM images of the as-synthesized samples 4(a), 9(b), 13(c), 18(d).
Fig. 2 SEM images of the as-synthesized samples 4(a), 9(b), 13(c), 18(d).
22 |
Table 2 Textural properties of the samples 23 |
24 |
SampleSurface area (m2 g−1)Pore volume (cm3 g−1)
StotalaSmicrobSextcVtotalVmicrod
a BET surface area.b t-plot micropore surface area.c t-plot external surface area.d t-plot micropore volume.
358758610.270.27
459058730.300.27
13503490130.240.23
18490466240.240.22


3.3 Synthesis of SAPO-34 with TMEDA as the solvent and template

The effect of the silicon content on the synthesis using TMEDA as the solvent and template are summarized in Table 3. It was found that AlPO-21 possessing the AWO topology was crystallized from the silica-free initial gel. By increasing the x(SiO2) to 0.15, SAPO-34 appeared together with the presence of a large amount of SAPO-21. After further increasing the value to 0.30, SAPO-34 became the dominant product at the expense of SAPO-21. Pure SAPO-34 could be obtained in the range of 0.5–1.0. It should be mentioned that high solid yields of SAPO-34 are observed under the present aminothermal environment, especially for sample 13, which shows a value of 96.2%. Such a high yield has never been reported before for SAPO molecular sieves. 25 |
Table 3 The influence of synthetic parameters on the synthesis results based on TMEDA and HMI systems 26 |
27 |
SampleaAminex(SiO2)t (h)ProductProduct compositionSi incorporationbYield (%)
a All samples are prepared using 50 ml of organic amine as both the solvent and template under a crystallization temperature of 200 °C. The initial molar composition for the inorganic species is as follows: Al2O3/P2O5/SiO2/H2O = 1.2/0.9/x(SiO2)/14.6.b The silicon incorporation is defined as [Si/(Si + Al + P)]product/[Si/(Si + Al + P)]gel.
10TMEDA012AlPO-2143.2
11TMEDA0.1512SAPO-21 + SAPO-3447.5
12TMEDA0.3012SAPO-34 + minor SAPO-21Al0.521P0.389Si0.08972.2
13TMEDA0.512SAPO-34Al0.506P0.381Si0.1121.0596.2
14TMEDA0.7512SAPO-34Al0.488P0.372Si0.1391.0394.7
15TMEDA1.012SAPO-34Al0.470P0.351Si0.1790.9689.8
16HMI048Unknown phase
17HMI0.348SAPO-44Al0.503P0.388Si0.1091.6341.4
18HMI0.548SAPO-44Al0.507P0.366Si0.1271.1940.1
19HMI0.512Amorphous


The crystals of SAPO-34-TMEDA (sample 13) present elongated rhombohedron morphology with a relatively large crystal size of around 10 μm (Fig. 2). The BET surface area and micropore volume of sample 13 were calculated to be 503 m2 g−1 and 0.23 cm3 g−1, respectively. The values are lower than those of SAPO-34-DIPA, but still in the reasonable range for SAPO-34, indicating the good crystallinity of the product synthesized under the TMEDA system.

3.4 Synthesis of SAPO-44 with HMI as the solvent and template

Synthetic results based on the aminothermal synthesis of SAPO-44 with HMI as the solvent and template verify again that the Si content in the starting gel has great effect on the final product (Table 3). In the silica-free system, aluminophosphate with an unknown phase was acquired. When the value of x(SiO2) was 0.3 or higher, SAPO-44 was obtained as the crystalline phase. Noteworthily, samples 17 and 18 possess larger silicon contents than SAPO-34-DIPA and SAPO-34-TMEDA synthesized from the same initial gel composition, suggesting that HMI is able to induce higher silicon incorporation into the framework of SAPO-44 (see the column of Si incorporation in Tables 1 and 3). However, the solid yield with HMI system is unexpectedly low (∼40%).

By shortening the crystallization duration to 12 h, no crystalline product was obtained. This phenomenon indicates that the crystallization kinetic under the HMI system is much slower than that under DIPA and TMEDA systems, suggesting the weaker templating efficacy of HMI in the synthesis of SAPO-44. This might also be the possible reason for the low solid yield associated with the HMI system.

The crystal morphology of SAPO-44 (sample 18) was also examined by SEM. It exhibits the typical rhombohedral shape. Moreover, it is found that the textural properties of sample 18 are close to those of SAPO-34-TMEDA. These results imply the good quality of SAPO-44 obtained under the HMI system.

3.5 Physiochemical properties of CHA-SAPOs synthesized with three templates: 13C MAS NMR, TG-DSC, 29Si, 27Al and 27P MAS NMR

The as-synthesized samples 4, 13 and 18 (SAPO-34-DIPA, SAPO-34-TMEDA and SAPO-44, respectively) were used for various characterizations.

13C MAS NMR spectra were recorded in order to verify the exact template species occluded in the three samples (see Fig. 3). The spectrum of sample 4 exhibits two symmetrical peaks centred around 46 and 19 ppm, which are ascribed to the carbon atoms bound to the nitrogen atoms (C1) and the methyl carbons (C2) in the DIPA molecule, respectively. For sample 16 templated by the diamine TMEDA, the two peaks in the spectrum occur at relatively higher chemical shifts (58 ppm for C1 and 47 ppm for C2), due to the intimate connection of both methyl and methylene to the nitrogen atoms with stronger electronegativity. Sample 18, obtained under the HMI system, displays two resonance peaks centred around 49 and 26 ppm. The former peak was commonly ascribed to the carbon atoms (Cα) adjacent to the imino groups, while the latter one was assigned to the remaining carbon atoms (Cβ and Cγ) in the HMI ring.34 Generally, the obtained spectra are in good agreement with the previous literature, suggesting that the investigated three samples are well templated by DIPA, TMEDA and HMI, respectively.


13C MAS NMR of the as-synthesized samples 4(a), 13(b) and 18(c).
Fig. 3 13C MAS NMR of the as-synthesized samples 4(a), 13(b) and 18(c).

The TG and DSC curves for the three samples are illustrated in Fig. S1, S2 and S3, respectively (see ESI). The weight loss in the samples occurs in three or four stages (Table 4). In the first stage (<230 °C), the weight loss is attributed to the water desorption. The second weight loss between 230 and 410 °C with an exothermal process is due to the combustion decomposition of the template. The third and fourth weight losses at a temperature higher than 410 °C with strongly exothermic processes are likely associated with the further removal of organic residue occluded in the channels and cages of samples. In addition, it can be seen from the curves that there is no weight loss and exothermic peak associated with structural collapse until 900 °C, suggesting the high thermal stability of SAPO-34 and SAPO-44 synthesized in the present work. Notably, sample 18 demonstrated a much obvious weight loss due to the template removal, indicating a higher organic occlusion of HMI molecules in the SAPO-44 framework. Based on the elemental composition and topological structure of the sample, it is calculated that for samples 4 and 13, one organic amine molecule could be accommodated per CHA cage. As for sample 18, every cage contains nearly two HMI molecules, possibly due to the relatively small size of HMI.

28 |
Table 4 Thermal analysis results of the samples 29 |
30 |
SampleWeight loss (wt%)Template per cage
IIIIII + IV
44.310.81.01.01
133.26.36.50.96
183.54.015.71.91


Solid-state 29Si, 31P and 27Al MAS NMR spectra were recorded to investigate the local atomic coordination environments in the as-synthesized samples. Fig. 4 presents the 29Si spectra of the three samples. Only one symmetric peak centred at −91 ppm, ascribed to Si (4Al) species, appears in the spectrum of SAPO-34-TMEDA. Complex Si environments are observed for SAPO-34-DIPA and SAPO-44. Besides the apparent resonance at −91 ppm, there emerge several small signals at around −96, −100, −105 and −110 ppm, corresponding to Si (3Al), Si (2Al), Si (1Al) and Si-island, respectively. Additionally, a weak peak at −85 ppm is also present in the spectra of both samples, which possibly arises from the Si(OAl)3(OH) or Si(OAl)(OSi)(OH)2 species located in the Si–Al domains.37,38 According to the deconvoluted analysis of the spectra (Table S1), the concentration of Si (4Al) species in the three samples has the following order: SAPO-34-TMEDA > SAPO-44 > SAPO-34-DIPA. Barthomeuf et al.18 once reported that a higher template number per cage in SAPO-34 could lead to a larger framework charge, and thus a higher Si (4Al) concentration in the framework. In the present work, the template number per cage for TMEDA, HMI and DIPA are 1, 2 and 1, respectively. However, it should be noted that TMEDA, containing two nitrogen atoms, actually has two charge compensation centers despite only one molecule is accommodated in a cage. Therefore, the smallest charge compensation centers confined in the cage of SAPO-34-DIPA causes the lowest Si (4Al) concentration in the framework, though it possesses the lowest Si content among the three investigated samples.


29Si MAS NMR of the as-synthesized samples 4(a), 13(b) and 18(c).
Fig. 4 29Si MAS NMR of the as-synthesized samples 4(a), 13(b) and 18(c).

As shown in Fig. 5, two peaks centred at around 37 and 9 ppm are observed in the 27Al MAS NMR spectra of the two SAPO-34 samples. The strong resonance at high field should arise from tetrahedral Al species, whereas the weak one is attributed to penta-coordinated Al formed by an additional interaction of one water or template molecule with the framework aluminum. For SAPO-44, one more peak centred at 0 ppm is observed besides the two peaks shown in the spectra of SAPO-34 samples, which corresponds to hexa-coordinated aluminum. The assignment is confirmed by the 27Al spectrum of the calcined SAPO-44, in which only one signal with a chemical shift of 37 ppm appears.


27Al MAS NMR of the as-synthesized samples 4(a), 13(b), 18(c) and calcined sample 18(d).
Fig. 5 27Al MAS NMR of the as-synthesized samples 4(a), 13(b), 18(c) and calcined sample 18(d).

The 31P MAS NMR spectra of the samples are illustrated in Fig. 6. One strong resonance peak at −30 ppm appears in the spectra of all three samples, suggesting the predominant P (4Al) environment in the framework. In addition, the spectrum of SAPO-44 demonstrates two additional small peaks located at −17.6 and −11.4 ppm, which might be assigned to partially hydrated P(OAl)x(OH)y species.39


31P MAS NMR of the as-synthesized samples 4(a), 13(b) and 18(c).
Fig. 6 31P MAS NMR of the as-synthesized samples 4(a), 13(b) and 18(c).

3.6 Catalytic performance in the MTO reaction

Among the three serial CHA-SAPO molecular sieves, samples 3, 13 and 17 (SAPO-34-DIPA, SAPO-34-TMEDA and SAPO-44) with pure CHA structure and low Si contents were selected to test their catalytic properties in the MTO reaction, considering that SAPO-34 with lower Si contents generally exhibited better MTO performance.3,40 The results are illustrated in Fig. 7 and Table S2.
Methanol conversion (solid) and selectivity of C2H4 plus C3H6 (hollow) during the MTO reaction on samples 3 (a), 13 (b), and 17 (c) (reaction conditions: 450 °C, CH3OH3 WHSV = 2 h−1, 40 wt% methanol solution).
Fig. 7 Methanol conversion (solid) and selectivity of C2H4 plus C3H6 (hollow) during the MTO reaction on samples 3 (a), 13 (b), and 17 (c) (reaction conditions: 450 °C, CH3OH3 WHSV = 2 h−1, 40 wt% methanol solution).

Both SAPO-34 samples demonstrate good catalytic properties, especially for SAPO-34 templated by DIPA. The selectivity of ethylene and propylene on SAPO-34-DIPA could reach 85.8% under complete methanol conversion. NH3-TPD experiments are carried out to probe the acid properties of the two samples and the curves are given in Fig. S4. Clearly, sample 13 possesses larger acid concentration and higher acid strength than sample 3. According to the literature, SAPO-34 with lower Si content (lower acidity) generally has better MTO catalytic performance.41,42 It is therefore inferred that the larger amount of strong/moderate acid sites in sample 13 prompts the occurrence of side reactions such as coking and hydrogen transfer (higher propane selectivity), and causes a lower selectivity to light olefins and shorter catalyst lifetime. Moreover, the larger surface area and higher pore volume with sample 3 would also benefit its catalytic stability.

SAPO-44 exhibits the worst catalytic performance, whatever be the lifetime and selectivity to light olefins. This is in agreement with the previous results.43 The difference between the microstructures of SAPO-44 and SAPO-34, as reflected in their XRD patterns, is supposed to be responsible for the bad catalytic behaviour of SAPO-44.

4 Conclusions

Three SAPO molecular sieves with the CHA topology have been aminothermally synthesized with DIPA, TMEDA and HMI templates, respectively. Both DIPA and TMEDA were found to direct the formation of SAPO-34 with a fast crystallization rate and high solid yield. 90% and 96.2% yields have been achieved after 12 h crystallization based on DIPA and TMEDA systems, respectively. Pure SAPO-44 was obtained in the HMI system. This is the first synthesis report on SAPO-44 using HMI as the template. Among the three amines, HMI shows a relatively high ability to prompt the silicon incorporation into the framework of SAPO-44. Moreover, the concentration of Si (4Al) species in the three samples has the following order: SAPO-34-TMEDA > SAPO-44 > SAPO-34-DIPA. According to the TG results, the smallest number of charge compensation centers confined in the CHA cage of SAPO-34-DIPA results in the lowest Si (4Al) content in the framework. MTO evaluation results reveal that SAPO-34-DIPA possesses excellent catalytic properties. The selectivity of ethylene and propylene on SAPO-34-DIPA could reach as high as 85.8% under the investigated conditions. It is believed that the excellent MTO performance together with the high synthetic yield will promote the aminothermal synthesis of SAPO-34-DIPA to be an interesting method for preparing the MTO catalyst.

Acknowledgements

The authors would like to acknowledge the National Natural Science Foundation of China (Grant no. 21101150 and Grant no. 21103180) for the support of this project. Dong Fan acknowledges the financial support of DNL-Topsøe scholarship provided by Haldor Topsøe Corporation.

Notes and references

  1. B. M. Lok, C. A. Messina, R. L. Patton, R. T. Gajek, T. R. Cannan and E. M. Flanigen, J. Am. Chem. Soc., 1984, 106, 6092–6093 CrossRef CAS .
  2. B. M. Lok, C. A. Messina, R. L. Patton, R. T. Gajek, T. R. Cannan and E. M. Flanigen, US Pat. 4440871, 1984 .
  3. S. Wilson and P. Barger, Microporous Mesoporous Mater., 1999, 29, 117–126 CrossRef CAS .
  4. J. Liang, H. Y. Li, S. Zhao, W. G. Guo, R. H. Wang and M. L. Ying, Appl. Catal., 1990, 64, 31–40 CrossRef CAS .
  5. I. M. Dahl and S. Kolboe, J. Catal., 1994, 149, 458–464 CrossRef CAS .
  6. W. G. Song, H. Fu and J. F. Haw, J. Am. Chem. Soc., 2001, 123, 4749–4754 CrossRef CAS PubMed .
  7. W. G. Song, H. Fu and J. F. Haw, J. Phys. Chem. B, 2001, 105, 12839–12843 CrossRef CAS .
  8. W. G. Song, J. F. Haw, J. B. Nicholas and C. S. Heneghan, J. Am. Chem. Soc., 2000, 122, 10726–10727 CrossRef CAS .
  9. J. Z. Li, Y. X. Wei, J. R. Chen, P. Tian, X. Su, S. T. Xu, Y. Qi, Q. Y. Wang, Y. Zhou, Y. L. He and Z. M. Liu, J. Am. Chem. Soc., 2012, 134, 836–839 CrossRef CAS PubMed .
  10. http://www.syn.ac.cn/english/index.php.
  11. D. W. Lewis, C. R. A. Catlow and J. M. Thomas, Chem. Mater., 1996, 8, 1112–1118 CrossRef CAS .
  12. D. W. Lewis, C. M. Freeman and C. R. A. Catlow, J. Phys. Chem., 1995, 99, 11194–11202 CrossRef CAS .
  13. N. Rajic, D. Stojakovic, S. Hocevar and V. Kaucic, Zeolites, 1993, 13, 384–387 CrossRef CAS .
  14. E. Dumitriu, A. Azzouz, V. Hulea, D. Lutic and H. Kessler, Microporous Mater., 1997, 10, 1–12 CrossRef CAS .
  15. A. M. Prakash and S. Unnikrishnan, J. Chem. Soc., Faraday Trans., 1994, 90, 2291–2296 RSC .
  16. G. Liu, P. Tian, J. Li, D. Zhang, F. Zhou and Z. Liu, Microporous Mesoporous Mater., 2008, 111, 143–149 CrossRef CAS PubMed .
  17. G. Y. Liu, P. Tian, Y. Zhang, J. Z. Li, L. Xu, S. H. Meng and Z. M. Liu, Microporous Mesoporous Mater., 2008, 114, 416–423 CrossRef CAS PubMed .
  18. R. Vomscheid, M. Briend, M. J. Peltre, P. P. Man and D. Barthomeuf, J. Phys. Chem., 1994, 98, 9614–9618 CrossRef CAS .
  19. M. Briend, R. Vomscheid, M. J. Peltre, P. P. Man and D. Barthomeuf, J. Phys. Chem., 1995, 99, 8270–8276 CrossRef CAS .
  20. K. Y. Lee, H. J. Chae, S. Y. Jeong and G. Seo, Appl. Catal., A, 2009, 369, 60–66 CrossRef CAS PubMed .
  21. N. Nishiyama, M. Kawaguchi, Y. Hirota, D. Van Vu, Y. Egashira and K. Ueyama, Appl. Catal., A, 2009, 362, 193–199 CrossRef CAS PubMed .
  22. R. E. Morris and S. J. Weigel, Chem. Soc. Rev., 1997, 26, 309–317 RSC .
  23. Y. Jin, Q. Sun, G. Qi, C. Yang, J. Xu, F. Chen, X. Meng, F. Deng and F.-S. Xiao, Angew. Chem., 2013, 125, 9342–9345 CrossRef .
  24. Q. S. Huo and R. R. Xu, J. Chem. Soc., Chem. Commun., 1990, 783–784 CAS .
  25. A. K. Sinha and S. Seelan, Appl. Catal., A, 2004, 270, 245–252 CrossRef CAS PubMed .
  26. N. Venkatathri, Catal. Commun., 2006, 7, 773–777 CrossRef CAS PubMed .
  27. D. M. Bibby and M. P. Dale, Nature, 1985, 317, 157–158 CrossRef CAS .
  28. Q. H. Huo, R. R. Xu, S. G. Li, Z. G. Ma, J. M. Thomas, R. H. Jones and A. M. Chippindale, J. Chem. Soc., Chem. Commun., 1992, 875–876 RSC .
  29. R. H. Jones, J. M. Thomas, J. S. Chen, R. R. Xu, Q. S. Huo, S. G. Li, Z. G. Ma and A. M. Chippindale, J. Solid State Chem., 1993, 102, 204–208 CrossRef CAS .
  30. D. Fan, P. Tian, S. Xu, Q. Xia, X. Su, L. Zhang, Y. Zhang, Y. He and Z. Liu, J. Mater. Chem., 2012, 22, 6568 RSC .
  31. S. Ashtekar, S. V. V. Chilukuri and D. K. Chakrabarty, J. Phys. Chem., 1994, 98, 4878–4883 CrossRef CAS .
  32. L. Zhang, J. Yao, C. Zeng and N. Xu, Chem. Commun., 2003, 2232 RSC .
  33. K. G. Strohmaier and P. Murry, US Pat. 6835363B1, 2004 .
  34. H. O. Pastore, E. C. de Oliveira, G. B. Superti, G. Gatti and L. Marchese, J. Phys. Chem. C, 2007, 111, 3116–3129 CAS .
  35. L. Xu, A. P. Du, Y. X. Wei, Y. L. Wang, Z. X. Yu, Y. L. He, X. Z. Zhang and Z. M. Liu, Microporous Mesoporous Mater., 2008, 115, 332–337 CrossRef CAS PubMed .
  36. Synthesis for sample 4 and 9 have been repeated twice. Similar product yields are obtained as shown in Table 1.
  37. C. Doremieux-Morin, C. Martin, J.-M. Bregeault and J. Fraissard, Appl. Catal., 1991, 77, 149–161 CrossRef CAS .
  38. L. Zhang, J. Bates, D. H. Chen, H. Y. Nie and Y. N. Huang, J. Phys. Chem. C, 2011, 115, 22309–22319 CAS .
  39. B. Chen and Y. Huang, J. Phys. Chem. C, 2007, 111, 15236–15243 CAS .
  40. A. Izadbakhsh, F. Farhadi, F. Khorasheh, S. Sahebdelfar, M. Asadi and Y. Z. Feng, Appl. Catal., A, 2009, 364, 48–56 CrossRef CAS PubMed .
  41. U. Olsbye, S. Svelle, M. Bjrgen, P. Beato, T. V. W. Janssens, F. Joensen, S. Bordiga and K. P. Lillerud, Angew. Chem., Int. Ed., 2012, 51, 5810–5831 CrossRef CAS PubMed .
  42. L.-T. Yuen, S. I. Zones, T. V. Harris, E. J. Gallegos and A. Auroux, Microporous Mater., 1994, 2, 105–117 CrossRef CAS .
  43. J. S. Chen and J. M. Thomas, Catal. Lett., 1991, 11, 199–207 CrossRef CAS .

Footnote

Electronic supplementary information (ESI) available. See DOI: 10.1039/c3ta12829f

This journal is © The Royal Society of Chemistry 2013
--------------------------------------------------------------------------------