├── pdtrend
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── images
│   │   │   ├── PDT_logo.png
│   │   │   ├── spatial.png
│   │   │   ├── detrended.png
│   │   │   └── master_trends.png
│   │   ├── lightcurves
│   │   │   └── lc.pbz2
│   │   ├── base.py
│   │   └── originals
│   │       ├── misclib.py
│   │       ├── clusterlib.py
│   │       └── pdtrending.py
│   ├── test
│   │   ├── __init__.py
│   │   ├── run.py
│   │   └── run_with_missing_data.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── logger.py
│   ├── __init__.py
│   ├── filling_missing_data.py
│   └── detrend.py
├── pdtrend.egg-info
│   ├── dependency_links.txt
│   ├── top_level.txt
│   ├── requires.txt
│   └── SOURCES.txt
├── MANIFEST.in
├── .gitignore
├── README.rst
├── LICENSE
├── setup.py
└── README.md

/pdtrend/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pdtrend/test/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pdtrend.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/pdtrend.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | pdtrend
2 | 
--------------------------------------------------------------------------------
/pdtrend/utils/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'kim'
2 | 
--------------------------------------------------------------------------------
/pdtrend.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.10
2 | scikit-learn>=0.17
3 | scipy>=0.17
4 | matplotlib>=1.5
5 | 
--------------------------------------------------------------------------------
/pdtrend/datasets/images/PDT_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwkim78/pdtrend/HEAD/pdtrend/datasets/images/PDT_logo.png
--------------------------------------------------------------------------------
/pdtrend/datasets/images/spatial.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwkim78/pdtrend/HEAD/pdtrend/datasets/images/spatial.png
--------------------------------------------------------------------------------
/pdtrend/datasets/lightcurves/lc.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwkim78/pdtrend/HEAD/pdtrend/datasets/lightcurves/lc.pbz2
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include pdtrend/datasets/lightcurves/*
2 | 
3 | exclude pdtrend/datasets/images/*
4 | exclude pdtrend/datasets/originals/*
5 | 
--------------------------------------------------------------------------------
/pdtrend/datasets/images/detrended.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwkim78/pdtrend/HEAD/pdtrend/datasets/images/detrended.png
--------------------------------------------------------------------------------
/pdtrend/datasets/images/master_trends.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwkim78/pdtrend/HEAD/pdtrend/datasets/images/master_trends.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | .idea/*
3 | 
4 | *.pyc
5 | 
6 | /pdtrend/test/outputs
7 | 
8 | dist/*
9 | /pdtrend.egg-info/
10 | 
11 | pdtrend.egg-info/PKG-INFO
12 | .DS_Store
13 | 
--------------------------------------------------------------------------------
/pdtrend/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'kim'
2 | 
3 | from pdtrend.filling_missing_data import FMdata
4 | from pdtrend.detrend import PDTrend
5 | from pdtrend.test.run import test
6 | 
7 | from pdtrend.utils.logger import Logger
8 | from pdtrend.datasets.base import load_lightcurve_set
9 | 
10 | 
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | PDT (Photometric DeTrending Algorithm Using Machine Learning) aims to remove systematic trends in light curves. For details about the algorithm, see `Kim et al. 2009 <http://adsabs.harvard.edu/abs/2009MNRAS.397..558K>`_. In brief, PDT uses machine learning to find clusters of highly correlated light curves, constructs one master trend per cluster, and detrends an individual light curve using the constructed master trends by minimizing residuals while constraining the coefficients to be non-negative. For more information, visit the GitHub repository (https://github.com/dwkim78/pdtrend).
--------------------------------------------------------------------------------
/pdtrend.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | .gitignore
2 | LICENSE
3 | MANIFEST.in
4 | README.md
5 | README.rst
6 | setup.py
7 | pdtrend/__init__.py
8 | pdtrend/detrend.py
9 | pdtrend/filling_missing_data.py
10 | pdtrend.egg-info/PKG-INFO
11 | pdtrend.egg-info/SOURCES.txt
12 | pdtrend.egg-info/dependency_links.txt
13 | pdtrend.egg-info/requires.txt
14 | pdtrend.egg-info/top_level.txt
15 | pdtrend/datasets/__init__.py
16 | pdtrend/datasets/base.py
17 | pdtrend/datasets/images/PDT_logo.png
18 | pdtrend/datasets/images/detrended.png
19 | pdtrend/datasets/images/master_trends.png
20 | pdtrend/datasets/images/spatial.png
21 | pdtrend/datasets/lightcurves/lc.pbz2
22 | pdtrend/datasets/originals/clusterlib.py
23 | pdtrend/datasets/originals/misclib.py
24 | pdtrend/datasets/originals/pdtrending.py
25 | pdtrend/test/__init__.py
26 | pdtrend/test/run.py
27 | pdtrend/test/run_with_missing_data.py
28 | pdtrend/utils/__init__.py
29 | pdtrend/utils/logger.py
--------------------------------------------------------------------------------
/pdtrend/datasets/base.py:
--------------------------------------------------------------------------------
1 | """
2 | Base IO code for all datasets.
3 | """
4 | 
5 | import sys
6 | 
7 | from os.path import dirname
8 | from os.path import join
9 | 
10 | 
11 | def load_lightcurve_set():
12 |     """
13 |     Return the set of light curves for testing pdtrend.
14 | 
15 |     Returns
16 |     -------
17 |     lcs : numpy.ndarray
18 |         An array of light curves.
19 |     """
20 | 
21 |     import bz2
22 | 
23 |     try:
24 |         import cPickle as pickle
25 |     except ImportError:
26 |         import pickle
27 | 
28 |     module_path = dirname(__file__)
29 | 
30 |     # The light curves are bzipped and pickled.
31 |     file_path = join(module_path, 'lightcurves/lc.pbz2')
32 |     # For Python 3.
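    # (encoding='bytes' lets Python 3 unpickle data that was presumably
    # written under Python 2, where str objects were byte strings.)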
33 | if sys.version_info.major >= 3: 34 | lcs = pickle.load(bz2.BZ2File(file_path, 'r'), encoding='bytes') 35 | # For Python 2. 36 | else: 37 | lcs = pickle.load(bz2.BZ2File(file_path, 'r')) 38 | 39 | return lcs 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Dae-Won Kim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | def readme(): 4 | with open('README.rst') as f: 5 | return f.read() 6 | 7 | setup( 8 | name='pdtrend', 9 | version='0.2.3', 10 | description='Photometric Detrending Algorithm', 11 | long_description=readme(), 12 | platforms=['any'], 13 | packages=find_packages(), 14 | include_package_data=True, 15 | url='https://github.com/dwkim78/pdtrend', 16 | license='MIT', 17 | author='Dae-Won Kim', 18 | author_email='dwkim78@gmail.com', 19 | install_requires=['numpy>=1.10', 'scikit-learn>=0.17', 'scipy>=0.17', 20 | 'matplotlib>=1.5'], 21 | keywords=['astronomy', 'light curves', 'time series', 22 | 'machine learning', 'trend', 'detrend'], 23 | classifiers=[ 24 | 'Development Status :: 2 - Pre-Alpha', 25 | 'Intended Audience :: Science/Research', 26 | 'License :: OSI Approved :: MIT License', 27 | 'Natural Language :: English', 28 | 'Operating System :: OS Independent', 29 | 'Programming Language :: Python :: 2.7', 30 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 31 | 'Topic :: Scientific/Engineering :: Astronomy' 32 | ] 33 | ) 34 | -------------------------------------------------------------------------------- /pdtrend/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | class Logger(): 5 | """ 6 | Create logger instance for writing to both console and a local file. 7 | 8 | Parameters 9 | ---------- 10 | filepath : str, optional 11 | Specify a log filename with the absolute path. 12 | If not given, no output is written to a file. 13 | """ 14 | def __init__(self, filepath=None): 15 | # create logger. 
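        # Note that logging.getLogger('pdtrend') returns the same logger
        # object on every call, so instantiating Logger repeatedly stacks
        # duplicate handlers; the test scripts reset logger.handlers when
        # they finish for exactly this reason.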
16 | logger = logging.getLogger('pdtrend') 17 | logger.setLevel(logging.DEBUG) 18 | 19 | # create formatter and add it to the handlers 20 | formatter = logging.Formatter('%(asctime)s %(levelname)s - %(message)s') 21 | #datefmt='%Y/%m/%d %H:%M:%S') 22 | 23 | # create file handler which logs even debug messages. 24 | if filepath and filepath[0] == '/': 25 | fh = logging.FileHandler(filepath, 'w') 26 | fh.setLevel(logging.DEBUG) 27 | fh.setFormatter(formatter) 28 | logger.addHandler(fh) 29 | 30 | # create console handler with a higher log level. 31 | ch = logging.StreamHandler() 32 | ch.setLevel(logging.INFO) 33 | ch.setFormatter(formatter) 34 | 35 | # add the handlers to logger 36 | logger.addHandler(ch) 37 | 38 | self.logger = logger 39 | 40 | def getLogger(self): 41 | """Return a logger instance.""" 42 | 43 | return self.logger 44 | 45 | if __name__ == '__main__': 46 | 47 | logger = Logger().getLogger() 48 | 49 | logger.debug('debug message') 50 | logger.info('info message') 51 | logger.warn('warn message') 52 | logger.error('error message') 53 | logger.critical('critical message') -------------------------------------------------------------------------------- /pdtrend/datasets/originals/misclib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Small function set for various usages. 5 | ''' 6 | 7 | from numpy import * 8 | 9 | def sigma_clipping(x, sigma=3., iteration=1): 10 | ''' 11 | Sigma clipping of x array. Replaced the values over the 12 | sigma limit with the median values of x array. 13 | 14 | x : 15 | Array of elements. 16 | 17 | sigma : 18 | Threshold. 19 | 20 | iteration : 21 | Number of iteration of sigma clipping. 1~3 is desirable. 22 | 23 | return : 24 | Sigma clipped array of x 25 | ''' 26 | 27 | x = array(x) 28 | xx = x.copy() 29 | for i in range(iteration): 30 | median_val = median(x) 31 | std_val = std(x) 32 | xx[where(fabs(x - median_val) > std_val * sigma)] = median_val 33 | return xx 34 | 35 | def cmp_length(l, m): 36 | ''' 37 | Compare the length of two input elements. 38 | For sort function in length. 39 | ''' 40 | return len(l) - len(m) 41 | 42 | def bin_lc(x, window_size=10, axis=None): 43 | '''Binning array and return new shortened array 44 | 45 | x : 46 | original series. 47 | 48 | window_size = 10 : 49 | default window size. 
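        Trailing samples that do not fill a complete window are discarded;
        note that the `end_index >= len(x)` test in the loop below also
        drops the final window when len(x) is an exact multiple of
        window_size.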
50 | 51 | axis = None : 52 | index of x 53 | 54 | return : 55 | list of [bin_lc, std_lc, new_axis if axis != None] 56 | ''' 57 | x = array(x) 58 | new_lc = [] 59 | std_lc = [] 60 | for i in range(int(len(x) / window_size) + 1): 61 | start_index = i * window_size 62 | end_index = (i + 1) * window_size 63 | if end_index >= len(x): 64 | break 65 | new_lc.append(mean(x[start_index:end_index])) 66 | std_lc.append(std(x[start_index:end_index])) 67 | 68 | if axis != None: 69 | new_axis = [] 70 | for i in range(int(len(axis) / window_size) + 1): 71 | start_index = i * window_size 72 | end_index = (i + 1) * window_size 73 | if end_index >= len(axis): 74 | break 75 | new_axis.append(mean(axis[start_index:end_index])) 76 | 77 | if axis != None: 78 | return [array(new_lc), array(std_lc), array(new_axis)] 79 | else: 80 | return [array(new_lc), array(std_lc)] 81 | 82 | if __name__=='__main__': 83 | print 'Function set for various usages' 84 | -------------------------------------------------------------------------------- /pdtrend/test/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pylab as pl 5 | 6 | from pdtrend.detrend import PDTrend 7 | from pdtrend.datasets.base import load_lightcurve_set 8 | from pdtrend.utils.logger import Logger 9 | 10 | 11 | def test(): 12 | logger = Logger().getLogger() 13 | 14 | logger.info('Loading the light curve set.') 15 | lcs = load_lightcurve_set() 16 | logger.info('\tThe number of light curves is %d.' % len(lcs)) 17 | 18 | np.random.seed(1024) 19 | # Create random weights. 20 | #weights = np.random.rand(lcs.shape[0]) 21 | # Same weights. 22 | weights = np.ones(lcs.shape[0]) 23 | 24 | # X and Y coordinates 25 | xy_coords = np.random.rand(lcs.shape[0], 2) * 1000. 26 | 27 | logger.info('Initializing pdtrend.') 28 | pdt = PDTrend(lcs, weights=weights, xy_coords=xy_coords, 29 | n_min_member=10, dist_cut=0.45) 30 | 31 | # We can run all the following routines using "pdtrend.run()", 32 | # but, here we do it individually to print log messages. 33 | logger.info('Calculating the distance matrix.') 34 | pdt._calculate_distance_matrix() 35 | logger.info('Searching for clusters using Birch.') 36 | pdt._find_clusters() 37 | logger.info('Filtering the clusters.') 38 | pdt._filter_clusters() 39 | logger.info('Building master trends.') 40 | pdt._build_master_trends() 41 | 42 | logger.info('Detrending a light curve using the master trends.') 43 | detrended = pdt.detrend(lcs[1]) 44 | 45 | logger.info('Plotting results.') 46 | # Creating an output folder. 47 | output_path = './outputs/' 48 | if not os.path.exists(output_path): 49 | os.makedirs(output_path) 50 | 51 | # Plotting spatial distribution. 52 | pdt.plot_spatial('%s/spatial.png' % output_path) 53 | 54 | # Plotting the master trends. 55 | pl.figure(figsize=(10, 4)) 56 | for i in range(len(pdt.master_trends)): 57 | pl.subplot(len(pdt.master_trends), 1, i + 1) 58 | pl.plot(pdt.master_trends[i], 'b.', label='Master trend %d' % (i + 1)) 59 | pl.ylabel('Normalized flux') 60 | pl.xlabel('Time index') 61 | pl.legend(numpoints=1, loc='lower right') 62 | pl.grid() 63 | pl.tight_layout() 64 | pl.savefig('%s/master_trends.png' % output_path) 65 | 66 | # Plotting a detrended result of one light curve. 
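    # Two panels: the raw light curve on top and the detrended one below,
    # each annotated with its standard deviation so the noise reduction is
    # easy to compare.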
67 | pl.figure(figsize=(14, 8)) 68 | pl.subplot(211) 69 | pl.plot(lcs[1], 'b.', label='Raw') 70 | pl.text(8955, 70000, r'$\sigma$: %.1f' % np.std(lcs[1]), fontsize=15, 71 | va='center', bbox=dict(boxstyle='round', ec='w', fc='g', alpha=0.3)) 72 | pl.ylabel('Flux') 73 | pl.grid() 74 | pl.legend(numpoints=1) 75 | 76 | pl.subplot(212) 77 | pl.plot(detrended, 'b.', label='Detrended') 78 | pl.text(8955, 70000, r'$\sigma$: %.1f' % np.std(detrended), fontsize=15, 79 | va='center', bbox=dict(boxstyle='round', ec='w', fc='g', alpha=0.3)) 80 | pl.ylabel('Flux') 81 | pl.xlabel('Time index') 82 | pl.grid() 83 | pl.legend(numpoints=1) 84 | pl.savefig('%s/detrended.png' % output_path) 85 | 86 | logger.info('Done.') 87 | logger.handlers = [] 88 | 89 | 90 | if __name__ == '__main__': 91 | test() -------------------------------------------------------------------------------- /pdtrend/test/run_with_missing_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pylab as pl 5 | 6 | from pdtrend.filling_missing_data import FMdata 7 | from pdtrend.detrend import PDTrend 8 | from pdtrend.datasets.base import load_lightcurve_set 9 | from pdtrend.utils.logger import Logger 10 | 11 | 12 | def test_with_missing_data(): 13 | logger = Logger().getLogger() 14 | 15 | logger.info('Loading the light curve set.') 16 | lcs = load_lightcurve_set() 17 | logger.info('\tThe number of light curves is %d.' % len(lcs)) 18 | 19 | # Create time information. 20 | times = np.ones(lcs.shape) * np.arange(lcs.shape[1]) 21 | 22 | logger.info('Make missing data points.') 23 | # The maximum ratio of missing data points. 24 | np.random.seed(2) 25 | missing_ratio = 1.0 26 | lcs_new = [] 27 | times_new = [] 28 | for i in range(len(lcs)): 29 | n_missing = int(np.random.rand() * missing_ratio * len(lcs[i])) 30 | index = np.arange(0, len(lcs[i])) 31 | np.random.shuffle(index) 32 | keep_index = index[n_missing:] 33 | keep_index.sort() 34 | 35 | lcs_new.append(lcs[i][keep_index]) 36 | # Make wrong time information for test. 37 | if i == 10: 38 | times_new.append(times[i][keep_index[:-2]]) 39 | else: 40 | times_new.append(times[i][keep_index]) 41 | 42 | logger.info('Fill missing data points.') 43 | # Create random weights. 44 | # weights = np.random.rand(lcs.shape[0]) 45 | # Same weights. 46 | weights = np.ones(lcs.shape[0]) 47 | 48 | # Fill missing values. 49 | fmd = FMdata(lcs_new, times_new) 50 | results = fmd.run() 51 | 52 | # The output, "results", contains the following three items. 53 | lcs = results['lcs'] 54 | epoch = results['epoch'] 55 | indices = results['indices'] 56 | 57 | # X and Y coordinates, randomly generated. 58 | xy_coords = np.random.rand(lcs.shape[0], 2) * 1000. 59 | 60 | logger.info('Initializing pdtrend.') 61 | pdt = PDTrend(lcs, weights=weights[indices], xy_coords=xy_coords, 62 | n_min_member=10, dist_cut=0.45) 63 | 64 | # We can run all the following protected routines using "pdtrend.run()", 65 | # but, here we do it individually to print log messages. 
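    # The four protected calls below reproduce what PDTrend.run() does
    # internally: distance matrix -> Birch clustering -> cluster filtering
    # -> master-trend construction.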
66 | logger.info('Calculating the distance matrix.') 67 | pdt._calculate_distance_matrix() 68 | logger.info('Searching for clusters using Birch.') 69 | pdt._find_clusters() 70 | logger.info('Filtering the clusters.') 71 | pdt._filter_clusters() 72 | logger.info('Building master trends.') 73 | pdt._build_master_trends() 74 | logger.info('Detrending a light curve using the master trends.') 75 | detrended = pdt.detrend(lcs[1]) 76 | 77 | logger.info('Plotting results.') 78 | # Creating an output folder. 79 | output_path = './outputs/' 80 | if not os.path.exists(output_path): 81 | os.makedirs(output_path) 82 | 83 | # Plotting spatial distribution. 84 | pdt.plot_spatial('%s/spatial.png' % output_path) 85 | 86 | # Plotting the master trends. 87 | pl.figure(figsize=(10, 4)) 88 | for i in range(len(pdt.master_trends)): 89 | pl.subplot(len(pdt.master_trends), 1, i + 1) 90 | pl.plot(epoch, pdt.master_trends[i], 'b.', label='Master trend %d' % (i + 1)) 91 | pl.ylabel('Normalized flux') 92 | pl.xlabel('Time index') 93 | pl.legend(numpoints=1, loc='lower right') 94 | pl.grid() 95 | pl.tight_layout() 96 | pl.savefig('%s/master_trends.png' % output_path) 97 | 98 | # Plotting a detrended result of one light curve. 99 | pl.figure(figsize=(14, 8)) 100 | pl.subplot(211) 101 | pl.plot(epoch, lcs[1], 'b.', label='Raw') 102 | pl.text(8955, 62200, r'$\sigma$: %.1f' % np.std(lcs[1]), fontsize=15, 103 | va='center', bbox=dict(boxstyle='round', ec='w', fc='g', alpha=0.3)) 104 | pl.ylabel('Flux') 105 | pl.grid() 106 | pl.legend(numpoints=1) 107 | 108 | pl.subplot(212) 109 | pl.plot(epoch, detrended, 'b.', label='Detrended') 110 | pl.text(8955, 58300, r'$\sigma$: %.1f' % np.std(detrended), fontsize=15, 111 | va='center', bbox=dict(boxstyle='round', ec='w', fc='g', alpha=0.3)) 112 | pl.ylabel('Flux') 113 | pl.xlabel('Time index') 114 | pl.grid() 115 | pl.legend(numpoints=1) 116 | pl.savefig('%s/detrended.png' % output_path) 117 | 118 | logger.info('Done.') 119 | logger.handlers = [] 120 | 121 | 122 | if __name__ == '__main__': 123 | test_with_missing_data() -------------------------------------------------------------------------------- /pdtrend/filling_missing_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import warnings 3 | 4 | from scipy.interpolate import UnivariateSpline 5 | 6 | 7 | class FMdata: 8 | """ 9 | Filling missing values using linear interpolation. 10 | 11 | Extrapolation should be avoided, and thus we sync 12 | each light curve based on the latest start epoch and the earliest end 13 | epoch among the all light curves. 14 | 15 | Parameters 16 | ---------- 17 | lcs : array_like 18 | An array containing a set of light curves. The number of data points 19 | in each light curve does not need to be identical. 20 | times : array_like 21 | An array of observation epochs of the corresponding light curve. 22 | n_min_data : int, optional 23 | The minimum number of data points in each light curve. 24 | If fewer than this, the light curve will be discarded. 25 | """ 26 | def __init__(self, lcs, times, n_min_data=100): 27 | # Type check. 28 | if type(lcs) != np.ndarray: 29 | lcs = np.array(lcs) 30 | if type(times) != np.ndarray: 31 | times = np.array(times) 32 | 33 | # Dimension check. 34 | if lcs.shape[0] != times.shape[0]: 35 | raise RuntimeError('The number of light curves and ' + 36 | 'the number of times do not match.') 37 | 38 | # Discard light curves having fewer data points than "n_min_data". 
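        # (For example, with the default n_min_data=100, a light curve with
        # only 90 points is dropped here rather than interpolated.)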
39 |         keep_index = []
40 |         for i in range(len(lcs)):
41 |             if len(lcs[i]) >= n_min_data:
42 |                 keep_index.append(i)
43 | 
44 |         # Initialize.
45 |         self.lcs = lcs[keep_index]
46 |         self.times = times[keep_index]
47 | 
48 |     def run(self):
49 |         """
50 |         Fill missing values and return the results.
51 | 
52 |         Returns
53 |         -------
54 |         results : dict
55 |             A Python dictionary containing three items:
56 |             the missing-value-filled light curves,
57 |             the synced epoch, and the indices of the
58 |             corresponding raw light curves.
59 |         """
60 |         # Sync times.
61 |         self._sync_time()
62 | 
63 |         # Fill missing values.
64 |         self._fill_missing_values()
65 | 
66 |         # Returned output is a Python dictionary containing three items.
67 |         results = {'lcs': self.filled_lcs, 'epoch': self.synced_epoch,
68 |                    'indices': self.indices}
69 |         return results
70 | 
71 |     def _sync_time(self):
72 |         """
73 |         Walk through the times of all light curves and create a one-
74 |         dimensional list of times that is used to fill missing values for
75 |         every light curve. To prevent extrapolation, we choose the latest
76 |         start epoch and the earliest end epoch as the new epoch range.
77 |         """
78 |         # Get all unique epochs and find
79 |         # the latest start epoch and the earliest end epoch.
80 |         all_epoch = []
81 |         latest_start_epoch = -np.inf
82 |         earliest_end_epoch = np.inf
83 |         for t in self.times:
84 |             all_epoch = np.hstack([all_epoch, t])
85 |             if t[0] > latest_start_epoch:
86 |                 latest_start_epoch = t[0]
87 |             if t[-1] < earliest_end_epoch:
88 |                 earliest_end_epoch = t[-1]
89 | 
90 |         all_epoch = np.unique(np.ravel(all_epoch))
91 |         all_epoch.sort()
92 | 
93 |         # Cut epoch.
94 |         start_index = np.searchsorted(all_epoch, latest_start_epoch)
95 |         end_index = np.searchsorted(all_epoch, earliest_end_epoch) + 1
96 |         epoch = all_epoch[start_index:end_index]
97 | 
98 |         self.synced_epoch = epoch
99 | 
100 |     def _fill_missing_values(self):
101 |         """Fill missing values for each light curve."""
102 |         filled_lcs = []
103 |         filled_indices = []
104 |         for i in range(len(self.lcs)):
105 |             # Check whether the lengths match. Print a warning but do not
106 |             # break the loop; the corresponding weight must then be discarded too.
107 |             if len(self.times[i]) != len(self.lcs[i]):
108 |                 warnings.warn(('The number of data points of the %dth ' +
109 |                                'light curve does not match the number of ' +
110 |                                'its times, and thus the light curve is ' +
111 |                                'discarded.') % (i + 1))
112 |                 continue
113 | 
114 |             # Linear spline fit without smoothing, so the fitted line passes
115 |             # through the exact values of the given input data.
116 |             spl = UnivariateSpline(self.times[i], self.lcs[i], k=1, s=0.)
117 |             filled_lc = spl(self.synced_epoch)
118 | 
119 |             # Add the filled lc to a list.
120 |             filled_lcs.append(filled_lc)
121 |             # Indices for the new list.
122 |             filled_indices.append(i)
123 | 
124 |         self.filled_lcs = np.array(filled_lcs)
125 |         self.indices = np.array(filled_indices)
126 | 
--------------------------------------------------------------------------------
/pdtrend/detrend.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from sklearn.cluster import Birch
4 | from scipy.optimize import least_squares
5 | 
6 | 
7 | class PDTrend:
8 |     """
9 |     Remove systematic trends in a set of light curves.
10 | 
11 |     PDTrend determines master trends by applying machine learning to the
12 |     Pearson correlation coefficients between light curves, and then provides
13 |     a method to detrend each light curve using the determined master trends.
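    A minimal usage sketch (``lcs`` is an N x M array of synced light
    curves)::

        pdt = PDTrend(lcs)
        pdt.run()
        detrended = pdt.detrend(lcs[0])
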
14 | For details, see Kim et al. 2009 or visit the GitHub repository 15 | (https://goo.gl/uRAXfr). 16 | 17 | Parameters 18 | ---------- 19 | lcs : array_like 20 | A list of light curves. It must be the shape of N x M, 21 | where N is the number of light curves and M is the number of 22 | data points. Note that M must be same for all N light curves. 23 | pdtrend assumes that these M data points are synced in time. 24 | Thus pdtrend does not use any time information. 25 | weights : array_like, optional 26 | A list of weights for the corresponding light curves. 27 | It is used only when constructing master trends. It must contain N 28 | elements. N is the number of the light curves. See the "lcs" parameter. 29 | Default is None, so the identical weights for all light curves. 30 | xy_coords : array_like, optional 31 | X and Y coordinates of stars of the light curves. 32 | It must contain Nx2 elements, where N is the number of the light curves. 33 | If the coordinates are given, the function "plot_spatial" can be called 34 | after constructing master trends, which will plot the spatial 35 | distribution of the master trends. 36 | n_min_member : int, optional 37 | The minimum number of members in each cluster. Default is 10. 38 | dist_cut : float, optional 39 | Distance cut to filter clusters found using the Birch clustering 40 | algorithm. If the median distance 41 | between members in a cluster is larger than the cut, 42 | the cluster is discarded. Must be between [0, 1]. Default is 0.45. 43 | branching_factor : int, optional 44 | Branching factor for the Birch clustering. Default is 50. 45 | threshold : float, optional 46 | Threshold for the Birch clustering. Default is 0.5. 47 | """ 48 | def __init__(self, lcs, weights=None, xy_coords=None, 49 | n_min_member=10, dist_cut=0.45, 50 | branching_factor=50, threshold=0.5): 51 | # Convert the light curve set to numpy array. 52 | if type(lcs) != np.ndarray: 53 | lcs = np.array(lcs) 54 | 55 | # Sanity check. 56 | if len(lcs.shape) != 2: 57 | raise RuntimeError('lcs must be a 2-dimensional array.') 58 | 59 | if lcs.shape[0] < n_min_member: 60 | raise RuntimeError('The number of light curves in lcs ' + 61 | 'is fewer than the n_min_member.') 62 | if weights is not None: 63 | if type(weights) != np.ndarray: 64 | weights = np.array(weights) 65 | 66 | if lcs.shape[0] != weights.shape[0]: 67 | raise RuntimeError('Shapes of lcs and weights do not match.') 68 | else: 69 | # Same weights for the all light curves. 70 | weights = np.ones(lcs.shape[0]) 71 | 72 | if xy_coords is not None: 73 | if type(xy_coords) != np.ndarray: 74 | xy_coords = np.array(xy_coords) 75 | 76 | if lcs.shape[0] != xy_coords.shape[0]: 77 | raise RuntimeError('Shapes of lcs and xy_coords do not match.') 78 | 79 | # Set parameters. 80 | self.lcs = lcs 81 | self.weights = weights 82 | self.xy_coords = xy_coords 83 | self.n_min_member = n_min_member 84 | self.dist_cut = dist_cut 85 | 86 | # Initialize. 87 | self.corr_matrix = None 88 | self.dist_matrix = None 89 | self.birch = None 90 | self.branching_factor = branching_factor 91 | self.threshold = threshold 92 | 93 | def _calculate_distance_matrix(self): 94 | """ 95 | Calculate a distance matrix, which is defined as: 96 | (1. - correlation_matrix) / 2. 97 | """ 98 | corr_matrix = np.corrcoef(self.lcs) 99 | dist_matrix = (1. - corr_matrix) / 2. 
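        # A pair with correlation +1 gets distance 0 and a pair with
        # correlation -1 gets distance 1, so the matrix is a proper
        # dissimilarity bounded in [0, 1].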
100 | 101 | self.corr_matrix = corr_matrix 102 | self.dist_matrix = dist_matrix 103 | 104 | def _find_clusters(self): 105 | """Find clusters using Birch and the distance matrix.""" 106 | # TODO: Need to test with different threshold. 107 | # Need to test with multiple dataset having trends. 108 | # Branching factor is fine. 109 | birch = Birch(branching_factor=self.branching_factor, 110 | threshold=self.threshold, 111 | n_clusters=None).fit(self.dist_matrix) 112 | 113 | self.birch = birch 114 | 115 | def _filter_clusters(self): 116 | """ 117 | Discard a cluster if 1) it has less than "n_min_member" members, or 118 | 2) median distance between each member is larger than "dist_cut". 119 | """ 120 | unique_labels = set(self.birch.labels_) 121 | _filtered_labels = [] 122 | 123 | for label in unique_labels: 124 | index = [i for i in range(len(self.birch.labels_)) if 125 | self.birch.labels_[i] == label] 126 | # The number of members in the given cluster. 127 | if len(index) < self.n_min_member: 128 | continue 129 | 130 | dist_list = [] 131 | for i in range(len(index) - 1): 132 | for j in range(i + 1, len(index)): 133 | dist_list.append(self.dist_matrix[index[i], index[j]]) 134 | 135 | # Median distance check. 136 | if np.median(dist_list) <= self.dist_cut: 137 | _filtered_labels.append(label) 138 | 139 | self._filtered_labels = _filtered_labels 140 | 141 | # Check how many clusters are left. 142 | if len(self._filtered_labels) == 0: 143 | raise RuntimeWarning( 144 | 'No clusters were found. ' + 145 | 'Adjust input parameters and try again. ' + 146 | 'For instance, decrease "n_min_member" or ' + 147 | 'increase "dist_cut". For details, ' + 148 | 'visit https://github.com/dwkim78/pdtrend' 149 | ) 150 | 151 | def _build_master_trends(self): 152 | """Build master trends using the filtered clusters.""" 153 | master_trends_indices = [] 154 | master_trends = [] 155 | for label in self._filtered_labels: 156 | index = [i for i in range(len(self.birch.labels_)) if 157 | self.birch.labels_[i] == label] 158 | master_trends_indices.append(index) 159 | 160 | trends = [] 161 | weights_sum = 0 162 | for i in range(len(index)): 163 | # Normalization. 164 | med_lc = np.median(self.lcs[index[i]]) 165 | normed = (self.lcs[index[i]] - med_lc) / med_lc 166 | 167 | # Weights. 168 | weights = self.weights[index[i]] 169 | normed *= weights 170 | weights_sum += weights 171 | 172 | trends.append(normed) 173 | 174 | # Construct a master trends using the normalized and weighted 175 | # light curves from the above for loop. 176 | master_trends.append(np.sum(trends, axis=0) / weights_sum) 177 | 178 | self.master_trends_indices = master_trends_indices 179 | self.master_trends = master_trends 180 | 181 | def run(self): 182 | """Run pdtrend pipeline.""" 183 | if self.corr_matrix is None or self.dist_matrix is None: 184 | self._calculate_distance_matrix() 185 | self._find_clusters() 186 | else: 187 | # For the safety, just in case. 188 | # If corr_matrix and dist_matrix is not None, 189 | # birch must be not None either. 
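            # Hence calling run() again skips the expensive distance-matrix
            # and clustering steps and only redoes the filtering and
            # master-trend construction.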
190 | if self.birch is None: 191 | self._find_clusters() 192 | 193 | self._filter_clusters() 194 | self._build_master_trends() 195 | 196 | def _func_trends(self, p, x): 197 | """Return sum of the trends.""" 198 | return np.sum(x * p.reshape(len(p), 1), axis=0) 199 | 200 | def _residuals(self, p, x, y): 201 | """Return residual between sum of the trends and a light curve.""" 202 | return y - self._func_trends(p, x) 203 | 204 | def detrend(self, lc): 205 | """Detrend a light curves using the constructed master trends.""" 206 | 207 | # Convert the light curve set to numpy array. 208 | if type(lc) != np.ndarray: 209 | lc = np.array(lc) 210 | 211 | # Normalize. 212 | med_lc = np.median(lc) 213 | raw = (lc - med_lc) / med_lc 214 | 215 | # Initial guess. 216 | p0 = np.ones(len(self.master_trends)) 217 | # Bounds in [0, infinite] 218 | p1 = least_squares(self._residuals, p0, args=(self.master_trends, raw), 219 | bounds=[0, np.inf]) 220 | p1 = p1['x'] 221 | 222 | detrended = raw - self._func_trends(p1, self.master_trends) 223 | 224 | # Scale back to the original flux. 225 | detrended = detrended * med_lc + med_lc 226 | 227 | return detrended 228 | 229 | def plot_spatial(self, filename='spatial.png'): 230 | """ 231 | Plot a spatial distribution of the constructed master trends. 232 | 233 | Parameters 234 | ---------- 235 | filename : str, optional 236 | A png filename including the path. For example, 237 | "./outputs/spatial.png". Default is "spatial.png" 238 | """ 239 | if self.xy_coords is None: 240 | raise RuntimeError('No x and y coordinates are given.') 241 | 242 | import pylab as pl 243 | 244 | pl.figure(figsize=(12, 12)) 245 | pl.title('Spatial distribution of the constructed master trends') 246 | 247 | colors = 'bgrkmc' 248 | marks = 's^.+x*' 249 | for i in range(len(self.master_trends_indices)): 250 | indices = self.master_trends_indices[i] 251 | pl.plot(self.xy_coords[indices][:, 0], 252 | self.xy_coords[indices][:, 1], 253 | marker=marks[int(i / len(colors)) % len(marks)], 254 | color=colors[i % len(colors)], ls='None', 255 | label='Master trend %d: %d light curves' % 256 | (i + 1, len(self.master_trends_indices[i]))) 257 | pl.xlabel('X coordinate') 258 | pl.ylabel('Y coordinate') 259 | pl.legend(numpoints=1) 260 | pl.savefig(filename) 261 | -------------------------------------------------------------------------------- /pdtrend/datasets/originals/clusterlib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Libraries for PDtrending algorithm. The major part of the libraries 5 | consist of clustering routine. 6 | ''' 7 | 8 | import sys 9 | import math 10 | from numpy import * 11 | 12 | import rpy 13 | from rpy import r 14 | 15 | from misclib import * 16 | 17 | sys.path.append('./') 18 | 19 | def Pycluster_to_hcluster(tree): 20 | ''' 21 | Transform tree structure of Pycluster to hcluster to use 22 | dendrogram function of hcluster which plots dendrogram. 23 | 24 | tree : 25 | Tree from Pycluster. 26 | 27 | return : 28 | hcluster tree. 29 | ''' 30 | 31 | hcluster_tree = [] 32 | len_tree = len(tree) + 1 33 | i = 0 34 | for node in tree: 35 | hcluster_node = [] 36 | #make new node for hcluster 37 | node_left = node.left 38 | node_right = node.right 39 | #modify index of Pycluster for hcluster. 
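        # (Pycluster encodes links to internal nodes as -1, -2, ... for
        # nodes 0, 1, ...; hcluster instead numbers internal nodes from
        # len_tree upward, after the leaf indices.)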
40 | if node_left < 0: 41 | node_left = (node_left + 1) * -1 + len_tree 42 | if node_right < 0: 43 | node_right = (node_right + 1) * -1 + len_tree 44 | 45 | #sort 46 | if node_left > node_right: 47 | dummy = node_right 48 | node_right = node_left 49 | node_left = dummy 50 | 51 | #count number of whole elements below each node. 52 | s_group = [] 53 | simplify_group(find_group_with_node_index(tree, i), s_group) 54 | 55 | hcluster_node.append([node_left, node_right, node.distance, len(s_group)]) 56 | hcluster_tree.append(hcluster_node[0]) 57 | i += 1 58 | hcluster_tree = array(hcluster_tree) 59 | #print hcluster_tree 60 | 61 | return hcluster_tree 62 | 63 | def remove_small(groups, size=9): 64 | ''' 65 | Remove groups smaller than size. 66 | 67 | groups : 68 | List of group. 69 | 70 | size : 71 | Minimum size of group. 72 | ''' 73 | 74 | new_groups = [] 75 | 76 | for group in groups: 77 | if len(group) > size: 78 | new_groups.append(group) 79 | 80 | return new_groups 81 | 82 | def find_group_with_distance(tree, distance): 83 | ''' 84 | Find group from tree based on the index. 85 | 86 | tree : 87 | Tree structure returned from Pycluster module 88 | 89 | distance : 90 | Maximum distance within group. 91 | 92 | return : 93 | Multiple groups within the distance. 94 | ''' 95 | 96 | groups = [] 97 | 98 | #get node within the distance 99 | node_list = [] 100 | for i in range(len(tree)): 101 | if tree[i].distance > distance: 102 | break 103 | node_list.append(i) 104 | 105 | #make group. 106 | temp_groups = [] 107 | for node_index in node_list: 108 | group = find_group_with_node_index(tree, node_index) 109 | left_group = [] 110 | right_group = [] 111 | simplify_group(group[0], left_group) 112 | simplify_group(group[1], right_group) 113 | group = left_group + right_group 114 | 115 | temp_groups.append(group) 116 | 117 | #check overlapped group and remove. 118 | groups = remove_subset_cluster(temp_groups) 119 | 120 | return groups 121 | 122 | def find_group_with_node_index(tree, index): 123 | ''' 124 | Find group from tree based on the index of node. 125 | 126 | tree : 127 | Tree structure returned from Pycluster module 128 | 129 | index : 130 | Real index of tree. 131 | 132 | return : 133 | Two groups divided by index. 134 | ''' 135 | 136 | group = [] 137 | node = tree[index] 138 | eles = [node.left, node.right] 139 | for ele in eles: 140 | if ele >= 0: 141 | group.append([ele]) 142 | else: 143 | group.append(find_group_with_link_index(tree, ele)) 144 | 145 | return group 146 | 147 | def find_group_with_link_index(tree, index): 148 | ''' 149 | Find group from tree based on the index of link. 150 | Value of link is smaller than -1. -1 means the element 151 | is linked with 0th node. -2 means it is linked with 1th node and so on. 152 | 153 | tree : 154 | Tree structure returned from Pycluster module 155 | 156 | index : 157 | Linkage index less than -1. 158 | 159 | return : 160 | Two groups divided by index. 161 | ''' 162 | 163 | group = [] 164 | node = tree[index * -1 - 1] 165 | eles = [node.left, node.right] 166 | for ele in eles: 167 | if ele >= 0: 168 | group.append([ele]) 169 | else: 170 | group.append(find_group_with_link_index(tree, ele)) 171 | 172 | return group 173 | 174 | 175 | def simplify_group(group, s_group): 176 | ''' 177 | Unify multiple lists inside group into one list. 
178 | 179 | group : 180 | Looks like [[6, 4], 3] 181 | 182 | s_group : 183 | simplified group looks like [6, 4, 3] 184 | 185 | return : 186 | None 187 | ''' 188 | 189 | if isinstance(group, int): 190 | s_group.append(group) 191 | return 192 | else: 193 | for ele in group: 194 | simplify_group(ele, s_group) 195 | 196 | def find_group_DW(tree, dist_matrix, mes=0, seed_max=0, l_significance=0.1): 197 | ''' 198 | For more details, see the Kim et al. 2008. 199 | 200 | tree : 201 | Tree structure returned from Pycluster module. 202 | 203 | dist_matrix : 204 | Distance matrix (= 1. - correlation matrix) 205 | 206 | mes = 0 : 207 | Total number of measurement of each light curve. 208 | 209 | seed_max = 0 : 210 | To get more tighter seed. 1 ~ 10 are good values. '10' gets more tighter clusters than '1'. 211 | 212 | return : 213 | List of clusters. 214 | ''' 215 | 216 | r.library('nortest') 217 | 218 | clusters = [] 219 | #print tree, len(tree) 220 | density_list = [] 221 | for i in range(len(dist_matrix) - 1): 222 | for j in range(i + 1, len(dist_matrix)): 223 | density_list.append(dist_matrix[i][j]) 224 | density_list_clip = sigma_clipping(density_list, sigma=3.) 225 | overall_density = (max(density_list_clip) - min(density_list_clip)) / len(dist_matrix) 226 | #print overall_density, mean(density_list_clip), std(density_list_clip) 227 | 228 | #get highly correlated pair of elements. 229 | initial_seed = [] 230 | for i in range(len(tree)): 231 | #both left and right element has to be star. not a link to other cluster. 232 | if tree[i].left >= 0 and tree[i].right >= 0: 233 | #to get more tight elements. 234 | if dist_matrix[tree[i].left][tree[i].right] <= median(density_list_clip) / seed_max: 235 | if mes == 0: 236 | initial_seed.append(i) 237 | elif dist_matrix[tree[i].left][tree[i].right] <= (1. - 3. / math.sqrt(mes)): 238 | initial_seed.append(i) 239 | #print initial_seed 240 | 241 | #start from highly correlated initial pair. 242 | for i in initial_seed: 243 | #print tree[i] 244 | current_node = i 245 | while current_node < len(tree) - 1: 246 | cluster_1 = [] 247 | cluster_2 = [] 248 | #find base cluster --> cluster_1 249 | simplify_group(find_group_with_node_index(tree, current_node), cluster_1) 250 | #find cluster which will be merged --> cluster_2 251 | dummy = find_one_side_group(tree, (current_node + 1) * -1) 252 | current_node = dummy[0] 253 | simplify_group(dummy[1], cluster_2) 254 | 255 | #check the density changes with overall density 256 | #initial density 257 | d_1 = [] 258 | for ele_i in range(len(cluster_1) - 1): 259 | for ele_j in range(ele_i + 1, len(cluster_1)): 260 | if ele_i != ele_j: 261 | d_1.append(dist_matrix[cluster_1[ele_i]][cluster_1[ele_j]]) 262 | #density after merged 263 | d_merge = [] 264 | cluster_3 = hstack([cluster_1, cluster_2]) 265 | for ele_i in range(len(cluster_3) - 1): 266 | for ele_j in range(ele_i + 1, len(cluster_3)): 267 | if ele_i != ele_j: 268 | d_merge.append(dist_matrix[cluster_3[ele_i]][cluster_3[ele_j]]) 269 | 270 | d_1 = array(d_1) 271 | d_merge = array(d_merge) 272 | if len(d_merge) < 8: 273 | continue 274 | else: 275 | #the resulting clusters are almost identical. not use anymore. 276 | #d_merge = array(d_merge) 277 | #d_merge = .5 * log((1. + d_merge) / (1. - d_merge)) 278 | 279 | ad = r.ad_test(d_merge) 280 | ad_p = ad['p.value'] 281 | p_value = ad_p 282 | 283 | #check the level of significance 284 | #if it's out of normality, the previous cluster is the final cluster. 
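                # In other words: climb the tree, absorbing the sibling group
                # at each merge while the merged pairwise distances remain
                # consistent with normality; the first failed AD test freezes
                # the previous cluster as the final one.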
285 | if p_value < l_significance: 286 | #becausd AD test needs at least 8 elements. 287 | if len(cluster_1) >= 5: 288 | #print cluster_1 289 | clusters.append(cluster_1) 290 | break 291 | #it's still gaussian, but if there comes outliers into clusters, stop it. 292 | #the resulting clusters are almost identical. not use anymore. 293 | #elif len(d_1[where(d_1 > mean(density_list_clip))]) > 0: 294 | # if len(cluster_1) >= 5: 295 | # clusters.append(cluster_1) 296 | # break 297 | 298 | return clusters 299 | 300 | def find_one_side_group(tree, index): 301 | ''' 302 | Return group of one side. 303 | 304 | tree : 305 | Tree structure returned from Pycluster module 306 | 307 | index : 308 | Index of tree structure. This routine first find node of index 309 | which looks like [index, other_index or node_value] 310 | and, return the group of [other_index] or [value] 311 | ''' 312 | 313 | for i in range(len(tree)): 314 | if tree[i].left == index: 315 | group_index = tree[i].right 316 | break 317 | if tree[i].right == index: 318 | group_index = tree[i].left 319 | break 320 | 321 | if group_index >=0 : 322 | return [i, [group_index]] 323 | else: 324 | return [i, find_group_with_link_index(tree, group_index)] 325 | 326 | def cal_correlation_matrix(whole_lc, bin_size=0): 327 | ''' 328 | Calculate the Pearson correlation matrix. 329 | 330 | whole_lc : 331 | List of light curves. 332 | 333 | bin_size : 334 | Binning window size. to reduce noise. 335 | Binning the data only when calculated correlation values. 336 | Will not change original light curves. 337 | 338 | return : 339 | The Pearson correlation magtrix. 340 | ''' 341 | 342 | corr_list = ones((len(whole_lc), len(whole_lc))) 343 | for i in range(len(whole_lc) - 1): 344 | #print ' #Correlation values of %dth star is calculating..' % (i + 1) 345 | for j in range(i, len(whole_lc)): 346 | if bin_size == 0: 347 | pear_corr = corrcoef(whole_lc[i], whole_lc[j])[0][1] 348 | else: 349 | pear_corr = corrcoef(bin_lc(whole_lc[i], bin_size)[0], bin_lc(whole_lc[j], bin_size)[0])[0][1] 350 | corr_list[i, j] = pear_corr 351 | corr_list[j, i] = pear_corr 352 | 353 | return corr_list 354 | 355 | def get_detrened_lc_set(lc, trend_set): 356 | ''' 357 | Return de-trended lc by using trends set. 358 | We use Multiple Linear Regression Method to de-trend. 359 | 360 | lc : 361 | Original light curve of flux. 362 | 363 | trend_set : 364 | Set of trend light curves constructed by create_trend routine. 365 | 366 | return : 367 | De-trended light curve. 368 | ''' 369 | 370 | #Multiple linear regression method, least square method 371 | X = ones([len(trend_set[0]), len(trend_set) + 1]) 372 | X[:, 0] = [1] 373 | X[:, 1:] = transpose(trend_set) 374 | beta_1 = linalg.inv(dot(transpose(X), X)) 375 | beta_2 = dot(transpose(X), lc) 376 | beta = dot(beta_1, beta_2) 377 | 378 | return lc - dot(X, beta) 379 | 380 | def get_quadprog(lc, trend_set): 381 | ''' 382 | Return de-trended lc by quadratic programming. 383 | It constraints the free parameters to be bigger than 0. 384 | See Kim et al. 2008 for more details. 385 | 386 | lc : 387 | Original light curve of flux. 388 | 389 | trend_set : 390 | Set of trend light curves constructed by create_trend routine. 391 | 392 | return : 393 | De-trended light curve. 
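    Internally this is a non-negative least-squares fit: it minimises
    ||lc - trend_set^T beta||^2 subject to beta >= 0, via the solve.QP
    call from R's quadprog package below.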
394 | ''' 395 | 396 | r.library('quadprog') 397 | 398 | X = transpose(trend_set) 399 | dmat = r.crossprod(X, X) 400 | dvec = r.crossprod(lc, X) 401 | 402 | results = r.solve_QP(dmat, dvec, r.diag(len(trend_set))) 403 | #print results['solution'], results['value'] 404 | 405 | return lc - dot(results['solution'], trend_set) 406 | 407 | def get_linprog(lc, trend_set): 408 | ''' 409 | Return de-trended lc by linear programming. 410 | It constraints the free parameters to be bigger than 0. 411 | 412 | lc : 413 | Original light curve of flux. 414 | 415 | trend_set : 416 | Set of trend light curves constructed by create_trend routine. 417 | 418 | return : 419 | De-trended light curve. 420 | ''' 421 | 422 | r.library('linprog') 423 | 424 | X = transpose(trend_set) 425 | #dmat = r.crossprod(X, X) 426 | dvec = r.crossprod(lc, X) 427 | 428 | results = r.solveLP(dvec, zeros([len(trend_set)]), r.diag(len(trend_set))) 429 | print results['opt'], results['solution'] 430 | sys.exit() 431 | 432 | #return lc - dot(results['solution'], trend_set) 433 | 434 | def kovacs(lc, trend_set): 435 | ''' 436 | Apply TFA with determined trends by our algorithm. 437 | See Kovacs et al. 2005 for more detail. 438 | 439 | lc : 440 | Original light curve of flux 441 | 442 | trend_set : 443 | Set of trend light curves constructed by create_trend routine. 444 | 445 | return : 446 | De-trended light curve. 447 | ''' 448 | 449 | dup_lc = lc[::] 450 | dup_trend = trend_set[::] 451 | 452 | dup_lc -= mean(dup_lc) 453 | for i in range(len(dup_trend)): 454 | dup_trend[i] -= mean(dup_trend[i]) 455 | 456 | g = zeros((len(dup_trend), len(dup_trend))) 457 | h = zeros((len(dup_trend))) 458 | c = zeros((len(dup_trend))) 459 | for i in range(len(dup_trend)): 460 | for j in range(len(dup_trend)): 461 | g[i, j] = sum(dot(dup_trend[i], dup_trend[j])) 462 | G = linalg.inv(g) 463 | 464 | for i in range(len(dup_trend)): 465 | h[i] = sum(dot(dup_lc, dup_trend[i])) 466 | for i in range(len(dup_trend)): 467 | c[i] = sum(dot(G[i:i + 1], h)) 468 | 469 | trend = dot(c, dup_trend) 470 | detrended = dup_lc - trend 471 | 472 | return detrended 473 | 474 | def create_trend(template_set, whole_lc): 475 | ''' 476 | Create trend with selected template light curves. 477 | We use weighted sum of normlized light curves by median values of the lc. 478 | See Kim et al. 2008 for more details. 479 | 480 | template_set : 481 | Indices of template light curves. 482 | 483 | whole_lc : 484 | Light curves of every stars. 485 | 486 | return : 487 | Constructed trend. 488 | ''' 489 | 490 | trend = [] 491 | trend_lc = [] 492 | 493 | #normalized by mean value 494 | normalized = [] 495 | for i in template_set: 496 | normalized.append((whole_lc[i] - mean(whole_lc[i])) / std(whole_lc[i])) 497 | 498 | #weighting by sigma^2 499 | std_inv_list = [] 500 | for i in range(len(normalized)): 501 | std_inv_list.append(1. / std(normalized[i])**2.) 502 | weight_list = std_inv_list / sum(std_inv_list) 503 | 504 | for i in range(len(normalized)): 505 | trend_lc.append(normalized[i] * weight_list[i]) 506 | trend_lc = array(trend_lc) 507 | 508 | #make trend.. 509 | for i in range(len(trend_lc[0])): 510 | #trend.append(median(trend_lc[:, i])) 511 | trend.append(sum(trend_lc[:, i])) 512 | 513 | return array(trend) 514 | 515 | def remove_subset_cluster(cluster_list): 516 | ''' 517 | Remove subset clusters which are included in other clusters. 518 | 519 | cluster_list : 520 | Initial list of clusters. 521 | 522 | return : 523 | List of clusters after removal of subsets. 
524 | ''' 525 | 526 | cluster_list.sort(cmp=cmp_length) 527 | dup = cluster_list[::] 528 | 529 | new_cluster_list = [] 530 | #print dup 531 | 532 | #check sub-set. 533 | for i in range(len(dup)): 534 | if_duplicated = 0 535 | cluster_d = dup[i] 536 | len_cluster_d = len(cluster_d) 537 | #print 'd:', cluster_d 538 | for j in range(len(cluster_list)): 539 | if i == j: 540 | break 541 | cluster_o = dup[j] 542 | len_cluster_o = len(cluster_o) 543 | #print 'o:', cluster_o 544 | if len_cluster_d <= len_cluster_o: 545 | duplicated = [ele for ele in cluster_d if ele in cluster_o] 546 | if len(duplicated) == len_cluster_d: 547 | if_duplicated = 1 548 | break 549 | 550 | if if_duplicated == 0: 551 | new_cluster_list.append(cluster_d) 552 | 553 | return new_cluster_list 554 | 555 | def remove_duplicated_cluster(corr, cluster_list): 556 | ''' 557 | Make clusters simple by removing duplicated members through all clusters. 558 | 559 | corr : 560 | The Pearson correlation matrix. 561 | 562 | cluster_list : 563 | Initial cluster list. 564 | 565 | return : 566 | list of cluster after removal of dup. clusters. 567 | ''' 568 | 569 | dup=corr.copy() 570 | 571 | #make a member of cluster list. 572 | member_list=[] 573 | for cluster in cluster_list: 574 | for member in cluster: 575 | if not member in member_list: 576 | member_list.append(member) 577 | 578 | #check if each member are overlapped through multiple cluster. 579 | for member in member_list: 580 | mem_corr_list={} 581 | cluster_index=0 582 | for cluster in cluster_list: 583 | if len(cluster)==2: 584 | cluster_index+=1 585 | continue 586 | #calculate average correlation if overlapped. 587 | sum_corr=0. 588 | avg_corr=0. 589 | if member in cluster: 590 | for cluster_member in cluster: 591 | if member!=cluster_member: 592 | sum_corr+=dup[member, cluster_member] 593 | avg_corr=sum_corr/(len(cluster)-1) 594 | mem_corr_list[cluster_index]=avg_corr 595 | cluster_index+=1 596 | #print member, mem_corr_list, max(mem_corr_list) 597 | 598 | #remain only one highest correlated value, remove else others. 599 | if len(mem_corr_list)>1: 600 | max_key=max(mem_corr_list) 601 | for key, val in mem_corr_list.iteritems(): 602 | #print key, cluster_list[key] 603 | if key!=max_key: 604 | cluster_list[key].pop([j for j in range(len(cluster_list[key])) if cluster_list[key][j]==member][0]) 605 | 606 | return cluster_list 607 | 608 | if __name__=='__main__': 609 | print 'Function set for PDtrending algorithm' 610 | -------------------------------------------------------------------------------- /pdtrend/datasets/originals/pdtrending.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | help = ''' 4 | Photometric De-Trending (pdtrend, also Pavlos-Daewon de-Trending) pipeline for astronomical time series. See details in Kim et al. 2008. 5 | 6 | 7 | Packages to run this pipeline : 8 | matplotlib : http://matplotlib.sourceforge.net/ 9 | numpy : http://numpy.scipy.org/ 10 | Pycluster : http://bonsai.ims.u-tokyo.ac.jp/~mdehoon/software/cluster/ 11 | hcluster : http://code.google.com/p/scipy-cluster/ 12 | R : http://www.r-project.org/ 13 | You need to install 'nortest' abd 'quadprog' package at R. Use 'install.packages' command in R shell. 14 | Rpy : http://rpy.sourceforge.net/ 15 | 16 | 17 | Usage : ./pdtrending.py -f \'file_list\' [OPTION...] 18 | 19 | -f : list of files which you want to de-trend. You can use regular expression. Don't forget for surrounding file_list with \'. i.e. 
-f \'/home/astro/hht_[0-9]*.lc\' 20 | 21 | -c : Correlation matrix file if you have one. i.e. -c /home/astro/corr.dat 22 | 23 | -o : Output directory. Default is './pdtrend' under the directory where light-curves are. i.e. -o /home/astro/pdtrend/ 24 | 25 | -l 0 : Column number of flux in light-curves. Default is 0 (first column). 26 | 27 | -i 1 : Not implemented in this version. Number of iteration of de-trending. If no clusters are found, the iteration will stop regardless the number of iteration. Note that png files of determined-trends trends-position will be generated only at the first iteration. Default is 1, which means no iteration but single execution of de-trending. You cannot use this option with -c together. 28 | 29 | -d : X and Y coordinates of light-curves. First(second) column should be X(Y) coordinates. The order of stars should be in same order with file_list. i.e. -c /home/astro/xy.dat 30 | 31 | -b [True|False] : True if light-curves are in flux based. If they are in magnitude based, then False. Default is True. 32 | 33 | -z 20 : Zero magnitude. Default is 20. 34 | 35 | -n 0 : Binning light-curves just when calculating correlation values, which can help to increase signal-to-noise ratio. The binning might be useful when low frequency trends are contaminated by high frequency noise source. Default is 0, which means no binning. Note that light-curves will be de-trended in original sampling rate (without binning). 36 | 37 | -t 2 : Constrain initial seeds. Default is 2. From 1 to 10 is suitable. If no clusters are found, decrease this number. It might help to find clusters. See Kim et al. 2008 for more details. 38 | 39 | -g 0.1 : level of significance, p-value. Default is 0.1. Smaller p-value generally gives more clusters. 40 | 41 | -m 5 : Mimimum number of stars in a cluster. Default is 5. According to the number of stars in dataset, you can adjust this value. 42 | 43 | -s [True|False] : If True, you can see a dendrogram of hierarchical tree of similarity matrix derived using light-curves. Default is False. 44 | 45 | -h : Show this help. 46 | 47 | 48 | mailto : dakim@cfa.harvard.edu 49 | ''' 50 | 51 | change_log = ''' 52 | @ Change log : 53 | v1.00 at 01/05/2009 : Just change the version number to 1.00 to publish this package. 54 | 55 | v4.00 at 11/14/2008 : We gave up multiple linear regression which occassionally destroys intrinsic signals. 56 | Instead of that, we implemented quadratic program which can constraint free parameters to be bigger than or equal to 0. 57 | This greatly reduces the signal destruction effects. Another big jump of our algorithm. 58 | This is our last improvement at this version of De-trending algorithm. 59 | Next version might start from handling the phase shift of trends. 60 | 61 | v3.51 at 09/22/2008 : Save the standard deviation values of light curves after- and before de-trending. 62 | 63 | v3.50 at 09/15/2008 : Fisher's tranformation is applied to the distances lists of subsets. 64 | The transformation convert skewed distribution of correlations to normal distribution; 65 | which strengthen our normality assumption. Nevertheless, the resulting clusters are almost 66 | identical because we gather only highly correlated elements and cut outliers by p-value. 67 | This is the thrid big jump of our algorithm. 68 | 69 | v3.03 at 08/25/2008 : Remove Shapiro-Wilk test. Use only Anderson-Darling test. 
70 | 71 | v3.01 at 08/19/2008 : Save the determined trends light curves as well; as 'trend_[0-9][0-9]' 72 | 73 | v3.00 at 08/13/2008 : Clustering algorithm changed to the normality test algorithm. 74 | Shapiro-Wilk test (Shapiro & Wilk 1965) and Anderson-Darling test (Anderson & Darling 1952) 75 | is used to test the normality of cluster. Many simulation had performed again. 76 | New algorithm idenfity the clusters much better than before. 77 | The second big jump of our de-trending algorithm. 78 | 79 | v2.21 at 06/10/2008 : Software is packaged at the first time. Remove not-used function. 80 | Make simulation light curves for test run. Packaging the light curves separately. 81 | 82 | v2.20 at 06/01/2008 : Improvement of combining template set routine to build trend, 83 | we use weighting according to the standard deviation of normalized light curves. 84 | 85 | v2.10 at 05/28/2008 : Improvement of removal of trends routine. Based on the constructed trends, 86 | Applied multiple regression method. 87 | 88 | v2.01 at 05/22/2008 : Save constructed trends light curves as png image. If xy_coords is not null, 89 | then also save position of each trend on CCD plane as png image. 90 | 91 | v2.00 at 05/21/2008 : Change block-clustering algorithm to hierachichal tree clustering algorithm. 92 | It's one of the biggest changes in our algorithm. 93 | 94 | v1.00 was finished at 05/08/2008 by Dae-Won Kim. 95 | ''' 96 | 97 | import getopt 98 | #import matplotlib 99 | #matplotlib.use('agg') 100 | from pylab import * 101 | from numpy import * 102 | import os 103 | import sys 104 | import glob 105 | 106 | from Pycluster import * 107 | from hcluster import * 108 | from clusterlib import * 109 | 110 | sys.path.append('./') 111 | 112 | def pdetrending(file_list, correlation_matrix='', xy_coords='', output_dir='', \ 113 | img=True, show_tree=False, min_template_stars=0, std_save=True, \ 114 | flux_based=True, zero_mag=20., column_flux=0, bin_window=0, initial_seed=2, \ 115 | n_iteration=1, l_significance=0.1): 116 | ''' 117 | flie_list : 118 | list of files. This list should be sorted by brightness of stars. 119 | Each file has to contain flux list of a star at the first column. 120 | 121 | correlation_matrix = '' : 122 | Correlation matrix of stars in file_list. If this is '', it will be generated by our routine. 123 | Newly generated matrix will be saved under the output_dir as 'corr.dat' 124 | 125 | xy_coords = '' : 126 | x and y coordinates of star on CCD plane. This list shoud be sorted by brightness of stars either. 127 | Therefore total number of lines should be the same with total number of lines of file_list. 128 | The first column should be x coordinate. The second column should be y coordinate. 129 | If this coordinates file is provided and img=True, then we save the png file which show where each 130 | constructed trends are placed on CCD plane. It's named 'trends_position.png' in output_dir. 131 | 132 | output_dir = '' : 133 | Output directory where de-trended light curves will be saved. 134 | If this is '', de-trended light curves will be saved under the directory of file_list[0], 'pdtrend' 135 | 136 | img = False : 137 | Save the image of de-trended light curves as png files. N number of png files will be generated 138 | under output_dir. N is the total number of stars. 139 | Top panel is raw, middle is constructed trend and bottom panel is de-trended resules. 
140 |         Also, the constructed trends' image file will be saved as 'constructed_trends.png'.
141 | 
142 |     show_tree = False :
143 |         If it's true, this routine shows the dendrogram constructed based on the correlation matrix.
144 | 
145 |     min_template_stars = 0 :
146 |         Minimum number of template stars per cluster. Empirically, about (total number of stars) / 50 is acceptable.
147 |         However, if this value is smaller than 5, the constructed trends might suffer from noise
148 |         because the number of template stars is too small. If this value is set to 0, then (total number of stars) / 50
149 |         is used.
150 | 
151 |     std_save = True :
152 |         Save standard deviation values of light curves (before and after de-trending).
153 | 
154 |     flux_based = True :
155 |         If data are in magnitude, change this value to False and set zero_mag to a proper value.
156 | 
157 |     zero_mag = 20 :
158 |         Only activated when flux_based = False.
159 | 
160 |     column_flux = 0 :
161 |         Column of flux in light-curve files.
162 | 
163 |     bin_window = 0 :
164 |         Bin light-curves when calculating correlation values. This can help to find clusters when trends are heavily contaminated by noise. 0 means no binning.
165 | 
166 |     initial_seed = 2 :
167 |         Constraint on initial seeds. For more details, see Kim et al. 2008.
168 | 
169 |     n_iteration = 1 :
170 |         Number of iterations of the de-trending process. Not implemented in this version.
171 |     '''
172 | 
173 |     #Append current directory for python libraries.
174 |     sys.path.append('./')
175 | 
176 |     #Check initial values.
177 |     if file_list == '':
178 |         print '#ERROR : Check the file_list.'
179 |         sys.exit()
180 |     if output_dir == '':
181 |         output_dir = os.path.dirname(file_list[0]) + '/pdtrend/'
182 |     if not os.path.exists(output_dir):
183 |         os.makedirs(output_dir)
184 | 
185 |     #Read files into memory.
186 |     if xy_coords != '':
187 |         print '#Start to read xy coordinates..'
188 |         whole_xy = []
189 |         fp = open(xy_coords)
190 |         whole_xy = array([map(float, line.split()) for line in fp])
191 |         max_x = max(whole_xy[:,0])
192 |         max_y = max(whole_xy[:,1])
193 |         fp.close()
194 |         if len(whole_xy) != len(file_list):
195 |             print '#ERROR : The number of entries in the xy coords file does not match the number of stars.'
196 |             sys.exit()
197 | 
198 |     print '#Start to read files..'
199 |     whole_lc = []
200 |     for i in range(len(file_list)):
201 |         print ' #Reading ' + file_list[i] + '..'
202 |         lc = []
203 |         fp = open(file_list[i])
204 |         for line in fp:
205 |             if line[0] == '#':
206 |                 continue
207 |             flux_column = float(line.split()[column_flux])
208 |             if not flux_based:
209 |                 flux = math.pow(10, (flux_column - zero_mag) / -2.5)
210 |             else:
211 |                 flux = flux_column
212 |             lc.append(flux)
213 |         fp.close()
214 |         whole_lc.append(lc)
215 |     whole_lc = array(whole_lc)
216 | 
217 |     #Check correlation matrix.
218 |     if correlation_matrix == '':
219 |         print '#Calculate correlation matrix..'
220 |         corr_list = cal_correlation_matrix(whole_lc, bin_window)
221 | 
222 |         print '#Save correlation matrix to local file %s..' % (output_dir + '/corr.dat')
223 |         fp = open(output_dir + '/corr.dat', 'wb')
224 |         for ele in corr_list:
225 |             fp.write(' '.join(map(str, ele)) + '\n')
226 |         fp.close()
227 |     else:
228 |         print '#Read correlation matrix..'
229 |         fp = open(correlation_matrix)
230 |         corr_list = array([map(float, line.split()) for line in fp])
231 |         if corr_list.shape != (len(whole_lc), len(whole_lc)):
232 |             print '#ERROR : Dimension of the correlation matrix does not match! \
233 |                 Execute this routine without the correlation matrix option to generate the correlation matrix again.'
234 |             sys.exit()
235 | 
236 |     #Select template clusters based on the hierarchical tree structure.
237 |     #For more details, see Kim et al. 2008.
238 |     print '#Now finding clusters for template set..'
239 |     dist_matrix = 1. - corr_list
240 |     print ' #Making hierarchical tree..'
241 |     tree = treecluster(method='m', distancematrix=dist_matrix.copy())
242 |     if show_tree:
243 |         R = dendrogram(Pycluster_to_hcluster(tree), leaf_font_size=10)
244 |         ylabel('Distance'); xlabel('Index of star'); show()
245 | 
246 |     print ' #Finding clusters in tree..'
247 |     groups = find_group_DW(tree, dist_matrix, len(whole_lc[0]), initial_seed, l_significance)
248 |     print ' #Remove subsets of clusters..'
249 |     groups = remove_subset_cluster(groups)
250 |     groups = remove_small(groups, min_template_stars - 1)
251 |     print ' #Total %d clusters found..' % (len(groups))
252 |     #print groups
253 |     for group in groups:
254 |         print '#----------------------------#'
255 |         for ele in group:
256 |             print file_list[ele]
257 | 
258 |     if len(groups) == 0:
259 |         print '#ERROR : No cluster is found, which means this dataset has no \
260 |             reliable template set. You have to check the things below. \
261 |             1) Are your data contaminated too much by other noise sources? If yes, retry this algorithm after data binning. \
262 |             2) Do you have enough stars? If yes, there is no solution. Our algorithm is \
263 |             developed for wide-field surveys, which means you should have enough stars, \
264 |             ~ a few hundred. \
265 |             3) Do your data have trends? In other words, do your stars just show trends independent of the others? \
266 |             Check the light curves by eye. If the trends appear in just a tiny number of stars, \
267 |             then it\'s very hard to find reliable clusters. However, you could retry this algorithm after data binning, \
268 |             which might help to find clusters.'
269 |         sys.exit()
270 | 
271 |     #Make a trend list for each group.
272 |     print '#Construct trends with identified clusters..'
273 |     trend_set = []
274 |     for group in groups:
275 |         trend_set.append(create_trend(group, whole_lc))
276 |     trend_set = array(trend_set)
277 | 
278 |     if img:
279 |         clf()
280 |         color = ['r', 'g', 'b', 'y', 'k']
281 |         #color = ['0.5', '0.6', '0.7', '0.8', '0.9']
282 |         shape = ['s', '^', 'o', 'D', '+', 'x', '1', 'h', 'p']
283 |         print ' #Saving constructed trends as png files..'
284 |         for i in range(len(trend_set)):
285 |             ax = subplot(len(trend_set), 1, i + 1)
286 |             #plot(trend_set[i] + 1., color[i%len(color)]+shape[i%len(shape)])
287 |             plot(trend_set[i] + 1., 'b,')
288 |             ylabel('%d' % len(groups[i]))
289 |         xlabel('Time index')
290 |         savefig(output_dir + '/constructed_trends.png')
291 | 
292 |         if xy_coords != '':
293 |             clf()
294 |             print ' #Saving positions of trends on CCD as png files..'
295 |             for i in range(len(groups)):
296 |                 for ele in groups[i]:
297 |                     plot([whole_xy[ele][0]], [whole_xy[ele][1]], color=color[i%len(color)], marker=shape[i%len(shape)])
298 |             axis([min(whole_xy[:,0]), max(whole_xy[:,0]), min(whole_xy[:,1]), max(whole_xy[:,1])])
299 |             xlabel('x-coordinate on CCD plane')
300 |             ylabel('y-coordinate on CCD plane')
301 |             savefig(output_dir + '/trends_position.png')
302 | 
303 |     #Start de-trending.
304 |     print '#Start to de-trend every light curve..'
305 |     #standard deviation list (before and after de-trending)
306 |     std_list = []
307 |     for i in range(len(whole_lc)):
308 |         print ' #Now de-trending %dth star.. %s' % ((i + 1), file_list[i])
309 |         detrended = get_quadprog(whole_lc[i] / median(whole_lc[i]) - 1., trend_set)
310 |         detrended += 1.
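        # Note on the get_quadprog call above: as described in the change log (v4.00),
        # it solves a non-negative least-squares problem. Writing lc_norm for the
        # argument whole_lc[i] / median(whole_lc[i]) - 1., it finds coefficients
        # b >= 0 minimizing || lc_norm - b . trend_set ||^2 and returns the residual,
        # so shared trends are removed while intrinsic signals survive. A rough
        # SciPy equivalent, shown only as an illustration of the idea (the package
        # uses its own quadratic-programming routine, not SciPy):
        #
        #   from scipy.optimize import nnls
        #   b, _ = nnls(trend_set.T, lc_norm)         # trend_set.T: epochs x n_trends
        #   residual = lc_norm - dot(trend_set.T, b)  # same shape as lc_norm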
311 |         detrended *= median(whole_lc[i])
312 |         #detrended = get_quadprog(whole_lc[i], trend_set)
313 |         print ' #Save the de-trended light curve..'
314 |         fp = open(output_dir + '/' + os.path.basename(file_list[i]), 'w')
315 |         if len(trend_set) == 1:
316 |             detrended = detrended[0]
317 |         for flux in detrended:
318 |             fp.write('%.2f\n' % (round(flux, 2)))
319 |         fp.close()
320 |         if std_save:
321 |             std_list.append([std(whole_lc[i]), std(detrended)])
322 | 
323 |         if img:
324 |             print ' #Save as image file..'
325 |             clf()
326 |             ax = subplot(211)
327 |             title(os.path.basename(file_list[i]))
328 |             plot(whole_lc[i] / median(whole_lc[i]), 'b.')
329 |             text(0.98, 0.9, r'Raw, $\sigma$ : %.1f' % std(whole_lc[i]), horizontalalignment='right', \
330 |                 verticalalignment='center', transform=ax.transAxes, fontsize=10, \
331 |                 bbox=dict(facecolor='blue', alpha=0.5))
332 |             ylabel('Flux')
333 |             ax = subplot(212)
334 |             plot(detrended / median(whole_lc[i]), 'g.')
335 |             text(0.98, 0.9, r'De-trended, $\sigma$ : %.1f' % std(detrended), horizontalalignment='right', \
336 |                 verticalalignment='center', transform=ax.transAxes, fontsize=10, \
337 |                 bbox=dict(facecolor='green', alpha=0.5))
338 |             if flux_based:
339 |                 ylabel('Normalized Flux')
340 |             else:
341 |                 ylabel('Normalized Magnitude')
342 |             xlabel('Time indices')
343 |             savefig(output_dir + '/' + os.path.basename(file_list[i]) + '.png')
344 | 
345 |     if std_save:
346 |         print '#Save standard deviation values..'
347 |         fp = open(output_dir + '/std_list', 'w')
348 |         for i in range(len(std_list)):
349 |             fp.write('%.2f %.2f %s\n' % (round(std_list[i][0], 2), round(std_list[i][1], 2), file_list[i]))
350 |         fp.close()
351 | 
352 | if __name__=='__main__':
353 | 
354 |     file_l = ''  # file list of light-curves.
355 |     correlation_matrix = ''  # correlation matrix file.
356 |     xy_coords = ''  # x and y coordinates of all stars.
357 |     output_d = ''  # output directory.
358 |     column_f = 0  # column number of flux in light-curve files.
359 |     flux_b = True  # flux based or magnitude based.
360 |     zero_m = 20  # zero magnitude when light-curves are magnitude based.
361 |     show_t = False  # show dendrogram of hierarchical tree.
362 |     ini_s = 2  # initial seeds constraint.
363 |     bin_w = 0  # binning window size.
364 |     min_t = 5  # minimum number of stars in clusters. 5~10 is a good choice.
365 |     n_iter = 1  # number of iterations of de-trending. Not implemented in this version.
366 |     l_sig = 0.1  # level of significance (p-value).
367 | 
368 |     if len(sys.argv) == 1:
369 |         print help
370 |         sys.exit()
371 | 
372 |     #Read command line options.
373 |     try:
374 |         optlist, args = getopt.getopt(sys.argv[1:], 'hf:c:d:b:z:s:o:l:n:t:i:m:g:')
375 |     except getopt.GetoptError, err:
376 |         print help
377 |         sys.exit()
378 | 
379 |     for o, a in optlist:
380 |         if o in ('-h'):
381 |             print help
382 |             sys.exit()
383 |         elif o in ('-f'):
384 |             file_l = a
385 |         elif o in ('-c'):
386 |             correlation_matrix = a
387 |         elif o in ('-d'):
388 |             xy_coords = a
389 |         elif o in ('-b'):
390 |             if a == 'False':
391 |                 flux_b = False
392 |             else:
393 |                 flux_b = True
394 |         elif o in ('-z'):
395 |             zero_m = float(a)
396 |         elif o in ('-s'):
397 |             show_t = (a == 'True')
398 |         elif o in ('-o'):
399 |             output_d = a
400 |         elif o in ('-l'):
401 |             column_f = int(a)
402 |         elif o in ('-n'):
403 |             bin_w = int(a)
404 |         elif o in ('-t'):
405 |             ini_s = float(a)
406 |         elif o in ('-i'):
407 |             n_iter = int(a)
408 |         elif o in ('-m'):
409 |             min_t = int(a)
410 |         elif o in ('-g'):
411 |             l_sig = float(a)
412 |         else:
413 |             continue
414 | 
415 |     file_list = glob.glob(file_l)
416 |     if len(file_list) == 0:
417 |         print 'There is no file in : ' + file_l
418 |         sys.exit()
419 |     if len(file_list) < 50:
420 |         print 'Too few light curves : %d.\nIf you still want to run the program, modify line#406 in pdtrending.py.' % (len(file_list))
421 |         sys.exit()
422 |     file_list.sort()
423 | 
424 |     pdetrending(file_list, correlation_matrix, xy_coords, show_tree=show_t, \
425 |         min_template_stars=min_t, flux_based=flux_b, zero_mag=zero_m, output_dir=output_d, \
426 |         column_flux=column_f, bin_window=bin_w, initial_seed=ini_s, n_iteration=n_iter, l_significance=l_sig)
427 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PDT
2 | 
3 | 
[ Example of the detrended light curve ]
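The packaged version is driven through a small Python API rather than the command-line script above. The following is a minimal sketch, assuming the package exposes the `PDTrend` class and the bundled `load_lightcurve_set()` helper with `run()` and `detrend()` methods; treat the exact method names as assumptions if your installed version differs.

```python
# Minimal usage sketch; assumes PDTrend exposes run() and detrend(), and that
# load_lightcurve_set() returns the bundled example light curves as a 2D array
# (one row per star, one column per epoch; all stars share the same epochs).
from pdtrend import PDTrend, load_lightcurve_set

lcs = load_lightcurve_set()

pdt = PDTrend(lcs)               # clusters highly correlated light curves
pdt.run()                        # constructs one master trend per cluster
detrended = pdt.detrend(lcs[0])  # fits non-negative trend coefficients and subtracts
```

As in the original `pdetrending` routine, the coefficients of the master trends are constrained to be non-negative, which helps avoid destroying intrinsic signals during de-trending.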