31 | """
32 | logger = logging.getLogger(__file__)
33 | logging_level = getattr(logging, verbosity_level)
34 | logger.setLevel(logging_level)
35 | formatter = logging.Formatter(
36 | fmt='%(asctime)s %(levelname)s %(filename)s: %(message)s')
37 | stdout_handler = logging.StreamHandler(sys.stdout)
38 | stdout_handler.setLevel(logging_level)
39 | stdout_handler.setFormatter(formatter)
40 | logger.addHandler(stdout_handler)
41 | if use_error_log:
42 | stderr_handler = logging.StreamHandler(sys.stderr)
43 | stderr_handler.setLevel(logging.WARNING)
44 | stderr_handler.setFormatter(formatter)
45 | logger.addHandler(stderr_handler)
46 | logger.propagate = False
47 | return logger
48 |
49 |
# Module-level logger used by the functions in this script. It is created as
# soon as the module is loaded, using the configured VERBOSITY_LEVEL.
LOGGER = get_logger(VERBOSITY_LEVEL)
51 |
52 |
def _here(*args):
    """Return an absolute path anchored at the directory of this script.

    The script location is resolved with ``os.path.realpath`` (so symlinks
    are followed); ``args`` are joined onto that directory as further path
    components and the result is normalised with ``os.path.abspath``.
    """
    script_path = os.path.realpath(__file__)
    script_dir = os.path.dirname(script_path)
    combined = join(script_dir, *args)
    return os.path.abspath(combined)
57 |
58 |
def _get_solution(solution_dir):
    """Load the solution table stored in ``solution_dir``.

    The file name comes from the module constant ``SOLUTION_FILE``; the file
    is parsed as tab-separated values and returned as a pandas DataFrame.
    """
    return pd.read_csv(join(solution_dir, SOLUTION_FILE), sep='\t')
64 |
65 |
def _get_prediction(prediction_dir):
    """Return the ``label`` column of ``prediction_dir/predictions``.

    The file is read with pandas' default CSV settings.
    """
    predictions = pd.read_csv(join(prediction_dir, 'predictions'))
    return predictions['label']
69 |
70 |
def _get_score(solution_dir, prediction_dir):
    """Score the predictions against the solution labels.

    Loads the ``label`` column of the solution and the predictions, checks
    that both have the same shape and returns ``accuracy_score`` of the two.

    Raises:
        ValueError: If the prediction shape differs from the solution shape.
    """
    LOGGER.info('===== get solution')
    truth = _get_solution(solution_dir)['label']
    LOGGER.info('===== read prediction')
    predicted = _get_prediction(prediction_dir)

    expected_shape = truth.shape
    actual_shape = predicted.shape
    if actual_shape != expected_shape:
        raise ValueError(f"Bad prediction shape: {actual_shape}. "
                         f"Expected shape: {expected_shape}")

    LOGGER.info('===== calculate score')
    LOGGER.debug(f'solution shape = {expected_shape}')
    LOGGER.debug(f'prediction shape = {actual_shape}')
    return accuracy_score(truth, predicted)
87 |
88 |
def _update_score(args, duration):
    """Compute the score, rewrite the results page and save the score.

    Args:
        args: Parsed arguments providing ``solution_dir``, ``prediction_dir``
            and ``score_dir``.
        duration: Duration value written next to the score in ``scores.txt``.

    Returns:
        The score returned by ``_get_score``.
    """
    result = _get_score(solution_dir=args.solution_dir,
                        prediction_dir=args.prediction_dir)

    # Rewrite detailed_results.html before the score file is produced.
    _write_scores_html(args.score_dir)

    LOGGER.info('===== write score')
    write_score(args.score_dir, result, duration)
    LOGGER.info(f"accuracy: {result:.4}")
    return result
99 |
100 |
def _init_scores_html(detailed_results_filepath):
    """Append an initial placeholder page to ``detailed_results_filepath``.

    Writes, in order, a head fragment, a "Starting training process..."
    notice and an end fragment. The file is opened in append mode, so any
    existing content is kept.

    NOTE(review): the string literals below look like they lost their HTML
    markup when this listing was extracted (the head is a single space and
    two literals contain a raw line break, which is not valid inside a
    single-line string). Restore them from the upstream file -- TODO confirm.
    """
    html_head = (' '
                 '')
    # NOTE(review): raw line break inside the literal; presumably markup was
    # stripped here -- confirm against the original source.
    html_end = '
'
    with open(detailed_results_filepath, 'a') as html_file:
        html_file.write(html_head)
        # NOTE(review): same extraction artefact in the message below.
        html_file.write("Starting training process...
Please be patient. "
                        "Learning curves will be generated when first "
                        "predictions are made.")
        html_file.write(html_end)
111 |
112 |
def _write_scores_html(score_dir, auto_refresh=True, append=False):
    """Write ``detailed_results.html`` into ``score_dir``.

    Args:
        score_dir: Directory that receives ``detailed_results.html``.
        auto_refresh: Selects which head fragment is written; when False an
            empty head is used.
        append: When True the file is opened in append mode ('a'), otherwise
            it is overwritten ('w').

    Only the head and end fragments are written; no body content is emitted
    by this function.

    NOTE(review): the HTML literals below appear to have lost their markup
    during extraction (the head is a single space and ``html_end`` contains a
    raw line break). Restore them from the upstream file -- TODO confirm.
    """
    filename = 'detailed_results.html'
    if auto_refresh:
        html_head = (' '
                     '')
    else:
        html_head = """"""
    # NOTE(review): raw line break inside the literal; presumably markup was
    # stripped here -- confirm against the original source.
    html_end = '
'
    if append:
        mode = 'a'
    else:
        mode = 'w'
    filepath = join(score_dir, filename)
    with open(filepath, mode) as html_file:
        html_file.write(html_head)
        html_file.write(html_end)
    LOGGER.debug(f"Wrote learning curve page to {filepath}")
130 |
131 |
def write_score(score_dir, score, duration):
    """Persist the score and the duration in ``score_dir/scores.txt``.

    The file is overwritten and contains two lines: ``score: <score>`` and
    ``Duration: <duration>``.
    """
    score_filename = join(score_dir, 'scores.txt')
    with open(score_filename, 'w') as out:
        out.writelines((f'score: {score}\n',
                        f'Duration: {duration}\n'))
    LOGGER.debug(f"Wrote to score_filename={score_filename} with "
                 f"score={score}, duration={duration}")
140 |
141 |
class IngestionError(Exception):
    """Raised when the ingestion program did not start or did not finish.

    Used in this module when ``start.txt`` is not detected in time and when
    ``end.yaml`` is missing.
    """
144 |
145 |
class ScoringError(Exception):
    """Exception type reserved for failures of the scoring program itself."""
148 |
149 |
def get_ingestion_info(prediction_dir):
    """Load the content of ``prediction_dir/end.yaml``.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file.

    Raises:
        IngestionError: If ``end.yaml`` is not a regular file in
            ``prediction_dir``.
    """
    endfile_path = os.path.join(prediction_dir, 'end.yaml')

    if os.path.isfile(endfile_path):
        LOGGER.info('===== Detected end.yaml file, get ingestion information')
        with open(endfile_path, 'r') as end_file:
            return yaml.safe_load(end_file)

    raise IngestionError("[-] No end.yaml exist, ingestion failed")
163 |
164 |
def get_ingestion_pid(prediction_dir, wait_time=60):
    """Wait for ingestion to start and return its process id.

    Checks for ``prediction_dir/start.txt`` up to ``wait_time`` times,
    sleeping one second after each miss. Once the file exists, its content is
    read while holding ``start.txt.lock`` and converted to ``int``.

    Args:
        prediction_dir: Directory in which ``start.txt`` is expected.
        wait_time: Maximum number of one-second polling attempts
            (defaults to 60).

    Returns:
        The integer parsed from the content of ``start.txt``.

    Raises:
        IngestionError: If ``start.txt`` does not appear within ``wait_time``
            attempts.
        ValueError: If the content of ``start.txt`` is not an integer.
    """
    startfile = os.path.join(prediction_dir, 'start.txt')
    lockfile = os.path.join(prediction_dir, 'start.txt.lock')

    for i in range(wait_time):
        if not os.path.exists(startfile):
            time.sleep(1)
            continue
        # Read under the lock file; presumably the writer takes the same lock
        # while producing start.txt -- confirm against the ingestion program.
        with FileLock(lockfile):
            with open(startfile, 'r') as ftmp:
                ingestion_pid = ftmp.read()
        LOGGER.info(
            f'Detected the start of ingestion after {i} seconds.')
        return int(ingestion_pid)

    # Both literal parts must interpolate/join correctly: the f-prefix has to
    # sit on the part containing {wait_time}, and a space is needed before
    # "ingestion".
    raise IngestionError(
        "[-] Failed: scoring didn't detect the start of "
        f"ingestion after {wait_time} seconds.")
185 |
186 |
def is_process_alive(ingestion_pid):
    """Return True if a process with id ``ingestion_pid`` currently exists.

    Sends signal 0 with ``os.kill``, which performs the existence and
    permission checks without delivering a signal.

    Args:
        ingestion_pid: Process id to probe.

    Returns:
        True if the process exists (including when it belongs to another
        user), False otherwise.
    """
    try:
        os.kill(ingestion_pid, 0)
    except PermissionError:
        # EPERM: the process exists but we may not signal it -- still alive.
        return True
    except OSError:
        # Any other failure (e.g. no such process) is treated as "not alive".
        return False
    return True
195 |
196 |
def _parse_args():
    """Parse the command-line arguments of the scoring program.

    Defaults are resolved relative to the parent of this script's directory:
    ``sample_data`` (solution), ``sample_result_submission`` (predictions)
    and ``scoring_output`` (scores).

    Returns:
        The ``argparse.Namespace`` with ``solution_dir``, ``prediction_dir``
        and ``score_dir``.
    """
    # Default I/O directories:
    root_dir = _here(os.pardir)
    default_solution_dir = join(root_dir, "sample_data")
    default_prediction_dir = join(root_dir, "sample_result_submission")
    default_score_dir = join(root_dir, "scoring_output")
    parser = argparse.ArgumentParser()
    parser.add_argument('--solution_dir', type=str,
                        default=default_solution_dir,
                        help=("Directory storing the solution with true "
                              "labels, e.g. adult.solution."))
    # The trailing space after "should" keeps the concatenated help text from
    # reading "shouldcontain".
    parser.add_argument('--prediction_dir', type=str,
                        default=default_prediction_dir,
                        help=("Directory storing the predictions. It should "
                              "contain e.g. [start.txt, adult.predict_0, "
                              "adult.predict_1, ..., end.yaml]."))
    parser.add_argument('--score_dir', type=str,
                        default=default_score_dir,
                        help=("Directory storing the scoring output e.g. "
                              "`scores.txt` and `detailed_results.html`."))
    args = parser.parse_args()
    LOGGER.debug(f"Parsed args are: {args}")
    LOGGER.debug("-" * 50)
    LOGGER.debug(f"Using solution_dir: {args.solution_dir}")
    LOGGER.debug(f"Using prediction_dir: {args.prediction_dir}")
    LOGGER.debug(f"Using score_dir: {args.score_dir}")
    return args
224 |
225 |
def _init(args):
    """Prepare the score directory and the initial results page.

    Ensures ``args.score_dir`` exists (creating missing parent directories as
    well) and then initialises ``detailed_results.html`` inside it.

    Args:
        args: Parsed arguments providing ``score_dir``.

    Raises:
        FileExistsError: If ``args.score_dir`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race and also handles nested,
    # not-yet-existing paths passed via --score_dir.
    os.makedirs(args.score_dir, exist_ok=True)
    detailed_results_filepath = join(
        args.score_dir, 'detailed_results.html')
    # Initialize detailed_results.html
    _init_scores_html(detailed_results_filepath)
233 |
234 |
def _finalize(score, scoring_start):
    """Log the final score and the total scoring duration.

    Args:
        score: Final score; formatted with 6 significant digits, so it is
            expected to be a float.
        scoring_start: ``time.time()`` timestamp taken when scoring began.
    """
    duration = time.time() - scoring_start
    # Use fixed-point `.2f`: a bare `.2` means two significant digits and
    # renders e.g. 123.4 as "1.2e+02".
    LOGGER.info(
        "[+] Successfully finished scoring! "
        f"Scoring duration: {duration:.2f} sec. "
        f"The score of your algorithm on the task is: {score:.6}.")

    LOGGER.info("[Scoring terminated]")
245 |
246 |
def main():
    """Run the scoring program from start to finish.

    Steps: parse arguments, prepare the score directory, wait for the
    ingestion process to start and then to exit, read ``end.yaml``, compute
    and write the score, and log the final summary.
    """
    scoring_start = time.time()
    LOGGER.info('===== init scoring program')
    args = _parse_args()
    _init(args)
    score = DEFAULT_SCORE

    pid = get_ingestion_pid(args.prediction_dir)

    LOGGER.info("===== wait for the exit of ingestion.")
    # Poll once per second until the ingestion process is gone.
    while is_process_alive(pid):
        time.sleep(1)

    # The duration reported with the score is the one recorded by ingestion.
    info = get_ingestion_info(args.prediction_dir)
    score = _update_score(args, info['ingestion_duration'])

    _finalize(score, scoring_start)
267 |
268 |
# Run the scoring program only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
271 |
--------------------------------------------------------------------------------