├── data
│   └── hi.txt
├── sec3_transactions_2017.pdf
├── sec3_transactions_2018.pdf
├── sec3_transactions_2019.pdf
├── sec3_transactions_2020.pdf
├── sec3_transactions_2021.pdf
├── sec3_transactions_2022.pdf
├── p1sec3_transactions_2023.pdf
├── p2sec3_transactions_2024.pdf
├── requirements.txt
├── .devcontainer
│   └── devcontainer.json
└── streamlit_app_9-28-23.py

--------------------------------------------------------------------------------
/data/hi.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/sec3_transactions_2017.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/danshorstein/tc_lookup/main/sec3_transactions_2017.pdf
--------------------------------------------------------------------------------
/sec3_transactions_2018.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/danshorstein/tc_lookup/main/sec3_transactions_2018.pdf
--------------------------------------------------------------------------------
/sec3_transactions_2019.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/danshorstein/tc_lookup/main/sec3_transactions_2019.pdf
--------------------------------------------------------------------------------
/sec3_transactions_2020.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/danshorstein/tc_lookup/main/sec3_transactions_2020.pdf
--------------------------------------------------------------------------------
/sec3_transactions_2021.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/danshorstein/tc_lookup/main/sec3_transactions_2021.pdf
--------------------------------------------------------------------------------
/sec3_transactions_2022.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/danshorstein/tc_lookup/main/sec3_transactions_2022.pdf
--------------------------------------------------------------------------------
/p1sec3_transactions_2023.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/danshorstein/tc_lookup/main/p1sec3_transactions_2023.pdf
--------------------------------------------------------------------------------
/p2sec3_transactions_2024.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/danshorstein/tc_lookup/main/p2sec3_transactions_2024.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.12.2
2 | Flask==3.0.0
3 | pandas==2.1.1
4 | pdfplumber==0.10.2
5 | pyngrok==7.0.0
6 | Requests==2.31.0
7 | services==0.1.1
8 | streamlit==1.26.0
9 | xlsxwriter
10 | 
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "Python 3",
3 |   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
4 |   "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
5 |   "customizations": {
6 |     "codespaces": {
7 |       "openFiles": [
8 |         "README.md",
9 |         "streamlit_app_9-28-23.py"
10 |       ]
11 |     },
12 |     "vscode": {
13 |       "settings": {},
14 |       "extensions": [
15 |         "ms-python.python",
16 |         "ms-python.vscode-pylance"
17 |       ]
18 |     },
19 |   },
20 |   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
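
Note: the devcontainer above appears to follow the standard Streamlit Codespaces template, so dependencies are installed automatically inside a Codespace. For a plain local run, a minimal sketch (assuming Python 3.11 and the pinned versions in requirements.txt above) is:

    pip install -r requirements.txt
    streamlit run streamlit_app_9-28-23.py
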
--------------------------------------------------------------------------------
/streamlit_app_9-28-23.py:
--------------------------------------------------------------------------------
225 |         if any([isinstance(row[f'Debit{i}'], str) and len(row[f'Debit{i}']) > 0 and row[f'Debit{i}'][0] == '4' for i in range(1, 4)]) or \
226 |             any([isinstance(row[f'Credit{i}'], str) and len(row[f'Credit{i}']) > 0 and row[f'Credit{i}'][0] == '4' for i in range(1, 4)]):
227 |             return 'Potentially missing Proprietary'
228 |         else:
229 |             return 'Potentially missing Budgetary'
230 |     return ''
231 | 
232 | 
233 | 
234 | def process_and_analyze_uploaded_csv_with_categories(uploaded_file, df_tc_lookup, categories_selected):
235 |     """
236 |     Process and analyze the uploaded CSV considering selected TC categories and flagging potential concerns.
237 |     """
238 |     if 'analyzed_data' in st.session_state:
239 |         return st.session_state['analyzed_data']
240 | 
241 |     df_uploaded = pd.read_csv(uploaded_file)
242 | 
243 |     for col in df_uploaded.columns:
244 |         if col.startswith(('Debit', 'Credit')):
245 |             df_uploaded[col] = df_uploaded[col].apply(lambda x: str(x)[:6] if pd.notnull(x) else '')
246 | 
247 |     df_uploaded['Matching TCs'] = None
248 |     df_uploaded['Match Type'] = None
249 |     # df_uploaded['Potential Concern'] = None
250 | 
251 |     for idx, row in df_uploaded.iterrows():
252 |         drs = [row['Debit1'], row['Debit2'], row['Debit3']]
253 |         crs = [row['Credit1'], row['Credit2'], row['Credit3']]
254 |         drs = [dr for dr in drs if dr is not None]
255 |         crs = [cr for cr in crs if cr is not None]
256 | 
257 |         common_sglas = set(drs) & set(crs)
258 | 
259 |         for sgl in common_sglas:
260 |             if sgl in drs and sgl in crs:
261 |                 if len(drs) > len(crs):
262 |                     drs.remove(sgl)
263 |                 else:
264 |                     crs.remove(sgl)
265 |         # for sgl in common_sglas:
266 |         #     dr_count = drs.count(sgl)
267 |         #     cr_count = crs.count(sgl)
268 |         #     cancel_count = min(dr_count, cr_count)
269 |         #     for _ in range(cancel_count):
270 |         #         drs.remove(sgl)
271 |         #         crs.remove(sgl)
272 | 
273 |         exact_matches = filter_tc_tool(df_tc_lookup[df_tc_lookup.index.str.startswith(tuple(categories_selected))], drs=drs, crs=crs)
274 | 
275 |         if not exact_matches.empty:
276 |             df_uploaded.at[idx, 'Matching TCs'] = ", ".join(exact_matches.index.to_list())
277 |             df_uploaded.at[idx, 'Match Type'] = "Exact Matches"
278 | 
279 |             # # Check for potential concern
280 |             # for tc in exact_matches.index:
281 |             #     bd_exists = any(exact_matches.loc[tc, col] for col in ['Budgetary_Debits', 'Budgetary_Credits'])
282 |             #     pr_exists = any(exact_matches.loc[tc, col] for col in ['Proprietary_Debits', 'Proprietary_Credits'])
283 |             #     if (bd_exists and not pr_exists) or (pr_exists and not bd_exists):
284 |             #         df_uploaded.at[idx, 'Potential Concern'] = 'Yes'
285 |             #         break
286 |         else:
287 |             close_matches = truncate_search(df_tc_lookup[df_tc_lookup.index.str.startswith(tuple(categories_selected))], drs, crs)
288 |             if not close_matches.empty:
289 |                 df_uploaded.at[idx, 'Matching TCs'] = ", ".join(close_matches.index.to_list())
290 |                 df_uploaded.at[idx, 'Match Type'] = "Close Matches"
291 | 
292 |     st.session_state['analyzed_data'] = df_uploaded
293 |     return df_uploaded
294 | 
295 | 
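# Illustrative note on the SGL-cancellation step above, using hypothetical
# account numbers (not taken from any TC lookup table): if a row yields
#     drs = ['480100', '310700']   and   crs = ['480100']
# then common_sglas == {'480100'}, and because len(drs) > len(crs) the shared
# account is removed from the debit side, leaving drs = ['310700'] and
# crs = ['480100'] before filter_tc_tool() is applied to the lookup table
# restricted to the selected category prefixes. Earlier in the function, every
# Debit*/Credit* cell is truncated to its first six characters and missing
# values become empty strings.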
296 | # def process_and_analyze_uploaded_csv_with_session_state(uploaded_file, df_tc_lookup):
297 | #     """
298 | #     Process and analyze the uploaded CSV using Streamlit's session state to avoid unnecessary re-analysis.
299 | #     """
300 | #     # Check if the analysis has already been done and stored in session state
301 | #     if 'analyzed_data' in st.session_state:
302 | #         return st.session_state['analyzed_data']
303 | 
304 | #     # If not, perform the analysis
305 | #     df_uploaded = process_and_analyze_uploaded_csv(uploaded_file, df_tc_lookup)
306 | 
307 | #     # Store the analysis results in session state
308 | #     st.session_state['analyzed_data'] = df_uploaded
309 | 
310 | #     return df_uploaded
311 | 
312 | 
313 | 
314 | 
315 | def generate_excel_from_analysis(df_results, df_tc_lookup):
316 |     """
317 |     Generate an Excel file based on the analysis results.
318 |     """
319 |     towrite = io.BytesIO()
320 | 
321 |     with pd.ExcelWriter(towrite, engine='xlsxwriter') as writer:
322 |         # Main tab: Resulting Analysis
323 |         df_results.to_excel(writer, sheet_name='Analysis Results', index=False)
324 | 
325 |         # Second tab: Filtered Trans Codes
326 |         # Extract unique TCs from the results and filter the TC lookup table
327 |         unique_tcs = set(tc for tcs in df_results['Matching TCs'].dropna() for tc in tcs.split(", "))
328 |         df_filtered_tcs = df_tc_lookup[df_tc_lookup.index.isin(unique_tcs)]
329 |         df_filtered_tcs.to_excel(writer, sheet_name='Filtered Trans Codes', index=True)
330 | 
331 |     towrite.seek(0)
332 |     return towrite
333 | 
334 | def streamlit_app():
335 |     st.title("Fed TC Lookup Tool")
336 | 
337 |     tab1, tab2 = st.tabs(['TC Analysis', 'File Analysis'])
338 | 
339 |     with tab1:
340 | 
341 |         with st.sidebar:
342 | 
343 | 
344 |             # Dropdown for fiscal year selection
345 |             fiscal_years = get_fiscal_years_from_filenames()
346 |             selected_fy = st.selectbox("Select Fiscal Year", fiscal_years)
347 | 
348 |             # Loading data for the selected fiscal year
349 |             df = load_tc_lookup_tool(selected_fy)
350 | 
351 | 
352 |             # User input
353 |             col1, col2 = st.columns(2)
354 |             with col1:
355 |                 drs = [st.text_input(f"Debit {i+1}", value='') for i in range(3)]
356 |             with col2:
357 |                 crs = [st.text_input(f"Credit {i+1}", value='') for i in range(3)]
358 | 
359 |             keyword = st.text_input("Keyword/Phrase Filter", value='')
360 | 
361 |             # Filter data based on user input
362 |             exact_matches = filter_tc_tool(df, drs=drs, crs=crs)
363 |             if keyword:
364 |                 exact_matches = keyword_filter(exact_matches, keyword)
365 | 
366 |             st.session_state['exact_matches'] = exact_matches
367 | 
368 | 
369 |             # Check for close matches if no exact matches are found
370 |             if exact_matches.empty:
371 | 
372 |                 # Check for close matches using truncation-based search
373 |                 close_matches = truncate_search(df, drs, crs)
374 |                 if keyword:
375 |                     close_matches = keyword_filter(close_matches, keyword)
376 | 
377 |                 st.session_state['close_matches'] = close_matches
378 | 
379 |         if 'exact_matches' not in st.session_state:
380 |             st.session_state['exact_matches'] = pd.DataFrame()
381 |         exact_matches = st.session_state['exact_matches']
382 | 
383 |         if 'close_matches' not in st.session_state:
384 |             st.session_state['close_matches'] = pd.DataFrame()
385 |         close_matches = st.session_state['close_matches']
386 | 
387 |         if not exact_matches.empty:
388 |             display_summary_table(exact_matches, 'Filtered Results Summary')
389 |         elif not close_matches.empty:
390 |             display_summary_table(close_matches, 'Filtered Results Summary')
391 |         # else:
392 |         #     display_summary_table(df, 'Filtered Results Summary')
393 | 
394 | 
395 | 
396 | 
397 |         # Display exact matches
398 |         st.subheader("Exact Matches")
399 | 
400 | 
401 |         # Link to the source PDF file
402 |         if selected_fy == 2024:
403 |             st.write("[FY2024 Transactions updated September 2023](https://raw.githubusercontent.com/danshorstein/tc_lookup/main/p2sec3_transactions_2024.pdf)")
404 |         elif selected_fy == 2023:
405 |             st.write("[FY2023 Transactions updated September 2023](https://raw.githubusercontent.com/danshorstein/tc_lookup/main/p1sec3_transactions_2023.pdf)")
406 | 
407 | 
408 |         # Download as Excel functionality
409 |         try:
410 |             towrite = io.BytesIO()
411 | 
412 |             # Create an Excel writer object
413 |             with pd.ExcelWriter(towrite, engine='xlsxwriter') as writer:
414 |                 # Sheet 1: Filtering Criteria and Summary Table
415 |                 criteria_data = {
416 |                     'Criteria': ['Debits', 'Debits', 'Debits', 'Credits', 'Credits', 'Credits', 'Keyword', 'Fiscal Year'],
417 |                     'Values': [drs[0], drs[1], drs[2], crs[0], crs[1], crs[2], keyword, selected_fy]
418 |                 }
419 |                 criteria_df = pd.DataFrame(criteria_data)
420 |                 criteria_df.to_excel(writer, sheet_name='Criteria & Summary', startrow=0, startcol=0, index=False)
421 | 
422 |                 # Get the summary table as a DataFrame
423 |                 if not exact_matches.empty:
424 |                     summary_df = display_summary_table(exact_matches, 'Filtered Results Summary', return_df=True)
425 |                 elif not close_matches.empty:
426 |                     summary_df = display_summary_table(close_matches, 'Filtered Results Summary', return_df=True)
427 |                 else:
428 |                     summary_df = display_summary_table(df, 'Filtered Results Summary', return_df=True)
429 | 
430 |                 # Add the summary table below the filtering criteria
431 |                 summary_df.to_excel(writer, sheet_name='Criteria & Summary', startrow=len(criteria_df) + 2)
432 | 
433 |                 # Sheet 2: Resulting Trans Codes
434 |                 if not exact_matches.empty:
435 |                     exact_matches.to_excel(writer, sheet_name='Trans Codes', index=True, header=True)
436 |                 else:
437 |                     close_matches.to_excel(writer, sheet_name='Trans Codes', index=True, header=True)
438 | 
439 |             towrite.seek(0)
440 |             st.download_button(
441 |                 label="Download Matches as Excel",
442 |                 data=towrite,
443 |                 file_name=f"matches_fy{selected_fy}.xlsx",
444 |                 mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
445 |             )
446 | 
447 |         except Exception as e:
448 |             st.write(f'EXCEPTION! {e}')
449 | 
450 | 
451 | 
452 |         if not exact_matches.empty:
453 |             st.dataframe(exact_matches,
454 |                          column_config=col_config,
455 |                          height=600)
456 | 
457 |         else:
458 |             st.write("No exact matches found.")
459 |             if not close_matches.empty:
460 |                 st.subheader("Close Matches")
461 |                 st.dataframe(close_matches,
462 |                              column_config=col_config,
463 |                              height=600)
464 | 
465 |     with tab2:
466 |         st.header("Upload CSV for Analysis")
467 |         uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
468 | 
469 |         # Create the template DataFrame
470 |         template_df = pd.DataFrame(columns=["ID", "Debit1", "Debit2", "Debit3", "Credit1", "Credit2", "Credit3"])
471 | 
472 |         # Convert the DataFrame to a CSV in-memory object
473 |         csv_data = template_df.to_csv(index=False)
474 | 
475 |         # Offer the CSV for download
476 |         st.download_button(
477 |             label="Download CSV Template",
478 |             data=csv_data,
479 |             file_name="upload_template.csv",
480 |             mime="text/csv",
481 |         )
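        # Example of a filled-in upload file (hypothetical SGL account values, for
        # illustration only): the header row must match the template above, and each
        # Debit*/Credit* cell holds one account; only the first six characters of
        # each cell are used during analysis.
        #
        #   ID,Debit1,Debit2,Debit3,Credit1,Credit2,Credit3
        #   1,480100,,,490100,,
        #   2,610000,,,490200,480100,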
482 | 
483 |         # Transaction Code Categories for selection
484 |         tc_categories = {
485 |             'A': 'A. Funding',
486 |             'B': 'B. Disb and Pbls',
487 |             'C': 'C. Coll and Recvs',
488 |             'D': 'D. Adj/Write-offs/Reclass',
489 |             'E': 'E. Accr/Nonbudg Transfers',
490 |             'F': 'F. Yearend',
491 |             'G': 'G. Memo Entries',
492 |             'H': 'H. Specialized Entries'
493 |         }
494 | 
495 |         selected_values = st.multiselect('Select Trans Code Categories', list(tc_categories.values()), default=list(tc_categories.values()))
496 | 
497 |         categories_selected = [key for key, value in tc_categories.items() if value in selected_values]
498 | 
499 |         # If there's a change in the selected categories, clear the analysis results
500 |         if 'selected_categories' not in st.session_state or set(st.session_state['selected_categories']) != set(categories_selected):
501 |             st.session_state['selected_categories'] = categories_selected
502 |             if 'analyzed_data' in st.session_state:
503 |                 del st.session_state['analyzed_data']
504 | 
505 |         if uploaded_file:
506 |             # Generate a hash of the uploaded file to uniquely identify it
507 |             file_hash = hash(uploaded_file.getvalue())
508 | 
509 |             # Check if the file is different from the previously uploaded file
510 |             if 'uploaded_file_hash' not in st.session_state or st.session_state['uploaded_file_hash'] != file_hash:
511 |                 # Update the session state with the new file's hash
512 |                 st.session_state['uploaded_file_hash'] = file_hash
513 | 
514 |                 # Clear the analysis results from the session state
515 |                 if 'analyzed_data' in st.session_state:
516 |                     del st.session_state['analyzed_data']
517 | 
518 |             fiscal_years = get_fiscal_years_from_filenames()
519 |             selected_fy = fiscal_years[0]
520 |             df_tc_lookup = load_tc_lookup_tool(selected_fy)
521 |             df_results = process_and_analyze_uploaded_csv_with_categories(uploaded_file, df_tc_lookup, categories_selected)
522 |             df_results['Potential Concern'] = df_results.apply(lambda row: potential_concern(row, df_tc_lookup), axis=1)
523 | 
524 |             st.subheader("Analysis Results")
525 |             st.write(df_results)
526 | 
527 |             excel_data = generate_excel_from_analysis(df_results, df_tc_lookup)
528 |             st.download_button(
529 |                 label="Download Analysis as Excel",
530 |                 data=excel_data,
531 |                 file_name="analysis_results.xlsx",
532 |                 mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
533 |             )
534 | 
535 | 
536 | 
537 | streamlit_app()
538 | 
--------------------------------------------------------------------------------
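
Note on the category filter in the File Analysis tab: the multiselect labels map back to their single-letter keys, and the lookup table is then restricted by TC index prefix. A minimal sketch (hypothetical TC codes, assuming the same df_tc_lookup index used above):

    categories_selected = ['A', 'B']
    filtered = df_tc_lookup[df_tc_lookup.index.str.startswith(tuple(categories_selected))]
    # keeps rows whose TC code starts with 'A' or 'B' (e.g. 'A102', 'B110') and drops
    # the rest (e.g. 'C108'), mirroring the selection logic at lines 495-497 and the
    # prefix filter inside process_and_analyze_uploaded_csv_with_categories().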