├── Code
    └── KT Generator
    │   ├── CarbonSnippets.py
    │   ├── CodeParser.py
    │   ├── CreateVideo.py
    │   ├── DIDVideoGenerator.py
    │   ├── ResponseGenerator.py
    │   ├── config.py
    │   └── main.py
├── KT Generator (hackathon).pptx
└── README.md


/Code/KT Generator/CarbonSnippets.py:
--------------------------------------------------------------------------------
 1 | import carbon
 2 | import asyncio
 3 | import os
 4 | 
 5 | 
 6 | def generate_carbon_snippets(code_list, save_path):
 7 |     async def generate_snippet(code, path):
 8 |         cb = carbon.Carbon()  # Create a Carbon instance
 9 |         opts = carbon.CarbonOptions(
10 |             code=code,
11 |             show_window_controls=False,
12 |             language="python",
13 |             theme="vscode",
14 |             background_color=(171, 184, 195, 0),
15 |         )  # Set the options for the image
16 |         image = await cb.generate(opts)  # Generate the image
17 |         await image.save(path)  # Save the image in png format
18 | 
19 |     for index, code in enumerate(code_list):
20 |         path = os.path.join(save_path, f"image_{index}.png")
21 |         asyncio.run(generate_snippet(code, path))
22 | 


--------------------------------------------------------------------------------
/Code/KT Generator/CodeParser.py:
--------------------------------------------------------------------------------
 1 | import ast
 2 | 
 3 | class code_parser:
 4 | 
 5 |     def extract_classes_from_code(self, code: str):
 6 |         parsed_code = ast.parse(code)
 7 |         classes = []
 8 |         for node in parsed_code.body:
 9 |             if isinstance(node, ast.ClassDef):
10 |                 class_info = {
11 |                     "class_name": node.name,
12 |                     "docstring": ast.get_docstring(node),
13 |                     "init_method": None,
14 |                     "methods": [],
15 |                 }
16 | 
17 |                 for class_node in node.body:
18 |                     if isinstance(class_node, ast.FunctionDef):
19 |                         if class_node.name == "__init__":
20 |                             init_method_code = ast.get_source_segment(code, class_node)
21 |                             class_info["init_method"] = init_method_code
22 |                         else:
23 |                             method_code = ast.get_source_segment(code, class_node)
24 |                             class_info["methods"].append(method_code)
25 |                 classes.append(class_info)
26 | 
27 |         output = []
28 |         for class_info in classes:
29 |             class_info["docstring"] = (
30 |                 "" if class_info["docstring"] is None else class_info["docstring"]
31 |             )
32 |             class_information = f"Class {class_info['class_name']} \n {class_info['docstring']} \n {class_info['init_method']}"
33 |             output.append(class_information)
34 |             for method in class_info["methods"]:
35 |                 output.append(method)
36 | 
37 |         return output
38 | 
39 |     def extract_elements(self, source: str):
40 |         node = ast.parse(source)
41 | 
42 |         def is_at_module_level(n):
43 |             for parent in ast.walk(n):
44 |                 if isinstance(parent, (ast.FunctionDef, ast.ClassDef, ast.With)):
45 |                     return False
46 |             return True
47 | 
48 |         elements = []
49 | 
50 |         imports_block = ""
51 | 
52 |         for n in ast.walk(node):
53 |             if isinstance(n, (ast.Import, ast.ImportFrom)):
54 |                 if is_at_module_level(n):
55 |                     start_line = n.lineno - 1
56 |                     end_line = n.lineno
57 |                     imports_block += "".join(source.splitlines(True)[start_line:end_line])
58 |             elif isinstance(n, ast.FunctionDef):
59 |                 if is_at_module_level(n):  # Ensure the function is not within a class
60 |                     start_line = n.lineno - 1
61 |                     end_line = max(
62 |                         (x.lineno for x in ast.walk(n) if hasattr(x, "lineno")), default=n.lineno
63 |                     )
64 |                     elements.append("".join(source.splitlines(True)[start_line:end_line]))
65 |             elif isinstance(n, ast.ClassDef):
66 |                 start_line = n.lineno - 1
67 |                 end_line = max(
68 |                     (x.lineno for x in ast.walk(n) if hasattr(x, "lineno")), default=n.lineno
69 |                 )
70 |                 class_code = "".join(source.splitlines(True)[start_line:end_line])
71 |                 if (end_line - start_line) > 10:
72 |                     classes_functions = self.extract_classes_from_code(class_code)
73 |                     elements.extend(classes_functions)
74 |                 else:
75 |                     elements.append(class_code)
76 |             # Add more handlers if you need to extract more types of nodes
77 | 
78 |         # Prepend the imports block if there were any imports
79 |         if imports_block:
80 |             elements.insert(0, imports_block)
81 | 
82 |         return elements
83 | 
84 | 
if __name__ == "__main__":
    # Manual run: parse a sample file from the working directory and split it
    # into chunks. The result is only bound to a name, not printed or saved.
    with open("test_code_google_calender.py", "r") as handle:
        source = handle.read()

    parser = code_parser()
    extracted_elements = parser.extract_elements(source)
92 | 


--------------------------------------------------------------------------------
/Code/KT Generator/CreateVideo.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from moviepy.editor import *
 3 | import os
 4 | 
 5 | 
 6 | def circular_mask(width, height):
 7 |     center_x = width // 2
 8 |     center_y = height // 2
 9 |     radius = min(center_x, center_y)
10 | 
11 |     mask = np.zeros((height, width), dtype=bool)
12 |     y, x = np.ogrid[:height, :width]
13 |     mask = (x - center_x) ** 2 + (y - center_y) ** 2 <= radius**2
14 |     return mask
15 | 
16 | 
def create_circular_video(video, video_path):
    """Return a copy of ``video`` with everything outside a centered disc blacked out.

    Args:
        video: Clip exposing ``w``, ``h``, ``fps`` and ``iter_frames()``.
        video_path: Path of the file ``video`` was loaded from; its audio
            track is attached to the masked clip.

    Returns:
        An ``ImageSequenceClip`` built from the masked frames, carrying the
        audio loaded from ``video_path``.
    """
    # The mask depends only on the clip size, so build it once instead of
    # once per frame. The extra axis lets it broadcast over the frame's last
    # dimension (assumes frames are (h, w, channels) arrays - TODO confirm).
    mask = circular_mask(video.w, video.h)[:, :, np.newaxis]

    # Multiplying by the boolean mask zeroes every pixel outside the disc.
    masked_frames = [frame * mask for frame in video.iter_frames()]

    video_with_mask = ImageSequenceClip(masked_frames, fps=video.fps)

    # The frame sequence carries no sound, so reload it from the source file.
    audio = AudioFileClip(video_path)
    return video_with_mask.with_audio(audio)
32 | 
33 | 
def get_chunk_clip(video_path, image_path):
    """Build one side-by-side clip from a code-snippet image and its video.

    The video is scaled by a factor of 0.7 and passed through
    ``create_circular_video``; the image is shown for the whole duration of
    the video.

    Args:
        video_path: Path of the video file for this chunk.
        image_path: Path of the snippet image for this chunk.

    Returns:
        The clip returned by ``clips_array`` for a single row holding the
        image first and the masked video second.
    """
    avatar_clip = VideoFileClip(video_path).resize(0.7)

    # A still image has no duration of its own; borrow the video's.
    snippet_clip = ImageClip(image_path).with_duration(avatar_clip.duration)

    circular_avatar = create_circular_video(avatar_clip, video_path)

    return clips_array([[snippet_clip, circular_avatar]])
50 | 
51 | 
def stitch_video(save_path, video_paths, image_paths):
    """Concatenate the summary video and all chunk clips into the final video.

    Reads ``chunk_summaries.mp4`` from ``save_path``, masks it with
    ``create_circular_video``, appends one ``get_chunk_clip`` result per
    ``(video, image)`` pair and writes the result to
    ``video_snippet_concat_summary.mp4`` in ``save_path``.

    Args:
        save_path: Directory holding the summary video; also receives the
            output file.
        video_paths: Paths of the per-chunk videos.
        image_paths: Paths of the per-chunk snippet images, paired with
            ``video_paths`` by position (pairing stops at the shorter list).
    """
    intro_path = os.path.join(save_path, "chunk_summaries.mp4")
    intro_clip = create_circular_video(VideoFileClip(intro_path), intro_path)

    # Summary first, then every chunk in the order given.
    segments = [intro_clip]
    for chunk_video, chunk_image in zip(video_paths, image_paths):
        segments.append(get_chunk_clip(chunk_video, chunk_image))

    final_clip = concatenate_videoclips(segments, method="compose")

    final_clip.write_videofile(
        os.path.join(save_path, "video_snippet_concat_summary.mp4"),
        codec="libx264",
        audio_codec="aac",
    )

    print("Video and image stitching with circular mask complete.")
68 | 
69 | 
if __name__ == "__main__":
    # Manual run against a fixed folder that is expected to contain
    # chunk_0..13.mp4, image_0..13.png and chunk_summaries.mp4.
    save_path = "./kt_gen_jerry_response"
    chunk_count = 14
    videos = [os.path.join(save_path, f"chunk_{n}.mp4") for n in range(chunk_count)]
    images = [os.path.join(save_path, f"image_{n}.png") for n in range(chunk_count)]
    stitch_video(save_path, videos, images)
75 | 


--------------------------------------------------------------------------------
/Code/KT Generator/DIDVideoGenerator.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import requests
 3 | import time
 4 | 
 5 | class DIDVideoGeneration:
 6 | 
 7 |     def __init__(self, source_url, did_authorization_key):
 8 |         self.source_url = source_url
 9 |         self.BASE_URL = "https://api.d-id.com/talks"
10 |         self.HEADERS = {
11 |         "accept": "application/json",
12 |         "content-type": "application/json",
13 |         # NOTE: Avoid hardcoding sensitive information. Ideally, this should be loaded securely
14 |         "authorization": did_authorization_key
15 |     }
16 | 
17 |     def create_talk(self, text):
18 |         payload = {
19 |             "script": {
20 |                 "type": "text",
21 |                 "subtitles": "false",
22 |                 "provider": {
23 |                     "type": "microsoft",
24 |                     "voice_id": "Guy"
25 |                 },
26 |                 "ssml": "false",
27 |                 "input": text
28 |             },
29 |             "config": {
30 |                 "fluent": "false",
31 |                 "pad_audio": "0.0"
32 |             },
33 |             "source_url": self.source_url
34 |         }
35 |         response = requests.post(self.BASE_URL, json=payload, headers=self.HEADERS)
36 |         return json.loads(response.text)["id"]
37 | 
38 |     def get_talk(self, talk_id):
39 |         response = requests.get(f"{self.BASE_URL}/{talk_id}", headers=self.HEADERS)
40 |         return json.loads(response.text)["result_url"]
41 | 
42 |     def download_video(self, result_url, folder_name, output_file_name):
43 |         response = requests.get(result_url)
44 |         response.raise_for_status()
45 |         with open(f"{folder_name}/{output_file_name}", "wb") as file:
46 |             file.write(response.content)
47 | 
48 |     def process_chunk(self, chunk, i, folder_name):
49 |         talk_id = self.create_talk(chunk)
50 |         time.sleep(60)  # Wait for processing
51 |         result_url = self.get_talk(talk_id)
52 |         self.download_video(result_url, folder_name, f"chunk_{i}.mp4")


--------------------------------------------------------------------------------
/Code/KT Generator/ResponseGenerator.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | 
  4 | from llama_index import ListIndex
  5 | from llama_index import ServiceContext
  6 | from llama_index.llms import OpenAI
  7 | from llama_index.llms.palm import PaLM
  8 | from llama_index.response_synthesizers import get_response_synthesizer
  9 | from llama_index.schema import NodeRelationship
 10 | from llama_index.schema import RelatedNodeInfo
 11 | from llama_index.schema import TextNode
 12 | import openai
 13 | 
 14 | class ServiceConfiguration:
 15 |     def __init__(self, api_key, model_name):
 16 |         if model_name == "PaLM":
 17 |             self.llm = PaLM(api_key=api_key)
 18 |         else:
 19 |             openai.api_key = api_key
 20 |             self.llm = OpenAI(model=model_name, temperature=0, max_tokens=512)
 21 | 
 22 |     def get_service_context(self):
 23 |         return ServiceContext.from_defaults(llm=self.llm)
 24 | class TextNodeManager:
 25 |     @staticmethod
 26 |     def get_nodes(texts):
 27 |         nodes = [TextNode(text=text, id_=str(idx)) for idx, text in enumerate(texts, start=1)]
 28 |         TextNodeManager._set_relationships(nodes)
 29 |         return nodes
 30 | 
 31 |     @staticmethod
 32 |     def _set_relationships(nodes):
 33 |         for idx, node in enumerate(nodes):
 34 |             if idx > 0:
 35 |                 node.relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(
 36 |                     node_id=nodes[idx - 1].node_id
 37 |                 )
 38 |             if idx < len(nodes) - 1:
 39 |                 node.relationships[NodeRelationship.NEXT] = RelatedNodeInfo(
 40 |                     node_id=nodes[idx + 1].node_id
 41 |                 )
 42 | 
 43 |         return nodes
 44 | 
 45 | 
 46 | class ResponseParser:
 47 |     PATTERN = r"Response \d+: \n(.*?)(?:\n---------------------|$)"
 48 | 
 49 |     @staticmethod
 50 |     def parse(response):
 51 |         return [resp.strip() for resp in re.findall(ResponseParser.PATTERN, response, re.DOTALL)]
 52 | 
 53 | 
 54 | class PromptManager:
 55 |     def __init__(self):
 56 |         self.short_line_description_prompt = (
 57 |             "Give a simple one-line description of what the code does?"
 58 |         )
 59 |         self.explanation_prompt = (
 60 |             "Give an explanation in 40 words maximum for the given code base. "
 61 |             "Don't include any code in your explanation. If the code is about import statements, "
 62 |             "give an overall explanation for import statements."
 63 |         )
 64 |         self.summary_prompt = "Give short summary for given codebase."
 65 | 
 66 |     def get_short_summaries_prompt(self):
 67 |         return self.short_line_description_prompt
 68 | 
 69 |     def get_explanation_prompt(self):
 70 |         return self.explanation_prompt
 71 | 
 72 |     def get_summary_prompt(self):
 73 |         return self.summary_prompt
 74 | 
 75 | 
 76 | class QueryHandler:
 77 |     def __init__(self, nodes, service_context):
 78 |         self.index = ListIndex(nodes, service_context=service_context)
 79 | 
 80 |         self.prompt_manager = PromptManager()
 81 | 
 82 |     def get_response(self, prompt="short_summaries"):
 83 |         query = ""
 84 |         response_mode = ""
 85 |         if prompt == "short_summaries":
 86 |             query = self.prompt_manager.get_short_summaries_prompt()
 87 |             response_mode = "accumulate"
 88 |         elif prompt == "explaination":
 89 |             query = self.prompt_manager.get_explanation_prompt()
 90 |             response_mode = "accumulate"
 91 |         elif prompt == "summary":
 92 |             query = self.prompt_manager.get_summary_prompt()
 93 |             response_mode = "tree_summarize"
 94 | 
 95 |         response_synthesizer = get_response_synthesizer(response_mode=response_mode)
 96 |         query_engine = self.index.as_query_engine(response_synthesizer=response_synthesizer)
 97 |         return query_engine.query(query)
 98 | 
 99 |     @staticmethod
100 |     def modify_texts(original_texts, short_summaries):
101 |         new_texts = [original_texts[0]]
102 |         for i in range(1, len(original_texts)):
103 |             new_text = f"The previous code has the following explanation: \n {short_summaries[i-1]}. \n Use this explanation only if required to explain the following code. \n {original_texts[i]}"
104 |             new_texts.append(new_text)
105 |         return new_texts
106 | 


--------------------------------------------------------------------------------
/Code/KT Generator/config.py:
--------------------------------------------------------------------------------
# Settings for the KT video pipeline. Replace every <...> placeholder before running.
model_api_key = "<LLM_API_KEY>" # API key for the LLM selected by model_name (PaLM by default)
model_name = "PaLM" # "PaLM", or any OpenAI model name (model_api_key must then be an OpenAI key)
save_path = "./kt_gen" # directory where the generated images and videos are saved
avatar_image_url = "<AVATAR_IMAGE_URL>" # hosted image URL used as the avatar for the generated videos
test_code = "<PATH TO PYTHON CODE FILE TO BE TESTED>" # path of the Python file for which the KT video is generated
did_authorization_key = "<DID AUTHORIZATION KEY>" # D-ID authorization key


--------------------------------------------------------------------------------
/Code/KT Generator/main.py:
--------------------------------------------------------------------------------
 1 | # %%
 2 | from CarbonSnippets import *
 3 | from CodeParser import *
 4 | from CreateVideo import *
 5 | from DIDVideoGenerator import *
 6 | from ResponseGenerator import *
 7 | from config import *
 8 | import openai
 9 | 
10 | # Split the code using parser
11 | with open(test_code, "r") as f:
12 |     source = f.read()
13 | 
14 | codeparser = code_parser()
15 | extracted_elements = codeparser.extract_elements(source)
16 | 
17 | # %%
18 | # Generate carbon snippets
19 | generate_carbon_snippets(extracted_elements, save_path)
20 | 
21 | # %%
22 | # Generate Explainations and Summaries
23 | service_context_manager = ServiceConfiguration(model_api_key, model_name)
24 | service_context = service_context_manager.get_service_context()
25 | text_node_manager = TextNodeManager()
26 | response_parse_manager = ResponseParser()
27 | 
28 | # Generate short summary
29 | nodes = text_node_manager.get_nodes(extracted_elements)
30 | 
31 | query_handler = QueryHandler(nodes, service_context)
32 | short_summary_response = query_handler.get_response("short_summaries")
33 | short_summaries = response_parse_manager.parse(short_summary_response.response)
34 | summary = query_handler.get_response("summary").response
35 | 
36 | # Generate explainations
37 | new_texts = query_handler.modify_texts(extracted_elements, short_summaries)
38 | new_nodes = text_node_manager.get_nodes(new_texts)
39 | 
40 | query_handler = QueryHandler(new_nodes, service_context)
41 | explaination_response = query_handler.get_response("explaination")
42 | explaination_summaries = response_parse_manager.parse(explaination_response.response)
43 | 
44 | # %%
45 | # Generate video
46 | video_processor = DIDVideoGeneration(source_url=avatar_image_url, did_authorization_key=did_authorization_key)
47 | 
48 | video_processor.process_chunk(summary, "summaries", save_path)
49 | for index, chunk in enumerate(explaination_summaries):
50 |     video_processor.process_chunk(chunk, index, save_path)
51 | 
52 | # %%
53 | # Stitch videos and images together
54 | 
55 | video_paths = [os.path.join(save_path, f"chunk_{i}.mp4") for i in range(len(extracted_elements))]
56 | image_paths = [os.path.join(save_path, f"image_{i}.png") for i in range(len(extracted_elements))]
57 | stitch_video(save_path, video_paths, image_paths)
58 | 


--------------------------------------------------------------------------------
/KT Generator (hackathon).pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ravi03071991/KT_Generator/be53f49bfaf739ecab5385a900d2091d1f2177ff/KT Generator (hackathon).pptx


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # KT_Generator
 2 | 
 3 | Repository for the code base of the KT Generation process that we worked on at the Google Cloud, Searce and LifeSight GenAI Hackathon.
 4 | 
 5 | ## Repo structure
 6 | 
 7 | - Code
 8 | - Presentation
 9 | - README.md
10 | 
11 | ### Code
12 | 
13 | This folder contains the code base for the KT Generation process.
14 | 
15 | - KT Generator
16 |     - `CodeParser.py`: This file contains the code to parse the code base and chunk it into logical code blocks (chunks).
17 |     - `CarbonSnippets.py`: This file generates the `carbon.now` snippets for the code blocks.
18 |     - `ResponseGenerator.py`: This file generates the explanation and the code summary using LlamaIndex and the PaLM LLM.
19 |     - `DIDVideoGenerator.py`: This file generates the D-ID video avatar using the code explanations for all the chunks.
20 |     - `CreateVideo.py`: This file stitches the final video using the videos and the code snippets and the summary.
21 |     - `main.py`: This file is the main file that runs the entire process.
22 |     - `config.py`: This file contains the API keys, model name and other settings needed to run `main.py`.
23 |     
24 | ### Usage
25 | 
26 | - Update the details in `config.py`, then run `main.py` to generate the KT video for your file.
27 | 
28 | ```bash
29 | python main.py
30 | ```
31 | 


--------------------------------------------------------------------------------