├── Version.xml
├── Templates
│   ├── Version.xml
│   ├── Dotnet
│   │   └── UDF
│   │       └── HelloFabric
│   │           ├── Deploy.zip
│   │           ├── SourceCode.zip
│   │           ├── FabricFunctions.cs
│   │           └── functions.metadata
│   └── Python
│       └── UDF
│           └── HelloFabric
│               ├── Deploy.zip
│               ├── SourceCode.zip
│               ├── functions.metadata
│               ├── function_app.py
│               └── .github
│                   └── copilot-instructions.md
├── images
│   ├── add-function-in-VS-Code.png
│   ├── view-code-samples-in-VS-Code.png
│   └── sample-code-snippet-functions-fabric-portal.png
├── PYTHON
│   ├── DataManipulation
│   │   ├── mask_credit_card.py
│   │   ├── sentiment_analysis.py
│   │   ├── transform_data_with_numpy.py
│   │   ├── manipulate_data_with_pandas.py
│   │   └── vectorize_strings.py
│   ├── UDFDataTypes
│   │   ├── raise_userthrownerror.py
│   │   └── use_userdatafunctioncontext.py
│   ├── pandas
│   │   ├── series-example.py
│   │   └── dataframe-example.py
│   ├── SQLDB
│   │   ├── read_from_sql_db.py
│   │   ├── write_many_rows_to_sql_db.py
│   │   └── write_one_row_to_sql_db.py
│   ├── Lakehouse
│   │   ├── query_data_from_tables.py
│   │   ├── read_csv_file_from_lakehouse.py
│   │   ├── read_csv_from_lakehouse_async.py
│   │   ├── write_csv_file_in_lakehouse.py
│   │   └── read_parquet_from_lakehouse.py
│   ├── Warehouse
│   │   ├── query_data_from_warehouse.py
│   │   └── export_warehouse_data_to_lakehouse.py
│   ├── VariableLibrary
│   │   ├── get_variables_from_library.py
│   │   └── chat_completion_with_azure_openai.py
│   ├── CosmosDB
│   │   ├── read_item.py
│   │   ├── create_item.py
│   │   ├── query_items.py
│   │   ├── update_item.py
│   │   └── vector_query_items.py
│   ├── samples-llms.txt
│   └── index.json
├── LICENSE
├── SECURITY.md
├── .github
│   └── instructions
│       └── generate-python-samples-llms-txt.instructions.md
└── README.md

/Version.xml:
--------------------------------------------------------------------------------
1 | 
2 | 2025.9.8.2
3 | 
--------------------------------------------------------------------------------
/Templates/Version.xml:
--------------------------------------------------------------------------------
1 | 
2 | 2025.9.25.1
3 | 
--------------------------------------------------------------------------------
/images/add-function-in-VS-Code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/HEAD/images/add-function-in-VS-Code.png
--------------------------------------------------------------------------------
/images/view-code-samples-in-VS-Code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/HEAD/images/view-code-samples-in-VS-Code.png
--------------------------------------------------------------------------------
/Templates/Dotnet/UDF/HelloFabric/Deploy.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/HEAD/Templates/Dotnet/UDF/HelloFabric/Deploy.zip
--------------------------------------------------------------------------------
/Templates/Python/UDF/HelloFabric/Deploy.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/HEAD/Templates/Python/UDF/HelloFabric/Deploy.zip
--------------------------------------------------------------------------------
/Templates/Dotnet/UDF/HelloFabric/SourceCode.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/HEAD/Templates/Dotnet/UDF/HelloFabric/SourceCode.zip
--------------------------------------------------------------------------------
/Templates/Python/UDF/HelloFabric/SourceCode.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/HEAD/Templates/Python/UDF/HelloFabric/SourceCode.zip
--------------------------------------------------------------------------------
/images/sample-code-snippet-functions-fabric-portal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/HEAD/images/sample-code-snippet-functions-fabric-portal.png
--------------------------------------------------------------------------------
/Templates/Python/UDF/HelloFabric/functions.metadata:
--------------------------------------------------------------------------------
1 | [{"name": "hello_fabric", "scriptFile": "function_app.py", "bindings": [{"name": "req", "direction": "In", "type": "httpTrigger", "methods": ["post"], "route": "hello_fabric", "authLevel": "Anonymous"}], "fabricProperties": {"fabricFunctionReturnType": "str", "fabricFunctionParameters": [{"name": "name", "dataType": "str"}]}}]
--------------------------------------------------------------------------------
/Templates/Python/UDF/HelloFabric/function_app.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import fabric.functions as fn
 3 | import logging
 4 | 
 5 | udf = fn.UserDataFunctions()
 6 | 
 7 | @udf.function()
 8 | def hello_fabric(name: str) -> str:
 9 |     logging.info('Python UDF trigger function processed a request.')
10 | 
11 |     return f"Welcome to Fabric Functions, {name}, at {datetime.datetime.now()}!"
12 | 
--------------------------------------------------------------------------------
/Templates/Dotnet/UDF/HelloFabric/FabricFunctions.cs:
--------------------------------------------------------------------------------
 1 | using Microsoft.Azure.Functions.Worker;
 2 | using Microsoft.Extensions.Logging;
 3 | 
 4 | namespace HelloFabric
 5 | {
 6 |     public class FabricFunctions
 7 |     {
 8 |         private readonly ILogger _logger;
 9 | 
10 |         public FabricFunctions(ILoggerFactory loggerFactory)
11 |         {
12 |             _logger = loggerFactory.CreateLogger<FabricFunctions>();
13 |         }
14 | 
15 |         [Function(nameof(HelloFabric))]
16 |         public string HelloFabric(string name)
17 |         {
18 |             _logger.LogInformation("C# Fabric data function is called.");
19 | 
20 |             return $"Welcome to Fabric Functions, {name}, at {DateTime.Now}!";
21 |         }
22 |     }
23 | }
24 | 
--------------------------------------------------------------------------------
/PYTHON/DataManipulation/mask_credit_card.py:
--------------------------------------------------------------------------------
 1 | 
 2 | @udf.function()
 3 | def mask_credit_card(cardnumber: int) -> str:
 4 |     '''
 5 |     Description: Mask credit card number showing only the last 4 digits.
6 | 7 | Args: 8 | - cardnumber (int): Credit card number to be masked 9 | 10 | Returns: str: Masked credit card number with asterisks except last 4 digits 11 | ''' 12 | # Convert the card number to a string 13 | numberstr = str(cardnumber) 14 | 15 | # Check if the card number is valid 16 | if not numberstr.isdigit() or not (13 <= len(numberstr) <= 19): 17 | raise ValueError("Invalid credit card number") 18 | 19 | # Mask all but the last four digits 20 | masked_number = '*' * (len(numberstr) - 4) + numberstr[-4:] 21 | 22 | return str(masked_number) 23 | -------------------------------------------------------------------------------- /PYTHON/DataManipulation/sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | 2 | from textblob import TextBlob 3 | 4 | @udf.function() 5 | def analyze_sentiment(text: str) -> str: 6 | ''' 7 | Description: Analyze sentiment of input text using TextBlob and classify as Happy, Sad, or Neutral. 8 | 9 | Args: 10 | - text (str): Input text string to analyze for sentiment 11 | 12 | Returns: str: Formatted message with text and sentiment classification (Happy/Sad/Neutral) 13 | ''' 14 | 15 | sentimentscore= TextBlob(text).sentiment.polarity 16 | sentiment= "N/A" 17 | # Classify sentiment based on polarity value 18 | if sentimentscore > 0.1: 19 | sentiment= "Happy" 20 | elif sentimentscore < -0.1: 21 | sentiment="Sad" 22 | else: 23 | sentiment="Neutral" 24 | 25 | return f"Sentiment for {text} is {sentiment}" 26 | -------------------------------------------------------------------------------- /PYTHON/UDFDataTypes/raise_userthrownerror.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | @udf.function() 4 | def raise_userthrownerror(age: int)-> str: 5 | ''' 6 | Description: Validate user age and return welcome message or raise error for underage users. 7 | 8 | Args: 9 | age (int): User's age to validate. 10 | 11 | Returns: 12 | str: Welcome message with current timestamp. 13 | 14 | Raises: 15 | fn.UserThrownError: If age is less than 18. 16 | 17 | Example: 18 | raise_userthrownerror(25) returns "Welcome to Fabric Functions at 2025-07-01 10:30:00!" 19 | raise_userthrownerror(16) raises UserThrownError 20 | ''' 21 | if age < 18: 22 | raise fn.UserThrownError("You must be 18 years or older to use this service.", {"age": age}) 23 | 24 | return f"Welcome to Fabric Functions at {datetime.datetime.now()}!" 25 | -------------------------------------------------------------------------------- /PYTHON/DataManipulation/transform_data_with_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | 4 | @udf.function() 5 | def transform_data(data: list) -> dict: 6 | ''' 7 | Description: Transform 1D list to normalized numpy array and calculate mean. 
8 | 9 | Args: 10 | - data (list): Input 1D list of numeric values 11 | Example: [1, 2, 3, 4, 5] 12 | 13 | Returns: dict: Dictionary containing normalized data array and mean value 14 | ''' 15 | # Convert the 1D list to a numpy array 16 | np_data = np.array(data) 17 | 18 | # Normalize the data (scale values to range [0, 1]) 19 | min_vals = np.min(np_data, axis=0) 20 | max_vals = np.max(np_data, axis=0) 21 | normalized_data = (np_data - min_vals) / (max_vals - min_vals) 22 | # Calculate the mean of each column 23 | column_means = np.mean(np_data, axis=0) 24 | norm = np.array(normalized_data) 25 | 26 | return { "NormalizedData": norm.tolist(), "Mean": float(column_means) } 27 | -------------------------------------------------------------------------------- /Templates/Dotnet/UDF/HelloFabric/functions.metadata: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "HelloFabric", 4 | "scriptFile": "HelloFabric.dll", 5 | "entryPoint": "HelloFabric.FabricFunctions.HelloFabric", 6 | "language": "dotnet-isolated", 7 | "properties": { 8 | "IsCodeless": false 9 | }, 10 | "bindings": [ 11 | { 12 | "name": "data", 13 | "direction": "In", 14 | "authLevel": "Anonymous", 15 | "fabricBinding": true, 16 | "type": "httpTrigger", 17 | "methods": [ 18 | "post" 19 | ] 20 | }, 21 | { 22 | "name": "$return", 23 | "direction": "Out", 24 | "type": "http", 25 | "fabricBinding": true 26 | } 27 | ], 28 | "fabricProperties": { 29 | "fabricFunctionParameters": [ 30 | { 31 | "dataType": "String", 32 | "name": "name" 33 | } 34 | ], 35 | "fabricFunctionReturnType": "String" 36 | } 37 | } 38 | ] -------------------------------------------------------------------------------- /PYTHON/UDFDataTypes/use_userdatafunctioncontext.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | @udf.context(argName="udfContext") 4 | @udf.function() 5 | def get_function_invocation_details(udfContext: fn.UserDataFunctionContext) -> str: 6 | ''' 7 | Description: Get function invocation details including user info and invocation ID. 8 | 9 | Args: 10 | udfContext (fn.UserDataFunctionContext): Context containing invocation metadata. 11 | 12 | Returns: 13 | str: Welcome message with username, timestamp, and invocation ID. 14 | 15 | Example: 16 | Returns "Welcome to Fabric Functions, user@example.com, at 2025-07-01 10:30:00! Invocation ID: abc123" 17 | ''' 18 | invocation_id = udfContext.invocation_id 19 | invoking_users_username = udfContext.executing_user['PreferredUsername'] 20 | # Other executing_user keys include: 'Oid', 'TenantId' 21 | 22 | return f"Welcome to Fabric Functions, {invoking_users_username}, at {datetime.datetime.now()}! Invocation ID: {invocation_id}" 23 | -------------------------------------------------------------------------------- /PYTHON/pandas/series-example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | @udf.function() 4 | def summarize_age_distribution(ageSeries: pd.Series) -> str: 5 | """ 6 | Summarizes the distribution of ages in a Pandas Series. 7 | 8 | Args: 9 | ageSeries (pd.Series): Series containing age values. 10 | Example: [23, 45, 31, 35, 29, 41, 38, 27] 11 | Returns: 12 | str: Summary string describing the distribution. 13 | """ 14 | try: 15 | if ageSeries.empty: 16 | return "No age data provided." 
17 | summary = ( 18 | f"Age Summary:\n" 19 | f"- Count: {ageSeries.count()}\n" 20 | f"- Mean: {ageSeries.mean():.2f}\n" 21 | f"- Median: {ageSeries.median():.2f}\n" 22 | f"- Min: {ageSeries.min()}\n" 23 | f"- Max: {ageSeries.max()}\n" 24 | f"- Std Dev: {ageSeries.std():.2f}" 25 | ) 26 | return summary 27 | 28 | except Exception as e: 29 | return f"Error processing age data: {str(e)}" 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Microsoft 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PYTHON/DataManipulation/manipulate_data_with_pandas.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | 4 | @udf.function() 5 | def manipulate_data(data: list) -> list: 6 | ''' 7 | Description: Manipulate data using pandas to group by age categories and calculate mean ages. 8 | 9 | Args: 10 | - data (list): List of dictionaries containing Name, Age, and Gender fields 11 | Example: [{"Name": "John", "Age": 22, "Gender": "male"}, {"Name": "Jane", "Age": 17, "Gender": "female"}] 12 | 13 | Returns: list: JSON records with AgeGroup (Adult/Minor) and mean Age values 14 | ''' 15 | # Convert the data dictionary to a DataFrame 16 | df = pd.DataFrame(data) 17 | # Perform basic data manipulation 18 | # Example: Add a new column 'AgeGroup' based on the 'Age' column 19 | df['AgeGroup'] = df['Age'].apply(lambda x: 'Adult' if x >= 18 else 'Minor') 20 | 21 | # Example: Filter rows where 'Age' is greater than 30 22 | # df_filtered = df[df["Age"] > 30] 23 | 24 | # Example: Group by 'AgeGroup' and calculate the mean age 25 | df_grouped = df.groupby("AgeGroup")["Age"].mean().reset_index() 26 | 27 | return df_grouped.to_json(orient='records') 28 | -------------------------------------------------------------------------------- /PYTHON/DataManipulation/vectorize_strings.py: -------------------------------------------------------------------------------- 1 | from sklearn.feature_extraction.text import CountVectorizer 2 | 3 | @udf.function() 4 | def vectorize_string(text: str) -> str: 5 | ''' 6 | Description: Vectorize a string of text using CountVectorizer and return vectorized representation. 
7 | 8 | Args: 9 | - text (str): Input text string to be vectorized 10 | 11 | Returns: str: Formatted string containing vectorized text array and feature names 12 | ''' 13 | try: 14 | # Initialize the CountVectorizer 15 | vectorizer = CountVectorizer() 16 | 17 | # Fit and transform the input text to vectorize it 18 | vectorized_text = vectorizer.fit_transform([text]) 19 | vectors = ''.join(str(x) for x in vectorized_text.toarray()) 20 | featurenames= " ,".join(str(x) for x in vectorizer.get_feature_names_out()) 21 | print("Vectorized text:\n", vectorized_text.toarray()) 22 | print("Feature names:\n",vectorizer.get_feature_names_out()) 23 | return "vectorized_text: " + vectors + "\nfeature_names: " + featurenames 24 | except Exception as e: 25 | return "An error occurred during vectorization: " + str(e) 26 | -------------------------------------------------------------------------------- /PYTHON/SQLDB/read_from_sql_db.py: -------------------------------------------------------------------------------- 1 | # Select 'Manage connections' and add a connection to a Fabric SQL Database 2 | # Replace the alias "" with your connection alias. 3 | @udf.connection(argName="sqlDB",alias="") 4 | @udf.function() 5 | def read_from_sql_db(sqlDB: fn.FabricSqlConnection)-> list: 6 | ''' 7 | Description: Read employee data from SQL database using sample query. 8 | 9 | Args: 10 | sqlDB (fn.FabricSqlConnection): Fabric SQL database connection. 11 | 12 | Returns: 13 | list: Employee records as tuples with name and department ID. 14 | 15 | Example: 16 | Returns [('John Smith', 31), ('Kayla Jones', 33)] 17 | ''' 18 | 19 | # Replace with the query you want to run 20 | query = "SELECT * FROM (VALUES ('John Smith', 31), ('Kayla Jones', 33)) AS Employee(EmpName, DepID);" 21 | 22 | # Establish a connection to the SQL database 23 | connection = sqlDB.connect() 24 | cursor = connection.cursor() 25 | 26 | # Execute the query 27 | cursor.execute(query) 28 | 29 | # Fetch all results 30 | results = [] 31 | for row in cursor.fetchall(): 32 | results.append(row) 33 | 34 | # Close the connection 35 | cursor.close() 36 | connection.close() 37 | 38 | return results 39 | 40 | 41 | -------------------------------------------------------------------------------- /PYTHON/Lakehouse/query_data_from_tables.py: -------------------------------------------------------------------------------- 1 | 2 | import datetime 3 | # Select 'Manage connections' and add a connection to a Lakehouse. 4 | # Replace the alias "" with your connection alias. 5 | @udf.connection(argName="myLakehouse", alias="") 6 | @udf.function() 7 | def query_data_from_tables(myLakehouse: fn.FabricLakehouseClient) -> list: 8 | ''' 9 | Description: Query employee data from lakehouse tables and return as JSON objects. 
10 | 11 | Args: 12 | - myLakehouse (fn.FabricLakehouseClient): Fabric lakehouse connection 13 | 14 | Returns: list: Employee records as dictionaries with EmpName and DepID fields 15 | ''' 16 | # Connect to the Lakehouse SQL Endpoint 17 | connection = myLakehouse.connectToSql() 18 | 19 | # Use connection to execute a query 20 | cursor = connection.cursor() 21 | cursor.execute(f"SELECT * FROM (VALUES ('John Smith', 31) , ('Kayla Jones', 33)) AS Employee(EmpName, DepID);") 22 | 23 | rows = [x for x in cursor] 24 | columnNames = [x[0] for x in cursor.description] 25 | 26 | # Turn the rows into a json object 27 | values = [] 28 | for row in rows: 29 | item = {} 30 | for prop, val in zip(columnNames, row): 31 | if isinstance(val, (datetime.date, datetime.datetime)): 32 | val = val.isoformat() 33 | item[prop] = val 34 | values.append(item) 35 | 36 | # Close the connection 37 | cursor.close() 38 | connection.close() 39 | 40 | return values 41 | 42 | -------------------------------------------------------------------------------- /PYTHON/Lakehouse/read_csv_file_from_lakehouse.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | # Select 'Manage connections' and add a connection to a Lakehouse which has a CSV file 3 | # Replace the alias "" with your connection alias. 4 | @udf.connection(argName="myLakehouse", alias="") 5 | @udf.function() 6 | def read_csv_from_lakehouse(myLakehouse: fn.FabricLakehouseClient, csvFileName: str) -> str: 7 | ''' 8 | Description: Read CSV file from lakehouse and return data as formatted string. 9 | 10 | Args: 11 | myLakehouse (fn.FabricLakehouseClient): Fabric lakehouse connection. 12 | csvFileName (str): CSV file name in the lakehouse Files folder. 13 | 14 | Returns: 15 | str: Confirmation message with formatted CSV data rows. 16 | ''' 17 | # Connect to the Lakehouse 18 | connection = myLakehouse.connectToFiles() 19 | 20 | # Download the CSV file from the Lakehouse 21 | csvFile = connection.get_file_client(csvFileName) 22 | downloadFile=csvFile.download_file() 23 | csvData = downloadFile.readall() 24 | 25 | # Read the CSV data into a pandas DataFrame 26 | from io import StringIO 27 | df = pd.read_csv(StringIO(csvData.decode('utf-8'))) 28 | 29 | # Display the DataFrame 30 | result="" 31 | for index, row in df.iterrows(): 32 | result=result + "["+ (",".join([str(item) for item in row]))+"]" 33 | 34 | # Close the connection 35 | csvFile.close() 36 | connection.close() 37 | 38 | return f"CSV file read successfully.{result}" 39 | -------------------------------------------------------------------------------- /PYTHON/Warehouse/query_data_from_warehouse.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | #Select 'Manage connections' to connect to a Warehouse 3 | #Replace the alias "" with your connection alias. 4 | @udf.connection(argName="myWarehouse", alias="") 5 | @udf.function() 6 | def query_data_from_warehouse(myWarehouse: fn.FabricSqlConnection) -> list: 7 | ''' 8 | Description: Query employee data from a Fabric warehouse and return as JSON objects. 9 | 10 | Args: 11 | myWarehouse (fn.FabricSqlConnection): Fabric warehouse connection. 12 | 13 | Returns: 14 | list: Employee records as dictionaries with EmpName and DepID fields. 
15 | 16 | Example: 17 | Returns [{'EmpName': 'John Smith', 'DepID': 31}, {'EmpName': 'Kayla Jones', 'DepID': 33}] 18 | ''' 19 | whSqlConnection = myWarehouse.connect() 20 | # Use connection to execute a query 21 | cursor = whSqlConnection.cursor() 22 | cursor.execute(f"SELECT * FROM (VALUES ('John Smith', 31) , ('Kayla Jones', 33)) AS Employee(EmpName, DepID);") 23 | 24 | rows = [x for x in cursor] 25 | columnNames = [x[0] for x in cursor.description] 26 | # Turn the rows into a json object 27 | values = [] 28 | for row in rows: 29 | item = {} 30 | for prop, val in zip(columnNames, row): 31 | if isinstance(val, (datetime.date, datetime.datetime)): 32 | val = val.isoformat() 33 | item[prop] = val 34 | values.append(item) 35 | 36 | cursor.close() 37 | whSqlConnection.close() 38 | 39 | return values 40 | -------------------------------------------------------------------------------- /PYTHON/Lakehouse/read_csv_from_lakehouse_async.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | # Select 'Manage connections' and add a connection to a Lakehouse which has a CSV file 4 | # Replace the alias "" with your connection alias. 5 | @udf.connection(argName="myLakehouse", alias="") 6 | @udf.function() 7 | async def read_csv_from_lakehouse(myLakehouse: fn.FabricLakehouseClient, csvFileName: str) -> str: 8 | ''' 9 | Description: Read CSV file from lakehouse and return data as formatted string. 10 | 11 | Args: 12 | myLakehouse (fn.FabricLakehouseClient): Fabric lakehouse connection. 13 | csvFileName (str): CSV file name in the lakehouse Files folder. 14 | 15 | Returns: 16 | str: Confirmation message with formatted CSV data rows. 17 | ''' 18 | 19 | # Connect to the Lakehouse 20 | connection = myLakehouse.connectToFilesAsync() 21 | 22 | # Download the CSV file from the Lakehouse 23 | csvFile = connection.get_file_client(csvFileName) 24 | 25 | downloadFile = await csvFile.download_file() 26 | csvData = await downloadFile.readall() 27 | 28 | # Read the CSV data into a pandas DataFrame 29 | from io import StringIO 30 | df = pd.read_csv(StringIO(csvData.decode('utf-8'))) 31 | 32 | # Display the DataFrame 33 | result="" 34 | for index, row in df.iterrows(): 35 | result=result + "["+ (",".join([str(item) for item in row]))+"]" 36 | 37 | # Close the connection 38 | csvFile.close() 39 | connection.close() 40 | 41 | return f"CSV file read successfully.{result}" 42 | -------------------------------------------------------------------------------- /PYTHON/VariableLibrary/get_variables_from_library.py: -------------------------------------------------------------------------------- 1 | # Select 'Manage connections' and add a connection to a Variable Library 2 | # Replace the alias "" with your connection alias. 3 | from datetime import datetime 4 | @udf.connection(argName="varLib", alias="") 5 | @udf.function() 6 | def standardize_date(rawDate: str, varLib: fn.FabricVariablesClient) -> str: 7 | ''' 8 | Description: Standardize date format using configuration from Variable Library before data ingestion. 9 | 10 | Args: 11 | rawDate (str): Raw date string in desired format. 12 | varLib (fn.FabricVariablesClient): Fabric Variable Library connection. 13 | 14 | Returns: 15 | str: Standardized date in the format specified in Variable Library. 
16 | 17 | Example: 18 | Assumes Variable Library contains: DATE_FORMAT = "%Y-%m-%d" 19 | standardize_date("15/10/2025", varLib) returns "Standardized Date: 2025-10-15" 20 | ''' 21 | # Retrieve all variables from the Variable Library 22 | variables = varLib.getVariables() 23 | 24 | # Get desired format from environment or use default 25 | date_format = variables["DATE_FORMAT"] 26 | 27 | try: 28 | # Assume input is DD/MM/YYYY 29 | parsed_date = datetime.strptime(rawDate, "%d/%m/%Y") 30 | # Convert to standardized format from Variable Library 31 | standardized_date = parsed_date.strftime(date_format) 32 | except ValueError: 33 | standardized_date = "Invalid Date" 34 | 35 | return f"Standardized Date: {standardized_date}" 36 | 37 | -------------------------------------------------------------------------------- /PYTHON/Lakehouse/write_csv_file_in_lakehouse.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import datetime 3 | # Select 'Manage connections' and add a connection to a Lakehouse. 4 | #Replace the alias "" with your connection alias. 5 | @udf.connection(argName="myLakehouse", alias="") 6 | @udf.function() 7 | def write_csv_file_in_lakehouse(myLakehouse: fn.FabricLakehouseClient, employees: list)-> str: 8 | ''' 9 | Description: Write employee data to lakehouse as timestamped CSV file using pandas. 10 | 11 | Args: 12 | myLakehouse (fn.FabricLakehouseClient): Fabric lakehouse connection. 13 | employees (list): List of employee records as [ID, Name, DeptID] arrays. 14 | 15 | Returns: 16 | str: Confirmation message with filename and viewing instructions. 17 | 18 | Example: 19 | employees = [[1,"John Smith", 31], [2,"Kayla Jones", 33]] 20 | Creates "Employees1672531200.csv" in lakehouse 21 | ''' 22 | 23 | csvFileName = "Employees" + str(round(datetime.datetime.now().timestamp())) + ".csv" 24 | 25 | # Convert the data to a DataFrame 26 | df = pd.DataFrame(employees, columns=['ID','EmpName', 'DepID']) 27 | # Write the DataFrame to a CSV file 28 | csv_string = df.to_csv(index=False) 29 | 30 | # Upload the CSV file to the Lakehouse 31 | connection = myLakehouse.connectToFiles() 32 | csvFile = connection.get_file_client(csvFileName) 33 | 34 | csvFile.upload_data(csv_string, overwrite=True) 35 | 36 | csvFile.close() 37 | connection.close() 38 | return f"File {csvFileName} was written to the Lakehouse. Open the Lakehouse in https://app.fabric.microsoft.com to view the files" 39 | -------------------------------------------------------------------------------- /PYTHON/SQLDB/write_many_rows_to_sql_db.py: -------------------------------------------------------------------------------- 1 | # Select 'Manage connections' and add a connection to a Fabric SQL Database 2 | # Replace the alias "" with your connection alias. 3 | @udf.connection(argName="sqlDB",alias="") 4 | @udf.function() 5 | def write_many_to_sql_db(sqlDB: fn.FabricSqlConnection) -> str: 6 | ''' 7 | Description: Insert multiple employee records into SQL database, creating table if needed. 8 | 9 | Args: 10 | sqlDB (fn.FabricSqlConnection): Fabric SQL database connection. 11 | 12 | Returns: 13 | str: Confirmation message about table creation and data insertion. 
14 | 15 | Example: 16 | Inserts sample employee records: John Smith, Kayla Jones, Edward Harris 17 | ''' 18 | 19 | # Replace with the data you want to insert 20 | data = [(1,"John Smith", 31), (2,"Kayla Jones", 33),(3,"Edward Harris", 33)] 21 | 22 | # Establish a connection to the SQL database 23 | connection = sqlDB.connect() 24 | cursor = connection.cursor() 25 | 26 | # Create the table if it doesn't exist 27 | create_table_query = ''' 28 | IF OBJECT_ID(N'dbo.Employee', N'U') IS NULL 29 | CREATE TABLE dbo.Employee ( 30 | EmpID INT PRIMARY KEY, 31 | EmpName nvarchar(50), 32 | DepID INT 33 | ); 34 | ''' 35 | cursor.execute(create_table_query) 36 | 37 | # Insert data into the table 38 | insert_query = "INSERT INTO Employee (EmpID, EmpName, DepID) VALUES (?, ?, ?);" 39 | cursor.executemany(insert_query, data) 40 | 41 | # Commit the transaction 42 | connection.commit() 43 | 44 | # Close the connection 45 | cursor.close() 46 | connection.close() 47 | return "Employee table was created (if necessary) and data was added to this table" 48 | -------------------------------------------------------------------------------- /PYTHON/SQLDB/write_one_row_to_sql_db.py: -------------------------------------------------------------------------------- 1 | # Select 'Manage connections' and add a connection to a Fabric SQL Database 2 | # Replace the alias "" with your connection alias. 3 | @udf.connection(argName="sqlDB",alias="") 4 | @udf.function() 5 | def write_one_to_sql_db(sqlDB: fn.FabricSqlConnection, employeeId: int, employeeName: str, deptId: int) -> str: 6 | ''' 7 | Description: Insert one employee record into SQL database, creating table if needed. 8 | 9 | Args: 10 | sqlDB (fn.FabricSqlConnection): Fabric SQL database connection. 11 | employeeId (int): Employee ID (primary key). 12 | employeeName (str): Employee name. 13 | deptId (int): Department ID. 14 | 15 | Returns: 16 | str: Confirmation message about table creation and data insertion. 17 | 18 | ''' 19 | 20 | # Replace with the data you want to insert 21 | data = (employeeId, employeeName, deptId) 22 | 23 | # Establish a connection to the SQL database 24 | connection = sqlDB.connect() 25 | cursor = connection.cursor() 26 | 27 | # Create the table if it doesn't exist 28 | create_table_query = ''' 29 | IF OBJECT_ID(N'dbo.Employee', N'U') IS NULL 30 | CREATE TABLE dbo.Employee ( 31 | EmpID INT PRIMARY KEY, 32 | EmpName nvarchar(50), 33 | DepID INT 34 | ); 35 | ''' 36 | cursor.execute(create_table_query) 37 | 38 | # Insert data into the table 39 | insert_query = "INSERT INTO Employee (EmpID, EmpName, DepID) VALUES (?, ?, ?);" 40 | cursor.execute(insert_query, data) 41 | 42 | # Commit the transaction 43 | connection.commit() 44 | 45 | # Close the connection 46 | cursor.close() 47 | connection.close() 48 | return "Employee table was created (if necessary) and data was added to this table" 49 | -------------------------------------------------------------------------------- /PYTHON/Lakehouse/read_parquet_from_lakehouse.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | from io import BytesIO 4 | import pyarrow.parquet as pq 5 | 6 | #Select 'Manage connections' and add a connection to a Lakehouse which has a parquet file 7 | # Replace the alias "" with your connection alias. 
8 | @udf.connection(argName="myLakehouse", alias="") 9 | @udf.function() 10 | def read_parquet_from_lakehouse(myLakehouse: fn.FabricLakehouseClient, parquetFileName: str) -> str: 11 | ''' 12 | Description: Read parquet file from lakehouse and return data as formatted string. 13 | 14 | Args: 15 | myLakehouse (fn.FabricLakehouseClient): Fabric lakehouse connection. 16 | parquetFileName (str): Parquet file name or path relative to Files folder. 17 | 18 | Returns: 19 | str: Confirmation message with formatted parquet data rows. 20 | 21 | Example: 22 | parquetFileName = "data.parquet" or "Folder1/data.parquet" 23 | Returns formatted rows from the parquet file 24 | ''' 25 | 26 | # Connect to the Lakehouse 27 | connection = myLakehouse.connectToFiles() 28 | 29 | # Download the Parquet file from the Lakehouse 30 | # If relative path is "Files/myfile.parquet , then parquetFileName = "myfile.parquet" 31 | # If relative path is "Files/Folder1/myfile.parquet , then parquetFileName = "Folder1/myfile.parquet" 32 | parquetFile = connection.get_file_client(parquetFileName) 33 | downloadFile = parquetFile.download_file() 34 | parquetData = downloadFile.readall() 35 | 36 | # Read the Parquet data into a pandas DataFrame 37 | df = pd.read_parquet(BytesIO(parquetData)) 38 | 39 | # Display the DataFrame 40 | rows = [] 41 | for index, row in df.iterrows(): 42 | rows.append("[" + (",".join([str(item) for item in row])) + "]") 43 | result = "".join(rows) 44 | 45 | # Close the connection 46 | parquetFile.close() 47 | connection.close() 48 | 49 | return f"Parquet file read successfully.{result}" 50 | -------------------------------------------------------------------------------- /PYTHON/pandas/dataframe-example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import logging 3 | 4 | @udf.function() 5 | def filter_customers_by_country_df(df: pd.DataFrame, countryname: str) -> pd.DataFrame: 6 | """ 7 | Filters customer and order information by country name from a Pandas DataFrame. 
8 | 9 | Args: 10 | df (pd.DataFrame): DataFrame containing customer and order information 11 | Example: [ {"CustomerID": 1, "Name": "Alice", "Country": "USA", "OrderID": 101}, {"CustomerID": 2, "Name": "Bob", "Country": "Canada", "OrderID": 102}, {"CustomerID": 3, "Name": "Charlie", "Country": "USA", "OrderID": 103}, {"CustomerID": 4, "Name": "Diana", "Country": "Mexico", "OrderID": 104} ] 12 | countryname (str): Name of the country to filter by 13 | Example: USA 14 | 15 | Returns: 16 | pd.DataFrame: Filtered DataFrame of customers from the specified country 17 | """ 18 | logging.info(f'Filtering customers by country: {countryname}') 19 | 20 | try: 21 | # Check if the DataFrame is empty 22 | if df.empty: 23 | logging.warning('No data provided') 24 | return pd.DataFrame() 25 | 26 | # Check if 'Country' column exists (case-insensitive) 27 | country_column = None 28 | for col in df.columns: 29 | if col.lower() in ['country', 'country_name', 'countryname']: 30 | country_column = col 31 | break 32 | 33 | if country_column is None: 34 | logging.error('Country column not found in data') 35 | return pd.DataFrame() 36 | 37 | # Filter by country (case-insensitive) 38 | filtered_df = df[df[country_column].str.lower() == countryname.lower()] 39 | 40 | logging.info(f'Found {len(filtered_df)} customers from {countryname}') 41 | return filtered_df 42 | 43 | except Exception as e: 44 | logging.error(f'Error filtering customers by country: {str(e)}') 45 | return pd.DataFrame() 46 | 47 | -------------------------------------------------------------------------------- /PYTHON/Warehouse/export_warehouse_data_to_lakehouse.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | 4 | # Select 'Manage connections' and add a connection to a Warehouse and a Lakehouse. 5 | # Replace the aliases "" and "" with your connection aliases. 6 | @udf.connection(argName="myWarehouse", alias="") 7 | @udf.connection(argName="myLakehouse", alias="") 8 | @udf.function() 9 | def export_warehouse_data_to_lakehouse(myWarehouse: fn.FabricSqlConnection, myLakehouse: fn.FabricLakehouseClient) -> dict: 10 | ''' 11 | Description: Export employee data from warehouse to lakehouse as timestamped CSV file. 12 | 13 | Args: 14 | myWarehouse (fn.FabricSqlConnection): Fabric warehouse connection. 15 | myLakehouse (fn.FabricLakehouseClient): Fabric lakehouse connection. 16 | 17 | Returns: 18 | dict: Contains confirmation message and employee data as JSON objects. 19 | 20 | Example: 21 | Creates "Employees1672531200.csv" with sample employee records. 
22 | ''' 23 | 24 | whSqlConnection = myWarehouse.connect() 25 | 26 | cursor = whSqlConnection.cursor() 27 | cursor.execute(f"SELECT * FROM (VALUES ('John Smith', 31) , ('Kayla Jones', 33)) AS Employee(EmpName, DepID);") 28 | 29 | rows = [x for x in cursor] 30 | columnNames = [x[0] for x in cursor.description] 31 | csvRows = [] 32 | csvRows.append(','.join(columnNames)) 33 | 34 | # Turn the rows into comma separated values, and then upload it to Employees.csv 35 | for row in rows: 36 | csvRows.append(','.join(map(str, row))) 37 | 38 | lhFileConnection = myLakehouse.connectToFiles() 39 | csvFileName = "Employees" + str(round(datetime.datetime.now().timestamp())) + ".csv" 40 | csvFile = lhFileConnection.get_file_client(csvFileName) 41 | csvFile.upload_data('\n'.join(csvRows), overwrite=True) 42 | 43 | # Turn the rows into a json object 44 | values = [] 45 | 46 | for row in rows: 47 | item = {} 48 | for prop, val in zip(columnNames, row): 49 | if isinstance(val, (datetime.datetime, datetime.date)): 50 | val = val.isoformat() 51 | item[prop] = val 52 | values.append(item) 53 | 54 | cursor.close() 55 | whSqlConnection.close() 56 | csvFile.close() 57 | lhFileConnection.close() 58 | 59 | return {"message": "File {} is written to {} Lakehouse. You can delete it from the Lakehouse after trying this sample.".format(csvFileName, myLakehouse.alias_name), 60 | "values": values} 61 | -------------------------------------------------------------------------------- /PYTHON/CosmosDB/read_item.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | from fabric.functions.cosmosdb import get_cosmos_client 4 | from azure.cosmos import exceptions 5 | 6 | @udf.generic_connection(argName="cosmosDb", audienceType="CosmosDB") 7 | @udf.function() 8 | def get_product(cosmosDb: fn.FabricItem, categoryName: str, productId: str) -> list[dict[str, Any]]: 9 | 10 | ''' 11 | Description: 12 | Get a single document from a Cosmos DB container. 13 | 14 | The read operation on a Cosmos container takes two parameters, a partition key value and the id of the item (document). 15 | The property used as the partition key for this container is `categoryName`. An example value would be, "Computers, Laptops". 16 | For products in this container, the id value is the same as its `productId` so we will use that as the parameter name. 17 | 18 | To run this sample, create a new Cosmos artifact, then click on SampleData in Cosmos Home screen. 19 | Next go to settings (gear icon), then Connection tab, and copy the URI to COSMOS_DB_URI variable below. 20 | Copy the artifact name to DB_NAME variable below. The Sample Data will create a SampleData container. 21 | 22 | Before running this function, go Library Management and add the azure-cosmos package, version 4.14.0 or later. 23 | 24 | # Example values to use when calling this function 25 | # categoryName = "Computers, Laptops" 26 | # productId = "77be013f-4036-4311-9b5a-dab0c3d022be" 27 | 28 | Args: 29 | - cosmosDb (fn.FabricItem): The Cosmos DB connection information. 30 | - categoryName: The partition key property for this container. 31 | - productId: The productId and id are the same value for products. 32 | 33 | Returns: 34 | - list[dict[str, Any]]: JSON Object. List of dictionaries with string keys and values of Any type. 
35 |     '''
36 | 
37 |     COSMOS_DB_URI = "{my-cosmos-artifact-uri}"
38 |     DB_NAME = "{my-cosmos-artifact-name}"
39 |     CONTAINER_NAME = "SampleData"
40 | 
41 |     try:
42 |         cosmosClient = get_cosmos_client(cosmosDb, COSMOS_DB_URI)
43 |         database = cosmosClient.get_database_client(DB_NAME)
44 |         container = database.get_container_client(CONTAINER_NAME)
45 | 
46 |         # Read a single item
47 |         product = container.read_item(item=productId, partition_key=categoryName)
48 | 
49 |         return product
50 | 
51 |     except exceptions.CosmosResourceNotFoundError as e:
52 |         logging.error(f"Item not found in get_product: {e}")
53 |         raise
54 |     except Exception as e:
55 |         logging.error(f"Unexpected error in get_product: {e}")
56 |         raise
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /PYTHON/CosmosDB/create_item.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | from datetime import datetime, timezone 4 | from fabric.functions.cosmosdb import get_cosmos_client 5 | from azure.cosmos import exceptions 6 | 7 | @udf.generic_connection(argName="cosmosDb", audienceType="CosmosDB") 8 | @udf.function() 9 | def insert_product(cosmosDb: fn.FabricItem) -> list[dict[str, Any]]: 10 | 11 | ''' 12 | Description: 13 | Insert a new single document item into in a container for a Cosmos DB artifact. 14 | 15 | To run this sample, create a new Cosmos artifact, then click on SampleData in Cosmos Home screen. 16 | Next go to settings (gear icon), then Connection tab, and copy the URI to COSMOS_DB_URI variable below. 17 | Copy the artifact name to DB_NAME variable below. The Sample Data will create a SampleData container. 18 | 19 | Before running this function, go Library Management and add the azure-cosmos package, version 4.14.0 or later. 20 | 21 | Args: 22 | - cosmosDb (fn.FabricItem): The Cosmos DB connection information. 23 | 24 | Returns: 25 | - list[dict[str, Any]]: JSON Object. List of dictionaries with string keys and values of Any type. 26 | ''' 27 | 28 | COSMOS_DB_URI = "{my-cosmos-artifact-uri}" 29 | DB_NAME = "{my-cosmos-artifact-name}" 30 | CONTAINER_NAME = "SampleData" 31 | 32 | try: 33 | cosmosClient = get_cosmos_client(cosmosDb, COSMOS_DB_URI) 34 | database = cosmosClient.get_database_client(DB_NAME) 35 | container = database.get_container_client(CONTAINER_NAME) 36 | 37 | # A new GUID for document id and product id 38 | productId = "8a82f850-a33b-4734-80ce-740ba16c39f1" # = str(uuid.uuid4()) 39 | 40 | # Get current date and time in ISO8601 format 41 | iso_now = datetime.now(timezone.utc).isoformat() + "Z" 42 | 43 | # Create the product document 44 | product = { 45 | "id": productId, 46 | "docType": "product", 47 | "productId": productId, 48 | "name": "UnSmart Phone", 49 | "description": "The UnSmart Phone features a SlackDragon CPU, CRT display, 8KB RAM, 4MB storage, and no camera. 
With big buttons and fat case, it is designed for people who only make phone calls.",
50 |             "categoryName": "Devices, Smartphones",
51 |             "countryOfOrigin": "China",
52 |             "inventory": 279,
53 |             "firstAvailable": iso_now,
54 |             "currentPrice": 99.93,
55 |             "priceHistory": [
56 |                 {
57 |                     "date": iso_now,
58 |                     "price": 99.93
59 |                 }
60 |             ]
61 |         }
62 | 
63 |         return container.create_item(body=product)
64 | 
65 |     except exceptions.CosmosResourceExistsError as e:
66 |         logging.error(f"Cosmos error in insert_product: {e}")
67 |         raise
68 |     except exceptions.CosmosHttpResponseError as e:
69 |         logging.error(f"Cosmos error in insert_product: {e}")
70 |         raise
71 |     except Exception as e:
72 |         logging.error(f"Unexpected error in insert_product: {e}")
73 |         raise
--------------------------------------------------------------------------------
/.github/instructions/generate-python-samples-llms-txt.instructions.md:
--------------------------------------------------------------------------------
 1 | **Expert Prompt for Generating Microsoft Fabric User Data Functions samples-llms.txt**
 2 | 
 3 | You are tasked with creating a comprehensive samples-llms.txt file for the Microsoft Fabric User Data Functions Python samples repository. Follow these specific requirements:
 4 | 
 5 | ## Format Requirements
 6 | 1. **Strictly follow the llms.txt specification format:**
 7 | ```
 8 | # Title
 9 | 
10 | > Optional description goes here
11 | 
12 | Optional details go here
13 | 
14 | ## Section name
15 | 
16 | - [Link title](https://link_url): Optional link details
17 | 
18 | ## Optional
19 | 
20 | - [Link title](https://link_url)
21 | ```
22 | 
23 | ## Content Requirements
24 | 1. **Read and analyze ALL Python sample files** in the repository structure:
25 |    - DataManipulation/*.py
26 |    - Lakehouse/*.py
27 |    - SQLDB/*.py
28 |    - Warehouse/*.py
29 |    - UDFDataTypes/*.py
30 | 
31 | 2. **Use the attached `index.json` file as the authoritative source** for:
32 |    - Section names (use the "name" field exactly)
33 |    - Section descriptions (use the "description" field exactly)
34 |    - Function names (use the "name" field exactly, not filenames)
35 |    - Function descriptions (use the "description" field exactly)
36 |    - Only include functions that exist in index.json (ignore any additional files)
37 | 
38 | 3. **URL Structure Requirements:**
39 |    - Use raw GitHub URLs for all links
40 |    - Format: `https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/{folder}/{filename}.py`
41 |    - Example: `https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/Warehouse/query_data_from_warehouse.py`
42 | 
43 | ## Specific Structure
44 | 1. **Title:** "Microsoft Fabric User Data Functions - Python Samples"
45 | 2. **Description (blockquote):** Brief overview of serverless functions for Fabric platform
46 | 3. **Details paragraph:** Explain what the functions can do
47 | 4. 
**Sections:** Match index.json categories exactly: 48 | - Use category "name" as section header 49 | - Add category "description" as first line under each section 50 | - List functions using their display names from index.json, not filenames 51 | - Use function descriptions exactly as written in index.json 52 | 53 | ## Quality Checks 54 | - Ensure no duplicate sections or links 55 | - Verify all URLs are properly formatted 56 | - Cross-reference with actual file structure to confirm files exist 57 | - Maintain consistent formatting throughout 58 | - Follow the exact order from index.json 59 | 60 | ## Key Details from Our Context 61 | - Repository: `microsoft/fabric-user-data-functions-samples` 62 | - Branch: `main` 63 | - Directory: PYTHON 64 | - Reference format from: https://llmstxt.org/llms.txt 65 | - Must align perfectly with the provided index.json structure and content 66 | 67 | ## Output Requirements 68 | **Create a complete, production-ready samples-llms.txt file that should be saved in the PYTHON directory of this repository (fabric-user-data-functions-samples/PYTHON/samples-llms.txt). This file serves as comprehensive documentation for LLMs to understand Microsoft Fabric User Data Functions Python samples.** -------------------------------------------------------------------------------- /PYTHON/CosmosDB/query_items.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | from fabric.functions.cosmosdb import get_cosmos_client 4 | from azure.cosmos import exceptions 5 | 6 | @udf.generic_connection(argName="cosmosDb", audienceType="CosmosDB") 7 | @udf.function() 8 | def query_products(cosmosDb: fn.FabricItem, categoryName: str) -> list[dict[str, Any]]: 9 | 10 | ''' 11 | Description: 12 | Query for multiple items from a Cosmos DB container. 13 | 14 | The query operation on a Cosmos container takes three parameters, the query text, an array of parameters and a 15 | boolean indicating whether to enable cross-partition query. Since we are using the partition key, `categoryName` 16 | as a filter predicate for this query, we can set `enable_cross_partition_query=False` (this is the default). 17 | Using the partition key in the query is essential to optimizing performance and reduce Fabric Capacity Units (CUs) consumed. 18 | The `docType` property is used to distinguish between product and review documents in the container. If you want to 19 | query for one or more products and all their reviews, you can omit that parameter to get the reviews as well. 20 | 21 | # Example values to use when calling this function 22 | # categoryName = "Computers, Laptops" 23 | 24 | To run this sample, create a new Cosmos artifact, then click on SampleData in Cosmos Home screen. 25 | Next go to settings (gear icon), then Connection tab, and copy the URI to COSMOS_DB_URI variable below. 26 | Copy the artifact name to DB_NAME variable below. The Sample Data will create a SampleData container. 27 | 28 | Before running this function, go Library Management and add the azure-cosmos package, version 4.14.0 or later. 29 | 30 | Args: 31 | - cosmosDb (fn.FabricItem): The Cosmos DB connection information. 32 | - categoryName: The filter predicate for our query and the partition key property for this container. 33 | 34 | Returns: 35 | - list[dict[str, Any]]: JSON Object. List of dictionaries with string keys and values of Any type. 
36 |     '''
37 | 
38 |     COSMOS_DB_URI = "{my-cosmos-artifact-uri}"
39 |     DB_NAME = "{my-cosmos-artifact-name}"
40 |     CONTAINER_NAME = "SampleData"
41 | 
42 |     try:
43 |         cosmosClient = get_cosmos_client(cosmosDb, COSMOS_DB_URI)
44 |         database = cosmosClient.get_database_client(DB_NAME)
45 |         container = database.get_container_client(CONTAINER_NAME)
46 | 
47 |         # Use parameterized query
48 |         query = """
49 |             SELECT
50 |                 c.categoryName,
51 |                 c.name,
52 |                 c.description,
53 |                 c.currentPrice,
54 |                 c.inventory,
55 |                 c.priceHistory
56 |             FROM c
57 |             WHERE
58 |                 c.categoryName = @categoryName AND
59 |                 c.docType = @docType
60 |             ORDER BY
61 |                 c.currentPrice DESC
62 |         """
63 | 
64 |         parameters = [
65 |             {"name": "@categoryName", "value": categoryName},
66 |             {"name": "@docType", "value": 'product'}
67 |         ]
68 | 
69 |         # Execute the query
70 |         products = [p for p in container.query_items(
71 |             query=query,
72 |             enable_cross_partition_query=False,
73 |             parameters=parameters
74 |         )]
75 | 
76 |         return products
77 | 
78 |     except exceptions.CosmosHttpResponseError as e:
79 |         logging.error(f"Cosmos DB query failed: {e}")
80 |         raise
81 |     except Exception as e:
82 |         logging.error(f"Unexpected error in query_products: {e}")
83 |         raise
--------------------------------------------------------------------------------
/PYTHON/CosmosDB/update_item.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any
 3 | from datetime import datetime, timezone
 4 | from fabric.functions.cosmosdb import get_cosmos_client
 5 | from azure.cosmos import exceptions
 6 | 
 7 | @udf.generic_connection(argName="cosmosDb", audienceType="CosmosDB")
 8 | @udf.function()
 9 | def update_product(cosmosDb: fn.FabricItem, categoryName: str, productId: str, newPrice: float) -> list[dict[str, Any]]:
10 | 
11 |     '''
12 |     Description:
13 |     Read and update a single document in a Cosmos DB container.
14 | 
15 |     This function first reads the document using the read_item operation, updates the currentPrice property,
16 |     appends a new entry to the priceHistory array, then replaces the document in the container.
17 | 
18 |     The read operation on a Cosmos container takes two parameters, a partition key value and the id of the item (document).
19 |     `categoryName` is the partition key for this container. An example value is "Computers, Laptops". The `productId` is also
20 |     the value for the id field, so we will use that as the parameter name.
21 | 
22 |     The update operation takes the modified document as the body parameter and the id of the item to replace.
23 | 
24 |     To run this sample, create a new Cosmos artifact, then click on SampleData in the Cosmos Home screen.
25 |     Next go to settings (gear icon), then the Connection tab, and copy the URI to the COSMOS_DB_URI variable below.
26 |     Copy the artifact name to the DB_NAME variable below. The Sample Data will create a SampleData container.
27 | 
28 |     Before running this function, go to Library Management and add the azure-cosmos package, version 4.14.0 or later.
29 | 
30 |     # Example values to use when calling this function
31 |     # categoryName = "Computers, Laptops"
32 |     # productId = "77be013f-4036-4311-9b5a-dab0c3d022be"
33 |     # newPrice = 2899.99
34 | 
35 |     Args:
36 |     - cosmosDb (fn.FabricItem): The Cosmos DB connection information.
37 |     - categoryName: The partition key property for this container.
38 |     - productId: The productId and id are the same value for products.
39 |     - newPrice: The new current price to set for the product.
40 | 
41 |     Returns:
42 |     - list[dict[str, Any]]: JSON Object. List of dictionaries with string keys and values of Any type.
43 |     '''
44 | 
45 |     COSMOS_DB_URI = "{my-cosmos-artifact-uri}"
46 |     DB_NAME = "{my-cosmos-artifact-name}"
47 |     CONTAINER_NAME = "SampleData"
48 | 
49 |     try:
50 |         cosmosClient = get_cosmos_client(cosmosDb, COSMOS_DB_URI)
51 |         database = cosmosClient.get_database_client(DB_NAME)
52 |         container = database.get_container_client(CONTAINER_NAME)
53 | 
54 |         # Get the current product document
55 |         product = container.read_item(item=productId, partition_key=categoryName)
56 | 
57 |         # Update the product's price
58 |         product["currentPrice"] = newPrice
59 | 
60 |         now = datetime.now().replace(microsecond=0)
61 |         current_time_iso = now.isoformat()
62 | 
63 |         # Append to the price history
64 |         product["priceHistory"].append({
65 |             "date": current_time_iso,
66 |             "price": newPrice
67 |         })
68 | 
69 |         # Replace the document in the container
70 |         container.replace_item(item=productId, body=product)
71 | 
72 |         return product
73 | 
74 |     except exceptions.CosmosResourceNotFoundError as e:
75 |         logging.error(f"Item not found in update_product: {e}")
76 |         raise
77 |     except exceptions.CosmosHttpResponseError as e:
78 |         logging.error(f"Cosmos error in update_product: {e}")
79 |         raise
80 |     except Exception as e:
81 |         logging.error(f"Unexpected error in update_product: {e}")
82 |         raise
--------------------------------------------------------------------------------
/PYTHON/VariableLibrary/chat_completion_with_azure_openai.py:
--------------------------------------------------------------------------------
 1 | # Select 'Manage connections' and add a connection to a Variable Library
 2 | # Replace the alias "" with your connection alias.
 3 | from openai import AzureOpenAI
 4 | from azure.keyvault.secrets import SecretClient
 5 | 
 6 | @udf.generic_connection(argName="keyVaultClient", audienceType="KeyVault")
 7 | @udf.connection(argName="varLib", alias="")
 8 | @udf.function()
 9 | def chat_request(prompt: str, keyVaultClient: fn.FabricItem, varLib: fn.FabricVariablesClient) -> str:
10 |     '''
11 |     Description: Sends a chat completion request to an Azure OpenAI model using configuration values
12 |     retrieved from a Fabric Variable Library and Azure Key Vault.
13 | 
14 |     Pre-requisites:
15 |     * Create an Azure OpenAI endpoint in Azure Portal
16 |     * Create an Azure Key Vault and store your Azure OpenAI API key as a secret
17 |     * Grant your Fabric User Data Functions item owner's identity access to read secrets (Access policies or RBAC). Guidance: https://learn.microsoft.com/en-us/fabric/data-factory/azure-key-vault-reference-overview
18 |     * Create a Variable Library in Fabric and add variables for:
19 |       - KEY_VAULT_URL: Your Azure Key Vault URL (e.g., "https://your-keyvault.vault.azure.net/")
20 |       - API_KEY_SECRET_NAME: Name of the secret in Key Vault containing the API key
21 |       - ENDPOINT: Your Azure OpenAI endpoint URL
22 |       - MODEL: Your deployed model name
23 |     * Add the openai and azure-keyvault-secrets libraries to your function dependencies
24 |     * Ensure the fabric-user-data-functions library is on the latest version
25 | 
26 |     Args:
27 |         prompt (str): The user input or query to be processed by the model.
28 |         keyVaultClient (fn.FabricItem): A generic Key Vault connection used to obtain an access token.
29 |         varLib (fn.FabricVariablesClient): A client to read the Key Vault URL, secret name, endpoint, and model name from the Variable Library.
30 | 
31 |     Returns:
32 |         str: The generated response from the Azure OpenAI model.
33 | 
34 |     Workflow:
35 |     1. Fetch Key Vault URL, secret name, endpoint, and model details from Variable Library.
/PYTHON/VariableLibrary/chat_completion_with_azure_openai.py:
--------------------------------------------------------------------------------
# Select 'Manage connections' and add a connection to a Variable Library
# Replace the alias "<alias>" with your connection alias.
from openai import AzureOpenAI
from azure.keyvault.secrets import SecretClient

@udf.generic_connection(argName="keyVaultClient", audienceType="KeyVault")
@udf.connection(argName="varLib", alias="<alias>")
@udf.function()
def chat_request(prompt: str, keyVaultClient: fn.FabricItem, varLib: fn.FabricVariablesClient) -> str:
    '''
    Description: Sends a chat completion request to an Azure OpenAI model using configuration values
    retrieved from a Fabric Variable Library and Azure Key Vault.

    Pre-requisites:
    * Create an Azure OpenAI endpoint in the Azure Portal
    * Create an Azure Key Vault and store your Azure OpenAI API key as a secret
    * Grant your Fabric User Data Functions item owner's identity access to read secrets (Access policies or RBAC). Guidance: https://learn.microsoft.com/en-us/fabric/data-factory/azure-key-vault-reference-overview
    * Create a Variable Library in Fabric and add variables for:
      - KEY_VAULT_URL: Your Azure Key Vault URL (e.g., "https://your-keyvault.vault.azure.net/")
      - API_KEY_SECRET_NAME: Name of the secret in Key Vault containing the API key
      - ENDPOINT: Your Azure OpenAI endpoint URL
      - MODEL: Your deployed model name
    * Add the openai and azure-keyvault-secrets libraries to your function dependencies
    * Ensure the fabric-user-data-functions library is using the latest version

    Args:
        prompt (str): The user input or query to be processed by the model.
        keyVaultClient (fn.FabricItem): The generic Key Vault connection used to obtain an access token.
        varLib (fn.FabricVariablesClient): A client instance to access stored variables in the Variable Library
            for the Key Vault URL, secret name, endpoint, and model name.

    Returns:
        str: The generated response from the Azure OpenAI model.

    Workflow:
    1. Fetch the Key Vault URL, secret name, endpoint, and model details from the Variable Library.
    2. Use the generic Key Vault connection to obtain an access token (managed by Fabric).
    3. Retrieve the API key securely from Azure Key Vault using SecretClient.
    4. Initialize the AzureOpenAI client with the retrieved configuration.
    5. Send a chat completion request with the given prompt and system instructions.
    6. Return the content of the first message in the response.

    Example:
        Assumes the Variable Library contains:
        - KEY_VAULT_URL = "https://my-keyvault.vault.azure.net/"
        - API_KEY_SECRET_NAME = "openai-api-key"
        - ENDPOINT = "https://your-resource.openai.azure.com/"
        - MODEL = "gpt-4"

        chat_request("What is Microsoft Fabric?") returns an AI-generated response
        (the connection arguments are injected by Fabric).
    '''

    # Retrieve configuration from Variable Library
    variables = varLib.getVariables()
    key_vault_url = variables["KEY_VAULT_URL"]
    api_key_secret_name = variables["API_KEY_SECRET_NAME"]
    endpoint = variables["ENDPOINT"]
    model_name = variables["MODEL"]

    # Obtain a credential from the generic Key Vault connection (Fabric-managed identity)
    credential = keyVaultClient.get_access_token()
    secret_client = SecretClient(vault_url=key_vault_url, credential=credential)
    key = secret_client.get_secret(api_key_secret_name).value

    api_version = "2024-12-01-preview"

    client = AzureOpenAI(
        api_version=api_version,
        azure_endpoint=endpoint,
        api_key=key,
    )

    response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        max_completion_tokens=13107,
        temperature=1.0,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        model=model_name
    )

    return response.choices[0].message.content
--------------------------------------------------------------------------------
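The sample passes the value returned by `get_access_token()` directly to `SecretClient` as a credential. If, in a given SDK version, that call returns a raw bearer-token string rather than a credential object, a small adapter implementing the azure-core credential protocol would be needed. The wrapper below is a hypothetical sketch, not part of the Fabric SDK, and the one-hour expiry is an assumption.

```python
import time
from azure.core.credentials import AccessToken

class StaticTokenCredential:
    """Hypothetical adapter: wraps a raw bearer token as a TokenCredential."""

    def __init__(self, token: str):
        self._token = token

    def get_token(self, *scopes, **kwargs) -> AccessToken:
        # AccessToken is a (token, expires_on) tuple; expiry here is assumed.
        return AccessToken(self._token, int(time.time()) + 3600)

# Usage sketch:
# raw_token = keyVaultClient.get_access_token()
# secret_client = SecretClient(vault_url=key_vault_url,
#                              credential=StaticTokenCredential(raw_token))
```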
/README.md:
--------------------------------------------------------------------------------
# Fabric User data functions Samples
This repository contains sample code snippets for functions that can be used in Fabric User data functions. These samples are available to users in the Fabric portal and in VS Code. The text in the sample file is inserted at the end of the function_app.py file.

> [!NOTE]
> These are not complete runnable samples. They are snippets that are inserted into a User data functions item in Fabric.


**Fabric portal**
1. Open User data functions in the portal and select **Insert samples** to add your code snippet.
![Sample code snippet for Fabric User data functions in portal](images/sample-code-snippet-functions-fabric-portal.png)

**Fabric User data functions in VS Code**
1. Select **Local folder** and open **Functions**. Select **+** to add a new function from a sample.
![Add function](images/add-function-in-VS-Code.png)
2. View all the sample categories to select a sample to add within function_app.py.
![View code samples in VS Code](images/view-code-samples-in-VS-Code.png)



## How to contribute

You can contribute more function samples here. Follow the structure and checklist below.

Before submitting your sample function, ensure you complete all the following steps:

### 1. Code Preparation
- Write your sample function with a proper docstring following the format below, to tell the user what the function does and how to use it.
```python
def function_name() -> ReturnType:
    '''
    Description: Brief description of what the function does

    Args:
    - param1 (type): Description of parameter

    Returns: type: Description of return value
    '''
```
- Include clear examples in your docstring
- Document any special setup requirements in comments
- **DO NOT** include these lines in your code (they already exist in the base function_app.py):
```python
import fabric.functions as fn
udf = fn.UserDataFunctions()
```
- Include the necessary import statements for the libraries used in your function
- Use the `@udf.function()` decorator before your function definition.
- Use the `@udf.connection()` decorator for any Fabric data source connections that your sample requires.

### 2. File Organization
- Create your sample file as `<sample_name>.py` in the `PYTHON` folder
- Choose an appropriate subfolder, or create a new one if the existing subfolders don't match your content:
  - `Warehouse/` - Functions working with Fabric warehouses
  - `Lakehouse/` - Functions working with Fabric lakehouses
  - `SQLDB/` - Functions working with SQL databases
  - `DataManipulation/` - Functions for data transformation and analysis
  - `UDFDataTypes/` - Functions demonstrating UDF SDK data types
  - Create a new subfolder if your sample doesn't fit the existing categories

### 3. Index.json Update
- Update the appropriate `PYTHON/index.json` file
- Add your sample entry with:
  - A descriptive `name` (shown in bold in QuickPick)
  - A clear `description` (shown at the end of the first line)
  - An optional `detail` (shown on the second line)
  - The current `dateAdded` in ISO format (e.g., "2024-12-07T00:00:00Z")
  - The correct `data` path relative to the PYTHON folder
- Ensure the JSON syntax is valid (no trailing commas, proper brackets)

### 4. Testing and Validation
Test and validate your function code and share the conclusions of the test in the PR you submit.
- Verify your function follows Fabric UDF patterns
- Test that required libraries are commonly available, or document special requirements
- Ensure your function handles errors appropriately (see the sketch below)
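For example, here is a minimal sketch of input validation that raises a user-facing error, mirroring the pattern in `UDFDataTypes/raise_userthrownerror.py`; the exact `UserThrownError` signature should be confirmed against that sample.

```python
@udf.function()
def safe_divide(numerator: float, denominator: float) -> float:
    '''
    Description: Divide two numbers, raising a user-facing error on invalid input

    Args:
    - numerator (float): Value to divide
    - denominator (float): Value to divide by; must be non-zero

    Returns: float: The quotient
    '''
    if denominator == 0:
        # Surfaces a clear message to the caller instead of a raw ZeroDivisionError
        raise fn.UserThrownError("denominator must be non-zero.", {"denominator": denominator})
    return numerator / denominator
```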
### 5. Submission
- Submit a Pull Request (PR) to the repository
- Include a clear description of what your sample does
- Mention any new dependencies or requirements
- Wait for product team review and address any feedback

### Sample code snippet example

```python

import pandas as pd

@udf.function()
def my_sample_function(data: list) -> dict:
    '''
    Description: Process input data and return summary statistics

    Args:
    - data (list): List of dictionaries containing numeric data

    Returns: dict: Summary statistics including mean and count
    '''
    df = pd.DataFrame(data)
    return {"mean": df.mean().to_dict(), "count": len(df)}
```



--------------------------------------------------------------------------------
/PYTHON/samples-llms.txt:
--------------------------------------------------------------------------------
# Microsoft Fabric User Data Functions - Python Samples

> Comprehensive collection of serverless Python functions for the Microsoft Fabric platform

This repository contains sample User Data Functions (UDFs) for Microsoft Fabric, demonstrating how to create serverless functions that can process data, connect to various Fabric data sources, and perform data transformations. These functions can be invoked from KQL queries, Spark notebooks, and other Fabric services to extend data processing capabilities.

## Warehouse

Sample functions to read from and write data to a warehouse in Fabric.

- [Export data from warehouse into a lakehouse](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/Warehouse/export_warehouse_data_to_lakehouse.py): Use this user data function to write data from a warehouse into a csv file and save it in a lakehouse.
- [Query data from a warehouse](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/Warehouse/query_data_from_warehouse.py): Use this user data function to query data from a warehouse.

## Lakehouse

Sample functions to work with tables and files within a lakehouse in Fabric.

- [Write csv file into a lakehouse](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/Lakehouse/write_csv_file_in_lakehouse.py): This sample writes a CSV file into a lakehouse using pandas.
- [Read csv file from lakehouse](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/Lakehouse/read_csv_file_from_lakehouse.py): This sample reads a CSV file from a lakehouse using pandas. The function takes the file name as an input parameter.
- [Query data from lakehouse tables](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/Lakehouse/query_data_from_tables.py): This sample reads data from a table in a lakehouse.

## SQL Database

Sample functions to work with a SQL database.

- [Write multiple rows of data into a table in SQL database](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/SQLDB/write_many_rows_to_sql_db.py): This sample allows you to write multiple rows of data into a SQL database.
- [Write one row of data into a table in SQL database](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/SQLDB/write_one_row_to_sql_db.py): This sample allows you to write one row of data into a SQL database.
- [Read data from a table in SQL database](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/SQLDB/read_from_sql_db.py): This sample allows you to read data from a SQL database.

## Variable Library

Sample functions to retrieve values from the Fabric Variable Library.

- [Get variables from Variable Library](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/VariableLibrary/get_variables_from_library.py): This sample retrieves the date format for data ingestion using the date format configuration from the Variable Library.
- [Chat completion with Azure OpenAI](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/VariableLibrary/chat_completion_with_azure_openai.py): This sample sends a chat completion request to Azure OpenAI using an API key securely retrieved via a generic Key Vault connection.

## Data Manipulation

Sample functions to transform data using pandas and numpy.

- [Manipulate data with pandas library](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/DataManipulation/manipulate_data_with_pandas.py): This sample uses pandas to manipulate a given dataset to group people by age.
- [Transform data with numpy library](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/DataManipulation/transform_data_with_numpy.py): This sample converts the input 1D list to a numpy array. The output is normalized to the range [0, 1] and the mean is calculated.

## UDF Data Types

Sample functions to show usage of data types supplied within the SDK library.

- [Using UserDataFunctionContext](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/UDFDataTypes/use_userdatafunctioncontext.py): This sample uses UserDataFunctionContext to get metadata about the invocation.
- [Raising UserThrownError](https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/UDFDataTypes/raise_userthrownerror.py): This sample raises an error if the age supplied as a parameter is < 18.
--------------------------------------------------------------------------------
/Templates/Python/UDF/HelloFabric/.github/copilot-instructions.md:
--------------------------------------------------------------------------------
## Copilot Instructions — Fabric UDF

Primary objective: Generate correct Microsoft Fabric User Data Functions (UDFs) in Python with strict grounding, zero fabrication, and minimal verbosity.

### Constitutional Rules (Always)
- Conceptual grounding: All conceptual / architectural / policy / security / performance answers MUST be grounded ONLY in Microsoft Docs via the Microsoft Docs MCP server. If that server (`microsoft.docs.mcp`) is not available, respond with EXACTLY (no extra words): "Microsoft Docs MCP server not available.
Install/enable it: https://learn.microsoft.com/en-us/training/support/mcp-get-started#configure-vs-code"
- Sample grounding: For any code generation or modification, first fetch `samples-llms.txt` from the official GitHub samples index (#fetch https://raw.githubusercontent.com/microsoft/fabric-user-data-functions-samples/refs/heads/main/PYTHON/samples-llms.txt) and then fetch at least one concrete Python sample it links to. Implement strictly by adapting the closest single sample. If no sample is available, state that explicitly and do not invent APIs.
- UDF essentials: Follow the mandatory UDF practices below (single-file layout, parameter casing, connections/placeholders, warnings, validation, and response schema).

### Scope & Layout
- "Function" means a Fabric UDF entry point in this repo.
- Single-file code: Place all executable UDF code in `function_app.py`. Only add small helpers in the same file unless a separate module is clearly necessary.
- No alternate hosting/execution models unless the user explicitly requests a pivot away from Fabric UDFs.

### Sample-First Workflow (Code Tasks)
1) Fetch index: Retrieve `samples-llms.txt` from GitHub and parse it.
2) Fetch sample: Follow a #fetch link to a concrete Python sample that best matches the needed binding/decorator pattern. Selection order:
   - Same trigger/binding decorators; else closest semantics; else smallest working example.
3) Adapt patterns: Reuse the sample's binding signatures, decorators, parameter naming style, logging, response envelope, and error handling. Do not combine multiple unique sample patterns unless necessary.
4) Cite: Name the sample file used (file path only). Avoid large quotations.
5) If a matching sample cannot be fetched: say so explicitly and implement the safest minimal approach, clearly marked "Unverified — needs doc lookup".

### Concept Questions (Docs Tasks)
- Source: Microsoft Docs through active MCP only; paraphrase concisely.
- If MCP missing: return only the directive line above—no additions.

### UDF Essentials
- Naming: function names snake_case; parameters camelCase (see the sketch after this section).
- Parameter enforcement:
  - Public UDF parameters MUST be lowerCamelCase. Convert snake_case inputs to camelCase (`user_id` → `userId`) and proceed; add a short inline comment mapping if relevant.
  - Do not leave underscores in parameter names. If a conflict occurs after conversion, append a numeric suffix (e.g., `userId2`).
- Signature defaults: PROHIBITED. No '=' in the parameter list (covers `None`, union/optional, numeric/string literals, splat defaults). All params are required.
- Validation: At the start, validate required parameters for None/empty/type; return the sample's standard error envelope or raise ValueError per the sample pattern.
- Logging: Use standard library logging directly (`import logging` then calls like `logging.info(...)`); use info for start/end and key branches, warning for recoverable anomalies, error for failures; never silently swallow exceptions.
- Response schema: Mirror the sample's schema exactly (commonly `status`/`result`/`error`). Add fields only if explicitly requested.
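A small, hypothetical helper illustrating the casing conversion described above (`user_id` → `userId`); it is not part of any sample or SDK.

```python
def to_lower_camel(name: str) -> str:
    # Convert a snake_case identifier to lowerCamelCase.
    head, *rest = name.split("_")
    return head + "".join(part.capitalize() for part in rest)

assert to_lower_camel("user_id") == "userId"
assert to_lower_camel("max_retry_count") == "maxRetryCount"
```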
### Dependencies, Connections, Warnings
- Prefer stdlib. Add external libraries only with explicit user need or clear benefit (perf/correctness/security). Place custom wheels in `privateLibraries/` if used.
- Connections: Verify configured alias names. If unknown, insert a clear placeholder such as `PLACEHOLDER_DATA_LAKE_CONN # TODO: replace with configured connection alias` and proceed.
- `definition.json` (if present) outranks assumptions; align imports/bindings to it.
- Warning policy: Emit a single bold warning only when introducing a new external library or adding an unverified connection alias (or changing prior connection assumptions). Do not repeat the same warning in later turns unless a new item appears.

### Guardrails
- Do not invent APIs or bindings not seen in samples or docs.
- Keep edits minimal and focused on the request; avoid unrelated refactors.
- If uncertainty remains after samples/docs, choose the safest minimal approach and label it Unverified.

### Output Discipline
- Be concise. Summarize what changed and why in a few bullets.
- Reference the explicit sample by filename/path. Avoid long code pastes from samples; adapt the pattern instead.
--------------------------------------------------------------------------------
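As a concrete illustration of the logging and `status`/`result`/`error` envelope conventions named in the instructions above, a minimal sketch follows. The envelope shape is the common pattern these instructions describe, not a fixed Fabric contract, and the `import fabric.functions as fn` / `udf = fn.UserDataFunctions()` boilerplate is assumed to already exist in function_app.py.

```python
import logging

@udf.function()
def echo_upper(messageText: str) -> dict:
    # Validate required parameters at the start, per the instructions above.
    logging.info("echo_upper: start")
    if not messageText:
        logging.warning("echo_upper: empty messageText")
        return {"status": "error", "result": None, "error": "messageText is required"}
    result = {"status": "success", "result": messageText.upper(), "error": None}
    logging.info("echo_upper: end")
    return result
```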
/PYTHON/CosmosDB/vector_query_items.py:
--------------------------------------------------------------------------------
import logging
from typing import Any
from fabric.functions.cosmosdb import get_cosmos_client
from azure.cosmos import exceptions
from openai import AzureOpenAI

@udf.generic_connection(argName="cosmosDb", audienceType="CosmosDB")
@udf.function()
def product_vector_search(cosmosDb: fn.FabricItem, searchtext: str, similarity: float, limit: int) -> list[dict[str, Any]]:

    '''
    Description:
    Vector search for multiple items from a Cosmos DB container.

    A vector search on a Cosmos container is a query that includes the `VectorDistance` system function. The VectorDistance
    function takes two parameters: the property containing the vector embeddings and the embeddings themselves.

    The query operation on a Cosmos container takes three parameters: the query text, an array of parameters, and a
    boolean indicating whether to enable cross-partition query. Since we are not using the partition key, `categoryName`,
    as a filter predicate for this query, we have to set `enable_cross_partition_query=True`.

    The `docType` property is used to distinguish between product and review documents in the container. If you want to
    query for one or more products and all their reviews, you can omit that parameter to get the reviews as well.

    # Example values to use when calling this function
    # searchText = "gaming pc"
    # similarity = 0.824
    # limit = 5

    To run this sample, create a new Cosmos artifact, then click on SampleVectorData in the Cosmos Home screen.
    This will create a container with vectorized data, generated with the text-embedding-ada-002 model.
    Next go to settings (gear icon), then the Connection tab, and copy the URI to the COSMOS_DB_URI variable below.
    Copy the artifact name to the DB_NAME variable below. The Sample Data will create a SampleVectorData container.

    Next, create an Azure OpenAI account and deploy the text-embedding-ada-002 model, and copy the endpoint
    and key to the variables below in the generate_embeddings function. Be sure to validate the API version in the
    AI Foundry portal when deploying the model.

    Before running this function, go to Library Management and add the azure-cosmos package, version 4.14.0 or later,
    and the openai package, version 2.3.0 or later.

    Args:
    - cosmosDb (fn.FabricItem): The Cosmos DB connection information.
    - searchtext (str): The text to generate embeddings for vector search.
    - similarity (float): The minimum similarity score for results.
    - limit (int): The maximum number of results to return.

    Returns:
    - list[dict[str, Any]]: JSON object. List of dictionaries with string keys and values of Any type.
    '''

    COSMOS_DB_URI = "{my-cosmos-artifact-uri}"
    DB_NAME = "{my-cosmos-artifact-name}"
    CONTAINER_NAME = "SampleVectorData"

    try:
        cosmosClient = get_cosmos_client(cosmosDb, COSMOS_DB_URI)
        database = cosmosClient.get_database_client(DB_NAME)
        container = database.get_container_client(CONTAINER_NAME)

        # Generate embeddings for the search text
        embeddings = generate_embeddings(searchtext.strip())

        # Cosmos query with VectorDistance to perform a similarity search
        query = """
            SELECT TOP @limit
                VectorDistance(c.vectors, @embeddings) AS SimilarityScore,
                c.productId,
                c.categoryName,
                c.name,
                c.description,
                c.currentPrice,
                c.inventory,
                c.priceHistory
            FROM c
            WHERE
                c.docType = @docType AND
                VectorDistance(c.vectors, @embeddings) >= @similarity
            ORDER BY
                VectorDistance(c.vectors, @embeddings)
        """

        parameters = [
            {"name": "@limit", "value": limit},
            {"name": "@embeddings", "value": embeddings},
            {"name": "@docType", "value": "product"},
            {"name": "@similarity", "value": similarity}
        ]

        # Execute the query
        products = [p for p in container.query_items(
            query=query,
            enable_cross_partition_query=True,
            parameters=parameters
        )]

        # Always remove the vectors property if you accidentally project it;
        # it is unnecessarily large and not needed in the results
        for p in products:
            p.pop('vectors', None)

        return products

    except exceptions.CosmosHttpResponseError as e:
        logging.error(f"Cosmos DB query failed: {e}")
        raise
    except Exception as e:
        logging.error(f"Unexpected error in product_vector_search: {e}")
        raise

# Generate embeddings for the passed-in text
def generate_embeddings(text: str) -> list[float]:

    OPENAI_URI = "{my-azure-openai-endpoint}"
    OPENAI_KEY = "{my-azure-openai-key}"
    OPENAI_API_VERSION = "2023-05-15"
    OPENAI_EMBEDDING_MODEL = "text-embedding-ada-002"
    OPENAI_EMBEDDING_DIMENSIONS = 1536

    try:
        # Initialize the Azure OpenAI client
        OPENAI_CLIENT = AzureOpenAI(
            api_version=OPENAI_API_VERSION,
            azure_endpoint=OPENAI_URI,
            api_key=OPENAI_KEY
        )

        # Create embeddings
        response = OPENAI_CLIENT.embeddings.create(
            input=text,
            model=OPENAI_EMBEDDING_MODEL
            # Include dimensions when using models newer than text-embedding-ada-002:
            # dimensions=OPENAI_EMBEDDING_DIMENSIONS
        )

        embeddings = response.model_dump()
        return embeddings['data'][0]['embedding']

    except Exception as e:
        logging.error(f"Unexpected Error in generate_embeddings: {e}")
        raise
--------------------------------------------------------------------------------
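If you deploy a newer embedding model instead of text-embedding-ada-002, the embeddings call can request a specific vector size. A hedged variant of the create call in `generate_embeddings` above, assuming the same initialized `AzureOpenAI` client; the model name `text-embedding-3-small` is an illustrative assumption, and the `dimensions` value must match the vector policy on the Cosmos container.

```python
# Newer embedding models (text-embedding-3-small/-large) accept a
# `dimensions` parameter; text-embedding-ada-002 does not.
response = OPENAI_CLIENT.embeddings.create(
    input=text,
    model="text-embedding-3-small",  # illustrative assumption
    dimensions=1536,  # must match the container's vector policy
)
embedding = response.data[0].embedding
```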
7 | "name": "Export data from warehouse into a lakehouse", 8 | "description": "Use this user data function that writes data from a warehouse into a csv file and saves it in a lakehouse", 9 | "detail": "Add a warehouse and lakehouse connection. Then use this sample function that writes data from a warehouse into csv file and saves it in a lakehouse", 10 | "dateAdded": "2024-11-05T00:00:00Z", 11 | "data": "Warehouse/export_warehouse_data_to_lakehouse.py" 12 | }, 13 | { 14 | "name": "Query data from a warehouse", 15 | "description": "Use this user data function to query data from a warehouse.", 16 | "detail": "Add a warehouse connection to the user data function and then use the function to query data from a warehouse.", 17 | "dateAdded": "2024-11-05T00:00:00Z", 18 | "data": "Warehouse/query_data_from_warehouse.py" 19 | } 20 | ] 21 | }, 22 | { 23 | "name": "Lakehouse", 24 | "description": "Sample functions to work with tables and files within a lakehouse in Fabric.", 25 | "data": [ 26 | { 27 | "name": "Read a parquet file from a lakehouse", 28 | "description": "This sample reads a parquet file from a lakehouse.", 29 | "dateAdded": "2025-04-30T00:00:00Z", 30 | "data": "Lakehouse/read_parquet_from_lakehouse.py" 31 | }, 32 | { 33 | "name": "Write csv file into a lakehouse", 34 | "description": "This sample writes a CSV file into a lakehouse using pandas.", 35 | "dateAdded": "2024-11-05T00:00:00Z", 36 | "data": "Lakehouse/write_csv_file_in_lakehouse.py" 37 | }, 38 | { 39 | "name": "Read csv file from lakehouse", 40 | "description": "This sample reads a CSV file from a lakehouse using pandas. Function takes file name as an input parameter.", 41 | "dateAdded": "2024-11-05T00:00:00Z", 42 | "data": "Lakehouse/read_csv_file_from_lakehouse.py" 43 | }, 44 | { 45 | "name": "Query data from lakehouse tables", 46 | "description": "This sample reads data from a table in a lakehouse.", 47 | "dateAdded": "2024-11-05T00:00:00Z", 48 | "data": "Lakehouse/query_data_from_tables.py" 49 | } 50 | ] 51 | }, 52 | { 53 | "name": "SQL Database", 54 | "description": "Sample functions to work with SQL database.", 55 | "data": [ 56 | { 57 | "name": "Write multiple rows of data into a table in SQL database", 58 | "description": "This sample allows you to write multiple rows of data into a SQL database.", 59 | "dateAdded": "2024-11-05T00:00:00Z", 60 | "data": "SQLDB/write_many_rows_to_sql_db.py" 61 | }, 62 | { 63 | "name": "Write one row of data into a table in SQL database", 64 | "description": "This sample allows you to write one row of data into a SQL database.", 65 | "dateAdded": "2025-01-14T00:00:00Z", 66 | "data": "SQLDB/write_one_row_to_sql_db.py" 67 | }, 68 | { 69 | "name": "Read data from a table in SQL database", 70 | "description": "This sample allows you to read data from SQL database.", 71 | "dateAdded": "2024-11-05T00:00:00Z", 72 | "data": "SQLDB/read_from_sql_db.py" 73 | } 74 | ] 75 | }, 76 | { 77 | "name": "Variable Library", 78 | "description": "Sample function to retrieve values and from Fabric Variable Library.", 79 | "data": [ 80 | { 81 | "name": "Get variables from Variable Library", 82 | "description": "This sample retrieves date format for data ingestion using date format configuration from Variable Library.", 83 | "detail": "Add a Variable Library connection and store a DATE_FORMAT variable. 
                "dateAdded": "2025-10-17T00:00:00Z",
                "data": "VariableLibrary/get_variables_from_library.py"
            },
            {
                "name": "Chat completion with Azure OpenAI",
                "description": "This sample sends a chat completion request to Azure OpenAI using an API key securely retrieved via a generic Key Vault connection.",
                "detail": "Create an Azure OpenAI endpoint and store the API key in Azure Key Vault. Add: (1) a Generic connection with audience type KeyVault, (2) a Variable Library connection with KEY_VAULT_URL, API_KEY_SECRET_NAME, ENDPOINT, and MODEL variables. The function uses the generic connection to obtain an access token and then fetches the secret from Key Vault at runtime.",
                "dateAdded": "2025-10-20T00:00:00Z",
                "data": "VariableLibrary/chat_completion_with_azure_openai.py"
            }
        ]
    },
    {
        "name": "Data Manipulation",
        "description": "Sample functions to transform data using pandas, numpy, and other libraries.",
        "data": [
            {
                "name": "Manipulate data with pandas library",
                "description": "This sample uses pandas to manipulate a given dataset to group people by age.",
                "dateAdded": "2024-11-05T00:00:00Z",
                "data": "DataManipulation/manipulate_data_with_pandas.py"
            },
            {
                "name": "Filter data using pandas dataframes",
                "description": "This sample takes as input customers object data as a pandas dataframe type and filters the data based on country.",
                "dateAdded": "2025-09-15T00:00:00Z",
                "data": "pandas/dataframe-example.py"
            },
            {
                "name": "Summarize age distribution with pandas Series",
                "description": "This sample takes a list of ages as a pandas Series object and then summarizes the distribution of ages.",
                "dateAdded": "2025-09-15T00:00:00Z",
                "data": "pandas/series-example.py"
            },
            {
                "name": "Transform data with numpy library",
                "description": "This sample converts the input 1D list to a numpy array. The output is normalized to the range [0, 1] and the mean is calculated.",
                "dateAdded": "2024-11-05T00:00:00Z",
                "data": "DataManipulation/transform_data_with_numpy.py"
            },
            {
                "name": "Vectorize strings with scikit-learn",
                "description": "This sample vectorizes a string of text using CountVectorizer and returns a vectorized representation.",
                "dateAdded": "2025-07-01T00:00:00Z",
                "data": "DataManipulation/vectorize_strings.py"
            },
            {
                "name": "Sentiment analysis with TextBlob",
                "description": "This sample analyzes the sentiment of input text using TextBlob and classifies it as Happy, Sad, or Neutral.",
                "dateAdded": "2025-07-01T00:00:00Z",
                "data": "DataManipulation/sentiment_analysis.py"
            },
            {
                "name": "Mask credit card numbers",
                "description": "This sample masks credit card numbers showing only the last 4 digits for security purposes.",
                "dateAdded": "2025-07-01T00:00:00Z",
                "data": "DataManipulation/mask_credit_card.py"
            }
        ]
    },
    {
        "name": "UDF Data Types",
        "description": "Sample functions to show usage of data types supplied within the SDK library.",
        "data": [
            {
                "name": "Using UserDataFunctionContext",
                "description": "This sample uses UserDataFunctionContext to get metadata about the invocation.",
                "dateAdded": "2025-01-17T00:00:00Z",
                "data": "UDFDataTypes/use_userdatafunctioncontext.py"
            },
            {
                "name": "Raising UserThrownError",
                "description": "This sample raises an error if the age supplied as a parameter is < 18.",
                "dateAdded": "2025-01-17T00:00:00Z",
                "data": "UDFDataTypes/raise_userthrownerror.py"
            }
        ]
    }
]
--------------------------------------------------------------------------------