├── __init__.py ├── tools ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-312.pyc │ ├── arm_checks.cpython-312.pyc │ ├── arm_ingest.cpython-312.pyc │ ├── entra_analyzer.cpython-312.pyc │ └── entra_ingest.cpython-312.pyc ├── arm_ingest.py ├── entra_ingest.py ├── arm_checks.py └── entra_analyzer.py ├── requirements.txt ├── .gitattributes ├── __pycache__ ├── server.cpython-312.pyc └── __init__.cpython-312.pyc ├── fixtures ├── entra_test │ ├── Policies │ │ ├── AuthenticationMethodsPolicy │ │ │ └── AuthenticationMethodConfigurations │ │ │ │ ├── SMS.json │ │ │ │ ├── FIDO2.json │ │ │ │ └── MicrosoftAuthenticator.json │ │ ├── IdentitySecurityDefaultsEnforcementPolicy │ │ │ └── 00000000-0000-0000-0000-000000000005 │ │ │ │ └── 00000000-0000-0000-0000-000000000005.json │ │ └── AuthorizationPolicy │ │ │ └── authorizationPolicy │ │ │ └── authorizationPolicy.json │ ├── DirectoryRoles │ │ └── 2cbdd8b6-e641-4f64-918e-4a55f7c059ed │ │ │ ├── Members │ │ │ └── d4e6d6ab-2291-4435-bfc6-3fb11de1bffc │ │ │ │ └── d4e6d6ab-2291-4435-bfc6-3fb11de1bffc.json │ │ │ └── 2cbdd8b6-e641-4f64-918e-4a55f7c059ed.json │ ├── Directory │ │ └── OnPremisesSynchronization.json │ └── Organization │ │ └── Organization.json ├── parameters.json └── template.json ├── server.py ├── test_entra_mcp_client.py ├── test_entra_analyzer.py ├── client_example.py ├── README.md ├── agent └── run_agent.py └── evals └── test_llm_eval.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mcp>=1.18.0 2 | pydantic>=2.12.0 3 | pydantic-ai>=1.2.0 4 | 
-------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /__pycache__/server.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hausec/ARMAlyzer/main/__pycache__/server.cpython-312.pyc -------------------------------------------------------------------------------- /__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hausec/ARMAlyzer/main/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /tools/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hausec/ARMAlyzer/main/tools/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /tools/__pycache__/arm_checks.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hausec/ARMAlyzer/main/tools/__pycache__/arm_checks.cpython-312.pyc -------------------------------------------------------------------------------- /tools/__pycache__/arm_ingest.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hausec/ARMAlyzer/main/tools/__pycache__/arm_ingest.cpython-312.pyc -------------------------------------------------------------------------------- /tools/__pycache__/entra_analyzer.cpython-312.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hausec/ARMAlyzer/main/tools/__pycache__/entra_analyzer.cpython-312.pyc -------------------------------------------------------------------------------- /tools/__pycache__/entra_ingest.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hausec/ARMAlyzer/main/tools/__pycache__/entra_ingest.cpython-312.pyc -------------------------------------------------------------------------------- /fixtures/entra_test/Policies/AuthenticationMethodsPolicy/AuthenticationMethodConfigurations/SMS.json: -------------------------------------------------------------------------------- 1 | { 2 | "state": "enabled", 3 | "includeTarget": { 4 | "targetType": "group", 5 | "id": "all_users", 6 | "isRegistrationRequired": false 7 | }, 8 | "excludeTarget": { 9 | "targetType": "group", 10 | "id": "00000000-0000-0000-0000-000000000000" 11 | }, 12 | "@odata.type": "#microsoft.graph.smsAuthenticationMethodConfiguration" 13 | } 14 | -------------------------------------------------------------------------------- /fixtures/entra_test/Policies/AuthenticationMethodsPolicy/AuthenticationMethodConfigurations/FIDO2.json: -------------------------------------------------------------------------------- 1 | { 2 | "state": "disabled", 3 | "includeTarget": { 4 | "targetType": "group", 5 | "id": "all_users", 6 | "isRegistrationRequired": false 7 | }, 8 | "excludeTarget": { 9 | "targetType": "group", 10 | "id": "00000000-0000-0000-0000-000000000000" 11 | }, 12 | "@odata.type": "#microsoft.graph.fido2AuthenticationMethodConfiguration" 13 | } 14 | -------------------------------------------------------------------------------- /fixtures/entra_test/DirectoryRoles/2cbdd8b6-e641-4f64-918e-4a55f7c059ed/Members/d4e6d6ab-2291-4435-bfc6-3fb11de1bffc/d4e6d6ab-2291-4435-bfc6-3fb11de1bffc.json: -------------------------------------------------------------------------------- 1 | { 2 | "@odata.type": 
"#microsoft.graph.user", 3 | "id": "d4e6d6ab-2291-4435-bfc6-3fb11de1bffc", 4 | "userPrincipalName": "admin@testcompany.onmicrosoft.com", 5 | "displayName": "Test Admin User", 6 | "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects(id,userPrincipalName,displayName)/$entity" 7 | } 8 | -------------------------------------------------------------------------------- /fixtures/entra_test/Policies/AuthenticationMethodsPolicy/AuthenticationMethodConfigurations/MicrosoftAuthenticator.json: -------------------------------------------------------------------------------- 1 | { 2 | "state": "disabled", 3 | "includeTarget": { 4 | "targetType": "group", 5 | "id": "all_users", 6 | "isRegistrationRequired": false 7 | }, 8 | "excludeTarget": { 9 | "targetType": "group", 10 | "id": "00000000-0000-0000-0000-000000000000" 11 | }, 12 | "@odata.type": "#microsoft.graph.microsoftAuthenticatorAuthenticationMethodConfiguration" 13 | } 14 | -------------------------------------------------------------------------------- /fixtures/entra_test/DirectoryRoles/2cbdd8b6-e641-4f64-918e-4a55f7c059ed/2cbdd8b6-e641-4f64-918e-4a55f7c059ed.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "2cbdd8b6-e641-4f64-918e-4a55f7c059ed", 3 | "deletedDateTime": null, 4 | "description": "Can manage all aspects of Microsoft Entra ID and Microsoft services that use Microsoft Entra identities.", 5 | "displayName": "Global Administrator", 6 | "roleTemplateId": "62e90394-69f5-4237-9190-012177145e10", 7 | "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryRoles/$entity" 8 | } 9 | -------------------------------------------------------------------------------- /fixtures/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 
5 | "virtualMachines_WindowsAssessment_name": { 6 | "value": null 7 | }, 8 | "disks_WindowsAssessment_OsDisk_1_db8cfc0704a9426c83c6d2c97616db55_externalid": { 9 | "value": null 10 | }, 11 | "networkInterfaces_windowsassessment821_externalid": { 12 | "value": null 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /fixtures/entra_test/Policies/IdentitySecurityDefaultsEnforcementPolicy/00000000-0000-0000-0000-000000000005/00000000-0000-0000-0000-000000000005.json: -------------------------------------------------------------------------------- 1 | { 2 | "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#policies/identitySecurityDefaultsEnforcementPolicy/$entity", 3 | "id": "00000000-0000-0000-0000-000000000005", 4 | "displayName": "Security Defaults", 5 | "description": "Security defaults is a set of basic identity security mechanisms recommended by Microsoft. When enabled, these recommendations will be automatically enforced in your organization. 
"""MCP server exposing ARM-template and Entra ID backup security analysis tools."""
from mcp.server import FastMCP
import json
from tools.arm_checks import check_vm_template, Findings  # ARM template rule engine
from tools.entra_ingest import analyze_entra_backup_tool, EntraAnalyzeArgs, EntraFindings

mcp = FastMCP("azure-arm-sec")

@mcp.tool()
def analyze_arm(template_text: str, parameters_text: str) -> Findings:
    """Analyze Azure ARM/Bicep template+parameters and return findings."""
    # Parse both JSON documents, then hand off to the rule engine.
    parsed_template = json.loads(template_text)
    parsed_parameters = json.loads(parameters_text)
    return check_vm_template(parsed_template, parsed_parameters)

@mcp.tool()
def analyze_entra(backup_path: str, include_summary: bool = True) -> EntraFindings:
    """Analyze Entra ID backup files for security misconfigurations."""
    # Wrap the raw arguments in the validated pydantic model the tool expects.
    request = EntraAnalyzeArgs(backup_path=backup_path, include_summary=include_summary)
    return analyze_entra_backup_tool(request)

if __name__ == "__main__":
    mcp.run()  # stdio (local dev)
import json
from typing import Optional
from pydantic import BaseModel, Field
from .arm_checks import Findings, check_vm_template

class IngestArgs(BaseModel):
    """Inputs for ARM analysis: raw file contents now, resource URIs later."""
    template_text: Optional[str] = Field(None, description="Contents of template.json")
    parameters_text: Optional[str] = Field(None, description="Contents of parameters.json")
    template_resource_uri: Optional[str] = None  # e.g., resource://local/template.json
    parameters_resource_uri: Optional[str] = None

def _read_resource(uri: str) -> str:
    """Placeholder for an MCP resource reader; only raw text input is wired up."""
    # Implement with your SDK's resource reader if you wire one in; stdio-only can skip this.
    raise NotImplementedError("Resource reading not wired; pass raw text for now.")

def analyze_arm(args: IngestArgs) -> Findings:
    """
    Analyze Azure ARM/Bicep-exported template+parameters for common risks and misconfigurations.
    Returns structured findings.
    """
    # Guard clause: resource URIs are not supported yet, so raw text is mandatory.
    if not args.template_text or not args.parameters_text:
        raise ValueError("Provide template_text and parameters_text")

    parsed_template = json.loads(args.template_text)
    parsed_params = json.loads(args.parameters_text)
    return check_vm_template(parsed_template, parsed_params)
#!/usr/bin/env python3
"""
Example client to test Entra ID analysis via MCP server.
"""

import asyncio
import json
import pathlib

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

async def test_entra_analysis():
    """Test Entra ID analysis via MCP server.

    Launches server.py as a stdio child process, calls the analyze_entra tool,
    and prints the first text content block of the result.
    """
    # BUGFIX: `pathlib` was previously imported only inside the __main__ guard,
    # so importing this module and awaiting test_entra_analysis() directly
    # raised NameError here. The import now lives at module top.
    server_params = StdioServerParameters(
        command='python',
        args=['server.py'],
        cwd=str(pathlib.Path(__file__).parent)
    )

    print("Connecting to MCP server...")

    try:
        async with stdio_client(server_params) as (read, write):
            async with ClientSession(read, write) as session:
                print("Initializing session...")
                await session.initialize()

                print("Calling analyze_entra tool...")
                result = await session.call_tool('analyze_entra', {
                    'backup_path': 'c:/EntraBackup',
                    'include_summary': True
                })

                print("\n=== Entra ID Analysis Results ===")
                print(result.content[0].text)

    except Exception as e:
        # Best-effort demo script: report the failure rather than crash.
        print(f"Error: {e}")

if __name__ == "__main__":
    asyncio.run(test_entra_analysis())
#!/usr/bin/env python3
"""
Test script for Entra ID backup analysis.
This demonstrates how to analyze Entra ID backup files for security issues.
"""

import sys
import pathlib
from tools.entra_analyzer import analyze_entra_backup

def main():
    """Main function for command line usage.

    Expects exactly one argument: the path to an EntraExporter backup
    directory. Prints a severity summary, each finding with its
    recommendation, and a per-category breakdown.
    """
    if len(sys.argv) != 2:
        # BUGFIX: the usage line had lost its <backup_path> placeholder.
        print("Usage: python test_entra_analyzer.py <backup_path>")
        print("Example: python test_entra_analyzer.py c:/EntraBackup")
        sys.exit(1)

    backup_path = sys.argv[1]

    try:
        print(f"Analyzing Entra ID backup: {backup_path}")
        print("=" * 60)

        findings = analyze_entra_backup(backup_path)

        # Overall counters keyed by severity.
        print(f"\nANALYSIS SUMMARY")
        print(f"Total findings: {findings.stats['total']}")
        print(f"Critical: {findings.stats['critical']} | High: {findings.stats['high']} | Medium: {findings.stats['med']} | Low: {findings.stats['low']}")

        # List findings from most to least severe.
        severity_order = ["critical", "high", "med", "low"]
        for severity in severity_order:
            severity_findings = [f for f in findings.findings if f.severity == severity]
            if severity_findings:
                print(f"\n{severity.upper()} SEVERITY ISSUES:")
                for finding in severity_findings:
                    print(f"  - {finding.id}: {finding.message}")
                    print(f"    Recommendation: {finding.recommendation}")
                    print()

        # Category breakdown is only present when the analyzer built a summary.
        if findings.summary.get("categories"):
            print(f"\nISSUES BY CATEGORY:")
            for category, issue_ids in findings.summary["categories"].items():
                print(f"  {category}: {len(issue_ids)} issues")

        print(f"\nAnalysis complete!")

    except Exception as e:
        # Surface the failure and exit non-zero so CI callers notice.
        print(f"Error analyzing backup: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
5 | "555-0123" 6 | ], 7 | "city": "Test City", 8 | "country": null, 9 | "countryLetterCode": "US", 10 | "createdDateTime": "2019-11-18T21:40:02Z", 11 | "defaultUsageLocation": null, 12 | "displayName": "Test Directory", 13 | "isMultipleDataLocationsForServicesEnabled": null, 14 | "marketingNotificationEmails": [], 15 | "onPremisesLastSyncDateTime": "2020-02-22T04:43:52Z", 16 | "onPremisesSyncEnabled": true, 17 | "partnerTenantType": null, 18 | "postalCode": "12345", 19 | "preferredLanguage": "en", 20 | "securityComplianceNotificationMails": [], 21 | "securityComplianceNotificationPhones": [], 22 | "state": "CA", 23 | "street": "123 Test Street", 24 | "technicalNotificationMails": [ 25 | "admin@testcompany.com" 26 | ], 27 | "tenantType": "AAD", 28 | "directorySizeQuota": { 29 | "used": 451, 30 | "total": 50000 31 | }, 32 | "assignedPlans": [ 33 | { 34 | "assignedDateTime": "2023-08-20T13:55:08Z", 35 | "capabilityStatus": "Enabled", 36 | "service": "WindowsAzure", 37 | "servicePlanId": "fca3e605-0754-4279-8504-3f1229f29614" 38 | } 39 | ], 40 | "onPremisesSyncStatus": [ 41 | { 42 | "attributeSetName": "iab", 43 | "state": "enabled", 44 | "version": 1 45 | } 46 | ], 47 | "privacyProfile": { 48 | "contactEmail": "", 49 | "statementUrl": "" 50 | }, 51 | "provisionedPlans": [ 52 | { 53 | "capabilityStatus": "Suspended", 54 | "provisioningStatus": "Success", 55 | "service": "Adallom" 56 | } 57 | ], 58 | "verifiedDomains": [ 59 | { 60 | "capabilities": "None", 61 | "isDefault": false, 62 | "isInitial": false, 63 | "name": "testcompany.mail.onmicrosoft.com", 64 | "type": "Managed" 65 | }, 66 | { 67 | "capabilities": "Email, OfficeCommunicationsOnline", 68 | "isDefault": true, 69 | "isInitial": true, 70 | "name": "testcompany.onmicrosoft.com", 71 | "type": "Managed" 72 | } 73 | ], 74 | "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#organization/$entity" 75 | } 76 | -------------------------------------------------------------------------------- 
#!/usr/bin/env python3
"""
Example client to interact with the ARMAlyzer MCP server.
This demonstrates how to connect to and use the MCP server programmatically.
"""

import asyncio
import json
import pathlib
import sys

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

async def analyze_with_mcp_server(template_path: str, params_path: str):
    """Connect to MCP server and analyze ARM templates.

    Reads the template and parameters files, spawns server.py over stdio,
    calls the analyze_arm tool, and prints the textual result.
    """
    # Read template and parameters up front so file errors surface early.
    template_text = pathlib.Path(template_path).read_text(encoding="utf-8")
    params_text = pathlib.Path(params_path).read_text(encoding="utf-8")

    # Server parameters - adjust path as needed
    server_params = StdioServerParameters(
        command='python',
        args=['server.py'],
        cwd=str(pathlib.Path(__file__).parent)
    )

    print("Connecting to MCP server...")

    try:
        async with stdio_client(server_params) as (read, write):
            async with ClientSession(read, write) as session:
                print("Initializing session...")
                await session.initialize()

                print("Calling analyze_arm tool...")
                result = await session.call_tool('analyze_arm', {
                    'template_text': template_text,
                    'parameters_text': params_text
                })

                print("\n=== Analysis Results ===")
                print(result.content[0].text)

    except Exception as e:
        print(f"Error: {e}")
        print("Make sure the MCP server is running or can be started.")

def main():
    """Main function for command line usage."""
    # BUGFIX: the usage line had lost its file-path placeholders.
    if len(sys.argv) != 3:
        print("Usage: python client_example.py <template.json> <parameters.json>")
        print("Example: python client_example.py fixtures/template.json fixtures/parameters.json")
        sys.exit(1)

    template_path = sys.argv[1]
    params_path = sys.argv[2]

    # Run the async analysis
    asyncio.run(analyze_with_mcp_server(template_path, params_path))

if __name__ == "__main__":
    main()
from typing import List, Optional, Dict, Any
from pydantic import BaseModel

class Finding(BaseModel):
    """A single security observation produced by a template check."""
    id: str
    severity: str  # "low" | "med" | "high"
    resource: Optional[str] = None
    path: Optional[str] = None
    message: str

class Findings(BaseModel):
    """All findings for one template plus per-severity counters."""
    findings: List[Finding]
    stats: Dict[str, int]

def check_vm_template(template: Dict[str, Any], parameters: Dict[str, Any]) -> Findings:
    """Run static security checks over an ARM template and its parameters file.

    Checks applied per virtual-machine resource: floating image version,
    hard-coded admin username, and managed-identity sprawl; plus one
    template-wide check for parameters whose value is null.
    """
    collected: List[Finding] = []

    for resource in template.get("resources", []):
        # Only virtual machines are inspected; skip everything else.
        if resource.get("type", "").lower() != "microsoft.compute/virtualmachines":
            continue
        vm_name = resource.get("name")
        props = resource.get("properties", {})

        # 1) Floating image version ("latest") drifts between deployments.
        image_version = (props.get("storageProfile", {})
                              .get("imageReference", {})
                              .get("version"))
        if image_version and image_version.lower() == "latest":
            collected.append(Finding(
                id="IMG-001",
                severity="med",
                resource=vm_name,
                path="resources[*].properties.storageProfile.imageReference.version",
                message="Image version is 'latest'. Pin to a specific version to avoid drift."
            ))

        # 2) Local admin account declared directly in the template.
        if "adminUsername" in props.get("osProfile", {}):
            collected.append(Finding(
                id="OS-001",
                severity="med",
                resource=vm_name,
                path="resources[*].properties.osProfile.adminUsername",
                message="Local admin username defined in template. Ensure password not hard-coded and use Azure VM extensions or secrets store."
            ))

        # 3) Managed-identity hygiene: system-assigned plus user-assigned on one VM.
        identity = resource.get("identity", {})
        if identity:
            identity_type = identity.get("type", "")
            assigned = list((identity.get("userAssignedIdentities") or {}).keys())
            if "UserAssigned" in identity_type and "SystemAssigned" in identity_type and assigned:
                collected.append(Finding(
                    id="MI-001",
                    severity="med",
                    resource=vm_name,
                    path="resources[*].identity",
                    message=f"VM has SystemAssigned + {len(assigned)} UserAssigned identities. Review least-privilege, role scope, and blast radius."
                ))

    # 4) Parameter quality: flag entries whose value is explicitly null.
    missing = [
        key
        for key, spec in (parameters.get("parameters") or {}).items()
        if isinstance(spec, dict) and spec.get("value", None) is None
    ]
    if missing:
        collected.append(Finding(
            id="PAR-001",
            severity="low",
            resource=None,
            path="parameters.*.value",
            message=f"Parameters have null values: {', '.join(missing)}. Provide CI defaults or validation."
        ))

    # Counters keyed by severity, plus the grand total.
    tally = {"total": len(collected)}
    for level in ("high", "med", "low"):
        tally[level] = sum(1 for item in collected if item.severity == level)
    return Findings(findings=collected, stats=tally)
"type": "String" 12 | }, 13 | "networkInterfaces_windowsassessment821_externalid": { 14 | "defaultValue": "/subscriptions/7cd95fb1-1234-4a0d-91b4-d1999a961ba4/resourceGroups/Testing/providers/Microsoft.Network/networkInterfaces/windowsassessment821", 15 | "type": "String" 16 | } 17 | }, 18 | "variables": {}, 19 | "resources": [ 20 | { 21 | "type": "Microsoft.Compute/virtualMachines", 22 | "apiVersion": "2024-11-01", 23 | "name": "[parameters('virtualMachines_WindowsAssessment_name')]", 24 | "location": "eastus", 25 | "identity": { 26 | "type": "SystemAssigned, UserAssigned", 27 | "userAssignedIdentities": { 28 | "/subscriptions/7cd95fb1-1234-4a0d-91b4-d1999a961ba4/resourceGroups/LabRG/providers/Microsoft.ManagedIdentity/userAssignedIdentities/Test_MI": {}, 29 | "/subscriptions/7cd95fb1-1234-4a0d-91b4-d1999a961ba4/resourceGroups/Testing/providers/Microsoft.ManagedIdentity/userAssignedIdentities/AssessmentMI": {} 30 | } 31 | }, 32 | "properties": { 33 | "hardwareProfile": { 34 | "vmSize": "Standard_D2s_v3" 35 | }, 36 | "storageProfile": { 37 | "imageReference": { 38 | "publisher": "MicrosoftWindowsDesktop", 39 | "offer": "Windows-10", 40 | "sku": "win10-21h2-ent", 41 | "version": "latest" 42 | }, 43 | "osDisk": { 44 | "osType": "Windows", 45 | "name": "[concat(parameters('virtualMachines_WindowsAssessment_name'), '_OsDisk_1_db8cfc0704a9426c83c6d2c97616db55')]", 46 | "createOption": "FromImage", 47 | "caching": "ReadWrite", 48 | "managedDisk": { 49 | "id": "[parameters('disks_WindowsAssessment_OsDisk_1_db8cfc0704a9426c83c6d2c97616db55_externalid')]" 50 | }, 51 | "deleteOption": "Delete" 52 | }, 53 | "dataDisks": [] 54 | }, 55 | "osProfile": { 56 | "computerName": "WindowsAssessme", 57 | "adminUsername": "hausec", 58 | "windowsConfiguration": { 59 | "provisionVMAgent": true, 60 | "enableAutomaticUpdates": true, 61 | "patchSettings": { 62 | "patchMode": "AutomaticByOS", 63 | "assessmentMode": "ImageDefault", 64 | "enableHotpatching": false 65 | } 66 | }, 67 | "secrets": 
[], 68 | "allowExtensionOperations": true, 69 | "requireGuestProvisionSignal": true 70 | }, 71 | "networkProfile": { 72 | "networkInterfaces": [ 73 | { 74 | "id": "[parameters('networkInterfaces_windowsassessment821_externalid')]", 75 | "properties": { 76 | "deleteOption": "Delete" 77 | } 78 | } 79 | ] 80 | }, 81 | "licenseType": "Windows_Client" 82 | } 83 | } 84 | ] 85 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ARMAlyzer - Azure Security Analysis Suite 2 | 3 | A comprehensive security analysis tool for Azure ARM templates and Entra ID configurations using MCP (Model Context Protocol) and PydanticAI. 4 | 5 | ## Overview 6 | 7 | ARMAlyzer provides security analysis for two critical Azure components: 8 | 9 | ### ARM Template Analysis 10 | - **MCP Server**: Exposes analysis tools via Model Context Protocol 11 | - **PydanticAI Agent**: Intelligent analysis and reporting 12 | - **Security Checks**: Automated detection of common Azure ARM security issues 13 | - **Evaluation Framework**: Testing and validation capabilities 14 | 15 | ### Entra ID Analysis 16 | - **Backup Analysis**: Analyzes EntraExporter backup files for security misconfigurations 17 | - **Policy Review**: Checks authorization policies, security defaults, and authentication methods 18 | - **Role Analysis**: Identifies privileged users and role assignments 19 | - **Sync Settings**: Reviews on-premises synchronization security settings 20 | 21 | ## Features 22 | 23 | ### ARM Template Security Checks 24 | 25 | - **Image Version Pinning**: Detects use of "latest" image versions 26 | - **Admin Credentials**: Identifies hardcoded admin usernames 27 | - **Identity Management**: Analyzes managed identity configurations 28 | - **Parameter Validation**: Checks for null or missing parameters 29 | - **Network Security**: Reviews network interface configurations 30 | 31 | ### 
Entra ID Security Checks 32 | 33 | - **Organization Settings**: Accidental deletion protection, notification emails 34 | - **Authorization Policies**: Guest access, user permissions, app creation rights 35 | - **Directory Roles**: Privileged user analysis, Global Administrator count 36 | - **Sync Settings**: Password writeback, user writeback, device writeback 37 | - **Security Defaults**: MFA enforcement, security policy compliance 38 | - **Authentication Methods**: Weak vs strong authentication method analysis 39 | 40 | ### Architecture 41 | 42 | ``` 43 | ├── server.py # MCP server entry point 44 | ├── tools/ 45 | │ ├── arm_checks.py # ARM template security analysis 46 | │ ├── arm_ingest.py # ARM MCP tool wrapper 47 | │ ├── entra_analyzer.py # Entra ID security analysis 48 | │ └── entra_ingest.py # Entra ID MCP tool wrapper 49 | ├── agent/ 50 | │ └── run_agent.py # PydanticAI agent implementation 51 | ├── evals/ 52 | │ └── test_eval.py # Evaluation tests 53 | ├── fixtures/ 54 | │ ├── template.json # Sample ARM template 55 | │ └── parameters.json # Sample parameters 56 | ├── test_entra_analyzer.py # Entra ID analysis test script 57 | └── test_entra_mcp_client.py # Entra ID MCP client test 58 | ``` 59 | 60 | ## Installation 61 | 62 | 1. **Clone and setup**: 63 | ```bash 64 | git clone 65 | cd mcp_server 66 | pip install -r requirements.txt 67 | ``` 68 | 69 | 2. **Set up OpenAI API key** (for PydanticAI agent): 70 | ```bash 71 | export OPENAI_API_KEY="your-api-key" 72 | ``` 73 | 74 | ## Usage 75 | 76 | ### 1. MCP Server 77 | 78 | Start the MCP server: 79 | 80 | ```bash 81 | python server.py 82 | ``` 83 | 84 | The server exposes the `analyze_arm` tool that accepts: 85 | - `template_text`: JSON content of ARM template 86 | - `parameters_text`: JSON content of parameters file 87 | 88 | ### 2. 
Direct Analysis (Agent) 89 | 90 | Run ARM template analysis with the agent: 91 | 92 | ```bash 93 | python agent/run_agent.py arm fixtures/template.json fixtures/parameters.json 94 | ``` 95 | 96 | Run Entra ID analysis with the agent: 97 | 98 | ```bash 99 | python agent/run_agent.py entra c:/EntraBackup 100 | ``` 101 | 102 | The agent provides AI-powered analysis with: 103 | - Executive summaries 104 | - Detailed findings tables 105 | - Specific recommendations 106 | - Structured reports 107 | 108 | **Direct Analysis (without agent):** 109 | 110 | Run Entra ID analysis directly: 111 | 112 | ```bash 113 | python test_entra_analyzer.py c:/EntraBackup 114 | ``` 115 | 116 | ### 3. Programmatic Usage 117 | 118 | **ARM Template Analysis:** 119 | ```python 120 | from tools.arm_ingest import analyze_arm, IngestArgs 121 | 122 | # Load your template and parameters 123 | template_text = open("template.json").read() 124 | params_text = open("parameters.json").read() 125 | 126 | # Analyze 127 | findings = analyze_arm(IngestArgs( 128 | template_text=template_text, 129 | parameters_text=params_text 130 | )) 131 | 132 | print(f"Found {findings.stats['total']} issues") 133 | for finding in findings.findings: 134 | print(f"- {finding.severity}: {finding.message}") 135 | ``` 136 | 137 | **Entra ID Analysis:** 138 | ```python 139 | from tools.entra_analyzer import analyze_entra_backup 140 | 141 | # Analyze Entra ID backup 142 | findings = analyze_entra_backup("c:/EntraBackup") 143 | 144 | print(f"Found {findings.stats['total']} issues") 145 | for finding in findings.findings: 146 | print(f"- {finding.severity}: {finding.message}") 147 | ``` 148 | 149 | ### 4. 
MCP Client Integration 150 | 151 | Connect to the MCP server from any MCP-compatible client: 152 | 153 | ```python 154 | import asyncio 155 | from mcp import ClientSession, StdioServerParameters 156 | from mcp.client.stdio import stdio_client 157 | 158 | async def analyze_template(): 159 | server_params = StdioServerParameters( 160 | command='python', 161 | args=['server.py'] 162 | ) 163 | 164 | async with stdio_client(server_params) as (read, write): 165 | async with ClientSession(read, write) as session: 166 | await session.initialize() 167 | 168 | # ARM Template Analysis 169 | result = await session.call_tool('analyze_arm', { 170 | 'template_text': template_json, 171 | 'parameters_text': parameters_json 172 | }) 173 | 174 | # Entra ID Analysis 175 | result = await session.call_tool('analyze_entra', { 176 | 'backup_path': 'c:/EntraBackup', 177 | 'include_summary': True 178 | }) 179 | 180 | print(result.content[0].text) 181 | 182 | asyncio.run(analyze_template()) 183 | ``` 184 | 185 | ## Testing 186 | 187 | Run the evaluation tests: 188 | 189 | **ARM Template Tests:** 190 | ```bash 191 | python evals/test_eval.py 192 | ``` 193 | 194 | **Entra ID Tests:** 195 | ```bash 196 | python evals/test_entra_eval.py 197 | ``` 198 | 199 | **All Tests:** 200 | ```bash 201 | python evals/test_eval.py 202 | python evals/test_entra_eval.py 203 | ``` 204 | 205 | The tests use sanitized fixture data to ensure consistent, predictable results. 
206 | 207 | ## Security Findings 208 | 209 | ### ARM Template Findings 210 | 211 | | ID | Severity | Description | 212 | |----|----------|-------------| 213 | | IMG-001 | Medium | Image version set to "latest" | 214 | | OS-001 | Medium | Admin username hardcoded in template | 215 | | MI-001 | Medium | Multiple managed identities assigned | 216 | | PAR-001 | Low | Parameters with null values | 217 | 218 | ### Entra ID Findings 219 | 220 | | ID | Severity | Category | Description | 221 | |----|----------|----------|-------------| 222 | | AUTH-008 | Critical | Policies | Default users can create tenants | 223 | | AUTH-002 | High | Policies | Guest invitations allowed from everyone | 224 | | AUTH-006 | High | Policies | Default users can create applications | 225 | | ORG-004 | Medium | Organization | No security compliance notification emails | 226 | | AUTH-003 | Medium | Policies | Email-based subscriptions signup allowed | 227 | | SYNC-002 | Medium | Sync | Password writeback is disabled | 228 | | AUTH-METHOD-FIDO2 | Medium | Auth | FIDO2 authentication method is disabled | 229 | | ORG-005 | Low | Organization | Using legacy Azure AD tenant type | 230 | 231 | ## Configuration 232 | 233 | ### Environment Variables 234 | 235 | - `OPENAI_API_KEY`: Required for PydanticAI agent functionality 236 | - `MCP_LOG_LEVEL`: Set logging level (DEBUG, INFO, WARNING, ERROR) 237 | 238 | ### Customizing Checks 239 | 240 | Modify `tools/arm_checks.py` to add new security checks: 241 | 242 | ```python 243 | def check_custom_security(template: Dict[str, Any], parameters: Dict[str, Any]) -> List[Finding]: 244 | findings = [] 245 | # Add your custom security checks here 246 | return findings 247 | ``` 248 | 249 | ## Development 250 | 251 | ### Adding New Security Checks 252 | 253 | 1. Add check logic to `tools/arm_checks.py` 254 | 2. Update the `check_vm_template` function 255 | 3. Add test cases to `evals/test_eval.py` 256 | 4. 
Update documentation 257 | 258 | ### MCP Tool Development 259 | 260 | The MCP server uses FastMCP for easy tool registration: 261 | 262 | ```python 263 | @mcp.tool() 264 | def your_custom_tool(param: str) -> str: 265 | """Your tool description.""" 266 | return "result" 267 | ``` 268 | 269 | ## Contributing 270 | 271 | 1. Fork the repository 272 | 2. Create a feature branch 273 | 3. Add tests for new functionality 274 | 4. Submit a pull request 275 | 276 | ## License 277 | 278 | MIT License - see LICENSE file for details. 279 | 280 | ## Support 281 | 282 | For issues and questions: 283 | - Create an issue in the repository 284 | - Check the documentation 285 | - Review the test cases for usage examples 286 | -------------------------------------------------------------------------------- /agent/run_agent.py: -------------------------------------------------------------------------------- 1 | import json, pathlib, sys 2 | from pydantic import BaseModel 3 | from pydantic_ai import Agent 4 | 5 | # Add parent directory to path for imports 6 | sys.path.append(str(pathlib.Path(__file__).parent.parent)) 7 | 8 | # ---- Client ↔ MCP glue depends on your client runtime. Here we'll just call the tool code directly. 9 | # In production you'd let your MCP client host the tool and the agent call via MCP. 10 | from tools.arm_ingest import analyze_arm, IngestArgs 11 | from tools.entra_analyzer import analyze_entra_backup 12 | 13 | SYSTEM_PROMPT = """ 14 | You are a cloud security analyst specializing in Azure security. You can analyze: 15 | 16 | 1. ARM Templates: When given ARM template and parameters files, analyze for infrastructure security issues 17 | 2. 
Entra ID Configurations: When given Entra ID backup files, analyze for identity security issues 18 | 19 | For ARM Templates: 20 | - Call `analyze_arm` with template and parameteDrs 21 | - Focus on infrastructure security: image versions, credentials, identities, networking 22 | 23 | For Entra ID Analysis: 24 | - Call `analyze_entra` with backup path 25 | - Focus on identity security: policies, roles, authentication, sync settings 26 | 27 | Always: 28 | - Summarize key risks (highest severity first) 29 | - Explain why each finding matters 30 | - Provide concrete remediation steps 31 | - Output executive summary followed by detailed findings 32 | """ 33 | 34 | class Report(BaseModel): 35 | analysis_type: str # "arm" or "entra" 36 | summary: str 37 | table_md: str 38 | recommendations: str 39 | 40 | def render_arm_report(findings) -> Report: 41 | sev_order = {"high": 0, "med": 1, "low": 2} 42 | sorted_findings = sorted(findings.findings, key=lambda x: sev_order[x.severity]) 43 | lines = ["| Severity | ID | Resource | Message |", "|---|---|---|---|"] 44 | for f in sorted_findings: 45 | lines.append(f"| {f.severity} | {f.id} | {f.resource or ''} | {f.message} |") 46 | summary = f"{findings.stats['total']} ARM template findings " \ 47 | f"({findings.stats['high']} high / {findings.stats['med']} med / {findings.stats['low']} low)." 48 | return Report( 49 | analysis_type="arm", 50 | summary=summary, 51 | table_md="\n".join(lines), 52 | recommendations="Review ARM template security configurations and implement recommended changes." 
53 | ) 54 | 55 | def render_entra_report(findings) -> Report: 56 | sev_order = {"critical": 0, "high": 1, "med": 2, "low": 3} 57 | sorted_findings = sorted(findings.findings, key=lambda x: sev_order[x.severity]) 58 | lines = ["| Severity | ID | Category | Message |", "|---|---|---|---|"] 59 | for f in sorted_findings: 60 | lines.append(f"| {f.severity} | {f.id} | {f.category} | {f.message} |") 61 | summary = f"{findings.stats['total']} Entra ID findings " \ 62 | f"({findings.stats['critical']} critical / {findings.stats['high']} high / {findings.stats['med']} med / {findings.stats['low']} low)." 63 | return Report( 64 | analysis_type="entra", 65 | summary=summary, 66 | table_md="\n".join(lines), 67 | recommendations="Review Entra ID security policies and implement recommended changes." 68 | ) 69 | 70 | async def analyze_arm_with_agent(template_path: str, params_path: str): 71 | """Use pydantic-ai agent to analyze ARM templates""" 72 | import os 73 | 74 | # Check if API key is set 75 | if not os.getenv('OPENAI_API_KEY'): 76 | raise ValueError("OPENAI_API_KEY environment variable is not set. Please set it to use the LLM agent.") 77 | 78 | # Create agent with a simple model (you can configure this with your preferred LLM) 79 | agent = Agent('openai:gpt-4o-mini', result_type=Report, system_prompt=SYSTEM_PROMPT) 80 | 81 | # Read files 82 | t_text = pathlib.Path(template_path).read_text(encoding="utf-8") 83 | p_text = pathlib.Path(params_path).read_text(encoding="utf-8") 84 | 85 | # Analyze using the tool 86 | findings = analyze_arm(IngestArgs(template_text=t_text, parameters_text=p_text)) 87 | 88 | # Use agent to generate report 89 | prompt = f""" 90 | Analyze these ARM template findings and create a comprehensive security report: 91 | 92 | Findings: {findings.model_dump_json()} 93 | 94 | Focus on: 95 | 1. Infrastructure security risks 96 | 2. Credential management issues 97 | 3. Identity and access management 98 | 4. 
Network security configurations 99 | 100 | Provide actionable remediation steps for each critical finding. 101 | """ 102 | 103 | result = await agent.run_sync(prompt) 104 | return result.data 105 | 106 | async def analyze_entra_with_agent(backup_path: str): 107 | """Use pydantic-ai agent to analyze Entra ID configurations""" 108 | import os 109 | 110 | # Check if API key is set 111 | if not os.getenv('OPENAI_API_KEY'): 112 | raise ValueError("OPENAI_API_KEY environment variable is not set. Please set it to use the LLM agent.") 113 | 114 | # Create agent with a simple model (you can configure this with your preferred LLM) 115 | agent = Agent('openai:gpt-4o-mini', result_type=Report, system_prompt=SYSTEM_PROMPT) 116 | 117 | # Analyze using the tool 118 | findings = analyze_entra_backup(backup_path) 119 | 120 | # Use agent to generate report 121 | prompt = f""" 122 | Analyze these Entra ID security findings and create a comprehensive security report: 123 | 124 | Findings: {findings.model_dump_json()} 125 | 126 | Focus on: 127 | 1. Identity and access management risks 128 | 2. Authentication and authorization policies 129 | 3. Privileged access management 130 | 4. Directory synchronization security 131 | 132 | Prioritize critical and high-severity findings. Provide specific remediation steps 133 | for policy changes, role assignments, and security configurations. 134 | """ 135 | 136 | result = await agent.run_sync(prompt) 137 | return result.data 138 | 139 | def main_arm(template_path: str, params_path: str): 140 | """Analyze ARM templates""" 141 | print(f"Analyzing ARM template: {template_path}") 142 | print(f"Parameters file: {params_path}") 143 | print("=" * 60) 144 | 145 | t_text = pathlib.Path(template_path).read_text(encoding="utf-8") 146 | p_text = pathlib.Path(params_path).read_text(encoding="utf-8") 147 | 148 | # In a full MCP flow: the Agent would choose a tool call. 
For a local demo, we invoke it directly: 149 | findings = analyze_arm(IngestArgs(template_text=t_text, parameters_text=p_text)) 150 | 151 | # Print summary 152 | print(f"\nANALYSIS SUMMARY") 153 | print(f"Total findings: {findings.stats['total']}") 154 | print(f"High: {findings.stats['high']} | Medium: {findings.stats['med']} | Low: {findings.stats['low']}") 155 | 156 | # Print findings by severity 157 | severity_order = ["high", "med", "low"] 158 | for severity in severity_order: 159 | severity_findings = [f for f in findings.findings if f.severity == severity] 160 | if severity_findings: 161 | print(f"\n{severity.upper()} SEVERITY ISSUES:") 162 | for finding in severity_findings: 163 | print(f" - {finding.id}: {finding.message}") 164 | print(f" Resource: {finding.resource or 'N/A'}") 165 | print() 166 | 167 | print(f"\nAnalysis complete!") 168 | 169 | def main_entra(backup_path: str): 170 | """Analyze Entra ID backup""" 171 | findings = analyze_entra_backup(backup_path) 172 | 173 | print(f"Analyzing Entra ID backup: {backup_path}") 174 | print("=" * 60) 175 | 176 | # Print summary 177 | print(f"\nANALYSIS SUMMARY") 178 | print(f"Total findings: {findings.stats['total']}") 179 | print(f"Critical: {findings.stats['critical']} | High: {findings.stats['high']} | Medium: {findings.stats['med']} | Low: {findings.stats['low']}") 180 | 181 | # Print findings by severity 182 | severity_order = ["critical", "high", "med", "low"] 183 | for severity in severity_order: 184 | severity_findings = [f for f in findings.findings if f.severity == severity] 185 | if severity_findings: 186 | print(f"\n{severity.upper()} SEVERITY ISSUES:") 187 | for finding in severity_findings: 188 | print(f" - {finding.id}: {finding.message}") 189 | print(f" Recommendation: {finding.recommendation}") 190 | print() 191 | 192 | # Print category breakdown 193 | if findings.summary.get("categories"): 194 | print(f"\nISSUES BY CATEGORY:") 195 | for category, issue_ids in 
findings.summary["categories"].items(): 196 | print(f" {category}: {len(issue_ids)} issues") 197 | 198 | print(f"\nAnalysis complete!") 199 | 200 | def main(): 201 | """Main function with argument parsing""" 202 | import sys 203 | 204 | if len(sys.argv) < 2: 205 | print("Usage:") 206 | print(" ARM Analysis: python agent/run_agent.py arm ") 207 | print(" Entra Analysis: python agent/run_agent.py entra ") 208 | print("\nExamples:") 209 | print(" python agent/run_agent.py arm fixtures/template.json fixtures/parameters.json") 210 | print(" python agent/run_agent.py entra c:/EntraBackup") 211 | sys.exit(1) 212 | 213 | analysis_type = sys.argv[1].lower() 214 | 215 | if analysis_type == "arm": 216 | if len(sys.argv) != 4: 217 | print("ARM analysis requires template and parameters files") 218 | print("Usage: python agent/run_agent.py arm ") 219 | sys.exit(1) 220 | main_arm(sys.argv[2], sys.argv[3]) 221 | elif analysis_type == "entra": 222 | if len(sys.argv) != 3: 223 | print("Entra analysis requires backup path") 224 | print("Usage: python agent/run_agent.py entra ") 225 | sys.exit(1) 226 | main_entra(sys.argv[2]) 227 | else: 228 | print(f"Unknown analysis type: {analysis_type}") 229 | print("Supported types: arm, entra") 230 | sys.exit(1) 231 | 232 | if __name__ == "__main__": 233 | main() 234 | -------------------------------------------------------------------------------- /evals/test_llm_eval.py: -------------------------------------------------------------------------------- 1 | import json, pathlib, sys 2 | from pydantic import BaseModel 3 | from pydantic_ai import Agent 4 | from pydantic_evals import Dataset, Case 5 | from pydantic_evals.evaluators import Contains, Equals, Evaluator, EvaluatorContext 6 | 7 | # Add parent directory to path for imports 8 | sys.path.append(str(pathlib.Path(__file__).parent.parent)) 9 | 10 | from tools.arm_ingest import analyze_arm, IngestArgs 11 | 12 | SYSTEM_PROMPT = """ 13 | You are a cloud security analyst specializing in Azure 
security. You can analyze: 14 | 15 | 1. ARM Templates: When given ARM template and parameters files, analyze for infrastructure security issues 16 | 2. Entra ID Configurations: When given Entra ID backup files, analyze for identity security issues 17 | 18 | For ARM Templates: 19 | - Call `analyze_arm` with template and parameters 20 | - Focus on infrastructure security: image versions, credentials, identities, networking 21 | 22 | For Entra ID Analysis: 23 | - Call `analyze_entra` with backup path 24 | - Focus on identity security: policies, roles, authentication, sync settings 25 | 26 | Always: 27 | - Summarize key risks (highest severity first) 28 | - Explain why each finding matters 29 | - Provide concrete remediation steps 30 | - Output executive summary followed by detailed findings 31 | """ 32 | 33 | class VulnerabilityAnalysis(BaseModel): 34 | """Response model for vulnerability analysis""" 35 | highest_priority_vulnerability: str 36 | vulnerability_id: str 37 | severity: str 38 | explanation: str 39 | 40 | class FieldContainsEvaluator(Evaluator): 41 | """Custom evaluator that checks if a specific field contains a value""" 42 | 43 | def __init__(self, field_name: str, expected_value: str, evaluation_name: str = None): 44 | self.field_name = field_name 45 | self.expected_value = expected_value 46 | self.evaluation_name = evaluation_name or f"field_{field_name}_contains" 47 | 48 | def evaluate(self, ctx: EvaluatorContext) -> bool: 49 | """Check if the specified field contains the expected value""" 50 | if hasattr(ctx.output, self.field_name): 51 | field_value = getattr(ctx.output, self.field_name) 52 | return self.expected_value.lower() in str(field_value).lower() 53 | return False 54 | 55 | def create_arm_evaluation_dataset(): 56 | """Create evaluation dataset for ARM template analysis""" 57 | 58 | # Read the test template and parameters 59 | template_path = pathlib.Path(__file__).parent.parent / "fixtures" / "template.json" 60 | params_path = 
pathlib.Path(__file__).parent.parent / "fixtures" / "parameters.json" 61 | 62 | template_text = template_path.read_text(encoding="utf-8") 63 | params_text = params_path.read_text(encoding="utf-8") 64 | 65 | # Get the actual findings from our analyzer 66 | findings = analyze_arm(IngestArgs(template_text=template_text, parameters_text=params_text)) 67 | 68 | # Find the highest severity finding 69 | severity_order = {"high": 0, "med": 1, "low": 2} 70 | highest_severity_finding = min(findings.findings, key=lambda x: severity_order[x.severity]) 71 | 72 | # Create the evaluation case 73 | case = Case( 74 | inputs={ 75 | "template": template_text, 76 | "parameters": params_text, 77 | "prompt": f""" 78 | Analyze this ARM template for security vulnerabilities and identify the highest priority issue: 79 | 80 | Template: 81 | {template_text} 82 | 83 | Parameters: 84 | {params_text} 85 | 86 | What is the highest priority vulnerability in this ARM template? Provide the vulnerability ID, severity, and a brief explanation. 87 | """ 88 | }, 89 | expected_output={ 90 | "highest_priority_vulnerability": highest_severity_finding.message, 91 | "vulnerability_id": highest_severity_finding.id, 92 | "severity": highest_severity_finding.severity, 93 | "explanation": f"This is a {highest_severity_finding.severity} severity issue that should be addressed first." 94 | } 95 | ) 96 | 97 | dataset = Dataset(name="arm_vulnerability_analysis", cases=[case]) 98 | print(f"Created dataset with {len(dataset.cases)} cases") 99 | return dataset 100 | 101 | async def analyze_vulnerability_task(inputs: dict) -> VulnerabilityAnalysis: 102 | """Real LLM task function that analyzes ARM templates for vulnerabilities""" 103 | 104 | import os 105 | 106 | # Check if API key is set 107 | if not os.getenv('OPENAI_API_KEY'): 108 | raise ValueError("OPENAI_API_KEY environment variable is not set. 
Please set it to use the LLM agent.") 109 | 110 | try: 111 | # Create agent 112 | agent = Agent('openai:gpt-4o-mini', result_type=VulnerabilityAnalysis, system_prompt=SYSTEM_PROMPT) 113 | 114 | print(f"Running LLM analysis with prompt length: {len(inputs['prompt'])} characters") 115 | 116 | # Run the agent with the prompt 117 | result = await agent.run(inputs["prompt"]) 118 | 119 | print(f"LLM response: {result.data}") 120 | return result.data 121 | 122 | except Exception as e: 123 | print(f"Error in LLM task: {e}") 124 | raise 125 | 126 | def test_llm_vulnerability_identification(): 127 | """Test LLM's ability to identify highest priority vulnerabilities""" 128 | 129 | # Create evaluation dataset 130 | dataset = create_arm_evaluation_dataset() 131 | 132 | # Create evaluators that check specific fields 133 | evaluators = [ 134 | FieldContainsEvaluator( 135 | field_name="vulnerability_id", 136 | expected_value="IMG-001", 137 | evaluation_name="correct_vulnerability_id" 138 | ), 139 | FieldContainsEvaluator( 140 | field_name="severity", 141 | expected_value="med", 142 | evaluation_name="correct_severity" 143 | ), 144 | FieldContainsEvaluator( 145 | field_name="explanation", 146 | expected_value="security", 147 | evaluation_name="explanation_contains_security" 148 | ) 149 | ] 150 | 151 | # Add evaluators to dataset 152 | for evaluator in evaluators: 153 | dataset.add_evaluator(evaluator) 154 | 155 | # Run evaluation using the real LLM task function 156 | import asyncio 157 | results = asyncio.run(dataset.evaluate(analyze_vulnerability_task)) 158 | 159 | print("LLM Vulnerability Analysis Evaluation Results:") 160 | print("=" * 50) 161 | print(f"Number of cases: {len(results.cases)}") 162 | 163 | for case_result in results.cases: 164 | print(f"Case: {case_result.name}") 165 | print(f"Expected vulnerability ID: IMG-001") 166 | print(f"Expected severity: med") 167 | print(f"Actual vulnerability ID: {case_result.output.vulnerability_id}") 168 | print(f"Actual severity: 
{case_result.output.severity}") 169 | print(f"Assertions: {len(case_result.assertions)}") 170 | 171 | for assertion_name, assertion_result in case_result.assertions.items(): 172 | print(f" {assertion_name}: {assertion_result.value}") 173 | if not assertion_result.value: 174 | print(f" Details: {assertion_result.reason}") 175 | print() 176 | 177 | # Summary 178 | total_cases = len(results.cases) 179 | passed_cases = sum(1 for case in results.cases if all(assertion_result.value for assertion_result in case.assertions.values())) 180 | 181 | print(f"Summary: {passed_cases}/{total_cases} cases passed") 182 | 183 | return results 184 | 185 | def test_llm_priority_ranking(): 186 | """Test LLM's ability to correctly rank vulnerabilities by priority""" 187 | 188 | # Create a more complex case with multiple vulnerabilities 189 | template_with_multiple_issues = """ 190 | { 191 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 192 | "contentVersion": "1.0.0.0", 193 | "parameters": { 194 | "adminPassword": { 195 | "type": "securestring", 196 | "defaultValue": "Password123!" 
197 | } 198 | }, 199 | "resources": [ 200 | { 201 | "type": "Microsoft.Compute/virtualMachines", 202 | "apiVersion": "2024-11-01", 203 | "name": "test-vm", 204 | "properties": { 205 | "hardwareProfile": { 206 | "vmSize": "Standard_D2s_v3" 207 | }, 208 | "storageProfile": { 209 | "imageReference": { 210 | "publisher": "MicrosoftWindowsDesktop", 211 | "offer": "Windows-10", 212 | "sku": "win10-21h2-ent", 213 | "version": "latest" 214 | } 215 | }, 216 | "osProfile": { 217 | "adminUsername": "admin", 218 | "adminPassword": "[parameters('adminPassword')]" 219 | } 220 | } 221 | } 222 | ] 223 | } 224 | """ 225 | 226 | # Create case for priority ranking 227 | case = Case( 228 | inputs={ 229 | "template": template_with_multiple_issues, 230 | "parameters": '{"adminPassword": "Password123!"}', 231 | "prompt": f""" 232 | Analyze this ARM template and identify the HIGHEST priority security vulnerability: 233 | 234 | Template: 235 | {template_with_multiple_issues} 236 | 237 | Parameters: 238 | {{"adminPassword": "Password123!"}} 239 | 240 | Rank the vulnerabilities by priority and identify the most critical one that should be fixed first. 241 | """ 242 | }, 243 | expected_output={ 244 | "highest_priority_vulnerability": "Hard-coded password in template", 245 | "vulnerability_id": "CRED-001", # This would be a credential-related issue 246 | "severity": "high", 247 | "explanation": "Hard-coded passwords are a critical security risk that should be addressed immediately." 
248 | } 249 | ) 250 | 251 | dataset = Dataset(name="priority_ranking", cases=[case]) 252 | 253 | # Create evaluators 254 | evaluators = [ 255 | FieldContainsEvaluator( 256 | field_name="highest_priority_vulnerability", 257 | expected_value="password", 258 | evaluation_name="identifies_password_issue" 259 | ), 260 | FieldContainsEvaluator( 261 | field_name="severity", 262 | expected_value="high", 263 | evaluation_name="high_severity" 264 | ) 265 | ] 266 | 267 | # Add evaluators to dataset 268 | for evaluator in evaluators: 269 | dataset.add_evaluator(evaluator) 270 | 271 | # Run evaluation with real LLM task 272 | import asyncio 273 | results = asyncio.run(dataset.evaluate(analyze_vulnerability_task)) 274 | 275 | print("LLM Priority Ranking Evaluation Results:") 276 | print("=" * 50) 277 | 278 | for case_result in results.cases: 279 | print(f"Case: {case_result.name}") 280 | print(f"Expected: Password-related vulnerability with high severity") 281 | print(f"Actual: {case_result.output.highest_priority_vulnerability} (severity: {case_result.output.severity})") 282 | 283 | for assertion_name, assertion_result in case_result.assertions.items(): 284 | print(f" {assertion_name}: {assertion_result.value}") 285 | if not assertion_result.value: 286 | print(f" Details: {assertion_result.reason}") 287 | print() 288 | 289 | return results 290 | 291 | if __name__ == "__main__": 292 | print("Running LLM Evaluation Tests...") 293 | print("=" * 60) 294 | 295 | # Test 1: Basic vulnerability identification 296 | print("\n1. Testing vulnerability identification:") 297 | test_llm_vulnerability_identification() 298 | 299 | # Test 2: Priority ranking 300 | print("\n2. 
"""Security analyzer for Entra ID (Azure AD) configuration backups.

Walks an on-disk EntraBackup directory tree (Organization, Policies,
DirectoryRoles, Directory) and emits a list of findings describing
misconfigurations, plus per-severity statistics and a summary.
"""

import json
import pathlib
import time
from typing import List, Optional, Dict, Any
from pydantic import BaseModel


class EntraFinding(BaseModel):
    """A single security finding discovered in an Entra ID backup."""
    id: str
    severity: str  # "low" | "med" | "high" | "critical"
    category: str  # "organization", "policies", "roles", "sync", "auth"
    resource: Optional[str] = None
    path: Optional[str] = None
    message: str
    recommendation: str


class EntraFindings(BaseModel):
    """Aggregate result: all findings, severity counts, and a summary dict."""
    findings: List[EntraFinding]
    stats: Dict[str, int]
    summary: Dict[str, Any]


def _load_json(path: pathlib.Path) -> Any:
    """Read and parse a UTF-8 JSON file; raises OSError/JSONDecodeError on failure."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def analyze_entra_backup(backup_path: str) -> EntraFindings:
    """
    Analyze Entra ID backup files for security misconfigurations.

    Args:
        backup_path: Path to the EntraBackup directory

    Returns:
        EntraFindings object with security analysis results

    Raises:
        ValueError: if ``backup_path`` does not exist.
    """
    findings: List[EntraFinding] = []
    backup_dir = pathlib.Path(backup_path)

    if not backup_dir.exists():
        raise ValueError(f"Backup directory not found: {backup_path}")

    # Each analyzer inspects one area of the backup and returns its findings.
    findings.extend(_analyze_organization_settings(backup_dir))
    findings.extend(_analyze_authorization_policies(backup_dir))
    findings.extend(_analyze_directory_roles(backup_dir))
    findings.extend(_analyze_sync_settings(backup_dir))
    findings.extend(_analyze_security_defaults(backup_dir))
    findings.extend(_analyze_auth_methods(backup_dir))

    # Per-severity counts; unknown severities are ignored (none are produced here).
    severity_counts = {"critical": 0, "high": 0, "med": 0, "low": 0}
    for finding in findings:
        if finding.severity in severity_counts:
            severity_counts[finding.severity] += 1
    stats = {"total": len(findings), **severity_counts}

    summary = _generate_summary(findings, backup_dir)

    return EntraFindings(findings=findings, stats=stats, summary=summary)


def _analyze_organization_settings(backup_dir: pathlib.Path) -> List[EntraFinding]:
    """Analyze organization-level security settings (Organization/Organization.json)."""
    findings: List[EntraFinding] = []
    org_file = backup_dir / "Organization" / "Organization.json"

    if not org_file.exists():
        findings.append(EntraFinding(
            id="ORG-001",
            severity="high",
            category="organization",
            message="Organization.json not found in backup",
            recommendation="Ensure complete backup of organization settings"
        ))
        return findings

    try:
        org_data = _load_json(org_file)

        # Check for accidental deletion protection (tied to on-prem sync here).
        if not org_data.get("onPremisesSyncEnabled", False):
            findings.append(EntraFinding(
                id="ORG-002",
                severity="med",
                category="organization",
                message="On-premises sync is disabled - no accidental deletion protection",
                recommendation="Enable on-premises sync or implement alternative deletion protection"
            ))

        # Technical notification emails are where Azure sends service alerts.
        tech_emails = org_data.get("technicalNotificationMails", [])
        if not tech_emails:
            findings.append(EntraFinding(
                id="ORG-003",
                severity="med",
                category="organization",
                message="No technical notification emails configured",
                recommendation="Configure technical notification emails for security alerts"
            ))

        # Security/compliance notification emails.
        sec_emails = org_data.get("securityComplianceNotificationMails", [])
        if not sec_emails:
            findings.append(EntraFinding(
                id="ORG-004",
                severity="med",
                category="organization",
                message="No security compliance notification emails configured",
                recommendation="Configure security compliance notification emails"
            ))

        # "AAD" marks the legacy tenant type.
        tenant_type = org_data.get("tenantType", "")
        if tenant_type == "AAD":
            findings.append(EntraFinding(
                id="ORG-005",
                severity="low",
                category="organization",
                message="Using legacy Azure AD tenant type",
                recommendation="Consider migrating to Microsoft Entra ID for enhanced features"
            ))

    except Exception as e:
        # Report (rather than raise) so one corrupt file doesn't abort the scan.
        findings.append(EntraFinding(
            id="ORG-006",
            severity="high",
            category="organization",
            message=f"Error reading organization settings: {str(e)}",
            recommendation="Verify organization.json file integrity"
        ))

    return findings


def _analyze_authorization_policies(backup_dir: pathlib.Path) -> List[EntraFinding]:
    """Analyze authorization policy settings (guest invites, signup, default user rights)."""
    findings: List[EntraFinding] = []
    auth_policy_file = backup_dir / "Policies" / "AuthorizationPolicy" / "authorizationPolicy" / "authorizationPolicy.json"

    if not auth_policy_file.exists():
        findings.append(EntraFinding(
            id="AUTH-001",
            severity="high",
            category="policies",
            message="Authorization policy not found in backup",
            recommendation="Ensure complete backup of authorization policies"
        ))
        return findings

    try:
        auth_data = _load_json(auth_policy_file)

        # Guest invite settings: "everyone" is the most permissive value.
        allow_invites = auth_data.get("allowInvitesFrom", "")
        if allow_invites == "everyone":
            findings.append(EntraFinding(
                id="AUTH-002",
                severity="high",
                category="policies",
                message="Guest invitations allowed from everyone",
                recommendation="Restrict guest invitations to specific domains or disable"
            ))

        # Email-based (viral) subscription signups.
        if auth_data.get("allowedToSignUpEmailBasedSubscriptions", False):
            findings.append(EntraFinding(
                id="AUTH-003",
                severity="med",
                category="policies",
                message="Email-based subscriptions signup allowed",
                recommendation="Disable email-based subscription signup for better control"
            ))

        # Self-service password reset.
        if auth_data.get("allowedToUseSSPR", False):
            findings.append(EntraFinding(
                id="AUTH-004",
                severity="med",
                category="policies",
                message="Self-service password reset enabled",
                recommendation="Review SSPR settings and consider additional restrictions"
            ))

        # None (vs. explicit True/False) means the setting was never configured.
        risky_apps_consent = auth_data.get("allowUserConsentForRiskyApps")
        if risky_apps_consent is None:
            findings.append(EntraFinding(
                id="AUTH-005",
                severity="med",
                category="policies",
                message="User consent for risky apps not explicitly configured",
                recommendation="Explicitly disable user consent for risky applications"
            ))

        # Default (non-admin) user role permissions.
        default_perms = auth_data.get("defaultUserRolePermissions", {})
        if default_perms.get("allowedToCreateApps", False):
            findings.append(EntraFinding(
                id="AUTH-006",
                severity="high",
                category="policies",
                message="Default users can create applications",
                recommendation="Restrict application creation to administrators only"
            ))

        if default_perms.get("allowedToCreateSecurityGroups", False):
            findings.append(EntraFinding(
                id="AUTH-007",
                severity="high",
                category="policies",
                message="Default users can create security groups",
                recommendation="Restrict security group creation to administrators"
            ))

        if default_perms.get("allowedToCreateTenants", False):
            findings.append(EntraFinding(
                id="AUTH-008",
                severity="critical",
                category="policies",
                message="Default users can create tenants",
                recommendation="Disable tenant creation for default users"
            ))

    except Exception as e:
        findings.append(EntraFinding(
            id="AUTH-009",
            severity="high",
            category="policies",
            message=f"Error reading authorization policy: {str(e)}",
            recommendation="Verify authorization policy file integrity"
        ))

    return findings


def _load_role_members(role_dir: pathlib.Path) -> List[Dict[str, str]]:
    """Load display name / UPN for every member backed up under ``role_dir/Members``.

    Backup layout: Members/<guid>/<guid>.json, one file per member.
    Returns an empty list when the Members directory is absent.
    """
    members: List[Dict[str, str]] = []
    members_dir = role_dir / "Members"
    if not members_dir.exists():
        return members

    for member_dir in members_dir.iterdir():
        if not member_dir.is_dir():
            continue
        member_file = member_dir / f"{member_dir.name}.json"
        if not member_file.exists():
            continue
        member_data = _load_json(member_file)
        members.append({
            "name": member_data.get("displayName", ""),
            "upn": member_data.get("userPrincipalName", ""),
        })

    return members


def _analyze_directory_roles(backup_dir: pathlib.Path) -> List[EntraFinding]:
    """Analyze directory roles and privileged users (DirectoryRoles/<id>/<id>.json)."""
    findings: List[EntraFinding] = []
    roles_dir = backup_dir / "DirectoryRoles"

    if not roles_dir.exists():
        findings.append(EntraFinding(
            id="ROLE-001",
            severity="high",
            category="roles",
            message="Directory roles not found in backup",
            recommendation="Ensure complete backup of directory roles"
        ))
        return findings

    # Well-known role template id for Global Administrator.
    global_admin_template_id = "62e90394-69f5-4237-9190-012177145e10"
    privileged_role_names = [
        "Privileged Role Administrator",
        "Security Administrator",
        "Application Administrator",
        "Cloud Application Administrator",
        "Exchange Administrator",
        "SharePoint Administrator"
    ]

    privileged_users: List[Dict[str, str]] = []
    global_admins: List[Dict[str, str]] = []

    try:
        for role_dir in roles_dir.iterdir():
            if not role_dir.is_dir():
                continue

            role_file = role_dir / f"{role_dir.name}.json"
            if not role_file.exists():
                continue

            role_data = _load_json(role_file)
            role_name = role_data.get("displayName", "")

            # Match Global Administrator by display name or template id.
            if "Global Administrator" in role_name or role_data.get("roleTemplateId") == global_admin_template_id:
                global_admins.extend(_load_role_members(role_dir))

            # Other high-privilege roles (matched by substring of display name).
            if any(priv_role in role_name for priv_role in privileged_role_names):
                for member in _load_role_members(role_dir):
                    privileged_users.append({**member, "role": role_name})

        # Thresholds: > 5 global admins is excessive; 0 is a lockout risk.
        if len(global_admins) > 5:
            findings.append(EntraFinding(
                id="ROLE-002",
                severity="high",
                category="roles",
                message=f"Too many Global Administrators ({len(global_admins)})",
                recommendation="Reduce Global Administrator count to minimum required (2-3 users)"
            ))

        if len(global_admins) == 0:
            findings.append(EntraFinding(
                id="ROLE-003",
                severity="critical",
                category="roles",
                message="No Global Administrators found",
                recommendation="Ensure at least one Global Administrator exists"
            ))

        if len(privileged_users) > 20:
            findings.append(EntraFinding(
                id="ROLE-004",
                severity="med",
                category="roles",
                message=f"High number of privileged users ({len(privileged_users)})",
                recommendation="Review privileged user assignments and implement PIM"
            ))

    except Exception as e:
        findings.append(EntraFinding(
            id="ROLE-005",
            severity="high",
            category="roles",
            message=f"Error analyzing directory roles: {str(e)}",
            recommendation="Verify directory roles backup integrity"
        ))

    return findings


def _analyze_sync_settings(backup_dir: pathlib.Path) -> List[EntraFinding]:
    """Analyze on-premises synchronization settings (Directory/OnPremisesSynchronization.json)."""
    findings: List[EntraFinding] = []
    sync_file = backup_dir / "Directory" / "OnPremisesSynchronization.json"

    if not sync_file.exists():
        return findings  # Not applicable if no sync

    try:
        sync_data = _load_json(sync_file)

        features = sync_data.get("features", {})
        config = sync_data.get("configuration", {})

        # Accidental deletion prevention must be set to the count-based mode.
        acc_del_prevention = config.get("accidentalDeletionPrevention", {})
        if acc_del_prevention.get("synchronizationPreventionType") != "enabledForCount":
            findings.append(EntraFinding(
                id="SYNC-001",
                severity="high",
                category="sync",
                message="Accidental deletion prevention not properly configured",
                recommendation="Enable accidental deletion prevention with appropriate threshold"
            ))

        if not features.get("passwordWritebackEnabled", False):
            findings.append(EntraFinding(
                id="SYNC-002",
                severity="med",
                category="sync",
                message="Password writeback is disabled",
                recommendation="Enable password writeback for better user experience"
            ))

        if not features.get("userWritebackEnabled", False):
            findings.append(EntraFinding(
                id="SYNC-003",
                severity="low",
                category="sync",
                message="User writeback is disabled",
                recommendation="Consider enabling user writeback if using Exchange hybrid"
            ))

        # NOTE: key really is "groupWriteBackEnabled" (capital B) in the backup data.
        if not features.get("groupWriteBackEnabled", False):
            findings.append(EntraFinding(
                id="SYNC-004",
                severity="low",
                category="sync",
                message="Group writeback is disabled",
                recommendation="Consider enabling group writeback for Office 365 groups"
            ))

        if not features.get("deviceWritebackEnabled", False):
            findings.append(EntraFinding(
                id="SYNC-005",
                severity="low",
                category="sync",
                message="Device writeback is disabled",
                recommendation="Consider enabling device writeback for device management"
            ))

    except Exception as e:
        findings.append(EntraFinding(
            id="SYNC-006",
            severity="high",
            category="sync",
            message=f"Error reading sync settings: {str(e)}",
            recommendation="Verify sync settings file integrity"
        ))

    return findings


def _analyze_security_defaults(backup_dir: pathlib.Path) -> List[EntraFinding]:
    """Analyze the security defaults enforcement policy (fixed well-known policy id)."""
    findings: List[EntraFinding] = []
    sec_defaults_file = backup_dir / "Policies" / "IdentitySecurityDefaultsEnforcementPolicy" / "00000000-0000-0000-0000-000000000005" / "00000000-0000-0000-0000-000000000005.json"

    if not sec_defaults_file.exists():
        findings.append(EntraFinding(
            id="SEC-001",
            severity="high",
            category="policies",
            message="Security defaults policy not found",
            recommendation="Enable security defaults or implement equivalent Conditional Access policies"
        ))
        return findings

    try:
        sec_data = _load_json(sec_defaults_file)

        if not sec_data.get("isEnabled", False):
            findings.append(EntraFinding(
                id="SEC-002",
                severity="critical",
                category="policies",
                message="Security defaults are disabled",
                recommendation="Enable security defaults or implement equivalent Conditional Access policies"
            ))

    except Exception as e:
        findings.append(EntraFinding(
            id="SEC-003",
            severity="high",
            category="policies",
            message=f"Error reading security defaults: {str(e)}",
            recommendation="Verify security defaults file integrity"
        ))

    return findings


def _analyze_auth_methods(backup_dir: pathlib.Path) -> List[EntraFinding]:
    """Analyze authentication method configurations (flag weak enabled / strong disabled)."""
    findings: List[EntraFinding] = []
    auth_methods_dir = backup_dir / "Policies" / "AuthenticationMethodsPolicy" / "AuthenticationMethodConfigurations"

    if not auth_methods_dir.exists():
        return findings

    try:
        # Weak methods (phishable) should be disabled.
        weak_methods = ["SMS", "Voice"]
        for method in weak_methods:
            method_file = auth_methods_dir / f"{method}.json"
            if not method_file.exists():
                continue
            method_data = _load_json(method_file)
            if method_data.get("state") == "enabled":
                findings.append(EntraFinding(
                    id=f"AUTH-METHOD-{method}",
                    severity="med",
                    category="auth",
                    message=f"{method} authentication method is enabled",
                    recommendation=f"Consider disabling {method} authentication in favor of stronger methods"
                ))

        # Strong (phishing-resistant) methods should be enabled.
        strong_methods = ["FIDO2", "MicrosoftAuthenticator"]
        for method in strong_methods:
            method_file = auth_methods_dir / f"{method}.json"
            if not method_file.exists():
                continue
            method_data = _load_json(method_file)
            if method_data.get("state") != "enabled":
                findings.append(EntraFinding(
                    id=f"AUTH-METHOD-{method}",
                    severity="med",
                    category="auth",
                    message=f"{method} authentication method is disabled",
                    recommendation=f"Enable {method} authentication for stronger security"
                ))

    except Exception as e:
        findings.append(EntraFinding(
            id="AUTH-METHOD-ERROR",
            severity="high",
            category="auth",
            message=f"Error reading authentication methods: {str(e)}",
            recommendation="Verify authentication methods policy files"
        ))

    return findings


def _generate_summary(findings: List[EntraFinding], backup_dir: pathlib.Path) -> Dict[str, Any]:
    """Generate summary of the analysis: path, timestamp, top issues, ids by category."""
    summary: Dict[str, Any] = {
        "backup_path": str(backup_dir),
        # Bug fix: the original recorded the mtime of the current working
        # directory (pathlib.Path().cwd().stat().st_mtime), which is not an
        # analysis date at all. Record the actual wall-clock analysis time
        # as a Unix timestamp (same float type as before).
        "analysis_date": time.time(),
        "critical_issues": [f for f in findings if f.severity == "critical"],
        "high_issues": [f for f in findings if f.severity == "high"],
        "categories": {}
    }

    # Group finding ids by category.
    for finding in findings:
        summary["categories"].setdefault(finding.category, []).append(finding.id)

    return summary