} ], max_tokens=500 )
return response.choices[0].message.content
# Example usage: send "chart.png" with a question about its trends and print the reply
result = analyze_image("chart.png", "Explain the trends shown in this chart")
print(result)
```

### Document Analysis

Figure: SharePoint document library – metadata columns, views, and filter panel.

```python
def extract_table_data(image_path, max_tokens=2000):
    """Extract structured data from a table image.

    The image is sent to the vision deployment together with a prompt that
    asks for the table contents as JSON, and the reply is parsed.

    Args:
        image_path: Path of the image file; it is passed to ``encode_image``
            and its extension is used to pick the MIME type of the data URL.
        max_tokens: Upper bound on the length of the model reply. It is set
            explicitly so that the JSON is not cut off by a low service default.

    Returns:
        The parsed JSON reply. The prompt asks for
        ``{"columns": [...], "rows": [[...], ...]}``, but the reply is not
        validated against that shape.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    import json
    import mimetypes

    prompt = """
Analyze this table image and extract the data in JSON format.
Structure: {"columns": [...], "rows": [[...], [...], ...]}
"""
    base64_image = encode_image(image_path)
    # Fall back to JPEG when the extension is missing or unknown.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    response = client.chat.completions.create(
        model="gpt-4-vision",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                    },
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    reply = response.choices[0].message.content.strip()
    # Models often wrap JSON in a Markdown code fence; drop the opening fence
    # line and the closing fence so that json.loads sees only the payload.
    if reply.startswith("```"):
        _, _, fenced_body = reply.partition("\n")
        reply = fenced_body.rsplit("```", 1)[0]
    return json.loads(reply)
```

## DALL-E Image Generation
```python
def generate_image(prompt, size="1024x1024", quality="standard", n=1):
    """Generate an image from a text description with the DALL-E 3 model.

    Args:
        prompt: Text description of the desired image.
        size: Requested image size, forwarded to the API unchanged.
        quality: Requested quality level, forwarded to the API unchanged.
        n: Number of images to request, forwarded to the API unchanged.
            Only the first returned image is reported, whatever ``n`` is.

    Returns:
        A dict with the ``url`` of the first generated image and the
        ``revised_prompt`` the service reports for it.
    """
    response = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size=size,
        quality=quality,
        n=n,
    )
    first_image = response.data[0]
    return {
        "url": first_image.url,
        "revised_prompt": first_image.revised_prompt,
    }
```

Example:

Figure: Configuration and management dashboard with status overview.

```python
# Generate one image with the default size and quality settings.
result = generate_image(
    "A futuristic cityscape with flying cars and holographic billboards, cyberpunk style"
)
```

## Whisper Speech Recognition

Figure: AI Services resource – deployed models and endpoint configuration.

```python
from azure.cognitiveservices.speech import SpeechConfig, AudioConfig, SpeechRecognizer
def transcribe_audio(audio_file_path):
    """Convert speech in an audio file to text with the Azure Speech SDK.

    Args:
        audio_file_path: Path of the audio file to recognize.

    Returns:
        The ``text`` attribute of the result of a single
        ``recognize_once()`` call.
    """
    # "<key>" and "<region>" are placeholders for the Speech resource settings.
    speech_config = SpeechConfig(
        subscription="<key>",
        region="<region>",
    )
    audio_config = AudioConfig(filename=audio_file_path)
    recognizer = SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    result = recognizer.recognize_once()
    return result.text
```

Or use Azure OpenAI Whisper:

Figure: Azure OpenAI Studio – chat playground with parameters and token usage.

```python
def transcribe_with_whisper(audio_file_path):
    """Transcribe an audio file with the "whisper" deployment.

    Args:
        audio_file_path: Path of the audio file; it is opened in binary mode.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    # The file must stay open while the request is made; the context manager
    # closes it afterwards even if the call raises.
    with open(audio_file_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper",
            file=audio_file,
        )
    return transcript.text
```

## Text-to-Speech Generation
```python
from azure.cognitiveservices.speech import SpeechSynthesizer
def generate_speech(text, output_file="output.wav", voice="en-US-JennyNeural"):
    """Convert text to speech and write the audio to a file.

    Args:
        text: The text to synthesize.
        output_file: Path of the audio file to write.
        voice: Name of the synthesis voice to use.

    Returns:
        The ``audio_duration`` attribute of the synthesis result.
    """
    # Synthesis output needs an AudioOutputConfig; AudioConfig describes an
    # audio *input* source and is the wrong type for a synthesizer.
    from azure.cognitiveservices.speech.audio import AudioOutputConfig

    # "<key>" and "<region>" are placeholders for the Speech resource settings.
    speech_config = SpeechConfig(subscription="<key>", region="<region>")
    speech_config.speech_synthesis_voice_name = voice
    audio_config = AudioOutputConfig(filename=output_file)
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    # speak_text_async returns a future; .get() blocks until synthesis finishes.
    result = synthesizer.speak_text_async(text).get()
    return result.audio_duration
```

## Embeddings for Semantic Search
```python
from openai import AzureOpenAI
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def get_embedding(text, model="text-embedding-ada-002"):
    """Generate the vector embedding for a piece of text.

    Args:
        text: The text to embed.
        model: Name of the embedding model/deployment to call.

    Returns:
        The ``embedding`` of the first item in the response data.
    """
    response = client.embeddings.create(
        input=text,
        model=model,
    )
    return response.data[0].embedding
def semantic_search(query, documents):
    """Rank documents by embedding similarity to a query.

    Args:
        query: The search text.
        documents: Iterable of document strings to rank.

    Returns:
        A list of ``(document, similarity)`` pairs sorted from most to least
        similar. An empty list is returned when there are no documents.
    """
    # Materialize once: the documents are walked twice (embedding and pairing),
    # which would silently drop every result for a one-shot iterator.
    documents = list(documents)
    if not documents:
        # cosine_similarity rejects an empty matrix, and there is nothing to
        # rank, so skip the embedding calls entirely.
        return []

    query_embedding = get_embedding(query)
    doc_embeddings = [get_embedding(doc) for doc in documents]
    # One query row against all document rows; [0] selects that single row.
    similarities = cosine_similarity([query_embedding], doc_embeddings)[0]
    return sorted(
        zip(documents, similarities),
        key=lambda pair: pair[1],
        reverse=True,
    )
```

Example:

Figure: Configuration and management dashboard with status overview.

```python
# Rank three sample documents against a question and print each score.
docs = [
    "Azure Machine Learning provides MLOps capabilities",
    "Python is a popular programming language",
    "Cloud computing enables scalable infrastructure",
]
results = semantic_search("How to deploy ML models?", docs)
for doc, score in results:
    print(f"{score:.3f}: {doc}")
```

## Fine-Tuning Custom Models
```python
from openai import AzureOpenAI
def prepare_training_data(examples, output_path="training_data.jsonl"):
    """Write fine-tuning examples to a JSON Lines file in chat format.

    Each example becomes one line holding a ``messages`` list with a fixed
    system message, the example's input as the user turn and its output as
    the assistant turn.

    Args:
        examples: Iterable of mappings with ``"input"`` and ``"output"`` keys.
        output_path: Path of the JSONL file to write; an existing file is
            overwritten.

    Raises:
        KeyError: If an example lacks the ``"input"`` or ``"output"`` key.
    """
    import json

    # Build every record before opening the file so that a malformed example
    # does not leave a truncated file behind.
    records = [
        {
            "messages": [
                {"role": "system", "content": "You are a customer support assistant."},
                {"role": "user", "content": example["input"]},
                {"role": "assistant", "content": example["output"]},
            ]
        }
        for example in examples
    ]
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)
def create_fine_tune_job(training_file_id):
    """Submit a fine-tuning job for an uploaded training file.

    Args:
        training_file_id: Identifier of the uploaded training file.

    Returns:
        The ``id`` of the created fine-tuning job.
    """
    response = client.fine_tuning.jobs.create(
        training_file=training_file_id,
        model="gpt-35-turbo",
        hyperparameters={
            "n_epochs": 3,
            "batch_size": 1,
            "learning_rate_multiplier": 0.1,
        },
    )
    return response.id
# Upload training file
with open("training_data.jsonl", "rb") as f:
    file_response = client.files.create(file=f, purpose="fine-tune")

# Create fine-tune job
job_id = create_fine_tune_job(file_response.id)
```

## Multi-Agent Orchestration
```python
class AgentOrchestrator:
    """Coordinate multiple specialized agents.

    Each agent is a callable that takes a prompt string and returns the
    model's reply; agents differ only in the role named in their system
    message.
    """

    def __init__(self):
        # Maps an agent name to its prompt -> reply callable.
        self.agents = {
            "researcher": self._create_agent("Research specialist"),
            "coder": self._create_agent("Expert programmer"),
            "writer": self._create_agent("Technical writer"),
        }

    def _create_agent(self, role):
        """Return a callable that answers a prompt in the given role."""

        def agent(prompt):
            response = client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": f"You are a {role}."},
                    {"role": "user", "content": prompt},
                ],
            )
            return response.choices[0].message.content

        return agent

    def solve_complex_task(self, task):
        """Break down a task and delegate it to the specialized agents.

        The agents run in sequence, each one receiving the previous agent's
        output: research, then implementation, then documentation.

        Args:
            task: Description of the task to solve.

        Returns:
            A dict with the ``research``, ``implementation`` and
            ``documentation`` texts.
        """
        # Research phase
        research = self.agents["researcher"](
            f"Research this topic: {task}"
        )
        # Code implementation
        code = self.agents["coder"](
            f"Based on this research, implement a solution:\n{research}"
        )
        # Documentation
        docs = self.agents["writer"](
            f"Document this code:\n{code}"
        )
        return {
            "research": research,
            "implementation": code,
            "documentation": docs,
        }
```

## Emerging Capabilities
### Chain-of-Thought Reasoning
```python
def chain_of_thought_reasoning(problem):
    """Ask the model to solve a problem with explicit step-by-step reasoning.

    Args:
        problem: Text of the problem; it is interpolated into the prompt.

    Returns:
        The content of the model's reply.
    """
    prompt = f"""
Solve this problem step by step. Show your reasoning at each step.
Problem: {problem}
Step 1: Understand the problem
Step 2: Identify relevant information
Step 3: Break down into sub-problems
Step 4: Solve each sub-problem
Step 5: Combine solutions
Final Answer:
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.1,
    )
    return response.choices[0].message.content
```

### Tree-of-Thought
```python
def tree_of_thought(problem, num_branches=3):
    """Explore multiple reasoning paths and have the model pick the best one.

    Args:
        problem: Text of the problem; it is interpolated into every prompt.
        num_branches: Number of independent reasoning paths to generate.

    Returns:
        The content of the model's reply to the evaluation prompt, which
        lists every generated path and asks for a final answer.
    """
    branches = []
    for i in range(num_branches):
        prompt = f"""
Generate reasoning path #{i+1} for this problem:
{problem}
Think creatively and explore different approaches.
"""
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.8,
        )
        branches.append(response.choices[0].message.content)

    # Evaluate branches
    # Join outside the f-string: backslashes inside f-string expressions are
    # a syntax error before Python 3.12.
    numbered_paths = "\n".join(
        f"{number}. {branch}" for number, branch in enumerate(branches, start=1)
    )
    evaluation_prompt = f"""
Problem: {problem}
Reasoning paths:
{numbered_paths}
Which reasoning path is most sound? Explain and provide final answer.
"""
    final_response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": evaluation_prompt}],
    )
    return final_response.choices[0].message.content
```

## Best Practices
- Combine modalities for richer applications
- Validate generated content before use
- Implement fallback mechanisms
- Monitor usage and costs across services
- Cache embeddings for repeated queries
- Use appropriate models for each task
- Test extensively with edge cases
- Implement safety filters for all outputs
## Future Trends
- **Multimodal Foundation Models**: Unified models for all modalities
- **Autonomous Agents**: Self-directed task completion
- **Continuous Learning**: Models that adapt without retraining
- **Smaller, Efficient Models**: Edge deployment capabilities
- **Enhanced Reasoning**: Improved logical and mathematical capabilities
## Troubleshooting
| Issue | Cause | Resolution |
|-------|-------|------------|
| Vision API errors | Unsupported image format | Convert to JPEG/PNG; check size limits |
| Poor image quality | Low resolution input | Use higher resolution; enhance preprocessing |
| Embedding drift | Model version change | Re-embed all documents with same model |
| High latency | Large multi-modal inputs | Compress images; optimize requests |
## Architecture Decision and Tradeoffs
When designing AI/ML solutions with Azure AI Services, consider these key architectural trade-offs:
| Approach | Best For | Tradeoff |
|----------|----------|----------|
| Managed / platform service | Rapid delivery, reduced ops burden | Less customisation, potential vendor lock-in |
| Custom / self-hosted | Full control, advanced tuning | Higher operational overhead and cost |
> **Recommendation:** Start with the managed approach for most workloads and move to custom only when specific requirements demand it.
## Validation and Versioning
- Last validated: April 2026
- Validate examples against your tenant, region, and SKU constraints before production rollout.
- Keep module, CLI, and SDK versions pinned in automation pipelines and review quarterly.
## Security and Governance Considerations
- Apply least-privilege access using RBAC roles and just-in-time elevation for admin tasks.
- Store secrets in managed secret stores and avoid embedding credentials in scripts or source files.
- Enable audit logging, data protection policies, and periodic access reviews for regulated workloads.
## Cost and Performance Notes
- Define budgets and alerts, then monitor usage and cost trends continuously after go-live.
- Baseline performance with synthetic and real-user checks before and after major changes.
- Scale resources with measured thresholds and revisit sizing after usage pattern changes.
## Official Microsoft References
- https://learn.microsoft.com/azure/ai-services/
- https://learn.microsoft.com/azure/machine-learning/
- https://learn.microsoft.com/azure/ai-foundry/
## Public Examples from Official Sources
- These examples are sourced from official public Microsoft documentation and sample repositories.
- Documentation examples: https://learn.microsoft.com/azure/ai-services/
- Sample repositories: https://github.com/Azure-Samples?tab=repositories&q=ai&type=&language=&sort=
- Prefer adapting these examples to your tenant, subscriptions, and governance requirements before production use.
## Key Takeaways
Advanced AI combines multiple modalities, specialized models, and sophisticated orchestration patterns to solve complex, real-world problems.
## References
- https://learn.microsoft.com/azure/ai-services/openai/concepts/models
- https://learn.microsoft.com/azure/ai-services/speech-service/
## Discussion