Focused on real-time screen/vision analysis and an expert skill router. The agent acts as an intelligent observer and advisor: it views your screen as you do and provides analysis, advice, and suggestions based on its skill setting.
# Example from screenshot_analyzer.py
async def analyze_screenshot(self, image: Image.Image):
    """Send a captured screenshot to the model together with a crypto prompt.

    The prompt asks the model to pick out six things from the image: the
    ticker symbol, its surrounding context, nearby metrics, contract
    addresses, chain identifiers, and liquidity/volume data.

    Args:
        image: The screenshot to analyze.

    NOTE(review): this excerpt ends right after the model call, so what is
    done with ``response`` is not shown here. The call on ``self.model`` is
    made without ``await`` -- presumably a synchronous client; confirm.
    """
    # Assembled from adjacent literals; the resulting text (including the
    # leading and trailing newline) is the same as the original
    # triple-quoted prompt.
    instructions = (
        "\n"
        "Analyze this crypto-related image carefully and identify:\n"
        "1. The full ticker symbol\n"
        "2. The complete context it appears in\n"
        "3. Any nearby metrics and data\n"
        "4. Contract addresses\n"
        "5. Chain identifiers\n"
        "6. Liquidity and volume data\n"
    )
    request_parts = [instructions, image]
    response = self.model.generate_content(request_parts)
2. Computer Use Mode
Direct computer control through optimized commands. The agent acts as an intelligent computer operator and executes commands provided by the user through text or voice input.
# Example from computer_use.py
async def execute_command(self, command: str) -> Optional[str]:
    """Execute the user's command via the computer use handler.

    Args:
        command: The command text to forward to ``self.computer_use``.

    Returns:
        Whatever the handler's ``execute_command`` returned on success, or
        ``None`` when the handler is not initialized or the command raised.

    Failures are not re-raised: they are logged, shown to the user through
    the notification widget, and reflected in the status label.
    """
    # Guard: without a handler there is nothing to run. This path sits
    # outside the try/finally below, so it resets the command UI itself.
    if not self.computer_use:
        self.notification.show_message("Computer use handler not initialized")
        self._reset_command_ui()
        return None

    try:
        result = await self.computer_use.execute_command(command)
        # Only surface a notification when the handler produced output.
        if result:
            self.notification.show_message(result)
        self.command_textbox.clear()
        self.status_label.setText("Ready")
        # Hand the result back so the annotated Optional[str] return type
        # is actually honored.
        return result
    except Exception as e:
        # UI boundary: report the failure to the user rather than letting
        # it propagate.
        self.logger.error(f"Command execution error: {e}")
        self.status_label.setText("Command failed")
        self.notification.show_message(f"Command failed: {e}")
        return None
    finally:
        # Runs on success and failure alike, after the return value is set.
        self._reset_command_ui()