Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ If you run into any issues, checkout our [troubleshooting guide](./docs/troubles

<!-- BEGIN AUTO GENERATED TOOLS -->

- **Input automation** (10 tools)
- **Input automation** (11 tools)
- [`click`](docs/tool-reference.md#click)
- [`drag`](docs/tool-reference.md#drag)
- [`fill`](docs/tool-reference.md#fill)
Expand All @@ -488,6 +488,7 @@ If you run into any issues, checkout our [troubleshooting guide](./docs/troubles
- [`type_text`](docs/tool-reference.md#type_text)
- [`upload_file`](docs/tool-reference.md#upload_file)
- [`click_at`](docs/tool-reference.md#click_at)
- [`get_element_at`](docs/tool-reference.md#get_element_at)
- **Navigation automation** (6 tools)
- [`close_page`](docs/tool-reference.md#close_page)
- [`list_pages`](docs/tool-reference.md#list_pages)
Expand Down
14 changes: 13 additions & 1 deletion docs/tool-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Chrome DevTools MCP Tool Reference

- **[Input automation](#input-automation)** (10 tools)
- **[Input automation](#input-automation)** (11 tools)
- [`click`](#click)
- [`drag`](#drag)
- [`fill`](#fill)
Expand All @@ -13,6 +13,7 @@
- [`type_text`](#type_text)
- [`upload_file`](#upload_file)
- [`click_at`](#click_at)
- [`get_element_at`](#get_element_at)
- **[Navigation automation](#navigation-automation)** (6 tools)
- [`close_page`](#close_page)
- [`list_pages`](#list_pages)
Expand Down Expand Up @@ -175,6 +176,17 @@

---

### `get_element_at`

**Description:** Returns the uid of the DOM element at viewport-relative CSS-pixel coordinates (x, y). Pair with [`take_screenshot`](#take_screenshot) + a vision model that emits coordinates; feed the returned uid into uid-based tools such as [`click`](#click), [`hover`](#hover), or [`fill`](#fill). The response also includes the refreshed page snapshot. Pierces open shadow roots and descends same-origin iframes. Cannot reach closed shadow roots or cross-origin / OOPIF iframes. (requires flag: --experimentalVision=true)

**Parameters:**

- **x** (number) **(required)**: CSS-pixel X coordinate, viewport-relative.
- **y** (number) **(required)**: CSS-pixel Y coordinate, viewport-relative.

---

## Navigation automation

### `close_page`
Expand Down
89 changes: 89 additions & 0 deletions scripts/eval_scenarios/get_element_at_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

import assert from 'node:assert';

import type {TestScenario} from '../eval_gemini.ts';

/**
 * Eval scenario for the experimental `get_element_at` tool.
 *
 * Serves a page with one absolutely positioned 200x200 blue square whose
 * top-left corner is at (100, 100), and asks the model to hit-test its centre.
 * The expectations only inspect the recorded tool calls: a visual capture must
 * have happened, `get_element_at` must have been called with numeric
 * coordinates, and at least one of those calls must land inside the square.
 */
export const scenario: TestScenario = {
  serverArgs: ['--experimentalVision=true'],
  prompt: `Take a screenshot of <TEST_URL>. There is a single large blue square on the page. Use the get_element_at tool to inspect the DOM element at the center of that blue square (the page is 800x600 and the square spans roughly x=100..300, y=100..300, so a coordinate around 200,200 is appropriate). Then tell me the element's id and class.`,
  maxTurns: 4,
  htmlRoute: {
    path: '/get_element_at_test.html',
    htmlContent: `
<!doctype html>
<html>
<head>
<style>
body { margin: 0; background: #ffffff; }
#target {
position: absolute;
left: 100px;
top: 100px;
width: 200px;
height: 200px;
background: #1a73e8;
color: white;
display: flex;
align-items: center;
justify-content: center;
font-size: 24px;
}
</style>
</head>
<body>
<div id="target" class="cta-button" data-testid="primary">CLICK ME</div>
</body>
</html>
`,
  },
  expectations: calls => {
    // Bounds of the blue square, inclusive on both ends.
    const squareMin = 100;
    const squareMax = 300;
    const liesInSquare = (value: unknown): boolean =>
      typeof value === 'number' && value >= squareMin && value <= squareMax;

    // The model has to look at the page (screenshot or snapshot) at some point.
    const sawPage = calls.some(
      c => c.name === 'take_screenshot' || c.name === 'take_snapshot',
    );
    assert.ok(
      sawPage,
      'Expected at least one take_screenshot or take_snapshot call before inspecting coordinates',
    );

    const hitTests = calls.filter(c => c.name === 'get_element_at');
    assert.ok(
      hitTests.length >= 1,
      'Expected at least one get_element_at call',
    );

    // Every hit-test must carry numeric coordinates; count those that fall
    // inside the square.
    let hitsInsideSquare = 0;
    hitTests.forEach(hitTest => {
      const {x, y} = hitTest.args;
      assert.strictEqual(
        typeof x,
        'number',
        'get_element_at must receive a numeric x',
      );
      assert.strictEqual(
        typeof y,
        'number',
        'get_element_at must receive a numeric y',
      );
      if (liesInSquare(x) && liesInSquare(y)) {
        hitsInsideSquare += 1;
      }
    });
    assert.ok(
      hitsInsideSquare >= 1,
      'Expected at least one get_element_at call with x in [100,300] and y in [100,300] (inside the blue square)',
    );
  },
};
19 changes: 19 additions & 0 deletions src/bin/chrome-devtools-cli-options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,25 @@ export const commands: Commands = {
},
},
},
// CLI command entry for the `get_element_at` tool (category "Input automation").
// It takes two required numeric arguments, `x` and `y`; per its description the
// tool is only available when --experimentalVision=true is set.
get_element_at: {
description:
'Returns the uid of the DOM element at viewport-relative CSS-pixel coordinates (x, y). Pair with take_screenshot + a vision model that emits coordinates; feed the returned uid into uid-based tools such as click, hover, or fill. The response also includes the refreshed page snapshot. Pierces open shadow roots and descends same-origin iframes. Cannot reach closed shadow roots or cross-origin / OOPIF iframes. (requires flag: --experimentalVision=true)',
category: 'Input automation',
args: {
// Horizontal coordinate of the point to hit-test.
x: {
name: 'x',
type: 'number',
description: 'CSS-pixel X coordinate, viewport-relative.',
required: true,
},
// Vertical coordinate of the point to hit-test.
y: {
name: 'y',
type: 'number',
description: 'CSS-pixel Y coordinate, viewport-relative.',
required: true,
},
},
},
get_memory_snapshot_details: {
description:
'Loads a memory heapsnapshot and returns all available information including statistics, static data, and aggregated node information. Supports pagination for aggregates. (requires flag: --experimentalMemory=true)',
Expand Down
33 changes: 33 additions & 0 deletions src/telemetry/tool_call_metrics.json
Original file line number Diff line number Diff line change
Expand Up @@ -631,5 +631,38 @@
{
"name": "list3p_developer_tools",
"args": []
},
{
"name": "get_element_at",
"args": [
{
"name": "x",
"argType": "number"
},
{
"name": "y",
"argType": "number"
},
{
"name": "mode",
"argType": "string",
"isDeprecated": true
},
{
"name": "css",
"argType": "string",
"isDeprecated": true
},
{
"name": "pierce_shadow",
"argType": "boolean",
"isDeprecated": true
},
{
"name": "file_path_length",
"argType": "number",
"isDeprecated": true
}
]
}
]
118 changes: 118 additions & 0 deletions src/tools/input.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import {logger} from '../logger.js';
import type {McpContext} from '../McpContext.js';
import type {McpPage} from '../McpPage.js';
import {TextSnapshot} from '../TextSnapshot.js';
import {zod} from '../third_party/index.js';
import type {ElementHandle, KeyInput} from '../third_party/index.js';
import type {TextSnapshotNode} from '../types.js';
Expand Down Expand Up @@ -536,3 +538,119 @@ export const pressKey = definePageTool({
}
},
});

/**
 * Hit-tests the page at viewport-relative CSS-pixel coordinates and returns a
 * handle to the deepest element found there.
 *
 * Starting from the top-level document, the lookup repeatedly calls
 * `elementFromPoint` and descends into:
 * - same-origin `<iframe>` / `<frame>` documents (coordinates are translated
 *   into the nested document's viewport), and
 * - open shadow roots.
 *
 * Descent stops at frames whose `contentDocument` is not accessible, at
 * elements without an open shadow root, when a lookup makes no progress, or
 * after 32 levels. CSS transforms applied to a frame element are not
 * compensated for.
 *
 * @param page - Page wrapper whose `pptrPage` is used to run the lookup.
 * @param x - Viewport-relative X coordinate in CSS pixels.
 * @param y - Viewport-relative Y coordinate in CSS pixels.
 * @returns A handle to the element at the point, or `null` when nothing is
 *   hit. The caller is responsible for disposing the returned handle.
 */
async function hitTestElementHandle(
  page: ContextPage,
  x: number,
  y: number,
): Promise<ElementHandle<Element> | null> {
  const handle = await page.pptrPage.evaluateHandle(
    (px, py) => {
      // Everything in this callback must be self-contained: it is handed to
      // evaluateHandle and cannot close over module-level helpers.
      const maxDepth = 32;

      // `instanceof HTMLIFrameElement` only matches frames created in the
      // top-level realm; a frame nested inside another frame is constructed
      // by the inner window. Identify frames by tag instead.
      const isFrameElement = (
        el: Element,
      ): el is HTMLIFrameElement | HTMLFrameElement =>
        el.namespaceURI === 'http://www.w3.org/1999/xhtml' &&
        (el.localName === 'iframe' || el.localName === 'frame');

      const cssPx = (value: string | undefined): number =>
        Number.parseFloat(value ?? '') || 0;

      let root: Document | ShadowRoot = document;
      let cx = px;
      let cy = py;
      let hit: Element | null = null;

      for (let depth = 0; depth < maxDepth; depth++) {
        const candidate: Element | null = root.elementFromPoint(cx, cy);
        // Stop on a miss, or when the lookup hands back the element we
        // already have (e.g. a shadow root resolving to its own host).
        if (!candidate || candidate === hit) {
          break;
        }
        hit = candidate;

        if (isFrameElement(candidate)) {
          let inner: Document | null = null;
          try {
            inner = candidate.contentDocument;
          } catch {
            inner = null;
          }
          if (!inner) {
            // Cross-origin or otherwise inaccessible frame: report the frame
            // element itself.
            break;
          }
          // getBoundingClientRect() describes the border box, but the nested
          // viewport starts at the content box, so border and padding have
          // to be added to the offset.
          const rect = candidate.getBoundingClientRect();
          const view = candidate.ownerDocument.defaultView;
          const style = view ? view.getComputedStyle(candidate) : null;
          cx -=
            rect.left +
            cssPx(style?.borderLeftWidth) +
            cssPx(style?.paddingLeft);
          cy -=
            rect.top + cssPx(style?.borderTopWidth) + cssPx(style?.paddingTop);
          root = inner;
          continue;
        }

        const shadow: ShadowRoot | null = candidate.shadowRoot;
        if (shadow) {
          // Shadow roots share the coordinate space of their document.
          root = shadow;
          continue;
        }
        break;
      }
      return hit;
    },
    x,
    y,
  );

  const element = handle.asElement() as ElementHandle<Element> | null;
  if (!element) {
    // Nothing was hit: release the non-element handle.
    void handle.dispose();
    return null;
  }
  return element;
}

/**
 * `get_element_at` tool: resolves the DOM element at viewport-relative
 * CSS-pixel coordinates to a snapshot uid and appends it to the response
 * together with the page snapshot.
 *
 * The handler throws when no element is found at the point, when the element's
 * backend node id cannot be resolved, or when no uid can be assigned even
 * after re-creating the text snapshot with the element as an extra handle.
 *
 * The hit-tested element handle is disposed on every path, including failures,
 * unless it has been handed to `TextSnapshot.create` via `extraHandles`.
 */
export const getElementAt = definePageTool({
  name: 'get_element_at',
  description: `Returns the uid of the DOM element at viewport-relative CSS-pixel coordinates (x, y). Pair with take_screenshot + a vision model that emits coordinates; feed the returned uid into uid-based tools such as click, hover, or fill. The response also includes the refreshed page snapshot. Pierces open shadow roots and descends same-origin iframes. Cannot reach closed shadow roots or cross-origin / OOPIF iframes.`,
  annotations: {
    category: ToolCategory.INPUT,
    readOnlyHint: true,
    conditions: ['experimentalVision'],
  },
  schema: {
    x: zod.number().describe('CSS-pixel X coordinate, viewport-relative.'),
    y: zod.number().describe('CSS-pixel Y coordinate, viewport-relative.'),
  },
  blockedByDialog: true,
  handler: async (request, response) => {
    const {x, y} = request.params;
    const page = request.page as McpPage;

    const element = await hitTestElementHandle(page, x, y);
    if (!element) {
      throw new Error(
        `No element found at (${x}, ${y}). The coordinate may be outside the viewport, inside a closed shadow root, or inside a cross-origin iframe. Call take_screenshot to verify the page state, or list_pages + select_page to switch into a cross-origin frame.`,
      );
    }

    // True while this handler is responsible for releasing the handle.
    let elementOwned = true;
    try {
      const backendNodeId = await element.backendNodeId();
      if (!backendNodeId) {
        throw new Error(`Could not resolve element identity at (${x}, ${y}).`);
      }

      if (!page.textSnapshot) {
        page.textSnapshot = await TextSnapshot.create(page);
      }

      let uid = page.resolveCdpElementId(backendNodeId);
      if (!uid) {
        // Hit-tested element is not in the a11y-tree snapshot (e.g. a plain
        // div with no accessible role). Inject it via extraHandles so the
        // refreshed snapshot exposes it with a uid that uid-based tools
        // (click, hover, fill, …) can consume.
        const extraHandles = [...page.extraHandles, element];
        page.textSnapshot = await TextSnapshot.create(page, {extraHandles});
        // Only give up ownership once the snapshot was created successfully;
        // if creation throws, the finally block still releases the handle.
        elementOwned = false;
        uid = page.resolveCdpElementId(backendNodeId);
      }

      if (!uid) {
        throw new Error(`Could not assign a uid to element at (${x}, ${y}).`);
      }

      response.appendResponseLine(`Element uid: ${uid}`);
      response.includeSnapshot();
    } finally {
      if (elementOwned) {
        void element.dispose();
      }
    }
  },
});
17 changes: 17 additions & 0 deletions tests/e2e/chrome-devtools-commands.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,21 @@ describe('chrome-devtools', () => {
'restart command suggestion is miss: ' + result.stdout,
);
});

// With no experimental flags on the session, invoking get_element_at should
// exit with status 0 but print the "feature disabled" notice together with
// the command that restarts the session with the flag enabled.
it('fails to invoke get_element_at when experimentalVision is disabled (default)', async () => {
  const disabledNotice =
    'Tool get_element_at requires experimental feature --experimentalVision and is currently disabled';
  const restartHint = 'chrome-devtools start --experimentalVision=true';

  await runCli(['start'], sessionId);
  const {status, stdout} = await runCli(
    ['get_element_at', '100', '100'],
    sessionId,
  );

  assert.strictEqual(status, 0);
  assert(
    stdout.includes(disabledNotice),
    'error message is unexpected: ' + stdout,
  );
  assert(
    stdout.includes(restartHint),
    'restart command suggestion is miss: ' + stdout,
  );
});
});
2 changes: 2 additions & 0 deletions tests/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ describe('e2e', () => {
const {tools} = await client.listTools();
const clickAt = tools.find(t => t.name === 'click_at');
assert.ok(clickAt);
const getElementAt = tools.find(t => t.name === 'get_element_at');
assert.ok(getElementAt);
},
['--experimental-vision'],
);
Expand Down
Loading