Skip to content

Commit dc7a4ad

Browse files
feat: implement query profiling (#542)
1 parent 702775e commit dc7a4ad

2 files changed

Lines changed: 155 additions & 27 deletions

File tree

datastore/samples/snippets/snippets.py

Lines changed: 102 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,16 @@ def sum_query_property_filter(client):
278278
# Execute sum aggregation query with filters
279279
completed_tasks = client.query(kind="Task").add_filter("done", "=", True)
280280
completed_tasks_query = client.aggregation_query(query=completed_tasks).sum(
281-
property_ref="hours",
282-
alias="total_completed_sum_hours"
281+
property_ref="hours", alias="total_completed_sum_hours"
283282
)
284283

285284
completed_query_result = completed_tasks_query.fetch()
286285
for aggregation_results in completed_query_result:
287286
for aggregation_result in aggregation_results:
288287
if aggregation_result.alias == "total_completed_sum_hours":
289-
print(f"Total sum of hours in completed tasks is {aggregation_result.value}")
288+
print(
289+
f"Total sum of hours in completed tasks is {aggregation_result.value}"
290+
)
290291
# [END datastore_sum_aggregation_query_with_filters]
291292
return tasks
292293

@@ -339,15 +340,16 @@ def avg_query_property_filter(client):
339340
# Execute average aggregation query with filters
340341
completed_tasks = client.query(kind="Task").add_filter("done", "=", True)
341342
completed_tasks_query = client.aggregation_query(query=completed_tasks).avg(
342-
property_ref="hours",
343-
alias="total_completed_avg_hours"
343+
property_ref="hours", alias="total_completed_avg_hours"
344344
)
345345

346346
completed_query_result = completed_tasks_query.fetch()
347347
for aggregation_results in completed_query_result:
348348
for aggregation_result in aggregation_results:
349349
if aggregation_result.alias == "total_completed_avg_hours":
350-
print(f"Total average of hours in completed tasks is {aggregation_result.value}")
350+
print(
351+
f"Total average of hours in completed tasks is {aggregation_result.value}"
352+
)
351353
# [END datastore_avg_aggregation_query_with_filters]
352354
return tasks
353355

@@ -375,9 +377,11 @@ def multiple_aggregations_query(client):
375377
[
376378
datastore.aggregation.CountAggregation(alias="count_aggregation"),
377379
datastore.aggregation.SumAggregation(
378-
property_ref="hours", alias="sum_aggregation"),
380+
property_ref="hours", alias="sum_aggregation"
381+
),
379382
datastore.aggregation.AvgAggregation(
380-
property_ref="hours", alias="avg_aggregation")
383+
property_ref="hours", alias="avg_aggregation"
384+
),
381385
]
382386
)
383387

@@ -389,6 +393,96 @@ def multiple_aggregations_query(client):
389393
return tasks
390394

391395

396+
def explain_analyze_entity(client):
397+
# [START datastore_query_explain_analyze_entity]
398+
# Build the query with explain_options
399+
# analyze = true to get back the query stats, plan info, and query results
400+
query = client.query(
401+
kind="Task", explain_options=datastore.ExplainOptions(analyze=True)
402+
)
403+
404+
# initiate the query
405+
iterator = query.fetch()
406+
407+
# explain_metrics is only available after query is completed
408+
for task_result in iterator:
409+
print(task_result)
410+
411+
# get the plan summary
412+
plan_summary = iterator.explain_metrics.plan_summary
413+
print(f"Indexes used: {plan_summary.indexes_used}")
414+
415+
# get the execution stats
416+
execution_stats = iterator.explain_metrics.execution_stats
417+
print(f"Results returned: {execution_stats.results_returned}")
418+
print(f"Execution duration: {execution_stats.execution_duration}")
419+
print(f"Read operations: {execution_stats.read_operations}")
420+
print(f"Debug stats: {execution_stats.debug_stats}")
421+
# [END datastore_query_explain_analyze_entity]
422+
423+
424+
def explain_entity(client):
425+
# [START datastore_query_explain_entity]
426+
# Build the query with explain_options
427+
# by default (analyze = false), only plan_summary property is available
428+
query = client.query(kind="Task", explain_options=datastore.ExplainOptions())
429+
430+
# initiate the query
431+
iterator = query.fetch()
432+
433+
# get the plan summary
434+
plan_summary = iterator.explain_metrics.plan_summary
435+
print(f"Indexes used: {plan_summary.indexes_used}")
436+
# [END datastore_query_explain_entity]
437+
438+
439+
def explain_analyze_aggregation(client):
440+
# [START datastore_query_explain_analyze_aggregation]
441+
# Build the aggregation query with explain_options
442+
# analyze = true to get back the query stats, plan info, and query results
443+
all_tasks_query = client.query(kind="Task")
444+
count_query = client.aggregation_query(
445+
all_tasks_query, explain_options=datastore.ExplainOptions(analyze=True)
446+
).count()
447+
448+
# initiate the query
449+
iterator = count_query.fetch()
450+
451+
# explain_metrics is only available after query is completed
452+
for task_result in iterator:
453+
print(task_result)
454+
455+
# get the plan summary
456+
plan_summary = iterator.explain_metrics.plan_summary
457+
print(f"Indexes used: {plan_summary.indexes_used}")
458+
459+
# get the execution stats
460+
execution_stats = iterator.explain_metrics.execution_stats
461+
print(f"Results returned: {execution_stats.results_returned}")
462+
print(f"Execution duration: {execution_stats.execution_duration}")
463+
print(f"Read operations: {execution_stats.read_operations}")
464+
print(f"Debug stats: {execution_stats.debug_stats}")
465+
# [END datastore_query_explain_analyze_aggregation]
466+
467+
468+
def explain_aggregation(client):
469+
# [START datastore_query_explain_aggregation]
470+
# Build the aggregation query with explain_options
471+
# by default (analyze = false), only plan_summary property is available
472+
all_tasks_query = client.query(kind="Task")
473+
count_query = client.aggregation_query(
474+
all_tasks_query, explain_options=datastore.ExplainOptions()
475+
).count()
476+
477+
# initiate the query
478+
iterator = count_query.fetch()
479+
480+
# get the plan summary
481+
plan_summary = iterator.explain_metrics.plan_summary
482+
print(f"Indexes used: {plan_summary.indexes_used}")
483+
# [END datastore_query_explain_aggregation]
484+
485+
392486
def main(project_id):
393487
client = datastore.Client(project_id)
394488

datastore/samples/snippets/snippets_test.py

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,15 @@ def setup_indexes(request):
5252

5353
indexes = []
5454
done_property_index = datastore_admin_v1.Index.IndexedProperty(
55-
name='done',
56-
direction=datastore_admin_v1.Index.Direction.ASCENDING
55+
name="done", direction=datastore_admin_v1.Index.Direction.ASCENDING
5756
)
5857
hour_property_index = datastore_admin_v1.Index.IndexedProperty(
59-
name='hours',
60-
direction=datastore_admin_v1.Index.Direction.ASCENDING
58+
name="hours", direction=datastore_admin_v1.Index.Direction.ASCENDING
6159
)
6260
done_hour_index = datastore_admin_v1.Index(
63-
kind='Task',
61+
kind="Task",
6462
ancestor=datastore_admin_v1.Index.AncestorMode.NONE,
65-
properties=[done_property_index, hour_property_index]
63+
properties=[done_property_index, hour_property_index],
6664
)
6765
indexes.append(done_hour_index)
6866

@@ -157,9 +155,7 @@ def test_count_query_with_stale_read(self, capsys, client):
157155
def test_sum_query_on_kind(self, capsys, client):
158156
tasks = snippets.sum_query_on_kind(client)
159157
captured = capsys.readouterr()
160-
assert (
161-
captured.out.strip() == "Total sum of hours in tasks is 9"
162-
)
158+
assert captured.out.strip() == "Total sum of hours in tasks is 9"
163159
assert captured.err == ""
164160

165161
client.entities_to_delete.extend(tasks)
@@ -168,9 +164,7 @@ def test_sum_query_on_kind(self, capsys, client):
168164
def test_sum_query_property_filter(self, capsys, client):
169165
tasks = snippets.sum_query_property_filter(client)
170166
captured = capsys.readouterr()
171-
assert (
172-
captured.out.strip() == "Total sum of hours in completed tasks is 8"
173-
)
167+
assert captured.out.strip() == "Total sum of hours in completed tasks is 8"
174168
assert captured.err == ""
175169

176170
client.entities_to_delete.extend(tasks)
@@ -179,9 +173,7 @@ def test_sum_query_property_filter(self, capsys, client):
179173
def test_avg_query_on_kind(self, capsys, client):
180174
tasks = snippets.avg_query_on_kind(client)
181175
captured = capsys.readouterr()
182-
assert (
183-
captured.out.strip() == "Total average of hours in tasks is 3.0"
184-
)
176+
assert captured.out.strip() == "Total average of hours in tasks is 3.0"
185177
assert captured.err == ""
186178

187179
client.entities_to_delete.extend(tasks)
@@ -201,15 +193,57 @@ def test_avg_query_property_filter(self, capsys, client):
201193
def test_multiple_aggregations_query(self, capsys, client):
202194
tasks = snippets.multiple_aggregations_query(client)
203195
captured = capsys.readouterr()
196+
assert "avg_aggregation value is 3.0" in captured.out
197+
assert "count_aggregation value is 3" in captured.out
198+
assert "sum_aggregation value is 9" in captured.out
199+
assert captured.err == ""
200+
201+
client.entities_to_delete.extend(tasks)
202+
203+
@backoff.on_exception(backoff.expo, AssertionError, max_time=240)
204+
def test_explain_analyze_entity(self, capsys, client):
205+
snippets.explain_analyze_entity(client)
206+
captured = capsys.readouterr()
204207
assert (
205-
'avg_aggregation value is 3.0' in captured.out
208+
"Indexes used: [{'properties': '(__name__ ASC)', 'query_scope': 'Collection group'}]"
209+
in captured.out
206210
)
211+
assert "Results returned: 0" in captured.out
212+
assert "Execution duration: 0:00" in captured.out
213+
assert "Read operations: 0" in captured.out
214+
assert "Debug stats: {" in captured.out
215+
assert captured.err == ""
216+
217+
@backoff.on_exception(backoff.expo, AssertionError, max_time=240)
218+
def test_explain_entity(self, capsys, client):
219+
snippets.explain_entity(client)
220+
captured = capsys.readouterr()
207221
assert (
208-
'count_aggregation value is 3' in captured.out
222+
"Indexes used: [{'properties': '(__name__ ASC)', 'query_scope': 'Collection group'}]"
223+
in captured.out
209224
)
225+
assert captured.err == ""
226+
227+
@backoff.on_exception(backoff.expo, AssertionError, max_time=240)
228+
def test_explain_analyze_aggregation(self, capsys, client):
229+
snippets.explain_analyze_aggregation(client)
230+
captured = capsys.readouterr()
210231
assert (
211-
'sum_aggregation value is 9' in captured.out
232+
"Indexes used: [{'properties': '(__name__ ASC)', 'query_scope': 'Collection group'}]"
233+
in captured.out
212234
)
235+
assert "Results returned: 1" in captured.out
236+
assert "Execution duration: 0:00" in captured.out
237+
assert "Read operations: 1" in captured.out
238+
assert "Debug stats: {" in captured.out
213239
assert captured.err == ""
214240

215-
client.entities_to_delete.extend(tasks)
241+
@backoff.on_exception(backoff.expo, AssertionError, max_time=240)
242+
def test_explain_aggregation(self, capsys, client):
243+
snippets.explain_aggregation(client)
244+
captured = capsys.readouterr()
245+
assert (
246+
"Indexes used: [{'properties': '(__name__ ASC)', 'query_scope': 'Collection group'}]"
247+
in captured.out
248+
)
249+
assert captured.err == ""

0 commit comments

Comments
 (0)