Code error for DAP library
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi all,
I refer the section 'Getting latest changes with an incremental query' from the link
https://data-access-platform-api.s3.amazonaws.com/client/README.html#command-line-usage
and program the code to retrieve the web_logs with datetime range.
==========================================================================================
import os
import asyncio
from datetime import datetime, timezone
from urllib.parse import ParseResult, urlparse
import aiofiles
from dap.api import DAPClient
from dap.dap_types import Credentials
from dap.dap_types import Format, IncrementalQuery
base_url: str = os.environ["DAP_API_URL"]
client_id: str = os.environ["DAP_CLIENT_ID"]
client_secret: str = os.environ["DAP_CLIENT_SECRET"]
credentials = Credentials.create(client_id=client_id, client_secret=client_secret)
# timestamp returned by last snapshot or incremental query
last_seen = datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
until_seen = datetime(2024, 1, 2, 0, 0, 0, tzinfo=timezone.utc)
print("Last seen: ", last_seen)
print("Until: ", until_seen)
async def main():
async with DAPClient(base_url, credentials) as session:
query = IncrementalQuery(
format=Format.JSONL,
mode=None,
filter=None,
since=last_seen,
until=until_seen,
)
result = await session.get_table_data("canvas_logs", "web_logs", query)
resources = await session.get_resources(result.objects)
for resource in resources:
components: ParseResult = urlparse(str(resource.url))
file_path = os.path.join(
os.getcwd(), "data", os.path.basename(components.path)
)
print("File path: ", file_path)
async with session.stream_resource(resource) as stream:
async with aiofiles.open(file_path, "wb") as file:
# save gzip data to file without decompressing
async for chunk in stream.iter_chunked(64 * 1024):
await file.write(chunk)
starttoday = datetime.now()
print("Start datetime:", starttoday)
asyncio.run(main());
endtoday = datetime.now()
print("End datetime:", endtoday)
========================================================================================
After ran the code with python, it showed 'components: ParseResult = urlparse(str(resource.url))' has problem, so I modify it to
components: ParseResult = urlparse(str(resource))
Then, ran again, and this time showed:
Last seen: 2024-01-01 00:00:00+00:00
Until: 2024-01-02 00:00:00+00:00
Start datetime: 2024-02-19 18:42:52.493787
File path: /home/adm1/cd2/script/data/part-00000-0a9d64fc-013f-4315-9225-260949ce4fdf-c000.json.gz
Traceback (most recent call last):
File "/home/adm1/cd2/script/get_weblogs_date_range.py", line 51, in <module>
asyncio.run(main());
File "/usr/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
return future.result()
File "/home/adm1/cd2/script/get_weblogs_date_range.py", line 42, in main
async with session.stream_resource(resource) as stream:
AttributeError: __aenter__
Does anyone has idea about it? Sorry that I am new beginner of python. Many Thanks