import boto3
from tqdm import tqdm
# Build an S3 client with explicitly supplied credentials.
# NOTE(review): ACCESS_KEY_ID / ACCESS_SECRET_KEY are not defined in the
# visible code — presumably set elsewhere (e.g. an earlier cell); confirm.
s3 = boto3.client('s3',
aws_access_key_id=ACCESS_KEY_ID,
aws_secret_access_key=ACCESS_SECRET_KEY,
)
# Bucket to list and the common key prefix shared by every site partition.
bucket_name='BUCKET_NAME'
prefix = 'dev/table_name=dc_distribution/site_code='
# Site codes whose partitions will be listed.
# NOTE(review): the trailing `...` looks like a placeholder for omitted codes;
# as written it is not valid Python — replace it with the full list before running.
site_code = ['02010012', '02010013', '02010016', '02010017', '02010021'...]
# One key prefix per site code: '<prefix><code>/'.
prefix_lst = [prefix+cd+'/' for cd in site_code]
# Paginator for list_objects_v2, used to iterate over multi-page listings.
paginator = s3.get_paginator('list_objects_v2')
# Starts empty; the listing loop below stores a list of object keys per site code.
site_dict = {}
# For every site prefix, collect the keys of all objects stored under it and
# record them in site_dict as {site_code: [object_key, ...]}.
for pfx in tqdm(prefix_lst):
    # Each prefix ends in '.../site_code=<code>/', so the second-to-last
    # '/'-separated piece is 'site_code=<code>'; the text after '=' is the
    # site code used as the dict key.
    key = pfx.split('/')[-2].split('=')[-1]
    files = []
    pages = paginator.paginate(Bucket=bucket_name, Prefix=pfx)
    for page in pages:
        # list_objects_v2 leaves 'Contents' out of the response when no object
        # matches the prefix; default to an empty list so a site with no files
        # yields [] instead of raising KeyError and aborting the whole run.
        files.extend(obj['Key'] for obj in page.get('Contents', []))
    site_dict[key] = files
# IPython/Jupyter shell escape: recursively list the table prefix with the AWS CLI and count the output lines with `wc -l`.
!aws s3 ls s3://BUCKET_NAME/dev/table_name=dc_distribution/ --recursive| wc -l # total collected count