aws s3 sync boto3
from pathlib import Path
from bisect import bisect_left

import boto3


class S3Sync:
    """
    Class that holds the operations needed for synchronize local dirs to a given bucket.
    """

    def __init__(self):
        self._s3 = boto3.client('s3')

    def sync(self, source: str, dest: str) -> [str]:
        """
        Sync source to dest, this means that all elements existing in
        source that not exists in dest will be copied to dest.

        No element will be deleted.

        :param source: Source folder.
        :param dest: Destination folder.

        :return: None
        """

        paths = self.list_source_objects(source_folder=source)
        objects = self.list_bucket_objects(dest)

        # Getting the keys and ordering to perform binary search
        # each time we want to check if any paths is already there.
        object_keys = [obj['Key'] for obj in objects]
        object_keys.sort()
        object_keys_length = len(object_keys)
        
        for path in paths:
            # Binary search.
            index = bisect_left(object_keys, path)
            if index == object_keys_length:
                # If path not found in object_keys, it has to be sync-ed.
                self._s3.upload_file(str(Path(source).joinpath(path)),  Bucket=dest, Key=path)

    def list_bucket_objects(self, bucket: str) -> [dict]:
        """
        List all objects for the given bucket.

        :param bucket: Bucket name.
        :return: A [dict] containing the elements in the bucket.

        Example of a single object.

        {
            'Key': 'example/example.txt',
            'LastModified': datetime.datetime(2019, 7, 4, 13, 50, 34, 893000, tzinfo=tzutc()),
            'ETag': '"b11564415be7f58435013b414a59ae5c"',
            'Size': 115280,
            'StorageClass': 'STANDARD',
            'Owner': {
                'DisplayName': 'webfile',
                'ID': '75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a'
            }
        }

        """
        try:
            contents = self._s3.list_objects(Bucket=bucket)['Contents']
        except KeyError:
            # No Contents Key, empty bucket.
            return []
        else:
            return contents

    @staticmethod
    def list_source_objects(source_folder: str) -> [str]:
        """
        :param source_folder:  Root folder for resources you want to list.
        :return: A [str] containing relative names of the files.

        Example:

            /tmp
                - example
                    - file_1.txt
                    - some_folder
                        - file_2.txt

            >>> sync.list_source_objects("/tmp/example")
            ['file_1.txt', 'some_folder/file_2.txt']

        """

        path = Path(source_folder)

        paths = []

        for file_path in path.rglob("*"):
            if file_path.is_dir():
                continue
            str_file_path = str(file_path)
            str_file_path = str_file_path.replace(f'{str(path)}/', "")
            paths.append(str_file_path)

        return paths


if __name__ == '__main__':
    sync = S3Sync()
    sync.sync("/temp/some_folder", "some_bucket_name")

Python相关代码片段

pathlib python

pandas excelwriter

python practise

key,value en python

linear regression in tensorflow

spark dataframe get column

drop one table sqlalchemy

python code to remove last character from string

fastapi get body on http middleware

custom neural network in keras

python selenium execute_script

db model for blog

yolov5 opencv

Get first 100 lines of file - python

python playground

print number pattern using for loop in python

ollama python

no module named 'wget'

No module named 'langchain'

failed to build wxpython

python vs c#

rabbitmq python example

change the django url prefix name

np.linspace is not defined python

LLM beguiner guide python

python parquet file to csv

python best practices

yolov5 without net

save variable as pkl python

python [-9:]

eigenface python

'DataFrame' object has no attribute 'dtype'

unable to enable maximize window tkinter

rabbit and fox numpy python

lstm in keras

neural network in keras

resnet50 in keras

autoencoder in keras

cnn in keras

tensor in keras

pyTelegramBotAPI edit photo

print api python

how to get values but not index from pandas series

how to get mode of a column from pandas

bayesian neural network pymcmc

lda python

back propagation python

logical syntax is not none python

register model django

Descending Selection sort

Selection sort with while loops

Selection sort with for loops

Doubling Algorithm for cluster analysis in python

Tkinter widgets

nameerror: name 'callable' is not defined

NameError: name 'Union' is not defined

Make a widget customtkinter python

nn module pytorch

import tf python

Spark SEssion object

Implement Bubble sort with while loops

Unoptimized bubble sort algorithm

Optimized bubble sort algorithm

how to get today's date in python

st_aggrid install

python venv pip blocked by admin windows

numpy matrix from lists of different leght

python postgres auto commit

dotenv install python

np mean axis

LinkExtractor Object

admin django documentation

Python native Convolution implementation

is django monolithic

what is function call with an llm

np array to series

dht22 micropython pico

disable slash command discord.py

python docker compose not printing

tabnet probabilities