im2geojson

Parse GeoJSON from image metadata.


Quick Start

Import im2geojson and initialise by passing the directory of images:

>>> from im2geojson import ImageToGeoJSON

# For example, if your current directory is named `parent` and it contains a folder of images named `my_images`,
# initialise with `input_directory`:
>>> input_directory = './my_images'
>>> im2geojson = ImageToGeoJSON(input_directory=input_directory)

# Start image processing:
>>> im2geojson.start()
Running...
Finished in 0.31 seconds


Summary

>>> im2geojson.summary
'1 out of 6 images processed successfully'


Output

// my_images.geojson
{
    "type": "FeatureCollection", 
    "title": "my_images", 
    "features": 
    [
        {
            "type": "Feature", 
            "geometry": 
            {
                "type": "Point", 
                "coordinates": [115.095269, -8.631053]
            }, 
            "properties": 
            {
                "datetime": "2023-05-05 06:19:24", 
                "filename": "EXIF.jpg"
            }
        }
    ], 
    "properties": 
    {
        "parent": "parent"
    }
}


Errors

>>> im2geojson.error_dictionary
{'my_images/MISSING_EXIF.jpg': 'AttributeError: image does not have attribute gps_latitude',
 'my_images/MISSING_DATETIME.jpg': 'AttributeError: image does not have attribute datetime_original',
 'my_images/CORRUPTED_DATETIME.jpg': "ValueError: time data 'corrupted' does not match format '%Y:%m:%d %H:%M:%S'",
 'my_images/CORRUPTED_EXIF.jpg': 'ValueError: Invalid GPS Reference X, Expecting N, S, E or W',
 'my_images/NO_EXIF.jpg': "'No metadata.'"}





  1"""
  2
  3Parse GeoJSON from image metadata.
  4
  5<br>
  6
  7
  8Quick Start
  9-----------
 10
 11
 12Import im2geojson and initialise by passing the directory of images:
 13
 14
 15```python
 16>>> from im2geojson import ImageToGeoJSON
 17
 18# For example: if your current directory is named `parent` and this contains a folder of images: `my_images`,
 19# Initialise with `input_directory`:
 20>>> input_directory = './my_images'
 21>>> im2geojson = ImageToGeoJSON(input_directory=input_directory)
 22
 23# Start image processing:
 24>>> im2geojson.start()
 25```
 26```s
 27Running...
 28Finished in 0.31 seconds
 29```
 30<br>
 31
 32
 33Summary
 34-------
 35
 36```python
 37>>> im2geojson.summary
 38```
 39```s
 40'1 out of 6 images processed successfully'
 41```
 42<br>
 43
 44
 45Output
 46------
 47
 48```json
 49// my_images.geojson
 50{
 51    "type": "FeatureCollection", 
 52    "title": "my_images", 
 53    "features": 
 54    [
 55        {
 56            "type": "Feature", 
 57            "geometry": 
 58            {
 59                "type": "Point", 
 60                "coordinates": [115.095269, -8.631053]
 61            }, 
 62            "properties": 
 63            {
 64                "datetime": "2023-05-05 06:19:24", 
 65                "filename": "EXIF.jpg"
 66            }
 67        }
 68    ], 
 69    "properties": 
 70    {
 71        "parent": "parent"
 72    }
 73}
 74```
 75<br>
 76
 77
 78Errors
 79------
 80
 81```python
 82>>> im2geojson.error_dictionary
 83```
 84```s
 85{'my_images/MISSING_EXIF.jpg': 'AttributeError: image does not have attribute gps_latitude',
 86 'my_images/MISSING_DATETIME.jpg': 'AttributeError: image does not have attribute datetime_original',
 87 'my_images/CORRUPTED_DATETIME.jpg': "ValueError: time data 'corrupted' does not match format '%Y:%m:%d %H:%M:%S'",
 88 'my_images/CORRUPTED_EXIF.jpg': 'ValueError: Invalid GPS Reference X, Expecting N, S, E or W',
 89 'my_images/NO_EXIF.jpg': "'No metadata.'"}
 90```
 91<br>
 92<br>
 93
 94   
 95***
 96
 97<br>
 98
 99"""
100
101import logging
102
103logging.getLogger('im2geojson').addHandler(logging.NullHandler())
104
105
106from im2geojson.im2geojson import ImageToGeoJSON
107
108__all__ = ['ImageToGeoJSON']
class ImageToGeoJSON:
 24class ImageToGeoJSON(object):
 25    """
 26    ImageToGeoJSON
 27
 28    Note
 29    ----
 30    Saves the harvested metadata as geojson to 'output_directory`
 31    Optionally saves images without metadata and thumbnails images.
 32    """
 33
 34    def __init__(self, 
 35                 input_directory, 
 36                 output_directory=DEFAULT_OUTPUT_DIRECTORY, 
 37                 save_images=False, 
 38                 save_thumbnails=False):
 39        """
 40        Initialise ImageToGeoJSON object.
 41
 42        Initialise the object and creates `output_directory` and folders.
 43        
 44        Parameters
 45        ----------
 46        input_directory : str
 47            The path to the `input_directory`.
 48            
 49        output_directory : str, default './assets'
 50            The path to the `output_directory`.
 51
 52        save_images : bool, default False
 53            Save images stripped of metadata to `output_directory`.
 54
 55        save_thumbnails : bool, default False
 56            Save thumbnail images to `output_directory`.
 57        
 58        """
 59        
 60        self._input_directory = input_directory
 61        self._output_directory = output_directory.rstrip('/')
 62        self._save_images = save_images
 63        self._save_thumbnails = save_thumbnails
 64
 65        self._geojson_parser = GeoJSONParser()
 66        self._timer = None
 67        self._error_dictionary = {}
 68        self._total_count = 0
 69        self._success_count = 0
 70
 71        # Make Output Directories
 72        dir_paths = [self._geojson_dir_path]
 73        if save_images or save_thumbnails:
 74            dir_paths.append(self._image_dir_path)
 75        for path in dir_paths:
 76            try:
 77                os.makedirs(path)
 78            except FileExistsError:
 79                log.info(f"Folder {path} already exists.")
 80            else:
 81                log.info(f"Folder {path} created.")
 82
 83    @property
 84    def input_directory(self):
 85        """str: Return the path to the `input_directory`."""
 86        return self._input_directory
 87    
 88    @property
 89    def output_directory(self):
 90        """str: Return the path to the `output_directory`."""
 91        return self._output_directory
 92        
 93    @property
 94    def summary(self):
 95        """str: Return the `summary` string."""
 96        return f'{self._success_count} out of {self._total_count} images processed successfully'
 97    
 98    @property
 99    def has_errors(self):
100        """bool: Return `true` if `error_dictionary` contains errors."""
101        return False if self._error_dictionary == {} else True
102    
103    @property
104    def error_dictionary(self):
105        """dict: Return the `error_dictionary`."""
106        return self._error_dictionary
107
108    def start(self):
109        """
110        Process the images from `input_directory`.
111
112        """
113        if self._timer is not None:
114            raise RuntimeError('Error: Too many calls to function')
115        
116        with Timer() as self._timer:
117            self._process_files()
118            
119    def _process_files(self):
120        # Process image files concurrently
121        files = glob.iglob(f'{self.input_directory}**/*.[Jj][Pp][Gg]')
122        # TODO - **/*.@(jpg|JPG|jpeg|JPEG|gif|GIF|png|PNG) : Tests for gif, png
123        with concurrent.futures.ThreadPoolExecutor() as executor:
124            future_to_path = {executor.submit(self._process_image_file, filepath): filepath for filepath in files}
125            for future in concurrent.futures.as_completed(future_to_path):
126                filepath = future_to_path[future]
127                self._total_count += 1
128                try:
129                    folder, coord, props = future.result()
130                except Exception as e:
131                    self._add_file_to_errors_with_exception_string(filepath, str(e))
132                else:
133                    parent = ImageToGeoJSON._parent_folder_from_filepath(filepath)
134                    self._geojson_parser.add_feature(folder, *coord, props, parent)
135                    self._success_count += 1
136
137        # Save geojson
138        for title, feature_collection in self._geojson_parser:
139            geojson_file_path = os.path.join(self._geojson_dir_path, f'{title}.geojson')
140            with open(geojson_file_path, 'w') as f:
141                json.dump(feature_collection, f, indent=4)
142
143    def _process_image_file(self, filepath):
144        try:
145            coord, props, image_b, thumb_b = read_exif(filepath, 
146                                                       get_image=self._save_images, 
147                                                       get_thumbnail=self._save_thumbnails)
148        except Exception as e:
149            raise e
150        else:
151            folder, filename = ImageToGeoJSON._folder_and_filename_from_filepath(filepath)
152            props['filename'] = filename
153
154            # image 
155            if self._save_images and image_b is not None:
156                rel_image_path = self._rel_image_path(filename)
157                image_path = os.path.join(self.output_directory, rel_image_path)            
158
159                with open(image_path, 'wb') as im:
160                    im.write(image_b)
161                    props["rel_image_path"] = rel_image_path
162
163            # thumbnail 
164            if self._save_thumbnails and thumb_b is not None:
165                rel_thumbnail_path = self._rel_thumbnail_path(filename)
166                thumbnail_path = os.path.join(self.output_directory, rel_thumbnail_path)
167
168                with open(thumbnail_path, 'wb') as im:
169                    im.write(thumb_b)
170                    props["rel_thumbnail_path"] = rel_thumbnail_path
171
172            return folder, coord, props
173        
174    def _add_file_to_errors_with_exception_string(self, filepath, exception_string):
175        folder, filename = ImageToGeoJSON._folder_and_filename_from_filepath(filepath)
176        key = os.path.join(folder, filename)
177        self._error_dictionary[key] = exception_string
178
179    def _output_parent_folder(self):
180        """str: Return the output parent folder name."""
181        head, folder = os.path.split(self._output_directory)
182        return folder
183    
184    @property
185    def _geojson_dir_path(self):
186        """str: Return the path to the geojson directory."""
187        return os.path.join(self.output_directory, GEOJSON_DIR)
188    
189    @property
190    def _image_dir_path(self):
191        """str: Return the path to the image directory."""
192        return os.path.join(self.output_directory, IMAGE_DIR)
193
194    def _rel_image_path(self, filename):
195        """str: Return the relative path to the image filename."""
196        return os.path.join(IMAGE_DIR, filename)
197    
198    def _rel_thumbnail_path(self, filename):
199        """str: Return the relative path to the thumbnail image filename."""
200        thumb_file_name = ImageToGeoJSON._thumbnail_filename(filename)
201        return os.path.join(IMAGE_DIR, thumb_file_name)
202
203    @staticmethod
204    def _folder_and_filename_from_filepath(filepath):
205        """tuple of str: Split the filepath and return the folder and filename."""
206        head, filename = os.path.split(filepath)
207        head, folder = os.path.split(head)
208        return folder, filename
209    
210    @staticmethod
211    def _parent_folder_from_filepath(filepath):
212        """str: Split the filepath and return the parent folder."""
213        head, filename = os.path.split(filepath)
214        head, folder = os.path.split(head)
215        head, parent = os.path.split(head)
216        return parent
217    
218    @staticmethod
219    def _thumbnail_filename(image_filename):
220        """str: Split the image filename and return the thumbnail filename."""
221        f_name, f_type  = image_filename.split('.')
222        return f_name + '_thumb.' + f_type

ImageToGeoJSON

Note

Saves the harvested metadata as GeoJSON to `output_directory`. Optionally saves images stripped of metadata and thumbnail images.

ImageToGeoJSON( input_directory, output_directory='./assets', save_images=False, save_thumbnails=False)
34    def __init__(self, 
35                 input_directory, 
36                 output_directory=DEFAULT_OUTPUT_DIRECTORY, 
37                 save_images=False, 
38                 save_thumbnails=False):
39        """
40        Initialise ImageToGeoJSON object.
41
42        Initialise the object and creates `output_directory` and folders.
43        
44        Parameters
45        ----------
46        input_directory : str
47            The path to the `input_directory`.
48            
49        output_directory : str, default './assets'
50            The path to the `output_directory`.
51
52        save_images : bool, default False
53            Save images stripped of metadata to `output_directory`.
54
55        save_thumbnails : bool, default False
56            Save thumbnail images to `output_directory`.
57        
58        """
59        
60        self._input_directory = input_directory
61        self._output_directory = output_directory.rstrip('/')
62        self._save_images = save_images
63        self._save_thumbnails = save_thumbnails
64
65        self._geojson_parser = GeoJSONParser()
66        self._timer = None
67        self._error_dictionary = {}
68        self._total_count = 0
69        self._success_count = 0
70
71        # Make Output Directories
72        dir_paths = [self._geojson_dir_path]
73        if save_images or save_thumbnails:
74            dir_paths.append(self._image_dir_path)
75        for path in dir_paths:
76            try:
77                os.makedirs(path)
78            except FileExistsError:
79                log.info(f"Folder {path} already exists.")
80            else:
81                log.info(f"Folder {path} created.")

Initialise ImageToGeoJSON object.

Initialise the object and create output_directory and its folders.

Parameters

input_directory : str The path to the input_directory.

output_directory : str, default './assets' The path to the output_directory.

save_images : bool, default False Save images stripped of metadata to output_directory.

save_thumbnails : bool, default False Save thumbnail images to output_directory.

input_directory
83    @property
84    def input_directory(self):
85        """str: Return the path to the `input_directory`."""
86        return self._input_directory

str: Return the path to the input_directory.

output_directory
88    @property
89    def output_directory(self):
90        """str: Return the path to the `output_directory`."""
91        return self._output_directory

str: Return the path to the output_directory.

summary
93    @property
94    def summary(self):
95        """str: Return the `summary` string."""
96        return f'{self._success_count} out of {self._total_count} images processed successfully'

str: Return the summary string.

has_errors
 98    @property
 99    def has_errors(self):
100        """bool: Return `true` if `error_dictionary` contains errors."""
101        return False if self._error_dictionary == {} else True

bool: Return True if error_dictionary contains errors.

error_dictionary
103    @property
104    def error_dictionary(self):
105        """dict: Return the `error_dictionary`."""
106        return self._error_dictionary

dict: Return the error_dictionary.

def start(self):
108    def start(self):
109        """
110        Process the images from `input_directory`.
111
112        """
113        if self._timer is not None:
114            raise RuntimeError('Error: Too many calls to function')
115        
116        with Timer() as self._timer:
117            self._process_files()

Process the images from input_directory.