im2geojson
Parse GeoJSON from image metadata.
Quick Start
Import im2geojson and initialise by passing the directory of images:
>>> from im2geojson import ImageToGeoJSON
# For example: if your current directory is named `parent` and this contains a folder of images: `my_images`,
# Initialise with `input_directory`:
>>> input_directory = './my_images'
>>> im2geojson = ImageToGeoJSON(input_directory=input_directory)
# Start image processing:
>>> im2geojson.start()
Running...
Finished in 0.31 seconds
Summary
>>> im2geojson.summary
'1 out of 6 images processed successfully'
Output
// my_images.geojson
{
"type": "FeatureCollection",
"title": "my_images",
"features":
[
{
"type": "Feature",
"geometry":
{
"type": "Point",
"coordinates": [115.095269, -8.631053]
},
"properties":
{
"datetime": "2023-05-05 06:19:24",
"filename": "EXIF.jpg"
}
}
],
"properties":
{
"parent": "parent"
}
}
Errors
>>> im2geojson.error_dictionary
{'my_images/MISSING_EXIF.jpg': 'AttributeError: image does not have attribute gps_latitude',
'my_images/MISSING_DATETIME.jpg': 'AttributeError: image does not have attribute datetime_original',
'my_images/CORRUPTED_DATETIME.jpg': "ValueError: time data 'corrupted' does not match format '%Y:%m:%d %H:%M:%S'",
'my_images/CORRUPTED_EXIF.jpg': 'ValueError: Invalid GPS Reference X, Expecting N, S, E or W',
'my_images/NO_EXIF.jpg': "'No metadata.'"}
"""

Parse GeoJSON from image metadata.

<br>


Quick Start
-----------


Import im2geojson and initialise by passing the directory of images:


```python
>>> from im2geojson import ImageToGeoJSON

# For example: if your current directory is named `parent` and this contains a folder of images: `my_images`,
# Initialise with `input_directory`:
>>> input_directory = './my_images'
>>> im2geojson = ImageToGeoJSON(input_directory=input_directory)

# Start image processing:
>>> im2geojson.start()
```
```s
Running...
Finished in 0.31 seconds
```
<br>


Summary
-------

```python
>>> im2geojson.summary
```
```s
'1 out of 6 images processed successfully'
```
<br>


Output
------

```json
// my_images.geojson
{
    "type": "FeatureCollection",
    "title": "my_images",
    "features":
    [
        {
            "type": "Feature",
            "geometry":
            {
                "type": "Point",
                "coordinates": [115.095269, -8.631053]
            },
            "properties":
            {
                "datetime": "2023-05-05 06:19:24",
                "filename": "EXIF.jpg"
            }
        }
    ],
    "properties":
    {
        "parent": "parent"
    }
}
```
<br>


Errors
------

```python
>>> im2geojson.error_dictionary
```
```s
{'my_images/MISSING_EXIF.jpg': 'AttributeError: image does not have attribute gps_latitude',
 'my_images/MISSING_DATETIME.jpg': 'AttributeError: image does not have attribute datetime_original',
 'my_images/CORRUPTED_DATETIME.jpg': "ValueError: time data 'corrupted' does not match format '%Y:%m:%d %H:%M:%S'",
 'my_images/CORRUPTED_EXIF.jpg': 'ValueError: Invalid GPS Reference X, Expecting N, S, E or W',
 'my_images/NO_EXIF.jpg': "'No metadata.'"}
```
<br>
<br>


***

<br>

"""

import logging

# Attach a do-nothing handler to the package logger so that importing the
# package never configures logging by itself; records are only output once
# the application installs its own handlers.
logging.getLogger('im2geojson').addHandler(logging.NullHandler())


# Re-export the main class at package level: `from im2geojson import ImageToGeoJSON`.
from im2geojson.im2geojson import ImageToGeoJSON

# The package's public API.
__all__ = ['ImageToGeoJSON']
class ImageToGeoJSON(object):
    """
    ImageToGeoJSON

    Read the metadata of the JPG images found under `input_directory` and
    write one `<title>.geojson` file per feature collection held by the
    parser.

    Note
    ----
    Saves the harvested metadata as geojson to `output_directory`.
    Optionally saves images without metadata and thumbnail images.
    """

    def __init__(self,
                 input_directory,
                 output_directory=DEFAULT_OUTPUT_DIRECTORY,
                 save_images=False,
                 save_thumbnails=False):
        """
        Initialise ImageToGeoJSON object.

        Initialise the object and create the `output_directory` folders.

        Parameters
        ----------
        input_directory : str
            The path to the `input_directory`.

        output_directory : str, default './assets'
            The path to the `output_directory`.

        save_images : bool, default False
            Save images stripped of metadata to `output_directory`.

        save_thumbnails : bool, default False
            Save thumbnail images to `output_directory`.

        """

        self._input_directory = input_directory
        # Trailing slashes are dropped so joined output paths stay tidy.
        self._output_directory = output_directory.rstrip('/')
        self._save_images = save_images
        self._save_thumbnails = save_thumbnails

        self._geojson_parser = GeoJSONParser()
        self._timer = None  # set by `start`; doubles as the "already run" flag
        self._error_dictionary = {}
        self._total_count = 0
        self._success_count = 0

        # Make Output Directories
        # The image folder is only needed when something will be saved in it.
        dir_paths = [self._geojson_dir_path]
        if save_images or save_thumbnails:
            dir_paths.append(self._image_dir_path)
        for path in dir_paths:
            try:
                os.makedirs(path)
            except FileExistsError:
                log.info(f"Folder {path} already exists.")
            else:
                log.info(f"Folder {path} created.")

    @property
    def input_directory(self):
        """str: Return the path to the `input_directory`."""
        return self._input_directory

    @property
    def output_directory(self):
        """str: Return the path to the `output_directory`."""
        return self._output_directory

    @property
    def summary(self):
        """str: Return the `summary` string."""
        return f'{self._success_count} out of {self._total_count} images processed successfully'

    @property
    def has_errors(self):
        """bool: Return `True` if `error_dictionary` contains errors."""
        return bool(self._error_dictionary)

    @property
    def error_dictionary(self):
        """dict: Return the `error_dictionary`."""
        return self._error_dictionary

    def start(self):
        """
        Process the images from `input_directory`.

        Raises
        ------
        RuntimeError
            If `start` has already been called on this object.

        """
        if self._timer is not None:
            raise RuntimeError('Error: Too many calls to function')

        with Timer() as self._timer:
            self._process_files()

    def _process_files(self):
        """
        Process every JPG under `input_directory` and save the geojson.

        Images are read concurrently in worker threads. The results are
        collected in the calling thread, so the counters, the error
        dictionary and the parser are only modified from that one thread.
        """
        # `**` only descends into nested folders when `recursive=True`.
        # Joining the parts also keeps the pattern independent of a trailing
        # separator on `input_directory` and stops it from matching sibling
        # folders that merely share the same name prefix.
        pattern = os.path.join(self.input_directory, '**', '*.[Jj][Pp][Gg]')
        files = glob.iglob(pattern, recursive=True)
        # TODO - **/*.@(jpg|JPG|jpeg|JPEG|gif|GIF|png|PNG) : Tests for gif, png
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future_to_path = {
                executor.submit(self._process_image_file, filepath): filepath
                for filepath in files
            }
            for future in concurrent.futures.as_completed(future_to_path):
                filepath = future_to_path[future]
                self._total_count += 1
                try:
                    folder, coord, props = future.result()
                except Exception as e:
                    # Any failure for a single image is recorded, not raised,
                    # so one bad file cannot abort the whole run.
                    self._add_file_to_errors_with_exception_string(filepath, str(e))
                else:
                    parent = ImageToGeoJSON._parent_folder_from_filepath(filepath)
                    self._geojson_parser.add_feature(folder, *coord, props, parent)
                    self._success_count += 1

        # Save geojson
        for title, feature_collection in self._geojson_parser:
            geojson_file_path = os.path.join(self._geojson_dir_path, f'{title}.geojson')
            with open(geojson_file_path, 'w') as f:
                json.dump(feature_collection, f, indent=4)

    def _process_image_file(self, filepath):
        """
        Read one image and optionally save its stripped image and thumbnail.

        Returns
        -------
        tuple
            `(folder, coord, props)` for the image at `filepath`.

        Raises
        ------
        Exception
            Whatever `read_exif` or the file writes raise; the caller
            records it against the file.
        """
        coord, props, image_b, thumb_b = read_exif(filepath,
                                                   get_image=self._save_images,
                                                   get_thumbnail=self._save_thumbnails)

        folder, filename = ImageToGeoJSON._folder_and_filename_from_filepath(filepath)
        props['filename'] = filename

        # image
        if self._save_images and image_b is not None:
            rel_image_path = self._rel_image_path(filename)
            image_path = os.path.join(self.output_directory, rel_image_path)

            with open(image_path, 'wb') as im:
                im.write(image_b)
            props["rel_image_path"] = rel_image_path

        # thumbnail
        if self._save_thumbnails and thumb_b is not None:
            rel_thumbnail_path = self._rel_thumbnail_path(filename)
            thumbnail_path = os.path.join(self.output_directory, rel_thumbnail_path)

            with open(thumbnail_path, 'wb') as im:
                im.write(thumb_b)
            props["rel_thumbnail_path"] = rel_thumbnail_path

        return folder, coord, props

    def _add_file_to_errors_with_exception_string(self, filepath, exception_string):
        """Record `exception_string` under the key `<folder>/<filename>`."""
        folder, filename = ImageToGeoJSON._folder_and_filename_from_filepath(filepath)
        key = os.path.join(folder, filename)
        self._error_dictionary[key] = exception_string

    def _output_parent_folder(self):
        """str: Return the output parent folder name."""
        return os.path.basename(self._output_directory)

    @property
    def _geojson_dir_path(self):
        """str: Return the path to the geojson directory."""
        return os.path.join(self.output_directory, GEOJSON_DIR)

    @property
    def _image_dir_path(self):
        """str: Return the path to the image directory."""
        return os.path.join(self.output_directory, IMAGE_DIR)

    def _rel_image_path(self, filename):
        """str: Return the relative path to the image filename."""
        return os.path.join(IMAGE_DIR, filename)

    def _rel_thumbnail_path(self, filename):
        """str: Return the relative path to the thumbnail image filename."""
        thumb_file_name = ImageToGeoJSON._thumbnail_filename(filename)
        return os.path.join(IMAGE_DIR, thumb_file_name)

    @staticmethod
    def _folder_and_filename_from_filepath(filepath):
        """tuple of str: Split the filepath and return the folder and filename."""
        head, filename = os.path.split(filepath)
        head, folder = os.path.split(head)
        return folder, filename

    @staticmethod
    def _parent_folder_from_filepath(filepath):
        """str: Split the filepath and return the parent folder."""
        head, filename = os.path.split(filepath)
        head, folder = os.path.split(head)
        head, parent = os.path.split(head)
        return parent

    @staticmethod
    def _thumbnail_filename(image_filename):
        """
        str: Return the thumbnail filename for `image_filename`.

        '_thumb' is inserted before the last extension, so 'photo.jpg'
        becomes 'photo_thumb.jpg' and 'my.photo.jpg' becomes
        'my.photo_thumb.jpg'. A name without any extension simply gets
        '_thumb' appended.
        """
        # Split on the LAST dot only, so names containing extra dots do not
        # break the two-part split.
        f_name, dot, f_type = image_filename.rpartition('.')
        if not dot:
            return image_filename + '_thumb'
        return f_name + '_thumb.' + f_type
ImageToGeoJSON
Note
Saves the harvested metadata as geojson to `output_directory`. Optionally saves images without metadata and thumbnail images.
def __init__(self,
             input_directory,
             output_directory=DEFAULT_OUTPUT_DIRECTORY,
             save_images=False,
             save_thumbnails=False):
    """
    Set up a new ImageToGeoJSON object.

    Stores the settings, resets the processing state and makes sure the
    folders needed under `output_directory` exist.

    Parameters
    ----------
    input_directory : str
        The path to the `input_directory`.

    output_directory : str, default './assets'
        The path to the `output_directory`; trailing '/' characters are
        removed.

    save_images : bool, default False
        Save images stripped of metadata to `output_directory`.

    save_thumbnails : bool, default False
        Save thumbnail images to `output_directory`.

    """

    # Settings
    self._input_directory = input_directory
    self._output_directory = output_directory.rstrip('/')
    self._save_images = save_images
    self._save_thumbnails = save_thumbnails

    # Processing state
    self._geojson_parser = GeoJSONParser()
    self._timer = None
    self._error_dictionary = {}
    self._total_count = 0
    self._success_count = 0

    # Output folders: geojson always, images only when something is saved.
    required_dirs = [self._geojson_dir_path]
    if save_images or save_thumbnails:
        required_dirs.append(self._image_dir_path)

    for directory in required_dirs:
        try:
            os.makedirs(directory)
        except FileExistsError:
            log.info(f"Folder {directory} already exists.")
            continue
        log.info(f"Folder {directory} created.")
Initialise ImageToGeoJSON object.
Initialise the object and create output_directory and its folders.
Parameters
input_directory : str
The path to the input_directory.
output_directory : str, default './assets'
The path to the output_directory.
save_images : bool, default False
Save images stripped of metadata to output_directory.
save_thumbnails : bool, default False
Save thumbnail images to output_directory.
@property
def input_directory(self) -> str:
    """str: The `input_directory` path stored on this object."""
    return self._input_directory
str: Return the path to the input_directory.
@property
def output_directory(self) -> str:
    """str: The `output_directory` path stored on this object."""
    return self._output_directory
str: Return the path to the output_directory.
@property
def summary(self) -> str:
    """str: A one-line `summary` of how many images were processed successfully."""
    succeeded, attempted = self._success_count, self._total_count
    return f'{succeeded} out of {attempted} images processed successfully'
str: Return the summary string.
@property
def has_errors(self) -> bool:
    """bool: Return `True` if `error_dictionary` contains errors."""
    # An empty dictionary is falsy, so its truth value is the answer.
    return bool(self._error_dictionary)
bool: Return True if error_dictionary contains errors.
@property
def error_dictionary(self) -> dict:
    """dict: The `error_dictionary` held by this object (the object itself, not a copy)."""
    return self._error_dictionary
dict: Return the error_dictionary.
def start(self):
    """
    Run the image processing for `input_directory`.

    The work is wrapped in a `Timer`, which is kept on the object; a timer
    that is already set means `start` has been called before.

    Raises
    ------
    RuntimeError
        If `start` is called more than once on the same object.

    """
    already_started = self._timer is not None
    if already_started:
        raise RuntimeError('Error: Too many calls to function')

    with Timer() as self._timer:
        self._process_files()
Process the images from input_directory.