incremental backup skips activity exports that have already been tried
This commit is contained in:
		
							parent
							
								
									012c029c83
								
							
						
					
					
						commit
						38ac9ca0f2
					
				| @ -8,7 +8,8 @@ stored in the backup directory will be downloaded. | ||||
| import argparse | ||||
| import getpass | ||||
| from garminexport.garminclient import GarminClient | ||||
| import garminexport.util | ||||
| import garminexport.backup | ||||
| from garminexport.backup import export_formats | ||||
| import logging | ||||
| import os | ||||
| import re | ||||
| @ -27,21 +28,6 @@ LOG_LEVELS = { | ||||
| } | ||||
| """Command-line (string-based) log-level mapping to logging module levels.""" | ||||
| 
 | ||||
def get_backed_up(activities, backup_dir, formats):
    """Return the activities that are fully backed up in ``backup_dir``.

    An activity counts as backed up only if the directory holds a file
    for *every* requested format. Backed up files follow the pattern
    ``<ISO8601 start time>_<activity id><suffix>``.

    :param activities: Activity tuples `(id, starttime)`.
    :type activities: iterable of tuples of `(int, datetime)`
    :param backup_dir: Directory holding previously exported files.
    :type backup_dir: str
    :param formats: Export formats that must all be present. ``None``
      (the command-line default) means every supported format.
    :type formats: list of str
    :return: The `(id, starttime)` pairs that are backed up in all of
      the requested formats.
    :rtype: set of tuples of `(int, datetime)`
    """
    format_suffix = dict(json_summary="_summary.json", json_details="_details.json", gpx=".gpx", tcx=".tcx", fit=".fit")
    if formats is None:
        formats = list(format_suffix)

    # a set gives constant-time lookups for every (activity, format) check
    dir_entries = set(os.listdir(backup_dir))
    backed_up = set()
    for activity_id, start in activities:
        prefix = "{}_{}".format(start.isoformat(), activity_id)
        if all(prefix + format_suffix[f] in dir_entries for f in formats):
            backed_up.add((activity_id, start))
    return backed_up
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
| @ -66,9 +52,9 @@ if __name__ == "__main__": | ||||
|         help=("Desired log output level (DEBUG, INFO, WARNING, ERROR). " | ||||
|               "Default: INFO."), default="INFO") | ||||
|     parser.add_argument( | ||||
|         "-f", "--format", choices=garminexport.util.export_formats, | ||||
|         "-f", "--format", choices=export_formats, | ||||
|         default=None, action='append', | ||||
|         help=("Desired output formats ("+', '.join(garminexport.util.export_formats)+"). " | ||||
|         help=("Desired output formats ("+', '.join(export_formats)+"). " | ||||
|               "Default: ALL.")) | ||||
|     parser.add_argument( | ||||
|         "-E", "--ignore-errors", action='store_true', | ||||
| @ -76,7 +62,12 @@ if __name__ == "__main__": | ||||
|      | ||||
|     args = parser.parse_args() | ||||
|     if not args.log_level in LOG_LEVELS: | ||||
|         raise ValueError("Illegal log-level argument: {}".format(args.log_level)) | ||||
|         raise ValueError("Illegal log-level: {}".format(args.log_level)) | ||||
| 
 | ||||
|     # if no --format was specified, all formats are to be backed up | ||||
|     args.format = args.format if args.format else export_formats | ||||
|     log.info("backing up formats: %s", ", ".join(args.format)) | ||||
|      | ||||
|     logging.root.setLevel(LOG_LEVELS[args.log_level]) | ||||
| 
 | ||||
|     try: | ||||
| @ -89,25 +80,26 @@ if __name__ == "__main__": | ||||
|         with GarminClient(args.username, args.password) as client: | ||||
|             # get all activity ids and timestamps from Garmin account | ||||
|             log.info("retrieving activities for {} ...".format(args.username)) | ||||
|             all_activities = set(client.list_activities()) | ||||
|             activities = set(client.list_activities()) | ||||
|             log.info("account has a total of {} activities.".format( | ||||
|                 len(all_activities))) | ||||
|                 len(activities))) | ||||
|              | ||||
|             # get already backed up activities (stored in backup-dir) | ||||
|             backed_up_activities = get_backed_up(all_activities, args.backup_dir, args.format) | ||||
|             missing_activities = garminexport.backup.need_backup( | ||||
|                 activities, args.backup_dir, args.format) | ||||
|             backed_up = activities - missing_activities | ||||
|             log.info("{} contains {} backed up activities.".format( | ||||
|                 args.backup_dir, len(backed_up_activities))) | ||||
|                 args.backup_dir, len(backed_up))) | ||||
| 
 | ||||
|             missing_activities = all_activities - backed_up_activities | ||||
|             log.info("activities that haven't been backed up: {}".format( | ||||
|             log.info("activities that aren't backed up: {}".format( | ||||
|                 len(missing_activities))) | ||||
|              | ||||
|             for index, (id, start) in enumerate(missing_activities): | ||||
|                 log.info("backing up activity {} from {} ({} out of {}) ...".format( | ||||
|                     id, start, index+1, len(missing_activities))) | ||||
|             for index, activity in enumerate(missing_activities): | ||||
|                 id, start = activity | ||||
|                 log.info("backing up activity %d from %s (%d out of %d) ..." % | ||||
|                          (id, start, index+1, len(missing_activities))) | ||||
|                 try: | ||||
|                     garminexport.util.export_activity( | ||||
|                         client, id, args.backup_dir, args.format) | ||||
|                     garminexport.backup.download( | ||||
|                         client, activity, args.backup_dir, args.format) | ||||
|                 except Exception as e: | ||||
|                     log.error(u"failed with exception: %s", e) | ||||
|                     if not args.ignore_errors: | ||||
|  | ||||
							
								
								
									
										164
									
								
								garminexport/backup.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										164
									
								
								garminexport/backup.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,164 @@ | ||||
| """Module with methods useful when backing up activities. | ||||
| """ | ||||
| import codecs | ||||
| import json | ||||
| from datetime import datetime | ||||
| import dateutil.parser | ||||
| import logging | ||||
| import os | ||||
| 
 | ||||
log = logging.getLogger(__name__)

export_formats = ["json_summary", "json_details", "gpx", "tcx", "fit"]
"""Every export format that an activity can be backed up to."""

format_suffix = dict(
    json_summary="_summary.json",
    json_details="_details.json",
    gpx=".gpx",
    tcx=".tcx",
    fit=".fit",
)
"""Lookup table from export format to the file name suffix of its export."""

not_found_file = ".not_found"
"""Name of the file that records export attempts that were tried but failed.
Each line is the file name the export would have had if it had succeeded.
A listed entry strongly suggests that the activity simply doesn't exist in
that format, so it should not be retried on the next backup run. Manually
created activities, for instance, cannot be exported to ``.fit`` format."""
| 
 | ||||
| 
 | ||||
def export_filename(activity, export_format):
    """Build the file name under which an activity export is stored.

    File names have the shape ``<timestamp>_<activity_id><suffix>``, where
    the suffix is looked up in :attr:`format_suffix`.
    For example: ``2015-02-17T05:45:00+00:00_123456789.tcx``

    :param activity: An activity tuple `(id, starttime)`
    :type activity: tuple of `(int, datetime)`
    :param export_format: The export format (see :attr:`export_formats`)
    :type export_format: str

    :return: The file name to use for the exported activity.
    :rtype: str
    """
    fields = {
        "id": activity[0],
        "time": activity[1].isoformat(),
        "suffix": format_suffix[export_format],
    }
    return "{time}_{id}{suffix}".format(**fields)
| 
 | ||||
| 
 | ||||
def need_backup(activities, backup_dir, export_formats=None):
    """From a given set of activities, return all activities that haven't been
    backed up in a given set of export formats.

    Activities are considered already backed up if they, for each desired
    export format, have an activity file under the ``backup_dir`` *or*
    if the activity file is listed in the ``.not_found`` file in the backup
    directory.

    :param activities: A list of activity tuples `(id, starttime)`
    :type activities: list of tuples of `(int, datetime)`
    :param backup_dir: Destination directory for exported activities.
    :type backup_dir: str
    :keyword export_formats: The formats each activity must be backed up
      in. If :obj:`None`, every supported format is required.
    :type export_formats: list of str
    :return: All activities that need to be backed up.
    :rtype: set of tuples of `(int, datetime)`
    """
    if export_formats is None:
        # The parameter shadows the module-level list of the same name, so
        # the full range of formats is taken from the suffix table instead.
        export_formats = list(format_suffix)

    # a set gives constant-time lookups for every (activity, format) check
    backed_up = set(os.listdir(backup_dir))
    backed_up.update(_not_found_activities(backup_dir))

    # collect all activities missing at least one export format
    missing = set()
    for activity in activities:
        activity_files = [export_filename(activity, f) for f in export_formats]
        if any(f not in backed_up for f in activity_files):
            missing.add(activity)
    return missing
| 
 | ||||
|      | ||||
def _not_found_activities(backup_dir):
    """Return the file names listed in the ``.not_found`` file of a
    backup directory.

    Each entry names an export that was tried but failed, and callers
    treat those entries as already handled. If the file doesn't exist,
    an empty list is returned.

    :param backup_dir: Backup directory path.
    :type backup_dir: str
    :return: The stripped lines of the ``.not_found`` file.
    :rtype: list of str
    """
    listing_path = os.path.join(backup_dir, not_found_file)
    entries = []
    if os.path.isfile(listing_path):
        with open(listing_path, mode="r") as listing:
            for line in listing:
                entries.append(line.strip())
    log.debug("%d tried but failed activities in %s",
              len(entries), listing_path)
    return entries
|      | ||||
| 
 | ||||
|      | ||||
def download(client, activity, backup_dir, export_formats=None):
    """Exports a Garmin Connect activity to a given set of formats
    and saves the resulting file(s) to a given backup directory.
    In case a given format cannot be exported for the activity, the
    file name will be appended to the :attr:`not_found_file` in the
    backup directory (to prevent it from being retried on subsequent
    backup runs).

    :param client: A :class:`garminexport.garminclient.GarminClient`
      instance that is assumed to be connected.
    :type client: :class:`garminexport.garminclient.GarminClient`
    :param activity: An activity tuple `(id, starttime)`
    :type activity: tuple of `(int, datetime)`
    :param backup_dir: Backup directory path (assumed to exist already).
    :type backup_dir: str
    :keyword export_formats: Which format(s) to export to. Could be any
      of: 'json_summary', 'json_details', 'gpx', 'tcx', 'fit'.
      If :obj:`None`, all formats are exported.
    :type export_formats: list of str
    """
    if export_formats is None:
        # The parameter shadows the module-level list of the same name, so
        # the full range of formats is taken from the suffix table instead.
        export_formats = list(format_suffix)
    activity_id = activity[0]

    def destination(export_format):
        # full path of the export file for this activity and format
        return os.path.join(
            backup_dir, export_filename(activity, export_format))

    def write_json(export_format, document):
        # serialize a JSON document to its export file as UTF-8 text
        with codecs.open(
                destination(export_format), encoding="utf-8", mode="w") as f:
            f.write(json.dumps(document, ensure_ascii=False, indent=4))

    if 'json_summary' in export_formats:
        write_json('json_summary', client.get_activity_summary(activity_id))

    if 'json_details' in export_formats:
        write_json('json_details', client.get_activity_details(activity_id))

    # Formats for which the client may return None (nothing to export):
    # (export format, client method name, content is binary)
    optional_exports = (
        ('gpx', 'get_activity_gpx', False),
        ('tcx', 'get_activity_tcx', False),
        ('fit', 'get_activity_fit', True),
    )
    not_found_path = os.path.join(backup_dir, not_found_file)
    with open(not_found_path, mode="a") as not_found:
        for export_format, method_name, binary in optional_exports:
            if export_format not in export_formats:
                continue
            content = getattr(client, method_name)(activity_id)
            dest = destination(export_format)
            if content is None:
                # record the failed attempt so it isn't retried next run
                not_found.write(os.path.basename(dest) + "\n")
            elif binary:
                with open(dest, mode="wb") as f:
                    f.write(content)
            else:
                with codecs.open(dest, encoding="utf-8", mode="w") as f:
                    f.write(content)
|      | ||||
| 
 | ||||
| @ -5,6 +5,7 @@ parts of the Garmin Connect REST API. | ||||
| 
 | ||||
| import json | ||||
| import logging | ||||
| import os | ||||
| import re | ||||
| import requests | ||||
| from StringIO import StringIO | ||||
| @ -241,11 +242,14 @@ class GarminClient(object): | ||||
|     @require_session         | ||||
|     def get_activity_gpx(self, activity_id): | ||||
|         """Return a GPX (GPS Exchange Format) representation of a | ||||
|         given activity. | ||||
|         given activity. If the activity cannot be exported to GPX | ||||
|         (not yet observed in practice, but that doesn't exclude the | ||||
|         possibility), a :obj:`None` value is returned. | ||||
| 
 | ||||
|         :param activity_id: Activity identifier. | ||||
|         :type activity_id: int | ||||
|         :returns: The GPX representation of the activity as an XML string. | ||||
|         :returns: The GPX representation of the activity as an XML string | ||||
|           or ``None`` if the activity couldn't be exported to GPX. | ||||
|         :rtype: str | ||||
|         """ | ||||
|         response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.3/gpx/course/{}".format(activity_id)) | ||||
| @ -253,6 +257,8 @@ class GarminClient(object): | ||||
|         # and is the one used when exporting through the Garmin | ||||
|         # Connect web page. | ||||
|         #response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/gpx/activity/{}?full=true".format(activity_id)) | ||||
|         if response.status_code == 404: | ||||
|             return None | ||||
|         if response.status_code != 200: | ||||
|             raise Exception(u"failed to fetch GPX for activity {}: {}\n{}".format( | ||||
|                 activity_id, response.status_code, response.text))         | ||||
| @ -261,20 +267,58 @@ class GarminClient(object): | ||||
| 
 | ||||
|     def get_activity_tcx(self, activity_id): | ||||
|         """Return a TCX (Training Center XML) representation of a | ||||
|         given activity. | ||||
|         given activity. If the activity doesn't have a TCX source (for | ||||
|         example, if it was originally uploaded in GPX format, Garmin | ||||
|         won't try to synthesize a TCX file) a :obj:`None` value is | ||||
|         returned. | ||||
| 
 | ||||
|         :param activity_id: Activity identifier. | ||||
|         :type activity_id: int | ||||
|         :returns: The TCX representation of the activity as an XML string. | ||||
|         :returns: The TCX representation of the activity as an XML string | ||||
|           or ``None`` if the activity cannot be exported to TCX. | ||||
|         :rtype: str | ||||
|         """ | ||||
|          | ||||
|         response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/tcx/activity/{}?full=true".format(activity_id)) | ||||
|         if response.status_code == 404: | ||||
|             return None | ||||
|         if response.status_code != 200: | ||||
|             raise Exception(u"failed to fetch TCX for activity {}: {}\n{}".format( | ||||
|                 activity_id, response.status_code, response.text))         | ||||
|         return response.text | ||||
| 
 | ||||
| 
 | ||||
|     def get_original_activity(self, activity_id): | ||||
|         """Return the original file that was uploaded for an activity. | ||||
|         If the activity doesn't have any file source (for example, | ||||
|         if it was entered manually rather than imported from a Garmin | ||||
|         device) then :obj:`(None,None)` is returned. | ||||
| 
 | ||||
|         :param activity_id: Activity identifier. | ||||
|         :type activity_id: int | ||||
|         :returns: A tuple of the file type (e.g. 'fit', 'tcx', 'gpx') and | ||||
|           its contents, or :obj:`(None,None)` if no file is found. | ||||
|         :rtype: (str, str) | ||||
|         """ | ||||
|         response = self.session.get("https://connect.garmin.com/proxy/download-service/files/activity/{}".format(activity_id)) | ||||
|         if response.status_code == 404: | ||||
|             # Manually entered activity, no file source available | ||||
|             return (None,None) | ||||
|         if response.status_code != 200: | ||||
|             raise Exception( | ||||
|                 u"failed to get original activity file {}: {}\n{}".format( | ||||
|                 activity_id, response.status_code, response.text)) | ||||
| 
 | ||||
|         # return the first entry from the zip archive where the filename is | ||||
|         # activity_id (should be the only entry!) | ||||
|         zip = zipfile.ZipFile(StringIO(response.content), mode="r") | ||||
|         for path in zip.namelist(): | ||||
|             fn, ext = os.path.splitext(path) | ||||
|             if fn==str(activity_id): | ||||
|                 return ext[1:], zip.open(path).read() | ||||
|         return (None,None)     | ||||
| 
 | ||||
|          | ||||
|     def get_activity_fit(self, activity_id): | ||||
|         """Return a FIT representation for a given activity. If the activity | ||||
|         doesn't have a FIT source (for example, if it was entered manually | ||||
| @ -287,16 +331,9 @@ class GarminClient(object): | ||||
|           if no FIT source exists for this activity (e.g., entered manually). | ||||
|         :rtype: str | ||||
|         """ | ||||
|         fmt, orig_file = self.get_original_activity(activity_id) | ||||
|         # if the file extension of the original activity file isn't 'fit', | ||||
|         # this activity was uploaded in a different format (e.g. gpx/tcx) | ||||
|         # and cannot be exported to fit | ||||
|         return orig_file if fmt=='fit' else None | ||||
| 
 | ||||
|         response = self.session.get("https://connect.garmin.com/proxy/download-service/files/activity/{}".format(activity_id)) | ||||
|         if response.status_code == 404: | ||||
|             # No FIT source available for activity | ||||
|             return None | ||||
|         if response.status_code != 200: | ||||
|             raise Exception(u"failed to fetch FIT for activity {}: {}\n{}".format( | ||||
|                 activity_id, response.status_code, response.text)) | ||||
|         # fit file returned from server is in a zip archive | ||||
|         zipped_fit_file = response.content | ||||
|         zip = zipfile.ZipFile(StringIO(zipped_fit_file), mode="r") | ||||
|         # return the "<activity-activity_id>.fit" entry from the zip archive | ||||
|         return zip.open(str(activity_id) + ".fit").read() | ||||
|  | ||||
| @ -1,71 +0,0 @@ | ||||
| #! /usr/bin/env python | ||||
| """A module with utility functions.""" | ||||
| 
 | ||||
| import codecs | ||||
| import json | ||||
| from datetime import datetime | ||||
| import dateutil.parser | ||||
| import os | ||||
| 
 | ||||
| export_formats=["json_summary", "json_details", "gpx", "tcx", "fit"] | ||||
| 
 | ||||
def export_activity(client, activity_id, destination,
                    formats=None):
    """Exports a Garmin Connect activity to a given set of formats
    and saves the resulting file(s) to a given destination directory.

    Exported files are named ``<ISO8601 start time>_<activity id><suffix>``.
    A format that the client reports as unavailable (a :obj:`None` result)
    is skipped without creating a file.

    :param client: A :class:`garminexport.garminclient.GarminClient`
      instance that is assumed to be connected.
    :type client: :class:`garminexport.garminclient.GarminClient`
    :param activity_id: Activity identifier.
    :type activity_id: int
    :param destination: Destination directory (assumed to exist already).
    :type destination: str

    :keyword formats: Which format(s) to export to. Could be any
      of: 'json_summary', 'json_details', 'gpx', 'tcx', 'fit'.
      If set to :obj:`None`  all formats will be exported.
    :type formats: list of str
    """
    if formats is None:
        formats = export_formats
    # the summary is always fetched: it carries the activity's start time
    activity_summary = client.get_activity_summary(activity_id)

    # prefix saved activity files with timestamp and activity id
    start = activity_summary["activity"]["activitySummary"]["BeginTimestamp"]["value"]
    timestamp = dateutil.parser.parse(start)
    filename_prefix = "{}_{}".format(timestamp.isoformat(), activity_id)
    path_prefix = os.path.join(destination, filename_prefix)

    if 'json_summary' in formats:
        summary_file = path_prefix + "_summary.json"
        with codecs.open(summary_file, encoding="utf-8", mode="w") as f:
            f.write(json.dumps(
                activity_summary, ensure_ascii=False, indent=4))

    if 'json_details' in formats:
        activity_details = client.get_activity_details(activity_id)
        details_file = path_prefix + "_details.json"
        with codecs.open(details_file, encoding="utf-8", mode="w") as f:
            f.write(json.dumps(
                activity_details, ensure_ascii=False, indent=4))

    if 'gpx' in formats:
        activity_gpx = client.get_activity_gpx(activity_id)
        # None means no GPX is available: don't leave an empty file behind
        if activity_gpx is not None:
            gpx_file = path_prefix + ".gpx"
            with codecs.open(gpx_file, encoding="utf-8", mode="w") as f:
                f.write(activity_gpx)

    if 'tcx' in formats:
        activity_tcx = client.get_activity_tcx(activity_id)
        # None means no TCX is available: don't leave an empty file behind
        if activity_tcx is not None:
            tcx_file = path_prefix + ".tcx"
            with codecs.open(tcx_file, encoding="utf-8", mode="w") as f:
                f.write(activity_tcx)

    if 'fit' in formats:
        activity_fit = client.get_activity_fit(activity_id)
        fit_file = path_prefix + ".fit"
        if activity_fit:
            with open(fit_file, mode="wb") as f:
                f.write(activity_fit)
|      | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user