From 38ac9ca0f2aa9041a172e704be84f0e800c3e6e5 Mon Sep 17 00:00:00 2001 From: petergardfjall Date: Wed, 4 Mar 2015 20:55:55 +0100 Subject: [PATCH] incremental backup skips activity exports that have already been tried --- garminbackup.py | 56 +++++------- garminexport/backup.py | 164 +++++++++++++++++++++++++++++++++++ garminexport/garminclient.py | 71 +++++++++++---- garminexport/util.py | 71 --------------- 4 files changed, 242 insertions(+), 120 deletions(-) create mode 100644 garminexport/backup.py delete mode 100644 garminexport/util.py diff --git a/garminbackup.py b/garminbackup.py index 19b865b..014c8b8 100755 --- a/garminbackup.py +++ b/garminbackup.py @@ -8,7 +8,8 @@ stored in the backup directory will be downloaded. import argparse import getpass from garminexport.garminclient import GarminClient -import garminexport.util +import garminexport.backup +from garminexport.backup import export_formats import logging import os import re @@ -27,21 +28,6 @@ LOG_LEVELS = { } """Command-line (string-based) log-level mapping to logging module levels.""" -def get_backed_up(activities, backup_dir, formats): - """Return all activity (id, ts) pairs that have been backed up in the - given backup directory. - - :rtype: list of int - """ - # backed up activities follow this pattern: __ - format_suffix = dict(json_summary="_summary.json", json_details="_details.json", gpx=".gpx", tcx=".tcx", fit=".fit") - - backed_up = set() - dir_entries = os.listdir(backup_dir) - for id, start in activities: - if all( "{}_{}{}".format(start.isoformat(), id, format_suffix[f]) in dir_entries for f in formats): - backed_up.add((id, start)) - return backed_up if __name__ == "__main__": @@ -66,9 +52,9 @@ if __name__ == "__main__": help=("Desired log output level (DEBUG, INFO, WARNING, ERROR). 
" "Default: INFO."), default="INFO") parser.add_argument( - "-f", "--format", choices=garminexport.util.export_formats, + "-f", "--format", choices=export_formats, default=None, action='append', - help=("Desired output formats ("+', '.join(garminexport.util.export_formats)+"). " + help=("Desired output formats ("+', '.join(export_formats)+"). " "Default: ALL.")) parser.add_argument( "-E", "--ignore-errors", action='store_true', @@ -76,9 +62,14 @@ if __name__ == "__main__": args = parser.parse_args() if not args.log_level in LOG_LEVELS: - raise ValueError("Illegal log-level argument: {}".format(args.log_level)) + raise ValueError("Illegal log-level: {}".format(args.log_level)) + + # if no --format was specified, all formats are to be backed up + args.format = args.format if args.format else export_formats + log.info("backing up formats: %s", ", ".join(args.format)) + logging.root.setLevel(LOG_LEVELS[args.log_level]) - + try: if not os.path.isdir(args.backup_dir): os.makedirs(args.backup_dir) @@ -89,25 +80,26 @@ if __name__ == "__main__": with GarminClient(args.username, args.password) as client: # get all activity ids and timestamps from Garmin account log.info("retrieving activities for {} ...".format(args.username)) - all_activities = set(client.list_activities()) + activities = set(client.list_activities()) log.info("account has a total of {} activities.".format( - len(all_activities))) + len(activities))) - # get already backed up activities (stored in backup-dir) - backed_up_activities = get_backed_up(all_activities, args.backup_dir, args.format) + missing_activities = garminexport.backup.need_backup( + activities, args.backup_dir, args.format) + backed_up = activities - missing_activities log.info("{} contains {} backed up activities.".format( - args.backup_dir, len(backed_up_activities))) + args.backup_dir, len(backed_up))) - missing_activities = all_activities - backed_up_activities - log.info("activities that haven't been backed up: {}".format( + 
log.info("activities that aren't backed up: {}".format( len(missing_activities))) - for index, (id, start) in enumerate(missing_activities): - log.info("backing up activity {} from {} ({} out of {}) ...".format( - id, start, index+1, len(missing_activities))) + for index, activity in enumerate(missing_activities): + id, start = activity + log.info("backing up activity %d from %s (%d out of %d) ..." % + (id, start, index+1, len(missing_activities))) try: - garminexport.util.export_activity( - client, id, args.backup_dir, args.format) + garminexport.backup.download( + client, activity, args.backup_dir, args.format) except Exception as e: log.error(u"failed with exception: %s", e) if not args.ignore_errors: diff --git a/garminexport/backup.py b/garminexport/backup.py new file mode 100644 index 0000000..a201140 --- /dev/null +++ b/garminexport/backup.py @@ -0,0 +1,164 @@ +"""Module with methods useful when backing up activities. +""" +import codecs +import json +from datetime import datetime +import dateutil.parser +import logging +import os + +log = logging.getLogger(__name__) + +export_formats=["json_summary", "json_details", "gpx", "tcx", "fit"] +"""The range of supported export formats for activities.""" + +format_suffix = { + "json_summary": "_summary.json", + "json_details": "_details.json", + "gpx": ".gpx", + "tcx": ".tcx", + "fit": ".fit" +} +"""A table that maps export formats to their file format extensions.""" + + +not_found_file = ".not_found" +"""A file that lists all tried but failed export attempts. The lines in +the file are the would-have-been file names, had the exports been successful. +An entry in the ``.not_found`` file is a strong indication of an +activity-format that simply doesn't exist and therefore should not be retried +on the next backup run. 
One such scenario is for manually created activities, +which cannot be exported to ``.fit`` format.""" + + +def export_filename(activity, export_format): + """Returns a destination file name to use for a given activity that is + to be exported to a given format. Exported files follow this pattern: + ``<timestamp>_<activity_id><suffix>``. + For example: ``2015-02-17T05:45:00+00:00_123456789.tcx`` + + :param activity: An activity tuple `(id, starttime)` + :type activity: tuple of `(int, datetime)` + :param export_format: The export format (see :attr:`export_formats`) + :type export_format: str + + :return: The file name to use for the exported activity. + :rtype: str + """ + return "{time}_{id}{suffix}".format( + id=activity[0], + time=activity[1].isoformat(), + suffix=format_suffix[export_format]) + + +def need_backup(activities, backup_dir, export_formats=None): + """From a given set of activities, return all activities that haven't been + backed up in a given set of export formats. + + Activities are considered already backed up if they, for each desired + export format, have an activity file under the ``backup_dir`` *or* + if the activity file is listed in the ``.not_found`` file in the backup + directory. + + :param activities: A list of activity tuples `(id, starttime)` + :type activities: list of tuples of `(int, datetime)` + :param backup_dir: Destination directory for exported activities. + :type backup_dir: str + :return: All activities that need to be backed up. 
+ :rtype: set of tuples of `(int, datetime)` + """ + need_backup = set() + backed_up = os.listdir(backup_dir) + _not_found_activities(backup_dir) + + # get all activities missing at least one export format + for activity in activities: + activity_files = [export_filename(activity, f) for f in export_formats] + if any(f not in backed_up for f in activity_files): + need_backup.add(activity) + return need_backup + + +def _not_found_activities(backup_dir): + # consider all entries in <backup_dir>/.not_found as backed up + # (or rather, as tried but failed back ups) + failed_activities = [] + _not_found = os.path.join(backup_dir, not_found_file) + if os.path.isfile(_not_found): + with open(_not_found, mode="r") as f: + failed_activities = [line.strip() for line in f.readlines()] + log.debug("%d tried but failed activities in %s", + len(failed_activities), _not_found) + return failed_activities + + + +def download(client, activity, backup_dir, export_formats=None): + """Exports a Garmin Connect activity to a given set of formats + and saves the resulting file(s) to a given backup directory. + In case a given format cannot be exported for the activity, the + file name will be appended to the :attr:`not_found_file` in the + backup directory (to prevent it from being retried on subsequent + backup runs). + + :param client: A :class:`garminexport.garminclient.GarminClient` + instance that is assumed to be connected. + :type client: :class:`garminexport.garminclient.GarminClient` + :param activity: An activity tuple `(id, starttime)` + :type activity: tuple of `(int, datetime)` + :param backup_dir: Backup directory path (assumed to exist already). + :type backup_dir: str + :keyword export_formats: Which format(s) to export to. Could be any + of: 'json_summary', 'json_details', 'gpx', 'tcx', 'fit'. 
+ :type export_formats: list of str + """ + id = activity[0] + + if 'json_summary' in export_formats: + activity_summary = client.get_activity_summary(id) + dest = os.path.join( + backup_dir, export_filename(activity, 'json_summary')) + with codecs.open(dest, encoding="utf-8", mode="w") as f: + f.write(json.dumps( + activity_summary, ensure_ascii=False, indent=4)) + + if 'json_details' in export_formats: + activity_details = client.get_activity_details(id) + dest = os.path.join( + backup_dir, export_filename(activity, 'json_details')) + with codecs.open(dest, encoding="utf-8", mode="w") as f: + f.write(json.dumps( + activity_details, ensure_ascii=False, indent=4)) + + not_found_path = os.path.join(backup_dir, not_found_file) + with open(not_found_path, mode="a") as not_found: + if 'gpx' in export_formats: + activity_gpx = client.get_activity_gpx(id) + dest = os.path.join( + backup_dir, export_filename(activity, 'gpx')) + if activity_gpx is None: + not_found.write(os.path.basename(dest) + "\n") + else: + with codecs.open(dest, encoding="utf-8", mode="w") as f: + f.write(activity_gpx) + + if 'tcx' in export_formats: + activity_tcx = client.get_activity_tcx(id) + dest = os.path.join( + backup_dir, export_filename(activity, 'tcx')) + if activity_tcx is None: + not_found.write(os.path.basename(dest) + "\n") + else: + with codecs.open(dest, encoding="utf-8", mode="w") as f: + f.write(activity_tcx) + + if 'fit' in export_formats: + activity_fit = client.get_activity_fit(id) + dest = os.path.join( + backup_dir, export_filename(activity, 'fit')) + if activity_fit is None: + not_found.write(os.path.basename(dest) + "\n") + else: + with open(dest, mode="wb") as f: + f.write(activity_fit) + + diff --git a/garminexport/garminclient.py b/garminexport/garminclient.py index 46fd49e..413103c 100755 --- a/garminexport/garminclient.py +++ b/garminexport/garminclient.py @@ -5,6 +5,7 @@ parts of the Garmin Connect REST API. 
import json import logging +import os import re import requests from StringIO import StringIO @@ -241,11 +242,14 @@ class GarminClient(object): @require_session def get_activity_gpx(self, activity_id): """Return a GPX (GPS Exchange Format) representation of a - given activity. + given activity. If the activity cannot be exported to GPX + (not yet observed in practice, but that doesn't exclude the + possibility), a :obj:`None` value is returned. :param activity_id: Activity identifier. :type activity_id: int - :returns: The GPX representation of the activity as an XML string. + :returns: The GPX representation of the activity as an XML string + or ``None`` if the activity couldn't be exported to GPX. :rtype: str """ response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.3/gpx/course/{}".format(activity_id)) @@ -253,6 +257,8 @@ class GarminClient(object): # and is the one used when exporting through the Garmin # Connect web page. #response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/gpx/activity/{}?full=true".format(activity_id)) + if response.status_code == 404: + return None if response.status_code != 200: raise Exception(u"failed to fetch GPX for activity {}: {}\n{}".format( activity_id, response.status_code, response.text)) @@ -261,20 +267,58 @@ class GarminClient(object): def get_activity_tcx(self, activity_id): """Return a TCX (Training Center XML) representation of a - given activity. + given activity. If the activity doesn't have a TCX source (for + example, if it was originally uploaded in GPX format, Garmin + won't try to synthesize a TCX file) a :obj:`None` value is + returned. :param activity_id: Activity identifier. :type activity_id: int - :returns: The TCX representation of the activity as an XML string. + :returns: The TCX representation of the activity as an XML string + or ``None`` if the activity cannot be exported to TCX. 
:rtype: str """ response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/tcx/activity/{}?full=true".format(activity_id)) + if response.status_code == 404: + return None if response.status_code != 200: raise Exception(u"failed to fetch TCX for activity {}: {}\n{}".format( activity_id, response.status_code, response.text)) return response.text + + def get_original_activity(self, activity_id): + """Return the original file that was uploaded for an activity. + If the activity doesn't have any file source (for example, + if it was entered manually rather than imported from a Garmin + device) then :obj:`(None,None)` is returned. + + :param activity_id: Activity identifier. + :type activity_id: int + :returns: A tuple of the file type (e.g. 'fit', 'tcx', 'gpx') and + its contents, or :obj:`(None,None)` if no file is found. + :rtype: (str, str) + """ + response = self.session.get("https://connect.garmin.com/proxy/download-service/files/activity/{}".format(activity_id)) + if response.status_code == 404: + # Manually entered activity, no file source available + return (None,None) + if response.status_code != 200: + raise Exception( + u"failed to get original activity file {}: {}\n{}".format( + activity_id, response.status_code, response.text)) + + # return the first entry from the zip archive where the filename is + # activity_id (should be the only entry!) + zip = zipfile.ZipFile(StringIO(response.content), mode="r") + for path in zip.namelist(): + fn, ext = os.path.splitext(path) + if fn==str(activity_id): + return ext[1:], zip.open(path).read() + return (None,None) + + def get_activity_fit(self, activity_id): """Return a FIT representation for a given activity. If the activity doesn't have a FIT source (for example, if it was entered manually @@ -287,16 +331,9 @@ class GarminClient(object): if no FIT source exists for this activity (e.g., entered manually). 
:rtype: str """ - - response = self.session.get("https://connect.garmin.com/proxy/download-service/files/activity/{}".format(activity_id)) - if response.status_code == 404: - # No FIT source available for activity - return None - if response.status_code != 200: - raise Exception(u"failed to fetch FIT for activity {}: {}\n{}".format( - activity_id, response.status_code, response.text)) - # fit file returned from server is in a zip archive - zipped_fit_file = response.content - zip = zipfile.ZipFile(StringIO(zipped_fit_file), mode="r") - # return the ".fit" entry from the zip archive - return zip.open(str(activity_id) + ".fit").read() + fmt, orig_file = self.get_original_activity(activity_id) + # if the file extension of the original activity file isn't 'fit', + # this activity was uploaded in a different format (e.g. gpx/tcx) + # and cannot be exported to fit + return orig_file if fmt=='fit' else None + diff --git a/garminexport/util.py b/garminexport/util.py deleted file mode 100644 index e4ee75e..0000000 --- a/garminexport/util.py +++ /dev/null @@ -1,71 +0,0 @@ -#! /usr/bin/env python -"""A module with utility functions.""" - -import codecs -import json -from datetime import datetime -import dateutil.parser -import os - -export_formats=["json_summary", "json_details", "gpx", "tcx", "fit"] - -def export_activity(client, activity_id, destination, - formats=None): - """Exports a Garmin Connect activity to a given set of formats - and saves the result file(es) to a given destination directory. - - :param client: A :class:`garminexport.garminclient.GarminClient` - instance that is assumed to be connected. - :type client: :class:`garminexport.garminclient.GarminClient` - :param activity_id: Activity identifier. - :type activity_id: int - :param destination: Destination directory (assumed to exist already). - :type destination: str - - :keyword formats: Which format(s) to export to. Could be any - of: 'json_summary', 'json_details', 'gpx', 'tcx', 'fit'. 
- If set to :obj:`None` all formats will be exported. - :type formats: list of str - """ - if formats is None: - formats = export_formats - activity_summary = client.get_activity_summary(activity_id) - - # prefix saved activity files with timestamp and activity id - start = activity_summary["activity"]["activitySummary"]["BeginTimestamp"]["value"] - timestamp = dateutil.parser.parse(start) - filename_prefix = "{}_{}".format(timestamp.isoformat(), activity_id) - path_prefix = os.path.join(destination, filename_prefix) - - if 'json_summary' in formats: - summary_file = path_prefix + "_summary.json" - with codecs.open(summary_file, encoding="utf-8", mode="w") as f: - f.write(json.dumps( - activity_summary, ensure_ascii=False, indent=4)) - - if 'json_details' in formats: - activity_details = client.get_activity_details(activity_id) - details_file = path_prefix + "_details.json" - with codecs.open(details_file, encoding="utf-8", mode="w") as f: - f.write(json.dumps( - activity_details, ensure_ascii=False, indent=4)) - - if 'gpx' in formats: - activity_gpx = client.get_activity_gpx(activity_id) - gpx_file = path_prefix + ".gpx" - with codecs.open(gpx_file, encoding="utf-8", mode="w") as f: - f.write(activity_gpx) - - if 'tcx' in formats: - activity_tcx = client.get_activity_tcx(activity_id) - tcx_file = path_prefix + ".tcx" - with codecs.open(tcx_file, encoding="utf-8", mode="w") as f: - f.write(activity_tcx) - - if 'fit' in formats: - activity_fit = client.get_activity_fit(activity_id) - fit_file = path_prefix + ".fit" - if activity_fit: - with open(fit_file, mode="wb") as f: - f.write(activity_fit) -