incremental backup skips activity exports that have already been tried

This commit is contained in:
petergardfjall 2015-03-04 20:55:55 +01:00
parent 012c029c83
commit 38ac9ca0f2
4 changed files with 242 additions and 120 deletions

View File

@ -8,7 +8,8 @@ stored in the backup directory will be downloaded.
import argparse import argparse
import getpass import getpass
from garminexport.garminclient import GarminClient from garminexport.garminclient import GarminClient
import garminexport.util import garminexport.backup
from garminexport.backup import export_formats
import logging import logging
import os import os
import re import re
@ -27,21 +28,6 @@ LOG_LEVELS = {
} }
"""Command-line (string-based) log-level mapping to logging module levels.""" """Command-line (string-based) log-level mapping to logging module levels."""
def get_backed_up(activities, backup_dir, formats):
    """Return all activity (id, ts) pairs that have been backed up in the
    given backup directory.

    An activity counts as backed up only if a file exists in ``backup_dir``
    for *every* one of the requested formats. With an empty ``formats``
    list every activity is therefore reported as backed up.

    :param activities: Activity tuples `(id, starttime)`.
    :type activities: iterable of tuples of `(int, datetime)`
    :param backup_dir: Directory holding the exported activity files.
    :type backup_dir: str
    :param formats: Export formats that must all be present.
    :type formats: list of str
    :return: The activities that have a file for each requested format.
    :rtype: set of tuples of `(int, datetime)`
    :raises KeyError: if ``formats`` contains an unknown format name.
    """
    # backed up activities follow this pattern: <ISO8601>_<id>_<suffix>
    format_suffix = dict(json_summary="_summary.json", json_details="_details.json", gpx=".gpx", tcx=".tcx", fit=".fit")
    # a set gives constant-time membership tests; the directory listing is
    # consulted once per activity and format
    dir_entries = set(os.listdir(backup_dir))
    backed_up = set()
    for activity_id, start in activities:
        prefix = "{}_{}".format(start.isoformat(), activity_id)
        if all(prefix + format_suffix[f] in dir_entries for f in formats):
            backed_up.add((activity_id, start))
    return backed_up
if __name__ == "__main__": if __name__ == "__main__":
@ -66,9 +52,9 @@ if __name__ == "__main__":
help=("Desired log output level (DEBUG, INFO, WARNING, ERROR). " help=("Desired log output level (DEBUG, INFO, WARNING, ERROR). "
"Default: INFO."), default="INFO") "Default: INFO."), default="INFO")
parser.add_argument( parser.add_argument(
"-f", "--format", choices=garminexport.util.export_formats, "-f", "--format", choices=export_formats,
default=None, action='append', default=None, action='append',
help=("Desired output formats ("+', '.join(garminexport.util.export_formats)+"). " help=("Desired output formats ("+', '.join(export_formats)+"). "
"Default: ALL.")) "Default: ALL."))
parser.add_argument( parser.add_argument(
"-E", "--ignore-errors", action='store_true', "-E", "--ignore-errors", action='store_true',
@ -76,7 +62,12 @@ if __name__ == "__main__":
args = parser.parse_args() args = parser.parse_args()
if not args.log_level in LOG_LEVELS: if not args.log_level in LOG_LEVELS:
raise ValueError("Illegal log-level argument: {}".format(args.log_level)) raise ValueError("Illegal log-level: {}".format(args.log_level))
# if no --format was specified, all formats are to be backed up
args.format = args.format if args.format else export_formats
log.info("backing up formats: %s", ", ".join(args.format))
logging.root.setLevel(LOG_LEVELS[args.log_level]) logging.root.setLevel(LOG_LEVELS[args.log_level])
try: try:
@ -89,25 +80,26 @@ if __name__ == "__main__":
with GarminClient(args.username, args.password) as client: with GarminClient(args.username, args.password) as client:
# get all activity ids and timestamps from Garmin account # get all activity ids and timestamps from Garmin account
log.info("retrieving activities for {} ...".format(args.username)) log.info("retrieving activities for {} ...".format(args.username))
all_activities = set(client.list_activities()) activities = set(client.list_activities())
log.info("account has a total of {} activities.".format( log.info("account has a total of {} activities.".format(
len(all_activities))) len(activities)))
# get already backed up activities (stored in backup-dir) missing_activities = garminexport.backup.need_backup(
backed_up_activities = get_backed_up(all_activities, args.backup_dir, args.format) activities, args.backup_dir, args.format)
backed_up = activities - missing_activities
log.info("{} contains {} backed up activities.".format( log.info("{} contains {} backed up activities.".format(
args.backup_dir, len(backed_up_activities))) args.backup_dir, len(backed_up)))
missing_activities = all_activities - backed_up_activities log.info("activities that aren't backed up: {}".format(
log.info("activities that haven't been backed up: {}".format(
len(missing_activities))) len(missing_activities)))
for index, (id, start) in enumerate(missing_activities): for index, activity in enumerate(missing_activities):
log.info("backing up activity {} from {} ({} out of {}) ...".format( id, start = activity
id, start, index+1, len(missing_activities))) log.info("backing up activity %d from %s (%d out of %d) ..." %
(id, start, index+1, len(missing_activities)))
try: try:
garminexport.util.export_activity( garminexport.backup.download(
client, id, args.backup_dir, args.format) client, activity, args.backup_dir, args.format)
except Exception as e: except Exception as e:
log.error(u"failed with exception: %s", e) log.error(u"failed with exception: %s", e)
if not args.ignore_errors: if not args.ignore_errors:

164
garminexport/backup.py Normal file
View File

@ -0,0 +1,164 @@
"""Module with methods useful when backing up activities.
"""
import codecs
import json
from datetime import datetime
import dateutil.parser
import logging
import os
log = logging.getLogger(__name__)

export_formats = [
    "json_summary",
    "json_details",
    "gpx",
    "tcx",
    "fit",
]
"""All export formats that an activity can be backed up to."""

format_suffix = dict(
    json_summary="_summary.json",
    json_details="_details.json",
    gpx=".gpx",
    tcx=".tcx",
    fit=".fit",
)
"""Lookup table from export format to the file name suffix used for it."""

not_found_file = ".not_found"
"""Name of the file (in the backup directory) that records export attempts
that were tried but yielded nothing. Each line holds the file name the
export would have been given had it succeeded. A listed entry strongly
suggests that the activity simply doesn't exist in that format, so it should
not be retried on later backup runs. A typical case is a manually created
activity, which cannot be exported to ``.fit`` format."""
def export_filename(activity, export_format):
    """Build the backup file name for an activity in a given export format.

    File names have the shape ``<timestamp>_<activity_id>_<suffix>``,
    for instance ``2015-02-17T05:45:00+00:00_123456789.tcx``.

    :param activity: An activity tuple `(id, starttime)`
    :type activity: tuple of `(int, datetime)`
    :param export_format: The export format (see :attr:`export_formats`)
    :type export_format: str

    :return: The file name to use for the exported activity.
    :rtype: str
    """
    activity_id = activity[0]
    timestamp = activity[1].isoformat()
    extension = format_suffix[export_format]
    return "{time}_{id}{suffix}".format(
        id=activity_id, time=timestamp, suffix=extension)
def need_backup(activities, backup_dir, export_formats=None):
    """From a given set of activities, return all activities that haven't been
    backed up in a given set of export formats.

    Activities are considered already backed up if they, for each desired
    export format, have an activity file under the ``backup_dir`` *or*
    if the activity file is listed in the ``.not_found`` file in the backup
    directory.

    :param activities: A list of activity tuples `(id, starttime)`
    :type activities: list of tuples of `(int, datetime)`
    :param backup_dir: Destination directory for exported activities.
    :type backup_dir: str
    :keyword export_formats: The formats each activity must be backed up in.
      If :obj:`None`, every supported format is required.
    :type export_formats: list of str

    :return: All activities that need to be backed up.
    :rtype: set of tuples of `(int, datetime)`
    """
    if export_formats is None:
        # The parameter shadows the module-level list of the same name, so
        # the full range of formats is taken from the suffix table instead.
        export_formats = list(format_suffix)

    # a set gives constant-time lookups; every activity/format pair is
    # checked against it
    handled = set(os.listdir(backup_dir))
    handled.update(_not_found_activities(backup_dir))

    # collect all activities missing at least one export format
    missing = set()
    for activity in activities:
        wanted = [export_filename(activity, f) for f in export_formats]
        if any(name not in handled for name in wanted):
            missing.add(activity)
    return missing
def _not_found_activities(backup_dir):
    """Return the file names recorded in ``<backup_dir>/.not_found``.

    These are exports that were attempted but produced nothing; callers
    treat them as already handled. An empty list is returned when the
    file does not exist.

    :param backup_dir: The backup directory to look in.
    :type backup_dir: str
    :return: One stripped entry per line of the ``.not_found`` file.
    :rtype: list of str
    """
    listing_path = os.path.join(backup_dir, not_found_file)
    entries = []
    if os.path.isfile(listing_path):
        with open(listing_path, mode="r") as listing:
            for line in listing.readlines():
                entries.append(line.strip())
    log.debug("%d tried but failed activities in %s",
              len(entries), listing_path)
    return entries
def _write_json(dest, data):
    """Write ``data`` to the file ``dest`` as indented, UTF-8 encoded JSON."""
    with codecs.open(dest, encoding="utf-8", mode="w") as f:
        f.write(json.dumps(data, ensure_ascii=False, indent=4))


def _save_or_mark_missing(content, dest, not_found, binary=False):
    """Write ``content`` to ``dest``, or, if ``content`` is ``None``, record
    the base name of ``dest`` in the open ``not_found`` file instead."""
    if content is None:
        not_found.write(os.path.basename(dest) + "\n")
    elif binary:
        with open(dest, mode="wb") as f:
            f.write(content)
    else:
        with codecs.open(dest, encoding="utf-8", mode="w") as f:
            f.write(content)


def download(client, activity, backup_dir, export_formats=None):
    """Exports a Garmin Connect activity to a given set of formats
    and saves the resulting file(s) to a given backup directory.
    In case a given format cannot be exported for the activity, the
    file name will be appended to the :attr:`not_found_file` in the
    backup directory (to prevent it from being retried on subsequent
    backup runs).

    :param client: A :class:`garminexport.garminclient.GarminClient`
      instance that is assumed to be connected.
    :type client: :class:`garminexport.garminclient.GarminClient`
    :param activity: An activity tuple `(id, starttime)`
    :type activity: tuple of `(int, datetime)`
    :param backup_dir: Backup directory path (assumed to exist already).
    :type backup_dir: str
    :keyword export_formats: Which format(s) to export to. Could be any
      of: 'json_summary', 'json_details', 'gpx', 'tcx', 'fit'.
      If :obj:`None`, all formats are exported.
    :type export_formats: list of str
    """
    if export_formats is None:
        # The parameter shadows the module-level list of the same name, so
        # the full range of formats is taken from the suffix table instead.
        export_formats = list(format_suffix)
    activity_id = activity[0]

    def dest_for(export_format):
        return os.path.join(
            backup_dir, export_filename(activity, export_format))

    if 'json_summary' in export_formats:
        _write_json(dest_for('json_summary'),
                    client.get_activity_summary(activity_id))

    if 'json_details' in export_formats:
        _write_json(dest_for('json_details'),
                    client.get_activity_details(activity_id))

    # The file is opened in append mode unconditionally, so it is created
    # even when every export succeeds.
    not_found_path = os.path.join(backup_dir, not_found_file)
    with open(not_found_path, mode="a") as not_found:
        if 'gpx' in export_formats:
            _save_or_mark_missing(
                client.get_activity_gpx(activity_id),
                dest_for('gpx'), not_found)

        if 'tcx' in export_formats:
            _save_or_mark_missing(
                client.get_activity_tcx(activity_id),
                dest_for('tcx'), not_found)

        if 'fit' in export_formats:
            # FIT content is written as-is in binary mode
            _save_or_mark_missing(
                client.get_activity_fit(activity_id),
                dest_for('fit'), not_found, binary=True)

View File

@ -5,6 +5,7 @@ parts of the Garmin Connect REST API.
import json import json
import logging import logging
import os
import re import re
import requests import requests
from StringIO import StringIO from StringIO import StringIO
@ -241,11 +242,14 @@ class GarminClient(object):
@require_session @require_session
def get_activity_gpx(self, activity_id): def get_activity_gpx(self, activity_id):
"""Return a GPX (GPS Exchange Format) representation of a """Return a GPX (GPS Exchange Format) representation of a
given activity. given activity. If the activity cannot be exported to GPX
(not yet observed in practice, but that doesn't exclude the
possibility), a :obj:`None` value is returned.
:param activity_id: Activity identifier. :param activity_id: Activity identifier.
:type activity_id: int :type activity_id: int
:returns: The GPX representation of the activity as an XML string. :returns: The GPX representation of the activity as an XML string
or ``None`` if the activity couldn't be exported to GPX.
:rtype: str :rtype: str
""" """
response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.3/gpx/course/{}".format(activity_id)) response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.3/gpx/course/{}".format(activity_id))
@ -253,6 +257,8 @@ class GarminClient(object):
# and is the one used when exporting through the Garmin # and is the one used when exporting through the Garmin
# Connect web page. # Connect web page.
#response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/gpx/activity/{}?full=true".format(activity_id)) #response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/gpx/activity/{}?full=true".format(activity_id))
if response.status_code == 404:
return None
if response.status_code != 200: if response.status_code != 200:
raise Exception(u"failed to fetch GPX for activity {}: {}\n{}".format( raise Exception(u"failed to fetch GPX for activity {}: {}\n{}".format(
activity_id, response.status_code, response.text)) activity_id, response.status_code, response.text))
@ -261,20 +267,58 @@ class GarminClient(object):
def get_activity_tcx(self, activity_id): def get_activity_tcx(self, activity_id):
"""Return a TCX (Training Center XML) representation of a """Return a TCX (Training Center XML) representation of a
given activity. given activity. If the activity doesn't have a TCX source (for
example, if it was originally uploaded in GPX format, Garmin
won't try to synthesize a TCX file) a :obj:`None` value is
returned.
:param activity_id: Activity identifier. :param activity_id: Activity identifier.
:type activity_id: int :type activity_id: int
:returns: The TCX representation of the activity as an XML string. :returns: The TCX representation of the activity as an XML string
or ``None`` if the activity cannot be exported to TCX.
:rtype: str :rtype: str
""" """
response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/tcx/activity/{}?full=true".format(activity_id)) response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/tcx/activity/{}?full=true".format(activity_id))
if response.status_code == 404:
return None
if response.status_code != 200: if response.status_code != 200:
raise Exception(u"failed to fetch TCX for activity {}: {}\n{}".format( raise Exception(u"failed to fetch TCX for activity {}: {}\n{}".format(
activity_id, response.status_code, response.text)) activity_id, response.status_code, response.text))
return response.text return response.text
def get_original_activity(self, activity_id):
    """Return the original file that was uploaded for an activity.
    If the activity doesn't have any file source (for example,
    if it was entered manually rather than imported from a Garmin
    device) then :obj:`(None,None)` is returned.

    :param activity_id: Activity identifier.
    :type activity_id: int
    :returns: A tuple of the file type (e.g. 'fit', 'tcx', 'gpx') and
      its contents, or :obj:`(None,None)` if no file is found.
    :rtype: (str, str)
    :raises Exception: if the server answers with a status code other
      than 200 or 404.
    """
    response = self.session.get("https://connect.garmin.com/proxy/download-service/files/activity/{}".format(activity_id))
    if response.status_code == 404:
        # Manually entered activity, no file source available
        return (None, None)
    if response.status_code != 200:
        raise Exception(
            u"failed to get original activity file {}: {}\n{}".format(
                activity_id, response.status_code, response.text))

    # return the first entry from the zip archive where the filename is
    # activity_id (should be the only entry!)
    archive = zipfile.ZipFile(StringIO(response.content), mode="r")
    try:
        for path in archive.namelist():
            fn, ext = os.path.splitext(path)
            if fn == str(activity_id):
                # ext includes the leading dot; strip it to get the type
                return ext[1:], archive.read(path)
    finally:
        # release the archive whether or not a matching entry was found
        archive.close()
    return (None, None)
def get_activity_fit(self, activity_id): def get_activity_fit(self, activity_id):
"""Return a FIT representation for a given activity. If the activity """Return a FIT representation for a given activity. If the activity
doesn't have a FIT source (for example, if it was entered manually doesn't have a FIT source (for example, if it was entered manually
@ -287,16 +331,9 @@ class GarminClient(object):
if no FIT source exists for this activity (e.g., entered manually). if no FIT source exists for this activity (e.g., entered manually).
:rtype: str :rtype: str
""" """
fmt, orig_file = self.get_original_activity(activity_id)
# if the file extension of the original activity file isn't 'fit',
# this activity was uploaded in a different format (e.g. gpx/tcx)
# and cannot be exported to fit
return orig_file if fmt=='fit' else None
response = self.session.get("https://connect.garmin.com/proxy/download-service/files/activity/{}".format(activity_id))
if response.status_code == 404:
# No FIT source available for activity
return None
if response.status_code != 200:
raise Exception(u"failed to fetch FIT for activity {}: {}\n{}".format(
activity_id, response.status_code, response.text))
# fit file returned from server is in a zip archive
zipped_fit_file = response.content
zip = zipfile.ZipFile(StringIO(zipped_fit_file), mode="r")
# return the "<activity-activity_id>.fit" entry from the zip archive
return zip.open(str(activity_id) + ".fit").read()

View File

@ -1,71 +0,0 @@
#! /usr/bin/env python
"""A module with utility functions."""
import codecs
import json
from datetime import datetime
import dateutil.parser
import os
export_formats=["json_summary", "json_details", "gpx", "tcx", "fit"]
def export_activity(client, activity_id, destination,
                    formats=None):
    """Exports a Garmin Connect activity to a given set of formats
    and saves the resulting file(s) to a given destination directory.

    Files are named ``<timestamp>_<activity_id><suffix>``, where the
    timestamp is the activity's begin timestamp taken from its summary.
    A format for which the client returns no content is skipped rather
    than written.

    :param client: A :class:`garminexport.garminclient.GarminClient`
      instance that is assumed to be connected.
    :type client: :class:`garminexport.garminclient.GarminClient`
    :param activity_id: Activity identifier.
    :type activity_id: int
    :param destination: Destination directory (assumed to exist already).
    :type destination: str

    :keyword formats: Which format(s) to export to. Could be any
      of: 'json_summary', 'json_details', 'gpx', 'tcx', 'fit'.
      If set to :obj:`None` all formats will be exported.
    :type formats: list of str
    """
    if formats is None:
        formats = export_formats
    # the summary is always fetched: it supplies the start timestamp that
    # prefixes every file name
    activity_summary = client.get_activity_summary(activity_id)

    # prefix saved activity files with timestamp and activity id
    start = activity_summary["activity"]["activitySummary"]["BeginTimestamp"]["value"]
    timestamp = dateutil.parser.parse(start)
    filename_prefix = "{}_{}".format(timestamp.isoformat(), activity_id)
    path_prefix = os.path.join(destination, filename_prefix)

    if 'json_summary' in formats:
        summary_file = path_prefix + "_summary.json"
        with codecs.open(summary_file, encoding="utf-8", mode="w") as f:
            f.write(json.dumps(
                activity_summary, ensure_ascii=False, indent=4))

    if 'json_details' in formats:
        activity_details = client.get_activity_details(activity_id)
        details_file = path_prefix + "_details.json"
        with codecs.open(details_file, encoding="utf-8", mode="w") as f:
            f.write(json.dumps(
                activity_details, ensure_ascii=False, indent=4))

    if 'gpx' in formats:
        activity_gpx = client.get_activity_gpx(activity_id)
        # guard against a missing export, so that no empty file is left
        # behind by a failed write
        if activity_gpx is not None:
            gpx_file = path_prefix + ".gpx"
            with codecs.open(gpx_file, encoding="utf-8", mode="w") as f:
                f.write(activity_gpx)

    if 'tcx' in formats:
        activity_tcx = client.get_activity_tcx(activity_id)
        if activity_tcx is not None:
            tcx_file = path_prefix + ".tcx"
            with codecs.open(tcx_file, encoding="utf-8", mode="w") as f:
                f.write(activity_tcx)

    if 'fit' in formats:
        activity_fit = client.get_activity_fit(activity_id)
        fit_file = path_prefix + ".fit"
        if activity_fit:
            with open(fit_file, mode="wb") as f:
                f.write(activity_fit)