incremental backup skips activity exports that have already been tried

This commit is contained in:
petergardfjall 2015-03-04 20:55:55 +01:00
parent 012c029c83
commit 38ac9ca0f2
4 changed files with 242 additions and 120 deletions

View File

@ -8,7 +8,8 @@ stored in the backup directory will be downloaded.
import argparse
import getpass
from garminexport.garminclient import GarminClient
import garminexport.util
import garminexport.backup
from garminexport.backup import export_formats
import logging
import os
import re
@ -27,21 +28,6 @@ LOG_LEVELS = {
}
"""Command-line (string-based) log-level mapping to logging module levels."""
def get_backed_up(activities, backup_dir, formats):
    """Return the activities that are completely backed up in a directory.

    An activity counts as backed up only when the directory holds one file
    for *every* requested format. Backup files follow the naming pattern
    ``<ISO8601 start time>_<activity id><format suffix>``.

    :param activities: Activity tuples `(id, starttime)`.
    :type activities: iterable of tuples of `(int, datetime)`
    :param backup_dir: Directory whose entries are inspected.
    :type backup_dir: str
    :param formats: Export formats that all must be present. :obj:`None`
      (what the command-line parser yields when no ``--format`` is given)
      means every known format.
    :type formats: list of str
    :return: The `(id, starttime)` tuples that have a file for every format.
    :rtype: set of tuples of `(int, datetime)`
    :raises KeyError: If a requested format has no known file suffix.
    """
    format_suffix = {
        "json_summary": "_summary.json",
        "json_details": "_details.json",
        "gpx": ".gpx",
        "tcx": ".tcx",
        "fit": ".fit",
    }
    if formats is None:
        formats = list(format_suffix)

    # a set gives constant-time lookups; the listing is probed once per
    # activity and format
    dir_entries = set(os.listdir(backup_dir))

    backed_up = set()
    for activity_id, start in activities:
        prefix = "{}_{}".format(start.isoformat(), activity_id)
        if all(prefix + format_suffix[f] in dir_entries for f in formats):
            backed_up.add((activity_id, start))
    return backed_up
if __name__ == "__main__":
@ -66,9 +52,9 @@ if __name__ == "__main__":
help=("Desired log output level (DEBUG, INFO, WARNING, ERROR). "
"Default: INFO."), default="INFO")
parser.add_argument(
"-f", "--format", choices=garminexport.util.export_formats,
"-f", "--format", choices=export_formats,
default=None, action='append',
help=("Desired output formats ("+', '.join(garminexport.util.export_formats)+"). "
help=("Desired output formats ("+', '.join(export_formats)+"). "
"Default: ALL."))
parser.add_argument(
"-E", "--ignore-errors", action='store_true',
@ -76,9 +62,14 @@ if __name__ == "__main__":
args = parser.parse_args()
if not args.log_level in LOG_LEVELS:
raise ValueError("Illegal log-level argument: {}".format(args.log_level))
raise ValueError("Illegal log-level: {}".format(args.log_level))
# if no --format was specified, all formats are to be backed up
args.format = args.format if args.format else export_formats
log.info("backing up formats: %s", ", ".join(args.format))
logging.root.setLevel(LOG_LEVELS[args.log_level])
try:
if not os.path.isdir(args.backup_dir):
os.makedirs(args.backup_dir)
@ -89,25 +80,26 @@ if __name__ == "__main__":
with GarminClient(args.username, args.password) as client:
# get all activity ids and timestamps from Garmin account
log.info("retrieving activities for {} ...".format(args.username))
all_activities = set(client.list_activities())
activities = set(client.list_activities())
log.info("account has a total of {} activities.".format(
len(all_activities)))
len(activities)))
# get already backed up activities (stored in backup-dir)
backed_up_activities = get_backed_up(all_activities, args.backup_dir, args.format)
missing_activities = garminexport.backup.need_backup(
activities, args.backup_dir, args.format)
backed_up = activities - missing_activities
log.info("{} contains {} backed up activities.".format(
args.backup_dir, len(backed_up_activities)))
args.backup_dir, len(backed_up)))
missing_activities = all_activities - backed_up_activities
log.info("activities that haven't been backed up: {}".format(
log.info("activities that aren't backed up: {}".format(
len(missing_activities)))
for index, (id, start) in enumerate(missing_activities):
log.info("backing up activity {} from {} ({} out of {}) ...".format(
id, start, index+1, len(missing_activities)))
for index, activity in enumerate(missing_activities):
id, start = activity
log.info("backing up activity %d from %s (%d out of %d) ..." %
(id, start, index+1, len(missing_activities)))
try:
garminexport.util.export_activity(
client, id, args.backup_dir, args.format)
garminexport.backup.download(
client, activity, args.backup_dir, args.format)
except Exception as e:
log.error(u"failed with exception: %s", e)
if not args.ignore_errors:

164
garminexport/backup.py Normal file
View File

@ -0,0 +1,164 @@
"""Module with methods useful when backing up activities.
"""
import codecs
import json
from datetime import datetime
import dateutil.parser
import logging
import os
log = logging.getLogger(__name__)

export_formats = [
    "json_summary",
    "json_details",
    "gpx",
    "tcx",
    "fit",
]
"""Every export format an activity can be backed up in."""

format_suffix = dict(
    json_summary="_summary.json",
    json_details="_details.json",
    gpx=".gpx",
    tcx=".tcx",
    fit=".fit",
)
"""Lookup table from export format to the suffix of its backup file."""

not_found_file = ".not_found"
"""Name of the file that records export attempts that were tried but
failed. Each line holds the file name the export would have been stored
under, had it succeeded.
A line in ``.not_found`` strongly suggests that the activity simply doesn't
exist in that format, so the export should not be attempted again on the
next backup run. Manually created activities are one such case: they cannot
be exported to ``.fit`` format."""
def export_filename(activity, export_format):
    """Build the backup file name for one activity in one export format.

    File names are laid out as ``<timestamp>_<activity_id><suffix>``,
    e.g. ``2015-02-17T05:45:00+00:00_123456789.tcx``.

    :param activity: An activity tuple `(id, starttime)`
    :type activity: tuple of `(int, datetime)`
    :param export_format: The export format (see :attr:`export_formats`)
    :type export_format: str
    :return: The file name to store the exported activity under.
    :rtype: str
    """
    activity_id = activity[0]
    timestamp = activity[1].isoformat()
    suffix = format_suffix[export_format]
    return "{0}_{1}{2}".format(timestamp, activity_id, suffix)
def need_backup(activities, backup_dir, export_formats=None):
    """From a given set of activities, return all activities that haven't
    been backed up in a given set of export formats.

    Activities are considered already backed up if they, for each desired
    export format, have an activity file under the ``backup_dir`` *or*
    if the activity file is listed in the ``.not_found`` file in the backup
    directory.

    :param activities: A list of activity tuples `(id, starttime)`
    :type activities: list of tuples of `(int, datetime)`
    :param backup_dir: Destination directory for exported activities.
    :type backup_dir: str
    :keyword export_formats: The formats each activity must be backed up
      in. If :obj:`None`, every format in :attr:`format_suffix` is required.
    :type export_formats: list of str
    :return: All activities that need to be backed up.
    :rtype: set of tuples of `(int, datetime)`
    """
    if export_formats is None:
        # the parameter shadows the module-level list of the same name, so
        # fall back on the suffix table, which has one key per format
        export_formats = list(format_suffix)

    # file names that are either present on disk or known to be
    # unobtainable; a set keeps each membership test constant-time
    accounted_for = set(os.listdir(backup_dir))
    accounted_for.update(_not_found_activities(backup_dir))

    # collect all activities missing at least one export format
    missing = set()
    for activity in activities:
        activity_files = [export_filename(activity, f) for f in export_formats]
        if any(f not in accounted_for for f in activity_files):
            missing.add(activity)
    return missing
def _not_found_activities(backup_dir):
    """Return the file names listed in the ``.not_found`` file of a
    backup directory.

    Those entries are exports that were tried but failed, and are treated
    by the caller as if they were backed up. An empty list is returned when
    the directory has no such file.

    :param backup_dir: Backup directory path.
    :type backup_dir: str
    :return: The whitespace-stripped lines of the not-found file.
    :rtype: list of str
    """
    listing_path = os.path.join(backup_dir, not_found_file)
    entries = []
    if os.path.isfile(listing_path):
        with open(listing_path, mode="r") as listing:
            for raw_line in listing.readlines():
                entries.append(raw_line.strip())
    log.debug("%d tried but failed activities in %s",
              len(entries), listing_path)
    return entries
def download(client, activity, backup_dir, export_formats=None):
    """Exports a Garmin Connect activity to a given set of formats
    and saves the resulting file(s) to a given backup directory.

    In case a given format cannot be exported for the activity (the client
    hands back :obj:`None`), the file name will be appended to the
    :attr:`not_found_file` in the backup directory (to prevent it from
    being retried on subsequent backup runs).

    :param client: A :class:`garminexport.garminclient.GarminClient`
      instance that is assumed to be connected.
    :type client: :class:`garminexport.garminclient.GarminClient`
    :param activity: An activity tuple `(id, starttime)`
    :type activity: tuple of `(int, datetime)`
    :param backup_dir: Backup directory path (assumed to exist already).
    :type backup_dir: str
    :keyword export_formats: Which format(s) to export to. Could be any
      of: 'json_summary', 'json_details', 'gpx', 'tcx', 'fit'.
      If :obj:`None`, all formats are exported.
    :type export_formats: list of str
    """
    if export_formats is None:
        # the parameter shadows the module-level list of the same name, so
        # fall back on the suffix table, which has one key per format
        export_formats = list(format_suffix)
    activity_id = activity[0]

    # (format, client method) pairs; the JSON documents are written
    # unconditionally, without any not-found bookkeeping
    json_sources = (
        ("json_summary", "get_activity_summary"),
        ("json_details", "get_activity_details"),
    )
    for fmt, getter in json_sources:
        if fmt not in export_formats:
            continue
        document = getattr(client, getter)(activity_id)
        dest = os.path.join(backup_dir, export_filename(activity, fmt))
        with codecs.open(dest, encoding="utf-8", mode="w") as f:
            f.write(json.dumps(document, ensure_ascii=False, indent=4))

    # (format, client method, written as raw bytes?) triples
    file_sources = (
        ("gpx", "get_activity_gpx", False),
        ("tcx", "get_activity_tcx", False),
        ("fit", "get_activity_fit", True),
    )
    not_found_path = os.path.join(backup_dir, not_found_file)
    with open(not_found_path, mode="a") as not_found:
        for fmt, getter, is_binary in file_sources:
            if fmt not in export_formats:
                continue
            content = getattr(client, getter)(activity_id)
            dest = os.path.join(backup_dir, export_filename(activity, fmt))
            if content is None:
                # remember the miss so the next run doesn't retry it
                not_found.write(os.path.basename(dest) + "\n")
            elif is_binary:
                with open(dest, mode="wb") as f:
                    f.write(content)
            else:
                with codecs.open(dest, encoding="utf-8", mode="w") as f:
                    f.write(content)

View File

@ -5,6 +5,7 @@ parts of the Garmin Connect REST API.
import json
import logging
import os
import re
import requests
from StringIO import StringIO
@ -241,11 +242,14 @@ class GarminClient(object):
@require_session
def get_activity_gpx(self, activity_id):
"""Return a GPX (GPS Exchange Format) representation of a
given activity.
given activity. If the activity cannot be exported to GPX
(not yet observed in practice, but that doesn't exclude the
possibility), a :obj:`None` value is returned.
:param activity_id: Activity identifier.
:type activity_id: int
:returns: The GPX representation of the activity as an XML string.
:returns: The GPX representation of the activity as an XML string
or ``None`` if the activity couldn't be exported to GPX.
:rtype: str
"""
response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.3/gpx/course/{}".format(activity_id))
@ -253,6 +257,8 @@ class GarminClient(object):
# and is the one used when exporting through the Garmin
# Connect web page.
#response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/gpx/activity/{}?full=true".format(activity_id))
if response.status_code == 404:
return None
if response.status_code != 200:
raise Exception(u"failed to fetch GPX for activity {}: {}\n{}".format(
activity_id, response.status_code, response.text))
@ -261,20 +267,58 @@ class GarminClient(object):
def get_activity_tcx(self, activity_id):
    """Return a TCX (Training Center XML) representation of a
    given activity. If the activity doesn't have a TCX source (for
    example, if it was originally uploaded in GPX format, Garmin
    won't try to synthesize a TCX file) a :obj:`None` value is
    returned.

    :param activity_id: Activity identifier.
    :type activity_id: int
    :returns: The TCX representation of the activity as an XML string
      or ``None`` if the activity cannot be exported to TCX.
    :rtype: str or None
    :raises Exception: If the server answers with a status code other
      than 200 or 404.
    """
    response = self.session.get("https://connect.garmin.com/proxy/activity-service-1.1/tcx/activity/{}?full=true".format(activity_id))
    if response.status_code == 404:
        # no TCX available for this activity
        return None
    if response.status_code != 200:
        raise Exception(u"failed to fetch TCX for activity {}: {}\n{}".format(
            activity_id, response.status_code, response.text))
    return response.text
def get_original_activity(self, activity_id):
    """Return the original file that was uploaded for an activity.
    If the activity doesn't have any file source (for example,
    if it was entered manually rather than imported from a Garmin
    device) then :obj:`(None,None)` is returned.

    :param activity_id: Activity identifier.
    :type activity_id: int
    :returns: A tuple of the file type (e.g. 'fit', 'tcx', 'gpx') and
      its contents, or :obj:`(None,None)` if no file is found.
    :rtype: (str, str)
    :raises Exception: If the server answers with a status code other
      than 200 or 404.
    """
    # function-scope import: the response body is binary, and the module
    # itself only imports the Python 2 ``StringIO``
    import io

    response = self.session.get("https://connect.garmin.com/proxy/download-service/files/activity/{}".format(activity_id))
    if response.status_code == 404:
        # Manually entered activity, no file source available
        return (None, None)
    if response.status_code != 200:
        raise Exception(
            u"failed to get original activity file {}: {}\n{}".format(
                activity_id, response.status_code, response.text))

    # return the first entry from the zip archive where the filename is
    # activity_id (should be the only entry!)
    wanted = str(activity_id)
    # the ``with`` statement closes the archive on every exit path
    with zipfile.ZipFile(io.BytesIO(response.content), mode="r") as archive:
        for path in archive.namelist():
            stem, ext = os.path.splitext(path)
            if stem == wanted:
                return ext[1:], archive.read(path)
    return (None, None)
def get_activity_fit(self, activity_id):
"""Return a FIT representation for a given activity. If the activity
doesn't have a FIT source (for example, if it was entered manually
@ -287,16 +331,9 @@ class GarminClient(object):
if no FIT source exists for this activity (e.g., entered manually).
:rtype: str
"""
response = self.session.get("https://connect.garmin.com/proxy/download-service/files/activity/{}".format(activity_id))
if response.status_code == 404:
# No FIT source available for activity
return None
if response.status_code != 200:
raise Exception(u"failed to fetch FIT for activity {}: {}\n{}".format(
activity_id, response.status_code, response.text))
# fit file returned from server is in a zip archive
zipped_fit_file = response.content
zip = zipfile.ZipFile(StringIO(zipped_fit_file), mode="r")
# return the "<activity-activity_id>.fit" entry from the zip archive
return zip.open(str(activity_id) + ".fit").read()
fmt, orig_file = self.get_original_activity(activity_id)
# if the file extension of the original activity file isn't 'fit',
# this activity was uploaded in a different format (e.g. gpx/tcx)
# and cannot be exported to fit
return orig_file if fmt=='fit' else None

View File

@ -1,71 +0,0 @@
#! /usr/bin/env python
"""A module with utility functions."""
import codecs
import json
from datetime import datetime
import dateutil.parser
import os
export_formats=["json_summary", "json_details", "gpx", "tcx", "fit"]
def export_activity(client, activity_id, destination,
                    formats=None):
    """Exports a Garmin Connect activity to a given set of formats
    and saves the result file(s) to a given destination directory.

    Saved files are named ``<ISO8601 start time>_<activity id><suffix>``,
    where the start time is read from the activity summary. A GPX, TCX or
    FIT export that the client reports as unavailable (a :obj:`None`
    result) is skipped rather than written.

    :param client: A :class:`garminexport.garminclient.GarminClient`
      instance that is assumed to be connected.
    :type client: :class:`garminexport.garminclient.GarminClient`
    :param activity_id: Activity identifier.
    :type activity_id: int
    :param destination: Destination directory (assumed to exist already).
    :type destination: str
    :keyword formats: Which format(s) to export to. Could be any
      of: 'json_summary', 'json_details', 'gpx', 'tcx', 'fit'.
      If set to :obj:`None` all formats will be exported.
    :type formats: list of str
    """
    if formats is None:
        formats = export_formats

    # the summary is always fetched: it carries the start time that is
    # part of every file name
    activity_summary = client.get_activity_summary(activity_id)

    # prefix saved activity files with timestamp and activity id
    start = activity_summary["activity"]["activitySummary"]["BeginTimestamp"]["value"]
    timestamp = dateutil.parser.parse(start)
    filename_prefix = "{}_{}".format(timestamp.isoformat(), activity_id)
    path_prefix = os.path.join(destination, filename_prefix)

    if 'json_summary' in formats:
        summary_file = path_prefix + "_summary.json"
        with codecs.open(summary_file, encoding="utf-8", mode="w") as f:
            f.write(json.dumps(
                activity_summary, ensure_ascii=False, indent=4))

    if 'json_details' in formats:
        activity_details = client.get_activity_details(activity_id)
        details_file = path_prefix + "_details.json"
        with codecs.open(details_file, encoding="utf-8", mode="w") as f:
            f.write(json.dumps(
                activity_details, ensure_ascii=False, indent=4))

    if 'gpx' in formats:
        activity_gpx = client.get_activity_gpx(activity_id)
        # None means the export doesn't exist; don't create an empty file
        if activity_gpx is not None:
            gpx_file = path_prefix + ".gpx"
            with codecs.open(gpx_file, encoding="utf-8", mode="w") as f:
                f.write(activity_gpx)

    if 'tcx' in formats:
        activity_tcx = client.get_activity_tcx(activity_id)
        # None means the export doesn't exist; don't create an empty file
        if activity_tcx is not None:
            tcx_file = path_prefix + ".tcx"
            with codecs.open(tcx_file, encoding="utf-8", mode="w") as f:
                f.write(activity_tcx)

    if 'fit' in formats:
        activity_fit = client.get_activity_fit(activity_id)
        fit_file = path_prefix + ".fit"
        if activity_fit:
            with open(fit_file, mode="wb") as f:
                f.write(activity_fit)