For backups I have a dedicated hard drive on my server. A number of jobs pull data from my various machines using rsync (via ssh) and store it on that drive. Based on information from http://www.mikerubel.org/computers/rsync_snapshots/, I use hard links to minimise the disk space used, which allows me to keep several months' worth of backups.
The code is available in a git repo at http://git.trollgod.org.uk/?p=gkbackup.git;a=summary.
#!/usr/bin/env python
"""Backup program using rsync and hardlinks.
Copyright (c) 2005-2009 Ghworg
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
"""
# Version components: major version, revision and build number.
__version__ = 1
__revision__ = 3
buildno = 152
# Version banner assembled from the three components above.
verString = "gkbackup " + ".".join(
    str(part) for part in (__version__, __revision__, buildno))
# Delay importing os and shutil till after options parsing
import calendar
import datetime
import logging
import optparse
import re
import sys
import time

# Prefer iniparse when it is installed; otherwise fall back to the standard
# library parser (Python 2 module name first, then the Python 3 name).
try:
    from iniparse.compat import ConfigParser
except ImportError:
    try:
        from ConfigParser import ConfigParser
    except ImportError:
        from configparser import ConfigParser

# Optional POSIX ACL support.
try:
    import posix1e
    aclSupport = True
except ImportError:
    aclSupport = False

# Optional extended-attribute support.
try:
    import xattr
    xattrSupport = True
except ImportError:
    xattrSupport = False
class CustomFormatter(logging.Formatter):
    """Log formatter that adds the backup job name to every record.

    The format string may reference ``%(jobname)s``; its value is taken from
    the module-level ``options`` object when that exists.
    """

    def __init__(self, format, timeFormat):
        """Create the formatter.

        format     -- log format string (may use %(jobname)s)
        timeFormat -- strftime-style date format used for %(asctime)s
        """
        logging.Formatter.__init__(self, format, timeFormat)

    def format(self, record):
        """Attach ``jobname`` to *record* and return the formatted text.

        ``jobname`` falls back to an empty string when the global ``options``
        does not exist yet or has no ``jobname`` attribute, so a
        ``%(jobname)s`` placeholder can always be expanded.
        """
        try:
            record.jobname = getattr(options, 'jobname', '')
        except NameError:
            # The global options object has not been created yet.
            record.jobname = ''
        return logging.Formatter.format(self, record)
# Text received so far that has not yet been terminated by a newline,
# keyed by log level.
_logProgressCache = {}


def logProgress(output, level=logging.INFO):
    """Print data captured from an external command to the logs.

    output -- a chunk of command output; it may hold several lines and may
              end part-way through a line
    level  -- logging level at which the lines are emitted

    Text that is not yet terminated by a newline is kept in
    _logProgressCache (per level) and prepended to the next chunk logged at
    the same level, so a line is only emitted once it is complete.  Empty
    lines are not logged.
    """
    pending = _logProgressCache.get(level, '')
    for line in output.splitlines(True):
        pending += line.strip('\r\n')
        # Only a '\n' completes a line; a piece ending in a bare '\r' just
        # keeps accumulating.
        if '\n' in line and pending:
            logging.getLogger().log(level, pending)
            pending = ''
    _logProgressCache[level] = pending
def pingrsync(name, source):
    """Look for the client machine using rsync.

    name   -- host name, used only in log messages
    source -- rsync source specification; a remote one contains ':' or '::'

    Returns True when listing *source* with rsync produces more than one
    line of output.  Returns False when *source* has no remote part, when
    the listing has at most one line (treated as an empty source dir) or
    when getCommandOutput raises RuntimeError.
    """
    logging.debug('Looking for %s' % name)
    # Every '::' source also contains ':', so a single test covers both
    # remote forms and rsync is queried at most once.
    if ':' not in source:
        return False
    try:
        listing = getCommandOutput('rsync %s' % (source))
    except RuntimeError as error:
        logProgress(error.args[0], logging.DEBUG)
        return False
    logging.info('%s found (responded to rsync request)' % name)
    if len(listing.splitlines()) > 1:
        return True
    logging.info('Nothing to backup (empty source dir)')
    return False
def mount(mountpoint):
    """Mount the drive at mountpoint. Drive must be in fstab.

    options.custommount is used as the command when set, plain 'mount'
    otherwise.  Nothing is attempted unless the process runs as root or a
    custom command was supplied.

    Returns True when the command succeeded; False when it was not attempted
    or getCommandOutput raised RuntimeError.
    """
    mountCmd = options.custommount or 'mount'
    if os.getuid() != 0 and not options.custommount:
        logging.debug('Not mounting %s: not root and no custom mount command'
                      % mountpoint)
        return False
    logging.debug('Mounting at %s' % mountpoint)
    try:
        getCommandOutput('%s %s' % (mountCmd, mountpoint))
    except RuntimeError as error:
        logProgress(error.args[0], logging.DEBUG)
        return False
    logging.info('Mounted drive at %s' % mountpoint)
    return True
def umount(mountpoint):
    """Unmount the drive at mountpoint. Drive must be in fstab.

    options.customumount is used as the command when set, plain 'umount'
    otherwise.  Nothing is attempted unless the process runs as root or a
    custom command was supplied.

    Returns True when the command succeeded; False when it was not attempted
    or getCommandOutput raised RuntimeError.
    """
    umountCmd = options.customumount or 'umount'
    if os.getuid() != 0 and not options.customumount:
        logging.debug('Not unmounting %s: not root and no custom umount command'
                      % mountpoint)
        return False
    logging.debug('Unmounting %s' % mountpoint)
    try:
        getCommandOutput('%s %s' % (umountCmd, mountpoint))
    except RuntimeError as error:
        logProgress(error.args[0], logging.DEBUG)
        return False
    logging.info('Unmounted drive at %s' % mountpoint)
    return True
def sshagent(source):
    """Run ssh-add if needed and requested.

    source -- rsync source specification

    The key named by options.sshkey is added only when *source* contains
    ':' but not '::' and options.sshkey is set; otherwise nothing happens.
    The ssh-add output is logged at DEBUG level.
    """
    singleColon = (':' in source) and ('::' not in source)
    if not (singleColon and options.sshkey):
        return
    logging.debug('Adding backup ssh key to ssh-agent')
    sshadd = getCommandOutput('ssh-add %s' % options.sshkey)
    logProgress(sshadd, logging.DEBUG)
def getDateDiffs(oldDate, newDate):
    """Return the difference in years, months and days between two dates.

    oldDate, newDate -- objects with year, month and day attributes
                        (datetime.date or datetime.datetime)

    Returns a (years, months, days) tuple.  The fields are subtracted
    separately; a negative day count borrows one month using the length of
    oldDate's month (leap years included), and a negative month count
    borrows one year.
    """
    yeardiff = newDate.year - oldDate.year
    monthdiff = newDate.month - oldDate.month
    daydiff = newDate.day - oldDate.day
    if daydiff < 0:
        monthdiff -= 1
        # monthrange() returns (weekday of first day, days in month).
        daydiff += calendar.monthrange(oldDate.year, oldDate.month)[1]
    if monthdiff < 0:
        yeardiff -= 1
        monthdiff += 12
    return yeardiff, monthdiff, daydiff
def genNewname(adir):
    """Generate a new name based on the age of the dir.

    adir -- (timestamp, name) tuple; only the timestamp is used

    Bias the age by an hour to make sure yesterday's backup gets
    moved along even if the job runs a few minutes before the
    previous day's.

    Returns 'yearly.N', 'monthly.N', 'weekly.N' or 'daily.N', using the
    largest unit whose difference from the global ``now`` is non-zero.
    """
    oldDate = datetime.datetime.fromtimestamp(adir[0])
    newDate = datetime.datetime.fromtimestamp(now + 3600)
    yeardiff, monthdiff, daydiff = getDateDiffs(oldDate, newDate)
    if yeardiff > 0:
        return 'yearly.%d' % yeardiff
    if monthdiff > 0:
        return 'monthly.%d' % monthdiff
    if daydiff >= 7:
        # Whole weeks only; floor division gives the same result on
        # Python 2 and Python 3.
        return 'weekly.%d' % (daydiff // 7)
    return 'daily.%d' % daydiff
def filterdir(dirname):
    """Filter out all files/dirs that aren't backups.

    dirname -- bare directory entry name (no path components)

    Returns True when the name is 'daily', 'weekly', 'monthly' or 'yearly'
    followed by a dot and one or more digits, False otherwise.
    """
    # Raw string so that '\.' reaches the regex engine unchanged.
    pattern = r'(daily|weekly|monthly|yearly)\.[0-9]+$'
    return re.match(pattern, dirname) is not None
def addTimestamp(entry):
    """Pair a backup dir name with its modification time.

    entry -- name of a directory inside options.destdir

    Returns a (mtime, entry) tuple.
    """
    fullpath = os.path.join(options.destdir, entry)
    return (os.path.getmtime(fullpath), entry)
def listOldBackups():
    """Return a list of previous backups, sorted by age.

    Every entry of options.destdir accepted by filterdir() is returned as a
    (mtime, name) tuple; the list is ordered by ascending mtime, so the
    oldest backup comes first and the newest last.
    """
    # sorted() over a generator works whether or not map()/filter() return
    # lists, which list.sort() on their result does not.
    return sorted(addTimestamp(name)
                  for name in os.listdir(options.destdir)
                  if filterdir(name))
def renameByAge():
    """Rename dirs by pattern $prefix.$age.

    Walks the existing backups from oldest to newest.  A backup older than
    options.maxage days is deleted; otherwise it is renamed to the name
    genNewname() computes for its age.  If that name is already taken, the
    backup being renamed is deleted and the existing dir is kept.
    """
    logging.debug('Renaming old backups')
    for adir in listOldBackups():
        timestamp, oldname = adir
        oldpath = os.path.join(options.destdir, oldname)
        ageInDays = (now - timestamp) / 86400
        if ageInDays > options.maxage:
            logging.info('Deleting %s (%d days old)' % (oldname, ageInDays))
            shutil.rmtree(oldpath)
            continue
        newname = genNewname(adir)
        if oldname == newname:
            logging.info('Skipping %s (%d days old)' % (oldname, ageInDays))
            continue
        logging.info('Moving %s to %s' % (oldname, newname))
        newpath = os.path.join(options.destdir, newname)
        if os.path.exists(newpath):
            logging.info('%s already exists, deleting %s' % (newname, oldname))
            shutil.rmtree(oldpath)
        else:
            os.rename(oldpath, newpath)
def findnewest():
    """Return the name of the most recent backup, or None if there is none.

    The result is used as the source for hard-linking a new backup.
    """
    backups = listOldBackups()
    if not backups:
        return None
    # Last entry of the sorted list; index 1 is the dir name.
    newest = backups[-1]
    return newest[1]
def getSrcAndDstPaths(lsrc, ldst, root, name):
    """Generate full src and dst paths from walk components.

    lsrc -- top of the tree being walked
    ldst -- top of the tree being created
    root -- directory currently being visited (lsrc itself or below it)
    name -- entry name inside root

    Returns (sourcepath, destpath), where destpath sits at the same relative
    location under ldst as sourcepath does under lsrc.
    """
    sourcepath = os.path.join(root, name)
    # Joining with '' appends a separator only when one is missing, so the
    # prefix length is right whether or not lsrc ends with a separator.
    prefix = os.path.join(lsrc, '')
    destpath = os.path.join(ldst, sourcepath[len(prefix):])
    return sourcepath, destpath
def clonedir(srcdir, dstdir):
    """Create dstdir as an empty copy of srcdir's metadata.

    srcdir -- existing directory to copy metadata from
    dstdir -- directory to create

    Copies owner and group, then either the ACL (when posix1e is available)
    or the mode bits, then extended attributes (when xattr is available),
    and finally the access and modification times.
    """
    srcinfo = os.stat(srcdir)
    os.mkdir(dstdir)
    os.chown(dstdir, srcinfo.st_uid, srcinfo.st_gid)
    if not aclSupport:
        os.chmod(dstdir, srcinfo.st_mode)
    else:
        posix1e.ACL(file=srcdir).applyto(dstdir)
    if xattrSupport:
        for key in xattr.listxattr(srcdir):
            xattr.setxattr(dstdir, key, xattr.getxattr(srcdir, key))
    # Timestamps go last, after all other changes to the new dir.
    timestamps = (srcinfo.st_atime, srcinfo.st_mtime)
    os.utime(dstdir, timestamps)
def link(parentDir):
    """Create hardlinks from the newest dir to daily.0.

    parentDir -- directory holding the backups of this job

    The newest existing backup is replicated as options.latestname:
    directories are re-created with clonedir(), symlinked directories are
    re-created as symlinks and every file is hard-linked.  Nothing is done
    when the target already exists or there is no previous backup.  A file
    that cannot be linked is logged and skipped.
    """
    newestDir = findnewest()
    linkdst = os.path.join(parentDir, options.latestname)
    if os.path.exists(linkdst) or newestDir is None:
        logging.warning('Nothing to link')
        return
    lsrc = os.path.join(parentDir, newestDir)
    logging.info('Linking from %s to %s' % (newestDir, options.latestname))
    os.mkdir(linkdst)
    for root, dirs, files in os.walk(lsrc):
        for name in dirs:
            realdir, linkpath = getSrcAndDstPaths(lsrc, linkdst, root, name)
            if os.path.islink(realdir):
                os.symlink(os.readlink(realdir), linkpath)
            else:
                clonedir(realdir, linkpath)
        for name in files:
            realfile, linkpath = getSrcAndDstPaths(lsrc, linkdst, root, name)
            try:
                os.link(realfile, linkpath)
            except OSError as errdata:
                # Report the file that failed, not the top-level dirs.
                logging.error('Error linking %s to %s: %s'
                              % (realfile, linkpath, errdata))
def sync():
    """Run rsync.

    Copies the global ``src`` into options.latestname below options.destdir
    using options.rsyncopts, feeding rsync's output to logProgress, and then
    sets the backup dir's access and modification times to the global
    ``now``.
    """
    logging.info('Making new backup')
    dst = os.path.join(options.destdir, options.latestname)
    command = 'rsync %s %s %s' % (options.rsyncopts, src, dst)
    logging.info('rsync %s %s' % (src, dst))
    logging.debug(command)
    getCommandOutput(command, logProgress, timeout=900000)
    # Stamp the new backup with the start time of this run.
    os.utime(dst, (now, now))
def storeBackupTime():
    """Store the timestamp of the current backup.

    The value of the global ``now`` is written as option 'lastbackup' in
    the section named after the job, in options.timestamps if given, else in
    options.config.  When neither file is configured nothing is stored.
    """
    tstampFile = options.timestamps or options.config
    if not tstampFile:
        logging.debug('No timestamp file')
        return
    config = ConfigParser()
    if os.path.exists(tstampFile):
        config.read(tstampFile)
    if not config.has_section(options.jobname):
        config.add_section(options.jobname)
    logging.debug('Save timestamp "%s" to %s' % (now, tstampFile))
    # Pass a string: the Python 3 configparser rejects non-string values.
    config.set(options.jobname, 'lastbackup', str(now))
    outfile = open(tstampFile, 'w')
    try:
        config.write(outfile)
    finally:
        # Close explicitly so the data is flushed to disk.
        outfile.close()
def parseOptions():
    """Options parsing.

    Builds the optparse parser, parses the process command line and returns
    the resulting options object.  Positional arguments are ignored.
    Options that were not given are None, except ``verbose`` (default 1)
    and ``latestname`` (default 'daily.0/').
    """
    opts = optparse.OptionParser(version=verString)
    reqgrp = optparse.OptionGroup(opts, 'Required')
    optgrp = optparse.OptionGroup(opts, 'Optional')

    # Required options
    reqgrp.add_option('-j', '--jobname', metavar='NAME',
                      help='Name of backup job')
    reqgrp.add_option('-s', '--src', dest='srcdir', metavar='DIR',
                      help='Directory to backup')
    reqgrp.add_option('-c', '--maxage', type='int', metavar='N',
                      help='Number of days to keep old backups for')
    reqgrp.add_option('-d', '--dest', dest='destdir', metavar='DIR',
                      help='Dir to place backups in '
                           '(will create a dir named after the job name)')

    # Optional options
    optgrp.add_option('-m', '--machine', metavar='HOST',
                      help='Hostname for rsync to pull data from, '
                           'if omitted assumes localhost')
    optgrp.add_option('-e', '--excluded', metavar='FILE',
                      help='File containing list of files to exclude from the backup')
    optgrp.add_option('-l', '--logdir', metavar='DIR',
                      help='DIR to store logfiles in')
    optgrp.add_option('-n', '--no-act', action='store_true', dest='noact',
                      help='Show what would be done in the backup job')
    optgrp.add_option('--rsyncopts', metavar='OPTS',
                      help='Set rsync options, defaults to '
                           '"-aAxzSO --delete-excluded --delete-during --fake-super --numeric-ids"')
    optgrp.add_option('--sshkey', metavar='FILE',
                      help='Run ssh-agent and give it the key FILE')
    optgrp.add_option('--logfile', metavar='FILE', help='Log to FILE')
    optgrp.add_option('--latestname', default='daily.0/')
    optgrp.add_option('--period', type='int', help='Min time between backups')
    optgrp.add_option('--config', metavar='FILE',
                      help='FILE to read more options from')
    optgrp.add_option('--timestamps', metavar='FILE',
                      help='FILE to read/write backup timestamps from/to')
    optgrp.add_option('--mount', metavar='MOUNTPOINT',
                      help='Mount drive at MOUNTPOINT on demand')
    optgrp.add_option('--custommount', metavar='COMMAND',
                      help='Use a custom mount command')
    optgrp.add_option('--customumount', metavar='COMMAND',
                      help='Use a custom umount command')
    # Verbosity levels: 0 quiet, 1 default, 2 verbose, 3 debug.
    optgrp.add_option('-q', '--quiet', action='store_const', const=0,
                      dest='verbose')
    optgrp.add_option('-v', '--verbose', action='store_const', const=2,
                      dest='verbose')
    optgrp.add_option('--debug', action='store_const', const=3,
                      dest='verbose')

    opts.add_option_group(reqgrp)
    opts.add_option_group(optgrp)
    opts.set_defaults(verbose=1)
    (parsedopts, dummy) = opts.parse_args()
    return parsedopts
def readSection(config, section, fileopts):
    """Load in the options from the named section of an ini file.

    config   -- parser object providing has_section() and items()
    section  -- name of the section to read
    fileopts -- options object to update in place

    An option that already has a non-None value on fileopts is left alone.
    Values consisting only of digits are stored as int, everything else as
    the raw string.  Returns fileopts.
    """
    if not config.has_section(section):
        return fileopts
    for name, rawValue in config.items(section):
        if getattr(fileopts, name, None) is not None:
            # Already set earlier; never overwrite.
            continue
        if rawValue.isdigit():
            value = int(rawValue)
        else:
            value = rawValue
        setattr(fileopts, name, value)
    return fileopts
def parseOptionsFile(cmdlineOpts):
    """Read in ini file and add options to any cmd line args already given.

    cmdlineOpts -- options object from the command line; updated in place

    The job name, when known, is offered to the parser as a default so the
    file can interpolate %(jobname)s.  Section 'common' is read first, then
    the section named after the job.

    NOTE(review): readSection() never overwrites a value that is already
    set, so an option present in both sections keeps its 'common' value --
    confirm that this precedence is intended.

    Returns the updated options object.
    """
    # Only pass the default when there is a job name: a None default is not
    # a usable value for the parser.
    if cmdlineOpts.jobname:
        config = ConfigParser({'jobname': cmdlineOpts.jobname})
    else:
        config = ConfigParser()
    config.read(cmdlineOpts.config)
    fileopts = readSection(config, 'common', cmdlineOpts)
    # fileopts is cmdlineOpts itself, so a jobname picked up from 'common'
    # is seen here as well.
    if cmdlineOpts.jobname:
        fileopts = readSection(config, cmdlineOpts.jobname, fileopts)
    return fileopts
def init():
    """Initialise the program.

    Finishes the logging setup, verifies that destination dir, source dir
    and job name were supplied (logging an error and exiting with status 2
    otherwise), appends the job name to options.destdir and extends
    options.rsyncopts with -q or -v and the exclude file.

    Returns the rsync source: 'machine:srcdir' when options.machine is set,
    plain srcdir otherwise.
    """
    setupLogging2()
    required = (
        (options.destdir, 'Error: No destination dir specified'),
        (options.srcdir, 'Error: No source dir specified'),
        (options.jobname is not None, 'Error: No name for backup job given'),
    )
    for present, errormsg in required:
        if not present:
            logging.error(errormsg)
            sys.exit(2)
    if options.machine:
        rsyncsrc = options.machine + ':' + options.srcdir
    else:
        rsyncsrc = options.srcdir
    options.destdir = os.path.join(options.destdir, options.jobname)
    if options.verbose == 0:
        options.rsyncopts += ' -q'
    else:
        options.rsyncopts += ' -v'
    if options.excluded:
        options.rsyncopts += ' --exclude-from=' + options.excluded
    return rsyncsrc
def setupLogging1():
    """Setup logging module.

    Log to terminal if available (usually only for debugging): when stdin
    is a tty, a handler writing to stdout is attached to the root logger.
    The root logger level is set to INFO.

    Returns the root logger.
    """
    log = logging.getLogger()
    if sys.stdin.isatty():
        logformat = '%(asctime)s %(levelname)s %(message)s'
        dateformat = '%Y-%m-%d %H:%M:%S'
        shdlr = logging.StreamHandler(sys.stdout)
        shdlr.setFormatter(logging.Formatter(logformat, dateformat))
        shdlr.setLevel(logging.DEBUG)
        log.addHandler(shdlr)
    log.setLevel(logging.INFO)
    return log
def setupLogging2():
    """Add any user requested log destinations to the log already setup.

    Sets the root logger level from options.verbose (>= 3 DEBUG, 0 WARNING,
    anything else INFO) and attaches a file handler when a log file can be
    determined:

    - options.logfile, taken as is when absolute, otherwise placed in
      options.logdir or, without a logdir, in the current directory;
    - else '<jobname>.log' in options.logdir when both are set.

    The file handler uses CustomFormatter and is fixed at INFO level.
    Returns the root logger.
    """
    log = logging.getLogger()
    if options.verbose >= 3:
        log.setLevel(logging.DEBUG)
    elif options.verbose == 0:
        log.setLevel(logging.WARNING)
    else:
        log.setLevel(logging.INFO)

    logpath = None
    if options.logfile:
        if os.path.isabs(options.logfile):
            logpath = options.logfile
        elif options.logdir:
            logpath = os.path.join(options.logdir, options.logfile)
        else:
            logpath = os.path.join(os.getcwd(), options.logfile)
    elif options.logdir and options.jobname:
        logpath = os.path.join(options.logdir, options.jobname + '.log')

    if logpath is not None:
        logformat = '%(asctime)s %(levelname)-7s: %(jobname)s: %(message)s'
        dateformat = '%Y-%m-%d %H:%M:%S'
        fhdlr = logging.FileHandler(logpath)
        fhdlr.setFormatter(CustomFormatter(logformat, dateformat))
        fhdlr.setLevel(logging.INFO)
        log.addHandler(fhdlr)
    return log
def checkLastTime():
    """Decide whether enough time has passed for another backup.

    The 'lastbackup' timestamp of the job is read from options.timestamps
    if that file exists, else from options.config.

    Returns False only when a timestamp was found and fewer than
    options.period whole days have passed between it and the global
    ``now``.  Returns True in every other case: no file configured, no
    section or timestamp for the job, or no period requested.
    """
    if options.timestamps and os.path.exists(options.timestamps):
        tstampFile = options.timestamps
    else:
        tstampFile = options.config
    if not tstampFile:
        # Neither file was given; there is nothing to read.
        logging.debug('lastbackup timestamp not found')
        return True
    config = ConfigParser()
    config.read(tstampFile)
    if not config.has_section(options.jobname):
        return True
    if not config.has_option(options.jobname, 'lastbackup'):
        logging.debug('lastbackup timestamp not found')
        return True
    logging.debug('Reading timestamp from %s' % tstampFile)
    tstamp = config.getfloat(options.jobname, 'lastbackup')
    oDate = datetime.datetime.fromtimestamp(tstamp)
    nDate = datetime.datetime.fromtimestamp(now)
    logging.debug('Last backup at %s' % oDate)
    logging.debug('Time is now %s' % nDate)
    timediff = nDate - oDate
    logging.debug('Age of last backup = %s' % timediff)
    if options.period is None:
        # No minimum interval requested: always back up.
        return True
    if timediff.days < options.period:
        logging.debug('Not long enough since last backup (%d < %d)' %
                      (timediff.days, options.period))
        return False
    return True
def main():
    """Actually do stuff.

    Runs one backup: optionally loads the ssh key, checks that the source
    machine answers, mounts the destination if requested, rotates the old
    backups, hard-links the newest one to a fresh dir, runs rsync into it
    and records the backup time.  A drive mounted here is unmounted again
    before leaving, even if an unexpected exception occurs.

    Always ends by calling sys.exit() with:
      0 -- backup completed
      1 -- source or destination not available
      2 -- a RuntimeError occurred
      3 -- interrupted by the user
    """
    # Bound up front so the cleanup below can test them even when the very
    # first step raises.
    srcUp = False
    dstUp = False
    exitCode = 1
    try:
        sshagent(src)
        if options.machine:
            srcUp = pingrsync(options.machine, src)
        else:
            srcUp = True  # Local machine, must be up
        if srcUp:
            if options.mount:
                dstUp = mount(options.mount)
            else:
                dstUp = True
        if srcUp and dstUp:
            if not os.path.exists(options.destdir):
                logging.warning('Backupdir "%s" doesn\'t exist' % options.destdir)
                logging.info('Creating dir %s' % options.destdir)
                os.mkdir(options.destdir)
            if not os.path.isdir(options.destdir):
                raise RuntimeError('Destdir "%s" isn\'t a dir' % options.destdir)
            renameByAge()
            link(options.destdir)
            sync()
            storeBackupTime()
            exitCode = 0
        else:
            exitCode = 1
    except RuntimeError as err:
        if err.args:
            message = str(err.args[0])
        else:
            message = str(err)
        # logProgress only emits text once it sees a newline, so make sure
        # the message is terminated; otherwise it would never be logged.
        logProgress(message.rstrip('\r\n') + '\n', logging.ERROR)
        exitCode = 2
    except KeyboardInterrupt:
        logging.error('User interrupted program')
        exitCode = 3
    finally:
        # If we mounted the drive then unmount it
        if srcUp and dstUp and options.mount:
            umount(options.mount)
    sys.exit(exitCode)
###############################
# Main
###############################
if __name__ == '__main__':
    setupLogging1()
    options = parseOptions()
    if options.config:
        options = parseOptionsFile(options)
    # Set unset options to default values
    if options.rsyncopts is None:
        options.rsyncopts = '-aAxzSO --delete-excluded --delete-during --fake-super --numeric-ids'
    if options.maxage is None:
        # No limit given: use a value no backup age will ever exceed.
        options.maxage = sys.maxsize
    # os, shutil and getCommandOutput are bound only now so that --no-act
    # can substitute the fake implementations.
    if options.noact:
        import fakeos as os
        import fakeshutil as shutil
        from fakegetCommandOutput import getCommandOutput
    else:
        import os
        import shutil
        from getCommandOutput import getCommandOutput
    src = init()
    # Reference time for this run.
    now = time.time()
    if checkLastTime():
        main()

![Google](http://blog.trollgod.org.uk/wp-content/plugins/easy-adsenser/google-light.gif)
