Package CedarBackup2 :: Package actions :: Module collect
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.actions.collect

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2008,2011 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python 2 (>= 2.7) 
 29  # Project  : Cedar Backup, release 2 
 30  # Purpose  : Implements the standard 'collect' action. 
 31  # 
 32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 33   
 34  ######################################################################## 
 35  # Module documentation 
 36  ######################################################################## 
 37   
 38  """ 
 39  Implements the standard 'collect' action. 
 40  @sort: executeCollect 
 41  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 42  """ 
 43   
 44   
 45  ######################################################################## 
 46  # Imported modules 
 47  ######################################################################## 
 48   
 49  # System modules 
 50  import os 
 51  import logging 
 52  import pickle 
 53   
 54  # Cedar Backup modules 
 55  from CedarBackup2.filesystem import BackupFileList, FilesystemList 
 56  from CedarBackup2.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath 
 57  from CedarBackup2.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR 
 58  from CedarBackup2.actions.util import writeIndicatorFile 
 59   
 60   
 61  ######################################################################## 
 62  # Module-wide constants and variables 
 63  ######################################################################## 
 64   
 65  logger = logging.getLogger("CedarBackup2.log.actions.collect") 
 66   
 67   
 68  ######################################################################## 
 69  # Public functions 
 70  ######################################################################## 
 71   
 72  ############################ 
 73  # executeCollect() function 
 74  ############################ 
 75   
def executeCollect(configPath, options, config):
   """
   Executes the collect backup action.

   Configured collect files are processed first, then configured collect
   directories.  An item is backed up today when a full backup was requested,
   when its collect mode is C{daily} or C{incr}, or when its collect mode is
   C{weekly} and today is the configured starting day of the week.  The digest
   is reset whenever a full backup was requested or today is the starting day.

   @note: When the collect action is complete, we will write a collect
   indicator to the collect directory, so it's obvious that the collect action
   has completed.  The stage process uses this indicator to decide whether a
   peer is ready to be staged.

   @param configPath: Path to configuration file on disk (not referenced by this function).
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise TarError: If there is a problem creating a tar file
   """
   logger.debug("Executing the 'collect' action.")

   # Validate configuration before doing any work
   if config.options is None or config.collect is None:
      raise ValueError("Collect configuration is not properly filled in.")
   noFiles = config.collect.collectFiles is None or len(config.collect.collectFiles) < 1
   noDirs = config.collect.collectDirs is None or len(config.collect.collectDirs) < 1
   if noFiles and noDirs:
      raise ValueError("There must be at least one collect file or collect directory.")

   fullBackup = options.full
   logger.debug("Full backup flag is [%s]", fullBackup)
   todayIsStart = isStartOfWeek(config.options.startingDay)
   resetDigest = fullBackup or todayIsStart
   logger.debug("Reset digest flag is [%s]", resetDigest)

   def dueToday(mode):
      # Shared scheduling rule for both files and directories
      return fullBackup or (mode in ['daily', 'incr', ]) or (mode == 'weekly' and todayIsStart)

   if config.collect.collectFiles is not None:
      for collectFile in config.collect.collectFiles:
         filePath = collectFile.absolutePath
         logger.debug("Working with collect file [%s]", filePath)
         collectMode = _getCollectMode(config, collectFile)
         archiveMode = _getArchiveMode(config, collectFile)
         digestPath = _getDigestPath(config, filePath)
         tarfilePath = _getTarfilePath(config, filePath, archiveMode)
         if not dueToday(collectMode):
            logger.debug("File will not be backed up, per collect mode.")
         else:
            logger.debug("File meets criteria to be backed up today.")
            _collectFile(config, filePath, tarfilePath,
                         collectMode, archiveMode, resetDigest, digestPath)
         logger.info("Completed collecting file [%s]", filePath)

   if config.collect.collectDirs is not None:
      for collectDir in config.collect.collectDirs:
         dirPath = collectDir.absolutePath
         logger.debug("Working with collect directory [%s]", dirPath)
         collectMode = _getCollectMode(config, collectDir)
         archiveMode = _getArchiveMode(config, collectDir)
         ignoreFile = _getIgnoreFile(config, collectDir)
         linkDepth = _getLinkDepth(collectDir)
         dereference = _getDereference(collectDir)
         recursionLevel = _getRecursionLevel(collectDir)
         (excludePaths, excludePatterns) = _getExclusions(config, collectDir)
         if not dueToday(collectMode):
            logger.debug("Directory will not be backed up, per collect mode.")
         else:
            logger.debug("Directory meets criteria to be backed up today.")
            _collectDirectory(config, dirPath,
                              collectMode, archiveMode, ignoreFile, linkDepth, dereference,
                              resetDigest, excludePaths, excludePatterns, recursionLevel)
         logger.info("Completed collecting directory [%s]", dirPath)

   # Signal completion of the collect action
   writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                      config.options.backupUser, config.options.backupGroup)
   logger.info("Executed the 'collect' action successfully.")
143 144 145 ######################################################################## 146 # Private utility functions 147 ######################################################################## 148 149 ########################## 150 # _collectFile() function 151 ########################## 152
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Collects a configured collect file.

   A backup list holding only the indicated file is built and handed to
   L{_executeBackup}, which writes the indicated tarfile.  For files that are
   collected incrementally, the indicated digest path and the reset digest
   flag are passed along as well.

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect file itself.

   @param config: Config object.
   @param absolutePath: Absolute path of file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   singleFileList = BackupFileList()
   singleFileList.addFile(absolutePath)
   _executeBackup(config, singleFileList, absolutePath, tarfilePath,
                  collectMode, archiveMode, resetDigest, digestPath)
177 178 179 ############################### 180 # _collectDirectory() function 181 ############################### 182
def _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      excludePaths, excludePatterns, recursionLevel):
   """
   Collects a configured collect directory.

   At recursion level zero, the directory is collected into a tarfile whose
   path (and, for incremental collection, whose digest path) is derived from
   the directory's absolute path.  The reset digest flag is passed through to
   L{_executeBackup}.

   At a recursion level greater than zero, each immediate subdirectory is
   first collected on its own with the recursion level reduced by one, and is
   then added to the exclusion list.  Finally the directory itself is
   collected at level zero so that whatever was not already backed up is
   picked up.

   The caller's C{excludePaths} list is never modified; a private copy is
   used to accumulate the subdirectories that have already been backed up.

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect directory itself.

   @param config: Config object.
   @param absolutePath: Absolute path of directory to collect.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param ignoreFile: Ignore file to use.
   @param linkDepth: Link depth value to use.
   @param dereference: Dereference flag to use.
   @param resetDigest: Reset digest flag.
   @param excludePaths: List of absolute paths to exclude.
   @param excludePatterns: List of patterns to exclude.
   @param recursionLevel: Recursion level (zero for no recursion)
   """
   if recursionLevel == 0:
      # Collect the actual directory because we're at recursion level 0
      logger.info("Collecting directory [%s]", absolutePath)
      tarfilePath = _getTarfilePath(config, absolutePath, archiveMode)
      digestPath = _getDigestPath(config, absolutePath)

      backupList = BackupFileList()
      backupList.ignoreFile = ignoreFile
      backupList.excludePaths = excludePaths
      backupList.excludePatterns = excludePatterns
      backupList.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)

      _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)
   else:
      # Accumulate exclusions in a private copy so the caller's list is left
      # untouched (and so a missing list does not break the append below).
      workingExcludes = list(excludePaths) if excludePaths is not None else []

      # Find all of the immediate subdirectories
      subdirs = FilesystemList()
      subdirs.excludeFiles = True
      subdirs.excludeLinks = True
      subdirs.excludePaths = workingExcludes
      subdirs.excludePatterns = excludePatterns
      subdirs.addDirContents(path=absolutePath, recursive=False, addSelf=False)

      # Back up the subdirectories separately
      for subdir in subdirs:
         _collectDirectory(config, subdir, collectMode, archiveMode,
                           ignoreFile, linkDepth, dereference, resetDigest,
                           workingExcludes, excludePatterns, recursionLevel-1)
         workingExcludes.append(subdir)  # this directory is already backed up, so exclude it

      # Back up everything that hasn't previously been backed up
      _collectDirectory(config, absolutePath, collectMode, archiveMode,
                        ignoreFile, linkDepth, dereference, resetDigest,
                        workingExcludes, excludePatterns, 0)
243 244 245 ############################ 246 # _executeBackup() function 247 ############################ 248
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Execute the backup process for the indicated backup list.

   This function exists mainly to consolidate functionality between the
   L{_collectFile} and L{_collectDirectory} functions.  Those functions build
   the backup list; this function causes the backup to execute properly and
   also manages usage of the digest file on disk.

   When the collect mode is C{incr}, unchanged files are removed from the
   list based on the old digest (an empty digest if the reset flag is set,
   otherwise the one loaded from disk), and the newly captured digest is
   always written back afterwards.  For any other collect mode no digest is
   used.  A tarfile is only generated when the list is not empty.

   For collect files, the digest file will always just contain the single file
   that is being backed up.  This might be a little wasteful in terms of the
   number of files that we keep around, but it's consistent and easy to
   understand.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   incremental = (collectMode == 'incr')
   newDigest = None

   # Step 1: digest handling that must happen before the list is reported
   if incremental:
      if resetDigest:
         logger.debug("Based on resetDigest flag, digest will be cleared.")
         oldDigest = {}
      else:
         logger.debug("Based on resetDigest flag, digest will loaded from disk.")
         oldDigest = _loadDigest(digestPath)
      (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
      logger.debug("Removed %d unchanged files based on digest values.", removed)
   else:
      logger.debug("Collect mode is [%s]; no digest will be used.", collectMode)

   # Step 2: report what is about to be backed up
   individualFile = len(backupList) == 1 and backupList[0] == absolutePath  # special case for individual file
   if individualFile:
      logger.info("Backing up file [%s] (%s).", absolutePath, displayBytes(backupList.totalSize()))
   else:
      logger.info("Backing up %d files in [%s] (%s).", len(backupList), absolutePath, displayBytes(backupList.totalSize()))

   # Step 3: write the tarfile, but only if there is something to put in it
   if len(backupList) > 0:
      backupList.generateTarfile(tarfilePath, archiveMode, True)
      changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)

   # Step 4: the new digest is always rewritten for incremental collection
   if incremental:
      _writeDigest(config, newDigest, digestPath)
298 299 300 ######################### 301 # _loadDigest() function 302 ######################### 303
def _loadDigest(digestPath):
   """
   Loads the indicated digest path from disk into a dictionary.

   If we can't load the digest successfully (either because it doesn't exist or
   for some other reason), then an empty dictionary will be returned - but the
   condition will be logged.

   @note: The digest is read with C{pickle}, so the file must only ever come
   from a trusted location (it is expected to have been written by
   L{_writeDigest}).

   @param digestPath: Path to the digest file on disk.

   @return: Dictionary representing contents of digest path.
   """
   if not os.path.isfile(digestPath):
      logger.debug("Digest [%s] does not exist on disk.", digestPath)
      return {}
   try:
      # Pickle data is a byte stream, so read it in binary mode; the with
      # block guarantees the handle is closed even if unpickling fails.
      with open(digestPath, "rb") as digestFile:
         digest = pickle.load(digestFile)
      logger.debug("Loaded digest [%s] from disk: %d entries.", digestPath, len(digest))
   except Exception:  # best-effort load, but let KeyboardInterrupt/SystemExit through
      digest = {}
      logger.error("Failed loading digest [%s] from disk.", digestPath)
   return digest
327 328 329 ########################## 330 # _writeDigest() function 331 ########################## 332
def _writeDigest(config, digest, digestPath):
   """
   Writes the digest dictionary to the indicated digest path on disk.

   If we can't write the digest successfully for any reason, we'll log the
   condition but won't throw an exception.

   The file is written and closed before its ownership is changed to the
   configured backup user and group.

   @param config: Config object.
   @param digest: Digest dictionary to write to disk.
   @param digestPath: Path to the digest file on disk.
   """
   try:
      # Pickle data is a byte stream, so write it in binary mode; the with
      # block flushes and closes the file before ownership is changed.
      with open(digestPath, "wb") as digestFile:
         pickle.dump(digest, digestFile)
      changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
      logger.debug("Wrote new digest [%s] to disk: %d entries.", digestPath, len(digest))
   except Exception:  # best-effort write, but let KeyboardInterrupt/SystemExit through
      logger.error("Failed to write digest [%s] to disk.", digestPath)
350 351 352 ######################################################################## 353 # Private attribute "getter" functions 354 ######################################################################## 355 356 ############################# 357 # _getCollectMode() function 358 ############################# 359
def _getCollectMode(config, item):
   """
   Gets the collect mode that should be used for a collect directory or file.
   The item's own collect mode wins when it is set; otherwise the value from
   the collect section of the configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Collect mode to use.
   """
   collectMode = config.collect.collectMode if item.collectMode is None else item.collectMode
   logger.debug("Collect mode is [%s]", collectMode)
   return collectMode
374 375 376 ############################# 377 # _getArchiveMode() function 378 ############################# 379
def _getArchiveMode(config, item):
   """
   Gets the archive mode that should be used for a collect directory or file.
   The item's own archive mode wins when it is set; otherwise the value from
   the collect section of the configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Archive mode to use.
   """
   if item.archiveMode is not None:
      archiveMode = item.archiveMode
   else:
      archiveMode = config.collect.archiveMode
   logger.debug("Archive mode is [%s]", archiveMode)
   return archiveMode
394 395 396 ############################ 397 # _getIgnoreFile() function 398 ############################ 399
def _getIgnoreFile(config, item):
   """
   Gets the ignore file that should be used for a collect item.
   The item's own ignore file wins when it is set; otherwise the value from
   the collect section of the configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Ignore file to use.
   """
   ignoreFile = config.collect.ignoreFile if item.ignoreFile is None else item.ignoreFile
   logger.debug("Ignore file is [%s]", ignoreFile)
   return ignoreFile
414 415 416 ############################ 417 # _getLinkDepth() function 418 ############################ 419
def _getLinkDepth(item):
   """
   Gets the link depth that should be used for a collect directory.
   The directory's own link depth is used when it is set; otherwise the
   result is 0 (zero).
   @param item: C{CollectDir} object
   @return: Link depth to use.
   """
   if item.linkDepth is not None:
      linkDepth = item.linkDepth
   else:
      linkDepth = 0
   logger.debug("Link depth is [%d]", linkDepth)
   return linkDepth
433 434 435 ############################ 436 # _getDereference() function 437 ############################ 438
def _getDereference(item):
   """
   Gets the dereference flag that should be used for a collect directory.
   The directory's own flag is used when it is set; otherwise the result is
   False.
   @param item: C{CollectDir} object
   @return: Dereference flag to use.
   """
   dereference = False if item.dereference is None else item.dereference
   logger.debug("Dereference flag is [%s]", dereference)
   return dereference
452 453 454 ################################ 455 # _getRecursionLevel() function 456 ################################ 457
def _getRecursionLevel(item):
   """
   Gets the recursion level that should be used for a collect directory.
   The directory's own recursion level is used when it is set; otherwise the
   result is 0 (zero).
   @param item: C{CollectDir} object
   @return: Recursion level to use.
   """
   if item.recursionLevel is not None:
      recursionLevel = item.recursionLevel
   else:
      recursionLevel = 0
   logger.debug("Recursion level is [%d]", recursionLevel)
   return recursionLevel
471 472 473 ############################ 474 # _getDigestPath() function 475 ############################ 476
def _getDigestPath(config, absolutePath):
   """
   Gets the digest path associated with a collect directory or file.

   The digest file name is the normalized form of the absolute path with the
   digest extension appended, and it lives in the configured working
   directory.

   @param config: Config object.
   @param absolutePath: Absolute path to generate digest for
   @return: Absolute path to the digest associated with the collect directory or file.
   """
   digestName = "%s.%s" % (buildNormalizedPath(absolutePath), DIGEST_EXTENSION)
   digestPath = os.path.join(config.options.workingDir, digestName)
   logger.debug("Digest path is [%s]", digestPath)
   return digestPath
489 490 491 ############################# 492 # _getTarfilePath() function 493 ############################# 494
495 -def _getTarfilePath(config, absolutePath, archiveMode):
496 """ 497 Gets the tarfile path (including correct extension) associated with a collect directory. 498 @param config: Config object. 499 @param absolutePath: Absolute path to generate tarfile for 500 @param archiveMode: Archive mode to use for this tarfile. 501 @return: Absolute path to the tarfile associated with the collect directory. 502 """ 503 if archiveMode == 'tar': 504 extension = "tar" 505 elif archiveMode == 'targz': 506 extension = "tar.gz" 507 elif archiveMode == 'tarbz2': 508 extension = "tar.bz2" 509 normalized = buildNormalizedPath(absolutePath) 510 filename = "%s.%s" % (normalized, extension) 511 tarfilePath = os.path.join(config.collect.targetDir, filename) 512 logger.debug("Tarfile path is [%s]", tarfilePath) 513 return tarfilePath
514 515 516 ############################ 517 # _getExclusions() function 518 ############################ 519
def _getExclusions(config, collectDir):
   """
   Gets exclusions (file and patterns) associated with a collect directory.

   The returned files value is a list of absolute paths to be excluded from the
   backup for a given directory.  It is built, in order, from the collect
   configuration's absolute exclude paths, the collect directory's absolute
   exclude paths, and the collect directory's relative exclude paths joined
   onto the directory's absolute path.

   The returned patterns value is a list of patterns to be excluded from the
   backup for a given directory.  It is built, in order, from the patterns in
   the collect configuration and the patterns on the collect directory itself.

   Both lists are newly created on every call.

   @param config: Config object.
   @param collectDir: Collect directory object.

   @return: Tuple (files, patterns) indicating what to exclude.
   """
   paths = []
   for absoluteSource in (config.collect.absoluteExcludePaths, collectDir.absoluteExcludePaths):
      if absoluteSource is not None:
         paths.extend(absoluteSource)
   if collectDir.relativeExcludePaths is not None:
      paths.extend(os.path.join(collectDir.absolutePath, relativePath)
                   for relativePath in collectDir.relativeExcludePaths)

   patterns = []
   for patternSource in (config.collect.excludePatterns, collectDir.excludePatterns):
      if patternSource is not None:
         patterns.extend(patternSource)

   logger.debug("Exclude paths: %s", paths)
   logger.debug("Exclude patterns: %s", patterns)
   return (paths, patterns)
554