User Tools

Site Tools


python:folder_cleaner

Folder cleaner script

python 2.6 version

03.02.2016

Requirements

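The script should remove specific files from a designated folder, based on their age. The script compares each file's `st_ctime`; note that on Unix this is the time of the last inode change, not the true creation time.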

In this specific example, it will remove .gz and .log files older than a given number of days (specified on the command line).

A couple of options are available as constants within the script. These should be modified according to your specific needs.

Note: This is a Python 2.6 version due to some client server constraints, but it can easily be upgraded to 3.X.

Code

#!/usr/bin/env python
"""
Clean up older files from the "unrecognized" data folder.

Entries of TARGET_FOLDER whose name ends with one of TARGET_EXTENSIONS and
whose st_ctime is older than the requested number of days are removed.
Progress is logged either to the console (--stdlog) or to a per-day logfile
inside LOG_FOLDER.

It can be used from CLI or as a cronjob. See --help for the options.
"""
__author__ = "Cristian Navalici"
__email__ = "ncristian@lemonsoftware.eu"
__version__ = "1.0 Feb 2016"
__license__ = "GPLv3"
 
import sys
import os
import os.path
import getopt
 
from datetime import datetime
from time import time
 
# Folder that is scanned for files to delete (only its direct entries are
# listed; it is not walked recursively).
TARGET_FOLDER = '/var/www/vhosts/staging/data/unrecognized'

# Filename suffixes eligible for deletion. Kept as a tuple because it is
# passed directly to str.endswith().
TARGET_EXTENSIONS = (".gz", ".log")

# Folder where the log files will be saved when not logging to the console
# (the folder must exist).
LOG_FOLDER = '/tmp/logfiles'

# An intermediate progress message is logged every time this many files
# have been deleted.
LOG_MILESTONE = 10000

# Logfile name template; {0} is replaced with the current date (YYYY-MM-DD),
# which yields one logfile per day.
LOG_FILENAME_FORMAT = 'log_{0}.cleanup'

# Number of seconds in one day, used to convert "days to keep" into a
# timestamp offset.
DAY = 86400
 
def run_cleanup(logger, keepdays=0):
    """Delete old files with a target extension from TARGET_FOLDER.

    Every direct entry of TARGET_FOLDER whose name ends with one of
    TARGET_EXTENSIONS is examined. Entries whose st_ctime is older than
    ``keepdays`` days are removed; everything else is counted as remaining.
    Sub-directories are never removed (they are counted as remaining).

    Args:
        logger: callable accepting a single message string.
        keepdays: how many days worth of files to keep (default 0).

    Exits:
        Logs the error and calls sys.exit(1) if the folder cannot be listed
        or an entry cannot be inspected or removed.
    """
    # Local import: the module is only needed here.
    from stat import S_ISDIR

    counter = 0
    counter_not_deleted = 0

    # Anything with a timestamp below this threshold is considered too old.
    keeper = time() - (keepdays * DAY)
    keep_date = datetime.fromtimestamp(keeper).isoformat()[:10]
    logger("Operation started. Deleting files older than {0}.".format(keep_date))

    try:
        # The listing itself is inside the try block so that a missing or
        # unreadable folder is logged like any other failure.
        for entry in os.listdir(TARGET_FOLDER):
            if not entry.endswith(TARGET_EXTENSIONS):
                continue

            target_file = os.path.join(TARGET_FOLDER, entry)
            fstat = os.stat(target_file)

            # NOTE: on Unix st_ctime is the inode change time, not the
            # creation time. A directory cannot be removed with os.remove(),
            # so it is left in place instead of aborting the whole run.
            if S_ISDIR(fstat.st_mode) or fstat.st_ctime >= keeper:
                counter_not_deleted += 1
                continue

            os.remove(target_file)
            counter += 1
            if (counter % LOG_MILESTONE) == 0:
                logger("Files deleted so far: {0}.".format(counter))
    except OSError as e:
        logger("Oops! Something went wrong.")
        logger("Error: {0}".format(e))

        sys.exit(1)

    logger("Operation completed, {0} files deleted, {1} files remaining."
           .format(counter, counter_not_deleted))
 
 
def log_message(message):
    """Log ``message`` to the console or to the logfile of the day.

    The destination is selected through the function attribute
    ``log_message.stdlog``: when true the message is printed to stdout,
    otherwise it is appended to a per-day file inside LOG_FOLDER. If the
    attribute was never set, file logging is used.

    Args:
        message: text to log; it is prefixed with the current ISO timestamp.
    """
    now = datetime.now().isoformat()

    # getattr with a default so the function also works when the caller
    # has not initialised the attribute (e.g. when imported as a module).
    if getattr(log_message, "stdlog", False):
        # Parenthesised single-argument form prints identically on Python 2.
        print("Log({0}): {1}".format(now, message))
        return

    # log output to a file (one file per day): the first 10 characters of
    # the ISO timestamp are the date part.
    filename = os.path.join(LOG_FOLDER, LOG_FILENAME_FORMAT.format(now[:10]))

    with open(filename, "a") as logfile:
        logfile.write("{0} {1}\n".format(now, message))
 
 
def print_help():
    """Print the command line usage text, then terminate with exit code 2."""
    usage_lines = [
        "Usage for {0} Ver: {1}".format(sys.argv[0], __version__),
        "--help or -h - displays this help",
        "--stdlog - prints the logging information to console (default: logfile)",
        "--keep X or -k X - keeps the last X days of files (default:0)",
        "Logging options (if used):",
        "\tTarget folder: {0}".format(LOG_FOLDER),
        # show what a real logfile name looks like for a sample date
        "\tFilename example: {0}".format(LOG_FILENAME_FORMAT.replace("{0}", "2016-01-01")),
        "\tLog milestone: {0}".format(LOG_MILESTONE),
    ]

    for usage_line in usage_lines:
        print(usage_line)

    # the script never continues after showing the help
    sys.exit(2)
 
 
def parse_cli_options(argv):
    """Parse the command line arguments (without the program name).

    Args:
        argv: list of argument strings, typically ``sys.argv[1:]``.

    Returns:
        A ``(stdlog, keepdays)`` tuple: whether to log to the console, and
        how many days worth of files to keep.

    On an unknown option, on --help/-h, or on a non-integer --keep value
    the usage text is shown through print_help(), which exits the script.
    """
    try:
        opts, _ = getopt.getopt(argv, "hk:", ["help", "stdlog", "keep="])
    except getopt.GetoptError:
        print_help()

    stdlog = False
    keepdays = 0

    for opt, arg in opts:
        if opt in ('--help', '-h'):
            print_help()
        elif opt in ('--keep', '-k'):
            try:
                keepdays = int(arg)
            except ValueError:
                sys.stderr.write("Invalid value for --keep: {0!r}\n".format(arg))
                print_help()
        elif opt == '--stdlog':
            # Only ever switched on: a later option must not reset it, so
            # the flag works regardless of its position on the command line.
            stdlog = True

    return stdlog, keepdays


if __name__ == "__main__":
    log_message.stdlog, keepdays = parse_cli_options(sys.argv[1:])

    # start the show
    run_cleanup(log_message, keepdays)

Usage

Runs with the default settings (log to file, keep no files)

./unrecognized_cleanup.py

Keeps the last 10 days worth of files

./unrecognized_cleanup.py -k 10

Keeps the last 15 days worth of files and logs to the console instead of to a logfile

./unrecognized_cleanup.py --keep 15 --stdlog

Displays the help section:

./unrecognized_cleanup.py --help

python/folder_cleaner.txt · Last modified: 2016/02/03 07:15 by admin