Quick Python search and replace script

We have a client machine under a fair amount of load that has a ton of modified files. Normally we would just restore from the last backup or the previous generation backup, but over 120k files have been exploited since June 2011. Since the machine is doing quite a bit of work, we need to throttle our replacements so that we don’t kill the server.

#!/usr/bin/python
"""

Quick search and replace to replace an exploit on a client's site while
trying to keep the load disruption on the machine to a minimum.

Set the variable exploit to the injected code that should be removed. By
default, this script starts at the current directory. While the load
average is above max_load, the script sleeps 30 seconds at a time until
the load drops.

"""

import glob
import os
import re
import time

# Directory the crawl starts from.
path = '.'
# The crawl pauses while the load average is above this value.
max_load = 10

# Exact text that gets stripped out of every file that contains it.
exploit = """
<script>var i,y,x="3cblahblahblah3e";y='';for(i=0;i
""".strip()

# Files whose names end in one of these extensions are never opened.
file_exclude = re.compile(
    r'\.(gif|jpe?g|swf|css|js|flv|wmv|mp3|mp4|pdf|ico|png|zip)$',
    re.IGNORECASE)

def check_load():
    """Block until the load average is no higher than max_load.

    Reads the first value returned by os.getloadavg(), truncates it to an
    integer and compares it with the module-level max_load.  While it is
    higher, sleeps 30 seconds and checks again.  Returns None.
    """
    while int(os.getloadavg()[0]) > max_load:
        time.sleep(30)

def getdir(path):
    """Clean every non-excluded file in the tree rooted at path.

    path is the directory to start from.  check_load() is called once for
    each directory visited, before any of its files are touched.  Every
    file whose path does not match file_exclude is passed to
    process_file().  Returns None.

    The tree is walked with os.walk, which by default does not descend
    into symlinked directories.  That keeps a symlink loop from being
    walked over and over and keeps the cleanup inside the starting tree.
    A directory that cannot be listed is reported and skipped instead of
    aborting the whole run.
    """
    def report(err):
        # Single-argument print() behaves the same on Python 2 and 3.
        print('skipping: %s' % err)

    for dir_path, _subdirs, names in os.walk(path, onerror=report):
        check_load()
        for name in names:
            file_path = os.path.join(dir_path, name)
            if not file_exclude.search(file_path):
                process_file(file_path)

def process_file(file_path):
    """Remove every occurrence of the exploit string from one file, in place.

    file_path is the path of the file to clean.  The file is rewritten
    only when it contains the module-level exploit string; other files are
    read but never written.  A file that cannot be opened for reading and
    writing (permission denied, dangling symlink, ...) is reported and
    skipped so that one bad entry does not abort the whole crawl.

    Returns None.
    """
    try:
        handle = open(file_path, 'r+')
    except (IOError, OSError) as err:
        print('skipping: %s (%s)' % (file_path, err))
        return

    # The with-block releases the descriptor even if reading or rewriting
    # raises part-way through.
    with handle:
        contents = handle.read()
        if exploit not in contents:
            return
        # Single-argument print() gives the same output on Python 2 and 3.
        print('fixing: %s' % file_path)
        handle.seek(0, os.SEEK_SET)
        handle.write(contents.replace(exploit, ''))
        # The cleaned text is shorter; cut off the leftover tail.
        handle.truncate()

# Start the crawl from the configured starting directory.
getdir(path)

Thankfully, since this server is run as www-data rather than SetUID, the damage wasn’t as bad as it could have been.

Tags: ,

2 Responses to “Quick Python search and replace script”

  1. cd34 Says:

    Modified to use a regexp as there are multiple encoded strings being used.

    #!/usr/bin/python
    """
    
    Quick search and replace to replace an exploit on a client's site while
    trying to keep the load disruption on the machine to a minimum.
    
    Set the variable exploit_regexp to a pattern matching the injected code
    that should be removed. By default, this script starts at the current
    directory. While the load average is above max_load, the script sleeps
    30 seconds at a time until the load drops.
    
    """
    
    import glob
    import os
    import re
    import time
    
    # Directory the crawl starts from.
    path = '.'
    # The crawl pauses while the load average is above this value.
    max_load = 20

    # The match must stop at the FIRST closing tag.  A greedy ".*" runs to
    # the last "</script>" on the line and would also delete any legitimate
    # markup and scripts that follow the injected one.
    exploit_regexp = re.compile(r'<script>var i,y,x=.*?</script>')

    # Files whose names end in one of these extensions are never opened.
    file_exclude = re.compile(
        r'\.(gif|jpe?g|swf|css|js|flv|wmv|mp3|mp4|pdf|ico|png|zip)$',
        re.IGNORECASE)
    
    def check_load():
        """Wait for the load average to drop to max_load or below.

        Each pass reads the first value from os.getloadavg() and truncates
        it to an integer.  If that is still above the module-level max_load
        the function sleeps 30 seconds and tries again; otherwise it
        returns None.
        """
        while True:
            current = int(os.getloadavg()[0])
            if current <= max_load:
                return
            time.sleep(30)
    
    def getdir(path):
        """Clean every non-excluded file in the tree rooted at path.

        path is the directory to start from.  check_load() is called once
        for each directory visited, before any of its files are touched.
        Every file whose path does not match file_exclude is passed to
        process_file().  Returns None.

        The tree is walked with os.walk, which by default does not descend
        into symlinked directories.  That keeps a symlink loop from being
        walked over and over and keeps the cleanup inside the starting
        tree.  A directory that cannot be listed is reported and skipped
        instead of aborting the whole run.
        """
        def report(err):
            # Single-argument print() behaves the same on Python 2 and 3.
            print('skipping: %s' % err)

        for dir_path, _subdirs, names in os.walk(path, onerror=report):
            check_load()
            for name in names:
                file_path = os.path.join(dir_path, name)
                if not file_exclude.search(file_path):
                    process_file(file_path)
    
    def process_file(file_path):
        """Strip everything matching exploit_regexp from one file, in place.

        file_path is the path of the file to clean.  The file is rewritten
        only when the module-level exploit_regexp matches at least once;
        other files are read but never written.  A file that cannot be
        opened for reading and writing (permission denied, dangling
        symlink, ...) is reported and skipped so that one bad entry does
        not abort the whole crawl.

        Returns None.
        """
        try:
            handle = open(file_path, 'r+')
        except (IOError, OSError) as err:
            print('skipping: %s (%s)' % (file_path, err))
            return

        # The with-block releases the descriptor even if reading or
        # rewriting raises part-way through.
        with handle:
            # subn() replaces and counts in one pass, so the contents are
            # not scanned twice.
            cleaned, hits = exploit_regexp.subn('', handle.read())
            if not hits:
                return
            # Single-argument print() gives the same output on Python 2 and 3.
            print('fixing: %s' % file_path)
            handle.seek(0, os.SEEK_SET)
            handle.write(cleaned)
            # The cleaned text is shorter; cut off the leftover tail.
            handle.truncate()
    
    # Start the crawl from the configured starting directory.
    getdir(path)
    
  2. cd34 Says:

    Performance modifications:

    #!/usr/bin/python
    """
    
    Quick search and replace to replace an exploit on a client's site while
    trying to keep the load disruption on the machine to a minimum.
    
    Set the variable exploit_regexp to a pattern matching the injected code
    that should be removed. By default, this script starts at the current
    directory. While the load average is above max_load, the script sleeps
    30 seconds at a time until the load drops.
    
    """
    
    import os
    import re
    import time
    
    # Directory the crawl starts from.
    path = '.'
    # The crawl pauses while the load average is above this value.
    max_load = 20

    # The match must stop at the FIRST closing tag.  A greedy ".*" runs to
    # the last "</script>" on the line and would also delete any legitimate
    # markup and scripts that follow the injected one.
    exploit_regexp = re.compile(r'<script>var i,y,x=.*?</script>')

    # Entries whose names end in one of these extensions are skipped.
    file_exclude = re.compile(
        r'\.(gif|jpe?g|swf|css|js|flv|wmv|mp3|mp4|pdf|ico|png|zip)$',
        re.IGNORECASE)
    
    def check_load():
        """Hold the caller back while the machine is busier than max_load.

        The first value from os.getloadavg(), truncated to an integer, is
        compared with the module-level max_load.  As long as it is higher
        the function sleeps 30 seconds between checks.  Returns None.
        """
        def too_busy():
            return int(os.getloadavg()[0]) > max_load

        while too_busy():
            time.sleep(30)
    
    def getdir(path):
        """Clean every non-excluded file in the tree rooted at path.

        path is the directory to start from.  check_load() is called once
        for each directory visited, before any of its files are touched.
        Every file whose name does not match file_exclude is passed to
        process_file().  Returns None.

        The exclusion pattern is applied to file names only.  Filtering the
        raw directory listing also dropped directories whose names happen
        to end in an excluded extension (for example "lib.js"), so their
        contents were never cleaned.

        The tree is walked with os.walk, which by default does not descend
        into symlinked directories.  That keeps a symlink loop from being
        walked over and over and keeps the cleanup inside the starting
        tree.  A directory that cannot be listed is reported and skipped
        instead of aborting the whole run.
        """
        def report(err):
            # Single-argument print() behaves the same on Python 2 and 3.
            print('skipping: %s' % err)

        for dir_path, _subdirs, names in os.walk(path, onerror=report):
            check_load()
            for name in names:
                if file_exclude.search(name):
                    continue
                process_file(os.path.join(dir_path, name))
    
    def process_file(file_path):
        """Strip everything matching exploit_regexp from one file, in place.

        file_path is the path of the file to clean.  The file is rewritten
        only when the module-level exploit_regexp matches at least once;
        other files are read but never written.  A file that cannot be
        opened for reading and writing (permission denied, dangling
        symlink, ...) is reported and skipped so that one bad entry does
        not abort the whole crawl.

        Returns None.
        """
        try:
            handle = open(file_path, 'r+')
        except (IOError, OSError) as err:
            print('skipping: %s (%s)' % (file_path, err))
            return

        # The with-block releases the descriptor even if reading or
        # rewriting raises part-way through.
        with handle:
            # subn() replaces and counts in one pass, so the contents are
            # not scanned twice.
            cleaned, hits = exploit_regexp.subn('', handle.read())
            if not hits:
                return
            # Single-argument print() gives the same output on Python 2 and 3.
            print('fixing: %s' % file_path)
            handle.seek(0, os.SEEK_SET)
            handle.write(cleaned)
            # The cleaned text is shorter; cut off the leftover tail.
            handle.truncate()
    
    # Start the crawl from the configured starting directory.  Ctrl-C
    # (KeyboardInterrupt) ends the run quietly instead of with a traceback.
    try:
        getdir(path)
    except KeyboardInterrupt:
        pass
    

Leave a Reply

You must be logged in to post a comment.

Entries (RSS) and Comments (RSS).
Cluster host: li