报告三 webshell检测模块与模块测试（下）

by Hence Zhang HenceTech

webshell检测的两个阶段

webshell的检测大概分两个阶段。一是在将该模块部署到业务机时，对web目录进行一次初始化扫描，这一阶段消耗的资源也会比较大。另一个是在文件被修改的时候，被动地对被修改后的文件进行扫描。第一阶段的实现并没有新意，大概就是对某个目录进行一次遍历，需要注意的是，我们要控制深搜的目录深度，避免目录过深造成大量的内存损耗。第二阶段在最开始的版本中还是傻乎乎地在一个while True中使用目录遍历，但是后来用了python的inotify模块，大概的实现就是监听linux文件系统在文件发生变化时发出的事件，利用该事件去被动地检查被修改的文件。这种方法是不是也可以应用在ctf中？毕竟我们的文件系统监控脚本用的还是那些高性能损耗的技巧。

初始化扫描的脚本如下所示：

jsp_detector.py

 
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import time
import hashlib
from config import *
import all
import sys
import datetime
import sqlite3
from time import sleep
def md5(string):
    """Return the MD5 digest of ``string`` as a hexadecimal string."""
    hasher = hashlib.md5()
    hasher.update(string)
    return hasher.hexdigest()
def check_file_hash(filepath):
    """Compare a file's current MD5 with the hash last recorded for it.

    Looks ``filepath`` up in the ``file`` table through the module-level
    ``conn`` connection. When the stored hash differs from the current one
    (or no row exists yet), a new row holding the current hash and a
    timestamp is inserted and committed.

    Args:
        filepath: Path of the file to hash.

    Returns:
        True if the file content matches the recorded hash, False if the
        file is new or has changed.
    """
    # Read raw bytes so the digest does not depend on text decoding, and
    # close the handle promptly instead of leaving it to garbage collection.
    with open(filepath, 'rb') as handle:
        content = handle.read()

    # Python 2's sqlite3 refuses 8-bit byte strings as bound values, so a
    # byte-string path is turned into text before it is used as the key.
    db_key = filepath
    if isinstance(db_key, bytes):
        db_key = db_key.decode('utf-8', 'replace')

    # Bound parameters instead of string interpolation: file names found in
    # the scanned tree may contain quotes and must not become part of the SQL.
    cursor = conn.execute(
        "select filename,hash_id,last_check from file where filename=?",
        (db_key,))
    old_hash = ""
    for row in cursor:
        # Several rows may exist for one path; the last one returned wins.
        old_hash = row[1]

    hash_id = md5(content)
    # Pause between files; check_hash_span is presumably provided by config.
    sleep(check_hash_span)
    if hash_id != old_hash:
        print("[!]File => " + filepath + " has been modified or created! " + old_hash + "|" + hash_id)
        checked_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        conn.execute(
            "insert into file (filename,hash_id,last_check) values (?,?,?)",
            (db_key, hash_id, checked_at))
        print(hash_id)
        conn.commit()
        return False
    return True
def init_database(db_path='file.db'):
    """Create a fresh, empty ``file`` table in the SQLite database.

    Any existing ``file`` table is dropped first, so previously recorded
    hashes are discarded.

    Args:
        db_path: Path of the SQLite database file. Defaults to ``file.db``
            in the current working directory.
    """
    db = sqlite3.connect(db_path)
    try:
        db.execute('''drop table if exists `file`;''')
        db.execute('''create table file(
       filename  TEXT    NOT NULL,
       hash_id   TEXT  NOT NULL,
       last_check     CHAR(64));''')
        db.commit()
    finally:
        # Release the handle even when one of the statements fails.
        db.close()
def walklevel(some_dir, level=1):
    """Walk a directory tree like ``os.walk`` with a bounded depth.

    Yields the same ``(root, dirs, files)`` triples as ``os.walk``. Once a
    yielded ``root`` lies ``level`` or more separators below ``some_dir``,
    its ``dirs`` list is emptied so the walk does not descend any further.

    Args:
        some_dir: Existing directory to start from; trailing path
            separators are stripped.
        level: Number of directory levels below ``some_dir`` to visit.
    """
    some_dir = some_dir.rstrip(os.path.sep)
    assert os.path.isdir(some_dir)
    deepest_allowed = some_dir.count(os.path.sep) + level
    for entry in os.walk(some_dir):
        yield entry
        current_root, subdirs, _ = entry
        if current_root.count(os.path.sep) >= deepest_allowed:
            # Emptying the list in place tells os.walk to stop descending.
            subdirs[:] = []
# Fail with a usage hint instead of an IndexError when no directory is given.
# The optional second argument is read by all.py as its rule flags.
if len(sys.argv) < 2:
    sys.exit("usage: jsp_detector.py <web_directory> [rule_flags]")

# Create the hash database only when it does not exist yet, so hashes
# recorded by earlier scans are kept.
if not os.path.isfile('file.db'):
    init_database()
conn = sqlite3.connect('file.db')
pathdir = sys.argv[1]
wis = 'jsp|jspx'
# Compile the extension filter once instead of for every file.
ext_pattern = re.compile(r'^\.('+wis+')$')

# Phase 1: collect JSP files that are new or changed since the last scan.
filepaths = []
for fpathe, dirs, fs in walklevel(pathdir, max_directory_depth):
    for f in fs:
        ppp = os.path.join(fpathe, f)
        if (os.path.isfile(ppp)
                and ext_pattern.match(os.path.splitext(ppp)[1])
                and not check_file_hash(ppp)):
            filepaths.append(ppp)

# Phase 2: run the rule engine on every collected file, resting between
# files to keep CPU usage down.
webshell = 0
ok_file = 0
res = []
for f in filepaths:
    shell_type, is_shell = all.is_webshell(f, "jsp")
    if is_shell:
        webshell += 1
    else:
        ok_file += 1
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this append belonged to one branch only; `res` is not read
    # anywhere in this script, so every scanned file is recorded.
    res.append(f)
    print("###################### REST #########################")
    sleep(time_span)

print("%s %s %s" % (webshell, ok_file, ok_file + webshell))

它使用的webshell检测函数如下所示：

all.py

 
#!/usr/bin/env python
from config import *
import re
import sys
def all_rules():
    """Enable every rule list by setting its ``init_rules`` flag to 1."""
    for rulelist_hash in webshell_rules:
        init_rules[rulelist_hash] = 1
# Without a second command-line argument every rule list is enabled;
# otherwise argv[2] is a comma-separated list of on/off flags.
if len(sys.argv) <= 2:
    all_rules()
else:
    tmp = sys.argv[2].split(',')
    # The flags arrive as strings, and any non-empty string -- "0" included --
    # is truthy, so they are converted before is_webshell() tests them.
    # zip() stops at the shorter sequence: a short flag list no longer raises
    # IndexError, and rule lists without a flag keep their current value.
    # NOTE(review): flags are paired with rule lists in the iteration order
    # of init_rules -- confirm that order is the one users expect.
    for rulelist_hash, raw_flag in zip(list(init_rules), tmp):
        init_rules[rulelist_hash] = 0 if raw_flag.strip() in ('', '0') else 1
def is_webshell(filepath,filetype):
    """Check one file against every enabled webshell rule list.

    A rule list matches when none of its rules fails: ``type`` 2 rules are
    plain substring searches, ``type`` 1 rules are regular expressions and
    always fail for files longer than ``max_regrex_file_size``. Rules of any
    other type are ignored.

    Args:
        filepath: Path of the file to inspect.
        filetype: Not used by this function; kept for existing callers.

    Returns:
        A ``(label, is_shell)`` tuple: ``("Skip", 0)`` when the file is
        longer than ``max_file_size``, ``("Unkown shell", 1)`` for the first
        rule list that matches, otherwise ``("OK", 0)``.
    """
    with open(filepath) as handle:
        # One unit past the limit is enough to know the file is oversized,
        # so a huge file is never pulled into memory completely.
        content = handle.read(int(max_file_size) + 1)
    length = len(content)
    if length > max_file_size:
        print("[*]File Skip => " + filepath)
        return ("Skip",0)

    for rulelist_hash in webshell_rules:
        if not init_rules[rulelist_hash]:
            continue  # rule list disabled
        for rule in webshell_rules[rulelist_hash]:
            rule_type = int(rule['type'])
            if rule_type == 2:  # type=2: plain string match
                if rule['data'] not in content:
                    break
            elif rule_type == 1:  # type=1: regular expression
                if length > max_regrex_file_size or not re.search(rule['data'], content):
                    break
        else:
            # No rule in the list failed, so the whole list matched.
            print("[!]webshell find => " + filepath + ";  rule_hash => " + rulelist_hash)
            return ("Unkown shell",1)

    print("[*]File OK => " + filepath)
    return ("OK",0)

使用的inotify文件监控脚本大致如下：

file_detector.py

 
#!/usr/bin/env python
import os
import inotify.adapters
from config import *
import all
# Watch the sample tree and rescan a file whenever it is modified.
i = inotify.adapters.InotifyTree('/root/sample/tmp')
for event in i.event_gen():
    # The generator can hand back None instead of an event; skip those.
    if event is None:
        continue
    (header, type_names, watch_path, filename) = event
    if 'IN_MODIFY' not in type_names:
        continue
    print("WD=(%d) MASK=(%d) COOKIE=(%d) LEN=(%d) MASK->NAMES=%s WATCH-PATH=[%s] FILENAME=[%s]"
          % (header.wd, header.mask, header.cookie, header.len, type_names,
             watch_path.decode('utf-8'), filename.decode('utf-8')))
    target = os.path.join(watch_path, filename)
    # The path may be a directory or may already be gone again; scanning it
    # would raise inside is_webshell() and stop the monitor.
    if os.path.isfile(target):
        all.is_webshell(target, "jsp")

这个文件的内容还需要修改。之后的实验主要是基于第一个阶段进行的，因为在小范围的文件部署中，文件扫描所造成的性能损失可以忽略不计。

扫描参数的确定及检测实验

扫描过程中因为扫描策略的定制，需要设置如下几个参数：

1.最大目录深度；（避免目录过深导致的内存消耗）

2.最大扫描文件大小；（无论字符串匹配还是字符串正则，大文件都会消耗过多的资源）

3.最大正则文件大小；（通过限制使用正则规则的文件大小来优化检测效率）

4.文件检查间隔；（如果间隔为0，虽然检测速度很快，但是CPU利用率会爬升到100%）

前几个参数可以参考阿里云和腾讯云的设置，但是最后一个参数特异性较大，需要通过实验确定。在实验的开始阶段，我们先通过已有的webshell样本和普通样本生成上万个位于多级目录下的文件，然后在使用我们的webshell检测模块检测的同时，使用python的psutil监视整个系统的资源消耗。在实验开始前，我们首先进行粗略的实验，确定可行参数的大致范围。得到的参数测试范围是：0.05s～0.25s，以0.05s为一个采样间隔。检测实验所使用的虚拟机环境是centos7，4核8GB。使用的测试文件产生脚本如下所示：

random_file.py

 
#!/usr/bin/env python
from config import *
import random
import os
def get_random_string():
    """Return a random name that is 5 to 20 characters long.

    Characters are drawn from ASCII letters, the underscore and digits.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"
    size = random.randint(5,20)
    return "".join(alphabet[random.randint(0,62)] for _ in range(size))
def random_file(dirpath):
    """Copy between 4 and 10 randomly chosen samples into ``dirpath``.

    Each sample is taken from the ``jsp_sample`` directory and stored under
    a fresh random name ending in ``.jsp``.

    Args:
        dirpath: Existing directory that receives the copies.

    Raises:
        IOError or OSError: If a sample cannot be copied. The earlier shell
            ``cp`` call ignored such failures silently.
    """
    import shutil  # local import: only this function needs it

    for _ in range(random.randint(4,10)):
        # Choose among however many samples exist instead of assuming that
        # index 761 is the last valid one.
        # NOTE(review): `files` is presumably the sample list from config.
        src_file = random.choice(files)
        dest_file = get_random_string() + ".jsp"
        # A direct copy avoids building a shell command out of file names.
        shutil.copy(os.path.join("jsp_sample", src_file),
                    os.path.join(dirpath, dest_file))
def random_dir(dirpath,now_depth):
    """Recursively build a random directory tree filled with sample files.

    Args:
        dirpath: Directory to populate.
        now_depth: Depth of ``dirpath``. Once it exceeds ``max_depth`` the
            directory receives sample files instead of subdirectories.

    Returns:
        1 when ``dirpath`` was filled with files, otherwise None.
    """
    if now_depth > max_depth:
        random_file(dirpath)
        return 1
    for _ in range(random.randint(3,10)):
        new_dir = os.path.join(dirpath, get_random_string())
        # Created directly rather than through a shell command. makedirs
        # also creates a missing parent, and a name collision is skipped.
        if not os.path.isdir(new_dir):
            os.makedirs(new_dir)
    for directory in os.listdir(dirpath):
        child = os.path.join(dirpath, directory)
        # Only directories can be descended into; a stray file in an
        # already existing tree would make the recursion fail.
        if os.path.isdir(child):
            random_dir(child, now_depth+1)
   
# Generate the tree only when run as a script, so importing this module has
# no side effects on the file system.
if __name__ == "__main__":
    random_dir("./tmp",1)

使用的系统资源监测脚本如下所示：

ps.py

 
#!/usr/bin/env python
import psutil
import time
import python.jsp_detector.config
# Prime cpu_percent(): with interval=None the first call has no earlier
# sample to compare against, so its return value is thrown away.
psutil.cpu_percent(interval=None, percpu=False)
# One snapshot supplies both baseline counters.
io_baseline = psutil.disk_io_counters(perdisk=False)
IO_write_old = io_baseline.write_bytes
IO_read_old = io_baseline.read_bytes
res_filename = "res_" + str(python.jsp_detector.config.time_span) + ".txt"
# Start from an empty result file.
with open(res_filename,'w') as res_file:
    res_file.write('')
while True:
    # Sleep first so that every sample, the first one included, covers a
    # one-second window.
    time.sleep(1)
    cpu_consume = str(psutil.cpu_percent(interval=None, percpu=False))
    memory_consume = str(psutil.virtual_memory().percent)
    io_now = psutil.disk_io_counters(perdisk=False)
    IO_read_now = io_now.read_bytes
    IO_write_now = io_now.write_bytes
    # Bytes moved during the last second, reported in units of 1000 bytes.
    IO_read_speed = str((IO_read_now - IO_read_old)//1000)
    IO_write_speed = str((IO_write_now - IO_write_old)//1000)
    IO_read_old = IO_read_now
    IO_write_old = IO_write_now
    print("CPU => " + cpu_consume + "%")
    print("memory => " + memory_consume + "%")
    print("IO read => " + IO_read_speed + "KBps")
    print("IO write => " + IO_write_speed + "KBps")
    res = cpu_consume + ' ' +  memory_consume + ' ' + IO_read_speed + ' ' + IO_write_speed + ' ' + '\r\n'
    # Append and close each time so the data is on disk even if the monitor
    # is killed.
    with open(res_filename,'a') as res_file:
        res_file.write(res)
    print("##################################################################")

最终由ps.py根据配置的time_span不同，在当前目录下生成不同时间间隔下的系统资源损耗的实验结果。在最后的一个部分，我们将使用matlab对这些数据进行处理。

实验数据的处理和结论

在ps.py产生的数据中主要有四组至关重要的参数：CPU利用率，内存利用率，IO读取速度，IO写入速度。我们通过如下的matlab代码对这几组数据进行处理：

webshell_data.m

 for i=0.05:0.05:0.25
    % Load the samples recorded for this time_span value.
    filename = strcat('C:\Users\Administrator\Desktop\intern\data\res_',num2str(i),'.txt');
    fid = fopen(filename,'r');
    if fid == -1
        error('webshell_data:open','Cannot open %s',filename);
    end
    % Every row holds CPU %, memory %, read speed and write speed. At most
    % the first 200 rows are used; a shorter file no longer causes an
    % indexing error.
    data = fscanf(fid,'%f %f %f %f',[4,200])';
    % Close the data file; the handle used to be overwritten and leaked.
    fclose(fid);
    cpu_consume = data(:,1);
    memory_consume = data(:,2);
    read_IO = data(:,3);
    write_IO = data(:,4);
    time = (1:size(data,1))';

    % One full-screen figure with a 2x2 grid, one panel per metric.
    scrsz = get(0,'ScreenSize');
    figure1=figure('Position',[0 30 scrsz(3) scrsz(4)-95]);
    subplot(2,2,1)
    plot(time,cpu_consume)
    grid on
    ylabel('CPU consumption ratio /%')
    xlabel('Time/s')
    subplot(2,2,2)
    plot(time,memory_consume)
    grid on
    ylabel('Memory consumption ratio /%')
    xlabel('Time/s')
    subplot(2,2,3)
    plot(time,read_IO)
    grid on
    ylabel('Read IO speed /KBps')
    xlabel('Time/s')
    subplot(2,2,4)
    plot(time,write_IO)
    grid on
    ylabel('Write IO speed /KBps')
    xlabel('Time/s')
    saveas(gcf,strcat('C:\Users\Administrator\Desktop\intern\data\img\res_',num2str(i),'.jpg'),'jpg');

    % Append the averages of this run to the summary file.
    avg_fid = fopen('C:\Users\Administrator\Desktop\intern\data\average.txt','at');
    if avg_fid == -1
        error('webshell_data:open','Cannot open average.txt for appending');
    end
    % An explicit format keeps the space after "result:" (strcat strips
    % trailing blanks of char inputs) and stops data being used as a format.
    fprintf(avg_fid,'result: %s\r\n',num2str(i));
    fprintf(avg_fid,'%f %f %f %f\n',mean(cpu_consume),mean(memory_consume),mean(read_IO),mean(write_IO));
    fclose(avg_fid);
 end
close all
clear all

最终得到的结果折线图如下所示：

0.05s

0.1s

0.15s

0.2s

0.25s

图像中存在着很多突变点，可以通过大量实验取均值来消除其影响。但是200s内的平均值还是具有相当重要的参考价值：

time_span	scan_speed	average_CPU	average_memory	directory_dep	average_write_IO
0.05	18.23	2.5835	18.9095	4	4.25
0.1	9.52	1.669	18.9235	4	4.18
0.15	6.45	1.2635	18.914	4	5.39
0.2	4.88	1.1785	18.939	4	5.64
0.25	3.92	0.9665	18.908	4	28.94

从对阿里云和腾讯云的webshell规则逆向，再到样本测试，规则优化，最终到参数确定实验，一整套流畅的操作让我体会到了工程之美。而且，在开始一项工程之前，特别是业务相关的工程，我们不要吝啬在调研和方案把握上所花的时间，指示性的思维，可以以明确的工作方向大大优化你在工程中投入的劳动成本。其次，业务代码和正常学术用途的代码不一样，不需要花哨的算法和验证，只求在最快的时间内，提供最稳定的，占用资源最少的程序。从学术界转到工业界，还是有一个大跨度的思维映射。

继续滑动看下一个