Add a couple of cool things

This commit is contained in:
bt3 2018-06-14 22:24:08 -07:00
parent ba37a20b2c
commit 98bd1fe12f
16 changed files with 439 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.


@ -6,6 +6,7 @@
* ELK shell scripts.
* data science quick scripts.
* Suricata shell scripts and config files.
* Chef useful commands.
😈 OBVIOUSLY I DON'T INCLUDE SENSITIVE PARTS 😈

1
Suricata Submodule

@ -0,0 +1 @@
Subproject commit 5ea9eee01bc9f55d93ca3cdcd9e02490a49616c8

47
chef/CHEF_COMMANDS.md Normal file

@ -0,0 +1,47 @@
## Chef Commands
Roles contain recipes. To list all roles:
```
$ knife role list
```
To see all the recipes in a given role:
```
$ knife role show <role_name>
```
To see the nodes:
```
$ knife node list
$ knife node show <node_name>
```
To register a brand-new machine with Chef:
```
$ knife bootstrap
```
This will SSH into the machine, download the Chef package, install Chef to `/opt`, and get it running and registered with the Chef server (see `~/.chef/knife.rb`).
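A minimal sketch of a typical call (the host, SSH user, node name, and role below are placeholders, not values from this repo):
```
$ knife bootstrap <host> -x <ssh_user> --sudo -N <node_name> -r 'role[<role_name>]'
```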
Before adding/pushing a change in Chef:
```
$ knife spork role from file roles/key.json
$ knife spork omni logstash --remote
```
To bump the logstash cookbook version:
```
$ knife spork bump logstash
```
then push it again:
```
$ knife spork omni logstash --remote
```
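To double-check which cookbook versions the server now has (plain knife, not spork; `logstash` here is just the example cookbook from above):
```
$ knife cookbook show logstash
```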

45
data_science/export_results.py Executable file

@ -0,0 +1,45 @@
#!/usr/bin/env python
# argparse ships with the Python standard library (2.7+);
# on older interpreters install it with: pip install argparse
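# Example invocation (hypothetical file names):
#   ./export_results.py -s results.txt -d results.csv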
import sys
import os.path
import argparse
from pandas import DataFrame
def read_data(data):
lines = data.readlines()
feature, precision, recall, f1 = [], [], [], []
for line in lines:
line_clean = line.strip().split(",")
feature.append(line_clean[0])
precision.append(line_clean[1])
recall.append(line_clean[4])
f1.append(line_clean[6])
return feature, precision, recall, f1
def save_to_spreadsheet(resultfile, data):
try:
df = DataFrame({'Feature': data[0], 'Precision': data[1], 'Recall': data[2], 'f1-score': data[3]})
df.to_csv(resultfile, index=False)
print("Spreadsheet saved at {0}".format(resultfile))
except Exception:
print("Error: {0}".format(sys.exc_info()[0]))
def menu():
parser = argparse.ArgumentParser(description='Copy data results into a spreadsheet.')
parser.add_argument('-s', dest='input', type=argparse.FileType('r'), required=True, help="File with the results.")
parser.add_argument('-d', dest='output', required=True, help="The name of the file to save the spreadsheet.")
args = parser.parse_args()
return args.input, args.output
if __name__ == "__main__":
datafile, resultfile = menu()
data = read_data(datafile)
save_to_spreadsheet(resultfile, data)

225
data_science/runEval.py Normal file

@ -0,0 +1,225 @@
#!/usr/bin/env python
"""
Run svm_light, parse its stdout, calculate
ML scores, HDFS copy data to local.
"""
import sys
import os
import getpass
import subprocess
import shutil
import math
def delete_dir(dir_path):
'''
Remove a directory.
Args:
dir_path: full path to the directory.
'''
if os.path.isdir(dir_path):
shutil.rmtree(dir_path)
def usage():
'''
Handle the CLI arguments.
'''
args = sys.argv
if len(args) != 3:
print("Usage: ./runEval <method> <version>")
sys.exit(2)
return args[1], args[2]
def create_dir(dir_path):
'''
Create a directory.
Args:
dir_path: full path to the directory.
'''
if not os.path.exists(dir_path):
os.makedirs(dir_path)
def run_svm_classify(test_data, svml_model, svml_eval):
'''
Spawn a subprocess to run svm_classify binary.
From svm_classify.c, svm_light usage requires the following
arguments: example_file model_file output_file.
Args:
test_data: path_to_feature/test.dat
svml_model: something like ~/data/models/svmlight/method/version/model
svml_eval: something like ~/data/models/svmlight/method/version/eval
Returns:
Strings with stdout and stderr so that it can be parsed later.
'''
p = subprocess.Popen(['./models/svm_classify', \
'{0}'.format(test_data), \
'{0}'.format(svml_model),\
'{0}'.format(svml_eval)],\
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = p.communicate()
return out, err
def paste_data(test_data, svml_eval, final_eval, svml_alpha, final_alphas, out):
'''
Copy all eval and alpha data from results to local files.
Args:
src and dst paths.
'''
# Copy all eval data.
with open(test_data, 'r') as ft:
test_data = ft.readlines()
with open(svml_eval, 'r') as fe:
eval_data = fe.readlines()
with open(final_eval, 'a') as f:
for line in test_data:
f.write('{0}\n'.format(line))
for line in eval_data:
f.write('{0}\n'.format(line))
# Copy all alpha data.
with open(svml_alpha, 'r') as fa:
alpha_data = fa.readlines()
with open(final_alphas, 'a') as f:
for line in alpha_data:
f.write('{0} {1}\n'.format(line, out))
def parse_svmlight_output(out):
'''
Parse the svm_light stdout string for an example
Returns:
c: counts
p: precision
r: recall
'''
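# The stdout is assumed to follow svm_classify's usual format, e.g.
# "...OK. (123 support vectors read)...Precision/recall on test set: 85.71%/75.00%"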
c = out.split('OK. (')[1].split(' support')[0]
pr = out.split('Precision/recall on test set: ')[1].split(' support')[0].strip()
p, r = pr.split('/')
p = float(p.strip('%').strip()) / 100
r = float(r.strip('%').strip()) / 100
return c, p, r
def hdfs_copy_data(home_dir, method, version):
'''
Run CLI HDFS commands to clean up and save data.
'''
os.system('hdfs dfs -rm /data/shared/structdata/modelOutput/{0}/{1}/scores'.format(method, version))
os.system('hdfs dfs -rm /data/shared/structdata/modelOutput/{0}/{1}/alphas'.format(method, version))
os.system('hdfs dfs -mkdir /data/shared/structdata/modelOutput/{0}/{1}'.format(method, version))
os.system('hdfs dfs -copyFromLocal {0}/data/eval/{1}/{2}/alphas \
/data/shared/structdata/modelOutput/{3}/{4}/alphas'.format(home_dir, version, method, method, version))
os.system('hdfs dfs -copyFromLocal {0}/data/eval/{1}/{2}/eval \
/data/shared/structdata/modelOutput/{3}/{4}/scores'.format(home_dir, version, method, method, version))
def calculate_scores(list_of_scores):
'''
Calculate the mean of a given list of scores,
taking care of any nan or 0 division.
'''
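# e.g. [0.8, nan, 0.6] -> (0.8 + 0.6) / 2 = 0.7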
c, score = 0, 0
for i in list_of_scores:
if not math.isnan(i):
c += 1
score += i
if c > 0:
return score / c
else:
return 0
def calculate_f1(precision, recall):
'''
Calculates the f1-score as the harmonic
mean of precision and recall.
'''
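# e.g. precision=0.8, recall=0.6 -> 2 / (1/0.8 + 1/0.6) ~= 0.686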
if precision == 0 or recall == 0:  # avoid dividing by zero
return 0
else:
return 2 / (1/precision + 1/recall)
if __name__ == '__main__':
# Grab the CLI arguments.
METHOD, VERSION = usage()
# Setup output dirs.
home_dir = os.path.join('/home', getpass.getuser())
final_dir = os.path.join(home_dir, 'data/eval', VERSION, METHOD)
final_alphas = os.path.join(final_dir, 'alphas')
final_eval = os.path.join(final_dir, 'eval')
delete_dir(final_alphas)
delete_dir(final_eval)
create_dir(final_dir)
# Loop over the attributes and features.
training_data_dir = os.path.join(home_dir, 'data/training_data/', VERSION, METHOD)
for attribute in os.listdir(training_data_dir):
attribute_path = os.path.join(training_data_dir, attribute)
counts = 0
precision, recall = [], []
for feature in os.listdir(attribute_path):
# Create all the paths in use.
out = os.path.join(VERSION, METHOD, attribute, feature)
svmlight = os.path.join(home_dir,'data/models/svmlight', out)
svml_model = os.path.join(svmlight, 'model')
svml_eval = os.path.join(svmlight, 'eval')
svml_alpha = os.path.join(svmlight, 'alphas')
test_data = os.path.join(attribute_path, feature, 'test.dat')
# Run svm_classify, keeping its stdout separate from the feature path in `out`.
svm_out, err = run_svm_classify(test_data, svml_model, svml_eval)
# Save current results.
paste_data(test_data, svml_eval, final_eval, svml_alpha, final_alphas, out)
# Parse output from svm_classify to print to stdout.
if err:
print('Error: {0}'.format(err))
# Get Train counts, Test counts, Accuracy, Precision, Recall.
c, p, r = parse_svmlight_output(svm_out)
counts += int(c)
precision.append(p)
recall.append(r)
attribute_precision = calculate_scores(precision)
attribute_recall = calculate_scores(recall)
attribute_f1 = calculate_f1(attribute_precision, attribute_recall)
print("{: <20} Counts: {: <20} Precision: {: <20} Recall: {: <20} F1-score: {: <20}".format(attribute.title(), \
counts, round(attribute_precision, 4), round(attribute_recall, 4), round(attribute_f1, 4)))
# Copying results from remote hdfs.
print("\nCopying results to hdfs")
hdfs_copy_data(home_dir, METHOD, VERSION)
print("\nDone!".format())

5
elk/NICE.md Normal file

@ -0,0 +1,5 @@
# Nice ELK hacks
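Count hits per username for a given process (osqueryd here) in one day's logstash index: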
```
curl -s logs.HOST.com:9200/logstash-2017.09.08/_search\?q=ty_params.ProcessName:osqueryd\&size=10000\&sort=@timestamp:desc | jq -r '.hits.hits[]._source.ty_params.Username' | sort | uniq -c | sort -nr
```

47
elk/grabbing_elk_data.py Normal file

@ -0,0 +1,47 @@
import elasticsearch
import whois
import json
from elasticsearch import Elasticsearch
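# Pull named_query_log events that do not match *HOST.com or *otherhost.com,
# WHOIS each external dns_dest, and dump the results to processed_domains.txt.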
es = Elasticsearch([{ 'host': "HOST NAME"}])
query = {
'size': 100,
'query': {
'filtered': {
'query': {
'query_string': {
'query': 'type:named_query_log',
'analyze_wildcard': True
}
},
'filter': {
'bool': {
'must_not': {
'query_string': {
'query': '*HOST.com OR *otherhost.com',
'analyze_wildcard': True
}
}
}
}
}}}
# Make the search
res = es.search(index="LOG-NAME", body=query)
results = []
counter = 0
# Print out our results
for hit in res['hits']['hits']:
if "dns_dest" in hit['_source'].keys():
try:
results.append(json.dumps(whois.whois(hit['_source']['dns_dest'])))
except Exception as e:
pass
counter += 1
print "Scanning {0}/{1} domains, {2} succeeded..".format(counter, len(res['hits']['hits']), len(results))
with open('processed_domains.txt', 'w') as outfile:
json.dump(results, outfile)

49
elk/set_log.py Normal file

@ -0,0 +1,49 @@
#!/usr/bin/env python
import os
import subprocess
import json
import socket
import logging
LOG_PATH = "/var/log/logname.log"
FORWARD_PATH = "/etc/logstash-forwarder.conf"
LOG_LEVEL = logging.DEBUG
logging.basicConfig(level=LOG_LEVEL)
# Set up logpath
if not os.path.isfile(LOG_PATH):
logging.info("No {0} file. Calling: sudo touch {1}".format(LOG_PATH, LOG_PATH))
subprocess.call("sudo touch {0}".format(LOG_PATH), shell=True)
logging.info("Setting perms. Calling: sudo chmod 666 {0}".format(LOG_PATH))
subprocess.call("sudo chmod 666 {0}".format(LOG_PATH), shell=True)
# Set up forwarding
if os.path.isfile(FORWARD_PATH):
logging.info("Forwarding {0} to logstash...".format(FORWARD_PATH))
try:
with open(FORWARD_PATH, "r+") as f:
data = json.load(f)
try:
if LOG_PATH not in data['files'][0]['paths']:
data['files'][0]['paths'].append(LOG_PATH)
jsonFile = open(FORWARD_PATH, "w+")
jsonFile.write(json.dumps(data))
jsonFile.close()
except KeyError:
logging.error("Could not set logstash: {0} is not well formated.".format(FORWARD_PATH))
except IOError:
logging.error("Could not open {0}".format(FORWARD_PATH))
else:
hostname = socket.gethostname()
# Search for the logstash-forwarder location for each host
if "prodvpn" in hostname:
logging.warning("Forwarder should be in {0}. Please set up a forwarder and try again.".format(FORWARD_PATH))

BIN
shell_scripts/.DS_Store vendored Normal file

Binary file not shown.


@ -0,0 +1,15 @@
* Download all PDFs from a URL
$ wget --recursive --level=2 --no-directories --no-host-directories --accept pdf <url>
* Show the extended attributes of a file
$ xattr -l $file
* Check the code signature of an app
$ codesign -dvvv $file.app
* Show the full system profile
$ system_profiler -detailLevel full

3
shell_scripts/useful/rsync.sh Executable file

@ -0,0 +1,3 @@
#!/bin/bash
VM=vm_dev
rsync -avz -e ssh <local dir> ${VM}:<remote dir> --exclude-from ~/.rsync-excludes

1
shell_scripts/useful/sshfs.sh Executable file

@ -0,0 +1 @@
/usr/local/bin/sshfs vm_dev:<remote dir> <mount local dir>