diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..f1691d3
Binary files /dev/null and b/.DS_Store differ
diff --git a/README.md b/README.md
index aa9dfa1..c4f5bae 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@
 * ELK shell scripts.
 * data science quick scripts.
 * Suricata shell scripts and config files.
+* Useful Chef commands.
 
 😈 OBVIOUSLY I DON'T INCLUDE SENSITIVE PARTS 😈
diff --git a/Suricata b/Suricata
new file mode 160000
index 0000000..5ea9eee
--- /dev/null
+++ b/Suricata
@@ -0,0 +1 @@
+Subproject commit 5ea9eee01bc9f55d93ca3cdcd9e02490a49616c8
diff --git a/chef/CHEF_COMMANDS.md b/chef/CHEF_COMMANDS.md
new file mode 100644
index 0000000..55a1264
--- /dev/null
+++ b/chef/CHEF_COMMANDS.md
@@ -0,0 +1,47 @@
+## Chef Commands
+
+List all the roles (a role groups the recipes that run on a node):
+
+```
+$ knife role list
+```
+
+Show all the recipes that are in a given role:
+
+```
+$ knife role show <ROLE>
+```
+
+To see the nodes:
+
+```
+$ knife node list
+$ knife node show <NODE>
+```
+
+To register a brand new machine with Chef:
+
+```
+$ knife bootstrap <FQDN>
+```
+
+This will SSH into the machine, download the Chef package, install Chef to `/opt`, and get it running and registered with the Chef server (see `~/.chef/knife.rb`).
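+
+For example (hypothetical hostname, SSH user, and node name; `-x`, `--sudo`, and `-N` are the standard `knife bootstrap` flags):
+
+```
+$ knife bootstrap node1.example.com -x ubuntu --sudo -N node1
+```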
+
+Before adding/pushing a change in Chef:
+
+```
+$ knife spork role from file roles/key.json
+$ knife spork omni logstash --remote
+```
+
+To bump the logstash cookbook version:
+
+```
+$ knife spork bump logstash
+```
+
+then push it to the server:
+
+```
+$ knife spork omni logstash --remote
+```
diff --git a/data_science/export_results.py b/data_science/export_results.py
new file mode 100755
index 0000000..0976031
--- /dev/null
+++ b/data_science/export_results.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# Requires pandas (argparse is in the standard library since Python 2.7):
+# $ pip install pandas
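+#
+# Example usage (hypothetical file names; -s and -d are the flags defined in menu() below):
+#   $ ./export_results.py -s results.txt -d results.csv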
+
+import sys
+import argparse
+from pandas import DataFrame
+
+
+def read_data(data):
+    lines = data.readlines()
+
+    feature, precision, recall, f1 = [], [], [], []
+    for line in lines:
+        line_clean = line.strip().split(",")
+        feature.append(line_clean[0])
+        precision.append(line_clean[1])
+        recall.append(line_clean[4])
+        f1.append(line_clean[6])
+    return feature, precision, recall, f1
+
+
+def save_to_spreadsheet(resultfile, data):
+    try:
+        df = DataFrame({'Feature': data[0], 'Precision': data[1], 'Recall': data[2], 'f1-score': data[3]})
+        df.to_csv(resultfile, index=False)
+        print("Spreadsheet saved at {0}".format(resultfile))
+    except Exception:
+        print("Error: {0}".format(sys.exc_info()[0]))
+
+
+def menu():
+    parser = argparse.ArgumentParser(description='Copy data results into a spreadsheet.')
+    parser.add_argument('-s', dest='input', type=argparse.FileType('r'), required=True, help="File with the results.")
+    parser.add_argument('-d', dest='output', required=True, help="The name of the file to save the spreadsheet.")
+    args = parser.parse_args()
+    return args.input, args.output
+
+
+if __name__ == "__main__":
+    datafile, resultfile = menu()
+    data = read_data(datafile)
+    save_to_spreadsheet(resultfile, data)
diff --git a/data_science/runEval.py b/data_science/runEval.py
new file mode 100644
index 0000000..fec0c1a
--- /dev/null
+++ b/data_science/runEval.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+
+"""
+    Run svm_light, parse its stdout, calculate
+    ML scores, and copy the results to HDFS.
+"""
+
+import sys
+import os
+import getpass
+import subprocess
+import shutil
+import math
+
+
+def delete_path(path):
+    '''
+    Remove a file or a directory.
+
+    Args:
+        path: full path to the file or directory.
+    '''
+    if os.path.isdir(path):
+        shutil.rmtree(path)
+    elif os.path.isfile(path):
+        os.remove(path)
+
+
+def usage():
+    '''
+    Handle the CLI arguments.
+    '''
+    args = sys.argv
+    if len(args) != 3:
+        print("Usage: ./runEval.py <method> <version>")
+        sys.exit(2)
+    return args[1], args[2]
+
+
+def create_dir(dir_path):
+    '''
+    Create a directory.
+
+    Args:
+        dir_path: full path to the directory.
+    '''
+    if not os.path.exists(dir_path):
+        os.makedirs(dir_path)
+
+
+def run_svm_classify(test_data, svml_model, svml_eval):
+    '''
+    Spawn a subprocess to run the svm_classify binary.
+
+    From svm_classify.c, svm_light usage requires the following
+    arguments: example_file model_file output_file.
+
+    Args:
+        test_data: path_to_feature/test.dat
+        svml_model: something like ~/data/models/svmlight/method/version/model
+        svml_eval: something like ~/data/models/svmlight/method/version/eval
+
+    Returns:
+        Strings with stdout and stderr so that they can be parsed later.
+    '''
+    p = subprocess.Popen(['./models/svm_classify', test_data, svml_model, svml_eval],
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    out, err = p.communicate()
+    return out, err
+
+
+def paste_data(test_data, svml_eval, final_eval, svml_alpha, final_alphas, out_path):
+    '''
+    Append all eval and alpha data from the results to the local files.
+
+    Args:
+        src and dst paths.
+    '''
+    # Copy all eval data.
+    with open(test_data, 'r') as ft:
+        test_data = ft.readlines()
+
+    with open(svml_eval, 'r') as fe:
+        eval_data = fe.readlines()
+
+    with open(final_eval, 'a') as f:
+        for line in test_data:
+            f.write(line)
+        for line in eval_data:
+            f.write(line)
+
+    # Copy all alpha data, tagging each line with its version/method/attribute/feature path.
+    with open(svml_alpha, 'r') as fa:
+        alpha_data = fa.readlines()
+
+    with open(final_alphas, 'a') as f:
+        for line in alpha_data:
+            f.write('{0} {1}\n'.format(line.rstrip('\n'), out_path))
+
+
+def parse_svmlight_output(out):
+    '''
+    Parse the svm_light stdout string for an example.
+
+    Returns:
+        c: counts
+        p: precision
+        r: recall
+    '''
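+    # The splits below assume svm_classify output of the form (abridged):
+    #   ... OK. (345 support vectors read) ...
+    #   Precision/recall on test set: 85.71%/60.00%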
+ print("\nCopying results to hdfs") + hdfs_copy_data(home_dir, METHOD, VERSION) + print("\nDone!".format()) diff --git a/elk/NICE.md b/elk/NICE.md new file mode 100644 index 0000000..69a66e7 --- /dev/null +++ b/elk/NICE.md @@ -0,0 +1,5 @@ +# Nice ELK hacks + +``` +curl -s logs.HOST.com:9200/logstash-2017.09.08/_search\?q=ty_params.ProcessName:osqueryd\&size=10000\&sort=@timestamp:desc | jq -r '.hits.hits[]._source.ty_params.Username' | sort | uniq -c | sort -nr +``` diff --git a/elk/grabbing_elk_data.py b/elk/grabbing_elk_data.py new file mode 100644 index 0000000..d805c36 --- /dev/null +++ b/elk/grabbing_elk_data.py @@ -0,0 +1,47 @@ +import elasticsearch +import whois +import json + +from elasticsearch import Elasticsearch + +es = Elasticsearch([{ 'host': "HOST NAME"}]) + +query = { + 'size': 100, + 'query': { + 'filtered': { + 'query': { + 'query_string': { + 'query': 'type:named_query_log', + 'analyze_wildcard': True + } + }, + 'filter': { + 'bool': { + 'must_not': { + 'query_string': { + 'query': '*HOST.com OR *otherhost.com', + 'analyze_wildcard': True + } + } + } + } +}}} + +# Make the search +res = es.search(index="LOG-NAME", body=query) + +results = [] +counter = 0 +# Print out our results +for hit in res['hits']['hits']: + if "dns_dest" in hit['_source'].keys(): + try: + results.append(json.dumps(whois.whois(hit['_source']['dns_dest']))) + except Exception as e: + pass + counter += 1 + print "Scanning {0}/{1} domains, {2} succeeded..".format(counter, len(res['hits']['hits']), len(results)) + +with open('processed_domains.txt', 'w') as outfile: + json.dump(results, outfile) \ No newline at end of file diff --git a/elk/set_log.py b/elk/set_log.py new file mode 100644 index 0000000..8ca254b --- /dev/null +++ b/elk/set_log.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +import os +import subprocess +import json +import socket +import logging + +LOG_PATH = "/var/log/logname.log" +FORWARD_PATH = "/etc/logstash-forwarder.conf" +LOG_LEVEL = logging.DEBUG + + +# Set up logpath +if not os.path.isfile(LOG_PATH): + + logging.info("No {0} file. Calling: sudo touch {1}".format(LOG_PATH, LOG_PATH)) + subprocess.call("sudo touch {0}".format(LOG_PATH), shell=True) + + logging.info("Setting perms. Calling: sudo chmod 666 {0}".format(LOG_PATH)) + subprocess.call("sudo chmod 666 {0}".format(LOG_PATH), shell=True) + +# Set up forwarding +if os.path.isfile(FORWARD_PATH): + + logging.info("Forwarding {0} to logstash...".format(FORWARD_PATH)) + try: + with open(FORWARD_PATH, "r+") as f: + data = json.load(jsonFile) + + try: + if LOG_PATH not in data['files'][0]['paths']: + data['files'][0]['paths'].append(LOG_PATH) + jsonFile = open("/etc/logstash-forwarder.conf", "w+") + jsonFile.write(json.dumps(data)) + + except KeyError: + logging.error("Could not set logstash: {0} is not well formated.".format(FORWARD_PATH)) + + except IOError: + logging.error("Could not open {0}".format(FORWARD_PATH)) + +else: + hostname = socket.gethostname() + + #Search for logstash-forwarder locations per each host + if "prodvpn" in hostname: + logging.warning("Forwarder should be in {0}. 
+# Set up forwarding.
+if os.path.isfile(FORWARD_PATH):
+
+    logging.info("Forwarding {0} to logstash...".format(FORWARD_PATH))
+    try:
+        with open(FORWARD_PATH, "r") as f:
+            data = json.load(f)
+
+        try:
+            if LOG_PATH not in data['files'][0]['paths']:
+                data['files'][0]['paths'].append(LOG_PATH)
+                with open(FORWARD_PATH, "w") as jsonFile:
+                    jsonFile.write(json.dumps(data))
+
+        except KeyError:
+            logging.error("Could not set logstash: {0} is not well formatted.".format(FORWARD_PATH))
+
+    except IOError:
+        logging.error("Could not open {0}".format(FORWARD_PATH))
+
+else:
+    hostname = socket.gethostname()
+
+    # Search for the logstash-forwarder location on each host.
+    if "prodvpn" in hostname:
+        logging.warning("Forwarder should be in {0}. Please set up a forwarder and try again.".format(FORWARD_PATH))
diff --git a/shell_scripts/.DS_Store b/shell_scripts/.DS_Store
new file mode 100644
index 0000000..e4b1645
Binary files /dev/null and b/shell_scripts/.DS_Store differ
diff --git a/shell_scripts/BASH_USEFUL.md b/shell_scripts/BASH_USEFUL.md
new file mode 100644
index 0000000..df780a2
--- /dev/null
+++ b/shell_scripts/BASH_USEFUL.md
@@ -0,0 +1,15 @@
+* Download all PDFs from a URL
+
+$ wget --recursive --level=2 --no-directories --no-host-directories --accept pdf <url>
+
+
+* Extended attributes of a file
+$ xattr -l $file
+
+
+* Check the code signature of an app
+$ codesign -dvvv $file.app
+
+
+* Show the full system configuration
+$ system_profiler -detailLevel full
\ No newline at end of file
diff --git a/shell_scripts/super_fun/let_it_snow.sh b/shell_scripts/funny/let_it_snow.sh
similarity index 100%
rename from shell_scripts/super_fun/let_it_snow.sh
rename to shell_scripts/funny/let_it_snow.sh
diff --git a/shell_scripts/super_fun/matrix.sh b/shell_scripts/funny/matrix.sh
similarity index 100%
rename from shell_scripts/super_fun/matrix.sh
rename to shell_scripts/funny/matrix.sh
diff --git a/shell_scripts/super_fun/rainbow.sh b/shell_scripts/funny/rainbow.sh
similarity index 100%
rename from shell_scripts/super_fun/rainbow.sh
rename to shell_scripts/funny/rainbow.sh
diff --git a/shell_scripts/useful/rsync.sh b/shell_scripts/useful/rsync.sh
new file mode 100755
index 0000000..375294f
--- /dev/null
+++ b/shell_scripts/useful/rsync.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+VM=vm_dev
+rsync -avz -e ssh ${VM}:<remote_path> <local_path> --exclude-from ~/.rsync-excludes
diff --git a/shell_scripts/useful/sshfs.sh b/shell_scripts/useful/sshfs.sh
new file mode 100755
index 0000000..95a415e
--- /dev/null
+++ b/shell_scripts/useful/sshfs.sh
@@ -0,0 +1 @@
+/usr/local/bin/sshfs vm_dev:<remote_path> <mount_point>
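+# To detach the mount when done (umount works for sshfs/FUSE mounts on macOS;
+# on Linux use `fusermount -u`):
+#   $ umount <mount_point>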