Add a couple of cool things

This commit is contained in:
bt3 2018-06-14 22:24:08 -07:00
parent ba37a20b2c
commit 98bd1fe12f
16 changed files with 439 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.


@ -6,6 +6,7 @@
* ELK shell scripts.
* data science quick scripts.
* Suricata shell scripts and config files.
* Chef useful commands.
😈 OBVIOUSLY I DON'T INCLUDE SENSITIVE PARTS 😈

1
Suricata Submodule

@ -0,0 +1 @@
Subproject commit 5ea9eee01bc9f55d93ca3cdcd9e02490a49616c8

47
chef/CHEF_COMMANDS.md Normal file

@ -0,0 +1,47 @@
## Chef Commands
Roles contain recipes. To list all roles:
```
$ knife role list
```
To see all the recipes in a given role:
```
$ knife role show <role_name>
```
To see the nodes:
```
$ knife node list
$ knife node show <node_name>
```
To register a brand-new machine with Chef:
```
$ knife bootstrap
```
This will SSH into the machine, download the Chef package, install Chef to `/opt`, and get it running and registered with the Chef server (see `~/.chef/knife.rb`).
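A minimal sketch of a typical call (the host, SSH user, node name, and role below are placeholders, not values from this repo):
```
$ knife bootstrap <host> -x <ssh_user> --sudo -N <node_name> -r 'role[<role_name>]'
```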
Before adding/pushing a change in Chef:
```
$ knife spork role from file roles/key.json
$ knife spork omni logstash --remote
```
To bump the logstash cookbook version:
```
$ knife spork bump logstash
```
then push it again:
```
$ knife spork omni logstash --remote
```
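To double-check which cookbook versions the server now has (plain knife, not spork; `logstash` here is just the example cookbook from above):
```
$ knife cookbook show logstash
```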

45
data_science/export_results.py Executable file

@ -0,0 +1,45 @@
#!/usr/bin/env python
# argparse ships with the Python standard library (2.7+);
# on older interpreters install it with: pip install argparse
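# Example invocation (hypothetical file names):
#   ./export_results.py -s results.txt -d results.csv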
import sys
import os.path
import argparse
from pandas import DataFrame
def read_data(data):
lines = data.readlines()
feature, precision, recall, f1 = [], [], [], []
for line in lines:
line_clean = line.strip().split(",")
feature.append(line_clean[0])
precision.append(line_clean[1])
recall.append(line_clean[4])
f1.append(line_clean[6])
return feature, precision, recall, f1
def save_to_spreadsheet(resultfile, data):
try:
df = DataFrame({'Feature': data[0], 'Precision': data[1], 'Recall': data[2], 'f1-score': data[3]})
df.to_csv(resultfile, index=False)
print("Spreadsheet saved at {0}".format(resultfile))
except Exception:
print("Error: {0}".format(sys.exc_info()[0]))
def menu():
parser = argparse.ArgumentParser(description='Copy data results into a spreadsheet.')
parser.add_argument('-s', dest='input', type=argparse.FileType('r'), required=True, help="File with the results.")
parser.add_argument('-d', dest='output', required=True, help="The name of the file to save the spreadsheet.")
args = parser.parse_args()
return args.input, args.output
if __name__ == "__main__":
datafile, resultfile = menu()
data = read_data(datafile)
save_to_spreadsheet(resultfile, data)

225
data_science/runEval.py Normal file

@ -0,0 +1,225 @@
#!/usr/bin/env python
"""
Run svm_light, parse its stdout, calculate
ML scores, HDFS copy data to local.
"""
import sys
import os
import getpass
import subprocess
import shutil
import math
def delete_dir(dir_path):
'''
Remove a directory.
Args:
dir_path: full path to the directory.
'''
if os.path.isdir(dir_path):
shutil.rmtree(dir_path)
def usage():
'''
Handle the CLI arguments.
'''
args = sys.argv
if len(args) != 3:
print("Usage: ./runEval <method> <version>")
sys.exit(2)
return args[1], args[2]
def create_dir(dir_path):
'''
Create a directory.
Args:
dir_path: full path to the directory.
'''
if not os.path.exists(dir_path):
os.makedirs(dir_path)
def run_svm_classify(test_data, svml_model, svml_eval):
'''
Spawn a subprocess to run svm_classify binary.
From svm_classify.c, svm_light usage requires the following
arguments: example_file model_file output_file.
Args:
test_data: path_to_feature/test.dat
svml_model: something like ~/data/models/svmlight/method/version/model
svml_eval: something like ~/data/models/svmlight/method/version/eval
Returns:
Strings with stdout and stderr so that it can be parsed later.
'''
p = subprocess.Popen(['./models/svm_classify', \
'{0}'.format(test_data), \
'{0}'.format(svml_model),\
'{0}'.format(svml_eval)],\
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = p.communicate()
return out, err
def paste_data(test_data, svml_eval, final_eval, svml_alpha, final_alphas, out):
'''
Copy all eval and alpha data from results to local files.
Args:
src and dst paths.
'''
# Copy all eval data.
with open(test_data, 'r') as ft:
test_data = ft.readlines()
with open(svml_eval, 'r') as fe:
eval_data = fe.readlines()
with open(final_eval, 'a') as f:
for line in test_data:
f.write('{0}\n'.format(line))
for line in eval_data:
f.write('{0}\n'.format(line))
# Copy all alpha data.
with open(svml_alpha, 'r') as fa:
alpha_data = fa.readlines()
with open(final_alphas, 'a') as f:
for line in alpha_data:
f.write('{0} {1}\n'.format(line, out))
def parse_svmlight_output(out):
'''
Parse the svm_light stdout string for an example
Returns:
c: counts
p: precision
r: recall
'''
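# The stdout is assumed to follow svm_classify's usual format, e.g.
# "...OK. (123 support vectors read)...Precision/recall on test set: 85.71%/75.00%"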
c = out.split('OK. (')[1].split(' support')[0]
pr = out.split('Precision/recall on test set: ')[1].split(' support')[0].strip()
p, r = pr.split('/')
p = float(p.strip('%').strip()) / 100
r = float(r.strip('%').strip()) / 100
return c, p, r
def hdfs_copy_data(home_dir, method, version):
'''
Run CLI HDFS commands to clean up and save data.
'''
os.system('hdfs dfs -rm /data/shared/structdata/modelOutput/{0}/{1}/scores'.format(method, version))
os.system('hdfs dfs -rm /data/shared/structdata/modelOutput/{0}/{1}/alphas'.format(method, version))
os.system('hdfs dfs -mkdir /data/shared/structdata/modelOutput/{0}/{1}'.format(method, version))
os.system('hdfs dfs -copyFromLocal {0}/data/eval/{1}/{2}/alphas \
/data/shared/structdata/modelOutput/{3}/{4}/alphas'.format(home_dir, version, method, method, version))
os.system('hdfs dfs -copyFromLocal {0}/data/eval/{1}/{2}/eval \
/data/shared/structdata/modelOutput/{3}/{4}/scores'.format(home_dir, version, method, method, version))
def calculate_scores(list_of_scores):
'''
Calculate the mean of a given list of scores,
taking care of any nan or 0 division.
'''
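# e.g. [0.8, nan, 0.6] -> (0.8 + 0.6) / 2 = 0.7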
c, score = 0, 0
for i in list_of_scores:
if not math.isnan(i):
c += 1
score += i
if c > 0:
return score / c
else:
return 0
def calculate_f1(precision, recall):
'''
Calculates the f1-score as the harmonic
mean of precision and recall.
'''
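# e.g. precision=0.8, recall=0.6 -> 2 / (1/0.8 + 1/0.6) ~= 0.686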
if precision == 0 or recall == 0:  # avoid dividing by zero
return 0
else:
return 2 / (1/precision + 1/recall)
if __name__ == '__main__':
# Grab the CLI arguments.
METHOD, VERSION = usage()
# Setup output dirs.
home_dir = os.path.join('/home', getpass.getuser())
final_dir = os.path.join(home_dir, 'data/eval', VERSION, METHOD)
final_alphas = os.path.join(final_dir, 'alphas')
final_eval = os.path.join(final_dir, 'eval')
delete_dir(final_alphas)
delete_dir(final_eval)
create_dir(final_dir)
# Loop over the attributes and features.
training_data_dir = os.path.join(home_dir, 'data/training_data/', VERSION, METHOD)
for attribute in os.listdir(training_data_dir):
attribute_path = os.path.join(training_data_dir, attribute)
counts = 0
precision, recall = [], []
for feature in os.listdir(attribute_path):
# Create all the paths in use.
out = os.path.join(VERSION, METHOD, attribute, feature)
svmlight = os.path.join(home_dir,'data/models/svmlight', out)
svml_model = os.path.join(svmlight, 'model')
svml_eval = os.path.join(svmlight, 'eval')
svml_alpha = os.path.join(svmlight, 'alphas')
test_data = os.path.join(attribute_path, feature, 'test.dat')
# Run svm_classify, keeping its stdout separate from the feature path in `out`.
svm_out, err = run_svm_classify(test_data, svml_model, svml_eval)
# Save current results.
paste_data(test_data, svml_eval, final_eval, svml_alpha, final_alphas, out)
# Parse output from svm_classify to print to stdout.
if err:
print('Error: {0}'.format(err))
# Get Train counts, Test counts, Accuracy, Precision, Recall.
c, p, r = parse_svmlight_output(svm_out)
counts += int(c)
precision.append(p)
recall.append(r)
attribute_precision = calculate_scores(precision)
attribute_recall = calculate_scores(recall)
attribute_f1 = calculate_f1(attribute_precision, attribute_recall)
print("{: <20} Counts: {: <20} Precision: {: <20} Recall: {: <20} F1-score: {: <20}".format(attribute.title(), \
counts, round(attribute_precision, 4), round(attribute_recall, 4), round(attribute_f1, 4)))
# Copying results from remote hdfs.
print("\nCopying results to hdfs")
hdfs_copy_data(home_dir, METHOD, VERSION)
print("\nDone!".format())

5
elk/NICE.md Normal file

@ -0,0 +1,5 @@
# Nice ELK hacks
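Count hits per username for a given process (osqueryd here) in one day's logstash index: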
```
curl -s logs.HOST.com:9200/logstash-2017.09.08/_search\?q=ty_params.ProcessName:osqueryd\&size=10000\&sort=@timestamp:desc | jq -r '.hits.hits[]._source.ty_params.Username' | sort | uniq -c | sort -nr
```

47
elk/grabbing_elk_data.py Normal file

@ -0,0 +1,47 @@
import elasticsearch
import whois
import json
from elasticsearch import Elasticsearch
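# Pull named_query_log events that do not match *HOST.com or *otherhost.com,
# WHOIS each external dns_dest, and dump the results to processed_domains.txt.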
es = Elasticsearch([{ 'host': "HOST NAME"}])
query = {
'size': 100,
'query': {
'filtered': {
'query': {
'query_string': {
'query': 'type:named_query_log',
'analyze_wildcard': True
}
},
'filter': {
'bool': {
'must_not': {
'query_string': {
'query': '*HOST.com OR *otherhost.com',
'analyze_wildcard': True
}
}
}
}
}}}
# Make the search
res = es.search(index="LOG-NAME", body=query)
results = []
counter = 0
# Print out our results
for hit in res['hits']['hits']:
if "dns_dest" in hit['_source'].keys():
try:
results.append(json.dumps(whois.whois(hit['_source']['dns_dest'])))
except Exception as e:
pass
counter += 1
print "Scanning {0}/{1} domains, {2} succeeded..".format(counter, len(res['hits']['hits']), len(results))
with open('processed_domains.txt', 'w') as outfile:
json.dump(results, outfile)

49
elk/set_log.py Normal file

@ -0,0 +1,49 @@
#!/usr/bin/env python
import os
import subprocess
import json
import socket
import logging
LOG_PATH = "/var/log/logname.log"
FORWARD_PATH = "/etc/logstash-forwarder.conf"
LOG_LEVEL = logging.DEBUG
logging.basicConfig(level=LOG_LEVEL)
# Set up logpath
if not os.path.isfile(LOG_PATH):
logging.info("No {0} file. Calling: sudo touch {1}".format(LOG_PATH, LOG_PATH))
subprocess.call("sudo touch {0}".format(LOG_PATH), shell=True)
logging.info("Setting perms. Calling: sudo chmod 666 {0}".format(LOG_PATH))
subprocess.call("sudo chmod 666 {0}".format(LOG_PATH), shell=True)
# Set up forwarding
if os.path.isfile(FORWARD_PATH):
logging.info("Forwarding {0} to logstash...".format(FORWARD_PATH))
try:
with open(FORWARD_PATH, "r+") as f:
data = json.load(f)
try:
if LOG_PATH not in data['files'][0]['paths']:
data['files'][0]['paths'].append(LOG_PATH)
jsonFile = open(FORWARD_PATH, "w+")
jsonFile.write(json.dumps(data))
jsonFile.close()
except KeyError:
logging.error("Could not set logstash: {0} is not well formated.".format(FORWARD_PATH))
except IOError:
logging.error("Could not open {0}".format(FORWARD_PATH))
else:
hostname = socket.gethostname()
# Search for the logstash-forwarder location for each host
if "prodvpn" in hostname:
logging.warning("Forwarder should be in {0}. Please set up a forwarder and try again.".format(FORWARD_PATH))

BIN
shell_scripts/.DS_Store vendored Normal file

Binary file not shown.


@ -0,0 +1,15 @@
* Download all PDFs from a URL
$ wget --recursive --level=2 --no-directories --no-host-directories --accept pdf <url>
* Show the extended attributes of a file
$ xattr -l $file
* Check the code signature of an app
$ codesign -dvvv $file.app
* Show the full system profile
$ system_profiler -detailLevel full

3
shell_scripts/useful/rsync.sh Executable file

@ -0,0 +1,3 @@
#!/bin/bash
VM=vm_dev
rsync -avz -e ssh <local dir> ${VM}:<remote dir> --exclude-from ~/.rsync-excludes

1
shell_scripts/useful/sshfs.sh Executable file

@ -0,0 +1 @@
/usr/local/bin/sshfs vm_dev:<remote dir> <mount local dir>