add scripts from old box

2025-12-15 08:09:02 -05:00 · 2019-02-20 11:37:28 -08:00 · 2019-02-20 11:37:28 -08:00 · df6379f72d
commit df6379f72d
parent b05e40158b
11 changed files with 471 additions and 6 deletions
--- a/configs/tmux.conf
+++ b/configs/tmux.conf
@ -1,8 +1,3 @@
-# Change prefix key to Ctrl+p
-# unbind C-b
-# set -g prefix C-p
-
-
 # More straightforward key bindings for splitting
 unbind %
 bind | split-window -h
@ -45,14 +40,19 @@ set-option -g default-terminal screen-256color
 # Status bar has a dim gray background
 set-option -g status-bg colour234
 set-option -g status-fg colour74
+
 # Left shows the session name, in blue
 set-option -g status-left-bg default
 set-option -g status-left-fg colour74
+
 # Right shows a fixed text (set below), in terminal green
 set-option -g status-right-bg default
 set-option -g status-right-fg colour71
+set -g status-right "Impossible is a State of Mind"
+
 # Highlighting the active window in status bar
 setw -g window-status-current-bg colour234
 setw -g window-status-current-fg colour71

 new-session
+
--- a/data_science/export_csv_reports.py
+++ b/data_science/export_csv_reports.py
@ -0,0 +1,51 @@
+#!/usr/bin/env python
+"""
+    Export data in a CSV spreadsheet.
+
+    Marina von Steinkirch - 2017
+
+    Need to have argparse installed:
+    $ pip install argparse
+"""
+
+import sys
+import argparse
+from pandas import DataFrame
+
+
+def read_data(data):
+    """
+        Pull the feature, precision, recall and f1 columns out of a
+        comma-separated results file.
+
+        Args:
+            data: an open, readable text file. Columns 0, 1, 4 and 6 of
+                  every non-blank line are used.
+
+        Returns:
+            A tuple of four lists of strings:
+            (feature, precision, recall, f1).
+
+        Raises:
+            IndexError: if a non-blank line has fewer than seven columns.
+    """
+    feature, precision, recall, f1 = [], [], [], []
+    for line in data:
+        line_clean = line.strip().split(",")
+        # A blank line (typically the trailing one) used to abort the
+        # whole export with an IndexError; skip it instead.
+        if line_clean == ['']:
+            continue
+        feature.append(line_clean[0])
+        precision.append(line_clean[1])
+        recall.append(line_clean[4])
+        f1.append(line_clean[6])
+    return feature, precision, recall, f1
+
+
+def save_to_spreadsheet(resultfile, data):
+    """
+        Write the parsed columns to a CSV file.
+
+        Args:
+            resultfile: path of the CSV file to create.
+            data: sequence of four equally long lists, in the order
+                  feature, precision, recall, f1.
+
+        Failures are reported on stdout instead of being raised, so the
+        script keeps its best-effort behaviour.
+    """
+    try:
+        df = DataFrame({'Feature': data[0], 'Precision': data[1], 'Recall': data[2], 'f1-score': data[3]})
+        df.to_csv(resultfile, index=False)
+        print("Spreadsheet saved at {0}".format(resultfile))
+    except Exception as err:
+        # Exception rather than a bare except, so Ctrl-C and sys.exit()
+        # still propagate. The message is printed too, because the
+        # exception type alone rarely says what went wrong.
+        print("Error: {0}: {1}".format(type(err).__name__, err))
+
+
+def menu():
+    """
+        Parse the command line.
+
+        Returns:
+            A tuple (input, output): the results file, already opened for
+            reading by argparse, and the path of the spreadsheet to write.
+    """
+    parser = argparse.ArgumentParser(description='Copy data results into a spreadsheet.')
+    parser.add_argument('-s', dest='input', type=argparse.FileType('r'), required=True, help="File with the results.")
+    parser.add_argument('-d', dest='output', required=True, help="The name of the file to save the spreadsheet.")
+    args = parser.parse_args()
+    return args.input, args.output
+
+
+if __name__ == "__main__":
+    datafile, resultfile = menu()
+    # argparse.FileType hands back an already-open file; close it as soon
+    # as the rows are read instead of leaking the handle until exit.
+    with datafile:
+        data = read_data(datafile)
+    save_to_spreadsheet(resultfile, data)
+
--- a/data_science/export_results.py
+++ b/data_science/export_results.py
@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# Need to have argparse installed:
+# $ pip install argparse
+
+import sys
+import os.path
+import argparse
+from pandas import DataFrame
+
+
+def read_data(data):
+    """
+        Collect four columns from a comma-separated results file.
+
+        Args:
+            data: an open, readable text file. Of every non-blank line,
+                  column 0 is the feature, 1 the precision, 4 the recall
+                  and 6 the f1 score.
+
+        Returns:
+            (feature, precision, recall, f1), each a list of strings.
+
+        Raises:
+            IndexError: if a non-blank line has fewer than seven columns.
+    """
+    feature, precision, recall, f1 = [], [], [], []
+    for line in data:
+        stripped = line.strip()
+        # Blank lines (e.g. a trailing newline) carry no record and would
+        # otherwise crash the column lookups below.
+        if not stripped:
+            continue
+        line_clean = stripped.split(",")
+        feature.append(line_clean[0])
+        precision.append(line_clean[1])
+        recall.append(line_clean[4])
+        f1.append(line_clean[6])
+    return feature, precision, recall, f1
+
+
+def save_to_spreadsheet(resultfile, data):
+    """
+        Save the parsed columns as a CSV spreadsheet.
+
+        Args:
+            resultfile: path of the CSV file to create.
+            data: sequence of four equally long lists, in the order
+                  feature, precision, recall, f1.
+
+        Errors are printed, not raised (best-effort export).
+    """
+    try:
+        df = DataFrame({'Feature': data[0], 'Precision': data[1], 'Recall': data[2], 'f1-score': data[3]})
+        df.to_csv(resultfile, index=False)
+        print("Spreadsheet saved at {0}".format(resultfile))
+    except Exception as err:
+        # A bare except would also trap KeyboardInterrupt and SystemExit;
+        # catch ordinary errors only and say what actually failed.
+        print("Error: {0}: {1}".format(type(err).__name__, err))
+
+
+def menu():
+    """
+        Read the -s (source results file) and -d (destination
+        spreadsheet) options from the command line.
+
+        Returns:
+            A tuple (input, output): the source file, already opened for
+            reading by argparse, and the destination path.
+    """
+    parser = argparse.ArgumentParser(description='Copy data results into a spreadsheet.')
+    parser.add_argument('-s', dest='input', type=argparse.FileType('r'), required=True, help="File with the results.")
+    parser.add_argument('-d', dest='output', required=True, help="The name of the file to save the spreadsheet.")
+    args = parser.parse_args()
+    return args.input, args.output
+
+
+if __name__ == "__main__":
+    datafile, resultfile = menu()
+    # The input was opened by argparse; release it once it has been read.
+    with datafile:
+        data = read_data(datafile)
+    save_to_spreadsheet(resultfile, data)
+
--- a/data_science/run_eval.py
+++ b/data_science/run_eval.py
@ -0,0 +1,226 @@
+#!/usr/bin/env python
+
+"""
+    Run svm_light, parse its stdout, calculate
+    ML scores, HDFS copy data to local.
+"""
+
+import sys
+import os
+import getpass
+import subprocess
+import shutil
+import math
+
+
+def delete_dir(dir_path):
+    '''
+        Remove a directory tree or a single file, if it exists.
+
+        The script calls this on its 'alphas' and 'eval' outputs, which
+        are regular files opened in append mode. Those have to be removed
+        too, otherwise results from earlier runs pile up in them.
+
+        Args:
+            dir_path: full path to the directory (or file).
+    '''
+    if os.path.isdir(dir_path):
+        shutil.rmtree(dir_path)
+    elif os.path.isfile(dir_path):
+        os.remove(dir_path)
+
+
+def usage():
+    '''
+        Read <method> and <version> from the command line.
+
+        Prints a usage line and exits with status 2 unless exactly two
+        arguments were given.
+
+        Returns:
+            A tuple (method, version).
+    '''
+    argv = sys.argv
+    if len(argv) == 3:
+        return argv[1], argv[2]
+    print("Usage: ./runEval <method> <version>")
+    sys.exit(2)
+
+
+def create_dir(dir_path):
+    '''
+        Create a directory, including missing parents.
+
+        Does nothing when the path already exists.
+
+        Args:
+            dir_path: full path to the directory.
+    '''
+    try:
+        os.makedirs(dir_path)
+    except OSError:
+        # Creating first and checking afterwards closes the window in
+        # which another process could create the path between an
+        # exists() test and makedirs(). Anything other than "already
+        # there" is a real error.
+        if not os.path.exists(dir_path):
+            raise
+
+
+def run_svm_classify(test_data, svml_model, svml_eval):
+    '''
+        Spawn a subprocess to run svm_classify binary.
+
+        From svm_classify.c, svm_light usage requires the following
+        arguments: example_file model_file output_file.
+
+        Args:
+            test_data: path_to_feature/test.dat
+            svml_model: something like ~/data/models/svmlight/method/version/model
+            svml_eval: something like ~/data/models/svmlight/method/version/eval
+
+        Returns:
+            Strings with stdout and stderr so that it can be parsed later.
+    '''
+    command = ['./models/svm_classify', str(test_data), str(svml_model), str(svml_eval)]
+    # universal_newlines makes communicate() return text rather than bytes
+    # on Python 3, which the string-based stdout parser relies on.
+    p = subprocess.Popen(command,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE,
+                         universal_newlines=True)
+    out, err = p.communicate()
+    return out, err
+
+
+def paste_data(test_data, svml_eval, final_eval, svml_alpha, final_alphas, out):
+    '''
+        Append one feature's test, eval and alpha data to the aggregated
+        result files.
+
+        Args:
+            test_data: path of the test.dat file that was classified.
+            svml_eval: path of the svm_classify output (eval) file.
+            final_eval: path of the aggregated eval file (appended to).
+            svml_alpha: path of the model's alphas file.
+            final_alphas: path of the aggregated alphas file (appended to).
+            out: label written after every alpha value.
+    '''
+    # Copy all eval data.
+    with open(test_data, 'r') as ft:
+        test_lines = ft.readlines()
+
+    with open(svml_eval, 'r') as fe:
+        eval_lines = fe.readlines()
+
+    # readlines() keeps each line's own newline. Strip it before adding
+    # ours, otherwise every record is followed by an empty line.
+    with open(final_eval, 'a') as f:
+        for line in test_lines + eval_lines:
+            f.write('{0}\n'.format(line.rstrip('\n')))
+
+    # Copy all alpha data.
+    with open(svml_alpha, 'r') as fa:
+        alpha_lines = fa.readlines()
+
+    # Same stripping here, so that the label stays on the alpha's line
+    # instead of being pushed onto the next one.
+    with open(final_alphas, 'a') as f:
+        for line in alpha_lines:
+            f.write('{0}     {1}\n'.format(line.rstrip('\n'), out))
+
+
+def parse_svmlight_output(out):
+    '''
+        Parse the svm_light stdout string for an example
+
+        Args:
+            out: stdout of svm_classify, as text (bytes are decoded).
+
+        Returns:
+            c: counts, the text between 'OK. (' and ' support'
+            p: precision, as a fraction (percentage / 100)
+            r: recall, as a fraction (percentage / 100)
+
+        Raises:
+            ValueError: if the expected markers are not in the output.
+    '''
+    # Popen hands back bytes on Python 3 unless told otherwise.
+    if not isinstance(out, str):
+        out = out.decode('utf-8')
+
+    count_marker = 'OK. ('
+    pr_marker = 'Precision/recall on test set: '
+    if count_marker not in out or pr_marker not in out:
+        raise ValueError('Unexpected svm_classify output: {0!r}'.format(out))
+
+    c = out.split(count_marker)[1].split(' support')[0]
+    # Only the rest of the marker's own line holds "<p>%/<r>%".
+    pr = out.split(pr_marker)[1].splitlines()[0].strip()
+    p, r = pr.split('/')
+    p = float(p.strip('%').strip()) / 100
+    r = float(r.strip('%').strip()) / 100
+
+    return c, p, r
+
+
+def hdfs_copy_data(home_dir, method, version):
+    '''
+        Replace the scores and alphas stored on HDFS for this
+        method/version with the local eval results.
+
+        Runs, in order: rm of the old scores and alphas, mkdir of the
+        target directory, copyFromLocal of alphas and of eval (stored
+        as 'scores'). Exit codes of the hdfs commands are ignored.
+
+        Args:
+            home_dir: home directory that holds data/eval.
+            method: method name (path component).
+            version: version name (path component).
+    '''
+    remote_dir = '/data/shared/structdata/modelOutput/{0}/{1}'.format(method, version)
+    local_dir = '{0}/data/eval/{1}/{2}'.format(home_dir, version, method)
+
+    commands = [
+        'hdfs dfs -rm {0}/scores'.format(remote_dir),
+        'hdfs dfs -rm {0}/alphas'.format(remote_dir),
+        'hdfs dfs -mkdir {0}'.format(remote_dir),
+        'hdfs dfs -copyFromLocal {0}/alphas             {1}/alphas'.format(local_dir, remote_dir),
+        'hdfs dfs -copyFromLocal {0}/eval             {1}/scores'.format(local_dir, remote_dir),
+    ]
+    for command in commands:
+        os.system(command)
+
+def calculate_scores(list_of_scores):
+    '''
+        Return the mean of the scores that are not nan.
+
+        Returns 0 when there is no such score (empty list or nan only),
+        which also avoids dividing by zero.
+    '''
+    valid = [value for value in list_of_scores if not math.isnan(value)]
+    if not valid:
+        return 0
+    return sum(valid) / len(valid)
+
+
+def calculate_f1(precision, recall):
+    '''
+        Calculates the f1-score as the harmonic
+        mean of precision and recall.
+
+        Args:
+            precision: precision as a fraction between 0 and 1.
+            recall: recall as a fraction between 0 and 1.
+
+        Returns:
+            The f1-score, or 0 when either input is 0 (or negative).
+    '''
+    # The harmonic mean is undefined as soon as one of the two is zero.
+    # The old guard (precision + recall < 1) also zeroed perfectly valid
+    # pairs such as 0.4/0.4, and still divided by zero for 1.0/0.
+    if precision <= 0 or recall <= 0:
+        return 0
+    return 2.0 * precision * recall / (precision + recall)
+
+
+if __name__ == '__main__':
+
+    # Grab the CLI arguments.
+    METHOD, VERSION = usage()
+
+    # Setup output dirs.
+    home_dir = os.path.join('/home', getpass.getuser())
+    final_dir = os.path.join(home_dir, 'data/eval', VERSION, METHOD)
+    final_alphas = os.path.join(final_dir, 'alphas')
+    final_eval = os.path.join(final_dir, 'eval')
+
+    delete_dir(final_alphas)
+    delete_dir(final_eval)
+    create_dir(final_dir)
+
+    # Loop over the attributes and features.
+    training_data_dir = os.path.join(home_dir, 'data/training_data/', VERSION, METHOD)
+
+    for attribute in os.listdir(training_data_dir):
+
+        attribute_path = os.path.join(training_data_dir, attribute)
+        counts = 0
+        precision, recall = [], []
+
+        for feature in os.listdir(attribute_path):
+
+            # Create all the paths in use.
+            rel_path = os.path.join(VERSION, METHOD, attribute, feature)
+            svmlight = os.path.join(home_dir, 'data/models/svmlight', rel_path)
+            svml_model = os.path.join(svmlight, 'model')
+            svml_eval = os.path.join(svmlight, 'eval')
+            svml_alpha = os.path.join(svmlight, 'alphas')
+            test_data = os.path.join(attribute_path, feature, 'test.dat')
+
+            # Run svm_classify.
+            stdout, err = run_svm_classify(test_data, svml_model, svml_eval)
+
+            # Report problems first: when svm_classify fails, the steps
+            # below may raise and the message would otherwise be lost.
+            if err:
+                print('Error: {0}'.format(err))
+
+            # Save current results, labelled with the feature's relative
+            # path. This used to pass svm_classify's stdout, because one
+            # variable was reused for both the path and the output.
+            paste_data(test_data, svml_eval, final_eval, svml_alpha, final_alphas, rel_path)
+
+            # Get the counts, precision and recall from the stdout.
+            c, p, r = parse_svmlight_output(stdout)
+
+            counts += int(c)
+            precision.append(p)
+            recall.append(r)
+
+        attribute_precision = calculate_scores(precision)
+        attribute_recall = calculate_scores(recall)
+        attribute_f1 = calculate_f1(attribute_precision, attribute_recall)
+
+        print("{: <20} Counts: {: <20} Precision: {: <20} Recall: {: <20} F1-score: {: <20}".format(
+            attribute.title(), counts, round(attribute_precision, 4),
+            round(attribute_recall, 4), round(attribute_f1, 4)))
+
+
+    # Copying the local results to hdfs.
+    print("\nCopying results to hdfs")
+    hdfs_copy_data(home_dir, METHOD, VERSION)
+    print("\nDone!")
+
--- a/elasticsearch/README.md
+++ b/elasticsearch/README.md
@ -0,0 +1,6 @@
+# Elastalert hacks
+
+```
+curl -s logs.HOST.com:9200/logstash-2017.09.08/_search\?q=ty_params.ProcessName:osqueryd\&size=10000\&sort=@timestamp:desc | jq -r '.hits.hits[]._source.ty_params.Username' | sort | uniq -c | sort -nr
+```
+
--- a/elasticsearch/grabbing_es_data.py
+++ b/elasticsearch/grabbing_es_data.py
@ -0,0 +1,48 @@
+import elasticsearch
+import whois
+import json
+
+from elasticsearch import Elasticsearch
+
+es = Elasticsearch([{ 'host': "HOST NAME"}])
+
+# Entries of type named_query_log, minus everything that matches our own
+# domains. The query and the filter both live inside 'filtered'.
+query = {
+  'size': 100,
+  'query': {
+    'filtered': {
+      'query': {
+        'query_string': {
+          'query': 'type:named_query_log',
+          'analyze_wildcard': True
+        }
+      },
+      'filter': {
+        'bool': {
+          'must_not': {
+            'query_string': {
+              'query': '*HOST.com OR *otherhost.com',
+              'analyze_wildcard': True
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+# Make the search
+res = es.search(index="LOG-NAME", body=query)
+hits = res['hits']['hits']
+
+results = []
+# Look up every destination domain and report the progress
+for counter, hit in enumerate(hits, 1):
+  if "dns_dest" in hit['_source']:
+    domain = hit['_source']['dns_dest']
+    try:
+      results.append(json.dumps(whois.whois(domain)))
+    except Exception as e:
+      # Best effort: a failed lookup, or a record that cannot be
+      # serialised, must not stop the scan. Say which domain was skipped
+      # instead of dropping it silently.
+      print("Skipping {0}: {1}".format(domain, e))
+  # Single-argument print(...) runs on Python 2 and Python 3 alike.
+  print("Scanning {0}/{1} domains, {2} succeeded..".format(counter, len(hits), len(results)))
+
+with open('processed_domains.txt', 'w') as outfile:
+  json.dump(results, outfile)
+
--- a/elasticsearch/set_log.py
+++ b/elasticsearch/set_log.py
@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+import os
+import subprocess
+import json
+import socket
+import logging
+
+LOG_PATH = "/var/log/logname.log"
+FORWARD_PATH = "/etc/logstash-forwarder.conf"
+LOG_LEVEL = logging.DEBUG
+
+# Apply LOG_LEVEL: without this the root logger stays at WARNING and the
+# info messages below are never shown.
+logging.basicConfig(level=LOG_LEVEL)
+
+
+# Set up logpath
+if not os.path.isfile(LOG_PATH):
+
+    logging.info("No {0} file. Calling: sudo touch {1}".format(LOG_PATH, LOG_PATH))
+    subprocess.call(["sudo", "touch", LOG_PATH])
+
+    logging.info("Setting perms. Calling: sudo chmod 666 {0}".format(LOG_PATH))
+    subprocess.call(["sudo", "chmod", "666", LOG_PATH])
+
+# Set up forwarding
+if os.path.isfile(FORWARD_PATH):
+
+    logging.info("Forwarding {0} to logstash...".format(FORWARD_PATH))
+    try:
+        # Read the current forwarder configuration.
+        with open(FORWARD_PATH, "r") as f:
+            data = json.load(f)
+
+        try:
+            # Register the log file with the first 'files' entry, unless
+            # it is already listed there.
+            if LOG_PATH not in data['files'][0]['paths']:
+                data['files'][0]['paths'].append(LOG_PATH)
+                with open(FORWARD_PATH, "w") as f:
+                    f.write(json.dumps(data))
+
+        except (KeyError, IndexError, TypeError):
+            logging.error("Could not set logstash: {0} is not well formated.".format(FORWARD_PATH))
+
+    except IOError:
+        logging.error("Could not open {0}".format(FORWARD_PATH))
+
+    except ValueError:
+        # json.load() raises ValueError when the file is not valid JSON.
+        logging.error("Could not set logstash: {0} is not well formated.".format(FORWARD_PATH))
+
+else:
+    hostname = socket.gethostname()
+
+    #Search for logstash-forwarder locations per each host
+    if "prodvpn" in hostname:
+        logging.warning("Forwarder should be in {0}. Please set up a forwarder and try again.".format(FORWARD_PATH))
+
+
--- a/gcloud/get_cloudsql_instances.sh
+++ b/gcloud/get_cloudsql_instances.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+
+# List the Cloud SQL instances of every project the account can see.
+# value(projectId) prints bare project ids, so there is no table header
+# to cut off and no column to pick.
+for x in $(gcloud projects list --format="value(projectId)");
+do
+  gcloud sql instances list --project "$x"
+done
+
--- a/gcloud/get_gcp_apps.sh
+++ b/gcloud/get_gcp_apps.sh
@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Bare project ids, one per line (no table header to strip).
+projects=$(gcloud projects list --format="value(projectId)")
+
+# List the App Engine services of project $1. stderr is merged in so that
+# ERROR lines can be filtered out of the output.
+list_app () {
+  gcloud app services list --project "$1" 2>&1 | grep -v ERROR
+}
+
+for x in $projects;
+do
+  list_app "$x"
+done
+
--- a/gcloud/get_kb8_clusters.sh
+++ b/gcloud/get_kb8_clusters.sh
@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Bare project ids, one per line (no table header to strip).
+projects=$(gcloud projects list --format="value(projectId)")
+
+# Print the endpoint of every GKE cluster, project by project.
+for x in $projects;
+do
+  # Pipe the JSON straight into jq: the unquoted `echo $project_json`
+  # let the shell word-split and glob-expand it first.
+  gcloud container clusters list --project "$x" --format json | jq -e -r '.[] | .endpoint'
+done
+
--- a/gcloud/get_public_ips.sh
+++ b/gcloud/get_public_ips.sh
@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Print the external (NAT) IP of every compute instance, project by project.
+for x in $(gcloud projects list --format="value(projectId)");
+do
+  # One listing per project; the script used to run it twice and throw
+  # the first result away. `[]?` and `// empty` skip interfaces that have
+  # no external address instead of failing or printing "null".
+  gcloud compute instances list --project "$x" --format json | jq -r '.[] | .networkInterfaces[] | .accessConfigs[]? | .natIP // empty'
+done
+