WebHacking: README, urllib2 scripts

Mari Wahl 2014-12-29 13:06:07 -05:00
parent 54d8d02892
commit a36bde60b7
15 changed files with 209 additions and 99 deletions


@@ -0,0 +1,68 @@
#!/usr/bin/env python
__author__ = "bt3"

import urllib2
import Queue
import os
import threading

THREADS = 10
TARGET = 'http://www.blackhatpython.com'

# local directory into which we have downloaded and extracted the web app
#DIRECTORY = '/home/User/Desktop/Joomla'
#DIRECTORY = '/home/User/Desktop/wordpress'
DIRECTORY = '/home/User/Desktop/drupal'

# list of file extensions we don't want to fingerprint
FILTERS = ['.jpg', '.gif', '.png', '.css']
# each thread runs this loop until the web_paths Queue is empty. On each
# iteration we grab a path from the queue, append it to the target
# website's base path, and then attempt to retrieve it
def test_remote():
    while not web_paths.empty():
        path = web_paths.get()
        url = '%s%s' % (TARGET, path)
        request = urllib2.Request(url)
        try:
            response = urllib2.urlopen(request)
            content = response.read()
            # if we successfully retrieve the file, print the HTTP
            # status code and the full path of the file
            print '[%d] => %s' % (response.code, path)
            response.close()
        # if the file is not found or is protected by .htaccess,
        # report the error code
        except urllib2.HTTPError as error:
            print 'Failed: %d => %s' % (error.code, path)
if __name__ == '__main__':
    os.chdir(DIRECTORY)

    # queue object where we store the files to locate on the remote server
    web_paths = Queue.Queue()

    # os.walk walks through all the files and directories in the local
    # web application directory. We build the full path to each target
    # file and test it against the filter list to make sure we only look
    # for the file types we want. Each valid file is added to the
    # web_paths Queue.
    for r, d, f in os.walk('.'):
        for files in f:
            remote_path = '%s/%s' % (r, files)
            if remote_path[0] == '.':
                remote_path = remote_path[1:]
            if os.path.splitext(files)[1] not in FILTERS:
                web_paths.put(remote_path)

    # spawn a number of threads that call the test_remote function; each
    # runs in a loop that keeps executing until the web_paths queue is
    # empty
    for i in range(THREADS):
        print 'Spawning thread: ' + str(i)
        t = threading.Thread(target=test_remote)
        t.start()
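The main block only starts the workers; in CPython the process stays alive until the non-daemon threads finish, but the main thread itself does not block. If you want it to wait for all workers (for example, to print a summary once the whole queue has been tested), a minimal variant -- a sketch, not part of this commit -- keeps the thread handles and joins them:

    threads = []
    for i in range(THREADS):
        print 'Spawning thread: ' + str(i)
        t = threading.Thread(target=test_remote)
        t.start()
        threads.append(t)   # keep the handle so we can wait on it
    for t in threads:
        t.join()            # block until this worker has drained its share
    print 'All paths tested.'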


@@ -0,0 +1,25 @@
#!/usr/bin/env python
__author__ = "bt3"

import urllib2


def get(url):
    msg = urllib2.urlopen(url)
    print msg.read()


def get_user_agent(url):
    headers = {}
    headers['User-Agent'] = 'Googlebot'
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    print response.read()
    response.close()


if __name__ == '__main__':
    HOST = 'http://www.google.com'
    get_user_agent(HOST)
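Besides read(), the file-like object returned by urllib2.urlopen also exposes the response metadata. A small sketch along the same lines (get_headers is a hypothetical helper, not in this commit) that prints the status code, the headers, and the final URL instead of the body:

    def get_headers(url):
        # getcode() is the HTTP status, info() the server's headers,
        # and geturl() the final URL after any redirects
        response = urllib2.urlopen(url)
        print response.getcode()
        print response.info()
        print response.geturl()
        response.close()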