From b8e67721b848b72d53fc8d3ee4c46e76d1c6913d Mon Sep 17 00:00:00 2001
From: Mark Qvist <mark@unsigned.io>
Date: Fri, 17 Sep 2021 18:45:08 +0200
Subject: [PATCH] Implemented page caching and cache control headers

---
 nomadnet/NomadNetworkApp.py        |   6 +-
 nomadnet/ui/textui/Browser.py      | 147 +++++++++++++++++++++++++----
 nomadnet/ui/textui/Guide.py        |  25 +++++
 nomadnet/ui/textui/MicronParser.py |   4 +
 4 files changed, 165 insertions(+), 17 deletions(-)

diff --git a/nomadnet/NomadNetworkApp.py b/nomadnet/NomadNetworkApp.py
index 6eaa400..0039bf9 100644
--- a/nomadnet/NomadNetworkApp.py
+++ b/nomadnet/NomadNetworkApp.py
@@ -53,6 +53,7 @@ class NomadNetworkApp:
 
         self.pagespath         = self.configdir+"/storage/pages"
         self.filespath         = self.configdir+"/storage/files"
+        self.cachepath         = self.configdir+"/storage/cache"
 
         self.downloads_path    = os.path.expanduser("~/Downloads")
 
@@ -78,6 +79,9 @@ class NomadNetworkApp:
         if not os.path.isdir(self.filespath):
             os.makedirs(self.filespath)
 
+        if not os.path.isdir(self.cachepath):
+            os.makedirs(self.cachepath)
+
         if os.path.isfile(self.configpath):
             try:
                 self.config = ConfigObj(self.configpath)
@@ -128,7 +132,7 @@ class NomadNetworkApp:
 
                 if not "node_last_announce" in self.peer_settings:
                     self.peer_settings["node_last_announce"] = None
-                    
+
             except Exception as e:
                 RNS.log("Could not load local peer settings from "+self.peersettingspath, RNS.LOG_ERROR)
                 RNS.log("The contained exception was: %s" % (str(e)), RNS.LOG_ERROR)
diff --git a/nomadnet/ui/textui/Browser.py b/nomadnet/ui/textui/Browser.py
index 6e20727..aa4bde9 100644
--- a/nomadnet/ui/textui/Browser.py
+++ b/nomadnet/ui/textui/Browser.py
@@ -33,6 +33,7 @@ class BrowserFrame(urwid.Frame):
 class Browser:
     DEFAULT_PATH       = "/page/index.mu"
     DEFAULT_TIMEOUT    = 10
+    DEFAULT_CACHE_TIME = 12*60*60
 
     NO_PATH            = 0x00
     PATH_REQUESTED     = 0x01
@@ -80,6 +81,7 @@ class Browser:
         self.history_inc = False
         self.history_dec = False
         self.reloading = False
+        self.loaded_from_cache = False
 
         if self.path == None:
             self.path = Browser.DEFAULT_PATH
@@ -97,6 +99,13 @@ class Browser:
                 path = self.path
             return RNS.hexrep(self.destination_hash, delimit=False)+":"+path
 
+    def url_hash(self, url):
+        """Return the hex digest of `url` used to name its cache file, or None if `url` is None."""
+        if url is None:
+            return None
+        # The hash is computed over the UTF-8 bytes of the URL string.
+        return RNS.hexrep(RNS.Identity.full_hash(url.encode("utf-8")), delimit=False)
+
 
     def marked_link(self, link_target):
         if self.status == Browser.DONE:
@@ -411,6 +420,7 @@ class Browser:
     def reload(self):
         if not self.reloading and self.status == Browser.DONE:
             self.reloading = True
+            self.uncache_page(self.current_url())
             self.load_page()
 
     def close_dialogs(self):
@@ -445,22 +455,10 @@ class Browser:
 
 
     def load_page(self):
-        if self.destination_hash != self.loopback:
-            load_thread = threading.Thread(target=self.__load)
-            load_thread.setDaemon(True)
-            load_thread.start()
-        else:
-            RNS.log("Browser handling local page: "+str(self.path), RNS.LOG_DEBUG)
-            page_path = self.app.pagespath+self.path.replace("/page", "", 1)
-
-            page_data = b"The requested local page did not exist in the file system"
-            if os.path.isfile(page_path):
-                file = open(page_path, "rb")
-                page_data = file.read()
-                file.close()
-
+        cached = self.get_cached(self.current_url())
+        if cached:
             self.status = Browser.DONE
-            self.page_data = page_data
+            self.page_data = cached
             self.markup = self.page_data.decode("utf-8")
             self.attr_maps = markup_to_attrmaps(self.markup, url_delegate=self)
             
@@ -468,6 +466,7 @@ class Browser:
             self.response_size = None
             self.response_transfer_size = None
             self.saved_file_name = None
+            self.loaded_from_cache = True
 
             self.update_display()
 
@@ -478,6 +477,41 @@ class Browser:
                 self.history_inc = False
                 self.reloading = False
 
+        else:
+            if self.destination_hash != self.loopback:
+                load_thread = threading.Thread(target=self.__load)
+                load_thread.setDaemon(True)
+                load_thread.start()
+            else:
+                RNS.log("Browser handling local page: "+str(self.path), RNS.LOG_DEBUG)
+                page_path = self.app.pagespath+self.path.replace("/page", "", 1)
+
+                page_data = b"The requested local page did not exist in the file system"
+                if os.path.isfile(page_path):
+                    file = open(page_path, "rb")
+                    page_data = file.read()
+                    file.close()
+
+                self.status = Browser.DONE
+                self.page_data = page_data
+                self.markup = self.page_data.decode("utf-8")
+                self.attr_maps = markup_to_attrmaps(self.markup, url_delegate=self)
+                
+                self.response_progress = 0
+                self.response_size = None
+                self.response_transfer_size = None
+                self.saved_file_name = None
+                self.loaded_from_cache = False
+
+                self.update_display()
+
+                if not self.history_inc and not self.history_dec and not self.reloading:
+                    self.write_history()
+                else:
+                    self.history_dec = False
+                    self.history_inc = False
+                    self.reloading = False
+
 
     def __load(self):
         # If an established link exists, but it doesn't match the target
@@ -585,6 +619,16 @@ class Browser:
             self.markup = self.page_data.decode("utf-8")
             self.attr_maps = markup_to_attrmaps(self.markup, url_delegate=self)
             self.response_progress = 0
+            self.loaded_from_cache = False
+
+            # Simple header handling. Should be expanded when more
+            # header tags are added.
+            cache_time = Browser.DEFAULT_CACHE_TIME
+            if self.markup[:4] == "#!c=":
+                endpos = self.markup.find("\n")
+                if endpos == -1:
+                    endpos = len(self.markup)
+                cache_time = int(self.markup[4:endpos])
 
             self.update_display()
 
@@ -595,9 +639,78 @@ class Browser:
                 self.history_inc = False
                 self.reloading = False
 
+            if cache_time == 0:
+                RNS.log("Received page "+str(self.current_url())+", not caching due to header.", RNS.LOG_DEBUG)
+            else:
+                RNS.log("Received page "+str(self.current_url())+", caching for %.3f hours." % (cache_time/60/60), RNS.LOG_DEBUG)    
+                self.cache_page(cache_time)
+
         except Exception as e:
             RNS.log("An error occurred while handling response. The contained exception was: "+str(e))
 
+    def uncache_page(self, url):
+        """Remove every cache file for `url`; does nothing when `url` is None (no hash to match)."""
+        url_hash = self.url_hash(url)
+        for file in (os.listdir(self.app.cachepath) if url_hash is not None else ()):
+            if file.startswith(url_hash):
+                cachefile = self.app.cachepath+"/"+file
+                os.unlink(cachefile)
+                RNS.log("Removed "+str(cachefile)+" from cache.", RNS.LOG_DEBUG)
+
+    def get_cached(self, url):
+        """Return the cached bytes for `url`, or None when there is no fresh entry.
+
+        Cache files are named "<64-char url hash>_<expiry time>". Each call also
+        sweeps the cache directory: expired entries are deleted, and any entry
+        that raises while being parsed or read is removed as well.
+        """
+        url_hash = self.url_hash(url)
+        if url_hash is None:
+            # startswith(None) would raise and the handler below would delete every fresh entry.
+            return None
+        for file in os.listdir(self.app.cachepath):
+            cachepath = self.app.cachepath+"/"+file
+            try:
+                components = file.split("_")
+                if len(components) == 2 and len(components[0]) == 64 and len(components[1]) > 0:
+                    if time.time() > float(components[1]):
+                        RNS.log("Removing stale cache entry "+str(file), RNS.LOG_DEBUG)
+                        os.unlink(cachepath)
+                    elif file.startswith(url_hash):
+                        RNS.log("Found "+str(file)+" in cache.", RNS.LOG_DEBUG)
+                        RNS.log("Returning cached page", RNS.LOG_DEBUG)
+                        with open(cachepath, "rb") as cached_file:
+                            return cached_file.read()
+            except Exception as e:
+                RNS.log("Error while parsing cache entry "+str(cachepath)+", removing it.", RNS.LOG_ERROR)
+                RNS.log("The contained exception was: "+str(e), RNS.LOG_ERROR)
+                try:
+                    os.unlink(cachepath)
+                except Exception as unlink_error:
+                    RNS.log("Additionally, an exception occurred while unlinking the entry: "+str(unlink_error), RNS.LOG_ERROR)
+                    RNS.log("You will probably need to remove this entry manually by deleting the file: "+str(cachepath), RNS.LOG_ERROR)
+        return None
+
+
+    def cache_page(self, cache_time):
+        """Write the current page data to the cache, expiring `cache_time` seconds from now.
+
+        Replaces any earlier entry for the same URL; failures are logged, not raised.
+        """
+        url_hash = self.url_hash(self.current_url())
+        if url_hash is None:
+            RNS.log("Could not cache page "+str(self.current_url()), RNS.LOG_ERROR)
+            return
+        try:
+            self.uncache_page(self.current_url())
+            cachefile = self.app.cachepath+"/"+url_hash+"_"+str(time.time()+cache_time)
+            with open(cachefile, "wb") as file:  # handle is closed even if the write fails
+                file.write(self.page_data)
+            RNS.log("Cached page "+str(self.current_url())+" to "+str(cachefile), RNS.LOG_DEBUG)
+        except Exception as e:
+            RNS.log("Could not write cache file for page "+str(self.current_url()), RNS.LOG_ERROR)
+            RNS.log("The contained exception was: "+str(e), RNS.LOG_ERROR)
+
 
     def file_received(self, request_receipt):
         try:
@@ -620,7 +733,7 @@ class Browser:
 
             self.update_display()
         except Exception as e:
-            RNS.log("An error occurred while handling file response. The contained exception was: "+str(e))
+            RNS.log("An error occurred while handling file response. The contained exception was: "+str(e), RNS.LOG_ERROR)
 
     
     def request_failed(self, request_receipt=None):
@@ -668,6 +781,8 @@ class Browser:
             stats_string = "  "+self.g["page"]+size_str(self.response_size)
             stats_string += "   "+self.g["arrow_d"]+size_str(self.response_transfer_size)+" in "+response_time_str
             stats_string += "s   "+self.g["speed"]+size_str(self.response_transfer_size/self.response_time, suffix="b")+"/s"
+        elif self.loaded_from_cache:
+            stats_string = " (cached)"
         else:
             stats_string = ""
 
diff --git a/nomadnet/ui/textui/Guide.py b/nomadnet/ui/textui/Guide.py
index d197588..0fb62a4 100644
--- a/nomadnet/ui/textui/Guide.py
+++ b/nomadnet/ui/textui/Guide.py
@@ -232,6 +232,8 @@ Nomad Network nodes can host pages similar to web pages, that other peers can re
 
 To add pages to your node, place micron files in the `*pages`* directory of your Nomad Network programs `*storage`* directory. By default, the path to this will be `!~/.nomadnetwork/storage/pages`!. You should probably create the file `!index.mu`! first, as this is the page that will get served by default to a connecting peer.
 
+You can control how long a peer will cache your pages by including the cache header in a page. To do so, the first line of your page must start with `!#!c=X`!, where `!X`! is the cache time in seconds. To tell the peer to always load the page from your node, and never cache it, set the cache time to zero. You should only do this if there is a real need, for example if your page displays dynamic content that `*must`* be updated at every page view. The default caching time is 12 hours. In most cases, you should not need to include the cache control header in your pages.
+
 Pages are static in this version, but the next release of Nomad Network will add the ability to use a preprocessor such as PHP, bash, Python (or whatever you prefer) to generate dynamic pages.
 
 >>Files
@@ -553,6 +555,29 @@ Here is `F00f`_`[a more visible link`1385edace36466a6b3dd:/page/index.mu]`_`f
 
 When links like these are displayed in the built-in browser, clicking on them or activating them using the keyboard will cause the browser to load the specified URL.
 
+>Comments
+
+You can insert comments that will not be displayed in the output by starting a line with the # character.
+
+Here's an example:
+
+`Faaa
+`=
+# This line will not be displayed
+This line will
+`=
+``
+
+The above markup produces the following output:
+
+`Faaa`B333
+
+# This line will not be displayed
+This line will
+
+``
+
+
 >Literals
 
 To display literal content, for example source-code, or blocks of text that should not be interpreted by micron, you can use literal blocks, specified by the \\`= tag. Below is the source code of this entire document, presented as a literal block.
diff --git a/nomadnet/ui/textui/MicronParser.py b/nomadnet/ui/textui/MicronParser.py
index de09802..1483832 100644
--- a/nomadnet/ui/textui/MicronParser.py
+++ b/nomadnet/ui/textui/MicronParser.py
@@ -72,6 +72,10 @@ def parse_line(line, state, url_delegate):
             if first_char == "\\":
                 line = line[1:]
 
+            # Check for comments
+            elif first_char == "#":
+                return None
+
             # Check for section heading reset
             elif first_char == "<":
                 state["depth"] = 0