Implemented page caching and cache control headers

2025-12-17 09:24:07 -05:00 · 2021-09-17 18:45:08 +02:00 · 2021-09-17 18:45:08 +02:00 · b8e67721b8
commit b8e67721b8
parent 913c7935c0
4 changed files with 165 additions and 17 deletions
--- a/nomadnet/NomadNetworkApp.py
+++ b/nomadnet/NomadNetworkApp.py
@ -53,6 +53,7 @@ class NomadNetworkApp:

        self.pagespath         = self.configdir+"/storage/pages"
        self.filespath         = self.configdir+"/storage/files"
+        self.cachepath         = self.configdir+"/storage/cache"

        self.downloads_path    = os.path.expanduser("~/Downloads")

@ -78,6 +79,9 @@ class NomadNetworkApp:
        if not os.path.isdir(self.filespath):
            os.makedirs(self.filespath)

+        if not os.path.isdir(self.cachepath):
+            os.makedirs(self.cachepath)
+
        if os.path.isfile(self.configpath):
            try:
                self.config = ConfigObj(self.configpath)
--- a/nomadnet/ui/textui/Browser.py
+++ b/nomadnet/ui/textui/Browser.py
@ -33,6 +33,7 @@ class BrowserFrame(urwid.Frame):
 class Browser:
    DEFAULT_PATH       = "/page/index.mu"
    DEFAULT_TIMEOUT    = 10
+    DEFAULT_CACHE_TIME = 12*60*60

    NO_PATH            = 0x00
    PATH_REQUESTED     = 0x01
@ -80,6 +81,7 @@ class Browser:
        self.history_inc = False
        self.history_dec = False
        self.reloading = False
+        self.loaded_from_cache = False

        if self.path == None:
            self.path = Browser.DEFAULT_PATH
@ -97,6 +99,13 @@ class Browser:
                path = self.path
            return RNS.hexrep(self.destination_hash, delimit=False)+":"+path

+    def url_hash(self, url):
+        # Hex digest of the UTF-8 encoded URL (None for a None URL); the
+        # cache methods use it as the file name prefix of a cache entry.
+        if url is None:
+            return None
+        return RNS.hexrep(RNS.Identity.full_hash(url.encode("utf-8")), delimit=False)
+

    def marked_link(self, link_target):
        if self.status == Browser.DONE:
@ -411,6 +420,7 @@ class Browser:
    def reload(self):
        if not self.reloading and self.status == Browser.DONE:
            self.reloading = True
+            # Drop the cached copy first: load_page() serves a cached page
+            # when one exists, so without this the reload would not refetch.
+            self.uncache_page(self.current_url())
            self.load_page()

    def close_dialogs(self):
@ -445,6 +455,29 @@ class Browser:


    def load_page(self):
+        cached = self.get_cached(self.current_url())
+        if cached:
+            self.status = Browser.DONE
+            self.page_data = cached
+            self.markup = self.page_data.decode("utf-8")
+            self.attr_maps = markup_to_attrmaps(self.markup, url_delegate=self)
+            
+            self.response_progress = 0
+            self.response_size = None
+            self.response_transfer_size = None
+            self.saved_file_name = None
+            self.loaded_from_cache = True
+
+            self.update_display()
+
+            if not self.history_inc and not self.history_dec and not self.reloading:
+                self.write_history()
+            else:
+                self.history_dec = False
+                self.history_inc = False
+                self.reloading = False
+
+        else:
            if self.destination_hash != self.loopback:
                load_thread = threading.Thread(target=self.__load)
                load_thread.setDaemon(True)
@ -468,6 +501,7 @@ class Browser:
                self.response_size = None
                self.response_transfer_size = None
                self.saved_file_name = None
+                self.loaded_from_cache = False

                self.update_display()

@ -585,6 +619,16 @@ class Browser:
            self.markup = self.page_data.decode("utf-8")
            self.attr_maps = markup_to_attrmaps(self.markup, url_delegate=self)
            self.response_progress = 0
+            self.loaded_from_cache = False
+
+            # Simple header handling. Should be expanded when more
+            # header tags are added.
+            cache_time = Browser.DEFAULT_CACHE_TIME
+            if self.markup[:4] == "#!c=":
+                endpos = self.markup.find("\n")
+                if endpos == -1:
+                    endpos = len(self.markup)
+                cache_time = int(self.markup[4:endpos])

            self.update_display()

@ -595,9 +639,78 @@ class Browser:
                self.history_inc = False
                self.reloading = False

+            if cache_time == 0:
+                RNS.log("Received page "+str(self.current_url())+", not caching due to header.", RNS.LOG_DEBUG)
+            else:
+                RNS.log("Received page "+str(self.current_url())+", caching for %.3f hours." % (cache_time/60/60), RNS.LOG_DEBUG)    
+                self.cache_page(cache_time)
+
        except Exception as e:
            RNS.log("An error occurred while handling response. The contained exception was: "+str(e))

+    def uncache_page(self, url):
+        # Remove every cache file whose name starts with the hash of url.
+        url_hash = self.url_hash(url)
+        if url_hash is None:
+            # url_hash() returns None for a None URL. str.startswith(None)
+            # would raise TypeError, and there is nothing to remove anyway.
+            return
+
+        for filename in os.listdir(self.app.cachepath):
+            if filename.startswith(url_hash):
+                cachefile = self.app.cachepath+"/"+filename
+                os.unlink(cachefile)
+                RNS.log("Removed "+str(cachefile)+" from cache.", RNS.LOG_DEBUG)
+
+    def get_cached(self, url):
+        # Return the cached page data (bytes) for url, or None when no
+        # unexpired entry exists. Cache files are named
+        # "<url hash>_<expiry timestamp>". Every expired entry met during
+        # the scan is deleted, as is any entry that raises while being
+        # parsed or read.
+        url_hash = self.url_hash(url)
+
+        for filename in os.listdir(self.app.cachepath):
+            cachepath = self.app.cachepath+"/"+filename
+            try:
+                components = filename.split("_")
+                if len(components) == 2 and len(components[0]) == 64 and len(components[1]) > 0:
+                    expires = float(components[1])
+
+                    if time.time() > expires:
+                        RNS.log("Removing stale cache entry "+str(filename), RNS.LOG_DEBUG)
+                        os.unlink(cachepath)
+
+                    # url_hash is None for a None URL. Checking it here keeps
+                    # startswith(None) from raising TypeError, which the
+                    # handler below would answer by deleting a valid entry.
+                    elif url_hash is not None and filename.startswith(url_hash):
+                        RNS.log("Found "+str(filename)+" in cache.", RNS.LOG_DEBUG)
+                        RNS.log("Returning cached page", RNS.LOG_DEBUG)
+                        # The context manager closes the file even if read() raises.
+                        with open(cachepath, "rb") as cache_file:
+                            return cache_file.read()
+
+            except Exception as e:
+                RNS.log("Error while parsing cache entry "+str(cachepath)+", removing it.", RNS.LOG_ERROR)
+                RNS.log("The contained exception was: "+str(e), RNS.LOG_ERROR)
+                try:
+                    os.unlink(cachepath)
+                except Exception as unlink_error:
+                    RNS.log("Additionally, an exception occurred while unlinking the entry: "+str(unlink_error), RNS.LOG_ERROR)
+                    RNS.log("You will probably need to remove this entry manually by deleting the file: "+str(cachepath), RNS.LOG_ERROR)
+
+        return None
+
+
+    def cache_page(self, cache_time):
+        # Store self.page_data in the cache directory for cache_time seconds.
+        # The file is named "<url hash>_<expiry timestamp>"; any earlier
+        # entry for the same URL is removed first. Failures are logged and
+        # not raised.
+        url_hash = self.url_hash(self.current_url())
+        if url_hash is None:
+            RNS.log("Could not cache page "+str(self.current_url()), RNS.LOG_ERROR)
+            return
+
+        try:
+            self.uncache_page(self.current_url())
+            cache_expires = time.time()+cache_time
+            cachefile = self.app.cachepath+"/"+url_hash+"_"+str(cache_expires)
+            # The context manager closes the file even if write() raises.
+            with open(cachefile, "wb") as cache_file:
+                cache_file.write(self.page_data)
+            RNS.log("Cached page "+str(self.current_url())+" to "+str(cachefile), RNS.LOG_DEBUG)
+
+        except Exception as e:
+            RNS.log("Could not write cache file for page "+str(self.current_url()), RNS.LOG_ERROR)
+            RNS.log("The contained exception was: "+str(e), RNS.LOG_ERROR)
+

    def file_received(self, request_receipt):
        try:
@ -620,7 +733,7 @@ class Browser:

            self.update_display()
        except Exception as e:
-            RNS.log("An error occurred while handling file response. The contained exception was: "+str(e))
+            RNS.log("An error occurred while handling file response. The contained exception was: "+str(e), RNS.LOG_ERROR)

    
    def request_failed(self, request_receipt=None):
@ -668,6 +781,8 @@ class Browser:
            stats_string = "  "+self.g["page"]+size_str(self.response_size)
            stats_string += "   "+self.g["arrow_d"]+size_str(self.response_transfer_size)+" in "+response_time_str
            stats_string += "s   "+self.g["speed"]+size_str(self.response_transfer_size/self.response_time, suffix="b")+"/s"
+        elif self.loaded_from_cache:
+            stats_string = " (cached)"
        else:
            stats_string = ""

--- a/nomadnet/ui/textui/Guide.py
+++ b/nomadnet/ui/textui/Guide.py
@ -232,6 +232,8 @@ Nomad Network nodes can host pages similar to web pages, that other peers can re

 To add pages to your node, place micron files in the `*pages`* directory of your Nomad Network programs `*storage`* directory. By default, the path to this will be `!~/.nomadnetwork/storage/pages`!. You should probably create the file `!index.mu`! first, as this is the page that will get served by default to a connecting peer.

+You can control how long a peer will cache your pages by including the cache header in a page. To do so, the first line of your page must start with `!#!c=X`!, where `!X`! is the cache time in seconds. To tell the peer to always load the page from your node, and never cache it, set the cache time to zero. You should only do this if there is a real need, for example if your page displays dynamic content that `*must`* be updated at every page view. The default caching time is 12 hours. In most cases, you should not need to include the cache control header in your pages.
+
 Pages are static in this version, but the next release of Nomad Network will add the ability to use a preprocessor such as PHP, bash, Python (or whatever you prefer) to generate dynamic pages.

 >>Files
@ -553,6 +555,29 @@ Here is `F00f`_`[a more visible link`1385edace36466a6b3dd:/page/index.mu]`_`f

 When links like these are displayed in the built-in browser, clicking on them or activating them using the keyboard will cause the browser to load the specified URL.

+>Comments
+
+You can insert comments that will not be displayed in the output by starting a line with the # character.
+
+Here's an example:
+
+`Faaa
+`=
+# This line will not be displayed
+This line will
+`=
+``
+
+The above markup produces the following output:
+
+`Faaa`B333
+
+# This line will not be displayed
+This line will
+
+``
+
+
 >Literals

 To display literal content, for example source-code, or blocks of text that should not be interpreted by micron, you can use literal blocks, specified by the \\`= tag. Below is the source code of this entire document, presented as a literal block.
--- a/nomadnet/ui/textui/MicronParser.py
+++ b/nomadnet/ui/textui/MicronParser.py
@ -72,6 +72,10 @@ def parse_line(line, state, url_delegate):
            if first_char == "\\":
                line = line[1:]

+            # Check for comments
+            elif first_char == "#":
+                return None
+
            # Check for section heading reset
            elif first_char == "<":
                state["depth"] = 0