Implemented page caching and cache control headers

2025-12-16 17:03:51 -05:00 · 2021-09-17 18:45:08 +02:00 · 2021-09-17 18:45:08 +02:00 · b8e67721b8
commit b8e67721b8
parent 913c7935c0
4 changed files with 165 additions and 17 deletions
--- a/nomadnet/NomadNetworkApp.py
+++ b/nomadnet/NomadNetworkApp.py
@ -53,6 +53,7 @@ class NomadNetworkApp:
        self.pagespath         = self.configdir+"/storage/pages"
        self.filespath         = self.configdir+"/storage/files"
        self.cachepath         = self.configdir+"/storage/cache"
        self.downloads_path    = os.path.expanduser("~/Downloads")
@ -78,6 +79,9 @@ class NomadNetworkApp:
        if not os.path.isdir(self.filespath):
            os.makedirs(self.filespath)
        if not os.path.isdir(self.cachepath):
            os.makedirs(self.cachepath)
        if os.path.isfile(self.configpath):
            try:
                self.config = ConfigObj(self.configpath)
@ -128,7 +132,7 @@ class NomadNetworkApp:
                if not "node_last_announce" in self.peer_settings:
                    self.peer_settings["node_last_announce"] = None
-                    
+
            except Exception as e:
                RNS.log("Could not load local peer settings from "+self.peersettingspath, RNS.LOG_ERROR)
                RNS.log("The contained exception was: %s" % (str(e)), RNS.LOG_ERROR)
--- a/nomadnet/ui/textui/Browser.py
+++ b/nomadnet/ui/textui/Browser.py
@ -33,6 +33,7 @@ class BrowserFrame(urwid.Frame):
 class Browser:
    DEFAULT_PATH       = "/page/index.mu"
    DEFAULT_TIMEOUT    = 10
    DEFAULT_CACHE_TIME = 12*60*60
    NO_PATH            = 0x00
    PATH_REQUESTED     = 0x01
@ -80,6 +81,7 @@ class Browser:
        self.history_inc = False
        self.history_dec = False
        self.reloading = False
        self.loaded_from_cache = False
        if self.path == None:
            self.path = Browser.DEFAULT_PATH
@ -97,6 +99,13 @@ class Browser:
                path = self.path
            return RNS.hexrep(self.destination_hash, delimit=False)+":"+path
    def url_hash(self, url):
        """Return the cache key for ``url``.

        The key is the undelimited hexadecimal representation of
        ``RNS.Identity.full_hash`` computed over the UTF-8 encoded URL.

        :param url: URL string to hash, or ``None``.
        :returns: Hex string key, or ``None`` when ``url`` is ``None``.
        """
        # None is a singleton, so test identity rather than equality;
        # "==" could be answered by an overridden __eq__.
        if url is None:
            return None

        encoded_url = url.encode("utf-8")
        return RNS.hexrep(RNS.Identity.full_hash(encoded_url), delimit=False)
    def marked_link(self, link_target):
        if self.status == Browser.DONE:
@ -411,6 +420,7 @@ class Browser:
    def reload(self):
        """Drop the cached copy of the current page and load it again.

        Nothing happens while a reload is already flagged as in progress,
        or when the browser status is not ``Browser.DONE``.
        """
        if self.reloading or self.status != Browser.DONE:
            return

        # Flag the reload before anything else so a repeated call is ignored.
        self.reloading = True

        # Remove the cached copy first, so the load cannot be served from it.
        current = self.current_url()
        self.uncache_page(current)
        self.load_page()
    def close_dialogs(self):
@ -445,22 +455,10 @@ class Browser:
    def load_page(self):
-        if self.destination_hash != self.loopback:
+        cached = self.get_cached(self.current_url())
-            load_thread = threading.Thread(target=self.__load)
+        if cached:
            load_thread.setDaemon(True)
            load_thread.start()
        else:
            RNS.log("Browser handling local page: "+str(self.path), RNS.LOG_DEBUG)
            page_path = self.app.pagespath+self.path.replace("/page", "", 1)
            page_data = b"The requested local page did not exist in the file system"
            if os.path.isfile(page_path):
                file = open(page_path, "rb")
                page_data = file.read()
                file.close()
            self.status = Browser.DONE
-            self.page_data = page_data
+            self.page_data = cached
            self.markup = self.page_data.decode("utf-8")
            self.attr_maps = markup_to_attrmaps(self.markup, url_delegate=self)
@ -468,6 +466,7 @@ class Browser:
            self.response_size = None
            self.response_transfer_size = None
            self.saved_file_name = None
            self.loaded_from_cache = True
            self.update_display()
@ -478,6 +477,41 @@ class Browser:
                self.history_inc = False
                self.reloading = False
        else:
            if self.destination_hash != self.loopback:
                load_thread = threading.Thread(target=self.__load)
                load_thread.setDaemon(True)
                load_thread.start()
            else:
                RNS.log("Browser handling local page: "+str(self.path), RNS.LOG_DEBUG)
                page_path = self.app.pagespath+self.path.replace("/page", "", 1)
                page_data = b"The requested local page did not exist in the file system"
                if os.path.isfile(page_path):
                    file = open(page_path, "rb")
                    page_data = file.read()
                    file.close()
                self.status = Browser.DONE
                self.page_data = page_data
                self.markup = self.page_data.decode("utf-8")
                self.attr_maps = markup_to_attrmaps(self.markup, url_delegate=self)
                self.response_progress = 0
                self.response_size = None
                self.response_transfer_size = None
                self.saved_file_name = None
                self.loaded_from_cache = False
                self.update_display()
                if not self.history_inc and not self.history_dec and not self.reloading:
                    self.write_history()
                else:
                    self.history_dec = False
                    self.history_inc = False
                    self.reloading = False
    def __load(self):
        # If an established link exists, but it doesn't match the target
@ -585,6 +619,16 @@ class Browser:
            self.markup = self.page_data.decode("utf-8")
            self.attr_maps = markup_to_attrmaps(self.markup, url_delegate=self)
            self.response_progress = 0
            self.loaded_from_cache = False
            # Simple header handling. Should be expanded when more
            # header tags are added.
            cache_time = Browser.DEFAULT_CACHE_TIME
            if self.markup[:4] == "#!c=":
                endpos = self.markup.find("\n")
                if endpos == -1:
                    endpos = len(self.markup)
                cache_time = int(self.markup[4:endpos])
            self.update_display()
@ -595,9 +639,78 @@ class Browser:
                self.history_inc = False
                self.reloading = False
            if cache_time == 0:
                RNS.log("Received page "+str(self.current_url())+", not caching due to header.", RNS.LOG_DEBUG)
            else:
                RNS.log("Received page "+str(self.current_url())+", caching for %.3f hours." % (cache_time/60/60), RNS.LOG_DEBUG)    
                self.cache_page(cache_time)
        except Exception as e:
            RNS.log("An error occurred while handling response. The contained exception was: "+str(e))
    def uncache_page(self, url):
        """Delete every cache file stored for ``url``.

        All entries in the application's cache directory whose file name
        starts with the hash of ``url`` are unlinked, and each removal is
        logged at debug level.

        :param url: URL whose cached copies should be removed. When no hash
            can be derived for it (``url_hash`` returns ``None``), the call
            is a no-op.
        """
        url_hash = self.url_hash(url)
        if url_hash is None:
            # str.startswith(None) raises TypeError, and without a hash
            # there is nothing that could match anyway.
            return

        for entry in os.listdir(self.app.cachepath):
            if entry.startswith(url_hash):
                cachefile = self.app.cachepath+"/"+entry
                os.unlink(cachefile)
                RNS.log("Removed "+str(cachefile)+" from cache.", RNS.LOG_DEBUG)
    def get_cached(self, url):
        """Return the cached page data for ``url``, or ``None`` on a miss.

        Walks the cache directory. Only entries named
        ``<64 characters>_<expiry>`` are considered; anything else is left
        untouched. Entries whose expiry timestamp has passed are deleted
        along the way. The first unexpired entry whose name starts with the
        hash of ``url`` is read and returned. An entry that raises while
        being processed is logged and removed.

        :param url: URL to look up. When no hash can be derived for it
            (``url_hash`` returns ``None``), stale entries are still purged
            but nothing can match.
        :returns: The cached page as ``bytes``, or ``None`` if there is no
            usable entry.
        """
        url_hash = self.url_hash(url)

        for entry in os.listdir(self.app.cachepath):
            cachepath = self.app.cachepath+"/"+entry
            try:
                components = entry.split("_")
                if len(components) == 2 and len(components[0]) == 64 and len(components[1]) > 0:
                    expires = float(components[1])
                    if time.time() > expires:
                        RNS.log("Removing stale cache entry "+str(entry), RNS.LOG_DEBUG)
                        os.unlink(cachepath)
                    # Without a hash, startswith() would raise TypeError and
                    # the handler below would delete this still-valid entry.
                    elif url_hash is not None and entry.startswith(url_hash):
                        RNS.log("Found "+str(entry)+" in cache.", RNS.LOG_DEBUG)
                        RNS.log("Returning cached page", RNS.LOG_DEBUG)
                        # The context manager closes the handle even if
                        # read() fails.
                        with open(cachepath, "rb") as cached_file:
                            return cached_file.read()

            except Exception as e:
                RNS.log("Error while parsing cache entry "+str(cachepath)+", removing it.", RNS.LOG_ERROR)
                RNS.log("The contained exception was: "+str(e), RNS.LOG_ERROR)
                try:
                    os.unlink(cachepath)
                except Exception as unlink_error:
                    RNS.log("Additionally, an exception occurred while unlinking the entry: "+str(unlink_error), RNS.LOG_ERROR)
                    RNS.log("You will probably need to remove this entry manually by deleting the file: "+str(cachepath), RNS.LOG_ERROR)

        return None
    def cache_page(self, cache_time):
        """Store the current page data in the cache directory.

        Any existing cache files for the current URL are removed first.
        The data in ``self.page_data`` is then written to a file named
        ``<url hash>_<expiry>``, where the expiry is the current time plus
        ``cache_time``. Failures are logged and not raised.

        :param cache_time: Number of seconds, counted from now, after which
            the entry expires.
        """
        url = self.current_url()
        url_hash = self.url_hash(url)
        if url_hash is None:
            RNS.log("Could not cache page "+str(url), RNS.LOG_ERROR)
            return

        try:
            self.uncache_page(url)
            cache_expires = time.time()+cache_time
            filename = url_hash+"_"+str(cache_expires)
            cachefile = self.app.cachepath+"/"+filename
            # The context manager closes the handle even if write() fails.
            with open(cachefile, "wb") as cache_file:
                cache_file.write(self.page_data)

            RNS.log("Cached page "+str(url)+" to "+str(cachefile), RNS.LOG_DEBUG)

        except Exception as e:
            RNS.log("Could not write cache file for page "+str(url), RNS.LOG_ERROR)
            RNS.log("The contained exception was: "+str(e), RNS.LOG_ERROR)
    def file_received(self, request_receipt):
        try:
@ -620,7 +733,7 @@ class Browser:
            self.update_display()
        except Exception as e:
-            RNS.log("An error occurred while handling file response. The contained exception was: "+str(e))
+            RNS.log("An error occurred while handling file response. The contained exception was: "+str(e), RNS.LOG_ERROR)
    def request_failed(self, request_receipt=None):
@ -668,6 +781,8 @@ class Browser:
            stats_string = "  "+self.g["page"]+size_str(self.response_size)
            stats_string += "   "+self.g["arrow_d"]+size_str(self.response_transfer_size)+" in "+response_time_str
            stats_string += "s   "+self.g["speed"]+size_str(self.response_transfer_size/self.response_time, suffix="b")+"/s"
        elif self.loaded_from_cache:
            stats_string = " (cached)"
        else:
            stats_string = ""
--- a/nomadnet/ui/textui/Guide.py
+++ b/nomadnet/ui/textui/Guide.py
@ -232,6 +232,8 @@ Nomad Network nodes can host pages similar to web pages, that other peers can re
 To add pages to your node, place micron files in the `*pages`* directory of your Nomad Network program's `*storage`* directory. By default, the path to this will be `!~/.nomadnetwork/storage/pages`!. You should probably create the file `!index.mu`! first, as this is the page that will get served by default to a connecting peer.
 You can control how long a peer will cache your pages by including the cache header in a page. To do so, the first line of your page must start with `!#!c=X`!, where `!X`! is the cache time in seconds. To tell the peer to always load the page from your node, and never cache it, set the cache time to zero. You should only do this if there is a real need, for example if your page displays dynamic content that `*must`* be updated at every page view. The default caching time is 12 hours. In most cases, you should not need to include the cache control header in your pages.
 Pages are static in this version, but the next release of Nomad Network will add the ability to use a preprocessor such as PHP, bash, Python (or whatever you prefer) to generate dynamic pages.
 >>Files
@ -553,6 +555,29 @@ Here is `F00f`_`[a more visible link`1385edace36466a6b3dd:/page/index.mu]`_`f
 When links like these are displayed in the built-in browser, clicking on them or activating them using the keyboard will cause the browser to load the specified URL.
 >Comments
 You can insert comments that will not be displayed in the output by starting a line with the # character.
 Here's an example:
 `Faaa
 `=
 # This line will not be displayed
 This line will
 `=
 ``
 The above markup produces the following output:
 `Faaa`B333
 # This line will not be displayed
 This line will
 ``
 >Literals
 To display literal content, for example source-code, or blocks of text that should not be interpreted by micron, you can use literal blocks, specified by the \\`= tag. Below is the source code of this entire document, presented as a literal block.
--- a/nomadnet/ui/textui/MicronParser.py
+++ b/nomadnet/ui/textui/MicronParser.py
@ -72,6 +72,10 @@ def parse_line(line, state, url_delegate):
            if first_char == "\\":
                line = line[1:]
            # Check for comments
            elif first_char == "#":
                return None
            # Check for section heading reset
            elif first_char == "<":
                state["depth"] = 0