Module TermTk.TTkCore.filebuffer

Expand source code
#!/usr/bin/env python3

# MIT License
#
# Copyright (c) 2021 Eugenio Parodi <ceccopierangiolieugenio AT googlemail DOT com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os
import re
import threading
from TermTk.TTkCore.log import TTkLog
from TermTk.TTkCore.signal import pyTTkSignal, pyTTkSlot

'''
             w1   w3   w2   w5
    Buffer |----|----|----|----|            cache buffer
             |      \ /       \
             |       x         \
             |      / \         \
    Pages  | 0  | 2  | 1  |None| 3  |None|  index to buffer
    File   |----|----|----|----|----|----|  view as list of windows
             w1   w2   w3   w4   w5   w6
'''
class TTkFileBuffer():
    """Read-only, line-addressable view of a text file with a page cache.

    A background thread started by ``__init__`` runs :meth:`createIndex`,
    which records the offset at which every line of the file starts.
    Lines are then served either straight from the file
    (:meth:`getLineDirect`) or through a cache of pages (:meth:`getLine`).
    Each page holds ``window`` consecutive lines, at most ``numWindows``
    pages are kept, and the least recently used page is dropped first.

    Signals:
        indexUpdated(float): emitted after each indexed chunk with the
            fraction of the file processed so far, and with ``1.0`` when
            indexing ends.
        indexed(): emitted once the whole file has been indexed.
    """

    class _Page:
        """One cached page: ``size`` consecutive lines of the file."""
        __slots__ = ('_page', '_size', '_buffer')
        def __init__(self, page, size):
            self._page = page
            self._size = size
            # Pre-sized so the loader can fill the lines by index
            self._buffer = [""]*self._size
        @property
        def buffer(self):
            """List holding the lines of this page."""
            return self._buffer
        @property
        def page(self):
            """Index of this page inside the page table."""
            return self._page

    __slots__ = (
        '_indexes', '_indexesMutex',
        '_filename', '_fd',
        '_pages', '_buffer',
        '_window', '_numW',
        '_width',
        #Signals
        'indexUpdated', 'indexed')
    def __init__(self, filename, window, numWindows):
        """Open ``filename`` and start indexing it in a background thread.

        Args:
            filename: path of the text file to expose.
            window: number of lines stored in each cached page.
            numWindows: maximum number of pages kept in the cache.
        """
        # Signals
        self.indexUpdated = pyTTkSignal(float)
        self.indexed = pyTTkSignal()

        self._window = window
        self._numW = numWindows
        self._filename = filename
        # Offset of the first character of every known line; line 0 starts at 0
        self._indexes = [0]
        # Guards _indexes and _pages, both extended by the indexing thread
        self._indexesMutex = threading.Lock()
        self._width=0
        # Cached pages ordered from least to most recently used
        self._buffer = [None]*self._numW
        # Page table: _pages[n] is the cached page n, or None when not loaded
        self._pages = [None]
        self._fd = open(self._filename,'r')
        threading.Thread(target=self.createIndex).start()

    def __del__(self):
        """Close the read handle, if it was ever opened."""
        # __init__ may have failed before _fd was assigned (e.g. missing file)
        fd = getattr(self, '_fd', None)
        if fd is not None:
            fd.close()

    def getLen(self):
        """Return the number of line starts indexed so far."""
        return len(self._indexes)

    def getWidth(self, indexes=None):
        """Return the width computed by :meth:`createIndex` (0 until it ends).

        ``indexes`` is accepted but ignored.
        """
        return self._width

    def getLineDirect(self, line):
        """Return line ``line`` read straight from the file, bypassing the cache.

        Returns ``""`` when ``line`` is past the last indexed line.
        """
        # "with" guarantees the mutex is released even if seek() raises
        with self._indexesMutex:
            if line >= len(self._indexes):
                return ""
            self._fd.seek(self._indexes[line])
        return self._fd.readline()

    def getLine(self, line):
        """Return line ``line`` through the page cache.

        On a cache miss the least recently used page is dropped and the
        page containing ``line`` is read from the file; on a hit the page
        is marked as the most recently used.

        Returns ``""`` when ``line`` is negative or past the last indexed
        line.
        """
        # The range check and the offset lookup share one critical section:
        # the indexer extends _indexes and _pages under this same lock, so a
        # line accepted here always has its page-table slot available.
        with self._indexesMutex:
            # A negative line would map to a negative page and overwrite the
            # wrong page-table slot, so it is treated as out of range
            if line < 0 or line >= len(self._indexes):
                return ""
            page, offset = divmod(line, self._window)
            pageStart = self._indexes[line-offset]
        if self._pages[page] is None:
            # Dispose of the pages to the bottom
            dispose = self._buffer.pop(0)
            if dispose is not None:
                self._pages[dispose.page] = None
            self._pages[page] = self._Page(page, self._window)
            self._buffer.append(self._pages[page])
            self._fd.seek(pageStart)
            buffer = self._pages[page].buffer
            # readline() returns "" at EOF, so a short last page is padded
            for i in range(self._window):
                buffer[i] = self._fd.readline()
        else:
            # Push the page to the top of the buffer
            i = self._buffer.index(self._pages[page])
            p = self._buffer.pop(i)
            self._buffer.append(p)
        return self._pages[page].buffer[offset]

    def getSlice(self, line, length):
        """Return ``length`` consecutive lines starting at ``line`` (via getLine)."""
        return [self.getLine(i) for i in range(line, line+length)]

    def createIndex(self):
        """Scan the whole file and record where every line starts.

        The file is read in chunks of about 16M; after each chunk the new
        offsets are published under the mutex and ``indexUpdated`` is
        emitted with the fraction of the file processed.  When the scan
        ends the width is set to the largest distance between two
        consecutive line starts (in bytes, line terminator included),
        ``indexUpdated`` is emitted with ``1.0`` and ``indexed`` is emitted.
        """
        offset = 0
        fileSize = os.stat(self._filename).st_size
        chunkSize = 0x1000000 # ~16M
        # Binary mode: the offsets are later handed to seek(), which works on
        # byte positions.  Counting decoded characters gives wrong positions
        # as soon as the file has "\r\n" line endings or multi-byte characters.
        with open(self._filename,'rb') as infile:
            while (chunk:=infile.read(chunkSize)):
                indexes = []
                start = 0
                while (index:=chunk.find(b'\n',start))!=-1:
                    # A line starts right after each newline byte
                    indexes.append(index+offset+1)
                    start = index+1
                with self._indexesMutex:
                    self._indexes += indexes
                    # Keep one page-table slot for every window of lines
                    self._pages += [None]*(1+(len(self._indexes)//self._window)-len(self._pages))
                offset+=len(chunk)
                # fileSize is 0 only if the file grew after the stat() call
                self.indexUpdated.emit(offset/fileSize if fileSize else 1.0)
        starts = self._indexes
        # default=0 covers a file without any newline (a single index)
        self._width = max((starts[i+1]-starts[i] for i in range(len(starts)-1)), default=0)
        self.indexUpdated.emit(1.0)
        self.indexed.emit()

    def searchRe(self, regex):
        """Return the 0-based numbers of the lines matched by ``regex``.

        ``re.match`` is used, so the pattern must match at the beginning
        of the line.
        """
        rr = re.compile(regex)
        with open(self._filename,'r') as infile:
            return [lineNo for lineNo, text in enumerate(infile) if rr.match(text)]

    def search(self, txt):
        """Return the 0-based numbers of the lines that contain ``txt``."""
        with open(self._filename,'r') as infile:
            return [lineNo for lineNo, text in enumerate(infile) if txt in text]

Classes

class TTkFileBuffer (filename, window, numWindows)
Expand source code
class TTkFileBuffer():
    """Read-only, line-addressable view of a text file with a page cache.

    A background thread started by ``__init__`` runs :meth:`createIndex`,
    which records the offset at which every line of the file starts.
    Lines are then served either straight from the file
    (:meth:`getLineDirect`) or through a cache of pages (:meth:`getLine`).
    Each page holds ``window`` consecutive lines, at most ``numWindows``
    pages are kept, and the least recently used page is dropped first.

    Signals:
        indexUpdated(float): emitted after each indexed chunk with the
            fraction of the file processed so far, and with ``1.0`` when
            indexing ends.
        indexed(): emitted once the whole file has been indexed.
    """

    class _Page:
        """One cached page: ``size`` consecutive lines of the file."""
        __slots__ = ('_page', '_size', '_buffer')
        def __init__(self, page, size):
            self._page = page
            self._size = size
            # Pre-sized so the loader can fill the lines by index
            self._buffer = [""]*self._size
        @property
        def buffer(self):
            """List holding the lines of this page."""
            return self._buffer
        @property
        def page(self):
            """Index of this page inside the page table."""
            return self._page

    __slots__ = (
        '_indexes', '_indexesMutex',
        '_filename', '_fd',
        '_pages', '_buffer',
        '_window', '_numW',
        '_width',
        #Signals
        'indexUpdated', 'indexed')
    def __init__(self, filename, window, numWindows):
        """Open ``filename`` and start indexing it in a background thread.

        Args:
            filename: path of the text file to expose.
            window: number of lines stored in each cached page.
            numWindows: maximum number of pages kept in the cache.
        """
        # Signals
        self.indexUpdated = pyTTkSignal(float)
        self.indexed = pyTTkSignal()

        self._window = window
        self._numW = numWindows
        self._filename = filename
        # Offset of the first character of every known line; line 0 starts at 0
        self._indexes = [0]
        # Guards _indexes and _pages, both extended by the indexing thread
        self._indexesMutex = threading.Lock()
        self._width=0
        # Cached pages ordered from least to most recently used
        self._buffer = [None]*self._numW
        # Page table: _pages[n] is the cached page n, or None when not loaded
        self._pages = [None]
        self._fd = open(self._filename,'r')
        threading.Thread(target=self.createIndex).start()

    def __del__(self):
        """Close the read handle, if it was ever opened."""
        # __init__ may have failed before _fd was assigned (e.g. missing file)
        fd = getattr(self, '_fd', None)
        if fd is not None:
            fd.close()

    def getLen(self):
        """Return the number of line starts indexed so far."""
        return len(self._indexes)

    def getWidth(self, indexes=None):
        """Return the width computed by :meth:`createIndex` (0 until it ends).

        ``indexes`` is accepted but ignored.
        """
        return self._width

    def getLineDirect(self, line):
        """Return line ``line`` read straight from the file, bypassing the cache.

        Returns ``""`` when ``line`` is past the last indexed line.
        """
        # "with" guarantees the mutex is released even if seek() raises
        with self._indexesMutex:
            if line >= len(self._indexes):
                return ""
            self._fd.seek(self._indexes[line])
        return self._fd.readline()

    def getLine(self, line):
        """Return line ``line`` through the page cache.

        On a cache miss the least recently used page is dropped and the
        page containing ``line`` is read from the file; on a hit the page
        is marked as the most recently used.

        Returns ``""`` when ``line`` is negative or past the last indexed
        line.
        """
        # The range check and the offset lookup share one critical section:
        # the indexer extends _indexes and _pages under this same lock, so a
        # line accepted here always has its page-table slot available.
        with self._indexesMutex:
            # A negative line would map to a negative page and overwrite the
            # wrong page-table slot, so it is treated as out of range
            if line < 0 or line >= len(self._indexes):
                return ""
            page, offset = divmod(line, self._window)
            pageStart = self._indexes[line-offset]
        if self._pages[page] is None:
            # Dispose of the pages to the bottom
            dispose = self._buffer.pop(0)
            if dispose is not None:
                self._pages[dispose.page] = None
            self._pages[page] = self._Page(page, self._window)
            self._buffer.append(self._pages[page])
            self._fd.seek(pageStart)
            buffer = self._pages[page].buffer
            # readline() returns "" at EOF, so a short last page is padded
            for i in range(self._window):
                buffer[i] = self._fd.readline()
        else:
            # Push the page to the top of the buffer
            i = self._buffer.index(self._pages[page])
            p = self._buffer.pop(i)
            self._buffer.append(p)
        return self._pages[page].buffer[offset]

    def getSlice(self, line, length):
        """Return ``length`` consecutive lines starting at ``line`` (via getLine)."""
        return [self.getLine(i) for i in range(line, line+length)]

    def createIndex(self):
        """Scan the whole file and record where every line starts.

        The file is read in chunks of about 16M; after each chunk the new
        offsets are published under the mutex and ``indexUpdated`` is
        emitted with the fraction of the file processed.  When the scan
        ends the width is set to the largest distance between two
        consecutive line starts (in bytes, line terminator included),
        ``indexUpdated`` is emitted with ``1.0`` and ``indexed`` is emitted.
        """
        offset = 0
        fileSize = os.stat(self._filename).st_size
        chunkSize = 0x1000000 # ~16M
        # Binary mode: the offsets are later handed to seek(), which works on
        # byte positions.  Counting decoded characters gives wrong positions
        # as soon as the file has "\r\n" line endings or multi-byte characters.
        with open(self._filename,'rb') as infile:
            while (chunk:=infile.read(chunkSize)):
                indexes = []
                start = 0
                while (index:=chunk.find(b'\n',start))!=-1:
                    # A line starts right after each newline byte
                    indexes.append(index+offset+1)
                    start = index+1
                with self._indexesMutex:
                    self._indexes += indexes
                    # Keep one page-table slot for every window of lines
                    self._pages += [None]*(1+(len(self._indexes)//self._window)-len(self._pages))
                offset+=len(chunk)
                # fileSize is 0 only if the file grew after the stat() call
                self.indexUpdated.emit(offset/fileSize if fileSize else 1.0)
        starts = self._indexes
        # default=0 covers a file without any newline (a single index)
        self._width = max((starts[i+1]-starts[i] for i in range(len(starts)-1)), default=0)
        self.indexUpdated.emit(1.0)
        self.indexed.emit()

    def searchRe(self, regex):
        """Return the 0-based numbers of the lines matched by ``regex``.

        ``re.match`` is used, so the pattern must match at the beginning
        of the line.
        """
        rr = re.compile(regex)
        with open(self._filename,'r') as infile:
            return [lineNo for lineNo, text in enumerate(infile) if rr.match(text)]

    def search(self, txt):
        """Return the 0-based numbers of the lines that contain ``txt``."""
        with open(self._filename,'r') as infile:
            return [lineNo for lineNo, text in enumerate(infile) if txt in text]

Instance variables

var indexUpdated

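Signal created as pyTTkSignal(float). createIndex emits it after each indexed chunk with the fraction of the file processed so far, and with 1.0 when indexing ends.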

var indexed

Signal created as pyTTkSignal() with no arguments. createIndex emits it once, after the whole file has been indexed.

Methods

def createIndex(self)
Expand source code
def createIndex(self):
    """Scan the whole file and record where every line starts.

    The file is read in chunks of about 16M; after each chunk the new
    offsets are published under the mutex and ``indexUpdated`` is emitted
    with the fraction of the file processed.  When the scan ends the width
    is set to the largest distance between two consecutive line starts
    (in bytes, line terminator included), ``indexUpdated`` is emitted with
    ``1.0`` and ``indexed`` is emitted.
    """
    offset = 0
    fileSize = os.stat(self._filename).st_size
    chunkSize = 0x1000000 # ~16M
    # Binary mode: the offsets are later handed to seek(), which works on
    # byte positions.  Counting decoded characters gives wrong positions as
    # soon as the file has "\r\n" line endings or multi-byte characters.
    with open(self._filename,'rb') as infile:
        while (chunk:=infile.read(chunkSize)):
            indexes = []
            start = 0
            while (index:=chunk.find(b'\n',start))!=-1:
                # A line starts right after each newline byte
                indexes.append(index+offset+1)
                start = index+1
            # "with" releases the mutex even if one of the updates raises
            with self._indexesMutex:
                self._indexes += indexes
                # Keep one page-table slot for every window of lines
                self._pages += [None]*(1+(len(self._indexes)//self._window)-len(self._pages))
            offset+=len(chunk)
            # fileSize is 0 only if the file grew after the stat() call
            self.indexUpdated.emit(offset/fileSize if fileSize else 1.0)
    starts = self._indexes
    # default=0 covers a file without any newline (a single index)
    self._width = max((starts[i+1]-starts[i] for i in range(len(starts)-1)), default=0)
    self.indexUpdated.emit(1.0)
    self.indexed.emit()
def getLen(self)
Expand source code
def getLen(self):
    """Return how many line-start offsets are currently in the index."""
    lineStarts = self._indexes
    return len(lineStarts)
def getLine(self, line)
Expand source code
def getLine(self, line):
    """Return line ``line`` through the page cache.

    On a cache miss the least recently used page is dropped and the page
    containing ``line`` is read from the file; on a hit the page is marked
    as the most recently used.

    Returns ``""`` when ``line`` is negative or past the last indexed line.
    """
    # The range check and the offset lookup share one critical section, and
    # "with" releases the mutex even when an exception is raised.
    with self._indexesMutex:
        # A negative line would map to a negative page and overwrite the
        # wrong page-table slot, so it is treated as out of range
        if line < 0 or line >= len(self._indexes):
            return ""
        page, offset = divmod(line, self._window)
        pageStart = self._indexes[line-offset]
    if self._pages[page] is None:
        # Dispose of the pages to the bottom
        dispose = self._buffer.pop(0)
        if dispose is not None:
            self._pages[dispose.page] = None
        self._pages[page] = self._Page(page, self._window)
        self._buffer.append(self._pages[page])
        self._fd.seek(pageStart)
        buffer = self._pages[page].buffer
        # readline() returns "" at EOF, so a short last page is padded
        for i in range(self._window):
            buffer[i] = self._fd.readline()
    else:
        # Push the page to the top of the buffer
        i = self._buffer.index(self._pages[page])
        p = self._buffer.pop(i)
        self._buffer.append(p)
    return self._pages[page].buffer[offset]
def getLineDirect(self, line)
Expand source code
def getLineDirect(self, line):
    """Return line ``line`` read straight from the file, bypassing the cache.

    Returns ``""`` when ``line`` is past the last indexed line.
    """
    # "with" guarantees the mutex is released even if seek() raises; a bare
    # acquire()/release() pair would leave it locked and block later calls.
    with self._indexesMutex:
        if line >= len(self._indexes):
            return ""
        self._fd.seek(self._indexes[line])
    return self._fd.readline()
def getSlice(self, line, length)
Expand source code
def getSlice(self, line, length):
    """Return ``length`` consecutive lines starting at ``line``.

    Every line is fetched with ``getLine``, in increasing order.
    """
    lastExclusive = line + length
    return [self.getLine(lineNo) for lineNo in range(line, lastExclusive)]
def getWidth(self, indexes=None)
Expand source code
def getWidth(self, indexes=None):
    """Return the stored width; the ``indexes`` argument is not used."""
    storedWidth = self._width
    return storedWidth
def search(self, txt)
Expand source code
def search(self, txt):
    """Return the 0-based numbers of the lines that contain ``txt``.

    The file is reopened and scanned line by line on every call.
    """
    with open(self._filename,'r') as infile:
        return [lineNo for lineNo, text in enumerate(infile) if txt in text]
def searchRe(self, regex)
Expand source code
def searchRe(self, regex):
    """Return the 0-based numbers of the lines matched by ``regex``.

    The pattern is applied with ``match``, so it must match at the
    beginning of the line.  The file is reopened on every call.
    """
    pattern = re.compile(regex)
    with open(self._filename,'r') as infile:
        return [lineNo for lineNo, text in enumerate(infile) if pattern.match(text)]