#!/usr/bin/env python3 # MIT License # # Copyright (c) 2021 Eugenio Parodi # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import os
import re
import threading

from TermTk.TTkCore.log import TTkLog
from TermTk.TTkCore.signal import pyTTkSignal, pyTTkSlot


class TTkFileBuffer():
    r"""Line-oriented, page-cached reader for a text file.

    The file is seen as a sequence of fixed-size "windows" (pages) of
    ``window`` lines each.  At most ``numWindows`` pages are kept in memory;
    the least recently used page is dropped when a new one has to be loaded.
    A background thread started by the constructor scans the file and
    records the offset at which every line starts.

                  w1   w3   w2   w5
        Buffer  |----|----|----|----|            cache buffer
                  |    \  /      \
                  |     x         \
                  |    /  \        \
        Pages   | 0  | 2  | 1  |None| 3  |None|  index to buffer
        File    |----|----|----|----|----|----|  view as list of windows
                  w1   w2   w3   w4   w5   w6

    Signals:
        indexUpdated(float): emitted after every indexed chunk with the
            fraction of the file processed so far, and with 1.0 at the end.
        indexed(): emitted once when the whole file has been indexed.
    """

    class _Page:
        """One cached window: its page number and the lines it holds."""

        __slots__ = ('_page', '_size', '_buffer')

        def __init__(self, page, size):
            self._page = page
            self._size = size
            # Pre-sized so lines can be stored by position within the page.
            self._buffer = [""] * self._size

        @property
        def buffer(self):
            """The list of ``size`` line strings held by this page."""
            return self._buffer

        @property
        def page(self):
            """The page number this object caches."""
            return self._page

    __slots__ = (
        '_indexes', '_indexesMutex',
        '_filename', '_fd',
        '_pages', '_buffer',
        '_window', '_numW', '_width',
        # Signals
        'indexUpdated', 'indexed')

    def __init__(self, filename, window, numWindows):
        """Open *filename* and start indexing it in a background thread.

        Args:
            filename: path of the file to read.
            window: number of lines held by one cached page.
            numWindows: number of pages kept in the cache.
        """
        # Signals
        self.indexUpdated = pyTTkSignal(float)
        self.indexed = pyTTkSignal()

        self._window = window
        self._numW = numWindows
        self._filename = filename
        # Offset of the start of every known line; line 0 starts at 0.
        self._indexes = [0]
        self._indexesMutex = threading.Lock()
        self._width = 0
        # Cache slots ordered from least (front) to most (back) recently used.
        self._buffer = [None] * self._numW
        # One entry per page: the cached _Page, or None when not loaded.
        self._pages = [None]
        self._fd = open(self._filename, 'r')
        threading.Thread(target=self.createIndex).start()

    def __del__(self):
        # The slot is unset when open() failed in __init__; closing must not
        # raise a second error from the finalizer in that case.
        fd = getattr(self, '_fd', None)
        if fd is not None:
            fd.close()

    def getLen(self):
        """Return the number of line starts indexed so far.

        The count grows while the background indexing is running.  A file
        that ends with a line terminator contributes a final empty line.
        """
        return len(self._indexes)

    def getWidth(self, indexes=None):
        """Return the length of the longest terminated line, terminator included.

        The value is 0 until indexing has finished.  It is measured on the
        raw file bytes.  ``indexes`` is accepted but not used.
        """
        return self._width

    def getLineDirect(self, line):
        """Read and return line number *line* bypassing the page cache.

        Returns "" when *line* is past the last indexed line.
        """
        if line >= self.getLen():
            return ""
        # "with" guarantees the lock is released even if seek() raises.
        with self._indexesMutex:
            self._fd.seek(self._indexes[line])
        return self._fd.readline()

    def getLine(self, line):
        """Return line number *line*, loading its page into the cache if needed.

        Returns "" when *line* is negative or past the last indexed line.
        """
        # A negative number would index the page table from its end and
        # cache unrelated content in that slot, so it is rejected here.
        if line < 0 or line >= self.getLen():
            return ""
        page, offset = divmod(line, self._window)
        cached = self._pages[page]
        if cached is None:
            # Drop the least recently used page (front of the list).
            dispose = self._buffer.pop(0)
            if dispose is not None:
                self._pages[dispose.page] = None
            cached = self._Page(page, self._window)
            self._pages[page] = cached
            self._buffer.append(cached)
            # Jump to the first line of the page and read the whole window.
            with self._indexesMutex:
                self._fd.seek(self._indexes[line - offset])
            lines = cached.buffer
            for i in range(self._window):
                lines[i] = self._fd.readline()
        else:
            # Mark the page as most recently used (back of the list).
            self._buffer.remove(cached)
            self._buffer.append(cached)
        return cached.buffer[offset]

    def getSlice(self, line, length):
        """Return a list with *length* lines starting at line number *line*."""
        return [self.getLine(i) for i in range(line, line + length)]

    def _appendIndexes(self, starts):
        """Publish newly found line-start offsets and grow the page table."""
        with self._indexesMutex:
            total = len(self._indexes) + len(starts)
            missing = 1 + total // self._window - len(self._pages)
            # The page table is grown before the index: getLine() reads both
            # without taking the lock, so every line it can see through
            # getLen() must already have a page entry.
            if missing > 0:
                self._pages += [None] * missing
            self._indexes += starts

    def createIndex(self):
        """Scan the file and record the offset at which every line starts.

        Runs in the thread started by the constructor.  The file is read in
        binary mode so the recorded values are byte positions, which is what
        they are used for when seeking; counting decoded characters gives
        different numbers as soon as the file holds multi-byte characters or
        "\\r\\n" terminators.  "\\n", "\\r\\n" and "\\r" all end a line, as
        they do for the text-mode readers used elsewhere in this class.
        NOTE(review): assumes an ASCII-compatible encoding such as UTF-8,
        where the bytes 0x0A/0x0D never occur inside another character.
        """
        newline = re.compile(rb'\r\n|\r|\n')
        fileSize = os.stat(self._filename).st_size
        chunkSize = 0x1000000  # 16 MiB
        offset = 0        # position in the file of the current chunk
        width = 0         # longest gap between two consecutive line starts
        lastStart = 0     # start of the most recently found line
        pendingCR = False  # previous chunk ended with an undecided "\r"
        with open(self._filename, 'rb') as infile:
            while (chunk := infile.read(chunkSize)):
                starts = []
                scanFrom = 0
                if pendingCR:
                    # Complete the terminator left open by the previous
                    # chunk: "\r\n" if this one begins with "\n", else "\r".
                    if chunk.startswith(b'\n'):
                        scanFrom = 1
                    starts.append(offset + scanFrom)
                # A trailing "\r" may be the first half of a "\r\n" split
                # across chunks, so its line start is decided next round.
                pendingCR = chunk.endswith(b'\r')
                scanTo = len(chunk) - 1 if pendingCR else len(chunk)
                for found in newline.finditer(chunk, scanFrom, scanTo):
                    starts.append(offset + found.end())
                for pos in starts:
                    width = max(width, pos - lastStart)
                    lastStart = pos
                self._appendIndexes(starts)
                offset += len(chunk)
                # The size was sampled before reading; guard against a file
                # that was empty then or has grown since.
                done = min(1.0, offset / fileSize) if fileSize else 1.0
                self.indexUpdated.emit(done)
        if pendingCR:
            # The file ends with a lone "\r": the last line starts at EOF.
            width = max(width, offset - lastStart)
            self._appendIndexes([offset])
        # Stays 0 for a file without any line terminator.
        self._width = width
        self.indexUpdated.emit(1.0)
        self.indexed.emit()

    def searchRe(self, regex):
        """Return the numbers of the lines that *regex* matches.

        The pattern is applied with ``match``, i.e. anchored at the start of
        each line.
        """
        rr = re.compile(regex)
        with open(self._filename, 'r') as infile:
            return [num for num, line in enumerate(infile) if rr.match(line)]

    def search(self, txt):
        """Return the numbers of the lines that contain the text *txt*."""
        with open(self._filename, 'r') as infile:
            return [num for num, line in enumerate(infile) if txt in line]