You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
192 lines
6.9 KiB
192 lines
6.9 KiB
# MIT License |
|
# |
|
# Copyright (c) 2021 Eugenio Parodi <ceccopierangiolieugenio AT googlemail DOT com> |
|
# |
|
# Permission is hereby granted, free of charge, to any person obtaining a copy |
|
# of this software and associated documentation files (the "Software"), to deal |
|
# in the Software without restriction, including without limitation the rights |
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
# copies of the Software, and to permit persons to whom the Software is |
|
# furnished to do so, subject to the following conditions: |
|
# |
|
# The above copyright notice and this permission notice shall be included in all |
|
# copies or substantial portions of the Software. |
|
# |
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
# SOFTWARE. |
|
|
|
__all__ = ['TTkFileBuffer'] |
|
|
|
import os |
|
import re |
|
import threading |
|
from TermTk.TTkCore.log import TTkLog |
|
from TermTk.TTkCore.signal import pyTTkSignal |
|
|
|
#          w1   w3   w2   w5
# Buffer |----|----|----|----|             cache buffer
#          |     \ /       \
#          |      x         \
#          |     / \         \
# Pages  | 0  | 2  | 1  |None| 3  |None|   index to buffer
# File   |----|----|----|----|----|----|   view as list of windows
#          w1   w2   w3   w4   w5   w6
|
|
|
class TTkFileBuffer():
    """Paged, line-oriented read access to a text file.

    A background thread (see :meth:`createIndex`) scans the file and records
    the byte offset at which every line starts. Lines are then served through
    a small cache of "pages" (windows) holding ``window`` consecutive lines
    each; at most ``numWindows`` pages are kept, the least recently used one
    being dropped when a new page has to be loaded.

    Signals:
        indexUpdated(float): emitted after each indexed chunk with the
            fraction of the file scanned so far, and with ``1.0`` at the end.
        indexed(): emitted once the whole file has been indexed.
    """

    class _Page:
        """One cached window: its page number and a list of ``size`` lines."""

        __slots__ = ('_page', '_size', '_buffer')

        def __init__(self, page, size):
            self._page = page
            self._size = size
            # Pre-sized so that the lines can be filled in by position.
            self._buffer = [""] * self._size

        @property
        def buffer(self):
            """The list of lines stored in this page."""
            return self._buffer

        @property
        def page(self):
            """The page number this object was created for."""
            return self._page

    __slots__ = (
        '_indexes', '_indexesMutex',
        '_filename', '_fd',
        '_pages', '_buffer',
        '_window', '_numW',
        '_width',
        # Signals
        'indexUpdated', 'indexed')

    def __init__(self, filename, window, numWindows):
        """Open ``filename`` and start indexing it in a background thread.

        Args:
            filename: path of the file to read.
            window: number of lines stored in each cached page.
            numWindows: number of pages kept in the cache.
        """
        # Signals
        self.indexUpdated = pyTTkSignal(float)
        self.indexed = pyTTkSignal()

        self._window = window
        self._numW = numWindows
        self._filename = filename
        # Byte offset of the start of every known line; line 0 starts at 0.
        self._indexes = [0]
        self._indexesMutex = threading.Lock()
        self._width = 0
        # Cache slots ordered from least to most recently used.
        self._buffer = [None] * self._numW
        # For every page of the file: the cached _Page, or None if not loaded.
        self._pages = [None]
        self._fd = open(self._filename, 'r', errors='replace', newline='\n')
        threading.Thread(target=self.createIndex).start()

    def __del__(self):
        # _fd is missing when open() failed inside __init__; do not raise a
        # second error from the finalizer in that case.
        fd = getattr(self, '_fd', None)
        if fd is not None:
            fd.close()

    def filename(self):
        """Return the path given at construction."""
        return self._filename

    def getLen(self):
        """Return the number of lines indexed so far."""
        return len(self._indexes)

    def getWidth(self, indexes=None):
        """Return the longest line length found by the indexer.

        The value is measured in bytes (line terminator included) and stays
        ``0`` until :meth:`createIndex` has completed. ``indexes`` is accepted
        but not used.
        """
        return self._width

    def getLineDirect(self, line):
        """Read line number ``line`` straight from the file, bypassing the cache.

        Returns an empty string when ``line`` is beyond the indexed lines.
        """
        # The context manager guarantees the lock is released even if the
        # index lookup raises.
        with self._indexesMutex:
            if line >= len(self._indexes):
                return ""
            position = self._indexes[line]
        self._fd.seek(position)
        return self._fd.readline()

    def getLine(self, line):
        """Return line number ``line`` through the page cache.

        Carriage returns are stripped from cached lines. Returns an empty
        string when ``line`` is beyond the indexed lines.
        """
        page, offset = divmod(line, self._window)
        # _indexes and _pages are grown together by the indexer while it holds
        # the mutex, so they are read under the same lock to get a consistent
        # view of both.
        with self._indexesMutex:
            if line >= len(self._indexes):
                return ""
            pageStart = self._indexes[line - offset]
            cached = self._pages[page]

        if cached is None:
            # Drop the least recently used slot to make room for the new page.
            evicted = self._buffer.pop(0)
            if evicted is not None:
                self._pages[evicted.page] = None
            cached = self._Page(page, self._window)
            self._pages[page] = cached
            self._buffer.append(cached)
            self._fd.seek(pageStart)
            lines = cached.buffer
            for i in range(self._window):
                lines[i] = self._fd.readline().replace('\r', '')
        else:
            # Mark the page as the most recently used one.
            self._buffer.remove(cached)
            self._buffer.append(cached)
        return cached.buffer[offset]

    def getSlice(self, line, length):
        """Return ``length`` consecutive lines starting at ``line``."""
        return [self.getLine(i) for i in range(line, line + length)]

    def createIndex(self):
        """Scan the file and record the byte offset of every line start.

        The file is read in binary chunks; after each chunk the new offsets
        are published under the mutex and ``indexUpdated`` is emitted with the
        scanned fraction. When done, the maximum line width is stored,
        ``indexUpdated(1.0)`` and ``indexed()`` are emitted.
        """
        fileSize = os.stat(self._filename).st_size
        chunkSize = 0x1000000  # 16 MiB
        offset = 0
        with open(self._filename, 'rb') as infile:
            while (chunk := infile.read(chunkSize)):
                indexes = []
                start = 0
                while (index := chunk.find(0x0A, start)) != -1:
                    # A line starts right after every newline byte.
                    indexes.append(index + offset + 1)
                    start = index + 1
                with self._indexesMutex:
                    self._indexes += indexes
                    missing = 1 + (len(self._indexes) // self._window) - len(self._pages)
                    self._pages += [None] * missing
                offset += len(chunk)
                # Guard against files that report size 0 but still yield data.
                self.indexUpdated.emit(offset / fileSize if fileSize else 1.0)

        idx = self._indexes
        # default=0 covers files without any newline, where there is no pair
        # of consecutive offsets to compare.
        width = max((idx[i + 1] - idx[i] for i in range(len(idx) - 1)), default=0)
        # Account for a final line that is not terminated by a newline.
        self._width = max(width, offset - idx[-1])
        self.indexUpdated.emit(1.0)
        self.indexed.emit()

    def searchRe(self, regex, ignoreCase=False):
        """Return the zero-based numbers of the lines matching ``regex``.

        Args:
            regex: pattern passed to ``re.compile``; a line matches when
                ``search`` finds it anywhere in the line.
            ignoreCase: compile the pattern with ``re.IGNORECASE``.
        """
        rr = re.compile(regex, re.IGNORECASE if ignoreCase else 0)
        TTkLog.debug(f"Search RE: {regex}")
        with open(self._filename, 'r', errors='replace', newline='\n') as infile:
            return [i for i, line in enumerate(infile) if rr.search(line)]

    def search(self, txt):
        """Return the zero-based numbers of the lines containing ``txt``."""
        with open(self._filename, 'r', errors='replace', newline='\n') as infile:
            return [i for i, line in enumerate(infile) if txt in line]
|
|
|