Browse Source

Testing few solutions

pull/68/head
Eugenio Parodi 3 years ago
parent
commit
30ae472959
  1. 158
      tests/timeit/04.wcwidth.bisearch.py
  2. 231
      tests/timeit/05.wcwidth.bisearch.py

158
tests/timeit/04.wcwidth.bisearch.py

@ -0,0 +1,158 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2021 Eugenio Parodi <ceccopierangiolieugenio AT googlemail DOT com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import sys, os
import timeit
import random
from wcwidth import *
from functools import lru_cache
sys.path.append(os.path.join(sys.path[0],'../..'))
sys.path.append(os.path.join(sys.path[0],'.'))
import TermTk as ttk
# Synthetic lookup table: 200 sorted (first, last) ranges starting at 0x1000,
# each with a random width and separated from the next by a random gap.
print("Create Table...")
table = []
base = 0x1000
for _ in range(200):
    incr = random.randint(0x10, 0x200)  # width of this range
    table.append((base, base + incr))
    # Step past the range plus a gap of at least 0x10 so ranges never touch.
    base += incr + random.randint(0x10, 0x100)
# Frozen into a tuple of tuples so it is hashable; the lru_cache-wrapped
# lookup receives it as an argument and therefore needs to hash it.
table = tuple(table)
print("Create Done!!!")
for first, last in table:
    print(f"0x{first:06x}, 0x{last:06x}")

print("Create Set...")
# Flatten every range (end point included) into one set for direct
# membership tests.
tset = {cp for first, last in table for cp in range(first, last + 1)}
print("Create Set DONE!!!")
print(f"len tset 0x{len(tset):04x}")

print("Create CharSetStringTest...")
# 0x4000 random characters with ordinals in 0xA0..0x40000 (both inclusive).
cstr = "".join(chr(random.randint(0xA0, 0x40000)) for _ in range(0x4000))
print("Create CharSetStringTest DONE!!!")
@lru_cache(maxsize=3)
def ttt(val):
    """Return a random integer in [10, 100], memoised per ``val``.

    ``val`` takes no part in the computation; it only acts as the cache key,
    so a repeated call with the same ``val`` yields the same number for as
    long as that entry stays in the 3-slot LRU cache.
    """
    low, high = 10, 100
    return random.randint(low, high)
# Exercise the 3-entry LRU cache: fill it with keys 1-3, hit each of them once,
# then request a fourth key (forcing an eviction) and revisit 1, 3 and 2.
for key in (1, 2, 3, 1, 2, 3, 4, 1, 3, 2):
    print(f"ttt({key})={ttt(key)!r}")
def _bisearch(ucs, table):
lbound = 0
ubound = len(table) - 1
if ucs < table[0][0] or ucs > table[ubound][1]:
return 0
while ubound >= lbound:
mid = (lbound + ubound) // 2
if ucs > table[mid][1]:
lbound = mid + 1
elif ucs < table[mid][0]:
ubound = mid - 1
else:
return 1
return 0
@lru_cache(maxsize=1000)
def _bicache(ucs, table):
lbound = 0
ubound = len(table) - 1
if ucs < table[0][0] or ucs > table[ubound][1]:
return 0
while ubound >= lbound:
mid = (lbound + ubound) // 2
if ucs > table[mid][1]:
lbound = mid + 1
elif ucs < table[mid][0]:
ubound = mid - 1
else:
return 1
return 0
def test1():
    """Count the characters of ``cstr`` that ``_bisearch`` finds in ``table``.

    Deliberately an explicit accumulation loop: this is the uncached
    binary-search variant measured by the timing code at the end of the file.
    """
    hits = 0
    for char in cstr:
        hits += _bisearch(ord(char), table)
    return hits
def test2():
    """Count the characters of ``cstr`` that ``_bicache`` finds in ``table``.

    Identical to ``test1`` except that the lookup goes through the
    ``lru_cache``-wrapped search, so the two timings can be compared.
    """
    hits = 0
    for char in cstr:
        hits += _bicache(ord(char), table)
    return hits
def test3():
    """Return ``wcswidth(cstr)``: the wcwidth library call timed alongside the local variants."""
    width = wcswidth(cstr)
    return width
def test4():
    """Count the characters of ``cstr`` whose ordinal is in ``tset``.

    Set-membership variant written as an explicit loop with a conditional
    expression; the loop form itself is what is being timed.
    """
    hits = 0
    for char in cstr:
        hits += 1 if ord(char) in tset else 0
    return hits
def test5():
    """Count the characters of ``cstr`` whose ordinal is in ``tset``.

    Same result as ``test4``, expressed as ``sum`` over a list comprehension
    of 1/0 values; the list (not a generator) is the form being timed.
    """
    return sum([1 if ord(char) in tset else 0 for char in cstr])
def test6():
    """Count the characters of ``cstr`` whose ordinal is in ``tset``.

    Same result as ``test4``/``test5``, summing the booleans produced by the
    membership test directly (``True`` counts as 1).
    """
    flags = [ord(char) in tset for char in cstr]
    return sum(flags)
# Time every variant ``loop`` times and print one line per variant:
#   <average seconds per call, 10 decimals> - <same average, unformatted> <return value>
# The return value printed is that of the variant that was just timed, so a
# variant that computes a different count is visible in the output. (The
# previous hand-written sequence printed test4()'s value on the test5 and
# test6 rows.)
loop = 100
for name in ("test4", "test5", "test6", "test3", "test1", "test2"):
    result = timeit.timeit(f"{name}()", globals=globals(), number=loop)
    print(f"{result / loop:.10f} - {result / loop} {globals()[name]()}")

231
tests/timeit/05.wcwidth.bisearch.py

@ -0,0 +1,231 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2021 Eugenio Parodi <ceccopierangiolieugenio AT googlemail DOT com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import sys, os
import timeit
import random
import unicodedata
import wcwidth
from functools import lru_cache
sys.path.append(os.path.join(sys.path[0],'../..'))
sys.path.append(os.path.join(sys.path[0],'.'))
import TermTk as ttk
# Unicode version string used as the key into wcwidth's per-version tables.
_unicode_version = "13.0.0"
# Zero-width table for that version; iterated further down as (first, last) pairs.
zw = wcwidth.ZERO_WIDTH[_unicode_version]
# zwcf = wcwidth.ZERO_WIDTH_CF
# Wide East Asian table for that version; iterated further down as (first, last) pairs.
we = wcwidth.WIDE_EASTASIAN[_unicode_version]
# Individual code points that are merged into the zero-width set further down.
# NOTE(review): presumably a local copy of wcwidth.ZERO_WIDTH_CF (see the
# commented-out assignment above) -- confirm against the installed wcwidth.
zwcf = [
    0, # Null (Cc)
    0x034F, # Combining grapheme joiner (Mn)
    0x200B, # Zero width space
    0x200C, # Zero width non-joiner
    0x200D, # Zero width joiner
    0x200E, # Left-to-right mark
    0x200F, # Right-to-left mark
    0x2028, # Line separator (Zl)
    0x2029, # Paragraph separator (Zp)
    0x202A, # Left-to-right embedding
    0x202B, # Right-to-left embedding
    0x202C, # Pop directional formatting
    0x202D, # Left-to-right override
    0x202E, # Right-to-left override
    0x2060, # Word joiner
    0x2061, # Function application
    0x2062, # Invisible times
    0x2063, # Invisible separator
]
def set2binmask(s):
    """Pack the integers in ``s`` into a list of 32-bit mask words.

    Value ``v`` is recorded as bit ``v & 0x1F`` of word ``v >> 5``, so a
    membership test becomes ``words[v >> 5] >> (v & 0x1F) & 1``.

    :param s: iterable of integers (iterated once, in its own order).
    :returns: list of ints. The list grows on demand; each growth step leaves
        one spare zero word after the index that triggered it, so the final
        length can be one more than strictly needed, depending on the order
        in which values arrive. An empty input yields ``[]``.
    """
    words = []
    for value in s:
        index = value >> 5
        shortfall = index - len(words)
        if shortfall >= 0:
            # Grow far enough to address ``index``, plus one spare word.
            words.extend([0] * (shortfall + 2))
        words[index] |= 1 << (value & 0x1F)
    return words
print("Create Set...")
# Expand the inclusive zero-width ranges, then add the stand-alone code points.
zset = {cp for first, last in zw for cp in range(first, last + 1)}
zset.update(zwcf)
# Expand the inclusive wide ranges.
wset = {cp for first, last in we for cp in range(first, last + 1)}
print("Create Set DONE!!!")

print("Create CharSetStringTest...")
# 0x4000 random characters with ordinals in 0x100..0x20000 (both inclusive).
cstr = "".join(chr(random.randint(0x100, 0x20000)) for _ in range(0x4000))
print("Create CharSetStringTest DONE!!!")

# print(f"{set2binmask(zset)}")
# Bit-mask (32 bits per list entry) versions of the two sets.
bzset = set2binmask(zset)
bwset = set2binmask(wset)

# Each set is reported twice under the same label: first its element count,
# then the number of words in its bit-mask form.
print(f"len zset 0x{len(zset):04x}")
print(f"len zset 0x{len(bzset):04x}")
print(f"len wset 0x{len(wset):04x}")
print(f"len wset 0x{len(bwset):04x}")
print(f"len cstr 0x{len(cstr):04x}")
# Dump every test character with its East Asian width class and general category.
print([f"'{ch}':{unicodedata.east_asian_width(ch)}:{unicodedata.category(ch)}" for ch in cstr])
# @lru_cache(maxsize=3)
# def ttt(val):
# return random.randint(10,100)
#
# print(f"{ttt(1)=}")
# print(f"{ttt(2)=}")
# print(f"{ttt(3)=}")
# print(f"{ttt(1)=}")
# print(f"{ttt(1)=}")
# print(f"{ttt(3)=}")
# print(f"{ttt(2)=}")
def _bisearch(ucs, table):
lbound = 0
ubound = len(table) - 1
if ucs < table[0][0] or ucs > table[ubound][1]:
return 0
while ubound >= lbound:
mid = (lbound + ubound) // 2
if ucs > table[mid][1]:
lbound = mid + 1
elif ucs < table[mid][0]:
ubound = mid - 1
else:
return 1
return 0
@lru_cache(maxsize=1000)
def _bicache(ucs, table):
lbound = 0
ubound = len(table) - 1
if ucs < table[0][0] or ucs > table[ubound][1]:
return 0
while ubound >= lbound:
mid = (lbound + ubound) // 2
if ucs > table[mid][1]:
lbound = mid + 1
elif ucs < table[mid][0]:
ubound = mid - 1
else:
return 1
return 0
def test1():
    """Count the characters of ``cstr`` that ``_bisearch`` finds in ``zw``.

    Explicit accumulation loop over the uncached binary search; kept in this
    form because it is one of the variants timed at the end of the file.
    """
    hits = 0
    for char in cstr:
        hits += _bisearch(ord(char), zw)
    return hits
def test2():
    """Count the characters of ``cstr`` that ``_bicache`` finds in ``zw``.

    Same loop as ``test1`` but going through the ``lru_cache``-wrapped search.
    """
    hits = 0
    for char in cstr:
        hits += _bicache(ord(char), zw)
    return hits
def test3():
    """Return ``wcwidth.wcswidth(cstr)``: the library call timed alongside the local variants."""
    width = wcwidth.wcswidth(cstr)
    return width
def test4():
    """Count the characters of ``cstr`` whose ordinal is in ``wset``.

    Explicit loop with a conditional expression; the loop form itself is the
    variant being timed.
    """
    hits = 0
    for char in cstr:
        hits += 1 if ord(char) in wset else 0
    return hits
def test5():
    """Count the characters of ``cstr`` whose ordinal is in ``wset``.

    Same result as ``test4``, expressed as ``sum`` over a list comprehension
    of 1/0 values.
    """
    return sum([1 if ord(char) in wset else 0 for char in cstr])
def test6():
    """Return ``len(cstr)`` plus the count of characters in ``wset`` minus the count in ``zset``.

    Every character therefore contributes 1, +1 if its ordinal is in ``wset``
    and -1 if it is in ``zset``. Both counts use plain set membership.
    """
    in_wide = sum([ord(char) in wset for char in cstr])
    in_zero = sum([ord(char) in zset for char in cstr])
    return len(cstr) + in_wide - in_zero
def test7():
    """Same total as ``test6``, with both memberships read from the bit masks.

    For each character the word ``ord(char) >> 5`` of ``bwset`` / ``bzset`` is
    shifted right by ``ord(char) & 0x1F`` and masked with 1 to extract its bit.
    """
    in_wide = sum([bwset[ord(char) >> 5] >> (ord(char) & 0x1F) & 1 for char in cstr])
    in_zero = sum([bzset[ord(char) >> 5] >> (ord(char) & 0x1F) & 1 for char in cstr])
    return len(cstr) + in_wide - in_zero
def test8():
    """Same total as ``test6``, mixing the two lookup styles.

    The ``wset`` membership is read from the ``bwset`` bit mask (shift and
    mask with 1); the ``zset`` membership uses the set directly.
    """
    in_wide = sum([bwset[ord(char) >> 5] >> (ord(char) & 0x1F) & 1 for char in cstr])
    in_zero = sum([ord(char) in zset for char in cstr])
    return len(cstr) + in_wide - in_zero
def test9():
    """Same total as ``test8``, testing the mask bit without shifting the word.

    The ``bwset`` word is AND-ed with ``1 << (ord(char) & 0x1F)`` and compared
    against zero; the ``zset`` membership uses the set directly.
    """
    in_wide = sum([0 != (bwset[ord(char) >> 5] & (1 << (ord(char) & 0x1F))) for char in cstr])
    in_zero = sum([ord(char) in zset for char in cstr])
    return len(cstr) + in_wide - in_zero
def test10():
    """Return ``len(cstr)`` + count of East-Asian-width ``'W'`` chars - count of ``'Me'``/``'Mn'`` chars.

    Uses only ``unicodedata``. The category is fetched once per character via
    an assignment expression and compared against the two values with ``or``.
    """
    in_wide = sum(['W' == unicodedata.east_asian_width(char) for char in cstr])
    in_mark = sum(['Me' == (cat := unicodedata.category(char)) or 'Mn' == cat for char in cstr])
    return len(cstr) + in_wide - in_mark
def test11():
    """Return ``len(cstr)`` + count of East-Asian-width ``'W'`` chars - count of ``'Me'``/``'Mn'`` chars.

    Same computation as ``test10``, with the category check written as a
    tuple membership test instead of two comparisons.
    """
    in_wide = sum([unicodedata.east_asian_width(char) == 'W' for char in cstr])
    in_mark = sum([unicodedata.category(char) in ('Me', 'Mn') for char in cstr])
    return len(cstr) + in_wide - in_mark
# Time every variant ``loop`` times and print one line per variant:
#   <label> <average seconds per call, 10 decimals> - <same average, unformatted> <return value>
# The order below is the order in which the variants are run and reported.
loop = 100
for label, name in (
    ("4", "test4"),
    ("5", "test5"),
    ("6", "test6"),
    ("10", "test10"),
    ("11", "test11"),
    ("7", "test7"),
    ("8", "test8"),
    ("9", "test9"),
    ("3w", "test3"),
    ("1w", "test1"),
    ("2w", "test2"),
):
    result = timeit.timeit(f"{name}()", globals=globals(), number=loop)
    print(f"{label} {result / loop:.10f} - {result / loop} {globals()[name]()}")
Loading…
Cancel
Save