2 changed files with 389 additions and 0 deletions
@ -0,0 +1,158 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
|
||||||
|
# MIT License |
||||||
|
# |
||||||
|
# Copyright (c) 2021 Eugenio Parodi <ceccopierangiolieugenio AT googlemail DOT com> |
||||||
|
# |
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||||
|
# of this software and associated documentation files (the "Software"), to deal |
||||||
|
# in the Software without restriction, including without limitation the rights |
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||||
|
# copies of the Software, and to permit persons to whom the Software is |
||||||
|
# furnished to do so, subject to the following conditions: |
||||||
|
# |
||||||
|
# The above copyright notice and this permission notice shall be included in all |
||||||
|
# copies or substantial portions of the Software. |
||||||
|
# |
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||||
|
# SOFTWARE. |
||||||
|
|
||||||
|
import sys, os |
||||||
|
|
||||||
|
import timeit |
||||||
|
import random |
||||||
|
|
||||||
|
from wcwidth import * |
||||||
|
from functools import lru_cache |
||||||
|
|
||||||
|
sys.path.append(os.path.join(sys.path[0],'../..')) |
||||||
|
sys.path.append(os.path.join(sys.path[0],'.')) |
||||||
|
import TermTk as ttk |
||||||
|
|
||||||
|
# Try to create a table with ~200 entries |
||||||
|
print(f"Create Table...") |
||||||
|
table = [] |
||||||
|
base = 0x1000 |
||||||
|
for _ in range(200): |
||||||
|
incr = random.randint(0x10,0x200) |
||||||
|
table.append((base,base+incr)) |
||||||
|
base += incr + random.randint(0x10,0x100) |
||||||
|
table = tuple(table) |
||||||
|
print(f"Create Done!!!") |
||||||
|
|
||||||
|
for a,b in table: |
||||||
|
print(f"0x{a:06x}, 0x{b:06x}") |
||||||
|
|
||||||
|
print(f"Create Set...") |
||||||
|
tset = [] |
||||||
|
for a,b in table: |
||||||
|
for v in range(a,b+1): |
||||||
|
tset.append(v) |
||||||
|
tset = set(tset) |
||||||
|
print(f"Create Set DONE!!!") |
||||||
|
print(f"len tset 0x{len(tset):04x}") |
||||||
|
|
||||||
|
print(f"Create CharSetStringTest...") |
||||||
|
cstr = "" |
||||||
|
for _ in range(0x4000): |
||||||
|
cstr += chr(random.randint(0xA0,0x40000)) |
||||||
|
print(f"Create CharSetStringTest DONE!!!") |
||||||
|
|
||||||
|
@lru_cache(maxsize=3) |
||||||
|
def ttt(val): |
||||||
|
return random.randint(10,100) |
||||||
|
|
||||||
|
print(f"{ttt(1)=}") |
||||||
|
print(f"{ttt(2)=}") |
||||||
|
print(f"{ttt(3)=}") |
||||||
|
print(f"{ttt(1)=}") |
||||||
|
print(f"{ttt(2)=}") |
||||||
|
print(f"{ttt(3)=}") |
||||||
|
print(f"{ttt(4)=}") |
||||||
|
print(f"{ttt(1)=}") |
||||||
|
print(f"{ttt(3)=}") |
||||||
|
print(f"{ttt(2)=}") |
||||||
|
|
||||||
|
def _bisearch(ucs, table): |
||||||
|
lbound = 0 |
||||||
|
ubound = len(table) - 1 |
||||||
|
|
||||||
|
if ucs < table[0][0] or ucs > table[ubound][1]: |
||||||
|
return 0 |
||||||
|
while ubound >= lbound: |
||||||
|
mid = (lbound + ubound) // 2 |
||||||
|
if ucs > table[mid][1]: |
||||||
|
lbound = mid + 1 |
||||||
|
elif ucs < table[mid][0]: |
||||||
|
ubound = mid - 1 |
||||||
|
else: |
||||||
|
return 1 |
||||||
|
|
||||||
|
return 0 |
||||||
|
|
||||||
|
@lru_cache(maxsize=1000) |
||||||
|
def _bicache(ucs, table): |
||||||
|
lbound = 0 |
||||||
|
ubound = len(table) - 1 |
||||||
|
|
||||||
|
if ucs < table[0][0] or ucs > table[ubound][1]: |
||||||
|
return 0 |
||||||
|
while ubound >= lbound: |
||||||
|
mid = (lbound + ubound) // 2 |
||||||
|
if ucs > table[mid][1]: |
||||||
|
lbound = mid + 1 |
||||||
|
elif ucs < table[mid][0]: |
||||||
|
ubound = mid - 1 |
||||||
|
else: |
||||||
|
return 1 |
||||||
|
|
||||||
|
return 0 |
||||||
|
|
||||||
|
def test1(): |
||||||
|
cw = 0 |
||||||
|
for ch in cstr: |
||||||
|
cw += _bisearch(ord(ch), table) |
||||||
|
return cw |
||||||
|
|
||||||
|
def test2(): |
||||||
|
cw = 0 |
||||||
|
for ch in cstr: |
||||||
|
cw += _bicache(ord(ch), table) |
||||||
|
return cw |
||||||
|
|
||||||
|
def test3(): |
||||||
|
return wcswidth(cstr) |
||||||
|
|
||||||
|
def test4(): |
||||||
|
cw = 0 |
||||||
|
for ch in cstr: |
||||||
|
cw += 1 if ord(ch) in tset else 0 |
||||||
|
return cw |
||||||
|
|
||||||
|
def test5(): |
||||||
|
cw = sum([1 if ord(ch) in tset else 0 for ch in cstr]) |
||||||
|
return cw |
||||||
|
|
||||||
|
def test6(): |
||||||
|
return sum([ord(ch) in tset for ch in cstr]) |
||||||
|
|
||||||
|
loop = 100 |
||||||
|
|
||||||
|
result = timeit.timeit('test4()', globals=globals(), number=loop) |
||||||
|
print(f"{result / loop:.10f} - {result / loop} {test4()}") |
||||||
|
result = timeit.timeit('test5()', globals=globals(), number=loop) |
||||||
|
print(f"{result / loop:.10f} - {result / loop} {test4()}") |
||||||
|
result = timeit.timeit('test6()', globals=globals(), number=loop) |
||||||
|
print(f"{result / loop:.10f} - {result / loop} {test4()}") |
||||||
|
|
||||||
|
result = timeit.timeit('test3()', globals=globals(), number=loop) |
||||||
|
print(f"{result / loop:.10f} - {result / loop} {test3()}") |
||||||
|
result = timeit.timeit('test1()', globals=globals(), number=loop) |
||||||
|
print(f"{result / loop:.10f} - {result / loop} {test1()}") |
||||||
|
result = timeit.timeit('test2()', globals=globals(), number=loop) |
||||||
|
print(f"{result / loop:.10f} - {result / loop} {test2()}") |
||||||
@ -0,0 +1,231 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
|
||||||
|
# MIT License |
||||||
|
# |
||||||
|
# Copyright (c) 2021 Eugenio Parodi <ceccopierangiolieugenio AT googlemail DOT com> |
||||||
|
# |
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||||
|
# of this software and associated documentation files (the "Software"), to deal |
||||||
|
# in the Software without restriction, including without limitation the rights |
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||||
|
# copies of the Software, and to permit persons to whom the Software is |
||||||
|
# furnished to do so, subject to the following conditions: |
||||||
|
# |
||||||
|
# The above copyright notice and this permission notice shall be included in all |
||||||
|
# copies or substantial portions of the Software. |
||||||
|
# |
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||||
|
# SOFTWARE. |
||||||
|
|
||||||
|
import sys, os |
||||||
|
|
||||||
|
import timeit |
||||||
|
import random |
||||||
|
import unicodedata |
||||||
|
|
||||||
|
import wcwidth |
||||||
|
from functools import lru_cache |
||||||
|
|
||||||
|
sys.path.append(os.path.join(sys.path[0],'../..')) |
||||||
|
sys.path.append(os.path.join(sys.path[0],'.')) |
||||||
|
import TermTk as ttk |
||||||
|
|
||||||
|
|
||||||
|
_unicode_version = "13.0.0" |
||||||
|
zw = wcwidth.ZERO_WIDTH[_unicode_version] |
||||||
|
# zwcf = wcwidth.ZERO_WIDTH_CF |
||||||
|
we = wcwidth.WIDE_EASTASIAN[_unicode_version] |
||||||
|
zwcf = [ |
||||||
|
0, # Null (Cc) |
||||||
|
0x034F, # Combining grapheme joiner (Mn) |
||||||
|
0x200B, # Zero width space |
||||||
|
0x200C, # Zero width non-joiner |
||||||
|
0x200D, # Zero width joiner |
||||||
|
0x200E, # Left-to-right mark |
||||||
|
0x200F, # Right-to-left mark |
||||||
|
0x2028, # Line separator (Zl) |
||||||
|
0x2029, # Paragraph separator (Zp) |
||||||
|
0x202A, # Left-to-right embedding |
||||||
|
0x202B, # Right-to-left embedding |
||||||
|
0x202C, # Pop directional formatting |
||||||
|
0x202D, # Left-to-right override |
||||||
|
0x202E, # Right-to-left override |
||||||
|
0x2060, # Word joiner |
||||||
|
0x2061, # Function application |
||||||
|
0x2062, # Invisible times |
||||||
|
0x2063, # Invisible separator |
||||||
|
] |
||||||
|
|
||||||
|
|
||||||
|
def set2binmask(s): |
||||||
|
ret = [] |
||||||
|
for v in s: |
||||||
|
id = v >> 5 |
||||||
|
mask = v & 0x1F |
||||||
|
bit = 1 << mask |
||||||
|
if id >= len(ret): |
||||||
|
ret += [0]*(id-len(ret)+2) |
||||||
|
ret[id] |= bit |
||||||
|
return ret |
||||||
|
|
||||||
|
|
||||||
|
print(f"Create Set...") |
||||||
|
zset = [] |
||||||
|
for a,b in zw: |
||||||
|
for v in range(a,b+1): |
||||||
|
zset.append(v) |
||||||
|
for v in zwcf: |
||||||
|
zset.append(v) |
||||||
|
zset = set(zset) |
||||||
|
|
||||||
|
wset = [] |
||||||
|
for a,b in we: |
||||||
|
for v in range(a,b+1): |
||||||
|
wset.append(v) |
||||||
|
wset = set(wset) |
||||||
|
|
||||||
|
print(f"Create Set DONE!!!") |
||||||
|
|
||||||
|
print(f"Create CharSetStringTest...") |
||||||
|
cstr = "" |
||||||
|
for _ in range(0x4000): |
||||||
|
cstr += chr(random.randint(0x100,0x20000)) |
||||||
|
print(f"Create CharSetStringTest DONE!!!") |
||||||
|
|
||||||
|
# print(f"{set2binmask(zset)}") |
||||||
|
|
||||||
|
bzset = set2binmask(zset) |
||||||
|
bwset = set2binmask(wset) |
||||||
|
|
||||||
|
print(f"len zset 0x{len(zset):04x}") |
||||||
|
print(f"len zset 0x{len(bzset):04x}") |
||||||
|
print(f"len wset 0x{len(wset):04x}") |
||||||
|
print(f"len wset 0x{len(bwset):04x}") |
||||||
|
print(f"len cstr 0x{len(cstr):04x}") |
||||||
|
|
||||||
|
print([f"'{ch}':{unicodedata.east_asian_width(ch)}:{unicodedata.category(ch)}" for ch in cstr]) |
||||||
|
|
||||||
|
# @lru_cache(maxsize=3) |
||||||
|
# def ttt(val): |
||||||
|
# return random.randint(10,100) |
||||||
|
# |
||||||
|
# print(f"{ttt(1)=}") |
||||||
|
# print(f"{ttt(2)=}") |
||||||
|
# print(f"{ttt(3)=}") |
||||||
|
# print(f"{ttt(1)=}")unicodedata.category |
||||||
|
# print(f"{ttt(1)=}") |
||||||
|
# print(f"{ttt(3)=}") |
||||||
|
# print(f"{ttt(2)=}") |
||||||
|
|
||||||
|
def _bisearch(ucs, table): |
||||||
|
lbound = 0 |
||||||
|
ubound = len(table) - 1 |
||||||
|
|
||||||
|
if ucs < table[0][0] or ucs > table[ubound][1]: |
||||||
|
return 0 |
||||||
|
while ubound >= lbound: |
||||||
|
mid = (lbound + ubound) // 2 |
||||||
|
if ucs > table[mid][1]: |
||||||
|
lbound = mid + 1 |
||||||
|
elif ucs < table[mid][0]: |
||||||
|
ubound = mid - 1 |
||||||
|
else: |
||||||
|
return 1 |
||||||
|
|
||||||
|
return 0 |
||||||
|
|
||||||
|
@lru_cache(maxsize=1000) |
||||||
|
def _bicache(ucs, table): |
||||||
|
lbound = 0 |
||||||
|
ubound = len(table) - 1 |
||||||
|
|
||||||
|
if ucs < table[0][0] or ucs > table[ubound][1]: |
||||||
|
return 0 |
||||||
|
while ubound >= lbound: |
||||||
|
mid = (lbound + ubound) // 2 |
||||||
|
if ucs > table[mid][1]: |
||||||
|
lbound = mid + 1 |
||||||
|
elif ucs < table[mid][0]: |
||||||
|
ubound = mid - 1 |
||||||
|
else: |
||||||
|
return 1 |
||||||
|
|
||||||
|
return 0 |
||||||
|
|
||||||
|
def test1(): |
||||||
|
cw = 0 |
||||||
|
for ch in cstr: |
||||||
|
cw += _bisearch(ord(ch), zw) |
||||||
|
return cw |
||||||
|
|
||||||
|
def test2(): |
||||||
|
cw = 0 |
||||||
|
for ch in cstr: |
||||||
|
cw += _bicache(ord(ch), zw) |
||||||
|
return cw |
||||||
|
|
||||||
|
def test3(): |
||||||
|
return wcwidth.wcswidth(cstr) |
||||||
|
|
||||||
|
def test4(): |
||||||
|
cw = 0 |
||||||
|
for ch in cstr: |
||||||
|
cw += 1 if ord(ch) in wset else 0 |
||||||
|
return cw |
||||||
|
|
||||||
|
def test5(): |
||||||
|
cw = sum([1 if ord(ch) in wset else 0 for ch in cstr]) |
||||||
|
return cw |
||||||
|
|
||||||
|
def test6(): |
||||||
|
return len(cstr) + sum([ord(ch) in wset for ch in cstr]) - sum([ord(ch) in zset for ch in cstr]) |
||||||
|
|
||||||
|
def test7(): |
||||||
|
return len(cstr) + sum([bwset[ord(ch)>>5]>>(ord(ch)&0x1F)&1 for ch in cstr]) - sum([bzset[ord(ch)>>5]>>(ord(ch)&0x1F)&1 for ch in cstr]) |
||||||
|
|
||||||
|
def test8(): |
||||||
|
return len(cstr) + sum([bwset[ord(ch)>>5]>>(ord(ch)&0x1F)&1 for ch in cstr]) - sum([ord(ch) in zset for ch in cstr]) |
||||||
|
|
||||||
|
def test9(): |
||||||
|
return len(cstr) + sum([0!=(bwset[ord(ch)>>5]&(1<<(ord(ch)&0x1F))) for ch in cstr]) - sum([ord(ch) in zset for ch in cstr]) |
||||||
|
|
||||||
|
def test10(): |
||||||
|
return ( len(cstr) + |
||||||
|
sum(['W'==unicodedata.east_asian_width(ch) for ch in cstr]) - |
||||||
|
sum(['Me'==(c:=unicodedata.category(ch)) or 'Mn'==c for ch in cstr]) ) |
||||||
|
def test11(): |
||||||
|
return ( len(cstr) + |
||||||
|
sum([unicodedata.east_asian_width(ch) == 'W' for ch in cstr]) - |
||||||
|
sum([unicodedata.category(ch) in ('Me','Mn') for ch in cstr]) ) |
||||||
|
|
||||||
|
|
||||||
|
loop = 100 |
||||||
|
|
||||||
|
result = timeit.timeit('test4()', globals=globals(), number=loop) |
||||||
|
print(f"4 {result / loop:.10f} - {result / loop} {test4()}") |
||||||
|
result = timeit.timeit('test5()', globals=globals(), number=loop) |
||||||
|
print(f"5 {result / loop:.10f} - {result / loop} {test5()}") |
||||||
|
result = timeit.timeit('test6()', globals=globals(), number=loop) |
||||||
|
print(f"6 {result / loop:.10f} - {result / loop} {test6()}") |
||||||
|
result = timeit.timeit('test10()', globals=globals(), number=loop) |
||||||
|
print(f"10 {result / loop:.10f} - {result / loop} {test10()}") |
||||||
|
result = timeit.timeit('test11()', globals=globals(), number=loop) |
||||||
|
print(f"11 {result / loop:.10f} - {result / loop} {test11()}") |
||||||
|
result = timeit.timeit('test7()', globals=globals(), number=loop) |
||||||
|
print(f"7 {result / loop:.10f} - {result / loop} {test7()}") |
||||||
|
result = timeit.timeit('test8()', globals=globals(), number=loop) |
||||||
|
print(f"8 {result / loop:.10f} - {result / loop} {test8()}") |
||||||
|
result = timeit.timeit('test9()', globals=globals(), number=loop) |
||||||
|
print(f"9 {result / loop:.10f} - {result / loop} {test9()}") |
||||||
|
|
||||||
|
result = timeit.timeit('test3()', globals=globals(), number=loop) |
||||||
|
print(f"3w {result / loop:.10f} - {result / loop} {test3()}") |
||||||
|
result = timeit.timeit('test1()', globals=globals(), number=loop) |
||||||
|
print(f"1w {result / loop:.10f} - {result / loop} {test1()}") |
||||||
|
result = timeit.timeit('test2()', globals=globals(), number=loop) |
||||||
|
print(f"2w {result / loop:.10f} - {result / loop} {test2()}") |
||||||
Loading…
Reference in new issue