Better search algorithm and Unicode handling
- Use a line-based search (20% faster)
- Parse the file as bytes so match offsets are not skewed by multi-byte Unicode characters
This commit is contained in:
parent
04abeec7b6
commit
4230489d4b
22
main.py
22
main.py
@@ -92,20 +92,19 @@ def search_log(name: str, query: str, start: int = 0):
|
|||||||
if resp:
|
if resp:
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
query_b = query.encode()
|
||||||
matches = []
|
matches = []
|
||||||
buffer_size = max(len(query), 1024)
|
|
||||||
break_outer = False
|
break_outer = False
|
||||||
with open(path) as f:
|
with open(path, "rb") as f:
|
||||||
f.seek(start)
|
f.seek(start)
|
||||||
head = start
|
count = start
|
||||||
tail = start + 2 * buffer_size
|
|
||||||
buffer = f.read(tail)
|
while line := f.readline():
|
||||||
while True:
|
|
||||||
offset = 0
|
offset = 0
|
||||||
while True:
|
while True:
|
||||||
index = buffer.find(query, offset)
|
index = line.find(query_b, offset)
|
||||||
if index != -1:
|
if index != -1:
|
||||||
matches.append(head + index)
|
matches.append(count + index)
|
||||||
offset = index + 1
|
offset = index + 1
|
||||||
if len(matches) >= 100:
|
if len(matches) >= 100:
|
||||||
break_outer = True
|
break_outer = True
|
||||||
@@ -114,11 +113,6 @@ def search_log(name: str, query: str, start: int = 0):
|
|||||||
break
|
break
|
||||||
if break_outer:
|
if break_outer:
|
||||||
break
|
break
|
||||||
|
count += len(line)
|
||||||
|
|
||||||
data = f.read(buffer_size)
|
|
||||||
if not data:
|
|
||||||
break
|
|
||||||
buffer = buffer[-len(query):] + data
|
|
||||||
head = tail - len(query)
|
|
||||||
tail += len(data)
|
|
||||||
return {"matches": matches}
|
return {"matches": matches}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user