Files
LogPatternExtractor/Processor/TemplateDatabase.py
2026-05-02 18:33:38 +03:00

122 lines
4.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
import sqlite3
import numpy as np
from typing import List, Dict, Tuple, Optional
from Processor.Models.LogTemplate import LogTemplate
from Processor.Models.LogVariable import LogVariable
class TemplateDatabase:
    """SQLite-backed store for log templates and their variables.

    Two tables are maintained:

    * ``templates`` -- one row per template: id, rendered pattern text,
      embedding serialized as raw float32 bytes, hit counter and the
      per-template variable counter.
    * ``variables`` -- one row per template variable, keyed by
      ``(template_id, local_id)``.

    The instance can be used as a context manager; the connection is closed
    when the ``with`` block exits.
    """

    def __init__(self, db_path: str = "logs_knowledge.db"):
        """Open (or create) the database at *db_path* and ensure the schema exists.

        Args:
            db_path: Path passed to ``sqlite3.connect``.

        Raises:
            sqlite3.Error: If the schema cannot be created; the connection is
                closed before the error propagates.
        """
        # check_same_thread=False disables sqlite3's same-thread check;
        # this class adds no locking of its own.
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        try:
            # SQLite ignores FOREIGN KEY clauses (including ON DELETE CASCADE)
            # unless enforcement is switched on for each connection.
            self.conn.execute("PRAGMA foreign_keys = ON")
            self.create_tables()
        except Exception:
            # Do not leak the open handle if initialization fails.
            self.conn.close()
            raise

    def __enter__(self):
        """Return the database itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving the ``with`` block."""
        self.close()

    def create_tables(self):
        """Create the ``templates`` and ``variables`` tables if they are missing."""
        with self.conn:
            self.conn.execute("""
                CREATE TABLE IF NOT EXISTS templates (
                    id INTEGER PRIMARY KEY,
                    pattern TEXT NOT NULL,
                    embedding BLOB NOT NULL,
                    hits INTEGER DEFAULT 1,
                    local_counter INTEGER DEFAULT 1
                )
            """)
            self.conn.execute("""
                CREATE TABLE IF NOT EXISTS variables (
                    template_id INTEGER,
                    local_id INTEGER,
                    var_type TEXT,
                    initial_value TEXT,
                    PRIMARY KEY (template_id, local_id),
                    FOREIGN KEY(template_id) REFERENCES templates(id) ON DELETE CASCADE
                )
            """)

    def save_template(self, tpl: "LogTemplate"):
        """Insert or update *tpl* and replace its stored variables.

        The template row is upserted by ``tpl.uid``; all variable rows of that
        template are then deleted and re-inserted from the ``LogVariable``
        tokens in ``tpl.tokens``. Everything runs in one transaction.

        Args:
            tpl: Template to persist. Its embedding is stored as float32 bytes.
        """
        emb_bytes = tpl.embedding.astype(np.float32).tobytes()
        pattern_str = tpl.render()
        # Only variable tokens are persisted; other tokens live in the pattern text.
        vars_data = [
            (tpl.uid, token.uid, token.var_type, token.initial_value)
            for token in tpl.tokens
            if isinstance(token, LogVariable)
        ]
        with self.conn:
            self.conn.execute("""
                INSERT INTO templates (id, pattern, embedding, hits, local_counter)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(id) DO UPDATE SET
                    pattern = excluded.pattern,
                    embedding = excluded.embedding,
                    hits = excluded.hits,
                    local_counter = excluded.local_counter
            """, (tpl.uid, pattern_str, emb_bytes, tpl.hits, tpl.local_var_counter))
            self.conn.execute("DELETE FROM variables WHERE template_id = ?", (tpl.uid,))
            if vars_data:
                # Columns are named explicitly so the statement does not depend
                # on the physical column order of the table.
                self.conn.executemany(
                    "INSERT INTO variables (template_id, local_id, var_type, initial_value) "
                    "VALUES (?, ?, ?, ?)",
                    vars_data,
                )

    # --- New methods for RAM optimization ---

    def load_index_data(self) -> List[Tuple[int, bytes]]:
        """Load ONLY template ids and embeddings.

        Used at application start-up to build the in-RAM index.

        Returns:
            A list of ``(id, embedding_bytes)`` rows.
        """
        cursor = self.conn.execute("SELECT id, embedding FROM templates")
        return cursor.fetchall()

    def get_template_data_by_id(
        self, template_id: int
    ) -> Tuple[Optional[Tuple], Dict[int, "LogVariable"]]:
        """Load the raw data of ONE template by its id.

        Args:
            template_id: Primary key of the template.

        Returns:
            ``(template_row, variables)`` where ``template_row`` is
            ``(id, pattern, embedding, hits, local_counter)`` and ``variables``
            maps ``local_id`` to a ``LogVariable``. ``(None, {})`` when no
            template has that id.
        """
        # 1. Load the template row itself.
        row = self.conn.execute(
            "SELECT id, pattern, embedding, hits, local_counter FROM templates WHERE id = ?",
            (template_id,),
        ).fetchone()
        if row is None:
            return None, {}

        # 2. Load its variables.
        vars_cursor = self.conn.execute(
            "SELECT local_id, var_type, initial_value FROM variables WHERE template_id = ?",
            (template_id,),
        )
        vars_map = {
            l_id: LogVariable(l_id, initial_value=init_val, var_type=v_type)
            for l_id, v_type, init_val in vars_cursor
        }
        return row, vars_map

    def load_raw_data(self) -> Tuple[List[Tuple], Dict[int, Dict[int, "LogVariable"]]]:
        """Return all stored data at once.

        Caution: may exhaust RAM when the database is large.

        Returns:
            ``(templates_data, vars_map)`` where ``templates_data`` is a list
            of ``(id, pattern, embedding, hits, local_counter)`` rows and
            ``vars_map`` maps ``template_id`` to ``{local_id: LogVariable}``.
        """
        vars_map: Dict[int, Dict[int, "LogVariable"]] = {}
        cursor = self.conn.execute(
            "SELECT template_id, local_id, var_type, initial_value FROM variables"
        )
        for t_id, l_id, v_type, init_val in cursor:
            vars_map.setdefault(t_id, {})[l_id] = LogVariable(
                l_id, initial_value=init_val, var_type=v_type
            )

        templates_data = self.conn.execute(
            "SELECT id, pattern, embedding, hits, local_counter FROM templates"
        ).fetchall()
        return templates_data, vars_map

    def get_max_id(self) -> int:
        """Return the largest template id, or 0 when the table is empty."""
        # MAX() over an empty table yields NULL, which arrives as None.
        res = self.conn.execute("SELECT MAX(id) FROM templates").fetchone()[0]
        return res if res is not None else 0

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()