Skip to content

Commit

Permalink
Add full text search and indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
kamicut committed May 30, 2024
1 parent 586813e commit bb72d7a
Showing 1 changed file with 99 additions and 82 deletions.
181 changes: 99 additions & 82 deletions players.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,100 +2,117 @@
import xml.etree.ElementTree as ET
from tqdm import tqdm

# Function to parse the XML file and insert data into the SQLite database
def parse_xml_to_sqlite(xml_file, db_file):
# Parse the XML file
tree = ET.parse(xml_file)
root = tree.getroot()
def convert_to_none(value):
    """Return *value* unchanged when it is truthy, otherwise ``None``.

    Used to normalise empty XML text (``""`` or ``None``) to SQL NULL.
    """
    return value or None

# Connect to SQLite database (or create it if it doesn't exist)
def create_database(db_file):
conn = sqlite3.connect(db_file)
cursor = conn.cursor()
c = conn.cursor()

# Create table if it doesn't exist, with nullable fields
cursor.execute('''
CREATE TABLE IF NOT EXISTS players (
fideid INTEGER PRIMARY KEY,
name TEXT,
country TEXT,
sex TEXT,
title TEXT,
w_title TEXT,
o_title TEXT,
foa_title TEXT,
rating INTEGER,
games INTEGER,
k INTEGER,
rapid_rating INTEGER,
rapid_games INTEGER,
rapid_k INTEGER,
blitz_rating INTEGER,
blitz_games INTEGER,
blitz_k INTEGER,
birthday INTEGER,
flag TEXT
)
# Create players table
c.execute('''CREATE TABLE IF NOT EXISTS players (
fideid INTEGER PRIMARY KEY,
name TEXT,
country TEXT,
sex TEXT,
title TEXT,
w_title TEXT,
o_title TEXT,
foa_title TEXT,
rating INTEGER,
games INTEGER,
k INTEGER,
rapid_rating INTEGER,
rapid_games INTEGER,
rapid_k INTEGER,
blitz_rating INTEGER,
blitz_games INTEGER,
blitz_k INTEGER,
birthday INTEGER,
flag TEXT
)''')

# Create the FTS5 virtual table used for full-text search over player names.
# Only the `name` column is indexed. content='players' and
# content_rowid='fideid' point the index at the players table, keyed by
# fideid; the index is kept in sync by the triggers defined on players.
c.execute('''CREATE VIRTUAL TABLE IF NOT EXISTS players_fts USING fts5(
name,
content='players',
content_rowid='fideid'
)''')

# Set up triggers to keep the FTS index in sync with the players table.
#
# players_fts is an external-content FTS5 table (content='players'), so rows
# must be removed from the index with the special 'delete' command, passing
# the OLD column values; a plain DELETE/UPDATE on the FTS table cannot see the
# previous content and leaves stale tokens behind. An update is therefore
# expressed as "delete old entry, insert new entry".
#
# NOTE: because of IF NOT EXISTS, a database that already has the earlier
# trigger definitions keeps them until those triggers are dropped.
c.executescript('''
CREATE TRIGGER IF NOT EXISTS players_ai AFTER INSERT ON players BEGIN
INSERT INTO players_fts(rowid, name) VALUES (new.fideid, new.name);
END;
CREATE TRIGGER IF NOT EXISTS players_ad AFTER DELETE ON players BEGIN
INSERT INTO players_fts(players_fts, rowid, name) VALUES ('delete', old.fideid, old.name);
END;
CREATE TRIGGER IF NOT EXISTS players_au AFTER UPDATE ON players BEGIN
INSERT INTO players_fts(players_fts, rowid, name) VALUES ('delete', old.fideid, old.name);
INSERT INTO players_fts(rowid, name) VALUES (new.fideid, new.name);
END;
''')

def to_int(value):
    """Convert *value* to an ``int``, or return ``None`` if it is not a plain number.

    Returns ``None`` for ``None``, the empty string, and any string that is
    not made up solely of decimal digits (signs, whitespace and letters are
    all rejected).
    """
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() cannot parse and would raise on.
    return int(value) if value and value.isdecimal() else None
# Create an index on the country column
c.execute('CREATE INDEX IF NOT EXISTS idx_country ON players (country)')

def maybe(element):
    """Return ``element.text``, or ``None`` when *element* itself is ``None``."""
    if element is None:
        return None
    return element.text
conn.commit()
conn.close()

# Get the list of players
players = root.findall('player')

# Iterate over each player in the XML and insert into the database with a progress bar
for player in tqdm(players, desc="Processing players"):
try:
fideid = to_int(maybe(player.find('fideid')))
name = maybe(player.find('name'))
country = maybe(player.find('country'))
sex = maybe(player.find('sex'))
title = maybe(player.find('title'))
w_title = maybe(player.find('w_title'))
o_title = maybe(player.find('o_title'))
foa_title = maybe(player.find('foa_title'))
def parse_xml_to_sqlite(xml_file, db_file):
create_database(db_file)
conn = sqlite3.connect(db_file)
c = conn.cursor()

rating = to_int(maybe(player.find('rating')))
games = to_int(maybe(player.find('games')))
k = to_int(maybe(player.find('k')))
rapid_rating = to_int(maybe(player.find('rapid_rating')))
rapid_games = to_int(maybe(player.find('rapid_games')))
rapid_k = to_int(maybe(player.find('rapid_k')))
blitz_rating = to_int(maybe(player.find('blitz_rating')))
blitz_games = to_int(maybe(player.find('blitz_games')))
blitz_k = to_int(maybe(player.find('blitz_k')))
birthday = to_int(maybe(player.find('birthday')))
flag = maybe(player.find('flag'))
tree = ET.parse(xml_file)
root = tree.getroot()

cursor.execute('''
INSERT OR REPLACE INTO players (
fideid, name, country, sex, title, w_title, o_title, foa_title,
rating, games, k, rapid_rating, rapid_games, rapid_k, blitz_rating,
blitz_games, blitz_k, birthday, flag
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
fideid, name, country, sex, title, w_title, o_title, foa_title,
rating, games, k, rapid_rating, rapid_games, rapid_k, blitz_rating,
blitz_games, blitz_k, birthday, flag
))
def _text(player, tag):
    """Return the text of child *tag*, or None if the child is missing or empty."""
    node = player.find(tag)
    if node is None:
        # Optional tags may be absent altogether; treat that like empty text
        # instead of failing on `None.text`.
        return None
    return node.text or None

def _int(player, tag):
    """Return child *tag* parsed as an int, or None if it is missing or empty.

    Raises ValueError if the text is present but not a valid integer.
    """
    text = _text(player, tag)
    return int(text) if text is not None else None

# Build one tuple per <player> element, in the column order of the players
# table, so the whole list can be handed to a single executemany() call.
players = []
for player in tqdm(root.findall('player'), desc="Processing players"):
    players.append((
        # fideid is the primary key, so it stays mandatory: a record without
        # one still raises here rather than being stored with a NULL key.
        int(player.find('fideid').text),
        _text(player, 'name'),
        _text(player, 'country'),
        _text(player, 'sex'),
        _text(player, 'title'),
        _text(player, 'w_title'),
        _text(player, 'o_title'),
        _text(player, 'foa_title'),
        _int(player, 'rating'),
        _int(player, 'games'),
        _int(player, 'k'),
        _int(player, 'rapid_rating'),
        _int(player, 'rapid_games'),
        _int(player, 'rapid_k'),
        _int(player, 'blitz_rating'),
        _int(player, 'blitz_games'),
        _int(player, 'blitz_k'),
        _int(player, 'birthday'),
        _text(player, 'flag'),
    ))

except Exception as e:
print(f"Error processing player: {ET.tostring(player, encoding='unicode')}")
print(f"Error details: {e}")
break
c.executemany('''
INSERT OR IGNORE INTO players (
fideid, name, country, sex, title, w_title, o_title, foa_title,
rating, games, k, rapid_rating, rapid_games, rapid_k,
blitz_rating, blitz_games, blitz_k, birthday, flag
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', players)

# Commit the transaction and close the connection
conn.commit()
conn.close()

print("Data has been successfully imported into the SQLite database.")
if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser(description='Parse XML and store data into SQLite.')
parser.add_argument('xml_file', help='Path to the XML file to parse.')
parser.add_argument('db_file', help='Path to the SQLite database file.')

# Example usage
xml_file = 'players.xml'
db_file = 'players.db'
parse_xml_to_sqlite(xml_file, db_file)
args = parser.parse_args()
parse_xml_to_sqlite(args.xml_file, args.db_file)

0 comments on commit bb72d7a

Please sign in to comment.