Initial commit
This commit is contained in:
0
src/storage/__init__.py
Normal file
0
src/storage/__init__.py
Normal file
80
src/storage/data.py
Normal file
80
src/storage/data.py
Normal file
@ -0,0 +1,80 @@
|
||||
import logging
|
||||
import os
|
||||
from model import *
|
||||
import sqlite3
|
||||
|
||||
# Logger for the storage modules (registered under the name "storage").
log = logging.getLogger("storage")

# Path of the SQLite fragments database. Empty until
# initializeFragmentDatabase() stores the real path here.
DB_FRAGMENTS = ""
|
||||
|
||||
# Commands

# Authors table columns:
#   birthLocation - general area, not exact location (e.g. Transylvania)
#   birthOrigin   - rural or urban
#   studies       - masters, bachelors, high school, middle school, primary school
#   occupations   - comma separated if there are multiple
#   studiesAbroad - foreign cities where author studied (comma separated)
COMMAND_CREATE_AUTHORS = """CREATE TABLE Authors (
    name TEXT PRIMARY KEY,
    birthYear INTEGER,
    birthLocation TEXT,
    birthOrigin TEXT,
    studies TEXT,
    occupations TEXT,
    studiesAbroad TEXT
)"""
|
||||
|
||||
# Fragments table columns:
#   genre    - short story (nuvela), novel (roman), poem etc.
#   movement - literary movement (submovements separated by /) (e.g. realism/naturalism)
#   tags     - other relevant information (e.g. psychological)
COMMAND_CREATE_FRAGMENTS = """CREATE TABLE Fragments (
    id INTEGER PRIMARY KEY,
    title TEXT,
    year INTEGER,
    author TEXT REFERENCES Authors(name),
    genre TEXT,
    movement TEXT,
    tags TEXT
)"""
|
||||
|
||||
# FragmentsContent holds the actual text of each fragment, keyed by the
# id of the corresponding Fragments row.
COMMAND_CREATE_FRAGMENTS_CONTENT = """CREATE TABLE FragmentsContent (
    id INTEGER REFERENCES Fragments(id),
    content TEXT
)"""
|
||||
|
||||
# Initialize databases
def initializeFragmentDatabase(dbFile):
    """Remember *dbFile* as the fragments database and create it if missing.

    The path is stored in the module-level ``DB_FRAGMENTS`` so the query
    helpers in this module open the same file. When nothing exists at
    *dbFile*, a new SQLite database is created containing the Authors,
    Fragments and FragmentsContent tables. An existing file is left as is.

    Args:
        dbFile: Filesystem path of the SQLite database file.

    Raises:
        sqlite3.Error: If the database cannot be opened or a CREATE TABLE
            statement fails.
    """
    global DB_FRAGMENTS
    DB_FRAGMENTS = dbFile

    if os.path.exists(dbFile):
        return

    log.info("Text database %s not found. Will create database.", dbFile)
    con = sqlite3.connect(dbFile)
    try:
        for command in (
            COMMAND_CREATE_AUTHORS,
            COMMAND_CREATE_FRAGMENTS,
            COMMAND_CREATE_FRAGMENTS_CONTENT,
        ):
            con.execute(command)
        con.commit()
    finally:
        # Release the connection even when a CREATE statement fails.
        con.close()
    log.info("Database created!")
|
||||
|
||||
def getTextCount():
    """Return the number of rows in the Fragments table.

    Opens the database whose path is stored in ``DB_FRAGMENTS``, runs a
    ``COUNT(*)`` query and closes the connection again.

    Returns:
        The row count as reported by SQLite.

    Raises:
        sqlite3.Error: If the database cannot be queried (for example when
            the Fragments table does not exist).
    """
    con = sqlite3.connect(DB_FRAGMENTS)
    try:
        row = con.execute("SELECT COUNT(*) FROM Fragments").fetchone()
    finally:
        # Always release the connection, including when the query fails.
        con.close()
    return row[0]
|
||||
|
||||
def getAllTexts():
    """Return every row of the FragmentsContent table.

    Opens the database whose path is stored in ``DB_FRAGMENTS``, reads all
    ``(id, content)`` pairs and closes the connection again.

    Returns:
        A list of ``(id, content)`` tuples; empty when the table has no rows.

    Raises:
        sqlite3.Error: If the database cannot be queried.
    """
    con = sqlite3.connect(DB_FRAGMENTS)
    try:
        return con.execute("SELECT id, content FROM FragmentsContent").fetchall()
    finally:
        # Always release the connection, including when the query fails.
        con.close()
|
84
src/storage/results.py
Normal file
84
src/storage/results.py
Normal file
@ -0,0 +1,84 @@
|
||||
import logging
|
||||
import os
|
||||
from model.Word import *
|
||||
import sqlite3
|
||||
|
||||
# Logger for the storage modules (registered under the name "storage").
log = logging.getLogger("storage")

# Path of the SQLite results database. Empty until
# initializeResultsDatabase() stores the real path here.
DB_RESULTS = ""
|
||||
|
||||
# LetterFrequencies table columns:
#   idtext      - id of the text the frequency belongs to
#   lettergroup - the letter group that was counted
#   category    - one of 'p', 'l1', 'l2', 'l3' (as written by storeFrequencies)
#   frequency   - frequency value for that letter group
COMMAND_CREATE_LETTER_FREQUENCIES = """CREATE TABLE LetterFrequencies (
    idtext INTEGER,
    lettergroup TEXT,
    category TEXT,
    frequency REAL
)"""
|
||||
|
||||
# TextWords table: one row per analysed word of a text. Column order matters,
# because storeTtlAnalysis inserts positionally:
#   idtext, wordIndex, sentenceIndex, word, lemma, analysis, chunk
COMMAND_CREATE_TEXT_WORDS = """CREATE TABLE TextWords (
    idtext INTEGER,
    wordIndex INTEGER,
    sentenceIndex INTEGER,
    word TEXT,
    lemma TEXT,
    analysis TEXT,
    chunk TEXT
)"""
|
||||
|
||||
# Disabled: this table is not created by initializeResultsDatabase.
# COMMAND_CREATE_WORDLENGTH_HISTOGRAM = """CREATE TABLE WordLengthHistogram (
#     idtext INTEGER,
#     wordlength INTEGER,
#     frequency REAL
# )"""
|
||||
|
||||
def initializeResultsDatabase(dbFile, cleanupOldData):
    """Remember *dbFile* as the results database and optionally reset it.

    The path is stored in the module-level ``DB_RESULTS`` so the store
    helpers in this module write to the same file. When *cleanupOldData* is
    true, the LetterFrequencies and TextWords tables are dropped (if they
    exist) and recreated empty. When it is false the database is not
    touched at all, so the tables must already exist.

    Args:
        dbFile: Filesystem path of the SQLite results database.
        cleanupOldData: Whether to drop and recreate the result tables.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
    """
    global DB_RESULTS
    DB_RESULTS = dbFile

    if not cleanupOldData:
        return

    tables = (
        ("LetterFrequencies", COMMAND_CREATE_LETTER_FREQUENCIES),
        ("TextWords", COMMAND_CREATE_TEXT_WORDS),
    )

    con = sqlite3.connect(DB_RESULTS)
    try:
        for table_name, create_command in tables:
            # IF EXISTS covers the "table not there yet" case without
            # swallowing unrelated OperationalErrors (e.g. a locked database).
            con.execute("DROP TABLE IF EXISTS " + table_name)
            con.execute(create_command)
        con.commit()
    finally:
        # Release the connection even when a statement fails.
        con.close()
|
||||
|
||||
|
||||
def storeFrequencies(idtext, freq):
    """Insert the letter-group frequencies of one text into LetterFrequencies.

    ``freq[0]`` .. ``freq[3]`` are read in order and stored under the
    categories ``'p'``, ``'l1'``, ``'l2'`` and ``'l3'`` respectively. Each of
    those four entries must be an iterable of ``(lettergroup, frequency)``
    pairs. All rows are written in one transaction in the database whose
    path is stored in ``DB_RESULTS``.

    Args:
        idtext: Id of the text the frequencies belong to.
        freq: Indexable collection with (at least) entries 0..3, as described
            above.

    Raises:
        IndexError: If *freq* is a sequence with fewer than four entries
            (nothing is stored in that case).
        sqlite3.Error: If the rows cannot be inserted.
    """
    # Category label stored for freq[0], freq[1], freq[2], freq[3].
    categories = ('p', 'l1', 'l2', 'l3')

    # Build all rows up front so a malformed ``freq`` fails before the
    # database is touched.
    rows = [
        (idtext, lettergroup, category, frequency)
        for index, category in enumerate(categories)
        for lettergroup, frequency in freq[index]
    ]

    con = sqlite3.connect(DB_RESULTS)
    try:
        con.executemany("INSERT INTO LetterFrequencies VALUES (?, ?, ?, ?)", rows)
        con.commit()
    finally:
        # Release the connection even when an insert fails.
        con.close()
|
||||
|
||||
def storeTtlAnalysis(idtext, words):
    """Insert the analysed words of one text into the TextWords table.

    For every item of *words* one row is written containing *idtext* and the
    item's ``wordIndex``, ``sentenceIndex``, ``text``, ``lemma``, ``ana`` and
    ``chunk`` attributes, in that column order. All rows are written in one
    transaction in the database whose path is stored in ``DB_RESULTS``.

    Args:
        idtext: Id of the text the words belong to.
        words: Iterable of objects exposing the attributes listed above.

    Raises:
        sqlite3.Error: If the rows cannot be inserted.
    """
    con = sqlite3.connect(DB_RESULTS)
    try:
        con.executemany(
            "INSERT INTO TextWords VALUES (?, ?, ?, ?, ?, ?, ?)",
            (
                (
                    idtext,
                    word.wordIndex,
                    word.sentenceIndex,
                    word.text,
                    word.lemma,
                    word.ana,
                    word.chunk,
                )
                for word in words
            ),
        )
        con.commit()
    finally:
        # Release the connection even when an insert fails.
        con.close()
|
Reference in New Issue
Block a user