Refactored code; organized letter and word metrics

This commit is contained in:
2017-06-05 20:30:13 +03:00
parent 806d9cdedc
commit 64caeab14e
24 changed files with 388 additions and 243 deletions

View File

@ -0,0 +1,33 @@
import os.path
import sqlite3
class Storage:
    """Base class for stores backed by a single SQLite database file.

    Subclasses override ``_createDatabase`` and ``_destroyDatabase`` to manage
    their own tables. ``_createDatabase`` is invoked once from the constructor.
    The usual call pattern is ``connect()`` -> run statements on the returned
    cursor -> ``commit()``.
    """

    def __init__(self, dbFile):
        """Remember the database path and run the schema-creation hook.

        Args:
            dbFile: Path handed to ``sqlite3.connect`` on every ``connect()``.
        """
        self.__dbFile = dbFile
        # The handles must exist before the hook runs: subclass
        # implementations of _createDatabase() call connect()/commit().
        self.__con = None
        self.__cur = None
        self.__initialize()

    def __initialize(self):
        """Run the one-time setup performed at construction."""
        self._createDatabase()

    def _createDatabase(self):
        """Hook: create the tables this store needs. No-op in the base class."""
        pass

    def _destroyDatabase(self):
        """Hook: drop the tables this store owns. No-op in the base class."""
        pass

    def connect(self):
        """Open a new connection to the database file.

        Returns:
            A cursor on the freshly opened connection.
        """
        self.__con = sqlite3.connect(self.__dbFile)
        self.__cur = self.__con.cursor()
        return self.__cur

    def commit(self, doClose=True):
        """Commit pending changes on the current connection.

        Args:
            doClose: When true (the default), close the cursor and the
                connection after a successful commit.

        Raises:
            sqlite3.ProgrammingError: If no connection is open, i.e.
                ``connect()`` was never called or the connection has already
                been closed by an earlier ``commit()``.
        """
        if self.__con is None:
            # Same exception type sqlite3 itself raises for a closed
            # connection, instead of an AttributeError on None.
            raise sqlite3.ProgrammingError(
                "commit() called without an open connection; call connect() first"
            )
        self.__con.commit()
        if doClose:
            self.__close()

    def __close(self):
        """Close cursor and connection and forget both handles."""
        con, cur = self.__con, self.__cur
        # Clear the attributes first so a failing close() cannot leave stale
        # handles behind.
        self.__con = None
        self.__cur = None
        try:
            if cur is not None:
                cur.close()
        finally:
            con.close()

    def recreateDatabase(self):
        """Drop and re-create this store's tables via the subclass hooks."""
        self._destroyDatabase()
        self._createDatabase()

View File

@ -1,80 +0,0 @@
import logging
import os
from model import *
import sqlite3
# Module logger, registered under the name "storage".
log = logging.getLogger("storage")
# Path of the fragments database. Assigned by initializeFragmentDatabase() and
# read by getTextCount() / getAllTexts(); empty until initialization has run.
DB_FRAGMENTS = ""
# SQL commands used to create the schema.
#
# Authors table columns:
# birthLocation - general area, not exact location (e.g. Transylvania)
# birthOrigin - rural or urban
# studies - masters, bachelors, high school, middle school, primary school
# occupations - comma separated if there are multiple
# studiesAbroad - foreign cities where the author studied (comma separated)
COMMAND_CREATE_AUTHORS = """CREATE TABLE Authors (
name TEXT PRIMARY KEY,
birthYear INTEGER,
birthLocation TEXT,
birthOrigin TEXT,
studies TEXT,
occupations TEXT,
studiesAbroad TEXT
)"""
# Fragments table columns:
# genre - short story (nuvela), novel (roman), poem etc.
# movement - literary movement, submovements separated by / (e.g. realism/naturalism)
# tags - other relevant information (e.g. psychological)
COMMAND_CREATE_FRAGMENTS = """CREATE TABLE Fragments (
id INTEGER PRIMARY KEY,
title TEXT,
year INTEGER,
author TEXT REFERENCES Authors(name),
genre TEXT,
movement TEXT,
tags TEXT
)"""
# FragmentsContent holds the actual text of each fragment, keyed by Fragments.id.
COMMAND_CREATE_FRAGMENTS_CONTENT = """CREATE TABLE FragmentsContent (
id INTEGER REFERENCES Fragments(id),
content TEXT
)"""
# Initialize databases
def initializeFragmentDatabase(dbFile):
    """Select the fragments database and create its schema if the file is missing.

    Stores ``dbFile`` in the module-level ``DB_FRAGMENTS``. When no file exists
    at that path, a new database is created with the Authors, Fragments and
    FragmentsContent tables; an existing file is left untouched.

    Args:
        dbFile: Path of the SQLite database file.
    """
    global DB_FRAGMENTS
    DB_FRAGMENTS = dbFile
    if os.path.exists(dbFile):
        return

    log.info("Text database %s not found. Will create database.", dbFile)
    con = sqlite3.connect(dbFile)
    try:
        c = con.cursor()
        c.execute(COMMAND_CREATE_AUTHORS)
        c.execute(COMMAND_CREATE_FRAGMENTS)
        c.execute(COMMAND_CREATE_FRAGMENTS_CONTENT)
        con.commit()
    finally:
        # Release the connection even when one of the CREATE statements fails.
        con.close()
    log.info("Database created!")
def getTextCount():
    """Return the number of rows in the Fragments table.

    Reads from the database named by the module-level ``DB_FRAGMENTS``.

    Returns:
        The result of ``SELECT COUNT(*) FROM Fragments``.
    """
    con = sqlite3.connect(DB_FRAGMENTS)
    try:
        c = con.cursor()
        try:
            c.execute("SELECT COUNT(*) FROM Fragments")
            item = c.fetchone()
        finally:
            c.close()
    finally:
        # Close the connection even when the query raises (e.g. missing table).
        con.close()
    return item[0]
def getAllTexts():
    """Return every stored text as a list of ``(id, content)`` rows.

    Reads the FragmentsContent table of the database named by the module-level
    ``DB_FRAGMENTS``.

    Returns:
        The list produced by ``fetchall()``; empty when the table has no rows.
    """
    con = sqlite3.connect(DB_FRAGMENTS)
    try:
        c = con.cursor()
        try:
            c.execute("SELECT id, content FROM FragmentsContent")
            items = c.fetchall()
        finally:
            c.close()
    finally:
        # Close the connection even when the query raises (e.g. missing table).
        con.close()
    return items

View File

@ -1,84 +0,0 @@
import logging
import os
from model.Word import *
import sqlite3
# Module logger, registered under the name "storage".
log = logging.getLogger("storage")
# Path of the results database. Assigned by initializeResultsDatabase() and
# read by storeFrequencies() / storeTtlAnalysis(); empty until then.
DB_RESULTS = ""
# One row per (text, letter group, category) with its frequency.
# The category values written by storeFrequencies() are 'p', 'l1', 'l2', 'l3'.
COMMAND_CREATE_LETTER_FREQUENCIES = """CREATE TABLE LetterFrequencies (
idtext INTEGER,
lettergroup TEXT,
category TEXT,
frequency REAL
)"""
# One row per analysed word of a text; filled by storeTtlAnalysis() from the
# word's wordIndex, sentenceIndex, text, lemma, ana and chunk attributes.
COMMAND_CREATE_TEXT_WORDS = """CREATE TABLE TextWords (
idtext INTEGER,
wordIndex INTEGER,
sentenceIndex INTEGER,
word TEXT,
lemma TEXT,
analysis TEXT,
chunk TEXT
)"""
# Disabled: schema for a word-length histogram table (not created anywhere
# in this module).
# COMMAND_CREATE_WORDLENGTH_HISTOGRAM = """CREATE TABLE WordLengthHistogram (
# idtext INTEGER,
# wordlength INTEGER,
# frequency REAL
# )"""
def initializeResultsDatabase(dbFile, cleanupOldData):
    """Select the results database and optionally reset its tables.

    Stores ``dbFile`` in the module-level ``DB_RESULTS``. When
    ``cleanupOldData`` is true, the LetterFrequencies and TextWords tables are
    dropped (if present) and re-created empty; otherwise the database is not
    touched.

    Args:
        dbFile: Path of the SQLite results database.
        cleanupOldData: Whether to discard previously stored results.
    """
    global DB_RESULTS
    DB_RESULTS = dbFile
    if not cleanupOldData:
        return

    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()
        # IF EXISTS tolerates a missing table without hiding other
        # operational errors (locked database, I/O failure, ...).
        c.execute("DROP TABLE IF EXISTS LetterFrequencies")
        c.execute(COMMAND_CREATE_LETTER_FREQUENCIES)
        c.execute("DROP TABLE IF EXISTS TextWords")
        c.execute(COMMAND_CREATE_TEXT_WORDS)
        con.commit()
        c.close()
    finally:
        # Release the connection even when a statement fails.
        con.close()
def storeFrequencies(idtext, freq):
    """Insert the letter-group frequencies of one text into LetterFrequencies.

    Args:
        idtext: Identifier of the text the frequencies belong to.
        freq: Container indexable by 0..3; each entry is an iterable of
            ``(lettergroup, frequency)`` pairs. Entry ``i`` is stored under
            category ``'p'``, ``'l1'``, ``'l2'`` or ``'l3'`` respectively.
    """
    # Category label for each position of ``freq``. Named so it does not
    # shadow the builtin ``chr``.
    categories = ('p', 'l1', 'l2', 'l3')

    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()
        for i, category in enumerate(categories):
            for let, fr in freq[i]:
                c.execute(
                    "INSERT INTO LetterFrequencies VALUES (?, ?, ?, ?)",
                    (idtext, let, category, fr),
                )
        con.commit()
        c.close()
    finally:
        # Release the connection even when an insert fails; uncommitted rows
        # are discarded with it.
        con.close()
def storeTtlAnalysis(idtext, words):
    """Insert the analysed words of one text into TextWords.

    Args:
        idtext: Identifier of the text the words belong to.
        words: Iterable of word objects exposing ``wordIndex``,
            ``sentenceIndex``, ``text``, ``lemma``, ``ana`` and ``chunk``.
    """
    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()
        # One row per word, in the column order of the TextWords table.
        c.executemany(
            "INSERT INTO TextWords VALUES (?, ?, ?, ?, ?, ?, ?)",
            (
                (idtext, word.wordIndex, word.sentenceIndex, word.text,
                 word.lemma, word.ana, word.chunk)
                for word in words
            ),
        )
        con.commit()
        c.close()
    finally:
        # Release the connection even when an insert fails; uncommitted rows
        # are discarded with it.
        con.close()

View File

View File

@ -0,0 +1,27 @@
import storage
class LetterFrequencyStorage(storage.Storage):
    """Store for per-text letter-group frequencies (table ``LetterFrequencies``)."""

    __COMMAND_CREATE_LETTER_FREQUENCIES = """CREATE TABLE IF NOT EXISTS LetterFrequencies (
idtext INTEGER,
lettergroup TEXT,
category TEXT,
frequency REAL
)"""

    def _createDatabase(self):
        """Create the LetterFrequencies table if it does not exist yet."""
        c = self.connect()
        c.execute(self.__COMMAND_CREATE_LETTER_FREQUENCIES)
        self.commit()

    def _destroyDatabase(self):
        """Drop the LetterFrequencies table if it exists."""
        c = self.connect()
        c.execute('DROP TABLE IF EXISTS LetterFrequencies')
        self.commit()

    def store(self, idtext, frequencies):
        """Insert the letter-group frequencies of one text.

        Args:
            idtext: Identifier of the text the frequencies belong to.
            frequencies: Container indexable by 0..3; each entry is an
                iterable of ``(lettergroup, frequency)`` pairs. Entry ``i`` is
                stored under category ``'p'``, ``'l1'``, ``'l2'`` or ``'l3'``
                respectively.
        """
        # Category label for each position of ``frequencies``. Named so it
        # does not shadow the builtin ``chr``.
        categories = ('p', 'l1', 'l2', 'l3')
        c = self.connect()
        for i, category in enumerate(categories):
            for let, fr in frequencies[i]:
                c.execute("INSERT INTO LetterFrequencies VALUES (?, ?, ?, ?)", (idtext, let, category, fr))
        self.commit()

View File

@ -0,0 +1,24 @@
import storage
class WordFrequencyStorage(storage.Storage):
    """Store for per-text word frequencies (table ``WordFrequencies``)."""

    __COMMAND_CREATE_WORD_FREQUENCIES = """CREATE TABLE IF NOT EXISTS WordFrequencies (
idtext INTEGER,
word TEXT,
frequency REAL
)"""

    def _createDatabase(self):
        """Create the WordFrequencies table if it does not exist yet."""
        cursor = self.connect()
        cursor.execute(self.__COMMAND_CREATE_WORD_FREQUENCIES)
        self.commit()

    def _destroyDatabase(self):
        """Drop the WordFrequencies table if it exists."""
        cursor = self.connect()
        cursor.execute('DROP TABLE IF EXISTS WordFrequencies')
        self.commit()

    def store(self, idtext, frequencies):
        """Insert one row per ``(word, frequency)`` pair for the given text.

        Args:
            idtext: Identifier of the text the frequencies belong to.
            frequencies: Iterable of ``(word, frequency)`` pairs.
        """
        cursor = self.connect()
        insert_row = 'INSERT INTO WordFrequencies VALUES(?, ?, ?)'
        for entry_word, entry_freq in frequencies:
            row = (idtext, entry_word, entry_freq)
            cursor.execute(insert_row, row)
        self.commit()

View File

@ -0,0 +1,24 @@
import storage
class WordLengthStorage(storage.Storage):
    """Store for per-text word-length frequencies (table ``WordLengths``)."""

    __COMMAND_CREATE_WORD_LENGTHS = """CREATE TABLE IF NOT EXISTS WordLengths (
idtext INTEGER,
wordlength INTEGER,
frequency REAL
)"""

    def _createDatabase(self):
        """Create the WordLengths table if it does not exist yet."""
        cursor = self.connect()
        cursor.execute(self.__COMMAND_CREATE_WORD_LENGTHS)
        self.commit()

    def _destroyDatabase(self):
        """Drop the WordLengths table if it exists."""
        cursor = self.connect()
        cursor.execute('DROP TABLE IF EXISTS WordLengths')
        self.commit()

    def store(self, idtext, frequencies):
        """Insert one row per ``(wordlength, frequency)`` pair for the given text.

        Args:
            idtext: Identifier of the text the frequencies belong to.
            frequencies: Iterable of ``(wordlength, frequency)`` pairs.
        """
        cursor = self.connect()
        insert_row = "INSERT INTO WordLengths VALUES(?, ?, ?)"
        for pair in frequencies:
            word_length, share = pair
            cursor.execute(insert_row, (idtext, word_length, share))
        self.commit()

65
src/storage/texts.py Normal file
View File

@ -0,0 +1,65 @@
import storage
class TextStorage(storage.Storage):
    """Store for authors, text fragments and fragment contents."""

    # Authors table columns:
    # birthLocation - general area, not exact location (e.g. Transylvania)
    # birthOrigin - rural or urban
    # studies - masters, bachelors, high school, middle school, primary school
    # occupations - comma separated if there are multiple
    # studiesAbroad - foreign cities where the author studied (comma separated)
    __COMMAND_CREATE_AUTHORS = """CREATE TABLE IF NOT EXISTS Authors (
name TEXT PRIMARY KEY,
birthYear INTEGER,
birthLocation TEXT,
birthOrigin TEXT,
studies TEXT,
occupations TEXT,
studiesAbroad TEXT
)"""

    # Fragments table columns:
    # genre - short story (nuvela), novel (roman), poem etc.
    # movement - literary movement, submovements separated by / (e.g. realism/naturalism)
    # tags - other relevant information (e.g. psychological)
    __COMMAND_CREATE_FRAGMENTS = """CREATE TABLE IF NOT EXISTS Fragments (
id INTEGER PRIMARY KEY,
title TEXT,
year INTEGER,
author TEXT REFERENCES Authors(name),
genre TEXT,
movement TEXT,
tags TEXT
)"""

    # FragmentsContent holds the actual text of each fragment.
    __COMMAND_CREATE_FRAGMENTS_CONTENT = """CREATE TABLE IF NOT EXISTS FragmentsContent (
id INTEGER REFERENCES Fragments(id),
content TEXT
)"""

    def _createDatabase(self):
        """Create the three tables if missing, referenced tables first."""
        c = self.connect()
        c.execute(self.__COMMAND_CREATE_AUTHORS)
        c.execute(self.__COMMAND_CREATE_FRAGMENTS)
        c.execute(self.__COMMAND_CREATE_FRAGMENTS_CONTENT)
        self.commit()

    def _destroyDatabase(self):
        """Drop the three tables if they exist, referencing tables first."""
        c = self.connect()
        # Reverse of creation order: FragmentsContent references Fragments,
        # which references Authors. Dropping a still-referenced parent table
        # fails on populated data when foreign-key enforcement is enabled.
        c.execute('DROP TABLE IF EXISTS FragmentsContent')
        c.execute('DROP TABLE IF EXISTS Fragments')
        c.execute('DROP TABLE IF EXISTS Authors')
        self.commit()

    def getTextCount(self):
        """Return the number of rows in the Fragments table."""
        c = self.connect()
        c.execute("SELECT COUNT(*) FROM Fragments")
        item = c.fetchone()
        # Nothing was written; commit() is called for its default
        # close-the-connection behaviour.
        self.commit()
        return item[0]

    def getAllTexts(self):
        """Return every stored text as a list of ``(id, content)`` rows."""
        c = self.connect()
        c.execute("SELECT id, content FROM FragmentsContent")
        items = c.fetchall()
        # Nothing was written; commit() is called for its default
        # close-the-connection behaviour.
        self.commit()
        return items