Refactored code; organized letter and word metrics
This commit is contained in:
@ -0,0 +1,33 @@
|
||||
import os.path
|
||||
import sqlite3
|
||||
|
||||
class Storage:
    """Base class for stores backed by a single SQLite database file.

    Subclasses override ``_createDatabase`` and ``_destroyDatabase`` to manage
    their own tables; both are no-ops here. ``_createDatabase`` is invoked
    once from the constructor.
    """

    def __init__(self, dbFile):
        """Remember the database path and run the creation hook.

        Args:
            dbFile: path handed to ``sqlite3.connect`` by ``connect()``.
        """
        self.__dbFile = dbFile
        # The handles must exist *before* the hook runs: an overridden
        # _createDatabase() may call connect()/commit(), and resetting the
        # handles afterwards would discard a connection the hook left open.
        self.__con = None
        self.__cur = None
        self.__initialize()

    def __initialize(self):
        """Run the one-time setup performed at construction."""
        self._createDatabase()

    def _createDatabase(self):
        """Hook: create the tables this store needs. No-op in the base class."""
        pass

    def _destroyDatabase(self):
        """Hook: drop the tables this store owns. No-op in the base class."""
        pass

    def connect(self):
        """Open a connection to the database file and return a new cursor.

        The connection and cursor are kept on the instance so that
        ``commit()`` can later commit and close them.
        """
        self.__con = sqlite3.connect(self.__dbFile)
        self.__cur = self.__con.cursor()
        return self.__cur

    def commit(self, doClose=True):
        """Commit the current connection opened by ``connect()``.

        Args:
            doClose: when true (the default) the cursor and connection are
                closed after committing and the stored handles are cleared.
        """
        self.__con.commit()
        if doClose:
            self.__cur.close()
            self.__con.close()
            # Do not keep references to closed handles around.
            self.__cur = None
            self.__con = None

    def recreateDatabase(self):
        """Destroy and then re-create this store's tables via the hooks."""
        self._destroyDatabase()
        self._createDatabase()
|
||||
|
@ -1,80 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
from model import *
|
||||
import sqlite3
|
||||
|
||||
log = logging.getLogger("storage")

# Path of the fragment database; set by initializeFragmentDatabase() and read
# by the query helpers in this module.
DB_FRAGMENTS = ""

# Commands

# birth location - general area, not exact location (i.e. Transylvania)
# birth origin - rural or urban
# studies - masters, bachelors, high school, middle school, primary school
# occupation - comma separated if there are multiple
# studiesAbroad - foreign cities where author studied (comma separated)
COMMAND_CREATE_AUTHORS = """CREATE TABLE IF NOT EXISTS Authors (
    name TEXT PRIMARY KEY,
    birthYear INTEGER,
    birthLocation TEXT,
    birthOrigin TEXT,
    studies TEXT,
    occupations TEXT,
    studiesAbroad TEXT
)"""

# genre - short story (nuvela), novel (roman), poem etc
# movement - literary movement (submovements separated by /) (i.e. realism/naturalism)
# tags - other relevant information (i.e. psychological)
COMMAND_CREATE_FRAGMENTS = """CREATE TABLE IF NOT EXISTS Fragments (
    id INTEGER PRIMARY KEY,
    title TEXT,
    year INTEGER,
    author TEXT REFERENCES Authors(name),
    genre TEXT,
    movement TEXT,
    tags TEXT
)"""

# contains the actual text
COMMAND_CREATE_FRAGMENTS_CONTENT = """CREATE TABLE IF NOT EXISTS FragmentsContent (
    id INTEGER REFERENCES Fragments(id),
    content TEXT
)"""


# Initialize databases
def initializeFragmentDatabase(dbFile):
    """Select ``dbFile`` as the fragment database and make sure its tables exist.

    The path is stored in the module-level ``DB_FRAGMENTS``. The schema is
    applied on every call using ``CREATE TABLE IF NOT EXISTS``, so a file that
    already exists but lacks the tables (for example after an interrupted
    first run) is repaired instead of being left unusable.

    Args:
        dbFile: path of the SQLite database file.
    """
    global DB_FRAGMENTS
    DB_FRAGMENTS = dbFile

    isNew = not os.path.exists(dbFile)
    if isNew:
        log.info("Text database %s not found. Will create database.", dbFile)

    con = sqlite3.connect(dbFile)
    try:
        c = con.cursor()
        c.execute(COMMAND_CREATE_AUTHORS)
        c.execute(COMMAND_CREATE_FRAGMENTS)
        c.execute(COMMAND_CREATE_FRAGMENTS_CONTENT)
        con.commit()
    finally:
        # Release the file handle even when a statement fails.
        con.close()

    if isNew:
        log.info("Database created!")
|
||||
|
||||
def getTextCount():
    """Return the number of rows in the ``Fragments`` table of ``DB_FRAGMENTS``."""
    con = sqlite3.connect(DB_FRAGMENTS)
    try:
        c = con.cursor()
        c.execute("SELECT COUNT(*) FROM Fragments")
        item = c.fetchone()
        c.close()
    finally:
        # Close the connection even if the query raises.
        con.close()
    return item[0]
|
||||
|
||||
def getAllTexts():
    """Return every ``(id, content)`` row of ``FragmentsContent`` as a list.

    Rows are read from the database named by ``DB_FRAGMENTS``.
    """
    con = sqlite3.connect(DB_FRAGMENTS)
    try:
        c = con.cursor()
        c.execute("SELECT id, content FROM FragmentsContent")
        items = c.fetchall()
        c.close()
    finally:
        # Close the connection even if the query raises.
        con.close()
    return items
|
@ -1,84 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
from model.Word import *
|
||||
import sqlite3
|
||||
|
||||
log = logging.getLogger("storage")

# Path of the results database; set by initializeResultsDatabase() and read by
# the store helpers in this module.
DB_RESULTS = ""

COMMAND_CREATE_LETTER_FREQUENCIES = """CREATE TABLE IF NOT EXISTS LetterFrequencies (
    idtext INTEGER,
    lettergroup TEXT,
    category TEXT,
    frequency REAL
)"""

COMMAND_CREATE_TEXT_WORDS = """CREATE TABLE IF NOT EXISTS TextWords (
    idtext INTEGER,
    wordIndex INTEGER,
    sentenceIndex INTEGER,
    word TEXT,
    lemma TEXT,
    analysis TEXT,
    chunk TEXT
)"""

# COMMAND_CREATE_WORDLENGTH_HISTOGRAM = """CREATE TABLE WordLengthHistogram (
#     idtext INTEGER,
#     wordlength INTEGER,
#     frequency REAL
# )"""


def initializeResultsDatabase(dbFile, cleanupOldData):
    """Select ``dbFile`` as the results database and make sure its tables exist.

    The path is stored in the module-level ``DB_RESULTS``. The tables are
    created whenever they are missing, so a fresh database is usable even
    when ``cleanupOldData`` is false.

    Args:
        dbFile: path of the SQLite database file.
        cleanupOldData: when true, the ``LetterFrequencies`` and ``TextWords``
            tables are dropped first, discarding their previous contents.
    """
    global DB_RESULTS
    DB_RESULTS = dbFile

    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()

        # cleanup old data
        if cleanupOldData:
            # IF EXISTS covers the "table not there yet" case without hiding
            # unrelated operational errors such as a locked database.
            c.execute("DROP TABLE IF EXISTS LetterFrequencies")
            c.execute("DROP TABLE IF EXISTS TextWords")

        c.execute(COMMAND_CREATE_LETTER_FREQUENCIES)
        c.execute(COMMAND_CREATE_TEXT_WORDS)

        con.commit()
        c.close()
    finally:
        # Release the file handle even when a statement fails.
        con.close()
|
||||
|
||||
|
||||
def storeFrequencies(idtext, freq):
    """Insert the letter frequencies of one text into ``LetterFrequencies``.

    Args:
        idtext: identifier stored in the ``idtext`` column of every row.
        freq: indexable with 0..3; each ``freq[i]`` yields
            ``(lettergroup, frequency)`` pairs. Index ``i`` selects the
            category label written with the row ('p', 'l1', 'l2', 'l3').
    """
    # Category labels, positionally matched to freq[0..3].
    categories = ('p', 'l1', 'l2', 'l3')

    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()

        # add data
        for i, category in enumerate(categories):
            c.executemany(
                "INSERT INTO LetterFrequencies VALUES (?, ?, ?, ?)",
                [(idtext, let, category, fr) for let, fr in freq[i]],
            )

        con.commit()
        c.close()
    finally:
        # Close the connection even if an insert fails; nothing is committed
        # in that case.
        con.close()
|
||||
|
||||
def storeTtlAnalysis(idtext, words):
    """Insert the analysed words of one text into ``TextWords``.

    Args:
        idtext: identifier stored in the ``idtext`` column of every row.
        words: iterable of objects exposing ``wordIndex``, ``sentenceIndex``,
            ``text``, ``lemma``, ``ana`` and ``chunk`` attributes.
    """
    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()

        # store words
        c.executemany(
            "INSERT INTO TextWords VALUES (?, ?, ?, ?, ?, ?, ?)",
            [
                (idtext, word.wordIndex, word.sentenceIndex, word.text,
                 word.lemma, word.ana, word.chunk)
                for word in words
            ],
        )

        # finish
        con.commit()
        c.close()
    finally:
        # Close the connection even if an insert fails; nothing is committed
        # in that case.
        con.close()
|
0
src/storage/results/__init__.py
Normal file
0
src/storage/results/__init__.py
Normal file
27
src/storage/results/letterFrequencies.py
Normal file
27
src/storage/results/letterFrequencies.py
Normal file
@ -0,0 +1,27 @@
|
||||
import storage
|
||||
|
||||
class LetterFrequencyStorage(storage.Storage):
    """Store for the ``LetterFrequencies`` table."""

    __COMMAND_CREATE_LETTER_FREQUENCIES = """CREATE TABLE IF NOT EXISTS LetterFrequencies (
        idtext INTEGER,
        lettergroup TEXT,
        category TEXT,
        frequency REAL
    )"""

    def _createDatabase(self):
        """Create the ``LetterFrequencies`` table if it does not exist yet."""
        c = self.connect()
        c.execute(self.__COMMAND_CREATE_LETTER_FREQUENCIES)
        self.commit()

    def _destroyDatabase(self):
        """Drop the ``LetterFrequencies`` table if it exists."""
        c = self.connect()
        c.execute('DROP TABLE IF EXISTS LetterFrequencies')
        self.commit()

    def store(self, idtext, frequencies):
        """Insert the letter frequencies of one text.

        Args:
            idtext: identifier stored in the ``idtext`` column of every row.
            frequencies: indexable with 0..3; each ``frequencies[i]`` yields
                ``(lettergroup, frequency)`` pairs. Index ``i`` selects the
                category label written with the row ('p', 'l1', 'l2', 'l3').
        """
        # Category labels, positionally matched to frequencies[0..3].
        categories = ('p', 'l1', 'l2', 'l3')
        c = self.connect()
        for i, category in enumerate(categories):
            c.executemany(
                "INSERT INTO LetterFrequencies VALUES (?, ?, ?, ?)",
                [(idtext, let, category, fr) for let, fr in frequencies[i]],
            )
        self.commit()
|
24
src/storage/results/wordFrequencies.py
Normal file
24
src/storage/results/wordFrequencies.py
Normal file
@ -0,0 +1,24 @@
|
||||
import storage
|
||||
|
||||
class WordFrequencyStorage(storage.Storage):
    """Store for the ``WordFrequencies`` table."""

    __COMMAND_CREATE_WORD_FREQUENCIES = """CREATE TABLE IF NOT EXISTS WordFrequencies (
        idtext INTEGER,
        word TEXT,
        frequency REAL
    )"""

    def _createDatabase(self):
        """Create the ``WordFrequencies`` table if it does not exist yet."""
        c = self.connect()
        c.execute(self.__COMMAND_CREATE_WORD_FREQUENCIES)
        self.commit()

    def _destroyDatabase(self):
        """Drop the ``WordFrequencies`` table if it exists."""
        c = self.connect()
        c.execute('DROP TABLE IF EXISTS WordFrequencies')
        self.commit()

    def store(self, idtext, frequencies):
        """Insert the word frequencies of one text.

        Args:
            idtext: identifier stored in the ``idtext`` column of every row.
            frequencies: iterable of ``(word, frequency)`` pairs.
        """
        c = self.connect()
        # One batched call instead of a Python-level loop of single inserts.
        c.executemany(
            'INSERT INTO WordFrequencies VALUES(?, ?, ?)',
            ((idtext, word, freq) for word, freq in frequencies),
        )
        self.commit()
|
24
src/storage/results/wordLengths.py
Normal file
24
src/storage/results/wordLengths.py
Normal file
@ -0,0 +1,24 @@
|
||||
import storage
|
||||
|
||||
class WordLengthStorage(storage.Storage):
    """Store for the ``WordLengths`` table."""

    __COMMAND_CREATE_WORD_LENGTHS = """CREATE TABLE IF NOT EXISTS WordLengths (
        idtext INTEGER,
        wordlength INTEGER,
        frequency REAL
    )"""

    def _createDatabase(self):
        """Create the ``WordLengths`` table if it does not exist yet."""
        c = self.connect()
        c.execute(self.__COMMAND_CREATE_WORD_LENGTHS)
        self.commit()

    def _destroyDatabase(self):
        """Drop the ``WordLengths`` table if it exists."""
        c = self.connect()
        c.execute('DROP TABLE IF EXISTS WordLengths')
        self.commit()

    def store(self, idtext, frequencies):
        """Insert the word-length frequencies of one text.

        Args:
            idtext: identifier stored in the ``idtext`` column of every row.
            frequencies: iterable of ``(wordlength, frequency)`` pairs.
        """
        c = self.connect()
        # One batched call instead of a Python-level loop of single inserts.
        c.executemany(
            "INSERT INTO WordLengths VALUES(?, ?, ?)",
            ((idtext, length, frequency) for length, frequency in frequencies),
        )
        self.commit()
|
65
src/storage/texts.py
Normal file
65
src/storage/texts.py
Normal file
@ -0,0 +1,65 @@
|
||||
import storage
|
||||
|
||||
class TextStorage(storage.Storage):
    """Store for the ``Authors``, ``Fragments`` and ``FragmentsContent`` tables."""

    # birth location - general area, not exact location (i.e. Transylvania)
    # birth origin - rural or urban
    # studies - masters, bachelors, high school, middle school, primary school
    # occupation - comma separated if there are multiple
    # studiesAbroad - foreign cities where author studied (comma separated)
    __COMMAND_CREATE_AUTHORS = """CREATE TABLE IF NOT EXISTS Authors (
        name TEXT PRIMARY KEY,
        birthYear INTEGER,
        birthLocation TEXT,
        birthOrigin TEXT,
        studies TEXT,
        occupations TEXT,
        studiesAbroad TEXT
    )"""

    # genre - short story (nuvela), novel (roman), poem etc
    # movement - literary movement (submovements separated by /) (i.e. realism/naturalism)
    # tags - other relevant information (i.e. psychological)
    __COMMAND_CREATE_FRAGMENTS = """CREATE TABLE IF NOT EXISTS Fragments (
        id INTEGER PRIMARY KEY,
        title TEXT,
        year INTEGER,
        author TEXT REFERENCES Authors(name),
        genre TEXT,
        movement TEXT,
        tags TEXT
    )"""

    # contains the actual text
    __COMMAND_CREATE_FRAGMENTS_CONTENT = """CREATE TABLE IF NOT EXISTS FragmentsContent (
        id INTEGER REFERENCES Fragments(id),
        content TEXT
    )"""

    def _createDatabase(self):
        """Create the three text tables if they do not exist yet."""
        c = self.connect()
        c.execute(self.__COMMAND_CREATE_AUTHORS)
        c.execute(self.__COMMAND_CREATE_FRAGMENTS)
        c.execute(self.__COMMAND_CREATE_FRAGMENTS_CONTENT)
        self.commit()

    def _destroyDatabase(self):
        """Drop the three text tables if they exist."""
        c = self.connect()
        # Drop referencing tables before the tables they reference, so the
        # drops also succeed when foreign-key enforcement is enabled.
        c.execute('DROP TABLE IF EXISTS FragmentsContent')
        c.execute('DROP TABLE IF EXISTS Fragments')
        c.execute('DROP TABLE IF EXISTS Authors')
        self.commit()

    def getTextCount(self):
        """Return the number of rows in the ``Fragments`` table."""
        c = self.connect()
        c.execute("SELECT COUNT(*) FROM Fragments")
        item = c.fetchone()
        # commit() is called for its default close-the-connection behaviour.
        self.commit()
        return item[0]

    def getAllTexts(self):
        """Return every ``(id, content)`` row of ``FragmentsContent`` as a list."""
        c = self.connect()
        c.execute("SELECT id, content FROM FragmentsContent")
        items = c.fetchall()
        # commit() is called for its default close-the-connection behaviour.
        self.commit()
        return items
|
Reference in New Issue
Block a user