Initial commit

This commit is contained in:
2017-05-23 13:57:53 +03:00
commit 6badfbd103
38 changed files with 1286 additions and 0 deletions

0
src/storage/__init__.py Normal file
View File

80
src/storage/data.py Normal file
View File

@ -0,0 +1,80 @@
import logging
import os
from model import *
import sqlite3
# Logger named "storage", used for the messages emitted by this module.
log = logging.getLogger("storage")
# Path of the fragments database. Empty until initializeFragmentDatabase()
# stores the path it was given.
DB_FRAGMENTS = ""
# Commands
# Schema of the Authors table (one row per author, keyed by name).
# birthLocation - general area, not exact location (e.g. Transylvania)
# birthOrigin - rural or urban
# studies - masters, bachelors, high school, middle school, primary school
# occupations - comma separated if there are multiple
# studiesAbroad - foreign cities where the author studied (comma separated)
COMMAND_CREATE_AUTHORS = """CREATE TABLE Authors (
name TEXT PRIMARY KEY,
birthYear INTEGER,
birthLocation TEXT,
birthOrigin TEXT,
studies TEXT,
occupations TEXT,
studiesAbroad TEXT
)"""
# Schema of the Fragments table (metadata of a text; author references
# Authors.name).
# genre - short story (nuvela), novel (roman), poem etc.
# movement - literary movement, submovements separated by /
#            (e.g. realism/naturalism)
# tags - other relevant information (e.g. psychological)
COMMAND_CREATE_FRAGMENTS = """CREATE TABLE Fragments (
id INTEGER PRIMARY KEY,
title TEXT,
year INTEGER,
author TEXT REFERENCES Authors(name),
genre TEXT,
movement TEXT,
tags TEXT
)"""
# Schema of the FragmentsContent table: holds the actual text of a fragment,
# with id referencing Fragments.id.
COMMAND_CREATE_FRAGMENTS_CONTENT = """CREATE TABLE FragmentsContent (
id INTEGER REFERENCES Fragments(id),
content TEXT
)"""
# Initialize databases
def initializeFragmentDatabase(dbFile):
    """Remember the fragments database path and create the file if missing.

    The path is stored in the module-level ``DB_FRAGMENTS``.  When nothing
    exists at ``dbFile`` a new SQLite database is created there with the
    Authors, Fragments and FragmentsContent tables.  An existing path is left
    untouched; only its existence is checked, not its schema.

    Args:
        dbFile: filesystem path of the SQLite database.

    Raises:
        sqlite3.Error: if the database cannot be opened or a table cannot be
            created.
    """
    global DB_FRAGMENTS
    DB_FRAGMENTS = dbFile

    if os.path.exists(dbFile):
        return

    log.info("Text database %s not found. Will create database.", dbFile)
    con = sqlite3.connect(dbFile)
    try:
        c = con.cursor()
        try:
            c.execute(COMMAND_CREATE_AUTHORS)
            c.execute(COMMAND_CREATE_FRAGMENTS)
            c.execute(COMMAND_CREATE_FRAGMENTS_CONTENT)
            con.commit()
        finally:
            c.close()
    finally:
        # Always release the connection, even when schema creation fails.
        con.close()
    log.info("Database created!")
def getTextCount():
    """Return the number of rows in the Fragments table.

    The database at the module-level ``DB_FRAGMENTS`` path is opened for the
    duration of the call.

    Returns:
        The value of ``SELECT COUNT(*) FROM Fragments``.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    con = sqlite3.connect(DB_FRAGMENTS)
    try:
        c = con.cursor()
        try:
            c.execute("SELECT COUNT(*) FROM Fragments")
            return c.fetchone()[0]
        finally:
            c.close()
    finally:
        # Close the connection even when the query raises.
        con.close()
def getAllTexts():
    """Return every row of the FragmentsContent table.

    The database at the module-level ``DB_FRAGMENTS`` path is opened for the
    duration of the call.

    Returns:
        A list of ``(id, content)`` tuples, one per row.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    con = sqlite3.connect(DB_FRAGMENTS)
    try:
        c = con.cursor()
        try:
            c.execute("SELECT id, content FROM FragmentsContent")
            return c.fetchall()
        finally:
            c.close()
    finally:
        # Close the connection even when the query raises.
        con.close()

84
src/storage/results.py Normal file
View File

@ -0,0 +1,84 @@
import logging
import os
from model.Word import *
import sqlite3
# Logger named "storage".
log = logging.getLogger("storage")
# Path of the results database. Empty until initializeResultsDatabase()
# stores the path it was given.
DB_RESULTS = ""
# Schema of the LetterFrequencies table. storeFrequencies() writes one row
# per (letter group, frequency) pair, with category set to one of
# 'p', 'l1', 'l2', 'l3'.
COMMAND_CREATE_LETTER_FREQUENCIES = """CREATE TABLE LetterFrequencies (
idtext INTEGER,
lettergroup TEXT,
category TEXT,
frequency REAL
)"""
# Schema of the TextWords table. storeTtlAnalysis() writes one row per word,
# taking the columns after idtext from the word's wordIndex, sentenceIndex,
# text, lemma, ana and chunk attributes.
COMMAND_CREATE_TEXT_WORDS = """CREATE TABLE TextWords (
idtext INTEGER,
wordIndex INTEGER,
sentenceIndex INTEGER,
word TEXT,
lemma TEXT,
analysis TEXT,
chunk TEXT
)"""
# Disabled schema: this table is not created by initializeResultsDatabase().
# COMMAND_CREATE_WORDLENGTH_HISTOGRAM = """CREATE TABLE WordLengthHistogram (
# idtext INTEGER,
# wordlength INTEGER,
# frequency REAL
# )"""
def initializeResultsDatabase(dbFile, cleanupOldData):
    """Remember the results database path and optionally reset its tables.

    The path is stored in the module-level ``DB_RESULTS``.  When
    ``cleanupOldData`` is true the LetterFrequencies and TextWords tables are
    dropped (if present) and recreated empty.  When it is false the database
    is not opened at all, so the tables must already exist for the store
    functions to succeed.

    Args:
        dbFile: filesystem path of the SQLite results database.
        cleanupOldData: truthy to discard previously stored results.

    Raises:
        sqlite3.Error: if the database cannot be opened or a table cannot be
            dropped or created.
    """
    global DB_RESULTS
    DB_RESULTS = dbFile

    if not cleanupOldData:
        return

    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()
        try:
            # IF EXISTS tolerates a missing table without hiding other
            # failures (e.g. a locked or read-only database).
            c.execute("DROP TABLE IF EXISTS LetterFrequencies")
            c.execute(COMMAND_CREATE_LETTER_FREQUENCIES)
            c.execute("DROP TABLE IF EXISTS TextWords")
            c.execute(COMMAND_CREATE_TEXT_WORDS)
            con.commit()
        finally:
            c.close()
    finally:
        # Always release the connection, even when a statement fails.
        con.close()
def storeFrequencies(idtext, freq):
    """Insert the letter-group frequencies of one text into LetterFrequencies.

    ``freq[0]`` .. ``freq[3]`` are each iterated as ``(lettergroup,
    frequency)`` pairs and stored under the category labels 'p', 'l1', 'l2'
    and 'l3' respectively.  All rows are written in a single transaction to
    the database at the module-level ``DB_RESULTS`` path.

    Args:
        idtext: identifier of the text, stored in the ``idtext`` column.
        freq: object indexable by 0..3 whose items are iterables of
            ``(lettergroup, frequency)`` pairs.

    Raises:
        sqlite3.Error: if the database cannot be opened or written.
    """
    categories = ('p', 'l1', 'l2', 'l3')
    # Index into freq (rather than zipping) so a mapping keyed 0..3 works
    # just like a sequence.
    rows = [
        (idtext, letterGroup, category, frequency)
        for index, category in enumerate(categories)
        for letterGroup, frequency in freq[index]
    ]

    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()
        try:
            c.executemany("INSERT INTO LetterFrequencies VALUES (?, ?, ?, ?)", rows)
            con.commit()
        finally:
            c.close()
    finally:
        # Close the connection even when the insert fails; uncommitted rows
        # are discarded.
        con.close()
def storeTtlAnalysis(idtext, words):
    """Insert the analysed words of one text into the TextWords table.

    One row is written per element of ``words``, in a single transaction, to
    the database at the module-level ``DB_RESULTS`` path.

    Args:
        idtext: identifier of the text, stored in the ``idtext`` column.
        words: iterable of objects exposing ``wordIndex``, ``sentenceIndex``,
            ``text``, ``lemma``, ``ana`` and ``chunk`` attributes.

    Raises:
        sqlite3.Error: if the database cannot be opened or written.
    """
    rows = [
        (
            idtext,
            word.wordIndex,
            word.sentenceIndex,
            word.text,
            word.lemma,
            word.ana,
            word.chunk,
        )
        for word in words
    ]

    con = sqlite3.connect(DB_RESULTS)
    try:
        c = con.cursor()
        try:
            c.executemany("INSERT INTO TextWords VALUES (?, ?, ?, ?, ?, ?, ?)", rows)
            con.commit()
        finally:
            c.close()
    finally:
        # Close the connection even when the insert fails; uncommitted rows
        # are discarded.
        con.close()