Initial commit
src/logger.py (new file, 20 lines)
@@ -0,0 +1,20 @@
import time
import logging

def init_logger(level):
    # Log filename
    tm = time.strftime('%Y-%m-%d_%H-%M-%S')
    logFile = "logs/log_{0}.log".format(tm)

    # Set up file logger
    logging.basicConfig(filename=logFile,
                        level=logging.DEBUG,
                        format='%(asctime)s %(name)s %(levelname)s %(message)s',
                        datefmt='%m-%d %H:%M')

    # Set up console logger
    formatter = logging.Formatter('[%(name)s] %(levelname)s: %(message)s')
    console = logging.StreamHandler()
    console.setLevel(level)
    console.setFormatter(formatter)
    logging.getLogger().addHandler(console)
src/main.py (new file, 40 lines)
@@ -0,0 +1,40 @@
import logging
import time
# own
import logger
import storage.data
import storage.results
import textprocessor.letterfreq
import ttl.ttlparser
import ttl.ttlservice

def init():
    logger.init_logger(logging.WARNING)
    storage.data.initializeFragmentDatabase("data/texts.db")
    storage.results.initializeResultsDatabase("data/results.db", True)

def processTexts():
    count = storage.data.getTextCount()
    current = 0
    for item in storage.data.getAllTexts():
        print("Processing item", current, "out of", count)
        current = current + 1

        itemid = item[0]
        itemtext = item[1]

        # obtain ttl analysis
        # unfeasible - it takes 5-10 minutes for a single text
        # ttlResult = ttl.ttlservice.executeTtl(itemtext)
        # (words, chunks) = ttl.ttlparser.parseText(ttlResult)
        # storage.results.storeTtlAnalysis(itemid, words)

        # perform analysis
        letterFreq = textprocessor.letterfreq.letterFrequencies(itemtext)
        storage.results.storeFrequencies(itemid, letterFreq)

    print("Finished!")

init()
processTexts()
src/model.py (new file, 32 lines)
@@ -0,0 +1,32 @@
# Defines a fragment author
class Author:
    def __init__(self, name="", birthYear="", location="Romania"):
        self.name = name
        self.yearOfBirth = birthYear
        self.location = location

    def __str__(self):
        return self.name

    def __repr__(self):
        return self.name

    def dump(self):
        return "[Author name={0} yearOfBirth={1} location={2}]".format(self.name, self.yearOfBirth, self.location)

# Defines a text fragment
class Fragment:
    def __init__(self, title="", text="", author=Author(), year=1999):
        self.title = title
        self.text = text
        self.author = author
        self.year = year

    def __str__(self):
        return self.title

    def __repr__(self):
        return self.title

    def dump(self):
        return "[Fragment title={0} author={1} year={2} text={3}]".format(self.title, self.author.dump(), self.year, self.text)
src/model/Word.py (new file, 87 lines)
@@ -0,0 +1,87 @@
# Defines a processed word
class Word:

    text = ""
    lemma = ""
    ana = ""
    chunk = ""

    sentenceIndex = 0
    wordIndex = 0

    def __init__(self, text, lemma, ana, chunk, sentenceIndex, wordIndex):
        self.text = text
        self.lemma = lemma
        self.ana = ana
        self.chunk = chunk
        self.sentenceIndex = sentenceIndex
        self.wordIndex = wordIndex

    def __str__(self):
        return "{0} (lemma {1}, ana {2}, chunk {3})".format(self.text, self.lemma, self.ana, self.chunk)

    def __repr__(self):
        return str(self)

    def isNoun(self):
        return self.ana[0] == "N"

    def nounIsCommon(self):
        return self.isNoun() and self.ana[1] == "c"

    def nounIsProper(self):
        return self.isNoun() and self.ana[1] == "p"

    def nounGetCase(self):
        if self.isNoun():
            return self.ana[4]
        return None

    # Is the noun definite (articulated)?
    def nounIsDefinite(self):
        if self.isNoun():
            if self.nounIsProper():
                return True

            if len(self.ana) > 5:
                return self.ana[5]

            return "n"

    def pronounGetPerson(self):
        if self.isPronoun():
            return self.ana[2]

    def getGender(self):
        if self.isNoun():
            if len(self.ana) >= 3:
                return self.ana[2]
            return 'n'

        if self.isPronoun():
            return self.ana[3]

        return None

    def getNumber(self):
        if self.isNoun():
            if self.nounIsProper():
                return 's'
            else:
                return self.ana[3]
        if self.isPronoun():
            return self.ana[4]

        return None

    def isPronoun(self):
        return self.ana[0] == "P"

    def isVerb(self):
        return self.ana[0] == "V"

    def isPreposition(self):
        return self.ana[0] == "S" and self.ana[1] == "p"
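The accessors above read the ana tag positionally (category, type, gender, number, case, definiteness for nouns). A minimal usage sketch, not part of the commit, with an assumed MULTEXT-style tag "Ncfsrn" purely for illustration:

# Hypothetical sketch: "Ncfsrn" is an assumed analysis string
# (Noun, common, feminine, singular, direct case, indefinite).
from model.Word import Word

w = Word("cartea", "carte", "Ncfsrn", "Np#1", sentenceIndex=1, wordIndex=1)
print(w.isNoun())          # True  (ana[0] == "N")
print(w.nounIsCommon())    # True  (ana[1] == "c")
print(w.getGender())       # "f"   (ana[2])
print(w.getNumber())       # "s"   (ana[3])
print(w.nounGetCase())     # "r"   (ana[4])
print(w.nounIsDefinite())  # "n"   (ana[5] when present)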
src/model/__init__.py (new file, empty)
src/storage/__init__.py (new file, empty)
src/storage/data.py (new file, 80 lines)
@@ -0,0 +1,80 @@
import logging
import os
from model import *
import sqlite3

log = logging.getLogger("storage")

DB_FRAGMENTS = ""

# Commands

# birth location - general area, not exact location (e.g. Transylvania)
# birth origin - rural or urban
# studies - masters, bachelors, high school, middle school, primary school
# occupation - comma separated if there are multiple
# studiesAbroad - foreign cities where author studied (comma separated)
COMMAND_CREATE_AUTHORS = """CREATE TABLE Authors (
    name TEXT PRIMARY KEY,
    birthYear INTEGER,
    birthLocation TEXT,
    birthOrigin TEXT,
    studies TEXT,
    occupations TEXT,
    studiesAbroad TEXT
    )"""

# genre - short story (nuvela), novel (roman), poem etc
# movement - literary movement (submovements separated by /) (e.g. realism/naturalism)
# tags - other relevant information (e.g. psychological)
COMMAND_CREATE_FRAGMENTS = """CREATE TABLE Fragments (
    id INTEGER PRIMARY KEY,
    title TEXT,
    year INTEGER,
    author TEXT REFERENCES Authors(name),
    genre TEXT,
    movement TEXT,
    tags TEXT
    )"""

# contains the actual text
COMMAND_CREATE_FRAGMENTS_CONTENT = """CREATE TABLE FragmentsContent (
    id INTEGER REFERENCES Fragments(id),
    content TEXT
    )"""

# Initialize databases
def initializeFragmentDatabase(dbFile):
    global DB_FRAGMENTS
    DB_FRAGMENTS = dbFile

    if not os.path.exists(dbFile):
        log.info("Text database %s not found. Will create database.", dbFile)
        con = sqlite3.connect(dbFile)
        c = con.cursor()
        c.execute(COMMAND_CREATE_AUTHORS)
        c.execute(COMMAND_CREATE_FRAGMENTS)
        c.execute(COMMAND_CREATE_FRAGMENTS_CONTENT)
        con.commit()
        con.close()
        log.info("Database created!")

def getTextCount():
    con = sqlite3.connect(DB_FRAGMENTS)
    c = con.cursor()
    c.execute("SELECT COUNT(*) FROM Fragments")
    item = c.fetchone()
    c.close()
    con.close()
    return item[0]

def getAllTexts():
    con = sqlite3.connect(DB_FRAGMENTS)
    c = con.cursor()
    c.execute("SELECT id, content FROM FragmentsContent")

    items = c.fetchall()

    c.close()
    con.close()
    return items
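As an illustration of the schema (not part of the commit), a minimal sketch of inserting one author and one fragment by hand; table and column names come from the CREATE statements above, while the concrete values are made up:

# Hypothetical sketch only: one Authors row, one Fragments row and its content,
# matching the column layout of COMMAND_CREATE_AUTHORS / COMMAND_CREATE_FRAGMENTS.
import sqlite3

con = sqlite3.connect("data/texts.db")
c = con.cursor()
c.execute("INSERT INTO Authors(name, birthYear, birthLocation, birthOrigin) VALUES (?, ?, ?, ?)",
          ("Ion Creanga", 1837, "Moldova", "rural"))
c.execute("INSERT INTO Fragments(id, title, year, author, genre) VALUES (?, ?, ?, ?, ?)",
          (1, "Amintiri din copilarie", 1892, "Ion Creanga", "memoir"))
c.execute("INSERT INTO FragmentsContent(id, content) VALUES (?, ?)", (1, "..."))
con.commit()
con.close()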
src/storage/results.py (new file, 84 lines)
@@ -0,0 +1,84 @@
import logging
import os
from model.Word import *
import sqlite3

log = logging.getLogger("storage")

DB_RESULTS = ""

COMMAND_CREATE_LETTER_FREQUENCIES = """CREATE TABLE LetterFrequencies (
    idtext INTEGER,
    lettergroup TEXT,
    category TEXT,
    frequency REAL
    )"""

COMMAND_CREATE_TEXT_WORDS = """CREATE TABLE TextWords (
    idtext INTEGER,
    wordIndex INTEGER,
    sentenceIndex INTEGER,
    word TEXT,
    lemma TEXT,
    analysis TEXT,
    chunk TEXT
    )"""

# COMMAND_CREATE_WORDLENGTH_HISTOGRAM = """CREATE TABLE WordLengthHistogram (
#     idtext INTEGER,
#     wordlength INTEGER,
#     frequency REAL
#     )"""

def initializeResultsDatabase(dbFile, cleanupOldData):
    global DB_RESULTS
    DB_RESULTS = dbFile

    # cleanup old data
    if cleanupOldData:
        con = sqlite3.connect(DB_RESULTS)
        c = con.cursor()

        try:
            c.execute("DROP TABLE LetterFrequencies")
        except sqlite3.OperationalError:
            pass
        c.execute(COMMAND_CREATE_LETTER_FREQUENCIES)

        try:
            c.execute("DROP TABLE TextWords")
        except sqlite3.OperationalError:
            pass
        c.execute(COMMAND_CREATE_TEXT_WORDS)

        con.commit()
        c.close()
        con.close()

def storeFrequencies(idtext, freq):
    con = sqlite3.connect(DB_RESULTS)
    c = con.cursor()

    # add data
    chr = ['p', 'l1', 'l2', 'l3']
    for i in range(4):
        for let, fr in freq[i]:
            c.execute("INSERT INTO LetterFrequencies VALUES (?, ?, ?, ?)", (idtext, let, chr[i], fr))

    con.commit()
    c.close()
    con.close()

def storeTtlAnalysis(idtext, words):
    con = sqlite3.connect(DB_RESULTS)
    c = con.cursor()

    # store words
    for word in words:
        c.execute("INSERT INTO TextWords VALUES (?, ?, ?, ?, ?, ?, ?)", (idtext, word.wordIndex, word.sentenceIndex, word.text, word.lemma, word.ana, word.chunk))

    # finish
    con.commit()
    c.close()
    con.close()
src/test.py (new file, 14 lines)
@@ -0,0 +1,14 @@
# coding: utf-8
from ttl import ttlservice
from ttl import ttlparser
import nltk

import storage

data = storage.parseIndex("data")
print(data)

#textXml = ttlservice.executeTtl(u"Numele meu este Tibi și îmi place să cânt la chitară bass. Ce faci?")
#words, chunks = ttlparser.parseText(textXml)
#print("Words: ", words)
#print("Chunks: ", chunks)
src/textprocessor/__init__.py (new file, empty)
src/textprocessor/letterfreq.py (new file, 38 lines)
@@ -0,0 +1,38 @@
import operator
import storage

def letterFrequencies(text):
    letterfreq = [{}, {}, {}, {}]
    lettersum = [0, 0, 0, 0]

    n = len(text)
    for i in range(n):

        # compute substring frequency
        # l = substring length
        for l in range(1, 4):
            sub = text[i : i + l].lower()
            if len(sub) == l and sub.isalnum():
                lettersum[l] += 1
                if not sub in letterfreq[l]:
                    letterfreq[l][sub] = 1
                else:
                    letterfreq[l][sub] += 1

        # compute punctuation frequency
        chr = text[i]
        if not chr.isalnum() and not chr.isspace() and chr.isprintable():
            lettersum[0] += 1
            if not chr in letterfreq[0]:
                letterfreq[0][chr] = 1
            else:
                letterfreq[0][chr] += 1

    # Almost done. Sort and remove irrelevant items (with low frequency), and normalize data
    for i in range(4):
        freqSorted = sorted(letterfreq[i].items(), key=operator.itemgetter(1), reverse=True)
        freqFiltered = freqSorted[0:50]
        freqNormalized = [(symbol, freq / lettersum[i]) for symbol, freq in freqFiltered]
        letterfreq[i] = freqNormalized

    return letterfreq
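letterFrequencies returns four lists of (symbol, relative frequency) pairs: index 0 holds punctuation marks, and indices 1-3 hold 1-, 2- and 3-character substrings, each sorted by count, truncated to the top 50 and normalized by that group's total. A small illustrative call (not part of the commit; the shown values are only indicative):

# Illustrative only - run against a short string to see the shape of the result.
from textprocessor.letterfreq import letterFrequencies

freq = letterFrequencies("Ana are mere, Ana are pere.")
punctuation, unigrams, bigrams, trigrams = freq
print(punctuation)   # [(',', 0.5), ('.', 0.5)] for this input
print(unigrams[:3])  # most frequent single letters, e.g. ('a', ...), ('e', ...), ('r', ...)

This is also the shape that storage.results.storeFrequencies expects: it iterates freq[i] and writes one LetterFrequencies row per (symbol, frequency) pair.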
src/textprocessor/wordanalysis.py (new file, 2 lines)
@@ -0,0 +1,2 @@
def analyzeWords(text):
    pass
src/tools/wikisource_downloader.py (new file, 128 lines)
@@ -0,0 +1,128 @@
import urllib.request
import urllib.error
from pyquery import PyQuery
import sqlite3
import re

BASE_URL = "https://ro.wikisource.org"
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

def getAuthorList():
    authors = []
    for letter in LETTERS:
        print("Processing link page for letter", letter)
        # Read index page
        url = BASE_URL + '/wiki/Categorie:Autori-' + letter
        data = urllib.request.urlopen(url).read()
        q = PyQuery(data)
        for item in q("div.mw-category-generated").find("a"):
            if item.text.startswith("Autor:"):
                authorname = item.text[6:]
                authorlink = item.attrib['href']
                authors.append((authorname, authorlink))
    return list(set(authors))

def getAuthorWikiLink(query):
    wikilink = None
    body = query("div#mw-content-text")
    table = body.find("table")
    for link in table.find("a"):
        if "ro.wikipedia.org" in link.attrib['href']:
            wikilink = link.attrib['href']
    return wikilink

def getAuthorLinksList(authorname, query):
    links = []
    body = query("div#mw-content-text")
    for link in body.find("a"):
        address = link.attrib['href']
        ok = True
        if "http" in address:
            ok = False
        if "redlink" in address:
            ok = False
        if "Fi%C8%99ier:" in address:
            ok = False
        if "index.php" in address:
            ok = False
        if address.startswith("#"):
            ok = False
        if "Autor:" in address:
            ok = False
        if ok:
            links.append(link.attrib['href'])
    return links

def getAuthorBasicInfo(authorname, authorlink):
    info = {}
    data = urllib.request.urlopen(BASE_URL + authorlink).read()
    q = PyQuery(data)

    info["wiki"] = getAuthorWikiLink(q)
    info["links"] = getAuthorLinksList(authorname, q)

    return info

# def getAuthorWikiInfo(authorinfo):
#
#     # Nothing can be learned without wiki page
#     if authorinfo["wiki"] is None:
#         return authorinfo
#
#     try:
#         data = urllib.request.urlopen(authorinfo["wiki"]).read()
#         q = PyQuery(data)
#
#         # Find the birth date
#         body = q("#mw-content-text").text()
#         result = re.compile(u"Născut\s+([\w\s]+)").match(body)
#         if not result is None:
#             authorinfo["birthyear"] = result.group(0)
#
#     except urllib.error.HTTPError:
#         pass
#
#     return authorinfo

def getText(url):
    data = urllib.request.urlopen(BASE_URL + url).read()
    q = PyQuery(data)

    texttitle = q("h1").text()

    body = q("#mw-content-text")
    body.find("table").remove()

    textcontent = body.text()
    return (texttitle, textcontent)

# Unused helper - the INSERT statement is still incomplete
def addAuthorToDb(authorinfo):
    con = sqlite3.connect("data/texts.db")
    c = con.cursor()
    c.execute("INSERT INTO Authors")

def getAllTexts():

    con = sqlite3.connect("data/texts.db")
    c = con.cursor()
    #c.execute("ALTER TABLE Authors ADD COLUMN wiki TEXT")
    id = 1

    authors = getAuthorList()
    for authorname, authorlink in authors:
        print("Processing author", authorname)
        authorinfo = getAuthorBasicInfo(authorname, authorlink)
        c.execute("INSERT INTO Authors(name,wiki) VALUES(?, ?)", (authorname, authorinfo["wiki"]))

        # authorinfo = getAuthorWikiInfo(authorinfo)
        for text in authorinfo["links"]:
            try:
                title, content = getText(text)
                c.execute("INSERT INTO Fragments(id, title, author) VALUES (?, ?, ?)", (id, title, authorname))
                c.execute("INSERT INTO FragmentsContent(id, content) VALUES (?, ?)", (id, content))
                id = id + 1
            except urllib.error.HTTPError:
                continue

    con.commit()

getAllTexts()
src/ttl/__init__.py (new file, empty)
src/ttl/ttlparser.py (new file, 62 lines)
@@ -0,0 +1,62 @@
'''
Created on May 22, 2016

@author: tibi
'''

from xml.dom import minidom
from xml.parsers.expat import ExpatError
from model.Word import Word

def parseText(xmlText):

    words = []
    chunks = {}

    sentence_i = 0

    # get the root "segs" element
    try:
        dom = minidom.parseString(xmlText)
    except ExpatError as e:
        print("Error in text:", xmlText)
        print(e)
        exit(-1)

    alltext = dom.getElementsByTagName("segs")

    # iterate paragraphs
    for paragraph in alltext[0].getElementsByTagName("seg"):

        # iterate sentences
        for sentence in paragraph.getElementsByTagName("s"):

            # increment sentence index
            sentence_i += 1
            word_i = 0

            # iterate words
            for word in sentence.getElementsByTagName("w"):

                # increment word index
                word_i += 1

                # obtain word info
                wordText = word.firstChild.data
                lemma = word.getAttribute("lemma")
                ana = word.getAttribute("ana")
                chunk = word.getAttribute("chunk")

                # create word
                #w = Word(wordText, lemma, ana, chunk, sentence_i, word_i)
                #words.append(w)

                for c in chunk.split(","):
                    w = Word(wordText, lemma, ana, c, sentence_i, word_i)
                    words.append(w)
                    if chunks.get((sentence_i, c)) == None:
                        chunks[(sentence_i, c)] = [ w ]
                    else:
                        chunks[(sentence_i, c)].append(w)

    return (words, chunks)
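parseText walks a segs > seg > s > w hierarchy and reads the lemma, ana and chunk attributes of each w element, so a minimal input of the following shape parses. The snippet and its attribute values are invented for illustration; real input is produced by ttl/ttlservice.py:

# Hypothetical minimal input matching the element/attribute names read above.
from ttl.ttlparser import parseText

sample = ('<?xml version="1.0" encoding="utf-8" ?>'
          '<segs><seg><s>'
          '<w lemma="carte" ana="Ncfsrn" chunk="Np#1">cartea</w>'
          '<w lemma="frumos" ana="Afpfsrn" chunk="Np#1">frumoasa</w>'
          '</s></seg></segs>')

words, chunks = parseText(sample)
print(words)                # one Word per (word, chunk label) pair
print(chunks[(1, "Np#1")])  # both words fall in chunk "Np#1" of sentence 1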
src/ttl/ttlservice.py (new file, 34 lines)
@@ -0,0 +1,34 @@
# coding: utf-8
import zeep

def executeTtl(text):
    # Preprocess the text
    text = text.replace(u'ĭ', 'i')
    text = text.replace(u'ŭ', 'u')
    text = text.replace(u'à', 'a')

    client = zeep.Client("http://ws.racai.ro/ttlws.wsdl")
    textSgml = client.service.UTF8toSGML(text)
    result = client.service.XCES("ro", "id", textSgml)

    # Cleanup result - generate valid xml
    # (some search strings below render identically to their replacements; in the
    # source they are likely entity or decomposed forms of the same characters)
    result = result.replace('’', '`')
    result = result.replace('ă', u'ă')
    result = result.replace('à', u'à')
    result = result.replace('â', u'â')
    result = result.replace('î', u'î')
    result = result.replace('ş', u'ș')
    result = result.replace('ţ', u'ț')
    result = result.replace('ŭ', u'u')
    result = result.replace('Ă', u'Ă')
    result = result.replace('À', u'À')
    result = result.replace('Â', u'Â')
    result = result.replace('Î', u'Î')
    result = result.replace('Ş', u'Ș')
    result = result.replace('Ţ', u'Ț')
    result = result.replace('Ŭ', u'U')

    xmlResult = "<?xml version=\"1.0\" encoding=\"utf-8\" ?><segs>"
    xmlResult += result
    xmlResult += "</segs>"
    return xmlResult