Initial commit

This commit is contained in:
2017-05-23 13:57:53 +03:00
commit 6badfbd103
38 changed files with 1286 additions and 0 deletions

1
NLP/data/text1.txt Normal file
View File

@ -0,0 +1 @@
Maria are mere. Ea mai are șapte pere. Acestea sunt foarte delicioase.

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8" ?>
<segs>
<seg lang="ro"><s id="id_temp_aiurea.1"><w lemma="Maria" ana="Np" chunk="Np#1">Maria</w><w lemma="avea" ana="Vmip3s" chunk="Vp#1">are</w><w lemma="măr" ana="Ncfp-n" chunk="Np#2">mere</w><c>.</c></s></seg>
<seg lang="ro"><s id="id_temp_aiurea.2"><w lemma="el" ana="Pp3fsr--------s">Ea</w><w lemma="mai" ana="Rp" chunk="Ap#1">mai</w><w lemma="avea" ana="Vmip3s" chunk="Vp#1">are</w><w lemma="șapte" ana="Mc-p-l" chunk="Np#1">șapte</w><w lemma="pară" ana="Ncfp-n" chunk="Np#1">pere</w><c>.</c></s></seg>
<seg lang="ro"><s id="id_temp_aiurea.3"><w lemma="acesta" ana="Pd3fpr">Acestea</w><w lemma="fi" ana="Vmip3p" chunk="Vp#1">sunt</w><w lemma="foarte" ana="Rp" chunk="Ap#1,Vp#1">foarte</w><w lemma="delicios" ana="Afpfp-n" chunk="Ap#1">delicioase</w><c>.</c></s></seg>
</segs>

1
NLP/data/text2.txt Normal file
View File

@ -0,0 +1 @@
Sabeer Bhatia a ajuns la Aeroportul Internațional din Los Angeles la ora 18 în data de 23 septembrie 1998. Zborul său din Bangalore a durat 22 ore, și el era înfometat.

View File

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<segs>
<seg lang="ro"><s id="id_temp_aiurea.1"><w lemma="Sabeer" ana="Np" chunk="Np#1">Sabeer</w><w lemma="Bhatia" ana="Np" chunk="Np#1">Bhatia</w><w lemma="avea" ana="Va--3s" chunk="Vp#1">a</w><w lemma="ajunge" ana="Vmp--sm" chunk="Vp#1">ajuns</w><w lemma="la" ana="Spsa" chunk="Pp#1">la</w><w lemma="aeroport" ana="Ncmsry" chunk="Pp#1,Np#2">Aeroportul</w><w lemma="internațional" ana="Afpms-n" chunk="Pp#1,Np#2,Ap#1">Internațional</w><w lemma="din" ana="Spsa" chunk="Pp#2">din</w><w lemma="Los" ana="Np" chunk="Pp#2,Np#3">Los</w><w lemma="Angeles" ana="Np" chunk="Pp#2,Np#3">Angeles</w><w lemma="la" ana="Spsa" chunk="Pp#3">la</w><w lemma="oră" ana="Ncfsry" chunk="Pp#3,Np#4">ora</w><w lemma="18" ana="Mc" chunk="Pp#3,Np#4">18</w><w lemma="în" ana="Spsa" chunk="Pp#4">în</w><w lemma="dată" ana="Ncfsry" chunk="Pp#4,Np#5">data</w><w lemma="de" ana="Spsa" chunk="Pp#5">de</w><w lemma="23" ana="Mc" chunk="Pp#5,Np#6">23</w><w lemma="septembrie" ana="Ncms-n" chunk="Pp#5,Np#6">septembrie</w><w lemma="1998" ana="Mc" chunk="Pp#5,Np#6">1998</w><c>.</c></s></seg>
<seg lang="ro"><s id="id_temp_aiurea.2"><w lemma="zbor" ana="Ncmsry" chunk="Np#1">Zborul</w><w lemma="său" ana="Ds3ms-s" chunk="Np#1">său</w><w lemma="din" ana="Spsa" chunk="Pp#1">din</w><w lemma="Bangalore" ana="Np" chunk="Pp#1,Np#2">Bangalore</w><w lemma="avea" ana="Va--3s" chunk="Vp#1">a</w><w lemma="dura" ana="Vmp--sm" chunk="Vp#1">durat</w><w lemma="22" ana="Mc" chunk="Np#3">22</w><w lemma="oră" ana="Ncfp-n" chunk="Np#3">ore</w><c>,</c><w lemma="și" ana="Crssp">și</w><w lemma="el" ana="Pp3msr--------s" chunk="Vp#2">el</w><w lemma="fi" ana="Vaii3s" chunk="Vp#2">era</w><w lemma="înfometa" ana="Vmp--sm" chunk="Vp#2">înfometat</w><c>.</c></s></seg>
</segs>

1
NLP/data/text3.txt Normal file
View File

@ -0,0 +1 @@
Sophia Loren spune că ea va fi întotdeauna mulțumitoare față de Bono. Actrița a dezvăluit că cântărețul trupei U2 a ajutat-o să se liniștească atunci când ea s-a speriat de o furtună în timp ce zburau cu avionul.

View File

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8" ?>
<segs>
<seg lang="ro"><s id="id_temp_aiurea.1"><w lemma="Sophia" ana="Np" chunk="Np#1">Sophia</w><w lemma="Loren" ana="Np" chunk="Np#1">Loren</w><w lemma="spune" ana="Vmnp" chunk="Vp#1">spune</w><w lemma="că" ana="Csssp"></w><w lemma="el" ana="Pp3fsr--------s" chunk="Vp#2">ea</w><w lemma="vrea" ana="Va--3s" chunk="Vp#2">va</w><w lemma="fi" ana="Vmnp" chunk="Vp#2">fi</w><w lemma="întotdeauna" ana="Rgp" chunk="Vp#2,Ap#1">întotdeauna</w><w lemma="mulțumitor" ana="Afpf--n" chunk="Ap#1">mulțumitoare</w><w lemma="față_de" ana="Spca" chunk="Pp#1">față_de</w><w lemma="bonă" ana="Ncfsvy" chunk="Pp#1,Np#2">Bono</w><c>.</c></s></seg>
<seg lang="ro"><s id="id_temp_aiurea.2"><w lemma="actriță" ana="Ncfsry" chunk="Np#1">Actrița</w><w lemma="avea" ana="Va--3s" chunk="Vp#1">a</w><w lemma="dezvălui" ana="Vmp--sm" chunk="Vp#1">dezvăluit</w><w lemma="că" ana="Csssp"></w><w lemma="cântăreț" ana="Ncmsry" chunk="Np#2">cântărețul</w><w lemma="trupă" ana="Ncfsoy" chunk="Np#2">trupei</w><w lemma="U2" ana="Np" chunk="Np#2">U2</w><w lemma="avea" ana="Va--3s" chunk="Vp#2">a</w><w lemma="ajuta" ana="Vmp--sm" chunk="Vp#2">ajutat</w><w lemma="el" ana="Pp3fsa--y-----w">-o</w><w lemma="să" ana="Qs" chunk="Vp#3"></w><w lemma="sine" ana="Px3--a--------w" chunk="Vp#3">se</w><w lemma="liniști" ana="Vmsp3" chunk="Vp#3">liniștească</w><w lemma="atunci_când" ana="Rw" chunk="Vp#3,Ap#1">atunci_când</w><w lemma="el" ana="Pp3fsr--------s">ea</w><w lemma="sine" ana="Px3--a--y-----w" chunk="Vp#4">s-</w><w lemma="avea" ana="Va--3s" chunk="Vp#4">a</w><w lemma="speria" ana="Vmp--sm" chunk="Vp#4">speriat</w><w lemma="de" ana="Spsa" chunk="Pp#1">de</w><w lemma="un" ana="Tifsr" chunk="Pp#1,Np#3">o</w><w lemma="furtună" ana="Ncfsrn" chunk="Pp#1,Np#3">furtună</w><w lemma="în_timp_ce" ana="Cscsp">în_timp_ce</w><w lemma="zbura" ana="Vmii3p" chunk="Vp#5">zburau</w><w lemma="cu" ana="Spsa" chunk="Pp#2">cu</w><w lemma="avion" ana="Ncmsry" chunk="Pp#2,Np#4">avionul</w><c>.</c></s></seg>
</segs>