<?xml version='1.0' encoding='UTF-8'?>
<OAI-PMH xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.openarchives.org/OAI/2.0/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
  <responseDate>2026-05-21T23:55:00Z</responseDate>
  <request verb="GetRecord" metadataPrefix="olac" identifier="ec397bb9bae611ee9c10e99c00eb27649a7f673b85724ebfaeb0f267373423c0">https://metashare.ut.ee/oai_pmh/</request>
  <GetRecord>
    <record>
      <header>
        <identifier>ec397bb9bae611ee9c10e99c00eb27649a7f673b85724ebfaeb0f267373423c0</identifier>
        <datestamp>2025-10-30T10:09:51Z</datestamp>
        <setSpec>corpus</setSpec>
        <setSpec>corpus:text</setSpec>
      </header>
      <metadata>
        <olac:olac xmlns:olac="http://www.language-archives.org/OLAC/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xsi:schemaLocation="http://purl.org/dc/elements/1.1/ http://www.language-archives.org/OLAC/1.1/dc.xsd http://purl.org/dc/terms/ http://www.language-archives.org/OLAC/1.1/dcterms.xsd http://www.language-archives.org/OLAC/1.1/ http://www.language-archives.org/OLAC/1.1/olac.xsd">
          <dc:title xml:lang="et">Eesti keele ühendkorpus 2023 (annoteerimata)</dc:title>
          <dc:title xml:lang="en">Estonian National Corpus 2023 (prevert)</dc:title>
          <!-- NOTE(review): this description is tagged xml:lang="et" but its text is English and
               is identical to the xml:lang="en" description that follows it; confirm whether an
               Estonian text should be supplied here or the language tag corrected. -->
          <dc:description xml:lang="et">Estonian corpus of written texts. Consists of the Estonian Reference Corpus (90s–2008), Contemporary and old literature, Estonian Web (2013, 2017, 2019, 2021, 2023), Timestamped Estonian corpora (2014–2021, 2020–2023), Estonian Wikipedia (articles: 2023, talkpages: 2017) and Estonian academic writing (2020–2023). Cleaned, deduplicated. Text type annotation: topics, genres. 

ENCODING: UTF-8

== Comparison to ENC 2021 corpus
Balanced Corpus 1990–2008 ................. kept without changes
Reference Corpus 1990–2008 ................ kept without changes
Literature Old 1864–1945 .................. updated according to the source
Literature Contemporary 2000–2023 ......... updated according to the source
Web 2013 .................................. kept without changes
Web 2017 .................................. kept without changes
Wikipedia Talk 2017 ....................... kept without changes
Academic Texts (formerly DOAJ) up to 2023 . updated with new data
Web 2019 .................................. kept without changes
Web 2021 .................................. kept without changes
Wikipedia 2023 ............................ replacing Wikipedia 2021
Feeds (JSI) 2014–2021 ..................... kept without changes
Feeds (LC) 2020–2023 ...................... updated with new data
Web 2023 .................................. new</dc:description>
          <dc:description xml:lang="en">Estonian corpus of written texts. Consists of the Estonian Reference Corpus (90s–2008), Contemporary and old literature, Estonian Web (2013, 2017, 2019, 2021, 2023), Timestamped Estonian corpora (2014–2021, 2020–2023), Estonian Wikipedia (articles: 2023, talkpages: 2017) and Estonian academic writing (2020–2023). Cleaned, deduplicated. Text type annotation: topics, genres. 

ENCODING: UTF-8

== Comparison to ENC 2021 corpus
Balanced Corpus 1990–2008 ................. kept without changes
Reference Corpus 1990–2008 ................ kept without changes
Literature Old 1864–1945 .................. updated according to the source
Literature Contemporary 2000–2023 ......... updated according to the source
Web 2013 .................................. kept without changes
Web 2017 .................................. kept without changes
Wikipedia Talk 2017 ....................... kept without changes
Academic Texts (formerly DOAJ) up to 2023 . updated with new data
Web 2019 .................................. kept without changes
Web 2021 .................................. kept without changes
Wikipedia 2023 ............................ replacing Wikipedia 2021
Feeds (JSI) 2014–2021 ..................... kept without changes
Feeds (LC) 2020–2023 ...................... updated with new data
Web 2023 .................................. new</dc:description>
          <dcterms:alternative xml:lang="en">Estonian NC 2023</dcterms:alternative>
          <dc:identifier xsi:type="dcterms:URI">https://doi.org/10.15155/3-00-0000-0000-0000-08C04M</dc:identifier>
          <dc:language xsi:type="olac:language" olac:code="et">Estonian</dc:language>
          <dc:type xsi:type="olac:linguistic-type" olac:code="primary_text"/>
          <dc:subject>language resources</dc:subject>
          <dc:subject>monolingual corpus</dc:subject>
          <dc:type xsi:type="dcterms:DCMIType">Text</dc:type>
          <dcterms:license>CLARIN_ACA</dcterms:license>
          <dc:relation>varasem versioon: https://metashare.ut.ee/repository/browse/estonian-national-corpus-2021/47c34882a0d411eebb4773db10791bcfcf7a44936eb047b881539529d635fc17/</dc:relation>
          <dcterms:bibliographicCitation>https://www.sketchengine.eu/estonian-national-corpus/</dcterms:bibliographicCitation>
          <dcterms:extent>3800000000 tokens</dcterms:extent>
          <dc:creator>Jelena Kallas, jelena.kallas[at]eki.ee, Eesti Keele Instituut</dc:creator>
          <dc:creator>Kristina Koppel, Kristina.Koppel[at]eki.ee, Eesti Keele Instituut</dc:creator>
          <dc:creator>Helen Kaljumäe, helen.kaljumae[at]eki.ee, Eesti Keele Instituut</dc:creator>
          <dc:creator>Madis Jürviste, madis.jyrviste[at]eki.ee, Eesti Keele Instituut</dc:creator>
          <dc:contributor xsi:type="olac:role" olac:code="depositor">Tõnis Nurk, tonis[at]eki.ee, Eesti Keele Instituut</dc:contributor>
          <dc:contributor xsi:type="olac:role" olac:code="depositor">Jelena Kallas, jelena.kallas[at]eki.ee, Eesti Keele Instituut</dc:contributor>
          <dc:contributor xsi:type="olac:role" olac:code="depositor">Kristina Koppel, Kristina.Koppel[at]eki.ee, Eesti Keele Instituut</dc:contributor>
        </olac:olac>
      </metadata>
    </record>
  </GetRecord>
</OAI-PMH>
