SPARQL/WIKIDATA Lexicographical data
Wikidata contains more than triples about concepts (Q-items, each of which relates to a thing or an idea). Since 2018, Wikidata has also stored a new type of data: words, phrases and sentences, in many languages, described in many languages. This information is stored in new types of entities, called Lexemes (L), Forms (F) and Senses (S).
Comment This chapter is not yet complete. Please help expand this.
Glossary | SPARQL code |
A Lexeme is a lexical element of a language, such as a word, a phrase, or a prefix (see Lexeme on Wikipedia). Lexemes are Entities in the sense of the Wikibase data model. A Lexeme is described using the following information:
|
|
|
|
|
|
Prefixes
[edit | edit source]Prefixes used only for Lexicographical data are:
# OntoLex vocabulary; the queries on this page use it for ontolex:LexicalEntry, ontolex:sense, ontolex:lexicalForm and ontolex:representation.
PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
# Dublin Core terms; the queries on this page use dct:language to link a lexeme to its language.
PREFIX dct: <http://purl.org/dc/terms/>
Examples
[edit | edit source]Get the Swedish gloss of a specific lexeme
[edit | edit source]# Senses of one lexeme together with their Swedish glosses.
SELECT ?sense ?gloss
WHERE {
  VALUES ?lexeme { wd:L35455 }            # Swedish noun "vara"
  ?lexeme ontolex:sense ?sense .
  ?sense  skos:definition ?gloss .
  # Keep a gloss only when its language tag is exactly "sv"
  FILTER(LANG(?gloss) = "sv")
}
Get senses of a specific lexeme that have P5137 (item for this sense)
[edit | edit source]# Senses of one lexeme that carry P5137 (item for this sense), with their glosses.
SELECT ?sense ?gloss
WHERE {
  VALUES ?lexeme { wd:L39751 }            # Swedish adjective "smaklös"
  ?lexeme ontolex:sense ?sense .
  # A sense is kept only if it has some P5137 value; senses without one drop out.
  # The gloss is not filtered by language, so every available gloss is returned.
  ?sense wdt:P5137 [] ;
         skos:definition ?gloss .
}
Lexemes describing a color
[edit | edit source]# By Vesihiisi
# Lexemes that are an instance of (P31) "color term", with their lemma and language label.
SELECT ?l ?lemma ?languageLabel
WHERE {
  ?l a ontolex:LexicalEntry ;
     wdt:P31 wd:Q376431 ;                 # color term
     wikibase:lemma ?lemma ;
     dct:language ?language .
  # The label service supplies ?languageLabel for ?language
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ?languageLabel
Words per language
[edit | edit source]Here is an overview of the number of words per language
# Number of lemmas per language tag, most frequent first.
# ?label is the raw language tag of the lemma (for example "en" or "en-gb"), not an entity label,
# so no label service is needed.
SELECT (?language AS ?label) (COUNT(*) AS ?count)
WHERE {
  ?l a ontolex:LexicalEntry ;
     wikibase:lemma ?word .
  # The language comes from the lemma's language tag; a lexeme with several lemmas
  # contributes one row per lemma.
  BIND(LANG(?word) AS ?language)
}
GROUP BY ?language
ORDER BY DESC(?count)
English and American English
[edit | edit source]This query lists all words whose spelling differs between British English (lemma tagged en-gb) and American English (taken here from the lemma tagged en):
# Lexemes whose "en-gb" lemma is spelled differently from their "en" lemma.
SELECT ?l ?english ?american
WHERE {
  ?l wikibase:lemma ?english . FILTER(LANG(?english) = "en-gb")
  # NOTE(review): "en" is the plain English tag and is used here as the American spelling - confirm
  ?l wikibase:lemma ?american . FILTER(LANG(?american) = "en")
  # Compare the plain strings. The two literals carry different language tags, so comparing
  # the literals themselves can never report "same spelling".
  FILTER(STR(?english) != STR(?american))
}
ORDER BY ?english
Overview of Lexical categories
[edit | edit source]Here is an overview of the most used Lexical categories in English:
# Lexical categories of English lexemes, most frequent first.
# COUNT(*) counts one row per (lexeme, lemma, category) match.
SELECT ?categoryLabel (COUNT(*) AS ?count)
WHERE {
  # English is selected by matching the language item whose P218 value is 'en'
  ?language wdt:P218 'en' .
  ?l a ontolex:LexicalEntry ;
     dct:language ?language ;
     wikibase:lexicalCategory ?category ;
     wikibase:lemma ?word .
  # The label service supplies ?categoryLabel for ?category
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?categoryLabel
ORDER BY DESC(?count)
Longest words
[edit | edit source]Here, as an example, is a list of the longest words in English:
# The 20 longest words of lexemes whose language is wd:Q1860.
# ?word is either the lemma or the representation of any of the lexeme's forms;
# the two sources are combined with a property-path alternative.
SELECT DISTINCT ?l ?word ?len
WHERE {
  ?l a ontolex:LexicalEntry ;
     dct:language wd:Q1860 ;
     (wikibase:lemma | ontolex:lexicalForm/ontolex:representation) ?word .
  BIND(STRLEN(?word) AS ?len)
}
ORDER BY DESC(?len)
LIMIT 20
Adjectives
[edit | edit source]This example shows (English) adjectives and their positive, comparative and superlative degrees. By changing VALUES ?language { wd:Q1860 } the query can be adapted to any other language.
# adjectives
# For every adjective of ?language that has both a comparative and a superlative form, list its
# positive, comparative and superlative representations, plus the labels of any further
# grammatical features found on those forms (?subfeatures).
SELECT DISTINCT ?l ?word (GROUP_CONCAT(DISTINCT ?subfeatLabel; SEPARATOR=", ") AS ?subfeatures)
       (GROUP_CONCAT(DISTINCT ?positive; SEPARATOR=", ") AS ?Positive)
       (GROUP_CONCAT(DISTINCT ?comparative; SEPARATOR=", ") AS ?Comparative)
       (GROUP_CONCAT(DISTINCT ?superlative; SEPARATOR=", ") AS ?Superlative)
WHERE {
  VALUES ?language { wd:Q1860 } # English
  ?l a ontolex:LexicalEntry ; wikibase:lemma ?word ; wikibase:lexicalCategory wd:Q34698 . # adjective
  ?l dct:language ?language .
  # The positive form is optional; its representation goes into a helper variable so that
  # ?positive can be introduced once, by the BIND further down.
  OPTIONAL {
    ?l ontolex:lexicalForm ?form1 .
    ?form1 ontolex:representation ?positiveForm ; wikibase:grammaticalFeature wd:Q3482678 . # positive
    OPTIONAL { ?form1 wikibase:grammaticalFeature ?subfeat . FILTER(?subfeat != wd:Q3482678 ) }
  }
  # Comparative and superlative forms are required: adjectives lacking either are not listed.
  ?l ontolex:lexicalForm ?form2 .
  ?form2 ontolex:representation ?comparative ; wikibase:grammaticalFeature wd:Q14169499 . # comparative
  OPTIONAL { ?form2 wikibase:grammaticalFeature ?subfeat . FILTER(?subfeat != wd:Q14169499 ) }
  ?l ontolex:lexicalForm ?form3 .
  ?form3 ontolex:representation ?superlative ; wikibase:grammaticalFeature wd:Q1817208 . # superlative
  OPTIONAL { ?form3 wikibase:grammaticalFeature ?subfeat . FILTER(?subfeat != wd:Q1817208 ) }
  # Use the lemma when no positive form was found. BIND must introduce a variable that has not
  # been used earlier in the group, hence the separate ?positiveForm above.
  BIND(COALESCE(?positiveForm, ?word) AS ?positive)
  # Manual label-service mode: only ?subfeat is labelled
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
    ?subfeat rdfs:label ?subfeatLabel.
  }
}
GROUP BY ?word ?l
ORDER BY ?word ?l
LIMIT 20000
Verbs
[edit | edit source]This example shows (English) verbs and their conjugations. The query is very complex because conjugations are modeled in a complex way in Wikidata. By changing VALUES ?language { "en" } the query can be adapted to any other language. Currently only a few verbs are conjugated.
# verbs
# Lists the verbs of one language with up to six conjugated forms per row:
#   ?single1 / ?single2 / ?single3  - first / second / third person singular
#   ?plural1 / ?plural2 / ?plural3  - first / second / third person plural
# Each form is looked up in its own OPTIONAL block, so a verb is still listed when a form is missing.
# Inside each block a UNION accepts several ways a form may be tagged: the combined
# person+number feature, the person feature without the opposite number marker, or a bare
# number feature without any person marker.
# ?subfeat picks up grammatical features of a matched form other than the person/number
# features excluded by the FILTERs; their labels are concatenated into ?subfeatures.
# Note that ?subfeat is the same variable in all six blocks.
SELECT ?l ?word (GROUP_CONCAT(DISTINCT ?subfeatLabel; SEPARATOR=", ") AS ?subfeatures)
?single1 ?single2 ?single3 ?plural1 ?plural2 ?plural3
WHERE {
# The language is given as a language tag and compared with LANG() of lemma and representations
VALUES ?language { "en" }
?l a ontolex:LexicalEntry ; wikibase:lemma ?word; wikibase:lexicalCategory ?category .
FILTER(?category = wd:Q24905 ) # verb
FILTER(LANG(?word) = ?language)
# --- first person singular -> ?single1 ---
OPTIONAL {
?l ontolex:lexicalForm ?form1 .
{ ?form1 ontolex:representation ?single1 ; wikibase:grammaticalFeature wd:Q51929218 . # first-person singular
} UNION
{ ?form1 ontolex:representation ?single1 ; wikibase:grammaticalFeature wd:Q21714344 . # first person
FILTER NOT EXISTS{ ?form1 wikibase:grammaticalFeature wd:Q146786 . } # without plural
FILTER NOT EXISTS{ ?form1 wikibase:grammaticalFeature wd:Q51929154 . } # without plural person
} UNION
{ ?form1 ontolex:representation ?single1 ; wikibase:grammaticalFeature wd:Q51929131 . # singular person
FILTER NOT EXISTS{ ?form1 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form1 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form1 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
} UNION
{ ?form1 ontolex:representation ?single1 ; wikibase:grammaticalFeature wd:Q110786 . # singular
FILTER NOT EXISTS{ ?form1 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form1 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form1 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
}
# keep only representations whose language tag equals ?language
FILTER(LANG(?single1) = ?language )
OPTIONAL { ?form1 wikibase:grammaticalFeature ?subfeat .
FILTER(?subfeat != wd:Q51929218 && ?subfeat != wd:Q21714344 ) # not first-person singular / first person
FILTER(?subfeat != wd:Q51929131 && ?subfeat != wd:Q110786 ) # not singular person / singular
FILTER(?subfeat != wd:Q51929049 && ?subfeat != wd:Q51929074 ) } # not second person / third person
}
# --- second person singular -> ?single2 ---
OPTIONAL {
?l ontolex:lexicalForm ?form2 .
{ ?form2 ontolex:representation ?single2 ; wikibase:grammaticalFeature wd:Q51929369 . # second-person singular
} UNION
{ ?form2 ontolex:representation ?single2 ; wikibase:grammaticalFeature wd:Q51929049 . # second person
FILTER NOT EXISTS{ ?form2 wikibase:grammaticalFeature wd:Q146786 . } # without plural
FILTER NOT EXISTS{ ?form2 wikibase:grammaticalFeature wd:Q51929154 . } # without plural person
} UNION
{ ?form2 ontolex:representation ?single2 ; wikibase:grammaticalFeature wd:Q51929131 . # singular person
FILTER NOT EXISTS{ ?form2 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form2 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form2 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
} UNION
{ ?form2 ontolex:representation ?single2 ; wikibase:grammaticalFeature wd:Q110786 . # singular
FILTER NOT EXISTS{ ?form2 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form2 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form2 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
}
# keep only representations whose language tag equals ?language
FILTER(LANG(?single2) = ?language )
OPTIONAL { ?form2 wikibase:grammaticalFeature ?subfeat .
FILTER(?subfeat != wd:Q51929369 && ?subfeat != wd:Q51929049 ) # not second-person singular / second person
FILTER(?subfeat != wd:Q51929131 && ?subfeat != wd:Q110786 ) # not singular person / singular
FILTER(?subfeat != wd:Q21714344 && ?subfeat != wd:Q51929074 ) } # not first person / third person
}
# --- third person singular -> ?single3 ---
OPTIONAL {
?l ontolex:lexicalForm ?form3 .
{ ?form3 ontolex:representation ?single3 ; wikibase:grammaticalFeature wd:Q51929447 . # third-person singular
} UNION
{ ?form3 ontolex:representation ?single3 ; wikibase:grammaticalFeature wd:Q51929074 . # third person
FILTER NOT EXISTS{ ?form3 wikibase:grammaticalFeature wd:Q146786 . } # without plural
FILTER NOT EXISTS{ ?form3 wikibase:grammaticalFeature wd:Q51929154 . } # without plural person
} UNION
{ ?form3 ontolex:representation ?single3 ; wikibase:grammaticalFeature wd:Q51929131 . # singular person
FILTER NOT EXISTS{ ?form3 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form3 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form3 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
} UNION
{ ?form3 ontolex:representation ?single3 ; wikibase:grammaticalFeature wd:Q110786 . # singular
FILTER NOT EXISTS{ ?form3 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form3 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form3 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
}
# keep only representations whose language tag equals ?language
FILTER(LANG(?single3) = ?language )
OPTIONAL { ?form3 wikibase:grammaticalFeature ?subfeat .
FILTER(?subfeat != wd:Q51929447 && ?subfeat != wd:Q51929074 ) # not third-person singular / third person
FILTER(?subfeat != wd:Q51929131 && ?subfeat != wd:Q110786 ) # not singular person / singular
FILTER(?subfeat != wd:Q21714344 && ?subfeat != wd:Q51929049 ) } # not first person / second person
}
# --- first person plural -> ?plural1 ---
OPTIONAL {
?l ontolex:lexicalForm ?form4 .
{ ?form4 ontolex:representation ?plural1 ; wikibase:grammaticalFeature wd:Q51929290 . # first-person plural
} UNION
{ ?form4 ontolex:representation ?plural1 ; wikibase:grammaticalFeature wd:Q21714344 . # first person
FILTER NOT EXISTS{ ?form4 wikibase:grammaticalFeature wd:Q110786 . } # without singular
FILTER NOT EXISTS{ ?form4 wikibase:grammaticalFeature wd:Q51929131 . } # without singular person
} UNION
{ ?form4 ontolex:representation ?plural1 ; wikibase:grammaticalFeature wd:Q51929154 . # plural person
FILTER NOT EXISTS{ ?form4 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form4 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form4 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
} UNION
{ ?form4 ontolex:representation ?plural1 ; wikibase:grammaticalFeature wd:Q146786 . # plural
FILTER NOT EXISTS{ ?form4 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form4 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form4 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
}
# keep only representations whose language tag equals ?language
FILTER(LANG(?plural1) = ?language )
OPTIONAL { ?form4 wikibase:grammaticalFeature ?subfeat .
FILTER(?subfeat != wd:Q51929290 && ?subfeat != wd:Q21714344 ) # not first-person plural / first person
FILTER(?subfeat != wd:Q51929154 && ?subfeat != wd:Q146786 ) # not plural person / plural
FILTER(?subfeat != wd:Q51929049 && ?subfeat != wd:Q51929074 ) } # not second person / third person
}
# --- second person plural -> ?plural2 ---
OPTIONAL {
?l ontolex:lexicalForm ?form5 .
{ ?form5 ontolex:representation ?plural2 ; wikibase:grammaticalFeature wd:Q51929403 . # second-person plural
} UNION
{ ?form5 ontolex:representation ?plural2 ; wikibase:grammaticalFeature wd:Q51929049 . # second person
FILTER NOT EXISTS{ ?form5 wikibase:grammaticalFeature wd:Q110786 . } # without singular
FILTER NOT EXISTS{ ?form5 wikibase:grammaticalFeature wd:Q51929131 . } # without singular person
} UNION
{ ?form5 ontolex:representation ?plural2 ; wikibase:grammaticalFeature wd:Q51929154 . # plural person
FILTER NOT EXISTS{ ?form5 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form5 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form5 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
} UNION
{ ?form5 ontolex:representation ?plural2 ; wikibase:grammaticalFeature wd:Q146786 . # plural
FILTER NOT EXISTS{ ?form5 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form5 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form5 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
}
# keep only representations whose language tag equals ?language
FILTER(LANG(?plural2) = ?language )
OPTIONAL { ?form5 wikibase:grammaticalFeature ?subfeat .
FILTER(?subfeat != wd:Q51929403 && ?subfeat != wd:Q51929049 ) # not second-person plural / second person
FILTER(?subfeat != wd:Q51929154 && ?subfeat != wd:Q146786 ) # not plural person / plural
FILTER(?subfeat!= wd:Q21714344 && ?subfeat != wd:Q51929074 ) } # not first person / third person
}
# --- third person plural -> ?plural3 ---
OPTIONAL {
?l ontolex:lexicalForm ?form6 .
{ ?form6 ontolex:representation ?plural3 ; wikibase:grammaticalFeature wd:Q51929517 . # third-person plural
} UNION
{ ?form6 ontolex:representation ?plural3 ; wikibase:grammaticalFeature wd:Q51929074 . # third person
FILTER NOT EXISTS{ ?form6 wikibase:grammaticalFeature wd:Q110786 . } # without singular
FILTER NOT EXISTS{ ?form6 wikibase:grammaticalFeature wd:Q51929131 . } # without singular person
} UNION
{ ?form6 ontolex:representation ?plural3 ; wikibase:grammaticalFeature wd:Q51929154 . # plural person
FILTER NOT EXISTS{ ?form6 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form6 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form6 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
} UNION
{ ?form6 ontolex:representation ?plural3 ; wikibase:grammaticalFeature wd:Q146786 . # plural
FILTER NOT EXISTS{ ?form6 wikibase:grammaticalFeature wd:Q21714344 . } # without first person
FILTER NOT EXISTS{ ?form6 wikibase:grammaticalFeature wd:Q51929049 . } # without second person
FILTER NOT EXISTS{ ?form6 wikibase:grammaticalFeature wd:Q51929074 . } # without third person
}
# keep only representations whose language tag equals ?language
FILTER(LANG(?plural3) = ?language )
OPTIONAL { ?form6 wikibase:grammaticalFeature ?subfeat .
FILTER(?subfeat != wd:Q51929517 && ?subfeat != wd:Q51929074 ) # not third-person plural / third person
FILTER(?subfeat != wd:Q51929154 && ?subfeat != wd:Q146786 ) # not plural person / plural
FILTER(?subfeat != wd:Q21714344 && ?subfeat != wd:Q51929049 ) } # not first person / second person
}
# Manual label-service mode: only ?subfeat is labelled
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
?subfeat rdfs:label ?subfeatLabel.
}
}
# One output row per distinct combination of the six forms; only ?subfeatLabel is aggregated
GROUP BY ?l ?word ?single1 ?single2 ?single3 ?plural1 ?plural2 ?plural3
ORDER BY ?word ?single1 ?single2 ?single3 ?plural1 ?plural2 ?plural3
LIMIT 20000
Articles in all languages
[edit | edit source]This example shows articles in many languages.
# articles in all languages
# Inner query: one row per (lexeme, masculine, feminine, neuter) combination, with the labels of
# the forms' remaining grammatical features concatenated into ?subfeatures.
# Outer query: folds the gender columns so that each (lexeme, ?subfeatures) pair becomes one row.
SELECT ?l ?language ?categoryLabel ?word ?subfeatures
       (GROUP_CONCAT(DISTINCT ?masculine; SEPARATOR=", ") AS ?Masculine )
       (GROUP_CONCAT(DISTINCT ?feminine; SEPARATOR=", ") AS ?Feminine )
       (GROUP_CONCAT(DISTINCT ?neuter; SEPARATOR=", ") AS ?Neuter )
WHERE {
  SELECT ?l ?language ?categoryLabel ?word (GROUP_CONCAT(DISTINCT ?subfeatLabel; SEPARATOR=", ") AS ?subfeatures)
         ?masculine ?feminine ?neuter
  WHERE {
    # Restrict the lexical category directly instead of comparing it with a second variable
    VALUES ?category { wd:Q103184 wd:Q2865743 wd:Q3813849 } # article or definite article or indefinite article
    ?l a ontolex:LexicalEntry ; wikibase:lemma ?word ; wikibase:lexicalCategory ?category .
    # The language is the lemma's language tag; form representations must carry the same tag
    BIND(LANG(?word) AS ?language)
    OPTIONAL {
      ?l ontolex:lexicalForm ?form1 .
      ?form1 ontolex:representation ?masculine ; wikibase:grammaticalFeature wd:Q499327 . # masculine
      FILTER(LANG(?masculine) = ?language )
      OPTIONAL { ?form1 wikibase:grammaticalFeature ?subfeat . FILTER(?subfeat != wd:Q499327 ) }
    }
    OPTIONAL {
      ?l ontolex:lexicalForm ?form2 .
      ?form2 ontolex:representation ?feminine ; wikibase:grammaticalFeature wd:Q1775415 . # feminine
      FILTER(LANG(?feminine) = ?language )
      OPTIONAL { ?form2 wikibase:grammaticalFeature ?subfeat . FILTER(?subfeat != wd:Q1775415 ) }
    }
    OPTIONAL {
      ?l ontolex:lexicalForm ?form3 .
      ?form3 ontolex:representation ?neuter ; wikibase:grammaticalFeature wd:Q1775461 . # neuter
      FILTER(LANG(?neuter) = ?language )
      OPTIONAL { ?form3 wikibase:grammaticalFeature ?subfeat . FILTER(?subfeat != wd:Q1775461 ) }
    }
    # Manual label-service mode: ?subfeat and ?category are labelled
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
      ?subfeat rdfs:label ?subfeatLabel.
      ?category rdfs:label ?categoryLabel.
    }
  }
  GROUP BY ?language ?l ?categoryLabel ?word ?masculine ?feminine ?neuter
  ORDER BY ?language ?categoryLabel ?subfeatures ?word ?masculine ?feminine ?neuter
}
GROUP BY ?language ?l ?categoryLabel ?word ?subfeatures
# Sort on the grouping keys only: ?masculine, ?feminine and ?neuter are aggregated away at this
# level, and the grouping keys already identify each row uniquely.
ORDER BY ?language ?l ?categoryLabel ?word ?subfeatures
External tools
[edit | edit source]See Wikidata:Tools/Lexicographical data for a list of external tools for Lexicographical data.
References
[edit | edit source]