Created
June 14, 2017 17:00
-
-
Save PtkFerraro/018ac787cfd10a977def552a1bdfeebe to your computer and use it in GitHub Desktop.
Elasticsearch setup for PT-BR
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PUT idxsearch/
{
  "settings": {
    "analysis": {
      "analyzer": {
        "default_us_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "us_stop", "stemmer_us_filter", "asciifolding"]
        },
        "snowball_us_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "us_stop", "asciifolding", "snowball_us_filter"]
        },
        "shingle_us_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "us_stop", "asciifolding", "shingle_filter"]
        },
        "edgengram_us_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "us_stop", "asciifolding", "stemmer_us_filter", "edgengram_filter"]
        },
        "default_ptbr_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "stemmer_ptbr_filter", "asciifolding"]
        },
        "default_ptbr2_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "stemmer_ptbr2_filter", "asciifolding"]
        },
        "snowball_ptbr_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "asciifolding", "snowball_ptbr_filter"]
        },
        "shingle_ptbr_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "asciifolding", "shingle_filter"]
        },
        "edgengram_ptbr_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "asciifolding", "stemmer_ptbr_filter", "edgengram_filter"]
        }
      },
      "filter": {
        "stemmer_ptbr_filter": {
          "type": "stemmer",
          "name": "light_portuguese"
        },
        "stemmer_ptbr2_filter": {
          "type": "stemmer",
          "name": "portuguese_rslp"
        },
        "stemmer_us_filter": {
          "type": "stemmer",
          "name": "english"
        },
        "us_stop": {
          "type": "stop",
          "stopwords": "_english_"
        },
        "ptbr_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "snowball_ptbr_filter": {
          "type": "snowball",
          "language": "Portuguese"
        },
        "snowball_us_filter": {
          "type": "snowball",
          "language": "English"
        },
        "shingle_filter": {
          "type": "shingle",
          "min_shingle_size": 3,
          "max_shingle_size": 5,
          "token_separator": " ",
          "filler_token": ""
        },
        "edgengram_filter": {
          "type": "edge_ngram",
          "min_gram": 3,
          "max_gram": 100
        }
      }
    }
  },
  "mappings": {
    "entersport": {
      "_all": { "enabled": false },
      "properties": {
        "is_adult": { "type": "boolean" },
        "match_start": { "type": "date" },
        "championship_name": { "type": "text", "analyzer": "default_ptbr_analyzer" },
        "match_title": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_ptbr_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_ptbr_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_ptbr_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer" }
          }
        }
      }
    },
    "tv": {
      "_all": { "enabled": false },
      "properties": {
        "is_adult": { "type": "boolean" },
        "program_start": { "type": "date" },
        "channel_name": { "type": "text", "analyzer": "default_ptbr_analyzer" },
        "program_title": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_ptbr_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_ptbr_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_ptbr_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer" }
          }
        }
      }
    },
    "actors": {
      "_all": { "enabled": false },
      "properties": {
        "is_adult": { "type": "boolean" },
        "actor_name": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_us_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_us_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_us_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_us_analyzer", "search_analyzer": "default_us_analyzer" }
          }
        }
      }
    },
    "radios": {
      "_all": { "enabled": false },
      "properties": {
        "is_adult": { "type": "boolean" },
        "station_name": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_ptbr_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_ptbr_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_ptbr_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer" }
          }
        }
      }
    },
    "movies": {
      "_all": { "enabled": false },
      "properties": {
        "genre.name": { "type": "keyword" },
        "vote_average": { "type": "half_float" },
        "tag.name": { "type": "keyword" },
        "is_adult": { "type": "boolean" },
        "cast.real_name": { "type": "text", "analyzer": "default_us_analyzer" },
        "movie_original_title": { "type": "text", "analyzer": "default_us_analyzer" },
        "movie_title": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_ptbr_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_ptbr_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_ptbr_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer" }
          }
        }
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment