Skip to content

Instantly share code, notes, and snippets.

@PtkFerraro
Created June 14, 2017 17:00
Show Gist options
  • Save PtkFerraro/018ac787cfd10a977def552a1bdfeebe to your computer and use it in GitHub Desktop.
Save PtkFerraro/018ac787cfd10a977def552a1bdfeebe to your computer and use it in GitHub Desktop.
Elasticsearch setup for PT-BR (Brazilian Portuguese)
PUT idxsearch/
{
"settings": {
"analysis": {
"analyzer": {
"default_us_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["us_stop", "stemmer_us_filter", "lowercase", "asciifolding"]
},
"snowball_us_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["us_stop", "lowercase", "asciifolding", "snowball_us_filter"]
},
"shingle_us_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["us_stop", "lowercase", "asciifolding", "shingle_filter"]
},
"edgengram_us_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["us_stop", "lowercase", "asciifolding", "stemmer_us_filter", "edgengram_filter"]
},
"default_ptbr_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["ptbr_stop", "stemmer_ptbr_filter", "lowercase", "asciifolding"]
},
"default_ptbr2_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["ptbr_stop", "stemmer_ptbr2_filter", "lowercase", "asciifolding"]
},
"snowball_ptbr_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["ptbr_stop", "lowercase", "asciifolding", "snowball_ptbr_filter"]
},
"shingle_ptbr_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["ptbr_stop", "lowercase", "asciifolding", "shingle_filter"]
},
"edgengram_ptbr_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["ptbr_stop", "lowercase", "asciifolding", "stemmer_ptbr_filter", "edgengram_filter"]
}
},
"filter": {
"stemmer_ptbr_filter": {
"type": "stemmer",
"name": "light_portuguese"
},
"stemmer_ptbr2_filter": {
"type": "stemmer",
"name": "portuguese_rslp"
},
"stemmer_us_filter": {
"type": "stemmer",
"name": "english"
},
"us_stop": {
"type": "stop",
"stopwords": "_english_"
},
"ptbr_stop": {
"type": "stop",
"stopwords": "_brazilian_"
},
"snowball_ptbr_filter" : {
"type" : "snowball",
"language" : "Portuguese"
},
"snowball_us_filter" : {
"type" : "snowball",
"language" : "English"
},
"shingle_filter": {
"type": "shingle",
"min_shingle_size": 3,
"max_shingle_size": 5,
"token_separator": " ",
"filler_token": ""
},
"edgengram_filter": {
"type": "edgeNGram",
"min_gram": 3,
"max_gram": 100
}
}
}
},
"mappings": {
"entersport": {
"_all": { "enabled": false },
"properties": {
"is_adult": { "type": "boolean" },
"match_start": { "type": "date" },
"championship_name": { "type": "text", "analyzer": "default_ptbr_analyzer" },
"match_title": {
"type": "text",
"fields": {
"default": {"type": "text", "analyzer": "default_ptbr_analyzer" },
"snowball": {"type": "text", "analyzer": "snowball_ptbr_analyzer"},
"shingles": {"type": "text", "analyzer": "shingle_ptbr_analyzer" },
"ngrams": {"type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer"}
}
}
}
},
"tv": {
"_all": { "enabled": false },
"properties": {
"is_adult": { "type": "boolean" },
"program_start": { "type": "date" },
"channel_name": { "type": "text", "analyzer": "default_ptbr_analyzer" },
"program_title": {
"type": "text",
"fields": {
"default": {"type": "text", "analyzer": "default_ptbr_analyzer" },
"snowball": {"type": "text", "analyzer": "snowball_ptbr_analyzer"},
"shingles": {"type": "text", "analyzer": "shingle_ptbr_analyzer" },
"ngrams": {"type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer"}
}
}
}
},
"actors": {
"_all": { "enabled": false },
"properties": {
"is_adult": { "type": "boolean" },
"actor_name": {
"type": "text",
"fields": {
"default": {"type": "text", "analyzer": "default_us_analyzer" },
"snowball": {"type": "text", "analyzer": "snowball_us_analyzer"},
"shingles": {"type": "text", "analyzer": "shingle_us_analyzer" },
"ngrams": {"type": "text", "analyzer": "edgengram_us_analyzer", "search_analyzer": "default_us_analyzer"}
}
}
}
},
"radios": {
"_all": { "enabled": false },
"properties": {
"is_adult": { "type": "boolean" },
"station_name": {
"type": "text",
"fields": {
"default": {"type": "text", "analyzer": "default_ptbr_analyzer" },
"snowball": {"type": "text", "analyzer": "snowball_ptbr_analyzer"},
"shingles": {"type": "text", "analyzer": "shingle_ptbr_analyzer" },
"ngrams": {"type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer"}
}
}
}
},
"movies": {
"_all": { "enabled": false },
"properties": {
"genre.name": { "type": "keyword"},
"vote_average": { "type": "half_float"},
"tag.name": { "type": "keyword"},
"is_adult": { "type": "boolean" },
"cast.real_name": { "type": "text", "analyzer": "default_us_analyzer" },
"movie_original_title": { "type": "text", "analyzer": "default_us_analyzer" },
"movie_title": {
"type": "text",
"fields": {
"default": {"type": "text", "analyzer": "default_ptbr_analyzer" },
"snowball": {"type": "text", "analyzer": "snowball_ptbr_analyzer"},
"shingles": {"type": "text", "analyzer": "shingle_ptbr_analyzer" },
"ngrams": {"type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer"}
}
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment