"""Elasticsearch query helpers for the clustered document index.

Every public function builds a request against the ``app`` index
(``document`` doc type) and returns the raw response of the Elasticsearch
client call unchanged.
"""
import json
import logging

from elasticsearch import Elasticsearch

from controllers import Config

logger = logging.getLogger(__name__)

# NOTE(review): host and credentials are hard-coded in source control.
# They are left untouched here so deployments keep working, but they should
# be moved to configuration / environment variables and rotated.
client = Elasticsearch(
    ['54.37.31.100'],
    http_auth=('oaboss', 'master'),
    port=9201,
)

# Maximum number of hits requested per cluster query.  The names keep their
# original spelling ("SIE") because other modules may import them.
RESULT_SIE = 300
RESULT_SIE_CLUSTERED = 2000


def _build_filters(group, discipline, author, pub_period):
    """Return the list of ``filter`` clauses for the supplied facet values.

    Falsy values are skipped.  ``pub_period`` is indexed at positions 0 and 1
    to obtain the inclusive lower/upper bounds, sent with the
    ``dd/MM/yyyy`` date format.
    """
    filters = []
    if group:
        filters.append({"term": {"group_name": group}})
    if discipline:
        filters.append({"term": {"discipline": discipline}})
    if author:
        filters.append({"term": {"author": author}})
    if pub_period:
        filters.append({
            "range": {
                "published_at": {
                    "gte": pub_period[0],
                    "lte": pub_period[1],
                    "format": "dd/MM/yyyy"
                }
            }
        })
    return filters


def _keyword_terms(keywords):
    """Return one lower-cased ``title`` term clause per keyword.

    ``None`` (or any empty value) yields an empty list.
    """
    return [{"term": {"title": keyword.lower()}} for keyword in keywords or []]


def _cluster_id(size, idx):
    """Return the ``cluster.id`` value for cluster ``idx`` of a ``size`` split."""
    return "cluster_{}_{}".format(size, idx)


def _cluster_query(cluster, source, result_size, terms, filters=None):
    """Build one search body restricted to ``cluster``.

    The ``filter`` key is only emitted when ``filters`` is not ``None`` so
    that callers which never sent a filter clause keep sending none.
    """
    bool_query = {"must": [{"term": {"cluster.id": cluster}}] + terms}
    if filters is not None:
        bool_query["filter"] = filters
    return {
        "_source": source,
        "size": result_size,
        "query": {"bool": bool_query},
    }


def _msearch(queries):
    """Send ``queries`` as a single multi-search request and return the response.

    Each query is preceded by an empty ``{}`` header line; the header carries
    no index because the index is passed as an argument to the client call.
    """
    request = "".join("{} \n%s \n" % json.dumps(query) for query in queries)
    logger.debug("msearch request: %s", request)
    return client.msearch(body=request, index="app", doc_type="document")


def fetch_clusters(size, group, disipline, author, pub_period):
    """Fetch documents of every cluster of a ``size``-way clustering.

    Args:
        size: Number of clusters; converted with ``int()`` for iteration and
            used verbatim (via ``str()``) inside the cluster id.
        group: Optional ``group_name`` filter value.
        disipline: Optional ``discipline`` filter value (parameter name kept
            as-is for backward compatibility).
        author: Optional ``author`` filter value.
        pub_period: Optional two-item sequence of ``dd/MM/yyyy`` bounds for
            ``published_at``.

    Returns:
        The raw response of ``client.msearch`` for ``int(size)`` queries,
        each requesting up to ``RESULT_SIE`` hits.
    """
    filters = _build_filters(group, disipline, author, pub_period)
    source = ["title", "title_cleaned", "discipline"]
    queries = [
        _cluster_query(_cluster_id(size, idx), source, RESULT_SIE, [], filters)
        for idx in range(int(size))
    ]
    return _msearch(queries)


def get_facets(keywords, cluster_size, group, discipline, author, pub_period):
    """Return term aggregations for ``group_name``, ``discipline`` and ``author``.

    Args:
        keywords: Iterable of keywords; each must match the ``title`` field
            (lower-cased).  ``None`` is treated as no keywords.
        cluster_size: Unused; kept so existing callers do not break.
        group: Optional ``group_name`` filter value.
        discipline: Optional ``discipline`` filter value.
        author: Optional ``author`` filter value.
        pub_period: Optional two-item sequence of ``dd/MM/yyyy`` bounds for
            ``published_at``.

    Returns:
        The raw response of ``client.search``; the request uses ``size: 0``
        so only aggregations are requested.
    """
    request = {
        "size": 0,
        "query": {
            "bool": {
                "must": _keyword_terms(keywords),
                "filter": _build_filters(group, discipline, author, pub_period)
            }
        },
        "aggregations": {
            "group_name": {"terms": {"field": "group_name"}},
            "discipline": {"terms": {"field": "discipline"}},
            "author": {"terms": {"field": "author"}}
        }
    }
    return client.search(body=request, index="app", doc_type="document")


def search_query_filter(keywords, subcluster, size):
    """Search titles by keyword in one cluster, or in all clusters.

    Args:
        keywords: Iterable of keywords; each must match the ``title`` field
            (lower-cased).  ``None`` is treated as no keywords.
        subcluster: Zero-based cluster index.  Anything ``int()`` cannot
            convert (``None``, ``""``, non-numeric text) means "all clusters".
        size: Number of clusters in the clustering.

    Returns:
        The raw response of ``client.msearch``: a single query when a
        subcluster is given, otherwise ``int(size)`` queries.
    """
    terms = _keyword_terms(keywords)
    try:
        subcluster = int(subcluster)
    except (TypeError, ValueError, OverflowError):
        subcluster = None

    # Compare against None explicitly: cluster indices are zero-based, so a
    # truthiness test would wrongly treat cluster 0 as "no subcluster".
    if subcluster is None:
        indices = range(int(size))
    else:
        indices = [subcluster]

    queries = [
        _cluster_query(
            _cluster_id(size, idx),
            ["title", "title_cleaned"] + list(Config.DEPTH.values()),
            RESULT_SIE,
            terms,
        )
        for idx in indices
    ]
    return _msearch(queries)


def search_query_by_cluster(keywords, size, group, discipline, author,
                            pub_period):
    """Search every cluster by keyword with optional facet filters.

    Args:
        keywords: Optional iterable of keywords; each must match the
            ``title`` field (lower-cased).
        size: Number of clusters; converted with ``int()`` for iteration.
        group: Optional ``group_name`` filter value.
        discipline: Optional ``discipline`` filter value.
        author: Optional ``author`` filter value.
        pub_period: Optional two-item sequence of ``dd/MM/yyyy`` bounds for
            ``published_at``.

    Returns:
        The raw response of ``client.msearch`` for ``int(size)`` queries,
        each requesting up to ``RESULT_SIE_CLUSTERED`` hits.
    """
    filters = _build_filters(group, discipline, author, pub_period)
    terms = _keyword_terms(keywords)
    source = ["title", "title_cleaned", "discipline"]
    queries = [
        _cluster_query(
            _cluster_id(size, idx), source, RESULT_SIE_CLUSTERED, terms,
            filters,
        )
        for idx in range(int(size))
    ]
    return _msearch(queries)


def get(id):
    """Return the raw ``client.get`` response for the document with ``id``.

    The parameter shadows the ``id`` builtin; the name is kept so keyword
    callers continue to work.
    """
    return client.get(index="app", doc_type='document', id=id)