textnavi/models/ESAModel.py

209 lines
5.6 KiB
Python

import json
import logging
import os

from elasticsearch import Elasticsearch

from controllers import Config
# Connection settings for the Elasticsearch cluster that holds the "app" index.
# Every value can be overridden through an environment variable; the fallbacks
# are the values that used to be hard-coded here, so deployments that set
# nothing keep connecting exactly as before.
# NOTE(review): the fallback credentials are still committed to source control.
# Rotate them and drop the defaults once every deployment sets the variables.
_ES_HOST = os.environ.get("TEXTNAVI_ES_HOST", "54.37.31.100")
_ES_PORT = int(os.environ.get("TEXTNAVI_ES_PORT", "9201"))
_ES_USER = os.environ.get("TEXTNAVI_ES_USER", "oaboss")
_ES_PASSWORD = os.environ.get("TEXTNAVI_ES_PASSWORD", "master")

# Shared client used by every query helper in this module.
client = Elasticsearch(
    [_ES_HOST],
    http_auth=(_ES_USER, _ES_PASSWORD),
    port=_ES_PORT,
)

# Hit limit per sub-query in fetch_clusters and search_query_filter.
RESULT_SIE = 300
# Hit limit per sub-query in search_query_by_cluster.
RESULT_SIE_CLUSTERED = 2000
def fetch_clusters(size, group, disipline, author, pub_period):
    """Fetch the documents of every cluster in one multi-search request.

    One sub-query is built per cluster id ``cluster_<size>_<idx>`` for
    ``idx`` in ``range(int(size))``; all of them share the same optional
    filters and are sent together to the "app" index.

    Args:
        size: Number of clusters; also embedded verbatim in each cluster id.
        group: Exact-match filter on ``group_name``; skipped when falsy.
        disipline: Exact-match filter on ``discipline``; skipped when falsy.
        author: Exact-match filter on ``author``; skipped when falsy.
        pub_period: Sequence whose first two items bound ``published_at``
            (declared to Elasticsearch as ``dd/MM/yyyy``); skipped when
            falsy or empty.

    Returns:
        Whatever ``client.msearch`` returns for the request.
    """
    # Term filters, in the same order the request has always carried them.
    criteria = [
        {"term": {field: wanted}}
        for field, wanted in (
            ("group_name", group),
            ("discipline", disipline),
            ("author", author),
        )
        if wanted
    ]
    if pub_period and len(pub_period) > 0:
        date_window = {
            "gte": pub_period[0],
            "lte": pub_period[1],
            "format": "dd/MM/yyyy",
        }
        criteria.append({"range": {"published_at": date_window}})

    searches = [
        {
            "_source": ["title", "title_cleaned", "discipline"],
            "size": RESULT_SIE,
            "query": {
                "bool": {
                    "must": [
                        {"term": {"cluster.id": "cluster_{}_{}".format(size, position)}}
                    ],
                    "filter": criteria,
                }
            },
        }
        for position in range(int(size))
    ]

    # Each search is preceded by an empty header line; the trailing space
    # before every newline is kept so the payload bytes do not change.
    payload = "".join(
        "{} \n%s \n" % json.dumps(search) for search in searches
    )
    return client.msearch(body=payload, index="app", doc_type="document")
def get_facets(keywords, cluster_size, group, discipline, author, pub_period):
    """Return facet counts for the documents matching the given criteria.

    Runs a single search against the "app" index that returns no hits
    (``size`` is 0) and three terms aggregations, on ``group_name``,
    ``discipline`` and ``author``.

    Args:
        keywords: Iterable of strings; each one is lower-cased and must
            match the ``title`` field. ``None`` or an empty iterable means
            no keyword restriction, as in ``search_query_by_cluster``.
        cluster_size: Accepted for interface compatibility; not used here.
        group: Exact-match filter on ``group_name``; skipped when falsy.
        discipline: Exact-match filter on ``discipline``; skipped when falsy.
        author: Exact-match filter on ``author``; skipped when falsy.
        pub_period: Sequence whose first two items bound ``published_at``
            (declared to Elasticsearch as ``dd/MM/yyyy``); skipped when
            falsy.

    Returns:
        Whatever ``client.search`` returns for the request.
    """
    # `or ()` keeps a missing keyword list from raising TypeError.
    terms = [
        {"term": {"title": keyword.lower()}} for keyword in (keywords or ())
    ]

    filters = [
        {"term": {field: value}}
        for field, value in (
            ("group_name", group),
            ("discipline", discipline),
            ("author", author),
        )
        if value
    ]
    if pub_period:
        filters.append({
            "range": {
                "published_at": {
                    "gte": pub_period[0],
                    "lte": pub_period[1],
                    "format": "dd/MM/yyyy",
                }
            }
        })

    request = {
        "size": 0,
        "query": {
            "bool": {
                "must": terms,
                "filter": filters,
            }
        },
        "aggregations": {
            facet: {"terms": {"field": facet}}
            for facet in ("group_name", "discipline", "author")
        },
    }
    return client.search(body=request, index="app", doc_type="document")
def search_query_filter(keywords, subcluster, size):
    """Search by keywords within one cluster, or within all of them.

    Builds one sub-query per targeted cluster id ``cluster_<size>_<idx>``
    and sends them to the "app" index as a single multi-search request.

    Args:
        keywords: Iterable of strings; each one is lower-cased and must
            match the ``title`` field. ``None`` is treated as no keywords.
        subcluster: Index of the only cluster to search. Anything that
            cannot be converted with ``int()`` (``None``, ``""``, ...)
            means "search every cluster".
        size: Number of clusters; also embedded verbatim in each cluster id.

    Returns:
        Whatever ``client.msearch`` returns for the request.
    """
    terms = [
        {"term": {"title": keyword.lower()}} for keyword in (keywords or ())
    ]

    try:
        subcluster = int(subcluster)
    except (TypeError, ValueError, OverflowError):
        subcluster = None

    # Compare against None explicitly: index 0 is a real cluster and must
    # not be mistaken for "no subcluster given".
    if subcluster is None:
        cluster_indices = range(int(size))
    else:
        cluster_indices = [subcluster]

    source_fields = ["title", "title_cleaned"] + list(Config.DEPTH.values())
    queries = [
        {
            "_source": source_fields,
            "size": RESULT_SIE,
            "query": {
                "bool": {
                    "must": [
                        {"term": {"cluster.id": "cluster_{}_{}".format(size, index)}}
                    ] + terms
                }
            },
        }
        for index in cluster_indices
    ]

    # Each query is preceded by an empty header line; the trailing space
    # before every newline matches the payload this function always sent.
    request = "".join("{} \n%s \n" % json.dumps(query) for query in queries)
    return client.msearch(body=request, index="app", doc_type="document")
def search_query_by_cluster(keywords, size, group, discipline, author, pub_period):
    """Search every cluster by keywords and filters in one request.

    One sub-query is built per cluster id ``cluster_<size>_<idx>`` for
    ``idx`` in ``range(int(size))``, each limited to
    ``RESULT_SIE_CLUSTERED`` hits, and all are sent together to the "app"
    index.

    Args:
        keywords: Iterable of strings; each one is lower-cased and must
            match the ``title`` field. Falsy means no keyword restriction.
        size: Number of clusters; also embedded verbatim in each cluster id.
        group: Exact-match filter on ``group_name``; skipped when falsy.
        discipline: Exact-match filter on ``discipline``; skipped when falsy.
        author: Exact-match filter on ``author``; skipped when falsy.
        pub_period: Sequence whose first two items bound ``published_at``
            (declared to Elasticsearch as ``dd/MM/yyyy``); skipped when
            falsy.

    Returns:
        Whatever ``client.msearch`` returns for the request.
    """
    filters = [
        {"term": {field: value}}
        for field, value in (
            ("group_name", group),
            ("discipline", discipline),
            ("author", author),
        )
        if value
    ]
    if pub_period:
        filters.append({
            "range": {
                "published_at": {
                    "gte": pub_period[0],
                    "lte": pub_period[1],
                    "format": "dd/MM/yyyy",
                }
            }
        })

    terms = []
    if keywords:
        terms = [{"term": {"title": keyword.lower()}} for keyword in keywords]

    queries = [
        {
            "_source": ["title", "title_cleaned", "discipline"],
            "size": RESULT_SIE_CLUSTERED,
            "query": {
                "bool": {
                    "must": [
                        {"term": {"cluster.id": "cluster_{}_{}".format(size, index)}}
                    ] + terms,
                    "filter": filters,
                }
            },
        }
        for index in range(int(size))
    ]

    # Each query is preceded by an empty header line; the trailing space
    # before every newline matches the payload this function always sent.
    request = "".join("{} \n%s \n" % json.dumps(query) for query in queries)

    # Debug-level logging replaces the unconditional print of the payload,
    # so the request can still be inspected without writing to stdout.
    logging.getLogger(__name__).debug("msearch request body:\n%s", request)

    return client.msearch(body=request, index="app", doc_type="document")
def get(id):
    """Fetch one document of type "document" from the "app" index.

    Args:
        id: Identifier of the document, passed through to the client as-is.

    Returns:
        Whatever ``client.get`` returns for that document.
    """
    lookup = {"index": "app", "doc_type": "document", "id": id}
    return client.get(**lookup)