-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
70 lines (52 loc) · 1.7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import requests
import pandas as pd
from elasticsearch import Elasticsearch
from movie_mapping.mapping import movies_mappings
"""to check elasticsearch is up or not"""
# Startup probe: fetch the root endpoint of the local Elasticsearch node and
# look for the tagline in the response body to decide whether it is up.
substring = "You Know, for Search".encode()
try:
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get("http://127.0.0.1:9200", timeout=5)
except requests.exceptions.RequestException:
    # A refused or timed-out connection raises instead of returning a
    # response; fall through to the failure message rather than crashing
    # with a traceback before the user sees the hint.
    response = None
if response is not None and substring in response.content:
    print("Elasticsearch is up and running!")
else:
    print("Something went wrong, ensure the cluster is up!")
"""function to store data on elasticsearch """
def elasticstore(inputfilepath, indexname, sample_size=200, random_state=42):
    """Index a random sample of rows from a CSV file into Elasticsearch.

    The CSV is read with pandas, its last column is dropped, rows with any
    missing value are removed, and a seeded random sample is taken. Each
    sampled row becomes one document whose id is its position in the sample
    (0, 1, 2, ...), so re-running with the same inputs overwrites the same
    documents.

    Args:
        inputfilepath: Path of the CSV file to read. It must provide the
            columns listed in ``fields`` below.
        indexname: Name of the Elasticsearch index to write to. The index is
            created when it does not exist yet.
        sample_size: Maximum number of rows to index. When fewer complete
            rows are available, all of them are indexed.
        random_state: Seed passed to ``DataFrame.sample`` so repeated runs
            select the same rows.

    Returns:
        True once every sampled row has been indexed and the index refreshed.
    """
    # Columns copied verbatim from each CSV row into the stored document.
    fields = (
        "name",
        "rating",
        "genre",
        "year",
        "score",
        "votes",
        "director",
        "writer",
        "star",
        "country",
        "budget",
        "gross",
        "company",
        "runtime",
    )

    # Drop the last column and every incomplete row before sampling.
    complete_rows = pd.read_csv(inputfilepath).iloc[:, :-1].dropna()
    # Cap the sample at the number of available rows; asking pandas for more
    # rows than exist (without replacement) raises ValueError.
    df = complete_rows.sample(
        min(sample_size, len(complete_rows)), random_state=random_state
    ).reset_index(drop=True)

    es = Elasticsearch("http://127.0.0.1:9200")
    # Round-trip to the cluster before creating anything; the result itself
    # is not needed.
    es.info()

    # Only create the index when it is missing so the import can be re-run,
    # and pass the name by keyword because newer clients reject positionals.
    if not es.indices.exists(index=indexname):
        es.indices.create(index=indexname)
    es.indices.put_mapping(index=indexname, body=movies_mappings)

    for i, row in df.iterrows():
        doc = {field: row[field] for field in fields}
        es.index(index=indexname, id=i, body=doc)

    # Refresh the index once after all documents have been written.
    es.indices.refresh(index=indexname)
    print("process done")
    return True
"""take file path and index name by user"""
# Script entry point. The guard keeps the interactive prompts from firing
# (and blocking) when this module is imported instead of executed.
if __name__ == "__main__":
    # Strip surrounding whitespace that commonly sneaks in when a path or
    # name is pasted into the prompt.
    inputfilename = input("enter the path of csv file: ").strip()
    indexname = input("enter index name here: ").strip()
    elasticstore(inputfilename, indexname)