import pandas as pd
from sklearn.ensemble import IsolationForest
# Sample data (replace with your actual data)
data = {
'feed_type': ['linux', 'linux', 'linux', 'linux', 'linux', 'PA', 'PA', 'PA', 'PA', 'linux', 'linux', 'linux', 'linux', 'PA', 'PA', 'PA', 'PA', 'PA', 'PA', 'PA', 'PA', 'linux', 'linux', 'linux', 'linux', 'linux', 'PA', 'PA', 'linux', 'linux','PA','PA'],
'volume': [6098989898, 6098989899, 6548989198, 7098989898, 7098989899, 7198989198, 6398989898, 6498989899, 6198989198, 6098989898, 6098989898, 6198989898, 6298989898, 6598989898, 6698989898, 6798989898, 6898989898, 6598989898, 6698989898, 6798989898, 8198989898, 6998989898, 6698989898, 6898989898, 6998989898, 7098989898, 6898989898, 6698989898, 7898989898, 7698989898, 8898989898, 7098989898]
}
# Create a DataFrame
df = pd.DataFrame(data)
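# Note: feed_type is only carried along for alerting; the model is fit on the
# single numeric feature 'volume'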
# Train the model
# Adjust contamination to your expected anomaly rate; random_state makes runs reproducible
model = IsolationForest(contamination=0.01, random_state=42)
model.fit(df[['volume']])
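# Optional sanity check (not part of the original flow): decision_function
# returns an anomaly score per sample, where lower scores are more anomalous.
# Inspecting the score range can help when tuning contamination.
scores = model.decision_function(df[['volume']])
print(f"Training score range: {scores.min():.4f} to {scores.max():.4f}")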
# Function to detect anomalies
def detect_anomalies(feed_type, volume):
    # Use the same column name as at fit time to avoid sklearn's feature-name warning
    x = pd.DataFrame({'volume': [volume]})
    # predict returns -1 for anomalies and 1 for normal points
    return model.predict(x)[0] == -1, feed_type
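# Illustrative check: a volume that appears in the training data should usually
# come back as normal (the exact result depends on the fitted trees)
print(detect_anomalies('linux', 6598989898))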
# Simulate incoming data (replace with your actual feed)
incoming_data = [
    {'feed_type': 'linux', 'volume': 8698989898},
    {'feed_type': 'PA', 'volume': 14000000000}
]
# Check for anomalies for each incoming data point
alerts = []
for data_point in incoming_data:
    is_anomaly, feed_type = detect_anomalies(data_point['feed_type'], data_point['volume'])
    if is_anomaly:
        alerts.append(feed_type)
# Alert and retrain if any anomalies were found
if alerts:
    print(f"Alert: Volume spike detected for {' and '.join(alerts)}!")
    # Convert new data to a DataFrame
    new_df = pd.DataFrame(incoming_data)
    # Concatenate existing DataFrame with new DataFrame
    df = pd.concat([df, new_df], ignore_index=True)
    # Retrain the model with the combined data
    model.fit(df[['volume']])
    print("Model retrained with new data.")
else:
    print("No anomaly detected.")