import pandas as pd
from sklearn.ensemble import IsolationForest
# Sample data (replace with your actual data)
data = {
'feed_type': ['linux', 'linux', 'linux', 'linux', 'linux', 'PA', 'PA', 'PA', 'PA', 'linux', 'linux', 'linux', 'linux', 'PA', 'PA', 'PA', 'PA', 'PA', 'PA', 'PA', 'PA', 'linux', 'linux', 'linux', 'linux', 'linux', 'PA', 'PA', 'linux', 'linux','PA','PA'],
'volume': [6098989898, 6098989899, 6548989198, 7098989898, 7098989899, 7198989198, 6398989898, 6498989899, 6198989198, 6098989898, 6098989898, 6198989898, 6298989898, 6598989898, 6698989898, 6798989898, 6898989898, 6598989898, 6698989898, 6798989898, 8198989898, 6998989898, 6698989898, 6898989898, 6998989898, 7098989898, 6898989898, 6698989898, 7898989898, 7698989898, 8898989898, 7098989898]
}
# Create a DataFrame
df = pd.DataFrame(data)
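# Note: feed_type is only carried along for alerting; the model is fit on the
# single numeric feature 'volume'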
# Train the model
# Adjust contamination to your expected anomaly rate; random_state makes runs reproducible
model = IsolationForest(contamination=0.01, random_state=42)
model.fit(df[['volume']])
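# Optional sanity check (not part of the original flow): decision_function
# returns an anomaly score per sample, where lower scores are more anomalous.
# Inspecting the score range can help when tuning contamination.
scores = model.decision_function(df[['volume']])
print(f"Training score range: {scores.min():.4f} to {scores.max():.4f}")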
# Function to detect anomalies
def detect_anomalies(feed_type, volume):
    # Use the same column name as at fit time to avoid sklearn's feature-name warning
    x = pd.DataFrame({'volume': [volume]})
    # predict returns -1 for anomalies and 1 for normal points
    return model.predict(x)[0] == -1, feed_type
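# Illustrative check: a volume that appears in the training data should usually
# come back as normal (the exact result depends on the fitted trees)
print(detect_anomalies('linux', 6598989898))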
# Simulate incoming data (replace with your actual feed)
incoming_data = [
    {'feed_type': 'linux', 'volume': 8698989898},
    {'feed_type': 'PA', 'volume': 14000000000}
]
# Check for anomalies for each incoming data point
alerts = []
for data_point in incoming_data:
    is_anomaly, feed_type = detect_anomalies(data_point['feed_type'], data_point['volume'])
    if is_anomaly:
        alerts.append(feed_type)
# Alert and retrain if any anomalies were found
if alerts:
    print(f"Alert: Volume spike detected for {' and '.join(alerts)}!")
    # Convert new data to a DataFrame
    new_df = pd.DataFrame(incoming_data)
    # Concatenate existing DataFrame with new DataFrame
    df = pd.concat([df, new_df], ignore_index=True)
    # Retrain the model with the combined data
    model.fit(df[['volume']])
    print("Model retrained with new data.")
else:
    print("No anomaly detected.")