Add files via upload

This commit is contained in:
Omar Santos 2023-09-04 22:57:22 -04:00 committed by GitHub
parent b6020f3567
commit 99a0fce025
21 changed files with 1472 additions and 0 deletions

View File

@ -0,0 +1,106 @@
Sure! Here's an example of a Python script that demonstrates Actor-Critic Methods using the OpenAI Gym environment and the Keras library:
```python
import gym
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
class ActorCriticAgent:
def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
# Hyperparameters for the agent
self.lr_actor = 0.001
self.lr_critic = 0.005
self.discount_factor = 0.99
# Create models for the actor and critic
self.actor = self.build_actor()
self.critic = self.build_critic()
def build_actor(self):
input = Input(shape=(self.state_size,))
dense = Dense(24, activation='relu')(input)
probs = Dense(self.action_size, activation='softmax')(dense)
model = Model(inputs=input, outputs=probs)
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=self.lr_actor))
return model
def build_critic(self):
input = Input(shape=(self.state_size,))
dense = Dense(24, activation='relu')(input)
value = Dense(1, activation='linear')(dense)
model = Model(inputs=input, outputs=value)
model.compile(loss='mse', optimizer=Adam(learning_rate=self.lr_critic))
return model
def get_action(self, state):
state = np.reshape(state, [1, self.state_size])
probs = self.actor.predict(state)[0]
action = np.random.choice(self.action_size, p=probs)
return action
def train_model(self, state, action, reward, next_state, done):
# Reshape the raw observations so they match the models' expected (1, state_size) input
state = np.reshape(state, [1, self.state_size])
next_state = np.reshape(next_state, [1, self.state_size])
target = np.zeros((1, self.action_size))
advantages = np.zeros((1, self.action_size))
value = self.critic.predict(state)[0]
next_value = self.critic.predict(next_state)[0]
if done:
advantages[0][action] = reward - value
target[0][action] = reward
else:
advantages[0][action] = reward + self.discount_factor * (next_value) - value
target[0][action] = reward + self.discount_factor * next_value
self.actor.fit(state, advantages, epochs=1, verbose=0)
self.critic.fit(state, target, epochs=1, verbose=0)
if __name__ == "__main__":
# Create the environment
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
# Create an instance of the agent
agent = ActorCriticAgent(state_size, action_size)
scores, episodes = [], []
EPISODES = 100
for episode in range(EPISODES):
done = False
score = 0
state = env.reset()
while not done:
action = agent.get_action(state)
next_state, reward, done, info = env.step(action)
agent.train_model(state, action, reward, next_state, done)
score += reward
state = next_state
if done:
scores.append(score)
episodes.append(episode)
print("Episode:", episode, "Score:", score)
# Plot the scores
import matplotlib.pyplot as plt
plt.plot(episodes, scores, 'b')
plt.xlabel("Episode")
plt.ylabel("Score")
plt.show()
```
In this script, we create an `ActorCriticAgent` class that represents the agent using Actor-Critic Methods. We then use this agent to train on the `CartPole-v1` environment from OpenAI Gym. The agent has an actor model that outputs action probabilities and a critic model that predicts values. These models are trained using the actor-critic algorithm in the `train_model` method.
During training, the agent selects actions based on the actor model's output and updates its models using the observed rewards and states. The scores are stored and plotted at the end to visualize the agent's performance over multiple episodes.
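For reference, the quantities computed in `train_model` are the standard one-step temporal-difference estimates: the critic's target is $r + \gamma V(s')$ and the actor's advantage is $A(s, a) = r + \gamma V(s') - V(s)$, with the $\gamma V(s')$ term dropped on terminal transitions (`done=True`).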
Please make sure you have installed the required libraries (`gym`, `numpy`, `tensorflow`, and `matplotlib`) before running this script; Keras is used through `tensorflow.keras`, so no separate `keras` package is needed.

View File

@ -0,0 +1,46 @@
Sure! Here's a Python script that demonstrates the use of Association Rules using the Apriori and FP-Growth algorithms.
```python
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.frequent_patterns import fpgrowth
# Sample dataset
dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]
# Create one-hot encoded DataFrame
te = TransactionEncoder()
te_data = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_data, columns=te.columns_)
# Generate frequent itemsets using Apriori algorithm
frequent_itemsets_apriori = apriori(df, min_support=0.2, use_colnames=True)
print("Frequent Itemsets (Apriori):")
print(frequent_itemsets_apriori)
# Generate association rules from frequent itemsets using confidence threshold
rules_apriori = association_rules(frequent_itemsets_apriori, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules (Apriori):")
print(rules_apriori)
# Generate frequent itemsets using FP-Growth algorithm
frequent_itemsets_fpgrowth = fpgrowth(df, min_support=0.2, use_colnames=True)
print("\nFrequent Itemsets (FP-Growth):")
print(frequent_itemsets_fpgrowth)
# Generate association rules from frequent itemsets using confidence threshold
rules_fpgrowth = association_rules(frequent_itemsets_fpgrowth, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules (FP-Growth):")
print(rules_fpgrowth)
```
This script uses the `mlxtend` library, which provides implementations of both the Apriori and FP-Growth algorithms. It first one-hot encodes the transaction dataset into a DataFrame, then mines frequent itemsets with Apriori and derives association rules from them using a confidence threshold of 0.7.
The same two steps are then repeated with FP-Growth, which finds the same frequent itemsets but is typically faster on larger datasets because it avoids explicit candidate generation.
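As a quick follow-up (not part of the original script), you can filter and rank the resulting rules with ordinary pandas operations; `strong_rules` below is just an illustrative name:
```python
# Keep only high-confidence rules with positive lift, sorted by lift
strong_rules = rules_apriori[(rules_apriori['confidence'] >= 0.9) & (rules_apriori['lift'] > 1.0)]
print(strong_rules.sort_values('lift', ascending=False)[['antecedents', 'consequents', 'support', 'confidence', 'lift']])
```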
You can replace the `dataset` variable with your own dataset to observe the results for your data. Make sure to install the `mlxtend` library before running this script by using the command `pip install mlxtend`.

View File

@ -0,0 +1,50 @@
Sure! Here's an example of a Python script that demonstrates the DBSCAN algorithm using the iris dataset from scikit-learn:
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
# Load the iris dataset
iris = datasets.load_iris()
X = iris.data
# Scale the features
X = StandardScaler().fit_transform(X)
# Apply DBSCAN
dbscan = DBSCAN(eps=0.3, min_samples=5)
dbscan.fit(X)
# Get the predicted labels, the core-sample mask, and the number of clusters
labels = dbscan.labels_
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[dbscan.core_sample_indices_] = True
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
n_noise = list(labels).count(-1)
# Plot the results
unique_labels = set(labels)
colors = [plt.cm.Spectral(each)
for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise
col = [0, 0, 0, 1]
class_member_mask = (labels == k)
# Plot core samples of the cluster with larger markers
xy = X[class_member_mask & core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
markeredgecolor='k', markersize=10)
# Plot non-core samples (including noise) with smaller markers
xy = X[class_member_mask & ~core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
markeredgecolor='k', markersize=4)
plt.title(f"Estimated number of clusters: {n_clusters}, Noise points: {n_noise}")
plt.show()
```
In this script, we first load the iris dataset and standardize the features using `StandardScaler`. Then we apply DBSCAN with `eps=0.3` (the maximum distance between two samples for them to be considered neighbors) and `min_samples=5` (the number of samples in a neighborhood required for a point to be a core point).
Next, we retrieve the predicted labels and calculate the number of clusters and noise points. Finally, we plot the data points, highlighting the clusters and noise using different colors.
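If you are unsure how to choose `eps` for your own data, one common heuristic (sketched below, not part of the script above) is to plot the sorted distance to each point's k-th nearest neighbor and look for the "elbow"; it reuses the scaled matrix `X` from the script:
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

# Distance from each point to its 5th nearest neighbor (k chosen close to min_samples)
neighbors = NearestNeighbors(n_neighbors=5).fit(X)
distances, _ = neighbors.kneighbors(X)
plt.plot(np.sort(distances[:, -1]))
plt.xlabel("Points sorted by distance")
plt.ylabel("Distance to 5th nearest neighbor")
plt.show()
```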
Note that you may need to have scikit-learn and matplotlib installed for this script to work.

View File

@ -0,0 +1,44 @@
Sure! Below is a Python script that demonstrates the concept of Decision Trees using the popular scikit-learn library.
```python
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
# Load the Iris dataset
data = load_iris()
X = data.data
y = data.target
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create a Decision Tree classifier
clf = DecisionTreeClassifier()
# Train the classifier on the training data
clf.fit(X_train, y_train)
# Make predictions on the testing data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# Visualize the Decision Tree
from sklearn import tree
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 8))
tree.plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()
```
In this script, we first import the necessary libraries: `load_iris` from `sklearn.datasets` to load the Iris dataset, `train_test_split` from `sklearn.model_selection` to split the dataset into training and testing sets, `DecisionTreeClassifier` from `sklearn.tree` to create the Decision Tree classifier, and `metrics` from `sklearn` to evaluate the model.
We load the Iris dataset and split it into training and testing sets using an 80:20 split. Then, we create a Decision Tree classifier and train it on the training data. After that, we make predictions on the testing data and evaluate the model using accuracy as the metric.
Finally, we visualize the Decision Tree using `tree.plot_tree` from `sklearn` and `matplotlib.pyplot`. The resulting tree is displayed using a figure.
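If you prefer a textual view of the same tree, scikit-learn's `export_text` can print the learned decision rules; a small optional addition reusing `clf` and `data` from the script above:
```python
from sklearn.tree import export_text

print(export_text(clf, feature_names=list(data.feature_names)))
```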

View File

@ -0,0 +1,35 @@
Sure! Here's a Python script that demonstrates the concept of Gaussian Mixture Models (GMM) using the scikit-learn library:
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
# Generate random data
np.random.seed(0)
n_samples = 1000
n_components = 3
X = np.concatenate([
np.random.multivariate_normal([0, 0], [[1, 0], [0, 20]], int(0.1 * n_samples)),
np.random.multivariate_normal([-5, 0], [[1, 0], [0, 1]], int(0.3 * n_samples)),
np.random.multivariate_normal([5, 10], [[4, 0], [0, 4]], int(0.6 * n_samples))
])
# Fit the GMM model
gmm = GaussianMixture(n_components=n_components)
gmm.fit(X)
# Predict the labels for the data samples
y_pred = gmm.predict(X)
# Visualize the results
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Gaussian Mixture Models')
plt.show()
```
In this script, we first generate random data points from three different Gaussian distributions. We then use the `GaussianMixture` class from scikit-learn to fit a GMM model on this data. Next, we use the learned GMM model to predict the labels for each data sample. Finally, we visualize the results by plotting the data points colored according to their predicted labels.
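In practice you usually do not know the number of components in advance; a common approach, sketched below with the `X` array from the script above, is to compare candidate values with an information criterion such as BIC (lower is better):
```python
for k in range(1, 6):
    candidate = GaussianMixture(n_components=k, random_state=0).fit(X)
    print(f"{k} components: BIC = {candidate.bic(X):.1f}")
```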
Please make sure you have the scikit-learn and matplotlib libraries installed in your Python environment before running this script.

View File

@ -0,0 +1,34 @@
Sure! Below is an example of a Python script demonstrating Gradient Boosting Machines (GBM) using the scikit-learn library:
```python
# Importing required libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
# Generate a random classification dataset
X, y = make_classification(n_samples=100, random_state=42)
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create and fit the Gradient Boosting Classifier
gbm_model = GradientBoostingClassifier(random_state=42)
gbm_model.fit(X_train, y_train)
# Predict the labels for the test set
y_pred = gbm_model.predict(X_test)
# Calculate the accuracy score
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
```
In this script, we first generate a random classification dataset using the `make_classification` function from scikit-learn. Then, we split the dataset into training and testing sets using the `train_test_split` function.
Next, we create an instance of the Gradient Boosting Classifier using `GradientBoostingClassifier` and fit the model to the training data using the `fit` method.
After fitting the model, we predict the labels for the test set using the `predict` method.
Finally, we calculate the accuracy score by comparing the predicted labels with the true labels and print it out.
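The defaults work reasonably well here, but in practice a GBM's behavior is governed by a few key hyperparameters. Below is a sketch of the knobs you would typically tune, reusing the train/test split from the script above (the values are only illustrative):
```python
tuned_gbm = GradientBoostingClassifier(
    n_estimators=200,    # number of boosting stages
    learning_rate=0.05,  # shrinkage applied to each tree's contribution
    max_depth=3,         # depth of the individual regression trees
    subsample=0.8,       # fraction of samples per tree (stochastic gradient boosting)
    random_state=42,
)
tuned_gbm.fit(X_train, y_train)
print("Tuned accuracy:", accuracy_score(y_test, tuned_gbm.predict(X_test)))
```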

View File

@ -0,0 +1,52 @@
Sure! Here's a Python script that demonstrates Independent Component Analysis (ICA) using the scikit-learn library:
```python
import numpy as np
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt
# Generate data
np.random.seed(0)
n_samples = 2000
time = np.linspace(0, 8, n_samples)
s1 = np.sin(2 * time) # Signal 1: sinusoidal signal
s2 = np.sign(np.sin(3 * time)) # Signal 2: square signal
S = np.c_[s1, s2]
S += 0.2 * np.random.normal(size=S.shape) # Add noise
# Mix data
A = np.array([[1, 1], [0.5, 2]]) # Mixing matrix
X = np.dot(S, A.T) # Generate observations
# ICA
ica = FastICA(n_components=2)
S_ = ica.fit_transform(X) # Reconstruct signals
A_ = ica.mixing_ # Estimated mixing matrix
# Plot results
plt.figure(figsize=(8, 6))
models = [X, S, S_]
names = ['Observations (mixed signals)',
'True Sources',
'ICA estimated sources']
colors = ['red', 'steelblue']
for i, (model, name) in enumerate(zip(models, names), 1):
plt.subplot(3, 1, i)
plt.title(name)
for sig, color in zip(model.T, colors):
plt.plot(sig, color=color)
plt.tight_layout()
plt.show()
```
This script demonstrates ICA by generating two mixed signals, `s1` and `s2`, adding noise, and then applying ICA to separate the original signals. The original signals are represented by the `S` matrix, and the observed mixed signals are stored in the `X` matrix. The `FastICA` class from scikit-learn is used to perform ICA.
The script plots three subplots: the observed mixed signals, the true sources, and the estimated sources achieved through ICA.
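As an optional sanity check (reusing the variables from the script above), the estimated sources and mixing matrix should reconstruct the observations up to the mean that `FastICA` removes during whitening:
```python
print(np.allclose(X, np.dot(S_, A_.T) + ica.mean_))  # expected: True
```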
Make sure to install required dependencies (scikit-learn, numpy, matplotlib) before running the script.

View File

@ -0,0 +1,45 @@
Sure! Here's an example Python script that demonstrates Latent Dirichlet Allocation (LDA) using the scikit-learn library:
```python
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
# Sample data
documents = [
'apple orange apple orange',
'banana apple orange',
'orange banana apple',
'apple apple apple',
'orange orange',
]
# Tokenize and vectorize the documents
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(documents)
# Apply LDA
lda_model = LatentDirichletAllocation(n_components=2, random_state=0)
lda_model.fit(X)
# Get the topics and their corresponding word probabilities
feature_names = vectorizer.get_feature_names_out()
topics = lda_model.components_
# Display the top words for each topic
for topic_idx, topic in enumerate(topics):
top_words = [feature_names[i] for i in topic.argsort()[:-4:-1]]
print(f"Top words for topic {topic_idx+1}: {', '.join(top_words)}")
```
In this script, we start by importing the necessary libraries: `CountVectorizer` for tokenizing and vectorizing the text documents, and `LatentDirichletAllocation` from `sklearn.decomposition` for performing LDA.
Next, we define a sample dataset of text documents. You can replace this with your own dataset.
We then use the CountVectorizer to transform the documents into a matrix representation with token counts. This matrix is then passed to the LatentDirichletAllocation model for training. In this example, we have specified `n_components=2` to indicate that we want to extract two topics.
After fitting the LDA model, we retrieve the feature names and the LDA model's components (topics and word probabilities).
Finally, we loop through each topic to display the top words with the highest probabilities for each topic.
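If you also want to know which topics each document is about, `transform` returns the per-document topic distribution; the loop below is an optional addition reusing `lda_model` and `X` from above:
```python
doc_topics = lda_model.transform(X)
for doc_idx, topic_dist in enumerate(doc_topics):
    print(f"Document {doc_idx}: {topic_dist.round(2)}")
```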
Note that LDA involves random initialization, so topic assignments can vary between runs; setting `random_state=0`, as in this script, makes the results reproducible.

View File

@ -0,0 +1,121 @@
Sure! Here's a Python script that demonstrates the Monte Carlo Tree Search (MCTS) algorithm:
```python
import numpy as np
import random
class Node:
def __init__(self, state, parent=None):
self.state = state
self.parent = parent
self.children = []
self.visits = 0
self.rewards = 0
def expand(self):
possible_moves = self.state.get_possible_moves()
for move in possible_moves:
new_state = self.state.make_move(move)
new_node = Node(new_state, parent=self)
self.children.append(new_node)
def select(self):
selected_child = max(self.children, key=lambda child: child.get_ucb_score())
return selected_child
def simulate(self):
current_state = self.state
while not current_state.is_terminal():
random_move = random.choice(current_state.get_possible_moves())
current_state = current_state.make_move(random_move)
return current_state.get_reward()
def backpropagate(self, reward):
self.visits += 1
self.rewards += reward
if self.parent:
self.parent.backpropagate(reward)
def get_ucb_score(self):
exploration_factor = 1.414 # Adjust this for exploration vs exploitation trade-off
if self.visits == 0:
return float('inf') # Always try unvisited children first
exploitation_score = self.rewards / self.visits
exploration_score = np.sqrt(np.log(self.parent.visits) / self.visits)
return exploitation_score + exploration_factor * exploration_score
class State:
def __init__(self):
self.board = np.zeros((3, 3))
self.current_player = 1
def get_possible_moves(self):
return [(i, j) for i in range(3) for j in range(3) if self.board[i][j] == 0]
def make_move(self, move):
new_state = State()
new_state.board = np.copy(self.board)
new_state.current_player = -self.current_player
new_state.board[move[0]][move[1]] = self.current_player
return new_state
def is_terminal(self):
return np.any(np.sum(self.board, axis=1) == 3) or np.any(np.sum(self.board, axis=0) == 3) \
or np.trace(self.board) == 3 or np.trace(np.fliplr(self.board)) == 3 \
or np.any(np.sum(self.board, axis=1) == -3) or np.any(np.sum(self.board, axis=0) == -3) \
or np.trace(self.board) == -3 or np.trace(np.fliplr(self.board)) == -3 \
or len(self.get_possible_moves()) == 0
def get_reward(self):
if np.any(np.sum(self.board, axis=1) == 3) or np.any(np.sum(self.board, axis=0) == 3) \
or np.trace(self.board) == 3 or np.trace(np.fliplr(self.board)) == 3:
return 1
elif np.any(np.sum(self.board, axis=1) == -3) or np.any(np.sum(self.board, axis=0) == -3) \
or np.trace(self.board) == -3 or np.trace(np.fliplr(self.board)) == -3:
return -1
else:
return 0
def monte_carlo_tree_search(initial_state, iterations):
root = Node(initial_state)
for _ in range(iterations):
# Selection
selected_node = root
while selected_node.children:
selected_node = selected_node.select()
# Expansion
if not selected_node.state.is_terminal():
selected_node.expand()
selected_node = random.choice(selected_node.children)
# Simulation
reward = selected_node.simulate()
# Backpropagation
selected_node.backpropagate(reward)
best_child = max(root.children, key=lambda child: child.visits)
return best_child.state.board
# Test the Monte Carlo Tree Search algorithm on Tic-Tac-Toe game
initial_state = State()
best_move = monte_carlo_tree_search(initial_state, iterations=10000)
print("Best move found by Monte Carlo Tree Search:")
print(best_move)
```
In the above script, we define a `Node` class to represent each state in the game and a `State` class to maintain the current game state. The `monte_carlo_tree_search` function implements the MCTS algorithm and returns the best move found after the specified number of iterations.
In the Tic-Tac-Toe game example, each state is represented by a 3x3 game board. The `is_terminal` method checks if the game is over, and the `get_reward` method assigns rewards to terminal states (-1 for loss, 1 for win, 0 for draw). The `get_possible_moves` method returns all valid moves for the current state, and the `make_move` method creates a new state after making a move.
During each iteration of the MCTS algorithm, the `selection` step selects the most promising child node by applying the Upper Confidence Bound (UCB) formula. If the selected node is not terminal, the `expansion` step creates child nodes by simulating all possible moves. The `simulation` step randomly plays out the game from the selected child node until a terminal state is reached. Finally, the `backpropagation` step updates all nodes in the selected path with the simulation result. The process is repeated for the specified number of iterations.
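The UCB score computed in `get_ucb_score` is the standard UCB1 formula $\frac{w_i}{n_i} + c\sqrt{\frac{\ln N}{n_i}}$, where $w_i$ and $n_i$ are the child's accumulated reward and visit count, $N$ is the parent's visit count, and $c \approx 1.414$ is the exploration factor used in the script.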
After running the MCTS algorithm, the best move is determined by selecting the child node with the highest visit count from the root node. The resulting board configuration is returned as the best move.
Feel free to adjust the number of iterations and exploration factor to see different results.

View File

@ -0,0 +1,42 @@
Sure! The following is a python script that demonstrates the Naïve Bayes algorithm using the famous Iris dataset:
```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Load the Iris dataset
iris = load_iris()
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)
# Create an instance of the Naïve Bayes classifier
classifier = GaussianNB()
# Train the classifier using the training data
classifier.fit(X_train, y_train)
# Make predictions on the testing data
y_pred = classifier.predict(X_test)
# Calculate accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
```
In this script, we start by importing the necessary libraries: `numpy` for numerical operations, `sklearn.datasets` to load the Iris dataset, `sklearn.model_selection` to split the data into training and testing sets, `sklearn.naive_bayes` for the Naïve Bayes classifier, and `sklearn.metrics` for calculating accuracy.
Next, we load the Iris dataset using `load_iris()` function. Then we split the data into training and testing sets using `train_test_split()` function, where `test_size=0.2` indicates that 20% of the data will be used for testing.
We create an instance of the Naïve Bayes classifier using `GaussianNB()`. This classifier assumes that features follow a Gaussian distribution. If your data doesn't meet this assumption, you can explore other variants like multinomial or Bernoulli Naïve Bayes.
We train the classifier using the training data by calling the `fit()` method and passing in the features (X_train) and corresponding labels (y_train).
Then, we make predictions on the testing data using the `predict()` method and passing in the features of the test set (X_test).
Finally, we calculate the accuracy of the classifier by comparing the predicted labels with the true labels from the testing set using the `accuracy_score()` function.
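Because Naïve Bayes is a probabilistic model, you can also inspect the predicted class probabilities rather than just the hard labels; a small optional addition reusing the objects from the script above:
```python
probabilities = classifier.predict_proba(X_test[:5])
for sample_probs in probabilities:
    print(dict(zip(iris.target_names, sample_probs.round(3))))
```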
Hope this helps to demonstrate the Naïve Bayes algorithm in Python!

View File

@ -0,0 +1,63 @@
import numpy as np
import matplotlib.pyplot as plt
# Create a simple neural network with one input layer, one hidden layer, and one output layer
class NeuralNetwork:
def __init__(self):
self.weights1 = np.random.rand(2, 4) # weight matrix between the 2 inputs and the 4 hidden units
self.weights2 = np.random.rand(4, 1) # weight matrix between hidden and output layer
self.bias1 = np.random.rand(1, 4) # bias matrix for hidden layer
self.bias2 = np.random.rand(1, 1) # bias matrix for output layer
def sigmoid(self, x):
# Sigmoid activation function
return 1 / (1 + np.exp(-x))
def forward_propagation(self, X):
# Perform forward propagation
self.hidden_layer = self.sigmoid(np.dot(X, self.weights1) + self.bias1) # calculate hidden layer activations
self.output_layer = self.sigmoid(np.dot(self.hidden_layer, self.weights2) + self.bias2) # calculate output layer activations
return self.output_layer
def backward_propagation(self, X, y, output):
# Perform backward propagation to update weights and biases
self.error = y - output # calculate error
self.delta_output = self.error * (output * (1 - output)) # calculate output gradient
self.delta_hidden = np.dot(self.delta_output, self.weights2.T) * (self.hidden_layer * (1 - self.hidden_layer)) # calculate hidden gradient
self.weights2 += np.dot(self.hidden_layer.T, self.delta_output) # update weights between hidden and output layer
self.weights1 += np.dot(X.T, self.delta_hidden) # update weights between input and hidden layer
self.bias2 += np.sum(self.delta_output, axis=0) # update bias for output layer
self.bias1 += np.sum(self.delta_hidden, axis=0) # update bias for hidden layer
def train(self, X, y, epochs):
# Train the neural network
for _ in range(epochs):
output = self.forward_propagation(X)
self.backward_propagation(X, y, output)
def predict(self, X):
# Make predictions
return self.forward_propagation(X)
# Create a sample dataset for XOR gate
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
# Create and train the neural network
nn = NeuralNetwork()
nn.train(X, y, epochs=10000)
# Make predictions on the same dataset
predictions = nn.predict(X)
# Print the predictions
print("Predictions:")
for i in range(len(predictions)):
print(f"Input: {X[i]}, Predicted Output: {predictions[i]}")
# Plot the predictions
plt.scatter(X[:, 0], X[:, 1], c=predictions.flatten(), cmap='viridis')
plt.xlabel("Input 1")
plt.ylabel("Input 2")
plt.title("Neural Network Predictions for XOR Gate")
plt.show()

View File

@ -0,0 +1,91 @@
Sure! Here's an example of a Python script that demonstrates the concept of Policy Gradients using the OpenAI Gym environment and a simple neural network:
```python
import gym
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Create the policy network
def build_policy_network(input_shape, output_units):
model = Sequential()
model.add(Dense(24, activation='relu', input_shape=input_shape))
model.add(Dense(24, activation='relu'))
model.add(Dense(output_units, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy')
return model
def choose_action(state, model):
state = np.reshape(state, [1, input_shape[0]])
prob_dist = model.predict(state).flatten()
action = np.random.choice(num_actions, 1, p=prob_dist)[0]
return action
def discount_rewards(rewards, gamma):
discounted_rewards = np.zeros_like(rewards)
running_sum = 0
for t in reversed(range(len(rewards))):
running_sum = running_sum * gamma + rewards[t]
discounted_rewards[t] = running_sum
return discounted_rewards
# Set hyperparameters
learning_rate = 0.01
num_episodes = 1000
gamma = 0.99
# Create the environment
env = gym.make('CartPole-v0')
input_shape = env.observation_space.shape
num_actions = env.action_space.n
# Build the policy network and initialize weights
policy_network = build_policy_network(input_shape, num_actions)
# Start training
for episode in range(num_episodes):
state = env.reset()
done = False
episode_rewards = []
episode_gradients = []
while not done:
# Choose action based on the policy network
action = choose_action(state, policy_network)
# Take the chosen action and observe the next state and reward
next_state, reward, done, _ = env.step(action)
# Store the reward
episode_rewards.append(reward)
# Compute the one-hot encoded action
action_onehot = np.zeros(num_actions)
action_onehot[action] = 1
# Compute the gradient of the log-probability of the chosen action
with tf.GradientTape() as tape:
probs = policy_network(np.expand_dims(state, axis=0).astype(np.float32))
action_prob = tf.reduce_sum(probs * tf.convert_to_tensor(action_onehot, dtype=tf.float32))
loss = -tf.math.log(action_prob + 1e-10)
# Store the gradients
episode_gradients.append(tape.gradient(loss, policy_network.trainable_variables))
state = next_state
# Update the policy network
rewards = discount_rewards(episode_rewards, gamma)
for i in range(len(episode_gradients)):
grads = episode_gradients[i]
for j in range(len(grads)):
policy_network.trainable_variables[j].assign_sub(learning_rate * grads[j] * rewards[i])
if episode % 100 == 0:
print("Episode {}: Total reward = {}".format(episode, np.sum(episode_rewards)))
```
This script uses the `gym` package to create the CartPole-v0 environment, which is a classic reinforcement learning problem. It then builds a simple neural network as the policy network, with two hidden layers and a softmax output layer for the action probabilities. It implements the `choose_action` function to sample actions based on the probabilities predicted by the policy network.
During training, the script collects rewards and gradients for each episode. It then applies the policy gradient update rule, computing the discounted rewards and updating the policy network weights accordingly. Finally, it prints the episode's total reward every 100 episodes.
Note that this script is a simplified example and may not converge to the optimal policy. Policy Gradients typically require careful tuning of hyperparameters, selecting suitable network architectures, and using more advanced techniques like baseline estimation and advantage normalization to improve the training stability and performance.
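One of the simplest such stabilization tricks is to standardize the discounted returns before scaling the gradients with them; a sketch of how that would slot into the update step above:
```python
rewards = discount_rewards(episode_rewards, gamma)
rewards = (rewards - np.mean(rewards)) / (np.std(rewards) + 1e-8)  # acts as a crude baseline
```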

View File

@ -0,0 +1,36 @@
Certainly! Here's a Python script that demonstrates Principal Component Analysis (PCA):
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# Generate some random 2D data points
np.random.seed(0)
n_samples = 100
X = np.random.randn(n_samples, 2) * 2
# Apply PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# Plot the original data
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], alpha=0.5)
plt.title('Original Data')
plt.xlabel('X')
plt.ylabel('Y')
# Plot the PCA-transformed data
plt.subplot(1, 2, 2)
plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.5)
plt.title('PCA Transformed Data')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.tight_layout()
plt.show()
```
In this script, we generate a random 2D dataset using `np.random.randn()`. We then apply PCA using `sklearn.decomposition.PCA` and transform the data using `fit_transform()`. Finally, we visualize the original data and the PCA-transformed data in separate subplots using `matplotlib.pyplot.scatter()`.
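A useful follow-up (reusing the fitted `pca` object from above) is to check how much of the total variance each principal component explains:
```python
print("Explained variance ratio:", pca.explained_variance_ratio_)
```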

View File

@ -0,0 +1,160 @@
Sure! Here's a Python script that demonstrates Proximal Policy Optimization (PPO) using the OpenAI Gym's CartPole environment:
```python
import gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Set up the CartPole environment
env = gym.make("CartPole-v1")
num_states = env.observation_space.shape[0]
num_actions = env.action_space.n
# PPO Agent
class PPOAgent:
def __init__(self, num_states, num_actions):
self.gamma = 0.99 # Discount factor
self.epsilon = 0.2 # Clipping factor
self.actor_lr = 0.0003 # Actor learning rate
self.critic_lr = 0.001 # Critic learning rate
self.actor = self.build_actor()
self.critic = self.build_critic()
def build_actor(self):
inputs = layers.Input(shape=(num_states,))
hidden = layers.Dense(128, activation="relu")(inputs)
action_probs = layers.Dense(num_actions, activation="softmax")(hidden)
model = keras.Model(inputs=inputs, outputs=action_probs)
optimizer = tf.keras.optimizers.Adam(learning_rate=self.actor_lr)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")
return model
def build_critic(self):
inputs = layers.Input(shape=(num_states,))
hidden = layers.Dense(128, activation="relu")(inputs)
value = layers.Dense(1, activation="linear")(hidden)
model = keras.Model(inputs=inputs, outputs=value)
optimizer = tf.keras.optimizers.Adam(learning_rate=self.critic_lr)
model.compile(optimizer=optimizer, loss="mean_squared_error")
return model
def choose_action(self, state):
state = np.expand_dims(state, axis=0)
action_probs = self.actor.predict(state).flatten()
# Sample an action from the action probability distribution
action = np.random.choice(num_actions, 1, p=action_probs)[0]
return action
def compute_returns(self, rewards, dones, values):
# Flatten the critic outputs so returns and values line up elementwise
rewards = np.asarray(rewards, dtype=np.float32)
values = np.asarray(values, dtype=np.float32).flatten()
returns = np.zeros_like(rewards)
discounted_sum = 0.0
for i in reversed(range(len(rewards))):
if dones[i]:
discounted_sum = 0.0
discounted_sum = rewards[i] + self.gamma * discounted_sum
returns[i] = discounted_sum
advantages = returns - values
advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-10)
return returns, advantages
def train(self, old_states, actions, rewards, dones, values):
returns, advantages = self.compute_returns(rewards, dones, values)
# Convert inputs to float32 numpy arrays so they interoperate with the TF models
old_states = np.array(old_states, dtype=np.float32)
actions = np.array(actions)
returns = np.asarray(returns, dtype=np.float32)
advantages = np.asarray(advantages, dtype=np.float32)
# Actor training
actions_one_hot = np.eye(num_actions, dtype=np.float32)[actions]
old_action_probs = self.actor.predict(old_states)
old_action_probs = np.clip(old_action_probs, 1e-10, 1.0)
old_action_probs = np.sum(old_action_probs * actions_one_hot, axis=1)
with tf.GradientTape() as tape:
new_action_probs = self.actor(old_states, training=True)
new_action_probs = tf.clip_by_value(new_action_probs, 1e-10, 1.0)
new_action_probs = tf.reduce_sum(new_action_probs * actions_one_hot, axis=1)
ratio = new_action_probs / old_action_probs
surrogate1 = ratio * advantages
surrogate2 = tf.clip_by_value(ratio, 1 - self.epsilon, 1 + self.epsilon) * advantages
actor_loss = -tf.reduce_mean(tf.minimum(surrogate1, surrogate2))
actor_grads = tape.gradient(actor_loss, self.actor.trainable_variables)
self.actor.optimizer.apply_gradients(zip(actor_grads, self.actor.trainable_variables))
# Critic training
with tf.GradientTape() as tape:
values_pred = tf.squeeze(self.critic(old_states, training=True), axis=1)
critic_loss = tf.reduce_mean(tf.square(returns - values_pred))
critic_grads = tape.gradient(critic_loss, self.critic.trainable_variables)
self.critic.optimizer.apply_gradients(zip(critic_grads, self.critic.trainable_variables))
# Initialize PPO agent
agent = PPOAgent(num_states, num_actions)
# Training loop
num_episodes = 500
for episode in range(num_episodes):
state = env.reset()
done = False
old_states = []
actions = []
rewards = []
dones = []
values = []
while not done:
# Collect data
action = agent.choose_action(state)
next_state, reward, done, _ = env.step(action)
old_states.append(state)
actions.append(action)
rewards.append(reward)
dones.append(done)
values.append(agent.critic.predict(np.expand_dims(state, axis=0)))
state = next_state
# Train the agent
agent.train(old_states, actions, rewards, dones, values)
# Display rewards every 10 episodes
if episode % 10 == 0:
total_rewards = sum(rewards)
print(f"Episode: {episode}, Rewards: {total_rewards}")
# Test the trained agent
state = env.reset()
done = False
total_rewards = 0
while not done:
env.render()
action = agent.choose_action(state)
state, reward, done, _ = env.step(action)
total_rewards += reward
print(f"Total Rewards: {total_rewards}")
env.close()
```
Make sure you have the required packages installed (`pip install gym tensorflow numpy`), then run the script. It will train a PPO agent on the CartPole-v1 environment and then test the trained agent; the total reward per episode should generally increase as the agent learns to balance the pole.
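For reference, the actor update in `train` approximates PPO's clipped surrogate objective $L^{CLIP}(\theta) = \mathbb{E}_t\big[\min\big(r_t(\theta)\hat{A}_t,\ \operatorname{clip}(r_t(\theta), 1-\epsilon, 1+\epsilon)\hat{A}_t\big)\big]$, where $r_t(\theta)$ is the probability ratio between the new and old policies and $\hat{A}_t$ is the (normalized) advantage estimate.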

View File

@ -0,0 +1,34 @@
Sure! Here's a python script that demonstrates the concept of Random Forests using the famous Iris dataset.
```python
# Importing the required libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Loading the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Creating a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100)
# Training the classifier
rf_classifier.fit(X_train, y_train)
# Predicting on the test set
y_pred = rf_classifier.predict(X_test)
# Calculating accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
```
In this script, we import the necessary libraries, load the Iris dataset, and split it into training and testing sets. Then, we create a Random Forest classifier with 100 decision trees (`n_estimators=100`) and train it on the training set. Finally, we predict the target variable for the test set and calculate the accuracy of the classifier.
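A nice property of Random Forests is that they expose per-feature importances; an optional follow-up reusing the fitted classifier from above:
```python
for name, importance in zip(iris.feature_names, rf_classifier.feature_importances_):
    print(f"{name}: {importance:.3f}")
```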
Note that you may need to install scikit-learn (`pip install scikit-learn`) if you don't have it already installed.

View File

@ -0,0 +1,112 @@
Sure! Below is an example script that demonstrates the SARSA (State-Action-Reward-State-Action) algorithm using a simple grid world environment:
```python
import numpy as np
# Define the grid world environment
grid = [
[0, 0, 0, 0],
[0, -1, 0, -1],
[0, 0, 0, -1],
[0, -1, 0, 1]
]
# Initialize Q-table
Q = np.zeros((4, 4, 4)) # Q(s, a) - 4x4 grid, 4 actions (up, down, left, right)
# Define epsilon-greedy exploration
epsilon = 0.1
# Define learning parameters
alpha = 0.1 # Learning rate
gamma = 0.9 # Discount factor
# Define action mapping
actions = ['up', 'down', 'left', 'right']
# Get next action using epsilon-greedy exploration
def get_action(state):
if np.random.rand() < epsilon:
action = np.random.choice(actions)
else:
action = actions[np.argmax(Q[state[0], state[1]])]
return action
# Update Q-values using SARSA algorithm
def update_q_values(state, action, reward, next_state, next_action):
Q[state[0], state[1], actions.index(action)] += alpha * (
reward + gamma * Q[next_state[0], next_state[1], actions.index(next_action)] -
Q[state[0], state[1], actions.index(action)])
# Train the agent
def train_agent():
num_episodes = 1000
for episode in range(num_episodes):
state = [3, 0] # Start state
action = get_action(state)
while True:
# Perform selected action
if action == 'up':
next_state = [state[0] - 1, state[1]]
elif action == 'down':
next_state = [state[0] + 1, state[1]]
elif action == 'left':
next_state = [state[0], state[1] - 1]
else:
next_state = [state[0], state[1] + 1]
# Check if next state is valid
if next_state[0] < 0 or next_state[0] >= 4 or next_state[1] < 0 or next_state[1] >= 4:
next_state = state
# Get next action using epsilon-greedy exploration
next_action = get_action(next_state)
# Update Q-values
update_q_values(state, action, grid[next_state[0]][next_state[1]], next_state, next_action)
# Update current state and action
state = next_state
action = next_action
# Break if goal state reached
if grid[state[0]][state[1]] == 1:
break
# Test the trained agent
def test_agent():
state = [3, 0] # Start state
while True:
# Choose the best action based on Q-values
action = actions[np.argmax(Q[state[0], state[1]])]
# Perform selected action
if action == 'up':
next_state = [state[0] - 1, state[1]]
elif action == 'down':
next_state = [state[0] + 1, state[1]]
elif action == 'left':
next_state = [state[0], state[1] - 1]
else:
next_state = [state[0], state[1] + 1]
# Check if next state is valid
if next_state[0] < 0 or next_state[0] >= 4 or next_state[1] < 0 or next_state[1] >= 4:
next_state = state
# Print the current state and action taken
print(f"Current state: {state}, Action: {action}")
# Update current state
state = next_state
# Break if goal state reached
if grid[state[0]][state[1]] == 1:
print("Reached the goal!")
break
# Train and test the agent
train_agent()
test_agent()
```
This script demonstrates the SARSA algorithm in a simple grid-world environment, where the agent has to navigate from the starting state `[3, 0]` to the goal state `[3, 3]` while avoiding obstacles represented by `-1`. The agent uses SARSA to learn the Q-values and then follows the greedy policy derived from them to reach the goal.
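The update applied in `update_q_values` is the standard SARSA rule $Q(s,a) \leftarrow Q(s,a) + \alpha\big[r + \gamma Q(s',a') - Q(s,a)\big]$, which bootstraps from the action $a'$ actually chosen in the next state (on-policy), unlike Q-learning's maximum over next actions.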

View File

@ -0,0 +1,38 @@
Sure! Here's an example script demonstrating the use of Support Vector Machines (SVM) for a multi-class classification problem using the famous Iris dataset:
```python
# Importing the required libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Loading the iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Creating the SVM classifier
svm = SVC(kernel='linear')
# Training the SVM classifier
svm.fit(X_train, y_train)
# Making predictions on the testing set
y_pred = svm.predict(X_test)
# Calculating the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
```
In this script, the iris dataset is first loaded using the `datasets.load_iris()` method from the sklearn library. The features matrix `X` and the target vector `y` are then initialized. Next, the dataset is split into training and testing sets using `train_test_split()` function from the `model_selection` module.
A Support Vector Machine classifier is created using `SVC()` class from the `svm` module, with the `kernel` parameter set to 'linear'. The classifier is then trained on the training set using the `fit()` method.
Predictions are made on the testing set using the `predict()` method, and the accuracy of the model is calculated using the `accuracy_score()` function from the `metrics` module. Finally, the accuracy is printed to the console.
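A linear kernel works well for Iris, but for data that is not linearly separable you would typically try a non-linear kernel. Here is a small optional variation on the script above (the `C` and `gamma` values are only illustrative):
```python
svm_rbf = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_rbf.fit(X_train, y_train)
print(f"RBF-kernel accuracy: {accuracy_score(y_test, svm_rbf.predict(X_test))}")
```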
You can modify this script according to your specific use case or dataset. Remember to install the required packages (`scikit-learn`) in your Python environment before running the script.

View File

@ -0,0 +1,65 @@
Sure! Here's a simple example of a Python script that demonstrates Temporal Difference Learning (TD Learning) using a simple grid world environment:
```python
import numpy as np
# Environment
grid_size = 4
num_episodes = 100
start_state = (0, 0)
end_state = (grid_size - 1, grid_size - 1)
actions = ['up', 'down', 'left', 'right']
# Hyperparameters
alpha = 0.1 # learning rate
gamma = 0.9 # discount factor
# Initialize state-action value function
Q = np.zeros((grid_size, grid_size, len(actions)))
# Helper function to choose an action based on Q-values (epsilon-greedy policy)
def choose_action(state, epsilon):
if np.random.random() < epsilon:
return np.random.choice(actions)
return actions[np.argmax(Q[state])]
# Helper function to get the next state and reward based on the chosen action
def get_next_state_reward(state, action):
if action == 'up':
next_state = (state[0] - 1, state[1])
elif action == 'down':
next_state = (state[0] + 1, state[1])
elif action == 'left':
next_state = (state[0], state[1] - 1)
elif action == 'right':
next_state = (state[0], state[1] + 1)
if next_state[0] < 0 or next_state[0] >= grid_size or next_state[1] < 0 or next_state[1] >= grid_size:
# Hit a wall: stay in the same state with a negative reward
return state, -10
elif next_state == end_state:
# Reached the goal: move into the terminal state with a positive reward
return next_state, 10
else:
# Regular move: advance to the next state with no reward
return next_state, 0
# TD Learning algorithm
for episode in range(num_episodes):
state = start_state
epsilon = 1.0 / (episode + 1) # epsilon-greedy exploration rate
while state != end_state:
action = choose_action(state, epsilon)
next_state, reward = get_next_state_reward(state, action)
# Update Q-values using Temporal Difference Learning
Q[state][actions.index(action)] += alpha * (reward + gamma * np.max(Q[next_state]) - Q[state][actions.index(action)])
state = next_state
# Print the learned Q-values
print(Q)
```
In this script, we define a simple grid world environment with a start state, an end state, and possible actions ('up', 'down', 'left', 'right'). The script then uses the Temporal Difference Learning algorithm to update the state-action values in the Q-table based on the rewards obtained from interactions with the environment. Finally, it prints the learned Q-values.
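The update inside the loop is the temporal-difference control rule $Q(s,a) \leftarrow Q(s,a) + \alpha\big[r + \gamma \max_{a'} Q(s',a') - Q(s,a)\big]$; because it bootstraps from the greedy next action, this particular variant is Q-learning, one of the most common TD control methods.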

View File

@ -0,0 +1,213 @@
Sure! Below is an example Python script that demonstrates the Trust Region Policy Optimization (TRPO) algorithm using OpenAI Gym's "CartPole-v1" environment:
```python
import gym
import numpy as np
import tensorflow as tf
class PolicyNetwork:
def __init__(self, state_dim, action_dim, hidden_dim):
self.state_dim = state_dim
self.action_dim = action_dim
self.hidden_dim = hidden_dim
self.states = tf.placeholder(tf.float32, [None, state_dim], name="states")
self.actions = tf.placeholder(tf.int32, [None], name="actions")
self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
self.mean_network = self.build_network(scope="mean")
self.sample_network = self.build_network(scope="sample")
self.sampled_actions = self.sample_network(self.states)
self.mean_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="mean")
self.sample_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="sample")
self.policy_loss = self.compute_policy_loss()
self.kl_divergence = self.compute_kl_divergence()
self.gradient = self.compute_gradient()
def build_network(self, scope):
with tf.variable_scope(scope):
hidden_layer = tf.layers.dense(self.states, self.hidden_dim, activation=tf.nn.relu)
output_layer = tf.layers.dense(hidden_layer, self.action_dim)
output_probs = tf.nn.softmax(output_layer)
def network(states):
feed_dict = {self.states: states}
sess = tf.get_default_session()
return sess.run(output_probs, feed_dict=feed_dict)
return network
def compute_policy_loss(self):
indices = tf.range(tf.shape(self.sampled_actions)[0]) * tf.shape(self.sampled_actions)[1] + self.actions
selected_action_probs = tf.gather(tf.reshape(self.sampled_actions, [-1]), indices)
ratio = selected_action_probs / tf.stop_gradient(self.mean_network(self.states))
surrogate_loss = -tf.reduce_mean(ratio * self.advantages)
return surrogate_loss
def compute_kl_divergence(self):
mean_network_probs = self.mean_network(self.states)
sample_network_probs = tf.stop_gradient(self.sampled_actions)
return tf.reduce_mean(tf.reduce_sum(mean_network_probs * tf.log(mean_network_probs / sample_network_probs), axis=1))
def compute_gradient(self):
grads = tf.gradients(self.policy_loss, self.sample_weights)
flat_grads = tf.concat([tf.reshape(grad, [-1]) for grad in grads], axis=0)
return flat_grads
def compute_advantages(rewards, next_value, discount_factor=0.99, gae_lambda=0.95):
# Note: this treats the raw rewards as value estimates, which is a rough simplification
values = np.append(rewards, next_value)
deltas = np.asarray(rewards) + discount_factor * values[1:] - values[:-1]
advantages = np.zeros_like(deltas)
next_advantage = 0.0
for t in reversed(range(len(rewards))):
next_advantage = deltas[t] + discount_factor * gae_lambda * next_advantage
advantages[t] = next_advantage
return advantages
def run_episode(env, policy_network, render=False):
states, actions, rewards = [], [], []
state = env.reset()
while True:
if render:
env.render()
action_probs = policy_network.sample_network(np.expand_dims(state, axis=0))
action = np.random.choice(len(action_probs[0]), p=action_probs[0])
next_state, reward, done, _ = env.step(action)
states.append(state)
actions.append(action)
rewards.append(reward)
state = next_state
if done:
break
return states, actions, rewards
def train(env, policy_network, max_iterations=1000, max_episode_length=1000, cg_iterations=10, delta=0.01):
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
trainable_variables = tf.trainable_variables()
grads_placeholder = tf.placeholder(tf.float32, shape=[None])
flat_grads_and_vars_placeholder = tf.placeholder(tf.float32, shape=[None])
grads = tf.gradients(policy_network.kl_divergence, trainable_variables)
grads_placeholder_and_vars = list(zip(grads_placeholder, trainable_variables))
flat_grads_and_vars_placeholder_and_vars = list(zip(flat_grads_and_vars_placeholder, trainable_variables))
compute_grads = tf.train.AdamOptimizer(learning_rate=1e-3).apply_gradients(grads_placeholder_and_vars)
compute_flat_grad = flatten_gradients(grads)
apply_flat_grad = unflatten_gradients(flat_grads_and_vars_placeholder, trainable_variables)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for iteration in range(max_iterations):
episode_states, episode_actions, episode_rewards = run_episode(env, policy_network)
episode_advantages = compute_advantages(episode_rewards, 0)
episode_mean = np.mean(episode_rewards)
episode_std = np.std(episode_rewards)
feed_dict = {
policy_network.states: np.array(episode_states),
policy_network.actions: np.array(episode_actions),
policy_network.advantages: episode_advantages
}
# Update policy network
sess.run(optimizer.minimize(policy_network.policy_loss), feed_dict=feed_dict)
# Update value function (critic network)
for _ in range(cg_iterations):
sess.run(compute_flat_grad, feed_dict=feed_dict)
flat_grads = sess.run(compute_flat_grad, feed_dict=feed_dict)
feed_dict[grads_placeholder] = flat_grads
step_direction = conjugate_gradients(sess, compute_flat_grad, feed_dict)
step_size = np.sqrt(2 * delta / (np.dot(step_direction, hessian_vector_product(sess, state, compute_flat_grad, feed_dict)) + 1e-8))
feed_dict[flat_grads_and_vars_placeholder] = step_size * step_direction
sess.run(apply_flat_grad, feed_dict=feed_dict)
sess.close()
def conjugate_gradients(sess, compute_flat_grad_fn, feed_dict, cg_iterations=10, residual_tol=1e-10):
x = np.zeros_like(np.array(feed_dict).flatten())
b = sess.run(compute_flat_grad_fn, feed_dict=feed_dict)
r = b.copy()
p = b.copy()
rsold = np.dot(r, r)
for _ in range(cg_iterations):
Ap = sess.run(compute_flat_grad_fn, feed_dict={x: p})
alpha = rsold / (np.dot(p, Ap) + 1e-8)
x += alpha * p
r -= alpha * Ap
rsnew = np.dot(r, r)
if np.sqrt(rsnew) < residual_tol:
break
p = r + (rsnew / rsold) * p
rsold = rsnew
return x
def hessian_vector_product(sess, state, compute_flat_grad_fn, feed_dict, damping=0.1):
grads = sess.run(compute_flat_grad_fn, feed_dict=feed_dict)
constraints = tf.placeholder(tf.float32, shape=[None])
compute_kl_grads = tf.gradients(policy_network.kl_divergence, trainable_variables)
gradient_products = tf.reduce_sum(compute_kl_grads * constraints)
feed_dict.update({constraints: grads})
return sess.run(gradient_products, feed_dict=feed_dict)
def flatten_gradients(grads):
flat_grads = []
for grad in grads:
flat_grads.append(tf.reshape(grad, [-1]))
return tf.concat(flat_grads, axis=0)
def unflatten_gradients(grads_placeholder, trainable_variables):
grads = []
start = 0
for var in trainable_variables:
var_shape = var.shape.as_list()
var_size = np.prod(var_shape)
grads.append(tf.reshape(grads_placeholder[start:start+var_size], var_shape))
start += var_size
return grads
def main():
env = gym.make('CartPole-v1')
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
hidden_dim = 32
policy_network = PolicyNetwork(state_dim, action_dim, hidden_dim)
train(env, policy_network, max_iterations=100)
env.close()
if __name__ == "__main__":
main()
```
In this script, the TRPO algorithm is used to optimize a policy network to solve the CartPole-v1 environment from the Gym library. The `PolicyNetwork` class defines the policy network, and the `train` function implements the TRPO algorithm to train the network. The `compute_advantages`, `run_episode`, `conjugate_gradients`, `hessian_vector_product`, `flatten_gradients`, and `unflatten_gradients` functions are helper functions used in the training process.
Note that this implementation is written against the TensorFlow 1.x graph API (`tf.placeholder`, `tf.Session`, `tf.layers`), so it requires TensorFlow 1.x (or `tf.compat.v1` with v2 behavior disabled) in addition to Gym, and it is meant as an illustration of the TRPO machinery rather than a polished implementation.
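For reference, the machinery above (surrogate loss, KL divergence, conjugate gradients) approximates TRPO's constrained update: maximize $\mathbb{E}_t\big[\frac{\pi_\theta(a_t \mid s_t)}{\pi_{\theta_\text{old}}(a_t \mid s_t)}\hat{A}_t\big]$ subject to $\mathbb{E}_t\big[D_{KL}\big(\pi_{\theta_\text{old}}(\cdot \mid s_t)\,\|\,\pi_\theta(\cdot \mid s_t)\big)\big] \le \delta$.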

View File

@ -0,0 +1,46 @@
Sure! Here's an example of a Python script that demonstrates the k-Nearest Neighbors (k-NN) algorithm using the scikit-learn library:
```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create a k-NN classifier with k = 3
k = 3
knn = KNeighborsClassifier(n_neighbors=k)
# Fit the classifier to the training data
knn.fit(X_train, y_train)
# Predict the classes for the test set
y_pred = knn.predict(X_test)
# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
# Example usage: Predict the class for new data
new_data = np.array([[5.0, 3.6, 1.4, 0.2]])
predicted_class = knn.predict(new_data)
print(f"Predicted class: {predicted_class}")
```
In this script, we first load the famous Iris dataset using the scikit-learn `load_iris` function. Then, we split the dataset into training and test sets using the `train_test_split` function, with 80% for training and 20% for testing.
Next, we create a k-NN classifier with `k = 3` using the `KNeighborsClassifier` class from scikit-learn. We fit the classifier to the training data using the `fit` method.
After that, we predict the classes for the test set using the `predict` method. We calculate the accuracy of the classifier by comparing the predicted classes with the true classes using the `accuracy_score` function.
Finally, we demonstrate the usage of the trained classifier by predicting the class for a new data point with the `predict` method.
Note that this is a basic example to showcase the concept of k-NN. In practice, you would need to perform more extensive data preprocessing, feature scaling, hyperparameter tuning, and evaluation to build a more robust and accurate model.
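For example, a simple way to choose `k` is cross-validation over a range of candidate values; a sketch reusing `X` and `y` from the script above:
```python
from sklearn.model_selection import cross_val_score

for k in range(1, 11):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), X, y, cv=5)
    print(f"k={k}: mean CV accuracy = {scores.mean():.3f}")
```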

View File

@ -0,0 +1,39 @@
Sure! Here's a Python script that demonstrates the t-SNE algorithm using the famous Iris dataset for visualization:
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.manifold import TSNE
# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Applying t-SNE
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X)
# Plotting the t-SNE results
colors = ['r', 'g', 'b']
target_names = iris.target_names
plt.figure(figsize=(10, 6))
for i, target_name in enumerate(target_names):
plt.scatter(X_tsne[y == i, 0], X_tsne[y == i, 1], c=colors[i], label=target_name)
plt.xlabel('First t-SNE Component')
plt.ylabel('Second t-SNE Component')
plt.title('t-SNE Visualization of Iris Dataset')
plt.legend()
plt.show()
```
In this script, we first import the necessary libraries (`numpy`, `matplotlib`, `sklearn.datasets`, and `sklearn.manifold.TSNE`) to perform t-SNE. Then, we load the Iris dataset using `datasets.load_iris()` from scikit-learn and assign the data and target variables to the `X` and `y` arrays, respectively.
After that, we create an instance of the t-SNE algorithm with `TSNE(n_components=2, random_state=42)`. We choose 2 components (`n_components=2`) since we want to visualize the data in 2D. The `random_state` parameter is set to ensure reproducibility in the results. We then apply the t-SNE algorithm to the data using `fit_transform(X)`, which returns the transformed data as `X_tsne`.
Finally, we plot the t-SNE results using `matplotlib`. Each data point is plotted in a scatter plot, with different colors representing different classes ('setosa', 'versicolor', and 'virginica') in the Iris dataset.
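If the embedding looks too clumped or too scattered on your own data, the main knob to experiment with is `perplexity` (roughly, the effective number of neighbors each point considers); the value below is only illustrative:
```python
tsne = TSNE(n_components=2, perplexity=20, random_state=42)
X_tsne = tsne.fit_transform(X)
```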
To run this script, ensure that you have the necessary libraries installed (`numpy`, `matplotlib`, and `scikit-learn`). Save it as a .py file and execute it using a Python interpreter. The script will show a plot with the t-SNE visualization of the Iris dataset.