# Execute a command that has a cell output
print("Hello Jupyter")

Hello Jupyter

# Execute a command that raises an error
print("Hello Jupyter", end=1)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/Users/gilbartv/Documents/git/python-intro/exercise/jupyter-notebook/lesson-3-companion.ipynb Cell 3 line 2
      <a href='vscode-notebook-cell:/Users/gilbartv/Documents/git/python-intro/exercise/jupyter-notebook/lesson-3-companion.ipynb#W1sZmlsZQ%3D%3D?line=0'>1</a> # Execute a command that raises an error
----> <a href='vscode-notebook-cell:/Users/gilbartv/Documents/git/python-intro/exercise/jupyter-notebook/lesson-3-companion.ipynb#W1sZmlsZQ%3D%3D?line=1'>2</a> print("Hello Jupyter", end=1)

TypeError: end must be None or a string, not int

import pandas as pd

# Load the table with its first column as the index, then reshape it to long
# form (one 'nucl'/'freq' pair per row) while keeping that index.
df = pd.read_csv('exercise/data/example.txt', index_col=0, sep=' ').melt(
    var_name='nucl', value_name='freq', ignore_index=False
)

df.index

Index(['seq1', 'seq2', 'seq3', 'seq4', 'seq5', 'seq1', 'seq2', 'seq3', 'seq4',
       'seq5', 'seq1', 'seq2', 'seq3', 'seq4', 'seq5', 'seq1', 'seq2', 'seq3',
       'seq4', 'seq5'],
      dtype='object', name='Seq')

df.columns

Index(['nucl', 'freq'], dtype='object')

df.head()

df['freq'].mean()

0.25

df.groupby("nucl")[['freq']].mean()

# %load exercise/script/analyse_fasta.py
#!/usr/bin/env python3

import sys 

def nucl_freq(seq):
  """Return the relative frequency of each nucleotide in a DNA sequence.

  Args:
    seq: Non-empty string made up only of the upper-case characters
      A, T, C and G.

  Returns:
    A dict mapping "A", "T", "C" and "G" (always in that order) to the
    fraction of positions in ``seq`` that hold that nucleotide.

  Raises:
    TypeError: If ``seq`` is not a string.
    ValueError: If ``seq`` is empty or contains any other character.
  """
  if not isinstance(seq, str):
    raise TypeError("Input must be a string.")
  if not seq:
    # Frequencies are undefined for an empty sequence; fail with a clear
    # message instead of a ZeroDivisionError in the division below.
    raise ValueError("Input string must not be empty.")

  # A tuple rather than a set, so the result lists the nucleotides in the
  # same order on every run.
  valid_nucl = ("A", "T", "C", "G")
  if set(seq).difference(valid_nucl):
    raise ValueError("Input string must only contain characters A, C, T or G.")

  n = len(seq)
  return {nucl: seq.count(nucl) / n for nucl in valid_nucl}

def analyse_fasta(input_file, output_file):
  """Write the nucleotide frequencies of every sequence in a FASTA file.

  Each record starts with a ">" header line whose remaining text is the
  sequence name. All following non-blank lines up to the next header are
  joined into one sequence, so multi-line records are analysed as a whole.
  Blank lines are ignored and a header without sequence data produces no
  output row.

  Args:
    input_file: Path of the FASTA file to read.
    output_file: Path of the space-separated table to write. It starts with
      the header "Seq A T C G", followed by one row per sequence.

  Returns:
    None.

  Raises:
    ValueError: If sequence data appears before the first header. Errors
      raised by ``nucl_freq`` for an invalid sequence also propagate.
  """
  freq = {}
  sequence_name = None
  fragments = []  # sequence lines collected for the current record

  with open(input_file, 'r') as fasta:
    for raw_line in fasta:
      line = raw_line.strip()
      if not line:
        # Blank or trailing lines carry no sequence data.
        continue
      if line.startswith(">"):
        # A new header closes the previous record, if it had any data.
        if fragments:
          freq[sequence_name] = nucl_freq("".join(fragments))
        sequence_name = line[1:]
        fragments = []
      elif sequence_name is None:
        raise ValueError("Sequence data found before the first FASTA header.")
      else:
        fragments.append(line)

  # The last record has no following header to close it.
  if fragments:
    freq[sequence_name] = nucl_freq("".join(fragments))

  with open(output_file, 'w') as output:
    output.write("Seq A T C G\n")
    for key, value in freq.items():
      output.write(f"{key} {value.get('A')} {value.get('T')} {value.get('C')} {value.get('G')}\n")

  return None

nucl_freq("AACTTG")

{'T': 0.3333333333333333,
 'C': 0.16666666666666666,
 'G': 0.16666666666666666,
 'A': 0.3333333333333333}

%whos

del nucl_freq, analyse_fasta

%whos

Variable           Type                Data/Info
------------------------------------------------
analyse_fasta      function            <function analyse_fasta at 0x132eb7100>
f                  function            <function f at 0x10589a840>
fig                Figure              Figure({\n    'data': [{'<...>': {'text': 'freq'}}}\n})
interact           _InteractFactory    <ipywidgets.widgets.inter<...>ry object at 0x105887a50>
interactive_plot   function            <function interactive_plot at 0x113baccc0>
nucl_freq          function            <function nucl_freq at 0x132eb7740>
pd                 module              <module 'pandas' from '/U<...>ages/pandas/__init__.py'>
plt                module              <module 'matplotlib.pyplo<...>es/matplotlib/pyplot.py'>
px                 module              <module 'plotly.express' <...>tly/express/__init__.py'>
sys                module              <module 'sys' (built-in)>
widgets            module              <module 'ipywidgets.widge<...>ets/widgets/__init__.py'>
Variable           Type                Data/Info
------------------------------------------------
f                  function            <function f at 0x10589a840>
fig                Figure              Figure({\n    'data': [{'<...>': {'text': 'freq'}}}\n})
interact           _InteractFactory    <ipywidgets.widgets.inter<...>ry object at 0x105887a50>
interactive_plot   function            <function interactive_plot at 0x113baccc0>
pd                 module              <module 'pandas' from '/U<...>ages/pandas/__init__.py'>
plt                module              <module 'matplotlib.pyplo<...>es/matplotlib/pyplot.py'>
px                 module              <module 'plotly.express' <...>tly/express/__init__.py'>
sys                module              <module 'sys' (built-in)>
widgets            module              <module 'ipywidgets.widge<...>ets/widgets/__init__.py'>

%run exercise/script/analyse_fasta.py

nucl_freq("AACTTG")

{'T': 0.3333333333333333,
 'C': 0.16666666666666666,
 'G': 0.16666666666666666,
 'A': 0.3333333333333333}

import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact
from ipywidgets import widgets

# Create a DataFrame from the given data
df = pd.read_csv('exercise/data/example.txt', index_col=0, sep=' ')

def interactive_plot(columns):
    """Draw a stacked bar chart of the selected columns of the global ``df``.

    Args:
        columns: A single column label of ``df`` (as passed by a Dropdown
            widget) or an iterable of labels (as passed by a SelectMultiple
            widget). Bars are stacked in the order given.
    """
    # A single selection arrives as a plain string; wrap it so the loop below
    # does not iterate over its characters.
    if isinstance(columns, str):
        columns = [columns]

    _, ax = plt.subplots(1, figsize=(5, 6))

    # Running top of each stack, sized from the data instead of being
    # hard-coded to five rows.
    bottom = pd.Series(0.0, index=df.index)
    for col in columns:
        ax.bar(df.index, df[col], label=col, bottom=bottom)
        bottom = bottom + df[col]

    # Add labels and legend
    ax.set_xlabel('Sequences')
    ax.set_ylabel('Frequency')
    ax.legend(title='Nucleotides', bbox_to_anchor=(0, 1),
              loc='lower left', ncols=4)
    plt.show()

interact(interactive_plot, columns=df.columns)

interactive(children=(Dropdown(description='columns', options=('A', 'T', 'C', 'G'), value='A'), Output()), _do…

<function __main__.interactive_plot(columns)>

# Multi-select variant: several columns of df can be chosen at once and are
# passed to interactive_plot together. The label names what the options are.
interact(interactive_plot, columns=widgets.SelectMultiple(
    options=df.columns,
    value=[df.columns[0]],
    description='Nucleotides',
    disabled=False
))

interactive(children=(SelectMultiple(description='Fruits', index=(0,), options=('A', 'T', 'C', 'G'), value=('A…

<function __main__.interactive_plot(columns)>

import plotly.express as px
import pandas as pd

# Read the table and reshape it to long form, keeping 'Seq' as an
# identifier column next to the 'nucl'/'freq' pairs.
df = pd.read_csv('exercise/data/example.txt', sep=' ').melt(
    id_vars=['Seq'], var_name='nucl', value_name='freq'
)

# One bar per sequence, coloured by nucleotide.
fig = px.bar(
    df,
    x="Seq",
    y="freq",
    color="nucl",
    title="Nucleotide frequency",
)
fig.show()

	freq
nucl
A	0.252
C	0.252
G	0.256
T	0.240

Lesson 3 Jupyter Notebook Companion

Recap of lesson 2

Setup

Description of dataset

Statistics

Testing magic commands

Interactive plots

Widgets

Plotly express

	nucl	freq
Seq
seq1	A	0.46
seq2	A	0.20
seq3	A	0.16
seq4	A	0.18
seq5	A	0.26