#INTRO:
This is a completely reproducible analysis script for DATASET X.
You can run it all code here.
All git logs show exactly how the script was run. https://
SETUP fMRIreproduceยถ
<โINSERT NAME OF DATASETโ>
๐ฅ โจ ๐ฑ ๐ง Install prerequisitesยถ
%pip install pandas # Data manipulation and analysis
#%pip install numpy # Numerical operations
%pip install scipy # Scientific computing
%pip install matplotlib # Creating static, interactive, and animated visualizations
%pip install seaborn # Statistical data visualization based on matplotlib
%pip install nibabel # Reading and writing neuroimaging data
%pip install nilearn # Statistical learning for neuroimaging
%pip install nltools # Neuroimaging analysis in Python
%pip install nipype # Neuroimaging in Python Pipelines and Environments
%pip install hypertools # Visualizing and manipulating high-dimensional data
%pip install networkx # Creation, manipulation, and study of the structure, dynamics, and functions of complex networks
%pip install scikit-learn # Machine learning in Python
%pip install pynv # NVIDIA management library wrapper (Note: pynv is not a standard package, might be a typo or internal)
%pip install nipype # Neuroimaging in Python Pipelines and Environments (duplicate)
%pip install pybids # BIDS dataset indexing and querying
%pip install git-annex # Install git-annex for managing large files
%pip install datalad # Install datalad for data management and reproducibility
%pip install gputil # Install gputil for monitoring GPU usage
%pip install --upgrade rclone
## Before publishing notebook, add the version of each package that last ran the script succefully as comments.Fetching long content....
#@title Mount Dataset
# This cell performs all necessary steps in the correct order:
# 1. Installs the "fuse3" system package to provide the missing "fusermount3" tool.
# 2. Installs the latest version of rclone.
# 3. Writes the service account credential to a temporary file.
# 4. Creates the rclone config file.
# 5. Mounts the drive using the stable "nohup" command and the unambiguous Folder ID.
import os
import time
# --- The unique Folder ID for your "data" folder. ---
FOLDER_ID = "1_E2sBa7VGfs_kqJ9a-86wnf2nRn3Jmnt"
# --- The service account credential key. ---
# THESE CREDENTIALS ONLY GIVE ACCESS TO AN OPEN DATASET; SO IS SAFE
SERVICE_ACCOUNT_JSON_CONTENT = """
{
"type": "service_account",
"project_id": "reproduciblescience",
"private_key_id": "994bddbdd5cc002fa5cc180801c64fa3fd75a40b",
"private_key": "-----BEGIN PRIVATE KEY-----\\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCvu+wn0XCJON0D\\nCYoInCDa/8KV2ht+3DBP3yik5kwmTTvv5EyX0Xy+gAtZ3gBmYhn0uO66XdVfaUq6\\n5GN6RlX7RkGLyWd5Rn1FXv0TwI5F2HsQWvNtav6bFGseQ+tY0+oEQeyBcEc1m7gl\\nP8q9Lh7ZDWEPLZ4X2ulOJyCAH08mQ06+ctZaakDkNAOF0Jp8qa7UdmHhHHx/VXjx\\noimhMIFHkRUgYTSKiFEQ+xtTyLPuSjbCwMhauw/CKn/CEcJrEwz0lYKX0zMNScZ0\\nZvJCU+S3qdmTdPgxwJS5z+ujPkSJef71GO10DqdrZN5om5xuykjZ5LQxq7y51RC+\\nHCqKRZV3AgMBAAECggEAMaSbGkZ6jXEgUhbURQlh6Ku+etmR0dz+0tleIHMkSN+h\\nkmuCTZLys9f1HO5vW3zUC0NgwQn32EukvjX9ehG7gU+//W6oSeSlR78FLBrFtipo\\n3V5sDNlgUkvK1NJn/gBjR8+dsZUXsKesNe2W5qlMMpGqfQCse92t8ZNxBXCSyeJa\\nerFHYkTIRJRH1SFuGcRoNf0v7OnjCKfdT6Nq9oaOXpu2M5MYap8KKV42eSmkKHFx\\n922CIPisAHYjo474A8Ng8PRV2GG/wzb/r/DdNz8uFhMhDNose0fI/KLoAl8FWwAS\\nHDQ3wdhJX1pcPQdxPyDtbj7f/MTSEE50V7PRhmGx6QKBgQDlkl+d2su6sOJseWfb\\nJ+pNpBo9QUGUkJPXySb7DqshKC3kqXWdieaFyKWOEqhMQKy+xjP4ZpCTCaBaetT5\\nm6gMxIBIub6LKHVMI1u+Kp9OkeOBehhsOsbCD3N//A1FHrYl1PZXAjKwoTG+1cWW\\n4vVi5GpANFaGOALdnSXENnndCQKBgQDD9urrLhTladAtZ9y8/c410A0TwpYJoVmc\\nWlrNh64GChr9iHFd1CKKUjHykKtLGhsvVlkIlelxHFGWt+eJDBo+bcdLrno0bsPb\\nswbGxNZQkBRbypTsvSbF8e2RgLHM0GnwSIAspdZOMzRTOMQ4tv4XHv3uVeNvAWqX\\n4CSIaPL+fwKBgQDENNHbzEALE1qxcyG8QOUT7okuiTNPkjGJX7YdJHBGKlzu7ZZE\\nSrPm65HzVybX5P6/Hg8U4jP2N2gBbLHRk6ORJfCuN3IO5Uk7BY1Hfo4hTLaw1nVB\\nKwrsmOASJo4A4Tq0IstXmd1Q8LPfcj0ptFgdaTMWJKU+mWh5l/HLPPPyYQKBgAue\\nooNbciXARnXKyM30BrwuH/hh+1+Kz7NzsBqrxufYxYzvADneEEY/ZXLKnT2QyLff\\n2Nund/YbBcN4GCSKMSwuejjEmb5uKvLmBKrYlRSl1m8y7e2zsY7fL4UHKoPIf9+d\\njOwY0UJFyv58STZLs3lPfyMYuKmmXdmvkrr77hylAoGAYNczmJQY/pH9OaE9SKoT\\nWPdx2ppgFc5UKRigPZ65I+KXnOnT3ETMo/fAdu02wKR2ckF3PfGLpfJKXQcEY59W\\nCnI9wn2+PAKlp2Xl29N7bzxKDbCCBwSg7dZT4RzW3l5zF93DUVnmQa5iQasxVVEr\\nveVB2lWK51zTNswv+kEf3Bw=\\n-----END PRIVATE KEY-----\\n",
"client_email": "readonly-ds-localizer@reproduciblescience.iam.gserviceaccount.com",
"client_id": "107277597286637154079",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/readonly-ds-localizer%40reproduciblescience.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}
"""
# --- Step 1: Install system dependencies ---
print("Installing fuse3 to provide fusermount3...")
!sudo apt-get install -y fuse3 > /dev/null 2>&1
# --- Step 2: Install rclone ---
print("Installing rclone...")
!curl https://rclone.org/install.sh | sudo bash > /dev/null 2>&1
# --- Step 3: Write the credential and config files ---
print("Configuring rclone...")
sa_key_path = '/tmp/service_account.json'
with open(sa_key_path, 'w') as f:
f.write(SERVICE_ACCOUNT_JSON_CONTENT)
!mkdir -p ~/.config/rclone
config_content = f"""
[PublicDrive]
type = drive
scope = drive.readonly
service_account_file = {sa_key_path}
"""
with open('/root/.config/rclone/rclone.conf', 'w') as f:
f.write(config_content)
# --- Step 4: Mount the drive using nohup for stability ---
print("Mounting the drive...")
!mkdir -p /content/dataset
!nohup rclone mount PublicDrive: /content/dataset \
--drive-root-folder-id={FOLDER_ID} \
--read-only \
--allow-other \
--log-level INFO --log-file /tmp/rclone.log &
# --- Step 5: Verify the mount was successful ---
print("โ
Mount command sent. Waiting a few seconds for it to initialize...")
time.sleep(5) # Give it a generous 5 seconds to initialize
print("\n๐ Mounted directory contents:")
!ls -l /content/datasetInstalling fuse3 to provide fusermount3...
Installing rclone...
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 4734 100 4734 0 0 8100 0 --:--:-- --:--:-- --:--:-- 8106
Configuring rclone...
Mounting the drive...
nohup: appending output to 'nohup.out'
โ
Mount command sent. Waiting a few seconds for it to initialize...
๐ Mounted directory contents:
total 6
-rw-r--r-- 1 root root 1507 Jun 17 01:17 dataset_description.json
drwxr-xr-x 1 root root 0 Jun 17 01:34 derivatives
-rw-r--r-- 1 root root 485 Jun 17 01:17 participants.json
-rw-r--r-- 1 root root 2583 Jun 17 01:17 participants.tsv
-rw-r--r-- 1 root root 184 Jun 17 01:17 README
drwxr-xr-x 1 root root 0 Jun 17 01:17 sub-S01
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S02
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S03
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S04
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S05
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S06
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S07
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S08
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S09
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S10
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S11
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S12
drwxr-xr-x 1 root root 0 Jun 18 20:21 sub-S13
drwxr-xr-x 1 root root 0 Jun 18 20:22 sub-S14
drwxr-xr-x 1 root root 0 Jun 18 20:22 sub-S15
drwxr-xr-x 1 root root 0 Jun 18 20:22 sub-S16
drwxr-xr-x 1 root root 0 Jun 18 20:22 sub-S17
drwxr-xr-x 1 root root 0 Jun 18 20:22 sub-S18
drwxr-xr-x 1 root root 0 Jun 18 20:22 sub-S19
drwxr-xr-x 1 root root 0 Jun 18 20:22 sub-S20
-rw-r--r-- 1 root root 213 Jun 17 01:17 task-localizer_bold.json
!lssample_data
1.2 Configยถ
#to set your account's default identity. Uncomment 2 following lines:
#!git config --global user.email "you@example.com"
#!git config --global user.name "Your Name"Inspect System Resourcesยถ
import platform
import psutil
import shutil
import GPUtil
import socket
import os
# Gather system information
system_info = {
"Operating System Name": platform.system(),
"Architecture": platform.machine(),
"Total RAM": f"{psutil.virtual_memory().total / (1024**3):.2f} GB",
"CPU Info": platform.processor(),
"CPU Cores": psutil.cpu_count(logical=True),
"OS Release": platform.release(),
"OS Version": platform.version(),
}
# Attempt to get distribution information (works well for Linux)
try:
if hasattr(platform, 'dist'):
system_info["OS Distribution"] = platform.dist()
elif hasattr(platform, 'freedesktop_os_info'):
system_info["OS Distribution"] = platform.freedesktop_os_info()
else:
system_info["OS Distribution"] = "Distribution information not available"
except AttributeError:
system_info["OS Distribution"] = "Distribution information not available"
# Get disk usage information
try:
total_disk_bytes, used_disk_bytes, free_disk_bytes = shutil.disk_usage("/")
disk_info = {
"Total Disk Space": f"{total_disk_bytes / (1024**3):.2f} GB",
"Used Disk Space": f"{used_disk_bytes / (1024**3):.2f} GB",
"Free Disk Space": f"{free_disk_bytes / (1024**3):.2f} GB"
}
except Exception as e:
disk_info = f"Could not retrieve disk usage information: {e}"
# Get GPU information
try:
gpus = GPUtil.getGPUs()
gpu_info = []
if gpus:
for gpu in gpus:
gpu_info.append({
"id": gpu.id,
"name": gpu.name,
"load": f"{gpu.load*100:.2f}%",
"free_memory": f"{gpu.memoryFree}MB",
"used_memory": f"{gpu.memoryUsed}MB",
"total_memory": f"{gpu.memoryTotal}MB",
"temperature": f"{gpu.temperature:.2f}ยฐC",
"uuid": gpu.uuid
})
else:
gpu_info = "No GPU information available."
except Exception as e:
gpu_info = f"Could not retrieve GPU information: {e}"
# Get network information
try:
hostname = socket.gethostname()
ip_address = socket.gethostbyname(hostname)
network_info = {
"Hostname": hostname,
"IP Address": ip_address
}
except Exception as e:
network_info = f"Could not retrieve network information: {e}"
# Get user information - Handle potential errors in environments like Colab
username = "Could not retrieve username"
home_directory = "Could not retrieve home directory"
is_root = False # Initialize is_root to False
try:
username = os.getlogin()
except Exception:
pass # Ignore error if getlogin fails
try:
home_directory = os.path.expanduser("~")
except Exception:
pass # Ignore error if expanduser fails
try:
is_root = os.geteuid() == 0
except Exception:
pass # Ignore error if geteuid fails
user_info = {
"Username": username,
"Home Directory": home_directory,
"Is Root User": is_root
}
# Get mounted drives information
try:
mounted_drives = [partition.mountpoint for partition in psutil.disk_partitions()]
# Explicitly check for Google Drive mount point in Colab
if os.path.exists('/content/drive'):
mounted_drives.append("Google Drive: /content/drive")
except Exception as e:
mounted_drives = f"Could not retrieve mounted drives information: {e}"
# Print the gathered system information
print("Gathered System Information:")
for key, value in system_info.items():
print(f"{key}: {value}")
print("\nDisk Usage Information (for /):")
if isinstance(disk_info, dict):
for key, value in disk_info.items():
print(f" {key}: {value}")
else:
print(f" {disk_info}")
print(f"\nMounted Drives: {mounted_drives}")
print("\nGPU Information:")
if isinstance(gpu_info, list):
if gpu_info:
for i, gpu in enumerate(gpu_info):
print(f" GPU {i}:")
for key, value in gpu.items():
print(f" {key.replace('_', ' ').capitalize()}: {value}")
else:
print(" No GPU information available.")
else:
print(f" {gpu_info}")
print("\nNetwork Information:")
if isinstance(network_info, dict):
for key, value in network_info.items():
print(f" {key}: {value}")
else:
print(f" {network_info}")
print("\nUser Information:")
if isinstance(user_info, dict):
for key, value in user_info.items():
print(f" {key}: {value}")
else:
print(f" {user_info}")Gathered System Information:
Operating System Name: Linux
Architecture: x86_64
Total RAM: 12.67 GB
CPU Info: x86_64
CPU Cores: 2
OS Release: 6.1.123+
OS Version: #1 SMP PREEMPT_DYNAMIC Sun Mar 30 16:01:29 UTC 2025
OS Distribution: Distribution information not available
Disk Usage Information (for /):
Total Disk Space: 225.83 GB
Used Disk Space: 37.90 GB
Free Disk Space: 187.92 GB
Mounted Drives: ['/usr/sbin/docker-init', '/kaggle/input', '/etc/resolv.conf', '/etc/hostname', '/etc/hosts']
GPU Information:
No GPU information available.
Network Information:
Hostname: 32467780888e
IP Address: 172.28.0.12
User Information:
Username: Could not retrieve username
Home Directory: /root
Is Root User: True
Mount SharePointยถ
#@title Mount Lab SharePoint Drive (Safe Append Version)
#@markdown This cell installs rclone, fetches a specific rclone configuration
#@markdown from Secret Manager, and safely appends it to the main config file
#@markdown before mounting the drive.
import os
import time
from google.colab import auth
from google.cloud import secretmanager
# --- V V V --- Configuration --- V V V ---
SECRET_RESOURCE_NAME = "projects/789057049651/secrets/RCLONE_CONFIG_CNCLAB/versions/latest"
REMOTE_NAME = "sharepoint_cnclab" #<-- IMPORTANT: Change this to match your remote name
MOUNT_POINT = "/content/sharepoint"
# --- ^ ^ ^ --- End of Configuration --- ^ ^ ^ ---
# --- Step 1: Install system dependencies ---
print("โ๏ธ Installing system packages (fuse3)...")
!sudo apt-get install -y fuse3 > /dev/null 2>&1
print("โ๏ธ Installing rclone...")
!curl https://rclone.org/install.sh | sudo bash > /dev/null 2>&1
# --- Step 2: Fetch the rclone config from Secret Manager ---
print("๐ Authenticating to Google Cloud...")
auth.authenticate_user()
client = secretmanager.SecretManagerServiceClient()
print(f"๐ Fetching secret: {SECRET_RESOURCE_NAME}")
try:
response = client.access_secret_version(name=SECRET_RESOURCE_NAME)
new_config_content = response.payload.data.decode("UTF-8")
print("โ
Secret fetched successfully!")
except Exception as e:
print(f"๐จ ERROR: Could not fetch secret. Please check the SECRET_RESOURCE_NAME and your permissions.")
raise e
# --- Step 3: Write or Append the rclone config file (SAFE VERSION) ---
print("๐ Checking rclone configuration...")
rclone_config_path = "/root/.config/rclone/rclone.conf"
os.makedirs(os.path.dirname(rclone_config_path), exist_ok=True)
# Read the existing config file if it exists
existing_config = ""
if os.path.exists(rclone_config_path):
with open(rclone_config_path, "r") as f:
existing_config = f.read()
# Check if the new remote name is already in the config to avoid duplicates
remote_header = f"[{REMOTE_NAME}]"
if remote_header in existing_config:
print(f"โ
Configuration for '{REMOTE_NAME}' already exists. Skipping write.")
else:
# If it doesn't exist, append the new config
print(f"โ Appending new configuration for '{REMOTE_NAME}'...")
# Add newlines for clean separation
combined_config = existing_config.strip() + "\n\n" + new_config_content.strip()
with open(rclone_config_path, "w") as f:
f.write(combined_config)
print("โ
Configuration updated successfully.")
# --- Step 4: Mount the drive using nohup for stability ---
print(f"๐ Mounting '{REMOTE_NAME}:' at '{MOUNT_POINT}'...")
os.makedirs(MOUNT_POINT, exist_ok=True)
mount_command = f"""
nohup rclone mount {REMOTE_NAME}: {MOUNT_POINT} \
--allow-other \
--log-level INFO --log-file /tmp/rclone.log &
"""
get_ipython().system(mount_command)
# --- Step 5: Verify the mount was successful ---
print("โณ Mount command sent. Waiting 5 seconds for initialization...")
time.sleep(5)
print(f"\n๐ Checking contents of mounted directory ({MOUNT_POINT}):")
try:
contents = os.listdir(MOUNT_POINT)
if not contents:
print("โ ๏ธ Directory is empty. Mount might be successful but the remote root is empty, or there could be an issue.")
print(" Check the log for errors: !cat /tmp/rclone.log")
else:
print("โ
Mount successful! Directory contents:")
!ls -l {MOUNT_POINT} | head -n 10
except Exception as e:
print(f"๐จ ERROR: Failed to list directory contents. The mount probably failed.")
print(" Check the rclone log file for more details by running this command in a new cell:")
print(" !cat /tmp/rclone.log")โ๏ธ Installing system packages (fuse3)...
โ๏ธ Installing rclone...
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 4734 100 4734 0 0 9109 0 --:--:-- --:--:-- --:--:-- 9103
๐ Authenticating to Google Cloud...
๐ Fetching secret: projects/789057049651/secrets/RCLONE_CONFIG_CNCLAB/versions/latest
โ
Secret fetched successfully!
๐ Checking rclone configuration...
โ Appending new configuration for 'sharepoint_cnclab'...
โ
Configuration updated successfully.
๐ Mounting 'sharepoint_cnclab:' at '/content/sharepoint'...
nohup: appending output to 'nohup.out'
โณ Mount command sent. Waiting 5 seconds for initialization...
๐ Checking contents of mounted directory (/content/sharepoint):
โ
Mount successful! Directory contents:
total 1
drwxr-xr-x 1 root root 0 Nov 2 2024 Admin
drwxr-xr-x 1 root root 0 Oct 15 2024 Automations
drwxr-xr-x 1 root root 0 Jan 20 2025 content
drwxr-xr-x 1 root root 0 Jan 14 2025 docs.cnclab
-rw-r--r-- 1 root root 0 Jan 22 2025 gdrive_cnclab
drwxr-xr-x 1 root root 0 Nov 2 2024 Projects
drwxr-xr-x 1 root root 0 Jan 28 2025 pub
-rw-r--r-- 1 root root 593 Nov 2 2024 README.md
drwxr-xr-x 1 root root 0 Nov 2 2024 Resources
EDIT COLAB _ GITHUB