Source: official documentation
model = dict(
type='Pix2Pix',
generator=dict(
type='UnetGenerator',
in_channels=3,
out_channels=3,
num_down=8,
base_channels=64,
norm_cfg=dict(type='BN'),
use_dropout=True,
init_cfg=dict(type='normal', gain=0.02)),
discriminator=dict(
type='PatchDiscriminator',
in_channels=6,
base_channels=64,
num_conv=3,
norm_cfg=dict(type='BN'),
init_cfg=dict(type='normal', gain=0.02)),
gan_loss=dict(
type='GANLoss',
gan_type='vanilla',
real_label_val=1.0,
fake_label_val=0.0,
loss_weight=1.0),
pixel_loss=dict(type='L1Loss', loss_weight=100.0, reduction='mean'))
train_cfg = dict(
direction='b2a')
test_cfg = dict(
direction='b2a',
show_input=True)
train_dataset_type = 'GenerationPairedDataset'
val_dataset_type = 'GenerationPairedDataset'
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
train_pipeline = [
dict(
type='LoadPairedImageFromFile',
io_backend='disk',
key='pair',
flag='color'),
dict(
type='Resize',
keys=['img_a', 'img_b'],
scale=(286, 286),
interpolation='bicubic'),
dict(
type='FixedCrop',
keys=['img_a', 'img_b'],
crop_size=(256, 256)),
dict(
type='Flip',
keys=['img_a', 'img_b'],
direction='horizontal'),
dict(
type='RescaleToZeroOne',
keys=['img_a', 'img_b']),
dict(
type='Normalize',
keys=['img_a', 'img_b'],
to_rgb=True,
**img_norm_cfg),
dict(
type='ToTensor',
keys=['img_a', 'img_b']),
dict(
type='Collect',
keys=['img_a', 'img_b'],
meta_keys=['img_a_path', 'img_b_path'])
]
test_pipeline = [
dict(
type='LoadPairedImageFromFile',
io_backend='disk',
key='pair',
flag='color'),
dict(
type='Resize',
keys=['img_a', 'img_b'],
scale=(256, 256),
interpolation='bicubic'),
dict(
type='RescaleToZeroOne',
keys=['img_a', 'img_b']),
dict(
type='Normalize',
keys=['img_a', 'img_b'],
to_rgb=True,
**img_norm_cfg),
dict(
type='ToTensor',
keys=['img_a', 'img_b']),
dict(
type='Collect',
keys=['img_a', 'img_b'],
meta_keys=['img_a_path', 'img_b_path'])
]
data_root = 'data/pix2pix/facades'
data = dict(
samples_per_gpu=1,
workers_per_gpu=4,
drop_last=True,
val_samples_per_gpu=1,
val_workers_per_gpu=0,
train=dict(
type=train_dataset_type,
dataroot=data_root,
pipeline=train_pipeline,
test_mode=False),
val=dict(
type=val_dataset_type,
dataroot=data_root,
pipeline=test_pipeline,
test_mode=True),
test=dict(
type=val_dataset_type,
dataroot=data_root,
pipeline=test_pipeline,
test_mode=True))
optimizers = dict(
generator=dict(type='Adam', lr=2e-4, betas=(0.5, 0.999)),
discriminator=dict(type='Adam', lr=2e-4, betas=(0.5, 0.999)))
lr_config = dict(policy='Fixed', by_epoch=False)
checkpoint_config = dict(interval=4000, save_optimizer=True, by_epoch=False)
evaluation = dict(
interval=4000,
save_image=True)
log_config = dict(
interval=100,
hooks=[
dict(type='TextLoggerHook', by_epoch=False),
])
visual_config = None
total_iters = 80000
cudnn_benchmark = True
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
exp_name = 'pix2pix_facades'
work_dir = f'./work_dirs/{exp_name}'
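
The block above is a complete MMEditing-style Pix2Pix config (model, data, pipelines, optimizers, schedules, and runtime settings). As a minimal sketch, assuming the config is saved under a hypothetical path such as configs/pix2pix_facades.py, it can be loaded and inspected with mmcv's Config utility before training is launched with the repository's train script:

# Minimal sketch: load and inspect the config above with mmcv (the path is hypothetical).
from mmcv import Config

cfg = Config.fromfile('configs/pix2pix_facades.py')
print(cfg.model.generator.type)   # 'UnetGenerator'
print(cfg.data.samples_per_gpu)   # 1
print(cfg.total_iters)            # 80000

# Training is normally started through the provided script, for example:
#   python tools/train.py configs/pix2pix_facades.py
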
model = dict(
type='GLInpaintor',
encdec=dict(
type='GLEncoderDecoder',
encoder=dict(type='GLEncoder', norm_cfg=dict(type='SyncBN')),
decoder=dict(type='GLDecoder', norm_cfg=dict(type='SyncBN')),
dilation_neck=dict(
type='GLDilationNeck', norm_cfg=dict(type='SyncBN'))),
disc=dict(
type='GLDiscs',
global_disc_cfg=dict(
in_channels=3,
max_channels=512,
fc_in_channels=512 * 4 * 4,
fc_out_channels=1024,
num_convs=6,
norm_cfg=dict(type='SyncBN')),
local_disc_cfg=dict(
in_channels=3,
max_channels=512,
fc_in_channels=512 * 4 * 4,
fc_out_channels=1024,
num_convs=5,
norm_cfg=dict(type='SyncBN'))),
loss_gan=dict(
type='GANLoss',
gan_type='vanilla',
loss_weight=0.001),
loss_l1_hole=dict(
type='L1Loss',
loss_weight=1.0),
pretrained=None)
train_cfg = dict(
disc_step=1,
iter_tc=90000,
iter_td=100000,
start_iter=0,
local_size=(128, 128))
test_cfg = dict(metrics=['l1'])
dataset_type = 'ImgInpaintingDataset'
input_shape = (256, 256)
train_pipeline = [
dict(type='LoadImageFromFile', key='gt_img'),
dict(
type='LoadMask',
mask_mode='bbox',
mask_config=dict(
max_bbox_shape=(128, 128),
max_bbox_delta=40,
min_margin=20,
img_shape=input_shape)),
dict(
type='Crop',
keys=['gt_img'],
crop_size=(384, 384),
random_crop=True),
dict(
type='Resize',
keys=['gt_img'],
scale=input_shape,
keep_ratio=False),
dict(
type='Normalize',
keys=['gt_img'],
mean=[127.5] * 3,
std=[127.5] * 3,
to_rgb=False),
dict(type='GetMaskedImage'),
dict(
type='Collect',
keys=['gt_img', 'masked_img', 'mask', 'mask_bbox'],
meta_keys=['gt_img_path']),
dict(type='ToTensor', keys=['gt_img', 'masked_img', 'mask']),
dict(type='ToTensor', keys=['mask_bbox'])
]
test_pipeline = train_pipeline
data_root = 'data/places365'
data = dict(
samples_per_gpu=12,
workers_per_gpu=8,
val_samples_per_gpu=1,
val_workers_per_gpu=8,
drop_last=True,
train=dict(
type=dataset_type,
ann_file=f'{data_root}/train_places_img_list_total.txt',
data_prefix=data_root,
pipeline=train_pipeline,
test_mode=False),
val=dict(
type=dataset_type,
ann_file=f'{data_root}/val_places_img_list.txt',
data_prefix=data_root,
pipeline=test_pipeline,
test_mode=True))
optimizers = dict(
generator=dict(type='Adam', lr=0.0004), disc=dict(type='Adam', lr=0.0004))
lr_config = dict(policy='Fixed', by_epoch=False)
checkpoint_config = dict(by_epoch=False, interval=50000)
log_config = dict(
interval=100,
hooks=[
dict(type='TextLoggerHook', by_epoch=False),
])
visual_config = dict(
type='VisualizationHook',
output_dir='visual',
interval=1000,
res_name_list=[
'gt_img', 'masked_img', 'fake_res', 'fake_img', 'fake_gt_local'
])
evaluation = dict(interval=50000)
total_iters = 500002
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = None
load_from = None
resume_from = None
workflow = [('train', 10000)]
exp_name = 'gl_places'
find_unused_parameters = False
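
train_cfg above encodes the staged schedule of the Global&Local method: roughly, only the reconstruction (L1 hole) loss is used for the first iter_tc iterations, the discriminators are then trained until iter_td, and afterwards generator and discriminators are updated jointly. The helper below is an illustrative sketch of such iteration-gated phases, not the actual GLInpaintor logic:

# Illustrative sketch of a three-phase schedule gated by iteration thresholds.
def training_phase(cur_iter, iter_tc=90000, iter_td=100000):
    if cur_iter < iter_tc:
        return 'reconstruction_only'   # generator trained with the L1 hole loss only
    if cur_iter < iter_td:
        return 'discriminator_only'    # global/local discriminators trained alone
    return 'joint'                     # adversarial + reconstruction losses together

assert training_phase(50000) == 'reconstruction_only'
assert training_phase(95000) == 'discriminator_only'
assert training_phase(200000) == 'joint'
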
model = dict(
type='DIM',
backbone=dict(
type='SimpleEncoderDecoder',
encoder=dict(
type='VGG16'),
decoder=dict(
type='PlainDecoder')),
pretrained='./weights/vgg_state_dict.pth',
loss_alpha=dict(
type='CharbonnierLoss',
loss_weight=0.5),
loss_comp=dict(
type='CharbonnierCompLoss',
loss_weight=0.5))
train_cfg = dict(
train_backbone=True,
train_refiner=False)
test_cfg = dict(
refine=False,
metrics=['SAD', 'MSE', 'GRAD', 'CONN'])
dataset_type = 'AdobeComp1kDataset'
data_root = 'data/adobe_composition-1k'
img_norm_cfg = dict(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
to_rgb=True)
train_pipeline = [
dict(
type='LoadImageFromFile',
key='alpha',
flag='grayscale'),
dict(
type='LoadImageFromFile',
key='fg'),
dict(
type='LoadImageFromFile',
key='bg'),
dict(
type='LoadImageFromFile',
key='merged'),
dict(
type='CropAroundUnknown',
keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg'],
crop_sizes=[320, 480, 640]),
dict(
type='Flip',
keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg']),
dict(
type='Resize',
keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg'],
scale=(320, 320),
keep_ratio=False),
dict(
type='GenerateTrimap',
kernel_size=(1, 30)),
dict(
type='RescaleToZeroOne',
keys=['merged', 'alpha', 'ori_merged', 'fg', 'bg']),
dict(
type='Normalize',
keys=['merged'],
**img_norm_cfg),
dict(
type='Collect',
keys=['merged', 'alpha', 'trimap', 'ori_merged', 'fg', 'bg'],
meta_keys=[]),
dict(
type='ToTensor',
keys=['merged', 'alpha', 'trimap', 'ori_merged', 'fg', 'bg']),
]
test_pipeline = [
dict(
type='LoadImageFromFile',
key='alpha',
flag='grayscale',
save_original_img=True),
dict(
type='LoadImageFromFile',
key='trimap',
flag='grayscale',
save_original_img=True),
dict(
type='LoadImageFromFile',
key='merged'),
dict(
type='Pad',
keys=['trimap', 'merged'],
mode='reflect'),
dict(
type='RescaleToZeroOne',
keys=['merged', 'ori_alpha']),
dict(
type='Normalize',
keys=['merged'],
**img_norm_cfg),
dict(
type='Collect',
keys=['merged', 'trimap'],
meta_keys=[
'merged_path', 'pad', 'merged_ori_shape', 'ori_alpha',
'ori_trimap'
]),
dict(
type='ToTensor',
keys=['merged', 'trimap']),
]
data = dict(
samples_per_gpu=1,
workers_per_gpu=4,
drop_last=True,
train=dict(
type=dataset_type,
ann_file=f'{data_root}/training_list.json',
data_prefix=data_root,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=f'{data_root}/test_list.json',
data_prefix=data_root,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=f'{data_root}/test_list.json',
data_prefix=data_root,
pipeline=test_pipeline))
optimizers = dict(type='Adam', lr=0.00001)
lr_config = dict(
policy='Fixed')
checkpoint_config = dict(
interval=40000,
by_epoch=False)
evaluation = dict(
interval=40000)
log_config = dict(
interval=10,
hooks=[
dict(type='TextLoggerHook', by_epoch=False),
])
total_iters = 1000000
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/dim_stage1'
load_from = None
resume_from = None
workflow = [('train', 1)]
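
The two DIM stage-1 losses act on complementary targets: loss_alpha compares the predicted alpha matte against the ground truth, while loss_comp compares the image recomposed from that matte against the real merged image. Below is a minimal NumPy sketch of the underlying compositing equation I = alpha * F + (1 - alpha) * B (illustrative only, not the CharbonnierCompLoss implementation):

import numpy as np

def composite(alpha, fg, bg):
    # Recompose an image from an alpha matte: I = alpha * F + (1 - alpha) * B.
    alpha = alpha[..., None]  # broadcast the HxW matte over the RGB channels
    return alpha * fg + (1.0 - alpha) * bg

alpha = np.array([[1.0, 0.0], [0.5, 0.25]])   # toy 2x2 matte
fg = np.full((2, 2, 3), 200.0)                # constant foreground
bg = np.full((2, 2, 3), 40.0)                 # constant background
merged = composite(alpha, fg, bg)
print(merged[0, 0])  # [200. 200. 200.] -> pure foreground where alpha is 1
print(merged[1, 0])  # [120. 120. 120.] -> 0.5 * 200 + 0.5 * 40
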
exp_name = 'edsr_x2c64b16_1x16_300k_div2k'
scale = 2
model = dict(
type='BasicRestorer',
generator=dict(
type='EDSR',
in_channels=3,
out_channels=3,
mid_channels=64,
num_blocks=16,
upscale_factor=scale,
res_scale=1,
rgb_mean=(0.4488, 0.4371, 0.4040),
rgb_std=(1.0, 1.0, 1.0)),
pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'))
train_cfg = None
test_cfg = dict(
metrics=['PSNR'],
crop_border=scale)
train_dataset_type = 'SRAnnotationDataset'
val_dataset_type = 'SRFolderDataset'
train_pipeline = [
dict(type='LoadImageFromFile',
io_backend='disk',
key='lq',
flag='unchanged'),
dict(type='LoadImageFromFile',
io_backend='disk',
key='gt',
flag='unchanged'),
dict(type='RescaleToZeroOne', keys=['lq', 'gt']),
dict(type='Normalize',
keys=['lq', 'gt'],
mean=[0, 0, 0],
std=[1, 1, 1],
to_rgb=True),
dict(type='PairedRandomCrop', gt_patch_size=96),
dict(type='Flip',
keys=['lq', 'gt'],
flip_ratio=0.5,
direction='horizontal'),
dict(type='Flip',
keys=['lq', 'gt'],
flip_ratio=0.5,
direction='vertical'),
dict(type='RandomTransposeHW',
keys=['lq', 'gt'],
transpose_ratio=0.5),
dict(type='Collect',
keys=['lq', 'gt'],
meta_keys=['lq_path', 'gt_path']),
dict(type='ToTensor',
keys=['lq', 'gt'])
]
test_pipeline = [
dict(
type='LoadImageFromFile',
io_backend='disk',
key='lq',
flag='unchanged'),
dict(
type='LoadImageFromFile',
io_backend='disk',
key='gt',
flag='unchanged'),
dict(type='RescaleToZeroOne', keys=['lq', 'gt']),
dict(
type='Normalize',
keys=['lq', 'gt'],
mean=[0, 0, 0],
std=[1, 1, 1],
to_rgb=True),
dict(type='Collect',
keys=['lq', 'gt'],
meta_keys=['lq_path', 'gt_path']),
dict(type='ToTensor',
keys=['lq', 'gt'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=6,
drop_last=True,
train=dict(
type='RepeatDataset',
times=1000,
dataset=dict(
type=train_dataset_type,
lq_folder='data/DIV2K/DIV2K_train_LR_bicubic/X2_sub',
gt_folder='data/DIV2K/DIV2K_train_HR_sub',
ann_file='data/DIV2K/meta_info_DIV2K800sub_GT.txt',
pipeline=train_pipeline,
scale=scale)),
val_samples_per_gpu=1,
val_workers_per_gpu=1,
val=dict(
type=val_dataset_type,
lq_folder='data/val_set5/Set5_bicLRx2',
gt_folder='data/val_set5/Set5_mod12',
pipeline=test_pipeline,
scale=scale,
filename_tmpl='{}'),
test=dict(
type=val_dataset_type,
lq_folder='data/val_set5/Set5_bicLRx2',
gt_folder='data/val_set5/Set5_mod12',
pipeline=test_pipeline,
scale=scale,
filename_tmpl='{}'))
optimizers = dict(generator=dict(type='Adam', lr=1e-4, betas=(0.9, 0.999)))
total_iters = 300000
lr_config = dict(
policy='Step', by_epoch=False, step=[200000], gamma=0.5)
checkpoint_config = dict(
interval=5000,
save_optimizer=True,
by_epoch=False)
evaluation = dict(
interval=5000,
save_image=True,
gpu_collect=True)
log_config = dict(
interval=100,
hooks=[
dict(type='TextLoggerHook', by_epoch=False),
dict(type='TensorboardLoggerHook'),
])
visual_config = None
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = f'./work_dirs/{exp_name}'
load_from = None
resume_from = None
workflow = [('train', 1)]
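
The EDSR schedule above is iteration-based: Adam starts at lr=1e-4 and the 'Step' policy multiplies the rate by gamma=0.5 at the single milestone step=[200000], so training runs at 1e-4 up to iteration 200k and at 5e-5 for the remaining 100k of the 300k total iterations. A small sketch of that decay rule (not the mmcv LR hook itself):

# Sketch of the step decay configured above: halve the learning rate at 200k iterations.
def step_lr(cur_iter, base_lr=1e-4, milestones=(200000,), gamma=0.5):
    passed = sum(cur_iter >= m for m in milestones)
    return base_lr * (gamma ** passed)

assert step_lr(0) == 1e-4
assert step_lr(199999) == 1e-4
assert step_lr(200000) == 5e-5
assert step_lr(299999) == 5e-5
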
MMagic (Multimodal Advanced, Generative, and Intelligent Creation) is an open-source AIGC toolbox for professional AI researchers and machine learning engineers to process, edit, and generate images and videos. MMagic lets researchers and engineers use state-of-the-art pre-trained models and easily train and develop new customized models. Supported model families include unconditional GANs, conditional GANs, internal learning, and diffusion models, with many more generative models on the way; supported tasks include text-to-image generation, image-to-image translation, 3D-aware generation, and image super-resolution.
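
As a quick illustration of the "use pre-trained models" workflow, MMagic exposes a high-level inferencer API; the snippet below is only a sketch, and the model name and file paths are placeholders that should be checked against the official model zoo and API docs:

# Sketch of high-level inference with MMagic; model name and paths are placeholders.
from mmagic.apis import MMagicInferencer

# Build an inferencer for an image-to-image translation model from the model zoo.
editor = MMagicInferencer(model_name='pix2pix')

# Translate an input image and write the result to disk.
editor.infer(img='path/to/input.png', result_out_dir='path/to/output.png')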