本文所用qemu为1.5版本的,不是android emulator的。
之前几篇文章介绍的都是android emulator中的设备模拟。一些是android自己加的platform bus上的虚拟设备;一些是qemu自己的虚拟设备,但是这部分代码很旧,没有使用到QOM模型。
qemu 1.1及之后的版本开始有了QOM模型。QOM很大一部分代码是为了用C实现类似C++的继承:公用的东西放到ObjectClass里,只有一个实例;其他的放到Object里,可以有多个实例。
PS:android emulator的代码对应了qemu 0.1x的代码,但是又有一些新版本的qemu的代码porting上去了。
QOM设备模型可以看:
1、Qemu中的设备注册:http://ytliu.info/blog/2015/01/10/qemushe-bei-chu-shi-hua/
2、QEMU 设备模拟:http://mnstory.net/wp-content/uploads/2014/10/qemu-device-simulation/qemu-device-simulation.pdf
第二篇pdf讲得非常详细了,但是最后关于PMIO地址和读写函数如何对应起来,还是有些没讲清楚的地方。
本文针对这个问题进行一些补充。
初始化内存空间
在memory_map_init(main->cpu_exec_init_all->memory_map_init)中,会设置MemoryRegion改变时的回调函数,memory_map_init是在设备注册之前调用的:
/*
 * Allocate the two root memory regions ("system" and "io"), wrap each one in
 * its address space, and register the memory listeners.
 *
 * The statement order is significant: address spaces and listeners are
 * registered in the order written here.
 */
static void memory_map_init(void)
{
    /* Root region backing address_space_memory. */
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    /* Root region backing address_space_io; initialised with size 65536. */
    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    /* Each listener is registered together with the address space it filters on. */
    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);

    dma_context_init(&dma_context_memory, &address_space_memory,
                     NULL, NULL, NULL);
}
这个是PMIO的listener,PMIO的MemoryRegion改变后,会调用io_region_add函数,映射PMIO地址和设备读写函数。
普通内存是其他的listener。
/*
 * Listener for the I/O address space: region additions and removals are
 * routed to io_region_add() / io_region_del().
 */
static MemoryListener io_memory_listener = {
    .priority   = 0,
    .region_del = io_region_del,
    .region_add = io_region_add,
};
把注册的listener添加到全局的memory_listeners链表中:
/*
 * Link @listener into the global memory_listeners queue and then call
 * listener_add_address_space() for it on every entry of address_spaces.
 *
 * @listener: listener to register
 * @filter:   stored in listener->address_space_filter
 */
void memory_listener_register(MemoryListener *listener, AddressSpace *filter)
{
    MemoryListener *pos = NULL;
    AddressSpace *space;

    /* Record which AddressSpace this listener handles. */
    listener->address_space_filter = filter;

    if (!QTAILQ_EMPTY(&memory_listeners)
        && listener->priority < QTAILQ_LAST(&memory_listeners,
                                            memory_listeners)->priority) {
        /* Stop at the first queued listener with a strictly higher priority. */
        QTAILQ_FOREACH(pos, &memory_listeners, link) {
            if (listener->priority < pos->priority) {
                break;
            }
        }
        QTAILQ_INSERT_BEFORE(pos, listener, link);
    } else {
        /* Empty queue, or priority not lower than the last entry: append. */
        QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);
    }

    QTAILQ_FOREACH(space, &address_spaces, address_spaces_link) {
        listener_add_address_space(listener, space);
    }
}
对AddressSpace(也就是根MemoryRegion)中的每一个MemoryRegion进行一下listener->region_add
/*
 * Report every flat range currently in @as to @listener through its
 * region_add callback.
 *
 * Does nothing when the listener has an address_space_filter that is not @as.
 * If global_dirty_log is set and the listener has a log_global_start
 * callback, that callback is invoked before the ranges are walked.
 *
 * @listener: listener to notify
 * @as:       address space whose current_map is walked
 */
static void listener_add_address_space(MemoryListener *listener,
                                       AddressSpace *as)
{
    FlatRange *fr;

    if (listener->address_space_filter
        && listener->address_space_filter != as) {
        return;
    }

    if (global_dirty_log) {
        if (listener->log_global_start) {
            listener->log_global_start(listener);
        }
    }

    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
        MemoryRegionSection section = {
            .mr = fr->mr,
            .address_space = as,
            .offset_within_region = fr->offset_in_region,
            .size = int128_get64(fr->addr.size),
            .offset_within_address_space = int128_get64(fr->addr.start),
            .readonly = fr->readonly,
        };
        if (listener->region_add) {
            /* The callback takes a pointer to the section, not the section. */
            listener->region_add(listener, &section);
        }
    }
}
添加PIT设备
MemoryRegion有修改,更新,调用io_region_add函数
添加pit设备时,会调用到memory_region_add_subregion函数,MemoryRegion被修改了,然后会调用到memory_region_transaction_commit函数,更新地址空间,调用listener,是在这里映射PMIO地址和设备读写函数的。
注意memory_region_transaction_depth的使用,保证多层调用时,只需要更新一次。
/*
 * Close one level of a memory-region transaction.
 *
 * The topology of every address space is rebuilt only when the nesting depth
 * drops to zero and an update is pending, so nested transactions trigger a
 * single rebuild.
 */
void memory_region_transaction_commit(void)
{
    AddressSpace *as;

    assert(memory_region_transaction_depth);
    --memory_region_transaction_depth;

    /* Still nested, or nothing changed: no rebuild. */
    if (memory_region_transaction_depth || !memory_region_update_pending) {
        return;
    }

    memory_region_update_pending = false;

    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        address_space_update_topology(as);
    }
    MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
}
真正的更新操作:
/*
 * Rebuild the flat view of @as from its root region and notify listeners of
 * the differences.
 *
 * The diff is walked twice: first with adding == false, then with
 * adding == true. Afterwards the new view replaces the current map, the old
 * view is destroyed, and the ioeventfds are updated.
 */
static void address_space_update_topology(AddressSpace *as)
{
    FlatView previous = *as->current_map;
    FlatView rebuilt = generate_memory_topology(as->root);

    address_space_update_topology_pass(as, previous, rebuilt, false);
    address_space_update_topology_pass(as, previous, rebuilt, true);

    *as->current_map = rebuilt;
    flatview_destroy(&previous);
    address_space_update_ioeventfds(as);
}
/*
 * Build a FlatView for the region tree rooted at @mr.
 *
 * A NULL @mr skips rendering; the view is still initialised and simplified,
 * and is returned by value.
 */
static FlatView generate_memory_topology(MemoryRegion *mr)
{
    FlatView flat;

    flatview_init(&flat);

    if (mr != NULL) {
        /* Render from base 0, clipped to the range [0, int128_2_64()). */
        render_memory_region(&flat, mr, int128_zero(),
                             addrrange_make(int128_zero(), int128_2_64()),
                             false);
    }

    flatview_simplify(&flat);
    return flat;
}
FlatView有点像把链表描述的MemoryRegion搞成了FlatRange数组(FlatRange中记录了MemoryRegion,自然可以获得MemoryRegion中的pit_ioport_ops):
/*
 * Recursively render @mr and its subregions into @view as FlatRange entries.
 *
 * @view:     flat view being built; new ranges are inserted into it
 * @mr:       region to render; skipped entirely when not enabled
 * @base:     address accumulated from the parents; mr->addr is added to it
 * @clip:     address window; anything of @mr outside it is not rendered
 * @readonly: read-only flag inherited from the parents, OR-ed with mr->readonly
 *
 * Alias regions are rendered by recursing into the alias target with an
 * adjusted base. Only regions with mr->terminates set produce ranges of their
 * own, and only in the gaps that the subregions left in the view.
 */
static void render_memory_region(FlatView *view,
                                 MemoryRegion *mr,
                                 Int128 base,
                                 AddrRange clip,
                                 bool readonly)
{
    MemoryRegion *subregion;
    unsigned i;
    hwaddr offset_in_region;
    Int128 remain;
    Int128 now;
    FlatRange fr;
    AddrRange tmp;

    if (!mr->enabled) {
        return;
    }

    int128_addto(&base, int128_make64(mr->addr));
    readonly |= mr->readonly;

    tmp = addrrange_make(base, mr->size);

    /* Nothing to do when this region lies completely outside the clip window. */
    if (!addrrange_intersects(tmp, clip)) {
        return;
    }

    clip = addrrange_intersection(tmp, clip);

    if (mr->alias) {
        /* Rebase so that the alias target is rendered at this region's place. */
        int128_subfrom(&base, int128_make64(mr->alias->addr));
        int128_subfrom(&base, int128_make64(mr->alias_offset));
        render_memory_region(view, mr->alias, base, clip, readonly);
        return;
    }

    /* Render subregions in priority order. */
    QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
        render_memory_region(view, subregion, base, clip, readonly);
    }

    if (!mr->terminates) {
        return;
    }

    offset_in_region = int128_get64(int128_sub(clip.start, base));
    base = clip.start;
    remain = clip.size;

    /* Render the region itself into any gaps left by the current view. */
    for (i = 0; i < view->nr && int128_nz(remain); ++i) {
        /* Existing range ends at or before base: skip it. */
        if (int128_ge(base, addrrange_end(view->ranges[i].addr))) {
            continue;
        }
        if (int128_lt(base, view->ranges[i].addr.start)) {
            /* Gap in front of ranges[i]: fill it with a range for this region. */
            now = int128_min(remain,
                             int128_sub(view->ranges[i].addr.start, base));
            fr.mr = mr;
            fr.offset_in_region = offset_in_region;
            fr.addr = addrrange_make(base, now);
            fr.dirty_log_mask = mr->dirty_log_mask;
            fr.readable = mr->readable;
            fr.readonly = readonly;
            flatview_insert(view, i, &fr);
            /* Step past the entry just inserted at index i. */
            ++i;
            int128_addto(&base, now);
            offset_in_region += int128_get64(now);
            int128_subfrom(&remain, now);
        }
        /* Advance past the part already covered by the existing range. */
        now = int128_sub(int128_min(int128_add(base, remain),
                                    addrrange_end(view->ranges[i].addr)),
                         base);
        int128_addto(&base, now);
        offset_in_region += int128_get64(now);
        int128_subfrom(&remain, now);
    }
    /* Whatever is left lies beyond the last existing range. */
    if (int128_nz(remain)) {
        fr.mr = mr;
        fr.offset_in_region = offset_in_region;
        fr.addr = addrrange_make(base, remain);
        fr.dirty_log_mask = mr->dirty_log_mask;
        fr.readable = mr->readable;
        fr.readonly = readonly;
        flatview_insert(view, i, &fr);
    }
}
MemoryRegion被修改的话,如果是有添加,那么会调用到MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add),添加PMIO地址和设备的映射关系。
/*
 * Walk @old_view and @new_view in parallel and notify the memory listeners
 * about the differences.
 *
 * @as:       address space the views belong to
 * @old_view: flat view before the change
 * @new_view: flat view after the change
 * @adding:   false: only emit region_del for ranges that disappeared or changed;
 *            true: emit region_add for new ranges, and region_nop plus
 *            log_start/log_stop for ranges present in both views
 */
static void address_space_update_topology_pass(AddressSpace *as,
                                               FlatView old_view,
                                               FlatView new_view,
                                               bool adding)
{
    unsigned iold, inew;
    FlatRange *frold, *frnew;

    /* Generate a symmetric difference of the old and new memory maps.
     * Kill ranges in the old map, and instantiate ranges in the new map.
     */
    iold = inew = 0;
    while (iold < old_view.nr || inew < new_view.nr) {
        /* A NULL cursor means that view has been exhausted. */
        if (iold < old_view.nr) {
            frold = &old_view.ranges[iold];
        } else {
            frold = NULL;
        }
        if (inew < new_view.nr) {
            frnew = &new_view.ranges[inew];
        } else {
            frnew = NULL;
        }

        if (frold
            && (!frnew
                || int128_lt(frold->addr.start, frnew->addr.start)
                || (int128_eq(frold->addr.start, frnew->addr.start)
                    && !flatrange_equal(frold, frnew)))) {
            /* In old, but (not in new, or in new but attributes changed). */

            if (!adding) {
                MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del);
            }

            ++iold;
        } else if (frold && frnew && flatrange_equal(frold, frnew)) {
            /* In both (logging may have changed) */

            if (adding) {
                MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
                if (frold->dirty_log_mask && !frnew->dirty_log_mask) {
                    MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop);
                } else if (frnew->dirty_log_mask && !frold->dirty_log_mask) {
                    MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start);
                }
            }

            ++iold;
            ++inew;
        } else {
            /* In new */

            if (adding) {
                MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add);
            }

            ++inew;
        }
    }
}
/*
 * Build a temporary MemoryRegionSection describing flat range @fr inside
 * address space @as and pass a pointer to it to MEMORY_LISTENER_CALL, which
 * invokes @callback on the listeners in direction @dir.
 */
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback)            \
    MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) {       \
        .mr = (fr)->mr,                                                 \
        .address_space = (as),                                          \
        .offset_within_region = (fr)->offset_in_region,                 \
        .size = int128_get64((fr)->addr.size),                          \
        .offset_within_address_space = int128_get64((fr)->addr.start),  \
        .readonly = (fr)->readonly,                                     \
    }))
调用了region_add函数,也就是io_region_add函数:
/*
 * Invoke @_callback on every listener in memory_listeners that implements it
 * and for which memory_listener_match() accepts @_section.
 *
 * @_direction selects the iteration order: Forward walks the queue from the
 * head, Reverse walks it from the tail; any other value aborts.
 * Extra arguments are forwarded to the callback after @_section.
 */
#define MEMORY_LISTENER_CALL(_callback, _direction, _section, _args...) \
    do {                                                                \
        MemoryListener *_listener;                                      \
                                                                        \
        switch (_direction) {                                           \
        case Forward:                                                   \
            QTAILQ_FOREACH(_listener, &memory_listeners, link) {        \
                if (_listener->_callback                                \
                    && memory_listener_match(_listener, _section)) {    \
                    _listener->_callback(_listener, _section, ##_args); \
                }                                                       \
            }                                                           \
            break;                                                      \
        case Reverse:                                                   \
            QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners,        \
                                   memory_listeners, link) {            \
                if (_listener->_callback                                \
                    && memory_listener_match(_listener, _section)) {    \
                    _listener->_callback(_listener, _section, ##_args); \
                }                                                       \
            }                                                           \
            break;                                                      \
        default:                                                        \
            abort();                                                    \
        }                                                               \
    } while (0)
io_region_add函数处理PMIO地址和设备读写函数的映射关系
/*
 * region_add callback of the I/O listener.
 *
 * Allocates a MemoryRegionIORange that remembers the section's region and its
 * offset inside that region, initialises the embedded IORange over the
 * section's span of the address space with memory_region_iorange_ops, and
 * registers it with ioport_register().
 *
 * @listener: unused here
 * @section:  section that was added
 */
static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *port_range = g_new(MemoryRegionIORange, 1);

    port_range->mr = section->mr;
    port_range->offset = section->offset_within_region;

    iorange_init(&port_range->iorange,
                 &memory_region_iorange_ops,
                 section->offset_within_address_space,
                 section->size);
    ioport_register(&port_range->iorange);
}
memory_region_iorange_ops就是IORange->ops,其中读函数为memory_region_iorange_read,真正的设备读写函数保存在mr中:
/*
 * IORange read hook: forward a port read to the MemoryRegion behind @iorange.
 *
 * @iorange: range embedded in a MemoryRegionIORange
 * @offset:  offset of the access within the range
 * @width:   access width in bytes
 * @data:    receives the value read
 *
 * Regions without old_portio handlers go through access_with_adjusted_size().
 * Regions with them are served by the matching MemoryRegionPortio entry; a
 * 2-byte read with no 2-byte entry is assembled from two 1-byte reads. If no
 * entry applies, *data is left as all ones for the given width.
 */
static void memory_region_iorange_read(IORange *iorange,
                                       uint64_t offset,
                                       unsigned width,
                                       uint64_t *data)
{
    MemoryRegionIORange *range =
        container_of(iorange, MemoryRegionIORange, iorange);
    MemoryRegion *mr = range->mr;
    const MemoryRegionPortio *handler;

    /* From here on, offset is relative to the region. */
    offset += range->offset;

    if (!mr->ops->old_portio) {
        *data = 0;
        access_with_adjusted_size(offset, data, width,
                                  mr->ops->impl.min_access_size,
                                  mr->ops->impl.max_access_size,
                                  memory_region_read_accessor, mr);
        return;
    }

    /* find_portio() is given the offset relative to the range. */
    handler = find_portio(mr, offset - range->offset, width, false);

    *data = ((uint64_t)1 << (width * 8)) - 1;
    if (handler) {
        *data = handler->read(mr->opaque, offset);
        return;
    }
    if (width == 2) {
        handler = find_portio(mr, offset - range->offset, 1, false);
        assert(handler);
        *data = handler->read(mr->opaque, offset) |
                (handler->read(mr->opaque, offset + 1) << 8);
    }
}
这里执行真正的读写函数,也就是pit_ioport_ops。
/*
 * Perform one write on a MemoryRegion through its ops->write callback.
 *
 * @opaque: the MemoryRegion being written
 * @addr:   address passed through to the callback
 * @value:  points to the full value; the part written is
 *          (*value >> shift) & mask
 * @size:   access size passed through to the callback
 * @shift:  right shift applied to *value
 * @mask:   mask applied after shifting
 *
 * The coalesced MMIO buffer is flushed first when the region requests it.
 */
static void memory_region_write_accessor(void *opaque,
                                         hwaddr addr,
                                         uint64_t *value,
                                         unsigned size,
                                         unsigned shift,
                                         uint64_t mask)
{
    MemoryRegion *region = opaque;
    uint64_t piece;

    if (region->flush_coalesced_mmio) {
        qemu_flush_coalesced_mmio_buffer();
    }

    piece = (*value >> shift) & mask;
    region->ops->write(region->opaque, addr, piece, size);
}
注册设备的三组读写函数:
/*
 * Register read and write thunks for access sizes 1, 2 and 4 over the ports
 * [ioport->base, ioport->base + ioport->len), with @ioport as the opaque
 * value, and record the destructor thunk at index ioport->base of
 * ioport_destructor_table.
 */
void ioport_register(IORange *ioport)
{
    /* Reads: byte, word, long. */
    register_ioport_read(ioport->base, ioport->len, 1,
                         ioport_readb_thunk, ioport);
    register_ioport_read(ioport->base, ioport->len, 2,
                         ioport_readw_thunk, ioport);
    register_ioport_read(ioport->base, ioport->len, 4,
                         ioport_readl_thunk, ioport);
    /* Writes: byte, word, long. */
    register_ioport_write(ioport->base, ioport->len, 1,
                          ioport_writeb_thunk, ioport);
    register_ioport_write(ioport->base, ioport->len, 2,
                          ioport_writew_thunk, ioport);
    register_ioport_write(ioport->base, ioport->len, 4,
                          ioport_writel_thunk, ioport);
    ioport_destructor_table[ioport->base] = iorange_destructor_thunk;
}
/*
 * Install @func as the read handler of access size @size for every port in
 * [start, start + length) and associate @opaque with each of those ports.
 *
 * @start:  first port
 * @length: number of ports
 * @size:   access size; translated to a table index by ioport_bsize()
 * @func:   read handler to install
 * @opaque: value stored in ioport_opaque[] for each port
 *
 * Returns 0 on success, -1 after reporting an invalid @size. A port that
 * already carries a different non-NULL opaque is reported through hw_error().
 */
int register_ioport_read(pio_addr_t start, int length, int size,
                         IOPortReadFunc *func, void *opaque)
{
    int port, size_idx;

    if (ioport_bsize(size, &size_idx)) {
        hw_error("register_ioport_read: invalid size");
        return -1;
    }

    for (port = start; port < start + length; ++port) {
        ioport_read_table[size_idx][port] = func;
        if (ioport_opaque[port] != NULL && ioport_opaque[port] != opaque) {
            hw_error("register_ioport_read: invalid opaque for address 0x%x",
                     port);
        }
        ioport_opaque[port] = opaque;
    }

    return 0;
}
通过opaque可以获取读函数IORange->ops->read(也就是memory_region_iorange_read)。
/*
 * Byte-read thunk: @opaque is the IORange registered for the port.
 *
 * Calls the range's ops->read with the address made relative to the range
 * base and a width of 1, and returns the value read.
 */
static uint32_t ioport_readb_thunk(void *opaque, uint32_t addr)
{
    IORange *range = opaque;
    uint64_t value;

    range->ops->read(range, addr - range->base, 1, &value);

    return value;
}
对于读PMIO,KVM_EXIT_IO之后的流程是:
kvm_handle_io
->cpu_inb
->ioport_read
->ioport_read_table[0][addr](也就是ioport_readb_thunk)
->memory_region_iorange_ops(也就是IORange->ops)
->access_with_adjusted_size(需要mr,保存了pit_ioport_ops)
->memory_region_read_accessor
->mr->ops(也就是pit_ioport_ops)
PS:
1、Object的parent可能是用来搞总线结构的,比如Object是bus上的设备,parent是bus。
2、ObjectProperty里面type为child<...>的应该就是Object用来记录子Object的,也就是bus用来记录上面挂的设备的。
3、注意QObject和QType(比C语言的type多了ref),它们是用来处理ObjectProperty的属性设置的,和之前的Object、ObjectClass不同。
4、设置属性都是通过object_property_set_qobject来设置的,会生成visitor,然后调用void object_property_set(Object *obj, Visitor *v, const char *name, Error **errp)。
5、isa_create中创建了Object,调用了pit_class_initfn等初始化函数。
6、isa bus的address_space_io就是系统的system_io:
/*
 * Machine init entry point for the ISA-only PC.
 *
 * Copies the boot parameters out of @args, falls back to the "486" CPU model
 * when none was given, clears has_pvpanic, and hands everything to pc_init1()
 * together with the system memory and system I/O regions.
 */
static void pc_init_isa(QEMUMachineInitArgs *args)
{
    ram_addr_t ram_size = args->ram_size;
    const char *boot_device = args->boot_device;
    const char *kernel_filename = args->kernel_filename;
    const char *kernel_cmdline = args->kernel_cmdline;
    const char *initrd_filename = args->initrd_filename;
    const char *cpu_model = args->cpu_model ? args->cpu_model : "486";

    has_pvpanic = false;

    disable_kvm_pv_eoi();
    enable_compat_apic_id_mode();

    /* NOTE(review): the trailing 0, 1 are presumably the pci_enabled and
     * kvmclock_enabled flags -- confirm against pc_init1()'s prototype. */
    pc_init1(get_system_memory(),
             get_system_io(),
             ram_size,
             boot_device,
             kernel_filename,
             kernel_cmdline,
             initrd_filename,
             cpu_model,
             0,
             1);
}
if (pci_enabled) { pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi, system_memory, system_io, ram_size, below_4g_mem_size, 0x100000000ULL - below_4g_mem_size, 0x100000000ULL + above_4g_mem_size, (sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62)), pci_memory, ram_memory); } else { pci_bus = NULL; i440fx_state = NULL; isa_bus = isa_bus_new(NULL, system_io); no_hpet = 1; }